# Source code for bfgn.reporting.visualizations.samples

import itertools
from typing import Iterator, List

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np

from bfgn.reporting import samples
from bfgn.reporting.visualizations import subplots


# Sample-type selectors: the public plot_*_samples wrappers pass one of these to
# _plot_samples, which uses it to choose the per-sample plotting routine.
LABEL_CLASSIFICATION = 'CLASSIFICATION'
LABEL_REGRESSION = 'REGRESSION'


def plot_classification_samples(
        sampled: samples.Samples,
        max_pages: int = 8,
        max_samples_per_page: int = 10,
        max_features_per_page: int = 5,
        max_responses_per_page: int = 5
) -> List[plt.Figure]:
    """Plot paged sample figures using the classification layout.

    Thin wrapper that forwards every argument to ``_plot_samples`` with
    ``sample_type=LABEL_CLASSIFICATION``.

    Args:
        sampled: Samples object to plot.
        max_pages: Upper bound on the number of figures produced.
        max_samples_per_page: Upper bound on the number of samples (rows) per figure.
        max_features_per_page: Upper bound on the number of features plotted per sample.
        max_responses_per_page: Upper bound on the number of responses plotted per sample.

    Returns:
        List of figures, one per page.
    """
    return _plot_samples(
        sampled,
        max_pages,
        max_samples_per_page,
        max_features_per_page,
        max_responses_per_page,
        sample_type=LABEL_CLASSIFICATION,
    )
def plot_regression_samples(
        sampled: samples.Samples,
        max_pages: int = 8,
        max_samples_per_page: int = 10,
        max_features_per_page: int = 5,
        max_responses_per_page: int = 5
) -> List[plt.Figure]:
    """Plot paged sample figures using the regression layout.

    Thin wrapper that forwards every argument to ``_plot_samples`` with
    ``sample_type=LABEL_REGRESSION``.

    Args:
        sampled: Samples object to plot.
        max_pages: Upper bound on the number of figures produced.
        max_samples_per_page: Upper bound on the number of samples (rows) per figure.
        max_features_per_page: Upper bound on the number of features plotted per sample.
        max_responses_per_page: Upper bound on the number of responses plotted per sample.

    Returns:
        List of figures, one per page.
    """
    return _plot_samples(
        sampled,
        max_pages,
        max_samples_per_page,
        max_features_per_page,
        max_responses_per_page,
        sample_type=LABEL_REGRESSION,
    )
def _plot_samples(
        sampled: samples.Samples,
        max_pages: int,
        max_samples_per_page: int,
        max_features_per_page: int,
        max_responses_per_page: int,
        sample_type: str
) -> List[plt.Figure]:
    """Build one figure per page, with one grid row per sample.

    Args:
        sampled: Samples object to plot; ``num_samples``, ``num_features``,
            ``num_responses`` and ``data_sequence_label`` are read here.
        max_pages: Upper bound on the number of figures produced.
        max_samples_per_page: Number of grid rows per figure.
        max_features_per_page: Upper bound on the features plotted per sample.
        max_responses_per_page: Upper bound on the responses plotted per sample.
        sample_type: ``LABEL_CLASSIFICATION`` or ``LABEL_REGRESSION``.

    Returns:
        List of figures, one per page.

    Raises:
        ValueError: If ``sample_type`` is not one of the two known labels.
    """
    # Compare by value: identity of equal strings is an interpreter detail.
    if sample_type == LABEL_CLASSIFICATION:
        sample_plotter = _plot_classification_sample
    elif sample_type == LABEL_REGRESSION:
        sample_plotter = _plot_regression_sample
    else:
        raise ValueError('Unknown sample_type: {!r}'.format(sample_type))

    # Calculate figure parameters
    figures = list()
    # np.ceil returns a float, which range() rejects, so convert explicitly.
    num_pages = min(max_pages, int(np.ceil(sampled.num_samples / max_samples_per_page)))
    num_features = min(max_features_per_page, sampled.num_features)
    num_responses = min(max_responses_per_page, sampled.num_responses)

    # Dry run with an endless supply of None axes, used only to learn how many
    # subplot columns a sample needs.
    # NOTE(review): relies on the subplots helpers accepting None as the axis -- confirm.
    null_axes = itertools.repeat(None)
    num_subplots = sample_plotter(sampled, 0, num_features, num_responses, null_axes)

    label = sampled.data_sequence_label
    title_prefix = label + ' ' if label else ''

    # Iterate through pages and samples
    for idx_page in range(num_pages):
        width = 1.5 * num_subplots
        height = 1.5 * max_samples_per_page
        fig = plt.figure(figsize=(width, height))
        grid = gridspec.GridSpec(max_samples_per_page, num_subplots)
        idx_first = idx_page * max_samples_per_page
        # Stop at the last available sample so a partial final page is not over-indexed.
        idx_stop = min(idx_first + max_samples_per_page, sampled.num_samples)
        # The grid row is the position within the page, not the global sample index.
        for idx_row, idx_sample in enumerate(range(idx_first, idx_stop)):
            sample_axes = iter(
                [plt.subplot(grid[idx_row, idx_subplot]) for idx_subplot in range(num_subplots)])
            sample_plotter(sampled, idx_sample, num_features, num_responses, sample_axes)
        fig.suptitle('{}Sequence Samples (page {})'.format(title_prefix, idx_page + 1))
        figures.append(fig)
    return figures


def _plot_classification_sample(
        sampled: samples.Samples,
        idx_sample: int,
        num_features: int,
        num_responses: int,
        sample_axes: Iterator = None
) -> int:
    """Plot one classification sample across a row of axes and count the subplots used.

    Axes are consumed from ``sample_axes`` in plotting order. The two trailing
    positional flags handed to each ``subplots`` helper are ``idx_sample == 0``
    and (for the raw-feature plots only) ``idx_feature == 0``; their meaning is
    defined by the ``subplots`` module.

    Args:
        sampled: Samples object to plot.
        idx_sample: Index of the sample to plot.
        num_features: Number of features to plot.
        num_responses: Number of responses to plot.
        sample_axes: Iterator yielding one axis per subplot; when ``None``, an
            endless iterator of ``None`` is used.

    Returns:
        Number of subplots drawn for this sample.
    """
    if sample_axes is None:
        sample_axes = itertools.repeat(None)
    is_first_sample = idx_sample == 0
    num_subplots = 0
    for idx_feature in range(num_features):
        num_subplots += 1
        subplots.plot_raw_features(
            sampled, idx_sample, idx_feature, next(sample_axes), is_first_sample, idx_feature == 0)
    for idx_feature in range(num_features):
        num_subplots += 1
        subplots.plot_transformed_features(
            sampled, idx_sample, idx_feature, next(sample_axes), is_first_sample, False)
    num_subplots += 1
    # NOTE(review): idx_feature is the value left over from the loop above (the last
    # feature index; undefined when num_features == 0) -- confirm this is the index
    # plot_categorical_responses expects.
    subplots.plot_categorical_responses(
        sampled, idx_sample, idx_feature, next(sample_axes), is_first_sample, False)
    if sampled.raw_predictions is not None:
        num_subplots += 1
        subplots.plot_classification_predictions_max_likelihood(
            sampled, idx_sample, next(sample_axes), is_first_sample, False)
        num_subplots += 1
        subplots.plot_binary_error_classification(
            sampled, idx_sample, next(sample_axes), is_first_sample, False)
        for idx_response in range(num_responses):
            num_subplots += 1
            subplots.plot_raw_predictions(
                sampled, idx_sample, idx_response, next(sample_axes), is_first_sample, False)
    num_subplots += 1
    subplots.plot_weights(sampled, idx_sample, next(sample_axes), is_first_sample, False)
    return num_subplots


def _plot_regression_sample(
        sampled: samples.Samples,
        idx_sample: int,
        num_features: int,
        num_responses: int,
        sample_axes: Iterator = None
) -> int:
    """Plot one regression sample across a row of axes and count the subplots used.

    Axes are consumed from ``sample_axes`` in plotting order. The two trailing
    positional flags handed to each ``subplots`` helper are ``idx_sample == 0``
    and (for the raw-feature plots only) ``idx_feature == 0``; their meaning is
    defined by the ``subplots`` module.

    Args:
        sampled: Samples object to plot.
        idx_sample: Index of the sample to plot.
        num_features: Number of features to plot.
        num_responses: Number of responses to plot.
        sample_axes: Iterator yielding one axis per subplot; when ``None``, an
            endless iterator of ``None`` is used.

    Returns:
        Number of subplots drawn for this sample.
    """
    if sample_axes is None:
        sample_axes = itertools.repeat(None)
    is_first_sample = idx_sample == 0
    num_subplots = 0
    for idx_feature in range(num_features):
        num_subplots += 1
        subplots.plot_raw_features(
            sampled, idx_sample, idx_feature, next(sample_axes), is_first_sample, idx_feature == 0)
    for idx_feature in range(num_features):
        num_subplots += 1
        subplots.plot_transformed_features(
            sampled, idx_sample, idx_feature, next(sample_axes), is_first_sample, False)
    for idx_response in range(num_responses):
        num_subplots += 1
        subplots.plot_raw_responses(
            sampled, idx_sample, idx_response, next(sample_axes), is_first_sample, False)
    for idx_response in range(num_responses):
        num_subplots += 1
        subplots.plot_transformed_responses(
            sampled, idx_sample, idx_response, next(sample_axes), is_first_sample, False)
    if sampled.raw_predictions is not None:
        for idx_response in range(num_responses):
            num_subplots += 1
            subplots.plot_raw_predictions(
                sampled, idx_sample, idx_response, next(sample_axes), is_first_sample, False)
        for idx_response in range(num_responses):
            num_subplots += 1
            subplots.plot_transformed_predictions(
                sampled, idx_sample, idx_response, next(sample_axes), is_first_sample, False)
        # NOTE(review): the two error plots reuse idx_response as left by the loop
        # above, i.e. only the last response gets error plots -- confirm whether
        # they were meant to be drawn per response.
        num_subplots += 1
        subplots.plot_raw_error_regression(
            sampled, idx_sample, idx_response, next(sample_axes), is_first_sample, False)
        num_subplots += 1
        subplots.plot_transformed_error_regression(
            sampled, idx_sample, idx_response, next(sample_axes), is_first_sample, False)
    num_subplots += 1
    subplots.plot_weights(sampled, idx_sample, next(sample_axes), is_first_sample, False)
    return num_subplots
def _plot_response_histogram(ax, responses, predictions) -> None:
    """Draw the response histogram outline on ``ax`` and overlay predictions if given.

    The histogram range is the NaN-ignoring min/max of ``responses``. When
    ``predictions`` is not ``None`` its outline is drawn in black over the same
    range, and an arrow annotation is added for a prediction maximum above, or a
    minimum below, that range.
    """
    hist_range = [np.nanmin(responses), np.nanmax(responses)]
    counts, edges = _get_lhist(responses, hist_range=hist_range)
    ax.plot(edges, counts, color='green')
    if predictions is None:
        return

    counts, edges = _get_lhist(predictions, hist_range=hist_range)
    # Nudge the prediction outline by 1/20 of a bin so it does not hide the
    # response outline. The outline repeats every edge, so the bin width is
    # edges[2] - edges[1]; edges[1] - edges[0] is always zero.
    ax.plot(edges + (edges[2] - edges[1]) / 20., counts, color='black')

    # Extra head-room so the annotation does not sit on top of the histogram.
    y_limits = [-0.02 * np.max(counts), np.max(counts) * 1.2]
    pred_max = np.max(predictions)
    if pred_max > hist_range[1]:
        ax.set_ylim(y_limits)
        ax.annotate('Max pred.: ' + str(np.round(pred_max, 4)),
                    xy=(1.0, 0.92), xytext=(0.90, 0.92), xycoords='axes fraction',
                    horizontalalignment='right', verticalalignment='center',
                    fontsize=8, arrowprops=dict(arrowstyle="->"))
    pred_min = np.min(predictions)
    if pred_min < hist_range[0]:
        ax.set_ylim(y_limits)
        ax.annotate('Min pred.: ' + str(np.round(pred_min, 4)),
                    xy=(0.00, 0.92), xytext=(0.10, 0.92), xycoords='axes fraction',
                    horizontalalignment='left', verticalalignment='center',
                    fontsize=8, arrowprops=dict(arrowstyle="->"))


def plot_sample_histograms(
        sampled: samples.Samples,
        max_responses_per_page: int = 15
) -> List[plt.Figure]:
    """Plot paged histograms of responses, with predictions overlaid when available.

    Each figure has one column per response. The first grid row shows
    ``sampled.trans_responses`` (with ``sampled.trans_predictions`` overlaid if
    not ``None``) and the second row shows ``sampled.raw_responses`` (with
    ``sampled.raw_predictions`` overlaid if not ``None``). Responses are taken
    along the last axis of those arrays.

    Args:
        sampled: Samples object to plot.
        max_responses_per_page: Upper bound on the number of response columns per figure.

    Returns:
        List of figures, one per page.
    """
    max_responses_per_page = min(max_responses_per_page, sampled.num_responses)
    has_trans_predictions = sampled.trans_predictions is not None
    has_raw_predictions = sampled.raw_predictions is not None

    fig_list = []
    idx_page_start = 0
    while idx_page_start < sampled.num_responses:
        fig = plt.figure(figsize=(6 * max_responses_per_page, 10))
        grid = gridspec.GridSpec(4, max_responses_per_page)
        idx_page_stop = min(idx_page_start + max_responses_per_page, sampled.num_responses)

        # The grid column is the position within the page, not the global response
        # index, which would run off the grid on every page after the first.
        for idx_column, idx_response in enumerate(range(idx_page_start, idx_page_stop)):
            is_first_column = idx_column == 0

            # Transformed space
            ax = plt.subplot(grid[0, idx_column])
            _plot_response_histogram(
                ax,
                sampled.trans_responses[..., idx_response],
                sampled.trans_predictions[..., idx_response] if has_trans_predictions else None,
            )
            if is_first_column:
                ax.set_ylabel('Transformed')
            ax.set_title('Response ' + str(idx_response))

            # Raw space
            ax = plt.subplot(grid[1, idx_column])
            _plot_response_histogram(
                ax,
                sampled.raw_responses[..., idx_response],
                sampled.raw_predictions[..., idx_response] if has_raw_predictions else None,
            )
            if has_raw_predictions:
                ax.legend(['Response', 'Prediction'], loc='center right')
            else:
                ax.legend(['Response'], loc='center right')
            if is_first_column:
                ax.set_ylabel('Raw')

        idx_page_start += max_responses_per_page
        fig_list.append(fig)
        fig.suptitle('{} Sequence Response Histogram (page {})'.format(
            sampled.data_sequence_label, len(fig_list)))
    return fig_list
def _get_lhist(data, bins=20, hist_range=None):
    """Return a histogram of ``data`` as a step outline suitable for a line plot.

    Args:
        data: Array-like of values to histogram.
        bins: Passed to ``np.histogram`` as ``bins``.
        hist_range: Optional ``[lower, upper]`` pair. ``None`` (or a ``None``
            entry) means "use the NaN-ignoring minimum / maximum of ``data``"
            for that bound. The argument is never modified.

    Returns:
        Tuple ``(counts, edges)`` of equal-length arrays: every bin edge appears
        twice in ``edges`` and ``counts`` starts and ends at zero, so plotting
        ``edges`` against ``counts`` traces the outline of the histogram bars.
    """
    # Unpack into locals rather than writing into the argument: a shared default
    # list or the caller's list must not be changed by this call.
    lower, upper = (None, None) if hist_range is None else hist_range
    if lower is None:
        lower = np.nanmin(data)
    if upper is None:
        upper = np.nanmax(data)

    counts, edges = np.histogram(data, bins=bins, range=(lower, upper))

    # Outline: drop from zero at the first edge, hold each count across its bin,
    # and fall back to zero at the last edge.
    outline_counts = np.concatenate(([0], np.repeat(counts, 2), [0]))
    outline_edges = np.repeat(edges, 2)
    return outline_counts, outline_edges