import logging
import os
import sys
from typing import List
import joblib
import numpy as np
import sklearn.preprocessing
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
def get_available_scalers() -> List[str]:
    """Lists the scaler names exposed by this module.

    A name qualifies when it is a module-level attribute whose name ends with ``'Scaler'`` and does not start
    with ``'Base'``. Only the attribute name is inspected, not the object it refers to.

    Returns:
        Sorted list of qualifying attribute names.
    """
    module_namespace = vars(sys.modules[__name__])
    scaler_names = []
    for attr_name in module_namespace:
        # Names starting with 'Base' are excluded from the listing.
        if attr_name.startswith('Base'):
            continue
        if attr_name.endswith('Scaler'):
            scaler_names.append(attr_name)
    scaler_names.sort()
    return scaler_names
def get_scaler(scaler_name: str, scaler_options: dict) -> 'BaseGlobalScaler':
    """Gets scaler matching the provided name.

    Args:
        scaler_name: Scaler name from available scalers.
        scaler_options: Configuration for requested scaler; unpacked as keyword arguments into the scaler's
            constructor.

    Returns:
        Scaler matching the provided name.

    Raises:
        AssertionError: If ``scaler_name`` is not one of the available scalers.
    """
    available_scalers = get_available_scalers()
    # Raised explicitly instead of via an ``assert`` statement so the check is not stripped when Python runs
    # with -O; otherwise any module attribute (imports, base classes) could be looked up and called below.
    # The exception type stays AssertionError so existing callers are unaffected.
    if scaler_name not in available_scalers:
        raise AssertionError('Scaler {} not in available scalers: {}'.format(scaler_name, available_scalers))
    return getattr(sys.modules[__name__], scaler_name)(**scaler_options)
class BaseGlobalScaler(object):
    """
    Common base for scalers, which are intended to transform data before it is used to fit or predict with the
    neural network and to inverse transform it afterwards for applications or review. This base class only manages
    the save name and the fitted flag; subclasses supply the actual work by implementing ``_fit``, ``save`` and
    ``load``.
    """
    # Path (or path prefix) used by save/load; stays None unless a savename_base is supplied to __init__.
    savename = None
    # Identifier appended to savename_base; subclasses are expected to set it before calling __init__.
    scaler_name = None

    def __init__(self, savename_base=None):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        """
        has_save_location = savename_base is not None
        if has_save_location:
            self.savename = savename_base + self.scaler_name
        self.is_fitted = False

    # TODO: In all of the below, handle (IE ignore) transformations on categorical data

    def fit(self, image_array):
        """
        Fit the scaler to the data exactly once, delegating the work to ``_fit``.

        :param image_array: data passed through unchanged to ``_fit``
        :raises AssertionError: if the scaler has already been fitted
        """
        assert self.is_fitted is False, 'Scaler has already been fit to data'
        self._fit(image_array)
        # Only reached when _fit completed without raising.
        self.is_fitted = True

    def _fit(self, image_array):
        """Subclass hook that performs the actual fitting."""
        raise NotImplementedError

    def save(self):
        """Subclass hook that persists the scaler state."""
        raise NotImplementedError

    def load(self):
        """Subclass hook that restores the scaler state."""
        raise NotImplementedError
class BaseSklearnScaler(BaseGlobalScaler):
    """
    Base for scalers that delegate fitting to an object stored in ``self.scaler`` and persist that object with
    joblib. Subclasses must assign ``self.scaler`` before calling this class's ``__init__``, because the scaler
    name is derived from the wrapped object's class name.
    """
    # The wrapped scaler object; set by subclasses.
    scaler = None

    def __init__(self, savename_base):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        """
        wrapped_class_name = self.scaler.__class__.__name__
        self.scaler_name = 'sklearn_' + wrapped_class_name
        super().__init__(savename_base)

    def _fit(self, image_array):
        """Fit the wrapped scaler on the data after flattening it to two dimensions."""
        flattened = self._reshape_image_array(image_array)
        self.scaler.fit(flattened)

    def _reshape_image_array(self, image_array):
        """
        Return the array unchanged when it has at most two dimensions; otherwise collapse every leading axis into
        one and keep the last axis as is (presumably the channel axis - confirm against callers).
        """
        shape = image_array.shape
        if len(shape) <= 2:
            return image_array
        return image_array.reshape(-1, shape[-1])

    def save(self):
        """Write the wrapped scaler to ``self.savename`` with joblib."""
        joblib.dump(self.scaler, self.savename)

    def load(self):
        """
        Restore the wrapped scaler from ``self.savename`` and mark this scaler as fitted. Nothing happens when no
        file exists at that path.
        """
        if not os.path.isfile(self.savename):
            return
        # NOTE(review): joblib.load unpickles the file, so only load files from trusted sources.
        self.scaler = joblib.load(self.savename)
        self.is_fitted = True
class NullScaler(BaseGlobalScaler):
    """Scaler that learns nothing from the data; fitting and loading only update the fitted flag."""

    def __init__(self, savename_base):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        """
        # The name must be set first: the parent constructor appends it to savename_base.
        self.scaler_name = 'NullScaler'
        super().__init__(savename_base)

    def _fit(self, image_array):
        """Nothing to fit; the input is handed back untouched."""
        return image_array

    def load(self):
        """No state is read from disk; the scaler is simply marked as fitted."""
        self.is_fitted = True
class ConstantScaler(BaseGlobalScaler):
    """
    Scaler configured with a fixed scaling constant and a fixed offset rather than values learned from data. The
    two constants are persisted together in a single ``.npz`` archive.
    """
    constant_scaler = None
    constant_offset = None

    def __init__(self, savename_base, constant_scaler=None, constant_offset=None):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        :param constant_scaler: the scaling constant to store on the instance
        :param constant_offset: the offset constant to store on the instance
        """
        self.constant_scaler = constant_scaler
        self.constant_offset = constant_offset
        # The name must be set before the parent constructor runs, since it is appended to savename_base.
        self.scaler_name = 'ConstantScaler'
        super().__init__(savename_base)

    def _fit(self, image_array):
        """Nothing to fit: the constants are supplied at construction time or via ``load``."""
        pass

    def save(self):
        """Write both constants to ``<savename>.npz``."""
        np.savez(self.savename + '.npz', constant_scaler=self.constant_scaler, constant_offset=self.constant_offset)

    def load(self):
        """
        Restore both constants from ``<savename>.npz`` and mark the scaler as fitted. Nothing happens when the
        archive does not exist.
        """
        archive_path = self.savename + '.npz'
        if os.path.isfile(archive_path):
            # np.load returns an NpzFile that keeps the archive's file handle open until it is closed, so both
            # entries are read inside a context manager to release the handle promptly.
            with np.load(archive_path) as npzf:
                self.constant_scaler = npzf['constant_scaler']
                self.constant_offset = npzf['constant_offset']
            self.is_fitted = True
class StandardScaler(BaseSklearnScaler):
    """Scaler backed by ``sklearn.preprocessing.StandardScaler`` constructed with ``copy=True``."""

    def __init__(self, savename_base):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        """
        # Build the wrapped scaler before the parent constructor runs; the parent reads self.scaler.
        wrapped = sklearn.preprocessing.StandardScaler(copy=True)
        self.scaler = wrapped
        super().__init__(savename_base)
class MinMaxScaler(BaseSklearnScaler):
    """Scaler backed by ``sklearn.preprocessing.MinMaxScaler`` constructed with ``copy=True``."""

    def __init__(self, savename_base, feature_range=(0, 1)):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        :param feature_range: forwarded unchanged to the scikit-learn scaler's ``feature_range`` argument
        """
        # Build the wrapped scaler before the parent constructor runs; the parent reads self.scaler.
        wrapped = sklearn.preprocessing.MinMaxScaler(feature_range=feature_range, copy=True)
        self.scaler = wrapped
        super().__init__(savename_base)
class RobustScaler(BaseSklearnScaler):
    """Scaler backed by ``sklearn.preprocessing.RobustScaler`` constructed with ``copy=True``."""

    def __init__(self, savename_base, quantile_range=(10.0, 90.0)):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        :param quantile_range: forwarded unchanged to the scikit-learn scaler's ``quantile_range`` argument
        """
        # Build the wrapped scaler before the parent constructor runs; the parent reads self.scaler.
        wrapped = sklearn.preprocessing.RobustScaler(quantile_range=quantile_range, copy=True)
        self.scaler = wrapped
        super().__init__(savename_base)
class PowerScaler(BaseSklearnScaler):
    """Scaler backed by ``sklearn.preprocessing.PowerTransformer`` constructed with ``copy=True``."""

    def __init__(self, savename_base, method='box-cox'):
        """
        :param savename_base: the directory and optionally filename prefix for saving data
        :param method: forwarded unchanged to the scikit-learn transformer's ``method`` argument
        """
        # NOTE(review): scikit-learn documents Box-Cox as requiring strictly positive data - confirm that the
        # default method suits the inputs this scaler is fitted on.
        # Build the wrapped transformer before the parent constructor runs; the parent reads self.scaler.
        wrapped = sklearn.preprocessing.PowerTransformer(method=method, copy=True)
        self.scaler = wrapped
        super().__init__(savename_base)