from collections import OrderedDict
import copy
import logging
import os
from typing import Dict, List
import yaml
from bfgn.architectures import config_sections
from bfgn.configuration import sections
# Module-level logger, named after this module; used by the class and functions below for debug messages.
_logger = logging.getLogger(__name__)
# Default filename for a config file. Not referenced elsewhere in this view -- presumably consumed by callers that
# need a conventional name when reading or writing configs (TODO confirm).
DEFAULT_FILENAME_CONFIG = 'config.yaml'
class Config(object):
    """
    Groups the individual configuration sections (raw files, data build, data samples, model training, architecture,
    model reporting, and callbacks) into a single object, and provides helpers to export the options as a nested
    dictionary and to collect configuration errors from each section.
    """
    raw_files = None
    """sections.RawFiles: RawFiles config section."""
    data_build = None
    """sections.DataBuild: DataBuild config section."""
    data_samples = None
    """sections.DataSamples: DataSamples config section."""
    model_training = None
    """sections.ModelTraining: ModelTraining config section."""
    architecture = None
    """config_sections.BaseArchitectureConfigSection: Architecture config section."""
    model_reporting = None
    """sections.ModelReporting: ModelReporting config section."""
    callback_general = None
    """sections.CallbackGeneral: CallbackGeneral config section."""
    callback_tensorboard = None
    """sections.CallbackTensorboard: CallbackTensorboard config section."""
    callback_early_stopping = None
    """sections.CallbackEarlyStopping: CallbackEarlyStopping config section."""
    callback_reduced_learning_rate = None
    """sections.CallbackReducedLearningRate: CallbackReducedLearningRate config section."""

    def __init__(
            self,
            raw_files: sections.RawFiles = None,
            data_build: sections.DataBuild = None,
            data_samples: sections.DataSamples = None,
            model_training: sections.ModelTraining = None,
            architecture: config_sections.BaseArchitectureConfigSection = None,
            model_reporting: sections.ModelReporting = None,
            callback_general: sections.CallbackGeneral = None,
            callback_tensorboard: sections.CallbackTensorboard = None,
            callback_early_stopping: sections.CallbackEarlyStopping = None,
            callback_reduced_learning_rate: sections.CallbackReducedLearningRate = None
    ) -> None:
        """Stores the provided, already-populated config sections as attributes of the same name.

        Args:
            raw_files: RawFiles config section.
            data_build: DataBuild config section.
            data_samples: DataSamples config section.
            model_training: ModelTraining config section.
            architecture: Architecture config section.
            model_reporting: ModelReporting config section.
            callback_general: CallbackGeneral config section.
            callback_tensorboard: CallbackTensorboard config section.
            callback_early_stopping: CallbackEarlyStopping config section.
            callback_reduced_learning_rate: CallbackReducedLearningRate config section.
        """
        # Note: it's undesirable to have so many parameters passed to the __init__ method and have so much boilerplate
        # code, but I've chosen to write it this way because we can use Python typing and modern IDEs to autocomplete
        # all of the attributes and subattributes in downstream scripts. For example, "config.a" will autocomplete to
        # "config.architecture" and, more importantly, "config.architecture.w" will autocomplete to
        # "config.architecture.weighted". Without this autocomplete feature, the programmer is required to know the
        # names of individual options and due to the nature of scientific computing and the number of parameters that
        # can be configured, this becomes burdensome.
        self.raw_files = raw_files
        self.data_build = data_build
        self.data_samples = data_samples
        self.model_training = model_training
        self.architecture = architecture
        self.model_reporting = model_reporting
        self.callback_general = callback_general
        self.callback_tensorboard = callback_tensorboard
        self.callback_early_stopping = callback_early_stopping
        self.callback_reduced_learning_rate = callback_reduced_learning_rate

    def get_config_as_dict(self) -> dict:
        """Get configuration options as a nested dictionary with delineated sections.

        Returns:
            Ordered dictionary mapping each section name to that section's options. The 'architecture' entry is
            placed immediately after the model training entry.
        """
        config = OrderedDict()
        for config_section in sections.get_config_sections():
            section_name = config_section.get_config_name_as_snake_case()
            config[section_name] = getattr(self, section_name).get_config_options_as_dict()
            if config_section is sections.ModelTraining:
                # Given ordered output, architecture options make the most sense after model training options
                config['architecture'] = self.architecture.get_config_options_as_dict()
        return config

    def get_config_errors(self, include_sections: List[str] = None, exclude_sections: List[str] = None) -> list:
        """Get configuration option errors by checking the validity of each config section.

        The architecture section is not part of sections.get_config_sections(), so it is handled alongside the model
        training section: it is checked whenever model training is checked. In addition, 'architecture' may be named
        explicitly in include_sections (to check it without model training) or in exclude_sections (to skip it while
        still checking model training).

        Args:
            include_sections: Config sections that should be included. All config sections are included if None and
                exclude_sections is not specified. Cannot specify both include_sections and exclude_sections.
            exclude_sections: Config sections that should be excluded. All config sections are included if None and
                include_sections is not specified. Cannot specify both include_sections and exclude_sections.

        Returns:
            List of errors associated with the current configuration.

        Raises:
            AssertionError: If both include_sections and exclude_sections are specified.
        """
        assert not (include_sections and exclude_sections), \
            'Both include_sections and exclude_sections cannot be specified.'
        _logger.debug('Checking config sections for configuration issues')
        if include_sections:
            _logger.debug('Only checking config sections: {}'.format(', '.join(include_sections)))
        if exclude_sections:
            _logger.debug('Not checking config sections: {}'.format(', '.join(exclude_sections)))

        def _is_requested(name: str) -> bool:
            # A section is checked unless an include list omits it or an exclude list names it
            if include_sections and name not in include_sections:
                return False
            if exclude_sections and name in exclude_sections:
                return False
            return True

        architecture_included = bool(include_sections) and 'architecture' in include_sections
        architecture_excluded = bool(exclude_sections) and 'architecture' in exclude_sections

        errors = list()
        for config_section in sections.get_config_sections():
            section_name = config_section.get_config_name_as_snake_case()
            section_requested = _is_requested(section_name)
            if section_requested:
                errors.extend(getattr(self, section_name).check_config_validity())
            if config_section is sections.ModelTraining:
                # Architecture errors are reported right after model training errors, mirroring get_config_as_dict
                if (section_requested or architecture_included) and not architecture_excluded:
                    errors.extend(self.architecture.check_config_validity())
        _logger.debug('{} configuration issues found'.format(len(errors)))
        return errors

    def get_human_readable_config_errors(
            self,
            include_sections: List[str] = None,
            exclude_sections: List[str] = None
    ) -> str:
        """Generates a human-readable string of configuration option errors.

        Args:
            include_sections: Config sections that should be included. All config sections are included if None and
                exclude_sections is not specified. Cannot specify both include_sections and exclude_sections.
            exclude_sections: Config sections that should be excluded. All config sections are included if None and
                include_sections is not specified. Cannot specify both include_sections and exclude_sections.

        Returns:
            Human-readable string of configuration option errors, one error per line after a header line, or an empty
            string if there are no errors.
        """
        errors = self.get_config_errors(include_sections=include_sections, exclude_sections=exclude_sections)
        if not errors:
            return ''
        return 'List of configuration section and option errors is as follows:\n' + '\n'.join(errors)
def create_config_from_file(filepath: str) -> Config:
    """Creates a Config object from a YAML file.

    Args:
        filepath: Filepath to existing YAML file.

    Returns:
        Config object with parsed YAML file attributes.

    Raises:
        AssertionError: If no file exists at filepath.
    """
    assert os.path.exists(filepath), 'No config file found at {}'.format(filepath)
    _logger.debug('Loading config file from {}'.format(filepath))
    with open(filepath) as file_:
        raw_config = yaml.safe_load(file_)
    if raw_config is None:
        # yaml.safe_load returns None for an empty document; treat that as "no options provided" so that config
        # creation receives a mapping rather than failing on None.get(...)
        raw_config = dict()
    return _create_config(raw_config, is_template=False)
def create_config_template(architecture_name: str, filepath: str = None) -> Config:
    """Builds a template Config for the named architecture and optionally writes it to a YAML file.

    The template is created with only the architecture name supplied; all other options are populated by the
    config sections themselves in template mode.

    Args:
        architecture_name: Name of available architecture.
        filepath: Filepath to which template YAML file is saved, if desired. Nothing is written if None.

    Returns:
        Template version of a Config.
    """
    _logger.debug('Creating config template for architecture {} at {}'.format(architecture_name, filepath))
    template = _create_config({'model_training': {'architecture_name': architecture_name}}, is_template=True)
    if filepath is None:
        return template
    save_config_to_file(template, filepath)
    return template
def _create_config(config_options: dict, is_template: bool) -> Config:
    """Builds a Config from a nested dictionary of raw options keyed by section name.

    Args:
        config_options: Nested dictionary of options, keyed by snake-case section name. Not modified; a deep copy is
            used internally.
        is_template: Forwarded to each section's set_config_options call.

    Returns:
        Config populated with one object per config section plus the architecture section.
    """
    # Work on a deep copy: the sections pop the options they consume from the dictionaries they are handed
    remaining_options = copy.deepcopy(config_options)

    def _populate(section, name):
        # Sections absent from the provided options are populated from an empty dictionary
        section.set_config_options(remaining_options.get(name, dict()), is_template)
        return section

    sections_by_name = dict()
    for section_class in sections.get_config_sections():
        name = section_class.get_config_name_as_snake_case()
        sections_by_name[name] = _populate(section_class(), name)

    # The architecture section type depends on the architecture named in the model training section
    selected_architecture = sections_by_name['model_training'].architecture_name
    sections_by_name['architecture'] = _populate(
        config_sections.get_architecture_config_section(selected_architecture), 'architecture')
    return Config(**sections_by_name)
def save_config_to_file(config: Config, filepath: str, include_sections: List[str] = None) -> None:
    """Saves/serializes a Config object to a YAML file.

    Sections are written in the order returned by config.get_config_as_dict(), and lists are written inline.

    Args:
        config: Config object.
        filepath: Filepath to which YAML file is saved.
        include_sections: Config sections that should be included. All config sections are included if None.

    Returns:
        None

    Raises:
        KeyError: If include_sections names a section that is not present in the config.
    """
    class _ConfigDumper(yaml.Dumper):
        """Dumper private to this function, so the representers below are not registered on PyYAML's global dumper
        and do not change how unrelated yaml.dump calls in the same process render lists and OrderedDicts."""

    def _represent_dictionary_order(dumper, dict_data):
        # via https://stackoverflow.com/questions/31605131/dumping-a-dictionary-to-a-yaml-file-while-preserving-order
        # Passing the items view (rather than the mapping) keeps PyYAML from sorting the keys
        return dumper.represent_mapping('tag:yaml.org,2002:map', dict_data.items())

    def _represent_list_inline(dumper, list_data):
        return dumper.represent_sequence('tag:yaml.org,2002:seq', list_data, flow_style=True)

    _ConfigDumper.add_representer(OrderedDict, _represent_dictionary_order)
    _ConfigDumper.add_representer(list, _represent_list_inline)

    config_out = config.get_config_as_dict()
    _logger.debug('Saving config file to {}'.format(filepath))
    if include_sections:
        _logger.debug('Only saving config sections: {}'.format(', '.join(include_sections)))
        unknown_sections = [section for section in include_sections if section not in config_out]
        if unknown_sections:
            raise KeyError('Unknown config sections: {}'.format(', '.join(unknown_sections)))
        # Keep an OrderedDict so the filtered output uses the same section ordering as the unfiltered output
        config_out = OrderedDict(
            (section, options) for section, options in config_out.items() if section in include_sections)
    with open(filepath, 'w') as file_:
        yaml.dump(config_out, file_, Dumper=_ConfigDumper, default_flow_style=False)
def get_config_differences(config_a: Config, config_b: Config) -> Dict:
    """Finds the configuration options whose values differ between two Config objects.

    Both configs are converted to nested dictionaries via get_config_as_dict() and compared option by option. The
    'dir_out' option of the 'model_training' section is never reported (presumably because the output directory is
    expected to differ between otherwise identical configs -- confirm with callers).

    Args:
        config_a: First Config object.
        config_b: Second Config object.

    Returns:
        Nested dictionary keyed by section name and then option name, where each value is a (value_a, value_b) tuple.
        A section or option missing from one config is reported with None for that config. Sections without
        differences are omitted, so the dictionary is empty when the configs match. Entries follow the section and
        option order of config_a, followed by any sections or options that only appear in config_b.
    """
    differing_items = dict()
    dict_a = config_a.get_config_as_dict()
    dict_b = config_b.get_config_as_dict()
    # Use an ordered, de-duplicated union of keys rather than a set, so that the order of log messages and of the
    # returned entries does not vary between runs
    all_sections = OrderedDict.fromkeys(list(dict_a.keys()) + list(dict_b.keys()))
    for section in all_sections:
        section_a = dict_a.get(section, dict())
        section_b = dict_b.get(section, dict())
        all_options = OrderedDict.fromkeys(list(section_a.keys()) + list(section_b.keys()))
        for option in all_options:
            if section == 'model_training' and option == 'dir_out':
                continue
            value_a = section_a.get(option, None)
            value_b = section_b.get(option, None)
            if value_a != value_b:
                _logger.debug('Configs have different values for option {} in section {}: {} and {}'.format(
                    option, section, value_a, value_b))
                differing_items.setdefault(section, dict())[option] = (value_a, value_b)
    return differing_items