Source code for bfgn.configuration.configs

from collections import OrderedDict
import copy
import logging
import os
from typing import Dict, List

import yaml

from bfgn.architectures import config_sections
from bfgn.configuration import sections


# Module-level logger, named after this module via __name__.
_logger = logging.getLogger(__name__)


# Default config filename. It is not referenced in the visible code; presumably used by callers -- TODO confirm.
DEFAULT_FILENAME_CONFIG = 'config.yaml'


class Config(object):
    """
    Container for the configuration sections that control the reading and formatting of raw data files, the
    building and training of models and architectures, and the reporting of training and validation results.
    """
    raw_files = None
    """sections.RawFiles: RawFiles config section."""
    data_build = None
    """sections.DataBuild: DataBuild config section."""
    data_samples = None
    """sections.DataSamples: DataSamples config section."""
    model_training = None
    """sections.ModelTraining: ModelTraining config section."""
    architecture = None
    """config_sections.BaseArchitectureConfigSection: Architecture config section."""
    model_reporting = None
    """sections.ModelReporting: ModelReporting config section."""
    callback_general = None
    """sections.CallbackGeneral: CallbackGeneral config section."""
    callback_tensorboard = None
    """sections.CallbackTensorboard: CallbackTensorboard config section."""
    callback_early_stopping = None
    """sections.CallbackEarlyStopping: CallbackEarlyStopping config section."""
    callback_reduced_learning_rate = None
    """sections.CallbackReducedLearningRate: CallbackReducedLearningRate config section."""

    def __init__(
            self,
            raw_files: sections.RawFiles = None,
            data_build: sections.DataBuild = None,
            data_samples: sections.DataSamples = None,
            model_training: sections.ModelTraining = None,
            architecture: config_sections.BaseArchitectureConfigSection = None,
            model_reporting: sections.ModelReporting = None,
            callback_general: sections.CallbackGeneral = None,
            callback_tensorboard: sections.CallbackTensorboard = None,
            callback_early_stopping: sections.CallbackEarlyStopping = None,
            callback_reduced_learning_rate: sections.CallbackReducedLearningRate = None
    ) -> None:
        """Stores each provided config section on the attribute of the same name.

        Args:
            raw_files: RawFiles config section.
            data_build: DataBuild config section.
            data_samples: DataSamples config section.
            model_training: ModelTraining config section.
            architecture: Architecture config section.
            model_reporting: ModelReporting config section.
            callback_general: CallbackGeneral config section.
            callback_tensorboard: CallbackTensorboard config section.
            callback_early_stopping: CallbackEarlyStopping config section.
            callback_reduced_learning_rate: CallbackReducedLearningRate config section.
        """
        # Note: it's undesirable to have so many parameters passed to the __init__ method and have so much
        # boilerplate code, but I've chosen to write it this way because we can use Python typing and modern IDEs to
        # autocomplete all of the attributes and subattributes in downstream scripts. For example, "config.a" will
        # autocomplete to "config.architecture" and, more importantly, "config.architecture.w" will autocomplete to
        # "config.architecture.weighted". Without this autocomplete feature, the programmer is required to know the
        # names of individual options and due to the nature of scientific computing and the number of parameters
        # that can be configured, this becomes burdensome.
        self.raw_files = raw_files
        self.data_build = data_build
        self.data_samples = data_samples
        self.model_training = model_training
        self.architecture = architecture
        self.model_reporting = model_reporting
        self.callback_general = callback_general
        self.callback_tensorboard = callback_tensorboard
        self.callback_early_stopping = callback_early_stopping
        self.callback_reduced_learning_rate = callback_reduced_learning_rate

    def get_config_as_dict(self) -> dict:
        """Get configuration options as a nested dictionary with delineated sections.

        Returns:
            Ordered dictionary keyed by section name, where each value is that section's options as returned by
            its get_config_options_as_dict method. The 'architecture' entry is inserted directly after the
            model training section.
        """
        config = OrderedDict()
        for config_section in sections.get_config_sections():
            section_name = config_section.get_config_name_as_snake_case()
            populated_section = getattr(self, section_name)
            config[section_name] = populated_section.get_config_options_as_dict()
            if config_section is sections.ModelTraining:
                # Given ordered output, architecture options make the most sense after model training options
                config['architecture'] = self.architecture.get_config_options_as_dict()
        return config

    def get_config_errors(self, include_sections: List[str] = None, exclude_sections: List[str] = None) -> list:
        """Get configuration option errors by checking the validity of each config section.

        Args:
            include_sections: Snake-case names of the config sections that should be checked. All config sections
                are checked if this is None and exclude_sections is not specified. Cannot specify both
                include_sections and exclude_sections.
            exclude_sections: Snake-case names of the config sections that should be skipped. All config sections
                are checked if this is None and include_sections is not specified. Cannot specify both
                include_sections and exclude_sections.

        Returns:
            List of errors associated with the current configuration.

        Raises:
            AssertionError: If both include_sections and exclude_sections are provided.
        """
        assert not (include_sections and exclude_sections), \
            'Both include_sections and exclude_sections cannot be specified.'
        _logger.debug('Checking config sections for configuration issues')
        errors = list()
        # Named sections_to_check so the imported config_sections module is not shadowed by a local variable
        sections_to_check = sections.get_config_sections()
        if include_sections:
            _logger.debug('Only checking config sections: {}'.format(', '.join(include_sections)))
            sections_to_check = [section for section in sections_to_check
                                 if section.get_config_name_as_snake_case() in include_sections]
        if exclude_sections:
            _logger.debug('Not checking config sections: {}'.format(', '.join(exclude_sections)))
            sections_to_check = [section for section in sections_to_check
                                 if section.get_config_name_as_snake_case() not in exclude_sections]
        for config_section in sections_to_check:
            section_name = config_section.get_config_name_as_snake_case()
            populated_section = getattr(self, section_name)
            errors.extend(populated_section.check_config_validity())
            if config_section is sections.ModelTraining:
                # Architecture options are validated whenever the model training section is validated
                errors.extend(self.architecture.check_config_validity())
        _logger.debug('{} configuration issues found'.format(len(errors)))
        return errors

    def get_human_readable_config_errors(
            self,
            include_sections: List[str] = None,
            exclude_sections: List[str] = None
    ) -> str:
        """Generates a human-readable string of configuration option errors.

        Args:
            include_sections: Snake-case names of the config sections that should be checked. All config sections
                are checked if this is None and exclude_sections is not specified. Cannot specify both
                include_sections and exclude_sections.
            exclude_sections: Snake-case names of the config sections that should be skipped. All config sections
                are checked if this is None and include_sections is not specified. Cannot specify both
                include_sections and exclude_sections.

        Returns:
            Human-readable string with one error per line beneath a header line, or an empty string if no errors
            were found.
        """
        errors = self.get_config_errors(include_sections=include_sections, exclude_sections=exclude_sections)
        if not errors:
            return ''
        return 'List of configuration section and option errors is as follows:\n' + '\n'.join(errors)
def create_config_from_file(filepath: str) -> Config:
    """Builds a Config object from the options stored in a YAML file.

    Args:
        filepath: Filepath to an existing YAML file.

    Returns:
        Config object populated from the parsed YAML contents (built with is_template=False).

    Raises:
        AssertionError: If nothing exists at filepath.
    """
    file_exists = os.path.exists(filepath)
    assert file_exists, 'No config file found at {}'.format(filepath)
    _logger.debug('Loading config file from {}'.format(filepath))
    with open(filepath) as yaml_file:
        parsed_options = yaml.safe_load(yaml_file)
    return _create_config(parsed_options, is_template=False)
def create_config_template(architecture_name: str, filepath: str = None) -> Config:
    """Builds a template Config for the named architecture and optionally writes it to a YAML file.

    The only option supplied explicitly is the architecture name under the model training section; everything
    else is filled in by the config sections themselves in template mode.

    Args:
        architecture_name: Name of available architecture.
        filepath: Filepath to which the template YAML file is saved. Nothing is written if None.

    Returns:
        Template version of a Config.
    """
    _logger.debug('Creating config template for architecture {} at {}'.format(architecture_name, filepath))
    template_options = {'model_training': {'architecture_name': architecture_name}}
    template = _create_config(template_options, is_template=True)
    if filepath is None:
        return template
    save_config_to_file(template, filepath)
    return template
def _create_config(config_options: dict, is_template: bool) -> Config:
    """Instantiates and populates every config section, then bundles them into a Config.

    Args:
        config_options: Nested options keyed by snake-case section name; sections that are absent are populated
            from an empty dictionary.
        is_template: Passed through to each section's set_config_options call.

    Returns:
        Config built from the populated sections, including the architecture section.
    """
    # Work on a deep copy so that the caller's dictionary is never modified while sections consume their options
    options = copy.deepcopy(config_options)

    section_kwargs = dict()
    for section_class in sections.get_config_sections():
        name = section_class.get_config_name_as_snake_case()
        section = section_class()
        section.set_config_options(options.get(name, dict()), is_template)
        section_kwargs[name] = section

    # The architecture section type is only known once the model training section has been populated
    architecture_section = config_sections.get_architecture_config_section(
        section_kwargs['model_training'].architecture_name
    )
    architecture_section.set_config_options(options.get('architecture', dict()), is_template)
    section_kwargs['architecture'] = architecture_section

    return Config(**section_kwargs)
def save_config_to_file(config: Config, filepath: str, include_sections: List[str] = None) -> None:
    """Saves/serializes a Config object to a YAML file.

    Note that the custom representers below are registered through yaml.add_representer without a Dumper
    argument, so they stay registered for later yaml.dump calls in the same process.

    Args:
        config: Config object.
        filepath: Filepath to which YAML file is saved.
        include_sections: Config sections that should be included, written in the order given. All config
            sections are included if None or empty.

    Returns:
        None

    Raises:
        KeyError: If include_sections names a section that is not present in the config.
    """

    def _represent_dictionary_order(self, dict_data):
        # via https://stackoverflow.com/questions/31605131/dumping-a-dictionary-to-a-yaml-file-while-preserving-order
        return self.represent_mapping('tag:yaml.org,2002:map', dict_data.items())

    def _represent_list_inline(self, list_data):
        # Lists are written in flow style, i.e. inline as [a, b, c]
        return self.represent_sequence('tag:yaml.org,2002:seq', list_data, flow_style=True)

    yaml.add_representer(OrderedDict, _represent_dictionary_order)
    yaml.add_representer(list, _represent_list_inline)

    config_out = config.get_config_as_dict()
    _logger.debug('Saving config file to {}'.format(filepath))
    if include_sections:
        _logger.debug('Only saving config sections: {}'.format(', '.join(include_sections)))
        # Keep an OrderedDict so the subset goes through the order-preserving representer registered above;
        # a plain dict would be written with the dumper's default dict handling instead.
        config_out = OrderedDict((section, config_out[section]) for section in include_sections)
    with open(filepath, 'w') as file_:
        yaml.dump(config_out, file_, default_flow_style=False)
def get_config_differences(config_a: Config, config_b: Config) -> Dict:
    """Finds the options whose values differ between two Config objects.

    Both configs are converted with get_config_as_dict and compared option by option. A section or option that
    is missing from one config is compared as None. The 'dir_out' option of the 'model_training' section is
    never reported as a difference.

    Args:
        config_a: First Config object.
        config_b: Second Config object.

    Returns:
        Nested dictionary mapping section name to option name to a (value_a, value_b) tuple, containing only the
        options whose values differ. Empty if no differences are found.
    """
    dict_a = config_a.get_config_as_dict()
    dict_b = config_b.get_config_as_dict()
    differing_items = dict()
    # De-duplicate with OrderedDict.fromkeys rather than set so that sections and options are visited in a
    # stable order (config_a's order first, then anything only in config_b), which keeps the debug log and
    # the returned dictionary reproducible between runs.
    all_sections = OrderedDict.fromkeys(list(dict_a.keys()) + list(dict_b.keys()))
    for section in all_sections:
        section_a = dict_a.get(section, dict())
        section_b = dict_b.get(section, dict())
        all_options = OrderedDict.fromkeys(list(section_a.keys()) + list(section_b.keys()))
        for option in all_options:
            if section == 'model_training' and option == 'dir_out':
                continue
            value_a = section_a.get(option, None)
            value_b = section_b.get(option, None)
            if value_a != value_b:
                _logger.debug('Configs have different values for option {} in section {}:  {} and {}'.format(
                    option, section, value_a, value_b))
                differing_items.setdefault(section, dict())[option] = (value_a, value_b)
    return differing_items