From 3b6a5eb473b7c8bcf44d2da2444cf42b1d18520b Mon Sep 17 00:00:00 2001
From: Michael Fenton
Date: Tue, 25 Jul 2017 14:08:11 +0200
Subject: [PATCH] Updated grammar analyser to print out scientific numbers.

Added new functions to utilities.fitness.math_functions for enabling
printing of very large (i.e. long) scientific numbers.
---
 datasets/Dow/Test.csv | 0
 datasets/Dow/Train.csv | 0
 datasets/Keijzer6/Test.txt | 0
 datasets/Keijzer6/Train.txt | 0
 datasets/Paige1/Test.txt | 0
 datasets/Paige1/Train.txt | 0
 datasets/Vladislavleva4/Test.txt | 0
 datasets/Vladislavleva4/Train.txt | 0
 datasets/make_Banknote.sh | 0
 grammars/letter.bnf | 0
 grammars/moo/moo_zdt123.bnf | 0
 grammars/supervised_learning/Dow.bnf | 0
 grammars/supervised_learning/Keijzer6.bnf | 0
 .../supervised_learning/Vladislavleva4.bnf | 0
 src/algorithm/parameters.py | 60 ++--
 src/algorithm/search_loop.py | 0
 src/algorithm/step.py | 0
 src/fitness/__init__.py | 0
 src/fitness/evaluation.py | 0
 .../binary_phenotype_to_float.py | 0
 src/fitness/string_match.py | 0
 .../supervised_learning/classification.py | 0
 src/fitness/supervised_learning/regression.py | 0
 .../supervised_learning.py | 13 +-
 src/operators/__init__.py | 0
 src/operators/crossover.py | 0
 src/operators/mutation.py | 0
 src/operators/replacement.py | 0
 src/operators/selection.py | 0
 src/ponyge.py | 0
 src/representation/__init__.py | 0
 src/representation/grammar.py | 257 +++++++++++++++++-
 src/representation/individual.py | 0
 src/representation/tree.py | 0
 src/scripts/baselines.py | 0
 src/scripts/grammar_analyser.py | 5 +-
 src/stats/stats.py | 20 ++
 src/utilities/__init__.py | 0
 src/utilities/fitness/error_metric.py | 0
 src/utilities/fitness/evaluate.py | 26 ++
 src/utilities/fitness/math_functions.py | 49 +++-
 src/utilities/stats/file_io.py | 22 +-
 42 files changed, 394 insertions(+), 58 deletions(-)
 mode change 100755 => 100644 datasets/Dow/Test.csv
 mode change 100755 => 100644 datasets/Dow/Train.csv
 mode change 100755 => 100644 datasets/Keijzer6/Test.txt
 mode change 100755 => 100644 datasets/Keijzer6/Train.txt
 mode change 100755 => 100644 datasets/Paige1/Test.txt
 mode change 100755 => 100644 datasets/Paige1/Train.txt
 mode change 100755 => 100644 datasets/Vladislavleva4/Test.txt
 mode change 100755 => 100644 datasets/Vladislavleva4/Train.txt
 mode change 100755 => 100644 datasets/make_Banknote.sh
 mode change 100755 => 100644 grammars/letter.bnf
 mode change 100755 => 100644 grammars/moo/moo_zdt123.bnf
 mode change 100755 => 100644 grammars/supervised_learning/Dow.bnf
 mode change 100755 => 100644 grammars/supervised_learning/Keijzer6.bnf
 mode change 100755 => 100644 grammars/supervised_learning/Vladislavleva4.bnf
 mode change 100755 => 100644 src/algorithm/parameters.py
 mode change 100755 => 100644 src/algorithm/search_loop.py
 mode change 100755 => 100644 src/algorithm/step.py
 mode change 100755 => 100644 src/fitness/__init__.py
 mode change 100755 => 100644 src/fitness/evaluation.py
 mode change 100755 => 100644 src/fitness/multi_objective/binary_phenotype_to_float.py
 mode change 100755 => 100644 src/fitness/string_match.py
 mode change 100755 => 100644 src/fitness/supervised_learning/classification.py
 mode change 100755 => 100644 src/fitness/supervised_learning/regression.py
 mode change 100755 => 100644 src/operators/__init__.py
 mode change 100755 => 100644 src/operators/crossover.py
 mode change 100755 => 100644 src/operators/mutation.py
 mode change 100755 => 100644 src/operators/replacement.py
 mode change 100755 => 100644 src/operators/selection.py
 mode change 100755 => 100644 src/ponyge.py
 mode change 100755 => 100644 src/representation/__init__.py
 mode change 100755 => 100644 src/representation/grammar.py
 mode change 100755 => 100644 src/representation/individual.py
 mode change 100755 => 100644 src/representation/tree.py
 mode change 100755 => 100644 src/scripts/baselines.py
 mode change 100755 => 100644 src/utilities/__init__.py
 mode change 100755 => 100644 src/utilities/fitness/error_metric.py
 create mode 100644 src/utilities/fitness/evaluate.py

diff --git a/datasets/Dow/Test.csv b/datasets/Dow/Test.csv
old mode 100755
new mode 100644
diff --git a/datasets/Dow/Train.csv b/datasets/Dow/Train.csv
old mode 100755
new mode 100644
diff --git a/datasets/Keijzer6/Test.txt b/datasets/Keijzer6/Test.txt
old mode 100755
new mode 100644
diff --git a/datasets/Keijzer6/Train.txt b/datasets/Keijzer6/Train.txt
old mode 100755
new mode 100644
diff --git a/datasets/Paige1/Test.txt b/datasets/Paige1/Test.txt
old mode 100755
new mode 100644
diff --git a/datasets/Paige1/Train.txt b/datasets/Paige1/Train.txt
old mode 100755
new mode 100644
diff --git a/datasets/Vladislavleva4/Test.txt b/datasets/Vladislavleva4/Test.txt
old mode 100755
new mode 100644
diff --git a/datasets/Vladislavleva4/Train.txt b/datasets/Vladislavleva4/Train.txt
old mode 100755
new mode 100644
diff --git a/datasets/make_Banknote.sh b/datasets/make_Banknote.sh
old mode 100755
new mode 100644
diff --git a/grammars/letter.bnf b/grammars/letter.bnf
old mode 100755
new mode 100644
diff --git a/grammars/moo/moo_zdt123.bnf b/grammars/moo/moo_zdt123.bnf
old mode 100755
new mode 100644
diff --git a/grammars/supervised_learning/Dow.bnf b/grammars/supervised_learning/Dow.bnf
old mode 100755
new mode 100644
diff --git a/grammars/supervised_learning/Keijzer6.bnf b/grammars/supervised_learning/Keijzer6.bnf
old mode 100755
new mode 100644
diff --git a/grammars/supervised_learning/Vladislavleva4.bnf b/grammars/supervised_learning/Vladislavleva4.bnf
old mode 100755
new mode 100644
diff --git a/src/algorithm/parameters.py b/src/algorithm/parameters.py
old mode 100755
new mode 100644
index 37b5c620..b217263a
--- a/src/algorithm/parameters.py
+++ b/src/algorithm/parameters.py
@@ -34,7 +34,7 @@
         # Set grammar file
         'GRAMMAR_FILE': "supervised_learning/Vladislavleva4.bnf",
-
+
         # Set the number of depths permutations are calculated for
         # (starting from the minimum path of the grammar).
         # Mainly for use with the grammar analyser script.
@@ -81,7 +81,7 @@
         # Boolean flag for selecting whether or not mutation is confined to
         # within the used portion of the genome. Default set to True.
         'WITHIN_USED': True,
-
+
         # CROSSOVER
         # Set crossover operator.
         'CROSSOVER': "operators.crossover.variable_onepoint",
@@ -126,7 +126,7 @@
         # Save a plot of the evolution of the best fitness result for each
         # generation.
         'SAVE_PLOTS': True,
-
+
         # MULTIPROCESSING
         # Multi-core parallel processing of phenotype evaluations.
         'MULTICORE': False,
@@ -144,7 +144,7 @@
         # full file path to the desired state file. Note that state files have
         # no file type.
         'LOAD_STATE': None,
-
+
         # SEEDING
         # Specify a list of PonyGE2 individuals with which to seed the initial
         # population.
@@ -158,7 +158,7 @@
         # Set Random Seed for all Random Number Generators to be used by
         # PonyGE2, including the standard Python RNG and the NumPy RNG.
         'RANDOM_SEED': None,
-
+
         # CACHING
         # The cache tracks unique individuals across evolution by saving a
         # string of each phenotype in a big list of all phenotypes. Saves all
@@ -175,11 +175,11 @@
         # with mutated versions of the original individual. Hopefully this will
         # encourage diversity in the population.
         'MUTATE_DUPLICATES': False,
-
+
         # OTHER
         # Set machine name (useful for doing multiple runs)
         'MACHINE': machine_name
-
+
 }
@@ -204,19 +204,19 @@ def load_params(file_name):
         content = parameters.readlines()

         for line in content:
-
+
             # Parameters files are parsed by finding the first instance of a
             # colon.
             split = line.find(":")
-
+
             # Everything to the left of the colon is the parameter key,
             # everything to the right is the parameter value.
             key, value = line[:split], line[split+1:].strip()
-
+
             # Evaluate parameters.
             try:
                 value = eval(value)
-
+
             except:
                 # We can't evaluate, leave value as a string.
                 pass
@@ -236,16 +236,16 @@ def set_params(command_line_args, create_files=True):
     :param command_line_args: Command line arguments specified by the user.
     :return: Nothing.
     """
-
+    print("Importing various things ")
     from utilities.algorithm.initialise_run import initialise_run_params
     from utilities.algorithm.initialise_run import set_param_imports
     from utilities.fitness.math_functions import return_one_percent
-    from representation import grammar
-    import utilities.algorithm.command_line_parser as parser
+    from utilities.algorithm.command_line_parser import parse_cmd_args
     from utilities.stats import trackers, clean_stats
+    from representation import grammar
+    print("Parsing command line args")
+    cmd_args, unknown = parse_cmd_args(command_line_args)
-    cmd_args, unknown = parser.parse_cmd_args(command_line_args)
-
     if unknown:
         # We currently do not parse unknown parameters. Raise error.
         s = "algorithm.parameters.set_params\nError: " \
@@ -253,7 +253,7 @@
             "add code to recognise this parameter, or use " \
             "--extra_parameters" % str(unknown)
         raise Exception(s)
-
+    print("Loading params dict")
     # LOAD PARAMETERS FILE
     # NOTE that the parameters file overwrites all previously set parameters.
     if 'PARAMETERS' in cmd_args:
@@ -282,25 +282,25 @@
         # Set steady state step and replacement.
         params['STEP'] = "steady_state_step"
         params['GENERATION_SIZE'] = 2
-
+
     else:
         # Elite size is set to either 1 or 1% of the population size,
         # whichever is bigger if no elite size is previously set.
         if params['ELITE_SIZE'] is None:
             params['ELITE_SIZE'] = return_one_percent(1, params[
                 'POPULATION_SIZE'])
-
+
         # Set the size of a generation
         params['GENERATION_SIZE'] = params['POPULATION_SIZE'] - \
                                     params['ELITE_SIZE']
-
+    print("Initialising run lists and folders")
     # Initialise run lists and folders before we set imports.
     initialise_run_params(create_files)
-
+    print("Setting param imports")
     # Set correct param imports for specified function options, including
     # error metrics and fitness functions.
     set_param_imports()
-
+    print("Cleaning stats")
     # Clean the stats dict to remove unused stats.
     clean_stats.clean_stats()
@@ -310,10 +310,10 @@
         params['GENOME_OPERATIONS'] = True
     else:
         params['GENOME_OPERATIONS'] = False
-
+
     # Ensure correct operators are used if multiple fitness functions used.
     if hasattr(params['FITNESS_FUNCTION'], 'multi_objective'):
-
+
         # Check that multi-objective compatible selection is specified.
         if not hasattr(params['SELECTION'], "multi_objective"):
             s = "algorithm.parameters.set_params\n" \
                 "Error: multi-objective compatible selection " \
                 "operator not specified for use with multiple " \
                 "fitness functions."
             raise Exception(s)
-
+
         if not hasattr(params['REPLACEMENT'], "multi_objective"):
-
+
             # Check that multi-objective compatible replacement is
             # specified.
             if not hasattr(params['REPLACEMENT'], "multi_objective"):
@@ -332,17 +332,17 @@
                     "Error: multi-objective compatible replacement " \
                     "operator not specified for use with multiple " \
                     "fitness functions."
                 raise Exception(s)
-
+    print("Parsing grammar")
     # Parse grammar file and set grammar class.
     params['BNF_GRAMMAR'] = grammar.Grammar(path.join("..", "grammars",
                                             params['GRAMMAR_FILE']))

     # Population loading for seeding runs (if specified)
     if params['TARGET_SEED_FOLDER']:
-
+
         # Import population loading function.
         from operators.initialisation import load_population
-
+        print("Loading population")
         # A target folder containing seed individuals has been given.
         params['SEED_INDIVIDUALS'] = load_population(
             params['TARGET_SEED_FOLDER'])
@@ -352,6 +352,6 @@
         # Import GE LR Parser.
         from scripts import GE_LR_parser
-
+        print("Parsing seed individual")
         # Parse seed individual and store in params.
         params['SEED_INDIVIDUALS'] = [GE_LR_parser.main()]
diff --git a/src/algorithm/search_loop.py b/src/algorithm/search_loop.py
old mode 100755
new mode 100644
diff --git a/src/algorithm/step.py b/src/algorithm/step.py
old mode 100755
new mode 100644
diff --git a/src/fitness/__init__.py b/src/fitness/__init__.py
old mode 100755
new mode 100644
diff --git a/src/fitness/evaluation.py b/src/fitness/evaluation.py
old mode 100755
new mode 100644
diff --git a/src/fitness/multi_objective/binary_phenotype_to_float.py b/src/fitness/multi_objective/binary_phenotype_to_float.py
old mode 100755
new mode 100644
diff --git a/src/fitness/string_match.py b/src/fitness/string_match.py
old mode 100755
new mode 100644
diff --git a/src/fitness/supervised_learning/classification.py b/src/fitness/supervised_learning/classification.py
old mode 100755
new mode 100644
diff --git a/src/fitness/supervised_learning/regression.py b/src/fitness/supervised_learning/regression.py
old mode 100755
new mode 100644
diff --git a/src/fitness/supervised_learning/supervised_learning.py b/src/fitness/supervised_learning/supervised_learning.py
index 7243a501..ba6662f9 100644
--- a/src/fitness/supervised_learning/supervised_learning.py
+++ b/src/fitness/supervised_learning/supervised_learning.py
@@ -5,6 +5,7 @@
 from utilities.fitness.get_data import get_data
 from utilities.fitness.math_functions import *
 from utilities.fitness.optimize_constants import optimize_constants
+from utilities.fitness.evaluate import eval_or_exec

 from fitness.base_ff_classes.base_ff import base_ff

@@ -81,8 +82,12 @@ def evaluate(self, ind, **kwargs):
             # this string has been created during training
             phen = ind.phenotype_consec_consts
             c = ind.opt_consts
+
+            # Combine local and global dictionaries.
+            locals().update(globals())
+
             # phen will refer to x (ie test_in), and possibly to c
-            yhat = eval(phen)
+            yhat = eval_or_exec(phen, locals())
             assert np.isrealobj(yhat)

             # let's always call the error function with the
@@ -90,8 +95,12 @@
             return params['ERROR_METRIC'](y, yhat)

         else:
+
+            # Combine local and global dictionaries.
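+            # This lets the phenotype string see both the dataset
+            # variables (e.g. x) and the functions imported above from
+            # utilities.fitness.math_functions.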
+            locals().update(globals())
+
             # phenotype won't refer to C
-            yhat = eval(ind.phenotype)
+            yhat = eval_or_exec(ind.phenotype, locals())
             assert np.isrealobj(yhat)

             # let's always call the error function with the true
diff --git a/src/operators/__init__.py b/src/operators/__init__.py
old mode 100755
new mode 100644
diff --git a/src/operators/crossover.py b/src/operators/crossover.py
old mode 100755
new mode 100644
diff --git a/src/operators/mutation.py b/src/operators/mutation.py
old mode 100755
new mode 100644
diff --git a/src/operators/replacement.py b/src/operators/replacement.py
old mode 100755
new mode 100644
diff --git a/src/operators/selection.py b/src/operators/selection.py
old mode 100755
new mode 100644
diff --git a/src/ponyge.py b/src/ponyge.py
old mode 100755
new mode 100644
diff --git a/src/representation/__init__.py b/src/representation/__init__.py
old mode 100755
new mode 100644
diff --git a/src/representation/grammar.py b/src/representation/grammar.py
old mode 100755
new mode 100644
index 06028809..09fea2bf
--- a/src/representation/grammar.py
+++ b/src/representation/grammar.py
@@ -1,6 +1,7 @@
 from math import floor
 from re import match, finditer, DOTALL, MULTILINE
 from sys import maxsize
+from copy import deepcopy

 from algorithm.parameters import params

@@ -34,9 +35,10 @@ def __init__(self, file_name):
         self.rules, self.permutations = {}, {}

         # Initialise dicts for terminals and non terminals, set params.
-        self.non_terminals, self.terminals = {}, {}
+        self.non_terminals, self.terminals, self.NT_parents = {}, {}, {}
         self.start_rule, self.codon_size = None, params['CODON_SIZE']
         self.min_path, self.max_arity, self.min_ramp = None, None, None
+        self.ADFs, self.n_ADFs = False, 0

         # Set regular expressions for parsing BNF grammar.
         self.ruleregex = '(?P<rulename><\S+>)\s*::=\s*(?P<production>(?:(?=\#)\#[^\r\n]*|(?!<\S+>\s*::=).+?)+)'
@@ -47,12 +49,16 @@
         # non-terminals.
         self.read_bnf_file(file_name)

-        # Check the minimum depths of all non-terminals in the grammar.
-        self.check_depths()
-
         # Check which non-terminals are recursive.
         self.check_recursion(self.start_rule["symbol"], [])

+        if self.ADFs:
+            # Expand the grammar to accommodate ADFs.
+            self.inject_adfs()
+
+        # Check the minimum depths of all non-terminals in the grammar.
+        self.check_depths()
+
         # Set the minimum path and maximum arity of the grammar.
         self.set_arity()

@@ -111,6 +117,10 @@
                     'recursive': True,
                     'b_factor': 0}

+                # Create and add a new NT parent instance
+                if rule.group('rulename') not in self.NT_parents:
+                    self.NT_parents[rule.group('rulename')] = []
+
                 # Initialise empty list of all production choices for this
                 # rule.
                 tmp_productions = []

@@ -168,6 +178,49 @@
                         # (but later productions in same rule will work)
                         continue

+                    # special case: GE_ADFS:n will be transformed to
+                    # productions adfs[0] | adfs[1] | ... | adfs[n-1]
+                    ADFS_regex = r'GE_ADFS:(?P<range>\w*)'
+                    m = match(ADFS_regex, p.group('production'))
+                    if m:
+                        # ADFs have been specified in the grammar.
+                        self.ADFs = True
+
+                        try:
+                            # assume it's just an int
+                            n = int(m.group('range'))
+                        except (ValueError, AttributeError):
+                            raise ValueError("Bad use of GE_ADFS: " +
+                                             m.group() +
+                                             "\nPlease specify an integer.")
+
+                        # Set number of desired ADFs.
+                        self.n_ADFs = n
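+                        # Each adfs[i] terminal created below indexes the
+                        # "adfs = [...]" list that inject_adfs() later builds
+                        # into the new start rule.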
+
+                        for i in range(n):
+                            # add a terminal symbol
+                            tmp_production, terminalparts = [], None
+                            symbol = {
+                                "symbol": "adfs["+str(i)+"]",
+                                "type": "T",
+                                "min_steps": 0,
+                                "recursive": False}
+                            tmp_production.append(symbol)
+                            if str(i) not in self.terminals:
+                                self.terminals[str(i)] = \
+                                    [rule.group('rulename')]
+                            elif rule.group('rulename') not in \
+                                    self.terminals[str(i)]:
+                                self.terminals[str(i)].append(
+                                    rule.group('rulename'))
+                            tmp_productions.append({"choice": tmp_production,
+                                                    "recursive": False,
+                                                    "NT_kids": False,
+                                                    "ADF": True})
+                        # don't try to process this production further
+                        # (but later productions in same rule will work)
+                        continue
+
                     for sub_p in finditer(self.productionpartsregex,
                                           p.group('production').strip()):
                         # Split production into terminal and non terminal
@@ -195,6 +248,16 @@
                                 {"symbol": sub_p.group('subrule'),
                                  "type": "NT"})

+                            # Set NT parents
+                            if sub_p.group('subrule') not in self.NT_parents:
+                                self.NT_parents[sub_p.group('subrule')] = \
+                                    [rule.group('rulename')]
+
+                            elif rule.group('rulename') not in \
+                                    self.NT_parents[sub_p.group('subrule')]:
+                                self.NT_parents[sub_p.group('subrule')].append(
+                                    rule.group('rulename'))
+
                         else:
                             # Unescape special characters (\n, \t etc.)
                             if terminalparts is None:
@@ -626,6 +689,192 @@ def find_concatination_NTs(self):
                     if conc not in self.concat_NTs[NT]:
                         self.concat_NTs[NT].append(conc)

+    def inject_adfs(self):
+        """
+        Inject a given number of ADFs into a fully parsed grammar.
+
+        :return: Nothing.
+        """
+
+        # Initialise a mapping from original NTs to new ADF NTs.
+        ADF_mapping = {}
+
+        # Create a copy of all NTs for use by ADFs
+        for NT in sorted(list(self.non_terminals)):
+            # Create copy of non_terminals dict as we will be changing it
+            # during iteration.
+
+            # Create new NT id
+            new_id = '<ADF_' + NT.strip("<>") + '>'
+
+            # Check for start node.
+            if NT == self.start_rule['symbol']:
+                self.ADF_start_rule = new_id
+
+            # Create a copy of this non-terminal for the adfs
+            new_NT = {'id': new_id,
+                      'min_steps': maxsize,
+                      'expanded': False,
+                      'recursive': True,
+                      'b_factor': 0}
+
+            # Create a mapping from the old NT to the new ADF NT.
+            ADF_mapping[NT] = new_id
+
+            # Add new copy to the ADF non terminals list.
+            self.non_terminals[new_id] = new_NT
+
+            # Create new ADF production rules as copies of the original rules.
+            self.rules[new_id] = deepcopy(self.rules[NT])
+
+        # Iterate over everything once more now that everything is defined.
+        for NT in sorted(ADF_mapping):
+
+            # Create copy of NT parents for new ADF NT.
+            self.NT_parents[ADF_mapping[NT]] = [ADF_mapping[i] for i in
+                                                self.NT_parents[NT]]
+
+            # Change all NT production choices for the current rule.
+            choices = self.rules[ADF_mapping[NT]]['choices']
+
+            for i, choice in enumerate(list(choices)):
+                # Iterate over all production choices.
+
+                # Check each symbol and change NTs to their respective ADF
+                # NTs.
+                for sym in [s for s in choice['choice'] if s['type'] == 'NT']:
+                    sym['symbol'] = ADF_mapping[sym['symbol']]

+        def set_ADF_attribute(NT_parent, ADF_child):
+            """
+            Given a parent NT, and a child NT that contains only ADF
+            production choices, set an "ADF" attribute to the relevant
+            production choice of the parent NT.
+
+            :param NT_parent: A parent NT.
+            :param ADF_child: A NT which is a child of the parent NT and
+            which contains only ADFs.
+            :return: Nothing.
+            """
+
+            # Get production choices of parent NT.
+            choices = self.rules[NT_parent]['choices']
+
+            # Set ADF attribute to correct choice.
+            for i, choice in enumerate(choices):
+                if any([sym['symbol'] == ADF_child
+                        for sym in choice['choice']]):
+                    # This choice contains the ADF child production.
+                    # Set attribute.
+                    choice['ADF'] = True
+
+            # Recurse through ADF recursive function with parent NT.
+            recurse_ADF_nts(NT_parent)
+
+        def recurse_ADF_nts(NT):
+            """
+            Recursive function for removing ADF production choices and rules
+            from the ADF portion of the grammar.
+
+            :param NT: A non-terminal to check for ADF-specific production
+            choices.
+            :return: Nothing.
+            """
+
+            # Remove ADF production choices from rule if they exist.
+            self.rules[NT]['choices'] = \
+                [c for c in self.rules[NT]['choices'] if "ADF" not in c]
+
+            # Re-set number of production choices accordingly.
+            self.rules[NT]['no_choices'] = len(self.rules[NT]['choices'])
+
+            if self.rules[NT]['no_choices'] == 0:
+                # No remaining production choices in this rule
+
+                # Remove production rule entirely.
+                del(self.rules[NT])
+
+                # Find parent NT rules that contain this NT as a choice.
+                for parent in self.NT_parents[NT]:
+                    set_ADF_attribute(parent, NT)
+
+                # Remove NT from NT_parents.
+                del(self.NT_parents[NT])
+
+                # Remove NT from self.non_terminals.
+                del(self.non_terminals[NT])
+
+        # Remove ADF production choices from ADF parts of grammar.
+        for NT in sorted(ADF_mapping):
+
+            # Call the recursive function.
+            recurse_ADF_nts(ADF_mapping[NT])
+
+        # Generate new production choice for the new start rule.
+        new_rule = {"choices": [{"choice": [{"symbol": "adfs = [",
+                                             "type": "T",
+                                             "min_steps": 0,
+                                             "recursive": False},
+                                            {"symbol": self.start_rule[
+                                                'symbol'],
+                                             "type": "NT"}],
+                                 "recursive": False,
+                                 "NT_kids": False}],
+                    "no_choices": 1}
+
+        if self.python_mode:
+            # Grammar will generate code anyway. No need to define variable.
+            exp = {"symbol": "]{::}", "type": "T", "min_steps": 0,
+                   "recursive": False}
+
+        else:
+            # The grammar will generate an expression. Need to define a
+            # variable "XXXeval_or_exec_outputXXX" which will be used to
+            # capture output from the generated program.
+            exp = {"symbol": "]{::}XXXeval_or_exec_outputXXX = ",
+                   "type": "T", "min_steps": 0, "recursive": False}
+
+        # # Generate closing terminal node.
+        # close = {"symbol": "'", "type": "T", "min_steps": 0,
+        #          "recursive": False}
+        #
+        # # Append closing T to production choice.
+        # new_rule['choices'][0]['choice'].append(close)
+
+        new_rule['choices'][0]['choice'].insert(1, exp)
+
+        # Pre-load ADF non-terminals and separator terminals.
+        ADF_NT = {"symbol": self.ADF_start_rule, "type": "NT"}
+        sep_T = {"symbol": ", ", "type": "T", "min_steps": 0,
+                 "recursive": False}
+
+        # Dynamically add the specified number of ADFs.
+        for i in range(self.n_ADFs):
+            new_rule['choices'][0]['choice'].insert(i + 1, ADF_NT)
+
+        # Dynamically add comma separator terminals for number of ADFs.
+        for i in range(self.n_ADFs - 1):
+            new_rule['choices'][0]['choice'].insert(2*(i + 1), sep_T)
+
+        # Add new rule to self.rules
+        self.rules['<_adf_start>'] = new_rule
+
+        # Create new start rule.
+        self.start_rule = {'symbol': '<_adf_start>', 'type': 'NT'}
+
+        # Create a new NT for the new start rule.
+        new_NT = {'id': '<_adf_start>',
+                  'min_steps': maxsize,
+                  'expanded': False,
+                  'recursive': True,
+                  'b_factor': 0}
+
+        # Add new NT to self.non_terminals
+        self.non_terminals['<_adf_start>'] = new_NT
+
+        # Convert grammar into python grammar with "python_mode" flag.
+        self.python_mode = True
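+
+        # For example, with n_ADFs = 2 and an expression (non-python)
+        # grammar, the new start rule yields phenotypes of the form
+        #     adfs = [<ADF>, <ADF>]{::}XXXeval_or_exec_outputXXX = <expr>
+        # where <ADF> stands for self.ADF_start_rule and <expr> for the
+        # original start symbol (names here are illustrative only).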
+
     def __str__(self):
         return "%s %s %s %s" % (self.terminals, self.non_terminals,
                                 self.rules, self.start_rule)
diff --git a/src/representation/individual.py b/src/representation/individual.py
old mode 100755
new mode 100644
diff --git a/src/representation/tree.py b/src/representation/tree.py
old mode 100755
new mode 100644
diff --git a/src/scripts/baselines.py b/src/scripts/baselines.py
old mode 100755
new mode 100644
diff --git a/src/scripts/grammar_analyser.py b/src/scripts/grammar_analyser.py
index a5062d8e..21c7e6cc 100644
--- a/src/scripts/grammar_analyser.py
+++ b/src/scripts/grammar_analyser.py
@@ -8,6 +8,7 @@
 from algorithm.parameters import params
 import utilities.algorithm.command_line_parser as parser
 from representation.grammar import Grammar
+from utilities.fitness.math_functions import sci_notation

 import sys
 import os
@@ -38,8 +39,8 @@ def main(command_line_args):

     for depth in grammar.permutations:

-        print(" Depth: %d \t Number of unique solutions: %d" %
-              (depth, grammar.permutations[depth]))
+        print(" Depth: %d \t Number of unique solutions: %s" %
+              (depth, sci_notation(grammar.permutations[depth])))


 if __name__ == "__main__":
diff --git a/src/stats/stats.py b/src/stats/stats.py
index 440343a0..a4507bcf 100644
--- a/src/stats/stats.py
+++ b/src/stats/stats.py
@@ -193,8 +193,28 @@ def get_moo_stats(individuals, end):
         if not end:
             trackers.first_pareto_list.append(all_arr)

+        # Append empty array to best fitness list.
+        trackers.best_fitness_list.append([])
+
+        # Get best fitness for each objective.
+        for o in range(params['FITNESS_FUNCTION'].num_obj):
+            fits = sorted(trackers.best_ever, key=lambda item:
+                          params['FITNESS_FUNCTION'].value(item.fitness, o))
+
+            # Append best fitness to trackers list.
+            trackers.best_fitness_list[-1].append(fits[0].fitness[o])
+
     if params['VERBOSE'] or end:

+        # Plot best fitness for each objective.
+        for o in range(params['FITNESS_FUNCTION'].num_obj):
+            to_plot = [i[o] for i in trackers.best_fitness_list]
+
+            # Plot fitness data for objective o.
+            save_plot_from_data(to_plot,
+                                params['FITNESS_FUNCTION'].
+                                fitness_functions[o].__class__.__name__)
+
         # TODO: PonyGE2 can currently only plot moo problems with 2 objectives.
         # Check that the number of fitness objectives is not greater than 2
         if params['FITNESS_FUNCTION'].num_obj > 2:
diff --git a/src/utilities/__init__.py b/src/utilities/__init__.py
old mode 100755
new mode 100644
diff --git a/src/utilities/fitness/error_metric.py b/src/utilities/fitness/error_metric.py
old mode 100755
new mode 100644
diff --git a/src/utilities/fitness/evaluate.py b/src/utilities/fitness/evaluate.py
new file mode 100644
index 00000000..954c75d5
--- /dev/null
+++ b/src/utilities/fitness/evaluate.py
@@ -0,0 +1,26 @@
+def eval_or_exec(phenotype, dictionary):
+    """
+    Use eval or exec to interpret a given phenotype string. A limitation in
+    Python is the distinction between eval and exec. The former can only be
+    used to return the value of a simple expression (not a statement) and the
+    latter does not return anything.
+
+    :param phenotype: A phenotype string.
+    :return: The output of the evaluated phenotype string.
+    """
+
+    try:
+        locals().update(dictionary)
+        retval = eval(phenotype)
+
+    except SyntaxError:
+        # SyntaxError will be thrown by eval() if the phenotype is compound,
+        # ie not a simple expression, eg if it contains function
+        # definitions, multiple lines, etc. Then we must use
+        # exec(). Then we assume that the phenotype will define a variable
+        # called "XXXeval_or_exec_outputXXX", and we'll use that.
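+        #
+        # e.g. a (hypothetical) phenotype such as
+        #     "def f(x): return x + 1\nXXXeval_or_exec_outputXXX = f(2)"
+        # cannot be eval()'d, but exec() runs it and retval becomes 3.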
+        exec(phenotype, dictionary)
+        retval = dictionary["XXXeval_or_exec_outputXXX"]
+
+    return retval
+
diff --git a/src/utilities/fitness/math_functions.py b/src/utilities/fitness/math_functions.py
index 1d6fda21..e0be7e3e 100644
--- a/src/utilities/fitness/math_functions.py
+++ b/src/utilities/fitness/math_functions.py
@@ -71,7 +71,7 @@ def pdiv(x, y):
     this always evaluates x / y before running np.where, so that will raise
     a 'divide' error (in Numpy's terminology), which we ignore using a
     context manager.
-
+
     In some instances, Numpy can raise a FloatingPointError. These are
     ignored with 'invalid = ignore'.
@@ -180,7 +180,7 @@ def percentile(sorted_list, p):
    :param p: The percentile
    :return: The element corresponding to the percentile
    """
-
+
    return sorted_list[ceil(len(sorted_list) * p / 100) - 1]
@@ -195,25 +195,56 @@ def binary_phen_to_float(phen, n_codon, min_value, max_value):
     :param max_value: Maximum value for a gene
     :return: A list of float values, representing the chromosome
     """
-
+
     i, count, chromosome = 0, 0, []
-
+
     while i < len(phen):
         # Get the current gene from the phenotype string.
         gene = phen[i:(i + n_codon)]
-
+
         # Convert the bit string in gene to a float/int
         gene_i = int(gene, 2)
         gene_f = float(gene_i) / (2 ** n_codon - 1)
-
+
         # Define the variation for the gene
         delta = max_value[count] - min_value[count]
-
+
         # Append the float value to the chromosome list
         chromosome.append(gene_f * delta + min_value[count])
-
+
         # Increment the index and count.
         i = i + n_codon
         count += 1
-
+
     return chromosome
+
+
+def ilog(n, base):
+    """
+    Find the integer log of n with respect to the base.
+
+    >>> import math
+    >>> for base in range(2, 16 + 1):
+    ...     for n in range(1, 1000):
+    ...         assert ilog(n, base) == int(math.log(n, base) + 1e-10), '%s %s' % (n, base)
+    """
+    count = 0
+    while n >= base:
+        count += 1
+        n //= base
+    return count
+
+
+def sci_notation(n, prec=3):
+    """
+    Represent n in scientific notation, with the specified precision.
+
+    >>> sci_notation(1234 * 10**1000)
+    '1.234e+1003'
+    >>> sci_notation(10**1000 // 2, prec=1)
+    '5.0e+999'
+    """
+    base = 10
+    exponent = ilog(n, base)
+    mantissa = n / base**exponent
+    return '{0:.{1}f}e{2:+d}'.format(mantissa, prec, exponent)
diff --git a/src/utilities/stats/file_io.py b/src/utilities/stats/file_io.py
index bcd99b8b..d4438bba 100644
--- a/src/utilities/stats/file_io.py
+++ b/src/utilities/stats/file_io.py
@@ -79,10 +79,10 @@
     savefile.close()


-def save_first_front_to_file(stats, end=False, name="first_front"):
+def save_first_front_to_file(stats, end=False, name="first"):
     """
     Saves all individuals in the first front to individual files in a folder.
-
+
     :param stats: The stats.stats.stats dictionary.
     :param end: A boolean flag indicating whether or not the evolutionary
     process has finished.
@@ -94,24 +94,24 @@
     orig_file_path = copy(params['FILE_PATH'])

     # Define the new file path.
-    params['FILE_PATH'] = path.join(orig_file_path, str(name))
-
+    params['FILE_PATH'] = path.join(orig_file_path, str(name)+"_front")
+
     # Check if the front folder exists already
-    if path.isdir(params['FILE_PATH']):
-
+    if path.exists(params['FILE_PATH']):
+
         # Remove previous files.
         rmtree(params['FILE_PATH'])
-
+
     # Create front folder.
     makedirs(params['FILE_PATH'])
-
+
     for i, ind in enumerate(trackers.best_ever):
         # Save each individual in the first front to file.
         save_best_ind_to_file(stats, ind, end, name=str(i))
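+
+    # i.e. one file per front individual, named by its index, in the
+    # new "first_front" folder.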
-
+
     # Re-set the file path.
     params['FILE_PATH'] = copy(orig_file_path)
-
+

 def generate_folders_and_files():
     """
@@ -165,7 +165,7 @@ def save_params_to_file():
     col_width = max(len(param) for param in params.keys())

     for param in sorted(params.keys()):
-
+
         # Create whitespace buffer for pretty printing/saving.
         spaces = [" " for _ in range(col_width - len(param))]
         savefile.write(str(param) + ": " + "".join(spaces) +