Skip to content

Commit

Permalink
GE LR Parser was actually parsing the target string twice, once in al…
Browse files Browse the repository at this point in the history
…gorithm.parameters.set_params, and once in GE_LR_Parser. Fixed so that it only does the job once.
  • Loading branch information
mikefenton committed Jul 25, 2017
1 parent 14155b2 commit ab47b56
Showing 1 changed file with 57 additions and 60 deletions.
117 changes: 57 additions & 60 deletions src/scripts/GE_LR_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ def parse_terminals(target):
"""
Given a target string, build up a list of terminals which match certain
portions of the target string.
:return: A list of terminals in order of appearance in the target string.
"""

if params['VERBOSE']:
print("Target:\n", target)

# Pre-load all terminals and non-terminal rules from the grammar.
terms, rules = params['BNF_GRAMMAR'].terminals, params['BNF_GRAMMAR'].rules

Expand All @@ -35,11 +35,11 @@ def parse_terminals(target):
# which the output matches, along with the root node of the subtree. The
# value is the subtree itself.
trackers.snippets = {}

# Initialise list of deleted snippets, to ensure they aren't generated
# again.
trackers.deleted_snippets = []

for T in sorted(terms.keys()):
# Iterate over all Terminals.

Expand All @@ -53,12 +53,12 @@ def parse_terminals(target):
index += len(T)
else:
break

for idx in occurrances:
# Check each occurrence of this terminal in the target string.

for NT in terms[T]:

if any([[T] == i for i in [[sym['symbol'] for sym in
choice['choice']] for choice in
rules[NT]['choices']]]):
Expand All @@ -67,29 +67,29 @@ def parse_terminals(target):

# Generate a key for the snippets repository.
key = " ".join([str([idx, idx+len(T)]), NT])

# Get index of production choice.
index = [[sym['symbol'] for sym in choice['choice']] for
choice in rules[NT]['choices']].index([T])

# Get production choice.
choice = rules[NT]['choices'][index]['choice']

# Generate a tree for this choice.
parent = Tree(NT, None)

# Generate a codon for this choice.
parent.codon = generate_codon(NT, choice)

# Set the snippet key for the parent.
parent.snippet = key

# Create child for terminal.
child = Tree(T, parent)

# Add child to parent.
parent.children.append(child)

# Add snippet to snippets repository.
trackers.snippets[key] = parent

Expand All @@ -100,34 +100,34 @@ def reduce(solution):
iteratively combines and reduces subtrees to generate larger matching
subtrees. This process continues until the list of matching subtrees has
been completely reduced into a target string.
:param solution: A list of all snippets (i.e. matching subtrees found in
the target string).
:return: Nothing.
"""

# Find all non-terminals in the grammar that can be used to concatenate
# subtrees to new/larger subtrees.
reduce_NTs = params['BNF_GRAMMAR'].concat_NTs

# Pre-load the target string.
target = params['REVERSE_MAPPING_TARGET']

for idx, snippet_info in enumerate(solution):
# Get current snippet.
snippet = snippet_info[2]

# Find current snippet info.
NT = snippet_info[1]

# Get indexes of the current snippet
indexes = snippet_info[0]
start, end = indexes[0], indexes[1]

# Find if the snippet root (NT) exists anywhere in the
# reduction NTs.
if NT in reduce_NTs:

for reduce in reduce_NTs[NT]:
# Now we're searching for a specific subset of keys in the
# snippets dictionary.
Expand All @@ -138,7 +138,7 @@ def reduce(solution):
if len(NTs) == 1:
# This choice leads directly to the parent, check if parent
# snippet already exists.

# Child is current snippet.
child = [[snippet, trackers.snippets[snippet]]]

Expand All @@ -148,11 +148,11 @@ def reduce(solution):

# Create a new node for the solution list.
new_entry = [indexes, reduce[1], key]

# Insert the current node into the solution.
if new_entry not in solution:
solution.insert(idx + 1, new_entry)

else:
# Find the index of the snippet root in the current
# reduction production choice.
Expand All @@ -167,49 +167,49 @@ def reduce(solution):
# The current snippet is at the start of the
# reduction attempt.
pre, aft = None, end

elif start == 0 and loc != 0:
# The current snippet is at the start of the target
# string, but we are trying to reduce it with
# something before it.
break

elif end == len(params['TARGET']) and loc != \
NT_locs[-1]:
# The current snippet is at the end of the target
# string, but we are trying to reduce it with
# something after it.
break

elif loc == len(NTs):
# The current snippet is at the end of the
# reduction attempt.
pre, aft = start, None

else:
# The current snippet is in the middle of the
# reduction attempt.
pre, aft = start, end

alt_cs = list(range(len(NTs)))

# Initialise a list of children to be reduced.
children = [[] for _ in range(len(NTs))]

# Set original snippet into children.
children[loc] = [snippet, trackers.snippets[snippet]]

curr_idx = solution.index(snippet_info)

# Step 1: reduce everything before the current loc.
for item in reversed(alt_cs[:loc]):

if NTs[item][1] == "T":
# This is a terminal, decrement by length of T.

# Check output of target string.
check = target[pre-len(NTs[item][0]):pre]

if check == NTs[item][0]:
# We have a match.

Expand All @@ -221,21 +221,21 @@ def reduce(solution):

# Add to children.
children[item] = [key, T_tree]

# Decrement target string index.
pre -= len(NTs[item][0])

else:
# No match.
break

else:
# This is a NT. Check solution list for
# matching node.
available = [sol for sol in solution[:curr_idx]
if sol[1] == NTs[item][0] and
sol[0][1] == pre]

for check in available:
# We have a match.

Expand All @@ -250,16 +250,16 @@ def reduce(solution):
pre -= child_len[1] - child_len[0]

break

# Step 2: reduce everything after the loc.
for i, item in enumerate(alt_cs[loc+1:]):

if NTs[item][1] == "T":
# This is a terminal, increment by length of T.

# Check output of target string.
check = target[aft: aft + len(NTs[item][0])]

if check == NTs[item][0]:
# We have a match.

Expand All @@ -274,7 +274,7 @@ def reduce(solution):

# Increment target string index.
aft += len(NTs[item][0])

else:
# No match.
break
Expand All @@ -283,36 +283,36 @@ def reduce(solution):
# We haven't looked ahead in the string,
# we can't add things we don't know yet.
break

if all([child != [] for child in children]):
# We have expanded all children and can collapse
# a node.

key, pre, aft = generate_key_and_check(pre, aft,
reduce,
children)

# Create a new node for the solution list.
new_entry = [[pre, aft], reduce[1], key]

# Add the new reduced entry to the solution.
if new_entry not in solution:
solution.insert(idx + 1, new_entry)


def parse_target_string():
"""
Takes a list of terminal nodes and iteratively reduces that list until
the solution has been found.
:return: The complete parsed solution in the form of a GE individual.
"""

# Sort snippets keys to generate the initial solution list of terminals.
solution = sorted([[get_num_from_str(snippet),
get_NT_from_str(snippet),
snippet] for snippet in trackers.snippets.keys()])

# Perform reduction on the solution list.
reduce(solution)

Expand All @@ -325,16 +325,16 @@ def parse_target_string():
def main():
"""
Run all functions to parse a target string into a GE individual.
:return: A GE individual.
"""

# Ensure there is a target to parse.
if not params['REVERSE_MAPPING_TARGET']:
s = "scripts.GE_LR_Parser.main\n" \
"Error: No target string specified for parsing."
raise Exception(s)

# Parse the terminals in the target string.
parse_terminals(params['REVERSE_MAPPING_TARGET'])

Expand All @@ -343,17 +343,14 @@ def main():

# Check the mapping of the solution and all aspects to ensure it is valid.
check_ind_from_parser(solution, params['REVERSE_MAPPING_TARGET'])

return solution


if __name__ == '__main__':

# Set parameters
set_params(sys.argv[1:], create_files=False)

# Generate solution.
solution = main()


# Print parsed GE genome.
print("\nGenome:\n", solution.genome)
print("\nGenome:\n", params['SEED_INDIVIDUALS'][0].genome)

0 comments on commit ab47b56

Please sign in to comment.