1 | """A bottom-up tree matching algorithm implementation meant to speed |
---|
2 | up 2to3's matching process. After the tree patterns are reduced to |
---|
3 | their rarest linear path, a linear Aho-Corasick automaton is |
---|
4 | created. The linear automaton traverses the linear paths from the |
---|
5 | leaves to the root of the AST and returns a set of nodes for further |
---|
6 | matching. This reduces significantly the number of candidate nodes.""" |
---|
7 | |
---|
8 | __author__ = "George Boutsioukis <gboutsioukis@gmail.com>" |
---|
9 | |
---|
10 | import logging |
---|
11 | import itertools |
---|
12 | from collections import defaultdict |
---|
13 | |
---|
14 | from . import pytree |
---|
15 | from .btm_utils import reduce_tree |
---|
16 | |
---|
17 | class BMNode(object): |
---|
18 | """Class for a node of the Aho-Corasick automaton used in matching""" |
---|
19 | count = itertools.count() |
---|
20 | def __init__(self): |
---|
21 | self.transition_table = {} |
---|
22 | self.fixers = [] |
---|
23 | self.id = next(BMNode.count) |
---|
24 | self.content = '' |
---|
25 | |
---|
26 | class BottomMatcher(object): |
---|
27 | """The main matcher class. After instantiating the patterns should |
---|
28 | be added using the add_fixer method""" |
---|
29 | |
---|
30 | def __init__(self): |
---|
31 | self.match = set() |
---|
32 | self.root = BMNode() |
---|
33 | self.nodes = [self.root] |
---|
34 | self.fixers = [] |
---|
35 | self.logger = logging.getLogger("RefactoringTool") |
---|
36 | |
---|
37 | def add_fixer(self, fixer): |
---|
38 | """Reduces a fixer's pattern tree to a linear path and adds it |
---|
39 | to the matcher(a common Aho-Corasick automaton). The fixer is |
---|
40 | appended on the matching states and called when they are |
---|
41 | reached""" |
---|
42 | self.fixers.append(fixer) |
---|
43 | tree = reduce_tree(fixer.pattern_tree) |
---|
44 | linear = tree.get_linear_subpattern() |
---|
45 | match_nodes = self.add(linear, start=self.root) |
---|
46 | for match_node in match_nodes: |
---|
47 | match_node.fixers.append(fixer) |
---|
48 | |
---|
49 | def add(self, pattern, start): |
---|
50 | "Recursively adds a linear pattern to the AC automaton" |
---|
51 | #print("adding pattern", pattern, "to", start) |
---|
52 | if not pattern: |
---|
53 | #print("empty pattern") |
---|
54 | return [start] |
---|
55 | if isinstance(pattern[0], tuple): |
---|
56 | #alternatives |
---|
57 | #print("alternatives") |
---|
58 | match_nodes = [] |
---|
59 | for alternative in pattern[0]: |
---|
60 | #add all alternatives, and add the rest of the pattern |
---|
61 | #to each end node |
---|
62 | end_nodes = self.add(alternative, start=start) |
---|
63 | for end in end_nodes: |
---|
64 | match_nodes.extend(self.add(pattern[1:], end)) |
---|
65 | return match_nodes |
---|
66 | else: |
---|
67 | #single token |
---|
68 | #not last |
---|
69 | if pattern[0] not in start.transition_table: |
---|
70 | #transition did not exist, create new |
---|
71 | next_node = BMNode() |
---|
72 | start.transition_table[pattern[0]] = next_node |
---|
73 | else: |
---|
74 | #transition exists already, follow |
---|
75 | next_node = start.transition_table[pattern[0]] |
---|
76 | |
---|
77 | if pattern[1:]: |
---|
78 | end_nodes = self.add(pattern[1:], start=next_node) |
---|
79 | else: |
---|
80 | end_nodes = [next_node] |
---|
81 | return end_nodes |
---|
82 | |
---|
83 | def run(self, leaves): |
---|
84 | """The main interface with the bottom matcher. The tree is |
---|
85 | traversed from the bottom using the constructed |
---|
86 | automaton. Nodes are only checked once as the tree is |
---|
87 | retraversed. When the automaton fails, we give it one more |
---|
88 | shot(in case the above tree matches as a whole with the |
---|
89 | rejected leaf), then we break for the next leaf. There is the |
---|
90 | special case of multiple arguments(see code comments) where we |
---|
91 | recheck the nodes |
---|
92 | |
---|
93 | Args: |
---|
94 | The leaves of the AST tree to be matched |
---|
95 | |
---|
96 | Returns: |
---|
97 | A dictionary of node matches with fixers as the keys |
---|
98 | """ |
---|
99 | current_ac_node = self.root |
---|
100 | results = defaultdict(list) |
---|
101 | for leaf in leaves: |
---|
102 | current_ast_node = leaf |
---|
103 | while current_ast_node: |
---|
104 | current_ast_node.was_checked = True |
---|
105 | for child in current_ast_node.children: |
---|
106 | # multiple statements, recheck |
---|
107 | if isinstance(child, pytree.Leaf) and child.value == u";": |
---|
108 | current_ast_node.was_checked = False |
---|
109 | break |
---|
110 | if current_ast_node.type == 1: |
---|
111 | #name |
---|
112 | node_token = current_ast_node.value |
---|
113 | else: |
---|
114 | node_token = current_ast_node.type |
---|
115 | |
---|
116 | if node_token in current_ac_node.transition_table: |
---|
117 | #token matches |
---|
118 | current_ac_node = current_ac_node.transition_table[node_token] |
---|
119 | for fixer in current_ac_node.fixers: |
---|
120 | if not fixer in results: |
---|
121 | results[fixer] = [] |
---|
122 | results[fixer].append(current_ast_node) |
---|
123 | |
---|
124 | else: |
---|
125 | #matching failed, reset automaton |
---|
126 | current_ac_node = self.root |
---|
127 | if (current_ast_node.parent is not None |
---|
128 | and current_ast_node.parent.was_checked): |
---|
129 | #the rest of the tree upwards has been checked, next leaf |
---|
130 | break |
---|
131 | |
---|
132 | #recheck the rejected node once from the root |
---|
133 | if node_token in current_ac_node.transition_table: |
---|
134 | #token matches |
---|
135 | current_ac_node = current_ac_node.transition_table[node_token] |
---|
136 | for fixer in current_ac_node.fixers: |
---|
137 | if not fixer in results.keys(): |
---|
138 | results[fixer] = [] |
---|
139 | results[fixer].append(current_ast_node) |
---|
140 | |
---|
141 | current_ast_node = current_ast_node.parent |
---|
142 | return results |
---|
143 | |
---|
144 | def print_ac(self): |
---|
145 | "Prints a graphviz diagram of the BM automaton(for debugging)" |
---|
146 | print("digraph g{") |
---|
147 | def print_node(node): |
---|
148 | for subnode_key in node.transition_table.keys(): |
---|
149 | subnode = node.transition_table[subnode_key] |
---|
150 | print("%d -> %d [label=%s] //%s" % |
---|
151 | (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) |
---|
152 | if subnode_key == 1: |
---|
153 | print(subnode.content) |
---|
154 | print_node(subnode) |
---|
155 | print_node(self.root) |
---|
156 | print("}") |
---|
157 | |
---|
158 | # taken from pytree.py for debugging; only used by print_ac |
---|
159 | _type_reprs = {} |
---|
160 | def type_repr(type_num): |
---|
161 | global _type_reprs |
---|
162 | if not _type_reprs: |
---|
163 | from .pygram import python_symbols |
---|
164 | # printing tokens is possible but not as useful |
---|
165 | # from .pgen2 import token // token.__dict__.items(): |
---|
166 | for name, val in python_symbols.__dict__.items(): |
---|
167 | if type(val) == int: _type_reprs[val] = name |
---|
168 | return _type_reprs.setdefault(type_num, type_num) |
---|