首先实现 parser_transitions.py，接着实现 parser_model.py，最后运行 run.py 进行展示。
1. parser_transitions.py
1.1 PartialParse 类
class PartialParse(object):
    """State of a transition-based dependency parse of one sentence.

    The state consists of a stack (top is the last list element), a buffer
    (next word is the first list element) and the dependencies produced so far.
    """

    def __init__(self, sentence):
        """Initializes this partial parse.

        @param sentence (list of str): The sentence to be parsed as a list of words.
                                        The sentence itself is never modified.
        """
        # The sentence being parsed is kept for bookkeeping purposes only.
        self.sentence = sentence

        # The root token is represented with the string "ROOT".
        self.stack = ['ROOT']
        # list(...) makes an independent copy, so shifting words off the buffer
        # never alters the caller's sentence; it also accepts any sequence
        # (e.g. a tuple), not only objects that provide a .copy() method.
        self.buffer = list(sentence)
        # List of (head, dependent) tuples; order carries no meaning.
        self.dependencies = []

    def parse_step(self, transition):
        """Performs a single parse step by applying the given transition to this partial parse.

        @param transition (str): A string that equals "S", "LA", or "RA" representing the shift,
                                 left-arc, and right-arc transitions. The transition is expected
                                 to be legal for the current stack and buffer.

        @raises ValueError: if transition is not one of "S", "LA" or "RA".
        """
        if transition == 'S':
            # Shift: move the first buffer word onto the top of the stack.
            self.stack.append(self.buffer.pop(0))
        elif transition == 'LA':
            # Left arc: the second-from-top item becomes a dependent of the top item.
            dependent = self.stack.pop(-2)
            self.dependencies.append((self.stack[-1], dependent))
        elif transition == 'RA':
            # Right arc: the top item becomes a dependent of the item below it.
            dependent = self.stack.pop()
            self.dependencies.append((self.stack[-1], dependent))
        else:
            # Ignoring an unknown transition would leave the state unchanged and
            # let a caller that loops until the parse is complete spin forever.
            raise ValueError("Unknown transition: {!r}".format(transition))

    def parse(self, transitions):
        """Applies the provided transitions to this PartialParse.

        @param transitions (list of str): The list of transitions in the order they should be applied

        @return dependencies (list of string tuples): The list of dependencies produced when
                                                      parsing the sentence. Represented as a list of
                                                      tuples where each tuple is of the form (head, dependent).
        """
        for transition in transitions:
            self.parse_step(transition)
        return self.dependencies
1.2 minibatch_parse 函数
def minibatch_parse(sentences, model, batch_size):
    """Parses a list of sentences in minibatches using a model.

    @param sentences (list of list of str): A list of sentences to be parsed
                                            (each sentence is a list of words and each word is of type string)
    @param model (ParserModel): The model that makes parsing decisions. It is assumed to have a function
                                model.predict(partial_parses) that takes in a list of PartialParses as input and
                                returns a list of transitions predicted for each parse. That is, after calling
                                    transitions = model.predict(partial_parses)
                                transitions[i] will be the next transition to apply to partial_parses[i].
    @param batch_size (int): The number of PartialParses to include in each minibatch (must be >= 1)

    @return dependencies (list of dependency lists): A list where each element is the dependencies
                                                    list for a parsed sentence. Ordering should be the
                                                    same as in sentences (i.e., dependencies[i] should
                                                    contain the parse for sentences[i]).

    @raises ValueError: if batch_size is smaller than 1.
    """
    # A non-positive batch size would slice an empty (or permanently incomplete)
    # minibatch, so the loop below would never terminate.
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got {!r}".format(batch_size))

    def is_finished(parse):
        # A parse is complete once only one item remains on the stack and the buffer is empty.
        return len(parse.stack) == 1 and not parse.buffer

    # One PartialParse per sentence, kept in input order for the final result.
    partial_parses = [PartialParse(sentence) for sentence in sentences]

    # Parses that are complete from the start (empty sentences) are never handed
    # to the model: there is no legal transition left to apply to them.
    unfinished_parses = [parse for parse in partial_parses if not is_finished(parse)]

    while unfinished_parses:
        minibatch = unfinished_parses[:batch_size]
        transitions = model.predict(minibatch)
        # Advance every parse in the minibatch by exactly one transition.
        for i, parse in enumerate(minibatch):
            parse.parse_step(transitions[i])
        # Keep only the parses that still have work left; relative order is preserved.
        unfinished_parses = [parse for parse in unfinished_parses if not is_finished(parse)]

    # Collect the dependencies of every sentence, in the original order.
    return [parse.dependencies for parse in partial_parses]
1.3 测试
1 def test_step(name, transition, stack, buf, deps, 2 ex_stack, ex_buf, ex_deps): 3 """Tests that a single parse step returns the expected output""" 4 pp = PartialParse([]) 5 pp.stack, pp.buffer, pp.dependencies = stack, buf, deps 6 7 pp.parse_step(transition) 8 stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies))) 9 assert stack == ex_stack, \ 10 "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack) 11 assert buf == ex_buf, \ 12 "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf) 13 assert deps == ex_deps, \ 14 "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 15 print("{:} test passed!".format(name)) 16 17 18 def test_parse_step(): 19 """Simple tests for the PartialParse.parse_step function 20 Warning: these are not exhaustive 21 """ 22 test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [], 23 ("ROOT", "the", "cat"), ("sat",), ()) 24 test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [], 25 ("ROOT", "cat",), ("sat",), (("cat", "the"),)) 26 test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [], 27 ("ROOT", "run",), (), (("run", "fast"),)) 28 29 30 def test_parse(): 31 """Simple tests for the PartialParse.parse function 32 Warning: these are not exhaustive 33 """ 34 sentence = ["parse", "this", "sentence"] 35 dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"]) 36 dependencies = tuple(sorted(dependencies)) 37 expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this')) 38 assert dependencies == expected, \ 39 "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected) 40 assert tuple(sentence) == ("parse", "this", "sentence"), \ 41 "parse test failed: the input sentence should not be modified" 42 print("parse test passed!") 43 44 45 class DummyModel(object): 46 """Dummy model for testing the minibatch_parse function 47 """ 48 def __init__(self, mode = 
"unidirectional"): 49 self.mode = mode 50 51 def predict(self, partial_parses): 52 if self.mode == "unidirectional": 53 return self.unidirectional_predict(partial_parses) 54 elif self.mode == "interleave": 55 return self.interleave_predict(partial_parses) 56 else: 57 raise NotImplementedError() 58 59 def unidirectional_predict(self, partial_parses): 60 """First shifts everything onto the stack and then does exclusively right arcs if the first word of 61 the sentence is "right", "left" if otherwise. 62 """ 63 return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S" 64 for pp in partial_parses] 65 66 def interleave_predict(self, partial_parses): 67 """First shifts everything onto the stack and then interleaves "right" and "left". 68 """ 69 return [("RA" if len(pp.stack) % 2 == 0 else "LA") if len(pp.buffer) == 0 else "S" 70 for pp in partial_parses] 71 72 def test_dependencies(name, deps, ex_deps): 73 """Tests the provided dependencies match the expected dependencies""" 74 deps = tuple(sorted(deps)) 75 assert deps == ex_deps, \ 76 "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 77 78 79 def test_minibatch_parse(): 80 """Simple tests for the minibatch_parse function 81 Warning: these are not exhaustive 82 """ 83 84 # Unidirectional arcs test 85 sentences = [["right", "arcs", "only"], 86 ["right", "arcs", "only", "again"], 87 ["left", "arcs", "only"], 88 ["left", "arcs", "only", "again"]] 89 deps = minibatch_parse(sentences, DummyModel(), 2) 90 test_dependencies("minibatch_parse", deps[0], 91 (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs'))) 92 test_dependencies("minibatch_parse", deps[1], 93 (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs'))) 94 test_dependencies("minibatch_parse", deps[2], 95 (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left'))) 96 test_dependencies("minibatch_parse", deps[3], 97 (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 
'only'))) 98 99 # Out-of-bound test 100 sentences = [["right"]] 101 deps = minibatch_parse(sentences, DummyModel(), 2) 102 test_dependencies("minibatch_parse", deps[0], (('ROOT', 'right'),)) 103 104 # Mixed arcs test 105 sentences = [["this", "is", "interleaving", "dependency", "test"]] 106 deps = minibatch_parse(sentences, DummyModel(mode="interleave"), 1) 107 test_dependencies("minibatch_parse", deps[0], 108 (('ROOT', 'is'), ('dependency', 'interleaving'), 109 ('dependency', 'test'), ('is', 'dependency'), ('is', 'this'))) 110 print("minibatch_parse test passed!") 111 112 113 if __name__ == '__main__': 114 args = sys.argv 115 if len(args) != 2: 116 raise Exception("You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script") 117 elif args[1] == "part_c": 118 test_parse_step() 119 test_parse() 120 elif args[1] == "part_d": 121 test_minibatch_parse() 122 else: 123 raise Exception("You did not provide a valid keyword. Either provide 'part_c' or 'part_d', when executing this script")