1
2
3
4
5
7 """
8 Class for top down recursive descent parser.
9 """
10
12 """
13 A parser instance must have a grammar at instance creation time.
14 The terminals, productions, and start_cat for the grammar
15 are stored upon creation.
16
17 If a string is supplied it is converted to a list words
18 and stored in self.input. A string may also be supplied
19 when the recognizer- or parser- method is called.
20 """
21 self.productions = grammar.productions
22 self.start_cat = grammar.start_cat
23 if not grammar.terminals:
24 grammar.compute_terminals()
25 self.terminals = grammar.terminals
26 self.input = self.string_to_wordlist(string)
27 self.recursion_limit=100
28 self.recursion_depth=0
29
30
31
33 """
34 @param string: A string. The string to be parsed.
35 Immediately converted to a [WordList].by self.legal_input.
36
37 @return: The result of calling recurse_recognize on the start-cat,
38 the empty agenda, and the wordlist generated from string.
39 """
40 if self.legal_input(string):
41 self.recursion_depth = 0
42 return self.recurse_recognize([self.start_cat],[],self.input)
43 else:
44 return False
45
47 """
48 Parse wordlist using C{goals} (derivation). Return true
49 if current grammar accepts C{wordlist}. Else False.
50
51 Return True whenever C{goals} and C{wordlist} are empty.
52
53 Suppose C{goals} is non-empty:
54
55 If C{goals}[0] is a nonterminal, call C{expand} (expand it with the grammar
56 and continue parsing recursively). If C{goals}[0] is a terminal
57 then call C{match_word} (try to match the next word and continue parsing).
58 Both C{expand} and C{match_word} contain recursive calls to recurse_recognize.
59
60 Suppose C{goals} is empty:
61
62 Then if C{wordlist} is empty, return True. Else backtrack (If {agenda} is non-empty
63 continue parsing with the next state on the agenda; else return False)
64 backtrack also contains a recursive call to C{recurse_recognize}.
65
66 @param goals: A list of categories and/or words proposed to cover
67 wordlist, a partial derivation from the grammar.
68 @param agenda: A list of C{ParserState}s: each a pair of ([GoalList],[WordList])
69 @param wordlist: a list of words.
70 @rtype:
71 """
72 if len(goals) > 0:
73 next_goal = goals[0]
74 if not next_goal in self.terminals:
75 return self.expand(next_goal,goals[1:],agenda,wordlist)
76 else:
77 return self.match_word(next_goal,wordlist,goals[1:],agenda)
78 elif len(wordlist) == 0:
79 return True
80 else:
81
82 return self.backtrack(agenda)
83
84 - def expand(self,next_goal,rest_goals,agenda,wordlist):
85 """
86 Expand C{next_goal} using grammar (C{self.productions})
87 Generate new C{GoalList} using one production and add unused productions
88 to C{agenda}. Keep parsing with new C{GoalList}, new C{agenda}, old C{WordList}.
89 @param next_goal: grammar nonterminal
90 @param rest_goals: other gramar elements from the same gramar production
91 @param agenda: list of parser states.
92 @param wordlist: list of words (strings).
93 """
94 productions = self.productions[next_goal]
95 next_production = productions[0]
96 for p in productions[1:]:
97 agenda = [ParserState(p+rest_goals,wordlist)] + agenda
98 self.trace_expand(next_goal,next_production,rest_goals, agenda,wordlist)
99 self.recursion_check()
100 return self.recurse_recognize(next_production+rest_goals, agenda, wordlist)
101
102 - def match_word(self,next_goal,wordlist,rest_goals,agenda):
103 """
104 C{next_goal} is a terminal.
105
106 Suppose C{wordlist} is non-empty:
107
108 If next_goal matches C{wordlist}[0], keep parsing with C{rest_goals}, C{agenda}
109 and C{wordlist}[1:].
110
111 Else this parse path fails; call backtrack (if C{agenda}
112 is non-empty, continue parsing with the next parser state
113 on C{agenda}, and if C{agenda} is empty, <Return>: False)
114
115 Suppose C{wordlist} is empty.
116
117 This parse path fails. Call C{backtrack}.
118
119 @param next_goal: grammar preterminal
120 @param wordlist: list of words (strings).
121 @param rest_goals: other gramar elements from the same gramar production
122 @param agenda: list of parser states.
123 """
124 if len(wordlist) > 0:
125 if next_goal == wordlist[0]:
126
127 self.trace_match(True,next_goal,wordlist[0],rest_goals,wordlist[1:])
128 return self.recurse_recognize(rest_goals,agenda,wordlist[1:])
129 else:
130 self.trace_match(False,next_goal,wordlist[0],rest_goals,wordlist[1:])
131 return self.backtrack(agenda)
132 else:
133 self.trace_match(False,next_goal,'**empty**',rest_goals,[])
134 return self.backtrack(agenda)
135
136
138 """
139 Suppose C{agenda} is non-empty:
140
141 Then::
142 agenda[0].GoalList = new goals
143 agenda[0].WordList = new wordlist
144 Keep parsing with new goals and new worldlist and popped agenda.
145
146 Suppose C{agenda} is empty
147
148 return False
149 """
150 if len(agenda) > 0:
151 self.trace_backtrack(agenda[0].GoalList,agenda[0].WordList)
152 return self.recurse_recognize(agenda[0].GoalList,agenda[1:],agenda[0].WordList)
153 else:
154 return False
155
156
157
159 """
160 Not yet implemented
161 """
162 if self.legal_input(string):
163 print 'Not yet implemented!'
164 return False
165
166
167
168
169 - def trace (self,boolean=True):
170 print boolean
171 if boolean:
172 self._trace = True
173 else:
174 self._trace = False
175
176 - def trace_match(self, boolean,next_goal,word,rest_goals,rest_wordlist):
177 if boolean and self._trace==True:
178 print 'Match succeeded: %s %s %s %s' % (next_goal, word, rest_goals,rest_wordlist)
179 elif self._trace == True:
180 print 'Match failed: %s %s %s %s' % (next_goal, word, rest_goals,rest_wordlist)
181
182 - def trace_expand(self,next_goal,next_production,goals, agenda,wordlist):
183 if self._trace==True:
184 print 'Expanding %s as %s' % (next_goal,next_production)
185 print ' Goals: %s' % (next_production+goals)
186 print ' Agenda: '
187 for pstate in agenda:
188 print ' %s' % (pstate,)
189
190
192 if self._trace == True:
193 print 'Backtracking to '
194 print ' Goals: %s' % goals
195 print ' Wordlist: %s' % wordlist
196
198 return string.lower().split()
199
210
212 if self.recursion_limit:
213 if self.recursion_depth > self.recursion_limit:
214 raise Exception('Recursion Depth exceeded!')
215 else:
216 self.recursion_depth += 1
217
219 """
220 Class for Context free grammars
221
222 Grammar rules stored in self.productions
223 Start Cat stored in self.start_cat
224 Provided: a method for computing the terminals of the grammar.
225 Unchecked assumption: terminals never use upper case (built into parser)
226 """
227
228 - def __init__(self, start_cat, trace=False):
229 """
230 C{self.productions}: a dictionary whose keys are categories
231 and whose values are lists of productions
232 """
233 self._trace = trace
234 self.start_cat = start_cat
235 self.productions = {}
236 self.terminals = []
237
239 """
240 If g1 is a grammar object run 'print g1' to
241 trigger this code, which builds the
242 string representation of the grammar that is printed
243 """
244 str =''
245 for Cat in self.productions:
246 str += Cat
247 for R in self.productions[Cat]:
248 str += '\t=> '
249 for dtr in R:
250 str += '%s ' % dtr
251 str += '\n'
252 str += '\t ------\n'
253 return str
254
255
257 self.productions[cat]=self.productions.get(cat,[])+[production]
258 if self.terminals:
259 self.compute_terminals()
260 return production
261
263 """
264 Place a set of grammar terminals in self.terminals
265
266 Assumption: No symbol that ever occurs in the LHS
267 of a production ever needs to occur in an input string.
268
269 Counterexample: If 's' is the start symbol of the grammar
270 it is also the possession-marking 'word' in English.
271 """
272 terminals=[]
273 for cat in self.productions:
274 for p in self.productions[cat]:
275 for d in p:
276 if not d in self.productions:
277 terminals+=[d]
278 self.terminals = terminals
279
281 """
282 A parser state is basically a pair of a goallist (derivation)
283 and a wordlist, with a Pythonic class instance wrapper.
284 """
285
287 self.GoalList = GoalList
288 self.WordList = WordList
289
291 """
292 Return nice string rep.
293
294 Look like a pair!
295 Because that's what you are!
296 @rtype: string
297 """
298 return "(%s, %s)" % (self.GoalList,self.WordList)
299
301 """
302 This is a little silly! Purely for illustration!
303 Allows ParserState instances to be accessed using
304 the usual Python sequence syntax
305
306 >>> ps = ParserState(['S'],['a', 'dog', 'walks'])
307 >>> ps[0]
308 ['S]
309 >>> ps[1]
310 ['a', 'dog', 'walks']
311 """
312 if i == 0:
313 return self.GoalList
314 elif i == 1:
315 return self.WordList
316 else:
317 raise Exception('Illegal index for parse state!')
318
319
320
321
322
324 global g1, p1, g2, p2
325
326 g1 = Grammar('s')
327 g1.add_production('s',['np','vp'])
328 g1.add_production('s',['vp'])
329 g1.add_production('vp',['v'])
330 g1.add_production('vp',['v','np'])
331 g1.add_production('np',['pname'])
332 g1.add_production('pname',['john'])
333 g1.add_production('pname',['mary'])
334 g1.add_production('np',['d','n'])
335 g1.add_production('d',['the'])
336 g1.add_production('n',['boy'])
337 g1.add_production('n',['girl'])
338 g1.add_production('n',['beans'])
339 g1.add_production('v',['likes'])
340 g1.add_production('v',['run'])
341 g1.add_production('v',['eat'])
342 p1 = Parser(g1)
343 p1.trace()
344
345 g2 = Grammar('s')
346 g2.add_production('s',['np','vp'])
347 g2.add_production('s',['vp'])
348 g2.add_production('vp',['v'])
349 g2.add_production('vp',['v','np'])
350 g2.add_production('np',['pname'])
351 g2.add_production('pname',['john'])
352 g2.add_production('pname',['mary'])
353 g2.add_production('np',['d','n'])
354 g2.add_production('d',['the'])
355 g2.add_production('n',['boy'])
356 g2.add_production('n',['girl'])
357 g2.add_production('n',['tree'])
358 g2.add_production('n',['flowers'])
359 g2.add_production('n',['beans'])
360 g2.add_production('v',['likes'])
361 g2.add_production('v',['run'])
362 g2.add_production('v',['eat'])
363 g2.add_production('p',['with'])
364 g2.add_production('pp',['p','np'])
365 g2.add_production('np',['np','pp'])
366 p2 = Parser(g2)
367 p2.trace()
368
369 for s in strings:
370 print s
371 print '---------'
372 print p1.recognize_string(s)
373 print
374
375
376 if __name__ == '__main__':
377 strings = ['John likes Mary',
378 'The boy likes the girl',
379 'Eat beans']
380 demo(strings)
381