1
2
def read_probs_file(filename, debug=False):
    """
    Parse a probability file and dump the result as a Python dictionary.

    The input is either, case (a), a file of 2-parameter probabilities with
    lines of the form::

        p1 p2 prob

    or, case (b), a pronunciation dictionary with entries of the form::

        word:
        phon1 phon2 prob
        ...
        phoni phonj prob

    Lines whose first character is ``%`` are skipped; every other line has
    its trailing whitespace stripped and is handed to
    ``DictDispatcher.add_line_to_dict``.

    In case (a) the result is a 2-parameter probability dictionary::

        D[w1][w2] = probability of w2 given w1

    In case (b) the result is a 3-parameter probability dictionary::

        D[w][phon1][phon2] = probability of phon2 given phon1 when
                             pronouncing word w

    The 2-parameter dictionary is written out if it is non-empty, otherwise
    the pronunciation dictionary is.  The output file has the same base as
    ``filename`` but a ``.py`` extension (input files are expected to end
    in ``.txt``, although whatever extension is present gets replaced) and
    contains a single assignment to ``<base name>_dict``.

    NOTE(review): the ``def`` line was absent from the text under review.
    The name and parameter order were reconstructed from the call
    ``read_probs_file(file_str, True)`` and from the names used in the
    body; the ``debug`` default is an assumption -- confirm against the
    real file.

    :param filename: path of the file to read
    :param debug: passed through to ``add_line_to_dict``
    :returns: the tuple ``(bi_dict, pron_dict, dispatcher)``
    """
    # Imported here because, at module level, ``os`` is only imported under
    # the ``__main__`` guard and is therefore undefined when this module is
    # imported rather than run as a script.
    import os

    bi_dict = {}
    pron_dict = {}

    # The dispatcher fills ``bi_dict`` in place until a ``word:`` line
    # redirects it into ``pron_dict``.
    dispatcher = DictDispatcher(bi_dict)
    with open(filename, 'r') as ifh:
        for line in ifh:
            if line.startswith('%'):
                continue
            dispatcher.add_line_to_dict(line.rstrip(), pron_dict, debug)

    base, _ext = os.path.splitext(filename)
    # Name the variable after the file name only: including a directory
    # part ("data/foo_dict= ...") would make the generated file invalid
    # Python.
    var_name = os.path.basename(base) + '_dict'
    with open(base + '.py', 'w') as ofh:
        ofh.write('# Automatically generated from %s\n' % filename)
        ofh.write('%s= %s\n' % (var_name, bi_dict or pron_dict))
    return (bi_dict, pron_dict, dispatcher)
43
44 import re
# Patterns used to classify the lines of a probability file.

# "word:" header of a pronunciation-dictionary entry; group 1 is the word.
word_re = re.compile(r'(\w+):$')
# "w1 w2 prob" line.  w1 is either '#' or a word, w2 is a word, and prob is
# an optional single digit followed by '.' and at least one digit.
bigram_re = re.compile(r'(\#|\w+)\s+(\w+)\s+(\d?\.\d+)$')
# Empty or whitespace-only line.  This used to be r'\w*$', which also
# matched any line made up solely of word characters, so a stray token (for
# example a word header missing its ':') was treated as blank instead of
# being reported as unprocessed.
blank_re = re.compile(r'\s*$')
# Leading '#'; a bigram whose first element matches is filed under 'start'.
start_re = re.compile(r'(\#)')
49
class DictDispatcher(object):
    """
    Route parsed lines into whichever dictionary is currently active.

    ``active_dictionary`` is the mapping that bigram lines are stored in.
    It starts out as the dictionary given to the constructor and is
    switched to a per-word sub-dictionary of ``pron_dict`` each time a
    ``word:`` header line is processed.

    NOTE(review): the ``class`` line and the ``def add_line_to_dict`` line
    were absent from the text under review.  They were reconstructed from
    the ``object.__init__(self)`` call and from the call sites
    ``DictDispatcher(bi_dict)`` and
    ``dispatcher.add_line_to_dict(line, pron_dict, debug)``; the ``debug``
    default is an assumption -- confirm against the real file.
    """

    def __init__(self, active_dictionary=None):
        """
        :param active_dictionary: dictionary that receives bigram entries
            until a ``word:`` line is seen.  The object passed in is stored
            as-is (not copied), so the caller sees every update.  When
            omitted, a new empty dictionary is created for this instance
            (a ``{}`` default would be shared by all instances).
        """
        object.__init__(self)
        # Test against None, not truthiness: callers pass in an *empty*
        # dict and rely on that very object being filled.
        if active_dictionary is None:
            active_dictionary = {}
        self.active_dictionary = active_dictionary

    def add_line_to_dict(self, line, pron_dict, debug=False):
        """
        Classify one input line and record its contents.

        * A line matching ``word_re`` (``word:``) makes ``pron_dict[word]``
          the active dictionary, creating it empty if it does not exist.
        * A line matching ``bigram_re`` (``w1 w2 prob``) stores
          ``float(prob)`` at ``active_dictionary[w1][w2]``.  A ``w1`` that
          matches ``start_re`` (``#``) is stored under the key ``'start'``.
        * Any other line that does not match ``blank_re`` is reported on
          standard output as unprocessed.

        :param line: one line of input text
        :param pron_dict: dictionary holding the per-word dictionaries
        :param debug: when true, echo the line and each parsed probability
        """
        if debug:
            print(line)

        word_match = word_re.match(line)
        if word_match:
            # Word dict detected: following bigram lines belong to this word.
            self.active_dictionary = pron_dict.setdefault(
                word_match.group(1), {})
            return

        bigram_match = bigram_re.match(line)
        if bigram_match:
            w1, w2, prob = bigram_match.groups()
            if start_re.match(w1):
                w1 = 'start'
            # Diagnostic output only; it used to be printed unconditionally
            # even though the method has a debug flag.
            if debug:
                print(prob)
            self.active_dictionary.setdefault(w1, {})[w2] = float(prob)
            return

        if not blank_re.match(line):
            print('Unprocessed line %s' % line)
73
if __name__ == '__main__':
    # Script entry point: convert every file named on the command line,
    # with debug output switched on.
    #
    # NOTE: ``os`` is not referenced in this block.  It is kept because
    # ``read_probs_file`` calls ``os.path.splitext`` and no other
    # module-level import of ``os`` was visible in the code reviewed.
    import os
    import sys

    for file_str in sys.argv[1:]:
        print('Processing %s' % file_str)
        print('')
        read_probs_file(file_str, True)
81