1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Grep XLIFF, Gettext PO and TMX localization files
23
24 Matches are output to snippet files of the same type which can then be reviewed
25 and later merged using pomerge
26
27 See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
28 usage instructions
29 """
30
31 from translate.storage import factory
32 from translate.misc import optrecurse
33 from translate.misc.multistring import multistring
34 from translate.lang import data
35 import re
36 import locale
37
38
40 """Just a small data structure that represents a search match."""
41
42
def __init__(self, unit, part='target', part_n=0, start=0, end=0):
    """Remember which unit was matched and where inside it.

    unit   -- the unit the match belongs to
    part   -- name of the matched part of the unit (defaults to 'target')
    part_n -- index of the matched string within that part
    start  -- start position of the match, stored as given
    end    -- end position of the match, stored as given
    """
    # Plain value holder: nothing is validated or copied here.
    self.unit, self.part, self.part_n = unit, part, part_n
    self.start, self.end = start, end
49
50
52 if self.part == 'target':
53 if self.unit.hasplural():
54 getter = lambda: self.unit.target.strings[self.part_n]
55 else:
56 getter = lambda: self.unit.target
57 return getter
58 elif self.part == 'source':
59 if self.unit.hasplural():
60 getter = lambda: self.unit.source.strings[self.part_n]
61 else:
62 getter = lambda: self.unit.source
63 return getter
64 elif self.part == 'notes':
65 def getter():
66 return self.unit.getnotes()[self.part_n]
67 return getter
68 elif self.part == 'locations':
69 def getter():
70 return self.unit.getlocations()[self.part_n]
71 return getter
72
74 if self.part == 'target':
75 if self.unit.hasplural():
76 def setter(value):
77 strings = self.unit.target.strings
78 strings[self.part_n] = value
79 self.unit.target = strings
80 else:
81 def setter(value):
82 self.unit.target = value
83 return setter
84
85
94
97
99 """Calculate the real index in the unnormalized string that corresponds to
100 the index nfc_index in the normalized string."""
101 length = nfc_index
102 max_length = len(string)
103 while len(data.normalize(string[:length])) <= nfc_index:
104 if length == max_length:
105 return length
106 length += 1
107 return length - 1
108
109
122
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False,
             max_matches=0):
    """Build a filter that looks for searchstring in selected unit parts.

    searchstring  -- text (or regular expression source) to look for; a
                     byte string is decoded using encoding
    searchparts   -- collection of part names to search; 'source'/'msgid',
                     'target'/'msgstr', 'notes'/'comment' and 'locations'
                     are recognised.  When empty or None, source and
                     target are searched and notes/locations are not.
    ignorecase    -- match without regard to case
    useregexp     -- treat searchstring as a regular expression
    invertmatch   -- stored as self.invertmatch
    accelchar     -- stored as self.accelchar
    encoding      -- encoding used to decode a byte-string searchstring
    includeheader -- stored as self.includeheader
    max_matches   -- stored as self.max_matches
    """
    if isinstance(searchstring, unicode):
        self.searchstring = searchstring
    else:
        self.searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(self.searchstring)

    if searchparts:
        # Both the generic names and the PO-style aliases are accepted.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    self.useregexp = useregexp
    if self.useregexp:
        # Never lowercase the regex source: that would silently turn
        # escapes such as \S, \W, \D and \B into \s, \w, \d and \b, which
        # mean the opposite.  Case-insensitivity is requested through the
        # compile flags instead; re.UNICODE keeps case folding working for
        # non-ASCII letters.
        flags = 0
        if self.ignorecase:
            flags = re.IGNORECASE | re.UNICODE
        self.searchpattern = re.compile(self.searchstring, flags)
    elif self.ignorecase:
        # Plain-text search: compare lowercased needle against lowercased
        # text.
        self.searchstring = self.searchstring.lower()

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.includeheader = includeheader
    self.max_matches = max_matches
155
157 if teststr is None:
158 return False
159 teststr = data.normalize(teststr)
160 if self.ignorecase:
161 teststr = teststr.lower()
162 if self.accelchar:
163 teststr = re.sub(self.accelchar + self.accelchar, "#", teststr)
164 teststr = re.sub(self.accelchar, "", teststr)
165 if self.useregexp:
166 found = self.searchpattern.search(teststr)
167 else:
168 found = teststr.find(self.searchstring) != -1
169 if self.invertmatch:
170 found = not found
171 return found
172
174 """runs filters on an element"""
175 if unit.isheader(): return []
176
177 if self.search_source:
178 if isinstance(unit.source, multistring):
179 strings = unit.source.strings
180 else:
181 strings = [unit.source]
182 for string in strings:
183 if self.matches(string):
184 return True
185
186 if self.search_target:
187 if isinstance(unit.target, multistring):
188 strings = unit.target.strings
189 else:
190 strings = [unit.target]
191 for string in strings:
192 if self.matches(string):
193 return True
194
195 if self.search_notes:
196 if self.matches(unit.getnotes()):
197 return True
198 if self.search_locations:
199 if self.matches(u" ".join(unit.getlocations())):
200 return True
201 return False
202
217
219 if not self.searchstring:
220 return [], []
221
222 searchstring = self.searchstring
223 flags = re.LOCALE | re.MULTILINE | re.UNICODE
224
225 if self.ignorecase:
226 flags |= re.IGNORECASE
227 if not self.useregexp:
228 searchstring = re.escape(searchstring)
229 self.re_search = re.compile(u'(%s)' % (searchstring), flags)
230
231 matches = []
232 indexes = []
233
234 for index, unit in enumerate(units):
235 old_length = len(matches)
236
237 if self.search_target:
238 if unit.hasplural():
239 targets = unit.target.strings
240 else:
241 targets = [unit.target]
242 matches.extend(find_matches(unit, 'target', targets, self.re_search))
243 if self.search_source:
244 if unit.hasplural():
245 sources = unit.source.strings
246 else:
247 sources = [unit.source]
248 matches.extend(find_matches(unit, 'source', sources, self.re_search))
249 if self.search_notes:
250 matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
251
252 if self.search_locations:
253 matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))
254
255
256
257
258 if self.max_matches and len(matches) > self.max_matches:
259 raise Exception("Too many matches found")
260
261 if len(matches) > old_length:
262 old_length = len(matches)
263 indexes.append(index)
264
265 return matches, indexes
266
268 """a specialized Option Parser for the grep tool..."""
270 """parses the command line options, handling implicit input/output args"""
271 (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)
272
273 if args:
274 options.searchstring = args[0]
275 args = args[1:]
276 else:
277 self.error("At least one argument must be given for the search string")
278 if args and not options.input:
279 if not options.output:
280 options.input = args[:-1]
281 args = args[-1:]
282 else:
283 options.input = args
284 args = []
285 if args and not options.output:
286 options.output = args[-1]
287 args = args[:-1]
288 if args:
289 self.error("You have used an invalid combination of --input, --output and freestanding args")
290 if isinstance(options.input, list) and len(options.input) == 1:
291 options.input = options.input[0]
292 return (options, args)
293
295 """sets the usage string - if usage not given, uses getusagestring for each option"""
296 if usage is None:
297 self.usage = "%prog searchstring " + " ".join([self.getusagestring(option) for option in self.option_list])
298 else:
299 super(GrepOptionParser, self).set_usage(usage)
300
309
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Filter inputfile through checkfilter and write the result to outputfile.

    The input is loaded with factory.getobject() and passed to
    checkfilter.filterfile().  Returns False, writing nothing, when the
    filtered result is empty; otherwise writes its string form and returns
    True.  templatefile is accepted but not used.
    """
    source_store = factory.getobject(inputfile)
    filtered_store = checkfilter.filterfile(source_store)
    if not filtered_store.isempty():
        outputfile.write(str(filtered_store))
        return True
    return False
318
320 formats = {"po":("po", rungrep), "pot":("pot", rungrep),
321 "mo":("mo", rungrep), "gmo":("gmo", rungrep),
322 "tmx":("tmx", rungrep),
323 "xliff":("xliff", rungrep), "xlf":("xlf", rungrep), "xlff":("xlff", rungrep),
324 None:("po", rungrep)}
325 parser = GrepOptionParser(formats)
326 parser.add_option("", "--search", dest="searchparts",
327 action="append", type="choice", choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment" ],
328 metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
329 parser.add_option("-I", "--ignore-case", dest="ignorecase",
330 action="store_true", default=False, help="ignore case distinctions")
331 parser.add_option("-e", "--regexp", dest="useregexp",
332 action="store_true", default=False, help="use regular expression matching")
333 parser.add_option("-v", "--invert-match", dest="invertmatch",
334 action="store_true", default=False, help="select non-matching lines")
335 parser.add_option("", "--accelerator", dest="accelchar",
336 action="store", type="choice", choices=["&", "_", "~"],
337 metavar="ACCELERATOR", help="ignores the given accelerator when matching")
338 parser.add_option("", "--header", dest="includeheader",
339 action="store_true", default=False,
340 help="include a PO header in the output")
341 parser.set_usage()
342 parser.passthrough.append('checkfilter')
343 parser.description = __doc__
344 return parser
345
347 parser = cmdlineparser()
348 parser.run()
349
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
352