1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Grep XLIFF, Gettext PO and TMX localization files
23
24 Matches are output to snippet files of the same type which can then be reviewed
25 and later merged using pomerge
26
27 See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
28 usage instructions
29 """
30
31 from translate.storage import factory
32 from translate.misc import optrecurse
33 from translate.misc.multistring import multistring
34 from translate.lang import data
35 import re
36 import locale
37
38
40 """Just a small data structure that represents a search match."""
41
42
def __init__(self, unit, part='target', part_n=0, start=0, end=0):
    """Record where in a unit a search match was found.

    :param unit: the unit the match belongs to
    :param part: name of the matched part of the unit ('target' by default)
    :param part_n: index inside that part, e.g. which string of a plural
    :param start: start position of the match, stored as given
    :param end: end position of the match, stored as given
    """
    self.unit, self.part, self.part_n = unit, part, part_n
    self.start, self.end = start, end
49
50
def get_getter(self):
    """Return a zero-argument callable that fetches the matched string.

    Which accessor is returned depends on ``self.part`` and, for 'source'
    and 'target', on whether the unit has plurals at the time of this call.
    The callable reads the unit (and ``self.part_n``) each time it is
    invoked. For any other part name ``None`` is returned.
    """
    part = self.part
    if part == 'target':
        if self.unit.hasplural():
            return lambda: self.unit.target.strings[self.part_n]
        return lambda: self.unit.target
    if part == 'source':
        if self.unit.hasplural():
            return lambda: self.unit.source.strings[self.part_n]
        return lambda: self.unit.source
    if part == 'notes':
        return lambda: self.unit.getnotes()[self.part_n]
    if part == 'locations':
        return lambda: self.unit.getlocations()[self.part_n]
    return None
72
def get_setter(self):
    """Return a one-argument callable that stores a new target string.

    Only the 'target' part is writable; for every other part ``None`` is
    returned. For a plural unit the callable replaces entry ``self.part_n``
    of the target's strings and assigns the list back to the unit's target.
    """
    if self.part != 'target':
        return None

    if self.unit.hasplural():
        def setter(value):
            forms = self.unit.target.strings
            forms[self.part_n] = value
            self.unit.target = forms
        return setter

    def setter(value):
        self.unit.target = value
    return setter
84
85
94
97
def real_index(string, nfc_index):
    """Calculate the real index in the unnormalized string that corresponds to
    the index nfc_index in the normalized string.

    The prefix of ``string`` is grown one character at a time until its
    normalized form becomes longer than ``nfc_index``; the prefix length
    just before that point is the answer.

    :param string: the original, unnormalized string
    :param nfc_index: an index into ``data.normalize(string)``
    :return: the corresponding index into ``string``, never larger than
        ``len(string)``
    """
    max_length = len(string)
    length = nfc_index
    while len(data.normalize(string[:length])) <= nfc_index:
        # ``>=`` rather than ``==``: if nfc_index is already past the end of
        # the raw string, an equality test is never hit and the loop would
        # spin forever.
        if length >= max_length:
            return max_length
        length += 1
    return length - 1
108
109
121
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False,
             max_matches=0):
    """Build a filter that greps units for the given search string.

    :param searchstring: text or regular expression to look for; byte
        strings are decoded using ``encoding``. The string is normalized
        with ``data.normalize`` before use.
    :param searchparts: collection of part names to search ('source' or
        'msgid', 'target' or 'msgstr', 'notes' or 'comment', 'locations').
        If empty or None, source and target are searched.
    :param ignorecase: match case-insensitively
    :param useregexp: treat ``searchstring`` as a regular expression
    :param invertmatch: select strings that do NOT match
    :param accelchar: accelerator character to strip before matching
    :param encoding: encoding used to decode a byte ``searchstring``
    :param includeheader: stored on the filter as ``includeheader``
    :param max_matches: stored on the filter as ``max_matches``
    """
    if isinstance(searchstring, bytes):
        searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(searchstring)

    if searchparts:
        # The PO keyword names are accepted as aliases for the generic names.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    self.useregexp = useregexp
    if self.useregexp:
        # Never lower-case a regular expression: that silently turns
        # escapes such as \S, \W, \D and \B into their opposites. Let the
        # regex engine do the case folding instead.
        flags = 0
        if self.ignorecase:
            flags = re.IGNORECASE | re.UNICODE
        self.searchpattern = re.compile(self.searchstring, flags)
    elif self.ignorecase:
        # Plain substring search compares against lower-cased text.
        self.searchstring = self.searchstring.lower()

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.includeheader = includeheader
    self.max_matches = max_matches
154
def matches(self, teststr):
    """Tell whether a single string satisfies the search criteria.

    The string is normalized, lower-cased when ignoring case, and stripped
    of accelerator markers before it is compared with the search string or
    pattern.

    :param teststr: the string to test; ``None`` never matches, even when
        the match is inverted
    :return: a true value if the string is selected, a false value otherwise
    """
    if teststr is None:
        return False
    teststr = data.normalize(teststr)
    if self.ignorecase:
        teststr = teststr.lower()
    if self.accelchar:
        # The accelerator is a literal character, not a pattern, so use
        # plain string replacement (a regex-special character would
        # otherwise be misinterpreted). A doubled accelerator becomes "#",
        # a single one is dropped.
        teststr = teststr.replace(self.accelchar + self.accelchar, "#")
        teststr = teststr.replace(self.accelchar, "")
    if self.useregexp:
        found = self.searchpattern.search(teststr)
    else:
        found = self.searchstring in teststr
    if self.invertmatch:
        found = not found
    return found
171
def filterunit(self, unit):
    """Tell whether a unit matches in any of the parts being searched.

    Header units never match. Source and target are checked string by
    string (every string of a multistring), notes are checked as returned
    by ``unit.getnotes()``, and locations are checked joined by spaces.

    :param unit: the unit to test
    :return: True if any searched part matches, otherwise False
    """
    if unit.isheader():
        # Return a real boolean, consistent with every other exit path.
        return False

    if self.search_source:
        if isinstance(unit.source, multistring):
            strings = unit.source.strings
        else:
            strings = [unit.source]
        if any(self.matches(string) for string in strings):
            return True

    if self.search_target:
        if isinstance(unit.target, multistring):
            strings = unit.target.strings
        else:
            strings = [unit.target]
        if any(self.matches(string) for string in strings):
            return True

    if self.search_notes and self.matches(unit.getnotes()):
        return True
    if self.search_locations and self.matches(u" ".join(unit.getlocations())):
        return True
    return False
201
216
def getmatches(self, units):
    """Collect every match of the search string in the given units.

    The compiled search expression is also stored as ``self.re_search``.

    :param units: iterable of units to search
    :return: a ``(matches, indexes)`` tuple: the match objects produced by
        ``find_matches`` and the positions in ``units`` of the units that
        contributed at least one match. Both are empty if the search string
        is empty.
    :raises Exception: if ``self.max_matches`` is non-zero and more matches
        than that have been found
    """
    if not self.searchstring:
        return [], []

    searchstring = self.searchstring
    # No re.LOCALE here: it conflicts with re.UNICODE on text patterns
    # (Python 3 rejects the combination outright) and would make case
    # folding depend on the process locale instead of Unicode rules.
    flags = re.MULTILINE | re.UNICODE
    if self.ignorecase:
        flags |= re.IGNORECASE
    if not self.useregexp:
        searchstring = re.escape(searchstring)
    self.re_search = re.compile(u'(%s)' % (searchstring), flags)

    matches = []
    indexes = []

    for index, unit in enumerate(units):
        old_length = len(matches)

        if self.search_target:
            if unit.hasplural():
                targets = unit.target.strings
            else:
                targets = [unit.target]
            matches.extend(find_matches(unit, 'target', targets, self.re_search))
        if self.search_source:
            if unit.hasplural():
                sources = unit.source.strings
            else:
                sources = [unit.source]
            matches.extend(find_matches(unit, 'source', sources, self.re_search))
        if self.search_notes:
            matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
        if self.search_locations:
            matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))

        # A max_matches of 0 means "no limit".
        if self.max_matches and len(matches) > self.max_matches:
            raise Exception("Too many matches found")

        # Remember this unit only if it added something.
        if len(matches) > old_length:
            indexes.append(index)

    return matches, indexes
265
267 """a specialized Option Parser for the grep tool..."""
def parse_args(self, args=None, values=None):
    """Parse the command line, handling implicit input/output arguments.

    The first free-standing argument becomes ``options.searchstring``.
    Remaining free-standing arguments fill in ``options.input`` and
    ``options.output`` when those were not given as options; anything left
    over after that is reported as an error.

    :return: the ``(options, args)`` tuple, with ``args`` holding whatever
        free-standing arguments remain
    """
    base_parse = optrecurse.optparse.OptionParser.parse_args
    options, remaining = base_parse(self, args, values)

    if remaining:
        options.searchstring = remaining[0]
        remaining = remaining[1:]
    else:
        self.error("At least one argument must be given for the search string")

    if remaining and not options.input:
        if options.output:
            # Output was given explicitly: everything left is input.
            options.input, remaining = remaining, []
        else:
            # Keep the last argument back as the implicit output.
            options.input, remaining = remaining[:-1], remaining[-1:]

    if remaining and not options.output:
        options.output, remaining = remaining[-1], remaining[:-1]

    if remaining:
        self.error("You have used an invalid combination of --input, --output and freestanding args")

    # A single input is passed on as a plain value rather than a list.
    if isinstance(options.input, list) and len(options.input) == 1:
        options.input = options.input[0]
    return (options, remaining)
292
def set_usage(self, usage=None):
    """Set the usage string.

    When ``usage`` is None, the usage is built from the search string
    placeholder followed by ``getusagestring`` of every option in
    ``option_list``; otherwise the given usage is passed to the parent
    class.
    """
    if usage is not None:
        super(GrepOptionParser, self).set_usage(usage)
        return
    option_usages = [self.getusagestring(option) for option in self.option_list]
    self.usage = "%prog searchstring " + " ".join(option_usages)
299
308
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Read inputfile, filter it with checkfilter and write the result.

    :param inputfile: file handed to ``factory.getobject`` to load the store
    :param outputfile: object whose ``write`` receives the filtered store
    :param templatefile: not used by this function
    :param checkfilter: filter whose ``filterfile`` produces the output store
    :return: True if something was written, False if the filtered store
        was empty (nothing is written in that case)
    """
    filtered = checkfilter.filterfile(factory.getobject(inputfile))
    if not filtered.isempty():
        outputfile.write(str(filtered))
        return True
    return False
317
def cmdlineparser():
    """Build and return the option parser for the grep command line tool."""
    # Every supported extension maps onto itself; files without a
    # recognised extension are treated as PO.
    extensions = ("po", "pot", "mo", "gmo", "tmx", "xliff", "xlf", "xlff")
    formats = dict((ext, (ext, rungrep)) for ext in extensions)
    formats[None] = ("po", rungrep)

    parser = GrepOptionParser(formats)
    parser.add_option(
        "", "--search", dest="searchparts", action="append", type="choice",
        choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment" ],
        metavar="SEARCHPARTS",
        help="searches the given parts (source, target, notes and locations)")

    # Simple on/off switches, added in the order they should be listed.
    switches = (
        ("-I", "--ignore-case", "ignorecase", "ignore case distinctions"),
        ("-e", "--regexp", "useregexp", "use regular expression matching"),
        ("-v", "--invert-match", "invertmatch", "select non-matching lines"),
    )
    for short_name, long_name, dest, help_text in switches:
        parser.add_option(short_name, long_name, dest=dest,
                          action="store_true", default=False, help=help_text)

    parser.add_option(
        "", "--accelerator", dest="accelchar", action="store", type="choice",
        choices=["&", "_", "~"], metavar="ACCELERATOR",
        help="ignores the given accelerator when matching")
    parser.add_option(
        "", "--header", dest="includeheader", action="store_true",
        default=False, help="include a PO header in the output")

    parser.set_usage()
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
344
def main():
    """Build the command line parser and run it."""
    cmdlineparser().run()


if __name__ == '__main__':
    main()
351