1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Grep XLIFF, Gettext PO and TMX localization files
23
24 Matches are output to snippet files of the same type which can then be reviewed
25 and later merged using pomerge
26
27 See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
28 usage instructions
29 """
30
31 from translate.storage import factory
32 from translate.storage.poheader import poheader
33 from translate.misc import optrecurse
34 from translate.misc.multistring import multistring
35 from translate.lang import data
36 import re
37 import locale
38
39
41 """Just a small data structure that represents a search match."""
42
43
def __init__(self, unit, part='target', part_n=0, start=0, end=0):
    """Record where in a unit a search hit was found.

    unit    -- the unit object the hit belongs to
    part    -- name of the searched part; the accessor code in this class
               recognises 'target', 'source', 'notes' and 'locations'
    part_n  -- index into that part (entry of a plural's strings, or of
               the notes/locations sequence)
    start   -- stored as given (presumably the start offset of the hit
               within the part's text -- confirm against the caller)
    end     -- stored as given (presumably the end offset -- confirm)
    """
    self.unit = unit
    self.part = part
    self.part_n = part_n
    self.start = start
    self.end = end
50
51
# NOTE(review): the `def` line of this method is not present in this listing
# (the embedded numbering skips 52), so its name and signature cannot be
# confirmed from here.
#
# The body selects, based on self.part, a zero-argument callable that reads
# the matched text from self.unit, and returns that callable:
#   'target' / 'source' -- the unit's target/source, or entry self.part_n of
#                          its .strings when self.unit.hasplural() is true
#   'notes'             -- self.unit.getnotes()[self.part_n]
#   'locations'         -- self.unit.getlocations()[self.part_n]
# The callables read from self.unit only when they are invoked, not when
# they are created. Any other value of self.part reaches the end of the
# body without returning a callable.
53 if self.part == 'target':
54 if self.unit.hasplural():
55 getter = lambda: self.unit.target.strings[self.part_n]
56 else:
57 getter = lambda: self.unit.target
58 return getter
59 elif self.part == 'source':
60 if self.unit.hasplural():
61 getter = lambda: self.unit.source.strings[self.part_n]
62 else:
63 getter = lambda: self.unit.source
64 return getter
65 elif self.part == 'notes':
66 def getter():
67 return self.unit.getnotes()[self.part_n]
68 return getter
69 elif self.part == 'locations':
70 def getter():
71 return self.unit.getlocations()[self.part_n]
72 return getter
73
# NOTE(review): the `def` line of this method is not present in this listing
# (the embedded numbering skips 74), so its name and signature cannot be
# confirmed from here.
#
# The body builds and returns a one-argument callable that writes a new value
# into the matched part of self.unit. Only self.part == 'target' is handled;
# for any other part the body ends without returning a callable.
75 if self.part == 'target':
76 if self.unit.hasplural():
# Plural case: replace entry self.part_n in the list of strings and assign
# the whole list back to the unit's target.
77 def setter(value):
78 strings = self.unit.target.strings
79 strings[self.part_n] = value
80 self.unit.target = strings
81 else:
# Non-plural case: the value replaces the target outright.
82 def setter(value):
83 self.unit.target = value
84 return setter
85
86
95
98
# NOTE(review): the `def` line of this function is not present in this
# listing (the embedded numbering skips 99). The body uses the names
# `string` and `nfc_index`, which are presumably its parameters -- confirm.
100 """Calculate the real index in the unnormalized string that corresponds to
101 the index nfc_index in the normalized string."""
# Start with a prefix of nfc_index characters and grow it one character at a
# time while the normalized prefix is still no longer than nfc_index.
102 length = nfc_index
103 max_length = len(string)
104 while len(data.normalize(string[:length])) <= nfc_index:
# The whole string has been consumed: its full length is the answer.
105 if length == max_length:
106 return length
107 length += 1
# The loop stopped at the first prefix whose normalized form is longer than
# nfc_index, so the result is one character less than that prefix length.
108 return length - 1
109
110
123
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8',
             max_matches=0):
    """Set up the search criteria used by this filter.

    searchstring -- text (or regular expression, see useregexp) to look
                    for; byte strings are decoded using ``encoding`` and
                    the result is passed through data.normalize()
    searchparts  -- collection of part names to search: 'source',
                    'target', 'notes', 'locations' (also 'msgid',
                    'msgstr', 'comment'); when empty/None, source and
                    target are searched and notes/locations are not
    ignorecase   -- match without regard to case
    useregexp    -- treat searchstring as a regular expression
    invertmatch  -- stored; matching code reports non-matches instead
    accelchar    -- stored; accelerator character ignored when matching
    encoding     -- encoding used to decode a byte-string searchstring
    max_matches  -- stored as given
    """
    # ``bytes`` is ``str`` on Python 2, so for str/unicode input this decodes
    # exactly what the former ``not isinstance(..., unicode)`` test decoded,
    # without depending on the Python-2-only ``unicode`` builtin.
    if isinstance(searchstring, bytes):
        searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(searchstring)

    if searchparts:
        # 'msgid', 'msgstr' and 'comment' are accepted as alternative names
        # for 'source', 'target' and 'notes' respectively.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    self.useregexp = useregexp
    if self.useregexp:
        # A regular expression must not be lowercased: that would turn
        # escapes such as \S, \W, \D or \B into their opposites. Let the
        # regex engine do the case folding instead; re.UNICODE makes that
        # folding cover non-ASCII text on Python 2 as well.
        flags = 0
        if self.ignorecase:
            flags = re.IGNORECASE | re.UNICODE
        self.searchpattern = re.compile(self.searchstring, flags)
    elif self.ignorecase:
        # Plain-text search: compare lowercased text on both sides.
        self.searchstring = self.searchstring.lower()

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.max_matches = max_matches
155
# NOTE(review): the `def` line is absent from this listing; the name and
# parameter are taken from the `self.matches(...)` calls elsewhere in the
# file and from the `teststr` variable used in the body -- confirm.
def matches(self, teststr):
    """Return whether teststr satisfies the search criteria.

    teststr is normalized with data.normalize(), lowercased when
    self.ignorecase is set, and stripped of the accelerator character when
    self.accelchar is set, before being compared with the search string
    (substring test) or search pattern (regular expression search).

    Returns a bool. The result is inverted when self.invertmatch is set,
    except that a teststr of None always yields False.
    """
    if teststr is None:
        return False
    teststr = data.normalize(teststr)
    if self.ignorecase:
        teststr = teststr.lower()
    if self.accelchar:
        # The accelerator is a literal character, so use plain string
        # replacement; feeding it to re.sub() unescaped would misbehave for
        # characters that are special in regular expressions.
        # A doubled accelerator is replaced by "#", a single one is dropped.
        teststr = teststr.replace(self.accelchar * 2, "#")
        teststr = teststr.replace(self.accelchar, "")
    if self.useregexp:
        found = self.searchpattern.search(teststr) is not None
    else:
        found = self.searchstring in teststr
    if self.invertmatch:
        found = not found
    return found
172
# NOTE(review): the `def` line of this method is not present in this listing
# (the embedded numbering skips 173); the body uses `self` and `unit`.
174 """runs filters on an element"""
# Header units never match.
# NOTE(review): this path returns an empty list while every other path
# returns True/False -- both are falsy, but the return type is inconsistent.
175 if unit.isheader():
176 return []
177
# Source: test every plural form (or the single string) and stop at the
# first one that self.matches() accepts.
178 if self.search_source:
179 if isinstance(unit.source, multistring):
180 strings = unit.source.strings
181 else:
182 strings = [unit.source]
183 for string in strings:
184 if self.matches(string):
185 return True
186
# Target: same treatment as the source.
187 if self.search_target:
188 if isinstance(unit.target, multistring):
189 strings = unit.target.strings
190 else:
191 strings = [unit.target]
192 for string in strings:
193 if self.matches(string):
194 return True
195
# Notes are passed to self.matches() as returned by unit.getnotes();
# locations are joined with single spaces into one string first.
196 if self.search_notes:
197 if self.matches(unit.getnotes()):
198 return True
199 if self.search_locations:
200 if self.matches(u" ".join(unit.getlocations())):
201 return True
# None of the enabled parts matched.
202 return False
203
216
# NOTE(review): the `def` line of this method is not present in this listing
# (the embedded numbering skips 217); the body uses `self` and `units`.
#
# The body compiles the search string into self.re_search, collects the
# results of find_matches() for each enabled part of every unit, and returns
# (matches, indexes), where indexes holds the enumerate() position of each
# unit that contributed at least one match. find_matches is not visible in
# this listing.
# An empty search string short-circuits to two empty lists.
218 if not self.searchstring:
219 return [], []
220
221 searchstring = self.searchstring
# NOTE(review): re.LOCALE is combined with re.UNICODE on a text pattern;
# Python 3's re module rejects re.LOCALE for str patterns -- confirm the
# intended interpreter version and whether LOCALE is needed at all.
222 flags = re.LOCALE | re.MULTILINE | re.UNICODE
223
224 if self.ignorecase:
225 flags |= re.IGNORECASE
# A plain-text search string is escaped so it is matched literally.
226 if not self.useregexp:
227 searchstring = re.escape(searchstring)
# The pattern is wrapped in one capturing group.
228 self.re_search = re.compile(u'(%s)' % (searchstring), flags)
229
230 matches = []
231 indexes = []
232
233 for index, unit in enumerate(units):
# Remember how many matches existed before this unit was examined.
234 old_length = len(matches)
235
236 if self.search_target:
237 if unit.hasplural():
238 targets = unit.target.strings
239 else:
240 targets = [unit.target]
241 matches.extend(find_matches(unit, 'target', targets, self.re_search))
242 if self.search_source:
243 if unit.hasplural():
244 sources = unit.source.strings
245 else:
246 sources = [unit.source]
247 matches.extend(find_matches(unit, 'source', sources, self.re_search))
248 if self.search_notes:
249 matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
250
251 if self.search_locations:
252 matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))
253
254
255
256
# A max_matches of 0 disables the limit (the test is skipped when it is
# falsy).
# NOTE(review): a bare Exception is raised here; indentation is lost in this
# listing, so whether this check runs per unit or once after the loop cannot
# be confirmed from here.
257 if self.max_matches and len(matches) > self.max_matches:
258 raise Exception("Too many matches found")
259
# Record the unit's position when it added at least one match.
# NOTE(review): the reassignment of old_length below looks redundant if this
# sits inside the loop, since old_length is reset at the top of each
# iteration -- confirm.
260 if len(matches) > old_length:
261 old_length = len(matches)
262 indexes.append(index)
263
264 return matches, indexes
265
267 """a specialized Option Parser for the grep tool..."""
# NOTE(review): the `def` line is absent from this listing; the name and
# parameters are taken from the base-class call in the body, and the None
# defaults mirror optparse's OptionParser.parse_args -- confirm.
def parse_args(self, args=None, values=None):
    """Parse the command line, handling the implicit positional arguments.

    After the regular option parsing, the positional arguments are
    interpreted as follows:

    * the first one is required and becomes options.searchstring;
    * if options.input is unset, the remaining ones become the input --
      all of them when options.output is already set, otherwise all but
      the last;
    * if options.output is still unset, the last remaining one becomes
      the output;
    * anything left over is reported through self.error().

    An input list holding a single entry is unwrapped to that entry.
    Returns the (options, args) pair.
    """
    (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)

    # The search string is the mandatory first positional argument.
    if args:
        options.searchstring = args[0]
        args = args[1:]
    else:
        self.error("At least one argument must be given for the search string")

    if args and not options.input:
        if not options.output:
            # Keep the last argument back: it will become the output below.
            options.input = args[:-1]
            args = args[-1:]
        else:
            options.input = args
            args = []
    if args and not options.output:
        options.output = args[-1]
        args = args[:-1]
    if args:
        self.error("You have used an invalid combination of --input, --output and freestanding args")

    if isinstance(options.input, list) and len(options.input) == 1:
        options.input = options.input[0]
    return (options, args)
292
# NOTE(review): the `def` line is absent from this listing; the name and
# parameter are taken from the super().set_usage(usage) call in the body and
# the argument-less `parser.set_usage()` call elsewhere in the file -- confirm.
def set_usage(self, usage=None):
    """Set the usage string.

    When usage is None, self.usage is built from the literal prefix
    "%prog searchstring " followed by self.getusagestring(option) for
    every option in self.option_list, separated by spaces. Otherwise the
    given usage is handed to the parent class's set_usage().
    """
    if usage is None:
        option_usages = (self.getusagestring(option) for option in self.option_list)
        self.usage = "%prog searchstring " + " ".join(option_usages)
    else:
        super(GrepOptionParser, self).set_usage(usage)
299
308
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Read inputfile, filter it with checkfilter and write the result to outputfile.

    inputfile    -- passed to factory.getobject() to load the store
    outputfile   -- object with a write() method that receives str(result)
    templatefile -- accepted but not used by this function
    checkfilter  -- object whose filterfile(store) returns the filtered store

    Returns False, writing nothing, when the filtered store reports
    isempty(); returns True after writing otherwise.
    """
    fromfile = factory.getobject(inputfile)
    tofile = checkfilter.filterfile(fromfile)
    if tofile.isempty():
        return False
    outputfile.write(str(tofile))
    return True
317
# NOTE(review): the `def` line is absent from this listing; the name and the
# empty parameter list are taken from the `cmdlineparser()` call elsewhere in
# the file -- confirm.
def cmdlineparser():
    """Build and return the GrepOptionParser for the command-line tool.

    Every supported input extension maps to an output extension and the
    rungrep converter; the None key supplies the mapping used when no
    extension applies (assumption based on the key -- confirm against
    the parser class).
    """
    formats = {
        "po": ("po", rungrep), "pot": ("pot", rungrep),
        "mo": ("mo", rungrep), "gmo": ("gmo", rungrep),
        "tmx": ("tmx", rungrep),
        "xliff": ("xliff", rungrep), "xlf": ("xlf", rungrep), "xlff": ("xlff", rungrep),
        None: ("po", rungrep),
    }
    parser = GrepOptionParser(formats)
    parser.add_option("", "--search", dest="searchparts",
        action="append", type="choice", choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment"],
        metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
    parser.add_option("-I", "--ignore-case", dest="ignorecase",
        action="store_true", default=False, help="ignore case distinctions")
    parser.add_option("-e", "--regexp", dest="useregexp",
        action="store_true", default=False, help="use regular expression matching")
    parser.add_option("-v", "--invert-match", dest="invertmatch",
        action="store_true", default=False, help="select non-matching lines")
    parser.add_option("", "--accelerator", dest="accelchar",
        action="store", type="choice", choices=["&", "_", "~"],
        metavar="ACCELERATOR", help="ignores the given accelerator when matching")
    # Called without arguments so the usage line is generated from the
    # options registered above.
    parser.set_usage()
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
341
# NOTE(review): the `def` line is absent from this listing; the name is taken
# from the `main()` call in the __main__ guard -- confirm.
def main():
    """Build the command-line parser and run it."""
    parser = cmdlineparser()
    parser.run()
345
# Run the tool only when the module is executed as a script.
if __name__ == '__main__':
    main()
348