1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import re
23
24 """
25 From the GNU gettext manual:
26 WHITE-SPACE
27 # TRANSLATOR-COMMENTS
28 #. AUTOMATIC-COMMENTS
29 #| PREVIOUS MSGID (Gettext 0.16 - check if this is the correct position - not yet implemented)
30 #: REFERENCE...
31 #, FLAG...
32 msgctxt CONTEXT (Gettext 0.15)
33 msgid UNTRANSLATED-STRING
34 msgstr TRANSLATED-STRING
35 """
36
37 isspace = str.isspace
38 find = str.find
39 rfind = str.rfind
40 startswith = str.startswith
41 append = list.append
42 decode = str.decode
43
def __init__(self, input_iterator, UnitClass, encoding=None):
    """Initialise the parse state and call read_line() once.

    input_iterator -- object stored as ``_input_iterator``
    UnitClass      -- stored as ``UnitClass``
    encoding       -- stored as ``encoding``; defaults to None
    """
    self.encoding = encoding
    self.eof = False
    self.next_line = ''
    self._input_iterator = input_iterator
    # read_line() runs only after the iterator, the empty pending line and
    # the eof flag are in place.
    self.read_line()
    self.UnitClass = UnitClass
52
54 if self.encoding is not None:
55 return decode(string, self.encoding)
56 else:
57 return string
58
60 current = self.next_line
61 if self.eof:
62 return current
63 try:
64 self.next_line = self._input_iterator.next()
65 while not self.eof and isspace(self.next_line):
66 self.next_line = self._input_iterator.next()
67 except StopIteration:
68 self.next_line = ''
69 self.eof = True
70 return current
71
74
76 """Read all the lines belonging starting with #|. These lines contain
77 the previous msgid and msgctxt info. We strip away the leading '#| '
78 and read until we stop seeing #|."""
79 prevmsgid_lines = []
80 next_line = parse_state.next_line
81 while startswith(next_line, '#| ') or startswith(next_line, '| '):
82 content = parse_state.read_line()
83 prefix_len = content.index('| ')
84 content = content[prefix_len+2:]
85 append(prevmsgid_lines, content)
86 next_line = parse_state.next_line
87 return prevmsgid_lines
88
90 parse_message(parse_state, 'msgctxt', 7, unit.prev_msgctxt)
91 return len(unit.prev_msgctxt) > 0
92
94 parse_message(parse_state, 'msgid', 5, unit.prev_msgid)
95 return len(unit.prev_msgid) > 0
96
98 parse_message(parse_state, 'msgid_plural', 12, unit.prev_msgid_plural)
99 return len(unit.prev_msgid_plural) > 0
100
132
140
142 """Read all the lines belonging to the current unit if obsolete."""
143 obsolete_lines = []
144 next_line = parse_state.next_line
145 while startswith(next_line, '#~'):
146 content = parse_state.read_line()[2:].lstrip()
147 append(obsolete_lines, content)
148 next_line = parse_state.next_line
149 if startswith(content, 'msgstr'):
150
151
152 while startswith(next_line, '#~ "') or startswith(next_line, '#~ msgstr'):
153 content = parse_state.read_line()[3:]
154 append(obsolete_lines, content)
155 next_line = parse_state.next_line
156 break
157 return obsolete_lines
158
167
169 line = parse_state.next_line
170 left = find(line, '"', start_pos)
171 if left == start_pos or isspace(line[start_pos:left]):
172 right = rfind(line, '"')
173 if left != right:
174 return parse_state.read_line()[left:right+1]
175 else:
176
177
178 return parse_state.read_line()[left:-1] + '"'
179 return None
180
188
190 string = parse_quoted(parse_state, first_start_pos)
191 while string is not None:
192 if not startswith(string, '"_:'):
193 append(msg_list, parse_state.decode(string))
194 string = parse_quoted(parse_state)
195 else:
196 string = parse_msg_comment(parse_state, msg_comment_list, string)
197
def parse_message(parse_state, start_of_string, start_of_string_len, msg_list, msg_comment_list=None):
    """Parse one keyword entry if the pending line begins with it.

    When ``parse_state.next_line`` starts with ``start_of_string`` the work is
    delegated to parse_multiple_quoted, which receives ``msg_list``, the
    comment list and ``start_of_string_len``; its result is returned.
    Otherwise nothing else is called and None is returned.

    A new empty list is used when ``msg_comment_list`` is None.
    """
    if not startswith(parse_state.next_line, start_of_string):
        return None
    comments = [] if msg_comment_list is None else msg_comment_list
    return parse_multiple_quoted(parse_state, msg_list, comments, start_of_string_len)
203
205 parse_message(parse_state, 'msgctxt', 7, unit.msgctxt)
206 return len(unit.msgctxt) > 0
207
209 parse_message(parse_state, 'msgid', 5, unit.msgid, unit.msgidcomments)
210 return len(unit.msgid) > 0 or len(unit.msgidcomments) > 0
211
213 parse_message(parse_state, 'msgstr', 6, unit.msgstr)
214 return len(unit.msgstr) > 0
215
219
# Length of the literal 'msgstr[' prefix; the index digits start at this offset.
MSGSTR_ARRAY_ENTRY_LEN = len('msgstr[')

def add_to_dict(msgstr_dict, line, right_bracket_pos, entry):
    """Append the items of ``entry`` to ``msgstr_dict`` under the index in ``line``.

    The index is the integer found in ``line`` between offset
    MSGSTR_ARRAY_ENTRY_LEN and ``right_bracket_pos``. A list is created for
    an index seen for the first time; later calls extend the existing list.
    int() raises ValueError if that slice is not a valid integer.
    """
    slot = int(line[MSGSTR_ARRAY_ENTRY_LEN:right_bracket_pos])
    msgstr_dict.setdefault(slot, []).extend(entry)
227
def get_entry(parse_state, right_bracket_pos):
    """Return the list filled by parse_message for a 'msgstr[' entry.

    parse_message is called with the 'msgstr[' keyword and
    ``right_bracket_pos + 1`` as its offset argument. The returned list is
    empty when parse_message added nothing.
    """
    collected = []
    offset_after_bracket = right_bracket_pos + 1
    parse_message(parse_state, 'msgstr[', offset_after_bracket, collected)
    return collected
232
def parse_msgstr_array_entry(parse_state, msgstr_dict):
    """Try to read one indexed msgstr entry into ``msgstr_dict``.

    Returns True when get_entry produced at least one item and it was stored
    through add_to_dict. Returns False when the pending line has no ']' at or
    after offset MSGSTR_ARRAY_ENTRY_LEN, or when get_entry returned an empty
    list.
    """
    pending = parse_state.next_line
    bracket_at = find(pending, ']', MSGSTR_ARRAY_ENTRY_LEN)
    if bracket_at < 0:
        return False
    strings = get_entry(parse_state, bracket_at)
    if len(strings) == 0:
        return False
    # `pending` was captured before get_entry ran (which may have advanced
    # parse_state); the index is read from that captured line.
    add_to_dict(msgstr_dict, pending, bracket_at, strings)
    return True
245
255
262
270
272 unit = unit or parse_state.UnitClass()
273 parsed_comments = parse_comments(parse_state, unit)
274 obsolete_unit = parse_obsolete(parse_state, unit)
275 if obsolete_unit is not None:
276 return obsolete_unit
277 parsed_msg_entries = parse_msg_entries(parse_state, unit)
278 unit.infer_state()
279 if parsed_comments or parsed_msg_entries:
280 return unit
281 else:
282 return None
283
285 charset = None
286 if isinstance(unit.msgstr, list) and len(unit.msgstr) > 0 and isinstance(unit.msgstr[0], str):
287 charset = re.search("charset=([^\\s\\\\n]+)", "".join(unit.msgstr))
288 if charset:
289 encoding = charset.group(1)
290 if encoding != 'CHARSET':
291 store._encoding = encoding
292 else:
293 store._encoding = 'utf-8'
294 else:
295 store._encoding = 'utf-8'
296 parse_state.encoding = store._encoding
297
299 return [decode(item) for item in lst]
300
302 for attr in ('msgctxt', 'msgid', 'msgid_pluralcomments',
303 'msgid_plural', 'msgstr', 'obsoletemsgctxt',
304 'obsoletemsgid', 'obsoletemsgid_pluralcomments',
305 'obsoletemsgid_plural', 'obsoletemsgstr',
306 'othercomments', 'automaticcomments', 'sourcecomments',
307 'typecomments', 'msgidcomments', 'obsoletemsgidcomments'):
308 element = getattr(unit, attr)
309 if isinstance(element, list):
310 setattr(unit, attr, decode_list(element, decode))
311 else:
312 setattr(unit, attr, dict([(key, decode_list(value, decode)) for key, value in element.items()]))
313
321
323 unit = parse_header(parse_state, store)
324 while unit:
325 store.addunit(unit)
326 unit = parse_unit(parse_state)
327 return parse_state.eof
328