1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import re
23
24 """
25 From the GNU gettext manual:
26 WHITE-SPACE
27 # TRANSLATOR-COMMENTS
28 #. AUTOMATIC-COMMENTS
29 #| PREVIOUS MSGID (Gettext 0.16 - check if this is the correct position - not yet implemented)
30 #: REFERENCE...
31 #, FLAG...
32 msgctxt CONTEXT (Gettext 0.15)
33 msgid UNTRANSLATED-STRING
34 msgstr TRANSLATED-STRING
35 """
36
# Module-level aliases for unbound str/list methods. The parsing code calls
# them as plain functions with the object as first argument, e.g.
# startswith(line, '#| ') or append(some_list, item).
# NOTE(review): presumably bound here to avoid repeated attribute lookups in
# the parsing loops -- confirm.
# NOTE(review): str.decode only exists on Python 2 byte strings.
37 isspace = str.isspace
38 find = str.find
39 rfind = str.rfind
40 startswith = str.startswith
41 append = list.append
42 decode = str.decode
43
def __init__(self, input_iterator, UnitClass, encoding=None):
    """Store the parser inputs and prime the one-line lookahead.

    input_iterator -- iterator that yields the input one line at a time.
    UnitClass -- kept as self.UnitClass for later use by the parser.
    encoding -- kept as self.encoding; None means no encoding is set yet.
    """
    self._input_iterator = input_iterator
    self.next_line = ''
    self.eof = False
    self.encoding = encoding
    self.UnitClass = UnitClass
    # Called last so that every attribute already exists when the first
    # line is pulled from the iterator.
    self.read_line()
52
54 if self.encoding is not None:
55 return decode(string, self.encoding)
56 else:
57 return string
58
60 current = self.next_line
61 if self.eof:
62 return current
63 try:
64 self.next_line = self._input_iterator.next()
65 while not self.eof and isspace(self.next_line):
66 self.next_line = self._input_iterator.next()
67 except StopIteration:
68 self.next_line = ''
69 self.eof = True
70 return current
71
74
76 """Read all the lines starting with #|. These lines contain
77 the previous msgid and msgctxt info. We strip away the leading '#| '
78 and read until we stop seeing #|."""
79 prevmsgid_lines = []
80 next_line = parse_state.next_line
81 while startswith(next_line, '#| '):
82 append(prevmsgid_lines, parse_state.read_line()[3:])
83 next_line = parse_state.next_line
84 return prevmsgid_lines
85
87 parse_message(parse_state, 'msgctxt', 7, unit.prev_msgctxt)
88 return len(unit.prev_msgctxt) > 0
89
91 parse_message(parse_state, 'msgid', 5, unit.prev_msgid)
92 return len(unit.prev_msgid) > 0
93
95 parse_message(parse_state, 'msgid_plural', 12, unit.prev_msgid_plural)
96 return len(unit.prev_msgid_plural) > 0
97
129
137
139 """Read all the lines belonging to the current unit if obsolete."""
140 obsolete_lines = []
141 if startswith(parse_state.next_line, '#~ '):
142 append(obsolete_lines, parse_state.read_line()[3:])
143 else:
144 return obsolete_lines
145
146
147 next_line = parse_state.next_line
148 if startswith(next_line, '#~ msgid ') and obsolete_lines[-1].startswith('msgctxt'):
149 append(obsolete_lines, parse_state.read_line()[3:])
150 next_line = parse_state.next_line
151 while startswith(next_line, '#~ ') and not (startswith(next_line, '#~ msgid ') or startswith(next_line, '#~ msgctxt')):
152 append(obsolete_lines, parse_state.read_line()[3:])
153 next_line = parse_state.next_line
154 return obsolete_lines
155
164
166 line = parse_state.next_line
167 left = find(line, '"', start_pos)
168 if left == start_pos or isspace(line[start_pos:left]):
169 right = rfind(line, '"')
170 if left != right:
171 return parse_state.read_line()[left:right+1]
172 else:
173 return parse_state.read_line()[left:] + '"'
174 return None
175
183
185 string = parse_quoted(parse_state, first_start_pos)
186 while string is not None:
187 if not startswith(string, '"_:'):
188 append(msg_list, parse_state.decode(string))
189 string = parse_quoted(parse_state)
190 else:
191 string = parse_msg_comment(parse_state, msg_comment_list, string)
192
def parse_message(parse_state, start_of_string, start_of_string_len, msg_list, msg_comment_list=None):
    """Parse one keyword entry if the lookahead line starts with the keyword.

    parse_state -- parser state; only its next_line is inspected here.
    start_of_string -- prefix the lookahead line must start with
        (callers pass e.g. 'msgid' or 'msgstr').
    start_of_string_len -- handed to parse_multiple_quoted as the position
        from which the first quoted string is searched.
    msg_list -- list handed to parse_multiple_quoted to receive the strings.
    msg_comment_list -- optional list handed to parse_multiple_quoted; a
        fresh list is used when it is None.

    Returns whatever parse_multiple_quoted returns, or None when the
    lookahead line does not start with start_of_string.
    """
    if msg_comment_list is None:
        msg_comment_list = []
    if not startswith(parse_state.next_line, start_of_string):
        return None
    return parse_multiple_quoted(parse_state, msg_list, msg_comment_list, start_of_string_len)
198
200 parse_message(parse_state, 'msgctxt', 7, unit.msgctxt)
201 return len(unit.msgctxt) > 0
202
204 parse_message(parse_state, 'msgid', 5, unit.msgid, unit.msgidcomments)
205 return len(unit.msgid) > 0 or len(unit.msgidcomments) > 0
206
208 parse_message(parse_state, 'msgstr', 6, unit.msgstr)
209 return len(unit.msgstr) > 0
210
214
# Length of the 'msgstr[' prefix, i.e. the offset at which the plural index
# starts in a line such as: msgstr[0] "..."
MSGSTR_ARRAY_ENTRY_LEN = len('msgstr[')

def add_to_dict(msgstr_dict, line, right_bracket_pos, entry):
    """Append the strings in entry to the list stored under the plural index.

    msgstr_dict -- dict mapping an integer index to a list of strings;
        modified in place.
    line -- the 'msgstr[N] ...' line the entry was read from.
    right_bracket_pos -- position of the closing ']' in line.
    entry -- list of strings to add.

    Raises ValueError if the text between the brackets is not an integer.
    """
    index = int(line[MSGSTR_ARRAY_ENTRY_LEN:right_bracket_pos])
    # setdefault creates the list on first use of an index.
    msgstr_dict.setdefault(index, []).extend(entry)
222
def get_entry(parse_state, right_bracket_pos):
    """Collect the strings of one 'msgstr[N]' entry into a new list.

    parse_state -- parser state handed on to parse_message.
    right_bracket_pos -- position of the closing ']' in the lookahead line;
        the position just after it is handed to parse_message.

    Returns the list filled by parse_message; it stays empty when the
    lookahead line does not start with 'msgstr['.
    """
    entry = []
    parse_message(parse_state, 'msgstr[', right_bracket_pos + 1, entry)
    return entry
227
def parse_msgstr_array_entry(parse_state, msgstr_dict):
    """Try to parse one 'msgstr[N]' entry from the lookahead line.

    parse_state -- parser state whose next_line is examined.
    msgstr_dict -- dict updated in place through add_to_dict.

    Returns True when an entry was read and stored, False when the line has
    no ']' at or after the index position or when no strings were read.
    """
    # Keep the lookahead line: add_to_dict still needs it for the index
    # after get_entry has run.
    line = parse_state.next_line
    right_bracket_pos = find(line, ']', MSGSTR_ARRAY_ENTRY_LEN)
    if right_bracket_pos < 0:
        return False
    entry = get_entry(parse_state, right_bracket_pos)
    if not entry:
        return False
    add_to_dict(msgstr_dict, line, right_bracket_pos, entry)
    return True
240
250
257
265
267 unit = unit or parse_state.UnitClass()
268 parsed_comments = parse_comments(parse_state, unit)
269 obsolete_unit = parse_obsolete(parse_state, unit)
270 if obsolete_unit is not None:
271 return obsolete_unit
272 parsed_msg_entries = parse_msg_entries(parse_state, unit)
273 if parsed_comments or parsed_msg_entries:
274 return unit
275 else:
276 return None
277
279 charset = None
280 if isinstance(unit.msgstr, list) and len(unit.msgstr) > 0 and isinstance(unit.msgstr[0], str):
281 charset = re.search("charset=([^\\s\\\\n]+)", "".join(unit.msgstr))
282 if charset:
283 encoding = charset.group(1)
284 if encoding != 'CHARSET':
285 store._encoding = encoding
286 else:
287 store._encoding = 'utf-8'
288 else:
289 store._encoding = 'utf-8'
290 parse_state.encoding = store._encoding
291
293 return [decode(item) for item in lst]
294
296 for attr in ('msgctxt', 'msgid', 'msgid_pluralcomments',
297 'msgid_plural', 'msgstr', 'obsoletemsgctxt',
298 'obsoletemsgid', 'obsoletemsgid_pluralcomments',
299 'obsoletemsgid_plural', 'obsoletemsgstr',
300 'othercomments', 'automaticcomments', 'sourcecomments',
301 'typecomments', 'msgidcomments', 'obsoletemsgidcomments'):
302 element = getattr(unit, attr)
303 if isinstance(element, list):
304 setattr(unit, attr, decode_list(element, decode))
305 else:
306 setattr(unit, attr, dict([(key, decode_list(value, decode)) for key, value in element.items()]))
307
315
317 unit = parse_header(parse_state, store)
318 while unit:
319 store.addunit(unit)
320 unit = parse_unit(parse_state)
321 return parse_state.eof
322