1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of .dtd files (dtdunit) or entire files (dtdfile)
23 these are specific .dtd files for localisation used by mozilla"""
24
25 from translate.storage import base
26 from translate.misc import quote
27
28 import re
29 import warnings
30 try:
31 from lxml import etree
32 import StringIO
33 except ImportError:
34 etree = None
35
36 labelsuffixes = (".label", ".title")
"""Label suffixes: entries with this suffix are able to be combined with accesskeys
found in entries ending with L{accesskeysuffixes}"""
39 accesskeysuffixes = (".accesskey", ".accessKey", ".akey")
40 """Accesskey Suffixes: entries with this suffix may be combined with labels
41 ending in L{labelsuffixes} into accelerator notation"""
42
51
64
66 """Find and remove ampersands that are not part of an entity definition.
67
A stray & in a DTD file can break an application's ability to parse the file. In Mozilla
localisation this is very important and these can break the parsing of files used in XUL
and thus break interface rendering. Tracking down the problem is very difficult,
thus by removing potentially broken & characters and warning the users we can ensure that
the output DTD will always be parsable.
73
74 @type name: String
75 @param name: Entity name
76 @type value: String
77 @param value: Entity text value
78 @rtype: String
79 @return: Entity value without bad ampersands
80 """
def is_valid_entity_name(name):
    """Check that supplied L{name} is a valid entity name.

    A name is accepted when it is alphanumeric once any dots have been
    removed (e.g. C{brandShortName}, C{foo.label}), or when it is a C{#}
    followed by alphanumeric characters (e.g. C{#38}, C{#x26}).

    @type name: String
    @param name: Candidate entity name (the text between C{&} and C{;})
    @rtype: Boolean
    @return: True if L{name} is acceptable, False otherwise (including
        for an empty name)
    """
    if name.replace('.', '').isalnum():
        return True
    # startswith() instead of name[0]: the name is empty for input such as
    # "&;", and indexing an empty string would raise IndexError.
    return name.startswith('#') and name[1:].isalnum()
88
89 amppos = 0
90 invalid_amps = []
91 while amppos >= 0:
92 amppos = value.find("&", amppos)
93 if amppos != -1:
94 amppos += 1
95 semipos = value.find(";", amppos)
96 if semipos != -1:
97 if is_valid_entity_name(value[amppos:semipos]):
98 continue
99 invalid_amps.append(amppos-1)
100 if len(invalid_amps) > 0:
101 warnings.warn("invalid ampersands in dtd entity %s" % (name))
102 adjustment = 0
103 for amppos in invalid_amps:
104 value = value[:amppos-adjustment] + value[amppos-adjustment+1:]
105 adjustment += 1
106 return value
107
108 -class dtdunit(base.TranslationUnit):
109 """this class represents an entity definition from a dtd file (and possibly associated comments)"""
111 """construct the dtdunit, prepare it for parsing"""
112 super(dtdunit, self).__init__(source)
113 self.comments = []
114 self.unparsedlines = []
115 self.incomment = False
116 self.inentity = False
117 self.entity = "FakeEntityOnlyForInitialisationAndTesting"
118 self.source = source
119
120
122 """Sets the definition to the quoted value of source"""
123 self.definition = quotefordtd(source)
124
126 """gets the unquoted source string"""
127 return unquotefromdtd(self.definition)
128 source = property(getsource, setsource)
129
135
137 """gets the unquoted target string"""
138 return unquotefromdtd(self.definition)
139 target = property(gettarget, settarget)
140
142 """returns whether this dtdunit doesn't actually have an entity definition"""
143
144
145 return self.entity is None
146
def parse(self, dtdsrc):
    """Read the first dtd element from the source code into this object.

    The source is consumed line by line. Comments found on the way are
    stored in self.comments, lines that are neither comment nor entity are
    stored in self.unparsedlines, and the loop stops as soon as the quoted
    definition of one entity has been closed (or the lines run out).

    @type dtdsrc: String
    @param dtdsrc: DTD source text to read from
    @rtype: Integer
    @return: Number of lines of dtdsrc that were processed (0 when dtdsrc
        is empty)
    """
    self.comments = []
    # All the note lists below are aliases of self.comments (same list
    # object), so every kind of comment ends up in that single list.
    self.locfilenotes = self.comments
    self.locgroupstarts = self.comments
    self.locgroupends = self.comments
    self.locnotes = self.comments

    # entity stays None unless an <!ENTITY declaration is found below
    self.entity = None
    self.definition = ''
    if not dtdsrc:
        return 0
    lines = dtdsrc.split("\n")
    linesprocessed = 0
    comment = ""
    for line in lines:
        # split() dropped the newlines; put one back on every line
        line += "\n"
        linesprocessed += 1

        if not self.incomment:
            if (line.find('<!--') != -1):
                # A comment opens on this line: work out its type first.
                # The text itself is collected in the incomment branch below.
                self.incomment = True
                self.continuecomment = False
                # NOTE(review): quote.extract is assumed to return the
                # delimited text plus a "still inside" flag - confirm
                # against translate.misc.quote
                (comment, dummy) = quote.extract(line, "<!--", "-->", None, 0)
                if comment.find('LOCALIZATION NOTE') != -1:
                    # presumably findend gives the index just past the
                    # marker; skip the spaces that follow it
                    l = quote.findend(comment,'LOCALIZATION NOTE')
                    while (comment[l] == ' '):
                        l += 1
                    # the keyword directly after the marker selects the type
                    if comment.find('FILE', l) == l:
                        self.commenttype = "locfile"
                    elif comment.find('BEGIN', l) == l:
                        self.commenttype = "locgroupstart"
                    elif comment.find('END', l) == l:
                        self.commenttype = "locgroupend"
                    else:
                        self.commenttype = "locnote"
                else:
                    # an ordinary comment without a localization note
                    self.commenttype = "comment"
            elif not self.inentity and re.search("%.*;", line):
                # Outside an entity, a line containing "%...;" is kept
                # verbatim as a comment and not parsed any further.
                self.comments.append(("comment", line))
                line = ""
                continue

        if self.incomment:
            # Pull the comment text out of this line. continuecomment is
            # False on the line that opened the comment and afterwards
            # mirrors incomment, i.e. it is set on continuation lines.
            (comment, self.incomment) = quote.extract(line, "<!--", "-->", None, self.continuecomment)
            self.continuecomment = self.incomment
            # remove the comment text so only the remainder is parsed below
            line = line.replace(comment, "", 1)
            if not self.incomment:
                # The comment closed on this line: a whitespace-only
                # remainder is folded into the comment, otherwise the
                # comment just gets a terminating newline.
                if line.isspace():
                    comment += line
                    line = ''
                else:
                    comment += '\n'

            # Record this line's piece of the comment with the type decided
            # when the comment opened. All five targets are the same list
            # (see the aliases set up at the top of this method).
            commentpair = (self.commenttype, comment)
            if self.commenttype == "locfile":
                self.locfilenotes.append(commentpair)
            elif self.commenttype == "locgroupstart":
                self.locgroupstarts.append(commentpair)
            elif self.commenttype == "locgroupend":
                self.locgroupends.append(commentpair)
            elif self.commenttype == "locnote":
                self.locnotes.append(commentpair)
            elif self.commenttype == "comment":
                self.comments.append(commentpair)

        if not self.inentity and not self.incomment:
            entitypos = line.find('<!ENTITY')
            if entitypos != -1:
                self.inentity = True
                # text before the declaration is remembered only when it
                # starts with "#"
                beforeentity = line[:entitypos].strip()
                if beforeentity.startswith("#"):
                    self.hashprefix = beforeentity
                self.entitypart = "start"
            else:
                # neither comment nor entity: keep the line as it is
                self.unparsedlines.append(line)

        if self.inentity:
            # entitypart moves "start" -> "name" -> ("parameter" ->)
            # "definition"; several stages can be handled on one line.
            if self.entitypart == "start":
                # drop everything up to and including '<!ENTITY'
                # (assuming findend returns the end index of the match)
                e = quote.findend(line,'<!ENTITY')
                line = line[e:]
                self.entitypart = "name"
                self.entitytype = "internal"
            if self.entitypart == "name":
                e = 0
                while (e < len(line) and line[e].isspace()):
                    e += 1
                self.entity = ''
                # a '%' before the name marks the entity as "external"
                if (e < len(line) and line[e] == '%'):
                    self.entitytype = "external"
                    self.entityparameter = ""
                    e += 1
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                # the name is everything up to the next whitespace
                while (e < len(line) and not line[e].isspace()):
                    self.entity += line[e]
                    e += 1
                while (e < len(line) and line[e].isspace()):
                    e += 1
                if self.entity:
                    if self.entitytype == "external":
                        self.entitypart = "parameter"
                    else:
                        self.entitypart = "definition"
                    if e == len(line):
                        # nothing follows the name on this line; the
                        # next stage continues on the following line
                        self.entityhelp = None
                        e = 0
                        continue
                    elif self.entitypart == "definition":
                        # entityhelp = (start offset, quote character)
                        self.entityhelp = (e, line[e])
                        self.instring = False
            if self.entitypart == "parameter":
                while (e < len(line) and line[e].isspace()):
                    e += 1
                # the parameter is the run of alphanumeric characters
                paramstart = e
                while (e < len(line) and line[e].isalnum()):
                    e += 1
                self.entityparameter += line[paramstart:e]
                while (e < len(line) and line[e].isspace()):
                    e += 1
                # keep only what follows the parameter
                line = line[e:]
                e = 0
                if not line:
                    continue
                # a quote character here starts the definition
                if line[0] in ('"', "'"):
                    self.entitypart = "definition"
                    self.entityhelp = (e, line[e])
                    self.instring = False
            if self.entitypart == "definition":
                if self.entityhelp is None:
                    # the definition starts on a later line than the name:
                    # find its first non-whitespace character
                    e = 0
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    if e == len(line):
                        continue
                    self.entityhelp = (e, line[e])
                    self.instring = False

                # Extract the quoted text using the remembered quote
                # character. instring carries over to the next line for
                # definitions that span several lines.
                e = self.entityhelp[0]
                if (self.entityhelp[1] == "'"):
                    (defpart, self.instring) = quote.extract(line[e:], "'", "'", startinstring=self.instring, allowreentry=False)
                elif (self.entityhelp[1] == '"'):
                    (defpart, self.instring) = quote.extract(line[e:], '"', '"', startinstring=self.instring, allowreentry=False)
                else:
                    raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))

                # following lines start at offset 0 with the same quote
                self.entityhelp = (0, self.entityhelp[1])
                self.definition += defpart
                if not self.instring:
                    # closing quote reached: this entity is complete, so
                    # stop reading and leave the remaining lines unread
                    self.inentity = False
                    break

    # Disabled debugging aid: change the condition to dump every attribute
    # of this unit into the comments.
    if 0:
        for attr in dir(self):
            r = repr(getattr(self, attr))
            if len(r) > 60:
                r = r[:57]+"..."
            self.comments.append(("comment", "self.%s = %s" % (attr, r) ))
    return linesprocessed
328
335
337 """convert the dtd entity back to string form"""
338 lines = []
339 lines.extend([comment for commenttype, comment in self.comments])
340 lines.extend(self.unparsedlines)
341 if self.isnull():
342 result = "".join(lines)
343 return result.rstrip() + "\n"
344
345
346
347
348 if len(self.entity) > 0:
349 if getattr(self, 'entitytype', None) == 'external':
350 entityline = '<!ENTITY % '+self.entity+' '+self.entityparameter+' '+self.definition+'>'
351 else:
352 entityline = '<!ENTITY '+self.entity+' '+self.definition+'>'
353 if getattr(self, 'hashprefix', None):
354 entityline = self.hashprefix + " " + entityline
355 if isinstance(entityline, unicode):
356 entityline = entityline.encode('UTF-8')
357 lines.append(entityline+'\n')
358 return "".join(lines)
359
360 -class dtdfile(base.TranslationStore):
361 """this class represents a .dtd file, made up of dtdunits"""
362 UnitClass = dtdunit
364 """construct a dtdfile, optionally reading in from inputfile"""
365 base.TranslationStore.__init__(self, unitclass = self.UnitClass)
366 self.filename = getattr(inputfile, 'name', '')
367 if inputfile is not None:
368 dtdsrc = inputfile.read()
369 self.parse(dtdsrc)
370 self.makeindex()
371
def parse(self, dtdsrc):
    """Read the source code of a dtd file and store its contents as dtdunits in self.units.

    The lines are handled in windows C{lines[start:end]}. The end of a
    window is pushed forward until a line that begins with a quote
    character, optional whitespace and C{>} is met at or after a line
    containing C{<!ENTITY} (or until the lines run out). Each window is
    then handed repeatedly to fresh dtdunit objects, each of which reports
    how many lines it consumed, until a unit consumes nothing.

    A failure while parsing a unit is reported through warnings.warn and
    parsing carries on.

    @type dtdsrc: String
    @param dtdsrc: Complete source text of the dtd file
    """
    # Raw string so that \s reaches the regex engine untouched; compiled
    # once here instead of on every line.
    entity_end = re.compile(r"""["']\s*>""")
    lines = dtdsrc.split("\n")
    start = 0
    end = 0
    while end < len(lines):
        if start == end:
            end += 1
        foundentity = False
        # Grow the window. The loop condition already guarantees that
        # end < len(lines), so no further bounds check is needed inside.
        while end < len(lines):
            if lines[end].find('<!ENTITY') > -1:
                foundentity = True
            if foundentity and entity_end.match(lines[end]):
                end += 1
                break
            end += 1

        linesprocessed = 1  # primes the loop below
        while linesprocessed >= 1:
            newdtd = dtdunit()
            chunk = "\n".join(lines[start:end])
            try:
                linesprocessed = newdtd.parse(chunk)
                # keep units that hold an entity or at least unparsed lines
                if linesprocessed >= 1 and (not newdtd.isnull() or newdtd.unparsedlines):
                    self.units.append(newdtd)
            except Exception as e:
                # Best effort: report the failing window and carry on.
                # linesprocessed keeps its previous value (>= 1 inside this
                # loop), so start still moves forward afterwards.
                warnings.warn("%s\nError occured between lines %d and %d:\n%s" % (e, start+1, end, chunk))
            start += linesprocessed
402
404 """convert to a string. double check that unicode is handled somehow here"""
405 source = self.getoutput()
406 if not self._valid_store():
407 warnings.warn("DTD file '%s' does not validate" % self.filename)
408 return None
409 if isinstance(source, unicode):
410 return source.encode(getattr(self, "encoding", "UTF-8"))
411 return source
412
414 """convert the units back to source"""
415 sources = [str(dtd) for dtd in self.units]
416 return "".join(sources)
417
419 """makes self.index dictionary keyed on entities"""
420 self.index = {}
421 for dtd in self.units:
422 if not dtd.isnull():
423 self.index[dtd.entity] = dtd
424
426 """Validate the store to determine if it is valid
427
428 This uses ElementTree to parse the DTD
429
430 @return: If the store passes validation
431 @rtype: Boolean
432 """
433 if etree is not None:
434 try:
435
436 dtd = etree.DTD(StringIO.StringIO(re.sub("#expand", "", self.getoutput())))
437 except etree.DTDParseError:
438 return False
439 return True
440