1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module for parsing Qt .qm files
24
25 @note: based on documentation from Gettext's .qm implementation (see write-qt.c) and on observation
26 of the output of lrelease.
27 @note: Certain deprecated section tags are not implemented. These will break and print out
28 the missing tag. They are easy to implement and should follow the structure in 03
29 (Translation). We could find no examples that use these so we'd rather leave it
30 unimplemented until we actually have test data.
@note: Many .qm files cannot be parsed because they do not contain the source text. We assume
that, since they use a hash table to look up the data, there is actually no need for the
source text. It seems, however, that Qt4's lrelease includes all data in the resulting .qm
file.
35 @todo: We can only parse, not create, a .qm file. The main issue is that we need to
36 implement the hashing algorithm (which seems to be identical to the Gettext hash algorithm). Unlike
37 Gettext it seems that the hash is required, but that has not been validated.
38 @todo: The code can parse files correctly. But it could be cleaned up to be more readable, especially
39 the part that breaks the file into sections.
40 """
41
42 from translate.storage import base
43 from translate.misc.multistring import multistring
44 import codecs
45 import struct
46 import sys
47
# The 16-byte signature that opens every .qm file, expressed as four
# big-endian unsigned 32-bit words (the parser unpacks the first 16 bytes
# with the struct format ">4L" and compares the result against this tuple).
# The literals are written without the Python 2-only ``L`` suffix so that the
# module also compiles on Python 3; the tuple compares equal either way.
QM_MAGIC_NUMBER = (0x3CB86418, 0xCAEF9C95, 0xCD211CBF, 0x60A1BDDD)
49
# NOTE(review): the ``def`` line of this helper is missing from the extracted
# source. The parameter name comes from the body (it opens ``qmfile``); the
# function name and the default value are assumptions -- confirm them against
# the original file.
def qmunpack(qmfile='messages.qm'):
    """Helper to unpack Qt .qm files into a Python string

    Reads the whole file and prints every byte as a ``\\xNN`` escape on a
    single line, ready to be pasted into a Python string literal.

    @param qmfile: path of the .qm file to dump.
    """
    # .qm files are binary: open in 'rb' so no newline translation or text
    # decoding is applied, and let the context manager close the handle even
    # if read() raises.
    with open(qmfile, 'rb') as f:
        s = f.read()
    # bytearray() yields integer byte values on both Python 2 and Python 3.
    print("\\x%02x" * len(s) % tuple(bytearray(s)))
56
# NOTE(review): the extracted source skips the original lines that followed
# the docstring, so any methods this class defined are not visible here.
class qmunit(base.TranslationUnit):
    """A class representing a .qm translation message."""
61
class qmfile(base.TranslationStore):
    """A class representing a .qm file.

    The store is read-only: .qm data can be parsed into units, but the string
    representation is always empty and writing raises an exception.
    """
    UnitClass = qmunit
    Name = _("Qt .qm file")
    Mimetypes = ["application/x-qm"]
    Extensions = ["qm"]
    _binary = True

    # NOTE(review): the extracted source jumps over several original lines at
    # this point, so anything defined in that gap (presumably a constructor)
    # is not reproduced here -- restore it from the original file.

    # NOTE(review): the ``def`` lines of the methods below are missing from the
    # extracted source. ``parse`` takes its parameter name from the body; the
    # other names and signatures are assumptions -- confirm against the
    # original file.

    def __str__(self):
        """Output a string representation of the .qm data file"""
        return ""

    def parse(self, input):
        """Parse the given file object or string of raw .qm data.

        Every message that carries both a source text and a translation is
        added to the store as a unit.

        @param input: either an object with a ``read`` method (it is read
        completely and then closed) or the raw contents of a .qm file.
        @raise ValueError: if the data is shorter than the 16-byte magic
        number, does not start with it, or a message ends without both a
        source text and a translation.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            qmsrc = input.read()
            input.close()
            input = qmsrc
        if len(input) < 16:
            raise ValueError("This is not a .qm file: file empty or too small")
        magic = struct.unpack(">4L", input[:16])
        if magic != QM_MAGIC_NUMBER:
            raise ValueError("This is not a .qm file: invalid magic number")

        # Walk the top-level sections that follow the magic number. Each one
        # is a 1-byte tag, a 4-byte big-endian length and then the payload.
        sectionheader = 5
        startsection = 16
        messages_start = messages_end = None
        while startsection < len(input):
            section_type, length = struct.unpack(
                ">bL", input[startsection:startsection + sectionheader])
            datastart = startsection + sectionheader
            if section_type in (0x42, 0x69, 0x2f):
                # Hash table (0x42), messages (0x69) and contexts (0x2f):
                # unpacking with the exact length raises struct.error when
                # the payload is truncated.
                struct.unpack(">%ds" % length,
                              input[datastart:datastart + length])
                if section_type == 0x69:
                    # Only the messages section is actually used below.
                    messages_start = datastart
                    messages_end = datastart + length
            startsection = datastart + length

        if messages_start is None:
            # No messages section at all: there is nothing to add.
            return

        # Walk the tagged records inside the messages section.
        pos = messages_start
        source = target = None
        while pos < messages_end:
            subsection, = struct.unpack(">b", input[pos:pos + 1])
            pos += 1
            if subsection == 0x01:
                # End of the current message: store it and start afresh.
                if source is None or target is None:
                    raise ValueError("Old .qm format with no source defined")
                newunit = self.addsourceunit(source)
                newunit.target = target
                source = target = None
                continue

            # All remaining tags are followed by four bytes, which are read
            # as a signed length (for the hash tag they are the hash itself).
            length, = struct.unpack(">l", input[pos:pos + 4])
            if subsection == 0x03:
                # Translation: UTF-16 (big-endian) text; a length of -1
                # stands for an empty translation.
                if length != -1:
                    raw, = struct.unpack(">%ds" % length,
                                         input[pos + 4:pos + 4 + length])
                    string = codecs.utf_16_be_decode(raw)[0]
                    if target:
                        # A further translation of the same message is
                        # kept as an additional plural form.
                        target.strings.append(string)
                    else:
                        target = multistring(string)
                    pos = pos + 4 + length
                else:
                    target = ""
                    pos = pos + 4
            elif subsection == 0x06:
                # Source text, decoded as ISO-8859-1.
                source = input[pos + 4:pos + 4 + length].decode('iso-8859-1')
                pos = pos + 4 + length
            elif subsection in (0x07, 0x08):
                # Context (0x07) and comment (0x08) are skipped, not stored.
                pos = pos + 4 + length
            elif subsection == 0x05:
                # Hash: a fixed four bytes without a length prefix.
                pos = pos + 4
            else:
                if subsection == 0x02:
                    subsection_name = "SourceText16"
                elif subsection == 0x04:
                    subsection_name = "Context16"
                else:
                    subsection_name = "Unkown"
                # Unsupported tag: report it and stop; units added so far
                # are kept.
                sys.stderr.write("Unimplemented: %s %s\n"
                                 % (subsection, subsection_name))
                return

    def savefile(self, storefile):
        """Refuse to write: creating .qm files is not implemented."""
        raise Exception("Writing of .qm files is not supported yet")
174