1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """functions to get decorative/informative text out of strings..."""
23
24 import re
25 import unicodedata
26 from translate.lang import data
27
def spacestart(str1):
    """returns all the whitespace from the start of the string

    The whole string is returned when it contains nothing but whitespace,
    and an empty string when it does not start with whitespace.
    """
    for index, char in enumerate(str1):
        if not char.isspace():
            # One slice instead of growing a string character by character.
            return str1[:index]
    return str1
35
def spaceend(str1):
    """returns all the whitespace from the end of the string

    The whole string is returned when it contains nothing but whitespace,
    and an empty string when it does not end with whitespace.
    """
    for offset, char in enumerate(reversed(str1)):
        if not char.isspace():
            # offset counts the trailing whitespace characters seen so far.
            return str1[len(str1) - offset:]
    return str1
44
def puncstart(str1, punctuation):
    """returns all the punctuation from the start of the string

    Whitespace counts as punctuation here, so the result is the leading run
    of characters that are either in ``punctuation`` or whitespace.

    @param str1: the string to examine
    @param punctuation: the characters that are regarded as punctuation
    @return: the leading punctuation/whitespace of str1 (possibly empty)
    """
    for index, char in enumerate(str1):
        if char not in punctuation and not char.isspace():
            return str1[:index]
    return str1
52
def puncend(str1, punctuation):
    """returns all the punctuation from the end of the string

    Whitespace counts as punctuation here, so the result is the trailing run
    of characters that are either in ``punctuation`` or whitespace.

    @param str1: the string to examine
    @param punctuation: the characters that are regarded as punctuation
    @return: the trailing punctuation/whitespace of str1 (possibly empty)
    """
    for offset, char in enumerate(reversed(str1)):
        if char not in punctuation and not char.isspace():
            # offset counts the trailing characters accepted so far.
            return str1[len(str1) - offset:]
    return str1
61
def ispurepunctuation(str1):
    """checks whether the string is entirely punctuation

    A string qualifies when it is non-empty and contains no alphanumeric
    character; whitespace therefore does not disqualify it.

    @rtype: Boolean
    @return: True if str1 is non-empty and has no alphanumeric characters
    """
    if not str1:
        return False
    # Always hand back a real boolean (the truthiness matches a length-based
    # result: empty -> false, non-empty -> true).
    return not any(char.isalnum() for char in str1)
68
def isvalidaccelerator(accelerator, acceptlist=None):
    """returns whether the given accelerator character is valid

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as accelerators
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    # The name ``unicode`` only exists on Python 2; type(u"") is the text
    # type on both Python 2 and Python 3.
    text_type = type(u"")
    assert isinstance(accelerator, text_type)
    assert acceptlist is None or isinstance(acceptlist, text_type)
    if not accelerator:
        return False
    if acceptlist is not None:
        # An explicit accept list is authoritative once normalized.
        return accelerator in data.normalize(acceptlist)

    # No accept list supplied: fall back to the built-in heuristic.
    accelerator = accelerator.replace("_", "")
    # NOTE(review): this is a substring test, so a lone "_" (now an empty
    # string) is accepted here as well - confirm that this is intended.
    if accelerator in u"-?":
        return True
    if not accelerator.isalnum():
        return False

    # Reject characters whose Unicode decomposition has more than one code
    # point (e.g. a base letter plus a combining accent).
    decomposition = unicodedata.decomposition(accelerator)
    # Drop formatting tags such as "<compat>" before counting the parts.
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    return decomposition.count(" ") == 0
102
def findaccelerators(str1, accelmarker, acceptlist=None):
    """returns all the accelerators and locations in str1 marked with a given marker

    @param str1: the string to search
    @param accelmarker: the text that precedes an accelerator character
    @param acceptlist: passed on to isvalidaccelerator for each candidate
    @return: a tuple (accelerators, badaccelerators); each is a list of
        (position of the marker, accelerator character) tuples, split by
        whether isvalidaccelerator accepted the character
    """
    accelerators = []
    badaccelerators = []
    markerlen = len(accelmarker)
    accelstart = str1.find(accelmarker)
    while accelstart >= 0:
        accelend = accelstart + markerlen + 1
        if accelend > len(str1):
            # The marker is the last thing in the string: nothing follows it.
            break
        accelerator = str1[accelstart + markerlen:accelend]
        if isvalidaccelerator(accelerator, acceptlist):
            accelerators.append((accelstart, accelerator))
        else:
            badaccelerators.append((accelstart, accelerator))
        # Resume the search just after the character that was consumed.
        accelstart = str1.find(accelmarker, accelend)
    return accelerators, badaccelerators
123
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """returns all the variables and locations in str1 marked with a given marker

    @param str1: the string to search
    @param startmarker: the text that introduces a variable
    @param endmarker: how a variable ends: None means it runs over the
        following alphanumeric/underscore characters, an integer gives a
        fixed length, anything else is the text that closes the variable
    @param ignorelist: variable names that must not be reported
    @return: a list of (position of the start marker, variable) tuples
    """
    variables = []
    markerlen = len(startmarker)
    currentpos = 0
    while True:
        startmatch = str1.find(startmarker, currentpos)
        if startmatch < 0:
            break
        currentpos = startmatch + markerlen
        variable = None
        if endmarker is None:
            # No end marker: the variable ends at the first character that
            # is neither alphanumeric nor an underscore, or at the end of
            # the string when there is no such character.
            endmatch = len(str1)
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == "_"):
                    endmatch = n
                    break
            # A marker directly followed by a terminator names nothing; just
            # move on so that later markers in the string are still seen.
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # Fixed-length variable.
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # If another start marker sits between this one and the end
            # marker, the variable begins at the innermost (last) one.
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1 and start2 + markerlen != currentpos:
                startmatch = start2
                currentpos = start2 + markerlen
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            # Only names made of alphanumerics, "_" and "." are reported
            # (an empty name is let through as well).
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
168
def getaccelerators(accelmarker, acceptlist=None):
    """returns a function that gets a list of accelerators marked using accelmarker

    @param accelmarker: the text that precedes an accelerator character
    @param acceptlist: passed on to findaccelerators for every lookup
    """
    def getmarkedaccelerators(str1):
        """returns all the accelerators in str1 marked with a given marker

        @return: a tuple (accelerators, badaccelerators) of character lists,
            i.e. the findaccelerators results without their positions
        """
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        accelerators = [accelerator for _, accelerator in acclocs]
        badaccelerators = [accelerator for _, accelerator in badlocs]
        return accelerators, badaccelerators
    return getmarkedaccelerators
178
def getvariables(startmarker, endmarker):
    """returns a function that gets a list of variables marked using startmarker and endmarker

    @param startmarker: passed on to findmarkedvariables for every lookup
    @param endmarker: passed on to findmarkedvariables for every lookup
    """
    def getmarkedvariables(str1):
        """returns all the variables in str1 marked with a given marker

        @return: the variables reported by findmarkedvariables, without
            their positions
        """
        varlocs = findmarkedvariables(str1, startmarker, endmarker)
        return [variable for _, variable in varlocs]
    return getmarkedvariables
187
def getnumbers(str1):
    """returns any numbers that are in the string

    A number starts at a digit and continues over digits, periods and degree
    signs. Periods are held back until a later digit arrives, so a period
    that merely ends a sentence does not become part of the number.

    @param str1: the (unicode) text to scan
    @return: a list of the numbers found, as strings, in order of appearance
    """
    # The name ``unicode`` only exists on Python 2; type(u"") is the text
    # type on both Python 2 and Python 3.
    assert isinstance(str1, type(u""))
    numbers = []
    innumber = False
    degreesign = u'\xb0'
    lastnumber = ""
    carryperiod = ""
    for chr1 in str1:
        if chr1.isdigit():
            innumber = True
        elif innumber and chr1 != '.' and chr1 != degreesign:
            # Any other character ends the number collected so far.
            innumber = False
            if lastnumber:
                numbers.append(lastnumber)
            lastnumber = ""
        if not innumber:
            carryperiod = ""
        elif chr1 == degreesign:
            lastnumber += chr1
        elif chr1 == '.':
            # Do not commit the period yet; it may be trailing punctuation.
            carryperiod += chr1
        else:
            lastnumber += carryperiod + chr1
            carryperiod = ""
    # A number that runs up to the end of the string is still pending.
    if innumber and lastnumber:
        numbers.append(lastnumber)
    return numbers
220
226
def getemails(str1):
    """returns the email addresses that are in a string

    An address is taken to be a run of word characters, periods and hyphens
    on both sides of an "@".
    """
    # Raw string: keeps the regex escapes out of Python's own string-escape
    # handling; the pattern text itself is unchanged.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
230
def geturls(str1):
    """returns the URIs in a string

    Recognizes text starting with "http:" or "https:", with "www." or with
    "ftp:", followed by a run of characters allowed by the pattern below.
    """
    # Raw strings: keep the regex escapes out of Python's own string-escape
    # handling; the pattern text itself is unchanged.
    URLPAT = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(URLPAT, str1)
236
def countaccelerators(accelmarker, acceptlist=None):
    """returns a function that counts the number of accelerators marked with the given marker

    @param accelmarker: the text that precedes an accelerator character
    @param acceptlist: passed on to findaccelerators for every lookup
    """
    def countmarkedaccelerators(str1):
        """returns the number of accelerators in str1 marked with a given marker

        @return: a tuple (number of accelerators, number of bad accelerators)
            as classified by findaccelerators
        """
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        return len(acclocs), len(badlocs)
    return countmarkedaccelerators
244