Package translate :: Package filters :: Module decoration
[hide private]
[frames | no frames]

Source Code for Module translate.filters.decoration

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2004-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """functions to get decorative/informative text out of strings...""" 
 23   
 24  import re 
 25  import unicodedata 
 26  from translate.lang import data 
 27   
def spacestart(str1):
    """Return the run of whitespace found at the start of the string.

    @param str1: The string to inspect
    @return: The leading whitespace of C{str1}; an empty string when
        C{str1} is empty or does not begin with whitespace
    """
    # lstrip() without arguments drops leading whitespace, so the length
    # difference is the size of the whitespace prefix.  Slicing avoids
    # building the result one character at a time.
    leading = len(str1) - len(str1.lstrip())
    return str1[:leading]
35
def spaceend(str1):
    """Return the run of whitespace found at the end of the string.

    @param str1: The string to inspect
    @return: The trailing whitespace of C{str1}; an empty string when
        C{str1} is empty or does not end with whitespace
    """
    # rstrip() without arguments drops trailing whitespace; everything after
    # the stripped length is therefore the whitespace suffix.
    return str1[len(str1.rstrip()):]
44
def puncstart(str1, punctuation):
    """Return the punctuation (and whitespace) found at the start of the string.

    @param str1: The string to inspect
    @param punctuation: The characters that count as punctuation
    @return: The leading part of C{str1} made up only of characters that are
        in C{punctuation} or are whitespace
    """
    for index, char in enumerate(str1):
        if char not in punctuation and not char.isspace():
            # First "real" character: everything before it is decoration.
            return str1[:index]
    # Every character (possibly none) was punctuation or whitespace.
    return str1
52
def puncend(str1, punctuation):
    """Return the punctuation (and whitespace) found at the end of the string.

    @param str1: The string to inspect
    @param punctuation: The characters that count as punctuation
    @return: The trailing part of C{str1} made up only of characters that are
        in C{punctuation} or are whitespace
    """
    total = len(str1)
    for offset, char in enumerate(reversed(str1)):
        if char not in punctuation and not char.isspace():
            # ``offset`` characters were accepted before hitting this one, so
            # the suffix is exactly the last ``offset`` characters.
            return str1[total - offset:]
    # Every character (possibly none) was punctuation or whitespace.
    return str1
61
def ispurepunctuation(str1):
    """Check whether the string contains no alphanumeric characters at all.

    @param str1: The string to inspect
    @return: C{False} if any character is alphanumeric; otherwise the length
        of C{str1}, which is truthy for a non-empty string and C{0} for an
        empty one
    """
    if any(char.isalnum() for char in str1):
        return False
    # The length doubles as the truth value: an empty string is not
    # considered pure punctuation.
    return len(str1)
68
def isvalidaccelerator(accelerator, acceptlist=None):
    """returns whether the given accelerator character is valid

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as accelerators
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    # type(u"") is the unicode text type on both Python 2 (unicode) and
    # Python 3 (str); the bare name ``unicode`` does not exist on Python 3.
    text_type = type(u"")
    assert isinstance(accelerator, text_type)
    assert isinstance(acceptlist, text_type) or acceptlist is None
    if len(accelerator) == 0:
        return False
    if acceptlist is not None:
        # An explicit accept list is authoritative: normalize it and test
        # for membership.
        acceptlist = data.normalize(acceptlist)
        if accelerator in acceptlist:
            return True
        return False

    # Old code path - ensures that we don't get a large number of regressions
    accelerator = accelerator.replace("_", "")
    # NOTE(review): a lone "_" becomes the empty string here, and the empty
    # string is "in" any string, so "_" is accepted - confirm this is intended.
    if accelerator in u"-?":
        return True
    if not accelerator.isalnum():
        return False

    # We don't want to have accelerators on characters with diacritics, so
    # let's see if the character can decompose.
    decomposition = unicodedata.decomposition(accelerator)
    # Next we strip out any extra information like <this>
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    # More than one code point in the decomposition means the character is
    # composed of several parts, which makes it unsuitable.
    return decomposition.count(" ") == 0
102
def findaccelerators(str1, accelmarker, acceptlist=None):
    """Locate every accelerator in str1 that is introduced by accelmarker.

    @param str1: The string to search
    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed through to isvalidaccelerator
    @return: A pair of lists C{(accelerators, badaccelerators)}; each entry is
        a C{(position of the marker, accelerator character)} tuple
    """
    valid = []
    invalid = []
    markerlength = len(accelmarker)
    searchfrom = 0
    while True:
        markerpos = str1.find(accelmarker, searchfrom)
        if markerpos < 0:
            break
        # we assume accelerators are single characters
        charstart = markerpos + markerlength
        charend = charstart + 1
        if charend > len(str1):
            # The marker is the last thing in the string: nothing follows it.
            break
        candidate = str1[charstart:charend]
        searchfrom = charend
        entry = (markerpos, candidate)
        if isvalidaccelerator(candidate, acceptlist):
            valid.append(entry)
        else:
            invalid.append(entry)
    return valid, invalid
123
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """returns all the variables and locations in str1 marked with a given marker

    @param str1: The string to search
    @param startmarker: The text that introduces a variable
    @param endmarker: How a variable ends: C{None} means the variable runs
        until the first character that is neither alphanumeric nor an
        underscore; an integer means the variable has that fixed length;
        anything else is the text that terminates the variable
    @param ignorelist: Variable names that must not be reported
    @return: A list of C{(position of the start marker, variable name)} tuples
    """
    variables = []
    currentpos = 0
    while currentpos >= 0:
        variable = None
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            # handle case without an end marker - use any non-alphanumeric
            # character as the end marker, the variable must not be empty
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == '_'):
                    endmatch = n
                    break
            else:
                # No terminator found: the variable runs to the end of the
                # string.  Keeping this separate from "terminator found
                # immediately" stops a lone marker from swallowing the rest
                # of the string and hiding later variables.
                endmatch = len(str1)
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # setting endmarker to an int means it is a fixed-length variable
            # string (usually endmarker==1)
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # search backwards in case there's an intervening startmarker
            # (if not it's OK)...
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1:
                startmatch2 = start2
                start2 += len(startmarker)
                if start2 != currentpos:
                    # A later start marker sits closer to the end marker, so
                    # the variable really begins there.
                    currentpos = start2
                    startmatch = startmatch2
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            # Accept empty names, or names made of alphanumerics, "_" and ".".
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
168
def getaccelerators(accelmarker, acceptlist=None):
    """Build a function that lists the accelerators marked with accelmarker.

    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed through to findaccelerators
    @return: A function taking a string and returning the pair
        C{(accelerators, badaccelerators)} as lists of characters
    """
    def getmarkedaccelerators(str1):
        """Return the valid and invalid accelerator characters found in str1."""
        goodlocs, faultylocs = findaccelerators(str1, accelmarker, acceptlist)
        # Only the characters are wanted here; the positions are discarded.
        good = [char for _position, char in goodlocs]
        faulty = [char for _position, char in faultylocs]
        return good, faulty

    return getmarkedaccelerators
def getvariables(startmarker, endmarker):
    """Build a function that lists the variables marked with the given markers.

    @param startmarker: Passed through to findmarkedvariables
    @param endmarker: Passed through to findmarkedvariables
    @return: A function taking a string and returning the list of variable
        names found in it
    """
    def getmarkedvariables(str1):
        """Return the names of the marked variables found in str1."""
        located = findmarkedvariables(str1, startmarker, endmarker)
        # Only the names are wanted here; the positions are discarded.
        return [name for _position, name in located]

    return getmarkedvariables
def getnumbers(str1):
    """returns any numbers that are in the string

    A number is a run of digits that may contain periods between digits and
    may include the degree sign.  Periods that are not followed by another
    digit are not included in the number.

    @param str1: The unicode string to scan
    @return: A list of the numbers found, as strings, in order of appearance
    """
    # TODO: handle locale-based periods e.g. 2,5 for Afrikaans
    # type(u"") is the unicode text type on both Python 2 (unicode) and
    # Python 3 (str); the bare name ``unicode`` does not exist on Python 3.
    assert isinstance(str1, type(u""))
    numbers = []
    innumber = False
    degreesign = u'\xb0'
    lastnumber = ""
    # Periods seen since the last digit; only committed to the number once
    # another digit follows them.
    carryperiod = ""
    for chr1 in str1:
        if chr1.isdigit():
            innumber = True
        elif innumber:
            if not (chr1 == '.' or chr1 == degreesign):
                # Anything other than a period or degree sign ends the number.
                innumber = False
                if lastnumber:
                    numbers.append(lastnumber)
                lastnumber = ""
        if innumber:
            if chr1 == degreesign:
                lastnumber += chr1
            elif chr1 == '.':
                carryperiod += chr1
            else:
                lastnumber += carryperiod + chr1
                carryperiod = ""
        else:
            carryperiod = ""
    # Flush a number that runs up to the end of the string.
    if innumber:
        if lastnumber:
            numbers.append(lastnumber)
    return numbers
220
def getfunctions(str1, punctuation):
    """Return the functions() named in a string, ignoring trailing punctuation.

    @param str1: The string to search
    @param punctuation: Characters to strip from the end of each word;
        parentheses in this set are ignored so that "()" is never stripped
    @return: A list of the whitespace-separated words that end in "()" once
        their trailing punctuation has been removed
    """
    strippable = punctuation.replace("(", "").replace(")", "")
    functions = []
    for word in str1.split():
        candidate = word.rstrip(strippable)
        if candidate.endswith("()"):
            functions.append(candidate)
    return functions
226
def getemails(str1):
    """Return the email addresses that are in a string.

    @param str1: The string to search
    @return: A list of every substring consisting of word characters, periods
        or hyphens on both sides of an "@"
    """
    # Raw string: the regex escapes reach the re module unchanged instead of
    # relying on Python leaving unknown string escapes alone.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
230
def geturls(str1):
    """Return the URIs in a string.

    @param str1: The string to search
    @return: A list of the substrings that start with "http:", "https:",
        "www." or "ftp:" and continue with characters allowed by the pattern
    """
    # Raw strings: the regex escapes reach the re module unchanged instead of
    # relying on Python leaving unknown string escapes alone.
    url_pattern = (
        r'https?:[\w/\.:;+\-~\%#\$?=&,()]+'
        r'|www\.[\w/\.:;+\-~\%#\$?=&,()]+'
        r'|ftp:[\w/\.:;+\-~\%#?=&,]+'
    )
    return re.findall(url_pattern, str1)
236
def countaccelerators(accelmarker, acceptlist=None):
    """Build a function that counts the accelerators marked with accelmarker.

    @param accelmarker: The marker that precedes an accelerator character
    @param acceptlist: Passed through to findaccelerators
    @return: A function taking a string and returning the pair
        C{(number of valid accelerators, number of invalid accelerators)}
    """
    def countmarkedaccelerators(str1):
        """Return how many valid and invalid accelerators str1 contains."""
        found = findaccelerators(str1, accelmarker, acceptlist)
        goodlocs, faultylocs = found
        return len(goodlocs), len(faultylocs)

    return countmarkedaccelerators