Package translate :: Package filters :: Module decoration
[hide private]
[frames | no frames]

Source Code for Module translate.filters.decoration

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2004-2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """functions to get decorative/informative text out of strings...""" 
 23   
 24  import re 
 25  import unicodedata 
 26   
def spacestart(str1):
    """returns the run of whitespace characters found at the start of str1

    The result is empty when str1 is empty or starts with a non-whitespace
    character, and equals the whole string when str1 is only whitespace.
    """
    leading = []
    for char in str1:
        if not char.isspace():
            # the first non-whitespace character ends the leading run
            break
        leading.append(char)
    return "".join(leading)
34
def spaceend(str1):
    """returns the run of whitespace characters found at the end of str1

    The result is empty when str1 is empty or ends with a non-whitespace
    character, and equals the whole string when str1 is only whitespace.
    """
    trailing = []
    for char in reversed(str1):
        if not char.isspace():
            # the last non-whitespace character bounds the trailing run
            break
        trailing.append(char)
    # characters were gathered back-to-front; restore their original order
    trailing.reverse()
    return "".join(trailing)
43
def puncstart(str1, punctuation):
    """returns the leading run of punctuation (and whitespace) in str1

    A character belongs to the run when it is contained in punctuation or
    is whitespace; the first character that is neither ends the run.
    """
    leading = []
    for char in str1:
        if not (char in punctuation or char.isspace()):
            break
        leading.append(char)
    return "".join(leading)
51
def puncend(str1, punctuation):
    """returns the trailing run of punctuation (and whitespace) in str1

    A character belongs to the run when it is contained in punctuation or
    is whitespace; scanning backwards, the first character that is neither
    ends the run.
    """
    trailing = []
    for char in reversed(str1):
        if not (char in punctuation or char.isspace()):
            break
        trailing.append(char)
    # characters were gathered back-to-front; restore their original order
    trailing.reverse()
    return "".join(trailing)
60
def ispurepunctuation(str1):
    """checks whether str1 contains no alphabetic characters at all

    Returns 0 as soon as an alphabetic character is found, otherwise the
    length of str1 -- so an empty string also gives a false (0) result.
    Digits do not count as alphabetic.
    """
    if any(char.isalpha() for char in str1):
        return 0
    return len(str1)
66
def isvalidaccelerator(accelerator, ignorelist=()):
    """returns whether the given accelerator string is a valid one

    An accelerator is rejected when it is empty, when it is contained in
    ignorelist, when (after dropping underscores) it is not exactly one
    alphanumeric character, or when that character decomposes into more
    than one code point (i.e. it carries a diacritic).

    accelerator -- the candidate accelerator (text, normally one character)
    ignorelist  -- container of accelerators that must never be accepted

    Always returns a bool.
    """
    if len(accelerator) == 0 or accelerator in ignorelist:
        return False
    accelerator = accelerator.replace("_", "")
    # unicodedata.decomposition() only accepts a single character, so
    # anything longer (or empty after stripping "_") cannot be validated
    # and is not a usable accelerator.
    if len(accelerator) != 1 or not accelerator.isalnum():
        return False

    # We don't want to have accelerators on characters with diacritics, so
    # let's see if the character can decompose.
    decomposition = unicodedata.decomposition(accelerator)
    # Next we strip out any extra information like <this>
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    # More than one code point left means base character + combining mark(s).
    return " " not in decomposition
81
def findaccelerators(str1, accelmarker, ignorelist=[]):
    """returns the accelerators found in str1 together with their locations

    Every occurrence of accelmarker is followed by a one-character
    accelerator.  The result is a pair of lists (valid, invalid), each
    holding (position of the marker, accelerator character) tuples, split
    according to isvalidaccelerator().  A marker that is the very last
    thing in the string has no accelerator and ends the search.
    """
    valid = []
    invalid = []
    markerlength = len(accelmarker)
    searchfrom = 0
    while True:
        markerpos = str1.find(accelmarker, searchfrom)
        if markerpos < 0:
            break
        # we assume accelerators are single characters
        accelpos = markerpos + markerlength
        accelend = accelpos + 1
        if accelend > len(str1):
            break
        accelerator = str1[accelpos:accelend]
        searchfrom = accelend
        entry = (markerpos, accelerator)
        if isvalidaccelerator(accelerator, ignorelist):
            valid.append(entry)
        else:
            invalid.append(entry)
    return valid, invalid
102
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """returns all the variables and locations in str1 marked with a given marker

    str1        -- the text to search
    startmarker -- the string that introduces a variable
    endmarker   -- how a variable ends:
                   None: the variable runs up to the next non-alphanumeric
                         character (or the end of the string);
                   int:  the variable has exactly that many characters;
                   str:  the variable runs up to the next occurrence of it
    ignorelist  -- container of variable names to leave out of the result

    Returns a list of (position of the start marker, variable name) tuples.
    Only names that are empty or alphanumeric (ignoring "_" and ".") are
    reported.
    """
    variables = []
    currentpos = 0
    while currentpos >= 0:
        variable = None
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            # No end marker: the first non-alphanumeric character ends the
            # variable.  Default to the end of the string so that "no
            # terminator found" is not confused with "terminator directly
            # after the marker" (which means there is no variable here and
            # scanning must continue from this point).
            endmatch = len(str1)
            for n in range(currentpos, len(str1)):
                if not str1[n].isalnum():
                    endmatch = n
                    break
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # setting endmarker to an int means it is a fixed-length
            # variable string (usually endmarker==1)
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # search backwards in case there's an intervening startmarker
            # (if not it's OK)...
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1:
                startmatch2 = start2
                start2 += len(startmarker)
                if start2 != currentpos:
                    # the variable really starts at the closest marker
                    currentpos = start2
                    startmatch = startmatch2
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
147
def getaccelerators(accelmarker, ignorelist=[]):
    """returns a function that lists the accelerators marked with accelmarker

    The returned function takes a string and gives back two lists: the
    valid accelerator characters and the invalid ones, as classified by
    findaccelerators() using the given ignorelist.
    """
    def getmarkedaccelerators(str1):
        """returns (valid, invalid) accelerator characters found in str1"""
        goodlocs, badlocs = findaccelerators(str1, accelmarker, ignorelist)
        # drop the positions, keep only the accelerator characters
        good = [accelerator for accelstart, accelerator in goodlocs]
        bad = [accelerator for accelstart, accelerator in badlocs]
        return good, bad
    return getmarkedaccelerators
def getvariables(startmarker, endmarker):
    """returns a function that lists the variables marked with the given markers

    The returned function takes a string and gives back the variable names
    that findmarkedvariables() locates between startmarker and endmarker.
    """
    def getmarkedvariables(str1):
        """returns the names of all marked variables found in str1"""
        # drop the positions, keep only the variable names
        return [variable for accelstart, variable
                in findmarkedvariables(str1, startmarker, endmarker)]
    return getmarkedvariables
def getnumbers(str1):
    """returns any numbers that are in the string

    A number is a run of digits that may contain periods and degree signs.
    Periods are only kept when another digit follows them, so a sentence
    ending such as "2.5." yields "2.5".

    str1 -- text to scan; a byte string is decoded as UTF-8 (undecodable
            bytes are replaced, they can never be part of a number)

    Returns the list of numbers in order of appearance, as byte strings
    (UTF-8) when str1 was a byte string and as text otherwise.
    """
    # TODO: handle locale-based periods e.g. 2,5 for Afrikaans
    wasbytes = isinstance(str1, bytes)
    if wasbytes:
        # work on text throughout and convert back once at the end
        str1 = str1.decode('utf8', 'replace')
    degreesign = u'\xb0'
    numbers = []
    innumber = False
    lastnumber = ""
    # periods seen since the last digit; only emitted if a digit follows
    carryperiod = ""
    for chr1 in str1:
        if chr1.isdigit():
            innumber = True
        elif innumber and chr1 != '.' and chr1 != degreesign:
            # anything but a digit, period or degree sign ends the number
            innumber = False
            if lastnumber:
                numbers.append(lastnumber)
            lastnumber = ""
        if not innumber:
            carryperiod = ""
        elif chr1 == degreesign:
            lastnumber += chr1
        elif chr1 == '.':
            carryperiod += chr1
        else:
            lastnumber += carryperiod + chr1
            carryperiod = ""
    if innumber and lastnumber:
        numbers.append(lastnumber)
    if wasbytes:
        numbers = [number.encode('utf8') for number in numbers]
    return numbers
206
def getfunctions(str1, punctuation):
    """returns the functions() that are in a string

    Words are split on whitespace and trailing characters from punctuation
    are ignored (parentheses are never treated as punctuation here); a word
    counts as a function when what remains ends in "()".
    """
    strippable = punctuation.replace("(", "").replace(")", "")
    functions = []
    for word in str1.split():
        candidate = word.rstrip(strippable)
        if candidate.endswith("()"):
            functions.append(candidate)
    return functions
212
def getemails(str1):
    """returns the email addresses that are in a string

    An address is any run of word characters, periods and hyphens on both
    sides of an "@"; no further validation is done, so trailing periods
    are part of the match.
    """
    # raw string: the backslashes belong to the regular expression, not to
    # Python string escapes (which would be invalid escape sequences)
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
216
def geturls(str1):
    """returns the URIs in a string

    Recognises http:, https: and ftp: URIs as well as bare "www." hosts.
    Matching is purely character based, so trailing punctuation that is
    allowed inside a URI (such as "," or ".") is part of the match.
    """
    # raw strings: the backslashes belong to the regular expression, not to
    # Python string escapes (which would be invalid escape sequences)
    URLPAT = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(URLPAT, str1)
222
def countaccelerators(accelmarker, ignorelist=[]):
    """returns a function that counts the accelerators marked with accelmarker

    The returned function takes a string and gives back a pair
    (number of valid accelerators, number of invalid accelerators), as
    classified by findaccelerators() using the given ignorelist.
    """
    def countmarkedaccelerators(str1):
        """returns the (valid, invalid) accelerator counts for str1"""
        goodlocs, badlocs = findaccelerators(str1, accelmarker, ignorelist)
        goodcount = len(goodlocs)
        badcount = len(badlocs)
        return goodcount, badcount
    return countmarkedaccelerators