Package logilab-common-0 :: Package 39 :: Package 0 :: Module patricia
[frames] | no frames]

Source Code for Module logilab-common-0.39.0.patricia

  1  """A Python implementation of PATRICIA tree. 
  2   
  3  PATRICIA - Practical Algorithm to Retrieve Information Coded in Alphanumeric 
  4             D.R.Morrison (1968). 
  5  See http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Tree/PATRICIA.html if you 
  6  want to know what's a PATRICIA tree... 
  7   
  8  TODO: _ advanced search 
  9        _ profile code 
 10        _ use mxTextTools ? 
 11   
 12  :copyright: 2000-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. 
 13  :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr 
 14  :license: General Public License version 2 - http://www.gnu.org/licenses 
 15  """ 
 16  __docformat__ = "restructuredtext en" 
 17   
 18  from warnings import warn 
 19  warn('logilab.common.patricia module is deprecated and will disappear in a near release', 
 20       DeprecationWarning, stacklevel=2) 
 21   
22 -def prefix(prfx, string):
23 """return the index of the first character from string which differs from 24 prefix 25 """ 26 i = 0 27 while i < len(prfx): 28 if i == len(string) or prfx[i] != string[i]: 29 break 30 i += 1 31 return i
32
33 -def split(index, string):
34 """split a string on index, returning a 3-uple : 35 (string before index, character at index, string after index) 36 """ 37 return string[:index], string[index], string[index+1:]
38 39
40 -class PatriciaNode:
41 """a PATRICIA trie node 42 """ 43
44 - def __init__(self, value='', leaf=0, data=None):
45 self.value = value 46 self.edges = {} 47 if leaf: 48 self.datas = [data] 49 else: 50 self.datas = []
51
52 - def insert(self, string, data):
53 """ insert the string in the trie and associate data to it 54 if the string exists is the trie, data is added to the existing datas 55 """ 56 # are we arrived ? 57 if self.value == string: 58 self.datas.append(data) 59 # not yet ! 60 else: 61 # check we don't break compression (value don't match) 62 ind = prefix(self.value, string) 63 if ind < len(self.value): 64 # split this node 65 pfx, e, self.value = split(ind, self.value) 66 if ind < len(string): 67 n = PatriciaNode(pfx) 68 n.edges[string[ind]] = PatriciaNode(string[ind+1:], 1, data) 69 else: 70 n = PatriciaNode(pfx, 1, data) 71 n.edges[e] = self 72 return n 73 n_pfx, n_e, n_sfx = split(len(self.value), string) 74 if n_e in self.edges: 75 self.edges[n_e] = self.edges[n_e].insert(n_sfx, data) 76 else: 77 self.edges[n_e] = PatriciaNode(n_sfx, 1, data) 78 return self
79
80 - def remove(self, string):
81 """ return datas associated with string and remove string from the trie 82 raise KeyError if the key isn't found 83 FIXME: we should change the trie structure 84 """ 85 if string == self.value and self.datas: 86 datas = self.datas 87 self.datas = [] 88 return datas 89 else: 90 pfx, e, sfx = split(len(self.value), string) 91 if self.value == pfx: 92 return self.edges[e].remove(sfx) 93 raise KeyError(string)
94
95 - def lookup(self, string):
96 """ return datas associated with string 97 raise KeyError if the key isn't found 98 """ 99 if string == self.value: 100 if self.datas: 101 return self.datas 102 raise KeyError(string) 103 else: # len(self.value) < len(string): 104 pfx, e, sfx = split(len(self.value), string) 105 if self.value == pfx: 106 return self.edges[e].lookup(sfx) 107 raise KeyError(string)
108
109 - def pfx_search(self, pfx, depth=-1):
110 """ return all string with prefix pfx """ 111 sfxs = [] 112 if pfx and self.value[:len(pfx)] != pfx: 113 pfx, e, sfx = split(len(self.value), pfx) 114 if self.value == pfx and e in self.edges: 115 sfxs = ['%s%s%s' % (self.value, e, sfx) 116 for sfx in self.edges[e].pfx_search(sfx, depth)] 117 else: 118 if depth != 0: 119 for e, child in self.edges.items(): 120 search = child.pfx_search('', depth-1-len(self.value)) 121 sfxs += ['%s%s%s' % (self.value, e, sfx) 122 for sfx in search] 123 if (depth < 0 or len(self.value) <= depth): 124 if self.datas: 125 sfxs.append(self.value) 126 return sfxs
127
128 - def __str__(self, indent=''):
129 node_str = ''.join([' %s%s:\n%s' % (indent, key, 130 a.__str__(' %s' % indent)) 131 for key, a in self.edges.items()]) 132 return '%s%s, %s\n%s' % (indent, self.value, self.datas, node_str)
133
134 - def __repr__(self):
135 return '<PatriciaNode id=%s value=%s childs=%s datas=%s>' % ( 136 id(self), self.value, self.edges.keys(), self.datas)
137 138
139 -class PatriciaTrie:
140 """ wrapper class for a patricia tree 141 delegates to the root of the tree (PatriciaNode) 142 """ 143
144 - def __init__(self):
145 self._trie = None 146 self.words = 0
147
148 - def insert(self, string, data=None):
149 """ insert a string into the tree """ 150 self.words += 1 151 if self._trie is None: 152 self._trie = PatriciaNode(string, 1, data) 153 else: 154 self._trie = self._trie.insert(string, data)
155
156 - def remove(self, string):
157 """ remove a string from the tree """ 158 if self._trie is not None: 159 return self._trie.remove(string) 160 raise KeyError(string)
161
162 - def lookup(self, string):
163 """ look for a string into the tree """ 164 if self._trie is not None: 165 return self._trie.lookup(string) 166 raise KeyError(string)
167
168 - def pfx_search(self, string, depth=-1):
169 """ search all words begining by <string> """ 170 if self._trie is not None: 171 return self._trie.pfx_search(string, depth) 172 raise KeyError(string)
173
174 - def __str__(self):
175 return self._trie.__str__()
176
177 - def __repr__(self):
178 return '<PatriciaTrie id=%s words=%s>' % (id(self), self.words)
179