Package logilab-common-0 ::
Package 39 ::
Package 0 ::
Module patricia
|
|
1 """A Python implementation of PATRICIA tree.
2
3 PATRICIA - Practical Algorithm to Retrieve Information Coded in Alphanumeric
4 D.R.Morrison (1968).
5 See http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Tree/PATRICIA.html if you
6 want to know what's a PATRICIA tree...
7
8 TODO: _ advanced search
9 _ profile code
10 _ use mxTextTools ?
11
12 :copyright: 2000-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
13 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
14 :license: General Public License version 2 - http://www.gnu.org/licenses
15 """
16 __docformat__ = "restructuredtext en"
17
18 from warnings import warn
19 warn('logilab.common.patricia module is deprecated and will disappear in a near release',
20 DeprecationWarning, stacklevel=2)
21
23 """return the index of the first character from string which differs from
24 prefix
25 """
26 i = 0
27 while i < len(prfx):
28 if i == len(string) or prfx[i] != string[i]:
29 break
30 i += 1
31 return i
32
34 """split a string on index, returning a 3-uple :
35 (string before index, character at index, string after index)
36 """
37 return string[:index], string[index], string[index+1:]
38
39
41 """a PATRICIA trie node
42 """
43
44 - def __init__(self, value='', leaf=0, data=None):
45 self.value = value
46 self.edges = {}
47 if leaf:
48 self.datas = [data]
49 else:
50 self.datas = []
51
52 - def insert(self, string, data):
53 """ insert the string in the trie and associate data to it
54 if the string exists is the trie, data is added to the existing datas
55 """
56
57 if self.value == string:
58 self.datas.append(data)
59
60 else:
61
62 ind = prefix(self.value, string)
63 if ind < len(self.value):
64
65 pfx, e, self.value = split(ind, self.value)
66 if ind < len(string):
67 n = PatriciaNode(pfx)
68 n.edges[string[ind]] = PatriciaNode(string[ind+1:], 1, data)
69 else:
70 n = PatriciaNode(pfx, 1, data)
71 n.edges[e] = self
72 return n
73 n_pfx, n_e, n_sfx = split(len(self.value), string)
74 if n_e in self.edges:
75 self.edges[n_e] = self.edges[n_e].insert(n_sfx, data)
76 else:
77 self.edges[n_e] = PatriciaNode(n_sfx, 1, data)
78 return self
79
81 """ return datas associated with string and remove string from the trie
82 raise KeyError if the key isn't found
83 FIXME: we should change the trie structure
84 """
85 if string == self.value and self.datas:
86 datas = self.datas
87 self.datas = []
88 return datas
89 else:
90 pfx, e, sfx = split(len(self.value), string)
91 if self.value == pfx:
92 return self.edges[e].remove(sfx)
93 raise KeyError(string)
94
96 """ return datas associated with string
97 raise KeyError if the key isn't found
98 """
99 if string == self.value:
100 if self.datas:
101 return self.datas
102 raise KeyError(string)
103 else:
104 pfx, e, sfx = split(len(self.value), string)
105 if self.value == pfx:
106 return self.edges[e].lookup(sfx)
107 raise KeyError(string)
108
110 """ return all string with prefix pfx """
111 sfxs = []
112 if pfx and self.value[:len(pfx)] != pfx:
113 pfx, e, sfx = split(len(self.value), pfx)
114 if self.value == pfx and e in self.edges:
115 sfxs = ['%s%s%s' % (self.value, e, sfx)
116 for sfx in self.edges[e].pfx_search(sfx, depth)]
117 else:
118 if depth != 0:
119 for e, child in self.edges.items():
120 search = child.pfx_search('', depth-1-len(self.value))
121 sfxs += ['%s%s%s' % (self.value, e, sfx)
122 for sfx in search]
123 if (depth < 0 or len(self.value) <= depth):
124 if self.datas:
125 sfxs.append(self.value)
126 return sfxs
127
129 node_str = ''.join([' %s%s:\n%s' % (indent, key,
130 a.__str__(' %s' % indent))
131 for key, a in self.edges.items()])
132 return '%s%s, %s\n%s' % (indent, self.value, self.datas, node_str)
133
135 return '<PatriciaNode id=%s value=%s childs=%s datas=%s>' % (
136 id(self), self.value, self.edges.keys(), self.datas)
137
138
140 """ wrapper class for a patricia tree
141 delegates to the root of the tree (PatriciaNode)
142 """
143
145 self._trie = None
146 self.words = 0
147
148 - def insert(self, string, data=None):
149 """ insert a string into the tree """
150 self.words += 1
151 if self._trie is None:
152 self._trie = PatriciaNode(string, 1, data)
153 else:
154 self._trie = self._trie.insert(string, data)
155
157 """ remove a string from the tree """
158 if self._trie is not None:
159 return self._trie.remove(string)
160 raise KeyError(string)
161
163 """ look for a string into the tree """
164 if self._trie is not None:
165 return self._trie.lookup(string)
166 raise KeyError(string)
167
169 """ search all words begining by <string> """
170 if self._trie is not None:
171 return self._trie.pfx_search(string, depth)
172 raise KeyError(string)
173
176
178 return '<PatriciaTrie id=%s words=%s>' % (id(self), self.words)
179