Package mvpa :: Package misc :: Module support
[hide private]
[frames] | no frames]

Source Code for Module mvpa.misc.support

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Support function -- little helpers in everyday life""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14  import re 
 15   
 16  from copy import copy, deepcopy 
 17  from operator import isSequenceType 
 18   
 19  if __debug__: 
 20      from mvpa.misc import debug 
 21   
def transformWithBoxcar(data, startpoints, boxlength, offset=0, fx=N.mean):
    """Collapse boxcar-shaped windows of a dataset along its first axis.

    For every starting point a window ('box') of `boxlength` consecutive
    elements along the first axis of `data` is selected and reduced to a
    single element by `fx` (the mean by default).

    Parameters:
      data:         An array with an arbitrary number of dimensions. Any
                    sequence supporting `len()` and slicing along its first
                    axis is accepted (e.g. nested lists).
      startpoints:  A sequence of index values along the first axis of
                    'data'. It is iterated twice (validation, then
                    reduction), so it must not be a one-shot iterator.
      boxlength:    The number of elements after 'startpoint' along the
                    first axis of 'data' to be considered for each box.
      offset:       The offset between the starting point and the
                    window (boxcar).
      fx:           Callable used to reduce a box; it is invoked as
                    `fx(box, axis=0)`. Defaults to `N.mean`.

    Returns an array with the length of the first axis being equal to the
    length of the 'startpoints' sequence.

    Raises ValueError if `boxlength` is lower than 1 or if any box would
    start before the first or extend beyond the last element of `data`.
    """
    if boxlength < 1:
        raise ValueError("Boxlength lower than 1 makes no sense.")

    nelements = len(data)

    # check for illegal boxes before doing any computation
    for sp in startpoints:
        first = sp + offset
        if first < 0 or first + boxlength > nelements:
            raise ValueError(
                'Illegal box: start: %i, offset: %i, length: %i'
                % (sp, offset, boxlength))

    # Every box is a contiguous run along the first axis, hence a plain
    # slice selects exactly the elements to be reduced.
    selected = [fx(data[sp + offset:sp + offset + boxlength], axis=0)
                for sp in startpoints]

    return N.array(selected)
59 60 61
def getUniqueLengthNCombinations(data, n):
    """Generates a list of lists containing all combinations of
    elements of data of length 'n' without repetitions.

    :Parameters:
      data : iterable
        Elements to choose from. Elements are distinguished by position,
        not by value.
      n : int
        Number of elements per combination.

    :return: list of lists. Each inner list holds `n` elements of `data`
      in their original relative order; the combinations themselves are
      ordered lexicographically by element position. The result is empty
      if `n` exceeds the number of elements, and `[[]]` if `n` is 0.

    :raises: ValueError if `n` is negative.
    """
    # The previous hand-written recursion shared a single 'occupied' flag
    # list between all recursion levels and never released the flags, so
    # elements consumed at a deeper level were skipped by the outer levels
    # (e.g. [2, 3, 4] was missing for 3 out of [1, 2, 3, 4]).
    # itertools.combinations (Python >= 2.6) yields the complete set.
    from itertools import combinations

    return [list(combo) for combo in combinations(data, n)]
def indentDoc(v):
    """Return the string form of `v` with every continuation line indented.

    Needed for a cleaner __repr__ output.

    :Parameters:
      v : arbitrary
        Object to be rendered via `str()`.

    :return: `str(v)` where each newline is followed by the indentation.
      The first line is left untouched.
    """
    text = str(v)
    # plain substring replacement -- the pattern contains no regex syntax
    return text.replace('\n', '\n ')
115 116 117
def idhash(val):
    """Craft unique id+hash for an object

    The result is `id(val)` plus, on a best-effort basis, a hash of the
    object's content: first the hash of a `buffer` over the object is
    tried, then the plain `hash()` of the object. If neither is available
    only the id is returned. Lists are converted into tuples before
    hashing (the id is still the one of the original list).

    :Parameters:
      val : arbitrary

    :return: integer
    """
    res = id(val)
    if isinstance(val, list):
        val = tuple(val)
    # Only `Exception` is caught (instead of a bare except), so that
    # KeyboardInterrupt and SystemExit are no longer swallowed.
    try:
        # `buffer` is a Python 2 builtin; where it is missing the resulting
        # NameError simply leads to the plain hash() fallback below.
        res += hash(buffer(val))
    except Exception:
        try:
            res += hash(val)
        except Exception:
            # unhashable object -- the id has to suffice
            pass
    return res
133
def isSorted(items):
    """Check if listed items are in sorted order.

    The check is done by comparing `items` against a sorted deep copy of
    itself; `items` is left unmodified.

    :Parameters:
      `items`: container providing a `sort()` method (e.g. list or
        NumPy array)

    :return: `True` if were sorted. Otherwise `False`
    """
    # Sort the copy, not the original: a predicate must not reorder the
    # caller's data as a side effect.
    items_sorted = deepcopy(items)
    items_sorted.sort()
    equality = items_sorted == items
    # XXX yarik forgotten analog to isiterable
    # array-like containers compare elementwise -- reduce to a single value
    if hasattr(equality, '__iter__'):
        equality = N.all(equality)
    return equality
149 150
def getBreakPoints(items, contiguous=True):
    """Return a list of break points.

    A break point is the index at which a new run of identical items
    starts.

    :Parameters:
      items : iterable
        list of items, such as chunks
      contiguous : bool
        if `True` (default) then raise Value Error if items are not
        contiguous, i.e. a label occur in multiple contiguous sets.
        If `False`, the start of every repeated run is reported as a
        break point as well.

    :raises: ValueError

    :return: list of indexes for every new set of items
    """
    prev = None
    # Items which were already seen. Kept as a list (not a set) so that
    # unhashable items are supported as well.
    known = []
    # Resultant list of break point indexes
    result = []
    for index, item in enumerate(items):
        if item in known:
            # A known item can never be the very first one, so `prev`
            # always holds a real predecessor at this point.
            if prev != item:            # breakpoint
                if contiguous:
                    raise ValueError(
                        "Item %s was already seen before" % str(item))
                result.append(index)
        else:
            known.append(item)
            result.append(index)
        prev = item
    return result
185 186
class MapOverlap(object):
    """Compute some overlap stats from a sequence of binary maps.

    Calling an instance with a sequence of binary maps (e.g. lists or
    arrays) returns the fraction of map elements which are non-zero in at
    least a configurable proportion of the maps. With the default
    threshold of 1.0 an element has to be non-zero in *all* maps.

    Each call also stores three maps (same size as a single input map) as
    instance attributes:

      * overlap_map: boolean map, true for each overlapping element, i.e.
                     where the overlap fraction reaches the threshold.
      * spread_map:  boolean map, true for each element that is non-zero
                     in any map, but stays below the overlap threshold.
      * ovstats_map: map of float with the raw elementwise fraction of
                     overlap.

    All three attributes are `None` until the instance is called.
    """

    def __init__(self, overlap_threshold=1.0):
        """Store the threshold and reset the result maps.

        :Parameters:
          overlap_threshold : float
            Fraction of maps in which an element has to be non-zero to
            be counted as overlapping.
        """
        # result maps are filled in by __call__
        self.ovstats_map = None
        self.spread_map = None
        self.overlap_map = None

        self.__overlap_threshold = overlap_threshold

    def __call__(self, maps):
        """Returns fraction of overlapping elements.

        :Parameters:
          maps : sequence of binary maps of identical shape
        """
        threshold = self.__overlap_threshold

        # elementwise fraction of maps in which an element is set
        fractions = N.mean(maps, axis=0)
        reaches_threshold = fractions >= threshold
        below_threshold = fractions < threshold

        self.ovstats_map = fractions
        self.overlap_map = reaches_threshold
        self.spread_map = N.logical_and(fractions > 0.0, below_threshold)

        return N.mean(reaches_threshold)
227 228
class HarvesterCall(object):
    """Description of a single call together with its processing options.

    Instances merely validate and store their arguments as public
    attributes (`call`, `attribs`, `argfilter`, `expand_args`,
    `copy_attribs`).
    """

    def __init__(self, call, attribs=None, argfilter=None, expand_args=True,
                 copy_attribs=True):
        """Initialize

        :Parameters:
          call : callable
            Call which gets called in the harvester.
          attribs : list of basestr
            What attributes of call to store and return later on?
            `None` (default) is stored as an empty list.
          argfilter : sequence or None
            Stored as is; must be a sequence if given.
          expand_args : bool
            Either to expand the output of looper into a list of arguments for
            call
          copy_attribs : bool
            Force copying values of attributes

        :raises: ValueError if `attribs` is not a sequence or `argfilter`
          is neither a sequence nor `None`.
        """
        # Call which gets called in the harvester.
        self.call = call

        if attribs is None:
            attribs = []
        if not isSequenceType(attribs):
            raise ValueError("'attribs' have to specified as a sequence.")

        if argfilter is not None and not isSequenceType(argfilter):
            raise ValueError("'argfilter' have to be a sequence or None.")

        # everything is fine -- keep the settings
        self.attribs = attribs
        self.copy_attribs = copy_attribs
        self.expand_args = expand_args
        self.argfilter = argfilter
260 261 262
class Harvester(object):
    """World domination helper: do whatever it is asked and accumulate results

    XXX Thinks about:
      - Might we need to deepcopy attributes values?
      - Might we need to specify what attribs to copy and which just to bind?
    """

    def __init__(self, source, calls, simplify_results=True):
        """Initialize

        :Parameters:
          source
            Generator which produce food for the calls.
          calls : sequence of HarvesterCall instances
            Calls which are processed in the loop. All calls are processed in
            order of apperance in the sequence.
          simplify_results: bool
            Remove unecessary overhead in results if possible (nested lists
            and dictionaries).

        :raises: ValueError if `calls` is not a sequence.
        """
        if not isSequenceType(calls):
            raise ValueError("'calls' have to specified as a sequence.")

        # Generator which feeds the harvester
        self.__source = source

        # Calls which gets called with each generated source
        self.__calls = calls

        self.__simplify_results = simplify_results

    def __call__(self, *args, **kwargs):
        """Run all calls on everything the source generates.

        All arguments are passed on to the source. Every item generated by
        the source is handed to each call in turn; return values and the
        requested attributes of the call are collected per iteration.

        :return: list with one entry per call. Each entry is a dictionary
          with the key 'result' (list of return values) and one key per
          requested attribute (list of attribute values). If
          `simplify_results` is enabled, entries of calls without
          requested attributes are reduced to the plain list of return
          values, and the outer list is dropped if there is just a single
          call.

        :raises: RuntimeError if a call requests argument expansion, but
          the first item generated by the source is not a sequence.
        """
        # prepare complex result structure for all calls and their respective
        # attributes: calls x dict(attributes x loop iterations)
        results = [dict([('result', [])] + [(a, []) for a in c.attribs])
                   for c in self.__calls]

        # Lets do it!
        for i, X in enumerate(self.__source(*args, **kwargs)):
            for c, call in enumerate(self.__calls):
                # sanity check (done for the first generated item only)
                if i == 0 and call.expand_args and not isSequenceType(X):
                    raise RuntimeError(
                        "Cannot expand non-sequence result from %s"
                        % repr(self.__source))

                # apply argument filter (and reorder) if requested
                if call.argfilter:
                    filtered_args = [X[f] for f in call.argfilter]
                else:
                    filtered_args = X

                if call.expand_args:
                    result = call.call(*filtered_args)
                else:
                    result = call.call(filtered_args)

                results[c]['result'].append(result)

                for attrib in call.attribs:
                    # getattr() instead of calling __getattribute__()
                    # directly: the latter bypasses __getattr__ fallbacks
                    # and is not available on every kind of callable.
                    attrv = getattr(call.call, attrib)

                    if call.copy_attribs:
                        attrv = copy(attrv)

                    results[c][attrib].append(attrv)

        # reduce results structure
        if self.__simplify_results:
            # get rid of dictionary if just the results are requested
            for c, call in enumerate(self.__calls):
                if len(call.attribs) == 0:
                    results[c] = results[c]['result']

            if len(self.__calls) == 1:
                results = results[0]

        return results
351 352 353 # XXX MH: this doesn't work in all cases, as you cannot have *args after a 354 # kwarg. 355 #def loop(looper, call, 356 # unroll=True, attribs=None, copy_attribs=True, *args, **kwargs): 357 # """XXX Loop twin brother 358 # 359 # Helper for those who just wants to do smth like 360 # loop(blah, bleh, grgr) 361 # instead of 362 # Loop(blah, bleh)(grgr) 363 # """ 364 # print looper, call, unroll, attribs, copy_attribs 365 # print args, kwargs 366 # return Loop(looper=looper, call=call, unroll=unroll, 367 # attribs=attribs, copy_attribs=copy_attribs)(*args, **kwargs) 368