Package mvpa :: Package misc :: Module support
[hide private]
[frames] | no frames]

Source Code for Module mvpa.misc.support

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Support function -- little helpers in everyday life""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14  import re, os 
 15   
 16  from mvpa.misc.copy import copy, deepcopy 
 17  from operator import isSequenceType 
 18   
 19  if __debug__: 
 20      from mvpa.base import debug 
 21   
 22   
def reuseAbsolutePath(file1, file2, force=False):
    """Use the path to `file1` as the path to `file2` if no absolute
    path is given for `file2`.

    :Parameters:
      file1 : str
        Path whose directory part is reused.
      file2 : str
        Path to relocate. It counts as absolute if it starts with '/'.
      force : bool
        If True, relocate `file2` even if it starts with '/'.

    :Returns:
      `file2` untouched if it starts with '/' and `force` is False;
      otherwise `file2`, stripped of any leading '/', joined onto the
      directory of `file1`.
    """
    keep_as_is = file2.startswith('/') and not force
    if keep_as_is:
        return file2

    # borrow the directory of file1
    base_dir = os.path.dirname(file1)
    relative_part = file2.lstrip('/')
    return os.path.join(base_dir, relative_part)
36 37
def transformWithBoxcar(data, startpoints, boxlength, offset=0, fx=N.mean):
    """This function extracts boxcar windows from an array. Such a boxcar is
    defined by a starting point and the size of the window along the first axis
    of the array (`boxlength`). Afterwards a customizable function is applied
    to each boxcar individually (Default: averaging).

    :param data: An array with an arbitrary number of dimensions.
    :type data: array
    :param startpoints: Boxcar startpoints as index along the first array axis
    :type startpoints: sequence
    :param boxlength: Length of the boxcar window in #array elements
    :type boxlength: int
    :param offset: Optional offset between the configured starting point and the
      actual beginning of the boxcar window.
    :type offset: int
    :param fx: Function applied to every boxcar; it is called as
      ``fx(box, axis=0)``.
    :type fx: callable
    :rtype: array (len(startpoints) x data.shape[1:])
    :raises ValueError: if `boxlength` is lower than 1 or if any boxcar would
      start before the first or end after the last element of `data`.
    """
    if boxlength < 1:
        raise ValueError("Boxlength lower than 1 makes no sense.")

    nsamples = len(data)

    # Validate all boxes before applying `fx` to any of them.  The bounds are
    # collected in the same pass, so `startpoints` is traversed only once.
    boxes = []
    for sp in startpoints:
        first = sp + offset
        last = first + boxlength
        if first < 0 or last > nsamples:
            raise ValueError(
                'Illegal box: start: %i, offset: %i, length: %i'
                % (sp, offset, boxlength))
        boxes.append((first, last))

    # A box is a contiguous range along the first axis, so a plain slice
    # selects it without building an index array per box.
    selected = [fx(data[first:last], axis=0) for first, last in boxes]

    return N.array(selected)
75 76 77
def getUniqueLengthNCombinations(data, n):
    """Generates a list of lists containing all combinations of
    elements of data of length 'n' without repetitions.

    Elements are distinguished by position, not by value, and every
    combination keeps the elements in the order they have in `data`.

    The previous hand-rolled recursion shared a single 'occupied' bitset
    between all recursion levels and never released an element again, so for
    n >= 3 it silently dropped combinations (e.g. only 3 of the 4 triples of
    a 4-element list were returned).

    :Parameters:
      data : iterable
        Elements to combine.
      n : int
        Length of each combination. n == 0 yields a single empty
        combination; n larger than the number of elements yields an empty
        list.

    :Returns:
      list of lists, in lexicographic order of element positions.

    :raises ValueError: if `n` is negative.
    """
    # local import: keeps the block self-contained without touching the
    # module level import section
    from itertools import combinations

    return [list(combo) for combo in combinations(data, n)]
def indentDoc(v):
    """Return `str(v)` with every line after the first one indented.

    Needed for a cleaner __repr__ output

    :Parameters:
      v : arbitrary
        Value to convert; only its string representation is used.

    :Returns:
      The string representation of `v` with the indentation inserted after
      every newline character.
    """
    text = str(v)
    return text.replace('\n', '\n ')
131 132
def idhash(val):
    """Craft an "id[:hash]" string for an object.

    The result always starts with `id(val)`. If a hash can be computed it is
    appended after a colon. The hash of the object's buffer is tried first
    and the plain `hash()` of the object second. Lists are converted to
    tuples before hashing, but the id is the one of the original list.
    Computing the hash is best-effort: if neither attempt succeeds only the
    id is returned.

    :Parameters:
      val : arbitrary

    :Returns:
      str
    """
    res = "%s" % id(val)
    if isinstance(val, list):
        val = tuple(val)
    # `except Exception` instead of a bare `except`, so KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        # `buffer` is a Python 2 builtin; where it is missing the resulting
        # NameError simply routes to the plain hash() below.
        res += ":%s" % hash(buffer(val))
    except Exception:
        try:
            res += ":%s" % hash(val)
        except Exception:
            # unhashable object -- the id alone has to do
            pass
    return res
148
def isSorted(items):
    """Check if listed items are in sorted order.

    `items` itself is left untouched: a copy is sorted and compared against
    the original. (Previously the container passed in was sorted in place as
    a side effect of the check.)

    :Parameters:
      `items`: container providing a `sort()` method (e.g. list or array)

    :return: `True` if `items` were sorted, otherwise `False`. No warning is
      issued by this function.
    """
    items_sorted = deepcopy(items)
    items_sorted.sort()
    equality = items_sorted == items
    # XXX yarik forgotten analog to isiterable
    # comparing arrays gives an elementwise result which has to be reduced
    if hasattr(equality, '__iter__'):
        equality = N.all(equality)
    return equality
164 165
def isInVolume(coord, shape):
    """Check whether a coordinate lies within a volume of the given shape.

    Volume coordinates are assumed to start at 0. No more generalization
    (arbitrary minimal coord) is done to save on performance.

    :Parameters:
      coord : sequence
        Coordinate, one value per axis.
      shape : sequence
        Extent of the volume along each axis; indexed by the position of
        the corresponding value in `coord`.

    :Returns:
      `False` as soon as one coordinate value is negative or not smaller
      than the extent of its axis, `True` otherwise.
    """
    for axis, position in enumerate(coord):
        if position < 0 or position >= shape[axis]:
            return False
    return True
177 178
def getBreakPoints(items, contiguous=True):
    """Return a list of break points.

    A break point is the index at which a new run of identical items
    starts.

    :Parameters:
      items : iterable
        list of items, such as chunks
      contiguous : bool
        if `True` (default) then raise ValueError if items are not
        contiguous, i.e. a label occurs in multiple contiguous sets

    :raises: ValueError

    :return: list of indexes for every new set of items
    """
    seen = []        # items encountered so far
    breakpoints = []
    previous = None
    for pos, current in enumerate(items):
        if current not in seen:
            # first occurrence always opens a new set
            seen.append(current)
            breakpoints.append(pos)
        elif pos > 0 and previous != current:
            # a known item shows up again after a different one
            if contiguous:
                raise ValueError(
                    "Item %s was already seen before" % str(current))
            breakpoints.append(pos)
        previous = current
    return breakpoints
213 214
def RFEHistory2maps(history):
    """Convert history generated by RFE into the array of binary maps

    Row `step` of the result is 1 for every feature whose history value
    is greater than or equal to `step`, and 0 elsewhere. The number of rows
    is ``max(history) - min(history) + 1``.

    Example:
      RFEHistory2maps(N.array( [ 3,2,1,0 ] ))
    results in
      array([[ 1.,  1.,  1.,  1.],
             [ 1.,  1.,  1.,  0.],
             [ 1.,  1.,  0.,  0.],
             [ 1.,  0.,  0.,  0.]])
    """
    # assure that it is an array
    history = N.array(history)
    n_features = len(history)
    n_steps = max(history) - min(history) + 1

    maps = N.zeros((n_steps, n_features))
    # column vector of step numbers; broadcasting it against the history
    # marks all (step, feature) pairs at once
    step_column = N.arange(n_steps)[:, None]
    maps[history >= step_column] = 1

    return maps
236 237
class MapOverlap(object):
    """Compute some overlap stats from a sequence of binary maps.

    When called with a sequence of binary maps (e.g. lists or arrays) the
    fraction of mask elements that are non-zero in a customizable proportion
    of the maps is returned. By default this threshold is set to 1.0, i.e.
    such an element has to be non-zero in *all* maps.

    Three additional maps (same size as original) are computed:

      * overlap_map: binary map which is non-zero for each overlapping element.
      * spread_map:  binary map which is non-zero for each element that is
                     non-zero in any map, but does not exceed the overlap
                     threshold.
      * ovstats_map: map of float with the raw elementwise fraction of overlap.

    All maps are available via class members. They are `None` until the
    instance has been called for the first time.
    """

    def __init__(self, overlap_threshold=1.0):
        """Store the overlap threshold.

        :Parameters:
          overlap_threshold : float
            Minimal mean across maps an element needs to reach to count as
            overlapping.
        """
        self.__overlap_threshold = overlap_threshold

        # filled in by __call__
        self.overlap_map = self.spread_map = self.ovstats_map = None


    def __call__(self, maps):
        """Returns fraction of overlapping elements.

        As a side effect `overlap_map`, `spread_map` and `ovstats_map` are
        recomputed from `maps`.
        """
        threshold = self.__overlap_threshold
        # elementwise mean across maps
        fractions = N.mean(maps, axis=0)
        overlapping = fractions >= threshold

        self.ovstats_map = fractions
        self.overlap_map = overlapping
        self.spread_map = N.logical_and(fractions > 0.0,
                                        fractions < threshold)

        return N.mean(overlapping)
278 279
class Event(dict):
    """Simple class to define properties of an event.

    The class is basically a dictionary. Any properties can
    be passed as keyword arguments to the constructor, e.g.:

      >>> ev = Event(onset=12, duration=2.45)

    Conventions for keys:

    `onset`
      The onset of the event in some unit.
    `duration`
      The duration of the event in the same unit as `onset`.
    `label`
      E.g. the condition this event is part of.
    `chunk`
      Group this event is part of (if any), e.g. experimental run.
    `features`
      Any amount of additional features of the event. This might include
      things like physiological measures, stimulus intensity. Must be a mutable
      sequence (e.g. list), if present.
    """
    # keys every event has to provide
    _MUSTHAVE = ['onset']

    def __init__(self, **kwargs):
        """Store all keyword arguments as items of the event.

        :raises ValueError: if any of the keys listed in `_MUSTHAVE` is
          missing.
        """
        # store everything
        dict.__init__(self, **kwargs)

        # basic checks
        for k in Event._MUSTHAVE:
            if k not in self:
                raise ValueError("Event must have '%s' defined." % k)


    def asDescreteTime(self, dt, storeoffset=False):
        """Convert `onset` and `duration` information into discrete timepoints.

        :Parameters:
          dt: float
            Temporal distance between two timepoints in the same unit as `onset`
            and `duration`.
          storeoffset: bool
            If True, the temporal offset between original `onset` and
            discretized `onset` is stored as an additional item in `features`.

        :Return:
          A copy of the original `Event` with `onset` and optionally `duration`
          replaced by their corresponding discrete timepoint. The new onset will
          correspond to the timepoint just before or exactly at the original
          onset. The new duration will be the number of timepoints covering the
          event from the computed onset timepoint till the timepoint exactly at
          the end, or just after the event.

          Note again, that the new values are expressed as #timepoint and not
          in their original unit!
        """
        dt = float(dt)
        onset = self['onset']
        # work on a deep copy, so neither the event itself nor its `features`
        # list gets modified
        out = deepcopy(self)

        # get the timepoint just prior the onset
        out['onset'] = int(N.floor(onset / dt))

        if storeoffset:
            # compute offset
            offset = onset - (out['onset'] * dt)

            if 'features' in out:
                out['features'].append(offset)
            else:
                out['features'] = [offset]

        if 'duration' in out:
            # how many timepoints cover the event (from computed onset
            # to the one timepoint just after the end of the event)
            out['duration'] = int(N.ceil((onset + out['duration']) / dt)
                                  - out['onset'])

        return out
360 361 362
class HarvesterCall(object):
    """Container for a callable plus the settings stored alongside it."""

    def __init__(self, call, attribs=None, argfilter=None, expand_args=True,
                 copy_attribs=True):
        """Initialize

        :Parameters:
          call : callable
            Call which gets called in the harvester.
          attribs : list of basestr
            What attributes of call to store and return later on?
            Defaults to an empty list.
          argfilter : sequence or None
            Stored as is; has to be a sequence if given.
          expand_args : bool
            Either to expand the output of looper into a list of arguments for
            call
          copy_attribs : bool
            Force copying values of attributes

        :raises ValueError: if `attribs` is not a sequence, or if `argfilter`
          is neither `None` nor a sequence.
        """
        attribs = [] if attribs is None else attribs

        if not isSequenceType(attribs):
            raise ValueError("'attribs' have to specified as a sequence.")

        if argfilter is not None and not isSequenceType(argfilter):
            raise ValueError("'argfilter' have to be a sequence or None.")

        # everything checked out -- store the configuration
        self.call = call
        self.attribs = attribs
        self.argfilter = argfilter
        self.expand_args = expand_args
        self.copy_attribs = copy_attribs
394 395 396
class Harvester(object):
    """World domination helper: do whatever it is asked and accumulate results

    XXX Thinks about:
      - Might we need to deepcopy attributes values?
      - Might we need to specify what attribs to copy and which just to bind?
    """

    def __init__(self, source, calls, simplify_results=True):
        """Initialize

        :Parameters:
          source
            Generator which produce food for the calls.
          calls : sequence of HarvesterCall instances
            Calls which are processed in the loop. All calls are processed in
            order of appearance in the sequence.
          simplify_results: bool
            Remove unnecessary overhead in results if possible (nested lists
            and dictionaries).

        :raises ValueError: if `calls` is not a sequence.
        """
        if not isSequenceType(calls):
            raise ValueError("'calls' have to specified as a sequence.")

        # Generator which feeds the harvester
        self.__source = source

        # Calls which get called with each generated source item
        self.__calls = calls

        self.__simplify_results = simplify_results


    def __call__(self, *args, **kwargs):
        """Feed every item generated by the source to every call.

        All arguments are passed on to the source. For each call the return
        values and the values of the requested attributes are collected,
        one entry per item generated by the source.

        :Returns:
          A list with one dictionary per call, with the keys 'result' plus
          one key per requested attribute, each holding a list. If
          `simplify_results` is enabled, the dictionary of a call without
          requested attributes is replaced by its plain 'result' list, and
          the outer list is dropped if there is just a single call.

        :raises RuntimeError: if a call asks for argument expansion, but the
          first item generated by the source is not a sequence.
        """
        # prepare complex result structure for all calls and their respective
        # attributes: calls x dict(attributes x loop iterations)
        results = []
        for call in self.__calls:
            collected = {'result': []}
            for attrib in call.attribs:
                collected[attrib] = []
            results.append(collected)

        # Lets do it!
        for (i, X) in enumerate(self.__source(*args, **kwargs)):
            for (c, call) in enumerate(self.__calls):
                # sanity check (done for the first generated item only)
                if i == 0 and call.expand_args and not isSequenceType(X):
                    raise RuntimeError(
                        "Cannot expand non-sequence result from %s"
                        % repr(self.__source))

                # apply argument filter (and reorder) if requested
                if call.argfilter:
                    filtered_args = [X[f] for f in call.argfilter]
                else:
                    filtered_args = X

                if call.expand_args:
                    result = call.call(*filtered_args)
                else:
                    result = call.call(filtered_args)

                results[c]['result'].append(result)

                for attrib in call.attribs:
                    # getattr() instead of a direct __getattribute__ call:
                    # it also honors __getattr__ fallbacks and works for
                    # objects which do not provide __getattribute__
                    attrv = getattr(call.call, attrib)

                    if call.copy_attribs:
                        attrv = copy(attrv)

                    results[c][attrib].append(attrv)

        # reduce results structure
        if self.__simplify_results:
            # get rid of dictionary if just the results are requested
            for (c, call) in enumerate(self.__calls):
                if not len(call.attribs):
                    results[c] = results[c]['result']

            if len(self.__calls) == 1:
                results = results[0]

        return results
# XXX MH: this doesn't work in all cases, as you cannot have *args after a
#         kwarg.
#def loop(looper, call,
#         unroll=True, attribs=None, copy_attribs=True, *args, **kwargs):
#    """XXX Loop twin brother
#
#    Helper for those who just want to do something like
#      loop(blah, bleh, grgr)
#    instead of
#      Loop(blah, bleh)(grgr)
#    """
#    print looper, call, unroll, attribs, copy_attribs
#    print args, kwargs
#    return Loop(looper=looper, call=call, unroll=unroll,
#                attribs=attribs, copy_attribs=copy_attribs)(*args, **kwargs)