Package mvpa :: Package misc :: Module state
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.misc.state

   1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
   2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
   3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
   4  # 
   5  #   See COPYING file distributed along with the PyMVPA package for the 
   6  #   copyright and license terms. 
   7  # 
   8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
   9  """Classes to control and store state information. 
  10   
  11  It was devised to provide conditional storage of state information.
  12  """ 
  13   
  14  # XXX: MH: The use of `index` as variable name confuses me. IMHO `index` refers 
  15  #          to a position in a container (i.e. list access). However, in this 
  16  #          file it is mostly used in the context of a `key` for dictionary 
  17  #          access. Can we refactor that? 
  18  __docformat__ = 'restructuredtext' 
  19   
  20  import operator, copy 
  21  from sets import Set 
  22  from textwrap import TextWrapper 
  23   
  24  import numpy as N 
  25   
  26  from mvpa.misc.vproperty import VProperty 
  27  from mvpa.misc.exceptions import UnknownStateError 
  28  from mvpa.misc.attributes import CollectableAttribute, StateVariable 
  29  from mvpa.base.dochelpers import enhancedDocString 
  30   
  31  from mvpa.base import externals 
  32   
  33  if __debug__: 
  34      from mvpa.base import debug 
  35   
  36   
  37  _in_ipython = externals.exists('running ipython env') 
  38  # Separators around definitions, needed for ReST, but bogus for 
  39  # interactive sessions 
  40  _def_sep = ('`', '')[int(_in_ipython)] 
  41   
  42  _object_getattribute = object.__getattribute__ 
  43  _object_setattr = object.__setattr__ 
  44   
  45   
  46  ################################################################### 
  47  # Collections 
  48  # 
  49  # TODO: refactor into collections.py. state.py now has 
  50  #       little in common with the main part of this file 
  51  # 
class Collection(object):
    """Container of some CollectableAttributes.

    Items are kept in the `_items` dictionary keyed by item name.  Access
    to a public (non-underscore) attribute of the collection is redirected
    to the ``value`` of the item registered under that name, if any.

    :Groups:
     - `Public Access Functions`: `isKnown`
     - `Access Implementors`: `_getListing`, `_getNames`
     - `Mutators`: `__init__`
     - `R/O Properties`: `listing`, `names`, `items`

    XXX Seems to be not used and duplicating functionality: `_getListing`
    (thus `listing` property)
    """

    def __init__(self, items=None, owner=None, name=None):
        """Initialize the Collection

        :Parameters:
          items : dict of CollectableAttribute's
            items to initialize with
          owner : object
            an object to which collection belongs
          name : basestring
            name of the collection (as seen in the owner, e.g. 'states')
        """
        # NB: stored directly, i.e. without the registration performed by
        #     the `owner` property setter
        self.__owner = owner

        if items is None:
            items = {}
        # Dictionary of the registered items, keyed by their names
        self._items = items
        self.__name = name


    def _setName(self, name):
        """Assign the name of the collection (setter of `name`)"""
        self.__name = name


    def __str__(self):
        """Short listing: at most 4 items unless debug target 'ST' is on"""
        if __debug__ and "ST" in debug.active:
            maxnumber = 1000            # I guess all
        else:
            maxnumber = 4

        if self.__name is not None:
            res = self.__name
        else:
            res = ""

        # obtain the values once instead of on every iteration
        shown = list(self._items.values())[:maxnumber]
        res += "{" + " ".join([str(item) for item in shown])
        if len(self._items) > maxnumber:
            res += "..."
        res += "}"
        if __debug__:
            if "ST" in debug.active:
                res += " owner:%s#%s" % (self.owner.__class__.__name__,
                                         id(self.owner))
        return res


    def _cls_repr(self):
        """Collection specific part of __repr__ for a class containing
        it, ie a part of __repr__ for the owner object

        :Return:
          list of items to be appended within __repr__ after a .join()

        :Raise `NotImplementedError`: always -- derived classes override
        """
        # XXX For now we do not expect any pure non-specialized
        # collection , thus just override in derived classes
        raise NotImplementedError(
            "Class %s should override _cls_repr" % self.__class__.__name__)


    def _is_initializable(self, index):
        """Checks if index could be assigned within collection via
        _initialize

        :Return: bool value for a given `index`

        It is to facilitate dynamic assignment of collections' items
        within derived classes' __init__ depending on the present
        collections in the class.
        """
        # XXX Each collection has to provide what indexes it allows
        #     to be set within constructor. Custom handling of some
        #     arguments (like (dis|en)able_states) is to be performed
        #     in _initialize
        # YYY lets just check if it is among the registered items
        return index in self._items


    def _initialize(self, index, value):
        """Initialize `index` (no check performed) with `value`
        """
        # by default we just set corresponding value
        self[index].value = value


    def __repr__(self):
        """Constructor-like string with abbreviated item values"""
        s = "%s(" % self.__class__.__name__
        items_s = ""
        sep = ""
        for item in self._items:
            # best effort: items whose value cannot be obtained or
            # represented are silently left out of the listing
            try:
                itemvalue = "%s" % repr(self._items[item].value)
                if len(itemvalue) > 50:
                    itemvalue = itemvalue[:10] + '...' + itemvalue[-10:]
                items_s += "%s'%s':%s" % (sep, item, itemvalue)
                sep = ', '
            except Exception:
                pass
        if items_s != "":
            s += "items={%s}" % items_s
        if self.owner is not None:
            s += "%sowner=%s" % (sep, repr(self.owner))
        s += ")"
        return s


    #
    # XXX TODO: figure out if there is a way to define proper
    #           __copy__'s for a hierarchy of classes. Probably we had
    #           to define __getinitargs__, etc... read more...
    #           (StateCollection provides _copy_states_ meanwhile)
    #

    def isKnown(self, index):
        """Returns `True` if state `index` is known at all"""
        return index in self._items


    def isSet(self, index=None):
        """If item (or any in the present or listed) was set

        :Parameters:
          index : None or basestring or list of basestring
            What items to check if they were set in the collection

        :Raise `KeyError`: if any requested item is not registered
        """
        _items = self._items
        if index is not None:
            if isinstance(index, basestring):
                # process just that single index
                self._checkIndex(index)
                return _items[index].isSet
            names = index               # assume that we got some list
        else:
            names = _items              # go through all the items

        for name in names:
            self._checkIndex(name)
            if _items[name].isSet:
                return True
        return False


    def whichSet(self):
        """Return list of indexes which were set"""
        return [index for index, item in self._items.iteritems()
                if item.isSet]


    def _checkIndex(self, index):
        """Verify that given `index` is a known/registered state.

        :Raise `KeyError`: if given `index` is not known
        """
        # OPT: lets not reuse isKnown, to don't incure 1 more function
        #      call
        if index not in self._items:
            raise KeyError(
                "%s of %s has no key '%s' registered"
                % (self.__class__.__name__,
                   self.__owner.__class__.__name__,
                   index))


    def add(self, item):
        """Add a new CollectableAttribute to the collection

        :Parameters:
          item : CollectableAttribute
            or of derived class. Must have 'name' assigned

        :Raise `ValueError`: if `item` is not a CollectableAttribute or
          has no name assigned

        TODO: we should make it stricter to don't add smth of
              wrong type into Collection since it might lead to problems

              Also we might convert to __setitem__
        """
        # Check the type before touching .name, so that a foreign object
        # gets the intended ValueError and not an AttributeError
        if not isinstance(item, CollectableAttribute):
            raise ValueError(
                "Collection can add only instances of " +
                "CollectableAttribute-derived classes. Got %s" % repr(item))

        name = item.name
        if name is None:
            raise ValueError(
                "CollectableAttribute to be added %s must have 'name' set" %
                item)
        self._items[name] = item

        if self.owner is not None:
            self._updateOwner(name)


    def remove(self, index):
        """Remove item from the collection

        :Raise `KeyError`: if `index` is not registered
        """
        self._checkIndex(index)
        # nothing was registered anywhere if there is no owner (see `add`)
        if self.owner is not None:
            self._updateOwner(index, register=False)
        self._items.pop(index)


    def __getattribute__(self, index):
        """Return the value of item `index` if registered, otherwise the
        regular attribute
        """
        # return all private and protected ones first since we will not
        # have collectable's with _ (we should not have!)
        if index[0] == '_':
            return _object_getattribute(self, index)
        _items = _object_getattribute(self, '_items')
        if index in _items:
            return _items[index].value
        return _object_getattribute(self, index)


    def __setattr__(self, index, value):
        """Assign the value of item `index` if registered, otherwise set
        the regular attribute
        """
        if index[0] == '_':
            return _object_setattr(self, index, value)
        _items = _object_getattribute(self, '_items')
        if index in _items:
            _items[index].value = value
        else:
            _object_setattr(self, index, value)


    def __getitem__(self, index):
        """Return the item itself (not its value) registered as `index`

        :Raise `AttributeError`: if there is no such item
        """
        _items = _object_getattribute(self, '_items')
        if index in _items:
            return _items[index]
        raise AttributeError("State collection %s has no %s attribute"
                             % (self, index))


    def get(self, index, default):
        """Access the value by a given index.

        Mimiquing regular dictionary behavior, if value cannot be obtained
        (i.e. if any exception is caught) return default value.
        """
        try:
            return self[index].value
        except Exception:
            return default


    def _action(self, index, func, missingok=False, **kwargs):
        """Run specific func either on a single item or on all of them

        :Parameters:
          index : basestr
            Name of the state variable, 'all' (in any case) to address
            every registered item, or a sequence of such names
          func
            Function (not bound) to call given an item, and **kwargs
          missingok : bool
            If True - do not complain about wrong index

        :Raise `KeyError`: for an unknown name unless `missingok`
        :Raise `ValueError`: if `index` is neither a string nor a sequence
        """
        if isinstance(index, basestring):
            if index.upper() == 'ALL':
                # act on the items directly; going through their names
                # again would recurse endlessly for an item named 'all'
                for item in list(self._items.values()):
                    func(item, **kwargs)
            elif index in self._items:
                # errors raised by func itself are not "missing index"
                # problems, hence they always propagate
                func(self._items[index], **kwargs)
            elif not missingok:
                self._checkIndex(index)         # raises KeyError
        elif operator.isSequenceType(index):
            for item in index:
                self._action(item, func, missingok=missingok, **kwargs)
        else:
            raise ValueError(
                "Don't know how to handle variable given by %s" % index)


    def reset(self, index=None):
        """Reset the state variable defined by `index`

        If `index` is None, all registered items are reset.
        """
        if index is not None:
            indexes = [index]
        else:
            indexes = self.names

        if len(self.items):
            # XXX Check if that works as desired: the unbound `reset` of
            #     the first item's class is applied to every item
            reset_func = list(self._items.values())[0].__class__.reset
            for index_ in indexes:
                self._action(index_, reset_func, missingok=False)


    def _getListing(self):
        """Return a list of registered states along with the documentation"""

        # lets assure consistent litsting order
        items = self._items.items()
        items.sort()
        return ["%s%s%s: %s" % (_def_sep, str(x[1]), _def_sep, x[1].__doc__)
                for x in items]


    def _getNames(self):
        """Return ids for all registered state variables"""
        return self._items.keys()


    def _getOwner(self):
        """Return the owner of the collection (getter of `owner`)"""
        return self.__owner


    def _setOwner(self, owner):
        """Assign a new owner and (re-)register the items within it

        :Raise `ValueError`: if `owner` is not a ClassWithCollections
        """
        if not isinstance(owner, ClassWithCollections):
            raise ValueError(
                "Owner of the StateCollection must be ClassWithCollections object")
        if __debug__:
            # owner might be not yet in the state to be printed
            try:
                strowner = str(owner)
            except Exception:
                strowner = "UNDEF: <%s#%s>" % (owner.__class__, id(owner))
            debug("ST", "Setting owner for %s to be %s" % (self, strowner))
        if self.__owner is not None:
            # Remove attributes which were registered to that owner previousely
            self._updateOwner(register=False)
        self.__owner = owner
        if self.__owner is not None:
            self._updateOwner(register=True)


    def _updateOwner(self, index=None, register=True):
        """Define an entry within owner's __dict__
         so ipython could easily complete it

        :Parameters:
          index : basestring or list of basestring
            Name of the attribute. If None -- all known get registered
          register : bool
            Register if True or unregister if False

        :Raise `ValueError`: if `index` is given but not registered here
        :Raise `RuntimeError`: if the name to be registered is already
          taken in the __dict__ of the owner or of the collection

        XXX Needs refactoring since we duplicate the logic of expansion of
        index value
        """
        if index is not None:
            if index not in self._items:
                raise ValueError(
                    "Attribute %s is not known to %s" % (index, self))
            indexes = [index]
        else:
            indexes = self.names

        ownerdict = self.owner.__dict__
        selfdict = self.__dict__
        owner_known = ownerdict['_known_attribs']
        for index_ in indexes:
            if register:
                if index_ in ownerdict:
                    raise RuntimeError(
                        "Cannot register attribute %s within %s " %
                        (index_, self.owner) + "since it has one already")
                ownerdict[index_] = self._items[index_]
                if index_ in selfdict:
                    raise RuntimeError(
                        "Cannot register attribute %s within %s " %
                        (index_, self) + "since it has one already")
                selfdict[index_] = self._items[index_]
                owner_known[index_] = self.__name
            else:
                if index_ in ownerdict:
                    # yoh doesn't think that we need to complain if False
                    ownerdict.pop(index_)
                    owner_known.pop(index_)
                if index_ in selfdict:
                    selfdict.pop(index_)


    # Properties
    names = property(fget=_getNames)
    items = property(fget=lambda x: x._items)
    owner = property(fget=_getOwner, fset=_setOwner)
    name = property(fget=lambda x: x.__name, fset=_setName)

    # Virtual properties
    listing = VProperty(fget=_getListing)
497 498 499
class ParameterCollection(Collection):
    """Container of Parameters for a stateful object.
    """

    def _cls_repr(self):
        """Part of __repr__ for the owner object

        :Return:
          list of "name=value" strings for the parameters whose
          `isDefault` is false
        """
        prefixes = []
        for k in self.names:
            param = self[k]
            # list only params with not default values
            if param.isDefault:
                continue
            prefixes.append("%s=%s" % (k, param.value))
        return prefixes


    def resetvalue(self, index, missingok=False):
        """Reset the parameter(s) selected by `index` to default values

        :Parameters:
          index : basestring or list of basestring
            Name(s) of the parameters; 'all' selects every parameter
          missingok : bool
            If True - do not complain about unknown names
        """
        from param import Parameter
        # honor the request of the caller (used to be hard-coded to False)
        self._action(index, Parameter.resetvalue, missingok=missingok)
530 531
class SampleAttributesCollection(Collection):
    """Collection holding data and per-sample attributes
    (ie data/labels/chunks/...)
    """

    def _cls_repr(self):
        """Contribution of this collection to the owner's __repr__

        Nothing is reported at the moment.
        """
        # TODO: return I guess samples/labels/chunks
        nothing = list()
        return nothing
550 551 552
class StateCollection(Collection):
    """Container of StateVariables for a stateful object.

    :Groups:
     - `Public Access Functions`: `isKnown`, `isEnabled`, `isActive`
     - `Access Implementors`: `_getListing`, `_getNames`, `_getEnabled`
     - `Mutators`: `__init__`, `enable`, `disable`, `_setEnabled`
     - `R/O Properties`: `listing`, `names`, `items`
     - `R/W Properties`: `enabled`
    """

    def __init__(self, items=None, owner=None, name=None):
        """Initialize the state variables of a derived class

        :Parameters:
          items : dict
            dictionary of states
          owner : ClassWithCollections
            object which owns the collection
          name : basestring
            literal description. Usually just attribute name for the
            collection, e.g. 'states'
        """
        Collection.__init__(self, items=items, owner=owner, name=name)

        # Stack of lists of enabled states, stored by _changeTemporarily
        # to be restored by _resetEnabledTemporarily
        self.__storedTemporarily = []

    #
    # XXX TODO: figure out if there is a way to define proper
    #           __copy__'s for a hierarchy of classes. Probably we had
    #           to define __getinitargs__, etc... read more...
    #

    def _cls_repr(self):
        """Part of __repr__ for the owner object

        :Return:
          list with "enable_states=..." and/or "disable_states=..."
          entries naming the states which differ from their defaults
        """
        prefixes = []
        for name, invert in (('enable', False), ('disable', True)):
            states = self._getEnabled(nondefault=False,
                                      invert=invert)
            if len(states):
                prefixes.append("%s_states=%s" % (name, str(states)))
        return prefixes


    def _is_initializable(self, index):
        """Checks if index could be assigned within collection via
        setvalue
        """
        return index in ['enable_states', 'disable_states']


    def _initialize(self, index, value):
        """Handle `enable_states`/`disable_states` constructor arguments

        :Raise `ValueError`: for any other `index`
        """
        if value is None:
            value = []
        if index == 'enable_states':
            self.enable(value, missingok=True)
        elif index == 'disable_states':
            self.disable(value)
        else:
            raise ValueError(
                "StateCollection can accept only enable_states "
                "and disable_states arguments for the initialization. "
                "Got %s" % index)


    def _copy_states_(self, fromstate, index=None, deep=False):
        """Copy known here states from `fromstate` object into current object

        :Parameters:
          fromstate : Collection or ClassWithCollections
            Source states to copy from
          index : None or list of basestring
            If not to copy all set state variables, index provides
            selection of what to copy
          deep : bool
            Optional control over the way to copy

        Crafted to overcome a problem mentioned above in the comment
        and is to be called from __copy__ of derived classes

        Probably sooner than later will get proper __getstate__,
        __setstate__
        """
        # TODO: FOR NOW NO TEST! But this beast needs to be fixed...
        if deep:
            operation = copy.deepcopy
        else:
            operation = copy.copy

        if isinstance(fromstate, ClassWithCollections):
            fromstate = fromstate.states

        _items, from_items = self._items, fromstate._items
        if index is None:
            # copy all set ones
            for name in fromstate.whichSet():
                _items[name] = operation(from_items[name])
        else:
            # copy the requested ones which the source knows about
            isKnown = fromstate.isKnown
            for name in index:
                if isKnown(name):
                    _items[name] = operation(from_items[name])


    def isEnabled(self, index):
        """Returns `True` if state `index` is enabled

        :Raise `KeyError`: if `index` is not registered
        """
        self._checkIndex(index)
        return self._items[index].isEnabled


    def isActive(self, index):
        """Returns `True` if state `index` is known and is enabled"""
        return self.isKnown(index) and self.isEnabled(index)


    def enable(self, index, value=True, missingok=False):
        """Enable state variable given in `index`"""
        self._action(index, StateVariable.enable, missingok=missingok,
                     value=value)


    def disable(self, index):
        """Disable state variable defined by `index` id"""
        self._action(index, StateVariable.enable, missingok=False,
                     value=False)


    # TODO XXX think about some more generic way to grab temporary
    # snapshot of CollectableAttributes to be restored later on...
    def _changeTemporarily(self, enable_states=None,
                           disable_states=None, other=None):
        """Temporarily enable/disable needed states for computation

        The current list of enabled states is stored, then
        `enable_states` get enabled and `disable_states` disabled.  If
        `other` is given, the states enabled in `other`, not listed in
        `enable_states` and known here get enabled as well.  Use
        `_resetEnabledTemporarily` to restore the stored list.

        `other` can be a ClassWithCollections object or StateCollection
        """
        if enable_states is None:
            enable_states = []
        if disable_states is None:
            disable_states = []
        self.__storedTemporarily.append(self.enabled)
        other_ = other
        if isinstance(other, ClassWithCollections):
            other = other.states

        if other is not None:
            # states enabled in other, not requested explicitly and
            # known to this collection
            add_enable_states = list(set(other.enabled).difference(
                set(enable_states)).intersection(self.names))
            if len(add_enable_states) > 0:
                if __debug__:
                    debug("ST",
                          "Adding states %s from %s to be enabled temporarily" %
                          (add_enable_states, other_) +
                          " since they are not enabled in %s" %
                          (self))
                # build a new list: the one given by the caller must
                # stay intact
                enable_states = enable_states + add_enable_states

        # Lets go one by one enabling only disabled once... but could be as
        # simple as
        self.enable(enable_states)
        self.disable(disable_states)


    def _resetEnabledTemporarily(self):
        """Reset to previousely stored set of enabled states

        :Raise `ValueError`: if there is nothing stored to be restored
        """
        if __debug__:
            debug("ST", "Resetting to previous set of enabled states")
        # it is the stack of stored lists which matters here, not
        # whatever is enabled at the moment
        if len(self.__storedTemporarily) > 0:
            self.enabled = self.__storedTemporarily.pop()
        else:
            raise ValueError("Trying to restore not-stored list of enabled "
                             "states")


    def _getEnabled(self, nondefault=True, invert=False):
        """Return list of enabled states

        :Parameters:
          nondefault : bool
            If True, all enabled states are returned.  If False, only
            those of them whose enabled status differs from the default
            one (`_defaultenabled` of the item)
          invert : bool
            Would invert the meaning, ie would return disabled states
        """
        if invert:
            fmatch = lambda y: not self.isEnabled(y)
        else:
            fmatch = lambda y: self.isEnabled(y)

        if nondefault:
            ffunc = fmatch
        else:
            ffunc = lambda y: fmatch(y) and \
                        self._items[y]._defaultenabled != self.isEnabled(y)
        return filter(ffunc, self.names)


    def _setEnabled(self, indexlist):
        """Given `indexlist` make only those in the list enabled

        It might be handy to store set of enabled states and then to restore
        it later on. It can be easily accomplished now::

        >>> from mvpa.misc.state import ClassWithCollections, StateVariable
        >>> class Blah(ClassWithCollections):
        ...   bleh = StateVariable(enabled=False, doc='Example')
        ...
        >>> blah = Blah()
        >>> states_enabled = blah.states.enabled
        >>> blah.states.enabled = ['bleh']
        >>> blah.states.enabled = states_enabled
        """
        for index in self._items.keys():
            self.enable(index, index in indexlist)


    # Properties
    enabled = property(fget=_getEnabled, fset=_setEnabled)
##################################################################
# Base classes (and metaclass) which use collections
#

#
# Helper dictionaries for AttributesCollector
#
# Maps the class name of a collectable attribute to the pair
# (name of the collection it goes into, class of that collection)
_known_collections = {
    # Quite a generic one but mostly in classifiers
    'StateVariable': ("states", StateCollection),
    # For classifiers only
    'Parameter': ("params", ParameterCollection),
    'KernelParameter': ("kernel_params", ParameterCollection),
    # For datasets
    # XXX custom collections needed?
    'SampleAttribute': ("sa", SampleAttributesCollection),
    'FeatureAttribute': ("fa", SampleAttributesCollection),
    'DatasetAttribute': ("dsa", SampleAttributesCollection),
    }


# Mapping from collection name into Collection class
_col2class = dict((colname, colclass)
                  for colname, colclass in _known_collections.values())


# Order in which collections contribute to __repr__ of their owner
_COLLECTIONS_ORDER = ['sa', 'fa', 'dsa',
                      'params', 'kernel_params', 'states']
class AttributesCollector(type):
    """Intended to collect and compose StateCollection for any child
    class of this metaclass
    """

    def __init__(cls, name, bases, dict):
        """Build `_collections_template` and the documentation of `cls`

        CollectableAttributes found in the class body are removed from
        the class and sorted into collections according to
        `_known_collections`; items of the templates of the bases which
        use this metaclass are merged in.  The class gets assigned
        `_collections_template`, `_paramscols`, `_statesdoc` and, if
        there is anything to document, `_paramsdoc` and an enhanced
        `__doc__`.

        :Raise `KeyError`: if the class of a CollectableAttribute is not
          listed in `_known_collections`
        :Raise `ValueError`: if `_ATTRIBUTE_COLLECTIONS` of the class
          requests an unknown collection
        """
        if __debug__:
            debug(
                "COLR",
                "AttributesCollector call for %s.%s, where bases=%s, dict=%s " \
                % (cls, name, bases, dict))

        super(AttributesCollector, cls).__init__(name, bases, dict)

        # Attributes declared in the body of this very class.  The loop
        # variable is deliberately not called `name`, to keep the
        # argument intact.
        own = {}
        for attr_name, value in dict.iteritems():
            if isinstance(value, CollectableAttribute):
                baseclassname = value.__class__.__name__
                # XXX should we allow to throw exceptions here?
                col = _known_collections[baseclassname][0]
                own.setdefault(col, {})[attr_name] = value
                # and assign name if not yet was set
                if value.name is None:
                    value._setName(attr_name)
                # !!! We do not keep copy of this attribute static in the
                #     class.  Due to below traversal of base classes, we
                #     should be able to construct proper collections even
                #     in derived classes
                delattr(cls, attr_name)

        # Items inherited from the bases.  They are always merged into
        # fresh dictionaries, so the templates of the bases never get
        # modified (nor shared) by the class being created.
        collections = {}
        for base in bases:
            if hasattr(base, "__metaclass__") and \
                   base.__metaclass__ == AttributesCollector:
                newcollections = base._collections_template
                if len(newcollections) == 0:
                    continue
                if __debug__:
                    debug("COLR",
                          "Collect collections %s for %s from %s" %
                          (newcollections, cls, base))
                for col, collection in newcollections.iteritems():
                    collections.setdefault(col, {}).update(collection.items)

        # Own attributes go last, so a class can override what it
        # inherits
        for col, items in own.iteritems():
            collections.setdefault(col, {}).update(items)

        if __debug__:
            debug("COLR",
                  "Creating StateCollection template %s with collections %s"
                  % (cls, collections.keys()))

        # if there is an explicit request for (possibly empty) collections
        if hasattr(cls, "_ATTRIBUTE_COLLECTIONS"):
            for col in cls._ATTRIBUTE_COLLECTIONS:
                if col not in _col2class:
                    raise ValueError(
                        "Requested collection %s is unknown to collector" %
                        col)
                if col not in collections:
                    collections[col] = None

        # TODO: check on conflict in names of Collections' items!  since
        #       otherwise even order is not definite since we use dict for
        #       collections.
        # XXX should we switch to tuple?

        # replace the dictionaries of items with actual Collections
        for col in list(collections.keys()):
            collections[col] = _col2class[col](collections[col])

        setattr(cls, "_collections_template", collections)

        #
        # Expand documentation for the class based on the listed
        # parameters an if it is stateful
        #
        # TODO -- figure nice way on how to alter __init__ doc directly...

        # NOTE(review): runs of whitespace inside the string literals
        #   below (indentation of the generated documentation) were not
        #   recoverable from the listing this file was restored from --
        #   confirm them against the original source.

        # Parameters
        paramsdoc = ""
        paramscols = []
        for col in ('params', 'kernel_params'):
            if col in collections:
                paramscols.append(col)
                # lets at least sort the parameters for consistent output
                col_items = collections[col].items
                params = [(v._instance_index, k)
                          for k, v in col_items.iteritems()]
                params.sort()
                paramsdoc += '\n'.join(
                    [col_items[param].doc(indent='  ')
                     for index, param in params]) + '\n'

        # Parameters collection could be taked hash of to decide if
        # any were changed? XXX may be not needed at all?
        setattr(cls, "_paramscols", paramscols)

        # States doc
        statesdoc = ""
        if 'states' in collections:
            paramsdoc += """  enable_states : None or list of basestring
    Names of the state variables which should be enabled additionally
    to default ones
  disable_states : None or list of basestring
    Names of the state variables which should be disabled
"""
            statesdoc = "  * "
            statesdoc += '\n  * '.join(collections['states'].listing)
            statesdoc += "\n\n(States enabled by default are listed with `+`)"
        if __debug__:
            debug("COLR", "Assigning __statesdoc to be %s" % statesdoc)
        setattr(cls, "_statesdoc", statesdoc)

        if paramsdoc != "":
            if __debug__ and 'COLR' in debug.active:
                debug("COLR", "Assigning __paramsdoc to be %s" % paramsdoc)
            setattr(cls, "_paramsdoc", paramsdoc)

        if paramsdoc + statesdoc != "":
            cls.__doc__ = enhancedDocString(cls, *bases)
948 949 950
951 -class ClassWithCollections(object):
952 """Base class for objects which contain any known collection 953 954 Classes inherited from this class gain ability to access 955 collections and their items as simple attributes. Access to 956 collection items "internals" is done via <collection_name> attribute 957 and interface of a corresponding `Collection`. 958 """ 959 960 _DEV__doc__ = """ 961 TODO: rename 'descr'? -- it should simply 962 be 'doc' -- no need to drag classes docstring imho. 963 """ 964 965 __metaclass__ = AttributesCollector 966
967 - def __new__(cls, *args, **kwargs):
968 """Initialize ClassWithCollections object 969 970 :Parameters: 971 descr : basestring 972 Description of the instance 973 """ 974 self = super(ClassWithCollections, cls).__new__(cls) 975 976 s__dict__ = self.__dict__ 977 978 # init variable 979 # XXX: Added as pylint complained (rightfully) -- not sure if false 980 # is the proper default 981 self.__params_set = False 982 983 # need to check to avoid override of enabled states in the case 984 # of multiple inheritance, like both ClassWithCollectionsl and Harvestable 985 if not s__dict__.has_key('_collections'): 986 s__class__ = self.__class__ 987 988 collections = copy.deepcopy(s__class__._collections_template) 989 s__dict__['_collections'] = collections 990 s__dict__['_known_attribs'] = {} 991 """Dictionary to contain 'links' to the collections from each 992 known attribute. Is used to gain some speed up in lookup within 993 __getattribute__ and __setattr__ 994 """ 995 996 # Assign owner to all collections 997 for col, collection in collections.iteritems(): 998 if col in s__dict__: 999 raise ValueError, \ 1000 "Object %s has already attribute %s" % \ 1001 (self, col) 1002 s__dict__[col] = collection 1003 collection.name = col 1004 collection.owner = self 1005 1006 self.__params_set = False 1007 1008 if __debug__: 1009 descr = kwargs.get('descr', None) 1010 debug("COL", "ClassWithCollections.__new__ was done " 1011 "for %s#%s with descr=%s" \ 1012 % (s__class__.__name__, id(self), descr)) 1013 1014 return self
1015 1016
1017 - def __init__(self, descr=None, **kwargs):
1018 1019 if not self.__params_set: 1020 self.__descr = descr 1021 """Set humane description for the object""" 1022 1023 # To avoid double initialization in case of multiple inheritance 1024 self.__params_set = True 1025 1026 collections = self._collections 1027 # Assign attributes values if they are given among 1028 # **kwargs 1029 for arg, argument in kwargs.items(): 1030 set = False 1031 for collection in collections.itervalues(): 1032 if collection._is_initializable(arg): 1033 collection._initialize(arg, argument) 1034 set = True 1035 break 1036 if set: 1037 trash = kwargs.pop(arg) 1038 else: 1039 known_params = reduce( 1040 lambda x,y:x+y, 1041 [x.items.keys() for x in collections.itervalues()], []) 1042 raise TypeError, \ 1043 "Unexpected keyword argument %s=%s for %s." \ 1044 % (arg, argument, self) \ 1045 + " Valid parameters are %s" % known_params 1046 1047 ## Initialize other base classes 1048 ## commented out since it seems to be of no use for now 1049 #if init_classes is not None: 1050 # # return back stateful arguments since they might be 1051 # # processed by underlying classes 1052 # kwargs.update(kwargs_stateful) 1053 # for cls in init_classes: 1054 # cls.__init__(self, **kwargs) 1055 #else: 1056 # if len(kwargs)>0: 1057 # known_params = reduce(lambda x, y: x + y, \ 1058 # [x.items.keys() for x in collections], 1059 # []) 1060 # raise TypeError, \ 1061 # "Unknown parameters %s for %s." % (kwargs.keys(), 1062 # self) \ 1063 # + " Valid parameters are %s" % known_params 1064 if __debug__: 1065 debug("COL", "ClassWithCollections.__init__ was done " 1066 "for %s#%s with descr=%s" \ 1067 % (self.__class__.__name__, id(self), descr))
1068 1069 1070 #__doc__ = enhancedDocString('ClassWithCollections', locals()) 1071 1072
def __getattribute__(self, index):
    """Resolve attribute `index`, consulting the collections first

    Lookup order for names which do not start with an underscore:
    a collection registered under that name, then the value of an item
    registered in `_known_attribs`, then regular attribute access.
    """
    # Private names bypass the collections altogether, since smth like
    # __dict__ might be queried by copy before instance is __init__ed
    if index[0] != '_':
        state = _object_getattribute(self, '__dict__')

        # is it a known collection?
        registry = state['_collections']
        if index in registry:
            return registry[index]

        # is it a part of any collection?
        links = state['_known_attribs']
        if index in links:
            owner = registry[links[index]]
            return owner._items[index].value

    # just a generic return
    return _object_getattribute(self, index)
1092 1093
def __setattr__(self, index, value):
    """Assign `value` to attribute `index`

    If `index` is registered in `_known_attribs`, the value is stored
    in the corresponding item of its collection and `value` is
    returned.  Private names (leading underscore) and everything else
    are set as regular attributes.
    """
    if index[0] != '_':
        # Check if a part of a collection, and set appropriately
        state = _object_getattribute(self, '__dict__')
        links = state['_known_attribs']
        if index in links:
            owner = state['_collections'][links[index]]
            owner[index].value = value
            return value

    # Generic setattr
    return _object_setattr(self, index, value)
1108 1109 1110 # XXX not sure if we shouldn't implement anything else...
def reset(self):
    """Call `reset()` on every collection of this object"""
    held_collections = self._collections.values()
    for held in held_collections:
        held.reset()
1114 1115
def __str__(self):
    """Return a short summary of the object

    The summary consists of the class name, the description (if one is
    set) and, per collection, the number of its items, its name and its
    own string form.

    It is safe to call on a partially constructed instance (e.g. when
    an error message is formatted before `__init__` has run): a missing
    description or missing collections are simply left out.
    """
    pieces = ["%s:" % (self.__class__.__name__)]

    # The description gets assigned only in __init__, so it is looked
    # up defensively under its name-mangled form
    descr = getattr(self, '_ClassWithCollections__descr', None)
    if descr is not None:
        pieces.append("/%s " % descr)

    if hasattr(self, "_collections"):
        for col, collection in self._collections.items():
            pieces.append(" %d %s:%s"
                          % (len(collection.items), col, str(collection)))

    return "".join(pieces)
1124 1125
def __repr__(self, prefixes=None, fullname=False):
    """String definition of the object of ClassWithCollections object

    :Parameters:
      fullname : bool
        Either to include full name of the module
      prefixes : list of strings
        What other prefixes to prepend to list of arguments
    """
    # operate on a copy, so the list given by the caller stays intact
    if prefixes is None:
        args = []
    else:
        args = prefixes[:]

    id_str = ""
    if __debug__:
        # active debug targets might force module name and/or id
        if 'MODULE_IN_REPR' in debug.active:
            fullname = True
        if 'ID_IN_REPR' in debug.active:
            id_str = '#%s' % id(self)

    module_str = ""
    if fullname:
        modulename = '%s' % self.__class__.__module__
        if modulename != "__main__":
            module_str = "%s." % modulename

    # Collections' attributes, in the order of _COLLECTIONS_ORDER;
    # collections which this object does not have are skipped
    collections = self._collections
    for col in _COLLECTIONS_ORDER:
        collection = collections.get(col, None)
        if collection is not None:
            args += collection._cls_repr()

    # Description if present
    descr = self.__descr
    if descr is not None:
        args.append("descr=%s" % repr(descr))

    return "%s%s(%s)%s" % (module_str, self.__class__.__name__,
                           ', '.join(args), id_str)
# Read-only access to the description of the object
descr = property(
    fget=lambda self: self.__descr,
    doc="Description of the object if any")
1171 1172 1173
class Harvestable(ClassWithCollections):
    """Classes inherited from this class intend to collect attributes
    within internal processing.

    Subclassing Harvestable we gain ability to collect any internal
    data from the processing which is especially important if an
    object performs something in loop and discards some intermediate
    possibly interesting results (like in case of
    CrossValidatedTransferError and states of the trained classifier
    or TransferError).

    """

    harvested = StateVariable(enabled=False, doc=
       """Store specified attributes of classifiers at each split""")

    _KNOWN_COPY_METHODS = [ None, 'copy', 'deepcopy' ]


    def __init__(self, harvest_attribs=None, copy_attribs='copy', **kwargs):
        """Initialize state of harvestable

        :Parameters:
          harvest_attribs : list of basestr or dicts
            What attributes of call to store and return within
            harvested state variable. If an item is a dictionary,
            following keys are used ['name', 'copy']
          copy_attribs : None or basestr
            Default copying. If None -- no copying, 'copy'
            - shallow copying, 'deepcopy' -- deepcopying

        """
        ClassWithCollections.__init__(self, **kwargs)

        # has to be assigned before _setAttribs gets called, since it
        # serves there as the default copy method
        self.__copy_attribs = copy_attribs

        self._setAttribs(harvest_attribs)


    def _setAttribs(self, attribs):
        """Set attributes to harvest

        Each attribute in self.__attribs must have following fields
         - name : functional (or arbitrary if 'obj' or 'attr' is set)
                  description of the thing to harvest,
                  e.g. 'transerror.clf.training_time'
         - obj : name of the object to harvest from (if empty or
                 absent, 'self' is assumed),
                 e.g 'transerror'
         - attr : attribute of 'obj' to harvest (if absent, the object
                 itself is harvested),
                 e.g. 'clf.training_time'
         - copy : None, 'copy' or 'deepcopy' - way to copy attribute

        Items of `attribs` are either such dictionaries (only 'name'
        is mandatory) or plain names.  Given dictionaries are copied,
        thus never modified.  Nothing is changed in the object if any
        of the items is invalid.

        :Raises:
          ValueError
            If a dictionary lacks 'name' or the copy method is unknown
        """
        if not attribs:
            # just to make sure it is not None or 0
            self.__attribs = []
            return

        prepared = []
        for attrib in attribs:
            if isinstance(attrib, dict):
                if 'name' not in attrib:
                    raise ValueError(
                        "Harvestable: attribute must be a string or "
                        "a dictionary with 'name'")
                # work on a copy: dictionary of the caller stays intact
                attrib = dict(attrib)
            else:
                attrib = {'name': attrib}

            # assign default method to copy
            if 'copy' not in attrib:
                attrib['copy'] = self.__copy_attribs

            # check copy method
            if attrib['copy'] not in self._KNOWN_COPY_METHODS:
                raise ValueError("Unknown method %s. Known are %s" %
                                 (attrib['copy'], self._KNOWN_COPY_METHODS))

            if not ('obj' in attrib or 'attr' in attrib):
                # Process the item to harvest
                # split into obj, attr. If obj is empty, then assume self
                split = attrib['name'].split('.', 1)
                if len(split) == 1:
                    obj, attr = split[0], None
                else:
                    obj, attr = split
                attrib.update({'obj': obj, 'attr': attr})

            # only one of 'obj'/'attr' might have been provided
            attrib.setdefault('attr', None)
            if attrib.get('obj', '') == '':
                attrib['obj'] = 'self'

            # TODO: may be enabling of the states??

            prepared.append(attrib)

        # everything is valid -- force the state and store the result
        self.states.enable('harvested')
        self.__attribs = prepared


    def _harvest(self, vars):
        """The harvesting function: must obtain dictionary of variables
        from the caller.

        :Parameters:
          vars : dict
            Dictionary of available data. Most often locals() could be
            passed as `vars`. Note that private attributes which are
            to be harvested should better be bound to some local
            variable by the caller.

        :Returns:
          nothing
        """

        if not self.states.isEnabled('harvested') or not self.__attribs:
            return

        # on the first call create an empty list per harvested name
        if not self.states.isSet('harvested'):
            self.harvested = dict((a['name'], []) for a in self.__attribs)

        copiers = {'copy': copy.copy,
                   'deepcopy': copy.deepcopy,
                   None: lambda x: x}

        for attrib in self.__attribs:
            attrv = vars[attrib['obj']]

            # access particular attribute if needed
            if attrib['attr'] is not None:
                # NOTE(review): eval runs whatever expression is given
                # in 'attr' -- harvest specifications must never come
                # from an untrusted source
                attrv = eval('attrv.%s' % attrib['attr'])

            # copy the value if needed
            attrv = copiers[attrib['copy']](attrv)

            self.harvested[attrib['name']].append(attrv)


    harvest_attribs = property(fget=lambda self:self.__attribs,
                               fset=_setAttribs)
1308