# learningdata Module -- learningdata.py in AIMA Python Code


"""Data sets for machine learning problems. (Chapters 18-21)."""

from __future__ import nested_scopes
import utils
from learning import *
import random


def RestaurantDataSet(examples):
    """Build a DataSet of Restaurant waiting examples.

    `examples` is handed to DataSet unchanged; this module passes either
    a text table (one example per line, fields separated by spaces) or a
    list of already-built example lists.  The last attribute, 'Wait', is
    the target.
    """
    return DataSet(name='Restaurant', target='Wait', examples=examples,
                   attrnames='Alternate Bar Fri/Sat Hungry Patrons Price '
                   + 'Raining Reservation Type WaitEstimate Wait',
                   doc='Data from AIMA [Fig. 18.5]')


# The twelve restaurant examples, one per line.  Columns follow the order
# of `attrnames` in RestaurantDataSet.
# NOTE(review): assumes DataSet splits a string of examples into rows on
# line breaks -- confirm against learning.DataSet.
restaurant = RestaurantDataSet("""
Yes No No Yes Some $$$ No Yes French 0-10 Yes
Yes No No Yes Full $ No No Thai 30-60 No
No Yes No No Some $ No No Burger 0-10 Yes
Yes No Yes Yes Full $ No No Thai 10-30 Yes
Yes No Yes No Full $$$ No Yes French >60 No
No Yes No Yes Some $$ Yes Yes Italian 0-10 Yes
No Yes No No None $ Yes No Burger 0-10 No
No No No Yes Some $$ Yes Yes Thai 0-10 Yes
No Yes Yes No Full $ Yes No Burger >60 No
Yes Yes Yes Yes Full $$$ No Yes Italian 10-30 No
No No No No None $ No No Thai 0-10 No
Yes Yes Yes Yes Full $ No No Burger 30-60 Yes""")


def SyntheticRestaurant(n=20):
    """Generate a DataSet with n examples.

    Each example gets a random value for every attribute (drawn from the
    values seen in `restaurant`), and its target is then overwritten with
    the prediction of a fixed, hand-built decision tree, so the generated
    data is always consistent with that tree.
    """
    def T(attrname, branches):
        # Shorthand for a decision-tree node that tests `attrname`.
        return DecisionTree(restaurant.attrnum(attrname), attrname, branches)

    tree = T('Patrons',
             {'None': 'No',
              'Some': 'Yes',
              'Full': T('WaitEstimate',
                        {'>60': 'No',
                         '0-10': 'Yes',
                         '30-60': T('Alternate',
                                    {'No': T('Reservation',
                                             {'Yes': 'Yes',
                                              'No': T('Bar',
                                                      {'No': 'No',
                                                       'Yes': 'Yes'})}),
                                     'Yes': T('Fri/Sat',
                                              {'No': 'No',
                                               'Yes': 'Yes'})}),
                         '10-30': T('Hungry',
                                    {'No': 'Yes',
                                     'Yes': T('Alternate',
                                              {'No': 'Yes',
                                               'Yes': T('Raining',
                                                        {'No': 'No',
                                                         'Yes': 'Yes'})})})})})

    def gen():
        # Build a real list (not a lazy map object) so the target slot can
        # be assigned by index on every Python version.
        example = [random.choice(choices) for choices in restaurant.values]
        example[restaurant.target] = tree.predict(example)
        return example

    return RestaurantDataSet([gen() for _ in range(n)])
orings = DataSet(name='O-Rings', attrnames="Rings Distressed Temp Pressure Flightnum", target='Distressed', examples=""" 6 0 66 50 1 6 1 70 50 2 6 0 69 50 3 6 0 68 50 4 6 0 67 50 5 6 0 72 50 6 6 0 73 100 7 6 0 70 100 8 6 1 57 200 9 6 1 63 200 10 6 1 70 200 11 6 0 78 200 12 6 0 67 200 13 6 2 53 200 14 6 0 67 200 15 6 0 75 200 16 6 0 70 200 17 6 0 81 200 18 6 0 76 200 19 6 0 79 200 20 6 0 75 200 21 6 0 76 200 22 6 1 58 200 23""", source="http://www1.ics.uci.edu/pub/machine-learning-databases/space-shuttle/", doc="""1. Title: Challenger Space Shuttle O-Ring Data (2 databases) 2. Sources: -- David Draper (draper@math.ucla.edu) University of California, Los Angeles -- Donor: David Draper (draper@math.ucla.edu) -- Date: 5 August 1993 3. Past Usage: 1. Draper,~D. (1993). Assessment and propagation of model uncertainty. In {\it Proceedings of the Fourth International Workshop on Artificial Intelligence and Statistics} (pp. 497--509). Ft. Lauderdale, FL: Unpublished. -- Discrete model uncertainty analysis -- Analysis suggests that obvious different extrapolations of the data exist at 31 degrees Fahrenheit (i.e., freezing), which sharply discredits the assumption of no temperature effect. 2. Dalal,~S.~R., Fowlkes,~E.~B., \& Hoadley,~B. (1989). Risk analysis of the space shuttle: pre-Challenger prediction of failure. {\it Journal of the American Statisticians Association}, {\it 84}, 945--957. 3. Lavine,~M. (1991). Problems in extrapolation illustrated with space shuttle O-ring data. {\it Journal of the American Statisticians Association}, {\it 86}, 919--922. 4. Martz~H.~F., \& Zimmer,~W.~J. (1992). The risk of catastrophic failure of the solid rocket boosters on the space shuttle. {\it American Statistics}, {\it 46}, 42--47. 4. Number of instances: 23 in each of two files 5. Relevant Information: There are two databases: (both use the same set of 5 attributes) 1. Primary o-ring erosion and/or blowby 2. 
Primary o-ring erosion only The two databases are identical except for the 2nd attribute of the 21st instance (confirmed by David Draper on 8/5/93). Edited from (Draper, 1993): The motivation for collecting this database was the explosion of the USA Space Shuttle Challenger on 28 January, 1986. An investigation ensued into the reliability of the shuttle's propulsion system. The explosion was eventually traced to the failure of one of the three field joints on one of the two solid booster rockets. Each of these six field joints includes two O-rings, designated as primary and secondary, which fail when phenomena called erosion and blowby both occur. The night before the launch a decision had to be made regarding launch safety. The discussion among engineers and managers leading to this decision included concern that the probability of failure of the O-rings depended on the temperature t at launch, which was forecase to be 31 degrees F. There are strong engineering reasons based on the composition of O-rings to support the judgment that failure probability may rise monotonically as temperature drops. One other variable, the pressure s at which safety testing for field join leaks was performed, was available, but its relevance to the failure process was unclear. Draper's paper includes a menacing figure graphing the number of field joints experiencing stress vs. liftoff temperature for the 23 shuttle flights previous to the Challenger disaster. No previous liftoff temperature was under 53 degrees F. Although tremendous extrapolation must be done from the given data to assess risk at 31 degrees F, it is obvious even to the layman "to foresee the unacceptably high risk created by launching at 31 degrees F." For more information, see Draper (1993) or the other previous analyses. The task is to predict the number of O-rings that will experience thermal distress for a given flight when the launch temperature is below freezing. 6. Number of Attributes: 5 1. 
Number of O-rings at risk on a given flight 2. Number experiencing thermal distress 3. Launch temperature (degrees F) 4. Leak-check pressure (psi) 5. Temporal order of flight 7. Attribute Information: all values are positive integers""")
zoo = DataSet(name='Zoo', target='type', exclude=['name'], attrnames="""name hair feathers eggs milk airborne aquatic predator toothed backbone breathes venomous fins legs tail domestic catsize type""", examples=""" aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,mammal antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,mammal bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,fish bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,mammal boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal buffalo,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,mammal calf,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,mammal carp,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,fish catfish,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,fish cavy,1,0,0,1,0,0,0,1,1,1,0,0,4,0,1,0,mammal cheetah,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal chicken,0,1,1,0,1,0,0,0,1,1,0,0,2,1,1,0,bird chub,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,fish clam,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,shellfish crab,0,0,1,0,0,1,1,0,0,0,0,0,4,0,0,0,shellfish crayfish,0,0,1,0,0,1,1,0,0,0,0,0,6,0,0,0,shellfish crow,0,1,1,0,1,0,1,0,1,1,0,0,2,1,0,0,bird deer,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,mammal dogfish,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,fish dolphin,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,mammal dove,0,1,1,0,1,0,0,0,1,1,0,0,2,1,1,0,bird duck,0,1,1,0,1,1,0,0,1,1,0,0,2,1,0,0,bird elephant,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,mammal flamingo,0,1,1,0,1,0,0,0,1,1,0,0,2,1,0,1,bird flea,0,0,1,0,0,0,0,0,0,1,0,0,6,0,0,0,insect frog,0,0,1,0,0,1,1,1,1,1,0,0,4,0,0,0,amphibian frog,0,0,1,0,0,1,1,1,1,1,1,0,4,0,0,0,amphibian fruitbat,1,0,0,1,1,0,0,1,1,1,0,0,2,1,0,0,mammal giraffe,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,mammal girl,1,0,0,1,0,0,1,1,1,1,0,0,2,0,1,1,mammal gnat,0,0,1,0,1,0,0,0,0,1,0,0,6,0,0,0,insect goat,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,mammal gorilla,1,0,0,1,0,0,0,1,1,1,0,0,2,0,0,1,mammal gull,0,1,1,0,1,1,1,0,1,1,0,0,2,1,0,0,bird haddock,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,fish hamster,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,0,mammal hare,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,0,mammal hawk,0,1,1,0,1,0,1,0,1,1,0,0,2,1,0,0,bird herring,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,fish 
honeybee,1,0,1,0,1,0,0,0,0,1,1,0,6,0,1,0,insect housefly,1,0,1,0,1,0,0,0,0,1,0,0,6,0,0,0,insect kiwi,0,1,1,0,0,0,1,0,1,1,0,0,2,1,0,0,bird ladybird,0,0,1,0,1,0,1,0,0,1,0,0,6,0,0,0,insect lark,0,1,1,0,1,0,0,0,1,1,0,0,2,1,0,0,bird leopard,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal lion,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal lobster,0,0,1,0,0,1,1,0,0,0,0,0,6,0,0,0,shellfish lynx,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal mink,1,0,0,1,0,1,1,1,1,1,0,0,4,1,0,1,mammal mole,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,0,mammal mongoose,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal moth,1,0,1,0,1,0,0,0,0,1,0,0,6,0,0,0,insect newt,0,0,1,0,0,1,1,1,1,1,0,0,4,1,0,0,amphibian octopus,0,0,1,0,0,1,1,0,0,0,0,0,8,0,0,1,shellfish opossum,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,0,mammal oryx,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,mammal ostrich,0,1,1,0,0,0,0,0,1,1,0,0,2,1,0,1,bird parakeet,0,1,1,0,1,0,0,0,1,1,0,0,2,1,1,0,bird penguin,0,1,1,0,0,1,1,0,1,1,0,0,2,1,0,1,bird pheasant,0,1,1,0,1,0,0,0,1,1,0,0,2,1,0,0,bird pike,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,fish piranha,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,fish pitviper,0,0,1,0,0,0,1,1,1,1,1,0,0,1,0,0,reptile platypus,1,0,1,1,0,1,1,0,1,1,0,0,4,1,0,1,mammal polecat,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal pony,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,mammal porpoise,0,0,0,1,0,1,1,1,1,1,0,1,0,1,0,1,mammal puma,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal pussycat,1,0,0,1,0,0,1,1,1,1,0,0,4,1,1,1,mammal raccoon,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal reindeer,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,mammal rhea,0,1,1,0,0,0,1,0,1,1,0,0,2,1,0,1,bird scorpion,0,0,0,0,0,0,1,0,0,1,1,0,8,1,0,0,shellfish seahorse,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,fish seal,1,0,0,1,0,1,1,1,1,1,0,1,0,0,0,1,mammal sealion,1,0,0,1,0,1,1,1,1,1,0,1,2,1,0,1,mammal seasnake,0,0,0,0,0,1,1,1,1,0,1,0,0,1,0,0,reptile seawasp,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,shellfish skimmer,0,1,1,0,1,1,1,0,1,1,0,0,2,1,0,0,bird skua,0,1,1,0,1,1,1,0,1,1,0,0,2,1,0,0,bird slowworm,0,0,1,0,0,0,1,1,1,1,0,0,0,1,0,0,reptile slug,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,shellfish 
sole,0,0,1,0,0,1,0,1,1,0,0,1,0,1,0,0,fish sparrow,0,1,1,0,1,0,0,0,1,1,0,0,2,1,0,0,bird squirrel,1,0,0,1,0,0,0,1,1,1,0,0,2,1,0,0,mammal starfish,0,0,1,0,0,1,1,0,0,0,0,0,5,0,0,0,shellfish stingray,0,0,1,0,0,1,1,1,1,0,1,1,0,1,0,1,fish swan,0,1,1,0,1,1,0,0,1,1,0,0,2,1,0,1,bird termite,0,0,1,0,0,0,0,0,0,1,0,0,6,0,0,0,insect toad,0,0,1,0,0,1,0,1,1,1,0,0,4,0,0,0,amphibian tortoise,0,0,1,0,0,0,0,0,1,1,0,0,4,1,0,1,reptile tuatara,0,0,1,0,0,0,1,1,1,1,0,0,4,1,0,0,reptile tuna,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,1,fish vampire,1,0,0,1,1,0,0,1,1,1,0,0,2,1,0,0,mammal vole,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,0,mammal vulture,0,1,1,0,1,0,1,0,1,1,0,0,2,1,0,1,bird wallaby,1,0,0,1,0,0,0,1,1,1,0,0,2,1,0,1,mammal wasp,1,0,1,0,1,0,0,0,0,1,1,0,6,0,0,0,insect wolf,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,mammal worm,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,shellfish wren,0,1,1,0,1,0,0,0,1,1,0,0,2,1,0,0,bird """, source='http://www1.ics.uci.edu/pub/machine-learning-databases/zoo/', doc=""" 1. Title: Zoo database 2. Source Information -- Creator: Richard Forsyth -- Donor: Richard S. Forsyth 8 Grosvenor Avenue Mapperley Park Nottingham NG3 5DX 0602-621676 -- Date: 5/15/1990 3. Past Usage: -- None known other than what is shown in Forsyth's PC/BEAGLE User's Guide. 4. Relevant Information: -- A simple database containing 17 Boolean-valued attributes. The "type" attribute appears to be the class attribute. Here is a breakdown of which animals are in which type: (I find it unusual that there are 2 instances of "frog" and one of "girl"!) 
Class# Set of animals: ====== =============================================================== 1 (41) aardvark, antelope, bear, boar, buffalo, calf, cavy, cheetah, deer, dolphin, elephant, fruitbat, giraffe, girl, goat, gorilla, hamster, hare, leopard, lion, lynx, mink, mole, mongoose, opossum, oryx, platypus, polecat, pony, porpoise, puma, pussycat, raccoon, reindeer, seal, sealion, squirrel, vampire, vole, wallaby,wolf 2 (20) chicken, crow, dove, duck, flamingo, gull, hawk, kiwi, lark, ostrich, parakeet, penguin, pheasant, rhea, skimmer, skua, sparrow, swan, vulture, wren 3 (5) pitviper, seasnake, slowworm, tortoise, tuatara 4 (13) bass, carp, catfish, chub, dogfish, haddock, herring, pike, piranha, seahorse, sole, stingray, tuna 5 (4) frog, frog, newt, toad 6 (8) flea, gnat, honeybee, housefly, ladybird, moth, termite, wasp 7 (10) clam, crab, crayfish, lobster, octopus, scorpion, seawasp, slug, starfish, worm 5. Number of Instances: 101 6. Number of Attributes: 18 (animal name, 15 Boolean attributes, 2 numerics) 7. Attribute Information: (name of attribute and type of value domain) 1. animal name: Unique for each instance 2. hair Boolean 3. feathers Boolean 4. eggs Boolean 5. milk Boolean 6. airborne Boolean 7. aquatic Boolean 8. predator Boolean 9. toothed Boolean 10. backbone Boolean 11. breathes Boolean 12. venomous Boolean 13. fins Boolean 14. legs Numeric (set of values: {0,2,4,5,6,8}) 15. tail Boolean 16. domestic Boolean 17. catsize Boolean 18. type Numeric (integer values in range [1,7]) 8. Missing Attribute Values: None 9. Class Distribution: Given above """)
iris = DataSet(name="Iris", attrnames="sepal-len sepal-width petal-len petal-width class", target="class", examples="""5.1,3.5,1.4,0.2,setosa 4.9,3.0,1.4,0.2,setosa 4.7,3.2,1.3,0.2,setosa 4.6,3.1,1.5,0.2,setosa 5.0,3.6,1.4,0.2,setosa 5.4,3.9,1.7,0.4,setosa 4.6,3.4,1.4,0.3,setosa 5.0,3.4,1.5,0.2,setosa 4.4,2.9,1.4,0.2,setosa 4.9,3.1,1.5,0.1,setosa 5.4,3.7,1.5,0.2,setosa 4.8,3.4,1.6,0.2,setosa 4.8,3.0,1.4,0.1,setosa 4.3,3.0,1.1,0.1,setosa 5.8,4.0,1.2,0.2,setosa 5.7,4.4,1.5,0.4,setosa 5.4,3.9,1.3,0.4,setosa 5.1,3.5,1.4,0.3,setosa 5.7,3.8,1.7,0.3,setosa 5.1,3.8,1.5,0.3,setosa 5.4,3.4,1.7,0.2,setosa 5.1,3.7,1.5,0.4,setosa 4.6,3.6,1.0,0.2,setosa 5.1,3.3,1.7,0.5,setosa 4.8,3.4,1.9,0.2,setosa 5.0,3.0,1.6,0.2,setosa 5.0,3.4,1.6,0.4,setosa 5.2,3.5,1.5,0.2,setosa 5.2,3.4,1.4,0.2,setosa 4.7,3.2,1.6,0.2,setosa 4.8,3.1,1.6,0.2,setosa 5.4,3.4,1.5,0.4,setosa 5.2,4.1,1.5,0.1,setosa 5.5,4.2,1.4,0.2,setosa 4.9,3.1,1.5,0.1,setosa 5.0,3.2,1.2,0.2,setosa 5.5,3.5,1.3,0.2,setosa 4.9,3.1,1.5,0.1,setosa 4.4,3.0,1.3,0.2,setosa 5.1,3.4,1.5,0.2,setosa 5.0,3.5,1.3,0.3,setosa 4.5,2.3,1.3,0.3,setosa 4.4,3.2,1.3,0.2,setosa 5.0,3.5,1.6,0.6,setosa 5.1,3.8,1.9,0.4,setosa 4.8,3.0,1.4,0.3,setosa 5.1,3.8,1.6,0.2,setosa 4.6,3.2,1.4,0.2,setosa 5.3,3.7,1.5,0.2,setosa 5.0,3.3,1.4,0.2,setosa 7.0,3.2,4.7,1.4,versicolor 6.4,3.2,4.5,1.5,versicolor 6.9,3.1,4.9,1.5,versicolor 5.5,2.3,4.0,1.3,versicolor 6.5,2.8,4.6,1.5,versicolor 5.7,2.8,4.5,1.3,versicolor 6.3,3.3,4.7,1.6,versicolor 4.9,2.4,3.3,1.0,versicolor 6.6,2.9,4.6,1.3,versicolor 5.2,2.7,3.9,1.4,versicolor 5.0,2.0,3.5,1.0,versicolor 5.9,3.0,4.2,1.5,versicolor 6.0,2.2,4.0,1.0,versicolor 6.1,2.9,4.7,1.4,versicolor 5.6,2.9,3.6,1.3,versicolor 6.7,3.1,4.4,1.4,versicolor 5.6,3.0,4.5,1.5,versicolor 5.8,2.7,4.1,1.0,versicolor 6.2,2.2,4.5,1.5,versicolor 5.6,2.5,3.9,1.1,versicolor 5.9,3.2,4.8,1.8,versicolor 6.1,2.8,4.0,1.3,versicolor 6.3,2.5,4.9,1.5,versicolor 6.1,2.8,4.7,1.2,versicolor 6.4,2.9,4.3,1.3,versicolor 6.6,3.0,4.4,1.4,versicolor 6.8,2.8,4.8,1.4,versicolor 
6.7,3.0,5.0,1.7,versicolor 6.0,2.9,4.5,1.5,versicolor 5.7,2.6,3.5,1.0,versicolor 5.5,2.4,3.8,1.1,versicolor 5.5,2.4,3.7,1.0,versicolor 5.8,2.7,3.9,1.2,versicolor 6.0,2.7,5.1,1.6,versicolor 5.4,3.0,4.5,1.5,versicolor 6.0,3.4,4.5,1.6,versicolor 6.7,3.1,4.7,1.5,versicolor 6.3,2.3,4.4,1.3,versicolor 5.6,3.0,4.1,1.3,versicolor 5.5,2.5,4.0,1.3,versicolor 5.5,2.6,4.4,1.2,versicolor 6.1,3.0,4.6,1.4,versicolor 5.8,2.6,4.0,1.2,versicolor 5.0,2.3,3.3,1.0,versicolor 5.6,2.7,4.2,1.3,versicolor 5.7,3.0,4.2,1.2,versicolor 5.7,2.9,4.2,1.3,versicolor 6.2,2.9,4.3,1.3,versicolor 5.1,2.5,3.0,1.1,versicolor 5.7,2.8,4.1,1.3,versicolor 6.3,3.3,6.0,2.5,virginica 5.8,2.7,5.1,1.9,virginica 7.1,3.0,5.9,2.1,virginica 6.3,2.9,5.6,1.8,virginica 6.5,3.0,5.8,2.2,virginica 7.6,3.0,6.6,2.1,virginica 4.9,2.5,4.5,1.7,virginica 7.3,2.9,6.3,1.8,virginica 6.7,2.5,5.8,1.8,virginica 7.2,3.6,6.1,2.5,virginica 6.5,3.2,5.1,2.0,virginica 6.4,2.7,5.3,1.9,virginica 6.8,3.0,5.5,2.1,virginica 5.7,2.5,5.0,2.0,virginica 5.8,2.8,5.1,2.4,virginica 6.4,3.2,5.3,2.3,virginica 6.5,3.0,5.5,1.8,virginica 7.7,3.8,6.7,2.2,virginica 7.7,2.6,6.9,2.3,virginica 6.0,2.2,5.0,1.5,virginica 6.9,3.2,5.7,2.3,virginica 5.6,2.8,4.9,2.0,virginica 7.7,2.8,6.7,2.0,virginica 6.3,2.7,4.9,1.8,virginica 6.7,3.3,5.7,2.1,virginica 7.2,3.2,6.0,1.8,virginica 6.2,2.8,4.8,1.8,virginica 6.1,3.0,4.9,1.8,virginica 6.4,2.8,5.6,2.1,virginica 7.2,3.0,5.8,1.6,virginica 7.4,2.8,6.1,1.9,virginica 7.9,3.8,6.4,2.0,virginica 6.4,2.8,5.6,2.2,virginica 6.3,2.8,5.1,1.5,virginica 6.1,2.6,5.6,1.4,virginica 7.7,3.0,6.1,2.3,virginica 6.3,3.4,5.6,2.4,virginica 6.4,3.1,5.5,1.8,virginica 6.0,3.0,4.8,1.8,virginica 6.9,3.1,5.4,2.1,virginica 6.7,3.1,5.6,2.4,virginica 6.9,3.1,5.1,2.3,virginica 5.8,2.7,5.1,1.9,virginica 6.8,3.2,5.9,2.3,virginica 6.7,3.3,5.7,2.5,virginica 6.7,3.0,5.2,2.3,virginica 6.3,2.5,5.0,1.9,virginica 6.5,3.0,5.2,2.0,virginica 6.2,3.4,5.4,2.3,virginica 5.9,3.0,5.1,1.8,virginica""", doc="""1. 
Title: Iris Plants Database Updated Sept 21 by C.Blake - Added discrepency information 2. Sources: (a) Creator: R.A. Fisher (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) (c) Date: July, 1988 3. Past Usage: - Publications: too many to mention!!! Here are a few. 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems" Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to Mathematical Statistics" (John Wiley, NY, 1950). 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis. (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System Structure and Classification Rule for Recognition in Partially Exposed Environments". IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. PAMI-2, No. 1, 67-71. -- Results: -- very low misclassification rates (0% for the setosa class) 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions on Information Theory, May 1972, 431-433. -- Results: -- very low misclassification rates again 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II conceptual clustering system finds 3 classes in the data. 4. Relevant Information: --- This is perhaps the best known database to be found in the pattern recognition literature. Fisher's paper is a classic in the field and is referenced frequently to this day. (See Duda & Hart, for example.) The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. --- Predicted attribute: class of iris plant. --- This is an exceedingly simple domain. --- This data differs from the data presented in Fishers article (identified by Steve Chadwick, spchadwick@espeedaz.net ) The 35th sample should be: 4.9,3.1,1.5,0.2,"Iris-setosa" where the error is in the fourth feature. 
The 38th sample: 4.9,3.6,1.4,0.1,"Iris-setosa" where the errors are in the second and third features. 5. Number of Instances: 150 (50 in each of three classes) 6. Number of Attributes: 4 numeric, predictive attributes and the class 7. Attribute Information: 1. sepal length in cm 2. sepal width in cm 3. petal length in cm 4. petal width in cm 5. class: -- Iris Setosa -- Iris Versicolour -- Iris Virginica 8. Missing Attribute Values: None Summary Statistics: Min Max Mean SD Class Correlation sepal length: 4.3 7.9 5.84 0.83 0.7826 sepal width: 2.0 4.4 3.05 0.43 -0.4194 petal length: 1.0 6.9 3.76 1.76 0.9490 (high!) petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) 9. Class Distribution: 33.3% for each of 3 classes.""")
# Artificial, generated examples.

def Majority(k, n):
    """Return a DataSet with n k-bit examples of the majority problem:
    k random bits followed by a 1 if more than half the bits are 1, else 0."""
    examples = []
    for _ in range(n):
        bits = [random.choice([0, 1]) for _bit in range(k)]
        # Compare 2*ones with k instead of ones with k/2 so the result does
        # not depend on integer-vs-true division; int() keeps the label a
        # plain 0/1 as documented (and as Parity produces).
        bits.append(int(2 * utils.sum(bits) > k))
        examples.append(bits)
    return DataSet(name="majority", examples=examples)


def Parity(k, n, name="parity"):
    """Return a DataSet with n k-bit examples of the parity problem:
    k random bits followed by a 1 if an odd number of bits are 1, else 0."""
    examples = []
    for _ in range(n):
        bits = [random.choice([0, 1]) for _bit in range(k)]
        bits.append(utils.sum(bits) % 2)
        examples.append(bits)
    return DataSet(name=name, examples=examples)


def Xor(n):
    """Return a DataSet with n examples of 2-input xor."""
    # 2-input xor is exactly 2-bit parity.
    return Parity(2, n, name="xor")


def ContinuousXor(n):
    """Return a DataSet with n examples of a continuous xor problem.

    The 2 inputs are chosen uniformly from 0.0 to 2.0; the output is 1 if
    their integer parts differ (the xor of the truncated inputs), else 0.
    """
    examples = []
    for _ in range(n):
        x, y = [random.uniform(0.0, 2.0) for _axis in range(2)]
        # int() keeps the label a plain 0/1, matching the other generators.
        examples.append([x, y, int(int(x) != int(y))])
    return DataSet(name="continuous xor", examples=examples)
def compare(algorithms=None, datasets=None, k=10, trials=1):
    """Compare various learners on various datasets using cross-validation.
    Print results as a table.

    algorithms -- learner classes; each is instantiated with no arguments
                  once per data set.  Defaults to MajorityLearner,
                  NaiveBayesLearner, NearestNeighborLearner and
                  DecisionTreeLearner.
    datasets   -- data sets to evaluate on.  Defaults to iris, orings, zoo,
                  restaurant plus freshly generated SyntheticRestaurant(20),
                  Majority(7, 100), Parity(7, 100) and Xor(100).
    k, trials  -- passed straight through to cross_validation.

    The table has one row per algorithm (class name with 'Learner'
    removed) and one column per data set (first 7 characters of its name).
    """
    # Defaults are resolved here rather than in the signature so that no
    # mutable list is shared between calls and the random data sets are
    # generated when compare() runs, not as a side effect of importing
    # this module.
    if algorithms is None:
        algorithms = [MajorityLearner, NaiveBayesLearner,
                      NearestNeighborLearner, DecisionTreeLearner]
    if datasets is None:
        datasets = [iris, orings, zoo, restaurant, SyntheticRestaurant(20),
                    Majority(7, 100), Parity(7, 100), Xor(100)]

    rows = []
    for algorithm in algorithms:
        label = algorithm.__name__.replace('Learner', '')
        scores = [cross_validation(algorithm(), dataset, k, trials)
                  for dataset in datasets]
        rows.append([label] + scores)

    header = [''] + [dataset.name[0:7] for dataset in datasets]
    utils.print_table(rows, header=header, round=2)


# Copyright (c) 2002, Peter Norvig
# See also AI Programming (Python), Python.org Tutorial, Language Ref, Libraries.