# data.table references
# https://rawgit.com/wiki/Rdatatable/data.table/vignettes/datatable-intro-vignette.html
# https://s3.amazonaws.com/assets.datacamp.com/img/blog/data+table+cheat+sheet.pdf
#  Scott Karr
#  HW3 Mushroom Classification
#  11.14.2015

## Remove objects left over from a previous run so the script can be rerun
## from a clean state while making incremental improvements.
## Only names that currently exist are removed, so a fresh session does not
## emit "object ... not found" warnings.
stale_objects <- c("df_mushrooms", "dt_mushrooms", "theUrl")
rm(list = intersect(stale_objects, ls()))
rm(stale_objects)
## Load Data Frame from website
## library() stops with an error when data.table is missing, instead of
## returning FALSE and letting the script fail later.
library(data.table)
theUrl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
## Column labels, in file order. The file has no header row: its first line
## is already a record, so the labels are supplied here rather than read from
## the file (header = TRUE would consume that first record as column names).
mushroom_columns <- c(
  "class", "cap_shape", "cap_surface", "cap_color", "bruises", "odor",
  "gill_attachment", "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color",
  "ring_number", "ring_type", "spore_print_color", "population", "habitat"
)
## Every field is a one-letter code, so read all columns as plain character
## (no factor or logical conversion).
df_mushrooms <- read.table(
  file = theUrl,
  header = FALSE,
  sep = ",",
  col.names = mushroom_columns,
  colClasses = "character"
)
## Load as data.table
dt_mushrooms <- data.table(df_mushrooms)
## Create column class as key column for indexing
setkey(dt_mushrooms, "class")
## Decode key-value pairs for each data.table column

## Translate a vector of one-letter codes into readable labels.
##   codes  : vector of codes (character or factor)
##   lookup : named character vector; names are codes, values are labels
## Returns a character vector the same length as `codes`. A code with no
## entry in `lookup` becomes "?"; a missing code stays NA.
decode_codes <- function(codes, lookup) {
  codes <- as.character(codes)
  labels <- unname(lookup[codes])
  labels[is.na(labels) & !is.na(codes)] <- "?"
  labels
}

## One code book per column, keyed by column name (code = "label").
code_books <- list(
  ### 0. class
  class = c(e = "edible", p = "poisonous"),
  ### 1. cap_shape
  cap_shape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  ### 2. cap_surface
  cap_surface = c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth"),
  ### 3. cap_color
  cap_color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  ### 4. bruises (t = bruises, f = no bruises)
  bruises = c(t = "yes", f = "no"),
  ### 5. odor
  odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  ### 6. gill_attachment
  gill_attachment = c(
    a = "attached", d = "descending", f = "free", n = "notched"
  ),
  ### 7. gill_spacing
  gill_spacing = c(c = "close", w = "crowded", d = "distant"),
  ### 8. gill_size
  gill_size = c(b = "broad", n = "narrow"),
  ### 9. gill_color
  gill_color = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
    r = "green", o = "orange", p = "pink", u = "purple", e = "red",
    w = "white", y = "yellow"
  ),
  ### 10. stalk_shape
  stalk_shape = c(e = "enlarging", t = "tapering"),
  ### 11. stalk_root (the file marks a missing value with "?", which is
  ###     left as "?" because it has no entry here)
  stalk_root = c(
    b = "bulbous", c = "club", u = "cup",
    e = "equal", z = "rhizomorphs", r = "rooted"
  ),
  ### 12. stalk_surface_above_ring
  stalk_surface_above_ring = c(
    f = "fibrous", y = "scaly", k = "silky", s = "smooth"
  ),
  ### 13. stalk_surface_below_ring
  stalk_surface_below_ring = c(
    f = "fibrous", y = "scaly", k = "silky", s = "smooth"
  ),
  ### 14. stalk_color_above_ring
  stalk_color_above_ring = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
    p = "pink", e = "red", w = "white", y = "yellow"
  ),
  ### 15. stalk_color_below_ring
  stalk_color_below_ring = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
    p = "pink", e = "red", w = "white", y = "yellow"
  ),
  ### 16. veil_type
  veil_type = c(p = "partial", u = "universal"),
  ### 17. veil_color
  veil_color = c(n = "brown", o = "orange", w = "white", y = "yellow"),
  ### 18. ring_number
  ring_number = c(n = "none", o = "one", t = "two"),
  ### 19. ring_type
  ring_type = c(
    c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
    n = "none", p = "pendant", s = "sheathing", z = "zone"
  ),
  ### 20. spore_print_color
  spore_print_color = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
    o = "orange", u = "purple", w = "white", y = "yellow"
  ),
  ### 21. population
  population = c(
    a = "abundant", c = "clustered", n = "numerous",
    s = "scattered", v = "several", y = "solitary"
  ),
  ### 22. habitat
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

## Replace each coded column with its decoded labels.
## NOTE(review): this rewrites `class`, the column passed to setkey() earlier;
## presumably data.table drops the key when a key column is reassigned --
## confirm before relying on keyed lookups further down.
for (column in names(code_books)) {
  set(
    dt_mushrooms,
    j = column,
    value = decode_codes(dt_mushrooms[[column]], code_books[[column]])
  )
}
## The result set has to be easy to work with, so build a data.table that
## holds only a handful of interesting columns.
## Keep just the "edible" rows to answer the question posed.
ans <- dt_mushrooms[
  class == "edible",
  c(
    "cap_shape", "cap_surface", "cap_color",
    "stalk_shape", "gill_spacing", "habitat"
  ),
  with = FALSE
]
## Summarise the resulting data.table, then show the data it holds
summary(ans)
##   cap_shape         cap_surface         cap_color        
##  Length:4208        Length:4208        Length:4208       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##  stalk_shape        gill_spacing         habitat         
##  Length:4208        Length:4208        Length:4208       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character
ans
##       cap_shape cap_surface cap_color stalk_shape gill_spacing habitat
##    1:    convex      smooth    yellow   enlarging        close grasses
##    2:      bell      smooth     white   enlarging        close meadows
##    3:    convex      smooth      gray    tapering      crowded grasses
##    4:    convex       scaly    yellow   enlarging        close grasses
##    5:      bell      smooth     white   enlarging        close meadows
##   ---                                                                 
## 4204:    convex      smooth     brown   enlarging        close  leaves
## 4205:   knobbed      smooth     brown   enlarging        close  leaves
## 4206:    convex      smooth     brown   enlarging        close  leaves
## 4207:      flat      smooth     brown   enlarging        close  leaves
## 4208:    convex      smooth     brown   enlarging        close  leaves