# Mushroom data originally from UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/Mushroom
# First, let's download the data from my GitHub
# Download the raw data file into the current working directory, then read it.
directory <- getwd()
data_url <- "https://raw.githubusercontent.com/EyeDen/data607/master/agaricus-lepiota.data"
# Build the destination once so the download and the read use the same path.
data_file <- file.path(directory, "agaricus-lepiota.data")

# download.file() returns 0 on success and non-zero on failure (some methods
# only warn), so stop here instead of failing later inside read.table().
download_status <- download.file(url = data_url, destfile = data_file)
if (download_status != 0) {
  stop(
    "Download of ", data_url, " failed with status ", download_status,
    call. = FALSE
  )
}

# Comma-separated values; read.table() assigns the default names V1, V2, ...
mushroom <- read.table(data_file, sep = ",")

# Check that we have read the table
head(mushroom, n = 10)
##    V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1   p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2   e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3   e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4   p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5   e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6   e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 7   e  b  s  w  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
## 8   e  b  y  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 9   p  x  y  w  t  p  f  c  n   p   e   e   s   s   w   w   p   w   o   p
## 10  e  b  s  y  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
##    V21 V22 V23
## 1    k   s   u
## 2    n   n   g
## 3    n   n   m
## 4    k   s   u
## 5    n   a   g
## 6    k   n   g
## 7    k   n   m
## 8    n   s   m
## 9    k   v   g
## 10   k   s   m
# Now, let's examine a subset
# The data dictionary is found here: https://raw.githubusercontent.com/EyeDen/data607/master/agaricus-lepiota.names

# Class (edible/poisonous), cap-color, odor, population, and habitat sound interesting
# Keep five raw columns and give each a readable header.
# Names of this vector are the raw column names; values are the new headers.
column_names <- c(
  V1 = "Classification",
  V4 = "CapColor",
  V6 = "Odor",
  V22 = "Population",
  V23 = "Habitat"
)
subshroom <- mushroom[, names(column_names)]
colnames(subshroom) <- unname(column_names)

# Now we need to change the values for each column.
library(plyr)

# Lookup of one-letter codes to readable labels, one entry per column.
# Within each entry the names are the raw codes and the values are the labels.
value_labels <- list(
  Classification = c(e = "edible", p = "poisonous"),
  CapColor = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  Odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  Population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  ),
  Habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

# Replace the codes in each listed column, in the order given above.
for (column in names(value_labels)) {
  code_map <- value_labels[[column]]
  subshroom[[column]] <- mapvalues(
    subshroom[[column]],
    from = names(code_map),
    to = unname(code_map)
  )
}

# All done.  Let's check.
head(subshroom, n = 20)
##    Classification CapColor    Odor Population Habitat
## 1       poisonous    brown pungent  scattered   urban
## 2          edible   yellow  almond   numerous grasses
## 3          edible    white   anise   numerous meadows
## 4       poisonous    white pungent  scattered   urban
## 5          edible     gray    none   abundant grasses
## 6          edible   yellow  almond   numerous grasses
## 7          edible    white  almond   numerous meadows
## 8          edible    white   anise  scattered meadows
## 9       poisonous    white pungent    several grasses
## 10         edible   yellow  almond  scattered meadows
## 11         edible   yellow   anise   numerous grasses
## 12         edible   yellow  almond  scattered meadows
## 13         edible   yellow  almond  scattered grasses
## 14      poisonous    white pungent    several   urban
## 15         edible    brown    none   abundant grasses
## 16         edible     gray    none   solitary   urban
## 17         edible    white    none   abundant grasses
## 18      poisonous    brown pungent  scattered grasses
## 19      poisonous    white pungent  scattered   urban
## 20      poisonous    brown pungent  scattered   urban