Mushroom Data

Pull Data
The first step is to pull the data from the site and put it into a data frame.

# Location of the raw, comma-separated mushroom data (the file has no header row)
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Read directly from the URL; strings are left as character, not factors
mushrooms <- read.table(
  file = url,
  header = FALSE,
  sep = ",",
  stringsAsFactors = FALSE
)
dim(mushrooms)
## [1] 8124   23
# Preview the first rows of the raw data
head(mushrooms)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Add Columns
Now that we have the data in the data frame we need to add the column names.

I copied and pasted the attribute information into a txt file in my working directory.

# Read the pipe-delimited attribute file (its first row holds the column names)
mushattr <- read.table(
  file = "mushroomattr.txt",
  header = TRUE,
  sep = "|",
  stringsAsFactors = FALSE
)
head(mushattr)
##   Index   Attribute
## 1     0       class
## 2     1   cap-shape
## 3     2 cap-surface
## 4     3   cap-color
## 5     4    bruises?
## 6     5        odor
##                                                                       Information
## 1                                                            edible=e,poisonous=p
## 2                             bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s
## 3                                            fibrous=f,grooves=g,scaly=y,smooth=s
## 4 brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
## 5                                                                  bruises=t,no=f
## 6     almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s
# Label the raw columns with the names listed in the attribute table
names(mushrooms) <- mushattr[["Attribute"]]
head(mushrooms)
##   class cap-shape cap-surface cap-color bruises? odor gill-attachment
## 1     p         x           s         n        t    p               f
## 2     e         x           s         y        t    a               f
## 3     e         b           s         w        t    l               f
## 4     p         x           y         w        t    p               f
## 5     e         x           s         g        f    n               f
## 6     e         x           y         y        t    a               f
##   gill-spacing gill-size gill-color stalk-shape stalk-root
## 1            c         n          k           e          e
## 2            c         b          k           e          c
## 3            c         b          n           e          c
## 4            c         n          n           e          e
## 5            w         b          k           t          e
## 6            c         b          n           e          c
##   stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore-print-color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g

Create Subset

# Keep every row but only the four columns of interest
keep_cols <- c("class", "odor", "ring-number", "population")
mushroomsub <- mushrooms[, keep_cols, drop = FALSE]
head(mushroomsub)
##   class odor ring-number population
## 1     p    p           o          s
## 2     e    a           o          n
## 3     e    l           o          n
## 4     p    p           o          s
## 5     e    n           o          a
## 6     e    a           o          n

Rename Values in Subset

# Replace the one-letter codes in the subset with readable labels.
# plyr::revalue() accepts a named character vector of "old" = "new" pairs, so
# each column is recoded in a single call carrying its whole code table rather
# than one call (and one reassignment of the column) per code.
library(plyr)

mushroomsub$class <- revalue(
  mushroomsub$class,
  c("e" = "edible", "p" = "poisonous")
)

mushroomsub$odor <- revalue(
  mushroomsub$odor,
  c(
    "a" = "almond",
    "l" = "anise",
    "c" = "creosote",
    "y" = "fishy",
    "f" = "foul",
    "m" = "musty",
    "n" = "none",
    "p" = "pungent",
    "s" = "spicy"
  )
)

# Backticks are required because the column name contains a hyphen
mushroomsub$`ring-number` <- revalue(
  mushroomsub$`ring-number`,
  c("n" = "none", "o" = "one", "t" = "two")
)

mushroomsub$population <- revalue(
  mushroomsub$population,
  c(
    "a" = "abundant",
    "c" = "clustered",
    "n" = "numerous",
    "s" = "scattered",
    "v" = "several",
    "y" = "solitary"
  )
)

head(mushroomsub)
##       class    odor ring-number population
## 1 poisonous pungent         one  scattered
## 2    edible  almond         one   numerous
## 3    edible   anise         one   numerous
## 4 poisonous pungent         one  scattered
## 5    edible    none         one   abundant
## 6    edible  almond         one   numerous