Load Dataset

# URL of the raw UCI mushroom data (comma-separated, no header row).
location <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Download the file contents as text, then parse that text as CSV.
mushroomData <- getURL(location)
# header = FALSE: the file has no header row, so columns are named V1..V23.
# stringsAsFactors = TRUE: R >= 4.0 reads strings as character by default;
# request factors explicitly so every column is a factor on any R version.
mushroomDF <- read.csv(
  text = mushroomData,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(mushroomDF)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Extract relevant columns

The columns corresponding to cap-color, odor, gill-color, and spore-print-color will be selected. Since the first column indicates whether a mushroom is poisonous or edible, it is kept as well, which translates to columns 1, 4, 6, 10, and 21.

# Keep only columns 1, 4, 6, 10 and 21 of the raw data frame, then
# inspect the structure of the reduced data frame.
mushroomDF <- mushroomDF[c(1, 4, 6, 10, 21)]
str(object = mushroomDF)
## 'data.frame':    8124 obs. of  5 variables:
##  $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
##  $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
##  $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
##  $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
##  $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...

Change column names:

# Replace the default V* column names with descriptive attribute names.
# The hyphenated names are non-syntactic, so they need backticks or [[ ]]
# whenever the columns are accessed.
names(mushroomDF) <- c(
  "edibility",
  "cap-color",
  "odor",
  "gill-color",
  "spore-print-color"
)
head(x = mushroomDF)
##   edibility cap-color odor gill-color spore-print-color
## 1         p         n    p          k                 k
## 2         e         y    a          k                 n
## 3         e         w    l          n                 n
## 4         p         w    p          n                 k
## 5         e         g    n          k                 n
## 6         e         y    a          n                 k

Change letters to descriptive names

According to the data description (https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names), the abbreviations are as follows:

  1. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
  2. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s
  3. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y
  4. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y
# Map every single-letter code to its descriptive label, one column at a
# time. Each named vector reads code = "label".
mushroomDF[["edibility"]] <- revalue(
  mushroomDF[["edibility"]],
  c(e = "edible", p = "poisonous")
)

mushroomDF[["cap-color"]] <- revalue(
  mushroomDF[["cap-color"]],
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

mushroomDF[["odor"]] <- revalue(
  mushroomDF[["odor"]],
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

mushroomDF[["gill-color"]] <- revalue(
  mushroomDF[["gill-color"]],
  c(
    k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
    r = "green", o = "orange", p = "pink", u = "purple", e = "red",
    w = "white", y = "yellow"
  )
)

mushroomDF[["spore-print-color"]] <- revalue(
  mushroomDF[["spore-print-color"]],
  c(
    k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
    o = "orange", u = "purple", w = "white", y = "yellow"
  )
)

Display details of transformed data frame

# Show the column types and factor levels after relabelling.
str(object = mushroomDF)
## 'data.frame':    8124 obs. of  5 variables:
##  $ edibility        : Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
##  $ cap-color        : Factor w/ 10 levels "buff","cinnamon",..: 5 10 9 9 4 10 9 9 9 10 ...
##  $ odor             : Factor w/ 9 levels "almond","creosote",..: 7 1 4 7 6 1 1 4 7 1 ...
##  $ gill-color       : Factor w/ 12 levels "buff","red","gray",..: 5 5 6 6 5 6 3 6 8 3 ...
##  $ spore-print-color: Factor w/ 9 levels "buff","chocolate",..: 3 4 4 3 4 3 3 4 3 3 ...
# Preview the first six rows of the relabelled data frame.
head(x = mushroomDF)
##   edibility cap-color    odor gill-color spore-print-color
## 1 poisonous     brown pungent      black             black
## 2    edible    yellow  almond      black             brown
## 3    edible     white   anise      brown             brown
## 4 poisonous     white pungent      brown             black
## 5    edible      gray    none      black             brown
## 6    edible    yellow  almond      brown             black