Load the data into RStudio

# Load the UCI mushroom data set straight from the repository.
#
# The data file has no header row. With read.csv()'s default header = TRUE the
# first record was consumed as column names (which is where names such as
# p, x, s, p.1 came from) and the data frame ended up one row short.
# header = FALSE keeps that record as data; col.names reuses exactly the names
# the old call produced, so later code that refers to columns by those names
# keeps working.
#
# NOTE(review): console output captured elsewhere in this document predates
# this fix, so its row numbers are shifted by one and its row count is one
# short until the document is re-run.
mushrooms <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  col.names = c(
    "p", "x", "s", "n", "t", "p.1", "f", "c", "n.1", "k", "e", "e.1",
    "s.1", "s.2", "w", "w.1", "p.2", "w.2", "o", "p.3", "k.1", "s.3", "u"
  )
)

Inspect Data attributes

# Preview the first ten rows of the raw data.
head(mushrooms, n = 10)
##    p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1  e x s y t   a f c   b k e   c   s   s w   w   p   w o   p   n   n g
## 2  e b s w t   l f c   b n e   c   s   s w   w   p   w o   p   n   n m
## 3  p x y w t   p f c   n n e   e   s   s w   w   p   w o   p   k   s u
## 4  e x s g f   n f w   b k t   e   s   s w   w   p   w o   e   n   a g
## 5  e x y y t   a f c   b n e   c   s   s w   w   p   w o   p   k   n g
## 6  e b s w t   a f c   b g e   c   s   s w   w   p   w o   p   k   n m
## 7  e b y w t   l f c   b n e   c   s   s w   w   p   w o   p   n   s m
## 8  p x y w t   p f c   n p e   e   s   s w   w   p   w o   p   k   v g
## 9  e b s y t   a f c   b g e   c   s   s w   w   p   w o   p   k   s m
## 10 e x y y t   l f c   b g e   c   s   s w   w   p   w o   p   n   n g
# Preview the last five rows of the raw data.
tail(mushrooms, n = 5)
##      p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 8119 e k s n f   n a c   b y e   ?   s   s o   o   p   o o   p   b   c l
## 8120 e x s n f   n a c   b y e   ?   s   s o   o   p   n o   p   b   v l
## 8121 e f s n f   n a c   b n e   ?   s   s o   o   p   o o   p   b   c l
## 8122 p k y n f   y f c   n b t   ?   s   k w   w   p   w o   e   w   v l
## 8123 e x s n f   n a c   b y e   ?   s   s o   o   p   o o   p   o   c l

Rename Columns

library(plyr)

# Replace the single-letter column names with descriptive attribute names.
# Each entry maps an existing column name (left) to its new name (right).
# NOTE(review): "gill attachement" is misspelled; it is kept unchanged here
# because it is the column's runtime name.
mushroom.colnames <- rename(
  mushrooms,
  c(
    "p" = "classes",
    "x" = "cap shape",
    "s" = "cap surface",
    "n" = "cap color",
    "t" = "bruises",
    "p.1" = "odor",
    "f" = "gill attachement",
    "c" = "gill spacing",
    "n.1" = "gill size",
    "k" = "gill color",
    "e" = "stalk shape",
    "e.1" = "stalk root",
    "s.1" = "stalk surface above ring",
    "s.2" = "stalk surface below ring",
    "w" = "stalk color above veil",
    "w.1" = "stalk color below veil",
    "p.2" = "veil type",
    "w.2" = "veil color",
    "o" = "ring number",
    "p.3" = "ring type",
    "k.1" = "spore print color",
    "s.3" = "population",
    "u" = "habitat"
  )
)
head(mushroom.colnames, n = 10)
##    classes cap shape cap surface cap color bruises odor gill attachement
## 1        e         x           s         y       t    a                f
## 2        e         b           s         w       t    l                f
## 3        p         x           y         w       t    p                f
## 4        e         x           s         g       f    n                f
## 5        e         x           y         y       t    a                f
## 6        e         b           s         w       t    a                f
## 7        e         b           y         w       t    l                f
## 8        p         x           y         w       t    p                f
## 9        e         b           s         y       t    a                f
## 10       e         x           y         y       t    l                f
##    gill spacing gill size gill color stalk shape stalk root
## 1             c         b          k           e          c
## 2             c         b          n           e          c
## 3             c         n          n           e          e
## 4             w         b          k           t          e
## 5             c         b          n           e          c
## 6             c         b          g           e          c
## 7             c         b          n           e          c
## 8             c         n          p           e          e
## 9             c         b          g           e          c
## 10            c         b          g           e          c
##    stalk surface above ring stalk surface below ring
## 1                         s                        s
## 2                         s                        s
## 3                         s                        s
## 4                         s                        s
## 5                         s                        s
## 6                         s                        s
## 7                         s                        s
## 8                         s                        s
## 9                         s                        s
## 10                        s                        s
##    stalk color above veil stalk color below veil veil type veil color
## 1                       w                      w         p          w
## 2                       w                      w         p          w
## 3                       w                      w         p          w
## 4                       w                      w         p          w
## 5                       w                      w         p          w
## 6                       w                      w         p          w
## 7                       w                      w         p          w
## 8                       w                      w         p          w
## 9                       w                      w         p          w
## 10                      w                      w         p          w
##    ring number ring type spore print color population habitat
## 1            o         p                 n          n       g
## 2            o         p                 n          n       m
## 3            o         p                 k          s       u
## 4            o         e                 n          a       g
## 5            o         p                 k          n       g
## 6            o         p                 k          n       m
## 7            o         p                 n          s       m
## 8            o         p                 k          v       g
## 9            o         p                 k          s       m
## 10           o         p                 n          n       g

Subset the data to the poisonous/edible class plus habitat, population, and cap color (odor is not selected by the code below)

# Keep all rows but only the four columns of interest; drop = FALSE guarantees
# the result stays a data frame.
mushroom.subset <- mushroom.colnames[
  ,
  c("classes", "habitat", "population", "cap color"),
  drop = FALSE
]
head(mushroom.subset, n = 10)
##    classes habitat population cap color
## 1        e       g          n         y
## 2        e       m          n         w
## 3        p       u          s         w
## 4        e       g          a         g
## 5        e       g          n         y
## 6        e       m          n         w
## 7        e       m          s         w
## 8        p       g          v         w
## 9        e       m          s         y
## 10       e       g          n         y

Rename the attribute values in each column of the subsetted data frame (single-letter codes become descriptive labels)

# Relabel the class codes: e -> edible, p -> poisonous.
# transform() rebuilds the data frame, which also turns the "cap color" column
# name into the syntactic name cap.color (visible in the printed header).
mushroom.subset <- transform(
  mushroom.subset,
  classes = revalue(classes, c("e" = "edible", "p" = "poisonous"))
)
head(mushroom.subset, n = 10)
##      classes habitat population cap.color
## 1     edible       g          n         y
## 2     edible       m          n         w
## 3  poisonous       u          s         w
## 4     edible       g          a         g
## 5     edible       g          n         y
## 6     edible       m          n         w
## 7     edible       m          s         w
## 8  poisonous       g          v         w
## 9     edible       m          s         y
## 10    edible       g          n         y
# Relabel the single-letter habitat codes with their descriptive names.
mushroom.subset <- transform(
  mushroom.subset,
  habitat = revalue(
    habitat,
    c(
      "g" = "grasses",
      "l" = "leaves",
      "m" = "meadows",
      "p" = "paths",
      "u" = "urban",
      "w" = "waste",
      "d" = "woods"
    )
  )
)
head(mushroom.subset, n = 10)
##      classes habitat population cap.color
## 1     edible grasses          n         y
## 2     edible meadows          n         w
## 3  poisonous   urban          s         w
## 4     edible grasses          a         g
## 5     edible grasses          n         y
## 6     edible meadows          n         w
## 7     edible meadows          s         w
## 8  poisonous grasses          v         w
## 9     edible meadows          s         y
## 10    edible grasses          n         y
# Relabel the single-letter population codes with their descriptive names.
mushroom.subset <- transform(
  mushroom.subset,
  population = revalue(
    population,
    c(
      "a" = "abundant",
      "c" = "clustered",
      "n" = "numerous",
      "s" = "scattered",
      "v" = "several",
      "y" = "solitary"
    )
  )
)
head(mushroom.subset, n = 10)
##      classes habitat population cap.color
## 1     edible grasses   numerous         y
## 2     edible meadows   numerous         w
## 3  poisonous   urban  scattered         w
## 4     edible grasses   abundant         g
## 5     edible grasses   numerous         y
## 6     edible meadows   numerous         w
## 7     edible meadows  scattered         w
## 8  poisonous grasses    several         w
## 9     edible meadows  scattered         y
## 10    edible grasses   numerous         y
# Relabel the single-letter cap colour codes with their descriptive names.
# The column is addressed as cap.color: the earlier transform() calls already
# converted the original "cap color" name into that syntactic form.
mushroom.subset <- transform(
  mushroom.subset,
  cap.color = revalue(
    cap.color,
    c(
      "n" = "brown",
      "b" = "buff",
      "c" = "cinnamon",
      "g" = "gray",
      "r" = "green",
      "p" = "pink",
      "u" = "purple",
      "e" = "red",
      "w" = "white",
      "y" = "yellow"
    )
  )
)
head(mushroom.subset, n = 10)
##      classes habitat population cap.color
## 1     edible grasses   numerous    yellow
## 2     edible meadows   numerous     white
## 3  poisonous   urban  scattered     white
## 4     edible grasses   abundant      gray
## 5     edible grasses   numerous    yellow
## 6     edible meadows   numerous     white
## 7     edible meadows  scattered     white
## 8  poisonous grasses    several     white
## 9     edible meadows  scattered    yellow
## 10    edible grasses   numerous    yellow
# Check the end of the relabelled subset as well.
tail(mushroom.subset, n = 5)
##        classes habitat population cap.color
## 8119    edible  leaves  clustered     brown
## 8120    edible  leaves    several     brown
## 8121    edible  leaves  clustered     brown
## 8122 poisonous  leaves    several     brown
## 8123    edible  leaves  clustered     brown

Inspect the structure of the new data

# Compact overview of the subset: dimensions plus each column's type and
# leading values.
str(object = mushroom.subset)
## 'data.frame':    8123 obs. of  4 variables:
##  $ classes   : Factor w/ 2 levels "edible","poisonous": 1 1 2 1 1 1 1 2 1 1 ...
##  $ habitat   : Factor w/ 7 levels "woods","grasses",..: 2 4 6 2 2 4 4 2 4 2 ...
##  $ population: Factor w/ 6 levels "abundant","clustered",..: 3 3 4 1 3 3 4 5 4 3 ...
##  $ cap.color : Factor w/ 10 levels "buff","cinnamon",..: 10 9 9 4 10 9 9 9 10 10 ...