Mushroom data set

Load the comma-separated data file (which has no header row) into a data frame.

# Retrieve the raw mushroom data and load it into a data.frame.
# The file is comma-separated and has no header row, so the columns are read
# as V1..V23. stringsAsFactors = TRUE keeps every column a factor on R >= 4.0
# (where the default became FALSE), so summary() below reports per-level
# counts.

M <- "https://raw.githubusercontent.com/raghu74us/607_1/master/agaricus-lepiota.data"
mushrooms <- read.table(
  file = M,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(mushrooms)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g
# Summarise every column of the raw data frame (columns still named V1..V23).
summary(mushrooms)
##  V1       V2       V3             V4       V5             V6      
##  e:4208   b: 452   f:2320   n      :2284   f:4748   n      :3528  
##  p:3916   c:   4   g:   4   g      :1840   t:3376   f      :2160  
##           f:3152   s:2556   e      :1500            s      : 576  
##           k: 828   y:3244   y      :1072            y      : 576  
##           s:  32            w      :1040            a      : 400  
##           x:3656            b      : 168            l      : 400  
##                             (Other): 220            (Other): 484  
##  V7       V8       V9            V10       V11      V12      V13     
##  a: 210   c:6812   b:5612   b      :1728   e:3516   ?:2480   f: 552  
##  f:7914   w:1312   n:2512   p      :1492   t:4608   b:3776   k:2372  
##                             w      :1202            c: 556   s:5176  
##                             n      :1048            e:1120   y:  24  
##                             g      : 752            r: 192           
##                             h      : 732                             
##                             (Other):1170                             
##  V14           V15            V16       V17      V18      V19     
##  f: 600   w      :4464   w      :4384   p:8124   n:  96   n:  36  
##  k:2304   p      :1872   p      :1872            o:  96   o:7488  
##  s:4936   g      : 576   g      : 576            w:7924   t: 600  
##  y: 284   n      : 448   n      : 512            y:   8           
##           b      : 432   b      : 432                             
##           o      : 192   o      : 192                             
##           (Other): 140   (Other): 156                             
##  V20           V21       V22      V23     
##  e:2776   w      :2388   a: 384   d:3148  
##  f:  48   n      :1968   c: 340   g:2148  
##  l:1296   k      :1872   n: 400   l: 832  
##  n:  36   h      :1632   s:1248   m: 292  
##  p:3968   r      :  72   v:4040   p:1144  
##           b      :  48   y:1712   u: 368  
##           (Other): 144            w: 192

Rename the columns according to the data set's attribute description.

# Give the 23 columns their descriptive attribute names, listed in the same
# order as the columns appear in the data file.
names(mushrooms) <- c(
  "classes",
  "cap-shape",
  "cap-surface",
  "cap-color",
  "bruises",
  "odor",
  "gill-attachment",
  "gill-spacing",
  "gill-size",
  "gill-color",
  "stalk-shape",
  "stalk-root",
  "stalk-surface-above-ring",
  "stalk-surface-below-ring",
  "stalk-color-above-ring",
  "stalk-color-below-ring",
  "veil-type",
  "veil-color",
  "ring-number",
  "ring-type",
  "spore-print-color",
  "population",
  "habitat"
)

# Show the first six rows under the new names.
head(mushrooms, n = 6L)
##   classes cap-shape cap-surface cap-color bruises odor gill-attachment
## 1       p         x           s         n       t    p               f
## 2       e         x           s         y       t    a               f
## 3       e         b           s         w       t    l               f
## 4       p         x           y         w       t    p               f
## 5       e         x           s         g       f    n               f
## 6       e         x           y         y       t    a               f
##   gill-spacing gill-size gill-color stalk-shape stalk-root
## 1            c         n          k           e          e
## 2            c         b          k           e          c
## 3            c         b          n           e          c
## 4            c         n          n           e          e
## 5            w         b          k           t          e
## 6            c         b          n           e          c
##   stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore-print-color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g

Replace the single-letter codes with descriptive labels.

require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(car)
## Loading required package: car
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Translate the single-letter codes into readable labels for the class and
# the three colour attributes.
# car::recode is called with its namespace because dplyr also exports a
# recode() with a different interface; the bare name only resolves to car's
# version while car is attached after dplyr.
# Columns are selected by name rather than by position, and as.vector() turns
# a factor result into a plain character vector.
mush_class <- as.vector(car::recode(
  mushrooms[["classes"]],
  "'e' = 'edible'; 'p' = 'poisonous'"
))

cap_color <- as.vector(car::recode(
  mushrooms[["cap-color"]],
  paste(
    "'n' = 'brown'; 'b' = 'buff'; 'c' = 'cinnamon'; 'g' = 'gray';",
    "'r' = 'green'; 'p' = 'pink'; 'u' = 'purple'; 'e' = 'red';",
    "'w' = 'white'; 'y' = 'yellow'"
  )
))

veil_color <- as.vector(car::recode(
  mushrooms[["veil-color"]],
  "'n' = 'brown'; 'o' = 'orange'; 'w' = 'white'; 'y' = 'yellow'"
))

spore_color <- as.vector(car::recode(
  mushrooms[["spore-print-color"]],
  paste(
    "'k' = 'black'; 'n' = 'brown'; 'b' = 'buff'; 'h' = 'chocolate';",
    "'r' = 'green'; 'o' = 'orange'; 'u' = 'purple'; 'w' = 'white';",
    "'y' = 'yellow'"
  )
))

Create a data frame from the four fields above, in which the codes have been replaced with descriptive labels.

# Combine the four recoded vectors into one data frame.
# data.frame() is used directly instead of as.data.frame(cbind(...)), which
# detours through a character matrix. stringsAsFactors = TRUE keeps the
# columns as factors on R >= 4.0 so that summary() reports counts per label.
m_colors <- data.frame(
  mush_class,
  cap_color,
  veil_color,
  spore_color,
  stringsAsFactors = TRUE
)
class(m_colors)
## [1] "data.frame"
summary(m_colors)
##      mush_class     cap_color     veil_color      spore_color  
##  edible   :4208   brown  :2284   brown :  96   white    :2388  
##  poisonous:3916   gray   :1840   orange:  96   brown    :1968  
##                   red    :1500   white :7924   black    :1872  
##                   yellow :1072   yellow:   8   chocolate:1632  
##                   white  :1040                 green    :  72  
##                   buff   : 168                 buff     :  48  
##                   (Other): 220                 (Other)  : 144