Loading the data:

#loading library plyr to help rename factor levels
library(plyr)

# Load the data. The file has no header row (header = FALSE); the columns are
# given descriptive names right after this call.
# stringsAsFactors = TRUE is spelled out because read.csv() keeps text columns
# as character by default since R 4.0.0, while the binomial glm() fitted
# further down needs Class to be a factor (a character response is rejected).
mushrooms <- read.csv(
  "agaricus-lepiota.csv",
  header = FALSE,
  stringsAsFactors = TRUE
)

# Give the 23 columns descriptive names, grouped below by mushroom part.
# Note: the four stalk surface/color names contain spaces, so they must be
# accessed with backticks or [[ ]] rather than a bare `$name`.
names(mushrooms) <- c(
  # target class
  "Class",
  # cap
  "CapShape", "CapSurface", "CapColor",
  # bruises and odor
  "Bruises", "Odor",
  # gill
  "GillAttachment", "GillSpacing", "GillSize", "GillColor",
  # stalk
  "StalkShape", "StalkRoot",
  "StalkSurface Above Ring", "StalkSurface Below Ring",
  "StalkColor Above Ring", "StalkColor Below Ring",
  # veil
  "VeilType", "VeilColor",
  # ring
  "RingNumber", "RingType",
  # spore print, population, habitat
  "SporePrintColor", "Population", "Habitat"
)

Renaming the factor levels:

# Replace the single-letter codes of the first four columns with readable
# labels, using plyr::revalue() with one old = "new" mapping per column.
mushrooms[["Class"]] <- revalue(
  mushrooms[["Class"]],
  c(e = "edible", p = "poisonous")
)
mushrooms[["CapShape"]] <- revalue(
  mushrooms[["CapShape"]],
  c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  )
)
mushrooms[["CapSurface"]] <- revalue(
  mushrooms[["CapSurface"]],
  c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth")
)
mushrooms[["CapColor"]] <- revalue(
  mushrooms[["CapColor"]],
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

# Preview the first rows with the relabelled columns.
head(mushrooms)
##       Class CapShape CapSurface CapColor Bruises Odor GillAttachment
## 1 poisonous   convex     smooth    brown       t    p              f
## 2    edible   convex     smooth   yellow       t    a              f
## 3    edible     bell     smooth    white       t    l              f
## 4 poisonous   convex      scaly    white       t    p              f
## 5    edible   convex     smooth     gray       f    n              f
## 6    edible   convex      scaly   yellow       t    a              f
##   GillSpacing GillSize GillColor StalkShape StalkRoot
## 1           c        n         k          e         e
## 2           c        b         k          e         c
## 3           c        b         n          e         c
## 4           c        n         n          e         e
## 5           w        b         k          t         e
## 6           c        b         n          e         c
##   StalkSurface Above Ring StalkSurface Below Ring StalkColor Above Ring
## 1                       s                       s                     w
## 2                       s                       s                     w
## 3                       s                       s                     w
## 4                       s                       s                     w
## 5                       s                       s                     w
## 6                       s                       s                     w
##   StalkColor Below Ring VeilType VeilColor RingNumber RingType
## 1                     w        p         w          o        p
## 2                     w        p         w          o        p
## 3                     w        p         w          o        p
## 4                     w        p         w          o        p
## 5                     w        p         w          o        e
## 6                     w        p         w          o        p
##   SporePrintColor Population Habitat
## 1               k          s       u
## 2               n          n       g
## 3               n          n       m
## 4               k          s       u
## 5               n          a       g
## 6               k          n       g

Subsetting the Data Frame:

# Build a new data frame from the first four columns. The columns are named
# explicitly with the "mushrooms." prefix.
MushroomSubset <- data.frame(
  mushrooms.Class = mushrooms$Class,
  mushrooms.CapShape = mushrooms$CapShape,
  mushrooms.CapSurface = mushrooms$CapSurface,
  mushrooms.CapColor = mushrooms$CapColor
)

# Preview the first rows of the subset.
head(MushroomSubset)
##   mushrooms.Class mushrooms.CapShape mushrooms.CapSurface
## 1       poisonous             convex               smooth
## 2          edible             convex               smooth
## 3          edible               bell               smooth
## 4       poisonous             convex                scaly
## 5          edible             convex               smooth
## 6          edible             convex                scaly
##   mushrooms.CapColor
## 1              brown
## 2             yellow
## 3              white
## 4              white
## 5               gray
## 6             yellow

Extra Stuff (Logistic Model Fitting):

# Exploratory logistic regression (fitted out of curiosity): model Class from
# the three cap attributes with a binomial family and logit link.
MushroomPartialModel <- glm(
  formula = Class ~ CapShape + CapSurface + CapColor,
  family = binomial(link = logit),
  data = mushrooms
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(MushroomPartialModel)
## 
## Call:
## glm(formula = Class ~ CapShape + CapSurface + CapColor, family = binomial(link = logit), 
##     data = mushrooms)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0877  -1.0427  -0.2862   1.0875   2.7470  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -2.68041    0.25382 -10.560  < 2e-16 ***
## CapShapeconical    17.33853  617.50856   0.028  0.97760    
## CapShapeflat        2.67108    0.16913  15.793  < 2e-16 ***
## CapShapeknobbed     3.55429    0.18507  19.205  < 2e-16 ***
## CapShapesunken    -12.02817  255.38629  -0.047  0.96244    
## CapShapeconvex      2.54187    0.16778  15.150  < 2e-16 ***
## CapSurfacegrooves  17.87103  578.44289   0.031  0.97535    
## CapSurfacesmooth    1.18538    0.06817  17.390  < 2e-16 ***
## CapSurfacescaly     0.88260    0.06103  14.462  < 2e-16 ***
## CapColorcinnamon   -1.90336    0.39613  -4.805 1.55e-06 ***
## CapColorred        -0.55348    0.19294  -2.869  0.00412 ** 
## CapColorgray       -0.65803    0.19250  -3.418  0.00063 ***
## CapColorbrown      -1.06939    0.18948  -5.644 1.66e-08 ***
## CapColorpink       -0.14602    0.26179  -0.558  0.57700    
## CapColorgreen     -16.37514  363.54452  -0.045  0.96407    
## CapColorpurple    -16.37514  363.54452  -0.045  0.96407    
## CapColorwhite      -1.37665    0.19675  -6.997 2.62e-12 ***
## CapColoryellow      0.31866    0.19903   1.601  0.10936    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 11251.8  on 8123  degrees of freedom
## Residual deviance:  9929.8  on 8106  degrees of freedom
## AIC: 9965.8
## 
## Number of Fisher Scoring iterations: 14
# Check whether a smaller model is better: stepwise selection by AIC,
# allowing terms to be both dropped and re-added.

library(MASS)

stepAIC(object = MushroomPartialModel, direction = "both")
## Start:  AIC=9965.84
## Class ~ CapShape + CapSurface + CapColor
## 
##              Df Deviance     AIC
## <none>            9929.8  9965.8
## - CapSurface  3  10289.5 10319.5
## - CapColor    9  10399.8 10417.8
## - CapShape    5  10497.0 10523.0
## 
## Call:  glm(formula = Class ~ CapShape + CapSurface + CapColor, family = binomial(link = logit), 
##     data = mushrooms)
## 
## Coefficients:
##       (Intercept)    CapShapeconical       CapShapeflat  
##           -2.6804            17.3385             2.6711  
##   CapShapeknobbed     CapShapesunken     CapShapeconvex  
##            3.5543           -12.0282             2.5419  
## CapSurfacegrooves   CapSurfacesmooth    CapSurfacescaly  
##           17.8710             1.1854             0.8826  
##  CapColorcinnamon        CapColorred       CapColorgray  
##           -1.9034            -0.5535            -0.6580  
##     CapColorbrown       CapColorpink      CapColorgreen  
##           -1.0694            -0.1460           -16.3751  
##    CapColorpurple      CapColorwhite     CapColoryellow  
##          -16.3751            -1.3767             0.3187  
## 
## Degrees of Freedom: 8123 Total (i.e. Null);  8106 Residual
## Null Deviance:       11250 
## Residual Deviance: 9930  AIC: 9966