library(RCurl)
## Loading required package: bitops
library(ggplot2)

Get the data from UCI:

# Download the UCI mushroom data set; header = FALSE because the first line of
# the file is read as data, so columns get the default names V1..V23.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 reads strings as
# character by default, while the factor summaries and the levels() recoding
# later in this analysis need factor columns.
Mushroom <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

Dimensions of Mushroom:

# Row count followed by column count of the raw data.
c(nrow(Mushroom), ncol(Mushroom))
## [1] 8124   23

Summary of Mushroom:

# Per-column summary of all 23 raw attributes.
summary(object = Mushroom)
##  V1       V2       V3             V4       V5             V6      
##  e:4208   b: 452   f:2320   n      :2284   f:4748   n      :3528  
##  p:3916   c:   4   g:   4   g      :1840   t:3376   f      :2160  
##           f:3152   s:2556   e      :1500            s      : 576  
##           k: 828   y:3244   y      :1072            y      : 576  
##           s:  32            w      :1040            a      : 400  
##           x:3656            b      : 168            l      : 400  
##                             (Other): 220            (Other): 484  
##  V7       V8       V9            V10       V11      V12      V13     
##  a: 210   c:6812   b:5612   b      :1728   e:3516   ?:2480   f: 552  
##  f:7914   w:1312   n:2512   p      :1492   t:4608   b:3776   k:2372  
##                             w      :1202            c: 556   s:5176  
##                             n      :1048            e:1120   y:  24  
##                             g      : 752            r: 192           
##                             h      : 732                             
##                             (Other):1170                             
##  V14           V15            V16       V17      V18      V19     
##  f: 600   w      :4464   w      :4384   p:8124   n:  96   n:  36  
##  k:2304   p      :1872   p      :1872            o:  96   o:7488  
##  s:4936   g      : 576   g      : 576            w:7924   t: 600  
##  y: 284   n      : 448   n      : 512            y:   8           
##           b      : 432   b      : 432                             
##           o      : 192   o      : 192                             
##           (Other): 140   (Other): 156                             
##  V20           V21       V22      V23     
##  e:2776   w      :2388   a: 384   d:3148  
##  f:  48   n      :1968   c: 340   g:2148  
##  l:1296   k      :1872   n: 400   l: 832  
##  n:  36   h      :1632   s:1248   m: 292  
##  p:3968   r      :  72   v:4040   p:1144  
##           b      :  48   y:1712   u: 368  
##           (Other): 144            w: 192

Selecting the required data (classes, cap-shape, cap-color, odor, population, habitat):

# Keep columns 1, 2, 4, 6, 22 and 23: class, cap shape, cap color, odor,
# population and habitat.
Mushroom_data <- Mushroom[, c(1L, 2L, 4L, 6L, 22L, 23L)]

Summary of Mushroom_data:

# Per-column summary of the six selected attributes.
summary(object = Mushroom_data)
##  V1       V2             V4             V6       V22      V23     
##  e:4208   b: 452   n      :2284   n      :3528   a: 384   d:3148  
##  p:3916   c:   4   g      :1840   f      :2160   c: 340   g:2148  
##           f:3152   e      :1500   s      : 576   n: 400   l: 832  
##           k: 828   y      :1072   y      : 576   s:1248   m: 292  
##           s:  32   w      :1040   a      : 400   v:4040   p:1144  
##           x:3656   b      : 168   l      : 400   y:1712   u: 368  
##                    (Other): 220   (Other): 484            w: 192

Adding meaningful names to the columns:

# Keep the six attributes of interest and give the columns descriptive names.
Mushroom_d <- Mushroom[, c(1, 2, 4, 6, 22, 23)]
colnames(Mushroom_d) <- c(
  "classes", "cap_shape", "cap_color", "odor", "population", "habitat"
)

# Lookup tables: single-letter code (name) -> readable label (value),
# one table per column of Mushroom_d.
level_labels <- list(
  classes = c(e = "Edible", p = "Poisonous"),
  cap_shape = c(
    b = "bell", c = "conical", x = "convex", f = "flat",
    k = "knobbed", s = "sunken"
  ),
  cap_color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  ),
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

# Rename the levels of `x` whose codes appear in names(`labels`).
# `x` is coerced to a factor first so the recoding also works when the data
# were read as character columns (the read.csv default since R 4.0).
# Level order is kept, and codes without an entry in `labels` are left as-is.
relabel_levels <- function(x, labels) {
  x <- as.factor(x)
  codes <- levels(x)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  levels(x) <- codes
  x
}

# Apply each lookup table to its matching column.
recode_cols <- names(level_labels)
Mushroom_d[recode_cols] <- Map(
  relabel_levels,
  Mushroom_d[recode_cols],
  level_labels
)

# Show the first six recoded rows as a quick sanity check.
head(Mushroom_d, n = 6L)
##     classes cap_shape cap_color    odor population habitat
## 1 Poisonous    convex     brown pungent  scattered   urban
## 2    Edible    convex    yellow  almond   numerous grasses
## 3    Edible      bell     white   anise   numerous meadows
## 4 Poisonous    convex     white pungent  scattered   urban
## 5    Edible    convex      gray    none   abundant grasses
## 6    Edible    convex    yellow  almond   numerous grasses

Summary of Mushroom_d after renaming:

# Per-column summary of the renamed and recoded data.
summary(object = Mushroom_d)
##       classes       cap_shape      cap_color         odor     
##  Edible   :4208   bell   : 452   brown  :2284   none   :3528  
##  Poisonous:3916   conical:   4   gray   :1840   foul   :2160  
##                   flat   :3152   red    :1500   spicy  : 576  
##                   knobbed: 828   yellow :1072   fishy  : 576  
##                   sunken :  32   white  :1040   almond : 400  
##                   convex :3656   buff   : 168   anise  : 400  
##                                  (Other): 220   (Other): 484  
##      population      habitat    
##  abundant : 384   woods  :3148  
##  clustered: 340   grasses:2148  
##  numerous : 400   leaves : 832  
##  scattered:1248   meadows: 292  
##  several  :4040   paths  :1144  
##  solitary :1712   urban  : 368  
##                   waste  : 192

Plot population distribution:

# Bar chart of mushroom counts per population type, with bars filled by class.
# Written with ggplot() + geom_bar() because qplot() is deprecated as of
# ggplot2 3.4.0.
ggplot(Mushroom_d, aes(x = population, fill = classes)) +
  geom_bar()