Load the mushroom data set from the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data

# Read the comma-separated mushroom data directly from the UCI repository.
# The file has no header row, so R assigns the default names V1..V23.
# stringsAsFactors is set explicitly: since R 4.0.0 strings are no longer
# converted to factors by default, and the str() output shown in this
# document has factor columns.
Mushrooms <- read.table(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Preview the first 10 rows.
head(Mushrooms, 10)
##    V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1   p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2   e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3   e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4   p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5   e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6   e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 7   e  b  s  w  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
## 8   e  b  y  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 9   p  x  y  w  t  p  f  c  n   p   e   e   s   s   w   w   p   w   o   p
## 10  e  b  s  y  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
##    V21 V22 V23
## 1    k   s   u
## 2    n   n   g
## 3    n   n   m
## 4    k   s   u
## 5    n   a   g
## 6    k   n   g
## 7    k   n   m
## 8    n   s   m
## 9    k   v   g
## 10   k   s   m

The file can also be loaded from a local directory (this assumes agaricus-lepiota.data has already been saved in the current working directory).

library(stringr)
# Locate the data file in the current working directory. file.path() inserts
# a single, platform-appropriate separator, so no manual "//" or trimming of
# the directory name is needed.
localdir <- getwd()
destfile <- file.path(localdir, "agaricus-lepiota.data")
# stringsAsFactors is set explicitly: since R 4.0.0 strings are no longer
# converted to factors by default, and the str() output shown in this
# document has factor columns.
Mushrooms <- read.table(
  destfile,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Preview the first 10 rows.
head(Mushrooms, 10)
##    V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1   p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2   e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3   e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4   p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5   e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6   e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 7   e  b  s  w  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
## 8   e  b  y  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 9   p  x  y  w  t  p  f  c  n   p   e   e   s   s   w   w   p   w   o   p
## 10  e  b  s  y  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
##    V21 V22 V23
## 1    k   s   u
## 2    n   n   g
## 3    n   n   m
## 4    k   s   u
## 5    n   a   g
## 6    k   n   g
## 7    k   n   m
## 8    n   s   m
## 9    k   v   g
## 10   k   s   m

The file contains 23 columns and 8124 rows. The first column is a categorical variable giving the mushroom category (e = edible, p = poisonous). The remaining 22 columns are attributes of the mushrooms.

# Number of rows and columns in the data frame.
dim(Mushrooms)
## [1] 8124   23
# Compact per-column summary: column type, number of levels, first values.
str(Mushrooms)
## 'data.frame':    8124 obs. of  23 variables:
##  $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
##  $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
##  $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
##  $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
##  $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
##  $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
##  $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
##  $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
##  $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
##  $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
##  $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
##  $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
##  $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
##  $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
##  $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
##  $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
##  $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...

Download the data dictionary located at https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names and save it in the working directory as agaricus-lepiota.txt.

library(stringr)
# Save the data dictionary into the current working directory under the name
# agaricus-lepiota.txt. file.path() inserts a single, platform-appropriate
# separator, so no manual "//" or trimming of the directory name is needed.
localdir <- getwd()
destfile <- file.path(localdir, "agaricus-lepiota.txt")
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names"
# mode = "wb" writes the bytes unchanged.
download.file(url, destfile, mode = "wb")

Change the column names to meaningful names, for example V1 = class, V2 = cap-shape, and so on. These column names are taken from the data dictionary.

library(plyr)
# Replace the default V1..V23 names with descriptive attribute names in a
# single rename() call; each entry maps an old name to its new name.
Mushrooms <- rename(
  Mushrooms,
  c(
    V1 = "class",
    V2 = "cap-shape",
    V3 = "cap-surface",
    V4 = "cap-color",
    V5 = "bruises",
    V6 = "odor",
    V7 = "gill-attachment",
    V8 = "gill-spacing",
    V9 = "gill-size",
    V10 = "gill-color",
    V11 = "stalk-shape",
    V12 = "stalk-root",
    V13 = "stalk-surface-above-ring",
    V14 = "stalk-surface-below-ring",
    V15 = "stalk-color-above-ring",
    V16 = "stalk-color-below-ring",
    V17 = "veil-type",
    V18 = "veil-color",
    V19 = "ring-number",
    V20 = "ring-type",
    V21 = "spore-print-color",
    V22 = "population",
    V23 = "habitat"
  )
)
# Preview the first 10 rows under the new names.
head(Mushrooms, n = 10)
##    class cap-shape cap-surface cap-color bruises odor gill-attachment
## 1      p         x           s         n       t    p               f
## 2      e         x           s         y       t    a               f
## 3      e         b           s         w       t    l               f
## 4      p         x           y         w       t    p               f
## 5      e         x           s         g       f    n               f
## 6      e         x           y         y       t    a               f
## 7      e         b           s         w       t    a               f
## 8      e         b           y         w       t    l               f
## 9      p         x           y         w       t    p               f
## 10     e         b           s         y       t    a               f
##    gill-spacing gill-size gill-color stalk-shape stalk-root
## 1             c         n          k           e          e
## 2             c         b          k           e          c
## 3             c         b          n           e          c
## 4             c         n          n           e          e
## 5             w         b          k           t          e
## 6             c         b          n           e          c
## 7             c         b          g           e          c
## 8             c         b          n           e          c
## 9             c         n          p           e          e
## 10            c         b          g           e          c
##    stalk-surface-above-ring stalk-surface-below-ring
## 1                         s                        s
## 2                         s                        s
## 3                         s                        s
## 4                         s                        s
## 5                         s                        s
## 6                         s                        s
## 7                         s                        s
## 8                         s                        s
## 9                         s                        s
## 10                        s                        s
##    stalk-color-above-ring stalk-color-below-ring veil-type veil-color
## 1                       w                      w         p          w
## 2                       w                      w         p          w
## 3                       w                      w         p          w
## 4                       w                      w         p          w
## 5                       w                      w         p          w
## 6                       w                      w         p          w
## 7                       w                      w         p          w
## 8                       w                      w         p          w
## 9                       w                      w         p          w
## 10                      w                      w         p          w
##    ring-number ring-type spore-print-color population habitat
## 1            o         p                 k          s       u
## 2            o         p                 n          n       g
## 3            o         p                 n          n       m
## 4            o         p                 k          s       u
## 5            o         e                 n          a       g
## 6            o         p                 k          n       g
## 7            o         p                 k          n       m
## 8            o         p                 n          s       m
## 9            o         p                 k          v       g
## 10           o         p                 k          s       m

List distinct values in class column.

# Show the distinct codes present in the class column.
unique(Mushrooms$class)
## [1] p e
## Levels: e p

Formatting the data: replace the coded values with their actual meanings. For example, in the class column e = edible and p = poisonous. Since there are only two levels, the column can be recoded with `ifelse`.

# Codes before recoding.
unique(Mushrooms[["class"]])
## [1] p e
## Levels: e p
# Two-way recode: "e" becomes "edible", any other code becomes "poisonous".
# ifelse() returns a character vector, so the column is no longer a factor.
Mushrooms[["class"]] <- ifelse(
  Mushrooms[["class"]] == "e",
  "edible",
  "poisonous"
)
# Values after recoding.
unique(Mushrooms[["class"]])
## [1] "poisonous" "edible"

The cap-shape column contains more than two levels. Formatting the cap-shape data.

# Codes before recoding.
unique(Mushrooms$`cap-shape`)
## [1] x b s f k c
## Levels: b c f k s x
# Expand the single-letter cap-shape codes to full names with one revalue()
# call; each entry maps an old value to its replacement.
Mushrooms$`cap-shape` <- revalue(
  Mushrooms$`cap-shape`,
  c(
    "b" = "bell",
    "c" = "conical",
    "x" = "convex",
    "f" = "flat",
    "k" = "knobbed",
    "s" = "sunken"
  )
)
# Values after recoding.
unique(Mushrooms$`cap-shape`)
## [1] convex  bell    sunken  flat    knobbed conical
## Levels: bell conical flat knobbed sunken convex

Formatting the cap-surface data.

# Codes before recoding.
unique(Mushrooms$`cap-surface`)
## [1] s y f g
## Levels: f g s y
# Expand the single-letter cap-surface codes to full names with one
# revalue() call; each entry maps an old value to its replacement.
Mushrooms$`cap-surface` <- revalue(
  Mushrooms$`cap-surface`,
  c(
    "s" = "smooth",
    "y" = "scaly",
    "f" = "fibrous",
    "g" = "grooves"
  )
)
# Values after recoding.
unique(Mushrooms$`cap-surface`)
## [1] smooth  scaly   fibrous grooves
## Levels: fibrous grooves smooth scaly

Formatting the cap-color data.

# Codes before recoding.
unique(Mushrooms$`cap-color`)
##  [1] n y w g e p b u c r
## Levels: b c e g n p r u w y
# Expand the single-letter cap-color codes to full names with one revalue()
# call; each entry maps an old value to its replacement.
Mushrooms$`cap-color` <- revalue(
  Mushrooms$`cap-color`,
  c(
    "n" = "brown",
    "b" = "buff",
    "c" = "cinnamon",
    "g" = "gray",
    "r" = "green",
    "p" = "pink",
    "u" = "purple",
    "e" = "red",
    "w" = "white",
    "y" = "yellow"
  )
)
# Values after recoding.
unique(Mushrooms$`cap-color`)
##  [1] brown    yellow   white    gray     red      pink     buff    
##  [8] purple   cinnamon green   
## Levels: buff cinnamon red gray brown pink green purple white yellow

Formatting the bruises data.

# Codes before recoding.
unique(Mushrooms$`bruises`)
## [1] t f
## Levels: f t
# Expand the two bruises codes with one revalue() call: "t" becomes
# "bruises" and "f" becomes "no".
Mushrooms$`bruises` <- revalue(
  Mushrooms$`bruises`,
  c(
    "t" = "bruises",
    "f" = "no"
  )
)
# Values after recoding.
unique(Mushrooms$`bruises`)
## [1] bruises no     
## Levels: no bruises

Subset the data to rows where class = edible, keeping only the first 5 columns.

# Keep the rows whose class is "edible" and only the first five columns.
edible <- Mushrooms[Mushrooms[["class"]] == "edible", seq_len(5)]
# Preview the first 20 matching rows.
head(edible, n = 20)
##     class cap-shape cap-surface cap-color bruises
## 2  edible    convex      smooth    yellow bruises
## 3  edible      bell      smooth     white bruises
## 5  edible    convex      smooth      gray      no
## 6  edible    convex       scaly    yellow bruises
## 7  edible      bell      smooth     white bruises
## 8  edible      bell       scaly     white bruises
## 10 edible      bell      smooth    yellow bruises
## 11 edible    convex       scaly    yellow bruises
## 12 edible    convex       scaly    yellow bruises
## 13 edible      bell      smooth    yellow bruises
## 15 edible    convex     fibrous     brown      no
## 16 edible    sunken     fibrous      gray      no
## 17 edible      flat     fibrous     white      no
## 21 edible      bell      smooth    yellow bruises
## 23 edible      bell       scaly    yellow bruises
## 24 edible      bell       scaly     white bruises
## 25 edible      bell      smooth     white bruises
## 27 edible    convex       scaly    yellow bruises
## 28 edible    convex       scaly     white bruises
## 29 edible      flat     fibrous     brown      no

Subset the data to rows where class = poisonous and bruises = no, selecting columns by name.

# Rows that are poisonous and have bruises == "no"; columns are selected by
# name. Inside subset() the column names refer to columns of Mushrooms.
poisonous <- subset(
  Mushrooms,
  class == "poisonous" & bruises == "no",
  select = c(class, `cap-color`, `cap-shape`, bruises)
)
# Preview the first 20 matching rows.
head(poisonous, n = 20)
##          class cap-color cap-shape bruises
## 1817 poisonous      gray    convex      no
## 2129 poisonous      gray    convex      no
## 2179 poisonous      gray    convex      no
## 2211 poisonous      pink    convex      no
## 2239 poisonous      gray    convex      no
## 2286 poisonous      pink    convex      no
## 2386 poisonous      gray    convex      no
## 2427 poisonous      pink    convex      no
## 2444 poisonous      gray    convex      no
## 2483 poisonous      pink    convex      no
## 2513 poisonous     white    convex      no
## 2534 poisonous      gray    convex      no
## 2537 poisonous      gray    convex      no
## 2540 poisonous      gray    convex      no
## 2541 poisonous      pink    convex      no
## 2561 poisonous     white    convex      no
## 2567 poisonous      gray    convex      no
## 2572 poisonous      gray    convex      no
## 2595 poisonous      gray    convex      no
## 2606 poisonous      gray    convex      no