The Mushroom Dataset contains descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms in the Agaricus and Lepiota Family. It includes 8,124 instances described by 22 attributes plus a class label (edible or poisonous), for 23 columns in total, and was published on April 27, 1987.

In this assignment, I studied the Mushroom dataset, renamed the columns and recoded the single-letter values to make the data easier to understand, and created subsets of the data to answer a few questions about it.


1. Load in the Mushrooms dataset:

# Read the raw UCI file: comma-separated, no header row, one single-letter
# code per field.
# stringsAsFactors = TRUE is set explicitly because the default became FALSE
# in R 4.0; with character columns the summary() calls later in this document
# would no longer print per-level counts (or keep zero-count levels in subsets).
mushroom <- read.table(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  sep = ",",
  header = FALSE,
  stringsAsFactors = TRUE
)

print(head(mushroom))
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g



2. Rename the columns:

# Replace the default V1..V23 headers with descriptive attribute names.
# Names are assigned positionally, one per column. The hyphenated names are
# non-syntactic, so later code must quote them with backticks or [[ ]].
colnames(mushroom) <- c(
  "Class",
  "Cap-Shape", "Cap-Surface", "Cap-Color",
  "Bruises", "Odor",
  "Gill-Attachment", "Gill-Spacing", "Gill-Size", "Gill-Color",
  "Stalk-Shape", "Stalk-Root",
  "Stalk-Surface-Above-Ring", "Stalk-Surface-Below-Ring",
  "Stalk-Color-Above-Ring", "Stalk-Color-Below-Ring",
  "Veil-Type", "Veil-Color",
  "Ring-Number", "Ring-Type",
  "Spore-Print-Color", "Population", "Habitat"
)

print(head(mushroom))
##   Class Cap-Shape Cap-Surface Cap-Color Bruises Odor Gill-Attachment
## 1     p         x           s         n       t    p               f
## 2     e         x           s         y       t    a               f
## 3     e         b           s         w       t    l               f
## 4     p         x           y         w       t    p               f
## 5     e         x           s         g       f    n               f
## 6     e         x           y         y       t    a               f
##   Gill-Spacing Gill-Size Gill-Color Stalk-Shape Stalk-Root
## 1            c         n          k           e          e
## 2            c         b          k           e          c
## 3            c         b          n           e          c
## 4            c         n          n           e          e
## 5            w         b          k           t          e
## 6            c         b          n           e          c
##   Stalk-Surface-Above-Ring Stalk-Surface-Below-Ring Stalk-Color-Above-Ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   Stalk-Color-Below-Ring Veil-Type Veil-Color Ring-Number Ring-Type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   Spore-Print-Color Population Habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g



3. Recode the single-letter values in each column:

# Translate every single-letter code into its descriptive label, one column
# at a time. Codes that are not listed in a call (for example the "?" marker
# in Stalk-Root) are left unchanged.

mushroom[["Class"]] <- recode(
  mushroom[["Class"]],
  e = "edible", p = "poisonous"
)

mushroom[["Cap-Shape"]] <- recode(
  mushroom[["Cap-Shape"]],
  b = "bell", c = "conical", x = "convex",
  f = "flat", k = "knobbed", s = "sunken"
)

mushroom[["Cap-Surface"]] <- recode(
  mushroom[["Cap-Surface"]],
  f = "fibrous", g = "grooves", y = "scaly", s = "smooth"
)

mushroom[["Cap-Color"]] <- recode(
  mushroom[["Cap-Color"]],
  n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
  p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
)

mushroom[["Bruises"]] <- recode(
  mushroom[["Bruises"]],
  t = "bruises", f = "no-bruises"
)

mushroom[["Odor"]] <- recode(
  mushroom[["Odor"]],
  a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
  m = "musty", n = "none", p = "pungent", s = "spicy"
)

mushroom[["Gill-Attachment"]] <- recode(
  mushroom[["Gill-Attachment"]],
  a = "attached", d = "descending", f = "free", n = "notched"
)

mushroom[["Gill-Spacing"]] <- recode(
  mushroom[["Gill-Spacing"]],
  c = "close", w = "crowded", d = "distant"
)

mushroom[["Gill-Size"]] <- recode(
  mushroom[["Gill-Size"]],
  b = "broad", n = "narrow"
)

mushroom[["Gill-Color"]] <- recode(
  mushroom[["Gill-Color"]],
  k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
  r = "green", o = "orange", p = "pink", u = "purple", e = "red",
  w = "white", y = "yellow"
)

mushroom[["Stalk-Shape"]] <- recode(
  mushroom[["Stalk-Shape"]],
  e = "enlarging", t = "tapering"
)

mushroom[["Stalk-Root"]] <- recode(
  mushroom[["Stalk-Root"]],
  b = "bulbous", c = "club", u = "cup",
  e = "equal", z = "rhizomorphs", r = "rooted"
)

mushroom[["Stalk-Surface-Above-Ring"]] <- recode(
  mushroom[["Stalk-Surface-Above-Ring"]],
  f = "fibrous", y = "scaly", k = "silky", s = "smooth"
)

mushroom[["Stalk-Surface-Below-Ring"]] <- recode(
  mushroom[["Stalk-Surface-Below-Ring"]],
  f = "fibrous", y = "scaly", k = "silky", s = "smooth"
)

mushroom[["Stalk-Color-Above-Ring"]] <- recode(
  mushroom[["Stalk-Color-Above-Ring"]],
  n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
  p = "pink", e = "red", w = "white", y = "yellow"
)

mushroom[["Stalk-Color-Below-Ring"]] <- recode(
  mushroom[["Stalk-Color-Below-Ring"]],
  n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
  p = "pink", e = "red", w = "white", y = "yellow"
)

mushroom[["Veil-Type"]] <- recode(
  mushroom[["Veil-Type"]],
  p = "partial", u = "universal"
)

mushroom[["Veil-Color"]] <- recode(
  mushroom[["Veil-Color"]],
  n = "brown", o = "orange", w = "white", y = "yellow"
)

mushroom[["Ring-Number"]] <- recode(
  mushroom[["Ring-Number"]],
  n = "none", o = "one", t = "two"
)

mushroom[["Ring-Type"]] <- recode(
  mushroom[["Ring-Type"]],
  c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
  n = "none", p = "pendant", s = "sheathing", z = "zone"
)

mushroom[["Spore-Print-Color"]] <- recode(
  mushroom[["Spore-Print-Color"]],
  k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
  o = "orange", u = "purple", w = "white", y = "yellow"
)

mushroom[["Population"]] <- recode(
  mushroom[["Population"]],
  a = "abundant", c = "clustered", n = "numerous",
  s = "scattered", v = "several", y = "solitary"
)

mushroom[["Habitat"]] <- recode(
  mushroom[["Habitat"]],
  g = "grasses", l = "leaves", m = "meadows", p = "paths",
  u = "urban", w = "waste", d = "woods"
)

summary(mushroom)
##        Class        Cap-Shape     Cap-Surface     Cap-Color   
##  edible   :4208   bell   : 452   fibrous:2320   brown  :2284  
##  poisonous:3916   conical:   4   grooves:   4   gray   :1840  
##                   flat   :3152   smooth :2556   red    :1500  
##                   knobbed: 828   scaly  :3244   yellow :1072  
##                   sunken :  32                  white  :1040  
##                   convex :3656                  buff   : 168  
##                                                 (Other): 220  
##        Bruises          Odor      Gill-Attachment  Gill-Spacing 
##  no-bruises:4748   none   :3528   attached: 210   close  :6812  
##  bruises   :3376   foul   :2160   free    :7914   crowded:1312  
##                    spicy  : 576                                 
##                    fishy  : 576                                 
##                    almond : 400                                 
##                    anise  : 400                                 
##                    (Other): 484                                 
##   Gill-Size        Gill-Color      Stalk-Shape     Stalk-Root  
##  broad :5612   buff     :1728   enlarging:3516   ?      :2480  
##  narrow:2512   pink     :1492   tapering :4608   bulbous:3776  
##                white    :1202                    club   : 556  
##                brown    :1048                    equal  :1120  
##                gray     : 752                    rooted : 192  
##                chocolate: 732                                  
##                (Other)  :1170                                  
##  Stalk-Surface-Above-Ring Stalk-Surface-Below-Ring Stalk-Color-Above-Ring
##  fibrous: 552             fibrous: 600             white  :4464          
##  silky  :2372             silky  :2304             pink   :1872          
##  smooth :5176             smooth :4936             gray   : 576          
##  scaly  :  24             scaly  : 284             brown  : 448          
##                                                    buff   : 432          
##                                                    orange : 192          
##                                                    (Other): 140          
##  Stalk-Color-Below-Ring   Veil-Type     Veil-Color   Ring-Number
##  white  :4384           partial:8124   brown :  96   none:  36  
##  pink   :1872                          orange:  96   one :7488  
##  gray   : 576                          white :7924   two : 600  
##  brown  : 512                          yellow:   8              
##  buff   : 432                                                   
##  orange : 192                                                   
##  (Other): 156                                                   
##       Ring-Type    Spore-Print-Color     Population      Habitat    
##  evanescent:2776   white    :2388    abundant : 384   woods  :3148  
##  flaring   :  48   brown    :1968    clustered: 340   grasses:2148  
##  large     :1296   black    :1872    numerous : 400   leaves : 832  
##  none      :  36   chocolate:1632    scattered:1248   meadows: 292  
##  pendant   :3968   green    :  72    several  :4040   paths  :1144  
##                    buff     :  48    solitary :1712   urban  : 368  
##                    (Other)  : 144                     waste  : 192




4. Question: Where are poisonous mushrooms most commonly found compared to edible mushrooms?

# Split the data by class, keeping only the columns that describe how and
# where each mushroom grows. which() drops any NA comparisons, matching the
# row selection that subset() performs.
subset1 <- mushroom[
  which(mushroom$Class == "poisonous"),
  c("Class", "Population", "Habitat")
]

subset2 <- mushroom[
  which(mushroom$Class == "edible"),
  c("Class", "Population", "Habitat")
]

summary(subset1)
##        Class          Population      Habitat    
##  edible   :   0   abundant :   0   woods  :1268  
##  poisonous:3916   clustered:  52   grasses: 740  
##                   numerous :   0   leaves : 592  
##                   scattered: 368   meadows:  36  
##                   several  :2848   paths  :1008  
##                   solitary : 648   urban  : 272  
##                                    waste  :   0
summary(subset2)
##        Class          Population      Habitat    
##  edible   :4208   abundant : 384   woods  :1880  
##  poisonous:   0   clustered: 288   grasses:1408  
##                   numerous : 400   leaves : 240  
##                   scattered: 880   meadows: 256  
##                   several  :1192   paths  : 136  
##                   solitary :1064   urban  :  96  
##                                    waste  : 192

The data shows that poisonous mushrooms are most often found in woods and on paths, while edible mushrooms are most often found in woods and grasses. No poisonous mushrooms in this sample were found in waste, and very few (36) were found in meadows.



5. Question: What do poisonous mushrooms smell like compared to edible mushrooms?

# Odor distribution for poisonous mushrooms only.
subset3 <- mushroom[which(mushroom$Class == "poisonous"), c("Class", "Odor")]
summary(subset3)
##        Class            Odor     
##  edible   :   0   foul    :2160  
##  poisonous:3916   spicy   : 576  
##                   fishy   : 576  
##                   pungent : 256  
##                   creosote: 192  
##                   none    : 120  
##                   (Other) :  36
# Odor distribution for edible mushrooms only.
subset4 <- mushroom[which(mushroom$Class == "edible"), c("Class", "Odor")]
summary(subset4)
##        Class            Odor     
##  edible   :4208   none    :3408  
##  poisonous:   0   almond  : 400  
##                   anise   : 400  
##                   creosote:   0  
##                   foul    :   0  
##                   musty   :   0  
##                   (Other) :   0
# Jittered scatter of odor by class. qplot() is deprecated as of
# ggplot2 3.4.0, so the same plot is built with ggplot() directly.
ggplot(mushroom, aes(x = Class, y = Odor)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Odor")

The majority of poisonous mushrooms smell foul, while the majority of edible mushrooms don’t smell at all.



6. Question: How does the look of the mushroom’s cap affect its edibility?

# Cap attributes (shape, color, surface) split by class.
subset5 <- mushroom[
  which(mushroom$Class == "poisonous"),
  c("Class", "Cap-Shape", "Cap-Color", "Cap-Surface")
]

subset6 <- mushroom[
  which(mushroom$Class == "edible"),
  c("Class", "Cap-Shape", "Cap-Color", "Cap-Surface")
]

summary(subset5)
##        Class        Cap-Shape      Cap-Color     Cap-Surface  
##  edible   :   0   bell   :  48   brown  :1020   fibrous: 760  
##  poisonous:3916   conical:   4   red    : 876   grooves:   4  
##                   flat   :1556   gray   : 808   smooth :1412  
##                   knobbed: 600   yellow : 672   scaly  :1740  
##                   sunken :   0   white  : 320                 
##                   convex :1708   buff   : 120                 
##                                  (Other): 100
summary(subset6)
##        Class        Cap-Shape      Cap-Color     Cap-Surface  
##  edible   :4208   bell   : 404   brown  :1264   fibrous:1560  
##  poisonous:   0   conical:   0   gray   :1032   grooves:   0  
##                   flat   :1596   white  : 720   smooth :1144  
##                   knobbed: 228   red    : 624   scaly  :1504  
##                   sunken :  32   yellow : 400                 
##                   convex :1948   pink   :  56                 
##                                  (Other): 112
# One jittered scatter per cap attribute, each plotted against class.
# qplot() is deprecated as of ggplot2 3.4.0, so the plots are built with
# ggplot() directly; backticks are needed for the hyphenated column names.
ggplot(mushroom, aes(x = Class, y = `Cap-Color`)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Cap Color")

ggplot(mushroom, aes(x = Class, y = `Cap-Shape`)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Cap Shape")

ggplot(mushroom, aes(x = Class, y = `Cap-Surface`)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Cap Surface")


It seems difficult to predict a mushroom’s edibility just by looking at its cap. On the whole, poisonous and edible mushrooms share the same common cap colors, shapes, and surfaces.

There are a few exceptions – for example, only edible mushrooms have a purple or green cap, but those mushrooms are rare in this sample.

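My takeaway from this data is that the most reliable way to judge a mushroom’s edibility is to smell it. In this sample, every foul-smelling mushroom is poisonous and every almond- or anise-scented mushroom is edible. Note, however, that 120 odorless mushrooms are poisonous, so the absence of a smell does not guarantee that a mushroom is safe.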