1. Load the Mushroom dataset from the UCI Machine Learning Repository:
# Read the raw mushroom data. The file has no header row, so the columns
# arrive as V1..V23. stringsAsFactors = TRUE is set explicitly because it
# stopped being the default in R 4.0.0; the per-level counts that summary()
# prints in the later steps need every column to be a factor.
mushroom <- read.table(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  sep = ",",
  header = FALSE,
  stringsAsFactors = TRUE
)
print(head(mushroom))
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
2. Rename the columns:
# Replace the default V1..V23 headers with descriptive attribute names.
# The names contain hyphens, so later code must quote them with backticks.
colnames(mushroom) <- c(
  "Class",
  "Cap-Shape",
  "Cap-Surface",
  "Cap-Color",
  "Bruises",
  "Odor",
  "Gill-Attachment",
  "Gill-Spacing",
  "Gill-Size",
  "Gill-Color",
  "Stalk-Shape",
  "Stalk-Root",
  "Stalk-Surface-Above-Ring",
  "Stalk-Surface-Below-Ring",
  "Stalk-Color-Above-Ring",
  "Stalk-Color-Below-Ring",
  "Veil-Type",
  "Veil-Color",
  "Ring-Number",
  "Ring-Type",
  "Spore-Print-Color",
  "Population",
  "Habitat"
)
print(head(mushroom))
## Class Cap-Shape Cap-Surface Cap-Color Bruises Odor Gill-Attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## Gill-Spacing Gill-Size Gill-Color Stalk-Shape Stalk-Root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## Stalk-Surface-Above-Ring Stalk-Surface-Below-Ring Stalk-Color-Above-Ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## Stalk-Color-Below-Ring Veil-Type Veil-Color Ring-Number Ring-Type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## Spore-Print-Color Population Habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
3. Recode the single-letter values in each column to descriptive labels:
# Translate each column's single-letter codes into readable labels.
# One recode() call per column; codes not listed in a call are left as-is
# (for example, the "?" entries in Stalk-Root are not mapped here).
mushroom[["Class"]] <- recode(
  mushroom[["Class"]],
  e = "edible", p = "poisonous"
)
mushroom[["Cap-Shape"]] <- recode(
  mushroom[["Cap-Shape"]],
  b = "bell", c = "conical", x = "convex",
  f = "flat", k = "knobbed", s = "sunken"
)
mushroom[["Cap-Surface"]] <- recode(
  mushroom[["Cap-Surface"]],
  f = "fibrous", g = "grooves", y = "scaly", s = "smooth"
)
mushroom[["Cap-Color"]] <- recode(
  mushroom[["Cap-Color"]],
  n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
  p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
)
mushroom[["Bruises"]] <- recode(
  mushroom[["Bruises"]],
  t = "bruises", f = "no-bruises"
)
mushroom[["Odor"]] <- recode(
  mushroom[["Odor"]],
  a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
  m = "musty", n = "none", p = "pungent", s = "spicy"
)
mushroom[["Gill-Attachment"]] <- recode(
  mushroom[["Gill-Attachment"]],
  a = "attached", d = "descending", f = "free", n = "notched"
)
mushroom[["Gill-Spacing"]] <- recode(
  mushroom[["Gill-Spacing"]],
  c = "close", w = "crowded", d = "distant"
)
mushroom[["Gill-Size"]] <- recode(
  mushroom[["Gill-Size"]],
  b = "broad", n = "narrow"
)
mushroom[["Gill-Color"]] <- recode(
  mushroom[["Gill-Color"]],
  k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
  r = "green", o = "orange", p = "pink", u = "purple", e = "red",
  w = "white", y = "yellow"
)
mushroom[["Stalk-Shape"]] <- recode(
  mushroom[["Stalk-Shape"]],
  e = "enlarging", t = "tapering"
)
mushroom[["Stalk-Root"]] <- recode(
  mushroom[["Stalk-Root"]],
  b = "bulbous", c = "club", u = "cup", e = "equal",
  z = "rhizomorphs", r = "rooted"
)
mushroom[["Stalk-Surface-Above-Ring"]] <- recode(
  mushroom[["Stalk-Surface-Above-Ring"]],
  f = "fibrous", y = "scaly", k = "silky", s = "smooth"
)
mushroom[["Stalk-Surface-Below-Ring"]] <- recode(
  mushroom[["Stalk-Surface-Below-Ring"]],
  f = "fibrous", y = "scaly", k = "silky", s = "smooth"
)
mushroom[["Stalk-Color-Above-Ring"]] <- recode(
  mushroom[["Stalk-Color-Above-Ring"]],
  n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
  p = "pink", e = "red", w = "white", y = "yellow"
)
mushroom[["Stalk-Color-Below-Ring"]] <- recode(
  mushroom[["Stalk-Color-Below-Ring"]],
  n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
  p = "pink", e = "red", w = "white", y = "yellow"
)
mushroom[["Veil-Type"]] <- recode(
  mushroom[["Veil-Type"]],
  p = "partial", u = "universal"
)
mushroom[["Veil-Color"]] <- recode(
  mushroom[["Veil-Color"]],
  n = "brown", o = "orange", w = "white", y = "yellow"
)
mushroom[["Ring-Number"]] <- recode(
  mushroom[["Ring-Number"]],
  n = "none", o = "one", t = "two"
)
mushroom[["Ring-Type"]] <- recode(
  mushroom[["Ring-Type"]],
  c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
  n = "none", p = "pendant", s = "sheathing", z = "zone"
)
mushroom[["Spore-Print-Color"]] <- recode(
  mushroom[["Spore-Print-Color"]],
  k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
  o = "orange", u = "purple", w = "white", y = "yellow"
)
mushroom[["Population"]] <- recode(
  mushroom[["Population"]],
  a = "abundant", c = "clustered", n = "numerous",
  s = "scattered", v = "several", y = "solitary"
)
mushroom[["Habitat"]] <- recode(
  mushroom[["Habitat"]],
  g = "grasses", l = "leaves", m = "meadows", p = "paths",
  u = "urban", w = "waste", d = "woods"
)

# Per-column level counts after recoding.
summary(mushroom)
## Class Cap-Shape Cap-Surface Cap-Color
## edible :4208 bell : 452 fibrous:2320 brown :2284
## poisonous:3916 conical: 4 grooves: 4 gray :1840
## flat :3152 smooth :2556 red :1500
## knobbed: 828 scaly :3244 yellow :1072
## sunken : 32 white :1040
## convex :3656 buff : 168
## (Other): 220
## Bruises Odor Gill-Attachment Gill-Spacing
## no-bruises:4748 none :3528 attached: 210 close :6812
## bruises :3376 foul :2160 free :7914 crowded:1312
## spicy : 576
## fishy : 576
## almond : 400
## anise : 400
## (Other): 484
## Gill-Size Gill-Color Stalk-Shape Stalk-Root
## broad :5612 buff :1728 enlarging:3516 ? :2480
## narrow:2512 pink :1492 tapering :4608 bulbous:3776
## white :1202 club : 556
## brown :1048 equal :1120
## gray : 752 rooted : 192
## chocolate: 732
## (Other) :1170
## Stalk-Surface-Above-Ring Stalk-Surface-Below-Ring Stalk-Color-Above-Ring
## fibrous: 552 fibrous: 600 white :4464
## silky :2372 silky :2304 pink :1872
## smooth :5176 smooth :4936 gray : 576
## scaly : 24 scaly : 284 brown : 448
## buff : 432
## orange : 192
## (Other): 140
## Stalk-Color-Below-Ring Veil-Type Veil-Color Ring-Number
## white :4384 partial:8124 brown : 96 none: 36
## pink :1872 orange: 96 one :7488
## gray : 576 white :7924 two : 600
## brown : 512 yellow: 8
## buff : 432
## orange : 192
## (Other): 156
## Ring-Type Spore-Print-Color Population Habitat
## evanescent:2776 white :2388 abundant : 384 woods :3148
## flaring : 48 brown :1968 clustered: 340 grasses:2148
## large :1296 black :1872 numerous : 400 leaves : 832
## none : 36 chocolate:1632 scattered:1248 meadows: 292
## pendant :3968 green : 72 several :4040 paths :1144
## buff : 48 solitary :1712 urban : 368
## (Other) : 144 waste : 192
4. Question: Where are poisonous mushrooms most commonly found compared to edible mushrooms?
# Split the location-related columns by class: subset1 holds the poisonous
# rows, subset2 the edible rows. which() drops any NA comparison results,
# matching how subset() treats them.
subset1 <- mushroom[
  which(mushroom$Class == "poisonous"),
  c("Class", "Population", "Habitat"),
  drop = FALSE
]
subset2 <- mushroom[
  which(mushroom$Class == "edible"),
  c("Class", "Population", "Habitat"),
  drop = FALSE
]
summary(subset1)
## Class Population Habitat
## edible : 0 abundant : 0 woods :1268
## poisonous:3916 clustered: 52 grasses: 740
## numerous : 0 leaves : 592
## scattered: 368 meadows: 36
## several :2848 paths :1008
## solitary : 648 urban : 272
## waste : 0
# Population and habitat counts for the edible rows (subset2), for
# comparison with the poisonous counts from subset1.
summary(subset2)
## Class Population Habitat
## edible :4208 abundant : 384 woods :1880
## poisonous: 0 clustered: 288 grasses:1408
## numerous : 400 leaves : 240
## scattered: 880 meadows: 256
## several :1192 paths : 136
## solitary :1064 urban : 96
## waste : 192
5. Question: What do poisonous mushrooms smell like compared to edible mushrooms?
# Odor counts for the poisonous rows only. which() drops any NA comparison
# results, matching how subset() treats them.
subset3 <- mushroom[
  which(mushroom$Class == "poisonous"),
  c("Class", "Odor"),
  drop = FALSE
]
summary(subset3)
## Class Odor
## edible : 0 foul :2160
## poisonous:3916 spicy : 576
## fishy : 576
## pungent : 256
## creosote: 192
## none : 120
## (Other) : 36
# Odor counts for the edible rows only. which() drops any NA comparison
# results, matching how subset() treats them.
subset4 <- mushroom[
  which(mushroom$Class == "edible"),
  c("Class", "Odor"),
  drop = FALSE
]
summary(subset4)
## Class Odor
## edible :4208 none :3408
## poisonous: 0 almond : 400
## anise : 400
## creosote: 0
## foul : 0
## musty : 0
## (Other) : 0
# Jittered scatter of odor against class. Built with ggplot() because
# qplot() is deprecated as of ggplot2 3.4.0; the mapping, geom and axis
# labels are the same as in the former qplot() call.
ggplot(mushroom, aes(x = Class, y = Odor)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Odor")
6. Question: How does the look of the mushroom’s cap affect its edibility?
# Split the cap-related columns by class: subset5 holds the poisonous rows,
# subset6 the edible rows. which() drops any NA comparison results,
# matching how subset() treats them.
subset5 <- mushroom[
  which(mushroom$Class == "poisonous"),
  c("Class", "Cap-Shape", "Cap-Color", "Cap-Surface"),
  drop = FALSE
]
subset6 <- mushroom[
  which(mushroom$Class == "edible"),
  c("Class", "Cap-Shape", "Cap-Color", "Cap-Surface"),
  drop = FALSE
]
summary(subset5)
## Class Cap-Shape Cap-Color Cap-Surface
## edible : 0 bell : 48 brown :1020 fibrous: 760
## poisonous:3916 conical: 4 red : 876 grooves: 4
## flat :1556 gray : 808 smooth :1412
## knobbed: 600 yellow : 672 scaly :1740
## sunken : 0 white : 320
## convex :1708 buff : 120
## (Other): 100
# Cap shape, color and surface counts for the edible rows (subset6), for
# comparison with the poisonous counts from subset5.
summary(subset6)
## Class Cap-Shape Cap-Color Cap-Surface
## edible :4208 bell : 404 brown :1264 fibrous:1560
## poisonous: 0 conical: 0 gray :1032 grooves: 0
## flat :1596 white : 720 smooth :1144
## knobbed: 228 red : 624 scaly :1504
## sunken : 32 yellow : 400
## convex :1948 pink : 56
## (Other): 112
# Jittered scatters of each cap attribute against class. Built with
# ggplot() because qplot() is deprecated as of ggplot2 3.4.0; mappings,
# geoms and axis labels are the same as in the former qplot() calls.
# The hyphenated column names must be backtick-quoted inside aes().
ggplot(mushroom, aes(x = Class, y = `Cap-Color`)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Cap Color")
ggplot(mushroom, aes(x = Class, y = `Cap-Shape`)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Cap Shape")
ggplot(mushroom, aes(x = Class, y = `Cap-Surface`)) +
  geom_jitter() +
  labs(x = "Type of Mushroom", y = "Cap Surface")