## Loading required package: plyr
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Import the file as a csv.
# Download the UCI mushroom data set: comma-separated, no header row, so the
# columns come in as V1..V23.
# stringsAsFactors is set explicitly because its default changed to FALSE in
# R 4.0.0; the factor-based summary() counts and plot() calls later in this
# report need factor columns.
link <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushrooms <- read.table(
  file = link,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(mushrooms)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Create a Data Frame with several columns
# Keep six of the raw columns, in this order: V1, V6, V21, V23, V4, V22.
# A single index selection yields the same data frame as binding six
# one-column data frames together, without the temporary objects.
mush <- mushrooms[c(1, 6, 21, 23, 4, 22)]
Add Column Headers
# Label the six selected columns, positionally.
names(mush) <- c(
  "Type", "Odor", "Spore-Print-Color",
  "Habitat", "Cap Color", "Population"
)
Replace each single-letter code with its full descriptive name
# Recode every single-letter code to its full label. Each `from`/`to` pair is
# matched by position, so the two vectors must stay aligned.
mush$Type <- mapvalues(
  mush$Type,
  from = c("p", "e"),
  to = c("poisonous", "edible")
)
mush$Odor <- mapvalues(
  mush$Odor,
  from = c("a", "l", "c", "y", "f", "m", "n", "p", "s"),
  to = c(
    "almond", "anise", "creosote", "fishy", "foul",
    "musty", "none", "pungent", "spicy"
  )
)
mush$Habitat <- mapvalues(
  mush$Habitat,
  from = c("g", "l", "m", "p", "u", "w", "d"),
  to = c("Grasses", "Leaves", "Meadows", "Paths", "Urban", "Waste", "Woods")
)
mush$`Cap Color` <- mapvalues(
  mush$`Cap Color`,
  from = c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y"),
  to = c(
    "Brown", "Buff", "Cinnamon", "Gray", "Green",
    "Pink", "Purple", "Red", "White", "Yellow"
  )
)
# The spore-print codes "h" (chocolate) and "r" (green) occur in the data;
# without them the raw letters show up as levels next to the full names.
mush$`Spore-Print-Color` <- mapvalues(
  mush$`Spore-Print-Color`,
  from = c("k", "n", "b", "h", "r", "o", "u", "w", "y"),
  to = c(
    "Black", "Brown", "Buff", "Chocolate", "Green",
    "Orange", "Purple", "White", "Yellow"
  )
)
mush$Population <- mapvalues(
  mush$Population,
  from = c("a", "n", "c", "s", "v", "y"),
  to = c(
    "abundant", "numerous", "clustered",
    "scattered", "several", "solitary"
  )
)
head(mush)
## Type Odor Spore-Print-Color Habitat Cap Color Population
## 1 poisonous pungent Black Urban Brown scattered
## 2 edible almond Brown Grasses Yellow numerous
## 3 edible anise Brown Meadows White numerous
## 4 poisonous pungent Black Urban White scattered
## 5 edible none Brown Grasses Gray abundant
## 6 edible almond Black Grasses Yellow numerous
# Per-column overview of the recoded data frame.
summary(mush)
## Type Odor Spore-Print-Color Habitat
## edible :4208 none :3528 White :2388 Woods :3148
## poisonous:3916 foul :2160 Brown :1968 Grasses:2148
## spicy : 576 Black :1872 Leaves : 832
## fishy : 576 h :1632 Meadows: 292
## almond : 400 r : 72 Paths :1144
## anise : 400 Buff : 48 Urban : 368
## (Other): 484 (Other): 144 Waste : 192
## Cap Color Population
## Brown :2284 abundant : 384
## Gray :1840 clustered: 340
## Red :1500 numerous : 400
## Yellow :1072 scattered:1248
## White :1040 several :4040
## Buff : 168 solitary :1712
## (Other): 220
Data Visualization
# Bar chart of edible vs. poisonous counts. barplot(table(x)) draws the same
# chart plot() gives for a factor, and also works when the column is
# character (the read.table default from R 4.0.0 onward).
barplot(
  table(mush$Type),
  main = "Proportion of Edible/Poisonous Mushrooms",
  col = c("lightgreen", "red")
)

# Bar chart of counts per habitat.
barplot(
  table(mush$Habitat),
  main = "Mushrooms Habitat",
  col = c("lightpink")
)

# Type x Habitat contingency table, drawn as side-by-side bars per habitat.
poison_by_habitat <- table(mush[c("Type", "Habitat")])
barplot(
  poison_by_habitat,
  beside = TRUE,
  legend.text = TRUE,
  col = c("lightgreen", "red"),
  main = "Frequency of Poisonous Mushrooms by Habitat",
  xlab = "Habitat",
  ylab = "Species Count"
)

# Type x Odor contingency table, drawn as side-by-side bars per odor.
odor_by_poisonous <- table(mush[c("Type", "Odor")])
barplot(
  odor_by_poisonous,
  beside = TRUE,
  legend.text = TRUE,
  col = c("lightgreen", "red"),
  main = "Frequency of Mushrooms Odor by Poisonous",
  xlab = "Odor",
  ylab = "Species Count",
  las = 3 # axis labels perpendicular to the axis so long names fit
)

# Type x Cap Color contingency table, drawn as side-by-side bars per color.
color_by_poisonous <- table(mush[c("Type", "Cap Color")])
barplot(
  color_by_poisonous,
  beside = TRUE,
  legend.text = TRUE,
  col = c("lightgreen", "red"),
  main = "Frequency of Cap Color by Poisonous",
  xlab = "Cap Color",
  ylab = "Species Count",
  las = 3 # axis labels perpendicular to the axis so long names fit
)

# Type x Population contingency table, drawn as side-by-side bars per
# population class. The object name is kept as-is even though the table holds
# population (column 6), not color.
color_by_population <- table(mush[c(1, 6)])
barplot(
  color_by_population,
  legend.text = TRUE,
  beside = TRUE,
  col = c("lightgreen", "red"),
  xlab = "Population", # was "Cap Color", copied from the previous plot
  ylab = "Species Count",
  main = "Frequency of Population by Poisonous",
  las = 3
)

Based on the data visualization, we can tell that odor is one of the best predictors of whether a mushroom is poisonous.