## Loading required package: plyr
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import the file as a csv.

# Location of the UCI Mushroom data set (comma-separated, no header row).
link <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# Request factors explicitly: R >= 4.0 defaults to stringsAsFactors = FALSE,
# which would yield character columns and break the factor-based plot() and
# summary() calls further down. On older R this matches the previous default.
mushrooms <- read.table(
  file = link,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Preview the first rows; without a header the columns are named V1, V2, ...
head(mushrooms)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Create a Data Frame with several columns

# Keep only columns 1, 6, 21, 23, 4 and 22 of the raw table, in that order.
# One index selection yields the same data frame as binding six one-column
# temporaries with cbind(), without leaving mush1..mush6 in the workspace.
mush <- mushrooms[c(1, 6, 21, 23, 4, 22)]

Add Column Headers

# Label the six selected columns, in the order they were picked.
names(mush) <- c(
  "Type",
  "Odor",
  "Spore-Print-Color",
  "Habitat",
  "Cap Color",
  "Population"
)

Replace each single-letter code with its full descriptive name

# Replace the single-letter codes in every column with readable labels.
# mapvalues() pairs `from` and `to` by position, so the two vectors in each
# call must stay in the same order.
mush$Type <- mapvalues(
  mush$Type,
  from = c("p", "e"),
  to = c("poisonous", "edible")
)
mush$Odor <- mapvalues(
  mush$Odor,
  from = c("a", "l", "c", "y", "f", "m", "n", "p", "s"),
  to = c("almond", "anise", "creosote", "fishy", "foul",
         "musty", "none", "pungent", "spicy")
)
mush$Habitat <- mapvalues(
  mush$Habitat,
  from = c("g", "l", "m", "p", "u", "w", "d"),
  to = c("Grasses", "Leaves", "Meadows", "Paths", "Urban", "Waste", "Woods")
)
mush$`Cap Color` <- mapvalues(
  mush$`Cap Color`,
  from = c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y"),
  to = c("Brown", "Buff", "Cinnamon", "Gray", "Green",
         "Pink", "Purple", "Red", "White", "Yellow")
)
# The codes "h" (chocolate) and "r" (green) were previously unmapped and
# showed up as raw letters in summary(mush); they are included here.
mush$`Spore-Print-Color` <- mapvalues(
  mush$`Spore-Print-Color`,
  from = c("k", "n", "b", "h", "r", "o", "u", "w", "y"),
  to = c("Black", "Brown", "Buff", "Chocolate", "Green",
         "Orange", "Purple", "White", "Yellow")
)
mush$Population <- mapvalues(
  mush$Population,
  from = c("a", "n", "c", "s", "v", "y"),
  to = c("abundant", "numerous", "clustered",
         "scattered", "several", "solitary")
)
# Spot-check the recoded values.
head(mush)
##        Type    Odor Spore-Print-Color Habitat Cap Color Population
## 1 poisonous pungent             Black   Urban     Brown  scattered
## 2    edible  almond             Brown Grasses    Yellow   numerous
## 3    edible   anise             Brown Meadows     White   numerous
## 4 poisonous pungent             Black   Urban     White  scattered
## 5    edible    none             Brown Grasses      Gray   abundant
## 6    edible  almond             Black Grasses    Yellow   numerous
# Per-column counts of each level in the recoded data frame.
summary(mush)
##         Type           Odor      Spore-Print-Color    Habitat    
##  edible   :4208   none   :3528   White  :2388      Woods  :3148  
##  poisonous:3916   foul   :2160   Brown  :1968      Grasses:2148  
##                   spicy  : 576   Black  :1872      Leaves : 832  
##                   fishy  : 576   h      :1632      Meadows: 292  
##                   almond : 400   r      :  72      Paths  :1144  
##                   anise  : 400   Buff   :  48      Urban  : 368  
##                   (Other): 484   (Other): 144      Waste  : 192  
##    Cap Color        Population  
##  Brown  :2284   abundant : 384  
##  Gray   :1840   clustered: 340  
##  Red    :1500   numerous : 400  
##  Yellow :1072   scattered:1248  
##  White  :1040   several  :4040  
##  Buff   : 168   solitary :1712  
##  (Other): 220

Data Visualization

# Bar chart of how many observations fall in each Type level.
plot(
  mush$Type,
  main = "Proportion of Edible/Poisonous Mushrooms",
  col = c("lightgreen", "red")
)

# Bar chart of how many observations fall in each Habitat level.
plot(
  mush$Habitat,
  main = "Mushrooms Habitat",
  col = "lightpink"
)

# Cross-tabulate Type against Habitat and draw one pair of bars per habitat.
poison_by_habitat <- table(mush[c("Type", "Habitat")])
barplot(
  poison_by_habitat,
  main = "Frequency of Poisonous Mushrooms by Habitat",
  xlab = "Habitat",
  ylab = "Species Count",
  col = c("lightgreen", "red"),
  beside = TRUE,
  legend.text = TRUE
)

# Same layout for Type against Odor; las = 3 turns the axis labels vertical.
odor_by_poisonous <- table(mush[c("Type", "Odor")])
barplot(
  odor_by_poisonous,
  main = "Frequency of Mushrooms Odor by Poisonous",
  xlab = "Odor",
  ylab = "Species Count",
  col = c("lightgreen", "red"),
  beside = TRUE,
  legend.text = TRUE,
  las = 3
)

# Same layout for Type against Cap Color.
color_by_poisonous <- table(mush[c("Type", "Cap Color")])
barplot(
  color_by_poisonous,
  main = "Frequency of Cap Color by Poisonous",
  xlab = "Cap Color",
  ylab = "Species Count",
  col = c("lightgreen", "red"),
  beside = TRUE,
  legend.text = TRUE,
  las = 3
)

# Cross-tabulate Type (column 1) against Population (column 6).
# NOTE: despite its name, this table holds Population counts, not cap colours;
# the variable name is kept unchanged so any later use still resolves.
color_by_population <- table(mush[c(1, 6)])
# The x axis shows the Population levels, so label it accordingly (it was
# previously labelled "Cap Color", copied from the plot above).
barplot(
  color_by_population,
  legend.text = TRUE,
  beside = TRUE,
  col = c("lightgreen", "red"),
  xlab = "Population",
  ylab = "Species Count",
  main = "Frequency of Population by Poisonous",
  las = 3
)

Based on the data visualizations, we can tell that odor is one of the best predictors of whether a mushroom is poisonous.