## if (!require("RCurl"))install.packages("RCurl")
## if (!require("plyr")) install.packages('plyr')
## if (!require("vcd")) install.packages('vcd')

library(RCurl)
## Loading required package: bitops
library(plyr)
library(vcd)
## Loading required package: grid
# Download the UCI mushroom data set (agaricus-lepiota) as one raw text string.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

mushroom_data <- getURL(url)

# Parse the downloaded text as CSV. The file has no header row, so the columns
# get the default names V1..V23; "?" entries are read as NA and all columns are
# kept as character vectors rather than factors.
mushroom_df <- read.csv(
  text = mushroom_data,
  header = FALSE,
  sep = ",",
  na.strings = "?",
  stringsAsFactors = FALSE
)
dim(mushroom_df)
## [1] 8124   23
head(mushroom_df)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g
# Subset to get 4 variables: class (edible/poisonous), odor, population, and
# habitat. These are raw columns V1, V6, V22 and V23.
mushroom_df <- mushroom_df[, c(1, 6, 22, 23)]

# Translate the single-letter codes into readable labels, storing each result
# in a new, descriptively named column next to the raw V* column.
mushroom_df$class <- revalue(
  mushroom_df$V1,
  c(e = "edible", p = "poisonous")
)
mushroom_df$odor <- revalue(
  mushroom_df$V6,
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)
mushroom_df$population <- revalue(
  mushroom_df$V22,
  c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  )
)
mushroom_df$habitat <- revalue(
  mushroom_df$V23,
  c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

# Drop the raw V* columns and keep only the recoded ones. Selecting by name
# (instead of by position 5:8) stays correct if a recode step is added or
# removed above.
mushroom_df <- mushroom_df[, c("class", "odor", "population", "habitat")]

head(mushroom_df)
##       class    odor population habitat
## 1 poisonous pungent  scattered   urban
## 2    edible  almond   numerous grasses
## 3    edible   anise   numerous meadows
## 4 poisonous pungent  scattered   urban
## 5    edible    none   abundant grasses
## 6    edible  almond   numerous grasses
str(mushroom_df)
## 'data.frame':    8124 obs. of  4 variables:
##  $ class     : chr  "poisonous" "edible" "edible" "poisonous" ...
##  $ odor      : chr  "pungent" "almond" "anise" "pungent" ...
##  $ population: chr  "scattered" "numerous" "numerous" "scattered" ...
##  $ habitat   : chr  "urban" "grasses" "meadows" "urban" ...
# Cross-tabulate edibility against habitat.
table(mushroom_df[["class"]], mushroom_df[["habitat"]])
##            
##             grasses leaves meadows paths urban waste woods
##   edible       1408    240     256   136    96   192  1880
##   poisonous     740    592      36  1008   272     0  1268
# Cross-tabulate edibility against population.
table(mushroom_df[["class"]], mushroom_df[["population"]])
##            
##             abundant clustered numerous scattered several solitary
##   edible         384       288      400       880    1192     1064
##   poisonous        0        52        0       368    2848      648
# Shaded mosaic plot of class by population by habitat, with a legend.
mosaic(
  ~ class + population + habitat,
  data = mushroom_df,
  shade = TRUE,
  legend = TRUE
)

# Reference:
#
# https://wwells.github.io/CUNYBridge_R/HW3_mushrooms.html