## if (!require("RCurl")) install.packages("RCurl")
## if (!require("plyr")) install.packages('plyr')
## if (!require("vcd")) install.packages('vcd')
library(RCurl)
## Loading required package: bitops
library(plyr)
library(vcd)
## Loading required package: grid
# Download the UCI mushroom data set and parse it into a data frame.
# The file is comma-separated and is read without a header row, so columns
# get the default names V1, V2, ...; "?" entries are read as NA.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# getURL() fetches the file contents, which are handed to read.csv() as text.
mushroom_data <- getURL(url)
# Use FALSE rather than F: `F` is an ordinary variable that can be reassigned.
mushroom_df <- read.csv(
  text = mushroom_data,
  header = FALSE,
  sep = ",",
  na.strings = "?",
  stringsAsFactors = FALSE
)
dim(mushroom_df)
## [1] 8124 23
head(mushroom_df)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Subset to get 4 variables: class (edible/poisonous), odor, population, and habitat
# Keep columns 1, 6, 22 and 23 of the raw data and translate each
# single-letter code into a descriptive label. mushroom_df is replaced by a
# data frame holding only the four recoded columns: class, odor, population
# and habitat. local() keeps the intermediate selection out of the workspace.
mushroom_df <- local({
  picked <- mushroom_df[, c(1, 6, 22, 23)]
  data.frame(
    class = revalue(picked$V1, c(e = "edible", p = "poisonous")),
    odor = revalue(
      picked$V6,
      c(
        a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
        m = "musty", n = "none", p = "pungent", s = "spicy"
      )
    ),
    population = revalue(
      picked$V22,
      c(
        a = "abundant", c = "clustered", n = "numerous", s = "scattered",
        v = "several", y = "solitary"
      )
    ),
    habitat = revalue(
      picked$V23,
      c(
        g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
        w = "waste", d = "woods"
      )
    ),
    stringsAsFactors = FALSE
  )
})
head(mushroom_df)
## class odor population habitat
## 1 poisonous pungent scattered urban
## 2 edible almond numerous grasses
## 3 edible anise numerous meadows
## 4 poisonous pungent scattered urban
## 5 edible none abundant grasses
## 6 edible almond numerous grasses
# Show the structure (column names and types) of the recoded data frame.
str(object = mushroom_df)
## 'data.frame': 8124 obs. of 4 variables:
## $ class : chr "poisonous" "edible" "edible" "poisonous" ...
## $ odor : chr "pungent" "almond" "anise" "pungent" ...
## $ population: chr "scattered" "numerous" "numerous" "scattered" ...
## $ habitat : chr "urban" "grasses" "meadows" "urban" ...
# Cross-tabulate class (rows) against habitat (columns).
table(mushroom_df[["class"]], mushroom_df[["habitat"]])
##
## grasses leaves meadows paths urban waste woods
## edible 1408 240 256 136 96 192 1880
## poisonous 740 592 36 1008 272 0 1268
# Cross-tabulate class (rows) against population (columns).
table(mushroom_df[["class"]], mushroom_df[["population"]])
##
## abundant clustered numerous scattered several solitary
## edible 384 288 400 880 1192 1064
## poisonous 0 52 0 368 2848 648
# Mosaic plot of class by population by habitat, with shading and its
# legend enabled.
mosaic(
  ~ class + population + habitat,
  data = mushroom_df,
  shade = TRUE,
  legend = TRUE
)
# Reference: