First, we load the data from the online source and save it as a data frame. We then subset it to four variables, name those columns, and replace the abbreviations with full descriptive values.
# Read the raw file straight from the URL; it has no header row, and the
# single-letter codes are kept as character strings rather than factors.
src <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mush <- read.csv(url(src), header = FALSE, stringsAsFactors = FALSE)
mushroom <- as.data.frame(mush)

# Keep four of the columns (positions 1, 19, 22 and 23) and name them.
keep_cols <- c(1, 19, 22, 23)
newMush <- setNames(
  mushroom[keep_cols],
  c("classes", "ringNumber", "population", "habitat")
)

# Code -> label pairs for each kept column, based on the provided data
# dictionary. `code` and `label` are parallel vectors.
recode_table <- list(
  classes = list(
    code = c("p", "e"),
    label = c("poisonous", "edible")
  ),
  ringNumber = list(
    code = c("n", "o", "t"),
    label = c("none", "one", "two")
  ),
  population = list(
    code = c("a", "c", "n", "s", "v", "y"),
    label = c("abundant", "clustered", "numerous", "scattered", "several",
              "solitary")
  ),
  habitat = list(
    code = c("g", "l", "m", "p", "u", "w", "d"),
    label = c("grasses", "leaves", "meadows", "paths", "urban", "waste",
              "woods")
  )
)

# Swap every recognised code for its label; any value that is not listed
# for a column is left exactly as it was read.
for (column in names(recode_table)) {
  spec <- recode_table[[column]]
  hit <- match(newMush[[column]], spec$code)
  known <- !is.na(hit)
  newMush[[column]][known] <- spec$label[hit[known]]
}

# Show the first six recoded rows.
head(newMush, n = 6L)
## classes ringNumber population habitat
## 1 poisonous one scattered urban
## 2 edible one numerous grasses
## 3 edible one numerous meadows
## 4 poisonous one scattered urban
## 5 edible one abundant grasses
## 6 edible one numerous grasses
Looking at contingency tables, it seems that the population type with the largest number of poisonous mushrooms is ‘several’, and the habitat with the largest number of poisonous mushrooms is ‘woods’.
# Cross-tabulate class (rows) against population type (columns).
table(newMush[["classes"]], newMush[["population"]])
##
## abundant clustered numerous scattered several solitary
## edible 384 288 400 880 1192 1064
## poisonous 0 52 0 368 2848 648
# Cross-tabulate class (rows) against habitat (columns).
table(newMush[["classes"]], newMush[["habitat"]])
##
## grasses leaves meadows paths urban waste woods
## edible 1408 240 256 136 96 192 1880
## poisonous 740 592 36 1008 272 0 1268