Load & subset the mushroom data set
## Loading required package: RCurl
## Loading required package: bitops
## Loading required package: plyr
## [1] "The data set can be found here:"
## [1] "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
Update column names and all abbreviations to words
# Give the four retained columns descriptive names.
names(mush.df) <- c("edibility", "has_bruises", "population", "habitat")

# Replace each single-letter code in `values` with its full-word label.
# `labels` is a named character vector: the names are the codes and the
# elements are the replacement words. Codes are applied one at a time, in the
# order given; values that match no code are left untouched.
# NOTE(review): presumably the columns are still character vectors here (the
# next step of the report converts them back to factors) -- confirm upstream.
expand_codes <- function(values, labels) {
  for (code in names(labels)) {
    values[values == code] <- labels[[code]]
  }
  values
}

# Edibility: e / p.
mush.df$edibility <- expand_codes(
  mush.df$edibility,
  c(e = "edible", p = "poisonous")
)

# Bruising flag: t / f become the strings "TRUE" / "FALSE" (not logicals).
mush.df$has_bruises <- expand_codes(
  mush.df$has_bruises,
  c(t = "TRUE", f = "FALSE")
)

# Population: six single-letter codes.
mush.df$population <- expand_codes(
  mush.df$population,
  c(
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary"
  )
)

# Habitat: seven single-letter codes.
mush.df$habitat <- expand_codes(
  mush.df$habitat,
  c(
    g = "grasses",
    l = "leaves",
    m = "meadows",
    p = "paths",
    u = "urban",
    w = "waste",
    d = "woods"
  )
)
Convert back to factors & validate the changes
## edibility has_bruises population habitat
## 1 poisonous TRUE scattered urban
## 2 edible TRUE numerous grasses
## 3 edible TRUE numerous meadows
## 4 poisonous TRUE scattered urban
## 5 edible FALSE abundant grasses
## 6 edible TRUE numerous grasses
## 'data.frame': 8124 obs. of 4 variables:
## $ edibility : Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
## $ has_bruises: Factor w/ 2 levels "FALSE","TRUE": 2 2 2 2 1 2 2 2 2 2 ...
## $ population : Factor w/ 6 levels "abundant","clustered",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ habitat : Factor w/ 7 levels "grasses","leaves",..: 5 1 3 5 1 1 3 3 1 3 ...