# Mushroom data originally from UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/Mushroom
# First, let's download the data from my GitHub
# Download the mushroom data set into the current working directory and load
# it. The file has no header row, so read.table() assigns the default column
# names V1, V2, ... to the columns.
directory <- getwd()
data_url <- "https://raw.githubusercontent.com/EyeDen/data607/master/agaricus-lepiota.data"
# file.path() builds the destination with the platform's path separator.
data_path <- file.path(directory, "agaricus-lepiota.data")
# download.file() returns 0 on success; stop early so a failed download is
# reported as such instead of surfacing later as a confusing read error.
download_status <- download.file(url = data_url, destfile = data_path)
if (download_status != 0) {
  stop(
    "Download of ", data_url, " failed with status ", download_status,
    call. = FALSE
  )
}
# Read from the exact path that was just written rather than relying on a
# relative file name resolving to the same location.
mushroom <- read.table(data_path, sep = ",", header = FALSE)
# Check that we have read the table
head(mushroom, 10)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## 7 e b s w t a f c b g e c s s w w p w o p
## 8 e b y w t l f c b n e c s s w w p w o p
## 9 p x y w t p f c n p e e s s w w p w o p
## 10 e b s y t a f c b g e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
## 7 k n m
## 8 n s m
## 9 k v g
## 10 k s m
# Now, let's examine a subset
# The data dictionary is found here: https://raw.githubusercontent.com/EyeDen/data607/master/agaricus-lepiota.names
# Class, cap-color, odor, population, and habitat sound interesting (columns V1, V4, V6, V22, and V23)
# Keep only the five columns of interest and, in the same step, replace their
# default V* names with something more understandable.
subshroom <- setNames(
  mushroom[c("V1", "V4", "V6", "V22", "V23")],
  c("Classification", "CapColor", "Odor", "Population", "Habitat")
)
# Now we need to replace the single-letter codes in each column with the descriptive labels from the data dictionary.
# plyr provides mapvalues(), which swaps every value listed in `from` for the
# value at the same position in `to`.
library(plyr)

# Class label
subshroom[["Classification"]] <- mapvalues(
  x = subshroom[["Classification"]],
  from = c("e", "p"),
  to = c("edible", "poisonous")
)

# Cap color
subshroom[["CapColor"]] <- mapvalues(
  x = subshroom[["CapColor"]],
  from = c("n", "b", "c", "g", "r", "p", "u", "e", "w", "y"),
  to = c(
    "brown", "buff", "cinnamon", "gray", "green",
    "pink", "purple", "red", "white", "yellow"
  )
)

# Odor
subshroom[["Odor"]] <- mapvalues(
  x = subshroom[["Odor"]],
  from = c("a", "l", "c", "y", "f", "m", "n", "p", "s"),
  to = c(
    "almond", "anise", "creosote", "fishy", "foul",
    "musty", "none", "pungent", "spicy"
  )
)

# Population
subshroom[["Population"]] <- mapvalues(
  x = subshroom[["Population"]],
  from = c("a", "c", "n", "s", "v", "y"),
  to = c(
    "abundant", "clustered", "numerous",
    "scattered", "several", "solitary"
  )
)

# Habitat
subshroom[["Habitat"]] <- mapvalues(
  x = subshroom[["Habitat"]],
  from = c("g", "l", "m", "p", "u", "w", "d"),
  to = c(
    "grasses", "leaves", "meadows", "paths",
    "urban", "waste", "woods"
  )
)

# All done. Preview the first 20 recoded rows.
head(subshroom, n = 20)
## Classification CapColor Odor Population Habitat
## 1 poisonous brown pungent scattered urban
## 2 edible yellow almond numerous grasses
## 3 edible white anise numerous meadows
## 4 poisonous white pungent scattered urban
## 5 edible gray none abundant grasses
## 6 edible yellow almond numerous grasses
## 7 edible white almond numerous meadows
## 8 edible white anise scattered meadows
## 9 poisonous white pungent several grasses
## 10 edible yellow almond scattered meadows
## 11 edible yellow anise numerous grasses
## 12 edible yellow almond scattered meadows
## 13 edible yellow almond scattered grasses
## 14 poisonous white pungent several urban
## 15 edible brown none abundant grasses
## 16 edible gray none solitary urban
## 17 edible white none abundant grasses
## 18 poisonous brown pungent scattered grasses
## 19 poisonous white pungent scattered urban
## 20 poisonous brown pungent scattered urban