# Download the raw UCI mushroom data set and parse it into a data frame.
location <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# NOTE(review): getURL() is presumably RCurl::getURL, attached earlier in the
# file -- confirm the library() call exists.
mushroomData <- getURL(location)
# The file has no header row, so read.csv() assigns default names (V1, V2, ...).
# stringsAsFactors = TRUE is spelled out because R >= 4.0 changed the default
# to FALSE; the str() output shown later in this document expects factors.
mushroomDF <- read.csv(
  text = mushroomData,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Preview the first rows to sanity-check the parse.
head(mushroomDF)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
**The columns corresponding to cap-color, odor, gill-color, and spore-print-color will be selected. Since the first column is the poisonous/edible indicator, this translates to columns 1, 4, 6, 10, and 21.**
# Keep only the class label (column 1) and the four attributes of interest:
# cap-color (4), odor (6), gill-color (10) and spore-print-color (21).
mushroomDF <- mushroomDF[c(1, 4, 6, 10, 21)]
# Inspect the structure of the reduced data frame.
str(mushroomDF)
## 'data.frame': 8124 obs. of 5 variables:
## $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
# Replace the default V* column names with descriptive attribute names.
names(mushroomDF) <- c(
  "edibility",
  "cap-color",
  "odor",
  "gill-color",
  "spore-print-color"
)
# Preview the renamed data frame.
head(mushroomDF)
## edibility cap-color odor gill-color spore-print-color
## 1 p n p k k
## 2 e y a k n
## 3 e w l n n
## 4 p w p n k
## 5 e g n k n
## 6 e y a n k
According to the data description (https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names), the single-letter abbreviations expand as follows:
# Expand the single-letter codes in every column into readable labels.
# Each mapping is written as "code" = "label", one entry per line.
# NOTE(review): revalue() is presumably plyr::revalue, attached earlier in the
# file -- confirm.

# Class label.
mushroomDF[["edibility"]] <- revalue(
  mushroomDF[["edibility"]],
  c(
    "e" = "edible",
    "p" = "poisonous"
  )
)

# Cap color.
mushroomDF[["cap-color"]] <- revalue(
  mushroomDF[["cap-color"]],
  c(
    "n" = "brown",
    "b" = "buff",
    "c" = "cinnamon",
    "g" = "gray",
    "r" = "green",
    "p" = "pink",
    "u" = "purple",
    "e" = "red",
    "w" = "white",
    "y" = "yellow"
  )
)

# Odor.
mushroomDF[["odor"]] <- revalue(
  mushroomDF[["odor"]],
  c(
    "a" = "almond",
    "l" = "anise",
    "c" = "creosote",
    "y" = "fishy",
    "f" = "foul",
    "m" = "musty",
    "n" = "none",
    "p" = "pungent",
    "s" = "spicy"
  )
)

# Gill color.
mushroomDF[["gill-color"]] <- revalue(
  mushroomDF[["gill-color"]],
  c(
    "k" = "black",
    "n" = "brown",
    "b" = "buff",
    "h" = "chocolate",
    "g" = "gray",
    "r" = "green",
    "o" = "orange",
    "p" = "pink",
    "u" = "purple",
    "e" = "red",
    "w" = "white",
    "y" = "yellow"
  )
)

# Spore print color.
mushroomDF[["spore-print-color"]] <- revalue(
  mushroomDF[["spore-print-color"]],
  c(
    "k" = "black",
    "n" = "brown",
    "b" = "buff",
    "h" = "chocolate",
    "r" = "green",
    "o" = "orange",
    "u" = "purple",
    "w" = "white",
    "y" = "yellow"
  )
)

# Inspect the structure after recoding.
str(mushroomDF)
## 'data.frame': 8124 obs. of 5 variables:
## $ edibility : Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
## $ cap-color : Factor w/ 10 levels "buff","cinnamon",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ odor : Factor w/ 9 levels "almond","creosote",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ gill-color : Factor w/ 12 levels "buff","red","gray",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ spore-print-color: Factor w/ 9 levels "buff","chocolate",..: 3 4 4 3 4 3 3 4 3 3 ...
# Preview the first six rows of the recoded data frame.
head(mushroomDF, n = 6L)
## edibility cap-color odor gill-color spore-print-color
## 1 poisonous brown pungent black black
## 2 edible yellow almond black brown
## 3 edible white anise brown brown
## 4 poisonous white pungent brown black
## 5 edible gray none black brown
## 6 edible yellow almond brown black