# Load the UCI mushroom data (agaricus-lepiota.data).
#
# The file has no header row. With read.csv()'s default header = TRUE the
# first record was consumed as the column names ("p", "x", "s", ...), leaving
# 8123 rows instead of 8124. header = FALSE keeps that record as data.
#
# col.names deliberately reproduces the placeholder names the old call
# generated, because the rename() step further down is keyed on them.
# stringsAsFactors = TRUE keeps the columns as factors on R >= 4.0 as well,
# which is what the str() listing at the end of this script shows.
#
# NOTE: the "##" listings printed after later steps were captured before this
# fix; they lack the first record and their row numbers are one lower.
mushrooms <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  col.names = c(
    "p", "x", "s", "n", "t", "p.1", "f", "c", "n.1", "k", "e", "e.1",
    "s.1", "s.2", "w", "w.1", "p.2", "w.2", "o", "p.3", "k.1", "s.3", "u"
  ),
  stringsAsFactors = TRUE
)
# Preview the first ten records (row 1 is the record that used to be lost).
head(mushrooms, 10)
## p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1 p x s n t p f c n k e e s s w w p w o p k s u
## 2 e x s y t a f c b k e c s s w w p w o p n n g
## 3 e b s w t l f c b n e c s s w w p w o p n n m
## 4 p x y w t p f c n n e e s s w w p w o p k s u
## 5 e x s g f n f w b k t e s s w w p w o e n a g
## 6 e x y y t a f c b n e c s s w w p w o p k n g
## 7 e b s w t a f c b g e c s s w w p w o p k n m
## 8 e b y w t l f c b n e c s s w w p w o p n s m
## 9 p x y w t p f c n p e e s s w w p w o p k v g
## 10 e b s y t a f c b g e c s s w w p w o p k s m
# Preview the last five records.
tail(mushrooms, 5)
## p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 8120 e k s n f n a c b y e ? s s o o p o o p b c l
## 8121 e x s n f n a c b y e ? s s o o p n o p b v l
## 8122 e f s n f n a c b n e ? s s o o p o o p b c l
## 8123 p k y n f y f c n b t ? s k w w p w o e w v l
## 8124 e x s n f n a c b y e ? s s o o p o o p o c l
library(plyr)
# Swap the single-letter placeholder headers for descriptive attribute names.
# The new names are runtime column names and are reproduced exactly, including
# the embedded spaces and the "gill attachement" spelling.
mushroom.colnames <- rename(
  x = mushrooms,
  replace = c(
    "p"   = "classes",
    "x"   = "cap shape",
    "s"   = "cap surface",
    "n"   = "cap color",
    "t"   = "bruises",
    "p.1" = "odor",
    "f"   = "gill attachement",
    "c"   = "gill spacing",
    "n.1" = "gill size",
    "k"   = "gill color",
    "e"   = "stalk shape",
    "e.1" = "stalk root",
    "s.1" = "stalk surface above ring",
    "s.2" = "stalk surface below ring",
    "w"   = "stalk color above veil",
    "w.1" = "stalk color below veil",
    "p.2" = "veil type",
    "w.2" = "veil color",
    "o"   = "ring number",
    "p.3" = "ring type",
    "k.1" = "spore print color",
    "s.3" = "population",
    "u"   = "habitat"
  )
)
# Preview the first ten renamed records.
head(mushroom.colnames, n = 10)
## classes cap shape cap surface cap color bruises odor gill attachement
## 1 e x s y t a f
## 2 e b s w t l f
## 3 p x y w t p f
## 4 e x s g f n f
## 5 e x y y t a f
## 6 e b s w t a f
## 7 e b y w t l f
## 8 p x y w t p f
## 9 e b s y t a f
## 10 e x y y t l f
## gill spacing gill size gill color stalk shape stalk root
## 1 c b k e c
## 2 c b n e c
## 3 c n n e e
## 4 w b k t e
## 5 c b n e c
## 6 c b g e c
## 7 c b n e c
## 8 c n p e e
## 9 c b g e c
## 10 c b g e c
## stalk surface above ring stalk surface below ring
## 1 s s
## 2 s s
## 3 s s
## 4 s s
## 5 s s
## 6 s s
## 7 s s
## 8 s s
## 9 s s
## 10 s s
## stalk color above veil stalk color below veil veil type veil color
## 1 w w p w
## 2 w w p w
## 3 w w p w
## 4 w w p w
## 5 w w p w
## 6 w w p w
## 7 w w p w
## 8 w w p w
## 9 w w p w
## 10 w w p w
## ring number ring type spore print color population habitat
## 1 o p n n g
## 2 o p n n m
## 3 o p k s u
## 4 o e n a g
## 5 o p k n g
## 6 o p k n m
## 7 o p n s m
## 8 o p k v g
## 9 o p k s m
## 10 o p n n g
# Keep only the four attributes recoded below; the column order follows the
# selection vector. drop = FALSE makes sure a data frame is returned.
mushroom.subset <- mushroom.colnames[
  , c("classes", "habitat", "population", "cap color"),
  drop = FALSE
]
# Preview the first ten records of the reduced data frame.
head(mushroom.subset, n = 10)
## classes habitat population cap color
## 1 e g n y
## 2 e m n w
## 3 p u s w
## 4 e g a g
## 5 e g n y
## 6 e m n w
## 7 e m s w
## 8 p g v w
## 9 e m s y
## 10 e g n y
# Spell out the class codes: "e" -> "edible", "p" -> "poisonous".
# transform() rebuilds the data frame, which also turns the "cap color"
# header into the syntactic "cap.color" seen in the listing below.
mushroom.subset <- transform(
  mushroom.subset,
  classes = revalue(
    x = classes,
    replace = c(
      "e" = "edible",
      "p" = "poisonous"
    )
  )
)
head(mushroom.subset, n = 10)
## classes habitat population cap.color
## 1 edible g n y
## 2 edible m n w
## 3 poisonous u s w
## 4 edible g a g
## 5 edible g n y
## 6 edible m n w
## 7 edible m s w
## 8 poisonous g v w
## 9 edible m s y
## 10 edible g n y
# Replace the one-letter habitat codes with their full names.
mushroom.subset <- transform(
  mushroom.subset,
  habitat = revalue(
    x = habitat,
    replace = c(
      "g" = "grasses",
      "l" = "leaves",
      "m" = "meadows",
      "p" = "paths",
      "u" = "urban",
      "w" = "waste",
      "d" = "woods"
    )
  )
)
head(mushroom.subset, n = 10)
## classes habitat population cap.color
## 1 edible grasses n y
## 2 edible meadows n w
## 3 poisonous urban s w
## 4 edible grasses a g
## 5 edible grasses n y
## 6 edible meadows n w
## 7 edible meadows s w
## 8 poisonous grasses v w
## 9 edible meadows s y
## 10 edible grasses n y
# Replace the one-letter population codes with their full names.
mushroom.subset <- transform(
  mushroom.subset,
  population = revalue(
    x = population,
    replace = c(
      "a" = "abundant",
      "c" = "clustered",
      "n" = "numerous",
      "s" = "scattered",
      "v" = "several",
      "y" = "solitary"
    )
  )
)
head(mushroom.subset, n = 10)
## classes habitat population cap.color
## 1 edible grasses numerous y
## 2 edible meadows numerous w
## 3 poisonous urban scattered w
## 4 edible grasses abundant g
## 5 edible grasses numerous y
## 6 edible meadows numerous w
## 7 edible meadows scattered w
## 8 poisonous grasses several w
## 9 edible meadows scattered y
## 10 edible grasses numerous y
# Replace the one-letter cap colour codes with their full names. The column is
# addressed as cap.color because the earlier transform() calls already
# converted the "cap color" header to that form.
mushroom.subset <- transform(
  mushroom.subset,
  cap.color = revalue(
    x = cap.color,
    replace = c(
      "n" = "brown",
      "b" = "buff",
      "c" = "cinnamon",
      "g" = "gray",
      "r" = "green",
      "p" = "pink",
      "u" = "purple",
      "e" = "red",
      "w" = "white",
      "y" = "yellow"
    )
  )
)
head(mushroom.subset, n = 10)
## classes habitat population cap.color
## 1 edible grasses numerous yellow
## 2 edible meadows numerous white
## 3 poisonous urban scattered white
## 4 edible grasses abundant gray
## 5 edible grasses numerous yellow
## 6 edible meadows numerous white
## 7 edible meadows scattered white
## 8 poisonous grasses several white
## 9 edible meadows scattered yellow
## 10 edible grasses numerous yellow
# Show the last five recoded records.
tail(mushroom.subset, n = 5)
## classes habitat population cap.color
## 8119 edible leaves clustered brown
## 8120 edible leaves several brown
## 8121 edible leaves clustered brown
## 8122 poisonous leaves several brown
## 8123 edible leaves clustered brown
# Print the structure (column types and factor levels) of the recoded subset.
str(object = mushroom.subset)
## 'data.frame': 8123 obs. of 4 variables:
## $ classes : Factor w/ 2 levels "edible","poisonous": 1 1 2 1 1 1 1 2 1 1 ...
## $ habitat : Factor w/ 7 levels "woods","grasses",..: 2 4 6 2 2 4 4 2 4 2 ...
## $ population: Factor w/ 6 levels "abundant","clustered",..: 3 3 4 1 3 3 4 5 4 3 ...
## $ cap.color : Factor w/ 10 levels "buff","cinnamon",..: 10 9 9 4 10 9 9 9 10 10 ...