library(RCurl)
library(dplyr)
# Fetch the raw UCI mushroom data file as one string, then parse that string
# as header-less, comma-separated text, keeping every column as character
# rather than factor. Note that `mush_url` holds the downloaded file contents,
# not the URL itself.
mush_url <- getURL(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
)
mush_data <- read.csv(
  text = mush_url,
  header = FALSE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Preview the first rows; the columns still carry the default V1..V23 names.
head(mush_data)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Replace the default V1..V23 names with descriptive attribute names, listed
# in file column order (23 names for 23 columns).
#
# Columns 15 and 16 are the stalk colour above and below the ring. Both were
# previously labelled "stalk color above ring", which left a duplicated name
# and made column 16 unreachable by name; column 16 is now "below ring",
# mirroring the above/below pair used for the stalk surface columns.
colnames(mush_data) <- c(
  "edibility",
  "cap_shape",
  "cap_surface",
  "cap_color",
  "bruises",
  "odor",
  "gill attachment",
  "gill spacing",
  "gill size",
  "gill color",
  "stalk shape",
  "stalk root",
  "stalk surface above ring",
  "stalk surface below ring",
  "stalk color above ring",
  "stalk color below ring",
  "veil type",
  "veil color",
  "ring number",
  "ring type",
  "spore print color",
  "population",
  "habitat"
)

# Preview the first rows under the new names.
head(mush_data)
## edibility cap_shape cap_surface cap_color bruises odor gill attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## gill spacing gill size gill color stalk shape stalk root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## stalk surface above ring stalk surface below ring stalk color above ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk color below ring veil type veil color ring number ring type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## spore print color population habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Keep all rows but only the first six attributes (edibility through odor),
# listing each column explicitly instead of as a name range.
mush_data_cols <- subset(
  mush_data,
  select = c(edibility, cap_shape, cap_surface, cap_color, bruises, odor)
)

# Preview the reduced data frame.
head(mush_data_cols)
## edibility cap_shape cap_surface cap_color bruises odor
## 1 p x s n t p
## 2 e x s y t a
## 3 e b s w t l
## 4 p x y w t p
## 5 e x s g f n
## 6 e x y y t a
# Translate the single-letter codes in each retained column into readable
# labels. In every case_when() below, the final `TRUE ~ <column>` branch leaves
# any code that is not listed unchanged.

# Edibility: "p" becomes "poisonous"; every other non-missing value becomes
# "edible".
mush_data_cols$edibility <- ifelse(
  mush_data_cols$edibility == "p", "poisonous", "edible"
)

mush_data_cols$cap_shape <- case_when(
  mush_data_cols$cap_shape == "b" ~ "bell",
  mush_data_cols$cap_shape == "c" ~ "conical",
  mush_data_cols$cap_shape == "x" ~ "convex",
  mush_data_cols$cap_shape == "f" ~ "flat",
  mush_data_cols$cap_shape == "k" ~ "knobbed",
  mush_data_cols$cap_shape == "s" ~ "sunken",
  TRUE ~ mush_data_cols$cap_shape
)

mush_data_cols$cap_surface <- case_when(
  mush_data_cols$cap_surface == "f" ~ "fibrous",
  mush_data_cols$cap_surface == "g" ~ "grooves",
  mush_data_cols$cap_surface == "y" ~ "scaly",
  mush_data_cols$cap_surface == "s" ~ "smooth",
  TRUE ~ mush_data_cols$cap_surface
)

mush_data_cols$cap_color <- case_when(
  mush_data_cols$cap_color == "n" ~ "brown",
  mush_data_cols$cap_color == "b" ~ "buff",
  mush_data_cols$cap_color == "c" ~ "cinnamon",
  mush_data_cols$cap_color == "g" ~ "gray",
  mush_data_cols$cap_color == "r" ~ "green",
  mush_data_cols$cap_color == "p" ~ "pink",
  mush_data_cols$cap_color == "u" ~ "purple",
  mush_data_cols$cap_color == "e" ~ "red",
  mush_data_cols$cap_color == "w" ~ "white",
  mush_data_cols$cap_color == "y" ~ "yellow",
  TRUE ~ mush_data_cols$cap_color
)

# Bruises: "t" becomes "bruises"; every other non-missing value becomes "no".
mush_data_cols$bruises <- ifelse(
  mush_data_cols$bruises == "t", "bruises", "no"
)

mush_data_cols$odor <- case_when(
  mush_data_cols$odor == "a" ~ "almond",
  mush_data_cols$odor == "l" ~ "anise",
  mush_data_cols$odor == "c" ~ "creosote",
  mush_data_cols$odor == "y" ~ "fishy",
  mush_data_cols$odor == "f" ~ "foul",
  mush_data_cols$odor == "m" ~ "musty",
  mush_data_cols$odor == "n" ~ "none",
  mush_data_cols$odor == "p" ~ "pungent",
  mush_data_cols$odor == "s" ~ "spicy",
  TRUE ~ mush_data_cols$odor
)

# Preview the recoded data. This call must be on its own line: placed directly
# after the closing parenthesis of case_when() it is a parse error.
head(mush_data_cols)
## edibility cap_shape cap_surface cap_color bruises odor
## 1 poisonous convex smooth brown bruises pungent
## 2 edible convex smooth yellow bruises almond
## 3 edible bell smooth white bruises anise
## 4 poisonous convex scaly white bruises pungent
## 5 edible convex smooth gray no none
## 6 edible convex scaly yellow bruises almond