Data Set Exploration
# Load the raw mushroom records from GitHub. The file has no header row
# (header = FALSE), so the columns receive the default names V1, V2, ...
# and every field is kept as a plain character string.
url <- "https://raw.githubusercontent.com/humbertohpgit/MSDS1stSem/master/Mushrooms"
mushrooms_df <- read.csv(file = url, header = FALSE, stringsAsFactors = FALSE)
# Number of records that were read.
nrow(mushrooms_df)
## [1] 8124
# Preview the first six raw records (six is head()'s default row count).
head(mushrooms_df, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Frequency of each code found in the first column (V1).
table(mushrooms_df[["V1"]])
##
## e p
## 4208 3916
Transformation Tasks
# Descriptive names for the 23 positional columns (V1..V23), in file order.
newcolnames <- c(
  "main_cat",
  "cap_shape", "cap_surface", "cap_color",
  "bruises", "odor",
  "gill_attachment", "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color",
  "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)
names(mushrooms_df) <- newcolnames

# Keep the class label plus five attributes, in this column order.
mushrooms_df2 <- mushrooms_df[
  c("main_cat", "odor", "spore_print_color", "cap_color", "habitat", "population")
]
head(mushrooms_df2, n = 6L)
## main_cat odor spore_print_color cap_color habitat population
## 1 p p k n u s
## 2 e a n y g n
## 3 e l n w m n
## 4 p p k w u s
## 5 e n n g g a
## 6 e a k y g n
# Expand the one-letter class codes into readable labels.
# replace() is equivalent to the indexed form x[x == code] <- label.
mushrooms_df2$main_cat <- replace(
  mushrooms_df2$main_cat,
  mushrooms_df2$main_cat == "e",
  "Edible"
)
mushrooms_df2$main_cat <- replace(
  mushrooms_df2$main_cat,
  mushrooms_df2$main_cat == "p",
  "Poisonous"
)
# Class balance after relabelling.
table(mushrooms_df2[["main_cat"]])
##
## Edible Poisonous
## 4208 3916
# Translate the single-letter codes of the remaining categorical columns into
# descriptive labels.
#
# `code_labels` holds one lookup vector per column: each element name is a
# code as it appears in the data and each value is the label that replaces it.
# Keeping the mappings as data (instead of one assignment per code) means a
# new code or column is a one-line change and the column name is written once.
code_labels <- list(
  odor = c(
    a = "Almond", l = "Anise", c = "Creosote", y = "Fishy", f = "Foul",
    m = "Musty", n = "None", p = "Pungent", s = "Spicy"
  ),
  spore_print_color = c(
    k = "Black", n = "Brown", b = "Buff", h = "Chocolate", r = "Green",
    o = "Orange", u = "Purple", w = "White", y = "Yellow"
  ),
  cap_color = c(
    n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
    p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
  ),
  habitat = c(
    g = "Grasses", l = "Leaves", m = "Meadows", p = "Paths", u = "Urban",
    w = "Waste", d = "Woods"
  ),
  population = c(
    a = "Abundant", c = "Clustered", n = "Numerous", s = "Scattered",
    v = "Several", y = "Solitary"
  )
)

# Recode every listed column in one pass. Values with no entry in the lookup
# (including NA) are left exactly as they were.
mushrooms_df2[names(code_labels)] <- Map(
  function(values, labels) {
    hit <- match(values, names(labels))  # position of each code, NA if unknown
    known <- !is.na(hit)
    values[known] <- unname(labels[hit[known]])
    values
  },
  mushrooms_df2[names(code_labels)],
  code_labels
)
Final Data Set
# Show the first 50 rows of the relabelled data set.
head(mushrooms_df2, n = 50L)
## main_cat odor spore_print_color cap_color habitat population
## 1 Poisonous Pungent Black Brown Urban Scattered
## 2 Edible Almond Brown Yellow Grasses Numerous
## 3 Edible Anise Brown White Meadows Numerous
## 4 Poisonous Pungent Black White Urban Scattered
## 5 Edible None Brown Gray Grasses Abundant
## 6 Edible Almond Black Yellow Grasses Numerous
## 7 Edible Almond Black White Meadows Numerous
## 8 Edible Anise Brown White Meadows Scattered
## 9 Poisonous Pungent Black White Grasses Several
## 10 Edible Almond Black Yellow Meadows Scattered
## 11 Edible Anise Brown Yellow Grasses Numerous
## 12 Edible Almond Black Yellow Meadows Scattered
## 13 Edible Almond Brown Yellow Grasses Scattered
## 14 Poisonous Pungent Brown White Urban Several
## 15 Edible None Black Brown Grasses Abundant
## 16 Edible None Brown Gray Urban Solitary
## 17 Edible None Brown White Grasses Abundant
## 18 Poisonous Pungent Black Brown Grasses Scattered
## 19 Poisonous Pungent Brown White Urban Scattered
## 20 Poisonous Pungent Brown Brown Urban Scattered
## 21 Edible Almond Brown Yellow Meadows Scattered
## 22 Poisonous Pungent Brown Brown Grasses Several
## 23 Edible Anise Brown Yellow Meadows Scattered
## 24 Edible Almond Brown White Meadows Numerous
## 25 Edible Anise Black White Meadows Scattered
## 26 Poisonous Pungent Brown White Grasses Several
## 27 Edible Almond Brown Yellow Meadows Numerous
## 28 Edible Anise Brown White Meadows Numerous
## 29 Edible None Black Brown Urban Solitary
## 30 Edible Almond Brown Yellow Woods Several
## 31 Edible Anise Brown Yellow Meadows Numerous
## 32 Poisonous Pungent Brown White Urban Scattered
## 33 Edible Anise Brown Yellow Meadows Numerous
## 34 Edible Anise Brown Brown Paths Solitary
## 35 Edible Anise Brown Yellow Meadows Scattered
## 36 Edible Anise Brown Yellow Woods Several
## 37 Edible None Black Gray Urban Several
## 38 Poisonous Pungent Brown Brown Urban Scattered
## 39 Edible Almond Brown Yellow Woods Several
## 40 Edible Anise Black Yellow Meadows Scattered
## 41 Edible Almond Brown Yellow Grasses Scattered
## 42 Edible Anise Black Yellow Paths Solitary
## 43 Edible None Black Brown Urban Solitary
## 44 Poisonous Pungent Brown White Grasses Several
## 45 Edible Almond Black Yellow Meadows Numerous
## 46 Edible Almond Brown White Grasses Numerous
## 47 Edible Anise Black Yellow Meadows Scattered
## 48 Edible Anise Brown White Meadows Numerous
## 49 Edible Anise Brown Yellow Paths Scattered
## 50 Edible Anise Black Yellow Paths Scattered