Data Set Exploration

# Location of the raw mushroom data file (comma-separated, read without a
# header row).
url <- "https://raw.githubusercontent.com/humbertohpgit/MSDS1stSem/master/Mushrooms"

# With header = FALSE the columns receive the default names V1, V2, ...;
# stringsAsFactors = FALSE keeps text columns as character vectors.
mushrooms_df <- read.csv(
  file = url,
  header = FALSE,
  stringsAsFactors = FALSE
)

# Number of records loaded.
nrow(mushrooms_df)
## [1] 8124

# Preview the first six records.
head(mushrooms_df, n = 6L)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

# Frequency of each code in the first column (V1).
table(mushrooms_df[["V1"]])
## 
##    e    p 
## 4208 3916

Transformation Tasks

# Descriptive names for the 23 columns, listed in the same order as the
# columns of mushrooms_df.
newcolnames <- c(
  "main_cat",
  "cap_shape", "cap_surface", "cap_color",
  "bruises", "odor",
  "gill_attachment", "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color",
  "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)

# Replace the current column names with the descriptive ones.
colnames(mushrooms_df) <- newcolnames

# Keep six of the columns: the main category plus five descriptive attributes.
mushrooms_df2 <- subset(
  mushrooms_df,
  select = c(main_cat, odor, spore_print_color, cap_color, habitat, population)
)
head(mushrooms_df2)
##   main_cat odor spore_print_color cap_color habitat population
## 1        p    p                 k         n       u          s
## 2        e    a                 n         y       g          n
## 3        e    l                 n         w       m          n
## 4        p    p                 k         w       u          s
## 5        e    n                 n         g       g          a
## 6        e    a                 k         y       g          n

# Code -> label lookup tables, one named character vector per column.
# Element names are the single-letter codes found in the data; values are the
# readable labels that replace them.
code_labels <- list(
  main_cat = c(e = "Edible", p = "Poisonous"),
  odor = c(
    a = "Almond", l = "Anise", c = "Creosote", y = "Fishy", f = "Foul",
    m = "Musty", n = "None", p = "Pungent", s = "Spicy"
  ),
  spore_print_color = c(
    k = "Black", n = "Brown", b = "Buff", h = "Chocolate", r = "Green",
    o = "Orange", u = "Purple", w = "White", y = "Yellow"
  ),
  cap_color = c(
    n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
    p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
  ),
  habitat = c(
    g = "Grasses", l = "Leaves", m = "Meadows", p = "Paths", u = "Urban",
    w = "Waste", d = "Woods"
  ),
  population = c(
    a = "Abundant", c = "Clustered", n = "Numerous", s = "Scattered",
    v = "Several", y = "Solitary"
  )
)

# Replace each code in `codes` with its label from `labels`.
#
# codes:  character vector of codes.
# labels: named character vector; names are codes, values are labels.
#
# Returns `codes` with every value that appears in names(labels) swapped for
# the matching label. Values without an entry (including NA) are returned
# unchanged, which is what a chain of `x[x == "e"] <- "Edible"` assignments
# (or the equivalent replace(x, x == "e", "Edible")) would also do.
recode_codes <- function(codes, labels) {
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  codes
}

# Recode every column that has a lookup table.
for (column in names(code_labels)) {
  mushrooms_df2[[column]] <- recode_codes(
    mushrooms_df2[[column]],
    code_labels[[column]]
  )
}

table(mushrooms_df2$main_cat)
## 
##    Edible Poisonous 
##      4208      3916

Final Data Set

# Show the first 50 rows of mushrooms_df2.
head(mushrooms_df2, n = 50L)
##     main_cat    odor spore_print_color cap_color habitat population
## 1  Poisonous Pungent             Black     Brown   Urban  Scattered
## 2     Edible  Almond             Brown    Yellow Grasses   Numerous
## 3     Edible   Anise             Brown     White Meadows   Numerous
## 4  Poisonous Pungent             Black     White   Urban  Scattered
## 5     Edible    None             Brown      Gray Grasses   Abundant
## 6     Edible  Almond             Black    Yellow Grasses   Numerous
## 7     Edible  Almond             Black     White Meadows   Numerous
## 8     Edible   Anise             Brown     White Meadows  Scattered
## 9  Poisonous Pungent             Black     White Grasses    Several
## 10    Edible  Almond             Black    Yellow Meadows  Scattered
## 11    Edible   Anise             Brown    Yellow Grasses   Numerous
## 12    Edible  Almond             Black    Yellow Meadows  Scattered
## 13    Edible  Almond             Brown    Yellow Grasses  Scattered
## 14 Poisonous Pungent             Brown     White   Urban    Several
## 15    Edible    None             Black     Brown Grasses   Abundant
## 16    Edible    None             Brown      Gray   Urban   Solitary
## 17    Edible    None             Brown     White Grasses   Abundant
## 18 Poisonous Pungent             Black     Brown Grasses  Scattered
## 19 Poisonous Pungent             Brown     White   Urban  Scattered
## 20 Poisonous Pungent             Brown     Brown   Urban  Scattered
## 21    Edible  Almond             Brown    Yellow Meadows  Scattered
## 22 Poisonous Pungent             Brown     Brown Grasses    Several
## 23    Edible   Anise             Brown    Yellow Meadows  Scattered
## 24    Edible  Almond             Brown     White Meadows   Numerous
## 25    Edible   Anise             Black     White Meadows  Scattered
## 26 Poisonous Pungent             Brown     White Grasses    Several
## 27    Edible  Almond             Brown    Yellow Meadows   Numerous
## 28    Edible   Anise             Brown     White Meadows   Numerous
## 29    Edible    None             Black     Brown   Urban   Solitary
## 30    Edible  Almond             Brown    Yellow   Woods    Several
## 31    Edible   Anise             Brown    Yellow Meadows   Numerous
## 32 Poisonous Pungent             Brown     White   Urban  Scattered
## 33    Edible   Anise             Brown    Yellow Meadows   Numerous
## 34    Edible   Anise             Brown     Brown   Paths   Solitary
## 35    Edible   Anise             Brown    Yellow Meadows  Scattered
## 36    Edible   Anise             Brown    Yellow   Woods    Several
## 37    Edible    None             Black      Gray   Urban    Several
## 38 Poisonous Pungent             Brown     Brown   Urban  Scattered
## 39    Edible  Almond             Brown    Yellow   Woods    Several
## 40    Edible   Anise             Black    Yellow Meadows  Scattered
## 41    Edible  Almond             Brown    Yellow Grasses  Scattered
## 42    Edible   Anise             Black    Yellow   Paths   Solitary
## 43    Edible    None             Black     Brown   Urban   Solitary
## 44 Poisonous Pungent             Brown     White Grasses    Several
## 45    Edible  Almond             Black    Yellow Meadows   Numerous
## 46    Edible  Almond             Brown     White Grasses   Numerous
## 47    Edible   Anise             Black    Yellow Meadows  Scattered
## 48    Edible   Anise             Brown     White Meadows   Numerous
## 49    Edible   Anise             Brown    Yellow   Paths  Scattered
## 50    Edible   Anise             Black    Yellow   Paths  Scattered