This file is also available at: https://github.com/murphystout/data-607/blob/master/week-1-assignment-data-607.Rmd

Mushroom Dataset

Step 1 - Read CSV into a Data Frame:

# Load the raw UCI mushroom data straight from the repository.
# header = FALSE: the data file has no header row, so the default
# (header = TRUE) would consume the first record as column names and drop it.
# stringsAsFactors = TRUE: the later levels() calls need factor columns, and
# read.csv() stopped creating them by default in R 4.0.0.
# NOTE(review): console output recorded in this document predates this fix;
# re-knit to refresh it (row numbers shift by one).
mushroom <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  stringsAsFactors = TRUE
)

Step 2 - Assign Column Names.

The column names are listed in the dataset's data dictionary: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.names

# Readable column names, applied positionally to the columns of `mushroom`.
mushroom_names <- c(
  "edible",
  "cap_shape", "cap_surface", "cap_color",
  "bruises?", "odor",
  "gill_attachment", "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root",
  "stalk_surface_above_ring", "stalk_surface_below_ring",
  "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color",
  "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)

# For a data frame, names<- and colnames<- set the same attribute.
names(mushroom) <- mushroom_names

Step 3 - Pull a Subset of the Data for More Manageable Analysis

# Narrow the table to five columns; drop = FALSE guarantees the result stays a
# data frame.
mushroom_subset <- mushroom[
  ,
  c("edible", "cap_shape", "cap_surface", "cap_color", "ring_type"),
  drop = FALSE
]

# Preview the first six rows.
head(mushroom_subset, n = 6L)
##   edible cap_shape cap_surface cap_color ring_type
## 1      e         x           s         y         p
## 2      e         b           s         w         p
## 3      p         x           y         w         p
## 4      e         x           s         g         e
## 5      e         x           y         y         p
## 6      e         b           s         w         p

Step 4 - Replace Abbreviations

# Replace the single-letter codes with readable labels.
# Each code is paired with its label explicitly through
# factor(levels =, labels =) instead of overwriting levels() by position, so
# the mapping does not depend on which codes R happened to infer or on their
# sort order, and it also works when a column arrives as character rather than
# factor. Level order matches the alphabetical code order shown in the
# inspection output below.

levels(mushroom_subset$edible)
## [1] "e" "p"
mushroom_subset$edible <- factor(
  mushroom_subset$edible,
  levels = c("e", "p"),
  labels = c("Edible", "Poisonous")
)

levels(mushroom_subset$cap_shape)
## [1] "b" "c" "f" "k" "s" "x"
mushroom_subset$cap_shape <- factor(
  mushroom_subset$cap_shape,
  levels = c("b", "c", "f", "k", "s", "x"),
  labels = c("Bell", "Conical", "Flat", "Knobbed", "Sunken", "Convex")
)

levels(mushroom_subset$cap_surface)
## [1] "f" "g" "s" "y"
mushroom_subset$cap_surface <- factor(
  mushroom_subset$cap_surface,
  levels = c("f", "g", "s", "y"),
  labels = c("Fibrous", "Grooves", "Smooth", "Scaly")
)

levels(mushroom_subset$cap_color)
##  [1] "b" "c" "e" "g" "n" "p" "r" "u" "w" "y"
mushroom_subset$cap_color <- factor(
  mushroom_subset$cap_color,
  levels = c("b", "c", "e", "g", "n", "p", "r", "u", "w", "y"),
  labels = c(
    "Buff", "Cinnamon", "Red", "Gray", "Brown",
    "Pink", "Green", "Purple", "White", "Yellow"
  )
)

levels(mushroom_subset$ring_type)
## [1] "e" "f" "l" "n" "p"
mushroom_subset$ring_type <- factor(
  mushroom_subset$ring_type,
  levels = c("e", "f", "l", "n", "p"),
  labels = c("Evanescent", "Flaring", "Large", "None", "Pendant")
)

# factor() turns any code missing from the mappings above into NA; stop here
# rather than carry silently dropped values forward.
stopifnot(!any(is.na(mushroom_subset)))

head(mushroom_subset)
##      edible cap_shape cap_surface cap_color  ring_type
## 1    Edible    Convex      Smooth    Yellow    Pendant
## 2    Edible      Bell      Smooth     White    Pendant
## 3 Poisonous    Convex       Scaly     White    Pendant
## 4    Edible    Convex      Smooth      Gray Evanescent
## 5    Edible    Convex       Scaly    Yellow    Pendant
## 6    Edible      Bell      Smooth     White    Pendant