# The global workspace is intentionally not wiped here: rm(list = ls()) would
# delete the caller's objects without resetting loaded packages or options.
# Restart R to get a clean session instead.
# install.packages("RCurl")
# install.packages("bitops")
library(RCurl)
## Loading required package: bitops
library(bitops)

# Download the raw file as text and parse it from memory (approach from Andy
# Catlin); a direct import of this file was not working, possibly because the
# file is corrupt.
x <- getURL(
  "https://raw.githubusercontent.com/excelsiordata/DATA607/master/Mushroom_Data"
)
mushrooms <- read.csv(
  text = x,
  header = TRUE, # spelled out; `head` only worked through partial matching
  sep = ",",
  stringsAsFactors = FALSE # keep the single-letter codes as character
)

# Alternative: link to the data on GitHub directly
# mushrooms <- import("https://raw.githubusercontent.com/excelsiordata/DATA607/master/Mushroom_Data")
# Preview the first rows to make sure the data loaded correctly
head(mushrooms)
##   Edible cap_shape cap_surface cap_color bruises odor gill_attachment
## 1      p         x           s         n       t    p               f
## 2      e         x           s         y       t    a               f
## 3      e         b           s         w       t    l               f
## 4      p         x           y         w       t    p               f
## 5      e         x           s         g       f    n               f
## 6      e         x           y         y       t    a               f
##   gill_spacing gill_size gill_color stalk_shape stalk_root
## 1            c         n          k           e          e
## 2            c         b          k           e          c
## 3            c         b          n           e          c
## 4            c         n          n           e          e
## 5            w         b          k           t          e
## 6            c         b          n           e          c
##   stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk_color_below_ring veil_type veil_color ring_number ring_type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore_print_color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g
# Build the working data frame, keeping text columns as character vectors
mushrooms.df <- data.frame(
  mushrooms,
  stringsAsFactors = FALSE
)

# Preview the first rows of the data frame
head(mushrooms.df)
##   Edible cap_shape cap_surface cap_color bruises odor gill_attachment
## 1      p         x           s         n       t    p               f
## 2      e         x           s         y       t    a               f
## 3      e         b           s         w       t    l               f
## 4      p         x           y         w       t    p               f
## 5      e         x           s         g       f    n               f
## 6      e         x           y         y       t    a               f
##   gill_spacing gill_size gill_color stalk_shape stalk_root
## 1            c         n          k           e          e
## 2            c         b          k           e          c
## 3            c         b          n           e          c
## 4            c         n          n           e          e
## 5            w         b          k           t          e
## 6            c         b          n           e          c
##   stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk_color_below_ring veil_type veil_color ring_number ring_type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore_print_color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g
# Keep only the first five columns for the rest of the script
mushsub <- mushrooms.df[, seq_len(5)]

# Preview the first rows of the subset
head(mushsub)
##   Edible cap_shape cap_surface cap_color bruises
## 1      p         x           s         n       t
## 2      e         x           s         y       t
## 3      e         b           s         w       t
## 4      p         x           y         w       t
## 5      e         x           s         g       f
## 6      e         x           y         y       t
# Tabulate the codes in each column to confirm that every value present in
# the data is covered by the data dictionary
table(mushsub[["Edible"]])
## 
##    e    p 
## 4208 3916
table(mushsub[["cap_shape"]])
## 
##    b    c    f    k    s    x 
##  452    4 3152  828   32 3656
table(mushsub[["cap_surface"]])
## 
##    f    g    s    y 
## 2320    4 2556 3244
table(mushsub[["cap_color"]])
## 
##    b    c    e    g    n    p    r    u    w    y 
##  168   44 1500 1840 2284  144   16   16 1040 1072
table(mushsub[["bruises"]])
## 
##    f    t 
## 4748 3376
# Give the four descriptive columns longer names via an old-name -> new-name
# lookup. Columns that are not listed in the lookup keep their current name.
renamed_columns <- c(
  cap_shape = "Mushroom_Cap_Shape",
  cap_surface = "Mushroom_Cap_Surface",
  cap_color = "Mushroom_Cap_Color",
  bruises = "Mushroom_Bruises"
)
to_rename <- names(mushsub) %in% names(renamed_columns)
names(mushsub)[to_rename] <-
  unname(renamed_columns[names(mushsub)[to_rename]])

# Replace abbreviated values with extended values.
#
# Chained gsub() calls are not safe for this: gsub() matches substrings, so a
# later pattern also rewrites letters inside labels inserted by an earlier
# call (gsub("s", "Smooth", "Fibrous") yields "FibrouSmooth"). Each code is
# therefore looked up as a whole value instead.

# Expand whole-value codes to their labels.
#   codes:  vector of abbreviated values (coerced to character)
#   labels: named character vector, names = codes, values = extended labels
# Returns a character vector the same length as `codes`; values that have no
# entry in `labels` (including NA) are returned unchanged.
expand_codes <- function(codes, labels) {
  codes <- as.character(codes)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  codes
}

mushsub$Edible <- expand_codes(
  mushsub$Edible,
  c(e = "Edible", p = "Poisonous")
)

mushsub$Mushroom_Cap_Shape <- expand_codes(
  mushsub$Mushroom_Cap_Shape,
  c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  )
)

mushsub$Mushroom_Cap_Surface <- expand_codes(
  mushsub$Mushroom_Cap_Surface,
  c(f = "Fibrous", g = "Grooves", y = "Scaly", s = "Smooth")
)

mushsub$Mushroom_Bruises <- expand_codes(
  mushsub$Mushroom_Bruises,
  c(t = "Yes", f = "No")
)

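#Chained gsub() calls are not appropriate when the codes are not mutually
#exclusive with the labels: a later pattern also matches letters inside labels
#that an earlier replacement inserted (e.g. the "r" in "Brown")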
#mushsub$Mushroom_Cap_Color <- gsub("n", "Brown", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("b", "Buff", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("c", "Cinnamon", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("g", "Gray", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("r", "Green", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("p", "Pink", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("u", "Purple", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("e", "Red", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("w", "White", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("y", "Yellow", mushsub$Mushroom_Cap_Color)

#Tried to create a data dictionary but couldn't get it to work
#colordict = list(n = 'Brown', b = 'Buff', c = 'Cinnamon', g = 'Gray', r = 'Green', p = 'Pink', u = 'Purple', e = 'Red', w = 'White', y = 'Yellow')
#Mushroom_Cap_Colors_Extended = mushsub$Mushroom_Cap_Color
#for (i in 1:10)
#(Mushroom_Cap_Colors_Extended <- replace (Mushroom_Cap_Colors_Extended, Mushroom_Cap_Colors_Extended == names(colordict[i]), colordict[i]))

# Working approach for the cap colours: index a named lookup vector
# (code -> colour name) by the column's codes.
# The column has to be character for this (hence stringsAsFactors = FALSE on
# the data frame): indexing by a factor would use its integer codes instead
# of the letters.
map <- c(
  n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
  p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
)
mushsub$Mushroom_Cap_Color[] <- map[mushsub$Mushroom_Cap_Color]

# Inspect the first and last six rows to confirm the recoding looks right
head(mushsub, n = 6L)
##      Edible Mushroom_Cap_Shape Mushroom_Cap_Surface Mushroom_Cap_Color
## 1 Poisonous             Convex               Smooth              Brown
## 2    Edible             Convex               Smooth             Yellow
## 3    Edible               Bell               Smooth              White
## 4 Poisonous             Convex                Scaly              White
## 5    Edible             Convex               Smooth               Gray
## 6    Edible             Convex                Scaly             Yellow
##   Mushroom_Bruises
## 1              Yes
## 2              Yes
## 3              Yes
## 4              Yes
## 5               No
## 6              Yes
tail(mushsub, n = 6L)
##         Edible Mushroom_Cap_Shape Mushroom_Cap_Surface Mushroom_Cap_Color
## 8119 Poisonous            Knobbed                Scaly              Brown
## 8120    Edible            Knobbed               Smooth              Brown
## 8121    Edible             Convex               Smooth              Brown
## 8122    Edible               Flat               Smooth              Brown
## 8123 Poisonous            Knobbed                Scaly              Brown
## 8124    Edible             Convex               Smooth              Brown
##      Mushroom_Bruises
## 8119               No
## 8120               No
## 8121               No
## 8122               No
## 8123               No
## 8124               No