#Start from a clean slate: remove every visible object from the current
#environment before the script runs.
#NOTE(review): this also wipes anything the caller had defined - confirm that
#is intended before sourcing this file from an interactive session.
rm(list = ls())
#install.packages("RCurl")
#install.packages("bitops")
library(RCurl)
## Loading required package: bitops
library(bitops)
#Download approach from Andy Catlin: my import function wasn't working with
#this file (the file is possibly corrupt)
#Download the raw file contents as one string, then parse that string as CSV.
#The first row holds the column names; text columns stay character vectors.
x <- getURL(
  "https://raw.githubusercontent.com/excelsiordata/DATA607/master/Mushroom_Data"
)
mushrooms <- read.csv(
  text = x,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
#Can also link to the data in github
#mushrooms <- import("https://raw.githubusercontent.com/excelsiordata/DATA607/master/Mushroom_Data")
#Take a peek at the data and make sure everything's okay
head(mushrooms, n = 6L)
## Edible cap_shape cap_surface cap_color bruises odor gill_attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## gill_spacing gill_size gill_color stalk_shape stalk_root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk_color_below_ring veil_type veil_color ring_number ring_type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## spore_print_color population habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
#Create the data frame
#Build the working data frame from the imported data, keeping text columns as
#character vectors instead of factors
mushrooms.df <- data.frame(
  mushrooms,
  stringsAsFactors = FALSE
)
#Show the first six rows of the data frame
head(mushrooms.df, n = 6L)
## Edible cap_shape cap_surface cap_color bruises odor gill_attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## gill_spacing gill_size gill_color stalk_shape stalk_root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk_color_below_ring veil_type veil_color ring_number ring_type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## spore_print_color population habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
#Create a subset of the data
#Keep all rows but only the first five columns
mushsub <- mushrooms.df[, seq_len(5L)]
#Show the first six rows of the subset
head(mushsub, n = 6L)
## Edible cap_shape cap_surface cap_color bruises
## 1 p x s n t
## 2 e x s y t
## 3 e b s w t
## 4 p x y w t
## 5 e x s g f
## 6 e x y y t
#Make sure the data dictionary isn't missing any values that are in the data
#Frequency of each distinct code, one column at a time
table(mushsub[["Edible"]])
##
## e p
## 4208 3916
table(mushsub[["cap_shape"]])
##
## b c f k s x
## 452 4 3152 828 32 3656
table(mushsub[["cap_surface"]])
##
## f g s y
## 2320 4 2556 3244
table(mushsub[["cap_color"]])
##
## b c e g n p r u w y
## 168 44 1500 1840 2284 144 16 16 1040 1072
table(mushsub[["bruises"]])
##
## f t
## 4748 3376
#Rename the columns
#Rename the four descriptive columns through a single old-name -> new-name
#lookup. Columns that are not listed keep their current name, and a listed
#name that is absent from the data frame is simply skipped.
#The work happens inside local() so no helper variables are left behind.
names(mushsub) <- local({
  renames <- c(
    cap_shape = "Mushroom_Cap_Shape",
    cap_surface = "Mushroom_Cap_Surface",
    cap_color = "Mushroom_Cap_Color",
    bruises = "Mushroom_Bruises"
  )
  current <- names(mushsub)
  listed <- current %in% names(renames)
  current[listed] <- unname(renames[current[listed]])
  current
})
#Replace abbreviated values with extended values
#Expand the one-letter codes with exact-match dictionary lookups.
#Chained gsub() calls are unsafe here: gsub() substitutes every occurrence of
#the pattern, so a later pattern also rewrites letters inside labels produced
#by an earlier call, e.g. gsub("s", "Smooth", "Fibrous") gives "FibrouSmooth".
#
#recode_exact(codes, dictionary)
#  codes:      vector of abbreviated values (coerced to character)
#  dictionary: named character vector; names are the codes, values the labels
#  returns:    character vector the same length as codes; values that are not
#              in the dictionary (including NA) are returned unchanged
recode_exact <- function(codes, dictionary) {
  labels <- as.character(codes)
  position <- match(labels, names(dictionary))
  known <- !is.na(position)
  labels[known] <- unname(dictionary)[position[known]]
  labels
}

mushsub$Edible <- recode_exact(
  mushsub$Edible,
  c(e = "Edible", p = "Poisonous")
)
mushsub$Mushroom_Cap_Shape <- recode_exact(
  mushsub$Mushroom_Cap_Shape,
  c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  )
)
mushsub$Mushroom_Cap_Surface <- recode_exact(
  mushsub$Mushroom_Cap_Surface,
  c(f = "Fibrous", g = "Grooves", y = "Scaly", s = "Smooth")
)
mushsub$Mushroom_Bruises <- recode_exact(
  mushsub$Mushroom_Bruises,
  c(t = "Yes", f = "No")
)
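#Chained gsub() calls aren't appropriate when the codes and the replacement
#labels share letters: a later substitution rewrites text that an earlier
#substitution produced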
#mushsub$Mushroom_Cap_Color <- gsub("n", "Brown", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("b", "Buff", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("c", "Cinnamon", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("g", "Gray", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("r", "Green", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("p", "Pink", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("u", "Purple", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("e", "Red", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("w", "White", mushsub$Mushroom_Cap_Color)
#mushsub$Mushroom_Cap_Color <- gsub("y", "Yellow", mushsub$Mushroom_Cap_Color)
#Tried to create a data dictionary but couldn't get it to work
#colordict = list(n = 'Brown', b = 'Buff', c = 'Cinnamon', g = 'Gray', r = 'Green', p = 'Pink', u = 'Purple', e = 'Red', w = 'White', y = 'Yellow')
#Mushroom_Cap_Colors_Extended = mushsub$Mushroom_Cap_Color
#for (i in 1:10)
#(Mushroom_Cap_Colors_Extended <- replace (Mushroom_Cap_Colors_Extended, Mushroom_Cap_Colors_Extended == names(colordict[i]), colordict[i]))
#Third time's a charm - THIS ONE WORKS to rename the cap colors
#Had to add a stringsAsFactors = FALSE argument to the data frame
#in order for this to work
#Named character vector used as a dictionary: names are the one-letter cap
#color codes, values are the full color names
map <- c(
  n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
  p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
)
#Index the dictionary by the codes in the column and write the labels back
#into the existing column
mushsub[["Mushroom_Cap_Color"]][] <-
  map[unlist(mushsub[["Mushroom_Cap_Color"]])]
#Check both ends and make sure everything looks good
#First six rows of the recoded subset
head(mushsub, n = 6L)
## Edible Mushroom_Cap_Shape Mushroom_Cap_Surface Mushroom_Cap_Color
## 1 Poisonous Convex Smooth Brown
## 2 Edible Convex Smooth Yellow
## 3 Edible Bell Smooth White
## 4 Poisonous Convex Scaly White
## 5 Edible Convex Smooth Gray
## 6 Edible Convex Scaly Yellow
## Mushroom_Bruises
## 1 Yes
## 2 Yes
## 3 Yes
## 4 Yes
## 5 No
## 6 Yes
#Last six rows of the recoded subset
tail(mushsub, n = 6L)
## Edible Mushroom_Cap_Shape Mushroom_Cap_Surface Mushroom_Cap_Color
## 8119 Poisonous Knobbed Scaly Brown
## 8120 Edible Knobbed Smooth Brown
## 8121 Edible Convex Smooth Brown
## 8122 Edible Flat Smooth Brown
## 8123 Poisonous Knobbed Scaly Brown
## 8124 Edible Convex Smooth Brown
## Mushroom_Bruises
## 8119 No
## 8120 No
## 8121 No
## 8122 No
## 8123 No
## 8124 No