Assignment

Your task is to study the dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there! You should take the data, and create a data frame with a subset of the columns (and if you like rows) in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data—for example, in the appropriate column, “e” might become “edible.” Your deliverable is the R code to perform these transformation tasks.

Load Mushroom data from the Web

# Download the raw mushroom records. The file has no header row, so the
# columns receive R's default names, and text is kept as character
# vectors rather than being converted to factors.
mushroomData <- read.csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Print a compact summary of the columns and their types.
str(object = mushroomData)
## 'data.frame':    8124 obs. of  23 variables:
##  $ V1 : chr  "p" "e" "e" "p" ...
##  $ V2 : chr  "x" "x" "b" "x" ...
##  $ V3 : chr  "s" "s" "s" "y" ...
##  $ V4 : chr  "n" "y" "w" "w" ...
##  $ V5 : chr  "t" "t" "t" "t" ...
##  $ V6 : chr  "p" "a" "l" "p" ...
##  $ V7 : chr  "f" "f" "f" "f" ...
##  $ V8 : chr  "c" "c" "c" "c" ...
##  $ V9 : chr  "n" "b" "b" "n" ...
##  $ V10: chr  "k" "k" "n" "n" ...
##  $ V11: chr  "e" "e" "e" "e" ...
##  $ V12: chr  "e" "c" "c" "e" ...
##  $ V13: chr  "s" "s" "s" "s" ...
##  $ V14: chr  "s" "s" "s" "s" ...
##  $ V15: chr  "w" "w" "w" "w" ...
##  $ V16: chr  "w" "w" "w" "w" ...
##  $ V17: chr  "p" "p" "p" "p" ...
##  $ V18: chr  "w" "w" "w" "w" ...
##  $ V19: chr  "o" "o" "o" "o" ...
##  $ V20: chr  "p" "p" "p" "p" ...
##  $ V21: chr  "k" "n" "n" "k" ...
##  $ V22: chr  "s" "n" "n" "s" ...
##  $ V23: chr  "u" "g" "m" "u" ...
# Render the first six raw rows as a table.
kable(head(mushroomData, n = 6L))
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23
p x s n t p f c n k e e s s w w p w o p k s u
e x s y t a f c b k e c s s w w p w o p n n g
e b s w t l f c b n e c s s w w p w o p n n m
p x y w t p f c n n e e s s w w p w o p k s u
e x s g f n f w b k t e s s w w p w o e n a g
e x y y t a f c b n e c s s w w p w o p k n g

Add Column Headers

# Give the 23 columns descriptive names, in file order. Column 1 is the
# edible/poisonous class label; columns 2-23 follow the attribute order of
# the UCI mushroom data dictionary (agaricus-lepiota.names).
names(mushroomData)[seq_len(23L)] <- c(
  "EdibleOrPoisonous",
  "CapShape",
  "CapSurface",
  "CapColor",
  "Bruises",
  "Odor",
  "GillAttachment",
  "GillSpacing",
  "GillSize",
  "GillColor",
  "StalkShape",
  "StalkRoot",
  "StalkSurfaceAboveRing",
  "StalkSurfaceBelowRing",
  "StalkColorAboveRing",
  "StalkColorBelowRing",
  "VeilType",
  "VeilColor",
  "RingNumber",
  "RingType",
  "SporePrintColor",
  "Population",
  "Habitat"
)

# Render the first rows again to confirm the new headers.
kable(head(mushroomData))
EdibleOrPoisonous CapShape CapSurface CapColor Bruises Odor GillAttachment GillSpacing FillSize GillColor StalkShape StalkRoot StalkSurfaceAboveRing StalkSurfaceBelowRing StalkColorAboveRing StalkColorBelowRing VielType VeilColor RingNumber RingType SporePrintColor Population Habitat
p x s n t p f c n k e e s s w w p w o p k s u
e x s y t a f c b k e c s s w w p w o p n n g
e b s w t l f c b n e c s s w w p w o p n n m
p x y w t p f c n n e e s s w w p w o p k s u
e x s g f n f w b k t e s s w w p w o e n a g
e x y y t a f c b n e c s s w w p w o p k n g

Subsetting columns that may determine edibility

# Keep only the class label and the four attributes chosen for the
# edibility analysis; all rows are retained.
mushroomData <- mushroomData[
  c(
    "EdibleOrPoisonous",
    "CapSurface",
    "CapColor",
    "GillColor",
    "Odor"
  )
]

# Render the first six rows of the reduced data frame.
kable(head(mushroomData, n = 6L))
EdibleOrPoisonous CapSurface CapColor GillColor Odor
p s n k p
e s y k a
e s w n l
p y w n p
e s g k n
e y y n a

Change abbreviations to meaningful names

# Replace single-letter codes with descriptive labels.
#
# codes:  character vector of raw codes.
# labels: named character vector; the names are the codes and the values
#         are the labels to substitute.
# Returns `codes` with every element that has an entry in `labels`
# replaced by its label. Elements without an entry (including NA) are
# returned unchanged, so an unexpected code is never silently lost.
recodeValues <- function(codes, labels) {
  position <- match(codes, names(labels))
  known <- !is.na(position)
  codes[known] <- unname(labels[position[known]])
  codes
}

# Class label.
mushroomData$EdibleOrPoisonous <- recodeValues(
  mushroomData$EdibleOrPoisonous,
  c(e = "edible", p = "poisonous")
)

# Cap surface.
mushroomData$CapSurface <- recodeValues(
  mushroomData$CapSurface,
  c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth")
)

# Cap colour. The data dictionary defines no "f" code for this attribute,
# so none is mapped here.
mushroomData$CapColor <- recodeValues(
  mushroomData$CapColor,
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

# Gill colour.
mushroomData$GillColor <- recodeValues(
  mushroomData$GillColor,
  c(
    k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
    r = "green", o = "orange", p = "pink", u = "purple", e = "red",
    w = "white", y = "yellow"
  )
)

# Odor.
mushroomData$Odor <- recodeValues(
  mushroomData$Odor,
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

# Show the fully recoded data as an interactive table.
datatable(mushroomData)