The task is to study the dataset and its associated description (i.e., the “data dictionary”). You may need to look around a bit to find it, but it’s there! Take the data and create a data frame containing a subset of the columns in the dataset. Include the column that indicates whether a mushroom is edible or poisonous, plus three or four other columns. Also give the columns meaningful names and replace the abbreviations used in the data with descriptive values.
# Raw mushroom records from the UCI repository: comma-separated single-letter
# codes with no header row, so read.table() names the columns V1, V2, ...
theurl <- paste0(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/",
  "mushroom/agaricus-lepiota.data"
)
thedata <- read.table(theurl, sep = ",", header = FALSE)

# Preview the first six records.
head(thedata)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Show the default column names (V1..V23) before renaming.
colnames(thedata)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11"
## [12] "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22"
## [23] "V23"

# Replace the defaults with 23 descriptive names, one per column and in
# column order. The names are laid out below in groups sharing a prefix.
# NOTE: "bruises?" is not a syntactic R name; refer to it with backticks or
# [[ ]] rather than `$`, which would only reach it through partial matching.
colnames(thedata) <- c(
  "classes",
  "cshape", "csurface", "ccolor",
  "bruises?", "odor",
  "gattachment", "gspacing", "gsize", "gcolor",
  "sshape", "sroot",
  "ssurfacearing", "ssurfacebring",
  "scoloraring", "scolorbring",
  "vtype", "vcolor",
  "rnumber", "rtype",
  "sprintcolor", "population", "habitat"
)

# Confirm the new names.
colnames(thedata)
## [1] "classes" "cshape" "csurface" "ccolor"
## [5] "bruises?" "odor" "gattachment" "gspacing"
## [9] "gsize" "gcolor" "sshape" "sroot"
## [13] "ssurfacearing" "ssurfacebring" "scoloraring" "scolorbring"
## [17] "vtype" "vcolor" "rnumber" "rtype"
## [21] "sprintcolor" "population" "habitat"
library(plyr)

# "bruises?" is not a syntactic name. `thedata$bruises` can read it through
# partial matching, but `thedata$bruises <- ...` appends a second column
# instead of recoding the existing one. Give the column a syntactic name so
# it is recoded in place like every other column.
names(thedata)[names(thedata) == "bruises?"] <- "bruises"

# Code -> label lookup for every column, transcribed from the UCI
# agaricus-lepiota.names data dictionary. Element names are the raw
# single-character codes; values are the descriptive labels.
stalk_surface_labels <- c(
  f = "fibrous", y = "scaly", k = "silky", s = "smooth"
)
stalk_color_labels <- c(
  n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
  p = "pink", e = "red", w = "white", y = "yellow"
)
code_labels <- list(
  classes = c(e = "edible", p = "poisonous"),
  cshape = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  csurface = c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth"),
  ccolor = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  bruises = c(t = "bruises", f = "no"),
  odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  gattachment = c(
    a = "attached", d = "descending", f = "free", n = "notched"
  ),
  gspacing = c(c = "close", w = "crowded", d = "distant"),
  gsize = c(b = "broad", n = "narrow"),
  gcolor = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
    r = "green", o = "orange", p = "pink", u = "purple", e = "red",
    w = "white", y = "yellow"
  ),
  sshape = c(e = "enlarging", t = "tapering"),
  # Labels corrected from the misspelled "bulubous" and "clud".
  sroot = c(
    b = "bulbous", c = "club", u = "cup", e = "equal",
    z = "rhizomorphs", r = "rooted", "?" = "missing"
  ),
  ssurfacearing = stalk_surface_labels,
  ssurfacebring = stalk_surface_labels,
  scoloraring = stalk_color_labels,
  scolorbring = stalk_color_labels,
  vtype = c(p = "partial", u = "universal"),
  # Veil colour codes are n/o/w/y in the data dictionary; using "b"/"n" here
  # would label brown veils "orange" and leave orange veils as the raw "o".
  vcolor = c(n = "brown", o = "orange", w = "white", y = "yellow"),
  rnumber = c(n = "none", o = "one", t = "two"),
  # Label corrected from the misspelled "coweboy".
  rtype = c(
    c = "cobwebby", e = "evanescent", f = "flaring", l = "large",
    n = "none", p = "pendant", s = "sheathing", z = "zone"
  ),
  sprintcolor = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
    o = "orange", u = "purple", w = "white", y = "yellow"
  ),
  population = c(
    a = "abundant", c = "clustered", n = "numerous",
    s = "scattered", v = "several", y = "solitary"
  ),
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

# Fail early if a lookup refers to a column that does not exist; otherwise
# the loop below would silently operate on NULL.
stopifnot(all(names(code_labels) %in% names(thedata)))

# Recode each column in place. [[ ]] matches names exactly, so no column can
# be reached (or accidentally created) through partial matching. mapvalues()
# reports dictionary codes that never occur in the data.
for (column in names(code_labels)) {
  lookup <- code_labels[[column]]
  thedata[[column]] <- mapvalues(
    thedata[[column]],
    from = names(lookup),
    to = unname(lookup)
  )
}
## The following `from` values were not present in `x`: d, n
## The following `from` values were not present in `x`: d
## The following `from` values were not present in `x`: u, z
## The following `from` values were not present in `x`: u
## The following `from` values were not present in `x`: c, s, z
head(thedata)
## classes cshape csurface ccolor bruises odor gattachment gspacing
## 1 poisonous convex smooth brown bruises pungent free close
## 2 edible convex smooth yellow bruises almond free close
## 3 edible bell smooth white bruises anise free close
## 4 poisonous convex scaly white bruises pungent free close
## 5 edible convex smooth gray no none free crowded
## 6 edible convex scaly yellow bruises almond free close
## gsize gcolor sshape sroot ssurfacearing ssurfacebring scoloraring
## 1 narrow black enlarging equal smooth smooth white
## 2 broad black enlarging club smooth smooth white
## 3 broad brown enlarging club smooth smooth white
## 4 narrow brown enlarging equal smooth smooth white
## 5 broad black tapering equal smooth smooth white
## 6 broad brown enlarging club smooth smooth white
## scolorbring vtype vcolor rnumber rtype sprintcolor population
## 1 white partial white one pendant black scattered
## 2 white partial white one pendant brown numerous
## 3 white partial white one pendant brown numerous
## 4 white partial white one pendant black scattered
## 5 white partial white one evanescent brown abundant
## 6 white partial white one pendant black numerous
## habitat
## 1 urban
## 2 grasses
## 3 meadows
## 4 urban
## 5 grasses
## 6 grasses
# All edible mushrooms, restricted to the class label plus seven descriptive
# attributes. The subset is stored in full; wrapping the assignment in head()
# would keep only its first six rows. head() is used for display only.
thedataedible <- subset(
  thedata,
  classes == "edible",
  select = c(
    "classes", "sshape", "rnumber", "vcolor",
    "population", "odor", "gspacing", "habitat"
  )
)
head(thedataedible)
## classes sshape rnumber vcolor population odor gspacing habitat
## 2 edible enlarging one white numerous almond close grasses
## 3 edible enlarging one white numerous anise close meadows
## 5 edible tapering one white abundant none crowded grasses
## 6 edible enlarging one white numerous almond close grasses
## 7 edible enlarging one white numerous almond close meadows
## 8 edible enlarging one white scattered anise close meadows
# All poisonous mushrooms, restricted to the class label plus seven
# descriptive attributes. The subset is stored in full; wrapping the
# assignment in head() would keep only its first six rows. head() is used
# for display only.
thedatapoisonous <- subset(
  thedata,
  classes == "poisonous",
  select = c(
    "classes", "sshape", "rnumber", "vcolor",
    "population", "odor", "gspacing", "habitat"
  )
)
head(thedatapoisonous)
## classes sshape rnumber vcolor population odor gspacing habitat
## 1 poisonous enlarging one white scattered pungent close urban
## 4 poisonous enlarging one white scattered pungent close urban
## 9 poisonous enlarging one white several pungent close grasses
## 14 poisonous enlarging one white several pungent close urban
## 18 poisonous enlarging one white scattered pungent close grasses
## 19 poisonous enlarging one white scattered pungent close urban