R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the UCI mushroom data, keep a handful of attributes, and replace the
# single-letter factor codes with readable labels.
library(plyr)

# The raw file has NO header row, so header = FALSE is required; with the
# read.csv() default (header = TRUE) the first observation is silently
# consumed as column names and the data set comes back one row short.
mushroom_url <- paste0(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/",
  "mushroom/agaricus-lepiota.data"
)
mushroomsdf <- read.csv(mushroom_url, header = FALSE, stringsAsFactors = TRUE)

dim(mushroomsdf)
## [1] 8124   23

# Subset: keep columns 1, 2, 4, 6 and 21-23 (selected by position, so the
# auto-generated V1..V23 names do not matter here).
fungidf <- mushroomsdf[, c(1, 2, 4, 6, 21:23)]

# Add column names
colnames(fungidf) <- c(
  "EDIBILITY", "CAP_SHAPE", "CAP_COLOR", "ODOR",
  "SPORE_PRINT", "POPULATION", "HABITAT"
)

# Get all levels of the factors before replacing them.
# lapply() always returns a list, whereas sapply() changes its return type
# depending on the input; the printed result is the same here.
lapply(fungidf, levels)
## $EDIBILITY
## [1] "e" "p"
## 
## $CAP_SHAPE
## [1] "b" "c" "f" "k" "s" "x"
## 
## $CAP_COLOR
##  [1] "b" "c" "e" "g" "n" "p" "r" "u" "w" "y"
## 
## $ODOR
## [1] "a" "c" "f" "l" "m" "n" "p" "s" "y"
## 
## $SPORE_PRINT
## [1] "b" "h" "k" "n" "o" "r" "u" "w" "y"
## 
## $POPULATION
## [1] "a" "c" "n" "s" "v" "y"
## 
## $HABITAT
## [1] "d" "g" "l" "m" "p" "u" "w"

# Rename levels: each revalue() call maps the one-letter codes listed above
# to descriptive labels for that column.
fungidf1 <- transform(
  fungidf,
  EDIBILITY = revalue(EDIBILITY, c("e" = "edible", "p" = "poisonous")),
  CAP_SHAPE = revalue(CAP_SHAPE, c(
    "b" = "bell", "c" = "conical", "x" = "convex",
    "f" = "flat", "k" = "knobbed", "s" = "sunken"
  )),
  CAP_COLOR = revalue(CAP_COLOR, c(
    "n" = "brown", "b" = "buff", "c" = "cinnamon", "g" = "gray",
    "r" = "green", "p" = "pink", "u" = "purple", "e" = "red",
    "w" = "white", "y" = "yellow"
  )),
  ODOR = revalue(ODOR, c(
    "a" = "almond", "l" = "anise", "c" = "creosote", "y" = "fishy",
    "f" = "foul", "m" = "musty", "n" = "none", "p" = "pungent",
    "s" = "spicy"
  )),
  SPORE_PRINT = revalue(SPORE_PRINT, c(
    "k" = "black", "n" = "brown", "b" = "buff", "h" = "chocolate",
    "r" = "green", "o" = "orange", "u" = "purple", "w" = "white",
    "y" = "yellow"
  )),
  POPULATION = revalue(POPULATION, c(
    "a" = "abundant", "c" = "clustered", "n" = "numerous",
    "s" = "scattered", "v" = "several", "y" = "solitary"
  )),
  HABITAT = revalue(HABITAT, c(
    "g" = "grasses", "l" = "leaves", "m" = "meadows", "p" = "paths",
    "u" = "urban", "w" = "waste", "d" = "woods"
  ))
)

# Preview the recoded data; row 1 is now the file's first record.
head(fungidf1)
##   EDIBILITY CAP_SHAPE CAP_COLOR    ODOR SPORE_PRINT POPULATION HABITAT
## 1 poisonous    convex     brown pungent       black  scattered   urban
## 2    edible    convex    yellow  almond       brown   numerous grasses
## 3    edible      bell     white   anise       brown   numerous meadows
## 4 poisonous    convex     white pungent       black  scattered   urban
## 5    edible    convex      gray    none       brown   abundant grasses
## 6    edible    convex    yellow  almond       black   numerous grasses