R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics (min, quartiles, mean, max) for `cars`.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

First, I load the data into a data frame.

library(RCurl)
## Loading required package: bitops
# Download the raw mushroom data file as one character string.
x <- getURL("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data")
# The file has no header row, so columns get the default names V1..V23.
# read.csv() already returns a data frame, so no data.frame() wrapper is
# needed. stringsAsFactors = TRUE is spelled out because the recoding further
# down works on factor levels, and read.csv() stopped creating factors by
# default in R 4.0.0.
y <- read.csv(text = x, header = FALSE, stringsAsFactors = TRUE)
# Sanity-check the shape (rows, columns) and peek at the first rows.
dim(y)
## [1] 8124   23
head(y)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g

Then I rename the columns.

# Replace the default V1..V23 headers with descriptive names, in column order.
names(y) <- c(
  "poisonous-or-edible",
  "cap-shape", "cap-surface", "cap-color",
  "bruises", "odor",
  "gill-attachment", "gill-spacing", "gill-size", "gill-color",
  "stalk-shape", "stalk-root",
  "stalk-surface-above-ring", "stalk-surface-below-ring",
  "stalk-color-above-ring", "stalk-color-below-ring",
  "veil-type", "veil-color",
  "ring-number", "ring-type",
  "spore-print-color", "population", "habitat"
)
head(y)
##   poisonous-or-edible cap-shape cap-surface cap-color bruises odor
## 1                   p         x           s         n       t    p
## 2                   e         x           s         y       t    a
## 3                   e         b           s         w       t    l
## 4                   p         x           y         w       t    p
## 5                   e         x           s         g       f    n
## 6                   e         x           y         y       t    a
##   gill-attachment gill-spacing gill-size gill-color stalk-shape stalk-root
## 1               f            c         n          k           e          e
## 2               f            c         b          k           e          c
## 3               f            c         b          n           e          c
## 4               f            c         n          n           e          e
## 5               f            w         b          k           t          e
## 6               f            c         b          n           e          c
##   stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1                        s                        s                      w
## 2                        s                        s                      w
## 3                        s                        s                      w
## 4                        s                        s                      w
## 5                        s                        s                      w
## 6                        s                        s                      w
##   stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1                      w         p          w           o         p
## 2                      w         p          w           o         p
## 3                      w         p          w           o         p
## 4                      w         p          w           o         p
## 5                      w         p          w           o         e
## 6                      w         p          w           o         p
##   spore-print-color population habitat
## 1                 k          s       u
## 2                 n          n       g
## 3                 n          n       m
## 4                 k          s       u
## 5                 n          a       g
## 6                 k          n       g

Then, I create a new, smaller data frame that keeps only five columns of the original data frame.

# Keep all rows but only the five columns of interest; drop = FALSE guarantees
# the result stays a data frame.
tinydf <- y[
  ,
  c("poisonous-or-edible", "cap-shape", "cap-color", "odor", "veil-color"),
  drop = FALSE
]
head(tinydf)
##   poisonous-or-edible cap-shape cap-color odor veil-color
## 1                   p         x         n    p          w
## 2                   e         x         y    a          w
## 3                   e         b         w    l          w
## 4                   p         x         w    p          w
## 5                   e         x         g    n          w
## 6                   e         x         y    a          w

Then, I replace the single-letter codes in each column with labels that are easier to understand.

# Replace single-letter codes with descriptive labels.
#
# x        : factor or character vector holding the codes.
# code_map : named character vector; names are the codes, values the labels.
#
# Returns a factor whose levels are the labels, in `code_map` order. Unlike
# appending new levels and overwriting values one by one, this leaves no
# unused single-letter levels behind, and it works whether the column is a
# factor or a plain character vector. Codes missing from `code_map` are kept
# unchanged as trailing levels rather than being turned into NA.
recode_levels <- function(x, code_map) {
  codes <- as.character(x)
  unmapped <- setdiff(unique(codes[!is.na(codes)]), names(code_map))
  factor(
    codes,
    levels = c(names(code_map), unmapped),
    labels = c(unname(code_map), unmapped)
  )
}

tinydf[["poisonous-or-edible"]] <- recode_levels(
  tinydf[["poisonous-or-edible"]],
  c(p = "poisonous", e = "edible")
)

tinydf[["cap-shape"]] <- recode_levels(
  tinydf[["cap-shape"]],
  c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  )
)

tinydf[["cap-color"]] <- recode_levels(
  tinydf[["cap-color"]],
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

tinydf[["odor"]] <- recode_levels(
  tinydf[["odor"]],
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

tinydf[["veil-color"]] <- recode_levels(
  tinydf[["veil-color"]],
  c(n = "brown", o = "orange", w = "white", y = "yellow")
)

head(tinydf)
##   poisonous-or-edible cap-shape cap-color    odor veil-color
## 1           poisonous    convex     brown pungent      white
## 2              edible    convex    yellow  almond      white
## 3              edible      bell     white   anise      white
## 4           poisonous    convex     white pungent      white
## 5              edible    convex      gray    none      white
## 6              edible    convex    yellow  almond      white