This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Summarise each column of `cars` (min, quartiles, median, mean, max).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
First, I load the data into a data frame.
library(RCurl)
## Loading required package: bitops
# Fetch the raw, comma-separated mushroom data as one string.
x <- getURL("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data")
# Parse the downloaded text. The file has no header row, so columns get the
# default names V1, V2, ... read.csv() already returns a data frame, so no
# extra data.frame() wrapper is needed. stringsAsFactors is set explicitly so
# the columns are factors on every R version (the default became FALSE in
# R 4.0).
y <- read.csv(text = x, header = FALSE, stringsAsFactors = TRUE)
dim(y)
## [1] 8124 23
head(y)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Then I give the columns descriptive names.
# Replace the default V1..V23 headers with descriptive attribute names,
# listed in the same order as the columns of the data file.
names(y) <- c(
  "poisonous-or-edible",
  "cap-shape", "cap-surface", "cap-color",
  "bruises", "odor",
  "gill-attachment", "gill-spacing", "gill-size", "gill-color",
  "stalk-shape", "stalk-root",
  "stalk-surface-above-ring", "stalk-surface-below-ring",
  "stalk-color-above-ring", "stalk-color-below-ring",
  "veil-type", "veil-color",
  "ring-number", "ring-type",
  "spore-print-color", "population", "habitat"
)
head(y)
## poisonous-or-edible cap-shape cap-surface cap-color bruises odor
## 1 p x s n t p
## 2 e x s y t a
## 3 e b s w t l
## 4 p x y w t p
## 5 e x s g f n
## 6 e x y y t a
## gill-attachment gill-spacing gill-size gill-color stalk-shape stalk-root
## 1 f c n k e e
## 2 f c b k e c
## 3 f c b n e c
## 4 f c n n e e
## 5 f w b k t e
## 6 f c b n e c
## stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## spore-print-color population habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
Then, I create a new data frame, which is a subset of the original data frame.
# Keep only the class label and four descriptive attributes. Plain `[`
# indexing is used rather than subset(), which is intended for interactive
# use; drop = FALSE guarantees the result is always a data frame.
tinydf <- y[
  ,
  c("poisonous-or-edible", "cap-shape", "cap-color", "odor", "veil-color"),
  drop = FALSE
]
head(tinydf)
## poisonous-or-edible cap-shape cap-color odor veil-color
## 1 p x n p w
## 2 e x y a w
## 3 e b w l w
## 4 p x w p w
## 5 e x g n w
## 6 e x y a w
Then, I replace the single-letter codes in each column with descriptive labels that are easier to understand.
# Lookup tables, one per column of tinydf, written as label = code.
code_labels <- list(
  "poisonous-or-edible" = c(poisonous = "p", edible = "e"),
  "cap-shape" = c(
    bell = "b", conical = "c", convex = "x", flat = "f", knobbed = "k",
    sunken = "s"
  ),
  "cap-color" = c(
    brown = "n", buff = "b", cinnamon = "c", gray = "g", green = "r",
    pink = "p", purple = "u", red = "e", white = "w", yellow = "y"
  ),
  "odor" = c(
    almond = "a", anise = "l", creosote = "c", fishy = "y", foul = "f",
    musty = "m", none = "n", pungent = "p", spicy = "s"
  ),
  "veil-color" = c(brown = "n", orange = "o", white = "w", yellow = "y")
)

# Translate a vector of single-letter codes into a factor of labels.
#
# codes:  factor or character vector of raw codes.
# lookup: named character vector; values are the codes, names are the labels.
#
# Returns a factor whose levels are the labels in `lookup` order, followed by
# any codes that have no entry in `lookup`. Those codes are kept unchanged
# instead of becoming NA. Unlike appending levels to the existing factor, the
# single-letter codes that were translated do not linger as unused levels.
recode_codes <- function(codes, lookup) {
  codes <- as.character(codes)
  hit <- match(codes, lookup)
  mapped <- !is.na(hit)
  codes[mapped] <- names(lookup)[hit[mapped]]
  unmapped_codes <- setdiff(unique(codes[!mapped]), NA)
  factor(codes, levels = c(names(lookup), unmapped_codes))
}

# Recode every column that has a lookup table, in one pass.
tinydf[names(code_labels)] <- Map(
  recode_codes,
  tinydf[names(code_labels)],
  code_labels
)
head(tinydf)
## poisonous-or-edible cap-shape cap-color odor veil-color
## 1 poisonous convex brown pungent white
## 2 edible convex yellow almond white
## 3 edible bell white anise white
## 4 poisonous convex white pungent white
## 5 edible convex gray none white
## 6 edible convex yellow almond white