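# This script downloads the UCI mushroom data set (agaricus-lepiota.data), keeps five of its columns under descriptive names, replaces their single-letter codes with readable labels, and draws bar charts of each attribute split by edibility.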
library(tidyverse)
library(plyr)
# Download the UCI mushroom data set. The file has no header row, so read.csv()
# assigns the default column names V1, V2, ...
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() keeps strings as
# character by default, which would turn the <fct> columns of the printed
# tibble into <chr> and change the category order used by the bar charts.
import.data <- as_tibble(read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  stringsAsFactors = TRUE
))

# Keep the five columns of interest and give them descriptive names.
dat <- import.data %>%
  select(
    food.type = V1,
    cap.shape = V2,
    cap.surface = V3,
    cap.color = V4,
    habitat = V23
  )

# Single-letter codes used in the raw file, mapped to readable labels.
# One entry per column of `dat`; each entry is a named vector c(code = label).
value.labels <- list(
  food.type = c("e" = "edible", "p" = "poisonous"),
  cap.shape = c(
    "b" = "bell", "c" = "conical", "x" = "convex",
    "f" = "flat", "k" = "knobbed", "s" = "sunken"
  ),
  cap.surface = c(
    "f" = "fibrous", "g" = "grooves", "y" = "scaly", "s" = "smooth"
  ),
  cap.color = c(
    "n" = "brown", "b" = "buff", "c" = "cinnamon", "g" = "gray",
    "r" = "green", "p" = "pink", "u" = "purple", "e" = "red",
    "w" = "white", "y" = "yellow"
  ),
  habitat = c(
    "g" = "grasses", "l" = "leaves", "m" = "meadows", "p" = "paths",
    "u" = "urban", "w" = "waste", "d" = "woods"
  )
)

# Relabel every column in place. plyr::revalue() is namespace-qualified so the
# call does not depend on the order in which plyr and dplyr were attached.
for (column in names(value.labels)) {
  dat[[column]] <- plyr::revalue(dat[[column]], value.labels[[column]])
}
## # A tibble: 8,124 x 5
## food.type cap.shape cap.surface cap.color habitat
## <fct> <fct> <fct> <fct> <fct>
## 1 poisonous convex smooth brown urban
## 2 edible convex smooth yellow grasses
## 3 edible bell smooth white meadows
## 4 poisonous convex scaly white urban
## 5 edible convex smooth gray grasses
## 6 edible convex scaly yellow grasses
## 7 edible bell smooth white meadows
## 8 edible bell scaly white meadows
## 9 poisonous convex scaly white grasses
## 10 edible bell smooth yellow meadows
## # ... with 8,114 more rows
# Base plot shared by all charts below; it only binds the recoded `dat` table.
p1 <- ggplot(dat)

# One bar chart per attribute. Bars count the rows in each category, and
# position = "dodge" places the food.type groups side by side instead of
# stacking them.
p1 + geom_bar(aes(x = cap.shape, fill = food.type), position = "dodge")
p1 + geom_bar(aes(x = cap.color, fill = food.type), position = "dodge")
p1 + geom_bar(aes(x = cap.surface, fill = food.type), position = "dodge")
p1 + geom_bar(aes(x = habitat, fill = food.type), position = "dodge")