The code in this document demonstrates how to load data from a structured online source and apply basic data transformations.

Load packages

# Attach plyr before the tidyverse: when plyr is attached last it masks
# dplyr verbs of the same name (mutate, summarise, rename, ...).
library(plyr)
library(tidyverse)

Import mushroom data as a tibble

# Download the headerless UCI mushroom file; with header = FALSE the columns
# receive read.csv's default names V1, V2, ...
# stringsAsFactors = TRUE is set explicitly because R >= 4.0 no longer converts
# strings to factors by default; this keeps the columns as factors, matching
# the <fct> columns in the printed tibble further down.
import.data <- as_tibble(
  read.csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
    header = FALSE,
    stringsAsFactors = TRUE
  )
)

Select the columns that describe each mushroom's class (edible or poisonous), cap properties, and habitat, and give them descriptive names

# Keep the class label, the three cap attributes and the habitat column,
# renaming each one (new = old) in the same select() call.
dat <- import.data %>%
  select(
    food.type = V1,
    cap.shape = V2,
    cap.surface = V3,
    cap.color = V4,
    habitat = V23
  )

Revalue factor levels

# Replace the single-letter codes in each column with readable labels.
# Each named vector maps old code -> new label.
dat[["food.type"]] <- revalue(
  dat[["food.type"]],
  c(
    "e" = "edible",
    "p" = "poisonous"
  )
)

dat[["cap.shape"]] <- revalue(
  dat[["cap.shape"]],
  c(
    "b" = "bell",
    "c" = "conical",
    "x" = "convex",
    "f" = "flat",
    "k" = "knobbed",
    "s" = "sunken"
  )
)

dat[["cap.surface"]] <- revalue(
  dat[["cap.surface"]],
  c(
    "f" = "fibrous",
    "g" = "grooves",
    "y" = "scaly",
    "s" = "smooth"
  )
)

dat[["cap.color"]] <- revalue(
  dat[["cap.color"]],
  c(
    "n" = "brown",
    "b" = "buff",
    "c" = "cinnamon",
    "g" = "gray",
    "r" = "green",
    "p" = "pink",
    "u" = "purple",
    "e" = "red",
    "w" = "white",
    "y" = "yellow"
  )
)

dat[["habitat"]] <- revalue(
  dat[["habitat"]],
  c(
    "g" = "grasses",
    "l" = "leaves",
    "m" = "meadows",
    "p" = "paths",
    "u" = "urban",
    "w" = "waste",
    "d" = "woods"
  )
)

View the new tibble after the basic transformations

## # A tibble: 8,124 x 5
##    food.type cap.shape cap.surface cap.color habitat
##    <fct>     <fct>     <fct>       <fct>     <fct>  
##  1 poisonous convex    smooth      brown     urban  
##  2 edible    convex    smooth      yellow    grasses
##  3 edible    bell      smooth      white     meadows
##  4 poisonous convex    scaly       white     urban  
##  5 edible    convex    smooth      gray      grasses
##  6 edible    convex    scaly       yellow    grasses
##  7 edible    bell      smooth      white     meadows
##  8 edible    bell      scaly       white     meadows
##  9 poisonous convex    scaly       white     grasses
## 10 edible    bell      smooth      yellow    meadows
## # ... with 8,114 more rows

Create bar plots of each cap property and of habitat, with bars split by food type

# Shared base plot; each call below adds one dodged bar layer that counts
# rows per category, with separate bars for each food.type.
p1 <- ggplot(dat)

# Counts by cap shape
p1 +
  geom_bar(
    aes(cap.shape, fill = food.type),
    position = "dodge"
  )

# Counts by cap color
p1 +
  geom_bar(
    aes(cap.color, fill = food.type),
    position = "dodge"
  )

# Counts by cap surface
p1 +
  geom_bar(
    aes(cap.surface, fill = food.type),
    position = "dodge"
  )

# Counts by habitat
p1 +
  geom_bar(
    aes(habitat, fill = food.type),
    position = "dodge"
  )