Datasets

We’ll be using the blue_jays.rda, cdc.txt, and US_income.rda datasets, which are already in the /data subdirectory of our data_vis_labs project.

# Load package(s)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(maps)
library(hexbin)
library(hrbrthemes)
library(viridisLite)
library(viridis)
library(statebins)

# Restore the saved .rda workspaces (presumably these provide the blue_jays
# and US_income objects used by the exercises below -- confirm).
load("data/blue_jays.rda")
load("data/US_income.rda")

# Parse the pipe-delimited cdc file, then turn genhlth into a factor whose
# levels run from best to worst reported health.
cdc <- read_delim(file = "data/cdc.txt", delim = "|")
cdc <- mutate(
  cdc,
  genhlth = factor(
    genhlth,
    levels = c("excellent", "very good", "good", "fair", "poor")
  )
)

Exercise 1

# Scatterplot of Mass against Head from blue_jays, with 2D density contour
# lines layered over the points.
ggplot(blue_jays, aes(x = Mass, y = Head)) +
  # Semi-transparent points (alpha = 1/3). The stray trailing comma that
  # passed an empty argument to geom_point() has been removed.
  geom_point(alpha = 1 / 3, size = 1.5) +
  # Fix the x-axis range to 57-82.
  xlim(c(57, 82)) +
  # Thin black contour lines; binwidth sets the spacing between density levels.
  geom_density2d(size = 0.4, binwidth = 0.004, color = "black") +
  labs(x = "Body Mass (g)", y = "Head length (mm)") +
  theme_minimal()

With the geom_density2d function above, I have drawn density contour lines over the scatterplot points from the blue_jays dataset.

Exercise 2

Plot 1

# Hexagonal-bin plot of height against weight from cdc (35 bins).
ggplot(data = cdc) +
  geom_hex(mapping = aes(x = height, y = weight), bins = 35) +
  theme_minimal() +
  labs(x = "Height(in)", y = "Weight(lbs)", fill = "count")

Using geom_hex, I have created a hexagonal-bin plot of the height and weight variables from the cdc dataset.

Plot 2

# Filled 2D density polygons of height against weight from cdc, one facet
# per gender.
ggplot(cdc, aes(x = height, y = weight)) +
  # after_stat(level) replaces the deprecated stat(level) for mapping the
  # computed density level to fill. The legend is hidden, so no fill label
  # is set (the previous "count" label was never shown and would have
  # mislabelled a density level).
  stat_density2d(
    aes(fill = after_stat(level)),
    geom = "polygon",
    show.legend = FALSE
  ) +
  labs(x = "Height(in)", y = "Weight(lbs)") +
  theme_minimal() +
  facet_wrap(~gender)

Using the stat_density2d function with geom = "polygon", I have created filled density plots faceted by gender.

Exercise 3

# County outlines for New Jersey from the maps "county" database, keeping
# only the columns needed for plotting and renaming long/subregion.
nj_counties <- select(
  map_data(map = "county", region = "New Jersey"),
  lon = long, lat, group, id = subregion
)

# Draw each county polygon as an unfilled grey outline on a blank canvas.
ggplot(data = nj_counties, mapping = aes(x = lon, y = lat, group = group)) +
  geom_polygon(colour = "grey50", fill = NA) +
  theme_void() +
  coord_quickmap()

Above, I’ve made a county map of New Jersey, using nj_counties to divide the state by county and geom_polygon to draw the county outlines.

Exercise 4

Using the US_income dataset, recreate the following graphics as precisely as possible.

# Setting income levels: bin median_income into five labelled brackets that
# the Exercise 4 plots use as their fill variable.
US_income <- mutate(
  US_income,
  income_bins = cut(
    # Rows with a missing median_income are treated as 25000 so they fall in
    # the lowest bracket instead of producing an NA bin.
    ifelse(is.na(median_income), 25000, median_income),
    # The top break is open-ended. With right = FALSE the intervals are
    # [lower, upper), so the former cap of 80000 turned every income of
    # 80000 or more into NA instead of "> $70k".
    breaks = c(0, 40000, 50000, 60000, 70000, Inf),
    labels = c("< $40k", "$40k to $50k", "$50k to $60k", "$60k to $70k", "> $70k"),
    right = FALSE
  )
)

Plot 1

# Map of the US_income geometries, filled by income bracket with thin
# light-grey borders and no graticule or axes.
ggplot(
  data = US_income,
  mapping = aes(geometry = geometry, fill = income_bins)
) +
  geom_sf(colour = "grey80", size = 0.2) +
  coord_sf(datum = NA) +
  scale_fill_viridis(discrete = TRUE) +
  theme_void() +
  labs(fill = "Median\nIncome")

Above, I have created a map of the United States divided and colored by median income.

Plot 2

# Install statebins from its GitHub repository (hrbrmstr/statebins).
devtools::install_github(repo = "hrbrmstr/statebins")

## Skipping install of 'statebins' from a github remote, the SHA1 (717a6d1b) has not changed since last install.
##   Use `force = TRUE` to force installation

Above, I have installed the development version of the statebins package from GitHub.

# Attach the packages needed for the statebins plot.
library(statebins)
library(hrbrthemes)
library(tidyverse)

# Report which statebins version is installed.
utils::packageVersion(pkg = "statebins")

## [1] '2.0.0'

Above, I have once again loaded the statebins, hrbrthemes, and tidyverse packages and checked which version of the statebins package is installed.

# Statebins tile map: one tile per state name, filled by income bracket.
ggplot(
  data = US_income,
  mapping = aes(state = name, fill = income_bins)
) +
  geom_statebins() +
  scale_fill_viridis(discrete = TRUE) +
  theme_statebins() +
  labs(fill = "Median\nIncome")

Above, I have created a statebins plot using US income with a theme of statebins.

L54 Toolbox

Taehyung Kim

April 16, 2019

Datasets

Exercise 1

Exercise 2

Plot 1

Plot 2

Exercise 3

Exercise 4

Plot 1

Plot 2