We’ll be using the blue_jays.rda, cdc.txt, and US_income.rda datasets, which are already in the data/ subdirectory of our data_vis_labs project.
# Load package(s)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(maps)
library(hexbin)
library(hrbrthemes)
library(viridisLite)
library(viridis)
library(statebins)
# Load datasets ----
# `load()` restores the objects saved in each .rda file into the calling
# environment (presumably `blue_jays` and `US_income`, which are used below).
load("data/blue_jays.rda")
load("data/US_income.rda")

# Read in the pipe-delimited cdc dataset, then store `genhlth` as a factor
# whose levels run from "excellent" down to "poor".
cdc <- read_delim("data/cdc.txt", delim = "|") %>%
  mutate(
    genhlth = factor(
      genhlth,
      levels = c("excellent", "very good", "good", "fair", "poor")
    )
  )
# Scatterplot of head length against body mass for the blue jays, with 2D
# density contour lines drawn over the semi-transparent points.
ggplot(blue_jays, aes(x = Mass, y = Head)) +
  # No trailing comma after the last argument: an empty trailing argument is
  # forwarded through `...` and can raise an "argument is empty" error on
  # ggplot2 versions that collect layer parameters with `list()`.
  geom_point(alpha = 1 / 3, size = 1.5) +
  # Fix the x-axis limits at 57 to 82.
  xlim(c(57, 82)) +
  geom_density2d(size = 0.4, binwidth = 0.004, color = "black") +
  labs(x = "Body Mass (g)", y = "Head length (mm)") +
  theme_minimal()
With the geom_density2d function above, I have overlaid 2D density contour lines on the scatterplot of the blue_jays dataset.
# Hexagonal-bin plot of weight against height in the cdc data, using 35 bins.
ggplot(data = cdc, mapping = aes(height, weight)) +
  theme_minimal() +
  labs(x = "Height(in)", y = "Weight(lbs)", fill = "count") +
  geom_hex(bins = 35)
Using geom_hex, I have created a hexagonal-bin plot of weight against height, where the fill of each hexagon shows how many observations fall in it.
# Filled 2D density contours of weight against height, one panel per gender.
ggplot(data = cdc, mapping = aes(height, weight)) +
  facet_wrap(facets = ~gender) +
  # Fill each contour polygon by its computed density level; the legend for
  # this layer is switched off.
  stat_density_2d(
    mapping = aes(fill = stat(level)),
    geom = "polygon",
    show.legend = FALSE
  ) +
  theme_minimal() +
  labs(x = "Height(in)", y = "Weight(lbs)", fill = "count")
Using the stat_density2d function with geom = “polygon”, I have created filled density-contour plots of weight against height, faceted by gender.
# County boundary data for New Jersey, reduced to the four columns used for
# plotting: longitude is renamed to `lon` and the county name to `id`.
nj_counties <- map_data(map = "county", region = "New Jersey") %>%
  rename(lon = long, id = subregion) %>%
  select(lon, lat, group, id)

# Draw each county as an unfilled grey outline on a blank theme.
ggplot(data = nj_counties, mapping = aes(x = lon, y = lat)) +
  geom_polygon(mapping = aes(group = group), colour = "grey50", fill = NA) +
  theme_void() +
  coord_quickmap()
Above, I’ve made a county map of New Jersey: nj_counties holds the county boundary data, and geom_polygon draws the outline of each county.
Using US_income dataset, recreate the following graphics as precisely as possible.
# Setting income levels
# Bin each median income into five labelled brackets. Intervals are closed on
# the left (`right = FALSE`), so exactly 40000 falls in "$40k to $50k".
US_income <- mutate(
  US_income,
  income_bins = cut(
    # Missing incomes are replaced with 25000 so they land in the lowest
    # bracket instead of producing an NA bin.
    ifelse(is.na(median_income), 25000, median_income),
    # The top break is open-ended: with a finite upper break of 80000, any
    # income of 80000 or more would fall outside every interval and become NA
    # even though the top label reads "> $70k".
    breaks = c(0, 40000, 50000, 60000, 70000, Inf),
    labels = c(
      "< $40k", "$40k to $50k", "$50k to $60k", "$60k to $70k", "> $70k"
    ),
    right = FALSE
  )
)
# Map of US_income with each geometry filled by its income bracket.
ggplot(
  data = US_income,
  mapping = aes(geometry = geometry, fill = income_bins)
) +
  geom_sf(color = "grey80", size = 0.2) +
  coord_sf(datum = NA) +
  theme_void() +
  labs(fill = "Median\nIncome") +
  scale_fill_viridis(discrete = TRUE)
Above, I have created a map of the United States divided and colored by median income.
# Install the statebins package from its GitHub repository.
devtools::install_github(repo = "hrbrmstr/statebins")
## Skipping install of 'statebins' from a github remote, the SHA1 (717a6d1b) has not changed since last install.
## Use `force = TRUE` to force installation
Above, I have installed the development version of the statebins package from GitHub.
library(statebins)
library(hrbrthemes)
library(tidyverse)
# Report which version of statebins is installed.
utils::packageVersion(pkg = "statebins")
## [1] '2.0.0'
Above, I have once again loaded the statebins, hrbrthemes, and tidyverse packages and printed the installed version of the statebins package.
# Statebins plot: one tile per state, filled by its income bracket.
ggplot(
  data = US_income,
  mapping = aes(state = name, fill = income_bins)
) +
  geom_statebins() +
  theme_statebins() +
  labs(fill = "Median\nIncome") +
  scale_fill_viridis(discrete = TRUE)
Above, I have created a statebins plot using US income with a theme of statebins.