# Load package(s)
library(tidyverse)
library(ggthemes)
library(patchwork)
library(sf)
library(tigris)
library(viridis)
library(statebins)
library(ggrepel)
library(grid)
library(jpeg)
# Read in the dataset(s)
ga_data <- read.csv("data/ga_election_data.csv")%>%
janitor::clean_names()
ga_map <- counties(state = "GA", cb = TRUE) %>%
janitor::clean_names()
load("data/US_income.rda")
load("data/tech_stocks.rda")
load("data/corruption.rda")
admin_data <- read.csv("data/NU_admission_data.csv")
steph <- read_delim(file = "data/stephen_curry_shotdata_2014_15.txt", delim = "|") %>%
janitor::clean_names()# load image
court <- rasterGrob(readJPEG(source = "data/nbahalfcourt.jpg"),
width = unit(1, "npc"), height = unit(1, "npc")
)
ggplot() +
#fill grid with image
annotation_custom(
grob = court,
xmin = -250, xmax = 250,
ymin = -52, ymax = 418
) +
coord_fixed() +
xlim(250, -250) +
ylim(-52, 418) +
theme_void() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
plot.title = element_text(face = "bold", size = 14)) +
labs(title = "Stephen Curry",
subtitle = "Shot Chart (2014-2015)",
x = NULL,
y = NULL) +
geom_hex(data = steph, aes(x = loc_x, y = loc_y),
color = "grey", bins = 20, alpha = 0.7) +
scale_fill_gradient(name = "Shot \nAttempts",
low = "yellow", high = "red",
breaks = c(0, 5, 10, 15),
labels = c(0, 5, 10, "15+"),
limits = c(0, 15),
na.value = "red"
)ga_graph <- ga_data %>%
mutate(prop_pre_eday = (absentee_by_mail_votes + advanced_voting_votes)/ total_votes) %>%
select(-contains("_vote"))
biden_map <- ga_map %>%
left_join(ga_graph, by = c("name" = "county")) %>%
filter(candidate == "Joseph R. Biden")
trump_map <- ga_map %>%
left_join(ga_graph, by = c("name" = "county")) %>%
filter(candidate == "Donald J. Trump")
Dems <- ggplot(biden_map) +
geom_sf(aes(fill = prop_pre_eday), show.legend = FALSE) +
labs(title = "Joseph R. Biden", subtitle = "Democratic Nominee") +
theme_map() +
scale_fill_gradient2(high = "#5D3A9B",
mid = "white",
low = "#1AFF1A",
midpoint = 0.75,
breaks = c(0.5, 0.75, 1.00),
limits = c(0.5, 1.0),
labels = c("50%", "75%", "100%")) +
theme(plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 12))
GOP <- ggplot(trump_map) +
geom_sf(aes(fill = prop_pre_eday)) +
labs(title = "Donald J. Trump", subtitle = "Republican Nominee") +
theme_map() +
scale_fill_gradient2(high = "#5D3A9B",
mid = "white",
low = "#1AFF1A",
midpoint = 0.75,
breaks = c(0.5, 0.75, 1.00),
limits = c(0.5, 1.0),
labels = c("50%", "75%", "100%")) +
theme(plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 12),
legend.position = c(1,1),
legend.title = element_blank(),
legend.justification = c(1, 1))
plot <- Dems + GOP
plot +
plot_annotation(
title = 'Percentage of votes from early voting',
caption = 'Georgia: 2020 US Presidential Election Results',
theme = theme(plot.title = element_text(face = "bold", size = 24),
plot.caption = element_text(size = 12)))Summary: It is clear that more Democratic voters voted early, with many areas over 80%. Among Republican voters, fewer people voted early, but still most counties had more than 50% of the votes came before election day.
# Setting income levels
US_income <- mutate(
US_income,
income_bins = cut(
ifelse(is.na(median_income), 25000, median_income),
breaks = c(0, 40000, 50000, 60000, 70000, 80000),
labels = c("< $40k", "$40k to $50k",
"$50k to $60k", "$60k to $70k", "> $70k"),
right = FALSE
)
)ggplot(US_income)+
geom_sf(aes(fill = income_bins),
color = "grey80", size = 0.2)+
scale_fill_viridis(discrete = TRUE)+
theme_void()+
labs(fill = "Median \nIncome")ggplot(US_income)+
geom_statebins(aes(fill = income_bins, state = name))+
scale_fill_viridis(discrete = TRUE)+
theme_statebins()+
labs(fill = "Median \nIncome")ggplot(data = tech_stocks,
aes(x = date, y = price_indexed,
color = company)) +
geom_line(key_glyph = "timeseries") +
scale_y_continuous(breaks = c(0, 100, 200, 300, 400, 500),
labels = scales::dollar,
position = "right") +
scale_x_date(expand = c(0,0)) +
lims(color = c("Facebook", "Alphabet", "Microsoft", "Apple")) +
ylab(NULL) +
xlab(NULL) +
guides(color = guide_legend(override.aes = list(size = 1.3))) +
labs(title = "Stock price, indexed") +
theme_minimal() +
theme(legend.position = c(0.75, 0.85),
legend.title = element_blank())corrupt_2015 <- corruption %>%
filter(year == "2015")
label <- corrupt_2015 %>%
filter(country %in% c("Argentina", "Chile", "China", "Japan", "Ghana", "Iraq", "Niger", "Singapore", "United States"))
ggplot(data = corrupt_2015,
aes(x = cpi, y = hdi, color = region)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm",
formula = "y ~ log(x)",
se = FALSE, color = "grey40") +
geom_text_repel(data = label,
aes(label = country),
box.padding = 0.6,
min.segment.length = 0,
seed = 9876,
color = "black") +
xlab("Corruption Perceptions Index, 2015 \n(100 = least corrupt)") +
ylab("Human Development Index, 2015 \n(1.0 - most developed)") +
labs(title = "Corruption and human development (2015)") +
theme_minimal() +
scale_color_brewer(palette = "Set1") +
guides(color = guide_legend(override.aes = list(size = 3))) +
theme(legend.title = element_blank(),
legend.position = c(1,0),
legend.justification = c(1,0))#bar graph data
bar_dat <- admin_data %>%
mutate(
#green bar
cat_a = Applications - Admitted.students,
#red
cat_b = Admitted.students - Matriculants,
#blue
cat_c = Matriculants)%>%
select(Year, contains("cat_"))%>%
pivot_longer(cols = -Year,
names_to = "category",
values_to = "values")
bar_label <- admin_data %>%
select(-contains("rate"))%>%
pivot_longer(cols = -Year,
names_to = "category",
values_to = "values") %>%
mutate(col_labels = prettyNum(values, big.mark = ","))
bar_plt <- ggplot(bar_dat, aes(Year, values)) +
geom_col(aes(fill = category)) +
theme_classic() +
geom_text(data = bar_label,
aes(label = col_labels),
nudge_y = -300,
size = 2.2,
color = "white") +
scale_y_continuous(
name = "Applicants",
limits = c(0,50000),
breaks = seq(0,50000,5000),
expand = c(0,0),
labels = scales::label_comma()
) +
scale_x_continuous(
name = "Entering Year",
breaks = 1999:2020,
expand = c(0,0.25),
) +
scale_fill_manual(
name = NULL,
values = c("#B6ACD1", "#836EAA", "#4E2A84"),
labels = c("Applications", "Admitted \nstudents", "Matriculants")
) +
theme(legend.position = c(0.5,1),
legend.justification = c(0.5, 1),
legend.direction = "horizontal") +
labs(title = "Northwestern University \nUndergraduate Admissions 1999-2000")rate_dat <- admin_data %>%
select(Year, contains("rate")) %>%
pivot_longer(cols = -Year,
names_to = "category",
values_to = "values")
rate_labels <- rate_dat %>%
mutate(pct_label = str_c(values, "%"))
#yield plot
rate_plt <- ggplot(rate_dat, aes(Year, values)) +
geom_line(aes(color = category)) +
geom_point(aes(color = category, shape = category)) +
theme_classic() +
geom_text(data = rate_labels,
aes(label = pct_label),
nudge_y = 2.5,
size = 1.8
) +
scale_x_continuous(
name = "Entering Year",
breaks = 1999:2020,
expand = c(0,0.5),
) +
scale_y_continuous(
name = "Rate",
limits = c(0,60),
breaks = seq(0,60,10),
expand = c(0,0),
labels = scales::label_percent(scale = 1)
) +
scale_color_manual(
name = NULL,
values = c("#B6ACD1", "#836EAA"),
labels = c("Admission Rate", "Yield Rate")
) +
scale_shape_manual(
name = NULL,
values = c(16, 8),
labels = c("Admission Rate", "Yield Rate")
) +
theme(legend.position = c(0.5,1),
legend.justification = c(0.5, 1),
legend.direction = "horizontal") +
labs(title = "Northwestern University \nUndergraduate Admissions 1999-2000")bar_plt / rate_plt