In this exercise, you will build on the map you created for the previous assignment. You will take your map of fatal motor vehicle crashes in Wisconsin and adjust it into a final publishable version. You will also integrate a second graph with the map to mitigate some of the limits of a map, such as the ability to precisely compare counties. This second graph could be a column or dot plot showing the fatality rate of the counties, a scatterplot showing county population and the number of fatal crashes, a scatterplot of the pre and post 2012 fatality rate, or a slope graph showing the rate of fatal crashes for pre and post 2012. You need to pick just one plot in addition to the map.
Objectives:
Submit:
Submit an html rendering of your R Notebook that includes the interactive map and graph combination. Be sure to pick an appropriate color map and to add an assertive title, axis labels with units (not on the map), subtitle, and footnote describing the data source. In your notebook include brief paragraphs that address the following points
The benefit of a static plot is that it could present the full infomation of the graph and could be easily exported into the readable contents, like .pdf format without changes.
The benefit of an interactive plot is that it could create a sense of ‘interaction’ between the users and the graph itself, and interactive infomation could add another second layer of ‘infomation’ on the graph that could be hidden unless interacting with, which increased the possible infomation held by the graph. ALthough, animation is engaging but hard to understand sometimes.
library(tidyverse) # Includes ggplot and dplyr
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## Warning: package 'dplyr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggalt)
## Warning: package 'ggalt' was built under R version 4.0.3
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library(ggpointdensity) # To address overplotting and color lines with density
## Warning: package 'ggpointdensity' was built under R version 4.0.3
library(ggrepel)
library(patchwork) # To combine plots
library(ggforce)
library(maps) # Map data for states and counties
## Warning: package 'maps' was built under R version 4.0.3
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(mapproj) # For a range of map projections: mercator, albers, mollweide, gilbert...
## Warning: package 'mapproj' was built under R version 4.0.3
library(usmap) # For 50 states
## Warning: package 'usmap' was built under R version 4.0.3
library(viridis)
## Warning: package 'viridis' was built under R version 4.0.3
## Loading required package: viridisLite
library(viridisLite)
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.0.3
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(sf)
## Warning: package 'sf' was built under R version 4.0.3
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(ggrepel)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.0.3
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(ggiraph)
## Warning: package 'ggiraph' was built under R version 4.0.3
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(patchwork)
#rm(list = ls()) # Clears all variables
## Load and transform the fatal crash data
load("fars_2020.data")
fars.df = fars.df %>% select(latitude, longitud, everything()) %>%
filter(longitud < 777) %>% # Removes missing values coded by 777, 888...
mutate(state = str_pad(state, 2, pad = "0")) %>%
mutate(county = str_pad(county, 3, pad = "0")) %>%
unite("fips", state:county, sep = "", remove = FALSE)
## Filter crashes to include only crashes from Wisconsin, based on the fips number
fars_wisc_map <- fars.df %>% filter(between(fips, 55001, 55141))
midwest.bb <- c(left = -95, bottom = 40, right = -86, top = 47)
library(usmap)
colfunc <- colorRampPalette(c("#F2F2F3", "#1261A0"))
colfunc(10)
## [1] "#F2F2F3" "#D9E1E9" "#C0D1E0" "#A7C1D7" "#8EB1CE" "#75A1C4" "#5C91BB"
## [8] "#4381B2" "#2A71A9" "#1261A0"
MainStates <- map_data("state")
StatePopulation <-
read.csv(
"https://raw.githubusercontent.com/ds4stats/r-tutorials/master/intro-maps/data/StatePopulation.csv",
as.is = TRUE
)
MergedStates <-
inner_join(MainStates, StatePopulation, by = "region")
AllCounty <- map_data("county")
wiscCounty <- AllCounty %>% filter(region == "wisconsin")
wiscpop_map <- countypop %>% filter(abbr == 'WI')
wiscpop_map <-
mutate(wiscpop_map,
region = "wisconsin",
regio = str_replace(tolower(wiscpop_map$county), pattern = " county", ""))
wiscpop <-
left_join(wiscCounty, wiscpop_map, by = c("subregion" = "regio"))
library(usmap)
wisccrash_map <- left_join(wiscpop, fars_wisc_map, by = c("fips" = "fips"))
wisccrash_map <- mutate(wisccrash_map, crashrate_percaptia = fatals/pop_2015)
wisccrash_map_year <-
transform(wisccrash_map, year = as.numeric(year) + 2005)
wisccrash_map_year = wisccrash_map_year %>%
mutate(pre2012 = if_else(
condition = (year < 2012),
true = "pre2012",
false = "post2012",
missing = NULL
))
prop_summary <-
wisccrash_map_year %>% group_by(group, pre2012) %>% summarise(crash_mean = mean(crashrate_percaptia, na.rm = T)) %>% ungroup() %>% na.omit
prop_summary_spread <-
prop_summary %>% spread(pre2012, crash_mean) %>% mutate(Diff = post2012 - pre2012)
rownames(prop_summary_spread) <-
paste("G_", prop_summary_spread$group)
temp <- paste("G_", wisccrash_map_year$group)
prop_summary_extend2 <- prop_summary_spread[temp, ]
wisccrash_map_year <-
cbind(wisccrash_map_year, prop_summary_extend2[, "Diff"])
rownames(prop_summary) <-
paste(prop_summary$group, prop_summary$pre2012)
temp <- paste(wisccrash_map_year$group, wisccrash_map_year$pre2012)
prop_summary_extend1 <- prop_summary[temp, ]
wisccrash_map_year <-
cbind(wisccrash_map_year, prop_summary_extend1[, "crash_mean"])
wisccrash_map_year_plot <- wisccrash_map_year %>% na.omit
## Plot the mean between pre and post 2012
p_map <-
ggplot(wisccrash_map_year_plot, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(fill = crash_mean), color = 'grey50') +
scale_fill_gradientn(name = "Crash Rate", colours = colfunc(10)) +
coord_map(projection = "albers",
lat0 = 39,
lat1 = 45) +
labs(x = "Longitude", y = "Latitude",
title = "Crash fatal rate percapita of all counties in WI, before and after 2012") +
facet_wrap(pre2012 ~ .)
p_map_diff <-
ggplot(wisccrash_map_year_plot, aes(x = long, y = lat, group = group)) +
geom_polygon(aes(fill = Diff), color = 'grey50') +
scale_fill_gradientn(name = "Differential Crash Rate", colours = colfunc(10)) +
coord_map(projection = "albers",
lat0 = 39,
lat1 = 45) +
labs(x = "Longitude", y = "Latitude")+
theme_bw() +
theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))) +
theme(axis.title.x=element_text(size=9)) +
theme(axis.title.y=element_text(size=9)) +
theme(legend.text=element_text(size = 9))
p_map_diff
prop_summary$group <- as.factor(prop_summary$group)
prop_summary <- prop_summary %>% mutate(anno = if_else(crash_mean < 0.00015, "small", "large"))
df_anno <- prop_summary %>% filter( anno == "large")
prop_summary_spread$group <- as.factor(prop_summary_spread$group)
prop_summary_spread <- prop_summary_spread %>% mutate(anno = if_else(abs(Diff)<1e-5, "small", "large"))
df_anno1 <- prop_summary_spread %>% filter( anno == "large")
p_scatter <- prop_summary %>% ggplot(aes(x = crash_mean, y = group, color = anno)) +
geom_point(alpha = .9, size = .5) +
facet_wrap(~pre2012) +
scale_y_discrete(expand=c(0, 0)) +
scale_color_manual(values = c("red", "gray50")) +
#geom_jitter +
geom_text(data = df_anno, aes(x = crash_mean + 1e-5, y = group, label = group), size = 3, alpha = 0.9) +
#theme(legend.position = "none")
theme_minimal(base_size = 9)
p_scatter_diff <- prop_summary_spread %>% ggplot() +
geom_point(aes(x = group, y = Diff, color = anno)) +
scale_x_discrete(breaks=c("2500","2950","3000","3050","3100", "3500"),
labels=c("2500","2950","3000","3050","3100", "3500")) +
#theme(axis.text.x = element_blank()) +
scale_color_manual(name = "Differences", values = c("red", "gray50")) +
geom_text(data = df_anno1, aes(x = group, y = Diff + 1e-6, label = group), size = 2, alpha = 0.9) +
labs(x = "Group name of counties", y = "Difference between crash rates") +
theme_minimal(base_size = 9) +
theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))) +
#theme(legend.position = c(1.1,.85), legend.direction = "vertical") +
theme(legend.background = element_rect(fill="grey90",
size=0.3, linetype="solid", colour ="grey75"))
#p_scatter
p_scatter_diff
# p1 <- p_map + p_scatter + plot_layout(ncol = 1, heights = c(1, 1))
#
# p1
combined_p2 <- p_map_diff + p_scatter_diff +
plot_annotation(
title = 'Mean crash rate for Wisconsin counties',
subtitle = 'Comparison before and after 2012',
caption = 'State population from: https://raw.githubusercontent.com/',
theme = theme(plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(size = 8, face = "italic"))
)
plot(combined_p2)
p11 <- prop_summary %>%
ggplot() +
geom_point(aes(x = group, y = crash_mean, color = anno)) +
facet_wrap(~pre2012) +
theme(axis.text.x = element_blank()) +
scale_color_manual(values = c("#66C2A5","#FC8D62")) +
theme(legend.position = "none")
p21 <- prop_summary_spread %>%
ggplot() +
geom_point(aes(x = group, y = Diff, color = anno)) +
theme(axis.text.x = element_blank()) +
scale_color_manual(values = c("#66C2A5","#FC8D62")) +
theme(legend.position = "none")
my_p1 <- p11 + geom_point_interactive(aes(tooltip = group, x = group, y = crash_mean, color = anno))
my_p11 <- p_map + my_p1 + plot_layout(ncol = 1, heights = c(1, 1))
girafe(code = print(my_p11))
## Warning: package 'gdtools' was built under R version 4.0.3
my_p2 <- p21 + geom_point_interactive(aes(tooltip = group, x = group, y = Diff, color = anno))
combined_interactive_p2 <- p_map_diff+my_p2 +
plot_annotation(
title = 'Mean crash rate for Wisconsin counties, interactive',
subtitle = 'Comparison before and after 2012',
caption = 'State population from: https://raw.githubusercontent.com/',
theme = theme(plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(size = 8, face = "italic"))
)
girafe(code = print(combined_interactive_p2))