In this exercise, you will build on the map you created for the previous assignment. You will take your map of fatal motor vehicle crashes in Wisconsin and adjust it into a final publishable version. You will also integrate a second graph with the map to mitigate some of the limits of a map, such as the ability to precisely compare counties. This second graph could be a column or dot plot showing the fatality rate of the counties, a scatterplot showing county population and the number of fatal crashes, a scatterplot of the pre and post 2012 fatality rate, or a slope graph showing the rate of fatal crashes for pre and post 2012. You need to pick just one plot in addition to the map.

Objectives:

Submit:

Submit an html rendering of your R Notebook that includes the interactive map and graph combination. Be sure to pick an appropriate color map and to add an assertive title, axis labels with units (not on the map), subtitle, and footnote describing the data source. In your notebook include brief paragraphs that address the following points

  1. Describe what your graph shows: what is the pattern in the data that is interesting? This description should be consistent with the assertive title, highlighting, and annotation in the plots.
  1. Describe a few of the design choices you made to reduce clutter and focus the viewers’ attention on the pattern in the data that you want them to see.
  1. Describe a benefit of a static plot and a benefit of the interactive plot.
library(tidyverse) # Includes ggplot and dplyr
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## Warning: package 'dplyr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggalt)
## Warning: package 'ggalt' was built under R version 4.0.3
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
library(ggpointdensity) # To address overplotting and color lines with density
## Warning: package 'ggpointdensity' was built under R version 4.0.3
library(ggrepel)
library(patchwork) # To combine plots
library(ggforce)
library(maps) # Map data for states and counties
## Warning: package 'maps' was built under R version 4.0.3
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(mapproj) # For a range of map projections: mercator, albers, mollweide, gilbert...
## Warning: package 'mapproj' was built under R version 4.0.3
library(usmap) # For 50 states 
## Warning: package 'usmap' was built under R version 4.0.3
library(viridis)
## Warning: package 'viridis' was built under R version 4.0.3
## Loading required package: viridisLite
library(viridisLite)
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.0.3
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(sf)
## Warning: package 'sf' was built under R version 4.0.3
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(ggrepel)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.0.3
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(ggiraph)
## Warning: package 'ggiraph' was built under R version 4.0.3
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(patchwork)

#rm(list = ls()) # Clears all variables

Load and transform data

## Load and transform the fatal crash data
load("fars_2020.data")
fars.df = fars.df %>% select(latitude, longitud, everything()) %>% 
  filter(longitud < 777) %>%  # Removes missing values coded by 777, 888...
  mutate(state = str_pad(state, 2, pad = "0")) %>% 
  mutate(county = str_pad(county, 3, pad = "0")) %>% 
  unite("fips", state:county, sep = "", remove = FALSE)
## Filter crashes to include only crashes from Wisconsin, based on the fips number
fars_wisc_map <- fars.df %>% filter(between(fips, 55001, 55141))
midwest.bb <- c(left = -95, bottom = 40, right = -86, top = 47) 

Plot the Wisconsin counties using polygons data and fill with percapita crash rate

library(usmap)
colfunc <- colorRampPalette(c("#F2F2F3", "#1261A0"))
colfunc(10)
##  [1] "#F2F2F3" "#D9E1E9" "#C0D1E0" "#A7C1D7" "#8EB1CE" "#75A1C4" "#5C91BB"
##  [8] "#4381B2" "#2A71A9" "#1261A0"
MainStates <- map_data("state")
StatePopulation <-
  read.csv(
    "https://raw.githubusercontent.com/ds4stats/r-tutorials/master/intro-maps/data/StatePopulation.csv",
    as.is = TRUE
  )
MergedStates <-
  inner_join(MainStates, StatePopulation, by = "region")

AllCounty <- map_data("county")
wiscCounty <- AllCounty %>% filter(region == "wisconsin")

wiscpop_map <- countypop %>% filter(abbr == 'WI')
wiscpop_map <-
  mutate(wiscpop_map,
         region = "wisconsin",
         regio =  str_replace(tolower(wiscpop_map$county), pattern = " county", ""))

wiscpop <-
  left_join(wiscCounty, wiscpop_map, by = c("subregion" = "regio"))
library(usmap)
wisccrash_map <- left_join(wiscpop, fars_wisc_map,  by = c("fips" = "fips")) 
wisccrash_map <- mutate(wisccrash_map, crashrate_percaptia = fatals/pop_2015)
wisccrash_map_year <-
  transform(wisccrash_map, year = as.numeric(year) + 2005)

wisccrash_map_year = wisccrash_map_year %>%
  mutate(pre2012 = if_else(
    condition = (year < 2012),
    true = "pre2012",
    false = "post2012",
    missing = NULL
  ))

prop_summary <-
  wisccrash_map_year %>% group_by(group, pre2012) %>% summarise(crash_mean = mean(crashrate_percaptia, na.rm = T)) %>% ungroup() %>% na.omit

prop_summary_spread <-
  prop_summary %>% spread(pre2012, crash_mean)  %>% mutate(Diff = post2012 - pre2012)

rownames(prop_summary_spread) <-
  paste("G_", prop_summary_spread$group)

temp <- paste("G_", wisccrash_map_year$group)
prop_summary_extend2 <- prop_summary_spread[temp, ]
wisccrash_map_year <-
  cbind(wisccrash_map_year, prop_summary_extend2[, "Diff"])

rownames(prop_summary) <-
  paste(prop_summary$group, prop_summary$pre2012)
temp <- paste(wisccrash_map_year$group, wisccrash_map_year$pre2012)

prop_summary_extend1 <- prop_summary[temp, ]
wisccrash_map_year <-
  cbind(wisccrash_map_year, prop_summary_extend1[, "crash_mean"])

wisccrash_map_year_plot <- wisccrash_map_year %>% na.omit

## Plot the mean between pre and post 2012
p_map <-
  ggplot(wisccrash_map_year_plot,  aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = crash_mean), color = 'grey50') +
  scale_fill_gradientn(name = "Crash Rate", colours = colfunc(10)) +
  coord_map(projection = "albers",
            lat0 = 39,
            lat1 = 45)  +
  labs(x = "Longitude", y = "Latitude",
       title = "Crash fatal rate percapita of all counties in WI, before and after 2012") +
  facet_wrap(pre2012 ~ .)

Plot the difference between pre and post 2012

p_map_diff <-
  ggplot(wisccrash_map_year_plot,  aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Diff), color = 'grey50') +
  scale_fill_gradientn(name = "Differential Crash Rate", colours = colfunc(10)) +
  coord_map(projection = "albers",
            lat0 = 39,
            lat1 = 45)  +
 
  labs(x = "Longitude", y = "Latitude")+
  theme_bw() + 
  theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))) + 
  theme(axis.title.x=element_text(size=9)) +
  theme(axis.title.y=element_text(size=9)) +
  theme(legend.text=element_text(size = 9)) 

p_map_diff

prop_summary$group <- as.factor(prop_summary$group)
prop_summary <- prop_summary %>% mutate(anno = if_else(crash_mean < 0.00015, "small", "large"))
df_anno <- prop_summary %>% filter( anno == "large")

prop_summary_spread$group <- as.factor(prop_summary_spread$group)
prop_summary_spread <- prop_summary_spread %>% mutate(anno = if_else(abs(Diff)<1e-5, "small", "large"))
df_anno1 <- prop_summary_spread %>% filter( anno == "large")


p_scatter <- prop_summary %>% ggplot(aes(x = crash_mean, y = group, color = anno)) +
  geom_point(alpha = .9, size = .5) +
  facet_wrap(~pre2012) +
  scale_y_discrete(expand=c(0, 0)) +
  scale_color_manual(values = c("red", "gray50")) +
  #geom_jitter +
  geom_text(data = df_anno, aes(x = crash_mean + 1e-5, y = group, label = group), size = 3, alpha = 0.9) +
  #theme(legend.position = "none")
  theme_minimal(base_size = 9)

p_scatter_diff <- prop_summary_spread %>% ggplot() + 
  geom_point(aes(x = group, y = Diff, color = anno)) + 
  scale_x_discrete(breaks=c("2500","2950","3000","3050","3100", "3500"), 
                   labels=c("2500","2950","3000","3050","3100", "3500"))  +
  #theme(axis.text.x = element_blank()) + 
  scale_color_manual(name = "Differences", values = c("red", "gray50")) + 
  geom_text(data = df_anno1, aes(x = group, y = Diff + 1e-6, label = group), size = 2, alpha = 0.9) + 
  labs(x = "Group name of counties", y = "Difference between crash rates") + 
  theme_minimal(base_size = 9) + 
  theme(axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))) + 
  #theme(legend.position = c(1.1,.85), legend.direction = "vertical") + 
  theme(legend.background = element_rect(fill="grey90", 
                                  size=0.3, linetype="solid", colour ="grey75"))

#p_scatter

p_scatter_diff

# p1 <- p_map + p_scatter + plot_layout(ncol = 1, heights = c(1, 1))
# 
# p1
combined_p2 <- p_map_diff + p_scatter_diff + 
  plot_annotation(
    title = 'Mean crash rate for Wisconsin counties',
    subtitle = 'Comparison before and after 2012', 
    caption = 'State population from: https://raw.githubusercontent.com/',
    theme = theme(plot.title = element_text(size = 16),
                  plot.subtitle = element_text(size = 12),
                  plot.caption = element_text(size = 8, face = "italic"))
  )

plot(combined_p2)

p11 <- prop_summary %>% 
  ggplot() + 
  geom_point(aes(x = group, y = crash_mean, color = anno)) + 
  facet_wrap(~pre2012) + 
  theme(axis.text.x = element_blank()) + 
  scale_color_manual(values = c("#66C2A5","#FC8D62")) + 
  theme(legend.position = "none")


p21 <- prop_summary_spread %>% 
  ggplot() + 
  geom_point(aes(x = group, y = Diff, color = anno)) + 
  theme(axis.text.x = element_blank()) + 
  scale_color_manual(values = c("#66C2A5","#FC8D62")) + 
  theme(legend.position = "none")


my_p1 <- p11 + geom_point_interactive(aes(tooltip = group, x = group, y = crash_mean, color = anno))
my_p11 <- p_map + my_p1  + plot_layout(ncol = 1, heights = c(1, 1))


girafe(code = print(my_p11))
## Warning: package 'gdtools' was built under R version 4.0.3
my_p2 <- p21 + geom_point_interactive(aes(tooltip = group, x = group, y = Diff, color = anno))
combined_interactive_p2 <- p_map_diff+my_p2 +
  plot_annotation(
    title = 'Mean crash rate for Wisconsin counties, interactive',
    subtitle = 'Comparison before and after 2012', 
    caption = 'State population from: https://raw.githubusercontent.com/',
    theme = theme(plot.title = element_text(size = 16),
                  plot.subtitle = element_text(size = 12),
                  plot.caption = element_text(size = 8, face = "italic"))
  )


girafe(code = print(combined_interactive_p2))