#This lab will explore the spatial distribution of climate damages across the US and the relationship between climate damages and areas with a high percentage of disadvantaged residents.

#CEJST’s basic spatial unit is the 2010 census tract. Tracts are largish neighborhoods averaging about 4,000 people. They are relatively stable over time. They are nested within counties in the same way the counties are nested within states.

#Tracts are identified by an 11-character geoid that consists of fips codes for state (2 characters), county (3 characters) and tract (6 characters). This means the first two characters of the code identify the tract’s state, and the first five characters of the code identify the tract’s county. This make it very easy for R programmers to group_by() and summarize() tract-level data to states or counties.
suppressWarnings({library(tidyverse)
library(readxl)
library(sf)
library(RColorBrewer)
library(tmap)
library(plotly)
library(here)
})
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
## 
## 
## Attaching package: 'plotly'
## 
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## 
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## 
## here() starts at C:/temp/class2322/_B__Classes_export (2)/class2322/ClimateImpacts
source("functions.R")
j1 <- read_excel("communities-2022-05-31-1915GMT.xlsx")

creating county geocode from first 5 characters of tract id

total population, and disadvantaged population

j2 <- j1 %>% 
  tolow() %>%  
  mutate(geocode = paste0("g", mid(census.tract.id,1,5))) %>% 
  select(geocode,census.tract.id,everything()) %>% 
  mutate(totpop = total.population,
         dispop = ifelse(identified.as.disadvantaged=="TRUE",
                         total.population,0))

adding population and disadvantaged population by county

calculating disadv as true when over 50% of the population

is disadvantaged

j3 <- j2 %>% 
  group_by(geocode) %>% 
  summarize(totpop=sum(totpop,na.rm=T),
            dispop = sum(dispop,na.rm=T)) %>% 
  mutate(dispct = 100*dispop/totpop) %>% 
  mutate(disadv = ifelse(dispct > 50,TRUE,FALSE))
gacejst = readRDS("gacejst.rds") %>% 
  # the two commands starting with "disadv =" are not a mistake.
  # the first sets every tract to 0, even those with missing values.
  # the second sets disadvantaged tracts to 1.
  # the variable sm_c is a binary 1/0 variable representing whether
  #    or not the tract is disadvantaged.
  mutate(disadv = 0,
         disadv = ifelse(sm_c == 1, 1, disadv),
         geocode = paste0("g",geoid10)) %>% 
  select(geocode,disadv,everything())

#The following tmap code draws Georgia cejst tracts in an interactive map.

#Zoom to the Georgia Tech area and identify disadvantaged tracts near campus.

tmap_mode("view")
## tmap mode set to interactive viewing
tm_basemap("OpenStreetMap") +
tm_shape(gacejst) +
  tm_polygons("disadv",alpha=0.5, palette="Reds")

#Task 2: Importing Hsiang climate impact data

d1 <- read_excel("hsiang-county_damages_by_sector.xlsx") %>% 
  tolow()

renaming variables, converting coastal damages from log to normal units

d2 <- d1 %>%  mutate(geocode = paste0("g",mid(as.character(county.fips.code+100000),2,5))) %>% 
  rename(stabbr = state.code,
         cname = county.name,
         cpop12 = county.population..in.2012.,
         cinc12 = county.income..in.2012.,
         agric = agricultural.damage......4.major.crops.,
         mortal = mortality..deaths.per.100k.,
         energy = energy.expenditures....,
         lowrisklab = labor.low.risk....,
         highrisklab = labor.high.risk....,
         coast = coastal.damage..log10...county.income..,
         vcrime = violent.crime....,
         pcrime = property.crime....,
         total = total.damages....county.income.) %>%
  mutate(coast = 10**coast) %>% 
  replace(., is.na(.), 0) %>% 
  select(geocode,everything(),-county.fips.code)

#Task 3: Reading state and county GIS files #Reading the state and county Census cartographic boundary files as R spatial dataframes. #At the end of this code block the CEJST and Hsiang datasets will be joined to the county spatial dataframe, so all the county-level variables in those datasets will be mappable by tmap.

s1 <- st_read("cb_2018_us_state_20m.shp")
## Reading layer `cb_2018_us_state_20m' from data source 
##   `C:\temp\class2322\_B__Classes_export (2)\class2322\ClimateImpacts\cb_2018_us_state_20m.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 52 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
## Geodetic CRS:  NAD83
# reading county boundaries shapefile, creating geocode for joining,
#    removing Alaska, Hawaii and non-states
c1 <- st_read("cb_2018_us_county_20m.shp") %>% 
  tolow() %>% 
  mutate(geocode = paste0("g",geoid)) %>% 
  filter(statefp != "02" & statefp != "15") %>% 
  filter(statefp <= "56")
## Reading layer `cb_2018_us_county_20m' from data source 
##   `C:\temp\class2322\_B__Classes_export (2)\class2322\ClimateImpacts\cb_2018_us_county_20m.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3220 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
## Geodetic CRS:  NAD83
# joining damages and CEJEST files by geocode
c2 <- c1 %>% 
  left_join(d2) %>% 
  left_join(j3) %>% 
# creating county name with state abbreviation  
  mutate(cname2 = paste0(cname,", ",stabbr)) %>% 
# rounding values for better display on graphs and maps
  mutate(dispct = round(dispct,1),
         total = round(total,1)) %>% 
  select(cname2,everything())
## Joining, by = "geocode"
## Joining, by = "geocode"

#Task 4: Comparing total damages for disadvantaged vs. non-disadvantaged counties #To compare damages in disadvantaged vs. non-disadvantaged counties we could just calculate the mean values for counties in each group. However that would treat large population and small population counties as equal contributors to the means. Instead we should use a weighted mean, which calculates a mean using weights of each county’s total population.

#Remember total damages are a percent of each county’s GDP.

#What do the results show?

#Do disadvantaged counties suffer higher or lower losses of GDP, compared to the other counties.

damagesmean <- c2 %>% 
  group_by(disadv) %>% 
  summarize(average = weighted.mean(total,totpop,na.rm=T))

damagesmean
## Simple feature collection with 3 features and 2 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -124.7258 ymin: 24.49813 xmax: -66.9499 ymax: 49.38436
## Geodetic CRS:  NAD83
## # A tibble: 3 × 3
##   disadv average                                                        geometry
##   <lgl>    <dbl>                                                  <GEOMETRY [°]>
## 1 FALSE     3.51 MULTIPOLYGON (((-96.62065 35.639, -96.19256 35.63909, -96.1923…
## 2 TRUE      7.07 MULTIPOLYGON (((-74.09064 40.7488, -74.07869 40.7506, -74.0748…
## 3 NA      NaN    POLYGON ((-103.0009 43.47685, -103.0006 43.00026, -102.7921 43…

#creating a scatter plot of the relationship between percent of disadvantaged population (on x axis) vs. total damages as a percent of county GDP.

#Putting ggplotly() around a ggplot command creates an interactive version of the chart.

#Note: the geom_smooth with method=“lm” command fits a regression line to the points.

#Question: what does it mean when the line slopes upward?

ggplotly(
ggplot(c2) +
  geom_point(aes(x=dispct,y=total,
                 col=disadv,label=cname2)) +
  scale_color_manual(values=c("deepskyblue1","chocolate2")) +
  geom_smooth(aes(x=dispct,y=total),method="lm") +
  xlab("County Percent Disadvantaged") +
  ylab("Climate Damages as Percent of County GDP") +
  ggtitle("Are Expected Climate Change Damages Higher
          in Counties with over 50% Disadvantaged Population?"))
## Warning: Ignoring unknown aesthetics: label
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

#The ggplot below shows a variation of the prior map by coloring Georgia counties in red. You can think of the regression line as showing the average damage level for counties with different percentages of disadvantaged population.

#What does it mean that most Georgia counties are above the regression line?

ggplotly(
ggplot() +
  geom_point(data=c2,aes(x=dispct,y=total,
                 col=disadv,label=cname2)) +
  scale_color_manual(values=c("deepskyblue1","yellow")) +
  geom_smooth(data=c2, aes(x=dispct,y=total),method="lm") +
  geom_point(data=(c2 %>% filter(statefp=="13")),
             aes(x=dispct,y=total,
                 label=cname2), col="red") +
  xlab("County Percent Disadvantaged") +
  ylab("Climate Damages as Percent of County GDP") +
  ggtitle("Are Expected Climate Change Damages Higher
          in Counties with over 50% Disadvantaged Population?"))
## Warning: Ignoring unknown aesthetics: label
## Ignoring unknown aesthetics: label
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

#Task 5: Creating maps of disadvantaged counties and climate damages

tmap_mode("view")
## tmap mode set to interactive viewing
# Map of disadvantaged areas
tm_shape(c2) +
  tm_polygons("disadv", palette="Reds") +
  tm_shape(s1) +
  tm_borders("black",lwd=1.5)
# Map showing total damages (as percent of county GDP)
tm_shape(c2) +
  tm_polygons("total", palette="-RdYlBu") +
tm_shape(s1) +
  tm_borders("black",lwd=1.5)
## Variable(s) "total" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#Question: what does it mean when a county’s total damages number is negative?

#Starting with the code from the total damages map, create maps showing agricultural damages, mortality damages, and coastal damages (from hurricanes). Use a different palette from RColorBrewer for each map. Use google or duckduckgo to search for an RColorBrewer image showing all the RColorBrewer palettes.

#Agricultural damages map 

tm_shape(c2) +
  tm_polygons("agric", palette="-RdYlGn") +
tm_shape(s1) +
  tm_borders("black",lwd=1.5) 
## Variable(s) "agric" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#Mortality damages map

tm_shape(c2) +
  tm_polygons("mortal", palette="RdPu") +
tm_shape(s1) +
  tm_borders("black",lwd=1.5) 
#Map of coastal damages from hurricanes 

tm_shape(c2) +
  tm_polygons("coast", palette="OrRd") +
tm_shape(s1) +
  tm_borders("black",lwd=1.5) 

#Task 7: Black population and climate damages #The next line of code loads an American Community Survey dataset that includes total population and black population by county, and a black percentage variable.

#Write code to create a new county dataset named c3 by starting with the existing county dataset c2 and conducting a left_join to add the b1 variables. See above in Task 3 for similar joins.

#Write code to calculate the weighted mean of black percent population comparing disadvantaged to non-disadvantaged counties. See above Task 4 for an example of similar code.

#Write code to create a ggplot scatterplot with black percentage on the x axis and total damages on the y axis. See above in Task 4 for an example of similar code. Execute a google or duckduckgo search for “colors in R Ying Wei.” Use this colors cheat-sheet to select your colors for the scatterplot.

b1 <- readRDS("usacs20.rds")
c3 <- c2 %>% left_join(b1, by="geocode")
blckpopcomp <-c3 %>% 
  group_by(disadv) %>% 
  summarize(average = weighted.mean(blackpct,totpop.x,na.rm=T))

blckpopcomp
## Simple feature collection with 3 features and 2 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -124.7258 ymin: 24.49813 xmax: -66.9499 ymax: 49.38436
## Geodetic CRS:  NAD83
## # A tibble: 3 × 3
##   disadv average                                                        geometry
##   <lgl>    <dbl>                                                  <GEOMETRY [°]>
## 1 FALSE     11.7 MULTIPOLYGON (((-96.62065 35.639, -96.19256 35.63909, -96.1923…
## 2 TRUE      19.5 MULTIPOLYGON (((-74.09064 40.7488, -74.07869 40.7506, -74.0748…
## 3 NA        NA   POLYGON ((-103.0009 43.47685, -103.0006 43.00026, -102.7921 43…
ggplotly(
ggplot(c3) +
  geom_point(aes(x=blackpct,y=total,
                 col=disadv,label=cname2)) +
  scale_color_manual(values=c("powderblue","forestgreen")) +
  geom_smooth(aes(x=dispct,y=total),method="lm") +
  xlab("County Percent Black Population") +
  ylab("Climate Damages as Percent of County GDP") +
  ggtitle("Are Expected Climate Change Damages Higher
          in Counties with over 50% Black Population?"))
## Warning: Ignoring unknown aesthetics: label
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
ggplotly(
ggplot() +
  geom_point(data=c3,aes(x=blackpct,y=total,
                 col=disadv,label=cname2)) +
  scale_color_manual(values=c("powderblue","forestgreen")) +
  geom_smooth(data=c3, aes(x=dispct,y=total),method="lm") +
  geom_point(data=(c3 %>% filter(statefp=="12")),
             aes(x=blackpct,y=total,
                 label=cname2), col="darkorchid1") +
  xlab("County Percent Black Population") +
  ylab("Climate Damages as Percent of County GDP") +
  ggtitle("Are Expected Climate Change Damages Higher
          in Counties with over 50% Black Population in Florida?"))
## Warning: Ignoring unknown aesthetics: label
## Ignoring unknown aesthetics: label
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).