1. Introduction

This report looks at how social vulnerability changed in Puerto Rico census tracts between 2014 and 2022.
I am interested in which places became more vulnerable.
The audience is planners and disaster recovery people who want to focus support on higher-risk areas.

2. Prepare the data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# load the two CSV files

PR_SVL22 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2022_CencusTrack_PR_SVL/2022_CT_svi_interactive_map.csv")
## Rows: 939 Columns: 160
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (6): STATE, ST_ABBR, COUNTY, LOCATION, GeoLevel, Comparison
## dbl (154): ST, STCNTY, FIPS, AREA_SQMI, E_TOTPOP, M_TOTPOP, E_HU, M_HU, E_HH...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PR_SVL14 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2014_CencusTrack_PR_SVL/2014_CT_svi_interactive_map.csv")
## Rows: 910 Columns: 130
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (125): AFFGEOID, TRACTCE, ST, STATE, ST_ABBR, COUNTY, FIPS, LOCATION, E_...
## dbl   (5): STCNTY, AREA_SQMI, F_NOVEH, Shape.STArea(), Shape.STLength()
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# clean 2014 data

PR_SVL14_subset <- PR_SVL14 %>%
select(FIPS,
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES) %>%
rename(
Overall_2014 = RPL_THEMES
) %>%
filter(FIPS != "FIPS", !is.na(FIPS)) %>%
mutate(
FIPS = as.character(FIPS),
Overall_2014 = as.numeric(Overall_2014)
)

# clean 2022 data

PR_SVL22_subset <- PR_SVL22 %>%
select(FIPS,
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES) %>%
rename(
Overall_2022 = RPL_THEMES
) %>%
mutate(
FIPS = format(FIPS, scientific = FALSE, trim = TRUE),
FIPS = as.character(FIPS),
Overall_2022 = as.numeric(Overall_2022)
)

# join by FIPS and keep only rows with both years

comparison <- PR_SVL14_subset %>%
left_join(PR_SVL22_subset, by = "FIPS") %>%
filter(!is.na(Overall_2014), !is.na(Overall_2022))

# calculate change (2022 - 2014)

comparison$Change <- comparison$Overall_2022 - comparison$Overall_2014

# look at the change values

summary(comparison$Change)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -999.8971   -0.1192    0.0074   -2.3635    0.1456    0.7816

3. Plots

3.1 Plot change in vulnerability

ggplot(comparison, aes(x = Change)) +
  geom_histogram(bins = 30, color = "white", fill = "steelblue") +
  xlim(-1, 1) +
  labs(
    title = "Change in Social Vulnerability (2022 - 2014)",
    x = "Change in Vulnerability Score (-1 to +1)",
    y = "Number of Census Tracts"
  ) +
  theme_minimal()
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).


## 3.2 How many tracts got worse vs better

# categorize each tract

``` r
comparison <- comparison %>%
  mutate(Change_Group = case_when(
    Change >  0.05 ~ "Became MORE vulnerable",
    Change < -0.05 ~ "Became LESS vulnerable",
    TRUE           ~ "Stayed about the same"
  ))
  # count how many tracts in each group
change_counts <- comparison %>%
  group_by(Change_Group) %>%
  summarize(n_tracts = n()) %>%
  # order the bars the way we want to show them
  mutate(Change_Group = factor(
    Change_Group,
    levels = c("Became MORE vulnerable",
               "Stayed about the same",
               "Became LESS vulnerable")
  ))

change_counts
## # A tibble: 3 × 2
##   Change_Group           n_tracts
##   <fct>                     <int>
## 1 Became LESS vulnerable      294
## 2 Became MORE vulnerable      344
## 3 Stayed about the same       204
# bar chart
ggplot(change_counts, aes(x = Change_Group, y = n_tracts)) +
  geom_col(fill = "tomato", color = "white") +
  labs(
    title = "How Did Social Vulnerability Change (2014 → 2022)?",
    x = "",
    y = "Number of Census Tracts"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 15, hjust = 1)
  )

3.3 Population density vs vulnerability (2022)

PR_SVL22_subset_density <- PR_SVL22 %>%
  select(
    FIPS,
    AREA_SQMI,
    E_TOTPOP,
    RPL_THEMES
  ) %>%
  rename(
    Overall_2022 = RPL_THEMES
  ) %>%
  filter(!is.na(FIPS)) %>%
  mutate(
    # make sure FIPS isn't scientific notation
    FIPS = format(FIPS, scientific = FALSE, trim = TRUE),
    FIPS = as.character(FIPS),

    AREA_SQMI = as.numeric(AREA_SQMI),
    E_TOTPOP = as.numeric(E_TOTPOP),
    Overall_2022 = as.numeric(Overall_2022),

    Population_Density = E_TOTPOP / AREA_SQMI
  )
comparison <- comparison %>%
  left_join(
    PR_SVL22_subset_density %>%
      select(FIPS, Population_Density),
    by = "FIPS"
  )
  
  comparison_clean <- comparison %>%
  filter(
    is.finite(Population_Density),
    !is.na(Population_Density),
    !is.na(Overall_2022),
    Population_Density >= 0
  )

summary(comparison_clean$Population_Density)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   766.5  2587.4  4830.3  7657.0 36172.9
summary(comparison_clean$Overall_2022)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -999.0000    0.2521    0.5081  -22.0397    0.7557    1.0000
plot_density_vulnerability <- ggplot(comparison_clean,
       aes(x = Population_Density, y = Overall_2022)) +
  geom_point(alpha = 0.5, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  coord_cartesian(
    xlim = c(
      0,
      quantile(comparison_clean$Population_Density, 0.99, na.rm = TRUE)
    )
  ) +
  labs(
    title = "Population Density vs. Social Vulnerability (2022)",
    subtitle = "Each point is a census tract in Puerto Rico",
    x = "Population Density (people per square mile)",
    y = "Overall Social Vulnerability in 2022 (0 = low, 1 = high)"
  ) +
  theme_minimal()

plot_density_vulnerability
## `geom_smooth()` using formula = 'y ~ x'