library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at C:/Users/ddtmd/Desktop/2025 Fall/GEOG588/TermProject
library(skimr)
library(mapview)
PR_SVL22 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2022_CencusTrack_PR_SVL/2022_CT_svi_interactive_map.csv")
## Rows: 939 Columns: 160
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): STATE, ST_ABBR, COUNTY, LOCATION, GeoLevel, Comparison
## dbl (154): ST, STCNTY, FIPS, AREA_SQMI, E_TOTPOP, M_TOTPOP, E_HU, M_HU, E_HH...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PR_SVL14 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2014_CencusTrack_PR_SVL/2014_CT_svi_interactive_map.csv")
## Rows: 910 Columns: 130
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (125): AFFGEOID, TRACTCE, ST, STATE, ST_ABBR, COUNTY, FIPS, LOCATION, E_...
## dbl (5): STCNTY, AREA_SQMI, F_NOVEH, Shape.STArea(), Shape.STLength()
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#2022 data: select a subset of columns
PR_SVL22_subset <- PR_SVL22 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)
view(PR_SVL22_subset)
#rename the columns
library(dplyr)
PR_SVL22_subset <- PR_SVL22 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
print(PR_SVL22_subset)
## # A tibble: 939 × 5
## `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
## <dbl> <dbl> <dbl>
## 1 0.888 0.244 0.457
## 2 0.750 0.109 0.522
## 3 0.774 0.145 0.522
## 4 0.887 0.552 0.522
## 5 0.186 0.424 0.0684
## 6 0.643 0.690 0.522
## 7 0.565 0.646 0.0456
## 8 0.306 0.463 0.0163
## 9 0.626 0.887 0.0087
## 10 0.260 0.351 0.0456
## # ℹ 929 more rows
## # ℹ abbreviated name: ¹`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <dbl>,
## # `Overall Summary Ranking` <dbl>
# 2014 Data: select a subset of columns
PR_SVL14_subset <- PR_SVL14 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)
view(PR_SVL14_subset)
# rename the columns
library(dplyr)
PR_SVL14_subset <- PR_SVL14 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
print(PR_SVL14_subset)
## # A tibble: 910 × 5
## `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
## <chr> <chr> <chr>
## 1 RPL_THEME1 RPL_THEME2 RPL_THEME3
## 2 0.9717 0.7423 0.902
## 3 0.9174 0.1927 0.8976
## 4 0.9842 0.1608 0.8645
## 5 0.9333 0.3932 0.6134
## 6 0.7115 0.9604 0.7313
## 7 0.7692 0.2126 0.8656
## 8 0.6719 0.2456 0.1993
## 9 0.6244 0.4989 0.1663
## 10 0.8778 0.5297 0.5672
## # ℹ 900 more rows
## # ℹ abbreviated name: ¹`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <chr>,
## # `Overall Summary Ranking` <chr>
# Make sure both datasets have the key column
names(PR_SVL14)
## [1] "AFFGEOID" "TRACTCE" "ST"
## [4] "STATE" "ST_ABBR" "STCNTY"
## [7] "COUNTY" "FIPS" "LOCATION"
## [10] "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [13] "E_HU" "M_HU" "E_HH"
## [16] "M_HH" "E_POV" "M_POV"
## [19] "E_UNEMP" "M_UNEMP" "E_PCI"
## [22] "M_PCI" "E_NOHSDP" "M_NOHSDP"
## [25] "E_AGE65" "M_AGE65" "E_AGE17"
## [28] "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_MINRTY"
## [34] "M_MINRTY" "E_LIMENG" "M_LIMENG"
## [37] "E_MUNIT" "M_MUNIT" "E_MOBILE"
## [40] "M_MOBILE" "E_CROWD" "M_CROWD"
## [43] "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV" "MP_POV"
## [49] "EP_UNEMP" "MP_UNEMP" "EP_PCI"
## [52] "MP_PCI" "EP_NOHSDP" "MP_NOHSDP"
## [55] "EP_AGE65" "MP_AGE65" "EP_AGE17"
## [58] "MP_AGE17" "EP_DISABL" "MP_DISABL"
## [61] "EP_SNGPNT" "MP_SNGPNT" "EP_MINRTY"
## [64] "MP_MINRTY" "EP_LIMENG" "MP_LIMENG"
## [67] "EP_MUNIT" "MP_MUNIT" "EP_MOBILE"
## [70] "MP_MOBILE" "EP_CROWD" "MP_CROWD"
## [73] "EP_NOVEH" "MP_NOVEH" "EP_GROUPQ"
## [76] "MP_GROUPQ" "EPL_POV" "EPL_UNEMP"
## [79] "EPL_PCI" "EPL_NOHSDP" "SPL_THEME1"
## [82] "RPL_THEME1" "EPL_AGE65" "EPL_AGE17"
## [85] "EPL_DISABL" "EPL_SNGPNT" "SPL_THEME2"
## [88] "RPL_THEME2" "EPL_MINRTY" "EPL_LIMENG"
## [91] "SPL_THEME3" "RPL_THEME3" "EPL_MUNIT"
## [94] "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH"
## [97] "EPL_GROUPQ" "SPL_THEME4" "RPL_THEME4"
## [100] "SPL_THEMES" "RPL_THEMES" "F_POV"
## [103] "F_UNEMP" "F_PCI" "F_NOHSDP"
## [106] "F_THEME1" "F_AGE65" "F_AGE17"
## [109] "F_DISABL" "F_SNGPNT" "F_THEME2"
## [112] "F_MINRTY" "F_LIMENG" "F_THEME3"
## [115] "F_MUNIT" "F_MOBILE" "F_CROWD"
## [118] "F_NOVEH" "F_GROUPQ" "F_THEME4"
## [121] "F_TOTAL" "E_UNINSUR" "M_UNINSUR"
## [124] "EP_UNINSUR" "MP_UNINSUR" "Shape"
## [127] "Shape.STArea()" "Shape.STLength()" "GeoLevel"
## [130] "Comparison"
names(PR_SVL22)
## [1] "ST" "STATE" "ST_ABBR" "STCNTY" "COUNTY"
## [6] "FIPS" "LOCATION" "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [11] "E_HU" "M_HU" "E_HH" "M_HH" "E_POV150"
## [16] "M_POV150" "E_UNEMP" "M_UNEMP" "E_HBURD" "M_HBURD"
## [21] "E_NOHSDP" "M_NOHSDP" "E_UNINSUR" "M_UNINSUR" "E_AGE65"
## [26] "M_AGE65" "E_AGE17" "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_LIMENG" "M_LIMENG" "E_MINRTY"
## [36] "M_MINRTY" "E_MUNIT" "M_MUNIT" "E_MOBILE" "M_MOBILE"
## [41] "E_CROWD" "M_CROWD" "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV150" "MP_POV150" "EP_UNEMP" "MP_UNEMP"
## [51] "EP_HBURD" "MP_HBURD" "EP_NOHSDP" "MP_NOHSDP" "EP_UNINSUR"
## [56] "MP_UNINSUR" "EP_AGE65" "MP_AGE65" "EP_AGE17" "MP_AGE17"
## [61] "EP_DISABL" "MP_DISABL" "EP_SNGPNT" "MP_SNGPNT" "EP_LIMENG"
## [66] "MP_LIMENG" "EP_MINRTY" "MP_MINRTY" "EP_MUNIT" "MP_MUNIT"
## [71] "EP_MOBILE" "MP_MOBILE" "EP_CROWD" "MP_CROWD" "EP_NOVEH"
## [76] "MP_NOVEH" "EP_GROUPQ" "MP_GROUPQ" "EPL_POV150" "EPL_UNEMP"
## [81] "EPL_HBURD" "EPL_NOHSDP" "EPL_UNINSUR" "SPL_THEME1" "RPL_THEME1"
## [86] "EPL_AGE65" "EPL_AGE17" "EPL_DISABL" "EPL_SNGPNT" "EPL_LIMENG"
## [91] "SPL_THEME2" "RPL_THEME2" "EPL_MINRTY" "SPL_THEME3" "RPL_THEME3"
## [96] "EPL_MUNIT" "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH" "EPL_GROUPQ"
## [101] "SPL_THEME4" "RPL_THEME4" "SPL_THEMES" "RPL_THEMES" "F_POV150"
## [106] "F_UNEMP" "F_HBURD" "F_NOHSDP" "F_UNINSUR" "F_THEME1"
## [111] "F_AGE65" "F_AGE17" "F_DISABL" "F_SNGPNT" "F_LIMENG"
## [116] "F_THEME2" "F_MINRTY" "F_THEME3" "F_MUNIT" "F_MOBILE"
## [121] "F_CROWD" "F_NOVEH" "F_GROUPQ" "F_THEME4" "F_TOTAL"
## [126] "E_DAYPOP" "E_NOINT" "M_NOINT" "E_AFAM" "M_AFAM"
## [131] "E_HISP" "M_HISP" "E_ASIAN" "M_ASIAN" "E_AIAN"
## [136] "M_AIAN" "E_NHPI" "M_NHPI" "E_TWOMORE" "M_TWOMORE"
## [141] "E_OTHERRACE" "M_OTHERRACE" "EP_NOINT" "MP_NOINT" "EP_AFAM"
## [146] "MP_AFAM" "EP_HISP" "MP_HISP" "EP_ASIAN" "MP_ASIAN"
## [151] "EP_AIAN" "MP_AIAN" "EP_NHPI" "MP_NHPI" "EP_TWOMORE"
## [156] "MP_TWOMORE" "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel" "Comparison"
# Make sure both datasets have the key column
names(PR_SVL14)
## [1] "AFFGEOID" "TRACTCE" "ST"
## [4] "STATE" "ST_ABBR" "STCNTY"
## [7] "COUNTY" "FIPS" "LOCATION"
## [10] "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [13] "E_HU" "M_HU" "E_HH"
## [16] "M_HH" "E_POV" "M_POV"
## [19] "E_UNEMP" "M_UNEMP" "E_PCI"
## [22] "M_PCI" "E_NOHSDP" "M_NOHSDP"
## [25] "E_AGE65" "M_AGE65" "E_AGE17"
## [28] "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_MINRTY"
## [34] "M_MINRTY" "E_LIMENG" "M_LIMENG"
## [37] "E_MUNIT" "M_MUNIT" "E_MOBILE"
## [40] "M_MOBILE" "E_CROWD" "M_CROWD"
## [43] "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV" "MP_POV"
## [49] "EP_UNEMP" "MP_UNEMP" "EP_PCI"
## [52] "MP_PCI" "EP_NOHSDP" "MP_NOHSDP"
## [55] "EP_AGE65" "MP_AGE65" "EP_AGE17"
## [58] "MP_AGE17" "EP_DISABL" "MP_DISABL"
## [61] "EP_SNGPNT" "MP_SNGPNT" "EP_MINRTY"
## [64] "MP_MINRTY" "EP_LIMENG" "MP_LIMENG"
## [67] "EP_MUNIT" "MP_MUNIT" "EP_MOBILE"
## [70] "MP_MOBILE" "EP_CROWD" "MP_CROWD"
## [73] "EP_NOVEH" "MP_NOVEH" "EP_GROUPQ"
## [76] "MP_GROUPQ" "EPL_POV" "EPL_UNEMP"
## [79] "EPL_PCI" "EPL_NOHSDP" "SPL_THEME1"
## [82] "RPL_THEME1" "EPL_AGE65" "EPL_AGE17"
## [85] "EPL_DISABL" "EPL_SNGPNT" "SPL_THEME2"
## [88] "RPL_THEME2" "EPL_MINRTY" "EPL_LIMENG"
## [91] "SPL_THEME3" "RPL_THEME3" "EPL_MUNIT"
## [94] "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH"
## [97] "EPL_GROUPQ" "SPL_THEME4" "RPL_THEME4"
## [100] "SPL_THEMES" "RPL_THEMES" "F_POV"
## [103] "F_UNEMP" "F_PCI" "F_NOHSDP"
## [106] "F_THEME1" "F_AGE65" "F_AGE17"
## [109] "F_DISABL" "F_SNGPNT" "F_THEME2"
## [112] "F_MINRTY" "F_LIMENG" "F_THEME3"
## [115] "F_MUNIT" "F_MOBILE" "F_CROWD"
## [118] "F_NOVEH" "F_GROUPQ" "F_THEME4"
## [121] "F_TOTAL" "E_UNINSUR" "M_UNINSUR"
## [124] "EP_UNINSUR" "MP_UNINSUR" "Shape"
## [127] "Shape.STArea()" "Shape.STLength()" "GeoLevel"
## [130] "Comparison"
names(PR_SVL22)
## [1] "ST" "STATE" "ST_ABBR" "STCNTY" "COUNTY"
## [6] "FIPS" "LOCATION" "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [11] "E_HU" "M_HU" "E_HH" "M_HH" "E_POV150"
## [16] "M_POV150" "E_UNEMP" "M_UNEMP" "E_HBURD" "M_HBURD"
## [21] "E_NOHSDP" "M_NOHSDP" "E_UNINSUR" "M_UNINSUR" "E_AGE65"
## [26] "M_AGE65" "E_AGE17" "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_LIMENG" "M_LIMENG" "E_MINRTY"
## [36] "M_MINRTY" "E_MUNIT" "M_MUNIT" "E_MOBILE" "M_MOBILE"
## [41] "E_CROWD" "M_CROWD" "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV150" "MP_POV150" "EP_UNEMP" "MP_UNEMP"
## [51] "EP_HBURD" "MP_HBURD" "EP_NOHSDP" "MP_NOHSDP" "EP_UNINSUR"
## [56] "MP_UNINSUR" "EP_AGE65" "MP_AGE65" "EP_AGE17" "MP_AGE17"
## [61] "EP_DISABL" "MP_DISABL" "EP_SNGPNT" "MP_SNGPNT" "EP_LIMENG"
## [66] "MP_LIMENG" "EP_MINRTY" "MP_MINRTY" "EP_MUNIT" "MP_MUNIT"
## [71] "EP_MOBILE" "MP_MOBILE" "EP_CROWD" "MP_CROWD" "EP_NOVEH"
## [76] "MP_NOVEH" "EP_GROUPQ" "MP_GROUPQ" "EPL_POV150" "EPL_UNEMP"
## [81] "EPL_HBURD" "EPL_NOHSDP" "EPL_UNINSUR" "SPL_THEME1" "RPL_THEME1"
## [86] "EPL_AGE65" "EPL_AGE17" "EPL_DISABL" "EPL_SNGPNT" "EPL_LIMENG"
## [91] "SPL_THEME2" "RPL_THEME2" "EPL_MINRTY" "SPL_THEME3" "RPL_THEME3"
## [96] "EPL_MUNIT" "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH" "EPL_GROUPQ"
## [101] "SPL_THEME4" "RPL_THEME4" "SPL_THEMES" "RPL_THEMES" "F_POV150"
## [106] "F_UNEMP" "F_HBURD" "F_NOHSDP" "F_UNINSUR" "F_THEME1"
## [111] "F_AGE65" "F_AGE17" "F_DISABL" "F_SNGPNT" "F_LIMENG"
## [116] "F_THEME2" "F_MINRTY" "F_THEME3" "F_MUNIT" "F_MOBILE"
## [121] "F_CROWD" "F_NOVEH" "F_GROUPQ" "F_THEME4" "F_TOTAL"
## [126] "E_DAYPOP" "E_NOINT" "M_NOINT" "E_AFAM" "M_AFAM"
## [131] "E_HISP" "M_HISP" "E_ASIAN" "M_ASIAN" "E_AIAN"
## [136] "M_AIAN" "E_NHPI" "M_NHPI" "E_TWOMORE" "M_TWOMORE"
## [141] "E_OTHERRACE" "M_OTHERRACE" "EP_NOINT" "MP_NOINT" "EP_AFAM"
## [146] "MP_AFAM" "EP_HISP" "MP_HISP" "EP_ASIAN" "MP_ASIAN"
## [151] "EP_AIAN" "MP_AIAN" "EP_NHPI" "MP_NHPI" "EP_TWOMORE"
## [156] "MP_TWOMORE" "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel" "Comparison"
library(dplyr)
# Step 1. Make cleaned subset for 2014
PR_SVL14_subset <- PR_SVL14 %>%
select(
STCNTY, # county FIPS
COUNTY, # county name
LOCATION, # tract description
FIPS, # tract ID
AREA_SQMI, # area (sq mi)
E_TOTPOP, # total population
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES
) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
# Step 2. Make cleaned subset for 2022
PR_SVL22_subset <- PR_SVL22 %>%
select(
STCNTY,
COUNTY,
LOCATION,
FIPS,
AREA_SQMI,
E_TOTPOP,
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES
) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
# Step 3. Join 2014 + 2022 using FIPS (tract ID)
PR_SVL14_subset <- PR_SVL14_subset %>%
slice(-1) # Delete first row becuase it's not numeric (2022 is okay)
str(PR_SVL14_subset$`Overall Summary Ranking`)
## chr [1:909] "0.8948" "0.5814" "0.6538" "0.8281" "0.8937" "0.5011" "0.4355" ...
comparison <- PR_SVL14_subset %>%
mutate(FIPS = as.character(FIPS)) %>% # FIPS 문자로
rename(
Overall_2014 = `Overall Summary Ranking`
) %>%
left_join(
PR_SVL22_subset %>%
mutate(FIPS = as.character(FIPS)) %>%
rename(
Overall_2022 = `Overall Summary Ranking`
),
by = "FIPS",
suffix = c("_2014", "_2022")
) %>%
mutate(
# 여기서 진짜 숫자로 바꿔줌 (2014는 꼭 필요, 2022는 안전용)
Overall_2014 = as.numeric(Overall_2014),
Overall_2022 = as.numeric(Overall_2022),
Change = Overall_2022 - Overall_2014,
Change_Direction = case_when(
Change > 0 ~ "Increased (worse / more vulnerable)",
Change < 0 ~ "Decreased (better / less vulnerable)",
TRUE ~ "No change"
)
)
view(comparison)
table(comparison$Change_Direction)
##
## Decreased (better / less vulnerable) Increased (worse / more vulnerable)
## 387 438
## No change
## 84
You can also embed plots, for example:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.69240 -0.12255 0.01160 0.01179 0.15470 0.78160
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
##Mapping
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
options(tigris_use_cache = TRUE)
# 1. Puerto Rico Tracts 불러오기
pr_tracts <- tracts("PR", year = 2022, cb = TRUE) %>%
st_transform(32620) %>% # UTM zone20N (Puerto Rico)
mutate(FIPS = as.character(GEOID))
# 2. tract + comparison 조인
pr_change_sf <- pr_tracts %>%
left_join(comparison %>% select(FIPS, Change, Change_Direction), by="FIPS")
# 3. Continuous Change Map
ggplot(pr_change_sf) +
geom_sf(aes(fill = Change), color="grey40", size=0.1) +
scale_fill_distiller(palette="RdBu", direction=-1, limits=c(-0.7,0.7)) +
labs(
title="Change in Social Vulnerability (2014→2022)",
fill="Δ SVI"
) +
theme_minimal()
##
library(sf)
library(dplyr)
coast_sj <- st_read("C:/Users/ddtmd/Desktop/PR_Data/PR_GSV/data_raw/coast_san_juan.gpkg") %>%
st_transform(32620) # 분석좌표계 맞춤
## Reading layer `coast_san_juan' from data source
## `C:\Users\ddtmd\Desktop\PR_Data\PR_GSV\data_raw\coast_san_juan.gpkg'
## using driver `GPKG'
## Simple feature collection with 37 features and 12 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -66.13921 ymin: 18.42781 xmax: -65.98353 ymax: 18.47198
## Geodetic CRS: WGS 84
#Creating Centroid
tract_centroids <- pr_change_sf %>%
st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
# centroid → coastline 거리 행렬 계산
dist_matrix <- st_distance(tract_centroids, coast_sj)
# 각 tract마다 "가장 가까운 해안까지의 거리"만 뽑기 (미터 단위)
tract_centroids$dist_to_coast_m <- apply(dist_matrix, 1, min)
# 요약 확인
summary(tract_centroids$dist_to_coast_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 36.7 7898.4 27693.2 39341.1 66599.1 181953.0
library(ggplot2)
ggplot(tract_centroids, aes(x = dist_to_coast_m, y = Change)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red") +
labs(
title = "SVI Change vs Distance to Coastline (San Juan)",
x = "Distance to Coast (meters)",
y = "Δ SVI (2022 - 2014)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 116 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 116 rows containing missing values or values outside the scale range
## (`geom_point()`).
Result: The scatter plot shows a slightly negative slope: Further from the coast → ΔSVI decreases (less vulnerable) Closer to the coast → ΔSVI stays higher or decreases less (vulnerability remains/worsens)
model <- lm(Change ~ dist_to_coast_m, data = tract_centroids)
summary(model)
##
## Call:
## lm(formula = Change ~ dist_to_coast_m, data = tract_centroids)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71523 -0.13117 -0.00792 0.14806 0.81303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.232e-02 1.219e-02 5.935 4.33e-09 ***
## dist_to_coast_m -1.529e-06 2.316e-07 -6.602 7.30e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2302 on 821 degrees of freedom
## (116 observations deleted due to missingness)
## Multiple R-squared: 0.05041, Adjusted R-squared: 0.04925
## F-statistic: 43.58 on 1 and 821 DF, p-value: 7.299e-11
# Box Plot
library(dplyr)
library(ggplot2)
tract_centroids %>%
# Convert dist_to_coast_m into categorized/factor distance groupsR
mutate(dist_group = cut(dist_to_coast_m,
breaks=c(0,500,1000,2000,5000,10000, Inf),
labels=c("0-0.5km","0.5-1km","1-2km","2-5km","5-10km",">10km"),
include.lowest = TRUE)) %>%
ggplot(aes(x = dist_group, y = Change, fill = dist_group)) +
geom_boxplot(outlier.color = "red", alpha = 0.7) +
labs(
title = "SVI Change by Distance to Coast (Grouped)",
x = "Distance Group to Coastline",
y = "Δ SVI (2022 - 2014)"
) +
theme_minimal() +
theme(legend.position = "none")
## Warning: Removed 116 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Results: Coastal zones (<500m) show greater SVI change and variability, implying higher development impact, whereas inland areas remain more stable.
cor.test(tract_centroids$dist_to_coast_m, tract_centroids$Change)
##
## Pearson's product-moment correlation
##
## data: tract_centroids$dist_to_coast_m and tract_centroids$Change
## t = -6.6015, df = 821, p-value = 7.299e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2884259 -0.1586082
## sample estimates:
## cor
## -0.2245129
Result: Farther from the coast => greater reduction in SVI (less vulnerable over time).
tract_centroids %>%
mutate(dist_group = cut(dist_to_coast_m,
breaks=c(0,500,1000,2000,5000,10000, Inf),
labels=c("0-0.5km","0.5-1km","1-2km","2-5km","5-10km",">10km"),
include.lowest = TRUE)) %>%
group_by(dist_group) %>%
summarise(mean_change = mean(Change, na.rm=TRUE)) %>%
ggplot(aes(x=dist_group, y=mean_change)) +
geom_col(fill="steelblue") +
labs(
title="Average ΔSVI by Distance to Coast",
x="Distance Group",
y="Mean ΔSVI"
) +
theme_minimal()
Results: Areas farther from the coast generally show larger decreases in
SVI, while coastal areas show smaller improvements or even increases in
vulnerability. (The bar chart provides a descriptive visualization of
the average differences across distance groups,)
#SVI ~ Coast Distance regression model + Interpretation
model <- lm(Change ~ dist_to_coast_m, data = tract_centroids)
summary(model)
##
## Call:
## lm(formula = Change ~ dist_to_coast_m, data = tract_centroids)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71523 -0.13117 -0.00792 0.14806 0.81303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.232e-02 1.219e-02 5.935 4.33e-09 ***
## dist_to_coast_m -1.529e-06 2.316e-07 -6.602 7.30e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2302 on 821 degrees of freedom
## (116 observations deleted due to missingness)
## Multiple R-squared: 0.05041, Adjusted R-squared: 0.04925
## F-statistic: 43.58 on 1 and 821 DF, p-value: 7.299e-11
Result: A simple OLS regression shows a significant negative relationship between distance to the coast and SVI change (β = -1.53e-06, p < .001). Inland census tracts experienced greater reductions in vulnerability over time, while coastal areas tended to remain more vulnerable. (the OLS regression serves as an inferential analysis to test whether distance to the coast predicts changes in SVI.)
library(readr)
library(dplyr)
path_places <- "C:/Users/ddtmd/Desktop/PR_Data/PR_GooglePlaceAPI/DATA/sj_places_google.csv"
sj_places_google <- read_csv(path_places)
## Rows: 120 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, address, type
## dbl (4): rating, user_ratings_total, lat, lng
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(sj_places_google)
## Rows: 120
## Columns: 7
## $ name <chr> "La Concha Resort, Puerto Rico, Autograph Collectio…
## $ address <chr> "1077 Ashford Ave, San Juan, 00907, Puerto Rico", "…
## $ rating <dbl> 4.4, 4.1, 4.4, 4.4, 4.5, 4.1, 4.6, 3.9, 4.4, 4.7, 4…
## $ user_ratings_total <dbl> 6214, 2170, 2142, 2100, 7261, 650, 7075, 18, 257, 7…
## $ lat <dbl> 18.45728, 18.44325, 18.45555, 18.45102, 18.45416, 1…
## $ lng <dbl> -66.07343, -66.02300, -66.08822, -66.06438, -66.090…
## $ type <chr> "hotel", "hotel", "hotel", "hotel", "hotel", "hotel…
#Convert to sf spatial object
sj_places_sf <- sj_places_google %>%
st_as_sf(coords = c("lng", "lat"), crs = 4326) %>% # WGS84
st_transform(32620) # PR UTM Zone 20N\
mapview(sj_places_sf)
##Calculate hotel count & density per census tract
# Add area (km²) for density calculation
pr_change_sf$area_km2 <- as.numeric(st_area(pr_change_sf)) / 1e6
# Spatial join: assign each hotel to a census tract
hotel_join <- st_join(pr_change_sf, sj_places_sf, join = st_contains)
# Count hotels & compute density
hotel_density <- hotel_join %>%
add_count(FIPS, name="hotel_count") %>%
distinct(FIPS, .keep_all = TRUE) %>%
mutate(hotel_density = hotel_count / area_km2)
View(hotel_density)
# count per tract
hotel_density <- hotel_join %>%
add_count(FIPS, name="hotel_count") %>%
distinct(FIPS, .keep_all = TRUE) %>%
mutate(hotel_density = hotel_count / area_km2)
View(hotel_density)
For now, I will use dist_to_coast_m (distance to the coast) as a proxy for beach access, and leave Outcome as Change (SVI 2022–2014) as planned.
# 1. organize the data for mediation (Hotel_density, dist_to_coast_m, and Change should all be in a single data.frame).
library(dplyr)
# 1) Centroids: SVI Change + distance to coast
centroids_df <- tract_centroids %>%
st_drop_geometry() %>%
select(FIPS, Change, dist_to_coast_m)
# 2) Hotel density: hotel_density by tract
hotel_df <- hotel_density %>%
st_drop_geometry() %>%
select(FIPS, hotel_density)
# 3) Join for mediation analysis
med_data <- centroids_df %>%
left_join(hotel_df, by = "FIPS") %>%
filter(!is.na(Change),
!is.na(dist_to_coast_m),
!is.na(hotel_density))
str(med_data)
## 'data.frame': 823 obs. of 4 variables:
## $ FIPS : chr "72005400900" "72025201000" "72025201500" "72025201300" ...
## $ Change : num 0.0703 0.0201 0.1968 0.2645 0.0553 ...
## $ dist_to_coast_m: num 107042 22774 23801 22792 10326 ...
## $ hotel_density : num 0.876 0.827 1.684 1.321 0.263 ...
summary(med_data[, c("Change", "dist_to_coast_m", "hotel_density")])
## Change dist_to_coast_m hotel_density
## Min. :-0.69240 Min. : 36.7 Min. : 0.01164
## 1st Qu.:-0.12255 1st Qu.: 8751.2 1st Qu.: 0.07705
## Median : 0.01160 Median : 29270.0 Median : 0.29396
## Mean : 0.01179 Mean : 39590.8 Mean : 0.99357
## 3rd Qu.: 0.15470 3rd Qu.: 66748.5 3rd Qu.: 1.08898
## Max. : 0.78160 Max. :118821.8 Max. :49.79539
#Code interpretation: Korean note:
- med_data는 mediation 분석에 사용할 최종
테이블
- 각 트랙트별로
- Change = SVI 변화량
- dist_to_coast_m = 해안까지 거리 (beach access
proxy)
- hotel_density = 호텔 밀도
먼저 호텔 밀도와 SVI 변화의 직접 관계만 본 모델:
# Total effect: Hotel density → Change
m_total <- lm(Change ~ hotel_density, data = med_data)
summary(m_total)
##
## Call:
## lm(formula = Change ~ hotel_density, data = med_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6949 -0.1344 -0.0011 0.1473 0.7686
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002120 0.008651 0.245 0.80651
## hotel_density 0.009735 0.002840 3.427 0.00064 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2346 on 821 degrees of freedom
## Multiple R-squared: 0.01411, Adjusted R-squared: 0.01291
## F-statistic: 11.75 on 1 and 821 DF, p-value: 0.0006397
Interpretation: HIgher hotel density = Increased SVI
library(sf)
library(dplyr)
library(sfnetworks)
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:tigris':
##
## blocks
## The following objects are masked from 'package:lubridate':
##
## %--%, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# 1) 도로 + 해안선 불러오기 ----
path_roads <- "C:/Users/ddtmd/Desktop/PR_Data/PR_GSV/data_raw/roads_san_juan.gpkg"
path_coast <- "C:/Users/ddtmd/Desktop/PR_Data/PR_GSV/data_raw/coast_san_juan.gpkg"
roads_sj <- st_read(path_roads, quiet = TRUE) %>%
st_transform(32620) %>%
st_cast("LINESTRING") # ★ 도로를 전부 LINESTRING으로 강제
coast_sj <- st_read(path_coast, quiet = TRUE) %>%
st_transform(32620)
# 2) 해안선 2km 버퍼 ----
buf2km <- st_buffer(coast_sj, 2000) # 2000m
# 3) 버퍼 안에 들어오는 도로만 필터링 (geometry 안 바꿈) ----
roads_sj_coast2km <- roads_sj %>%
st_filter(buf2km, .predicate = st_intersects)
# 타입 확인 (LINESTRING만 나와야 함)
print(table(st_geometry_type(roads_sj_coast2km)))
##
## GEOMETRY POINT LINESTRING POLYGON
## 0 0 7403 0
## MULTIPOINT MULTILINESTRING MULTIPOLYGON GEOMETRYCOLLECTION
## 0 0 0 0
## CIRCULARSTRING COMPOUNDCURVE CURVEPOLYGON MULTICURVE
## 0 0 0 0
## MULTISURFACE CURVE SURFACE POLYHEDRALSURFACE
## 0 0 0 0
## TIN TRIANGLE
## 0 0
# 4) sfnetwork 만들기 ----
roads_net <- as_sfnetwork(roads_sj_coast2km, directed = FALSE) %>%
activate("edges") %>%
mutate(weight = st_length(st_geometry(.)))
# 5) node 추출 ----
nodes <- roads_net %>%
activate("nodes") %>%
st_as_sf()
library(sf)
# 1) Beach access 경로 (네 폴더 구조에 맞춰서)
path_access <- "C:/Users/ddtmd/Desktop/PR_Data/PR_GSV/data_raw/poly_tourism_san_juan_point.gpkg"
# 2) 읽고 좌표계 32620으로 변환
access_points_2014 <- st_read(path_access) %>%
st_transform(32620)
## Reading layer `poly_tourism_san_juan_point' from data source
## `C:\Users\ddtmd\Desktop\PR_Data\PR_GSV\data_raw\poly_tourism_san_juan_point.gpkg'
## using driver `GPKG'
## Simple feature collection with 30 features and 43 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -66.12405 ymin: 18.40322 xmax: -66.01285 ymax: 18.46642
## Geodetic CRS: WGS 84
``` r
library(sf)
library(dplyr)
library(sfnetworks)
library(igraph)
#--------------------------------------------------
# 1. Create tract centroids (from pr_change_sf)
#--------------------------------------------------
# pr_change_sf: tract polygons + Change, Overall_20xx, FIPS, etc.
# Make sure CRS is 32620 (same as roads & access)
pr_change_sf <- st_transform(pr_change_sf, 32620)
tract_centroids <- pr_change_sf %>%
filter(!is.na(Change)) %>% # keep tracts with defined SVI change (if needed)
st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
#--------------------------------------------------
# 3. Snap beach access points & centroids to nearest road nodes
#--------------------------------------------------
# access_points_2014: POINT layer of beach access locations
access_points_2014 <- st_transform(access_points_2014, 32620)
# nearest network node index for each beach access point
access_idx <- st_nearest_feature(access_points_2014, nodes)
# unique node indices that represent beach access targets
unique_access_idx <- unique(access_idx)
# nearest network node index for each tract centroid
centroid_idx <- st_nearest_feature(tract_centroids, nodes)
#--------------------------------------------------
# 4. Compute shortest path distance on the road network
#--------------------------------------------------
# convert sfnetwork to igraph
g <- roads_net %>%
activate("edges") %>%
as.igraph()
# distance matrix: (tract centroids) x (access nodes)
dist_mat <- igraph::distances(
g,
v = centroid_idx, # from: nearest node to each tract centroid
to = unique_access_idx, # to: set of access nodes
weights = igraph::E(g)$weight
)
# for each centroid (row), get minimum distance to any access node
min_dist_m <- apply(dist_mat, 1, min, na.rm = TRUE)
# attach to tract_centroids
tract_centroids$dist_access_net_m <- as.numeric(min_dist_m)
# Replace Inf with NA
tract_centroids$dist_access_net_m[is.infinite(tract_centroids$dist_access_net_m)] <- NA
summary(tract_centroids$dist_access_net_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 4960 5506 5961 6972 8234 539
analysis_df <- tract_centroids %>%
st_drop_geometry() %>%
filter(!is.na(dist_access_net_m), !is.na(Change))
model_access_change <- lm(Change ~ dist_access_net_m, data = analysis_df)
summary(model_access_change)
##
## Call:
## lm(formula = Change ~ dist_access_net_m, data = analysis_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66052 -0.11910 -0.01044 0.13022 0.70688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.054e-01 4.793e-02 2.199 0.0287 *
## dist_access_net_m -1.274e-05 7.719e-06 -1.650 0.1000 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2259 on 282 degrees of freedom
## Multiple R-squared: 0.009566, Adjusted R-squared: 0.006054
## F-statistic: 2.724 on 1 and 282 DF, p-value: 0.09998
Interpretation: Beach Access (network distance) → Decreased SVI Change (But low explanatory) (Greater network distance to beach access is associated with slightly larger decreases in social vulnerability over time, but the effect is weak and explains only a small portion of the variance.)