library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at C:/Users/ddtmd/Desktop/2025 Fall/GEOG588/TermProject
library(skimr)
library(mapview)
PR_SVL22 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2022_CencusTrack_PR_SVL/2022_CT_svi_interactive_map.csv")
## Rows: 939 Columns: 160
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): STATE, ST_ABBR, COUNTY, LOCATION, GeoLevel, Comparison
## dbl (154): ST, STCNTY, FIPS, AREA_SQMI, E_TOTPOP, M_TOTPOP, E_HU, M_HU, E_HH...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PR_SVL14 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2014_CencusTrack_PR_SVL/2014_CT_svi_interactive_map.csv")
## Rows: 910 Columns: 130
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (125): AFFGEOID, TRACTCE, ST, STATE, ST_ABBR, COUNTY, FIPS, LOCATION, E_...
## dbl (5): STCNTY, AREA_SQMI, F_NOVEH, Shape.STArea(), Shape.STLength()
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#2022 data: select a subset of columns
PR_SVL22_subset <- PR_SVL22 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)
view(PR_SVL22_subset)
#rename the columns
library(dplyr)
PR_SVL22_subset <- PR_SVL22 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
print(PR_SVL22_subset)
## # A tibble: 939 × 5
## `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
## <dbl> <dbl> <dbl>
## 1 0.888 0.244 0.457
## 2 0.750 0.109 0.522
## 3 0.774 0.145 0.522
## 4 0.887 0.552 0.522
## 5 0.186 0.424 0.0684
## 6 0.643 0.690 0.522
## 7 0.565 0.646 0.0456
## 8 0.306 0.463 0.0163
## 9 0.626 0.887 0.0087
## 10 0.260 0.351 0.0456
## # ℹ 929 more rows
## # ℹ abbreviated name: ¹`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <dbl>,
## # `Overall Summary Ranking` <dbl>
# 2014 Data: select a subset of columns
PR_SVL14_subset <- PR_SVL14 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)
view(PR_SVL14_subset)
# rename the columns
library(dplyr)
PR_SVL14_subset <- PR_SVL14 %>%
select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
print(PR_SVL14_subset)
## # A tibble: 910 × 5
## `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
## <chr> <chr> <chr>
## 1 RPL_THEME1 RPL_THEME2 RPL_THEME3
## 2 0.9717 0.7423 0.902
## 3 0.9174 0.1927 0.8976
## 4 0.9842 0.1608 0.8645
## 5 0.9333 0.3932 0.6134
## 6 0.7115 0.9604 0.7313
## 7 0.7692 0.2126 0.8656
## 8 0.6719 0.2456 0.1993
## 9 0.6244 0.4989 0.1663
## 10 0.8778 0.5297 0.5672
## # ℹ 900 more rows
## # ℹ abbreviated name: ¹`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <chr>,
## # `Overall Summary Ranking` <chr>
# Make sure both datasets have the key column
names(PR_SVL14)
## [1] "AFFGEOID" "TRACTCE" "ST"
## [4] "STATE" "ST_ABBR" "STCNTY"
## [7] "COUNTY" "FIPS" "LOCATION"
## [10] "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [13] "E_HU" "M_HU" "E_HH"
## [16] "M_HH" "E_POV" "M_POV"
## [19] "E_UNEMP" "M_UNEMP" "E_PCI"
## [22] "M_PCI" "E_NOHSDP" "M_NOHSDP"
## [25] "E_AGE65" "M_AGE65" "E_AGE17"
## [28] "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_MINRTY"
## [34] "M_MINRTY" "E_LIMENG" "M_LIMENG"
## [37] "E_MUNIT" "M_MUNIT" "E_MOBILE"
## [40] "M_MOBILE" "E_CROWD" "M_CROWD"
## [43] "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV" "MP_POV"
## [49] "EP_UNEMP" "MP_UNEMP" "EP_PCI"
## [52] "MP_PCI" "EP_NOHSDP" "MP_NOHSDP"
## [55] "EP_AGE65" "MP_AGE65" "EP_AGE17"
## [58] "MP_AGE17" "EP_DISABL" "MP_DISABL"
## [61] "EP_SNGPNT" "MP_SNGPNT" "EP_MINRTY"
## [64] "MP_MINRTY" "EP_LIMENG" "MP_LIMENG"
## [67] "EP_MUNIT" "MP_MUNIT" "EP_MOBILE"
## [70] "MP_MOBILE" "EP_CROWD" "MP_CROWD"
## [73] "EP_NOVEH" "MP_NOVEH" "EP_GROUPQ"
## [76] "MP_GROUPQ" "EPL_POV" "EPL_UNEMP"
## [79] "EPL_PCI" "EPL_NOHSDP" "SPL_THEME1"
## [82] "RPL_THEME1" "EPL_AGE65" "EPL_AGE17"
## [85] "EPL_DISABL" "EPL_SNGPNT" "SPL_THEME2"
## [88] "RPL_THEME2" "EPL_MINRTY" "EPL_LIMENG"
## [91] "SPL_THEME3" "RPL_THEME3" "EPL_MUNIT"
## [94] "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH"
## [97] "EPL_GROUPQ" "SPL_THEME4" "RPL_THEME4"
## [100] "SPL_THEMES" "RPL_THEMES" "F_POV"
## [103] "F_UNEMP" "F_PCI" "F_NOHSDP"
## [106] "F_THEME1" "F_AGE65" "F_AGE17"
## [109] "F_DISABL" "F_SNGPNT" "F_THEME2"
## [112] "F_MINRTY" "F_LIMENG" "F_THEME3"
## [115] "F_MUNIT" "F_MOBILE" "F_CROWD"
## [118] "F_NOVEH" "F_GROUPQ" "F_THEME4"
## [121] "F_TOTAL" "E_UNINSUR" "M_UNINSUR"
## [124] "EP_UNINSUR" "MP_UNINSUR" "Shape"
## [127] "Shape.STArea()" "Shape.STLength()" "GeoLevel"
## [130] "Comparison"
names(PR_SVL22)
## [1] "ST" "STATE" "ST_ABBR" "STCNTY" "COUNTY"
## [6] "FIPS" "LOCATION" "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [11] "E_HU" "M_HU" "E_HH" "M_HH" "E_POV150"
## [16] "M_POV150" "E_UNEMP" "M_UNEMP" "E_HBURD" "M_HBURD"
## [21] "E_NOHSDP" "M_NOHSDP" "E_UNINSUR" "M_UNINSUR" "E_AGE65"
## [26] "M_AGE65" "E_AGE17" "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_LIMENG" "M_LIMENG" "E_MINRTY"
## [36] "M_MINRTY" "E_MUNIT" "M_MUNIT" "E_MOBILE" "M_MOBILE"
## [41] "E_CROWD" "M_CROWD" "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV150" "MP_POV150" "EP_UNEMP" "MP_UNEMP"
## [51] "EP_HBURD" "MP_HBURD" "EP_NOHSDP" "MP_NOHSDP" "EP_UNINSUR"
## [56] "MP_UNINSUR" "EP_AGE65" "MP_AGE65" "EP_AGE17" "MP_AGE17"
## [61] "EP_DISABL" "MP_DISABL" "EP_SNGPNT" "MP_SNGPNT" "EP_LIMENG"
## [66] "MP_LIMENG" "EP_MINRTY" "MP_MINRTY" "EP_MUNIT" "MP_MUNIT"
## [71] "EP_MOBILE" "MP_MOBILE" "EP_CROWD" "MP_CROWD" "EP_NOVEH"
## [76] "MP_NOVEH" "EP_GROUPQ" "MP_GROUPQ" "EPL_POV150" "EPL_UNEMP"
## [81] "EPL_HBURD" "EPL_NOHSDP" "EPL_UNINSUR" "SPL_THEME1" "RPL_THEME1"
## [86] "EPL_AGE65" "EPL_AGE17" "EPL_DISABL" "EPL_SNGPNT" "EPL_LIMENG"
## [91] "SPL_THEME2" "RPL_THEME2" "EPL_MINRTY" "SPL_THEME3" "RPL_THEME3"
## [96] "EPL_MUNIT" "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH" "EPL_GROUPQ"
## [101] "SPL_THEME4" "RPL_THEME4" "SPL_THEMES" "RPL_THEMES" "F_POV150"
## [106] "F_UNEMP" "F_HBURD" "F_NOHSDP" "F_UNINSUR" "F_THEME1"
## [111] "F_AGE65" "F_AGE17" "F_DISABL" "F_SNGPNT" "F_LIMENG"
## [116] "F_THEME2" "F_MINRTY" "F_THEME3" "F_MUNIT" "F_MOBILE"
## [121] "F_CROWD" "F_NOVEH" "F_GROUPQ" "F_THEME4" "F_TOTAL"
## [126] "E_DAYPOP" "E_NOINT" "M_NOINT" "E_AFAM" "M_AFAM"
## [131] "E_HISP" "M_HISP" "E_ASIAN" "M_ASIAN" "E_AIAN"
## [136] "M_AIAN" "E_NHPI" "M_NHPI" "E_TWOMORE" "M_TWOMORE"
## [141] "E_OTHERRACE" "M_OTHERRACE" "EP_NOINT" "MP_NOINT" "EP_AFAM"
## [146] "MP_AFAM" "EP_HISP" "MP_HISP" "EP_ASIAN" "MP_ASIAN"
## [151] "EP_AIAN" "MP_AIAN" "EP_NHPI" "MP_NHPI" "EP_TWOMORE"
## [156] "MP_TWOMORE" "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel" "Comparison"
# Make sure both datasets have the key column
names(PR_SVL14)
## [1] "AFFGEOID" "TRACTCE" "ST"
## [4] "STATE" "ST_ABBR" "STCNTY"
## [7] "COUNTY" "FIPS" "LOCATION"
## [10] "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [13] "E_HU" "M_HU" "E_HH"
## [16] "M_HH" "E_POV" "M_POV"
## [19] "E_UNEMP" "M_UNEMP" "E_PCI"
## [22] "M_PCI" "E_NOHSDP" "M_NOHSDP"
## [25] "E_AGE65" "M_AGE65" "E_AGE17"
## [28] "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_MINRTY"
## [34] "M_MINRTY" "E_LIMENG" "M_LIMENG"
## [37] "E_MUNIT" "M_MUNIT" "E_MOBILE"
## [40] "M_MOBILE" "E_CROWD" "M_CROWD"
## [43] "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV" "MP_POV"
## [49] "EP_UNEMP" "MP_UNEMP" "EP_PCI"
## [52] "MP_PCI" "EP_NOHSDP" "MP_NOHSDP"
## [55] "EP_AGE65" "MP_AGE65" "EP_AGE17"
## [58] "MP_AGE17" "EP_DISABL" "MP_DISABL"
## [61] "EP_SNGPNT" "MP_SNGPNT" "EP_MINRTY"
## [64] "MP_MINRTY" "EP_LIMENG" "MP_LIMENG"
## [67] "EP_MUNIT" "MP_MUNIT" "EP_MOBILE"
## [70] "MP_MOBILE" "EP_CROWD" "MP_CROWD"
## [73] "EP_NOVEH" "MP_NOVEH" "EP_GROUPQ"
## [76] "MP_GROUPQ" "EPL_POV" "EPL_UNEMP"
## [79] "EPL_PCI" "EPL_NOHSDP" "SPL_THEME1"
## [82] "RPL_THEME1" "EPL_AGE65" "EPL_AGE17"
## [85] "EPL_DISABL" "EPL_SNGPNT" "SPL_THEME2"
## [88] "RPL_THEME2" "EPL_MINRTY" "EPL_LIMENG"
## [91] "SPL_THEME3" "RPL_THEME3" "EPL_MUNIT"
## [94] "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH"
## [97] "EPL_GROUPQ" "SPL_THEME4" "RPL_THEME4"
## [100] "SPL_THEMES" "RPL_THEMES" "F_POV"
## [103] "F_UNEMP" "F_PCI" "F_NOHSDP"
## [106] "F_THEME1" "F_AGE65" "F_AGE17"
## [109] "F_DISABL" "F_SNGPNT" "F_THEME2"
## [112] "F_MINRTY" "F_LIMENG" "F_THEME3"
## [115] "F_MUNIT" "F_MOBILE" "F_CROWD"
## [118] "F_NOVEH" "F_GROUPQ" "F_THEME4"
## [121] "F_TOTAL" "E_UNINSUR" "M_UNINSUR"
## [124] "EP_UNINSUR" "MP_UNINSUR" "Shape"
## [127] "Shape.STArea()" "Shape.STLength()" "GeoLevel"
## [130] "Comparison"
names(PR_SVL22)
## [1] "ST" "STATE" "ST_ABBR" "STCNTY" "COUNTY"
## [6] "FIPS" "LOCATION" "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [11] "E_HU" "M_HU" "E_HH" "M_HH" "E_POV150"
## [16] "M_POV150" "E_UNEMP" "M_UNEMP" "E_HBURD" "M_HBURD"
## [21] "E_NOHSDP" "M_NOHSDP" "E_UNINSUR" "M_UNINSUR" "E_AGE65"
## [26] "M_AGE65" "E_AGE17" "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_LIMENG" "M_LIMENG" "E_MINRTY"
## [36] "M_MINRTY" "E_MUNIT" "M_MUNIT" "E_MOBILE" "M_MOBILE"
## [41] "E_CROWD" "M_CROWD" "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV150" "MP_POV150" "EP_UNEMP" "MP_UNEMP"
## [51] "EP_HBURD" "MP_HBURD" "EP_NOHSDP" "MP_NOHSDP" "EP_UNINSUR"
## [56] "MP_UNINSUR" "EP_AGE65" "MP_AGE65" "EP_AGE17" "MP_AGE17"
## [61] "EP_DISABL" "MP_DISABL" "EP_SNGPNT" "MP_SNGPNT" "EP_LIMENG"
## [66] "MP_LIMENG" "EP_MINRTY" "MP_MINRTY" "EP_MUNIT" "MP_MUNIT"
## [71] "EP_MOBILE" "MP_MOBILE" "EP_CROWD" "MP_CROWD" "EP_NOVEH"
## [76] "MP_NOVEH" "EP_GROUPQ" "MP_GROUPQ" "EPL_POV150" "EPL_UNEMP"
## [81] "EPL_HBURD" "EPL_NOHSDP" "EPL_UNINSUR" "SPL_THEME1" "RPL_THEME1"
## [86] "EPL_AGE65" "EPL_AGE17" "EPL_DISABL" "EPL_SNGPNT" "EPL_LIMENG"
## [91] "SPL_THEME2" "RPL_THEME2" "EPL_MINRTY" "SPL_THEME3" "RPL_THEME3"
## [96] "EPL_MUNIT" "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH" "EPL_GROUPQ"
## [101] "SPL_THEME4" "RPL_THEME4" "SPL_THEMES" "RPL_THEMES" "F_POV150"
## [106] "F_UNEMP" "F_HBURD" "F_NOHSDP" "F_UNINSUR" "F_THEME1"
## [111] "F_AGE65" "F_AGE17" "F_DISABL" "F_SNGPNT" "F_LIMENG"
## [116] "F_THEME2" "F_MINRTY" "F_THEME3" "F_MUNIT" "F_MOBILE"
## [121] "F_CROWD" "F_NOVEH" "F_GROUPQ" "F_THEME4" "F_TOTAL"
## [126] "E_DAYPOP" "E_NOINT" "M_NOINT" "E_AFAM" "M_AFAM"
## [131] "E_HISP" "M_HISP" "E_ASIAN" "M_ASIAN" "E_AIAN"
## [136] "M_AIAN" "E_NHPI" "M_NHPI" "E_TWOMORE" "M_TWOMORE"
## [141] "E_OTHERRACE" "M_OTHERRACE" "EP_NOINT" "MP_NOINT" "EP_AFAM"
## [146] "MP_AFAM" "EP_HISP" "MP_HISP" "EP_ASIAN" "MP_ASIAN"
## [151] "EP_AIAN" "MP_AIAN" "EP_NHPI" "MP_NHPI" "EP_TWOMORE"
## [156] "MP_TWOMORE" "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel" "Comparison"
library(dplyr)
# Step 1. Make cleaned subset for 2014
PR_SVL14_subset <- PR_SVL14 %>%
select(
STCNTY, # county FIPS
COUNTY, # county name
LOCATION, # tract description
FIPS, # tract ID
AREA_SQMI, # area (sq mi)
E_TOTPOP, # total population
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES
) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
# Step 2. Make cleaned subset for 2022
PR_SVL22_subset <- PR_SVL22 %>%
select(
STCNTY,
COUNTY,
LOCATION,
FIPS,
AREA_SQMI,
E_TOTPOP,
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES
) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
# Step 3. Join 2014 + 2022 using FIPS (tract ID)
PR_SVL14_subset <- PR_SVL14_subset %>%
slice(-1) # Delete first row becuase it's not numeric (2022 is okay)
str(PR_SVL14_subset$`Overall Summary Ranking`)
## chr [1:909] "0.8948" "0.5814" "0.6538" "0.8281" "0.8937" "0.5011" "0.4355" ...
comparison <- PR_SVL14_subset %>%
mutate(FIPS = as.character(FIPS)) %>% # FIPS 문자로
rename(
Overall_2014 = `Overall Summary Ranking`
) %>%
left_join(
PR_SVL22_subset %>%
mutate(FIPS = as.character(FIPS)) %>%
rename(
Overall_2022 = `Overall Summary Ranking`
),
by = "FIPS",
suffix = c("_2014", "_2022")
) %>%
mutate(
# 여기서 진짜 숫자로 바꿔줌 (2014는 꼭 필요, 2022는 안전용)
Overall_2014 = as.numeric(Overall_2014),
Overall_2022 = as.numeric(Overall_2022),
Change = Overall_2022 - Overall_2014,
Change_Direction = case_when(
Change > 0 ~ "Increased (worse / more vulnerable)",
Change < 0 ~ "Decreased (better / less vulnerable)",
TRUE ~ "No change"
)
)
view(comparison)
table(comparison$Change_Direction)
##
## Decreased (better / less vulnerable) Increased (worse / more vulnerable)
## 387 438
## No change
## 84
You can also embed plots, for example:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.69240 -0.12255 0.01160 0.01179 0.15470 0.78160
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
options(tigris_use_cache = TRUE)
# 1. Puerto Rico Tracts 불러오기
pr_tracts <- tracts("PR", year = 2022, cb = TRUE) %>%
st_transform(32620) %>% # UTM zone20N (Puerto Rico)
mutate(FIPS = as.character(GEOID))
# 2. tract + comparison 조인
pr_change_sf <- pr_tracts %>%
left_join(comparison %>% select(FIPS, Change, Change_Direction), by="FIPS")
# 3. Continuous Change Map
ggplot(pr_change_sf) +
geom_sf(aes(fill = Change), color="grey40", size=0.1) +
scale_fill_distiller(palette="RdBu", direction=-1, limits=c(-0.7,0.7)) +
labs(
title="Change in Social Vulnerability (2014→2022)",
fill="Δ SVI"
) +
theme_minimal()
# 4. scatter plot
library(sf)
library(dplyr)
coast_sj <- st_read("C:/Users/ddtmd/Desktop/PR_Data/PR_GSV/data_raw/coast_san_juan.gpkg") %>%
st_transform(32620) # 분석좌표계 맞춤
## Reading layer `coast_san_juan' from data source
## `C:\Users\ddtmd\Desktop\PR_Data\PR_GSV\data_raw\coast_san_juan.gpkg'
## using driver `GPKG'
## Simple feature collection with 37 features and 12 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -66.13921 ymin: 18.42781 xmax: -65.98353 ymax: 18.47198
## Geodetic CRS: WGS 84
#Creating Centroid
tract_centroids <- pr_change_sf %>%
st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
# centroid → coastline 거리 행렬 계산
dist_matrix <- st_distance(tract_centroids, coast_sj)
# 각 tract마다 "가장 가까운 해안까지의 거리"만 뽑기 (미터 단위)
tract_centroids$dist_to_coast_m <- apply(dist_matrix, 1, min)
# 요약 확인
summary(tract_centroids$dist_to_coast_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 36.7 7898.4 27693.2 39341.1 66599.1 181953.0
library(ggplot2)
ggplot(tract_centroids, aes(x = dist_to_coast_m, y = Change)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red") +
labs(
title = "SVI Change vs Distance to Coastline (San Juan)",
x = "Distance to Coast (meters)",
y = "Δ SVI (2022 - 2014)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 116 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 116 rows containing missing values or values outside the scale range
## (`geom_point()`).
Result: The scatter plot shows a slightly negative slope: Further from
the coast → ΔSVI decreases (less vulnerable) Closer to the coast → ΔSVI
stays higher or decreases less (vulnerability remains/worsens)
modelA <- lm(Change ~ dist_to_coast_m, data = tract_centroids)
summary(modelA)
##
## Call:
## lm(formula = Change ~ dist_to_coast_m, data = tract_centroids)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71523 -0.13117 -0.00792 0.14806 0.81303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.232e-02 1.219e-02 5.935 4.33e-09 ***
## dist_to_coast_m -1.529e-06 2.316e-07 -6.602 7.30e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2302 on 821 degrees of freedom
## (116 observations deleted due to missingness)
## Multiple R-squared: 0.05041, Adjusted R-squared: 0.04925
## F-statistic: 43.58 on 1 and 821 DF, p-value: 7.299e-11
# Box Plot
library(dplyr)
library(ggplot2)
tract_centroids %>%
# Convert dist_to_coast_m into categorized/factor distance groupsR
mutate(dist_group = cut(dist_to_coast_m,
breaks=c(0,500,1000,2000,5000,10000, Inf),
labels=c("0-0.5km","0.5-1km","1-2km","2-5km","5-10km",">10km"),
include.lowest = TRUE)) %>%
ggplot(aes(x = dist_group, y = Change, fill = dist_group)) +
geom_boxplot(outlier.color = "red", alpha = 0.7) +
labs(
title = "SVI Change by Distance to Coast (Grouped)",
x = "Distance Group to Coastline",
y = "Δ SVI (2022 - 2014)"
) +
theme_minimal() +
theme(legend.position = "none")
## Warning: Removed 116 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Results: Coastal zones (<500m) show greater SVI change and variability, implying higher development impact, whereas inland areas remain more stable.
#7. Relationship testing code
cor.test(tract_centroids$dist_to_coast_m, tract_centroids$Change)
##
## Pearson's product-moment correlation
##
## data: tract_centroids$dist_to_coast_m and tract_centroids$Change
## t = -6.6015, df = 821, p-value = 7.299e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2884259 -0.1586082
## sample estimates:
## cor
## -0.2245129
Result: Farther from the coast => greater reduction in SVI (less vulnerable over time).
tract_centroids %>%
mutate(dist_group = cut(dist_to_coast_m,
breaks=c(0,500,1000,2000,5000,10000, Inf),
labels=c("0-0.5km","0.5-1km","1-2km","2-5km","5-10km",">10km"),
include.lowest = TRUE)) %>%
group_by(dist_group) %>%
summarise(mean_change = mean(Change, na.rm=TRUE)) %>%
ggplot(aes(x=dist_group, y=mean_change)) +
geom_col(fill="steelblue") +
labs(
title="Average ΔSVI by Distance to Coast",
x="Distance Group",
y="Mean ΔSVI"
) +
theme_minimal()
Results: Areas farther from the coast generally show larger decreases in
SVI, while coastal areas show smaller improvements or even increases in
vulnerability. (The bar chart provides a descriptive visualization of
the average differences across distance groups,)
library(readr)
library(dplyr)
path_places <- "C:/Users/ddtmd/Desktop/PR_Data/PR_GooglePlaceAPI/DATA/sj_places_google.csv"
sj_places_google <- read_csv(path_places)
## Rows: 120 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, address, type
## dbl (4): rating, user_ratings_total, lat, lng
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(sj_places_google)
## Rows: 120
## Columns: 7
## $ name <chr> "La Concha Resort, Puerto Rico, Autograph Collectio…
## $ address <chr> "1077 Ashford Ave, San Juan, 00907, Puerto Rico", "…
## $ rating <dbl> 4.4, 4.1, 4.4, 4.4, 4.5, 4.1, 4.6, 3.9, 4.4, 4.7, 4…
## $ user_ratings_total <dbl> 6214, 2170, 2142, 2100, 7261, 650, 7075, 18, 257, 7…
## $ lat <dbl> 18.45728, 18.44325, 18.45555, 18.45102, 18.45416, 1…
## $ lng <dbl> -66.07343, -66.02300, -66.08822, -66.06438, -66.090…
## $ type <chr> "hotel", "hotel", "hotel", "hotel", "hotel", "hotel…
#Convert to sf spatial object
sj_places_sf <- sj_places_google %>%
st_as_sf(coords = c("lng", "lat"), crs = 4326) %>% # WGS84
st_transform(32620) # PR UTM Zone 20N\
mapview(sj_places_sf)
##Calculate hotel count & density per census tract
# Add area (km²) for density calculation
pr_change_sf$area_km2 <- as.numeric(st_area(pr_change_sf)) / 1e6
# Spatial join: assign each hotel to a census tract
hotel_join <- st_join(pr_change_sf, sj_places_sf, join = st_contains)
# Count hotels & compute density
hotel_density <- hotel_join %>%
add_count(FIPS, name="hotel_count") %>%
distinct(FIPS, .keep_all = TRUE) %>%
mutate(hotel_density = hotel_count / area_km2)
View(hotel_density)
# count per tract
hotel_density <- hotel_join %>%
add_count(FIPS, name="hotel_count") %>%
distinct(FIPS, .keep_all = TRUE) %>%
mutate(hotel_density = hotel_count / area_km2)
View(hotel_density)
For now, I will use dist_to_coast_m (distance to the coast) as a proxy for beach access, and leave Outcome as Change (SVI 2022–2014) as planned.
# 1. organize the data for mediation (Hotel_density, dist_to_coast_m, and Change should all be in a single data.frame).
library(dplyr)
# 1) Centroids: SVI Change + distance to coast
centroids_df <- tract_centroids %>%
st_drop_geometry() %>%
select(FIPS, Change, dist_to_coast_m)
# 2) Hotel density: hotel_density by tract
hotel_df <- hotel_density %>%
st_drop_geometry() %>%
select(FIPS, hotel_density)
# 3) Join for mediation analysis
med_data <- centroids_df %>%
left_join(hotel_df, by = "FIPS") %>%
filter(!is.na(Change),
!is.na(dist_to_coast_m),
!is.na(hotel_density))
str(med_data)
## 'data.frame': 823 obs. of 4 variables:
## $ FIPS : chr "72005400900" "72025201000" "72025201500" "72025201300" ...
## $ Change : num 0.0703 0.0201 0.1968 0.2645 0.0553 ...
## $ dist_to_coast_m: num 107042 22774 23801 22792 10326 ...
## $ hotel_density : num 0.876 0.827 1.684 1.321 0.263 ...
summary(med_data[, c("Change", "dist_to_coast_m", "hotel_density")])
## Change dist_to_coast_m hotel_density
## Min. :-0.69240 Min. : 36.7 Min. : 0.01164
## 1st Qu.:-0.12255 1st Qu.: 8751.2 1st Qu.: 0.07705
## Median : 0.01160 Median : 29270.0 Median : 0.29396
## Mean : 0.01179 Mean : 39590.8 Mean : 0.99357
## 3rd Qu.: 0.15470 3rd Qu.: 66748.5 3rd Qu.: 1.08898
## Max. : 0.78160 Max. :118821.8 Max. :49.79539
med_data= mediation analysis data
Change = SVI 변화량 (SVI change)dist_to_coast_m = 해안까지 거리 (beach access
proxy)hotel_density = 호텔 밀도 (hotel density)먼저 호텔 밀도와 SVI 변화의 직접 관계만 본 모델: Relationship between Hotel density vs Change SVI
# Total effect: Hotel density → Change
m_total <- lm(Change ~ hotel_density, data = med_data)
summary(m_total)
##
## Call:
## lm(formula = Change ~ hotel_density, data = med_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6949 -0.1344 -0.0011 0.1473 0.7686
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002120 0.008651 0.245 0.80651
## hotel_density 0.009735 0.002840 3.427 0.00064 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2346 on 821 degrees of freedom
## Multiple R-squared: 0.01411, Adjusted R-squared: 0.01291
## F-statistic: 11.75 on 1 and 821 DF, p-value: 0.0006397
Interpretation: Higher hotel density = Increased SVI The total effect model showed a significant positive association between hotel density and changes in the Social Vulnerability Index. 호텔 밀도는 사회적 취약성 증가에 직접적으로 영향을 미침침
m_full <- lm(Change ~ hotel_density + dist_to_coast_m, data = med_data)
summary(m_full)
##
## Call:
## lm(formula = Change ~ hotel_density + dist_to_coast_m, data = med_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70700 -0.13123 -0.00836 0.14889 0.80919
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.214e-02 1.314e-02 4.727 2.68e-06 ***
## hotel_density 5.836e-03 2.858e-03 2.042 0.0415 *
## dist_to_coast_m -1.418e-06 2.374e-07 -5.973 3.47e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2298 on 820 degrees of freedom
## Multiple R-squared: 0.05521, Adjusted R-squared: 0.05291
## F-statistic: 23.96 on 2 and 820 DF, p-value: 7.714e-11
Interpretation: Higher hotel density and poorer coastal accessibility are both associated with increases in social vulnerability.(호텔이 많아질수록 → 해안 접근이 어려워지고 → 그 결과 SVI가 증가)
But partial meditation: beacuase in total effect (m_total), hotel density coefficient:0.009735 (siginificance high), while full mediation model (m_full) (hotel density coefficient: 5.836e-03 (0.0058) (low significance) so the coefficient is reduced which means, => Hotel density increases social vulnerability partly through reduced beach access, but this pathway alone does not fully explain the relationship; other mechanisms, such as housing costs, land use change, tourism labor structures, and rising living expenses, also play a role (Korean note:호텔 밀도는 해변 접근성을 통해서도 SVI를 증가시키지만, 그것만으로 설명되지는 않는다.”즉, 일부 영향 → 접근성 경로주거비,토지 이용,관광 노동 구조,생활비 상승 등)
In other words, Social vulnerability increases in hotel-dense areas not only because beach access is constrained, but also due to other factors, which is why the mediation is partial.
=> Beach access partially mediates the relationship between hotel density and changes in social vulnerability.