library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI
library(skimr)
library(mapview)
PR_SVL22 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2022_CencusTrack_PR_SVL/2022_CT_svi_interactive_map.csv")
## Rows: 939 Columns: 160
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): STATE, ST_ABBR, COUNTY, LOCATION, GeoLevel, Comparison
## dbl (154): ST, STCNTY, FIPS, AREA_SQMI, E_TOTPOP, M_TOTPOP, E_HU, M_HU, E_HH...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PR_SVL14 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2014_CencusTrack_PR_SVL/2014_CT_svi_interactive_map.csv")
## Rows: 910 Columns: 130
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (125): AFFGEOID, TRACTCE, ST, STATE, ST_ABBR, COUNTY, FIPS, LOCATION, E_...
## dbl (5): STCNTY, AREA_SQMI, F_NOVEH, Shape.STArea(), Shape.STLength()
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
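# here is loaded above but the reads use absolute paths; as a hedged sketch, a more
# portable alternative assuming the CSVs were copied into a data/ folder inside the
# project (a hypothetical location, not part of the original setup):
# PR_SVL22 <- read_csv(here("data", "2022_CT_svi_interactive_map.csv"), show_col_types = FALSE)
# PR_SVL14 <- read_csv(here("data", "2014_CT_svi_interactive_map.csv"), show_col_types = FALSE)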
# 2022 data: select a subset of columns
PR_SVL22_subset <- PR_SVL22 %>%
dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)
view(PR_SVL22_subset)
# rename the columns
library(dplyr)
PR_SVL22_subset <- PR_SVL22 %>%
dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
print(PR_SVL22_subset)
## # A tibble: 939 × 5
## `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
## <dbl> <dbl> <dbl>
## 1 0.888 0.244 0.457
## 2 0.750 0.109 0.522
## 3 0.774 0.145 0.522
## 4 0.887 0.552 0.522
## 5 0.186 0.424 0.0684
## 6 0.643 0.690 0.522
## 7 0.565 0.646 0.0456
## 8 0.306 0.463 0.0163
## 9 0.626 0.887 0.0087
## 10 0.260 0.351 0.0456
## # ℹ 929 more rows
## # ℹ abbreviated name: ¹`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <dbl>,
## # `Overall Summary Ranking` <dbl>
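# skimr is loaded above but never used; a quick sketch to confirm that the renamed
# 2022 rankings stay within the expected 0-1 percentile range:
skim(PR_SVL22_subset)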
# 2014 Data: select a subset of columns
PR_SVL14_subset <- PR_SVL14 %>%
dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)
view(PR_SVL14_subset)
# rename the columns
library(dplyr)
PR_SVL14_subset <- PR_SVL14 %>%
dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
print(PR_SVL14_subset)
## # A tibble: 910 × 5
## `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
## <chr> <chr> <chr>
## 1 RPL_THEME1 RPL_THEME2 RPL_THEME3
## 2 0.9717 0.7423 0.902
## 3 0.9174 0.1927 0.8976
## 4 0.9842 0.1608 0.8645
## 5 0.9333 0.3932 0.6134
## 6 0.7115 0.9604 0.7313
## 7 0.7692 0.2126 0.8656
## 8 0.6719 0.2456 0.1993
## 9 0.6244 0.4989 0.1663
## 10 0.8778 0.5297 0.5672
## # ℹ 900 more rows
## # ℹ abbreviated name: ¹`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <chr>,
## # `Overall Summary Ranking` <chr>
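# Note the <chr> column types and the "RPL_THEME1" values in row 1: the 2014 file
# apparently carries a second header row, which forced readr to parse the rankings as
# character. The pipeline below handles this with slice(-1) and as.numeric(); an
# alternative sketch that drops the stray row and converts in one step (PR_SVL14_clean
# is a hypothetical name used only for illustration):
PR_SVL14_clean <- PR_SVL14 %>%
  filter(RPL_THEME1 != "RPL_THEME1") %>%
  mutate(across(starts_with("RPL_"), as.numeric))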
# Make sure both datasets have the key column
names(PR_SVL14)
## [1] "AFFGEOID" "TRACTCE" "ST"
## [4] "STATE" "ST_ABBR" "STCNTY"
## [7] "COUNTY" "FIPS" "LOCATION"
## [10] "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [13] "E_HU" "M_HU" "E_HH"
## [16] "M_HH" "E_POV" "M_POV"
## [19] "E_UNEMP" "M_UNEMP" "E_PCI"
## [22] "M_PCI" "E_NOHSDP" "M_NOHSDP"
## [25] "E_AGE65" "M_AGE65" "E_AGE17"
## [28] "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_MINRTY"
## [34] "M_MINRTY" "E_LIMENG" "M_LIMENG"
## [37] "E_MUNIT" "M_MUNIT" "E_MOBILE"
## [40] "M_MOBILE" "E_CROWD" "M_CROWD"
## [43] "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV" "MP_POV"
## [49] "EP_UNEMP" "MP_UNEMP" "EP_PCI"
## [52] "MP_PCI" "EP_NOHSDP" "MP_NOHSDP"
## [55] "EP_AGE65" "MP_AGE65" "EP_AGE17"
## [58] "MP_AGE17" "EP_DISABL" "MP_DISABL"
## [61] "EP_SNGPNT" "MP_SNGPNT" "EP_MINRTY"
## [64] "MP_MINRTY" "EP_LIMENG" "MP_LIMENG"
## [67] "EP_MUNIT" "MP_MUNIT" "EP_MOBILE"
## [70] "MP_MOBILE" "EP_CROWD" "MP_CROWD"
## [73] "EP_NOVEH" "MP_NOVEH" "EP_GROUPQ"
## [76] "MP_GROUPQ" "EPL_POV" "EPL_UNEMP"
## [79] "EPL_PCI" "EPL_NOHSDP" "SPL_THEME1"
## [82] "RPL_THEME1" "EPL_AGE65" "EPL_AGE17"
## [85] "EPL_DISABL" "EPL_SNGPNT" "SPL_THEME2"
## [88] "RPL_THEME2" "EPL_MINRTY" "EPL_LIMENG"
## [91] "SPL_THEME3" "RPL_THEME3" "EPL_MUNIT"
## [94] "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH"
## [97] "EPL_GROUPQ" "SPL_THEME4" "RPL_THEME4"
## [100] "SPL_THEMES" "RPL_THEMES" "F_POV"
## [103] "F_UNEMP" "F_PCI" "F_NOHSDP"
## [106] "F_THEME1" "F_AGE65" "F_AGE17"
## [109] "F_DISABL" "F_SNGPNT" "F_THEME2"
## [112] "F_MINRTY" "F_LIMENG" "F_THEME3"
## [115] "F_MUNIT" "F_MOBILE" "F_CROWD"
## [118] "F_NOVEH" "F_GROUPQ" "F_THEME4"
## [121] "F_TOTAL" "E_UNINSUR" "M_UNINSUR"
## [124] "EP_UNINSUR" "MP_UNINSUR" "Shape"
## [127] "Shape.STArea()" "Shape.STLength()" "GeoLevel"
## [130] "Comparison"
names(PR_SVL22)
## [1] "ST" "STATE" "ST_ABBR" "STCNTY" "COUNTY"
## [6] "FIPS" "LOCATION" "AREA_SQMI" "E_TOTPOP" "M_TOTPOP"
## [11] "E_HU" "M_HU" "E_HH" "M_HH" "E_POV150"
## [16] "M_POV150" "E_UNEMP" "M_UNEMP" "E_HBURD" "M_HBURD"
## [21] "E_NOHSDP" "M_NOHSDP" "E_UNINSUR" "M_UNINSUR" "E_AGE65"
## [26] "M_AGE65" "E_AGE17" "M_AGE17" "E_DISABL" "M_DISABL"
## [31] "E_SNGPNT" "M_SNGPNT" "E_LIMENG" "M_LIMENG" "E_MINRTY"
## [36] "M_MINRTY" "E_MUNIT" "M_MUNIT" "E_MOBILE" "M_MOBILE"
## [41] "E_CROWD" "M_CROWD" "E_NOVEH" "M_NOVEH" "E_GROUPQ"
## [46] "M_GROUPQ" "EP_POV150" "MP_POV150" "EP_UNEMP" "MP_UNEMP"
## [51] "EP_HBURD" "MP_HBURD" "EP_NOHSDP" "MP_NOHSDP" "EP_UNINSUR"
## [56] "MP_UNINSUR" "EP_AGE65" "MP_AGE65" "EP_AGE17" "MP_AGE17"
## [61] "EP_DISABL" "MP_DISABL" "EP_SNGPNT" "MP_SNGPNT" "EP_LIMENG"
## [66] "MP_LIMENG" "EP_MINRTY" "MP_MINRTY" "EP_MUNIT" "MP_MUNIT"
## [71] "EP_MOBILE" "MP_MOBILE" "EP_CROWD" "MP_CROWD" "EP_NOVEH"
## [76] "MP_NOVEH" "EP_GROUPQ" "MP_GROUPQ" "EPL_POV150" "EPL_UNEMP"
## [81] "EPL_HBURD" "EPL_NOHSDP" "EPL_UNINSUR" "SPL_THEME1" "RPL_THEME1"
## [86] "EPL_AGE65" "EPL_AGE17" "EPL_DISABL" "EPL_SNGPNT" "EPL_LIMENG"
## [91] "SPL_THEME2" "RPL_THEME2" "EPL_MINRTY" "SPL_THEME3" "RPL_THEME3"
## [96] "EPL_MUNIT" "EPL_MOBILE" "EPL_CROWD" "EPL_NOVEH" "EPL_GROUPQ"
## [101] "SPL_THEME4" "RPL_THEME4" "SPL_THEMES" "RPL_THEMES" "F_POV150"
## [106] "F_UNEMP" "F_HBURD" "F_NOHSDP" "F_UNINSUR" "F_THEME1"
## [111] "F_AGE65" "F_AGE17" "F_DISABL" "F_SNGPNT" "F_LIMENG"
## [116] "F_THEME2" "F_MINRTY" "F_THEME3" "F_MUNIT" "F_MOBILE"
## [121] "F_CROWD" "F_NOVEH" "F_GROUPQ" "F_THEME4" "F_TOTAL"
## [126] "E_DAYPOP" "E_NOINT" "M_NOINT" "E_AFAM" "M_AFAM"
## [131] "E_HISP" "M_HISP" "E_ASIAN" "M_ASIAN" "E_AIAN"
## [136] "M_AIAN" "E_NHPI" "M_NHPI" "E_TWOMORE" "M_TWOMORE"
## [141] "E_OTHERRACE" "M_OTHERRACE" "EP_NOINT" "MP_NOINT" "EP_AFAM"
## [146] "MP_AFAM" "EP_HISP" "MP_HISP" "EP_ASIAN" "MP_ASIAN"
## [151] "EP_AIAN" "MP_AIAN" "EP_NHPI" "MP_NHPI" "EP_TWOMORE"
## [156] "MP_TWOMORE" "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel" "Comparison"
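# Both years share the FIPS key and the five RPL_ ranking columns, even though several
# raw indicators differ between releases (e.g., POV vs POV150). A quick check:
key_cols <- c("FIPS", "RPL_THEME1", "RPL_THEME2", "RPL_THEME3", "RPL_THEME4", "RPL_THEMES")
setdiff(key_cols, intersect(names(PR_SVL14), names(PR_SVL22))) # expect character(0)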
library(dplyr)
# Step 1. Make cleaned subset for 2014
PR_SVL14_subset <- PR_SVL14 %>%
dplyr::select(
STCNTY, # county FIPS
COUNTY, # county name
LOCATION, # tract description
FIPS, # tract ID
AREA_SQMI, # area (sq mi)
E_TOTPOP, # total population
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES
) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
# Step 2. Make cleaned subset for 2022
PR_SVL22_subset <- PR_SVL22 %>%
dplyr::select(
STCNTY,
COUNTY,
LOCATION,
FIPS,
AREA_SQMI,
E_TOTPOP,
RPL_THEME1,
RPL_THEME2,
RPL_THEME3,
RPL_THEME4,
RPL_THEMES
) %>%
rename(
`Socioeconomic Status` = RPL_THEME1,
`Household Characteristics` = RPL_THEME2,
`Racial & Ethnic Minority Status` = RPL_THEME3,
`Housing Type & Transportation` = RPL_THEME4,
`Overall Summary Ranking` = RPL_THEMES
)
# Step 3. Join 2014 + 2022 using FIPS (tract ID)
PR_SVL14_subset <- PR_SVL14_subset %>%
slice(-1) # Drop the first row: it repeats the column headers, so it is not numeric (the 2022 file is fine)
str(PR_SVL14_subset$`Overall Summary Ranking`)
## chr [1:909] "0.8948" "0.5814" "0.6538" "0.8281" "0.8937" "0.5011" "0.4355" ...
comparison <- PR_SVL14_subset %>%
mutate(FIPS = as.character(FIPS)) %>% # convert FIPS to character
rename(
Overall_2014 = `Overall Summary Ranking`
) %>%
left_join(
PR_SVL22_subset %>%
mutate(FIPS = as.character(FIPS)) %>%
rename(
Overall_2022 = `Overall Summary Ranking`
),
by = "FIPS",
suffix = c("_2014", "_2022")
) %>%
mutate(
# Convert to numeric here (essential for 2014; a safeguard for 2022)
Overall_2014 = as.numeric(Overall_2014),
Overall_2022 = as.numeric(Overall_2022),
Change = Overall_2022 - Overall_2014,
Change_Direction = case_when(
Change > 0 ~ "Increased (worse / more vulnerable)",
Change < 0 ~ "Decreased (better / less vulnerable)",
TRUE ~ "No change"
)
)
view(comparison)
table(comparison$Change_Direction)
##
## Decreased (better / less vulnerable) Increased (worse / more vulnerable)
## 387 438
## No change
## 84
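# Some 2014 tracts may fail to match a 2022 tract (tract boundaries were redrawn between
# the 2010 and 2020 Census definitions). A hedged sketch to list the unmatched tracts:
unmatched_2014 <- PR_SVL14_subset %>%
  mutate(FIPS = as.character(FIPS)) %>%
  anti_join(PR_SVL22_subset %>% mutate(FIPS = as.character(FIPS)), by = "FIPS")
nrow(unmatched_2014) # tracts present in 2014 but absent from 2022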
# ---- 1. Standardize FIPS as character + drop the embedded header row ----
PR_SVL14_subset <- PR_SVL14_subset %>%
mutate(FIPS = as.character(FIPS)) %>%
filter(FIPS != "FIPS", !is.na(FIPS))
PR_SVL22_subset <- PR_SVL22_subset %>%
mutate(
FIPS = format(FIPS, scientific = FALSE, trim = TRUE),
FIPS = as.character(FIPS)
) %>%
filter(FIPS != "FIPS", !is.na(FIPS))
# ---- 2. Convert the SVI scores to numeric ----
svi_cols <- c(
"Socioeconomic Status",
"Household Characteristics",
"Racial & Ethnic Minority Status",
"Housing Type & Transportation",
"Overall Summary Ranking"
)
PR_SVL14_subset <- PR_SVL14_subset %>%
mutate(across(all_of(svi_cols), as.numeric))
PR_SVL22_subset <- PR_SVL22_subset %>%
mutate(across(all_of(svi_cols), as.numeric))
# ---- 3. Set values outside the 0-1 range to NA ----
PR_SVL14_subset <- PR_SVL14_subset %>%
mutate(
`Overall Summary Ranking` =
ifelse(`Overall Summary Ranking` < 0 |
`Overall Summary Ranking` > 1,
NA, `Overall Summary Ranking`)
)
PR_SVL22_subset <- PR_SVL22_subset %>%
mutate(
`Overall Summary Ranking` =
ifelse(`Overall Summary Ranking` < 0 |
`Overall Summary Ranking` > 1,
NA, `Overall Summary Ranking`)
)
# ---- 4. Build the final comparison table ----
comparison <- PR_SVL14_subset %>%
rename(
Overall_2014 = `Overall Summary Ranking`,
COUNTY_2014 = COUNTY,
LOCATION_2014 = LOCATION
) %>%
left_join(
PR_SVL22_subset %>%
rename(
Overall_2022 = `Overall Summary Ranking`,
COUNTY_2022 = COUNTY,
LOCATION_2022 = LOCATION
),
by = "FIPS",
suffix = c("_2014", "_2022")
) %>%
filter(!is.na(Overall_2014), !is.na(Overall_2022)) %>%
mutate(
Change = Overall_2022 - Overall_2014,
Change_Direction = case_when(
Change > 0 ~ "Increased (worse / more vulnerable)",
Change < 0 ~ "Decreased (better / less vulnerable)",
TRUE ~ "No change"
)
)
# ---- 5. Summary / histogram ----
library(ggplot2)
summary(comparison$Change)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.69240 -0.12255 0.01160 0.01179 0.15470 0.78160
hist(comparison$Change, breaks = 40)
ggplot(comparison, aes(x = Change)) +
geom_histogram(bins = 40, fill = "steelblue", color = "white") +
labs(
title = "Distribution of Change in Social Vulnerability (2022 - 2014)",
x = "Change in Overall Summary Ranking (Δ vulnerability)",
y = "Number of Census Tracts"
) +
theme_minimal()
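# A quick look at the tracts with the largest increases in vulnerability
# (column names follow the renames and join suffixes used above):
comparison %>%
  slice_max(Change, n = 10) %>%
  dplyr::select(FIPS, LOCATION_2014, Overall_2014, Overall_2022, Change)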
## Mapping
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
options(tigris_use_cache = TRUE)
# 1. Load Puerto Rico census tracts
pr_tracts <- tracts("PR", year = 2022, cb = TRUE) %>%
st_transform(32620) %>% # UTM zone 20N (Puerto Rico)
mutate(FIPS = as.character(GEOID))
# 2. Join tracts with the comparison table
pr_change_sf <- pr_tracts %>%
dplyr::left_join(
comparison %>% dplyr::select(FIPS, Change, Change_Direction),
by = "FIPS"
)
# 3. Continuous Change Map
ggplot(pr_change_sf) +
geom_sf(aes(fill = Change), color="grey40", size=0.1) +
scale_fill_distiller(palette="RdBu", direction=-1, limits=c(-0.7,0.7)) +
labs(
title="Change in Social Vulnerability (2014→2022)",
fill="Δ SVI"
) +
theme_minimal()
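# mapview is loaded at the top but never used; a sketch of an interactive version of the
# same change map (renders in the RStudio Viewer or a browser):
mapview(pr_change_sf, zcol = "Change", layer.name = "Δ SVI (2014→2022)")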
## 3. Distance to coast and scatter plots
# Scatter plot
library(sf)
library(dplyr)
coast_pr <- st_read(
"C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg"
) %>%
st_transform(32620) # UTM zone 20N (Puerto Rico)
## Reading layer `pr_coastline' from data source
## `C:\Users\ddtmd\Desktop\2026 Spring\HotelDensity_PR_GooglePlaceAPI\data_processed\pr_coastline.gpkg'
## using driver `GPKG'
## Simple feature collection with 16 features and 0 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -23817.68 ymin: 1980811 xmax: 265239.9 ymax: 2052181
## Projected CRS: WGS 84 / UTM zone 20N
# Create tract centroids
tract_centroids <- pr_change_sf %>%
st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
# Compute the centroid-to-coastline distance matrix
dist_matrix <- st_distance(tract_centroids, coast_pr)
# For each tract, keep only the distance to the nearest coastline segment (meters)
tract_centroids$dist_to_coast_m <- apply(dist_matrix, 1, min)
# Check the summary
summary(tract_centroids$dist_to_coast_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.2 2501.0 5902.5 8164.3 11632.5 27400.2
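# Distances are in meters (UTM zone 20N). A hedged sketch that flags "coastal" tracts
# with an illustrative 5 km cutoff (the threshold is an assumption, not from the analysis):
tract_centroids <- tract_centroids %>%
  mutate(
    dist_to_coast_km = dist_to_coast_m / 1000,
    coastal_5km = dist_to_coast_m < 5000
  )
table(tract_centroids$coastal_5km)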
library(ggplot2)
# =========================================================
library(sf)
library(dplyr)
library(ggplot2)
# 0) Standardize the FIPS type
pr_tracts <- pr_tracts %>%
mutate(FIPS = as.character(GEOID))
comparison2 <- comparison %>%
dplyr::mutate(FIPS = as.character(FIPS)) %>%
dplyr::select(FIPS, Overall_2014, Overall_2022, Change, Change_Direction)
# 1) join
analysis_sf <- pr_tracts %>%
dplyr::left_join(comparison2, by = "FIPS")
# 2) (optional) check for NAs
summary(is.na(analysis_sf$Overall_2014))
## Mode FALSE TRUE
## logical 823 116
summary(is.na(analysis_sf$Overall_2022))
## Mode FALSE TRUE
## logical 823 116
summary(is.na(analysis_sf$Change))
## Mode FALSE TRUE
## logical 823 116
## 2. Re-attach distance to coast
sf::sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
coast_pr <- st_read(
"C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg",
quiet = TRUE
) %>% st_transform(32620)
analysis_sf <- analysis_sf %>% st_transform(32620)
cent <- st_centroid(analysis_sf)
## Warning: st_centroid assumes attributes are constant over geometries
dmat <- st_distance(cent, coast_pr)
analysis_sf$dist_to_coast_m <- as.numeric(apply(dmat, 1, min))
summary(analysis_sf$dist_to_coast_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.2 2501.0 5902.5 8164.3 11632.5 27400.2
## 3. Level plots: 2014 / 2022 / Change (three plots)
df <- analysis_sf %>% st_drop_geometry() %>%
filter(!is.na(dist_to_coast_m), !is.na(Overall_2014), !is.na(Overall_2022), !is.na(Change))
# (A) Level 2014
ggplot(df, aes(x = dist_to_coast_m, y = Overall_2014)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(
title = "SVI Level (2014) vs Distance to Coastline (PR)",
x = "Distance to Coast (meters)",
y = "SVI (Overall Summary Ranking, 2014)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# (B) Level 2022
ggplot(df, aes(x = dist_to_coast_m, y = Overall_2022)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(
title = "SVI Level (2022) vs Distance to Coastline (PR)",
x = "Distance to Coast (meters)",
y = "SVI (Overall Summary Ranking, 2022)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# (C) Change
ggplot(df, aes(x = dist_to_coast_m, y = Change)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(
title = "ΔSVI (2022–2014) vs Distance to Coastline (PR)",
x = "Distance to Coast (meters)",
y = "Δ SVI (2022 - 2014)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## 4. Check the linear regressions
m14 <- lm(Overall_2014 ~ dist_to_coast_m, data = df)
m22 <- lm(Overall_2022 ~ dist_to_coast_m, data = df)
mchg <- lm(Change ~ dist_to_coast_m, data = df)
summary(m14)
##
## Call:
## lm(formula = Overall_2014 ~ dist_to_coast_m, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5448 -0.2477 -0.0100 0.2393 0.5287
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.673e-01 1.537e-02 30.410 < 2e-16 ***
## dist_to_coast_m 4.210e-06 1.387e-06 3.035 0.00248 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2852 on 821 degrees of freedom
## Multiple R-squared: 0.0111, Adjusted R-squared: 0.009891
## F-statistic: 9.211 on 1 and 821 DF, p-value: 0.002481
summary(m22)
##
## Call:
## lm(formula = Overall_2022 ~ dist_to_coast_m, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53016 -0.24258 0.01179 0.24680 0.49894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.356e-01 1.536e-02 34.880 <2e-16 ***
## dist_to_coast_m -2.476e-06 1.386e-06 -1.787 0.0744 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.285 on 821 degrees of freedom
## Multiple R-squared: 0.003873, Adjusted R-squared: 0.00266
## F-statistic: 3.192 on 1 and 821 DF, p-value: 0.07436
summary(mchg)
##
## Call:
## lm(formula = Change ~ dist_to_coast_m, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71314 -0.13323 0.00029 0.14906 0.75157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.827e-02 1.247e-02 5.477 5.75e-08 ***
## dist_to_coast_m -6.686e-06 1.125e-06 -5.942 4.15e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2314 on 821 degrees of freedom
## Multiple R-squared: 0.04124, Adjusted R-squared: 0.04007
## F-statistic: 35.31 on 1 and 821 DF, p-value: 4.149e-09
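# A compact side-by-side of the three bivariate models; broom is not loaded above, so
# this assumes it is installed:
library(broom)
bind_rows(
  tidy(m14) %>% mutate(model = "Level 2014"),
  tidy(m22) %>% mutate(model = "Level 2022"),
  tidy(mchg) %>% mutate(model = "Change")
)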
# =========================================================
# FINAL (copy/paste): PR tract-level dataset
# - SVI change (Change), 2014→2022: from pr_change_sf
# - Distance to coast (dist_to_coast_m)
# - Hotel density (lodging_dens_km2) + winsorize at p99
# - Regression: Change ~ hotel_density_w + dist_to_coast_m
# =========================================================
# Load packages + turn off s2
library(sf)
library(dplyr)
library(readr)
library(ggplot2)
sf::sf_use_s2(FALSE)
# Confirm that "pr_change_sf" already exists
# ---- 0) MUST have pr_change_sf (contains Change) ----
stopifnot(exists("pr_change_sf"))
# quick check
stopifnot("Change" %in% names(pr_change_sf))
stopifnot("FIPS" %in% names(pr_change_sf))
# ---- 1) Read hotel density ----
dir_hd <- "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI"
# Read the hotel density data and keep only the columns we need
hotel_den_raw <- read_csv(
file.path(dir_hd, "PR_tract_lodging_density.csv"),
show_col_types = FALSE
)
hotel_den <- hotel_den_raw %>%
transmute(
FIPS = as.character(GEOID),
hotel_density = as.numeric(lodging_dens_km2), # lodging density per km2
lodging_total = as.numeric(lodging_total),
area_km2 = as.numeric(area_km2)
)
# Attach hotel density to the SVI change data (join)
# ---- 2) Join hotel density to pr_change_sf ----
analysis_sf <- pr_change_sf %>%
mutate(FIPS = as.character(FIPS)) %>%
left_join(hotel_den, by = "FIPS")
# sanity check
summary(analysis_sf$hotel_density)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.5379 0.0000 64.9653
sum(is.na(analysis_sf$hotel_density)) # should be 0
## [1] 0
# Compute distance to the coast (the core spatial calculation)
# ---- 3) Distance to PR coastline (centroid -> min distance) ----
coast_pr <- st_read(
"C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg",
quiet = TRUE
) %>% st_transform(32620)
analysis_sf <- analysis_sf %>% st_transform(32620)
cent <- st_centroid(analysis_sf)
## Warning: st_centroid assumes attributes are constant over geometries
dmat <- st_distance(cent, coast_pr)
analysis_sf$dist_to_coast_m <- as.numeric(apply(dmat, 1, min))
summary(analysis_sf$dist_to_coast_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.2 2501.0 5902.5 8164.3 11632.5 27400.2
# Cap extreme hotel density values (winsorize at p99)
# ---- 4) Winsorize hotel density at 99th percentile (no log) ----
p99 <- quantile(analysis_sf$hotel_density, 0.99, na.rm = TRUE)
analysis_sf <- analysis_sf %>%
mutate(hotel_density_w = pmin(hotel_density, p99))
quantile(analysis_sf$hotel_density, c(.95, .99), na.rm = TRUE)
## 95% 99%
## 1.137027 14.438188
quantile(analysis_sf$hotel_density_w, c(.95, .99), na.rm = TRUE)
## 95% 99%
## 1.137027 13.922990
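# A small reusable helper for the same winsorizing step, in case other variables need the
# identical treatment later (a sketch, equivalent to the pmin() call above):
winsorize_upper <- function(x, prob = 0.99) {
  pmin(x, quantile(x, prob, na.rm = TRUE))
}
all.equal(winsorize_upper(analysis_sf$hotel_density), analysis_sf$hotel_density_w)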
# Build the regression data frame (drop geometry + remove NAs)
# ---- 5) Regression-ready df (drop geometry + remove NA) ----
df_reg <- analysis_sf %>%
st_drop_geometry() %>%
filter(
!is.na(Change),
!is.na(hotel_density_w),
!is.na(dist_to_coast_m)
)
# ---- 6) Main regression ----
m <- lm(Change ~ hotel_density_w + dist_to_coast_m, data = df_reg)
summary(m)
##
## Call:
## lm(formula = Change ~ hotel_density_w + dist_to_coast_m, data = df_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71171 -0.13453 -0.00019 0.14989 0.75261
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.665e-02 1.292e-02 5.157 3.14e-07 ***
## hotel_density_w 2.228e-03 4.662e-03 0.478 0.633
## dist_to_coast_m -6.585e-06 1.145e-06 -5.749 1.26e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2315 on 820 degrees of freedom
## Multiple R-squared: 0.0415, Adjusted R-squared: 0.03917
## F-statistic: 17.75 on 2 and 820 DF, p-value: 2.831e-08
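# Heteroskedasticity-robust standard errors as a sensitivity check; this assumes the
# sandwich and lmtest packages are installed (sandwich is attached later with mediation):
library(sandwich)
library(lmtest)
coeftest(m, vcov. = vcovHC(m, type = "HC1"))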
# ---- 7) (Optional) Quick scatter checks ----
ggplot(df_reg, aes(x = hotel_density_w, y = Change)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = TRUE) +
theme_minimal() +
labs(title = "ΔSVI vs Hotel density (winsorized p99)", x = "Hotel density (per km2, winsorized)", y = "ΔSVI (2022-2014)")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(df_reg, aes(x = dist_to_coast_m, y = Change)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = TRUE) +
theme_minimal() +
labs(title = "ΔSVI vs Distance to coast", x = "Distance to coast (m)", y = "ΔSVI (2022-2014)")
## `geom_smooth()` using formula = 'y ~ x'
# Hotel density → distance to coast
m_b <- lm(dist_to_coast_m ~ hotel_density_w, data = df_reg)
summary(m_b)
##
## Call:
## lm(formula = dist_to_coast_m ~ hotel_density_w, data = df_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8691 -5546 -2268 3621 18694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8706.4 250.5 34.754 < 2e-16 ***
## hotel_density_w -751.0 139.6 -5.379 9.76e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7053 on 821 degrees of freedom
## Multiple R-squared: 0.03405, Adjusted R-squared: 0.03287
## F-statistic: 28.94 on 1 and 821 DF, p-value: 9.759e-08
# Model with hotel density only
m_c <- lm(Change ~ hotel_density_w, data = df_reg)
summary(m_c)
##
## Call:
## lm(formula = Change ~ hotel_density_w, data = df_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70172 -0.13736 -0.00062 0.14355 0.77228
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.009320 0.008380 1.112 0.266
## hotel_density_w 0.007173 0.004670 1.536 0.125
##
## Residual standard error: 0.2359 on 821 degrees of freedom
## Multiple R-squared: 0.002865, Adjusted R-squared: 0.001651
## F-statistic: 2.359 on 1 and 821 DF, p-value: 0.1249
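# A quick product-of-coefficients check before the formal mediation test: the indirect
# effect of hotel density that runs through distance to coast is roughly the a-path
# coefficient (from m_b) times the b-path coefficient (from m); compare with the ACME
# reported below.
a_path <- coef(m_b)["hotel_density_w"] # hotel density -> distance to coast
b_path <- coef(m)["dist_to_coast_m"]   # distance to coast -> ΔSVI, holding density fixed
unname(a_path * b_path)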
# Mediation test
# =========================================================
# Mediation (package: mediation)
# X = hotel_density_w
# M = dist_to_coast_m
# Y = Change (ΔSVI 2022-2014)
# =========================================================
# 0) packages
install_if_missing <- function(pkgs){
to_install <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if(length(to_install) > 0) install.packages(to_install)
}
install_if_missing(c("mediation", "dplyr"))
library(mediation)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.1
library(dplyr)
# 1) make sure df_reg exists and has needed columns
stopifnot(exists("df_reg"))
stopifnot(all(c("Change","hotel_density_w","dist_to_coast_m") %in% names(df_reg)))
df_med <- df_reg %>%
filter(
!is.na(Change),
!is.na(hotel_density_w),
!is.na(dist_to_coast_m)
)
# 2) mediator model (M ~ X)
med.fit <- lm(dist_to_coast_m ~ hotel_density_w, data = df_med)
# 3) outcome model (Y ~ X + M)
out.fit <- lm(Change ~ hotel_density_w + dist_to_coast_m, data = df_med)
# 4) run mediation
# sims=2000 is usually fine; increase to 5000+ for final reporting
set.seed(123)
med.out <- mediate(
model.m = med.fit,
model.y = out.fit,
treat = "hotel_density_w",
mediator= "dist_to_coast_m",
sims = 2000,
boot = TRUE # robust to non-normality
)
## Running nonparametric bootstrap
summary(med.out)
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.00494542 0.00325845 0.00701564 <2e-16 ***
## ADE 0.00222778 -0.00493922 0.00951248 0.543
## Total Effect 0.00717320 0.00028132 0.01459750 0.039 *
## Prop. Mediated 0.68943029 0.26832347 4.62386151 0.039 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 823
##
##
## Simulations: 2000
# 5) optional: plot ACME/ADE
plot(med.out)