R Markdown

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI
library(skimr)
library(mapview)
PR_SVL22 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2022_CencusTrack_PR_SVL/2022_CT_svi_interactive_map.csv")
## Rows: 939 Columns: 160
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (6): STATE, ST_ABBR, COUNTY, LOCATION, GeoLevel, Comparison
## dbl (154): ST, STCNTY, FIPS, AREA_SQMI, E_TOTPOP, M_TOTPOP, E_HU, M_HU, E_HH...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PR_SVL14 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2014_CencusTrack_PR_SVL/2014_CT_svi_interactive_map.csv")
## Rows: 910 Columns: 130
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (125): AFFGEOID, TRACTCE, ST, STATE, ST_ABBR, COUNTY, FIPS, LOCATION, E_...
## dbl   (5): STCNTY, AREA_SQMI, F_NOVEH, Shape.STArea(), Shape.STLength()
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#2022 data: select a subset of columns 
PR_SVL22_subset <- PR_SVL22 %>%
  dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)


view(PR_SVL22_subset)

#rename the columns
library(dplyr)

PR_SVL22_subset <- PR_SVL22 %>%
  dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )
print(PR_SVL22_subset)
## # A tibble: 939 × 5
##    `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
##                     <dbl>                       <dbl>                      <dbl>
##  1                  0.888                       0.244                     0.457 
##  2                  0.750                       0.109                     0.522 
##  3                  0.774                       0.145                     0.522 
##  4                  0.887                       0.552                     0.522 
##  5                  0.186                       0.424                     0.0684
##  6                  0.643                       0.690                     0.522 
##  7                  0.565                       0.646                     0.0456
##  8                  0.306                       0.463                     0.0163
##  9                  0.626                       0.887                     0.0087
## 10                  0.260                       0.351                     0.0456
## # ℹ 929 more rows
## # ℹ abbreviated name: ¹​`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <dbl>,
## #   `Overall Summary Ranking` <dbl>
# 2014 Data: select a subset of columns 
PR_SVL14_subset <- PR_SVL14 %>%
 dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)

view(PR_SVL14_subset)

# rename the columns
library(dplyr)

PR_SVL14_subset <- PR_SVL14 %>%
  dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )

print(PR_SVL14_subset)
## # A tibble: 910 × 5
##    `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
##    <chr>                  <chr>                       <chr>                     
##  1 RPL_THEME1             RPL_THEME2                  RPL_THEME3                
##  2 0.9717                 0.7423                      0.902                     
##  3 0.9174                 0.1927                      0.8976                    
##  4 0.9842                 0.1608                      0.8645                    
##  5 0.9333                 0.3932                      0.6134                    
##  6 0.7115                 0.9604                      0.7313                    
##  7 0.7692                 0.2126                      0.8656                    
##  8 0.6719                 0.2456                      0.1993                    
##  9 0.6244                 0.4989                      0.1663                    
## 10 0.8778                 0.5297                      0.5672                    
## # ℹ 900 more rows
## # ℹ abbreviated name: ¹​`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <chr>,
## #   `Overall Summary Ranking` <chr>
# Make sure both datasets have the key column
names(PR_SVL14)
##   [1] "AFFGEOID"         "TRACTCE"          "ST"              
##   [4] "STATE"            "ST_ABBR"          "STCNTY"          
##   [7] "COUNTY"           "FIPS"             "LOCATION"        
##  [10] "AREA_SQMI"        "E_TOTPOP"         "M_TOTPOP"        
##  [13] "E_HU"             "M_HU"             "E_HH"            
##  [16] "M_HH"             "E_POV"            "M_POV"           
##  [19] "E_UNEMP"          "M_UNEMP"          "E_PCI"           
##  [22] "M_PCI"            "E_NOHSDP"         "M_NOHSDP"        
##  [25] "E_AGE65"          "M_AGE65"          "E_AGE17"         
##  [28] "M_AGE17"          "E_DISABL"         "M_DISABL"        
##  [31] "E_SNGPNT"         "M_SNGPNT"         "E_MINRTY"        
##  [34] "M_MINRTY"         "E_LIMENG"         "M_LIMENG"        
##  [37] "E_MUNIT"          "M_MUNIT"          "E_MOBILE"        
##  [40] "M_MOBILE"         "E_CROWD"          "M_CROWD"         
##  [43] "E_NOVEH"          "M_NOVEH"          "E_GROUPQ"        
##  [46] "M_GROUPQ"         "EP_POV"           "MP_POV"          
##  [49] "EP_UNEMP"         "MP_UNEMP"         "EP_PCI"          
##  [52] "MP_PCI"           "EP_NOHSDP"        "MP_NOHSDP"       
##  [55] "EP_AGE65"         "MP_AGE65"         "EP_AGE17"        
##  [58] "MP_AGE17"         "EP_DISABL"        "MP_DISABL"       
##  [61] "EP_SNGPNT"        "MP_SNGPNT"        "EP_MINRTY"       
##  [64] "MP_MINRTY"        "EP_LIMENG"        "MP_LIMENG"       
##  [67] "EP_MUNIT"         "MP_MUNIT"         "EP_MOBILE"       
##  [70] "MP_MOBILE"        "EP_CROWD"         "MP_CROWD"        
##  [73] "EP_NOVEH"         "MP_NOVEH"         "EP_GROUPQ"       
##  [76] "MP_GROUPQ"        "EPL_POV"          "EPL_UNEMP"       
##  [79] "EPL_PCI"          "EPL_NOHSDP"       "SPL_THEME1"      
##  [82] "RPL_THEME1"       "EPL_AGE65"        "EPL_AGE17"       
##  [85] "EPL_DISABL"       "EPL_SNGPNT"       "SPL_THEME2"      
##  [88] "RPL_THEME2"       "EPL_MINRTY"       "EPL_LIMENG"      
##  [91] "SPL_THEME3"       "RPL_THEME3"       "EPL_MUNIT"       
##  [94] "EPL_MOBILE"       "EPL_CROWD"        "EPL_NOVEH"       
##  [97] "EPL_GROUPQ"       "SPL_THEME4"       "RPL_THEME4"      
## [100] "SPL_THEMES"       "RPL_THEMES"       "F_POV"           
## [103] "F_UNEMP"          "F_PCI"            "F_NOHSDP"        
## [106] "F_THEME1"         "F_AGE65"          "F_AGE17"         
## [109] "F_DISABL"         "F_SNGPNT"         "F_THEME2"        
## [112] "F_MINRTY"         "F_LIMENG"         "F_THEME3"        
## [115] "F_MUNIT"          "F_MOBILE"         "F_CROWD"         
## [118] "F_NOVEH"          "F_GROUPQ"         "F_THEME4"        
## [121] "F_TOTAL"          "E_UNINSUR"        "M_UNINSUR"       
## [124] "EP_UNINSUR"       "MP_UNINSUR"       "Shape"           
## [127] "Shape.STArea()"   "Shape.STLength()" "GeoLevel"        
## [130] "Comparison"
names(PR_SVL22)
##   [1] "ST"           "STATE"        "ST_ABBR"      "STCNTY"       "COUNTY"      
##   [6] "FIPS"         "LOCATION"     "AREA_SQMI"    "E_TOTPOP"     "M_TOTPOP"    
##  [11] "E_HU"         "M_HU"         "E_HH"         "M_HH"         "E_POV150"    
##  [16] "M_POV150"     "E_UNEMP"      "M_UNEMP"      "E_HBURD"      "M_HBURD"     
##  [21] "E_NOHSDP"     "M_NOHSDP"     "E_UNINSUR"    "M_UNINSUR"    "E_AGE65"     
##  [26] "M_AGE65"      "E_AGE17"      "M_AGE17"      "E_DISABL"     "M_DISABL"    
##  [31] "E_SNGPNT"     "M_SNGPNT"     "E_LIMENG"     "M_LIMENG"     "E_MINRTY"    
##  [36] "M_MINRTY"     "E_MUNIT"      "M_MUNIT"      "E_MOBILE"     "M_MOBILE"    
##  [41] "E_CROWD"      "M_CROWD"      "E_NOVEH"      "M_NOVEH"      "E_GROUPQ"    
##  [46] "M_GROUPQ"     "EP_POV150"    "MP_POV150"    "EP_UNEMP"     "MP_UNEMP"    
##  [51] "EP_HBURD"     "MP_HBURD"     "EP_NOHSDP"    "MP_NOHSDP"    "EP_UNINSUR"  
##  [56] "MP_UNINSUR"   "EP_AGE65"     "MP_AGE65"     "EP_AGE17"     "MP_AGE17"    
##  [61] "EP_DISABL"    "MP_DISABL"    "EP_SNGPNT"    "MP_SNGPNT"    "EP_LIMENG"   
##  [66] "MP_LIMENG"    "EP_MINRTY"    "MP_MINRTY"    "EP_MUNIT"     "MP_MUNIT"    
##  [71] "EP_MOBILE"    "MP_MOBILE"    "EP_CROWD"     "MP_CROWD"     "EP_NOVEH"    
##  [76] "MP_NOVEH"     "EP_GROUPQ"    "MP_GROUPQ"    "EPL_POV150"   "EPL_UNEMP"   
##  [81] "EPL_HBURD"    "EPL_NOHSDP"   "EPL_UNINSUR"  "SPL_THEME1"   "RPL_THEME1"  
##  [86] "EPL_AGE65"    "EPL_AGE17"    "EPL_DISABL"   "EPL_SNGPNT"   "EPL_LIMENG"  
##  [91] "SPL_THEME2"   "RPL_THEME2"   "EPL_MINRTY"   "SPL_THEME3"   "RPL_THEME3"  
##  [96] "EPL_MUNIT"    "EPL_MOBILE"   "EPL_CROWD"    "EPL_NOVEH"    "EPL_GROUPQ"  
## [101] "SPL_THEME4"   "RPL_THEME4"   "SPL_THEMES"   "RPL_THEMES"   "F_POV150"    
## [106] "F_UNEMP"      "F_HBURD"      "F_NOHSDP"     "F_UNINSUR"    "F_THEME1"    
## [111] "F_AGE65"      "F_AGE17"      "F_DISABL"     "F_SNGPNT"     "F_LIMENG"    
## [116] "F_THEME2"     "F_MINRTY"     "F_THEME3"     "F_MUNIT"      "F_MOBILE"    
## [121] "F_CROWD"      "F_NOVEH"      "F_GROUPQ"     "F_THEME4"     "F_TOTAL"     
## [126] "E_DAYPOP"     "E_NOINT"      "M_NOINT"      "E_AFAM"       "M_AFAM"      
## [131] "E_HISP"       "M_HISP"       "E_ASIAN"      "M_ASIAN"      "E_AIAN"      
## [136] "M_AIAN"       "E_NHPI"       "M_NHPI"       "E_TWOMORE"    "M_TWOMORE"   
## [141] "E_OTHERRACE"  "M_OTHERRACE"  "EP_NOINT"     "MP_NOINT"     "EP_AFAM"     
## [146] "MP_AFAM"      "EP_HISP"      "MP_HISP"      "EP_ASIAN"     "MP_ASIAN"    
## [151] "EP_AIAN"      "MP_AIAN"      "EP_NHPI"      "MP_NHPI"      "EP_TWOMORE"  
## [156] "MP_TWOMORE"   "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel"     "Comparison"
# Make sure both datasets have the key column
names(PR_SVL14)
##   [1] "AFFGEOID"         "TRACTCE"          "ST"              
##   [4] "STATE"            "ST_ABBR"          "STCNTY"          
##   [7] "COUNTY"           "FIPS"             "LOCATION"        
##  [10] "AREA_SQMI"        "E_TOTPOP"         "M_TOTPOP"        
##  [13] "E_HU"             "M_HU"             "E_HH"            
##  [16] "M_HH"             "E_POV"            "M_POV"           
##  [19] "E_UNEMP"          "M_UNEMP"          "E_PCI"           
##  [22] "M_PCI"            "E_NOHSDP"         "M_NOHSDP"        
##  [25] "E_AGE65"          "M_AGE65"          "E_AGE17"         
##  [28] "M_AGE17"          "E_DISABL"         "M_DISABL"        
##  [31] "E_SNGPNT"         "M_SNGPNT"         "E_MINRTY"        
##  [34] "M_MINRTY"         "E_LIMENG"         "M_LIMENG"        
##  [37] "E_MUNIT"          "M_MUNIT"          "E_MOBILE"        
##  [40] "M_MOBILE"         "E_CROWD"          "M_CROWD"         
##  [43] "E_NOVEH"          "M_NOVEH"          "E_GROUPQ"        
##  [46] "M_GROUPQ"         "EP_POV"           "MP_POV"          
##  [49] "EP_UNEMP"         "MP_UNEMP"         "EP_PCI"          
##  [52] "MP_PCI"           "EP_NOHSDP"        "MP_NOHSDP"       
##  [55] "EP_AGE65"         "MP_AGE65"         "EP_AGE17"        
##  [58] "MP_AGE17"         "EP_DISABL"        "MP_DISABL"       
##  [61] "EP_SNGPNT"        "MP_SNGPNT"        "EP_MINRTY"       
##  [64] "MP_MINRTY"        "EP_LIMENG"        "MP_LIMENG"       
##  [67] "EP_MUNIT"         "MP_MUNIT"         "EP_MOBILE"       
##  [70] "MP_MOBILE"        "EP_CROWD"         "MP_CROWD"        
##  [73] "EP_NOVEH"         "MP_NOVEH"         "EP_GROUPQ"       
##  [76] "MP_GROUPQ"        "EPL_POV"          "EPL_UNEMP"       
##  [79] "EPL_PCI"          "EPL_NOHSDP"       "SPL_THEME1"      
##  [82] "RPL_THEME1"       "EPL_AGE65"        "EPL_AGE17"       
##  [85] "EPL_DISABL"       "EPL_SNGPNT"       "SPL_THEME2"      
##  [88] "RPL_THEME2"       "EPL_MINRTY"       "EPL_LIMENG"      
##  [91] "SPL_THEME3"       "RPL_THEME3"       "EPL_MUNIT"       
##  [94] "EPL_MOBILE"       "EPL_CROWD"        "EPL_NOVEH"       
##  [97] "EPL_GROUPQ"       "SPL_THEME4"       "RPL_THEME4"      
## [100] "SPL_THEMES"       "RPL_THEMES"       "F_POV"           
## [103] "F_UNEMP"          "F_PCI"            "F_NOHSDP"        
## [106] "F_THEME1"         "F_AGE65"          "F_AGE17"         
## [109] "F_DISABL"         "F_SNGPNT"         "F_THEME2"        
## [112] "F_MINRTY"         "F_LIMENG"         "F_THEME3"        
## [115] "F_MUNIT"          "F_MOBILE"         "F_CROWD"         
## [118] "F_NOVEH"          "F_GROUPQ"         "F_THEME4"        
## [121] "F_TOTAL"          "E_UNINSUR"        "M_UNINSUR"       
## [124] "EP_UNINSUR"       "MP_UNINSUR"       "Shape"           
## [127] "Shape.STArea()"   "Shape.STLength()" "GeoLevel"        
## [130] "Comparison"
names(PR_SVL22)
##   [1] "ST"           "STATE"        "ST_ABBR"      "STCNTY"       "COUNTY"      
##   [6] "FIPS"         "LOCATION"     "AREA_SQMI"    "E_TOTPOP"     "M_TOTPOP"    
##  [11] "E_HU"         "M_HU"         "E_HH"         "M_HH"         "E_POV150"    
##  [16] "M_POV150"     "E_UNEMP"      "M_UNEMP"      "E_HBURD"      "M_HBURD"     
##  [21] "E_NOHSDP"     "M_NOHSDP"     "E_UNINSUR"    "M_UNINSUR"    "E_AGE65"     
##  [26] "M_AGE65"      "E_AGE17"      "M_AGE17"      "E_DISABL"     "M_DISABL"    
##  [31] "E_SNGPNT"     "M_SNGPNT"     "E_LIMENG"     "M_LIMENG"     "E_MINRTY"    
##  [36] "M_MINRTY"     "E_MUNIT"      "M_MUNIT"      "E_MOBILE"     "M_MOBILE"    
##  [41] "E_CROWD"      "M_CROWD"      "E_NOVEH"      "M_NOVEH"      "E_GROUPQ"    
##  [46] "M_GROUPQ"     "EP_POV150"    "MP_POV150"    "EP_UNEMP"     "MP_UNEMP"    
##  [51] "EP_HBURD"     "MP_HBURD"     "EP_NOHSDP"    "MP_NOHSDP"    "EP_UNINSUR"  
##  [56] "MP_UNINSUR"   "EP_AGE65"     "MP_AGE65"     "EP_AGE17"     "MP_AGE17"    
##  [61] "EP_DISABL"    "MP_DISABL"    "EP_SNGPNT"    "MP_SNGPNT"    "EP_LIMENG"   
##  [66] "MP_LIMENG"    "EP_MINRTY"    "MP_MINRTY"    "EP_MUNIT"     "MP_MUNIT"    
##  [71] "EP_MOBILE"    "MP_MOBILE"    "EP_CROWD"     "MP_CROWD"     "EP_NOVEH"    
##  [76] "MP_NOVEH"     "EP_GROUPQ"    "MP_GROUPQ"    "EPL_POV150"   "EPL_UNEMP"   
##  [81] "EPL_HBURD"    "EPL_NOHSDP"   "EPL_UNINSUR"  "SPL_THEME1"   "RPL_THEME1"  
##  [86] "EPL_AGE65"    "EPL_AGE17"    "EPL_DISABL"   "EPL_SNGPNT"   "EPL_LIMENG"  
##  [91] "SPL_THEME2"   "RPL_THEME2"   "EPL_MINRTY"   "SPL_THEME3"   "RPL_THEME3"  
##  [96] "EPL_MUNIT"    "EPL_MOBILE"   "EPL_CROWD"    "EPL_NOVEH"    "EPL_GROUPQ"  
## [101] "SPL_THEME4"   "RPL_THEME4"   "SPL_THEMES"   "RPL_THEMES"   "F_POV150"    
## [106] "F_UNEMP"      "F_HBURD"      "F_NOHSDP"     "F_UNINSUR"    "F_THEME1"    
## [111] "F_AGE65"      "F_AGE17"      "F_DISABL"     "F_SNGPNT"     "F_LIMENG"    
## [116] "F_THEME2"     "F_MINRTY"     "F_THEME3"     "F_MUNIT"      "F_MOBILE"    
## [121] "F_CROWD"      "F_NOVEH"      "F_GROUPQ"     "F_THEME4"     "F_TOTAL"     
## [126] "E_DAYPOP"     "E_NOINT"      "M_NOINT"      "E_AFAM"       "M_AFAM"      
## [131] "E_HISP"       "M_HISP"       "E_ASIAN"      "M_ASIAN"      "E_AIAN"      
## [136] "M_AIAN"       "E_NHPI"       "M_NHPI"       "E_TWOMORE"    "M_TWOMORE"   
## [141] "E_OTHERRACE"  "M_OTHERRACE"  "EP_NOINT"     "MP_NOINT"     "EP_AFAM"     
## [146] "MP_AFAM"      "EP_HISP"      "MP_HISP"      "EP_ASIAN"     "MP_ASIAN"    
## [151] "EP_AIAN"      "MP_AIAN"      "EP_NHPI"      "MP_NHPI"      "EP_TWOMORE"  
## [156] "MP_TWOMORE"   "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel"     "Comparison"
library(dplyr)

# Step 1. Make cleaned subset for 2014
PR_SVL14_subset <- PR_SVL14 %>%
  dplyr::select(
    STCNTY,          # county FIPS
    COUNTY,          # county name
    LOCATION,        # tract description
    FIPS,            # tract ID
    AREA_SQMI,       # area (sq mi)
    E_TOTPOP,        # total population
    RPL_THEME1,
    RPL_THEME2,
    RPL_THEME3,
    RPL_THEME4,
    RPL_THEMES
  ) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )

# Step 2. Make cleaned subset for 2022
PR_SVL22_subset <- PR_SVL22 %>%
  dplyr::select(
    STCNTY,
    COUNTY,
    LOCATION,
    FIPS,
    AREA_SQMI,
    E_TOTPOP,
    RPL_THEME1,
    RPL_THEME2,
    RPL_THEME3,
    RPL_THEME4,
    RPL_THEMES
  ) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )
# Step 3. Join 2014 + 2022 using FIPS (tract ID)

PR_SVL14_subset <- PR_SVL14_subset %>%
  slice(-1)   # Delete first row becuase it's not numeric (2022 is okay)
str(PR_SVL14_subset$`Overall Summary Ranking`)
##  chr [1:909] "0.8948" "0.5814" "0.6538" "0.8281" "0.8937" "0.5011" "0.4355" ...
comparison <- PR_SVL14_subset %>%
  mutate(FIPS = as.character(FIPS)) %>%              # FIPS 문자로
  rename(
    Overall_2014 = `Overall Summary Ranking`
  ) %>%
  left_join(
    PR_SVL22_subset %>%
      mutate(FIPS = as.character(FIPS)) %>%
      rename(
        Overall_2022 = `Overall Summary Ranking`
      ),
    by = "FIPS",
    suffix = c("_2014", "_2022")
  ) %>%
  mutate(
    # 여기서 진짜 숫자로 바꿔줌 (2014는 꼭 필요, 2022는 안전용)
    Overall_2014 = as.numeric(Overall_2014),
    Overall_2022 = as.numeric(Overall_2022),
    
    Change = Overall_2022 - Overall_2014,
    Change_Direction = case_when(
      Change > 0 ~ "Increased (worse / more vulnerable)",
      Change < 0 ~ "Decreased (better / less vulnerable)",
      TRUE       ~ "No change"
    )
  )


view(comparison)
table(comparison$Change_Direction)
## 
## Decreased (better / less vulnerable)  Increased (worse / more vulnerable) 
##                                  387                                  438 
##                            No change 
##                                   84
# ---- 1. FIPS 문자형 통일 + 헤더행 제거 ----
PR_SVL14_subset <- PR_SVL14_subset %>%
  mutate(FIPS = as.character(FIPS)) %>%
  filter(FIPS != "FIPS", !is.na(FIPS))

PR_SVL22_subset <- PR_SVL22_subset %>%
  mutate(
    FIPS = format(FIPS, scientific = FALSE, trim = TRUE),
    FIPS = as.character(FIPS)
  ) %>%
  filter(FIPS != "FIPS", !is.na(FIPS))

# ---- 2. SVI 점수 numeric 변환 ----
svi_cols <- c(
  "Socioeconomic Status",
  "Household Characteristics",
  "Racial & Ethnic Minority Status",
  "Housing Type & Transportation",
  "Overall Summary Ranking"
)

PR_SVL14_subset <- PR_SVL14_subset %>%
  mutate(across(all_of(svi_cols), as.numeric))

PR_SVL22_subset <- PR_SVL22_subset %>%
  mutate(across(all_of(svi_cols), as.numeric))

# ---- 3. 0~1 범위 밖 이상치 NA 처리 ----
PR_SVL14_subset <- PR_SVL14_subset %>%
  mutate(
    `Overall Summary Ranking` =
      ifelse(`Overall Summary Ranking` < 0 |
               `Overall Summary Ranking` > 1,
             NA, `Overall Summary Ranking`)
  )

PR_SVL22_subset <- PR_SVL22_subset %>%
  mutate(
    `Overall Summary Ranking` =
      ifelse(`Overall Summary Ranking` < 0 |
               `Overall Summary Ranking` > 1,
             NA, `Overall Summary Ranking`)
  )

# ---- 4. comparison 최종 버전 만들기 ----
comparison <- PR_SVL14_subset %>%
  rename(
    Overall_2014  = `Overall Summary Ranking`,
    COUNTY_2014   = COUNTY,
    LOCATION_2014 = LOCATION
  ) %>%
  left_join(
    PR_SVL22_subset %>%
      rename(
        Overall_2022  = `Overall Summary Ranking`,
        COUNTY_2022   = COUNTY,
        LOCATION_2022 = LOCATION
      ),
    by = "FIPS",
    suffix = c("_2014", "_2022")
  ) %>%
  filter(!is.na(Overall_2014), !is.na(Overall_2022)) %>%
  mutate(
    Change = Overall_2022 - Overall_2014,
    Change_Direction = case_when(
      Change > 0 ~ "Increased (worse / more vulnerable)",
      Change < 0 ~ "Decreased (better / less vulnerable)",
      TRUE       ~ "No change"
    )
  )

# ---- 5. 요약 / 히스토그램 ----
library(ggplot2)

summary(comparison$Change)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.69240 -0.12255  0.01160  0.01179  0.15470  0.78160
hist(comparison$Change, breaks = 40)

ggplot(comparison, aes(x = Change)) +
  geom_histogram(bins = 40, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of Change in Social Vulnerability (2022 - 2014)",
    x = "Change in Overall Summary Ranking (Δ vulnerability)",
    y = "Number of Census Tracts"
  ) +
  theme_minimal()

## Mapping

#Mapping

library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
options(tigris_use_cache = TRUE)

# 1. Puerto Rico Tracts 불러오기
pr_tracts <- tracts("PR", year = 2022, cb = TRUE) %>%
  st_transform(32620) %>%             # UTM zone20N (Puerto Rico)
  mutate(FIPS = as.character(GEOID))

# 2. tract + comparison 조인
pr_change_sf <- pr_tracts %>%
  dplyr::left_join(
    comparison %>% dplyr::select(FIPS, Change, Change_Direction),
    by = "FIPS"
  )


# 3. Continuous Change Map
ggplot(pr_change_sf) +
  geom_sf(aes(fill = Change), color="grey40", size=0.1) +
  scale_fill_distiller(palette="RdBu", direction=-1, limits=c(-0.7,0.7)) +
  labs(
    title="Change in Social Vulnerability (2014→2022)",
    fill="Δ SVI"
  ) +
  theme_minimal()

## 3. d

#Scatter plot

library(sf)
library(dplyr)

library(sf)

coast_pr <- st_read(
  "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg"
) %>%
  st_transform(32620)  # UTM zone 20N (Puerto Rico)
## Reading layer `pr_coastline' from data source 
##   `C:\Users\ddtmd\Desktop\2026 Spring\HotelDensity_PR_GooglePlaceAPI\data_processed\pr_coastline.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 16 features and 0 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -23817.68 ymin: 1980811 xmax: 265239.9 ymax: 2052181
## Projected CRS: WGS 84 / UTM zone 20N
#Creating Centroid
tract_centroids <- pr_change_sf %>% 
  st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
# centroid → coastline 거리 행렬 계산
dist_matrix <- st_distance(tract_centroids, coast_pr)

# 각 tract마다 "가장 가까운 해안까지의 거리"만 뽑기 (미터 단위)
tract_centroids$dist_to_coast_m <- apply(dist_matrix, 1, min)

# 요약 확인
summary(tract_centroids$dist_to_coast_m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    15.2  2501.0  5902.5  8164.3 11632.5 27400.2
library(ggplot2)

# =========================================================
library(sf)
library(dplyr)
library(ggplot2)

# 0) FIPS 타입 통일
pr_tracts <- pr_tracts %>%
  mutate(FIPS = as.character(GEOID))

comparison2 <- comparison %>%
  dplyr::mutate(FIPS = as.character(FIPS)) %>%
  dplyr::select(FIPS, Overall_2014, Overall_2022, Change, Change_Direction)


# 1) join
analysis_sf <- pr_tracts %>%
  dplyr::left_join(comparison2, by = "FIPS")

# 2) (optional) NA 확인
summary(is.na(analysis_sf$Overall_2014))
##    Mode   FALSE    TRUE 
## logical     823     116
summary(is.na(analysis_sf$Overall_2022))
##    Mode   FALSE    TRUE 
## logical     823     116
summary(is.na(analysis_sf$Change))
##    Mode   FALSE    TRUE 
## logical     823     116
## 2 Distance to coast 다시붙이기 
sf::sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
coast_pr <- st_read(
  "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg",
  quiet = TRUE
) %>% st_transform(32620)

analysis_sf <- analysis_sf %>% st_transform(32620)

cent <- st_centroid(analysis_sf)
## Warning: st_centroid assumes attributes are constant over geometries
dmat <- st_distance(cent, coast_pr)
analysis_sf$dist_to_coast_m <- as.numeric(apply(dmat, 1, min))

summary(analysis_sf$dist_to_coast_m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    15.2  2501.0  5902.5  8164.3 11632.5 27400.2
## 3. Level plot 2014 / 2022 / Change (3개)
df <- analysis_sf %>% st_drop_geometry() %>%
  filter(!is.na(dist_to_coast_m), !is.na(Overall_2014), !is.na(Overall_2022), !is.na(Change))

# (A) Level 2014
ggplot(df, aes(x = dist_to_coast_m, y = Overall_2014)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SVI Level (2014) vs Distance to Coastline (PR)",
    x = "Distance to Coast (meters)",
    y = "SVI (Overall Summary Ranking, 2014)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# (B) Level 2022
ggplot(df, aes(x = dist_to_coast_m, y = Overall_2022)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SVI Level (2022) vs Distance to Coastline (PR)",
    x = "Distance to Coast (meters)",
    y = "SVI (Overall Summary Ranking, 2022)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# (C) Change
ggplot(df, aes(x = dist_to_coast_m, y = Change)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "ΔSVI (2022–2014) vs Distance to Coastline (PR)",
    x = "Distance to Coast (meters)",
    y = "Δ SVI (2022 - 2014)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

##4. 회귀선형 확인
m14  <- lm(Overall_2014 ~ dist_to_coast_m, data = df)
m22  <- lm(Overall_2022 ~ dist_to_coast_m, data = df)
mchg <- lm(Change       ~ dist_to_coast_m, data = df)

summary(m14)
## 
## Call:
## lm(formula = Overall_2014 ~ dist_to_coast_m, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5448 -0.2477 -0.0100  0.2393  0.5287 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.673e-01  1.537e-02  30.410  < 2e-16 ***
## dist_to_coast_m 4.210e-06  1.387e-06   3.035  0.00248 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2852 on 821 degrees of freedom
## Multiple R-squared:  0.0111, Adjusted R-squared:  0.009891 
## F-statistic: 9.211 on 1 and 821 DF,  p-value: 0.002481
summary(m22)
## 
## Call:
## lm(formula = Overall_2022 ~ dist_to_coast_m, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53016 -0.24258  0.01179  0.24680  0.49894 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.356e-01  1.536e-02  34.880   <2e-16 ***
## dist_to_coast_m -2.476e-06  1.386e-06  -1.787   0.0744 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.285 on 821 degrees of freedom
## Multiple R-squared:  0.003873,   Adjusted R-squared:  0.00266 
## F-statistic: 3.192 on 1 and 821 DF,  p-value: 0.07436
summary(mchg)
## 
## Call:
## lm(formula = Change ~ dist_to_coast_m, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71314 -0.13323  0.00029  0.14906  0.75157 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.827e-02  1.247e-02   5.477 5.75e-08 ***
## dist_to_coast_m -6.686e-06  1.125e-06  -5.942 4.15e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2314 on 821 degrees of freedom
## Multiple R-squared:  0.04124,    Adjusted R-squared:  0.04007 
## F-statistic: 35.31 on 1 and 821 DF,  p-value: 4.149e-09

4. import hotel data

# FINAL (Copy/Paste): PR tract-level dataset
#   - SVI Change (Change) 2014→2022  : from pr_change_sf
#   - Distance to coast (dist_to_coast_m)
#   - Hotel density (lodging_dens_km2) + Winsorize (p99)
#   - Regression: Change ~ hotel_density_w + dist_to_coast_m
# =========================================================

# 패키지 로드 + s2 끄기
library(sf)
library(dplyr)
library(readr)
library(ggplot2)

sf::sf_use_s2(FALSE)

# “pr_change_sf”가 이미 있는지 확인
# ---- 0) MUST have pr_change_sf (contains Change) ----
stopifnot(exists("pr_change_sf"))
# quick check
stopifnot("Change" %in% names(pr_change_sf))
stopifnot("FIPS" %in% names(pr_change_sf))

# ---- 1) Read hotel density ----
dir_hd <- "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI"

#호텔 밀도 데이터 읽고 필요한 컬럼만 정리
hotel_den_raw <- read_csv(
  file.path(dir_hd, "PR_tract_lodging_density.csv"),
  show_col_types = FALSE
)

hotel_den <- hotel_den_raw %>%
  transmute(
    FIPS = as.character(GEOID),
    hotel_density = as.numeric(lodging_dens_km2),   # lodging density per km2
    lodging_total = as.numeric(lodging_total),
    area_km2 = as.numeric(area_km2)
  )

#SVI 변화 데이터 + 호텔 밀도 붙이기 (조인)
# ---- 2) Join hotel density to pr_change_sf ----
analysis_sf <- pr_change_sf %>%
  mutate(FIPS = as.character(FIPS)) %>%
  left_join(hotel_den, by = "FIPS")

# sanity check
summary(analysis_sf$hotel_density)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.5379  0.0000 64.9653
sum(is.na(analysis_sf$hotel_density))  # should be 0
## [1] 0

5. Import distance to coastal from centroid

#해안까지 거리 만들기 (핵심 공간 계산)
# ---- 3) Distance to PR coastline (centroid -> min distance) ----
coast_pr <- st_read(
  "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg",
  quiet = TRUE
) %>% st_transform(32620)

analysis_sf <- analysis_sf %>% st_transform(32620)

cent <- st_centroid(analysis_sf)
## Warning: st_centroid assumes attributes are constant over geometries
dmat <- st_distance(cent, coast_pr)
analysis_sf$dist_to_coast_m <- as.numeric(apply(dmat, 1, min))

summary(analysis_sf$dist_to_coast_m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    15.2  2501.0  5902.5  8164.3 11632.5 27400.2

6. Ready for anlysis

#호텔밀도 극단값 자르기 (winsorize p99)
# ---- 4) Winsorize hotel density at 99th percentile (no log) ----
p99 <- quantile(analysis_sf$hotel_density, 0.99, na.rm = TRUE)
analysis_sf <- analysis_sf %>%
  mutate(hotel_density_w = pmin(hotel_density, p99))

quantile(analysis_sf$hotel_density,   c(.95, .99), na.rm = TRUE)
##       95%       99% 
##  1.137027 14.438188
quantile(analysis_sf$hotel_density_w, c(.95, .99), na.rm = TRUE)
##       95%       99% 
##  1.137027 13.922990
#회귀용 데이터프레임 만들기 (geometry 제거 + NA 제거)
# ---- 5) Regression-ready df (drop geometry + remove NA) ----
df_reg <- analysis_sf %>%
  st_drop_geometry() %>%
  filter(
    !is.na(Change),
    !is.na(hotel_density_w),
    !is.na(dist_to_coast_m)
  )

# ---- 6) Main regression ----
m <- lm(Change ~ hotel_density_w + dist_to_coast_m, data = df_reg)
summary(m)
## 
## Call:
## lm(formula = Change ~ hotel_density_w + dist_to_coast_m, data = df_reg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71171 -0.13453 -0.00019  0.14989  0.75261 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.665e-02  1.292e-02   5.157 3.14e-07 ***
## hotel_density_w  2.228e-03  4.662e-03   0.478    0.633    
## dist_to_coast_m -6.585e-06  1.145e-06  -5.749 1.26e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2315 on 820 degrees of freedom
## Multiple R-squared:  0.0415, Adjusted R-squared:  0.03917 
## F-statistic: 17.75 on 2 and 820 DF,  p-value: 2.831e-08
# ---- 7) (Optional) Quick scatter checks ----
ggplot(df_reg, aes(x = hotel_density_w, y = Change)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(title = "ΔSVI vs Hotel density (winsorized p99)", x = "Hotel density (per km2, winsorized)", y = "ΔSVI (2022-2014)")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(df_reg, aes(x = dist_to_coast_m, y = Change)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(title = "ΔSVI vs Distance to coast", x = "Distance to coast (m)", y = "ΔSVI (2022-2014)")
## `geom_smooth()` using formula = 'y ~ x'

#호텔밀도 → 해안거리
m_b <- lm(dist_to_coast_m ~ hotel_density_w, data = df_reg)
summary(m_b)
## 
## Call:
## lm(formula = dist_to_coast_m ~ hotel_density_w, data = df_reg)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -8691  -5546  -2268   3621  18694 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       8706.4      250.5  34.754  < 2e-16 ***
## hotel_density_w   -751.0      139.6  -5.379 9.76e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7053 on 821 degrees of freedom
## Multiple R-squared:  0.03405,    Adjusted R-squared:  0.03287 
## F-statistic: 28.94 on 1 and 821 DF,  p-value: 9.759e-08
#호텔밀도만 넣은 모델
m_c <- lm(Change ~ hotel_density_w, data = df_reg)
summary(m_c)
## 
## Call:
## lm(formula = Change ~ hotel_density_w, data = df_reg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70172 -0.13736 -0.00062  0.14355  0.77228 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)     0.009320   0.008380   1.112    0.266
## hotel_density_w 0.007173   0.004670   1.536    0.125
## 
## Residual standard error: 0.2359 on 821 degrees of freedom
## Multiple R-squared:  0.002865,   Adjusted R-squared:  0.001651 
## F-statistic: 2.359 on 1 and 821 DF,  p-value: 0.1249

7. Test

#mediation test
# =========================================================
# Mediation (package: mediation)
# X = hotel_density_w
# M = dist_to_coast_m
# Y = Change (ΔSVI 2022-2014)
# =========================================================

# 0) packages
install_if_missing <- function(pkgs){
  to_install <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
  if(length(to_install) > 0) install.packages(to_install)
}
install_if_missing(c("mediation", "dplyr"))

library(mediation)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.1
library(dplyr)

# 1) make sure df_reg exists and has needed columns
stopifnot(exists("df_reg"))
stopifnot(all(c("Change","hotel_density_w","dist_to_coast_m") %in% names(df_reg)))

df_med <- df_reg %>%
  filter(
    !is.na(Change),
    !is.na(hotel_density_w),
    !is.na(dist_to_coast_m)
  )

# 2) mediator model (M ~ X)
med.fit <- lm(dist_to_coast_m ~ hotel_density_w, data = df_med)

# 3) outcome model (Y ~ X + M)
out.fit <- lm(Change ~ hotel_density_w + dist_to_coast_m, data = df_med)

# 4) run mediation
# sims=2000 is usually fine; increase to 5000+ for final reporting
set.seed(123)
med.out <- mediate(
  model.m = med.fit,
  model.y = out.fit,
  treat   = "hotel_density_w",
  mediator= "dist_to_coast_m",
  sims    = 2000,
  boot    = TRUE   # robust to non-normality
)
## Running nonparametric bootstrap
summary(med.out)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                   Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            0.00494542   0.00325845   0.00701564  <2e-16 ***
## ADE             0.00222778  -0.00493922   0.00951248   0.543    
## Total Effect    0.00717320   0.00028132   0.01459750   0.039 *  
## Prop. Mediated  0.68943029   0.26832347   4.62386151   0.039 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 823 
## 
## 
## Simulations: 2000
# 5) optional: plot ACME/ADE
plot(med.out)