R Markdown

library(tidyverse)
library(here)
library(skimr)
library(mapview)
PR_SVL22 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2022_CencusTrack_PR_SVL/2022_CT_svi_interactive_map.csv")
PR_SVL14 <- read_csv("C:/Users/ddtmd/Desktop/PR_Data/2014_CencusTrack_PR_SVL/2014_CT_svi_interactive_map.csv")

#2022 data: select a subset of columns 
PR_SVL22_subset <- PR_SVL22 %>%
  dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)


view(PR_SVL22_subset)

#rename the columns
library(dplyr)

PR_SVL22_subset <- PR_SVL22 %>%
  dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )
print(PR_SVL22_subset)
## # A tibble: 939 × 5
##    `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
##                     <dbl>                       <dbl>                      <dbl>
##  1                  0.888                       0.244                     0.457 
##  2                  0.750                       0.109                     0.522 
##  3                  0.774                       0.145                     0.522 
##  4                  0.887                       0.552                     0.522 
##  5                  0.186                       0.424                     0.0684
##  6                  0.643                       0.690                     0.522 
##  7                  0.565                       0.646                     0.0456
##  8                  0.306                       0.463                     0.0163
##  9                  0.626                       0.887                     0.0087
## 10                  0.260                       0.351                     0.0456
## # ℹ 929 more rows
## # ℹ abbreviated name: ¹​`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <dbl>,
## #   `Overall Summary Ranking` <dbl>
# 2014 Data: select a subset of columns 
PR_SVL14_subset <- PR_SVL14 %>%
 dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES)

view(PR_SVL14_subset)

# rename the columns
library(dplyr)

PR_SVL14_subset <- PR_SVL14 %>%
  dplyr::select(RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, RPL_THEMES) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )

print(PR_SVL14_subset)
## # A tibble: 910 × 5
##    `Socioeconomic Status` `Household Characteristics` Racial & Ethnic Minority…¹
##    <chr>                  <chr>                       <chr>                     
##  1 RPL_THEME1             RPL_THEME2                  RPL_THEME3                
##  2 0.9717                 0.7423                      0.902                     
##  3 0.9174                 0.1927                      0.8976                    
##  4 0.9842                 0.1608                      0.8645                    
##  5 0.9333                 0.3932                      0.6134                    
##  6 0.7115                 0.9604                      0.7313                    
##  7 0.7692                 0.2126                      0.8656                    
##  8 0.6719                 0.2456                      0.1993                    
##  9 0.6244                 0.4989                      0.1663                    
## 10 0.8778                 0.5297                      0.5672                    
## # ℹ 900 more rows
## # ℹ abbreviated name: ¹​`Racial & Ethnic Minority Status`
## # ℹ 2 more variables: `Housing Type & Transportation` <chr>,
## #   `Overall Summary Ranking` <chr>
# Make sure both datasets have the key column
names(PR_SVL14)
##   [1] "AFFGEOID"         "TRACTCE"          "ST"              
##   [4] "STATE"            "ST_ABBR"          "STCNTY"          
##   [7] "COUNTY"           "FIPS"             "LOCATION"        
##  [10] "AREA_SQMI"        "E_TOTPOP"         "M_TOTPOP"        
##  [13] "E_HU"             "M_HU"             "E_HH"            
##  [16] "M_HH"             "E_POV"            "M_POV"           
##  [19] "E_UNEMP"          "M_UNEMP"          "E_PCI"           
##  [22] "M_PCI"            "E_NOHSDP"         "M_NOHSDP"        
##  [25] "E_AGE65"          "M_AGE65"          "E_AGE17"         
##  [28] "M_AGE17"          "E_DISABL"         "M_DISABL"        
##  [31] "E_SNGPNT"         "M_SNGPNT"         "E_MINRTY"        
##  [34] "M_MINRTY"         "E_LIMENG"         "M_LIMENG"        
##  [37] "E_MUNIT"          "M_MUNIT"          "E_MOBILE"        
##  [40] "M_MOBILE"         "E_CROWD"          "M_CROWD"         
##  [43] "E_NOVEH"          "M_NOVEH"          "E_GROUPQ"        
##  [46] "M_GROUPQ"         "EP_POV"           "MP_POV"          
##  [49] "EP_UNEMP"         "MP_UNEMP"         "EP_PCI"          
##  [52] "MP_PCI"           "EP_NOHSDP"        "MP_NOHSDP"       
##  [55] "EP_AGE65"         "MP_AGE65"         "EP_AGE17"        
##  [58] "MP_AGE17"         "EP_DISABL"        "MP_DISABL"       
##  [61] "EP_SNGPNT"        "MP_SNGPNT"        "EP_MINRTY"       
##  [64] "MP_MINRTY"        "EP_LIMENG"        "MP_LIMENG"       
##  [67] "EP_MUNIT"         "MP_MUNIT"         "EP_MOBILE"       
##  [70] "MP_MOBILE"        "EP_CROWD"         "MP_CROWD"        
##  [73] "EP_NOVEH"         "MP_NOVEH"         "EP_GROUPQ"       
##  [76] "MP_GROUPQ"        "EPL_POV"          "EPL_UNEMP"       
##  [79] "EPL_PCI"          "EPL_NOHSDP"       "SPL_THEME1"      
##  [82] "RPL_THEME1"       "EPL_AGE65"        "EPL_AGE17"       
##  [85] "EPL_DISABL"       "EPL_SNGPNT"       "SPL_THEME2"      
##  [88] "RPL_THEME2"       "EPL_MINRTY"       "EPL_LIMENG"      
##  [91] "SPL_THEME3"       "RPL_THEME3"       "EPL_MUNIT"       
##  [94] "EPL_MOBILE"       "EPL_CROWD"        "EPL_NOVEH"       
##  [97] "EPL_GROUPQ"       "SPL_THEME4"       "RPL_THEME4"      
## [100] "SPL_THEMES"       "RPL_THEMES"       "F_POV"           
## [103] "F_UNEMP"          "F_PCI"            "F_NOHSDP"        
## [106] "F_THEME1"         "F_AGE65"          "F_AGE17"         
## [109] "F_DISABL"         "F_SNGPNT"         "F_THEME2"        
## [112] "F_MINRTY"         "F_LIMENG"         "F_THEME3"        
## [115] "F_MUNIT"          "F_MOBILE"         "F_CROWD"         
## [118] "F_NOVEH"          "F_GROUPQ"         "F_THEME4"        
## [121] "F_TOTAL"          "E_UNINSUR"        "M_UNINSUR"       
## [124] "EP_UNINSUR"       "MP_UNINSUR"       "Shape"           
## [127] "Shape.STArea()"   "Shape.STLength()" "GeoLevel"        
## [130] "Comparison"
names(PR_SVL22)
##   [1] "ST"           "STATE"        "ST_ABBR"      "STCNTY"       "COUNTY"      
##   [6] "FIPS"         "LOCATION"     "AREA_SQMI"    "E_TOTPOP"     "M_TOTPOP"    
##  [11] "E_HU"         "M_HU"         "E_HH"         "M_HH"         "E_POV150"    
##  [16] "M_POV150"     "E_UNEMP"      "M_UNEMP"      "E_HBURD"      "M_HBURD"     
##  [21] "E_NOHSDP"     "M_NOHSDP"     "E_UNINSUR"    "M_UNINSUR"    "E_AGE65"     
##  [26] "M_AGE65"      "E_AGE17"      "M_AGE17"      "E_DISABL"     "M_DISABL"    
##  [31] "E_SNGPNT"     "M_SNGPNT"     "E_LIMENG"     "M_LIMENG"     "E_MINRTY"    
##  [36] "M_MINRTY"     "E_MUNIT"      "M_MUNIT"      "E_MOBILE"     "M_MOBILE"    
##  [41] "E_CROWD"      "M_CROWD"      "E_NOVEH"      "M_NOVEH"      "E_GROUPQ"    
##  [46] "M_GROUPQ"     "EP_POV150"    "MP_POV150"    "EP_UNEMP"     "MP_UNEMP"    
##  [51] "EP_HBURD"     "MP_HBURD"     "EP_NOHSDP"    "MP_NOHSDP"    "EP_UNINSUR"  
##  [56] "MP_UNINSUR"   "EP_AGE65"     "MP_AGE65"     "EP_AGE17"     "MP_AGE17"    
##  [61] "EP_DISABL"    "MP_DISABL"    "EP_SNGPNT"    "MP_SNGPNT"    "EP_LIMENG"   
##  [66] "MP_LIMENG"    "EP_MINRTY"    "MP_MINRTY"    "EP_MUNIT"     "MP_MUNIT"    
##  [71] "EP_MOBILE"    "MP_MOBILE"    "EP_CROWD"     "MP_CROWD"     "EP_NOVEH"    
##  [76] "MP_NOVEH"     "EP_GROUPQ"    "MP_GROUPQ"    "EPL_POV150"   "EPL_UNEMP"   
##  [81] "EPL_HBURD"    "EPL_NOHSDP"   "EPL_UNINSUR"  "SPL_THEME1"   "RPL_THEME1"  
##  [86] "EPL_AGE65"    "EPL_AGE17"    "EPL_DISABL"   "EPL_SNGPNT"   "EPL_LIMENG"  
##  [91] "SPL_THEME2"   "RPL_THEME2"   "EPL_MINRTY"   "SPL_THEME3"   "RPL_THEME3"  
##  [96] "EPL_MUNIT"    "EPL_MOBILE"   "EPL_CROWD"    "EPL_NOVEH"    "EPL_GROUPQ"  
## [101] "SPL_THEME4"   "RPL_THEME4"   "SPL_THEMES"   "RPL_THEMES"   "F_POV150"    
## [106] "F_UNEMP"      "F_HBURD"      "F_NOHSDP"     "F_UNINSUR"    "F_THEME1"    
## [111] "F_AGE65"      "F_AGE17"      "F_DISABL"     "F_SNGPNT"     "F_LIMENG"    
## [116] "F_THEME2"     "F_MINRTY"     "F_THEME3"     "F_MUNIT"      "F_MOBILE"    
## [121] "F_CROWD"      "F_NOVEH"      "F_GROUPQ"     "F_THEME4"     "F_TOTAL"     
## [126] "E_DAYPOP"     "E_NOINT"      "M_NOINT"      "E_AFAM"       "M_AFAM"      
## [131] "E_HISP"       "M_HISP"       "E_ASIAN"      "M_ASIAN"      "E_AIAN"      
## [136] "M_AIAN"       "E_NHPI"       "M_NHPI"       "E_TWOMORE"    "M_TWOMORE"   
## [141] "E_OTHERRACE"  "M_OTHERRACE"  "EP_NOINT"     "MP_NOINT"     "EP_AFAM"     
## [146] "MP_AFAM"      "EP_HISP"      "MP_HISP"      "EP_ASIAN"     "MP_ASIAN"    
## [151] "EP_AIAN"      "MP_AIAN"      "EP_NHPI"      "MP_NHPI"      "EP_TWOMORE"  
## [156] "MP_TWOMORE"   "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel"     "Comparison"
# Make sure both datasets have the key column
names(PR_SVL14)
##   [1] "AFFGEOID"         "TRACTCE"          "ST"              
##   [4] "STATE"            "ST_ABBR"          "STCNTY"          
##   [7] "COUNTY"           "FIPS"             "LOCATION"        
##  [10] "AREA_SQMI"        "E_TOTPOP"         "M_TOTPOP"        
##  [13] "E_HU"             "M_HU"             "E_HH"            
##  [16] "M_HH"             "E_POV"            "M_POV"           
##  [19] "E_UNEMP"          "M_UNEMP"          "E_PCI"           
##  [22] "M_PCI"            "E_NOHSDP"         "M_NOHSDP"        
##  [25] "E_AGE65"          "M_AGE65"          "E_AGE17"         
##  [28] "M_AGE17"          "E_DISABL"         "M_DISABL"        
##  [31] "E_SNGPNT"         "M_SNGPNT"         "E_MINRTY"        
##  [34] "M_MINRTY"         "E_LIMENG"         "M_LIMENG"        
##  [37] "E_MUNIT"          "M_MUNIT"          "E_MOBILE"        
##  [40] "M_MOBILE"         "E_CROWD"          "M_CROWD"         
##  [43] "E_NOVEH"          "M_NOVEH"          "E_GROUPQ"        
##  [46] "M_GROUPQ"         "EP_POV"           "MP_POV"          
##  [49] "EP_UNEMP"         "MP_UNEMP"         "EP_PCI"          
##  [52] "MP_PCI"           "EP_NOHSDP"        "MP_NOHSDP"       
##  [55] "EP_AGE65"         "MP_AGE65"         "EP_AGE17"        
##  [58] "MP_AGE17"         "EP_DISABL"        "MP_DISABL"       
##  [61] "EP_SNGPNT"        "MP_SNGPNT"        "EP_MINRTY"       
##  [64] "MP_MINRTY"        "EP_LIMENG"        "MP_LIMENG"       
##  [67] "EP_MUNIT"         "MP_MUNIT"         "EP_MOBILE"       
##  [70] "MP_MOBILE"        "EP_CROWD"         "MP_CROWD"        
##  [73] "EP_NOVEH"         "MP_NOVEH"         "EP_GROUPQ"       
##  [76] "MP_GROUPQ"        "EPL_POV"          "EPL_UNEMP"       
##  [79] "EPL_PCI"          "EPL_NOHSDP"       "SPL_THEME1"      
##  [82] "RPL_THEME1"       "EPL_AGE65"        "EPL_AGE17"       
##  [85] "EPL_DISABL"       "EPL_SNGPNT"       "SPL_THEME2"      
##  [88] "RPL_THEME2"       "EPL_MINRTY"       "EPL_LIMENG"      
##  [91] "SPL_THEME3"       "RPL_THEME3"       "EPL_MUNIT"       
##  [94] "EPL_MOBILE"       "EPL_CROWD"        "EPL_NOVEH"       
##  [97] "EPL_GROUPQ"       "SPL_THEME4"       "RPL_THEME4"      
## [100] "SPL_THEMES"       "RPL_THEMES"       "F_POV"           
## [103] "F_UNEMP"          "F_PCI"            "F_NOHSDP"        
## [106] "F_THEME1"         "F_AGE65"          "F_AGE17"         
## [109] "F_DISABL"         "F_SNGPNT"         "F_THEME2"        
## [112] "F_MINRTY"         "F_LIMENG"         "F_THEME3"        
## [115] "F_MUNIT"          "F_MOBILE"         "F_CROWD"         
## [118] "F_NOVEH"          "F_GROUPQ"         "F_THEME4"        
## [121] "F_TOTAL"          "E_UNINSUR"        "M_UNINSUR"       
## [124] "EP_UNINSUR"       "MP_UNINSUR"       "Shape"           
## [127] "Shape.STArea()"   "Shape.STLength()" "GeoLevel"        
## [130] "Comparison"
names(PR_SVL22)
##   [1] "ST"           "STATE"        "ST_ABBR"      "STCNTY"       "COUNTY"      
##   [6] "FIPS"         "LOCATION"     "AREA_SQMI"    "E_TOTPOP"     "M_TOTPOP"    
##  [11] "E_HU"         "M_HU"         "E_HH"         "M_HH"         "E_POV150"    
##  [16] "M_POV150"     "E_UNEMP"      "M_UNEMP"      "E_HBURD"      "M_HBURD"     
##  [21] "E_NOHSDP"     "M_NOHSDP"     "E_UNINSUR"    "M_UNINSUR"    "E_AGE65"     
##  [26] "M_AGE65"      "E_AGE17"      "M_AGE17"      "E_DISABL"     "M_DISABL"    
##  [31] "E_SNGPNT"     "M_SNGPNT"     "E_LIMENG"     "M_LIMENG"     "E_MINRTY"    
##  [36] "M_MINRTY"     "E_MUNIT"      "M_MUNIT"      "E_MOBILE"     "M_MOBILE"    
##  [41] "E_CROWD"      "M_CROWD"      "E_NOVEH"      "M_NOVEH"      "E_GROUPQ"    
##  [46] "M_GROUPQ"     "EP_POV150"    "MP_POV150"    "EP_UNEMP"     "MP_UNEMP"    
##  [51] "EP_HBURD"     "MP_HBURD"     "EP_NOHSDP"    "MP_NOHSDP"    "EP_UNINSUR"  
##  [56] "MP_UNINSUR"   "EP_AGE65"     "MP_AGE65"     "EP_AGE17"     "MP_AGE17"    
##  [61] "EP_DISABL"    "MP_DISABL"    "EP_SNGPNT"    "MP_SNGPNT"    "EP_LIMENG"   
##  [66] "MP_LIMENG"    "EP_MINRTY"    "MP_MINRTY"    "EP_MUNIT"     "MP_MUNIT"    
##  [71] "EP_MOBILE"    "MP_MOBILE"    "EP_CROWD"     "MP_CROWD"     "EP_NOVEH"    
##  [76] "MP_NOVEH"     "EP_GROUPQ"    "MP_GROUPQ"    "EPL_POV150"   "EPL_UNEMP"   
##  [81] "EPL_HBURD"    "EPL_NOHSDP"   "EPL_UNINSUR"  "SPL_THEME1"   "RPL_THEME1"  
##  [86] "EPL_AGE65"    "EPL_AGE17"    "EPL_DISABL"   "EPL_SNGPNT"   "EPL_LIMENG"  
##  [91] "SPL_THEME2"   "RPL_THEME2"   "EPL_MINRTY"   "SPL_THEME3"   "RPL_THEME3"  
##  [96] "EPL_MUNIT"    "EPL_MOBILE"   "EPL_CROWD"    "EPL_NOVEH"    "EPL_GROUPQ"  
## [101] "SPL_THEME4"   "RPL_THEME4"   "SPL_THEMES"   "RPL_THEMES"   "F_POV150"    
## [106] "F_UNEMP"      "F_HBURD"      "F_NOHSDP"     "F_UNINSUR"    "F_THEME1"    
## [111] "F_AGE65"      "F_AGE17"      "F_DISABL"     "F_SNGPNT"     "F_LIMENG"    
## [116] "F_THEME2"     "F_MINRTY"     "F_THEME3"     "F_MUNIT"      "F_MOBILE"    
## [121] "F_CROWD"      "F_NOVEH"      "F_GROUPQ"     "F_THEME4"     "F_TOTAL"     
## [126] "E_DAYPOP"     "E_NOINT"      "M_NOINT"      "E_AFAM"       "M_AFAM"      
## [131] "E_HISP"       "M_HISP"       "E_ASIAN"      "M_ASIAN"      "E_AIAN"      
## [136] "M_AIAN"       "E_NHPI"       "M_NHPI"       "E_TWOMORE"    "M_TWOMORE"   
## [141] "E_OTHERRACE"  "M_OTHERRACE"  "EP_NOINT"     "MP_NOINT"     "EP_AFAM"     
## [146] "MP_AFAM"      "EP_HISP"      "MP_HISP"      "EP_ASIAN"     "MP_ASIAN"    
## [151] "EP_AIAN"      "MP_AIAN"      "EP_NHPI"      "MP_NHPI"      "EP_TWOMORE"  
## [156] "MP_TWOMORE"   "EP_OTHERRACE" "MP_OTHERRACE" "GeoLevel"     "Comparison"
library(dplyr)

# Step 1. Make cleaned subset for 2014
PR_SVL14_subset <- PR_SVL14 %>%
  dplyr::select(
    STCNTY,          # county FIPS
    COUNTY,          # county name
    LOCATION,        # tract description
    FIPS,            # tract ID
    AREA_SQMI,       # area (sq mi)
    E_TOTPOP,        # total population
    RPL_THEME1,
    RPL_THEME2,
    RPL_THEME3,
    RPL_THEME4,
    RPL_THEMES
  ) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )

# Step 2. Make cleaned subset for 2022
PR_SVL22_subset <- PR_SVL22 %>%
  dplyr::select(
    STCNTY,
    COUNTY,
    LOCATION,
    FIPS,
    AREA_SQMI,
    E_TOTPOP,
    RPL_THEME1,
    RPL_THEME2,
    RPL_THEME3,
    RPL_THEME4,
    RPL_THEMES
  ) %>%
  rename(
    `Socioeconomic Status` = RPL_THEME1,
    `Household Characteristics` = RPL_THEME2,
    `Racial & Ethnic Minority Status` = RPL_THEME3,
    `Housing Type & Transportation` = RPL_THEME4,
    `Overall Summary Ranking` = RPL_THEMES
  )
# Step 3. Join 2014 + 2022 using FIPS (tract ID)

PR_SVL14_subset <- PR_SVL14_subset %>%
  slice(-1)   # Delete first row becuase it's not numeric (2022 is okay)
str(PR_SVL14_subset$`Overall Summary Ranking`)
##  chr [1:909] "0.8948" "0.5814" "0.6538" "0.8281" "0.8937" "0.5011" "0.4355" ...
comparison <- PR_SVL14_subset %>%
  mutate(FIPS = as.character(FIPS)) %>%              # FIPS 문자로
  rename(
    Overall_2014 = `Overall Summary Ranking`
  ) %>%
  left_join(
    PR_SVL22_subset %>%
      mutate(FIPS = as.character(FIPS)) %>%
      rename(
        Overall_2022 = `Overall Summary Ranking`
      ),
    by = "FIPS",
    suffix = c("_2014", "_2022")
  ) %>%
  mutate(
    # 여기서 진짜 숫자로 바꿔줌 (2014는 꼭 필요, 2022는 안전용)
    Overall_2014 = as.numeric(Overall_2014),
    Overall_2022 = as.numeric(Overall_2022),
    
    Change = Overall_2022 - Overall_2014,
    Change_Direction = case_when(
      Change > 0 ~ "Increased (worse / more vulnerable)",
      Change < 0 ~ "Decreased (better / less vulnerable)",
      TRUE       ~ "No change"
    )
  )


view(comparison)
table(comparison$Change_Direction)
## 
## Decreased (better / less vulnerable)  Increased (worse / more vulnerable) 
##                                  387                                  438 
##                            No change 
##                                   84
# ---- 1. FIPS 문자형 통일 + 헤더행 제거 ----
PR_SVL14_subset <- PR_SVL14_subset %>%
  mutate(FIPS = as.character(FIPS)) %>%
  filter(FIPS != "FIPS", !is.na(FIPS))

PR_SVL22_subset <- PR_SVL22_subset %>%
  mutate(
    FIPS = format(FIPS, scientific = FALSE, trim = TRUE),
    FIPS = as.character(FIPS)
  ) %>%
  filter(FIPS != "FIPS", !is.na(FIPS))

# ---- 2. SVI 점수 numeric 변환 ----
svi_cols <- c(
  "Socioeconomic Status",
  "Household Characteristics",
  "Racial & Ethnic Minority Status",
  "Housing Type & Transportation",
  "Overall Summary Ranking"
)

PR_SVL14_subset <- PR_SVL14_subset %>%
  mutate(across(all_of(svi_cols), as.numeric))

PR_SVL22_subset <- PR_SVL22_subset %>%
  mutate(across(all_of(svi_cols), as.numeric))

# ---- 3. 0~1 범위 밖 이상치 NA 처리 ----
PR_SVL14_subset <- PR_SVL14_subset %>%
  mutate(
    `Overall Summary Ranking` =
      ifelse(`Overall Summary Ranking` < 0 |
               `Overall Summary Ranking` > 1,
             NA, `Overall Summary Ranking`)
  )

PR_SVL22_subset <- PR_SVL22_subset %>%
  mutate(
    `Overall Summary Ranking` =
      ifelse(`Overall Summary Ranking` < 0 |
               `Overall Summary Ranking` > 1,
             NA, `Overall Summary Ranking`)
  )

# ---- 4. comparison 최종 버전 만들기 ----
comparison <- PR_SVL14_subset %>%
  rename(
    Overall_2014  = `Overall Summary Ranking`,
    COUNTY_2014   = COUNTY,
    LOCATION_2014 = LOCATION
  ) %>%
  left_join(
    PR_SVL22_subset %>%
      rename(
        Overall_2022  = `Overall Summary Ranking`,
        COUNTY_2022   = COUNTY,
        LOCATION_2022 = LOCATION
      ),
    by = "FIPS",
    suffix = c("_2014", "_2022")
  ) %>%
  filter(!is.na(Overall_2014), !is.na(Overall_2022)) %>%
  mutate(
    Change = Overall_2022 - Overall_2014,
    Change_Direction = case_when(
      Change > 0 ~ "Increased (worse / more vulnerable)",
      Change < 0 ~ "Decreased (better / less vulnerable)",
      TRUE       ~ "No change"
    )
  )

# ---- 5. 요약 / 히스토그램 ----
library(ggplot2)

summary(comparison$Change)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.69240 -0.12255  0.01160  0.01179  0.15470  0.78160
hist(comparison$Change, breaks = 40)

ggplot(comparison, aes(x = Change)) +
  geom_histogram(bins = 40, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of Change in Social Vulnerability (2022 - 2014)",
    x = "Change in Overall Summary Ranking (Δ vulnerability)",
    y = "Number of Census Tracts"
  ) +
  theme_minimal()

## Mapping

#Mapping

library(sf)
library(ggplot2)
library(dplyr)
library(tigris)

options(tigris_use_cache = TRUE)

# 1. Puerto Rico Tracts 불러오기
pr_tracts <- tracts("PR", year = 2022, cb = TRUE) %>%
  st_transform(32620) %>%             # UTM zone20N (Puerto Rico)
  mutate(FIPS = as.character(GEOID))

# 2. tract + comparison 조인
pr_change_sf <- pr_tracts %>%
  dplyr::left_join(
    comparison %>% dplyr::select(FIPS, Change, Change_Direction),
    by = "FIPS"
  )


# 3. Continuous Change Map
ggplot(pr_change_sf) +
  geom_sf(aes(fill = Change), color="grey40", size=0.1) +
  scale_fill_distiller(palette="RdBu", direction=-1, limits=c(-0.7,0.7)) +
  labs(
    title="Change in Social Vulnerability (2014→2022)",
    fill="Δ SVI"
  ) +
  theme_minimal()

## 3. d

#Scatter plot

library(sf)
library(dplyr)

library(sf)

coast_pr <- st_read(
  "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg"
) %>%
  st_transform(32620)  # UTM zone 20N (Puerto Rico)
## Reading layer `pr_coastline' from data source 
##   `C:\Users\ddtmd\Desktop\2026 Spring\HotelDensity_PR_GooglePlaceAPI\data_processed\pr_coastline.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 16 features and 0 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -23817.68 ymin: 1980811 xmax: 265239.9 ymax: 2052181
## Projected CRS: WGS 84 / UTM zone 20N
#Creating Centroid
tract_centroids <- pr_change_sf %>% 
  st_centroid()

# centroid → coastline 거리 행렬 계산
dist_matrix <- st_distance(tract_centroids, coast_pr)

# 각 tract마다 "가장 가까운 해안까지의 거리"만 뽑기 (미터 단위)
tract_centroids$dist_to_coast_m <- apply(dist_matrix, 1, min)

# 요약 확인
summary(tract_centroids$dist_to_coast_m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    15.2  2501.0  5902.5  8164.3 11632.5 27400.2
library(ggplot2)

# =========================================================
library(sf)
library(dplyr)
library(ggplot2)

# 0) FIPS 타입 통일
pr_tracts <- pr_tracts %>%
  mutate(FIPS = as.character(GEOID))

comparison2 <- comparison %>%
  dplyr::mutate(FIPS = as.character(FIPS)) %>%
  dplyr::select(FIPS, Overall_2014, Overall_2022, Change, Change_Direction)


# 1) join
analysis_sf <- pr_tracts %>%
  dplyr::left_join(comparison2, by = "FIPS")

# 2) (optional) NA 확인
summary(is.na(analysis_sf$Overall_2014))
##    Mode   FALSE    TRUE 
## logical     823     116
summary(is.na(analysis_sf$Overall_2022))
##    Mode   FALSE    TRUE 
## logical     823     116
summary(is.na(analysis_sf$Change))
##    Mode   FALSE    TRUE 
## logical     823     116
## 2 Distance to coast 다시붙이기 
sf::sf_use_s2(FALSE)

coast_pr <- st_read(
  "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg",
  quiet = TRUE
) %>% st_transform(32620)

analysis_sf <- analysis_sf %>% st_transform(32620)

cent <- st_centroid(analysis_sf)
dmat <- st_distance(cent, coast_pr)
analysis_sf$dist_to_coast_m <- as.numeric(apply(dmat, 1, min))

summary(analysis_sf$dist_to_coast_m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    15.2  2501.0  5902.5  8164.3 11632.5 27400.2
## 3. Level plot 2014 / 2022 / Change (3개)
df <- analysis_sf %>% st_drop_geometry() %>%
  filter(!is.na(dist_to_coast_m), !is.na(Overall_2014), !is.na(Overall_2022), !is.na(Change))

# (A) Level 2014
ggplot(df, aes(x = dist_to_coast_m, y = Overall_2014)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SVI Level (2014) vs Distance to Coastline (PR)",
    x = "Distance to Coast (meters)",
    y = "SVI (Overall Summary Ranking, 2014)"
  ) +
  theme_minimal()

# (B) Level 2022
ggplot(df, aes(x = dist_to_coast_m, y = Overall_2022)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SVI Level (2022) vs Distance to Coastline (PR)",
    x = "Distance to Coast (meters)",
    y = "SVI (Overall Summary Ranking, 2022)"
  ) +
  theme_minimal()

# (C) Change
ggplot(df, aes(x = dist_to_coast_m, y = Change)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "ΔSVI (2022–2014) vs Distance to Coastline (PR)",
    x = "Distance to Coast (meters)",
    y = "Δ SVI (2022 - 2014)"
  ) +
  theme_minimal()

##4. 회귀선형 확인
m14  <- lm(Overall_2014 ~ dist_to_coast_m, data = df)
m22  <- lm(Overall_2022 ~ dist_to_coast_m, data = df)
mchg <- lm(Change       ~ dist_to_coast_m, data = df)

summary(m14)
## 
## Call:
## lm(formula = Overall_2014 ~ dist_to_coast_m, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5448 -0.2477 -0.0100  0.2393  0.5287 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.673e-01  1.537e-02  30.410  < 2e-16 ***
## dist_to_coast_m 4.210e-06  1.387e-06   3.035  0.00248 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2852 on 821 degrees of freedom
## Multiple R-squared:  0.0111, Adjusted R-squared:  0.009891 
## F-statistic: 9.211 on 1 and 821 DF,  p-value: 0.002481
summary(m22)
## 
## Call:
## lm(formula = Overall_2022 ~ dist_to_coast_m, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53016 -0.24258  0.01179  0.24680  0.49894 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.356e-01  1.536e-02  34.880   <2e-16 ***
## dist_to_coast_m -2.476e-06  1.386e-06  -1.787   0.0744 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.285 on 821 degrees of freedom
## Multiple R-squared:  0.003873,   Adjusted R-squared:  0.00266 
## F-statistic: 3.192 on 1 and 821 DF,  p-value: 0.07436
summary(mchg)
## 
## Call:
## lm(formula = Change ~ dist_to_coast_m, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71314 -0.13323  0.00029  0.14906  0.75157 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.827e-02  1.247e-02   5.477 5.75e-08 ***
## dist_to_coast_m -6.686e-06  1.125e-06  -5.942 4.15e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2314 on 821 degrees of freedom
## Multiple R-squared:  0.04124,    Adjusted R-squared:  0.04007 
## F-statistic: 35.31 on 1 and 821 DF,  p-value: 4.149e-09

4. import hotel data

# FINAL (Copy/Paste): PR tract-level dataset
#   - SVI Change (Change) 2014→2022  : from pr_change_sf
#   - Distance to coast (dist_to_coast_m)
#   - Hotel density (lodging_dens_km2) + Winsorize (p99)
#   - Regression: Change ~ hotel_density_w + dist_to_coast_m
# =========================================================

# 패키지 로드 + s2 끄기
library(sf)
library(dplyr)
library(readr)
library(ggplot2)

sf::sf_use_s2(FALSE)

# “pr_change_sf”가 이미 있는지 확인
# ---- 0) MUST have pr_change_sf (contains Change) ----
stopifnot(exists("pr_change_sf"))
# quick check
stopifnot("Change" %in% names(pr_change_sf))
stopifnot("FIPS" %in% names(pr_change_sf))

# ---- 1) Read hotel density ----
dir_hd <- "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI"

#호텔 밀도 데이터 읽고 필요한 컬럼만 정리
hotel_den_raw <- read_csv(
  file.path(dir_hd, "PR_tract_lodging_density.csv"),
  show_col_types = FALSE
)

hotel_den <- hotel_den_raw %>%
  transmute(
    FIPS = as.character(GEOID),
    hotel_density = as.numeric(lodging_dens_km2),   # lodging density per km2
    lodging_total = as.numeric(lodging_total),
    area_km2 = as.numeric(area_km2)
  )

#SVI 변화 데이터 + 호텔 밀도 붙이기 (조인)
# ---- 2) Join hotel density to pr_change_sf ----
analysis_sf <- pr_change_sf %>%
  mutate(FIPS = as.character(FIPS)) %>%
  left_join(hotel_den, by = "FIPS")

# sanity check
summary(analysis_sf$hotel_density)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.5379  0.0000 64.9653
sum(is.na(analysis_sf$hotel_density))  # should be 0
## [1] 0

5. Import distance to coastal from centroid

#해안까지 거리 만들기 (핵심 공간 계산)
# ---- 3) Distance to PR coastline (centroid -> min distance) ----
coast_pr <- st_read(
  "C:/Users/ddtmd/Desktop/2026 Spring/HotelDensity_PR_GooglePlaceAPI/data_processed/pr_coastline.gpkg",
  quiet = TRUE
) %>% st_transform(32620)

analysis_sf <- analysis_sf %>% st_transform(32620)

cent <- st_centroid(analysis_sf)
dmat <- st_distance(cent, coast_pr)
analysis_sf$dist_to_coast_m <- as.numeric(apply(dmat, 1, min))

summary(analysis_sf$dist_to_coast_m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    15.2  2501.0  5902.5  8164.3 11632.5 27400.2

6. Ready for anlysis

#호텔밀도 극단값 자르기 (winsorize p99)
# ---- 4) Winsorize hotel density at 99th percentile (no log) ----
p99 <- quantile(analysis_sf$hotel_density, 0.99, na.rm = TRUE)
analysis_sf <- analysis_sf %>%
  mutate(hotel_density_w = pmin(hotel_density, p99))

quantile(analysis_sf$hotel_density,   c(.95, .99), na.rm = TRUE)
##       95%       99% 
##  1.137027 14.438188
quantile(analysis_sf$hotel_density_w, c(.95, .99), na.rm = TRUE)
##       95%       99% 
##  1.137027 13.922990
#회귀용 데이터프레임 만들기 (geometry 제거 + NA 제거)
# ---- 5) Regression-ready df (drop geometry + remove NA) ----
df_reg <- analysis_sf %>%
  st_drop_geometry() %>%
  filter(
    !is.na(Change),
    !is.na(hotel_density_w),
    !is.na(dist_to_coast_m)
  )

# ---- 6) Main regression ----
m <- lm(Change ~ hotel_density_w + dist_to_coast_m, data = df_reg)
summary(m)
## 
## Call:
## lm(formula = Change ~ hotel_density_w + dist_to_coast_m, data = df_reg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71171 -0.13453 -0.00019  0.14989  0.75261 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.665e-02  1.292e-02   5.157 3.14e-07 ***
## hotel_density_w  2.228e-03  4.662e-03   0.478    0.633    
## dist_to_coast_m -6.585e-06  1.145e-06  -5.749 1.26e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2315 on 820 degrees of freedom
## Multiple R-squared:  0.0415, Adjusted R-squared:  0.03917 
## F-statistic: 17.75 on 2 and 820 DF,  p-value: 2.831e-08
# ---- 7) (Optional) Quick scatter checks ----
ggplot(df_reg, aes(x = hotel_density_w, y = Change)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(title = "ΔSVI vs Hotel density (winsorized p99)", x = "Hotel density (per km2, winsorized)", y = "ΔSVI (2022-2014)")

ggplot(df_reg, aes(x = dist_to_coast_m, y = Change)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal() +
  labs(title = "ΔSVI vs Distance to coast", x = "Distance to coast (m)", y = "ΔSVI (2022-2014)")

#호텔밀도 → 해안거리
m_b <- lm(dist_to_coast_m ~ hotel_density_w, data = df_reg)
summary(m_b)
## 
## Call:
## lm(formula = dist_to_coast_m ~ hotel_density_w, data = df_reg)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -8691  -5546  -2268   3621  18694 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       8706.4      250.5  34.754  < 2e-16 ***
## hotel_density_w   -751.0      139.6  -5.379 9.76e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7053 on 821 degrees of freedom
## Multiple R-squared:  0.03405,    Adjusted R-squared:  0.03287 
## F-statistic: 28.94 on 1 and 821 DF,  p-value: 9.759e-08
#호텔밀도만 넣은 모델
m_c <- lm(Change ~ hotel_density_w, data = df_reg)
summary(m_c)
## 
## Call:
## lm(formula = Change ~ hotel_density_w, data = df_reg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70172 -0.13736 -0.00062  0.14355  0.77228 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)     0.009320   0.008380   1.112    0.266
## hotel_density_w 0.007173   0.004670   1.536    0.125
## 
## Residual standard error: 0.2359 on 821 degrees of freedom
## Multiple R-squared:  0.002865,   Adjusted R-squared:  0.001651 
## F-statistic: 2.359 on 1 and 821 DF,  p-value: 0.1249

7. Test

#mediation test
# =========================================================
# Mediation (package: mediation)
# X = hotel_density_w
# M = dist_to_coast_m
# Y = Change (ΔSVI 2022-2014)
# =========================================================

# 0) packages
install_if_missing <- function(pkgs){
  to_install <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
  if(length(to_install) > 0) install.packages(to_install)
}
install_if_missing(c("mediation", "dplyr"))

library(mediation)
library(dplyr)

# 1) make sure df_reg exists and has needed columns
stopifnot(exists("df_reg"))
stopifnot(all(c("Change","hotel_density_w","dist_to_coast_m") %in% names(df_reg)))

df_med <- df_reg %>%
  filter(
    !is.na(Change),
    !is.na(hotel_density_w),
    !is.na(dist_to_coast_m)
  )

# 2) mediator model (M ~ X)
med.fit <- lm(dist_to_coast_m ~ hotel_density_w, data = df_med)

# 3) outcome model (Y ~ X + M)
out.fit <- lm(Change ~ hotel_density_w + dist_to_coast_m, data = df_med)

# 4) run mediation
# sims=2000 is usually fine; increase to 5000+ for final reporting
set.seed(123)
med.out <- mediate(
  model.m = med.fit,
  model.y = out.fit,
  treat   = "hotel_density_w",
  mediator= "dist_to_coast_m",
  sims    = 2000,
  boot    = TRUE   # robust to non-normality
)

summary(med.out)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                   Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            0.00494542   0.00325845   0.00701564  <2e-16 ***
## ADE             0.00222778  -0.00493922   0.00951248   0.543    
## Total Effect    0.00717320   0.00028132   0.01459750   0.039 *  
## Prop. Mediated  0.68943029   0.26832347   4.62386151   0.039 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 823 
## 
## 
## Simulations: 2000
# 5) optional: plot ACME/ADE
plot(med.out)

Controal Variables

#Contral Variables

# ---- packages ----
install_if_missing <- function(pkgs){
  to_install <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
  if(length(to_install) > 0) install.packages(to_install)
}
install_if_missing(c("tidycensus","tigris","sf","dplyr","stringr","jsonlite"))

library(tidycensus)
library(tigris)
library(sf)
library(dplyr)
library(stringr)
library(jsonlite)

options(tigris_use_cache = TRUE)
sf::sf_use_s2(FALSE)

# (권장) API key 한 번만 세팅
# census_api_key("YOUR_KEY", install = TRUE, overwrite = TRUE)
# readRenviron("~/.Renviron")


#1. DP03 프로필(산업 % 비중)만 “직접 API”로 가져오는 함수
#tidycensus가 DP profile에서 404 나는 경우가 있어서, DP03만 direct API로 안정적으로 가져옴.

get_pr_dp03_profile_pct <- function(year_end){
  
  # 우리가 원하는 DP03 "Percent" 변수 목록 (P)
  vars_dp03P <- c(
    pct_construction   = "DP03_0034P",
    pct_manufacturing  = "DP03_0035P",
    pct_wholesale      = "DP03_0036P",
    pct_retail         = "DP03_0037P",
    pct_transport      = "DP03_0038P",
    pct_information    = "DP03_0039P",
    pct_finance_re     = "DP03_0040P",
    pct_prof_sci_mgmt  = "DP03_0041P",
    pct_edu_health     = "DP03_0042P",
    pct_tourism_proxy  = "DP03_0043P",  # Arts/Ent/Rec/Accom/Food
    pct_other_services = "DP03_0044P",
    pct_public_admin   = "DP03_0045P"
  )
  
  # Census API에서는 보통 Percent estimate가 "...PE" 형태로 옴
  api_vars <- paste0(unname(vars_dp03P), "E")   # ex: DP03_0043PE
  names(api_vars) <- names(vars_dp03P)
  
  base <- sprintf("https://api.census.gov/data/%s/acs/acs5/profile", year_end)
  get_str <- paste(c("NAME", api_vars), collapse = ",")
  
  url <- paste0(
    base,
    "?get=", URLencode(get_str, reserved = TRUE),
    "&for=tract:*",
    "&in=state:72"
  )
  
  raw <- jsonlite::fromJSON(url)
  df <- as.data.frame(raw[-1, ], stringsAsFactors = FALSE)
  names(df) <- raw[1, ]
  
  out <- df %>%
    dplyr::transmute(
      FIPS = paste0(state, county, tract),
      dplyr::across(dplyr::all_of(api_vars), ~ suppressWarnings(as.numeric(.x)) / 100)
    )
  
  # pct_* 로 rename (자동 매핑)
  rename_map <- setNames(names(vars_dp03P), api_vars)
  out <- out %>% dplyr::rename(!!!rename_map)
  
  out
}

#2. PR tract controls 한 번에 가져오기 (DP03 + basic + housing + area)
pull_pr_controls <- function(year_end){
  
  # 1) DP03 (percent industry shares)
  dp03 <- get_pr_dp03_profile_pct(year_end)
  
  # 2) basic SES
  vars_basic <- c(
    total_pop   = "B01003_001",
    med_income  = "B19013_001",
    pov_total   = "B17001_001",
    pov_below   = "B17001_002"
  )
  
  basic <- tidycensus::get_acs(
    geography = "tract",
    variables = vars_basic,
    state = "PR",
    year = year_end,
    survey = "acs5",
    output = "wide"
  ) %>%
    dplyr::transmute(
      FIPS = GEOID,
      total_pop  = total_popE,
      med_income = med_incomeE,
      pov_rate   = dplyr::if_else(pov_totalE > 0, pov_belowE / pov_totalE, NA_real_)
    )
  
  # 3) housing pressure
  vars_housing <- c(
    tenure_total   = "B25003_001",
    renters        = "B25003_003",
    med_gross_rent = "B25064_001",
    rb_30_34 = "B25070_008",
    rb_35_39 = "B25070_009",
    rb_40_49 = "B25070_010",
    rb_50p   = "B25070_011",
    rb_total = "B25070_001"
  )
  
  housing <- tidycensus::get_acs(
    geography = "tract",
    variables = vars_housing,
    state = "PR",
    year = year_end,
    survey = "acs5",
    output = "wide"
  ) %>%
    dplyr::transmute(
      FIPS = GEOID,
      renter_share = dplyr::if_else(tenure_totalE > 0, rentersE / tenure_totalE, NA_real_),
      med_gross_rent = med_gross_rentE,
      rent_burden_30p = dplyr::if_else(
        rb_totalE > 0,
        (rb_30_34E + rb_35_39E + rb_40_49E + rb_50pE) / rb_totalE,
        NA_real_
      )
    )
  
  # 4) area for pop density (고정 2022 CB 추천)
  tr <- tigris::tracts("PR", year = 2022, cb = TRUE) %>%
    sf::st_transform(32620) %>%
    dplyr::mutate(
      FIPS = GEOID,
      area_km2 = as.numeric(sf::st_area(geometry)) / 1e6
    ) %>%
    sf::st_drop_geometry() %>%
    dplyr::select(FIPS, area_km2)
  
  out <- dp03 %>%
    dplyr::left_join(basic,  by = "FIPS") %>%
    dplyr::left_join(housing, by = "FIPS") %>%
    dplyr::left_join(tr,     by = "FIPS") %>%
    dplyr::mutate(
      pop_density_km2 = dplyr::if_else(area_km2 > 0, total_pop / area_km2, NA_real_),
      year_end = year_end
    )
  
  out
}

#3. 
acs14 <- pull_pr_controls(2014) %>% dplyr::rename_with(~ paste0(.x, "_2014"), -FIPS)
acs22 <- pull_pr_controls(2022) %>% dplyr::rename_with(~ paste0(.x, "_2022"), -FIPS)

# 확인
names(acs14)[grepl("^pct_", names(acs14))]
## character(0)
names(acs14)[grepl("tourism", names(acs14))]
## character(0)
head(acs14)
##          FIPS DP03_0034PE_2014 DP03_0035PE_2014 DP03_0036PE_2014
## 1 72029100200            0.077            0.133            0.021
## 2 72029100400            0.109            0.036            0.052
## 3 72029100800            0.061            0.016            0.025
## 4 72029100700            0.168            0.125            0.020
## 5 72029100101            0.039            0.048            0.033
## 6 72029100502            0.019            0.095            0.013
##   DP03_0037PE_2014 DP03_0038PE_2014 DP03_0039PE_2014 DP03_0040PE_2014
## 1            0.086            0.031            0.000            0.075
## 2            0.126            0.037            0.008            0.012
## 3            0.123            0.057            0.000            0.055
## 4            0.069            0.061            0.000            0.047
## 5            0.115            0.073            0.024            0.040
## 6            0.155            0.023            0.000            0.074
##   DP03_0041PE_2014 DP03_0042PE_2014 DP03_0043PE_2014 DP03_0044PE_2014
## 1            0.093            0.245            0.115            0.037
## 2            0.077            0.263            0.127            0.035
## 3            0.098            0.370            0.073            0.016
## 4            0.075            0.224            0.046            0.046
## 5            0.117            0.208            0.137            0.035
## 6            0.160            0.170            0.145            0.019
##   DP03_0045PE_2014 total_pop_2014 med_income_2014 pov_rate_2014
## 1            0.087           2309           16724     0.5184062
## 2            0.116           7138           19624     0.5057651
## 3            0.020           1539           14828     0.4938272
## 4            0.113           4515           16868     0.3936642
## 5            0.114           4250           15657     0.5590663
## 6            0.126           3720           26250     0.2814516
##   renter_share_2014 med_gross_rent_2014 rent_burden_30p_2014 area_km2_2014
## 1         0.3803191                 405            0.5734266     3.2793940
## 2         0.2465294                 759            0.9223301     3.8475950
## 3         0.2673611                 463            0.8831169    12.1343082
## 4         0.2773674                 415            0.7578692    19.2464521
## 5         0.3036837                 456            0.8076923     4.8014769
## 6         0.2200913                 672            0.6597510     0.7108695
##   pop_density_km2_2014 year_end_2014
## 1             704.0935          2014
## 2            1855.1848          2014
## 3             126.8305          2014
## 4             234.5887          2014
## 5             885.1443          2014
## 6            5233.0279          2014
#지금 acs14에서 DP03 컬럼을 “pct_”로 다시 rename
#너가 이미 뽑아놓은 acs14, acs22에 바로 적용해.
acs14 <- acs14 %>%
  dplyr::rename(
    pct_construction_2014   = DP03_0034PE_2014,
    pct_manufacturing_2014  = DP03_0035PE_2014,
    pct_wholesale_2014      = DP03_0036PE_2014,
    pct_retail_2014         = DP03_0037PE_2014,
    pct_transport_2014      = DP03_0038PE_2014,
    pct_information_2014    = DP03_0039PE_2014,
    pct_finance_re_2014     = DP03_0040PE_2014,
    pct_prof_sci_mgmt_2014  = DP03_0041PE_2014,
    pct_edu_health_2014     = DP03_0042PE_2014,
    pct_tourism_proxy_2014  = DP03_0043PE_2014,
    pct_other_services_2014 = DP03_0044PE_2014,
    pct_public_admin_2014   = DP03_0045PE_2014
  )

acs22 <- acs22 %>%
  dplyr::rename(
    pct_construction_2022   = DP03_0034PE_2022,
    pct_manufacturing_2022  = DP03_0035PE_2022,
    pct_wholesale_2022      = DP03_0036PE_2022,
    pct_retail_2022         = DP03_0037PE_2022,
    pct_transport_2022      = DP03_0038PE_2022,
    pct_information_2022    = DP03_0039PE_2022,
    pct_finance_re_2022     = DP03_0040PE_2022,
    pct_prof_sci_mgmt_2022  = DP03_0041PE_2022,
    pct_edu_health_2022     = DP03_0042PE_2022,
    pct_tourism_proxy_2022  = DP03_0043PE_2022,
    pct_other_services_2022 = DP03_0044PE_2022,
    pct_public_admin_2022   = DP03_0045PE_2022
  )

# 확인
names(acs14)[grepl("^pct_", names(acs14))]
##  [1] "pct_construction_2014"   "pct_manufacturing_2014" 
##  [3] "pct_wholesale_2014"      "pct_retail_2014"        
##  [5] "pct_transport_2014"      "pct_information_2014"   
##  [7] "pct_finance_re_2014"     "pct_prof_sci_mgmt_2014" 
##  [9] "pct_edu_health_2014"     "pct_tourism_proxy_2014" 
## [11] "pct_other_services_2014" "pct_public_admin_2014"
names(acs14)[grepl("tourism", names(acs14))]
## [1] "pct_tourism_proxy_2014"
#############################################################################

#1) Join: df_reg에 acs14 + acs22 붙이기

df_full <- df_reg %>%
dplyr::mutate(FIPS = as.character(FIPS)) %>%
  dplyr::left_join(acs14, by = "FIPS") %>%
  dplyr::left_join(acs22, by = "FIPS")

# join 잘 됐는지 체크
dplyr::glimpse(df_full)
## Rows: 823
## Columns: 63
## $ STATEFP                 <chr> "72", "72", "72", "72", "72", "72", "72", "72"…
## $ COUNTYFP                <chr> "005", "025", "025", "025", "031", "031", "043…
## $ TRACTCE                 <chr> "400900", "201000", "201500", "201300", "05090…
## $ AFFGEOID                <chr> "1400000US72005400900", "1400000US72025201000"…
## $ GEOID                   <chr> "72005400900", "72025201000", "72025201500", "…
## $ NAME                    <chr> "4009", "2010", "2015", "2013", "509.02", "508…
## $ NAMELSAD                <chr> "Census Tract 4009", "Census Tract 2010", "Cen…
## $ STUSPS                  <chr> "PR", "PR", "PR", "PR", "PR", "PR", "PR", "PR"…
## $ NAMELSADCO              <chr> "Aguadilla Municipio", "Caguas Municipio", "Ca…
## $ STATE_NAME              <chr> "Puerto Rico", "Puerto Rico", "Puerto Rico", "…
## $ LSAD                    <chr> "CT", "CT", "CT", "CT", "CT", "CT", "CT", "CT"…
## $ ALAND                   <dbl> 1120757, 1183020, 588614, 759722, 3693709, 106…
## $ AWATER                  <dbl> 0, 0, 0, 0, 111446, 182694, 0, 498516, 0, 0, 0…
## $ FIPS                    <chr> "72005400900", "72025201000", "72025201500", "…
## $ Change                  <dbl> 0.0703, 0.0201, 0.1968, 0.2645, 0.0553, 0.1219…
## $ Change_Direction        <chr> "Increased (worse / more vulnerable)", "Increa…
## $ hotel_density           <dbl> 0.8795381, 0.0000000, 0.0000000, 0.0000000, 0.…
## $ lodging_total           <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ area_km2                <dbl> 1.1369604, 1.2036730, 0.5953259, 0.7520502, 3.…
## $ dist_to_coast_m         <dbl> 593.2190, 22745.5561, 23769.3409, 22759.9076, …
## $ hotel_density_w         <dbl> 0.8795381, 0.0000000, 0.0000000, 0.0000000, 0.…
## $ pct_construction_2014   <dbl> 0.138, 0.000, 0.014, 0.124, 0.075, 0.022, 0.02…
## $ pct_manufacturing_2014  <dbl> 0.140, 0.112, 0.089, 0.102, 0.022, 0.032, 0.17…
## $ pct_wholesale_2014      <dbl> 0.067, 0.030, 0.037, 0.041, 0.000, 0.041, 0.01…
## $ pct_retail_2014         <dbl> 0.190, 0.250, 0.202, 0.211, 0.096, 0.184, 0.13…
## $ pct_transport_2014      <dbl> 0.015, 0.000, 0.000, 0.000, 0.071, 0.071, 0.01…
## $ pct_information_2014    <dbl> 0.031, 0.000, 0.014, 0.000, 0.023, 0.032, 0.00…
## $ pct_finance_re_2014     <dbl> 0.000, 0.043, 0.085, 0.056, 0.083, 0.079, 0.03…
## $ pct_prof_sci_mgmt_2014  <dbl> 0.079, 0.051, 0.102, 0.076, 0.090, 0.104, 0.05…
## $ pct_edu_health_2014     <dbl> 0.090, 0.100, 0.220, 0.199, 0.217, 0.197, 0.28…
## $ pct_tourism_proxy_2014  <dbl> 0.155, 0.222, 0.072, 0.056, 0.144, 0.121, 0.09…
## $ pct_other_services_2014 <dbl> 0.000, 0.120, 0.028, 0.069, 0.015, 0.021, 0.03…
## $ pct_public_admin_2014   <dbl> 0.073, 0.072, 0.128, 0.067, 0.164, 0.097, 0.06…
## $ total_pop_2014          <dbl> 2349, 2230, 2931, 2869, 5040, 4451, 2314, 2524…
## $ med_income_2014         <dbl> 10079, 11869, 23865, 25313, 29205, 29427, 1519…
## $ pov_rate_2014           <dbl> 0.74201788, 0.66995516, 0.49451679, 0.33321678…
## $ renter_share_2014       <dbl> 0.55845630, 0.85101580, 0.40265907, 0.34714004…
## $ med_gross_rent_2014     <dbl> 199, 482, 557, 625, 537, 532, 481, 248, 1220, …
## $ rent_burden_30p_2014    <dbl> 0.7052846, 0.6618037, 0.7877358, 0.6164773, 0.…
## $ area_km2_2014           <dbl> 1.1411410, 1.2098756, 0.5938731, 0.7567413, 3.…
## $ pop_density_km2_2014    <dbl> 2058.4661, 1843.1646, 4935.3982, 3791.2558, 13…
## $ year_end_2014           <dbl> 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014…
## $ pct_construction_2022   <dbl> 0.000, 0.040, 0.044, 0.000, 0.054, 0.029, 0.11…
## $ pct_manufacturing_2022  <dbl> 0.121, 0.075, 0.031, 0.042, 0.049, 0.038, 0.10…
## $ pct_wholesale_2022      <dbl> 0.048, 0.085, 0.064, 0.025, 0.026, 0.007, 0.00…
## $ pct_retail_2022         <dbl> 0.075, 0.197, 0.260, 0.027, 0.095, 0.165, 0.02…
## $ pct_transport_2022      <dbl> 0.000, 0.000, 0.067, 0.053, 0.092, 0.060, 0.01…
## $ pct_information_2022    <dbl> 0.000, 0.000, 0.000, 0.025, 0.026, 0.030, 0.00…
## $ pct_finance_re_2022     <dbl> 0.000, 0.030, 0.014, 0.028, 0.069, 0.091, 0.04…
## $ pct_prof_sci_mgmt_2022  <dbl> 0.293, 0.125, 0.084, 0.142, 0.047, 0.053, 0.08…
## $ pct_edu_health_2022     <dbl> 0.223, 0.218, 0.192, 0.344, 0.251, 0.279, 0.14…
## $ pct_tourism_proxy_2022  <dbl> 0.203, 0.128, 0.083, 0.167, 0.145, 0.110, 0.12…
## $ pct_other_services_2022 <dbl> 0.000, 0.101, 0.072, 0.037, 0.072, 0.025, 0.10…
## $ pct_public_admin_2022   <dbl> 0.036, 0.000, 0.089, 0.089, 0.065, 0.108, 0.16…
## $ total_pop_2022          <dbl> 2522, 1639, 2109, 2640, 6101, 4497, 1822, 1784…
## $ med_income_2022         <dbl> 10263, 13447, 26016, 24779, 36454, 37754, 2072…
## $ pov_rate_2022           <dbl> 0.62410785, 0.66076876, 0.39731286, 0.40660592…
## $ renter_share_2022       <dbl> 0.5263158, 0.7765727, 0.3737673, 0.3785366, 0.…
## $ med_gross_rent_2022     <dbl> 220, 431, 452, 532, 827, 513, 501, 237, 2078, …
## $ rent_burden_30p_2022    <dbl> 0.7607143, 0.6117318, 0.7810026, 0.6134021, 0.…
## $ area_km2_2022           <dbl> 1.1411410, 1.2098756, 0.5938731, 0.7567413, 3.…
## $ pop_density_km2_2022    <dbl> 2210.0687, 1354.6847, 3551.2640, 3488.6425, 16…
## $ year_end_2022           <dbl> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022…
summary(is.na(df_full$pct_tourism_proxy_2014))
##    Mode   FALSE 
## logical     823
summary(is.na(df_full$pct_tourism_proxy_2022))
##    Mode   FALSE 
## logical     823
## 컨트롤 포함 회귀 (baseline 2014 컨트롤)
m_full <- lm(
  Change ~
    hotel_density_w +
    dist_to_coast_m +
    pop_density_km2_2014 +
    med_income_2014 +
    pov_rate_2014 +
    renter_share_2014 +
    med_gross_rent_2014 +
    rent_burden_30p_2014 +
    pct_tourism_proxy_2014 +
    pct_manufacturing_2014 +
    pct_construction_2014 +
    pct_public_admin_2014,
  data = df_full
)

summary(m_full)
## 
## Call:
## lm(formula = Change ~ hotel_density_w + dist_to_coast_m + pop_density_km2_2014 + 
##     med_income_2014 + pov_rate_2014 + renter_share_2014 + med_gross_rent_2014 + 
##     rent_burden_30p_2014 + pct_tourism_proxy_2014 + pct_manufacturing_2014 + 
##     pct_construction_2014 + pct_public_admin_2014, data = df_full)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7143 -0.1327 -0.0034  0.1326  0.6720 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.456e-01  9.637e-02   5.662 2.09e-08 ***
## hotel_density_w        -8.311e-03  4.530e-03  -1.835 0.066923 .  
## dist_to_coast_m        -4.360e-06  1.125e-06  -3.874 0.000116 ***
## pop_density_km2_2014    9.988e-06  4.391e-06   2.275 0.023188 *  
## med_income_2014        -6.530e-06  1.686e-06  -3.874 0.000116 ***
## pov_rate_2014          -9.185e-01  1.085e-01  -8.468  < 2e-16 ***
## renter_share_2014       2.678e-01  7.713e-02   3.472 0.000544 ***
## med_gross_rent_2014     8.270e-06  5.710e-05   0.145 0.884877    
## rent_burden_30p_2014   -8.790e-02  7.089e-02  -1.240 0.215379    
## pct_tourism_proxy_2014  1.087e-01  1.651e-01   0.659 0.510288    
## pct_manufacturing_2014  1.007e-01  1.462e-01   0.689 0.491119    
## pct_construction_2014   1.423e-01  1.952e-01   0.729 0.466252    
## pct_public_admin_2014  -8.819e-02  1.731e-01  -0.509 0.610584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2118 on 798 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.2014, Adjusted R-squared:  0.1894 
## F-statistic: 16.77 on 12 and 798 DF,  p-value: < 2.2e-16
# 0) baseline SVI table 준비 (FIPS + Overall_2014 only)
baseline_svi <- comparison %>%
  transmute(
    FIPS = as.character(FIPS),
    Overall_2014 = as.numeric(Overall_2014)
  )

# 1) df_full에 "baseline 2014 Overall SVI" Added! ***********
df_full <- df_full %>%
  mutate(FIPS = as.character(FIPS)) %>%
  left_join(baseline_svi, by = "FIPS")

# 2) 확인
"Overall_2014" %in% names(df_full)
## [1] TRUE
summary(df_full$Overall_2014)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0011  0.2574  0.5000  0.5029  0.7494  1.0000
m_full2 <- lm(
  Change ~
    hotel_density_w + dist_to_coast_m +
    Overall_2014 +
    pop_density_km2_2014 + med_income_2014 + pov_rate_2014 +
    renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 +
    pct_tourism_proxy_2014 + pct_manufacturing_2014 +
    pct_construction_2014 + pct_public_admin_2014,
  data = df_full
)
summary(m_full2)
## 
## Call:
## lm(formula = Change ~ hotel_density_w + dist_to_coast_m + Overall_2014 + 
##     pop_density_km2_2014 + med_income_2014 + pov_rate_2014 + 
##     renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 + 
##     pct_tourism_proxy_2014 + pct_manufacturing_2014 + pct_construction_2014 + 
##     pct_public_admin_2014, data = df_full)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64279 -0.12606 -0.00341  0.12945  0.57072 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.077e-01  8.927e-02   5.687 1.81e-08 ***
## hotel_density_w        -1.135e-02  4.202e-03  -2.701  0.00705 ** 
## dist_to_coast_m        -2.937e-06  1.049e-06  -2.799  0.00525 ** 
## Overall_2014           -5.209e-01  4.498e-02 -11.581  < 2e-16 ***
## pop_density_km2_2014    1.252e-05  4.071e-06   3.076  0.00217 ** 
## med_income_2014        -7.266e-06  1.562e-06  -4.652 3.84e-06 ***
## pov_rate_2014          -2.245e-01  1.169e-01  -1.920  0.05524 .  
## renter_share_2014       3.029e-01  7.147e-02   4.238 2.52e-05 ***
## med_gross_rent_2014    -2.882e-05  5.295e-05  -0.544  0.58649    
## rent_burden_30p_2014   -1.108e-01  6.566e-02  -1.687  0.09190 .  
## pct_tourism_proxy_2014  1.006e-01  1.528e-01   0.658  0.51047    
## pct_manufacturing_2014 -1.329e-02  1.357e-01  -0.098  0.92203    
## pct_construction_2014   3.130e-01  1.813e-01   1.727  0.08464 .  
## pct_public_admin_2014  -9.221e-02  1.603e-01  -0.575  0.56522    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1961 on 797 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.3164, Adjusted R-squared:  0.3053 
## F-statistic: 28.38 on 13 and 797 DF,  p-value: < 2.2e-16
##m_full2 : The regression results indicate that, after controlling for baseline vulnerability and socioeconomic characteristics, hotel density is negatively associated with changes in social vulnerability (β = −0.011, p = 0.007), suggesting that areas with higher hotel density tend to experience smaller increases—or slight decreases—in SVI over time. Coastal proximity is also significantly related to vulnerability change (β = −2.94e−06, p = 0.005), indicating that tracts closer to the coastline tend to experience greater increases in vulnerability. Baseline vulnerability (SVI 2014) shows a strong negative association with change (β = −0.521, p < 0.001), suggesting convergence effects where initially more vulnerable areas tend to experience decreases in SVI over time.

##baseline 포함한 mediation (권장 코드)
library(mediation)
set.seed(123)

df_med2 <- df_full %>%
  dplyr::filter(
    !is.na(Change),
    !is.na(hotel_density_w),
    !is.na(dist_to_coast_m),
    !is.na(Overall_2014)
  )

med.fit2 <- lm(dist_to_coast_m ~ hotel_density_w + Overall_2014, data = df_med2)

out.fit2 <- lm(Change ~ hotel_density_w + dist_to_coast_m + Overall_2014, data = df_med2)

med.out2 <- mediate(
  model.m = med.fit2,
  model.y = out.fit2,
  treat = "hotel_density_w",
  mediator = "dist_to_coast_m",
  sims = 2000,
  boot = TRUE
)

summary(med.out2)
## 
## Causal Mediation Analysis 
## 
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
## 
##                   Estimate 95% CI Lower 95% CI Upper p-value    
## ACME             0.0038582    0.0023442    0.0056569  <2e-16 ***
## ADE             -0.0016882   -0.0086455    0.0059217   0.635    
## Total Effect     0.0021699   -0.0044478    0.0096945   0.562    
## Prop. Mediated   1.7780096  -16.5195449   13.5621255   0.562    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 823 
## 
## 
## Simulations: 2000

#Final result: Mediation analysis further shows a significant indirect effect of hotel density on vulnerability change through coastal proximity (ACME = 0.00386, p < 0.001). However, the direct effect (ADE) and the total effect are not statistically significant, indicating that hotel density does not have a significant overall effect on vulnerability change but may operate indirectly through spatial coastal dynamics.

#3.13

# analysis_sf 또는 pr_change_sf 사용
# 여기서는 df_full + geometry 있는 sf object 필요
library(sf)
library(spdep)
library(spatialreg)
library(dplyr)

# 1) analysis_sf + df_full join
gwr_sf <- analysis_sf %>%
  dplyr::select(FIPS, geometry) %>%
  left_join(
    df_full %>%
      dplyr::select(
        FIPS, Change, hotel_density_w, dist_to_coast_m,
        Overall_2014, pop_density_km2_2014, med_income_2014,
        pov_rate_2014, renter_share_2014, med_gross_rent_2014,
        rent_burden_30p_2014, pct_tourism_proxy_2014,
        pct_manufacturing_2014, pct_construction_2014,
        pct_public_admin_2014
      ),
    by = "FIPS"
  )

# 2) OLS에 들어갈 변수들 기준으로 complete cases만 남김
vars_use <- c(
  "Change", "hotel_density_w", "dist_to_coast_m", "Overall_2014",
  "pop_density_km2_2014", "med_income_2014", "pov_rate_2014",
  "renter_share_2014", "med_gross_rent_2014", "rent_burden_30p_2014",
  "pct_tourism_proxy_2014", "pct_manufacturing_2014",
  "pct_construction_2014", "pct_public_admin_2014"
)

gwr_sf_cc <- gwr_sf %>%
  filter(complete.cases(across(all_of(vars_use))))

nrow(gwr_sf)      # 원래
## [1] 939
nrow(gwr_sf_cc)   # 분석용
## [1] 811
#
# 3) OLS
ols_mod <- lm(
  Change ~ hotel_density_w + dist_to_coast_m + Overall_2014 +
    pop_density_km2_2014 + med_income_2014 + pov_rate_2014 +
    renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 +
    pct_tourism_proxy_2014 + pct_manufacturing_2014 +
    pct_construction_2014 + pct_public_admin_2014,
  data = gwr_sf_cc
)

summary(ols_mod)
## 
## Call:
## lm(formula = Change ~ hotel_density_w + dist_to_coast_m + Overall_2014 + 
##     pop_density_km2_2014 + med_income_2014 + pov_rate_2014 + 
##     renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 + 
##     pct_tourism_proxy_2014 + pct_manufacturing_2014 + pct_construction_2014 + 
##     pct_public_admin_2014, data = gwr_sf_cc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64279 -0.12606 -0.00341  0.12945  0.57072 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.077e-01  8.927e-02   5.687 1.81e-08 ***
## hotel_density_w        -1.135e-02  4.202e-03  -2.701  0.00705 ** 
## dist_to_coast_m        -2.937e-06  1.049e-06  -2.799  0.00525 ** 
## Overall_2014           -5.209e-01  4.498e-02 -11.581  < 2e-16 ***
## pop_density_km2_2014    1.252e-05  4.071e-06   3.076  0.00217 ** 
## med_income_2014        -7.266e-06  1.562e-06  -4.652 3.84e-06 ***
## pov_rate_2014          -2.245e-01  1.169e-01  -1.920  0.05524 .  
## renter_share_2014       3.029e-01  7.147e-02   4.238 2.52e-05 ***
## med_gross_rent_2014    -2.882e-05  5.295e-05  -0.544  0.58649    
## rent_burden_30p_2014   -1.108e-01  6.566e-02  -1.687  0.09190 .  
## pct_tourism_proxy_2014  1.006e-01  1.528e-01   0.658  0.51047    
## pct_manufacturing_2014 -1.329e-02  1.357e-01  -0.098  0.92203    
## pct_construction_2014   3.130e-01  1.813e-01   1.727  0.08464 .  
## pct_public_admin_2014  -9.221e-02  1.603e-01  -0.575  0.56522    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1961 on 797 degrees of freedom
## Multiple R-squared:  0.3164, Adjusted R-squared:  0.3053 
## F-statistic: 28.38 on 13 and 797 DF,  p-value: < 2.2e-16
# 4) neighbor list도 같은 complete-case 데이터로 다시 생성
nb <- poly2nb(gwr_sf_cc, queen = TRUE)
table(card(nb))   # 각 tract의 neighbor 수 확인
## 
##   0   1   2   3   4   5   6   7   8   9  10  11 
##   2  15  29  89 143 185 141  99  69  26  11   2
lw <- nb2listw(nb, style = "W", zero.policy = TRUE)

# 5) Moran's I
moran.test(residuals(ols_mod), lw, zero.policy = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  residuals(ols_mod)  
## weights: lw  
## n reduced by no-neighbour observations  
## 
## Moran I statistic standard deviate = 1.6805, p-value = 0.04643
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0369721900     -0.0012376238      0.0005169998
##2. Spatial lag/error
library(spatialreg)

lag_mod <- lagsarlm(
  Change ~ hotel_density_w + dist_to_coast_m + Overall_2014 +
    pop_density_km2_2014 + med_income_2014 + pov_rate_2014 +
    renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 +
    pct_tourism_proxy_2014 + pct_manufacturing_2014 +
    pct_construction_2014 + pct_public_admin_2014,
  data = gwr_sf_cc,
  listw = lw,
  zero.policy = TRUE
)

err_mod <- errorsarlm(
  Change ~ hotel_density_w + dist_to_coast_m + Overall_2014 +
    pop_density_km2_2014 + med_income_2014 + pov_rate_2014 +
    renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 +
    pct_tourism_proxy_2014 + pct_manufacturing_2014 +
    pct_construction_2014 + pct_public_admin_2014,
  data = gwr_sf_cc,
  listw = lw,
  zero.policy = TRUE
)

summary(lag_mod)
## 
## Call:lagsarlm(formula = Change ~ hotel_density_w + dist_to_coast_m + 
##     Overall_2014 + pop_density_km2_2014 + med_income_2014 + pov_rate_2014 + 
##     renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 + 
##     pct_tourism_proxy_2014 + pct_manufacturing_2014 + pct_construction_2014 + 
##     pct_public_admin_2014, data = gwr_sf_cc, listw = lw, zero.policy = TRUE)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.6457819 -0.1271601 -0.0026882  0.1280568  0.5816327 
## 
## Type: lag 
## Regions with no neighbours included:
##  530 666 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error  z value  Pr(>|z|)
## (Intercept)             5.0116e-01  8.8601e-02   5.6564 1.546e-08
## hotel_density_w        -1.1381e-02  4.1638e-03  -2.7332  0.006271
## dist_to_coast_m        -2.7237e-06  1.0614e-06  -2.5661  0.010284
## Overall_2014           -5.2030e-01  4.4620e-02 -11.6606 < 2.2e-16
## pop_density_km2_2014    1.1882e-05  4.0451e-06   2.9374  0.003309
## med_income_2014        -7.1522e-06  1.5470e-06  -4.6232 3.778e-06
## pov_rate_2014          -2.1163e-01  1.1618e-01  -1.8216  0.068511
## renter_share_2014       2.9453e-01  7.1546e-02   4.1166 3.844e-05
## med_gross_rent_2014    -3.3079e-05  5.2465e-05  -0.6305  0.528369
## rent_burden_30p_2014   -1.1077e-01  6.5033e-02  -1.7034  0.088499
## pct_tourism_proxy_2014  9.9429e-02  1.5138e-01   0.6568  0.511295
## pct_manufacturing_2014 -4.1355e-03  1.3466e-01  -0.0307  0.975500
## pct_construction_2014   3.1301e-01  1.7953e-01   1.7435  0.081245
## pct_public_admin_2014  -8.3601e-02  1.5896e-01  -0.5259  0.598946
## 
## Rho: 0.049868, LR test value: 1.1072, p-value: 0.2927
## Asymptotic standard error: 0.047665
##     z-value: 1.0462, p-value: 0.29546
## Wald statistic: 1.0946, p-value: 0.29546
## 
## Log likelihood: 178.1877 for lag model
## ML residual variance (sigma squared): 0.037711, (sigma: 0.19419)
## Number of observations: 811 
## Number of parameters estimated: 16 
## AIC: -324.38, (AIC for lm: -325.27)
## LM test for residual autocorrelation
## test value: 1.7307, p-value: 0.18832
summary(err_mod)
## 
## Call:errorsarlm(formula = Change ~ hotel_density_w + dist_to_coast_m + 
##     Overall_2014 + pop_density_km2_2014 + med_income_2014 + pov_rate_2014 + 
##     renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 + 
##     pct_tourism_proxy_2014 + pct_manufacturing_2014 + pct_construction_2014 + 
##     pct_public_admin_2014, data = gwr_sf_cc, listw = lw, zero.policy = TRUE)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.6442197 -0.1285401 -0.0038391  0.1298094  0.5827512 
## 
## Type: error 
## Regions with no neighbours included:
##  530 666 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error  z value  Pr(>|z|)
## (Intercept)             5.0304e-01  8.9117e-02   5.6447 1.655e-08
## hotel_density_w        -1.1716e-02  4.3226e-03  -2.7105  0.006719
## dist_to_coast_m        -2.9692e-06  1.1170e-06  -2.6582  0.007855
## Overall_2014           -5.2747e-01  4.4878e-02 -11.7535 < 2.2e-16
## pop_density_km2_2014    1.1653e-05  4.1099e-06   2.8354  0.004577
## med_income_2014        -6.9768e-06  1.5653e-06  -4.4573 8.301e-06
## pov_rate_2014          -2.1334e-01  1.1682e-01  -1.8262  0.067815
## renter_share_2014       3.1209e-01  7.2455e-02   4.3074 1.652e-05
## med_gross_rent_2014    -3.7580e-05  5.2398e-05  -0.7172  0.473254
## rent_burden_30p_2014   -1.1237e-01  6.5294e-02  -1.7210  0.085252
## pct_tourism_proxy_2014  9.9380e-02  1.5336e-01   0.6480  0.516972
## pct_manufacturing_2014 -3.8130e-03  1.3642e-01  -0.0280  0.977702
## pct_construction_2014   3.1801e-01  1.8048e-01   1.7620  0.078065
## pct_public_admin_2014  -8.6535e-02  1.5952e-01  -0.5425  0.587500
## 
## Lambda: 0.082743, LR test value: 2.4964, p-value: 0.1141
## Asymptotic standard error: 0.052678
##     z-value: 1.5707, p-value: 0.11625
## Wald statistic: 2.4672, p-value: 0.11625
## 
## Log likelihood: 178.8823 for error model
## ML residual variance (sigma squared): 0.037614, (sigma: 0.19394)
## Number of observations: 811 
## Number of parameters estimated: 16 
## AIC: -325.76, (AIC for lm: -325.27)
AIC(ols_mod, lag_mod, err_mod)
##         df       AIC
## ols_mod 15 -325.2682
## lag_mod 16 -324.3753
## err_mod 16 -325.7646
# 2) residual Moran’s I 다시 확인
moran.test(residuals(err_mod), lw, zero.policy = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  residuals(err_mod)  
## weights: lw  
## n reduced by no-neighbour observations  
## 
## Moran I statistic standard deviate = -0.063937, p-value = 0.5255
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0026914229     -0.0012376238      0.0005170209
moran.test(residuals(lag_mod), lw, zero.policy = TRUE)
## 
##  Moran I test under randomisation
## 
## data:  residuals(lag_mod)  
## weights: lw  
## n reduced by no-neighbour observations  
## 
## Moran I statistic standard deviate = 0.6451, p-value = 0.2594
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.0134304520     -0.0012376238      0.0005170048
#GWR

library(GWmodel)
# 1) projected CRS 확인 (이미 UTM 32620이면 그대로)
st_crs(gwr_sf_cc)
## Coordinate Reference System:
##   User input: EPSG:32620 
##   wkt:
## PROJCRS["WGS 84 / UTM zone 20N",
##     BASEGEOGCRS["WGS 84",
##         ENSEMBLE["World Geodetic System 1984 ensemble",
##             MEMBER["World Geodetic System 1984 (Transit)"],
##             MEMBER["World Geodetic System 1984 (G730)"],
##             MEMBER["World Geodetic System 1984 (G873)"],
##             MEMBER["World Geodetic System 1984 (G1150)"],
##             MEMBER["World Geodetic System 1984 (G1674)"],
##             MEMBER["World Geodetic System 1984 (G1762)"],
##             MEMBER["World Geodetic System 1984 (G2139)"],
##             MEMBER["World Geodetic System 1984 (G2296)"],
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]],
##             ENSEMBLEACCURACY[2.0]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4326]],
##     CONVERSION["UTM zone 20N",
##         METHOD["Transverse Mercator",
##             ID["EPSG",9807]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",-63,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["Scale factor at natural origin",0.9996,
##             SCALEUNIT["unity",1],
##             ID["EPSG",8805]],
##         PARAMETER["False easting",500000,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["(E)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["(N)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["Navigation and medium accuracy spatial referencing."],
##         AREA["Between 66°W and 60°W, northern hemisphere between equator and 84°N, onshore and offshore. Anguilla. Antigua and Barbuda. Bermuda. Brazil. British Virgin Islands. Canada - New Brunswick; Labrador; Nova Scotia; Nunavut; Prince Edward Island; Quebec. Dominica. Greenland. Grenada. Guadeloupe. Guyana. Martinique. Montserrat. Puerto Rico. St Kitts and Nevis. St Barthelemy. St Lucia. St Maarten, St Martin. St Vincent and the Grenadines. Trinidad and Tobago. Venezuela. US Virgin Islands."],
##         BBOX[0,-66,84,-60]],
##     ID["EPSG",32620]]
# 2) centroid 좌표
coords <- st_coordinates(st_centroid(st_geometry(gwr_sf_cc)))

# 3) distance matrix
dMat <- gw.dist(dp.locat = coords)

# 4) sf -> Spatial
gwr_sp <- as(gwr_sf_cc, "Spatial")

# 5) bandwidth selection
bw <- bw.gwr(
  Change ~ hotel_density_w + dist_to_coast_m + Overall_2014 +
    pop_density_km2_2014 + med_income_2014 + pov_rate_2014 +
    renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 +
    pct_tourism_proxy_2014 + pct_manufacturing_2014 +
    pct_construction_2014 + pct_public_admin_2014,
  data = gwr_sp,
  approach = "AICc",
  kernel = "bisquare",
  adaptive = TRUE,
  dMat = dMat
)
## Adaptive bandwidth (number of nearest neighbours): 508 AICc value: -340.9175 
## Adaptive bandwidth (number of nearest neighbours): 322 AICc value: -314.4702 
## Adaptive bandwidth (number of nearest neighbours): 624 AICc value: -344.451 
## Adaptive bandwidth (number of nearest neighbours): 695 AICc value: -342.796 
## Adaptive bandwidth (number of nearest neighbours): 579 AICc value: -344.0177 
## Adaptive bandwidth (number of nearest neighbours): 650 AICc value: -343.1509 
## Adaptive bandwidth (number of nearest neighbours): 605 AICc value: -344.7079 
## Adaptive bandwidth (number of nearest neighbours): 596 AICc value: -344.7147 
## Adaptive bandwidth (number of nearest neighbours): 588 AICc value: -344.5232 
## Adaptive bandwidth (number of nearest neighbours): 598 AICc value: -344.7142 
## Adaptive bandwidth (number of nearest neighbours): 591 AICc value: -344.6719 
## Adaptive bandwidth (number of nearest neighbours): 595 AICc value: -344.7322 
## Adaptive bandwidth (number of nearest neighbours): 598 AICc value: -344.7142 
## Adaptive bandwidth (number of nearest neighbours): 596 AICc value: -344.7147 
## Adaptive bandwidth (number of nearest neighbours): 597 AICc value: -344.7121 
## Adaptive bandwidth (number of nearest neighbours): 596 AICc value: -344.7147 
## Adaptive bandwidth (number of nearest neighbours): 596 AICc value: -344.7147 
## Adaptive bandwidth (number of nearest neighbours): 595 AICc value: -344.7322
bw
## [1] 595
#GWR
gwr_mod <- gwr.basic(
  Change ~ hotel_density_w + dist_to_coast_m + Overall_2014 +
    pop_density_km2_2014 + med_income_2014 + pov_rate_2014 +
    renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 +
    pct_tourism_proxy_2014 + pct_manufacturing_2014 +
    pct_construction_2014 + pct_public_admin_2014,
  data = gwr_sp,
  bw = bw,
  kernel = "bisquare",
  adaptive = TRUE,
  dMat = dMat
)

gwr_mod
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2026-03-13 14:57:37.22028 
##    Call:
##    gwr.basic(formula = Change ~ hotel_density_w + dist_to_coast_m + 
##     Overall_2014 + pop_density_km2_2014 + med_income_2014 + pov_rate_2014 + 
##     renter_share_2014 + med_gross_rent_2014 + rent_burden_30p_2014 + 
##     pct_tourism_proxy_2014 + pct_manufacturing_2014 + pct_construction_2014 + 
##     pct_public_admin_2014, data = gwr_sp, bw = bw, kernel = "bisquare", 
##     adaptive = TRUE, dMat = dMat)
## 
##    Dependent (y) variable:  Change
##    Independent variables:  hotel_density_w dist_to_coast_m Overall_2014 pop_density_km2_2014 med_income_2014 pov_rate_2014 renter_share_2014 med_gross_rent_2014 rent_burden_30p_2014 pct_tourism_proxy_2014 pct_manufacturing_2014 pct_construction_2014 pct_public_admin_2014
##    Number of data points: 811
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64279 -0.12606 -0.00341  0.12945  0.57072 
## 
##    Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
##    (Intercept)             5.077e-01  8.927e-02   5.687 1.81e-08 ***
##    hotel_density_w        -1.135e-02  4.202e-03  -2.701  0.00705 ** 
##    dist_to_coast_m        -2.937e-06  1.049e-06  -2.799  0.00525 ** 
##    Overall_2014           -5.209e-01  4.498e-02 -11.581  < 2e-16 ***
##    pop_density_km2_2014    1.252e-05  4.071e-06   3.076  0.00217 ** 
##    med_income_2014        -7.266e-06  1.562e-06  -4.652 3.84e-06 ***
##    pov_rate_2014          -2.245e-01  1.169e-01  -1.920  0.05524 .  
##    renter_share_2014       3.029e-01  7.147e-02   4.238 2.52e-05 ***
##    med_gross_rent_2014    -2.882e-05  5.295e-05  -0.544  0.58649    
##    rent_burden_30p_2014   -1.108e-01  6.566e-02  -1.687  0.09190 .  
##    pct_tourism_proxy_2014  1.006e-01  1.528e-01   0.658  0.51047    
##    pct_manufacturing_2014 -1.329e-02  1.357e-01  -0.098  0.92203    
##    pct_construction_2014   3.130e-01  1.813e-01   1.727  0.08464 .  
##    pct_public_admin_2014  -9.221e-02  1.603e-01  -0.575  0.56522    
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 0.1961 on 797 degrees of freedom
##    Multiple R-squared: 0.3164
##    Adjusted R-squared: 0.3053 
##    F-statistic: 28.38 on 13 and 797 DF,  p-value: < 2.2e-16 
##    ***Extra Diagnostic information
##    Residual sum of squares: 30.6407
##    Sigma(hat): 0.1946144
##    AIC:  -325.2682
##    AICc:  -324.6644
##    BIC:  -965.3201
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: bisquare 
##    Adaptive bandwidth: 595 (number of nearest neighbours)
##    Regression points: the same locations as observations are used.
##    Distance metric: A distance matrix is specified for this model calibration.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                                  Min.     1st Qu.      Median     3rd Qu.
##    Intercept               3.7841e-01  4.2777e-01  4.3501e-01  5.9315e-01
##    hotel_density_w        -4.9588e-02 -1.0294e-02 -1.0228e-02 -1.0092e-02
##    dist_to_coast_m        -6.3041e-06 -4.9980e-06 -3.9970e-06 -1.9408e-06
##    Overall_2014           -7.3212e-01 -5.9999e-01 -5.4639e-01 -5.0689e-01
##    pop_density_km2_2014    4.5868e-06  7.2463e-06  8.7170e-06  1.3052e-05
##    med_income_2014        -9.8756e-06 -7.8844e-06 -7.7154e-06 -7.4028e-06
##    pov_rate_2014          -3.1263e-01 -2.4265e-01 -2.0017e-01 -1.2371e-01
##    renter_share_2014       1.6327e-01  3.3030e-01  3.4069e-01  3.6038e-01
##    med_gross_rent_2014    -1.3366e-04 -7.3855e-05 -2.1634e-05  8.9655e-07
##    rent_burden_30p_2014   -2.6715e-01 -2.0538e-01 -5.4356e-03 -2.1612e-03
##    pct_tourism_proxy_2014 -2.9619e-01 -4.2581e-03  4.9260e-02  1.9367e-01
##    pct_manufacturing_2014 -3.2093e-01 -5.2738e-02  6.1336e-02  2.1181e-01
##    pct_construction_2014   5.4365e-02  2.6197e-01  3.2735e-01  4.2503e-01
##    pct_public_admin_2014  -5.5084e-01 -3.0357e-01  1.0205e-01  1.3755e-01
##                              Max.
##    Intercept               0.6195
##    hotel_density_w         0.0062
##    dist_to_coast_m         0.0000
##    Overall_2014           -0.4654
##    pop_density_km2_2014    0.0000
##    med_income_2014         0.0000
##    pov_rate_2014           0.0860
##    renter_share_2014       0.3980
##    med_gross_rent_2014     0.0000
##    rent_burden_30p_2014    0.0531
##    pct_tourism_proxy_2014  0.6473
##    pct_manufacturing_2014  0.5301
##    pct_construction_2014   0.8016
##    pct_public_admin_2014   0.4161
##    ************************Diagnostic information*************************
##    Number of data points: 811 
##    Effective number of parameters (2trace(S) - trace(S'S)): 48.62106 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 762.3789 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -344.7322 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -389.1577 
##    BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): -981.8413 
##    Residual sum of squares: 28.0307 
##    R-square value:  0.3746381 
##    Adjusted R-square value:  0.334703 
## 
##    ***********************************************************************
##    Program stops at: 2026-03-13 14:57:37.502351
gwr_mod$GW.diagnostic
## $RSS.gw
## [1] 28.0307
## 
## $AIC
## [1] -389.1577
## 
## $AICc
## [1] -344.7322
## 
## $enp
## [1] 48.62106
## 
## $edf
## [1] 762.3789
## 
## $gw.R2
## [1] 0.3746381
## 
## $gwR2.adj
## [1] 0.334703
## 
## $BIC
## [1] -981.8413
#Hotel density coefficient map
library(ggplot2)
gwr_sf_map <- st_as_sf(gwr_mod$SDF)

ggplot(gwr_sf_map) +
  geom_sf(aes(fill = hotel_density_w), color = NA) +
  scale_fill_distiller(palette = "RdBu", direction = -1) +
  theme_minimal() +
  labs(
    title = "Local GWR Coefficient for Hotel Density",
    fill = "Local beta"
  )

#Distance to coast coefficient map
ggplot(gwr_sf_map) +
  geom_sf(aes(fill = dist_to_coast_m), color = NA) +
  scale_fill_distiller(palette = "RdBu", direction = -1) +
  theme_minimal() +
  labs(
    title = "Local GWR Coefficient for Distance to Coast",
    fill = "Local beta"
  )

#local R2 map
ggplot(gwr_sf_map) +
  geom_sf(aes(fill = Local_R2), color = NA) +
  scale_fill_viridis_c() +
  theme_minimal() +
  labs(
    title = "Local R-squared from GWR",
    fill = "Local R2"
  )

#OLS vs Spatial error vs GWR
model_compare <- data.frame(
  Model = c("OLS", "Spatial Error", "GWR"),
  AIC = c(
    AIC(ols_mod),
    AIC(err_mod),
    gwr_mod$GW.diagnostic$AIC
  ),
  AICc = c(
    -324.6644,   # OLS output에서 확인한 값
    NA,          # spatial error는 보통 AIC 사용
    gwr_mod$GW.diagnostic$AICc
  ),
  R2 = c(
    summary(ols_mod)$r.squared,
    NA,
    gwr_mod$GW.diagnostic$gw.R2
  ),
  Adj_R2 = c(
    summary(ols_mod)$adj.r.squared,
    NA,
    gwr_mod$GW.diagnostic$gwR2.adj
  )
)

model_compare
##           Model       AIC      AICc        R2    Adj_R2
## 1           OLS -325.2682 -324.6644 0.3164094 0.3052592
## 2 Spatial Error -325.7646        NA        NA        NA
## 3           GWR -389.1577 -344.7322 0.3746381 0.3347030
#local coefficient summary table
gwr_table <- summary(gwr_mod$SDF@data[, c(
  "hotel_density_w",
  "dist_to_coast_m",
  "Local_R2"
)])

gwr_table
##  hotel_density_w     dist_to_coast_m         Local_R2     
##  Min.   :-0.049588   Min.   :-6.304e-06   Min.   :0.1828  
##  1st Qu.:-0.010294   1st Qu.:-4.998e-06   1st Qu.:0.3610  
##  Median :-0.010228   Median :-3.997e-06   Median :0.3660  
##  Mean   :-0.015954   Mean   :-3.508e-06   Mean   :0.3711  
##  3rd Qu.:-0.010092   3rd Qu.:-1.941e-06   3rd Qu.:0.3809  
##  Max.   : 0.006164   Max.   :-1.228e-07   Max.   :0.4095
names(gwr_mod$SDF@data)
##  [1] "Intercept"                 "hotel_density_w"          
##  [3] "dist_to_coast_m"           "Overall_2014"             
##  [5] "pop_density_km2_2014"      "med_income_2014"          
##  [7] "pov_rate_2014"             "renter_share_2014"        
##  [9] "med_gross_rent_2014"       "rent_burden_30p_2014"     
## [11] "pct_tourism_proxy_2014"    "pct_manufacturing_2014"   
## [13] "pct_construction_2014"     "pct_public_admin_2014"    
## [15] "y"                         "yhat"                     
## [17] "residual"                  "CV_Score"                 
## [19] "Stud_residual"             "Intercept_SE"             
## [21] "hotel_density_w_SE"        "dist_to_coast_m_SE"       
## [23] "Overall_2014_SE"           "pop_density_km2_2014_SE"  
## [25] "med_income_2014_SE"        "pov_rate_2014_SE"         
## [27] "renter_share_2014_SE"      "med_gross_rent_2014_SE"   
## [29] "rent_burden_30p_2014_SE"   "pct_tourism_proxy_2014_SE"
## [31] "pct_manufacturing_2014_SE" "pct_construction_2014_SE" 
## [33] "pct_public_admin_2014_SE"  "Intercept_TV"             
## [35] "hotel_density_w_TV"        "dist_to_coast_m_TV"       
## [37] "Overall_2014_TV"           "pop_density_km2_2014_TV"  
## [39] "med_income_2014_TV"        "pov_rate_2014_TV"         
## [41] "renter_share_2014_TV"      "med_gross_rent_2014_TV"   
## [43] "rent_burden_30p_2014_TV"   "pct_tourism_proxy_2014_TV"
## [45] "pct_manufacturing_2014_TV" "pct_construction_2014_TV" 
## [47] "pct_public_admin_2014_TV"  "Local_R2"
# make it better dataframe
coef_summary <- data.frame(
  Variable = c("hotel_density_w", "dist_to_coast_m", "Local_R2"),
  Min = c(
    min(gwr_mod$SDF@data$hotel_density_w, na.rm = TRUE),
    min(gwr_mod$SDF@data$dist_to_coast_m, na.rm = TRUE),
    min(gwr_mod$SDF@data$Local_R2, na.rm = TRUE)
  ),
  Q1 = c(
    quantile(gwr_mod$SDF@data$hotel_density_w, 0.25, na.rm = TRUE),
    quantile(gwr_mod$SDF@data$dist_to_coast_m, 0.25, na.rm = TRUE),
    quantile(gwr_mod$SDF@data$Local_R2, 0.25, na.rm = TRUE)
  ),
  Median = c(
    median(gwr_mod$SDF@data$hotel_density_w, na.rm = TRUE),
    median(gwr_mod$SDF@data$dist_to_coast_m, na.rm = TRUE),
    median(gwr_mod$SDF@data$Local_R2, na.rm = TRUE)
  ),
  Q3 = c(
    quantile(gwr_mod$SDF@data$hotel_density_w, 0.75, na.rm = TRUE),
    quantile(gwr_mod$SDF@data$dist_to_coast_m, 0.75, na.rm = TRUE),
    quantile(gwr_mod$SDF@data$Local_R2, 0.75, na.rm = TRUE)
  ),
  Max = c(
    max(gwr_mod$SDF@data$hotel_density_w, na.rm = TRUE),
    max(gwr_mod$SDF@data$dist_to_coast_m, na.rm = TRUE),
    max(gwr_mod$SDF@data$Local_R2, na.rm = TRUE)
  )
)

coef_summary
##          Variable           Min            Q1        Median            Q3
## 1 hotel_density_w -4.958781e-02 -1.029360e-02 -1.022781e-02 -1.009152e-02
## 2 dist_to_coast_m -6.304141e-06 -4.997987e-06 -3.997007e-06 -1.940775e-06
## 3        Local_R2  1.828277e-01  3.610301e-01  3.659745e-01  3.808867e-01
##             Max
## 1  6.164256e-03
## 2 -1.228044e-07
## 3  4.095010e-01