library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(ggplot2)
library(lfe)
## Warning: package 'lfe' was built under R version 4.3.2
## Loading required package: Matrix
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(tmap)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ stringr 1.5.0
## ✔ purrr   1.0.1     ✔ tibble  3.2.1
## ✔ readr   2.1.4     ✔ tidyr   1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()      masks data.table::between()
## ✖ tidyr::expand()       masks Matrix::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks data.table::first()
## ✖ data.table::hour()    masks lubridate::hour()
## ✖ data.table::isoweek() masks lubridate::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ dplyr::last()         masks data.table::last()
## ✖ data.table::mday()    masks lubridate::mday()
## ✖ data.table::minute()  masks lubridate::minute()
## ✖ data.table::month()   masks lubridate::month()
## ✖ tidyr::pack()         masks Matrix::pack()
## ✖ data.table::quarter() masks lubridate::quarter()
## ✖ data.table::second()  masks lubridate::second()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ tidyr::unpack()       masks Matrix::unpack()
## ✖ data.table::wday()    masks lubridate::wday()
## ✖ data.table::week()    masks lubridate::week()
## ✖ data.table::yday()    masks lubridate::yday()
## ✖ data.table::year()    masks lubridate::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lfe)
library(slider)
## Warning: package 'slider' was built under R version 4.3.2
##POIs
load("E:/nine_city_POIs_address.RData")
load("E:/POI_gridmet_daily_weather_data/all_POI.RData")

combined_data <- st_drop_geometry(combined_data)
POIs <-merge(combined_data_cityarea, combined_data, by="PLACEKEY", all.x = FALSE)
POIs <-POIs[!duplicated(POIs$PLACEKEY), ]
POIs <-POIs[,c(1,6,8,9,10,12,14,16)]
POIs <-st_drop_geometry(POIs)

POIs$new_tags <- sub(",.*", "", POIs$CATEGORY_TAGS)

newscheme <-fread("E:/final_categorized_restaurant_types.csv")

POIs <- merge(POIs,newscheme, by="new_tags", all.x = TRUE)
POIs$new_schemes[is.na(POIs$new_schemes)] <- "Others"
POIs$new_tags <- NULL
POIs$CATEGORY_TAGS <- NULL

POIs <-POIs[,c(1,5)]


#### filter out outliers
filesave <-paste0("E:/POI_gridmet_daily_weather_data/7cities_heat_spend_visit_climate",".RData")
load(filesave)

Q1 <- quantile(df$visit, 0.25, na.rm = TRUE) # 1st Quartile
Q3 <- quantile(df$visit, 0.75, na.rm = TRUE) # 3rd Quartile
IQR <- Q3 - Q1 # Interquartile Range

lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR

df <- df[df$visit >= lower_bound & df$visit <= upper_bound, ]
df <- df[!is.na(df$NAME), ]

############Baseline regression##################
FE_heat_visit <- felm(visit ~ extreme_heat_100F + pr+rmax + wind| 
                             place_month + date|0, 
                           data = df)
summary(FE_heat_visit)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + pr + rmax + wind |      place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.792  -2.910  -0.326   2.318  40.970 
## 
## Coefficients:
##                     Estimate Std. Error  t value Pr(>|t|)    
## extreme_heat_100F -1.6255860  0.0154238 -105.395   <2e-16 ***
## pr                 0.0053485  0.0005746    9.308   <2e-16 ***
## rmax               0.0452110  0.0005060   89.343   <2e-16 ***
## wind              -0.1992387  0.0049609  -40.162   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.761 on 2049902 degrees of freedom
## Multiple R-squared(full model): 0.6727   Adjusted R-squared: 0.6676 
## Multiple R-squared(proj model): 0.01396   Adjusted R-squared: -0.001348 
## F-statistic(full model):132.4 on 31831 and 2049902 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  7257 on 4 and 2049902 DF, p-value: < 2.2e-16
FE_heat_visit_2day <- felm(visit ~ extreme_heat_100F_2day + pr+rmax + wind| 
                        place_month + date|0, 
                      data = df)
summary(FE_heat_visit_2day)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F_2day + pr + rmax + wind |      place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.821  -2.906  -0.316   2.310  41.806 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F_2day -1.9341549  0.0151806 -127.41   <2e-16 ***
## pr                      0.0058535  0.0005739   10.20   <2e-16 ***
## rmax                    0.0384145  0.0005143   74.69   <2e-16 ***
## wind                   -0.2006142  0.0049521  -40.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.754 on 2049902 degrees of freedom
## Multiple R-squared(full model): 0.6735   Adjusted R-squared: 0.6685 
## Multiple R-squared(proj model): 0.01641   Adjusted R-squared: 0.001136 
## F-statistic(full model):132.9 on 31831 and 2049902 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  8550 on 4 and 2049902 DF, p-value: < 2.2e-16
FE_heat_visit_90F <- felm(visit ~ extreme_heat_90F + pr+rmax + wind| 
                             place_month + date|0, 
                           data = df)
summary(FE_heat_visit_90F)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_90F + pr + rmax + wind |      place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.880  -2.875  -0.336   2.291  41.698 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_90F -1.7152096  0.0145615 -117.79  < 2e-16 ***
## pr               -0.0033772  0.0005764   -5.86 4.64e-09 ***
## rmax              0.0477020  0.0004968   96.02  < 2e-16 ***
## wind             -0.1576220  0.0049479  -31.86  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.757 on 2049902 degrees of freedom
## Multiple R-squared(full model): 0.6732   Adjusted R-squared: 0.6681 
## Multiple R-squared(proj model): 0.01529   Adjusted R-squared: -5.389e-06 
## F-statistic(full model):132.6 on 31831 and 2049902 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  7955 on 4 and 2049902 DF, p-value: < 2.2e-16
FE_heat_visit_90F_2day <- felm(visit ~ extreme_heat_90F_2day + pr+rmax + wind| 
                            place_month + date|0, 
                          data = df)
summary(FE_heat_visit_90F_2day)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_90F_2day + pr + rmax + wind |      place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.277  -2.858  -0.329   2.272  41.655 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_90F_2day -2.3106887  0.0153403 -150.63   <2e-16 ***
## pr                    -0.0084587  0.0005776  -14.64   <2e-16 ***
## rmax                   0.0423291  0.0004986   84.89   <2e-16 ***
## wind                  -0.1202396  0.0049462  -24.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.745 on 2049902 degrees of freedom
## Multiple R-squared(full model): 0.6746   Adjusted R-squared: 0.6695 
## Multiple R-squared(proj model): 0.01947   Adjusted R-squared: 0.004247 
## F-statistic(full model):133.5 on 31831 and 2049902 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 1.018e+04 on 4 and 2049902 DF, p-value: < 2.2e-16
###6 cities, restaurant heat exposure
MSAs <- unique(df$NAME)

for (msa in MSAs){
  print(msa)
  df_msa <- df[df$NAME==msa,]
  
  print("Number of Observations")
  print(nrow(df_msa))
  FE_heat_city <- felm(visit ~ extreme_heat_100F + pr+rmax + wind| 
                         place_month + date|0, 
                       data = df_msa)
  print(summary(FE_heat_city))
}
## [1] "Dallas-Fort Worth-Arlington, TX"
## [1] "Number of Observations"
## [1] 657273
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + pr + rmax + wind |      place_month + date | 0, data = df_msa) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.460  -3.021  -0.326   2.451  42.877 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F -1.274899   0.049226 -25.899  < 2e-16 ***
## pr                 0.006742   0.001288   5.232 1.67e-07 ***
## rmax               0.012328   0.002758   4.470 7.82e-06 ***
## wind               0.153106   0.037972   4.032 5.53e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.806 on 647062 degrees of freedom
## Multiple R-squared(full model): 0.6936   Adjusted R-squared: 0.6887 
## Multiple R-squared(proj model): 0.001133   Adjusted R-squared: -0.01463 
## F-statistic(full model):143.4 on 10210 and 647062 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 183.5 on 4 and 647062 DF, p-value: < 2.2e-16 
## 
## 
## [1] "Las Vegas-Henderson-Paradise, NV"
## [1] "Number of Observations"
## [1] 183405
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + pr + rmax + wind |      place_month + date | 0, data = df_msa) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.743  -2.817  -0.271   2.197  42.084 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F -1.463555   0.070686 -20.705  < 2e-16 ***
## pr                -0.009383   0.018292  -0.513 0.607966    
## rmax              -0.038606   0.010633  -3.631 0.000283 ***
## wind              -0.158675   0.067731  -2.343 0.019145 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.391 on 180212 degrees of freedom
## Multiple R-squared(full model): 0.6494   Adjusted R-squared: 0.6432 
## Multiple R-squared(proj model): 0.002505   Adjusted R-squared: -0.01516 
## F-statistic(full model):104.6 on 3192 and 180212 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 113.1 on 4 and 180212 DF, p-value: < 2.2e-16 
## 
## 
## [1] "Phoenix-Mesa-Chandler, AZ"
## [1] "Number of Observations"
## [1] 400868
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + pr + rmax + wind |      place_month + date | 0, data = df_msa) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.790  -2.452  -0.131   1.756  41.076 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F -1.287473   0.053949 -23.865   <2e-16 ***
## pr                -0.013664   0.006514  -2.098   0.0359 *  
## rmax               0.013714   0.005726   2.395   0.0166 *  
## wind              -0.142101   0.054164  -2.624   0.0087 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.238 on 394618 degrees of freedom
## Multiple R-squared(full model): 0.6748   Adjusted R-squared: 0.6696 
## Multiple R-squared(proj model): 0.001485   Adjusted R-squared: -0.01433 
## F-statistic(full model):  131 on 6249 and 394618 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 146.7 on 4 and 394618 DF, p-value: < 2.2e-16 
## 
## 
## [1] "Houston-The Woodlands-Sugar Land, TX"
## [1] "Number of Observations"
## [1] 482058
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + pr + rmax + wind |      place_month + date | 0, data = df_msa) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.879  -2.694  -0.316   2.285  39.526 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F -2.195977   0.058050 -37.829  < 2e-16 ***
## pr                 0.004359   0.001409   3.094  0.00197 ** 
## rmax              -0.003192   0.003294  -0.969  0.33253    
## wind              -0.205384   0.039086  -5.255 1.48e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.657 on 474271 degrees of freedom
## Multiple R-squared(full model): 0.6869   Adjusted R-squared: 0.6817 
## Multiple R-squared(proj model): 0.003137   Adjusted R-squared: -0.01323 
## F-statistic(full model):133.6 on 7786 and 474271 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 373.2 on 4 and 474271 DF, p-value: < 2.2e-16 
## 
## 
## [1] "San Antonio-New Braunfels, TX"
## [1] "Number of Observations"
## [1] 188606
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + pr + rmax + wind |      place_month + date | 0, data = df_msa) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.371  -3.077  -0.277   2.475  39.974 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F -1.328893   0.082016 -16.203   <2e-16 ***
## pr                 0.004411   0.004421   0.998   0.3184    
## rmax               0.015977   0.007957   2.008   0.0446 *  
## wind              -0.094939   0.086381  -1.099   0.2717    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.893 on 185304 degrees of freedom
## Multiple R-squared(full model): 0.6648   Adjusted R-squared: 0.6589 
## Multiple R-squared(proj model): 0.001446   Adjusted R-squared: -0.01634 
## F-statistic(full model):111.3 on 3301 and 185304 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 67.09 on 4 and 185304 DF, p-value: < 2.2e-16 
## 
## 
## [1] "Austin-Round Rock-Georgetown, TX"
## [1] "Number of Observations"
## [1] 169524
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + pr + rmax + wind |      place_month + date | 0, data = df_msa) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.354  -2.852  -0.269   2.305  41.350 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F -1.343666   0.090525 -14.843   <2e-16 ***
## pr                 0.007830   0.003383   2.315   0.0206 *  
## rmax              -0.006215   0.007079  -0.878   0.3800    
## wind               0.092561   0.082382   1.124   0.2612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.727 on 166590 degrees of freedom
## Multiple R-squared(full model): 0.6863   Adjusted R-squared: 0.6808 
## Multiple R-squared(proj model): 0.001382   Adjusted R-squared: -0.0162 
## F-statistic(full model):124.3 on 2933 and 166590 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 57.62 on 4 and 166590 DF, p-value: < 2.2e-16
##air temp only
FE_airtemp_visit<- felm(visit ~ tmax + pr+rmax + wind| 
                                place_month + date|0, 
                              data = df)
summary(FE_airtemp_visit)
## 
## Call:
##    felm(formula = visit ~ tmax + pr + rmax + wind | place_month +      date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.865  -2.866  -0.317   2.291  42.069 
## 
## Coefficients:
##        Estimate Std. Error  t value Pr(>|t|)    
## tmax -0.4225541  0.0022037 -191.745  < 2e-16 ***
## pr   -0.0035066  0.0005717   -6.134 8.57e-10 ***
## rmax  0.0073570  0.0005558   13.237  < 2e-16 ***
## wind -0.0781336  0.0049414  -15.812  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.725 on 2049902 degrees of freedom
## Multiple R-squared(full model): 0.6767   Adjusted R-squared: 0.6717 
## Multiple R-squared(proj model): 0.02609   Adjusted R-squared: 0.01097 
## F-statistic(full model):134.8 on 31831 and 2049902 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 1.373e+04 on 4 and 2049902 DF, p-value: < 2.2e-16
###LST as temperature

load("E:/POI_gridmet_daily_weather_data/df_spend_visit_climate_extreme_lst.RData")
print(nrow(df_lst_spend))
## [1] 269456
FE_lst_visit<- felm(visit ~ LST + pr+rmax + wind| 
                            place_month + date|0, 
                          data = df_lst_spend)

summary(FE_lst_visit)
## 
## Call:
##    felm(formula = visit ~ LST + pr + rmax + wind | place_month +      date | 0, data = df_lst_spend) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7773.7    -4.2     0.0     3.7  7773.7 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)    
## LST  -1.481204   0.232312  -6.376 1.82e-10 ***
## pr   -0.061972   0.137172  -0.452 0.651426    
## rmax -0.008155   0.087967  -0.093 0.926135    
## wind -6.530353   1.836304  -3.556 0.000376 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 121.8 on 233792 degrees of freedom
##   (3696 observations deleted due to missingness)
## Multiple R-squared(full model): 0.7629   Adjusted R-squared: 0.7305 
## Multiple R-squared(proj model): 0.0002475   Adjusted R-squared: -0.1365 
## F-statistic(full model):23.53 on 31967 and 233792 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 14.47 on 4 and 233792 DF, p-value: 8.139e-12
#########mobility-weighted########

weekly_visit_spend_climate <- df %>%
  group_by(PLACEKEY, DATE_RANGE_START) %>%
  summarize(average_spend = mean(spend_pertransaction, na.rm = TRUE), 
            average_visit = mean(visit, na.rm = TRUE), 
            average_pr = mean(pr, na.rm = TRUE),
            average_rmax = mean(rmax, na.rm = TRUE),
            average_wind = mean(wind, na.rm = TRUE))
## `summarise()` has grouped output by 'PLACEKEY'. You can override using the
## `.groups` argument.
# filesave <-paste0("E:/POI_gridmet_daily_weather_data/df_weekly_spend_visit_climate",".RData")
# save(weekly_visit_spend_climate, file=filesave)

files <- list.files(path = "E:/POI_location_temp/mobility_extreme_heat",  full.names = TRUE) 
weighted_heat <- data.frame()
#combine three-year (2019, 2022, 2023) data
for (i in 1:3){
  print(i)
  load(files[i])
  weighted_heat <-rbind(weighted_heat, POI_weighted_heat)
}
## [1] 1
## [1] 2
## [1] 3
weekly_visit_spend_climate <- merge(weekly_visit_spend_climate, POIs, by="PLACEKEY")

weekly_visit_spend_climate <- merge(weekly_visit_spend_climate,weighted_heat, by=c("PLACEKEY","DATE_RANGE_START"))

mobility_weighted_visit <- felm(average_visit ~ weighted_heat +average_pr+average_rmax + average_wind| 
                            PLACEKEY + DATE_RANGE_START|0, 
                          data = weekly_visit_spend_climate)

summary(mobility_weighted_visit)
## 
## Call:
##    felm(formula = average_visit ~ weighted_heat + average_pr + average_rmax +      average_wind | PLACEKEY + DATE_RANGE_START | 0, data = weekly_visit_spend_climate) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.424  -2.313  -0.076   2.127  38.770 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## weighted_heat -0.030991   0.000513 -60.406   <2e-16 ***
## average_pr    -0.003131   0.003124  -1.002    0.316    
## average_rmax   0.026865   0.001344  19.991   <2e-16 ***
## average_wind  -0.227637   0.020673 -11.011   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.963 on 233539 degrees of freedom
## Multiple R-squared(full model): 0.7934   Adjusted R-squared: 0.7859 
## Multiple R-squared(proj model): 0.02291   Adjusted R-squared: -0.0125 
## F-statistic(full model):  106 on 8463 and 233539 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  1369 on 4 and 233539 DF, p-value: < 2.2e-16
for (msa in MSAs){
  print(msa)
  df_mobility <- weekly_visit_spend_climate[weekly_visit_spend_climate$NAME==msa,]
  print(nrow(df_mobility))
  mobility_weighted <- felm(average_visit ~ weighted_heat +average_pr+average_rmax + average_wind| 
                              PLACEKEY + DATE_RANGE_START|0, 
                            data = df_mobility)
  print(summary(mobility_weighted))
}
## [1] "Dallas-Fort Worth-Arlington, TX"
## [1] 79107
## 
## Call:
##    felm(formula = average_visit ~ weighted_heat + average_pr + average_rmax +      average_wind | PLACEKEY + DATE_RANGE_START | 0, data = df_mobility) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.507  -2.436  -0.098   2.192  34.416 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## weighted_heat -0.016012   0.002213  -7.234 4.73e-13 ***
## average_pr    -0.003107   0.006143  -0.506    0.613    
## average_rmax  -0.005384   0.006970  -0.772    0.440    
## average_wind   0.061048   0.059417   1.027    0.304    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.971 on 76442 degrees of freedom
## Multiple R-squared(full model): 0.7995   Adjusted R-squared: 0.7925 
## Multiple R-squared(proj model): 0.0007043   Adjusted R-squared: -0.03412 
## F-statistic(full model):114.4 on 2664 and 76442 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 13.47 on 4 and 76442 DF, p-value: 5.638e-11 
## 
## 
## [1] "Las Vegas-Henderson-Paradise, NV"
## [1] 22545
## 
## Call:
##    felm(formula = average_visit ~ weighted_heat + average_pr + average_rmax +      average_wind | PLACEKEY + DATE_RANGE_START | 0, data = df_mobility) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.997  -2.100  -0.106   2.016  37.071 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## weighted_heat -0.017132   0.003246  -5.278 1.32e-07 ***
## average_pr    -0.003814   0.089239  -0.043   0.9659    
## average_rmax  -0.054045   0.021006  -2.573   0.0101 *  
## average_wind  -0.180792   0.144565  -1.251   0.2111    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.742 on 21697 degrees of freedom
## Multiple R-squared(full model): 0.767   Adjusted R-squared: 0.7579 
## Multiple R-squared(proj model): 0.001731   Adjusted R-squared: -0.03724 
## F-statistic(full model):84.34 on 847 and 21697 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 9.406 on 4 and 21697 DF, p-value: 1.359e-07 
## 
## 
## [1] "Phoenix-Mesa-Chandler, AZ"
## [1] 50197
## 
## Call:
##    felm(formula = average_visit ~ weighted_heat + average_pr + average_rmax +      average_wind | PLACEKEY + DATE_RANGE_START | 0, data = df_mobility) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.763  -1.879  -0.089   1.736  39.270 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## weighted_heat -0.014518   0.002522  -5.757 8.61e-09 ***
## average_pr    -0.037923   0.037002  -1.025 0.305423    
## average_rmax  -0.032693   0.011501  -2.843 0.004477 ** 
## average_wind  -0.469895   0.129201  -3.637 0.000276 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.367 on 48579 degrees of freedom
## Multiple R-squared(full model): 0.7995   Adjusted R-squared: 0.7928 
## Multiple R-squared(proj model): 0.001222   Adjusted R-squared: -0.03202 
## F-statistic(full model):119.8 on 1617 and 48579 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 14.86 on 4 and 48579 DF, p-value: 3.869e-12 
## 
## 
## [1] "Houston-The Woodlands-Sugar Land, TX"
## [1] 48817
## 
## Call:
##    felm(formula = average_visit ~ weighted_heat + average_pr + average_rmax +      average_wind | PLACEKEY + DATE_RANGE_START | 0, data = df_mobility) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.518  -2.332  -0.052   2.346  33.812 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## weighted_heat -0.020028   0.002232  -8.972   <2e-16 ***
## average_pr    -0.005783   0.007505  -0.771    0.441    
## average_rmax   0.011473   0.010633   1.079    0.281    
## average_wind  -0.070628   0.079097  -0.893    0.372    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.139 on 46796 degrees of freedom
## Multiple R-squared(full model): 0.7905   Adjusted R-squared: 0.7814 
## Multiple R-squared(proj model): 0.001934   Adjusted R-squared: -0.04115 
## F-statistic(full model): 87.4 on 2020 and 46796 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 22.67 on 4 and 46796 DF, p-value: < 2.2e-16 
## 
## 
## [1] "San Antonio-New Braunfels, TX"
## [1] 24628
## 
## Call:
##    felm(formula = average_visit ~ weighted_heat + average_pr + average_rmax +      average_wind | PLACEKEY + DATE_RANGE_START | 0, data = df_mobility) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.571  -2.456  -0.023   2.158  34.047 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## weighted_heat -0.0173357  0.0034202  -5.069 4.04e-07 ***
## average_pr     0.0369026  0.0202338   1.824   0.0682 .  
## average_rmax   0.0008595  0.0200981   0.043   0.9659    
## average_wind   0.2115125  0.1367440   1.547   0.1219    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.119 on 23768 degrees of freedom
## Multiple R-squared(full model): 0.7732   Adjusted R-squared: 0.765 
## Multiple R-squared(proj model): 0.001315   Adjusted R-squared: -0.03478 
## F-statistic(full model):94.34 on 859 and 23768 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 7.822 on 4 and 23768 DF, p-value: 2.698e-06 
## 
## 
## [1] "Austin-Round Rock-Georgetown, TX"
## [1] 16709
## 
## Call:
##    felm(formula = average_visit ~ weighted_heat + average_pr + average_rmax +      average_wind | PLACEKEY + DATE_RANGE_START | 0, data = df_mobility) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.342  -2.384  -0.013   2.236  31.950 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## weighted_heat -0.009564   0.003943  -2.426   0.0153 *
## average_pr     0.029237   0.022686   1.289   0.1975  
## average_rmax  -0.037008   0.021383  -1.731   0.0835 .
## average_wind  -0.222048   0.137611  -1.614   0.1066  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.284 on 15977 degrees of freedom
## Multiple R-squared(full model): 0.7777   Adjusted R-squared: 0.7675 
## Multiple R-squared(proj model): 0.0007823   Adjusted R-squared: -0.04494 
## F-statistic(full model):76.45 on 731 and 15977 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 3.127 on 4 and 15977 DF, p-value: 0.01397
################IV regression##################
###############################################
#load(paste0("E:/POI_gridmet_daily_weather_data/df_spend_visit_climate_extreme_01_15_IVmodel",".RData"))

files <- list.files(path = "E:/POI_gridmet_daily_weather_data/grass",  full.names = TRUE) #read grass evaporation data

df_grass_eva<-data.frame()

for (i in 1:40){
  load(files[i])
  print(i)
  daily_grass$eva <- daily_grass$grass$`pet_2019-05-01`
  daily_grass$grass <-NULL
  current_df <- merge(df, daily_grass, by=c("PLACEKEY","date"))
  df_grass_eva <-bind_rows(df_grass_eva,current_df)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
########stage I########
first_stage <- felm(extreme_heat_100F ~eva+ pr+rmax + wind| 
                      PLACEKEY + date|0, 
                    data = df_grass_eva)
summary(first_stage)
## 
## Call:
##    felm(formula = extreme_heat_100F ~ eva + pr + rmax + wind | PLACEKEY +      date | 0, data = df_grass_eva) 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.14052 -0.19590 -0.00012  0.16055  1.12208 
## 
## Coefficients:
##        Estimate Std. Error t value Pr(>|t|)    
## eva   2.700e-02  3.162e-04   85.38   <2e-16 ***
## pr    2.663e-03  2.884e-05   92.35   <2e-16 ***
## rmax -5.866e-03  2.871e-05 -204.35   <2e-16 ***
## wind -2.457e-02  2.967e-04  -82.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2847 on 2069307 degrees of freedom
## Multiple R-squared(full model): 0.5341   Adjusted R-squared: 0.5321 
## Multiple R-squared(proj model): 0.06084   Adjusted R-squared: 0.0568 
## F-statistic(full model):266.4 on 8905 and 2069307 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 3.351e+04 on 4 and 2069307 DF, p-value: < 2.2e-16
##get the fitted values
fixed_effects <- getfe(first_stage)$fe
model_coefs <- coef(first_stage) # Extracting model coefficients
model_mat <- model.matrix(~ 0+eva + pr + rmax + wind, data = df_grass_eva) #create matrix
linear_predictor <- model_mat %*% model_coefs
df_grass_eva$fitted_heat <-linear_predictor

#########stage II########
second_stage <- felm(visit ~fitted_heat+ pr+rmax + wind| 
                       PLACEKEY + date|0, 
                     data = df_grass_eva)

summary(second_stage)
## 
## Call:
##    felm(formula = visit ~ fitted_heat + pr + rmax + wind | PLACEKEY +      date | 0, data = df_grass_eva) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.097  -3.042  -0.382   2.429  44.157 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## fitted_heat -2.121e+01  2.440e-01  -86.93   <2e-16 ***
## pr           4.607e-02  7.842e-04   58.74   <2e-16 ***
## rmax        -1.077e-01  1.887e-03  -57.08   <2e-16 ***
## wind        -4.002e-01  5.561e-03  -71.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.932 on 2069307 degrees of freedom
## Multiple R-squared(full model): 0.6491   Adjusted R-squared: 0.6476 
## Multiple R-squared(proj model): 0.01131   Adjusted R-squared: 0.007052 
## F-statistic(full model):429.9 on 8905 and 2069307 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  5916 on 4 and 2069307 DF, p-value: < 2.2e-16
###Long term effects
FE_airtemp_visit_7days <- felm(visit ~ extreme_heat_100F+extreme_heat_7days + pr+rmax + wind| 
                                 place_month + date|0, 
                               data = df)
summary(FE_airtemp_visit_7days)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + extreme_heat_7days +      pr + rmax + wind | place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.779  -2.899  -0.318   2.299  41.316 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F  -1.1910253  0.0182320  -65.33   <2e-16 ***
## extreme_heat_7days -1.2421761  0.0273284  -45.45   <2e-16 ***
## pr                  0.0072076  0.0005902   12.21   <2e-16 ***
## rmax                0.0402977  0.0005158   78.13   <2e-16 ***
## wind               -0.1999847  0.0049994  -40.00   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.754 on 2020275 degrees of freedom
##   (29634 observations deleted due to missingness)
## Multiple R-squared(full model): 0.6723   Adjusted R-squared: 0.6671 
## Multiple R-squared(proj model): 0.015   Adjusted R-squared: -0.0005158 
## F-statistic(full model):130.2 on 31824 and 2020275 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  6153 on 5 and 2020275 DF, p-value: < 2.2e-16
FE_airtemp_spend_visit_14days <- felm(visit ~ extreme_heat_100F+extreme_heat_14days + pr+rmax + wind| 
                                        place_month + date|0, 
                                      data = df)
summary(FE_airtemp_spend_visit_14days)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + extreme_heat_14days +      pr + rmax + wind | place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.672  -2.881  -0.320   2.281  40.923 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F   -1.4395738  0.0167341  -86.03   <2e-16 ***
## extreme_heat_14days -0.9767812  0.0318495  -30.67   <2e-16 ***
## pr                   0.0150073  0.0006478   23.17   <2e-16 ***
## rmax                 0.0406583  0.0005186   78.40   <2e-16 ***
## wind                -0.2045631  0.0050423  -40.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.747 on 1985735 degrees of freedom
##   (64186 observations deleted due to missingness)
## Multiple R-squared(full model): 0.6718   Adjusted R-squared: 0.6665 
## Multiple R-squared(proj model): 0.01489   Adjusted R-squared: -0.0008928 
## F-statistic(full model):127.7 on 31812 and 1985735 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  6002 on 5 and 1985735 DF, p-value: < 2.2e-16
FE_airtemp_visit_21days <- felm(visit ~ extreme_heat_100F+extreme_heat_21days + pr+rmax + wind| 
                                  place_month + date|0, 
                                data = df)
summary(FE_airtemp_visit_21days)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + extreme_heat_21days +      pr + rmax + wind | place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.636  -2.861  -0.314   2.261  40.914 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F   -1.4395500  0.0163227  -88.19   <2e-16 ***
## extreme_heat_21days -1.3252919  0.0358651  -36.95   <2e-16 ***
## pr                   0.0149141  0.0006623   22.52   <2e-16 ***
## rmax                 0.0405758  0.0005178   78.36   <2e-16 ***
## wind                -0.1980499  0.0051429  -38.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.736 on 1950995 degrees of freedom
##   (98936 observations deleted due to missingness)
## Multiple R-squared(full model): 0.6715   Adjusted R-squared: 0.6662 
## Multiple R-squared(proj model): 0.01519   Adjusted R-squared: -0.0008613 
## F-statistic(full model):125.4 on 31802 and 1950995 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  6019 on 5 and 1950995 DF, p-value: < 2.2e-16
FE_airtemp_visit_28days <- felm(visit ~ extreme_heat_100F+extreme_heat_28days + pr+rmax + wind| 
                                  place_month + date|0, 
                                data = df)
summary(FE_airtemp_visit_28days)
## 
## Call:
##    felm(formula = visit ~ extreme_heat_100F + extreme_heat_28days +      pr + rmax + wind | place_month + date | 0, data = df) 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.605  -2.830  -0.313   2.235  40.909 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## extreme_heat_100F   -1.4924689  0.0160474  -93.00   <2e-16 ***
## extreme_heat_28days -1.3497360  0.0396778  -34.02   <2e-16 ***
## pr                   0.0152852  0.0006607   23.14   <2e-16 ***
## rmax                 0.0393876  0.0005194   75.84   <2e-16 ***
## wind                -0.1903740  0.0052164  -36.50   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.713 on 1916348 degrees of freedom
##   (133611 observations deleted due to missingness)
## Multiple R-squared(full model): 0.6727   Adjusted R-squared: 0.6673 
## Multiple R-squared(proj model): 0.01482   Adjusted R-squared: -0.001516 
## F-statistic(full model):  124 on 31774 and 1916348 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  5765 on 5 and 1916348 DF, p-value: < 2.2e-16