rm(list=ls())
library(data.table)
library(ggplot2)
library(dplyr)
library(stringr)
library(lfe)
library(stargazer)

library(arrow)
library(dplyr)
library(lubridate)
library(RSQLite)
library(DBI)  
#source('C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/functions.R')

#data_path = "C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Projects/air_quality/data/wildfire-influence-data/"
data_path <- "C:/Users/mferdo2/OneDrive - EQS/Programming/EPAData/From Dr. Ratnadiwakar (IV)/wildfire-influence-data/"
full_file_path <- file.path(data_path, "epa_station_day_totalPM_smokePM_panel_20000101-20221231.rds")
print(full_file_path)
## [1] "C:/Users/mferdo2/OneDrive - EQS/Programming/EPAData/From Dr. Ratnadiwakar (IV)/wildfire-influence-data//epa_station_day_totalPM_smokePM_panel_20000101-20221231.rds"
if (file.exists(full_file_path)) {
    aq <- readRDS(full_file_path)
    print("File loaded successfully.")
} else {
    stop("File does not exist at the provided path.")
}
## [1] "File loaded successfully."
state_regions <- fread("https://raw.githubusercontent.com/cphalpert/census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv")
# creates a data.table with county-level house prices (zhvi) for years 2015 and 2022

temp <- fread("C:/Users/mferdo2/OneDrive - EQS/Programming/EPAData/From Dr. Ratnadiwakar (IV)/Zillow data/County_zhvi_uc_sfr_tier_0.33_0.67_sm_sa_month.csv")


temp[,fips:=paste0(str_pad(StateCodeFIPS,2,pad="0"),str_pad(MunicipalCodeFIPS,3,pad="0"))]
temp[,c("RegionID","RegionName","SizeRank","StateName","RegionType","State","Metro","StateCodeFIPS","MunicipalCodeFIPS")] <- NULL
temp <- melt(temp,id.vars = c("fips"))
temp$month <- as.Date(as.character(temp$variable),origin = "1970-01-01")
temp$variable <- NULL
names(temp) <- c("fips","zhvi","month")

zhvi_2015 <- temp[year(month)==2015,.(zhvi_2015=mean(zhvi)),by=fips]
zhvi_2022 <- temp[year(month)==2022,.(zhvi_2022=mean(zhvi)),by=fips]

zhvi <- merge(zhvi_2015,zhvi_2022,by="fips")
#colnames(temp)
# Data from: Burke et al 2023 "The contribution of wildfire to PM2.5 trends in the USA".
# https://github.com/echolab-stanford/wildfire-influence?tab=readme-ov-file
# contains air quaility and smoke indicators for each detector. For each detector we know the county code

aq <- readRDS(paste0(data_path,"epa_station_day_totalPM_smokePM_panel_20000101-20221231.rds"))
aq <- data.table(aq)
aq <- aq[date>="2006-01-01"]
aq <- aq[!is.na(pm25)]
aq[,county:=paste0(state_code,county_code)]
aq[,c("state_code","county_code","cbsa","grid_id_10km"):=list(NULL)]
head(aq[smoke_day==1])
##           id       date  pm25 smoke_day smokePM filled_smoke_day filled_smokePM
##       <char>     <Date> <num>     <num>   <num>            <num>          <num>
## 1: 350250008 2006-01-01  35.1         1   31.90                1          31.90
## 2: 481130069 2006-01-01  22.6         1   16.30                1          16.30
## 3: 481390015 2006-01-01  22.7         1   16.00                1          16.00
## 4: 481390017 2006-01-01  23.0         1   16.50                1          16.50
## 5: 482570005 2006-01-01  15.3         1    9.70                1           9.70
## 6: 484391002 2006-01-01  28.0         1   20.85                1          20.85
##    pm25_anom nobs_3yr pm25_med_3yr daily_obs_count percent_complete   aqi   frm
##        <num>    <num>        <num>           <num>            <num> <num> <num>
## 1:     31.90       61         3.20               1              100  99.0   0.0
## 2:     16.30       61         6.30               1              100  73.5   0.5
## 3:     16.00       61         6.70               1              100  73.0   0.0
## 4:     16.50       31         6.50               1              100  74.0   0.0
## 5:      9.70       61         5.60               1              100  58.0   0.0
## 6:     20.85       60         7.15               1              100  84.0   1.0
##    frm_like        lon      lat      state county cbsa_code
##       <num>      <num>    <num>     <char> <char>     <num>
## 1:      1.0 -103.12292 32.72666 New Mexico  35025     26020
## 2:      0.5  -96.86012 32.82006      Texas  48113     19100
## 3:      1.0  -97.02500 32.43694      Texas  48139     19100
## 4:      1.0  -97.04250 32.47361      Texas  48139     19100
## 5:      1.0  -96.31769 32.56497      Texas  48257     19100
## 6:      0.0  -97.35653 32.80581      Texas  48439     19100
# creates a county-date panel using above aq data

aq_county_date <- aq[,.(smoke_day=max(smoke_day,na.rm=T),
                        pm25 = mean(pm25,na.rm=T),
                        smokePM = max(smokePM,na.rm=T),
                        aqi = mean(aqi,na.rm=T),
                        pm25_med_3yr = mean(pm25_med_3yr,na.rm=T)
                        ),
                     by=.(state,county,date)]

aq_county_date <- aq_county_date[is.finite(smokePM)]
# summarize to county level

aq_2006_2009 <- aq_county_date[year(date) %in% 2006:2009,
                               .(pct_smoke_days_2006 = mean(smoke_day),
                                 smokePM_per_day_2006 = mean(smokePM)),
                               by=.(county)]


# 2010 to 2014, used to construct the instrument
aq_2010_2014 <- aq_county_date[year(date) %in% 2010:2014,
                               .(pct_smoke_days = mean(smoke_day),
                                 smokePM_per_day = mean(smokePM)),
                               by=.(state,county)]


# start of the period
aq_2015 <- aq_county_date[year(date) %in% 2015:2016,
                          .(pm25_median_2015=median(pm25,na.rm=T),
                            pm25_mean_2015=mean(pm25,na.rm=T),
                            aqi_median_2015=median(aqi,na.rm=T),
                            aqi_mean_2015=mean(aqi,na.rm=T)
                            ),
                          by=county]
# end of the period
aq_2022 <- aq_county_date[year(date) %in% 2021:2022,
                          .(pm25_median_2022=median(pm25,na.rm=T),
                            pm25_mean_2022=mean(pm25,na.rm=T),
                            aqi_median_2022=median(aqi,na.rm=T),
                            aqi_mean_2022=mean(aqi,na.rm=T),
                            pct_smoke_days_2022 = mean(smoke_day),
                            smokePM_per_day_2022 = mean(smokePM)
                            ),
                          by=county]
aq_county <- merge(aq_2010_2014,aq_2015,by="county")
aq_county <- merge(aq_county,aq_2022,by="county")
aq_county <- merge(aq_county,aq_2006_2009,by="county",all.x=T)

aq_county[,d_pm25_median:=pm25_median_2022-pm25_median_2015]  # d_ mean delta. Change from 2015 to 2022
aq_county[,d_pm25_mean:=pm25_mean_2022-pm25_mean_2015]
aq_county[,d_aqi_median:=aqi_median_2022-aqi_median_2015]
aq_county[,d_aqi_mean:=aqi_mean_2022-aqi_mean_2015]

aq_county[,pct_smoke_days_q:=ntile(pct_smoke_days,4)]
aq_county[,smokePM_per_day_q:=ntile(smokePM_per_day,4)]


aq_county <- merge(aq_county,state_regions[,c("State","Region")],by.x="state",by.y="State")
aq_county <- merge(aq_county,zhvi,by.x="county",by.y="fips",all.x=T)
aq_county[,d_zhvi:=log(zhvi_2022/zhvi_2015)] # d_ mean delta. Change from 2015 to 2022

quantiles <- aq_county[, c(quantile(pct_smoke_days, 0.01), quantile(pct_smoke_days, 0.99))]  # 2010 to 2014 data, iv
aq_county[, w_pct_smoke_days := pmin(pmax(pct_smoke_days, quantiles[1]), quantiles[2])]

quantiles <- aq_county[, c(quantile(smokePM_per_day, 0.01), quantile(smokePM_per_day, 0.99))]   # 2010 to 2014 data, iv
aq_county[, w_smokePM_per_day := pmin(pmax(smokePM_per_day, quantiles[1]), quantiles[2])]


quantiles <- aq_county[, c(quantile(d_zhvi, 0.01,na.rm=T), quantile(d_zhvi, 0.99,na.rm=T))]
aq_county[, w_d_zhvi := pmin(pmax(d_zhvi, quantiles[1]), quantiles[2])]


# aq_county[,state:=substr(county,1,2)]
# aq_county[,pct_smoke_days_q:=ntile(pct_smoke_days,4),by=state]
# aq_county[,smokePM_per_day_q:=ntile(smokePM_per_day,4),by=state]

# aq_county <- merge(aq_county,state_regions,by.x="state",by.y="FIPS")

#Do not have access

# https://hazards.fema.gov/nri/data-resources
#nri <- data.table(read_parquet('C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Projects/air_quality/data/NRI_Table_Counties.parquet'))
#nri <- nri[,c("STCOFIPS","WFIR_AFREQ","WFIR_ALR_NPCTL","WFIR_RISKV","WFIR_RISKS","WFIR_RISKR","WNTW_EVNTS")]
#nri[,county:=str_pad(STCOFIPS,5,"left","0")]

#us_counties <- readOGR("C:/Users/dratnadiwakara2/Documents/OneDrive - Louisiana State University/Raw Data/Shapefiles/US Counties/cb_2013_us_county_20m","cb_2013_us_county_20m")

#us_counties <- fortify(us_counties,region="GEOID")

#us_counties <- data.table(us_counties)

#us_counties <- merge(us_counties,nri,by.x="id",by.y="county")
#us_counties[,state:=as.numeric(substr(id,1,2))]
list.files("C:/Users/mferdo2/OneDrive - EQS/Programming/EPAData/From Dr. Ratnadiwakar (IV)/NRI_Table_Counties/")
## [1] "NRI_HazardInfo.csv"          "NRI_metadata_March2023.docx"
## [3] "NRI_metadata_March2023.xml"  "NRI_Table_Counties.csv"     
## [5] "NRIDataDictionary.csv"

#recreating above using original source

# Load necessary libraries
library(data.table)
library(sf)
library(dplyr)
library(ggplot2)

# Load the NRI data from the CSV file
nri <- fread('C:/Users/mferdo2/OneDrive - EQS/Programming/EPAData/From Dr. Ratnadiwakar (IV)/NRI_Table_Counties/NRI_Table_Counties.csv')

# Select and manipulate the necessary columns
nri <- nri[, .(STCOFIPS, WFIR_AFREQ, WFIR_ALR_NPCTL, WFIR_RISKV, WFIR_RISKS, WFIR_RISKR, WNTW_EVNTS)]
nri[, county := str_pad(STCOFIPS, 5, pad = "0")]

# Load the shapefile using the sf package
us_counties <- st_read("C:/Users/mferdo2/OneDrive - EQS/Programming/EPAData/From Dr. Ratnadiwakar (IV)/cb_2013_us_county_20m/cb_2013_us_county_20m.shp")
## Reading layer `cb_2013_us_county_20m' from data source 
##   `C:\Users\mferdo2\OneDrive - EQS\Programming\EPAData\From Dr. Ratnadiwakar (IV)\cb_2013_us_county_20m\cb_2013_us_county_20m.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3221 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1473 ymin: 17.88481 xmax: 179.7785 ymax: 71.35256
## Geodetic CRS:  NAD83
# Merge the shapefile data with the NRI data
us_counties <- merge(us_counties, nri, by.x = "GEOID", by.y = "county", all.x = TRUE)

# Extract the state FIPS code
us_counties$state <- as.numeric(substr(us_counties$GEOID, 1, 2))

# Convert to data.table and retain geometry
us_counties_dt <- st_as_sf(us_counties)

WFIR_ALR_NPCTL: Wildfire - Expected Annual Loss Rate - National Percentile

ggplot(data = us_counties_dt[!us_counties_dt$state %in% c(2, 15, 72), ]) +
  geom_sf(aes(fill = WFIR_ALR_NPCTL), color = NA) +
  scale_fill_gradientn(colors = c("ivory1", "honeydew1", "darkseagreen3", "darkturquoise", "dodgerblue", "dodgerblue4")) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom",
    panel.grid = element_blank()
  ) +
  guides(fill = guide_legend(title = "SCI Percentile"))

#did not run

#ggplot(data = us_counties[!us_counties$state %in% c(2, 15, 72)]) +
#  geom_polygon(aes(x = long, y = lat, group = GEOID, fill = WFIR_ALR_NPCTL), color = NA) +
#  scale_fill_gradientn(colors = c("ivory1", "honeydew1", "darkseagreen3", "darkturquoise", "dodgerblue", "dodgerblue4")) +
#  theme_minimal() +
#  theme(
 #   axis.title = element_blank(),
  #  axis.text = element_blank(),
  #  axis.ticks = element_blank(),
  #  legend.position = "bottom",
 #   panel.grid = element_blank()
 # ) +
#  guides(fill = guide_legend(title = "SCI Percentile"))
aq_county <- merge(aq_county,nri,by="county")
# Define the file path
file_path <- "C:/Users/mferdo2/OneDrive - EQS/Programming/EPAData/From Dr. Ratnadiwakar (IV)/aq_county.csv"

# Save the data frame as a CSV file
fwrite(aq_county, file_path)

WFIR_ALR_NPCTL: Wildfire - Expected Annual Loss Rate - National Percentile

r <- list()
r[[1]] <- felm(w_d_zhvi~WFIR_ALR_NPCTL|0|(d_aqi_mean~w_pct_smoke_days+pct_smoke_days_2006),data=aq_county[WFIR_ALR_NPCTL<50])
r[[2]] <- felm(w_d_zhvi~WFIR_ALR_NPCTL|0|(d_pm25_mean~w_pct_smoke_days+pct_smoke_days_2006),data=aq_county[WFIR_ALR_NPCTL<50])

r[[3]] <- felm(w_d_zhvi~WFIR_ALR_NPCTL|0|(d_aqi_mean~log(0.00001+w_smokePM_per_day+smokePM_per_day_2006)),data=aq_county[WFIR_ALR_NPCTL<50])
r[[4]] <- felm(w_d_zhvi~WFIR_ALR_NPCTL|0|(d_pm25_mean~log(0.00001+w_smokePM_per_day+smokePM_per_day_2006)),data=aq_county[WFIR_ALR_NPCTL<50])


# rr <- felm(d_zhvi~1|Region|(d_aqi_mean~pct_smoke_days)|Region,data=aq_county)
stargazer(r,type="text")
## 
## ======================================================================
##                                          Dependent variable:          
##                                ---------------------------------------
##                                               w_d_zhvi                
##                                   (1)       (2)       (3)       (4)   
## ----------------------------------------------------------------------
## WFIR_ALR_NPCTL                   0.001     0.001     0.001     0.002  
##                                 (0.001)   (0.001)   (0.001)   (0.001) 
##                                                                       
## `d_aqi_mean(fit)`              -0.025***           -0.049***          
##                                 (0.008)             (0.018)           
##                                                                       
## `d_pm25_mean(fit)`                       -0.081***           -0.171***
##                                           (0.026)             (0.064) 
##                                                                       
## Constant                       0.453***  0.457***  0.425***  0.430*** 
##                                 (0.019)   (0.019)   (0.032)   (0.031) 
##                                                                       
## ----------------------------------------------------------------------
## Observations                      348       348       348       348   
## R2                              -0.409    -0.353    -1.666    -1.620  
## Adjusted R2                     -0.417    -0.361    -1.681    -1.636  
## Residual Std. Error (df = 345)   0.185     0.181     0.254     0.252  
## ======================================================================
## Note:                                      *p<0.1; **p<0.05; ***p<0.01
condfstat(r[[1]])
##       d_aqi_mean
## iid F   8.389138
## attr(,"df1")
## [1] 4
condfstat(r[[3]])
##       d_aqi_mean
## iid F   3.569265
## attr(,"df1")
## [1] 3
r <- list()
r[[1]] <- felm(w_d_zhvi~1|0|(d_aqi_mean~w_pct_smoke_days),data=aq_county)
r[[2]] <- felm(w_d_zhvi~1|0|(d_pm25_mean~w_pct_smoke_days),data=aq_county)

r[[3]] <- felm(w_d_zhvi~1|0|(d_aqi_mean~log(0.00001+w_smokePM_per_day)),data=aq_county)
r[[4]] <- felm(w_d_zhvi~1|0|(d_pm25_mean~log(0.00001+w_smokePM_per_day)),data=aq_county)


# rr <- felm(d_zhvi~1|Region|(d_aqi_mean~pct_smoke_days)|Region,data=aq_county)
stargazer(r,type="text")
## 
## ====================================================================
##                                         Dependent variable:         
##                                -------------------------------------
##                                              w_d_zhvi               
##                                   (1)       (2)      (3)      (4)   
## --------------------------------------------------------------------
## `d_aqi_mean(fit)`              -0.023***           -0.015*          
##                                 (0.008)            (0.008)          
##                                                                     
## `d_pm25_mean(fit)`                       -0.074***          -0.040* 
##                                           (0.026)           (0.021) 
##                                                                     
## Constant                       0.536***  0.542***  0.535*** 0.538***
##                                 (0.008)   (0.009)  (0.007)  (0.007) 
##                                                                     
## --------------------------------------------------------------------
## Observations                      666       666      666      666   
## R2                              -0.473    -0.497    -0.217   -0.167 
## Adjusted R2                     -0.476    -0.500    -0.219   -0.169 
## Residual Std. Error (df = 664)   0.201     0.203    0.183    0.179  
## ====================================================================
## Note:                                    *p<0.1; **p<0.05; ***p<0.01
condfstat(r[[1]])
##       d_aqi_mean
## iid F   15.64727
## attr(,"df1")
## [1] 2
condfstat(r[[3]])
##       d_aqi_mean
## iid F   12.95079
## attr(,"df1")
## [1] 2
r <- list()
r[[1]] <- felm(d_aqi_mean~pct_smoke_days,data=aq_county[is.finite(pct_smoke_days_2022)])
r[[2]] <- felm(d_aqi_mean~smokePM_per_day,data=aq_county)
r[[3]] <- felm(d_pm25_mean~log(0.0000001+pct_smoke_days),data=aq_county)
r[[4]] <- felm(d_pm25_mean~log(0.0000001+smokePM_per_day),data=aq_county)

r[[5]] <- felm(d_aqi_mean~factor(pct_smoke_days_q),data=aq_county)
r[[6]] <- felm(d_aqi_mean~factor(smokePM_per_day_q),data=aq_county)
r[[7]] <- felm(d_pm25_mean~factor(pct_smoke_days_q),data=aq_county)
r[[8]] <- felm(d_pm25_mean~factor(smokePM_per_day_q),data=aq_county)
# r[[2]] <- felm(d_pm25_median~factor(smokePM_per_day_q)|state|0|state,data=aq_county)

stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## =====================================================================================================
##                                                        Dependent variable:                           
##                              ------------------------------------------------------------------------
##                                  d_aqi_mean        d_pm25_mean       d_aqi_mean        d_pm25_mean   
##                                 (1)      (2)      (3)      (4)      (5)      (6)      (7)      (8)   
## -----------------------------------------------------------------------------------------------------
## pct_smoke_days               26.500***                                                               
##                               (4.387)                                                                
## smokePM_per_day                        2.486***                                                      
##                                        (0.610)                                                       
## log(1e-07 + pct_smoke_days)                     0.176***                                             
##                                                 (0.054)                                              
## log(1e-07 + smokePM_per_day)                             0.161***                                    
##                                                          (0.046)                                     
## factor(pct_smoke_days_q)2                                          -0.250            -0.008          
##                                                                   (0.488)           (0.152)          
## factor(pct_smoke_days_q)3                                         1.952***          0.738***         
##                                                                   (0.488)           (0.152)          
## factor(pct_smoke_days_q)4                                         2.395***          0.749***         
##                                                                   (0.488)           (0.152)          
## factor(smokePM_per_day_q)2                                                  0.281             0.077  
##                                                                            (0.490)           (0.151) 
## factor(smokePM_per_day_q)3                                                 1.609***          0.500***
##                                                                            (0.490)           (0.151) 
## factor(smokePM_per_day_q)4                                                 2.721***          0.980***
##                                                                            (0.490)           (0.151) 
## Constant                     -1.821*** -0.675** 0.650*** 0.366*** -0.706** -0.834** -0.192*  -0.211**
##                               (0.394)  (0.300)  (0.155)  (0.077)  (0.344)  (0.346)  (0.107)  (0.107) 
## -----------------------------------------------------------------------------------------------------
## Observations                    701      701      701      701      701      701      701      701   
## R2                             0.050    0.023    0.015    0.017    0.061    0.054    0.065    0.071  
## Adjusted R2                    0.048    0.022    0.014    0.016    0.057    0.050    0.061    0.067  
## =====================================================================================================
## Note:                                                                     *p<0.1; **p<0.05; ***p<0.01
r <- list()
r[[1]] <- felm(d_aqi_mean~log(0.0000001+pct_smoke_days)|Region|0|Region,data=aq_county)
r[[2]] <- felm(d_aqi_mean~log(0.0000001+smokePM_per_day)|Region|0|Region,data=aq_county)
r[[3]] <- felm(d_pm25_mean~log(0.0000001+pct_smoke_days)|Region|0|Region,data=aq_county)
r[[4]] <- felm(d_pm25_mean~log(0.0000001+smokePM_per_day)|Region|0|Region,data=aq_county)

r[[5]] <- felm(d_aqi_mean~factor(pct_smoke_days_q)|Region|0|Region,data=aq_county)
r[[6]] <- felm(d_aqi_mean~factor(smokePM_per_day_q)|Region|0|Region,data=aq_county)
r[[7]] <- felm(d_pm25_mean~factor(pct_smoke_days_q)|Region|0|Region,data=aq_county)
r[[8]] <- felm(d_pm25_mean~factor(smokePM_per_day_q)|Region|0|Region,data=aq_county)

# r[[2]] <- felm(d_pm25_median~factor(smokePM_per_day_q)|state|0|state,data=aq_county)

stargazer(r,type="text",no.space = T,omit.stat = "ser")
## 
## ================================================================================================
##                                                      Dependent variable:                        
##                              -------------------------------------------------------------------
##                                d_aqi_mean      d_pm25_mean      d_aqi_mean        d_pm25_mean   
##                                (1)     (2)     (3)     (4)     (5)      (6)      (7)      (8)   
## ------------------------------------------------------------------------------------------------
## log(1e-07 + pct_smoke_days)   0.452           0.139                                             
##                              (0.252)         (0.092)                                            
## log(1e-07 + smokePM_per_day)          0.361           0.136                                     
##                                      (0.231)         (0.103)                                    
## factor(pct_smoke_days_q)2                                     0.275             0.180*          
##                                                              (0.218)           (0.066)          
## factor(pct_smoke_days_q)3                                    1.734***          0.676**          
##                                                              (0.138)           (0.134)          
## factor(pct_smoke_days_q)4                                    2.919**           0.975***         
##                                                              (0.558)           (0.125)          
## factor(smokePM_per_day_q)2                                            0.847***          0.283** 
##                                                                       (0.063)           (0.050) 
## factor(smokePM_per_day_q)3                                            1.909**           0.659***
##                                                                       (0.336)           (0.107) 
## factor(smokePM_per_day_q)4                                            2.539**           0.987** 
##                                                                       (0.652)           (0.176) 
## ------------------------------------------------------------------------------------------------
## Observations                   701     701     701     701     701      701      701      701   
## R2                            0.086   0.086   0.102   0.105   0.116    0.111    0.138    0.144  
## Adjusted R2                   0.081   0.080   0.096   0.100   0.108    0.104    0.130    0.137  
## ================================================================================================
## Note:                                                                *p<0.1; **p<0.05; ***p<0.01
#aq_county_date <- aq[,.(smoke_day=max(smoke_day,na.rm=T),
                        #pm25 = mean(pm25,na.rm=T),
                        #smokePM = max(smokePM,na.rm=T),
                        #aqi = mean(aqi,na.rm=T),
                        #pm25_med_3yr = mean(pm25_med_3yr,na.rm=T)
                        #),
                     #by=.(county,date)]
#aq_county_date[,year:=year(date)]



#aq_county_year <- aq_county_date[,.(n_smoke_days = sum(smoke_day,na.rm=T),
                                    #n_total_days = .N,
                                   # mean_pm25 = mean(pm25),
                                   # median_pm25 = median(pm25),
                                    #mean_aqi = mean(aqi),
                                    #median_aqi = median(aqi),
                                   # pm25_med_3yr = #mean(pm25_med_3yr,na.rm=T),
                                    #pm25_anom = mean(pm25_anom,na.rm=T),
                                    #smokePM = mean(smokePM,na.rm=T)),
                                 #by=.(county,year)]

#aq_county_year[,pct_smoke_days:=n_smoke_days/n_total_days]
#setorder(aq_county_year,county,year)

#aq_county_year[,mean_pm25_1:=shift(mean_pm25,n=1,type="lag"),by=county]
#aq_county_year[,median_pm25_1:=shift(median_pm25,n=1,type="lag"),by=county]
#aq_county_year[,d_pm25:=mean_pm25-mean_pm25_1]
#aq_county_year[,d_pm25_median:=median_pm25-median_pm25_1]

#aq_county_year[,mean_aqi_1:=shift(mean_aqi,n=1,type="lag"),by=county]
#aq_county_year[,d_aqi:=mean_aqi-mean_aqi_1]

#aq_county_year[,pm25_anom_1:=shift(smokePM,n=1,type="lag"),by=county]
#aq_county_year[,pm25_anom_2:=shift(smokePM,n=2,type="lag"),by=county]
#aq_county_year[,pm25_anom_3:=shift(smokePM,n=3,type="lag"),by=county]
#aq_county_year[,pm25_anom_4:=shift(smokePM,n=4,type="lag"),by=county]


#aq_county_year[,pct_smoke_days_1:=shift(pct_smoke_days,n=1,type="lag"),by=county]
#aq_county_year[,pct_smoke_days_2:=shift(pct_smoke_days,n=2,type="lag"),by=county]
#aq_county_year[,pct_smoke_days_3:=shift(pct_smoke_days,n=3,type="lag"),by=county]

#aq_county_year[,d_smoke_pct_0:=pct_smoke_days-pct_smoke_days_1]
#aq_county_year[,d_smoke_pct_1:=pct_smoke_days_1-pct_smoke_days_2]
#aq_county_year[,d_smoke_pct_2:=pct_smoke_days_2-pct_smoke_days_3]
#r <- list()

#r[[1]] <- felm(median_pm25~pm25_anom_1+pm25_anom_2+pm25_anom_3|county+year|0|county,data=aq_county_year[is.finite(pm25_anom_1) & is.finite(pm25_anom_2) & is.finite(pm25_anom_3)])
#r[[2]] <- felm(median_aqi~pm25_anom_1+pm25_anom_2+pm25_anom_3|county+year|0|county,data=aq_county_year)

#stargazer(r,type="text",no.space = T)