PS3

Author

Andrew Grimoldby

Published

April 13, 2023

library(pacman)
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(ggplot2)
p_load("tidyverse")
p_load('fixest')
p_load("lmtest")
p_load("stargazer")
p_load("data.table")
p_load('haven')
p_load('broom')
p_load('modelsummary')
p_load('patchwork')

Q1: Reading and viewing data

allsites <- read_dta('allsites.dta')
glimpse(allsites)

Rows: 42,974
Columns: 44
$ fips             <chr> "01001020200", "01001020300", "01001020400", "0100102…
$ state            <chr> "", "", "", "", "", "", "", "", "AL", "", "", "", "",…
$ npl2000          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ pop_den8         <dbl> 1634.88367, 1497.60767, 1995.14172, 454.03589, 913.84…
$ shrblk8          <dbl> 0.606448, 0.149201, 0.000000, 0.059753, 0.156902, 0.2…
$ shrhsp8          <dbl> 0.012328, 0.016613, 0.016842, 0.014321, 0.003367, 0.0…
$ child8           <dbl> 0.355618, 0.328753, 0.300121, 0.357530, 0.347474, 0.3…
$ old8             <dbl> 0.139876, 0.098083, 0.059050, 0.025185, 0.056565, 0.1…
$ shrfor8          <dbl> 0.010431, 0.021405, 0.009334, 0.014321, 0.000000, 0.0…
$ ffh8             <dbl> 0.249146, 0.166015, 0.097989, 0.066844, 0.175700, 0.1…
$ smhse8           <dbl> 0.517978, 0.404166, 0.488126, 0.408845, 0.422598, 0.6…
$ hsdrop8          <dbl> 0.141361, 0.066079, 0.083333, 0.272222, 0.196428, 0.0…
$ no_hs_diploma8   <dbl> 0.4954874, 0.2879581, 0.2278614, 0.2380952, 0.3936529…
$ ba_or_better8    <dbl> 0.10288808, 0.14077953, 0.20651032, 0.20602527, 0.087…
$ unemprt8         <dbl> 0.108241, 0.035384, 0.033933, 0.073512, 0.079545, 0.0…
$ povrat8          <dbl> 0.302460, 0.077174, 0.057426, 0.070123, 0.084511, 0.2…
$ welfare8         <dbl> 0.203566, 0.091003, 0.033996, 0.076256, 0.070646, 0.1…
$ avhhin8          <dbl> 14465.63, 18397.63, 23987.15, 22034.01, 19911.98, 151…
$ tothsun8         <dbl> 741, 1018, 1633, 641, 1029, 1077, 803, 1013, 708, 113…
$ ownocc8          <dbl> 491, 716, 1293, 487, 834, 826, 590, 715, 617, 658, 67…
$ firestoveheat80  <dbl> 0.05802969, 0.00000000, 0.01959584, 0.02340094, 0.017…
$ noaircond80      <dbl> 0.33063427, 0.11296660, 0.01898347, 0.06240249, 0.165…
$ nofullkitchen80  <dbl> 0.006747638, 0.017681729, 0.004286589, 0.034321371, 0…
$ zerofullbath80   <dbl> 0.03373819, 0.03143419, 0.00000000, 0.01560062, 0.016…
$ statefips        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ bedrms0_80occ    <dbl> 0.0000000000, 0.0000000000, 0.0000000000, 0.000000000…
$ bedrms1_80occ    <dbl> 0.069246434, 0.008379889, 0.000000000, 0.012170386, 0…
$ bedrms2_80occ    <dbl> 0.15274949, 0.03910615, 0.08971384, 0.17241380, 0.232…
$ bedrms3_80occ    <dbl> 0.5987780, 0.7416201, 0.6140758, 0.5354970, 0.5455636…
$ bedrms4_80occ    <dbl> 0.13849287, 0.20391062, 0.27610210, 0.26977688, 0.157…
$ bedrms5_80occ    <dbl> 0.040733196, 0.006983240, 0.020108275, 0.010141988, 0…
$ blt0_1yrs80occ   <dbl> 0.03258656, 0.06145252, 0.02552204, 0.09146342, 0.056…
$ blt2_5yrs80occ   <dbl> 0.11201629, 0.07541899, 0.10208817, 0.24390244, 0.229…
$ blt6_10yrs80occ  <dbl> 0.2749491, 0.3198324, 0.1755607, 0.4857724, 0.2553957…
$ blt10_20yrs80occ <dbl> 0.04276986, 0.33659217, 0.52668214, 0.16869919, 0.326…
$ blt20_30yrs80occ <dbl> 0.08350305, 0.09916201, 0.16550657, 0.00000000, 0.093…
$ blt30_40yrs80occ <dbl> 0.124236256, 0.018156424, 0.004640371, 0.000000000, 0…
$ blt40_yrs80occ   <dbl> 0.32993889, 0.08938547, 0.00000000, 0.01016260, 0.022…
$ detach80occ      <dbl> 0.9804772, 1.0000000, 0.9718969, 0.8747433, 0.7125457…
$ attach80occ      <dbl> 0.008676790, 0.000000000, 0.003903201, 0.000000000, 0…
$ mobile80occ      <dbl> 0.01084599, 0.00000000, 0.02419984, 0.12525667, 0.287…
$ occupied80       <dbl> 0.9352227, 0.9715128, 0.9675444, 0.9126365, 0.9475219…
$ lnmeanhs8        <dbl> 10.33014, 10.62553, 10.82902, 10.80235, 10.53244, 10.…
$ lnmdvalhs0       <dbl> 11.21855, 11.29849, 11.40645, 11.66993, 11.35158, 11.…

Q2: Regression

reg2 <- lm(lnmdvalhs0 ~ npl2000, data = allsites)
summary(reg2)


Call:
lm(formula = lnmdvalhs0 ~ npl2000, data = allsites)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.50946 -0.40522 -0.02112  0.37935  2.11709 

Coefficients:
             Estimate Std. Error  t value Pr(>|t|)    
(Intercept) 11.719696   0.003052 3840.095   <2e-16 ***
npl2000     -0.021269   0.020159   -1.055    0.291    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6254 on 42972 degrees of freedom
Multiple R-squared:  2.591e-05, Adjusted R-squared:  2.635e-06 
F-statistic: 1.113 on 1 and 42972 DF,  p-value: 0.2914

Our results suggest that when a Census tract was listed on the NPL, on average, the median house price falls by approximately 2 percent. Although, this coefficient is not statistically significant at any level, which means we likely need to control for something else…

Q3: Clustering

reg3 <- feols(lnmdvalhs0 ~ npl2000, cluster = ~statefips, data=allsites)
list('Heteroskedastic' = reg2,
     'Cluster' = reg3) %>% modelsummary(gof_map = 'nobs', output = 'huxtable')

	Heteroskedastic	Cluster
(Intercept)	11.720	11.720
	(0.003)	(0.092)
npl2000	-0.021	-0.021
	(0.020)	(0.049)
Num.Obs.	42974	42974

Clustering the standard errors increases them from approximately 0.003 to 0.092. Since we have multiple observations in each state that are likely correlated with each other, we should expect clustered standard errors to be different from standard errors.

Q4: Controls

reg4i <- feols(lnmdvalhs0 ~ npl2000 + lnmeanhs8, cluster = ~statefips, data=allsites)
reg4ii <- feols(lnmdvalhs0 ~ npl2000 + lnmeanhs8 + povrat8 + pop_den8, cluster = ~statefips, data=allsites)
reg4iii <- feols(lnmdvalhs0 ~ npl2000 + lnmeanhs8 + povrat8 + pop_den8| statefips, cluster = ~statefips, data=allsites)
etable(reg3, reg4i, reg4ii, reg4iii)

                             reg3              reg4i               reg4ii
Dependent Var.:        lnmdvalhs0         lnmdvalhs0           lnmdvalhs0
                                                                         
Constant        11.72*** (0.0924)  2.404*** (0.4442)    2.615*** (0.5827)
npl2000          -0.0213 (0.0488)    0.0400 (0.0247)   0.0972*** (0.0179)
lnmeanhs8                         0.8557*** (0.0408)   0.8336*** (0.0529)
povrat8                                                -0.4645** (0.1483)
pop_den8                                             1.43e-5*** (1.22e-6)
Fixed-Effects:  ----------------- ------------------ --------------------
statefips                      No                 No                   No
_______________ _________________ __________________ ____________________
S.E.: Clustered     by: statefips      by: statefips        by: statefips
Observations               42,974             42,974               42,974
R2                        2.59e-5            0.57916              0.62474
Within R2                      --                 --                   --

                            reg4iii
Dependent Var.:          lnmdvalhs0
                                   
Constant                           
npl2000             0.0346 (0.0217)
lnmeanhs8        0.7768*** (0.0412)
povrat8         -0.4046*** (0.1119)
pop_den8        8.4e-6*** (1.44e-6)
Fixed-Effects:  -------------------
statefips                       Yes
_______________ ___________________
S.E.: Clustered       by: statefips
Observations                 42,974
R2                          0.70986
Within R2                   0.58936
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

For my economic control variable I used poverty rate in 1980, which I view as a control for socioeconomic status. Furthermore, I used the population density to control for demographics. We see that in all regression done in question 4, the sign of the npl2000 estimate has changed from negative to positive. Where reg4ii, column 3, is statistically significant at all levels. We also see that with state FE, the estimate on npl2000 is no longer statistically significant.

Q5: Unbiasedness, what do we need?

To satisfy the CIA (which, in turn, creates an unbiased estimator) we need the NPL in 2000 to be as good as random conditional on the covariates.

Q6: Covariates

allcovariates <- read_dta('allcovariates.dta')
reg6i <- feols(povrat8 ~ npl2000, data = allcovariates)
reg6ii <- feols(pop_den8 ~ npl2000, data = allcovariates)
reg6iii <- feols(avhhin8 ~ npl2000, data = allcovariates)
reg6iv <- feols(no_hs_diploma8 ~ npl2000, data = allcovariates)
etable(reg6i, reg6ii, reg6iii, reg6iv)

                             reg6i              reg6ii             reg6iii
Dependent Var.:            povrat8            pop_den8             avhhin8
                                                                          
Constant        0.1125*** (0.0005)  5,424.1*** (43.18) 21,510.2*** (39.45)
npl2000          -0.0069* (0.0032) -4,017.1*** (302.2) -1,170.0*** (276.1)
_______________ __________________ ___________________ ___________________
S.E. type                      IID                 IID                 IID
Observations                48,245              48,245              48,245
R2                         9.35e-5             0.00365             0.00037
Adj. R2                    7.28e-5             0.00363             0.00035

                            reg6iv
Dependent Var.:     no_hs_diploma8
                                  
Constant        0.3147*** (0.0008)
npl2000         0.0280*** (0.0054)
_______________ __________________
S.E. type                      IID
Observations                48,245
R2                         0.00056
Adj. R2                    0.00054
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

All of our coefficients are statistically significant at some level. Furthermore, all variables with the exception of no high school diploma suggest that it would be less likely that an area would be NPL listed. Meaning that when a higher poverty rate, population density, or average household income are present, a tract is less likely to be NPL listed. Alternatively, when a low high school graduation rate (or high no diploma rate) is present, we see that a tract is more likely to be NPL listed.

Q7: Site Covariates

sitecovariates <- read_dta('sitecovariates.dta')

reg7povi <- feols(povrat8 ~ npl2000, data = sitecovariates)
reg7povii <- feols(povrat8 ~ I(hrs_82 >28.5), data = sitecovariates)
reg7poviii <- feols(povrat8 ~ I(hrs_82 > 28.5), data = filter(sitecovariates, between(hrs_82, 16.5,40.5)))
etable(reg7povi,reg7povii,reg7poviii)

                          reg7povi          reg7povii         reg7poviii
Dependent Var.:            povrat8            povrat8            povrat8
                                                                        
Constant        0.1169*** (0.0068) 0.1139*** (0.0063) 0.1072*** (0.0093)
npl2000          -0.0168* (0.0083)                                      
I(hrs_82>28.5)                      -0.0134. (0.0080)    0.0043 (0.0120)
_______________ __________________ __________________ __________________
S.E. type                      IID                IID                IID
Observations                   487                487                227
R2                         0.00842            0.00583            0.00059
Adj. R2                    0.00637            0.00378           -0.00385
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

reg7popdeni <- feols(pop_den8 ~ npl2000, data = sitecovariates)
reg7popdenii <- feols(pop_den8 ~ I(hrs_82 >28.5), data = sitecovariates)
reg7popdeniii <- feols(pop_den8 ~ I(hrs_82 > 28.5), data = filter(sitecovariates, between(hrs_82, 16.5,40.5)))
etable(reg7popdeni,reg7popdenii,reg7popdeniii)

                       reg7popdeni       reg7popdenii      reg7popdeniii
Dependent Var.:           pop_den8           pop_den8           pop_den8
                                                                        
Constant        1,770.8*** (205.2) 1,670.2*** (190.2) 1,360.9*** (264.7)
npl2000            -620.1* (248.5)                                      
I(hrs_82>28.5)                        -512.8* (239.9)     -209.9 (340.7)
_______________ __________________ __________________ __________________
S.E. type                      IID                IID                IID
Observations                   487                487                227
R2                         0.01268            0.00933            0.00168
Adj. R2                    0.01064            0.00729           -0.00275
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

reg7hhinci <- feols(avhhin8 ~ npl2000, data = sitecovariates)
reg7hhincii <- feols(avhhin8 ~ I(hrs_82 >28.5), data = sitecovariates)
reg7hhinciii <- feols(avhhin8 ~ I(hrs_82 > 28.5), data = filter(sitecovariates, between(hrs_82, 16.5,40.5)))
etable(reg7hhinci,reg7hhincii,reg7hhinciii)

                         reg7hhinci         reg7hhincii        reg7hhinciii
Dependent Var.:             avhhin8             avhhin8             avhhin8
                                                                           
Constant        19,547.5*** (441.5) 19,635.3*** (408.5) 19,812.2*** (576.9)
npl2000            1,265.8* (534.7)                                        
I(hrs_82>28.5)                         1,233.5* (515.3)       488.6 (742.5)
_______________ ___________________ ___________________ ___________________
S.E. type                       IID                 IID                 IID
Observations                    487                 487                 227
R2                          0.01142             0.01168             0.00192
Adj. R2                     0.00939             0.00964            -0.00252
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

reg7nohsi <- feols(no_hs_diploma8 ~ npl2000, data = sitecovariates)
reg7nohsii <- feols(no_hs_diploma8 ~ I(hrs_82 >28.5), data = sitecovariates)
reg7nohsiii <- feols(no_hs_diploma8 ~ I(hrs_82 > 28.5), data = filter(sitecovariates, between(hrs_82, 16.5,40.5)))
etable(reg7nohsi,reg7nohsii,reg7nohsiii)

                          reg7nohsi          reg7nohsii        reg7nohsiii
Dependent Var.:      no_hs_diploma8      no_hs_diploma8     no_hs_diploma8
                                                                          
Constant         0.4175*** (0.0111)  0.4053*** (0.0104) 0.3881*** (0.0144)
npl2000         -0.0755*** (0.0135)                                       
I(hrs_82>28.5)                      -0.0624*** (0.0131)  -0.0348. (0.0185)
_______________ ___________________ ___________________ __________________
S.E. type                       IID                 IID                IID
Observations                    487                 487                227
R2                          0.06064             0.04468            0.01550
Adj. R2                     0.05871             0.04271            0.01112
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We look at all of the same variables we looked at in question 6, which leads to 12 different regressions.

For poverty rate, we see that the first and second column are almost identical. Although, we notice that with the HRS>28.5 coefficient, it is not statistically significant at any level, while the npl2000 coefficient is significant at the 10% level. When we create a range of HRS, we also see that there is more balance among the covariates in comparison to the first two columns.

For population density we see very similar results for columns 1 and 2. For column 3, we don’t see as much balance as with poverty rate. And we can see that a confidence interval for this would include zero, indicating that the coefficient on the range of HRS is not statistically significant.

For average household income, we see nearly identical results in columns 1 and 2. For column 3 we see a significant dip in the HRS score coefficient and with the standard error, our hypothesis test would consider a negative result for that coefficient. Similar to the poverty rate, we see that there is more balance among covariates in the first two columns, compared to the third column.

For no high school diploma we have the same story as the previous 9 regressions.

Overall, our results suggest that covariates are typically balanced when using NPL prior to 2000, vs not, and on the above and below HRS 28.5. When we considered the range of HRS scores, we lost statistical significance every time, and also saw less balance.

Q8: IV Necessary Assumptions

For the IV to be valid, we need the HRS to be exogenous, meaning that the covariance between HRS and the error are zero. We also need the HRS score to be correlated with whether a tract is NPL listed or not. The final assumption we require is monotonicity, meaning that NPL and HRS move in the same direction.

Q9: Regression Discontinuity Necessary Assumptions

We need HRS to vary smoothly in NPL. And we need NPL to be correlated with HRS.

Q10: 3 Facts

Because we require exogeneity, the first fact may cause IV to be invalid. Since 28.5 was chosen for manageability reasons, this may suggest that it is actually endogenous, and would fail to meet the assumptions needed. As for the last two facts, one big thing we are concerned with is sorting. Since parties were not aware of the threshold/cutoff, it seems we do not have a sorting issue. So long as the other assumptions are met (which there is no indication that they are not), both of the latter two facts would be a valid IV/RD.

Q11: 2 mile data

miledata <- read_dta('2miledata.dta')

#First Stage
FirstStage <- feols(npl2000 ~ hrs_82, cluster = miledata$statefips, data = miledata)

#Reduced Form
ReducedForm <- feols(log(mdvalhs0) ~ hrs_82, cluster = miledata$statefips, data = miledata)

#Using the fitted values to estimate in two stages, for fun
miledata1.1 <-miledata %>% mutate(D_hat = FirstStage$fitted.values)
FirstAndReduced <- feols(log(mdvalhs0) ~ D_hat, cluster = miledata1.1$statefips, data = miledata1.1)

#2SLS
TwoSLS <- feols(log(mdvalhs0) ~ 1 | npl2000 ~ hrs_82, cluster= miledata$statefips, data = miledata)

etable(FirstStage, ReducedForm, TwoSLS, FirstAndReduced, digits=6)

                            FirstStage            ReducedForm
Dependent Var.:                npl2000          log(mdvalhs0)
                                                             
Constant           0.003736 (0.038960)  11.4298*** (0.107503)
hrs_82          0.020178*** (0.000931) 0.006238*** (0.001724)
npl2000                                                      
D_hat                                                        
_______________ ______________________ ______________________
S.E.: Clustered            by: cluster            by: cluster
Observations                       483                    483
R2                             0.55801                0.04640
Adj. R2                        0.55709                0.04442

                                TwoSLS        FirstAndReduced
Dependent Var.:          log(mdvalhs0)          log(mdvalhs0)
                                                             
Constant         11.4286*** (0.106443)  11.4286*** (0.107732)
hrs_82                                                       
npl2000         0.309157*** (0.083740)                       
D_hat                                  0.309157*** (0.085426)
_______________ ______________________ ______________________
S.E.: Clustered            by: cluster            by: cluster
Observations                       483                    483
R2                             0.01276                0.04640
Adj. R2                        0.01071                0.04442
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

From using our fitted values from the first stage, we can see that the first stage and reduced form are equal to the 2SLS. (The first and reduced form have different standard errors from the 2SLS, which is expected).

Q12 State Fixed Effects

#First Stage
FirstStage12 <- feols(npl2000 ~ hrs_82 | statefips, cluster = miledata$statefips, data = miledata)

#Reduced Form
ReducedForm12 <- feols(log(mdvalhs0) ~ hrs_82 | statefips, cluster = miledata$statefips, data = miledata)

#2SLS
TwoSLS12 <- feols(log(mdvalhs0) ~ 1 | statefips | npl2000 ~ hrs_82, cluster = miledata$statefips, data = miledata)

etable(FirstStage12, ReducedForm12, TwoSLS12, digits=6)

                          FirstStage12        ReducedForm12
Dependent Var.:                npl2000        log(mdvalhs0)
                                                           
hrs_82          0.019981*** (0.001095) 0.003293* (0.001411)
npl2000                                                    
Fixed-Effects:  ---------------------- --------------------
statefips                          Yes                  Yes
_______________ ______________________ ____________________
S.E.: Clustered            by: cluster          by: cluster
Observations                       483                  483
R2                             0.61462              0.40333
Within R2                      0.52153              0.01636

                            TwoSLS12
Dependent Var.:        log(mdvalhs0)
                                    
hrs_82                              
npl2000         0.164785* (0.071730)
Fixed-Effects:  --------------------
statefips                        Yes
_______________ ____________________
S.E.: Clustered          by: cluster
Observations                     483
R2                           0.39709
Within R2                    0.00607
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We see that the reduced form has gone from approximately 0.006 to 0.003 (we also went from statistically significant at all levels, to statistically significant at the 5 percent level), which suggests that we are not controlling for everything, and furthermore, that we may have a weak instrument.

#Q13: Indicator 2SLS

#First Stage
FirstStage13 <- feols(npl2000 ~ I(hrs_82>28.5), cluster = miledata$statefips, data = miledata)

#Reduced Form
ReducedForm13 <- feols(log(mdvalhs0) ~ I(hrs_82>28.5), cluster = miledata$statefips, data = miledata)

#2SLS
TwoSLS13 <- feols(log(mdvalhs0) ~ 1 | npl2000 ~ I(hrs_82>28.5), cluster= miledata$statefips, data = miledata)

etable(FirstStage13, ReducedForm13, TwoSLS13, digits=6)

                          FirstStage13         ReducedForm13
Dependent Var.:                npl2000         log(mdvalhs0)
                                                            
Constant        0.163842*** (0.031526) 11.5279*** (0.090761)
I(hrs_82>28.5)  0.826354*** (0.033228) 0.178662** (0.062024)
npl2000                                                     
_______________ ______________________ _____________________
S.E.: Clustered            by: cluster           by: cluster
Observations                       483                   483
R2                             0.73776               0.03000
Adj. R2                        0.73721               0.02799

                             TwoSLS13
Dependent Var.:         log(mdvalhs0)
                                     
Constant        11.4925*** (0.097697)
I(hrs_82>28.5)                       
npl2000         0.216205** (0.073486)
_______________ _____________________
S.E.: Clustered           by: cluster
Observations                      483
R2                            0.02641
Adj. R2                       0.02439
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Both the first stage and reduced form coefficients increased, with the first stage increasing by nearly 600%. Because of this increase, we can conclude that by having the indicator for HRS score, the treatment effect is greater than it previously was.

Q14: Sharp or Fuzzy RD?

In this case, we will have a fuzzy RD. This is because when we cross the 28.5 HRS threshold, NPL doesn’t go from 0 to 1.

Q15: Plotting

#Data Set
plot_miledata <- miledata %>%
  mutate(run = hrs_82 - 28.5, run_bin = run %/% 1*1) %>%
  group_by(run_bin) %>%
  transmute(
    n = n(),
    mean_y = mean((mdvalhs0), na.rm = T),
    mean_treatment = mean(npl2000),
    mean_hhinc = mean(avhhin8_nbr),
    mean_povrate = mean(povrat8_nbr),
    mean_popden = mean(pop_den8_nbr),
    mean_hsdiploma = mean(no_hs_diploma8_nbr)
 ) %>% ungroup()

#Outcome with running variable
ggplot(data = plot_miledata, aes(x=run_bin, y = mean_y, group = run_bin<0)) +
  geom_vline(xintercept = 0, linewidth=0.2, color = 'blue')+
  geom_line()+
  geom_smooth()+
  geom_point()+
  labs(x= "Distance from HRS Threshold", y= "Mean Household Value")+
  theme_minimal()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#Treatment and running variable
ggplot(data = plot_miledata, aes(x=run_bin, y = mean_treatment, group = run_bin<0)) +
  geom_vline(xintercept = 0, linewidth=0.2, color = 'blue')+
  geom_line()+
  geom_smooth()+
  geom_point()+
  labs(x= "Distance from HRS Threshold", y= "Ratio of Treated Tracts")+
  theme_minimal()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#Covariates and Running Variable, 4 separate plots
ggplot(data = plot_miledata, aes(x=run_bin, y = mean_hhinc, group = run_bin<0)) +
  geom_vline(xintercept = 0, linewidth=0.2, color = 'blue')+
  geom_line()+
  geom_smooth()+
  geom_point()+
  labs(x= "Distance from HRS Threshold", y= "Mean Household Income")+
  theme_minimal()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

ggplot(data = plot_miledata, aes(x=run_bin, y = mean_povrate, group = run_bin<0)) +
  geom_vline(xintercept = 0, linewidth=0.2, color = 'blue')+
  geom_line()+
  geom_smooth()+
  geom_point()+
  labs(x= "Distance from HRS Threshold", y= "Mean share of Poverty Rate")+
  theme_minimal()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

ggplot(data = plot_miledata, aes(x=run_bin, y = mean_popden, group = run_bin<0)) +
  geom_vline(xintercept = 0, linewidth=0.2, color = 'blue')+
  geom_line()+
  geom_smooth()+
  geom_point()+
  labs(x= "Distance from HRS Threshold", y= "Mean Population Density")+
  theme_minimal()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

ggplot(data = plot_miledata, aes(x=run_bin, y = mean_hsdiploma, group = run_bin<0)) +
  geom_vline(xintercept = 0, linewidth=0.2, color = 'blue')+
  geom_line()+
  geom_smooth()+
  geom_point()+
  labs(x= "Distance from HRS Threshold", y= "Mean share without HS Diploma")+
  theme_minimal()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

#Bin Counts and Running Variable
ggplot(data = plot_miledata, aes(x=run_bin, y = n, group = run_bin<0)) +
  geom_vline(xintercept = 0, linewidth=0.2, color = 'blue')+
  geom_line()+
  geom_smooth()+
  geom_point()+
  labs(x= "Distance from HRS Threshold", y= "Number of Observations")+
  theme_minimal()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Q16: Plausable?

Most of our covariates were pretty smooth across the threshold. Furthermore, with the treated we saw a large discontinuous jump at the threshold. In the mean household value, we saw a large jump in the terms of the numbers, but a relatively small jump in the grand scheme of things, which speaks to the treatment effect on the house value. Finally, the number of observations does jump at the threshold, which is troublesome. Overall, I think this does seem like a plausible RD.

Q17: RD Estimates

miledata1.2 <- miledata %>% filter(hrs_82 %>% between(16.5,40.5))

#2SLS
FuzzyRD <- feols(log(mdvalhs0) ~ 1 | npl2000 ~ I(hrs_82>28.5), data = miledata1.2)

#State FE
FuzzyRD_StateFE <- feols(log(mdvalhs0) ~ 1 | statefips| npl2000 ~ i(hrs_82>28.5), data = miledata1.2)

etable(FuzzyRD, FuzzyRD_StateFE, digits=6)

                              FuzzyRD     FuzzyRD_StateFE
Dependent Var.:         log(mdvalhs0)       log(mdvalhs0)
                                                         
Constant        11.5553*** (0.071503)                    
npl2000           0.056360 (0.091011) 0.092180 (0.072554)
Fixed-Effects:  --------------------- -------------------
statefips                          No                 Yes
_______________ _____________________ ___________________
S.E. type                         IID       by: statefips
Observations                      226                 226
R2                           -0.00168             0.41230
Within R2                          --            -0.00676
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Without state FE, we estimate that being NPL listed leads to a 5.6% increase in median house value in 2000. With state FE we estimate a 9.2% increase in median house value in 2000 when NPL listed. Neither of these estimates are statistically significant from zero.

Q18: Which is more credible?

I would argue that the model with state FE is more credible. The reason for this is two fold; the first reason being that we see a lower standard error, which hopefully means we are capturing some of the differences between states. The second reason is that we have a much higher R2 value with state fixed effects. Granted, this could be slightly inflated with the inclusion of those fixed effects, but the model does seem to have a better fit than without. One reason for concern would be the low number of observations (which is true for both models).

Q19: What kind of Treatment Effect?

In this case, we are looking at the Local Average Treatment Effect (LATE). We estimate, as stated earlier, that when NPL is listed, we have a 9.2% increase in median house value.

Q20: Policy Relevance

If I understand the question correctly, I would say yes. When we look at this estimand, we can see whether being an NPL tract area is helpful towards housing prices. Depending on the policy goal, this estimate could be relevant in determining the effect of the policy.