#install.packages("dplyr")
cities <- read.csv('C:/Users/canda/DEM7903 Causal Inference/Homeownership FC.csv')
View(cities)
cities$Proximity <- as.numeric(cities$Proximity)
class(cities$Proximity)
## [1] "numeric"
View(cities)
plot(cities$Dbw2000,cities$pctbown00,pch=20,
     xlab='D index',ylab='Percent Black Homeowners',
     main='Cities by Residential segregtaion and Black homeownership in Year 2000')

cor(cities$Dbw2000,cities$pctbown00)
## [1] 0.2205872
cor.test(cities$Dbw2000,cities$pctbown00)
## 
##  Pearson's product-moment correlation
## 
## data:  cities$Dbw2000 and cities$pctbown00
## t = 3.1904, df = 199, p-value = 0.001651
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08478066 0.34834751
## sample estimates:
##       cor 
## 0.2205872
cit2 <- lm(t_00~Dbw2000+pctbown00,data=cities)
summary(cit2)
## 
## Call:
## lm(formula = t_00 ~ Dbw2000 + pctbown00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -223690  -72106  -23687   21499 1729889 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -46559.0    28694.4  -1.623    0.106    
## Dbw2000       3879.6      840.7   4.615 7.07e-06 ***
## pctbown00     -663.9     2051.1  -0.324    0.747    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 183100 on 198 degrees of freedom
## Multiple R-squared:  0.09918,    Adjusted R-squared:  0.09009 
## F-statistic:  10.9 on 2 and 198 DF,  p-value: 3.228e-05
library(lme4)
## Loading required package: Matrix
city3<-lmer(Dbw2000~pctbown00+(1|pctbown00),data=cities)
summary(city3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Dbw2000 ~ pctbown00 + (1 | pctbown00)
##    Data: cities
## 
## REML criterion at convergence: 1665.5
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.66924 -0.61415 -0.00613  0.53549  2.00319 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  pctbown00 (Intercept)  93.95    9.693  
##  Residual              146.51   12.104  
## Number of obs: 201, groups:  pctbown00, 175
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  28.2803     1.4344  19.716
## pctbown00     0.5231     0.1716   3.049
## 
## Correlation of Fixed Effects:
##           (Intr)
## pctbown00 -0.610
#library(mediation)
#install.packages("lavaan")
library(lavaan)
## This is lavaan 0.6-11
## lavaan is FREE software! Please report any bugs.
str(cities)
## 'data.frame':    201 obs. of  13 variables:
##  $ statefp20: int  48 48 48 48 48 48 48 48 48 48 ...
##  $ placefp20: int  26544 48804 41212 20092 13492 58820 7000 46776 41116 48772 ...
##  $ cityname : chr  "Forest Hill" "Missouri City" "Lancaster" "DeSoto" ...
##  $ Proximity: num  0 1 0 0 0 0 0 1 1 0 ...
##  $ b_20     : int  5630 31255 27716 40012 26719 21645 55824 8650 5803 10287 ...
##  $ t_20     : int  13955 74259 41275 56145 49148 56039 115282 23392 18030 36914 ...
##  $ pb_20    : num  40.3 42.1 67.2 71.3 54.4 ...
##  $ dbw_20   : num  17.6 41.2 40 21.3 32.4 ...
##  $ b_00     : int  7414 20486 13814 17242 10918 25360 52461 9263 4767 6770 ...
##  $ t_00     : int  12949 52913 25894 37646 32093 57755 113866 23935 13682 30831 ...
##  $ pb_00    : num  57.3 38.7 53.4 45.8 34 ...
##  $ Dbw2000  : num  28.8 52.9 44.8 15.9 32.9 ...
##  $ pctbown00: num  50.4 32.9 29.6 28.8 26 ...
fit.totaleffect=lm(pctbown00~Dbw2000,cities)
summary(fit.totaleffect)
## 
## Call:
## lm(formula = pctbown00 ~ Dbw2000, data = cities)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.160 -3.365 -1.672  1.167 45.725 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  2.08965    0.98057   2.131  0.03431 * 
## Dbw2000      0.09042    0.02834   3.190  0.00165 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.328 on 199 degrees of freedom
## Multiple R-squared:  0.04866,    Adjusted R-squared:  0.04388 
## F-statistic: 10.18 on 1 and 199 DF,  p-value: 0.001651
fit.realeffect=lm(Dbw2000~pctbown00,cities)
summary(fit.realeffect)
## 
## Call:
## lm(formula = Dbw2000 ~ pctbown00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.204 -11.181  -0.123  10.735  39.794 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  28.1832     1.3646   20.65  < 2e-16 ***
## pctbown00     0.5382     0.1687    3.19  0.00165 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.44 on 199 degrees of freedom
## Multiple R-squared:  0.04866,    Adjusted R-squared:  0.04388 
## F-statistic: 10.18 on 1 and 199 DF,  p-value: 0.001651
fit.othereffect=lm(pb_00~pctbown00,cities)
summary(fit.othereffect)
## 
## Call:
## lm(formula = pb_00 ~ pctbown00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.369  -2.798  -1.130   1.829  28.382 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.90820    0.43823   6.636 2.98e-10 ***
## pctbown00    1.54146    0.05417  28.455  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.958 on 199 degrees of freedom
## Multiple R-squared:  0.8027, Adjusted R-squared:  0.8017 
## F-statistic: 809.7 on 1 and 199 DF,  p-value: < 2.2e-16
fit.mediator=lm(Proximity~pctbown00,cities)
summary(fit.mediator)
## 
## Call:
## lm(formula = Proximity ~ pctbown00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8475 -0.2685 -0.2401  0.6402  0.7700 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.228730   0.039628   5.772 2.97e-08 ***
## pctbown00   0.012272   0.004899   2.505    0.013 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4483 on 199 degrees of freedom
## Multiple R-squared:  0.03057,    Adjusted R-squared:  0.0257 
## F-statistic: 6.276 on 1 and 199 DF,  p-value: 0.01304
fit.new=lm(Proximity~pb_00,cities)
summary(fit.new)
## 
## Call:
## lm(formula = Proximity ~ pb_00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8219 -0.2438 -0.1947  0.5260  0.8288 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.169865   0.042295   4.016 8.38e-05 ***
## pb_00       0.011388   0.002777   4.101 5.99e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4373 on 199 degrees of freedom
## Multiple R-squared:  0.07793,    Adjusted R-squared:  0.0733 
## F-statistic: 16.82 on 1 and 199 DF,  p-value: 5.992e-05
fit.newnew=lm(Proximity~dbw_20,cities)
summary(fit.newnew)
## 
## Call:
## lm(formula = Proximity ~ dbw_20, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6196 -0.3110 -0.2159  0.5593  0.8615 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 0.033497   0.085210   0.393   0.6947   
## dbw_20      0.009172   0.002850   3.219   0.0015 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.444 on 199 degrees of freedom
## Multiple R-squared:  0.04948,    Adjusted R-squared:  0.0447 
## F-statistic: 10.36 on 1 and 199 DF,  p-value: 0.001504
fit.other=lm(pb_00~Dbw2000,cities)
summary(fit.other)
## 
## Call:
## lm(formula = pb_00 ~ Dbw2000, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.682  -6.951  -2.864   3.652  47.282 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.55271    1.64089   2.165   0.0316 *  
## Dbw2000      0.22301    0.04743   4.702  4.8e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.59 on 199 degrees of freedom
## Multiple R-squared:    0.1,  Adjusted R-squared:  0.09548 
## F-statistic: 22.11 on 1 and 199 DF,  p-value: 4.8e-06
fit.dv=lm(pctbown00~Dbw2000+Proximity,cities)
summary(fit.dv)
## 
## Call:
## lm(formula = pctbown00 ~ Dbw2000 + Proximity, data = cities)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.801 -3.289 -1.650  0.724 46.221 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.97488    0.97732   2.021  0.04466 * 
## Dbw2000      0.07719    0.02914   2.649  0.00873 **
## Proximity    1.80977    1.01302   1.787  0.07555 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.294 on 198 degrees of freedom
## Multiple R-squared:  0.06375,    Adjusted R-squared:  0.05429 
## F-statistic: 6.741 on 2 and 198 DF,  p-value: 0.001472
fit.main=lm(Dbw2000~Proximity+pctbown00,cities)
summary(fit.main)
## 
## Call:
## lm(formula = Dbw2000 ~ Proximity + pctbown00, data = cities)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.639 -10.831  -0.328   9.328  41.569 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  26.4159     1.4405  18.338  < 2e-16 ***
## Proximity     7.7264     2.3849   3.240  0.00140 ** 
## pctbown00     0.4433     0.1674   2.649  0.00873 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.08 on 198 degrees of freedom
## Multiple R-squared:  0.09655,    Adjusted R-squared:  0.08743 
## F-statistic: 10.58 on 2 and 198 DF,  p-value: 4.31e-05
specmod <- "#Path c
Dbw2000 ~ c*pctbown00
#Path a
Proximity ~ a*Dbw2000
#Path b
Proximity ~ b*pctbown00
#Indirect effect (a*b)
ab :=a*b
"
fitmod<- sem(specmod, data=cities)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan
## WARNING: some observed variances are (at least) a factor 1000 times larger than
## others; use varTable(fit) to investigate
summary(fitmod,fit.measures=TRUE, rsquare=TRUE)
## lavaan 0.6-11 ended normally after 1 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
##                                                       
##   Number of observations                           201
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                                26.650
##   Degrees of freedom                                 3
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)               -952.092
##   Loglikelihood unrestricted model (H1)       -952.092
##                                                       
##   Akaike (AIC)                                1914.185
##   Bayesian (BIC)                              1930.701
##   Sample-size adjusted Bayesian (BIC)         1914.860
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value RMSEA <= 0.05                             NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Dbw2000 ~                                           
##     pctbown00  (c)    0.538    0.168    3.206    0.001
##   Proximity ~                                         
##     Dbw2000    (a)    0.007    0.002    3.264    0.001
##     pctbown00  (b)    0.009    0.005    1.800    0.072
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Dbw2000         235.999   23.541   10.025    0.000
##    .Proximity         0.189    0.019   10.025    0.000
## 
## R-Square:
##                    Estimate
##     Dbw2000           0.049
##     Proximity         0.079
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.000    0.000    1.748    0.081
set.seed(2000)

fitmod2 <- sem(specmod, data=cities, se="bootstrap", bootstrap=3000)
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan
## WARNING: some observed variances are (at least) a factor 1000 times larger than
## others; use varTable(fit) to investigate
parameterEstimates(fitmod2, ci = TRUE, level=0.95, boot.ci.type="perc")
##         lhs op       rhs label     est     se      z pvalue ci.lower ci.upper
## 1   Dbw2000  ~ pctbown00     c   0.538  0.218  2.463  0.014    0.222    1.075
## 2 Proximity  ~   Dbw2000     a   0.007  0.002  3.315  0.001    0.003    0.010
## 3 Proximity  ~ pctbown00     b   0.009  0.007  1.334  0.182    0.000    0.026
## 4   Dbw2000 ~~   Dbw2000       235.999 21.544 10.955  0.000  192.447  276.757
## 5 Proximity ~~ Proximity         0.189  0.014 13.346  0.000    0.156    0.212
## 6 pctbown00 ~~ pctbown00        41.678  0.000     NA     NA   41.678   41.678
## 7        ab :=       a*b    ab   0.000  0.000  1.410  0.159    0.000    0.000
summary(fitmod2)
## lavaan 0.6-11 ended normally after 1 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
##                                                       
##   Number of observations                           201
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                            Bootstrap
##   Number of requested bootstrap draws             3000
##   Number of successful bootstrap draws            3000
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   Dbw2000 ~                                           
##     pctbown00  (c)    0.538    0.218    2.463    0.014
##   Proximity ~                                         
##     Dbw2000    (a)    0.007    0.002    3.315    0.001
##     pctbown00  (b)    0.009    0.007    1.334    0.182
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Dbw2000         235.999   21.544   10.955    0.000
##    .Proximity         0.189    0.014   13.346    0.000
## 
## Defined Parameters:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     ab                0.000    0.000    1.410    0.159