Sample Example
## http://stats.stackexchange.com/questions/88409/which-test-should-be-used-to-compare-two-mean-differences
v1=c(504,1218,495,1259,504,1233,529,1335,515,1199,508,1326,490,1133,347,1077)
v2=matrix(v1,ncol=4)
colnames(v2) <- c("pre1", "post1", "pre2", "post2")
v3=with(data.frame(v2),data.frame(pre=c(pre1,pre2),post=c(post1,post2),
condition=rep(c(0,1),c(4,4))))
v3
## pre post condition
## 1 504 504 0
## 2 1218 1233 0
## 3 495 529 0
## 4 1259 1335 0
## 5 515 490 1
## 6 1199 1133 1
## 7 508 347 1
## 8 1326 1077 1
## Solution from Nick Stauner
summary(lm(post~scale(pre,scale=F, center=T)*condition,v3)) #scaled out nonessential multicollinearity
##
## Call:
## lm(formula = post ~ scale(pre, scale = F, center = T) * condition,
## data = v3)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -16.46 -30.39 17.90 28.95 61.51 91.75 -75.22 -78.03
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 909.6146 40.5553 22.429
## scale(pre, scale = F, center = T) 1.0405 0.1096 9.491
## condition -155.9272 57.3534 -2.719
## scale(pre, scale = F, center = T):condition -0.1447 0.1533 -0.943
## Pr(>|t|)
## (Intercept) 2.34e-05 ***
## scale(pre, scale = F, center = T) 0.000688 ***
## condition 0.053058 .
## scale(pre, scale = F, center = T):condition 0.398892
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 81.09 on 4 degrees of freedom
## Multiple R-squared: 0.9764, Adjusted R-squared: 0.9588
## F-statistic: 55.24 on 3 and 4 DF, p-value: 0.001033
library(ggplot2)
ggplot(v3,aes(x=pre,y=post,colour=factor(condition)))+geom_point()+ stat_smooth(method='lm',formula=y~scale(x,scale=F))+theme_bw()

summary(lm(post~pre*condition,v3))
##
## Call:
## lm(formula = post ~ pre * condition, data = v3)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -16.46 -30.39 17.90 28.95 61.51 91.75 -75.22 -78.03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.9531 103.5416 -0.038 0.971374
## pre 1.0405 0.1096 9.491 0.000688 ***
## condition -28.9146 146.3111 -0.198 0.852976
## pre:condition -0.1447 0.1533 -0.943 0.398892
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 81.09 on 4 degrees of freedom
## Multiple R-squared: 0.9764, Adjusted R-squared: 0.9588
## F-statistic: 55.24 on 3 and 4 DF, p-value: 0.001033
Applying Before After Data
setwd("C:/Users/s-das/Dropbox/---- TRB_2017 ----/Difference in Differences/Supporting Docs")
aa1 <- read.csv("aa1.csv")
aa1
## Before After Condition
## 1 20 32 Control Sites
## 2 57 42 Control Sites
## 3 8 6 Control Sites
## 4 57 42 Control Sites
## 5 161 172 Control Sites
## 6 319 398 Control Sites
## 7 14 12 Control Sites
## 8 45 75 Control Sites
## 9 81 87 Control Sites
## 10 21 9 Treatment Sites
## 11 118 47 Treatment Sites
## 12 39 27 Treatment Sites
## 13 118 47 Treatment Sites
## 14 126 114 Treatment Sites
## 15 358 148 Treatment Sites
## 16 65 51 Treatment Sites
## 17 116 75 Treatment Sites
## 18 115 79 Treatment Sites
ggplot(aa1,aes(x=Before,y=After,colour=factor(Condition)))+geom_point()+
stat_smooth(method='lm',formula=y~scale(x,scale=F))+theme_bw()

summary(lm(After~scale(Before,scale=F)*Condition,aa1))
##
## Call:
## lm(formula = After ~ scale(Before, scale = F) * Condition, data = aa1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.073 -18.659 -0.847 11.915 45.177
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 117.75391 7.01854
## scale(Before, scale = F) 1.23430 0.07369
## ConditionTreatment Sites -58.16038 9.92878
## scale(Before, scale = F):ConditionTreatment Sites -0.84794 0.10516
## t value Pr(>|t|)
## (Intercept) 16.778 1.15e-10 ***
## scale(Before, scale = F) 16.750 1.17e-10 ***
## ConditionTreatment Sites -5.858 4.16e-05 ***
## scale(Before, scale = F):ConditionTreatment Sites -8.063 1.25e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.7 on 14 degrees of freedom
## Multiple R-squared: 0.9576, Adjusted R-squared: 0.9486
## F-statistic: 105.5 on 3 and 14 DF, p-value: 7.549e-10
library(car)
## Warning: package 'car' was built under R version 3.2.5
leveneTest(summary(lm(After~Before,v3))$resid~factor(Condition),aa1)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0559 0.8162
## 16
summary(lm(rank(After)~scale(Before,scale=F)*Condition,aa1))
##
## Call:
## lm(formula = rank(After) ~ scale(Before, scale = F) * Condition,
## data = aa1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.311 -2.503 -1.251 3.026 5.021
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 10.05901 1.20662
## scale(Before, scale = F) 0.05115 0.01267
## ConditionTreatment Sites -0.84914 1.70695
## scale(Before, scale = F):ConditionTreatment Sites -0.01541 0.01808
## t value Pr(>|t|)
## (Intercept) 8.336 8.45e-07 ***
## scale(Before, scale = F) 4.038 0.00122 **
## ConditionTreatment Sites -0.497 0.62659
## scale(Before, scale = F):ConditionTreatment Sites -0.853 0.40827
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.559 on 14 degrees of freedom
## Multiple R-squared: 0.6329, Adjusted R-squared: 0.5543
## F-statistic: 8.047 on 3 and 14 DF, p-value: 0.002324
ggplot(aa1,aes(x=Before,y=rank(After),colour=factor(Condition)))+geom_point()+
stat_smooth(method='lm',formula=y~scale(x,scale=F))+theme_bw()

Another Example
######## http://www.princeton.edu/~otorres/DID101R.pdf
library(foreign)
mydata = read.dta("http://dss.princeton.edu/training/Panel101.dta")
head(mydata)
## country year y y_bin x1 x2 x3
## 1 A 1990 1342787840 1 0.2779036 -1.1079559 0.28255358
## 2 A 1991 -1899660544 0 0.3206847 -0.9487200 0.49253848
## 3 A 1992 -11234363 0 0.3634657 -0.7894840 0.70252335
## 4 A 1993 2645775360 1 0.2461440 -0.8855330 -0.09439092
## 5 A 1994 3008334848 1 0.4246230 -0.7297683 0.94613063
## 6 A 1995 3229574144 1 0.4772141 -0.7232460 1.02968037
## opinion
## 1 Str agree
## 2 Disag
## 3 Disag
## 4 Disag
## 5 Disag
## 6 Str agree
mydata$time = ifelse(mydata$year >= 1994, 1, 0)
mydata$treated = ifelse(mydata$country == "E" |
mydata$country == "F" |
mydata$country == "G", 1, 0)
mydata$did = mydata$time * mydata$treated
head(mydata)
## country year y y_bin x1 x2 x3
## 1 A 1990 1342787840 1 0.2779036 -1.1079559 0.28255358
## 2 A 1991 -1899660544 0 0.3206847 -0.9487200 0.49253848
## 3 A 1992 -11234363 0 0.3634657 -0.7894840 0.70252335
## 4 A 1993 2645775360 1 0.2461440 -0.8855330 -0.09439092
## 5 A 1994 3008334848 1 0.4246230 -0.7297683 0.94613063
## 6 A 1995 3229574144 1 0.4772141 -0.7232460 1.02968037
## opinion time treated did
## 1 Str agree 0 0 0
## 2 Disag 0 0 0
## 3 Disag 0 0 0
## 4 Disag 0 0 0
## 5 Disag 1 0 0
## 6 Str agree 1 0 0
didreg = lm(y ~ treated + time + did, data = mydata)
summary(didreg)
##
## Call:
## lm(formula = y ~ treated + time + did, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.768e+09 -1.623e+09 1.167e+08 1.393e+09 6.807e+09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.581e+08 7.382e+08 0.485 0.6292
## treated 1.776e+09 1.128e+09 1.575 0.1200
## time 2.289e+09 9.530e+08 2.402 0.0191 *
## did -2.520e+09 1.456e+09 -1.731 0.0882 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.953e+09 on 66 degrees of freedom
## Multiple R-squared: 0.08273, Adjusted R-squared: 0.04104
## F-statistic: 1.984 on 3 and 66 DF, p-value: 0.1249
didreg1 = lm(y ~ treated*time, data = mydata)
summary(didreg1)
##
## Call:
## lm(formula = y ~ treated * time, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.768e+09 -1.623e+09 1.167e+08 1.393e+09 6.807e+09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.581e+08 7.382e+08 0.485 0.6292
## treated 1.776e+09 1.128e+09 1.575 0.1200
## time 2.289e+09 9.530e+08 2.402 0.0191 *
## treated:time -2.520e+09 1.456e+09 -1.731 0.0882 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.953e+09 on 66 degrees of freedom
## Multiple R-squared: 0.08273, Adjusted R-squared: 0.04104
## F-statistic: 1.984 on 3 and 66 DF, p-value: 0.1249
Try Mine
setwd("C:/Users/s-das/Dropbox/---- TRB_2017 ----/Difference in Differences/Supporting Docs")
aa3 <- read.csv("aa2.csv")
aa3
## Count BA Condition BA1 Condition1
## 1 20 Before Control Sites 0 0
## 2 57 Before Control Sites 0 0
## 3 8 Before Control Sites 0 0
## 4 57 Before Control Sites 0 0
## 5 161 Before Control Sites 0 0
## 6 319 Before Control Sites 0 0
## 7 14 Before Control Sites 0 0
## 8 45 Before Control Sites 0 0
## 9 81 Before Control Sites 0 0
## 10 21 Before Treatment Sites 0 1
## 11 118 Before Treatment Sites 0 1
## 12 39 Before Treatment Sites 0 1
## 13 118 Before Treatment Sites 0 1
## 14 126 Before Treatment Sites 0 1
## 15 358 Before Treatment Sites 0 1
## 16 65 Before Treatment Sites 0 1
## 17 116 Before Treatment Sites 0 1
## 18 115 Before Treatment Sites 0 1
## 19 32 After Control Sites 1 0
## 20 42 After Control Sites 1 0
## 21 6 After Control Sites 1 0
## 22 42 After Control Sites 1 0
## 23 172 After Control Sites 1 0
## 24 398 After Control Sites 1 0
## 25 12 After Control Sites 1 0
## 26 75 After Control Sites 1 0
## 27 87 After Control Sites 1 0
## 28 9 After Treatment Sites 1 1
## 29 47 After Treatment Sites 1 1
## 30 27 After Treatment Sites 1 1
## 31 47 After Treatment Sites 1 1
## 32 114 After Treatment Sites 1 1
## 33 148 After Treatment Sites 1 1
## 34 51 After Treatment Sites 1 1
## 35 75 After Treatment Sites 1 1
## 36 79 After Treatment Sites 1 1
aa3$did = aa3$BA1 * aa3$Condition1
didreg = lm(Count ~ BA1 + Condition1 + did, data = aa3)
summary(didreg)
##
## Call:
## lm(formula = Count ~ BA1 + Condition1 + did, data = aa3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -98.56 -55.25 -19.33 7.00 301.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 84.67 31.87 2.656 0.0122 *
## BA1 11.56 45.08 0.256 0.7993
## Condition1 34.89 45.08 0.774 0.4446
## did -64.78 63.75 -1.016 0.3172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 95.62 on 32 degrees of freedom
## Multiple R-squared: 0.0438, Adjusted R-squared: -0.04584
## F-statistic: 0.4886 on 3 and 32 DF, p-value: 0.6926
didreg = lm(Count ~ BA1*Condition1, data = aa3)
summary(didreg)
##
## Call:
## lm(formula = Count ~ BA1 * Condition1, data = aa3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -98.56 -55.25 -19.33 7.00 301.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 84.67 31.87 2.656 0.0122 *
## BA1 11.56 45.08 0.256 0.7993
## Condition1 34.89 45.08 0.774 0.4446
## BA1:Condition1 -64.78 63.75 -1.016 0.3172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 95.62 on 32 degrees of freedom
## Multiple R-squared: 0.0438, Adjusted R-squared: -0.04584
## F-statistic: 0.4886 on 3 and 32 DF, p-value: 0.6926
#### DID in wfe
### library(wfe)