Sample Example

## http://stats.stackexchange.com/questions/88409/which-test-should-be-used-to-compare-two-mean-differences
v1=c(504,1218,495,1259,504,1233,529,1335,515,1199,508,1326,490,1133,347,1077)
v2=matrix(v1,ncol=4)
colnames(v2) <- c("pre1", "post1", "pre2", "post2")

v3=with(data.frame(v2),data.frame(pre=c(pre1,pre2),post=c(post1,post2),
                                  condition=rep(c(0,1),c(4,4)))) 
v3
##    pre post condition
## 1  504  504         0
## 2 1218 1233         0
## 3  495  529         0
## 4 1259 1335         0
## 5  515  490         1
## 6 1199 1133         1
## 7  508  347         1
## 8 1326 1077         1
## Solution from Nick Stauner
summary(lm(post~scale(pre,scale=F, center=T)*condition,v3))   #scaled out nonessential multicollinearity
## 
## Call:
## lm(formula = post ~ scale(pre, scale = F, center = T) * condition, 
##     data = v3)
## 
## Residuals:
##      1      2      3      4      5      6      7      8 
## -16.46 -30.39  17.90  28.95  61.51  91.75 -75.22 -78.03 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                  909.6146    40.5553  22.429
## scale(pre, scale = F, center = T)              1.0405     0.1096   9.491
## condition                                   -155.9272    57.3534  -2.719
## scale(pre, scale = F, center = T):condition   -0.1447     0.1533  -0.943
##                                             Pr(>|t|)    
## (Intercept)                                 2.34e-05 ***
## scale(pre, scale = F, center = T)           0.000688 ***
## condition                                   0.053058 .  
## scale(pre, scale = F, center = T):condition 0.398892    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 81.09 on 4 degrees of freedom
## Multiple R-squared:  0.9764, Adjusted R-squared:  0.9588 
## F-statistic: 55.24 on 3 and 4 DF,  p-value: 0.001033
library(ggplot2)
ggplot(v3,aes(x=pre,y=post,colour=factor(condition)))+geom_point()+ stat_smooth(method='lm',formula=y~scale(x,scale=F))+theme_bw()

summary(lm(post~pre*condition,v3))  
## 
## Call:
## lm(formula = post ~ pre * condition, data = v3)
## 
## Residuals:
##      1      2      3      4      5      6      7      8 
## -16.46 -30.39  17.90  28.95  61.51  91.75 -75.22 -78.03 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -3.9531   103.5416  -0.038 0.971374    
## pre             1.0405     0.1096   9.491 0.000688 ***
## condition     -28.9146   146.3111  -0.198 0.852976    
## pre:condition  -0.1447     0.1533  -0.943 0.398892    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 81.09 on 4 degrees of freedom
## Multiple R-squared:  0.9764, Adjusted R-squared:  0.9588 
## F-statistic: 55.24 on 3 and 4 DF,  p-value: 0.001033

Applying Before After Data

setwd("C:/Users/s-das/Dropbox/---- TRB_2017 ----/Difference in Differences/Supporting Docs")
aa1 <- read.csv("aa1.csv") 
aa1
##    Before After       Condition
## 1      20    32   Control Sites
## 2      57    42   Control Sites
## 3       8     6   Control Sites
## 4      57    42   Control Sites
## 5     161   172   Control Sites
## 6     319   398   Control Sites
## 7      14    12   Control Sites
## 8      45    75   Control Sites
## 9      81    87   Control Sites
## 10     21     9 Treatment Sites
## 11    118    47 Treatment Sites
## 12     39    27 Treatment Sites
## 13    118    47 Treatment Sites
## 14    126   114 Treatment Sites
## 15    358   148 Treatment Sites
## 16     65    51 Treatment Sites
## 17    116    75 Treatment Sites
## 18    115    79 Treatment Sites
ggplot(aa1,aes(x=Before,y=After,colour=factor(Condition)))+geom_point()+
  stat_smooth(method='lm',formula=y~scale(x,scale=F))+theme_bw()

summary(lm(After~scale(Before,scale=F)*Condition,aa1))
## 
## Call:
## lm(formula = After ~ scale(Before, scale = F) * Condition, data = aa1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.073 -18.659  -0.847  11.915  45.177 
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                       117.75391    7.01854
## scale(Before, scale = F)                            1.23430    0.07369
## ConditionTreatment Sites                          -58.16038    9.92878
## scale(Before, scale = F):ConditionTreatment Sites  -0.84794    0.10516
##                                                   t value Pr(>|t|)    
## (Intercept)                                        16.778 1.15e-10 ***
## scale(Before, scale = F)                           16.750 1.17e-10 ***
## ConditionTreatment Sites                           -5.858 4.16e-05 ***
## scale(Before, scale = F):ConditionTreatment Sites  -8.063 1.25e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.7 on 14 degrees of freedom
## Multiple R-squared:  0.9576, Adjusted R-squared:  0.9486 
## F-statistic: 105.5 on 3 and 14 DF,  p-value: 7.549e-10
library(car)
## Warning: package 'car' was built under R version 3.2.5
leveneTest(summary(lm(After~Before,v3))$resid~factor(Condition),aa1)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.0559 0.8162
##       16
summary(lm(rank(After)~scale(Before,scale=F)*Condition,aa1))
## 
## Call:
## lm(formula = rank(After) ~ scale(Before, scale = F) * Condition, 
##     data = aa1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.311 -2.503 -1.251  3.026  5.021 
## 
## Coefficients:
##                                                   Estimate Std. Error
## (Intercept)                                       10.05901    1.20662
## scale(Before, scale = F)                           0.05115    0.01267
## ConditionTreatment Sites                          -0.84914    1.70695
## scale(Before, scale = F):ConditionTreatment Sites -0.01541    0.01808
##                                                   t value Pr(>|t|)    
## (Intercept)                                         8.336 8.45e-07 ***
## scale(Before, scale = F)                            4.038  0.00122 ** 
## ConditionTreatment Sites                           -0.497  0.62659    
## scale(Before, scale = F):ConditionTreatment Sites  -0.853  0.40827    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.559 on 14 degrees of freedom
## Multiple R-squared:  0.6329, Adjusted R-squared:  0.5543 
## F-statistic: 8.047 on 3 and 14 DF,  p-value: 0.002324
ggplot(aa1,aes(x=Before,y=rank(After),colour=factor(Condition)))+geom_point()+
  stat_smooth(method='lm',formula=y~scale(x,scale=F))+theme_bw()

Another Example

######## http://www.princeton.edu/~otorres/DID101R.pdf
library(foreign)
mydata = read.dta("http://dss.princeton.edu/training/Panel101.dta")
head(mydata)
##   country year           y y_bin        x1         x2          x3
## 1       A 1990  1342787840     1 0.2779036 -1.1079559  0.28255358
## 2       A 1991 -1899660544     0 0.3206847 -0.9487200  0.49253848
## 3       A 1992   -11234363     0 0.3634657 -0.7894840  0.70252335
## 4       A 1993  2645775360     1 0.2461440 -0.8855330 -0.09439092
## 5       A 1994  3008334848     1 0.4246230 -0.7297683  0.94613063
## 6       A 1995  3229574144     1 0.4772141 -0.7232460  1.02968037
##     opinion
## 1 Str agree
## 2     Disag
## 3     Disag
## 4     Disag
## 5     Disag
## 6 Str agree
mydata$time = ifelse(mydata$year >= 1994, 1, 0)

mydata$treated = ifelse(mydata$country == "E" |
                          mydata$country == "F" |
                          mydata$country == "G", 1, 0)

mydata$did = mydata$time * mydata$treated
head(mydata)
##   country year           y y_bin        x1         x2          x3
## 1       A 1990  1342787840     1 0.2779036 -1.1079559  0.28255358
## 2       A 1991 -1899660544     0 0.3206847 -0.9487200  0.49253848
## 3       A 1992   -11234363     0 0.3634657 -0.7894840  0.70252335
## 4       A 1993  2645775360     1 0.2461440 -0.8855330 -0.09439092
## 5       A 1994  3008334848     1 0.4246230 -0.7297683  0.94613063
## 6       A 1995  3229574144     1 0.4772141 -0.7232460  1.02968037
##     opinion time treated did
## 1 Str agree    0       0   0
## 2     Disag    0       0   0
## 3     Disag    0       0   0
## 4     Disag    0       0   0
## 5     Disag    1       0   0
## 6 Str agree    1       0   0
didreg = lm(y ~ treated + time + did, data = mydata)
summary(didreg)
## 
## Call:
## lm(formula = y ~ treated + time + did, data = mydata)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.768e+09 -1.623e+09  1.167e+08  1.393e+09  6.807e+09 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  3.581e+08  7.382e+08   0.485   0.6292  
## treated      1.776e+09  1.128e+09   1.575   0.1200  
## time         2.289e+09  9.530e+08   2.402   0.0191 *
## did         -2.520e+09  1.456e+09  -1.731   0.0882 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.953e+09 on 66 degrees of freedom
## Multiple R-squared:  0.08273,    Adjusted R-squared:  0.04104 
## F-statistic: 1.984 on 3 and 66 DF,  p-value: 0.1249
didreg1 = lm(y ~ treated*time, data = mydata)
summary(didreg1)
## 
## Call:
## lm(formula = y ~ treated * time, data = mydata)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.768e+09 -1.623e+09  1.167e+08  1.393e+09  6.807e+09 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   3.581e+08  7.382e+08   0.485   0.6292  
## treated       1.776e+09  1.128e+09   1.575   0.1200  
## time          2.289e+09  9.530e+08   2.402   0.0191 *
## treated:time -2.520e+09  1.456e+09  -1.731   0.0882 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.953e+09 on 66 degrees of freedom
## Multiple R-squared:  0.08273,    Adjusted R-squared:  0.04104 
## F-statistic: 1.984 on 3 and 66 DF,  p-value: 0.1249

Try Mine

setwd("C:/Users/s-das/Dropbox/---- TRB_2017 ----/Difference in Differences/Supporting Docs")
aa3 <- read.csv("aa2.csv") 
aa3
##    Count     BA       Condition BA1 Condition1
## 1     20 Before   Control Sites   0          0
## 2     57 Before   Control Sites   0          0
## 3      8 Before   Control Sites   0          0
## 4     57 Before   Control Sites   0          0
## 5    161 Before   Control Sites   0          0
## 6    319 Before   Control Sites   0          0
## 7     14 Before   Control Sites   0          0
## 8     45 Before   Control Sites   0          0
## 9     81 Before   Control Sites   0          0
## 10    21 Before Treatment Sites   0          1
## 11   118 Before Treatment Sites   0          1
## 12    39 Before Treatment Sites   0          1
## 13   118 Before Treatment Sites   0          1
## 14   126 Before Treatment Sites   0          1
## 15   358 Before Treatment Sites   0          1
## 16    65 Before Treatment Sites   0          1
## 17   116 Before Treatment Sites   0          1
## 18   115 Before Treatment Sites   0          1
## 19    32  After   Control Sites   1          0
## 20    42  After   Control Sites   1          0
## 21     6  After   Control Sites   1          0
## 22    42  After   Control Sites   1          0
## 23   172  After   Control Sites   1          0
## 24   398  After   Control Sites   1          0
## 25    12  After   Control Sites   1          0
## 26    75  After   Control Sites   1          0
## 27    87  After   Control Sites   1          0
## 28     9  After Treatment Sites   1          1
## 29    47  After Treatment Sites   1          1
## 30    27  After Treatment Sites   1          1
## 31    47  After Treatment Sites   1          1
## 32   114  After Treatment Sites   1          1
## 33   148  After Treatment Sites   1          1
## 34    51  After Treatment Sites   1          1
## 35    75  After Treatment Sites   1          1
## 36    79  After Treatment Sites   1          1
aa3$did = aa3$BA1 * aa3$Condition1
didreg = lm(Count ~ BA1 + Condition1 + did, data = aa3)
summary(didreg)
## 
## Call:
## lm(formula = Count ~ BA1 + Condition1 + did, data = aa3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -98.56 -55.25 -19.33   7.00 301.78 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    84.67      31.87   2.656   0.0122 *
## BA1            11.56      45.08   0.256   0.7993  
## Condition1     34.89      45.08   0.774   0.4446  
## did           -64.78      63.75  -1.016   0.3172  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95.62 on 32 degrees of freedom
## Multiple R-squared:  0.0438, Adjusted R-squared:  -0.04584 
## F-statistic: 0.4886 on 3 and 32 DF,  p-value: 0.6926
didreg = lm(Count ~ BA1*Condition1, data = aa3)
summary(didreg)
## 
## Call:
## lm(formula = Count ~ BA1 * Condition1, data = aa3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -98.56 -55.25 -19.33   7.00 301.78 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       84.67      31.87   2.656   0.0122 *
## BA1               11.56      45.08   0.256   0.7993  
## Condition1        34.89      45.08   0.774   0.4446  
## BA1:Condition1   -64.78      63.75  -1.016   0.3172  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95.62 on 32 degrees of freedom
## Multiple R-squared:  0.0438, Adjusted R-squared:  -0.04584 
## F-statistic: 0.4886 on 3 and 32 DF,  p-value: 0.6926
#### DID in wfe

### library(wfe)