data <- read.csv("data.csv")
data$post2000[data$year>=2000]<-1
data$post2000[data$year<2000]<-0
table(data$inctax)
##
## 0 1
## 2576 2324
lm1 <- lm(gini_coef ~ inctax*post2000, data)
summary(lm1)
##
## Call:
## lm(formula = gini_coef ~ inctax * post2000, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.225087 -0.040171 -0.006476 0.034453 0.275603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.456517 0.001389 328.614 < 2e-16 ***
## inctax 0.015297 0.001974 7.749 1.13e-14 ***
## post2000 0.138556 0.004277 32.394 < 2e-16 ***
## inctax:post2000 -0.018287 0.005755 -3.178 0.00149 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06293 on 4612 degrees of freedom
## (1402 observations deleted due to missingness)
## Multiple R-squared: 0.3135, Adjusted R-squared: 0.313
## F-statistic: 701.9 on 3 and 4612 DF, p-value: < 2.2e-16
fakedata <- data[1:4,]
fakedata$inctax[1] <- 1
fakedata$post2000[1] <- 0
fakedata$inctax[2] <- 1
fakedata$post2000[2] <- 1
fakedata$inctax[3] <- 0
fakedata$post2000[3] <- 0
fakedata$inctax[4] <- 0
fakedata$post2000[4] <- 1
preds <- data.frame(predict(lm1, newdata=fakedata, se.fit=T)) #se.fit=T
preds$inctax <- c(1,1,0,0)
preds$post2000 <- c(0,1,0,1)
preds$cilow <- preds$fit - preds$se.fit*1.96
preds$cihigh <- preds$fit + preds$se.fit*1.96
preds$inctax <- as.character(preds$inctax)
preds$post2000 <- as.character(preds$post2000)
ggplot(preds, aes(y=fit, x=factor(inctax), color=post2000)) +
geom_point() +
geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
geom_line(aes(group=post2000)) +
xlab("Income Tax") +
ylab("Gini Score") +
theme_classic()

plot(lm1$residuals)
