data <- read.csv("data.csv")
data$post2000[data$year>=2000]<-1
data$post2000[data$year<2000]<-0

table(data$inctax)
## 
##    0    1 
## 2576 2324
lm1 <- lm(gini_coef ~ inctax*post2000, data)
summary(lm1)
## 
## Call:
## lm(formula = gini_coef ~ inctax * post2000, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.225087 -0.040171 -0.006476  0.034453  0.275603 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.456517   0.001389 328.614  < 2e-16 ***
## inctax           0.015297   0.001974   7.749 1.13e-14 ***
## post2000         0.138556   0.004277  32.394  < 2e-16 ***
## inctax:post2000 -0.018287   0.005755  -3.178  0.00149 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06293 on 4612 degrees of freedom
##   (1402 observations deleted due to missingness)
## Multiple R-squared:  0.3135, Adjusted R-squared:  0.313 
## F-statistic: 701.9 on 3 and 4612 DF,  p-value: < 2.2e-16
fakedata <- data[1:4,]

fakedata$inctax[1] <- 1
fakedata$post2000[1] <- 0

fakedata$inctax[2] <- 1
fakedata$post2000[2] <- 1

fakedata$inctax[3] <- 0
fakedata$post2000[3] <- 0

fakedata$inctax[4] <- 0
fakedata$post2000[4] <- 1

preds <- data.frame(predict(lm1, newdata=fakedata, se.fit=T)) #se.fit=T
preds$inctax <- c(1,1,0,0)
preds$post2000 <- c(0,1,0,1)

preds$cilow <- preds$fit - preds$se.fit*1.96
preds$cihigh <- preds$fit + preds$se.fit*1.96
preds$inctax <- as.character(preds$inctax)
preds$post2000 <- as.character(preds$post2000)
ggplot(preds, aes(y=fit, x=factor(inctax), color=post2000)) +
  geom_point() +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
  geom_line(aes(group=post2000)) +
  xlab("Income Tax") +
  ylab("Gini Score") +
  theme_classic()

plot(lm1$residuals)