Carregar as bibliotecas necessárias
library(dplyr)
##
## Anexando pacote: 'dplyr'
## Os seguintes objetos são mascarados por 'package:stats':
##
## filter, lag
## Os seguintes objetos são mascarados por 'package:base':
##
## intersect, setdiff, setequal, union
library(broom)
library(ggplot2)
Criar um conjunto de dados simulado
# Simulate a 2x2 difference-in-differences design: two groups (Treatment,
# Control) observed in two periods (year 0 = before, year 1 = after), with
# `n_per_cell` observations in each of the four (city, year) cells.
#
# All three columns are built at the full length (4 * n_per_cell) so that
# data.frame() never has to recycle a shorter column. Recycling previously
# misaligned the labels with the salary draws, so the mean-55 draws were not
# tied to the (Treatment, year 1) cell. Results generated from that earlier
# layout will differ from a fresh run.
set.seed(123)
n_per_cell <- 100
data <- data.frame(
  # 2 * n_per_cell rows per group: first all Treatment, then all Control.
  city = rep(c("Treatment", "Control"), each = 2 * n_per_cell),
  # Within each group: n_per_cell rows of year 0 followed by n_per_cell of year 1.
  year = rep(rep(c(0, 1), each = n_per_cell), times = 2),
  # Draw order matches the row order above, one rnorm() call per cell.
  salary = c(
    rnorm(n_per_cell, mean = 50, sd = 10), # Treatment, year 0
    rnorm(n_per_cell, mean = 55, sd = 10), # Treatment, year 1 (+5 effect)
    rnorm(n_per_cell, mean = 50, sd = 10), # Control, year 0
    rnorm(n_per_cell, mean = 50, sd = 10)  # Control, year 1
  )
)
Ajustar o modelo de Diferenças em Diferenças (DiD)
# Fit the difference-in-differences regression by OLS.
# `year * city` expands to year + city + year:city. `city` is a character
# column, so lm() treats it as a factor with "Control" as the reference level;
# the `year:cityTreatment` coefficient is therefore the DiD estimate.
model <- lm(formula = salary ~ year * city, data = data)
Resumo do modelo
# Print the coefficient table, residual quantiles and fit statistics of the
# fitted model; the `year:cityTreatment` row is the interaction (DiD) term.
summary(model)
##
## Call:
## lm(formula = salary ~ year * city, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.532 -6.443 -0.392 6.425 35.721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.87305 0.97802 53.039 <2e-16 ***
## year -0.18379 1.38314 -0.133 0.894
## cityTreatment 0.08774 1.38314 0.063 0.949
## year:cityTreatment -1.62907 1.95605 -0.833 0.405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.78 on 396 degrees of freedom
## Multiple R-squared: 0.005744, Adjusted R-squared: -0.001788
## F-statistic: 0.7626 on 3 and 396 DF, p-value: 0.5156
Visualizar os resultados
# Plot the mean salary of each group in each period, one line per city.
data %>%
  group_by(city, year) %>%
  # `.groups = "drop"` returns an ungrouped result and silences the
  # "summarise() has grouped output by 'city'" message; the lines are still
  # separated per city through the `group` aesthetic below.
  summarise(mean_salary = mean(salary), .groups = "drop") %>%
  ggplot(aes(x = year, y = mean_salary, color = city, group = city)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Diferenças em Diferenças",
    x = "Ano",
    y = "Salário Médio"
  ) +
  theme_minimal()
## `summarise()` has grouped output by 'city'. You can override using the
## `.groups` argument.
