library(wooldridge)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Load the minwage data set from the wooldridge package.
data("minwage", package = "wooldridge")

# Add one-period lags of gwage232 and gemp232.
# dplyr::lag() is named explicitly: it shifts the values of a plain vector,
# whereas stats::lag() only moves the time base of a series and would hand a
# plain vector back unshifted, without any error.
minwage <- minwage %>%
  mutate(
    gwage232_lag1 = dplyr::lag(gwage232, n = 1L),
    gemp232_lag1 = dplyr::lag(gemp232, n = 1L)
  )

# Drop every row that has an NA in any column, the two new lag columns
# included.
# NOTE(review): this also removes rows whose only missing values sit in
# columns the regressions below never use; confirm that restriction is wanted.
minwage <- na.omit(minwage)

# Create a lagged variable for gwage232, this time on the reduced data. The
# first remaining row has no predecessor, so its lag is NA.
minwage$lag_gwage232 <- dplyr::lag(minwage$gwage232)

# (i) First-order autocorrelation and weak stationarity
# Keep only the rows where both gwage232 and lag_gwage232 are observed.
filtered_data <- minwage[
  complete.cases(minwage$gwage232, minwage$lag_gwage232),
]

# Sample autocorrelation of gwage232 over the rows of filtered_data, limited
# to lags 0 and 1; the lag-1 value is the first-order autocorrelation.
# plot is left at its default, so acf() draws the correlogram instead of
# printing the numbers.
acf(filtered_data$gwage232, lag.max = 1)

# (ii) Dynamic model: OLS regression of gwage232 on its own first lag
# (lag_gwage232) together with gmwage and gcpi, fitted on filtered_data.
model1 <- lm(
  formula = gwage232 ~ lag_gwage232 + gmwage + gcpi,
  data = filtered_data
)
summary(model1)
## 
## Call:
## lm(formula = gwage232 ~ lag_gwage232 + gmwage + gcpi, data = filtered_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.044651 -0.004120 -0.001272  0.004487  0.041568 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.002371   0.000433   5.475 6.45e-08 ***
## lag_gwage232 -0.068681   0.034467  -1.993  0.04676 *  
## gmwage        0.151749   0.009519  15.941  < 2e-16 ***
## gcpi          0.257423   0.086571   2.974  0.00306 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007781 on 594 degrees of freedom
## Multiple R-squared:  0.3067, Adjusted R-squared:  0.3032 
## F-statistic: 87.58 on 3 and 594 DF,  p-value: < 2.2e-16
# (iii) Adding lagged employment growth
# The lag of gemp232 is taken from the gemp232_lag1 column built during data
# preparation rather than from a lag() call inside the formula. An in-formula
# lag() is evaluated on filtered_data itself: its first row has no
# predecessor, becomes NA and is dropped by lm(), so model2 ended up fitted on
# one row fewer than model1. Using the stored column keeps both models on the
# same rows, which the comparison in (iv) needs, and no longer depends on
# dplyr's lag() masking stats::lag().
# NOTE: output recorded below for model2 came from the earlier lag(gemp232)
# version (592 residual df, 1 observation deleted); re-run to refresh it. The
# regressor is now reported under the name gemp232_lag1.
model2 <- lm(
  gwage232 ~ lag_gwage232 + gmwage + gcpi + gemp232_lag1,
  data = filtered_data
)
summary(model2)
## 
## Call:
## lm(formula = gwage232 ~ lag_gwage232 + gmwage + gcpi + lag(gemp232), 
##     data = filtered_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.043901 -0.004332 -0.000965  0.004275  0.042430 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.0024096  0.0004308   5.593 3.41e-08 ***
## lag_gwage232 -0.0658257  0.0341679  -1.927 0.054516 .  
## gmwage        0.1525343  0.0094366  16.164  < 2e-16 ***
## gcpi          0.2510616  0.0860926   2.916 0.003678 ** 
## lag(gemp232)  0.0608256  0.0170106   3.576 0.000378 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007711 on 592 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3212, Adjusted R-squared:  0.3166 
## F-statistic: 70.03 on 4 and 592 DF,  p-value: < 2.2e-16
# (iv) Show the gmwage coefficient from each model so the estimate without
# lagged employment growth (model1) can be read against the one with it
# (model2).
coefficients(model1)["gmwage"]
##    gmwage 
## 0.1517485
coefficients(model2)["gmwage"]
##    gmwage 
## 0.1525343
# (v) R-squared for gmwage regression
# Regress gmwage on the lags of gwage232 and gemp232 and report only the
# R-squared. The stored lag columns (lag_gwage232, gemp232_lag1) are used
# instead of lag() calls inside the formula: in-formula lags are recomputed on
# filtered_data, turn its first row into NA and drop it, and only shift the
# data at all because dplyr's lag() masks stats::lag(). With the stored
# columns the regression runs on the same rows and the same lagged regressor
# as model1.
# NOTE: the R-squared recorded below was produced by the earlier in-formula
# lag() version; re-run to refresh it.
model3 <- lm(gmwage ~ lag_gwage232 + gemp232_lag1, data = filtered_data)
summary(model3)$r.squared
## [1] 0.004078661