# Load necessary library
install.packages("dplyr", repos = "http://cran.us.r-project.org")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
# Simulate data for MINWAGE
set.seed(123)
n <- 100
gemp232 <- rnorm(n, mean = 0.02, sd = 0.01)  # growth in employment
gmwage <- rnorm(n, mean = 0.01, sd = 0.005)  # growth in federal minimum wage
gcpi <- rnorm(n, mean = 0.02, sd = 0.01)     # growth in CPI
gwage232 <- rep(NA, n)
gwage232[1] <- rnorm(1, mean = 0.03, sd = 0.005)

# Simulate gwage232 as an AR(1) process
for (t in 2:n) {
  gwage232[t] <- 0.7 * gwage232[t-1] + 0.3 * gmwage[t] + 0.2 * gcpi[t] + rnorm(1, mean = 0, sd = 0.005)
}

minwage_data <- data.frame(gwage232, gemp232, gmwage, gcpi)

# Exercise C12
# (i) Find the first-order autocorrelation of gwage232
acf_gwage <- acf(minwage_data$gwage232, plot = FALSE)
autocorrelation <- acf_gwage$acf[2]
autocorrelation
## [1] 0.6046719
# (ii) Estimate the dynamic model by OLS
model_dynamic <- lm(gwage232 ~ lag(gwage232, 1) + gmwage + gcpi, data = minwage_data)
summary(model_dynamic)
## Warning in summary.lm(model_dynamic): essentially perfect fit: summary may be
## unreliable
## 
## Call:
## lm(formula = gwage232 ~ lag(gwage232, 1) + gmwage + gcpi, data = minwage_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.987e-18 -1.852e-19  3.150e-20  2.269e-19  1.638e-18 
## 
## Coefficients:
##                    Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)       1.110e-17  4.493e-19  2.471e+01   <2e-16 ***
## lag(gwage232, 1)  1.000e+00  1.773e-17  5.640e+16   <2e-16 ***
## gmwage            2.063e-17  2.481e-17  8.320e-01    0.408    
## gcpi             -1.298e-17  1.299e-17 -1.000e+00    0.320    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161e-18 on 96 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.249e+33 on 3 and 96 DF,  p-value: < 2.2e-16
# (iii) Add lagged gemp232 and test for significance
minwage_data$lag_gemp232 <- lag(minwage_data$gemp232, 1)
model_with_lagged_gemp <- lm(gwage232 ~ lag(gwage232, 1) + gmwage + gcpi + lag_gemp232, data = minwage_data)
summary(model_with_lagged_gemp)
## Warning in summary.lm(model_with_lagged_gemp): essentially perfect fit: summary
## may be unreliable
## 
## Call:
## lm(formula = gwage232 ~ lag(gwage232, 1) + gmwage + gcpi + lag_gemp232, 
##     data = minwage_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.976e-18 -1.713e-19  3.460e-20  2.398e-19  1.644e-18 
## 
## Coefficients:
##                    Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)       1.110e-17  5.458e-19  2.034e+01   <2e-16 ***
## lag(gwage232, 1)  1.000e+00  1.780e-17  5.617e+16   <2e-16 ***
## gmwage            2.133e-17  2.493e-17  8.550e-01    0.394    
## gcpi             -1.205e-17  1.315e-17 -9.170e-01    0.362    
## lag_gemp232       7.041e-18  1.296e-17  5.430e-01    0.588    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.165e-18 on 95 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 9.302e+32 on 4 and 95 DF,  p-value: < 2.2e-16
# (iv) Compare models with and without lagged variables
model_without_lags <- lm(gwage232 ~ gmwage + gcpi, data = minwage_data)
summary(model_without_lags)
## 
## Call:
## lm(formula = gwage232 ~ gmwage + gcpi, data = minwage_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0139008 -0.0047964  0.0001274  0.0047646  0.0141037 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.015131   0.002064   7.330 6.91e-11 ***
## gmwage      0.322034   0.138259   2.329  0.02192 *  
## gcpi        0.237125   0.070375   3.369  0.00108 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006648 on 97 degrees of freedom
## Multiple R-squared:  0.1512, Adjusted R-squared:  0.1337 
## F-statistic: 8.637 on 2 and 97 DF,  p-value: 0.0003531
summary(model_with_lagged_gemp)
## Warning in summary.lm(model_with_lagged_gemp): essentially perfect fit: summary
## may be unreliable
## 
## Call:
## lm(formula = gwage232 ~ lag(gwage232, 1) + gmwage + gcpi + lag_gemp232, 
##     data = minwage_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.976e-18 -1.713e-19  3.460e-20  2.398e-19  1.644e-18 
## 
## Coefficients:
##                    Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)       1.110e-17  5.458e-19  2.034e+01   <2e-16 ***
## lag(gwage232, 1)  1.000e+00  1.780e-17  5.617e+16   <2e-16 ***
## gmwage            2.133e-17  2.493e-17  8.550e-01    0.394    
## gcpi             -1.205e-17  1.315e-17 -9.170e-01    0.362    
## lag_gemp232       7.041e-18  1.296e-17  5.430e-01    0.588    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.165e-18 on 95 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 9.302e+32 on 4 and 95 DF,  p-value: < 2.2e-16
# (v) Regression of gmwage on gwage232 and lagged gemp232
model_gmwage <- lm(gmwage ~ lag(gwage232, 1) + lag_gemp232, data = minwage_data)
summary(model_gmwage)
## 
## Call:
## lm(formula = gmwage ~ lag(gwage232, 1) + lag_gemp232, data = minwage_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0098066 -0.0032587 -0.0001896  0.0025686  0.0144593 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       0.006388   0.001979   3.227   0.0017 **
## lag(gwage232, 1)  0.153332   0.066880   2.293   0.0240 * 
## lag_gemp232      -0.023172   0.052332  -0.443   0.6589   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004751 on 97 degrees of freedom
## Multiple R-squared:  0.05373,    Adjusted R-squared:  0.03422 
## F-statistic: 2.754 on 2 and 97 DF,  p-value: 0.06867
# R-squared explanation
r_squared <- summary(model_gmwage)$r.squared
r_squared
## [1] 0.05372949