# Load necessary library
install.packages("dplyr", repos = "http://cran.us.r-project.org")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
# Simulate data for MINWAGE
set.seed(123)
n <- 100
gemp232 <- rnorm(n, mean = 0.02, sd = 0.01) # growth in employment
gmwage <- rnorm(n, mean = 0.01, sd = 0.005) # growth in federal minimum wage
gcpi <- rnorm(n, mean = 0.02, sd = 0.01) # growth in CPI
gwage232 <- rep(NA, n)
gwage232[1] <- rnorm(1, mean = 0.03, sd = 0.005)
# Simulate gwage232 as an AR(1) process
for (t in 2:n) {
gwage232[t] <- 0.7 * gwage232[t-1] + 0.3 * gmwage[t] + 0.2 * gcpi[t] + rnorm(1, mean = 0, sd = 0.005)
}
minwage_data <- data.frame(gwage232, gemp232, gmwage, gcpi)
# Exercise C12
# (i) Find the first-order autocorrelation of gwage232
acf_gwage <- acf(minwage_data$gwage232, plot = FALSE)
autocorrelation <- acf_gwage$acf[2]
autocorrelation
## [1] 0.6046719
# (ii) Estimate the dynamic model by OLS
model_dynamic <- lm(gwage232 ~ lag(gwage232, 1) + gmwage + gcpi, data = minwage_data)
summary(model_dynamic)
## Warning in summary.lm(model_dynamic): essentially perfect fit: summary may be
## unreliable
##
## Call:
## lm(formula = gwage232 ~ lag(gwage232, 1) + gmwage + gcpi, data = minwage_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.987e-18 -1.852e-19 3.150e-20 2.269e-19 1.638e-18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.110e-17 4.493e-19 2.471e+01 <2e-16 ***
## lag(gwage232, 1) 1.000e+00 1.773e-17 5.640e+16 <2e-16 ***
## gmwage 2.063e-17 2.481e-17 8.320e-01 0.408
## gcpi -1.298e-17 1.299e-17 -1.000e+00 0.320
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.161e-18 on 96 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.249e+33 on 3 and 96 DF, p-value: < 2.2e-16
# (iii) Add lagged gemp232 and test for significance
minwage_data$lag_gemp232 <- lag(minwage_data$gemp232, 1)
model_with_lagged_gemp <- lm(gwage232 ~ lag(gwage232, 1) + gmwage + gcpi + lag_gemp232, data = minwage_data)
summary(model_with_lagged_gemp)
## Warning in summary.lm(model_with_lagged_gemp): essentially perfect fit: summary
## may be unreliable
##
## Call:
## lm(formula = gwage232 ~ lag(gwage232, 1) + gmwage + gcpi + lag_gemp232,
## data = minwage_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.976e-18 -1.713e-19 3.460e-20 2.398e-19 1.644e-18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.110e-17 5.458e-19 2.034e+01 <2e-16 ***
## lag(gwage232, 1) 1.000e+00 1.780e-17 5.617e+16 <2e-16 ***
## gmwage 2.133e-17 2.493e-17 8.550e-01 0.394
## gcpi -1.205e-17 1.315e-17 -9.170e-01 0.362
## lag_gemp232 7.041e-18 1.296e-17 5.430e-01 0.588
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.165e-18 on 95 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 9.302e+32 on 4 and 95 DF, p-value: < 2.2e-16
# (iv) Compare models with and without lagged variables
model_without_lags <- lm(gwage232 ~ gmwage + gcpi, data = minwage_data)
summary(model_without_lags)
##
## Call:
## lm(formula = gwage232 ~ gmwage + gcpi, data = minwage_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0139008 -0.0047964 0.0001274 0.0047646 0.0141037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.015131 0.002064 7.330 6.91e-11 ***
## gmwage 0.322034 0.138259 2.329 0.02192 *
## gcpi 0.237125 0.070375 3.369 0.00108 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006648 on 97 degrees of freedom
## Multiple R-squared: 0.1512, Adjusted R-squared: 0.1337
## F-statistic: 8.637 on 2 and 97 DF, p-value: 0.0003531
summary(model_with_lagged_gemp)
## Warning in summary.lm(model_with_lagged_gemp): essentially perfect fit: summary
## may be unreliable
##
## Call:
## lm(formula = gwage232 ~ lag(gwage232, 1) + gmwage + gcpi + lag_gemp232,
## data = minwage_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.976e-18 -1.713e-19 3.460e-20 2.398e-19 1.644e-18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.110e-17 5.458e-19 2.034e+01 <2e-16 ***
## lag(gwage232, 1) 1.000e+00 1.780e-17 5.617e+16 <2e-16 ***
## gmwage 2.133e-17 2.493e-17 8.550e-01 0.394
## gcpi -1.205e-17 1.315e-17 -9.170e-01 0.362
## lag_gemp232 7.041e-18 1.296e-17 5.430e-01 0.588
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.165e-18 on 95 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 9.302e+32 on 4 and 95 DF, p-value: < 2.2e-16
# (v) Regression of gmwage on gwage232 and lagged gemp232
model_gmwage <- lm(gmwage ~ lag(gwage232, 1) + lag_gemp232, data = minwage_data)
summary(model_gmwage)
##
## Call:
## lm(formula = gmwage ~ lag(gwage232, 1) + lag_gemp232, data = minwage_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0098066 -0.0032587 -0.0001896 0.0025686 0.0144593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.006388 0.001979 3.227 0.0017 **
## lag(gwage232, 1) 0.153332 0.066880 2.293 0.0240 *
## lag_gemp232 -0.023172 0.052332 -0.443 0.6589
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004751 on 97 degrees of freedom
## Multiple R-squared: 0.05373, Adjusted R-squared: 0.03422
## F-statistic: 2.754 on 2 and 97 DF, p-value: 0.06867
# R-squared explanation
r_squared <- summary(model_gmwage)$r.squared
r_squared
## [1] 0.05372949