# Install necessary libraries
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Placeholder data: four independent standard-normal series of length 100.
# Replace this with the real dataset before interpreting any results.
# The seed is fixed so every run draws the same numbers; columns are drawn
# in the order listed (gwage232, gemp232, gmwage, gcpi).
set.seed(123)
data <- as.data.frame(lapply(
  setNames(nm = c("gwage232", "gemp232", "gmwage", "gcpi")),
  function(column_name) rnorm(100)
))

# Working copy restricted to the sector 232 (Men's and Boys' Furnishings)
# variables used in the analysis.
sector_232 <- data[c("gwage232", "gemp232", "gmwage", "gcpi")]

# (i) First-order autocorrelation of gwage232.
# acf() returns the lag-0 correlation in position 1, so the lag-1 value is
# the second element of its `acf` component.
autocorr <- acf(
  sector_232[["gwage232"]],
  lag.max = 1,
  plot = FALSE
)[["acf"]][2]
print(paste0("First order autocorrelation in gwage232:  ", autocorr))
## [1] "First order autocorrelation in gwage232:  -0.0255963416195418"
# Add one-period lags of wage growth and employment growth: each lagged
# column is the original series shifted down by one row, with NA in the
# first position.
sector_232 <- transform(
  sector_232,
  gwage232_lag = c(NA, head(gwage232, -1)),
  gemp232_lag = c(NA, head(gemp232, -1))
)

# The first row has no lagged values, so remove it; all later regressions
# in this script therefore run on the remaining rows.
sector_232 <- sector_232[-1, ]

# (ii) Dynamic model estimated by OLS: current wage growth regressed on its
# own first lag, minimum-wage growth (gmwage) and CPI growth (gcpi).
model <- lm(
  gwage232 ~ gwage232_lag + gmwage + gcpi,
  data = sector_232
)
summary(model)
## 
## Call:
## lm(formula = gwage232 ~ gwage232_lag + gmwage + gcpi, data = sector_232)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4124 -0.5967 -0.1046  0.5123  2.2320 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.11142    0.09374   1.189    0.238
## gwage232_lag -0.04124    0.10352  -0.398    0.691
## gmwage       -0.11833    0.10013  -1.182    0.240
## gcpi         -0.05178    0.09039  -0.573    0.568
## 
## Residual standard error: 0.921 on 95 degrees of freedom
## Multiple R-squared:  0.01798,    Adjusted R-squared:  -0.01304 
## F-statistic: 0.5796 on 3 and 95 DF,  p-value: 0.6298
# (iii) Same dynamic model with lagged employment growth (gemp232_lag) added
# as a regressor.
# NOTE: this reuses the name `model`, so the part (ii) fit is overwritten
# from here on.
model <- lm(
  gwage232 ~ gwage232_lag + gemp232_lag + gmwage + gcpi,
  data = sector_232
)
summary(model)
## 
## Call:
## lm(formula = gwage232 ~ gwage232_lag + gemp232_lag + gmwage + 
##     gcpi, data = sector_232)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.40609 -0.59723 -0.06863  0.48704  2.17403 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.11460    0.09471   1.210    0.229
## gwage232_lag -0.03949    0.10416  -0.379    0.705
## gemp232_lag   0.03111    0.09773   0.318    0.751
## gmwage       -0.12213    0.10132  -1.205    0.231
## gcpi         -0.05281    0.09088  -0.581    0.563
## 
## Residual standard error: 0.9254 on 94 degrees of freedom
## Multiple R-squared:  0.01903,    Adjusted R-squared:  -0.02271 
## F-statistic: 0.4559 on 4 and 94 DF,  p-value: 0.7678
# (iv) Baseline for comparison: the static regression with neither lagged
# regressor, fitted on the same sector_232 rows as the dynamic models so the
# gmwage and gcpi estimates can be set side by side with those above.
model_no_lags <- lm(
  gwage232 ~ gmwage + gcpi,
  data = sector_232
)
summary(model_no_lags)
## 
## Call:
## lm(formula = gwage232 ~ gmwage + gcpi, data = sector_232)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3811 -0.6065 -0.1107  0.5498  2.2609 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.10716    0.09272   1.156    0.251
## gmwage      -0.11604    0.09953  -1.166    0.247
## gcpi        -0.04633    0.08896  -0.521    0.604
## 
## Residual standard error: 0.917 on 96 degrees of freedom
## Multiple R-squared:  0.01634,    Adjusted R-squared:  -0.004158 
## F-statistic: 0.7971 on 2 and 96 DF,  p-value: 0.4536
# (v) Auxiliary regression: minimum-wage growth on the two lagged variables
# (gwage232_lag and gemp232_lag). Its R-squared shows how much of gmwage
# the lags explain.
model_gmwage <- lm(
  gmwage ~ gwage232_lag + gemp232_lag,
  data = sector_232
)
summary(model_gmwage)
## 
## Call:
## lm(formula = gmwage ~ gwage232_lag + gemp232_lag, data = sector_232)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9228 -0.6010 -0.1195  0.5640  2.0227 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.11498    0.09477   1.213    0.228
## gwage232_lag -0.04549    0.10374  -0.439    0.662
## gemp232_lag   0.11262    0.09780   1.152    0.252
## 
## Residual standard error: 0.9331 on 96 degrees of freedom
## Multiple R-squared:  0.01629,    Adjusted R-squared:  -0.004203 
## F-statistic: 0.7949 on 2 and 96 DF,  p-value: 0.4546