library(wooldridge)

# Load built-in datasets 
data(barium)
data(volat)

# Preview the data
head(barium)
##     chnimp   bchlimp befile6 affile6 afdec6 befile12 affile12 afdec12 chempi
## 1 220.4620  9578.376       0       0      0        0        0       0  100.1
## 2  94.7980 11219.480       0       0      0        0        0       0  100.9
## 3 219.3575  9719.900       0       0      0        0        0       0  101.1
## 4 317.4215 12920.950       0       0      0        0        0       0  102.5
## 5 114.6390  9790.446       0       0      0        0        0       0  104.1
## 6 129.5240 11020.470       0       0      0        0        0       0  104.8
##          gas rtwex spr sum fall  lchnimp     lgas   lrtwex  lchempi t feb mar
## 1 7830000128 86.74   0   0    0 5.395725 22.78123 4.462915 4.606170 1   1   0
## 2 8819999744 85.63   1   0    0 4.551748 22.90029 4.450036 4.614130 2   0   1
## 3 8449999872 85.42   1   0    0 5.390703 22.85743 4.447580 4.616110 3   0   0
## 4 9240000512 87.29   1   0    0 5.760231 22.94681 4.469236 4.629863 4   0   0
## 5 9150000128 86.60   0   1    0 4.741788 22.93702 4.461300 4.645352 5   0   0
## 6 9520000000 84.63   0   1    0 4.863866 22.97666 4.438289 4.652054 6   0   0
##   apr may jun jul aug sep oct nov dec   percchn
## 1   0   0   0   0   0   0   0   0   0 2.3016636
## 2   0   0   0   0   0   0   0   0   0 0.8449411
## 3   1   0   0   0   0   0   0   0   0 2.2567875
## 4   0   1   0   0   0   0   0   0   0 2.4566422
## 5   0   0   1   0   0   0   0   0   0 1.1709272
## 6   0   0   0   1   0   0   0   0   0 1.1753038
head(volat)
##      date sp500 divyld   i3   ip      pcsp    rsp500      pcip ci3 ci3_1 ci3_2
## 1 1947.01 15.21   4.49 0.38 22.4        NA        NA        NA  NA    NA    NA
## 2 1947.02 15.80   4.38 0.38 22.5  46.54833  50.92833  5.357163   0    NA    NA
## 3 1947.03 15.16   4.61 0.38 22.6 -48.60762 -43.99762  5.333354   0     0    NA
## 4 1947.04 14.60   4.75 0.38 22.5 -44.32714 -39.57714 -5.309754   0     0     0
## 5 1947.05 14.34   5.05 0.38 22.6 -21.36988 -16.31988  5.333354   0     0     0
## 6 1947.06 14.84   5.03 0.38 22.6  41.84100  46.87100  0.000000   0     0     0
##      pcip_1    pcip_2   pcip_3    pcsp_1    pcsp_2    pcsp_3
## 1        NA        NA       NA        NA        NA        NA
## 2        NA        NA       NA        NA        NA        NA
## 3  5.357163        NA       NA  46.54833        NA        NA
## 4  5.333354  5.357163       NA -48.60762  46.54833        NA
## 5 -5.309754  5.333354 5.357163 -44.32714 -48.60762  46.54833
## 6  5.333354 -5.309754 5.333354 -21.36988 -44.32714 -48.60762
# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car) # For hypothesis testing
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(zoo) # For working with time series data
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Verify column names in the datasets to adjust for missing or differently named variables
cat("Columns in barium dataset:\n")
## Columns in barium dataset:
print(colnames(barium))
##  [1] "chnimp"   "bchlimp"  "befile6"  "affile6"  "afdec6"   "befile12"
##  [7] "affile12" "afdec12"  "chempi"   "gas"      "rtwex"    "spr"     
## [13] "sum"      "fall"     "lchnimp"  "lgas"     "lrtwex"   "lchempi" 
## [19] "t"        "feb"      "mar"      "apr"      "may"      "jun"     
## [25] "jul"      "aug"      "sep"      "oct"      "nov"      "dec"     
## [31] "percchn"
cat("Columns in volat dataset:\n")
## Columns in volat dataset:
print(colnames(volat))
##  [1] "date"   "sp500"  "divyld" "i3"     "ip"     "pcsp"   "rsp500" "pcip"  
##  [9] "ci3"    "ci3_1"  "ci3_2"  "pcip_1" "pcip_2" "pcip_3" "pcsp_1" "pcsp_2"
## [17] "pcsp_3"
# ----- C2: BARIUM Data -----
# Add a linear time trend and adjust formula for available variables

# Step 1: Add linear time trend
barium$time_trend <- 1:nrow(barium)

# Step 2: Fit the regression model (check if `rt_wex` exists, otherwise remove it)
# Adjust the formula based on available columns
model_c2_i <- lm(log(chnimp) ~ log(chempi) + log(gas) + befile6 + affile6 + afdec6 + time_trend, data = barium)
summary(model_c2_i)  # View regression summary
## 
## Call:
## lm(formula = log(chnimp) ~ log(chempi) + log(gas) + befile6 + 
##     affile6 + afdec6 + time_trend, data = barium)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.94792 -0.31037  0.03323  0.36383  1.20738 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.706038  18.128057  -0.039    0.969    
## log(chempi) -0.789510   1.067145  -0.740    0.461    
## log(gas)     0.429345   0.844935   0.508    0.612    
## befile6      0.100523   0.242885   0.414    0.680    
## affile6      0.110248   0.243608   0.453    0.652    
## afdec6      -0.329958   0.249810  -1.321    0.189    
## time_trend   0.013075   0.003121   4.190 5.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5725 on 124 degrees of freedom
## Multiple R-squared:  0.3614, Adjusted R-squared:  0.3305 
## F-statistic:  11.7 on 6 and 124 DF,  p-value: 2.334e-10
# Step 3: Test for joint significance of all variables except time trend
model_restricted <- lm(log(chnimp) ~ time_trend, data = barium)  # Restricted model with only time trend
anova(model_restricted, model_c2_i)  # Perform an F-test for joint significance
## Analysis of Variance Table
## 
## Model 1: log(chnimp) ~ time_trend
## Model 2: log(chnimp) ~ log(chempi) + log(gas) + befile6 + affile6 + afdec6 + 
##     time_trend
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    129 41.709                           
## 2    124 40.647  5    1.0619 0.6479 0.6636
# Step 4: Add synthetic monthly dummy variables
cat("\nAdding synthetic 'month' variable for seasonality analysis...\n")
## 
## Adding synthetic 'month' variable for seasonality analysis...
barium$month <- as.factor(rep(1:12, length.out = nrow(barium)))  # Create a cyclic 'month' variable

# Fit the model with monthly dummies
model_c2_iii <- lm(log(chnimp) ~ log(chempi) + log(gas) + befile6 + affile6 + afdec6 + time_trend + month, data = barium)

# Display summary of the model with monthly dummies
cat("\n--- Model with Monthly Dummies (Step 4) ---\n")
## 
## --- Model with Monthly Dummies (Step 4) ---
print(summary(model_c2_iii))
## 
## Call:
## lm(formula = log(chnimp) ~ log(chempi) + log(gas) + befile6 + 
##     affile6 + afdec6 + time_trend + month, data = barium)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.84568 -0.35467  0.04212  0.37053  1.11328 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 20.098310  25.171809   0.798 0.426287    
## log(chempi) -0.254136   1.151151  -0.221 0.825672    
## log(gas)    -0.598843   1.201317  -0.498 0.619108    
## befile6      0.145464   0.250687   0.580 0.562892    
## affile6      0.115381   0.251123   0.459 0.646788    
## afdec6      -0.350692   0.258419  -1.357 0.177464    
## time_trend   0.011587   0.003343   3.466 0.000747 ***
## month2       0.399849   0.268457   1.489 0.139160    
## month3      -0.099035   0.264508  -0.374 0.708800    
## month4       0.352573   0.295042   1.195 0.234592    
## month5       0.119124   0.292514   0.407 0.684601    
## month6       0.322520   0.314192   1.027 0.306845    
## month7       0.183170   0.310607   0.590 0.556559    
## month8       0.245956   0.278922   0.882 0.379753    
## month9       0.401975   0.280453   1.433 0.154532    
## month10      0.070971   0.283610   0.250 0.802855    
## month11      0.418517   0.314209   1.332 0.185551    
## month12      0.331646   0.285636   1.161 0.248057    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5766 on 113 degrees of freedom
## Multiple R-squared:  0.4098, Adjusted R-squared:  0.3211 
## F-statistic: 4.616 on 17 and 113 DF,  p-value: 2.944e-07
# Install and load required packages
if (!require("lmtest")) install.packages("lmtest", dependencies = TRUE)
## Loading required package: lmtest
library(lmtest)  # For coeftest function

# ----- C9: VOLAT Data -----
cat("\n----- Analysis of C9 (VOLAT Data) -----\n")
## 
## ----- Analysis of C9 (VOLAT Data) -----
# Step 1: Check required variables and fit the regression model
if (all(c("rsp500", "pcip", "i3") %in% colnames(volat))) {
  # Regression model
  model_c9 <- lm(rsp500 ~ pcip + i3, data = volat)
  cat("\n--- Model Summary for C9 (Step 1) ---\n")
  print(summary(model_c9))
  
  # Step 2: Coefficient significance using coeftest
  cat("\n--- Coefficient Significance (Step 2) ---\n")
  coef_test <- coeftest(model_c9)
  print(coef_test)
  
  # Step 3: Evaluate predictability
  r_squared <- summary(model_c9)$r.squared
  cat("\n--- R-Squared Value (Step 3):", r_squared, "---\n")
} else {
  cat("\nOne or more required variables ('rsp500', 'pcip', 'i3') are missing in the VOLAT dataset.\n")
}
## 
## --- Model Summary for C9 (Step 1) ---
## 
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -157.871  -22.580    2.103   25.524  138.137 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.84306    3.27488   5.754 1.44e-08 ***
## pcip         0.03642    0.12940   0.281   0.7785    
## i3          -1.36169    0.54072  -2.518   0.0121 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.13 on 554 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01189,    Adjusted R-squared:  0.008325 
## F-statistic: 3.334 on 2 and 554 DF,  p-value: 0.03637
## 
## 
## --- Coefficient Significance (Step 2) ---
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 18.843056   3.274880  5.7538 1.445e-08 ***
## pcip         0.036417   0.129396  0.2814   0.77848    
## i3          -1.361689   0.540724 -2.5183   0.01207 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## --- R-Squared Value (Step 3): 0.01189219 ---