library(wooldridge)
# Load built-in datasets
data(barium)
data(volat)
# Preview the data
head(barium)
## chnimp bchlimp befile6 affile6 afdec6 befile12 affile12 afdec12 chempi
## 1 220.4620 9578.376 0 0 0 0 0 0 100.1
## 2 94.7980 11219.480 0 0 0 0 0 0 100.9
## 3 219.3575 9719.900 0 0 0 0 0 0 101.1
## 4 317.4215 12920.950 0 0 0 0 0 0 102.5
## 5 114.6390 9790.446 0 0 0 0 0 0 104.1
## 6 129.5240 11020.470 0 0 0 0 0 0 104.8
## gas rtwex spr sum fall lchnimp lgas lrtwex lchempi t feb mar
## 1 7830000128 86.74 0 0 0 5.395725 22.78123 4.462915 4.606170 1 1 0
## 2 8819999744 85.63 1 0 0 4.551748 22.90029 4.450036 4.614130 2 0 1
## 3 8449999872 85.42 1 0 0 5.390703 22.85743 4.447580 4.616110 3 0 0
## 4 9240000512 87.29 1 0 0 5.760231 22.94681 4.469236 4.629863 4 0 0
## 5 9150000128 86.60 0 1 0 4.741788 22.93702 4.461300 4.645352 5 0 0
## 6 9520000000 84.63 0 1 0 4.863866 22.97666 4.438289 4.652054 6 0 0
## apr may jun jul aug sep oct nov dec percchn
## 1 0 0 0 0 0 0 0 0 0 2.3016636
## 2 0 0 0 0 0 0 0 0 0 0.8449411
## 3 1 0 0 0 0 0 0 0 0 2.2567875
## 4 0 1 0 0 0 0 0 0 0 2.4566422
## 5 0 0 1 0 0 0 0 0 0 1.1709272
## 6 0 0 0 1 0 0 0 0 0 1.1753038
head(volat)
## date sp500 divyld i3 ip pcsp rsp500 pcip ci3 ci3_1 ci3_2
## 1 1947.01 15.21 4.49 0.38 22.4 NA NA NA NA NA NA
## 2 1947.02 15.80 4.38 0.38 22.5 46.54833 50.92833 5.357163 0 NA NA
## 3 1947.03 15.16 4.61 0.38 22.6 -48.60762 -43.99762 5.333354 0 0 NA
## 4 1947.04 14.60 4.75 0.38 22.5 -44.32714 -39.57714 -5.309754 0 0 0
## 5 1947.05 14.34 5.05 0.38 22.6 -21.36988 -16.31988 5.333354 0 0 0
## 6 1947.06 14.84 5.03 0.38 22.6 41.84100 46.87100 0.000000 0 0 0
## pcip_1 pcip_2 pcip_3 pcsp_1 pcsp_2 pcsp_3
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 5.357163 NA NA 46.54833 NA NA
## 4 5.333354 5.357163 NA -48.60762 46.54833 NA
## 5 -5.309754 5.333354 5.357163 -44.32714 -48.60762 46.54833
## 6 5.333354 -5.309754 5.333354 -21.36988 -44.32714 -48.60762
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car) # For hypothesis testing
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(zoo) # For working with time series data
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Verify column names in the datasets to adjust for missing or differently named variables
cat("Columns in barium dataset:\n")
## Columns in barium dataset:
print(colnames(barium))
## [1] "chnimp" "bchlimp" "befile6" "affile6" "afdec6" "befile12"
## [7] "affile12" "afdec12" "chempi" "gas" "rtwex" "spr"
## [13] "sum" "fall" "lchnimp" "lgas" "lrtwex" "lchempi"
## [19] "t" "feb" "mar" "apr" "may" "jun"
## [25] "jul" "aug" "sep" "oct" "nov" "dec"
## [31] "percchn"
cat("Columns in volat dataset:\n")
## Columns in volat dataset:
print(colnames(volat))
## [1] "date" "sp500" "divyld" "i3" "ip" "pcsp" "rsp500" "pcip"
## [9] "ci3" "ci3_1" "ci3_2" "pcip_1" "pcip_2" "pcip_3" "pcsp_1" "pcsp_2"
## [17] "pcsp_3"
# ----- C2: BARIUM Data -----
# Add a linear time trend and adjust formula for available variables
# Step 1: Add linear time trend
barium$time_trend <- 1:nrow(barium)
# Step 2: Fit the regression model (check if `rt_wex` exists, otherwise remove it)
# Adjust the formula based on available columns
model_c2_i <- lm(log(chnimp) ~ log(chempi) + log(gas) + befile6 + affile6 + afdec6 + time_trend, data = barium)
summary(model_c2_i) # View regression summary
##
## Call:
## lm(formula = log(chnimp) ~ log(chempi) + log(gas) + befile6 +
## affile6 + afdec6 + time_trend, data = barium)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.94792 -0.31037 0.03323 0.36383 1.20738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.706038 18.128057 -0.039 0.969
## log(chempi) -0.789510 1.067145 -0.740 0.461
## log(gas) 0.429345 0.844935 0.508 0.612
## befile6 0.100523 0.242885 0.414 0.680
## affile6 0.110248 0.243608 0.453 0.652
## afdec6 -0.329958 0.249810 -1.321 0.189
## time_trend 0.013075 0.003121 4.190 5.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5725 on 124 degrees of freedom
## Multiple R-squared: 0.3614, Adjusted R-squared: 0.3305
## F-statistic: 11.7 on 6 and 124 DF, p-value: 2.334e-10
# Step 3: Test for joint significance of all variables except time trend
model_restricted <- lm(log(chnimp) ~ time_trend, data = barium) # Restricted model with only time trend
anova(model_restricted, model_c2_i) # Perform an F-test for joint significance
## Analysis of Variance Table
##
## Model 1: log(chnimp) ~ time_trend
## Model 2: log(chnimp) ~ log(chempi) + log(gas) + befile6 + affile6 + afdec6 +
## time_trend
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 129 41.709
## 2 124 40.647 5 1.0619 0.6479 0.6636
# Step 4: Add synthetic monthly dummy variables
cat("\nAdding synthetic 'month' variable for seasonality analysis...\n")
##
## Adding synthetic 'month' variable for seasonality analysis...
barium$month <- as.factor(rep(1:12, length.out = nrow(barium))) # Create a cyclic 'month' variable
# Fit the model with monthly dummies
model_c2_iii <- lm(log(chnimp) ~ log(chempi) + log(gas) + befile6 + affile6 + afdec6 + time_trend + month, data = barium)
# Display summary of the model with monthly dummies
cat("\n--- Model with Monthly Dummies (Step 4) ---\n")
##
## --- Model with Monthly Dummies (Step 4) ---
print(summary(model_c2_iii))
##
## Call:
## lm(formula = log(chnimp) ~ log(chempi) + log(gas) + befile6 +
## affile6 + afdec6 + time_trend + month, data = barium)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.84568 -0.35467 0.04212 0.37053 1.11328
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.098310 25.171809 0.798 0.426287
## log(chempi) -0.254136 1.151151 -0.221 0.825672
## log(gas) -0.598843 1.201317 -0.498 0.619108
## befile6 0.145464 0.250687 0.580 0.562892
## affile6 0.115381 0.251123 0.459 0.646788
## afdec6 -0.350692 0.258419 -1.357 0.177464
## time_trend 0.011587 0.003343 3.466 0.000747 ***
## month2 0.399849 0.268457 1.489 0.139160
## month3 -0.099035 0.264508 -0.374 0.708800
## month4 0.352573 0.295042 1.195 0.234592
## month5 0.119124 0.292514 0.407 0.684601
## month6 0.322520 0.314192 1.027 0.306845
## month7 0.183170 0.310607 0.590 0.556559
## month8 0.245956 0.278922 0.882 0.379753
## month9 0.401975 0.280453 1.433 0.154532
## month10 0.070971 0.283610 0.250 0.802855
## month11 0.418517 0.314209 1.332 0.185551
## month12 0.331646 0.285636 1.161 0.248057
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5766 on 113 degrees of freedom
## Multiple R-squared: 0.4098, Adjusted R-squared: 0.3211
## F-statistic: 4.616 on 17 and 113 DF, p-value: 2.944e-07
# Install and load required packages
if (!require("lmtest")) install.packages("lmtest", dependencies = TRUE)
## Loading required package: lmtest
library(lmtest) # For coeftest function
# ----- C9: VOLAT Data -----
cat("\n----- Analysis of C9 (VOLAT Data) -----\n")
##
## ----- Analysis of C9 (VOLAT Data) -----
# Step 1: Check required variables and fit the regression model
if (all(c("rsp500", "pcip", "i3") %in% colnames(volat))) {
# Regression model
model_c9 <- lm(rsp500 ~ pcip + i3, data = volat)
cat("\n--- Model Summary for C9 (Step 1) ---\n")
print(summary(model_c9))
# Step 2: Coefficient significance using coeftest
cat("\n--- Coefficient Significance (Step 2) ---\n")
coef_test <- coeftest(model_c9)
print(coef_test)
# Step 3: Evaluate predictability
r_squared <- summary(model_c9)$r.squared
cat("\n--- R-Squared Value (Step 3):", r_squared, "---\n")
} else {
cat("\nOne or more required variables ('rsp500', 'pcip', 'i3') are missing in the VOLAT dataset.\n")
}
##
## --- Model Summary for C9 (Step 1) ---
##
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -157.871 -22.580 2.103 25.524 138.137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.84306 3.27488 5.754 1.44e-08 ***
## pcip 0.03642 0.12940 0.281 0.7785
## i3 -1.36169 0.54072 -2.518 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.13 on 554 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.01189, Adjusted R-squared: 0.008325
## F-statistic: 3.334 on 2 and 554 DF, p-value: 0.03637
##
##
## --- Coefficient Significance (Step 2) ---
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.843056 3.274880 5.7538 1.445e-08 ***
## pcip 0.036417 0.129396 0.2814 0.77848
## i3 -1.361689 0.540724 -2.5183 0.01207 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## --- R-Squared Value (Step 3): 0.01189219 ---