# Install the required packages, but only when they are missing
# Re-running the script then neither reinstalls the packages nor needs network
# access once they are present. The download goes over HTTPS rather than plain
# HTTP.
required_pkgs <- c("dplyr", "ggplot2")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
}
# Simulate a BARIUM-like data set
# A fixed seed keeps the draws, and therefore every result printed below,
# reproducible. The rnorm() calls must stay in this order because each one
# advances the random-number stream.
set.seed(123)
n <- 100
time <- seq_len(n)                         # observation index 1, ..., n
trend <- rnorm(n, 0.5, 0.1)                # mean 0.5, sd 0.1
x1 <- rnorm(n, 2, 1)                       # mean 2, sd 1
x2 <- rnorm(n, 1, 0.5)                     # mean 1, sd 0.5
monthly_dummy <- factor(rep_len(1:12, n))  # labels 1..12 cycling over the rows
# Outcome: linear in trend, x1 and x2, plus normal noise with sd 0.5.
y <- 3 + 0.5 * trend + 0.8 * x1 - 0.4 * x2 + rnorm(n, sd = 0.5)
barium_data <- data.frame(
  time = time,
  trend = trend,
  x1 = x1,
  x2 = x2,
  monthly_dummy = monthly_dummy,
  y = y
)
# Exercise C2
# (i) Regress y on the trend regressor together with x1 and x2; the coefficient
#     table below gives each term's t statistic and p-value.
# NOTE(review): `trend` is drawn with rnorm() in the simulation, so it is not a
#     deterministic time index, while `time` holds 1:n. Confirm which of the
#     two the exercise intends as the "linear time trend".
model1 <- lm(formula = y ~ trend + x1 + x2, data = barium_data)
summary(object = model1)
##
## Call:
## lm(formula = y ~ trend + x1 + x2, data = barium_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.24569 -0.32696 0.02832 0.33516 1.26605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.14013 0.35697 8.797 5.71e-14 ***
## trend 0.22275 0.58438 0.381 0.704
## x1 0.82311 0.05473 15.040 < 2e-16 ***
## x2 -0.45739 0.11223 -4.075 9.46e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5258 on 96 degrees of freedom
## Multiple R-squared: 0.7143, Adjusted R-squared: 0.7053
## F-statistic: 80 on 3 and 96 DF, p-value: < 2.2e-16
# (ii) Test for joint significance of the variables other than the trend
# H0: the coefficients on x1 and x2 are both zero. The restricted model
# therefore keeps only the trend, and the F test compares it with the
# unrestricted model1. Dropping `trend` instead would only repeat the t test
# on the trend coefficient from part (i).
model2 <- lm(y ~ trend, data = barium_data)
# Restricted model first, so the table reports a positive Df (2 restrictions:
# x1 and x2).
anova(model2, model1)
# (iii) Add monthly dummies and check for seasonality
# monthly_dummy is a factor, so lm() expands it into 11 indicator columns with
# month 1 as the reference category.
model3 <- lm(y ~ trend + x1 + x2 + monthly_dummy, data = barium_data)
summary(model3)
##
## Call:
## lm(formula = y ~ trend + x1 + x2 + monthly_dummy, data = barium_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.29007 -0.33084 0.02293 0.36298 1.26445
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.70186 0.44819 6.028 4.14e-08 ***
## trend 0.43176 0.61444 0.703 0.484174
## x1 0.82518 0.05820 14.178 < 2e-16 ***
## x2 -0.45454 0.12185 -3.730 0.000344 ***
## monthly_dummy2 0.48244 0.25771 1.872 0.064639 .
## monthly_dummy3 0.35854 0.25400 1.412 0.161718
## monthly_dummy4 0.18873 0.25160 0.750 0.455262
## monthly_dummy5 0.36254 0.26485 1.369 0.174647
## monthly_dummy6 0.36895 0.26222 1.407 0.163071
## monthly_dummy7 0.42900 0.25913 1.656 0.101507
## monthly_dummy8 0.05621 0.26987 0.208 0.835498
## monthly_dummy9 0.52666 0.26186 2.011 0.047466 *
## monthly_dummy10 0.46657 0.25951 1.798 0.075742 .
## monthly_dummy11 0.36673 0.26379 1.390 0.168092
## monthly_dummy12 0.32683 0.26806 1.219 0.226121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5317 on 85 degrees of freedom
## Multiple R-squared: 0.7413, Adjusted R-squared: 0.6986
## F-statistic: 17.39 on 14 and 85 DF, p-value: < 2.2e-16
# The individual dummy t tests above do not answer the seasonality question.
# Seasonality is a joint hypothesis (H0: all 11 monthly dummy coefficients are
# zero), so compare the model without dummies (model1) against model3.
# Rough check from the R-squared values printed for the two models (0.7143 and
# 0.7413): F is roughly 0.8 on 11 and 85 df, so expect a failure to reject H0.
anova(model1, model3)
# Simulate a VOLAT-like data set
# Re-seeding restarts the random-number stream, so these draws do not depend on
# how many random numbers the earlier code consumed. `n` is reused from the
# BARIUM simulation.
set.seed(123)
rsp500 <- rnorm(n, 5, 2)       # mean 5, sd 2
pcip <- rnorm(n, 0.3, 0.1)     # mean 0.3, sd 0.1
i3 <- rnorm(n, 0.05, 0.02)     # mean 0.05, sd 0.02
volat_data <- data.frame(rsp500 = rsp500, pcip = pcip, i3 = i3)
# Exercise C9
# (i) Regress rsp500 on pcip and i3
model_volat <- lm(formula = rsp500 ~ pcip + i3, data = volat_data)
# (ii) Coefficient estimates, standard errors and fit statistics
summary(object = model_volat)
##
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5775 -1.2295 -0.1682 1.1247 4.6838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.0735 0.7584 8.009 2.57e-12 ***
## pcip -0.8614 1.8997 -0.453 0.651
## i3 -12.2795 9.6698 -1.270 0.207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.827 on 97 degrees of freedom
## Multiple R-squared: 0.01877, Adjusted R-squared: -0.001466
## F-statistic: 0.9276 on 2 and 97 DF, p-value: 0.399
# (iii) Flag each term whose p-value is below 0.05
# The result is a named logical vector with one entry per row of the
# coefficient table, so the intercept is included alongside pcip and i3.
significant_vars <- coef(summary(model_volat))[, "Pr(>|t|)"] < 0.05
significant_vars
## (Intercept) pcip i3
## TRUE FALSE FALSE
# (iv) Statement on predictability
# NOTE(review): this sentence is a fixed string and is not derived from the
# fitted model. In the output above neither pcip nor i3 is significant and the
# overall F test has p-value 0.399.
predictability <- "Returns on the S&P 500 are predictable if significant variables are identified and have expected signs."
predictability
## [1] "Returns on the S&P 500 are predictable if significant variables are identified and have expected signs."