library(wooldridge)
## Warning: package 'wooldridge' was built under R version 4.4.1
data("barium")

# (C2 Part (i))
# Expose the dataset's existing time index `t` under the name `trend`, so the
# regression output labels the linear time trend explicitly.
barium[["trend"]] <- barium[["t"]]

# Regress log(chnimp) on the trend and the three logged regressors.
# NOTE(review): if this part is meant to add a trend to Wooldridge eq. (10.22),
# that equation also includes befile6, affile6 and afdec6 -- confirm that
# leaving them out here is intentional.
model1 <- lm(
  formula = log(chnimp) ~ trend + log(chempi) + log(gas) + log(rtwex),
  data = barium
)
summary(model1)
## 
## Call:
## lm(formula = log(chnimp) ~ trend + log(chempi) + log(gas) + log(rtwex), 
##     data = barium)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9462 -0.3095  0.0199  0.3774  1.1985 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.57512   20.67163  -0.125 0.901061    
## trend        0.01356    0.00371   3.654 0.000377 ***
## log(chempi) -0.99233    1.19557  -0.830 0.408102    
## log(gas)     0.57013    0.86725   0.657 0.512125    
## log(rtwex)  -0.08936    0.40007  -0.223 0.823613    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5731 on 126 degrees of freedom
## Multiple R-squared:  0.3498, Adjusted R-squared:  0.3292 
## F-statistic: 16.95 on 4 and 126 DF,  p-value: 3.838e-11
# (C2 Part (ii))
# Joint test that every regressor in model1 other than the trend has a zero
# coefficient (three restrictions tested together).
library(car)
## Loading required package: carData
linearHypothesis(
  model1,
  hypothesis.matrix = c("log(chempi) = 0", "log(gas) = 0", "log(rtwex) = 0")
)
## Linear hypothesis test
## 
## Hypothesis:
## log(chempi) = 0
## log(gas) = 0
## log(rtwex) = 0
## 
## Model 1: restricted model
## Model 2: log(chnimp) ~ trend + log(chempi) + log(gas) + log(rtwex)
## 
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    129 41.709                           
## 2    126 41.386  3   0.32274 0.3275 0.8054
# (C2 Part (iii))
# Extend the part (i) specification with eleven monthly dummies; January is
# left out and therefore acts as the baseline month.
model2 <- lm(
  formula = log(chnimp) ~ trend + log(chempi) + log(gas) + log(rtwex) +
    feb + mar + apr + may + jun + jul + aug + sep + oct + nov + dec,
  data = barium
)
summary(model2)
## 
## Call:
## lm(formula = log(chnimp) ~ trend + log(chempi) + log(gas) + log(rtwex) + 
##     feb + mar + apr + may + jun + jul + aug + sep + oct + nov + 
##     dec, data = barium)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.86409 -0.34931  0.01966  0.38943  1.06689 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.150788  30.924932   0.910 0.364571    
## trend        0.012953   0.003771   3.435 0.000826 ***
## log(chempi) -0.684025   1.228814  -0.557 0.578845    
## log(gas)    -0.790531   1.336974  -0.591 0.555491    
## log(rtwex)  -0.300425   0.435624  -0.690 0.491809    
## feb         -0.351338   0.292753  -1.200 0.232560    
## mar          0.062903   0.254089   0.248 0.804913    
## apr         -0.424580   0.256487  -1.655 0.100577    
## may          0.057545   0.254148   0.226 0.821276    
## jun         -0.173419   0.253743  -0.683 0.495702    
## jul          0.038939   0.262166   0.149 0.882187    
## aug         -0.097512   0.261111  -0.373 0.709500    
## sep         -0.043634   0.252288  -0.173 0.862993    
## oct          0.093825   0.252192   0.372 0.710550    
## nov         -0.259692   0.252285  -1.029 0.305471    
## dec          0.095602   0.260688   0.367 0.714495    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.577 on 115 degrees of freedom
## Multiple R-squared:  0.3984, Adjusted R-squared:   0.32 
## F-statistic: 5.078 on 15 and 115 DF,  p-value: 1.353e-07
# Seasonality check: F-test of the restricted fit (model1, no monthly dummies)
# against the unrestricted fit (model2, with monthly dummies).
anova(model1, model2, test = "F")
## Analysis of Variance Table
## 
## Model 1: log(chnimp) ~ trend + log(chempi) + log(gas) + log(rtwex)
## Model 2: log(chnimp) ~ trend + log(chempi) + log(gas) + log(rtwex) + feb + 
##     mar + apr + may + jun + jul + aug + sep + oct + nov + dec
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    126 41.386                           
## 2    115 38.292 11    3.0945 0.8449 0.5959
# Check Individual Monthly Dummy Effects
# Take the coefficient table of model2 (estimate, std. error, t value,
# p-value) and keep only the rows belonging to the eleven monthly dummies.
coefficients <- coef(summary(model2))
month_terms <- c(
  "feb", "mar", "apr", "may", "jun", "jul",
  "aug", "sep", "oct", "nov", "dec"
)
# Match term names exactly: a substring regex would also select any other
# regressor whose name merely contains a month abbreviation.
is_month_row <- rownames(coefficients) %in% month_terms
# drop = FALSE keeps a matrix even if only a single row is selected.
monthly_dummies <- coefficients[is_month_row, , drop = FALSE]
monthly_dummies
##        Estimate Std. Error    t value  Pr(>|t|)
## feb -0.35133811  0.2927528 -1.2001188 0.2325597
## mar  0.06290337  0.2540892  0.2475641 0.8049131
## apr -0.42457953  0.2564866 -1.6553673 0.1005765
## may  0.05754461  0.2541481  0.2264216 0.8212755
## jun -0.17341880  0.2537432 -0.6834421 0.4957022
## jul  0.03893868  0.2621663  0.1485267 0.8821873
## aug -0.09751238  0.2611115 -0.3734511 0.7095003
## sep -0.04363386  0.2522883 -0.1729524 0.8629929
## oct  0.09382470  0.2521916  0.3720374 0.7105497
## nov -0.25969185  0.2522849 -1.0293594 0.3054710
## dec  0.09560179  0.2606875  0.3667295 0.7144947
# (C9 Part (i))

data("volat")

# Expected signs of β1 and β2 before estimating:
# β1 (pcip): Likely positive
# β2 (i3): Likely negative

# (C9 Part (ii))
# OLS of rsp500 on same-period pcip and i3 (no lags enter the formula).
model <- lm(
  formula = rsp500 ~ pcip + i3,
  data = volat
)
summary(model)
## 
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -157.871  -22.580    2.103   25.524  138.137 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.84306    3.27488   5.754 1.44e-08 ***
## pcip         0.03642    0.12940   0.281   0.7785    
## i3          -1.36169    0.54072  -2.518   0.0121 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.13 on 554 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01189,    Adjusted R-squared:  0.008325 
## F-statistic: 3.334 on 2 and 554 DF,  p-value: 0.03637
# (C9 Part (iii))
# Flag every term of `model` (intercept included) whose p-value falls below
# the 5% level; the p-value column is selected by name rather than position.
coefficients <- coef(summary(model))
significance <- coefficients[, "Pr(>|t|)"] < 0.05
significant_vars <- rownames(coefficients)[significance]
significant_vars  # Terms significant at the 5% level
## [1] "(Intercept)" "i3"
# (C9 Part (iv))
# Multiple R-squared of `model`, extracted from its summary object
r_squared <- summary(model)[["r.squared"]]
r_squared
## [1] 0.01189219
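# Interpretation:
# - Only i3 (and the intercept) is statistically significant at the 5% level,
#   which suggests some association with returns. Because pcip and i3 enter
#   the model contemporaneously (no lags), significance alone does not show
#   that returns can be predicted from information available in advance.
# - The low R-squared (about 0.012) indicates that the model explains only
#   about 1.2% of the variance in rsp500.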