install.packages("devtools")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lmtest")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("sandwich")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(devtools)
## Loading required package: usethis
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)

devtools::install_github("JustinMShea/wooldridge")
## Skipping install of 'wooldridge' from a github remote, the SHA1 (44137328) has not changed since last install.
##   Use `force = TRUE` to force installation
library(wooldridge)

data("barium")

model_barium <- lm(log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + affile6 + afdec6 + t, data = barium)

summary(model_barium)
## 
## Call:
## lm(formula = log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + 
##     affile6 + afdec6 + t, data = barium)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.94173 -0.31037  0.03092  0.36435  1.21434 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -2.3972590 19.7847366  -0.121  0.90376   
## log(chempi) -0.6612071  1.2224820  -0.541  0.58957   
## log(gas)     0.4734524  0.8720272   0.543  0.58816   
## rtwex        0.0009682  0.0044441   0.218  0.82790   
## befile6      0.0878089  0.2507104   0.350  0.72676   
## affile6      0.0928429  0.2572680   0.361  0.71881   
## afdec6      -0.3619936  0.2907101  -1.245  0.21542   
## t            0.0126198  0.0037647   3.352  0.00107 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5747 on 123 degrees of freedom
## Multiple R-squared:  0.3617, Adjusted R-squared:  0.3253 
## F-statistic: 9.956 on 7 and 123 DF,  p-value: 8.28e-10
model_barium_no_time <- lm(log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + affile6 + afdec6, data = barium)

# Perform the F-test comparing the two models
anova(model_barium, model_barium_no_time)
## Analysis of Variance Table
## 
## Model 1: log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + affile6 + 
##     afdec6 + t
## Model 2: log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + affile6 + 
##     afdec6
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1    123 40.631                                
## 2    124 44.343 -1   -3.7119 11.237 0.001066 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_barium_with_monthly <- lm(log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + affile6 + afdec6 + 
                               feb + mar + apr + may + jun + jul + aug + sep + oct + nov + dec, data = barium)

# Display the summary of the model
summary(model_barium_with_monthly)
## 
## Call:
## lm(formula = log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + 
##     affile6 + afdec6 + feb + mar + apr + may + jun + jul + aug + 
##     sep + oct + nov + dec, data = barium)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.99351 -0.36138  0.08331  0.41404  1.38601 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 21.149449  30.949521   0.683   0.4958    
## log(chempi)  3.281504   0.491628   6.675 9.64e-10 ***
## log(gas)    -1.366322   1.369156  -0.998   0.3204    
## rtwex        0.006119   0.004508   1.357   0.1774    
## befile6      0.145300   0.266529   0.545   0.5867    
## affile6      0.015003   0.279437   0.054   0.9573    
## afdec6      -0.541856   0.311689  -1.738   0.0849 .  
## feb         -0.427149   0.303544  -1.407   0.1621    
## mar          0.058031   0.264883   0.219   0.8270    
## apr         -0.453006   0.268561  -1.687   0.0944 .  
## may          0.035040   0.269396   0.130   0.8967    
## jun         -0.204567   0.269413  -0.759   0.4492    
## jul          0.008259   0.278714   0.030   0.9764    
## aug         -0.152847   0.277969  -0.550   0.5835    
## sep         -0.135434   0.267967  -0.505   0.6143    
## oct          0.051164   0.267134   0.192   0.8485    
## nov         -0.244611   0.262969  -0.930   0.3543    
## dec          0.137690   0.271281   0.508   0.6128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6016 on 113 degrees of freedom
## Multiple R-squared:  0.3576, Adjusted R-squared:  0.2609 
## F-statistic:   3.7 on 17 and 113 DF,  p-value: 1.351e-05
# Perform an ANOVA to compare the models
anova(model_barium_no_time, model_barium_with_monthly)
## Analysis of Variance Table
## 
## Model 1: log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + affile6 + 
##     afdec6
## Model 2: log(chnimp) ~ log(chempi) + log(gas) + rtwex + befile6 + affile6 + 
##     afdec6 + feb + mar + apr + may + jun + jul + aug + sep + 
##     oct + nov + dec
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    124 44.343                           
## 2    113 40.892 11    3.4508 0.8669 0.5746
data("volat") 
# Fit the model for the Volat dataset
model_volat <- lm(rsp500 ~ pcip + i3, data = volat)

# Display the summary of the model
summary(model_volat)
## 
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -157.871  -22.580    2.103   25.524  138.137 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.84306    3.27488   5.754 1.44e-08 ***
## pcip         0.03642    0.12940   0.281   0.7785    
## i3          -1.36169    0.54072  -2.518   0.0121 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.13 on 554 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01189,    Adjusted R-squared:  0.008325 
## F-statistic: 3.334 on 2 and 554 DF,  p-value: 0.03637
summary(model_volat)$coefficients
##                Estimate Std. Error    t value     Pr(>|t|)
## (Intercept) 18.84305624  3.2748802  5.7538154 1.444871e-08
## pcip         0.03641681  0.1293963  0.2814362 7.784810e-01
## i3          -1.36168867  0.5407244 -2.5182677 1.207402e-02
library(wooldridge)

data(barium)
str(barium)
## 'data.frame':    131 obs. of  31 variables:
##  $ chnimp  : num  220.5 94.8 219.4 317.4 114.6 ...
##  $ bchlimp : num  9578 11219 9720 12921 9790 ...
##  $ befile6 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ affile6 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ afdec6  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ befile12: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ affile12: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ afdec12 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ chempi  : num  100 101 101 102 104 ...
##  $ gas     : num  7.83e+09 8.82e+09 8.45e+09 9.24e+09 9.15e+09 ...
##  $ rtwex   : num  86.7 85.6 85.4 87.3 86.6 ...
##  $ spr     : int  0 1 1 1 0 0 0 0 0 0 ...
##  $ sum     : int  0 0 0 0 1 1 1 0 0 0 ...
##  $ fall    : int  0 0 0 0 0 0 0 1 1 1 ...
##  $ lchnimp : num  5.4 4.55 5.39 5.76 4.74 ...
##  $ lgas    : num  22.8 22.9 22.9 22.9 22.9 ...
##  $ lrtwex  : num  4.46 4.45 4.45 4.47 4.46 ...
##  $ lchempi : num  4.61 4.61 4.62 4.63 4.65 ...
##  $ t       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ feb     : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ mar     : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ apr     : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ may     : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ jun     : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ jul     : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ aug     : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ sep     : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ oct     : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ nov     : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ dec     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ percchn : num  2.302 0.845 2.257 2.457 1.171 ...
##  - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
barium$month <- rep(1:12, length.out = nrow(barium))
barium$month <- as.factor(barium$month)
#(i)
barium$time_trend <- 1:nrow(barium)
model_c2_1 <- lm(chnimp ~ gas + chempi + time_trend, data = barium)
summary(model_c2_1)
## 
## Call:
## lm(formula = chnimp ~ gas + chempi + time_trend, data = barium)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -451.59 -200.59  -49.09  153.81  943.43 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.827e+01  4.659e+02   0.125 0.900673    
## gas          6.686e-08  5.008e-08   1.335 0.184194    
## chempi      -3.739e+00  4.334e+00  -0.863 0.389901    
## time_trend   6.373e+00  1.589e+00   4.012 0.000102 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 291.1 on 127 degrees of freedom
## Multiple R-squared:  0.3225, Adjusted R-squared:  0.3065 
## F-statistic: 20.15 on 3 and 127 DF,  p-value: 9.564e-11
model_restricted <- lm(chnimp ~ time_trend, data = barium)
model_full <- lm(chnimp ~ gas + chempi + time_trend, data = barium)
anova(model_restricted, model_full)
## Analysis of Variance Table
## 
## Model 1: chnimp ~ time_trend
## Model 2: chnimp ~ gas + chempi + time_trend
##   Res.Df      RSS Df Sum of Sq      F Pr(>F)
## 1    129 10921259                           
## 2    127 10761080  2    160179 0.9452 0.3913
barium$month <- factor(barium$month) 
model_with_dummies <- lm(chnimp ~ gas + chempi + time_trend + month, data = barium)
summary(model_with_dummies)
## 
## Call:
## lm(formula = chnimp ~ gas + chempi + time_trend + month, data = barium)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -457.63 -224.10  -37.47  174.18  900.84 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.681e+02  4.984e+02   0.337 0.736475    
## gas          3.188e-08  7.134e-08   0.447 0.655782    
## chempi      -2.544e+00  4.744e+00  -0.536 0.592803    
## time_trend   5.942e+00  1.727e+00   3.441 0.000808 ***
## month2       1.590e+02  1.349e+02   1.179 0.240904    
## month3      -1.106e+02  1.323e+02  -0.837 0.404586    
## month4       1.184e+02  1.478e+02   0.801 0.424670    
## month5       1.632e+01  1.463e+02   0.112 0.911363    
## month6       9.568e+01  1.575e+02   0.608 0.544603    
## month7       3.189e+01  1.557e+02   0.205 0.838075    
## month8       1.582e+02  1.394e+02   1.134 0.259004    
## month9       1.395e+02  1.411e+02   0.989 0.324782    
## month10      6.595e-01  1.419e+02   0.005 0.996299    
## month11      1.070e+02  1.572e+02   0.680 0.497571    
## month12      1.272e+02  1.434e+02   0.887 0.376879    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 293.4 on 116 degrees of freedom
## Multiple R-squared:  0.3715, Adjusted R-squared:  0.2956 
## F-statistic: 4.897 on 14 and 116 DF,  p-value: 4.738e-07
data(volat)
str(volat)
## 'data.frame':    558 obs. of  17 variables:
##  $ date  : num  1947 1947 1947 1947 1947 ...
##  $ sp500 : num  15.2 15.8 15.2 14.6 14.3 ...
##  $ divyld: num  4.49 4.38 4.61 4.75 5.05 ...
##  $ i3    : num  0.38 0.38 0.38 0.38 0.38 ...
##  $ ip    : num  22.4 22.5 22.6 22.5 22.6 ...
##  $ pcsp  : num  NA 46.5 -48.6 -44.3 -21.4 ...
##  $ rsp500: num  NA 50.9 -44 -39.6 -16.3 ...
##  $ pcip  : num  NA 5.36 5.33 -5.31 5.33 ...
##  $ ci3   : num  NA 0 0 0 0 ...
##  $ ci3_1 : num  NA NA 0 0 0 ...
##  $ ci3_2 : num  NA NA NA 0 0 ...
##  $ pcip_1: num  NA NA 5.36 5.33 -5.31 ...
##  $ pcip_2: num  NA NA NA 5.36 5.33 ...
##  $ pcip_3: num  NA NA NA NA 5.36 ...
##  $ pcsp_1: num  NA NA 46.5 -48.6 -44.3 ...
##  $ pcsp_2: num  NA NA NA 46.5 -48.6 ...
##  $ pcsp_3: num  NA NA NA NA 46.5 ...
##  - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
model_c9 <- lm(rsp500 ~ pcip + i3, data = volat)
summary(model_c9)
## 
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -157.871  -22.580    2.103   25.524  138.137 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.84306    3.27488   5.754 1.44e-08 ***
## pcip         0.03642    0.12940   0.281   0.7785    
## i3          -1.36169    0.54072  -2.518   0.0121 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.13 on 554 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01189,    Adjusted R-squared:  0.008325 
## F-statistic: 3.334 on 2 and 554 DF,  p-value: 0.03637