## *Chapter 7*
#### **Exercise 1**
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(ISLR2)
library(matlib)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, will use the null device.
## See '?rgl.useNULL' for ways to avoid this warning.
library(wooldridge)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
data <- wooldridge::sleep75
head(sleep75)
## age black case clerical construc educ earns74 gdhlth inlf leis1 leis2 leis3
## 1 32 0 1 0 0 12 0 0 1 3529 3479 3479
## 2 31 0 2 0 0 14 9500 1 1 2140 2140 2140
## 3 44 0 3 0 0 17 42500 1 1 4595 4505 4227
## 4 30 0 4 0 0 12 42500 1 1 3211 3211 3211
## 5 64 0 5 0 0 14 2500 1 1 4052 4007 4007
## 6 41 0 6 0 0 12 0 1 1 4812 4797 4797
## smsa lhrwage lothinc male marr prot rlxall selfe sleep slpnaps south
## 1 0 1.955861 10.075380 1 1 1 3163 0 3113 3163 0
## 2 0 0.357674 0.000000 1 0 1 2920 1 2920 2920 1
## 3 1 3.021887 0.000000 1 1 0 3038 1 2670 2760 0
## 4 0 2.263844 0.000000 0 1 1 3083 1 3083 3083 0
## 5 0 1.011601 9.328213 1 1 1 3493 0 3448 3493 0
## 6 0 2.957511 10.657280 1 1 1 4078 0 4063 4078 0
## spsepay spwrk75 totwrk union worknrm workscnd exper yngkid yrsmarr hrwage
## 1 0 0 3438 0 3438 0 14 0 13 7.070004
## 2 0 0 5020 0 5020 0 11 0 0 1.429999
## 3 20000 1 2815 0 2815 0 21 0 0 20.529997
## 4 5000 1 3786 0 3786 0 12 0 12 9.619998
## 5 2400 1 2580 0 2580 0 44 0 33 2.750000
## 6 0 0 1205 0 0 1205 23 0 23 19.249998
## agesq
## 1 1024
## 2 961
## 3 1936
## 4 900
## 5 4096
## 6 1681
data(sleep75)
model1 <- lm(sleep ~ totwrk + educ + age + I(age^2) + male, data = sleep75)
summary(model1)
##
## Call:
## lm(formula = sleep ~ totwrk + educ + age + I(age^2) + male, data = sleep75)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2378.00 -243.29 6.74 259.24 1350.19
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3840.83197 235.10870 16.336 <2e-16 ***
## totwrk -0.16342 0.01813 -9.013 <2e-16 ***
## educ -11.71332 5.86689 -1.997 0.0463 *
## age -8.69668 11.20746 -0.776 0.4380
## I(age^2) 0.12844 0.13390 0.959 0.3378
## male 87.75243 34.32616 2.556 0.0108 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 417.7 on 700 degrees of freedom
## Multiple R-squared: 0.1228, Adjusted R-squared: 0.1165
## F-statistic: 19.59 on 5 and 700 DF, p-value: < 2.2e-16
# two-sided p-value for the male coefficient (t = 87.75/34.33, 700 df)
2*(1-pt(87.75/34.33, 700))
## [1] 0.01079613
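The same test can be run without retyping rounded numbers, pulling the unrounded estimate and standard error for male straight out of the fitted model (a quick cross-check; the row and column names are the ones returned by coef(summary(...))):

ct <- coef(summary(model1))
2 * (1 - pt(abs(ct["male", "Estimate"] / ct["male", "Std. Error"]), df.residual(model1)))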
model1 <- lm(sleep ~ totwrk + educ + male, data = sleep75)
summary(model1)
##
## Call:
## lm(formula = sleep ~ totwrk + educ + male, data = sleep75)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2380.27 -239.15 6.74 257.31 1370.63
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3747.51727 81.00609 46.262 < 2e-16 ***
## totwrk -0.16734 0.01794 -9.329 < 2e-16 ***
## educ -13.88479 5.65757 -2.454 0.01436 *
## male 90.96919 34.27441 2.654 0.00813 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 418 on 702 degrees of freedom
## Multiple R-squared: 0.1193, Adjusted R-squared: 0.1155
## F-statistic: 31.69 on 3 and 702 DF, p-value: < 2.2e-16
# two-sided p-values computed from the coefficient/SE pairs of the SAT equation in Exercise 3 (df = 4131)
2*(1-pt(2.19/0.53, 4131))
## [1] 3.666238e-05
2*(1-pt(45.09/4.29, 4131))
## [1] 0
2*(1-pt(169.81/12.71, 4131))
## [1] 0
#### **Exercise 3**
library(wooldridge)
data1 <- wooldridge::gpa2
head(data1)
## sat tothrs colgpa athlete verbmath hsize hsrank hsperc female white black
## 1 920 43 2.04 1 0.48387 0.10 4 40.00000 1 0 0
## 2 1170 18 4.00 0 0.82813 9.40 191 20.31915 0 1 0
## 3 810 14 1.78 1 0.88372 1.19 42 35.29412 0 1 0
## 4 940 40 2.42 0 0.80769 5.71 252 44.13310 0 1 0
## 5 1180 18 2.61 0 0.73529 2.14 86 40.18692 0 1 0
## 6 980 114 3.03 0 0.81481 2.68 41 15.29851 1 1 0
## hsizesq
## 1 0.0100
## 2 88.3600
## 3 1.4161
## 4 32.6041
## 5 4.5796
## 6 7.1824
data("gpa2")
model2 <- lm(sat ~ hsize + I(hsize^2) + female + black + I(female*black), data = gpa2)
summary(model2)
##
## Call:
## lm(formula = sat ~ hsize + I(hsize^2) + female + black + I(female *
## black), data = gpa2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -570.45 -89.54 -5.24 85.41 479.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1028.0972 6.2902 163.445 < 2e-16 ***
## hsize 19.2971 3.8323 5.035 4.97e-07 ***
## I(hsize^2) -2.1948 0.5272 -4.163 3.20e-05 ***
## female -45.0915 4.2911 -10.508 < 2e-16 ***
## black -169.8126 12.7131 -13.357 < 2e-16 ***
## I(female * black) 62.3064 18.1542 3.432 0.000605 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 133.4 on 4131 degrees of freedom
## Multiple R-squared: 0.08578, Adjusted R-squared: 0.08468
## F-statistic: 77.52 on 5 and 4131 DF, p-value: < 2.2e-16
model4<- lm(sat ~ hsize + hsizesq + female + black + female*black , data = data1)
summary(model4)
##
## Call:
## lm(formula = sat ~ hsize + hsizesq + female + black + female *
## black, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -570.45 -89.54 -5.24 85.41 479.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1028.0972 6.2902 163.445 < 2e-16 ***
## hsize 19.2971 3.8323 5.035 4.97e-07 ***
## hsizesq -2.1948 0.5272 -4.163 3.20e-05 ***
## female -45.0915 4.2911 -10.508 < 2e-16 ***
## black -169.8126 12.7131 -13.357 < 2e-16 ***
## female:black 62.3064 18.1542 3.432 0.000605 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 133.4 on 4131 degrees of freedom
## Multiple R-squared: 0.08578, Adjusted R-squared: 0.08468
## F-statistic: 77.52 on 5 and 4131 DF, p-value: < 2.2e-16
2*(1-pt(2.19/0.53, 4131))
## [1] 3.666238e-05
t_stat <- 62.31/18.15
df <- 4131-1
p_value <- 2 * (1 - pt(abs(t_stat), df))
p_value
## [1] 0.0006026815
2*(1-pt(169.81/12.71, 4131))
## [1] 0
#### **C1**
library(wooldridge)
data1 <- wooldridge::gpa1
head(data1)
## age soph junior senior senior5 male campus business engineer colGPA hsGPA ACT
## 1 21 0 0 1 0 0 0 1 0 3.0 3.0 21
## 2 21 0 0 1 0 0 0 1 0 3.4 3.2 24
## 3 20 0 1 0 0 0 0 1 0 3.0 3.6 26
## 4 19 1 0 0 0 1 1 1 0 3.5 3.5 27
## 5 20 0 1 0 0 0 0 1 0 3.6 3.9 28
## 6 20 0 0 1 0 1 1 1 0 3.0 3.4 25
## job19 job20 drive bike walk voluntr PC greek car siblings bgfriend clubs
## 1 0 1 1 0 0 0 0 0 1 1 0 0
## 2 0 1 1 0 0 0 0 0 1 0 1 1
## 3 1 0 0 0 1 0 0 0 1 1 0 1
## 4 1 0 0 0 1 0 0 0 0 1 0 0
## 5 0 1 0 1 0 0 0 0 1 1 1 0
## 6 0 0 0 0 1 0 0 0 1 1 0 0
## skipped alcohol gradMI fathcoll mothcoll
## 1 2 1.0 1 0 0
## 2 0 1.0 1 1 1
## 3 0 1.0 1 1 1
## 4 0 0.0 0 0 0
## 5 0 1.5 1 1 0
## 6 0 0.0 0 1 0
data_C1 <- gpa1
model_C1 <- lm(colGPA~PC+hsGPA+ACT+mothcoll+fathcoll,data= data_C1)
summary(model_C1)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll,
## data = data_C1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78149 -0.25726 -0.02121 0.24691 0.74432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.255554 0.335392 3.744 0.000268 ***
## PC 0.151854 0.058716 2.586 0.010762 *
## hsGPA 0.450220 0.094280 4.775 4.61e-06 ***
## ACT 0.007724 0.010678 0.723 0.470688
## mothcoll -0.003758 0.060270 -0.062 0.950376
## fathcoll 0.041800 0.061270 0.682 0.496265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3344 on 135 degrees of freedom
## Multiple R-squared: 0.2222, Adjusted R-squared: 0.1934
## F-statistic: 7.713 on 5 and 135 DF, p-value: 2.083e-06
library(wooldridge)
data1 <- wooldridge::gpa1
# Perform the regression analysis
model_7C1_ur <- lm(colGPA ~ PC + hsGPA + ACT, data = data1)
summary(model_7C1_ur)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + ACT, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7901 -0.2622 -0.0107 0.2334 0.7570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.263520 0.333125 3.793 0.000223 ***
## PC 0.157309 0.057287 2.746 0.006844 **
## hsGPA 0.447242 0.093647 4.776 4.54e-06 ***
## ACT 0.008659 0.010534 0.822 0.412513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3325 on 137 degrees of freedom
## Multiple R-squared: 0.2194, Adjusted R-squared: 0.2023
## F-statistic: 12.83 on 3 and 137 DF, p-value: 1.932e-07
F= ((0.2222-0.2194)/2)/((1-0.2222)/135)
F
## [1] 0.2429931
critical_value <- qf(0.95, 2, 135)  # 5% critical value of F(2, 135)
critical_value
## [1] 3.063204
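For reference, the statistic computed above is the R-squared form of the F test for the q = 2 exclusion restrictions (mothcoll, fathcoll):

$$F = \frac{(R^2_{ur} - R^2_{r})/q}{(1 - R^2_{ur})/(n - k - 1)}$$

with $R^2_{ur} = 0.2222$, $R^2_{r} = 0.2194$, and $n - k - 1 = 135$. The same restrictions can also be tested directly from the two models already fitted above; a minimal cross-check (anova() uses the exact residual sums of squares rather than rounded R-squareds):

anova(model_7C1_ur, model_C1)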
model6<- lm(colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll , data = data1)
summary(model6)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll,
## data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78149 -0.25726 -0.02121 0.24691 0.74432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.255554 0.335392 3.744 0.000268 ***
## PC 0.151854 0.058716 2.586 0.010762 *
## hsGPA 0.450220 0.094280 4.775 4.61e-06 ***
## ACT 0.007724 0.010678 0.723 0.470688
## mothcoll -0.003758 0.060270 -0.062 0.950376
## fathcoll 0.041800 0.061270 0.682 0.496265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3344 on 135 degrees of freedom
## Multiple R-squared: 0.2222, Adjusted R-squared: 0.1934
## F-statistic: 7.713 on 5 and 135 DF, p-value: 2.083e-06
library(car)
hypotheses <- c("mothcoll=0", "fathcoll=0")
joint_test <- linearHypothesis(model6, hypotheses)
joint_test
##
## Linear hypothesis test:
## mothcoll = 0
## fathcoll = 0
##
## Model 1: restricted model
## Model 2: colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 137 15.149
## 2 135 15.094 2 0.054685 0.2446 0.7834
summary(joint_test)
## Res.Df RSS Df Sum of Sq F
## Min. :135.0 Min. :15.09 Min. :2 Min. :0.05469 Min. :0.2446
## 1st Qu.:135.5 1st Qu.:15.11 1st Qu.:2 1st Qu.:0.05469 1st Qu.:0.2446
## Median :136.0 Median :15.12 Median :2 Median :0.05469 Median :0.2446
## Mean :136.0 Mean :15.12 Mean :2 Mean :0.05469 Mean :0.2446
## 3rd Qu.:136.5 3rd Qu.:15.14 3rd Qu.:2 3rd Qu.:0.05469 3rd Qu.:0.2446
## Max. :137.0 Max. :15.15 Max. :2 Max. :0.05469 Max. :0.2446
## NA's :1 NA's :1 NA's :1
## Pr(>F)
## Min. :0.7834
## 1st Qu.:0.7834
## Median :0.7834
## Mean :0.7834
## 3rd Qu.:0.7834
## Max. :0.7834
## NA's :1
model3 <- lm (colGPA ~ PC + hsGPA + I(hsGPA^2) + ACT + mothcoll + fathcoll, data = gpa1)
summary (model3)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + I(hsGPA^2) + ACT + mothcoll +
## fathcoll, data = gpa1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78998 -0.24327 -0.00648 0.26179 0.72231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.040328 2.443038 2.063 0.0410 *
## PC 0.140446 0.058858 2.386 0.0184 *
## hsGPA -1.802520 1.443552 -1.249 0.2140
## I(hsGPA^2) 0.337341 0.215711 1.564 0.1202
## ACT 0.004786 0.010786 0.444 0.6580
## mothcoll 0.003091 0.060110 0.051 0.9591
## fathcoll 0.062761 0.062401 1.006 0.3163
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3326 on 134 degrees of freedom
## Multiple R-squared: 0.2361, Adjusted R-squared: 0.2019
## F-statistic: 6.904 on 6 and 134 DF, p-value: 2.088e-06
#### **C2**
data3 <- wooldridge::wage2
head(data3)
## wage hours IQ KWW educ exper tenure age married black south urban sibs
## 1 769 40 93 35 12 11 2 31 1 0 0 1 1
## 2 808 50 119 41 18 11 16 37 1 0 0 1 1
## 3 825 40 108 46 14 11 9 33 1 0 0 1 1
## 4 650 40 96 32 12 13 7 32 1 0 0 1 4
## 5 562 40 74 27 11 14 5 34 1 0 0 1 10
## 6 1400 40 116 43 16 14 2 35 1 1 0 1 1
## brthord meduc feduc lwage
## 1 2 8 8 6.645091
## 2 NA 14 14 6.694562
## 3 2 14 14 6.715384
## 4 3 12 12 6.476973
## 5 6 6 11 6.331502
## 6 2 8 NA 7.244227
data("wage2")
model4 <- lm(log(wage) ~ educ +exper +tenure + married + black + south + urban, data = wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98069 -0.21996 0.00707 0.24288 1.22822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.395497 0.113225 47.653 < 2e-16 ***
## educ 0.065431 0.006250 10.468 < 2e-16 ***
## exper 0.014043 0.003185 4.409 1.16e-05 ***
## tenure 0.011747 0.002453 4.789 1.95e-06 ***
## married 0.199417 0.039050 5.107 3.98e-07 ***
## black -0.188350 0.037667 -5.000 6.84e-07 ***
## south -0.090904 0.026249 -3.463 0.000558 ***
## urban 0.183912 0.026958 6.822 1.62e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared: 0.2526, Adjusted R-squared: 0.2469
## F-statistic: 44.75 on 7 and 927 DF, p-value: < 2.2e-16
model4 <- lm(log(wage) ~ educ +exper + I(exper^2) + tenure + I(tenure^2)
+ married + black + south + urban, data = wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + I(exper^2) + tenure +
## I(tenure^2) + married + black + south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98236 -0.21972 -0.00036 0.24078 1.25127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.3586756 0.1259143 42.558 < 2e-16 ***
## educ 0.0642761 0.0063115 10.184 < 2e-16 ***
## exper 0.0172146 0.0126138 1.365 0.172665
## I(exper^2) -0.0001138 0.0005319 -0.214 0.830622
## tenure 0.0249291 0.0081297 3.066 0.002229 **
## I(tenure^2) -0.0007964 0.0004710 -1.691 0.091188 .
## married 0.1985470 0.0391103 5.077 4.65e-07 ***
## black -0.1906636 0.0377011 -5.057 5.13e-07 ***
## south -0.0912153 0.0262356 -3.477 0.000531 ***
## urban 0.1854241 0.0269585 6.878 1.12e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3653 on 925 degrees of freedom
## Multiple R-squared: 0.255, Adjusted R-squared: 0.2477
## F-statistic: 35.17 on 9 and 925 DF, p-value: < 2.2e-16
linearHypothesis(model4, c("I(exper^2)=0", "I(tenure^2)=0" ))
##
## Linear hypothesis test:
## I(exper^2) = 0
## I(tenure^2) = 0
##
## Model 1: restricted model
## Model 2: log(wage) ~ educ + exper + I(exper^2) + tenure + I(tenure^2) +
## married + black + south + urban
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 927 123.82
## 2 925 123.42 2 0.39756 1.4898 0.226
F= ((0.255-0.2526)/2)/((1-0.255)/925)
F
## [1] 1.489933
1-pf(F,2,925)
## [1] 0.2259282
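The same joint test can be reproduced with anova() after refitting the restricted model explicitly; model4_r is a name introduced only for this sketch:

model4_r <- lm(log(wage) ~ educ + exper + tenure + married + black + south + urban, data = wage2)
anova(model4_r, model4)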
model4 <- lm(log(wage) ~ educ +exper +tenure + married
+ black + south + urban + I(black*educ), data = wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban + I(black * educ), data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97782 -0.21832 0.00475 0.24136 1.23226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.374817 0.114703 46.859 < 2e-16 ***
## educ 0.067115 0.006428 10.442 < 2e-16 ***
## exper 0.013826 0.003191 4.333 1.63e-05 ***
## tenure 0.011787 0.002453 4.805 1.80e-06 ***
## married 0.198908 0.039047 5.094 4.25e-07 ***
## black 0.094809 0.255399 0.371 0.710561
## south -0.089450 0.026277 -3.404 0.000692 ***
## urban 0.183852 0.026955 6.821 1.63e-11 ***
## I(black * educ) -0.022624 0.020183 -1.121 0.262603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared: 0.2536, Adjusted R-squared: 0.2471
## F-statistic: 39.32 on 8 and 926 DF, p-value: < 2.2e-16
model4 <- lm(log(wage)~educ+exper+tenure+married+black+south+urban+educ*black,data= wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban + educ * black, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97782 -0.21832 0.00475 0.24136 1.23226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.374817 0.114703 46.859 < 2e-16 ***
## educ 0.067115 0.006428 10.442 < 2e-16 ***
## exper 0.013826 0.003191 4.333 1.63e-05 ***
## tenure 0.011787 0.002453 4.805 1.80e-06 ***
## married 0.198908 0.039047 5.094 4.25e-07 ***
## black 0.094809 0.255399 0.371 0.710561
## south -0.089450 0.026277 -3.404 0.000692 ***
## urban 0.183852 0.026955 6.821 1.63e-11 ***
## educ:black -0.022624 0.020183 -1.121 0.262603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared: 0.2536, Adjusted R-squared: 0.2471
## F-statistic: 39.32 on 8 and 926 DF, p-value: < 2.2e-16
model_7C2_4 <- lm(log(wage)~educ+exper+tenure+married+black+south+urban+married*black,data= wage2)
summary(model_7C2_4)
## *Chapter 8*
#### **Exercise 1**
#### **Exercise 5**
data4 <- wooldridge::smoke
head(data4)
## educ cigpric white age income cigs restaurn lincome agesq lcigpric
## 1 16.0 60.506 1 46 20000 0 0 9.903487 2116 4.102743
## 2 16.0 57.883 1 40 30000 0 0 10.308952 1600 4.058424
## 3 12.0 57.664 1 58 30000 3 0 10.308952 3364 4.054633
## 4 13.5 57.883 1 30 20000 0 0 9.903487 900 4.058424
## 5 10.0 58.320 1 17 20000 0 0 9.903487 289 4.065945
## 6 6.0 59.340 1 86 6500 0 0 8.779557 7396 4.083283
model11 <- lm(cigs ~ log(cigpric) + log(income) + educ + age + agesq + restaurn + white , data = data4)
summary(model11)
##
## Call:
## lm(formula = cigs ~ log(cigpric) + log(income) + educ + age +
## agesq + restaurn + white, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.772 -9.330 -5.907 7.945 70.275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.682419 24.220729 -0.111 0.91184
## log(cigpric) -0.850907 5.782321 -0.147 0.88305
## log(income) 0.869014 0.728763 1.192 0.23344
## educ -0.501753 0.167168 -3.001 0.00277 **
## age 0.774502 0.160516 4.825 1.68e-06 ***
## agesq -0.009069 0.001748 -5.188 2.70e-07 ***
## restaurn -2.865621 1.117406 -2.565 0.01051 *
## white -0.559236 1.459461 -0.383 0.70169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.41 on 799 degrees of freedom
## Multiple R-squared: 0.05291, Adjusted R-squared: 0.04461
## F-statistic: 6.377 on 7 and 799 DF, p-value: 2.588e-07
s_error <- coef(summary(model11))
s_error
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.682418774 24.220728831 -0.1107489 9.118433e-01
## log(cigpric) -0.850907441 5.782321084 -0.1471567 8.830454e-01
## log(income) 0.869013971 0.728763480 1.1924499 2.334389e-01
## educ -0.501753247 0.167167689 -3.0014966 2.770186e-03
## age 0.774502156 0.160515805 4.8250835 1.676279e-06
## agesq -0.009068603 0.001748055 -5.1878253 2.699355e-07
## restaurn -2.865621212 1.117405936 -2.5645301 1.051320e-02
## white -0.559236403 1.459461034 -0.3831801 7.016882e-01
# turning point of the age quadratic: -b_age / (2 * b_agesq)
age_turn <- -coef(model11)["age"] / (2 * coef(model11)["agesq"])
age_turn
##     age 
## 42.7024
coef_res <- coef(model11)["restaurn"]
coef_res
## restaurn
## -2.865621
library(lmtest)
data_update <- data.frame(cigpric = 67.44, income = 6500, educ = 16, age = 77, restaurn = 0, white = 0)
het_test <- bptest(model11)
het_test
##
## studentized Breusch-Pagan test
##
## data: model11
## BP = 32.377, df = 7, p-value = 3.458e-05
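The studentized Breusch-Pagan statistic reported above is n times the R-squared from regressing the squared OLS residuals on the original regressors; a sketch of that manual version (it should match BP = 32.377):

u2 <- resid(model11)^2
aux <- lm(u2 ~ log(cigpric) + log(income) + educ + age + agesq + restaurn + white, data = data4)
bp_manual <- nobs(model11) * summary(aux)$r.squared
1 - pchisq(bp_manual, df = 7)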
#### **C4**
data5 <- wooldridge::vote1
head(data5)
## state district democA voteA expendA expendB prtystrA lexpendA lexpendB
## 1 AL 7 1 68 328.296 8.737 41 5.793916 2.167567
## 2 AK 1 0 62 626.377 402.477 60 6.439952 5.997638
## 3 AZ 2 1 73 99.607 3.065 55 4.601233 1.120048
## 4 AZ 3 0 69 319.690 26.281 64 5.767352 3.268846
## 5 AR 3 0 75 159.221 60.054 66 5.070293 4.095244
## 6 AR 4 1 69 570.155 21.393 46 6.345908 3.063064
## shareA
## 1 97.40767
## 2 60.88104
## 3 97.01476
## 4 92.40370
## 5 72.61247
## 6 96.38355
data ("vote1")
model5 <- lm (voteA ~ prtystrA + democA + lexpendA + lexpendB, data = vote1)
summary(model5)
##
## Call:
## lm(formula = voteA ~ prtystrA + democA + lexpendA + lexpendB,
## data = vote1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.576 -4.864 -1.146 4.903 24.566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.66142 4.73604 7.952 2.56e-13 ***
## prtystrA 0.25192 0.07129 3.534 0.00053 ***
## democA 3.79294 1.40652 2.697 0.00772 **
## lexpendA 5.77929 0.39182 14.750 < 2e-16 ***
## lexpendB -6.23784 0.39746 -15.694 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared: 0.8012, Adjusted R-squared: 0.7964
## F-statistic: 169.2 on 4 and 168 DF, p-value: < 2.2e-16
data5 <- data5 %>% mutate(residual_squared=(model5$residuals)^2)
head(data5)
## state district democA voteA expendA expendB prtystrA lexpendA lexpendB
## 1 AL 7 1 68 328.296 8.737 41 5.793916 2.167567
## 2 AK 1 0 62 626.377 402.477 60 6.439952 5.997638
## 3 AZ 2 1 73 99.607 3.065 55 4.601233 1.120048
## 4 AZ 3 0 69 319.690 26.281 64 5.767352 3.268846
## 5 AR 3 0 75 159.221 60.054 66 5.070293 4.095244
## 6 AR 4 1 69 570.155 21.393 46 6.345908 3.063064
## shareA residual_squared
## 1 97.40767 14.038468
## 2 60.88104 88.688062
## 3 97.01476 3.667331
## 4 92.40370 5.176383
## 5 72.61247 287.464399
## 6 96.38355 2.593858
model5_r<- lm(residual_squared~prtystrA+democA+log(expendA)+log(expendB),data= data5)
summary(model5_r)
##
## Call:
## lm(formula = residual_squared ~ prtystrA + democA + log(expendA) +
## log(expendB), data = data5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.88 -46.16 -23.51 15.84 508.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 113.9635 50.8150 2.243 0.0262 *
## prtystrA -0.2993 0.7649 -0.391 0.6961
## democA 15.6192 15.0912 1.035 0.3022
## log(expendA) -10.3057 4.2040 -2.451 0.0153 *
## log(expendB) -0.0514 4.2645 -0.012 0.9904
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 81.25 on 168 degrees of freedom
## Multiple R-squared: 0.05256, Adjusted R-squared: 0.03
## F-statistic: 2.33 on 4 and 168 DF, p-value: 0.05806
bptest(model5)
##
## studentized Breusch-Pagan test
##
## data: model5
## BP = 9.0934, df = 4, p-value = 0.05881
bptest (model5, ~ prtystrA*democA*lexpendA*lexpendB + I(prtystrA^2) + I(democA^2) + I(lexpendA^2) + I(lexpendB^2), data = vote1)
##
## studentized Breusch-Pagan test
##
## data: model5
## BP = 43.327, df = 18, p-value = 0.0007194
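A more parsimonious special form of the White test uses the fitted values and their squares instead of all levels, squares, and interactions; a sketch:

bptest(model5, ~ fitted(model5) + I(fitted(model5)^2))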
#### **C13**
library(sandwich)
library(lmtest)
data6 <- wooldridge::fertil2
head(data6)
## mnthborn yearborn age electric radio tv bicycle educ ceb agefbrth children
## 1 5 64 24 1 1 1 1 12 0 NA 0
## 2 1 56 32 1 1 1 1 13 3 25 3
## 3 7 58 30 1 0 0 0 5 1 27 1
## 4 11 45 42 1 0 1 0 4 3 17 2
## 5 5 45 43 1 1 1 1 11 2 24 2
## 6 8 52 36 1 0 0 0 7 1 26 1
## knowmeth usemeth monthfm yearfm agefm idlnchld heduc agesq urban urb_educ
## 1 1 0 NA NA NA 2 NA 576 1 12
## 2 1 1 11 80 24 3 12 1024 1 13
## 3 1 0 6 83 24 5 7 900 1 5
## 4 1 0 1 61 15 3 11 1764 1 4
## 5 1 1 3 66 20 2 14 1849 1 11
## 6 1 1 11 76 24 4 9 1296 1 7
## spirit protest catholic frsthalf educ0 evermarr
## 1 0 0 0 1 0 0
## 2 0 0 0 1 0 1
## 3 1 0 0 0 0 1
## 4 0 0 0 0 0 1
## 5 0 1 0 1 0 1
## 6 0 0 0 0 0 1
model <- lm(children ~ age + I(age^2) + educ + electric + urban, data = fertil2)
robust_se <- sqrt(diag(vcovHC(model)))
summary_with_robust_se <- cbind(coef(model), "Robust SE" = robust_se)
summary(model)
##
## Call:
## lm(formula = children ~ age + I(age^2) + educ + electric + urban,
## data = fertil2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9012 -0.7136 -0.0039 0.7119 7.4318
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.2225162 0.2401888 -17.580 < 2e-16 ***
## age 0.3409255 0.0165082 20.652 < 2e-16 ***
## I(age^2) -0.0027412 0.0002718 -10.086 < 2e-16 ***
## educ -0.0752323 0.0062966 -11.948 < 2e-16 ***
## electric -0.3100404 0.0690045 -4.493 7.20e-06 ***
## urban -0.2000339 0.0465062 -4.301 1.74e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.452 on 4352 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.5734, Adjusted R-squared: 0.5729
## F-statistic: 1170 on 5 and 4352 DF, p-value: < 2.2e-16
print(summary_with_robust_se)
## Robust SE
## (Intercept) -4.222516228 0.2443961935
## age 0.340925520 0.0192199445
## I(age^2) -0.002741209 0.0003513959
## educ -0.075232323 0.0063159137
## electric -0.310040409 0.0640737262
## urban -0.200033857 0.0455162364
coeftest(model6, vcov=vcovHC(model6, type = "HC0"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2555544 0.3368805 3.7270 0.000284 ***
## PC 0.1518539 0.0596641 2.5451 0.012048 *
## hsGPA 0.4502203 0.0962508 4.6776 6.962e-06 ***
## ACT 0.0077242 0.0103074 0.7494 0.454932
## mothcoll -0.0037579 0.0601271 -0.0625 0.950258
## fathcoll 0.0417999 0.0578749 0.7222 0.471392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
F= ((0.574-0.5734)/3)/((1-0.574)/4349)
F
## [1] 2.041784
critical_value <- qf(0.95, 3, 4349)  # 5% critical value of F(3, 4349), roughly 2.61
critical_value
p_value = 1- pf(F,3,4349)
p_value
## [1] 0.1058349
a <- coeftest(model, vcov = sandwich)
a
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.22251623 0.24368307 -17.3279 < 2.2e-16 ***
## age 0.34092552 0.01916146 17.7923 < 2.2e-16 ***
## I(age^2) -0.00274121 0.00035027 -7.8260 6.278e-15 ***
## educ -0.07523232 0.00630336 -11.9353 < 2.2e-16 ***
## electric -0.31004041 0.06390411 -4.8517 1.267e-06 ***
## urban -0.20003386 0.04543962 -4.4022 1.097e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 1065, df = 5, p-value < 2.2e-16
usqr=(summary(model6)$residual)^2
yhat=model6$fitted.values
yhatsqr=model6$fitted.values^2
model6WT=summary(lm(usqr~yhat+yhatsqr,data=fertil2))$r.squared
model6WT*4361
## [1] 143.6236
1-pchisq(1093.183,2)
## [1] 0
joint_test <- coeftest(model6, vcov = vcovHC)
print(joint_test[, "Pr(>|t|)"])
## (Intercept) PC hsGPA ACT mothcoll fathcoll
## 5.903211e-04 1.682044e-02 1.977193e-05 4.846838e-01 9.528773e-01 4.948069e-01
p_value = 1- pf(F,3,4349)
p_value
## [1] 0.1058349
head(summary_with_robust_se)
## Robust SE
## (Intercept) -4.222516228 0.2443961935
## age 0.340925520 0.0192199445
## I(age^2) -0.002741209 0.0003513959
## educ -0.075232323 0.0063159137
## electric -0.310040409 0.0640737262
## urban -0.200033857 0.0455162364
fv <- fitted(model6)
head(fv,6)
## 1 2 3 4 5 6
## 2.768423 2.919682 3.115218 3.039878 3.269490 3.021208
residuals <- resid(model6)
head(residuals,6)
## 1 2 3 4 5 6
## 0.23157702 0.48031850 -0.11521801 0.46012183 0.33050949 -0.02120773
hetero_test <- lm(residuals^2 ~ fv)
summary(hetero_test)
##
## Call:
## lm(formula = residuals^2 ~ fv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.13813 -0.07873 -0.04322 0.04132 0.51313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.27680 0.18877 -1.466 0.1448
## fv 0.12558 0.06165 2.037 0.0436 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.128 on 139 degrees of freedom
## Multiple R-squared: 0.02898, Adjusted R-squared: 0.022
## F-statistic: 4.149 on 1 and 139 DF, p-value: 0.04357
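To turn this auxiliary regression into a test statistic, the LM form multiplies its R-squared by the sample size and compares the result with a chi-squared(1) distribution; a sketch using the objects above:

lm_stat <- nobs(model6) * summary(hetero_test)$r.squared
1 - pchisq(lm_stat, df = 1)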
## *Chapter 9*
#### **Exercise 1**
data7 <- wooldridge::ceosal2
head(data7)
## salary age college grad comten ceoten sales profits mktval lsalary lsales
## 1 1161 49 1 1 9 2 6200 966 23200 7.057037 8.732305
## 2 600 43 1 1 10 10 283 48 1100 6.396930 5.645447
## 3 379 51 1 1 9 3 169 40 1100 5.937536 5.129899
## 4 651 55 1 0 22 22 1100 -54 1000 6.478509 7.003066
## 5 497 44 1 1 8 6 351 28 387 6.208590 5.860786
## 6 1067 64 1 1 7 7 19000 614 3900 6.972606 9.852194
## lmktval comtensq ceotensq profmarg
## 1 10.051908 81 4 15.580646
## 2 7.003066 100 100 16.961130
## 3 7.003066 81 9 23.668638
## 4 6.907755 484 484 -4.909091
## 5 5.958425 64 36 7.977208
## 6 8.268732 49 49 3.231579
model11 <- lm(log(salary) ~ log(sales) + log(mktval) + profmarg + ceoten + comten, data = data7)
summary(model11)
##
## Call:
## lm(formula = log(salary) ~ log(sales) + log(mktval) + profmarg +
## ceoten + comten, data = data7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5436 -0.2796 -0.0164 0.2857 1.9879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.571977 0.253466 18.038 < 2e-16 ***
## log(sales) 0.187787 0.040003 4.694 5.46e-06 ***
## log(mktval) 0.099872 0.049214 2.029 0.04397 *
## profmarg -0.002211 0.002105 -1.050 0.29514
## ceoten 0.017104 0.005540 3.087 0.00236 **
## comten -0.009238 0.003337 -2.768 0.00626 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4947 on 171 degrees of freedom
## Multiple R-squared: 0.3525, Adjusted R-squared: 0.3336
## F-statistic: 18.62 on 5 and 171 DF, p-value: 9.488e-15
r_squared <- summary(model11)$r.squared
r_squared
## [1] 0.3525374
model12 <- lm(log(salary) ~ log(sales) + log(mktval) + profmarg + ceotensq + comtensq, data = data7)
summary(model12)
##
## Call:
## lm(formula = log(salary) ~ log(sales) + log(mktval) + profmarg +
## ceotensq + comtensq, data = data7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.47481 -0.25933 -0.00511 0.27010 2.07583
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.612e+00 2.524e-01 18.276 < 2e-16 ***
## log(sales) 1.805e-01 4.021e-02 4.489 1.31e-05 ***
## log(mktval) 1.018e-01 4.988e-02 2.040 0.0429 *
## profmarg -2.077e-03 2.135e-03 -0.973 0.3321
## ceotensq 3.761e-04 1.916e-04 1.963 0.0512 .
## comtensq -1.788e-04 7.236e-05 -2.471 0.0144 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5024 on 171 degrees of freedom
## Multiple R-squared: 0.3324, Adjusted R-squared: 0.3129
## F-statistic: 17.03 on 5 and 171 DF, p-value: 1.195e-13
r_squared <- summary(model12)$r.squared
r_squared
## [1] 0.3323998
#### **Exercise 5**
data9 <- wooldridge::campus
head(data9)
## enroll priv police crime lcrime lenroll lpolice
## 1 21836 0 24 446 6.100319 9.991315 3.178054
## 2 6485 0 13 1 0.000000 8.777247 2.564949
## 3 2123 0 3 1 0.000000 7.660585 1.098612
## 4 8240 0 17 121 4.795791 9.016756 2.833213
## 5 19793 0 30 470 6.152733 9.893084 3.401197
## 6 3256 1 9 25 3.218876 8.088255 2.197225
b0 <- -6.63
se_b0 <- 1.03
b1 <- 1.27
se_b1 <- 0.11
n <- nrow(data9)
t_stat <- (b1 - 1) / se_b1
df <- n - 2
critical_value <- qt(0.95, df)
if (t_stat > critical_value) {
cat("Reject the null hypothesis H0: B1 = 1 in favor of H1: B1 > 1 at the 5% level.\n")
} else {
cat("Fail to reject the null hypothesis H0: B1 = 1 at the 5% level.\n")
}
## Reject the null hypothesis H0: B1 = 1 in favor of H1: B1 > 1 at the 5% level.
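The corresponding one-sided p-value follows from the same quantities (t_stat and df are defined above):

# P(T > t_stat) under H0: B1 = 1
pt(t_stat, df, lower.tail = FALSE)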
#### **C4**
data ("infmrt")
model9 <- lm(infmort ~ lpcinc + lphysic + lpopul + DC, data = infmrt )
summary(model9)
##
## Call:
## lm(formula = infmort ~ lpcinc + lphysic + lpopul + DC, data = infmrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6894 -0.8973 -0.1412 0.7054 3.1705
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.2125 6.7267 5.383 5.09e-07 ***
## lpcinc -2.3964 0.8866 -2.703 0.00812 **
## lphysic -1.5548 0.7734 -2.010 0.04718 *
## lpopul 0.5755 0.1365 4.215 5.60e-05 ***
## DC 13.9632 1.2466 11.201 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.255 on 97 degrees of freedom
## Multiple R-squared: 0.6432, Adjusted R-squared: 0.6285
## F-statistic: 43.72 on 4 and 97 DF, p-value: < 2.2e-16
model9 <- lm(infmort~log(pcinc)+log(physic)+log(popul), data=infmrt)
summary(model9)
##
## Call:
## lm(formula = infmort ~ log(pcinc) + log(physic) + log(popul),
## data = infmrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0204 -1.1897 -0.1152 0.9083 8.1890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.22614 10.13466 3.574 0.000547 ***
## log(pcinc) -4.88415 1.29319 -3.777 0.000273 ***
## log(physic) 4.02783 0.89101 4.521 1.73e-05 ***
## log(popul) -0.05356 0.18748 -0.286 0.775712
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.891 on 98 degrees of freedom
## Multiple R-squared: 0.1818, Adjusted R-squared: 0.1568
## F-statistic: 7.26 on 3 and 98 DF, p-value: 0.0001898
#### **C5**
library(wooldridge)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ car::recode() masks dplyr::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(quantreg)
## Loading required package: SparseM
data <- as.data.frame(rdchem)
data$sales_billion <- data$sales / 1e3
ols_full <- lm(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data)
summary(ols_full)
##
## Call:
## lm(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0371 -1.1238 -0.4547 0.7165 5.8522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.058967 0.626269 3.288 0.00272 **
## sales_billion 0.316611 0.138854 2.280 0.03041 *
## I(sales_billion^2) -0.007390 0.003716 -1.989 0.05657 .
## profmarg 0.053322 0.044210 1.206 0.23787
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.774 on 28 degrees of freedom
## Multiple R-squared: 0.1905, Adjusted R-squared: 0.1037
## F-statistic: 2.196 on 3 and 28 DF, p-value: 0.1107
# note: the largest firm in rdchem has sales just under $40 billion, so this cutoff keeps
# all 32 observations (which is why the filtered results below match the full-sample ones)
data_filtered <- data %>% filter(sales_billion < 40)
ols_filtered <- lm(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_filtered)
summary(ols_filtered)
##
## Call:
## lm(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, data = data_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0371 -1.1238 -0.4547 0.7165 5.8522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.058967 0.626269 3.288 0.00272 **
## sales_billion 0.316611 0.138854 2.280 0.03041 *
## I(sales_billion^2) -0.007390 0.003716 -1.989 0.05657 .
## profmarg 0.053322 0.044210 1.206 0.23787
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.774 on 28 degrees of freedom
## Multiple R-squared: 0.1905, Adjusted R-squared: 0.1037
## F-statistic: 2.196 on 3 and 28 DF, p-value: 0.1107
lad_full <- rq(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data, tau = 0.5)
summary(lad_full)
##
## Call: rq(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, tau = 0.5, data = data)
##
## tau: [1] 0.5
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 1.40428 0.87031 2.66628
## sales_billion 0.26346 -0.13508 0.75753
## I(sales_billion^2) -0.00600 -0.01679 0.00344
## profmarg 0.11400 0.01376 0.16427
lad_filtered <- rq(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_filtered, tau = 0.5)
summary(lad_filtered)
##
## Call: rq(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, tau = 0.5, data = data_filtered)
##
## tau: [1] 0.5
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 1.40428 0.87031 2.66628
## sales_billion 0.26346 -0.13508 0.75753
## I(sales_billion^2) -0.00600 -0.01679 0.00344
## profmarg 0.11400 0.01376 0.16427
comparison <- data.frame(
Method = c("OLS (Full)", "OLS (Filtered)", "LAD (Full)", "LAD (Filtered)"),
Intercept = c(coef(ols_full)[1], coef(ols_filtered)[1], coef(lad_full)[1], coef(lad_filtered)[1]),
Sales = c(coef(ols_full)[2], coef(ols_filtered)[2], coef(lad_full)[2], coef(lad_filtered)[2]),
Sales_Squared = c(coef(ols_full)[3], coef(ols_filtered)[3], coef(lad_full)[3], coef(lad_filtered)[3]),
Profmarg = c(coef(ols_full)[4], coef(ols_filtered)[4], coef(lad_full)[4], coef(lad_filtered)[4])
)
print(comparison)
## Method Intercept Sales Sales_Squared Profmarg
## 1 OLS (Full) 2.058967 0.3166110 -0.007389624 0.05332217
## 2 OLS (Filtered) 2.058967 0.3166110 -0.007389624 0.05332217
## 3 LAD (Full) 1.404279 0.2634638 -0.006001127 0.11400366
## 4 LAD (Filtered) 1.404279 0.2634638 -0.006001127 0.11400366
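The filtered and full-sample rows coincide because the largest firm's sales fall just below the $40 billion cutoff. A sketch of one way to actually re-estimate without that firm, under the assumption that dropping the single largest observation is what is intended:

data_no_outlier <- data %>% filter(sales_billion < max(sales_billion))
ols_no_outlier <- lm(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_no_outlier)
lad_no_outlier <- rq(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_no_outlier, tau = 0.5)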
## *Chapter 10*
#### **Exercise 1**
#### **Exercise 5**
#### **C1**
data_10C1 <- intdef %>% mutate(later1979= ifelse(year>1979,1,0))
head(data_10C1,5)
## year i3 inf rec out def i3_1 inf_1 def_1 ci3 cinf
## 1 1948 1.04 8.1 16.2 11.6 -4.6000004 NA NA NA NA NA
## 2 1949 1.10 -1.2 14.5 14.3 -0.1999998 1.04 8.1 -4.6000004 0.06000006 -9.3
## 3 1950 1.22 1.3 14.4 15.6 1.2000008 1.10 -1.2 -0.1999998 0.12000000 2.5
## 4 1951 1.55 7.9 16.1 14.2 -1.9000006 1.22 1.3 1.2000008 0.32999992 6.6
## 5 1952 1.77 1.9 19.0 19.4 0.3999996 1.55 7.9 -1.9000006 0.22000003 -6.0
## cdef y77 later1979
## 1 NA 0 0
## 2 4.400001 0 0
## 3 1.400001 0 0
## 4 -3.100001 0 0
## 5 2.300000 0 0
model_10C1 <- lm(i3~inf+def+later1979, data=data_10C1)
summary(model_10C1)
##
## Call:
## lm(formula = i3 ~ inf + def + later1979, data = data_10C1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4674 -0.8407 0.2388 1.0148 3.9654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.29623 0.42535 3.047 0.00362 **
## inf 0.60842 0.07625 7.979 1.37e-10 ***
## def 0.36266 0.12025 3.016 0.00396 **
## later1979 1.55877 0.50577 3.082 0.00329 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.711 on 52 degrees of freedom
## Multiple R-squared: 0.6635, Adjusted R-squared: 0.6441
## F-statistic: 34.18 on 3 and 52 DF, p-value: 2.408e-12
data_10C1$later1979 <- as.integer(data_10C1$year > 1979)  # equivalent construction of the post-1979 dummy
data13 <- wooldridge::intdef
head(data13)
## year i3 inf rec out def i3_1 inf_1 def_1 ci3 cinf
## 1 1948 1.04 8.1 16.2 11.6 -4.6000004 NA NA NA NA NA
## 2 1949 1.10 -1.2 14.5 14.3 -0.1999998 1.04 8.1 -4.6000004 0.06000006 -9.3
## 3 1950 1.22 1.3 14.4 15.6 1.2000008 1.10 -1.2 -0.1999998 0.12000000 2.5
## 4 1951 1.55 7.9 16.1 14.2 -1.9000006 1.22 1.3 1.2000008 0.32999992 6.6
## 5 1952 1.77 1.9 19.0 19.4 0.3999996 1.55 7.9 -1.9000006 0.22000003 -6.0
## 6 1953 1.93 0.8 18.7 20.4 1.6999989 1.77 1.9 0.3999996 0.15999997 -1.1
## cdef y77
## 1 NA 0
## 2 4.400001 0
## 3 1.400001 0
## 4 -3.100001 0
## 5 2.300000 0
## 6 1.299999 0
data13$dummy <- as.integer(data13$year > 1979)
data13$dummy
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
model15 <- lm(i3 ~ inf + def , data = data13)
summary(model15)
##
## Call:
## lm(formula = i3 ~ inf + def, data = data13)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9948 -1.1694 0.1959 0.9602 4.7224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.73327 0.43197 4.012 0.00019 ***
## inf 0.60587 0.08213 7.376 1.12e-09 ***
## def 0.51306 0.11838 4.334 6.57e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.843 on 53 degrees of freedom
## Multiple R-squared: 0.6021, Adjusted R-squared: 0.5871
## F-statistic: 40.09 on 2 and 53 DF, p-value: 2.483e-11
model16 <- lm(i3 ~ inf + def + y77 , data = data13)
summary(model16)
##
## Call:
## lm(formula = i3 ~ inf + def + y77, data = data13)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4048 -0.9632 0.2192 0.8497 4.3447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.40531 0.42239 3.327 0.00162 **
## inf 0.56884 0.07832 7.263 1.88e-09 ***
## def 0.36276 0.12337 2.940 0.00488 **
## y77 1.47773 0.52349 2.823 0.00673 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.733 on 52 degrees of freedom
## Multiple R-squared: 0.6549, Adjusted R-squared: 0.635
## F-statistic: 32.9 on 3 and 52 DF, p-value: 4.608e-12
#### **C9**
# Load necessary library
library(wooldridge)
# Load the dataset 'volat'
data("volat")
# Display the structure of the dataset to verify variable names
str(volat)
## 'data.frame': 558 obs. of 17 variables:
## $ date : num 1947 1947 1947 1947 1947 ...
## $ sp500 : num 15.2 15.8 15.2 14.6 14.3 ...
## $ divyld: num 4.49 4.38 4.61 4.75 5.05 ...
## $ i3 : num 0.38 0.38 0.38 0.38 0.38 ...
## $ ip : num 22.4 22.5 22.6 22.5 22.6 ...
## $ pcsp : num NA 46.5 -48.6 -44.3 -21.4 ...
## $ rsp500: num NA 50.9 -44 -39.6 -16.3 ...
## $ pcip : num NA 5.36 5.33 -5.31 5.33 ...
## $ ci3 : num NA 0 0 0 0 ...
## $ ci3_1 : num NA NA 0 0 0 ...
## $ ci3_2 : num NA NA NA 0 0 ...
## $ pcip_1: num NA NA 5.36 5.33 -5.31 ...
## $ pcip_2: num NA NA NA 5.36 5.33 ...
## $ pcip_3: num NA NA NA NA 5.36 ...
## $ pcsp_1: num NA NA 46.5 -48.6 -44.3 ...
## $ pcsp_2: num NA NA NA 46.5 -48.6 ...
## $ pcsp_3: num NA NA NA NA 46.5 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
# Check the first few rows to confirm the data
head(volat)
## date sp500 divyld i3 ip pcsp rsp500 pcip ci3 ci3_1 ci3_2
## 1 1947.01 15.21 4.49 0.38 22.4 NA NA NA NA NA NA
## 2 1947.02 15.80 4.38 0.38 22.5 46.54833 50.92833 5.357163 0 NA NA
## 3 1947.03 15.16 4.61 0.38 22.6 -48.60762 -43.99762 5.333354 0 0 NA
## 4 1947.04 14.60 4.75 0.38 22.5 -44.32714 -39.57714 -5.309754 0 0 0
## 5 1947.05 14.34 5.05 0.38 22.6 -21.36988 -16.31988 5.333354 0 0 0
## 6 1947.06 14.84 5.03 0.38 22.6 41.84100 46.87100 0.000000 0 0 0
## pcip_1 pcip_2 pcip_3 pcsp_1 pcsp_2 pcsp_3
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 5.357163 NA NA 46.54833 NA NA
## 4 5.333354 5.357163 NA -48.60762 46.54833 NA
## 5 -5.309754 5.333354 5.357163 -44.32714 -48.60762 46.54833
## 6 5.333354 -5.309754 5.333354 -21.36988 -44.32714 -48.60762
# Estimate the equation rsp500_t = β0 + β1 * pcip_t + β2 * i3_t + u_t using OLS
model <- lm(rsp500 ~ pcip + i3, data = volat)
# Summarize the results of the model
summary(model)
##
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -157.871 -22.580 2.103 25.524 138.137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.84306 3.27488 5.754 1.44e-08 ***
## pcip 0.03642 0.12940 0.281 0.7785
## i3 -1.36169 0.54072 -2.518 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.13 on 554 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.01189, Adjusted R-squared: 0.008325
## F-statistic: 3.334 on 2 and 554 DF, p-value: 0.03637
## *Chapter 11*
#### **C7**
# Load the wooldridge package
library(wooldridge)
# Load the CONSUMP dataset
data("consump", package = "wooldridge")
# View the structure of the dataset
str(consump)
## 'data.frame': 37 obs. of 24 variables:
## $ year : int 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 ...
## $ i3 : num 3.41 2.93 2.38 2.78 3.16 ...
## $ inf : num 0.7 1.7 1 1 1.3 ...
## $ rdisp : num 1530 1565 1616 1694 1756 ...
## $ rnondc : num 606 615 627 646 660 ...
## $ rserv : num 687 717 746 783 819 ...
## $ pop : num 177830 180671 183691 186538 189242 ...
## $ y : num 8604 8664 8796 9080 9276 ...
## $ rcons : num 1294 1333 1373 1430 1479 ...
## $ c : num 7275 7377 7476 7665 7814 ...
## $ r3 : num 2.71 1.23 1.38 1.78 1.86 ...
## $ lc : num 8.89 8.91 8.92 8.94 8.96 ...
## $ ly : num 9.06 9.07 9.08 9.11 9.14 ...
## $ gc : num NA 0.0139 0.0133 0.0251 0.0192 ...
## $ gy : num NA 0.00696 0.01511 0.03171 0.02145 ...
## $ gc_1 : num NA NA 0.0139 0.0133 0.0251 ...
## $ gy_1 : num NA NA 0.00696 0.01511 0.03171 ...
## $ r3_1 : num NA 2.71 1.23 1.38 1.78 ...
## $ lc_ly : num -0.168 -0.161 -0.163 -0.169 -0.172 ...
## $ lc_ly_1: num NA -0.168 -0.161 -0.163 -0.169 ...
## $ gc_2 : num NA NA NA 0.0139 0.0133 ...
## $ gy_2 : num NA NA NA 0.00696 0.01511 ...
## $ r3_2 : num NA NA 2.71 1.23 1.38 ...
## $ lc_ly_2: num NA NA -0.168 -0.161 -0.163 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
# Generate growth in consumption (gc) as specified
consump$gc <- log(consump$c) - log(dplyr::lag(consump$c, n = 1))
# Run regression: gc_t = β0 + β1 * gc_t-1 + u_t
model1 <- lm(gc ~ dplyr::lag(gc, n = 1), data = consump)
summary(model1)
##
## Call:
## lm(formula = gc ~ dplyr::lag(gc, n = 1), data = consump)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.027878 -0.005974 -0.001451 0.007142 0.020227
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.011431 0.003778 3.026 0.00478 **
## dplyr::lag(gc, n = 1) 0.446141 0.156047 2.859 0.00731 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01161 on 33 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1985, Adjusted R-squared: 0.1742
## F-statistic: 8.174 on 1 and 33 DF, p-value: 0.00731
# Include lagged variables of gy, i3, and inf
consump$gy_lag <- dplyr::lag(consump$gy, n = 1)
consump$i3_lag <- dplyr::lag(consump$i3, n = 1)
consump$inf_lag <- dplyr::lag(consump$inf, n = 1)
# Run regression with additional variables
model2 <- lm(gc ~ dplyr::lag(gc, n = 1) + gy_lag + i3_lag + inf_lag, data = consump)
summary(model2)
##
## Call:
## lm(formula = gc ~ dplyr::lag(gc, n = 1) + gy_lag + i3_lag + inf_lag,
## data = consump)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0249093 -0.0075869 0.0000855 0.0087234 0.0188617
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0225940 0.0070892 3.187 0.00335 **
## dplyr::lag(gc, n = 1) 0.4335832 0.2896525 1.497 0.14487
## gy_lag -0.1079087 0.1946371 -0.554 0.58341
## i3_lag -0.0007467 0.0011107 -0.672 0.50654
## inf_lag -0.0008281 0.0010041 -0.825 0.41606
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01134 on 30 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3038, Adjusted R-squared: 0.211
## F-statistic: 3.273 on 4 and 30 DF, p-value: 0.02431
# Check p-value of lag(gc)
p_value_lag_gc <- summary(model2)$coefficients["dplyr::lag(gc, n = 1)", "Pr(>|t|)"]
#### **C12**
library(wooldridge)
library(lmtest)
library(sandwich)
library(ggplot2)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
data("minwage")
# Focus on the relevant variables
# gwage232: growth in wage (sector 232)
# gemp232: growth in employment (sector 232)
# gmwage: growth in federal minimum wage
# gcpi: growth in CPI
# Create lagged variables
minwage$gwage232_lag1 <- c(NA, minwage$gwage232[-nrow(minwage)]) # Lagged gwage232
minwage$gemp232_lag1 <- c(NA, minwage$gemp232[-nrow(minwage)]) # Lagged gemp232
# Remove rows with NA values
minwage_clean <- na.omit(minwage)
acf_gwage <- acf(minwage_clean$gwage232, lag.max = 1, plot = FALSE) # ACF
first_order_autocorrelation <- acf_gwage$acf[2] # First lag
cat("First-order autocorrelation in gwage232:", first_order_autocorrelation, "\n")
## First-order autocorrelation in gwage232: -0.02425413
model_dynamic <- lm(gwage232 ~ gwage232_lag1 + gmwage + gcpi, data = minwage_clean)
summary(model_dynamic)
##
## Call:
## lm(formula = gwage232 ~ gwage232_lag1 + gmwage + gcpi, data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.044649 -0.004114 -0.001262 0.004481 0.041568
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0023648 0.0004295 5.506 5.45e-08 ***
## gwage232_lag1 -0.0684816 0.0343986 -1.991 0.0470 *
## gmwage 0.1517511 0.0095115 15.955 < 2e-16 ***
## gcpi 0.2586795 0.0858602 3.013 0.0027 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007775 on 595 degrees of freedom
## Multiple R-squared: 0.3068, Adjusted R-squared: 0.3033
## F-statistic: 87.79 on 3 and 595 DF, p-value: < 2.2e-16
# Check if gmwage has a significant effect while controlling for other variables
cat("Effect of gmwage (contemporaneous effect):\n")
## Effect of gmwage (contemporaneous effect):
print(summary(model_dynamic)$coefficients["gmwage", ])
## Estimate Std. Error t value Pr(>|t|)
## 1.517511e-01 9.511457e-03 1.595456e+01 5.753131e-48
model_with_lagged_employment <- lm(gwage232 ~ gwage232_lag1 + gmwage + gcpi + gemp232_lag1, data = minwage_clean)
summary(model_with_lagged_employment)
##
## Call:
## lm(formula = gwage232 ~ gwage232_lag1 + gmwage + gcpi + gemp232_lag1,
## data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.043900 -0.004316 -0.000955 0.004255 0.042430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0023960 0.0004254 5.633 2.74e-08 ***
## gwage232_lag1 -0.0656875 0.0340720 -1.928 0.054344 .
## gmwage 0.1525470 0.0094213 16.192 < 2e-16 ***
## gcpi 0.2536899 0.0850342 2.983 0.002968 **
## gemp232_lag1 0.0606620 0.0169691 3.575 0.000379 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007699 on 594 degrees of freedom
## Multiple R-squared: 0.3214, Adjusted R-squared: 0.3169
## F-statistic: 70.34 on 4 and 594 DF, p-value: < 2.2e-16
# Check if the lagged growth in employment is statistically significant
cat("Effect of lagged growth in employment (gemp232_lag1):\n")
## Effect of lagged growth in employment (gemp232_lag1):
print(summary(model_with_lagged_employment)$coefficients["gemp232_lag1", ])
## Estimate Std. Error t value Pr(>|t|)
## 0.0606619954 0.0169690920 3.5748521768 0.0003789121
model_without_lags <- lm(gwage232 ~ gmwage + gcpi, data = minwage_clean)
summary(model_without_lags)
##
## Call:
## lm(formula = gwage232 ~ gmwage + gcpi, data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.044501 -0.004053 -0.001328 0.004503 0.041194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0021926 0.0004217 5.199 2.75e-07 ***
## gmwage 0.1506131 0.0095178 15.824 < 2e-16 ***
## gcpi 0.2374126 0.0854046 2.780 0.00561 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007794 on 596 degrees of freedom
## Multiple R-squared: 0.3022, Adjusted R-squared: 0.2999
## F-statistic: 129.1 on 2 and 596 DF, p-value: < 2.2e-16
# Compare coefficients of gmwage across models
cat("Coefficient of gmwage in model without lags:", summary(model_without_lags)$coefficients["gmwage", 1], "\n")
## Coefficient of gmwage in model without lags: 0.1506131
cat("Coefficient of gmwage in model with lags:", summary(model_with_lagged_employment)$coefficients["gmwage", 1], "\n")
## Coefficient of gmwage in model with lags: 0.152547
model_gmwage <- lm(gmwage ~ gwage232_lag1 + gemp232_lag1, data = minwage_clean)
summary(model_gmwage)
##
## Call:
## lm(formula = gmwage ~ gwage232_lag1 + gemp232_lag1, data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.01987 -0.00511 -0.00385 -0.00290 0.62191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.003487 0.001465 2.380 0.0176 *
## gwage232_lag1 0.212051 0.146727 1.445 0.1489
## gemp232_lag1 -0.042776 0.073749 -0.580 0.5621
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03347 on 596 degrees of freedom
## Multiple R-squared: 0.004117, Adjusted R-squared: 0.0007754
## F-statistic: 1.232 on 2 and 596 DF, p-value: 0.2924
# Report R-squared of the regression
r_squared_gmwage <- summary(model_gmwage)$r.squared
cat("R-squared of gmwage regression:", r_squared_gmwage, "\n")
## R-squared of gmwage regression: 0.004117273
# Interpret R-squared value
cat("Interpretation: R-squared indicates how well the lagged variables explain variation in gmwage.\n")
## Interpretation: R-squared indicates how well the lagged variables explain variation in gmwage.
## *Chapter 12*
#### **C11**
# Load necessary libraries
library(wooldridge) # For accessing Wooldridge datasets
library(lmtest) # For model diagnostics
library(sandwich) # For robust standard errors
library(tseries) # For ARCH models
library(ggplot2) # For plotting
# Load the NYSE dataset
data("nyse") # The dataset is part of the Wooldridge package
# Add lagged returns for the analysis
nyse$return_lag1 <- c(NA, nyse$return[-nrow(nyse)]) # Lagged return_t-1
nyse$return_lag2 <- c(NA, nyse$return_lag1[-nrow(nyse)]) # Lagged return_t-2
# Remove rows with NA values (introduced by lags)
nyse_clean <- na.omit(nyse)
model_12_47 <- lm(return ~ return_lag1, data = nyse_clean)
summary(model_12_47)
##
## Call:
## lm(formula = return ~ return_lag1, data = nyse_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2633 -1.2997 0.1011 1.3203 8.0688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.17862 0.08084 2.210 0.0275 *
## return_lag1 0.05806 0.03811 1.523 0.1281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.112 on 686 degrees of freedom
## Multiple R-squared: 0.003372, Adjusted R-squared: 0.001919
## F-statistic: 2.321 on 1 and 686 DF, p-value: 0.1281
# Calculate squared residuals
nyse_clean$residuals_squared <- residuals(model_12_47)^2
# Find average, minimum, and maximum of squared residuals
avg_res <- mean(nyse_clean$residuals_squared, na.rm = TRUE)
min_res <- min(nyse_clean$residuals_squared, na.rm = TRUE)
max_res <- max(nyse_clean$residuals_squared, na.rm = TRUE)
cat("Average of squared residuals:", avg_res, "\n")
## Average of squared residuals: 4.446345
cat("Minimum of squared residuals:", min_res, "\n")
## Minimum of squared residuals: 3.591354e-06
cat("Maximum of squared residuals:", max_res, "\n")
## Maximum of squared residuals: 232.9687
nyse_clean$return_lag1_sq <- nyse_clean$return_lag1^2 # Squared lagged return
hetero_model <- lm(residuals_squared ~ return_lag1 + return_lag1_sq, data = nyse_clean)
summary(hetero_model)
##
## Call:
## lm(formula = residuals_squared ~ return_lag1 + return_lag1_sq,
## data = nyse_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.545 -3.021 -1.974 0.695 221.544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.25771 0.44131 7.382 4.54e-13 ***
## return_lag1 -0.78556 0.19626 -4.003 6.95e-05 ***
## return_lag1_sq 0.29749 0.03558 8.361 3.46e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.67 on 685 degrees of freedom
## Multiple R-squared: 0.1305, Adjusted R-squared: 0.128
## F-statistic: 51.41 on 2 and 685 DF, p-value: < 2.2e-16
# Extract coefficients, standard errors, R-squared, and adjusted R-squared
coefs <- coef(hetero_model)
std_errors <- sqrt(diag(vcov(hetero_model)))
r_squared <- summary(hetero_model)$r.squared
adj_r_squared <- summary(hetero_model)$adj.r.squared
cat("Coefficients:", coefs, "\n")
## Coefficients: 3.257706 -0.7855573 0.2974907
cat("Standard Errors:", std_errors, "\n")
## Standard Errors: 0.441314 0.1962611 0.0355787
cat("R-squared:", r_squared, "\n")
## R-squared: 0.1305106
cat("Adjusted R-squared:", adj_r_squared, "\n")
## Adjusted R-squared: 0.127972
nyse_clean$conditional_variance <- predict(hetero_model, newdata = nyse_clean)
ggplot(nyse_clean, aes(x = return_lag1, y = conditional_variance)) +
geom_line(color = "blue") +
labs(title = "Conditional Variance vs Lagged Return",
x = "Lagged Return (return_t-1)", y = "Conditional Variance") +
theme_minimal()
# Find value of return where variance is smallest
min_variance <- min(nyse_clean$conditional_variance, na.rm = TRUE)
cat("Minimum conditional variance:", min_variance, "\n")
## Minimum conditional variance: 2.739119
negative_variance <- sum(nyse_clean$conditional_variance < 0, na.rm = TRUE)
cat("Number of negative variance estimates:", negative_variance, "\n")
## Number of negative variance estimates: 0
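Since the fitted variance is a quadratic in the lagged return, the return at which it is smallest can also be read directly off the coefficients (the minimizer is -b1/(2*b2)); a quick sketch:

b <- coef(hetero_model)
-b["return_lag1"] / (2 * b["return_lag1_sq"])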
arch1_model <- garch(nyse_clean$return, order = c(1, 0))
##
## ***** ESTIMATION WITH ANALYTICAL GRADIENT *****
##
##
## I INITIAL X(I) D(I)
##
## 1 4.244486e+00 1.000e+00
## 2 5.000000e-02 1.000e+00
##
## IT NF F RELDF PRELDF RELDX STPPAR D*STEP NPRELDF
## 0 1 8.604e+02
## 1 4 8.604e+02 1.04e-05 3.38e-05 1.1e-03 2.9e+02 1.0e-02 4.94e-03
## 2 5 8.604e+02 1.86e-06 1.97e-06 1.0e-03 1.3e+00 1.0e-02 1.99e-06
## 3 6 8.604e+02 4.21e-09 1.02e-07 1.5e-03 0.0e+00 1.3e-02 1.02e-07
## 4 7 8.604e+02 2.86e-09 9.16e-08 7.7e-04 1.8e+00 6.7e-03 1.13e-06
## 5 8 8.604e+02 1.89e-09 4.61e-08 3.9e-04 1.9e+00 3.4e-03 1.67e-06
## 6 11 8.604e+02 1.01e-10 5.29e-09 4.5e-05 2.5e+01 4.0e-04 1.62e-06
## 7 14 8.604e+02 3.62e-11 5.04e-10 4.1e-06 3.9e+00 3.8e-05 1.61e-06
## 8 15 8.604e+02 9.74e-12 2.51e-10 2.1e-06 4.2e+02 1.9e-05 1.61e-06
## 9 16 8.604e+02 3.57e-12 1.26e-10 1.1e-06 5.6e+03 9.4e-06 1.61e-06
## 10 17 8.604e+02 1.69e-12 6.27e-11 5.3e-07 1.9e+04 4.7e-06 1.61e-06
## 11 18 8.604e+02 8.28e-13 3.14e-11 2.7e-07 4.8e+04 2.4e-06 1.60e-06
## 12 19 8.604e+02 4.09e-13 1.57e-11 1.3e-07 1.0e+05 1.2e-06 1.60e-06
## 13 20 8.604e+02 2.04e-13 7.84e-12 6.7e-08 2.2e+05 5.9e-07 1.60e-06
## 14 21 8.604e+02 1.01e-13 3.92e-12 3.3e-08 4.4e+05 2.9e-07 1.60e-06
## 15 22 8.604e+02 5.23e-14 1.96e-12 1.7e-08 8.9e+05 1.5e-07 1.60e-06
## 16 23 8.604e+02 2.43e-14 9.80e-13 8.4e-09 1.8e+06 7.3e-08 1.60e-06
## 17 24 8.604e+02 1.33e-14 4.90e-13 4.2e-09 3.6e+06 3.7e-08 1.60e-06
## 18 25 8.604e+02 6.47e-15 2.45e-13 2.1e-09 7.2e+06 1.8e-08 1.60e-06
## 19 26 8.604e+02 3.57e-15 1.23e-13 1.0e-09 1.4e+07 9.2e-09 1.60e-06
## 20 27 8.604e+02 5.29e-16 6.13e-14 5.2e-10 2.9e+07 4.6e-09 1.60e-06
## 21 28 8.604e+02 1.98e-15 3.06e-14 2.6e-10 5.8e+07 2.3e-09 1.60e-06
## 22 36 8.604e+02 0.00e+00 1.82e-18 1.6e-14 9.7e+11 1.4e-13 1.60e-06
##
## ***** FALSE CONVERGENCE *****
##
## FUNCTION 8.604278e+02 RELDX 1.557e-14
## FUNC. EVALS 36 GRAD. EVALS 22
## PRELDF 1.824e-18 NPRELDF 1.601e-06
##
## I FINAL X(I) D(I) G(I)
##
## 1 4.278928e+00 1.000e+00 -1.119e-02
## 2 4.975478e-02 1.000e+00 2.591e-03
summary(arch1_model)
##
## Call:
## garch(x = nyse_clean$return, order = c(1, 0))
##
## Model:
## GARCH(1,0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2203 -0.5202 0.1519 0.7279 3.9815
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 4.27893 83.25229 0.051 0.959
## b1 0.04975 18.48916 0.003 0.998
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 776.88, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 87.793, df = 1, p-value < 2.2e-16
arch2_model <- garch(nyse_clean$return, order = c(2, 0))
##
## ***** ESTIMATION WITH ANALYTICAL GRADIENT *****
##
##
## I INITIAL X(I) D(I)
##
## 1 4.021092e+00 1.000e+00
## 2 5.000000e-02 1.000e+00
## 3 5.000000e-02 1.000e+00
##
## IT NF F RELDF PRELDF RELDX STPPAR D*STEP NPRELDF
## 0 1 8.581e+02
## 1 5 8.581e+02 5.14e-06 1.02e-05 2.8e-04 8.7e+02 3.2e-03 4.45e-03
## 2 12 8.581e+02 6.59e-11 6.25e-10 3.8e-06 2.2e+00 4.7e-05 1.35e-07
## 3 15 8.581e+02 7.54e-11 3.87e-10 2.1e-06 3.6e+00 2.0e-05 1.35e-07
## 4 31 8.581e+02 1.85e-15 1.60e-14 1.1e-10 1.5e+06 9.0e-10 1.36e-07
## 5 39 8.581e+02 -1.59e-15 1.30e-18 9.1e-15 2.0e+10 7.3e-14 1.60e-07
##
## ***** FALSE CONVERGENCE *****
##
## FUNCTION 8.581133e+02 RELDX 9.055e-15
## FUNC. EVALS 39 GRAD. EVALS 5
## PRELDF 1.304e-18 NPRELDF 1.595e-07
##
## I FINAL X(I) D(I) G(I)
##
## 1 4.021524e+00 1.000e+00 1.530e-02
## 2 5.223246e-02 1.000e+00 -8.293e-04
## 3 5.222700e-02 1.000e+00 -1.780e-04
## Warning in garch(nyse_clean$return, order = c(2, 0)): singular information
summary(arch2_model)
##
## Call:
## garch(x = nyse_clean$return, order = c(2, 0))
##
## Model:
## GARCH(2,0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2303 -0.5216 0.1484 0.7264 3.9869
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 4.02152 NA NA NA
## b1 0.05223 NA NA NA
## b2 0.05223 NA NA NA
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 786.77, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 88.025, df = 1, p-value < 2.2e-16
# Compare ARCH(1) and ARCH(2) models
arch1_aic <- AIC(arch1_model)
arch2_aic <- AIC(arch2_model)
cat("ARCH(1) AIC:", arch1_aic, "\n")
## ARCH(1) AIC: 2987.477
cat("ARCH(2) AIC:", arch2_aic, "\n")
## ARCH(2) AIC: 2983.01
if (arch2_aic < arch1_aic) {
cat("ARCH(2) model fits better than ARCH(1).\n")
} else {
cat("ARCH(1) model fits better than ARCH(2).\n")
}
## ARCH(2) model fits better than ARCH(1).
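One caveat on the garch() calls above: in tseries::garch() the order argument is c(GARCH order, ARCH order), so order = c(1, 0) and c(2, 0) specify models with no ARCH terms, which may explain the false-convergence warnings. A pure ARCH(q) fit would be specified as below (a sketch; the estimates would need to be re-run and re-interpreted):

arch1_fit <- garch(nyse_clean$return, order = c(0, 1))  # ARCH(1)
arch2_fit <- garch(nyse_clean$return, order = c(0, 2))  # ARCH(2)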