## *Chapter 7*
#### **Exercise 1**
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(ISLR2)
library(matlib)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, will use the null device.
## See '?rgl.useNULL' for ways to avoid this warning.
library(wooldridge)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
data <- wooldridge::sleep75
head(sleep75)
## age black case clerical construc educ earns74 gdhlth inlf leis1 leis2 leis3
## 1 32 0 1 0 0 12 0 0 1 3529 3479 3479
## 2 31 0 2 0 0 14 9500 1 1 2140 2140 2140
## 3 44 0 3 0 0 17 42500 1 1 4595 4505 4227
## 4 30 0 4 0 0 12 42500 1 1 3211 3211 3211
## 5 64 0 5 0 0 14 2500 1 1 4052 4007 4007
## 6 41 0 6 0 0 12 0 1 1 4812 4797 4797
## smsa lhrwage lothinc male marr prot rlxall selfe sleep slpnaps south
## 1 0 1.955861 10.075380 1 1 1 3163 0 3113 3163 0
## 2 0 0.357674 0.000000 1 0 1 2920 1 2920 2920 1
## 3 1 3.021887 0.000000 1 1 0 3038 1 2670 2760 0
## 4 0 2.263844 0.000000 0 1 1 3083 1 3083 3083 0
## 5 0 1.011601 9.328213 1 1 1 3493 0 3448 3493 0
## 6 0 2.957511 10.657280 1 1 1 4078 0 4063 4078 0
## spsepay spwrk75 totwrk union worknrm workscnd exper yngkid yrsmarr hrwage
## 1 0 0 3438 0 3438 0 14 0 13 7.070004
## 2 0 0 5020 0 5020 0 11 0 0 1.429999
## 3 20000 1 2815 0 2815 0 21 0 0 20.529997
## 4 5000 1 3786 0 3786 0 12 0 12 9.619998
## 5 2400 1 2580 0 2580 0 44 0 33 2.750000
## 6 0 0 1205 0 0 1205 23 0 23 19.249998
## agesq
## 1 1024
## 2 961
## 3 1936
## 4 900
## 5 4096
## 6 1681
data(sleep75)
model1 <- lm(sleep ~ totwrk + educ + age + I(age^2) + male, data = sleep75)
summary(model1)
##
## Call:
## lm(formula = sleep ~ totwrk + educ + age + I(age^2) + male, data = sleep75)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2378.00 -243.29 6.74 259.24 1350.19
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3840.83197 235.10870 16.336 <2e-16 ***
## totwrk -0.16342 0.01813 -9.013 <2e-16 ***
## educ -11.71332 5.86689 -1.997 0.0463 *
## age -8.69668 11.20746 -0.776 0.4380
## I(age^2) 0.12844 0.13390 0.959 0.3378
## male 87.75243 34.32616 2.556 0.0108 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 417.7 on 700 degrees of freedom
## Multiple R-squared: 0.1228, Adjusted R-squared: 0.1165
## F-statistic: 19.59 on 5 and 700 DF, p-value: < 2.2e-16
# two-sided p-value for the male coefficient (t = 87.75/34.33, 700 df)
2*(1-pt(87.75/34.33, 700))
## [1] 0.01079613
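The same test can be run without retyping rounded numbers, pulling the unrounded estimate and standard error for male straight out of the fitted model (a quick cross-check; the row and column names are the ones returned by coef(summary(...))):

ct <- coef(summary(model1))
2 * (1 - pt(abs(ct["male", "Estimate"] / ct["male", "Std. Error"]), df.residual(model1)))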
model1 <- lm(sleep ~ totwrk + educ + male, data = sleep75)
summary(model1)
##
## Call:
## lm(formula = sleep ~ totwrk + educ + male, data = sleep75)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2380.27 -239.15 6.74 257.31 1370.63
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3747.51727 81.00609 46.262 < 2e-16 ***
## totwrk -0.16734 0.01794 -9.329 < 2e-16 ***
## educ -13.88479 5.65757 -2.454 0.01436 *
## male 90.96919 34.27441 2.654 0.00813 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 418 on 702 degrees of freedom
## Multiple R-squared: 0.1193, Adjusted R-squared: 0.1155
## F-statistic: 31.69 on 3 and 702 DF, p-value: < 2.2e-16
# two-sided p-values computed from the coefficient/SE pairs of the SAT equation in Exercise 3 (df = 4131)
2*(1-pt(2.19/0.53, 4131))
## [1] 3.666238e-05
2*(1-pt(45.09/4.29, 4131))
## [1] 0
2*(1-pt(169.81/12.71, 4131))
## [1] 0
#### **Exercise 3**
library(wooldridge)
data1 <- wooldridge::gpa2
head(data1)
## sat tothrs colgpa athlete verbmath hsize hsrank hsperc female white black
## 1 920 43 2.04 1 0.48387 0.10 4 40.00000 1 0 0
## 2 1170 18 4.00 0 0.82813 9.40 191 20.31915 0 1 0
## 3 810 14 1.78 1 0.88372 1.19 42 35.29412 0 1 0
## 4 940 40 2.42 0 0.80769 5.71 252 44.13310 0 1 0
## 5 1180 18 2.61 0 0.73529 2.14 86 40.18692 0 1 0
## 6 980 114 3.03 0 0.81481 2.68 41 15.29851 1 1 0
## hsizesq
## 1 0.0100
## 2 88.3600
## 3 1.4161
## 4 32.6041
## 5 4.5796
## 6 7.1824
data("gpa2")
model2 <- lm(sat ~ hsize + I(hsize^2) + female + black + I(female*black), data = gpa2)
summary(model2)
##
## Call:
## lm(formula = sat ~ hsize + I(hsize^2) + female + black + I(female *
## black), data = gpa2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -570.45 -89.54 -5.24 85.41 479.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1028.0972 6.2902 163.445 < 2e-16 ***
## hsize 19.2971 3.8323 5.035 4.97e-07 ***
## I(hsize^2) -2.1948 0.5272 -4.163 3.20e-05 ***
## female -45.0915 4.2911 -10.508 < 2e-16 ***
## black -169.8126 12.7131 -13.357 < 2e-16 ***
## I(female * black) 62.3064 18.1542 3.432 0.000605 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 133.4 on 4131 degrees of freedom
## Multiple R-squared: 0.08578, Adjusted R-squared: 0.08468
## F-statistic: 77.52 on 5 and 4131 DF, p-value: < 2.2e-16
model4<- lm(sat ~ hsize + hsizesq + female + black + female*black , data = data1)
summary(model4)
##
## Call:
## lm(formula = sat ~ hsize + hsizesq + female + black + female *
## black, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -570.45 -89.54 -5.24 85.41 479.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1028.0972 6.2902 163.445 < 2e-16 ***
## hsize 19.2971 3.8323 5.035 4.97e-07 ***
## hsizesq -2.1948 0.5272 -4.163 3.20e-05 ***
## female -45.0915 4.2911 -10.508 < 2e-16 ***
## black -169.8126 12.7131 -13.357 < 2e-16 ***
## female:black 62.3064 18.1542 3.432 0.000605 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 133.4 on 4131 degrees of freedom
## Multiple R-squared: 0.08578, Adjusted R-squared: 0.08468
## F-statistic: 77.52 on 5 and 4131 DF, p-value: < 2.2e-16
2*(1-pt(2.19/0.53, 4131))
## [1] 3.666238e-05
t_stat <- 62.31/18.15
df <- 4131-1
p_value <- 2 * (1 - pt(abs(t_stat), df))
p_value
## [1] 0.0006026815
2*(1-pt(169.81/12.71, 4131))
## [1] 0
#### **C1**
library(wooldridge)
data1 <- wooldridge::gpa1
head(data1)
## age soph junior senior senior5 male campus business engineer colGPA hsGPA ACT
## 1 21 0 0 1 0 0 0 1 0 3.0 3.0 21
## 2 21 0 0 1 0 0 0 1 0 3.4 3.2 24
## 3 20 0 1 0 0 0 0 1 0 3.0 3.6 26
## 4 19 1 0 0 0 1 1 1 0 3.5 3.5 27
## 5 20 0 1 0 0 0 0 1 0 3.6 3.9 28
## 6 20 0 0 1 0 1 1 1 0 3.0 3.4 25
## job19 job20 drive bike walk voluntr PC greek car siblings bgfriend clubs
## 1 0 1 1 0 0 0 0 0 1 1 0 0
## 2 0 1 1 0 0 0 0 0 1 0 1 1
## 3 1 0 0 0 1 0 0 0 1 1 0 1
## 4 1 0 0 0 1 0 0 0 0 1 0 0
## 5 0 1 0 1 0 0 0 0 1 1 1 0
## 6 0 0 0 0 1 0 0 0 1 1 0 0
## skipped alcohol gradMI fathcoll mothcoll
## 1 2 1.0 1 0 0
## 2 0 1.0 1 1 1
## 3 0 1.0 1 1 1
## 4 0 0.0 0 0 0
## 5 0 1.5 1 1 0
## 6 0 0.0 0 1 0
data_C1 <- gpa1
model_C1 <- lm(colGPA~PC+hsGPA+ACT+mothcoll+fathcoll,data= data_C1)
summary(model_C1)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll,
## data = data_C1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78149 -0.25726 -0.02121 0.24691 0.74432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.255554 0.335392 3.744 0.000268 ***
## PC 0.151854 0.058716 2.586 0.010762 *
## hsGPA 0.450220 0.094280 4.775 4.61e-06 ***
## ACT 0.007724 0.010678 0.723 0.470688
## mothcoll -0.003758 0.060270 -0.062 0.950376
## fathcoll 0.041800 0.061270 0.682 0.496265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3344 on 135 degrees of freedom
## Multiple R-squared: 0.2222, Adjusted R-squared: 0.1934
## F-statistic: 7.713 on 5 and 135 DF, p-value: 2.083e-06
library(wooldridge)
data1 <- wooldridge::gpa1
# Perform the regression analysis
model_7C1_ur <- lm(colGPA ~ PC + hsGPA + ACT, data = data1)
summary(model_7C1_ur)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + ACT, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7901 -0.2622 -0.0107 0.2334 0.7570
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.263520 0.333125 3.793 0.000223 ***
## PC 0.157309 0.057287 2.746 0.006844 **
## hsGPA 0.447242 0.093647 4.776 4.54e-06 ***
## ACT 0.008659 0.010534 0.822 0.412513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3325 on 137 degrees of freedom
## Multiple R-squared: 0.2194, Adjusted R-squared: 0.2023
## F-statistic: 12.83 on 3 and 137 DF, p-value: 1.932e-07
F= ((0.2222-0.2194)/2)/((1-0.2222)/135)
F
## [1] 0.2429931
critical_value <- qf(0.95, 2, 135)  # 5% critical value of F(2, 135)
critical_value
## [1] 3.063204
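For reference, the statistic computed above is the R-squared form of the F test for the q = 2 exclusion restrictions (mothcoll, fathcoll):

$$F = \frac{(R^2_{ur} - R^2_{r})/q}{(1 - R^2_{ur})/(n - k - 1)}$$

with $R^2_{ur} = 0.2222$, $R^2_{r} = 0.2194$, and $n - k - 1 = 135$. The same restrictions can also be tested directly from the two models already fitted above; a minimal cross-check (anova() uses the exact residual sums of squares rather than rounded R-squareds):

anova(model_7C1_ur, model_C1)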
model6<- lm(colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll , data = data1)
summary(model6)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll,
## data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78149 -0.25726 -0.02121 0.24691 0.74432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.255554 0.335392 3.744 0.000268 ***
## PC 0.151854 0.058716 2.586 0.010762 *
## hsGPA 0.450220 0.094280 4.775 4.61e-06 ***
## ACT 0.007724 0.010678 0.723 0.470688
## mothcoll -0.003758 0.060270 -0.062 0.950376
## fathcoll 0.041800 0.061270 0.682 0.496265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3344 on 135 degrees of freedom
## Multiple R-squared: 0.2222, Adjusted R-squared: 0.1934
## F-statistic: 7.713 on 5 and 135 DF, p-value: 2.083e-06
library(car)
hypotheses <- c("mothcoll=0", "fathcoll=0")
joint_test <- linearHypothesis(model6, hypotheses)
joint_test
##
## Linear hypothesis test:
## mothcoll = 0
## fathcoll = 0
##
## Model 1: restricted model
## Model 2: colGPA ~ PC + hsGPA + ACT + mothcoll + fathcoll
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 137 15.149
## 2 135 15.094 2 0.054685 0.2446 0.7834
summary(joint_test)
## Res.Df RSS Df Sum of Sq F
## Min. :135.0 Min. :15.09 Min. :2 Min. :0.05469 Min. :0.2446
## 1st Qu.:135.5 1st Qu.:15.11 1st Qu.:2 1st Qu.:0.05469 1st Qu.:0.2446
## Median :136.0 Median :15.12 Median :2 Median :0.05469 Median :0.2446
## Mean :136.0 Mean :15.12 Mean :2 Mean :0.05469 Mean :0.2446
## 3rd Qu.:136.5 3rd Qu.:15.14 3rd Qu.:2 3rd Qu.:0.05469 3rd Qu.:0.2446
## Max. :137.0 Max. :15.15 Max. :2 Max. :0.05469 Max. :0.2446
## NA's :1 NA's :1 NA's :1
## Pr(>F)
## Min. :0.7834
## 1st Qu.:0.7834
## Median :0.7834
## Mean :0.7834
## 3rd Qu.:0.7834
## Max. :0.7834
## NA's :1
model3 <- lm (colGPA ~ PC + hsGPA + I(hsGPA^2) + ACT + mothcoll + fathcoll, data = gpa1)
summary (model3)
##
## Call:
## lm(formula = colGPA ~ PC + hsGPA + I(hsGPA^2) + ACT + mothcoll +
## fathcoll, data = gpa1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78998 -0.24327 -0.00648 0.26179 0.72231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.040328 2.443038 2.063 0.0410 *
## PC 0.140446 0.058858 2.386 0.0184 *
## hsGPA -1.802520 1.443552 -1.249 0.2140
## I(hsGPA^2) 0.337341 0.215711 1.564 0.1202
## ACT 0.004786 0.010786 0.444 0.6580
## mothcoll 0.003091 0.060110 0.051 0.9591
## fathcoll 0.062761 0.062401 1.006 0.3163
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3326 on 134 degrees of freedom
## Multiple R-squared: 0.2361, Adjusted R-squared: 0.2019
## F-statistic: 6.904 on 6 and 134 DF, p-value: 2.088e-06
#### **C2**
data3 <- wooldridge::wage2
head(data3)
## wage hours IQ KWW educ exper tenure age married black south urban sibs
## 1 769 40 93 35 12 11 2 31 1 0 0 1 1
## 2 808 50 119 41 18 11 16 37 1 0 0 1 1
## 3 825 40 108 46 14 11 9 33 1 0 0 1 1
## 4 650 40 96 32 12 13 7 32 1 0 0 1 4
## 5 562 40 74 27 11 14 5 34 1 0 0 1 10
## 6 1400 40 116 43 16 14 2 35 1 1 0 1 1
## brthord meduc feduc lwage
## 1 2 8 8 6.645091
## 2 NA 14 14 6.694562
## 3 2 14 14 6.715384
## 4 3 12 12 6.476973
## 5 6 6 11 6.331502
## 6 2 8 NA 7.244227
data("wage2")
model4 <- lm(log(wage) ~ educ +exper +tenure + married + black + south + urban, data = wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98069 -0.21996 0.00707 0.24288 1.22822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.395497 0.113225 47.653 < 2e-16 ***
## educ 0.065431 0.006250 10.468 < 2e-16 ***
## exper 0.014043 0.003185 4.409 1.16e-05 ***
## tenure 0.011747 0.002453 4.789 1.95e-06 ***
## married 0.199417 0.039050 5.107 3.98e-07 ***
## black -0.188350 0.037667 -5.000 6.84e-07 ***
## south -0.090904 0.026249 -3.463 0.000558 ***
## urban 0.183912 0.026958 6.822 1.62e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared: 0.2526, Adjusted R-squared: 0.2469
## F-statistic: 44.75 on 7 and 927 DF, p-value: < 2.2e-16
model4 <- lm(log(wage) ~ educ +exper + I(exper^2) + tenure + I(tenure^2)
+ married + black + south + urban, data = wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + I(exper^2) + tenure +
## I(tenure^2) + married + black + south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98236 -0.21972 -0.00036 0.24078 1.25127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.3586756 0.1259143 42.558 < 2e-16 ***
## educ 0.0642761 0.0063115 10.184 < 2e-16 ***
## exper 0.0172146 0.0126138 1.365 0.172665
## I(exper^2) -0.0001138 0.0005319 -0.214 0.830622
## tenure 0.0249291 0.0081297 3.066 0.002229 **
## I(tenure^2) -0.0007964 0.0004710 -1.691 0.091188 .
## married 0.1985470 0.0391103 5.077 4.65e-07 ***
## black -0.1906636 0.0377011 -5.057 5.13e-07 ***
## south -0.0912153 0.0262356 -3.477 0.000531 ***
## urban 0.1854241 0.0269585 6.878 1.12e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3653 on 925 degrees of freedom
## Multiple R-squared: 0.255, Adjusted R-squared: 0.2477
## F-statistic: 35.17 on 9 and 925 DF, p-value: < 2.2e-16
linearHypothesis(model4, c("I(exper^2)=0", "I(tenure^2)=0" ))
##
## Linear hypothesis test:
## I(exper^2) = 0
## I(tenure^2) = 0
##
## Model 1: restricted model
## Model 2: log(wage) ~ educ + exper + I(exper^2) + tenure + I(tenure^2) +
## married + black + south + urban
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 927 123.82
## 2 925 123.42 2 0.39756 1.4898 0.226
F= ((0.255-0.2526)/2)/((1-0.255)/925)
F
## [1] 1.489933
1-pf(F,2,925)
## [1] 0.2259282
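The same joint test can be reproduced with anova() after refitting the restricted model explicitly; model4_r is a name introduced only for this sketch:

model4_r <- lm(log(wage) ~ educ + exper + tenure + married + black + south + urban, data = wage2)
anova(model4_r, model4)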
model4 <- lm(log(wage) ~ educ +exper +tenure + married
+ black + south + urban + I(black*educ), data = wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban + I(black * educ), data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97782 -0.21832 0.00475 0.24136 1.23226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.374817 0.114703 46.859 < 2e-16 ***
## educ 0.067115 0.006428 10.442 < 2e-16 ***
## exper 0.013826 0.003191 4.333 1.63e-05 ***
## tenure 0.011787 0.002453 4.805 1.80e-06 ***
## married 0.198908 0.039047 5.094 4.25e-07 ***
## black 0.094809 0.255399 0.371 0.710561
## south -0.089450 0.026277 -3.404 0.000692 ***
## urban 0.183852 0.026955 6.821 1.63e-11 ***
## I(black * educ) -0.022624 0.020183 -1.121 0.262603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared: 0.2536, Adjusted R-squared: 0.2471
## F-statistic: 39.32 on 8 and 926 DF, p-value: < 2.2e-16
model4 <- lm(log(wage)~educ+exper+tenure+married+black+south+urban+educ*black,data= wage2)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban + educ * black, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97782 -0.21832 0.00475 0.24136 1.23226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.374817 0.114703 46.859 < 2e-16 ***
## educ 0.067115 0.006428 10.442 < 2e-16 ***
## exper 0.013826 0.003191 4.333 1.63e-05 ***
## tenure 0.011787 0.002453 4.805 1.80e-06 ***
## married 0.198908 0.039047 5.094 4.25e-07 ***
## black 0.094809 0.255399 0.371 0.710561
## south -0.089450 0.026277 -3.404 0.000692 ***
## urban 0.183852 0.026955 6.821 1.63e-11 ***
## educ:black -0.022624 0.020183 -1.121 0.262603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared: 0.2536, Adjusted R-squared: 0.2471
## F-statistic: 39.32 on 8 and 926 DF, p-value: < 2.2e-16
model_7C2_4 <- lm(log(wage)~educ+exper+tenure+married+black+south+urban+married*black,data= wage2)
summary(model_7C2_4)
## *Chapter 8*
#### **Exercise 1**
#### **Exercise 5**
data4 <- wooldridge::smoke
head(data4)
## educ cigpric white age income cigs restaurn lincome agesq lcigpric
## 1 16.0 60.506 1 46 20000 0 0 9.903487 2116 4.102743
## 2 16.0 57.883 1 40 30000 0 0 10.308952 1600 4.058424
## 3 12.0 57.664 1 58 30000 3 0 10.308952 3364 4.054633
## 4 13.5 57.883 1 30 20000 0 0 9.903487 900 4.058424
## 5 10.0 58.320 1 17 20000 0 0 9.903487 289 4.065945
## 6 6.0 59.340 1 86 6500 0 0 8.779557 7396 4.083283
model11 <- lm(cigs ~ log(cigpric) + log(income) + educ + age + agesq + restaurn + white , data = data4)
summary(model11)
##
## Call:
## lm(formula = cigs ~ log(cigpric) + log(income) + educ + age +
## agesq + restaurn + white, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.772 -9.330 -5.907 7.945 70.275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.682419 24.220729 -0.111 0.91184
## log(cigpric) -0.850907 5.782321 -0.147 0.88305
## log(income) 0.869014 0.728763 1.192 0.23344
## educ -0.501753 0.167168 -3.001 0.00277 **
## age 0.774502 0.160516 4.825 1.68e-06 ***
## agesq -0.009069 0.001748 -5.188 2.70e-07 ***
## restaurn -2.865621 1.117406 -2.565 0.01051 *
## white -0.559236 1.459461 -0.383 0.70169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.41 on 799 degrees of freedom
## Multiple R-squared: 0.05291, Adjusted R-squared: 0.04461
## F-statistic: 6.377 on 7 and 799 DF, p-value: 2.588e-07
s_error <- coef(summary(model11))
s_error
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.682418774 24.220728831 -0.1107489 9.118433e-01
## log(cigpric) -0.850907441 5.782321084 -0.1471567 8.830454e-01
## log(income) 0.869013971 0.728763480 1.1924499 2.334389e-01
## educ -0.501753247 0.167167689 -3.0014966 2.770186e-03
## age 0.774502156 0.160515805 4.8250835 1.676279e-06
## agesq -0.009068603 0.001748055 -5.1878253 2.699355e-07
## restaurn -2.865621212 1.117405936 -2.5645301 1.051320e-02
## white -0.559236403 1.459461034 -0.3831801 7.016882e-01
# turning point of the age quadratic: -b_age / (2 * b_agesq)
age_turn <- -coef(model11)["age"] / (2 * coef(model11)["agesq"])
age_turn
##     age 
## 42.7024
coef_res <- coef(model11)["restaurn"]
coef_res
## restaurn
## -2.865621
library(lmtest)
data_update <- data.frame(cigpric = 67.44, income = 6500, educ = 16, age = 77, restaurn = 0, white = 0)
het_test <- bptest(model11)
het_test
##
## studentized Breusch-Pagan test
##
## data: model11
## BP = 32.377, df = 7, p-value = 3.458e-05
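The studentized Breusch-Pagan statistic reported above is n times the R-squared from regressing the squared OLS residuals on the original regressors; a sketch of that manual version (it should match BP = 32.377):

u2 <- resid(model11)^2
aux <- lm(u2 ~ log(cigpric) + log(income) + educ + age + agesq + restaurn + white, data = data4)
bp_manual <- nobs(model11) * summary(aux)$r.squared
1 - pchisq(bp_manual, df = 7)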
#### **C4**
data5 <- wooldridge::vote1
head(data5)
## state district democA voteA expendA expendB prtystrA lexpendA lexpendB
## 1 AL 7 1 68 328.296 8.737 41 5.793916 2.167567
## 2 AK 1 0 62 626.377 402.477 60 6.439952 5.997638
## 3 AZ 2 1 73 99.607 3.065 55 4.601233 1.120048
## 4 AZ 3 0 69 319.690 26.281 64 5.767352 3.268846
## 5 AR 3 0 75 159.221 60.054 66 5.070293 4.095244
## 6 AR 4 1 69 570.155 21.393 46 6.345908 3.063064
## shareA
## 1 97.40767
## 2 60.88104
## 3 97.01476
## 4 92.40370
## 5 72.61247
## 6 96.38355
data ("vote1")
model5 <- lm (voteA ~ prtystrA + democA + lexpendA + lexpendB, data = vote1)
summary(model5)
##
## Call:
## lm(formula = voteA ~ prtystrA + democA + lexpendA + lexpendB,
## data = vote1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.576 -4.864 -1.146 4.903 24.566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.66142 4.73604 7.952 2.56e-13 ***
## prtystrA 0.25192 0.07129 3.534 0.00053 ***
## democA 3.79294 1.40652 2.697 0.00772 **
## lexpendA 5.77929 0.39182 14.750 < 2e-16 ***
## lexpendB -6.23784 0.39746 -15.694 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared: 0.8012, Adjusted R-squared: 0.7964
## F-statistic: 169.2 on 4 and 168 DF, p-value: < 2.2e-16
data5 <- data5 %>% mutate(residual_squared=(model5$residuals)^2)
head(data5)
## state district democA voteA expendA expendB prtystrA lexpendA lexpendB
## 1 AL 7 1 68 328.296 8.737 41 5.793916 2.167567
## 2 AK 1 0 62 626.377 402.477 60 6.439952 5.997638
## 3 AZ 2 1 73 99.607 3.065 55 4.601233 1.120048
## 4 AZ 3 0 69 319.690 26.281 64 5.767352 3.268846
## 5 AR 3 0 75 159.221 60.054 66 5.070293 4.095244
## 6 AR 4 1 69 570.155 21.393 46 6.345908 3.063064
## shareA residual_squared
## 1 97.40767 14.038468
## 2 60.88104 88.688062
## 3 97.01476 3.667331
## 4 92.40370 5.176383
## 5 72.61247 287.464399
## 6 96.38355 2.593858
model5_r<- lm(residual_squared~prtystrA+democA+log(expendA)+log(expendB),data= data5)
summary(model5_r)
##
## Call:
## lm(formula = residual_squared ~ prtystrA + democA + log(expendA) +
## log(expendB), data = data5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.88 -46.16 -23.51 15.84 508.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 113.9635 50.8150 2.243 0.0262 *
## prtystrA -0.2993 0.7649 -0.391 0.6961
## democA 15.6192 15.0912 1.035 0.3022
## log(expendA) -10.3057 4.2040 -2.451 0.0153 *
## log(expendB) -0.0514 4.2645 -0.012 0.9904
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 81.25 on 168 degrees of freedom
## Multiple R-squared: 0.05256, Adjusted R-squared: 0.03
## F-statistic: 2.33 on 4 and 168 DF, p-value: 0.05806
bptest(model5)
##
## studentized Breusch-Pagan test
##
## data: model5
## BP = 9.0934, df = 4, p-value = 0.05881
bptest (model5, ~ prtystrA*democA*lexpendA*lexpendB + I(prtystrA^2) + I(democA^2) + I(lexpendA^2) + I(lexpendB^2), data = vote1)
##
## studentized Breusch-Pagan test
##
## data: model5
## BP = 43.327, df = 18, p-value = 0.0007194
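A more parsimonious special form of the White test uses the fitted values and their squares instead of all levels, squares, and interactions; a sketch:

bptest(model5, ~ fitted(model5) + I(fitted(model5)^2))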
#### **C13**
library(sandwich)
library(lmtest)
data6 <- wooldridge::fertil2
head(data6)
## mnthborn yearborn age electric radio tv bicycle educ ceb agefbrth children
## 1 5 64 24 1 1 1 1 12 0 NA 0
## 2 1 56 32 1 1 1 1 13 3 25 3
## 3 7 58 30 1 0 0 0 5 1 27 1
## 4 11 45 42 1 0 1 0 4 3 17 2
## 5 5 45 43 1 1 1 1 11 2 24 2
## 6 8 52 36 1 0 0 0 7 1 26 1
## knowmeth usemeth monthfm yearfm agefm idlnchld heduc agesq urban urb_educ
## 1 1 0 NA NA NA 2 NA 576 1 12
## 2 1 1 11 80 24 3 12 1024 1 13
## 3 1 0 6 83 24 5 7 900 1 5
## 4 1 0 1 61 15 3 11 1764 1 4
## 5 1 1 3 66 20 2 14 1849 1 11
## 6 1 1 11 76 24 4 9 1296 1 7
## spirit protest catholic frsthalf educ0 evermarr
## 1 0 0 0 1 0 0
## 2 0 0 0 1 0 1
## 3 1 0 0 0 0 1
## 4 0 0 0 0 0 1
## 5 0 1 0 1 0 1
## 6 0 0 0 0 0 1
model <- lm(children ~ age + I(age^2) + educ + electric + urban, data = fertil2)
robust_se <- sqrt(diag(vcovHC(model)))
summary_with_robust_se <- cbind(coef(model), "Robust SE" = robust_se)
summary(model)
##
## Call:
## lm(formula = children ~ age + I(age^2) + educ + electric + urban,
## data = fertil2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9012 -0.7136 -0.0039 0.7119 7.4318
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.2225162 0.2401888 -17.580 < 2e-16 ***
## age 0.3409255 0.0165082 20.652 < 2e-16 ***
## I(age^2) -0.0027412 0.0002718 -10.086 < 2e-16 ***
## educ -0.0752323 0.0062966 -11.948 < 2e-16 ***
## electric -0.3100404 0.0690045 -4.493 7.20e-06 ***
## urban -0.2000339 0.0465062 -4.301 1.74e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.452 on 4352 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.5734, Adjusted R-squared: 0.5729
## F-statistic: 1170 on 5 and 4352 DF, p-value: < 2.2e-16
print(summary_with_robust_se)
## Robust SE
## (Intercept) -4.222516228 0.2443961935
## age 0.340925520 0.0192199445
## I(age^2) -0.002741209 0.0003513959
## educ -0.075232323 0.0063159137
## electric -0.310040409 0.0640737262
## urban -0.200033857 0.0455162364
coeftest(model6, vcov=vcovHC(model6, type = "HC0"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2555544 0.3368805 3.7270 0.000284 ***
## PC 0.1518539 0.0596641 2.5451 0.012048 *
## hsGPA 0.4502203 0.0962508 4.6776 6.962e-06 ***
## ACT 0.0077242 0.0103074 0.7494 0.454932
## mothcoll -0.0037579 0.0601271 -0.0625 0.950258
## fathcoll 0.0417999 0.0578749 0.7222 0.471392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
F= ((0.574-0.5734)/3)/((1-0.574)/4349)
F
## [1] 2.041784
critical_value <- qf(0.95, 3, 4349)  # 5% critical value of F(3, 4349), roughly 2.61
critical_value
p_value = 1- pf(F,3,4349)
p_value
## [1] 0.1058349
a <- coeftest(model, vcov = sandwich)
a
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.22251623 0.24368307 -17.3279 < 2.2e-16 ***
## age 0.34092552 0.01916146 17.7923 < 2.2e-16 ***
## I(age^2) -0.00274121 0.00035027 -7.8260 6.278e-15 ***
## educ -0.07523232 0.00630336 -11.9353 < 2.2e-16 ***
## electric -0.31004041 0.06390411 -4.8517 1.267e-06 ***
## urban -0.20003386 0.04543962 -4.4022 1.097e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 1065, df = 5, p-value < 2.2e-16
usqr=(summary(model6)$residual)^2
yhat=model6$fitted.values
yhatsqr=model6$fitted.values^2
model6WT=summary(lm(usqr~yhat+yhatsqr,data=fertil2))$r.squared
model6WT*4361
## [1] 143.6236
1-pchisq(1093.183,2)
## [1] 0
joint_test <- coeftest(model6, vcov = vcovHC)
print(joint_test[, "Pr(>|t|)"])
## (Intercept) PC hsGPA ACT mothcoll fathcoll
## 5.903211e-04 1.682044e-02 1.977193e-05 4.846838e-01 9.528773e-01 4.948069e-01
p_value = 1- pf(F,3,4349)
p_value
## [1] 0.1058349
head(summary_with_robust_se)
## Robust SE
## (Intercept) -4.222516228 0.2443961935
## age 0.340925520 0.0192199445
## I(age^2) -0.002741209 0.0003513959
## educ -0.075232323 0.0063159137
## electric -0.310040409 0.0640737262
## urban -0.200033857 0.0455162364
fv <- fitted(model6)
head(fv,6)
## 1 2 3 4 5 6
## 2.768423 2.919682 3.115218 3.039878 3.269490 3.021208
residuals <- resid(model6)
head(residuals,6)
## 1 2 3 4 5 6
## 0.23157702 0.48031850 -0.11521801 0.46012183 0.33050949 -0.02120773
hetero_test <- lm(residuals^2 ~ fv)
summary(hetero_test)
##
## Call:
## lm(formula = residuals^2 ~ fv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.13813 -0.07873 -0.04322 0.04132 0.51313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.27680 0.18877 -1.466 0.1448
## fv 0.12558 0.06165 2.037 0.0436 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.128 on 139 degrees of freedom
## Multiple R-squared: 0.02898, Adjusted R-squared: 0.022
## F-statistic: 4.149 on 1 and 139 DF, p-value: 0.04357
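To turn this auxiliary regression into a test statistic, the LM form multiplies its R-squared by the sample size and compares the result with a chi-squared(1) distribution; a sketch using the objects above:

lm_stat <- nobs(model6) * summary(hetero_test)$r.squared
1 - pchisq(lm_stat, df = 1)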
## *Chapter 9*
#### **Exercise 1**
data7 <- wooldridge::ceosal2
head(data7)
## salary age college grad comten ceoten sales profits mktval lsalary lsales
## 1 1161 49 1 1 9 2 6200 966 23200 7.057037 8.732305
## 2 600 43 1 1 10 10 283 48 1100 6.396930 5.645447
## 3 379 51 1 1 9 3 169 40 1100 5.937536 5.129899
## 4 651 55 1 0 22 22 1100 -54 1000 6.478509 7.003066
## 5 497 44 1 1 8 6 351 28 387 6.208590 5.860786
## 6 1067 64 1 1 7 7 19000 614 3900 6.972606 9.852194
## lmktval comtensq ceotensq profmarg
## 1 10.051908 81 4 15.580646
## 2 7.003066 100 100 16.961130
## 3 7.003066 81 9 23.668638
## 4 6.907755 484 484 -4.909091
## 5 5.958425 64 36 7.977208
## 6 8.268732 49 49 3.231579
model11 <- lm(log(salary) ~ log(sales) + log(mktval) + profmarg + ceoten + comten, data = data7)
summary(model11)
##
## Call:
## lm(formula = log(salary) ~ log(sales) + log(mktval) + profmarg +
## ceoten + comten, data = data7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5436 -0.2796 -0.0164 0.2857 1.9879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.571977 0.253466 18.038 < 2e-16 ***
## log(sales) 0.187787 0.040003 4.694 5.46e-06 ***
## log(mktval) 0.099872 0.049214 2.029 0.04397 *
## profmarg -0.002211 0.002105 -1.050 0.29514
## ceoten 0.017104 0.005540 3.087 0.00236 **
## comten -0.009238 0.003337 -2.768 0.00626 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4947 on 171 degrees of freedom
## Multiple R-squared: 0.3525, Adjusted R-squared: 0.3336
## F-statistic: 18.62 on 5 and 171 DF, p-value: 9.488e-15
r_squared <- summary(model11)$r.squared
r_squared
## [1] 0.3525374
model12 <- lm(log(salary) ~ log(sales) + log(mktval) + profmarg + ceotensq + comtensq, data = data7)
summary(model12)
##
## Call:
## lm(formula = log(salary) ~ log(sales) + log(mktval) + profmarg +
## ceotensq + comtensq, data = data7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.47481 -0.25933 -0.00511 0.27010 2.07583
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.612e+00 2.524e-01 18.276 < 2e-16 ***
## log(sales) 1.805e-01 4.021e-02 4.489 1.31e-05 ***
## log(mktval) 1.018e-01 4.988e-02 2.040 0.0429 *
## profmarg -2.077e-03 2.135e-03 -0.973 0.3321
## ceotensq 3.761e-04 1.916e-04 1.963 0.0512 .
## comtensq -1.788e-04 7.236e-05 -2.471 0.0144 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5024 on 171 degrees of freedom
## Multiple R-squared: 0.3324, Adjusted R-squared: 0.3129
## F-statistic: 17.03 on 5 and 171 DF, p-value: 1.195e-13
r_squared <- summary(model12)$r.squared
r_squared
## [1] 0.3323998
#### **Exercise 5**
data9 <- wooldridge::campus
head(data9)
## enroll priv police crime lcrime lenroll lpolice
## 1 21836 0 24 446 6.100319 9.991315 3.178054
## 2 6485 0 13 1 0.000000 8.777247 2.564949
## 3 2123 0 3 1 0.000000 7.660585 1.098612
## 4 8240 0 17 121 4.795791 9.016756 2.833213
## 5 19793 0 30 470 6.152733 9.893084 3.401197
## 6 3256 1 9 25 3.218876 8.088255 2.197225
b0 <- -6.63
se_b0 <- 1.03
b1 <- 1.27
se_b1 <- 0.11
n <- nrow(data9)
t_stat <- (b1 - 1) / se_b1
df <- n - 2
critical_value <- qt(0.95, df)
if (t_stat > critical_value) {
cat("Reject the null hypothesis H0: B1 = 1 in favor of H1: B1 > 1 at the 5% level.\n")
} else {
cat("Fail to reject the null hypothesis H0: B1 = 1 at the 5% level.\n")
}
## Reject the null hypothesis H0: B1 = 1 in favor of H1: B1 > 1 at the 5% level.
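The corresponding one-sided p-value follows from the same quantities (t_stat and df are defined above):

# P(T > t_stat) under H0: B1 = 1
pt(t_stat, df, lower.tail = FALSE)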
#### **C4**
data ("infmrt")
model9 <- lm(infmort ~ lpcinc + lphysic + lpopul + DC, data = infmrt )
summary(model9)
##
## Call:
## lm(formula = infmort ~ lpcinc + lphysic + lpopul + DC, data = infmrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6894 -0.8973 -0.1412 0.7054 3.1705
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.2125 6.7267 5.383 5.09e-07 ***
## lpcinc -2.3964 0.8866 -2.703 0.00812 **
## lphysic -1.5548 0.7734 -2.010 0.04718 *
## lpopul 0.5755 0.1365 4.215 5.60e-05 ***
## DC 13.9632 1.2466 11.201 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.255 on 97 degrees of freedom
## Multiple R-squared: 0.6432, Adjusted R-squared: 0.6285
## F-statistic: 43.72 on 4 and 97 DF, p-value: < 2.2e-16
model9 <- lm(infmort~log(pcinc)+log(physic)+log(popul), data=infmrt)
summary(model9)
##
## Call:
## lm(formula = infmort ~ log(pcinc) + log(physic) + log(popul),
## data = infmrt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0204 -1.1897 -0.1152 0.9083 8.1890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.22614 10.13466 3.574 0.000547 ***
## log(pcinc) -4.88415 1.29319 -3.777 0.000273 ***
## log(physic) 4.02783 0.89101 4.521 1.73e-05 ***
## log(popul) -0.05356 0.18748 -0.286 0.775712
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.891 on 98 degrees of freedom
## Multiple R-squared: 0.1818, Adjusted R-squared: 0.1568
## F-statistic: 7.26 on 3 and 98 DF, p-value: 0.0001898
#### **C5**
library(wooldridge)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ car::recode() masks dplyr::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(quantreg)
## Loading required package: SparseM
data <- as.data.frame(rdchem)
data$sales_billion <- data$sales / 1e3
ols_full <- lm(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data)
summary(ols_full)
##
## Call:
## lm(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0371 -1.1238 -0.4547 0.7165 5.8522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.058967 0.626269 3.288 0.00272 **
## sales_billion 0.316611 0.138854 2.280 0.03041 *
## I(sales_billion^2) -0.007390 0.003716 -1.989 0.05657 .
## profmarg 0.053322 0.044210 1.206 0.23787
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.774 on 28 degrees of freedom
## Multiple R-squared: 0.1905, Adjusted R-squared: 0.1037
## F-statistic: 2.196 on 3 and 28 DF, p-value: 0.1107
# note: the largest firm in rdchem has sales just under $40 billion, so this cutoff keeps
# all 32 observations (which is why the filtered results below match the full-sample ones)
data_filtered <- data %>% filter(sales_billion < 40)
ols_filtered <- lm(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_filtered)
summary(ols_filtered)
##
## Call:
## lm(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, data = data_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0371 -1.1238 -0.4547 0.7165 5.8522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.058967 0.626269 3.288 0.00272 **
## sales_billion 0.316611 0.138854 2.280 0.03041 *
## I(sales_billion^2) -0.007390 0.003716 -1.989 0.05657 .
## profmarg 0.053322 0.044210 1.206 0.23787
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.774 on 28 degrees of freedom
## Multiple R-squared: 0.1905, Adjusted R-squared: 0.1037
## F-statistic: 2.196 on 3 and 28 DF, p-value: 0.1107
lad_full <- rq(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data, tau = 0.5)
summary(lad_full)
##
## Call: rq(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, tau = 0.5, data = data)
##
## tau: [1] 0.5
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 1.40428 0.87031 2.66628
## sales_billion 0.26346 -0.13508 0.75753
## I(sales_billion^2) -0.00600 -0.01679 0.00344
## profmarg 0.11400 0.01376 0.16427
lad_filtered <- rq(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_filtered, tau = 0.5)
summary(lad_filtered)
##
## Call: rq(formula = rdintens ~ sales_billion + I(sales_billion^2) +
## profmarg, tau = 0.5, data = data_filtered)
##
## tau: [1] 0.5
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 1.40428 0.87031 2.66628
## sales_billion 0.26346 -0.13508 0.75753
## I(sales_billion^2) -0.00600 -0.01679 0.00344
## profmarg 0.11400 0.01376 0.16427
comparison <- data.frame(
Method = c("OLS (Full)", "OLS (Filtered)", "LAD (Full)", "LAD (Filtered)"),
Intercept = c(coef(ols_full)[1], coef(ols_filtered)[1], coef(lad_full)[1], coef(lad_filtered)[1]),
Sales = c(coef(ols_full)[2], coef(ols_filtered)[2], coef(lad_full)[2], coef(lad_filtered)[2]),
Sales_Squared = c(coef(ols_full)[3], coef(ols_filtered)[3], coef(lad_full)[3], coef(lad_filtered)[3]),
Profmarg = c(coef(ols_full)[4], coef(ols_filtered)[4], coef(lad_full)[4], coef(lad_filtered)[4])
)
print(comparison)
## Method Intercept Sales Sales_Squared Profmarg
## 1 OLS (Full) 2.058967 0.3166110 -0.007389624 0.05332217
## 2 OLS (Filtered) 2.058967 0.3166110 -0.007389624 0.05332217
## 3 LAD (Full) 1.404279 0.2634638 -0.006001127 0.11400366
## 4 LAD (Filtered) 1.404279 0.2634638 -0.006001127 0.11400366
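The filtered and full-sample rows coincide because the largest firm's sales fall just below the $40 billion cutoff. A sketch of one way to actually re-estimate without that firm, under the assumption that dropping the single largest observation is what is intended:

data_no_outlier <- data %>% filter(sales_billion < max(sales_billion))
ols_no_outlier <- lm(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_no_outlier)
lad_no_outlier <- rq(rdintens ~ sales_billion + I(sales_billion^2) + profmarg, data = data_no_outlier, tau = 0.5)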
## *Chapter 10*
#### **Exercise 1**
#### **Exercise 5**
#### **C1**
data_10C1 <- intdef %>% mutate(later1979= ifelse(year>1979,1,0))
head(data_10C1,5)
## year i3 inf rec out def i3_1 inf_1 def_1 ci3 cinf
## 1 1948 1.04 8.1 16.2 11.6 -4.6000004 NA NA NA NA NA
## 2 1949 1.10 -1.2 14.5 14.3 -0.1999998 1.04 8.1 -4.6000004 0.06000006 -9.3
## 3 1950 1.22 1.3 14.4 15.6 1.2000008 1.10 -1.2 -0.1999998 0.12000000 2.5
## 4 1951 1.55 7.9 16.1 14.2 -1.9000006 1.22 1.3 1.2000008 0.32999992 6.6
## 5 1952 1.77 1.9 19.0 19.4 0.3999996 1.55 7.9 -1.9000006 0.22000003 -6.0
## cdef y77 later1979
## 1 NA 0 0
## 2 4.400001 0 0
## 3 1.400001 0 0
## 4 -3.100001 0 0
## 5 2.300000 0 0
model_10C1 <- lm(i3~inf+def+later1979, data=data_10C1)
summary(model_10C1)
##
## Call:
## lm(formula = i3 ~ inf + def + later1979, data = data_10C1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4674 -0.8407 0.2388 1.0148 3.9654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.29623 0.42535 3.047 0.00362 **
## inf 0.60842 0.07625 7.979 1.37e-10 ***
## def 0.36266 0.12025 3.016 0.00396 **
## later1979 1.55877 0.50577 3.082 0.00329 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.711 on 52 degrees of freedom
## Multiple R-squared: 0.6635, Adjusted R-squared: 0.6441
## F-statistic: 34.18 on 3 and 52 DF, p-value: 2.408e-12
data_10C1$later1979 <- as.integer(data_10C1$year > 1979)  # equivalent construction of the post-1979 dummy
data13 <- wooldridge::intdef
head(data13)
## year i3 inf rec out def i3_1 inf_1 def_1 ci3 cinf
## 1 1948 1.04 8.1 16.2 11.6 -4.6000004 NA NA NA NA NA
## 2 1949 1.10 -1.2 14.5 14.3 -0.1999998 1.04 8.1 -4.6000004 0.06000006 -9.3
## 3 1950 1.22 1.3 14.4 15.6 1.2000008 1.10 -1.2 -0.1999998 0.12000000 2.5
## 4 1951 1.55 7.9 16.1 14.2 -1.9000006 1.22 1.3 1.2000008 0.32999992 6.6
## 5 1952 1.77 1.9 19.0 19.4 0.3999996 1.55 7.9 -1.9000006 0.22000003 -6.0
## 6 1953 1.93 0.8 18.7 20.4 1.6999989 1.77 1.9 0.3999996 0.15999997 -1.1
## cdef y77
## 1 NA 0
## 2 4.400001 0
## 3 1.400001 0
## 4 -3.100001 0
## 5 2.300000 0
## 6 1.299999 0
data13$dummy <- as.integer(data13$year > 1979)
data13$dummy
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
## [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
model15 <- lm(i3 ~ inf + def , data = data13)
summary(model15)
##
## Call:
## lm(formula = i3 ~ inf + def, data = data13)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9948 -1.1694 0.1959 0.9602 4.7224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.73327 0.43197 4.012 0.00019 ***
## inf 0.60587 0.08213 7.376 1.12e-09 ***
## def 0.51306 0.11838 4.334 6.57e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.843 on 53 degrees of freedom
## Multiple R-squared: 0.6021, Adjusted R-squared: 0.5871
## F-statistic: 40.09 on 2 and 53 DF, p-value: 2.483e-11
model16 <- lm(i3 ~ inf + def + y77 , data = data13)
summary(model16)
##
## Call:
## lm(formula = i3 ~ inf + def + y77, data = data13)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4048 -0.9632 0.2192 0.8497 4.3447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.40531 0.42239 3.327 0.00162 **
## inf 0.56884 0.07832 7.263 1.88e-09 ***
## def 0.36276 0.12337 2.940 0.00488 **
## y77 1.47773 0.52349 2.823 0.00673 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.733 on 52 degrees of freedom
## Multiple R-squared: 0.6549, Adjusted R-squared: 0.635
## F-statistic: 32.9 on 3 and 52 DF, p-value: 4.608e-12
#### **C9**
# Load necessary library
library(wooldridge)
# Load the dataset 'volat'
data("volat")
# Display the structure of the dataset to verify variable names
str(volat)
## 'data.frame': 558 obs. of 17 variables:
## $ date : num 1947 1947 1947 1947 1947 ...
## $ sp500 : num 15.2 15.8 15.2 14.6 14.3 ...
## $ divyld: num 4.49 4.38 4.61 4.75 5.05 ...
## $ i3 : num 0.38 0.38 0.38 0.38 0.38 ...
## $ ip : num 22.4 22.5 22.6 22.5 22.6 ...
## $ pcsp : num NA 46.5 -48.6 -44.3 -21.4 ...
## $ rsp500: num NA 50.9 -44 -39.6 -16.3 ...
## $ pcip : num NA 5.36 5.33 -5.31 5.33 ...
## $ ci3 : num NA 0 0 0 0 ...
## $ ci3_1 : num NA NA 0 0 0 ...
## $ ci3_2 : num NA NA NA 0 0 ...
## $ pcip_1: num NA NA 5.36 5.33 -5.31 ...
## $ pcip_2: num NA NA NA 5.36 5.33 ...
## $ pcip_3: num NA NA NA NA 5.36 ...
## $ pcsp_1: num NA NA 46.5 -48.6 -44.3 ...
## $ pcsp_2: num NA NA NA 46.5 -48.6 ...
## $ pcsp_3: num NA NA NA NA 46.5 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
# Check the first few rows to confirm the data
head(volat)
## date sp500 divyld i3 ip pcsp rsp500 pcip ci3 ci3_1 ci3_2
## 1 1947.01 15.21 4.49 0.38 22.4 NA NA NA NA NA NA
## 2 1947.02 15.80 4.38 0.38 22.5 46.54833 50.92833 5.357163 0 NA NA
## 3 1947.03 15.16 4.61 0.38 22.6 -48.60762 -43.99762 5.333354 0 0 NA
## 4 1947.04 14.60 4.75 0.38 22.5 -44.32714 -39.57714 -5.309754 0 0 0
## 5 1947.05 14.34 5.05 0.38 22.6 -21.36988 -16.31988 5.333354 0 0 0
## 6 1947.06 14.84 5.03 0.38 22.6 41.84100 46.87100 0.000000 0 0 0
## pcip_1 pcip_2 pcip_3 pcsp_1 pcsp_2 pcsp_3
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 5.357163 NA NA 46.54833 NA NA
## 4 5.333354 5.357163 NA -48.60762 46.54833 NA
## 5 -5.309754 5.333354 5.357163 -44.32714 -48.60762 46.54833
## 6 5.333354 -5.309754 5.333354 -21.36988 -44.32714 -48.60762
# Estimate the equation rsp500_t = β0 + β1 * pcip_t + β2 * i3_t + u_t using OLS
model <- lm(rsp500 ~ pcip + i3, data = volat)
# Summarize the results of the model
summary(model)
##
## Call:
## lm(formula = rsp500 ~ pcip + i3, data = volat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -157.871 -22.580 2.103 25.524 138.137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.84306 3.27488 5.754 1.44e-08 ***
## pcip 0.03642 0.12940 0.281 0.7785
## i3 -1.36169 0.54072 -2.518 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40.13 on 554 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.01189, Adjusted R-squared: 0.008325
## F-statistic: 3.334 on 2 and 554 DF, p-value: 0.03637
## *Chapter 11*
#### **C7**
# Load the wooldridge package
library(wooldridge)
# Load the CONSUMP dataset
data("consump", package = "wooldridge")
# View the structure of the dataset
str(consump)
## 'data.frame': 37 obs. of 24 variables:
## $ year : int 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 ...
## $ i3 : num 3.41 2.93 2.38 2.78 3.16 ...
## $ inf : num 0.7 1.7 1 1 1.3 ...
## $ rdisp : num 1530 1565 1616 1694 1756 ...
## $ rnondc : num 606 615 627 646 660 ...
## $ rserv : num 687 717 746 783 819 ...
## $ pop : num 177830 180671 183691 186538 189242 ...
## $ y : num 8604 8664 8796 9080 9276 ...
## $ rcons : num 1294 1333 1373 1430 1479 ...
## $ c : num 7275 7377 7476 7665 7814 ...
## $ r3 : num 2.71 1.23 1.38 1.78 1.86 ...
## $ lc : num 8.89 8.91 8.92 8.94 8.96 ...
## $ ly : num 9.06 9.07 9.08 9.11 9.14 ...
## $ gc : num NA 0.0139 0.0133 0.0251 0.0192 ...
## $ gy : num NA 0.00696 0.01511 0.03171 0.02145 ...
## $ gc_1 : num NA NA 0.0139 0.0133 0.0251 ...
## $ gy_1 : num NA NA 0.00696 0.01511 0.03171 ...
## $ r3_1 : num NA 2.71 1.23 1.38 1.78 ...
## $ lc_ly : num -0.168 -0.161 -0.163 -0.169 -0.172 ...
## $ lc_ly_1: num NA -0.168 -0.161 -0.163 -0.169 ...
## $ gc_2 : num NA NA NA 0.0139 0.0133 ...
## $ gy_2 : num NA NA NA 0.00696 0.01511 ...
## $ r3_2 : num NA NA 2.71 1.23 1.38 ...
## $ lc_ly_2: num NA NA -0.168 -0.161 -0.163 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
# Generate growth in consumption (gc) as specified
consump$gc <- log(consump$c) - log(dplyr::lag(consump$c, n = 1))
# Run regression: gc_t = β0 + β1 * gc_t-1 + u_t
model1 <- lm(gc ~ dplyr::lag(gc, n = 1), data = consump)
summary(model1)
##
## Call:
## lm(formula = gc ~ dplyr::lag(gc, n = 1), data = consump)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.027878 -0.005974 -0.001451 0.007142 0.020227
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.011431 0.003778 3.026 0.00478 **
## dplyr::lag(gc, n = 1) 0.446141 0.156047 2.859 0.00731 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01161 on 33 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1985, Adjusted R-squared: 0.1742
## F-statistic: 8.174 on 1 and 33 DF, p-value: 0.00731
# Include lagged variables of gy, i3, and inf
consump$gy_lag <- dplyr::lag(consump$gy, n = 1)
consump$i3_lag <- dplyr::lag(consump$i3, n = 1)
consump$inf_lag <- dplyr::lag(consump$inf, n = 1)
# Run regression with additional variables
model2 <- lm(gc ~ dplyr::lag(gc, n = 1) + gy_lag + i3_lag + inf_lag, data = consump)
summary(model2)
##
## Call:
## lm(formula = gc ~ dplyr::lag(gc, n = 1) + gy_lag + i3_lag + inf_lag,
## data = consump)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0249093 -0.0075869 0.0000855 0.0087234 0.0188617
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0225940 0.0070892 3.187 0.00335 **
## dplyr::lag(gc, n = 1) 0.4335832 0.2896525 1.497 0.14487
## gy_lag -0.1079087 0.1946371 -0.554 0.58341
## i3_lag -0.0007467 0.0011107 -0.672 0.50654
## inf_lag -0.0008281 0.0010041 -0.825 0.41606
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01134 on 30 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3038, Adjusted R-squared: 0.211
## F-statistic: 3.273 on 4 and 30 DF, p-value: 0.02431
# Check p-value of lag(gc)
p_value_lag_gc <- summary(model2)$coefficients["dplyr::lag(gc, n = 1)", "Pr(>|t|)"]
#### **C12**
library(wooldridge)
library(lmtest)
library(sandwich)
library(ggplot2)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
data("minwage")
# Focus on the relevant variables
# gwage232: growth in wage (sector 232)
# gemp232: growth in employment (sector 232)
# gmwage: growth in federal minimum wage
# gcpi: growth in CPI
# Create lagged variables
minwage$gwage232_lag1 <- c(NA, minwage$gwage232[-nrow(minwage)]) # Lagged gwage232
minwage$gemp232_lag1 <- c(NA, minwage$gemp232[-nrow(minwage)]) # Lagged gemp232
# Remove rows with NA values
minwage_clean <- na.omit(minwage)
acf_gwage <- acf(minwage_clean$gwage232, lag.max = 1, plot = FALSE) # ACF
first_order_autocorrelation <- acf_gwage$acf[2] # First lag
cat("First-order autocorrelation in gwage232:", first_order_autocorrelation, "\n")
## First-order autocorrelation in gwage232: -0.02425413
model_dynamic <- lm(gwage232 ~ gwage232_lag1 + gmwage + gcpi, data = minwage_clean)
summary(model_dynamic)
##
## Call:
## lm(formula = gwage232 ~ gwage232_lag1 + gmwage + gcpi, data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.044649 -0.004114 -0.001262 0.004481 0.041568
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0023648 0.0004295 5.506 5.45e-08 ***
## gwage232_lag1 -0.0684816 0.0343986 -1.991 0.0470 *
## gmwage 0.1517511 0.0095115 15.955 < 2e-16 ***
## gcpi 0.2586795 0.0858602 3.013 0.0027 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007775 on 595 degrees of freedom
## Multiple R-squared: 0.3068, Adjusted R-squared: 0.3033
## F-statistic: 87.79 on 3 and 595 DF, p-value: < 2.2e-16
# Check if gmwage has a significant effect while controlling for other variables
cat("Effect of gmwage (contemporaneous effect):\n")
## Effect of gmwage (contemporaneous effect):
print(summary(model_dynamic)$coefficients["gmwage", ])
## Estimate Std. Error t value Pr(>|t|)
## 1.517511e-01 9.511457e-03 1.595456e+01 5.753131e-48
model_with_lagged_employment <- lm(gwage232 ~ gwage232_lag1 + gmwage + gcpi + gemp232_lag1, data = minwage_clean)
summary(model_with_lagged_employment)
##
## Call:
## lm(formula = gwage232 ~ gwage232_lag1 + gmwage + gcpi + gemp232_lag1,
## data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.043900 -0.004316 -0.000955 0.004255 0.042430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0023960 0.0004254 5.633 2.74e-08 ***
## gwage232_lag1 -0.0656875 0.0340720 -1.928 0.054344 .
## gmwage 0.1525470 0.0094213 16.192 < 2e-16 ***
## gcpi 0.2536899 0.0850342 2.983 0.002968 **
## gemp232_lag1 0.0606620 0.0169691 3.575 0.000379 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007699 on 594 degrees of freedom
## Multiple R-squared: 0.3214, Adjusted R-squared: 0.3169
## F-statistic: 70.34 on 4 and 594 DF, p-value: < 2.2e-16
# Check if the lagged growth in employment is statistically significant
cat("Effect of lagged growth in employment (gemp232_lag1):\n")
## Effect of lagged growth in employment (gemp232_lag1):
print(summary(model_with_lagged_employment)$coefficients["gemp232_lag1", ])
## Estimate Std. Error t value Pr(>|t|)
## 0.0606619954 0.0169690920 3.5748521768 0.0003789121
model_without_lags <- lm(gwage232 ~ gmwage + gcpi, data = minwage_clean)
summary(model_without_lags)
##
## Call:
## lm(formula = gwage232 ~ gmwage + gcpi, data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.044501 -0.004053 -0.001328 0.004503 0.041194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0021926 0.0004217 5.199 2.75e-07 ***
## gmwage 0.1506131 0.0095178 15.824 < 2e-16 ***
## gcpi 0.2374126 0.0854046 2.780 0.00561 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007794 on 596 degrees of freedom
## Multiple R-squared: 0.3022, Adjusted R-squared: 0.2999
## F-statistic: 129.1 on 2 and 596 DF, p-value: < 2.2e-16
# Compare coefficients of gmwage across models
cat("Coefficient of gmwage in model without lags:", summary(model_without_lags)$coefficients["gmwage", 1], "\n")
## Coefficient of gmwage in model without lags: 0.1506131
cat("Coefficient of gmwage in model with lags:", summary(model_with_lagged_employment)$coefficients["gmwage", 1], "\n")
## Coefficient of gmwage in model with lags: 0.152547
model_gmwage <- lm(gmwage ~ gwage232_lag1 + gemp232_lag1, data = minwage_clean)
summary(model_gmwage)
##
## Call:
## lm(formula = gmwage ~ gwage232_lag1 + gemp232_lag1, data = minwage_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.01987 -0.00511 -0.00385 -0.00290 0.62191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.003487 0.001465 2.380 0.0176 *
## gwage232_lag1 0.212051 0.146727 1.445 0.1489
## gemp232_lag1 -0.042776 0.073749 -0.580 0.5621
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03347 on 596 degrees of freedom
## Multiple R-squared: 0.004117, Adjusted R-squared: 0.0007754
## F-statistic: 1.232 on 2 and 596 DF, p-value: 0.2924
# Report R-squared of the regression
r_squared_gmwage <- summary(model_gmwage)$r.squared
cat("R-squared of gmwage regression:", r_squared_gmwage, "\n")
## R-squared of gmwage regression: 0.004117273
# Interpret R-squared value
cat("Interpretation: R-squared indicates how well the lagged variables explain variation in gmwage.\n")
## Interpretation: R-squared indicates how well the lagged variables explain variation in gmwage.
## *Chapter 12*
#### **C11**
# Load necessary libraries
library(wooldridge) # For accessing Wooldridge datasets
library(lmtest) # For model diagnostics
library(sandwich) # For robust standard errors
library(tseries) # For ARCH models
library(ggplot2) # For plotting
# Load the NYSE dataset
data("nyse") # The dataset is part of the Wooldridge package
# Add lagged returns for the analysis
nyse$return_lag1 <- c(NA, nyse$return[-nrow(nyse)]) # Lagged return_t-1
nyse$return_lag2 <- c(NA, nyse$return_lag1[-nrow(nyse)]) # Lagged return_t-2
# Remove rows with NA values (introduced by lags)
nyse_clean <- na.omit(nyse)
model_12_47 <- lm(return ~ return_lag1, data = nyse_clean)
summary(model_12_47)
##
## Call:
## lm(formula = return ~ return_lag1, data = nyse_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2633 -1.2997 0.1011 1.3203 8.0688
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.17862 0.08084 2.210 0.0275 *
## return_lag1 0.05806 0.03811 1.523 0.1281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.112 on 686 degrees of freedom
## Multiple R-squared: 0.003372, Adjusted R-squared: 0.001919
## F-statistic: 2.321 on 1 and 686 DF, p-value: 0.1281
# Calculate squared residuals
nyse_clean$residuals_squared <- residuals(model_12_47)^2
# Find average, minimum, and maximum of squared residuals
avg_res <- mean(nyse_clean$residuals_squared, na.rm = TRUE)
min_res <- min(nyse_clean$residuals_squared, na.rm = TRUE)
max_res <- max(nyse_clean$residuals_squared, na.rm = TRUE)
cat("Average of squared residuals:", avg_res, "\n")
## Average of squared residuals: 4.446345
cat("Minimum of squared residuals:", min_res, "\n")
## Minimum of squared residuals: 3.591354e-06
cat("Maximum of squared residuals:", max_res, "\n")
## Maximum of squared residuals: 232.9687
nyse_clean$return_lag1_sq <- nyse_clean$return_lag1^2 # Squared lagged return
hetero_model <- lm(residuals_squared ~ return_lag1 + return_lag1_sq, data = nyse_clean)
summary(hetero_model)
##
## Call:
## lm(formula = residuals_squared ~ return_lag1 + return_lag1_sq,
## data = nyse_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.545 -3.021 -1.974 0.695 221.544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.25771 0.44131 7.382 4.54e-13 ***
## return_lag1 -0.78556 0.19626 -4.003 6.95e-05 ***
## return_lag1_sq 0.29749 0.03558 8.361 3.46e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.67 on 685 degrees of freedom
## Multiple R-squared: 0.1305, Adjusted R-squared: 0.128
## F-statistic: 51.41 on 2 and 685 DF, p-value: < 2.2e-16
# Extract coefficients, standard errors, R-squared, and adjusted R-squared
coefs <- coef(hetero_model)
std_errors <- sqrt(diag(vcov(hetero_model)))
r_squared <- summary(hetero_model)$r.squared
adj_r_squared <- summary(hetero_model)$adj.r.squared
cat("Coefficients:", coefs, "\n")
## Coefficients: 3.257706 -0.7855573 0.2974907
cat("Standard Errors:", std_errors, "\n")
## Standard Errors: 0.441314 0.1962611 0.0355787
cat("R-squared:", r_squared, "\n")
## R-squared: 0.1305106
cat("Adjusted R-squared:", adj_r_squared, "\n")
## Adjusted R-squared: 0.127972
nyse_clean$conditional_variance <- predict(hetero_model, newdata = nyse_clean)
ggplot(nyse_clean, aes(x = return_lag1, y = conditional_variance)) +
geom_line(color = "blue") +
labs(title = "Conditional Variance vs Lagged Return",
x = "Lagged Return (return_t-1)", y = "Conditional Variance") +
theme_minimal()
# Find value of return where variance is smallest
min_variance <- min(nyse_clean$conditional_variance, na.rm = TRUE)
cat("Minimum conditional variance:", min_variance, "\n")
## Minimum conditional variance: 2.739119
negative_variance <- sum(nyse_clean$conditional_variance < 0, na.rm = TRUE)
cat("Number of negative variance estimates:", negative_variance, "\n")
## Number of negative variance estimates: 0
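Since the fitted variance is a quadratic in the lagged return, the return at which it is smallest can also be read directly off the coefficients (the minimizer is -b1/(2*b2)); a quick sketch:

b <- coef(hetero_model)
-b["return_lag1"] / (2 * b["return_lag1_sq"])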
arch1_model <- garch(nyse_clean$return, order = c(1, 0))
##
## ***** ESTIMATION WITH ANALYTICAL GRADIENT *****
##
##
## I INITIAL X(I) D(I)
##
## 1 4.244486e+00 1.000e+00
## 2 5.000000e-02 1.000e+00
##
## IT NF F RELDF PRELDF RELDX STPPAR D*STEP NPRELDF
## 0 1 8.604e+02
## 1 4 8.604e+02 1.04e-05 3.38e-05 1.1e-03 2.9e+02 1.0e-02 4.94e-03
## 2 5 8.604e+02 1.86e-06 1.97e-06 1.0e-03 1.3e+00 1.0e-02 1.99e-06
## 3 6 8.604e+02 4.21e-09 1.02e-07 1.5e-03 0.0e+00 1.3e-02 1.02e-07
## 4 7 8.604e+02 2.86e-09 9.16e-08 7.7e-04 1.8e+00 6.7e-03 1.13e-06
## 5 8 8.604e+02 1.89e-09 4.61e-08 3.9e-04 1.9e+00 3.4e-03 1.67e-06
## 6 11 8.604e+02 1.01e-10 5.29e-09 4.5e-05 2.5e+01 4.0e-04 1.62e-06
## 7 14 8.604e+02 3.62e-11 5.04e-10 4.1e-06 3.9e+00 3.8e-05 1.61e-06
## 8 15 8.604e+02 9.74e-12 2.51e-10 2.1e-06 4.2e+02 1.9e-05 1.61e-06
## 9 16 8.604e+02 3.57e-12 1.26e-10 1.1e-06 5.6e+03 9.4e-06 1.61e-06
## 10 17 8.604e+02 1.69e-12 6.27e-11 5.3e-07 1.9e+04 4.7e-06 1.61e-06
## 11 18 8.604e+02 8.28e-13 3.14e-11 2.7e-07 4.8e+04 2.4e-06 1.60e-06
## 12 19 8.604e+02 4.09e-13 1.57e-11 1.3e-07 1.0e+05 1.2e-06 1.60e-06
## 13 20 8.604e+02 2.04e-13 7.84e-12 6.7e-08 2.2e+05 5.9e-07 1.60e-06
## 14 21 8.604e+02 1.01e-13 3.92e-12 3.3e-08 4.4e+05 2.9e-07 1.60e-06
## 15 22 8.604e+02 5.23e-14 1.96e-12 1.7e-08 8.9e+05 1.5e-07 1.60e-06
## 16 23 8.604e+02 2.43e-14 9.80e-13 8.4e-09 1.8e+06 7.3e-08 1.60e-06
## 17 24 8.604e+02 1.33e-14 4.90e-13 4.2e-09 3.6e+06 3.7e-08 1.60e-06
## 18 25 8.604e+02 6.47e-15 2.45e-13 2.1e-09 7.2e+06 1.8e-08 1.60e-06
## 19 26 8.604e+02 3.57e-15 1.23e-13 1.0e-09 1.4e+07 9.2e-09 1.60e-06
## 20 27 8.604e+02 5.29e-16 6.13e-14 5.2e-10 2.9e+07 4.6e-09 1.60e-06
## 21 28 8.604e+02 1.98e-15 3.06e-14 2.6e-10 5.8e+07 2.3e-09 1.60e-06
## 22 36 8.604e+02 0.00e+00 1.82e-18 1.6e-14 9.7e+11 1.4e-13 1.60e-06
##
## ***** FALSE CONVERGENCE *****
##
## FUNCTION 8.604278e+02 RELDX 1.557e-14
## FUNC. EVALS 36 GRAD. EVALS 22
## PRELDF 1.824e-18 NPRELDF 1.601e-06
##
## I FINAL X(I) D(I) G(I)
##
## 1 4.278928e+00 1.000e+00 -1.119e-02
## 2 4.975478e-02 1.000e+00 2.591e-03
summary(arch1_model)
##
## Call:
## garch(x = nyse_clean$return, order = c(1, 0))
##
## Model:
## GARCH(1,0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2203 -0.5202 0.1519 0.7279 3.9815
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 4.27893 83.25229 0.051 0.959
## b1 0.04975 18.48916 0.003 0.998
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 776.88, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 87.793, df = 1, p-value < 2.2e-16
arch2_model <- garch(nyse_clean$return, order = c(2, 0))
##
## ***** ESTIMATION WITH ANALYTICAL GRADIENT *****
##
##
## I INITIAL X(I) D(I)
##
## 1 4.021092e+00 1.000e+00
## 2 5.000000e-02 1.000e+00
## 3 5.000000e-02 1.000e+00
##
## IT NF F RELDF PRELDF RELDX STPPAR D*STEP NPRELDF
## 0 1 8.581e+02
## 1 5 8.581e+02 5.14e-06 1.02e-05 2.8e-04 8.7e+02 3.2e-03 4.45e-03
## 2 12 8.581e+02 6.59e-11 6.25e-10 3.8e-06 2.2e+00 4.7e-05 1.35e-07
## 3 15 8.581e+02 7.54e-11 3.87e-10 2.1e-06 3.6e+00 2.0e-05 1.35e-07
## 4 31 8.581e+02 1.85e-15 1.60e-14 1.1e-10 1.5e+06 9.0e-10 1.36e-07
## 5 39 8.581e+02 -1.59e-15 1.30e-18 9.1e-15 2.0e+10 7.3e-14 1.60e-07
##
## ***** FALSE CONVERGENCE *****
##
## FUNCTION 8.581133e+02 RELDX 9.055e-15
## FUNC. EVALS 39 GRAD. EVALS 5
## PRELDF 1.304e-18 NPRELDF 1.595e-07
##
## I FINAL X(I) D(I) G(I)
##
## 1 4.021524e+00 1.000e+00 1.530e-02
## 2 5.223246e-02 1.000e+00 -8.293e-04
## 3 5.222700e-02 1.000e+00 -1.780e-04
## Warning in garch(nyse_clean$return, order = c(2, 0)): singular information
summary(arch2_model)
##
## Call:
## garch(x = nyse_clean$return, order = c(2, 0))
##
## Model:
## GARCH(2,0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2303 -0.5216 0.1484 0.7264 3.9869
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 4.02152 NA NA NA
## b1 0.05223 NA NA NA
## b2 0.05223 NA NA NA
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 786.77, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 88.025, df = 1, p-value < 2.2e-16
# Compare ARCH(1) and ARCH(2) models
arch1_aic <- AIC(arch1_model)
arch2_aic <- AIC(arch2_model)
cat("ARCH(1) AIC:", arch1_aic, "\n")
## ARCH(1) AIC: 2987.477
cat("ARCH(2) AIC:", arch2_aic, "\n")
## ARCH(2) AIC: 2983.01
if (arch2_aic < arch1_aic) {
cat("ARCH(2) model fits better than ARCH(1).\n")
} else {
cat("ARCH(1) model fits better than ARCH(2).\n")
}
## ARCH(2) model fits better than ARCH(1).
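One caveat on the garch() calls above: in tseries::garch() the order argument is c(GARCH order, ARCH order), so order = c(1, 0) and c(2, 0) specify models with no ARCH terms, which may explain the false-convergence warnings. A pure ARCH(q) fit would be specified as below (a sketch; the estimates would need to be re-run and re-interpreted):

arch1_fit <- garch(nyse_clean$return, order = c(0, 1))  # ARCH(1)
arch2_fit <- garch(nyse_clean$return, order = c(0, 2))  # ARCH(2)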