install.packages("wooldridge")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
#Chapter 7 1
library(wooldridge)
data("sleep75")
data(sleep75)
model <- lm(sleep ~ totwrk +  educ + age + agesq + male , data = sleep75)
summary(model)
## 
## Call:
## lm(formula = sleep ~ totwrk + educ + age + agesq + male, data = sleep75)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2378.00  -243.29     6.74   259.24  1350.19 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3840.83197  235.10870  16.336   <2e-16 ***
## totwrk        -0.16342    0.01813  -9.013   <2e-16 ***
## educ         -11.71332    5.86689  -1.997   0.0463 *  
## age           -8.69668   11.20746  -0.776   0.4380    
## agesq          0.12844    0.13390   0.959   0.3378    
## male          87.75243   34.32616   2.556   0.0108 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 417.7 on 700 degrees of freedom
## Multiple R-squared:  0.1228, Adjusted R-squared:  0.1165 
## F-statistic: 19.59 on 5 and 700 DF,  p-value: < 2.2e-16
# (i) All other factors being equal, is there evidence that men sleep more than women? How strong is the evidence?
#p value is equal to 87.75/34.33=2.56. That means this value is significant according to the table. We can interpret that men sleep 87.75 minutes more than women.
# (ii) Is there a statistically significant tradeoff between working and sleeping? What is the estimated tradeoff?
#t significant=0.163/0.018=9.05. it is significant statistically
#When working time increased by one minute, sleeping time will be decreased by 0.163 which is equal to 9.78 minutes. 
# (iii) What other regression do you need to run to test the null hypothesis that, holding other factors fixed, age has no effect on sleeping?
#We just have to exclude age variable.
data(sleep75)
model <- lm(sleep ~ totwrk +  educ + male , data = sleep75)
summary(model)
## 
## Call:
## lm(formula = sleep ~ totwrk + educ + male, data = sleep75)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2380.27  -239.15     6.74   257.31  1370.63 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3747.51727   81.00609  46.262  < 2e-16 ***
## totwrk        -0.16734    0.01794  -9.329  < 2e-16 ***
## educ         -13.88479    5.65757  -2.454  0.01436 *  
## male          90.96919   34.27441   2.654  0.00813 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 418 on 702 degrees of freedom
## Multiple R-squared:  0.1193, Adjusted R-squared:  0.1155 
## F-statistic: 31.69 on 3 and 702 DF,  p-value: < 2.2e-16
#Rsq hasn't quite changed so age has no effect on sleeping time.
library(wooldridge)
data("gpa2")
data(gpa2)
model <- lm(sat ~ hsize +  hsizesq + female + black + I(female*black) , data = gpa2)
summary(model)
## 
## Call:
## lm(formula = sat ~ hsize + hsizesq + female + black + I(female * 
##     black), data = gpa2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -570.45  -89.54   -5.24   85.41  479.13 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1028.0972     6.2902 163.445  < 2e-16 ***
## hsize               19.2971     3.8323   5.035 4.97e-07 ***
## hsizesq             -2.1948     0.5272  -4.163 3.20e-05 ***
## female             -45.0915     4.2911 -10.508  < 2e-16 ***
## black             -169.8126    12.7131 -13.357  < 2e-16 ***
## I(female * black)   62.3064    18.1542   3.432 0.000605 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 133.4 on 4131 degrees of freedom
## Multiple R-squared:  0.08578,    Adjusted R-squared:  0.08468 
## F-statistic: 77.52 on 5 and 4131 DF,  p-value: < 2.2e-16
# (i) Is there strong evidence that hsize2 should be included in the model? From this equation,what is the optimal high school size?

# From this equation we don’t have the information about SE to calculate the significance of the coeficientes so we can not be sure. However R square is very small so seems that the equation is not very good. To calculate the optimal size we need to take the first derivative 19.30hsize − 2.19hsize2 the result is 4.4
# (ii) Holding hsize fixed, what is the estimated difference in SAT score between nonblack females and non-black males?

# For that we just need to use white female (female =1, black = 0) − 45.09female +62.31female = 17.22. This is just a simple comparison between white male and female
# (iii) What is the estimated difference in SAT score between non-black males and black males?

# − 169.81black
# (iv) What is the estimated difference in SAT score between black females and non-black females?

# − 169.81black +62.31female · black = -107.5

Chapter 7 (C1)

data(gpa1)
names(gpa1)
##  [1] "age"      "soph"     "junior"   "senior"   "senior5"  "male"    
##  [7] "campus"   "business" "engineer" "colGPA"   "hsGPA"    "ACT"     
## [13] "job19"    "job20"    "drive"    "bike"     "walk"     "voluntr" 
## [19] "PC"       "greek"    "car"      "siblings" "bgfriend" "clubs"   
## [25] "skipped"  "alcohol"  "gradMI"   "fathcoll" "mothcoll"
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)

(i) Add variables mothcoll and fathcoll to the equation

model <- lm(hsGPA ~ mothcoll + fathcoll, data = gpa1)

Add mothcoll and fathcoll to the model

model_updated <- update(model, . ~ . + mothcoll + fathcoll)

# View the results
summary(model_updated)
## 
## Call:
## lm(formula = hsGPA ~ mothcoll + fathcoll, data = gpa1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99342 -0.20982  0.00926  0.20926  0.60658 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.393421   0.046719  72.635   <2e-16 ***
## mothcoll     0.019080   0.057555   0.332    0.741    
## fathcoll    -0.002679   0.058303  -0.046    0.963    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3221 on 138 degrees of freedom
## Multiple R-squared:  0.0008268,  Adjusted R-squared:  -0.01365 
## F-statistic: 0.0571 on 2 and 138 DF,  p-value: 0.9445

(ii) Test for joint significance of mothcoll and fathcoll

joint_test <- coeftest(model, vcov = vcovHC(model, type = "HC1"), terms = c("mothcoll", "fathcoll"))

# Report the p-value
joint_test
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.3934207  0.0542083 62.5996   <2e-16 ***
## mothcoll     0.0190800  0.0571079  0.3341   0.7388    
## fathcoll    -0.0026795  0.0600842 -0.0446   0.9645    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(iii) Add hsGPA to the model

model_iii <- lm(PC ~ mothcoll + fathcoll + hsGPA, data = gpa1)

# Compare models to decide whether the generalization is needed
anova(model, model_iii)
## Warning in anova.lmlist(object, ...): models with response '"PC"' removed
## because response differs from model 1
## Analysis of Variance Table
## 
## Response: hsGPA
##            Df  Sum Sq  Mean Sq F value Pr(>F)
## mothcoll    1  0.0116 0.011629  0.1121 0.7383
## fathcoll    1  0.0002 0.000219  0.0021 0.9634
## Residuals 138 14.3175 0.103750

Chapter 8 1

#Which of the following are consequences of heteroskedasticity?
# (i) The OLS estimators, b^ j, are inconsistent.
# (ii) The usual F statistic no longer has an F distribution.
# (iii) The OLS estimators are no longer BLUE.
# All three statements are consequences of heteroskedasticity. It is important to detect and address heteroskedasticity to obtain valid and efficient inference in regression analysis. Common remedies include using heteroskedasticity-robust standard errors or transforming the data to stabilize the variance.

#Chapter 8 #C13

library(wooldridge)
data("fertil2")
library(sandwich)
library(lmtest)
# Assuming 'children', 'age', 'age_squared', 'educ', 'electric', 'urban' are columns in the dataset
model <- lm(children ~ age + I(age^2) + educ + electric + urban, data = fertil2)
# Calculate robust standard errors
robust_se <- sqrt(diag(vcovHC(model)))
# Combine non-robust and robust standard errors
summary_with_robust_se <- cbind(coef(model), "Robust SE" = robust_se)
summary(model)
## 
## Call:
## lm(formula = children ~ age + I(age^2) + educ + electric + urban, 
##     data = fertil2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9012 -0.7136 -0.0039  0.7119  7.4318 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.2225162  0.2401888 -17.580  < 2e-16 ***
## age          0.3409255  0.0165082  20.652  < 2e-16 ***
## I(age^2)    -0.0027412  0.0002718 -10.086  < 2e-16 ***
## educ        -0.0752323  0.0062966 -11.948  < 2e-16 ***
## electric    -0.3100404  0.0690045  -4.493 7.20e-06 ***
## urban       -0.2000339  0.0465062  -4.301 1.74e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.452 on 4352 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.5734, Adjusted R-squared:  0.5729 
## F-statistic:  1170 on 5 and 4352 DF,  p-value: < 2.2e-16
print(summary_with_robust_se)
##                             Robust SE
## (Intercept) -4.222516228 0.2443961935
## age          0.340925520 0.0192199445
## I(age^2)    -0.002741209 0.0003513959
## educ        -0.075232323 0.0063159137
## electric    -0.310040409 0.0640737262
## urban       -0.200033857 0.0455162364
# Assuming 'relig1', 'relig2', and 'relig3' are the religious dummy variables in the dataset
joint_test <- coeftest(model, vcov = vcovHC)
print(joint_test[, "Pr(>|t|)"])
##  (Intercept)          age     I(age^2)         educ     electric        urban 
## 9.635281e-65 5.015759e-68 7.640643e-15 3.247461e-32 1.351408e-06 1.135260e-05
# Obtain fitted values and residuals
fitted_values <- fitted(model)
residuals <- resid(model)

# Regression of residuals on fitted values
hetero_test <- lm(residuals^2 ~ fitted_values)
summary(hetero_test)
## 
## Call:
## lm(formula = residuals^2 ~ fitted_values)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.336 -1.897 -0.321  0.682 49.275 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.54042    0.09451  -5.718 1.15e-08 ***
## fitted_values  1.16693    0.03347  34.863  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.717 on 4356 degrees of freedom
## Multiple R-squared:  0.2182, Adjusted R-squared:  0.218 
## F-statistic:  1215 on 1 and 4356 DF,  p-value: < 2.2e-16

Chapter 8

#Question C4

library(wooldridge)
data(vote1)

str(vote1)
## 'data.frame':    173 obs. of  10 variables:
##  $ state   : chr  "AL" "AK" "AZ" "AZ" ...
##  $ district: int  7 1 2 3 3 4 2 3 5 6 ...
##  $ democA  : int  1 0 1 0 0 1 0 1 1 1 ...
##  $ voteA   : int  68 62 73 69 75 69 59 71 76 73 ...
##  $ expendA : num  328.3 626.4 99.6 319.7 159.2 ...
##  $ expendB : num  8.74 402.48 3.07 26.28 60.05 ...
##  $ prtystrA: int  41 60 55 64 66 46 58 49 71 64 ...
##  $ lexpendA: num  5.79 6.44 4.6 5.77 5.07 ...
##  $ lexpendB: num  2.17 6 1.12 3.27 4.1 ...
##  $ shareA  : num  97.4 60.9 97 92.4 72.6 ...
##  - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
summary(vote1)
##     state              district          democA           voteA     
##  Length:173         Min.   : 1.000   Min.   :0.0000   Min.   :16.0  
##  Class :character   1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.:36.0  
##  Mode  :character   Median : 6.000   Median :1.0000   Median :50.0  
##                     Mean   : 8.838   Mean   :0.5549   Mean   :50.5  
##                     3rd Qu.:11.000   3rd Qu.:1.0000   3rd Qu.:65.0  
##                     Max.   :42.000   Max.   :1.0000   Max.   :84.0  
##     expendA            expendB           prtystrA        lexpendA     
##  Min.   :   0.302   Min.   :   0.93   Min.   :22.00   Min.   :-1.197  
##  1st Qu.:  81.634   1st Qu.:  60.05   1st Qu.:44.00   1st Qu.: 4.402  
##  Median : 242.782   Median : 221.53   Median :50.00   Median : 5.492  
##  Mean   : 310.611   Mean   : 305.09   Mean   :49.76   Mean   : 5.026  
##  3rd Qu.: 457.410   3rd Qu.: 450.72   3rd Qu.:56.00   3rd Qu.: 6.126  
##  Max.   :1470.674   Max.   :1548.19   Max.   :71.00   Max.   : 7.293  
##     lexpendB            shareA        
##  Min.   :-0.07257   Min.   : 0.09464  
##  1st Qu.: 4.09524   1st Qu.:18.86800  
##  Median : 5.40056   Median :50.84990  
##  Mean   : 4.94437   Mean   :51.07654  
##  3rd Qu.: 6.11084   3rd Qu.:84.25510  
##  Max.   : 7.34484   Max.   :99.49500
model <- lm(voteA ~ prtystrA + democA + log(expendA) + log(expendB), data = vote1)
summary(model)
## 
## Call:
## lm(formula = voteA ~ prtystrA + democA + log(expendA) + log(expendB), 
##     data = vote1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.576  -4.864  -1.146   4.903  24.566 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.66141    4.73604   7.952 2.56e-13 ***
## prtystrA      0.25192    0.07129   3.534  0.00053 ***
## democA        3.79294    1.40652   2.697  0.00772 ** 
## log(expendA)  5.77929    0.39182  14.750  < 2e-16 ***
## log(expendB) -6.23784    0.39746 -15.694  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared:  0.8012, Adjusted R-squared:  0.7964 
## F-statistic: 169.2 on 4 and 168 DF,  p-value: < 2.2e-16
data5 <- wooldridge::vote1
head(data5)
##   state district democA voteA expendA expendB prtystrA lexpendA lexpendB
## 1    AL        7      1    68 328.296   8.737       41 5.793916 2.167567
## 2    AK        1      0    62 626.377 402.477       60 6.439952 5.997638
## 3    AZ        2      1    73  99.607   3.065       55 4.601233 1.120048
## 4    AZ        3      0    69 319.690  26.281       64 5.767352 3.268846
## 5    AR        3      0    75 159.221  60.054       66 5.070293 4.095244
## 6    AR        4      1    69 570.155  21.393       46 6.345908 3.063064
##     shareA
## 1 97.40767
## 2 60.88104
## 3 97.01476
## 4 92.40370
## 5 72.61247
## 6 96.38355
model9 <- lm(voteA ~ prtystrA + democA + log(expendA) + log(expendB), data = data5)
summary(model9)
## 
## Call:
## lm(formula = voteA ~ prtystrA + democA + log(expendA) + log(expendB), 
##     data = data5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.576  -4.864  -1.146   4.903  24.566 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.66141    4.73604   7.952 2.56e-13 ***
## prtystrA      0.25192    0.07129   3.534  0.00053 ***
## democA        3.79294    1.40652   2.697  0.00772 ** 
## log(expendA)  5.77929    0.39182  14.750  < 2e-16 ***
## log(expendB) -6.23784    0.39746 -15.694  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared:  0.8012, Adjusted R-squared:  0.7964 
## F-statistic: 169.2 on 4 and 168 DF,  p-value: < 2.2e-16
residuals <- residuals(model9)
residuals_model <- lm(residuals ~ prtystrA + democA + log(expendA) + log(expendB), data = data5)
summary(residuals_model)
## 
## Call:
## lm(formula = residuals ~ prtystrA + democA + log(expendA) + log(expendB), 
##     data = data5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.576  -4.864  -1.146   4.903  24.566 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -1.183e-14  4.736e+00       0        1
## prtystrA      1.493e-16  7.129e-02       0        1
## democA        1.843e-15  1.407e+00       0        1
## log(expendA) -3.811e-16  3.918e-01       0        1
## log(expendB)  1.119e-15  3.975e-01       0        1
## 
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared:  5.525e-32,  Adjusted R-squared:  -0.02381 
## F-statistic: 2.32e-30 on 4 and 168 DF,  p-value: 1
bptest_result <- bptest(model9)
print(bptest_result)
## 
##  studentized Breusch-Pagan test
## 
## data:  model9
## BP = 9.0934, df = 4, p-value = 0.05881
white_data <- data.frame(residuals_squared = residuals^2, data5$prtystrA, data5$democA, log_expendA = log(data5$expendA), log_expendB = log(data5$expendB))
white_model <- lm(residuals_squared ~ data5$prtystrA + data5$democA + log_expendA + log_expendB, data = white_data)
f_statistic <- summary(white_model)$fstatistic
p_value <- pf(f_statistic[1], f_statistic[2], f_statistic[3], lower.tail = FALSE)
print(paste("F-statistic:", f_statistic[1], "P-value:", p_value))
## [1] "F-statistic: 2.33011268371627 P-value: 0.0580575140885532"

#Chapter9 C4.) # Load the ‘wooldridge’ package

library(wooldridge)
# Load the 'infmrt' dataset
data("infmrt")
# Filter the dataset for the year 1990
infmrt_1990 <- subset(infmrt, year == 1990)
# Re-estimate equation 9.43 including a dummy variable for the observation on the District of Columbia (DC)
model_with_dummy <- lm(infmort ~ log(pcinc) + log(physic) + log(popul) + DC, data = infmrt_1990)

# Print the summary of the model
summary(model_with_dummy)
## 
## Call:
## lm(formula = infmort ~ log(pcinc) + log(physic) + log(popul) + 
##     DC, data = infmrt_1990)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4964 -0.8076  0.0000  0.9358  2.6077 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  23.9548    12.4195   1.929  0.05994 .  
## log(pcinc)   -0.5669     1.6412  -0.345  0.73135    
## log(physic)  -2.7418     1.1908  -2.303  0.02588 *  
## log(popul)    0.6292     0.1911   3.293  0.00191 ** 
## DC           16.0350     1.7692   9.064 8.43e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.246 on 46 degrees of freedom
## Multiple R-squared:  0.691,  Adjusted R-squared:  0.6641 
## F-statistic: 25.71 on 4 and 46 DF,  p-value: 3.146e-11

#Chapter 10

library(wooldridge)
data(intdef)
# 1. Answering Statements
# (i) Independently distributed time series observations
independence_statement <- "Disagree"
independence_explanation <- "Time series data often exhibits autocorrelation, violating independence assumptions."
# (ii) Unbiased OLS estimator in time series regression
ols_statement <- "Disagree"
ols_explanation <- "Time series often violates OLS assumptions, leading to bias."
# (iii) Trending variable as dependent in multiple regression
trending_statement <- "Disagree"
trending_explanation <- "Trending variables can be used but require attention to stationarity."
# (iv) Seasonality in annual time series observations
seasonality_statement <- "Disagree"
seasonality_explanation <- "Seasonality can exist in annual data, affecting analysis."
# C1. Using a Dummy Variable for Federal Reserve Policy Change
# Create a dummy variable for the policy change after 1979
intdef$dummy <- ifelse(intdef$year > 1979, 1, 0)
# Equation with dummy variable
model_with_dummy <- lm(inf ~ dummy + ci3 + cdef + cinf, data = intdef)
summary(model_with_dummy)
## 
## Call:
## lm(formula = inf ~ dummy + ci3 + cdef + cinf, data = intdef)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1867 -1.8047 -0.8382  0.9943  6.7831 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.3937     0.5193   6.535 3.21e-08 ***
## dummy         0.9400     0.7977   1.178  0.24423    
## ci3           0.4391     0.3172   1.385  0.17233    
## cdef          0.4382     0.3370   1.300  0.19954    
## cinf          0.5707     0.2103   2.714  0.00909 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.799 on 50 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2012, Adjusted R-squared:  0.1373 
## F-statistic: 3.148 on 4 and 50 DF,  p-value: 0.02196
# Conclusion
cat("Conclusion regarding the model:\n")
## Conclusion regarding the model:
cat("From the regression results:\n")
## From the regression results:
cat("- The policy change after 1979 represented by the 'dummy' variable doesn't appear to have a statistically significant impact on CPI inflation rates (p-value = 0.24423).\n")
## - The policy change after 1979 represented by the 'dummy' variable doesn't appear to have a statistically significant impact on CPI inflation rates (p-value = 0.24423).
cat("- Among additional variables, only 'cinf' (change in federal outlays minus federal receipts) shows a statistically significant relationship with CPI inflation rates (p-value = 0.00909).\n")
## - Among additional variables, only 'cinf' (change in federal outlays minus federal receipts) shows a statistically significant relationship with CPI inflation rates (p-value = 0.00909).
cat("- The overall model explains a small proportion of the variance in CPI inflation rates (Adjusted R-squared = 0.1373).\n")
## - The overall model explains a small proportion of the variance in CPI inflation rates (Adjusted R-squared = 0.1373).
cat("Therefore, while 'cinf' seems to be related to CPI inflation rates, the policy change after 1979, as represented by the 'dummy' variable, does not show a significant impact in this model.")
## Therefore, while 'cinf' seems to be related to CPI inflation rates, the policy change after 1979, as represented by the 'dummy' variable, does not show a significant impact in this model.

#Chapter 10 #C6

data(fertil3)
# Step (i): Regress gfr on t and tsq to obtain the residuals (gft)
model_t_tsq <- lm(gfr ~ t + tsq, data = fertil3)
residuals_gft <- resid(model_t_tsq)
# Step (ii): Regress gft on all variables from equation (10.35), including t and tsq
model_10_35 <- lm(residuals_gft ~ pe + year + tsq + pe_1 + pe_2 + pe_3 + pe_4 + pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + cpe_3 + cpe_4 + gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + gfr_2 + t + tsq, data = fertil3)
summary(model_10_35)
## Warning in summary.lm(model_10_35): essentially perfect fit: summary may be
## unreliable
## 
## Call:
## lm(formula = residuals_gft ~ pe + year + tsq + pe_1 + pe_2 + 
##     pe_3 + pe_4 + pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + 
##     cpe_3 + cpe_4 + gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + 
##     gfr_2 + t + tsq, data = fertil3)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -5.495e-14 -2.374e-15 -2.200e-17  2.638e-15  3.812e-14 
## 
## Coefficients: (6 not defined because of singularities)
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)  3.003e+01  4.452e-12  6.746e+12   <2e-16 ***
## pe          -2.624e-16  1.381e-16 -1.900e+00   0.0634 .  
## year        -7.170e-02  2.307e-15 -3.107e+13   <2e-16 ***
## tsq          7.959e-03  6.783e-17  1.173e+14   <2e-16 ***
## pe_1         3.803e-16  1.519e-16  2.504e+00   0.0157 *  
## pe_2         2.118e-17  1.653e-16  1.280e-01   0.8986    
## pe_3        -2.758e-16  1.563e-16 -1.764e+00   0.0839 .  
## pe_4         1.363e-17  1.222e-16  1.120e-01   0.9117    
## pill         5.956e-15  1.134e-14  5.250e-01   0.6020    
## ww2         -3.761e-15  1.268e-14 -2.970e-01   0.7679    
## tcu          1.130e-18  5.813e-19  1.945e+00   0.0576 .  
## cgfr         1.000e+00  5.225e-16  1.914e+15   <2e-16 ***
## cpe                 NA         NA         NA       NA    
## cpe_1               NA         NA         NA       NA    
## cpe_2               NA         NA         NA       NA    
## cpe_3               NA         NA         NA       NA    
## cpe_4       -2.998e-17  1.239e-16 -2.420e-01   0.8098    
## gfr_1        1.000e+00  2.436e-16  4.105e+15   <2e-16 ***
## cgfr_1      -2.649e-16  5.109e-16 -5.190e-01   0.6064    
## cgfr_2       4.872e-16  5.394e-16  9.030e-01   0.3708    
## cgfr_3      -7.235e-16  4.843e-16 -1.494e+00   0.1416    
## cgfr_4       8.808e-19  4.778e-16  2.000e-03   0.9985    
## gfr_2               NA         NA         NA       NA    
## t                   NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.244e-14 on 49 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 6.667e+30 on 17 and 49 DF,  p-value: < 2.2e-16
# Step (iii): Re-estimate equation (10.35) but add the 'pe_3' as an additional variable to check stat. significance.
model_with_pe_3 <- lm(gfr ~ pe + year + tsq + pe_1 + pe_2 + pe_3 + pe_4 + pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + cpe_3 + cpe_4 + gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + gfr_2 + t + tsq + pe_3, data = fertil3)
summary(model_with_pe_3)
## Warning in summary.lm(model_with_pe_3): essentially perfect fit: summary may be
## unreliable
## 
## Call:
## lm(formula = gfr ~ pe + year + tsq + pe_1 + pe_2 + pe_3 + pe_4 + 
##     pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + cpe_3 + cpe_4 + 
##     gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + gfr_2 + t + tsq + 
##     pe_3, data = fertil3)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -6.294e-14 -3.614e-15  3.870e-16  3.960e-15  5.021e-14 
## 
## Coefficients: (6 not defined because of singularities)
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept) -5.849e-12  4.744e-12 -1.233e+00    0.224    
## pe          -6.052e-17  1.472e-16 -4.110e-01    0.683    
## year         3.028e-15  2.459e-15  1.231e+00    0.224    
## tsq         -7.141e-17  7.229e-17 -9.880e-01    0.328    
## pe_1         1.081e-16  1.618e-16  6.680e-01    0.507    
## pe_2         3.984e-18  1.762e-16  2.300e-02    0.982    
## pe_3        -2.328e-17  1.666e-16 -1.400e-01    0.889    
## pe_4        -5.775e-17  1.303e-16 -4.430e-01    0.660    
## pill        -1.025e-14  1.209e-14 -8.480e-01    0.401    
## ww2          1.107e-14  1.351e-14  8.190e-01    0.417    
## tcu          5.447e-19  6.195e-19  8.790e-01    0.384    
## cgfr         1.000e+00  5.569e-16  1.796e+15   <2e-16 ***
## cpe                 NA         NA         NA       NA    
## cpe_1               NA         NA         NA       NA    
## cpe_2               NA         NA         NA       NA    
## cpe_3               NA         NA         NA       NA    
## cpe_4        5.996e-17  1.320e-16  4.540e-01    0.652    
## gfr_1        1.000e+00  2.596e-16  3.852e+15   <2e-16 ***
## cgfr_1      -7.823e-16  5.445e-16 -1.437e+00    0.157    
## cgfr_2       5.662e-17  5.749e-16  9.900e-02    0.922    
## cgfr_3      -5.129e-16  5.162e-16 -9.940e-01    0.325    
## cgfr_4      -1.866e-16  5.092e-16 -3.660e-01    0.716    
## gfr_2               NA         NA         NA       NA    
## t                   NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.326e-14 on 49 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 7.853e+30 on 17 and 49 DF,  p-value: < 2.2e-16

(i) Estimate the model

## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
## Analysis of Variance Table
## 
## Model 1: log(wage) ~ educ + exper + tenure + married + black + south + 
##     urban
## Model 2: log(wage) ~ educ + exper + tenure + married + black + south + 
##     urban + exper + tenure
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1    927 123.82                      
## 2    927 123.82  0         0
## 
## Call:
## lm(formula = log(wage) ~ educ * black + exper + tenure + married + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.97782 -0.21832  0.00475  0.24136  1.23226 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.374817   0.114703  46.859  < 2e-16 ***
## educ         0.067115   0.006428  10.442  < 2e-16 ***
## black        0.094809   0.255399   0.371 0.710561    
## exper        0.013826   0.003191   4.333 1.63e-05 ***
## tenure       0.011787   0.002453   4.805 1.80e-06 ***
## married      0.198908   0.039047   5.094 4.25e-07 ***
## south       -0.089450   0.026277  -3.404 0.000692 ***
## urban        0.183852   0.026955   6.821 1.63e-11 ***
## educ:black  -0.022624   0.020183  -1.121 0.262603    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared:  0.2536, Adjusted R-squared:  0.2471 
## F-statistic: 39.32 on 8 and 926 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
## <NA> 
##   NA