install.packages("wooldridge")

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

#Chapter 7 1

library(wooldridge)
data("sleep75")

data(sleep75)
model <- lm(sleep ~ totwrk +  educ + age + agesq + male , data = sleep75)
summary(model)

## 
## Call:
## lm(formula = sleep ~ totwrk + educ + age + agesq + male, data = sleep75)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2378.00  -243.29     6.74   259.24  1350.19 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3840.83197  235.10870  16.336   <2e-16 ***
## totwrk        -0.16342    0.01813  -9.013   <2e-16 ***
## educ         -11.71332    5.86689  -1.997   0.0463 *  
## age           -8.69668   11.20746  -0.776   0.4380    
## agesq          0.12844    0.13390   0.959   0.3378    
## male          87.75243   34.32616   2.556   0.0108 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 417.7 on 700 degrees of freedom
## Multiple R-squared:  0.1228, Adjusted R-squared:  0.1165 
## F-statistic: 19.59 on 5 and 700 DF,  p-value: < 2.2e-16

# (i) All other factors being equal, is there evidence that men sleep more than women? How strong is the evidence?
#p value is equal to 87.75/34.33=2.56. That means this value is significant according to the table. We can interpret that men sleep 87.75 minutes more than women.

# (ii) Is there a statistically significant tradeoff between working and sleeping? What is the estimated tradeoff?
#t significant=0.163/0.018=9.05. it is significant statistically
#When working time increased by one minute, sleeping time will be decreased by 0.163 which is equal to 9.78 minutes.

# (iii) What other regression do you need to run to test the null hypothesis that, holding other factors fixed, age has no effect on sleeping?
#We just have to exclude age variable.
data(sleep75)
model <- lm(sleep ~ totwrk +  educ + male , data = sleep75)
summary(model)

## 
## Call:
## lm(formula = sleep ~ totwrk + educ + male, data = sleep75)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2380.27  -239.15     6.74   257.31  1370.63 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3747.51727   81.00609  46.262  < 2e-16 ***
## totwrk        -0.16734    0.01794  -9.329  < 2e-16 ***
## educ         -13.88479    5.65757  -2.454  0.01436 *  
## male          90.96919   34.27441   2.654  0.00813 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 418 on 702 degrees of freedom
## Multiple R-squared:  0.1193, Adjusted R-squared:  0.1155 
## F-statistic: 31.69 on 3 and 702 DF,  p-value: < 2.2e-16

#Rsq hasn't quite changed so age has no effect on sleeping time.

library(wooldridge)
data("gpa2")

data(gpa2)
model <- lm(sat ~ hsize +  hsizesq + female + black + I(female*black) , data = gpa2)
summary(model)

## 
## Call:
## lm(formula = sat ~ hsize + hsizesq + female + black + I(female * 
##     black), data = gpa2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -570.45  -89.54   -5.24   85.41  479.13 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1028.0972     6.2902 163.445  < 2e-16 ***
## hsize               19.2971     3.8323   5.035 4.97e-07 ***
## hsizesq             -2.1948     0.5272  -4.163 3.20e-05 ***
## female             -45.0915     4.2911 -10.508  < 2e-16 ***
## black             -169.8126    12.7131 -13.357  < 2e-16 ***
## I(female * black)   62.3064    18.1542   3.432 0.000605 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 133.4 on 4131 degrees of freedom
## Multiple R-squared:  0.08578,    Adjusted R-squared:  0.08468 
## F-statistic: 77.52 on 5 and 4131 DF,  p-value: < 2.2e-16

# (i) Is there strong evidence that hsize2 should be included in the model? From this equation,what is the optimal high school size?

# From this equation we don’t have the information about SE to calculate the significance of the coeficientes so we can not be sure. However R square is very small so seems that the equation is not very good. To calculate the optimal size we need to take the first derivative 19.30hsize − 2.19hsize2 the result is 4.4

# (ii) Holding hsize fixed, what is the estimated difference in SAT score between nonblack females and non-black males?

# For that we just need to use white female (female =1, black = 0) − 45.09female +62.31female = 17.22. This is just a simple comparison between white male and female

# (iii) What is the estimated difference in SAT score between non-black males and black males?

# − 169.81black

# (iv) What is the estimated difference in SAT score between black females and non-black females?

# − 169.81black +62.31female · black = -107.5

Chapter 7 (C1)

data(gpa1)
names(gpa1)

##  [1] "age"      "soph"     "junior"   "senior"   "senior5"  "male"    
##  [7] "campus"   "business" "engineer" "colGPA"   "hsGPA"    "ACT"     
## [13] "job19"    "job20"    "drive"    "bike"     "walk"     "voluntr" 
## [19] "PC"       "greek"    "car"      "siblings" "bgfriend" "clubs"   
## [25] "skipped"  "alcohol"  "gradMI"   "fathcoll" "mothcoll"

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(sandwich)

(i) Add variables mothcoll and fathcoll to the equation

model <- lm(hsGPA ~ mothcoll + fathcoll, data = gpa1)

Add mothcoll and fathcoll to the model

model_updated <- update(model, . ~ . + mothcoll + fathcoll)

# View the results
summary(model_updated)

## 
## Call:
## lm(formula = hsGPA ~ mothcoll + fathcoll, data = gpa1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99342 -0.20982  0.00926  0.20926  0.60658 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.393421   0.046719  72.635   <2e-16 ***
## mothcoll     0.019080   0.057555   0.332    0.741    
## fathcoll    -0.002679   0.058303  -0.046    0.963    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3221 on 138 degrees of freedom
## Multiple R-squared:  0.0008268,  Adjusted R-squared:  -0.01365 
## F-statistic: 0.0571 on 2 and 138 DF,  p-value: 0.9445

(ii) Test for joint significance of mothcoll and fathcoll

joint_test <- coeftest(model, vcov = vcovHC(model, type = "HC1"), terms = c("mothcoll", "fathcoll"))

# Report the p-value
joint_test

## 
## t test of coefficients:
## 
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.3934207  0.0542083 62.5996   <2e-16 ***
## mothcoll     0.0190800  0.0571079  0.3341   0.7388    
## fathcoll    -0.0026795  0.0600842 -0.0446   0.9645    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(iii) Add hsGPA to the model

model_iii <- lm(PC ~ mothcoll + fathcoll + hsGPA, data = gpa1)

# Compare models to decide whether the generalization is needed
anova(model, model_iii)

## Warning in anova.lmlist(object, ...): models with response '"PC"' removed
## because response differs from model 1

## Analysis of Variance Table
## 
## Response: hsGPA
##            Df  Sum Sq  Mean Sq F value Pr(>F)
## mothcoll    1  0.0116 0.011629  0.1121 0.7383
## fathcoll    1  0.0002 0.000219  0.0021 0.9634
## Residuals 138 14.3175 0.103750

Chapter 8 1

#Which of the following are consequences of heteroskedasticity?
# (i) The OLS estimators, b^ j, are inconsistent.
# (ii) The usual F statistic no longer has an F distribution.
# (iii) The OLS estimators are no longer BLUE.
# All three statements are consequences of heteroskedasticity. It is important to detect and address heteroskedasticity to obtain valid and efficient inference in regression analysis. Common remedies include using heteroskedasticity-robust standard errors or transforming the data to stabilize the variance.

#Chapter 8 #C13

library(wooldridge)
data("fertil2")
library(sandwich)
library(lmtest)

# Assuming 'children', 'age', 'age_squared', 'educ', 'electric', 'urban' are columns in the dataset
model <- lm(children ~ age + I(age^2) + educ + electric + urban, data = fertil2)

# Calculate robust standard errors
robust_se <- sqrt(diag(vcovHC(model)))

# Combine non-robust and robust standard errors
summary_with_robust_se <- cbind(coef(model), "Robust SE" = robust_se)
summary(model)

## 
## Call:
## lm(formula = children ~ age + I(age^2) + educ + electric + urban, 
##     data = fertil2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9012 -0.7136 -0.0039  0.7119  7.4318 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.2225162  0.2401888 -17.580  < 2e-16 ***
## age          0.3409255  0.0165082  20.652  < 2e-16 ***
## I(age^2)    -0.0027412  0.0002718 -10.086  < 2e-16 ***
## educ        -0.0752323  0.0062966 -11.948  < 2e-16 ***
## electric    -0.3100404  0.0690045  -4.493 7.20e-06 ***
## urban       -0.2000339  0.0465062  -4.301 1.74e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.452 on 4352 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.5734, Adjusted R-squared:  0.5729 
## F-statistic:  1170 on 5 and 4352 DF,  p-value: < 2.2e-16

print(summary_with_robust_se)

##                             Robust SE
## (Intercept) -4.222516228 0.2443961935
## age          0.340925520 0.0192199445
## I(age^2)    -0.002741209 0.0003513959
## educ        -0.075232323 0.0063159137
## electric    -0.310040409 0.0640737262
## urban       -0.200033857 0.0455162364

# Assuming 'relig1', 'relig2', and 'relig3' are the religious dummy variables in the dataset
joint_test <- coeftest(model, vcov = vcovHC)
print(joint_test[, "Pr(>|t|)"])

##  (Intercept)          age     I(age^2)         educ     electric        urban 
## 9.635281e-65 5.015759e-68 7.640643e-15 3.247461e-32 1.351408e-06 1.135260e-05

# Obtain fitted values and residuals
fitted_values <- fitted(model)
residuals <- resid(model)

# Regression of residuals on fitted values
hetero_test <- lm(residuals^2 ~ fitted_values)
summary(hetero_test)

## 
## Call:
## lm(formula = residuals^2 ~ fitted_values)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.336 -1.897 -0.321  0.682 49.275 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.54042    0.09451  -5.718 1.15e-08 ***
## fitted_values  1.16693    0.03347  34.863  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.717 on 4356 degrees of freedom
## Multiple R-squared:  0.2182, Adjusted R-squared:  0.218 
## F-statistic:  1215 on 1 and 4356 DF,  p-value: < 2.2e-16

Chapter 8

#Question C4

library(wooldridge)
data(vote1)

str(vote1)

## 'data.frame':    173 obs. of  10 variables:
##  $ state   : chr  "AL" "AK" "AZ" "AZ" ...
##  $ district: int  7 1 2 3 3 4 2 3 5 6 ...
##  $ democA  : int  1 0 1 0 0 1 0 1 1 1 ...
##  $ voteA   : int  68 62 73 69 75 69 59 71 76 73 ...
##  $ expendA : num  328.3 626.4 99.6 319.7 159.2 ...
##  $ expendB : num  8.74 402.48 3.07 26.28 60.05 ...
##  $ prtystrA: int  41 60 55 64 66 46 58 49 71 64 ...
##  $ lexpendA: num  5.79 6.44 4.6 5.77 5.07 ...
##  $ lexpendB: num  2.17 6 1.12 3.27 4.1 ...
##  $ shareA  : num  97.4 60.9 97 92.4 72.6 ...
##  - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"

summary(vote1)

##     state              district          democA           voteA     
##  Length:173         Min.   : 1.000   Min.   :0.0000   Min.   :16.0  
##  Class :character   1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.:36.0  
##  Mode  :character   Median : 6.000   Median :1.0000   Median :50.0  
##                     Mean   : 8.838   Mean   :0.5549   Mean   :50.5  
##                     3rd Qu.:11.000   3rd Qu.:1.0000   3rd Qu.:65.0  
##                     Max.   :42.000   Max.   :1.0000   Max.   :84.0  
##     expendA            expendB           prtystrA        lexpendA     
##  Min.   :   0.302   Min.   :   0.93   Min.   :22.00   Min.   :-1.197  
##  1st Qu.:  81.634   1st Qu.:  60.05   1st Qu.:44.00   1st Qu.: 4.402  
##  Median : 242.782   Median : 221.53   Median :50.00   Median : 5.492  
##  Mean   : 310.611   Mean   : 305.09   Mean   :49.76   Mean   : 5.026  
##  3rd Qu.: 457.410   3rd Qu.: 450.72   3rd Qu.:56.00   3rd Qu.: 6.126  
##  Max.   :1470.674   Max.   :1548.19   Max.   :71.00   Max.   : 7.293  
##     lexpendB            shareA        
##  Min.   :-0.07257   Min.   : 0.09464  
##  1st Qu.: 4.09524   1st Qu.:18.86800  
##  Median : 5.40056   Median :50.84990  
##  Mean   : 4.94437   Mean   :51.07654  
##  3rd Qu.: 6.11084   3rd Qu.:84.25510  
##  Max.   : 7.34484   Max.   :99.49500

model <- lm(voteA ~ prtystrA + democA + log(expendA) + log(expendB), data = vote1)
summary(model)

## 
## Call:
## lm(formula = voteA ~ prtystrA + democA + log(expendA) + log(expendB), 
##     data = vote1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.576  -4.864  -1.146   4.903  24.566 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.66141    4.73604   7.952 2.56e-13 ***
## prtystrA      0.25192    0.07129   3.534  0.00053 ***
## democA        3.79294    1.40652   2.697  0.00772 ** 
## log(expendA)  5.77929    0.39182  14.750  < 2e-16 ***
## log(expendB) -6.23784    0.39746 -15.694  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared:  0.8012, Adjusted R-squared:  0.7964 
## F-statistic: 169.2 on 4 and 168 DF,  p-value: < 2.2e-16

data5 <- wooldridge::vote1
head(data5)

##   state district democA voteA expendA expendB prtystrA lexpendA lexpendB
## 1    AL        7      1    68 328.296   8.737       41 5.793916 2.167567
## 2    AK        1      0    62 626.377 402.477       60 6.439952 5.997638
## 3    AZ        2      1    73  99.607   3.065       55 4.601233 1.120048
## 4    AZ        3      0    69 319.690  26.281       64 5.767352 3.268846
## 5    AR        3      0    75 159.221  60.054       66 5.070293 4.095244
## 6    AR        4      1    69 570.155  21.393       46 6.345908 3.063064
##     shareA
## 1 97.40767
## 2 60.88104
## 3 97.01476
## 4 92.40370
## 5 72.61247
## 6 96.38355

model9 <- lm(voteA ~ prtystrA + democA + log(expendA) + log(expendB), data = data5)
summary(model9)

## 
## Call:
## lm(formula = voteA ~ prtystrA + democA + log(expendA) + log(expendB), 
##     data = data5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.576  -4.864  -1.146   4.903  24.566 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.66141    4.73604   7.952 2.56e-13 ***
## prtystrA      0.25192    0.07129   3.534  0.00053 ***
## democA        3.79294    1.40652   2.697  0.00772 ** 
## log(expendA)  5.77929    0.39182  14.750  < 2e-16 ***
## log(expendB) -6.23784    0.39746 -15.694  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared:  0.8012, Adjusted R-squared:  0.7964 
## F-statistic: 169.2 on 4 and 168 DF,  p-value: < 2.2e-16

residuals <- residuals(model9)
residuals_model <- lm(residuals ~ prtystrA + democA + log(expendA) + log(expendB), data = data5)
summary(residuals_model)

## 
## Call:
## lm(formula = residuals ~ prtystrA + democA + log(expendA) + log(expendB), 
##     data = data5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.576  -4.864  -1.146   4.903  24.566 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -1.183e-14  4.736e+00       0        1
## prtystrA      1.493e-16  7.129e-02       0        1
## democA        1.843e-15  1.407e+00       0        1
## log(expendA) -3.811e-16  3.918e-01       0        1
## log(expendB)  1.119e-15  3.975e-01       0        1
## 
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared:  5.525e-32,  Adjusted R-squared:  -0.02381 
## F-statistic: 2.32e-30 on 4 and 168 DF,  p-value: 1

bptest_result <- bptest(model9)
print(bptest_result)

## 
##  studentized Breusch-Pagan test
## 
## data:  model9
## BP = 9.0934, df = 4, p-value = 0.05881

white_data <- data.frame(residuals_squared = residuals^2, data5$prtystrA, data5$democA, log_expendA = log(data5$expendA), log_expendB = log(data5$expendB))
white_model <- lm(residuals_squared ~ data5$prtystrA + data5$democA + log_expendA + log_expendB, data = white_data)
f_statistic <- summary(white_model)$fstatistic
p_value <- pf(f_statistic[1], f_statistic[2], f_statistic[3], lower.tail = FALSE)
print(paste("F-statistic:", f_statistic[1], "P-value:", p_value))

## [1] "F-statistic: 2.33011268371627 P-value: 0.0580575140885532"

#Chapter9 C4.) # Load the ‘wooldridge’ package

library(wooldridge)

# Load the 'infmrt' dataset
data("infmrt")

# Filter the dataset for the year 1990
infmrt_1990 <- subset(infmrt, year == 1990)

# Re-estimate equation 9.43 including a dummy variable for the observation on the District of Columbia (DC)
model_with_dummy <- lm(infmort ~ log(pcinc) + log(physic) + log(popul) + DC, data = infmrt_1990)

# Print the summary of the model
summary(model_with_dummy)

## 
## Call:
## lm(formula = infmort ~ log(pcinc) + log(physic) + log(popul) + 
##     DC, data = infmrt_1990)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4964 -0.8076  0.0000  0.9358  2.6077 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  23.9548    12.4195   1.929  0.05994 .  
## log(pcinc)   -0.5669     1.6412  -0.345  0.73135    
## log(physic)  -2.7418     1.1908  -2.303  0.02588 *  
## log(popul)    0.6292     0.1911   3.293  0.00191 ** 
## DC           16.0350     1.7692   9.064 8.43e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.246 on 46 degrees of freedom
## Multiple R-squared:  0.691,  Adjusted R-squared:  0.6641 
## F-statistic: 25.71 on 4 and 46 DF,  p-value: 3.146e-11

#Chapter 10

library(wooldridge)

data(intdef)

# 1. Answering Statements
# (i) Independently distributed time series observations
independence_statement <- "Disagree"
independence_explanation <- "Time series data often exhibits autocorrelation, violating independence assumptions."

# (ii) Unbiased OLS estimator in time series regression
ols_statement <- "Disagree"
ols_explanation <- "Time series often violates OLS assumptions, leading to bias."

# (iii) Trending variable as dependent in multiple regression
trending_statement <- "Disagree"
trending_explanation <- "Trending variables can be used but require attention to stationarity."

# (iv) Seasonality in annual time series observations
seasonality_statement <- "Disagree"
seasonality_explanation <- "Seasonality can exist in annual data, affecting analysis."

# C1. Using a Dummy Variable for Federal Reserve Policy Change
# Create a dummy variable for the policy change after 1979
intdef$dummy <- ifelse(intdef$year > 1979, 1, 0)

# Equation with dummy variable
model_with_dummy <- lm(inf ~ dummy + ci3 + cdef + cinf, data = intdef)
summary(model_with_dummy)

## 
## Call:
## lm(formula = inf ~ dummy + ci3 + cdef + cinf, data = intdef)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1867 -1.8047 -0.8382  0.9943  6.7831 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.3937     0.5193   6.535 3.21e-08 ***
## dummy         0.9400     0.7977   1.178  0.24423    
## ci3           0.4391     0.3172   1.385  0.17233    
## cdef          0.4382     0.3370   1.300  0.19954    
## cinf          0.5707     0.2103   2.714  0.00909 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.799 on 50 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2012, Adjusted R-squared:  0.1373 
## F-statistic: 3.148 on 4 and 50 DF,  p-value: 0.02196

# Conclusion
cat("Conclusion regarding the model:\n")

## Conclusion regarding the model:

cat("From the regression results:\n")

## From the regression results:

cat("- The policy change after 1979 represented by the 'dummy' variable doesn't appear to have a statistically significant impact on CPI inflation rates (p-value = 0.24423).\n")

## - The policy change after 1979 represented by the 'dummy' variable doesn't appear to have a statistically significant impact on CPI inflation rates (p-value = 0.24423).

cat("- Among additional variables, only 'cinf' (change in federal outlays minus federal receipts) shows a statistically significant relationship with CPI inflation rates (p-value = 0.00909).\n")

## - Among additional variables, only 'cinf' (change in federal outlays minus federal receipts) shows a statistically significant relationship with CPI inflation rates (p-value = 0.00909).

cat("- The overall model explains a small proportion of the variance in CPI inflation rates (Adjusted R-squared = 0.1373).\n")

## - The overall model explains a small proportion of the variance in CPI inflation rates (Adjusted R-squared = 0.1373).

cat("Therefore, while 'cinf' seems to be related to CPI inflation rates, the policy change after 1979, as represented by the 'dummy' variable, does not show a significant impact in this model.")

## Therefore, while 'cinf' seems to be related to CPI inflation rates, the policy change after 1979, as represented by the 'dummy' variable, does not show a significant impact in this model.

#Chapter 10 #C6

data(fertil3)

# Step (i): Regress gfr on t and tsq to obtain the residuals (gft)
model_t_tsq <- lm(gfr ~ t + tsq, data = fertil3)
residuals_gft <- resid(model_t_tsq)

# Step (ii): Regress gft on all variables from equation (10.35), including t and tsq
model_10_35 <- lm(residuals_gft ~ pe + year + tsq + pe_1 + pe_2 + pe_3 + pe_4 + pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + cpe_3 + cpe_4 + gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + gfr_2 + t + tsq, data = fertil3)
summary(model_10_35)

## Warning in summary.lm(model_10_35): essentially perfect fit: summary may be
## unreliable

## 
## Call:
## lm(formula = residuals_gft ~ pe + year + tsq + pe_1 + pe_2 + 
##     pe_3 + pe_4 + pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + 
##     cpe_3 + cpe_4 + gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + 
##     gfr_2 + t + tsq, data = fertil3)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -5.495e-14 -2.374e-15 -2.200e-17  2.638e-15  3.812e-14 
## 
## Coefficients: (6 not defined because of singularities)
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)  3.003e+01  4.452e-12  6.746e+12   <2e-16 ***
## pe          -2.624e-16  1.381e-16 -1.900e+00   0.0634 .  
## year        -7.170e-02  2.307e-15 -3.107e+13   <2e-16 ***
## tsq          7.959e-03  6.783e-17  1.173e+14   <2e-16 ***
## pe_1         3.803e-16  1.519e-16  2.504e+00   0.0157 *  
## pe_2         2.118e-17  1.653e-16  1.280e-01   0.8986    
## pe_3        -2.758e-16  1.563e-16 -1.764e+00   0.0839 .  
## pe_4         1.363e-17  1.222e-16  1.120e-01   0.9117    
## pill         5.956e-15  1.134e-14  5.250e-01   0.6020    
## ww2         -3.761e-15  1.268e-14 -2.970e-01   0.7679    
## tcu          1.130e-18  5.813e-19  1.945e+00   0.0576 .  
## cgfr         1.000e+00  5.225e-16  1.914e+15   <2e-16 ***
## cpe                 NA         NA         NA       NA    
## cpe_1               NA         NA         NA       NA    
## cpe_2               NA         NA         NA       NA    
## cpe_3               NA         NA         NA       NA    
## cpe_4       -2.998e-17  1.239e-16 -2.420e-01   0.8098    
## gfr_1        1.000e+00  2.436e-16  4.105e+15   <2e-16 ***
## cgfr_1      -2.649e-16  5.109e-16 -5.190e-01   0.6064    
## cgfr_2       4.872e-16  5.394e-16  9.030e-01   0.3708    
## cgfr_3      -7.235e-16  4.843e-16 -1.494e+00   0.1416    
## cgfr_4       8.808e-19  4.778e-16  2.000e-03   0.9985    
## gfr_2               NA         NA         NA       NA    
## t                   NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.244e-14 on 49 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 6.667e+30 on 17 and 49 DF,  p-value: < 2.2e-16

# Step (iii): Re-estimate equation (10.35) but add the 'pe_3' as an additional variable to check stat. significance.
model_with_pe_3 <- lm(gfr ~ pe + year + tsq + pe_1 + pe_2 + pe_3 + pe_4 + pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + cpe_3 + cpe_4 + gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + gfr_2 + t + tsq + pe_3, data = fertil3)
summary(model_with_pe_3)

## Warning in summary.lm(model_with_pe_3): essentially perfect fit: summary may be
## unreliable

## 
## Call:
## lm(formula = gfr ~ pe + year + tsq + pe_1 + pe_2 + pe_3 + pe_4 + 
##     pill + ww2 + tcu + cgfr + cpe + cpe_1 + cpe_2 + cpe_3 + cpe_4 + 
##     gfr_1 + cgfr_1 + cgfr_2 + cgfr_3 + cgfr_4 + gfr_2 + t + tsq + 
##     pe_3, data = fertil3)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -6.294e-14 -3.614e-15  3.870e-16  3.960e-15  5.021e-14 
## 
## Coefficients: (6 not defined because of singularities)
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept) -5.849e-12  4.744e-12 -1.233e+00    0.224    
## pe          -6.052e-17  1.472e-16 -4.110e-01    0.683    
## year         3.028e-15  2.459e-15  1.231e+00    0.224    
## tsq         -7.141e-17  7.229e-17 -9.880e-01    0.328    
## pe_1         1.081e-16  1.618e-16  6.680e-01    0.507    
## pe_2         3.984e-18  1.762e-16  2.300e-02    0.982    
## pe_3        -2.328e-17  1.666e-16 -1.400e-01    0.889    
## pe_4        -5.775e-17  1.303e-16 -4.430e-01    0.660    
## pill        -1.025e-14  1.209e-14 -8.480e-01    0.401    
## ww2          1.107e-14  1.351e-14  8.190e-01    0.417    
## tcu          5.447e-19  6.195e-19  8.790e-01    0.384    
## cgfr         1.000e+00  5.569e-16  1.796e+15   <2e-16 ***
## cpe                 NA         NA         NA       NA    
## cpe_1               NA         NA         NA       NA    
## cpe_2               NA         NA         NA       NA    
## cpe_3               NA         NA         NA       NA    
## cpe_4        5.996e-17  1.320e-16  4.540e-01    0.652    
## gfr_1        1.000e+00  2.596e-16  3.852e+15   <2e-16 ***
## cgfr_1      -7.823e-16  5.445e-16 -1.437e+00    0.157    
## cgfr_2       5.662e-17  5.749e-16  9.900e-02    0.922    
## cgfr_3      -5.129e-16  5.162e-16 -9.940e-01    0.325    
## cgfr_4      -1.866e-16  5.092e-16 -3.660e-01    0.716    
## gfr_2               NA         NA         NA       NA    
## t                   NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.326e-14 on 49 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 7.853e+30 on 17 and 49 DF,  p-value: < 2.2e-16

(i) Estimate the model

## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16

## Analysis of Variance Table
## 
## Model 1: log(wage) ~ educ + exper + tenure + married + black + south + 
##     urban
## Model 2: log(wage) ~ educ + exper + tenure + married + black + south + 
##     urban + exper + tenure
##   Res.Df    RSS Df Sum of Sq F Pr(>F)
## 1    927 123.82                      
## 2    927 123.82  0         0

## 
## Call:
## lm(formula = log(wage) ~ educ * black + exper + tenure + married + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.97782 -0.21832  0.00475  0.24136  1.23226 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.374817   0.114703  46.859  < 2e-16 ***
## educ         0.067115   0.006428  10.442  < 2e-16 ***
## black        0.094809   0.255399   0.371 0.710561    
## exper        0.013826   0.003191   4.333 1.63e-05 ***
## tenure       0.011787   0.002453   4.805 1.80e-06 ***
## married      0.198908   0.039047   5.094 4.25e-07 ***
## south       -0.089450   0.026277  -3.404 0.000692 ***
## urban        0.183852   0.026955   6.821 1.63e-11 ***
## educ:black  -0.022624   0.020183  -1.121 0.262603    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared:  0.2536, Adjusted R-squared:  0.2471 
## F-statistic: 39.32 on 8 and 926 DF,  p-value: < 2.2e-16

## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16

## <NA> 
##   NA

final - Khulan Bayarkhuu

2024-01-05

Chapter 7 (C1)

(i) Add variables mothcoll and fathcoll to the equation

Add mothcoll and fathcoll to the model

(ii) Test for joint significance of mothcoll and fathcoll

(iii) Add hsGPA to the model

Chapter 8 1

Chapter 8

(i) Estimate the model