# Install the 'wooldridge' data package only when it is missing, so that
# re-running this script does not download and reinstall it every time.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
#Chapter 7 1
# Attach the data package and load sleep75 (one load is sufficient).
library(wooldridge)
data("sleep75")
# Unrestricted equation: sleep on totwrk, educ, a quadratic in age (age and
# agesq) and the male dummy.
model <- lm(
  sleep ~ totwrk + educ + age + agesq + male,
  data = sleep75
)
summary(model)
##
## Call:
## lm(formula = sleep ~ totwrk + educ + age + agesq + male, data = sleep75)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2378.00 -243.29 6.74 259.24 1350.19
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3840.83197 235.10870 16.336 <2e-16 ***
## totwrk -0.16342 0.01813 -9.013 <2e-16 ***
## educ -11.71332 5.86689 -1.997 0.0463 *
## age -8.69668 11.20746 -0.776 0.4380
## agesq 0.12844 0.13390 0.959 0.3378
## male 87.75243 34.32616 2.556 0.0108 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 417.7 on 700 degrees of freedom
## Multiple R-squared: 0.1228, Adjusted R-squared: 0.1165
## F-statistic: 19.59 on 5 and 700 DF, p-value: < 2.2e-16
# (i) All other factors being equal, is there evidence that men sleep more than women? How strong is the evidence?
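#The t statistic on male is 87.75/34.33 = 2.56 (two-sided p-value = 0.0108), so the coefficient is significant at the 5% level but not at the 1% level: fairly strong evidence. Holding the other factors fixed, men are estimated to sleep about 87.75 minutes more than women.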
# (ii) Is there a statistically significant tradeoff between working and sleeping? What is the estimated tradeoff?
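#The t statistic on totwrk is -0.163/0.018 = -9.0 (p-value < 2e-16), so the tradeoff is statistically significant.
#Each additional minute of work is associated with 0.163 fewer minutes of sleep, so one more hour of work (60 minutes) reduces sleep by about 60 x 0.163 = 9.78 minutes.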
# (iii) What other regression do you need to run to test the null hypothesis that, holding other factors fixed, age has no effect on sleeping?
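#Age enters the model through both age and agesq, so the null is a joint hypothesis. We estimate the restricted model that excludes both age and agesq and compare it with the full model using an F test.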
# Part (iii): "age has no effect" means the coefficients on age and agesq are
# jointly zero, so the restricted model drops both terms and is compared with
# the unrestricted model by an F test.
data(sleep75)
# Restricted model (no age terms). `model` is reused for it, as before.
model <- lm(sleep ~ totwrk + educ + male, data = sleep75)
# Unrestricted model, refit under its own name because `model` now holds the
# restricted fit.
model_unrestricted <- lm(
  sleep ~ totwrk + educ + age + agesq + male,
  data = sleep75
)
# F test of the two exclusion restrictions (restricted vs. unrestricted).
anova(model, model_unrestricted)
# Summary of the restricted model.
summary(model)
##
## Call:
## lm(formula = sleep ~ totwrk + educ + male, data = sleep75)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2380.27 -239.15 6.74 257.31 1370.63
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3747.51727 81.00609 46.262 < 2e-16 ***
## totwrk -0.16734 0.01794 -9.329 < 2e-16 ***
## educ -13.88479 5.65757 -2.454 0.01436 *
## male 90.96919 34.27441 2.654 0.00813 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 418 on 702 degrees of freedom
## Multiple R-squared: 0.1193, Adjusted R-squared: 0.1155
## F-statistic: 31.69 on 3 and 702 DF, p-value: < 2.2e-16
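#R-squared barely changes (0.1228 with the age terms, 0.1193 without). The F statistic for the two exclusion restrictions is [(0.1228 - 0.1193)/2] / [(1 - 0.1228)/700] = 1.40 with (2, 700) degrees of freedom (p-value about 0.25), so age and agesq are jointly insignificant and we cannot reject the null that age has no effect on sleeping time.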
# SAT equation: quadratic in hsize, the female and black dummies, and their
# interaction.
library(wooldridge)
data("gpa2")
model <- lm(
  sat ~ hsize + hsizesq + female + black + I(female*black),
  data = gpa2
)
summary(model)
##
## Call:
## lm(formula = sat ~ hsize + hsizesq + female + black + I(female *
## black), data = gpa2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -570.45 -89.54 -5.24 85.41 479.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1028.0972 6.2902 163.445 < 2e-16 ***
## hsize 19.2971 3.8323 5.035 4.97e-07 ***
## hsizesq -2.1948 0.5272 -4.163 3.20e-05 ***
## female -45.0915 4.2911 -10.508 < 2e-16 ***
## black -169.8126 12.7131 -13.357 < 2e-16 ***
## I(female * black) 62.3064 18.1542 3.432 0.000605 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 133.4 on 4131 degrees of freedom
## Multiple R-squared: 0.08578, Adjusted R-squared: 0.08468
## F-statistic: 77.52 on 5 and 4131 DF, p-value: < 2.2e-16
# (i) Is there strong evidence that hsize2 should be included in the model? From this equation,what is the optimal high school size?
# The regression output does report standard errors: hsizesq has t = -4.163 (p-value = 3.2e-05), so there is strong evidence that it belongs in the model, even though the overall R-squared (0.086) is small. To find the optimal size, set the derivative of 19.30hsize - 2.19hsize2 with respect to hsize equal to zero: 19.30 - 2(2.19)hsize = 0, which gives hsize = 4.4.
# (ii) Holding hsize fixed, what is the estimated difference in SAT score between nonblack females and non-black males?
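# For non-black students black = 0, so the interaction term female*black drops out and the difference is just the coefficient on female: -45.09. Non-black females are estimated to score about 45 points lower than non-black males (t = -10.5, so the difference is highly significant).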
# (iii) What is the estimated difference in SAT score between non-black males and black males?
# − 169.81black
# (iv) What is the estimated difference in SAT score between black females and non-black females?
# − 169.81black +62.31female · black = -107.5
data(gpa1)
names(gpa1)
## [1] "age" "soph" "junior" "senior" "senior5" "male"
## [7] "campus" "business" "engineer" "colGPA" "hsGPA" "ACT"
## [13] "job19" "job20" "drive" "bike" "walk" "voluntr"
## [19] "PC" "greek" "car" "siblings" "bgfriend" "clubs"
## [25] "skipped" "alcohol" "gradMI" "fathcoll" "mothcoll"
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
# NOTE(review): the specification below assumes this section answers
# Wooldridge ch. 7 computer exercise C1, whose baseline equation is
# colGPA ~ PC + hsGPA + ACT -- confirm against the assignment.
# The earlier version had three defects: update() re-added terms that were
# already in the model (a no-op), coeftest() has no `terms` argument (so no
# joint test was run), and anova() was given models with different responses.
# The console output pasted from that version was removed; re-run to
# regenerate it.

# (i) Baseline equation, then add the parents' college dummies.
model <- lm(colGPA ~ PC + hsGPA + ACT, data = gpa1)
model_updated <- update(model, . ~ . + mothcoll + fathcoll)
# View the results
summary(model_updated)

# (ii) Joint significance of mothcoll and fathcoll: heteroskedasticity-robust
# (HC1) Wald F test of the baseline against the extended model.
joint_test <- waldtest(
  model, model_updated,
  vcov = function(fit) vcovHC(fit, type = "HC1"),
  test = "F"
)
# Report the p-value
joint_test

# (iii) Add the square of hsGPA to the model from part (i).
model_iii <- update(model_updated, . ~ . + I(hsGPA^2))
# Compare models to decide whether the generalization is needed
anova(model_updated, model_iii)
#Which of the following are consequences of heteroskedasticity?
# (i) The OLS estimators, b^ j, are inconsistent.
# (ii) The usual F statistic no longer has an F distribution.
# (iii) The OLS estimators are no longer BLUE.
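# Only (ii) and (iii) are consequences of heteroskedasticity. Statement (i) is false: under the remaining Gauss-Markov assumptions OLS stays unbiased and consistent, because those properties do not depend on the error variance. What fails is inference (the usual standard errors are invalid, so the usual t and F statistics no longer have t and F distributions) and efficiency (OLS is no longer BLUE). It is therefore important to detect and address heteroskedasticity; common remedies include using heteroskedasticity-robust standard errors or transforming the data to stabilize the variance.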
#Chapter 8 #C13
library(wooldridge)
data("fertil2")
library(sandwich)
library(lmtest)
# Part (i): children on a quadratic in age, educ, electric and urban.
model <- lm(children ~ age + I(age^2) + educ + electric + urban, data = fertil2)
# Heteroskedasticity-robust standard errors (vcovHC() defaults to type "HC3").
robust_se <- sqrt(diag(vcovHC(model)))
# Coefficient estimates next to their robust standard errors (kept in its
# original two-column layout because it is printed further down).
summary_with_robust_se <- cbind(coef(model), "Robust SE" = robust_se)
# Usual and robust standard errors side by side. The ratio column shows
# directly whether the robust standard error is the larger one (> 1).
usual_se <- sqrt(diag(vcov(model)))
se_comparison <- cbind(
  "Usual SE" = usual_se,
  "Robust SE" = robust_se,
  "Robust / Usual" = robust_se / usual_se
)
print(se_comparison)
summary(model)
##
## Call:
## lm(formula = children ~ age + I(age^2) + educ + electric + urban,
## data = fertil2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9012 -0.7136 -0.0039 0.7119 7.4318
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.2225162 0.2401888 -17.580 < 2e-16 ***
## age 0.3409255 0.0165082 20.652 < 2e-16 ***
## I(age^2) -0.0027412 0.0002718 -10.086 < 2e-16 ***
## educ -0.0752323 0.0062966 -11.948 < 2e-16 ***
## electric -0.3100404 0.0690045 -4.493 7.20e-06 ***
## urban -0.2000339 0.0465062 -4.301 1.74e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.452 on 4352 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.5734, Adjusted R-squared: 0.5729
## F-statistic: 1170 on 5 and 4352 DF, p-value: < 2.2e-16
print(summary_with_robust_se)
## Robust SE
## (Intercept) -4.222516228 0.2443961935
## age 0.340925520 0.0192199445
## I(age^2) -0.002741209 0.0003513959
## educ -0.075232323 0.0063159137
## electric -0.310040409 0.0640737262
## urban -0.200033857 0.0455162364
# Robust p-values of the individual coefficients in the part (i) model.
robust_coef_test <- coeftest(model, vcov = vcovHC)
print(robust_coef_test[, "Pr(>|t|)"])
## (Intercept) age I(age^2) educ electric urban
## 9.635281e-65 5.015759e-68 7.640643e-15 3.247461e-32 1.351408e-06 1.135260e-05
# Part (ii): add the religion dummies and test their joint significance.
# coeftest() only reports one t test per coefficient, so the joint test is
# done with waldtest() on the nested pair of models.
# NOTE(review): assumes the religion dummies in fertil2 are named spirit,
# protest and catholic and add no missing values beyond those already dropped
# (waldtest() needs both models on the same sample) -- confirm with
# names(fertil2).
model_relig <- update(model, . ~ . + spirit + protest + catholic)
# Non-robust F test.
joint_test_usual <- waldtest(model, model_relig, test = "F")
# Heteroskedasticity-robust F test (same covariance estimator as above).
joint_test <- waldtest(model, model_relig, vcov = vcovHC, test = "F")
print(joint_test_usual[2, "Pr(>F)"])
print(joint_test[2, "Pr(>F)"])
# Heteroskedasticity check (special case of the White test): regress the
# squared OLS residuals on the fitted values and their squares. The overall
# F statistic reported by summary() is the joint test of the two regressors.
# NOTE(review): the exercise appears to ask for the fitted values and
# residuals of the regression that includes the religion dummies; `model`
# here is the part (i) equation -- confirm which one is wanted.
# The residuals are stored as `squared_resid` rather than `residuals` so the
# stats::residuals() function is not shadowed. Console output pasted from the
# earlier one-regressor version was removed; re-run to regenerate it.
fitted_values <- fitted(model)
squared_resid <- resid(model)^2
hetero_test <- lm(squared_resid ~ fitted_values + I(fitted_values^2))
summary(hetero_test)
#Question C4
library(wooldridge)
data("vote1")
# Show each column's type and first values before modelling.
str(vote1)
## 'data.frame': 173 obs. of 10 variables:
## $ state : chr "AL" "AK" "AZ" "AZ" ...
## $ district: int 7 1 2 3 3 4 2 3 5 6 ...
## $ democA : int 1 0 1 0 0 1 0 1 1 1 ...
## $ voteA : int 68 62 73 69 75 69 59 71 76 73 ...
## $ expendA : num 328.3 626.4 99.6 319.7 159.2 ...
## $ expendB : num 8.74 402.48 3.07 26.28 60.05 ...
## $ prtystrA: int 41 60 55 64 66 46 58 49 71 64 ...
## $ lexpendA: num 5.79 6.44 4.6 5.77 5.07 ...
## $ lexpendB: num 2.17 6 1.12 3.27 4.1 ...
## $ shareA : num 97.4 60.9 97 92.4 72.6 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
# Per-column descriptive statistics for vote1.
summary(vote1)
## state district democA voteA
## Length:173 Min. : 1.000 Min. :0.0000 Min. :16.0
## Class :character 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:36.0
## Mode :character Median : 6.000 Median :1.0000 Median :50.0
## Mean : 8.838 Mean :0.5549 Mean :50.5
## 3rd Qu.:11.000 3rd Qu.:1.0000 3rd Qu.:65.0
## Max. :42.000 Max. :1.0000 Max. :84.0
## expendA expendB prtystrA lexpendA
## Min. : 0.302 Min. : 0.93 Min. :22.00 Min. :-1.197
## 1st Qu.: 81.634 1st Qu.: 60.05 1st Qu.:44.00 1st Qu.: 4.402
## Median : 242.782 Median : 221.53 Median :50.00 Median : 5.492
## Mean : 310.611 Mean : 305.09 Mean :49.76 Mean : 5.026
## 3rd Qu.: 457.410 3rd Qu.: 450.72 3rd Qu.:56.00 3rd Qu.: 6.126
## Max. :1470.674 Max. :1548.19 Max. :71.00 Max. : 7.293
## lexpendB shareA
## Min. :-0.07257 Min. : 0.09464
## 1st Qu.: 4.09524 1st Qu.:18.86800
## Median : 5.40056 Median :50.84990
## Mean : 4.94437 Mean :51.07654
## 3rd Qu.: 6.11084 3rd Qu.:84.25510
## Max. : 7.34484 Max. :99.49500
# Part (i): voteA on prtystrA, democA and the logs of both candidates'
# expenditures.
model <- lm(
  voteA ~ prtystrA + democA + log(expendA) + log(expendB),
  data = vote1
)
summary(model)
##
## Call:
## lm(formula = voteA ~ prtystrA + democA + log(expendA) + log(expendB),
## data = vote1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.576 -4.864 -1.146 4.903 24.566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.66141 4.73604 7.952 2.56e-13 ***
## prtystrA 0.25192 0.07129 3.534 0.00053 ***
## democA 3.79294 1.40652 2.697 0.00772 **
## log(expendA) 5.77929 0.39182 14.750 < 2e-16 ***
## log(expendB) -6.23784 0.39746 -15.694 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared: 0.8012, Adjusted R-squared: 0.7964
## F-statistic: 169.2 on 4 and 168 DF, p-value: < 2.2e-16
# Working copy of vote1 taken straight from the package namespace.
data5 <- wooldridge::vote1
# First six rows.
head(data5, n = 6L)
## state district democA voteA expendA expendB prtystrA lexpendA lexpendB
## 1 AL 7 1 68 328.296 8.737 41 5.793916 2.167567
## 2 AK 1 0 62 626.377 402.477 60 6.439952 5.997638
## 3 AZ 2 1 73 99.607 3.065 55 4.601233 1.120048
## 4 AZ 3 0 69 319.690 26.281 64 5.767352 3.268846
## 5 AR 3 0 75 159.221 60.054 66 5.070293 4.095244
## 6 AR 4 1 69 570.155 21.393 46 6.345908 3.063064
## shareA
## 1 97.40767
## 2 60.88104
## 3 97.01476
## 4 92.40370
## 5 72.61247
## 6 96.38355
# Same vote equation fitted on the working copy; later steps use `model9`.
model9 <- lm(
  voteA ~ prtystrA + democA + log(expendA) + log(expendB),
  data = data5
)
summary(model9)
##
## Call:
## lm(formula = voteA ~ prtystrA + democA + log(expendA) + log(expendB),
## data = data5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.576 -4.864 -1.146 4.903 24.566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.66141 4.73604 7.952 2.56e-13 ***
## prtystrA 0.25192 0.07129 3.534 0.00053 ***
## democA 3.79294 1.40652 2.697 0.00772 **
## log(expendA) 5.77929 0.39182 14.750 < 2e-16 ***
## log(expendB) -6.23784 0.39746 -15.694 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared: 0.8012, Adjusted R-squared: 0.7964
## F-statistic: 169.2 on 4 and 168 DF, p-value: < 2.2e-16
# OLS residuals of the vote equation, regressed on the same regressors that
# produced them.
residuals <- resid(model9)
residuals_model <- lm(
  residuals ~ prtystrA + democA + log(expendA) + log(expendB),
  data = data5
)
summary(residuals_model)
##
## Call:
## lm(formula = residuals ~ prtystrA + democA + log(expendA) + log(expendB),
## data = data5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.576 -4.864 -1.146 4.903 24.566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.183e-14 4.736e+00 0 1
## prtystrA 1.493e-16 7.129e-02 0 1
## democA 1.843e-15 1.407e+00 0 1
## log(expendA) -3.811e-16 3.918e-01 0 1
## log(expendB) 1.119e-15 3.975e-01 0 1
##
## Residual standard error: 7.573 on 168 degrees of freedom
## Multiple R-squared: 5.525e-32, Adjusted R-squared: -0.02381
## F-statistic: 2.32e-30 on 4 and 168 DF, p-value: 1
# Breusch-Pagan test on the vote equation. With default arguments bptest()
# reports the studentized version as a chi-squared (BP) statistic.
bptest_result <- bptest(model9)
print(bptest_result)
##
## studentized Breusch-Pagan test
##
## data: model9
## BP = 9.0934, df = 4, p-value = 0.05881
# Squared OLS residuals of the vote equation, taken from the fitted model.
squared_resid <- resid(model9)^2

# Breusch-Pagan test, F-statistic form: regress the squared residuals on all
# regressors of the original equation. This regression was previously
# labelled as the White test. The columns are now given explicit names so the
# formula is resolved inside `data =`.
bp_data <- data.frame(
  residuals_squared = squared_resid,
  prtystrA = data5$prtystrA,
  democA = data5$democA,
  log_expendA = log(data5$expendA),
  log_expendB = log(data5$expendB)
)
bp_model <- lm(
  residuals_squared ~ prtystrA + democA + log_expendA + log_expendB,
  data = bp_data
)
f_statistic <- summary(bp_model)$fstatistic
p_value <- pf(f_statistic[1], f_statistic[2], f_statistic[3], lower.tail = FALSE)
print(paste("F-statistic:", f_statistic[1], "P-value:", p_value))
## [1] "F-statistic: 2.33011268371627 P-value: 0.0580575140885532"

# Special case of the White test, F-statistic form: regress the squared
# residuals on the fitted values and their squares.
white_data <- data.frame(
  residuals_squared = squared_resid,
  fitted_values = fitted(model9)
)
white_model <- lm(
  residuals_squared ~ fitted_values + I(fitted_values^2),
  data = white_data
)
white_f <- summary(white_model)$fstatistic
white_p <- pf(white_f[1], white_f[2], white_f[3], lower.tail = FALSE)
print(paste("White F-statistic:", white_f[1], "P-value:", white_p))
#Chapter9 C4.)
library(wooldridge)
data("infmrt")
# Keep only the rows for 1990 (which() drops rows where the comparison is NA,
# as subset() does).
infmrt_1990 <- infmrt[which(infmrt$year == 1990), ]
# Infant mortality on the logs of pcinc, physic and popul, plus the DC dummy
# that singles out the District of Columbia observation.
model_with_dummy <- lm(
  infmort ~ log(pcinc) + log(physic) + log(popul) + DC,
  data = infmrt_1990
)
# Report the fit.
summary(model_with_dummy)
##
## Call:
## lm(formula = infmort ~ log(pcinc) + log(physic) + log(popul) +
## DC, data = infmrt_1990)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4964 -0.8076 0.0000 0.9358 2.6077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.9548 12.4195 1.929 0.05994 .
## log(pcinc) -0.5669 1.6412 -0.345 0.73135
## log(physic) -2.7418 1.1908 -2.303 0.02588 *
## log(popul) 0.6292 0.1911 3.293 0.00191 **
## DC 16.0350 1.7692 9.064 8.43e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.246 on 46 degrees of freedom
## Multiple R-squared: 0.691, Adjusted R-squared: 0.6641
## F-statistic: 25.71 on 4 and 46 DF, p-value: 3.146e-11
#Chapter 10
library(wooldridge)
# intdef is modified and used by the C1 exercise below.
data("intdef")
# 1. Answering Statements
# NOTE(review): the answers assume the statements are those of Wooldridge
# ch. 10, problem 1 -- in particular (ii) "OLS is unbiased under the first
# three Gauss-Markov assumptions" and (iv) "seasonality is not an issue with
# annual observations". Confirm against the assignment.
# (i) Independently distributed time series observations
independence_statement <- "Disagree"
independence_explanation <- "Time series data often exhibits autocorrelation, violating independence assumptions."
# (ii) Unbiased OLS estimator in time series regression
# Unbiasedness needs only linearity, no perfect collinearity and a zero
# conditional mean; homoskedasticity and no serial correlation are not needed.
ols_statement <- "Agree"
ols_explanation <- "Under the first three time series Gauss-Markov assumptions (linear in parameters, no perfect collinearity, zero conditional mean) OLS is unbiased; homoskedasticity and no serial correlation matter for the variance and for inference, not for unbiasedness."
# (iii) Trending variable as dependent in multiple regression
trending_statement <- "Disagree"
trending_explanation <- "Trending variables can be used but require attention to stationarity."
# (iv) Seasonality in annual time series observations
# Seasonal patterns repeat within a year, so yearly totals or averages are
# not affected by them.
seasonality_statement <- "Agree"
seasonality_explanation <- "Each annual observation covers a full year, so within-year seasonal patterns average out; seasonality is a concern for monthly or quarterly data."
# C1. Using a Dummy Variable for Federal Reserve Policy Change
# Create a dummy variable for the policy change after 1979
intdef$dummy <- ifelse(intdef$year > 1979, 1, 0)
# Equation with dummy variable
# NOTE(review): assumes the exercise asks for the dummy to be added to the
# interest-rate equation i3 ~ inf + def (Wooldridge eq. 10.15), and that
# intdef has columns i3 and def -- confirm with names(intdef). The earlier
# version regressed inf on the first differences ci3, cdef and cinf, which
# does not test for a shift in the interest-rate equation. Console output
# pasted from that version was removed; re-run to regenerate it.
model_with_dummy <- lm(i3 ~ inf + def + dummy, data = intdef)
summary(model_with_dummy)
# Conclusion, computed from the fitted model instead of hard-coded numbers so
# that it always matches the regression actually estimated.
model_summary <- summary(model_with_dummy)
coef_table <- coef(model_summary)
dummy_estimate <- coef_table["dummy", "Estimate"]
dummy_p_value <- coef_table["dummy", "Pr(>|t|)"]
verdict <- if (dummy_p_value < 0.05) "is" else "is not"
cat("Conclusion regarding the model:\n")
cat("From the regression results:\n")
cat(sprintf(
  "- Estimated shift after 1979 (coefficient on 'dummy'): %.4f, p-value = %.5f.\n",
  dummy_estimate, dummy_p_value
))
cat(sprintf(
  "- At the 5%% level the post-1979 shift %s statistically significant.\n",
  verdict
))
cat(sprintf(
  "- Adjusted R-squared of the model: %.4f.\n",
  model_summary$adj.r.squared
))
#Chapter 10 #C6
data(fertil3)
# The earlier version put every column of fertil3 on the right-hand side,
# including gfr_1 and cgfr. Their estimated coefficients were both exactly 1,
# i.e. they reproduce gfr, which gave an "essentially perfect fit" with six
# aliased coefficients. The pasted output of those regressions was removed;
# re-run to regenerate it.
# NOTE(review): assumes equation (10.35) is gfr ~ pe + ww2 + pill + t + tsq
# and that part (iii) adds the cubic trend (column tcu) -- confirm against
# the textbook.

# Step (i): Regress gfr on t and tsq to obtain the residuals (gft),
# i.e. the detrended fertility rate.
model_t_tsq <- lm(gfr ~ t + tsq, data = fertil3)
residuals_gft <- resid(model_t_tsq)

# Step (ii): Regress gft on all variables from equation (10.35), including t
# and tsq, and compare its R-squared with that of (10.35) itself.
model_10_35 <- lm(residuals_gft ~ pe + ww2 + pill + t + tsq, data = fertil3)
summary(model_10_35)
model_gfr <- lm(gfr ~ pe + ww2 + pill + t + tsq, data = fertil3)
print(c(
  "R-squared, detrended gfr" = summary(model_10_35)$r.squared,
  "R-squared, gfr (10.35)" = summary(model_gfr)$r.squared
))

# Step (iii): Re-estimate equation (10.35) with the cubic trend tcu added and
# check its statistical significance (the t test on tcu in the summary).
model_with_tcu <- update(model_gfr, . ~ . + tcu)
summary(model_with_tcu)
# The old object name is kept in case later code refers to it.
model_with_pe_3 <- model_with_tcu
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98069 -0.21996 0.00707 0.24288 1.22822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.395497 0.113225 47.653 < 2e-16 ***
## educ 0.065431 0.006250 10.468 < 2e-16 ***
## exper 0.014043 0.003185 4.409 1.16e-05 ***
## tenure 0.011747 0.002453 4.789 1.95e-06 ***
## married 0.199417 0.039050 5.107 3.98e-07 ***
## black -0.188350 0.037667 -5.000 6.84e-07 ***
## south -0.090904 0.026249 -3.463 0.000558 ***
## urban 0.183912 0.026958 6.822 1.62e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared: 0.2526, Adjusted R-squared: 0.2469
## F-statistic: 44.75 on 7 and 927 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Model 1: log(wage) ~ educ + exper + tenure + married + black + south +
## urban
## Model 2: log(wage) ~ educ + exper + tenure + married + black + south +
## urban + exper + tenure
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 927 123.82
## 2 927 123.82 0 0
##
## Call:
## lm(formula = log(wage) ~ educ * black + exper + tenure + married +
## south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97782 -0.21832 0.00475 0.24136 1.23226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.374817 0.114703 46.859 < 2e-16 ***
## educ 0.067115 0.006428 10.442 < 2e-16 ***
## black 0.094809 0.255399 0.371 0.710561
## exper 0.013826 0.003191 4.333 1.63e-05 ***
## tenure 0.011787 0.002453 4.805 1.80e-06 ***
## married 0.198908 0.039047 5.094 4.25e-07 ***
## south -0.089450 0.026277 -3.404 0.000692 ***
## urban 0.183852 0.026955 6.821 1.63e-11 ***
## educ:black -0.022624 0.020183 -1.121 0.262603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared: 0.2536, Adjusted R-squared: 0.2471
## F-statistic: 39.32 on 8 and 926 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98069 -0.21996 0.00707 0.24288 1.22822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.395497 0.113225 47.653 < 2e-16 ***
## educ 0.065431 0.006250 10.468 < 2e-16 ***
## exper 0.014043 0.003185 4.409 1.16e-05 ***
## tenure 0.011747 0.002453 4.789 1.95e-06 ***
## married 0.199417 0.039050 5.107 3.98e-07 ***
## black -0.188350 0.037667 -5.000 6.84e-07 ***
## south -0.090904 0.026249 -3.463 0.000558 ***
## urban 0.183912 0.026958 6.822 1.62e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared: 0.2526, Adjusted R-squared: 0.2469
## F-statistic: 44.75 on 7 and 927 DF, p-value: < 2.2e-16
## <NA>
## NA