library(wooldridge) 
library(dplyr)       
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)     
# (i) Simple regression of IQ on educ ----
# Lines prefixed with `##` are the recorded console output of the statement
# directly above them; they are comments and are not executed.
# Load the `wage2` data set into the workspace (presumably supplied by the
# wooldridge package attached at the top of the file -- confirm).
data("wage2")
# Fit IQ = delta_0 + delta_1 * educ by OLS and print the full model summary.
reg1 <- lm(IQ ~ educ, data = wage2)
summary(reg1)
## 
## Call:
## lm(formula = IQ ~ educ, data = wage2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.228  -7.262   0.907   8.772  37.373 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  53.6872     2.6229   20.47   <2e-16 ***
## educ          3.5338     0.1922   18.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.9 on 933 degrees of freedom
## Multiple R-squared:  0.2659, Adjusted R-squared:  0.2652 
## F-statistic:   338 on 1 and 933 DF,  p-value: < 2.2e-16
# Keep the slope on educ (a length-one numeric vector named "educ"); it is
# reused in part (iv) as delta_1.
delta_1 <- coef(reg1)["educ"]
cat("Slope coefficient δ1 (IQ on educ):", delta_1, "\n\n")
## Slope coefficient δ1 (IQ on educ): 3.533829
# (ii) Simple regression of log(wage) on educ ----
# Fit log(wage) = beta_0 + beta_1 * educ by OLS, omitting IQ, and print the
# model summary. Lines prefixed with `##` are recorded console output.
reg2 <- lm(log(wage) ~ educ, data = wage2)
summary(reg2)
## 
## Call:
## lm(formula = log(wage) ~ educ, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.94620 -0.24832  0.03507  0.27440  1.28106 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.973062   0.081374   73.40   <2e-16 ***
## educ        0.059839   0.005963   10.04   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4003 on 933 degrees of freedom
## Multiple R-squared:  0.09742,    Adjusted R-squared:  0.09645 
## F-statistic: 100.7 on 1 and 933 DF,  p-value: < 2.2e-16
# Keep the simple-regression slope on educ (named "educ"); part (iv) compares
# it against the value reconstructed from the multiple regression.
beta_1_hat <- coef(reg2)["educ"]
cat("Slope coefficient β1 (log(wage) on educ):", beta_1_hat, "\n\n")
## Slope coefficient β1 (log(wage) on educ): 0.05983921
# (iii) Multiple regression of log(wage) on educ and IQ ----
# Print a section banner, then fit log(wage) = b0 + b1 * educ + b2 * IQ by OLS.
# Lines prefixed with `##` are recorded console output.
cat("------ (iii) Multiple Regression: log(wage) on educ and IQ ------\n")
## ------ (iii) Multiple Regression: log(wage) on educ and IQ ------
reg3 <- lm(log(wage) ~ educ + IQ, data = wage2)
summary(reg3)
## 
## Call:
## lm(formula = log(wage) ~ educ + IQ, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.01601 -0.24367  0.03359  0.27960  1.23783 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.6582876  0.0962408  58.793  < 2e-16 ***
## educ        0.0391199  0.0068382   5.721 1.43e-08 ***
## IQ          0.0058631  0.0009979   5.875 5.87e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3933 on 932 degrees of freedom
## Multiple R-squared:  0.1297, Adjusted R-squared:  0.1278 
## F-statistic: 69.42 on 2 and 932 DF,  p-value: < 2.2e-16
# Extract both partial slopes as named length-one vectors ("educ" and "IQ");
# part (iv) combines them with delta_1 from part (i).
beta_1 <- coef(reg3)["educ"]
beta_2 <- coef(reg3)["IQ"]
cat("Slope coefficients from multiple regression:\n")
## Slope coefficients from multiple regression:
cat("β1 (log(wage) on educ):", beta_1, "\n")
## β1 (log(wage) on educ): 0.0391199
cat("β2 (log(wage) on IQ):", beta_2, "\n\n")
## β2 (log(wage) on IQ): 0.005863132
# (iv) Verify the link between the simple and multiple regression slopes ----
# Checks that the simple-regression slope from (ii) equals
#   beta_1 + beta_2 * delta_1
# where beta_1 and beta_2 come from the multiple regression in (iii) and
# delta_1 is the slope of IQ on educ from (i).
# Lines prefixed with `##` are recorded console output.
cat("------ (iv) Verification of Relationship ------\n")
## ------ (iv) Verification of Relationship ------
# Rebuild the simple-regression slope from the multiple-regression pieces.
beta_1_hat_check <- beta_1 + beta_2 * delta_1

cat("Calculated β1_hat from multiple regression:", beta_1_hat_check, "\n")
## Calculated β1_hat from multiple regression: 0.05983921
cat("Original β1_hat from simple regression (ii):", beta_1_hat, "\n")
## Original β1_hat from simple regression (ii): 0.05983921
# Compare the two slopes within numerical tolerance.
# all.equal() returns TRUE on a match but a character vector describing the
# differences otherwise, so using it directly in `if` raises an error instead
# of reaching the `else` branch; isTRUE() turns it into a proper logical.
# unname() keeps the check purely numeric: the operands carry coefficient
# names, and a names mismatch would otherwise be reported as a difference.
slopes_match <- isTRUE(
  all.equal(unname(beta_1_hat_check), unname(beta_1_hat))
)
if (slopes_match) {
  cat("Verification successful: β1_hat from (ii) equals β1 + β2 * δ1 from (iii)\n")
} else {
  cat("Verification failed: β1_hat does NOT equal β1 + β2 * δ1\n")
}
## Verification successful: β1_hat from (ii) equals β1 + β2 * δ1 from (iii)