library(wooldridge)
data("discrim")

# (i)
# Fit log(psoda) on prpblck, log(income) and prppov by OLS
model1 <- lm(
  log(psoda) ~ prpblck + log(income) + prppov,
  data = discrim
)

# Print the full regression summary
summary(model1)
## 
## Call:
## lm(formula = log(psoda) ~ prpblck + log(income) + prppov, data = discrim)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32218 -0.04648  0.00651  0.04272  0.35622 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.46333    0.29371  -4.982  9.4e-07 ***
## prpblck      0.07281    0.03068   2.373   0.0181 *  
## log(income)  0.13696    0.02676   5.119  4.8e-07 ***
## prppov       0.38036    0.13279   2.864   0.0044 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08137 on 397 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.08696,    Adjusted R-squared:  0.08006 
## F-statistic:  12.6 on 3 and 397 DF,  p-value: 6.917e-08
# Pull the prpblck row out of the coefficient table, then its p-value
# (the p-value keeps its "Pr(>|t|)" name, which shows up when printed below)
coef_prpblck <- coef(summary(model1))["prpblck", ]
p_value_prpblck <- coef_prpblck["Pr(>|t|)"]

# Is prpblck significant at the 5% level? At the 1% level?
significant_5 <- p_value_prpblck < 0.05
significant_1 <- p_value_prpblck < 0.01
significant_5
## Pr(>|t|) 
##     TRUE
significant_1
## Pr(>|t|) 
##    FALSE
# (ii)
# Correlation between log(income) and prppov.
# The model1 summary reports 9 observations deleted due to missingness, so
# discrim contains NAs in the model variables. With cor()'s default
# use = "everything", a single NA in either column makes the result NA;
# use = "complete.obs" restricts the computation to rows where both
# log(income) and prppov are observed.
correlation <- cor(
  log(discrim$income),
  discrim$prppov,
  use = "complete.obs"
)

# Report the correlation
correlation

# Statistical significance of log(income) and prppov in model1:
# extract the coefficient table once and read both p-values from it
model1_coefs <- summary(model1)$coefficients
p_value_log_income <- model1_coefs["log(income)", "Pr(>|t|)"]
p_value_prppov <- model1_coefs["prppov", "Pr(>|t|)"]

# Report the p-values
p_value_log_income
## [1] 4.802041e-07
p_value_prppov
## [1] 0.00440036
# (iii)
# Re-estimate the regression with log(hseval) added as a regressor
model2 <- lm(
  log(psoda) ~ prpblck + log(income) + prppov + log(hseval),
  data = discrim
)

# Print the full summary of the extended model
summary(model2)
## 
## Call:
## lm(formula = log(psoda) ~ prpblck + log(income) + prppov + log(hseval), 
##     data = discrim)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30652 -0.04380  0.00701  0.04332  0.35272 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.84151    0.29243  -2.878 0.004224 ** 
## prpblck      0.09755    0.02926   3.334 0.000937 ***
## log(income) -0.05299    0.03753  -1.412 0.158706    
## prppov       0.05212    0.13450   0.388 0.698571    
## log(hseval)  0.12131    0.01768   6.860 2.67e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07702 on 396 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.1839, Adjusted R-squared:  0.1757 
## F-statistic: 22.31 on 4 and 396 DF,  p-value: < 2.2e-16
# Coefficient-table row for log(hseval) (estimate, std. error, t, p-value)
# and, separately, its two-sided p-value
coef_log_hseval <- coef(summary(model2))["log(hseval)", ]
p_value_log_hseval <- coef_log_hseval["Pr(>|t|)"]
coef_log_hseval
##     Estimate   Std. Error      t value     Pr(>|t|) 
## 1.213057e-01 1.768407e-02 6.859601e+00 2.668125e-11
p_value_log_hseval
##     Pr(>|t|) 
## 2.668125e-11
# (iv)
# Individual p-values for log(income) and prppov once log(hseval) is
# controlled for; the coefficient table is extracted once and reused
model2_coefs <- summary(model2)$coefficients
p_value_log_income_model2 <- model2_coefs["log(income)", "Pr(>|t|)"]
p_value_prppov_model2 <- model2_coefs["prppov", "Pr(>|t|)"]

# Report the p-values (0.158706 and 0.698571 in the model2 coefficient
# table: neither variable is individually significant at the 5% level)
p_value_log_income_model2
p_value_prppov_model2

# F-test of the joint null that the coefficients on log(income) and prppov
# are both zero in model2
library(car)
## Loading required package: carData
linearHypothesis(model2, c("log(income) = 0", "prppov = 0"))
## 
## Linear hypothesis test:
## log(income) = 0
## prppov = 0
## 
## Model 1: restricted model
## Model 2: log(psoda) ~ prpblck + log(income) + prppov + log(hseval)
## 
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1    398 2.3911                              
## 2    396 2.3493  2  0.041797 3.5227 0.03045 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1