library(wooldridge)
data("discrim")
# (i)
# Regress log(psoda) on prpblck, log(income) and prppov by OLS
model1 <- lm(
  log(psoda) ~ prpblck + log(income) + prppov,
  data = discrim
)
# Compute the model summary once, display it, and reuse it below
model1_summary <- summary(model1)
model1_summary
##
## Call:
## lm(formula = log(psoda) ~ prpblck + log(income) + prppov, data = discrim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32218 -0.04648 0.00651 0.04272 0.35622
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.46333 0.29371 -4.982 9.4e-07 ***
## prpblck 0.07281 0.03068 2.373 0.0181 *
## log(income) 0.13696 0.02676 5.119 4.8e-07 ***
## prppov 0.38036 0.13279 2.864 0.0044 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08137 on 397 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.08696, Adjusted R-squared: 0.08006
## F-statistic: 12.6 on 3 and 397 DF, p-value: 6.917e-08
# Pull the prpblck row out of the coefficient table and keep its p-value
# (single-bracket indexing keeps the "Pr(>|t|)" name on the result)
coef_prpblck <- coef(model1_summary)["prpblck", ]
p_value_prpblck <- coef_prpblck["Pr(>|t|)"]
# Flag whether prpblck is significant at the 5% and at the 1% level
significant_5 <- p_value_prpblck < 0.05
significant_1 <- p_value_prpblck < 0.01
significant_5
## Pr(>|t|)
## TRUE
significant_1
## Pr(>|t|)
## FALSE
# (ii)
# Compute the correlation between log(income) and prppov.
# The data contain missing values (the regression above drops 9 observations);
# if any of them fall in income or prppov, cor() returns NA by default, so
# restrict the computation to complete pairs.
correlation <- cor(
  log(discrim$income),
  discrim$prppov,
  use = "complete.obs"
)
# Report the correlation
correlation
# Check the statistical significance of log(income) and prppov in the model
p_value_log_income <- summary(model1)$coefficients["log(income)", "Pr(>|t|)"]
p_value_prppov <- summary(model1)$coefficients["prppov", "Pr(>|t|)"]
# Report the p-values
p_value_log_income
## [1] 4.802041e-07
p_value_prppov
## [1] 0.00440036
# (iii)
# Re-estimate the regression with log(hseval) added as a regressor
model2 <- lm(
  log(psoda) ~ prpblck + log(income) + prppov + log(hseval),
  data = discrim
)
# Compute the summary of the extended model once, display it, and reuse it
model2_summary <- summary(model2)
model2_summary
##
## Call:
## lm(formula = log(psoda) ~ prpblck + log(income) + prppov + log(hseval),
## data = discrim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30652 -0.04380 0.00701 0.04332 0.35272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.84151 0.29243 -2.878 0.004224 **
## prpblck 0.09755 0.02926 3.334 0.000937 ***
## log(income) -0.05299 0.03753 -1.412 0.158706
## prppov 0.05212 0.13450 0.388 0.698571
## log(hseval) 0.12131 0.01768 6.860 2.67e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07702 on 396 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.1839, Adjusted R-squared: 0.1757
## F-statistic: 22.31 on 4 and 396 DF, p-value: < 2.2e-16
# Pull the log(hseval) row of the coefficient table and its p-value
# (single-bracket indexing keeps the "Pr(>|t|)" name on the result)
coef_log_hseval <- coef(model2_summary)["log(hseval)", ]
p_value_log_hseval <- coef_log_hseval["Pr(>|t|)"]
coef_log_hseval
## Estimate Std. Error t value Pr(>|t|)
## 1.213057e-01 1.768407e-02 6.859601e+00 2.668125e-11
p_value_log_hseval
## Pr(>|t|)
## 2.668125e-11
# (iv)
# Extract the updated p-values for log(income) and prppov in the new model
p_value_log_income_model2 <- summary(model2)$coefficients["log(income)", "Pr(>|t|)"]
p_value_prppov_model2 <- summary(model2)$coefficients["prppov", "Pr(>|t|)"]
# Report them: these are the Pr(>|t|) entries of the model2 coefficient table
# (about 0.159 for log(income) and 0.699 for prppov), so neither variable is
# individually significant at the 5% level once log(hseval) is included
p_value_log_income_model2
p_value_prppov_model2
# Perform an F-test for joint significance of log(income) and prppov
# (H0: both coefficients are zero), using car::linearHypothesis
library(car)
## Loading required package: carData
linearHypothesis(model2, c("log(income) = 0", "prppov = 0"))
##
## Linear hypothesis test:
## log(income) = 0
## prppov = 0
##
## Model 1: restricted model
## Model 2: log(psoda) ~ prpblck + log(income) + prppov + log(hseval)
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 398 2.3911
## 2 396 2.3493 2 0.041797 3.5227 0.03045 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Print summary statistics for the `cars` data set (columns speed and dist).
# NOTE(review): `cars` is not referenced anywhere else in the visible part of
# this script and is unrelated to the discrim regressions above; this looks
# like leftover boilerplate from a default R Markdown template -- confirm and
# remove if so.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00