Data Prep:
library("AER")
data("CASchools")
Interpret the effect of class-size on test scores (Test Scores = beta_1 + beta_2 Class-Size)
# Students per teacher for every row of CASchools; used as the class-size
# measure throughout.
teacher_stu_ratio <- CASchools$students / CASchools$teachers

# Simple linear regression of math scores on class size.
lm_math <- lm(CASchools$math ~ teacher_stu_ratio)

# Scatter plot of the data, with the fitted line added via regLine().
plot(CASchools$math ~ teacher_stu_ratio)
regLine(lm_math)

# Estimated intercept and slope.
coef(lm_math)
(Intercept) teacher_stu_ratio
691.417362 -1.938591
# Full regression table: estimates, standard errors, t values and p-values.
summary(lm_math)
Call:
lm(formula = CASchools$math ~ teacher_stu_ratio)
Residuals:
Min 1Q Median 3Q Max
-44.615 -13.374 -0.828 12.728 52.711
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 691.4174 9.3825 73.692 < 2e-16 ***
teacher_stu_ratio -1.9386 0.4755 -4.077 5.47e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 18.41 on 418 degrees of freedom
Multiple R-squared: 0.03824, Adjusted R-squared: 0.03594
F-statistic: 16.62 on 1 and 418 DF, p-value: 5.467e-05
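Test whether the effect of class-size on test scores is statistically different from zero (\(H_0: \beta_1 = 0\); \(H_A: \beta_1 \neq 0\), where \(\beta_1\) here denotes the slope on class size, stored as `beta1` in the code) ### According to the summary function, the standard error of the slope coefficient on teacher_stu_ratio (not of the intercept) is 0.4755 and its t-value is -4.077. Since |-4.077| = 4.077 is bigger than the two-sided 5% critical value of 1.96, we reject H0.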
# Slope estimate (second coefficient) of the class-size regression.
beta1 <- coef(lm_math)[2]
print(beta1)
teacher_stu_ratio
-1.938591
# Standard error of the slope, read directly from the fitted model rather than
# retyped from the printed summary (0.4755 is only the rounded display value,
# and a hand-typed constant silently goes stale if the model changes).
SE_beta1 <- summary(lm_math)$coefficients["teacher_stu_ratio", "Std. Error"]

# t statistic for H0: slope = 0. With the unrounded standard error the result
# can differ from the hand-typed version in the fourth decimal place.
t_beta1 <- (beta1 - 0) / SE_beta1
t_beta1
teacher_stu_ratio
-4.076952
# Two-sided test at the 5% level: TRUE means |t| exceeds the critical value 1.96.
abs(t_beta1) > 1.96
teacher_stu_ratio
TRUE
Test whether the effect of class-size on test scores is statistically different from one (\(H_0: \beta_1 = 1\); \(H_A: \beta_1 \neq 1\))
# t statistic for H0: slope = 1, reusing the slope's standard error.
t_beta1 <- (beta1 - 1) / SE_beta1
print(t_beta1)
teacher_stu_ratio
-6.180001
# Two-sided test at the 5% level: TRUE means |t| exceeds the critical value 1.96.
abs(t_beta1) > 1.96
teacher_stu_ratio
TRUE
Construct the dummy variable (\(D_i\), see p. 195 in Stock and Watson) and interpret the coefficient on the dummy variable (Test Scores = \(\beta_1 + \beta_2 \cdot D_i\)). Generate \(D_i\) (= 1 if class-size < median, 0 otherwise), a small class-size dummy, and compare its coefficient to the difference-of-means test from chapter 3 (difference in test scores for class sizes above and below the median).
# Median student-teacher ratio: the cut-off between small and large classes.
med <- stats::median(teacher_stu_ratio)
print(med)
[1] 19.72321
# Small-class dummy: 1 when the ratio is strictly below the median, 0 otherwise.
D_i <- as.numeric(teacher_stu_ratio < med)
print(D_i)
[1] 1 0 1 1 1 0 1 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0
[45] 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 1 1 1 1 1 0 1 0 1 1 1 0 0
[89] 0 0 1 0 1 1 1 0 1 0 0 0 1 0 1 0 0 1 0 1 1 0 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 0 0 1 0
[133] 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 1 0 1 1 1
[177] 0 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 1 1 0 1 1 0 0 0 0 1 0 1 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0 0
[221] 0 1 1 0 1 0 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 0
[265] 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 1 1 0 1 1 0 1 1 1 1 0 1 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 1
[309] 1 0 0 0 0 0 1 1 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 0 1 0 1 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 0 1
[353] 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0 1 1 0 1 0
[397] 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1
# Dummy-variable regression: Test Scores = beta_1 + beta_2 * D_i.
# D_i has to enter as the regressor itself; multiplying it into the fitted
# class-size line (intercept + D_i * slope * ratio) does not estimate the
# coefficient on the dummy.
lm_dummy <- lm(CASchools$math ~ D_i)

# With a single 0/1 regressor, the intercept is the mean math score of the
# D_i = 0 group (large classes) and the slope is the difference in mean math
# score between the D_i = 1 group (small classes) and the D_i = 0 group.
beta_0 <- coef(lm_dummy)[1]
beta_D <- coef(lm_dummy)[2]
coef(lm_dummy)

# Fitted score for every row: beta_0 when D_i = 0, beta_0 + beta_D when D_i = 1.
math_scores <- beta_0 + beta_D * D_i
Subsetting:
# Split the sample at the median student-teacher ratio: rows strictly below
# the median go to low_ratio, all remaining rows to high_ratio.
low_ratio <- subset(CASchools, students / teachers < med)
print(low_ratio)
high_ratio <- subset(CASchools, students / teachers >= med)
print(high_ratio)

# Class-size regression restricted to the below-median group.
# Note: teacher_stu_ratio and lm_math are overwritten here.
teacher_stu_ratio <- low_ratio$students / low_ratio$teachers
lm_math <- lm(low_ratio$math ~ teacher_stu_ratio)
print(lm_math)
Call:
lm(formula = low_ratio$math ~ teacher_stu_ratio)
Coefficients:
(Intercept) teacher_stu_ratio
688.44 -1.74
# Intercept of the low-ratio regression.
beta_0 <- coef(lm_math)[1]
print(beta_0)
(Intercept)
688.4394
# Slope of the low-ratio regression.
beta1 <- coef(lm_math)[2]
print(beta1)
teacher_stu_ratio
-1.739883
# Fitted math scores for the low-ratio group, then their average.
# (For OLS with an intercept, the mean of the fitted values equals the
# sample mean of the response in that group.)
math_scores_low <- beta_0 + beta1 * teacher_stu_ratio
print(mean(math_scores_low))
[1] 656.7967
# Class-size regression restricted to the at-or-above-median group.
# Note: teacher_stu_ratio and lm_math are overwritten again.
teacher_stu_ratio <- high_ratio$students / high_ratio$teachers
lm_math <- lm(high_ratio$math ~ teacher_stu_ratio)
print(lm_math)
Call:
lm(formula = high_ratio$math ~ teacher_stu_ratio)
Coefficients:
(Intercept) teacher_stu_ratio
667.2536 -0.8232
# Intercept of the high-ratio regression.
beta_0 <- coef(lm_math)[1]
print(beta_0)
(Intercept)
667.2536
# Slope of the high-ratio regression.
beta1 <- coef(lm_math)[2]
print(beta1)
teacher_stu_ratio
-0.8232153
# Fitted math scores for the high-ratio group, then their average.
# (For OLS with an intercept, the mean of the fitted values equals the
# sample mean of the response in that group.)
math_scores_high <- beta_0 + beta1 * teacher_stu_ratio
print(mean(math_scores_high))
[1] 649.8886
# Difference in average math score: below-median minus at-or-above-median group.
diff_means <- mean(math_scores_low) - mean(math_scores_high)
print(diff_means)
[1] 6.908093
Calculate the t statistic and compare it with the critical values from a t table (the link to the table is missing from the original text).
# Regression table for the fit currently stored in lm_math, i.e. the
# high-ratio subsample. The Std. Error listed for teacher_stu_ratio is the
# standard error of that subsample's slope, not of the difference in means.
summary(lm_math)
Call:
lm(formula = high_ratio$math ~ teacher_stu_ratio)
Residuals:
Min 1Q Median 3Q Max
-44.232 -12.171 0.339 12.361 41.017
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 667.2536 21.1377 31.567 <2e-16 ***
teacher_stu_ratio -0.8232 1.0005 -0.823 0.412
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 17.06 on 208 degrees of freedom
Multiple R-squared: 0.003244, Adjusted R-squared: -0.001548
F-statistic: 0.677 on 1 and 208 DF, p-value: 0.4116
# Standard error of the difference between the two group means
# (unequal-variance form): sqrt(s_low^2 / n_low + s_high^2 / n_high).
# The slope standard error of the high-ratio regression (about 1) is not the
# standard error of a difference in means, so dividing by it does not give a
# valid t statistic.
SE_diff <- sqrt(
  var(low_ratio$math) / nrow(low_ratio) +
    var(high_ratio$math) / nrow(high_ratio)
)

# t statistic for H0: equal mean math scores in small and large classes.
# Compare |t| with the critical value from the t table (about 1.96 at the
# 5% level for samples this large).
t <- diff_means / SE_diff
t
[1] 6.908093