response variable
is.continuous(dependent_variable)
predictor variable
is.binary(predictor_variable)
# Attach the packages used below; CASchools is loaded after AER is attached.
library(tidyverse)
library(here)
library(AER)
library(car)

# Load the CASchools data set and work on a copy named df.
data("CASchools")
df <- CASchools

# Derive, in a single step:
#   STR   - students divided by teachers,
#   D     - logical flag, TRUE when STR is below 20,
#   score - average of the read and math columns.
df <- dplyr::mutate(
  df,
  STR = as.numeric(students) / as.numeric(teachers),
  D = STR < 20,
  score = (read + math) / 2
)

# Estimate the power-transformation parameter for score.
car::powerTransform(dplyr::select(df, score))
## Estimated transformation parameter
## score
## -0.2071764
# Collect both fits in one named list:
#   lm.dummy       - score regressed on the logical indicator D,
#   lm.d_transform - same predictor, response raised to the power -0.207
#                    (the rounded powerTransform estimate).
m.list <- list(
  lm.dummy = lm(score ~ D, data = df),
  lm.d_transform = lm(score^-.207 ~ D, data = df)
)

# Print the summary of the untransformed model.
summary(m.list[["lm.dummy"]])
##
## Call:
## lm(formula = score ~ D, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.496 -14.029 -0.346 12.884 49.504
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 650.077 1.393 466.666 < 2e-16 ***
## DTRUE 7.169 1.847 3.882 0.00012 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.74 on 418 degrees of freedom
## Multiple R-squared: 0.0348, Adjusted R-squared: 0.0325
## F-statistic: 15.07 on 1 and 418 DF, p-value: 0.0001202
# Summarise every model stored in m.list; the result is a named list of summaries.
lapply(m.list, summary)
## $lm.dummy
##
## Call:
## lm(formula = score ~ D, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.496 -14.029 -0.346 12.884 49.504
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 650.077 1.393 466.666 < 2e-16 ***
## DTRUE 7.169 1.847 3.882 0.00012 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.74 on 418 degrees of freedom
## Multiple R-squared: 0.0348, Adjusted R-squared: 0.0325
## F-statistic: 15.07 on 1 and 418 DF, p-value: 0.0001202
##
##
## $lm.d_transform
##
## Call:
## lm(formula = score^-0.207 ~ D, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0039230 -0.0010778 0.0000003 0.0011418 0.0043277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2616719 0.0001152 2272.050 < 2e-16 ***
## DTRUE -0.0005898 0.0001527 -3.863 0.00013 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001549 on 418 degrees of freedom
## Multiple R-squared: 0.03447, Adjusted R-squared: 0.03216
## F-statistic: 14.92 on 1 and 418 DF, p-value: 0.0001298
none
The estimate is not a slope but the difference in group means (D = TRUE vs. D = FALSE).

## Check Validity of Model
# Draw the default residual diagnostic panel for the untransformed model.
ggResidpanel::resid_panel(model = m.list[["lm.dummy"]])
Since the predictor is binary, we only look at the Q-Q plot and the density (histogram) of the residuals, and perhaps Cook's distance to check for influential observations.
# 95% confidence intervals for the coefficients of the untransformed model.
confint(m.list[["lm.dummy"]], level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 647.338594 652.81500
## DTRUE 3.539562 10.79931
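Reject the null hypothesis of no difference in mean score between the two groups at the 5% level, since 0 falls outside the 95% confidence interval for DTRUE (3.54 to 10.80).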