## ID Gender Program Math Days
## 1 S1001 male Academic 63 4
## 2 S1002 male Academic 27 4
## 3 S1003 female Academic 20 2
## 4 S1004 female Academic 16 3
## 5 S1005 female Academic 2 3
## 6 S1006 female Academic 71 13
## 'data.frame': 314 obs. of 5 variables:
## $ ID : chr "S1001" "S1002" "S1003" "S1004" ...
## $ Gender : chr "male" "male" "female" "female" ...
## $ Program: chr "Academic" "Academic" "Academic" "Academic" ...
## $ Math : int 63 27 20 16 2 71 63 3 51 49 ...
## $ Days : int 4 4 2 3 3 13 11 7 10 9 ...
library(ggplot2)
ggplot(dtahw2,
aes(Days)) +
geom_histogram(binwidth = 1, fill=8, col=1)+
facet_grid(Gender ~ Program) +
labs(x="Days absent",
y="Frequency") +
theme_minimal()+
theme(legend.position = c(.95, .9))
Gender | Program | Days | Math |
---|---|---|---|
female | Academic | 7.759036 | 43.69880 |
male | Academic | 6.119048 | 41.50000 |
female | General | 12.363636 | 44.40909 |
male | General | 8.555556 | 45.33333 |
female | Vocational | 2.709091 | 58.00000 |
male | Vocational | 2.634615 | 58.84615 |
Gender | Program | Days | Math |
---|---|---|---|
female | Academic | 68.55098 | 586.7252 |
male | Academic | 41.81698 | 689.8193 |
female | General | 84.33766 | 804.5390 |
male | General | 41.67320 | 923.5294 |
female | Vocational | 10.28418 | 512.4815 |
male | Vocational | 18.07956 | 364.9170 |
ggplot(dtahw2,
aes(Math, Days,
color=Gender)) +
stat_smooth(method="glm",
formula=y ~ x,
method.args=list(family=poisson),
size=rel(.5)) +
geom_point(pch=20, alpha=.5) +
facet_grid(. ~ Program) +
labs(y="Days absent",
x="Math score") +
theme_minimal()+
theme(legend.position = c(.95, .9))
sjPlot::tab_model(m0 <- glm(Days ~ Program*Gender*Math, data=dtahw2,
family=poisson(link=log)), show.se=T, show.r2=F, show.obs=F)
Days | ||||
---|---|---|---|---|
Predictors | Incidence Rate Ratios | std. Error | CI | p |
(Intercept) | 10.43 | 0.08 | 8.94 – 12.14 | <0.001 |
Program [General] | 1.47 | 0.13 | 1.13 – 1.91 | 0.004 |
Program [Vocational] | 0.23 | 0.25 | 0.14 – 0.36 | <0.001 |
Gender [male] | 0.85 | 0.11 | 0.69 – 1.06 | 0.146 |
Math | 0.99 | 0.00 | 0.99 – 1.00 | <0.001 |
Program [General] * Gender [male] |
0.97 | 0.20 | 0.65 – 1.43 | 0.863 |
Program [Vocational] * Gender [male] |
1.59 | 0.37 | 0.76 – 3.27 | 0.214 |
Program [General] * Math | 1.00 | 0.00 | 1.00 – 1.01 | 0.479 |
Program [Vocational] * Math |
1.01 | 0.00 | 1.00 – 1.02 | 0.019 |
Gender [male] * Math | 1.00 | 0.00 | 0.99 – 1.00 | 0.274 |
(Program [General] Gender [male]) Math |
1.00 | 0.00 | 0.99 – 1.01 | 0.692 |
(Program [Vocational] Gender [male]) Math |
1.00 | 0.01 | 0.98 – 1.01 | 0.634 |
## [1] 0
## # Overdispersion test
##
## dispersion ratio = 6.492
## Pearson's Chi-Squared = 1960.446
## p-value = < 0.001
## Overdispersion detected.
#
plot(log(fitted(m0)), log((dtahw2$Days-fitted(m0))^2),
xlab=expression(hat(mu)),
ylab=expression((y-hat(mu))^2))
grid()
abline(0, 1, col=2)
Estimate | Std. Error | z value | Pr(>|z|) | |
---|---|---|---|---|
(Intercept) | 2.3450136 | 0.1989867 | 11.7847779 | 0.0000000 |
ProgramGeneral | 0.3873043 | 0.3411743 | 1.1352096 | 0.2562875 |
ProgramVocational | -1.4878389 | 0.6257345 | -2.3777477 | 0.0174187 |
Gendermale | -0.1588403 | 0.2785265 | -0.5702878 | 0.5684825 |
Math | -0.0071045 | 0.0043252 | -1.6425893 | 0.1004679 |
ProgramGeneral:Gendermale | -0.0347250 | 0.5144980 | -0.0674929 | 0.9461893 |
ProgramVocational:Gendermale | 0.4617659 | 0.9469746 | 0.4876222 | 0.6258175 |
ProgramGeneral:Math | 0.0019809 | 0.0071360 | 0.2775861 | 0.7813301 |
ProgramVocational:Math | 0.0094842 | 0.0103312 | 0.9180121 | 0.3586125 |
Gendermale:Math | -0.0026914 | 0.0062654 | -0.4295662 | 0.6675112 |
ProgramGeneral:Gendermale:Math | -0.0017088 | 0.0110050 | -0.1552787 | 0.8766016 |
ProgramVocational:Gendermale:Math | -0.0029735 | 0.0159262 | -0.1867067 | 0.8518906 |
## Warning in drop1.glm(m0, test = "F"): F test assumes 'quasipoisson' family
## Single term deletions
##
## Model:
## Days ~ Program * Gender * Math
## Df Deviance AIC F value Pr(>F)
## <none> 1730.4 2637.7
## Program:Gender:Math 2 1730.7 2634.0 0.0274 0.973
sjPlot::tab_model(m1 <- update(m0, . ~ . - Gender:Math:Program, family=quasipoisson),
show.se=T, show.r2=F, show.obs=F)
Days | ||||
---|---|---|---|---|
Predictors | Incidence Rate Ratios | std. Error | CI | p |
(Intercept) | 10.27 | 0.18 | 7.08 – 14.59 | <0.001 |
Program [General] | 1.52 | 0.28 | 0.86 – 2.63 | 0.144 |
Program [Vocational] | 0.24 | 0.50 | 0.09 – 0.61 | 0.005 |
Gender [male] | 0.88 | 0.24 | 0.55 – 1.40 | 0.586 |
Math | 0.99 | 0.00 | 0.99 – 1.00 | 0.084 |
Program [General] * Gender [male] |
0.91 | 0.30 | 0.50 – 1.61 | 0.738 |
Program [Vocational] * Gender [male] |
1.36 | 0.35 | 0.68 – 2.71 | 0.386 |
Program [General] * Math | 1.00 | 0.01 | 0.99 – 1.01 | 0.819 |
Program [Vocational] * Math |
1.01 | 0.01 | 0.99 – 1.02 | 0.296 |
Gender [male] * Math | 1.00 | 0.00 | 0.99 – 1.01 | 0.469 |
## Single term deletions
##
## Model:
## Days ~ Program + Gender + Math + Program:Gender + Program:Math +
## Gender:Math
## Df Deviance F value Pr(>F)
## <none> 1730.7
## Program:Gender 2 1737.5 0.5943 0.5526
## Program:Math 2 1737.9 0.6333 0.5315
## Gender:Math 1 1734.1 0.5952 0.4410
sjPlot::tab_model(m2 <- update(m1, . ~ . - Math:Program, family=quasipoisson), show.se=T, show.r2=F, show.obs=F)
Days | ||||
---|---|---|---|---|
Predictors | Incidence Rate Ratios | std. Error | CI | p |
(Intercept) | 9.71 | 0.16 | 6.99 – 13.30 | <0.001 |
Program [General] | 1.60 | 0.18 | 1.10 – 2.27 | 0.012 |
Program [Vocational] | 0.38 | 0.24 | 0.23 – 0.59 | <0.001 |
Gender [male] | 0.89 | 0.24 | 0.56 – 1.41 | 0.628 |
Math | 0.99 | 0.00 | 0.99 – 1.00 | 0.097 |
Program [General] * Gender [male] |
0.90 | 0.30 | 0.50 – 1.60 | 0.722 |
Program [Vocational] * Gender [male] |
1.35 | 0.35 | 0.68 – 2.67 | 0.386 |
Gender [male] * Math | 1.00 | 0.00 | 0.99 – 1.01 | 0.440 |
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dta_m2 <- broom::augment(m2)
dta_m2 <- dta_m2 %>% mutate(yhat=predict(m2, type="response"),
residSqr=residuals(m2, type="response")^2)
ggplot(data=dta_m2,
aes(x=yhat,
y=residSqr)) +
geom_point() +
geom_abline(intercept=0, slope=1) +
geom_abline(intercept=0,
slope=summary(m2)$dispersion,
color="green") +
stat_smooth(method="loess",
formula=y ~ x,
se=FALSE) +
labs(x="Fitted values",
y="Squared residuals") +
theme_minimal()
# Outliers