##Reading achievement data for 6,528 middle school students clustered in 122 schools.Outcome= readprof, is a binary variable representing whether or not a student scored “proficient” on a standardized reading exam (1 = proficient, 0 = not proficient).
suppressPackageStartupMessages(library(tidyverse))
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
##Load in dataset
readprof <- haven::read_dta("readprof.dta")
glimpse(readprof)
## Rows: 6,528
## Columns: 8
## $ schcode <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, …
## $ lowses <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ female <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ minor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ schcomp <dbl> -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, …
## $ smallsch <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ readprof <dbl> 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
#make new clean dataset
#well that didn’t work, take two while fixing the dbl lbl (not sure if I want factors for all of them, *review Garson)
readprof.clean <- readprof %>%
mutate(.,
lowses.fac = as_factor(lowses),
female.fac = as_factor(female),
minor.fac = as_factor(minor),
smallsch.fac = as_factor(smallsch),
readprof.fac = as_factor(readprof)
)
glimpse(readprof.clean)
## Rows: 6,528
## Columns: 13
## $ schcode <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1…
## $ lowses <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ female <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ minor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ schcomp <dbl> -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2…
## $ smallsch <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ readprof <dbl> 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,…
## $ lowses.fac <fct> non-low socioeconomic status, non-low socioeconomic stat…
## $ female.fac <fct> male, male, male, male, male, male, male, male, male, ma…
## $ minor.fac <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ smallsch.fac <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ readprof.fac <fct> 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,…
#trying Hmisc package again…never seem to get it
#null model time, multilevel logistic regression models require glmer instead of lmer because of binary outcome variable.
model.null <- glmer(readprof.fac ~ (1|schcode), family = binomial, data = readprof.clean)
summary(model.null)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: readprof.fac ~ (1 | schcode)
## Data: readprof.clean
##
## AIC BIC logLik deviance df.resid
## 8868.5 8882.1 -4432.3 8864.5 6526
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7663 -0.9815 0.6879 0.9145 1.3931
##
## Random effects:
## Groups Name Variance Std.Dev.
## schcode (Intercept) 0.2283 0.4778
## Number of obs: 6528, groups: schcode, 122
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.13155 0.05198 2.531 0.0114 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##ICC for null model ##logit=0.13155 ##to calculate it is random effect/random effect+pi^2/3
icc.null <- 0.2283/ (0.2283 + (pi ^ 2/3))
icc.null
## [1] 0.06489173
##DV= “readprof” as the DV, “lowses”, “female”, and “minor” as level-1 IVs. Include all three IVs as fixed effects (use a random intercept only).
model.2 <- glmer(readprof.fac ~ lowses.fac + female.fac + minor.fac + (1|schcode), family = binomial, data = readprof.clean)
summary(model.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: readprof.fac ~ lowses.fac + female.fac + minor.fac + (1 | schcode)
## Data: readprof.clean
##
## AIC BIC logLik deviance df.resid
## 8696.8 8730.7 -4343.4 8686.8 6523
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9202 -0.9482 0.6194 0.8937 1.6114
##
## Random effects:
## Groups Name Variance Std.Dev.
## schcode (Intercept) 0.1033 0.3214
## Number of obs: 6528, groups: schcode, 122
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.39009 0.05876 6.639 3.15e-11 ***
## lowses.faclow socioeconomic status -0.43685 0.05716 -7.642 2.14e-14 ***
## female.facfemale 0.32357 0.05168 6.260 3.84e-10 ***
## minor.fac1 -0.43906 0.05605 -7.833 4.77e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) lws.ss fml.fc
## lwss.fclwss -0.359
## femal.fcfml -0.415 -0.031
## minor.fac1 -0.393 -0.171 -0.004
##now conducting odds ratio for model; lowes.fac
lowes.fac.odds.ratio <- exp(-0.43685)
lowes.fac.odds.ratio
## [1] 0.6460683
When the OR is less than one (.65 in this case), we subtract 1 - OR which equals .35. This means that for children in lower SES, the odds of being proficient in reading are 35% lower than for children who live higher SES.
##now conducting odds ratio for model; female
female.fac.odds.ratio <- exp(0.32357)
female.fac.odds.ratio
## [1] 1.382053
##now conducting odds ratio for model; minority
minor.fac.odds.ratio <- exp(-0.43906)
minor.fac.odds.ratio
## [1] 0.6446421
When the OR is less than one (.64 in this case), we subtract 1 - OR which equals .36. This means that for children who are identified as minority, the odds of being proficient in reading is 36% lower than for children who are not minority.
##Building from the previous section, test the impact of “smallsch” as a level-2 IVs, keeping all IVs as fixed effects (random intercept only). Interpret the results for the smallsch coefficient in terms of odds ratios.
model.3 <- glmer(readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac + (1|schcode), family = binomial, data = readprof.clean)
summary(model.3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac +
## (1 | schcode)
## Data: readprof.clean
##
## AIC BIC logLik deviance df.resid
## 8695.8 8736.5 -4341.9 8683.8 6522
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9364 -0.9556 0.6210 0.8947 1.6222
##
## Random effects:
## Groups Name Variance Std.Dev.
## schcode (Intercept) 0.09668 0.3109
## Number of obs: 6528, groups: schcode, 122
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.32449 0.07001 4.635 3.57e-06 ***
## lowses.faclow socioeconomic status -0.43920 0.05713 -7.688 1.49e-14 ***
## female.facfemale 0.32292 0.05167 6.250 4.12e-10 ***
## minor.fac1 -0.43983 0.05598 -7.857 3.93e-15 ***
## smallsch.fac1 0.13854 0.07941 1.745 0.0811 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) lws.ss fml.fc mnr.f1
## lwss.fclwss -0.303
## femal.fcfml -0.346 -0.031
## minor.fac1 -0.336 -0.172 -0.003
## smllsch.fc1 -0.552 -0.002 -0.003 0.009
##now conducting odds ratio for model; smallsch.fac.fac1, 1=small school
smallsch.fac.odds.ratio <- exp(0.13854)
smallsch.fac.odds.ratio
## [1] 1.148596
##Next, add a second level-2 IV, schcomp to the model along with all variables from #4. Interpret the smallsch and schcomp coefficients.
model.4 <- glmer(readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac + schcomp + (1|schcode), family = binomial, data = readprof.clean)
summary(model.4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac +
## schcomp + (1 | schcode)
## Data: readprof.clean
##
## AIC BIC logLik deviance df.resid
## 8665.5 8713.0 -4325.8 8651.5 6521
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0503 -0.9646 0.6064 0.8824 1.6455
##
## Random effects:
## Groups Name Variance Std.Dev.
## schcode (Intercept) 0.05658 0.2379
## Number of obs: 6528, groups: schcode, 122
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.28348 0.06401 4.429 9.49e-06 ***
## lowses.faclow socioeconomic status -0.36153 0.05778 -6.257 3.92e-10 ***
## female.facfemale 0.32237 0.05161 6.246 4.21e-10 ***
## minor.fac1 -0.39549 0.05580 -7.087 1.37e-12 ***
## smallsch.fac1 0.10372 0.07008 1.480 0.139
## schcomp -0.20725 0.03511 -5.902 3.58e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) lws.ss fml.fc mnr.f1 smll.1
## lwss.fclwss -0.329
## femal.fcfml -0.377 -0.031
## minor.fac1 -0.366 -0.159 -0.004
## smllsch.fc1 -0.500 -0.024 -0.005 -0.002
## schcomp 0.154 -0.278 -0.011 -0.175 0.083
##now conducting odds ratio for model; smallsch.fac.fac1, 1=small school
smallsch.fac.odds.ratio <- exp(0.10372)
smallsch.fac.odds.ratio
## [1] 1.10929
##now conducting odds ratio for model; schcomp (scores of extra help)
schcomp.fac.odds.ratio <- exp(-0.20725)
schcomp.fac.odds.ratio
## [1] 0.8128164
##okay predicted probabilities associated with each estimate of A male student, who qualifies as low SES, who identifies as a member of a minority racial/ethnic group. logit=.390 + -.437 + -.437
logit.male <- .390 + -.437 + -.437
logit.male
## [1] -0.484
exp(logit.male)/(1+exp(logit.male))
## [1] 0.381308
So, for a male student who qualifies as low SES, and identifies as a member of a minority racial/ethnic group, the predicted probability of being proficient in reading is 38%.
##okay predicted probabilities associated with each estimate of a female student, who does not qualify as low SES, who does not identify as a member of a minority racial/ethnic group.
logit.female <- .390 + .323
logit.female
## [1] 0.713
exp(logit.female)/(1+exp(logit.female))
## [1] 0.6710637
So, for a female student who does not qualify as low SES, and does not identify as a member of a minority racial/ethnic group, the predicted probability of being proficient in reading is 67%.