##Reading achievement data for 6,528 middle school students clustered in 122 schools.Outcome= readprof, is a binary variable representing whether or not a student scored “proficient” on a standardized reading exam (1 = proficient, 0 = not proficient).

suppressPackageStartupMessages(library(tidyverse))
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units

##Load in dataset

readprof <- haven::read_dta("readprof.dta")

glimpse(readprof)
## Rows: 6,528
## Columns: 8
## $ schcode  <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, …
## $ lowses   <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ female   <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ minor    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ schcomp  <dbl> -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, …
## $ smallsch <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ id       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
## $ readprof <dbl> 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …

#make new clean dataset

#well that didn’t work, take two while fixing the dbl lbl (not sure if I want factors for all of them, *review Garson)

readprof.clean <- readprof %>%
 mutate(.,
        lowses.fac = as_factor(lowses),
        female.fac = as_factor(female),
        minor.fac = as_factor(minor),
        smallsch.fac = as_factor(smallsch),
        readprof.fac = as_factor(readprof)
        )
glimpse(readprof.clean)
## Rows: 6,528
## Columns: 13
## $ schcode      <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1…
## $ lowses       <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ female       <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ minor        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ schcomp      <dbl> -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2.4, -2…
## $ smallsch     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ id           <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ readprof     <dbl> 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,…
## $ lowses.fac   <fct> non-low socioeconomic status, non-low socioeconomic stat…
## $ female.fac   <fct> male, male, male, male, male, male, male, male, male, ma…
## $ minor.fac    <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ smallsch.fac <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ readprof.fac <fct> 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,…

#trying Hmisc package again…never seem to get it

#null model time, multilevel logistic regression models require glmer instead of lmer because of binary outcome variable.

model.null <- glmer(readprof.fac ~ (1|schcode), family = binomial, data = readprof.clean)
summary(model.null)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: readprof.fac ~ (1 | schcode)
##    Data: readprof.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##   8868.5   8882.1  -4432.3   8864.5     6526 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7663 -0.9815  0.6879  0.9145  1.3931 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  schcode (Intercept) 0.2283   0.4778  
## Number of obs: 6528, groups:  schcode, 122
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  0.13155    0.05198   2.531   0.0114 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##ICC for null model ##logit=0.13155 ##to calculate it is random effect/random effect+pi^2/3

icc.null <- 0.2283/ (0.2283 + (pi ^ 2/3))
icc.null
## [1] 0.06489173

##DV= “readprof” as the DV, “lowses”, “female”, and “minor” as level-1 IVs. Include all three IVs as fixed effects (use a random intercept only).

model.2 <- glmer(readprof.fac ~ lowses.fac + female.fac + minor.fac + (1|schcode), family = binomial, data = readprof.clean)
summary(model.2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: readprof.fac ~ lowses.fac + female.fac + minor.fac + (1 | schcode)
##    Data: readprof.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##   8696.8   8730.7  -4343.4   8686.8     6523 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9202 -0.9482  0.6194  0.8937  1.6114 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  schcode (Intercept) 0.1033   0.3214  
## Number of obs: 6528, groups:  schcode, 122
## 
## Fixed effects:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         0.39009    0.05876   6.639 3.15e-11 ***
## lowses.faclow socioeconomic status -0.43685    0.05716  -7.642 2.14e-14 ***
## female.facfemale                    0.32357    0.05168   6.260 3.84e-10 ***
## minor.fac1                         -0.43906    0.05605  -7.833 4.77e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lws.ss fml.fc
## lwss.fclwss -0.359              
## femal.fcfml -0.415 -0.031       
## minor.fac1  -0.393 -0.171 -0.004

##now conducting odds ratio for model; lowes.fac

lowes.fac.odds.ratio <- exp(-0.43685)
lowes.fac.odds.ratio
## [1] 0.6460683

When the OR is less than one (.65 in this case), we subtract 1 - OR which equals .35. This means that for children in lower SES, the odds of being proficient in reading are 35% lower than for children who live higher SES.

##now conducting odds ratio for model; female

female.fac.odds.ratio <- exp(0.32357)
female.fac.odds.ratio
## [1] 1.382053

##now conducting odds ratio for model; minority

minor.fac.odds.ratio <- exp(-0.43906)
minor.fac.odds.ratio
## [1] 0.6446421

When the OR is less than one (.64 in this case), we subtract 1 - OR which equals .36. This means that for children who are identified as minority, the odds of being proficient in reading is 36% lower than for children who are not minority.

##Building from the previous section, test the impact of “smallsch” as a level-2 IVs, keeping all IVs as fixed effects (random intercept only). Interpret the results for the smallsch coefficient in terms of odds ratios.

model.3 <- glmer(readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac + (1|schcode), family = binomial, data = readprof.clean)
summary(model.3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac +  
##     (1 | schcode)
##    Data: readprof.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##   8695.8   8736.5  -4341.9   8683.8     6522 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9364 -0.9556  0.6210  0.8947  1.6222 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  schcode (Intercept) 0.09668  0.3109  
## Number of obs: 6528, groups:  schcode, 122
## 
## Fixed effects:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         0.32449    0.07001   4.635 3.57e-06 ***
## lowses.faclow socioeconomic status -0.43920    0.05713  -7.688 1.49e-14 ***
## female.facfemale                    0.32292    0.05167   6.250 4.12e-10 ***
## minor.fac1                         -0.43983    0.05598  -7.857 3.93e-15 ***
## smallsch.fac1                       0.13854    0.07941   1.745   0.0811 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lws.ss fml.fc mnr.f1
## lwss.fclwss -0.303                     
## femal.fcfml -0.346 -0.031              
## minor.fac1  -0.336 -0.172 -0.003       
## smllsch.fc1 -0.552 -0.002 -0.003  0.009

##now conducting odds ratio for model; smallsch.fac.fac1, 1=small school

smallsch.fac.odds.ratio <- exp(0.13854)
smallsch.fac.odds.ratio
## [1] 1.148596

##Next, add a second level-2 IV, schcomp to the model along with all variables from #4. Interpret the smallsch and schcomp coefficients.

model.4 <- glmer(readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac + schcomp + (1|schcode), family = binomial, data = readprof.clean)
summary(model.4)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: readprof.fac ~ lowses.fac + female.fac + minor.fac + smallsch.fac +  
##     schcomp + (1 | schcode)
##    Data: readprof.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##   8665.5   8713.0  -4325.8   8651.5     6521 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0503 -0.9646  0.6064  0.8824  1.6455 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  schcode (Intercept) 0.05658  0.2379  
## Number of obs: 6528, groups:  schcode, 122
## 
## Fixed effects:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         0.28348    0.06401   4.429 9.49e-06 ***
## lowses.faclow socioeconomic status -0.36153    0.05778  -6.257 3.92e-10 ***
## female.facfemale                    0.32237    0.05161   6.246 4.21e-10 ***
## minor.fac1                         -0.39549    0.05580  -7.087 1.37e-12 ***
## smallsch.fac1                       0.10372    0.07008   1.480    0.139    
## schcomp                            -0.20725    0.03511  -5.902 3.58e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) lws.ss fml.fc mnr.f1 smll.1
## lwss.fclwss -0.329                            
## femal.fcfml -0.377 -0.031                     
## minor.fac1  -0.366 -0.159 -0.004              
## smllsch.fc1 -0.500 -0.024 -0.005 -0.002       
## schcomp      0.154 -0.278 -0.011 -0.175  0.083

##now conducting odds ratio for model; smallsch.fac.fac1, 1=small school

smallsch.fac.odds.ratio <- exp(0.10372)
smallsch.fac.odds.ratio
## [1] 1.10929

##now conducting odds ratio for model; schcomp (scores of extra help)

schcomp.fac.odds.ratio <- exp(-0.20725)
schcomp.fac.odds.ratio
## [1] 0.8128164

##okay predicted probabilities associated with each estimate of A male student, who qualifies as low SES, who identifies as a member of a minority racial/ethnic group. logit=.390 + -.437 + -.437

logit.male <- .390 + -.437 + -.437
logit.male
## [1] -0.484
exp(logit.male)/(1+exp(logit.male))
## [1] 0.381308

So, for a male student who qualifies as low SES, and identifies as a member of a minority racial/ethnic group, the predicted probability of being proficient in reading is 38%.

##okay predicted probabilities associated with each estimate of a female student, who does not qualify as low SES, who does not identify as a member of a minority racial/ethnic group.

logit.female <- .390 + .323
logit.female
## [1] 0.713
exp(logit.female)/(1+exp(logit.female))
## [1] 0.6710637

So, for a female student who does not qualify as low SES, and does not identify as a member of a minority racial/ethnic group, the predicted probability of being proficient in reading is 67%.