#load packages
library(tidyverse)
library(psych)
library(lme4)
library(lmerTest)
#load data
firstlast <- read_csv("isbell_et_al_2019_firstlast.csv")
In this longitudinal dataset, test takers whose final test score was lower than their first score were coded as “1”. Those whose final score was the same as or higher than their first were coded as “0”.
First we can run some univariate regressions. Of these, only starting proficiency seems to have an effect on the odds of decreasing in proficiency.
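For example, the univariate model for starting proficiency would look like this (a sketch - any single predictor from the full model below can be substituted):
#univariate example: starting proficiency as the sole predictor
prof_mod <- glm(Decrease ~ start.prof, family = "binomial", data = firstlast)
summary(prof_mod)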
The issue seemed to be a bit more complex, so we built some multivariate models, too.
#multivariate
## intercept-only as a point of comparison
mod0 <- glm(Decrease ~ 1, family = "binomial", data = firstlast)
## all possible predictors
fullmod <- glm(Decrease ~ is.minor + is.major + all.course + preMSU + is.heritage
+ motiv + SAdur + start.prof + Time + CHS + FRN + RUS + SPN, family = "binomial", data = firstlast)
## after removing variables that had little effect, we arrived at a final model
finalmod <- glm(Decrease ~ all.course
+ motiv + start.prof, family = "binomial", data = firstlast)
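Since we fit the intercept-only model as a baseline, we can formally compare it with the final model. A sketch using a likelihood-ratio test and AIC:
#compare the final model to the intercept-only baseline
anova(mod0, finalmod, test = "Chisq")
AIC(mod0, finalmod)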
Here are some ways of looking at and extracting information from the model.
#summary
summary(finalmod)
##
## Call:
## glm(formula = Decrease ~ all.course + motiv + start.prof, family = "binomial",
## data = firstlast)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4942 -0.5820 -0.4322 -0.3128 2.4740
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.84787 0.48715 -5.846 5.04e-09 ***
## all.course -0.10139 0.03640 -2.785 0.00535 **
## motiv -0.22241 0.09081 -2.449 0.01432 *
## start.prof 0.63563 0.08285 7.672 1.69e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 684.34 on 813 degrees of freedom
## Residual deviance: 617.62 on 810 degrees of freedom
## AIC: 625.62
##
## Number of Fisher Scoring iterations: 5
#get the odds of the coefficients
exp(finalmod$coefficients)
## (Intercept) all.course motiv start.prof
## 0.0579676 0.9035767 0.8005898 1.8882155
#get 95% CIs for the odds
exp(confint(finalmod))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.02143373 0.1454310
## all.course 0.83969902 0.9687380
## motiv 0.67183222 0.9602795
## start.prof 1.61059207 2.2301380
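If you have the broom package installed (an assumption - it isn't loaded above), the odds ratios and their confidence intervals can be collected into a single tidy table:
#optional: odds ratios and 95% CIs in one table (assumes broom is installed)
library(broom)
tidy(finalmod, exponentiate = TRUE, conf.int = TRUE)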
#do some graphical model inspection - note that these are hard to interpret for
#logistic regression
plot(finalmod)
Logistic regression predicts the probability/odds of a discrete outcome. In other words, it attempts to predict classifications. Examining classification accuracy is useful - it can tell you how well your model seems to account for the data. This is similar to the idea behind R^2, but suits the nature of the data a bit better.
#check predicted probabilities
##get the model predicted probabilities of Decrease = 1 for each person
firstlast$pred.prob <- fitted(finalmod) #fitted vals from the regression model are probabilities
#using a .5 split
firstlast$pred.class <- cut(firstlast$pred.prob, breaks = c(-Inf, .5, Inf), labels= c(0, 1))
table(firstlast$Decrease, firstlast$pred.class)
##
## 0 1
## 0 683 10
## 1 115 6
Overall, this model didn’t have great predictive power. Only 6 out of 121 decreasers were accurately classified. So the variables identified in the final model are potentially important, but there’s also likely a lot of other important stuff going on that leads to score decreases.
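To put numbers on that, we can compute overall accuracy and the hit rate for decreasers straight from the classification table (a sketch):
#overall accuracy and hit rate for decreasers
tab <- table(firstlast$Decrease, firstlast$pred.class)
sum(diag(tab)) / sum(tab) #overall accuracy: (683 + 6) / 814
tab["1", "1"] / sum(tab["1", ]) #only 6 of 121 actual decreasers were flagged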
Now we’re going to look at a new dataset - this is from Chapter 3 of the McNamara et al. textbook.
We’ll calculate total scores and do some basic CTT analysis with the alpha() function from the psych package.
d <- read_csv("reading test.csv") %>%
mutate(`Candidate ID` = as.character(`Candidate ID`),
total = rowSums(.[2:13]))
##
## -- Column specification --------------------------------------------------------
## cols(
## `Candidate ID` = col_double(),
## `Item 1` = col_double(),
## `Item 2` = col_double(),
## `Item 3` = col_double(),
## `Item 4` = col_double(),
## `Item 5` = col_double(),
## `Item 6` = col_double(),
## `Item 7` = col_double(),
## `Item 8` = col_double(),
## `Item 9` = col_double(),
## `Item 10` = col_double(),
## `Item 11` = col_double(),
## `Item 12` = col_double()
## )
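Before the item analysis, it can help to glance at the distribution of total scores; psych’s describe() gives a quick summary:
#quick descriptive summary of the total scores
describe(d$total)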
d %>% select(2:13) %>% alpha()
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.78 0.79 0.88 0.24 3.7 0.071 0.52 0.25 0.23
##
## lower alpha upper 95% confidence boundaries
## 0.64 0.78 0.92
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Item 1 0.79 0.79 0.87 0.25 3.7 0.070 0.029 0.25
## Item 2 0.79 0.79 0.87 0.26 3.8 0.068 0.029 0.25
## Item 3 0.76 0.77 0.84 0.23 3.3 0.078 0.031 0.21
## Item 4 0.76 0.77 0.87 0.23 3.3 0.077 0.033 0.21
## Item 5 0.77 0.78 0.87 0.24 3.5 0.074 0.035 0.23
## Item 6 0.74 0.75 0.85 0.22 3.0 0.085 0.032 0.21
## Item 7 0.74 0.75 0.85 0.21 3.0 0.087 0.031 0.21
## Item 8 0.78 0.79 0.88 0.25 3.7 0.073 0.035 0.24
## Item 9 0.78 0.78 0.86 0.25 3.6 0.072 0.033 0.25
## Item 10 0.77 0.78 0.86 0.24 3.5 0.075 0.033 0.23
## Item 11 0.77 0.78 0.88 0.24 3.5 0.074 0.037 0.24
## Item 12 0.74 0.75 0.85 0.22 3.0 0.085 0.030 0.21
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Item 1 20 0.41 0.40 0.37 0.25 0.55 0.51
## Item 2 20 0.37 0.37 0.32 0.21 0.60 0.50
## Item 3 20 0.61 0.61 0.61 0.52 0.85 0.37
## Item 4 20 0.56 0.58 0.53 0.44 0.25 0.44
## Item 5 20 0.51 0.49 0.45 0.36 0.40 0.50
## Item 6 20 0.74 0.72 0.72 0.64 0.65 0.49
## Item 7 20 0.77 0.75 0.74 0.68 0.45 0.51
## Item 8 20 0.44 0.44 0.35 0.30 0.75 0.44
## Item 9 20 0.46 0.46 0.43 0.32 0.70 0.47
## Item 10 20 0.52 0.52 0.48 0.39 0.30 0.47
## Item 11 20 0.46 0.52 0.45 0.40 0.05 0.22
## Item 12 20 0.74 0.72 0.72 0.64 0.65 0.49
##
## Non missing response frequency for each item
## 0 1 miss
## Item 1 0.45 0.55 0
## Item 2 0.40 0.60 0
## Item 3 0.15 0.85 0
## Item 4 0.75 0.25 0
## Item 5 0.60 0.40 0
## Item 6 0.35 0.65 0
## Item 7 0.55 0.45 0
## Item 8 0.25 0.75 0
## Item 9 0.30 0.70 0
## Item 10 0.70 0.30 0
## Item 11 0.95 0.05 0
## Item 12 0.35 0.65 0
Let’s look at a single item - Item 1. We can illustrate the observed probability of a correct response with the following code.
# plotting the probability of Item 1 responses
ggplot(d, aes(x = `Item 1`))+
geom_bar(aes(y = (..count..)/sum(..count..)))+
scale_x_continuous(breaks = c(0, 1))+
scale_y_continuous(limits = c(0, 1))+
labs(y = "Probability")+
theme_minimal()
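The observed proportion correct can also be computed directly, and it matches the .55 shown in the plot:
#observed proportion correct for Item 1
mean(d$`Item 1`)
## [1] 0.55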
We can even run a regression on Item 1, with the score as the outcome variable and no predictors.
We can then convert the intercept from this model back to a probability, and see that it lines up exactly with what we observed in the data.
#A logistic regression for Item 1
Item1mod <- glm(`Item 1` ~ 1, data = d, family = 'binomial')
summary(Item1mod)
##
## Call:
## glm(formula = `Item 1` ~ 1, family = "binomial", data = d)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.264 -1.264 1.093 1.093 1.093
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2007 0.4495 0.446 0.655
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.526 on 19 degrees of freedom
## Residual deviance: 27.526 on 19 degrees of freedom
## AIC: 29.526
##
## Number of Fisher Scoring iterations: 3
exp(Item1mod$coefficients)
## (Intercept)
## 1.222222
exp(Item1mod$coefficients)/(1+exp(Item1mod$coefficients))
## (Intercept)
## 0.55
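Base R’s plogis() does the same inverse-logit conversion in a single step:
#equivalent inverse-logit conversion with base R
plogis(Item1mod$coefficients)
## (Intercept)
## 0.55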
We can do the same thing for a different item, Item 11.
# plotting the probability of Item 11 responses
ggplot(d, aes(x = `Item 11`))+
geom_bar(aes(y = (..count..)/sum(..count..)))+
scale_x_continuous(breaks = c(0, 1))+
scale_y_continuous(limits = c(0, 1))+
labs(y = "Probability")+
theme_minimal()
#A logistic regression for Item 11
Item11mod <- glm(`Item 11` ~ 1, data = d, family = 'binomial')
summary(Item11mod)
##
## Call:
## glm(formula = `Item 11` ~ 1, family = "binomial", data = d)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3203 -0.3203 -0.3203 -0.3203 2.4478
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.944 1.026 -2.87 0.0041 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7.9406 on 19 degrees of freedom
## Residual deviance: 7.9406 on 19 degrees of freedom
## AIC: 9.9406
##
## Number of Fisher Scoring iterations: 5
exp(Item11mod$coefficients)
## (Intercept)
## 0.05263158
exp(Item11mod$coefficients)/(1+exp(Item11mod$coefficients))
## (Intercept)
## 0.05
We could do this for all of the items. Running the separate regressions isn’t all that useful, but it does illustrate the relationship between (a) logistic regression output, (b) logits, and (c) observed probabilities. But note that these models aren’t really telling us anything about the test takers - we just know that some got the item right and some got it wrong.
Now we’ll try to put multiple items in the same model, along with accounting for differing abilities of people.
Sometimes we need to convert our data from ‘wide’ to ‘long’ format. We can use the tidyverse function pivot_longer() to go from wide to long (and pivot_wider() to do the opposite).
dl <- d %>% pivot_longer(`Item 1`:`Item 12`, names_to = "item",
values_to = "score")
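A quick look at the long data shows the new structure - one row per person-item response:
#each row is now a single person's response to a single item
head(dl)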
Now we can try fitting a standard logistic regression to the data, this time with 2 predictors - one for items, and one for people.
mod <- glm(score ~ -1 + item + `Candidate ID`, family = "binomial", data = dl)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(mod)
##
## Call:
## glm(formula = score ~ -1 + item + `Candidate ID`, family = "binomial",
## data = dl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4765 -0.5845 0.0000 0.6509 2.0920
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## itemItem 1 1.139e+00 9.249e-01 1.231 0.21825
## itemItem 10 -3.734e-01 9.331e-01 -0.400 0.68906
## itemItem 11 -1.843e+01 1.591e+03 -0.012 0.99075
## itemItem 12 1.769e+00 9.558e-01 1.851 0.06421 .
## itemItem 2 1.444e+00 9.376e-01 1.540 0.12351
## itemItem 3 3.643e+00 1.184e+00 3.076 0.00209 **
## itemItem 4 -7.361e-01 9.585e-01 -0.768 0.44247
## itemItem 5 2.587e-01 9.132e-01 0.283 0.77698
## itemItem 6 1.769e+00 9.558e-01 1.851 0.06421 .
## itemItem 7 5.531e-01 9.127e-01 0.606 0.54453
## itemItem 8 2.530e+00 1.020e+00 2.482 0.01308 *
## itemItem 9 2.125e+00 9.817e-01 2.165 0.03041 *
## `Candidate ID`10 -1.042e+00 1.037e+00 -1.005 0.31507
## `Candidate ID`11 -5.447e-01 1.049e+00 -0.519 0.60352
## `Candidate ID`12 1.575e+00 1.349e+00 1.167 0.24321
## `Candidate ID`13 -2.574e-16 1.080e+00 0.000 1.00000
## `Candidate ID`14 2.324e-16 1.080e+00 0.000 1.00000
## `Candidate ID`15 1.331e-16 1.080e+00 0.000 1.00000
## `Candidate ID`16 -2.016e+00 1.056e+00 -1.908 0.05633 .
## `Candidate ID`17 -1.523e+00 1.039e+00 -1.466 0.14278
## `Candidate ID`18 -4.152e+00 1.393e+00 -2.981 0.00287 **
## `Candidate ID`19 -5.447e-01 1.049e+00 -0.519 0.60352
## `Candidate ID`2 -2.115e+01 2.618e+03 -0.008 0.99355
## `Candidate ID`20 -1.042e+00 1.037e+00 -1.005 0.31507
## `Candidate ID`3 -2.989e-16 1.080e+00 0.000 1.00000
## `Candidate ID`4 -1.042e+00 1.037e+00 -1.005 0.31507
## `Candidate ID`5 -3.208e+00 1.177e+00 -2.725 0.00643 **
## `Candidate ID`6 3.487e+01 2.756e+03 0.013 0.98990
## `Candidate ID`7 6.496e-01 1.151e+00 0.564 0.57243
## `Candidate ID`8 -1.523e+00 1.039e+00 -1.466 0.14278
## `Candidate ID`9 -2.016e+00 1.056e+00 -1.908 0.05633 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 332.71 on 240 degrees of freedom
## Residual deviance: 188.71 on 209 degrees of freedom
## AIC: 250.71
##
## Number of Fisher Scoring iterations: 18
We get a warning message rather than an error - this kind of model is technically possible, though some tricks are needed to make it readily interpretable, and you would generally need a bit more data. So be wary of this one - we’re mainly looking at it to explore what’s possible.
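The warning about fitted probabilities of 0 or 1 usually signals complete separation. Here the most likely culprits are test takers who answered every item the same way - the extreme coefficients for Candidates 2 and 6 are consistent with all-wrong and all-right response strings. A quick check using the total scores computed earlier (a sketch):
#check for zero or perfect total scores, which can trigger separation
d %>% filter(total == 0 | total == 12) %>% select(`Candidate ID`, total)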
In the plot below, you can see how the difficulty of items (red) compares to the ability of people (blue), all on the same scale, in units of logits.
items <- mod$coefficients[1:12]
people <- mod$coefficients[13:31]
ggplot()+
geom_point(aes(x = items, y = -.5), color = "red")+
geom_jitter(aes(x = people, y = .5), color = "blue",
width = 0, height = .1)+
labs(x = "logits", y = NULL)+
theme_minimal()+
coord_flip()
## Multilevel logistic regression for test data

Now we’ll do something that works a little better - run roughly the same model as before, but as a multilevel model. People are random effects instead of fixed effects. I’ve added some code that helps the model converge, but the main change is the random effect (1|`Candidate ID`).
#A multilevel logistic regression for all items
mod2 <- glmer(score ~ -1 + item + (1|`Candidate ID`), family = "binomial",
data = dl, control=glmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=100000)))
summary(mod2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: score ~ -1 + item + (1 | `Candidate ID`)
## Data: dl
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+05))
##
## AIC BIC logLik deviance df.resid
## 272.3 317.5 -123.1 246.3 227
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9992 -0.5252 0.1393 0.5484 2.3256
##
## Random effects:
## Groups Name Variance Std.Dev.
## Candidate ID (Intercept) 2.321 1.523
## Number of obs: 240, groups: Candidate ID, 20
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## itemItem 1 0.2560 0.6297 0.406 0.68438
## itemItem 10 -1.1842 0.6669 -1.776 0.07577 .
## itemItem 11 -3.8709 1.2486 -3.100 0.00193 **
## itemItem 12 0.8395 0.6502 1.291 0.19665
## itemItem 2 0.5415 0.6371 0.850 0.39534
## itemItem 3 2.3798 0.8219 2.896 0.00378 **
## itemItem 4 -1.5200 0.6954 -2.186 0.02884 *
## itemItem 5 -0.5834 0.6358 -0.918 0.35882
## itemItem 6 0.8395 0.6502 1.291 0.19665
## itemItem 7 -0.3013 0.6291 -0.479 0.63201
## itemItem 8 1.5067 0.7013 2.149 0.03167 *
## itemItem 9 1.1576 0.6706 1.726 0.08431 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## itmIt1 itmI10 itmI11 itmI12 itmIt2 itmIt3 itmIt4 itmIt5 itmIt6
## itemItem 10 0.279
## itemItem 11 0.133 0.162
## itemItem 12 0.293 0.261 0.115
## itemItem 2 0.299 0.271 0.125 0.292
## itemItem 3 0.227 0.183 0.060 0.235 0.232
## itemItem 4 0.265 0.274 0.165 0.246 0.257 0.168
## itemItem 5 0.296 0.288 0.153 0.281 0.290 0.205 0.278
## itemItem 6 0.293 0.261 0.115 0.289 0.292 0.235 0.246 0.281
## itemItem 7 0.301 0.288 0.147 0.287 0.295 0.214 0.276 0.301 0.287
## itemItem 8 0.270 0.231 0.092 0.272 0.272 0.235 0.215 0.253 0.272
## itemItem 9 0.284 0.247 0.104 0.282 0.284 0.236 0.232 0.269 0.282
## itmIt7 itmIt8
## itemItem 10
## itemItem 11
## itemItem 12
## itemItem 2
## itemItem 3
## itemItem 4
## itemItem 5
## itemItem 6
## itemItem 7
## itemItem 8 0.261
## itemItem 9 0.276 0.269
#extract item estimates (fixed effects) and person estimates (random effects)
fixefs <- tibble(fixef(mod2))
ranefs <- tibble(ranef(mod2)$`Candidate ID`)
This model is one way of estimating the Rasch model (Rasch, 1960) with lme4 (De Boeck et al., 2011). This is the big reveal of the week - we have ended up previewing what we’ll learn about in more detail in the next unit.
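In this parameterization, the predicted probability that a person answers an item correctly is the inverse logit of the person estimate plus the item estimate. As a quick sketch, using the Item 10 estimate from the summary above and a hypothetical person estimate of 1 logit:
#predicted probability for a person 1 logit above average on Item 10
plogis(1 + (-1.1842)) #approximately .45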
The following plot puts people and items on the same logit scale, just as before.
ggplot()+
geom_point(aes(x = fixefs$`fixef(mod2)`, y = -.5), color = "red")+
geom_jitter(aes(x = ranefs$`(Intercept)`, y = .5), color = "blue",
width = 0, height = .1)+
scale_y_continuous(limits = c(-1, 1), breaks = NULL)+
labs(x = "logits", y = NULL)+
theme_minimal()+
coord_flip()