library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(foreign)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
math165 <- read_csv("F165GradesAsPredictorsForS181wFirstMath.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## MathTERM = col_double(),
## gpa = col_double(),
## grade165 = col_character(),
## math165grade = col_double(),
## RAVEN_GRADE_HISTORY_CRSE_NUMB = col_character(),
## grade181 = col_character(),
## math181grade = col_double(),
## `1stMathTerm` = col_double(),
## SUBJ_CODE = col_character(),
## MinOf1stCourse = col_character(),
## firstmathgrade = col_character(),
## `1stGrade` = col_character()
## )
# remove stray spaces from the character columns; gsub() returns a new value,
# so the result must be assigned back or the data frame is left unchanged
math165 <- math165 %>%
  mutate(across(where(is.character), ~ gsub(" ", "", .x)))
summary(math165)
## MathTERM gpa grade165 math165grade
## Min. :201520 Min. :0.400 Length:1423 Min. :2.000
## 1st Qu.:201620 1st Qu.:2.800 Class :character 1st Qu.:2.000
## Median :201720 Median :3.200 Mode :character Median :3.000
## Mean :201720 Mean :3.208 Mean :3.082
## 3rd Qu.:201820 3rd Qu.:3.600 3rd Qu.:4.000
## Max. :201920 Max. :4.000 Max. :4.000
## RAVEN_GRADE_HISTORY_CRSE_NUMB grade181 math181grade
## Length:1423 Length:1423 Min. :-1.000
## Class :character Class :character 1st Qu.: 1.000
## Mode :character Mode :character Median : 2.000
## Mean : 1.996
## 3rd Qu.: 3.000
## Max. : 4.000
## 1stMathTerm SUBJ_CODE MinOf1stCourse firstmathgrade
## Min. :201020 Length:1423 Length:1423 Length:1423
## 1st Qu.:201520 Class :character Class :character Class :character
## Median :201620 Mode :character Mode :character Mode :character
## Mean :201644
## 3rd Qu.:201820
## Max. :201920
## 1stGrade
## Length:1423
## Class :character
## Mode :character
##
##
##
mean(math165$math165grade)
## [1] 3.081518
mean(math165$math181grade)
## [1] 1.996486
math165$MinOf1stCourse <- as.numeric(math165$MinOf1stCourse)
## Warning: NAs introduced by coercion
math165new <- math165 %>%
filter(math181grade >= 0) %>%
mutate(level = ifelse(MinOf1stCourse %in% 0:105, "develop", "college"))
math165new
## # A tibble: 1,248 x 13
## MathTERM gpa grade165 math165grade RAVEN_GRADE_HISTO~ grade181 math181grade
## <dbl> <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 201620 3 A 4 181 B 3
## 2 201820 4 A 4 181 A 4
## 3 201520 3 A 4 181 A 4
## 4 201820 2.4 A 4 181 A 4
## 5 201620 4 A 4 181 B 3
## 6 201620 3.6 A 4 181 A 4
## 7 201720 3 A 4 181 B 3
## 8 201520 3.4 A 4 181 D 1
## 9 201620 3.4 A 4 181 C 2
## 10 201920 3.8 A 4 181 B 3
## # ... with 1,238 more rows, and 6 more variables: 1stMathTerm <dbl>,
## # SUBJ_CODE <chr>, MinOf1stCourse <dbl>, firstmathgrade <chr>,
## # 1stGrade <chr>, level <chr>
par(mfrow = c(1,2))
boxplot(math165new$math165grade)
boxplot(math165new$math181grade)
# These plots make sense because you have to get a C or higher to get into MATH 181
plot(jitter(math165new$math181grade) ~ jitter(math165new$math165grade))
fit1 <- lm(math181grade ~ math165grade, math165new)
summary(fit1)
##
## Call:
## lm(formula = math181grade ~ math165grade, data = math165new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1719 -0.4199 0.5801 0.8281 2.5801
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.33205 0.13182 -2.519 0.0119 *
## math165grade 0.87599 0.04066 21.545 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.172 on 1246 degrees of freedom
## Multiple R-squared: 0.2714, Adjusted R-squared: 0.2708
## F-statistic: 464.2 on 1 and 1246 DF, p-value: < 2.2e-16
fit2 <- lm(math181grade ~ gpa + math165grade + as.factor(level), math165new)
summary(fit2)
##
## Call:
## lm(formula = math181grade ~ gpa + math165grade + as.factor(level),
## data = math165new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5407 -0.7159 0.2597 0.7268 2.9805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.51494 0.18853 -8.036 2.14e-15 ***
## gpa 0.82480 0.07489 11.013 < 2e-16 ***
## math165grade 0.43912 0.05383 8.158 8.27e-16 ***
## as.factor(level)develop -0.26751 0.06465 -4.138 3.75e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.113 on 1244 degrees of freedom
## Multiple R-squared: 0.344, Adjusted R-squared: 0.3424
## F-statistic: 217.4 on 3 and 1244 DF, p-value: < 2.2e-16
par(mfrow = c(2,2))
plot(fit2)
The residual plots against the fitted values clearly indicate that linear regression is not a good model here: the outcome takes only a handful of discrete values, so the residuals fall into parallel bands rather than scattering randomly.
ftable(xtabs(~ gpa + math165grade, data = math165new))
## math165grade 2 3 4
## gpa
## 0.4 1 0 0
## 0.8 1 0 0
## 1 1 0 0
## 1.2 1 0 0
## 1.4 3 0 0
## 1.6 2 2 0
## 1.8 3 3 0
## 2 23 3 2
## 2.2 23 7 1
## 2.4 40 11 3
## 2.6 51 27 4
## 2.8 62 46 10
## 3 54 72 26
## 3.2 47 68 34
## 3.4 24 69 46
## 3.6 4 60 85
## 3.8 1 26 96
## 4 0 0 206
We can also examine the distribution of gpa at every level of the Math165 grade. The code below creates a single row of panels, one per Math165 grade plus a margin panel for all grades combined, each showing boxplots of gpa by Math181 grade. To better see the data, we also add the raw data points on top of the box plots, with a small amount of noise (often called “jitter”) and 3% opacity (alpha = .03) so they do not overwhelm the boxplots.
p1 <- ggplot(math165new, aes(x = grade181, y = gpa)) +
geom_boxplot() +
geom_jitter(alpha = .03) +
facet_grid(. ~ grade165, margins = TRUE) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title="Boxplots of Math181 Grades to GPA, separated by Math165 grades")
p1
library(MASS)
math165new$grade181 <- as.factor(math165new$grade181)
logitfit1 <- polr(grade181 ~ gpa + math165grade + as.factor(level), data = math165new, Hess = TRUE)
summary(logitfit1)
## Call:
## polr(formula = grade181 ~ gpa + math165grade + as.factor(level),
## data = math165new, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## gpa -1.3477 0.13330 -10.110
## math165grade -0.7222 0.09177 -7.870
## as.factor(level)develop 0.4373 0.10756 4.066
##
## Intercepts:
## Value Std. Error t value
## A|B -7.7382 0.3823 -20.2390
## B|C -6.2067 0.3586 -17.3064
## C|D -4.9707 0.3437 -14.4603
## D|F -4.0886 0.3387 -12.0731
##
## Residual Deviance: 3354.567
## AIC: 3368.567
Because polr() parameterizes the model as logit(P(Y <= j)) = zeta_j - eta, the reported coefficients flip sign when the fitted cumulative logit equations are written out:

logit(P-hat(Y <= A)) = -7.738 + 1.348(gpa) + 0.722(math165grade) - 0.437(developmental)
logit(P-hat(Y <= B)) = -6.207 + 1.348(gpa) + 0.722(math165grade) - 0.437(developmental)
logit(P-hat(Y <= C)) = -4.971 + 1.348(gpa) + 0.722(math165grade) - 0.437(developmental)
logit(P-hat(Y <= D)) = -4.089 + 1.348(gpa) + 0.722(math165grade) - 0.437(developmental)
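As a sanity check, we can reconstruct these cumulative logits by hand from the fitted object, since the cutpoints are stored in logitfit1$zeta and the slopes in coef(logitfit1). A minimal sketch for one hypothetical student (gpa = 3.0, an A in Math165, college-level start); the last two lines should agree:

# hypothetical student: gpa 3.0, math165grade 4, college-level start (develop = 0)
x <- c(gpa = 3.0, math165grade = 4, develop = 0)
eta <- sum(coef(logitfit1) * x)                # linear predictor
cumlogit <- logitfit1$zeta - eta               # logit(P(Y <= j)) at each cutpoint
diff(c(0, plogis(cumlogit), 1))                # per-grade probabilities A, B, C, D, F
predict(logitfit1, data.frame(gpa = 3.0, math165grade = 4, level = "college"),
        type = "probs")                        # should match the line above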
This shows the usual regression output coefficient table, including the value of each coefficient, its standard error, and the t value, which is simply the ratio of the coefficient to its standard error. No significance tests are reported by default.
Next we see the estimates for the four intercepts, which are sometimes called cutpoints. The intercepts indicate where the latent variable is cut to make the five grade groups that we observe in our data. Note that this latent variable is continuous. In general, the cutpoints are not used in the interpretation of the results. They are closely related to the thresholds reported by other statistical packages.
Finally, we see the residual deviance (-2 times the log-likelihood of the model) as well as the AIC. Both the deviance and AIC are useful for model comparison.
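For example, a minimal sketch of such a comparison, fitting a reduced model without math165grade (the name logitfit0 is ours, not part of the original analysis):

# reduced model dropping math165grade, to compare against logitfit1
logitfit0 <- polr(grade181 ~ gpa + as.factor(level), data = math165new, Hess = TRUE)
AIC(logitfit0, logitfit1)      # lower AIC indicates the better-fitting model
anova(logitfit0, logitfit1)    # likelihood ratio test for the math165grade term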
One way to calculate a p-value in this case is to compare the t value against the standard normal distribution, as in a z test. This is only exact with infinite degrees of freedom, but it is a reasonable approximation in large samples and becomes increasingly biased as the sample size decreases.
ctable <- coef(summary(logitfit1))
(ci <- confint(logitfit1))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## gpa -1.6109926 -1.0882855
## math165grade -0.9025632 -0.5427238
## as.factor(level)develop 0.2266321 0.6483706
## calculate and store p values
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
## combined table
(ctable <- cbind(ctable, "p value" = p))
## Value Std. Error t value p value
## gpa -1.3477063 0.13329796 -10.110480 4.964039e-24
## math165grade -0.7221654 0.09176726 -7.869532 3.559710e-15
## as.factor(level)develop 0.4373333 0.10756413 4.065791 4.786973e-05
## A|B -7.7382155 0.38234138 -20.239022 4.438937e-91
## B|C -6.2066881 0.35863608 -17.306368 4.211566e-67
## C|D -4.9706547 0.34374528 -14.460285 2.159015e-47
## D|F -4.0885901 0.33865158 -12.073147 1.464263e-33
The coefficients from the model can be somewhat difficult to interpret because they are scaled as log odds. Another way to interpret logistic regression models is to convert the coefficients into odds ratios (ORs). To get the ORs and their confidence intervals, we simply exponentiate the estimates and confidence limits.
exp(coef(logitfit1))
## gpa math165grade as.factor(level)develop
## 0.2598356 0.4856994 1.5485721
exp(cbind(OR = coef(logitfit1), ci))
## OR 2.5 % 97.5 %
## gpa 0.2598356 0.1996893 0.3367934
## math165grade 0.4856994 0.4055289 0.5811631
## as.factor(level)develop 1.5485721 1.2543683 1.9124223
One of the assumptions underlying ordinal logistic (and ordinal probit) regression is that the relationship between each pair of outcome groups is the same. In other words, ordinal logistic regression assumes that the coefficients that describe the relationship between, say, the lowest versus all higher categories of the response variable are the same as those that describe the relationship between the next lowest category and all higher categories, etc. This is called the proportional odds assumption or the parallel regression assumption. Because the relationship between all pairs of groups is the same, there is only one set of coefficients.
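Before the graphical check below, one approximate formal check is to compare the proportional-odds fit against a multinomial logit that lets every slope vary freely; a likelihood ratio test between the two gives a rough test of the assumption (rough because the multinomial model ignores the ordering). A sketch, assuming the nnet package is installed and using multifit as our own name for the unconstrained model:

library(nnet)
# unconstrained multinomial logit: separate slopes for every outcome contrast
multifit <- multinom(grade181 ~ gpa + math165grade + as.factor(level),
                     data = math165new, trace = FALSE)
G2 <- deviance(logitfit1) - deviance(multifit)     # likelihood ratio statistic
df <- length(coef(multifit)) - length(c(coef(logitfit1), logitfit1$zeta))
pchisq(G2, df, lower.tail = FALSE)                 # approximate p-value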
In order to assess the appropriateness of our model, we need to evaluate whether the proportional odds assumption is tenable. The following code creates a graph to check the proportional odds assumption.
We graph predicted logits from individual logistic regressions with a single predictor, where the outcome groups are defined by math181grade >= 1, math181grade >= 2, or math181grade >= 3. If the difference between the predicted logits for varying levels of a predictor, say level, is the same whether the outcome is defined by math181grade >= 1, math181grade >= 2, or math181grade >= 3, then we can be confident that the proportional odds assumption holds. In other words, if the difference between the logits for level = "develop" versus level = "college" is the same when the outcome is math181grade >= 1 as when the outcome is math181grade >= 2 or math181grade >= 3, then the proportional odds assumption likely holds.
The first command creates the function that estimates the values that will be graphed. The first line of this command tells R that sf is a function, and that this function takes one argument, which we label y. The sf function will calculate the log odds of being greater than or equal to each value of the target variable. For our purposes, we would like the log odds of Math181grade being greater than or equal to 1, and then greater than or equal to 2, and then greater than or equal to 3.
Inside the sf function we find the qlogis function, which transforms a probability to a logit, and it will return the logit transformations of these probabilites. Inside the qlogis function we see that we want the log odds of the mean of y >= 1. When we supply a y argument, such as Math181grade, to function sf, y >= 1 will evaluate to a 0/1 (FALSE/TRUE) vector, and taking the mean of that vector will give you the proportion of or probability that Math181grade >= 1.
The second command calls the function sf on several subsets of the data defined by the predictors. In this statement we see the summary function with a formula supplied as the first argument. When R sees a call to summary with a formula argument (here the Hmisc method loaded above), it calculates descriptive statistics for the variable on the left side of the formula, grouped by the variables on the right side, and returns the results in a table. By default, summary calculates the mean of the left-side variable. So, if we had used the code summary(as.numeric(math181grade) ~ gpa + level + grade165) without the fun argument, we would get means of math181grade by gpa (broken up into four groups), then by level, and finally by grade165. However, we can override the calculation of the mean by supplying our own function, namely sf, to the fun= argument. Finally, we store the result in the object s, which is a table.
sf <- function(y) {
c('Y>=1' = qlogis(mean(y >= 1)),
'Y>=2' = qlogis(mean(y >= 2)),
'Y>=3' = qlogis(mean(y >= 3)))
}
(s <- with(math165new, summary(as.numeric(math181grade) ~ gpa + level + grade165, fun=sf)))
## as.numeric(math181grade) N= 1248
##
## +--------+---------+----+---------+-----------+-----------+
## | | | N| Y>=1| Y>=2| Y>=3|
## +--------+---------+----+---------+-----------+-----------+
## | gpa|[0.4,3.0)| 330|0.6390800|-0.08489944|-1.34883680|
## | |[3.0,3.4)| 301|1.8471096| 0.88395535|-0.07312226|
## | |[3.4,4.0)| 411|2.9216243| 1.93207867| 0.66047640|
## | | 4.0| 206|4.2145937| 3.50655790| 2.28666964|
## +--------+---------+----+---------+-----------+-----------+
## | level| college| 658|2.0966537| 1.48011312| 0.51569181|
## | | develop| 590|1.5071878| 0.69823625|-0.24529031|
## +--------+---------+----+---------+-----------+-----------+
## |grade165| A| 513|3.2047769| 2.44340692| 1.41827741|
## | | B| 394|1.6402096| 0.94849387|-0.12197843|
## | | C| 341|0.9514546| 0.04106149|-1.30052754|
## +--------+---------+----+---------+-----------+-----------+
## | Overall| |1248|1.7870931| 1.07313320| 0.15092687|
## +--------+---------+----+---------+-----------+-----------+
The table above displays the (linear) predicted values we would get if we regressed our dependent variable on our predictor variables one at a time, without the parallel slopes assumption. We can evaluate the parallel slopes assumption by running a series of binary logistic regressions with varying cutpoints on the dependent variable and checking the equality of coefficients across cutpoints; relaxing the assumption in this way lets us check its tenability. To accomplish this, transform the original ordinal dependent variable into a new binary dependent variable that equals 0 if the original ordinal variable (here math181grade) is less than some value a, and 1 if it is greater than or equal to a (note that this is also what the ordinal regression model coefficients represent). This is done for k-1 levels of the ordinal variable and is executed by the as.numeric(math181grade) >= a coding below. The first model estimates the effect of level on earning a calculus 1 grade of 0 versus 1, 2, 3, or 4. The second estimates the effect of level on earning 0 or 1 versus 2, 3, or 4. The third estimates the effect of level on earning 0, 1, or 2 versus 3 or 4.
Looking at the intercept for the first model (2.0967), we see that it matches the predicted value in the cell for level equal to “college” in the column for Y>=1. The value below it, for level equal to “develop”, is equal to the intercept plus the coefficient for level (i.e., 2.0967 - 0.5895 = 1.5072).
glm(I(as.numeric(math181grade) >= 1) ~ level, family="binomial", data = math165new)
##
## Call: glm(formula = I(as.numeric(math181grade) >= 1) ~ level, family = "binomial",
## data = math165new)
##
## Coefficients:
## (Intercept) leveldevelop
## 2.0967 -0.5895
##
## Degrees of Freedom: 1247 Total (i.e. Null); 1246 Residual
## Null Deviance: 1026
## Residual Deviance: 1013 AIC: 1017
Looking at the intercept for the second model (1.4801), we see that it matches the predicted value in the cell for level equal to “college” in the column for Y>=2. The value below it, for level equal to “develop”, is equal to the intercept plus the coefficient for level (i.e., 1.4801 - 0.7819 = 0.6982).
glm(I(as.numeric(math181grade) >= 2) ~ level, family="binomial", data = math165new)
##
## Call: glm(formula = I(as.numeric(math181grade) >= 2) ~ level, family = "binomial",
## data = math165new)
##
## Coefficients:
## (Intercept) leveldevelop
## 1.4801 -0.7819
##
## Degrees of Freedom: 1247 Total (i.e. Null); 1246 Residual
## Null Deviance: 1417
## Residual Deviance: 1381 AIC: 1385
Looking at the intercept for the third model (0.5157), we see that it matches the predicted value in the cell for level equal to “college” in the column for Y>=3. The value below it, for level equal to “develop”, is equal to the intercept plus the coefficient for level (i.e., 0.5157 - 0.7610 = -0.2453).
glm(I(as.numeric(math181grade) >= 3) ~ level, family="binomial", data = math165new)
##
## Call: glm(formula = I(as.numeric(math181grade) >= 3) ~ level, family = "binomial",
## data = math165new)
##
## Coefficients:
## (Intercept) leveldevelop
## 0.5157 -0.7610
##
## Degrees of Freedom: 1247 Total (i.e. Null); 1246 Residual
## Null Deviance: 1723
## Residual Deviance: 1679 AIC: 1683
s[, 4] <- s[, 4] - s[, 3]
s[, 3] <- s[, 3] - s[, 3]
s # print
## as.numeric(math181grade) N= 1248
##
## +--------+---------+----+---------+----+----------+
## | | | N| Y>=1|Y>=2| Y>=3|
## +--------+---------+----+---------+----+----------+
## | gpa|[0.4,3.0)| 330|0.6390800| 0|-1.2639374|
## | |[3.0,3.4)| 301|1.8471096| 0|-0.9570776|
## | |[3.4,4.0)| 411|2.9216243| 0|-1.2716023|
## | | 4.0| 206|4.2145937| 0|-1.2198883|
## +--------+---------+----+---------+----+----------+
## | level| college| 658|2.0966537| 0|-0.9644213|
## | | develop| 590|1.5071878| 0|-0.9435266|
## +--------+---------+----+---------+----+----------+
## |grade165| A| 513|3.2047769| 0|-1.0251295|
## | | B| 394|1.6402096| 0|-1.0704723|
## | | C| 341|0.9514546| 0|-1.3415890|
## +--------+---------+----+---------+----+----------+
## | Overall| |1248|1.7870931| 0|-0.9222063|
## +--------+---------+----+---------+----+----------+
plot(s, which=1:3, pch=1:3, xlab='logit', main=' ', xlim=range(s[,3:4]))
If the proportional odds assumption holds, then for each predictor variable the distance between the symbols for each set of categories of the dependent variable should remain similar. To provide a common reference point, the code above sets the Y>=2 logits to zero and re-expresses the Y>=3 logits as distances from Y>=2. Looking at the rows for the variable level, we see that the distance between the two sets of coefficients is similar (-0.96 versus -0.94). In contrast, the distances for gpa are more varied, ranging from about -0.96 to -1.27 across the four gpa groups, suggesting that the proportional odds assumption may not hold.
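A packaged alternative to this graphical check is the Brant test, which tests proportional odds for the model as a whole and for each predictor separately. A minimal sketch, assuming the brant package is installed:

# install.packages("brant")   # if not already installed
library(brant)
brant(logitfit1)   # omnibus and per-predictor tests of proportional odds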
Once we are done assessing whether the assumptions of our model hold, we can obtain predicted probabilities, which are usually easier to understand than either the coefficients or the odds ratios. We create a simulated dataset of the same length as the original data, in which we vary level (college and developmental), math165grade (2, 3, and 4, i.e., a C, B, and A), and gpa over its observed range, and then we calculate the probability of being in each category of math181grade.
newdat <- data.frame(
level = rep(c("college","develop"), 624),
math165grade = rep(2:4, each = 416),
gpa = rep(seq(from = 0.4, to = 4, length.out = 312), 4))
newdat <- cbind(newdat, predict(logitfit1, newdat, type = "probs"))
##show first few rows
head(newdat)
## level math165grade gpa A B C D
## 1 college 2 0.4000000 0.003157532 0.011281516 0.03356656 0.06058908
## 2 develop 2 0.4115756 0.002073307 0.007444687 0.02249818 0.04197694
## 3 college 2 0.4231511 0.003257277 0.011632571 0.03456195 0.06220028
## 4 develop 2 0.4347267 0.002138875 0.007677809 0.02318069 0.04316219
## 5 college 2 0.4463023 0.003360162 0.011994340 0.03558472 0.06384524
## 6 develop 2 0.4578778 0.002206512 0.007918140 0.02388293 0.04437649
## F
## 1 0.8914053
## 2 0.9260069
## 3 0.8883479
## 4 0.9238404
## 5 0.8852155
## 6 0.9216159
Finally, reshape the data to long format with the reshape2 package and plot all of the predicted probabilities for the different conditions.
lnewdat <- melt(newdat, id.vars = c("level", "math165grade", "gpa"),
variable.name = "Math181Grade", value.name="Probability")
## view first few rows
head(lnewdat)
## level math165grade gpa Math181Grade Probability
## 1 college 2 0.4000000 A 0.003157532
## 2 develop 2 0.4115756 A 0.002073307
## 3 college 2 0.4231511 A 0.003257277
## 4 develop 2 0.4347267 A 0.002138875
## 5 college 2 0.4463023 A 0.003360162
## 6 develop 2 0.4578778 A 0.002206512
We plot the predicted probabilities, connected with a line, colored by level of the outcome, math181grade, and faceted by level and math165grade. We also use the label_both labeller to add clearer labels showing what each column and row of the plot represents.
ggplot(lnewdat, aes(x = gpa, y = Probability, color = Math181Grade)) +
geom_line() + facet_grid(level ~ math165grade, labeller="label_both")
These final plots show the predicted probability of getting an A, B, C, D, or F in Math 181 as a function of a student’s gpa, Math 165 grade, and starting level of math at MC (developmental or college). Follow each curve across a faceted panel to see how the predicted probability of that grade changes with gpa.