###         ---    ooo    ---      ooo        ---- ###
###                    UNAL                        ###
###            Categorical Data Analysis            ###
###               TRABAJO EN CLASE
###         ---    ooo    ---      ooo        ---- ###
rm(list=ls())
## Fixing set working directory
#setwd("D:/CEAM/2022/Semestre I/Teaching/ProbStat/Bases")
#      ---           ooo           --- 
#      ---           ooo           --- 
#install.packages("vcdExtra") 
library("vcdExtra") 
## Loading required package: vcd
## Loading required package: grid
## Loading required package: gnm
library(nnet) # for multinom()
library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:vcdExtra':
## 
##     summarise
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
#install.packages("haven")
setwd("C:/Users/mjsal.DESKTOP-AEUGMMS/Downloads/")
dat <- read.csv("ologit.csv")
dat[1:3,]
##   apply pared public  gpa
## 1     2     0      0 3.26
## 2     1     1      0 3.21
## 3     0     1      1 3.94
table(dat$apply)
## 
##   0   1   2 
## 220 140  40
# Ordered response
dat <- dat %>%
  mutate(
    applyf = case_when(
      apply == 0 ~ "unlikely",
      apply == 1 ~ "somewhat likely",
      apply == 2 ~ "very likely"
    )
  )
dat$applyf <- ordered(dat$applyf,
                     levels = c("unlikely", "somewhat likely", "very likely"))

# Other predictors as factors
dat$pared  <- factor(dat$pared)   # 0/1: parent has college degree
dat$public <- factor(dat$public)  # 0/1: public high school

## Fiiting
# Main effects model
mo <- polr(applyf ~ gpa + pared + public, data = dat, Hess = TRUE)
summary(mo)
## Call:
## polr(formula = applyf ~ gpa + pared + public, data = dat, Hess = TRUE)
## 
## Coefficients:
##            Value Std. Error t value
## gpa      0.61594     0.2606  2.3632
## pared1   1.04769     0.2658  3.9418
## public1 -0.05879     0.2979 -0.1974
## 
## Intercepts:
##                             Value   Std. Error t value
## unlikely|somewhat likely     2.2039  0.7795     2.8272
## somewhat likely|very likely  4.2994  0.8043     5.3453
## 
## Residual Deviance: 717.0249 
## AIC: 727.0249
## 
# Coefficients table with p-values
ctab <- coef(summary(mo))
pvals <- 2 * (1 - pnorm(abs(ctab[, "t value"])))
ctab <- cbind(ctab, "p value" = pvals)
ctab
##                                   Value Std. Error    t value      p value
## gpa                          0.61594057  0.2606340  2.3632399 1.811594e-02
## pared1                       1.04769010  0.2657894  3.9418050 8.087072e-05
## public1                     -0.05878572  0.2978614 -0.1973593 8.435464e-01
## unlikely|somewhat likely     2.20391473  0.7795455  2.8271792 4.696004e-03
## somewhat likely|very likely  4.29936315  0.8043267  5.3452947 9.027008e-08
# Odds ratios and 95% CIs
OR  <- exp(coef(mo))
CI  <- exp(confint(mo))   # profile CIs
## Waiting for profiling to be done...
cbind(OR, CI)
##                OR     2.5 %   97.5 %
## gpa     1.8513972 1.1136247 3.098490
## pared1  2.8510579 1.6958376 4.817114
## public1 0.9429088 0.5208954 1.680579
#3 Predicted Probab
library(ggplot2)
library(tidyr)

gpa_seq <- seq(min(dat$gpa), max(dat$gpa), length.out = 100)

newdat <- expand.grid(
  gpa    = gpa_seq,
  pared  = "1",   # parent has college degree
  public = "0"    # private school
)

probs <- predict(mo, newdata = newdat, type = "probs") %>% as.data.frame()
plotdat <- bind_cols(newdat, probs) |>
  pivot_longer(cols = levels(dat$applyf),
               names_to = "apply",
               values_to = "prob")

ggplot(plotdat, aes(x = gpa, y = prob, colour = apply)) +
  geom_line(linewidth = 1) +
  labs(x = "GPA", y = "Fitted probability",
       colour = "Application level") +
  theme_minimal()