Part 1: Logistic Regression: Probit vs Logit

install.packages("titanic", repos = "http://cran.us.r-project.org")
library(tidyverse)
library(titanic)

# LINEAR MODEL
lm_t <- lm(Survived ~ Fare, data = titanic_train)

ggplot(titanic_train, aes(x=Fare, y=Survived)) + geom_point() + 
  stat_smooth(method="lm", se=TRUE)

# LOGIT MODEL
logit_t <- glm(Survived ~ Pclass + Sex, family = binomial(link = "logit"), data = titanic_train)

summary(logit_t)
## 
## Call:
## glm(formula = Survived ~ Pclass + Sex, family = binomial(link = "logit"), 
##     data = titanic_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2030  -0.7036  -0.4519   0.6719   2.1599  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.2946     0.2974  11.077   <2e-16 ***
## Pclass       -0.9606     0.1061  -9.057   <2e-16 ***
## Sexmale      -2.6434     0.1838 -14.380   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1186.7  on 890  degrees of freedom
## Residual deviance:  827.2  on 888  degrees of freedom
## AIC: 833.2
## 
## Number of Fisher Scoring iterations: 4
ggplot(titanic_train, aes(x=Fare, y=Survived)) + geom_point() + 
  stat_smooth(method="glm", method.args=list(family="binomial"(link="logit")), se=TRUE)

# PROBIT MODEL
probit_t <- glm(Survived ~ Fare, family = binomial(link = "probit"), data = titanic_train)

summary(probit_t)
## 
## Call:
## glm(formula = Survived ~ Fare, family = binomial(link = "probit"), 
##     data = titanic_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4601  -0.8920  -0.8601   1.3527   1.5811  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.563592   0.056440  -9.986  < 2e-16 ***
## Fare         0.008453   0.001242   6.804 1.02e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1186.7  on 890  degrees of freedom
## Residual deviance: 1119.9  on 889  degrees of freedom
## AIC: 1123.9
## 
## Number of Fisher Scoring iterations: 6
ggplot(titanic_train, aes(x=Fare, y=Survived)) + geom_point() + 
  stat_smooth(method="glm", method.args=list(family="binomial"(link="probit")), se=TRUE)

Part 2: Multinomial Logistic Regression

install.packages("nnet", repos = "http://cran.us.r-project.org")
install.packages("Ecdat", repos = "http://cran.us.r-project.org")

library(nnet)
library(Ecdat)
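
The Yogurt data from Ecdat records one row per purchase occasion: the chosen brand (choice), a household id, and feature-ad (feat.*) and price (price.*) columns for each of the four brands. A quick structural look (a sketch; output omitted):

str(Yogurt)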

Y <- Yogurt
M <- multinom(choice ~ ., data = Y) # the model
## # weights:  44 (30 variable)
## initial  value 3343.741999 
## iter  10 value 2740.831153
## iter  20 value 2593.985140
## iter  30 value 2563.707966
## final  value 2562.990614 
## converged
summary(M)
## Call:
## multinom(formula = choice ~ ., data = Y)
## 
## Coefficients:
##        (Intercept)            id feat.yoplait feat.dannon feat.hiland
## dannon -0.03520968  0.0001402729   -0.3401250  -0.4537247 -0.06022368
## hiland  4.14419228 -0.0005795527   -0.5042782  -0.5157286  1.07464455
## weight  1.18249094  0.0048663666   -0.2611470  -0.7810408 -1.23737832
##        feat.weight price.yoplait price.dannon price.hiland price.weight
## dannon   0.4865494     0.3675842   -0.7044580    0.1846033   0.13864682
## hiland   1.0134997     0.1305399   -0.4733090   -0.7054197  -0.06976759
## weight   1.1985995     0.4751706   -0.5396113   -0.3682489  -0.05816486
## 
## Std. Errors:
##        (Intercept)          id feat.yoplait feat.dannon feat.hiland
## dannon   0.8822363 0.001870004    0.2239925   0.3140816   0.3276537
## hiland   2.0291901 0.004573466    0.7435141   0.8224360   0.4603284
## weight   1.0029899 0.002120281    0.2688138   0.3821215   0.3936001
##        feat.weight price.yoplait price.dannon price.hiland price.weight
## dannon   0.3276952    0.03461712   0.06710173   0.07237536   0.08089368
## hiland   0.6936778    0.06907950   0.15048557   0.15430184   0.16553123
## weight   0.3360577    0.04614420   0.07464468   0.08106812   0.08519369
## 
## Residual Deviance: 5125.981 
## AIC: 5185.981
exp(coef(M)) # exponentiated coefficients: relative risk ratios vs. the yoplait baseline
##        (Intercept)        id feat.yoplait feat.dannon feat.hiland
## dannon    0.965403 1.0001403    0.7116813   0.6352576   0.9415539
## hiland   63.066661 0.9994206    0.6039413   0.5970654   2.9289516
## weight    3.262491 1.0048782    0.7701677   0.4579292   0.2901439
##        feat.weight price.yoplait price.dannon price.hiland price.weight
## dannon    1.626694      1.444241    0.4943765    1.2027413    1.1487183
## hiland    2.755227      1.139443    0.6229375    0.4939013    0.9326105
## weight    3.315470      1.608289    0.5829748    0.6919450    0.9434944
head(fitted(M)) #probabilities of picking each category for each observation
##     yoplait    dannon      hiland    weight
## 1 0.3245838 0.5091098 0.014125195 0.1521812
## 2 0.6018923 0.2850152 0.009748865 0.1033436
## 3 0.5813145 0.3033498 0.010775186 0.1045605
## 4 0.5813145 0.3033498 0.010775186 0.1045605
## 5 0.4133581 0.2930154 0.023418517 0.2702081
## 6 0.4915570 0.2899564 0.027613810 0.1908728
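
Each row of fitted probabilities sums to 1; predict() with type = "class" turns them into hard class predictions. A sketch (output not shown):

# most probable brand for each observation
head(predict(M, type = "class"))
rowSums(head(fitted(M))) # sanity check: each row sums to 1
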
Y$choice <- relevel(Y$choice, ref = "dannon") #set new baseline

# FULL MODEL
M <- multinom(choice ~ ., data = Y)
## # weights:  44 (30 variable)
## initial  value 3343.741999 
## iter  10 value 2723.422944
## iter  20 value 2610.939896
## iter  30 value 2563.450128
## final  value 2562.990625 
## converged
summary(M)
## Call:
## multinom(formula = choice ~ ., data = Y)
## 
## Coefficients:
##         (Intercept)            id feat.yoplait feat.dannon feat.hiland
## yoplait  0.03458607 -0.0001407410   0.34004481  0.45379140  0.06035367
## hiland   4.17701865 -0.0007153663  -0.16441842 -0.06136236  1.13495466
## weight   1.21648919  0.0047271381   0.07897677 -0.32719222 -1.17703765
##         feat.weight price.yoplait price.dannon price.hiland price.weight
## yoplait  -0.4864858    -0.3675775    0.7044753   -0.1845759   -0.1386124
## hiland    0.5276242    -0.2369168    0.2316330   -0.8900015   -0.2088435
## weight    0.7120903     0.1076158    0.1648777   -0.5528200   -0.1967649
## 
## Std. Errors:
##         (Intercept)          id feat.yoplait feat.dannon feat.hiland
## yoplait   0.8822345 0.001870002    0.2239905   0.3140810   0.3276568
## hiland    2.0045405 0.004559142    0.7531383   0.8017917   0.4601638
## weight    0.8945313 0.001968840    0.2807492   0.3197073   0.3823698
##         feat.weight price.yoplait price.dannon price.hiland price.weight
## yoplait   0.3276950    0.03461661   0.06710192   0.07237505   0.08089485
## hiland    0.6865561    0.07314327   0.14532253   0.15387341   0.16449378
## weight    0.2871956    0.04632864   0.05918325   0.07819600   0.07641287
## 
## Residual Deviance: 5125.981 
## AIC: 5185.981
z <- summary(M)$coefficients/summary(M)$standard.errors # Wald z statistics
p <- (1 - pnorm(abs(z), 0, 1)) * 2 # two-tailed p values
p
##         (Intercept)         id feat.yoplait feat.dannon feat.hiland
## yoplait   0.9687287 0.94000587    0.1289838   0.1485077 0.853858262
## hiland    0.0371803 0.87531728    0.8271868   0.9389962 0.013647324
## weight    0.1738559 0.01635142    0.7784748   0.3061132 0.002082057
##         feat.weight price.yoplait price.dannon price.hiland price.weight
## yoplait  0.13765801   0.000000000  0.000000000 1.076396e-02   0.08662261
## hiland   0.44218515   0.001199163  0.110953132 7.295151e-09   0.20422249
## weight   0.01315811   0.020185676  0.005338194 1.552980e-12   0.01002335
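
The same Wald computation is repeated for each reduced model below, so it could be wrapped in a small helper (hypothetical name wald_p; a sketch):

# two-tailed Wald p values for a multinom fit
wald_p <- function(fit) {
  z <- summary(fit)$coefficients / summary(fit)$standard.errors
  (1 - pnorm(abs(z))) * 2
}
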
library(stargazer) #a package for making the output into a nice table

# MODEL 1
M1 <- multinom(choice ~ price.yoplait + price.dannon + price.hiland + price.weight, data = Y) #price model
## # weights:  24 (15 variable)
## initial  value 3343.741999 
## iter  10 value 2696.572347
## iter  20 value 2590.714860
## final  value 2590.678971 
## converged
summary(M1)
## Call:
## multinom(formula = choice ~ price.yoplait + price.dannon + price.hiland + 
##     price.weight, data = Y)
## 
## Coefficients:
##         (Intercept) price.yoplait price.dannon price.hiland price.weight
## yoplait -0.01730093   -0.37181637    0.6872791   -0.2021630  -0.09475388
## hiland   6.36906160   -0.24038641    0.2362219   -1.1644689  -0.29925903
## weight   1.11884075    0.08901697    0.1897998   -0.4480783  -0.22525465
## 
## Std. Errors:
##         (Intercept) price.yoplait price.dannon price.hiland price.weight
## yoplait   0.8186591    0.03417663   0.06291827   0.06846684   0.07733802
## hiland    1.6863041    0.07344800   0.13278123   0.11799583   0.14801776
## weight    0.8152734    0.04589639   0.05402714   0.07013378   0.07000950
## 
## Residual Deviance: 5181.358 
## AIC: 5211.358
z1 <- summary(M1)$coefficients/summary(M1)$standard.errors 
p1 <- (1 - pnorm(abs(z1), 0, 1)) * 2
p1
##          (Intercept) price.yoplait price.dannon price.hiland price.weight
## yoplait 0.9831393611   0.000000000 0.0000000000 3.149938e-03  0.220503123
## hiland  0.0001587699   0.001064581 0.0752347040 0.000000e+00  0.043199301
## weight  0.1699543794   0.052438052 0.0004430004 1.670737e-10  0.001293189
# MODEL 2
M2 <- multinom(choice ~ feat.yoplait + feat.dannon + feat.hiland + feat.weight, data = Y) #feature model
## # weights:  24 (15 variable)
## initial  value 3343.741999 
## iter  10 value 2811.342546
## iter  20 value 2786.117255
## final  value 2786.109276 
## converged
summary(M2)
## Call:
## multinom(formula = choice ~ feat.yoplait + feat.dannon + feat.hiland + 
##     feat.weight, data = Y)
## 
## Coefficients:
##         (Intercept) feat.yoplait feat.dannon feat.hiland feat.weight
## yoplait  -0.1936272  0.846540156  -0.6273882 -0.02145402  -0.1248708
## hiland   -2.9226341 -0.021333436  -0.2961226  2.48442120   0.5246249
## weight   -0.5541230  0.007649006  -0.5247896 -0.31481937   0.4915248
## 
## Std. Errors:
##         (Intercept) feat.yoplait feat.dannon feat.hiland feat.weight
## yoplait  0.05202093    0.2066922   0.2610760   0.2737980   0.2732667
## hiland   0.15475460    0.7416536   0.7374798   0.3258950   0.6225818
## weight   0.05788051    0.2740806   0.2866666   0.3354511   0.2567316
## 
## Residual Deviance: 5572.219 
## AIC: 5602.219
z2 <- summary(M2)$coefficients/summary(M2)$standard.errors
p2 <- (1 - pnorm(abs(z2), 0, 1)) * 2
p2
##          (Intercept) feat.yoplait feat.dannon  feat.hiland feat.weight
## yoplait 0.0001975709 4.209728e-05  0.01625733 9.375440e-01   0.6477029
## hiland  0.0000000000 9.770523e-01  0.68802757 2.464695e-14   0.3994185
## weight  0.0000000000 9.777356e-01  0.06715108 3.479898e-01   0.0555503
# MODEL 3
M3 <- multinom(choice ~ feat.weight + price.yoplait + price.dannon + price.hiland, data = Y)
## # weights:  24 (15 variable)
## initial  value 3343.741999 
## iter  10 value 2689.814698
## iter  20 value 2586.232922
## final  value 2586.218053 
## converged
summary(M3)
## Call:
## multinom(formula = choice ~ feat.weight + price.yoplait + price.dannon + 
##     price.hiland, data = Y)
## 
## Coefficients:
##         (Intercept) feat.weight price.yoplait price.dannon price.hiland
## yoplait  -0.4868515  -0.3600374   -0.38592053    0.6645636   -0.1906188
## hiland    4.3842502   0.9043384   -0.24106611    0.2005119   -1.1886847
## weight   -0.1938915   0.9480919    0.08262245    0.1532484   -0.4768760
## 
## Std. Errors:
##         (Intercept) feat.weight price.yoplait price.dannon price.hiland
## yoplait   0.6953676   0.3133467    0.03414217   0.06176640   0.06882024
## hiland    1.4367336   0.6610763    0.07445377   0.12879778   0.11938915
## weight    0.7029393   0.2679710    0.04484957   0.05149647   0.07105390
## 
## Residual Deviance: 5172.436 
## AIC: 5202.436
z3 <- summary(M3)$coefficients/summary(M3)$standard.errors
p3 <- (1 - pnorm(abs(z3), 0, 1)) * 2
p3
##         (Intercept)  feat.weight price.yoplait price.dannon price.hiland
## yoplait 0.483842694 0.2505532970    0.00000000  0.000000000 5.608959e-03
## hiland  0.002276706 0.1713186599    0.00120457  0.119518777 0.000000e+00
## weight  0.782678888 0.0004031105    0.06544402  0.002921295 1.926770e-11
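
Before tabulating, the three reduced models can be compared on AIC directly; from the summaries above, M3 has the lowest (5202.4, versus 5211.4 for M1 and 5602.2 for M2), which is why it gets the table below.

AIC(M1, M2, M3) # lower is better
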
stargazer(M3, type = "text", title = "M3")
## 
## M3
## ===============================================
##                        Dependent variable:     
##                   -----------------------------
##                    yoplait   hiland    weight  
##                      (1)       (2)       (3)   
## -----------------------------------------------
## feat.weight        -0.360     0.904   0.948*** 
##                    (0.313)   (0.661)   (0.268) 
##                                                
## price.yoplait     -0.386*** -0.241***  0.083*  
##                    (0.034)   (0.074)   (0.045) 
##                                                
## price.dannon      0.665***    0.201   0.153*** 
##                    (0.062)   (0.129)   (0.051) 
##                                                
## price.hiland      -0.191*** -1.189*** -0.477***
##                    (0.069)   (0.119)   (0.071) 
##                                                
## Constant           -0.487   4.384***   -0.194  
##                    (0.695)   (1.437)   (0.703) 
##                                                
## -----------------------------------------------
## Akaike Inf. Crit. 5,202.436 5,202.436 5,202.436
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

Part 3: Ordinal Logistic Regression

Reference: UCLA Institute for Digital Research and Education

A study looks at factors that influence the decision of whether to apply to graduate school. College juniors are asked if they are unlikely, somewhat likely, or very likely to apply to graduate school, so the outcome variable has three ordered categories. Data on parental educational status, whether the undergraduate institution is public or private, and current GPA are also collected. The researchers have reason to believe that the “distances” between these three points are not equal; for example, the “distance” between “unlikely” and “somewhat likely” may be shorter than the distance between “somewhat likely” and “very likely”.
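
The model fit below is the proportional-odds (cumulative logit) model, which uses only the ordering of the categories and assumes nothing about their spacing. In the parameterization used by MASS::polr,

logit P(apply <= j) = zeta_j - (b1*pared + b2*public + b3*gpa),  j = 1, 2

where the cutpoints zeta_j (the “Intercepts” in the output below) absorb the unequal distances between categories, while a single set of slopes is shared across both splits.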

## ORDINAL LOGISTIC REGRESSION 
#install.packages("foreign")
library(foreign)

#import graduate school data 
dat <- read.dta("https://stats.idre.ucla.edu/stat/data/ologit.dta")
head(dat)
##             apply pared public  gpa
## 1     very likely     0      0 3.26
## 2 somewhat likely     1      0 3.21
## 3        unlikely     1      1 3.94
## 4 somewhat likely     0      0 2.81
## 5 somewhat likely     0      0 2.53
## 6        unlikely     0      1 2.59

Variables:

  • apply: levels “unlikely”, “somewhat likely”, and “very likely”, coded 1, 2, and 3, respectively
  • pared: (stands for parent education) a 0/1 variable indicating whether at least one parent has a graduate degree
  • public: a 0/1 variable where 1 indicates that the undergraduate institution is public and 0 private
  • gpa: the student’s grade point average
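
polr() (used below) relies on the order of the outcome's factor levels, so it is worth confirming that apply is ordered as intended before fitting:

# the level order drives the cumulative splits in polr()
levels(dat$apply) # "unlikely" "somewhat likely" "very likely"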

Visualized the data

# create a viz comparing the apply with gpa, pared, and public 
library(tidyverse)
ggplot(dat, aes(x = apply, y = gpa)) +
  geom_boxplot(size = .75) +
  geom_jitter(alpha = .5) +
  facet_grid(pared~ public) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

Created a “flattened” table

#flatten table
ftable(xtabs(~ public + apply + pared, data = dat))
##                        pared   0   1
## public apply                        
## 0      unlikely              175  14
##        somewhat likely        98  26
##        very likely            20  10
## 1      unlikely               25   6
##        somewhat likely        12   4
##        very likely             7   3

Fit an ordinal logistic model

# fit the model 
library(MASS)
m <- polr(apply ~ pared + public + gpa, data = dat, Hess=TRUE) # Hess=TRUE stores the Hessian so summary() can compute standard errors
summary(m)
## Call:
## polr(formula = apply ~ pared + public + gpa, data = dat, Hess = TRUE)
## 
## Coefficients:
##           Value Std. Error t value
## pared   1.04769     0.2658  3.9418
## public -0.05879     0.2979 -0.1974
## gpa     0.61594     0.2606  2.3632
## 
## Intercepts:
##                             Value   Std. Error t value
## unlikely|somewhat likely     2.2039  0.7795     2.8272
## somewhat likely|very likely  4.2994  0.8043     5.3453
## 
## Residual Deviance: 717.0249 
## AIC: 727.0249
coef(m)
##       pared      public         gpa 
##  1.04769010 -0.05878572  0.61594057
# store the coefficients
ctable <- coef(summary(m))
ctable
##                                   Value Std. Error    t value
## pared                        1.04769010  0.2657894  3.9418050
## public                      -0.05878572  0.2978614 -0.1973593
## gpa                          0.61594057  0.2606340  2.3632399
## unlikely|somewhat likely     2.20391473  0.7795455  2.8271792
## somewhat likely|very likely  4.29936315  0.8043267  5.3452947
# calculate and store p values
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
p
##                       pared                      public 
##                8.087072e-05                8.435464e-01 
##                         gpa    unlikely|somewhat likely 
##                1.811594e-02                4.696004e-03 
## somewhat likely|very likely 
##                9.027008e-08
# combined table
ctable <- cbind(ctable, "p value" = p)
ctable
##                                   Value Std. Error    t value      p value
## pared                        1.04769010  0.2657894  3.9418050 8.087072e-05
## public                      -0.05878572  0.2978614 -0.1973593 8.435464e-01
## gpa                          0.61594057  0.2606340  2.3632399 1.811594e-02
## unlikely|somewhat likely     2.20391473  0.7795455  2.8271792 4.696004e-03
## somewhat likely|very likely  4.29936315  0.8043267  5.3452947 9.027008e-08
# confidence intervals from the profiled likelihood
ci <- confint(m)
ci
##             2.5 %    97.5 %
## pared   0.5281768 1.5721750
## public -0.6522060 0.5191384
## gpa     0.1076202 1.1309148
# odds ratios
exp(coef(m))
##     pared    public       gpa 
## 2.8510579 0.9429088 1.8513972
# OR and CI
exp(cbind(OR = coef(m), ci))
##               OR     2.5 %   97.5 %
## pared  2.8510579 1.6958376 4.817114
## public 0.9429088 0.5208954 1.680579
## gpa    1.8513972 1.1136247 3.098490
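
Finally, just as fitted() did for the multinomial model, the fitted polr object can produce a predicted probability for each apply category per student. A minimal sketch (output not shown):

# per-student predicted probabilities over the three apply levels
head(predict(m, type = "probs"))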