library(nnet)
library(faraway)

This hsb data was collected as a subset of the High School and Beyond study conducted by the National Education Longitudinal Studies program of the National Center for Education Statistics. The variables are gender; race; socioeconomic status; school type; chosen high school program type; scores on reading, writing, math, science, and social studies. We want to determine which factors are related to the choice of the type of program—academic, vocational, or general—that the students pursue in high school. The response is multinomial with three levels.

data(hsb)
head(hsb,n=20)
##     id gender         race    ses  schtyp     prog read write math science
## 1   70   male        white    low  public  general   57    52   41      47
## 2  121 female        white middle  public vocation   68    59   53      63
## 3   86   male        white   high  public  general   44    33   54      58
## 4  141   male        white   high  public vocation   63    44   47      53
## 5  172   male        white middle  public academic   47    52   57      53
## 6  113   male        white middle  public academic   44    52   51      63
## 7   50   male african-amer middle  public  general   50    59   42      53
## 8   11   male     hispanic middle  public academic   34    46   45      39
## 9   84   male        white middle  public  general   63    57   54      58
## 10  48   male african-amer middle  public academic   57    55   52      50
## 11  75   male        white middle  public vocation   60    46   51      53
## 12  60   male        white middle  public academic   57    65   51      63
## 13  95   male        white   high  public academic   73    60   71      61
## 14 104   male        white   high  public academic   54    63   57      55
## 15  38   male african-amer    low  public academic   45    57   50      31
## 16 115   male        white    low  public  general   42    49   43      50
## 17  76   male        white   high  public academic   47    52   51      50
## 18 195   male        white middle private  general   57    57   60      58
## 19 114   male        white   high  public academic   68    65   62      55
## 20  85   male        white middle  public  general   55    39   57      53
##    socst
## 1     57
## 2     61
## 3     31
## 4     56
## 5     61
## 6     61
## 7     61
## 8     36
## 9     51
## 10    51
## 11    61
## 12    61
## 13    71
## 14    46
## 15    56
## 16    56
## 17    56
## 18    56
## 19    61
## 20    46
  1. Fit a trinomial response model with the other relevant variables as predictors (untransformed).
m1 <- multinom(prog ~ gender + race + ses + schtyp + read + write + math ++science + socst, hsb, trace = FALSE)
summary(m1)
## Call:
## multinom(formula = prog ~ gender + race + ses + schtyp + read + 
##     write + math + +science + socst, data = hsb, trace = FALSE)
## 
## Coefficients:
##          (Intercept)  gendermale raceasian racehispanic racewhite
## general     3.631901 -0.09264717  1.352739   -0.6322019 0.2965156
## vocation    7.481381 -0.32104341 -0.700070   -0.1993556 0.3358881
##              seslow sesmiddle schtyppublic        read       write
## general  1.09864111 0.7029621    0.5845405 -0.04418353 -0.03627381
## vocation 0.04747323 1.1815808    2.0553336 -0.03481202 -0.03166001
##                math    science       socst
## general  -0.1092888 0.10193746 -0.01976995
## vocation -0.1139877 0.05229938 -0.08040129
## 
## Std. Errors:
##          (Intercept) gendermale raceasian racehispanic racewhite    seslow
## general     1.823452  0.4548778  1.058754    0.8935504 0.7354829 0.6066763
## vocation    2.104698  0.5021132  1.470176    0.8393676 0.7480573 0.7045772
##          sesmiddle schtyppublic       read      write       math
## general  0.5045938    0.5642925 0.03103707 0.03381324 0.03522441
## vocation 0.5700833    0.8348229 0.03422409 0.03585729 0.03885131
##             science      socst
## general  0.03274038 0.02712589
## vocation 0.03424763 0.02938212
## 
## Residual Deviance: 305.8705 
## AIC: 357.8705
  1. Use backward elimination to reduce the model to one where all predictors are statistically significant. Give an interpretation of the resulting model.
m2 <- step(m1, scope=~., direction="backward", trace = FALSE)
## trying - gender 
## trying - race 
## trying - ses 
## trying - schtyp 
## trying - read 
## trying - write 
## trying - math 
## trying - science 
## trying - socst 
## trying - gender 
## trying - ses 
## trying - schtyp 
## trying - read 
## trying - write 
## trying - math 
## trying - science 
## trying - socst 
## trying - ses 
## trying - schtyp 
## trying - read 
## trying - write 
## trying - math 
## trying - science 
## trying - socst 
## trying - ses 
## trying - schtyp 
## trying - read 
## trying - math 
## trying - science 
## trying - socst 
## trying - ses 
## trying - schtyp 
## trying - math 
## trying - science 
## trying - socst
summary(m2)
## Call:
## multinom(formula = prog ~ ses + schtyp + math + science + socst, 
##     data = hsb, trace = FALSE)
## 
## Coefficients:
##          (Intercept)      seslow sesmiddle schtyppublic       math
## general     2.587029  0.87607389 0.6978995    0.6468812 -0.1212242
## vocation    6.687272 -0.01569301 1.2065000    1.9955504 -0.1369641
##             science       socst
## general  0.08209791 -0.04441228
## vocation 0.03941237 -0.09363417
## 
## Std. Errors:
##          (Intercept)    seslow sesmiddle schtyppublic       math
## general     1.686492 0.5758781 0.4930330     0.545598 0.03213345
## vocation    1.945363 0.6690861 0.5571202     0.812881 0.03591701
##             science      socst
## general  0.02787694 0.02344856
## vocation 0.02864929 0.02586717
## 
## Residual Deviance: 315.5511 
## AIC: 343.5511
(dev<-deviance(m2)-deviance(m1))
## [1] 9.680566
pchisq(dev,m1$math-m2$math,lower=F)
## numeric(0)
  1. For the student with id 99, compute the predicted probabilities of the three possible choices.
predict(m2,type="probs")[99,]
##  academic   general  vocation 
## 0.1877186 0.3566929 0.4555884
sprog<-hsb$prog
matplot(prop.table(table(hsb$math,sprog),1),type="l",xlab="Math",ylab="Proportion",lty=c(1,2,5))

matplot(prop.table(table(hsb$science,sprog),1),type="l",xlab="Science",ylab="Proportion",lty=c(1,2,5))

matplot(prop.table(table(hsb$socst,sprog),1),type="l",xlab="Social",ylab="Proportion",lty=c(1,2,5))