require(foreign)
require(MASS)
require(Hmisc)
require(reshape2)
require(sandwich)
require(msm)
library(nnet)
library(ggplot2)
library(reshape2)
ml <- read.csv("https://raw.githubusercontent.com/RWorkshop/workshopdatasets/master/multilog.csv")
head(ml)
## X id female ses schtyp prog read write math science socst
## 1 1 45 female low public vocation 34 35 41 29 26
## 2 2 108 male middle public general 34 33 41 36 36
## 3 3 15 male high public vocation 39 39 44 26 42
## 4 4 67 male low public vocation 37 37 42 33 32
## 5 5 153 male middle public vocation 39 31 40 39 51
## 6 6 51 female high public general 42 36 42 31 39
## honors awards cid
## 1 not enrolled 0 1
## 2 not enrolled 0 1
## 3 not enrolled 0 1
## 4 not enrolled 0 1
## 5 not enrolled 0 1
## 6 not enrolled 0 1
with(ml, table(ses, prog))
## prog
## ses academic general vocation
## high 42 9 7
## low 19 16 12
## middle 44 20 31
with(ml, do.call(rbind,
tapply(write, prog, function(x) c(M = mean(x), SD = sd(x)))))
## M SD
## academic 56.25714 7.943343
## general 51.33333 9.397775
## vocation 46.76000 9.318754
ml <- ml %>% mutate(prog2 = factor(prog,levels=c("academic","vocation","general")))
test <- nnet::multinom(prog2 ~ ses + write,
data = ml)
## # weights: 15 (8 variable)
## initial value 219.722458
## iter 10 value 179.983731
## final value 179.981726
## converged
summary(test)
## Call:
## nnet::multinom(formula = prog2 ~ ses + write, data = ml)
##
## Coefficients:
## (Intercept) seslow sesmiddle write
## vocation 4.235574 0.9827182 1.2740985 -0.11360389
## general 1.689478 1.1628411 0.6295638 -0.05793086
##
## Std. Errors:
## (Intercept) seslow sesmiddle write
## vocation 1.204690 0.5955688 0.5111119 0.02222000
## general 1.226939 0.5142211 0.4650289 0.02141101
##
## Residual Deviance: 359.9635
## AIC: 375.9635
Wald Test Statistics
z <- summary(test)$coefficients/summary(test)$standard.errors
z
## (Intercept) seslow sesmiddle write
## vocation 3.515904 1.650050 2.492798 -5.112687
## general 1.376987 2.261364 1.353816 -2.705658
P-Values
p <- (1 - pnorm(abs(z), 0, 1)) * 2
p
## (Intercept) seslow sesmiddle write
## vocation 0.0004382601 0.09893276 0.0126741 3.176088e-07
## general 0.1685163893 0.02373673 0.1757949 6.816914e-03
## extract the coefficients from the model and exponentiate
exp(coef(test))
## (Intercept) seslow sesmiddle write
## vocation 69.101326 2.671709 3.575477 0.8926115
## general 5.416653 3.199009 1.876792 0.9437152
head(pp <- fitted(test))
## academic vocation general
## 1 0.1482721 0.5134769 0.3382509
## 2 0.1201988 0.6991678 0.1806335
## 3 0.4186768 0.3445095 0.2368137
## 4 0.1726839 0.4764728 0.3508433
## 5 0.1001206 0.7309367 0.1689428
## 6 0.3533583 0.4088370 0.2378047
dses <- data.frame(ses = c("low", "middle", "high"),
write = mean(ml$write))
predict(test, newdata = dses, "probs")
## academic vocation general
## 1 0.4396813 0.2021272 0.3581915
## 2 0.4777451 0.2939190 0.2283359
## 3 0.7009046 0.1206026 0.1784928
dwrite <- data.frame(ses = rep(c("low", "middle", "high"), each = 41),
write = rep(c(30:70), 3))
## store the predicted probabilities for each value of ses and write
pp.write <- cbind(dwrite,
predict(test, newdata = dwrite,
type = "probs", se = TRUE))
## calculate the mean probabilities within each level of ses
by(pp.write[, 3:5], pp.write$ses, colMeans)
## pp.write$ses: high
## academic vocation general
## 0.6164348 0.2027603 0.1808049
## ------------------------------------------------------------
## pp.write$ses: low
## academic vocation general
## 0.3972955 0.2748864 0.3278180
## ------------------------------------------------------------
## pp.write$ses: middle
## academic vocation general
## 0.4256172 0.3732951 0.2010877
## melt data set to long for ggplot2
lpp <- melt(pp.write, id.vars = c("ses", "write"),
value.name = "probability")
head(lpp) # view first few rows
## ses write variable probability
## 1 low 30 academic 0.09843258
## 2 low 31 academic 0.10716517
## 3 low 32 academic 0.11650018
## 4 low 33 academic 0.12645441
## 5 low 34 academic 0.13704163
## 6 low 35 academic 0.14827211
## plot predicted probabilities across write values for each level of ses
## facetted by program type
ggplot(lpp, aes(x = write, y = probability, colour = ses)) +
geom_line() + facet_grid(variable ~
., scales = "free")
