library(car)
## Loading required package: carData
library(plyr)
library(ggplot2)
library(ggfortify)
library(emmeans)
library(kableExtra)
#` This is largely taken from https://stats.idre.ucla.edu/r/dae/poisson-regression/
programmes <- read.csv("~/Dropbox/Teaching/second_year_stats/r_sessions/generalized/awards.csv", stringsAsFactors=TRUE)
programmes$prog<-as.factor(programmes$prog)
programmes$id<-as.factor(programmes$id)
Me <- tapply(programmes$num_awards, programmes$prog, mean)
Var <- tapply(programmes$num_awards, programmes$prog, var)
rbind(Me, Var) %>% kbl() %>%
kable_classic(full_width = F, html_font = "Cambria", font_size = 12)
|
Academic
|
General
|
Vocational
|
Me
|
1.000000
|
0.2000000
|
0.2400000
|
Var
|
1.634615
|
0.1636364
|
0.2677551
|
ggplot(programmes, aes(num_awards, fill = prog)) +
geom_histogram(binwidth=.5, position="dodge")

awards_model <- glm(num_awards ~ prog+math, family=poisson,data=programmes)
summary(awards_model)
##
## Call:
## glm(formula = num_awards ~ prog + math, family = poisson, data = programmes)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2043 -0.8436 -0.5106 0.2558 2.6796
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.16327 0.66288 -6.281 3.37e-10 ***
## progGeneral -1.08386 0.35825 -3.025 0.00248 **
## progVocational -0.71405 0.32001 -2.231 0.02566 *
## math 0.07015 0.01060 6.619 3.63e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 287.67 on 199 degrees of freedom
## Residual deviance: 189.45 on 196 degrees of freedom
## AIC: 373.5
##
## Number of Fisher Scoring iterations: 6
Anova(awards_model)
## Analysis of Deviance Table (Type II tests)
##
## Response: num_awards
## LR Chisq Df Pr(>Chisq)
## prog 14.572 2 0.0006852 ***
## math 45.010 1 1.96e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
100*((awards_model$null.deviance-awards_model$deviance)/awards_model$null.deviance)
## [1] 34.14393
awards_model$deviance/awards_model$df.residual
## [1] 0.9665797
autoplot(awards_model)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

plot(awards_model)




marginal = emmeans(awards_model, ~ prog)
pairs(marginal)
## contrast estimate SE df z.ratio p.value
## Academic - General 1.084 0.358 Inf 3.025 0.0070
## Academic - Vocational 0.714 0.320 Inf 2.231 0.0660
## General - Vocational -0.370 0.441 Inf -0.838 0.6791
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 3 estimates
## calculate and store predicted values
programmes$phat <- predict(awards_model, type="response")
## order by program and then by math
programmes <- programmes[with(programmes, order(prog, math)), ]
## create the plot
ggplot(programmes, aes(x = math, y = phat, colour = prog)) +
geom_point(aes(y = num_awards), alpha=.5, position=position_jitter(h=.2)) +
geom_line(size = 1) +
labs(x = "Math Score", y = "Expected number of awards")
