1 input data

load("ghq.rda")
dta2 <- ghq
head(dta2)
##   sex ghq c nc
## 1 men   0 0 18
## 2 men   1 0  8
## 3 men   2 1  1
## 4 men   3 0  0
## 5 men   4 1  0
## 6 men   5 3  0

2 Detecting mental illness by questionnaire

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
dta2 <-  ghq %>% mutate(Total = c + nc) %>%
  dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc)
head(dta2)
##   Gender Score Case Noncase Total
## 1    men     0    0      18    18
## 2    men     1    0       8     8
## 3    men     2    1       1     2
## 4    men     3    0       0     0
## 5    men     4    1       0     1
## 6    men     5    3       0     3

2.1 freq by Score

knitr::kable(t(aggregate(cbind(Case, Noncase) ~ Score, data=dta2, sum)))
## Warning in kable_pipe(x = structure(c("Score", "Case", "Noncase", "0", "2", :
## The table should have a header (column names)
Score 0 1 2 3 4 5 6 7 8 9 10
Case 2 2 5 3 3 6 1 3 3 1 1
Noncase 60 22 6 1 1 0 0 0 0 0 0

Noncase 大多數都在0~1分

case分數比較平均散布在0~10分

2.2 freq by Gender

knitr::kable(t(aggregate(cbind(Case, Noncase) ~ Gender, data=dta2, sum)))
## Warning in kable_pipe(x = structure(c("Gender", "Case", "Noncase", "men", : The
## table should have a header (column names)
Gender men women
Case 8 22
Noncase 27 63

男性看起來比例略高於女性是case的人

3 Gender and GHQ score effects

library(ggplot2)
ggplot(dta2, 
       aes(x=Score, 
           y=Case/Total, 
           color=Gender)) + 
 geom_point(pch=1, size=rel(2))+
 stat_smooth(method="glm", 
             formula=y ~ x, 
             method.args=list(family=binomial),
             aes(weight=Total), 
             se=FALSE) +
 scale_color_manual(values=c("black", "red3"))+
 scale_x_continuous(breaks=seq(0, 10, by=1))+
 scale_y_continuous(breaks=seq(0, 1, by=.1))+
 labs(x="Score on GHQ", 
      y="Proportion of cases") +
 theme_minimal()+
 theme(legend.position=c(.1, .9))
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: Removed 5 rows containing missing values (geom_point).

4 Analysis of deviance

m0 <- glm(cbind(Case, Noncase) ~ Score + Gender + Score:Gender, data=dta2, family=binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
m1 <- glm(cbind(Case, Noncase) ~ Score + Gender, data=dta2, family=binomial)
m2 <- glm(cbind(Case, Noncase) ~ Score, data=dta2, family=binomial)
knitr::kable(anova(m2, m1, m0, test="Chisq"))
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
15 5.743961 NA NA NA
14 4.941883 1 0.8020783 0.3704727
13 1.542230 1 3.3996529 0.0652101

看起來m2, m1, m0模型fit差不多

性別和性別的交互作用項可以不用考慮

5 Testing for GHQ score effect

m3 <- glm(cbind(Case, Noncase) ~ Gender, data=dta2, family=binomial(link=logit))
knitr::kable(anova(m3, m1, test="Chisq"))
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
15 83.053819 NA NA NA
14 4.941883 1 78.11194 0

分數效果有顯著,所以應該考慮分數差異進去

6 Goodness of fit

(m1$null.deviance - m1$deviance)/m1$null.deviance
## [1] 0.9405854
1 - pchisq(m1$deviance, m1$df.residual)
## [1] 0.9866052

7 Parameter estimates

sjPlot::tab_model(m1, show.r2=FALSE, show.obs=FALSE, show.se=TRUE)
  cbind(Case, Noncase)
Predictors Odds Ratios std. Error CI p
(Intercept) 0.02 0.98 0.00 – 0.08 <0.001
Score 4.19 0.29 2.57 – 8.20 <0.001
Gender [women] 2.21 0.93 0.42 – 17.73 0.393

For a given GHQ score, the odds of a female being a case is between 0.42 to 17.73 of the corresponding odds for a female. (But no statistical significant)

Odds “m to n” in favor of an event mean we expect the event will occur m times for every n times it does not occur.

For every 100 male cases, we expect between 42 to 177 female cases. (But no statistical significant)

Conditional on gender, one point increases in GHQ is associated with the increase of 157% to 720% in the odds for being a case (2.57 to 8.2 times).

The odds of being a case with an increase of 1 GHQ unit moves up from 1:1 to between 2.57 and 8.20 to 1.

8 Linear predictors

dta_p <- dta2 %>%
       mutate(Prop=Case/Total,
              phat=predict(m1, 
                              newdata=dta2[, c(1,2)], 
                              type="response"),
              elgit=log((Case+1/120)/(Noncase+(1/120))))
dta_m1 <- broom::augment(m1, newdata=dta_p)
ggplot(dta_m1, 
       aes(Score, .fitted, 
           color=Gender)) + 
  geom_line(alpha=.5) +
  geom_point(data=dta_p,
             aes(Score, elgit))+
  scale_color_manual(values=c("black", "red3"))+
  labs(x="Score on GHQ",
       y="Empirical and fitted log-odds")+
  theme_minimal()+
  theme(legend.position = c(.1, .9))

性別呈平行線,無交互作用

9 From effect to probability

curve(faraway::ilogit(x), -6, 6, 
      bty='n', 
      xlab=expression(eta), 
      ylab="Probability")
grid()

10 Model fit

library(faraway)
beta <- coef(m1)
with(dta2, plot(Score, Case/Total, 
               col=Gender, 
               bty='n', 
               xlab="GHQ score"))
grid()
curve(ilogit(beta[1]+beta[2]*x), add=T, col=1)
curve(ilogit(beta[1]+beta[2]*x+beta[3]), add=T, col=2)

女性有比較高的機會呈為case

11 Residual plot