1 Introduction

Silvapulle (1981) reported a study on the relation between psychiatric diagnosis and the score on a 12-item General Health Questionnaire (GHQ) for 120 patients admitted to a hospital. Download the ghq.rda data file to your “data” folder and load it to your R session with the command

#> load(“data/ghq.rda”)

Column 1: Gender ID
Column 2: GHQ score
Column 3: Number of cases
Column 4: Number of non-cases

The patient was classified by psychiatrists as either a ‘case’ or a ‘non-case’.

The question of interest was whether GHQ score, together with patient’s gender, could be used to detect psychiatric cases. Perform an appropriate analysis.

2 Data management

load("~/data/ghq.rda") #load data
head(ghq) #show first 6 lines
##   sex ghq c nc
## 1 men   0 0 18
## 2 men   1 0  8
## 3 men   2 1  1
## 4 men   3 0  0
## 5 men   4 1  0
## 6 men   5 3  0
str(ghq) #show structure of data
## 'data.frame':    22 obs. of  4 variables:
##  $ sex: Factor w/ 2 levels "men","women": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ghq: int  0 1 2 3 4 5 6 7 8 9 ...
##  $ c  : int  0 0 1 0 1 3 0 2 0 0 ...
##  $ nc : int  18 8 1 0 0 0 0 0 0 0 ...
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
dta <-  ghq %>% mutate(Total = c + nc) %>% #mutate一個新變數Total
  dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc) #rename將變數重新命名
knitr::kable(aggregate(cbind(Case, Noncase) ~ Gender, data=dta, sum))
Gender Case Noncase
men 8 27
women 22 63

For men, Case:Non-case 約 1:3.375 For women, Case:Non-case 約 1:2.86 可能有gender effect

knitr::kable(t(aggregate(cbind(Case, Noncase) ~ Score, data=dta, sum))) #aggregate前面括弧的t為Matrix Transpose
## Warning in kable_pipe(x = structure(c("Score", "Case", "Noncase", "0", "2", :
## The table should have a header (column names)
Score 0 1 2 3 4 5 6 7 8 9 10
Case 2 2 5 3 3 6 1 3 3 1 1
Noncase 60 22 6 1 1 0 0 0 0 0 0

Score=2時,Case對Non-case的比值接近1, Score大於3以後,Case對Non-case的比值變大,應有score effect。

3 Gender and GHQ score effects

library(ggplot2)
ggplot(dta, 
       aes(x=Score, 
           y=Case/Total, 
           color=Gender)) + 
 geom_point(pch=1, size=rel(2))+
 stat_smooth(method="bayesglm", 
             formula=y ~ x, 
             method.args=list(family=binomial),
             aes(weight=Total), 
             se=FALSE) +
 scale_color_manual(values=c("black", "red3"))+
 scale_x_continuous(breaks=seq(0, 10, by=1))+
 scale_y_continuous(breaks=seq(0, 1, by=.1))+
 labs(x="Score on GHQ", 
      y="Proportion of cases") +
 theme_minimal()+
 theme(legend.position=c(.1, .9))
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## 找不到模式 'function' 的物件 'bayesglm'
## Warning: Removed 5 rows containing missing values (geom_point).

當Proportion of cases=0.35的時候,men和women的Score on GHQ相等。 當Proportion of cases=1的時候,men的Score on GHQ=2,women的Score on GHQ=6.5。

4 Analysis of deviance

m0 <- glm(cbind(Case, Noncase) ~ Score + Gender + Score:Gender, data=dta, family=binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
m1 <- glm(cbind(Case, Noncase) ~ Score + Gender, data=dta, family=binomial)
m2 <- glm(cbind(Case, Noncase) ~ Score, data=dta, family=binomial)
anova(m2, m1, m0, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(Case, Noncase) ~ Score
## Model 2: cbind(Case, Noncase) ~ Score + Gender
## Model 3: cbind(Case, Noncase) ~ Score + Gender + Score:Gender
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1        15     5.7440                       
## 2        14     4.9419  1   0.8021  0.37047  
## 3        13     1.5422  1   3.3997  0.06521 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

1.有Gender effect 2.Score和Gender有交互作用

5 Testing for GHQ score effect

m3 <- glm(cbind(Case, Noncase) ~ Gender, data=dta, family=binomial(link=logit)) 
anova(m3, m1, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(Case, Noncase) ~ Gender
## Model 2: cbind(Case, Noncase) ~ Score + Gender
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        15     83.054                          
## 2        14      4.942  1   78.112 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

比較m1和m3,加入Score後,p值達顯著水準,表示有Score effect。

6 Goodness of fit

(m1$null.deviance - m1$deviance)/m1$null.deviance
## [1] 0.9405854
1 - pchisq(m1$deviance, m1$df.residual)
## [1] 0.9866052

7 Parameter estimates

#sjPlot::tab_model(m2, show.r2=FALSE, show.obs=FALSE, show.se=TRUE)

8

dta_p <- dta %>%
       mutate(Prop=Case/Total,
              phat=predict(m1, 
                              newdata=dta[, c(1,2)], 
                              type="response"),
              elgit=log((Case+1/278)/(Noncase+(1/278))))
dta_m1 <- broom::augment(m1,newdata = dta_p)
ggplot(dta_m1, 
       aes(Score, .fitted, 
           color=Gender)) + 
  geom_line(alpha=.5) +
  geom_point(data=dta_p,
             aes(Score, elgit))+
  scale_color_manual(values=c("black", "red3"))+
  labs(x="Score on GHQ",
       y="Empirical and fitted log-odds")+
  theme_minimal()+
  theme(legend.position = c(.1, .9))

9 From effect to probability

curve(faraway::ilogit(x), -6, 6, 
      bty='n', 
      xlab=expression(eta), 
      ylab="Probability")
grid()

10 Model fit

beta <- coef(m1)

library(faraway)

with(dta, plot(Score, Case/Total, 
               col=Gender, 
               bty='n', 
               xlab="GHQ score"))
grid()
curve(ilogit(beta[1]+beta[2]*x), add=T, col=1)
curve(ilogit(beta[1]+beta[2]*x+beta[3]), add=T, col=2)

11 Residual plot

#ggplot(dta_m1, 
   #    aes(Score, .std.resid,
        #   color=Gender)) + 
 # geom_point(alpha=.5) +
 # scale_y_continuous(breaks=seq(-3, 3, by=1), 
                  #   limits=c(-3.3, 3.3))+
 # scale_color_manual(values=c("black", "red3"))+
 # labs(x="Score on GHQ",
      # y="Standardized residuals")+
 # theme_minimal()+
 # theme(legend.position = c(.9, .9))