1 Description

Silvapulle (1981) reported a study on the relation between psychiatric diagnosis and the score on a 12-item General Health Questionnaire (GHQ) for 120 patients admitted to
a hospital. Download the ghq.rda data file to your “data” folder and load it to your R session with the command

Column 1: Gender ID
Column 2: GHQ score
Column 3: Number of cases
Column 4: Number of non-cases
The patient was classified by psychiatrists as either a ‘case’ or a ‘non-case’. The question of interest was whether GHQ score, together with patient’s gender, could be
used to detect psychiatric cases. Perform an appropriate analysis.

2 input data

load("C:/Users/HANK/Desktop/HOMEWORK/ghq.rda")
dta <- ghq
head(dta)
##   sex ghq c nc
## 1 men   0 0 18
## 2 men   1 0  8
## 3 men   2 1  1
## 4 men   3 0  0
## 5 men   4 1  0
## 6 men   5 3  0

#Detecting mental illness by questionnaire

dta <- dta %>%
  mutate(Total = c + nc) %>%
  dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc)
knitr::kable(t(aggregate(cbind(Case, Noncase) ~ Score, data=dta, sum)))
## Warning in kable_pipe(x = structure(c("Score", "Case", "Noncase", "0", "2", :
## The table should have a header (column names)
Score 0 1 2 3 4 5 6 7 8 9 10
Case 2 2 5 3 3 6 1 3 3 1 1
Noncase 60 22 6 1 1 0 0 0 0 0 0

3 Gender and GHQ score effects

ggplot(dta, aes(x=Score, y=Case/Total, color=Gender)) + 
 geom_point(pch=1, size=rel(2))+
 stat_smooth(method="glm", 
             formula=y ~ x, 
             method.args=list(family=binomial),
             aes(weight=Total), 
             se=FALSE) +
 scale_color_manual(values=c("black", "red3"))+
 scale_x_continuous(breaks=seq(0, 10, by=1))+
 scale_y_continuous(breaks=seq(0, 1, by=.1))+
 labs(x="Score on GHQ", 
      y="Proportion of cases") +
 theme_minimal()+
 theme(legend.position=c(.1, .9))
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: Removed 5 rows containing missing values (geom_point).

4 Analysis of deviance

4.1 m0

m0 <- glm(cbind(Case, Noncase) ~ Score + Gender + Score:Gender, data=dta, family=binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

4.2 m1

m1 <- glm(cbind(Case, Noncase) ~ Score + Gender, data=dta, family=binomial)

4.3 m2

m2 <- glm(cbind(Case, Noncase) ~ Score, data=dta, family=binomial)
anova(m2, m1, m0, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(Case, Noncase) ~ Score
## Model 2: cbind(Case, Noncase) ~ Score + Gender
## Model 3: cbind(Case, Noncase) ~ Score + Gender + Score:Gender
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1        15     5.7440                       
## 2        14     4.9419  1   0.8021  0.37047  
## 3        13     1.5422  1   3.3997  0.06521 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5 Testing for GHQ score effect

m3 <- glm(cbind(Case, Noncase) ~ Gender, data=dta, family=binomial(link=logit))
anova(m3, m1, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(Case, Noncase) ~ Gender
## Model 2: cbind(Case, Noncase) ~ Score + Gender
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1        15     83.054                          
## 2        14      4.942  1   78.112 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6 Goodness of fit

(m1$null.deviance - m1$deviance)/m1$null.deviance
## [1] 0.9405854
1 - pchisq(m1$deviance, m1$df.residual)
## [1] 0.9866052

7 Parameter estimates

sjPlot::tab_model(m2, show.r2=FALSE, show.obs=FALSE, show.se=TRUE)
  cbind(Case, Noncase)
Predictors Odds Ratios std. Error CI p
(Intercept) 0.03 0.56 0.01 – 0.08 <0.001
Score 4.22 0.30 2.56 – 8.37 <0.001

表格顯示:GHQ與精神疾病有關,且GHQ升高1個百分點,則病例數亦隨之增長,增長範圍從256%至837%。

8 Linear predictors

dta_p <- dta %>%
       mutate(Prop=Case/Total,
              phat=predict(m2, 
                              newdata=dta[, c(1,2)], 
                              type="response"),
              elgit=log((Case+1/120)/(Noncase+(1/120))))
dta_m1 <- broom::augment(m2, newdata = dta_p)
ggplot(dta_m1, 
       aes(Score, .fitted, 
           color=Gender)) + 
  geom_line(alpha=.5) +
  geom_point(data=dta_p,
             aes(Score, elgit))+
  scale_color_manual(values=c("black", "red3"))+
  labs(x="Score on GHQ",
       y="Empirical and fitted log-odds")+
  theme_minimal()+
  theme(legend.position = c(.1, .9))

library(faraway)
## Warning: package 'faraway' was built under R version 4.0.3
## 
## Attaching package: 'faraway'
## The following object is masked from 'package:lattice':
## 
##     melanoma
## The following objects are masked from 'package:car':
## 
##     logit, vif

9 Model fit

with(dta, plot(Score, Case/Total, 
               col=Gender, 
               bty='n', 
               xlab="GHQ score"))

10 Residual plot

ggplot(dta_m1, 
       aes(Score, elgit,
           color=Gender)) + 
  geom_point(alpha=.5) +
  scale_y_continuous(breaks=seq(-3, 3, by=1), 
                     limits=c(-3.3, 3.3))+
  scale_color_manual(values=c("black", "red3"))+
  labs(x="Score on GHQ",
       y="Standardized residuals")+
  theme_minimal()+
  theme(legend.position = c(.9, .9))
## Warning: Removed 11 rows containing missing values (geom_point).