Silvapulle (1981) reported a study on the relation between psychiatric diagnosis and the score on a 12-item General Health Questionnaire (GHQ) for 120 patients admitted to a hospital. Download the ghq.rda data file to your “data” folder and load it to your R session with the command
#> load(“data/ghq.rda”)
Column 1: Gender ID
Column 2: GHQ score
Column 3: Number of cases
Column 4: Number of non-cases
The patient was classified by psychiatrists as either a ‘case’ or a ‘non-case’.
The question of interest was whether GHQ score, together with patient’s gender, could be used to detect psychiatric cases. Perform an appropriate analysis.
load("~/data/ghq.rda") #load data
head(ghq) #show first 6 lines
## sex ghq c nc
## 1 men 0 0 18
## 2 men 1 0 8
## 3 men 2 1 1
## 4 men 3 0 0
## 5 men 4 1 0
## 6 men 5 3 0
str(ghq) #show structure of data
## 'data.frame': 22 obs. of 4 variables:
## $ sex: Factor w/ 2 levels "men","women": 1 1 1 1 1 1 1 1 1 1 ...
## $ ghq: int 0 1 2 3 4 5 6 7 8 9 ...
## $ c : int 0 0 1 0 1 3 0 2 0 0 ...
## $ nc : int 18 8 1 0 0 0 0 0 0 0 ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dta <- ghq %>% mutate(Total = c + nc) %>% #mutate一個新變數Total
dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc) #rename將變數重新命名
knitr::kable(aggregate(cbind(Case, Noncase) ~ Gender, data=dta, sum))
Gender | Case | Noncase |
---|---|---|
men | 8 | 27 |
women | 22 | 63 |
For men, Case:Non-case 約 1:3.375 For women, Case:Non-case 約 1:2.86 可能有gender effect
knitr::kable(t(aggregate(cbind(Case, Noncase) ~ Score, data=dta, sum))) #aggregate前面括弧的t為Matrix Transpose
## Warning in kable_pipe(x = structure(c("Score", "Case", "Noncase", "0", "2", :
## The table should have a header (column names)
Score | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
Case | 2 | 2 | 5 | 3 | 3 | 6 | 1 | 3 | 3 | 1 | 1 |
Noncase | 60 | 22 | 6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
Score=2時,Case對Non-case的比值接近1, Score大於3以後,Case對Non-case的比值變大,應有score effect。
library(ggplot2)
ggplot(dta,
aes(x=Score,
y=Case/Total,
color=Gender)) +
geom_point(pch=1, size=rel(2))+
stat_smooth(method="bayesglm",
formula=y ~ x,
method.args=list(family=binomial),
aes(weight=Total),
se=FALSE) +
scale_color_manual(values=c("black", "red3"))+
scale_x_continuous(breaks=seq(0, 10, by=1))+
scale_y_continuous(breaks=seq(0, 1, by=.1))+
labs(x="Score on GHQ",
y="Proportion of cases") +
theme_minimal()+
theme(legend.position=c(.1, .9))
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## 找不到模式 'function' 的物件 'bayesglm'
## Warning: Removed 5 rows containing missing values (geom_point).
當Proportion of cases=0.35的時候,men和women的Score on GHQ相等。 當Proportion of cases=1的時候,men的Score on GHQ=2,women的Score on GHQ=6.5。
m0 <- glm(cbind(Case, Noncase) ~ Score + Gender + Score:Gender, data=dta, family=binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
m1 <- glm(cbind(Case, Noncase) ~ Score + Gender, data=dta, family=binomial)
m2 <- glm(cbind(Case, Noncase) ~ Score, data=dta, family=binomial)
anova(m2, m1, m0, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(Case, Noncase) ~ Score
## Model 2: cbind(Case, Noncase) ~ Score + Gender
## Model 3: cbind(Case, Noncase) ~ Score + Gender + Score:Gender
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 15 5.7440
## 2 14 4.9419 1 0.8021 0.37047
## 3 13 1.5422 1 3.3997 0.06521 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
1.有Gender effect 2.Score和Gender有交互作用
m3 <- glm(cbind(Case, Noncase) ~ Gender, data=dta, family=binomial(link=logit))
anova(m3, m1, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(Case, Noncase) ~ Gender
## Model 2: cbind(Case, Noncase) ~ Score + Gender
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 15 83.054
## 2 14 4.942 1 78.112 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
比較m1和m3,加入Score後,p值達顯著水準,表示有Score effect。
(m1$null.deviance - m1$deviance)/m1$null.deviance
## [1] 0.9405854
1 - pchisq(m1$deviance, m1$df.residual)
## [1] 0.9866052
#sjPlot::tab_model(m2, show.r2=FALSE, show.obs=FALSE, show.se=TRUE)
dta_p <- dta %>%
mutate(Prop=Case/Total,
phat=predict(m1,
newdata=dta[, c(1,2)],
type="response"),
elgit=log((Case+1/278)/(Noncase+(1/278))))
dta_m1 <- broom::augment(m1,newdata = dta_p)
ggplot(dta_m1,
aes(Score, .fitted,
color=Gender)) +
geom_line(alpha=.5) +
geom_point(data=dta_p,
aes(Score, elgit))+
scale_color_manual(values=c("black", "red3"))+
labs(x="Score on GHQ",
y="Empirical and fitted log-odds")+
theme_minimal()+
theme(legend.position = c(.1, .9))
curve(faraway::ilogit(x), -6, 6,
bty='n',
xlab=expression(eta),
ylab="Probability")
grid()
beta <- coef(m1)
library(faraway)
with(dta, plot(Score, Case/Total,
col=Gender,
bty='n',
xlab="GHQ score"))
grid()
curve(ilogit(beta[1]+beta[2]*x), add=T, col=1)
curve(ilogit(beta[1]+beta[2]*x+beta[3]), add=T, col=2)
#ggplot(dta_m1,
# aes(Score, .std.resid,
# color=Gender)) +
# geom_point(alpha=.5) +
# scale_y_continuous(breaks=seq(-3, 3, by=1),
# limits=c(-3.3, 3.3))+
# scale_color_manual(values=c("black", "red3"))+
# labs(x="Score on GHQ",
# y="Standardized residuals")+
# theme_minimal()+
# theme(legend.position = c(.9, .9))