## sex ghq c nc
## 1 men 0 0 18
## 2 men 1 0 8
## 3 men 2 1 1
## 4 men 3 0 0
## 5 men 4 1 0
## 6 men 5 3 0
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dta2 <- ghq %>% mutate(Total = c + nc) %>%
dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc)
head(dta2)
## Gender Score Case Noncase Total
## 1 men 0 0 18 18
## 2 men 1 0 8 8
## 3 men 2 1 1 2
## 4 men 3 0 0 0
## 5 men 4 1 0 1
## 6 men 5 3 0 3
## Warning in kable_pipe(x = structure(c("Score", "Case", "Noncase", "0", "2", :
## The table should have a header (column names)
Score | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
Case | 2 | 2 | 5 | 3 | 3 | 6 | 1 | 3 | 3 | 1 | 1 |
Noncase | 60 | 22 | 6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
Noncase 大多數都在0~1分
case分數比較平均散布在0~10分
library(ggplot2)
ggplot(dta2,
aes(x=Score,
y=Case/Total,
color=Gender)) +
geom_point(pch=1, size=rel(2))+
stat_smooth(method="glm",
formula=y ~ x,
method.args=list(family=binomial),
aes(weight=Total),
se=FALSE) +
scale_color_manual(values=c("black", "red3"))+
scale_x_continuous(breaks=seq(0, 10, by=1))+
scale_y_continuous(breaks=seq(0, 1, by=.1))+
labs(x="Score on GHQ",
y="Proportion of cases") +
theme_minimal()+
theme(legend.position=c(.1, .9))
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
m1 <- glm(cbind(Case, Noncase) ~ Score + Gender, data=dta2, family=binomial)
m2 <- glm(cbind(Case, Noncase) ~ Score, data=dta2, family=binomial)
knitr::kable(anova(m2, m1, m0, test="Chisq"))
Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
---|---|---|---|---|
15 | 5.743961 | NA | NA | NA |
14 | 4.941883 | 1 | 0.8020783 | 0.3704727 |
13 | 1.542230 | 1 | 3.3996529 | 0.0652101 |
看起來m2, m1, m0模型fit差不多
性別和性別的交互作用項可以不用考慮
m3 <- glm(cbind(Case, Noncase) ~ Gender, data=dta2, family=binomial(link=logit))
knitr::kable(anova(m3, m1, test="Chisq"))
Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
---|---|---|---|---|
15 | 83.053819 | NA | NA | NA |
14 | 4.941883 | 1 | 78.11194 | 0 |
分數效果有顯著,所以應該考慮分數差異進去
## [1] 0.9405854
## [1] 0.9866052
cbind(Case, Noncase) | ||||
---|---|---|---|---|
Predictors | Odds Ratios | std. Error | CI | p |
(Intercept) | 0.02 | 0.98 | 0.00 – 0.08 | <0.001 |
Score | 4.19 | 0.29 | 2.57 – 8.20 | <0.001 |
Gender [women] | 2.21 | 0.93 | 0.42 – 17.73 | 0.393 |
For a given GHQ score, the odds of a female being a case is between 0.42 to 17.73 of the corresponding odds for a female. (But no statistical significant)
Odds “m to n” in favor of an event mean we expect the event will occur m times for every n times it does not occur.
For every 100 male cases, we expect between 42 to 177 female cases. (But no statistical significant)
Conditional on gender, one point increases in GHQ is associated with the increase of 157% to 720% in the odds for being a case (2.57 to 8.2 times).
The odds of being a case with an increase of 1 GHQ unit moves up from 1:1 to between 2.57 and 8.20 to 1.
dta_p <- dta2 %>%
mutate(Prop=Case/Total,
phat=predict(m1,
newdata=dta2[, c(1,2)],
type="response"),
elgit=log((Case+1/120)/(Noncase+(1/120))))
dta_m1 <- broom::augment(m1, newdata=dta_p)
ggplot(dta_m1,
aes(Score, .fitted,
color=Gender)) +
geom_line(alpha=.5) +
geom_point(data=dta_p,
aes(Score, elgit))+
scale_color_manual(values=c("black", "red3"))+
labs(x="Score on GHQ",
y="Empirical and fitted log-odds")+
theme_minimal()+
theme(legend.position = c(.1, .9))
性別呈平行線,無交互作用
library(faraway)
beta <- coef(m1)
with(dta2, plot(Score, Case/Total,
col=Gender,
bty='n',
xlab="GHQ score"))
grid()
curve(ilogit(beta[1]+beta[2]*x), add=T, col=1)
curve(ilogit(beta[1]+beta[2]*x+beta[3]), add=T, col=2)
女性有比較高的機會呈為case