Load data
dta<-load("C:/Users/ASUS/Desktop/data/ghq.rda")
str(ghq)
## 'data.frame': 22 obs. of 4 variables:
## $ sex: Factor w/ 2 levels "men","women": 1 1 1 1 1 1 1 1 1 1 ...
## $ ghq: int 0 1 2 3 4 5 6 7 8 9 ...
## $ c : int 0 0 1 0 1 3 0 2 0 0 ...
## $ nc : int 18 8 1 0 0 0 0 0 0 0 ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
dta <- ghq %>% mutate(Total = c + nc) %>%
dplyr::rename(Gender=sex, Score=ghq, Case=c, Noncase=nc)
m2 is a saturated model comprises Score, Gender, and their interaction term as IVs
m2 <- glm(cbind(Case, Noncase) ~ Score + Gender + Score:Gender, data=dta, family=binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(m2)
##
## Call:
## glm(formula = cbind(Case, Noncase) ~ Score + Gender + Score:Gender,
## family = binomial, data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.94972 0.00000 0.00000 0.06319 0.48718
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -43.38 21986.72 -0.002 0.998
## Score 21.69 10993.36 0.002 0.998
## Genderwomen 40.38 21986.72 0.002 0.999
## Score:Genderwomen -20.45 10993.36 -0.002 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.1763 on 16 degrees of freedom
## Residual deviance: 1.5422 on 13 degrees of freedom
## AIC: 22.018
##
## Number of Fisher Scoring iterations: 23
m3 is the model comprises Score & Gender as IVs
m3 <- glm(cbind(Case, Noncase) ~ Score + Gender, data=dta, family=binomial)
summary(m3)
##
## Call:
## glm(formula = cbind(Case, Noncase) ~ Score + Gender, family = binomial,
## data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.25768 0.00000 0.02623 0.23648 0.82922
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.0721 0.9758 -4.173 3.01e-05 ***
## Score 1.4328 0.2907 4.929 8.26e-07 ***
## Genderwomen 0.7938 0.9289 0.855 0.393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.1763 on 16 degrees of freedom
## Residual deviance: 4.9419 on 14 degrees of freedom
## AIC: 23.418
##
## Number of Fisher Scoring iterations: 6
m4 is a model comprising Score as IV
m4 <- glm(cbind(Case, Noncase) ~ Score, data=dta, family=binomial)
summary(m4)
##
## Call:
## glm(formula = cbind(Case, Noncase) ~ Score, family = binomial,
## data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.41618 0.00000 0.04739 0.33149 0.53195
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.4536 0.5633 -6.131 8.73e-10 ***
## Score 1.4402 0.2979 4.834 1.34e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.176 on 16 degrees of freedom
## Residual deviance: 5.744 on 15 degrees of freedom
## AIC: 22.22
##
## Number of Fisher Scoring iterations: 6
m5 is a model comprising Gender as IV
m5 <- glm(cbind(Case, Noncase) ~ Gender, data=dta, family=binomial(link=logit))
summary(m5)
##
## Call:
## glm(formula = cbind(Case, Noncase) ~ Gender, family = binomial(link = logit),
## data = dta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.781 0.000 1.340 1.718 2.976
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2164 0.4025 -3.022 0.00251 **
## Genderwomen 0.1643 0.4726 0.348 0.72811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.176 on 16 degrees of freedom
## Residual deviance: 83.054 on 15 degrees of freedom
## AIC: 99.53
##
## Number of Fisher Scoring iterations: 5
knitr::kable(anova(m4, m3, m2, test="Chisq"))
| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 15 | 5.743961 | NA | NA | NA |
| 14 | 4.941883 | 1 | 0.8020783 | 0.3704727 |
| 13 | 1.542230 | 1 | 3.3996529 | 0.0652101 |
knitr::kable(anova(m5, m2, test="Chisq"))
| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 15 | 83.05382 | NA | NA | NA |
| 13 | 1.54223 | 2 | 81.51159 | 0 |
knitr::kable(anova(m3, m2, test="Chisq"))
| Resid. Df | Resid. Dev | Df | Deviance | Pr(>Chi) |
|---|---|---|---|---|
| 14 | 4.941883 | NA | NA | NA |
| 13 | 1.542230 | 1 | 3.399653 | 0.0652101 |
The output indicated that the deviance value did not approximate the chisquare value, thus m3(the model of both Score + Gender) is Not a good fit for the data
(m3$null.deviance - m3$deviance)/m3$null.deviance
## [1] 0.9405854
1 - pchisq(m3$deviance, m3$df.residual)
## [1] 0.9866052
The output showed score is a significant predictor for cbind(c,nc), while gender is not.
Furthermore, conditional on gender, one point increases in GHQ is associated with the
increase of 157% to 720% in the odds for being a case.
sjPlot::tab_model(m3, show.r2=FALSE, show.obs=FALSE, show.se=TRUE)
| cbind(Case, Noncase) | ||||
|---|---|---|---|---|
| Predictors | Odds Ratios | std. Error | CI | p |
| (Intercept) | 0.02 | 0.98 | 0.00 – 0.08 | <0.001 |
| Score | 4.19 | 0.29 | 2.57 – 8.20 | <0.001 |
| Gender [women] | 2.21 | 0.93 | 0.42 – 17.73 | 0.393 |
dta_p <- dta %>%
mutate(Prop=Case/Total,
phat=predict(m3,
newdata=dta[, c(1,2)],
type="response"),
elgit=log((Case+1/22)/(Noncase+(1/22))))
library(ggplot2)
ggplot(dta,
aes(Score, fitted(m3),
color=Gender)) +
geom_line(alpha=.5) +
geom_point(data=dta_p,
aes(Score, elgit))+
scale_color_manual(values=c("black", "red3"))+
labs(x="Score on GHQ",
y="Empirical and fitted log-odds")+
theme_minimal()+
theme(legend.position = c(.1, .9))
library(faraway)
##
## Attaching package: 'faraway'
## The following object is masked from 'package:plyr':
##
## ozone
beta <- coef(m3)
with(dta, plot(Score, Case/Total,
col=Gender,
bty='n',
xlab="GHQ score"))
grid()
curve(ilogit(beta[1]+beta[2]*x), add=T, col=1)
curve(ilogit(beta[1]+beta[2]*x+beta[3]), add=T, col=2)