dta1 <- read.table("C:/Users/ASUS/Desktop/data/ccs-data.txt", h=T)
str(dta1)
## 'data.frame': 2500 obs. of 5 variables:
## $ X1 : int 2 3 4 5 6 7 8 9 10 11 ...
## $ X1.1: int 0 1 1 1 1 1 0 0 0 1 ...
## $ X1.2: int 0 0 1 0 0 0 0 1 0 0 ...
## $ X0 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ . : chr "." "." "." "." ...
names(dta1) <- c("ID", "Problem", "Single", "Parent", "Teacher")
str(dta1)
## 'data.frame': 2500 obs. of 5 variables:
## $ ID : int 2 3 4 5 6 7 8 9 10 11 ...
## $ Problem: int 0 1 1 1 1 1 0 0 0 1 ...
## $ Single : int 0 0 1 0 0 0 0 1 0 0 ...
## $ Parent : int 1 0 1 0 0 0 0 0 0 0 ...
## $ Teacher: chr "." "." "." "." ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dta1 <- dta1 %>%
mutate(ID=factor(ID),
Problem=factor(ifelse(Problem == 1, "Bad", "Good")),
Single=factor(Single))
head(dta1)
## ID Problem Single Parent Teacher
## 1 2 Good 0 1 .
## 2 3 Bad 0 0 .
## 3 4 Bad 1 1 .
## 4 5 Bad 0 0 .
## 5 6 Bad 0 0 .
## 6 7 Bad 0 0 .
with(dta1, ftable(Teacher, Parent))
## Parent 0 1
## Teacher
## . 916 156
## 0 1030 129
## 1 165 104
dta1$Teacher<-as.numeric(dta1$Teacher)
## Warning: 強制變更過程中產生了 NA
cor(dta1[,4:5], use="pair")
## Parent Teacher
## Parent 1.0000000 0.2913296
## Teacher 0.2913296 1.0000000
sum(is.na(dta1[,4]) | is.na(dta1[,5]))/dim(dta1)[1]
## [1] 0.4288
5.合併Parent&Teacher為同一變項(w to L)
# stack the parents and teachers report; call them informants
library(tidyr)
dtaL <- dta1 %>%
gather(Informant, Report, 4:5)
str(dtaL)
## 'data.frame': 5000 obs. of 5 variables:
## $ ID : Factor w/ 2500 levels "2","3","4","5",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Problem : Factor w/ 2 levels "Bad","Good": 2 1 1 1 1 1 2 2 2 1 ...
## $ Single : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 2 1 1 ...
## $ Informant: chr "Parent" "Parent" "Parent" "Parent" ...
## $ Report : num 1 0 1 0 0 0 0 0 0 0 ...
# sort according to child id
dtaL <- dtaL[order(dtaL$ID), ]
dtaL<-dtaL %>% mutate(Informant=factor(Informant))
# get rid of annoying row names
rownames(dtaL) <- NULL
# have a look
head(dtaL)
## ID Problem Single Informant Report
## 1 2 Good 0 Parent 1
## 2 2 Good 0 Teacher NA
## 3 3 Bad 0 Parent 0
## 4 3 Bad 0 Teacher NA
## 5 4 Bad 1 Parent 1
## 6 4 Bad 1 Teacher NA
(IVs=Informant + Problem + Single + Informant:Problem) m1 output showed that Problem, Single, and InformantProblem were significant risk factors
for Mental Health issue. Being in good physical health were less likely(odds=0.55) than
being in poor health , Single parent was more likely(odds=1.85) than Not Single parent to
be reported as having Mental health issue. a significant interaction was observed(Informant
Problem).
library(geepack)
sjPlot::tab_model(m1 <- geeglm(Report ~ Informant + Problem + Single + Informant:Problem, data=dtaL, id=ID, corstr="exchangeable", family=binomial), show.obs=F, show.ngroups = F)
| Report | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.21 | 0.18 – 0.25 | <0.001 |
| Informant [Teacher] | 1.02 | 0.83 – 1.24 | 0.881 |
| Problem [Good] | 0.55 | 0.44 – 0.68 | <0.001 |
| Single [1] | 1.85 | 1.50 – 2.29 | <0.001 |
|
Informant [Teacher] * Problem [Good] |
1.58 | 1.16 – 2.15 | 0.004 |
(Ivs= Informant + Problem + Single + Informant:Problem + “Single:Problem”) m1a output is similar to m1 output showing that Problem, Single,Informant:Problem were
significant risk factors for Mental health issue. The only difference is that m1a includes
Single:Problem as a IV which is found as a insignificant risk factor.
sjPlot::tab_model(m1a <- geeglm(Report ~ Informant + Problem + Single + Informant:Problem + Single:Problem, data=dtaL, id=ID, corstr="exchangeable", family=binomial), show.obs=F, show.ngroups = F)
| Report | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.21 | 0.18 – 0.25 | <0.001 |
| Informant [Teacher] | 1.02 | 0.83 – 1.24 | 0.884 |
| Problem [Good] | 0.54 | 0.42 – 0.69 | <0.001 |
| Single [1] | 1.79 | 1.35 – 2.38 | <0.001 |
|
Informant [Teacher] * Problem [Good] |
1.58 | 1.16 – 2.16 | 0.003 |
|
Problem [Good] * Single [1] |
1.08 | 0.71 – 1.65 | 0.727 |
# residual plot - not informative
plot(m1a, "pearson", which = 1)
grid()
library(ggplot2)
# set theme
ot <- theme_set(theme_bw())
x=Physical problem, y=probability of report, grouped by single or not single parent Seems that being in bad physical health is more likely to have Mental health risk
for both single or not single parent families. However, Children in single parent families
have greater risk for mental health issue than its counterpart.
# fortify data with model fits
dta_m1 <- data.frame(na.omit(dtaL), phat = fitted(m1a))
# compare observed with fitted
ggplot(dta_m1, aes(Problem, Report, color = Informant)) +
geom_point(aes(Problem, phat),position = position_dodge(width=.2),
pch = 1, size = rel(2)) +
geom_hline(yintercept = c(mean(dta1$Parent, na.rm = T),
mean(dta1$Teacher, na.rm = T)), linetype = "dotted")+
stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width=.2)) +
facet_wrap( ~ Single)+
labs(x = "Physical Problem", y = "Probability of report") +
theme(legend.position = c(.1, .9))