1 Data Management

  1. 載入資料並改變項名稱
dta1 <- read.table("C:/Users/ASUS/Desktop/data/ccs-data.txt", h=T)
str(dta1)
## 'data.frame':    2500 obs. of  5 variables:
##  $ X1  : int  2 3 4 5 6 7 8 9 10 11 ...
##  $ X1.1: int  0 1 1 1 1 1 0 0 0 1 ...
##  $ X1.2: int  0 0 1 0 0 0 0 1 0 0 ...
##  $ X0  : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ .   : chr  "." "." "." "." ...
names(dta1) <- c("ID", "Problem", "Single", "Parent", "Teacher")
str(dta1)
## 'data.frame':    2500 obs. of  5 variables:
##  $ ID     : int  2 3 4 5 6 7 8 9 10 11 ...
##  $ Problem: int  0 1 1 1 1 1 0 0 0 1 ...
##  $ Single : int  0 0 1 0 0 0 0 1 0 0 ...
##  $ Parent : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ Teacher: chr  "." "." "." "." ...
  1. 定義為名義變項(ID,Problem,Single)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
dta1 <- dta1 %>%
       mutate(ID=factor(ID), 
              Problem=factor(ifelse(Problem == 1, "Bad", "Good")),
              Single=factor(Single))
head(dta1)
##   ID Problem Single Parent Teacher
## 1  2    Good      0      1       .
## 2  3     Bad      0      0       .
## 3  4     Bad      1      1       .
## 4  5     Bad      0      0       .
## 5  6     Bad      0      0       .
## 6  7     Bad      0      0       .
  1. Crosstab & compute Cor for Parent vs Teacher
with(dta1, ftable(Teacher, Parent))
##         Parent    0    1
## Teacher                 
## .               916  156
## 0              1030  129
## 1               165  104
dta1$Teacher<-as.numeric(dta1$Teacher)
## Warning: 強制變更過程中產生了 NA
cor(dta1[,4:5], use="pair")
##            Parent   Teacher
## Parent  1.0000000 0.2913296
## Teacher 0.2913296 1.0000000
  1. Compute missing %
sum(is.na(dta1[,4]) | is.na(dta1[,5]))/dim(dta1)[1]
## [1] 0.4288

5.合併Parent&Teacher為同一變項(w to L)

# stack the parents and teachers report; call them informants
library(tidyr)
dtaL <- dta1 %>%
        gather(Informant, Report, 4:5)
str(dtaL)
## 'data.frame':    5000 obs. of  5 variables:
##  $ ID       : Factor w/ 2500 levels "2","3","4","5",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Problem  : Factor w/ 2 levels "Bad","Good": 2 1 1 1 1 1 2 2 2 1 ...
##  $ Single   : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 2 1 1 ...
##  $ Informant: chr  "Parent" "Parent" "Parent" "Parent" ...
##  $ Report   : num  1 0 1 0 0 0 0 0 0 0 ...
# sort according to child id
dtaL <- dtaL[order(dtaL$ID), ]
dtaL<-dtaL %>% mutate(Informant=factor(Informant)) 

# get rid of annoying row names
rownames(dtaL) <- NULL

# have a look
head(dtaL)
##   ID Problem Single Informant Report
## 1  2    Good      0    Parent      1
## 2  2    Good      0   Teacher     NA
## 3  3     Bad      0    Parent      0
## 4  3     Bad      0   Teacher     NA
## 5  4     Bad      1    Parent      1
## 6  4     Bad      1   Teacher     NA

2 Analysis

2.1 m1

(IVs=Informant + Problem + Single + Informant:Problem) m1 output showed that Problem, Single, and InformantProblem were significant risk factors
for Mental Health issue. Being in good physical health were less likely(odds=0.55) than
being in poor health , Single parent was more likely(odds=1.85) than Not Single parent to
be reported as having Mental health issue. a significant interaction was observed(Informant

Problem).

library(geepack)
sjPlot::tab_model(m1 <- geeglm(Report ~ Informant + Problem + Single + Informant:Problem, data=dtaL, id=ID, corstr="exchangeable", family=binomial), show.obs=F, show.ngroups = F)
  Report
Predictors Odds Ratios CI p
(Intercept) 0.21 0.18 – 0.25 <0.001
Informant [Teacher] 1.02 0.83 – 1.24 0.881
Problem [Good] 0.55 0.44 – 0.68 <0.001
Single [1] 1.85 1.50 – 2.29 <0.001
Informant [Teacher] *
Problem [Good]
1.58 1.16 – 2.15 0.004

2.2 m1a

(Ivs= Informant + Problem + Single + Informant:Problem + “Single:Problem”) m1a output is similar to m1 output showing that Problem, Single,Informant:Problem were
significant risk factors for Mental health issue. The only difference is that m1a includes
Single:Problem as a IV which is found as a insignificant risk factor.

sjPlot::tab_model(m1a <- geeglm(Report ~ Informant + Problem + Single + Informant:Problem + Single:Problem, data=dtaL, id=ID, corstr="exchangeable", family=binomial), show.obs=F, show.ngroups = F)
  Report
Predictors Odds Ratios CI p
(Intercept) 0.21 0.18 – 0.25 <0.001
Informant [Teacher] 1.02 0.83 – 1.24 0.884
Problem [Good] 0.54 0.42 – 0.69 <0.001
Single [1] 1.79 1.35 – 2.38 <0.001
Informant [Teacher] *
Problem [Good]
1.58 1.16 – 2.16 0.003
Problem [Good] * Single
[1]
1.08 0.71 – 1.65 0.727

3 Visdualization

3.1 Residual plot

# residual plot - not informative
plot(m1a, "pearson", which = 1)
grid()

library(ggplot2)
# set theme
ot <- theme_set(theme_bw())

3.2 Means Plot

x=Physical problem, y=probability of report, grouped by single or not single parent Seems that being in bad physical health is more likely to have Mental health risk
for both single or not single parent families. However, Children in single parent families
have greater risk for mental health issue than its counterpart.

# fortify data with model fits
dta_m1 <- data.frame(na.omit(dtaL), phat = fitted(m1a))
# compare observed with fitted
ggplot(dta_m1, aes(Problem, Report, color = Informant)) +
 geom_point(aes(Problem, phat),position = position_dodge(width=.2),
            pch = 1, size = rel(2)) +
 geom_hline(yintercept = c(mean(dta1$Parent, na.rm = T),
                           mean(dta1$Teacher, na.rm = T)), linetype = "dotted")+
 stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width=.2)) +
 facet_wrap( ~ Single)+
 labs(x = "Physical Problem", y = "Probability of report") +
 theme(legend.position = c(.1, .9))