1 load package

pacman::p_load(MASS, gee, tidyverse, geepack)

2 input data

fL <- "https://www.hsph.harvard.edu/fitzmaur/ala2e/ccs-data.txt"
dta <- read.table(fL, na.string = ".")

3 data management

names(dta) <- c("ID", "Problem", "Single", "Parent", "Teacher")
head(dta)
##   ID Problem Single Parent Teacher
## 1  1       1      1      0      NA
## 2  2       0      0      1      NA
## 3  3       1      0      0      NA
## 4  4       1      1      1      NA
## 5  5       1      0      0      NA
## 6  6       1      0      0      NA
str(dta)
## 'data.frame':    2501 obs. of  5 variables:
##  $ ID     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Problem: int  1 0 1 1 1 1 1 0 0 0 ...
##  $ Single : int  1 0 0 1 0 0 0 0 1 0 ...
##  $ Parent : int  0 1 0 1 0 0 0 0 0 0 ...
##  $ Teacher: int  NA NA NA NA NA NA NA 0 NA 0 ...

4 descriptive analysis

with(dta, ftable(Teacher, Parent))
##         Parent    0    1
## Teacher                 
## 0              1030  129
## 1               165  104
# correlation
cor(dta[,4:5], use="pair")
##            Parent   Teacher
## Parent  1.0000000 0.2913296
## Teacher 0.2913296 1.0000000
# proportion of missing responses
sum(is.na(dta[,4]) | is.na(dta[,5]))/dim(dta)[1]
## [1] 0.4290284

165>129 相比家長,老師報告學生違規和攻擊行為較多

5 Translate to long form

# stack the parents and teachers report; call them informants
dtaL <- dta %>%
        gather(Informant, Report, 4:5)

# sort according to child id; put same ID nearby
dtaL <- dtaL[order(dtaL$ID), ]

# get rid of annoying row names; because resort the child id, rowname is Disorganized
rownames(dtaL) <- NULL

# have a look
head(dtaL)
##   ID Problem Single Informant Report
## 1  1       1      1    Parent      0
## 2  1       1      1   Teacher     NA
## 3  2       0      0    Parent      1
## 4  2       0      0   Teacher     NA
## 5  3       1      0    Parent      0
## 6  3       1      0   Teacher     NA

6 model

# use gee function 
summary(m1 <- gee(Report ~ Informant + Problem + Single + Informant:Problem,
        data = dtaL, id = ID, corstr = "exchangeable", family = binomial))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##              (Intercept)         InformantTeacher                  Problem 
##               -2.1595398                0.4712270                0.5996091 
##                   Single InformantTeacher:Problem 
##                0.6332404               -0.4247098
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = Report ~ Informant + Problem + Single + Informant:Problem, 
##     id = ID, data = dtaL, family = binomial, corstr = "exchangeable")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2841247 -0.1765046 -0.1570826 -0.1039640  0.8960360 
## 
## 
## Coefficients:
##                            Estimate Naive S.E.    Naive z Robust S.E.
## (Intercept)              -2.1539359 0.09016552 -23.888688  0.08888885
## InformantTeacher          0.4738391 0.11498088   4.121025  0.11798848
## Problem                   0.5995711 0.11284664   5.313149  0.11278090
## Single                    0.6137249 0.10596325   5.791865  0.10759321
## InformantTeacher:Problem -0.4572920 0.15701454  -2.912418  0.15711252
##                            Robust z
## (Intercept)              -24.231790
## InformantTeacher           4.015977
## Problem                    5.316247
## Single                     5.704123
## InformantTeacher:Problem  -2.910602
## 
## Estimated Scale Parameter:  1.001019
## Number of Iterations:  2
## 
## Working Correlation
##           [,1]      [,2]
## [1,] 1.0000000 0.2681987
## [2,] 0.2681987 1.0000000
# use geeglm function: output more useful
summary(m1a <- geeglm(Report ~ Informant + Problem + Single +
                      Informant:Problem + Single:Problem,
        data = dtaL, id = ID, corstr = "exchangeable", family = binomial))
## 
## Call:
## geeglm(formula = Report ~ Informant + Problem + Single + Informant:Problem + 
##     Single:Problem, family = binomial, data = dtaL, id = ID, 
##     corstr = "exchangeable")
## 
##  Coefficients:
##                          Estimate  Std.err    Wald Pr(>|W|)    
## (Intercept)              -2.16615  0.09410 529.882  < 2e-16 ***
## InformantTeacher          0.47515  0.11819  16.162 5.81e-05 ***
## Problem                   0.62177  0.12654  24.144 8.94e-07 ***
## Single                    0.65789  0.15949  17.014 3.71e-05 ***
## InformantTeacher:Problem -0.45906  0.15722   8.526   0.0035 ** 
## Problem:Single           -0.07945  0.21572   0.136   0.7127    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = exchangeable 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)        1 0.06415
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha   0.2668 0.03029
## Number of clusters:   2501  Maximum cluster size: 2

Problem:Single did’t sig

# residual plot - not informative
plot(m1a, "pearson", which =1)
grid()

# set theme
ot <- theme_set(theme_bw())

7 fortify data with model fits

dta_m1 <- data.frame(na.omit(dtaL), phat = fitted(m1a))

8 ggplot

# compare observed with fitted
ggplot(dta_m1, aes(Problem, Report, color = Informant)) +
 geom_point(aes(Problem, phat),position = position_dodge(width=.2),
            pch = 1, size = rel(2)) +
 geom_hline(yintercept = c(mean(dta$Parent, na.rm = T),
                           mean(dta$Teacher, na.rm = T)), linetype = "dotted")+
 stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width=.2)) +
 facet_wrap( ~ Single)+
 labs(x = "Physical Problem", y = "Probability of report") +
 theme(legend.position = c(.1, .9))

For kids of single parent status, both informants were more likely to report behavior problems.

The effect of children health problems on report of behavior problems is more apparent on parents than on teachers.