The data are obtained from two surveys of children’s mental health. The data set consists of 1,428 children who had both parent and teacher responses, and an additional 1,073 children with only a parent response. In the psychiatric literature, these sources of respondents are often referred to as “informants”.
A standardized measure of childhood psychopathology was used both by parents (Child Behavior Checklist, CBC) and teachers (Teacher Report Forms, TRF) to assess children in the study. The response variable is derived from the externalizing scale, which assesses delinquent and aggressive behavior. The scale has been dichotomized at the cutpoint for borderline/clinical psychopathology.
Source: Zahner, G.E.P., Jacobs, J.H., Freeman, D.H. & Trainor, K.F. (1993). Rural-urban child psychopathology in a northeastern U.S. State: 1986-1989. Journal of the American Academy of Child and Adolescent Psychiatry, 32, 378-387. Reported in Fitzmaurice, G.M., Laird, N.M., & Ware, J.H. (2004). Applied Longitudinal Analysis. p. 434-439.
dta1<-read.table("ccs_data.txt", na.string = ".")
head(dta1)
## V1 V2 V3 V4 V5
## 1 1 1 1 0 NA
## 2 2 0 0 1 NA
## 3 3 1 0 0 NA
## 4 4 1 1 1 NA
## 5 5 1 0 0 NA
## 6 6 1 0 0 NA
#require packages
pacman::p_load(MASS, gee, tidyverse, geepack)
# input data
fL <- "https://www.hsph.harvard.edu/fitzmaur/ala2e/ccs-data.txt"
dta <- read.table(fL, na.string = ".")
#rename variables
names(dta) <- c("ID", "Problem", "Single", "Parent", "Teacher")
#recode and factor them
dta <- dta %>%
mutate(ID = factor(ID), Problem = factor(ifelse(Problem == 1, "Y", "N")),
Single = factor(ifelse(Single == 1, "Y", "N")))
#take a look
head(dta)
## ID Problem Single Parent Teacher
## 1 1 Y Y 0 NA
## 2 2 N N 1 NA
## 3 3 Y N 0 NA
## 4 4 Y Y 1 NA
## 5 5 Y N 0 NA
## 6 6 Y N 0 NA
#check data dimensions
str(dta)
## 'data.frame': 2501 obs. of 5 variables:
## $ ID : Factor w/ 2501 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Problem: Factor w/ 2 levels "N","Y": 2 1 2 2 2 2 2 1 1 1 ...
## $ Single : Factor w/ 2 levels "N","Y": 2 1 1 2 1 1 1 1 2 1 ...
## $ Parent : int 0 1 0 1 0 0 0 0 0 0 ...
## $ Teacher: int NA NA NA NA NA NA NA 0 NA 0 ...
##
with(dta, ftable(Teacher, Parent))
## Parent 0 1
## Teacher
## 0 1030 129
## 1 165 104
# correlation
cor(dta[,4:5], use="pair")
## Parent Teacher
## Parent 1.0000000 0.2913296
## Teacher 0.2913296 1.0000000
# proportion of missing responses (Parent or(|) Teacher missing)
sum(is.na(dta[,4]) | is.na(dta[,5]))/dim(dta)[1]
## [1] 0.4290284
# stack the parents and teachers report; call them informants
dtaL <- dta %>%
gather(Informant, Report, 4:5)
# sort according to child id
dtaL <- dtaL[order(dtaL$ID), ]
# get rid of annoying row names
rownames(dtaL) <- NULL
# have a look
head(dtaL)
## ID Problem Single Informant Report
## 1 1 Y Y Parent 0
## 2 1 Y Y Teacher NA
## 3 2 N N Parent 1
## 4 2 N N Teacher NA
## 5 3 Y N Parent 0
## 6 3 Y N Teacher NA
# gee function
summary(m1 <- gee(Report ~ Informant + Problem + Single + Informant:Problem,
data = dtaL, id = ID, corstr = "exchangeable", family = binomial))
## (Intercept) InformantTeacher ProblemY
## -2.1595398 0.4712270 0.5996091
## SingleY InformantTeacher:ProblemY
## 0.6332404 -0.4247098
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logit
## Variance to Mean Relation: Binomial
## Correlation Structure: Exchangeable
##
## Call:
## gee(formula = Report ~ Informant + Problem + Single + Informant:Problem,
## id = ID, data = dtaL, family = binomial, corstr = "exchangeable")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -0.2841247 -0.1765046 -0.1570826 -0.1039640 0.8960360
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E.
## (Intercept) -2.1539359 0.09016552 -23.888688 0.08888885
## InformantTeacher 0.4738391 0.11498088 4.121025 0.11798848
## ProblemY 0.5995711 0.11284664 5.313149 0.11278090
## SingleY 0.6137249 0.10596325 5.791865 0.10759321
## InformantTeacher:ProblemY -0.4572920 0.15701454 -2.912418 0.15711252
## Robust z
## (Intercept) -24.231790
## InformantTeacher 4.015977
## ProblemY 5.316247
## SingleY 5.704123
## InformantTeacher:ProblemY -2.910602
##
## Estimated Scale Parameter: 1.001019
## Number of Iterations: 2
##
## Working Correlation
## [,1] [,2]
## [1,] 1.0000000 0.2681987
## [2,] 0.2681987 1.0000000
#'glmgee' function, add Single:Problem interaction
summary(m1a <- geeglm(Report ~ Informant + Problem + Single +
Informant:Problem + Single:Problem,
data = dtaL, id = ID, corstr = "exchangeable", family = binomial))
##
## Call:
## geeglm(formula = Report ~ Informant + Problem + Single + Informant:Problem +
## Single:Problem, family = binomial, data = dtaL, id = ID,
## corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -2.16615 0.09410 529.882 < 2e-16 ***
## InformantTeacher 0.47515 0.11819 16.162 5.81e-05 ***
## ProblemY 0.62177 0.12654 24.144 8.94e-07 ***
## SingleY 0.65789 0.15949 17.014 3.71e-05 ***
## InformantTeacher:ProblemY -0.45906 0.15722 8.526 0.0035 **
## ProblemY:SingleY -0.07945 0.21572 0.136 0.7127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = exchangeable
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 1 0.06415
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.2668 0.03029
## Number of clusters: 2501 Maximum cluster size: 2
# exp(SingleY's coefficiet)
exp(0.6579)
## [1] 1.931
#Teacher found out Problem
exp((-2.1662+0.4751)+(-0.4591))
## [1] 0.1165
# residual plot - not informative
plot(m1a, "pearson", which = 1)
grid() #加格線
# set theme
ot <- theme_set(theme_bw())
# fortify data with model fits
dta_m1 <- data.frame(na.omit(dtaL), phat = fitted(m1a))
# compare observed with fitted
ggplot(dta_m1, aes(Problem, Report, color = Informant)) +
geom_point(aes(Problem, phat),position = position_dodge(width=.2),
pch = 1, size = rel(2)) +
geom_hline(yintercept = c(mean(dta$Parent, na.rm = T),
mean(dta$Teacher, na.rm = T)), linetype = "dotted")+
stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width=.2)) +
facet_wrap( ~ Single)+
labs(x = "Physical Problem", y = "Probability of report") +
theme(legend.position = c(.1, .85))
The data come from a randomized, double-blind clinical trial comparing active hypnotic drug with a placebo in patients who have insomnia problems. The response is the patient’s reported time (in minutes) to fall asleep after going to bed. Patients responded before and after a two-week treatment period. The patients receiving the two treatments were independent samples.
Source: Agresti, A. (2002). Categorical Data Analysis. 2nd Ed. p. 462
dta2<- read.table("insomnia.txt", h=T)
head(dta2)
## case treat occasion resp
## 1 1 1 0 1
## 2 1 1 1 1
## 3 2 1 0 1
## 4 2 1 1 1
## 5 3 1 0 1
## 6 3 1 1 1
#
library(repolr)
#
dta2_repolr <- repolr(resp ~ treat + occasion + treat:occasion,
subjects="case", data=dta2, times=c(1,2), categories=4)
#
summary(dta2_repolr)
##
## repolr: 2016-02-26 version 3.4
##
## Call:
## repolr(formula = resp ~ treat + occasion + treat:occasion, subjects = "case",
## data = dta2, times = c(1, 2), categories = 4)
##
## Coefficients:
## coeff se.robust z.robust p.value
## cuts1|2 -2.2671 0.2091 -10.8422 0.0000
## cuts2|3 -0.9515 0.1769 -5.3787 0.0000
## cuts3|4 0.3517 0.1751 2.0086 0.0446
## treat 0.0336 0.2369 0.1418 0.8872
## occasion 1.0381 0.1564 6.6375 0.0000
## treat:occasion 0.7077 0.2458 2.8792 0.0040
##
## Correlation Structure: independence
## Fixed Correlation: 0
-1.7458 + 0.7077 = -1.04 for Placebo.
Odds ration: exp(-1.7458)=0.175, exp(-1.04)=0.353
exp(-0.7414+0.7077) = 0.967 at follow-up.
# hand-made cumulative prop. against time to sleep
#
dta2$treat <- factor(ifelse(dta2$treat==1, "Active", "Placebo"))
#
dta2$occasion <- factor(ifelse(dta2$occasion==1, "Follow-up", "Initial"))
#
dta2.tbl <- ftable(dta2$treat, dta2$occasion , dta2$resp)
#
dta2.tbl
## 1 2 3 4
##
## Active Follow-up 40 49 19 11
## Initial 12 20 40 47
## Placebo Follow-up 31 29 35 25
## Initial 14 20 35 51
#
obsp <- as.numeric(t(prop.table(dta2.tbl, m=1)))
#
clp <- c(cumsum(obsp[1:4]), cumsum(obsp[5:8]), cumsum(obsp[9:12]),
cumsum(obsp[13:16]))
#
yt <-rep(c(20,25,45,60),4)
#
y <- data.frame(time2sleep=yt, clprop=clp)
#
plot(y[,1], y[,2], type="n",
xlab="Time to Sleep (min.)", ylab="Cumulative proportion")
#
lines(y[1:4, 1], y[1:4,2], lty=1, col="steelblue")
lines(y[5:8, 1], y[5:8,2], lty=5, col="steelblue")
lines(y[9:12, 1], y[9:12,2], lty=2, col="indianred")
lines(y[13:16,1], y[13:16,2], lty=4, col="indianred")
#
text(22, .12, "T,I")
text(22, .47, "T,F")
text(22, .20, "P,I")
text(22, .33, "P,F")
#
grid()
The data are from the Muscatine Coronary Risk Factor (MCRF) study, a longitudinal survey of school-age children in Muscatine, Iowa. The MCRF study had the goal of examining the development and persistence of risk factors for coronary disease in children. In the MCRF study, weight and height measurements of five cohorts of children, initially aged 5-7, 7-9, 9-11, 11-13, and 13-15 years, were obtained biennially from 1977 to 1981. Data were collected on 4856 boys and girls. On the basis of a comparison of their weight to age-gender specific norms, children were classified as obese or not obese. The goal of the analysis is to determine whether the risk of obesity increases with age and whether patterns of change in obesity are the same for boys and girls.
Source: Woolson, R.F., & Clarke, W.R. (1984). Analysis of categorical incompletel longitudinal data. Journal of the Royal Statistical Society, Series A, 147, 87-99. Reported in Fitzmaurice, G.M., Laired, N.M., & Ware, J.H. (2004). Applied Longitudinal Data Analysis. pp. 306-312.
library(knitr)
library(rmdformats)
fL <- "https://content.sph.harvard.edu/fitzmaur/ala2e/muscatine-data.txt"
dta_1 <- read.table(fL, h=F, na.strings = '.')
names(dta_1) <- c("ID","Gender","Age0","Age","Occasion","Obese")
dta_1$Gender <- ifelse(dta_1$Gender == 0, "M", "F")
head(dta_1)
## ID Gender Age0 Age Occasion Obese
## 1 1 M 6 6 1 1
## 2 1 M 6 8 2 1
## 3 1 M 6 10 3 1
## 4 2 M 6 6 1 1
## 5 2 M 6 8 2 1
## 6 2 M 6 10 3 1
dta_1$Age_c<-factor(dta_1$Age-12)
dta_1$Gender<-factor(dta_1$Gender)
dta_1$Occasion<-factor(dta_1$Occasion)
dta_1$Obese<-as.integer(ifelse(dta_1$Obese==1, 1, 0))
#m0_1<-repolr(Obese~Age_c*Gender, data=dta_1, subjects = "ID", times=c(1,2,3), categories=2, corr.mod="ar1")
#summary(m0_1)
The data are from a survey of primary care patients in Cantabrian, Spain, to estimate the prevalence of psychiatric illness. The prevalence for each of the sexes is estimated from data provided by the general practitioner and from completing the general health questionnaire.
Source: Everitt, B.S., & Dunn, G. (2001). Applied Multivariate Data Analysis, 2nd Ed. p. 237
dta_2<-read.table("cantabrian.txt", h=T)
names(dta_2)<-c("ID", "Illness", "Gender", "Type")
dta_2<-dta_2 %>% mutate(ID=factor(ID),
Illness=factor(ifelse(Illness==1, 1, 0),
labels = c("Illness","Non-illness")),
Gender=factor(Gender),
Type=factor(Type))
str(dta_2)
## 'data.frame': 1646 obs. of 4 variables:
## $ ID : Factor w/ 823 levels "1","2","3","4",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ Illness: Factor w/ 2 levels "Illness","Non-illness": 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ Type : Factor w/ 2 levels "GHQ","GP": 2 1 2 1 2 1 2 1 2 1 ...
with(dta_2, ftable(ftable(Illness, Gender, Type)))
## Type GHQ GP
## Illness Gender
## Illness F 306 420
## M 244 287
## Non-illness F 193 79
## M 80 37
dta_2p<-as.data.frame(prop.table(with(dta_2,ftable(Illness,Type)), m=2))
dta_2p
## Illness Type Freq
## 1 Illness GHQ 0.6683
## 2 Non-illness GHQ 0.3317
## 3 Illness GP 0.8591
## 4 Non-illness GP 0.1409
#summary(m0_2 <- geeglm(Illness ~ Gender + Type + Gender*Type, data=dta_2, id = ID, corstr="exchangeable", family = binomial, na.action=na.omit))