library

library(pacman)
p_load(gee, tidyverse, visreg, geepack,aspect, MASS,repolr,GGally,multgee,nlme,lme4)

Question 1

dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/cantabrian.txt",header = T)
str(dta1)
## 'data.frame':    1646 obs. of  4 variables:
##  $ sbj   : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ resp  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ gender: Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
##  $ type  : Factor w/ 2 levels "GHQ","GP": 2 1 2 1 2 1 2 1 2 1 ...
dta1$sbj <- as.factor(dta1$sbj)


# histograms
ggplot(dta1, aes(resp)) +
  geom_histogram(binwidth = 1, alpha = 0.5) +
  facet_grid(type ~ gender) +
  labs( y = "Count")

# 
dta1$gender <- relevel(dta1$gender, ref = "M")
dta1$type <- relevel(dta1$type, ref = "GP")

# gee poisson regression(independence)
summary(r1_1 <- gee(resp ~ gender*type, data = dta1, id = sbj, 
                    corstr = "independence", family = poisson,
                    scale.fix = TRUE,scale.value = 1))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##     (Intercept)         genderF         typeGHQ genderF:typeGHQ 
##      -2.1698256       0.3266674       0.7711087       0.1221336
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     Independent 
## 
## Call:
## gee(formula = resp ~ gender * type, id = sbj, data = dta1, family = poisson, 
##     corstr = "independence", scale.fix = TRUE, scale.value = 1)
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.3867735 -0.2469136 -0.1583166 -0.1141975  0.8858025 
## 
## 
## Coefficients:
##                   Estimate Naive S.E.     Naive z Robust S.E.    Robust z
## (Intercept)     -2.1698256  0.1643990 -13.1985339   0.1547275 -14.0235268
## genderF          0.3266674  0.1992116   1.6398010   0.1859968   1.7563059
## typeGHQ          0.7711087  0.1988141   3.8785424   0.1612871   4.7809710
## genderF:typeGHQ  0.1221336  0.2395133   0.5099242   0.1896910   0.6438555
## 
## Estimated Scale Parameter:  1
## Number of Iterations:  1
## 
## Working Correlation
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
# gee poisson regression(exchangeable)
summary(r1_2 <- gee(resp ~ gender*type, data = dta1, id = sbj, 
                  corstr = "exchangeable", family = poisson,
                 scale.fix = TRUE,scale.value = 1))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##     (Intercept)         genderF         typeGHQ genderF:typeGHQ 
##      -2.1698256       0.3266674       0.7711087       0.1221336
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = resp ~ gender * type, id = sbj, data = dta1, family = poisson, 
##     corstr = "exchangeable", scale.fix = TRUE, scale.value = 1)
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.3867735 -0.2469136 -0.1583166 -0.1141975  0.8858025 
## 
## 
## Coefficients:
##                   Estimate Naive S.E.     Naive z Robust S.E.    Robust z
## (Intercept)     -2.1698256  0.1643990 -13.1985339   0.1547275 -14.0235268
## genderF          0.3266674  0.1992116   1.6398010   0.1859968   1.7563059
## typeGHQ          0.7711087  0.1695782   4.5472158   0.1612871   4.7809710
## genderF:typeGHQ  0.1221336  0.2045749   0.5970119   0.1896910   0.6438555
## 
## Estimated Scale Parameter:  1
## Number of Iterations:  1
## 
## Working Correlation
##           [,1]      [,2]
## [1,] 1.0000000 0.2929827
## [2,] 0.2929827 1.0000000

Question 3

dta3 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/insomnia.txt",header = T)
str(dta3)
## 'data.frame':    478 obs. of  4 variables:
##  $ case    : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ treat   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ occasion: int  0 1 0 1 0 1 0 1 0 1 ...
##  $ resp    : int  1 1 1 1 1 1 1 1 1 1 ...
dta3$case <- as.factor(dta3$case)
dta3$treat <- as.factor(dta3$treat)
dta3$occasion <- as.factor(dta3$occasion)
dta3 <- dta3 %>% mutate(sleep = ifelse(resp == 1,20,
                                       ifelse(resp == 2,25,
                                              ifelse(resp ==3,45,60))))
dta3 <- dta3 %>% mutate(status = ifelse(treat == 1&occasion == 1,"TF",
                                       ifelse(treat == 0&occasion == 1,"PF",
                                              ifelse(treat == 0&occasion == 0,"PI","TI"))))
porp.dta3 <- as.data.frame(matrix(0,4,8))
porp.dta3[,1:2] <- dta3 %>% group_by(status)%>%summarise(n())
porp.dta3[,3:4] <- dta3 %>% group_by(status)%>%filter(sleep == 20)%>%summarise(n())
porp.dta3[,5:6] <- dta3 %>% group_by(status)%>%filter(sleep == 25)%>%summarise(n())
porp.dta3[,7:8] <- dta3 %>% group_by(status)%>%filter(sleep == 45)%>%summarise(n())
porp.dta3 <- porp.dta3[,c(1,2,4,6,8)]
porp.dta3 <- porp.dta3%>%mutate(X = porp.dta3[,2]-porp.dta3[,3]-porp.dta3[,4]-porp.dta3[,5])
colnames(porp.dta3) <- c("status","Total","20min","25min","40min","60min")
porp.dta3 <- porp.dta3%>%mutate(T = round(porp.dta3[,2]/porp.dta3[,2],2),
                                S1 = round(porp.dta3[,3]/porp.dta3[,2],2),
                                S2 = round((porp.dta3[,3]+porp.dta3[,4])/porp.dta3[,2],2),
                                S3 = round((porp.dta3[,3]+porp.dta3[,4]+porp.dta3[,5])/porp.dta3[,2],2),
                                S4 = round((porp.dta3[,3]+porp.dta3[,4]+porp.dta3[,5]+porp.dta3[,6])/porp.dta3[,2],2))
porp.dta3L <- porp.dta3[,c(1,7,8,9,10)]
dta3L <- gather(porp.dta3L,key = "Time",value = "Proportion",2:5)
str(dta3L)
## 'data.frame':    16 obs. of  3 variables:
##  $ status    : chr  "PF" "PI" "TF" "TI" ...
##  $ Time      : chr  "T" "T" "T" "T" ...
##  $ Proportion: num  1 1 1 1 0.26 0.12 0.34 0.1 0.5 0.28 ...
dta3L$status <- as.factor(dta3L$status)
dta3L <- dta3L%>%mutate(Time = ifelse(Time == "T",60,
                                      ifelse(Time == "S1",20,
                                             ifelse(Time == "S2",25,45))))
dta3L <- arrange(dta3L,Time)


#ggplot
ggplot(data = dta3L, aes(x = Time,y = Proportion,color = status,linetype = status))+
  geom_line()+
  labs(x = "Time to Sleep(min.)",y = "Cumulative proportion")+
  theme_bw()

# proportional odds model for ordinal gee
summary(r3_1 <- repolr(resp ~ treat*occasion, subjects = "case", data = dta3, times=c(1,2),
             categories = 4, corr.mod = "ar1"))
## 
## repolr: 2016-02-26 version 3.4 
## 
## Call:
## repolr(formula = resp ~ treat * occasion, subjects = "case", 
##     data = dta3, times = c(1, 2), categories = 4, corr.mod = "ar1")
## 
## Coefficients: 
##                   coeff     se.robust  z.robust  p.value 
## cuts1|2            -2.2686    0.2094   -10.8338    0.0000
## cuts2|3            -0.9503    0.1770    -5.3689    0.0000
## cuts3|4             0.3544    0.1750     2.0251    0.0429
## treat1              0.0263    0.2370     0.1110    0.9116
## occasion1           1.0348    0.1563     6.6206    0.0000
## treat1:occasion1    0.7187    0.2460     2.9215    0.0035
## 
## Correlation Structure:  ar1 
## Estimated Correlation:  0.05
# glm for ordinal data - ignoring cluster
summary(r3_2 <- polr(ordered(resp) ~ treat*occasion, data = dta3))
## 
## Re-fitting to get Hessian
## Call:
## polr(formula = ordered(resp) ~ treat * occasion, data = dta3)
## 
## Coefficients:
##                     Value Std. Error t value
## treat1           -0.03361     0.2377 -0.1414
## occasion1        -1.03808     0.2410 -4.3076
## treat1:occasion1 -0.70775     0.3339 -2.1194
## 
## Intercepts:
##     Value    Std. Error t value 
## 1|2  -2.2671   0.2048   -11.0714
## 2|3  -0.9515   0.1812    -5.2515
## 3|4   0.3517   0.1746     2.0139
## 
## Residual Deviance: 1241.988 
## AIC: 1253.988

Question 4

dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/physicalAggression.txt",header = T,na.strings = ".")
str(dta4)
## 'data.frame':    1037 obs. of  12 variables:
##  $ id : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ t1 : int  0 2 6 3 NA 0 1 1 0 1 ...
##  $ t2 : int  0 2 1 1 0 2 0 3 NA 5 ...
##  $ t3 : int  0 2 1 1 0 0 0 0 0 NA ...
##  $ t4 : int  0 NA NA 1 NA NA 3 0 NA NA ...
##  $ t5 : int  0 0 0 0 NA NA 0 0 NA 2 ...
##  $ ses: num  0.143 0.857 0.6 0.6 0.143 ...
##  $ s1 : int  0 1 1 0 NA 0 1 0 0 1 ...
##  $ s2 : int  0 1 0 0 0 0 0 0 0 1 ...
##  $ s3 : int  0 1 0 0 0 0 0 1 0 0 ...
##  $ s4 : int  0 0 0 0 0 NA 0 1 1 0 ...
##  $ s5 : int  0 0 0 0 0 NA 3 0 0 1 ...
#
ggpairs(dta4,columns = 2:6,lower = list(continuous = "points",combo = "denstrip"),
        diag = list(continuous = wrap("barDiag",col = "cyan",fill = "cyan")))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#
ggpairs(dta4,columns = 8:12,lower = list(continuous = "points",combo = "denstrip"),
        diag = list(continuous = wrap("barDiag",col = "cyan",fill = "cyan")))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#
dta4r <- gather(dta4,key = "age",value = "TR",2:6)
dta4r <- gather(dta4r,key = "age_s",value = "SR",3:7)
dta4r$age <- as.factor(dta4r$age)
dta4r$age_s <- as.factor(dta4r$age_s)
levels(dta4r$age) <- c("11","12","13","14","15")
levels(dta4r$age_s) <- c("11","12","13","14","15")
dta4L.11 <- filter(dta4r,age_s == "11"&age == "11")
dta4L.12 <- filter(dta4r,age_s == "12"&age == "12")
dta4L.13 <- filter(dta4r,age_s == "13"&age == "13")
dta4L.14 <- filter(dta4r,age_s == "14"&age == "14")
dta4L.15 <- filter(dta4r,age_s == "15"&age == "15")
dta4L <- as.data.frame(matrix(0,5185,6))
dta4L[1:1037,] <- dta4L.11
dta4L[1038:2074,] <- dta4L.12
dta4L[2075:3111,] <- dta4L.13
dta4L[3112:4148,] <- dta4L.14
dta4L[4149:5185,] <- dta4L.15
dta4L <- dta4L[,c(1,2,3,4,6)]
colnames(dta4L) <- c("id","ses","age","TR","SR")
str(dta4L)
## 'data.frame':    5185 obs. of  5 variables:
##  $ id : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ ses: num  0.143 0.857 0.6 0.6 0.143 ...
##  $ age: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ TR : num  0 2 6 3 NA 0 1 1 0 1 ...
##  $ SR : num  0 1 1 0 NA 0 1 0 0 1 ...
dta4L <- gather(dta4L,Informant,Report,4:5)
dta4L <- dta4L[order(dta4L$id), ]
dta4L <- dta4L%>%mutate(id = factor(id),age = factor(age),Informant = factor(Informant))

#
summary(m1 <- gee(Report ~ Informant + ses +Informant:ses+age,
                  data = dta4L, id = id, corstr = "unstructured", family = poisson))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##     (Intercept)     InformantTR             ses            age2 
##      -0.4195521      -0.1463901       0.9997728      -0.2982575 
##            age3            age4            age5 InformantTR:ses 
##      -0.3761945      -0.4408703      -0.5504326       0.5147644
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     Unstructured 
## 
## Call:
## gee(formula = Report ~ Informant + ses + Informant:ses + age, 
##     id = id, data = dta4L, family = poisson, corstr = "unstructured")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -2.6739544 -0.7170768 -0.4945809  0.3255234 11.3255234 
## 
## 
## Coefficients:
##                   Estimate Naive S.E.   Naive z Robust S.E.  Robust z
## (Intercept)     -0.4063190 0.07793282 -5.213708  0.08139030 -4.992229
## InformantTR     -0.1456137 0.08612526 -1.690720  0.08787402 -1.657073
## ses              0.9779077 0.16115445  6.068140  0.17428546  5.610954
## age2            -0.3138033 0.04460387 -7.035339  0.04016214 -7.813412
## age3            -0.4088211 0.05045034 -8.103435  0.04861706 -8.409004
## age4            -0.4374295 0.05217338 -8.384150  0.05534152 -7.904183
## age5            -0.5463051 0.05752572 -9.496710  0.05943835 -9.191122
## InformantTR:ses  0.5575834 0.17801764  3.132181  0.18820969  2.962565
## 
## Estimated Scale Parameter:  2.451022
## Number of Iterations:  3
## 
## Working Correlation
##            [,1]       [,2]      [,3]       [,4]       [,5]      [,6]
##  [1,] 1.0000000 0.40583935 0.3503451 0.23527921 0.19991770 0.1590500
##  [2,] 0.4058393 1.00000000 0.3955962 0.26312671 0.18536540 0.1316966
##  [3,] 0.3503451 0.39559617 1.0000000 0.29481995 0.18477068 0.1831140
##  [4,] 0.2352792 0.26312671 0.2948199 1.00000000 0.24542522 0.2096933
##  [5,] 0.1999177 0.18536540 0.1847707 0.24542522 1.00000000 0.1992844
##  [6,] 0.1590500 0.13169660 0.1831140 0.20969332 0.19928436 1.0000000
##  [7,] 0.1628397 0.11667462 0.1924531 0.20548059 0.20512158 0.4270308
##  [8,] 0.1408142 0.18226509 0.2699504 0.27531779 0.19717535 0.3862256
##  [9,] 0.1498754 0.14778494 0.1205390 0.17401340 0.11397153 0.3004065
## [10,] 0.0806198 0.04499225 0.1127230 0.06253217 0.08723628 0.1405979
##            [,7]      [,8]      [,9]      [,10]
##  [1,] 0.1628397 0.1408142 0.1498754 0.08061980
##  [2,] 0.1166746 0.1822651 0.1477849 0.04499225
##  [3,] 0.1924531 0.2699504 0.1205390 0.11272305
##  [4,] 0.2054806 0.2753178 0.1740134 0.06253217
##  [5,] 0.2051216 0.1971753 0.1139715 0.08723628
##  [6,] 0.4270308 0.3862256 0.3004065 0.14059785
##  [7,] 1.0000000 0.5461239 0.3647192 0.21762763
##  [8,] 0.5461239 1.0000000 0.5190945 0.28173785
##  [9,] 0.3647192 0.5190945 1.0000000 0.34178840
## [10,] 0.2176276 0.2817379 0.3417884 1.00000000

Question 5

dta5 <- read.table("https://www.hsph.harvard.edu/fitzmaur/ala2e/ccs-data.txt",na.strings = ".")
str(dta5)
## 'data.frame':    2501 obs. of  5 variables:
##  $ V1: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ V2: int  1 0 1 1 1 1 1 0 0 0 ...
##  $ V3: int  1 0 0 1 0 0 0 0 1 0 ...
##  $ V4: int  0 1 0 1 0 0 0 0 0 0 ...
##  $ V5: int  NA NA NA NA NA NA NA 0 NA 0 ...
colnames(dta5) <- c("ID", "Problem", "Single", "Parent", "Teacher")

#
dta5 <- dta5 %>%
            mutate(ID = factor(ID),
              Single = factor(ifelse(Single == 1, "Y", "N")))

#Parent
summary(r5_1 <- lm(Problem ~ Single*Parent,data = dta5))
## 
## Call:
## lm(formula = Problem ~ Single * Parent, data = dta5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6037 -0.4381 -0.4381  0.5619  0.5619 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.43808    0.01199  36.531  < 2e-16 ***
## SingleY         0.03692    0.02756   1.340     0.18    
## Parent          0.16562    0.03249   5.097  3.7e-07 ***
## SingleY:Parent -0.06079    0.06116  -0.994     0.32    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4962 on 2497 degrees of freedom
## Multiple R-squared:  0.01286,    Adjusted R-squared:  0.01168 
## F-statistic: 10.85 on 3 and 2497 DF,  p-value: 4.448e-07
#Teacher
summary(r5_2 <- lm(Problem ~ Single*Teacher,data = dta5))
## 
## Call:
## lm(formula = Problem ~ Single * Teacher, data = dta5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5658 -0.4735 -0.4735  0.5265  0.5265 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.47352    0.01611  29.390   <2e-16 ***
## SingleY          0.02138    0.03918   0.546    0.585    
## Teacher          0.03425    0.03943   0.869    0.385    
## SingleY:Teacher  0.03664    0.07823   0.468    0.640    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5 on 1424 degrees of freedom
##   (1073 observations deleted due to missingness)
## Multiple R-squared:  0.002077,   Adjusted R-squared:  -2.559e-05 
## F-statistic: 0.9878 on 3 and 1424 DF,  p-value: 0.3976
#
dta5L <- dta5%>%mutate(Problem = factor(ifelse(Problem == 1, "Y", "N")))%>%gather(Informant, Report, 4:5)

# sort according to child id
dta5L <- dta5L[order(dta5L$ID), ]

# get rid of annoying row names
rownames(dta5L) <- NULL

#
summary(m1 <- gee(Report ~ Informant + Problem + Single + Informant:Problem,
                  data = dta5L, id = ID, corstr = "exchangeable", family = binomial))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##               (Intercept)          InformantTeacher 
##                -2.1595398                 0.4712270 
##                  ProblemY                   SingleY 
##                 0.5996091                 0.6332404 
## InformantTeacher:ProblemY 
##                -0.4247098
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = Report ~ Informant + Problem + Single + Informant:Problem, 
##     id = ID, data = dta5L, family = binomial, corstr = "exchangeable")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2841247 -0.1765046 -0.1570826 -0.1039640  0.8960360 
## 
## 
## Coefficients:
##                             Estimate Naive S.E.    Naive z Robust S.E.
## (Intercept)               -2.1539359 0.09016552 -23.888688  0.08888885
## InformantTeacher           0.4738391 0.11498088   4.121025  0.11798848
## ProblemY                   0.5995711 0.11284664   5.313149  0.11278090
## SingleY                    0.6137249 0.10596325   5.791865  0.10759321
## InformantTeacher:ProblemY -0.4572920 0.15701454  -2.912418  0.15711252
##                             Robust z
## (Intercept)               -24.231790
## InformantTeacher            4.015977
## ProblemY                    5.316247
## SingleY                     5.704123
## InformantTeacher:ProblemY  -2.910602
## 
## Estimated Scale Parameter:  1.001019
## Number of Iterations:  2
## 
## Working Correlation
##           [,1]      [,2]
## [1,] 1.0000000 0.2681987
## [2,] 0.2681987 1.0000000
summary(m1a <- geeglm(Report ~ Informant + Problem + Single +
                        Informant:Problem + Single:Problem,
                      data = dta5L, id = ID, corstr = "exchangeable", family = binomial))
## 
## Call:
## geeglm(formula = Report ~ Informant + Problem + Single + Informant:Problem + 
##     Single:Problem, family = binomial, data = dta5L, id = ID, 
##     corstr = "exchangeable")
## 
##  Coefficients:
##                           Estimate  Std.err    Wald Pr(>|W|)    
## (Intercept)               -2.16615  0.09410 529.882  < 2e-16 ***
## InformantTeacher           0.47515  0.11819  16.162 5.81e-05 ***
## ProblemY                   0.62177  0.12654  24.144 8.94e-07 ***
## SingleY                    0.65789  0.15949  17.014 3.71e-05 ***
## InformantTeacher:ProblemY -0.45906  0.15722   8.526   0.0035 ** 
## ProblemY:SingleY          -0.07945  0.21572   0.136   0.7127    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)        1 0.06415
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha   0.2668 0.03029
## Number of clusters:   2501   Maximum cluster size: 2

Question 6

dta6 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/sexIsFun.txt",header = T)
str(dta6)
## 'data.frame':    16 obs. of  3 variables:
##  $ husband: int  4 4 4 4 3 3 3 3 2 2 ...
##  $ wife   : int  4 3 2 1 4 3 2 1 4 3 ...
##  $ count  : int  14 9 8 2 9 4 5 1 7 3 ...
dta6L <- as.data.frame(matrix(0,91,3))
dta6L$V1 <- seq(91)
dta6L$V2 <- c(rep(4,33),rep(3,19),rep(2,20),rep(1,19))
dta6L$V3 <- c(rep(4,14),rep(3,9),rep(2,8),rep(1,2),
              rep(4,9),rep(3,4),rep(2,5),rep(1,1),
              rep(4,7),rep(3,3),rep(2,8),rep(1,2),
              rep(4,3),rep(3,2),rep(2,7),rep(1,7))
colnames(dta6L) <- c("ID","Husband","Wife")
dta6L <- gather(dta6L,key = "Informant",value = "Report",2:3)
dta6L <- dta6L%>%mutate(ID = factor(ID),Informant = factor(Informant))

#
summary(r6 <- geeglm(Report ~ Informant,
                  data = dta6L, id = ID, corstr = "exchangeable", family = poisson))
## 
## Call:
## geeglm(formula = Report ~ Informant, family = poisson, data = dta6L, 
##     id = ID, corstr = "exchangeable")
## 
##  Coefficients:
##               Estimate Std.err   Wald Pr(>|W|)    
## (Intercept)     1.0026  0.0446 506.18   <2e-16 ***
## InformantWife   0.0239  0.0601   0.16     0.69    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)    0.453  0.0305
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha        0       0
## Number of clusters:   182   Maximum cluster size: 1