1016_HW

#peijun

Refer to the questions here.

Q1

library(HLMdiag) library(lme4) require(lme4) data(radon) str(radon)

'data.frame': 919 obs. of 5 variables: $ log.radon : num 0.788 0.788 1.065 0 1.131 ... $ basement : int 1 0 0 0 0 0 0 0 0 0 ... $ uranium : num -0.689 -0.689 -0.689 -0.689 -0.847 ... $ county : int 1 1 1 1 2 2 2 2 2 2 ... $ county.name: Factor w/ 85 levels "AITKIN","ANOKA",..: 1 1 1 1 2 2 2 2 2 2 ...

dta <- radon #summary dta_arr <- dta %>% mutate(g_Mean = mean(log.radon),county = county.name) %>% group_by(county) %>% mutate( grp_Mean = mean(log.radon), grp_sd = sd(log.radon)) %>% select( - basement, - uranium, - county.name, -log.radon) %>% mutate(N = n()) dta_arr <- dta_arr[!duplicated(dta_arr$county),] # analysis of variance summary(m1 <- lmer(log.radon ~ (1 | county), data = radon))

Linear mixed model fit by REML ['lmerMod'] Formula: log.radon ~ (1 | county) Data: radon REML criterion at convergence: 2259.4 Scaled residuals: Min 1Q Median 3Q Max -4.4661 -0.5734 0.0441 0.6432 3.3516 Random effects: Groups Name Variance Std.Dev. county (Intercept) 0.09581 0.3095 Residual 0.63662 0.7979 Number of obs: 919, groups: county, 85 Fixed effects: Estimate Std. Error t value (Intercept) 1.31258 0.04891 26.84

Q2

library(nlme) dta <- ergoStool str(ergoStool)

Classes 'nffGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 36 obs. of 3 variables: $ effort : num 12 15 12 10 10 14 13 12 7 14 ... $ Type : Factor w/ 4 levels "T1","T2","T3",..: 1 2 3 4 1 2 3 4 1 2 ... $ Subject: Ord.factor w/ 9 levels "8"<"5"<"4"<"9"<..: 8 8 8 8 9 9 9 9 6 6 ... - attr(*, "formula")=Class 'formula' language effort ~ Type | Subject .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> - attr(*, "labels")=List of 2 ..$ x: chr "Type of stool" ..$ y: chr "Effort required to arise" - attr(*, "units")=List of 1 ..$ y: chr "(Borg scale)" - attr(*, "FUN")=function (x) ..- attr(*, "source")= chr "function (x) mean(x, na.rm = TRUE)" - attr(*, "order.groups")= logi TRUE

with(ergoStool, tapply(effort, list(Type), mean))

T1 T2 T3 T4 8.555556 12.444444 10.777778 9.222222

with(ergoStool, tapply(effort, list(Subject), mean))

8 5 4 9 6 3 7 1 2 8.25 8.50 9.25 10.00 10.25 10.75 10.75 12.25 12.25

summary(m0 <- lme(effort ~ Type, random = ~1 | Subject,data = ergoStool))

Linear mixed-effects model fit by REML Data: ergoStool AIC BIC logLik 133.1308 141.9252 -60.56539 Random effects: Formula: ~1 | Subject (Intercept) Residual StdDev: 1.332465 1.100295 Fixed effects: effort ~ Type Value Std.Error DF t-value p-value (Intercept) 8.555556 0.5760123 24 14.853079 0.0000 TypeT2 3.888889 0.5186838 24 7.497610 0.0000 TypeT3 2.222222 0.5186838 24 4.284348 0.0003 TypeT4 0.666667 0.5186838 24 1.285304 0.2110 Correlation: (Intr) TypeT2 TypeT3 TypeT2 -0.45 TypeT3 -0.45 0.50 TypeT4 -0.45 0.50 0.50 Standardized Within-Group Residuals: Min Q1 Med Q3 Max -1.80200345 -0.64316591 0.05783115 0.70099706 1.63142054 Number of Observations: 36 Number of Groups: 9

intervals(m0)

Approximate 95% confidence intervals Fixed effects: lower est. upper (Intercept) 7.3667247 8.5555556 9.744386 TypeT2 2.8183781 3.8888889 4.959400 TypeT3 1.1517114 2.2222222 3.292733 TypeT4 -0.4038442 0.6666667 1.737177 attr(,"label") [1] "Fixed effects:" Random Effects: Level: Subject lower est. upper sd((Intercept)) 0.749509 1.332465 2.368835 Within-group standard error: lower est. upper 0.8292494 1.1002946 1.4599324

Q3

dta <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/thetaEEG.txt", header = T) Mean <- colMeans(x=dta[,2:10], na.rm = TRUE) Cov <- cov(dta[,2:10]) dtaL <- gather(dta,ch,effects,2:10) #repeat measure #univariate (balanced) m1 <- aov(effects ~ ch + Error(ch/ID), data = dtaL) summary(m1)

Error: ch Df Sum Sq Mean Sq ch 8 10.7 1.338 Error: ch:ID Df Sum Sq Mean Sq F value Pr(>F) Residuals 9 134.5 14.94 Error: Within Df Sum Sq Mean Sq F value Pr(>F) Residuals 153 1020 6.67

#univariate m0 <- lm(as.matrix(dta[, 2:10]) ~ ID, data = dta) CH <- factor(c("CH3","CH4","CH5","CH6", "CH7","ch8","ch17","ch18","ch19")) summary(r0 <- Anova(m0, idata = data.frame(CH), idesign = ~ CH, type = "III"), multivariate = F)

Univariate Type III Repeated-Measures ANOVA Assuming Sphericity SS num Df Error SS den Df F Pr(>F) (Intercept) 256.375 1 653.46 17 6.6697 0.01937 * ID 112.217 1 653.46 17 2.9193 0.10572 CH 9.844 8 367.04 136 0.4559 0.88493 ID:CH 22.270 8 367.04 136 1.0315 0.41552 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Mauchly Tests for Sphericity Test statistic p-value CH 0.00030986 6.6009e-10 ID:CH 0.00030986 6.6009e-10 Greenhouse-Geisser and Huynh-Feldt Corrections for Departure from Sphericity GG eps Pr(>F[GG]) CH 0.31815 0.6833 ID:CH 0.31815 0.3793 HF eps Pr(>F[HF]) CH 0.3788889 0.7161976 ID:CH 0.3788889 0.3869541

#mutivariate summary(r0, univariate = F)

Type III Repeated Measures MANOVA Tests: ------------------------------------------ Term: (Intercept) Response transformation matrix: (Intercept) Ch3 1 Ch4 1 Ch5 1 Ch6 1 Ch7 1 Ch8 1 Ch17 1 Ch18 1 Ch19 1 Sum of squares and products for the hypothesis: (Intercept) (Intercept) 2307.376 Multivariate Tests: (Intercept) Df test stat approx F num Df den Df Pr(>F) Pillai 1 0.2817811 6.669663 1 17 0.019365 * Wilks 1 0.7182189 6.669663 1 17 0.019365 * Hotelling-Lawley 1 0.3923331 6.669663 1 17 0.019365 * Roy 1 0.3923331 6.669663 1 17 0.019365 * --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ------------------------------------------ Term: ID Response transformation matrix: (Intercept) Ch3 1 Ch4 1 Ch5 1 Ch6 1 Ch7 1 Ch8 1 Ch17 1 Ch18 1 Ch19 1 Sum of squares and products for the hypothesis: (Intercept) (Intercept) 1009.949 Multivariate Tests: ID Df test stat approx F num Df den Df Pr(>F) Pillai 1 0.1465582 2.919344 1 17 0.10572 Wilks 1 0.8534418 2.919344 1 17 0.10572 Hotelling-Lawley 1 0.1717261 2.919344 1 17 0.10572 Roy 1 0.1717261 2.919344 1 17 0.10572 ------------------------------------------ Term: CH Response transformation matrix: CH1 CH2 CH3 CH4 CH5 CH6 CH7 CH8 Ch3 0 0 0 1 0 0 0 0 Ch4 0 0 0 0 1 0 0 0 Ch5 0 0 0 0 0 1 0 0 Ch6 0 0 0 0 0 0 1 0 Ch7 0 0 0 0 0 0 0 1 Ch8 -1 -1 -1 -1 -1 -1 -1 -1 Ch17 1 0 0 0 0 0 0 0 Ch18 0 1 0 0 0 0 0 0 Ch19 0 0 1 0 0 0 0 0 Sum of squares and products for the hypothesis: CH1 CH2 CH3 CH4 CH5 CH6 CH1 0.9801653 -2.4993306 0.28404791 0.3971579 0.4637146 -1.5733563 CH2 -2.4993306 6.3730613 -0.72429582 -1.0127158 -1.1824291 4.0119126 CH3 0.2840479 -0.7242958 0.08231592 0.1150947 0.1343826 -0.4559522 CH4 0.3971579 -1.0127158 0.11509474 0.1609263 0.1878947 -0.6375158 CH5 0.4637146 -1.1824291 0.13438259 0.1878947 0.2193826 -0.7443522 CH6 -1.5733563 4.0119126 -0.45595223 -0.6375158 -0.7443522 2.5255433 CH7 -0.4520762 1.1527525 -0.13100985 -0.1831789 -0.2138765 0.7256704 CH8 -0.4764440 1.2148880 -0.13807152 -0.1930526 -0.2254049 0.7647854 CH7 CH8 CH1 -0.4520762 -0.4764440 CH2 1.1527525 1.2148880 CH3 -0.1310099 -0.1380715 CH4 -0.1831789 -0.1930526 CH5 -0.2138765 -0.2254049 CH6 0.7256704 0.7647854 CH7 0.2085086 0.2197476 CH8 0.2197476 0.2315924 Multivariate Tests: CH Df test stat approx F num Df den Df Pr(>F) Pillai 1 0.4912732 1.207115 8 10 0.38253 Wilks 1 0.5087268 1.207115 8 10 0.38253 Hotelling-Lawley 1 0.9656917 1.207115 8 10 0.38253 Roy 1 0.9656917 1.207115 8 10 0.38253 ------------------------------------------ Term: ID:CH Response transformation matrix: CH1 CH2 CH3 CH4 CH5 CH6 CH7 CH8 Ch3 0 0 0 1 0 0 0 0 Ch4 0 0 0 0 1 0 0 0 Ch5 0 0 0 0 0 1 0 0 Ch6 0 0 0 0 0 0 1 0 Ch7 0 0 0 0 0 0 0 1 Ch8 -1 -1 -1 -1 -1 -1 -1 -1 Ch17 1 0 0 0 0 0 0 0 Ch18 0 1 0 0 0 0 0 0 Ch19 0 0 1 0 0 0 0 0 Sum of squares and products for the hypothesis: CH1 CH2 CH3 CH4 CH5 CH6 CH1 5.9629649 -7.019525 1.6272860 1.39920 5.5998684 -3.3476474 CH2 -7.0195246 8.263293 -1.9156198 -1.64712 -6.5920921 3.9408068 CH3 1.6272860 -1.915620 0.4440844 0.38184 1.5281974 -0.9135689 CH4 1.3992000 -1.647120 0.3818400 0.32832 1.3140000 -0.7855200 CH5 5.5998684 -6.592092 1.5281974 1.31400 5.2588816 -3.1438026 CH6 -3.3476474 3.940807 -0.9135689 -0.78552 -3.1438026 1.8793911 CH7 0.4377614 -0.515327 0.1194646 0.10272 0.4111053 -0.2457621 CH8 -1.3490825 1.588122 -0.3681630 -0.31656 -1.2669342 0.7573837 CH7 CH8 CH1 0.43776140 -1.3490825 CH2 -0.51532702 1.5881223 CH3 0.11946456 -0.3681630 CH4 0.10272000 -0.3165600 CH5 0.41110526 -1.2669342 CH6 -0.24576211 0.7573837 CH7 0.03213754 -0.0990407 CH8 -0.09904070 0.3052212 Multivariate Tests: ID:CH Df test stat approx F num Df den Df Pr(>F) Pillai 1 0.4832893 1.169149 8 10 0.4004 Wilks 1 0.5167107 1.169149 8 10 0.4004 Hotelling-Lawley 1 0.9353190 1.169149 8 10 0.4004 Roy 1 0.9353190 1.169149 8 10 0.4004

#mixed effect options(contrasts = c("contr.sum", "contr.poly")) summary(m2 <- lme(effects ~ ch, random = ~1 | ID, data = dtaL))

Linear mixed-effects model fit by REML Data: dtaL AIC BIC logLik 723.3534 757.3169 -350.6767 Random effects: Formula: ~1 | ID (Intercept) Residual StdDev: 2.103812 1.644238 Fixed effects: effects ~ ch Value Std.Error DF t-value p-value (Intercept) 1.0698830 0.4987572 144 2.1450979 0.0336 ch1 0.3327485 0.3556409 144 0.9356308 0.3510 ch2 -0.2156725 0.3556409 144 -0.6064335 0.5452 ch3 -0.0751462 0.3556409 144 -0.2112980 0.8330 ch4 -0.1688304 0.3556409 144 -0.4747216 0.6357 ch5 0.5195906 0.3556409 144 1.4609982 0.1462 ch6 -0.0325146 0.3556409 144 -0.0914254 0.9273 ch7 0.0759064 0.3556409 144 0.2134356 0.8313 ch8 -0.2188304 0.3556409 144 -0.6153129 0.5393 Correlation: (Intr) ch1 ch2 ch3 ch4 ch5 ch6 ch7 ch1 0.000 ch2 0.000 -0.125 ch3 0.000 -0.125 -0.125 ch4 0.000 -0.125 -0.125 -0.125 ch5 0.000 -0.125 -0.125 -0.125 -0.125 ch6 0.000 -0.125 -0.125 -0.125 -0.125 -0.125 ch7 0.000 -0.125 -0.125 -0.125 -0.125 -0.125 -0.125 ch8 0.000 -0.125 -0.125 -0.125 -0.125 -0.125 -0.125 -0.125 Standardized Within-Group Residuals: Min Q1 Med Q3 Max -3.6501858992 -0.3881757038 -0.0002076963 0.4215089300 4.6394378448 Number of Observations: 171 Number of Groups: 19

anova(m2, type = "marginal")

numDF denDF F-value p-value (Intercept) 1 144 4.601445 0.0336 ch 8 144 0.494820 0.8584

Q4

#intraclass correlation dta <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/skin_resistance.txt", header = T) # coerce pair variable to factor type dta$Subject <- factor(dta$Subject) # show first 6 lines head(dta)

Electrode Subject Kohm 1 E1 S1 500 2 E2 S1 400 3 E3 S1 98 4 E4 S1 200 5 E5 S1 250 6 E1 S2 660

# dot plot ggplot(dta, aes(reorder(Subject, Kohm, mean), Kohm, color = Subject)) + geom_point() + geom_hline(yintercept = mean(dta$Kohm), linetype = "dashed") + labs(y = "Kohm", x = "Subject") + theme_bw() + theme(legend.position = "NONE")

# random effect anova for balanced designs summary(m0 <- aov(Kohm ~ Error(Subject), data = dta))

Error: Subject Df Sum Sq Mean Sq F value Pr(>F) Residuals 15 1399155 93277 Error: Within Df Sum Sq Mean Sq F value Pr(>F) Residuals 64 1624299 25380

1.Estimated Var(Electrode) = (93277- 25380) / 5 = 13579.4
2.The estimate of intraclass correlation = 13579.4 / (13579.4+25380) = 0.349

Q5

library(mets) data(dermalridgesMZ) dta <- dermalridgesMZ # coerce pair variable to factor type dta$id <- factor(dta$id) # dot plot (left) ggplot(dta, aes(reorder(id, left, mean), left, color = id)) + geom_point() + geom_hline(yintercept = mean(dta$left), linetype = "dashed") + labs(y = "Number of finger ridges on LEFT", x = "Twin ID") + theme_bw() + theme(legend.position = "NONE")

# random effect anova for balanced designs summary(m0 <- aov(left ~ Error(id), data = dta))

Error: id Df Sum Sq Mean Sq F value Pr(>F) Residuals 17 16832 990.1 Error: Within Df Sum Sq Mean Sq F value Pr(>F) Residuals 18 644.5 35.81

# dot plot (right) ggplot(dta, aes(reorder(id, right, mean), right, color = id)) + geom_point() + geom_hline(yintercept = mean(dta$right), linetype = "dashed") + labs(y = "Number of finger ridges on RIGHT", x = "Twin ID") + theme_bw() + theme(legend.position = "NONE")

# random effect anova for balanced designs summary(m0 <- aov(right ~ Error(id), data = dta))

Error: id Df Sum Sq Mean Sq F value Pr(>F) Residuals 17 14407 847.4 Error: Within Df Sum Sq Mean Sq F value Pr(>F) Residuals 18 599 33.28

1. left estimate intracorrelation : 0.930
2. right estimate intracorrelation : 0.924

Q6

dta <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/dog_scans.txt", header = T) # fixed effect anova(m0 <- lm( Thickness ~ Dog + Observer + Scan + Dog:Observer, data = dta))

## Analysis of Variance Table ## ## Response: Thickness ## Df Sum Sq Mean Sq F value Pr(>F) ## Dog 10 43.669 4.3669 10.8028 0.000711 *** ## Observer 2 1.849 0.9243 2.2866 0.157410 ## Scan 18 33.016 1.8342 4.5376 0.012683 * ## Dog:Observer 15 18.107 1.2071 2.9862 0.050982 . ## Residuals 9 3.638 0.4042 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

1.There was variability between the heart wall thickness of individual dogs.
2.There was variability between scans.
3.There were some variation between the observers in their overall ratings for each dog.
4.There were no systematic bias between observers.

Q7

dta <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/healthAwareness.txt", header=T) dta %<>% mutate( CityID = State*10 + City) # plot ggplot(dta, aes(x = as.factor(CityID), y = Index)) + geom_point(col = "blue", shape = 18) + geom_vline(xintercept = with(dta, CityID)) + geom_hline(yintercept = with(dta,tapply(Index, State, mean)), lty = 2) + labs(x = "City ID", y = "Health awareness index") + facet_wrap( ~ State) + theme_bw()

# lm library(lme4) anova(m0 <- lm(Index ~ State/City, data = dta))

## Analysis of Variance Table ## ## Response: Index ## Df Sum Sq Mean Sq F value Pr(>F) ## State 1 1470.0 1470.00 6.5173 0.01441 * ## State:City 1 94.5 94.46 0.4188 0.52105 ## Residuals 42 9473.2 225.55 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(m1 <- lmer(Index ~ 1 + (1 | State/City), data = dta))

## Linear mixed model fit by REML ['lmerMod'] ## Formula: Index ~ 1 + (1 | State/City) ## Data: dta ## ## REML criterion at convergence: 337 ## ## Scaled residuals: ## Min 1Q Median 3Q Max ## -1.8204 -0.7006 0.0102 0.8238 1.9435 ## ## Random effects: ## Groups Name Variance Std.Dev. ## City:State (Intercept) 0.00 0.000 ## State (Intercept) 226.12 15.037 ## Residual 96.69 9.833 ## Number of obs: 45, groups: City:State, 9; State, 3 ## ## Fixed effects: ## Estimate Std. Error t value ## (Intercept) 41.689 8.805 4.735

# nlme library(nlme) summary(m2 <- lme(Index ~ 1, random = ~1 | State/City, correlation = corCompSymm(form = ~ 1 | State/City), data = dta))

## Linear mixed-effects model fit by REML ## Data: dta ## AIC BIC logLik ## 343.5714 352.4924 -166.7857 ## ## Random effects: ## Formula: ~1 | State ## (Intercept) ## StdDev: 15.18879 ## ## Formula: ~1 | City %in% State ## (Intercept) Residual ## StdDev: 1.128569 9.530402 ## ## Correlation Structure: Compound symmetry ## Formula: ~1 | State/City ## Parameter estimate(s): ## Rho ## -0.1906437 ## Fixed effects: Index ~ 1 ## Value Std.Error DF t-value p-value ## (Intercept) 41.68889 8.804571 36 4.734914 0 ## ## Standardized Within-Group Residuals: ## Min Q1 Med Q3 Max ## -1.83856604 -0.78560485 -0.01795103 0.78429008 1.94169103 ## ## Number of Observations: 45 ## Number of Groups: ## State City %in% State ## 3 9

Q8

dta <- read.table("http://www.biostat.ucsf.edu/vgsm/1st_ed/data/gababies.txt", header = T) summary(m2 <- lmer(bweight ~ (1 | momid) + birthord + momage + timesnc + delwght + lowbrth + lastwght + initage + initwght + cinitage , data = dta))

fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients

Linear mixed model fit by REML ['lmerMod'] Formula: bweight ~ (1 | momid) + birthord + momage + timesnc + delwght + lowbrth + lastwght + initage + initwght + cinitage Data: dta REML criterion at convergence: 14548.9 Scaled residuals: Min 1Q Median 3Q Max -6.5008 -0.5949 0.0229 0.6103 4.6381 Random effects: Groups Name Variance Std.Dev. momid (Intercept) 1281 35.78 Residual 123691 351.70 Number of obs: 1000, groups: momid, 200 Fixed effects: Estimate Std. Error t value (Intercept) 2068.12222 105.18821 19.661 birthord 14.13043 10.03942 1.407 momage 1.49459 3.62632 0.412 timesnc 7.82592 4.83761 1.618 delwght -0.17041 0.02231 -7.640 lowbrth -695.37316 26.30080 -26.439 lastwght 0.39002 0.02718 14.350 Correlation of Fixed Effects: (Intr) brthrd momage timsnc dlwght lwbrth birthord -0.194 momage -0.475 0.033 timesnc 0.351 -0.412 -0.781 delwght 0.406 0.116 0.076 -0.183 lowbrth -0.479 0.005 0.031 0.021 -0.194 lastwght -0.743 -0.053 -0.168 0.201 -0.584 0.441 fit warnings: fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients