library(pacman)
p = c("nlme","dplyr", "tidyr", "magrittr", "ggplot2", "HLMdiag", "lme4", "arm")
p_load(p,character.only = TRUE)
data("radon", package = "HLMdiag")
dat = radon
str(dat)
## 'data.frame': 919 obs. of 5 variables:
## $ log.radon : num 0.788 0.788 1.065 0 1.131 ...
## $ basement : int 1 0 0 0 0 0 0 0 0 0 ...
## $ uranium : num -0.689 -0.689 -0.689 -0.689 -0.847 ...
## $ county : int 1 1 1 1 2 2 2 2 2 2 ...
## $ county.name: Factor w/ 85 levels "AITKIN","ANOKA",..: 1 1 1 1 2 2 2 2 2 2 ...
sum(sapply(dat,is.na)) #check is there's any missing data
## [1] 0
dat$county %<>% as.factor
“basement”" was considered to be individual level/measurement error.
As a result, the fixed effect model would be like:
summary(m <- lm(log.radon~basement+county, data = dat))
##
## Call:
## lm(formula = log.radon ~ basement + county, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.14595 -0.45405 0.00065 0.45376 2.65987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.84054 0.37866 2.220 0.02670 *
## basement -0.72054 0.07352 -9.800 < 2e-16 ***
## county2 0.03428 0.39274 0.087 0.93047
## county3 0.68816 0.57854 1.189 0.23459
## county4 0.71218 0.47470 1.500 0.13392
## county5 0.59203 0.53487 1.107 0.26867
## county6 0.67247 0.57802 1.163 0.24500
## county7 1.17162 0.42892 2.732 0.00644 **
## county8 1.14904 0.53519 2.147 0.03208 *
## county9 0.16250 0.44764 0.363 0.71669
## county10 0.72336 0.48861 1.480 0.13913
## county11 0.56059 0.50775 1.104 0.26988
## county12 0.88971 0.53519 1.662 0.09680 .
## county13 0.19818 0.48861 0.406 0.68514
## county14 1.14784 0.42886 2.677 0.00759 **
## county15 0.49743 0.53519 0.929 0.35292
## county16 -0.17568 0.65534 -0.268 0.78871
## county17 0.43426 0.53613 0.810 0.41818
## county18 0.28101 0.43672 0.643 0.52011
## county19 0.49777 0.39027 1.275 0.20251
## county20 0.95977 0.57802 1.660 0.09720 .
## county21 0.89345 0.45467 1.965 0.04974 *
## county22 -0.20375 0.48831 -0.417 0.67660
## county23 0.55945 0.65534 0.854 0.39353
## county24 1.26108 0.45456 2.774 0.00566 **
## county25 1.11018 0.42892 2.588 0.00981 **
## county26 0.52004 0.38549 1.349 0.17770
## county27 0.93282 0.48831 1.910 0.05644 .
## county28 0.40105 0.50807 0.789 0.43013
## county29 0.21546 0.57802 0.373 0.70942
## county30 0.08522 0.44204 0.193 0.84718
## county31 1.18003 0.50775 2.324 0.02036 *
## county32 0.39575 0.53519 0.739 0.45983
## county33 1.22133 0.53519 2.282 0.02274 *
## county34 0.74990 0.57854 1.296 0.19527
## county35 -0.02134 0.47470 -0.045 0.96415
## county36 2.11842 0.65534 3.233 0.00128 **
## county37 -0.43845 0.45467 -0.964 0.33516
## county38 1.02717 0.53519 1.919 0.05529 .
## county39 0.90752 0.50743 1.788 0.07407 .
## county40 1.47526 0.53487 2.758 0.00594 **
## county41 1.12661 0.46330 2.432 0.01524 *
## county42 0.52044 0.84590 0.615 0.53856
## county43 0.76170 0.45511 1.674 0.09457 .
## county44 0.20045 0.47418 0.423 0.67261
## county45 0.45487 0.43252 1.052 0.29325
## county46 0.37407 0.50775 0.737 0.46150
## county47 0.04339 0.65534 0.066 0.94723
## county48 0.30758 0.45467 0.676 0.49892
## county49 0.86157 0.43256 1.992 0.04672 *
## county50 1.65266 0.84590 1.954 0.05107 .
## county51 1.32450 0.53519 2.475 0.01353 *
## county52 1.08715 0.57802 1.881 0.06034 .
## county53 0.41026 0.57776 0.710 0.47784
## county54 0.46621 0.40987 1.137 0.25567
## county55 0.77745 0.46330 1.678 0.09371 .
## county56 0.26056 0.57854 0.450 0.65256
## county57 -0.07836 0.48831 -0.160 0.87255
## county58 1.02038 0.53487 1.908 0.05677 .
## county59 0.88124 0.53519 1.647 0.10002
## county60 0.43885 0.65534 0.670 0.50327
## county61 0.31819 0.40132 0.793 0.42809
## county62 1.14247 0.50743 2.251 0.02462 *
## county63 0.83016 0.57776 1.437 0.15113
## county64 1.00729 0.44181 2.280 0.02286 *
## county65 0.45858 0.65534 0.700 0.48427
## county66 0.82520 0.42950 1.921 0.05503 .
## county67 0.96258 0.43252 2.226 0.02631 *
## county68 0.24948 0.46357 0.538 0.59060
## county69 0.40191 0.53519 0.751 0.45288
## county70 0.02709 0.38476 0.070 0.94388
## county71 0.65130 0.40740 1.599 0.11027
## county72 0.73936 0.44788 1.651 0.09916 .
## county73 0.95122 0.65534 1.451 0.14702
## county74 0.14650 0.53519 0.274 0.78436
## county75 0.88318 0.57776 1.529 0.12674
## county76 1.16790 0.53487 2.184 0.02928 *
## county77 0.98114 0.47418 2.069 0.03884 *
## county78 0.44515 0.50754 0.877 0.38070
## county79 -0.22566 0.53487 -0.422 0.67321
## county80 0.48898 0.39445 1.240 0.21545
## county81 1.86899 0.57854 3.231 0.00128 **
## county82 1.38947 0.84590 1.643 0.10084
## county83 0.78238 0.43250 1.809 0.07082 .
## county84 0.80481 0.43269 1.860 0.06323 .
## county85 0.34598 0.65534 0.528 0.59768
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7564 on 833 degrees of freedom
## Multiple R-squared: 0.287, Adjusted R-squared: 0.2142
## F-statistic: 3.945 on 85 and 833 DF, p-value: < 2.2e-16
If random effect model was used,
“basement”" would still be considered as individual level/measurement error.
“county” " would be considered as random effect
The random effect model would be like:
m0 = lmer(log.radon~basement+(1|county), data = dat) #random effect and individual level
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.radon ~ basement + (1 | county)
## Data: dat
##
## REML criterion at convergence: 2171.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.3989 -0.6155 0.0029 0.6405 3.4281
##
## Random effects:
## Groups Name Variance Std.Dev.
## county (Intercept) 0.1077 0.3282
## Residual 0.5709 0.7556
## Number of obs: 919, groups: county, 85
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.46160 0.05158 28.339
## basement -0.69299 0.07043 -9.839
##
## Correlation of Fixed Effects:
## (Intr)
## basement -0.288
Use a random effect model could even pick out the counties that had higher log.radon than overall average level.
rf = ranef(m0) #see how intercept shift in particular county
print(table(dat[which(rf[[1]]>0),]$county.name), zero.print = ".")
##
## AITKIN ANOKA BECKER BELTRAMI
## 2 25 2 4
## BENTON BIG STONE BLUE EARTH BROWN
## 1 3 7 .
## CARLTON CARVER CASS CHIPPEWA
## . . . .
## CHISAGO CLAY CLEARWATER COOK
## . . . .
## COTTONWOOD CROW WING DAKOTA DODGE
## . . . .
## DOUGLAS FARIBAULT FILLMORE FREEBORN
## . . . .
## GOODHUE HENNEPIN HOUSTON HUBBARD
## . . . .
## ISANTI ITASCA JACKSON KANABEC
## . . . .
## KANDIYOHI KITTSON KOOCHICHING LAC QUI PARLE
## . . . .
## LAKE LAKE OF THE WOODS LE SUEUR LINCOLN
## . . . .
## LYON MAHNOMEN MARSHALL MARTIN
## . . . .
## MCLEOD MEEKER MILLE LACS MORRISON
## . . . .
## MOWER MURRAY NICOLLET NOBLES
## . . . .
## NORMAN OLMSTED OTTER TAIL PENNINGTON
## . . . .
## PINE PIPESTONE POLK POPE
## . . . .
## RAMSEY REDWOOD RENVILLE RICE
## . . . .
## ROCK ROSEAU SCOTT SHERBURNE
## . . . .
## SIBLEY ST LOUIS STEARNS STEELE
## . . . .
## STEVENS SWIFT TODD TRAVERSE
## . . . .
## WABASHA WADENA WASECA WASHINGTON
## . . . .
## WATONWAN WILKIN WINONA WRIGHT
## . . . .
## YELLOW MEDICINE
## .
data("ergoStool", package = "nlme")
dat = ergoStool
str(dat)
## Classes 'nffGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 36 obs. of 3 variables:
## $ effort : num 12 15 12 10 10 14 13 12 7 14 ...
## $ Type : Factor w/ 4 levels "T1","T2","T3",..: 1 2 3 4 1 2 3 4 1 2 ...
## $ Subject: Ord.factor w/ 9 levels "8"<"5"<"4"<"9"<..: 8 8 8 8 9 9 9 9 6 6 ...
## - attr(*, "formula")=Class 'formula' length 3 effort ~ Type | Subject
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Type of stool"
## ..$ y: chr "Effort required to arise"
## - attr(*, "units")=List of 1
## ..$ y: chr "(Borg scale)"
## - attr(*, "FUN")=function (x)
## ..- attr(*, "source")= chr "function (x) mean(x, na.rm = TRUE)"
## - attr(*, "order.groups")= logi TRUE
#Subject was considered as individual level error.
m0 = lmer(effort~Type+(1|Subject), data = dat)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: effort ~ Type + (1 | Subject)
## Data: dat
##
## REML criterion at convergence: 121.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.80200 -0.64317 0.05783 0.70100 1.63142
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 1.775 1.332
## Residual 1.211 1.100
## Number of obs: 36, groups: Subject, 9
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.5556 0.5760 14.853
## TypeT2 3.8889 0.5187 7.498
## TypeT3 2.2222 0.5187 4.284
## TypeT4 0.6667 0.5187 1.285
##
## Correlation of Fixed Effects:
## (Intr) TypeT2 TypeT3
## TypeT2 -0.450
## TypeT3 -0.450 0.500
## TypeT4 -0.450 0.500 0.500
#95% CI
confint(m0, oldNames = FALSE) #the table is not completely similar to teacher's example
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|Subject 0.7342354 2.287261
## sigma 0.8119798 1.390104
## (Intercept) 7.4238425 9.687269
## TypeT2 2.8953043 4.882473
## TypeT3 1.2286377 3.215807
## TypeT4 -0.3269179 1.660251
dat = read.table("D:/Multilevel_publish/Multilevel/week4_ANOVA/thetaEEG.txt", header = TRUE)
str(dat)
## 'data.frame': 19 obs. of 10 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Ch3 : num -3.54 5.72 0.52 0 2.07 1.67 9.13 -0.43 -0.56 1.28 ...
## $ Ch4 : num -3.11 5.07 -0.18 0.74 0.76 ...
## $ Ch5 : num -0.24 6.87 0.9 1.1 3.51 2.77 3.44 -0.31 -1.22 1.89 ...
## $ Ch6 : num 0.42 5.96 0.6 0.13 0.6 4.55 4.8 -0.61 0.67 1.77 ...
## $ Ch7 : num -0.49 8.2 1.27 0.19 3.71 1.8 0.48 -1.04 -0.97 1.83 ...
## $ Ch8 : num 2.13 4.87 1.28 0.07 1.86 4.79 1.63 -0.13 -0.98 0.91 ...
## $ Ch17: num -4.15 5.48 -0.95 0.8 1.49 4.51 9.94 -0.61 0 1.4 ...
## $ Ch18: num 2.87 5.57 1.74 0.25 3.11 3.24 1.34 -0.61 -1.22 1.1 ...
## $ Ch19: num 1.34 6.33 0.79 -0.66 1.8 3.99 1.53 -0.43 -0.91 -0.12 ...
dat$ID %<>% as.factor
#Arrange the data
data <- dat %>% gather(key = channel, value = change, Ch3, Ch4,Ch5,Ch6,Ch7,Ch8,Ch17,Ch18,Ch19) %>% mutate(channel = as.factor(channel))
str(data)
## 'data.frame': 171 obs. of 3 variables:
## $ ID : Factor w/ 19 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ channel: Factor w/ 9 levels "Ch17","Ch18",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ change : num -3.54 5.72 0.52 0 2.07 1.67 9.13 -0.43 -0.56 1.28 ...
#Channel was considered as random effect
m0 = lmer(change~ID+(1|channel)-1, data = data)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: change ~ ID + (1 | channel) - 1
## Data: data
##
## REML criterion at convergence: 620.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7493 -0.3818 0.0185 0.3853 4.8678
##
## Random effects:
## Groups Name Variance Std.Dev.
## channel (Intercept) 0.000 0.000
## Residual 2.632 1.622
## Number of obs: 171, groups: channel, 9
##
## Fixed effects:
## Estimate Std. Error t value
## ID1 -0.5300 0.5407 -0.980
## ID2 6.0078 0.5407 11.110
## ID3 0.6633 0.5407 1.227
## ID4 0.2911 0.5407 0.538
## ID5 2.1011 0.5407 3.886
## ID6 3.5000 0.5407 6.473
## ID7 5.0233 0.5407 9.290
## ID8 -0.6400 0.5407 -1.184
## ID9 -0.4744 0.5407 -0.877
## ID10 1.2200 0.5407 2.256
## ID11 2.4011 0.5407 4.440
## ID12 1.1933 0.5407 2.207
## ID13 -1.7556 0.5407 -3.247
## ID14 -0.2633 0.5407 -0.487
## ID15 -0.2622 0.5407 -0.485
## ID16 3.3767 0.5407 6.244
## ID17 0.9633 0.5407 1.781
## ID18 -1.4300 0.5407 -2.645
## ID19 -1.0578 0.5407 -1.956
##
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
#95% CI
confint(m0, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|channel 0.00000000 0.34744215
## sigma 1.38063561 1.70706290
## ID1 -1.53486379 0.47486379
## ID2 5.00291399 7.01264157
## ID3 -0.34153045 1.66819712
## ID4 -0.71375268 1.29597490
## ID5 1.09624732 3.10597490
## ID6 2.49513621 4.50486379
## ID7 4.01846955 6.02819712
## ID8 -1.64486379 0.36486379
## ID9 -1.47930823 0.53041934
## ID10 0.21513621 2.22486379
## ID11 1.39624732 3.40597490
## ID12 0.18846955 2.19819712
## ID13 -2.76041934 -0.75069177
## ID14 -1.26819712 0.74153045
## ID15 -1.26708601 0.74264157
## ID16 2.37180288 4.38153045
## ID17 -0.04153045 1.96819712
## ID18 -2.43486379 -0.42513621
## ID19 -2.06264157 -0.05291399
Some participants’ brain activities changed.
dat = read.table("D:/Multilevel_publish/Multilevel/week4_ANOVA/skin_resistance.txt", header = TRUE)
str(dat)
## 'data.frame': 80 obs. of 3 variables:
## $ Electrode: Factor w/ 5 levels "E1","E2","E3",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ Subject : Factor w/ 16 levels "S1","S10","S11",..: 1 1 1 1 1 9 9 9 9 9 ...
## $ Kohm : int 500 400 98 200 250 660 600 600 75 310 ...
#Subject was considered as randome effect
m0 = lmer(Kohm~Electrode+(1|Subject), data = dat)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Kohm ~ Electrode + (1 | Subject)
## Data: dat
##
## REML criterion at convergence: 999.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4000 -0.4998 -0.0682 0.3076 3.6676
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 14180 119.1
## Residual 22379 149.6
## Number of obs: 80, groups: Subject, 16
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 181.69 47.80 3.801
## ElectrodeE2 105.63 52.89 1.997
## ElectrodeE3 76.31 52.89 1.443
## ElectrodeE4 -31.31 52.89 -0.592
## ElectrodeE5 -43.81 52.89 -0.828
##
## Correlation of Fixed Effects:
## (Intr) ElctE2 ElctE3 ElctE4
## ElectrodeE2 -0.553
## ElectrodeE3 -0.553 0.500
## ElectrodeE4 -0.553 0.500 0.500
## ElectrodeE5 -0.553 0.500 0.500 0.500
#95% CI
confint(m0, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|Subject 70.548336 184.6781
## sigma 122.962425 174.0792
## (Intercept) 89.126933 274.2481
## ElectrodeE2 3.729397 207.5206
## ElectrodeE3 -25.583103 178.2081
## ElectrodeE4 -133.208103 70.5831
## ElectrodeE5 -145.708103 58.0831
The 95% CI of Electrode2 did not include zero point, suggesting that Electrode2 had significant difference from Electrode1.
On the other hand, other electrode seemed to be similar with each other.
Thus, Electrode2 might be problematic.
data("dermalridgesMZ", package = "mets")
dat = dermalridgesMZ
str(dat)
## 'data.frame': 36 obs. of 5 variables:
## $ sex : Factor w/ 2 levels "female","male": 1 1 1 1 2 2 1 1 1 1 ...
## $ left : int 95 90 93 80 99 94 55 48 41 26 ...
## $ right: num 83 85 90 80 94 99 61 54 24 42 ...
## $ group: Factor w/ 2 levels "normal","psychotic&neurotic": 2 2 2 2 2 2 2 2 2 2 ...
## $ id : int 1 1 2 2 3 3 4 4 5 5 ...
## - attr(*, "na.action")=Class 'omit' Named int [1:12] 37 38 39 40 41 42 43 44 45 46 ...
## .. ..- attr(*, "names")= chr [1:12] "37" "38" "39" "40" ...
dat$id %<>% as.factor
m0 = lmer(right~left+(1|id), data = dat)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: right ~ left + (1 | id)
## Data: dat
##
## REML criterion at convergence: 250.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.66773 -0.46281 0.02278 0.59754 1.60380
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.00 0.000
## Residual 63.38 7.961
## Number of obs: 36, groups: id, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 10.07986 4.13771 2.436
## left 0.85750 0.06022 14.240
##
## Correlation of Fixed Effects:
## (Intr)
## left -0.947
ranef(m0)
## $id
## (Intercept)
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 0
## 14 0
## 15 0
## 16 0
## 17 0
## 18 0
Because the random effect seemed to be minuscule,
a fixed effect model was appplied to explore the data.
m1 = lm(right~left, data = dat)
summary(m1)
##
## Call:
## lm(formula = right ~ left, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.2374 -3.6843 0.1813 4.7570 12.7676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.07986 4.13771 2.436 0.0202 *
## left 0.85750 0.06022 14.240 6.86e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.961 on 34 degrees of freedom
## Multiple R-squared: 0.8564, Adjusted R-squared: 0.8522
## F-statistic: 202.8 on 1 and 34 DF, p-value: 6.857e-16
anova(m0,m1) #examine if two models perform differently
## refitting model(s) with ML (instead of REML)
## Data: dat
## Models:
## m1: right ~ left
## m0: right ~ left + (1 | id)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 3 255.47 260.22 -124.74 249.47
## m0 4 257.47 263.81 -124.74 249.47 0 1 1
Two models did not differ.
Thus, a fixed effect model might be good enough in this example.
dat = read.table("D:/Multilevel_publish/Multilevel/week4_ANOVA/dog_scans.txt", header = TRUE)
str(dat)
## 'data.frame': 55 obs. of 4 variables:
## $ Dog : Factor w/ 11 levels "Aussie","Corrie",..: 9 11 11 11 11 11 11 7 7 7 ...
## $ Observer : Factor w/ 3 levels "A","B","C": 3 3 2 1 3 2 1 3 2 1 ...
## $ Scan : Factor w/ 29 levels "dog_100","dog_101",..: 14 15 15 15 16 16 16 17 17 17 ...
## $ Thickness: num 2.6 4.93 3.47 5.29 2.54 2.8 3 3.57 3.3 2.66 ...
sum(sapply(dat, is.na))
## [1] 0
#Adopted a fixed effect model:
m1 = lm(Thickness~., data = dat)
anova(m1)
## Analysis of Variance Table
##
## Response: Thickness
## Df Sum Sq Mean Sq F value Pr(>F)
## Dog 10 43.669 4.3669 4.8197 0.0007705 ***
## Observer 2 1.849 0.9243 1.0202 0.3756449
## Scan 18 33.016 1.8342 2.0245 0.0534845 .
## Residuals 24 21.745 0.9060
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
If a fixed effect model was used,
the variance would be misattributed to dogs.
dat = read.table("D:/Multilevel_publish/Multilevel/week4_ANOVA/healthAwareness.txt", header = TRUE)
str(dat)
## 'data.frame': 45 obs. of 4 variables:
## $ Index : num 42 56 35 40 28 26 38 42 35 53 ...
## $ State : int 1 1 1 1 1 1 1 1 1 1 ...
## $ City : int 1 1 1 1 1 2 2 2 2 2 ...
## $ Household: int 1 2 3 4 5 1 2 3 4 5 ...
dat$State %<>% as.factor
dat$City %<>% as.factor
dat$Household %<>% as.factor
m0 = lmer(Index~State+(1|City)+(1|Household)-1, data = dat)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Index ~ State + (1 | City) + (1 | Household) - 1
## Data: dat
##
## REML criterion at convergence: 319.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.86449 -0.69834 0.01356 0.77970 1.94585
##
## Random effects:
## Groups Name Variance Std.Dev.
## Household (Intercept) 9.925e-14 3.150e-07
## City (Intercept) 0.000e+00 0.000e+00
## Residual 9.669e+01 9.833e+00
## Number of obs: 45, groups: Household, 5; City, 3
##
## Fixed effects:
## Estimate Std. Error t value
## State1 40.867 2.539 16.10
## State2 57.333 2.539 22.58
## State3 26.867 2.539 10.58
##
## Correlation of Fixed Effects:
## State1 State2
## State2 0.000
## State3 0.000 0.000
#95% CI
confint(m0, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|Household 0.000000 5.457223
## sd_(Intercept)|City 0.000000 4.932286
## sigma 7.829822 11.859121
## State1 35.952706 45.780606
## State2 52.421585 62.245082
## State3 21.954918 31.778415
dat = read.table("D:/Multilevel_publish/Multilevel/week4_ANOVA/gababies.txt", header = TRUE)
str(dat)
## 'data.frame': 1000 obs. of 11 variables:
## $ momid : int 39 39 39 39 39 62 62 62 62 62 ...
## $ birthord: int 1 2 3 4 5 1 2 3 4 5 ...
## $ momage : int 15 17 19 24 25 17 21 25 27 28 ...
## $ timesnc : int 0 2 4 9 10 0 4 8 10 11 ...
## $ delwght : int -1240 -1240 -1240 -1240 -1240 -170 -170 -170 -170 -170 ...
## $ lowbrth : int 0 0 0 0 1 1 1 1 1 1 ...
## $ bweight : int 3720 3260 3910 3320 2480 2381 2835 2381 2268 2211 ...
## $ lastwght: int 2480 2480 2480 2480 2480 2211 2211 2211 2211 2211 ...
## $ initage : int 15 15 15 15 15 17 17 17 17 17 ...
## $ initwght: int 3720 3720 3720 3720 3720 2381 2381 2381 2381 2381 ...
## $ cinitage: num -2.55 -2.55 -2.55 -2.55 -2.55 ...
dat$momid %<>% as.factor
m0 = lmer(bweight~(1|momid)+(1|birthord)+(1|momage)+(1|timesnc)+(1|initage)+(1|initwght)-1, data = dat)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: bweight ~ (1 | momid) + (1 | birthord) + (1 | momage) + (1 |
## timesnc) + (1 | initage) + (1 | initwght) - 1
## Data: dat
##
## REML criterion at convergence: 15310.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2376 -0.4225 0.0343 0.5248 3.3038
##
## Random effects:
## Groups Name Variance Std.Dev.
## momid (Intercept) 57984 240.80
## initwght (Intercept) 92355 303.90
## momage (Intercept) 1268 35.60
## timesnc (Intercept) 6049 77.78
## initage (Intercept) 1230 35.07
## birthord (Intercept) 0 0.00
## Residual 195048 441.64
## Number of obs: 1000, groups:
## momid, 200; initwght, 107; momage, 29; timesnc, 21; initage, 18; birthord, 5
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3139.35 46.96 66.86
#95% CI
confint(m0, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|momid 178.5536 315.33665
## sd_(Intercept)|initwght 201.3620 390.38820
## sd_(Intercept)|momage 0.0000 100.07861
## sd_(Intercept)|timesnc 0.0000 153.49611
## sd_(Intercept)|initage 0.0000 159.97113
## sd_(Intercept)|birthord 0.0000 94.96695
## sigma 420.6359 464.54893
## (Intercept) 3044.6789 3233.94968
momage, timesnc, initage, birthord did not contribute much to the model.
Mom and the birthweight of the first child were most important to the model.
Thus, I revised the model:
m1 = lmer(bweight~(1|momid)+(1|momage)+(1|timesnc)+(1|initage)+(1|initwght)-1, data = dat)
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## bweight ~ (1 | momid) + (1 | momage) + (1 | timesnc) + (1 | initage) +
## (1 | initwght) - 1
## Data: dat
##
## REML criterion at convergence: 15310.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2376 -0.4225 0.0343 0.5248 3.3038
##
## Random effects:
## Groups Name Variance Std.Dev.
## momid (Intercept) 57984 240.80
## initwght (Intercept) 92355 303.90
## momage (Intercept) 1268 35.60
## timesnc (Intercept) 6049 77.78
## initage (Intercept) 1230 35.07
## Residual 195048 441.64
## Number of obs: 1000, groups:
## momid, 200; initwght, 107; momage, 29; timesnc, 21; initage, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3139.35 46.96 66.86
anova(m0,m1) #examine if the two models differ
## refitting model(s) with ML (instead of REML)
## Data: dat
## Models:
## m1: bweight ~ (1 | momid) + (1 | momage) + (1 | timesnc) + (1 | initage) +
## m1: (1 | initwght) - 1
## m0: bweight ~ (1 | momid) + (1 | birthord) + (1 | momage) + (1 |
## m0: timesnc) + (1 | initage) + (1 | initwght) - 1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 7 15334 15368 -7659.9 15320
## m0 8 15336 15375 -7659.9 15320 0 1 1
#95% CI
confint(m1, oldNames = FALSE)
## Computing profile confidence intervals ...
## 2.5 % 97.5 %
## sd_(Intercept)|momid 178.55313 315.3359
## sd_(Intercept)|initwght 201.36091 390.3882
## sd_(Intercept)|momage 0.00000 100.0788
## sd_(Intercept)|timesnc 25.24335 153.4966
## sd_(Intercept)|initage 0.00000 159.9679
## sigma 420.63593 464.5489
## (Intercept) 3045.00959 3233.9506
Results showed that mom and birthweight of the first child were very important to the birth weight of a child.
Also, years since first birth contributed to the model as well.
The average birth weight was 3139.35 (lower:3045.00, upper:3233.95)