Prepare data
## Load dataset from Applied Longitudinal Analysis
dental <- read.table("http://www.hsph.harvard.edu/fitzmaur/ala2e/dental.dat",
col.names = c("id","gender","age8","age10","age12","age14"))
dental
id gender age8 age10 age12 age14
1 1 F 21.0 20.0 21.5 23.0
2 2 F 21.0 21.5 24.0 25.5
3 3 F 20.5 24.0 24.5 26.0
4 4 F 23.5 24.5 25.0 26.5
5 5 F 21.5 23.0 22.5 23.5
6 6 F 20.0 21.0 21.0 22.5
7 7 F 21.5 22.5 23.0 25.0
8 8 F 23.0 23.0 23.5 24.0
9 9 F 20.0 21.0 22.0 21.5
10 10 F 16.5 19.0 19.0 19.5
11 11 F 24.5 25.0 28.0 28.0
12 12 M 26.0 25.0 29.0 31.0
13 13 M 21.5 22.5 23.0 26.5
14 14 M 23.0 22.5 24.0 27.5
15 15 M 25.5 27.5 26.5 27.0
16 16 M 20.0 23.5 22.5 26.0
17 17 M 24.5 25.5 27.0 28.5
18 18 M 22.0 22.0 24.5 26.5
19 19 M 24.0 21.5 24.5 25.5
20 20 M 23.0 20.5 31.0 26.0
21 21 M 27.5 28.0 31.0 31.5
22 22 M 23.0 23.0 23.5 25.0
23 23 M 21.5 23.5 24.0 28.0
24 24 M 17.0 24.5 26.0 29.5
25 25 M 22.5 25.5 25.5 26.0
26 26 M 23.0 24.5 26.0 30.0
27 27 M 22.0 21.5 23.5 25.0
## Subset to females only
dental <- subset(dental, gender == "F")
## Load reshape2
library(reshape2)
## Change to long format
dental.long <- melt(dental, id.vars = c("id","gender"),
value.name = "distance", variable.name = "age")
## Delete age from age variable values
dental.long$age <- as.numeric(gsub("age", "", dental.long$age))
## Make ID to factor
dental.long$id <- factor(dental.long$id)
## Create categorical age
dental.long$age.cat <- factor(dental.long$age)
## Dataset ordered by age (within-cluster ordering), then id (cluster id)
head(dental.long, 10)
id gender age distance age.cat
1 1 F 8 21.0 8
2 2 F 8 21.0 8
3 3 F 8 20.5 8
4 4 F 8 23.5 8
5 5 F 8 21.5 8
6 6 F 8 20.0 8
7 7 F 8 21.5 8
8 8 F 8 23.0 8
9 9 F 8 20.0 8
10 10 F 8 16.5 8
The result becomes disorganized when the dataset is ordered by the within-cluster ordering, and then the cluster ID.
Within-cluster occasion variable: age.cat (age as categorical)
The saturated model explains four mean distances by four different ages.
The variance-covariance matrix is dictated by data.
Different variances are allowed at different age.cat values
## Load nlme
library(nlme)
## Cluster ID, then within-cluster ordering
dental.long.by.id.age <- dental.long[with(dental.long, order(id, age)),]
head(dental.long.by.id.age)
id gender age distance age.cat
1 1 F 8 21.0 8
12 1 F 10 20.0 10
23 1 F 12 21.5 12
34 1 F 14 23.0 14
2 2 F 8 21.0 8
13 2 F 10 21.5 10
gls.by.id.age <- gls(model = distance ~ -1 + age.cat,
data = dental.long.by.id.age,
correlation = corSymm(form = ~ as.numeric(age.cat) | id),
weights = varIdent(form = ~ 1 | as.numeric(age.cat)),
method = "REML"
)
## Cluster ID, then reversed within-cluster ordering
dental.long.by.id.rev_age <- dental.long[with(dental.long, order(id,rev(age))),]
head(dental.long.by.id.rev_age)
id gender age distance age.cat
34 1 F 14 23.0 14
23 1 F 12 21.5 12
12 1 F 10 20.0 10
1 1 F 8 21.0 8
35 2 F 14 25.5 14
24 2 F 12 24.0 12
gls.by.id.rev_age <- gls(model = distance ~ -1 + age.cat,
data = dental.long.by.id.rev_age,
correlation = corSymm(form = ~ as.numeric(age.cat) | id),
weights = varIdent(form = ~ 1 | as.numeric(age.cat)),
method = "REML"
)
## within-cluster ordering, then cluster id
dental.long.by.age.id <- dental.long[with(dental.long, order(age, id)),]
head(dental.long.by.age.id)
id gender age distance age.cat
1 1 F 8 21.0 8
2 2 F 8 21.0 8
3 3 F 8 20.5 8
4 4 F 8 23.5 8
5 5 F 8 21.5 8
6 6 F 8 20.0 8
gls.by.age.id <- gls(model = distance ~ -1 + age.cat,
data = dental.long.by.age.id,
correlation = corSymm(form = ~ as.numeric(age.cat) | id),
weights = varIdent(form = ~ 1 | as.numeric(age.cat)),
method = "REML"
)
Demonstration of effects of data configuration on the variance-covariance matrix returned by getVarCov()
Variance-covariance matrix from raw data
cov(dental[,c("age8","age10","age12","age14")])
age8 age10 age12 age14
age8 4.513636 3.354545 4.331818 4.356818
age10 3.354545 3.618182 4.027273 4.077273
age12 4.331818 4.027273 5.590909 5.465909
age14 4.356818 4.077273 5.465909 5.940909
Variance-covariance matrix from data sorted by ID, then age (CORRECT RESULT)
getVarCov(gls.by.id.age)
Marginal variance covariance matrix
[,1] [,2] [,3] [,4]
[1,] 4.5136 3.3545 4.3318 4.3568
[2,] 3.3545 3.6182 4.0273 4.0773
[3,] 4.3318 4.0273 5.5909 5.4659
[4,] 4.3568 4.0773 5.4659 5.9409
Standard Deviations: 2.1245 1.9022 2.3645 2.4374
When the data are ordered by the patient id (cluster ID) and then age (within-cluster ordering), the matrix is correct, and is the same as the raw data matrix. This is an expected result as the data is balanced and complete.
Variance-covariance matrix from data sorted by ID, then reversed age (EXPECTED REVERSE RESULT)
getVarCov(gls.by.id.rev_age)
Marginal variance covariance matrix
[,1] [,2] [,3] [,4]
[1,] 5.9409 5.4659 4.0773 4.3568
[2,] 5.4659 5.5909 4.0273 4.3318
[3,] 4.0773 4.0273 3.6182 3.3545
[4,] 4.3568 4.3318 3.3545 4.5136
Standard Deviations: 2.4374 2.3645 1.9022 2.1245
When the data are ordered by the patient id (cluster ID) and then reversed age (within-cluster ordering), the matrix has reversed rows and columns. This is an expected behavior, I think.
getVarCov(gls.by.age.id)
Marginal variance covariance matrix
[,1] [,2] [,3] [,4]
[1,] 4.5136 4.2985 4.3318 3.4001
[2,] 4.2985 5.9409 5.1605 4.0773
[3,] 4.3318 5.1605 5.5909 4.2656
[4,] 3.4001 4.0773 4.2656 3.6182
Standard Deviations: 2.1245 2.4374 2.3645 1.9022
However, when the data are ordered by the age (within-cluster ordering) and then the patient id (cluster ID), the matrix has an unexpected configuration (the diagonal elements are ordered age8, age14, age12, age10). This appears to be due to problem with the nlme:::getVarCov.gls function.
## Show the code
nlme:::getVarCov.gls
function (obj, individual = 1, ...)
{
S <- corMatrix(obj$modelStruct$corStruct)[[individual]]
if (!is.null(obj$modelStruct$varStruct)) {
ind <- obj$groups == individual
vw <- 1/varWeights(obj$modelStruct$varStruct)[ind]
}
else vw <- rep(1, nrow(S))
vars <- (obj$sigma * vw)^2
result <- t(S * sqrt(vars)) * sqrt(vars)
class(result) <- c("marginal", "VarCov")
attr(result, "group.levels") <- names(obj$groups)
result
}
<bytecode: 0x104df0600>
<environment: namespace:nlme>
## Show data that causes problem (individual 1 at rows 1, 12, 23, and 34)
dental.long.by.age.id[dental.long.by.age.id$id == 1,]
id gender age distance age.cat
1 1 F 8 21.0 8
12 1 F 10 20.0 10
23 1 F 12 21.5 12
34 1 F 14 23.0 14
## Put gls object from the dataset that causes the unexpected behavior to obj
obj <- gls.by.age.id
## Default 1 for individual
individual <- 1
## Extract Correlation Matrix
S <- corMatrix(obj$modelStruct$corStruct)[[individual]]
S
[,1] [,2] [,3] [,4]
[1,] 1.0000000 0.8300900 0.8623146 0.8413558
[2,] 0.8300900 1.0000000 0.8954155 0.8794235
[3,] 0.8623146 0.8954155 1.0000000 0.9484069
[4,] 0.8413558 0.8794235 0.9484069 1.0000000
## The above matrix is the same as the correlation matrix from the raw data, thus correct.
cor(dental[,c("age8","age10","age12","age14")])
age8 age10 age12 age14
age8 1.0000000 0.8300900 0.8623146 0.8413558
age10 0.8300900 1.0000000 0.8954156 0.8794236
age12 0.8623146 0.8954156 1.0000000 0.9484070
age14 0.8413558 0.8794236 0.9484070 1.0000000
## Extract indexes for individual 1 from obj$groups, which corresponds to data (1, 12, 23, and 34)
ind <- obj$groups == individual
which(ind)
[1] 1 12 23 34
Extract Variance Function Weights. THIS APPEARS TO BE ORDERED BY PATIENT ID(CLUSTER ID), THEN AGE (WITHIN-CLUSTER ORDERING), REGARDLESS OF DATA ORDERING. Individual 1 is at positions 1, 2, 3, and 4. (NOT 1, 12, 23, and 34, as in index)
## PROBLEM
##
## Extract Variance Function Weights.
## THIS APPEARS TO BE ORDERED BY CLUSTER ID, THEN WITHIN-CLUSTER ORDERING, REGARDLESS OF DATA ORDERING.
## Individual 1 is at positions 1, 2, 3, and 4. (NOT 1, 12, 23, and 34)
varWeights(obj$modelStruct$varStruct)
1 2 3 4 1 2 3 4 1 2 3
1.0000000 1.1169098 0.8985081 0.8716393 1.0000000 1.1169098 0.8985081 0.8716393 1.0000000 1.1169098 0.8985081
4 1 2 3 4 1 2 3 4 1 2
0.8716393 1.0000000 1.1169098 0.8985081 0.8716393 1.0000000 1.1169098 0.8985081 0.8716393 1.0000000 1.1169098
3 4 1 2 3 4 1 2 3 4 1
0.8985081 0.8716393 1.0000000 1.1169098 0.8985081 0.8716393 1.0000000 1.1169098 0.8985081 0.8716393 1.0000000
2 3 4 1 2 3 4 1 2 3 4
1.1169098 0.8985081 0.8716393 1.0000000 1.1169098 0.8985081 0.8716393 1.0000000 1.1169098 0.8985081 0.8716393
This part extracts positions 1, 12, 23, and 34 (ind), although what we want is at positions 1, 2, 3, and 4. Thus, the element obtained are wrong (named 1, 4, 3, and 2, coming from positions 1, 12, 23, and 34). These are occasion 1 from subject 1, occasion 4 from subject 3, occasion 3 from subject 6, occasion 2 from subject 9.
## PROBLEM
##
## This part extracts positions 1, 12, 23, and 34 (ind), although what we want is at positions 1, 2, 3, and 4.
## Thus, the element obtained are wrong (named 1, 4, 3, and 2, coming from positions 1, 12, 23, and 34).
## These are occasion 1 from subject 1, occasion 4 from subject 3, occasion 3 from subject 6,
## occasion 2 from subject 9.
vw <- 1/varWeights(obj$modelStruct$varStruct)[ind]
vw
1 4 3 2
1.0000000 1.1472635 1.1129560 0.8953274
## The rest has no problem.
vars <- (obj$sigma * vw)^2
result <- t(S * sqrt(vars)) * sqrt(vars)
## Show result
result
[,1] [,2] [,3] [,4]
[1,] 4.513634 4.298478 4.331817 3.400071
[2,] 4.298478 5.940907 5.160505 4.077271
[3,] 4.331817 5.160505 5.590908 4.265608
[4,] 3.400071 4.077271 4.265608 3.618180
## The result matrix is different from the matrix from the raw data.
cov(dental[,c("age8","age10","age12","age14")])
age8 age10 age12 age14
age8 4.513636 3.354545 4.331818 4.356818
age10 3.354545 3.618182 4.027273 4.077273
age12 4.331818 4.027273 5.590909 5.465909
age14 4.356818 4.077273 5.465909 5.940909