library(refundDevel)
## Loading required package: fda
## Loading required package: splines
## Loading required package: Matrix
##
## Attaching package: 'fda'
##
## The following object is masked from 'package:graphics':
##
## matplot
residualfunction <- function(t){
#generate quintic polynomial error functions
drop(poly(t, 5)%*%rnorm(5, sd=sqrt(2:6)))
}
# generate data Y(t) = mu(t) + E(t) + white noise
set.seed(1122)
n <- 50
T <- 30
t <- seq(0,1, l=T)
# E(t): smooth residual functions
E <- t(replicate(n, residualfunction(t)))
int <- matrix(scale(3*dnorm(t, m=.5, sd=.5) - dbeta(t, 5, 2)), byrow=T, n, T)
Y <- int + E + matrix(.2*rnorm(n*T), n, T)
data <- data.frame(Y=I(Y), id = factor(1:n))
# fit model under independence assumption:
summary(m0 <- pffr(Y ~ 1, yind=t, data=data))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y ~ 1
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.00665 0.02116 -0.314 0.753
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Int(t) 14.18 19 119.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.603 Deviance explained = 60.6%
## -REML score = 1856.9 Scale est. = 0.67178 n = 1500 (50 x 30)
## functional random intercept models:
# fit model with iid spline based smooth residuals
summary(m_ri <- pffr(Y ~ 1 + s(id, bs="re"), yind=t, data=data))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y ~ 1 + s(id, bs = "re")
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.03253 0.05693 -0.571 0.568
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Int(t) 15.75 19 25.582 <2e-16 ***
## s(id) 220.83 247 9.266 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.844 Deviance explained = 86.8%
## -REML score = 1480.5 Scale est. = 0.26415 n = 1500 (50 x 30)
t_ri <- predict(m_ri, type="terms")
## drawback: not centered for each t, check colMeans(t_ri[[2]]) !
summary(m_mrf <- pffr(Y ~ 1 + s(id, bs="mrf", k=50, xt=list(penalty=diag(50))),
yind=t, data=data))
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y ~ 1 + s(id, bs = "mrf", k = 50, xt = list(penalty = diag(50)))
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.00665 0.01321 -0.504 0.615
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Int(t) 15.76 19 308.472 <2e-16 ***
## s(id) 223.10 245 9.496 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.845 Deviance explained = 87%
## -REML score = 1477 Scale est. = 0.2616 n = 1500 (50 x 30)
t_mrf <- predict(m_mrf, type="terms")
## always specify k=<# levels> to switch off automatic rank reduction for mrf
## (see smooth.construct.mrf.smooth.spec)
## advantages of using the mrf-basis:
## -- centered for each t, check colMeans(t_mrf[[2]]) !
## -- potentially correlated random intercepts: use inverse of between-subject
## correlation as <penalty> to include marginal dependency
## FPC-based functional random intercept models:
# get first 5 eigenfunctions of residual covariance
# (i.e. first 5 functional PCs of empirical residual process)
Ehat <- resid(m0)
fpcE <- fpca.sc(Ehat, npc=5)
efunctions <- fpcE$efunctions
evalues <- fpcE$evalues
# refit model with fpc-based residuals
m_pcre <- pffr(Y ~ 1 + pcre(id=id, efunctions=efunctions, evalues=evalues, yind=t),
yind=t, data=data)
t_pcre <- predict(m_pcre, type="terms")
summary(m_pcre)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Y ~ 1 + pcre(id = id, efunctions = efunctions, evalues = evalues,
## yind = t)
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.006650 0.005822 -1.142 0.254
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Int(t) 17.54 19 1591.15 <2e-16 ***
## pcre(id,efunct) 240.84 245 73.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.97 Deviance explained = 97.5%
## -REML score = 460.12 Scale est. = 0.050838 n = 1500 (50 x 30)
#compare squared errors
mean((int-fitted(m0))^2)
## [1] 0.005946653
mean((int-t_ri[[1]])^2)
## [1] 0.01219615
mean((int-t_mrf[[1]])^2)
## [1] 0.006723139
mean((int-t_pcre[[1]])^2)
## [1] 0.007333578
mean((E-t_ri[[2]])^2)
## [1] 0.2145552
mean((E-t_mrf[[2]])^2)
## [1] 0.206188
mean((E-t_pcre[[2]])^2)
## [1] 0.02190603
# compare fitted & true smooth residuals and fitted intercept functions:
layout(t(matrix(1:8,4,2)))
matplot(t(E), lty=1, type="l", ylim=range(E, t_pcre[[2]]),
col=scales::alpha(palette(), .3))
matplot(t(t_ri[[2]]), lty=1, type="l", ylim=range(E, t_pcre[[2]]),
col=scales::alpha(palette(), .3))
matplot(t(t_mrf[[2]]), lty=1, type="l", ylim=range(E, t_pcre[[2]]),
col=scales::alpha(palette(), .3))
matplot(t(t_pcre[[2]]), lty=1, type="l", ylim=range(E, t_pcre[[2]]),
col=scales::alpha(palette(), .3))
plot(m0, select=1, main="m0", ylim=range(Y))
lines(t, int[1,], col=rgb(1,0,0,.5))
plot(m_ri, select=1, main="m_ri", ylim=range(Y))
lines(t, int[1,], col=rgb(1,0,0,.5))
## wide CIS because no centering means model is not properly identifiable
## plot method for mrf-basis is broken: below does not work:
# plot(m_mrf, select=1, main="m_mrf", ylim=range(Y))
cf_mrf <- coef(m_mrf)
## using seWithMean for s(t.vec) .
with(cf_mrf$smterms[[1]]$coef,
matplot(t.vec, cbind(value, value + 2*se, value - 2*se),
type = "l", lty = c(1, 2, 2), col = 1, ylim=range(Y),
main="m_mrf"))
lines(t, int[1,], col=rgb(1,0,0,.5))
## more reasonable CIs
plot(m_pcre, select=1, main="m_pcre", ylim=range(Y))
lines(t, int[1,], col=rgb(1,0,0,.5))

## sharper CIs for int because fit is close to perfect
sessionInfo()
## R version 3.1.3 (2015-03-09)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.2 LTS
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=de_DE.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] refundDevel_0.3-3 fda_2.4.4 Matrix_1.1-5
##
## loaded via a namespace (and not attached):
## [1] boot_1.3-15 colorspace_1.2-6 digest_0.6.8 evaluate_0.5.5
## [5] formatR_1.0 gamm4_0.2-3 grid_3.1.3 htmltools_0.2.6
## [9] knitr_1.9 lattice_0.20-30 lme4_1.1-7 magic_1.5-6
## [13] MASS_7.3-39 mgcv_1.8-6 minqa_1.2.4 munsell_0.4.2
## [17] nlme_3.1-120 nloptr_1.0.4 parallel_3.1.3 plyr_1.8.1
## [21] Rcpp_0.11.5 RLRsim_3.1-1 rmarkdown_0.5.1 scales_0.2.4
## [25] stringr_0.6.2 tools_3.1.3