Using ggpredict function
library(nlme)
library(ggeffects)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks nlme::collapse()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
m <- fm2 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1|Subject)
ggpredict(m, "age",interval = "confidence") %>% plot(rawdata = T, dot.alpha = 0.2)
ggpredict(m, "age", type = "re",interval = "prediction") %>% plot(rawdata = T, dot.alpha = 0.2)
outcome1=ggpredict(m, terms = "age [15:40]")
head(outcome1)
## # Predicted values of distance
##
## age | Predicted | 95% CI
## --------------------------------
## 15 | 27.61 | [26.54, 28.68]
## 16 | 28.27 | [27.14, 29.40]
## 17 | 28.93 | [27.73, 30.13]
## 18 | 29.59 | [28.32, 30.86]
## 19 | 30.25 | [28.89, 31.61]
## 20 | 30.91 | [29.47, 32.36]
##
## Adjusted for:
## * Sex = Male
## * Subject = 0 (population-level)
outcome2=ggpredict(m, "age", type = "re",interval = "prediction")
head(outcome2)
## # Predicted values of distance
##
## age | Predicted | 95% CI
## --------------------------------
## 8 | 22.99 | [20.00, 25.97]
## 10 | 24.31 | [21.34, 27.27]
## 12 | 25.63 | [22.66, 28.59]
## 14 | 26.95 | [23.96, 29.93]
##
## Adjusted for:
## * Sex = Male
## * Subject = 0 (population-level)
##
## Intervals are prediction intervals.
# read ggpredict docment
Using predictInterval function
# Lsmeans gives a Confidence Interval, and predictInterval gives a prediction interval.
library(lsmeans)
## Loading required package: emmeans
## The 'lsmeans' package is now basically a front end for 'emmeans'.
## Users are encouraged to switch the rest of the way.
## See help('transition') for more information, including how to
## convert old 'lsmeans' objects and scripts to work with 'emmeans'.
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
library(nlme)
# fit model
n1 <- lmer(distance ~ age + Sex+(1|Subject), data = Orthodont )
n2 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1|Subject)
summary(n1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ age + Sex + (1 | Subject)
## Data: Orthodont
##
## REML criterion at convergence: 437.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7489 -0.5503 -0.0252 0.4534 3.6575
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 3.267 1.807
## Residual 2.049 1.432
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 17.70671 0.83392 21.233
## age 0.66019 0.06161 10.716
## SexFemale -2.32102 0.76142 -3.048
##
## Correlation of Fixed Effects:
## (Intr) age
## age -0.813
## SexFemale -0.372 0.000
summary(n2)
## Linear mixed-effects model fit by REML
## Data: Orthodont
## AIC BIC logLik
## 447.5125 460.7823 -218.7563
##
## Random effects:
## Formula: ~1 | Subject
## (Intercept) Residual
## StdDev: 1.807425 1.431592
##
## Fixed effects: distance ~ age + Sex
## Value Std.Error DF t-value p-value
## (Intercept) 17.706713 0.8339225 80 21.233044 0.0000
## age 0.660185 0.0616059 80 10.716263 0.0000
## SexFemale -2.321023 0.7614168 25 -3.048294 0.0054
## Correlation:
## (Intr) age
## age -0.813
## SexFemale -0.372 0.000
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.74889609 -0.55034466 -0.02516628 0.45341781 3.65746539
##
## Number of Observations: 108
## Number of Groups: 27
# lsmeans, confidence interval
lsmeans(n1, list (pairwise ~ Sex ), at = list(age=11))
## $`lsmeans of Sex`
## Sex lsmean SE df lower.CL upper.CL
## Male 25.0 0.486 25 24.0 26.0
## Female 22.6 0.586 25 21.4 23.9
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $`pairwise differences of Sex`
## 1 estimate SE df t.ratio p.value
## Male - Female 2.32 0.761 25 3.048 0.0054
##
## Degrees-of-freedom method: kenward-roger
lsmeans(n2, list (pairwise ~ Sex ), at = list(age=11))
## $`lsmeans of Sex`
## Sex lsmean SE df lower.CL upper.CL
## Male 25.0 0.486 26 24.0 26.0
## Female 22.6 0.586 25 21.4 23.9
##
## Degrees-of-freedom method: containment
## Confidence level used: 0.95
##
## $`pairwise differences of Sex`
## 1 estimate SE df t.ratio p.value
## Male - Female 2.32 0.761 25 3.048 0.0054
##
## Degrees-of-freedom method: containment
lsmeans(n1, "age")
## age lsmean SE df lower.CL upper.CL
## 11 23.8 0.381 25 23 24.6
##
## Results are averaged over the levels of: Sex
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
# first method: predictInterval
library(merTools)
## Loading required package: arm
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is C:/Users/hed2/Downloads/code-storage/code
new.data <- data.frame(age=11, Subject='M01',Sex="Male")
pI <- predictInterval(n1,newdata=new.data, which='fixed',level=0.95)
pI # no applicable method for 'getME' applied to an object of class "lme"
## fit upr lwr
## 1 24.98264 27.99309 21.80242
# 1 predictInterval, using the original data xs, then aggregate, the overall effect
pI <- predictInterval(n1,newdata=Orthodont ,level=0.95)
pred.data <- data.frame(Orthodont,pred=pI[1],pred_low=pI[3],pred_up=pI[2])
mean((pred.data$distance-pred.data$fit)^2)
## [1] 1.606495
# 2 fixed effect
pI <- predictInterval(n1,newdata=Orthodont, which='fixed',level=0.95)
pred.data <- data.frame(Orthodont,pred=pI[1],pred_low=pI[3],pred_up=pI[2])
mean((pred.data$distance-pred.data$fit)^2)
## [1] 4.99668
# 3 random effect
pI <- predictInterval(n1,newdata=Orthodont, which='random',level=0.95)
pred.data <- data.frame(Orthodont,pred=pI[1],pred_low=pI[3],pred_up=pI[2])
mean((pred.data$distance-pred.data$fit)^2)
## [1] 581.5902
############## random and fixed effects
m1 <- lmer(Reaction ~ Days + (1 | Subject), sleepstudy)
intFit <- predictInterval(m1, newdata = sleepstudy ) # bounded values
head(intFit)
## fit upr lwr
## 1 292.7711 336.8842 248.9802
## 2 301.9809 343.8089 259.6131
## 3 312.2498 356.2606 269.4721
## 4 322.0792 369.1102 281.0829
## 5 334.5273 379.8743 292.9503
## 6 343.9391 386.7206 299.5938
intFit2 <- predictInterval(m1, newdata = sleepstudy ,which = "random") # bounded values
head(intFit2)
## fit upr lwr
## 1 43.02962 83.64611 0.2887879
## 2 41.02894 84.01380 -2.7855840
## 3 40.22276 86.12611 -1.7813414
## 4 41.65719 84.93009 -0.2718662
## 5 40.52752 84.09293 -1.2122379
## 6 41.03242 81.07262 -0.1648371
intFit3 <- predictInterval(m1, newdata = sleepstudy ,which = "fixed") # bounded values
head(intFit3)
## fit upr lwr
## 1 252.0878 295.3780 210.3195
## 2 262.1760 302.6446 218.9889
## 3 271.7278 316.3573 227.9273
## 4 282.7686 326.1952 240.9669
## 5 290.3763 333.8694 251.8883
## 6 302.5332 345.1220 261.0191
Using Simulation
# second method: Simulation. This has result similar to predictInterval
sim <- simulate(n1, nsim = 1, newdata=new.data) #based on distribution with random error, like MCMC
s <- numeric()
for (i in 1:500) {
s[i] <- simulate(n1, nsim = 1, newdata=new.data)[1]
}
s <- unlist(s)
hist(s)
quantile(s,0.025)
## 2.5%
## 20.73304
quantile(s,0.975)
## 97.5%
## 29.30722
# third method:
# calculate mean and std, then calculate percentile by using normal distribution
intFit4 <- predictInterval(n1, newdata = new.data,level=0.95 ) # bounded values
head(intFit4) #using the overall effect
## fit upr lwr
## 1 27.41754 30.88217 24.18202
For general linear regression
library(ggplot2)
library(nlraa)
library(nlme)
library(mgcv)
## This is mgcv 1.8-42. For overview type 'help("mgcv-package")'.
data(Oats, package = "nlme")
## A subset for simplicity
Oats.I <- subset(Oats, subset = Block == "I")
plot(Oats.I)
# fitting a linear model
fm1 <- lm(yield ~ nitro, data = Oats.I)
fm1.prd <- predict(fm1, interval = "conf")
Oats.IA <- cbind(Oats.I, fm1.prd)
## Make a plot
ggplot(data = Oats.IA, aes(x = nitro, y = yield)) +
geom_point() +
geom_line(aes(y = fit)) +
geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "purple", alpha = 0.4)
# bootstrapped confidence bands
fm1.boot <- boot_lm(fm1, fitted, R=200) #randomly sampling data
fm1.boot.prd <- summary_simulate(t(fm1.boot$t))
Oats.IAB <- cbind(Oats.I, fm1.boot.prd)
## Make a plot
ggplot(data = Oats.IAB, aes(x = nitro, y = yield)) +
geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "deepskyblue", alpha = 0.6) +
geom_line(aes(y = Estimate), color = "white", size = 1.5) +
# geom_point() +
ggtitle("95% Bootstrapped Confidence Bands")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
For nonlinear
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:arm':
##
## logit
# Nonlinear Example: Loblolly, then using bootstrap or simulation
Lob <- subset(Loblolly, Seed %in% c("301", "303", "305", "307", "309"))
fnlm1 <- nls(height ~ SSasymp(age, Asym, R0, lrc), data = Lob)
# bootstrapped confidence bands
fm1.boot <- boot(fnlm1, fitted, R=200) #randomly sampling data
fm1.boot.prd <- summary_simulate(t(fm1.boot$t))
Oats.IAB <- cbind(Lob, fm1.boot.prd)
## Plot of observed and fitted
## Make a plot
ggplot(data = Oats.IAB, aes(x = age, y = height)) +
geom_line(aes(y = Estimate), color = "white", size = 0.6) +
# geom_point() +
geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "black", alpha = 1.5) +
ggtitle("95% Bootstrapped Confidence Bands")
Bootstrapping uses the original, initial sample as the population from which to resample, whereas Monte Carlo simulation is based on setting up a data generation process (parameters).