age & vocabulary size
options(digits = 4, show.signif.stars = FALSE)
pacman::p_load(tidyverse, nlme,segmented)
dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/age_vocab.txt", h = T)
str(dta)
## 'data.frame': 15 obs. of 2 variables:
## $ Month : int 8 10 12 15 18 21 24 30 36 42 ...
## $ Voc_size: int 0 1 3 19 22 118 272 446 896 1222 ...
head(dta)
## Month Voc_size
## 1 8 0
## 2 10 1
## 3 12 3
## 4 15 19
## 5 18 22
## 6 21 118
#看看資料
ggplot(dta, aes(Month, Voc_size)) +
geom_point(pch = 1, size = rel(1.5)) +
geom_line() +
labs(x = "age in month", y = "vocabulary size")+
theme_bw()

## set initial values for the paramters
# 不知道怎麼找起始值,最後我偷看老師的報表
# VocSizei = β1 exp(β2 exp(β3 month)), i = 1, 2 ,..., n
b_init <- c(b1 = 2900, b2 = -10, b3=-0.05)
summary(m1 <- nls(Voc_size ~ b1 * exp(b2 * exp(b3 * Month))
, data = dta, start = b_init))
##
## Formula: Voc_size ~ b1 * exp(b2 * exp(b3 * Month))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## b1 2.91e+03 9.91e+01 29.31 1.6e-12
## b2 -1.02e+01 1.05e+00 -9.72 4.9e-07
## b3 -5.81e-02 3.45e-03 -16.87 1.0e-09
##
## Residual standard error: 45.2 on 12 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 2.98e-06
plot(m1)

# show that we are still doing least squares estimation
plot(dta$Voc_size ~ dta$Month, #原始資料
xlab = "age in month", ylab = "vocabulary size", pch=16, col="blue")
s <- seq(0, 80, length = 16)
lines(s, predict(m1, list(Month = s)), col="red", lwd = 0.8) #fitted value 畫成線

# VocSizei = β1 / (1 + ((β1 - β2) / β2) × exp(β3 month)), i = 1, 2 ,..., n,
b2_init <- c(b1 = 2500, b2 = 19.5, b3=-0.1)
summary(m2 <- nls(Voc_size ~ b1 * b1/(1 + (((b1 - b2)/b2) * exp(b3 * Month)))
, data = dta, start = b2_init))
##
## Formula: Voc_size ~ b1 * b1/(1 + (((b1 - b2)/b2) * exp(b3 * Month)))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## b1 50.20070 0.94246 53.27 1.3e-15
## b2 0.38761 0.13722 2.82 0.015
## b3 -0.11164 0.00952 -11.73 6.2e-08
##
## Residual standard error: 89 on 12 degrees of freedom
##
## Number of iterations to convergence: 13
## Achieved convergence tolerance: 4.1e-06
plot(m2)

free call
dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/freeRecall.asc", h = T)
str(dta)
## 'data.frame': 20 obs. of 3 variables:
## $ grp : Factor w/ 2 levels "C","E": 1 1 1 1 1 1 1 1 1 1 ...
## $ trial: int 1 2 3 4 5 6 7 8 9 10 ...
## $ ncr : num 7.9 10.9 11.9 13 14.2 14.2 14.7 15.1 14.8 15.2 ...
#看看資料
ggplot(dta, aes(trial, ncr, color = grp)) +
geom_point(size = rel(3)) +
stat_smooth(aes(group = grp), method = "loess", se = F )+
labs(x = "Trials in List B", y = "Mean Number of correct responses") +
scale_x_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2)) +
scale_y_continuous(limits = c(5, 20), breaks = seq(5, 20, by = 2)) +
scale_color_manual(values = c("light blue", "orange")) +
theme_bw()

## CRi = α × exp(β sqrt(triali)),
# where α is starting performance and β is the rate of improvement over trial.
summary(m1 <- nls(ncr ~ a[grp] * exp(b[grp] * sqrt(trial)), data = dta,
start = list(a = c(5, 10), b = c(log(12/5),log(12/10))))) #亂看亂定的斜率><
##
## Formula: ncr ~ a[grp] * exp(b[grp] * sqrt(trial))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a1 7.7416 0.5448 14.21 1.7e-10
## a2 10.3086 0.6569 15.69 3.9e-11
## b1 0.2322 0.0280 8.31 3.4e-07
## b2 0.1212 0.0261 4.64 0.00027
##
## Residual standard error: 0.727 on 16 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 7.04e-06
#預測圖
ggplot(dta, aes(trial, ncr, color = grp))+
geom_point()+ #原始資料
scale_x_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2)) +
scale_y_continuous(limits = c(5, 20), breaks = seq(5, 20, by = 2)) +
geom_line(aes(trial,fitted(m1)))+ #預測值
labs(x = "Trials in List B", y = "Mean Number of correct responses") +
scale_color_manual(values = c("light blue", "orange")) +
theme_bw()

continuous monitoring
dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/continuousMonitoring.asc", h = T)
str(dta)
## 'data.frame': 31 obs. of 2 variables:
## $ rate : num 1.5 2 2 2.5 2 1.5 2.5 3.5 3.5 4.5 ...
## $ accuracy: num 28 29 32 33 39 49 44 22 23 47 ...
# 看看資料
ggplot(dta, aes(rate, accuracy)) +
geom_point() +
labs(x = "Rate of stimulus change (ms/100)", y = "Percentage accuracy")+
theme_bw()

#use Michaelis-Menten-type equation
summary(m1 <- nls(accuracy ~ SSmicmen(rate, phi1, phi2), data = dta))
##
## Formula: accuracy ~ SSmicmen(rate, phi1, phi2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## phi1 89.819 3.768 23.84 < 2e-16
## phi2 4.084 0.586 6.97 1.1e-07
##
## Residual standard error: 8 on 29 degrees of freedom
##
## Number of iterations to convergence: 4
## Achieved convergence tolerance: 7.82e-06
#預測圖
plot(dta$accuracy ~ dta$rate, #原始資料
xlab = "Rate of stimulus change (ms/100)", ylab = " Percentage accuracy ",
pch=16, col="blue")
s <- seq(0, 60, length = 35)
lines(s, predict(m1, list(rate = s)), col="red", lwd = 0.8, lty=2) #fitted value 畫成線

national population
## The logistic model (in the following form) is one of the models for population growth:
# Population at time t = β1/ (1 + exp(β2 - β3 t )),
# Where β1 is the upper bound for population, β2 is the population size at time 0,
# and β3 (> 0) governs the rate of growth. Fit the model to the data us_national_population{historydata} and draw conclusions.
require(historydata)
## Loading required package: historydata
dta<-as.data.frame(us_national_population)
str(dta)
## 'data.frame': 23 obs. of 2 variables:
## $ year : int 1790 1800 1810 1820 1830 1840 1850 1860 1870 1880 ...
## $ population: int 3929625 5308483 7239881 9638239 12860702 17063353 23191876 31443321 38558371 50189209 ...
#看看資料
ggplot(dta, aes(y=population, x=year)) +
geom_point() +
scale_x_continuous( breaks = seq(1790, 2010, by = 10))+
labs(x = "Time (in minutes)", y = "Sensitivity (logarithmic unit)")+
theme_bw()

#modeling
summary(m1<-nls(population ~ SSlogis(year,phi1,phi2,phi3), data=dta))
##
## Formula: population ~ SSlogis(year, phi1, phi2, phi3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## phi1 4.99e+08 3.95e+07 12.6 5.3e-11
## phi2 1.99e+03 7.81e+00 254.7 < 2e-16
## phi3 4.85e+01 2.17e+00 22.4 1.3e-15
##
## Residual standard error: 5300000 on 20 degrees of freedom
##
## Number of iterations to convergence: 0
## Achieved convergence tolerance: 6.67e-07
#預測圖
ggplot(dta, aes(year, population))+
geom_point()+ #原始資料
scale_x_continuous( breaks = seq(1790, 2010, by = 10))+
geom_line(aes(year,fitted(m1)),col='red')+ #預測值
theme_bw()

Q6
#
# cochlear implant eyetracking data example
#
# set options
options(digits = 4, show.signif.stars = FALSE)
# load packages for later use
pacman::p_load(bdots, tidyverse, nlme)
# load data
data(ci)
str(ci)
## 'data.frame': 108216 obs. of 5 variables:
## $ protocol : Factor w/ 2 levels "CI","NH": 2 2 2 2 2 2 2 2 2 2 ...
## $ Subject : int 36 36 36 36 36 36 36 36 36 36 ...
## $ Time : int 0 4 8 12 16 20 24 28 32 36 ...
## $ Fixations: num 0 0 0 0 0 0 0 0 0 0 ...
## $ LookType : Factor w/ 4 levels "Cohort","Rhyme",..: 1 1 1 1 1 1 1 1 1 1 ...
# set plot themes to black-n-white
ot <- theme_set(theme_bw())
#ci$Subject <- as.factor(ci$Subject)
ggplot(ci, aes(Time, Fixations)) +
geom_point(size=rel(.1), pch = 1, alpha = .2) +
facet_grid(protocol ~ LookType)

# select target looking time course data
dta_t <- subset(ci, LookType == "Target")
# order data by subject , time
dta_t <- dta_t[order(dta_t$Subject, dta_t$Time, decreasing = F), ]
# have a look
head(dta_t)
## protocol Subject Time Fixations LookType
## 39079 CI 2 0 0 Target
## 39080 CI 2 4 0 Target
## 39081 CI 2 8 0 Target
## 39082 CI 2 12 0 Target
## 39083 CI 2 16 0 Target
## 39084 CI 2 20 0 Target
# convert to groupedData object for nlme processing
dtag <- groupedData(Fixations ~ Time | Subject, outer = ~ protocol,
data = as.data.frame(dta_t),
labels = list(x = "Time", y = "Fixations"),
units = list(x = "(ms)", y = "(proportion)") )
# lattice plot by subject
plot(dtag, pch = 20, aspect = .89)

# plot by subject given group = protocol
p <- plot(dtag, outer = TRUE, cex = .1, layout = c(2,1), aspect = .89) #numeric, giving the aspect ratio y/x
update(p, legend = NULL)

##
# define the 3-parameter curve fitting function
fx <- function(x, P, S, C) {
P / (1 + exp(S*(C - x)/P))
}
#
# fit nls separately for all subjects
#
m0 <- nlsList(Fixations ~ fx(Time, P, S, C),
start = c(P = 0.87, S = 0.004, C = 757.8),
data = dtag)
# results of parameter estimates
coef(m0)
## P S C
## 24 0.5571 0.004108 708.9
## 81 0.7501 0.004582 804.5
## 6 0.7480 0.004294 801.2
## 90 0.7795 0.004376 780.4
## 2 0.7770 0.003588 811.9
## 41 0.8007 0.006199 728.2
## 84 0.8190 0.004794 792.4
## 30 0.8259 0.005005 819.4
## 16 0.8409 0.004715 804.8
## 10 0.8438 0.004999 788.8
## 23 0.8913 0.005303 924.2
## 75 0.8929 0.005409 805.3
## 79 0.9074 0.006026 698.8
## 48 0.8881 0.004349 792.9
## 76 0.8908 0.005846 761.5
## 4 0.9082 0.006417 736.7
## 74 0.9020 0.004945 762.7
## 72 0.8887 0.007259 822.3
## 17 0.9086 0.005659 788.8
## 29 0.9401 0.006854 777.9
## 19 0.9224 0.007384 709.1
## 82 0.9483 0.007750 734.1
## 83 0.9332 0.009380 761.1
## 94 0.9353 0.008019 735.2
## 22 0.9474 0.005805 814.3
## 86 0.9674 0.007940 690.5
## 99 0.9681 0.007187 817.4
## 3 0.9716 0.007568 677.7
## 68 0.6759 0.004333 745.9
## 64 0.8652 0.007813 761.6
## 55 0.8759 0.007716 711.8
## 98 0.8861 0.005607 744.8
## 100 0.8969 0.007230 795.8
## 51 0.9269 0.006000 705.6
## 105 0.9254 0.007172 685.0
## 38 0.9375 0.008724 696.7
## 40 0.9477 0.007433 721.8
## 65 0.9450 0.009253 671.1
## 42 0.9622 0.008466 622.0
## 71 0.9539 0.008714 658.7
## 63 0.9633 0.007947 671.7
## 95 0.9625 0.007309 725.4
## 66 0.9669 0.008559 777.7
## 106 0.9630 0.005560 743.0
## 56 0.9674 0.007671 681.7
## 60 0.9673 0.008303 680.0
## 85 0.9713 0.007033 690.3
## 102 0.9636 0.008226 722.6
## 36 0.9795 0.007957 698.8
## 61 0.9684 0.007889 683.7
## 73 0.9773 0.008403 673.0
## 47 0.9812 0.010171 704.2
## 53 0.9811 0.009324 722.4
## 58 0.9768 0.007508 682.5
# covert fitting separately by subject to subjects as random effects
m1 <- nlme(m0)
# extract mean parameters values to use as initial values
prm0 <- round(unlist(lapply(m1$coef$fixed, mean)), digits=3)
#
# update to let parameters dependent on protocol
# and simplify the covariance structure to a simple separate
# variances for each one of the parameters
#
m2 <- update(m1,
fixed = list(P + S + C ~ protocol),
random = pdDiag(P + S + C ~ 1),
groups = ~ Subject,
start = list(fixed = c(P = c(prm0[1], 0),
S = c(prm0[2], 0),
C = c(prm0[3], 0))))
#
# update model 2 to include auto-regressive dependence over time
# results have convergence issues - takes a few minutes
# set lmeControl options - change optimizer, etc.
#
m3 <- update(m2, corr = corAR1(0.95, form = ~ 1 | Subject),
control = lmeControl(opt = 'optim',
niterEM = 500,
maxIter = 1e6,
msMaxIter = 1e3))
## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value
## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value
## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value
## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value
## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value
# show summary
summary(m3)
## Nonlinear mixed-effects model fit by maximum likelihood
## Model: Fixations ~ fx(Time, P, S, C)
## Data: dtag
## AIC BIC logLik
## -244799 -244709 122411
##
## Random effects:
## Formula: list(P ~ 1, S ~ 1, C ~ 1)
## Level: Subject
## Structure: Diagonal
## P.(Intercept) S.(Intercept) C.(Intercept) Residual
## StdDev: 0.07106 0.001399 46.43 0.02976
##
## Correlation Structure: AR(1)
## Formula: ~1 | Subject
## Parameter estimate(s):
## Phi
## 0.9962
## Fixed effects: list(P + S + C ~ protocol)
## Value Std.Error DF t-value p-value
## P.(Intercept) 0.9 0.014 26995 61.37 0.0000
## P.protocolNH 0.1 0.021 26995 3.17 0.0015
## S.(Intercept) 0.0 0.000 26995 21.23 0.0000
## S.protocolNH 0.0 0.000 26995 4.49 0.0000
## C.(Intercept) 754.6 9.312 26995 81.04 0.0000
## C.protocolNH -69.0 13.205 26995 -5.23 0.0000
## Correlation:
## P.(In) P.prNH S.(In) S.prNH C.(In)
## P.protocolNH -0.696
## S.(Intercept) 0.007 -0.005
## S.protocolNH -0.005 0.008 -0.692
## C.(Intercept) 0.012 -0.008 0.001 0.000
## C.protocolNH -0.008 0.008 -0.001 0.001 -0.705
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.30299 -1.01280 -0.47437 -0.06979 1.58923
##
## Number of Observations: 27054
## Number of Groups: 54
#
# check the auto-correlation function of residuals
#
plot(ACF(m3, resType = "n"), alpha = 0.05)

# residual plot to identify outlier
plot(m3, id = 0.05, pch = '.', adj = -0.1, cex = .4)

# residual plot by protocol
plot(m3, resid(., type = "p") ~ fitted(.) | protocol, abline = 0, pch = '.')

# residual plot by subject
plot(m3, resid(., type = "p") ~ fitted(.) | Subject, abline = 0, pch = '.')

# residual plot by subject
plot(m3, resid(., type = "p") ~ fitted(.) | Subject*protocol,
abline = 0, pch = '.')

# plot fitted lines over observations by subject
plot(augPred(m3, level = 0:1))

# fortify data with fitted values
dta_t$yhat <- fitted(m3)
# Normal group
ggplot(subset(dta_t, protocol == "NH"), aes(Time, Fixations)) +
geom_point(size = rel(.5), pch = 1, col = "skyblue") +
geom_line(aes(y = yhat, group = Subject)) +
coord_cartesian(ylim = c(0, 1)) +
facet_wrap(~ Subject, ncol = 7) +
labs(x = "Time (ms)", y = "Proportion of fixations") +
theme(legend.position = "NONE")

# implant group
ggplot(subset(dta_t, protocol == "CI"), aes(Time, Fixations)) +
geom_point(size = rel(.5), pch = 1, col = "pink") +
geom_line(aes(y = yhat, group = Subject)) +
coord_cartesian(ylim = c(0, 1)) +
facet_wrap(~ Subject, ncol = 7) +
labs(x = "Time (ms)", y = "Proportion of fixations") +
theme(legend.position = "NONE")

###