age & vocabulary size

options(digits = 4, show.signif.stars = FALSE)
pacman::p_load(tidyverse, nlme,segmented)
dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/age_vocab.txt", h = T)
str(dta)
## 'data.frame':    15 obs. of  2 variables:
##  $ Month   : int  8 10 12 15 18 21 24 30 36 42 ...
##  $ Voc_size: int  0 1 3 19 22 118 272 446 896 1222 ...
head(dta)
##   Month Voc_size
## 1     8        0
## 2    10        1
## 3    12        3
## 4    15       19
## 5    18       22
## 6    21      118
#看看資料
ggplot(dta, aes(Month, Voc_size)) +
  geom_point(pch = 1, size = rel(1.5)) +
  geom_line() +
  labs(x = "age in month", y = "vocabulary size")+
  theme_bw()

## set initial values for the paramters
 # 不知道怎麼找起始值,最後我偷看老師的報表
 # VocSizei = β1 exp(β2 exp(β3 month)),   i = 1, 2 ,..., n
b_init <- c(b1 = 2900, b2 = -10, b3=-0.05)
summary(m1 <- nls(Voc_size ~ b1 * exp(b2 * exp(b3 * Month))
                  , data = dta, start = b_init))
## 
## Formula: Voc_size ~ b1 * exp(b2 * exp(b3 * Month))
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)
## b1  2.91e+03   9.91e+01   29.31  1.6e-12
## b2 -1.02e+01   1.05e+00   -9.72  4.9e-07
## b3 -5.81e-02   3.45e-03  -16.87  1.0e-09
## 
## Residual standard error: 45.2 on 12 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 2.98e-06
plot(m1)

# show that we are still doing least squares estimation
plot(dta$Voc_size ~ dta$Month,                                #原始資料
     xlab = "age in month", ylab = "vocabulary size", pch=16, col="blue")
s <- seq(0, 80, length = 16)
lines(s, predict(m1, list(Month = s)), col="red", lwd = 0.8)  #fitted value 畫成線

 # VocSizei = β1 / (1 + ((β1 - β2) / β2) × exp(β3 month)),   i = 1, 2 ,..., n,
b2_init <- c(b1 = 2500, b2 = 19.5, b3=-0.1)
summary(m2 <- nls(Voc_size ~ b1 * b1/(1 + (((b1 - b2)/b2) * exp(b3 * Month)))
                  , data = dta, start = b2_init))
## 
## Formula: Voc_size ~ b1 * b1/(1 + (((b1 - b2)/b2) * exp(b3 * Month)))
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)
## b1 50.20070    0.94246   53.27  1.3e-15
## b2  0.38761    0.13722    2.82    0.015
## b3 -0.11164    0.00952  -11.73  6.2e-08
## 
## Residual standard error: 89 on 12 degrees of freedom
## 
## Number of iterations to convergence: 13 
## Achieved convergence tolerance: 4.1e-06
plot(m2)

free call

dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/freeRecall.asc", h = T)
str(dta)
## 'data.frame':    20 obs. of  3 variables:
##  $ grp  : Factor w/ 2 levels "C","E": 1 1 1 1 1 1 1 1 1 1 ...
##  $ trial: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ ncr  : num  7.9 10.9 11.9 13 14.2 14.2 14.7 15.1 14.8 15.2 ...
#看看資料
ggplot(dta, aes(trial, ncr, color = grp)) +
  geom_point(size = rel(3)) +
  stat_smooth(aes(group = grp), method =  "loess", se = F )+
  labs(x = "Trials in List B", y = "Mean Number of correct responses") +
  scale_x_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2)) +
  scale_y_continuous(limits = c(5, 20), breaks = seq(5, 20, by = 2)) +
  scale_color_manual(values = c("light blue", "orange")) + 
  theme_bw()

## CRi = α × exp(β sqrt(triali)),
 # where α is starting performance and β is the rate of improvement over trial.
summary(m1 <- nls(ncr ~ a[grp] * exp(b[grp] * sqrt(trial)), data = dta,
          start = list(a = c(5, 10), b = c(log(12/5),log(12/10)))))  #亂看亂定的斜率><
## 
## Formula: ncr ~ a[grp] * exp(b[grp] * sqrt(trial))
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)
## a1   7.7416     0.5448   14.21  1.7e-10
## a2  10.3086     0.6569   15.69  3.9e-11
## b1   0.2322     0.0280    8.31  3.4e-07
## b2   0.1212     0.0261    4.64  0.00027
## 
## Residual standard error: 0.727 on 16 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 7.04e-06
#預測圖
ggplot(dta, aes(trial, ncr, color = grp))+
  geom_point()+       #原始資料
  scale_x_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2)) +
  scale_y_continuous(limits = c(5, 20), breaks = seq(5, 20, by = 2)) +
  geom_line(aes(trial,fitted(m1)))+    #預測值
  labs(x = "Trials in List B", y = "Mean Number of correct responses") +
  scale_color_manual(values = c("light blue", "orange")) +
  theme_bw()

continuous monitoring

dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/continuousMonitoring.asc", h = T)
str(dta)
## 'data.frame':    31 obs. of  2 variables:
##  $ rate    : num  1.5 2 2 2.5 2 1.5 2.5 3.5 3.5 4.5 ...
##  $ accuracy: num  28 29 32 33 39 49 44 22 23 47 ...
# 看看資料
ggplot(dta, aes(rate, accuracy)) +
  geom_point() +
  labs(x = "Rate of stimulus change (ms/100)", y = "Percentage accuracy")+
  theme_bw()

#use Michaelis-Menten-type equation
summary(m1 <- nls(accuracy ~ SSmicmen(rate, phi1, phi2), data = dta))
## 
## Formula: accuracy ~ SSmicmen(rate, phi1, phi2)
## 
## Parameters:
##      Estimate Std. Error t value Pr(>|t|)
## phi1   89.819      3.768   23.84  < 2e-16
## phi2    4.084      0.586    6.97  1.1e-07
## 
## Residual standard error: 8 on 29 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 7.82e-06
#預測圖
plot(dta$accuracy ~ dta$rate,                                #原始資料
     xlab = "Rate of stimulus change (ms/100)", ylab = " Percentage accuracy ", 
     pch=16, col="blue")
s <- seq(0, 60, length = 35)
lines(s, predict(m1, list(rate = s)), col="red", lwd = 0.8, lty=2)  #fitted value 畫成線

national population

## The logistic model (in the following form) is one of the models for population growth:
 # Population at time t = β1/ (1 + exp(β2 - β3 t )),
 # Where β1 is the upper bound for population, β2 is the population size at time 0, 
 # and β3 (> 0) governs the rate of growth. Fit the model to the data us_national_population{historydata} and draw conclusions.

require(historydata)
## Loading required package: historydata
dta<-as.data.frame(us_national_population)
str(dta)
## 'data.frame':    23 obs. of  2 variables:
##  $ year      : int  1790 1800 1810 1820 1830 1840 1850 1860 1870 1880 ...
##  $ population: int  3929625 5308483 7239881 9638239 12860702 17063353 23191876 31443321 38558371 50189209 ...
#看看資料
ggplot(dta, aes(y=population, x=year)) +
  geom_point()  +
  scale_x_continuous( breaks = seq(1790, 2010, by = 10))+
  labs(x = "Time (in minutes)", y = "Sensitivity (logarithmic unit)")+
  theme_bw()

#modeling
summary(m1<-nls(population ~ SSlogis(year,phi1,phi2,phi3), data=dta))
## 
## Formula: population ~ SSlogis(year, phi1, phi2, phi3)
## 
## Parameters:
##      Estimate Std. Error t value Pr(>|t|)
## phi1 4.99e+08   3.95e+07    12.6  5.3e-11
## phi2 1.99e+03   7.81e+00   254.7  < 2e-16
## phi3 4.85e+01   2.17e+00    22.4  1.3e-15
## 
## Residual standard error: 5300000 on 20 degrees of freedom
## 
## Number of iterations to convergence: 0 
## Achieved convergence tolerance: 6.67e-07
#預測圖
ggplot(dta, aes(year, population))+
  geom_point()+    #原始資料
  scale_x_continuous( breaks = seq(1790, 2010, by = 10))+
  geom_line(aes(year,fitted(m1)),col='red')+    #預測值
  theme_bw()

Q6

#
# cochlear implant eyetracking data example
#

# set options
options(digits = 4, show.signif.stars = FALSE)

# load packages for later use
pacman::p_load(bdots, tidyverse, nlme)

# load data
data(ci)

str(ci)
## 'data.frame':    108216 obs. of  5 variables:
##  $ protocol : Factor w/ 2 levels "CI","NH": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Subject  : int  36 36 36 36 36 36 36 36 36 36 ...
##  $ Time     : int  0 4 8 12 16 20 24 28 32 36 ...
##  $ Fixations: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ LookType : Factor w/ 4 levels "Cohort","Rhyme",..: 1 1 1 1 1 1 1 1 1 1 ...
# set plot themes to black-n-white
ot <- theme_set(theme_bw())

#ci$Subject <- as.factor(ci$Subject)
ggplot(ci, aes(Time, Fixations)) +
  geom_point(size=rel(.1), pch = 1, alpha = .2) +
  facet_grid(protocol ~ LookType)

# select target looking time course data
dta_t <- subset(ci, LookType == "Target")

# order data by subject , time
dta_t <- dta_t[order(dta_t$Subject, dta_t$Time, decreasing = F), ]

# have a look
head(dta_t)
##       protocol Subject Time Fixations LookType
## 39079       CI       2    0         0   Target
## 39080       CI       2    4         0   Target
## 39081       CI       2    8         0   Target
## 39082       CI       2   12         0   Target
## 39083       CI       2   16         0   Target
## 39084       CI       2   20         0   Target
# convert to groupedData object for nlme processing
dtag <- groupedData(Fixations ~ Time | Subject, outer = ~ protocol,
                    data = as.data.frame(dta_t),
                    labels = list(x = "Time", y = "Fixations"),
                    units = list(x = "(ms)", y = "(proportion)") )

# lattice plot by subject
plot(dtag, pch = 20, aspect = .89)

# plot by subject given group = protocol
p <- plot(dtag, outer = TRUE, cex = .1, layout = c(2,1), aspect = .89) #numeric, giving the aspect ratio y/x
update(p, legend = NULL)

##
# define the 3-parameter curve fitting function
fx <- function(x, P, S, C) {
  P / (1 + exp(S*(C - x)/P))
}
#
# fit nls separately for all subjects
#
m0 <- nlsList(Fixations ~ fx(Time, P, S, C),
              start = c(P = 0.87, S = 0.004, C = 757.8),
              data = dtag)

# results of parameter estimates
coef(m0)
##          P        S     C
## 24  0.5571 0.004108 708.9
## 81  0.7501 0.004582 804.5
## 6   0.7480 0.004294 801.2
## 90  0.7795 0.004376 780.4
## 2   0.7770 0.003588 811.9
## 41  0.8007 0.006199 728.2
## 84  0.8190 0.004794 792.4
## 30  0.8259 0.005005 819.4
## 16  0.8409 0.004715 804.8
## 10  0.8438 0.004999 788.8
## 23  0.8913 0.005303 924.2
## 75  0.8929 0.005409 805.3
## 79  0.9074 0.006026 698.8
## 48  0.8881 0.004349 792.9
## 76  0.8908 0.005846 761.5
## 4   0.9082 0.006417 736.7
## 74  0.9020 0.004945 762.7
## 72  0.8887 0.007259 822.3
## 17  0.9086 0.005659 788.8
## 29  0.9401 0.006854 777.9
## 19  0.9224 0.007384 709.1
## 82  0.9483 0.007750 734.1
## 83  0.9332 0.009380 761.1
## 94  0.9353 0.008019 735.2
## 22  0.9474 0.005805 814.3
## 86  0.9674 0.007940 690.5
## 99  0.9681 0.007187 817.4
## 3   0.9716 0.007568 677.7
## 68  0.6759 0.004333 745.9
## 64  0.8652 0.007813 761.6
## 55  0.8759 0.007716 711.8
## 98  0.8861 0.005607 744.8
## 100 0.8969 0.007230 795.8
## 51  0.9269 0.006000 705.6
## 105 0.9254 0.007172 685.0
## 38  0.9375 0.008724 696.7
## 40  0.9477 0.007433 721.8
## 65  0.9450 0.009253 671.1
## 42  0.9622 0.008466 622.0
## 71  0.9539 0.008714 658.7
## 63  0.9633 0.007947 671.7
## 95  0.9625 0.007309 725.4
## 66  0.9669 0.008559 777.7
## 106 0.9630 0.005560 743.0
## 56  0.9674 0.007671 681.7
## 60  0.9673 0.008303 680.0
## 85  0.9713 0.007033 690.3
## 102 0.9636 0.008226 722.6
## 36  0.9795 0.007957 698.8
## 61  0.9684 0.007889 683.7
## 73  0.9773 0.008403 673.0
## 47  0.9812 0.010171 704.2
## 53  0.9811 0.009324 722.4
## 58  0.9768 0.007508 682.5
# covert fitting separately by subject to subjects as random effects
m1 <- nlme(m0)

# extract mean parameters values to use as initial values
prm0 <- round(unlist(lapply(m1$coef$fixed, mean)), digits=3)

#
# update to let parameters dependent on protocol
# and simplify the covariance structure to a simple separate
# variances for each one of the parameters
#
m2 <- update(m1, 
             fixed = list(P + S + C ~ protocol),
             random = pdDiag(P + S + C ~ 1),
             groups = ~ Subject, 
             start = list(fixed = c(P = c(prm0[1], 0), 
                                    S = c(prm0[2], 0), 
                                    C = c(prm0[3], 0))))

#  
# update model 2 to include auto-regressive dependence over time
# results have convergence issues - takes a few minutes
# set lmeControl options - change optimizer, etc.
#
m3 <- update(m2, corr = corAR1(0.95, form = ~ 1 | Subject),
             control = lmeControl(opt = 'optim', 
                                  niterEM = 500,
                                  maxIter = 1e6,
                                  msMaxIter = 1e3))
## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value

## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value

## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value

## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value

## Warning in nlm(f = function(nlmePars) -logLik(nlmeSt, nlmePars), p =
## c(coef(nlmeSt)), : NA/Inf replaced by maximum positive value
# show summary
summary(m3)
## Nonlinear mixed-effects model fit by maximum likelihood
##   Model: Fixations ~ fx(Time, P, S, C) 
##  Data: dtag 
##       AIC     BIC logLik
##   -244799 -244709 122411
## 
## Random effects:
##  Formula: list(P ~ 1, S ~ 1, C ~ 1)
##  Level: Subject
##  Structure: Diagonal
##         P.(Intercept) S.(Intercept) C.(Intercept) Residual
## StdDev:       0.07106      0.001399         46.43  0.02976
## 
## Correlation Structure: AR(1)
##  Formula: ~1 | Subject 
##  Parameter estimate(s):
##    Phi 
## 0.9962 
## Fixed effects: list(P + S + C ~ protocol) 
##               Value Std.Error    DF t-value p-value
## P.(Intercept)   0.9     0.014 26995   61.37  0.0000
## P.protocolNH    0.1     0.021 26995    3.17  0.0015
## S.(Intercept)   0.0     0.000 26995   21.23  0.0000
## S.protocolNH    0.0     0.000 26995    4.49  0.0000
## C.(Intercept) 754.6     9.312 26995   81.04  0.0000
## C.protocolNH  -69.0    13.205 26995   -5.23  0.0000
##  Correlation: 
##               P.(In) P.prNH S.(In) S.prNH C.(In)
## P.protocolNH  -0.696                            
## S.(Intercept)  0.007 -0.005                     
## S.protocolNH  -0.005  0.008 -0.692              
## C.(Intercept)  0.012 -0.008  0.001  0.000       
## C.protocolNH  -0.008  0.008 -0.001  0.001 -0.705
## 
## Standardized Within-Group Residuals:
##      Min       Q1      Med       Q3      Max 
## -3.30299 -1.01280 -0.47437 -0.06979  1.58923 
## 
## Number of Observations: 27054
## Number of Groups: 54
#
# check the auto-correlation function of residuals
#
plot(ACF(m3, resType = "n"), alpha = 0.05)

# residual plot to identify outlier
plot(m3, id = 0.05, pch = '.', adj = -0.1, cex = .4)

# residual plot by protocol
plot(m3, resid(., type = "p") ~ fitted(.) | protocol, abline = 0, pch = '.')

# residual plot by subject
plot(m3, resid(., type = "p") ~ fitted(.) | Subject, abline = 0, pch = '.')

# residual plot by subject
plot(m3, resid(., type = "p") ~ fitted(.) | Subject*protocol, 
     abline = 0, pch = '.')

# plot fitted lines over observations by subject
plot(augPred(m3, level = 0:1))

# fortify data with fitted values
dta_t$yhat <- fitted(m3)

# Normal group
ggplot(subset(dta_t, protocol == "NH"), aes(Time, Fixations)) +
  geom_point(size = rel(.5), pch = 1, col = "skyblue") +
  geom_line(aes(y = yhat, group = Subject)) +
  coord_cartesian(ylim = c(0, 1)) +
  facet_wrap(~ Subject, ncol = 7) +
  labs(x = "Time (ms)", y = "Proportion of fixations") +
  theme(legend.position = "NONE")

# implant group
ggplot(subset(dta_t, protocol == "CI"), aes(Time, Fixations)) +
  geom_point(size = rel(.5), pch = 1, col = "pink") +
  geom_line(aes(y = yhat, group = Subject)) +
  coord_cartesian(ylim = c(0, 1)) +
  facet_wrap(~ Subject, ncol = 7) +
  labs(x = "Time (ms)", y = "Proportion of fixations") +
  theme(legend.position = "NONE")

###