1 Introduction

The data set is from a psychiatric study focusing on the clinical response over time in 66 depressed patients. Depression is often classified into two types: non-endogenous depression, which is associated with some tragic life event such as the death of a close friend or family member, or endogenous depression, which is not a result of any specific event. Antidepressant medications are held to be more effective for endogenous depression. In this sample, 29 patients were diagnosed as non-endogenous, and 37 patients were endogenous.

Following a placebo period of 1 week, patients received 225 mg/day doses of imipramine for 4 weeks. Patients were rated with the Hamilton depression (HD) rating scale twice during the baseline placebo week (at the start and end of this week), as well as at the end of each of the 4 treatment weeks of the study.

The number of patients with all measures at each of the weeks fluctuated. Only 46 had complete data at all time points.

For the 17-item version, score on the Hamilton Scale goes from 0 to 54. One suggestion is that scores between 0 and 6 indicate a normal person with regard to depression, scores between 7 and 17 indicate mild depression, scores between 18 and 24 indicate moderate depression, and scores over 24 indicate severe depression.

2 Data

# data input
# 按照"ID", "Endog", "week", "HamD"排列
# 使用names()重新命名
dta <- read.csv("C:/Users/user/Desktop/CCU/class/Multilevel in R/DAY3/Homework/Homework 2/riesby_data.csv")
dta <- dta[, c("ID", "Endog", "week", "HamD")]
names(dta) <- c("ID", "Endogenous", "Week", "Score")

# 顯示前六項的資料
head(dta)

   ID Endogenous Week Score
1 101          0    0    26
2 101          0    1    22
3 101          0    2    18
4 101          0    3     7
5 101          0    4     4
6 101          0    5     3

# 用str()列出資料內每個欄位的狀態
str(dta)

'data.frame':   396 obs. of  4 variables:
 $ ID        : int  101 101 101 101 101 101 103 103 103 103 ...
 $ Endogenous: int  0 0 0 0 0 0 0 0 0 0 ...
 $ Week      : int  0 1 2 3 4 5 0 1 2 3 ...
 $ Score     : int  26 22 18 7 4 3 33 24 15 24 ...

# verify the number of endogenous cases
# 資料處理中常用到的函式，簡單說有點類似sql語言中的group by，可以按照要求把資料打組聚合，然後對聚合以後的資料進行加和、求平均等
aggregate(Endogenous ~ ID, data=dta, mean)[,2] |> sum()

[1] 37

# make a copy of the original data in the long format
dtaL <- dta %>%                                        
 dplyr::filter(!is.na(Score)) %>%          # remove NA or missing scores
 dplyr::mutate(ID = as.factor(ID),
               Endogenous = as.factor(Endogenous) %>% 
               forcats::fct_recode("N" = "0",
                                   "Y" = "1")) %>%
 dplyr::arrange(ID, Week)                     # sort observations

# long to wide format
dtaw <- dtaL %>%     
 tidyr::pivot_wider(names_from = Week,
                    names_prefix = "Score_",
                    values_from = Score)

3 Summary statistics

# 有Depression的人為26.1%(37人)、沒有的為43.9%(29人)
dtaw %>% 
  furniture::table1("Endogenous Depression" = Endogenous, 
                    align="c",
                    output = "markdown")

	Mean/Count (SD/%)
	n = 66
Endogenous Depression
N	29 (43.9%)
Y	37 (56.1%)

# number of patients by week
# 製作列表顯示一週有幾個病人
dtaL %>% 
  dplyr::group_by(Week) %>% 
  dplyr::tally() %>%
  knitr::kable()

Week	n
0	61
1	63
2	65
3	65
4	63
5	58

3.1 Missing data

# which indices are true and return array indices
which(is.na(dtaw), arr.ind=TRUE)

      row col
 [1,]   9   3
 [2,]  10   3
 [3,]  12   3
 [4,]  40   3
 [5,]  65   3
 [6,]  24   4
 [7,]  39   4
 [8,]  66   4
 [9,]  35   5
[10,]  18   6
[11,]   6   7
[12,]  23   7
[13,]  66   7
[14,]   5   8
[15,]   8   8
[16,]  22   8
[17,]  26   8
[18,]  30   8
[19,]  43   8
[20,]  50   8
[21,]  61   8

# compare original data set with missing removed 
# setdiff（差），求向量x與向量y中不同的元素(只取x中不同的元素)
# 因此，以下圖表表示刪除missing跟沒刪除之間的差別
dtaw %>%
     dplyr::setdiff(., na.omit(.))

# A tibble: 20 x 8
   ID    Endogenous Score_0 Score_1 Score_2 Score_3 Score_4 Score_5
   <fct> <fct>        <int>   <int>   <int>   <int>   <int>   <int>
 1 106   Y               21      25      23      18      20      NA
 2 107   Y               21      21      16      19      NA       6
 3 113   N               21      23      19      23      23      NA
 4 114   N               NA      17      11      13       7       7
 5 115   Y               NA      16      16      16      16      11
 6 118   Y               NA      26      18      18      14      11
 7 304   Y               21      27      29      NA      12      24
 8 310   Y               24      19      11       7       6      NA
 9 311   Y               20      16      21      17      NA      15
10 312   Y               17      NA      18      17      17       6
11 315   Y               27      21      17      13       5      NA
12 322   Y               28      21      25      32      34      NA
13 334   N               31      25      NA       7       8      11
14 339   Y               27      NA      14      12      11      12
15 344   Y               NA      21      12      13      13      18
16 347   Y               18      15      14      10       8      NA
17 354   Y               28      27      27      26      23      NA
18 604   N               27      27      13       5       7      NA
19 609   Y               NA      25      22      14      15       2
20 610   Y               34      NA      33      23      NA      11

# 在缺失值處理上，如果需要快速找出缺失值，或者簡單查看缺失值占比和分布，可以使用 naniar

dtaw %>% 
  naniar::gg_miss_upset(sets = c("Score_5_NA",
                                 "Score_4_NA",
                                 "Score_3_NA",
                                 "Score_2_NA",
                                 "Score_1_NA",
                                 "Score_0_NA"),
                        keep.order = TRUE)

# The furniture R package contains table1 for publication-ready simple and stratified descriptive statistics, tableC for publication-ready correlation matrixes, and other tables 
# 顯示出每個禮拜的描述性統計
# 第二週p=0.095，輕微顯著
dtaw %>%                
  dplyr::group_by(Endogenous) %>% 
  furniture::table1("Baseline" = Score_0,
                    "Week 1" = Score_1,
                    "Week 2" = Score_2,
                    "Week 3" = Score_3,
                    "Week 4" = Score_4,
                    "Week 5" = Score_5,
                    total = TRUE,
                    test = TRUE,
                    na.rm = FALSE,   # default: COMPLETE CASES ONLY
                    digits = 2,
                    align = "c",
                    output = "markdown",
                    caption = "Weekly Hamilton Depression Scores by Diagnosis for all participants")

Weekly Hamilton Depression Scores by Diagnosis for all participants
	Total	N	Y	P-Value
	n = 66	n = 29	n = 37
Baseline				0.301
	23.44 (4.53)	22.79 (4.12)	24.00 (4.85)
Week 1				0.033
	21.84 (4.70)	20.48 (3.83)	23.00 (5.10)
Week 2				0.095
	18.31 (5.49)	17.00 (4.35)	19.30 (6.08)
Week 3				0.23
	16.42 (6.42)	15.34 (6.17)	17.28 (6.56)
Week 4				0.298
	13.62 (6.97)	12.62 (6.72)	14.47 (7.17)
Week 5				0.48
	11.95 (7.22)	11.22 (6.34)	12.58 (7.96)

3.1.1 Problem

Summarize Hamilton depression scores across time by depression type for patients with complete data for all 6 weeks

data_sum_all <- dtaL %>% 
  dplyr::group_by(Endogenous, Week) %>%             # specify the groups
  dplyr::summarise(Score_n    = n(),                # count of valid scores
                   Score_mean = mean(Score),        # mean score
                   Score_sd   = sd(Score),          # standard deviation of scores
                   Score_sem  = Score_sd / sqrt(Score_n)) # se for the mean

data_sum_all |> knitr::kable()

Endogenous	Week	Score_n	Score_mean	Score_sd	Score_sem
N	0	28	22.7857	4.12182	0.778951
N	1	29	20.4828	3.83239	0.711656
N	2	28	17.0000	4.34614	0.821342
N	3	29	15.3448	6.17180	1.146075
N	4	29	12.6207	6.72104	1.248066
N	5	27	11.2222	6.33873	1.219889
Y	0	33	24.0000	4.84768	0.843873
Y	1	34	23.0000	5.09902	0.874475
Y	2	37	19.2973	6.08214	0.999899
Y	3	36	17.2778	6.56228	1.093713
Y	4	34	14.4706	7.16572	1.228911
Y	5	31	12.5806	7.95728	1.429169

3.2 Covariances and correlations

# dplyr · select() : 選要分析的欄位
# cov函式計算的是列與列的協方差，計算共變
dtaw %>% 
  dplyr::select(starts_with("Score_")) %>%  
  cov(use = "pairwise.complete.obs")  %>%  
  round(3)

        Score_0 Score_1 Score_2 Score_3 Score_4 Score_5
Score_0  20.551  10.115  10.139  10.086   7.191   6.278
Score_1  10.115  22.071  12.277  12.550  10.264   7.720
Score_2  10.139  12.277  30.091  25.126  24.626  18.384
Score_3  10.086  12.550  25.126  41.153  37.339  23.992
Score_4   7.191  10.264  24.626  37.339  48.594  30.513
Score_5   6.278   7.720  18.384  23.992  30.513  52.120

3.2.1 Problem

Compute the covariance matrix of the scores over the weeks using complete cases only

3.2.2 Problem

Compute the correlation matrix of the scores over the weeks as long as pair-wise cases are available

# 繪製相關矩陣圖
dtaw %>% 
  dplyr::select(starts_with("Score_")) %>% # just the outcome(s)
  cor(use = "pairwise.complete.obs") %>%   # correlation matrix
  corrplot::corrplot.mixed(upper = "ellipse")

# 針對Endogenous group畫相關矩陣圖
dtaw %>% 
  dplyr::filter(Endogenous == "Y") %>%    # for the Endogenous group
  dplyr::select(starts_with("Score_")) %>%
  cor(use = "pairwise.complete.obs") %>%   
  corrplot::corrplot.mixed(upper = "ellipse")

3.2.3 Problem

Draw the correlation plot for scores over the weeks for the non-endogenous group

4 Visualization

# Use theme_set() to completely override the current theme.
old <- theme_set(theme_minimal())

# 從圖可知，Time weeks (since baseline)對Hamilton Depression Score可能存在副向相關(in no Endogenous depression)
ggplot(dtaL, aes(x = Week, y = Score)) +
  geom_line(aes(group = ID), alpha=.4, size=rel(.4)) +
  facet_grid( ~ Endogenous) +
  scale_y_continuous(limits=c(0, 42), breaks=seq(0, 42, by=6))+
  labs(x="Time (weeks since baseline)",
       y="Hamilton Depression Score",
       subtitle="Endogenous depression")

# 從圖可知，無論是有沒有Endogenous，x和y存在負相關
# 兩者的bar接觸不到1/4，有可能彼此有顯著關係
ggplot(dtaL, aes(x = Week, y = Score, color=Endogenous)) +
  stat_summary(fun.data="mean_se", position=position_dodge(width=0.2)) +
  stat_summary(aes(group=Endogenous, linetype=Endogenous), 
               fun = mean,
               geom = "line",
               position=position_dodge(width=.2)) +
  scale_linetype_manual(values = c("solid", "longdash")) +
  scale_color_manual(values=c('black','gray50'))+
  labs(x="Time (weeks since baseline)",
       y="Mean Hamilton Depression Score")+
  theme(legend.position = "bottom",
        legend.key.width = unit(2, "cm"))

# geom_smooth(method = "loess") 選項命令，給散點圖添加平滑曲線。把觀測數據中變量之間的關係視覺化，用於輔助判斷一個模型是否可以被擬合爲線性關係。
# 從圖一樣可以發現x對上y呈現負相關
ggplot(dtaL, aes(x = Week, y = Score)) +
  geom_line(aes(group = ID), col='gray', alpha=.5, size=rel(.5)) +
  facet_grid( ~ Endogenous) +
  geom_smooth(method="loess", 
              se=FALSE, 
              formula=y~x,
              color="orange") +  
  geom_smooth(method = "lm", 
              formula=y~x,
              se = FALSE, 
              color="skyblue")+
  scale_y_continuous(limits=c(0, 42), breaks=seq(0, 42, by=6))+
  labs(x="Time (weeks since baseline)",
       y="Hamilton Depression Score")

# 從兩個區間可以看到重疊很多，兩者之間並不會因為Endogenous有太大影響
ggplot(dtaL, aes(x = Week,
             y = Score,
             group = Endogenous,
             linetype = Endogenous)) +
  geom_smooth(method = "loess",
              formula=y~x,
              color = "black",
              alpha = .2) +
  scale_linetype_manual(values = c("solid", "longdash")) +
  labs(x = "Time (weeks since baseline)",
       y = "Hamilton Depression Scale",
       linetype = "Endogenous Depression")+
  theme(legend.position = c(1, 1),
        legend.justification = c(1.1, 1.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))

5 Models

5.1 Random intercepts only

# 1 is included whether you type it in or not
m0 <- lme4::lmer(Score ~ 1 + Week + (1 | ID), data=dtaL)

# “asis”: 照程式結果輸出
# texreg將r輸出為LaTeX或HTML表
# 結果發現m0 fit
texreg::knitreg(m0, 
                single.row = TRUE,
                stars = numeric(0),
                caption = "Random Intercepts Model",
                caption.above = TRUE,
                custom.note = "Model fit w/ REML")

Random Intercepts Model
	Model 1
(Intercept)	23.55 (0.64)
Week	-2.38 (0.14)
AIC	2294.73
BIC	2310.43
Log Likelihood	-1143.36
Num. obs.	375
Num. groups: ID	66
Var: ID (Intercept)	16.45
Var: Residual	19.10
Model fit w/ REML

5.1.1 Fitted values

dtaL %>% 
  dplyr::mutate(pred_fixed = predict(m0, re.form = NA)) %>% # fixed effects only
  dplyr::mutate(pred_wrand = predict(m0)) %>%               # fixed and random effects together
  ggplot(aes(x = Week, y = Score, group = ID)) +
  geom_line(aes(y        = pred_wrand,
                color    = "BLUP",
                size     = "BLUP",
                linetype = "BLUP")) +
  geom_line(aes(y        = pred_fixed,
                color    = "Marginal",
                size     = "Marginal",
                linetype = "Marginal")) +
  scale_color_manual(name   = "Type of Prediction",
                     values = c("BLUP"     = "gray50",
                                "Marginal" = "blue"))  +
  scale_size_manual(name    = "Type of Prediction",
                    values = c("BLUP"      = .8,
                               "Marginal" = .5))  +
  scale_linetype_manual(name   = "Type of Prediction",
                        values = c("BLUP"     = "dotted",
                                   "Marginal" = "solid")) +
  labs(x = "Weeks since baseline",
       y = "Hamilton Depression Scores")+
  theme(legend.position = c(0, 0),
        legend.justification = c(-0.1, -0.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))

obs <- dtaL %>% 
  dplyr::group_by(Week) %>% 
  dplyr::summarise(observed = mean(Score, na.rm = TRUE))

# 求模型(m0)的效果估計
effects::Effect(focal.predictors = "Week",
                mod = m0,
                xlevels = list(Week = 0:5)) %>% 
  data.frame() %>% 
  dplyr::rename(estimated = fit) %>% 
  dplyr::left_join(obs, by = "Week") %>% 
  dplyr::select(Week, observed, estimated) %>% 
  dplyr::mutate(diff = observed - estimated) %>% 
  pander::pander(caption = "Observed and Estimated Means")

Observed and Estimated Means
Week	observed	estimated	diff
0	23.44	23.55	-0.109
1	21.84	21.18	0.6652
2	18.31	18.8	-0.4928
3	16.42	16.42	-0.009553
4	13.62	14.05	-0.4303
5	11.95	11.67	0.2745

# m0的內部一致性為0.463(0.319)，非常低
performance::icc(m0)

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.463
  Unadjusted ICC: 0.319

# m00 忽略 clusters，僅關注week對上score的線性
# fit a linear model ignoring patient clusters
m00 <- lm(Score ~ Week, data = dtaL)

# 做圖比較M00、m0差別，分別為線性模型與多層次模型(差別在有無clusters)
texreg::knitreg(list(m00, m0),
                custom.model.names = c("Linear model", "Multilevel"),
                single.row = TRUE,
                stars = numeric(0),
                caption = "Random Intercepts Models",
                caption.above = TRUE,
                custom.note = "")

Random Intercepts Models
	Linear model	Multilevel
(Intercept)	23.60 (0.55)	23.55 (0.64)
Week	-2.41 (0.18)	-2.38 (0.14)
R²	0.32
Adj. R²	0.32
Num. obs.	375	375
AIC		2294.73
BIC		2310.43
Log Likelihood		-1143.36
Num. groups: ID		66
Var: ID (Intercept)		16.45
Var: Residual		19.10

# 計算m00的最大概似估計值
# Returns an object of class logLik. This is a number with at least one attribute, "df" (degrees of freedom), giving the number of (estimated) parameters in the model.
# logLik is most commonly used for a model fitted by maximum likelihood, and some uses, e.g.by AIC, assume this. So care is needed where other fit criteria have been used, for example REML (the default for "lme").
logLik(m00)

'log Lik.' -1199.86 (df=3)

# Extract the estimated standard deviation of the errors, the “residual standard deviation” (misnamed also “residual standard error”, e.g., in summary.lm()'s output, from a fitted model).
sigma(m00)^2

[1] 35.3997

VarCorr(m0) %>% print(., comp=c("Variance","Std.Dev."), digits=4)

 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 16.45    4.055   
 Residual             19.10    4.370

# 上一個coding跟這個結果相同，寫法不同
lme4::VarCorr(m0) %>% 
  print(comp = c("Variance", "Std.Dev"))

 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 16.45    4.055   
 Residual             19.10    4.370

5.2 Random slopes and intercepts model

# m1包含固定與隨機效果
m1 <- lme4::lmer(Score ~ 1 + Week + (1 + Week | ID), data = dtaL)

# 比較兩種模型(m0,m1)
# AIC: M0<M1
# BIC: M0>M1
# 粗略看起來M0比較適配
texreg::knitreg(list(m0, m1),
                single.row = TRUE,
                stars = numeric(0),
                custom.model.names = c("Random Intercepts",
                                       " And Random Slopes"),
                caption = "MLM: Models fit w/REML",
                caption.above = TRUE,
                custom.note = "")

MLM: Models fit w/REML
	Random Intercepts	And Random Slopes
(Intercept)	23.55 (0.64)	23.58 (0.55)
Week	-2.38 (0.14)	-2.38 (0.21)
AIC	2294.73	2231.92
BIC	2310.43	2255.48
Log Likelihood	-1143.36	-1109.96
Num. obs.	375	375
Num. groups: ID	66	66
Var: ID (Intercept)	16.45	12.94
Var: Residual	19.10	12.21
Var: ID Week		2.13
Cov: ID (Intercept) Week		-1.48

# 而用ANOVA進行檢定
# 發現M1比較適配，其結果較為顯著
anova(m0, m1, 
      model.names = c("Random Intercepts", "Random Intercepts/Slopes"),
      refit = TRUE) %>% 
  pander::pander(caption = "LRT: Significance of Random Slopes")

refitting model(s) with ML (instead of REML)

LRT: Significance of Random Slopes (continued below)
	npar	AIC	BIC	logLik	deviance	Chisq
Random Intercepts	4	2293	2309	-1143	2285	NA
Random Intercepts/Slopes	6	2231	2255	-1110	2219	66.15

	Df	Pr(>Chisq)
Random Intercepts	NA	NA
Random Intercepts/Slopes	2	4.319e-15

dtaL %>% 
  dplyr::mutate(pred_fixed = predict(m1, re.form = NA)) %>% # fixed effects only
  dplyr::mutate(pred_wrand = predict(m1)) %>%               # fixed and random effects together
  ggplot(aes(x = Week,
             y = Score,
             group = ID)) +
  geom_line(aes(y        = pred_wrand,
                color    = "BLUP",
                size     = "BLUP",
                linetype = "BLUP")) +
  geom_line(aes(y        = pred_fixed,
                color    = "Marginal",
                size     = "Marginal",
                linetype = "Marginal")) +
  scale_color_manual(name   = "Type of Prediction",
                     values = c("BLUP"     = "gray50",
                                "Marginal" = "blue"))  +
  scale_size_manual(name    = "Type of Prediction",
                    values = c("BLUP"      = .7,
                                "Marginal" = .5))  +
  scale_linetype_manual(name   = "Type of Prediction",
                        values = c("BLUP"     = "longdash",
                                   "Marginal" = "solid"))+
  labs(x = "Weeks Since Baseline",
       y = "Hamilton Depression Scores")+
  theme(legend.position = c(0, 0),
        legend.justification = c(-0.1, -0.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))

# Extract the fixed-effects estimates
fixef(m1)

(Intercept)        Week 
   23.57704    -2.37705

# A generic function to extract the conditional modes of the random effects from a fitted model object. For linear mixed models the conditional modes of the random effects are also the conditional means.
ranef(m1)$ID %>% head()

    (Intercept)      Week
101    1.057202 -2.115138
103    3.670790 -0.483248
104    2.672755 -1.500882
105   -3.041339  0.226450
106    0.315424  1.025475
107   -0.614899 -0.429738

# only the first 6 participants

# coef is a generic function which extracts model coefficients from objects returned by modeling functions
coef(m1)$ID %>% head()

    (Intercept)     Week
101     24.6342 -4.49219
103     27.2478 -2.86030
104     26.2498 -3.87793
105     20.5357 -2.15060
106     23.8925 -1.35157
107     22.9621 -2.80679

coef(m1)$ID %>% 
  ggplot(aes(x = Week,
             y = `(Intercept)`)) +
  geom_point(pch=1, size=rel(.5)) +
  geom_hline(yintercept = fixef(m1)["(Intercept)"],
             linetype = "dotted") +
  geom_vline(xintercept = fixef(m1)["Week"],
             linetype = "dotted") +
  labs(subtitle = "Estimated random effects",
       x = "Weekly Change in Depression (Slopes)",
       y = "Baseline Depression Level (Intercepts)")

5.2.1 Problem

Fit a model with random intercepts and slopes but the slopes and intercepts are uncorrelated. Compare models.

# 加入交互作用
m2 <- lme4::lmer(Score ~ 1 + Week * Endogenous + 
                (1 + Week | ID), 
                data = dtaL)

#  plots regression lines at user-specified levels of a moderator variable to explore interactions.
interactions::interact_plot(m2,
                            pred = Week,
                            modx = Endogenous,
                            interval = TRUE,
                   main.title = "Time by Diagnosis Effect")+
  labs(x="Time (weeks since baseline)",
       y="Mean depression score")+
  theme(legend.position=c(.2,.2))

# 比較M1,M2，相較於M1,M2更加適配
# npar較多，且bic略大
anova(m1, 
      m2, 
      model.names = c("Time only", 
                      "Time X Dx")) %>% 
  pander::pander(caption = "LRT: Significance of Diagnosis by Time")

refitting model(s) with ML (instead of REML)

LRT: Significance of Diagnosis by Time
	npar	AIC	BIC	logLik	deviance	Chisq	Df	Pr(>Chisq)
Time only	6	2231	2255	-1110	2219	NA	NA	NA
Time X Dx	8	2231	2262	-1107	2215	4.108	2	0.1282

5.3 Models with a quadratic time trend

It might be better to augment the data with a column variable coding the quadratic trend in number of weeks since baseline.

# 加入平方項，為二次方程式
m3 <- lme4::lmer(Score ~ 1 + Week + I(Week^2) + 
                 (1 + Week + I(Week^2) | ID), 
                 data = dtaL,
                 control = lmerControl(optimizer = "optimx",    # get it to converge
                 calc.derivs = FALSE,
                 optCtrl = list(method = "nlminb",
                                starttests = FALSE,
                                kkt = FALSE)))

# 比較線性跟二次方程式的差別
texreg::knitreg(list(m1,
                     m3),
                custom.model.names = c("Linear Trend",
                                       "QUadratic Trend"),
                single.row = TRUE,
                stars = numeric(0),
                caption = "Linear versus quadratic trend",
                caption.above = TRUE, 
                custom.note = "")

Linear versus quadratic trend
	Linear Trend	QUadratic Trend
(Intercept)	23.58 (0.55)	23.76 (0.56)
Week	-2.38 (0.21)	-2.63 (0.48)
Week^2		0.05 (0.09)
AIC	2231.92	2231.62
BIC	2255.48	2270.89
Log Likelihood	-1109.96	-1105.81
Num. obs.	375	375
Num. groups: ID	66	66
Var: ID (Intercept)	12.94	10.75
Var: ID Week	2.13	6.86
Cov: ID (Intercept) Week	-1.48	-1.03
Var: Residual	12.21	10.51
Var: ID I(Week^2)		0.20
Cov: ID (Intercept) I(Week^2)		-0.10
Cov: ID Week I(Week^2)		-0.97

#發現m3更加適配(二次方程式)，npar、bic較大，aic較小
anova(m1, m3)

Data: dtaL
Models:
m1: Score ~ 1 + Week + (1 + Week | ID)
m3: Score ~ 1 + Week + I(Week^2) + (1 + Week + I(Week^2) | ID)
   npar  AIC  BIC logLik deviance Chisq Df Pr(>Chisq)
m1    6 2231 2255  -1110     2219                    
m3   10 2228 2267  -1104     2208 11.39  4     0.0225

# 了解m3的固定效果
fixef(m3)

(Intercept)        Week   I(Week^2) 
 23.7607641  -2.6332347   0.0516461

# 了解m3的相關係數
coef(m3)$ID |> head()

    (Intercept)      Week I(Week^2)
101     25.1998 -5.306775  0.153508
103     27.5675 -3.485660  0.128336
104     26.0138 -3.088029 -0.179923
105     20.9825 -2.936237  0.162536
106     23.6232 -0.721623 -0.145820
107     22.6693 -1.943059 -0.187268

# subject 115
fun_115 <- function(Week){
  coef(m3)$ID["115", "(Intercept)"] +
  coef(m3)$ID["115", "Week"] * Week +
  coef(m3)$ID["115", "I(Week^2)"] * Week^2
}

# subject 610
fun_610 <- function(Week){
  coef(m3)$ID["610", "(Intercept)"] +
  coef(m3)$ID["610", "Week"] * Week +
  coef(m3)$ID["610", "I(Week^2)"] * Week^2
}

# 畫圖表示115、610的xy曲線關係，一樣為負相關
dtaL %>% 
  dplyr::mutate(pred_fixed = predict(m3, re.form = NA)) %>% # fixed effects only
  dplyr::mutate(pred_wrand = predict(m3)) %>%               # fixed and random effects together
  ggplot(aes(x = Week,
             y = Score,
             group = ID)) +
  stat_function(fun = fun_115) +          # add cure for ID = 115
  stat_function(fun = fun_610) +          # add cure for ID = 610
  geom_line(aes(y        = pred_fixed),
                color    = "blue",
                size     = 1.25)  +
  labs(x = "Weeks Since Baseline",
       y = "Hamilton Depression Scores",
       subtitle = "Marginal Mean show in thicker blue\nBLUPs for two of the participant in thinner black")+
  theme_minimal()+
  theme(legend.position = c(0, 0),
        legend.justification = c(-0.1, -0.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))

dtaL %>% 
  dplyr::mutate(pred_fixed = predict(m3, re.form = NA)) %>% # fixed effects only
  dplyr::mutate(pred_wrand = predict(m3)) %>%               # fixed and random effects together
  ggplot(aes(x = Week,
             y = Score,
             group = ID)) +
  geom_line(aes(y        = pred_wrand,
                color    = "BLUP",
                size     = "BLUP",
                linetype = "BLUP")) +
  geom_line(aes(y        = pred_fixed,
                color    = "Marginal",
                size     = "Marginal",
                linetype = "Marginal")) +
  scale_color_manual(name   = "Type of Prediction",
                     values = c("BLUP"     = "gray50",
                                "Marginal" = "blue"))  +
  scale_size_manual(name    = "Type of Prediction",
                    values = c("BLUP"      = .5,
                                "Marginal" = 1.25))  +
  scale_linetype_manual(name   = "Type of Prediction",
                        values = c("BLUP"     = "longdash",
                                   "Marginal" = "solid"))  +
  labs(x = "Weeks Since Baseline",
       y = "Hamilton Depression Scores")+
  theme_minimal()+
  theme(legend.position = c(0, 0),
        legend.justification = c(-0.1, -0.1),
        legend.background = element_rect(color = "black"),
        legend.key.width = unit(1.5, "cm"))

6 References

Hedeker, D. (2004). An introduction to growth modeling. In D. Kaplan (Ed.), Quantitative Methodology for the Social Sciences. Thousand Oaks CA: Sage Publications. pdf

Reisby, N., Gram, L.F., Bech, P. et al. (1977). Imipramine: Clinical effects and pharmacokinetic variability. Psychopharmacology, 54, 263-272.

Riesby

Your NAME

2022-10-06