Stat question dicsussion

how to manage censoring for the cases who lost the follow up.

Here it should be “right censored”: everyone’s observation time is [0,t], t is time of death or censor. Sometimes it can be other conditions like “left censored” (\(t_i\),T), “interval” (\(t_{i1},t_{i2}\)).

1, Kaplan-Meier plotter

2, Cox model: include predictors (e.g. sex), assume proportional hazard ratio.

https://cran.r-project.org/web/packages/survival/vignettes/survival.pdf

library(readxl)
dat <- read_excel(path0)

## New names:
## • `` -> `...3`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`

View(dat)

library(survival)
library("survminer")

## Loading required package: ggplot2
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma

require(coxme)

## Loading required package: coxme
## Loading required package: bdsmatrix
## 
## Attaching package: 'bdsmatrix'
## 
## The following object is masked from 'package:base':
## 
##     backsolve

n<-length(dat$OS)
df<-data.frame(OS = dat$OS, OS_t = dat$OS_Time,
               PFS = dat$PFS, PFS_t = dat$PFS_Time,
               x = rnorm(n,0,1))

# OS, 1 is death
km1<- survfit(Surv(df$OS_t, df$OS)~1)
plot(km1)
summary(km1)

## Call: survfit(formula = Surv(df$OS_t, df$OS) ~ 1)
## 
##   time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   6.30     96       1    0.990  0.0104        0.969        1.000
##   7.93     95       1    0.979  0.0146        0.951        1.000
##   8.53     94       1    0.969  0.0178        0.935        1.000
##   9.40     93       1    0.958  0.0204        0.919        0.999
##  11.30     89       1    0.948  0.0228        0.904        0.993
##  11.47     88       1    0.937  0.0250        0.889        0.987
##  11.97     87       1    0.926  0.0269        0.875        0.980
##  15.00     81       1    0.915  0.0289        0.860        0.973
##  15.83     80       1    0.903  0.0307        0.845        0.965
##  16.03     79       1    0.892  0.0324        0.830        0.958
##  16.10     78       1    0.880  0.0339        0.816        0.949
##  16.90     77       1    0.869  0.0354        0.802        0.941
##  18.23     72       1    0.857  0.0369        0.787        0.932
##  18.30     71       1    0.845  0.0383        0.773        0.923
##  19.20     65       1    0.832  0.0398        0.757        0.914
##  19.63     63       1    0.819  0.0413        0.741        0.904
##  21.63     60       1    0.805  0.0428        0.725        0.893
##  22.33     59       1    0.791  0.0442        0.709        0.883
##  22.67     58       1    0.778  0.0455        0.693        0.872
##  23.93     56       1    0.764  0.0468        0.677        0.861
##  24.60     53       1    0.749  0.0481        0.661        0.850
##  24.90     52       1    0.735  0.0493        0.644        0.838
##  25.03     51       1    0.720  0.0504        0.628        0.826
##  27.10     47       1    0.705  0.0516        0.611        0.814
##  27.23     46       1    0.690  0.0527        0.594        0.801
##  27.77     44       1    0.674  0.0538        0.577        0.788
##  28.27     43       1    0.658  0.0547        0.559        0.775
##  32.10     38       1    0.641  0.0560        0.540        0.761
##  32.90     37       1    0.624  0.0571        0.521        0.746
##  33.13     36       1    0.606  0.0581        0.503        0.732
##  33.20     35       1    0.589  0.0589        0.484        0.717
##  33.37     33       1    0.571  0.0598        0.465        0.701
##  34.07     32       1    0.553  0.0605        0.447        0.686
##  37.40     27       1    0.533  0.0617        0.425        0.669
##  37.83     25       1    0.512  0.0628        0.402        0.651
##  39.43     24       1    0.490  0.0637        0.380        0.632
##  39.73     23       1    0.469  0.0644        0.358        0.614
##  40.43     21       1    0.447  0.0651        0.336        0.594
##  44.97     10       1    0.402  0.0723        0.283        0.572
##  46.83      6       1    0.335  0.0858        0.203        0.554

#cox model: x can be any covariate/predictor
coxfit<- coxph( Surv(OS_t, OS) ~ x, data=df)

# check proportional hazard ratio assumption
test.ph <- cox.zph(coxfit)
ggcoxzph(test.ph)

# cox model result
summary(coxfit)

## Call:
## coxph(formula = Surv(OS_t, OS) ~ x, data = df)
## 
##   n= 96, number of events= 40 
## 
##      coef exp(coef) se(coef)      z Pr(>|z|)  
## x -0.2548    0.7751   0.1495 -1.704   0.0883 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##   exp(coef) exp(-coef) lower .95 upper .95
## x    0.7751       1.29    0.5782     1.039
## 
## Concordance= 0.552  (se = 0.048 )
## Likelihood ratio test= 2.91  on 1 df,   p=0.09
## Wald test            = 2.9  on 1 df,   p=0.09
## Score (logrank) test = 2.91  on 1 df,   p=0.09

# PFS
km2<- survfit(Surv(df$PFS_t, df$PFS)~1)
plot(km1)

#summary(km1)

#cox model: x can be any covariate/predictor
coxfit<- coxph( Surv(PFS_t, PFS) ~ x, data=df)

# check proportional hazard ratio assumption
test.ph <- cox.zph(coxfit)
ggcoxzph(test.ph)

# cox model result
summary(coxfit)

## Call:
## coxph(formula = Surv(PFS_t, PFS) ~ x, data = df)
## 
##   n= 76, number of events= 46 
##    (20 observations deleted due to missingness)
## 
##      coef exp(coef) se(coef)      z Pr(>|z|)  
## x -0.3202    0.7260   0.1452 -2.205   0.0275 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##   exp(coef) exp(-coef) lower .95 upper .95
## x     0.726      1.377    0.5462    0.9651
## 
## Concordance= 0.59  (se = 0.048 )
## Likelihood ratio test= 4.85  on 1 df,   p=0.03
## Wald test            = 4.86  on 1 df,   p=0.03
## Score (logrank) test = 4.88  on 1 df,   p=0.03

# event = 1 for PFS;
# event = 2 for OS;
# event = 0 for censor;
etime <- with(df, ifelse(PFS==1, PFS_t, OS_t))
event <- with(df, ifelse(PFS==1, 1, 2*OS))

etime[which(event==0)] <- pmax( df$OS_t[which(event==0)] ,
                                df$PFS_t[which(event==0)] ,na.rm = T)

event <- factor(event, 0:2, c("censored", "PFS", "death"))

crfit <- survfit(Surv(etime, event) ~ 1, df)
plot(crfit)

#res<- summary(crfit)


### Compare
plot(km1$time,1 - km1$surv,type="l",ylim = c(0,0.8),xlab="time",ylab="cumulative probability",main="Comparison with competing risks")
lines(km2$time,1 - km2$surv,col=2)
lines(crfit$time,crfit$pstate[,2],col=3)
lines(crfit$time,crfit$pstate[,3],col=4)
legend(5,0.7,legend=c("OS","PFS","OS_cr","PFS_cr"),
       col=c(1,2,3,4),lwd=c(1,1,1,1))

What is mixed model in regression

https://www.rensvandeschoot.com/tutorials/lme4/

\(Y_{ij}\) i th class, j th observation. There is correlation between \(Y_{i1},Y_{i2},Y_{i3},...,Y_{iJ}\).

\[Y_{ij} = X_{ij}\beta + b_i + \epsilon_{ij} \]

\(\beta\) is the fixed effect (e.g. sex/time/treatment effect, which is our main interest), \(b_i\) is the random effect (different class/people may have different random baseline value, which is not our interest). Fix + Random = Mixed effect

library(lme4) # for the analysis

## Loading required package: Matrix

library(haven) # to load the SPSS .sav file
library(tidyverse) # needed for data manipulation.

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.1 
## ✔ purrr   0.3.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ tidyr::pack()   masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()

library(RColorBrewer) # needed for some extra colours in one of the graphs
library(lmerTest)# to get p-value estimations that are not part of the standard lme4 packages

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

popular2data <- read_sav(file ="https://github.com/MultiLevelAnalysis/Datasets-third-edition-Multilevel-book/blob/master/chapter%202/popularity/SPSS/popular2.sav?raw=true")

popular2data <- select(popular2data, pupil, class, extrav, sex, texp, popular) # we select just the variables we will use
head(popular2data) # we have a look at the first 6 observations

## # A tibble: 6 × 6
##   pupil class extrav       sex  texp popular
##   <dbl> <dbl>  <dbl> <dbl+lbl> <dbl>   <dbl>
## 1     1     1      5  1 [girl]    24     6.3
## 2     2     1      7  0 [boy]     24     4.9
## 3     3     1      4  1 [girl]    24     5.3
## 4     4     1      3  1 [girl]    24     4.7
## 5     5     1      5  1 [girl]    24     6  
## 6     6     1      4  0 [boy]     24     4.7

interceptonlymodel <- lmer(formula = popular ~ 1 + (1|class),
                           data    = popular2data) #to run the model
summary(interceptonlymodel) #to get paramater estimates.

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + (1 | class)
##    Data: popular2data
## 
## REML criterion at convergence: 6330.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5655 -0.6975  0.0020  0.6758  3.3175 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  class    (Intercept) 0.7021   0.8379  
##  Residual             1.2218   1.1053  
## Number of obs: 2000, groups:  class, 100
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  5.07786    0.08739 98.90973    58.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model1 <- lmer(formula = popular ~ 1 + sex + extrav + (1|class), 
               data    = popular2data,REML = TRUE)
model2 <- lmer(popular ~ 1 + sex + extrav + texp + (1 | class), data=popular2data,REML = TRUE)
model3 <- lmer(formula = popular ~ 1 + sex + extrav + texp + (1 + extrav  | class),data    = popular2data,REML = TRUE)
summary(model2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + sex + extrav + texp + (1 | class)
##    Data: popular2data
## 
## REML criterion at convergence: 4885
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1745 -0.6491 -0.0075  0.6705  3.0078 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  class    (Intercept) 0.2954   0.5435  
##  Residual             0.5920   0.7694  
## Number of obs: 2000, groups:  class, 100
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept) 8.098e-01  1.700e-01 2.264e+02   4.764  3.4e-06 ***
## sex         1.254e+00  3.729e-02 1.948e+03  33.623  < 2e-16 ***
## extrav      4.544e-01  1.616e-02 1.955e+03  28.112  < 2e-16 ***
## texp        8.841e-02  8.764e-03 1.016e+02  10.087  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##        (Intr) sex    extrav
## sex    -0.040              
## extrav -0.589 -0.090       
## texp   -0.802 -0.036  0.139

### hypothesis testing
model1 <- lmer(formula = popular ~ 1 + sex + extrav + (1|class), 
               data    = popular2data,REML = FALSE)
model2 <- lmer(popular ~ 1 + sex + extrav + texp + (1 | class), data=popular2data,REML = FALSE)
model3 <- lmer(formula = popular ~ 1 + sex + extrav + texp + (1 + extrav | class),data    = popular2data,REML = FALSE)

anova(model1,model2,model3)

## Data: popular2data
## Models:
## model1: popular ~ 1 + sex + extrav + (1 | class)
## model2: popular ~ 1 + sex + extrav + texp + (1 | class)
## model3: popular ~ 1 + sex + extrav + texp + (1 + extrav | class)
##        npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## model1    5 4944.0 4972.0 -2467.0   4934.0                         
## model2    6 4874.3 4907.9 -2431.2   4862.3 71.657  1  < 2.2e-16 ***
## model3    8 4828.8 4873.6 -2406.4   4812.8 49.489  2  1.794e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Two datasets statistics

Correlation test: no assumption

pearson cares the numeric value. spearman only cares rank information, (1,2,3) and (1,2,300000) are the same.

Population test:

All “t-test” assumes normal distribution, but usually they are robust to non-normal dist. We should check normal assumption first.

t test assuming equal variance
t test don’t assume equal variance
paired t test
Wilcoxon

x<- c(0.32,0.58,0.42,0.49,0.35,4.9,4.38,4.58,4.75,
  4.65,2.93,2.06,5.94,3.47,3.42,0.07,2.66,0.68,
  1.07,1.09,2.16,0.11,0.36)

y<- c(7.37,13.96,12.72,7.78,2.56,1.89,0.79,1.04,11.71,7.62,7.49,7.33,6.7)


# cor.test(x,y, method=c("spearman"))
# cor.test(x,y, method=c("pearson"))

normal assumption test, null hypothesis=“normal distribution”, p < 0.05 => not normal

### qqnorm is visualization, shapiro test is numeric p value
qqnorm(x)

shapiro.test(x)

## 
##  Shapiro-Wilk normality test
## 
## data:  x
## W = 0.88042, p-value = 0.0102

qqnorm(y)

shapiro.test(y)

## 
##  Shapiro-Wilk normality test
## 
## data:  y
## W = 0.91201, p-value = 0.1954

t test assuming equal variance (personally not recommended)

NUll hypothesis “The two populations’ means are equal”

Steps:

1, equal variance test, null hypothesis = “equal variance”; p value < 0.05 => variance not equal.

2, when we don’t reject “equal variance”, we can use t test assuming equal variance

df<-data.frame(value = c(x,y),
                grp = rep(1:2,c(length(x),length(y))))
var.test(value~grp, df)

## 
##  F test to compare two variances
## 
## data:  value by grp
## F = 0.19636, num df = 22, denom df = 12, p-value = 0.0009803
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.06451858 0.51085846
## sample estimates:
## ratio of variances 
##          0.1963589

t.test(x,y,var.equal = T)

## 
##  Two Sample t-test
## 
## data:  x and y
## t = -4.4302, df = 34, p-value = 9.291e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.719715 -2.493395
## sample estimates:
## mean of x mean of y 
##  2.236522  6.843077

t test assuming unequal variance (personally not recommended)

NUll hypothesis “The two populations’ means are equal”

t.test(x,y)

## 
##  Welch Two Sample t-test
## 
## data:  x and y
## t = -3.6431, df = 14.713, p-value = 0.002472
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.306274 -1.906837
## sample estimates:
## mean of x mean of y 
##  2.236522  6.843077

Wilcoxon Rank Sum Test

NUll hypothesis “The two populations are equal”

wilcox.test(x,y, alternative = "two.sided")

## 
##  Wilcoxon rank sum exact test
## 
## data:  x and y
## W = 50, p-value = 0.0006587
## alternative hypothesis: true location shift is not equal to 0

paired t test

Only use it when data are paired, so their lengths must be equal. For example, for the same patient, before treatment \(X_i\) and after treatment \(Y_i\).

Null hypothesis “mean of X = mean of Y”. Don’t need to consider variance.

x<-rnorm(20,0,1)
y<-rnorm(20,0,1)
t.test(x-y)

## 
##  One Sample t-test
## 
## data:  x - y
## t = -0.44377, df = 19, p-value = 0.6622
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.7709851  0.5012415
## sample estimates:
##  mean of x 
## -0.1348718

Avery

x<-c(0.422376,0.555784,0.607614,0.781744,0.840055,0.995218,1.090951,1.373964,1.399265,1.599938,1.978858,2.039173,2.305395,2.586751,2.826646,2.955038,3.167083,3.578979,4.067678,4.266187,4.384108,4.518732,5.180756,5.520317,5.848557,6.722273,7.210925,7.29198,8.930193,10.14035,10.32676,11.24256,11.80399,14.9188,15.17216,15.87535,17.9984,19.24664,20.00856,22.84224,23.60682,25.31154,29.38721,30.11277,32.06283,33.92767,35.70798,35.89244,38.22954,40.64752,45.71114,47.97598,51.18917,51.67894,58.33876,64.37144,66.79435,68.9272,80.53041,80.96609,87.36751,103.3995,112.9875,122.5788,128.8399,131.667,139.3232,162.7967,177.5409,213.0073,232.8893,259.1322,284.0513,329.038,361.9655,419.6987,461.9741)

y<-c(6.634894,10.33565,7.045976,3.236468,12.94213,3.883939,5.739301,10.26963,3.982524,3.258092,2.547234,3.361322,3.741736,1.142273,3.127348,2.698529,2.777205,2.824821,2.009283,1.906099,3.013689,1.405149,1.814392,1.568679,1.177385,1.160061,3.969568,1.672434,1.691622,1.074819,1.339342,1.116547,1.308455,1.630997,1.936904,1.604003,1.284585,1.38928,0.940331,0.877433,1.659416,0.16255,0.434923,1.100193,0.79823,0.745545,1.020949,0.890098,0.960144,0.67472,0.702365,0.574157,0.917713,0.46149,0.271931,1.212296,0.448782,0.36205,0.54298,0.964284,0.649908,0.49208,0.351167,0.135533,0.200054,0.460075,0.561182,0.478768,0.146954,0.235475,0.449661,0.332787,0.278518,0.314042,0.429166,0.197705,0.547256)

summary(log(x))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8619  1.4507  2.9962  2.8630  4.2331  6.1355

plot(x,y)

plot(log(x),log(y))

df<-data.frame(x=x,y=y)
lm0<-lm(I(log(y)) ~ I(log(x)),data=df)
(res<-summary(lm0))

## 
## Call:
## lm(formula = I(log(y)) ~ I(log(x)), data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.73320 -0.20505  0.01994  0.27359  0.95200 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.55336    0.09820   15.82   <2e-16 ***
## I(log(x))   -0.50659    0.02891  -17.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4638 on 75 degrees of freedom
## Multiple R-squared:  0.8037, Adjusted R-squared:  0.8011 
## F-statistic: 307.1 on 1 and 75 DF,  p-value: < 2.2e-16

new_df<-data.frame(x= exp(seq(-0.9,6.5,0.1)))
pred<- predict(lm0, newdata = df, interval = 'confidence')
pred<-as.data.frame(pred)
pred<-cbind(pred,df)
head(pred) # confidence interval of log(y): [L,U]

##        fit      lwr      upr        x         y
## 1 1.989975 1.751019 2.228931 0.422376  6.634894
## 2 1.850924 1.626050 2.075798 0.555784 10.335650
## 3 1.805756 1.585406 2.026106 0.607614  7.045976
## 4 1.678101 1.470383 1.885819 0.781744  3.236468
## 5 1.641656 1.437499 1.845814 0.840055 12.942130
## 6 1.555792 1.359932 1.751651 0.995218  3.883939

# Then confidence interval of y is exp( [L,U] )

ggplot(df, aes(y = log(y), x = log(x))) +
  geom_point(position = "jitter") +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(df, aes(y = y, x = x)) +
  geom_point(position = "jitter") +
geom_ribbon(aes(ymin = exp(pred$lwr), ymax = exp(pred$upr)))

Gunjan

library(dplyr)

dat <- read_excel(path0,sheet=4)

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `G2 type patients (N=54)` -> `G2 type patients (N=54)...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `G2 type patients (N=54)` -> `G2 type patients (N=54)...11`
## • `` -> `...12`

#View(dat)
df<- rbind(as.matrix(dat[2:25,1:4]) , as.matrix(dat[2:55,6:9]))
df<- data.frame(value = as.numeric(df), time = rep(1:4,each = (24+54)) )

df$group  <- as.factor(rep(rep(1:2,c(24,54)) ,4))
df$id <- rep(1:78,4)

df$baseline <- rep(df$value[1:78],4)
df$ratio <- df$value / df$baseline


ggplot(df, aes(y = value, x = time, color = group)) +
  geom_point(position = "jitter") +
  geom_smooth(method = "lm", fill = NA)

## `geom_smooth()` using formula = 'y ~ x'

ggplot(df, aes(y = log(value), x = time, color = group)) +
  geom_point(position = "jitter") +
  geom_smooth(method = "lm")

## `geom_smooth()` using formula = 'y ~ x'

ggplot(df, aes(y = log(value), x = time, color = group)) +
  geom_point(position = "jitter") +
  geom_smooth(method = "loess")

## `geom_smooth()` using formula = 'y ~ x'

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.985

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2117e-16

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 0.985

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.2117e-16

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0602

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.985

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.0188e-16

## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 0.985

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 5.0188e-16

## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0602

ggplot(df%>%filter(time>1), aes(y = log(ratio), x = time, color = group)) +
  geom_point(position = "jitter") +
  geom_smooth(method = "lm", fill = NA)

## `geom_smooth()` using formula = 'y ~ x'

model1 <- lmer(formula = I(log(value)) ~ 1 + time + group + time * group + (1|id), 
               data    = df,REML = TRUE)
summary(model1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(value)) ~ 1 + time + group + time * group + (1 | id)
##    Data: df
## 
## REML criterion at convergence: 372.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8361 -0.3961 -0.0614  0.3978  4.2506 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 0.43159  0.6570  
##  Residual             0.08672  0.2945  
## Number of obs: 312, groups:  id, 78
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   3.28397    0.15298 114.55341  21.466  < 2e-16 ***
## time         -0.11949    0.02688 232.00000  -4.445 1.36e-05 ***
## group2       -0.08314    0.18386 114.55341  -0.452  0.65198    
## time:group2  -0.12300    0.03231 232.00000  -3.807  0.00018 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) time   group2
## time        -0.439              
## group2      -0.832  0.366       
## time:group2  0.366 -0.832 -0.439

model2 <- lmer(formula = I(log(value)) ~ 1 + time  + time : group + (1|id), 
               data    = df,REML = TRUE)
summary(model2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(value)) ~ 1 + time + time:group + (1 | id)
##    Data: df
## 
## REML criterion at convergence: 371
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8384 -0.3995 -0.0482  0.3944  4.2395 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 0.42782  0.6541  
##  Residual             0.08667  0.2944  
## Number of obs: 312, groups:  id, 78
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   3.22641    0.08457 116.17567  38.152  < 2e-16 ***
## time         -0.11502    0.02500 285.74399  -4.600 6.34e-06 ***
## time:group2  -0.12946    0.02899 304.19613  -4.465 1.13e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) time  
## time        -0.263       
## time:group2  0.000 -0.803

model3 <- lmer(formula = I(log(value)) ~ 1 + time  + time : group + (1+time|id), 
               data    = df,REML = TRUE)
summary(model3)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(value)) ~ 1 + time + time:group + (1 + time | id)
##    Data: df
## 
## REML criterion at convergence: 266.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3628 -0.3433  0.0349  0.3362  2.5897 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  id       (Intercept) 0.78554  0.8863        
##           time        0.03451  0.1858   -0.68
##  Residual             0.02984  0.1728        
## Number of obs: 312, groups:  id, 78
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  3.22641    0.10317 77.00058  31.271  < 2e-16 ***
## time        -0.11071    0.03359 99.89250  -3.295 0.001361 ** 
## time:group2 -0.13569    0.03567 76.00025  -3.804 0.000286 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) time  
## time        -0.469       
## time:group2  0.000 -0.735

model1 <- lmer(formula = I(log(ratio)) ~ 1 + time + group + time * group + (1|id), 
               data    = df%>%filter(time>1),REML = TRUE)
summary(model1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(ratio)) ~ 1 + time + group + time * group + (1 | id)
##    Data: df %>% filter(time > 1)
## 
## REML criterion at convergence: 167.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7787 -0.4739 -0.0594  0.5011  3.7847 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 0.21035  0.4586  
##  Residual             0.04625  0.2151  
## Number of obs: 234, groups:  id, 78
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)  -0.03014    0.13446 197.77447  -0.224 0.822850    
## time         -0.08496    0.03104 154.00000  -2.737 0.006930 ** 
## group2        0.21393    0.16160 197.77447   1.324 0.187091    
## time:group2  -0.14398    0.03731 154.00000  -3.859 0.000167 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) time   group2
## time        -0.693              
## group2      -0.832  0.576       
## time:group2  0.576 -0.832 -0.693

model2 <- lmer(formula = I(log(ratio)) ~ 1 + time  + time : group + (1|id), 
               data    = df%>%filter(time>1),REML = TRUE)
summary(model2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(ratio)) ~ 1 + time + time:group + (1 | id)
##    Data: df %>% filter(time > 1)
## 
## REML criterion at convergence: 167.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7637 -0.4565 -0.0642  0.4895  3.8409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 0.21147  0.4599  
##  Residual             0.04636  0.2153  
## Number of obs: 234, groups:  id, 78
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   0.11796    0.07473 198.36497   1.579    0.116    
## time         -0.10861    0.02541 225.54228  -4.275 2.83e-05 ***
## time:group2  -0.10983    0.02696 210.05005  -4.074 6.56e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) time  
## time        -0.470       
## time:group2  0.000 -0.735

model3 <- lmer(formula = I(log(ratio)) ~ 1 + time  + time : group + (1+time|id), 
               data    = df%>%filter(time>1),REML = TRUE)
summary(model3)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(ratio)) ~ 1 + time + time:group + (1 + time | id)
##    Data: df %>% filter(time > 1)
## 
## REML criterion at convergence: 96.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4517 -0.3960 -0.0028  0.3808  3.2423 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  id       (Intercept) 0.06940  0.2634        
##           time        0.02412  0.1553   -0.28
##  Residual             0.02293  0.1514        
## Number of obs: 234, groups:  id, 78
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  0.11796    0.04807 76.99994   2.454 0.016383 *  
## time        -0.12253    0.03389 89.75265  -3.616 0.000494 ***
## time:group2 -0.08971    0.03800 76.00014  -2.361 0.020789 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) time  
## time        -0.360       
## time:group2  0.000 -0.776

model diagnosis

plot(fitted(model2), resid(model2, type = "pearson"),col=df$time)# this will create the plot
abline(0,0, col="red")

plot(fitted(model3), resid(model3, type = "pearson"))# this will create the plot
abline(0,0, col="red")

qqnorm(resid(model2),col=df$time) 
qqline(resid(model2), col = "red") # add a perfect fit line

shapiro.test(resid(model2))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model2)
## W = 0.9712, p-value = 0.0001084

qqnorm(resid(model3)) 
qqline(resid(model3), col = "red") # add a perfect fit line

shapiro.test(resid(model3))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model3)
## W = 0.90458, p-value = 4.683e-11