Here it should be “right censored”: everyone’s observation time is [0,t], t is time of death or censor. Sometimes it can be other conditions like “left censored” (\(t_i\),T), “interval” (\(t_{i1},t_{i2}\)).
1, Kaplan-Meier plotter
2, Cox model: include predictors (e.g. sex), assume proportional hazard ratio.
https://cran.r-project.org/web/packages/survival/vignettes/survival.pdf
library(readxl)
dat <- read_excel(path0)
## New names:
## • `` -> `...3`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
View(dat)
library(survival)
library("survminer")
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
require(coxme)
## Loading required package: coxme
## Loading required package: bdsmatrix
##
## Attaching package: 'bdsmatrix'
##
## The following object is masked from 'package:base':
##
## backsolve
n<-length(dat$OS)
df<-data.frame(OS = dat$OS, OS_t = dat$OS_Time,
PFS = dat$PFS, PFS_t = dat$PFS_Time,
x = rnorm(n,0,1))
# OS, 1 is death
km1<- survfit(Surv(df$OS_t, df$OS)~1)
plot(km1)
summary(km1)
## Call: survfit(formula = Surv(df$OS_t, df$OS) ~ 1)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6.30 96 1 0.990 0.0104 0.969 1.000
## 7.93 95 1 0.979 0.0146 0.951 1.000
## 8.53 94 1 0.969 0.0178 0.935 1.000
## 9.40 93 1 0.958 0.0204 0.919 0.999
## 11.30 89 1 0.948 0.0228 0.904 0.993
## 11.47 88 1 0.937 0.0250 0.889 0.987
## 11.97 87 1 0.926 0.0269 0.875 0.980
## 15.00 81 1 0.915 0.0289 0.860 0.973
## 15.83 80 1 0.903 0.0307 0.845 0.965
## 16.03 79 1 0.892 0.0324 0.830 0.958
## 16.10 78 1 0.880 0.0339 0.816 0.949
## 16.90 77 1 0.869 0.0354 0.802 0.941
## 18.23 72 1 0.857 0.0369 0.787 0.932
## 18.30 71 1 0.845 0.0383 0.773 0.923
## 19.20 65 1 0.832 0.0398 0.757 0.914
## 19.63 63 1 0.819 0.0413 0.741 0.904
## 21.63 60 1 0.805 0.0428 0.725 0.893
## 22.33 59 1 0.791 0.0442 0.709 0.883
## 22.67 58 1 0.778 0.0455 0.693 0.872
## 23.93 56 1 0.764 0.0468 0.677 0.861
## 24.60 53 1 0.749 0.0481 0.661 0.850
## 24.90 52 1 0.735 0.0493 0.644 0.838
## 25.03 51 1 0.720 0.0504 0.628 0.826
## 27.10 47 1 0.705 0.0516 0.611 0.814
## 27.23 46 1 0.690 0.0527 0.594 0.801
## 27.77 44 1 0.674 0.0538 0.577 0.788
## 28.27 43 1 0.658 0.0547 0.559 0.775
## 32.10 38 1 0.641 0.0560 0.540 0.761
## 32.90 37 1 0.624 0.0571 0.521 0.746
## 33.13 36 1 0.606 0.0581 0.503 0.732
## 33.20 35 1 0.589 0.0589 0.484 0.717
## 33.37 33 1 0.571 0.0598 0.465 0.701
## 34.07 32 1 0.553 0.0605 0.447 0.686
## 37.40 27 1 0.533 0.0617 0.425 0.669
## 37.83 25 1 0.512 0.0628 0.402 0.651
## 39.43 24 1 0.490 0.0637 0.380 0.632
## 39.73 23 1 0.469 0.0644 0.358 0.614
## 40.43 21 1 0.447 0.0651 0.336 0.594
## 44.97 10 1 0.402 0.0723 0.283 0.572
## 46.83 6 1 0.335 0.0858 0.203 0.554
#cox model: x can be any covariate/predictor
coxfit<- coxph( Surv(OS_t, OS) ~ x, data=df)
# check proportional hazard ratio assumption
test.ph <- cox.zph(coxfit)
ggcoxzph(test.ph)
# cox model result
summary(coxfit)
## Call:
## coxph(formula = Surv(OS_t, OS) ~ x, data = df)
##
## n= 96, number of events= 40
##
## coef exp(coef) se(coef) z Pr(>|z|)
## x -0.2548 0.7751 0.1495 -1.704 0.0883 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## x 0.7751 1.29 0.5782 1.039
##
## Concordance= 0.552 (se = 0.048 )
## Likelihood ratio test= 2.91 on 1 df, p=0.09
## Wald test = 2.9 on 1 df, p=0.09
## Score (logrank) test = 2.91 on 1 df, p=0.09
# PFS
km2<- survfit(Surv(df$PFS_t, df$PFS)~1)
plot(km1)
#summary(km1)
#cox model: x can be any covariate/predictor
coxfit<- coxph( Surv(PFS_t, PFS) ~ x, data=df)
# check proportional hazard ratio assumption
test.ph <- cox.zph(coxfit)
ggcoxzph(test.ph)
# cox model result
summary(coxfit)
## Call:
## coxph(formula = Surv(PFS_t, PFS) ~ x, data = df)
##
## n= 76, number of events= 46
## (20 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## x -0.3202 0.7260 0.1452 -2.205 0.0275 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## x 0.726 1.377 0.5462 0.9651
##
## Concordance= 0.59 (se = 0.048 )
## Likelihood ratio test= 4.85 on 1 df, p=0.03
## Wald test = 4.86 on 1 df, p=0.03
## Score (logrank) test = 4.88 on 1 df, p=0.03
# event = 1 for PFS;
# event = 2 for OS;
# event = 0 for censor;
etime <- with(df, ifelse(PFS==1, PFS_t, OS_t))
event <- with(df, ifelse(PFS==1, 1, 2*OS))
etime[which(event==0)] <- pmax( df$OS_t[which(event==0)] ,
df$PFS_t[which(event==0)] ,na.rm = T)
event <- factor(event, 0:2, c("censored", "PFS", "death"))
crfit <- survfit(Surv(etime, event) ~ 1, df)
plot(crfit)
#res<- summary(crfit)
### Compare
plot(km1$time,1 - km1$surv,type="l",ylim = c(0,0.8),xlab="time",ylab="cumulative probability",main="Comparison with competing risks")
lines(km2$time,1 - km2$surv,col=2)
lines(crfit$time,crfit$pstate[,2],col=3)
lines(crfit$time,crfit$pstate[,3],col=4)
legend(5,0.7,legend=c("OS","PFS","OS_cr","PFS_cr"),
col=c(1,2,3,4),lwd=c(1,1,1,1))
https://www.rensvandeschoot.com/tutorials/lme4/
\(Y_{ij}\) i th class, j th observation. There is correlation between \(Y_{i1},Y_{i2},Y_{i3},...,Y_{iJ}\).
\[Y_{ij} = X_{ij}\beta + b_i + \epsilon_{ij} \]
\(\beta\) is the fixed effect (e.g. sex/time/treatment effect, which is our main interest), \(b_i\) is the random effect (different class/people may have different random baseline value, which is not our interest). Fix + Random = Mixed effect
library(lme4) # for the analysis
## Loading required package: Matrix
library(haven) # to load the SPSS .sav file
library(tidyverse) # needed for data manipulation.
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
library(RColorBrewer) # needed for some extra colours in one of the graphs
library(lmerTest)# to get p-value estimations that are not part of the standard lme4 packages
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
popular2data <- read_sav(file ="https://github.com/MultiLevelAnalysis/Datasets-third-edition-Multilevel-book/blob/master/chapter%202/popularity/SPSS/popular2.sav?raw=true")
popular2data <- select(popular2data, pupil, class, extrav, sex, texp, popular) # we select just the variables we will use
head(popular2data) # we have a look at the first 6 observations
## # A tibble: 6 × 6
## pupil class extrav sex texp popular
## <dbl> <dbl> <dbl> <dbl+lbl> <dbl> <dbl>
## 1 1 1 5 1 [girl] 24 6.3
## 2 2 1 7 0 [boy] 24 4.9
## 3 3 1 4 1 [girl] 24 5.3
## 4 4 1 3 1 [girl] 24 4.7
## 5 5 1 5 1 [girl] 24 6
## 6 6 1 4 0 [boy] 24 4.7
interceptonlymodel <- lmer(formula = popular ~ 1 + (1|class),
data = popular2data) #to run the model
summary(interceptonlymodel) #to get paramater estimates.
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + (1 | class)
## Data: popular2data
##
## REML criterion at convergence: 6330.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5655 -0.6975 0.0020 0.6758 3.3175
##
## Random effects:
## Groups Name Variance Std.Dev.
## class (Intercept) 0.7021 0.8379
## Residual 1.2218 1.1053
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.07786 0.08739 98.90973 58.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model1 <- lmer(formula = popular ~ 1 + sex + extrav + (1|class),
data = popular2data,REML = TRUE)
model2 <- lmer(popular ~ 1 + sex + extrav + texp + (1 | class), data=popular2data,REML = TRUE)
model3 <- lmer(formula = popular ~ 1 + sex + extrav + texp + (1 + extrav | class),data = popular2data,REML = TRUE)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: popular ~ 1 + sex + extrav + texp + (1 | class)
## Data: popular2data
##
## REML criterion at convergence: 4885
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1745 -0.6491 -0.0075 0.6705 3.0078
##
## Random effects:
## Groups Name Variance Std.Dev.
## class (Intercept) 0.2954 0.5435
## Residual 0.5920 0.7694
## Number of obs: 2000, groups: class, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 8.098e-01 1.700e-01 2.264e+02 4.764 3.4e-06 ***
## sex 1.254e+00 3.729e-02 1.948e+03 33.623 < 2e-16 ***
## extrav 4.544e-01 1.616e-02 1.955e+03 28.112 < 2e-16 ***
## texp 8.841e-02 8.764e-03 1.016e+02 10.087 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sex extrav
## sex -0.040
## extrav -0.589 -0.090
## texp -0.802 -0.036 0.139
### hypothesis testing
model1 <- lmer(formula = popular ~ 1 + sex + extrav + (1|class),
data = popular2data,REML = FALSE)
model2 <- lmer(popular ~ 1 + sex + extrav + texp + (1 | class), data=popular2data,REML = FALSE)
model3 <- lmer(formula = popular ~ 1 + sex + extrav + texp + (1 + extrav | class),data = popular2data,REML = FALSE)
anova(model1,model2,model3)
## Data: popular2data
## Models:
## model1: popular ~ 1 + sex + extrav + (1 | class)
## model2: popular ~ 1 + sex + extrav + texp + (1 | class)
## model3: popular ~ 1 + sex + extrav + texp + (1 + extrav | class)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 5 4944.0 4972.0 -2467.0 4934.0
## model2 6 4874.3 4907.9 -2431.2 4862.3 71.657 1 < 2.2e-16 ***
## model3 8 4828.8 4873.6 -2406.4 4812.8 49.489 2 1.794e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation test: no assumption
pearson cares the numeric value. spearman only cares rank information, (1,2,3) and (1,2,300000) are the same.
Population test:
All “t-test” assumes normal distribution, but usually they are robust to non-normal dist. We should check normal assumption first.
t test assuming equal variance
t test don’t assume equal variance
paired t test
Wilcoxon
x<- c(0.32,0.58,0.42,0.49,0.35,4.9,4.38,4.58,4.75,
4.65,2.93,2.06,5.94,3.47,3.42,0.07,2.66,0.68,
1.07,1.09,2.16,0.11,0.36)
y<- c(7.37,13.96,12.72,7.78,2.56,1.89,0.79,1.04,11.71,7.62,7.49,7.33,6.7)
# cor.test(x,y, method=c("spearman"))
# cor.test(x,y, method=c("pearson"))
### qqnorm is visualization, shapiro test is numeric p value
qqnorm(x)
shapiro.test(x)
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.88042, p-value = 0.0102
qqnorm(y)
shapiro.test(y)
##
## Shapiro-Wilk normality test
##
## data: y
## W = 0.91201, p-value = 0.1954
NUll hypothesis “The two populations’ means are equal”
Steps:
1, equal variance test, null hypothesis = “equal variance”; p value < 0.05 => variance not equal.
2, when we don’t reject “equal variance”, we can use t test assuming equal variance
df<-data.frame(value = c(x,y),
grp = rep(1:2,c(length(x),length(y))))
var.test(value~grp, df)
##
## F test to compare two variances
##
## data: value by grp
## F = 0.19636, num df = 22, denom df = 12, p-value = 0.0009803
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.06451858 0.51085846
## sample estimates:
## ratio of variances
## 0.1963589
t.test(x,y,var.equal = T)
##
## Two Sample t-test
##
## data: x and y
## t = -4.4302, df = 34, p-value = 9.291e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.719715 -2.493395
## sample estimates:
## mean of x mean of y
## 2.236522 6.843077
NUll hypothesis “The two populations’ means are equal”
t.test(x,y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = -3.6431, df = 14.713, p-value = 0.002472
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.306274 -1.906837
## sample estimates:
## mean of x mean of y
## 2.236522 6.843077
NUll hypothesis “The two populations are equal”
wilcox.test(x,y, alternative = "two.sided")
##
## Wilcoxon rank sum exact test
##
## data: x and y
## W = 50, p-value = 0.0006587
## alternative hypothesis: true location shift is not equal to 0
Only use it when data are paired, so their lengths must be equal. For example, for the same patient, before treatment \(X_i\) and after treatment \(Y_i\).
Null hypothesis “mean of X = mean of Y”. Don’t need to consider variance.
x<-rnorm(20,0,1)
y<-rnorm(20,0,1)
t.test(x-y)
##
## One Sample t-test
##
## data: x - y
## t = -0.44377, df = 19, p-value = 0.6622
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.7709851 0.5012415
## sample estimates:
## mean of x
## -0.1348718
x<-c(0.422376,0.555784,0.607614,0.781744,0.840055,0.995218,1.090951,1.373964,1.399265,1.599938,1.978858,2.039173,2.305395,2.586751,2.826646,2.955038,3.167083,3.578979,4.067678,4.266187,4.384108,4.518732,5.180756,5.520317,5.848557,6.722273,7.210925,7.29198,8.930193,10.14035,10.32676,11.24256,11.80399,14.9188,15.17216,15.87535,17.9984,19.24664,20.00856,22.84224,23.60682,25.31154,29.38721,30.11277,32.06283,33.92767,35.70798,35.89244,38.22954,40.64752,45.71114,47.97598,51.18917,51.67894,58.33876,64.37144,66.79435,68.9272,80.53041,80.96609,87.36751,103.3995,112.9875,122.5788,128.8399,131.667,139.3232,162.7967,177.5409,213.0073,232.8893,259.1322,284.0513,329.038,361.9655,419.6987,461.9741)
y<-c(6.634894,10.33565,7.045976,3.236468,12.94213,3.883939,5.739301,10.26963,3.982524,3.258092,2.547234,3.361322,3.741736,1.142273,3.127348,2.698529,2.777205,2.824821,2.009283,1.906099,3.013689,1.405149,1.814392,1.568679,1.177385,1.160061,3.969568,1.672434,1.691622,1.074819,1.339342,1.116547,1.308455,1.630997,1.936904,1.604003,1.284585,1.38928,0.940331,0.877433,1.659416,0.16255,0.434923,1.100193,0.79823,0.745545,1.020949,0.890098,0.960144,0.67472,0.702365,0.574157,0.917713,0.46149,0.271931,1.212296,0.448782,0.36205,0.54298,0.964284,0.649908,0.49208,0.351167,0.135533,0.200054,0.460075,0.561182,0.478768,0.146954,0.235475,0.449661,0.332787,0.278518,0.314042,0.429166,0.197705,0.547256)
summary(log(x))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8619 1.4507 2.9962 2.8630 4.2331 6.1355
plot(x,y)
plot(log(x),log(y))
df<-data.frame(x=x,y=y)
lm0<-lm(I(log(y)) ~ I(log(x)),data=df)
(res<-summary(lm0))
##
## Call:
## lm(formula = I(log(y)) ~ I(log(x)), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.73320 -0.20505 0.01994 0.27359 0.95200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.55336 0.09820 15.82 <2e-16 ***
## I(log(x)) -0.50659 0.02891 -17.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4638 on 75 degrees of freedom
## Multiple R-squared: 0.8037, Adjusted R-squared: 0.8011
## F-statistic: 307.1 on 1 and 75 DF, p-value: < 2.2e-16
new_df<-data.frame(x= exp(seq(-0.9,6.5,0.1)))
pred<- predict(lm0, newdata = df, interval = 'confidence')
pred<-as.data.frame(pred)
pred<-cbind(pred,df)
head(pred) # confidence interval of log(y): [L,U]
## fit lwr upr x y
## 1 1.989975 1.751019 2.228931 0.422376 6.634894
## 2 1.850924 1.626050 2.075798 0.555784 10.335650
## 3 1.805756 1.585406 2.026106 0.607614 7.045976
## 4 1.678101 1.470383 1.885819 0.781744 3.236468
## 5 1.641656 1.437499 1.845814 0.840055 12.942130
## 6 1.555792 1.359932 1.751651 0.995218 3.883939
# Then confidence interval of y is exp( [L,U] )
ggplot(df, aes(y = log(y), x = log(x))) +
geom_point(position = "jitter") +
geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(df, aes(y = y, x = x)) +
geom_point(position = "jitter") +
geom_ribbon(aes(ymin = exp(pred$lwr), ymax = exp(pred$upr)))
library(dplyr)
dat <- read_excel(path0,sheet=4)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `G2 type patients (N=54)` -> `G2 type patients (N=54)...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `G2 type patients (N=54)` -> `G2 type patients (N=54)...11`
## • `` -> `...12`
#View(dat)
df<- rbind(as.matrix(dat[2:25,1:4]) , as.matrix(dat[2:55,6:9]))
df<- data.frame(value = as.numeric(df), time = rep(1:4,each = (24+54)) )
df$group <- as.factor(rep(rep(1:2,c(24,54)) ,4))
df$id <- rep(1:78,4)
df$baseline <- rep(df$value[1:78],4)
df$ratio <- df$value / df$baseline
ggplot(df, aes(y = value, x = time, color = group)) +
geom_point(position = "jitter") +
geom_smooth(method = "lm", fill = NA)
## `geom_smooth()` using formula = 'y ~ x'
ggplot(df, aes(y = log(value), x = time, color = group)) +
geom_point(position = "jitter") +
geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
ggplot(df, aes(y = log(value), x = time, color = group)) +
geom_point(position = "jitter") +
geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.2117e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 0.985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.2117e-16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0602
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.0188e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 0.985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 5.0188e-16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0602
ggplot(df%>%filter(time>1), aes(y = log(ratio), x = time, color = group)) +
geom_point(position = "jitter") +
geom_smooth(method = "lm", fill = NA)
## `geom_smooth()` using formula = 'y ~ x'
model1 <- lmer(formula = I(log(value)) ~ 1 + time + group + time * group + (1|id),
data = df,REML = TRUE)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(value)) ~ 1 + time + group + time * group + (1 | id)
## Data: df
##
## REML criterion at convergence: 372.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8361 -0.3961 -0.0614 0.3978 4.2506
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.43159 0.6570
## Residual 0.08672 0.2945
## Number of obs: 312, groups: id, 78
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.28397 0.15298 114.55341 21.466 < 2e-16 ***
## time -0.11949 0.02688 232.00000 -4.445 1.36e-05 ***
## group2 -0.08314 0.18386 114.55341 -0.452 0.65198
## time:group2 -0.12300 0.03231 232.00000 -3.807 0.00018 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time group2
## time -0.439
## group2 -0.832 0.366
## time:group2 0.366 -0.832 -0.439
model2 <- lmer(formula = I(log(value)) ~ 1 + time + time : group + (1|id),
data = df,REML = TRUE)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(value)) ~ 1 + time + time:group + (1 | id)
## Data: df
##
## REML criterion at convergence: 371
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8384 -0.3995 -0.0482 0.3944 4.2395
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.42782 0.6541
## Residual 0.08667 0.2944
## Number of obs: 312, groups: id, 78
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.22641 0.08457 116.17567 38.152 < 2e-16 ***
## time -0.11502 0.02500 285.74399 -4.600 6.34e-06 ***
## time:group2 -0.12946 0.02899 304.19613 -4.465 1.13e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time
## time -0.263
## time:group2 0.000 -0.803
model3 <- lmer(formula = I(log(value)) ~ 1 + time + time : group + (1+time|id),
data = df,REML = TRUE)
summary(model3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(value)) ~ 1 + time + time:group + (1 + time | id)
## Data: df
##
## REML criterion at convergence: 266.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3628 -0.3433 0.0349 0.3362 2.5897
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.78554 0.8863
## time 0.03451 0.1858 -0.68
## Residual 0.02984 0.1728
## Number of obs: 312, groups: id, 78
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.22641 0.10317 77.00058 31.271 < 2e-16 ***
## time -0.11071 0.03359 99.89250 -3.295 0.001361 **
## time:group2 -0.13569 0.03567 76.00025 -3.804 0.000286 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time
## time -0.469
## time:group2 0.000 -0.735
model1 <- lmer(formula = I(log(ratio)) ~ 1 + time + group + time * group + (1|id),
data = df%>%filter(time>1),REML = TRUE)
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(ratio)) ~ 1 + time + group + time * group + (1 | id)
## Data: df %>% filter(time > 1)
##
## REML criterion at convergence: 167.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7787 -0.4739 -0.0594 0.5011 3.7847
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.21035 0.4586
## Residual 0.04625 0.2151
## Number of obs: 234, groups: id, 78
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.03014 0.13446 197.77447 -0.224 0.822850
## time -0.08496 0.03104 154.00000 -2.737 0.006930 **
## group2 0.21393 0.16160 197.77447 1.324 0.187091
## time:group2 -0.14398 0.03731 154.00000 -3.859 0.000167 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time group2
## time -0.693
## group2 -0.832 0.576
## time:group2 0.576 -0.832 -0.693
model2 <- lmer(formula = I(log(ratio)) ~ 1 + time + time : group + (1|id),
data = df%>%filter(time>1),REML = TRUE)
summary(model2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(ratio)) ~ 1 + time + time:group + (1 | id)
## Data: df %>% filter(time > 1)
##
## REML criterion at convergence: 167.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7637 -0.4565 -0.0642 0.4895 3.8409
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.21147 0.4599
## Residual 0.04636 0.2153
## Number of obs: 234, groups: id, 78
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.11796 0.07473 198.36497 1.579 0.116
## time -0.10861 0.02541 225.54228 -4.275 2.83e-05 ***
## time:group2 -0.10983 0.02696 210.05005 -4.074 6.56e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time
## time -0.470
## time:group2 0.000 -0.735
model3 <- lmer(formula = I(log(ratio)) ~ 1 + time + time : group + (1+time|id),
data = df%>%filter(time>1),REML = TRUE)
summary(model3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: I(log(ratio)) ~ 1 + time + time:group + (1 + time | id)
## Data: df %>% filter(time > 1)
##
## REML criterion at convergence: 96.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4517 -0.3960 -0.0028 0.3808 3.2423
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.06940 0.2634
## time 0.02412 0.1553 -0.28
## Residual 0.02293 0.1514
## Number of obs: 234, groups: id, 78
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.11796 0.04807 76.99994 2.454 0.016383 *
## time -0.12253 0.03389 89.75265 -3.616 0.000494 ***
## time:group2 -0.08971 0.03800 76.00014 -2.361 0.020789 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) time
## time -0.360
## time:group2 0.000 -0.776
plot(fitted(model2), resid(model2, type = "pearson"),col=df$time)# this will create the plot
abline(0,0, col="red")
plot(fitted(model3), resid(model3, type = "pearson"))# this will create the plot
abline(0,0, col="red")
qqnorm(resid(model2),col=df$time)
qqline(resid(model2), col = "red") # add a perfect fit line
shapiro.test(resid(model2))
##
## Shapiro-Wilk normality test
##
## data: resid(model2)
## W = 0.9712, p-value = 0.0001084
qqnorm(resid(model3))
qqline(resid(model3), col = "red") # add a perfect fit line
shapiro.test(resid(model3))
##
## Shapiro-Wilk normality test
##
## data: resid(model3)
## W = 0.90458, p-value = 4.683e-11