Giới thiệu tập tin số liệu glucose.csv:
* id: mã số của chuột
* time: thời điểm đo nồng độ đường huyết (trước khi dùng thuốc (T0), 2 (T2), 3 (T3) và 4 giờ (T4) sau dùng thuốc).
* treatment: can thiệp (1= có dùng thuốc, 0= không dùng thuốc)
glucose= read.csv("file:///E:/Dropbox/Phuong Nam's research/Research documents/GS Nguyen Van Tuan/GS Tuan - Khoa hoc phan tich 2020-01-05/Textbook/TDTU Datasets for 2020 Workshop/glucose.csv", header = T)
head (glucose)
## id treatment time glucose
## 1 1 1 0 5.9
## 2 1 1 2 3.9
## 3 1 1 3 3.9
## 4 1 1 4 3.6
## 5 2 1 0 5.3
## 6 2 1 2 4.7
glucose$treatment[glucose$treatment==0] = "N"
glucose$treatment[glucose$treatment==1] = "Y"
glucose$treatment=factor(glucose$treatment, levels = c("N", "Y"))
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.6.2
p = ggplot(data = glucose, aes(x=time, y=glucose, group=id, col=treatment))
p = p + geom_point(size=4) + geom_line(col="black", size=0.5)
p
# Cách code khác
ggplot(data= glucose) +
geom_point(aes(x= time, y= glucose,
fill= factor(treatment),
col= factor(treatment)), size=3)+
geom_line(aes(x= time, y= glucose, group= id))
# Vẽ biểu đồ Chia thành 2 nhóm có điều trị (Y) và nhóm chứng (không điều trị - N)
library("gridExtra")
## Warning: package 'gridExtra' was built under R version 3.6.2
ggplot(data= glucose) +
geom_point(aes(x= time,y= glucose),
color="blue", size=3)+
geom_line(aes(x= time, y= glucose, group= id))+
facet_grid(~ treatment) +
stat_smooth(aes(x= time, y= glucose)) + theme(legend.position = "None")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 3.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1.6401e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0804
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius 3.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 1.6401e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0804
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at -0.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 3.02
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 2.2134e-016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0804
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## -0.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius 3.02
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 2.2134e-016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0804
# Phương pháp code đơn giản để hiển thị giá trị glucose của 2 nhóm ở thời điểm trước khi điều trị (t=0)
g0 = subset(glucose, time==0)
g2 = subset(glucose, time==2)
head(g0)
## id treatment time glucose
## 1 1 Y 0 5.9
## 5 2 Y 0 5.3
## 9 3 Y 0 4.6
## 13 4 Y 0 6.0
## 17 5 Y 0 6.0
## 21 6 Y 0 6.4
head(g2)
## id treatment time glucose
## 2 1 Y 2 3.9
## 6 2 Y 2 4.7
## 10 3 Y 2 3.7
## 14 4 Y 2 5.4
## 18 5 Y 2 5.4
## 22 6 Y 2 4.7
g0$glucose0 = g0$glucose
head(g0)
## id treatment time glucose glucose0
## 1 1 Y 0 5.9 5.9
## 5 2 Y 0 5.3 5.3
## 9 3 Y 0 4.6 4.6
## 13 4 Y 0 6.0 6.0
## 17 5 Y 0 6.0 6.0
## 21 6 Y 0 6.4 6.4
g2$glucose2 = g2$glucose
head(g2)
## id treatment time glucose glucose2
## 2 1 Y 2 3.9 3.9
## 6 2 Y 2 4.7 4.7
## 10 3 Y 2 3.7 3.7
## 14 4 Y 2 5.4 5.4
## 18 5 Y 2 5.4 5.4
## 22 6 Y 2 4.7 4.7
g0 = g0[, c("id", "treatment", "glucose0")]
head(g0)
## id treatment glucose0
## 1 1 Y 5.9
## 5 2 Y 5.3
## 9 3 Y 4.6
## 13 4 Y 6.0
## 17 5 Y 6.0
## 21 6 Y 6.4
g2 = g2[, c("id", "treatment", "glucose2")]
head(g2)
## id treatment glucose2
## 2 1 Y 3.9
## 6 2 Y 4.7
## 10 3 Y 3.7
## 14 4 Y 5.4
## 18 5 Y 5.4
## 22 6 Y 4.7
g02 = merge(g0, g2, by = c("id", "treatment"))
head(g02)
## id treatment glucose0 glucose2
## 1 1 Y 5.9 3.9
## 2 10 N 6.2 5.3
## 3 11 N 6.9 5.6
## 4 12 N 5.6 4.7
## 5 13 N 5.1 3.9
## 6 14 N 5.7 4.7
p = ggplot(data = g0, aes(x=treatment, y=glucose0, fill=treatment))
p = p + geom_boxplot() + geom_jitter(alpha = 0.4)
p
p = ggplot(data = g2, aes(x=treatment, y=glucose2, fill=treatment))
p = p + geom_boxplot() + geom_jitter(alpha = 0.2)
p
# Phương pháp code hiện đại để hiển thị giá trị glucose của 2 nhóm ở thời điểm trước khi điều trị (t=0) sử dụng package "tidyverse"
library (tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.2
## -- Attaching packages ------------------------------------------------------- tidyverse 1.3.0 --
## <U+2713> tibble 2.1.3 <U+2713> dplyr 0.8.3
## <U+2713> tidyr 1.0.0 <U+2713> stringr 1.4.0
## <U+2713> readr 1.3.1 <U+2713> forcats 0.4.0
## <U+2713> purrr 0.3.3
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'stringr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## -- Conflicts ---------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::combine() masks gridExtra::combine()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
g0= filter (.data= glucose, time== 0)
ggplot (data= g0, aes(y= glucose, x= factor(treatment), fill= factor(treatment))) + geom_boxplot()
ggplot (data= g0, aes(y= glucose, x= factor(treatment), fill= factor(treatment))) + geom_boxplot() + geom_jitter(alpha= 0.2)
# So sánh đường huyết lúc chưa điều trị của 2 nhóm dùng package "coin"
library(coin)
## Warning: package 'coin' was built under R version 3.6.2
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.6.2
oneway_test(g0$glucose ~ factor(g0$treatment))
##
## Asymptotic Two-Sample Fisher-Pitman Permutation Test
##
## data: g0$glucose by factor(g0$treatment) (N, Y)
## Z = 0.36664, p-value = 0.7139
## alternative hypothesis: true mu is not equal to 0
median_test(g0$glucose ~ factor(g0$treatment), conf.int = TRUE)
##
## Asymptotic Two-Sample Brown-Mood Median Test
##
## data: g0$glucose by factor(g0$treatment) (N, Y)
## Z = 0.7151, p-value = 0.4745
## alternative hypothesis: true mu is not equal to 0
## 95 percent confidence interval:
## -0.8 1.8
## sample estimates:
## difference in location
## 0.2
# Lọc dữ liệu với time== 0 và time== 2 Sau đó biến đổi dữ liệu từ dòng sang cột bằng package reshape2 và hàm dcast
g2= filter (.data= glucose, time== 0|time== 2)
library (reshape2)
## Warning: package 'reshape2' was built under R version 3.6.2
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
g3= dcast(g2, id + treatment ~ time)
## Using glucose as value column: use value.var to override.
head (g3)
## id treatment 0 2
## 1 1 Y 5.9 3.9
## 2 2 Y 5.3 4.7
## 3 3 Y 4.6 3.7
## 4 4 Y 6.0 5.4
## 5 5 Y 6.0 5.4
## 6 6 Y 6.4 4.7
# Đổi tên biến số 0 thành glucose0 và 2 thành glucose2
names (g3)[names(g3)=='0']= 'glucose0'
names (g3)[names(g3)=='2']= 'glucose2'
head (g3)
## id treatment glucose0 glucose2
## 1 1 Y 5.9 3.9
## 2 2 Y 5.3 4.7
## 3 3 Y 4.6 3.7
## 4 4 Y 6.0 5.4
## 5 5 Y 6.0 5.4
## 6 6 Y 6.4 4.7
# Kiểm định sự khác biệt về mức đường huyết ở thời điểm 2 giờ ở nhóm điều trị và không điều trị
oneway_test(g3$glucose2 ~ factor(g3$treatment))
##
## Asymptotic Two-Sample Fisher-Pitman Permutation Test
##
## data: g3$glucose2 by factor(g3$treatment) (N, Y)
## Z = 0.93739, p-value = 0.3486
## alternative hypothesis: true mu is not equal to 0
# Đặt biến thay đổi đường huyết là diff= glucose2 - glucose0
g3$diff= g3$glucose2 - g3$glucose0
head (g3)
## id treatment glucose0 glucose2 diff
## 1 1 Y 5.9 3.9 -2.0
## 2 2 Y 5.3 4.7 -0.6
## 3 3 Y 4.6 3.7 -0.9
## 4 4 Y 6.0 5.4 -0.6
## 5 5 Y 6.0 5.4 -0.6
## 6 6 Y 6.4 4.7 -1.7
oneway_test(g3$diff ~ factor(g3$treatment))
##
## Asymptotic Two-Sample Fisher-Pitman Permutation Test
##
## data: g3$diff by factor(g3$treatment) (N, Y)
## Z = 0.51813, p-value = 0.6044
## alternative hypothesis: true mu is not equal to 0
# Không xét đến ảnh hưởng của treatment mà chỉ xét ảnh hưởng của time (unconditional model)
# Yij = alphai + betaiTimeij + eij
# alpha i = alpha0 + random u
# beta i = beta0 + random v
# Viết lại phương trình:
# Yij = (ao + boTime) + (random u + random v.Timeij + eij)
# Xét đến ảnh hưởng của treatmen:
# Yij = Ai + BiTimeij + Eij
# Ai = Ao + A1Treat + ui
# Bi = Bo + B1Treat + vi
# Viết lại phương trình:
# Yij = (Ao + A1Treat + BoTime + B1TreatTime) + (u + v.Timeij + Eij)
library("lme4")
## Warning: package 'lme4' was built under R version 3.6.2
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# Không xét đến ảnh hưởng của treatment mà chỉ xét ảnh hưởng của time (unconditional model
m = lmer(glucose ~ time + (1 + time|id), data = glucose)
## boundary (singular) fit: see ?isSingular
summary(m)
## Linear mixed model fit by REML ['lmerMod']
## Formula: glucose ~ time + (1 + time | id)
## Data: glucose
##
## REML criterion at convergence: 165.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6169 -0.4435 -0.0086 0.4548 3.2935
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.6900322 0.83068
## time 0.0009616 0.03101 1.00
## Residual 0.2536389 0.50363
## Number of obs: 76, groups: id, 19
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.0481 0.2177 27.79
## time -0.4699 0.0397 -11.84
##
## Correlation of Fixed Effects:
## (Intr)
## time -0.240
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Xét đến ảnh hưởng của treatmen
m1 = lmer(glucose ~ treatment + time + treatment:time + (1 + time|id), data = glucose)
## boundary (singular) fit: see ?isSingular
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: glucose ~ treatment + time + treatment:time + (1 + time | id)
## Data: glucose
##
## REML criterion at convergence: 166.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.60331 -0.41198 0.00053 0.48474 2.99267
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 0.7680707 0.87640
## time 0.0001578 0.01256 1.00
## Residual 0.2534072 0.50340
## Number of obs: 76, groups: id, 19
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.96539 0.29442 20.262
## treatmentY 0.18082 0.39045 0.463
## time -0.41860 0.05327 -7.859
## treatmentY:time -0.11451 0.07910 -1.448
##
## Correlation of Fixed Effects:
## (Intr) trtmnY time
## treatmentY -0.637
## time -0.403 0.357
## tretmntY:tm 0.269 -0.437 -0.675
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Vì kết quả cho thấy dao động của slope theo time rất nhỏ (=0.0001578) nên có thể bỏ qua dao động này (vi = 0), khi đó phương trình được viết lại như sau:
# Yij = (Ao + A1Treat + BoTime + B1TreatTime) + (u + Eij)
m2 = lmer(glucose ~ treatment + time + treatment:time + (1|id), data = glucose)
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: glucose ~ treatment + time + treatment:time + (1 | id)
## Data: glucose
##
## REML criterion at convergence: 166.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.60263 -0.43223 -0.00001 0.53418 2.95623
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.8148 0.9027
## Residual 0.2542 0.5042
## Number of obs: 76, groups: id, 19
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.96580 0.30178 19.769
## treatmentY 0.18047 0.40102 0.450
## time -0.41616 0.05342 -7.790
## treatmentY:time -0.12016 0.07925 -1.516
##
## Correlation of Fixed Effects:
## (Intr) trtmnY time
## treatmentY -0.638
## time -0.453 0.384
## tretmntY:tm 0.295 -0.476 -0.678
# Tổng quát: Glucose = 5.965 + 0.181treatmentY - 0.419time - 0.115treamentY.time
# Nhóm control: Glucose = 5.965 - 0.419time
# Nhóm treatment: Glucose = 6.146 - 0.534time
data("Orthodont", package="nlme")
head(Orthodont)
## Grouped Data: distance ~ age | Subject
## distance age Subject Sex
## 1 26.0 8 M01 Male
## 2 25.0 10 M01 Male
## 3 29.0 12 M01 Male
## 4 31.0 14 M01 Male
## 5 21.5 8 M02 Male
## 6 22.5 10 M02 Male
library(ggplot2)
theme_set(theme_bw())
ggplot(data=Orthodont)+ geom_point(aes(x= age,y= distance), color= "blue", size= 3)+ geom_line(aes(x= age, y= distance, group= Subject)) + facet_grid(~ Sex)
lm= lm(distance~age:Sex+ Sex, data= Orthodont)
summary(lm)
##
## Call:
## lm(formula = distance ~ age:Sex + Sex, data = Orthodont)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.6156 -1.3219 -0.1682 1.3299 5.2469
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.3406 1.4162 11.538 < 2e-16 ***
## SexFemale 1.0321 2.2188 0.465 0.64279
## age:SexMale 0.7844 0.1262 6.217 1.07e-08 ***
## age:SexFemale 0.4795 0.1522 3.152 0.00212 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.257 on 104 degrees of freedom
## Multiple R-squared: 0.4227, Adjusted R-squared: 0.4061
## F-statistic: 25.39 on 3 and 104 DF, p-value: 2.108e-12
Orthodont$pred.lm= predict(lm)
ggplot(data=Orthodont) + geom_point(aes(x=age,y=distance), color="blue", size=3)+ geom_line(aes(x=age,y=pred.lm)) + facet_grid(~Sex)
m1= lmer(distance~ Sex+ age+ age:Sex+ (1+ age|Subject), data= Orthodont)
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ Sex + age + age:Sex + (1 + age | Subject)
## Data: Orthodont
##
## REML criterion at convergence: 432.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1694 -0.3862 0.0069 0.4454 3.8491
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 5.77441 2.4030
## age 0.03245 0.1801 -0.67
## Residual 1.71661 1.3102
## Number of obs: 108, groups: Subject, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 16.34063 1.01824 16.048
## SexFemale 1.03210 1.59528 0.647
## age 0.78437 0.08598 9.123
## SexFemale:age -0.30483 0.13471 -2.263
##
## Correlation of Fixed Effects:
## (Intr) SexFml age
## SexFemale -0.638
## age -0.880 0.562
## SexFemale:g 0.562 -0.880 -0.638