setwd("e:\\cystatin.c in gbs")
load("gbs.Rdata")
options(digit = 3) #显示2位小数点数字
attach(gbsdata)
# 下列部分创建一个根据disease分组比较cystatin c 的箱图
boxplot(cystatin.c ~ disease, col = "burlywood1", cex.axis = 0.75, notch = TRUE,
varwidth = TRUE, main = "Figue 1.Levels of CSF cystatin c in defferent groups",
xlab = "Groups", ylab = "CSF cystatin c(mg/l)", cex.lab = 0.8, cex.main = 1.2,
axes = FALSE) #去除默认的坐标轴,然后通过以下语句重新建立坐标轴)
axis(2, at = 0:14, labels = 0:14)
sor.Dis = length(levels(disease)) + 1
name.Dis = levels(disease)
labe.Dis = c("0", name.Dis, "0")
# labe.Dis
axis(1, at = 0:sor.Dis, labels = labe.Dis)
table(disease) #创建与disease这个变量相关的列联表,即列出disease的分类数
## disease
## Enc GBS MC Men NCG
## 37 20 24 54 22
# 各疾病组cystatin.c的均值和SD值列表
a.mean <- aggregate(cystatin.c, by = list(disease), FUN = mean) #根据disease分类列出不同disease的cystatin c的均值
b.sd <- aggregate(cystatin.c, by = list(disease), FUN = sd) #同上,列出SD值
names(a.mean)[2] <- "mean"
names(b.sd)[2] <- "sd"
merge(a.mean, b.sd, by = "Group.1")
## Group.1 mean sd
## 1 Enc 3.147 0.9344
## 2 GBS 3.744 0.9680
## 3 MC 3.373 1.2792
## 4 Men 4.874 1.2633
## 5 NCG 5.398 1.1481
fit.cc <- aov(cystatin.c ~ disease) #根据不同disease分组通过ANOVA比较各组间cc的均值差异
summary(fit.cc) #与上一语句相关,列出ANOVA结果
## Df Sum Sq Mean Sq F value Pr(>F)
## disease 4 118 29.39 22.4 1.4e-14 ***
## Residuals 152 199 1.31
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit.cc) #通过TukeyHSD函数观察各组间比较
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cystatin.c ~ disease)
##
## $disease
## diff lwr upr p adj
## GBS-Enc 0.5970 -0.2800 1.4740 0.3329
## MC-Enc 0.2255 -0.6027 1.0537 0.9438
## Men-Enc 1.7274 1.0530 2.4018 0.0000
## NCG-Enc 2.2507 1.4000 3.1014 0.0000
## MC-GBS -0.3715 -1.3282 0.5852 0.8206
## Men-GBS 1.1304 0.3033 1.9576 0.0021
## NCG-GBS 1.6537 0.6774 2.6300 0.0001
## Men-MC 1.5019 0.7267 2.2772 0.0000
## NCG-MC 2.0252 1.0925 2.9579 0.0000
## NCG-Men 0.5233 -0.2760 1.3225 0.3730
# plot(TukeyHSD(fit.cc))
# #绘制前述函数的结果plot,显示95%CL的组间比较结果图
library(multcomp) #引入multcomp包,以便使用glht函数,简介明了显示各组间cc比较差异,注意mcp中的disease的使用
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: splines
par(mar = c(5, 4, 6, 2))
# 下一语句将cystatin c临时根据四分位分组
cc.cut <- cut(cystatin.c, breaks = c(quantile(cystatin.c, 0) - 0.01, quantile(cystatin.c,
0.25), quantile(cystatin.c, 0.5), quantile(cystatin.c, 0.75), max(cystatin.c)))
xtabs(~disease + cc.cut) #创建disease和cc.cut相关列联表,按实际数字显示
## cc.cut
## disease (1.57,3.08] (3.08,4.11] (4.11,5.17] (5.17,8.23]
## Enc 18 12 6 1
## GBS 6 8 4 2
## MC 11 5 6 2
## Men 5 11 16 22
## NCG 0 3 7 12
chisq.test(xtabs(~disease + cc.cut)) #独立性检验
## Warning: Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: xtabs(~disease + cc.cut)
## X-squared = 54.69, df = 12, p-value = 2.054e-07
prop.table(xtabs(~disease + cc.cut), 1) * 100 #创建disease和cc.cut相关列联表,按行百分比显示
## cc.cut
## disease (1.57,3.08] (3.08,4.11] (4.11,5.17] (5.17,8.23]
## Enc 48.649 32.432 16.216 2.703
## GBS 30.000 40.000 20.000 10.000
## MC 45.833 20.833 25.000 8.333
## Men 9.259 20.370 29.630 40.741
## NCG 0.000 13.636 31.818 54.545
prop.table(xtabs(~disease + cc.cut), 2) * 100 #创建disease和cc.cut相关列联表,按列百分比显示
## cc.cut
## disease (1.57,3.08] (3.08,4.11] (4.11,5.17] (5.17,8.23]
## Enc 45.000 30.769 15.385 2.564
## GBS 15.000 20.513 10.256 5.128
## MC 27.500 12.821 15.385 5.128
## Men 12.500 28.205 41.026 56.410
## NCG 0.000 7.692 17.949 30.769
library(vcd) #引入vcd包,创建分布图,使用assocstats和spine函数,前者估计相关性,后者绘制分布图
## Loading required package: MASS
## Loading required package: grid
## Loading required package: colorspace
assocstats(xtabs(~disease + cc.cut))
## X^2 df P(> X^2)
## Likelihood Ratio 62.876 12 6.7173e-09
## Pearson 54.694 12 2.0537e-07
##
## Phi-Coefficient : 0.59
## Contingency Coeff.: 0.508
## Cramer's V : 0.341
spine(table(disease, cc.cut), main = "Figue 2. The distribution of CSF cystatin c among different groups")
tuk.cc <- glht(fit.cc, linfct = mcp(disease = "Tukey")) #disease=Tukey表明是根据disease分组
plot(cld(tuk.cc, level = 0.05), col = "lightgrey", ylab = "CSF cystatin c(mg/L)")
# 下面语句将cystatin c按二分位将之分为2部分,原因基于前述四分位结果
cc.cut <- cut(cystatin.c, breaks = c(quantile(cystatin.c, 0) - 0.01, quantile(cystatin.c,
0.5), max(cystatin.c)))
xtabs(~disease + cc.cut) #创建disease和cc.cut相关列联表,按实际数字显示
## cc.cut
## disease (1.57,4.11] (4.11,8.23]
## Enc 30 7
## GBS 14 6
## MC 16 8
## Men 16 38
## NCG 3 19
chisq.test(xtabs(~disease + cc.cut)) #独立性检验
##
## Pearson's Chi-squared test
##
## data: xtabs(~disease + cc.cut)
## X-squared = 40.76, df = 4, p-value = 3.016e-08
prop.table(xtabs(~disease + cc.cut), 1) * 100 #创建disease和cc.cut相关列联表,按行百分比显示
## cc.cut
## disease (1.57,4.11] (4.11,8.23]
## Enc 81.08 18.92
## GBS 70.00 30.00
## MC 66.67 33.33
## Men 29.63 70.37
## NCG 13.64 86.36
prop.table(xtabs(~disease + cc.cut), 2) * 100 #创建disease和cc.cut相关列联表,按列百分比显示
## cc.cut
## disease (1.57,4.11] (4.11,8.23]
## Enc 37.975 8.974
## GBS 17.722 7.692
## MC 20.253 10.256
## Men 20.253 48.718
## NCG 3.797 24.359
# 各疾病组GLU的均值和SD值列表
a.mean <- aggregate(Glu, by = list(disease), FUN = mean) #根据disease分类列出不同disease的cystatin c的均值
b.sd <- aggregate(Glu, by = list(disease), FUN = sd) #同上,列出SD值
names(a.mean)[2] <- "mean"
names(b.sd)[2] <- "sd"
merge(a.mean, b.sd, by = "Group.1")
## Group.1 mean sd
## 1 Enc 4.271 1.5321
## 2 GBS 4.069 0.6749
## 3 MC 2.893 1.4741
## 4 Men 2.341 1.0417
## 5 NCG 4.677 2.7980
fit.glu <- aov(Glu ~ disease) #根据不同disease分组通过ANOVA比较各组间cc的均值差异
summary(fit.glu) #与上一语句相关,列出ANOVA结果
## Df Sum Sq Mean Sq F value Pr(>F)
## disease 4 139 34.9 14.5 4.6e-10 ***
## Residuals 152 365 2.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit.glu) #通过TukeyHSD函数观察各组间比较
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Glu ~ disease)
##
## $disease
## diff lwr upr p adj
## GBS-Enc -0.2016 -1.3890 0.9859 0.9900
## MC-Enc -1.3782 -2.4995 -0.2568 0.0077
## Men-Enc -1.9296 -2.8427 -1.0165 0.0000
## NCG-Enc 0.4057 -0.7461 1.5576 0.8672
## MC-GBS -1.1766 -2.4720 0.1188 0.0943
## Men-GBS -1.7280 -2.8480 -0.6081 0.0003
## NCG-GBS 0.6073 -0.7146 1.9292 0.7109
## Men-MC -0.5514 -1.6011 0.4982 0.5962
## NCG-MC 1.7839 0.5210 3.0468 0.0013
## NCG-Men 2.3353 1.2532 3.4175 0.0000
# plot(TukeyHSD(fit.glu))
# #绘制前述函数的结果plot,显示95%CL的组间比较结果图
library(multcomp) #引入multcomp包,以便使用glht函数,简介明了显示各组间cc比较差异,注意mcp中的disease的使用
par(mar = c(5, 4, 6, 2))
tuk.glu <- glht(fit.glu, linfct = mcp(disease = "Tukey")) #disease=Tukey表明是根据disease分组
plot(cld(tuk.glu, level = 0.05), col = "lightgrey", ylab = "GLU(mmol/L)")
# FILE<-'gbscc'
# #将gbscc.md这个文件转换为FILE目标,便于下一语句利用其生成同名docx文档
# system(paste0('pandoc -o ', FILE, '.docx ', FILE, '.md'))