setwd("e:\\cystatin.c in gbs")
load("gbs.Rdata")
options(digit = 3)  #显示2位小数点数字
attach(gbsdata)
# 下列部分创建一个根据disease分组比较cystatin c 的箱图
boxplot(cystatin.c ~ disease, col = "burlywood1", cex.axis = 0.75, notch = TRUE, 
    varwidth = TRUE, main = "Figue 1.Levels of CSF cystatin c in defferent groups", 
    xlab = "Groups", ylab = "CSF cystatin c(mg/l)", cex.lab = 0.8, cex.main = 1.2, 
    axes = FALSE)  #去除默认的坐标轴，然后通过以下语句重新建立坐标轴)
axis(2, at = 0:14, labels = 0:14)
sor.Dis = length(levels(disease)) + 1
name.Dis = levels(disease)
labe.Dis = c("0", name.Dis, "0")
# labe.Dis
axis(1, at = 0:sor.Dis, labels = labe.Dis)

plot of chunk unnamed-chunk-1


table(disease)  #创建与disease这个变量相关的列联表，即列出disease的分类数

## disease
## Enc GBS  MC Men NCG 
##  37  20  24  54  22


# 各疾病组cystatin.c的均值和SD值列表
a.mean <- aggregate(cystatin.c, by = list(disease), FUN = mean)  #根据disease分类列出不同disease的cystatin c的均值
b.sd <- aggregate(cystatin.c, by = list(disease), FUN = sd)  #同上，列出SD值
names(a.mean)[2] <- "mean"
names(b.sd)[2] <- "sd"
merge(a.mean, b.sd, by = "Group.1")

##   Group.1  mean     sd
## 1     Enc 3.147 0.9344
## 2     GBS 3.744 0.9680
## 3      MC 3.373 1.2792
## 4     Men 4.874 1.2633
## 5     NCG 5.398 1.1481

fit.cc <- aov(cystatin.c ~ disease)  #根据不同disease分组通过ANOVA比较各组间cc的均值差异
summary(fit.cc)  #与上一语句相关，列出ANOVA结果

##              Df Sum Sq Mean Sq F value  Pr(>F)    
## disease       4    118   29.39    22.4 1.4e-14 ***
## Residuals   152    199    1.31                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(fit.cc)  #通过TukeyHSD函数观察各组间比较

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = cystatin.c ~ disease)
## 
## $disease
##            diff     lwr    upr  p adj
## GBS-Enc  0.5970 -0.2800 1.4740 0.3329
## MC-Enc   0.2255 -0.6027 1.0537 0.9438
## Men-Enc  1.7274  1.0530 2.4018 0.0000
## NCG-Enc  2.2507  1.4000 3.1014 0.0000
## MC-GBS  -0.3715 -1.3282 0.5852 0.8206
## Men-GBS  1.1304  0.3033 1.9576 0.0021
## NCG-GBS  1.6537  0.6774 2.6300 0.0001
## Men-MC   1.5019  0.7267 2.2772 0.0000
## NCG-MC   2.0252  1.0925 2.9579 0.0000
## NCG-Men  0.5233 -0.2760 1.3225 0.3730

# plot(TukeyHSD(fit.cc))
# #绘制前述函数的结果plot，显示95%CL的组间比较结果图
library(multcomp)  #引入multcomp包，以便使用glht函数，简介明了显示各组间cc比较差异，注意mcp中的disease的使用

## Loading required package: mvtnorm

## Loading required package: survival

## Loading required package: splines

par(mar = c(5, 4, 6, 2))
# 下一语句将cystatin c临时根据四分位分组
cc.cut <- cut(cystatin.c, breaks = c(quantile(cystatin.c, 0) - 0.01, quantile(cystatin.c, 
    0.25), quantile(cystatin.c, 0.5), quantile(cystatin.c, 0.75), max(cystatin.c)))
xtabs(~disease + cc.cut)  #创建disease和cc.cut相关列联表,按实际数字显示

##        cc.cut
## disease (1.57,3.08] (3.08,4.11] (4.11,5.17] (5.17,8.23]
##     Enc          18          12           6           1
##     GBS           6           8           4           2
##     MC           11           5           6           2
##     Men           5          11          16          22
##     NCG           0           3           7          12

chisq.test(xtabs(~disease + cc.cut))  #独立性检验

## Warning: Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  xtabs(~disease + cc.cut)
## X-squared = 54.69, df = 12, p-value = 2.054e-07

prop.table(xtabs(~disease + cc.cut), 1) * 100  #创建disease和cc.cut相关列联表，按行百分比显示

##        cc.cut
## disease (1.57,3.08] (3.08,4.11] (4.11,5.17] (5.17,8.23]
##     Enc      48.649      32.432      16.216       2.703
##     GBS      30.000      40.000      20.000      10.000
##     MC       45.833      20.833      25.000       8.333
##     Men       9.259      20.370      29.630      40.741
##     NCG       0.000      13.636      31.818      54.545

prop.table(xtabs(~disease + cc.cut), 2) * 100  #创建disease和cc.cut相关列联表，按列百分比显示

##        cc.cut
## disease (1.57,3.08] (3.08,4.11] (4.11,5.17] (5.17,8.23]
##     Enc      45.000      30.769      15.385       2.564
##     GBS      15.000      20.513      10.256       5.128
##     MC       27.500      12.821      15.385       5.128
##     Men      12.500      28.205      41.026      56.410
##     NCG       0.000       7.692      17.949      30.769

library(vcd)  #引入vcd包，创建分布图，使用assocstats和spine函数，前者估计相关性，后者绘制分布图

## Loading required package: MASS

## Loading required package: grid

## Loading required package: colorspace

assocstats(xtabs(~disease + cc.cut))

##                     X^2 df   P(> X^2)
## Likelihood Ratio 62.876 12 6.7173e-09
## Pearson          54.694 12 2.0537e-07
## 
## Phi-Coefficient   : 0.59 
## Contingency Coeff.: 0.508 
## Cramer's V        : 0.341

spine(table(disease, cc.cut), main = "Figue 2. The distribution of CSF cystatin c among different groups")

plot of chunk unnamed-chunk-1


tuk.cc <- glht(fit.cc, linfct = mcp(disease = "Tukey"))  #disease=Tukey表明是根据disease分组
plot(cld(tuk.cc, level = 0.05), col = "lightgrey", ylab = "CSF cystatin c(mg/L)")

plot of chunk unnamed-chunk-1


# 下面语句将cystatin c按二分位将之分为2部分，原因基于前述四分位结果
cc.cut <- cut(cystatin.c, breaks = c(quantile(cystatin.c, 0) - 0.01, quantile(cystatin.c, 
    0.5), max(cystatin.c)))
xtabs(~disease + cc.cut)  #创建disease和cc.cut相关列联表,按实际数字显示

##        cc.cut
## disease (1.57,4.11] (4.11,8.23]
##     Enc          30           7
##     GBS          14           6
##     MC           16           8
##     Men          16          38
##     NCG           3          19

chisq.test(xtabs(~disease + cc.cut))  #独立性检验

## 
##  Pearson's Chi-squared test
## 
## data:  xtabs(~disease + cc.cut)
## X-squared = 40.76, df = 4, p-value = 3.016e-08

prop.table(xtabs(~disease + cc.cut), 1) * 100  #创建disease和cc.cut相关列联表，按行百分比显示

##        cc.cut
## disease (1.57,4.11] (4.11,8.23]
##     Enc       81.08       18.92
##     GBS       70.00       30.00
##     MC        66.67       33.33
##     Men       29.63       70.37
##     NCG       13.64       86.36

prop.table(xtabs(~disease + cc.cut), 2) * 100  #创建disease和cc.cut相关列联表，按列百分比显示

##        cc.cut
## disease (1.57,4.11] (4.11,8.23]
##     Enc      37.975       8.974
##     GBS      17.722       7.692
##     MC       20.253      10.256
##     Men      20.253      48.718
##     NCG       3.797      24.359



# 各疾病组GLU的均值和SD值列表
a.mean <- aggregate(Glu, by = list(disease), FUN = mean)  #根据disease分类列出不同disease的cystatin c的均值
b.sd <- aggregate(Glu, by = list(disease), FUN = sd)  #同上，列出SD值
names(a.mean)[2] <- "mean"
names(b.sd)[2] <- "sd"
merge(a.mean, b.sd, by = "Group.1")

##   Group.1  mean     sd
## 1     Enc 4.271 1.5321
## 2     GBS 4.069 0.6749
## 3      MC 2.893 1.4741
## 4     Men 2.341 1.0417
## 5     NCG 4.677 2.7980

fit.glu <- aov(Glu ~ disease)  #根据不同disease分组通过ANOVA比较各组间cc的均值差异
summary(fit.glu)  #与上一语句相关，列出ANOVA结果

##              Df Sum Sq Mean Sq F value  Pr(>F)    
## disease       4    139    34.9    14.5 4.6e-10 ***
## Residuals   152    365     2.4                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(fit.glu)  #通过TukeyHSD函数观察各组间比较

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Glu ~ disease)
## 
## $disease
##            diff     lwr     upr  p adj
## GBS-Enc -0.2016 -1.3890  0.9859 0.9900
## MC-Enc  -1.3782 -2.4995 -0.2568 0.0077
## Men-Enc -1.9296 -2.8427 -1.0165 0.0000
## NCG-Enc  0.4057 -0.7461  1.5576 0.8672
## MC-GBS  -1.1766 -2.4720  0.1188 0.0943
## Men-GBS -1.7280 -2.8480 -0.6081 0.0003
## NCG-GBS  0.6073 -0.7146  1.9292 0.7109
## Men-MC  -0.5514 -1.6011  0.4982 0.5962
## NCG-MC   1.7839  0.5210  3.0468 0.0013
## NCG-Men  2.3353  1.2532  3.4175 0.0000

# plot(TukeyHSD(fit.glu))
# #绘制前述函数的结果plot，显示95%CL的组间比较结果图
library(multcomp)  #引入multcomp包，以便使用glht函数，简介明了显示各组间cc比较差异，注意mcp中的disease的使用
par(mar = c(5, 4, 6, 2))
tuk.glu <- glht(fit.glu, linfct = mcp(disease = "Tukey"))  #disease=Tukey表明是根据disease分组
plot(cld(tuk.glu, level = 0.05), col = "lightgrey", ylab = "GLU(mmol/L)")

plot of chunk unnamed-chunk-1


# FILE<-'gbscc'
# #将gbscc.md这个文件转换为FILE目标，便于下一语句利用其生成同名docx文档
# system(paste0('pandoc -o ', FILE, '.docx ', FILE, '.md'))