table1 package
https://cran.r-project.org/web/packages/table1/vignettes/table1-examples.html
https://www.bioinfo-scrounger.com/archives/855/
1.特征表
例1
library(boot)
library(tidyverse)## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.7 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
melanoma2 <- melanoma
melanoma2$status <-
factor(melanoma2$status,
levels = c(2,1,3),
labels = c("Alive",#reference 注意顺序
"Melanoma death",
"Non-melanoma death"))利用table1 执行操作:
library(table1)##
## 载入程辑包:'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~factor(sex)+age+factor(ulcer)+thickness|status,data = melanoma2)| Alive (N=134) |
Melanoma death (N=57) |
Non-melanoma death (N=14) |
Overall (N=205) |
|
|---|---|---|---|---|
| factor(sex) | ||||
| 0 | 91 (67.9%) | 28 (49.1%) | 7 (50.0%) | 126 (61.5%) |
| 1 | 43 (32.1%) | 29 (50.9%) | 7 (50.0%) | 79 (38.5%) |
| age | ||||
| Mean (SD) | 50.0 (15.9) | 55.1 (17.9) | 65.3 (10.9) | 52.5 (16.7) |
| Median [Min, Max] | 52.0 [4.00, 84.0] | 56.0 [14.0, 95.0] | 65.0 [49.0, 86.0] | 54.0 [4.00, 95.0] |
| factor(ulcer) | ||||
| 0 | 92 (68.7%) | 16 (28.1%) | 7 (50.0%) | 115 (56.1%) |
| 1 | 42 (31.3%) | 41 (71.9%) | 7 (50.0%) | 90 (43.9%) |
| thickness | ||||
| Mean (SD) | 2.24 (2.33) | 4.31 (3.57) | 3.72 (3.63) | 2.92 (2.96) |
| Median [Min, Max] | 1.36 [0.100, 12.9] | 3.54 [0.320, 17.4] | 2.26 [0.160, 12.6] | 1.94 [0.100, 17.4] |
我们会发现,有些分类变量的生成不是很完美,当然可以在excel里面手动修改。其实原因在于没有很好的变量标签和类别,为连续性变量增加单位;把总计一列放到最左侧,。如何实现呢?
melanoma2$sex <-
factor(melanoma2$sex, levels=c(1,0),
labels=c("Male",
"Female"))
melanoma2$ulcer <-
factor(melanoma2$ulcer, levels=c(0,1),
labels=c("Absent",
"Present"))
label(melanoma2$sex) <- "Sex"
label(melanoma2$age) <- "Age"
label(melanoma2$ulcer) <- "Ulceration"
label(melanoma2$thickness) <- "Thicknessᵃ"
units(melanoma2$age) <- "years"
units(melanoma2$thickness) <- "mm"
caption <- "Basic stats"
footnote <- "ᵃ Also known as Breslow thickness"
table1(~ sex + age + ulcer + thickness | status, data=melanoma2,
overall=c(left="Total"), caption=caption, footnote=footnote)| Total (N=205) |
Alive (N=134) |
Melanoma death (N=57) |
Non-melanoma death (N=14) |
|
|---|---|---|---|---|
Also known as Breslow thickness | ||||
| Sex | ||||
| Male | 79 (38.5%) | 43 (32.1%) | 29 (50.9%) | 7 (50.0%) |
| Female | 126 (61.5%) | 91 (67.9%) | 28 (49.1%) | 7 (50.0%) |
| Age (years) | ||||
| Mean (SD) | 52.5 (16.7) | 50.0 (15.9) | 55.1 (17.9) | 65.3 (10.9) |
| Median [Min, Max] | 54.0 [4.00, 95.0] | 52.0 [4.00, 84.0] | 56.0 [14.0, 95.0] | 65.0 [49.0, 86.0] |
| Ulceration | ||||
| Absent | 115 (56.1%) | 92 (68.7%) | 16 (28.1%) | 7 (50.0%) |
| Present | 90 (43.9%) | 42 (31.3%) | 41 (71.9%) | 7 (50.0%) |
| Thickness (mm) | ||||
| Mean (SD) | 2.92 (2.96) | 2.24 (2.33) | 4.31 (3.57) | 3.72 (3.63) |
| Median [Min, Max] | 1.94 [0.100, 17.4] | 1.36 [0.100, 12.9] | 3.54 [0.320, 17.4] | 2.26 [0.160, 12.6] |
2.新的问题 两个“死亡”层(黑色素瘤和非黑色素瘤)应该是 在一个共同的标题下分组;连续变量年龄 和厚度仅显示平均值 (SD)(带±),而不显示中位数 [最小值、最大值] ; 使用两个有效数字而不是三个。我们需要进一步自定义输出 首先,我们使用列表以不同的方式设置标签:
labels <- list(
variables=list(sex="Sex",
age="Age (years)",
ulcer="Ulceration",
thickness="Thicknessᵃ (mm)"),
groups=list("", "", "Death"))
# 将"death"这个词从标签中移除, 因为上面groups定义了
levels(melanoma2$status) <- c("Alive", "Melanoma", "Non-melanoma")接下来,我们定义分层或者说是列,按照我们希望的顺序显示
strata <- c(list(Total=melanoma2), split(melanoma2, melanoma2$status))最后,我们可以使用自定义渲染器自定义内容。一个 自定义渲染可以是将向量作为第一个参数的函数 并返回一个(命名的)字符向量。还有一种更简单的方法 使用缩写代码语法而不是 一个渲染函数,但它允许较少控制舍入(见下文)。 例如,在这里,我们为连续和分类变量如下
#连续变量
my.render.cont <- function(x) {
with(stats.apply.rounding(stats.default(x), digits=2), c("",
"Mean (SD)"=sprintf("%s (± %s)", MEAN, SD)))
}
#分类变量
my.render.cat <- function(x) {
c("", sapply(stats.default(x), function(y) with(y,
sprintf("%d (%0.0f %%)", FREQ, PCT))))
}展示结果
table1(strata, labels, groupspan=c(1, 1, 2), caption=caption, footnote=footnote,
render.continuous=my.render.cont, render.categorical=my.render.cat)Death |
||||
|---|---|---|---|---|
| Total (N=205) |
Alive (N=134) |
Melanoma (N=57) |
Non-melanoma (N=14) |
|
Also known as Breslow thickness | ||||
| Sex | ||||
| Male | 79 (39 %) | 43 (32 %) | 29 (51 %) | 7 (50 %) |
| Female | 126 (61 %) | 91 (68 %) | 28 (49 %) | 7 (50 %) |
| Age (years) | ||||
| Mean (SD) | 52 (± 17) | 50 (± 16) | 55 (± 18) | 65 (± 11) |
| Ulceration | ||||
| Absent | 115 (56 %) | 92 (69 %) | 16 (28 %) | 7 (50 %) |
| Present | 90 (44 %) | 42 (31 %) | 41 (72 %) | 7 (50 %) |
| Thickness (mm) | ||||
| Mean (SD) | 2.9 (± 3.0) | 2.2 (± 2.3) | 4.3 (± 3.6) | 3.7 (± 3.6) |
2.p和统计量.
pvalue <- function(x, ...) {
# 构建数据y的向量,和分组变量g
y <- unlist(x)
g <- factor(rep(1:length(x), times=sapply(x, length)))
if (is.numeric(y)) {
# 数值型变量,采用标准的t检验(两组比较)
p <- t.test(y ~ g)$p.value
} else {
# 分类变量,此案用卡方检验
p <- chisq.test(table(y, g))$p.value
}
# Format the p-value, using an HTML entity for the less-than sign.
# The initial empty string places the output on the line below the variable label.
c("", sub("<", "<", format.pval(p, digits=3, eps=0.001)))
}
#统计量
stats <- function(x, ...) {
y <- unlist(x)
g <- factor(rep(1:length(x), times=sapply(x, length)))
if (is.numeric(y)) {
s <- t.test(y ~ g)$statistic
} else {
s <- chisq.test(table(y, g))$statistic
}
c("", sprintf("%.4f", s))
}用两组的进行分析
library(MatchIt)
data(lalonde)
lalonde$treat <- factor(lalonde$treat, levels=c(0, 1), labels=c("Control", "Treatment"))
lalonde$married <- as.logical(lalonde$married == 1)
lalonde$nodegree <- as.logical(lalonde$nodegree == 1)
lalonde$race <- factor(lalonde$race, levels=c("white", "black", "hispan"),
labels=c("White", "Black", "Hispanic"))
label(lalonde$race) <- "Race"
label(lalonde$married) <- "Married"
label(lalonde$nodegree) <- "No high school diploma"
label(lalonde$age) <- "Age"
label(lalonde$re74) <- "1974 Income"
label(lalonde$re75) <- "1975 Income"
label(lalonde$re78) <- "1978 Income"
units(lalonde$age) <- "years"结果显示
table1(~ age + race + married + nodegree + re74 + re75 + re78 | treat,
data=lalonde, overall=F, extra.col=list(`P-value`=pvalue, `statistic`=stats))| Control (N=429) |
Treatment (N=185) |
P-value | statistic | |
|---|---|---|---|---|
| Age (years) | ||||
| Mean (SD) | 28.0 (10.8) | 25.8 (7.16) | 0.00291 | 2.9911 |
| Median [Min, Max] | 25.0 [16.0, 55.0] | 25.0 [17.0, 48.0] | ||
| Race | ||||
| White | 281 (65.5%) | 18 (9.7%) | <0.001 | 224.0708 |
| Black | 87 (20.3%) | 156 (84.3%) | ||
| Hispanic | 61 (14.2%) | 11 (5.9%) | ||
| Married | ||||
| Yes | 220 (51.3%) | 35 (18.9%) | <0.001 | 54.4276 |
| No | 209 (48.7%) | 150 (81.1%) | ||
| No high school diploma | ||||
| Yes | 256 (59.7%) | 131 (70.8%) | 0.0113 | 6.4107 |
| No | 173 (40.3%) | 54 (29.2%) | ||
| 1974 Income | ||||
| Mean (SD) | 5620 (6790) | 2100 (4890) | <0.001 | 7.2456 |
| Median [Min, Max] | 2550 [0, 25900] | 0 [0, 35000] | ||
| 1975 Income | ||||
| Mean (SD) | 2470 (3290) | 1530 (3220) | 0.00115 | 3.2776 |
| Median [Min, Max] | 1090 [0, 18300] | 0 [0, 25100] | ||
| 1978 Income | ||||
| Mean (SD) | 6980 (7290) | 6350 (7870) | 0.349 | 0.9377 |
| Median [Min, Max] | 4980 [0, 25600] | 4230 [0, 60300] |
x <- table1(~ age + race + married + nodegree + re74 + re75 + re78 | treat,
data=lalonde, overall=F, extra.col=list(`P-value`=pvalue, `statistic`=stats))
as.data.frame(x)## Control Treatment P-value
## 1 (N=429) (N=185)
## 2 Age (years)
## 3 Mean (SD) 28.0 (10.8) 25.8 (7.16) 0.00291
## 4 Median [Min, Max] 25.0 [16.0, 55.0] 25.0 [17.0, 48.0]
## 5 Race
## 6 White 281 (65.5%) 18 (9.7%) <0.001
## 7 Black 87 (20.3%) 156 (84.3%)
## 8 Hispanic 61 (14.2%) 11 (5.9%)
## 9 Married
## 10 Yes 220 (51.3%) 35 (18.9%) <0.001
## 11 No 209 (48.7%) 150 (81.1%)
## 12 No high school diploma
## 13 Yes 256 (59.7%) 131 (70.8%) 0.0113
## 14 No 173 (40.3%) 54 (29.2%)
## 15 1974 Income
## 16 Mean (SD) 5620 (6790) 2100 (4890) <0.001
## 17 Median [Min, Max] 2550 [0, 25900] 0 [0, 35000]
## 18 1975 Income
## 19 Mean (SD) 2470 (3290) 1530 (3220) 0.00115
## 20 Median [Min, Max] 1090 [0, 18300] 0 [0, 25100]
## 21 1978 Income
## 22 Mean (SD) 6980 (7290) 6350 (7870) 0.349
## 23 Median [Min, Max] 4980 [0, 25600] 4230 [0, 60300]
## statistic
## 1
## 2
## 3 2.9911
## 4
## 5
## 6 224.0708
## 7
## 8
## 9
## 10 54.4276
## 11
## 12
## 13 6.4107
## 14
## 15
## 16 7.2456
## 17
## 18
## 19 3.2776
## 20
## 21
## 22 0.9377
## 23
knitr::kable(as.data.frame(x),booktabs=TRUE)| Control | Treatment | P-value | statistic | |
|---|---|---|---|---|
| (N=429) | (N=185) | |||
| Age (years) | ||||
| Mean (SD) | 28.0 (10.8) | 25.8 (7.16) | 0.00291 | 2.9911 |
| Median [Min, Max] | 25.0 [16.0, 55.0] | 25.0 [17.0, 48.0] | ||
| Race | ||||
| White | 281 (65.5%) | 18 (9.7%) | <0.001 | 224.0708 |
| Black | 87 (20.3%) | 156 (84.3%) | ||
| Hispanic | 61 (14.2%) | 11 (5.9%) | ||
| Married | ||||
| Yes | 220 (51.3%) | 35 (18.9%) | <0.001 | 54.4276 |
| No | 209 (48.7%) | 150 (81.1%) | ||
| No high school diploma | ||||
| Yes | 256 (59.7%) | 131 (70.8%) | 0.0113 | 6.4107 |
| No | 173 (40.3%) | 54 (29.2%) | ||
| 1974 Income | ||||
| Mean (SD) | 5620 (6790) | 2100 (4890) | <0.001 | 7.2456 |
| Median [Min, Max] | 2550 [0, 25900] | 0 [0, 35000] | ||
| 1975 Income | ||||
| Mean (SD) | 2470 (3290) | 1530 (3220) | 0.00115 | 3.2776 |
| Median [Min, Max] | 1090 [0, 18300] | 0 [0, 25100] | ||
| 1978 Income | ||||
| Mean (SD) | 6980 (7290) | 6350 (7870) | 0.349 | 0.9377 |
| Median [Min, Max] | 4980 [0, 25600] | 4230 [0, 60300] |
t1kable(x)## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
| Control | Treatment | P-value | statistic | |
|---|---|---|---|---|
| (N=429) | (N=185) | |||
| Age (years) | ||||
| Mean (SD) | 28.0 (10.8) | 25.8 (7.16) | 0.00291 | 2.9911 |
| Median [Min, Max] | 25.0 [16.0, 55.0] | 25.0 [17.0, 48.0] | ||
| Race | ||||
| White | 281 (65.5%) | 18 (9.7%) | <0.001 | 224.0708 |
| Black | 87 (20.3%) | 156 (84.3%) | ||
| Hispanic | 61 (14.2%) | 11 (5.9%) | ||
| Married | ||||
| Yes | 220 (51.3%) | 35 (18.9%) | <0.001 | 54.4276 |
| No | 209 (48.7%) | 150 (81.1%) | ||
| No high school diploma | ||||
| Yes | 256 (59.7%) | 131 (70.8%) | 0.0113 | 6.4107 |
| No | 173 (40.3%) | 54 (29.2%) | ||
| 1974 Income | ||||
| Mean (SD) | 5620 (6790) | 2100 (4890) | <0.001 | 7.2456 |
| Median [Min, Max] | 2550 [0, 25900] | 0 [0, 35000] | ||
| 1975 Income | ||||
| Mean (SD) | 2470 (3290) | 1530 (3220) | 0.00115 | 3.2776 |
| Median [Min, Max] | 1090 [0, 18300] | 0 [0, 25100] | ||
| 1978 Income | ||||
| Mean (SD) | 6980 (7290) | 6350 (7870) | 0.349 | 0.9377 |
| Median [Min, Max] | 4980 [0, 25600] | 4230 [0, 60300] | ||