table1 package

https://cran.r-project.org/web/packages/table1/vignettes/table1-examples.html

https://www.bioinfo-scrounger.com/archives/855/

1.特征表

例1

library(boot)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.7     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

melanoma2 <- melanoma
melanoma2$status <- 
  factor(melanoma2$status,
         levels = c(2,1,3),
         labels = c("Alive",#reference 注意顺序
                    "Melanoma death",
                    "Non-melanoma death"))

利用table1 执行操作：

library(table1)

## 
## 载入程辑包：'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

table1(~factor(sex)+age+factor(ulcer)+thickness|status,data = melanoma2)

	Alive (N=134)	Melanoma death (N=57)	Non-melanoma death (N=14)	Overall (N=205)
factor(sex)
0	91 (67.9%)	28 (49.1%)	7 (50.0%)	126 (61.5%)
1	43 (32.1%)	29 (50.9%)	7 (50.0%)	79 (38.5%)
age
Mean (SD)	50.0 (15.9)	55.1 (17.9)	65.3 (10.9)	52.5 (16.7)
Median [Min, Max]	52.0 [4.00, 84.0]	56.0 [14.0, 95.0]	65.0 [49.0, 86.0]	54.0 [4.00, 95.0]
factor(ulcer)
0	92 (68.7%)	16 (28.1%)	7 (50.0%)	115 (56.1%)
1	42 (31.3%)	41 (71.9%)	7 (50.0%)	90 (43.9%)
thickness
Mean (SD)	2.24 (2.33)	4.31 (3.57)	3.72 (3.63)	2.92 (2.96)
Median [Min, Max]	1.36 [0.100, 12.9]	3.54 [0.320, 17.4]	2.26 [0.160, 12.6]	1.94 [0.100, 17.4]

我们会发现，有些分类变量的生成不是很完美，当然可以在excel里面手动修改。其实原因在于没有很好的变量标签和类别，为连续性变量增加单位；把总计一列放到最左侧，。如何实现呢？

melanoma2$sex <- 
  factor(melanoma2$sex, levels=c(1,0),
         labels=c("Male", 
                  "Female"))
 
melanoma2$ulcer <- 
  factor(melanoma2$ulcer, levels=c(0,1),
         labels=c("Absent", 
                  "Present"))

label(melanoma2$sex)       <- "Sex"
label(melanoma2$age)       <- "Age"
label(melanoma2$ulcer)     <- "Ulceration"
label(melanoma2$thickness) <- "Thicknessᵃ"

units(melanoma2$age)       <- "years"
units(melanoma2$thickness) <- "mm"

caption  <- "Basic stats"
footnote <- "ᵃ Also known as Breslow thickness"

table1(~ sex + age + ulcer + thickness | status, data=melanoma2,
    overall=c(left="Total"), caption=caption, footnote=footnote)

Basic stats
	Total (N=205)	Alive (N=134)	Melanoma death (N=57)	Non-melanoma death (N=14)
Also known as Breslow thickness
Sex
Male	79 (38.5%)	43 (32.1%)	29 (50.9%)	7 (50.0%)
Female	126 (61.5%)	91 (67.9%)	28 (49.1%)	7 (50.0%)
Age (years)
Mean (SD)	52.5 (16.7)	50.0 (15.9)	55.1 (17.9)	65.3 (10.9)
Median [Min, Max]	54.0 [4.00, 95.0]	52.0 [4.00, 84.0]	56.0 [14.0, 95.0]	65.0 [49.0, 86.0]
Ulceration
Absent	115 (56.1%)	92 (68.7%)	16 (28.1%)	7 (50.0%)
Present	90 (43.9%)	42 (31.3%)	41 (71.9%)	7 (50.0%)
Thickness (mm)
Mean (SD)	2.92 (2.96)	2.24 (2.33)	4.31 (3.57)	3.72 (3.63)
Median [Min, Max]	1.94 [0.100, 17.4]	1.36 [0.100, 12.9]	3.54 [0.320, 17.4]	2.26 [0.160, 12.6]

2.新的问题两个“死亡”层（黑色素瘤和非黑色素瘤）应该是在一个共同的标题下分组;连续变量年龄和厚度仅显示平均值（SD）（带±），而不显示中位数 [最小值、最大值] ; 使用两个有效数字而不是三个。我们需要进一步自定义输出首先，我们使用列表以不同的方式设置标签：

labels <- list(
    variables=list(sex="Sex",
                   age="Age (years)",
                   ulcer="Ulceration",
                   thickness="Thicknessᵃ (mm)"),
    groups=list("", "", "Death"))

# 将"death"这个词从标签中移除, 因为上面groups定义了
levels(melanoma2$status) <- c("Alive", "Melanoma", "Non-melanoma")

接下来，我们定义分层或者说是列，按照我们希望的顺序显示

strata <- c(list(Total=melanoma2), split(melanoma2, melanoma2$status))

最后，我们可以使用自定义渲染器自定义内容。一个自定义渲染可以是将向量作为第一个参数的函数并返回一个（命名的）字符向量。还有一种更简单的方法使用缩写代码语法而不是一个渲染函数，但它允许较少控制舍入（见下文）。例如，在这里，我们为连续和分类变量如下

#连续变量
my.render.cont <- function(x) {
    with(stats.apply.rounding(stats.default(x), digits=2), c("",
        "Mean (SD)"=sprintf("%s (&plusmn; %s)", MEAN, SD)))
}

#分类变量
my.render.cat <- function(x) {
    c("", sapply(stats.default(x), function(y) with(y,
        sprintf("%d (%0.0f %%)", FREQ, PCT))))
}

展示结果

table1(strata, labels, groupspan=c(1, 1, 2), caption=caption, footnote=footnote,
       render.continuous=my.render.cont, render.categorical=my.render.cat)

Basic stats
			Death
	Total (N=205)	Alive (N=134)	Melanoma (N=57)	Non-melanoma (N=14)
Also known as Breslow thickness
Sex
Male	79 (39 %)	43 (32 %)	29 (51 %)	7 (50 %)
Female	126 (61 %)	91 (68 %)	28 (49 %)	7 (50 %)
Age (years)
Mean (SD)	52 (± 17)	50 (± 16)	55 (± 18)	65 (± 11)
Ulceration
Absent	115 (56 %)	92 (69 %)	16 (28 %)	7 (50 %)
Present	90 (44 %)	42 (31 %)	41 (72 %)	7 (50 %)
Thickness (mm)
Mean (SD)	2.9 (± 3.0)	2.2 (± 2.3)	4.3 (± 3.6)	3.7 (± 3.6)

2.p和统计量.

pvalue <- function(x, ...) {
  # 构建数据y的向量，和分组变量g 
  y <- unlist(x)
  g <- factor(rep(1:length(x), times=sapply(x, length)))
  if (is.numeric(y)) {
    # 数值型变量，采用标准的t检验（两组比较）
    p <- t.test(y ~ g)$p.value
  } else {
    # 分类变量，此案用卡方检验
    p <- chisq.test(table(y, g))$p.value
  }
  # Format the p-value, using an HTML entity for the less-than sign.
  # The initial empty string places the output on the line below the variable label.
  c("", sub("<", "&lt;", format.pval(p, digits=3, eps=0.001)))
}

#统计量
stats <- function(x, ...) {
  y <- unlist(x)
  g <- factor(rep(1:length(x), times=sapply(x, length)))
  if (is.numeric(y)) {
    s <- t.test(y ~ g)$statistic
  } else {
    s <- chisq.test(table(y, g))$statistic
  }
  c("", sprintf("%.4f", s))
}

用两组的进行分析

library(MatchIt)

data(lalonde)

lalonde$treat    <- factor(lalonde$treat, levels=c(0, 1), labels=c("Control", "Treatment"))
lalonde$married  <- as.logical(lalonde$married == 1)
lalonde$nodegree <- as.logical(lalonde$nodegree == 1)
lalonde$race     <- factor(lalonde$race, levels=c("white", "black", "hispan"),
                           labels=c("White", "Black", "Hispanic"))

label(lalonde$race)     <- "Race"
label(lalonde$married)  <- "Married"
label(lalonde$nodegree) <- "No high school diploma"
label(lalonde$age)      <- "Age"
label(lalonde$re74)     <- "1974 Income"
label(lalonde$re75)     <- "1975 Income"
label(lalonde$re78)     <- "1978 Income"
units(lalonde$age)      <- "years"

结果显示

table1(~ age + race + married + nodegree + re74 + re75 + re78 | treat,
       data=lalonde, overall=F, extra.col=list(`P-value`=pvalue, `statistic`=stats))

	Control (N=429)	Treatment (N=185)	P-value	statistic
Age (years)
Mean (SD)	28.0 (10.8)	25.8 (7.16)	0.00291	2.9911
Median [Min, Max]	25.0 [16.0, 55.0]	25.0 [17.0, 48.0]
Race
White	281 (65.5%)	18 (9.7%)	<0.001	224.0708
Black	87 (20.3%)	156 (84.3%)
Hispanic	61 (14.2%)	11 (5.9%)
Married
Yes	220 (51.3%)	35 (18.9%)	<0.001	54.4276
No	209 (48.7%)	150 (81.1%)
No high school diploma
Yes	256 (59.7%)	131 (70.8%)	0.0113	6.4107
No	173 (40.3%)	54 (29.2%)
1974 Income
Mean (SD)	5620 (6790)	2100 (4890)	<0.001	7.2456
Median [Min, Max]	2550 [0, 25900]	0 [0, 35000]
1975 Income
Mean (SD)	2470 (3290)	1530 (3220)	0.00115	3.2776
Median [Min, Max]	1090 [0, 18300]	0 [0, 25100]
1978 Income
Mean (SD)	6980 (7290)	6350 (7870)	0.349	0.9377
Median [Min, Max]	4980 [0, 25600]	4230 [0, 60300]

x <- table1(~ age + race + married + nodegree + re74 + re75 + re78 | treat,
       data=lalonde, overall=F, extra.col=list(`P-value`=pvalue, `statistic`=stats))
as.data.frame(x)

##                                     Control         Treatment   P-value
## 1                                   (N=429)           (N=185)          
## 2             Age (years)                                              
## 3               Mean (SD)       28.0 (10.8)       25.8 (7.16)   0.00291
## 4       Median [Min, Max] 25.0 [16.0, 55.0] 25.0 [17.0, 48.0]          
## 5                    Race                                              
## 6                   White       281 (65.5%)         18 (9.7%) &lt;0.001
## 7                   Black        87 (20.3%)       156 (84.3%)          
## 8                Hispanic        61 (14.2%)         11 (5.9%)          
## 9                 Married                                              
## 10                    Yes       220 (51.3%)        35 (18.9%) &lt;0.001
## 11                     No       209 (48.7%)       150 (81.1%)          
## 12 No high school diploma                                              
## 13                    Yes       256 (59.7%)       131 (70.8%)    0.0113
## 14                     No       173 (40.3%)        54 (29.2%)          
## 15            1974 Income                                              
## 16              Mean (SD)       5620 (6790)       2100 (4890) &lt;0.001
## 17      Median [Min, Max]   2550 [0, 25900]      0 [0, 35000]          
## 18            1975 Income                                              
## 19              Mean (SD)       2470 (3290)       1530 (3220)   0.00115
## 20      Median [Min, Max]   1090 [0, 18300]      0 [0, 25100]          
## 21            1978 Income                                              
## 22              Mean (SD)       6980 (7290)       6350 (7870)     0.349
## 23      Median [Min, Max]   4980 [0, 25600]   4230 [0, 60300]          
##    statistic
## 1           
## 2           
## 3     2.9911
## 4           
## 5           
## 6   224.0708
## 7           
## 8           
## 9           
## 10   54.4276
## 11          
## 12          
## 13    6.4107
## 14          
## 15          
## 16    7.2456
## 17          
## 18          
## 19    3.2776
## 20          
## 21          
## 22    0.9377
## 23

knitr::kable(as.data.frame(x),booktabs=TRUE)

	Control	Treatment	P-value	statistic
	(N=429)	(N=185)
Age (years)
Mean (SD)	28.0 (10.8)	25.8 (7.16)	0.00291	2.9911
Median [Min, Max]	25.0 [16.0, 55.0]	25.0 [17.0, 48.0]
Race
White	281 (65.5%)	18 (9.7%)	<0.001	224.0708
Black	87 (20.3%)	156 (84.3%)
Hispanic	61 (14.2%)	11 (5.9%)
Married
Yes	220 (51.3%)	35 (18.9%)	<0.001	54.4276
No	209 (48.7%)	150 (81.1%)
No high school diploma
Yes	256 (59.7%)	131 (70.8%)	0.0113	6.4107
No	173 (40.3%)	54 (29.2%)
1974 Income
Mean (SD)	5620 (6790)	2100 (4890)	<0.001	7.2456
Median [Min, Max]	2550 [0, 25900]	0 [0, 35000]
1975 Income
Mean (SD)	2470 (3290)	1530 (3220)	0.00115	3.2776
Median [Min, Max]	1090 [0, 18300]	0 [0, 25100]
1978 Income
Mean (SD)	6980 (7290)	6350 (7870)	0.349	0.9377
Median [Min, Max]	4980 [0, 25600]	4230 [0, 60300]

t1kable(x)

## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'

	Control	Treatment	P-value	statistic
	(N=429)	(N=185)
Age (years)
Mean (SD)	28.0 (10.8)	25.8 (7.16)	0.00291	2.9911
Median [Min, Max]	25.0 [16.0, 55.0]	25.0 [17.0, 48.0]
Race
White	281 (65.5%)	18 (9.7%)	<0.001	224.0708
Black	87 (20.3%)	156 (84.3%)
Hispanic	61 (14.2%)	11 (5.9%)
Married
Yes	220 (51.3%)	35 (18.9%)	<0.001	54.4276
No	209 (48.7%)	150 (81.1%)
No high school diploma
Yes	256 (59.7%)	131 (70.8%)	0.0113	6.4107
No	173 (40.3%)	54 (29.2%)
1974 Income
Mean (SD)	5620 (6790)	2100 (4890)	<0.001	7.2456
Median [Min, Max]	2550 [0, 25900]	0 [0, 35000]
1975 Income
Mean (SD)	2470 (3290)	1530 (3220)	0.00115	3.2776
Median [Min, Max]	1090 [0, 18300]	0 [0, 25100]
1978 Income
Mean (SD)	6980 (7290)	6350 (7870)	0.349	0.9377
Median [Min, Max]	4980 [0, 25600]	4230 [0, 60300]