if(!require(table1)) install.packages("table1",ask=F,update=F)
## Loading required package: table1
## Warning: package 'table1' was built under R version 3.6.1
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
require(table1)
library(boot)
## Warning: package 'boot' was built under R version 3.6.1
melanoma2 <- melanoma
head(melanoma)
## time status sex age year thickness ulcer
## 1 10 3 1 76 1972 6.76 1
## 2 30 3 1 56 1968 0.65 0
## 3 35 2 1 41 1977 1.34 0
## 4 99 3 0 71 1968 2.90 0
## 5 185 1 1 52 1965 12.08 1
## 6 204 1 1 28 1971 4.84 1
dim(melanoma)
## [1] 205 7
## input melanoma是一个数据框
## 对我们感兴趣的变量因子化
melanoma2$status <-
factor(melanoma2$status,
levels=c(2,1,3),
labels=c("Alive", # 第一个作为参考组
"Melanoma death",
"Non-melanoma death"))
table1(~ factor(sex) + age + factor(ulcer) + thickness | status, data=melanoma2)
| Alive (n=134) |
Melanoma death (n=57) |
Non-melanoma death (n=14) |
Overall (n=205) |
|
|---|---|---|---|---|
| factor(sex) | ||||
| 0 | 91 (67.9%) | 28 (49.1%) | 7 (50.0%) | 126 (61.5%) |
| 1 | 43 (32.1%) | 29 (50.9%) | 7 (50.0%) | 79 (38.5%) |
| age | ||||
| Mean (SD) | 50.0 (15.9) | 55.1 (17.9) | 65.3 (10.9) | 52.5 (16.7) |
| Median [Min, Max] | 52.0 [4.00, 84.0] | 56.0 [14.0, 95.0] | 65.0 [49.0, 86.0] | 54.0 [4.00, 95.0] |
| factor(ulcer) | ||||
| 0 | 92 (68.7%) | 16 (28.1%) | 7 (50.0%) | 115 (56.1%) |
| 1 | 42 (31.3%) | 41 (71.9%) | 7 (50.0%) | 90 (43.9%) |
| thickness | ||||
| Mean (SD) | 2.24 (2.33) | 4.31 (3.57) | 3.72 (3.63) | 2.92 (2.96) |
| Median [Min, Max] | 1.36 [0.100, 12.9] | 3.54 [0.320, 17.4] | 2.26 [0.160, 12.6] | 1.94 [0.100, 17.4] |
## 给分类变量sex指定标签
melanoma2$sex <-
factor(melanoma2$sex, levels=c(1,0),
labels=c("Male",
"Female"))
## 给分类变量ulcer指定标签
melanoma2$ulcer <-
factor(melanoma2$ulcer, levels=c(0,1),
labels=c("Absent",
"Present"))
## 给变量名指定标签
label(melanoma2$sex) <- "Sex"
label(melanoma2$age) <- "Age"
label(melanoma2$ulcer) <- "Ulceration"
label(melanoma2$thickness) <- "Thickness"
## 给连续型变量指定单位
units(melanoma2$age) <- "years"
units(melanoma2$thickness) <- "mm"
## 再增加overall统计量
table1(~ sex + age + ulcer + thickness | status, data=melanoma2, overall="Total")
| Alive (n=134) |
Melanoma death (n=57) |
Non-melanoma death (n=14) |
Total (n=205) |
|
|---|---|---|---|---|
| Sex | ||||
| Male | 43 (32.1%) | 29 (50.9%) | 7 (50.0%) | 79 (38.5%) |
| Female | 91 (67.9%) | 28 (49.1%) | 7 (50.0%) | 126 (61.5%) |
| Age (years) | ||||
| Mean (SD) | 50.0 (15.9) | 55.1 (17.9) | 65.3 (10.9) | 52.5 (16.7) |
| Median [Min, Max] | 52.0 [4.00, 84.0] | 56.0 [14.0, 95.0] | 65.0 [49.0, 86.0] | 54.0 [4.00, 95.0] |
| Ulceration | ||||
| Absent | 92 (68.7%) | 16 (28.1%) | 7 (50.0%) | 115 (56.1%) |
| Present | 42 (31.3%) | 41 (71.9%) | 7 (50.0%) | 90 (43.9%) |
| Thickness (mm) | ||||
| Mean (SD) | 2.24 (2.33) | 4.31 (3.57) | 3.72 (3.63) | 2.92 (2.96) |
| Median [Min, Max] | 1.36 [0.100, 12.9] | 3.54 [0.320, 17.4] | 2.26 [0.160, 12.6] | 1.94 [0.100, 17.4] |
labels <- list(
variables=list(sex="Sex",
age="Age (years)",
ulcer="Ulceration",
thickness="Thickness (mm)"),
groups=list("", "", "Death"))##表格上的第一级Death
# 重新给status命名标签,death放到上面去
levels(melanoma2$status) <- c("Alive", "Melanoma", "Non-melanoma")
#按想要的顺序顺序设置分组或列,
#Total放第一列,split分开status
strata <- c(list(Total=melanoma2), split(melanoma2, melanoma2$status))
# 添加渲染风格-连续型变量与分类变量展示不同
# 连续型渲染风格函数
my.render.cont <- function(x) {
with(stats.apply.rounding(stats.default(x), digits=2), c("",
"Mean (SD)"=sprintf("%s (± %s)", MEAN, SD)))
}
# 分类变量渲染风格
my.render.cat <- function(x) {
c("", sapply(stats.default(x), function(y) with(y,
sprintf("%d (%0.0f %%)", FREQ, PCT))))
}
## 结果
## groupsapn为分组的个数,1为Total, 1为Alive,以及2为Death
## 增加了Death的亚组
table1(strata, labels, groupspan=c(1, 1, 2),
render.continuous=my.render.cont, render.categorical=my.render.cat)
Death |
||||
|---|---|---|---|---|
| Total (n=205) |
Alive (n=134) |
Melanoma (n=57) |
Non-melanoma (n=14) |
|
| Sex | ||||
| Male | 79 (39 %) | 43 (32 %) | 29 (51 %) | 7 (50 %) |
| Female | 126 (61 %) | 91 (68 %) | 28 (49 %) | 7 (50 %) |
| Age (years) | ||||
| Mean (SD) | 52 (± 17) | 50 (± 16) | 55 (± 18) | 65 (± 11) |
| Ulceration | ||||
| Absent | 115 (56 %) | 92 (69 %) | 16 (28 %) | 7 (50 %) |
| Present | 90 (44 %) | 42 (31 %) | 41 (72 %) | 7 (50 %) |
| Thickness (mm) | ||||
| Mean (SD) | 2.9 (± 3.0) | 2.2 (± 2.3) | 4.3 (± 3.6) | 3.7 (± 3.6) |
f <- function(x, n, ...) factor(sample(x, n, replace=T, ...), levels=x)
set.seed(427)
## 构造数据框
n <- 146
dat <- data.frame(id=1:n)
dat$treat <- f(c("Placebo", "Treated"), n, prob=c(1, 2)) # 2:1 randomization
dat$age <- sample(18:65, n, replace=TRUE)
dat$sex <- f(c("Female", "Male"), n, prob=c(.6, .4)) # 60% female
dat$wt <- round(exp(rnorm(n, log(70), 0.23)), 1)
dat$wt[sample.int(n, 5)] <- NA## 加入一些缺失值
head(dat)
## id treat age sex wt
## 1 1 Treated 18 Female 62.6
## 2 2 Treated 50 Male 57.4
## 3 3 Treated 37 Male 104.6
## 4 4 Treated 25 Female 55.5
## 5 5 Placebo 60 Female 58.4
## 6 6 Treated 44 Female 41.9
## 分类变量
label(dat$age) <- "Age"
label(dat$sex) <- "Sex"
label(dat$wt) <- "Weight"
label(dat$treat) <- "Treatment Group"
## 连续型变量
units(dat$age) <- "years"
units(dat$wt) <- "kg"
## 绘制默认表格
table1(~ age + sex + wt | treat, data=dat)
| Placebo (n=52) |
Treated (n=94) |
Overall (n=146) |
|
|---|---|---|---|
| Age (years) | |||
| Mean (SD) | 39.2 (14.2) | 40.1 (13.3) | 39.8 (13.6) |
| Median [Min, Max] | 37.5 [18.0, 65.0] | 39.5 [18.0, 65.0] | 39.0 [18.0, 65.0] |
| Sex | |||
| Female | 34 (65.4%) | 53 (56.4%) | 87 (59.6%) |
| Male | 18 (34.6%) | 41 (43.6%) | 59 (40.4%) |
| Weight (kg) | |||
| Mean (SD) | 68.1 (16.3) | 68.3 (16.7) | 68.2 (16.5) |
| Median [Min, Max] | 66.7 [37.5, 116] | 64.9 [40.0, 119] | 66.2 [37.5, 119] |
| Missing | 2 (3.8%) | 3 (3.2%) | 5 (3.4%) |
table1(~ age + sex + wt | treat, data=dat, overall=F)
| Placebo (n=52) |
Treated (n=94) |
|
|---|---|---|
| Age (years) | ||
| Mean (SD) | 39.2 (14.2) | 40.1 (13.3) |
| Median [Min, Max] | 37.5 [18.0, 65.0] | 39.5 [18.0, 65.0] |
| Sex | ||
| Female | 34 (65.4%) | 53 (56.4%) |
| Male | 18 (34.6%) | 41 (43.6%) |
| Weight (kg) | ||
| Mean (SD) | 68.1 (16.3) | 68.3 (16.7) |
| Median [Min, Max] | 66.7 [37.5, 116] | 64.9 [40.0, 119] |
| Missing | 2 (3.8%) | 3 (3.2%) |
table1(~ age + wt | treat*sex, data=dat)
Placebo |
Treated |
Overall |
||||
|---|---|---|---|---|---|---|
| Female (n=34) |
Male (n=18) |
Female (n=53) |
Male (n=41) |
Female (n=87) |
Male (n=59) |
|
| Age (years) | ||||||
| Mean (SD) | 40.6 (14.5) | 36.6 (13.6) | 40.1 (13.4) | 40.1 (13.3) | 40.3 (13.8) | 39.0 (13.4) |
| Median [Min, Max] | 39.5 [18.0, 65.0] | 33.5 [18.0, 64.0] | 39.0 [18.0, 65.0] | 41.0 [18.0, 65.0] | 39.0 [18.0, 65.0] | 39.0 [18.0, 65.0] |
| Weight (kg) | ||||||
| Mean (SD) | 68.8 (14.8) | 66.8 (19.3) | 65.6 (15.1) | 71.5 (18.0) | 66.9 (15.0) | 70.1 (18.4) |
| Median [Min, Max] | 67.2 [45.8, 116] | 66.6 [37.5, 105] | 61.4 [41.9, 103] | 68.3 [40.0, 119] | 63.8 [41.9, 116] | 67.3 [37.5, 119] |
| Missing | 1 (2.9%) | 1 (5.6%) | 3 (5.7%) | 0 (0%) | 4 (4.6%) | 1 (1.7%) |
table1(~ age + wt | treat*sex, data=dat)
Placebo |
Treated |
Overall |
||||
|---|---|---|---|---|---|---|
| Female (n=34) |
Male (n=18) |
Female (n=53) |
Male (n=41) |
Female (n=87) |
Male (n=59) |
|
| Age (years) | ||||||
| Mean (SD) | 40.6 (14.5) | 36.6 (13.6) | 40.1 (13.4) | 40.1 (13.3) | 40.3 (13.8) | 39.0 (13.4) |
| Median [Min, Max] | 39.5 [18.0, 65.0] | 33.5 [18.0, 64.0] | 39.0 [18.0, 65.0] | 41.0 [18.0, 65.0] | 39.0 [18.0, 65.0] | 39.0 [18.0, 65.0] |
| Weight (kg) | ||||||
| Mean (SD) | 68.8 (14.8) | 66.8 (19.3) | 65.6 (15.1) | 71.5 (18.0) | 66.9 (15.0) | 70.1 (18.4) |
| Median [Min, Max] | 67.2 [45.8, 116] | 66.6 [37.5, 105] | 61.4 [41.9, 103] | 68.3 [40.0, 119] | 63.8 [41.9, 116] | 67.3 [37.5, 119] |
| Missing | 1 (2.9%) | 1 (5.6%) | 3 (5.7%) | 0 (0%) | 4 (4.6%) | 1 (1.7%) |
table1(~ treat + age + sex + wt, data=dat)
| Overall (n=146) |
|
|---|---|
| Treatment Group | |
| Placebo | 52 (35.6%) |
| Treated | 94 (64.4%) |
| Age (years) | |
| Mean (SD) | 39.8 (13.6) |
| Median [Min, Max] | 39.0 [18.0, 65.0] |
| Sex | |
| Female | 87 (59.6%) |
| Male | 59 (40.4%) |
| Weight (kg) | |
| Mean (SD) | 68.2 (16.5) |
| Median [Min, Max] | 66.2 [37.5, 119] |
| Missing | 5 (3.4%) |
## 给原数据增加一个dose列
dat$dose <- (dat$treat != "Placebo")*sample(1:2, n, replace=T)
## 给dose加标签
dat$dose <- factor(dat$dose, labels=c("Placebo", "5 mg", "10 mg"))
## strata定制
## split指定按dose分亚组
strata <- c(split(dat, dat$dose), ##dose分组
list("All treated"=subset(dat, treat=="Treated")), ## all treated组
list(Overall=dat))## overall
labels <- list(
variables=list(age=render.varlabel(dat$age),
sex=render.varlabel(dat$sex),
wt=render.varlabel(dat$wt)),
groups=list("", "Treated", ""))## 一级分组标签
## groupspan二级分组告诉你标题栏的线包括几个变量
## 对应groups
table1(strata, labels, groupspan=c(1, 3, 1))
Treated |
|||||
|---|---|---|---|---|---|
| Placebo (n=52) |
5 mg (n=43) |
10 mg (n=51) |
All treated (n=94) |
Overall (n=146) |
|
| Age (years) | |||||
| Mean (SD) | 39.2 (14.2) | 35.5 (12.4) | 43.9 (12.9) | 40.1 (13.3) | 39.8 (13.6) |
| Median [Min, Max] | 37.5 [18.0, 65.0] | 37.0 [18.0, 64.0] | 45.0 [21.0, 65.0] | 39.5 [18.0, 65.0] | 39.0 [18.0, 65.0] |
| Sex | |||||
| Female | 34 (65.4%) | 24 (55.8%) | 29 (56.9%) | 53 (56.4%) | 87 (59.6%) |
| Male | 18 (34.6%) | 19 (44.2%) | 22 (43.1%) | 41 (43.6%) | 59 (40.4%) |
| Weight (kg) | |||||
| Mean (SD) | 68.1 (16.3) | 68.7 (16.3) | 68.0 (17.1) | 68.3 (16.7) | 68.2 (16.5) |
| Median [Min, Max] | 66.7 [37.5, 116] | 67.2 [40.4, 111] | 63.4 [40.0, 119] | 64.9 [40.0, 119] | 66.2 [37.5, 119] |
| Missing | 2 (3.8%) | 2 (4.7%) | 1 (2.0%) | 3 (3.2%) | 5 (3.4%) |
rndr <- function(x, name, ...) {
if (!is.numeric(x)) return(render.categorical.default(x))
what <- switch(name,
age = "Median [Min, Max]",
wt = "Mean (SD)")
parse.abbrev.render.code(c("", what))(x)
}
table1(~ age + sex + wt | treat, data=dat,
render=rndr)
| Placebo (n=52) |
Treated (n=94) |
Overall (n=146) |
|
|---|---|---|---|
| Age (years) | |||
| Median [Min, Max] | 37.5 [18.0, 65.0] | 39.5 [18.0, 65.0] | 39.0 [18.0, 65.0] |
| Sex | |||
| Female | 34 (65.4%) | 53 (56.4%) | 87 (59.6%) |
| Male | 18 (34.6%) | 41 (43.6%) | 59 (40.4%) |
| Weight (kg) | |||
| Mean (SD) | 68.1 (16.3) | 68.3 (16.7) | 68.2 (16.5) |
## 更换表格风格,用topclass参数设置
## zebra似乎不错
table1(~ age + sex + wt | treat, data=dat, topclass="Rtable1-zebra")
| Placebo (n=52) |
Treated (n=94) |
Overall (n=146) |
|
|---|---|---|---|
| Age (years) | |||
| Mean (SD) | 39.2 (14.2) | 40.1 (13.3) | 39.8 (13.6) |
| Median [Min, Max] | 37.5 [18.0, 65.0] | 39.5 [18.0, 65.0] | 39.0 [18.0, 65.0] |
| Sex | |||
| Female | 34 (65.4%) | 53 (56.4%) | 87 (59.6%) |
| Male | 18 (34.6%) | 41 (43.6%) | 59 (40.4%) |
| Weight (kg) | |||
| Mean (SD) | 68.1 (16.3) | 68.3 (16.7) | 68.2 (16.5) |
| Median [Min, Max] | 66.7 [37.5, 116] | 64.9 [40.0, 119] | 66.2 [37.5, 119] |
| Missing | 2 (3.8%) | 3 (3.2%) | 5 (3.4%) |
library(MatchIt)
## Warning: package 'MatchIt' was built under R version 3.6.1
data(lalonde)
head(lalonde)
## treat age educ black hispan married nodegree re74 re75 re78
## NSW1 1 37 11 1 0 1 1 0 0 9930.0460
## NSW2 1 22 9 0 1 0 1 0 0 3595.8940
## NSW3 1 30 12 1 0 0 0 0 0 24909.4500
## NSW4 1 27 11 1 0 0 1 0 0 7506.1460
## NSW5 1 33 8 1 0 0 1 0 0 289.7899
## NSW6 1 22 9 1 0 0 1 0 0 4056.4940
## 分类变量
lalonde$treat <- factor(lalonde$treat, levels=c(0, 1, 2), labels=c("Control", "Treatment", "P-value"))
lalonde$black <- factor(lalonde$black)
lalonde$hispan <- factor(lalonde$hispan)
lalonde$married <- factor(lalonde$married)
lalonde$nodegree <- factor(lalonde$nodegree)
lalonde$black <- as.logical(lalonde$black == 1)
lalonde$hispan <- as.logical(lalonde$hispan == 1)
lalonde$married <- as.logical(lalonde$married == 1)
lalonde$nodegree <- as.logical(lalonde$nodegree == 1)
##连续变量
label(lalonde$black) <- "Black"
label(lalonde$hispan) <- "Hispanic"
label(lalonde$married) <- "Married"
label(lalonde$nodegree) <- "No high school diploma"
label(lalonde$age) <- "Age"
label(lalonde$re74) <- "1974 Income"
label(lalonde$re75) <- "1975 Income"
label(lalonde$re78) <- "1978 Income"
units(lalonde$age) <- "years"
rndr <- function(x, name, ...) {
if (length(x) == 0) {
y <- lalonde[[name]]
s <- rep("", length(render.default(x=y, name=name, ...)))
if (is.numeric(y)) {
p <- t.test(y ~ lalonde$treat)$p.value
} else {
p <- chisq.test(table(y, droplevels(lalonde$treat)))$p.value
}
s[2] <- sub("<", "<", format.pval(p, digits=3, eps=0.001))
s
} else {
render.default(x=x, name=name, ...)
}
}
rndr.strat <- function(label, n, ...) {
ifelse(n==0, label, render.strat.default(label, n, ...))
}
## 绘图
table1(~ age + black + hispan + married + nodegree + re74 + re75 + re78 | treat,
data=lalonde, droplevels=F, render=rndr, render.strat=rndr.strat, overall=F)
| Control (n=429) |
Treatment (n=185) |
P-value | |
|---|---|---|---|
| Age (years) | |||
| Mean (SD) | 28.0 (10.8) | 25.8 (7.16) | 0.00291 |
| Median [Min, Max] | 25.0 [16.0, 55.0] | 25.0 [17.0, 48.0] | |
| Black | |||
| Yes | 87 (20.3%) | 156 (84.3%) | <0.001 |
| No | 342 (79.7%) | 29 (15.7%) | |
| Hispanic | |||
| Yes | 61 (14.2%) | 11 (5.9%) | 0.00532 |
| No | 368 (85.8%) | 174 (94.1%) | |
| Married | |||
| Yes | 220 (51.3%) | 35 (18.9%) | <0.001 |
| No | 209 (48.7%) | 150 (81.1%) | |
| No high school diploma | |||
| Yes | 256 (59.7%) | 131 (70.8%) | 0.0113 |
| No | 173 (40.3%) | 54 (29.2%) | |
| 1974 Income | |||
| Mean (SD) | 5620 (6790) | 2100 (4890) | <0.001 |
| Median [Min, Max] | 2550 [0.00, 25900] | 0.00 [0.00, 35000] | |
| 1975 Income | |||
| Mean (SD) | 2470 (3290) | 1530 (3220) | 0.00115 |
| Median [Min, Max] | 1090 [0.00, 18300] | 0.00 [0.00, 25100] | |
| 1978 Income | |||
| Mean (SD) | 6980 (7290) | 6350 (7870) | 0.349 |
| Median [Min, Max] | 4980 [0.00, 25600] | 4230 [0.00, 60300] |