TCGA资料临床三线表示例

# Load the saved workspace; the script expects it to provide an object
# named `data` (the preview printed below shows its columns).
load("F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")
# Keep a working copy under the name `mydata`.
mydata <- data
# Preview the first six rows of the loaded table.
head(data, n = 6L)
##                ID group    CCL14     HBA1    CCL16    TUBB3 PAM50 Os_time
## 1 TCGA.A1.A0SK.01  TNBC 3.372704 0.000000 0.000000 5.936486 Basal     967
## 2 TCGA.A1.A0SP.01  TNBC 4.970021 3.394930 0.000000 6.001432 Basal     584
## 3 TCGA.A2.A04U.01  TNBC 4.766927 0.000000 0.000000 5.043662 Basal    2654
## 4 TCGA.A2.A0CM.01  TNBC 4.192953 3.738738 2.842951 5.127705 Basal     754
## 5 TCGA.A2.A0D0.01  TNBC 0.000000 0.000000 0.000000 5.653455 Basal    2048
## 6 TCGA.A2.A0D2.01  TNBC 5.338654 0.000000 4.069236 6.043691 Basal    1027
##   OS_event RFS_time RFS_event age       ER       PR gender     HER2
## 1        1       NA        NA  54 Negative Negative FEMALE Negative
## 2        0       NA        NA  40 Negative Negative FEMALE Negative
## 3        0       NA        NA  47 Negative Negative FEMALE Negative
## 4        1       NA        NA  40 Negative Negative FEMALE Negative
## 5        0     2048         0  60 Negative Negative FEMALE Negative
## 6        0     1027         0  45 Negative Negative FEMALE Negative
##   Margin_status Node M_stage N_stage T_stage Pathologic stage
## 1      Positive    0      M0      N0      T2        Stage_IIA
## 2      Negative    0      M0      N0      T2        Stage_IIA
## 3      Negative    0      M0      N0      T2        Stage_IIA
## 4      Negative    0      M0      N0      T2        Stage_IIA
## 5      Negative    0      M0      N0      T2        Stage_IIA
## 6      Negative    0      M0      N0      T2        Stage_IIA

先按某个基因（此处为 CCL16）表达量的中位数将样本分为高、低两组，作为分组变量

require(table1)
## Loading required package: table1
## Warning: package 'table1' was built under R version 3.6.1
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
## Grouping variable: split the samples at the median CCL16 expression.
## The levels are spelled out so that each label is paired with the intended
## class instead of relying on the alphabetical order of "high"/"low", and so
## that the call still works when one of the two classes happens to be empty.
mydata$CCL16 <- factor(
  ifelse(mydata$CCL16 > median(mydata$CCL16), "high", "low"),
  levels = c("high", "low"),
  labels = c("CCL16 high", "CCL16 low")
)

## Relabel the categorical variable `gender` for display.
mydata$gender <- factor(mydata$gender,
                        levels = c("FEMALE", "MALE"),
                        labels = c("female", "male"))



## Display labels used as row headers in the table
## (assigned with the `label<-` replacement function).

label(mydata$age)     <- "Age"
label(mydata$gender)  <- "Gender"
label(mydata$T_stage) <- "T stage"
label(mydata$N_stage) <- "N stage"
label(mydata$M_stage) <- "M stage"
label(mydata$group)   <- "Group"


## Unit attached to the continuous variable
units(mydata$age) <- "years"

## Default version: one column per CCL16 group plus a "Total" column
table1(
  ~ gender + age + group + T_stage + N_stage + M_stage | CCL16,
  data = mydata,
  overall = "Total"
)
CCL16 high
(n=114)
CCL16 low
(n=114)
Total
(n=228)
Gender
female 113 (99.1%) 114 (100%) 227 (99.6%)
male 1 (0.9%) 0 (0%) 1 (0.4%)
Age (years)
Mean (SD) 57.2 (14.4) 54.8 (12.1) 56.0 (13.3)
Median [Min, Max] 56.0 [30.0, 90.0] 54.0 [29.0, 90.0] 55.0 [29.0, 90.0]
Group
Normal 103 (90.4%) 10 (8.8%) 113 (49.6%)
TNBC 11 (9.6%) 104 (91.2%) 115 (50.4%)
T stage
T1 29 (25.4%) 26 (22.8%) 55 (24.1%)
T2 65 (57.0%) 73 (64.0%) 138 (60.5%)
T3 11 (9.6%) 13 (11.4%) 24 (10.5%)
T4 9 (7.9%) 2 (1.8%) 11 (4.8%)
TX 0 (0%) 0 (0%) 0 (0%)
N stage
N0 50 (43.9%) 69 (60.5%) 119 (52.2%)
N1 45 (39.5%) 29 (25.4%) 74 (32.5%)
N2 13 (11.4%) 11 (9.6%) 24 (10.5%)
N3 3 (2.6%) 5 (4.4%) 8 (3.5%)
NX 3 (2.6%) 0 (0%) 3 (1.3%)
M stage
M0 103 (90.4%) 99 (86.8%) 202 (88.6%)
M1 2 (1.8%) 2 (1.8%) 4 (1.8%)
MX 9 (7.9%) 13 (11.4%) 22 (9.6%)

升级版

require(dplyr)
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.6.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Reload the workspace so this section starts again from the raw `data`.
load("F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")

## Data cleaning: keep only the TNBC samples.
## The extra exclusions are commented out in this version:
##   filter(M_stage != "MX"), filter(N_stage != "NX"), filter(T_stage != "TX")
mydata <- filter(data, group == "TNBC")
dim(mydata)
## [1] 115  22
head(mydata)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <int>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal     967        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal     584        0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    2654        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal     754        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    2048        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    1027        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
## Data cleaning: recode the clinical variables for display.
mydata <- within(mydata, {
  # Collapse T1/T2 and T3/T4 (repeated labels merge the levels); TX is kept.
  T_stage <- factor(T_stage,
                    levels = c("T1", "T2", "T3", "T4", "TX"),
                    labels = c("T1/T2", "T1/T2", "T3/T4", "T3/T4", "TX"))
  # Collapse N0/N1 and N2/N3; NX is kept.
  N_stage <- factor(N_stage,
                    levels = c("N0", "N1", "N2", "N3", "NX"),
                    labels = c("N0/N1", "N0/N1", "N2/N3", "N2/N3", "NX"))
  M_stage <- factor(M_stage,
                    levels = c("M0", "M1", "MX"),
                    labels = c("M0", "M1", "MX"))
  # Dichotomise age at the sample median. `levels` must be given explicitly:
  # without it factor() sorts the values ("<55" before ">=55") and a
  # positional `labels = c(">=55", "<55")` swaps the two classes.
  # NOTE(review): the "55" in the labels is hard-coded while the cut point is
  # median(age) -- confirm that they agree for this subset.
  age <- factor(ifelse(age >= median(age), ">=55", "<55"),
                levels = c(">=55", "<55"))
  gender <- factor(gender,
                   levels = c("FEMALE", "MALE"),
                   labels = c("female", "male"))
})


## Grouping variable: split the samples at the median CCL16 expression.
## Explicit levels pair each label with the intended class (no reliance on
## alphabetical ordering) and keep the call valid if one class is empty.
mydata$CCL16 <- factor(
  ifelse(mydata$CCL16 > median(mydata$CCL16), "high", "low"),
  levels = c("high", "low"),
  labels = c("CCL16 high", "CCL16 low")
)


## Row labels (left column) and column-group headings
labels <- list(
  variables = list(
    gender  = "Gender",
    age     = "Age (years)",
    # group = "Group",
    T_stage = "T stage",
    N_stage = "N stage",
    M_stage = "M stage"
  ),
  groups = list("", "Expression")
)
## Strata: the whole data set as "Total", followed by one stratum per
## CCL16 level
strata <- c(list(Total = mydata), split(mydata, mydata[["CCL16"]]))

## Rendering style
# Render a continuous variable as a single "Mean (SD)" row.
#   x: values of the variable within one stratum.
# Returns a character vector: an empty first element followed by the element
# named "Mean (SD)", formatted as "<mean> (&plusmn; <sd>)" from the rounded
# statistics.
my.render.cont <- function(x) {
  rounded <- stats.apply.rounding(stats.default(x), digits = 2)
  c("",
    "Mean (SD)" = sprintf("%s (&plusmn; %s)",
                          rounded[["MEAN"]], rounded[["SD"]]))
}
# Render a categorical variable as one "count (percent %)" row per category.
#   x: values of the variable within one stratum.
# Returns a character vector: an empty first element followed by one
# formatted entry per element returned by stats.default(x).
my.render.cat <- function(x) {
  format_level <- function(level_stats) {
    with(level_stats, sprintf("%d (%0.0f %%)", FREQ, PCT))
  }
  c("", sapply(stats.default(x), format_level))
}
##
## Draw the three-line table: "Total" stands alone and the two CCL16 columns
## sit under the "Expression" heading (groupspan = c(1, 2)).
table1(
  strata, labels,
  groupspan = c(1, 2),
  render.continuous = my.render.cont,
  render.categorical = my.render.cat,
  topclass = "Rtable1-zebra"
)
Expression
Total
(n=115)
CCL16 high
(n=57)
CCL16 low
(n=58)
Gender
female 115 (100 %) 57 (100 %) 58 (100 %)
male 0 (0 %) 0 (0 %) 0 (0 %)
Age (years)
>=55 57 (50 %) 30 (53 %) 27 (47 %)
<55 58 (50 %) 27 (47 %) 31 (53 %)
T stage
T1/T2 99 (86 %) 47 (82 %) 52 (90 %)
T3/T4 16 (14 %) 10 (18 %) 6 (10 %)
TX 0 (0 %) 0 (0 %) 0 (0 %)
N stage
N0/N1 99 (86 %) 48 (84 %) 51 (88 %)
N2/N3 16 (14 %) 9 (16 %) 7 (12 %)
NX 0 (0 %) 0 (0 %) 0 (0 %)
M stage
M0 98 (85 %) 48 (84 %) 50 (86 %)
M1 2 (2 %) 2 (4 %) 0 (0 %)
MX 15 (13 %) 7 (12 %) 8 (14 %)
## 然后复制到word中即可

增加一列 P-value 的版本

require(dplyr)
# Reload the workspace so this section starts again from the raw `data`.
load("F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")

## Data cleaning: keep the TNBC samples and exclude the MX / NX / TX
## stage categories
mydata <- filter(
  data,
  group == "TNBC",
  M_stage != "MX",
  N_stage != "NX",
  T_stage != "TX"
)
dim(mydata)
## [1] 100  22
head(mydata)
## # A tibble: 6 x 22
##   ID    group CCL14  HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
##   <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct>   <int>    <int>    <int>
## 1 TCGA~ TNBC   3.37  0     0     5.94 Basal     967        1       NA
## 2 TCGA~ TNBC   4.97  3.39  0     6.00 Basal     584        0       NA
## 3 TCGA~ TNBC   4.77  0     0     5.04 Basal    2654        0       NA
## 4 TCGA~ TNBC   4.19  3.74  2.84  5.13 Basal     754        1       NA
## 5 TCGA~ TNBC   0     0     0     5.65 Basal    2048        0     2048
## 6 TCGA~ TNBC   5.34  0     4.07  6.04 Basal    1027        0     1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## #   PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## #   M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
## Data cleaning: recode the clinical variables for display.
mydata <- within(mydata, {
  # Collapse T1/T2 and T3/T4 (repeated labels merge the levels).
  T_stage <- factor(T_stage,
                    levels = c("T1", "T2", "T3", "T4"),
                    labels = c("T1/T2", "T1/T2", "T3/T4", "T3/T4"))
  # Collapse N0/N1 and N2/N3.
  N_stage <- factor(N_stage,
                    levels = c("N0", "N1", "N2", "N3"),
                    labels = c("N0/N1", "N0/N1", "N2/N3", "N2/N3"))
  M_stage <- factor(M_stage,
                    levels = c("M0", "M1"),
                    labels = c("M0", "M1"))
  # Dichotomise age at the sample median. `levels` must be given explicitly:
  # without it factor() sorts the values ("<55" before ">=55") and a
  # positional `labels = c(">=55", "<55")` swaps the two classes.
  # NOTE(review): the "55" in the labels is hard-coded while the cut point is
  # median(age) -- confirm that they agree for this subset.
  age <- factor(ifelse(age >= median(age), ">=55", "<55"),
                levels = c(">=55", "<55"))
  gender <- factor(gender,
                   levels = c("FEMALE", "MALE"),
                   labels = c("female", "male"))
})



## Rebuild the grouping variable with a third level, "P-value", that no
## sample takes; split() on this factor then also yields an empty stratum.

mydata$CCL16 <- factor(
  ifelse(mydata$CCL16 > median(mydata$CCL16), "high", "low"),
  levels = c("high", "low", "P-value"),
  labels = c("CCL16 high", "CCL16 low", "P-value")
)
## Row labels (left column) and column-group headings
labels <- list(
  variables = list(
    gender  = "Gender",
    age     = "Age (years)",
    # group = "Group",
    T_stage = "T stage",
    N_stage = "N stage",
    M_stage = "M stage"
  ),
  ## headings shown in the header row, one per column group
  groups = list("", "Expression", "")
)
## Strata: the whole data set as "Total", followed by one stratum per
## CCL16 level
strata <- c(list(Total = mydata), split(mydata, mydata[["CCL16"]]))

### To reuse this renderer with other data, adapt the references to the
### global data frame `mydata` and its grouping column `CCL16`.
# Cell renderer that turns the empty stratum into a p-value column.
#   x:    values of one variable within one stratum; length 0 marks the
#         empty "P-value" stratum.
#   name: column name of the variable in `mydata`.
#   ...:  forwarded to render.default().
# Returns a character vector with one entry per rendered row. For the empty
# stratum every entry is "" except the second, which holds the formatted
# p-value (Welch t-test for numeric variables, chi-squared test otherwise).
rndr <- function(x, name, ...) {
  if (length(x) == 0) {
    y <- mydata[[name]]
    # Compare only the groups that actually contain samples.
    grp <- droplevels(mydata$CCL16)
    # Same number of rows as the regular rendering of this variable.
    s <- rep("", length(render.default(x = y, name = name, ...)))
    if (is.numeric(y)) {
      p <- t.test(y ~ grp)$p.value
    } else {
      tab <- table(y, grp)
      # Categories without any observation have zero expected counts and
      # would turn the chi-squared statistic into NaN, so drop them first.
      tab <- tab[rowSums(tab) > 0, , drop = FALSE]
      if (nrow(tab) < 2L || ncol(tab) < 2L) {
        # Only one observed category (or group): no test of association.
        p <- NA_real_
      } else {
        # NOTE(review): chisq.test() warns when expected counts are small;
        # consider fisher.test() for sparse tables.
        p <- chisq.test(tab)$p.value
      }
    }
    # Escape "<" because the table is rendered as HTML.
    s[2] <- sub("<", "&lt;", format.pval(p, digits = 3, eps = 0.001))
    s
  } else {
    render.default(x = x, name = name, ...)
  }
}

# Column-header renderer: a stratum without samples (the "P-value" column)
# shows only its label; every other stratum uses the default header.
#   label: stratum label(s).
#   n:     number of samples in each stratum.
#   ...:   forwarded to render.strat.default().
rndr.strat <- function(label, n, ...) {
  is_empty_stratum <- n == 0
  ifelse(is_empty_stratum, label, render.strat.default(label, n, ...))
}
############

## Draw the three-line table. The empty "P-value" stratum becomes the last
## column (groupspan = c(1, 2, 1)); FALSE is spelled out because `F` is an
## ordinary variable that can be reassigned.
table1(strata, labels, groupspan = c(1, 2, 1), droplevels = FALSE,
       render = rndr, render.strat = rndr.strat,
       topclass = "Rtable1-zebra")
## Warning in chisq.test(table(y, droplevels(mydata$CCL16))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(mydata$CCL16))): Chi-squared
## approximation may be incorrect
Expression
Total
(n=100)
CCL16 high
(n=50)
CCL16 low
(n=50)
P-value
Gender
female 100 (100%) 50 (100%) 50 (100%) NA
male 0 (0%) 0 (0%) 0 (0%)
Age (years)
>=55 49 (49.0%) 27 (54.0%) 22 (44.0%) 0.424
<55 51 (51.0%) 23 (46.0%) 28 (56.0%)
T stage
T1/T2 90 (90.0%) 42 (84.0%) 48 (96.0%) 0.0956
T3/T4 10 (10.0%) 8 (16.0%) 2 (4.0%)
N stage
N0/N1 87 (87.0%) 41 (82.0%) 46 (92.0%) 0.234
N2/N3 13 (13.0%) 9 (18.0%) 4 (8.0%)
M stage
M0 98 (98.0%) 48 (96.0%) 50 (100%) 0.475
M1 2 (2.0%) 2 (4.0%) 0 (0%)