##allpatient group_Chi square and KM curve
# <!-- install.packages("survival") -->
# # <!-- install.packages("survival", "survminer") -->
# install.packages("survivalROC")
library(survivalROC)
library(survival)
library(data.table)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
##
## The following object is masked from 'package:survival':
##
## cluster
library(readxl)
library(knitr)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loaded glmnet 4.1-8
library(dplyr)
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
require(moonBook)
## Loading required package: moonBook
##
## Attaching package: 'moonBook'
##
## The following object is masked from 'package:lattice':
##
## densityplot
require(ztable)
## Loading required package: ztable
## Welcome to package ztable ver 0.2.3
# install.packages(c("gglot2","plyr"))
dataset <- read_excel('/Volumes/SSD 1T/downloads_2024_iMAC/ASDP_FINAL_Norecur_modified_PTIL.xlsx')
print(str_c('full dataset size : ', dataset %>% nrow()))
## [1] "full dataset size : 55"
colnames(dataset) <- str_c(dataset %>%
colnames() %>% str_replace_all(' ', '') %>%
str_replace_all(fixed('('), '_') %>%
str_replace_all(fixed('%'), 'per') %>%
str_replace_all(fixed(')'), '_') %>%
str_replace_all(fixed('/'), '_') %>%
str_squish())
glimpse(dataset)
## Rows: 55
## Columns: 13
## $ Number <chr> "S15024968", "S18005204", "S15000922", "S15009842", "S160232…
## $ PatientID <dbl> 1998661, 2169721, 1936889, 348993, 2057938, 1151575, 1069213…
## $ P_TIL <dbl> 12.19421, 18.31465, 32.99234, 40.11670, 38.06218, 35.32386, …
## $ C_TIL <dbl> 12.19421, 18.31465, 32.99234, 27.56477, 38.65840, 33.08246, …
## $ Mean_TIL <dbl> 12.19421, 18.31465, 32.99234, 33.84074, 38.36029, 34.20316, …
## $ Age <dbl> 77, 62, 65, 75, 59, 68, 72, 88, 88, 75, 76, 74, 76, 84, 87, …
## $ Sex <chr> "M", "M", "M", "M", "M", "M", "M", "F", "F", "M", "M", "M", …
## $ CIS <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, …
## $ T <dbl> 2.0, 1.3, 2.0, 2.0, 1.9, 2.0, 1.3, 2.0, 2.0, 1.6, 1.9, 2.0, …
## $ Subtype <chr> "0", "micropap", "sarc", "0", "plasmacytoid", "0", "0", "sar…
## $ LVI <dbl> 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Recur <dbl> 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, …
## $ Months <dbl> 0.2301370, 3.7150685, 5.1616438, 3.1232877, 89.3260274, 71.6…
dataset_select <- dataset %>%
mutate(Age = as.numeric(Age),
Sex = as.factor(Sex),
CIS=as.factor(CIS),
T=as.numeric(T),
Recur= as.numeric(Recur),
P_TIL=as.numeric(P_TIL),
C_TIL=as.numeric(C_TIL),
Mean_TIL=as.numeric(Mean_TIL),
Subtype=as.factor(Subtype),
LVI=as.factor(LVI),
Months=as.numeric(Months))
### 변수별 NA 수
dataset_select %>% summarise_all(function(x){ifelse(is.na(x), 1, 0)}) %>% colSums() %>% kable()
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## ℹ The deprecated feature was likely used in the dplyr package.
## Please report the issue at <https://github.com/tidyverse/dplyr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
| x | |
|---|---|
| Number | 0 |
| PatientID | 0 |
| P_TIL | 0 |
| C_TIL | 0 |
| Mean_TIL | 0 |
| Age | 0 |
| Sex | 0 |
| CIS | 0 |
| T | 0 |
| Subtype | 0 |
| LVI | 0 |
| Recur | 0 |
| Months | 0 |
##All patients
require(moonBook)
require(ztable)
out2=mytable(Recur~.,data=dataset_select);mylatex(out2)
## \begin{table}[!hbp]
## \begin{normalsize}
## \begin{tabular}{lccc}
## \multicolumn{4}{c}{Descriptive Statistics by Recur}\\
## \hline
## & 0 & 1 & \multirow{2}{*}{p}\\
## & (N=17) & (N=38) & \\
## \hline
## Number & unique values:55 & unique values:55 & \\
## PatientID & 1730193.1 ± 495246.3 & 1663724.0 ± 543165.5 & 0.669\\
## P_TIL & 24.6 ± 7.6 & 20.7 ± 8.9 & 0.124\\
## C_TIL & 25.2 ± 7.6 & 19.7 ± 8.6 & 0.026\\
## Mean_TIL & 24.9 ± 6.9 & 20.2 ± 8.4 & 0.047\\
## Age & 73.3 ± 8.4 & 69.8 ± 11.5 & 0.273\\
## Sex & & & 0.205\\
## - F & 0 ( 0.0\%) & 6 (15.8\%) & \\
## - M & 17 (100.0\%) & 32 (84.2\%) & \\
## CIS & & & 0.655\\
## - 0 & 15 (88.2\%) & 30 (78.9\%) & \\
## - 1 & 2 (11.8\%) & 8 (21.1\%) & \\
## T & & & 0.012\\
## - 1.3 & 1 ( 5.9\%) & 12 (31.6\%) & \\
## - 1.6 & 6 (35.3\%) & 5 (13.2\%) & \\
## - 1.9 & 5 (29.4\%) & 2 ( 5.3\%) & \\
## - 2 & 5 (29.4\%) & 18 (47.4\%) & \\
## - 3 & 0 ( 0.0\%) & 1 ( 2.6\%) & \\
## Subtype & & & 0.925\\
## - 0 & 14 (82.4\%) & 31 (81.6\%) & \\
## - micropap & 1 ( 5.9\%) & 3 ( 7.9\%) & \\
## - plasmacytoid & 1 ( 5.9\%) & 1 ( 2.6\%) & \\
## - sarc & 1 ( 5.9\%) & 3 ( 7.9\%) & \\
## LVI & & & 0.963\\
## - 0 & 16 (94.1\%) & 34 (89.5\%) & \\
## - 1 & 1 ( 5.9\%) & 4 (10.5\%) & \\
## Months & 50.1 ± 40.3 & 16.4 ± 23.8 & 0.004\\
## \hline
## \end{tabular}
## \end{normalsize}
## \end{table}
print(out2)
##
## Descriptive Statistics by 'Recur'
## —————————————————————————————————————————————————————————————————
## 0 1 p
## (N=17) (N=38)
## —————————————————————————————————————————————————————————————————
## Number unique values:55 unique values:55
## PatientID 1730193.1 ± 495246.3 1663724.0 ± 543165.5 0.669
## P_TIL 24.6 ± 7.6 20.7 ± 8.9 0.124
## C_TIL 25.2 ± 7.6 19.7 ± 8.6 0.026
## Mean_TIL 24.9 ± 6.9 20.2 ± 8.4 0.047
## Age 73.3 ± 8.4 69.8 ± 11.5 0.273
## Sex 0.205
## - F 0 ( 0.0%) 6 (15.8%)
## - M 17 (100.0%) 32 (84.2%)
## CIS 0.655
## - 0 15 (88.2%) 30 (78.9%)
## - 1 2 (11.8%) 8 (21.1%)
## T 0.012
## - 1.3 1 ( 5.9%) 12 (31.6%)
## - 1.6 6 (35.3%) 5 (13.2%)
## - 1.9 5 (29.4%) 2 ( 5.3%)
## - 2 5 (29.4%) 18 (47.4%)
## - 3 0 ( 0.0%) 1 ( 2.6%)
## Subtype 0.925
## - 0 14 (82.4%) 31 (81.6%)
## - micropap 1 ( 5.9%) 3 ( 7.9%)
## - plasmacytoid 1 ( 5.9%) 1 ( 2.6%)
## - sarc 1 ( 5.9%) 3 ( 7.9%)
## LVI 0.963
## - 0 16 (94.1%) 34 (89.5%)
## - 1 1 ( 5.9%) 4 (10.5%)
## Months 50.1 ± 40.3 16.4 ± 23.8 0.004
## —————————————————————————————————————————————————————————————————
##CoxpH
library(rrtable)
## Welcome to rrtable package
## Register inputHandler for chooserInput
##
## Attaching package: 'rrtable'
## The following objects are masked from 'package:ztable':
##
## roundDf, ztable2flextable
library(survival)
library(autoReg)
##
## Attaching package: 'autoReg'
## The following object is masked from 'package:rrtable':
##
## df2flextable
fit = coxph(Surv(Months,Recur)~P_TIL+T+CIS+Age+Sex+Subtype+C_TIL,data=dataset_select)
summary(fit)
## Call:
## coxph(formula = Surv(Months, Recur) ~ P_TIL + T + CIS + Age +
## Sex + Subtype + C_TIL, data = dataset_select)
##
## n= 55, number of events= 38
##
## coef exp(coef) se(coef) z Pr(>|z|)
## P_TIL 0.03630 1.03696 0.03700 0.981 0.327
## T 0.84021 2.31685 0.58689 1.432 0.152
## CIS1 0.34023 1.40527 0.43466 0.783 0.434
## Age -0.00949 0.99055 0.01801 -0.527 0.598
## SexM -0.89764 0.40753 0.53272 -1.685 0.092 .
## Subtypemicropap 0.71445 2.04307 0.64576 1.106 0.269
## Subtypeplasmacytoid -0.64373 0.52533 1.07686 -0.598 0.550
## Subtypesarc 0.38636 1.47162 0.62254 0.621 0.535
## C_TIL -0.04958 0.95163 0.03719 -1.333 0.182
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## P_TIL 1.0370 0.9644 0.96442 1.115
## T 2.3168 0.4316 0.73339 7.319
## CIS1 1.4053 0.7116 0.59949 3.294
## Age 0.9906 1.0095 0.95621 1.026
## SexM 0.4075 2.4538 0.14345 1.158
## Subtypemicropap 2.0431 0.4895 0.57625 7.244
## Subtypeplasmacytoid 0.5253 1.9036 0.06365 4.336
## Subtypesarc 1.4716 0.6795 0.43440 4.985
## C_TIL 0.9516 1.0508 0.88473 1.024
##
## Concordance= 0.617 (se = 0.048 )
## Likelihood ratio test= 8.26 on 9 df, p=0.5
## Wald test = 8.16 on 9 df, p=0.5
## Score (logrank) test = 8.52 on 9 df, p=0.5
result<-autoReg(fit,uni=TRUE,final=TRUE)%>%myft()
result
Dependent: Surv(Months, Recur) | all | HR (univariable) | HR (multivariable) | HR (final) | |
|---|---|---|---|---|---|
P_TIL | Mean ± SD | 21.9 ± 8.7 | 0.98 (0.95-1.02, p=.411) | ||
T | Mean ± SD | 1.8 ± 0.3 | 1.46 (0.46-4.63, p=.524) | ||
CIS | 0 | 45 (81.8%) | |||
1 | 10 (18.2%) | 1.25 (0.57-2.75, p=.574) | |||
Age | Mean ± SD | 70.9 ± 10.7 | 0.99 (0.96-1.02, p=.491) | ||
Sex | F | 6 (10.9%) | |||
M | 49 (89.1%) | 0.51 (0.21-1.23, p=.134) | 0.60 (0.24-1.48, p=.266) | ||
Subtype | 0 | 45 (81.8%) | |||
micropap | 4 (7.3%) | 1.49 (0.44-5.00, p=.522) | |||
plasmacytoid | 2 (3.6%) | 0.43 (0.06-3.18, p=.411) | |||
sarc | 4 (7.3%) | 1.52 (0.46-5.02, p=.488) | |||
C_TIL | Mean ± SD | 21.4 ± 8.6 | 0.97 (0.93-1.01, p=.134) | 0.98 (0.94-1.02, p=.231) | 0.97 (0.93-1.01, p=.134) |
n=55, events=38, Likelihood ratio test=3.37 on 2 df(p=.185) | |||||
fit2 = coxph(Surv(Months,Recur)~Sex+C_TIL,data=dataset_select)
summary(fit2)
## Call:
## coxph(formula = Surv(Months, Recur) ~ Sex + C_TIL, data = dataset_select)
##
## n= 55, number of events= 38
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SexM -0.51817 0.59561 0.46606 -1.112 0.266
## C_TIL -0.02414 0.97615 0.02017 -1.197 0.231
##
## exp(coef) exp(-coef) lower .95 upper .95
## SexM 0.5956 1.679 0.2389 1.485
## C_TIL 0.9762 1.024 0.9383 1.016
##
## Concordance= 0.598 (se = 0.053 )
## Likelihood ratio test= 3.37 on 2 df, p=0.2
## Wald test = 3.67 on 2 df, p=0.2
## Score (logrank) test = 3.78 on 2 df, p=0.2
result<-autoReg(fit2,uni=TRUE,final=TRUE)%>%myft()
result
Dependent: Surv(Months, Recur) | all | HR (univariable) | HR (multivariable) | HR (final) | |
|---|---|---|---|---|---|
Sex | F | 6 (10.9%) | |||
M | 49 (89.1%) | 0.51 (0.21-1.23, p=.134) | 0.60 (0.24-1.48, p=.266) | ||
C_TIL | Mean ± SD | 21.4 ± 8.6 | 0.97 (0.93-1.01, p=.134) | 0.98 (0.94-1.02, p=.231) | 0.97 (0.93-1.01, p=.134) |
n=55, events=38, Likelihood ratio test=3.37 on 2 df(p=.185) | |||||
# Install and load required packages
# install.packages("maxstat")
library(maxstat)
library(survival)
library(autoReg)
library(rrtable)
# Summary and cleaning
summary(dataset_select)
sum(is.na(dataset_select))
# Impute missing values (if needed)
# dataset_select <- na.omit(dataset_select)
# Ensure correct data types
dataset_select$Sex <- as.factor(dataset_select$Sex)
dataset_select$CIS <- as.factor(dataset_select$CIS)
dataset_select$Tstage <- as.factor(dataset_select$Tstage)
dataset_select$Subtype <- as.factor(dataset_select$Subtype)
dataset_select$LVI <- as.factor(dataset_select$LVI)
dataset_select$ADJ <- as.factor(dataset_select$ADJ)
dataset_select$Concurrent <- as.factor(dataset_select$Concurrent)
dataset_select$UpperTract <- as.factor(dataset_select$UpperTract)
# Find optimal cut-point for continuous variables and dichotomize
# Age
# opt_cut_age <- maxstat.test(Surv(RFS, Recur) ~ Age, data = dataset_select, smethod = "LogRank")
# dataset_select$Age_dichotomized <- ifelse(dataset_select$Age <= opt_cut_age$estimate, 0, 1)
# Central_intratumoral_ratio
opt_cut_central <- maxstat.test(Surv(RFS, Recur) ~ Central_intratumoral_ratio, data = dataset_select, smethod = "LogRank")
dataset_select$Central_intratumoral_ratio_dichotomized <- ifelse(dataset_select$Central_intratumoral_ratio <= opt_cut_central$estimate, 0, 1)
# Peripheral_intratumoral_ratio
opt_cut_peripheral <- maxstat.test(Surv(RFS, Recur) ~ Peripheral_intratumoral_ratio, data = dataset_select, smethod = "LogRank")
dataset_select$Peripheral_intratumoral_ratio_dichotomized <- ifelse(dataset_select$Peripheral_intratumoral_ratio <= opt_cut_peripheral$estimate, 0, 1)
# Refit the Cox model with dichotomized variables
fit <- coxph(Surv(RFS, Recur) ~ Age + CIS + Tstage + Subtype + LVI + Concurrent + UpperTract + Central_intratumoral_ratio_dichotomized + Peripheral_intratumoral_ratio_dichotomized,
data = dataset_select,
control = coxph.control(iter.max = 1000))
result<-autoReg(fit,uni=TRUE,final=TRUE)%>%myft()
result
#KMcurve
# Find optimal cut-point for Central_intratumoral_ratio and dichotomize
opt_cut_central <- maxstat.test(Surv(RFS, Recur) ~ Central_intratumoral_ratio, data = dataset_select, smethod = "LogRank")
central_cutoff <- opt_cut_central$estimate
dataset_select$Central_intratumoral_ratio_dichotomized <- ifelse(dataset_select$Central_intratumoral_ratio <= central_cutoff, 0, 1)
# Find optimal cut-point for Peripheral_intratumoral_ratio and dichotomize
opt_cut_peripheral <- maxstat.test(Surv(RFS, Recur) ~ Peripheral_intratumoral_ratio, data = dataset_select, smethod = "LogRank")
peripheral_cutoff <- opt_cut_peripheral$estimate
dataset_select$Peripheral_intratumoral_ratio_dichotomized <- ifelse(dataset_select$Peripheral_intratumoral_ratio <= peripheral_cutoff, 0, 1)
# Generate KM curve for Central_intratumoral_ratio
km_fit_central <- survfit(Surv(RFS, Recur) ~ Central_intratumoral_ratio_dichotomized, data = dataset_select)
p_value_central <- surv_pvalue(km_fit_central)$pval
# Generate KM curve for Central_intratumoral_ratio
km_fit_central <- survfit(Surv(RFS, Recur) ~ Central_intratumoral_ratio_dichotomized, data = dataset_select)
p_value_central <- surv_pvalue(km_fit_central)$pval
ggsurvplot(km_fit_central, data = dataset_select,
pval = TRUE,
pval.method = TRUE,
risk.table = TRUE,
conf.int = TRUE,
xlab = "Time (Months)",
ylab = "Survival Probability",
title = "KM Curve for Central Intratumoral Ratio",
legend.title = "Central Intratumoral Ratio",
legend.labs = c("Low", "High"))
# Generate KM curve for Peripheral_intratumoral_ratio
km_fit_peripheral <- survfit(Surv(RFS, Recur) ~ Peripheral_intratumoral_ratio_dichotomized, data = dataset_select)
p_value_peripheral <- surv_pvalue(km_fit_peripheral)$pval
ggsurvplot(km_fit_peripheral, data = dataset_select,
pval = TRUE,
pval.method = TRUE,
risk.table = TRUE,
conf.int = TRUE,
xlab = "Time (Months)",
ylab = "Survival Probability",
title = "KM Curve for Peripheral Intratumoral Ratio",
legend.title = "Peripheral Intratumoral Ratio",
legend.labs = c("Low", "High"))
# Find optimal cut-point for Central_intratumoral_ratio and dichotomize
opt_cut_central <- maxstat.test(Surv(RFS, Recur) ~ Central_intratumoral_ratio, data = dataset_select, smethod = "LogRank")
central_cutoff <- opt_cut_central$estimate
dataset_select$Central_intratumoral_ratio_dichotomized <- ifelse(dataset_select$Central_intratumoral_ratio <= central_cutoff, 0, 1)
# Print the cutoff point for Central_intratumoral_ratio
print(paste("Optimal cutoff for Central_intratumoral_ratio:", central_cutoff))
# Find optimal cut-point for Peripheral_intratumoral_ratio and dichotomize
opt_cut_peripheral <- maxstat.test(Surv(RFS, Recur) ~ Peripheral_intratumoral_ratio, data = dataset_select, smethod = "LogRank")
peripheral_cutoff <- opt_cut_peripheral$estimate
dataset_select$Peripheral_intratumoral_ratio_dichotomized <- ifelse(dataset_select$Peripheral_intratumoral_ratio <= peripheral_cutoff, 0, 1)
# Print the cutoff point for Peripheral_intratumoral_ratio
print(paste("Optimal cutoff for Peripheral_intratumoral_ratio:", peripheral_cutoff))
# Generate KM curve for Central_intratumoral_ratio
km_fit_central <- survfit(Surv(RFS, Recur) ~ Central_intratumoral_ratio_dichotomized, data = dataset_select)
p_value_central <- surv_pvalue(km_fit_central)$pval
plot_central <- ggsurvplot(km_fit_central, data = dataset_select,
pval = TRUE,
pval.method = TRUE,
risk.table = TRUE,
conf.int = TRUE,
xlab = "Time (Months)",
ylab = "Survival Probability",
title = "KM Curve for Central Intratumoral Ratio",
legend.title = "Central Intratumoral Ratio",
legend.labs = c("Low", "High"))
# Add annotation for cutoff value
plot_central$plot <- plot_central$plot +
annotate("text", x = max(dataset_select$RFS) * 0.7, y = 0.2,
label = paste("Cutoff =", round(central_cutoff, 2)), size = 5)
print(plot_central)
# Generate KM curve for Peripheral_intratumoral_ratio
km_fit_peripheral <- survfit(Surv(RFS, Recur) ~ Peripheral_intratumoral_ratio_dichotomized, data = dataset_select)
p_value_peripheral <- surv_pvalue(km_fit_peripheral)$pval
plot_peripheral <- ggsurvplot(km_fit_peripheral, data = dataset_select,
pval = TRUE,
pval.method = TRUE,
risk.table = TRUE,
conf.int = TRUE,
xlab = "Time (Months)",
ylab = "Survival Probability",
title = "KM Curve for Peripheral Intratumoral Ratio",
legend.title = "Peripheral Intratumoral Ratio",
legend.labs = c("Low", "High"))
# Add annotation for cutoff value
plot_peripheral$plot <- plot_peripheral$plot +
annotate("text", x = max(dataset_select$RFS) * 0.7, y = 0.2,
label = paste("Cutoff =", round(peripheral_cutoff, 2)), size = 5)
print(plot_peripheral)