Reading in the initial data
Per Shubham (email 1.29.25), the initial query was very broad, with selection criteria of patients older than 18 with microscopically confirmed non-small cell carcinoma of the lung and bronchus (not identified on autopsy or death certificate) who received surgery.
library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("dplyr")
library("readxl")
library("compareGroups")
library("readxl")
library("lubridate")
library("sas7bdat")
library("compareGroups")
library("survival")
library("survminer")
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
# Load the raw SEER extract from a SAS data file into `unclean`.
# NOTE(review): the path is absolute and machine-specific; it must be edited
# before the script can run on another computer.
unclean <- read.sas7bdat("C:/Users/tumins01/Downloads/uncleaned.sas7bdat") #n=29322
Patient Selection
Selection step 1: surgical procedure codes, excluding anyone who did not have a lobectomy, pneumonectomy, or wedge resection; n=28594 (excluded = 728)
Useful links: https://apps.naaccr.org/data-dictionary/data-dictionary/version=22/data-item-view/item-number=1290/ https://seer.cancer.gov/archive/manuals/2018/AppendixC/Surgery_Codes_Lung_2018.pdf
# Collapse the surgery-of-primary-site code into a three-level procedure
# variable, `surg2`. Codes outside the listed sets (including 24 and 25: laser
# excision and bronchial sleeve resection only) are left as NA so that they can
# be dropped in the next step.
seer <- mutate(
  unclean,
  surg2 = case_when(
    RX_Summ__Surg_Prim_Site__1998__ %in% 20:23 ~ "Wedge",
    RX_Summ__Surg_Prim_Site__1998__ %in% c(30, 33, 45:48) ~ "Lobectomy",
    RX_Summ__Surg_Prim_Site__1998__ %in% c(55, 56, 65, 66, 70) ~ "Pneumonectomy",
    .default = NA_character_ # unmatched codes stay missing (character type)
  )
)
table(seer$RX_Summ__Surg_Prim_Site__1998__)
##
## 12 13 15 19 20 21 22 23 24 25 30 33 45
## 230 72 72 51 142 5090 1799 176 106 34 1746 18492 404
## 46 47 48 55 56 65 66 70 80 90
## 137 15 13 112 445 4 5 14 31 132
table(seer$surg2)
##
## Lobectomy Pneumonectomy Wedge
## 20807 580 7207
# Keep only patients whose procedure mapped to one of the three study groups;
# rows where surg2 is NA are dropped.
seer <- seer %>%
  filter(surg2 %in% c("Lobectomy", "Pneumonectomy", "Wedge"))

# Flag surgery codes that specify a mediastinal lymph node resection
# (33, 56, 70). The flag is stored as the strings "yes" / "no".
seer <- seer %>%
  mutate(
    mediastinal = if_else(
      RX_Summ__Surg_Prim_Site__1998__ %in% c(33, 56, 70),
      "yes",
      "no"
    )
  )
table(seer$mediastinal)
##
## no yes
## 9643 18951
table(seer$mediastinal, seer$surg2)
##
## Lobectomy Pneumonectomy Wedge
## no 2315 121 7207
## yes 18492 459 0
table(seer$Derived_EOD_2018_N__2018__)
##
## 88 N0 N1 N2 N3 NX
## 238 22647 2759 2508 171 271
Selection step 2: Regional lymph nodes examined, removing records where the exact number examined is unknown (code 90, i.e. 90 or more nodes with the exact number unknown, and codes 95–99) or missing; n=27381 (excluded = 1213)
Useful links: https://seer.cancer.gov/data-software/documentation/seerstat/nov2017/TextData.FileDescription.pdf#REGIONAL_NODES_EXAMINED
table(seer$Regional_nodes_examined__1988__)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 2723 814 1027 1185 1251 1410 1357 1365 1387 1312 1433 1267 1187 1004 946 886
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
## 714 747 603 579 513 436 436 342 268 265 248 207 180 141 129 109
## 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
## 116 91 73 74 56 44 44 47 46 39 32 27 27 24 20 16
## 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
## 15 22 9 17 7 13 3 3 2 2 2 5 4 5 4 3
## 64 65 66 68 69 70 71 72 73 75 76 83 86 90 95 96
## 3 1 1 2 1 1 1 2 1 1 2 1 1 3 218 53
## 97 98 99
## 587 230 122
# Drop records that do not carry an exact count of regional nodes examined:
# codes 90 and 95-99, plus missing values.
# Code 98 has to be part of this list: the reported step total
# (3 + 218 + 53 + 587 + 230 + 122 = 1213 excluded, n = 27381) and the later
# summaries (maximum of 86 nodes) are only reproduced when it is removed.
# Counts of 0 are kept on purpose; they identify patients with no nodes examined.
seer <- seer %>%
  filter(
    !is.na(Regional_nodes_examined__1988__),
    !Regional_nodes_examined__1988__ %in% c(90, 95, 96, 97, 98, 99)
  )
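Selection step 3: Number of positive regional lymph nodes, removing records where it is unknown (codes 90, 95, 96, 97, 99) or missing, and removing records whose N category is 88 or NX; code 98 is kept but recoded to missing; n=26876 (excluded = 505)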
table(seer$Regional_nodes_positive__1988__)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 19966 1947 909 577 346 231 162 111 77 47 35 39 29
## 13 14 15 16 17 18 19 21 22 23 24 25 27
## 20 14 17 6 6 4 6 4 4 1 1 3 1
## 29 30 33 34 39 95 97 98 99
## 1 2 1 1 1 81 6 2723 2
table(seer$Derived_EOD_2018_N__2018__)
##
## 88 N0 N1 N2 N3 NX
## 219 21828 2652 2329 150 203
# Remove records where the number of positive regional nodes is unknown
# (codes 90, 95, 96, 97, 99) or missing.
seer <- seer %>%
  filter(
    !is.na(Regional_nodes_positive__1988__),
    !Regional_nodes_positive__1988__ %in% c(90, 95, 96, 97, 99)
  )

# Code 98 is not a count, so it is kept in the cohort but stored as NA; all
# other values are left unchanged.
# NOTE(review): 98 appears to correspond to patients with 0 nodes examined
# (2723 rows in both tables) - confirm against the SEER coding manual.
seer <- seer %>%
  mutate(
    Regional_nodes_positive__1988__ = na_if(Regional_nodes_positive__1988__, 98)
  )
table(seer$Derived_EOD_2018_N__2018__)
##
## 88 N0 N1 N2 N3 NX
## 213 21825 2619 2287 145 203
# Exclude records whose derived N category is "88" or "NX", leaving only
# N0-N3. The column is referenced by its bare name so the condition is
# evaluated on the data flowing through the pipe rather than on the global
# `seer` object.
seer <- seer %>%
  filter(!Derived_EOD_2018_N__2018__ %in% c("88", "NX"))
Selection step 4: Stage, only I, II, III, removing if unknown OR if grouped stage is distant; n= 25153 (excluded = 1723)
table(seer$Derived_EOD_2018_Stage_Group__20)
##
## 0 1A1 1A2 1A3 1B 2A 2B 3 3A 3B 3C 4 4A 4B 99 OC
## 4 3866 6047 3227 4449 913 3594 26 2966 686 32 71 687 190 117 1
# Collapse the derived EOD 2018 stage group into an overall stage label.
# Any value that is not listed (for example "0", "99", "OC", or a missing
# value) becomes NA.
seer <- seer %>%
  mutate(
    stage = case_match(
      Derived_EOD_2018_Stage_Group__20,
      c("1A1", "1A2", "1A3", "1B") ~ "Stage 1",
      c("2A", "2B") ~ "Stage 2",
      c("3", "3A", "3B", "3C") ~ "Stage 3",
      c("4", "4A", "4B") ~ "Stage 4",
      .default = NA_character_
    )
  )
table(seer$stage)
##
## Stage 1 Stage 2 Stage 3 Stage 4
## 17589 4507 3710 948
table(seer$Combined_Summary_Stage__2004__)
##
## Distant In situ Localized Regional
## 1599 4 15997 9245
## Unknown/unstaged
## 31
table(seer$Derived_EOD_2018_N__2018__)
##
## N0 N1 N2 N3
## 21825 2619 2287 145
# Restrict the cohort to stage 1-3 disease and drop anything whose combined
# summary stage is "Distant". Rows with a missing value in either column are
# removed as well.
seer <- seer %>%
  filter(
    stage %in% c("Stage 1", "Stage 2", "Stage 3"),
    Combined_Summary_Stage__2004__ != "Distant"
  )
table(seer$EOD_Mets__2018__) #0, good, no mets
##
## 0
## 25153
table(seer$Derived_EOD_2018_N__2018__)
##
## N0 N1 N2
## 20798 2402 1953
Selection step 5: Tumor Size, removing missing, unknown, or abnormally large tumor size; n=25028 (excluded = 125)
table(seer$Tumor_Size_Summary__2016__)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 1 47 84 107 129 153 228 293 430 461 838 720 1068 767 776 1685
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
## 726 932 1035 557 1176 699 845 536 503 1175 381 432 524 224 734 312
## 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
## 448 204 188 701 185 211 234 130 449 136 191 122 96 415 98 83
## 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
## 119 49 255 59 106 65 47 234 48 74 64 19 209 35 46 35
## 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
## 26 139 20 38 31 24 102 17 32 24 16 88 14 15 19 5
## 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
## 68 15 13 7 6 46 9 8 11 7 47 2 3 4 3 28
## 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
## 6 8 6 4 37 4 3 4 3 17 3 2 4 2 19 2
## 114 115 116 117 118 119 120 122 124 125 128 130 131 133 135 136
## 3 6 2 2 2 1 18 1 1 2 1 6 1 1 2 1
## 137 139 140 145 150 152 153 160 163 165 170 175 180 190 195 200
## 1 1 5 3 3 1 2 2 1 2 2 2 4 2 2 1
## 203 205 210 225 260 280 301 310 430 500 750 990 999
## 1 1 1 1 1 1 1 1 1 1 1 2 112
# Remove records with a missing tumor size, with one of the special size codes
# (990, 998, 999), or with one of the extreme values judged implausible
# (301, 307, 310, 430, 500, 750).
seer <- seer %>%
  filter(
    !is.na(Tumor_Size_Summary__2016__),
    !Tumor_Size_Summary__2016__ %in%
      c(301, 307, 310, 430, 500, 750, 990, 998, 999)
  )
table(seer$Derived_EOD_2018_T__2018__)
##
## T0 T1a T1b T1c T1mi T2a T2b T3 T4 TX
## 1 2386 6632 3812 1711 5672 1352 2639 824 5
# Drop the T categories that cannot be grouped into T1-T4 (T0 and TX).
seer <- filter(seer, !(Derived_EOD_2018_T__2018__ %in% c("T0", "TX")))
Selection step 6: valid survival data (non-missing, non-negative time from treatment to event/censoring), n=24041 (excluded = 987)
Useful links:https://seer.cancer.gov/survivaltime/SurvivalTimeCalculation.pdf; Survival Months = floor((date last contact – date dx) / days in a month)
table(seer$Survival_months)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 712 770 808 862 751 757 778 528 509 809 816 795 759 723 802 719 746 698 787 683
## 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
## 814 722 758 681 596 626 663 568 650 611 586 574 659 558 601 549
table(seer$Months_from_diagnosis_to_treatme)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 8119 7086 5587 2261 927 418 225 153 83 42 29 20 18 8 5 5
## 16 17 19 20 21 23
## 5 6 1 1 1 1
table(seer$COD_to_site_rec_KM)
##
## Accidents and Adverse Effects
## 35
## Acute Lymphocytic Leukemia
## 1
## Acute Myeloid Leukemia
## 5
## Aleukemic, Subleukemic and NOS
## 1
## Alive
## 22743
## Anus, Anal Canal and Anorectum
## 1
## Aortic Aneurysm and Dissection
## 1
## Atherosclerosis
## 1
## Bones and Joints
## 2
## Brain and Other Nervous System
## 3
## Breast
## 21
## Cerebrovascular Diseases
## 43
## Chronic Liver Disease and Cirrhosis
## 7
## Chronic Lymphocytic Leukemia
## 4
## Chronic Obstructive Pulmonary Disease and Allied Cond
## 93
## Colon excluding Rectum
## 15
## Corpus Uteri
## 2
## Diabetes Mellitus
## 17
## Diseases of Heart
## 190
## Esophagus
## 11
## Homicide and Legal Intervention
## 1
## Hypertension without Heart Disease
## 3
## In situ, benign or unknown behavior neoplasm
## 13
## Intrahepatic Bile Duct
## 2
## Kidney and Renal Pelvis
## 7
## Larynx
## 6
## Liver
## 8
## Lung and Bronchus
## 1298
## Melanoma of the Skin
## 3
## Miscellaneous Malignant Cancer
## 53
## Myeloma
## 8
## Nasopharynx
## 1
## Nephritis, Nephrotic Syndrome and Nephrosis
## 15
## Non-Hodgkin Lymphoma
## 12
## Oropharynx
## 1
## Other Biliary
## 2
## Other Cause of Death
## 221
## Other Digestive Organs
## 1
## Other Diseases of Arteries, Arterioles, Capillaries
## 5
## Other Infectious and Parasitic Diseases including HIV
## 9
## Other Myeloid/Monocytic Leukemia
## 1
## Other Oral Cavity and Pharynx
## 2
## Ovary
## 3
## Pancreas
## 19
## Pneumonia and Influenza
## 27
## Prostate
## 12
## Rectum and Rectosigmoid Junction
## 1
## Septicemia
## 29
## Soft Tissue including Heart
## 3
## State DC not available or state DC available but no COD
## 28
## Stomach
## 2
## Stomach and Duodenal Ulcers
## 1
## Suicide and Self-Inflicted Injury
## 11
## Symptoms, Signs and Ill-Defined Conditions
## 10
## Tongue
## 5
## Tonsil
## 2
## Urinary Bladder
## 4
## Uterus, NOS
## 3
# All-cause death indicator: 0 when the cause-of-death field reads "Alive",
# 1 for every other value (missing stays missing).
seer <- seer %>%
  mutate(dead_any = as.numeric(COD_to_site_rec_KM != "Alive"))
table(seer$dead_any)
##
## 0 1
## 22743 2285
# Lung-cancer-specific death indicator: 1 when the cause of death is
# "Lung and Bronchus", 0 for every other value, including "Alive" and deaths
# from other causes (missing stays missing).
seer <- seer %>%
  mutate(dead_lung = as.numeric(COD_to_site_rec_KM == "Lung and Bronchus"))
table(seer$dead_lung)
##
## 0 1
## 23730 1298
# Re-anchor follow-up at treatment: Survival_months is taken to run from
# diagnosis to event/censoring, so the months from diagnosis to treatment are
# subtracted to get time from treatment to event/censoring.
seer$sur_time <- seer$Survival_months - seer$Months_from_diagnosis_to_treatme
table(seer$sur_time)
##
## -15 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5
## 1 2 1 2 5 1 6 8 16 17 52 148 234 467 906 782 793 778 676 681
## 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 702 637 704 777 798 813 720 719 763 783 746 731 664 715 764 663 698 692 569 625
## 26 27 28 29 30 31 32 33 34 35
## 656 564 656 543 568 600 535 501 338 181
# Scratch copy of the three timing columns, kept for inspecting the subtraction.
test <- select(seer, Survival_months, Months_from_diagnosis_to_treatme, sur_time)

# Keep only patients with a known, non-negative time from treatment; negative
# values mean the recorded follow-up ended before the treatment month.
seer <- seer %>%
  filter(!is.na(sur_time), sur_time >= 0)
Data Cleaning
# Print the mean, standard deviation and range of a vector, ignoring missing
# values.
#
# variable: numeric (or logical) vector; NA values are dropped before
#   the statistics are computed.
# Returns, invisibly, a list with elements `mean`, `sd` and `range` so the
#   values can be reused instead of only being read off the console.
simple_stats <- function(variable) {
  stopifnot(is.numeric(variable) || is.logical(variable))

  observed <- variable[!is.na(variable)]

  if (length(observed) == 0) {
    # Nothing observed: report NA instead of letting range() warn and
    # return Inf / -Inf.
    stats <- list(
      mean = NA_real_,
      sd = NA_real_,
      range = c(NA_real_, NA_real_)
    )
  } else {
    stats <- list(
      mean = mean(observed),
      sd = sd(observed),
      range = range(observed)
    )
  }

  # Console output keeps the same three-line format as before.
  cat("Mean:", stats$mean, "\n")
  cat("Standard Deviation:", stats$sd, "\n")
  cat("Range:", stats$range, "\n")

  invisible(stats)
}
# Subset of patients with at least one regional lymph node examined; used below
# to summarise the number of positive nodes among those who had nodes examined.
seer_nodes_examined <- seer %>% filter(Regional_nodes_examined__1988__ != 0) # n=22131 in this run (24041 total - 1910 with 0 nodes examined)
summary(seer$Regional_nodes_examined__1988__)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 5.00 10.00 11.26 16.00 86.00
hist(seer$Regional_nodes_examined__1988__)
simple_stats(seer$Regional_nodes_examined__1988__)
## Mean: 11.25714
## Standard Deviation: 9.071014
## Range: 0 86
table(seer$Regional_nodes_examined__1988__)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 1910 703 920 1063 1126 1282 1225 1240 1254 1159 1282 1137 1061 911 850 785
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
## 654 666 533 537 457 395 393 307 245 233 226 184 156 121 113 100
## 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
## 108 82 66 66 52 34 40 41 45 32 30 23 21 23 18 16
## 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
## 15 15 9 14 6 11 3 3 2 2 2 4 4 4 3 3
## 64 65 68 69 70 71 72 73 75 76 86
## 3 1 2 1 1 1 2 1 1 2 1
summary(seer_nodes_examined$Regional_nodes_positive__1988__)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.5093 0.0000 39.0000
hist(seer_nodes_examined$Regional_nodes_positive__1988__)
simple_stats(seer_nodes_examined$Regional_nodes_positive__1988__)
## Mean: 0.5092856
## Standard Deviation: 1.694691
## Range: 0 39
table(seer_nodes_examined$Regional_nodes_positive__1988__)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 18205 1707 769 487 297 193 126 90 66 34 28 31 26
## 13 14 15 16 17 18 19 21 22 24 25 27 29
## 17 10 13 4 4 4 5 2 3 1 2 1 1
## 30 33 34 39
## 2 1 1 1
table(seer$Derived_EOD_2018_T__2018__)
##
## T1a T1b T1c T1mi T2a T2b T3 T4
## 2307 6370 3642 1656 5440 1301 2523 802
table(seer$Derived_EOD_2018_N__2018__)
##
## N0 N1 N2
## 19890 2315 1836
table(seer$Derived_EOD_2018_M__2018__)
##
## M0
## 24041
table(seer$Regional_nodes_examined__1988__, seer$Derived_EOD_2018_N__2018__)
##
## N0 N1 N2
## 0 1840 29 41
## 1 643 29 31
## 2 832 38 50
## 3 946 62 55
## 4 1001 74 51
## 5 1135 88 59
## 6 1043 91 91
## 7 1042 123 75
## 8 1053 113 88
## 9 985 100 74
## 10 1049 127 106
## 11 910 130 97
## 12 857 112 92
## 13 737 94 80
## 14 674 103 73
## 15 629 82 74
## 16 502 96 56
## 17 515 85 66
## 18 408 73 52
## 19 404 69 64
## 20 355 53 49
## 21 288 68 39
## 22 295 52 46
## 23 214 48 45
## 24 172 48 25
## 25 166 36 31
## 26 163 33 30
## 27 137 31 16
## 28 116 18 22
## 29 84 21 16
## 30 82 22 9
## 31 73 13 14
## 32 76 17 15
## 33 47 24 11
## 34 45 10 11
## 35 52 8 6
## 36 35 12 5
## 37 27 4 3
## 38 28 6 6
## 39 24 7 10
## 40 30 10 5
## 41 18 4 10
## 42 26 2 2
## 43 17 3 3
## 44 14 3 4
## 45 11 8 4
## 46 9 3 6
## 47 7 4 5
## 48 11 3 1
## 49 9 3 3
## 50 8 1 0
## 51 10 3 1
## 52 6 0 0
## 53 7 3 1
## 54 2 1 0
## 55 2 0 1
## 56 1 1 0
## 57 2 0 0
## 58 0 2 0
## 59 3 0 1
## 60 2 1 1
## 61 0 2 2
## 62 1 2 0
## 63 1 2 0
## 64 2 1 0
## 65 1 0 0
## 68 0 2 0
## 69 0 0 1
## 70 1 0 0
## 71 0 1 0
## 72 2 0 0
## 73 1 0 0
## 75 1 0 0
## 76 1 0 1
## 86 0 1 0
# Binary indicator of whether any regional node was examined:
# "no" when the count is 0, "yes" otherwise.
seer <- seer %>%
  mutate(
    node_any = if_else(Regional_nodes_examined__1988__ == 0, "no", "yes")
  )
table(seer$Regional_nodes_examined__1988__, seer$node_any)
##
## no yes
## 0 1910 0
## 1 0 703
## 2 0 920
## 3 0 1063
## 4 0 1126
## 5 0 1282
## 6 0 1225
## 7 0 1240
## 8 0 1254
## 9 0 1159
## 10 0 1282
## 11 0 1137
## 12 0 1061
## 13 0 911
## 14 0 850
## 15 0 785
## 16 0 654
## 17 0 666
## 18 0 533
## 19 0 537
## 20 0 457
## 21 0 395
## 22 0 393
## 23 0 307
## 24 0 245
## 25 0 233
## 26 0 226
## 27 0 184
## 28 0 156
## 29 0 121
## 30 0 113
## 31 0 100
## 32 0 108
## 33 0 82
## 34 0 66
## 35 0 66
## 36 0 52
## 37 0 34
## 38 0 40
## 39 0 41
## 40 0 45
## 41 0 32
## 42 0 30
## 43 0 23
## 44 0 21
## 45 0 23
## 46 0 18
## 47 0 16
## 48 0 15
## 49 0 15
## 50 0 9
## 51 0 14
## 52 0 6
## 53 0 11
## 54 0 3
## 55 0 3
## 56 0 2
## 57 0 2
## 58 0 2
## 59 0 4
## 60 0 4
## 61 0 4
## 62 0 3
## 63 0 3
## 64 0 3
## 65 0 1
## 68 0 2
## 69 0 1
## 70 0 1
## 71 0 1
## 72 0 2
## 73 0 1
## 75 0 1
## 76 0 2
## 86 0 1
# Group the number of regional nodes examined into four bands.
# NOTE(review): the last band is labelled "10+ nodes" but only holds counts
# above 10, because exactly 10 falls in "5-10 nodes". The label is left
# unchanged here; consider renaming it if nothing downstream depends on it.
seer <- seer %>%
  mutate(
    node_group = case_when(
      is.na(Regional_nodes_examined__1988__) ~ NA_character_,
      Regional_nodes_examined__1988__ == 0 ~ "0 nodes",
      Regional_nodes_examined__1988__ >= 1 &
        Regional_nodes_examined__1988__ <= 4 ~ "1-4 nodes",
      Regional_nodes_examined__1988__ > 4 &
        Regional_nodes_examined__1988__ <= 10 ~ "5-10 nodes",
      TRUE ~ "10+ nodes"
    )
  )
table(seer$Regional_nodes_examined__1988__, seer$node_group)
##
## 0 nodes 1-4 nodes 10+ nodes 5-10 nodes
## 0 1910 0 0 0
## 1 0 703 0 0
## 2 0 920 0 0
## 3 0 1063 0 0
## 4 0 1126 0 0
## 5 0 0 0 1282
## 6 0 0 0 1225
## 7 0 0 0 1240
## 8 0 0 0 1254
## 9 0 0 0 1159
## 10 0 0 0 1282
## 11 0 0 1137 0
## 12 0 0 1061 0
## 13 0 0 911 0
## 14 0 0 850 0
## 15 0 0 785 0
## 16 0 0 654 0
## 17 0 0 666 0
## 18 0 0 533 0
## 19 0 0 537 0
## 20 0 0 457 0
## 21 0 0 395 0
## 22 0 0 393 0
## 23 0 0 307 0
## 24 0 0 245 0
## 25 0 0 233 0
## 26 0 0 226 0
## 27 0 0 184 0
## 28 0 0 156 0
## 29 0 0 121 0
## 30 0 0 113 0
## 31 0 0 100 0
## 32 0 0 108 0
## 33 0 0 82 0
## 34 0 0 66 0
## 35 0 0 66 0
## 36 0 0 52 0
## 37 0 0 34 0
## 38 0 0 40 0
## 39 0 0 41 0
## 40 0 0 45 0
## 41 0 0 32 0
## 42 0 0 30 0
## 43 0 0 23 0
## 44 0 0 21 0
## 45 0 0 23 0
## 46 0 0 18 0
## 47 0 0 16 0
## 48 0 0 15 0
## 49 0 0 15 0
## 50 0 0 9 0
## 51 0 0 14 0
## 52 0 0 6 0
## 53 0 0 11 0
## 54 0 0 3 0
## 55 0 0 3 0
## 56 0 0 2 0
## 57 0 0 2 0
## 58 0 0 2 0
## 59 0 0 4 0
## 60 0 0 4 0
## 61 0 0 4 0
## 62 0 0 3 0
## 63 0 0 3 0
## 64 0 0 3 0
## 65 0 0 1 0
## 68 0 0 2 0
## 69 0 0 1 0
## 70 0 0 1 0
## 71 0 0 1 0
## 72 0 0 2 0
## 73 0 0 1 0
## 75 0 0 1 0
## 76 0 0 2 0
## 86 0 0 1 0
# Combined T/N label (e.g. "T1N0", "T3N2") for looking at how T and N are
# distributed together. The T sub-categories are collapsed to their first two
# characters (T1a/T1b/T1c/T1mi -> T1, T2a/T2b -> T2) and pasted onto the N
# category. Any T or N value outside the listed sets gives NA.
seer <- seer %>%
  mutate(
    TN_Stage = if_else(
      Derived_EOD_2018_T__2018__ %in%
        c("T1a", "T1b", "T1c", "T1mi", "T2a", "T2b", "T3", "T4") &
        Derived_EOD_2018_N__2018__ %in% c("N0", "N1", "N2", "N3"),
      paste0(
        substr(Derived_EOD_2018_T__2018__, 1, 2),
        Derived_EOD_2018_N__2018__
      ),
      NA_character_
    )
  )
table(seer$TN_Stage)
##
## T1N0 T1N1 T1N2 T2N0 T2N1 T2N2 T3N0 T3N1 T3N2 T4N0 T4N1 T4N2
## 12597 786 592 5119 891 731 1714 452 357 460 186 156
seer$TN_Stage <- as.factor(seer$TN_Stage)
table(seer$Derived_EOD_2018_T__2018__)
##
## T1a T1b T1c T1mi T2a T2b T3 T4
## 2307 6370 3642 1656 5440 1301 2523 802
# Collapse the T sub-categories into T1-T4 by keeping the first two characters
# (T1a/T1b/T1c/T1mi -> "T1", T2a/T2b -> "T2"). Values outside the listed set
# become NA.
seer <- seer %>%
  mutate(
    T_Stage = if_else(
      Derived_EOD_2018_T__2018__ %in%
        c("T1a", "T1b", "T1c", "T1mi", "T2a", "T2b", "T3", "T4"),
      substr(Derived_EOD_2018_T__2018__, 1, 2),
      NA_character_
    )
  )
table(seer$T_Stage)
##
## T1 T2 T3 T4
## 13975 6741 2523 802
table(seer$TN_Stage, seer$mediastinal)
##
## no yes
## T1N0 4847 7750
## T1N1 169 617
## T1N2 124 468
## T2N0 1462 3657
## T2N1 144 747
## T2N2 123 608
## T3N0 416 1298
## T3N1 75 377
## T3N2 67 290
## T4N0 111 349
## T4N1 31 155
## T4N2 33 123
#age
table(seer$age)
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
## 4 2 5 4 5 5 5 5 3 8 11 7 4 15 9 13
## 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## 11 20 18 15 22 23 28 35 35 35 43 60 54 86 85 118
## 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
## 118 163 159 240 267 339 404 464 455 551 625 669 727 752 827 976
## 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
## 942 997 1005 1064 1063 1106 1121 1044 977 965 899 846 652 613 478 430
## 82 83 84 85 86 87 88 89 90
## 379 293 201 155 102 67 46 34 33
seer$age2 <- as.numeric(seer$age)
#race
table(seer$Race_recode__W__B__AI__API_)
##
## American Indian/Alaska Native Asian or Pacific Islander
## 98 2023
## Black Unknown
## 1993 130
## White
## 19797
# Fold the two smallest race categories (American Indian/Alaska Native and
# Unknown) into a single "Other/Unknown" level; other values pass through.
seer <- seer %>%
  mutate(
    race2 = if_else(
      Race_recode__W__B__AI__API_ %in%
        c("American Indian/Alaska Native", "Unknown"),
      "Other/Unknown",
      Race_recode__W__B__AI__API_
    )
  )
table(seer$race2)
##
## Asian or Pacific Islander Black Other/Unknown
## 2023 1993 228
## White
## 19797
#sex
table(seer$Sex)
##
## Female Male
## 13714 10327
#marital status
table(seer$Marital_status_at_diagnosis)
##
## Divorced Married (including common law)
## 2887 13531
## Separated Single (never married)
## 230 3353
## Unknown Unmarried or Domestic Partner
## 915 149
## Widowed
## 2976
# Collapse marital status into broader groups: divorced, separated and
# never-married people form one level; married and partnered people form
# another. "Widowed" and "Unknown" pass through unchanged.
seer <- seer %>%
  mutate(
    married2 = case_when(
      Marital_status_at_diagnosis %in%
        c("Divorced", "Separated", "Single (never married)") ~
        "single/seperated",
      Marital_status_at_diagnosis %in%
        c("Married (including common law)", "Unmarried or Domestic Partner") ~
        "married/partner",
      TRUE ~ Marital_status_at_diagnosis
    )
  )
table(seer$married2)
##
## married/partner single/seperated Unknown Widowed
## 13680 6470 915 2976
#histology
table(seer$Histologic_Type_ICD_O_3)
##
## 8010 8012 8013 8022 8023 8031 8032 8033 8070 8071 8072 8082 8083 8140 8144 8200
## 45 90 265 103 1 4 25 28 2393 1200 1056 8 92 6305 24 31
## 8230 8240 8246 8249 8250 8253 8254 8255 8256 8257 8260 8265 8333 8430 8480 8481
## 809 1604 87 262 1244 562 169 637 158 20 667 257 4 58 391 9
## 8551 8560 8562
## 5030 401 2
# Group ICD-O-3 histology codes into broad histologic categories; any code not
# listed (and any missing value) is labelled "Other".
# NOTE(review): several high-count codes in the table above (e.g. 8551 with
# 5030 cases, 8230, 8260) end up in "Other" - confirm that this is intended.
seer <- seer %>%
  mutate(
    hist2 = case_when(
      Histologic_Type_ICD_O_3 %in% 8070:8075 ~ "Squamous cell",
      Histologic_Type_ICD_O_3 %in% c(8140, 8144) ~ "Adenocarcinoma",
      Histologic_Type_ICD_O_3 %in% c(8240, 8244:8246, 8249) ~ "Carcinoid",
      Histologic_Type_ICD_O_3 %in% 8250:8257 ~ "Bronchioalveolar",
      Histologic_Type_ICD_O_3 %in% 8012:8014 ~ "Large cell",
      .default = "Other"
    )
  )
table(seer$hist2)
##
## Adenocarcinoma Bronchioalveolar Carcinoid Large cell
## 6329 2790 1953 355
## Other Squamous cell
## 7965 4649
#Grade
table(seer$Grade_Pathological__2018__)
##
## 1 2 3 4 9
## 4346 9544 5369 99 4683
# Label pathological grade. Codes 1-3 get their own level; every other
# non-missing code (4 and 9 in this data) is pooled into
# "G4: Undifferentiated/Unknown". Missing grades stay NA.
seer <- seer %>%
  mutate(
    grade2 = case_when(
      is.na(Grade_Pathological__2018__) ~ NA_character_,
      Grade_Pathological__2018__ == 1 ~ "G1: Well differentiated",
      Grade_Pathological__2018__ == 2 ~ "G2: Moderately differentiated",
      Grade_Pathological__2018__ == 3 ~ "G3: Poorly differentiated",
      TRUE ~ "G4: Undifferentiated/Unknown"
    )
  )
table(seer$grade2)
##
## G1: Well differentiated G2: Moderately differentiated
## 4346 9544
## G3: Poorly differentiated G4: Undifferentiated/Unknown
## 5369 4782
#tumor size
summary(seer$Tumor_Size_Summary__2016__)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 15.00 21.00 25.97 32.00 280.00
hist(seer$Tumor_Size_Summary__2016__)
simple_stats(seer$Tumor_Size_Summary__2016__)
## Mean: 25.97396
## Standard Deviation: 18.18596
## Range: 1 280
#tumor site
table(seer$Primary_Site___labeled)
##
## C34.0-Main bronchus C34.1-Upper lobe, lung
## 119 13526
## C34.2-Middle lobe, lung C34.3-Lower lobe, lung
## 1518 8450
## C34.8-Overlapping lesion of lung C34.9-Lung, NOS
## 205 223
# Shorten the primary-site labels to the three lobes; every other non-missing
# site (main bronchus, overlapping lesion, lung NOS) becomes "Other,NOS".
# Missing sites stay NA.
seer <- seer %>%
  mutate(
    site2 = case_when(
      is.na(Primary_Site___labeled) ~ NA_character_,
      Primary_Site___labeled == "C34.1-Upper lobe, lung" ~ "Upper lobe",
      Primary_Site___labeled == "C34.2-Middle lobe, lung" ~ "Middle lobe",
      Primary_Site___labeled == "C34.3-Lower lobe, lung" ~ "Lower lobe",
      TRUE ~ "Other,NOS"
    )
  )
table(seer$site2)
##
## Lower lobe Middle lobe Other,NOS Upper lobe
## 8450 1518 547 13526
#systemic treatment
table(seer$RX_Summ__Systemic_Sur_Seq__2007_)
##
## No systemic therapy and/or surgical procedures
## 18735
## Sequence unknown
## 1
## Surgery both before and after systemic therapy
## 89
## Systemic therapy after surgery
## 4598
## Systemic therapy before surgery
## 440
## Systemic therapy both before and after surgery
## 178
# Scratch check: rows with a missing systemic-therapy sequence.
test <- filter(seer, is.na(RX_Summ__Systemic_Sur_Seq__2007_))

# Any systemic therapy indicator: "no" only for the explicit
# "No systemic therapy and/or surgical procedures" value; every other
# non-missing value, including "Sequence unknown", counts as "yes".
seer <- seer %>%
  mutate(
    treat2 = if_else(
      RX_Summ__Systemic_Sur_Seq__2007_ ==
        "No systemic therapy and/or surgical procedures",
      "no",
      "yes"
    )
  )
table(seer$treat2)
##
## no yes
## 18735 5306
#year of dx
table(seer$Year_of_diagnosis)
##
## 2018 2019 2020
## 8528 8873 6640
seer$Year_of_diagnosis <- as.character(seer$Year_of_diagnosis)
Univariate Analysis
#Nodes
table1 <- compareGroups(surg2 ~ Regional_nodes_examined__1988__ + node_any + node_group + mediastinal, data = seer)
createTable(table1)
##
## --------Summary descriptives table by 'surg2'---------
##
## __________________________________________________________________________________
## Lobectomy Pneumonectomy Wedge p.overall
## N=17897 N=437 N=5707
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Regional_nodes_examined__1988__ 12.9 (8.85) 18.4 (12.2) 5.54 (6.66) 0.000
## node_any: 0.000
## no 399 (2.23%) 10 (2.29%) 1501 (26.3%)
## yes 17498 (97.8%) 427 (97.7%) 4206 (73.7%)
## node_group: 0.000
## 0 nodes 399 (2.23%) 10 (2.29%) 1501 (26.3%)
## 1-4 nodes 2056 (11.5%) 23 (5.26%) 1733 (30.4%)
## 10+ nodes 9596 (53.6%) 316 (72.3%) 965 (16.9%)
## 5-10 nodes 5846 (32.7%) 88 (20.1%) 1508 (26.4%)
## mediastinal: 0.000
## no 1806 (10.1%) 89 (20.4%) 5707 (100%)
## yes 16091 (89.9%) 348 (79.6%) 0 (0.00%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
#Clinical and demographic factors
table2 <- compareGroups(surg2 ~ age2 + Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage + treat2 + Year_of_diagnosis, data = seer)
createTable(table2)
##
## --------Summary descriptives table by 'surg2'---------
##
## ____________________________________________________________________________________
## Lobectomy Pneumonectomy Wedge p.overall
## N=17897 N=437 N=5707
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age2 67.8 (9.35) 62.8 (11.7) 69.5 (8.91) <0.001
## Sex: <0.001
## Female 10128 (56.6%) 193 (44.2%) 3393 (59.5%)
## Male 7769 (43.4%) 244 (55.8%) 2314 (40.5%)
## race2: .
## Asian or Pacific Islander 1606 (8.97%) 32 (7.32%) 385 (6.75%)
## Black 1510 (8.44%) 41 (9.38%) 442 (7.74%)
## Other/Unknown 182 (1.02%) 1 (0.23%) 45 (0.79%)
## White 14599 (81.6%) 363 (83.1%) 4835 (84.7%)
## married2: <0.001
## married/partner 10270 (57.4%) 254 (58.1%) 3156 (55.3%)
## single/seperated 4842 (27.1%) 131 (30.0%) 1497 (26.2%)
## Unknown 624 (3.49%) 20 (4.58%) 271 (4.75%)
## Widowed 2161 (12.1%) 32 (7.32%) 783 (13.7%)
## hist2: <0.001
## Adenocarcinoma 4826 (27.0%) 70 (16.0%) 1433 (25.1%)
## Bronchioalveolar 1981 (11.1%) 25 (5.72%) 784 (13.7%)
## Carcinoid 1381 (7.72%) 49 (11.2%) 523 (9.16%)
## Large cell 274 (1.53%) 8 (1.83%) 73 (1.28%)
## Other 5989 (33.5%) 99 (22.7%) 1877 (32.9%)
## Squamous cell 3446 (19.3%) 186 (42.6%) 1017 (17.8%)
## site2: <0.001
## Lower lobe 6254 (34.9%) 128 (29.3%) 2068 (36.2%)
## Middle lobe 1285 (7.18%) 15 (3.43%) 218 (3.82%)
## Other,NOS 325 (1.82%) 107 (24.5%) 115 (2.02%)
## Upper lobe 10033 (56.1%) 187 (42.8%) 3306 (57.9%)
## grade2: <0.001
## G1: Well differentiated 3065 (17.1%) 52 (11.9%) 1229 (21.5%)
## G2: Moderately differentiated 7248 (40.5%) 115 (26.3%) 2181 (38.2%)
## G3: Poorly differentiated 4140 (23.1%) 146 (33.4%) 1083 (19.0%)
## G4: Undifferentiated/Unknown 3444 (19.2%) 124 (28.4%) 1214 (21.3%)
## T_Stage: <0.001
## T1 9719 (54.3%) 77 (17.6%) 4179 (73.2%)
## T2 5413 (30.2%) 160 (36.6%) 1168 (20.5%)
## T3 2130 (11.9%) 94 (21.5%) 299 (5.24%)
## T4 635 (3.55%) 106 (24.3%) 61 (1.07%)
## treat2: <0.001
## no 13421 (75.0%) 200 (45.8%) 5114 (89.6%)
## yes 4476 (25.0%) 237 (54.2%) 593 (10.4%)
## Year_of_diagnosis: 0.002
## 2018 6384 (35.7%) 189 (43.2%) 1955 (34.3%)
## 2019 6617 (37.0%) 145 (33.2%) 2111 (37.0%)
## 2020 4896 (27.4%) 103 (23.6%) 1641 (28.8%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
seer$any2 <- as.factor(seer$dead_any)
seer$lung2 <- as.factor(seer$dead_lung)
table3 <- compareGroups(surg2 ~ sur_time + any2 + lung2, data = seer)
createTable(table3)
##
## --------Summary descriptives table by 'surg2'---------
##
## ___________________________________________________________
## Lobectomy Pneumonectomy Wedge p.overall
## N=17897 N=437 N=5707
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## sur_time 15.9 (9.87) 15.4 (10.2) 15.9 (9.99) 0.558
## any2: <0.001
## 0 16267 (90.9%) 332 (76.0%) 5189 (90.9%)
## 1 1630 (9.11%) 105 (24.0%) 518 (9.08%)
## lung2: <0.001
## 0 16938 (94.6%) 368 (84.2%) 5456 (95.6%)
## 1 959 (5.36%) 69 (15.8%) 251 (4.40%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
Survival Analysis
##All cause mortality
#any nodes examined
km_fit <- survfit(Surv(sur_time, dead_any) ~ node_any, data = seer)
ggsurvplot(km_fit, data = seer,
pval = TRUE, # Adds the p-value
conf.int = TRUE, # Adds confidence intervals
risk.table = TRUE, # Adds a risk table below the plot
risk.table.col = "strata",# Colors risk table by groups
legend.title = "Nodes", # Label for the legend
legend.labs = c("0 Nodes", "1+ Nodes"), # Custom legend labels
xlab = "Time (months)", # X-axis label
ylab = "Survival Probability", # Y-axis label
surv.median.line = "hv", # Adds median survival lines
ggtheme = theme_minimal()) # Applies a minimalistic theme
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
cox_model1 <- coxph(Surv(sur_time, dead_any) ~ node_any + age2 + Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage + treat2 + Year_of_diagnosis, data = seer)
summary(cox_model1)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ node_any + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 2253
##
## coef exp(coef) se(coef) z
## node_anyyes -0.426588 0.652733 0.068667 -6.212
## age2 0.030539 1.031011 0.002669 11.442
## SexMale 0.426833 1.532397 0.044939 9.498
## race2Black 0.435445 1.545650 0.113971 3.821
## race2Other/Unknown 0.543083 1.721306 0.227317 2.389
## race2White 0.339692 1.404514 0.091537 3.711
## married2single/seperated 0.177247 1.193926 0.051576 3.437
## married2Unknown 0.187867 1.206673 0.101559 1.850
## married2Widowed 0.277651 1.320025 0.064045 4.335
## hist2Bronchioalveolar -0.187584 0.828959 0.085805 -2.186
## hist2Carcinoid -0.914041 0.400901 0.154410 -5.920
## hist2Large cell 0.638590 1.893809 0.130731 4.885
## hist2Other -0.082682 0.920644 0.056705 -1.458
## hist2Squamous cell 0.221851 1.248385 0.056693 3.913
## site2Middle lobe 0.078584 1.081754 0.096012 0.818
## site2Other,NOS 0.094234 1.098816 0.124867 0.755
## site2Upper lobe -0.111702 0.894311 0.045347 -2.463
## grade2G2: Moderately differentiated 0.369531 1.447056 0.082537 4.477
## grade2G3: Poorly differentiated 0.672683 1.959487 0.085619 7.857
## grade2G4: Undifferentiated/Unknown 0.377012 1.457922 0.089711 4.203
## T_StageT2 0.443147 1.557602 0.050953 8.697
## T_StageT3 0.791138 2.205906 0.065946 11.997
## T_StageT4 1.205427 3.338184 0.085593 14.083
## treat2yes -0.025059 0.975252 0.053101 -0.472
## Year_of_diagnosis2019 -0.061986 0.939896 0.049041 -1.264
## Year_of_diagnosis2020 -0.131279 0.876973 0.083370 -1.575
## Pr(>|z|)
## node_anyyes 5.22e-10 ***
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 0.000133 ***
## race2Other/Unknown 0.016890 *
## race2White 0.000206 ***
## married2single/seperated 0.000589 ***
## married2Unknown 0.064338 .
## married2Widowed 1.46e-05 ***
## hist2Bronchioalveolar 0.028803 *
## hist2Carcinoid 3.23e-09 ***
## hist2Large cell 1.04e-06 ***
## hist2Other 0.144814
## hist2Squamous cell 9.11e-05 ***
## site2Middle lobe 0.413085
## site2Other,NOS 0.450446
## site2Upper lobe 0.013766 *
## grade2G2: Moderately differentiated 7.57e-06 ***
## grade2G3: Poorly differentiated 3.94e-15 ***
## grade2G4: Undifferentiated/Unknown 2.64e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.636987
## Year_of_diagnosis2019 0.206248
## Year_of_diagnosis2020 0.115337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## node_anyyes 0.6527 1.5320 0.5705 0.7468
## age2 1.0310 0.9699 1.0256 1.0364
## SexMale 1.5324 0.6526 1.4032 1.6735
## race2Black 1.5457 0.6470 1.2362 1.9325
## race2Other/Unknown 1.7213 0.5810 1.1025 2.6875
## race2White 1.4045 0.7120 1.1738 1.6805
## married2single/seperated 1.1939 0.8376 1.0791 1.3209
## married2Unknown 1.2067 0.8287 0.9889 1.4724
## married2Widowed 1.3200 0.7576 1.1643 1.4966
## hist2Bronchioalveolar 0.8290 1.2063 0.7006 0.9808
## hist2Carcinoid 0.4009 2.4944 0.2962 0.5426
## hist2Large cell 1.8938 0.5280 1.4657 2.4469
## hist2Other 0.9206 1.0862 0.8238 1.0289
## hist2Squamous cell 1.2484 0.8010 1.1171 1.3951
## site2Middle lobe 1.0818 0.9244 0.8962 1.3057
## site2Other,NOS 1.0988 0.9101 0.8603 1.4035
## site2Upper lobe 0.8943 1.1182 0.8183 0.9774
## grade2G2: Moderately differentiated 1.4471 0.6911 1.2309 1.7011
## grade2G3: Poorly differentiated 1.9595 0.5103 1.6568 2.3175
## grade2G4: Undifferentiated/Unknown 1.4579 0.6859 1.2228 1.7382
## T_StageT2 1.5576 0.6420 1.4096 1.7212
## T_StageT3 2.2059 0.4533 1.9384 2.5103
## T_StageT4 3.3382 0.2996 2.8226 3.9479
## treat2yes 0.9753 1.0254 0.8789 1.0822
## Year_of_diagnosis2019 0.9399 1.0639 0.8538 1.0347
## Year_of_diagnosis2020 0.8770 1.1403 0.7448 1.0326
##
## Concordance= 0.696 (se = 0.006 )
## Likelihood ratio test= 1125 on 26 df, p=<2e-16
## Wald test = 1086 on 26 df, p=<2e-16
## Score (logrank) test = 1181 on 26 df, p=<2e-16
# Any mediastinal nodes examined: Kaplan-Meier curves, all-cause death ----
# Event indicator is dead_any; follow-up time is sur_time.
km_fit <- survfit(
  formula = Surv(sur_time, dead_any) ~ mediastinal,
  data = seer
)
ggsurvplot(
  fit = km_fit,
  data = seer,
  # Curve annotations: confidence bands, p-value, median-survival guides.
  conf.int = TRUE,
  pval = TRUE,
  surv.median.line = "hv",
  # Number-at-risk table under the plot, coloured to match the curves.
  risk.table = TRUE,
  risk.table.col = "strata",
  # Labels are given in strata order (no, then yes).
  legend.title = "Nodes",
  legend.labs = c("No-mediastinal", "Yes-mediastinal"),
  xlab = "Time (months)",
  ylab = "Survival Probability",
  ggtheme = theme_minimal()
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
# Adjusted Cox model: mediastinal examination vs. all-cause mortality.
# Covariates: age, sex, race, marital status, histology, tumour site, grade,
# T stage, treat2 and year of diagnosis.
cox_model2 <- coxph(
  formula = Surv(sur_time, dead_any) ~ mediastinal + age2 + Sex + race2 +
    married2 + hist2 + site2 + grade2 + T_Stage + treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model2)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ mediastinal + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 2253
##
## coef exp(coef) se(coef) z
## mediastinalyes -0.205284 0.814416 0.045492 -4.513
## age2 0.030511 1.030981 0.002674 11.408
## SexMale 0.429490 1.536474 0.044944 9.556
## race2Black 0.453221 1.573372 0.113935 3.978
## race2Other/Unknown 0.523892 1.688586 0.227367 2.304
## race2White 0.343511 1.409889 0.091543 3.752
## married2single/seperated 0.172604 1.188395 0.051604 3.345
## married2Unknown 0.182602 1.200336 0.101583 1.798
## married2Widowed 0.280307 1.323536 0.064074 4.375
## hist2Bronchioalveolar -0.198731 0.819770 0.085788 -2.317
## hist2Carcinoid -0.909578 0.402694 0.154368 -5.892
## hist2Large cell 0.633641 1.884460 0.130707 4.848
## hist2Other -0.091138 0.912892 0.056663 -1.608
## hist2Squamous cell 0.218926 1.244739 0.056678 3.863
## site2Middle lobe 0.097629 1.102553 0.096030 1.017
## site2Other,NOS 0.098523 1.103540 0.124836 0.789
## site2Upper lobe -0.109701 0.896102 0.045351 -2.419
## grade2G2: Moderately differentiated 0.370893 1.449029 0.082546 4.493
## grade2G3: Poorly differentiated 0.673370 1.960834 0.085608 7.866
## grade2G4: Undifferentiated/Unknown 0.390899 1.478310 0.089643 4.361
## T_StageT2 0.440578 1.553605 0.050960 8.646
## T_StageT3 0.796047 2.216762 0.066049 12.052
## T_StageT4 1.212390 3.361509 0.085712 14.145
## treat2yes -0.019862 0.980334 0.053206 -0.373
## Year_of_diagnosis2019 -0.069006 0.933321 0.049010 -1.408
## Year_of_diagnosis2020 -0.139351 0.869923 0.083332 -1.672
## Pr(>|z|)
## mediastinalyes 6.40e-06 ***
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 6.95e-05 ***
## race2Other/Unknown 0.021213 *
## race2White 0.000175 ***
## married2single/seperated 0.000824 ***
## married2Unknown 0.072246 .
## married2Widowed 1.22e-05 ***
## hist2Bronchioalveolar 0.020529 *
## hist2Carcinoid 3.81e-09 ***
## hist2Large cell 1.25e-06 ***
## hist2Other 0.107742
## hist2Squamous cell 0.000112 ***
## site2Middle lobe 0.309324
## site2Other,NOS 0.429981
## site2Upper lobe 0.015566 *
## grade2G2: Moderately differentiated 7.02e-06 ***
## grade2G3: Poorly differentiated 3.67e-15 ***
## grade2G4: Undifferentiated/Unknown 1.30e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.708922
## Year_of_diagnosis2019 0.159132
## Year_of_diagnosis2020 0.094480 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## mediastinalyes 0.8144 1.2279 0.7449 0.8904
## age2 1.0310 0.9699 1.0256 1.0364
## SexMale 1.5365 0.6508 1.4069 1.6780
## race2Black 1.5734 0.6356 1.2585 1.9670
## race2Other/Unknown 1.6886 0.5922 1.0814 2.6367
## race2White 1.4099 0.7093 1.1783 1.6870
## married2single/seperated 1.1884 0.8415 1.0741 1.3149
## married2Unknown 1.2003 0.8331 0.9836 1.4648
## married2Widowed 1.3235 0.7556 1.1673 1.5006
## hist2Bronchioalveolar 0.8198 1.2199 0.6929 0.9699
## hist2Carcinoid 0.4027 2.4833 0.2976 0.5450
## hist2Large cell 1.8845 0.5307 1.4586 2.4347
## hist2Other 0.9129 1.0954 0.8169 1.0201
## hist2Squamous cell 1.2447 0.8034 1.1139 1.3910
## site2Middle lobe 1.1026 0.9070 0.9134 1.3309
## site2Other,NOS 1.1035 0.9062 0.8640 1.4094
## site2Upper lobe 0.8961 1.1159 0.8199 0.9794
## grade2G2: Moderately differentiated 1.4490 0.6901 1.2326 1.7035
## grade2G3: Poorly differentiated 1.9608 0.5100 1.6579 2.3191
## grade2G4: Undifferentiated/Unknown 1.4783 0.6764 1.2401 1.7623
## T_StageT2 1.5536 0.6437 1.4059 1.7168
## T_StageT3 2.2168 0.4511 1.9476 2.5231
## T_StageT4 3.3615 0.2975 2.8417 3.9764
## treat2yes 0.9803 1.0201 0.8833 1.0881
## Year_of_diagnosis2019 0.9333 1.0714 0.8478 1.0274
## Year_of_diagnosis2020 0.8699 1.1495 0.7388 1.0243
##
## Concordance= 0.694 (se = 0.006 )
## Likelihood ratio test= 1110 on 26 df, p=<2e-16
## Wald test = 1067 on 26 df, p=<2e-16
## Score (logrank) test = 1163 on 26 df, p=<2e-16
# Nodes examined, grouped: Kaplan-Meier curves, all-cause death ----
km_fit <- survfit(
  formula = Surv(sur_time, dead_any) ~ node_group,
  data = seer
)
ggsurvplot(
  fit = km_fit,
  data = seer,
  # Curve annotations: confidence bands, p-value, median-survival guides.
  conf.int = TRUE,
  pval = TRUE,
  surv.median.line = "hv",
  # Number-at-risk table under the plot, coloured to match the curves.
  risk.table = TRUE,
  risk.table.col = "strata",
  legend.title = "Nodes",
  # Labels must follow the strata order of the fit, which here is the
  # alphabetical level order of node_group (so "10+" sorts before "5-10").
  # NOTE(review): "5-10" and "10+" both mention 10 -- confirm the cut points.
  legend.labs = c("0 Nodes", "1-4 Nodes", "10+ nodes", "5-10 nodes"),
  xlab = "Time (months)",
  ylab = "Survival Probability",
  ggtheme = theme_minimal()
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
# Adjusted Cox model: grouped number of nodes examined vs. all-cause
# mortality, with the same adjustment set as the other models in this section.
cox_model3 <- coxph(
  formula = Surv(sur_time, dead_any) ~ node_group + age2 + Sex + race2 +
    married2 + hist2 + site2 + grade2 + T_Stage + treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model3)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ node_group + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 2253
##
## coef exp(coef) se(coef) z
## node_group1-4 nodes -0.238580 0.787746 0.082055 -2.908
## node_group10+ nodes -0.504462 0.603830 0.072910 -6.919
## node_group5-10 nodes -0.425695 0.653315 0.075932 -5.606
## age2 0.030226 1.030688 0.002669 11.324
## SexMale 0.428073 1.534298 0.044952 9.523
## race2Black 0.430434 1.537925 0.113994 3.776
## race2Other/Unknown 0.543853 1.722632 0.227361 2.392
## race2White 0.346295 1.413819 0.091552 3.782
## married2single/seperated 0.173338 1.189268 0.051597 3.359
## married2Unknown 0.187954 1.206777 0.101601 1.850
## married2Widowed 0.278771 1.321504 0.064051 4.352
## hist2Bronchioalveolar -0.185851 0.830397 0.085799 -2.166
## hist2Carcinoid -0.912950 0.401339 0.154415 -5.912
## hist2Large cell 0.640993 1.898364 0.130754 4.902
## hist2Other -0.074553 0.928159 0.056737 -1.314
## hist2Squamous cell 0.227855 1.255903 0.056715 4.018
## site2Middle lobe 0.070912 1.073487 0.096044 0.738
## site2Other,NOS 0.102521 1.107960 0.124957 0.820
## site2Upper lobe -0.108867 0.896849 0.045360 -2.400
## grade2G2: Moderately differentiated 0.377580 1.458751 0.082555 4.574
## grade2G3: Poorly differentiated 0.681880 1.977592 0.085627 7.963
## grade2G4: Undifferentiated/Unknown 0.385292 1.470044 0.089734 4.294
## T_StageT2 0.448373 1.565762 0.050983 8.794
## T_StageT3 0.807456 2.242196 0.066060 12.223
## T_StageT4 1.234743 3.437495 0.086030 14.352
## treat2yes -0.005106 0.994907 0.053385 -0.096
## Year_of_diagnosis2019 -0.060252 0.941527 0.049037 -1.229
## Year_of_diagnosis2020 -0.127915 0.879928 0.083369 -1.534
## Pr(>|z|)
## node_group1-4 nodes 0.003643 **
## node_group10+ nodes 4.55e-12 ***
## node_group5-10 nodes 2.07e-08 ***
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 0.000159 ***
## race2Other/Unknown 0.016756 *
## race2White 0.000155 ***
## married2single/seperated 0.000781 ***
## married2Unknown 0.064326 .
## married2Widowed 1.35e-05 ***
## hist2Bronchioalveolar 0.030303 *
## hist2Carcinoid 3.37e-09 ***
## hist2Large cell 9.47e-07 ***
## hist2Other 0.188846
## hist2Squamous cell 5.88e-05 ***
## site2Middle lobe 0.460313
## site2Other,NOS 0.411959
## site2Upper lobe 0.016393 *
## grade2G2: Moderately differentiated 4.79e-06 ***
## grade2G3: Poorly differentiated 1.67e-15 ***
## grade2G4: Undifferentiated/Unknown 1.76e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.923806
## Year_of_diagnosis2019 0.219183
## Year_of_diagnosis2020 0.124948
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## node_group1-4 nodes 0.7877 1.2694 0.6707 0.9252
## node_group10+ nodes 0.6038 1.6561 0.5234 0.6966
## node_group5-10 nodes 0.6533 1.5307 0.5630 0.7582
## age2 1.0307 0.9702 1.0253 1.0361
## SexMale 1.5343 0.6518 1.4049 1.6756
## race2Black 1.5379 0.6502 1.2300 1.9229
## race2Other/Unknown 1.7226 0.5805 1.1032 2.6898
## race2White 1.4138 0.7073 1.1816 1.6917
## married2single/seperated 1.1893 0.8409 1.0749 1.3158
## married2Unknown 1.2068 0.8287 0.9889 1.4727
## married2Widowed 1.3215 0.7567 1.1656 1.4983
## hist2Bronchioalveolar 0.8304 1.2042 0.7019 0.9825
## hist2Carcinoid 0.4013 2.4917 0.2965 0.5432
## hist2Large cell 1.8984 0.5268 1.4692 2.4529
## hist2Other 0.9282 1.0774 0.8305 1.0373
## hist2Squamous cell 1.2559 0.7962 1.1238 1.4036
## site2Middle lobe 1.0735 0.9315 0.8893 1.2958
## site2Other,NOS 1.1080 0.9026 0.8673 1.4154
## site2Upper lobe 0.8968 1.1150 0.8206 0.9802
## grade2G2: Moderately differentiated 1.4588 0.6855 1.2408 1.7150
## grade2G3: Poorly differentiated 1.9776 0.5057 1.6721 2.3390
## grade2G4: Undifferentiated/Unknown 1.4700 0.6803 1.2330 1.7527
## T_StageT2 1.5658 0.6387 1.4169 1.7303
## T_StageT3 2.2422 0.4460 1.9699 2.5521
## T_StageT4 3.4375 0.2909 2.9041 4.0688
## treat2yes 0.9949 1.0051 0.8961 1.1046
## Year_of_diagnosis2019 0.9415 1.0621 0.8552 1.0365
## Year_of_diagnosis2020 0.8799 1.1365 0.7473 1.0361
##
## Concordance= 0.696 (se = 0.006 )
## Likelihood ratio test= 1143 on 28 df, p=<2e-16
## Wald test = 1103 on 28 df, p=<2e-16
## Score (logrank) test = 1199 on 28 df, p=<2e-16
# Nodes examined as a continuous (linear) exposure, all-cause death ----
# The hazard ratio is per one additional node examined.
# NOTE(review): assumes Regional_nodes_examined__1988__ holds true counts,
# i.e. registry special codes were recoded upstream -- TODO confirm.
cox_model4 <- coxph(
  formula = Surv(sur_time, dead_any) ~ Regional_nodes_examined__1988__ +
    age2 + Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model4)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ Regional_nodes_examined__1988__ +
## age2 + Sex + race2 + married2 + hist2 + site2 + grade2 +
## T_Stage + treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 2253
##
## coef exp(coef) se(coef) z
## Regional_nodes_examined__1988__ -0.007231 0.992795 0.002377 -3.042
## age2 0.031002 1.031487 0.002674 11.595
## SexMale 0.431560 1.539657 0.044977 9.595
## race2Black 0.451046 1.569954 0.113952 3.958
## race2Other/Unknown 0.538307 1.713104 0.227316 2.368
## race2White 0.354650 1.425681 0.091532 3.875
## married2single/seperated 0.175773 1.192167 0.051587 3.407
## married2Unknown 0.200494 1.222007 0.101579 1.974
## married2Widowed 0.283185 1.327351 0.064078 4.419
## hist2Bronchioalveolar -0.198438 0.820010 0.085771 -2.314
## hist2Carcinoid -0.907468 0.403545 0.154432 -5.876
## hist2Large cell 0.631082 1.879643 0.130727 4.827
## hist2Other -0.089077 0.914775 0.056690 -1.571
## hist2Squamous cell 0.224956 1.252268 0.056725 3.966
## site2Middle lobe 0.079282 1.082510 0.096017 0.826
## site2Other,NOS 0.128306 1.136901 0.124636 1.029
## site2Upper lobe -0.109172 0.896576 0.045347 -2.407
## grade2G2: Moderately differentiated 0.365380 1.441061 0.082536 4.427
## grade2G3: Poorly differentiated 0.671683 1.957528 0.085619 7.845
## grade2G4: Undifferentiated/Unknown 0.391091 1.478593 0.089652 4.362
## T_StageT2 0.430925 1.538680 0.050885 8.469
## T_StageT3 0.786833 2.196430 0.065976 11.926
## T_StageT4 1.213980 3.366858 0.086003 14.116
## treat2yes -0.021281 0.978944 0.053333 -0.399
## Year_of_diagnosis2019 -0.067319 0.934897 0.049018 -1.373
## Year_of_diagnosis2020 -0.136970 0.871996 0.083341 -1.643
## Pr(>|z|)
## Regional_nodes_examined__1988__ 0.002349 **
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 7.55e-05 ***
## race2Other/Unknown 0.017880 *
## race2White 0.000107 ***
## married2single/seperated 0.000656 ***
## married2Unknown 0.048407 *
## married2Widowed 9.90e-06 ***
## hist2Bronchioalveolar 0.020691 *
## hist2Carcinoid 4.20e-09 ***
## hist2Large cell 1.38e-06 ***
## hist2Other 0.116114
## hist2Squamous cell 7.32e-05 ***
## site2Middle lobe 0.408966
## site2Other,NOS 0.303272
## site2Upper lobe 0.016063 *
## grade2G2: Moderately differentiated 9.56e-06 ***
## grade2G3: Poorly differentiated 4.33e-15 ***
## grade2G4: Undifferentiated/Unknown 1.29e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.689882
## Year_of_diagnosis2019 0.169645
## Year_of_diagnosis2020 0.100282
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Regional_nodes_examined__1988__ 0.9928 1.0073 0.9882 0.9974
## age2 1.0315 0.9695 1.0261 1.0369
## SexMale 1.5397 0.6495 1.4097 1.6815
## race2Black 1.5700 0.6370 1.2557 1.9628
## race2Other/Unknown 1.7131 0.5837 1.0972 2.6747
## race2White 1.4257 0.7014 1.1915 1.7058
## married2single/seperated 1.1922 0.8388 1.0775 1.3190
## married2Unknown 1.2220 0.8183 1.0014 1.4912
## married2Widowed 1.3274 0.7534 1.1707 1.5050
## hist2Bronchioalveolar 0.8200 1.2195 0.6931 0.9701
## hist2Carcinoid 0.4035 2.4780 0.2982 0.5462
## hist2Large cell 1.8796 0.5320 1.4548 2.4286
## hist2Other 0.9148 1.0932 0.8186 1.0223
## hist2Squamous cell 1.2523 0.7986 1.1205 1.3995
## site2Middle lobe 1.0825 0.9238 0.8968 1.3067
## site2Other,NOS 1.1369 0.8796 0.8905 1.4515
## site2Upper lobe 0.8966 1.1154 0.8203 0.9799
## grade2G2: Moderately differentiated 1.4411 0.6939 1.2258 1.6941
## grade2G3: Poorly differentiated 1.9575 0.5108 1.6551 2.3152
## grade2G4: Undifferentiated/Unknown 1.4786 0.6763 1.2403 1.7626
## T_StageT2 1.5387 0.6499 1.3926 1.7000
## T_StageT3 2.1964 0.4553 1.9300 2.4996
## T_StageT4 3.3669 0.2970 2.8446 3.9850
## treat2yes 0.9789 1.0215 0.8818 1.0868
## Year_of_diagnosis2019 0.9349 1.0696 0.8493 1.0292
## Year_of_diagnosis2020 0.8720 1.1468 0.7406 1.0267
##
## Concordance= 0.693 (se = 0.006 )
## Likelihood ratio test= 1100 on 26 df, p=<2e-16
## Wald test = 1058 on 26 df, p=<2e-16
## Score (logrank) test = 1154 on 26 df, p=<2e-16
# Nodes examined as a continuous, log-transformed exposure, all-cause death ----
# Adding 1 before taking the log keeps patients with zero nodes examined
# in the model (log(0 + 1) = 0).
seer[["node_log"]] <- with(seer, log(Regional_nodes_examined__1988__ + 1))
cox_model5 <- coxph(
  formula = Surv(sur_time, dead_any) ~ node_log + age2 + Sex + race2 +
    married2 + hist2 + site2 + grade2 + T_Stage + treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model5)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ node_log + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 2253
##
## coef exp(coef) se(coef) z
## node_log -0.147511 0.862853 0.022259 -6.627
## age2 0.030500 1.030969 0.002670 11.424
## SexMale 0.431423 1.539447 0.044955 9.597
## race2Black 0.436406 1.547136 0.113988 3.829
## race2Other/Unknown 0.544235 1.723289 0.227360 2.394
## race2White 0.351194 1.420763 0.091517 3.837
## married2single/seperated 0.173033 1.188906 0.051596 3.354
## married2Unknown 0.197253 1.218052 0.101590 1.942
## married2Widowed 0.281356 1.324925 0.064061 4.392
## hist2Bronchioalveolar -0.188954 0.827824 0.085782 -2.203
## hist2Carcinoid -0.910376 0.402373 0.154418 -5.896
## hist2Large cell 0.639928 1.896345 0.130736 4.895
## hist2Other -0.077003 0.925887 0.056731 -1.357
## hist2Squamous cell 0.229516 1.257990 0.056707 4.047
## site2Middle lobe 0.071445 1.074059 0.096039 0.744
## site2Other,NOS 0.109165 1.115347 0.124786 0.875
## site2Upper lobe -0.109079 0.896660 0.045349 -2.405
## grade2G2: Moderately differentiated 0.374200 1.453828 0.082552 4.533
## grade2G3: Poorly differentiated 0.680243 1.974358 0.085619 7.945
## grade2G4: Undifferentiated/Unknown 0.386370 1.471628 0.089679 4.308
## T_StageT2 0.445122 1.560681 0.050964 8.734
## T_StageT3 0.806391 2.239809 0.066049 12.209
## T_StageT4 1.243074 3.466253 0.086026 14.450
## treat2yes -0.001934 0.998068 0.053366 -0.036
## Year_of_diagnosis2019 -0.062040 0.939845 0.049029 -1.265
## Year_of_diagnosis2020 -0.129828 0.878247 0.083356 -1.558
## Pr(>|z|)
## node_log 3.42e-11 ***
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 0.000129 ***
## race2Other/Unknown 0.016679 *
## race2White 0.000124 ***
## married2single/seperated 0.000798 ***
## married2Unknown 0.052178 .
## married2Widowed 1.12e-05 ***
## hist2Bronchioalveolar 0.027614 *
## hist2Carcinoid 3.73e-09 ***
## hist2Large cell 9.84e-07 ***
## hist2Other 0.174669
## hist2Squamous cell 5.18e-05 ***
## site2Middle lobe 0.456929
## site2Other,NOS 0.381670
## site2Upper lobe 0.016159 *
## grade2G2: Moderately differentiated 5.82e-06 ***
## grade2G3: Poorly differentiated 1.94e-15 ***
## grade2G4: Undifferentiated/Unknown 1.64e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.971095
## Year_of_diagnosis2019 0.205737
## Year_of_diagnosis2020 0.119350
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## node_log 0.8629 1.1589 0.8260 0.9013
## age2 1.0310 0.9700 1.0256 1.0364
## SexMale 1.5394 0.6496 1.4096 1.6812
## race2Black 1.5471 0.6464 1.2374 1.9344
## race2Other/Unknown 1.7233 0.5803 1.1036 2.6908
## race2White 1.4208 0.7038 1.1875 1.6999
## married2single/seperated 1.1889 0.8411 1.0746 1.3154
## married2Unknown 1.2181 0.8210 0.9981 1.4864
## married2Widowed 1.3249 0.7548 1.1686 1.5022
## hist2Bronchioalveolar 0.8278 1.2080 0.6997 0.9794
## hist2Carcinoid 0.4024 2.4853 0.2973 0.5446
## hist2Large cell 1.8963 0.5273 1.4677 2.4502
## hist2Other 0.9259 1.0800 0.8285 1.0348
## hist2Squamous cell 1.2580 0.7949 1.1257 1.4059
## site2Middle lobe 1.0741 0.9310 0.8898 1.2965
## site2Other,NOS 1.1153 0.8966 0.8734 1.4244
## site2Upper lobe 0.8967 1.1153 0.8204 0.9800
## grade2G2: Moderately differentiated 1.4538 0.6878 1.2366 1.7092
## grade2G3: Poorly differentiated 1.9744 0.5065 1.6693 2.3351
## grade2G4: Undifferentiated/Unknown 1.4716 0.6795 1.2344 1.7544
## T_StageT2 1.5607 0.6407 1.4123 1.7246
## T_StageT3 2.2398 0.4465 1.9678 2.5494
## T_StageT4 3.4663 0.2885 2.9284 4.1029
## treat2yes 0.9981 1.0019 0.8989 1.1081
## Year_of_diagnosis2019 0.9398 1.0640 0.8537 1.0346
## Year_of_diagnosis2020 0.8782 1.1386 0.7459 1.0341
##
## Concordance= 0.695 (se = 0.006 )
## Likelihood ratio test= 1133 on 26 df, p=<2e-16
## Wald test = 1090 on 26 df, p=<2e-16
## Score (logrank) test = 1186 on 26 df, p=<2e-16
# Positive nodes as a continuous (linear) exposure, all-cause death ----
# Rows with a missing Regional_nodes_positive__1988__ are dropped by coxph,
# so this model is fitted on fewer patients than the nodes-examined models.
# NOTE(review): assumes the column holds true counts, i.e. registry special
# codes were recoded upstream -- TODO confirm.
cox_model6 <- coxph(
  formula = Surv(sur_time, dead_any) ~ Regional_nodes_positive__1988__ +
    age2 + Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model6)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ Regional_nodes_positive__1988__ +
## age2 + Sex + race2 + married2 + hist2 + site2 + grade2 +
## T_Stage + treat2 + Year_of_diagnosis, data = seer)
##
## n= 22131, number of events= 2008
## (1910 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z
## Regional_nodes_positive__1988__ 0.099100 1.104177 0.007268 13.635
## age2 0.030461 1.030930 0.002826 10.779
## SexMale 0.396793 1.487049 0.047510 8.352
## race2Black 0.424926 1.529478 0.119462 3.557
## race2Other/Unknown 0.442303 1.556287 0.242524 1.824
## race2White 0.325436 1.384634 0.095148 3.420
## married2single/seperated 0.162260 1.176165 0.054479 2.978
## married2Unknown 0.119860 1.127339 0.111445 1.076
## married2Widowed 0.232566 1.261833 0.068602 3.390
## hist2Bronchioalveolar -0.190115 0.826864 0.090424 -2.102
## hist2Carcinoid -0.997162 0.368925 0.173971 -5.732
## hist2Large cell 0.663105 1.940809 0.137128 4.836
## hist2Other -0.125412 0.882134 0.059622 -2.103
## hist2Squamous cell 0.208253 1.231524 0.060179 3.461
## site2Middle lobe 0.088511 1.092546 0.101956 0.868
## site2Other,NOS 0.020816 1.021035 0.137730 0.151
## site2Upper lobe -0.127443 0.880344 0.047861 -2.663
## grade2G2: Moderately differentiated 0.406161 1.501045 0.089891 4.518
## grade2G3: Poorly differentiated 0.699140 2.012021 0.093098 7.510
## grade2G4: Undifferentiated/Unknown 0.407548 1.503127 0.098143 4.153
## T_StageT2 0.425374 1.530163 0.053931 7.887
## T_StageT3 0.776757 2.174410 0.069155 11.232
## T_StageT4 1.167576 3.214192 0.088958 13.125
## treat2yes -0.195026 0.822813 0.057316 -3.403
## Year_of_diagnosis2019 -0.096803 0.907735 0.051666 -1.874
## Year_of_diagnosis2020 -0.183143 0.832649 0.088953 -2.059
## Pr(>|z|)
## Regional_nodes_positive__1988__ < 2e-16 ***
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 0.000375 ***
## race2Other/Unknown 0.068190 .
## race2White 0.000625 ***
## married2single/seperated 0.002898 **
## married2Unknown 0.282148
## married2Widowed 0.000699 ***
## hist2Bronchioalveolar 0.035511 *
## hist2Carcinoid 9.94e-09 ***
## hist2Large cell 1.33e-06 ***
## hist2Other 0.035427 *
## hist2Squamous cell 0.000539 ***
## site2Middle lobe 0.385326
## site2Other,NOS 0.879865
## site2Upper lobe 0.007750 **
## grade2G2: Moderately differentiated 6.23e-06 ***
## grade2G3: Poorly differentiated 5.93e-14 ***
## grade2G4: Undifferentiated/Unknown 3.29e-05 ***
## T_StageT2 3.09e-15 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.000667 ***
## Year_of_diagnosis2019 0.060981 .
## Year_of_diagnosis2020 0.039506 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Regional_nodes_positive__1988__ 1.1042 0.9057 1.0886 1.1200
## age2 1.0309 0.9700 1.0252 1.0367
## SexMale 1.4870 0.6725 1.3548 1.6322
## race2Black 1.5295 0.6538 1.2102 1.9330
## race2Other/Unknown 1.5563 0.6426 0.9675 2.5034
## race2White 1.3846 0.7222 1.1491 1.6685
## married2single/seperated 1.1762 0.8502 1.0571 1.3087
## married2Unknown 1.1273 0.8870 0.9061 1.4025
## married2Widowed 1.2618 0.7925 1.1031 1.4434
## hist2Bronchioalveolar 0.8269 1.2094 0.6926 0.9872
## hist2Carcinoid 0.3689 2.7106 0.2623 0.5188
## hist2Large cell 1.9408 0.5152 1.4834 2.5393
## hist2Other 0.8821 1.1336 0.7848 0.9915
## hist2Squamous cell 1.2315 0.8120 1.0945 1.3857
## site2Middle lobe 1.0925 0.9153 0.8947 1.3342
## site2Other,NOS 1.0210 0.9794 0.7795 1.3374
## site2Upper lobe 0.8803 1.1359 0.8015 0.9669
## grade2G2: Moderately differentiated 1.5010 0.6662 1.2586 1.7902
## grade2G3: Poorly differentiated 2.0120 0.4970 1.6764 2.4148
## grade2G4: Undifferentiated/Unknown 1.5031 0.6653 1.2401 1.8219
## T_StageT2 1.5302 0.6535 1.3767 1.7008
## T_StageT3 2.1744 0.4599 1.8988 2.4900
## T_StageT4 3.2142 0.3111 2.6999 3.8264
## treat2yes 0.8228 1.2153 0.7354 0.9206
## Year_of_diagnosis2019 0.9077 1.1016 0.8203 1.0045
## Year_of_diagnosis2020 0.8326 1.2010 0.6994 0.9912
##
## Concordance= 0.708 (se = 0.006 )
## Likelihood ratio test= 1107 on 26 df, p=<2e-16
## Wald test = 1166 on 26 df, p=<2e-16
## Score (logrank) test = 1293 on 26 df, p=<2e-16
# Any positive node (yes vs. no): Kaplan-Meier curves, all-cause death ----
# pos_node2 is "no" when zero nodes were positive and "yes" for any other
# non-missing value; a missing count stays NA.
seer <- mutate(
  seer,
  pos_node2 = ifelse(Regional_nodes_positive__1988__ == 0, "no", "yes")
)
km_fit <- survfit(
  formula = Surv(sur_time, dead_any) ~ pos_node2,
  data = seer
)
ggsurvplot(
  fit = km_fit,
  data = seer,
  # Curve annotations: confidence bands, p-value, median-survival guides.
  conf.int = TRUE,
  pval = TRUE,
  surv.median.line = "hv",
  # Number-at-risk table under the plot, coloured to match the curves.
  risk.table = TRUE,
  risk.table.col = "strata",
  # Labels are given in strata order (no, then yes).
  legend.title = "Nodes",
  legend.labs = c("0 Nodes Positive", "1+ Nodes Positive"),
  xlab = "Time (months)",
  ylab = "Survival Probability",
  ggtheme = theme_minimal()
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
# Adjusted Cox model: any positive node (pos_node2) vs. all-cause mortality.
# Patients with NA in pos_node2 are excluded by coxph's missing-data handling.
cox_model7 <- coxph(
  formula = Surv(sur_time, dead_any) ~ pos_node2 + age2 + Sex + race2 +
    married2 + hist2 + site2 + grade2 + T_Stage + treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model7)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ pos_node2 + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 22131, number of events= 2008
## (1910 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z
## pos_node2yes 0.888487 2.431449 0.057343 15.494
## age2 0.029482 1.029921 0.002812 10.484
## SexMale 0.393348 1.481934 0.047529 8.276
## race2Black 0.467655 1.596246 0.119251 3.922
## race2Other/Unknown 0.411430 1.508974 0.242492 1.697
## race2White 0.345060 1.412074 0.095114 3.628
## married2single/seperated 0.149934 1.161758 0.054390 2.757
## married2Unknown 0.120517 1.128080 0.111513 1.081
## married2Widowed 0.225369 1.252785 0.068725 3.279
## hist2Bronchioalveolar -0.180614 0.834758 0.090331 -1.999
## hist2Carcinoid -1.082031 0.338907 0.174361 -6.206
## hist2Large cell 0.674410 1.962875 0.137096 4.919
## hist2Other -0.123817 0.883542 0.059606 -2.077
## hist2Squamous cell 0.206493 1.229359 0.060099 3.436
## site2Middle lobe 0.076568 1.079576 0.101807 0.752
## site2Other,NOS -0.061178 0.940656 0.138323 -0.442
## site2Upper lobe -0.136338 0.872548 0.047748 -2.855
## grade2G2: Moderately differentiated 0.358556 1.431261 0.090103 3.979
## grade2G3: Poorly differentiated 0.629316 1.876326 0.093364 6.740
## grade2G4: Undifferentiated/Unknown 0.369451 1.446940 0.098314 3.758
## T_StageT2 0.394653 1.483869 0.053943 7.316
## T_StageT3 0.758394 2.134845 0.068365 11.093
## T_StageT4 1.141308 3.130860 0.088198 12.940
## treat2yes -0.452146 0.636261 0.061260 -7.381
## Year_of_diagnosis2019 -0.085892 0.917693 0.051662 -1.663
## Year_of_diagnosis2020 -0.173982 0.840312 0.088901 -1.957
## Pr(>|z|)
## pos_node2yes < 2e-16 ***
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 8.80e-05 ***
## race2Other/Unknown 0.089757 .
## race2White 0.000286 ***
## married2single/seperated 0.005840 **
## married2Unknown 0.279811
## married2Widowed 0.001041 **
## hist2Bronchioalveolar 0.045558 *
## hist2Carcinoid 5.45e-10 ***
## hist2Large cell 8.69e-07 ***
## hist2Other 0.037778 *
## hist2Squamous cell 0.000591 ***
## site2Middle lobe 0.451995
## site2Other,NOS 0.658284
## site2Upper lobe 0.004299 **
## grade2G2: Moderately differentiated 6.91e-05 ***
## grade2G3: Poorly differentiated 1.58e-11 ***
## grade2G4: Undifferentiated/Unknown 0.000171 ***
## T_StageT2 2.55e-13 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 1.57e-13 ***
## Year_of_diagnosis2019 0.096398 .
## Year_of_diagnosis2020 0.050345 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pos_node2yes 2.4314 0.4113 2.1730 2.7207
## age2 1.0299 0.9709 1.0243 1.0356
## SexMale 1.4819 0.6748 1.3501 1.6266
## race2Black 1.5962 0.6265 1.2636 2.0165
## race2Other/Unknown 1.5090 0.6627 0.9382 2.4271
## race2White 1.4121 0.7082 1.1719 1.7014
## married2single/seperated 1.1618 0.8608 1.0443 1.2924
## married2Unknown 1.1281 0.8865 0.9066 1.4037
## married2Widowed 1.2528 0.7982 1.0949 1.4334
## hist2Bronchioalveolar 0.8348 1.1980 0.6993 0.9964
## hist2Carcinoid 0.3389 2.9507 0.2408 0.4770
## hist2Large cell 1.9629 0.5095 1.5004 2.5680
## hist2Other 0.8835 1.1318 0.7861 0.9930
## hist2Squamous cell 1.2294 0.8134 1.0928 1.3830
## site2Middle lobe 1.0796 0.9263 0.8843 1.3180
## site2Other,NOS 0.9407 1.0631 0.7173 1.2336
## site2Upper lobe 0.8725 1.1461 0.7946 0.9581
## grade2G2: Moderately differentiated 1.4313 0.6987 1.1996 1.7077
## grade2G3: Poorly differentiated 1.8763 0.5330 1.5626 2.2531
## grade2G4: Undifferentiated/Unknown 1.4469 0.6911 1.1933 1.7544
## T_StageT2 1.4839 0.6739 1.3350 1.6493
## T_StageT3 2.1348 0.4684 1.8671 2.4410
## T_StageT4 3.1309 0.3194 2.6338 3.7217
## treat2yes 0.6363 1.5717 0.5643 0.7174
## Year_of_diagnosis2019 0.9177 1.0897 0.8293 1.0155
## Year_of_diagnosis2020 0.8403 1.1900 0.7059 1.0003
##
## Concordance= 0.714 (se = 0.006 )
## Likelihood ratio test= 1211 on 26 df, p=<2e-16
## Wald test = 1241 on 26 df, p=<2e-16
## Score (logrank) test = 1360 on 26 df, p=<2e-16
##Lung cancer specific mortality
# Any nodes examined: Kaplan-Meier curves, lung-cancer death ----
# Event indicator is dead_lung.
# NOTE(review): presumably deaths from other causes are coded as censored in
# dead_lung -- confirm against where the variable is derived.
km_fit <- survfit(
  formula = Surv(sur_time, dead_lung) ~ node_any,
  data = seer
)
ggsurvplot(
  fit = km_fit,
  data = seer,
  # Curve annotations: confidence bands, p-value, median-survival guides.
  conf.int = TRUE,
  pval = TRUE,
  surv.median.line = "hv",
  # Number-at-risk table under the plot, coloured to match the curves.
  risk.table = TRUE,
  risk.table.col = "strata",
  # Labels are given in strata order (no, then yes).
  legend.title = "Nodes",
  legend.labs = c("0 Nodes", "1+ Nodes"),
  xlab = "Time (months)",
  ylab = "Survival Probability",
  ggtheme = theme_minimal()
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
# Adjusted Cox model: any nodes examined vs. lung-cancer mortality
# (dead_lung as the event), same adjustment set as the all-cause models.
cox_model8 <- coxph(
  formula = Surv(sur_time, dead_lung) ~ node_any + age2 + Sex + race2 +
    married2 + hist2 + site2 + grade2 + T_Stage + treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model8)
## Call:
## coxph(formula = Surv(sur_time, dead_lung) ~ node_any + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 1279
##
## coef exp(coef) se(coef) z
## node_anyyes -0.409112 0.664240 0.095604 -4.279
## age2 0.029765 1.030213 0.003533 8.426
## SexMale 0.372490 1.451343 0.059448 6.266
## race2Black 0.244615 1.277130 0.148381 1.649
## race2Other/Unknown 0.485099 1.624335 0.290712 1.669
## race2White 0.234761 1.264606 0.115301 2.036
## married2single/seperated 0.174322 1.190439 0.067852 2.569
## married2Unknown 0.146767 1.158084 0.136582 1.075
## married2Widowed 0.179263 1.196335 0.087495 2.049
## hist2Bronchioalveolar -0.215107 0.806455 0.117665 -1.828
## hist2Carcinoid -1.180632 0.307085 0.255264 -4.625
## hist2Large cell 0.706108 2.026090 0.160695 4.394
## hist2Other -0.086895 0.916774 0.074496 -1.166
## hist2Squamous cell 0.148363 1.159934 0.074587 1.989
## site2Middle lobe -0.005947 0.994070 0.132882 -0.045
## site2Other,NOS -0.123875 0.883490 0.168986 -0.733
## site2Upper lobe -0.151304 0.859586 0.059717 -2.534
## grade2G2: Moderately differentiated 0.621010 1.860806 0.126075 4.926
## grade2G3: Poorly differentiated 1.045154 2.843837 0.128291 8.147
## grade2G4: Undifferentiated/Unknown 0.581904 1.789443 0.135346 4.299
## T_StageT2 0.638704 1.894025 0.069773 9.154
## T_StageT3 1.008292 2.740916 0.086656 11.636
## T_StageT4 1.558086 4.749723 0.104964 14.844
## treat2yes 0.195345 1.215730 0.066844 2.922
## Year_of_diagnosis2019 -0.101844 0.903171 0.064983 -1.567
## Year_of_diagnosis2020 -0.299654 0.741075 0.118830 -2.522
## Pr(>|z|)
## node_anyyes 1.88e-05 ***
## age2 < 2e-16 ***
## SexMale 3.71e-10 ***
## race2Black 0.09924 .
## race2Other/Unknown 0.09519 .
## race2White 0.04174 *
## married2single/seperated 0.01019 *
## married2Unknown 0.28257
## married2Widowed 0.04048 *
## hist2Bronchioalveolar 0.06753 .
## hist2Carcinoid 3.74e-06 ***
## hist2Large cell 1.11e-05 ***
## hist2Other 0.24344
## hist2Squamous cell 0.04669 *
## site2Middle lobe 0.96430
## site2Other,NOS 0.46353
## site2Upper lobe 0.01129 *
## grade2G2: Moderately differentiated 8.40e-07 ***
## grade2G3: Poorly differentiated 3.74e-16 ***
## grade2G4: Undifferentiated/Unknown 1.71e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.00347 **
## Year_of_diagnosis2019 0.11706
## Year_of_diagnosis2020 0.01168 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## node_anyyes 0.6642 1.5055 0.5507 0.8011
## age2 1.0302 0.9707 1.0231 1.0374
## SexMale 1.4513 0.6890 1.2917 1.6307
## race2Black 1.2771 0.7830 0.9548 1.7082
## race2Other/Unknown 1.6243 0.6156 0.9188 2.8716
## race2White 1.2646 0.7908 1.0088 1.5853
## married2single/seperated 1.1904 0.8400 1.0422 1.3598
## married2Unknown 1.1581 0.8635 0.8861 1.5136
## married2Widowed 1.1963 0.8359 1.0078 1.4201
## hist2Bronchioalveolar 0.8065 1.2400 0.6404 1.0156
## hist2Carcinoid 0.3071 3.2564 0.1862 0.5065
## hist2Large cell 2.0261 0.4936 1.4787 2.7762
## hist2Other 0.9168 1.0908 0.7922 1.0609
## hist2Squamous cell 1.1599 0.8621 1.0022 1.3425
## site2Middle lobe 0.9941 1.0060 0.7661 1.2898
## site2Other,NOS 0.8835 1.1319 0.6344 1.2304
## site2Upper lobe 0.8596 1.1634 0.7646 0.9663
## grade2G2: Moderately differentiated 1.8608 0.5374 1.4534 2.3824
## grade2G3: Poorly differentiated 2.8438 0.3516 2.2116 3.6568
## grade2G4: Undifferentiated/Unknown 1.7894 0.5588 1.3725 2.3331
## T_StageT2 1.8940 0.5280 1.6519 2.1716
## T_StageT3 2.7409 0.3648 2.3128 3.2483
## T_StageT4 4.7497 0.2105 3.8665 5.8346
## treat2yes 1.2157 0.8226 1.0664 1.3859
## Year_of_diagnosis2019 0.9032 1.1072 0.7952 1.0258
## Year_of_diagnosis2020 0.7411 1.3494 0.5871 0.9354
##
## Concordance= 0.731 (se = 0.007 )
## Likelihood ratio test= 925.9 on 26 df, p=<2e-16
## Wald test = 871.5 on 26 df, p=<2e-16
## Score (logrank) test = 1030 on 26 df, p=<2e-16
# Any mediastinal nodes examined (yes vs. no)
# Kaplan-Meier fit of the `dead_lung` event over `sur_time`, stratified by
# `mediastinal`, plotted with a p-value, confidence bands and an at-risk table.
km_fit <- survfit(
  Surv(sur_time, dead_lung) ~ mediastinal,
  data = seer
)
ggsurvplot(
  km_fit,
  data = seer,
  # Axis and legend text; the two labels are given in strata order (no, yes).
  xlab = "Time (months)",
  ylab = "Survival Probability",
  legend.title = "Nodes",
  legend.labs = c("No-mediastinal", "Yes-mediastinal"),
  # Statistical overlays: p-value, confidence bands, median-survival guides.
  pval = TRUE,
  conf.int = TRUE,
  surv.median.line = "hv",
  # Number-at-risk table under the plot, coloured per stratum.
  risk.table = TRUE,
  risk.table.col = "strata",
  ggtheme = theme_minimal()
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
# Cox proportional hazards model for the `dead_lung` event: mediastinal
# examination (yes vs. no) adjusted for demographic, tumour and treatment
# covariates.
cox_model9 <- coxph(
  Surv(sur_time, dead_lung) ~ mediastinal +
    age2 + Sex + race2 + married2 +
    hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model9)
## Call:
## coxph(formula = Surv(sur_time, dead_lung) ~ mediastinal + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 1279
##
## coef exp(coef) se(coef) z
## mediastinalyes -0.224404 0.798993 0.061171 -3.668
## age2 0.029693 1.030138 0.003538 8.392
## SexMale 0.375360 1.455515 0.059448 6.314
## race2Black 0.261538 1.298926 0.148324 1.763
## race2Other/Unknown 0.463709 1.589959 0.290799 1.595
## race2White 0.235796 1.265916 0.115314 2.045
## married2single/seperated 0.168278 1.183266 0.067900 2.478
## married2Unknown 0.143065 1.153804 0.136602 1.047
## married2Widowed 0.182888 1.200679 0.087522 2.090
## hist2Bronchioalveolar -0.223810 0.799467 0.117654 -1.902
## hist2Carcinoid -1.176556 0.308339 0.255198 -4.610
## hist2Large cell 0.704033 2.021890 0.160659 4.382
## hist2Other -0.093623 0.910626 0.074443 -1.258
## hist2Squamous cell 0.146055 1.157260 0.074572 1.959
## site2Middle lobe 0.011442 1.011508 0.132900 0.086
## site2Other,NOS -0.124098 0.883293 0.168971 -0.734
## site2Upper lobe -0.148993 0.861575 0.059725 -2.495
## grade2G2: Moderately differentiated 0.624646 1.867585 0.126087 4.954
## grade2G3: Poorly differentiated 1.047255 2.849819 0.128271 8.164
## grade2G4: Undifferentiated/Unknown 0.596505 1.815761 0.135258 4.410
## T_StageT2 0.638925 1.894444 0.069777 9.157
## T_StageT3 1.015969 2.762037 0.086781 11.707
## T_StageT4 1.569247 4.803029 0.105151 14.924
## treat2yes 0.203206 1.225325 0.067012 3.032
## Year_of_diagnosis2019 -0.107725 0.897875 0.064942 -1.659
## Year_of_diagnosis2020 -0.305510 0.736747 0.118783 -2.572
## Pr(>|z|)
## mediastinalyes 0.000244 ***
## age2 < 2e-16 ***
## SexMale 2.72e-10 ***
## race2Black 0.077853 .
## race2Other/Unknown 0.110802
## race2White 0.040872 *
## married2single/seperated 0.013201 *
## married2Unknown 0.294956
## married2Widowed 0.036652 *
## hist2Bronchioalveolar 0.057136 .
## hist2Carcinoid 4.02e-06 ***
## hist2Large cell 1.18e-05 ***
## hist2Other 0.208521
## hist2Squamous cell 0.050161 .
## site2Middle lobe 0.931391
## site2Other,NOS 0.462683
## site2Upper lobe 0.012607 *
## grade2G2: Moderately differentiated 7.27e-07 ***
## grade2G3: Poorly differentiated 3.23e-16 ***
## grade2G4: Undifferentiated/Unknown 1.03e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.002426 **
## Year_of_diagnosis2019 0.097162 .
## Year_of_diagnosis2020 0.010111 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## mediastinalyes 0.7990 1.2516 0.7087 0.9008
## age2 1.0301 0.9707 1.0230 1.0373
## SexMale 1.4555 0.6870 1.2954 1.6354
## race2Black 1.2989 0.7699 0.9712 1.7372
## race2Other/Unknown 1.5900 0.6289 0.8992 2.8114
## race2White 1.2659 0.7899 1.0098 1.5869
## married2single/seperated 1.1833 0.8451 1.0358 1.3517
## married2Unknown 1.1538 0.8667 0.8828 1.5080
## married2Widowed 1.2007 0.8329 1.0114 1.4254
## hist2Bronchioalveolar 0.7995 1.2508 0.6348 1.0068
## hist2Carcinoid 0.3083 3.2432 0.1870 0.5085
## hist2Large cell 2.0219 0.4946 1.4757 2.7702
## hist2Other 0.9106 1.0981 0.7870 1.0537
## hist2Squamous cell 1.1573 0.8641 0.9999 1.3394
## site2Middle lobe 1.0115 0.9886 0.7796 1.3125
## site2Other,NOS 0.8833 1.1321 0.6343 1.2301
## site2Upper lobe 0.8616 1.1607 0.7664 0.9686
## grade2G2: Moderately differentiated 1.8676 0.5355 1.4587 2.3911
## grade2G3: Poorly differentiated 2.8498 0.3509 2.2163 3.6644
## grade2G4: Undifferentiated/Unknown 1.8158 0.5507 1.3929 2.3670
## T_StageT2 1.8944 0.5279 1.6523 2.1721
## T_StageT3 2.7620 0.3621 2.3300 3.2741
## T_StageT4 4.8030 0.2082 3.9085 5.9023
## treat2yes 1.2253 0.8161 1.0745 1.3973
## Year_of_diagnosis2019 0.8979 1.1137 0.7906 1.0198
## Year_of_diagnosis2020 0.7367 1.3573 0.5837 0.9299
##
## Concordance= 0.73 (se = 0.007 )
## Likelihood ratio test= 922.5 on 26 df, p=<2e-16
## Wald test = 866.7 on 26 df, p=<2e-16
## Score (logrank) test = 1026 on 26 df, p=<2e-16
# Nodes examined, by group
# Kaplan-Meier fit of the `dead_lung` event over `sur_time`, stratified by
# `node_group`, plotted with a p-value, confidence bands and an at-risk table.
km_fit <- survfit(
  Surv(sur_time, dead_lung) ~ node_group,
  data = seer
)
ggsurvplot(
  km_fit,
  data = seer,
  xlab = "Time (months)",
  ylab = "Survival Probability",
  legend.title = "Nodes",
  # Labels must follow the strata order of `node_group`, which is not numeric:
  # the Cox model on the same variable lists "1-4", "10+", "5-10" in that order.
  # NOTE(review): the "5-10" and "10+" labels both read as including 10 --
  # confirm against the definition of `node_group`.
  legend.labs = c("0 Nodes", "1-4 Nodes", "10+ nodes", "5-10 nodes"),
  # Statistical overlays: p-value, confidence bands, median-survival guides.
  pval = TRUE,
  conf.int = TRUE,
  surv.median.line = "hv",
  # Number-at-risk table under the plot, coloured per stratum.
  risk.table = TRUE,
  risk.table.col = "strata",
  ggtheme = theme_minimal()
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
# Cox proportional hazards model for the `dead_lung` event: grouped number of
# nodes examined (`node_group`) adjusted for demographic, tumour and treatment
# covariates.
cox_model10 <- coxph(
  Surv(sur_time, dead_lung) ~ node_group +
    age2 + Sex + race2 + married2 +
    hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model10)
## Call:
## coxph(formula = Surv(sur_time, dead_lung) ~ node_group + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 1279
##
## coef exp(coef) se(coef) z
## node_group1-4 nodes -0.281566 0.754601 0.115059 -2.447
## node_group10+ nodes -0.428726 0.651338 0.100228 -4.278
## node_group5-10 nodes -0.451441 0.636710 0.105620 -4.274
## age2 0.029523 1.029963 0.003533 8.357
## SexMale 0.372533 1.451406 0.059451 6.266
## race2Black 0.241634 1.273328 0.148410 1.628
## race2Other/Unknown 0.485507 1.624999 0.290756 1.670
## race2White 0.236920 1.267340 0.115314 2.055
## married2single/seperated 0.172847 1.188685 0.067872 2.547
## married2Unknown 0.145834 1.157004 0.136616 1.067
## married2Widowed 0.180805 1.198182 0.087505 2.066
## hist2Bronchioalveolar -0.213182 0.808009 0.117674 -1.812
## hist2Carcinoid -1.179988 0.307282 0.255284 -4.622
## hist2Large cell 0.706046 2.025965 0.160728 4.393
## hist2Other -0.083094 0.920265 0.074534 -1.115
## hist2Squamous cell 0.150595 1.162526 0.074632 2.018
## site2Middle lobe -0.009955 0.990095 0.132919 -0.075
## site2Other,NOS -0.121822 0.885306 0.169060 -0.721
## site2Upper lobe -0.150709 0.860098 0.059737 -2.523
## grade2G2: Moderately differentiated 0.624860 1.867985 0.126106 4.955
## grade2G3: Poorly differentiated 1.049633 2.856602 0.128334 8.179
## grade2G4: Undifferentiated/Unknown 0.586484 1.797657 0.135385 4.332
## T_StageT2 0.641393 1.899124 0.069798 9.189
## T_StageT3 1.016514 2.763544 0.086810 11.710
## T_StageT4 1.568440 4.799155 0.105540 14.861
## treat2yes 0.203292 1.225430 0.067242 3.023
## Year_of_diagnosis2019 -0.100731 0.904176 0.064979 -1.550
## Year_of_diagnosis2020 -0.298247 0.742118 0.118831 -2.510
## Pr(>|z|)
## node_group1-4 nodes 0.0144 *
## node_group10+ nodes 1.89e-05 ***
## node_group5-10 nodes 1.92e-05 ***
## age2 < 2e-16 ***
## SexMale 3.70e-10 ***
## race2Black 0.1035
## race2Other/Unknown 0.0950 .
## race2White 0.0399 *
## married2single/seperated 0.0109 *
## married2Unknown 0.2858
## married2Widowed 0.0388 *
## hist2Bronchioalveolar 0.0700 .
## hist2Carcinoid 3.80e-06 ***
## hist2Large cell 1.12e-05 ***
## hist2Other 0.2649
## hist2Squamous cell 0.0436 *
## site2Middle lobe 0.9403
## site2Other,NOS 0.4712
## site2Upper lobe 0.0116 *
## grade2G2: Moderately differentiated 7.23e-07 ***
## grade2G3: Poorly differentiated 2.86e-16 ***
## grade2G4: Undifferentiated/Unknown 1.48e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.0025 **
## Year_of_diagnosis2019 0.1211
## Year_of_diagnosis2020 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## node_group1-4 nodes 0.7546 1.3252 0.6023 0.9455
## node_group10+ nodes 0.6513 1.5353 0.5352 0.7927
## node_group5-10 nodes 0.6367 1.5706 0.5177 0.7832
## age2 1.0300 0.9709 1.0229 1.0371
## SexMale 1.4514 0.6890 1.2918 1.6308
## race2Black 1.2733 0.7853 0.9519 1.7032
## race2Other/Unknown 1.6250 0.6154 0.9191 2.8731
## race2White 1.2673 0.7891 1.0110 1.5887
## married2single/seperated 1.1887 0.8413 1.0406 1.3578
## married2Unknown 1.1570 0.8643 0.8852 1.5122
## married2Widowed 1.1982 0.8346 1.0093 1.4224
## hist2Bronchioalveolar 0.8080 1.2376 0.6416 1.0176
## hist2Carcinoid 0.3073 3.2543 0.1863 0.5068
## hist2Large cell 2.0260 0.4936 1.4785 2.7762
## hist2Other 0.9203 1.0866 0.7952 1.0650
## hist2Squamous cell 1.1625 0.8602 1.0043 1.3456
## site2Middle lobe 0.9901 1.0100 0.7630 1.2847
## site2Other,NOS 0.8853 1.1296 0.6356 1.2331
## site2Upper lobe 0.8601 1.1627 0.7651 0.9669
## grade2G2: Moderately differentiated 1.8680 0.5353 1.4589 2.3917
## grade2G3: Poorly differentiated 2.8566 0.3501 2.2213 3.6736
## grade2G4: Undifferentiated/Unknown 1.7977 0.5563 1.3787 2.3439
## T_StageT2 1.8991 0.5266 1.6563 2.1775
## T_StageT3 2.7635 0.3619 2.3312 3.2761
## T_StageT4 4.7992 0.2084 3.9024 5.9020
## treat2yes 1.2254 0.8160 1.0741 1.3981
## Year_of_diagnosis2019 0.9042 1.1060 0.7961 1.0270
## Year_of_diagnosis2020 0.7421 1.3475 0.5879 0.9367
##
## Concordance= 0.731 (se = 0.007 )
## Likelihood ratio test= 929.7 on 28 df, p=<2e-16
## Wald test = 874.4 on 28 df, p=<2e-16
## Score (logrank) test = 1033 on 28 df, p=<2e-16
# Nodes examined, continuous
# Cox proportional hazards model for the `dead_lung` event with the raw
# `Regional_nodes_examined__1988__` value entered as a linear term, adjusted
# for demographic, tumour and treatment covariates.
cox_model11 <- coxph(
  Surv(sur_time, dead_lung) ~ Regional_nodes_examined__1988__ +
    age2 + Sex + race2 + married2 +
    hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model11)
## Call:
## coxph(formula = Surv(sur_time, dead_lung) ~ Regional_nodes_examined__1988__ +
## age2 + Sex + race2 + married2 + hist2 + site2 + grade2 +
## T_Stage + treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 1279
##
## coef exp(coef) se(coef) z
## Regional_nodes_examined__1988__ -0.002564 0.997440 0.003022 -0.848
## age2 0.030192 1.030652 0.003539 8.530
## SexMale 0.374253 1.453906 0.059506 6.289
## race2Black 0.260672 1.297802 0.148340 1.757
## race2Other/Unknown 0.483108 1.621105 0.290692 1.662
## race2White 0.246209 1.279167 0.115291 2.136
## married2single/seperated 0.174573 1.190738 0.067864 2.572
## married2Unknown 0.155495 1.168236 0.136609 1.138
## married2Widowed 0.184388 1.202482 0.087542 2.106
## hist2Bronchioalveolar -0.226184 0.797571 0.117630 -1.923
## hist2Carcinoid -1.175773 0.308580 0.255287 -4.606
## hist2Large cell 0.696589 2.006896 0.160700 4.335
## hist2Other -0.096647 0.907876 0.074463 -1.298
## hist2Squamous cell 0.147863 1.159354 0.074649 1.981
## site2Middle lobe -0.002819 0.997185 0.132888 -0.021
## site2Other,NOS -0.089902 0.914021 0.168618 -0.533
## site2Upper lobe -0.148619 0.861897 0.059714 -2.489
## grade2G2: Moderately differentiated 0.615003 1.849663 0.126064 4.879
## grade2G3: Poorly differentiated 1.040087 2.829464 0.128296 8.107
## grade2G4: Undifferentiated/Unknown 0.595130 1.813267 0.135259 4.400
## T_StageT2 0.623527 1.865496 0.069658 8.951
## T_StageT3 0.997481 2.711442 0.086700 11.505
## T_StageT4 1.547338 4.698946 0.105552 14.659
## treat2yes 0.187112 1.205762 0.067155 2.786
## Year_of_diagnosis2019 -0.107666 0.897928 0.064953 -1.658
## Year_of_diagnosis2020 -0.305647 0.736647 0.118799 -2.573
## Pr(>|z|)
## Regional_nodes_examined__1988__ 0.39627
## age2 < 2e-16 ***
## SexMale 3.19e-10 ***
## race2Black 0.07887 .
## race2Other/Unknown 0.09653 .
## race2White 0.03272 *
## married2single/seperated 0.01010 *
## married2Unknown 0.25502
## married2Widowed 0.03518 *
## hist2Bronchioalveolar 0.05450 .
## hist2Carcinoid 4.11e-06 ***
## hist2Large cell 1.46e-05 ***
## hist2Other 0.19431
## hist2Squamous cell 0.04762 *
## site2Middle lobe 0.98308
## site2Other,NOS 0.59392
## site2Upper lobe 0.01282 *
## grade2G2: Moderately differentiated 1.07e-06 ***
## grade2G3: Poorly differentiated 5.19e-16 ***
## grade2G4: Undifferentiated/Unknown 1.08e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.00533 **
## Year_of_diagnosis2019 0.09740 .
## Year_of_diagnosis2020 0.01009 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Regional_nodes_examined__1988__ 0.9974 1.0026 0.9915 1.0034
## age2 1.0307 0.9703 1.0235 1.0378
## SexMale 1.4539 0.6878 1.2939 1.6338
## race2Black 1.2978 0.7705 0.9704 1.7357
## race2Other/Unknown 1.6211 0.6169 0.9170 2.8658
## race2White 1.2792 0.7818 1.0204 1.6035
## married2single/seperated 1.1907 0.8398 1.0424 1.3601
## married2Unknown 1.1682 0.8560 0.8938 1.5269
## married2Widowed 1.2025 0.8316 1.0129 1.4276
## hist2Bronchioalveolar 0.7976 1.2538 0.6333 1.0044
## hist2Carcinoid 0.3086 3.2406 0.1871 0.5089
## hist2Large cell 2.0069 0.4983 1.4647 2.7499
## hist2Other 0.9079 1.1015 0.7846 1.0505
## hist2Squamous cell 1.1594 0.8625 1.0016 1.3420
## site2Middle lobe 0.9972 1.0028 0.7685 1.2939
## site2Other,NOS 0.9140 1.0941 0.6568 1.2720
## site2Upper lobe 0.8619 1.1602 0.7667 0.9689
## grade2G2: Moderately differentiated 1.8497 0.5406 1.4447 2.3681
## grade2G3: Poorly differentiated 2.8295 0.3534 2.2004 3.6384
## grade2G4: Undifferentiated/Unknown 1.8133 0.5515 1.3910 2.3637
## T_StageT2 1.8655 0.5361 1.6274 2.1384
## T_StageT3 2.7114 0.3688 2.2877 3.2137
## T_StageT4 4.6989 0.2128 3.8208 5.7789
## treat2yes 1.2058 0.8294 1.0571 1.3754
## Year_of_diagnosis2019 0.8979 1.1137 0.7906 1.0198
## Year_of_diagnosis2020 0.7366 1.3575 0.5836 0.9298
##
## Concordance= 0.729 (se = 0.007 )
## Likelihood ratio test= 910.1 on 26 df, p=<2e-16
## Wald test = 856.4 on 26 df, p=<2e-16
## Score (logrank) test = 1015 on 26 df, p=<2e-16
# Nodes examined, continuous (log transformed)
# Same adjusted Cox model for the `dead_lung` event, with `node_log` as the
# exposure. `node_log` is built earlier in the file; presumably a log of the
# examined-node count -- confirm base and any offset there.
cox_model12 <- coxph(
  Surv(sur_time, dead_lung) ~ node_log +
    age2 + Sex + race2 + married2 +
    hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model12)
## Call:
## coxph(formula = Surv(sur_time, dead_lung) ~ node_log + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 24041, number of events= 1279
##
## coef exp(coef) se(coef) z
## node_log -0.106573 0.898909 0.030498 -3.494
## age2 0.029870 1.030321 0.003536 8.448
## SexMale 0.375730 1.456054 0.059468 6.318
## race2Black 0.249375 1.283224 0.148394 1.680
## race2Other/Unknown 0.485234 1.624555 0.290748 1.669
## race2White 0.245098 1.277746 0.115277 2.126
## married2single/seperated 0.171845 1.187494 0.067871 2.532
## married2Unknown 0.155782 1.168571 0.136608 1.140
## married2Widowed 0.184409 1.202507 0.087517 2.107
## hist2Bronchioalveolar -0.219186 0.803172 0.117637 -1.863
## hist2Carcinoid -1.176567 0.308335 0.255282 -4.609
## hist2Large cell 0.706233 2.026343 0.160713 4.394
## hist2Other -0.086302 0.917318 0.074516 -1.158
## hist2Squamous cell 0.153959 1.166443 0.074623 2.063
## site2Middle lobe -0.009766 0.990281 0.132911 -0.073
## site2Other,NOS -0.103971 0.901251 0.168789 -0.616
## site2Upper lobe -0.148193 0.862265 0.059718 -2.482
## grade2G2: Moderately differentiated 0.622064 1.862769 0.126087 4.934
## grade2G3: Poorly differentiated 1.048970 2.854709 0.128296 8.176
## grade2G4: Undifferentiated/Unknown 0.591917 1.807450 0.135292 4.375
## T_StageT2 0.635139 1.887285 0.069757 9.105
## T_StageT3 1.015276 2.760126 0.086788 11.698
## T_StageT4 1.579481 4.852437 0.105541 14.966
## treat2yes 0.208199 1.231458 0.067213 3.098
## Year_of_diagnosis2019 -0.103626 0.901562 0.064964 -1.595
## Year_of_diagnosis2020 -0.300460 0.740477 0.118810 -2.529
## Pr(>|z|)
## node_log 0.000475 ***
## age2 < 2e-16 ***
## SexMale 2.65e-10 ***
## race2Black 0.092862 .
## race2Other/Unknown 0.095135 .
## race2White 0.033490 *
## married2single/seperated 0.011344 *
## married2Unknown 0.254138
## married2Widowed 0.035106 *
## hist2Bronchioalveolar 0.062429 .
## hist2Carcinoid 4.05e-06 ***
## hist2Large cell 1.11e-05 ***
## hist2Other 0.246796
## hist2Squamous cell 0.039098 *
## site2Middle lobe 0.941423
## site2Other,NOS 0.537904
## site2Upper lobe 0.013081 *
## grade2G2: Moderately differentiated 8.07e-07 ***
## grade2G3: Poorly differentiated 2.93e-16 ***
## grade2G4: Undifferentiated/Unknown 1.21e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.001951 **
## Year_of_diagnosis2019 0.110686
## Year_of_diagnosis2020 0.011442 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## node_log 0.8989 1.1125 0.8468 0.9543
## age2 1.0303 0.9706 1.0232 1.0375
## SexMale 1.4561 0.6868 1.2959 1.6361
## race2Black 1.2832 0.7793 0.9594 1.7164
## race2Other/Unknown 1.6246 0.6156 0.9189 2.8722
## race2White 1.2777 0.7826 1.0193 1.6017
## married2single/seperated 1.1875 0.8421 1.0396 1.3564
## married2Unknown 1.1686 0.8557 0.8941 1.5273
## married2Widowed 1.2025 0.8316 1.0130 1.4275
## hist2Bronchioalveolar 0.8032 1.2451 0.6378 1.0114
## hist2Carcinoid 0.3083 3.2432 0.1870 0.5085
## hist2Large cell 2.0263 0.4935 1.4788 2.7766
## hist2Other 0.9173 1.0901 0.7927 1.0616
## hist2Squamous cell 1.1664 0.8573 1.0077 1.3502
## site2Middle lobe 0.9903 1.0098 0.7632 1.2850
## site2Other,NOS 0.9013 1.1096 0.6474 1.2546
## site2Upper lobe 0.8623 1.1597 0.7670 0.9693
## grade2G2: Moderately differentiated 1.8628 0.5368 1.4549 2.3850
## grade2G3: Poorly differentiated 2.8547 0.3503 2.2200 3.6709
## grade2G4: Undifferentiated/Unknown 1.8074 0.5533 1.3865 2.3563
## T_StageT2 1.8873 0.5299 1.6461 2.1638
## T_StageT3 2.7601 0.3623 2.3284 3.2719
## T_StageT4 4.8524 0.2061 3.9457 5.9676
## treat2yes 1.2315 0.8120 1.0795 1.4049
## Year_of_diagnosis2019 0.9016 1.1092 0.7938 1.0240
## Year_of_diagnosis2020 0.7405 1.3505 0.5867 0.9346
##
## Concordance= 0.73 (se = 0.007 )
## Likelihood ratio test= 921.2 on 26 df, p=<2e-16
## Wald test = 865.4 on 26 df, p=<2e-16
## Score (logrank) test = 1025 on 26 df, p=<2e-16
# Positive nodes, continuous
# Cox proportional hazards model for the `dead_lung` event with
# `Regional_nodes_positive__1988__` entered as a linear term, adjusted for
# demographic, tumour and treatment covariates. Rows with a missing value in
# any model variable are dropped by coxph (the recorded run reports 1910).
cox_model13 <- coxph(
  Surv(sur_time, dead_lung) ~ Regional_nodes_positive__1988__ +
    age2 + Sex + race2 + married2 +
    hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model13)
## Call:
## coxph(formula = Surv(sur_time, dead_lung) ~ Regional_nodes_positive__1988__ +
## age2 + Sex + race2 + married2 + hist2 + site2 + grade2 +
## T_Stage + treat2 + Year_of_diagnosis, data = seer)
##
## n= 22131, number of events= 1154
## (1910 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z
## Regional_nodes_positive__1988__ 0.112405 1.118966 0.008067 13.934
## age2 0.028426 1.028834 0.003699 7.685
## SexMale 0.384595 1.469019 0.062686 6.135
## race2Black 0.196410 1.217026 0.155724 1.261
## race2Other/Unknown 0.435278 1.545393 0.301512 1.444
## race2White 0.214454 1.239185 0.119331 1.797
## married2single/seperated 0.145444 1.156552 0.071461 2.035
## married2Unknown 0.051254 1.052590 0.150550 0.340
## married2Widowed 0.152097 1.164273 0.093167 1.633
## hist2Bronchioalveolar -0.188499 0.828201 0.123395 -1.528
## hist2Carcinoid -1.392259 0.248513 0.300898 -4.627
## hist2Large cell 0.764439 2.147790 0.168599 4.534
## hist2Other -0.104009 0.901217 0.078216 -1.330
## hist2Squamous cell 0.185526 1.203851 0.078892 2.352
## site2Middle lobe 0.040872 1.041718 0.139443 0.293
## site2Other,NOS -0.108759 0.896946 0.179820 -0.605
## site2Upper lobe -0.135947 0.872889 0.062958 -2.159
## grade2G2: Moderately differentiated 0.606966 1.834856 0.136194 4.457
## grade2G3: Poorly differentiated 1.017492 2.766249 0.138467 7.348
## grade2G4: Undifferentiated/Unknown 0.610009 1.840448 0.146140 4.174
## T_StageT2 0.613260 1.846441 0.073797 8.310
## T_StageT3 0.995425 2.705874 0.090628 10.984
## T_StageT4 1.508400 4.519493 0.108716 13.875
## treat2yes -0.017691 0.982464 0.071911 -0.246
## Year_of_diagnosis2019 -0.142878 0.866860 0.068323 -2.091
## Year_of_diagnosis2020 -0.324559 0.722846 0.125234 -2.592
## Pr(>|z|)
## Regional_nodes_positive__1988__ < 2e-16 ***
## age2 1.53e-14 ***
## SexMale 8.50e-10 ***
## race2Black 0.20721
## race2Other/Unknown 0.14884
## race2White 0.07231 .
## married2single/seperated 0.04182 *
## married2Unknown 0.73352
## married2Widowed 0.10257
## hist2Bronchioalveolar 0.12661
## hist2Carcinoid 3.71e-06 ***
## hist2Large cell 5.79e-06 ***
## hist2Other 0.18359
## hist2Squamous cell 0.01869 *
## site2Middle lobe 0.76944
## site2Other,NOS 0.54530
## site2Upper lobe 0.03082 *
## grade2G2: Moderately differentiated 8.33e-06 ***
## grade2G3: Poorly differentiated 2.01e-13 ***
## grade2G4: Undifferentiated/Unknown 2.99e-05 ***
## T_StageT2 < 2e-16 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 0.80567
## Year_of_diagnosis2019 0.03651 *
## Year_of_diagnosis2020 0.00955 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Regional_nodes_positive__1988__ 1.1190 0.8937 1.1014 1.1368
## age2 1.0288 0.9720 1.0214 1.0363
## SexMale 1.4690 0.6807 1.2992 1.6611
## race2Black 1.2170 0.8217 0.8969 1.6514
## race2Other/Unknown 1.5454 0.6471 0.8558 2.7905
## race2White 1.2392 0.8070 0.9808 1.5657
## married2single/seperated 1.1566 0.8646 1.0054 1.3304
## married2Unknown 1.0526 0.9500 0.7836 1.4139
## married2Widowed 1.1643 0.8589 0.9700 1.3975
## hist2Bronchioalveolar 0.8282 1.2074 0.6503 1.0548
## hist2Carcinoid 0.2485 4.0239 0.1378 0.4482
## hist2Large cell 2.1478 0.4656 1.5434 2.9888
## hist2Other 0.9012 1.1096 0.7731 1.0505
## hist2Squamous cell 1.2039 0.8307 1.0314 1.4052
## site2Middle lobe 1.0417 0.9600 0.7926 1.3691
## site2Other,NOS 0.8969 1.1149 0.6305 1.2759
## site2Upper lobe 0.8729 1.1456 0.7716 0.9875
## grade2G2: Moderately differentiated 1.8349 0.5450 1.4050 2.3962
## grade2G3: Poorly differentiated 2.7662 0.3615 2.1088 3.6287
## grade2G4: Undifferentiated/Unknown 1.8404 0.5433 1.3821 2.4509
## T_StageT2 1.8464 0.5416 1.5978 2.1338
## T_StageT3 2.7059 0.3696 2.2655 3.2318
## T_StageT4 4.5195 0.2213 3.6522 5.5928
## treat2yes 0.9825 1.0178 0.8533 1.1312
## Year_of_diagnosis2019 0.8669 1.1536 0.7582 0.9911
## Year_of_diagnosis2020 0.7228 1.3834 0.5655 0.9239
##
## Concordance= 0.749 (se = 0.008 )
## Likelihood ratio test= 963.7 on 26 df, p=<2e-16
## Wald test = 1034 on 26 df, p=<2e-16
## Score (logrank) test = 1271 on 26 df, p=<2e-16
# Positive nodes, yes vs. no
# Kaplan-Meier fit of the `dead_lung` event over `sur_time`, stratified by
# `pos_node2`, plotted with a p-value, confidence bands and an at-risk table.
km_fit <- survfit(
  Surv(sur_time, dead_lung) ~ pos_node2,
  data = seer
)
ggsurvplot(
  km_fit,
  data = seer,
  # Axis and legend text; the two labels are given in strata order (no, yes).
  xlab = "Time (months)",
  ylab = "Survival Probability",
  legend.title = "Nodes",
  legend.labs = c("0 Nodes Positive", "1+ Nodes Positive"),
  # Statistical overlays: p-value, confidence bands, median-survival guides.
  pval = TRUE,
  conf.int = TRUE,
  surv.median.line = "hv",
  # Number-at-risk table under the plot, coloured per stratum.
  risk.table = TRUE,
  risk.table.col = "strata",
  ggtheme = theme_minimal()
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
# Cox proportional hazards model: any positive node (`pos_node2`, yes vs. no)
# adjusted for demographic, tumour and treatment covariates.
# NOTE(review): the event here is `dead_any`, whereas the Kaplan-Meier fit
# directly above and cox_model9 through cox_model13 all use `dead_lung`.
# Confirm this switch of outcome is intentional and not a copy-paste slip;
# if it is changed, the recorded output below must be regenerated.
cox_model14 <- coxph(
  Surv(sur_time, dead_any) ~ pos_node2 +
    age2 + Sex + race2 + married2 +
    hist2 + site2 + grade2 + T_Stage +
    treat2 + Year_of_diagnosis,
  data = seer
)
summary(cox_model14)
## Call:
## coxph(formula = Surv(sur_time, dead_any) ~ pos_node2 + age2 +
## Sex + race2 + married2 + hist2 + site2 + grade2 + T_Stage +
## treat2 + Year_of_diagnosis, data = seer)
##
## n= 22131, number of events= 2008
## (1910 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z
## pos_node2yes 0.888487 2.431449 0.057343 15.494
## age2 0.029482 1.029921 0.002812 10.484
## SexMale 0.393348 1.481934 0.047529 8.276
## race2Black 0.467655 1.596246 0.119251 3.922
## race2Other/Unknown 0.411430 1.508974 0.242492 1.697
## race2White 0.345060 1.412074 0.095114 3.628
## married2single/seperated 0.149934 1.161758 0.054390 2.757
## married2Unknown 0.120517 1.128080 0.111513 1.081
## married2Widowed 0.225369 1.252785 0.068725 3.279
## hist2Bronchioalveolar -0.180614 0.834758 0.090331 -1.999
## hist2Carcinoid -1.082031 0.338907 0.174361 -6.206
## hist2Large cell 0.674410 1.962875 0.137096 4.919
## hist2Other -0.123817 0.883542 0.059606 -2.077
## hist2Squamous cell 0.206493 1.229359 0.060099 3.436
## site2Middle lobe 0.076568 1.079576 0.101807 0.752
## site2Other,NOS -0.061178 0.940656 0.138323 -0.442
## site2Upper lobe -0.136338 0.872548 0.047748 -2.855
## grade2G2: Moderately differentiated 0.358556 1.431261 0.090103 3.979
## grade2G3: Poorly differentiated 0.629316 1.876326 0.093364 6.740
## grade2G4: Undifferentiated/Unknown 0.369451 1.446940 0.098314 3.758
## T_StageT2 0.394653 1.483869 0.053943 7.316
## T_StageT3 0.758394 2.134845 0.068365 11.093
## T_StageT4 1.141308 3.130860 0.088198 12.940
## treat2yes -0.452146 0.636261 0.061260 -7.381
## Year_of_diagnosis2019 -0.085892 0.917693 0.051662 -1.663
## Year_of_diagnosis2020 -0.173982 0.840312 0.088901 -1.957
## Pr(>|z|)
## pos_node2yes < 2e-16 ***
## age2 < 2e-16 ***
## SexMale < 2e-16 ***
## race2Black 8.80e-05 ***
## race2Other/Unknown 0.089757 .
## race2White 0.000286 ***
## married2single/seperated 0.005840 **
## married2Unknown 0.279811
## married2Widowed 0.001041 **
## hist2Bronchioalveolar 0.045558 *
## hist2Carcinoid 5.45e-10 ***
## hist2Large cell 8.69e-07 ***
## hist2Other 0.037778 *
## hist2Squamous cell 0.000591 ***
## site2Middle lobe 0.451995
## site2Other,NOS 0.658284
## site2Upper lobe 0.004299 **
## grade2G2: Moderately differentiated 6.91e-05 ***
## grade2G3: Poorly differentiated 1.58e-11 ***
## grade2G4: Undifferentiated/Unknown 0.000171 ***
## T_StageT2 2.55e-13 ***
## T_StageT3 < 2e-16 ***
## T_StageT4 < 2e-16 ***
## treat2yes 1.57e-13 ***
## Year_of_diagnosis2019 0.096398 .
## Year_of_diagnosis2020 0.050345 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pos_node2yes 2.4314 0.4113 2.1730 2.7207
## age2 1.0299 0.9709 1.0243 1.0356
## SexMale 1.4819 0.6748 1.3501 1.6266
## race2Black 1.5962 0.6265 1.2636 2.0165
## race2Other/Unknown 1.5090 0.6627 0.9382 2.4271
## race2White 1.4121 0.7082 1.1719 1.7014
## married2single/seperated 1.1618 0.8608 1.0443 1.2924
## married2Unknown 1.1281 0.8865 0.9066 1.4037
## married2Widowed 1.2528 0.7982 1.0949 1.4334
## hist2Bronchioalveolar 0.8348 1.1980 0.6993 0.9964
## hist2Carcinoid 0.3389 2.9507 0.2408 0.4770
## hist2Large cell 1.9629 0.5095 1.5004 2.5680
## hist2Other 0.8835 1.1318 0.7861 0.9930
## hist2Squamous cell 1.2294 0.8134 1.0928 1.3830
## site2Middle lobe 1.0796 0.9263 0.8843 1.3180
## site2Other,NOS 0.9407 1.0631 0.7173 1.2336
## site2Upper lobe 0.8725 1.1461 0.7946 0.9581
## grade2G2: Moderately differentiated 1.4313 0.6987 1.1996 1.7077
## grade2G3: Poorly differentiated 1.8763 0.5330 1.5626 2.2531
## grade2G4: Undifferentiated/Unknown 1.4469 0.6911 1.1933 1.7544
## T_StageT2 1.4839 0.6739 1.3350 1.6493
## T_StageT3 2.1348 0.4684 1.8671 2.4410
## T_StageT4 3.1309 0.3194 2.6338 3.7217
## treat2yes 0.6363 1.5717 0.5643 0.7174
## Year_of_diagnosis2019 0.9177 1.0897 0.8293 1.0155
## Year_of_diagnosis2020 0.8403 1.1900 0.7059 1.0003
##
## Concordance= 0.714 (se = 0.006 )
## Likelihood ratio test= 1211 on 26 df, p=<2e-16
## Wald test = 1241 on 26 df, p=<2e-16
## Score (logrank) test = 1360 on 26 df, p=<2e-16