##Read in the dataset
# Attach the packages this script relies on so it runs in a fresh R session:
#   dplyr         - %>%, filter(), mutate(), distinct(), case_when()
#   stringr       - str_detect(), str_extract()
#   tidyr         - separate()
#   compareGroups - compareGroups(), createTable()
library(dplyr)
library(stringr)
library(tidyr)
library(compareGroups)
# Raw study export; the path is a network share, so the file must be reachable
# from the machine running this script.
PSMA_data <- read.csv("//users12/users12$/tumins01/Data/Personal/Students/Sharon/Lung cancer_PSMA_study_4.9.csv")
#1494
##Data Cleaning/Wrangling##
# All row filters applied as a single pipeline. The trailing numbers are the
# row counts recorded by the author after each step.
PSMA_data <- PSMA_data %>%
  # Remove blank rows (no accession number)
  filter(!is.na(Accession.Number)) %>% #1487
  # Remove imaging that is not a PSMA scan (Agent = FDG) ...
  filter(!str_detect(Image, "\\bFDG\\b")) %>% #467
  # ... and only keep PSMA scans (Agent = DCFPyL)
  filter(str_detect(Image, "\\bDCFPyL\\b")) %>% #432
  # Remove females
  filter(Patient.Sex == "Male") #all males now
#Fix the Date
# Split the exam timestamp at the first space into a Date part and a Time
# part; the original Exam.Completed.Date column is kept (remove = FALSE).
PSMA_data <- PSMA_data %>%
  separate(Exam.Completed.Date, into = c("Date", "Time"), sep = " ", remove = FALSE)
# Date is still a character column, so range() on it would compare text
# (lexicographically) rather than calendar dates. Parse before taking the
# range. The format assumes month/day/year, as strings such as "1/10/2024"
# suggest -- confirm against the export. Unparseable entries become NA and
# will surface as NA in the result rather than being hidden.
range(as.Date(PSMA_data$Date, format = "%m/%d/%Y"))
## Earlier output, computed on the character column (text order, not
## chronological): [1] "1/10/2024" "5/3/2024" -- re-run to refresh.
#Age > 18
# Check the age distribution at scan level before de-duplicating.
summary(PSMA_data[["Patient.Age"]]) #all patients over 18
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42.0 63.0 70.0 69.6 76.0 93.0
hist(PSMA_data$Patient.Age)
#Unique Patients
# Keep one row per MRN (the first one encountered), retaining all columns.
PSMA_data_unique <- distinct(PSMA_data, MRN, .keep_all = TRUE)
#429
# To list the patients with more than one scan:
#PSMA_data %>% filter(duplicated(MRN) | duplicated(MRN, fromLast = TRUE))
#3 patients had multiple scans, but none of these 6 scans had findings
##Descriptive Statistics##
# Age distribution, one row per patient
summary(PSMA_data_unique[["Patient.Age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42.00 63.00 70.00 69.62 76.00 93.00
hist(PSMA_data_unique$Patient.Age)
# Age as categorical: cut points 63 / 70 / 76 are the quartiles printed above.
# The conditions are evaluated in order, so each branch only needs its upper
# bound; a missing age matches no branch and stays NA.
PSMA_data_unique <- mutate(
  PSMA_data_unique,
  Age_quartile = case_when(
    Patient.Age < 63 ~ "Q1: <63",
    Patient.Age < 70 ~ "Q2: 63–69",
    Patient.Age < 76 ~ "Q3: 70–75",
    Patient.Age >= 76 ~ "Q4: ≥76"
  )
)
table(PSMA_data_unique[["Age_quartile"]])
##
## Q1: <63 Q2: 63–69 Q3: 70–75 Q4: ≥76
## 97 112 104 116
##Outcome - incidental pulmonary finding##
#Findings & Uptake
# Raw free-text values: an empty cell means no entry; otherwise an "x"/"X"
# marker, usually followed by a number in parentheses.
table(PSMA_data_unique[["Finding...Uptake"]])
##
## x x ( 4.8) x (1.2) x (1.4) x (1.6) x (1.7) x (1.8)
## 371 1 1 1 3 3 2 2
## x (1.9) x (10.1) x (16.8) x (2.1) x (2.2) x (2.7) x (2.8) x (2.9)
## 2 1 1 2 1 1 1 1
## x (21.7) x (23.4) x (28.5) x (3.1) x (3.8) x (3.9) x (4.1) x (5.1)
## 1 1 1 1 1 2 1 1
## x (6.7) x (8.8) x(0.8) x(1.7) x(14.4) x(2.2) X(2.2) X(2.7)
## 1 1 1 1 1 1 1 1
## x(2.8) X(3) x(3.1) x(3.4) X(3.4) x(3.8) X(3.9) x(30.9)
## 1 1 2 1 1 1 1 1
## x(32.2) x(4.9) x(5.9) X(6.1) x(6.4) x(7.0) x(7.7) x(9.8)
## 1 1 1 1 1 1 1 1
## X18.6)
## 1
# Binary indicator: 0 when the cell is empty, 1 for any non-empty entry.
PSMA_data_unique <- mutate(
  PSMA_data_unique,
  FindingUptake2 = ifelse(Finding...Uptake == "", 0, 1)
)
table(PSMA_data_unique[["FindingUptake2"]])#58
##
## 0 1
## 371 58
#new variable for SUV
# Pull the number out of the free-text Finding...Uptake cell.
# The earlier pattern required a matching "(...)" pair, which had two effects
# on the values tabulated for this column:
#   * "X18.6)" has no opening parenthesis, so its value was silently set to NA;
#   * "x ( 4.8)" was captured with its leading space (" 4.8"), creating a
#     separate category from a plain "4.8".
# Matching the number itself (optional integer part, optional decimal point,
# at least one digit) handles both. Cells with no number ("" or a bare "x")
# still give NA. SUV stays a character column, as before; wrap it in
# as.numeric() where numeric summaries are needed.
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(SUV = str_extract(Finding...Uptake, "\\d*\\.?\\d+"))
table(PSMA_data_unique$SUV)
## NOTE: the output below was captured with the earlier parenthesis-based
## pattern. After re-running, expect " 4.8" to appear as "4.8" and a new
## "18.6" entry (count 1) in both tables.
##
## 4.8 0.8 1.2 1.4 1.6 1.7 1.8 1.9 10.1 14.4 16.8 2.1 2.2 2.7 2.8 2.9
## 1 1 1 3 3 3 2 2 1 1 1 2 3 2 2 1
## 21.7 23.4 28.5 3 3.1 3.4 3.8 3.9 30.9 32.2 4.1 4.9 5.1 5.9 6.1 6.4
## 1 1 1 1 3 2 2 3 1 1 1 1 1 1 1 1
## 6.7 7.0 7.7 8.8 9.8
## 1 1 1 1 1
# Cross-check: every extracted SUV should sit in the FindingUptake2 == 1 row.
table(PSMA_data_unique$FindingUptake2, PSMA_data_unique$SUV)
##
## 4.8 0.8 1.2 1.4 1.6 1.7 1.8 1.9 10.1 14.4 16.8 2.1 2.2 2.7 2.8 2.9 21.7
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 1 1 1 1 3 3 3 2 2 1 1 1 2 3 2 2 1 1
##
## 23.4 28.5 3 3.1 3.4 3.8 3.9 30.9 32.2 4.1 4.9 5.1 5.9 6.1 6.4 6.7 7.0 7.7
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 1 1 1 1 3 2 2 3 1 1 1 1 1 1 1 1 1 1 1
##
## 8.8 9.8
## 0 0 0
## 1 1 1
#Findings & No Uptake
# Raw values: empty, "x" or "X".
table(PSMA_data_unique[["Finding.NO.Uptake"]])
##
## x X
## 346 75 8
# Binary indicator: 0 when the cell is empty, 1 for any non-empty entry.
PSMA_data_unique <- mutate(
  PSMA_data_unique,
  FindingNoUptake2 = ifelse(Finding.NO.Uptake == "", 0, 1)
)
table(PSMA_data_unique[["FindingNoUptake2"]]) #83
##
## 0 1
## 346 83
# Cross-tabulate the two indicators (rows: uptake, columns: no uptake).
table(
  PSMA_data_unique[["FindingUptake2"]],
  PSMA_data_unique[["FindingNoUptake2"]]
) #4 both
##
## 0 1
## 0 292 79
## 1 54 4
# Rows flagged in both columns, kept aside for inspection.
test <- filter(PSMA_data_unique, FindingUptake2 == 1, FindingNoUptake2 == 1)
#No Finding & Uptake
# Raw free-text values: empty, or an "x" marker optionally followed by a
# number in parentheses.
table(PSMA_data_unique[["NO.FINDING...Uptake"]])
##
## x x (1.2) x (1.3) x (1.6) x (1.9) x (2.4) x(0.8) x(10.9) x(3.6)
## 418 1 1 1 2 1 1 1 1 1
## x(8.9)
## 1
# Binary indicator: 0 when the cell is empty, 1 for any non-empty entry.
PSMA_data_unique <- mutate(
  PSMA_data_unique,
  NoFindingUptake2 = ifelse(NO.FINDING...Uptake == "", 0, 1)
)
table(PSMA_data_unique[["NoFindingUptake2"]]) #11
##
## 0 1
## 418 11
#new variable for SUV
# Pull the number out of the free-text NO.FINDING...Uptake cell.
# The pattern matches the number itself (optional integer part, optional
# decimal point, at least one digit) instead of requiring a matching "(...)"
# pair, so an entry with a missing parenthesis or a stray space inside the
# parentheses is still captured cleanly. For the values tabulated for this
# column the result is the same as with the parenthesis-based pattern.
# Cells with no number ("" or a bare "x") give NA. SUV2 stays character.
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(SUV2 = str_extract(NO.FINDING...Uptake, "\\d*\\.?\\d+"))
table(PSMA_data_unique$SUV2)
##
## 0.8 1.2 1.3 1.6 1.9 10.9 2.4 3.6 8.9
## 1 1 1 2 1 1 1 1 1
##Any Finding
# FindingsAny is 1 when at least one of the three indicators is 1, else 0
# (written as "0 when none of them is 1"). FindingsAny2 is the same flag as a
# labelled categorical variable.
PSMA_data_unique <- mutate(
  PSMA_data_unique,
  FindingsAny = ifelse(
    FindingUptake2 != 1 & FindingNoUptake2 != 1 & NoFindingUptake2 != 1,
    0,
    1
  ),
  FindingsAny2 = ifelse(FindingsAny == 1, "Findings_Yes", "Findings_No")
)
table(PSMA_data_unique[["FindingsAny"]])#148 (34.5%)
##
## 0 1
## 281 148
#as a categorical variable
table(PSMA_data_unique[["FindingsAny2"]])
##
## Findings_No Findings_Yes
## 281 148
#Making a Table 1
# Compare the age-quartile distribution between patients with and without
# any finding, then print the formatted summary table.
table_compare <- compareGroups(
  formula = FindingsAny2 ~ Age_quartile,
  data = PSMA_data_unique
)
createTable(table_compare)
##
## --------Summary descriptives table by 'FindingsAny2'---------
##
## ________________________________________________
## Findings_No Findings_Yes p.overall
## N=281 N=148
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Age_quartile: 0.006
## Q1: <63 76 (27.0%) 21 (14.2%)
## Q2: 63–69 76 (27.0%) 36 (24.3%)
## Q3: 70–75 63 (22.4%) 41 (27.7%)
## Q4: ≥76 66 (23.5%) 50 (33.8%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
##Solid Tumors
# Raw marker columns: an empty cell means not marked; "x"/"X" means marked.
table(PSMA_data_unique[["Solid"]])
##
## x X
## 353 64 12
table(PSMA_data_unique[["Sub.Solid"]])
##
## x
## 395 34
# Build the three indicators in one step, each stored as character "0"/"1":
#   Solid2         - Solid cell is non-empty
#   SubSolid2      - Sub.Solid cell is non-empty
#   Subsolid_Solid - either of the two is "1"
PSMA_data_unique <- mutate(
  PSMA_data_unique,
  Solid2 = as.character(ifelse(Solid == "", 0, 1)),
  SubSolid2 = as.character(ifelse(Sub.Solid == "", 0, 1)),
  Subsolid_Solid = as.character(
    ifelse(Solid2 == "1" | SubSolid2 == "1", 1, 0)
  )
)
table(PSMA_data_unique[["Solid2"]]) #76
##
## 0 1
## 353 76
table(PSMA_data_unique[["SubSolid2"]]) #34
##
## 0 1
## 395 34
table(PSMA_data_unique[["Subsolid_Solid"]])#108
##
## 0 1
## 321 108
##PSMA finding by solid category
table_compare2 <- compareGroups(
  formula = FindingsAny2 ~ Solid2 + SubSolid2 + Subsolid_Solid,
  data = PSMA_data_unique
)
createTable(table_compare2)
##
## --------Summary descriptives table by 'FindingsAny2'---------
##
## __________________________________________________
## Findings_No Findings_Yes p.overall
## N=281 N=148
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Solid2: <0.001
## 0 281 (100%) 72 (48.6%)
## 1 0 (0.00%) 76 (51.4%)
## SubSolid2: <0.001
## 0 281 (100%) 114 (77.0%)
## 1 0 (0.00%) 34 (23.0%)
## Subsolid_Solid: <0.001
## 0 281 (100%) 40 (27.0%)
## 1 0 (0.00%) 108 (73.0%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯