##Read in the dataset

# Load the raw imaging export from the shared drive. The trailing "#n" notes
# in this script are the row counts recorded after each step.
PSMA_data <- read.csv(
  file = "//users12/users12$/tumins01/Data/Personal/Students/Sharon/Lung cancer_PSMA_study_4.9.csv"
)
  #1494

##Data Cleaning/Wrangling##

## Row filters, applied in order (recorded row count after each step)

PSMA_data <- PSMA_data %>%
  # Remove blank rows: keep only rows that carry an accession number
  filter(!is.na(Accession.Number)) %>% #1487
  # Keep PSMA scans only: drop rows whose Image text mentions the agent FDG ...
  filter(!str_detect(Image, "\\bFDG\\b")) %>% #467
  # ... and then require the agent DCFPyL to be mentioned
  filter(str_detect(Image, "\\bDCFPyL\\b")) %>% #432
  # Remove females
  filter(Patient.Sex == "Male") #all males now

#Fix the Date

# Split Exam.Completed.Date at the space into character columns `Date` and
# `Time`; the original column is kept (remove = FALSE).
PSMA_data <- PSMA_data %>%
  separate(Exam.Completed.Date, into = c("Date", "Time"), sep = " ",  remove = FALSE)

# `Date` is a character column, so range() on it orders the values as text
# (e.g. "1/10/2024" sorts before "1/2/2024") and does not give the earliest and
# latest exam. Parse to Date first. `Date` itself is left as character so any
# later code that uses it is unaffected.
# Assumes month/day/year, as the recorded values suggest -- confirm against the
# export. An NA in the result means at least one value is missing or unparsed.
range(as.Date(PSMA_data$Date, format = "%m/%d/%Y"))
## Output recorded from the earlier range(PSMA_data$Date) call (text ordering,
## not chronological) -- re-run to refresh:
## [1] "1/10/2024" "5/3/2024"

#Age > 18
# Check of the age distribution on the filtered scan-level data. This runs
# before the data are reduced to one row per MRN, so a patient with several
# scans contributes several rows here.

#Age
# The recorded minimum below is 42, which is what the "over 18" note refers to.
summary(PSMA_data$Patient.Age) #all patients over 18
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    42.0    63.0    70.0    69.6    76.0    93.0
# Histogram of age for a visual check of the same distribution
hist(PSMA_data$Patient.Age)

#Unique Patients

# One row per patient: keep the first row encountered for each MRN, with all
# columns retained.
PSMA_data_unique <- PSMA_data %>%
  filter(!duplicated(MRN))
  #429

#PSMA_data %>%  filter(duplicated(MRN) | duplicated(MRN, fromLast = TRUE))
  #3 patients had multiple scans, but none of these 6 scans had findings

##Descriptive Statistics##

#Age
# Age distribution on the one-row-per-MRN data. The recorded quartiles below
# (63 / 70 / 76) are the cut points used for the Age_quartile variable.
summary(PSMA_data_unique$Patient.Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   42.00   63.00   70.00   69.62   76.00   93.00
# Histogram of age at the patient level
hist(PSMA_data_unique$Patient.Age)

#Age as categorical 

# Bin age at the cut points 63 / 70 / 76. case_when() uses the first condition
# that is TRUE, so each upper bound alone is enough once the lower bins have
# been ruled out. A missing age matches nothing and stays NA.
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(
    Age_quartile = case_when(
      Patient.Age < 63 ~ "Q1: <63",
      Patient.Age < 70 ~ "Q2: 63–69",
      Patient.Age < 76 ~ "Q3: 70–75",
      Patient.Age >= 76 ~ "Q4: ≥76"
    )
  )

# Patients per age bin
table(PSMA_data_unique$Age_quartile)
## 
##   Q1: <63 Q2: 63–69 Q3: 70–75   Q4: ≥76 
##        97       112       104       116

##Outcome - incidental pulmonary finding##

#Findings & Uptake
# Raw free-text entries of the "finding with uptake" column. An empty cell
# means nothing was recorded; otherwise an x/X mark, usually followed by a
# number in brackets.
table(PSMA_data_unique$Finding...Uptake)
## 
##                 x x ( 4.8)  x (1.2)  x (1.4)  x (1.6)  x (1.7)  x (1.8) 
##      371        1        1        1        3        3        2        2 
##  x (1.9) x (10.1) x (16.8)  x (2.1)  x (2.2)  x (2.7)  x (2.8)  x (2.9) 
##        2        1        1        2        1        1        1        1 
## x (21.7) x (23.4) x (28.5)  x (3.1)  x (3.8)  x (3.9)  x (4.1)  x (5.1) 
##        1        1        1        1        1        2        1        1 
##  x (6.7)  x (8.8)   x(0.8)   x(1.7)  x(14.4)   x(2.2)   X(2.2)   X(2.7) 
##        1        1        1        1        1        1        1        1 
##   x(2.8)     X(3)   x(3.1)   x(3.4)   X(3.4)   x(3.8)   X(3.9)  x(30.9) 
##        1        1        2        1        1        1        1        1 
##  x(32.2)   x(4.9)   x(5.9)   X(6.1)   x(6.4)   x(7.0)   x(7.7)   x(9.8) 
##        1        1        1        1        1        1        1        1 
##   X18.6) 
##        1
# Binary flag: 1 when the cell holds any text, 0 when it is empty
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(FindingUptake2 = as.numeric(Finding...Uptake != ""))

table(PSMA_data_unique$FindingUptake2)#58
## 
##   0   1 
## 371  58
  #new variable for SUV
# Pull the SUV out of the free-text cell (e.g. "x (2.2)" -> "2.2").
# The pattern matches the number itself rather than "whatever sits between
# ( and )". The bracket-based pattern returned " 4.8" (leading space) for
# "x ( 4.8)" and NA for "X18.6)", which has no opening bracket, so that
# patient's SUV was silently dropped. Cells with no number ("x", "") still
# give NA. Only the first number in a cell is taken.
# SUV stays a character column; wrap in as.numeric() for numeric summaries.
PSMA_data_unique <- PSMA_data_unique %>%
   mutate(SUV = str_extract(Finding...Uptake, "\\d*\\.?\\d+"))
table(PSMA_data_unique$SUV)
# NOTE: the output below was recorded with the earlier bracket-based pattern
# (56 values). After re-running, expect " 4.8" to appear as "4.8" and a new
# "18.6" entry (57 values in total).
## 
##  4.8  0.8  1.2  1.4  1.6  1.7  1.8  1.9 10.1 14.4 16.8  2.1  2.2  2.7  2.8  2.9 
##    1    1    1    3    3    3    2    2    1    1    1    2    3    2    2    1 
## 21.7 23.4 28.5    3  3.1  3.4  3.8  3.9 30.9 32.2  4.1  4.9  5.1  5.9  6.1  6.4 
##    1    1    1    1    3    2    2    3    1    1    1    1    1    1    1    1 
##  6.7  7.0  7.7  8.8  9.8 
##    1    1    1    1    1
# Cross-check: every extracted SUV should fall in the FindingUptake2 == 1 row.
# NOTE: the recorded cross-tab that follows this call also predates the
# pattern change -- re-run to refresh it.
table(PSMA_data_unique$FindingUptake2, PSMA_data_unique$SUV) 
##    
##      4.8 0.8 1.2 1.4 1.6 1.7 1.8 1.9 10.1 14.4 16.8 2.1 2.2 2.7 2.8 2.9 21.7
##   0    0   0   0   0   0   0   0   0    0    0    0   0   0   0   0   0    0
##   1    1   1   1   3   3   3   2   2    1    1    1   2   3   2   2   1    1
##    
##     23.4 28.5 3 3.1 3.4 3.8 3.9 30.9 32.2 4.1 4.9 5.1 5.9 6.1 6.4 6.7 7.0 7.7
##   0    0    0 0   0   0   0   0    0    0   0   0   0   0   0   0   0   0   0
##   1    1    1 1   3   2   2   3    1    1   1   1   1   1   1   1   1   1   1
##    
##     8.8 9.8
##   0   0   0
##   1   1   1
#Findings & No Uptake
# Raw entries of the "finding without uptake" column (empty, "x" or "X")
table(PSMA_data_unique$Finding.NO.Uptake)
## 
##       x   X 
## 346  75   8
# Binary flag: 1 when the cell holds any text, 0 when it is empty
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(FindingNoUptake2 = as.numeric(Finding.NO.Uptake != ""))

table(PSMA_data_unique$FindingNoUptake2) #83
## 
##   0   1 
## 346  83
# Overlap between the "with uptake" and "without uptake" flags
table(PSMA_data_unique$FindingUptake2, PSMA_data_unique$FindingNoUptake2) #4 both
##    
##       0   1
##   0 292  79
##   1  54   4
# Patients flagged in both columns, pulled out for inspection
test <- PSMA_data_unique %>%
  filter(FindingUptake2 == 1, FindingNoUptake2 == 1)

#No Finding & Uptake
# Raw free-text entries of the "no finding but uptake" column
table(PSMA_data_unique$NO.FINDING...Uptake) 
## 
##               x x (1.2) x (1.3) x (1.6) x (1.9) x (2.4)  x(0.8) x(10.9)  x(3.6) 
##     418       1       1       1       2       1       1       1       1       1 
##  x(8.9) 
##       1
# Binary flag: 1 when the cell holds any text, 0 when it is empty
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(NoFindingUptake2 = ifelse(NO.FINDING...Uptake != "", 1, 0))

table(PSMA_data_unique$NoFindingUptake2) #11
## 
##   0   1 
## 418  11
# SUV for this column. Match the number itself instead of the text between
# "(" and ")": a bracket-based pattern returns NA when a bracket is missing
# and keeps any padding space inside the brackets. Entries of that kind appear
# in the Finding...Uptake column ("X18.6)", "x ( 4.8)"). For the values listed
# above the result is the same either way. Only the first number in a cell is
# taken; cells without a number give NA. SUV2 stays a character column.
PSMA_data_unique <- PSMA_data_unique %>%
   mutate(SUV2 = str_extract(NO.FINDING...Uptake, "\\d*\\.?\\d+"))
table(PSMA_data_unique$SUV2)
## 
##  0.8  1.2  1.3  1.6  1.9 10.9  2.4  3.6  8.9 
##    1    1    1    2    1    1    1    1    1
##Any Finding
# Composite outcome: 1 if any of the three flags is set, otherwise 0.
# FindingsAny2 is the same outcome with text labels, used as the grouping
# variable in the comparison tables.
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(
    FindingsAny = as.numeric(
      FindingUptake2 == 1 | FindingNoUptake2 == 1 | NoFindingUptake2 == 1
    ),
    FindingsAny2 = if_else(FindingsAny == 1, "Findings_Yes", "Findings_No")
  )

table(PSMA_data_unique$FindingsAny)#148 (34.5%)
## 
##   0   1 
## 281 148
  #as a categorical variable
table(PSMA_data_unique$FindingsAny2)
## 
##  Findings_No Findings_Yes 
##          281          148

#Making a Table 1

# Table 1: age quartile distribution by any-finding status
table_compare <- compareGroups(
  FindingsAny2 ~ Age_quartile,
  data = PSMA_data_unique
)
table_compare %>% createTable()
## 
## --------Summary descriptives table by 'FindingsAny2'---------
## 
## ________________________________________________ 
##               Findings_No Findings_Yes p.overall 
##                  N=281       N=148               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Age_quartile:                            0.006   
##     Q1: <63   76 (27.0%)   21 (14.2%)            
##     Q2: 63–69 76 (27.0%)   36 (24.3%)            
##     Q3: 70–75 63 (22.4%)   41 (27.7%)            
##     Q4: ≥76   66 (23.5%)   50 (33.8%)            
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

##Solid Tumors

# Raw entries of the Solid and Sub.Solid columns (empty, "x" or "X")
table(PSMA_data_unique$Solid)
## 
##       x   X 
## 353  64  12
table(PSMA_data_unique$Sub.Solid)
## 
##       x 
## 395  34
# Flags stored as character "0"/"1" so compareGroups treats them as
# categorical. Solid2 / SubSolid2 mark a non-empty cell; Subsolid_Solid marks
# either one.
PSMA_data_unique <- PSMA_data_unique %>%
  mutate(
    Solid2 = as.character(as.integer(Solid != "")),
    SubSolid2 = as.character(as.integer(Sub.Solid != "")),
    Subsolid_Solid = as.character(as.integer(Solid2 == "1" | SubSolid2 == "1"))
  )

table(PSMA_data_unique$Solid2) #76
## 
##   0   1 
## 353  76
table(PSMA_data_unique$SubSolid2) #34
## 
##   0   1 
## 395  34
table(PSMA_data_unique$Subsolid_Solid)#108
## 
##   0   1 
## 321 108

##PSMA finding by solid category

# Solid / sub-solid flags by any-finding status.
# NOTE(review): in the recorded output every Findings_No patient is 0 on all
# three flags, so the flags look like they are only filled in for patients with
# a finding. If so, the p-values reflect that structure rather than an
# association -- confirm whether this comparison should be restricted to
# Findings_Yes.
table_compare2 <- compareGroups(
  FindingsAny2 ~ Solid2 + SubSolid2 + Subsolid_Solid,
  data = PSMA_data_unique
)
table_compare2 %>% createTable()
## 
## --------Summary descriptives table by 'FindingsAny2'---------
## 
## __________________________________________________ 
##                 Findings_No Findings_Yes p.overall 
##                    N=281       N=148               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Solid2:                                   <0.001   
##     0           281 (100%)   72 (48.6%)            
##     1            0 (0.00%)   76 (51.4%)            
## SubSolid2:                                <0.001   
##     0           281 (100%)  114 (77.0%)            
##     1            0 (0.00%)   34 (23.0%)            
## Subsolid_Solid:                           <0.001   
##     0           281 (100%)   40 (27.0%)            
##     1            0 (0.00%)  108 (73.0%)            
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯