Reading in data and patient selection

Data Cleaning

# Main outcome ----
# Tabulate the raw acute-rejection codes, then collapse them into a binary
# indicator: codes 1 and 2 -> 1, code 3 -> 0, anything else (incl. NA) -> NA.
table(UNOS2$ACUTE_REJECTION_EPISODE)
## 
##     1     2     3 
##  2170   341 30054
UNOS2 <- UNOS2 %>%
  mutate(
    reject2 = case_when(
      ACUTE_REJECTION_EPISODE %in% c(1, 2) ~ 1,
      ACUTE_REJECTION_EPISODE == 3 ~ 0,
      TRUE ~ NA_real_
    )
  )

table(UNOS2$reject2) # missing for 25, dropping
## 
##     0     1 
## 30054  2511
# Keep only the rows whose outcome is defined.
UNOS2 <- filter(UNOS2, !is.na(reject2)) # 32565
  
# Main predictor ----
# Label the donor-recipient ABO match level: 1 -> "Identical",
# 2 -> "Compatible", any other value (incl. NA) -> NA.
table(UNOS2$Donor_Recipient_ABO_Match_Level)
## 
##     1     2 
## 30172  2393
UNOS2 <- UNOS2 %>%
  mutate(
    ABO2 = case_when(
      Donor_Recipient_ABO_Match_Level == 1 ~ "Identical",
      Donor_Recipient_ABO_Match_Level == 2 ~ "Compatible",
      TRUE ~ NA_character_
    )
  )
table(UNOS2$ABO2)
## 
## Compatible  Identical 
##       2393      30172
# Confounders ----
# Recipient gender
table(UNOS2$TCR_RECIPIENT_GENDER) # does sex of donor always match recipient?
## 
##     F     M 
## 12888 19677
# Recipient age
# table(UNOS2$Recipient_Age)
summary(UNOS2$Recipient_Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   53.00   61.00   57.46   66.00   84.00
# Donor age
# table(UNOS2$DONOR_AGE__YRS_) # still some child donors
summary(UNOS2$DONOR_AGE__YRS_)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     6.0    23.0    33.0    35.1    46.0    77.0
# Visual check of how recipient and donor age relate to each other.
UNOS2 %>%
  ggplot(aes(x = Recipient_Age, y = DONOR_AGE__YRS_)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Recipient Age vs Donor Age",
    x = "Recipient Age",
    y = "Donor Age"
  )

# Absolute recipient-donor age difference
UNOS2 <- UNOS2 %>%
  mutate(absolute_difference_age = abs(Recipient_Age - DONOR_AGE__YRS_))
summary(UNOS2$absolute_difference_age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   12.00   24.00   24.56   37.00   66.00
hist(UNOS2$absolute_difference_age)

# Recipient BMI
# table(UNOS2$Calculated_Recipient_BMI)
summary(UNOS2$Calculated_Recipient_BMI) # missing for 31
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   15.00   22.20   25.80   25.62   29.00   44.80      31
# Donor BMI
# table(UNOS2$Calculated_Donor_BMI)
summary(UNOS2$Calculated_Donor_BMI) # missing for 27
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   10.59   22.49   25.41   26.32   29.14   68.73      27
# Visual check of how recipient and donor BMI relate to each other.
UNOS2 %>%
  ggplot(aes(x = Calculated_Recipient_BMI, y = Calculated_Donor_BMI)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Recipient BMI vs Donor BMI",
    x = "Recipient BMI",
    y = "Donor BMI"
  )
## Warning: Removed 58 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Absolute recipient-donor BMI difference (NA when either BMI is missing)
UNOS2 <- UNOS2 %>%
  mutate(
    absolute_difference_BMI = abs(Calculated_Recipient_BMI - Calculated_Donor_BMI)
  )
summary(UNOS2$absolute_difference_BMI)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##  0.00002  2.15405  4.49630  5.47910  7.72303 42.81304       58
hist(UNOS2$absolute_difference_BMI)

#scatterplots look random so I can include both variables for donor and recipient

# Ischemic time (hours)
# table(UNOS2$Ischemic_Time__hours_)
summary(UNOS2$Ischemic_Time__hours_) # missing for 482
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   4.200   5.200   5.505   6.400  25.000     482
# Medical condition
# (1 = Intensive Care, 2 = Hospitalized but not ICU, 3 = Not Hospitalized)
# The counts below sum to 32564 of the 32565 retained rows, i.e. 1 missing.
table(UNOS2$TRR_Medical_Condition)
## 
##     1     2     3 
##  3971  3130 25463
# Store the codes as character so the models treat them as categories
# rather than as a numeric score.
UNOS2 <- UNOS2 %>%
  mutate(TRR_Medical_Condition = as.character(TRR_Medical_Condition))


#Race variables missing from the dataset, only ethnicity (should be noted in the discussion)

# Type of disease ----
# Collapse the thoracic diagnosis codes into four groups. Codes that are not
# listed fall into "Other"; because %in% never returns NA, a missing diagnosis
# code is also classified as "Other".
restrictive_codes <- c(
  1604, 1613, 420, 1605, 1521, 219, 1616, 1617, 1609, 438, 447, 403, 402,
  413, 448
)
obstructive_codes <- c(1607, 1606, 1608, 1611)
cystic_fibrosis_codes <- 1602

UNOS2 <- UNOS2 %>%
  mutate(
    disease2 = case_when(
      Thoracic_Diagnosis_from_TRR_TCR %in% restrictive_codes ~ "restrictive",
      Thoracic_Diagnosis_from_TRR_TCR %in% obstructive_codes ~ "obstructive",
      # Label is "cystic fibrosis" with no trailing space, so exact matches
      # such as disease2 == "cystic fibrosis" work as expected.
      Thoracic_Diagnosis_from_TRR_TCR %in% cystic_fibrosis_codes ~ "cystic fibrosis",
      TRUE ~ "Other"
    )
  )
table(UNOS2$disease2)
## 
## cystic fibrosis       obstructive            Other      restrictive 
##             2750             8729             2723            18363

##Univariate analysis

# Descriptive comparison of every candidate covariate between the two outcome
# groups (reject2 = 0 vs 1).
table1 <- compareGroups(
  reject2 ~ ABO2 + TCR_RECIPIENT_GENDER + Recipient_Age + DONOR_AGE__YRS_ +
    absolute_difference_age + Calculated_Recipient_BMI + Calculated_Donor_BMI +
    absolute_difference_BMI + Ischemic_Time__hours_ + TRR_Medical_Condition +
    disease2,
  data = UNOS2
)
createTable(table1)
## 
## --------Summary descriptives table by 'reject2'---------
## 
## _____________________________________________________________ 
##                                0            1       p.overall 
##                             N=30054       N=2511              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## ABO2:                                                 0.810   
##     Compatible           2212 (7.36%)  181 (7.21%)            
##     Identical            27842 (92.6%) 2330 (92.8%)           
## TCR_RECIPIENT_GENDER:                                <0.001   
##     F                    11768 (39.2%) 1120 (44.6%)           
##     M                    18286 (60.8%) 1391 (55.4%)           
## Recipient_Age             57.6 (12.2)  55.5 (13.2)   <0.001   
## DONOR_AGE__YRS_           35.1 (13.9)  35.0 (14.4)    0.756   
## absolute_difference_age   24.6 (14.7)  23.6 (14.8)   <0.001   
## Calculated_Recipient_BMI  25.6 (4.49)  25.9 (4.69)    0.004   
## Calculated_Donor_BMI      26.4 (5.58)  25.9 (5.31)   <0.001   
## absolute_difference_BMI   5.47 (4.46)  5.60 (4.31)    0.132   
## Ischemic_Time__hours_     5.49 (2.12)  5.72 (2.20)   <0.001   
## TRR_Medical_Condition:                               <0.001   
##     1                    3567 (11.9%)  404 (16.1%)            
##     2                    2861 (9.52%)  269 (10.7%)            
##     3                    23625 (78.6%) 1838 (73.2%)           
## disease2:                                            <0.001   
##     cystic fibrosis      2497 (8.31%)  253 (10.1%)            
##     obstructive          8126 (27.0%)  603 (24.0%)            
##     Other                2462 (8.19%)  261 (10.4%)            
##     restrictive          16969 (56.5%) 1394 (55.5%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

##Logistic regression

##Unadjusted

# Unadjusted logistic regression of the binary rejection outcome on ABO match.
model_undj <- glm(
  formula = reject2 ~ ABO2,
  family = binomial,
  data = UNOS2
)
summary(model_undj)
## 
## Call:
## glm(formula = reject2 ~ ABO2, family = binomial, data = UNOS2)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -2.50316    0.07731  -32.38   <2e-16 ***
## ABO2Identical  0.02248    0.08026    0.28    0.779    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 17692  on 32564  degrees of freedom
## Residual deviance: 17692  on 32563  degrees of freedom
## AIC: 17696
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios with profile-likelihood confidence limits for the unadjusted
# model. confint() on a glm profiles the likelihood, which is slow, so it is
# run once and the resulting matrix is reused for both bounds.
ci_undj <- confint(model_undj)
## Waiting for profiling to be done...
results <- data.frame(
  Odds_Ratio = exp(coef(model_undj)),
  Lower_CI = exp(ci_undj[, 1]),
  Upper_CI = exp(ci_undj[, 2])
)
print(results)
##               Odds_Ratio   Lower_CI   Upper_CI
## (Intercept)    0.0818264 0.07008647 0.09491312
## ABO2Identical  1.0227325 0.87640855 1.20067215
##Adjusted

# Adjusted logistic regression: ABO match plus recipient/donor covariates.
# Rows with a missing value in any model variable are dropped by glm().
model_adj <- glm(
  formula = reject2 ~ ABO2 + TCR_RECIPIENT_GENDER + Recipient_Age +
    DONOR_AGE__YRS_ + Calculated_Recipient_BMI + Calculated_Donor_BMI +
    Ischemic_Time__hours_ + TRR_Medical_Condition + disease2,
  family = binomial,
  data = UNOS2
)
summary(model_adj)
## 
## Call:
## glm(formula = reject2 ~ ABO2 + TCR_RECIPIENT_GENDER + Recipient_Age + 
##     DONOR_AGE__YRS_ + Calculated_Recipient_BMI + Calculated_Donor_BMI + 
##     Ischemic_Time__hours_ + TRR_Medical_Condition + disease2, 
##     family = binomial, data = UNOS2)
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -1.913429   0.196355  -9.745  < 2e-16 ***
## ABO2Identical            -0.012705   0.081132  -0.157    0.876    
## TCR_RECIPIENT_GENDERM    -0.213838   0.043712  -4.892 9.98e-07 ***
## Recipient_Age            -0.012769   0.002165  -5.898 3.67e-09 ***
## DONOR_AGE__YRS_           0.001016   0.001546   0.657    0.511    
## Calculated_Recipient_BMI  0.025590   0.005127   4.991 6.00e-07 ***
## Calculated_Donor_BMI     -0.017564   0.004020  -4.370 1.25e-05 ***
## Ischemic_Time__hours_     0.042134   0.009265   4.548 5.42e-06 ***
## TRR_Medical_Condition2   -0.120886   0.083844  -1.442    0.149    
## TRR_Medical_Condition3   -0.258513   0.060718  -4.258 2.07e-05 ***
## disease2obstructive       0.001114   0.101448   0.011    0.991    
## disease2Other             0.146952   0.104968   1.400    0.162    
## disease2restrictive       0.031642   0.098812   0.320    0.749    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 17442  on 32025  degrees of freedom
## Residual deviance: 17258  on 32013  degrees of freedom
##   (539 observations deleted due to missingness)
## AIC: 17284
## 
## Number of Fisher Scoring iterations: 5
# Odds ratios with profile-likelihood confidence limits for the adjusted
# model. confint() on a glm profiles the likelihood, which is slow, so it is
# run once and the resulting matrix is reused for both bounds.
ci_adj <- confint(model_adj)
## Waiting for profiling to be done...
results2 <- data.frame(
  Odds_Ratio = exp(coef(model_adj)),
  Lower_CI = exp(ci_adj[, 1]),
  Upper_CI = exp(ci_adj[, 2])
)
print(results2)
##                          Odds_Ratio  Lower_CI  Upper_CI
## (Intercept)               0.1475734 0.1003276 0.2166339
## ABO2Identical             0.9873751 0.8446654 1.1611406
## TCR_RECIPIENT_GENDERM     0.8074789 0.7412478 0.8798066
## Recipient_Age             0.9873124 0.9831472 0.9915260
## DONOR_AGE__YRS_           1.0010169 0.9979814 1.0040480
## Calculated_Recipient_BMI  1.0259200 1.0156593 1.0362788
## Calculated_Donor_BMI      0.9825898 0.9748242 0.9903058
## Ischemic_Time__hours_     1.0430345 1.0240529 1.0619327
## TRR_Medical_Condition2    0.8861354 0.7512793 1.0437510
## TRR_Medical_Condition3    0.7721988 0.6862612 0.8707250
## disease2obstructive       1.0011150 0.8211600 1.2222705
## disease2Other             1.1582978 0.9427752 1.4228765
## disease2restrictive       1.0321475 0.8510912 1.2537941