Introduction

This report presents an open-access statistical analysis of the OuTSMART DSA-positive database.

Install Packages and load their libraries

options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install only the packages that are not yet available, so that re-knitting
# the report does not download and reinstall every package on each run.
# 'survival' is listed as well because it is attached below with library().
required_pkgs <- c(
  "dplyr", "knitr", "tinytex", "mosaic", "skimr",
  "tidyverse", "ggplot2", "survminer", "tidyr", "survival"
)
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(tidyr)
library(tidyverse) 
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.4
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.2     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr) 
library(skimr) 
library(survival) 
## Warning: package 'survival' was built under R version 4.3.3
library(tinytex) 
## Warning: package 'tinytex' was built under R version 4.3.3
library(ggplot2) 
library(mosaic)
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Attaching package: 'mosaic'
## 
## The following object is masked from 'package:Matrix':
## 
##     mean
## 
## The following object is masked from 'package:skimr':
## 
##     n_missing
## 
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## 
## The following object is masked from 'package:purrr':
## 
##     cross
## 
## The following object is masked from 'package:ggplot2':
## 
##     stat
## 
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
##     quantile, sd, t.test, var
## 
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
library(survminer)
## Warning: package 'survminer' was built under R version 4.3.3
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma

Load the dataset, where v5 is the data

# Location of the filtered OuTSMART export. The path is defined once and
# handed to read.csv() directly, so the working directory is not changed
# (the original setwd() was redundant: read.csv() already got a full path).
data_dir <- "~/Documents/ACF:PhD/OUTSMART DATA FILTERED"
kmcurve5_path <- file.path(data_dir, "kmcurve5.csv")
if (!file.exists(kmcurve5_path)) {
  stop("Dataset not found: ", kmcurve5_path, call. = FALSE)
}
kmcurve5 <- read.csv(kmcurve5_path)
# Preview the first rows to confirm the import
head(kmcurve5)
##         Label DSA_End DSA_End_32 ABMR etoABMR               banff     TxAge CKD
## 1  P111557 LH       1          1    0      56                <NA> 15.192334   7
## 2 P040690 MPO       0          1    1       4          Not graded  1.207392   4
## 3 P041108 LPP       1          1    1       5 Cat 2 Acute Type II 10.171116  NA
## 4  P082010 RT       1          1    0      50                <NA>  6.956879   7
## 5  P030097 MR       1          1    1       7       Cat 2 Chronic 12.284736   2
## 6  P030104 BK       1          0    1      12 Cat 2 Acute Type II  8.265572  NA
##   CKD2 Other_exp GF_01 GF_cause ran Date.of.Randomisation    GF_Date time_to_gf
## 1    2      FSGS     0            1            13/04/2016 31/12/2020         56
## 2    4               1     ABMR   2            24/10/2014 24/12/2015         14
## 3   NA               1     ABMR   1            04/06/2015 12/05/2017         23
## 4    2       IgA     0            1            14/10/2016 31/12/2020         50
## 5    2               1     ABMR   2            31/01/2014 10/11/2017         45
## 6   NA               1     ABMR   2            05/12/2013 04/07/2016         30
##   age sex eth eth2 total_sMFI HLAClass blUPCR UPCR_End HLA.A HLA.B HLA.DRB1
## 1  50   0   2    2       9326        2    109      252     1     2        1
## 2  55   0   2    2       5326        2     17       70     1     0        1
## 3  54   0   2    2       9982        2     47      535     2     1        0
## 4  60   0   2    2       4694        2     NA       NA     1     1        1
## 5  60   0   2    2      31207     Both     NA      372     1     1        2
## 6  33   0   2    2      11711        1    155      225     1     1        0
##   HLAMM bltac tac2 tac3 tac4 etac ciclo tac aza MMF pred siro ever blGFR eGFR
## 1     4    NA   NA   12   NA    7     1   0   1   0    0    0    0    71   38
## 2     2     6    3   NA   NA   NA     0   1   0   1    0    0    0    51   12
## 3     3    NA    8    5   NA   NA     0   1   0   1    0    0    0    55   28
## 4     3     6    8    6    6    7     0   1   0   1    1    0    0    51   50
## 5     4    NA   NA    8    6    6     1   0   1   0    0    0    0    32   18
## 6     2    NA   NA    6    6   NA     0   0   0   1    1    0    0    52   22
##   TX_DSA total_eMFI Hclass_Last3
## 1      0         NA           NA
## 2      0         NA           NA
## 3      0         NA           NA
## 4      0         NA           NA
## 5      0      29459           NA
## 6      0      12232           NA
# Working copy of the dataset used by every analysis below
v5 <- kmcurve5

Display the two main groups by the presence or absence of DSA at the end of the study (DSA_End, where DSA+/+ = 1 and DSA+/- = 0)

# Number of patients in each DSA_End group
table(v5[["DSA_End"]])
## 
##  0  1 
## 76 39
# Treat DSA_End as a categorical grouping variable from here on
v5[["DSA_End"]] <- as.factor(v5[["DSA_End"]])

Determine mean age at randomisation (age, numeric) and age range in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run T-test to determine any statistical differences between groups

# Summary statistics of age within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(age)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 0 1 57.14 12.35 27 48.75 57.5 65.25 80 ▂▆▇▇▆
age 1 0 1 53.67 14.33 27 42.00 55.0 64.00 78 ▆▂▇▇▅
# Two-sample t-test of age between the two DSA_End groups
t.test(age ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  age by DSA_End
## t = 1.2893, df = 67.519, p-value = 0.2017
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -1.905704  8.861845
## sample estimates:
## mean in group 0 mean in group 1 
##        57.14474        53.66667

Determine percentage of men(sex, where 0=male and 1=female) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square/fisher analysis to determine any statistical differences between groups

# Column percentages: distribution of sex within each DSA_End group
100 * proportions(with(v5, table(sex, DSA_End)), margin = 2)
##    DSA_End
## sex        0        1
##   0 77.63158 76.92308
##   1 22.36842 23.07692
# Chi-squared test on the sex x DSA_End contingency table
v5 %>%
  with(table(sex, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 2.5842e-30, df = 1, p-value = 1

Determine the number and percentage of each ethnicity(eth, where 0=Asian and 1=Black 2= White 3= Mixed and 4= Other ) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square/fisher analysis to determine any statistical differences between groups

# Counts of each ethnicity code by DSA_End group
with(v5, table(eth, DSA_End))
##    DSA_End
## eth  0  1
##   0  8  5
##   1 15  5
##   2 51 29
##   3  1  0
##   4  1  0
# Column percentages: distribution of ethnicity within each DSA_End group
100 * proportions(with(v5, table(eth, DSA_End)), margin = 2)
##    DSA_End
## eth         0         1
##   0 10.526316 12.820513
##   1 19.736842 12.820513
##   2 67.105263 74.358974
##   3  1.315789  0.000000
##   4  1.315789  0.000000
# Chi-squared test on the eth x DSA_End contingency table
v5 %>%
  with(table(eth, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 2.0502, df = 4, p-value = 0.7265
# Fisher's exact test on the same eth x DSA_End table
v5 %>%
  with(table(eth, DSA_End)) %>%
  fisher.test()
## 
##  Fisher's Exact Test for Count Data
## 
## data:  .
## p-value = 0.8569
## alternative hypothesis: two.sided

Determine the mean age of the transplant at randomisation (TxAge, numeric) by DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and run a t-test to determine statistical significance

# Summary statistics of TxAge within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(TxAge)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
TxAge 0 1 0.99 9.01 7.23 1.08 2.88 7.27 12.25 30.75 ▇▆▁▂▁
TxAge 1 1 0.97 11.92 8.63 1.09 4.37 9.91 19.27 31.15 ▇▃▂▃▂
# Two-sample t-test of TxAge between the two DSA_End groups
t.test(TxAge ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  TxAge by DSA_End
## t = -1.7869, df = 63.952, p-value = 0.07869
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -6.1663905  0.3435564
## sample estimates:
## mean in group 0 mean in group 1 
##        9.009281       11.920698

Determine the cause of CKD (CKD2, where 1 = DM, 2 = GN, 3 = PKD, 4 = HTN, 5 = Congenital, 6 = Obstructive, 7 = Other) by DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and run a chi-square analysis to determine any statistical differences between groups

# Counts of each CKD cause code by DSA_End group
with(v5, table(CKD2, DSA_End))
##     DSA_End
## CKD2  0  1
##    1  4  2
##    2 22 13
##    3 10  4
##    4  9  2
##    5  9  2
##    6  2  1
##    7 13  7
# Column percentages: distribution of CKD cause within each DSA_End group
100 * proportions(with(v5, table(CKD2, DSA_End)), margin = 2)
##     DSA_End
## CKD2         0         1
##    1  5.797101  6.451613
##    2 31.884058 41.935484
##    3 14.492754 12.903226
##    4 13.043478  6.451613
##    5 13.043478  6.451613
##    6  2.898551  3.225806
##    7 18.840580 22.580645
# Chi-squared test on the CKD2 x DSA_End contingency table
v5 %>%
  with(table(CKD2, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 2.5185, df = 6, p-value = 0.8664

Determine number and percentage of patients who are on prednisolone (pred, where 0=not taking pred 1=taking pred ) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square analysis to determine any statistical differences between groups

# Counts of prednisolone use by DSA_End group
with(v5, table(pred, DSA_End))
##     DSA_End
## pred  0  1
##    0 31 19
##    1 45 20
# Column percentages: prednisolone use within each DSA_End group
100 * proportions(with(v5, table(pred, DSA_End)), margin = 2)
##     DSA_End
## pred        0        1
##    0 40.78947 48.71795
##    1 59.21053 51.28205
# Chi-squared test on the pred x DSA_End contingency table
v5 %>%
  with(table(pred, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.37613, df = 1, p-value = 0.5397

Determine percentage of patients who are on tacrolimus(tac where 0=not taking tac 1=taking tac ) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square to determine any statistical differences between groups

# Column percentages: tacrolimus use within each DSA_End group
100 * proportions(with(v5, table(tac, DSA_End)), margin = 2)
##    DSA_End
## tac        0        1
##   0 35.52632 53.84615
##   1 64.47368 46.15385
# Chi-squared test on the tac x DSA_End contingency table
v5 %>%
  with(table(tac, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 2.8437, df = 1, p-value = 0.09173

Determine percentage of patients who are on MMF(MMF, where 0=not taking MMF 1=taking MMF) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square analysis to determine any statistical differences between groups

# Column percentages: MMF use within each DSA_End group
100 * proportions(with(v5, table(MMF, DSA_End)), margin = 2)
##    DSA_End
## MMF        0        1
##   0 36.84211 41.02564
##   1 63.15789 58.97436
# Chi-squared test on the MMF x DSA_End contingency table
v5 %>%
  with(table(MMF, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.054923, df = 1, p-value = 0.8147

Determine percentage of patients who are on Ciclosporin(ciclo, where 0=not taking ciclo 1=taking ciclo) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square analysis to determine any statistical differences between groups

# Column percentages: ciclosporin use within each DSA_End group
100 * proportions(with(v5, table(ciclo, DSA_End)), margin = 2)
##      DSA_End
## ciclo        0        1
##     0 76.31579 66.66667
##     1 23.68421 33.33333
# Chi-squared test on the ciclo x DSA_End contingency table
v5 %>%
  with(table(ciclo, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.77795, df = 1, p-value = 0.3778

Determine percentage of patients who are on Azathioprine(aza where 0=not taking azathioprine 1=taking azathioprine) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square analysis to determine any statistical differences between groups

# Column percentages: azathioprine use within each DSA_End group
100 * proportions(with(v5, table(aza, DSA_End)), margin = 2)
##    DSA_End
## aza        0        1
##   0 78.94737 69.23077
##   1 21.05263 30.76923
# Chi-squared test on the aza x DSA_End contingency table
v5 %>%
  with(table(aza, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.84622, df = 1, p-value = 0.3576

Determine percentage of patients who are on Sirolimus(siro, where 0=not taking sirolimus 1=taking sirolimus) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square/fisher analysis to determine any statistical differences between groups

# Column percentages: sirolimus use within each DSA_End group
100 * proportions(with(v5, table(siro, DSA_End)), margin = 2)
##     DSA_End
## siro         0         1
##    0 96.052632 92.307692
##    1  3.947368  7.692308
# Chi-squared test on the siro x DSA_End contingency table
v5 %>%
  with(table(siro, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.1698, df = 1, p-value = 0.6803
# Fisher's exact test on the same siro x DSA_End table
v5 %>%
  with(table(siro, DSA_End)) %>%
  fisher.test()
## 
##  Fisher's Exact Test for Count Data
## 
## data:  .
## p-value = 0.4061
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.2568703 15.7980616
## sample estimates:
## odds ratio 
##   2.014436

Determine mean Tacrolimus level at baseline (bltac, numeric) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run T-test to determine any statistical differences between groups

# Summary statistics of bltac within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(bltac)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bltac 0 28 0.63 5.96 2.32 2 4 6 7 12 ▆▇▅▂▁
bltac 1 24 0.38 6.60 2.95 2 5 6 7 14 ▃▇▅▁▁
# Two-sample t-test of bltac between the two DSA_End groups
t.test(bltac ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  bltac by DSA_End
## t = -0.77158, df = 19.749, p-value = 0.4495
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -2.377819  1.094485
## sample estimates:
## mean in group 0 mean in group 1 
##        5.958333        6.600000

Determine mean Tacrolimus level at end of study (etac, numeric) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run T-test to determine any statistical differences between groups

# Summary statistics of etac within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(etac)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
etac 0 25 0.67 6.73 2.77 3 5 6 8 15 ▇▇▅▁▂
etac 1 16 0.59 6.70 1.89 4 6 6 7 12 ▃▇▁▂▁
# Two-sample t-test of etac between the two DSA_End groups
t.test(etac ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  etac by DSA_End
## t = 0.053894, df = 60.295, p-value = 0.9572
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -1.077498  1.137174
## sample estimates:
## mean in group 0 mean in group 1 
##        6.725490        6.695652

Determine baseline eGFR (blGFR, numeric) by DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and perform a t-test to determine statistical significance

# Summary statistics of blGFR within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(blGFR)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
blGFR 0 0 1 53.46 16.23 22 41 53 63.5 90 ▂▇▇▃▂
blGFR 1 0 1 52.87 15.33 30 41 51 61.0 93 ▇▇▅▂▂
# Two-sample t-test of blGFR between the two DSA_End groups
t.test(blGFR ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  blGFR by DSA_End
## t = 0.19108, df = 80.759, p-value = 0.8489
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -5.541816  6.719279
## sample estimates:
## mean in group 0 mean in group 1 
##        53.46053        52.87179

Determine baseline UPCR (blUPCR) by DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and perform t.test to determine statistical significance

# Summary statistics of blUPCR within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(blUPCR)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
blUPCR 0 16 0.79 32.15 41.56 5 10.75 19.5 35.25 240 ▇▁▁▁▁
blUPCR 1 12 0.69 78.63 126.90 3 20.00 35.0 56.50 607 ▇▁▁▁▁
# Two-sample t-test of blUPCR between the two DSA_End groups
t.test(blUPCR ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  blUPCR by DSA_End
## t = -1.8588, df = 28.541, p-value = 0.0734
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -97.656635   4.697375
## sample estimates:
## mean in group 0 mean in group 1 
##        32.15000        78.62963

Determine the number of HLA mismatches (HLAMM, where 0 = 0 mismatches, 1 = 1 HLA mismatch, 2 = 2 HLA mismatches, 3 = 3 HLA mismatches, 4 = 4 HLA mismatches, 5 = 5 HLA mismatches, 6 = 6 HLA mismatches) by DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and perform a chi-squared/Fisher analysis to determine statistical significance

# Column percentages: HLA mismatch count within each DSA_End group
100 * proportions(with(v5, table(HLAMM, DSA_End)), margin = 2)
##      DSA_End
## HLAMM         0         1
##     0  3.947368  5.128205
##     1  3.947368  5.128205
##     2 26.315789 17.948718
##     3 26.315789 25.641026
##     4 17.105263 25.641026
##     5 10.526316 15.384615
##     6 11.842105  5.128205
# Chi-squared test on the HLAMM x DSA_End contingency table
v5 %>%
  with(table(HLAMM, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 3.5916, df = 6, p-value = 0.7317
# Fisher's exact test on the same HLAMM x DSA_End table
v5 %>%
  with(table(HLAMM, DSA_End)) %>%
  fisher.test()
## 
##  Fisher's Exact Test for Count Data
## 
## data:  .
## p-value = 0.721
## alternative hypothesis: two.sided

Determine number of HLA class II Mismatches (HLA.DRB1 where 0= 0 mismatches, 1= 1 HLA mismatch, 2= 2 HLA mismatches) by DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and perform chisquared/fisher analysis to determine statistical significance

# Counts of HLA.DRB1 mismatches by DSA_End group
with(v5, table(HLA.DRB1, DSA_End))
##         DSA_End
## HLA.DRB1  0  1
##        0 22  7
##        1 38 27
##        2 16  5
# Column percentages: HLA.DRB1 mismatches within each DSA_End group
100 * proportions(with(v5, table(HLA.DRB1, DSA_End)), margin = 2)
##         DSA_End
## HLA.DRB1        0        1
##        0 28.94737 17.94872
##        1 50.00000 69.23077
##        2 21.05263 12.82051
# Chi-squared test on the HLA.DRB1 x DSA_End contingency table
v5 %>%
  with(table(HLA.DRB1, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 3.8793, df = 2, p-value = 0.1438

Determine DSA positivity at the time of transplantation (TX_DSA, where 0 = absent DSA at transplantation, 1 = pre-formed DSA at time of transplant) by DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and perform a chi-squared/Fisher analysis to determine statistical significance

# Column percentages: TX_DSA status within each DSA_End group
100 * proportions(with(v5, table(TX_DSA, DSA_End)), margin = 2)
##       DSA_End
## TX_DSA        0        1
##      0 89.47368 87.17949
##      1 10.52632 12.82051
# Chi-squared test on the TX_DSA x DSA_End contingency table
v5 %>%
  with(table(TX_DSA, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.0032259, df = 1, p-value = 0.9547
# Fisher's exact test on the same TX_DSA x DSA_End table
v5 %>%
  with(table(TX_DSA, DSA_End)) %>%
  fisher.test()
## 
##  Fisher's Exact Test for Count Data
## 
## data:  .
## p-value = 0.7602
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.2975148 4.7180258
## sample estimates:
## odds ratio 
##   1.247485

Determine the percentage of patients who were randomised to the biomarker led group and standard of care group in the OuTSMART study(ran where 1= Biomarker led and 2= Standard of care) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square analysis to determine any statistical differences between groups

# Column percentages: randomisation arm within each DSA_End group
100 * proportions(with(v5, table(ran, DSA_End)), margin = 2)
##    DSA_End
## ran        0        1
##   1 51.31579 53.84615
##   2 48.68421 46.15385
# Chi-squared test on the ran x DSA_End contingency table
v5 %>%
  with(table(ran, DSA_End)) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.0036007, df = 1, p-value = 0.9522

Determine the HLA class at baseline (HLAClass, where 1 = HLA class I, 2 = HLA class II and Both = both HLA class I and II) in each DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and run a chi-square/Fisher test

# Column percentages: HLA class within each DSA_End group
100 * proportions(with(v5, table(HLAClass, DSA_End)), margin = 2)
##         DSA_End
## HLAClass         0         1
##     1    42.105263 17.948718
##     2    53.947368 74.358974
##     Both  3.947368  7.692308
# Chi-squared test on the HLAClass x DSA_End contingency table
v5 %>%
  with(table(HLAClass, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 6.8919, df = 2, p-value = 0.03188
# Fisher's exact test on the same HLAClass x DSA_End table
v5 %>%
  with(table(HLAClass, DSA_End)) %>%
  fisher.test()
## 
##  Fisher's Exact Test for Count Data
## 
## data:  .
## p-value = 0.02204
## alternative hypothesis: two.sided

Determine the mean and range of total MFI at baseline(total_sMFI) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run t.test analysis to determine statistical significance

# Summary statistics of total_sMFI within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(total_sMFI)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
total_sMFI 0 0 1 6518.33 6249.12 2082 2943.5 4370.5 6774.25 37857 ▇▂▁▁▁
total_sMFI 1 0 1 11567.74 7091.87 2574 7413.0 9531.0 12813.50 33200 ▆▇▂▁▁
# Two-sample t-test of total_sMFI between the two DSA_End groups
t.test(total_sMFI ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  total_sMFI by DSA_End
## t = -3.76, df = 68.782, p-value = 0.000353
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -7728.624 -2370.205
## sample estimates:
## mean in group 0 mean in group 1 
##        6518.329       11567.744

Determine the median total MFI at baseline (total_sMFI) in each DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and plot the distributions to look at patterns

# Median total_sMFI per DSA_End group (missing values ignored)
summarise(
  group_by(v5, DSA_End),
  median_sMFI = median(total_sMFI, na.rm = TRUE)
)
## # A tibble: 2 × 2
##   DSA_End median_sMFI
##   <fct>         <dbl>
## 1 0             4370.
## 2 1             9531
# Box plot of total_sMFI per DSA_End group, with the individual
# observations overlaid as jittered points.
ggplot(data = v5, mapping = aes(x = factor(DSA_End), y = total_sMFI)) +
  geom_boxplot(alpha = 0.6, fill = "skyblue") +
  geom_jitter(alpha = 0.4, color = "darkblue", width = 0.2) +
  scale_x_discrete(labels = c("0" = "DSA+/-", "1" = "DSA+/+")) +
  labs(
    title = "Distribution of total MFI at baseline (DSA+/+ vs DSA+/-)",
    x = "DSA Category",
    y = "Total MFI at baseline"
  ) +
  theme_minimal()

Determine the mean total MFI post enrollment (total_eMFI)

# Summary statistics of total_eMFI across the whole cohort (ungrouped)
v5 %>%
  skimr::skim(total_eMFI)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
total_eMFI 99 0.14 19208 12078.63 5158 11030.5 14067 23646.5 46226 ▇▅▂▂▁

Determine the number and percentage of patients who had biopsy proven antibody mediated rejection on their biopsies(ABMR where 1= biopsy proven rejection 0= no rejection) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square/fisher test

# Column percentages: ABMR status within each DSA_End group
100 * proportions(with(v5, table(ABMR, DSA_End)), margin = 2)
##     DSA_End
## ABMR         0         1
##    0 97.368421 84.615385
##    1  2.631579 15.384615
# Test the association between biopsy-proven ABMR and DSA_End.
# NOTE: these tests were previously run on HLAClass by mistake, so the
# results printed here duplicated the HLA class analysis. The variable is
# now ABMR; re-knit the report to regenerate the printed results.
v5 %>%
  dplyr::select(ABMR, DSA_End) %>%
  table() %>%
  chisq.test()
# Fisher's exact test on the same ABMR x DSA_End table, since the number of
# ABMR events is small.
v5 %>%
  dplyr::select(ABMR, DSA_End) %>%
  table() %>%
  fisher.test()

Determine the Banff category (banff, using the Banff 2007 criteria; please refer to the references for categorisation) of each incidence of biopsy-proven ABMR by DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and run a chi-square/Fisher test

# Counts of each recorded Banff category by DSA_End group
with(v5, table(banff, DSA_End))
##                      DSA_End
## banff                 0 1
##   Cat 2 Acute Type II 0 3
##   Cat 2 Chronic       1 3
##   Not graded          1 0
# Chi-squared test on the banff x DSA_End contingency table
v5 %>%
  with(table(banff, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 4, df = 2, p-value = 0.1353
# Fisher's exact test on the same banff x DSA_End table
v5 %>%
  with(table(banff, DSA_End)) %>%
  fisher.test()
## 
##  Fisher's Exact Test for Count Data
## 
## data:  .
## p-value = 0.3571
## alternative hypothesis: two.sided

Determine the mean and range of end-of-study eGFR (eGFR, numeric) in each DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0) and run a t-test

# Summary statistics of eGFR within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(eGFR)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
eGFR 0 0 1 46.63 18.63 9 35.5 46 58.25 98 ▂▆▇▂▁
eGFR 1 0 1 43.03 16.68 13 31.5 42 56.50 79 ▅▇▅▆▂
# Two-sample t-test of eGFR between the two DSA_End groups
t.test(eGFR ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  eGFR by DSA_End
## t = 1.054, df = 84.655, p-value = 0.2949
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -3.196642 10.408518
## sample estimates:
## mean in group 0 mean in group 1 
##        46.63158        43.02564

Determine end of study UPCR (UPCR_End, numerical) in each DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run t.test analysis

# Summary statistics of UPCR_End within each DSA_End group
v5 %>%
  dplyr::group_by(DSA_End) %>%
  skimr::skim(UPCR_End)
Data summary
Name Piped data
Number of rows 115
Number of columns 45
_______________________
Column type frequency:
numeric 1
________________________
Group variables DSA_End

Variable type: numeric

skim_variable DSA_End n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
UPCR_End 0 21 0.72 85.31 205.83 7 15.5 25 67 1172 ▇▁▁▁▁
UPCR_End 1 14 0.64 181.48 217.53 3 27.0 59 252 726 ▇▂▁▁▁
# Two-sample t-test of UPCR_End between the two DSA_End groups
t.test(UPCR_End ~ DSA_End, data = v5)
## 
##  Welch Two Sample t-test
## 
## data:  UPCR_End by DSA_End
## t = -1.8636, df = 44.253, p-value = 0.06902
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -200.155384    7.813566
## sample estimates:
## mean in group 0 mean in group 1 
##        85.30909       181.48000

Determine the number and percentage of patients who had graft failure by DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square/fisher test

# Counts of graft failure (GF_01) by DSA_End group
with(v5, table(GF_01, DSA_End))
##      DSA_End
## GF_01  0  1
##     0 71 31
##     1  5  8
# Column percentages: graft failure within each DSA_End group
100 * proportions(with(v5, table(GF_01, DSA_End)), margin = 2)
##      DSA_End
## GF_01         0         1
##     0 93.421053 79.487179
##     1  6.578947 20.512821
# Chi-squared test on the GF_01 x DSA_End contingency table
v5 %>%
  with(table(GF_01, DSA_End)) %>%
  chisq.test()
## Warning in chisq.test(.): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 3.6979, df = 1, p-value = 0.05448
# Fisher's exact test on the same GF_01 x DSA_End table
v5 %>%
  with(table(GF_01, DSA_End)) %>%
  fisher.test()
## 
##  Fisher's Exact Test for Count Data
## 
## data:  .
## p-value = 0.03297
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.9556008 15.2563590
## sample estimates:
## odds ratio 
##   3.618659

Determine the cause of graft failure(GF_cause) by DSA category (DSA_End where (DSA+/+ (1), DSA+/- (0)) and run chi square/fisher test

# Cause of graft failure by DSA_End group, restricted to rows with GF_01 == 1
with(subset(v5, GF_01 == 1), table(GF_cause, DSA_End))
##                    DSA_End
## GF_cause            0 1
##   ABMR              3 3
##   Acute rejection   0 1
##   Chronic rejection 1 2
##   Mixed rejection   0 1
##   Unknown           1 1
# Column percentages of graft-failure cause within each DSA_End group,
# again restricted to rows with GF_01 == 1
100 * proportions(
  with(subset(v5, GF_01 == 1), table(GF_cause, DSA_End)),
  margin = 2
)
##                    DSA_End
## GF_cause               0    1
##   ABMR              60.0 37.5
##   Acute rejection    0.0 12.5
##   Chronic rejection 20.0 25.0
##   Mixed rejection    0.0 12.5
##   Unknown           20.0 12.5

Survival analysis (categorical) or logistic regression (numerical) and Cox hazard ratios for biopsy-proven antibody-mediated rejection (ABMR, 0 = no ABMR, 1 = ABMR; etoABMR = time to ABMR/end of randomisation). Factors analysed were: DSA category (DSA_End, where DSA+/+ = 1 and DSA+/- = 0), HLA class at enrollment (HLAClass, where 1 = HLA class I, 2 = HLA class II and Both = both HLA class I and II), MFI category at enrollment (MFI 2,000-10,000, >10,000), baseline urine protein creatinine ratio (blUPCR, numeric) and transplant age at randomisation (TxAge).

# Make sure the event indicator and the follow-up time are numeric before
# they are passed to Surv(). The second line previously read
# `v5$etoABMR <as.numeric(...)`, which is a `<` comparison rather than an
# assignment: it printed a vector of FALSE and left the column untouched.
v5$ABMR <- as.numeric(v5$ABMR)
v5$etoABMR <- as.numeric(v5$etoABMR)
# ABMR by DSA_End: Kaplan-Meier fit, then the log-rank test
surv_ABMR_DSA_end <- survfit(
  formula = Surv(etoABMR, ABMR) ~ DSA_End,
  data = v5
)
lr_DSA_end <- survdiff(
  formula = Surv(etoABMR, ABMR) ~ DSA_End,
  data = v5
)
print(lr_DSA_end)
## Call:
## survdiff(formula = Surv(etoABMR, ABMR) ~ DSA_End, data = v5)
## 
##            N Observed Expected (O-E)^2/E (O-E)^2/V
## DSA_End=0 76        2     5.34      2.09       6.3
## DSA_End=1 39        6     2.66      4.20       6.3
## 
##  Chisq= 6.3  on 1 degrees of freedom, p= 0.01
# Cox proportional-hazards model for ABMR with DSA_End as the only covariate
cox_ABMR_DSA_end <- coxph(
  formula = Surv(etoABMR, ABMR) ~ DSA_End,
  data = v5
)
print(summary(cox_ABMR_DSA_end))
## Call:
## coxph(formula = Surv(etoABMR, ABMR) ~ DSA_End, data = v5)
## 
##   n= 115, number of events= 8 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)  
## DSA_End1 1.8000    6.0496   0.8166 2.204   0.0275 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## DSA_End1      6.05     0.1653     1.221     29.98
## 
## Concordance= 0.708  (se = 0.08 )
## Likelihood ratio test= 5.86  on 1 df,   p=0.02
## Wald test            = 4.86  on 1 df,   p=0.03
## Score (logrank) test = 6.32  on 1 df,   p=0.01
# ABMR by HLA class: Kaplan-Meier fit, then the log-rank test
surv_ABMR_HLAc <- survfit(
  formula = Surv(etoABMR, ABMR) ~ HLAClass,
  data = v5
)
lr_ABMR_HLAc <- survdiff(
  formula = Surv(etoABMR, ABMR) ~ HLAClass,
  data = v5
)
print(lr_ABMR_HLAc)
## Call:
## survdiff(formula = Surv(etoABMR, ABMR) ~ HLAClass, data = v5)
## 
##                N Observed Expected (O-E)^2/E (O-E)^2/V
## HLAClass=1    39        1    2.787   1.14570    1.7625
## HLAClass=2    70        5    4.829   0.00608    0.0154
## HLAClass=Both  6        2    0.384   6.78852    7.1514
## 
##  Chisq= 8  on 2 degrees of freedom, p= 0.02
# Cox proportional-hazards model for ABMR with HLAClass as the only covariate
cox_ABMR_HLAc <- coxph(
  formula = Surv(etoABMR, ABMR) ~ HLAClass,
  data = v5
)
print(summary(cox_ABMR_HLAc))
## Call:
## coxph(formula = Surv(etoABMR, ABMR) ~ HLAClass, data = v5)
## 
##   n= 115, number of events= 8 
## 
##                coef exp(coef) se(coef)     z Pr(>|z|)  
## HLAClass2     1.058     2.882    1.095 0.966   0.3340  
## HLAClassBoth  2.688    14.705    1.226 2.193   0.0283 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## HLAClass2        2.882    0.34700    0.3367     24.67
## HLAClassBoth    14.705    0.06801    1.3309    162.47
## 
## Concordance= 0.674  (se = 0.08 )
## Likelihood ratio test= 4.93  on 2 df,   p=0.08
## Wald test            = 5.81  on 2 df,   p=0.05
## Score (logrank) test = 8.05  on 2 df,   p=0.02
# Survival analysis and Cox hazard ratio for ABMR by MFI categories

# Band total_sMFI into the two categories used in the models below.
v5[["total_sMFI"]] <- as.numeric(v5[["total_sMFI"]])
v5 <- v5 %>%
  mutate(
    MFI_cat = case_when(
      total_sMFI > 10000 ~ ">10000",
      dplyr::between(total_sMFI, 2000, 10000) ~ "2000-10000",
      # Anything else (below 2000 or missing) is left as NA
      TRUE ~ NA_character_
    )
  )

v5[["MFI_cat"]] <- as.factor(v5[["MFI_cat"]])
table(v5[["MFI_cat"]])
## 
##     >10000 2000-10000 
##         32         83
# Make the lower MFI band the reference level for the models below.
v5$MFI_cat <- relevel(factor(v5$MFI_cat), ref = "2000-10000")

# Fixed: this section analyses ABMR, but the models were fitted on the
# graft-failure outcome Surv(time_to_gf, GF_01), duplicating the GF-by-MFI
# analysis further down. They now use Surv(etoABMR, ABMR), and the objects are
# named like the other ABMR models so the later GF objects do not overwrite
# them.
# NOTE: the log-rank output printed below predates this fix and must be
# regenerated by re-knitting.
surv_ABMR_MFIcat <- survfit(Surv(etoABMR, ABMR) ~ MFI_cat, data = v5)

lr_ABMR_MFIcat <- survdiff(Surv(etoABMR, ABMR) ~ MFI_cat, data = v5)
lr_ABMR_MFIcat
## Call:
## survdiff(formula = Surv(time_to_gf, GF_01) ~ MFI_cat, data = v5)
## 
##                     N Observed Expected (O-E)^2/E (O-E)^2/V
## MFI_cat=2000-10000 83        8     9.34     0.193     0.686
## MFI_cat=>10000     32        5     3.66     0.493     0.686
## 
##  Chisq= 0.7  on 1 degrees of freedom, p= 0.4
# Cox proportional-hazards model for ABMR by MFI category.
# Fixed: this model was fitted on the graft-failure outcome
# Surv(time_to_gf, GF_01) although the section analyses ABMR; it now uses
# Surv(etoABMR, ABMR) and an ABMR-specific object name.
# NOTE: the summary printed below predates this fix and must be regenerated by
# re-knitting.
cox_ABMR_MFI_cat <- coxph(Surv(etoABMR, ABMR) ~ MFI_cat, data = v5)
summary(cox_ABMR_MFI_cat)
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ MFI_cat, data = v5)
## 
##   n= 115, number of events= 13 
## 
##                 coef exp(coef) se(coef)     z Pr(>|z|)
## MFI_cat>10000 0.4679    1.5966   0.5702 0.821    0.412
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## MFI_cat>10000     1.597     0.6263    0.5222     4.881
## 
## Concordance= 0.554  (se = 0.069 )
## Likelihood ratio test= 0.64  on 1 df,   p=0.4
## Wald test            = 0.67  on 1 df,   p=0.4
## Score (logrank) test = 0.69  on 1 df,   p=0.4
# Baseline uPCR and ABMR: logistic regression on the untransformed blUPCR
logit_ABMRbUPCR <- glm(
  formula = ABMR ~ blUPCR,
  family = binomial,
  data = v5
)
print(summary(logit_ABMRbUPCR))
## 
## Call:
## glm(formula = ABMR ~ blUPCR, family = binomial, data = v5)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.6122542  0.4881060  -5.352 8.71e-08 ***
## blUPCR       0.0002029  0.0050987   0.040    0.968    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 43.666  on 86  degrees of freedom
## Residual deviance: 43.665  on 85  degrees of freedom
##   (28 observations deleted due to missingness)
## AIC: 47.665
## 
## Number of Fisher Scoring iterations: 5
# Cox model for ABMR with log-transformed baseline uPCR as the covariate
cox_ABMR_blUPCR <- coxph(
  formula = Surv(etoABMR, ABMR) ~ log(blUPCR),
  data = v5
)
print(summary(cox_ABMR_blUPCR))
## Call:
## coxph(formula = Surv(etoABMR, ABMR) ~ log(blUPCR), data = v5)
## 
##   n= 87, number of events= 6 
##    (28 observations deleted due to missingness)
## 
##               coef exp(coef) se(coef)     z Pr(>|z|)
## log(blUPCR) 0.2063    1.2291   0.3826 0.539     0.59
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## log(blUPCR)     1.229     0.8136    0.5806     2.602
## 
## Concordance= 0.572  (se = 0.105 )
## Likelihood ratio test= 0.28  on 1 df,   p=0.6
## Wald test            = 0.29  on 1 df,   p=0.6
## Score (logrank) test = 0.29  on 1 df,   p=0.6
# Transplant age and ABMR: logistic regression on the untransformed TxAge
logit_ABMR_TxAge <- glm(
  formula = ABMR ~ TxAge,
  family = binomial,
  data = v5
)
print(summary(logit_ABMR_TxAge))
## 
## Call:
## glm(formula = ABMR ~ TxAge, family = binomial, data = v5)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.639360   0.602281  -4.382 1.17e-05 ***
## TxAge        0.006385   0.046266   0.138     0.89    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 57.787  on 112  degrees of freedom
## Residual deviance: 57.768  on 111  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 61.768
## 
## Number of Fisher Scoring iterations: 5
# Cox model for ABMR with log-transformed transplant age as the covariate
cox_ABMR_TxAge <- coxph(
  formula = Surv(etoABMR, ABMR) ~ log(TxAge),
  data = v5
)
print(summary(cox_ABMR_TxAge))
## Call:
## coxph(formula = Surv(etoABMR, ABMR) ~ log(TxAge), data = v5)
## 
##   n= 113, number of events= 8 
##    (2 observations deleted due to missingness)
## 
##                 coef exp(coef)  se(coef)      z Pr(>|z|)
## log(TxAge) -0.007032  0.992993  0.380254 -0.018    0.985
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## log(TxAge)     0.993      1.007    0.4713     2.092
## 
## Concordance= 0.479  (se = 0.106 )
## Likelihood ratio test= 0  on 1 df,   p=1
## Wald test            = 0  on 1 df,   p=1
## Score (logrank) test = 0  on 1 df,   p=1

Survival analysis (categorical factors) or logistic regression (numerical factors) and Cox hazard ratios for graft failure (GF_01: 0 = no graft failure, 1 = graft failure; time_to_gf = time to GF/end of randomisation). Factors analysed were: DSA category (DSA_End, where 1 = DSA+/+ and 0 = DSA+/-), HLA class at enrollment (HLAClass, where 1 = HLA class I, 2 = HLA class II and Both = both HLA class I and II), MFI category at enrollment (MFI 2,000-10,000 or >10,000), baseline urine protein creatinine ratio (blUPCR, numeric) and transplant age at randomisation (TxAge).

# Prepare the graft-failure columns: GF_01 goes through character before
# numeric, time_to_gf becomes numeric, DSA_End becomes a factor.
v5$GF_01 <- as.numeric(as.character(v5$GF_01))
v5$time_to_gf <- as.numeric(v5$time_to_gf)
v5$DSA_End <- as.factor(v5$DSA_End)

# Graft failure by DSA category: Kaplan-Meier fit, then the log-rank test
survdDSA_end <- survfit(
  formula = Surv(time_to_gf, GF_01) ~ DSA_End,
  data = v5
)
lr_DSA_end <- survdiff(
  formula = Surv(time_to_gf, GF_01) ~ DSA_End,
  data = v5
)
print(lr_DSA_end)
## Call:
## survdiff(formula = Surv(time_to_gf, GF_01) ~ DSA_End, data = v5)
## 
##            N Observed Expected (O-E)^2/E (O-E)^2/V
## DSA_End=0 76        5     8.84      1.67      5.21
## DSA_End=1 39        8     4.16      3.53      5.21
## 
##  Chisq= 5.2  on 1 degrees of freedom, p= 0.02
# Cox proportional-hazards model for graft failure by DSA_End
cox_GFDSA_end <- coxph(
  formula = Surv(time_to_gf, GF_01) ~ DSA_End,
  data = v5
)
print(summary(cox_GFDSA_end))
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ DSA_End, data = v5)
## 
##   n= 115, number of events= 13 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)  
## DSA_End1 1.2240    3.4007   0.5705 2.146   0.0319 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## DSA_End1     3.401     0.2941     1.112      10.4
## 
## Concordance= 0.643  (se = 0.07 )
## Likelihood ratio test= 4.76  on 1 df,   p=0.03
## Wald test            = 4.6  on 1 df,   p=0.03
## Score (logrank) test = 5.21  on 1 df,   p=0.02
# Graft failure by MFI category: Kaplan-Meier fit, then the log-rank test
survdMFIcat <- survfit(
  formula = Surv(time_to_gf, GF_01) ~ MFI_cat,
  data = v5
)
lr_MFIcat <- survdiff(
  formula = Surv(time_to_gf, GF_01) ~ MFI_cat,
  data = v5
)
print(lr_MFIcat)
## Call:
## survdiff(formula = Surv(time_to_gf, GF_01) ~ MFI_cat, data = v5)
## 
##                     N Observed Expected (O-E)^2/E (O-E)^2/V
## MFI_cat=2000-10000 83        8     9.34     0.193     0.686
## MFI_cat=>10000     32        5     3.66     0.493     0.686
## 
##  Chisq= 0.7  on 1 degrees of freedom, p= 0.4
# Cox proportional-hazards model for graft failure by MFI category
cox_GFMFI_cat <- coxph(
  formula = Surv(time_to_gf, GF_01) ~ MFI_cat,
  data = v5
)
print(summary(cox_GFMFI_cat))
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ MFI_cat, data = v5)
## 
##   n= 115, number of events= 13 
## 
##                 coef exp(coef) se(coef)     z Pr(>|z|)
## MFI_cat>10000 0.4679    1.5966   0.5702 0.821    0.412
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## MFI_cat>10000     1.597     0.6263    0.5222     4.881
## 
## Concordance= 0.554  (se = 0.069 )
## Likelihood ratio test= 0.64  on 1 df,   p=0.4
## Wald test            = 0.67  on 1 df,   p=0.4
## Score (logrank) test = 0.69  on 1 df,   p=0.4
# Graft failure by HLA class: Kaplan-Meier fit, then the log-rank test
survA_Hclass <- survfit(
  formula = Surv(time_to_gf, GF_01) ~ HLAClass,
  data = v5
)
lr_Hclass <- survdiff(
  formula = Surv(time_to_gf, GF_01) ~ HLAClass,
  data = v5
)
print(lr_Hclass)
## Call:
## survdiff(formula = Surv(time_to_gf, GF_01) ~ HLAClass, data = v5)
## 
##                N Observed Expected (O-E)^2/E (O-E)^2/V
## HLAClass=1    39        4     4.56   0.06837   0.10540
## HLAClass=2    70        8     7.83   0.00363   0.00913
## HLAClass=Both  6        1     0.61   0.24883   0.26143
## 
##  Chisq= 0.3  on 2 degrees of freedom, p= 0.9
# Cox proportional-hazards model for graft failure by HLA class
cox_GFHClass <- coxph(
  formula = Surv(time_to_gf, GF_01) ~ HLAClass,
  data = v5
)
print(summary(cox_GFHClass))
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ HLAClass, data = v5)
## 
##   n= 115, number of events= 13 
## 
##                coef exp(coef) se(coef)     z Pr(>|z|)
## HLAClass2    0.1522    1.1644   0.6126 0.248    0.804
## HLAClassBoth 0.6257    1.8695   1.1193 0.559    0.576
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## HLAClass2        1.164     0.8588    0.3505     3.869
## HLAClassBoth     1.869     0.5349    0.2084    16.768
## 
## Concordance= 0.544  (se = 0.07 )
## Likelihood ratio test= 0.28  on 2 df,   p=0.9
## Wald test            = 0.31  on 2 df,   p=0.9
## Score (logrank) test = 0.32  on 2 df,   p=0.9
# Baseline uPCR and graft failure: logistic regression on untransformed blUPCR
logit_GFbUPCR <- glm(
  formula = GF_01 ~ blUPCR,
  family = binomial,
  data = v5
)
print(summary(logit_GFbUPCR))
## 
## Call:
## glm(formula = GF_01 ~ blUPCR, family = binomial, data = v5)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.561048   0.435583  -5.880 4.11e-09 ***
## blUPCR       0.006422   0.003354   1.915   0.0555 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 57.871  on 86  degrees of freedom
## Residual deviance: 53.783  on 85  degrees of freedom
##   (28 observations deleted due to missingness)
## AIC: 57.783
## 
## Number of Fisher Scoring iterations: 5
# Cox model for graft failure with log-transformed baseline uPCR
cox_blUPCR <- coxph(
  formula = Surv(time_to_gf, GF_01) ~ log(blUPCR),
  data = v5
)
print(summary(cox_blUPCR))
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ log(blUPCR), data = v5)
## 
##   n= 87, number of events= 9 
##    (28 observations deleted due to missingness)
## 
##               coef exp(coef) se(coef)     z Pr(>|z|)  
## log(blUPCR) 0.6481    1.9120   0.2762 2.347   0.0189 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## log(blUPCR)     1.912      0.523     1.113     3.285
## 
## Concordance= 0.722  (se = 0.068 )
## Likelihood ratio test= 4.91  on 1 df,   p=0.03
## Wald test            = 5.51  on 1 df,   p=0.02
## Score (logrank) test = 5.82  on 1 df,   p=0.02
# Transplant age and graft failure: logistic regression on untransformed TxAge
logit_GFTxAge <- glm(
  formula = GF_01 ~ TxAge,
  family = binomial,
  data = v5
)
print(summary(logit_GFTxAge))
## 
## Call:
## glm(formula = GF_01 ~ TxAge, family = binomial, data = v5)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.976808   0.476600  -4.148 3.36e-05 ***
## TxAge       -0.006445   0.038633  -0.167    0.868    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 80.667  on 112  degrees of freedom
## Residual deviance: 80.639  on 111  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 84.639
## 
## Number of Fisher Scoring iterations: 4
# Cox model for graft failure with log-transformed transplant age
cox_TxAge <- coxph(
  formula = Surv(time_to_gf, GF_01) ~ log(TxAge),
  data = v5
)
print(summary(cox_TxAge))
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ log(TxAge), data = v5)
## 
##   n= 113, number of events= 13 
##    (2 observations deleted due to missingness)
## 
##              coef exp(coef) se(coef)     z Pr(>|z|)
## log(TxAge) 0.1357    1.1453   0.3070 0.442    0.659
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## log(TxAge)     1.145     0.8731    0.6275     2.091
## 
## Concordance= 0.527  (se = 0.071 )
## Likelihood ratio test= 0.2  on 1 df,   p=0.7
## Wald test            = 0.2  on 1 df,   p=0.7
## Score (logrank) test = 0.2  on 1 df,   p=0.7

Sensitivity Analysis for ABMR (ABMR: 0 = no ABMR, 1 = ABMR; etoABMR = time to ABMR/end of randomisation)

# Sensitivity analysis for ABMR excluding those who did not have a 32-month
# HLA sample (DSA_End_32: 1 = had sample, 0 = no sample at 32 months)

v5[["DSA_End_32"]] <- as.factor(v5[["DSA_End_32"]])
table(v5[["DSA_End_32"]])
## 
##   0   1 
##  15 100
# Keep only the rows with DSA_End_32 == 1, then refit the ABMR Cox model on
# that subset.
v5_sens <- v5[v5$DSA_End_32 == 1, ]

cox_sens_DSA32 <- coxph(
  formula = Surv(etoABMR, ABMR) ~ DSA_End,
  data = v5_sens
)
print(summary(cox_sens_DSA32))
## Call:
## coxph(formula = Surv(etoABMR, ABMR) ~ DSA_End, data = v5_sens)
## 
##   n= 100, number of events= 5 
## 
##           coef exp(coef) se(coef)     z Pr(>|z|)  
## DSA_End1 2.074     7.959    1.118 1.855   0.0636 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## DSA_End1     7.959     0.1256    0.8894     71.22
## 
## Concordance= 0.733  (se = 0.093 )
## Likelihood ratio test= 4.57  on 1 df,   p=0.03
## Wald test            = 3.44  on 1 df,   p=0.06
## Score (logrank) test = 4.87  on 1 df,   p=0.03
# Sensitivity analysis for ABMR excluding those who had pre-formed DSA at
# transplantation (TX_DSA: 1 = pre-formed DSA present at time of transplant,
# 0 = no DSA present at time of transplant)

v5[["TX_DSA"]] <- as.factor(v5[["TX_DSA"]])
table(v5[["TX_DSA"]])
## 
##   0   1 
## 102  13
# Keep only the rows with TX_DSA == 0, then refit the ABMR Cox model on that
# subset.
v5_sens_TX <- v5[v5$TX_DSA == 0, ]

cox_sens_ABMR_Tx <- coxph(
  formula = Surv(etoABMR, ABMR) ~ DSA_End,
  data = v5_sens_TX
)
print(summary(cox_sens_ABMR_Tx))
## Call:
## coxph(formula = Surv(etoABMR, ABMR) ~ DSA_End, data = v5_sens_TX)
## 
##   n= 102, number of events= 7 
## 
##           coef exp(coef) se(coef)     z Pr(>|z|)  
## DSA_End1  2.54     12.68     1.08 2.351   0.0187 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## DSA_End1     12.68    0.07887     1.526     105.4
## 
## Concordance= 0.769  (se = 0.072 )
## Likelihood ratio test= 8.66  on 1 df,   p=0.003
## Wald test            = 5.53  on 1 df,   p=0.02
## Score (logrank) test = 9.21  on 1 df,   p=0.002

Sensitivity Analysis for Graft Failure (GF_01, 0=no Graft failure, 1= graft failure; time_to_gf- time to GF/end of randomisation)

# Sensitivity analysis for GF excluding those who did not have a 32-month HLA
# sample (DSA_End_32: 1 = had sample, 0 = no sample at 32 months). v5_sens,
# created above, holds the rows with DSA_End_32 == 1.

# Fixed: this model was previously fitted on v5_sens_TX (the pre-formed-DSA
# exclusion), which duplicated the next analysis instead of applying the
# 32-month-sample exclusion. It is now fitted on v5_sens and stored under its
# own name so the next analysis does not overwrite it.
# NOTE: the summary printed below predates this fix and must be regenerated by
# re-knitting.
cox_sens_GF_DSA32 <- coxph(Surv(time_to_gf, GF_01) ~ DSA_End, data = v5_sens)
summary(cox_sens_GF_DSA32)
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ DSA_End, data = v5_sens_TX)
## 
##   n= 102, number of events= 10 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)  
## DSA_End1 1.6711    5.3179   0.6909 2.419   0.0156 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## DSA_End1     5.318      0.188     1.373      20.6
## 
## Concordance= 0.705  (se = 0.073 )
## Likelihood ratio test= 6.57  on 1 df,   p=0.01
## Wald test            = 5.85  on 1 df,   p=0.02
## Score (logrank) test = 7.33  on 1 df,   p=0.007
# Sensitivity analysis for GF excluding those who had pre-formed DSA at
# transplantation (TX_DSA: 1 = pre-formed DSA present at time of transplant,
# 0 = no DSA present at time of transplant). v5_sens_TX, created above, holds
# the rows with TX_DSA == 0.

cox_sens_Tx <- coxph(
  formula = Surv(time_to_gf, GF_01) ~ DSA_End,
  data = v5_sens_TX
)
print(summary(cox_sens_Tx))
## Call:
## coxph(formula = Surv(time_to_gf, GF_01) ~ DSA_End, data = v5_sens_TX)
## 
##   n= 102, number of events= 10 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)  
## DSA_End1 1.6711    5.3179   0.6909 2.419   0.0156 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## DSA_End1     5.318      0.188     1.373      20.6
## 
## Concordance= 0.705  (se = 0.073 )
## Likelihood ratio test= 6.57  on 1 df,   p=0.01
## Wald test            = 5.85  on 1 df,   p=0.02
## Score (logrank) test = 7.33  on 1 df,   p=0.007

Survival plot for GF by DSA category

MFI plot - load new dataset

# Load the MFI plotting dataset and preview its first rows.
MFIplot <- read.csv(
  file = "~/Documents/ACF:PhD/OUTSMART DATA FILTERED/MFIplot.csv"
)
print(head(MFIplot))
##   Patient.ID
## 1    P111557
## 2   P041108 
## 3   P082010 
## 4    P030097
## 5    P030104
## 6   P030601 
##   DSA.Status.at.End..0..DSA...1..DSA...2.DSA.Unknown..3.DSA....post.enrolment.
## 1                                                                            1
## 2                                                                            1
## 3                                                                            1
## 4                                                                            1
## 5                                                                            1
## 6                                                                            1
##   Total.MFI.Baseline.DSA
## 1                   9326
## 2                   9982
## 3                   4694
## 4                  31207
## 5                  11711
## 6                   3886
# Work on a copy of the raw data and list its column names.
m1 <- MFIplot

names(m1)
## [1] "Patient.ID"                                                                  
## [2] "DSA.Status.at.End..0..DSA...1..DSA...2.DSA.Unknown..3.DSA....post.enrolment."
## [3] "Total.MFI.Baseline.DSA"
# Give the DSA status column a short name, convert it to a factor and tabulate
# the groups.
m1 <- rename(
  m1,
  DSA_End = DSA.Status.at.End..0..DSA...1..DSA...2.DSA.Unknown..3.DSA....post.enrolment.
)

m1[["DSA_End"]] <- as.factor(m1[["DSA_End"]])

table(m1[["DSA_End"]])
## 
##  0  1  2  3 
## 76 39 17 39
# Drop the rows whose DSA_End is 2; the unused factor level is still listed in
# the table at this point.
m2 <- m1[m1[["DSA_End"]] != 2, ]
table(m2[["DSA_End"]])
## 
##  0  1  2  3 
## 76 39  0 39
# Remove the now-empty factor level so it no longer appears in the table.
m2[["DSA_End"]] <- droplevels(m2[["DSA_End"]])
table(m2[["DSA_End"]])
## 
##  0  1  3 
## 76 39 39
# Shorten the MFI column name, make it numeric and fix the order of the
# DSA_End levels.
m2 <- rename(m2, totalMFI = Total.MFI.Baseline.DSA)

m2[["totalMFI"]] <- as.numeric(m2[["totalMFI"]])

m2[["DSA_End"]] <- factor(m2[["DSA_End"]], levels = c("0", "1", "3"))

MFI plot - plot

## Warning: Removed 23 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 23 rows containing non-finite outside the scale range
## (`stat_compare_means()`).