Leemos los datos y definimos variables nuevas:

df=read_sav("CopiaAnalisisDesember2019.sav")
df=df %>% mutate(TumorSolido=as.factor(TumorType==1),Diameter=ifelse(CTdiameter>=6,6,CTdiameter)) %>%
    mutate(
        Trifecta1= (ClampingTime<=20) & Bleeding<=500 & (IntraopComplicationYN==0),
        Trifecta2= (ClampingTime<20) & Bleeding <500 & (IntraopComplicationYN==0),
        Trifecta3= (ClampingTime<=20) & Bleeding<=500 & (RosenthalGrade<=1),
        Trifecta4= (ClampingTime< 20) & Bleeding< 500 & (RosenthalGrade<=1),
        ) %>%
    mutate(ChangeGFRabs=GFRDischarge-PreGFR) %>%
    mutate(ChangeGFRabs=ChangeGFRabs/PreGFR*100) %>%
    mutate(HospitalStay4=HospitalStay>=5,
           ClavienDindoGrade3=ClavienDindoGrade>=3) %>% select(-OperDate)


df2=df %>% as_factor() %>% mutate(Trifecta3=as.factor(Trifecta3))


rndr <- function(x, name, ...) {
    if (length(x) == 0) {
        y <- df2[[name]]
        s <- rep("", length(render.default(x=y, name=name, ...)))
        if (is.numeric(y)) {
            p <- t.test(y ~ df2$Trifecta3)$p.value
        } else {
            p <- chisq.test(table(y, droplevels(df2$Trifecta3)))$p.value
        }
        s[2] <- sub("<", "&lt;", format.pval(p, digits=3, eps=0.001))
        s
    } else {
        render.default(x=x, name=name, ...)
    }
}

rndr.strat <- function(label, n, ...) {
    ifelse(n==0, label, render.strat.default(label, n, ...))
}
table1(~ . ,data=df2)
Overall
(n=122)
Age
Mean (SD) 61.5 (12.2)
Median [Min, Max] 65.0 [18.0, 85.0]
Sex
Man 80 (65.6%)
Woman 42 (34.4%)
BMI
Mean (SD) 27.5 (4.78)
Median [Min, Max] 27.1 [17.3, 47.9]
CharlsonIndex
Mean (SD) 4.58 (1.55)
Median [Min, Max] 5.00 [2.00, 9.00]
ASA
ASA I 6 (4.9%)
ASA II 61 (50.0%)
ASA III 54 (44.3%)
ASA IV 1 (0.8%)
Hb
Mean (SD) 14.3 (1.38)
Median [Min, Max] 14.5 [9.20, 17.8]
PreGFR
Mean (SD) 80.8 (15.6)
Median [Min, Max] 89.5 [13.0, 90.0]
TumorType
Solid 90 (73.8%)
Bosniak 2 0 (0%)
Bosniak 3 13 (10.7%)
Bosniak 4 19 (15.6%)
Number
Mean (SD) 1.04 (0.237)
Median [Min, Max] 1.00 [1.00, 3.00]
Side
Right 66 (54.1%)
Left 55 (45.1%)
3 1 (0.8%)
CT størrelse
Mean (SD) 2.69 (1.20)
Median [Min, Max] 2.45 [1.10, 9.00]
RENALscore
Mean (SD) 6.42 (1.79)
Median [Min, Max] 6.50 [4.00, 10.0]
RenalSuffix
a 26 (21.3%)
p 54 (44.3%)
x 42 (34.4%)
OperatorAndTys
And 89 (73.0%)
Tys 33 (27.0%)
Opr.tid min.
Mean (SD) 162 (35.8)
Median [Min, Max] 157 [103, 261]
Blødning ml.
Mean (SD) 226 (282)
Median [Min, Max] 100 [50.0, 2000]
ClampingTime
Mean (SD) 13.1 (5.22)
Median [Min, Max] 12.0 [0.00, 32.0]
Dren
No 22 (18.0%)
Yes 100 (82.0%)
Handport 0/1
No 28 (23.0%)
Yes 94 (77.0%)
IntraopComplicationYN
No 107 (87.7%)
Yes 15 (12.3%)
Cause
Surgeon 11 (9.0%)
Anestesiolog 4 (3.3%)
Missing 107 (87.7%)
RosenthalGrade
Mean (SD) 0.213 (0.633)
Median [Min, Max] 0.00 [0.00, 3.00]
Tumorstørrelse pT i cm.
Mean (SD) 2.46 (1.16)
Median [Min, Max] 2.20 [1.00, 8.00]
Histology
ClearCell 55 (45.1%)
Papillary 26 (21.3%)
Chromophobe 7 (5.7%)
ClearCell Cystisc changes 4 (3.3%)
Multilocular cystic 2 (1.6%)
Oncocytoma 17 (13.9%)
Others 10 (8.2%)
Missing 1 (0.8%)
pT
pT1a 105 (86.1%)
pT1b 16 (13.1%)
pT2a 0 (0%)
pT2b 1 (0.8%)
pT3a 0 (0%)
Margins
Negative 112 (91.8%)
Positive 10 (8.2%)
uncertain 0 (0%)
Postopr liggetid dager
Mean (SD) 4.57 (1.79)
Median [Min, Max] 4.00 [2.00, 13.0]
GFRDischarge
Mean (SD) 74.8 (18.7)
Median [Min, Max] 82.5 [12.0, 90.0]
GFR1Month
Mean (SD) 73.0 (18.8)
Median [Min, Max] 81.0 [31.0, 90.0]
Missing 81 (66.4%)
CRP1day
Mean (SD) 51.9 (38.5)
Median [Min, Max] 44.5 [5.00, 235]
CRP2day
Mean (SD) 104 (73.9)
Median [Min, Max] 81.5 [13.0, 400]
CRPmax
Mean (SD) 131 (86.8)
Median [Min, Max] 109 [13.0, 461]
CRPut
Mean (SD) 71.8 (39.2)
Median [Min, Max] 65.0 [9.00, 213]
Missing 14 (11.5%)
Transfusjon ant enh
Mean (SD) 0.107 (0.442)
Median [Min, Max] 0.00 [0.00, 3.00]
PostopComplicationYN
No 89 (73.0%)
Yes 33 (27.0%)
ClavienDindoGrade
Mean (SD) 0.541 (1.08)
Median [Min, Max] 0.00 [0.00, 4.00]
Komplikasjoner 0/1
50 (41.0%)
0 34 (27.9%)
1ileus + orchitt ve side etter hjemkomst,antibiotikabeh. 1 (0.8%)
1paralytisk ileus 1 (0.8%)
Atb for mistanke UVI men ikke bekreftet med dyrkning 1 (0.8%)
bilat lungeemboli 4 uker senere, Albyl e ikke sep 1 (0.8%)
blødning fra binyre som måtte fjernes, brokk 1 (0.8%)
blødning i trokar, nyresvikt (tidligre nefrektomert) 1 (0.8%)
chylus ascites 1 (0.8%)
CRP stigning behandlet med cefalo 1 (0.8%)
eliquis 1 (0.8%)
FEBER. infe ukjent fokus 1 (0.8%)
Iiskemisk kolitt 1 (0.8%)
ileus, konvertert til nefrektomi 1 (0.8%)
infeksjon??? Sansynlig ikke 1 (0.8%)
konvertert til nefrektomi 1 (0.8%)
kreat stigning 1 (0.8%)
lettgradig tarmparalyse 1 (0.8%)
miltskade, UVI 1 (0.8%)
nyrven skade, nyrepol ischemi 1 (0.8%)
reinnlagt SSF med magesmerter 12 postopr dag 1 (0.8%)
reoperasjon grunnet postop blødning med hematom evakueri 1 (0.8%)
Resp svikt atrieflimmer, intensiv, telemetri AB behandlet 1 (0.8%)
respirasjon svikt-UVI 1 (0.8%)
respirasjonsproblem,pleuravæske,aspirasjonspneumoni 1 (0.8%)
respirasjonsvikt 1 (0.8%)
respirasjonsvikt + lunge atelectase med Int opphold 1 (0.8%)
respirasjonsvikt, ileus 1 (0.8%)
retroperitonealt hematom, pleuravæske, diare 1 (0.8%)
smerter 1 (0.8%)
subfebrilia, AB beh 1 (0.8%)
subileus 1 (0.8%)
tarmparalyse 2 (1.6%)
tarmparalyse,oppkast 1 (0.8%)
urinretensjon 2 (1.6%)
UVI 2 (1.6%)
UVI uten funn på CT, innlagt i 5 dager for atb 1 (0.8%)
TumorSolido
FALSE 32 (26.2%)
TRUE 90 (73.8%)
Diameter
Mean (SD) 2.67 (1.09)
Median [Min, Max] 2.45 [1.10, 6.00]
Trifecta1
Yes 99 (81.1%)
No 23 (18.9%)
Trifecta2
Yes 93 (76.2%)
No 29 (23.8%)
Trifecta3
FALSE 17 (13.9%)
TRUE 105 (86.1%)
Trifecta4
Yes 98 (80.3%)
No 24 (19.7%)
ChangeGFRabs
Mean (SD) -7.73 (14.4)
Median [Min, Max] -0.556 [-50.6, 36.7]
HospitalStay4
Yes 49 (40.2%)
No 73 (59.8%)
ClavienDindoGrade3
Yes 10 (8.2%)
No 112 (91.8%)
df2$Trifecta3=factor(as.integer(df2$Trifecta3),levels=1:3,labels=c("FALSE","TRUE","p-value"))  
table1(~ . |Trifecta3,data=df2 , render=rndr, render.strat=rndr.strat,overall=F,droplevels = F)
## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect
FALSE
(n=17)
TRUE
(n=105)
p-value
Age
Mean (SD) 63.9 (11.2) 61.1 (12.4) 0.348
Median [Min, Max] 65.0 [42.0, 85.0] 65.0 [18.0, 85.0]
Sex
Man 14 (82.4%) 66 (62.9%) 0.196
Woman 3 (17.6%) 39 (37.1%)
BMI
Mean (SD) 29.0 (3.70) 27.3 (4.90) 0.104
Median [Min, Max] 28.7 [23.2, 36.6] 26.8 [17.3, 47.9]
CharlsonIndex
Mean (SD) 5.29 (1.86) 4.47 (1.47) 0.097
Median [Min, Max] 5.00 [2.00, 9.00] 4.00 [2.00, 8.00]
ASA
ASA I 1 (5.9%) 5 (4.8%) 0.839
ASA II 7 (41.2%) 54 (51.4%)
ASA III 9 (52.9%) 45 (42.9%)
ASA IV 0 (0%) 1 (1.0%)
Hb
Mean (SD) 14.1 (1.64) 14.4 (1.34) 0.478
Median [Min, Max] 14.4 [9.20, 16.3] 14.5 [10.6, 17.8]
PreGFR
Mean (SD) 79.6 (15.7) 81.0 (15.7) 0.747
Median [Min, Max] 89.0 [45.0, 90.0] 90.0 [13.0, 90.0]
TumorType
Solid 13 (76.5%) 77 (73.3%) NA
Bosniak 2 0 (0%) 0 (0%)
Bosniak 3 1 (5.9%) 12 (11.4%)
Bosniak 4 3 (17.6%) 16 (15.2%)
Number
Mean (SD) 1.06 (0.243) 1.04 (0.237) 0.746
Median [Min, Max] 1.00 [1.00, 2.00] 1.00 [1.00, 3.00]
Side
Right 9 (52.9%) 57 (54.3%) 0.912
Left 8 (47.1%) 47 (44.8%)
3 0 (0%) 1 (1.0%)
CT størrelse
Mean (SD) 3.55 (1.77) 2.56 (1.02) 0.036
Median [Min, Max] 3.30 [1.30, 9.00] 2.30 [1.10, 5.60]
RENALscore
Mean (SD) 7.59 (1.46) 6.23 (1.77) 0.002
Median [Min, Max] 8.00 [4.00, 10.0] 6.00 [4.00, 10.0]
RenalSuffix
a 5 (29.4%) 21 (20.0%) 0.672
p 7 (41.2%) 47 (44.8%)
x 5 (29.4%) 37 (35.2%)
OperatorAndTys
And 14 (82.4%) 75 (71.4%) 0.518
Tys 3 (17.6%) 30 (28.6%)
Opr.tid min.
Mean (SD) 214 (39.8) 154 (27.2) <0.001
Median [Min, Max] 222 [131, 261] 151 [103, 237]
Blødning ml.
Mean (SD) 635 (554) 160 (117) 0.003
Median [Min, Max] 300 [100, 2000] 100 [50.0, 500]
ClampingTime
Mean (SD) 19.5 (7.59) 12.0 (3.86) <0.001
Median [Min, Max] 22.0 [7.00, 32.0] 12.0 [0.00, 20.0]
Dren
No 0 (0%) 22 (21.0%) 0.081
Yes 17 (100%) 83 (79.0%)
Handport 0/1
No 3 (17.6%) 25 (23.8%) 0.803
Yes 14 (82.4%) 80 (76.2%)
IntraopComplicationYN
No 8 (47.1%) 99 (94.3%) <0.001
Yes 9 (52.9%) 6 (5.7%)
Cause
Surgeon 7 (41.2%) 4 (3.8%) 1
Anestesiolog 2 (11.8%) 2 (1.9%)
Missing 8 (47.1%) 99 (94.3%)
RosenthalGrade
Mean (SD) 1.18 (1.24) 0.0571 (0.233) 0.002
Median [Min, Max] 1.00 [0.00, 3.00] 0.00 [0.00, 1.00]
Tumorstørrelse pT i cm.
Mean (SD) 2.94 (1.67) 2.39 (1.04) 0.201
Median [Min, Max] 2.50 [1.00, 8.00] 2.10 [1.00, 5.70]
Histology
ClearCell 8 (47.1%) 47 (44.8%) 0.759
Papillary 5 (29.4%) 21 (20.0%)
Chromophobe 1 (5.9%) 6 (5.7%)
ClearCell Cystisc changes 0 (0%) 4 (3.8%)
Multilocular cystic 0 (0%) 2 (1.9%)
Oncocytoma 3 (17.6%) 14 (13.3%)
Others 0 (0%) 10 (9.5%)
Missing 0 (0%) 1 (1.0%)
pT
pT1a 12 (70.6%) 93 (88.6%) NA
pT1b 4 (23.5%) 12 (11.4%)
pT2a 0 (0%) 0 (0%)
pT2b 1 (5.9%) 0 (0%)
pT3a 0 (0%) 0 (0%)
Margins
Negative 15 (88.2%) 97 (92.4%) NA
Positive 2 (11.8%) 8 (7.6%)
uncertain 0 (0%) 0 (0%)
Postopr liggetid dager
Mean (SD) 5.65 (2.06) 4.40 (1.68) 0.028
Median [Min, Max] 5.00 [3.00, 10.0] 4.00 [2.00, 13.0]
GFRDischarge
Mean (SD) 67.9 (22.5) 75.9 (17.9) 0.183
Median [Min, Max] 77.0 [32.0, 90.0] 84.0 [12.0, 90.0]
GFR1Month
Mean (SD) 59.1 (25.4) 76.3 (15.6) 0.103
Median [Min, Max] 50.0 [31.0, 90.0] 81.0 [32.0, 90.0]
Missing 9 (52.9%) 72 (68.6%)
CRP1day
Mean (SD) 89.8 (60.5) 45.7 (29.8) 0.009
Median [Min, Max] 89.0 [8.00, 235] 40.0 [5.00, 166]
CRP2day
Mean (SD) 156 (103) 95.4 (64.7) 0.03
Median [Min, Max] 139 [14.0, 333] 76.0 [13.0, 400]
CRPmax
Mean (SD) 187 (125) 121 (75.9) 0.049
Median [Min, Max] 154 [14.0, 461] 100 [13.0, 400]
CRPut
Mean (SD) 69.7 (34.4) 72.1 (40.2) 0.803
Median [Min, Max] 66.0 [9.00, 140] 64.0 [10.0, 213]
Missing 1 (5.9%) 13 (12.4%)
Transfusjon ant enh
Mean (SD) 0.529 (0.943) 0.0381 (0.237) 0.048
Median [Min, Max] 0.00 [0.00, 3.00] 0.00 [0.00, 2.00]
PostopComplicationYN
No 8 (47.1%) 81 (77.1%) 0.022
Yes 9 (52.9%) 24 (22.9%)
ClavienDindoGrade
Mean (SD) 1.47 (1.70) 0.390 (0.860) 0.02
Median [Min, Max] 1.00 [0.00, 4.00] 0.00 [0.00, 4.00]
Komplikasjoner 0/1
5 (29.4%) 45 (42.9%) 0.002
0 2 (11.8%) 32 (30.5%)
bilat lungeemboli 4 uker senere, Albyl e ikke sep 1 (5.9%) 0 (0%)
blødning fra binyre som måtte fjernes, brokk 1 (5.9%) 0 (0%)
ileus, konvertert til nefrektomi 1 (5.9%) 0 (0%)
konvertert til nefrektomi 1 (5.9%) 0 (0%)
miltskade, UVI 1 (5.9%) 0 (0%)
Resp svikt atrieflimmer, intensiv, telemetri AB behandlet 1 (5.9%) 0 (0%)
respirasjon svikt-UVI 1 (5.9%) 0 (0%)
respirasjonsproblem,pleuravæske,aspirasjonspneumoni 1 (5.9%) 0 (0%)
respirasjonsvikt + lunge atelectase med Int opphold 1 (5.9%) 0 (0%)
tarmparalyse 1 (5.9%) 1 (1.0%)
1ileus + orchitt ve side etter hjemkomst,antibiotikabeh. 0 (0%) 1 (1.0%)
1paralytisk ileus 0 (0%) 1 (1.0%)
Atb for mistanke UVI men ikke bekreftet med dyrkning 0 (0%) 1 (1.0%)
blødning i trokar, nyresvikt (tidligre nefrektomert) 0 (0%) 1 (1.0%)
chylus ascites 0 (0%) 1 (1.0%)
CRP stigning behandlet med cefalo 0 (0%) 1 (1.0%)
eliquis 0 (0%) 1 (1.0%)
FEBER. infe ukjent fokus 0 (0%) 1 (1.0%)
Iiskemisk kolitt 0 (0%) 1 (1.0%)
infeksjon??? Sansynlig ikke 0 (0%) 1 (1.0%)
kreat stigning 0 (0%) 1 (1.0%)
lettgradig tarmparalyse 0 (0%) 1 (1.0%)
nyrven skade, nyrepol ischemi 0 (0%) 1 (1.0%)
reinnlagt SSF med magesmerter 12 postopr dag 0 (0%) 1 (1.0%)
reoperasjon grunnet postop blødning med hematom evakueri 0 (0%) 1 (1.0%)
respirasjonsvikt 0 (0%) 1 (1.0%)
respirasjonsvikt, ileus 0 (0%) 1 (1.0%)
retroperitonealt hematom, pleuravæske, diare 0 (0%) 1 (1.0%)
smerter 0 (0%) 1 (1.0%)
subfebrilia, AB beh 0 (0%) 1 (1.0%)
subileus 0 (0%) 1 (1.0%)
tarmparalyse,oppkast 0 (0%) 1 (1.0%)
urinretensjon 0 (0%) 2 (1.9%)
UVI 0 (0%) 2 (1.9%)
UVI uten funn på CT, innlagt i 5 dager for atb 0 (0%) 1 (1.0%)
TumorSolido
FALSE 4 (23.5%) 28 (26.7%) 1
TRUE 13 (76.5%) 77 (73.3%)
Diameter
Mean (SD) 3.38 (1.27) 2.56 (1.02) 0.02
Median [Min, Max] 3.30 [1.30, 6.00] 2.30 [1.10, 5.60]
Trifecta1
Yes 0 (0%) 99 (94.3%) <0.001
No 17 (100%) 6 (5.7%)
Trifecta2
Yes 0 (0%) 93 (88.6%) <0.001
No 17 (100%) 12 (11.4%)
Trifecta3
FALSE 17 (100%) 0 (0%) NA
TRUE 0 (0%) 105 (100%)
p-value 0 (0%) 0 (0%)
Trifecta4
Yes 0 (0%) 98 (93.3%) <0.001
No 17 (100%) 7 (6.7%)
ChangeGFRabs
Mean (SD) -16.2 (17.0) -6.36 (13.5) 0.035
Median [Min, Max] -11.2 [-48.4, 0.00] 0.00 [-50.6, 36.7]
HospitalStay4
Yes 12 (70.6%) 37 (35.2%) 0.013
No 5 (29.4%) 68 (64.8%)
ClavienDindoGrade3
Yes 5 (29.4%) 5 (4.8%) 0.003
No 12 (70.6%) 100 (95.2%)

Parece que nos interesa:

Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore

d <- datadist(df) 
options(datadist='d') 
f<-lrm(Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore,data=df) 
summary(f)
##              Effects              Response : Trifecta3 
## 
##  Factor        Low    High Diff. Effect   S.E.    Lower 0.95 Upper 0.95
##  BMI           24.425 30.2 5.775 -0.49410 0.34745 -1.175100   0.18689  
##   Odds Ratio   24.425 30.2 5.775  0.61012      NA  0.308790   1.20550  
##  CharlsonIndex  3.250  6.0 2.750 -1.10130 0.50410 -2.089300  -0.11330  
##   Odds Ratio    3.250  6.0 2.750  0.33244      NA  0.123770   0.89289  
##  RENALscore     5.000  8.0 3.000 -1.54410 0.54590 -2.614000  -0.47415  
##   Odds Ratio    5.000  8.0 3.000  0.21351      NA  0.073239   0.62241
#summary(f,BMI=c(20,40),Diameter=c(0,5)) 

plot(nomogram(f,fun=plogis,funlabel="Probability of Trifecta"
              #, fun.at=c(.05,.1,.25,.5,.75,.9,.95), #Perdidas1Mes=c(0,5,10,20,seq(50,700,by=50))
))

Vamos a explorar la curva ROC:

df$predictions=predict(f)
df$labels=2*(df$Trifecta3-0.5)
precrec_obj <- evalmod(scores = df$predictions, labels = df$labels)
autoplot(precrec_obj)

uauc1 <- evalmod(scores=df$predictions, labels = df$labels, mode="aucroc")

# print 'aucroc'
uauc1
## 
##     === Input data ===
## 
##      Model name Dataset ID # of negatives # of positives
##    1         m1          1             17            105
## 
## 
##     === AUCs ===
## 
##      Model name Dataset ID       AUC      U
##    1         m1          1 0.7582633 1353.5

Bootstrap

Vamos a usar la técnica de bootstrap con la idea de tener algo de validación interna del modelo de regresión anterior. Cada cálculo dará un resultado aleatorio. Vamos a fijar una semilla para reproducibilidad:

set.seed(20201001)
logit = glm(Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore,data=df, family = binomial(link = "logit"))

logit %>% summary()
## 
## Call:
## glm(formula = Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore, 
##     family = binomial(link = "logit"), data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4754   0.2066   0.3484   0.5815   1.1064  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    9.76122    2.61667   3.730 0.000191 ***
## BMI           -0.08556    0.06016  -1.422 0.154986    
## CharlsonIndex -0.40047    0.18330  -2.185 0.028901 *  
## RENALscore    -0.51470    0.18195  -2.829 0.004673 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 98.520  on 121  degrees of freedom
## Residual deviance: 82.789  on 118  degrees of freedom
## AIC: 90.789
## 
## Number of Fisher Scoring iterations: 5

Vamos a aplicar la técnica de bootstrap para estudiar la estabilidad de las estimaciones de los coeficientes de la regresión logística:.

R = 10000                      # number of bootstrap samples
n = nrow(df)              # sample size
k = length(coef(logit))     # number of coefficients

# set up a empty Rxn matrix B
B = matrix(nrow = R, ncol = k,
           dimnames = list(paste("Sample",1:R), 
                           names(coef(logit))))
# loop R times
for(i in 1:R){
    # sample credit data with replacement
    boot.data = df[sample(x = 1:n, size = n, replace = TRUE), ]
    # fit the model on the boostrapped sample
    boot.logit = glm(logit$formula, 
                     data=boot.data, family = binomial(link = "logit"))
    # store the coefficients
    B[i,] = coef(boot.logit)
}



# function to return bootstrapped coefficients
myLogitCoef <- function(data, indices, formula) {
    d <- data[indices,]
    fit <- glm(formula, data=d, family = binomial(link = "logit"))
    return(coef(fit))
}

# set up cluster of 4 CPU cores
cl<-makeCluster(4)
clusterExport(cl, 'myLogitCoef')

set.seed(373)
coef.boot <- boot(data=df, statistic=myLogitCoef, R=10000, 
                  formula= logit$formula,
                  # process in parallel across 4 CPU cores
                  parallel = 'snow', ncpus=4, cl=cl)
stopCluster(cl)

coef.boot
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = df, statistic = myLogitCoef, R = 10000, formula = logit$formula, 
##     parallel = "snow", ncpus = 4, cl = cl)
## 
## 
## Bootstrap Statistics :
##       original        bias    std. error
## t1*  9.7612216  0.5328396901  2.68695651
## t2* -0.0855582 -0.0004506506  0.05976187
## t3* -0.4004748 -0.0224398008  0.21652931
## t4* -0.5146958 -0.0423837644  0.19574431

No parece que los sesgos sean importantes. Ni en valor, ni menos tras compararlos con el tamaño del error standard. creo que basta decirlo de palabra. No será necesario enseñarlo.

Vamos a usar ahora bootstrap para estimar AUC:

prob = predict.glm(logit, type='response', newdata =  df)
pred = prediction(prob, df$Trifecta3)

# AUC
auc = performance(pred,"auc")@y.values[[1]][1]

# plot the ROC curve
perf <- performance(pred,"tpr","fpr")
plot(perf, col="navyblue", cex.main=1,
     main= paste("Logistic Regression ROC Curve: AUC =", round(auc,3)))
abline(a=0, b = 1, col='darkorange1')

R = 10000
n = nrow(df)

# empty Rx2 matrix for bootstrap results 
B = matrix(nrow = R, ncol = 2,
           dimnames = list(paste('Sample',1:R),
                           c("auc_orig","auc_boot")))

set.seed(701)
for(i in 1:R){
    
    # draw a random sample
    obs.boot <- sample(x = 1:n, size = n, replace = T)
    data.boot <- df[obs.boot, ]
    
    # fit the model on bootstrap sample
    logit.boot <- glm(logit$formula , 
                      data=data.boot,
                      family = binomial(link = "logit"))
    
    # apply model to original data
    prob1 = predict(logit.boot, type='response', df)
    pred1 = prediction(prob1, df$Trifecta3)
    auc1 = performance(pred1,"auc")@y.values[[1]][1]
    B[i, 1] = auc1
    
    # apply model to bootstrap data
    prob2 = predict(logit.boot, type='response', data.boot)
    pred2 = prediction(prob2, data.boot$Trifecta3)
    auc2 = performance(pred2,"auc")@y.values[[1]][1]
    B[i, 2] = auc2
}

resultado=B %>% as_data_frame() %>% as_tibble() %>% mutate(optimism=auc_boot-auc_orig)
## Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.

Más o menos se obtiene los mismo. El optimismo (bias) es pequeño.

resultado %>% summarise_all(mean)
## # A tibble: 1 x 3
##   auc_orig auc_boot optimism
##      <dbl>    <dbl>    <dbl>
## 1    0.752    0.782   0.0301

La variabilidad de las estimacines anteriores también es pequeña.

resultado %>% summarise_all(sd)
## # A tibble: 1 x 3
##   auc_orig auc_boot optimism
##      <dbl>    <dbl>    <dbl>
## 1   0.0155   0.0551   0.0565

Más o menos vemos lo mismo. No tenemos problemas de sesgo. Se puede ignorar lo que hemos hecho y comentarlo de palabra.