Leemos los datos y definimos variables nuevas:

df=read_sav("CopiaAnalisisDesember2019.sav")
df=df %>% mutate(TumorSolido=as.factor(TumorType==1),Diameter=ifelse(CTdiameter>=6,6,CTdiameter)) %>%
    mutate(
        Trifecta1= (ClampingTime<=20) & Bleeding<=500 & (IntraopComplicationYN==0),
        Trifecta2= (ClampingTime<20) & Bleeding <500 & (IntraopComplicationYN==0),
        Trifecta3= (ClampingTime<=20) & Bleeding<=500 & (RosenthalGrade<=1),
        Trifecta4= (ClampingTime< 20) & Bleeding< 500 & (RosenthalGrade<=1),
        ) %>%
    mutate(ChangeGFRabs=GFRDischarge-PreGFR) %>%
    mutate(ChangeGFRabs=ChangeGFRabs/PreGFR*100) %>%
    mutate(HospitalStay4=HospitalStay>=5,
           ClavienDindoGrade3=ClavienDindoGrade>=3) %>% select(-OperDate)


df2=df %>% as_factor() %>% mutate(Trifecta3=as.factor(Trifecta3))


rndr <- function(x, name, ...) {
    if (length(x) == 0) {
        y <- df2[[name]]
        s <- rep("", length(render.default(x=y, name=name, ...)))
        if (is.numeric(y)) {
            p <- t.test(y ~ df2$Trifecta3)$p.value
        } else {
            p <- chisq.test(table(y, droplevels(df2$Trifecta3)))$p.value
        }
        s[2] <- sub("<", "&lt;", format.pval(p, digits=3, eps=0.001))
        s
    } else {
        render.default(x=x, name=name, ...)
    }
}

rndr.strat <- function(label, n, ...) {
    ifelse(n==0, label, render.strat.default(label, n, ...))
}

table1(~ . ,data=df2)

	Overall (n=122)
Age
Mean (SD)	61.5 (12.2)
Median [Min, Max]	65.0 [18.0, 85.0]
Sex
Man	80 (65.6%)
Woman	42 (34.4%)
BMI
Mean (SD)	27.5 (4.78)
Median [Min, Max]	27.1 [17.3, 47.9]
CharlsonIndex
Mean (SD)	4.58 (1.55)
Median [Min, Max]	5.00 [2.00, 9.00]
ASA
ASA I	6 (4.9%)
ASA II	61 (50.0%)
ASA III	54 (44.3%)
ASA IV	1 (0.8%)
Hb
Mean (SD)	14.3 (1.38)
Median [Min, Max]	14.5 [9.20, 17.8]
PreGFR
Mean (SD)	80.8 (15.6)
Median [Min, Max]	89.5 [13.0, 90.0]
TumorType
Solid	90 (73.8%)
Bosniak 2	0 (0%)
Bosniak 3	13 (10.7%)
Bosniak 4	19 (15.6%)
Number
Mean (SD)	1.04 (0.237)
Median [Min, Max]	1.00 [1.00, 3.00]
Side
Right	66 (54.1%)
Left	55 (45.1%)
3	1 (0.8%)
CT størrelse
Mean (SD)	2.69 (1.20)
Median [Min, Max]	2.45 [1.10, 9.00]
RENALscore
Mean (SD)	6.42 (1.79)
Median [Min, Max]	6.50 [4.00, 10.0]
RenalSuffix
a	26 (21.3%)
p	54 (44.3%)
x	42 (34.4%)
OperatorAndTys
And	89 (73.0%)
Tys	33 (27.0%)
Opr.tid min.
Mean (SD)	162 (35.8)
Median [Min, Max]	157 [103, 261]
Blødning ml.
Mean (SD)	226 (282)
Median [Min, Max]	100 [50.0, 2000]
ClampingTime
Mean (SD)	13.1 (5.22)
Median [Min, Max]	12.0 [0.00, 32.0]
Dren
No	22 (18.0%)
Yes	100 (82.0%)
Handport 0/1
No	28 (23.0%)
Yes	94 (77.0%)
IntraopComplicationYN
No	107 (87.7%)
Yes	15 (12.3%)
Cause
Surgeon	11 (9.0%)
Anestesiolog	4 (3.3%)
Missing	107 (87.7%)
RosenthalGrade
Mean (SD)	0.213 (0.633)
Median [Min, Max]	0.00 [0.00, 3.00]
Tumorstørrelse pT i cm.
Mean (SD)	2.46 (1.16)
Median [Min, Max]	2.20 [1.00, 8.00]
Histology
ClearCell	55 (45.1%)
Papillary	26 (21.3%)
Chromophobe	7 (5.7%)
ClearCell Cystisc changes	4 (3.3%)
Multilocular cystic	2 (1.6%)
Oncocytoma	17 (13.9%)
Others	10 (8.2%)
Missing	1 (0.8%)
pT
pT1a	105 (86.1%)
pT1b	16 (13.1%)
pT2a	0 (0%)
pT2b	1 (0.8%)
pT3a	0 (0%)
Margins
Negative	112 (91.8%)
Positive	10 (8.2%)
uncertain	0 (0%)
Postopr liggetid dager
Mean (SD)	4.57 (1.79)
Median [Min, Max]	4.00 [2.00, 13.0]
GFRDischarge
Mean (SD)	74.8 (18.7)
Median [Min, Max]	82.5 [12.0, 90.0]
GFR1Month
Mean (SD)	73.0 (18.8)
Median [Min, Max]	81.0 [31.0, 90.0]
Missing	81 (66.4%)
CRP1day
Mean (SD)	51.9 (38.5)
Median [Min, Max]	44.5 [5.00, 235]
CRP2day
Mean (SD)	104 (73.9)
Median [Min, Max]	81.5 [13.0, 400]
CRPmax
Mean (SD)	131 (86.8)
Median [Min, Max]	109 [13.0, 461]
CRPut
Mean (SD)	71.8 (39.2)
Median [Min, Max]	65.0 [9.00, 213]
Missing	14 (11.5%)
Transfusjon ant enh
Mean (SD)	0.107 (0.442)
Median [Min, Max]	0.00 [0.00, 3.00]
PostopComplicationYN
No	89 (73.0%)
Yes	33 (27.0%)
ClavienDindoGrade
Mean (SD)	0.541 (1.08)
Median [Min, Max]	0.00 [0.00, 4.00]
Komplikasjoner 0/1
	50 (41.0%)
0	34 (27.9%)
1ileus + orchitt ve side etter hjemkomst,antibiotikabeh.	1 (0.8%)
1paralytisk ileus	1 (0.8%)
Atb for mistanke UVI men ikke bekreftet med dyrkning	1 (0.8%)
bilat lungeemboli 4 uker senere, Albyl e ikke sep	1 (0.8%)
blødning fra binyre som måtte fjernes, brokk	1 (0.8%)
blødning i trokar, nyresvikt (tidligre nefrektomert)	1 (0.8%)
chylus ascites	1 (0.8%)
CRP stigning behandlet med cefalo	1 (0.8%)
eliquis	1 (0.8%)
FEBER. infe ukjent fokus	1 (0.8%)
Iiskemisk kolitt	1 (0.8%)
ileus, konvertert til nefrektomi	1 (0.8%)
infeksjon??? Sansynlig ikke	1 (0.8%)
konvertert til nefrektomi	1 (0.8%)
kreat stigning	1 (0.8%)
lettgradig tarmparalyse	1 (0.8%)
miltskade, UVI	1 (0.8%)
nyrven skade, nyrepol ischemi	1 (0.8%)
reinnlagt SSF med magesmerter 12 postopr dag	1 (0.8%)
reoperasjon grunnet postop blødning med hematom evakueri	1 (0.8%)
Resp svikt atrieflimmer, intensiv, telemetri AB behandlet	1 (0.8%)
respirasjon svikt-UVI	1 (0.8%)
respirasjonsproblem,pleuravæske,aspirasjonspneumoni	1 (0.8%)
respirasjonsvikt	1 (0.8%)
respirasjonsvikt + lunge atelectase med Int opphold	1 (0.8%)
respirasjonsvikt, ileus	1 (0.8%)
retroperitonealt hematom, pleuravæske, diare	1 (0.8%)
smerter	1 (0.8%)
subfebrilia, AB beh	1 (0.8%)
subileus	1 (0.8%)
tarmparalyse	2 (1.6%)
tarmparalyse,oppkast	1 (0.8%)
urinretensjon	2 (1.6%)
UVI	2 (1.6%)
UVI uten funn på CT, innlagt i 5 dager for atb	1 (0.8%)
TumorSolido
FALSE	32 (26.2%)
TRUE	90 (73.8%)
Diameter
Mean (SD)	2.67 (1.09)
Median [Min, Max]	2.45 [1.10, 6.00]
Trifecta1
Yes	99 (81.1%)
No	23 (18.9%)
Trifecta2
Yes	93 (76.2%)
No	29 (23.8%)
Trifecta3
FALSE	17 (13.9%)
TRUE	105 (86.1%)
Trifecta4
Yes	98 (80.3%)
No	24 (19.7%)
ChangeGFRabs
Mean (SD)	-7.73 (14.4)
Median [Min, Max]	-0.556 [-50.6, 36.7]
HospitalStay4
Yes	49 (40.2%)
No	73 (59.8%)
ClavienDindoGrade3
Yes	10 (8.2%)
No	112 (91.8%)

df2$Trifecta3=factor(as.integer(df2$Trifecta3),levels=1:3,labels=c("FALSE","TRUE","p-value"))  
table1(~ . |Trifecta3,data=df2 , render=rndr, render.strat=rndr.strat,overall=F,droplevels = F)

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

## Warning in chisq.test(table(y, droplevels(df2$Trifecta3))): Chi-squared
## approximation may be incorrect

	FALSE (n=17)	TRUE (n=105)	p-value
Age
Mean (SD)	63.9 (11.2)	61.1 (12.4)	0.348
Median [Min, Max]	65.0 [42.0, 85.0]	65.0 [18.0, 85.0]
Sex
Man	14 (82.4%)	66 (62.9%)	0.196
Woman	3 (17.6%)	39 (37.1%)
BMI
Mean (SD)	29.0 (3.70)	27.3 (4.90)	0.104
Median [Min, Max]	28.7 [23.2, 36.6]	26.8 [17.3, 47.9]
CharlsonIndex
Mean (SD)	5.29 (1.86)	4.47 (1.47)	0.097
Median [Min, Max]	5.00 [2.00, 9.00]	4.00 [2.00, 8.00]
ASA
ASA I	1 (5.9%)	5 (4.8%)	0.839
ASA II	7 (41.2%)	54 (51.4%)
ASA III	9 (52.9%)	45 (42.9%)
ASA IV	0 (0%)	1 (1.0%)
Hb
Mean (SD)	14.1 (1.64)	14.4 (1.34)	0.478
Median [Min, Max]	14.4 [9.20, 16.3]	14.5 [10.6, 17.8]
PreGFR
Mean (SD)	79.6 (15.7)	81.0 (15.7)	0.747
Median [Min, Max]	89.0 [45.0, 90.0]	90.0 [13.0, 90.0]
TumorType
Solid	13 (76.5%)	77 (73.3%)	NA
Bosniak 2	0 (0%)	0 (0%)
Bosniak 3	1 (5.9%)	12 (11.4%)
Bosniak 4	3 (17.6%)	16 (15.2%)
Number
Mean (SD)	1.06 (0.243)	1.04 (0.237)	0.746
Median [Min, Max]	1.00 [1.00, 2.00]	1.00 [1.00, 3.00]
Side
Right	9 (52.9%)	57 (54.3%)	0.912
Left	8 (47.1%)	47 (44.8%)
3	0 (0%)	1 (1.0%)
CT størrelse
Mean (SD)	3.55 (1.77)	2.56 (1.02)	0.036
Median [Min, Max]	3.30 [1.30, 9.00]	2.30 [1.10, 5.60]
RENALscore
Mean (SD)	7.59 (1.46)	6.23 (1.77)	0.002
Median [Min, Max]	8.00 [4.00, 10.0]	6.00 [4.00, 10.0]
RenalSuffix
a	5 (29.4%)	21 (20.0%)	0.672
p	7 (41.2%)	47 (44.8%)
x	5 (29.4%)	37 (35.2%)
OperatorAndTys
And	14 (82.4%)	75 (71.4%)	0.518
Tys	3 (17.6%)	30 (28.6%)
Opr.tid min.
Mean (SD)	214 (39.8)	154 (27.2)	<0.001
Median [Min, Max]	222 [131, 261]	151 [103, 237]
Blødning ml.
Mean (SD)	635 (554)	160 (117)	0.003
Median [Min, Max]	300 [100, 2000]	100 [50.0, 500]
ClampingTime
Mean (SD)	19.5 (7.59)	12.0 (3.86)	<0.001
Median [Min, Max]	22.0 [7.00, 32.0]	12.0 [0.00, 20.0]
Dren
No	0 (0%)	22 (21.0%)	0.081
Yes	17 (100%)	83 (79.0%)
Handport 0/1
No	3 (17.6%)	25 (23.8%)	0.803
Yes	14 (82.4%)	80 (76.2%)
IntraopComplicationYN
No	8 (47.1%)	99 (94.3%)	<0.001
Yes	9 (52.9%)	6 (5.7%)
Cause
Surgeon	7 (41.2%)	4 (3.8%)	1
Anestesiolog	2 (11.8%)	2 (1.9%)
Missing	8 (47.1%)	99 (94.3%)
RosenthalGrade
Mean (SD)	1.18 (1.24)	0.0571 (0.233)	0.002
Median [Min, Max]	1.00 [0.00, 3.00]	0.00 [0.00, 1.00]
Tumorstørrelse pT i cm.
Mean (SD)	2.94 (1.67)	2.39 (1.04)	0.201
Median [Min, Max]	2.50 [1.00, 8.00]	2.10 [1.00, 5.70]
Histology
ClearCell	8 (47.1%)	47 (44.8%)	0.759
Papillary	5 (29.4%)	21 (20.0%)
Chromophobe	1 (5.9%)	6 (5.7%)
ClearCell Cystisc changes	0 (0%)	4 (3.8%)
Multilocular cystic	0 (0%)	2 (1.9%)
Oncocytoma	3 (17.6%)	14 (13.3%)
Others	0 (0%)	10 (9.5%)
Missing	0 (0%)	1 (1.0%)
pT
pT1a	12 (70.6%)	93 (88.6%)	NA
pT1b	4 (23.5%)	12 (11.4%)
pT2a	0 (0%)	0 (0%)
pT2b	1 (5.9%)	0 (0%)
pT3a	0 (0%)	0 (0%)
Margins
Negative	15 (88.2%)	97 (92.4%)	NA
Positive	2 (11.8%)	8 (7.6%)
uncertain	0 (0%)	0 (0%)
Postopr liggetid dager
Mean (SD)	5.65 (2.06)	4.40 (1.68)	0.028
Median [Min, Max]	5.00 [3.00, 10.0]	4.00 [2.00, 13.0]
GFRDischarge
Mean (SD)	67.9 (22.5)	75.9 (17.9)	0.183
Median [Min, Max]	77.0 [32.0, 90.0]	84.0 [12.0, 90.0]
GFR1Month
Mean (SD)	59.1 (25.4)	76.3 (15.6)	0.103
Median [Min, Max]	50.0 [31.0, 90.0]	81.0 [32.0, 90.0]
Missing	9 (52.9%)	72 (68.6%)
CRP1day
Mean (SD)	89.8 (60.5)	45.7 (29.8)	0.009
Median [Min, Max]	89.0 [8.00, 235]	40.0 [5.00, 166]
CRP2day
Mean (SD)	156 (103)	95.4 (64.7)	0.03
Median [Min, Max]	139 [14.0, 333]	76.0 [13.0, 400]
CRPmax
Mean (SD)	187 (125)	121 (75.9)	0.049
Median [Min, Max]	154 [14.0, 461]	100 [13.0, 400]
CRPut
Mean (SD)	69.7 (34.4)	72.1 (40.2)	0.803
Median [Min, Max]	66.0 [9.00, 140]	64.0 [10.0, 213]
Missing	1 (5.9%)	13 (12.4%)
Transfusjon ant enh
Mean (SD)	0.529 (0.943)	0.0381 (0.237)	0.048
Median [Min, Max]	0.00 [0.00, 3.00]	0.00 [0.00, 2.00]
PostopComplicationYN
No	8 (47.1%)	81 (77.1%)	0.022
Yes	9 (52.9%)	24 (22.9%)
ClavienDindoGrade
Mean (SD)	1.47 (1.70)	0.390 (0.860)	0.02
Median [Min, Max]	1.00 [0.00, 4.00]	0.00 [0.00, 4.00]
Komplikasjoner 0/1
	5 (29.4%)	45 (42.9%)	0.002
0	2 (11.8%)	32 (30.5%)
bilat lungeemboli 4 uker senere, Albyl e ikke sep	1 (5.9%)	0 (0%)
blødning fra binyre som måtte fjernes, brokk	1 (5.9%)	0 (0%)
ileus, konvertert til nefrektomi	1 (5.9%)	0 (0%)
konvertert til nefrektomi	1 (5.9%)	0 (0%)
miltskade, UVI	1 (5.9%)	0 (0%)
Resp svikt atrieflimmer, intensiv, telemetri AB behandlet	1 (5.9%)	0 (0%)
respirasjon svikt-UVI	1 (5.9%)	0 (0%)
respirasjonsproblem,pleuravæske,aspirasjonspneumoni	1 (5.9%)	0 (0%)
respirasjonsvikt + lunge atelectase med Int opphold	1 (5.9%)	0 (0%)
tarmparalyse	1 (5.9%)	1 (1.0%)
1ileus + orchitt ve side etter hjemkomst,antibiotikabeh.	0 (0%)	1 (1.0%)
1paralytisk ileus	0 (0%)	1 (1.0%)
Atb for mistanke UVI men ikke bekreftet med dyrkning	0 (0%)	1 (1.0%)
blødning i trokar, nyresvikt (tidligre nefrektomert)	0 (0%)	1 (1.0%)
chylus ascites	0 (0%)	1 (1.0%)
CRP stigning behandlet med cefalo	0 (0%)	1 (1.0%)
eliquis	0 (0%)	1 (1.0%)
FEBER. infe ukjent fokus	0 (0%)	1 (1.0%)
Iiskemisk kolitt	0 (0%)	1 (1.0%)
infeksjon??? Sansynlig ikke	0 (0%)	1 (1.0%)
kreat stigning	0 (0%)	1 (1.0%)
lettgradig tarmparalyse	0 (0%)	1 (1.0%)
nyrven skade, nyrepol ischemi	0 (0%)	1 (1.0%)
reinnlagt SSF med magesmerter 12 postopr dag	0 (0%)	1 (1.0%)
reoperasjon grunnet postop blødning med hematom evakueri	0 (0%)	1 (1.0%)
respirasjonsvikt	0 (0%)	1 (1.0%)
respirasjonsvikt, ileus	0 (0%)	1 (1.0%)
retroperitonealt hematom, pleuravæske, diare	0 (0%)	1 (1.0%)
smerter	0 (0%)	1 (1.0%)
subfebrilia, AB beh	0 (0%)	1 (1.0%)
subileus	0 (0%)	1 (1.0%)
tarmparalyse,oppkast	0 (0%)	1 (1.0%)
urinretensjon	0 (0%)	2 (1.9%)
UVI	0 (0%)	2 (1.9%)
UVI uten funn på CT, innlagt i 5 dager for atb	0 (0%)	1 (1.0%)
TumorSolido
FALSE	4 (23.5%)	28 (26.7%)	1
TRUE	13 (76.5%)	77 (73.3%)
Diameter
Mean (SD)	3.38 (1.27)	2.56 (1.02)	0.02
Median [Min, Max]	3.30 [1.30, 6.00]	2.30 [1.10, 5.60]
Trifecta1
Yes	0 (0%)	99 (94.3%)	<0.001
No	17 (100%)	6 (5.7%)
Trifecta2
Yes	0 (0%)	93 (88.6%)	<0.001
No	17 (100%)	12 (11.4%)
Trifecta3
FALSE	17 (100%)	0 (0%)	NA
TRUE	0 (0%)	105 (100%)
p-value	0 (0%)	0 (0%)
Trifecta4
Yes	0 (0%)	98 (93.3%)	<0.001
No	17 (100%)	7 (6.7%)
ChangeGFRabs
Mean (SD)	-16.2 (17.0)	-6.36 (13.5)	0.035
Median [Min, Max]	-11.2 [-48.4, 0.00]	0.00 [-50.6, 36.7]
HospitalStay4
Yes	12 (70.6%)	37 (35.2%)	0.013
No	5 (29.4%)	68 (64.8%)
ClavienDindoGrade3
Yes	5 (29.4%)	5 (4.8%)	0.003
No	12 (70.6%)	100 (95.2%)

Parece que nos interesa:

Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore

d <- datadist(df) 
options(datadist='d') 
f<-lrm(Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore,data=df) 
summary(f)

##              Effects              Response : Trifecta3 
## 
##  Factor        Low    High Diff. Effect   S.E.    Lower 0.95 Upper 0.95
##  BMI           24.425 30.2 5.775 -0.49410 0.34745 -1.175100   0.18689  
##   Odds Ratio   24.425 30.2 5.775  0.61012      NA  0.308790   1.20550  
##  CharlsonIndex  3.250  6.0 2.750 -1.10130 0.50410 -2.089300  -0.11330  
##   Odds Ratio    3.250  6.0 2.750  0.33244      NA  0.123770   0.89289  
##  RENALscore     5.000  8.0 3.000 -1.54410 0.54590 -2.614000  -0.47415  
##   Odds Ratio    5.000  8.0 3.000  0.21351      NA  0.073239   0.62241

#summary(f,BMI=c(20,40),Diameter=c(0,5)) 

plot(nomogram(f,fun=plogis,funlabel="Probability of Trifecta"
              #, fun.at=c(.05,.1,.25,.5,.75,.9,.95), #Perdidas1Mes=c(0,5,10,20,seq(50,700,by=50))
))

Vamos a explorar la curva ROC:

df$predictions=predict(f)
df$labels=2*(df$Trifecta3-0.5)
precrec_obj <- evalmod(scores = df$predictions, labels = df$labels)
autoplot(precrec_obj)

uauc1 <- evalmod(scores=df$predictions, labels = df$labels, mode="aucroc")

# print 'aucroc'
uauc1

## 
##     === Input data ===
## 
##      Model name Dataset ID # of negatives # of positives
##    1         m1          1             17            105
## 
## 
##     === AUCs ===
## 
##      Model name Dataset ID       AUC      U
##    1         m1          1 0.7582633 1353.5

Bootstrap

Vamos a usar la técnica de bootstrap con la idea de tener algo de validación interna del modelo de regresión anterior. Cada cálculo dará un resultado aleatorio. Vamos a fijar una semilla para reproducibilidad:

set.seed(20201001)

logit = glm(Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore,data=df, family = binomial(link = "logit"))

logit %>% summary()

## 
## Call:
## glm(formula = Trifecta3 ~ 1 + BMI + CharlsonIndex + RENALscore, 
##     family = binomial(link = "logit"), data = df)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4754   0.2066   0.3484   0.5815   1.1064  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    9.76122    2.61667   3.730 0.000191 ***
## BMI           -0.08556    0.06016  -1.422 0.154986    
## CharlsonIndex -0.40047    0.18330  -2.185 0.028901 *  
## RENALscore    -0.51470    0.18195  -2.829 0.004673 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 98.520  on 121  degrees of freedom
## Residual deviance: 82.789  on 118  degrees of freedom
## AIC: 90.789
## 
## Number of Fisher Scoring iterations: 5

Vamos a aplicar la técnica de bootstrap para estudiar la estabilidad de las estimaciones de los coeficientes de la regresión logística:.

R = 10000                      # number of bootstrap samples
n = nrow(df)              # sample size
k = length(coef(logit))     # number of coefficients

# set up a empty Rxn matrix B
B = matrix(nrow = R, ncol = k,
           dimnames = list(paste("Sample",1:R), 
                           names(coef(logit))))
# loop R times
for(i in 1:R){
    # sample credit data with replacement
    boot.data = df[sample(x = 1:n, size = n, replace = TRUE), ]
    # fit the model on the boostrapped sample
    boot.logit = glm(logit$formula, 
                     data=boot.data, family = binomial(link = "logit"))
    # store the coefficients
    B[i,] = coef(boot.logit)
}



# function to return bootstrapped coefficients
myLogitCoef <- function(data, indices, formula) {
    d <- data[indices,]
    fit <- glm(formula, data=d, family = binomial(link = "logit"))
    return(coef(fit))
}

# set up cluster of 4 CPU cores
cl<-makeCluster(4)
clusterExport(cl, 'myLogitCoef')

set.seed(373)
coef.boot <- boot(data=df, statistic=myLogitCoef, R=10000, 
                  formula= logit$formula,
                  # process in parallel across 4 CPU cores
                  parallel = 'snow', ncpus=4, cl=cl)
stopCluster(cl)

coef.boot

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = df, statistic = myLogitCoef, R = 10000, formula = logit$formula, 
##     parallel = "snow", ncpus = 4, cl = cl)
## 
## 
## Bootstrap Statistics :
##       original        bias    std. error
## t1*  9.7612216  0.5328396901  2.68695651
## t2* -0.0855582 -0.0004506506  0.05976187
## t3* -0.4004748 -0.0224398008  0.21652931
## t4* -0.5146958 -0.0423837644  0.19574431

No parece que los sesgos sean importantes. Ni en valor, ni menos tras compararlos con el tamaño del error standard. creo que basta decirlo de palabra. No será necesario enseñarlo.

Vamos a usar ahora bootstrap para estimar AUC:

prob = predict.glm(logit, type='response', newdata =  df)
pred = prediction(prob, df$Trifecta3)

# AUC
auc = performance(pred,"auc")@y.values[[1]][1]

# plot the ROC curve
perf <- performance(pred,"tpr","fpr")
plot(perf, col="navyblue", cex.main=1,
     main= paste("Logistic Regression ROC Curve: AUC =", round(auc,3)))
abline(a=0, b = 1, col='darkorange1')

R = 10000
n = nrow(df)

# empty Rx2 matrix for bootstrap results 
B = matrix(nrow = R, ncol = 2,
           dimnames = list(paste('Sample',1:R),
                           c("auc_orig","auc_boot")))

set.seed(701)
for(i in 1:R){
    
    # draw a random sample
    obs.boot <- sample(x = 1:n, size = n, replace = T)
    data.boot <- df[obs.boot, ]
    
    # fit the model on bootstrap sample
    logit.boot <- glm(logit$formula , 
                      data=data.boot,
                      family = binomial(link = "logit"))
    
    # apply model to original data
    prob1 = predict(logit.boot, type='response', df)
    pred1 = prediction(prob1, df$Trifecta3)
    auc1 = performance(pred1,"auc")@y.values[[1]][1]
    B[i, 1] = auc1
    
    # apply model to bootstrap data
    prob2 = predict(logit.boot, type='response', data.boot)
    pred2 = prediction(prob2, data.boot$Trifecta3)
    auc2 = performance(pred2,"auc")@y.values[[1]][1]
    B[i, 2] = auc2
}

resultado=B %>% as_data_frame() %>% as_tibble() %>% mutate(optimism=auc_boot-auc_orig)

## Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.

Más o menos se obtiene los mismo. El optimismo (bias) es pequeño.

resultado %>% summarise_all(mean)

## # A tibble: 1 x 3
##   auc_orig auc_boot optimism
##      <dbl>    <dbl>    <dbl>
## 1    0.752    0.782   0.0301

La variabilidad de las estimacines anteriores también es pequeña.

resultado %>% summarise_all(sd)

## # A tibble: 1 x 3
##   auc_orig auc_boot optimism
##      <dbl>    <dbl>    <dbl>
## 1   0.0155   0.0551   0.0565

Más o menos vemos lo mismo. No tenemos problemas de sesgo. Se puede ignorar lo que hemos hecho y comentarlo de palabra.

Ovidiu

Bootstrap