Curvas ROC

Las librerías

library(ROCR)

Los datos

# Load the two example score tables (paths are relative to the working
# directory).
datos1 <- read.csv("../datos/roc-example-1.csv")
# stringsAsFactors = TRUE is explicit because R >= 4.0.0 no longer converts
# strings to factors by default; without it the text `class` column would be
# read as character rather than as a two-level factor.
datos2 <- read.csv("../datos/roc-example-2.csv", stringsAsFactors = TRUE)


# Show the first six rows of datos1.
head(datos1)
##        prob class
## 1 0.9917340     1
## 2 0.9768288     1
## 3 0.9763148     1
## 4 0.9601505     1
## 5 0.9351574     1
## 6 0.9335989     1
# Show the last six rows of datos1.
tail(datos1)
##           prob class
## 95  0.10426897     0
## 96  0.07292866     0
## 97  0.07154785     0
## 98  0.04703280     0
## 99  0.04652589     0
## 100 0.00112760     0
# Show the first six rows of datos2.
head(datos2)
##        prob class
## 1 0.9917340 buyer
## 2 0.9768288 buyer
## 3 0.9763148 buyer
## 4 0.9601505 buyer
## 5 0.9351574 buyer
## 6 0.9335989 buyer
# Show the last six rows of datos2.
tail(datos2)
##           prob     class
## 95  0.10426897 non-buyer
## 96  0.07292866 non-buyer
## 97  0.07154785 non-buyer
## 98  0.04703280 non-buyer
## 99  0.04652589 non-buyer
## 100 0.00112760 non-buyer

Explorando los datos1

# Display the structure of datos1: dimensions and the type of each column.
str(datos1)
## 'data.frame':    100 obs. of  2 variables:
##  $ prob : num  0.992 0.977 0.976 0.96 0.935 ...
##  $ class: int  1 1 1 1 1 1 1 1 1 0 ...
# Print per-column summary statistics of datos1.
summary(datos1)
##       prob              class     
##  Min.   :0.001128   Min.   :0.00  
##  1st Qu.:0.317687   1st Qu.:0.00  
##  Median :0.533578   Median :1.00  
##  Mean   :0.529236   Mean   :0.54  
##  3rd Qu.:0.781416   3rd Qu.:1.00  
##  Max.   :0.991734   Max.   :1.00

Explorando los datos2

# Display the structure of datos2: dimensions and the type of each column.
str(datos2)
## 'data.frame':    100 obs. of  2 variables:
##  $ prob : num  0.992 0.977 0.976 0.96 0.935 ...
##  $ class: Factor w/ 2 levels "buyer","non-buyer": 1 1 1 1 1 1 1 1 1 2 ...
# Print per-column summary statistics of datos2.
summary(datos2)
##       prob                class   
##  Min.   :0.001128   buyer    :54  
##  1st Qu.:0.317687   non-buyer:46  
##  Median :0.533578                 
##  Mean   :0.529236                 
##  3rd Qu.:0.781416                 
##  Max.   :0.991734

Generar el objeto de predicción

# Build the ROCR prediction object from the scores (prob) and the true
# labels (class) of datos1, then print it.
predict1 <- prediction(predictions = datos1$prob, labels = datos1$class)
predict1
## An object of class "prediction"
## Slot "predictions":
## [[1]]
##   [1] 0.99173404 0.97682879 0.97631479 0.96015054 0.93515738 0.93359886
##   [7] 0.92531568 0.92486715 0.90804187 0.90171203 0.90149578 0.89417377
##  [13] 0.88941282 0.88913969 0.87396706 0.87391514 0.87059886 0.85775832
##  [19] 0.82275614 0.81576505 0.81101760 0.80412822 0.80322212 0.79257332
##  [25] 0.79233624 0.77777593 0.77146537 0.77072779 0.73860977 0.73320167
##  [31] 0.69094004 0.68907019 0.68784751 0.67534324 0.67059922 0.66701779
##  [37] 0.65995183 0.63541281 0.62834747 0.62423536 0.62171950 0.61417473
##  [43] 0.60600444 0.60215943 0.57970328 0.57705735 0.57633523 0.57099714
##  [49] 0.56004202 0.54787125 0.51928556 0.51327811 0.50251169 0.49815058
##  [55] 0.49616956 0.47840739 0.47754679 0.46323419 0.45227354 0.44950615
##  [61] 0.43443516 0.43274841 0.42845777 0.40701592 0.40272660 0.40248242
##  [67] 0.40140505 0.37646732 0.36254324 0.35547851 0.34872470 0.33814262
##  [73] 0.33528819 0.33041954 0.31878806 0.31438300 0.31164060 0.30761685
##  [79] 0.23584000 0.23310990 0.22243908 0.20977357 0.19511299 0.18730064
##  [85] 0.18210618 0.17060700 0.15536249 0.14907332 0.14288827 0.13868194
##  [91] 0.13090790 0.12741177 0.12676979 0.11107864 0.10426897 0.07292866
##  [97] 0.07154785 0.04703280 0.04652589 0.00112760
## 
## 
## Slot "labels":
## [[1]]
##   [1] 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1
##  [38] 1 1 0 1 1 1 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0
##  [75] 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## Levels: 0 < 1
## 
## 
## Slot "cutoffs":
## [[1]]
##   [1]        Inf 0.99173404 0.97682879 0.97631479 0.96015054 0.93515738
##   [7] 0.93359886 0.92531568 0.92486715 0.90804187 0.90171203 0.90149578
##  [13] 0.89417377 0.88941282 0.88913969 0.87396706 0.87391514 0.87059886
##  [19] 0.85775832 0.82275614 0.81576505 0.81101760 0.80412822 0.80322212
##  [25] 0.79257332 0.79233624 0.77777593 0.77146537 0.77072779 0.73860977
##  [31] 0.73320167 0.69094004 0.68907019 0.68784751 0.67534324 0.67059922
##  [37] 0.66701779 0.65995183 0.63541281 0.62834747 0.62423536 0.62171950
##  [43] 0.61417473 0.60600444 0.60215943 0.57970328 0.57705735 0.57633523
##  [49] 0.57099714 0.56004202 0.54787125 0.51928556 0.51327811 0.50251169
##  [55] 0.49815058 0.49616956 0.47840739 0.47754679 0.46323419 0.45227354
##  [61] 0.44950615 0.43443516 0.43274841 0.42845777 0.40701592 0.40272660
##  [67] 0.40248242 0.40140505 0.37646732 0.36254324 0.35547851 0.34872470
##  [73] 0.33814262 0.33528819 0.33041954 0.31878806 0.31438300 0.31164060
##  [79] 0.30761685 0.23584000 0.23310990 0.22243908 0.20977357 0.19511299
##  [85] 0.18730064 0.18210618 0.17060700 0.15536249 0.14907332 0.14288827
##  [91] 0.13868194 0.13090790 0.12741177 0.12676979 0.11107864 0.10426897
##  [97] 0.07292866 0.07154785 0.04703280 0.04652589 0.00112760
## 
## 
## Slot "fp":
## [[1]]
##   [1]  0  0  0  0  0  0  0  0  0  0  1  1  1  1  2  2  2  2  3  3  3  3  3  3  3
##  [26]  4  4  4  4  4  4  4  4  5  5  5  5  5  5  5  6  6  6  6  7  8  8  9  9  9
##  [51] 10 10 10 10 10 10 11 12 13 13 14 15 16 17 17 17 18 18 19 20 21 22 23 24 25
##  [76] 26 27 28 28 29 29 30 31 31 31 32 33 34 34 35 36 37 38 39 40 41 42 43 44 45
## [101] 46
## 
## 
## Slot "tp":
## [[1]]
##   [1]  0  1  2  3  4  5  6  7  8  9  9 10 11 12 12 13 14 15 15 16 17 18 19 20 21
##  [26] 21 22 23 24 25 26 27 28 28 29 30 31 32 33 34 34 35 36 37 37 37 38 38 39 40
##  [51] 40 41 42 43 44 45 45 45 45 46 46 46 46 46 47 48 48 49 49 49 49 49 49 49 49
##  [76] 49 49 49 50 50 51 51 51 52 53 53 53 53 54 54 54 54 54 54 54 54 54 54 54 54
## [101] 54
## 
## 
## Slot "tn":
## [[1]]
##   [1] 46 46 46 46 46 46 46 46 46 46 45 45 45 45 44 44 44 44 43 43 43 43 43 43 43
##  [26] 42 42 42 42 42 42 42 42 41 41 41 41 41 41 41 40 40 40 40 39 38 38 37 37 37
##  [51] 36 36 36 36 36 36 35 34 33 33 32 31 30 29 29 29 28 28 27 26 25 24 23 22 21
##  [76] 20 19 18 18 17 17 16 15 15 15 14 13 12 12 11 10  9  8  7  6  5  4  3  2  1
## [101]  0
## 
## 
## Slot "fn":
## [[1]]
##   [1] 54 53 52 51 50 49 48 47 46 45 45 44 43 42 42 41 40 39 39 38 37 36 35 34 33
##  [26] 33 32 31 30 29 28 27 26 26 25 24 23 22 21 20 20 19 18 17 17 17 16 16 15 14
##  [51] 14 13 12 11 10  9  9  9  9  8  8  8  8  8  7  6  6  5  5  5  5  5  5  5  5
##  [76]  5  5  5  4  4  3  3  3  2  1  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0
## [101]  0
## 
## 
## Slot "n.pos":
## [[1]]
## [1] 54
## 
## 
## Slot "n.neg":
## [[1]]
## [1] 46
## 
## 
## Slot "n.pos.pred":
## [[1]]
##   [1]   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
##  [19]  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
##  [37]  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
##  [55]  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
##  [73]  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
##  [91]  90  91  92  93  94  95  96  97  98  99 100
## 
## 
## Slot "n.neg.pred":
## [[1]]
##   [1] 100  99  98  97  96  95  94  93  92  91  90  89  88  87  86  85  84  83
##  [19]  82  81  80  79  78  77  76  75  74  73  72  71  70  69  68  67  66  65
##  [37]  64  63  62  61  60  59  58  57  56  55  54  53  52  51  50  49  48  47
##  [55]  46  45  44  43  42  41  40  39  38  37  36  35  34  33  32  31  30  29
##  [73]  28  27  26  25  24  23  22  21  20  19  18  17  16  15  14  13  12  11
##  [91]  10   9   8   7   6   5   4   3   2   1   0

Elaborar el performance (rendimiento) de la predicción

# Compute the true positive rate (y axis) against the false positive rate
# (x axis) for every cutoff stored in predict1, then print the result.
performance1 <- performance(predict1, measure = "tpr", x.measure = "fpr")

performance1
## An object of class "performance"
## Slot "x.name":
## [1] "False positive rate"
## 
## Slot "y.name":
## [1] "True positive rate"
## 
## Slot "alpha.name":
## [1] "Cutoff"
## 
## Slot "x.values":
## [[1]]
##   [1] 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##   [7] 0.00000000 0.00000000 0.00000000 0.00000000 0.02173913 0.02173913
##  [13] 0.02173913 0.02173913 0.04347826 0.04347826 0.04347826 0.04347826
##  [19] 0.06521739 0.06521739 0.06521739 0.06521739 0.06521739 0.06521739
##  [25] 0.06521739 0.08695652 0.08695652 0.08695652 0.08695652 0.08695652
##  [31] 0.08695652 0.08695652 0.08695652 0.10869565 0.10869565 0.10869565
##  [37] 0.10869565 0.10869565 0.10869565 0.10869565 0.13043478 0.13043478
##  [43] 0.13043478 0.13043478 0.15217391 0.17391304 0.17391304 0.19565217
##  [49] 0.19565217 0.19565217 0.21739130 0.21739130 0.21739130 0.21739130
##  [55] 0.21739130 0.21739130 0.23913043 0.26086957 0.28260870 0.28260870
##  [61] 0.30434783 0.32608696 0.34782609 0.36956522 0.36956522 0.36956522
##  [67] 0.39130435 0.39130435 0.41304348 0.43478261 0.45652174 0.47826087
##  [73] 0.50000000 0.52173913 0.54347826 0.56521739 0.58695652 0.60869565
##  [79] 0.60869565 0.63043478 0.63043478 0.65217391 0.67391304 0.67391304
##  [85] 0.67391304 0.69565217 0.71739130 0.73913043 0.73913043 0.76086957
##  [91] 0.78260870 0.80434783 0.82608696 0.84782609 0.86956522 0.89130435
##  [97] 0.91304348 0.93478261 0.95652174 0.97826087 1.00000000
## 
## 
## Slot "y.values":
## [[1]]
##   [1] 0.00000000 0.01851852 0.03703704 0.05555556 0.07407407 0.09259259
##   [7] 0.11111111 0.12962963 0.14814815 0.16666667 0.16666667 0.18518519
##  [13] 0.20370370 0.22222222 0.22222222 0.24074074 0.25925926 0.27777778
##  [19] 0.27777778 0.29629630 0.31481481 0.33333333 0.35185185 0.37037037
##  [25] 0.38888889 0.38888889 0.40740741 0.42592593 0.44444444 0.46296296
##  [31] 0.48148148 0.50000000 0.51851852 0.51851852 0.53703704 0.55555556
##  [37] 0.57407407 0.59259259 0.61111111 0.62962963 0.62962963 0.64814815
##  [43] 0.66666667 0.68518519 0.68518519 0.68518519 0.70370370 0.70370370
##  [49] 0.72222222 0.74074074 0.74074074 0.75925926 0.77777778 0.79629630
##  [55] 0.81481481 0.83333333 0.83333333 0.83333333 0.83333333 0.85185185
##  [61] 0.85185185 0.85185185 0.85185185 0.85185185 0.87037037 0.88888889
##  [67] 0.88888889 0.90740741 0.90740741 0.90740741 0.90740741 0.90740741
##  [73] 0.90740741 0.90740741 0.90740741 0.90740741 0.90740741 0.90740741
##  [79] 0.92592593 0.92592593 0.94444444 0.94444444 0.94444444 0.96296296
##  [85] 0.98148148 0.98148148 0.98148148 0.98148148 1.00000000 1.00000000
##  [91] 1.00000000 1.00000000 1.00000000 1.00000000 1.00000000 1.00000000
##  [97] 1.00000000 1.00000000 1.00000000 1.00000000 1.00000000
## 
## 
## Slot "alpha.values":
## [[1]]
##   [1]        Inf 0.99173404 0.97682879 0.97631479 0.96015054 0.93515738
##   [7] 0.93359886 0.92531568 0.92486715 0.90804187 0.90171203 0.90149578
##  [13] 0.89417377 0.88941282 0.88913969 0.87396706 0.87391514 0.87059886
##  [19] 0.85775832 0.82275614 0.81576505 0.81101760 0.80412822 0.80322212
##  [25] 0.79257332 0.79233624 0.77777593 0.77146537 0.77072779 0.73860977
##  [31] 0.73320167 0.69094004 0.68907019 0.68784751 0.67534324 0.67059922
##  [37] 0.66701779 0.65995183 0.63541281 0.62834747 0.62423536 0.62171950
##  [43] 0.61417473 0.60600444 0.60215943 0.57970328 0.57705735 0.57633523
##  [49] 0.57099714 0.56004202 0.54787125 0.51928556 0.51327811 0.50251169
##  [55] 0.49815058 0.49616956 0.47840739 0.47754679 0.46323419 0.45227354
##  [61] 0.44950615 0.43443516 0.43274841 0.42845777 0.40701592 0.40272660
##  [67] 0.40248242 0.40140505 0.37646732 0.36254324 0.35547851 0.34872470
##  [73] 0.33814262 0.33528819 0.33041954 0.31878806 0.31438300 0.31164060
##  [79] 0.30761685 0.23584000 0.23310990 0.22243908 0.20977357 0.19511299
##  [85] 0.18730064 0.18210618 0.17060700 0.15536249 0.14907332 0.14288827
##  [91] 0.13868194 0.13090790 0.12741177 0.12676979 0.11107864 0.10426897
##  [97] 0.07292866 0.07154785 0.04703280 0.04652589 0.00112760

Representando la performance mediante un plot()

# Draw the ROC curve held in performance1.
plot(performance1)
# Add the y = x reference diagonal. abline() draws the identity line itself
# instead of joining the corners of the plot region, so the reference stays
# correct even when the two axes do not share the same limits.
abline(a = 0, b = 1)

Cuándo hacer corte

# Collect the cutoff, false positive rate and true positive rate into a
# data.frame (R's built-in tabular structure), one row per cutoff. Each
# slot is a list, so its first element is extracted.
prob.cut1 <- data.frame(
  cut = performance1@alpha.values[[1]],
  fpr = performance1@x.values[[1]],
  tpr = performance1@y.values[[1]]
)

# Show the first six rows of the cutoff table.
head(prob.cut1)
##         cut fpr        tpr
## 1       Inf   0 0.00000000
## 2 0.9917340   0 0.01851852
## 3 0.9768288   0 0.03703704
## 4 0.9763148   0 0.05555556
## 5 0.9601505   0 0.07407407
## 6 0.9351574   0 0.09259259
# Show the last six rows of the cutoff table.
tail(prob.cut1)
##            cut       fpr tpr
## 96  0.10426897 0.8913043   1
## 97  0.07292866 0.9130435   1
## 98  0.07154785 0.9347826   1
## 99  0.04703280 0.9565217   1
## 100 0.04652589 0.9782609   1
## 101 0.00112760 1.0000000   1

Encontrar un balance de corte

# Keep only the cutoffs whose true positive rate is at least 0.8.
prob.cut1[prob.cut1$tpr >= 0.8, ]
##            cut       fpr       tpr
## 55  0.49815058 0.2173913 0.8148148
## 56  0.49616956 0.2173913 0.8333333
## 57  0.47840739 0.2391304 0.8333333
## 58  0.47754679 0.2608696 0.8333333
## 59  0.46323419 0.2826087 0.8333333
## 60  0.45227354 0.2826087 0.8518519
## 61  0.44950615 0.3043478 0.8518519
## 62  0.43443516 0.3260870 0.8518519
## 63  0.43274841 0.3478261 0.8518519
## 64  0.42845777 0.3695652 0.8518519
## 65  0.40701592 0.3695652 0.8703704
## 66  0.40272660 0.3695652 0.8888889
## 67  0.40248242 0.3913043 0.8888889
## 68  0.40140505 0.3913043 0.9074074
## 69  0.37646732 0.4130435 0.9074074
## 70  0.36254324 0.4347826 0.9074074
## 71  0.35547851 0.4565217 0.9074074
## 72  0.34872470 0.4782609 0.9074074
## 73  0.33814262 0.5000000 0.9074074
## 74  0.33528819 0.5217391 0.9074074
## 75  0.33041954 0.5434783 0.9074074
## 76  0.31878806 0.5652174 0.9074074
## 77  0.31438300 0.5869565 0.9074074
## 78  0.31164060 0.6086957 0.9074074
## 79  0.30761685 0.6086957 0.9259259
## 80  0.23584000 0.6304348 0.9259259
## 81  0.23310990 0.6304348 0.9444444
## 82  0.22243908 0.6521739 0.9444444
## 83  0.20977357 0.6739130 0.9444444
## 84  0.19511299 0.6739130 0.9629630
## 85  0.18730064 0.6739130 0.9814815
## 86  0.18210618 0.6956522 0.9814815
## 87  0.17060700 0.7173913 0.9814815
## 88  0.15536249 0.7391304 0.9814815
## 89  0.14907332 0.7391304 1.0000000
## 90  0.14288827 0.7608696 1.0000000
## 91  0.13868194 0.7826087 1.0000000
## 92  0.13090790 0.8043478 1.0000000
## 93  0.12741177 0.8260870 1.0000000
## 94  0.12676979 0.8478261 1.0000000
## 95  0.11107864 0.8695652 1.0000000
## 96  0.10426897 0.8913043 1.0000000
## 97  0.07292866 0.9130435 1.0000000
## 98  0.07154785 0.9347826 1.0000000
## 99  0.04703280 0.9565217 1.0000000
## 100 0.04652589 0.9782609 1.0000000
## 101 0.00112760 1.0000000 1.0000000

Interpretación