library(tidyverse)
library(DataExplorer)
library(e1071)
library(caret)
library(caTools)
library(InformationValue)
data <- read_delim("billets.csv", delim = ";")
summary(data)
## is_genuine diagonal height_left height_right
## Mode :logical Min. :171.0 Min. :103.1 Min. :102.8
## FALSE:500 1st Qu.:171.8 1st Qu.:103.8 1st Qu.:103.7
## TRUE :1000 Median :172.0 Median :104.0 Median :103.9
## Mean :172.0 Mean :104.0 Mean :103.9
## 3rd Qu.:172.2 3rd Qu.:104.2 3rd Qu.:104.2
## Max. :173.0 Max. :104.9 Max. :105.0
##
## margin_low margin_up length
## Min. :2.980 Min. :2.270 Min. :109.5
## 1st Qu.:4.015 1st Qu.:2.990 1st Qu.:112.0
## Median :4.310 Median :3.140 Median :113.0
## Mean :4.486 Mean :3.151 Mean :112.7
## 3rd Qu.:4.870 3rd Qu.:3.310 3rd Qu.:113.3
## Max. :6.900 Max. :3.910 Max. :114.4
## NA's :37
glimpse(data)
## Rows: 1,500
## Columns: 7
## $ is_genuine <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRU~
## $ diagonal <dbl> 171.81, 171.46, 172.69, 171.36, 171.73, 172.17, 172.34, 1~
## $ height_left <dbl> 104.86, 103.36, 104.48, 103.91, 104.28, 103.74, 104.18, 1~
## $ height_right <dbl> 104.95, 103.66, 103.50, 103.94, 103.46, 104.08, 103.85, 1~
## $ margin_low <dbl> 4.52, 3.77, 4.40, 3.62, 4.04, 4.42, 4.58, 3.98, 4.00, 4.0~
## $ margin_up <dbl> 2.89, 2.99, 2.94, 3.01, 3.48, 2.95, 3.26, 2.92, 3.25, 3.2~
## $ length <dbl> 112.83, 113.09, 113.16, 113.51, 112.54, 112.81, 112.81, 1~
Au total le jeu de données est composé de 1500 observations et 7 variables, 1000 vrai billets et 500 faux. On a 37 valeurs manquantes, toutes dans la variable margin_low.
plot_histogram(data)
On remarque des distributions normales, mis à part pour margin_low, légèrement aplatie sur la droite, et length inversement aplatie sur la gauche.
plot_boxplot(data, "is_genuine", geom_boxplot_args = list(notch=TRUE))
Les box plots par catégorie montrent que les médianes des deux groupes sont significativement différentes:
length pour les vrais billetsmargin_low et margin_upheight_left et height_rightdiagonalA noter que diagonal ne suit pas Pythagore. On le peux vérifier avec ce code: data %>% mutate(vrai_diag = sqrt(height_left ^2 + length^2)) %>% View()
Pour déterminer les variables plus influentes on peut effectuer une ACP sur les jeu de données. De telle manière on identifiera facilement un modèle de départ pour une régression logistique. avant d’effectuer l’ ACP sur un jeu de données on impute les données manquantes en utilisant l’algorithme PCA itératif régularisé (method=“Regularized”).
library(missMDA)
#estimation du nombre des dimensions nécessaires à l'imputation
estim_ncpPCA(data, quali.sup = 1)
## $ncp
## [1] 1
##
## $criterion
## 0 1 2 3 4 5
## 0.2565542 0.1613912 0.2231048 0.3577891 0.6860599 1.8470063
imputed.data <- imputePCA(data, ncp=1, quali.sup = 1)
comp.data <- imputed.data$completeObs
#vérifions les données imputées
print(setdiff(comp.data, data), n = Inf)
## # A tibble: 37 x 7
## is_genuine diagonal height_left height_right margin_low margin_up length
## <lgl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TRUE 172. 104. 103. 4.30 3.25 113.
## 2 TRUE 172. 104. 104. 4.51 3.14 113.
## 3 TRUE 172. 104. 104. 4.44 3.02 113.
## 4 TRUE 171. 104. 104. 4.52 3.62 113.
## 5 TRUE 172. 104. 104. 4.57 3.02 112.
## 6 TRUE 172. 103. 103. 3.62 2.95 113.
## 7 TRUE 172. 104. 104. 4.29 3.23 113.
## 8 TRUE 172. 104. 104. 4.18 3 113.
## 9 TRUE 173. 104. 104. 4.11 3.12 113.
## 10 TRUE 172. 104. 104. 4.13 3.16 113.
## 11 TRUE 172. 104. 103. 4.19 3.01 113.
## 12 TRUE 172. 104. 104. 3.85 2.71 114.
## 13 TRUE 172. 104. 104. 4.23 2.98 114.
## 14 TRUE 172. 104. 103. 4.28 3.3 113.
## 15 TRUE 172. 104. 104. 3.95 2.7 113.
## 16 TRUE 172. 104. 104. 4.00 2.56 113.
## 17 TRUE 172. 104. 104. 4.39 3.07 113.
## 18 TRUE 172. 104. 104. 4.33 3.09 112.
## 19 TRUE 172. 104. 104. 4.47 3.24 113.
## 20 TRUE 172. 104. 104. 4.22 3.13 113.
## 21 TRUE 172. 104. 104. 3.90 3.01 114.
## 22 TRUE 172. 104. 104. 4.54 2.99 113.
## 23 TRUE 172. 104. 104. 4.27 2.9 114.
## 24 TRUE 171. 104. 104. 4.33 3.07 113.
## 25 TRUE 172. 104. 104. 4.03 2.98 114.
## 26 TRUE 172. 104. 103. 3.68 2.58 114.
## 27 TRUE 172. 104. 104. 4.13 3.02 113.
## 28 TRUE 172. 104. 105. 4.80 3.27 113.
## 29 TRUE 172. 104. 104. 4.25 2.99 113.
## 30 FALSE 172. 104. 104. 4.97 3.21 112.
## 31 FALSE 171. 104. 104. 4.81 3.17 112.
## 32 FALSE 172. 104. 104. 4.71 3.02 111.
## 33 FALSE 172. 104. 104. 4.70 2.93 111.
## 34 FALSE 172. 104. 104. 4.82 3.4 112.
## 35 FALSE 172. 104. 104. 5.42 3.61 110.
## 36 FALSE 173. 104. 104. 5.16 3.56 111.
## 37 FALSE 172. 104. 104. 4.98 3.24 111.
#corrélations
plot_correlation(comp.data)
C’est déjà à noter la forte corrélation des catégories avec la variable length.
#PCA
library(FactoMineR)
res.pca <- PCA(comp.data, quali.sup = 1)
library(factoextra)
fviz_pca_biplot(res.pca, habillage = as.factor(data$is_genuine), addEllipses = TRUE, geom = "point",axes = c(1,2))
Les variables length et margin_low permettent de décrire le premier axe, que caractérise aussi de manière évidente la division entre catégories Vrai/Faux.
fviz_eig(res.pca)
La première dimension explique le 43% de la variabilité du jeu de données.
res.pca$var
## $coord
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## diagonal -0.1365405 0.949548684 -0.25431072 -0.08520125 -0.08808187
## height_left 0.5328355 0.310152360 0.78250764 -0.04070576 0.07681429
## height_right 0.6351016 0.108747554 -0.14611776 0.72781641 0.18371870
## margin_low 0.8178247 -0.072841995 -0.09669963 -0.07080584 -0.42828247
## margin_up 0.7087095 -0.005270776 -0.23831930 -0.37898407 0.53630141
## length -0.8483392 0.048761940 0.13071360 0.14815414 0.23511599
##
## $cor
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## diagonal -0.1365405 0.949548684 -0.25431072 -0.08520125 -0.08808187
## height_left 0.5328355 0.310152360 0.78250764 -0.04070576 0.07681429
## height_right 0.6351016 0.108747554 -0.14611776 0.72781641 0.18371870
## margin_low 0.8178247 -0.072841995 -0.09669963 -0.07080584 -0.42828247
## margin_up 0.7087095 -0.005270776 -0.23831930 -0.37898407 0.53630141
## length -0.8483392 0.048761940 0.13071360 0.14815414 0.23511599
##
## $cos2
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## diagonal 0.01864331 9.016427e-01 0.064673944 0.007259252 0.007758416
## height_left 0.28391371 9.619449e-02 0.612318212 0.001656958 0.005900434
## height_right 0.40335407 1.182603e-02 0.021350400 0.529716726 0.033752560
## margin_low 0.66883718 5.305956e-03 0.009350818 0.005013466 0.183425872
## margin_up 0.50226915 2.778108e-05 0.056796091 0.143628925 0.287619202
## length 0.71967936 2.377727e-03 0.017086045 0.021949650 0.055279529
##
## $contrib
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## diagonal 0.7179625 88.624448528 8.274817 1.0235472 1.352262
## height_left 10.9336488 9.455168067 78.344089 0.2336295 1.028423
## height_right 15.5333528 1.162406605 2.731713 74.6895192 5.882943
## margin_low 25.7572307 0.521534132 1.196406 0.7068936 31.970430
## margin_up 19.3426186 0.002730664 7.266872 20.2515322 50.130930
## length 27.7151867 0.233712005 2.186103 3.0948783 9.635011
La variable length est celle qui a plus contribué à la 1ère dimension, et aussi est celle qui est projetée mieux sur le plan.
res.pca$quali.sup
## $coord
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## FALSE 2.034615 -0.08002569 -0.11360915 -0.10421088 -0.14843156
## TRUE -1.017307 0.04001285 0.05680458 0.05210544 0.07421578
##
## $cos2
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## FALSE 0.9867172 0.001526466 0.003076485 0.002588537 0.005251468
## TRUE 0.9867172 0.001526466 0.003076485 0.002588537 0.005251468
##
## $v.test
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## FALSE 34.56666 -2.172075 -3.518144 -3.387715 -5.364831
## TRUE -34.56666 2.172075 3.518144 3.387715 5.364831
##
## $dist
## FALSE TRUE
## 2.048264 1.024132
##
## $eta2
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## is_genuine 0.7971008 0.003147371 0.008257065 0.00765618 0.0192004
Les catégories vrai/faux sont fortement corrélées à la première dimension avec un eta2 de 0.79 et corrélation de 0.98; elle peuvent en effet résumer toute l’information contenue sur le premier axe.
log.bil)Pour un premier modèle on va utiliser les variables margin_low et length, celle qui ont plus d’influence sur la formation de la première composante principale.
#premier essai de modélisation
log.bil <- glm(is_genuine ~ margin_low + length, data = comp.data, family = "binomial")
summary(log.bil)
##
## Call:
## glm(formula = is_genuine ~ margin_low + length, family = "binomial",
## data = comp.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.7552 -0.0021 0.0133 0.0633 2.1905
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -713.3184 74.5448 -9.569 <2e-16 ***
## margin_low -5.9511 0.7016 -8.483 <2e-16 ***
## length 6.5817 0.6734 9.773 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1909.54 on 1499 degrees of freedom
## Residual deviance: 147.93 on 1497 degrees of freedom
## AIC: 153.93
##
## Number of Fisher Scoring iterations: 9
#fonction step pour trouver un modèle performant
log.bil.aut <- glm(is_genuine ~ ., data = comp.data)
step(log.bil.aut)
## Start: AIC=-740.1
## is_genuine ~ diagonal + height_left + height_right + margin_low +
## margin_up + length
##
## Df Deviance AIC
## <none> 53.052 -740.10
## - diagonal 1 53.551 -728.05
## - height_left 1 54.111 -712.47
## - height_right 1 55.451 -675.76
## - margin_up 1 59.540 -569.05
## - margin_low 1 71.459 -295.33
## - length 1 89.676 45.29
##
## Call: glm(formula = is_genuine ~ diagonal + height_left + height_right +
## margin_low + margin_up + length, data = comp.data)
##
## Coefficients:
## (Intercept) diagonal height_left height_right margin_low
## -12.74321 0.06033 -0.09553 -0.13800 -0.23352
## margin_up length
## -0.33821 0.26117
##
## Degrees of Freedom: 1499 Total (i.e. Null); 1493 Residual
## Null Deviance: 333.3
## Residual Deviance: 53.05 AIC: -740.1
#formula retenue de la fonction step
step.log <- glm(formula = is_genuine ~ height_left + height_right + margin_low + margin_up + length, family = "binomial", data = comp.data)
summary(step.log)
##
## Call:
## glm(formula = is_genuine ~ height_left + height_right + margin_low +
## margin_up + length, family = "binomial", data = comp.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.9755 -0.0011 0.0053 0.0290 2.6704
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -197.6332 157.4394 -1.255 0.2094
## height_left -1.7733 1.1011 -1.610 0.1073
## height_right -2.2015 1.0730 -2.052 0.0402 *
## margin_low -5.7346 0.8773 -6.537 6.29e-11 ***
## margin_up -9.8972 2.0312 -4.873 1.10e-06 ***
## length 5.9465 0.8354 7.118 1.10e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1909.543 on 1499 degrees of freedom
## Residual deviance: 85.313 on 1494 degrees of freedom
## AIC: 97.313
##
## Number of Fisher Scoring iterations: 10
On remarque les prédicteurs significatifs:
margin_lowmargin_uplengthlog.bil# Performances sur le jeu de données entier
pred.bil <- predict(log.bil, type = "response")
table(pred.bil>0.5, comp.data$is_genuine)
##
## FALSE TRUE
## FALSE 487 8
## TRUE 13 992
accuracy<- (487+992)/ (487+992+8+13)
print(accuracy)
## [1] 0.986
log.bil#on rajoute margin_up au modèle de base
log.bil <- update(log.bil, ~. +margin_up)
summary(log.bil)
##
## Call:
## glm(formula = is_genuine ~ margin_low + length + margin_up, family = "binomial",
## data = comp.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.2857 -0.0010 0.0054 0.0314 2.5203
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -648.5140 87.0116 -7.453 9.11e-14 ***
## margin_low -6.3258 0.8722 -7.252 4.09e-13 ***
## length 6.3049 0.8065 7.818 5.38e-15 ***
## margin_up -9.9591 1.8710 -5.323 1.02e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1909.543 on 1499 degrees of freedom
## Residual deviance: 95.727 on 1496 degrees of freedom
## AIC: 103.73
##
## Number of Fisher Scoring iterations: 10
log.bil avec toutes les variables significatives#prédictions du modèle avec toutes les variables significatives
pred.bil <- predict(log.bil, type = "response")
caret::confusionMatrix(table(pred.bil>0.5, comp.data$is_genuine))
## Confusion Matrix and Statistics
##
##
## FALSE TRUE
## FALSE 492 4
## TRUE 8 996
##
## Accuracy : 0.992
## 95% CI : (0.9861, 0.9959)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.982
##
## Mcnemar's Test P-Value : 0.3865
##
## Sensitivity : 0.9840
## Specificity : 0.9960
## Pos Pred Value : 0.9919
## Neg Pred Value : 0.9920
## Prevalence : 0.3333
## Detection Rate : 0.3280
## Detection Prevalence : 0.3307
## Balanced Accuracy : 0.9900
##
## 'Positive' Class : FALSE
##
step#prédictions du modèle trouvé avec la fonction step
pred.step <-predict(step.log, type="response")
caret::confusionMatrix(table(pred.step>0.5, comp.data$is_genuine))
## Confusion Matrix and Statistics
##
##
## FALSE TRUE
## FALSE 491 4
## TRUE 9 996
##
## Accuracy : 0.9913
## 95% CI : (0.9852, 0.9954)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9805
##
## Mcnemar's Test P-Value : 0.2673
##
## Sensitivity : 0.9820
## Specificity : 0.9960
## Pos Pred Value : 0.9919
## Neg Pred Value : 0.9910
## Prevalence : 0.3333
## Detection Rate : 0.3273
## Detection Prevalence : 0.3300
## Balanced Accuracy : 0.9890
##
## 'Positive' Class : FALSE
##
On remarque des performances à peu près égales sur le jeu complet. On teste maintenant avec la division en train/test:
#création des partitions train/test
set.seed(5)
split = sample.split(comp.data$is_genuine)
train = subset(comp.data, split==TRUE)
test = subset(comp.data, split==FALSE)
# modèle log.bil entraîné sur le jeu de train
log.bil <- glm(is_genuine ~ margin_low + length + margin_up, family = "binomial",
data = train)
#performance de log.bil sur le test
pred.bil <- predict(log.bil, type = "response", newdata = test)
caret::confusionMatrix(table(pred.bil>0.5, test$is_genuine))
## Confusion Matrix and Statistics
##
##
## FALSE TRUE
## FALSE 163 1
## TRUE 4 332
##
## Accuracy : 0.99
## 95% CI : (0.9768, 0.9967)
## No Information Rate : 0.666
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9774
##
## Mcnemar's Test P-Value : 0.3711
##
## Sensitivity : 0.9760
## Specificity : 0.9970
## Pos Pred Value : 0.9939
## Neg Pred Value : 0.9881
## Prevalence : 0.3340
## Detection Rate : 0.3260
## Detection Prevalence : 0.3280
## Balanced Accuracy : 0.9865
##
## 'Positive' Class : FALSE
##
# modèle "step" entraîné sur le jeu de train
step.bil <- glm(is_genuine ~ margin_low + length + margin_up, family = "binomial", data = train)
#performance de step sur le jeu de test
pred.bil.step <- predict(step.bil, type = "response", newdata = test)
caret::confusionMatrix(table(pred.bil.step >0.5, test$is_genuine))
## Confusion Matrix and Statistics
##
##
## FALSE TRUE
## FALSE 163 1
## TRUE 4 332
##
## Accuracy : 0.99
## 95% CI : (0.9768, 0.9967)
## No Information Rate : 0.666
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9774
##
## Mcnemar's Test P-Value : 0.3711
##
## Sensitivity : 0.9760
## Specificity : 0.9970
## Pos Pred Value : 0.9939
## Neg Pred Value : 0.9881
## Prevalence : 0.3340
## Detection Rate : 0.3260
## Detection Prevalence : 0.3280
## Balanced Accuracy : 0.9865
##
## 'Positive' Class : FALSE
##
Dans ce cas on a des performances égales ou légèrement supérieures du modèle avec les variables significatives (log.bil). On retiendra donc log.bil (parcimonie).
#ROCR predbil
library(ROCR)
ROCRpred = prediction(pred.bil, test$is_genuine)
perfROCR = performance(ROCRpred, "auc")
perfROCR@y.values
## [[1]]
## [1] 0.9994426
perfROCR = performance(ROCRpred, "tpr", "fpr")
plot(perfROCR, colorize=TRUE)
Si on veut maximiser le recall (TP rate), le ratio de faux billets reconnu, on pourra choisir un cutoff élevé (p(T) > 0.8).
#log régression avec caret, méthode de cross validation
comp.data <- comp.data %>%
mutate(is_genuine = factor(is_genuine))
#avec la fonction `createDataPartition`, l'échantillonnage aléatoire est effectué dans les niveaux de y lorsque y est un facteur, afin de tenter d'équilibrer les distributions #des classes au sein de l'ensemble de données réparties.
set.seed(5)
train_indexes = createDataPartition(comp.data$is_genuine, p = 0.70, list = FALSE)
train = comp.data[train_indexes, ]
test = comp.data[-train_indexes, ]
##train model, cross validation on train set
log_bil_mod = train(
form = is_genuine ~ margin_low + length + margin_up,
data = train,
trControl = trainControl(method = "cv", number = 10),
method = "glm",
family = "binomial"
)
print(log_bil_mod$results)
## parameter Accuracy Kappa AccuracySD KappaSD
## 1 none 0.9885714 0.9740994 0.008751777 0.01990941
print(log_bil_mod$finalModel)
##
## Call: NULL
##
## Coefficients:
## (Intercept) margin_low length margin_up
## -621.969 -5.622 6.044 -10.172
##
## Degrees of Freedom: 1049 Total (i.e. Null); 1046 Residual
## Null Deviance: 1337
## Residual Deviance: 77.21 AIC: 85.21
pr.cross <- predict(log_bil_mod, newdata=test)
caret::confusionMatrix(table(pr.cross, test$is_genuine))
## Confusion Matrix and Statistics
##
##
## pr.cross FALSE TRUE
## FALSE 150 2
## TRUE 0 298
##
## Accuracy : 0.9956
## 95% CI : (0.984, 0.9995)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.99
##
## Mcnemar's Test P-Value : 0.4795
##
## Sensitivity : 1.0000
## Specificity : 0.9933
## Pos Pred Value : 0.9868
## Neg Pred Value : 1.0000
## Prevalence : 0.3333
## Detection Rate : 0.3333
## Detection Prevalence : 0.3378
## Balanced Accuracy : 0.9967
##
## 'Positive' Class : FALSE
##
#importance des variables
varImp(log_bil_mod)
## glm variable importance
##
## Overall
## length 100.00
## margin_low 69.52
## margin_up 0.00
pred <- ifelse(pr.cross == "TRUE", 1, 0)
response <- ifelse(test$is_genuine=="TRUE", 1, 0)
#ROCR Curve
plotROC(response, pred)
set.seed(5)
km.comp <- kmeans(scale(comp.data[-1]), nstart = 20, centers = 2)
fviz_cluster(km.comp, data=comp.data[-1], geom = "point")
aggregate(comp.data[-1], by=list(km.comp$cluster), FUN=mean)
## Group.1 diagonal height_left height_right margin_low margin_up length
## 1 1 171.9878 103.9452 103.8057 4.119845 3.052173 113.1972
## 2 2 171.8993 104.1998 104.1516 5.217030 3.351871 111.6316
table(km.comp$cluster, comp.data$is_genuine)
##
## FALSE TRUE
## 1 13 990
## 2 487 10
Un kmeans avec 2 centres représente assez fidèlement la division en deux catégories du jeu de données.
#predicting model kmeans
source("pr_km.R")
set.seed(20)
km.train = kmeans(scale(train[-1]), centers=2, nstart = 30)
preds.km <- predict(km.train, newdata = scale(test[-1]))
table(preds.km, test$is_genuine)
##
## preds.km FALSE TRUE
## 1 1 296
## 2 149 4
accuracy <- (149 + 296) / nrow(test)
log_bil_mod avec km_trainpaste0("accuracy = ", accuracy )
## [1] "accuracy = 0.988888888888889"
caret::confusionMatrix(table(pr.cross, test$is_genuine))
## Confusion Matrix and Statistics
##
##
## pr.cross FALSE TRUE
## FALSE 150 2
## TRUE 0 298
##
## Accuracy : 0.9956
## 95% CI : (0.984, 0.9995)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.99
##
## Mcnemar's Test P-Value : 0.4795
##
## Sensitivity : 1.0000
## Specificity : 0.9933
## Pos Pred Value : 0.9868
## Neg Pred Value : 1.0000
## Prevalence : 0.3333
## Detection Rate : 0.3333
## Detection Prevalence : 0.3378
## Balanced Accuracy : 0.9967
##
## 'Positive' Class : FALSE
##
La régression logistique est le meilleur modèle.