library(ROCR)
library(e1071)
#read the data into a table from the file
cc <- read.csv("a2/UCI_Credit_Card.csv",header = TRUE, sep = ",")
head(cc,5)
#Convert to factorial
cc$default.payment.next.month <- factor(cc$default.payment.next.month,levels=c(0,1), labels=c("No","Yes"))
cc$PAY_0 <- factor(cc$PAY_0 , levels=c(-2,-1,0,1,2,3,4,5,6,7,8,9), labels=c("Pay duly", "Delay 1 month", "Delay 2 months", "Delay 3 months", "Delay 4 months", "Delay 5 months", "Delay 6 months", "Delay 7 months", "Delay 8 months", "Delay 9 months", "Delay 10 months", "Delay 11 months"))
cc$PAY_2 <- factor(cc$PAY_2 , levels=c(-2,-1,0,1,2,3,4,5,6,7,8,9), labels=c("Pay duly", "Delay 1 month", "Delay 2 months", "Delay 3 months", "Delay 4 months", "Delay 5 months", "Delay 6 months", "Delay 7 months", "Delay 8 months", "Delay 9 months", "Delay 10 months", "Delay 11 months"))
cc$PAY_3 <- factor(cc$PAY_3 , levels=c(-2,-1,0,1,2,3,4,5,6,7,8,9), labels=c("Pay duly", "Delay 1 month", "Delay 2 months", "Delay 3 months", "Delay 4 months", "Delay 5 months", "Delay 6 months", "Delay 7 months", "Delay 8 months", "Delay 9 months", "Delay 10 months", "Delay 11 months"))
cc$PAY_4 <- factor(cc$PAY_4 , levels=c(-2,-1,0,1,2,3,4,5,6,7,8,9), labels=c("Pay duly", "Delay 1 month", "Delay 2 months", "Delay 3 months", "Delay 4 months", "Delay 5 months", "Delay 6 months", "Delay 7 months", "Delay 8 months", "Delay 9 months", "Delay 10 months", "Delay 11 months"))
cc$PAY_5 <- factor(cc$PAY_5 , levels=c(-2,-1,0,1,2,3,4,5,6,7,8,9), labels=c("Pay duly", "Delay 1 month", "Delay 2 months", "Delay 3 months", "Delay 4 months", "Delay 5 months", "Delay 6 months", "Delay 7 months", "Delay 8 months", "Delay 9 months", "Delay 10 months", "Delay 11 months"))
cc$PAY_6 <- factor(cc$PAY_6 , levels=c(-2,-1,0,1,2,3,4,5,6,7,8,9), labels=c("Pay duly", "Delay 1 month", "Delay 2 months", "Delay 3 months", "Delay 4 months", "Delay 5 months", "Delay 6 months", "Delay 7 months", "Delay 8 months", "Delay 9 months", "Delay 10 months", "Delay 11 months"))
train <- as.data.frame(cc[1:5000,])
test <- as.data.frame(cc[5001:5100,])
#execute and display model
nbPay <- naiveBayes(default.payment.next.month ~ PAY_0 + PAY_2 + PAY_3, train)
nbPay
Naive Bayes Classifier for Discrete Predictors
Call:
naiveBayes.default(x = X, y = Y, laplace = laplace)
A-priori probabilities:
Y
No Yes
0.7786 0.2214
Conditional probabilities:
PAY_0
Y Pay duly Delay 1 month Delay 2 months Delay 3 months
No 0.0875931158 0.2167993835 0.5371179039 0.1109684048
Yes 0.0370370370 0.1806684734 0.2746160795 0.2177055104
PAY_0
Y Delay 4 months Delay 5 months Delay 6 months
No 0.0416131518 0.0038530696 0.0005137426
Yes 0.2493224932 0.0207768744 0.0108401084
PAY_0
Y Delay 7 months Delay 8 months Delay 9 months
No 0.0000000000 0.0000000000 0.0000000000
Yes 0.0027100271 0.0009033424 0.0027100271
PAY_0
Y Delay 10 months Delay 11 months
No 0.0015412278 0.0000000000
Yes 0.0027100271 0.0000000000
PAY_2
Y Pay duly Delay 1 month Delay 2 months Delay 3 months
No 0.1235550989 0.2306704341 0.5586950938 0.0005137426
Yes 0.1174345077 0.1725383921 0.3703703704 0.0000000000
PAY_2
Y Delay 4 months Delay 5 months Delay 6 months
No 0.0796301053 0.0043668122 0.0007706139
Yes 0.2953929539 0.0298102981 0.0063233966
PAY_2
Y Delay 7 months Delay 8 months Delay 9 months
No 0.0002568713 0.0000000000 0.0015412278
Yes 0.0009033424 0.0045167118 0.0027100271
PAY_2
Y Delay 10 months Delay 11 months
No 0.0000000000 0.0000000000
Yes 0.0000000000 0.0000000000
PAY_3
Y Pay duly Delay 1 month Delay 2 months Delay 3 months
No 0.1281787824 0.2347803750 0.5440534292 0.0005137426
Yes 0.1255645890 0.1607949413 0.3974706414 0.0000000000
PAY_3
Y Delay 4 months Delay 5 months Delay 6 months
No 0.0863087593 0.0023118418 0.0015412278
Yes 0.2818428184 0.0144534779 0.0054200542
PAY_3
Y Delay 7 months Delay 8 months Delay 9 months
No 0.0005137426 0.0015412278 0.0002568713
Yes 0.0054200542 0.0027100271 0.0063233966
PAY_3
Y Delay 10 months Delay 11 months
No 0.0000000000 0.0000000000
Yes 0.0000000000 0.0000000000
results <- predict(nbPay,test)
results
[1] No No No No No No No No No No No No Yes No No
[16] No No No No Yes No No No No No No No No No No
[31] No Yes No No No No No No No No No No No No No
[46] No No No Yes No No No No No No No Yes No No No
[61] No Yes No No No No No No No No No No No No No
[76] Yes Yes No No Yes Yes No No No Yes No No No No No
[91] No No No No No No No No No Yes
Levels: No Yes
pred <-prediction(as.numeric(results), as.numeric( test$default.payment.next.month), label.ordering = NULL)
perf <- performance(pred, "tpr", "fpr")
plot(perf, lwd=2, xlab="False Positive Rate", ylab="True Positive Rate")
abline(a=0, b=1, col="gray50", lty=3)
#AUC score of the ROC curve
auc <- performance(pred, "auc")
auc <- unlist(slot(auc,"y.values"))
auc
[1] 0.6066667