Victor Enchautegui
11/1/2020
Load libraries
library(ggplot2)
library(ROCR)
library(e1071)
Load data
# Read the credit-card CSV (path is relative to the working directory)
# into the data frame `cc`.
cc <- read.csv(file = "data/UCI_Credit_Card.csv")
Transform data
# Recode the raw integer columns of `cc` into labelled factors.

# Target: 0/1 -> "No"/"Yes".
cc$default.payment.next.month <- factor(
  cc$default.payment.next.month,
  levels = c(0, 1),
  labels = c("No", "Yes")
)

# Bin AGE by decade. right = FALSE makes every bin [lower, upper), so an age
# of exactly 30 falls in "30s" (with the default right-closed bins it was
# labelled "20s"). The open-ended top break keeps any age >= 80 in "80s+".
cc$AGE <- cut(
  cc$AGE,
  breaks = c(0, 20, 30, 40, 50, 60, 70, 80, Inf),
  labels = c("<20s", "20s", "30s", "40s", "50s", "60s", "70s", "80s+"),
  right = FALSE
)

# Re-factoring drops age bands that do not occur in the data.
cc$AGE <- factor(cc$AGE)

# Codes 5 and 6 share the label "Unknown", so they collapse into one level.
# Any code outside 1..6 becomes NA.
cc$EDUCATION <- factor(
  cc$EDUCATION,
  levels = c(1, 2, 3, 4, 5, 6),
  labels = c(
    "Grad School", "University", "High School", "Others", "Unknown", "Unknown"
  )
)

# Repayment-status codes. Labels are aligned with the codes: -1 = paid duly,
# k = payment delayed k months (9 = nine months or more).
# NOTE(review): -2 and 0 are not described in the original data dictionary;
# the labels below follow the commonly cited interpretation -- confirm.
pay_levels <- c(-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
pay_labels <- c(
  "No consumption", "Pay duly", "Revolving credit",
  "1mo", "2mos", "3mos", "4mos", "5mos", "6mos", "7mos", "8mos", "9mos+"
)

# The same recoding applies to all three repayment-status columns.
pay_cols <- c("PAY_0", "PAY_2", "PAY_3")
cc[pay_cols] <- lapply(
  cc[pay_cols],
  factor,
  levels = pay_levels,
  labels = pay_labels
)
Training set
# Fix the RNG seed so the random draw, and every number derived from it, can
# be reproduced from run to run.
set.seed(20201101)

# Draw 100 distinct rows of `cc` (sampling without replacement) to train on.
train <- cc[sample(nrow(cc), 100), ]
Test set
# Sample the 100 test rows only from rows that are not in `train`, so the
# model is never evaluated on observations it was fitted on. Subsetting `cc`
# by row index keeps the original row names, which identify the rows to skip.
unused_rows <- which(!(rownames(cc) %in% rownames(train)))
test <- cc[sample(unused_rows, 100), ]
Build the Naïve Bayes classifier
# Fit a Naive Bayes classifier on the training rows: the default flag is
# modelled from the PAY_0, PAY_2 and PAY_3 factors.
nbPay <- naiveBayes(
  default.payment.next.month ~ PAY_0 + PAY_2 + PAY_3,
  data = train
)
Evaluate on the test set
# Score the test rows with the fitted model. type = "raw" returns a matrix of
# posterior class probabilities with one column per class label.
nb_prediction <- predict(
  nbPay,
  # remove the last column ("default payment next month") before predicting
  test[, -ncol(test)],
  type = "raw"
)

# ROC score: posterior probability of the positive class ("Yes").
score <- nb_prediction[, "Yes"]

# The ground truth must come from the same rows that were scored (`test`).
# Taking the labels from `train` pairs each score with an unrelated row's
# outcome and drives the AUC towards 0.5.
actual_class <- test$default.payment.next.month == "Yes"

# ROCR objects: score/label pairs, then the TPR-vs-FPR (ROC) curve.
pred <- prediction(score, actual_class)
perf <- performance(pred, "tpr", "fpr")
# Draw the ROC curve held in `perf`.
plot(
  perf,
  lwd = 2,
  xlab = "False Positive Rate (FPR)",
  ylab = "True Positive Rate (TPR)"
)

# Dotted grey diagonal (intercept 0, slope 1) as a visual reference line.
abline(a = 0, b = 1, col = "gray50", lty = 3)

# Area under the ROC curve: ROCR stores the value in the "y.values" slot of
# the performance object, so flatten that slot to a plain numeric.
auc <- unlist(performance(pred, "auc")@y.values)
auc
[1] 0.4528898
Discussion
When the AUC is approximately 0.5, the model has no capacity to discriminate between the positive class and the negative class.
So with my training set the AUC is 0.45, which indicates a mistake in the way I picked my classifier targets, or that I might have a bad training set. And because my AUC is close to 0.5, this suggests no discrimination, so we might as well flip a coin.
LS0tDQp0aXRsZTogIldlZWsgNiBFeGVyY2lzZSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQo8aDM+PGI+VmljdG9yIEVuY2hhdXRlZ3VpPC9iPjwvaDM+DQo8aDQ+MTEvMS8yMDIwPC9oND4NCjxicj4NCg0KPGg0PjxiPkxvYWQgbGlicmFyaWVzPC9iPjwvaDQ+DQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoUk9DUikgDQpsaWJyYXJ5KGUxMDcxKQ0KYGBgDQo8YnI+DQo8aDQ+PGI+VHJhaW5pbmcgc2V0PC9iPjwvaDQ+DQpgYGBge3J9DQpjYyA8LSByZWFkLmNzdigiZGF0YS9VQ0lfQ3JlZGl0X0NhcmQuY3N2IikNCmBgYA0KPGJyPg0KPGg0PjxiPlRyYW5zZm9ybSBkYXRhPC9iPjwvaDQ+DQpgYGB7cn0NCmNjJGRlZmF1bHQucGF5bWVudC5uZXh0Lm1vbnRoIDwtIGZhY3RvcihjYyRkZWZhdWx0LnBheW1lbnQubmV4dC5tb250aCxsZXZlbHM9YygwLDEpLCBsYWJlbHM9YygiTm8iLCJZZXMiKSkNCg0KY2MkQUdFIDwtIGN1dChjYyRBR0UsYnJlYWtzPWMoMCwyMCwzMCw0MCw1MCw2MCw3MCw4MCwxMDApLCBsYWJlbHM9YygiPDIwcyIsIjIwcyIsICIzMHMiLCAiNDBzIiwgIjUwcyIsICI2MHMiLCAiNzBzIiwgIjgwcysiKSkgDQoNCmNjJEFHRSA8LSBmYWN0b3IoY2MkQUdFKSANCg0KY2MkRURVQ0FUSU9OIDwtIGZhY3RvcihjYyRFRFVDQVRJT04sbGV2ZWxzPWMoMSwyLDMsNCw1LDYpLCBsYWJlbHM9YygiR3JhZCBTY2hvb2wiLCAiVW5pdmVyc2l0eSIsICJIaWdoIFNjaG9vbCIsICJPdGhlcnMiLCAiVW5rbm93biIsICJVbmtub3duIikpDQoNCmNjJFBBWV8wICA8LSBmYWN0b3IoY2MkUEFZXzAgLCBsZXZlbHM9YygtMiwtMSwwLDEsMiwzLDQsNSw2LDcsOCw5KSwgbGFiZWxzPWMoIlBheSBkdWx5IiwgIjFtbyIsICIybW9zIiwgIjNtb3MiLCAiNG1vcyIsICAiNW1vcyIsICI2bW9zIiwgIjdtb3MiLCAgIjhtb3MiLCAiOW1vcyIsICIxMG1vcyIsICIxMW1vcyIpKQ0KDQpjYyRQQVlfMiAgPC0gZmFjdG9yKGNjJFBBWV8yICwgbGV2ZWxzPWMoLTIsLTEsMCwxLDIsMyw0LDUsNiw3LDgsOSksIGxhYmVscz1jKCJQYXkgZHVseSIsICIxbW8iLCAiMm1vcyIsICIzbW9zIiwgIjRtb3MiLCAgIjVtb3MiLCAiNm1vcyIsICI3bW9zIiwgICI4bW9zIiwgIjltb3MiLCAiMTBtb3MiLCAiMTFtb3MiKSkNCg0KY2MkUEFZXzMgIDwtIGZhY3RvcihjYyRQQVlfMyAsIGxldmVscz1jKC0yLC0xLDAsMSwyLDMsNCw1LDYsNyw4LDkpLCBsYWJlbHM9YygiUGF5IGR1bHkiLCAiMW1vIiwgIjJtb3MiLCAiM21vcyIsICI0bW9zIiwgICI1bW9zIiwgIjZtb3MiLCAiN21vcyIsICAiOG1vcyIsICI5bW9zIiwgIjEwbW9zIiwgIjExbW9zIikpDQpgYGANCjxicj4NCjxoND48Yj5UcmFpbmluZyBzZXQ8L2I+PC9oND4NCmBgYHtyfQ0KdHJhaW4gPC0gY2Nbc2FtcGxlKG5yb3coY2MpLCAxMDApLCBdIA0KYGBgDQo8YnI+DQo8aDQ+PGI+VGVzdCBzZXQ8L2I+PC9oND4NCmBgYHtyfQ0KdGVzdCA8LSBjY1tzYW1wbGUobnJvdyhjYyksIDEwMCksIF0NCmBgYA0K
PGJyPg0KPGg0PjxiPkJ1aWxkIHRoZSBOYcOvdmUgQmF5ZXMgY2xhc3NpZmllcjwvYj48L2g0Pg0KYGBge3J9DQpuYlBheSA8LSBuYWl2ZUJheWVzKGRlZmF1bHQucGF5bWVudC5uZXh0Lm1vbnRoIH4gUEFZXzAgKyBQQVlfMiArIFBBWV8zLCB0cmFpbikNCmBgYA0KPGJyPg0KPGg0PjxiPlBlcmZvcm0gb24gdGhlIHRlc3Rpbmcgc2V0PC9iPjwvaDQ+DQpgYGB7cn0NCm5iX3ByZWRpY3Rpb24gPC0gcHJlZGljdChuYlBheSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSBjb2x1bW4gImRlZmF1bHQgcGF5bWVudCBuZXh0IG1vbnRoIg0KICAgICAgICAgICAgICAgICAgICAgICAgIHRlc3RbLC1uY29sKHRlc3QpXSwNCiAgICAgICAgICAgICAgICAgICAgICAgICB0eXBlPSdyYXcnKQ0KDQoNCnNjb3JlIDwtIG5iX3ByZWRpY3Rpb25bLCBjKCJZZXMiKV0gDQoNCmFjdHVhbF9jbGFzcyA8LSB0cmFpbiRkZWZhdWx0LnBheW1lbnQubmV4dC5tb250aCA9PSAnWWVzJyANCg0KcHJlZCA8LSBwcmVkaWN0aW9uKHNjb3JlLCBhY3R1YWxfY2xhc3MpDQoNCnBlcmYgPC0gcGVyZm9ybWFuY2UocHJlZCwgInRwciIsICJmcHIiKSANCmBgYA0KDQpgYGB7cn0NCnBsb3QocGVyZiwgbHdkPTIsIHhsYWI9IkZhbHNlIFBvc2l0aXZlIFJhdGUgKEZQUikiLA0KICAgICB5bGFiPSJUcnVlIFBvc2l0aXZlIFJhdGUgKFRQUikiKQ0KYWJsaW5lKGE9MCwgYj0xLCBjb2w9ImdyYXk1MCIsIGx0eT0zKQ0KYGBgDQpgYGB7cn0NCmF1YyA8LSBwZXJmb3JtYW5jZShwcmVkLCAiYXVjIikNCmF1YyA8LSB1bmxpc3Qoc2xvdChhdWMsICJ5LnZhbHVlcyIpKQ0KYXVjDQpgYGANCjxicj4NCjxoND48Yj5EaXNjdXNzaW9uPC9iPjwvaDQ+DQpXaGVuIEFVQyBpcyBhcHByb3hpbWF0ZWx5IDAuNSwgbW9kZWwgaGFzIG5vIGRpc2NyaW1pbmF0aW9uIGNhcGFjaXR5IHRvIGRpc3Rpbmd1aXNoIGJldHdlZW4gcG9zaXRpdmUgY2xhc3MgYW5kIG5lZ2F0aXZlIGNsYXNzLg0KDQoNClNvIGluIG15IHRyYWluaW5nIHNldCB0aGUgQVVDIGlzIC40NSwgd2hpY2ggaW5kaWNhdGVzIGEgbWlzdGFrZSBpbiB0aGUgd2F5IEkgcGlja2VkIG15IGNsYXNzaWZpZXIgdGFyZ2V0cyBvciBJIG1pZ2h0IGhhdmUgYSBiYWQgdHJhaW5pbmcgc2V0LiBBbmQgYmVpbmcgbXkgQVVDIGlzIGNsb3NlIHRvIC41LCB0aGlzIHN1Z2dlc3RzIG5vIGRpc2NyaW1pbmF0aW9uLCBzbyB3ZSBtaWdodCBhcyB3ZWxsIGZsaXAgYSBjb2luLg0K