Question 2.
b. Change the .5 cutoff for the sick class to .4, then to .3, .2, .1,
and finally .001, each time recording the value for AUC.
# Question 2b: AUC of the thresholded classifier for each cutoff below .5.
cutoffs <- c(0.4, 0.3, 0.2, 0.1, 0.001)
auc_out <- numeric(length(cutoffs))
# The fitted probabilities do not depend on the cutoff, so compute them once
# instead of calling predict() on every pass through the loop.
card.prob <- predict(card.glm, card.test, type = "response")
# A plain for loop (rather than vapply) is kept on purpose so that `p.card`
# and `pr.card` still hold the values for the last cutoff after the loop.
for (i in seq_along(cutoffs)) {
  # APPLY THE CUTOFF: label 2 when the probability exceeds the cutoff, else 1
  p.card <- ifelse(card.prob > cutoffs[i], 2, 1)
  # create ROCR prediction object from the hard labels and the actual classes
  pr.card <- prediction(p.card, card.test$class)
  # compute AUC and store it
  auc_out[i] <- performance(pr.card, measure = "auc")@y.values[[1]]
}
# Put results in a table: one row per cutoff with its AUC
results <- data.frame(cutoff = cutoffs, auc = auc_out)
results
NA
c. Change the .5 cutoff for the sick class to .6, then to
.7, .8, .9, and finally .999, each time recording the value for
AUC.
# Question 2c: AUC of the thresholded classifier for each cutoff above .5.
cutoffsc <- c(0.6, 0.7, 0.8, 0.9, 0.999)
# NOTE: `auc_out` and `results` are reassigned here, replacing any earlier
# values held under those names.
auc_out <- numeric(length(cutoffsc))
# The fitted probabilities do not depend on the cutoff, so compute them once
# instead of calling predict() on every pass through the loop.
card.prob <- predict(card.glm, card.test, type = "response")
# A plain for loop (rather than vapply) is kept on purpose so that `p.card`
# and `pr.card` still hold the values for the last cutoff after the loop.
for (i in seq_along(cutoffsc)) {
  # APPLY THE CUTOFF: label 2 when the probability exceeds the cutoff, else 1
  p.card <- ifelse(card.prob > cutoffsc[i], 2, 1)
  # create ROCR prediction object from the hard labels and the actual classes
  pr.card <- prediction(p.card, card.test$class)
  # compute AUC and store it
  auc_out[i] <- performance(pr.card, measure = "auc")@y.values[[1]]
}
# Put results in a table: one row per cutoff with its AUC
results <- data.frame(cutoffc = cutoffsc, auc = auc_out)
results
Question 4
# NOTE(review): `x` is not read by any statement visible in this section; the
# split below is drawn from the raw `creditScreening` data. Presumably
# removeNAS() drops incomplete rows -- confirm whether `credit.data` was
# meant to be `x` (switching would change the reported results).
x <- removeNAS(creditScreening)
# Randomize and split the data for 2/3 training, 1/3 testing
set.seed(100)  # fixed seed so the random split is reproducible
credit.data <- creditScreening
n.rows <- nrow(credit.data)
# seq_len() is safe for a zero-row table (1:0 would yield c(1, 0)); floor()
# makes explicit the truncation sample() applies to a fractional size.
index <- sample(seq_len(n.rows), floor(2 / 3 * n.rows))
credit.train <- credit.data[index, ]
credit.test <- credit.data[-index, ]
Next, build a second model by replacing attribute nine with
attribute twelve.
# Fit a naive Bayes classifier that predicts `class` from attribute `twelve`
# alone, with Laplace smoothing of 1. `type` is an argument of predict(), not
# of naiveBayes() (where it was silently ignored), so it is supplied below.
twelve.credit.Bayes <- naiveBayes(
  class ~ twelve,
  data = credit.train,
  laplace = 1
)
# CREATE CONFUSION MATRIX
# Predicted class labels for the held-out test rows.
twelve.credit.pred <- predict(
  twelve.credit.Bayes,
  credit.test,
  type = "class"
)
# Cross-tabulate actual (rows) against predicted (columns) classes.
twelve.credit.perf <- table(
  credit.test$class,
  twelve.credit.pred,
  dnn = c("actual", "Predicted")
)
twelve.credit.perf
Predicted
actual - +
- 130 0
+ 100 0
# Summarise the confusion matrix with confusionP(), a helper defined outside
# this section; the lines that follow are its printed output (correct and
# incorrect counts, then accuracy as a percentage).
confusionP(twelve.credit.perf)
Correct= 130
Incorrect= 100
Accuracy = 56.52 %
Question 5
b. Repeat Part A, but assume Gender is unknown.
# Hand-computed naive Bayes score for class Life=Yes given the evidence E.
# Each code line is followed by its printed console output.
# NOTE(review): presumably the first four factors are smoothed conditional
# probabilities of the known attributes and 1.5/3 is the class term --
# confirm against the data table, which is not shown in this section.
print("P|Life=Yes|E")
[1] "P|Life=Yes|E"
(5.5/6)*(4.5/6)*(.5/6)*(3.5/6)*(1.5/3)
[1] 0.01671007
# Same product for class Life=No. The two printed values do not sum to 1, so
# they are unnormalised scores; the larger one (Life=Yes) wins.
print("P|Life=No|E")
[1] "P|Life=No|E"
(2.5/6)*(.5/6)*(5.5/6)*(1.5/6)*(1.5/3)
[1] 0.003978588
Question 6
Consider the confusion matrix below where Yes represents the
positive class.
a. Compute the overall classification accuracy.
# Accuracy = correctly classified / all instances. The numerator 30 + 70 is
# presumably TP + TN from the confusion matrix (not reproduced in this
# section -- TODO confirm); the next line is the printed result.
(30+70)/(30+10+10+70)
[1] 0.8333333
b. Compute the True Positive Rate
# True positive rate = TP / (TP + FN); read here as TP = 30 and FN = 10
# (presumably taken from the confusion matrix, which is not shown in this
# section). The next line is the printed result.
30/(30+10)
[1] 0.75
c. Compute the False Positive Rate
# False positive rate = FP / (FP + TN); read here as FP = 10 and TN = 70
# (presumably taken from the confusion matrix, which is not shown in this
# section). The next line is the printed result.
10/(10+70)
[1] 0.125
