library(kernlab)
# Load the `spam` data set into the workspace and print its first six rows.
data("spam")
head(spam, n = 6L)
## make address all num3d our over remove internet order mail receive
## 1 0.00 0.64 0.64 0 0.32 0.00 0.00 0.00 0.00 0.00 0.00
## 2 0.21 0.28 0.50 0 0.14 0.28 0.21 0.07 0.00 0.94 0.21
## 3 0.06 0.00 0.71 0 1.23 0.19 0.19 0.12 0.64 0.25 0.38
## 4 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31
## 5 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31
## 6 0.00 0.00 0.00 0 1.85 0.00 0.00 1.85 0.00 0.00 0.00
## will people report addresses free business email you credit your font
## 1 0.64 0.00 0.00 0.00 0.32 0.00 1.29 1.93 0.00 0.96 0
## 2 0.79 0.65 0.21 0.14 0.14 0.07 0.28 3.47 0.00 1.59 0
## 3 0.45 0.12 0.00 1.75 0.06 0.06 1.03 1.36 0.32 0.51 0
## 4 0.31 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0
## 5 0.31 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0
## 6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0
## num000 money hp hpl george num650 lab labs telnet num857 data num415
## 1 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## 2 0.43 0.43 0 0 0 0 0 0 0 0 0 0
## 3 1.16 0.06 0 0 0 0 0 0 0 0 0 0
## 4 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## 5 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## 6 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## num85 technology num1999 parts pm direct cs meeting original project
## 1 0 0 0.00 0 0 0.00 0 0 0.00 0
## 2 0 0 0.07 0 0 0.00 0 0 0.00 0
## 3 0 0 0.00 0 0 0.06 0 0 0.12 0
## 4 0 0 0.00 0 0 0.00 0 0 0.00 0
## 5 0 0 0.00 0 0 0.00 0 0 0.00 0
## 6 0 0 0.00 0 0 0.00 0 0 0.00 0
## re edu table conference charSemicolon charRoundbracket
## 1 0.00 0.00 0 0 0.00 0.000
## 2 0.00 0.00 0 0 0.00 0.132
## 3 0.06 0.06 0 0 0.01 0.143
## 4 0.00 0.00 0 0 0.00 0.137
## 5 0.00 0.00 0 0 0.00 0.135
## 6 0.00 0.00 0 0 0.00 0.223
## charSquarebracket charExclamation charDollar charHash capitalAve
## 1 0 0.778 0.000 0.000 3.756
## 2 0 0.372 0.180 0.048 5.114
## 3 0 0.276 0.184 0.010 9.821
## 4 0 0.137 0.000 0.000 3.537
## 5 0 0.135 0.000 0.000 3.537
## 6 0 0.000 0.000 0.000 3.000
## capitalLong capitalTotal type
## 1 61 278 spam
## 2 101 1028 spam
## 3 485 2259 spam
## 4 40 191 spam
## 5 40 191 spam
## 6 15 54 spam
#' Evaluate a single-feature spam rule over a grid of thresholds
#'
#' A row is predicted to be "spam" when its `your` value is strictly greater
#' than the threshold and "nonspam" otherwise. For every threshold the
#' predictions are cross-tabulated against `type` and four ratios are derived
#' from the 2 x 2 table (rows = prediction, columns = `type`, both ordered
#' "nonspam", "spam"):
#'
#' * `ns`: share of rows predicted "nonspam" that really are "nonspam";
#' * `s`: share of rows predicted "spam" that really are "spam";
#' * `ns_sens`: share of true "nonspam" rows that are predicted "nonspam";
#' * `ns_spec`: share of true "spam" rows that are predicted "spam".
#'
#' Two plots are drawn as a side effect: `ns` and `s` against the threshold
#' (`ns` is the curve labelled "Positive predictive value", i.e. the plot
#' labels treat "nonspam" as the positive class), and `ns_sens` against
#' `1 - ns_spec` (titled "ROC").
#'
#' @param spam A data frame (or list) with a numeric element `your` and an
#'   element `type` whose values are "nonspam" or "spam" (factor or
#'   character; `NA`s are dropped by the cross-tabulation).
#' @param thresholds Numeric vector of cut-offs to evaluate, without `NA`s.
#'   Defaults to `seq(0, 11.1, by = 0.1)`.
#' @return An unnamed list with, in order: (1) the first threshold reaching
#'   the largest `ns` (0 if no `ns` exceeds 0), (2) the thresholds evaluated,
#'   (3) the `ns` values and (4) the `s` values. A ratio whose denominator is
#'   zero (e.g. `s` when nothing is predicted "spam") is `NaN`.
spamPredictor <- function(spam, thresholds = seq(0, 11.1, by = 0.1)) {
  classes <- c("nonspam", "spam")
  stopifnot(
    all(c("your", "type") %in% names(spam)),
    is.numeric(thresholds),
    length(thresholds) > 0L,
    !anyNA(thresholds)
  )

  your_freq <- spam[["your"]]
  labels <- as.character(spam[["type"]])
  if (!all(is.na(labels) | labels %in% classes)) {
    stop("`spam$type` may only contain \"nonspam\" and \"spam\"", call. = FALSE)
  }
  # Fixed levels keep the table 2 x 2 with a known cell order even when a
  # class is absent from the labels or from the predictions.
  truth <- factor(labels, levels = classes)
  n_obs <- length(truth)

  steps <- as.numeric(thresholds)
  n_steps <- length(steps)
  ns_vals <- numeric(n_steps)
  s_vals <- numeric(n_steps)
  ns_sensitivity <- numeric(n_steps)
  ns_specificity <- numeric(n_steps)
  optimal <- 0
  best_ns <- 0

  for (k in seq_len(n_steps)) {
    cutoff <- steps[[k]]
    predicted <- factor(
      ifelse(your_freq > cutoff, "spam", "nonspam"),
      levels = classes
    )
    # Cell proportions; the ratios below are unaffected by this scaling.
    x <- table(predicted, truth) / n_obs

    ns <- x[1, 1] / (x[1, 1] + x[1, 2])
    ns_vals[k] <- ns
    s_vals[k] <- x[2, 2] / (x[2, 1] + x[2, 2])
    ns_sensitivity[k] <- x[1, 1] / (x[1, 1] + x[2, 1])
    ns_specificity[k] <- x[2, 2] / (x[1, 2] + x[2, 2])

    # Strict `>` keeps the first threshold that attains the maximum; the NA
    # guard skips thresholds where nothing is predicted "nonspam" (0 / 0).
    if (!is.na(ns) && ns > best_ns) {
      best_ns <- ns
      optimal <- cutoff
    }
  }

  plot(steps, ns_vals,
    type = "l", col = 2, ylim = c(0, 1),
    xlab = "Number of 'your' occurrences", ylab = "Accuracy",
    main = "Positive and negative predictive power"
  )
  lines(steps, s_vals, col = 3)
  legend(0.4, 1,
    c("Positive predictive value", "Negative predictive value"),
    col = c(2, 3), lty = c(1, 1), cex = 0.7
  )

  plot(1 - ns_specificity, ns_sensitivity,
    col = "green", type = "l", ylim = c(0, 1),
    xlab = "1 - specificity", ylab = "Sensitivity", main = "ROC"
  )
  # Reference points on the diagonal (sensitivity == 1 - specificity), placed
  # on a fixed grid so they do not depend on the thresholds being evaluated.
  diagonal <- seq(0, 1, by = 0.1)
  points(diagonal, diagonal)

  list(optimal, steps, ns_vals, s_vals)
}
# Example usage: x <- spamPredictor(spam)

