library(kernlab)
data(spam)
head(spam)
##   make address  all num3d  our over remove internet order mail receive
## 1 0.00    0.64 0.64     0 0.32 0.00   0.00     0.00  0.00 0.00    0.00
## 2 0.21    0.28 0.50     0 0.14 0.28   0.21     0.07  0.00 0.94    0.21
## 3 0.06    0.00 0.71     0 1.23 0.19   0.19     0.12  0.64 0.25    0.38
## 4 0.00    0.00 0.00     0 0.63 0.00   0.31     0.63  0.31 0.63    0.31
## 5 0.00    0.00 0.00     0 0.63 0.00   0.31     0.63  0.31 0.63    0.31
## 6 0.00    0.00 0.00     0 1.85 0.00   0.00     1.85  0.00 0.00    0.00
##   will people report addresses free business email  you credit your font
## 1 0.64   0.00   0.00      0.00 0.32     0.00  1.29 1.93   0.00 0.96    0
## 2 0.79   0.65   0.21      0.14 0.14     0.07  0.28 3.47   0.00 1.59    0
## 3 0.45   0.12   0.00      1.75 0.06     0.06  1.03 1.36   0.32 0.51    0
## 4 0.31   0.31   0.00      0.00 0.31     0.00  0.00 3.18   0.00 0.31    0
## 5 0.31   0.31   0.00      0.00 0.31     0.00  0.00 3.18   0.00 0.31    0
## 6 0.00   0.00   0.00      0.00 0.00     0.00  0.00 0.00   0.00 0.00    0
##   num000 money hp hpl george num650 lab labs telnet num857 data num415
## 1   0.00  0.00  0   0      0      0   0    0      0      0    0      0
## 2   0.43  0.43  0   0      0      0   0    0      0      0    0      0
## 3   1.16  0.06  0   0      0      0   0    0      0      0    0      0
## 4   0.00  0.00  0   0      0      0   0    0      0      0    0      0
## 5   0.00  0.00  0   0      0      0   0    0      0      0    0      0
## 6   0.00  0.00  0   0      0      0   0    0      0      0    0      0
##   num85 technology num1999 parts pm direct cs meeting original project
## 1     0          0    0.00     0  0   0.00  0       0     0.00       0
## 2     0          0    0.07     0  0   0.00  0       0     0.00       0
## 3     0          0    0.00     0  0   0.06  0       0     0.12       0
## 4     0          0    0.00     0  0   0.00  0       0     0.00       0
## 5     0          0    0.00     0  0   0.00  0       0     0.00       0
## 6     0          0    0.00     0  0   0.00  0       0     0.00       0
##     re  edu table conference charSemicolon charRoundbracket
## 1 0.00 0.00     0          0          0.00            0.000
## 2 0.00 0.00     0          0          0.00            0.132
## 3 0.06 0.06     0          0          0.01            0.143
## 4 0.00 0.00     0          0          0.00            0.137
## 5 0.00 0.00     0          0          0.00            0.135
## 6 0.00 0.00     0          0          0.00            0.223
##   charSquarebracket charExclamation charDollar charHash capitalAve
## 1                 0           0.778      0.000    0.000      3.756
## 2                 0           0.372      0.180    0.048      5.114
## 3                 0           0.276      0.184    0.010      9.821
## 4                 0           0.137      0.000    0.000      3.537
## 5                 0           0.135      0.000    0.000      3.537
## 6                 0           0.000      0.000    0.000      3.000
##   capitalLong capitalTotal type
## 1          61          278 spam
## 2         101         1028 spam
## 3         485         2259 spam
## 4          40          191 spam
## 5          40          191 spam
## 6          15           54 spam
spamPredictor <- function(spam){
  vals = seq(0,11.1,by=0.1)
  optimal = 0
  pos = 0
  neg = 0
  ns_vals = double()
  ns_sensitivity = double()
  ns_specificity = double()
  s_vals = double()
  steps = double()
  for(i in vals){
    prediction <- ifelse(spam$your > i, "spam", "nonspam")
    x <- table(prediction, spam$type)/length(spam$type)
    ns <- x[1,1] / (x[1,1] + x[1,2])
    ns_sens <- x[1,1] / (x[1,1] + x[2,1])
    ns_spec <- x[2,2] / (x[1,2] + x[2,2])
    ns_vals <- c(ns_vals, ns)
    ns_sensitivity <-c(ns_sensitivity, ns_sens)
    ns_specificity <-c(ns_specificity, ns_spec)
    steps = c(steps, i)
    if(ns > pos){
      pos = ns
      optimal = i
    }
    s <- x[2,2] / (x[2,1] + x[2,2])
    s_vals <- c(s_vals, s)
  }
  #old.par <- par(mfrow=c(2, 1))
  #dev.new(width = 5, height = 4)
  plot(steps, ns_vals, xlab = "Number of 'your' occurrences",type='l', ylab = "Accuracy",
       col=2, ylim = c(0,1), main = "Positive and negative predictive power" )
  lines(steps, s_vals, col=3)
  legend(0.4,1,c("Positive predictive value", "Negative predictive value"), col=c(2,3), lty=c(1,1), cex =0.7)
  plot(1 - ns_specificity, ns_sensitivity, col="green", type='l', ylim = c(0,1),
       xlab = "1 - specificity", ylab = "Sensitivity", main = "ROC")
  points(steps, steps)
  #lines(steps, ns_specificity, col="orange")
  #par(old.par)
  z = list(optimal, steps, ns_vals, s_vals)
  return(z)
}
#x = spamPredictor(spam)