library(kernlab)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:kernlab':
##
## alpha
# Load the `spam` data set (made available by the kernlab package loaded
# above) into the workspace.
data(spam)
# Number of rows (observations) in the data set.
nrow(spam)
## [1] 4601
# Preview the first rows. The last column, `type`, holds the class label
# that is used as the response further below.
head(spam)
## make address all num3d our over remove internet order mail receive
## 1 0.00 0.64 0.64 0 0.32 0.00 0.00 0.00 0.00 0.00 0.00
## 2 0.21 0.28 0.50 0 0.14 0.28 0.21 0.07 0.00 0.94 0.21
## 3 0.06 0.00 0.71 0 1.23 0.19 0.19 0.12 0.64 0.25 0.38
## 4 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31
## 5 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31
## 6 0.00 0.00 0.00 0 1.85 0.00 0.00 1.85 0.00 0.00 0.00
## will people report addresses free business email you credit your font
## 1 0.64 0.00 0.00 0.00 0.32 0.00 1.29 1.93 0.00 0.96 0
## 2 0.79 0.65 0.21 0.14 0.14 0.07 0.28 3.47 0.00 1.59 0
## 3 0.45 0.12 0.00 1.75 0.06 0.06 1.03 1.36 0.32 0.51 0
## 4 0.31 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0
## 5 0.31 0.31 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0
## 6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0
## num000 money hp hpl george num650 lab labs telnet num857 data num415
## 1 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## 2 0.43 0.43 0 0 0 0 0 0 0 0 0 0
## 3 1.16 0.06 0 0 0 0 0 0 0 0 0 0
## 4 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## 5 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## 6 0.00 0.00 0 0 0 0 0 0 0 0 0 0
## num85 technology num1999 parts pm direct cs meeting original project
## 1 0 0 0.00 0 0 0.00 0 0 0.00 0
## 2 0 0 0.07 0 0 0.00 0 0 0.00 0
## 3 0 0 0.00 0 0 0.06 0 0 0.12 0
## 4 0 0 0.00 0 0 0.00 0 0 0.00 0
## 5 0 0 0.00 0 0 0.00 0 0 0.00 0
## 6 0 0 0.00 0 0 0.00 0 0 0.00 0
## re edu table conference charSemicolon charRoundbracket
## 1 0.00 0.00 0 0 0.00 0.000
## 2 0.00 0.00 0 0 0.00 0.132
## 3 0.06 0.06 0 0 0.01 0.143
## 4 0.00 0.00 0 0 0.00 0.137
## 5 0.00 0.00 0 0 0.00 0.135
## 6 0.00 0.00 0 0 0.00 0.223
## charSquarebracket charExclamation charDollar charHash capitalAve
## 1 0 0.778 0.000 0.000 3.756
## 2 0 0.372 0.180 0.048 5.114
## 3 0 0.276 0.184 0.010 9.821
## 4 0 0.137 0.000 0.000 3.537
## 5 0 0.135 0.000 0.000 3.537
## 6 0 0.000 0.000 0.000 3.000
## capitalLong capitalTotal type
## 1 61 278 spam
## 2 101 1028 spam
## 3 485 2259 spam
## 4 40 191 spam
## 5 40 191 spam
## 6 15 54 spam
Split the spam data into training (75%) and test (25%) sets, stratified by `type`.
# Fix the RNG seed so the random partition below is reproducible.
set.seed(998)

# createDataPartition() performs stratified sampling on the outcome, so the
# class proportions of `type` are preserved in the 75% training sample.
# list = FALSE returns the selected row indices as a matrix, not a list.
inTraining <- createDataPartition(
  y = spam$type,
  p = 0.75,
  list = FALSE
)

# Selected rows form the training set; every remaining row is held out.
spam_train <- spam[inTraining, ]
spam_test <- spam[-inTraining, ]
Hint: use the model formula `type ~ .` (predict `type` from all other columns).
# Load the `promotergene` data set into the workspace.
data("promotergene")
# Open the help page describing the data set (useful interactively only).
?promotergene
## starting httpd help server ... done
# Preview the first rows: a `Class` label followed by columns V2..V58.
head(promotergene)
## Class V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
## 1 + g c c t t c t c c a a a a c g t g t
## 2 + a t g c a a t t t t t t a g t t g c
## 3 + c c g t t t a t t t t t t c t a c c
## 4 + t c t c a a c g t a a c a c t t t a
## 5 + t a g g c a c c c c a g g c t t t a
## 6 + a t a t a a a a a a g t t c t t g c
## V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37
## 1 t t t t t g t t g t t a a t t c g g
## 2 a t g a a c t c g c a t g t c t c c
## 3 c a t a t c c t t g a a g c g g t g
## 4 c a g c g g c g c g t c a t t t g a
## 5 c a c t t t a t g c t t c c g g c t
## 6 t t t c t a a c g t g a a a g t g g
## V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55
## 1 t g t a g a c t t g t a a a c c t a
## 2 a t a g a a t g c g c g c t a c t t
## 3 t t a t a a t g c c g c g c c c t c
## 4 t a t g a t g c g c c c c g c t t c
## 5 c g t a t g t t g t g t g g a a t t
## 6 t t t a g g t t a a a a g a c a t c
## V56 V57 V58
## 1 a a t
## 2 g a t
## 3 g a t
## 4 c c g
## 5 g t g
## 6 a g t
# Total number of observations in the data set.
nrow(promotergene)
## [1] 106
# Class distribution; the recorded output below shows a balanced 53/53 split.
table(promotergene$Class)
##
## + -
## 53 53
Split the promotergene data into training (80%) and test (20%) sets, stratified by `Class`.
# Fix the RNG seed so the random partition below is reproducible.
set.seed(123)

# Stratified 80/20 split on `Class`. Note that this overwrites the
# `inTraining` index created earlier for the spam data.
inTraining <- createDataPartition(
  y = promotergene$Class,
  p = 0.80,
  list = FALSE
)

# Selected rows form the training set; every remaining row is held out.
promotergeneTrain <- promotergene[inTraining, ]
promotergeneTest <- promotergene[-inTraining, ]

# Check that the held-out set keeps the class balance of the full data.
table(promotergeneTest$Class)
##
## + -
## 10 10
Hint: use the model formula `Class ~ .` (predict `Class` from all other columns).