suppressMessages(library("corrplot"))
suppressMessages(library("ggplot2"))
suppressMessages(library("e1071"))
suppressMessages(library("printr"))
suppressMessages(library('caret'))
suppressMessages(library('randomForest'))
suppressMessages(library('ROCR'))
suppressMessages(library('reshape2'))

# Preprocessing ----
# First step: import the data into R and run descriptive statistics to spot
# any discrepancies.
# Header names were added to the raw dataset beforehand and the result was
# saved as a csv file, so the first row is read as the column names.
df <- read.csv("phishing.csv", sep = ",", header = TRUE)
head(df)
## IP_Address URL_Length Shortining_Service having_At_Symbol double_slash_redirecting Prefix_Suffix having_Sub_Domain SSLfinal_State Domain_registeration_length Favicon port HTTPS_token Request_URL URL_of_Anchor Links_in_tags SFH Submitting_to_email Abnormal_URL Redirect on_mouseover RightClick popUpWidnow Iframe age_of_domain DNSRecord web_traffic Page_Rank Google_Index Links_pointing_to_page Statistical_report Result
## -1 1 1 1 -1 -1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 -1 0 1 1 1 1 -1 -1 -1 -1 1 1 -1 -1
## 1 1 1 1 1 -1 0 1 -1 1 1 -1 1 0 -1 -1 1 1 0 1 1 1 1 -1 -1 0 -1 1 1 1 -1
## 1 0 1 1 1 -1 -1 -1 -1 1 1 -1 1 0 -1 -1 -1 -1 0 1 1 1 1 1 -1 1 -1 1 0 -1 -1
## 1 0 1 1 1 -1 -1 -1 1 1 1 -1 -1 0 0 -1 1 1 0 1 1 1 1 -1 -1 1 -1 1 -1 1 -1
## 1 0 -1 1 1 -1 1 1 -1 1 1 1 1 0 0 -1 1 1 0 -1 1 -1 1 -1 -1 0 -1 1 1 1 1
## -1 0 -1 1 -1 -1 1 1 -1 1 1 -1 1 0 0 -1 -1 -1 0 1 1 1 1 1 1 1 -1 1 -1 -1 1
# Descriptive statistics: print the dimensions of the data frame and, for each
# column, its type and first few values.
str(df)
## 'data.frame':    11055 obs. of  31 variables:
##  $ IP_Address                 : int  -1 1 1 1 1 -1 1 1 1 1 ...
##  $ URL_Length                 : int  1 1 0 0 0 0 0 0 0 1 ...
##  $ Shortining_Service         : int  1 1 1 1 -1 -1 -1 1 -1 -1 ...
##  $ having_At_Symbol           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ double_slash_redirecting   : int  -1 1 1 1 1 -1 1 1 1 1 ...
##  $ Prefix_Suffix              : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ having_Sub_Domain          : int  -1 0 -1 -1 1 1 -1 -1 1 -1 ...
##  $ SSLfinal_State             : int  -1 1 -1 -1 1 1 -1 -1 1 1 ...
##  $ Domain_registeration_length: int  -1 -1 -1 1 -1 -1 1 1 -1 -1 ...
##  $ Favicon                    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ port                       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ HTTPS_token                : int  -1 -1 -1 -1 1 -1 1 -1 -1 1 ...
##  $ Request_URL                : int  1 1 1 -1 1 1 -1 -1 1 1 ...
##  $ URL_of_Anchor              : int  -1 0 0 0 0 0 -1 0 0 0 ...
##  $ Links_in_tags              : int  1 -1 -1 0 0 0 0 -1 1 1 ...
##  $ SFH                        : int  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ Submitting_to_email        : int  -1 1 -1 1 1 -1 -1 1 1 1 ...
##  $ Abnormal_URL               : int  -1 1 -1 1 1 -1 -1 1 1 1 ...
##  $ Redirect                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ on_mouseover               : int  1 1 1 1 -1 1 1 1 1 1 ...
##  $ RightClick                 : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ popUpWidnow                : int  1 1 1 1 -1 1 1 1 1 1 ...
##  $ Iframe                     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ age_of_domain              : int  -1 -1 1 -1 -1 1 1 -1 1 1 ...
##  $ DNSRecord                  : int  -1 -1 -1 -1 -1 1 -1 -1 -1 -1 ...
##  $ web_traffic                : int  -1 0 1 1 0 1 -1 0 1 0 ...
##  $ Page_Rank                  : int  -1 -1 -1 -1 -1 -1 -1 -1 1 -1 ...
##  $ Google_Index               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Links_pointing_to_page     : int  1 1 0 -1 1 -1 0 0 0 0 ...
##  $ Statistical_report         : int  -1 1 -1 1 1 -1 -1 1 1 1 ...
##  $ Result                     : int  -1 -1 -1 -1 1 1 -1 -1 1 -1 ...
# Per-column summary (min, quartiles, median, mean, max) of the integer-coded
# columns.
summary(df)
IP_Address URL_Length Shortining_Service having_At_Symbol double_slash_redirecting Prefix_Suffix having_Sub_Domain SSLfinal_State Domain_registeration_length Favicon port HTTPS_token Request_URL URL_of_Anchor Links_in_tags SFH Submitting_to_email Abnormal_URL Redirect on_mouseover RightClick popUpWidnow Iframe age_of_domain DNSRecord web_traffic Page_Rank Google_Index Links_pointing_to_page Statistical_report Result
Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.000 Min. :-1.00000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.00000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :0.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.00000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.0000 Min. :-1.000 Min. :-1.0000 Min. :-1.0000
1st Qu.:-1.0000 1st Qu.:-1.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.:-1.000 1st Qu.:-1.00000 1st Qu.:-1.0000 1st Qu.:-1.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.:-1.0000 1st Qu.:-1.00000 1st Qu.:-1.0000 1st Qu.:-1.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.:0.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.: 1.0000 1st Qu.:-1.00000 1st Qu.:-1.0000 1st Qu.: 0.0000 1st Qu.:-1.0000 1st Qu.: 1.0000 1st Qu.: 0.000 1st Qu.: 1.0000 1st Qu.:-1.0000
Median : 1.0000 Median :-1.0000 Median : 1.0000 Median : 1.0000 Median : 1.0000 Median :-1.000 Median : 0.00000 Median : 1.0000 Median :-1.0000 Median : 1.0000 Median : 1.0000 Median : 1.0000 Median : 1.0000 Median : 0.00000 Median : 0.0000 Median :-1.0000 Median : 1.0000 Median : 1.0000 Median :0.0000 Median : 1.0000 Median : 1.0000 Median : 1.0000 Median : 1.0000 Median : 1.00000 Median : 1.0000 Median : 1.0000 Median :-1.0000 Median : 1.0000 Median : 0.000 Median : 1.0000 Median : 1.0000
Mean : 0.3138 Mean :-0.6332 Mean : 0.7388 Mean : 0.7006 Mean : 0.7415 Mean :-0.735 Mean : 0.06395 Mean : 0.2509 Mean :-0.3368 Mean : 0.6286 Mean : 0.7283 Mean : 0.6751 Mean : 0.1868 Mean :-0.07653 Mean :-0.1181 Mean :-0.5957 Mean : 0.6356 Mean : 0.7053 Mean :0.1157 Mean : 0.7621 Mean : 0.9139 Mean : 0.6134 Mean : 0.8169 Mean : 0.06124 Mean : 0.3771 Mean : 0.2873 Mean :-0.4837 Mean : 0.7216 Mean : 0.344 Mean : 0.7196 Mean : 0.1139
3rd Qu.: 1.0000 3rd Qu.:-1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.:-1.000 3rd Qu.: 1.00000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.:-1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.:0.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.00000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 1.000 3rd Qu.: 1.0000 3rd Qu.: 1.0000
Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.000 Max. : 1.00000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.00000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. :1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.00000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.0000 Max. : 1.000 Max. : 1.0000 Max. : 1.0000
# There are 11055 observations of 31 variables: 30 attributes plus the last
# column, Result, which is the response (target) variable.
# The output above shows whether the data is laid out the way explained in the
# link. If it is not, more processing has to be done: e.g. if the column names
# do not match the data fields, the file needs to be reorganised and the
# columns renamed, in R or Excel, and saved as csv again.

# Frequency table of the values taken by every column. Iterating over the data
# frame itself covers all columns in file order, so the column names do not
# have to be spelled out (and kept in sync with the file) by hand.
a <- lapply(df, table)

# Visualization
# Checking the score distribution: box plots of the first 30 columns, drawn
# ten columns per plot.
df1 <- df[, 1:10]
df2 <- df[, 11:20]
df3 <- df[, 21:30]

boxplot(df1)
boxplot(df2)
boxplot(df3)

# Correlation plot
# Pairwise correlations between all columns (Result included), printed to two
# decimal places.
cmatrix <- cor(df)
round(cmatrix, digits = 2)
IP_Address URL_Length Shortining_Service having_At_Symbol double_slash_redirecting Prefix_Suffix having_Sub_Domain SSLfinal_State Domain_registeration_length Favicon port HTTPS_token Request_URL URL_of_Anchor Links_in_tags SFH Submitting_to_email Abnormal_URL Redirect on_mouseover RightClick popUpWidnow Iframe age_of_domain DNSRecord web_traffic Page_Rank Google_Index Links_pointing_to_page Statistical_report Result
IP_Address 1.00 -0.05 0.40 0.16 0.40 -0.01 -0.08 0.07 -0.02 0.09 0.06 0.36 0.03 0.10 0.01 -0.01 0.08 0.34 -0.32 0.08 0.04 0.10 0.05 -0.01 -0.05 0.00 -0.09 0.03 -0.34 -0.02 0.09
URL_Length -0.05 1.00 -0.10 -0.08 -0.08 0.06 0.00 0.05 -0.22 -0.04 0.00 -0.09 0.25 -0.02 0.05 0.41 -0.01 -0.11 0.05 -0.05 -0.01 -0.05 -0.01 0.18 -0.04 0.01 0.18 0.00 -0.02 -0.07 0.06
Shortining_Service 0.40 -0.10 1.00 0.10 0.84 -0.08 -0.04 -0.06 0.06 0.01 0.00 0.76 -0.04 0.00 -0.13 -0.02 0.05 0.74 -0.53 0.06 0.04 0.04 0.02 -0.05 0.44 -0.05 0.01 0.16 -0.20 0.09 -0.07
having_At_Symbol 0.16 -0.08 0.10 1.00 0.09 -0.01 -0.06 0.03 0.02 0.30 0.36 0.10 0.03 0.06 -0.07 -0.01 0.37 0.20 -0.03 0.28 0.22 0.29 0.28 -0.01 -0.05 0.03 -0.06 0.04 -0.01 -0.08 0.05
double_slash_redirecting 0.40 -0.08 0.84 0.09 1.00 -0.09 -0.04 -0.04 0.05 0.04 0.03 0.76 -0.03 -0.01 -0.13 -0.04 0.03 0.72 -0.59 0.09 0.03 0.05 0.01 -0.05 0.43 -0.06 0.00 0.18 -0.19 0.07 -0.04
Prefix_Suffix -0.01 0.06 -0.08 -0.01 -0.09 1.00 0.09 0.26 -0.10 -0.01 -0.02 -0.07 0.10 0.35 0.10 0.00 -0.05 -0.08 0.02 0.01 -0.02 -0.01 -0.04 0.07 -0.02 0.11 -0.01 0.07 0.07 0.00 0.35
having_Sub_Domain -0.08 0.00 -0.04 -0.06 -0.04 0.09 1.00 0.27 -0.08 -0.02 0.00 -0.04 0.10 0.23 0.09 0.10 0.01 -0.03 0.03 -0.02 0.02 -0.03 0.01 0.12 0.13 -0.01 0.12 0.06 -0.01 0.08 0.30
SSLfinal_State 0.07 0.05 -0.06 0.03 -0.04 0.26 0.27 1.00 -0.19 -0.01 0.03 -0.03 0.19 0.54 0.18 0.17 0.01 -0.05 -0.02 0.02 0.02 -0.01 0.00 0.16 0.05 0.26 0.07 0.10 -0.01 0.06 0.71
Domain_registeration_length -0.02 -0.22 0.06 0.02 0.05 -0.10 -0.08 -0.19 1.00 0.05 0.02 0.06 -0.61 -0.16 -0.10 -0.14 0.04 0.06 -0.02 0.02 0.02 0.05 0.00 -0.06 -0.01 -0.13 -0.06 -0.04 0.12 0.00 -0.23
Favicon 0.09 -0.04 0.01 0.30 0.04 -0.01 -0.02 -0.01 0.05 1.00 0.80 0.05 0.00 0.04 -0.10 -0.01 0.67 0.07 -0.02 0.71 0.41 0.94 0.63 0.00 0.09 -0.05 0.01 -0.02 -0.13 0.30 0.00
port 0.06 0.00 0.00 0.36 0.03 -0.02 0.00 0.03 0.02 0.80 1.00 0.00 0.03 0.04 -0.07 0.01 0.80 0.05 -0.02 0.62 0.48 0.75 0.69 0.01 0.05 -0.03 0.02 -0.01 -0.14 0.34 0.04
HTTPS_token 0.36 -0.09 0.76 0.10 0.76 -0.07 -0.04 -0.03 0.06 0.05 0.00 1.00 -0.01 0.01 -0.10 -0.01 0.08 0.72 -0.46 0.11 0.01 0.07 0.02 -0.05 0.40 -0.04 0.02 0.12 -0.13 0.10 -0.04
Request_URL 0.03 0.25 -0.04 0.03 -0.03 0.10 0.10 0.19 -0.61 0.00 0.03 -0.01 1.00 0.18 0.07 0.13 0.02 -0.04 0.00 0.01 -0.02 0.00 0.02 0.09 0.02 0.16 0.06 0.05 -0.07 0.04 0.25
URL_of_Anchor 0.10 -0.02 0.00 0.06 -0.01 0.35 0.23 0.54 -0.16 0.04 0.04 0.01 0.18 1.00 0.14 0.11 0.03 -0.01 0.00 0.07 0.02 0.04 0.01 0.08 0.09 0.33 0.10 0.04 0.02 0.08 0.69
Links_in_tags 0.01 0.05 -0.13 -0.07 -0.13 0.10 0.09 0.18 -0.10 -0.10 -0.07 -0.10 0.07 0.14 1.00 0.07 -0.04 -0.12 0.04 -0.08 -0.04 -0.11 -0.07 0.08 -0.04 0.06 -0.01 0.05 0.01 -0.09 0.25
SFH -0.01 0.41 -0.02 -0.01 -0.04 0.00 0.10 0.17 -0.14 -0.01 0.01 -0.01 0.13 0.11 0.07 1.00 0.01 -0.03 0.05 0.01 0.01 0.00 0.01 -0.02 0.03 0.05 0.00 0.03 -0.01 -0.01 0.22
Submitting_to_email 0.08 -0.01 0.05 0.37 0.03 -0.05 0.01 0.01 0.04 0.67 0.80 0.08 0.02 0.03 -0.04 0.01 1.00 0.20 -0.01 0.53 0.40 0.63 0.58 0.01 0.06 -0.02 0.03 -0.01 -0.04 0.35 0.02
Abnormal_URL 0.34 -0.11 0.74 0.20 0.72 -0.08 -0.03 -0.05 0.06 0.07 0.05 0.72 -0.04 -0.01 -0.12 -0.03 0.20 1.00 -0.46 0.12 0.02 0.09 0.02 -0.03 0.37 -0.05 0.01 0.12 -0.16 0.19 -0.06
Redirect -0.32 0.05 -0.53 -0.03 -0.59 0.02 0.03 -0.02 -0.02 -0.02 -0.02 -0.46 0.00 0.00 0.04 0.05 -0.01 -0.46 1.00 -0.03 -0.02 -0.03 -0.01 -0.02 -0.21 0.00 0.05 0.06 0.16 -0.06 -0.02
on_mouseover 0.08 -0.05 0.06 0.28 0.09 0.01 -0.02 0.02 0.02 0.71 0.62 0.11 0.01 0.07 -0.08 0.01 0.53 0.12 -0.03 1.00 0.47 0.73 0.66 0.01 0.09 -0.04 0.02 -0.01 -0.04 0.28 0.04
RightClick 0.04 -0.01 0.04 0.22 0.03 -0.02 0.02 0.02 0.02 0.41 0.48 0.01 -0.02 0.02 -0.04 0.01 0.40 0.02 -0.02 0.47 1.00 0.42 0.66 0.01 0.04 -0.01 0.03 -0.01 -0.12 0.20 0.01
popUpWidnow 0.10 -0.05 0.04 0.29 0.05 -0.01 -0.03 -0.01 0.05 0.94 0.75 0.07 0.00 0.04 -0.11 0.00 0.63 0.09 -0.03 0.73 0.42 1.00 0.63 0.00 0.10 -0.04 0.02 -0.01 -0.12 0.29 0.00
Iframe 0.05 -0.01 0.02 0.28 0.01 -0.04 0.01 0.00 0.00 0.63 0.69 0.02 0.02 0.01 -0.07 0.01 0.58 0.02 -0.01 0.66 0.66 0.63 1.00 0.02 0.05 -0.02 0.02 0.00 -0.14 0.27 0.00
age_of_domain -0.01 0.18 -0.05 -0.01 -0.05 0.07 0.12 0.16 -0.06 0.00 0.01 -0.05 0.09 0.08 0.08 -0.02 0.01 -0.03 -0.02 0.01 0.01 0.00 0.02 1.00 -0.03 0.09 -0.15 -0.03 0.04 0.01 0.12
DNSRecord -0.05 -0.04 0.44 -0.05 0.43 -0.02 0.13 0.05 -0.01 0.09 0.05 0.40 0.02 0.09 -0.04 0.03 0.06 0.37 -0.21 0.09 0.04 0.10 0.05 -0.03 1.00 0.05 0.14 0.14 -0.32 0.14 0.08
web_traffic 0.00 0.01 -0.05 0.03 -0.06 0.11 -0.01 0.26 -0.13 -0.05 -0.03 -0.04 0.16 0.33 0.06 0.05 -0.02 -0.05 0.00 -0.04 -0.01 -0.04 -0.02 0.09 0.05 1.00 0.03 -0.01 -0.02 0.01 0.35
Page_Rank -0.09 0.18 0.01 -0.06 0.00 -0.01 0.12 0.07 -0.06 0.01 0.02 0.02 0.06 0.10 -0.01 0.00 0.03 0.01 0.05 0.02 0.03 0.02 0.02 -0.15 0.14 0.03 1.00 0.03 -0.03 0.03 0.10
Google_Index 0.03 0.00 0.16 0.04 0.18 0.07 0.06 0.10 -0.04 -0.02 -0.01 0.12 0.05 0.04 0.05 0.03 -0.01 0.12 0.06 -0.01 -0.01 -0.01 0.00 -0.03 0.14 -0.01 0.03 1.00 -0.04 -0.01 0.13
Links_pointing_to_page -0.34 -0.02 -0.20 -0.01 -0.19 0.07 -0.01 -0.01 0.12 -0.13 -0.14 -0.13 -0.07 0.02 0.01 -0.01 -0.04 -0.16 0.16 -0.04 -0.12 -0.12 -0.14 0.04 -0.32 -0.02 -0.03 -0.04 1.00 -0.02 0.03
Statistical_report -0.02 -0.07 0.09 -0.08 0.07 0.00 0.08 0.06 0.00 0.30 0.34 0.10 0.04 0.08 -0.09 -0.01 0.35 0.19 -0.06 0.28 0.20 0.29 0.27 0.01 0.14 0.01 0.03 -0.01 -0.02 1.00 0.08
Result 0.09 0.06 -0.07 0.05 -0.04 0.35 0.30 0.71 -0.23 0.00 0.04 -0.04 0.25 0.69 0.25 0.22 0.02 -0.06 -0.02 0.04 0.01 0.00 0.00 0.12 0.08 0.35 0.10 0.13 0.03 0.08 1.00
# Replace the long column names with their positions (1, 2, ...) so the axis
# labels of the plots below stay readable. The count is taken from the data
# frame instead of a hard-coded 31, and the renaming is done once.
colnames(df) <- seq_len(ncol(df))
corrplot(cor(df), method = "shade", shade.col = NA, tl.col = "black",
         tl.srt = 45)

# Reshape to long format: one row per (column, value) pair.
a <- melt(df)
## No id variables; using all as measure variables

# Bar chart of how often each column takes each value, one facet row per value.
# The bars use a constant fill colour, so no fill aesthetic is mapped.
ggplot(a, aes(x = variable)) +
  geom_bar(position = "identity", fill = "#FF9999", colour = "black") +
  facet_grid(value ~ .) +
  ggtitle("Phishing Database Data Distribution")

# Re-read the file with every column as a factor. The column names of df were
# overwritten with numbers for plotting, and reading as factors makes the
# models below treat every column, Result included, as categorical.
df <- read.csv("phishing.csv", sep = ",", header = TRUE, colClasses = "factor")

# Data mining algorithm 1 - Naive Bayes
# Use 80% of the rows for training; the remaining rows form the test set.
smp_size <- floor(0.80 * nrow(df))

# Set the seed to make the partition reproducible.
set.seed(123)
train_ind <- sample(seq_len(nrow(df)), size = smp_size)

train <- df[train_ind, ]
test <- df[-train_ind, ]

# Fit Naive Bayes with Result as the response and all other columns as
# predictors, then print the fitted priors and conditional probabilities.
nb_model <- naiveBayes(Result ~ ., data = train)
nb_model
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
## 
## A-priori probabilities:
## Y
##        -1         1 
## 0.4448213 0.5551787 
## 
## Conditional probabilities:
##     IP_Address
## Y           -1         1
##   -1 0.3957804 0.6042196
##   1  0.3034623 0.6965377
## 
##     URL_Length
## Y            -1          0          1
##   -1 0.83706151 0.01626843 0.14667006
##   1  0.79042770 0.00814664 0.20142566
## 
##     Shortining_Service
## Y           -1         1
##   -1 0.1026945 0.8973055
##   1  0.1537678 0.8462322
## 
##     having_At_Symbol
## Y           -1         1
##   -1 0.1725979 0.8274021
##   1  0.1309572 0.8690428
## 
##     double_slash_redirecting
## Y           -1         1
##   -1 0.1123538 0.8876462
##   1  0.1425662 0.8574338
## 
##     Prefix_Suffix
## Y           -1         1
##   -1 1.0000000 0.0000000
##   1  0.7647658 0.2352342
## 
##     having_Sub_Domain
## Y           -1         0         1
##   -1 0.3797661 0.4600915 0.1601423
##   1  0.2415479 0.2224033 0.5360489
## 
##     SSLfinal_State
## Y            -1          0          1
##   -1 0.62048805 0.23690900 0.14260295
##   1  0.08167006 0.00305499 0.91527495
## 
##     Domain_registeration_length
## Y           -1         1
##   -1 0.5495679 0.4504321
##   1  0.7663951 0.2336049
## 
##     Favicon
## Y           -1         1
##   -1 0.1911540 0.8088460
##   1  0.1855397 0.8144603
## 
##     port
## Y           -1         1
##   -1 0.1548043 0.8451957
##   1  0.1242363 0.8757637
## 
##     HTTPS_token
## Y           -1         1
##   -1 0.1464159 0.8535841
##   1  0.1749491 0.8250509
## 
##     Request_URL
## Y           -1         1
##   -1 0.5485511 0.4514489
##   1  0.2930754 0.7069246
## 
##     URL_of_Anchor
## Y             -1           0           1
##   -1 0.663446873 0.305287239 0.031265887
##   1  0.006720978 0.622810591 0.370468432
## 
##     Links_in_tags
## Y           -1         0         1
##   -1 0.4850025 0.3581596 0.1568378
##   1  0.2537678 0.4429735 0.3032587
## 
##     SFH
## Y            -1          0          1
##   -1 0.86731063 0.05541434 0.07727504
##   1  0.68268839 0.08228106 0.23503055
## 
##     Submitting_to_email
## Y           -1         1
##   -1 0.1970005 0.8029995
##   1  0.1741344 0.8258656
## 
##     Abnormal_URL
## Y           -1         1
##   -1 0.1245552 0.8754448
##   1  0.1676171 0.8323829
## 
##     Redirect
## Y            0         1
##   -1 0.8746823 0.1253177
##   1  0.8890020 0.1109980
## 
##     on_mouseover
## Y           -1         1
##   -1 0.1365023 0.8634977
##   1  0.1067210 0.8932790
## 
##     RightClick
## Y            -1          1
##   -1 0.04778851 0.95221149
##   1  0.04114053 0.95885947
## 
##     popUpWidnow
## Y           -1         1
##   -1 0.1967463 0.8032537
##   1  0.1928717 0.8071283
## 
##     Iframe
## Y            -1          1
##   -1 0.09125572 0.90874428
##   1  0.09246436 0.90753564
## 
##     age_of_domain
## Y           -1         1
##   -1 0.5391459 0.4608541
##   1  0.4177189 0.5822811
## 
##     DNSRecord
## Y           -1         1
##   -1 0.3510422 0.6489578
##   1  0.2818737 0.7181263
## 
##     web_traffic
## Y           -1         0         1
##   -1 0.3446873 0.3482461 0.3070666
##   1  0.1592668 0.1403259 0.7004073
## 
##     Page_Rank
## Y           -1         1
##   -1 0.7890188 0.2109812
##   1  0.7036660 0.2963340
## 
##     Google_Index
## Y           -1         1
##   -1 0.1924250 0.8075750
##   1  0.1032587 0.8967413
## 
##     Links_pointing_to_page
## Y            -1          0          1
##   -1 0.03914591 0.59379766 0.36705643
##   1  0.05682281 0.52403259 0.41914460
## 
##     Statistical_report
## Y           -1         1
##   -1 0.1736146 0.8263854
##   1  0.1175153 0.8824847
# Inspect the structure of the fitted object: class counts (apriori), the
# per-feature conditional probability tables, the class levels and the call.
str(nb_model)
## List of 4
##  $ apriori: 'table' int [1:2(1d)] 3934 4910
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ Y: chr [1:2] "-1" "1"
##  $ tables :List of 30
##   ..$ IP_Address                 : table [1:2, 1:2] 0.396 0.303 0.604 0.697
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y         : chr [1:2] "-1" "1"
##   .. .. ..$ IP_Address: chr [1:2] "-1" "1"
##   ..$ URL_Length                 : table [1:2, 1:3] 0.83706 0.79043 0.01627 0.00815 0.14667 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y         : chr [1:2] "-1" "1"
##   .. .. ..$ URL_Length: chr [1:3] "-1" "0" "1"
##   ..$ Shortining_Service         : table [1:2, 1:2] 0.103 0.154 0.897 0.846
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y                 : chr [1:2] "-1" "1"
##   .. .. ..$ Shortining_Service: chr [1:2] "-1" "1"
##   ..$ having_At_Symbol           : table [1:2, 1:2] 0.173 0.131 0.827 0.869
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y               : chr [1:2] "-1" "1"
##   .. .. ..$ having_At_Symbol: chr [1:2] "-1" "1"
##   ..$ double_slash_redirecting   : table [1:2, 1:2] 0.112 0.143 0.888 0.857
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y                       : chr [1:2] "-1" "1"
##   .. .. ..$ double_slash_redirecting: chr [1:2] "-1" "1"
##   ..$ Prefix_Suffix              : table [1:2, 1:2] 1 0.765 0 0.235
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y            : chr [1:2] "-1" "1"
##   .. .. ..$ Prefix_Suffix: chr [1:2] "-1" "1"
##   ..$ having_Sub_Domain          : table [1:2, 1:3] 0.38 0.242 0.46 0.222 0.16 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y                : chr [1:2] "-1" "1"
##   .. .. ..$ having_Sub_Domain: chr [1:3] "-1" "0" "1"
##   ..$ SSLfinal_State             : table [1:2, 1:3] 0.62049 0.08167 0.23691 0.00305 0.1426 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y             : chr [1:2] "-1" "1"
##   .. .. ..$ SSLfinal_State: chr [1:3] "-1" "0" "1"
##   ..$ Domain_registeration_length: table [1:2, 1:2] 0.55 0.766 0.45 0.234
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y                          : chr [1:2] "-1" "1"
##   .. .. ..$ Domain_registeration_length: chr [1:2] "-1" "1"
##   ..$ Favicon                    : table [1:2, 1:2] 0.191 0.186 0.809 0.814
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y      : chr [1:2] "-1" "1"
##   .. .. ..$ Favicon: chr [1:2] "-1" "1"
##   ..$ port                       : table [1:2, 1:2] 0.155 0.124 0.845 0.876
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y   : chr [1:2] "-1" "1"
##   .. .. ..$ port: chr [1:2] "-1" "1"
##   ..$ HTTPS_token                : table [1:2, 1:2] 0.146 0.175 0.854 0.825
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y          : chr [1:2] "-1" "1"
##   .. .. ..$ HTTPS_token: chr [1:2] "-1" "1"
##   ..$ Request_URL                : table [1:2, 1:2] 0.549 0.293 0.451 0.707
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y          : chr [1:2] "-1" "1"
##   .. .. ..$ Request_URL: chr [1:2] "-1" "1"
##   ..$ URL_of_Anchor              : table [1:2, 1:3] 0.66345 0.00672 0.30529 0.62281 0.03127 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y            : chr [1:2] "-1" "1"
##   .. .. ..$ URL_of_Anchor: chr [1:3] "-1" "0" "1"
##   ..$ Links_in_tags              : table [1:2, 1:3] 0.485 0.254 0.358 0.443 0.157 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y            : chr [1:2] "-1" "1"
##   .. .. ..$ Links_in_tags: chr [1:3] "-1" "0" "1"
##   ..$ SFH                        : table [1:2, 1:3] 0.8673 0.6827 0.0554 0.0823 0.0773 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y  : chr [1:2] "-1" "1"
##   .. .. ..$ SFH: chr [1:3] "-1" "0" "1"
##   ..$ Submitting_to_email        : table [1:2, 1:2] 0.197 0.174 0.803 0.826
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y                  : chr [1:2] "-1" "1"
##   .. .. ..$ Submitting_to_email: chr [1:2] "-1" "1"
##   ..$ Abnormal_URL               : table [1:2, 1:2] 0.125 0.168 0.875 0.832
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y           : chr [1:2] "-1" "1"
##   .. .. ..$ Abnormal_URL: chr [1:2] "-1" "1"
##   ..$ Redirect                   : table [1:2, 1:2] 0.875 0.889 0.125 0.111
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y       : chr [1:2] "-1" "1"
##   .. .. ..$ Redirect: chr [1:2] "0" "1"
##   ..$ on_mouseover               : table [1:2, 1:2] 0.137 0.107 0.863 0.893
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y           : chr [1:2] "-1" "1"
##   .. .. ..$ on_mouseover: chr [1:2] "-1" "1"
##   ..$ RightClick                 : table [1:2, 1:2] 0.0478 0.0411 0.9522 0.9589
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y         : chr [1:2] "-1" "1"
##   .. .. ..$ RightClick: chr [1:2] "-1" "1"
##   ..$ popUpWidnow                : table [1:2, 1:2] 0.197 0.193 0.803 0.807
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y          : chr [1:2] "-1" "1"
##   .. .. ..$ popUpWidnow: chr [1:2] "-1" "1"
##   ..$ Iframe                     : table [1:2, 1:2] 0.0913 0.0925 0.9087 0.9075
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y     : chr [1:2] "-1" "1"
##   .. .. ..$ Iframe: chr [1:2] "-1" "1"
##   ..$ age_of_domain              : table [1:2, 1:2] 0.539 0.418 0.461 0.582
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y            : chr [1:2] "-1" "1"
##   .. .. ..$ age_of_domain: chr [1:2] "-1" "1"
##   ..$ DNSRecord                  : table [1:2, 1:2] 0.351 0.282 0.649 0.718
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y        : chr [1:2] "-1" "1"
##   .. .. ..$ DNSRecord: chr [1:2] "-1" "1"
##   ..$ web_traffic                : table [1:2, 1:3] 0.345 0.159 0.348 0.14 0.307 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y          : chr [1:2] "-1" "1"
##   .. .. ..$ web_traffic: chr [1:3] "-1" "0" "1"
##   ..$ Page_Rank                  : table [1:2, 1:2] 0.789 0.704 0.211 0.296
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y        : chr [1:2] "-1" "1"
##   .. .. ..$ Page_Rank: chr [1:2] "-1" "1"
##   ..$ Google_Index               : table [1:2, 1:2] 0.192 0.103 0.808 0.897
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y           : chr [1:2] "-1" "1"
##   .. .. ..$ Google_Index: chr [1:2] "-1" "1"
##   ..$ Links_pointing_to_page     : table [1:2, 1:3] 0.0391 0.0568 0.5938 0.524 0.3671 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y                     : chr [1:2] "-1" "1"
##   .. .. ..$ Links_pointing_to_page: chr [1:3] "-1" "0" "1"
##   ..$ Statistical_report         : table [1:2, 1:2] 0.174 0.118 0.826 0.882
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ Y                 : chr [1:2] "-1" "1"
##   .. .. ..$ Statistical_report: chr [1:2] "-1" "1"
##  $ levels : chr [1:2] "-1" "1"
##  $ call   : language naiveBayes.default(x = X, y = Y, laplace = laplace)
##  - attr(*, "class")= chr "naiveBayes"
# Predict the class of the held-out rows. The response is dropped by name
# rather than by position (column 31), so this keeps working if the column
# order or count changes.
output <- predict(nb_model, test[, names(test) != "Result", drop = FALSE])
# Cross-tabulate predicted against true labels.
table(pred = output, true = test$Result)
## pred/true -1 1
## -1 868 58
## 1 96 1189
# Confusion matrix and derived statistics for the Naive Bayes test-set
# predictions (predictions first, reference labels second).
confusionMatrix(output, test$Result)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   -1    1
##         -1  868   58
##         1    96 1189
##                                           
##                Accuracy : 0.9303          
##                  95% CI : (0.9189, 0.9406)
##     No Information Rate : 0.564           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8577          
##  Mcnemar's Test P-Value : 0.002868        
##                                           
##             Sensitivity : 0.9004          
##             Specificity : 0.9535          
##          Pos Pred Value : 0.9374          
##          Neg Pred Value : 0.9253          
##              Prevalence : 0.4360          
##          Detection Rate : 0.3926          
##    Detection Prevalence : 0.4188          
##       Balanced Accuracy : 0.9270          
##                                           
##        'Positive' Class : -1              
## 
# trainControl for Random Forest: 5-fold cross-validation repeated 5 times,
# logging every fold / tuning-parameter combination as it is fitted.
fitControl <- trainControl(method = "repeatedcv", repeats = 5,
                           number = 5, verboseIter = TRUE)

# Run a Random Forest classification over the training set. tuneLength = 5
# makes caret try five candidate mtry values; importance = TRUE keeps the
# variable-importance measures for later plotting.
rf.fit <- train(Result ~ ., data = train, method = "rf",
                importance = TRUE, trControl = fitControl,
                tuneLength = 5)
## + Fold1.Rep1: mtry= 2 
## - Fold1.Rep1: mtry= 2 
## + Fold1.Rep1: mtry=11 
## - Fold1.Rep1: mtry=11 
## + Fold1.Rep1: mtry=20 
## - Fold1.Rep1: mtry=20 
## + Fold1.Rep1: mtry=29 
## - Fold1.Rep1: mtry=29 
## + Fold1.Rep1: mtry=38 
## - Fold1.Rep1: mtry=38 
## + Fold2.Rep1: mtry= 2 
## - Fold2.Rep1: mtry= 2 
## + Fold2.Rep1: mtry=11 
## - Fold2.Rep1: mtry=11 
## + Fold2.Rep1: mtry=20 
## - Fold2.Rep1: mtry=20 
## + Fold2.Rep1: mtry=29 
## - Fold2.Rep1: mtry=29 
## + Fold2.Rep1: mtry=38 
## - Fold2.Rep1: mtry=38 
## + Fold3.Rep1: mtry= 2 
## - Fold3.Rep1: mtry= 2 
## + Fold3.Rep1: mtry=11 
## - Fold3.Rep1: mtry=11 
## + Fold3.Rep1: mtry=20 
## - Fold3.Rep1: mtry=20 
## + Fold3.Rep1: mtry=29 
## - Fold3.Rep1: mtry=29 
## + Fold3.Rep1: mtry=38 
## - Fold3.Rep1: mtry=38 
## + Fold4.Rep1: mtry= 2 
## - Fold4.Rep1: mtry= 2 
## + Fold4.Rep1: mtry=11 
## - Fold4.Rep1: mtry=11 
## + Fold4.Rep1: mtry=20 
## - Fold4.Rep1: mtry=20 
## + Fold4.Rep1: mtry=29 
## - Fold4.Rep1: mtry=29 
## + Fold4.Rep1: mtry=38 
## - Fold4.Rep1: mtry=38 
## + Fold5.Rep1: mtry= 2 
## - Fold5.Rep1: mtry= 2 
## + Fold5.Rep1: mtry=11 
## - Fold5.Rep1: mtry=11 
## + Fold5.Rep1: mtry=20 
## - Fold5.Rep1: mtry=20 
## + Fold5.Rep1: mtry=29 
## - Fold5.Rep1: mtry=29 
## + Fold5.Rep1: mtry=38 
## - Fold5.Rep1: mtry=38 
## + Fold1.Rep2: mtry= 2 
## - Fold1.Rep2: mtry= 2 
## + Fold1.Rep2: mtry=11 
## - Fold1.Rep2: mtry=11 
## + Fold1.Rep2: mtry=20 
## - Fold1.Rep2: mtry=20 
## + Fold1.Rep2: mtry=29 
## - Fold1.Rep2: mtry=29 
## + Fold1.Rep2: mtry=38 
## - Fold1.Rep2: mtry=38 
## + Fold2.Rep2: mtry= 2 
## - Fold2.Rep2: mtry= 2 
## + Fold2.Rep2: mtry=11 
## - Fold2.Rep2: mtry=11 
## + Fold2.Rep2: mtry=20 
## - Fold2.Rep2: mtry=20 
## + Fold2.Rep2: mtry=29 
## - Fold2.Rep2: mtry=29 
## + Fold2.Rep2: mtry=38 
## - Fold2.Rep2: mtry=38 
## + Fold3.Rep2: mtry= 2 
## - Fold3.Rep2: mtry= 2 
## + Fold3.Rep2: mtry=11 
## - Fold3.Rep2: mtry=11 
## + Fold3.Rep2: mtry=20 
## - Fold3.Rep2: mtry=20 
## + Fold3.Rep2: mtry=29 
## - Fold3.Rep2: mtry=29 
## + Fold3.Rep2: mtry=38 
## - Fold3.Rep2: mtry=38 
## + Fold4.Rep2: mtry= 2 
## - Fold4.Rep2: mtry= 2 
## + Fold4.Rep2: mtry=11 
## - Fold4.Rep2: mtry=11 
## + Fold4.Rep2: mtry=20 
## - Fold4.Rep2: mtry=20 
## + Fold4.Rep2: mtry=29 
## - Fold4.Rep2: mtry=29 
## + Fold4.Rep2: mtry=38 
## - Fold4.Rep2: mtry=38 
## + Fold5.Rep2: mtry= 2 
## - Fold5.Rep2: mtry= 2 
## + Fold5.Rep2: mtry=11 
## - Fold5.Rep2: mtry=11 
## + Fold5.Rep2: mtry=20 
## - Fold5.Rep2: mtry=20 
## + Fold5.Rep2: mtry=29 
## - Fold5.Rep2: mtry=29 
## + Fold5.Rep2: mtry=38 
## - Fold5.Rep2: mtry=38 
## + Fold1.Rep3: mtry= 2 
## - Fold1.Rep3: mtry= 2 
## + Fold1.Rep3: mtry=11 
## - Fold1.Rep3: mtry=11 
## + Fold1.Rep3: mtry=20 
## - Fold1.Rep3: mtry=20 
## + Fold1.Rep3: mtry=29 
## - Fold1.Rep3: mtry=29 
## + Fold1.Rep3: mtry=38 
## - Fold1.Rep3: mtry=38 
## + Fold2.Rep3: mtry= 2 
## - Fold2.Rep3: mtry= 2 
## + Fold2.Rep3: mtry=11 
## - Fold2.Rep3: mtry=11 
## + Fold2.Rep3: mtry=20 
## - Fold2.Rep3: mtry=20 
## + Fold2.Rep3: mtry=29 
## - Fold2.Rep3: mtry=29 
## + Fold2.Rep3: mtry=38 
## - Fold2.Rep3: mtry=38 
## + Fold3.Rep3: mtry= 2 
## - Fold3.Rep3: mtry= 2 
## + Fold3.Rep3: mtry=11 
## - Fold3.Rep3: mtry=11 
## + Fold3.Rep3: mtry=20 
## - Fold3.Rep3: mtry=20 
## + Fold3.Rep3: mtry=29 
## - Fold3.Rep3: mtry=29 
## + Fold3.Rep3: mtry=38 
## - Fold3.Rep3: mtry=38 
## + Fold4.Rep3: mtry= 2 
## - Fold4.Rep3: mtry= 2 
## + Fold4.Rep3: mtry=11 
## - Fold4.Rep3: mtry=11 
## + Fold4.Rep3: mtry=20 
## - Fold4.Rep3: mtry=20 
## + Fold4.Rep3: mtry=29 
## - Fold4.Rep3: mtry=29 
## + Fold4.Rep3: mtry=38 
## - Fold4.Rep3: mtry=38 
## + Fold5.Rep3: mtry= 2 
## - Fold5.Rep3: mtry= 2 
## + Fold5.Rep3: mtry=11 
## - Fold5.Rep3: mtry=11 
## + Fold5.Rep3: mtry=20 
## - Fold5.Rep3: mtry=20 
## + Fold5.Rep3: mtry=29 
## - Fold5.Rep3: mtry=29 
## + Fold5.Rep3: mtry=38 
## - Fold5.Rep3: mtry=38 
## + Fold1.Rep4: mtry= 2 
## - Fold1.Rep4: mtry= 2 
## + Fold1.Rep4: mtry=11 
## - Fold1.Rep4: mtry=11 
## + Fold1.Rep4: mtry=20 
## - Fold1.Rep4: mtry=20 
## + Fold1.Rep4: mtry=29 
## - Fold1.Rep4: mtry=29 
## + Fold1.Rep4: mtry=38 
## - Fold1.Rep4: mtry=38 
## + Fold2.Rep4: mtry= 2 
## - Fold2.Rep4: mtry= 2 
## + Fold2.Rep4: mtry=11 
## - Fold2.Rep4: mtry=11 
## + Fold2.Rep4: mtry=20 
## - Fold2.Rep4: mtry=20 
## + Fold2.Rep4: mtry=29 
## - Fold2.Rep4: mtry=29 
## + Fold2.Rep4: mtry=38 
## - Fold2.Rep4: mtry=38 
## + Fold3.Rep4: mtry= 2 
## - Fold3.Rep4: mtry= 2 
## + Fold3.Rep4: mtry=11 
## - Fold3.Rep4: mtry=11 
## + Fold3.Rep4: mtry=20 
## - Fold3.Rep4: mtry=20 
## + Fold3.Rep4: mtry=29 
## - Fold3.Rep4: mtry=29 
## + Fold3.Rep4: mtry=38 
## - Fold3.Rep4: mtry=38 
## + Fold4.Rep4: mtry= 2 
## - Fold4.Rep4: mtry= 2 
## + Fold4.Rep4: mtry=11 
## - Fold4.Rep4: mtry=11 
## + Fold4.Rep4: mtry=20 
## - Fold4.Rep4: mtry=20 
## + Fold4.Rep4: mtry=29 
## - Fold4.Rep4: mtry=29 
## + Fold4.Rep4: mtry=38 
## - Fold4.Rep4: mtry=38 
## + Fold5.Rep4: mtry= 2 
## - Fold5.Rep4: mtry= 2 
## + Fold5.Rep4: mtry=11 
## - Fold5.Rep4: mtry=11 
## + Fold5.Rep4: mtry=20 
## - Fold5.Rep4: mtry=20 
## + Fold5.Rep4: mtry=29 
## - Fold5.Rep4: mtry=29 
## + Fold5.Rep4: mtry=38 
## - Fold5.Rep4: mtry=38 
## + Fold1.Rep5: mtry= 2 
## - Fold1.Rep5: mtry= 2 
## + Fold1.Rep5: mtry=11 
## - Fold1.Rep5: mtry=11 
## + Fold1.Rep5: mtry=20 
## - Fold1.Rep5: mtry=20 
## + Fold1.Rep5: mtry=29 
## - Fold1.Rep5: mtry=29 
## + Fold1.Rep5: mtry=38 
## - Fold1.Rep5: mtry=38 
## + Fold2.Rep5: mtry= 2 
## - Fold2.Rep5: mtry= 2 
## + Fold2.Rep5: mtry=11 
## - Fold2.Rep5: mtry=11 
## + Fold2.Rep5: mtry=20 
## - Fold2.Rep5: mtry=20 
## + Fold2.Rep5: mtry=29 
## - Fold2.Rep5: mtry=29 
## + Fold2.Rep5: mtry=38 
## - Fold2.Rep5: mtry=38 
## + Fold3.Rep5: mtry= 2 
## - Fold3.Rep5: mtry= 2 
## + Fold3.Rep5: mtry=11 
## - Fold3.Rep5: mtry=11 
## + Fold3.Rep5: mtry=20 
## - Fold3.Rep5: mtry=20 
## + Fold3.Rep5: mtry=29 
## - Fold3.Rep5: mtry=29 
## + Fold3.Rep5: mtry=38 
## - Fold3.Rep5: mtry=38 
## + Fold4.Rep5: mtry= 2 
## - Fold4.Rep5: mtry= 2 
## + Fold4.Rep5: mtry=11 
## - Fold4.Rep5: mtry=11 
## + Fold4.Rep5: mtry=20 
## - Fold4.Rep5: mtry=20 
## + Fold4.Rep5: mtry=29 
## - Fold4.Rep5: mtry=29 
## + Fold4.Rep5: mtry=38 
## - Fold4.Rep5: mtry=38 
## + Fold5.Rep5: mtry= 2 
## - Fold5.Rep5: mtry= 2 
## + Fold5.Rep5: mtry=11 
## - Fold5.Rep5: mtry=11 
## + Fold5.Rep5: mtry=20 
## - Fold5.Rep5: mtry=20 
## + Fold5.Rep5: mtry=29 
## - Fold5.Rep5: mtry=29 
## + Fold5.Rep5: mtry=38 
## - Fold5.Rep5: mtry=38 
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 20 on full training set
# Predict the testing target. The response is dropped by name rather than by
# position (column 31), so this keeps working if the column order changes.
rf.predict <- predict(rf.fit, test[, names(test) != "Result", drop = FALSE])

# Confusion matrix and derived statistics for the random forest predictions.
confusionMatrix(rf.predict, test$Result)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   -1    1
##         -1  914   13
##         1    50 1234
##                                          
##                Accuracy : 0.9715         
##                  95% CI : (0.9637, 0.978)
##     No Information Rate : 0.564          
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9418         
##  Mcnemar's Test P-Value : 5.745e-06      
##                                          
##             Sensitivity : 0.9481         
##             Specificity : 0.9896         
##          Pos Pred Value : 0.9860         
##          Neg Pred Value : 0.9611         
##              Prevalence : 0.4360         
##          Detection Rate : 0.4134         
##    Detection Prevalence : 0.4193         
##       Balanced Accuracy : 0.9689         
##                                          
##        'Positive' Class : -1             
## 
# Plot the variable importance scores computed from the fitted random forest.
plot(varImp(rf.fit))

# Boosted logistic regression (LogitBoost)
# Resampling scheme used for tuning: 5-fold cross-validation repeated
# 5 times, with per-fold progress printed while training.
# NOTE(review): no seed is set in this block, so fold assignment may differ
# between runs unless a seed was fixed earlier in the script.
fitControl <- trainControl(method = "repeatedcv", repeats = 5,
                           number = 5, verboseIter = TRUE)

# Fit a boosted logistic regression on the training set. tuneLength = 5
# asks caret for five candidate values of the tuning parameter (reported
# as nIter in the training log).
lg.fit <- train(Result ~ ., data = train,
                method = "LogitBoost", trControl = fitControl,
                tuneLength = 5)
## + Fold1.Rep1: nIter=51 
## - Fold1.Rep1: nIter=51 
## + Fold2.Rep1: nIter=51 
## - Fold2.Rep1: nIter=51 
## + Fold3.Rep1: nIter=51 
## - Fold3.Rep1: nIter=51 
## + Fold4.Rep1: nIter=51 
## - Fold4.Rep1: nIter=51 
## + Fold5.Rep1: nIter=51 
## - Fold5.Rep1: nIter=51 
## + Fold1.Rep2: nIter=51 
## - Fold1.Rep2: nIter=51 
## + Fold2.Rep2: nIter=51 
## - Fold2.Rep2: nIter=51 
## + Fold3.Rep2: nIter=51 
## - Fold3.Rep2: nIter=51 
## + Fold4.Rep2: nIter=51 
## - Fold4.Rep2: nIter=51 
## + Fold5.Rep2: nIter=51 
## - Fold5.Rep2: nIter=51 
## + Fold1.Rep3: nIter=51 
## - Fold1.Rep3: nIter=51 
## + Fold2.Rep3: nIter=51 
## - Fold2.Rep3: nIter=51 
## + Fold3.Rep3: nIter=51 
## - Fold3.Rep3: nIter=51 
## + Fold4.Rep3: nIter=51 
## - Fold4.Rep3: nIter=51 
## + Fold5.Rep3: nIter=51 
## - Fold5.Rep3: nIter=51 
## + Fold1.Rep4: nIter=51 
## - Fold1.Rep4: nIter=51 
## + Fold2.Rep4: nIter=51 
## - Fold2.Rep4: nIter=51 
## + Fold3.Rep4: nIter=51 
## - Fold3.Rep4: nIter=51 
## + Fold4.Rep4: nIter=51 
## - Fold4.Rep4: nIter=51 
## + Fold5.Rep4: nIter=51 
## - Fold5.Rep4: nIter=51 
## + Fold1.Rep5: nIter=51 
## - Fold1.Rep5: nIter=51 
## + Fold2.Rep5: nIter=51 
## - Fold2.Rep5: nIter=51 
## + Fold3.Rep5: nIter=51 
## - Fold3.Rep5: nIter=51 
## + Fold4.Rep5: nIter=51 
## - Fold4.Rep5: nIter=51 
## + Fold5.Rep5: nIter=51 
## - Fold5.Rep5: nIter=51 
## Aggregating results
## Selecting tuning parameters
## Fitting nIter = 41 on full training set
# Score the held-out test rows with the boosted logistic regression model.
# Column 31 is dropped before predicting (presumably the Result label,
# which is the last of the 31 columns in the source data -- confirm).
log.predict <- predict(lg.fit, newdata = test[, -31])

# Cross-tabulate predicted against actual labels and print the statistics.
confusionMatrix(data = log.predict, reference = test$Result)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   -1    1
##         -1  866   56
##         1    98 1191
##                                           
##                Accuracy : 0.9303          
##                  95% CI : (0.9189, 0.9406)
##     No Information Rate : 0.564           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8577          
##  Mcnemar's Test P-Value : 0.0009536       
##                                           
##             Sensitivity : 0.8983          
##             Specificity : 0.9551          
##          Pos Pred Value : 0.9393          
##          Neg Pred Value : 0.9240          
##              Prevalence : 0.4360          
##          Detection Rate : 0.3917          
##    Detection Prevalence : 0.4170          
##       Balanced Accuracy : 0.9267          
##                                           
##        'Positive' Class : -1              
##