Decision Trees

Load and Subset the Data

# Read the input CSV. stringsAsFactors = TRUE is spelled out because
# read.csv() stopped converting strings to factors by default in R 4.0.0,
# and the tree fitted later in this file splits on sets of factor levels.
x <- read.csv("c:\\Reports\\DT.csv", stringsAsFactors = TRUE)

# Keep only the rows whose Category.Behavior is "/Authentication/Verify" and
# whose Device.Product is "Microsoft Windows", and only the response column
# (Type) plus the six columns used as model inputs.
y <- subset(
  x,
  Category.Behavior == "/Authentication/Verify" &
    Device.Product == "Microsoft Windows",
  select = c(
    "Type", "Name", "Target.User.Name", "Attacker.User.Name",
    "Category.Behavior", "Category.Object", "Category.Significance"
  )
)

Create Training and Test Data

# Fix the RNG seed so the random train/test split (and every result derived
# from it) is identical on each run; without a seed the split changes every
# time the script is executed. Counts recorded from earlier, unseeded runs
# will generally not match the seeded split.
set.seed(1234)

# Label each row of y with 1 (probability 0.7, training) or 2 (probability
# 0.3, test), then split y on that label.
ind <- sample(2, nrow(y), replace = TRUE, prob = c(0.7, 0.3))
traindata <- y[ind == 1, ]
testdata <- y[ind == 2, ]

Create the Decision Tree

library(party)
## Warning: package 'party' was built under R version 3.1.1
## Loading required package: grid
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.1.1
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.1.1
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.1.1
## Loading required package: modeltools
## Warning: package 'modeltools' was built under R version 3.1.1
## Loading required package: stats4
# Model Type as a function of the six other columns kept in the subset.
myformula <- Type ~ Name + Target.User.Name + Attacker.User.Name +
  Category.Behavior + Category.Object + Category.Significance

# Fit a conditional inference tree (ctree from the party package) using the
# training rows only.
type_ctree <- ctree(myformula, data = traindata)

Display the Confusion Matrix for Training Data

# Confusion matrix for the training rows: the tree's predictions (rows)
# against the recorded Type (columns). predict() is called without newdata,
# so it returns predictions for the data the tree was fitted on.
table(predict(type_ctree), traindata[["Type"]])
##       
##        Base CORR
##   Base  259    0
##   CORR    0   10

Print and Plot the Decision Tree

# Print the fitted tree as text: its response, inputs, number of
# observations, and the split rule at each node.
print(type_ctree)
## 
##   Conditional inference tree with 3 terminal nodes
## 
## Response:  Type 
## Inputs:  Name, Target.User.Name, Attacker.User.Name, Category.Behavior, Category.Object, Category.Significance 
## Number of observations:  269 
## 
## 1) Attacker.User.Name == {, administrator, Carbaugh, CCOSTA, mhedberg, Source/Destination: Target User Name, swright, Zara, Zaun}; criterion = 1, statistic = 79.442
##   2)*  weights = 238 
## 1) Attacker.User.Name == {Administrator}
##   3) Target.User.Name == {Admin, Super}; criterion = 1, statistic = 30
##     4)*  weights = 21 
##   3) Target.User.Name == {Administrator}
##     5)*  weights = 10
# Draw the fitted tree; type = "simple" requests party's "simple" plot
# layout instead of the default one.
plot(type_ctree, type = "simple")

plot of chunk Print and Plot the Decision Tree

Predict on the Test Data with the Decision Tree

# Classify the held-out test rows with the tree fitted on the training data.
testpred <- predict(type_ctree, newdata = testdata)

Display the Confusion Matrix for Test Data

# Confusion matrix for the test rows: predicted class (rows) against the
# recorded Type (columns).
table(testpred, testdata[["Type"]])
##         
## testpred Base CORR
##     Base  115    0
##     CORR    0    5