library(C50)
## Warning: package 'C50' was built under R version 3.5.1
library(tree)
## Warning: package 'tree' was built under R version 3.5.1
# Load the fraud-check data set from a fixed local path.
Data <- read.csv("D:\\DataScience\\Assignments\\DecisionTree\\Fraud_check.csv")

# Open the data viewer for a quick look at the raw table.
View(Data)

# Lookup tables for the numeric recodes. A value that is not a name in the
# table (including NA) is recoded to NA.
yes_no_codes <- c(NO = 0, YES = 1)
marital_codes <- c(Single = 0, Married = 1, Divorced = 2)

Data$CUndergrad <- unname(yes_no_codes[as.character(Data$Undergrad)])
Data$CMarital.Status <- unname(
  marital_codes[as.character(Data$Marital.Status)]
)
Data$CUrban <- unname(yes_no_codes[as.character(Data$Urban)])

# Class label: taxable income of 30000 or less is "Rishky", anything above
# is "Good"; a missing income gives a missing label.
Data$Ctype <- ifelse(Data$Taxable.Income > 30000, "Good", "Rishky")

# Drop original columns 1, 2, 3 and 6 by position.
# NOTE(review): presumably these are the raw Undergrad, Marital.Status,
# Taxable.Income and Urban columns -- confirm against the CSV header.
Data <- Data[, -c(1, 2, 3, 6)]

# Split each class in half, in file order: the first half of every class goes
# to the training set and the remainder to the test set.
#
# The halves are derived from the actual class sizes instead of hard-coded row
# ranges, so the split stays valid if the number of rows per class changes
# (fixed ranges produce all-NA rows when a class is smaller than expected and
# silently drop rows when it is larger). For 476 "Good" and 124 "Rishky" rows
# this gives exactly rows 1:238 / 239:476 and 1:62 / 63:124.
# which() also skips rows whose Ctype is NA rather than turning them into
# all-NA rows.
Data_Good <- Data[which(Data$Ctype == "Good"), ]
Data_Rishky <- Data[which(Data$Ctype == "Rishky"), ]

# Logical masks are used (not negative indices) so an empty first half still
# leaves every row in the test set.
good_in_train <- seq_len(nrow(Data_Good)) <= nrow(Data_Good) %/% 2
rishky_in_train <- seq_len(nrow(Data_Rishky)) <= nrow(Data_Rishky) %/% 2

Data_Train <- rbind(Data_Good[good_in_train, ], Data_Rishky[rishky_in_train, ])
Data_Test <- rbind(Data_Good[!good_in_train, ], Data_Rishky[!rishky_in_train, ])

 # Fit a C5.0 classification tree on the training rows and report accuracy.
#
# Predictors are selected by name so the class label never reaches the model
# as a feature. The previous positional form `Data_Train[, -7]` only removes
# Ctype when it sits in column 7; a negative subscript beyond the last column
# is silently ignored, which leaves Ctype among the predictors and makes the
# tree trivially perfect.
predictor_cols <- setdiff(names(Data_Train), "Ctype")
Data5.0_Train <- C5.0(
  x = Data_Train[, predictor_cols, drop = FALSE],
  y = as.factor(Data_Train$Ctype)
)

# Draw the tree only when it has at least one split.
# NOTE(review): relies on the `size` element of the C5.0 object being the
# tree size -- confirm against the C50 documentation.
if (all(Data5.0_Train$size > 1)) {
  plot(Data5.0_Train)
} else {
  message("C5.0 tree has no splits; skipping plot().")
}

# Training accuracy: share of training rows whose predicted class matches the
# label.
predc5.0_Train <- predict(
  Data5.0_Train,
  newdata = Data_Train[, predictor_cols, drop = FALSE]
)
mean(predc5.0_Train == Data_Train$Ctype)

# Test accuracy on the held-out rows.
predc5.0_Test <- predict(
  Data5.0_Train,
  newdata = Data_Test[, predictor_cols, drop = FALSE]
)
mean(predc5.0_Test == Data_Test$Ctype)

# NOTE: the outputs previously recorded in this file (accuracy `[1] 1` on both
# sets, the coercion warnings from plot(), and the perfect cross table further
# down) came from the model that had Ctype as a predictor. Re-run the script
# to regenerate them.
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Cross table of actual (rows) versus predicted (columns) class on the test set.
CrossTable(x = Data_Test$Ctype, y = predc5.0_Test)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  300 
## 
##  
##                 | predc5.0_Test 
## Data_Test$Ctype |      Good |    Rishky | Row Total | 
## ----------------|-----------|-----------|-----------|
##            Good |       238 |         0 |       238 | 
##                 |    12.813 |    49.187 |           | 
##                 |     1.000 |     0.000 |     0.793 | 
##                 |     1.000 |     0.000 |           | 
##                 |     0.793 |     0.000 |           | 
## ----------------|-----------|-----------|-----------|
##          Rishky |         0 |        62 |        62 | 
##                 |    49.187 |   188.813 |           | 
##                 |     0.000 |     1.000 |     0.207 | 
##                 |     0.000 |     1.000 |           | 
##                 |     0.000 |     0.207 |           | 
## ----------------|-----------|-----------|-----------|
##    Column Total |       238 |        62 |       300 | 
##                 |     0.793 |     0.207 |           | 
## ----------------|-----------|-----------|-----------|
## 
##