library(C50)
## Warning: package 'C50' was built under R version 3.5.1
library(tree)
## Warning: package 'tree' was built under R version 3.5.1
# Read the fraud-check CSV into a data frame. The path is an absolute Windows
# location, so the script only runs as-is on a machine with this layout.
Data <- read.csv(
  file = "D:\\DataScience\\Assignments\\DecisionTree\\Fraud_check.csv"
)
# Open the loaded table in the interactive viewer for a visual sanity check.
View(Data)
# Derive numeric codes for the categorical columns and a class label from
# taxable income. A value that matches none of the listed levels (or is NA)
# yields NA in the derived column.

# Undergrad: "YES" -> 1, "NO" -> 0.
Data$CUndergrad <- ifelse(
  Data$Undergrad == "YES", 1,
  ifelse(Data$Undergrad == "NO", 0, NA)
)

# Marital.Status: "Single" -> 0, "Married" -> 1, "Divorced" -> 2.
Data$CMarital.Status <- ifelse(
  Data$Marital.Status == "Single", 0,
  ifelse(
    Data$Marital.Status == "Married", 1,
    ifelse(Data$Marital.Status == "Divorced", 2, NA)
  )
)

# Urban: "YES" -> 1, "NO" -> 0.
Data$CUrban <- ifelse(
  Data$Urban == "YES", 1,
  ifelse(Data$Urban == "NO", 0, NA)
)

# Class label: taxable income of 30000 or less is "Rishky", anything above is
# "Good".
Data$Ctype <- ifelse(Data$Taxable.Income <= 30000, "Rishky", "Good")
# Keep only the model columns, then build class-stratified train/test halves.
#
# Columns 1, 2, 3 and 6 are removed by position (presumably the raw Undergrad,
# Marital.Status, Taxable.Income and Urban columns -- confirm against the CSV
# header). This is the same selection as dropping 1, 2, 6 and then the first
# remaining column.
Data <- Data[, -c(1, 2, 3, 6)]

# which() skips rows whose Ctype is NA; a bare logical index would turn each of
# them into an all-NA row in both subsets.
Data_Good <- Data[which(Data$Ctype == "Good"), ]
Data_Rishky <- Data[which(Data$Ctype == "Rishky"), ]

# The first half of each class (in file order) is used for training and the
# rest for testing. Sizes come from the actual class counts instead of fixed
# row ranges: with 476 "Good" and 124 "Rishky" rows this reproduces the former
# 1:238 / 239:476 and 1:62 / 63:124 ranges, while other counts no longer yield
# NA-filled rows (too few rows) or silently discarded rows (too many rows).
n_good_train <- nrow(Data_Good) %/% 2
n_rishky_train <- nrow(Data_Rishky) %/% 2

Data_Train <- rbind(
  head(Data_Good, n_good_train),
  head(Data_Rishky, n_rishky_train)
)
Data_Test <- rbind(
  tail(Data_Good, nrow(Data_Good) - n_good_train),
  tail(Data_Rishky, nrow(Data_Rishky) - n_rishky_train)
)
# Fit a C5.0 decision tree that predicts Ctype, plot it, and report accuracy on
# the training and test partitions.
#
# Predictors are selected by name rather than by position. The earlier call
# used `Data_Train[, -7]`; a negative column index beyond the last column
# removes nothing, so whenever Data_Train has fewer than seven columns the
# target Ctype itself stays in `x`. The output recorded from the earlier run
# (accuracy of exactly 1 on both partitions, plus "NAs introduced by coercion"
# warnings from plot()) is consistent with that leak.
# NOTE(review): any output recorded in this file before this fix came from the
# leaky model and should be regenerated by re-running the script.
predictor_cols <- setdiff(names(Data_Train), "Ctype")
Data5.0_Train <- C5.0(
  x = Data_Train[, predictor_cols, drop = FALSE],
  y = as.factor(Data_Train$Ctype)
)
plot(Data5.0_Train)

# Training accuracy: share of training rows whose predicted class equals the
# stored label.
mean(
  Data_Train$Ctype ==
    predict(Data5.0_Train, Data_Train[, predictor_cols, drop = FALSE])
)

# Predict on the held-out partition and report the same accuracy measure.
predc5.0_Test <- predict(
  Data5.0_Train,
  newdata = Data_Test[, predictor_cols, drop = FALSE]
)
mean(predc5.0_Test == Data_Test$Ctype)
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Cross table of actual test labels (rows) against predicted labels (columns).
CrossTable(x = Data_Test$Ctype, y = predc5.0_Test)
##
##
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table:  300
##
##
##                 | predc5.0_Test
## Data_Test$Ctype |      Good |    Rishky | Row Total |
## ----------------|-----------|-----------|-----------|
##            Good |       238 |         0 |       238 |
##                 |    12.813 |    49.187 |           |
##                 |     1.000 |     0.000 |     0.793 |
##                 |     1.000 |     0.000 |           |
##                 |     0.793 |     0.000 |           |
## ----------------|-----------|-----------|-----------|
##          Rishky |         0 |        62 |        62 |
##                 |    49.187 |   188.813 |           |
##                 |     0.000 |     1.000 |     0.207 |
##                 |     0.000 |     1.000 |           |
##                 |     0.000 |     0.207 |           |
## ----------------|-----------|-----------|-----------|
##    Column Total |       238 |        62 |       300 |
##                 |     0.793 |     0.207 |           |
## ----------------|-----------|-----------|-----------|
##
##