# Assignment 26: C5.0 decision tree on the Fraud_check data
#install.packages("C50")
#install.packages("tree")
library(C50)
## Warning: package 'C50' was built under R version 3.5.1
library(tree)
## Warning: package 'tree' was built under R version 3.5.1
# Load and prepare the fraud-check data ----
# The CSV location defaults to the original author's path. Set the
# FRAUD_CHECK_CSV environment variable to point the script at a copy of the
# file on another machine.
fraud_csv <- Sys.getenv(
  "FRAUD_CHECK_CSV",
  unset = "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\10 Decision Trees\\Assignment\\Fraud_check.csv"
)
mydata <- read.csv(fraud_csv)
colnames(mydata)
## [1] "Undergrad" "Marital.Status" "Taxable.Income" "City.Population"
## [5] "Work.Experience" "Urban"

# Target label: 1 when taxable income is at most 30000, otherwise 0.
# It is appended as the last column of mydata.
TIRisky <- ifelse(mydata$Taxable.Income <= 30000, 1, 0)
mydata[, "TIRisky"] <- TIRisky

# Convert the categorical columns (and the label) to factors.
factor_cols <- c("Undergrad", "Marital.Status", "Urban", "TIRisky")
mydata[factor_cols] <- lapply(mydata[factor_cols], as.factor)

# Split by class so both classes appear in the training and the test set.
fraud_risky <- mydata[mydata$TIRisky == "1", ]
fraud_not_risky <- mydata[mydata$TIRisky == "0", ]

# The first 75% of each class is used for training and the remainder for
# testing. With the 124 / 476 class sizes implied by the original hard-coded
# indices this yields the same 93 and 357 training rows as before.
train_fraction <- 0.75
n_risky_train <- floor(train_fraction * nrow(fraud_risky))
n_not_risky_train <- floor(train_fraction * nrow(fraud_not_risky))

# Test rows start immediately AFTER the last training row. The original code
# used 357:476 for the non-risky test rows, so row 357 was in both sets; the
# 151 test observations in the recorded CrossTable output include that
# duplicated row.
risky_test_idx <- seq_len(nrow(fraud_risky) - n_risky_train) + n_risky_train
not_risky_test_idx <-
  seq_len(nrow(fraud_not_risky) - n_not_risky_train) + n_not_risky_train

data_train <- rbind(
  fraud_risky[seq_len(n_risky_train), ],
  fraud_not_risky[seq_len(n_not_risky_train), ]
)
data_test <- rbind(
  fraud_risky[risky_test_idx, ],
  fraud_not_risky[not_risky_test_idx, ]
)
# Fit and evaluate the C5.0 decision tree ----
# TIRisky is derived from Taxable.Income by a single threshold, so leaving
# Taxable.Income among the predictors lets the tree read the label straight
# off that column (target leakage). Both the label and its source column are
# therefore excluded, by name rather than by position.
leaky_cols <- c("Taxable.Income", "TIRisky")
predictor_cols <- setdiff(colnames(data_train), leaky_cols)
trained_model <- C5.0(data_train[, predictor_cols], data_train$TIRisky)

# NOTE(review): without the leaked column the tree may end up with very few
# (or no) splits; it is unclear whether plot() can draw such a tree, so a
# plotting failure is downgraded to a warning instead of aborting the script.
tryCatch(
  plot(trained_model),
  error = function(e) {
    warning(
      "Could not plot the C5.0 tree: ", conditionMessage(e),
      call. = FALSE
    )
  }
)

# Training accuracy: share of training rows whose predicted class matches.
pred_train <- predict(trained_model, newdata = data_train)
mean(data_train$TIRisky == pred_train)

# Test accuracy on the held-out rows.
pred_test <- predict(trained_model, newdata = data_test)
mean(pred_test == data_test$TIRisky)

# The value 1 (100%) previously recorded for both accuracies, and the
# CrossTable output recorded at the end of this script, were produced while
# Taxable.Income was still a predictor. Re-run the script to obtain figures
# for the model without the leaked column.
# Cross-tabulate actual (rows) against predicted (columns) test classes.
# One-time setup if gmodels is not installed yet:
# install.packages("gmodels")
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
CrossTable(x = data_test$TIRisky, y = pred_test)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 151
##
##
## | pred_test
## data_test$TIRisky | 0 | 1 | Row Total |
## ------------------|-----------|-----------|-----------|
## 0 | 120 | 0 | 120 |
## | 6.364 | 24.636 | |
## | 1.000 | 0.000 | 0.795 |
## | 1.000 | 0.000 | |
## | 0.795 | 0.000 | |
## ------------------|-----------|-----------|-----------|
## 1 | 0 | 31 | 31 |
## | 24.636 | 95.364 | |
## | 0.000 | 1.000 | 0.205 |
## | 0.000 | 1.000 | |
## | 0.000 | 0.205 | |
## ------------------|-----------|-----------|-----------|
## Column Total | 120 | 31 | 151 |
## | 0.795 | 0.205 | |
## ------------------|-----------|-----------|-----------|
##
##