# Load the raw fraud-instance records from the CSV in the working directory.
fraudData <- read.csv(file = "FraudInstanceRawData.csv")

# Per-column overview (ranges for numeric columns, level counts for
# categorical ones) as a first sanity check of the import.
summary(object = fraudData)
## X. Fraud.Instance Damaged.Item Item.Not.Avaiable
## Min. : 1 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1088 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :2175 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :2175 Mean :0.3923 Mean :0.6691 Mean :0.1398
## 3rd Qu.:3262 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :4349 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Item.Not.In.Stock Product.Care.Plan Claim.Amount Registered.Online
## Min. :0.0000 Min. :0.0000 $250 : 25 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 $252 : 23 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 $177 : 22 Median :0.0000
## Mean :0.4992 Mean :0.2996 $246 : 22 Mean :0.4914
## 3rd Qu.:1.0000 3rd Qu.:1.0000 $280 : 22 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 $112 : 21 Max. :1.0000
## (Other):4214
## Age.Group Marital.Status Owns.a.Vehicle
## Min. :18.0 In-Relationship:1408 Min. :0.0000
## 1st Qu.:27.0 Married :1503 1st Qu.:0.0000
## Median :36.0 Unmarried, :1438 Median :0.0000
## Mean :36.4 Mean :0.4976
## 3rd Qu.:46.0 3rd Qu.:1.0000
## Max. :55.0 Max. :1.0000
##
## Accomodation.Type Height..cms.
## Owns a house :1441 Min. :150.0
## Rented :1409 1st Qu.:160.0
## Staying with Family:1499 Median :170.0
## Mean :170.3
## 3rd Qu.:181.0
## Max. :190.0
##
# Recode the 0/1 indicator columns as factors so that plotting and modelling
# code treats them as categorical values rather than as numbers.
indicator_cols <- c(
  "Fraud.Instance",
  "Damaged.Item",
  "Item.Not.Avaiable",
  "Item.Not.In.Stock",
  "Product.Care.Plan",
  "Registered.Online",
  "Owns.a.Vehicle"
)
fraudData[indicator_cols] <- lapply(fraudData[indicator_cols], as.factor)

# Claim.Amount is read as text such as "$250". Remove the currency symbol
# (and any thousands separators) and convert to numeric; leaving it as a
# character vector would make a model treat every distinct amount as its own
# category instead of as a continuous quantity.
fraudData$Claim.Amount <- as.numeric(
  gsub("[$,]", "", as.character(fraudData$Claim.Amount))
)
# Scatter-plot matrix of three candidate predictors against the fraud label,
# used to eyeball whether any of them separates the two classes.
# featurePlot() is expected to come from the caret package.
featurePlot(
  x = fraudData[, c("Age.Group", "Accomodation.Type", "Damaged.Item")],
  y = fraudData$Fraud.Instance,
  plot = "pairs"
)
Fraud.Instance does not appear to be correlated with Age.Group, Accomodation.Type, or Damaged.Item.

## The model
# Train and evaluate a classification tree for Fraud.Instance.
# createDataPartition() (caret), rpart() (rpart) and fancyRpartPlot() (rattle)
# are expected to be attached already.

# Fix the RNG seed so the random train/test split, and therefore the reported
# confusion matrix, is reproducible from run to run.
set.seed(1234)

# Columns that must not be offered to the model as predictors:
#   X            - appears to be the row index written by the CSV export
#                  (1..4349 in the summary); it is an identifier, not a
#                  feature, and can leak the label if the file is ordered.
#   Height..cms. - the person's height is not related to fraud.
# Dropping by name (rather than by position 13) keeps this correct if the
# column order of the input ever changes.
drop_cols <- c("X", "Height..cms.")
model_data <- fraudData[, !(names(fraudData) %in% drop_cols), drop = FALSE]

# Stratified 75/25 split on the outcome.
inTrain <- createDataPartition(
  y = model_data$Fraud.Instance,
  p = 0.75,
  list = FALSE
)
training <- model_data[inTrain, ]
testing <- model_data[-inTrain, ]

# Fit the tree on every remaining column and draw it.
tree <- rpart(Fraud.Instance ~ ., data = training, method = "class")
fancyRpartPlot(tree)

# Confusion matrix on the held-out rows: actual classes in rows, predicted
# classes in columns.
pred <- predict(tree, newdata = testing, type = "class")
conf <- table(testing$Fraud.Instance, pred)
print(conf)
## pred
## 0 1
## 0 660 0
## 1 0 426
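From the table, the model accuracy on the test set is 100%. A perfect score is unusual and should be checked for data leakage (for example, an identifier column such as X being used as a predictor) before the model is relied upon.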