Load and Explore the Data

# Packages used further down in this script:
#   caret  - featurePlot(), createDataPartition()
#   rpart  - rpart()
#   rattle - fancyRpartPlot()
library(caret)
library(rpart)
library(rattle)

# Read the raw fraud data. stringsAsFactors is set explicitly because its
# default changed in R 4.0.0; the summary below expects the text columns
# (Claim.Amount, Marital.Status, Accomodation.Type) to be factors.
fraudData <- read.csv("FraudInstanceRawData.csv", stringsAsFactors = TRUE)
summary(fraudData)
##        X.       Fraud.Instance    Damaged.Item    Item.Not.Avaiable
##  Min.   :   1   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   
##  1st Qu.:1088   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   
##  Median :2175   Median :0.0000   Median :1.0000   Median :0.0000   
##  Mean   :2175   Mean   :0.3923   Mean   :0.6691   Mean   :0.1398   
##  3rd Qu.:3262   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   
##  Max.   :4349   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   
##                                                                    
##  Item.Not.In.Stock Product.Care.Plan  Claim.Amount  Registered.Online
##  Min.   :0.0000    Min.   :0.0000    $250   :  25   Min.   :0.0000   
##  1st Qu.:0.0000    1st Qu.:0.0000    $252   :  23   1st Qu.:0.0000   
##  Median :0.0000    Median :0.0000    $177   :  22   Median :0.0000   
##  Mean   :0.4992    Mean   :0.2996    $246   :  22   Mean   :0.4914   
##  3rd Qu.:1.0000    3rd Qu.:1.0000    $280   :  22   3rd Qu.:1.0000   
##  Max.   :1.0000    Max.   :1.0000    $112   :  21   Max.   :1.0000   
##                                      (Other):4214                    
##    Age.Group            Marital.Status Owns.a.Vehicle  
##  Min.   :18.0   In-Relationship:1408   Min.   :0.0000  
##  1st Qu.:27.0   Married        :1503   1st Qu.:0.0000  
##  Median :36.0   Unmarried,     :1438   Median :0.0000  
##  Mean   :36.4                          Mean   :0.4976  
##  3rd Qu.:46.0                          3rd Qu.:1.0000  
##  Max.   :55.0                          Max.   :1.0000  
##                                                        
##            Accomodation.Type  Height..cms.  
##  Owns a house       :1441    Min.   :150.0  
##  Rented             :1409    1st Qu.:160.0  
##  Staying with Family:1499    Median :170.0  
##                              Mean   :170.3  
##                              3rd Qu.:181.0  
##                              Max.   :190.0  
## 

Clean the Data

# The 0/1 indicator columns are categorical flags, not quantities, so store
# them as factors (the column names, including their spelling, come from the
# CSV header).
binaryCols <- c(
  "Fraud.Instance", "Damaged.Item", "Item.Not.Avaiable", "Item.Not.In.Stock",
  "Product.Care.Plan", "Registered.Online", "Owns.a.Vehicle"
)
fraudData[binaryCols] <- lapply(fraudData[binaryCols], as.factor)

# Claim.Amount is read as text such as "$250". Strip the currency symbol (and
# any thousands separators) and convert to numeric so the amount is treated
# as a continuous value rather than as thousands of distinct labels.
fraudData$Claim.Amount <- as.numeric(
  gsub("[$,]", "", as.character(fraudData$Claim.Amount))
)

Plot

# Pairs plot of three candidate predictors against the Fraud.Instance outcome.
featurePlot(
  x = fraudData[, c("Age.Group", "Accomodation.Type", "Damaged.Item")],
  y = fraudData$Fraud.Instance,
  plot = "pairs"
)

Fraud.Instance does not appear to be correlated with Age.Group, Accomodation.Type, or Damaged.Item.

The Model

# Fix the RNG seed so the train/test split, and therefore the fitted tree and
# the confusion matrix, are reproducible from run to run.
set.seed(1234)
inTrain <- createDataPartition(
  y = fraudData$Fraud.Instance, p = 0.75, list = FALSE
)

# Drop non-predictive columns by name rather than by position, so the code
# keeps working if the column order of the CSV changes:
#   Height..cms. - a person's height is not related to fraud.
#   X.           - the row index from the CSV (1..4349), not a claim
#                  attribute; leaving it in lets the tree split on row order,
#                  which can leak the label if the file is sorted.
dropCols <- c("X.", "Height..cms.")
modelData <- fraudData[setdiff(names(fraudData), dropCols)]

training <- modelData[inTrain, ]
testing <- modelData[-inTrain, ]

# Fit a classification tree that predicts Fraud.Instance from every other
# column left in `training`, then draw the fitted tree.
tree <- rpart(
  formula = Fraud.Instance ~ .,
  data = training,
  method = "class"
)
fancyRpartPlot(tree)

# Predict a class label for each held-out row, then cross-tabulate the actual
# labels (rows) against the predicted labels (columns).
pred <- predict(tree, newdata = testing, type = "class")
conf <- table(testing[["Fraud.Instance"]], pred)
print(conf)
##    pred
##       0   1
##   0 660   0
##   1   0 426

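From the confusion matrix above, the model accuracy on the test set is 100% (660 + 426 = 1086 of 1086 cases classified correctly). A perfect score on held-out data is unusual and should be checked for leakage (for example, from the row-index column X.) before the model is trusted.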