Sales Practices Fraud

Lekshman Ramesh

Why?

Data Collection

# Load the banker-level data; the first line of data.csv holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
od <- read.csv("data.csv", header = TRUE)
str(od)
## 'data.frame':    20002 obs. of  7 variables:
##  $ Banker       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Accounts     : num  82.5 104.6 110.6 90.8 45.4 ...
##  $ Maximum.Spike: num  2.28 5.18 0.59 2.63 4.46 ...
##  $ Inactivity   : num  0.07 0.24 0.21 0.1 0.2 0.3 0.07 0.02 0.27 0.35 ...
##  $ Early.Closure: num  0.49 0.07 0.45 0.49 0.11 0.18 0.26 0.23 0.28 0.05 ...
##  $ Charge.Off   : num  0.15 0.09 0.09 0.19 0.05 0 0.12 0.07 0.01 0.13 ...
##  $ Fraud        : num  0.09 0.2 0.09 0.09 0.19 0.13 0.06 0.18 0.09 0.17 ...

Problem Formulation

Step 1 - Normalization

# Standardise every column of od to z-scores (subtract the column mean, divide
# by the column standard deviation). scale() returns a matrix, so it is turned
# back into a data frame. The Banker key column is standardised as well.
od2 <- as.data.frame(scale(od, center = TRUE, scale = TRUE))
summary(od2)
##      Banker          Accounts        Maximum.Spike      
##  Min.   :-1.732   Min.   :-1.74572   Min.   :-1.731514  
##  1st Qu.:-0.866   1st Qu.:-0.87048   1st Qu.:-0.873454  
##  Median : 0.000   Median : 0.01379   Median : 0.000299  
##  Mean   : 0.000   Mean   : 0.00000   Mean   : 0.000000  
##  3rd Qu.: 0.866   3rd Qu.: 0.86836   3rd Qu.: 0.869489  
##  Max.   : 1.732   Max.   : 1.71819   Max.   : 4.652904  
##    Inactivity        Early.Closure        Charge.Off       
##  Min.   :-1.735546   Min.   :-1.72497   Min.   :-1.725586  
##  1st Qu.:-0.834225   1st Qu.:-0.84720   1st Qu.:-0.864403  
##  Median :-0.002236   Median : 0.03058   Median :-0.003221  
##  Mean   : 0.000000   Mean   : 0.00000   Mean   : 0.000000  
##  3rd Qu.: 0.829753   3rd Qu.: 0.84565   3rd Qu.: 0.857962  
##  Max.   : 3.325720   Max.   : 2.91469   Max.   : 5.680586  
##      Fraud          
##  Min.   :-1.703520  
##  1st Qu.:-0.846951  
##  Median : 0.009618  
##  Mean   : 0.000000  
##  3rd Qu.: 0.866187  
##  Max.   : 6.690856

Step 2 - Ignoring Below-Mean Values & Calculating Squared Euclidean Distances

# Clamp every below-mean (negative) z-score to 0 so that only above-average
# values contribute to the score. lapply() always yields one element per
# column (sapply() may change its return type), and pmax() is the vectorised
# form of the clamp.
od2[] <- lapply(od2, function(z) pmax(z, 0))
# Score = sum of the squared clamped z-scores, i.e. the *squared* Euclidean
# distance from the origin (no square root is taken). The columns are selected
# by name so that the Banker key is left out of the score.
od2$ed_overall <- rowSums(od2[, c(
  "Accounts", "Maximum.Spike", "Inactivity",
  "Early.Closure", "Charge.Off", "Fraud"
)]^2)
summary(od2)
##      Banker             Accounts       Maximum.Spike        Inactivity    
##  Min.   :0.0000000   Min.   :0.00000   Min.   :0.000000   Min.   :0.0000  
##  1st Qu.:0.0000000   1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.0000  
##  Median :0.0000433   Median :0.01379   Median :0.000299   Median :0.0000  
##  Mean   :0.4330019   Mean   :0.43323   Mean   :0.432576   Mean   :0.4327  
##  3rd Qu.:0.8659605   3rd Qu.:0.86836   3rd Qu.:0.869489   3rd Qu.:0.8298  
##  Max.   :1.7319209   Max.   :1.71819   Max.   :4.652904   Max.   :3.3257  
##  Early.Closure       Charge.Off         Fraud            ed_overall    
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000000   Min.   : 0.000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.: 1.476  
##  Median :0.03058   Median :0.0000   Median :0.009618   Median : 2.762  
##  Mean   :0.43308   Mean   :0.4322   Mean   :0.432536   Mean   : 2.997  
##  3rd Qu.:0.84565   3rd Qu.:0.8580   3rd Qu.:0.866187   3rd Qu.: 4.262  
##  Max.   :2.91469   Max.   :5.6806   Max.   :6.690856   Max.   :45.543

Outlier Visualization

library(ggplot2)
# Single box plot of the score distribution. x is a constant placeholder, so
# the x-axis title and tick labels are hidden; outlying scores are drawn as
# large red points.
ggplot(od2, aes(x = 1, y = ed_overall)) +
  geom_boxplot(
    fill = "lightblue",
    color = "darkblue",
    outlier.colour = "red",
    outlier.size = 3
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

Final Data with Scores

# Attach each row's score to the original (unscaled) data so the raw metrics
# can be read next to the score.
ed_overall <- od2$ed_overall
od3 <- cbind(od, ed_overall)
# Highest score first. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
od3 <- od3[order(od3$ed_overall, decreasing = TRUE), ]
head(od3)
##       Banker  Accounts Maximum.Spike Inactivity Early.Closure Charge.Off
## 14781  14781  30.98339      1.279827       0.33          0.18       0.14
## 5634    5634  53.72716      4.046279       0.15          0.29       0.43
## 9          9 102.35200      9.745000       0.27          0.28       0.01
## 11833  11833  69.84385      2.672617       0.73          0.44       0.15
## 11476  11476 121.52178      5.421197       0.50          0.48       0.16
## 102      102 108.83688      5.404849       0.47          0.53       0.16
##       Fraud ed_overall
## 14781  0.49   45.54294
## 5634   0.10   32.79255
## 9      0.09   22.54170
## 11833  0.09   12.86515
## 11476  0.16   12.11792
## 102    0.18   11.92362

Results