# Read the cleaned claims export into `bd` and print a per-column summary.
# NOTE(review): the path below is absolute and specific to one machine; run
# file.choose() interactively to locate the file on another computer.
# file.choose()
bd <- read.csv(file = "/Users/danielatrevino/Downloads/Claims_limpia.csv")
summary(object = bd)
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 650915 Min. : -270 Min. : 0 Min. : 0.00
## 1st Qu.: 811125 1st Qu.: 60 1st Qu.: 0 1st Qu.: 0.00
## Median : 844626 Median : 235 Median : 0 Median : 0.00
## Mean :10149151 Mean : 6746 Mean : 2233 Mean : 68.88
## 3rd Qu.:22716506 3rd Qu.: 938 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203891 Max. :4527291 Max. :2069575 Max. :130541.03
##
## IndemnityPaid OtherPaid ClaimStatus IncidentDate
## Min. : -475 Min. : -7820 Length:134004 Length:134004
## 1st Qu.: 0 1st Qu.: 58 Class :character Class :character
## Median : 0 Median : 230 Mode :character Mode :character
## Mean : 3061 Mean : 3685
## 3rd Qu.: 0 3rd Qu.: 855
## Max. :640732 Max. :4129915
##
## IncidentDescription AverageWeeklyWage ReceivedDate IsDenied
## Length:134004 Length:134004 Length:134004 Min. :0.00000
## Class :character Class :character Class :character 1st Qu.:0.00000
## Mode :character Mode :character Mode :character Median :0.00000
## Mean :0.04474
## 3rd Qu.:0.00000
## Max. :1.00000
##
## Gender ClaimantType InjuryNature BodyPartRegion
## Length:134004 Length:134004 Length:134004 Length:134004
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BodyPart semana ClaimantClosedDate2 ClaimantOpenedDate2
## Length:134004 Min. : 0.00 Min. : 1.00 Min. : 1.00
## Class :character 1st Qu.:13.00 1st Qu.:13.00 1st Qu.:14.00
## Mode :character Median :25.00 Median :14.00 Median :26.00
## Mean :25.62 Mean :21.55 Mean :25.95
## 3rd Qu.:38.00 3rd Qu.:30.00 3rd Qu.:38.00
## Max. :53.00 Max. :53.00 Max. :53.00
## NA's :58637 NA's :4678
## EmployerNotificationDate2 ReturnToWorkDate2 Total_Incurred_Cost_Claim
## Min. : 1.00 Min. : 1.00 Min. : -11775
## 1st Qu.:14.00 1st Qu.:14.00 1st Qu.: 59
## Median :25.00 Median :26.00 Median : 234
## Mean :25.77 Mean :26.02 Mean : 8910
## 3rd Qu.:38.00 3rd Qu.:38.00 3rd Qu.: 965
## Max. :53.00 Max. :53.00 Max. :5054823
## NA's :22288 NA's :58637
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Frequency of each IsDenied value, most common first.
bd %>% count(IsDenied, sort = TRUE)
## IsDenied n
## 1 0 128008
## 2 1 5996
# Frequency of each Gender value, most common first.
bd %>% count(Gender, sort = TRUE)
## Gender n
## 1 Male 65125
## 2 Female 59197
## 3 Not Available 9682
# Frequency of each ClaimantType value, most common first.
bd %>% count(ClaimantType, sort = TRUE)
## ClaimantType n
## 1 Medical Only 87943
## 2 Indemnity 33726
## 3 Report Only 12335
# Frequency of each BodyPartRegion value, most common first.
bd %>% count(BodyPartRegion, sort = TRUE)
## BodyPartRegion n
## 1 Upper Extremities 38952
## 2 Lower Extremities 27609
## 3 Trunk 21223
## 4 Multiple Body Parts 16380
## 5 Head 12832
## 6 Non-Standard Code 12651
## 7 Neck 4349
## 8 Not Available 8
# Build the modelling table: keep the four fields used by the trees and store
# each one as a factor. IsDenied is relabelled first (0 -> "Aprobada", any
# other value -> "Denegada") so the trees show readable class names.
transaction <- bd[c("IsDenied", "Gender", "ClaimantType", "BodyPartRegion")]
transaction$IsDenied <- ifelse(
  transaction$IsDenied == 0, "Aprobada", "Denegada"
)
# The empty brackets keep `transaction` a data frame while every column is
# replaced by its factor version.
transaction[] <- lapply(transaction, as.factor)
str(transaction)
## 'data.frame': 134004 obs. of 4 variables:
## $ IsDenied : Factor w/ 2 levels "Aprobada","Denegada": 1 1 1 1 1 1 1 1 1 1 ...
## $ Gender : Factor w/ 3 levels "Female","Male",..: 2 2 2 2 1 2 1 2 1 2 ...
## $ ClaimantType : Factor w/ 3 levels "Indemnity","Medical Only",..: 1 2 1 2 1 2 2 1 2 1 ...
## $ BodyPartRegion: Factor w/ 8 levels "Head","Lower Extremities",..: 7 7 8 8 7 2 7 8 3 3 ...
# Level counts for each factor column of the modelling table.
transaction %>% summary()
## IsDenied Gender ClaimantType
## Aprobada:128008 Female :59197 Indemnity :33726
## Denegada: 5996 Male :65125 Medical Only:87943
## Not Available: 9682 Report Only :12335
##
##
##
##
## BodyPartRegion
## Upper Extremities :38952
## Lower Extremities :27609
## Trunk :21223
## Multiple Body Parts:16380
## Head :12832
## Non-Standard Code :12651
## (Other) : 4357
# install.packages("rpart")
library(rpart)
# install.packages("rpart.plot")
library(rpart.plot)
# Tree with Gender as the response and every other column of `transaction`
# (IsDenied, ClaimantType, BodyPartRegion) as a candidate predictor.
arbol <- rpart(Gender ~ ., data = transaction)
print(arbol)
## n= 134004
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 134004 68879 Male (0.441755470 0.485992955 0.072251575)
## 2) BodyPartRegion=Head,Lower Extremities,Multiple Body Parts,Neck,Not Available,Trunk,Upper Extremities 121353 59449 Male (0.484001220 0.510115119 0.005883662)
## 4) BodyPartRegion=Lower Extremities,Multiple Body Parts,Neck,Not Available,Upper Extremities 87298 43365 Female (0.503253225 0.491202548 0.005544228)
## 8) IsDenied=Denegada 4491 1874 Female (0.582720998 0.413048319 0.004230684) *
## 9) IsDenied=Aprobada 82807 41491 Female (0.498943326 0.495441207 0.005615467)
## 18) BodyPartRegion=Lower Extremities,Multiple Body Parts 41527 19959 Female (0.519372938 0.476003564 0.004623498) *
## 19) BodyPartRegion=Neck,Not Available,Upper Extremities 41280 20021 Male (0.478391473 0.514995155 0.006613372) *
## 5) BodyPartRegion=Head,Trunk 34055 15032 Male (0.434649831 0.558596388 0.006753781) *
## 3) BodyPartRegion=Non-Standard Code 12651 3683 Not Available (0.036518852 0.254604379 0.708876769) *
# Draw the tree currently held in `arbol` (the Gender model).
rpart.plot(x = arbol)
# Refit with ClaimantType as the response; `arbol` is overwritten, so the
# previous model is no longer available after this line.
arbol <- rpart(ClaimantType ~ ., data = transaction)
print(arbol)
## n= 134004
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 134004 46061 Medical Only (0.25167905 0.65627145 0.09204949)
## 2) BodyPartRegion=Head,Lower Extremities,Multiple Body Parts,Neck,Trunk,Upper Extremities 121345 35669 Medical Only (0.23339239 0.70605299 0.06055462) *
## 3) BodyPartRegion=Non-Standard Code,Not Available 12659 7254 Indemnity (0.42696895 0.17908208 0.39394897) *
# Draw the tree currently held in `arbol` (the ClaimantType model).
rpart.plot(x = arbol)
# Refit with BodyPartRegion as the response; `arbol` is overwritten, so the
# previous model is no longer available after this line.
arbol <- rpart(BodyPartRegion ~ ., data = transaction)
print(arbol)
## n= 134004
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 134004 95052 Upper Extremities (0.096 0.21 0.12 0.032 0.094 6e-05 0.16 0.29)
## 2) Gender=Female,Male 124322 85613 Upper Extremities (0.1 0.22 0.13 0.035 0.03 6.4e-05 0.17 0.31) *
## 3) Gender=Not Available 9682 714 Non-Standard Code (0.0096 0.017 0.0033 0.0041 0.93 0 0.014 0.025) *
# Draw the tree currently held in `arbol` (the BodyPartRegion model).
rpart.plot(x = arbol)
# Refit with IsDenied as the response; `arbol` is overwritten again.
# NOTE(review): the printed result below contains only the root node, i.e. no
# split was kept and every claim is labelled "Aprobada". Possibly a
# consequence of the ~95/5 class imbalance with default settings -- confirm
# whether different control/prior settings are wanted.
arbol <- rpart(IsDenied ~ ., data = transaction)
print(arbol)
## n= 134004
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 134004 5996 Aprobada (0.95525507 0.04474493) *
# Draw the tree currently held in `arbol` (the IsDenied model).
rpart.plot(x = arbol)