#Descargar Librerias
#install.packages("ggplot2")
library(ggplot2)
#install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages("cluster")
library(cluster)
#install.packages("data.table")
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
#install.packages("factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#install.packages("rpart")
library(rpart)
#install.packages("rpart.plot")
library(rpart.plot)
# Load the two source data sets
# (file.choose() can be run interactively to locate the files)
basededatos1 <- read.csv(
  file = "C:\\Users\\sguerra\\Downloads\\ClaimsData2018.csv"
)
basededatos2 <- read.csv(
  file = "C:\\Users\\sguerra\\Downloads\\TransactionsSummary2018.csv"
)
# Combine both tables on ClaimID, keeping unmatched rows from either side
bd <- merge(
  x = basededatos1,
  y = basededatos2,
  by = "ClaimID",
  all.x = TRUE,
  all.y = TRUE
)
# Keep only the claims that were not denied.
# bd comes from a full outer join, so IsDenied can be NA. Indexing rows with
# `bd$IsDenied == "0"` directly returns one all-NA row for every NA in the
# condition; which() keeps only the rows where the comparison is TRUE.
# NOTE: the console output pasted further down in this file was captured
# before this change, so its row counts and NA counts still include those
# all-NA rows.
bdp <- bd[which(bd$IsDenied == "0"), ]
# Derived columns:
#   cti - total incurred cost: reserves + paid + indemnity paid - recoveries
#   tp  - processing time: days between the opened and closed dates
bdp$cti <- bdp$TotalReserves + bdp$TotalPaid + bdp$IndemnityPaid -
  bdp$TotalRecovery
# Dates are parsed as day/month/year; values that do not match become NA
bdp$ClaimantOpenedDate <- as.Date(bdp$ClaimantOpenedDate, format = "%d/%m/%Y")
bdp$ClaimantClosedDate <- as.Date(bdp$ClaimantClosedDate, format = "%d/%m/%Y")
# Subtracting two Date vectors gives a difftime in days; store it as a number
bdp$tp <- as.numeric(bdp$ClaimantClosedDate - bdp$ClaimantOpenedDate)
bdp$cti <- as.numeric(bdp$cti)
# Observations: per-column summary of the filtered data (bdp), including the
# derived cti and tp columns
summary(bdp)
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 650915 Min. : -270 Min. : 0 Min. : 0.00
## 1st Qu.: 810940 1st Qu.: 67 1st Qu.: 0 1st Qu.: 0.00
## Median : 844950 Median : 240 Median : 0 Median : 0.00
## Mean :10196295 Mean : 6942 Mean : 2282 Mean : 72.09
## 3rd Qu.:22716721 3rd Qu.: 957 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203891 Max. :4527291 Max. :2069575 Max. :130541.03
## NA's :52673 NA's :52673 NA's :52673 NA's :52673
## IndemnityPaid OtherPaid ClaimStatus IncidentDate
## Min. : -475 Min. : -7820 Length:180681 Length:180681
## 1st Qu.: 0 1st Qu.: 65 Class :character Class :character
## Median : 0 Median : 235 Mode :character Mode :character
## Mean : 3130 Mean : 3811
## 3rd Qu.: 0 3rd Qu.: 874
## Max. :640732 Max. :4129915
## NA's :52673 NA's :52673
## IncidentDescription ReturnToWorkDate AverageWeeklyWage ClaimantOpenedDate
## Length:180681 Length:180681 Length:180681 Min. :1967-06-02
## Class :character Class :character Class :character 1st Qu.:1999-01-09
## Mode :character Mode :character Mode :character Median :2003-11-12
## Mean :2003-12-06
## 3rd Qu.:2009-02-02
## Max. :2014-12-06
## NA's :130819
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## Min. :1999-01-06 Length:180681 Length:180681
## 1st Qu.:2005-11-07 Class :character Class :character
## Median :2008-11-12 Mode :character Mode :character
## Mean :2008-04-19
## 3rd Qu.:2011-07-11
## Max. :2014-12-06
## NA's :152847
## IsDenied ClaimantAge_at_DOI Gender ClaimantType
## Min. :0 Length:180681 Length:180681 Length:180681
## 1st Qu.:0 Class :character Class :character Class :character
## Median :0 Mode :character Mode :character Mode :character
## Mean :0
## 3rd Qu.:0
## Max. :0
## NA's :52673
## InjuryNature BodyPartRegion BodyPart BillReviewALE
## Length:180681 Length:180681 Length:180681 Min. : -456.00
## Class :character Class :character Class :character 1st Qu.: 8.25
## Mode :character Mode :character Mode :character Median : 24.00
## Mean : 190.82
## 3rd Qu.: 65.28
## Max. :56475.30
## NA's :157477
## Hospital PhysicianOutpatient Rx cti
## Min. : -12570.4 Min. : -549.5 Min. : -469.5 Min. : -10400
## 1st Qu.: 204.2 1st Qu.: 107.4 1st Qu.: 22.8 1st Qu.: 65
## Median : 582.4 Median : 222.8 Median : 60.0 Median : 240
## Mean : 4749.7 Mean : 1771.8 Mean : 1458.4 Mean : 12282
## 3rd Qu.: 2303.7 3rd Qu.: 691.0 3rd Qu.: 177.6 3rd Qu.: 1017
## Max. :2759604.0 Max. :1481468.5 Max. :631635.5 Max. :5339997
## NA's :160640 NA's :130832 NA's :161751 NA's :52673
## tp
## Min. :-333
## 1st Qu.: 63
## Median : 515
## Mean :1073
## 3rd Qu.:1533
## Max. :7422
## NA's :168805
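## The column with the most missing values is tp (168805 NA's); among the original columns it is Rx (161751 NA's), followed by Hospital (160640 NA's).
## There are negative values in the new total incurred cost (cti) and processing time (tp) columns.
## 52673 rows of the summary are entirely NA (ClaimID, IsDenied and the amount columns all report 52673 NA's): they are records whose IsDenied is NA, which the == filter turns into all-NA rows; they are not the claims removed for being denied.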
# General cleaning
# Keep the claim identifier plus the two derived metrics, then discard every
# row in which any of the three is missing.
bd1 <- bdp[, c("ClaimID", "cti", "tp")]
nrow(bd1)
## [1] 180681
bd1 <- na.omit(bd1)
nrow(bd1)
## [1] 11876
summary(bd1)
## ClaimID cti tp
## Min. : 650919 Min. : -2350.1 Min. :-333
## 1st Qu.: 833855 1st Qu.: 12.7 1st Qu.: 63
## Median :22712408 Median : 226.8 Median : 515
## Mean :16575465 Mean : 7582.4 Mean :1073
## 3rd Qu.:22731162 3rd Qu.: 1138.4 3rd Qu.:1533
## Max. :61592860 Max. :1915501.9 Max. :7422
# Linear regression of total incurred cost (cti)
# ClaimantAge_at_DOI is stored as character, so lm() was expanding it into
# one dummy variable per distinct age string (including "NULL" and "-7162").
# It is converted to a number here on a copy of the data, so bdp itself is
# left untouched. "NULL" markers and negative ages are treated as missing;
# any other non-numeric string still triggers the usual coercion warning.
# NOTE: the model output pasted below in this file was produced by the
# earlier fit, in which age was a categorical predictor.
# NOTE(review): that earlier fit reported 7 coefficients not defined because
# of singularities, all of them BodyPart levels; BodyPart looks nested in
# BodyPartRegion - consider keeping only one of the two.
bd_reg <- bdp
edad <- bd_reg$ClaimantAge_at_DOI
edad[edad %in% "NULL"] <- NA
edad <- as.numeric(edad)
edad[!is.na(edad) & edad < 0] <- NA
bd_reg$ClaimantAge_at_DOI <- edad
regresion <- lm(
  cti ~ ClaimantAge_at_DOI + Gender + ClaimantType + tp + BodyPartRegion +
    BodyPart,
  data = bd_reg
)
summary(regresion)
##
## Call:
## lm(formula = cti ~ ClaimantAge_at_DOI + Gender + ClaimantType +
## tp + BodyPartRegion + BodyPart, data = bdp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -137843 -10753 -908 4591 1873188
##
## Coefficients: (7 not defined because of singularities)
## Estimate
## (Intercept) 1.433e+04
## ClaimantAge_at_DOI-10 -1.679e+04
## ClaimantAge_at_DOI-6 -9.920e+02
## ClaimantAge_at_DOI-7 -5.878e+03
## ClaimantAge_at_DOI-7162 -1.663e+04
## ClaimantAge_at_DOI-9 -9.621e+03
## ClaimantAge_at_DOI0 -1.145e+04
## ClaimantAge_at_DOI1 -1.495e+04
## ClaimantAge_at_DOI12 6.318e+03
## ClaimantAge_at_DOI13 -1.602e+04
## ClaimantAge_at_DOI14 -6.233e+03
## ClaimantAge_at_DOI15 -4.463e+03
## ClaimantAge_at_DOI16 -2.089e+03
## ClaimantAge_at_DOI17 1.783e+03
## ClaimantAge_at_DOI18 3.157e+03
## ClaimantAge_at_DOI19 -1.293e+03
## ClaimantAge_at_DOI2 -1.223e+04
## ClaimantAge_at_DOI20 -2.197e+03
## ClaimantAge_at_DOI21 -3.418e+03
## ClaimantAge_at_DOI22 -6.503e+02
## ClaimantAge_at_DOI23 -2.536e+03
## ClaimantAge_at_DOI24 -2.789e+03
## ClaimantAge_at_DOI25 -2.006e+03
## ClaimantAge_at_DOI26 -1.460e+03
## ClaimantAge_at_DOI27 -1.039e+03
## ClaimantAge_at_DOI28 -3.030e+03
## ClaimantAge_at_DOI29 -1.916e+03
## ClaimantAge_at_DOI3 2.288e+03
## ClaimantAge_at_DOI30 -1.149e+03
## ClaimantAge_at_DOI31 2.764e+03
## ClaimantAge_at_DOI32 -3.326e+03
## ClaimantAge_at_DOI33 -2.698e+02
## ClaimantAge_at_DOI34 -1.910e+03
## ClaimantAge_at_DOI35 -2.686e+02
## ClaimantAge_at_DOI36 -3.665e+03
## ClaimantAge_at_DOI37 2.205e+03
## ClaimantAge_at_DOI38 -1.163e+03
## ClaimantAge_at_DOI39 1.550e+03
## ClaimantAge_at_DOI40 -4.706e+02
## ClaimantAge_at_DOI41 -1.544e+03
## ClaimantAge_at_DOI42 5.514e+02
## ClaimantAge_at_DOI43 1.245e+03
## ClaimantAge_at_DOI44 1.846e+03
## ClaimantAge_at_DOI45 -1.332e+02
## ClaimantAge_at_DOI46 4.994e+03
## ClaimantAge_at_DOI47 6.782e+03
## ClaimantAge_at_DOI48 5.079e+03
## ClaimantAge_at_DOI49 3.016e+03
## ClaimantAge_at_DOI50 1.728e+00
## ClaimantAge_at_DOI51 -2.454e+03
## ClaimantAge_at_DOI52 2.668e+03
## ClaimantAge_at_DOI53 -6.238e+01
## ClaimantAge_at_DOI54 9.476e+02
## ClaimantAge_at_DOI55 5.312e+03
## ClaimantAge_at_DOI56 6.457e+03
## ClaimantAge_at_DOI57 -2.249e+02
## ClaimantAge_at_DOI58 6.895e+02
## ClaimantAge_at_DOI59 5.238e+03
## ClaimantAge_at_DOI6 -1.591e+03
## ClaimantAge_at_DOI60 3.517e+03
## ClaimantAge_at_DOI61 8.794e+03
## ClaimantAge_at_DOI62 6.501e+02
## ClaimantAge_at_DOI63 1.387e+03
## ClaimantAge_at_DOI64 -2.567e+03
## ClaimantAge_at_DOI65 -1.382e+03
## ClaimantAge_at_DOI66 1.853e+04
## ClaimantAge_at_DOI67 -1.110e+03
## ClaimantAge_at_DOI68 -4.551e+03
## ClaimantAge_at_DOI69 7.042e+02
## ClaimantAge_at_DOI7 -1.362e+03
## ClaimantAge_at_DOI70 3.386e+02
## ClaimantAge_at_DOI71 4.037e+03
## ClaimantAge_at_DOI72 7.292e+03
## ClaimantAge_at_DOI73 -1.225e+03
## ClaimantAge_at_DOI74 -1.016e+04
## ClaimantAge_at_DOI75 2.671e+04
## ClaimantAge_at_DOI76 -1.299e+03
## ClaimantAge_at_DOI77 1.779e+02
## ClaimantAge_at_DOI80 7.498e+03
## ClaimantAge_at_DOI83 7.405e+03
## ClaimantAge_at_DOI84 -2.927e+03
## ClaimantAge_at_DOI85 -1.687e+04
## ClaimantAge_at_DOI87 4.193e+03
## ClaimantAge_at_DOI93 1.073e+03
## ClaimantAge_at_DOINULL -5.312e+03
## GenderMale 8.971e+02
## GenderNot Available 5.528e+03
## ClaimantTypeMedical Only -2.452e+04
## ClaimantTypeReport Only -2.322e+04
## tp 4.375e+00
## BodyPartRegionLower Extremities 1.115e+04
## BodyPartRegionMultiple Body Parts 4.905e+03
## BodyPartRegionNeck 1.145e+05
## BodyPartRegionNon-Standard Code -3.042e+03
## BodyPartRegionNot Available 5.335e+03
## BodyPartRegionTrunk 2.193e+03
## BodyPartRegionUpper Extremities 4.131e+03
## BodyPartAnkle -4.895e+03
## BodyPartArtificial Appliance -1.549e+04
## BodyPartBody Systems and Multiple Body Systems 1.089e+02
## BodyPartBrain 3.590e+03
## BodyPartButtocks 2.650e+03
## BodyPartChest 6.826e+03
## BodyPartDisc-Trunk 3.307e+04
## BodyPartEar(S) 6.797e+03
## BodyPartElbow 3.337e+03
## BodyPartEyes 4.429e+03
## BodyPartFacial Bones 5.886e+03
## BodyPartFinger(S) -9.847e+02
## BodyPartFoot -6.678e+03
## BodyPartGreat Toe -9.313e+03
## BodyPartHand 1.069e+03
## BodyPartHeart 6.230e+02
## BodyPartHip -2.430e+03
## BodyPartInsufficient Info to Properly Identify?Unclassified 4.745e+03
## BodyPartInternal Organs 5.316e+03
## BodyPartKnee -8.141e+02
## BodyPartLarynx -1.177e+05
## BodyPartLower Arm 2.101e+03
## BodyPartLower Back Area 9.444e+03
## BodyPartLower Leg -6.209e+03
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 4.398e+03
## BodyPartLungs 2.007e+03
## BodyPartMouth 1.546e+02
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) 4.373e+02
## BodyPartMultiple Head Injury 7.781e+03
## BodyPartMultiple Lower Extremities -3.740e+03
## BodyPartMultiple Neck Injury -9.719e+04
## BodyPartMultiple Trunk -3.850e+03
## BodyPartMultiple Upper Extremities 6.420e+03
## BodyPartNo Physical Injury 4.450e+03
## BodyPartNon-Standard Code NA
## BodyPartNose 3.325e+03
## BodyPartNot Available NA
## BodyPartPelvis 5.629e+04
## BodyPartSacrum And Coccyx 2.313e+03
## BodyPartShoulder(S) 9.336e+03
## BodyPartSkull 3.231e+03
## BodyPartSoft Tissue-Head 9.366e+03
## BodyPartSoft Tissue-Neck -1.084e+05
## BodyPartSpinal Cord-Trunk -5.704e+03
## BodyPartTeeth NA
## BodyPartThumb 1.478e+03
## BodyPartToes -7.918e+03
## BodyPartTrachea -1.242e+05
## BodyPartUpper Arm 1.211e+03
## BodyPartUpper Back Area 1.045e+03
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## Std. Error
## (Intercept) 2.394e+04
## ClaimantAge_at_DOI-10 4.469e+04
## ClaimantAge_at_DOI-6 4.470e+04
## ClaimantAge_at_DOI-7 4.470e+04
## ClaimantAge_at_DOI-7162 4.472e+04
## ClaimantAge_at_DOI-9 3.534e+04
## ClaimantAge_at_DOI0 2.830e+04
## ClaimantAge_at_DOI1 2.827e+04
## ClaimantAge_at_DOI12 4.470e+04
## ClaimantAge_at_DOI13 3.545e+04
## ClaimantAge_at_DOI14 4.468e+04
## ClaimantAge_at_DOI15 3.533e+04
## ClaimantAge_at_DOI16 2.675e+04
## ClaimantAge_at_DOI17 3.160e+04
## ClaimantAge_at_DOI18 2.465e+04
## ClaimantAge_at_DOI19 2.320e+04
## ClaimantAge_at_DOI2 2.964e+04
## ClaimantAge_at_DOI20 2.273e+04
## ClaimantAge_at_DOI21 2.272e+04
## ClaimantAge_at_DOI22 2.263e+04
## ClaimantAge_at_DOI23 2.260e+04
## ClaimantAge_at_DOI24 2.259e+04
## ClaimantAge_at_DOI25 2.257e+04
## ClaimantAge_at_DOI26 2.258e+04
## ClaimantAge_at_DOI27 2.255e+04
## ClaimantAge_at_DOI28 2.254e+04
## ClaimantAge_at_DOI29 2.255e+04
## ClaimantAge_at_DOI3 3.537e+04
## ClaimantAge_at_DOI30 2.254e+04
## ClaimantAge_at_DOI31 2.253e+04
## ClaimantAge_at_DOI32 2.254e+04
## ClaimantAge_at_DOI33 2.253e+04
## ClaimantAge_at_DOI34 2.254e+04
## ClaimantAge_at_DOI35 2.251e+04
## ClaimantAge_at_DOI36 2.252e+04
## ClaimantAge_at_DOI37 2.252e+04
## ClaimantAge_at_DOI38 2.250e+04
## ClaimantAge_at_DOI39 2.251e+04
## ClaimantAge_at_DOI40 2.250e+04
## ClaimantAge_at_DOI41 2.250e+04
## ClaimantAge_at_DOI42 2.250e+04
## ClaimantAge_at_DOI43 2.249e+04
## ClaimantAge_at_DOI44 2.250e+04
## ClaimantAge_at_DOI45 2.250e+04
## ClaimantAge_at_DOI46 2.249e+04
## ClaimantAge_at_DOI47 2.249e+04
## ClaimantAge_at_DOI48 2.251e+04
## ClaimantAge_at_DOI49 2.249e+04
## ClaimantAge_at_DOI50 2.250e+04
## ClaimantAge_at_DOI51 2.251e+04
## ClaimantAge_at_DOI52 2.251e+04
## ClaimantAge_at_DOI53 2.252e+04
## ClaimantAge_at_DOI54 2.250e+04
## ClaimantAge_at_DOI55 2.254e+04
## ClaimantAge_at_DOI56 2.252e+04
## ClaimantAge_at_DOI57 2.253e+04
## ClaimantAge_at_DOI58 2.257e+04
## ClaimantAge_at_DOI59 2.262e+04
## ClaimantAge_at_DOI6 3.536e+04
## ClaimantAge_at_DOI60 2.260e+04
## ClaimantAge_at_DOI61 2.265e+04
## ClaimantAge_at_DOI62 2.277e+04
## ClaimantAge_at_DOI63 2.285e+04
## ClaimantAge_at_DOI64 2.310e+04
## ClaimantAge_at_DOI65 2.323e+04
## ClaimantAge_at_DOI66 2.350e+04
## ClaimantAge_at_DOI67 2.417e+04
## ClaimantAge_at_DOI68 2.524e+04
## ClaimantAge_at_DOI69 2.550e+04
## ClaimantAge_at_DOI7 4.486e+04
## ClaimantAge_at_DOI70 2.740e+04
## ClaimantAge_at_DOI71 2.500e+04
## ClaimantAge_at_DOI72 2.585e+04
## ClaimantAge_at_DOI73 2.625e+04
## ClaimantAge_at_DOI74 3.537e+04
## ClaimantAge_at_DOI75 2.961e+04
## ClaimantAge_at_DOI76 4.468e+04
## ClaimantAge_at_DOI77 4.577e+04
## ClaimantAge_at_DOI80 3.533e+04
## ClaimantAge_at_DOI83 4.469e+04
## ClaimantAge_at_DOI84 3.534e+04
## ClaimantAge_at_DOI85 4.473e+04
## ClaimantAge_at_DOI87 3.543e+04
## ClaimantAge_at_DOI93 4.485e+04
## ClaimantAge_at_DOINULL 2.241e+04
## GenderMale 7.922e+02
## GenderNot Available 1.961e+03
## ClaimantTypeMedical Only 8.670e+02
## ClaimantTypeReport Only 1.126e+03
## tp 3.197e-01
## BodyPartRegionLower Extremities 9.646e+03
## BodyPartRegionMultiple Body Parts 1.265e+04
## BodyPartRegionNeck 2.116e+04
## BodyPartRegionNon-Standard Code 8.586e+03
## BodyPartRegionNot Available 2.868e+04
## BodyPartRegionTrunk 9.236e+03
## BodyPartRegionUpper Extremities 8.684e+03
## BodyPartAnkle 4.956e+03
## BodyPartArtificial Appliance 3.986e+04
## BodyPartBody Systems and Multiple Body Systems 1.131e+04
## BodyPartBrain 2.393e+04
## BodyPartButtocks 6.829e+03
## BodyPartChest 4.594e+03
## BodyPartDisc-Trunk 7.530e+03
## BodyPartEar(S) 9.846e+03
## BodyPartElbow 3.273e+03
## BodyPartEyes 8.685e+03
## BodyPartFacial Bones 9.695e+03
## BodyPartFinger(S) 2.387e+03
## BodyPartFoot 5.159e+03
## BodyPartGreat Toe 1.022e+04
## BodyPartHand 2.513e+03
## BodyPartHeart 2.266e+04
## BodyPartHip 5.753e+03
## BodyPartInsufficient Info to Properly Identify?Unclassified 1.099e+04
## BodyPartInternal Organs 1.066e+04
## BodyPartKnee 4.800e+03
## BodyPartLarynx 3.356e+04
## BodyPartLower Arm 2.920e+03
## BodyPartLower Back Area 3.878e+03
## BodyPartLower Leg 5.293e+03
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 5.679e+03
## BodyPartLungs 8.194e+03
## BodyPartMouth 1.066e+04
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) 9.491e+03
## BodyPartMultiple Head Injury 8.997e+03
## BodyPartMultiple Lower Extremities 6.091e+03
## BodyPartMultiple Neck Injury 1.974e+04
## BodyPartMultiple Trunk 9.847e+03
## BodyPartMultiple Upper Extremities 3.818e+03
## BodyPartNo Physical Injury 1.023e+04
## BodyPartNon-Standard Code NA
## BodyPartNose 9.940e+03
## BodyPartNot Available NA
## BodyPartPelvis 1.343e+04
## BodyPartSacrum And Coccyx 1.344e+04
## BodyPartShoulder(S) 2.562e+03
## BodyPartSkull 9.147e+03
## BodyPartSoft Tissue-Head 8.975e+03
## BodyPartSoft Tissue-Neck 1.954e+04
## BodyPartSpinal Cord-Trunk 6.692e+03
## BodyPartTeeth NA
## BodyPartThumb 3.272e+03
## BodyPartToes 6.946e+03
## BodyPartTrachea 4.385e+04
## BodyPartUpper Arm 3.539e+03
## BodyPartUpper Back Area 5.438e+03
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## t value
## (Intercept) 0.599
## ClaimantAge_at_DOI-10 -0.376
## ClaimantAge_at_DOI-6 -0.022
## ClaimantAge_at_DOI-7 -0.131
## ClaimantAge_at_DOI-7162 -0.372
## ClaimantAge_at_DOI-9 -0.272
## ClaimantAge_at_DOI0 -0.405
## ClaimantAge_at_DOI1 -0.529
## ClaimantAge_at_DOI12 0.141
## ClaimantAge_at_DOI13 -0.452
## ClaimantAge_at_DOI14 -0.139
## ClaimantAge_at_DOI15 -0.126
## ClaimantAge_at_DOI16 -0.078
## ClaimantAge_at_DOI17 0.056
## ClaimantAge_at_DOI18 0.128
## ClaimantAge_at_DOI19 -0.056
## ClaimantAge_at_DOI2 -0.413
## ClaimantAge_at_DOI20 -0.097
## ClaimantAge_at_DOI21 -0.150
## ClaimantAge_at_DOI22 -0.029
## ClaimantAge_at_DOI23 -0.112
## ClaimantAge_at_DOI24 -0.123
## ClaimantAge_at_DOI25 -0.089
## ClaimantAge_at_DOI26 -0.065
## ClaimantAge_at_DOI27 -0.046
## ClaimantAge_at_DOI28 -0.134
## ClaimantAge_at_DOI29 -0.085
## ClaimantAge_at_DOI3 0.065
## ClaimantAge_at_DOI30 -0.051
## ClaimantAge_at_DOI31 0.123
## ClaimantAge_at_DOI32 -0.148
## ClaimantAge_at_DOI33 -0.012
## ClaimantAge_at_DOI34 -0.085
## ClaimantAge_at_DOI35 -0.012
## ClaimantAge_at_DOI36 -0.163
## ClaimantAge_at_DOI37 0.098
## ClaimantAge_at_DOI38 -0.052
## ClaimantAge_at_DOI39 0.069
## ClaimantAge_at_DOI40 -0.021
## ClaimantAge_at_DOI41 -0.069
## ClaimantAge_at_DOI42 0.025
## ClaimantAge_at_DOI43 0.055
## ClaimantAge_at_DOI44 0.082
## ClaimantAge_at_DOI45 -0.006
## ClaimantAge_at_DOI46 0.222
## ClaimantAge_at_DOI47 0.302
## ClaimantAge_at_DOI48 0.226
## ClaimantAge_at_DOI49 0.134
## ClaimantAge_at_DOI50 0.000
## ClaimantAge_at_DOI51 -0.109
## ClaimantAge_at_DOI52 0.119
## ClaimantAge_at_DOI53 -0.003
## ClaimantAge_at_DOI54 0.042
## ClaimantAge_at_DOI55 0.236
## ClaimantAge_at_DOI56 0.287
## ClaimantAge_at_DOI57 -0.010
## ClaimantAge_at_DOI58 0.031
## ClaimantAge_at_DOI59 0.232
## ClaimantAge_at_DOI6 -0.045
## ClaimantAge_at_DOI60 0.156
## ClaimantAge_at_DOI61 0.388
## ClaimantAge_at_DOI62 0.029
## ClaimantAge_at_DOI63 0.061
## ClaimantAge_at_DOI64 -0.111
## ClaimantAge_at_DOI65 -0.059
## ClaimantAge_at_DOI66 0.788
## ClaimantAge_at_DOI67 -0.046
## ClaimantAge_at_DOI68 -0.180
## ClaimantAge_at_DOI69 0.028
## ClaimantAge_at_DOI7 -0.030
## ClaimantAge_at_DOI70 0.012
## ClaimantAge_at_DOI71 0.161
## ClaimantAge_at_DOI72 0.282
## ClaimantAge_at_DOI73 -0.047
## ClaimantAge_at_DOI74 -0.287
## ClaimantAge_at_DOI75 0.902
## ClaimantAge_at_DOI76 -0.029
## ClaimantAge_at_DOI77 0.004
## ClaimantAge_at_DOI80 0.212
## ClaimantAge_at_DOI83 0.166
## ClaimantAge_at_DOI84 -0.083
## ClaimantAge_at_DOI85 -0.377
## ClaimantAge_at_DOI87 0.118
## ClaimantAge_at_DOI93 0.024
## ClaimantAge_at_DOINULL -0.237
## GenderMale 1.132
## GenderNot Available 2.819
## ClaimantTypeMedical Only -28.286
## ClaimantTypeReport Only -20.623
## tp 13.685
## BodyPartRegionLower Extremities 1.155
## BodyPartRegionMultiple Body Parts 0.388
## BodyPartRegionNeck 5.413
## BodyPartRegionNon-Standard Code -0.354
## BodyPartRegionNot Available 0.186
## BodyPartRegionTrunk 0.237
## BodyPartRegionUpper Extremities 0.476
## BodyPartAnkle -0.988
## BodyPartArtificial Appliance -0.389
## BodyPartBody Systems and Multiple Body Systems 0.010
## BodyPartBrain 0.150
## BodyPartButtocks 0.388
## BodyPartChest 1.486
## BodyPartDisc-Trunk 4.392
## BodyPartEar(S) 0.690
## BodyPartElbow 1.019
## BodyPartEyes 0.510
## BodyPartFacial Bones 0.607
## BodyPartFinger(S) -0.412
## BodyPartFoot -1.295
## BodyPartGreat Toe -0.911
## BodyPartHand 0.425
## BodyPartHeart 0.027
## BodyPartHip -0.422
## BodyPartInsufficient Info to Properly Identify?Unclassified 0.432
## BodyPartInternal Organs 0.499
## BodyPartKnee -0.170
## BodyPartLarynx -3.506
## BodyPartLower Arm 0.720
## BodyPartLower Back Area 2.435
## BodyPartLower Leg -1.173
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 0.774
## BodyPartLungs 0.245
## BodyPartMouth 0.015
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) 0.046
## BodyPartMultiple Head Injury 0.865
## BodyPartMultiple Lower Extremities -0.614
## BodyPartMultiple Neck Injury -4.923
## BodyPartMultiple Trunk -0.391
## BodyPartMultiple Upper Extremities 1.682
## BodyPartNo Physical Injury 0.435
## BodyPartNon-Standard Code NA
## BodyPartNose 0.335
## BodyPartNot Available NA
## BodyPartPelvis 4.191
## BodyPartSacrum And Coccyx 0.172
## BodyPartShoulder(S) 3.644
## BodyPartSkull 0.353
## BodyPartSoft Tissue-Head 1.044
## BodyPartSoft Tissue-Neck -5.546
## BodyPartSpinal Cord-Trunk -0.852
## BodyPartTeeth NA
## BodyPartThumb 0.452
## BodyPartToes -1.140
## BodyPartTrachea -2.833
## BodyPartUpper Arm 0.342
## BodyPartUpper Back Area 0.192
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## Pr(>|t|)
## (Intercept) 0.549473
## ClaimantAge_at_DOI-10 0.707170
## ClaimantAge_at_DOI-6 0.982293
## ClaimantAge_at_DOI-7 0.895384
## ClaimantAge_at_DOI-7162 0.709982
## ClaimantAge_at_DOI-9 0.785467
## ClaimantAge_at_DOI0 0.685651
## ClaimantAge_at_DOI1 0.597036
## ClaimantAge_at_DOI12 0.887611
## ClaimantAge_at_DOI13 0.651418
## ClaimantAge_at_DOI14 0.889073
## ClaimantAge_at_DOI15 0.899468
## ClaimantAge_at_DOI16 0.937735
## ClaimantAge_at_DOI17 0.955000
## ClaimantAge_at_DOI18 0.898096
## ClaimantAge_at_DOI19 0.955551
## ClaimantAge_at_DOI2 0.679771
## ClaimantAge_at_DOI20 0.923022
## ClaimantAge_at_DOI21 0.880418
## ClaimantAge_at_DOI22 0.977076
## ClaimantAge_at_DOI23 0.910657
## ClaimantAge_at_DOI24 0.901756
## ClaimantAge_at_DOI25 0.929205
## ClaimantAge_at_DOI26 0.948436
## ClaimantAge_at_DOI27 0.963239
## ClaimantAge_at_DOI28 0.893081
## ClaimantAge_at_DOI29 0.932286
## ClaimantAge_at_DOI3 0.948408
## ClaimantAge_at_DOI30 0.959332
## ClaimantAge_at_DOI31 0.902365
## ClaimantAge_at_DOI32 0.882705
## ClaimantAge_at_DOI33 0.990448
## ClaimantAge_at_DOI34 0.932480
## ClaimantAge_at_DOI35 0.990480
## ClaimantAge_at_DOI36 0.870706
## ClaimantAge_at_DOI37 0.921999
## ClaimantAge_at_DOI38 0.958803
## ClaimantAge_at_DOI39 0.945092
## ClaimantAge_at_DOI40 0.983314
## ClaimantAge_at_DOI41 0.945289
## ClaimantAge_at_DOI42 0.980449
## ClaimantAge_at_DOI43 0.955861
## ClaimantAge_at_DOI44 0.934620
## ClaimantAge_at_DOI45 0.995277
## ClaimantAge_at_DOI46 0.824276
## ClaimantAge_at_DOI47 0.762966
## ClaimantAge_at_DOI48 0.821481
## ClaimantAge_at_DOI49 0.893352
## ClaimantAge_at_DOI50 0.999939
## ClaimantAge_at_DOI51 0.913160
## ClaimantAge_at_DOI52 0.905630
## ClaimantAge_at_DOI53 0.997790
## ClaimantAge_at_DOI54 0.966409
## ClaimantAge_at_DOI55 0.813690
## ClaimantAge_at_DOI56 0.774273
## ClaimantAge_at_DOI57 0.992037
## ClaimantAge_at_DOI58 0.975626
## ClaimantAge_at_DOI59 0.816907
## ClaimantAge_at_DOI6 0.964123
## ClaimantAge_at_DOI60 0.876310
## ClaimantAge_at_DOI61 0.697822
## ClaimantAge_at_DOI62 0.977218
## ClaimantAge_at_DOI63 0.951592
## ClaimantAge_at_DOI64 0.911521
## ClaimantAge_at_DOI65 0.952563
## ClaimantAge_at_DOI66 0.430450
## ClaimantAge_at_DOI67 0.963373
## ClaimantAge_at_DOI68 0.856905
## ClaimantAge_at_DOI69 0.977968
## ClaimantAge_at_DOI7 0.975773
## ClaimantAge_at_DOI70 0.990142
## ClaimantAge_at_DOI71 0.871718
## ClaimantAge_at_DOI72 0.777868
## ClaimantAge_at_DOI73 0.962773
## ClaimantAge_at_DOI74 0.773869
## ClaimantAge_at_DOI75 0.366945
## ClaimantAge_at_DOI76 0.976814
## ClaimantAge_at_DOI77 0.996899
## ClaimantAge_at_DOI80 0.831936
## ClaimantAge_at_DOI83 0.868382
## ClaimantAge_at_DOI84 0.933985
## ClaimantAge_at_DOI85 0.706076
## ClaimantAge_at_DOI87 0.905809
## ClaimantAge_at_DOI93 0.980909
## ClaimantAge_at_DOINULL 0.812640
## GenderMale 0.257460
## GenderNot Available 0.004823
## ClaimantTypeMedical Only < 2e-16
## ClaimantTypeReport Only < 2e-16
## tp < 2e-16
## BodyPartRegionLower Extremities 0.247930
## BodyPartRegionMultiple Body Parts 0.698127
## BodyPartRegionNeck 6.32e-08
## BodyPartRegionNon-Standard Code 0.723159
## BodyPartRegionNot Available 0.852444
## BodyPartRegionTrunk 0.812309
## BodyPartRegionUpper Extremities 0.634307
## BodyPartAnkle 0.323291
## BodyPartArtificial Appliance 0.697627
## BodyPartBody Systems and Multiple Body Systems 0.992319
## BodyPartBrain 0.880774
## BodyPartButtocks 0.697969
## BodyPartChest 0.137357
## BodyPartDisc-Trunk 1.13e-05
## BodyPartEar(S) 0.490030
## BodyPartElbow 0.308037
## BodyPartEyes 0.610043
## BodyPartFacial Bones 0.543820
## BodyPartFinger(S) 0.680014
## BodyPartFoot 0.195481
## BodyPartGreat Toe 0.362424
## BodyPartHand 0.670697
## BodyPartHeart 0.978064
## BodyPartHip 0.672699
## BodyPartInsufficient Info to Properly Identify?Unclassified 0.665803
## BodyPartInternal Organs 0.617862
## BodyPartKnee 0.865314
## BodyPartLarynx 0.000457
## BodyPartLower Arm 0.471744
## BodyPartLower Back Area 0.014894
## BodyPartLower Leg 0.240739
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 0.438704
## BodyPartLungs 0.806556
## BodyPartMouth 0.988425
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) 0.963247
## BodyPartMultiple Head Injury 0.387154
## BodyPartMultiple Lower Extremities 0.539232
## BodyPartMultiple Neck Injury 8.66e-07
## BodyPartMultiple Trunk 0.695808
## BodyPartMultiple Upper Extremities 0.092685
## BodyPartNo Physical Injury 0.663703
## BodyPartNon-Standard Code NA
## BodyPartNose 0.737998
## BodyPartNot Available NA
## BodyPartPelvis 2.80e-05
## BodyPartSacrum And Coccyx 0.863370
## BodyPartShoulder(S) 0.000269
## BodyPartSkull 0.723903
## BodyPartSoft Tissue-Head 0.296715
## BodyPartSoft Tissue-Neck 2.99e-08
## BodyPartSpinal Cord-Trunk 0.394051
## BodyPartTeeth NA
## BodyPartThumb 0.651537
## BodyPartToes 0.254363
## BodyPartTrachea 0.004620
## BodyPartUpper Arm 0.732123
## BodyPartUpper Back Area 0.847559
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
##
## (Intercept)
## ClaimantAge_at_DOI-10
## ClaimantAge_at_DOI-6
## ClaimantAge_at_DOI-7
## ClaimantAge_at_DOI-7162
## ClaimantAge_at_DOI-9
## ClaimantAge_at_DOI0
## ClaimantAge_at_DOI1
## ClaimantAge_at_DOI12
## ClaimantAge_at_DOI13
## ClaimantAge_at_DOI14
## ClaimantAge_at_DOI15
## ClaimantAge_at_DOI16
## ClaimantAge_at_DOI17
## ClaimantAge_at_DOI18
## ClaimantAge_at_DOI19
## ClaimantAge_at_DOI2
## ClaimantAge_at_DOI20
## ClaimantAge_at_DOI21
## ClaimantAge_at_DOI22
## ClaimantAge_at_DOI23
## ClaimantAge_at_DOI24
## ClaimantAge_at_DOI25
## ClaimantAge_at_DOI26
## ClaimantAge_at_DOI27
## ClaimantAge_at_DOI28
## ClaimantAge_at_DOI29
## ClaimantAge_at_DOI3
## ClaimantAge_at_DOI30
## ClaimantAge_at_DOI31
## ClaimantAge_at_DOI32
## ClaimantAge_at_DOI33
## ClaimantAge_at_DOI34
## ClaimantAge_at_DOI35
## ClaimantAge_at_DOI36
## ClaimantAge_at_DOI37
## ClaimantAge_at_DOI38
## ClaimantAge_at_DOI39
## ClaimantAge_at_DOI40
## ClaimantAge_at_DOI41
## ClaimantAge_at_DOI42
## ClaimantAge_at_DOI43
## ClaimantAge_at_DOI44
## ClaimantAge_at_DOI45
## ClaimantAge_at_DOI46
## ClaimantAge_at_DOI47
## ClaimantAge_at_DOI48
## ClaimantAge_at_DOI49
## ClaimantAge_at_DOI50
## ClaimantAge_at_DOI51
## ClaimantAge_at_DOI52
## ClaimantAge_at_DOI53
## ClaimantAge_at_DOI54
## ClaimantAge_at_DOI55
## ClaimantAge_at_DOI56
## ClaimantAge_at_DOI57
## ClaimantAge_at_DOI58
## ClaimantAge_at_DOI59
## ClaimantAge_at_DOI6
## ClaimantAge_at_DOI60
## ClaimantAge_at_DOI61
## ClaimantAge_at_DOI62
## ClaimantAge_at_DOI63
## ClaimantAge_at_DOI64
## ClaimantAge_at_DOI65
## ClaimantAge_at_DOI66
## ClaimantAge_at_DOI67
## ClaimantAge_at_DOI68
## ClaimantAge_at_DOI69
## ClaimantAge_at_DOI7
## ClaimantAge_at_DOI70
## ClaimantAge_at_DOI71
## ClaimantAge_at_DOI72
## ClaimantAge_at_DOI73
## ClaimantAge_at_DOI74
## ClaimantAge_at_DOI75
## ClaimantAge_at_DOI76
## ClaimantAge_at_DOI77
## ClaimantAge_at_DOI80
## ClaimantAge_at_DOI83
## ClaimantAge_at_DOI84
## ClaimantAge_at_DOI85
## ClaimantAge_at_DOI87
## ClaimantAge_at_DOI93
## ClaimantAge_at_DOINULL
## GenderMale
## GenderNot Available **
## ClaimantTypeMedical Only ***
## ClaimantTypeReport Only ***
## tp ***
## BodyPartRegionLower Extremities
## BodyPartRegionMultiple Body Parts
## BodyPartRegionNeck ***
## BodyPartRegionNon-Standard Code
## BodyPartRegionNot Available
## BodyPartRegionTrunk
## BodyPartRegionUpper Extremities
## BodyPartAnkle
## BodyPartArtificial Appliance
## BodyPartBody Systems and Multiple Body Systems
## BodyPartBrain
## BodyPartButtocks
## BodyPartChest
## BodyPartDisc-Trunk ***
## BodyPartEar(S)
## BodyPartElbow
## BodyPartEyes
## BodyPartFacial Bones
## BodyPartFinger(S)
## BodyPartFoot
## BodyPartGreat Toe
## BodyPartHand
## BodyPartHeart
## BodyPartHip
## BodyPartInsufficient Info to Properly Identify?Unclassified
## BodyPartInternal Organs
## BodyPartKnee
## BodyPartLarynx ***
## BodyPartLower Arm
## BodyPartLower Back Area *
## BodyPartLower Leg
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk)
## BodyPartLungs
## BodyPartMouth
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts)
## BodyPartMultiple Head Injury
## BodyPartMultiple Lower Extremities
## BodyPartMultiple Neck Injury ***
## BodyPartMultiple Trunk
## BodyPartMultiple Upper Extremities .
## BodyPartNo Physical Injury
## BodyPartNon-Standard Code
## BodyPartNose
## BodyPartNot Available
## BodyPartPelvis ***
## BodyPartSacrum And Coccyx
## BodyPartShoulder(S) ***
## BodyPartSkull
## BodyPartSoft Tissue-Head
## BodyPartSoft Tissue-Neck ***
## BodyPartSpinal Cord-Trunk
## BodyPartTeeth
## BodyPartThumb
## BodyPartToes
## BodyPartTrachea **
## BodyPartUpper Arm
## BodyPartUpper Back Area
## BodyPartUpper Leg
## BodyPartVertebrae
## BodyPartWhole Body
## BodyPartWrist
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38650 on 11732 degrees of freedom
## (168805 observations deleted due to missingness)
## Multiple R-squared: 0.114, Adjusted R-squared: 0.1032
## F-statistic: 10.56 on 143 and 11732 DF, p-value: < 2.2e-16
# Remove outliers
# Tukey fences (Q1 - 1.5 * IQR, Q3 + 1.5 * IQR) are computed from the data.
# The previous version typed the quartiles in by hand (166.5 / 2120.8 for cti,
# 157.8 / 1491.0 for tp), which did not match the summaries of bd1, and then
# filtered with literal limits instead of the computed ones.
# NOTE: the summary and row counts pasted after this section were captured
# with the old hard-coded limits.
boxplot(bd1$cti)
q_cti <- quantile(bd1$cti, probs = c(0.25, 0.75), names = FALSE)
iqr_cti <- IQR(bd1$cti)
li_cti <- q_cti[1] - 1.5 * iqr_cti
li_cti
ls_cti <- q_cti[2] + 1.5 * iqr_cti
ls_cti
# Non-positive costs are discarded as before, so the effective lower bound is
# never below zero.
bd1 <- bd1[bd1$cti > max(0, li_cti) & bd1$cti < ls_cti, ]
# The tp fences are computed after the cti filter, on the rows that remain
boxplot(bd1$tp)
q_tp <- quantile(bd1$tp, probs = c(0.25, 0.75), names = FALSE)
iqr_tp <- IQR(bd1$tp)
li_tp <- q_tp[1] - 1.5 * iqr_tp
li_tp
ls_tp <- q_tp[2] + 1.5 * iqr_tp
ls_tp
# Non-positive processing times are discarded as before
bd1 <- bd1[bd1$tp > max(0, li_tp) & bd1$tp < ls_tp, ]
summary(bd1)
## ClaimID cti tp
## Min. : 777667 Min. : 3.0 Min. : 1.0
## 1st Qu.: 826870 1st Qu.: 142.5 1st Qu.: 271.0
## Median :22713597 Median : 302.3 Median : 643.0
## Mean :15118298 Mean : 644.1 Mean : 940.8
## 3rd Qu.:22728024 3rd Qu.: 804.1 3rd Qu.:1400.0
## Max. :61112894 Max. :3804.8 Max. :3474.0
length(bd1$cti)
## [1] 5861
length(bd1$tp)
## [1] 5861
# Clusters
# Segment the claims by cost (cti) and processing time (tp) only. ClaimID is
# an identifier, not a feature, and the two metrics are standardised so that
# neither dominates the Euclidean distance used by k-means.
# The earlier run, on unscaled data that included ClaimID, emitted repeated
# "Quick-TRANSfer stage steps exceeded maximum" warnings from clusGap().
datos_cluster <- as.data.frame(scale(bd1[, c("cti", "tp")]))
grupos <- 6
# Seed set before the first random initialisation so the segmentation is
# reproducible; several starts reduce the chance of a poor local optimum.
set.seed(123)
segmentos <- kmeans(datos_cluster, centers = grupos, nstart = 25)
# Attach each claim's cluster to the original (unscaled) rows
asignacion <- cbind(bd1, cluster = segmentos$cluster)
fviz_cluster(segmentos, data = datos_cluster)
# Optimise the number of groups with the gap statistic
set.seed(123)
optimizacion <- clusGap(datos_cluster, FUN = kmeans, nstart = 1, K.max = 10)
plot(optimizacion, xlab = "Numero de clusters K")
# Decision trees
# Work on a copy of bdp with the categorical columns stored as factors
arbol <- bdp
columnas_factor <- c("Gender", "BodyPart", "BodyPartRegion")
arbol[columnas_factor] <- lapply(arbol[columnas_factor], as.factor)
# Data for the cost tree: predictors plus cti, complete rows only
arbol_cti <- arbol[, c("Gender", "AverageWeeklyWage", "ClaimantAge_at_DOI", "cti")]
nrow(arbol_cti)
## [1] 180681
# Wage and age are converted to numbers; unparseable values become NA
arbol_cti[["AverageWeeklyWage"]] <- as.numeric(arbol_cti[["AverageWeeklyWage"]])
## Warning: NAs introducidos por coerción
arbol_cti[["ClaimantAge_at_DOI"]] <- as.numeric(arbol_cti[["ClaimantAge_at_DOI"]])
## Warning: NAs introducidos por coerción
arbol_cti <- na.omit(arbol_cti)
nrow(arbol_cti)
## [1] 34836
# Data for the processing-time tree: same predictors plus tp
arbol_tp <- arbol[, c("Gender", "AverageWeeklyWage", "ClaimantAge_at_DOI", "tp")]
nrow(arbol_tp)
## [1] 180681
arbol_tp[["AverageWeeklyWage"]] <- as.numeric(arbol_tp[["AverageWeeklyWage"]])
## Warning: NAs introducidos por coerción
arbol_tp[["ClaimantAge_at_DOI"]] <- as.numeric(arbol_tp[["ClaimantAge_at_DOI"]])
## Warning: NAs introducidos por coerción
arbol_tp <- na.omit(arbol_tp)
nrow(arbol_tp)
## [1] 3577
# Fit one regression tree per response and plot each of them
arbol1 <- rpart(
  cti ~ Gender + AverageWeeklyWage + ClaimantAge_at_DOI,
  data = arbol_cti
)
arbol2 <- rpart(
  tp ~ Gender + AverageWeeklyWage + ClaimantAge_at_DOI,
  data = arbol_tp
)
rpart.plot(arbol1)
rpart.plot(arbol2)