#Llamar a las librerias
library(tidyverse)
library(readxl)
library(dplyr)
library(rpart)
library(rpart.plot)
library(ggplot2)
library(factoextra)
library(cluster)
library(data.table)
library(VIM)
# Import data
# NOTE(review): both paths are absolute and machine-specific; confirm they
# exist before running this script on another machine.
claims_data <- read_excel("/Users/ofna19/Downloads/ClaimsData2018.xlsx")
transactions_summary <- read.csv(
  "/Users/ofna19/Downloads/TransactionsSummary2018.csv"
)
# Join both sources on ClaimID; all = TRUE keeps claims that appear in only
# one of the two tables (their missing columns are filled with NA).
merged_df <- merge(
  claims_data,
  transactions_summary,
  by = "ClaimID",
  all = TRUE
)
#summary(merged_df)
#count(merged_df, ClaimStatus, sort=TRUE)
#count(merged_df, IncidentDescription, sort=TRUE)
#count(merged_df, Gender, sort=TRUE)
#count(merged_df, ClaimantType, sort=TRUE)
#count(merged_df, InjuryNature, sort=TRUE)
#count(merged_df, BodyPartRegion, sort=TRUE)
#count(merged_df, BodyPart, sort=TRUE)
#count(merged_df, IsDenied, sort=TRUE)
## Corregir el tipo de datos:
# Coerce the columns of merged_df to the types the analysis needs.

# Date columns, parsed as month/day/year. The format is passed by name so it
# cannot be mistaken for another positional argument of as.Date().
date_cols <- c(
  "IncidentDate",
  "ReturnToWorkDate",
  "ClaimantOpenedDate",
  "ClaimantClosedDate",
  "EmployerNotificationDate",
  "ReceivedDate"
)
merged_df[date_cols] <- lapply(
  merged_df[date_cols],
  as.Date,
  format = "%m/%d/%Y"
)

# Numeric columns. Each column is converted from its OWN values: the earlier
# version assigned as.numeric(TotalReserves) to TotalPaid, which silently
# replaced the paid amounts with the reserves.
numeric_cols <- c(
  "AverageWeeklyWage",
  "ClaimantAge_at_DOI",
  "TotalPaid",
  "TotalRecovery",
  "TotalReserves",
  "IndemnityPaid",
  "OtherPaid"
)
merged_df[numeric_cols] <- lapply(merged_df[numeric_cols], as.numeric)

# IsDenied is stored as text so that models treat it as a category.
merged_df$IsDenied <- as.character(merged_df$IsDenied)
# How many NA values are there in the data set?
# (counted here for the EmployerNotificationDate column)
sum(is.na(merged_df[["EmployerNotificationDate"]]))
## [1] 74961
## [1] 52673
## [1] 57351
## [1] 52673
## [1] 52673
## ClaimID TotalPaid TotalReserves
## 0 52673 52673
## TotalRecovery IndemnityPaid OtherPaid
## 52673 52673 52673
## ClaimStatus IncidentDate IncidentDescription
## 52673 52673 52673
## ReturnToWorkDate AverageWeeklyWage ClaimantOpenedDate
## 111310 137597 52673
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## 57351 74961 52673
## IsDenied ClaimantAge_at_DOI Gender
## 52673 97751 52673
## ClaimantType InjuryNature BodyPartRegion
## 52673 52673 52673
## BodyPart BillReviewALE Hospital
## 52673 139865 145262
## PhysicianOutpatient Rx
## 84986 145752
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 633915 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 810246 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.00
## Median : 856915 Median : 0 Median : 0 Median : 0.00
## Mean :12344572 Mean : 2233 Mean : 2233 Mean : 68.88
## 3rd Qu.:22716420 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62246496 Max. :2069575 Max. :2069575 Max. :130541.03
## NA's :52673 NA's :52673 NA's :52673
## IndemnityPaid OtherPaid ClaimStatus IncidentDate
## Min. : -475 Min. : -7820 Length:186677 Min. :1947-02-24
## 1st Qu.: 0 1st Qu.: 58 Class :character 1st Qu.:1998-12-21
## Median : 0 Median : 230 Mode :character Median :2004-01-05
## Mean : 3061 Mean : 3685 Mean :2003-12-08
## 3rd Qu.: 0 3rd Qu.: 855 3rd Qu.:2009-02-02
## Max. :640732 Max. :4129915 Max. :2014-06-27
## NA's :52673 NA's :52673 NA's :52673
## IncidentDescription ReturnToWorkDate AverageWeeklyWage
## Length:186677 Min. :1976-10-29 Min. : 0.0
## Class :character 1st Qu.:2002-04-25 1st Qu.: 300.0
## Mode :character Median :2007-07-09 Median : 492.0
## Mean :2006-06-01 Mean : 587.3
## 3rd Qu.:2011-06-01 3rd Qu.: 660.4
## Max. :2015-05-07 Max. :2024000.0
## NA's :111310 NA's :137597
## ClaimantOpenedDate ClaimantClosedDate EmployerNotificationDate
## Min. :1947-02-24 Min. :1999-06-01 Min. :1972-09-10
## 1st Qu.:1999-02-09 1st Qu.:2005-03-31 1st Qu.:2000-03-13
## Median :2004-02-17 Median :2006-04-04 Median :2004-12-28
## Mean :2004-01-23 Mean :2007-05-24 Mean :2005-08-29
## 3rd Qu.:2009-04-09 3rd Qu.:2009-11-11 3rd Qu.:2009-11-03
## Max. :2014-06-30 Max. :2014-06-30 Max. :9999-07-21
## NA's :52673 NA's :57351 NA's :74961
## ReceivedDate IsDenied ClaimantAge_at_DOI Gender
## Min. :1947-02-24 Length:186677 Min. :-8000.00 Length:186677
## 1st Qu.:1999-02-09 Class :character 1st Qu.: 33.00 Class :character
## Median :2004-02-13 Mode :character Median : 42.00 Mode :character
## Mean :2004-07-19 Mean : 39.85
## 3rd Qu.:2009-02-27 3rd Qu.: 51.00
## Max. :9999-07-21 Max. : 94.00
## NA's :52673 NA's :97751
## ClaimantType InjuryNature BodyPartRegion BodyPart
## Length:186677 Length:186677 Length:186677 Length:186677
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BillReviewALE Hospital PhysicianOutpatient Rx
## Min. : -456.0 Min. : -12570.4 Min. : -4655.7 Min. : -469.5
## 1st Qu.: 16.0 1st Qu.: 193.9 1st Qu.: 107.6 1st Qu.: 23.3
## Median : 32.0 Median : 559.1 Median : 221.6 Median : 58.3
## Mean : 191.2 Mean : 4394.7 Mean : 1752.3 Mean : 1140.4
## 3rd Qu.: 80.0 3rd Qu.: 2253.4 3rd Qu.: 710.5 3rd Qu.: 174.5
## Max. :56475.3 Max. :2759604.0 Max. :1481468.5 Max. :631635.5
## NA's :139865 NA's :145262 NA's :84986 NA's :145752
# Se agregan las columnas con los cálculos necesarios para mostrar nuestro objetivo, que es el costo total y los días transcurridos de los procedimientos.
# Se utilizaron las variables de mayor relevancia para el modelo predictivo.
# Linear model of the total incurred cost of a claim, explained by the injured
# body region/part, claimant type, gender, age at the date of injury, process
# duration, nature of the injury and the denial flag.
# NOTE(review): TotalIncurredCost and TimeProcesses are not created in the
# visible part of this script; they are assumed to be added to merged_df
# before this point - confirm.
regresion <- lm(
  TotalIncurredCost ~ BodyPartRegion + BodyPart + ClaimantType + Gender +
    ClaimantAge_at_DOI + TimeProcesses + InjuryNature + IsDenied,
  data = merged_df
)
# Coefficient table and fit statistics of the model.
summary(regresion)
##
## Call:
## lm(formula = TotalIncurredCost ~ BodyPartRegion + BodyPart +
## ClaimantType + Gender + ClaimantAge_at_DOI + TimeProcesses +
## InjuryNature + IsDenied, data = merged_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36378 -4488 -181 2034 1372079
##
## Coefficients: (7 not defined because of singularities)
## Estimate
## (Intercept) 6.146e+03
## BodyPartRegionLower Extremities 5.359e+03
## BodyPartRegionMultiple Body Parts 1.019e+04
## BodyPartRegionNeck 1.743e+04
## BodyPartRegionNon-Standard Code -1.247e+03
## BodyPartRegionNot Available 6.159e+03
## BodyPartRegionTrunk 3.122e+03
## BodyPartRegionUpper Extremities 3.625e+03
## BodyPartAnkle -2.849e+03
## BodyPartArtificial Appliance -7.514e+03
## BodyPartBody Systems and Multiple Body Systems -5.912e+03
## BodyPartBrain 1.310e+04
## BodyPartButtocks 1.322e+03
## BodyPartChest 9.705e+02
## BodyPartDisc-Trunk 3.218e+03
## BodyPartEar(S) 5.206e+03
## BodyPartElbow 7.283e+02
## BodyPartEyes 4.096e+03
## BodyPartFacial Bones 3.635e+03
## BodyPartFinger(S) -1.122e+03
## BodyPartFoot -2.672e+03
## BodyPartGreat Toe -2.910e+03
## BodyPartHand 1.234e+02
## BodyPartHeart -2.176e+03
## BodyPartHip -3.151e+02
## BodyPartInsufficient Info to Properly Identify?Unclassified -5.318e+03
## BodyPartInternal Organs 9.073e+02
## BodyPartKnee 5.605e+02
## BodyPartLarynx -1.643e+04
## BodyPartLower Arm 9.104e+02
## BodyPartLower Back Area 2.486e+03
## BodyPartLower Leg -1.515e+03
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 3.709e+03
## BodyPartLungs 1.887e+03
## BodyPartMouth 2.828e+03
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) -4.624e+03
## BodyPartMultiple Head Injury 4.898e+03
## BodyPartMultiple Lower Extremities 9.111e+02
## BodyPartMultiple Neck Injury -1.188e+04
## BodyPartMultiple Trunk -2.326e+03
## BodyPartMultiple Upper Extremities -1.997e+02
## BodyPartNo Physical Injury -5.873e+03
## BodyPartNon-Standard Code NA
## BodyPartNose 4.148e+03
## BodyPartNot Available NA
## BodyPartPelvis 4.300e+03
## BodyPartSacrum And Coccyx -1.360e+03
## BodyPartShoulder(S) 4.516e+03
## BodyPartSkull 5.048e+03
## BodyPartSoft Tissue-Head 5.909e+03
## BodyPartSoft Tissue-Neck -1.260e+04
## BodyPartSpinal Cord-Trunk -2.440e+03
## BodyPartTeeth NA
## BodyPartThumb -7.469e+02
## BodyPartToes -3.214e+03
## BodyPartTrachea -1.522e+04
## BodyPartUpper Arm 7.993e+02
## BodyPartUpper Back Area 9.297e+02
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## ClaimantTypeMedical Only -1.592e+04
## ClaimantTypeReport Only -1.568e+04
## GenderMale 4.392e+01
## GenderNot Available 6.146e+03
## ClaimantAge_at_DOI 1.190e+00
## TimeProcesses 3.010e+00
## InjuryNatureAll Other Cumulative Injury, NOC 3.540e+03
## InjuryNatureAll Other Occupational Disease Injury, NOC 1.061e+04
## InjuryNatureAll Other Specific Injuries, Noc 1.802e+03
## InjuryNatureAmputation 7.231e+03
## InjuryNatureAngina Pectoris 3.799e+03
## InjuryNatureAsbestosis 1.148e+04
## InjuryNatureAsphyxiation 1.952e+04
## InjuryNatureBlack Lung 3.581e+03
## InjuryNatureBurn 5.314e+03
## InjuryNatureCancer 2.729e+04
## InjuryNatureCarpal Tunnel Syndrome 4.976e+03
## InjuryNatureConcussion 5.730e+03
## InjuryNatureContagious Disease 2.469e+03
## InjuryNatureContusion 3.594e+03
## InjuryNatureCrushing 4.969e+03
## InjuryNatureDermatitis 3.760e+03
## InjuryNatureDislocation 5.658e+03
## InjuryNatureDust Disease, NOC 4.414e+03
## InjuryNatureElectric Shock 2.151e+03
## InjuryNatureForeign Body 2.734e+03
## InjuryNatureFracture 7.607e+03
## InjuryNatureFreezing 6.342e+02
## InjuryNatureHearing Loss Or Impairment -3.411e+03
## InjuryNatureHeat Prostration 2.407e+03
## InjuryNatureHernia 2.547e+01
## InjuryNatureInfection 3.882e+03
## InjuryNatureInflammation 4.711e+03
## InjuryNatureLaceration 3.920e+03
## InjuryNatureLoss of Hearing 3.696e+03
## InjuryNatureMental Disorder 8.590e+03
## InjuryNatureMental Stress 1.688e+03
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 7.785e+03
## InjuryNatureMultiple Physical Injuries Only 6.007e+03
## InjuryNatureMyocardial Infarction 9.194e+03
## InjuryNatureNo Physical Injury 3.952e+03
## InjuryNatureNon-Standard Code 4.599e+02
## InjuryNatureNot Available 4.948e+03
## InjuryNaturePoisoning?Chemical (Other Than Metals) 2.532e+03
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 3.357e+03
## InjuryNaturePuncture 3.992e+03
## InjuryNatureRadiation 7.610e+03
## InjuryNatureRespiratory Disorders 4.188e+03
## InjuryNatureRupture 1.333e+04
## InjuryNatureSeverance 1.898e+04
## InjuryNatureSilicosis 3.613e+03
## InjuryNatureSprain 4.689e+03
## InjuryNatureStrain 4.043e+03
## InjuryNatureSyncope 4.911e+03
## InjuryNatureVascular 4.217e+03
## InjuryNatureVDT-Related Disease -3.269e+03
## InjuryNatureVision Loss 3.589e+03
## IsDenied1 -5.993e+03
## Std. Error
## (Intercept) 9.588e+03
## BodyPartRegionLower Extremities 1.654e+03
## BodyPartRegionMultiple Body Parts 2.734e+03
## BodyPartRegionNeck 4.950e+03
## BodyPartRegionNon-Standard Code 1.576e+03
## BodyPartRegionNot Available 8.155e+03
## BodyPartRegionTrunk 1.595e+03
## BodyPartRegionUpper Extremities 1.478e+03
## BodyPartAnkle 9.045e+02
## BodyPartArtificial Appliance 8.960e+03
## BodyPartBody Systems and Multiple Body Systems 2.577e+03
## BodyPartBrain 4.100e+03
## BodyPartButtocks 1.382e+03
## BodyPartChest 8.844e+02
## BodyPartDisc-Trunk 1.239e+03
## BodyPartEar(S) 1.902e+03
## BodyPartElbow 6.484e+02
## BodyPartEyes 1.505e+03
## BodyPartFacial Bones 1.639e+03
## BodyPartFinger(S) 4.656e+02
## BodyPartFoot 9.279e+02
## BodyPartGreat Toe 1.846e+03
## BodyPartHand 4.731e+02
## BodyPartHeart 4.629e+03
## BodyPartHip 1.067e+03
## BodyPartInsufficient Info to Properly Identify?Unclassified 2.528e+03
## BodyPartInternal Organs 1.860e+03
## BodyPartKnee 8.599e+02
## BodyPartLarynx 7.744e+03
## BodyPartLower Arm 5.710e+02
## BodyPartLower Back Area 7.388e+02
## BodyPartLower Leg 9.451e+02
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 1.010e+03
## BodyPartLungs 1.443e+03
## BodyPartMouth 1.798e+03
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) 2.340e+03
## BodyPartMultiple Head Injury 1.525e+03
## BodyPartMultiple Lower Extremities 1.162e+03
## BodyPartMultiple Neck Injury 4.780e+03
## BodyPartMultiple Trunk 1.967e+03
## BodyPartMultiple Upper Extremities 7.876e+02
## BodyPartNo Physical Injury 2.463e+03
## BodyPartNon-Standard Code NA
## BodyPartNose 1.800e+03
## BodyPartNot Available NA
## BodyPartPelvis 1.968e+03
## BodyPartSacrum And Coccyx 2.781e+03
## BodyPartShoulder(S) 4.930e+02
## BodyPartSkull 1.656e+03
## BodyPartSoft Tissue-Head 1.624e+03
## BodyPartSoft Tissue-Neck 4.767e+03
## BodyPartSpinal Cord-Trunk 1.400e+03
## BodyPartTeeth NA
## BodyPartThumb 6.239e+02
## BodyPartToes 1.304e+03
## BodyPartTrachea 7.762e+03
## BodyPartUpper Arm 6.978e+02
## BodyPartUpper Back Area 1.047e+03
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## ClaimantTypeMedical Only 1.856e+02
## ClaimantTypeReport Only 3.302e+02
## GenderMale 1.514e+02
## GenderNot Available 9.800e+02
## ClaimantAge_at_DOI 6.006e-01
## TimeProcesses 8.358e-02
## InjuryNatureAll Other Cumulative Injury, NOC 1.012e+04
## InjuryNatureAll Other Occupational Disease Injury, NOC 1.086e+04
## InjuryNatureAll Other Specific Injuries, Noc 9.484e+03
## InjuryNatureAmputation 9.942e+03
## InjuryNatureAngina Pectoris 1.082e+04
## InjuryNatureAsbestosis 9.919e+03
## InjuryNatureAsphyxiation 1.143e+04
## InjuryNatureBlack Lung 1.549e+04
## InjuryNatureBurn 9.504e+03
## InjuryNatureCancer 1.284e+04
## InjuryNatureCarpal Tunnel Syndrome 9.529e+03
## InjuryNatureConcussion 9.527e+03
## InjuryNatureContagious Disease 9.575e+03
## InjuryNatureContusion 9.480e+03
## InjuryNatureCrushing 9.521e+03
## InjuryNatureDermatitis 9.499e+03
## InjuryNatureDislocation 9.553e+03
## InjuryNatureDust Disease, NOC 1.060e+04
## InjuryNatureElectric Shock 9.725e+03
## InjuryNatureForeign Body 9.496e+03
## InjuryNatureFracture 9.489e+03
## InjuryNatureFreezing 1.422e+04
## InjuryNatureHearing Loss Or Impairment 9.897e+03
## InjuryNatureHeat Prostration 9.643e+03
## InjuryNatureHernia 9.613e+03
## InjuryNatureInfection 9.558e+03
## InjuryNatureInflammation 9.500e+03
## InjuryNatureLaceration 9.482e+03
## InjuryNatureLoss of Hearing 1.012e+04
## InjuryNatureMental Disorder 1.167e+04
## InjuryNatureMental Stress 9.639e+03
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 9.994e+03
## InjuryNatureMultiple Physical Injuries Only 9.492e+03
## InjuryNatureMyocardial Infarction 1.090e+04
## InjuryNatureNo Physical Injury 9.494e+03
## InjuryNatureNon-Standard Code 9.494e+03
## InjuryNatureNot Available 9.550e+03
## InjuryNaturePoisoning?Chemical (Other Than Metals) 9.630e+03
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 9.696e+03
## InjuryNaturePuncture 9.486e+03
## InjuryNatureRadiation 1.055e+04
## InjuryNatureRespiratory Disorders 9.536e+03
## InjuryNatureRupture 9.734e+03
## InjuryNatureSeverance 1.046e+04
## InjuryNatureSilicosis 1.548e+04
## InjuryNatureSprain 9.482e+03
## InjuryNatureStrain 9.480e+03
## InjuryNatureSyncope 9.680e+03
## InjuryNatureVascular 1.161e+04
## InjuryNatureVDT-Related Disease 1.773e+04
## InjuryNatureVision Loss 1.022e+04
## IsDenied1 3.389e+02
## t value
## (Intercept) 0.641
## BodyPartRegionLower Extremities 3.241
## BodyPartRegionMultiple Body Parts 3.727
## BodyPartRegionNeck 3.521
## BodyPartRegionNon-Standard Code -0.791
## BodyPartRegionNot Available 0.755
## BodyPartRegionTrunk 1.957
## BodyPartRegionUpper Extremities 2.453
## BodyPartAnkle -3.150
## BodyPartArtificial Appliance -0.839
## BodyPartBody Systems and Multiple Body Systems -2.294
## BodyPartBrain 3.196
## BodyPartButtocks 0.956
## BodyPartChest 1.097
## BodyPartDisc-Trunk 2.596
## BodyPartEar(S) 2.738
## BodyPartElbow 1.123
## BodyPartEyes 2.721
## BodyPartFacial Bones 2.218
## BodyPartFinger(S) -2.409
## BodyPartFoot -2.880
## BodyPartGreat Toe -1.576
## BodyPartHand 0.261
## BodyPartHeart -0.470
## BodyPartHip -0.295
## BodyPartInsufficient Info to Properly Identify?Unclassified -2.104
## BodyPartInternal Organs 0.488
## BodyPartKnee 0.652
## BodyPartLarynx -2.121
## BodyPartLower Arm 1.594
## BodyPartLower Back Area 3.365
## BodyPartLower Leg -1.603
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 3.671
## BodyPartLungs 1.308
## BodyPartMouth 1.572
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) -1.977
## BodyPartMultiple Head Injury 3.212
## BodyPartMultiple Lower Extremities 0.784
## BodyPartMultiple Neck Injury -2.485
## BodyPartMultiple Trunk -1.182
## BodyPartMultiple Upper Extremities -0.254
## BodyPartNo Physical Injury -2.384
## BodyPartNon-Standard Code NA
## BodyPartNose 2.304
## BodyPartNot Available NA
## BodyPartPelvis 2.185
## BodyPartSacrum And Coccyx -0.489
## BodyPartShoulder(S) 9.161
## BodyPartSkull 3.048
## BodyPartSoft Tissue-Head 3.639
## BodyPartSoft Tissue-Neck -2.644
## BodyPartSpinal Cord-Trunk -1.743
## BodyPartTeeth NA
## BodyPartThumb -1.197
## BodyPartToes -2.465
## BodyPartTrachea -1.961
## BodyPartUpper Arm 1.145
## BodyPartUpper Back Area 0.888
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## ClaimantTypeMedical Only -85.771
## ClaimantTypeReport Only -47.495
## GenderMale 0.290
## GenderNot Available 6.271
## ClaimantAge_at_DOI 1.981
## TimeProcesses 36.009
## InjuryNatureAll Other Cumulative Injury, NOC 0.350
## InjuryNatureAll Other Occupational Disease Injury, NOC 0.977
## InjuryNatureAll Other Specific Injuries, Noc 0.190
## InjuryNatureAmputation 0.727
## InjuryNatureAngina Pectoris 0.351
## InjuryNatureAsbestosis 1.157
## InjuryNatureAsphyxiation 1.707
## InjuryNatureBlack Lung 0.231
## InjuryNatureBurn 0.559
## InjuryNatureCancer 2.125
## InjuryNatureCarpal Tunnel Syndrome 0.522
## InjuryNatureConcussion 0.601
## InjuryNatureContagious Disease 0.258
## InjuryNatureContusion 0.379
## InjuryNatureCrushing 0.522
## InjuryNatureDermatitis 0.396
## InjuryNatureDislocation 0.592
## InjuryNatureDust Disease, NOC 0.416
## InjuryNatureElectric Shock 0.221
## InjuryNatureForeign Body 0.288
## InjuryNatureFracture 0.802
## InjuryNatureFreezing 0.045
## InjuryNatureHearing Loss Or Impairment -0.345
## InjuryNatureHeat Prostration 0.250
## InjuryNatureHernia 0.003
## InjuryNatureInfection 0.406
## InjuryNatureInflammation 0.496
## InjuryNatureLaceration 0.413
## InjuryNatureLoss of Hearing 0.365
## InjuryNatureMental Disorder 0.736
## InjuryNatureMental Stress 0.175
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 0.779
## InjuryNatureMultiple Physical Injuries Only 0.633
## InjuryNatureMyocardial Infarction 0.844
## InjuryNatureNo Physical Injury 0.416
## InjuryNatureNon-Standard Code 0.048
## InjuryNatureNot Available 0.518
## InjuryNaturePoisoning?Chemical (Other Than Metals) 0.263
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 0.346
## InjuryNaturePuncture 0.421
## InjuryNatureRadiation 0.722
## InjuryNatureRespiratory Disorders 0.439
## InjuryNatureRupture 1.369
## InjuryNatureSeverance 1.815
## InjuryNatureSilicosis 0.233
## InjuryNatureSprain 0.495
## InjuryNatureStrain 0.426
## InjuryNatureSyncope 0.507
## InjuryNatureVascular 0.363
## InjuryNatureVDT-Related Disease -0.184
## InjuryNatureVision Loss 0.351
## IsDenied1 -17.683
## Pr(>|t|)
## (Intercept) 0.521539
## BodyPartRegionLower Extremities 0.001193
## BodyPartRegionMultiple Body Parts 0.000194
## BodyPartRegionNeck 0.000429
## BodyPartRegionNon-Standard Code 0.428948
## BodyPartRegionNot Available 0.450150
## BodyPartRegionTrunk 0.050339
## BodyPartRegionUpper Extremities 0.014162
## BodyPartAnkle 0.001632
## BodyPartArtificial Appliance 0.401686
## BodyPartBody Systems and Multiple Body Systems 0.021802
## BodyPartBrain 0.001394
## BodyPartButtocks 0.338882
## BodyPartChest 0.272502
## BodyPartDisc-Trunk 0.009427
## BodyPartEar(S) 0.006186
## BodyPartElbow 0.261376
## BodyPartEyes 0.006515
## BodyPartFacial Bones 0.026560
## BodyPartFinger(S) 0.015979
## BodyPartFoot 0.003975
## BodyPartGreat Toe 0.114927
## BodyPartHand 0.794200
## BodyPartHeart 0.638229
## BodyPartHip 0.767623
## BodyPartInsufficient Info to Properly Identify?Unclassified 0.035377
## BodyPartInternal Organs 0.625742
## BodyPartKnee 0.514562
## BodyPartLarynx 0.033919
## BodyPartLower Arm 0.110858
## BodyPartLower Back Area 0.000767
## BodyPartLower Leg 0.109007
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 0.000242
## BodyPartLungs 0.190776
## BodyPartMouth 0.115857
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) 0.048096
## BodyPartMultiple Head Injury 0.001318
## BodyPartMultiple Lower Extremities 0.432944
## BodyPartMultiple Neck Injury 0.012956
## BodyPartMultiple Trunk 0.237167
## BodyPartMultiple Upper Extremities 0.799840
## BodyPartNo Physical Injury 0.017122
## BodyPartNon-Standard Code NA
## BodyPartNose 0.021200
## BodyPartNot Available NA
## BodyPartPelvis 0.028870
## BodyPartSacrum And Coccyx 0.624797
## BodyPartShoulder(S) < 2e-16
## BodyPartSkull 0.002301
## BodyPartSoft Tissue-Head 0.000274
## BodyPartSoft Tissue-Neck 0.008206
## BodyPartSpinal Cord-Trunk 0.081382
## BodyPartTeeth NA
## BodyPartThumb 0.231234
## BodyPartToes 0.013715
## BodyPartTrachea 0.049854
## BodyPartUpper Arm 0.252027
## BodyPartUpper Back Area 0.374689
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## ClaimantTypeMedical Only < 2e-16
## ClaimantTypeReport Only < 2e-16
## GenderMale 0.771804
## GenderNot Available 3.6e-10
## ClaimantAge_at_DOI 0.047642
## TimeProcesses < 2e-16
## InjuryNatureAll Other Cumulative Injury, NOC 0.726343
## InjuryNatureAll Other Occupational Disease Injury, NOC 0.328678
## InjuryNatureAll Other Specific Injuries, Noc 0.849328
## InjuryNatureAmputation 0.467071
## InjuryNatureAngina Pectoris 0.725474
## InjuryNatureAsbestosis 0.247293
## InjuryNatureAsphyxiation 0.087743
## InjuryNatureBlack Lung 0.817149
## InjuryNatureBurn 0.576054
## InjuryNatureCancer 0.033571
## InjuryNatureCarpal Tunnel Syndrome 0.601530
## InjuryNatureConcussion 0.547522
## InjuryNatureContagious Disease 0.796485
## InjuryNatureContusion 0.704583
## InjuryNatureCrushing 0.601722
## InjuryNatureDermatitis 0.692204
## InjuryNatureDislocation 0.553676
## InjuryNatureDust Disease, NOC 0.677129
## InjuryNatureElectric Shock 0.824949
## InjuryNatureForeign Body 0.773451
## InjuryNatureFracture 0.422787
## InjuryNatureFreezing 0.964416
## InjuryNatureHearing Loss Or Impairment 0.730387
## InjuryNatureHeat Prostration 0.802867
## InjuryNatureHernia 0.997886
## InjuryNatureInfection 0.684654
## InjuryNatureInflammation 0.619971
## InjuryNatureLaceration 0.679272
## InjuryNatureLoss of Hearing 0.714871
## InjuryNatureMental Disorder 0.461558
## InjuryNatureMental Stress 0.860990
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 0.436005
## InjuryNatureMultiple Physical Injuries Only 0.526815
## InjuryNatureMyocardial Infarction 0.398759
## InjuryNatureNo Physical Injury 0.677243
## InjuryNatureNon-Standard Code 0.961362
## InjuryNatureNot Available 0.604356
## InjuryNaturePoisoning?Chemical (Other Than Metals) 0.792599
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 0.729167
## InjuryNaturePuncture 0.673853
## InjuryNatureRadiation 0.470543
## InjuryNatureRespiratory Disorders 0.660549
## InjuryNatureRupture 0.170875
## InjuryNatureSeverance 0.069597
## InjuryNatureSilicosis 0.815463
## InjuryNatureSprain 0.620942
## InjuryNatureStrain 0.669775
## InjuryNatureSyncope 0.611897
## InjuryNatureVascular 0.716419
## InjuryNatureVDT-Related Disease 0.853744
## InjuryNatureVision Loss 0.725530
## IsDenied1 < 2e-16
##
## (Intercept)
## BodyPartRegionLower Extremities **
## BodyPartRegionMultiple Body Parts ***
## BodyPartRegionNeck ***
## BodyPartRegionNon-Standard Code
## BodyPartRegionNot Available
## BodyPartRegionTrunk .
## BodyPartRegionUpper Extremities *
## BodyPartAnkle **
## BodyPartArtificial Appliance
## BodyPartBody Systems and Multiple Body Systems *
## BodyPartBrain **
## BodyPartButtocks
## BodyPartChest
## BodyPartDisc-Trunk **
## BodyPartEar(S) **
## BodyPartElbow
## BodyPartEyes **
## BodyPartFacial Bones *
## BodyPartFinger(S) *
## BodyPartFoot **
## BodyPartGreat Toe
## BodyPartHand
## BodyPartHeart
## BodyPartHip
## BodyPartInsufficient Info to Properly Identify?Unclassified *
## BodyPartInternal Organs
## BodyPartKnee
## BodyPartLarynx *
## BodyPartLower Arm
## BodyPartLower Back Area ***
## BodyPartLower Leg
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) ***
## BodyPartLungs
## BodyPartMouth
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) *
## BodyPartMultiple Head Injury **
## BodyPartMultiple Lower Extremities
## BodyPartMultiple Neck Injury *
## BodyPartMultiple Trunk
## BodyPartMultiple Upper Extremities
## BodyPartNo Physical Injury *
## BodyPartNon-Standard Code
## BodyPartNose *
## BodyPartNot Available
## BodyPartPelvis *
## BodyPartSacrum And Coccyx
## BodyPartShoulder(S) ***
## BodyPartSkull **
## BodyPartSoft Tissue-Head ***
## BodyPartSoft Tissue-Neck **
## BodyPartSpinal Cord-Trunk .
## BodyPartTeeth
## BodyPartThumb
## BodyPartToes *
## BodyPartTrachea *
## BodyPartUpper Arm
## BodyPartUpper Back Area
## BodyPartUpper Leg
## BodyPartVertebrae
## BodyPartWhole Body
## BodyPartWrist
## ClaimantTypeMedical Only ***
## ClaimantTypeReport Only ***
## GenderMale
## GenderNot Available ***
## ClaimantAge_at_DOI *
## TimeProcesses ***
## InjuryNatureAll Other Cumulative Injury, NOC
## InjuryNatureAll Other Occupational Disease Injury, NOC
## InjuryNatureAll Other Specific Injuries, Noc
## InjuryNatureAmputation
## InjuryNatureAngina Pectoris
## InjuryNatureAsbestosis
## InjuryNatureAsphyxiation .
## InjuryNatureBlack Lung
## InjuryNatureBurn
## InjuryNatureCancer *
## InjuryNatureCarpal Tunnel Syndrome
## InjuryNatureConcussion
## InjuryNatureContagious Disease
## InjuryNatureContusion
## InjuryNatureCrushing
## InjuryNatureDermatitis
## InjuryNatureDislocation
## InjuryNatureDust Disease, NOC
## InjuryNatureElectric Shock
## InjuryNatureForeign Body
## InjuryNatureFracture
## InjuryNatureFreezing
## InjuryNatureHearing Loss Or Impairment
## InjuryNatureHeat Prostration
## InjuryNatureHernia
## InjuryNatureInfection
## InjuryNatureInflammation
## InjuryNatureLaceration
## InjuryNatureLoss of Hearing
## InjuryNatureMental Disorder
## InjuryNatureMental Stress
## InjuryNatureMultiple Injuries Including Both Physical and Psychological
## InjuryNatureMultiple Physical Injuries Only
## InjuryNatureMyocardial Infarction
## InjuryNatureNo Physical Injury
## InjuryNatureNon-Standard Code
## InjuryNatureNot Available
## InjuryNaturePoisoning?Chemical (Other Than Metals)
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury)
## InjuryNaturePuncture
## InjuryNatureRadiation
## InjuryNatureRespiratory Disorders
## InjuryNatureRupture
## InjuryNatureSeverance .
## InjuryNatureSilicosis
## InjuryNatureSprain
## InjuryNatureStrain
## InjuryNatureSyncope
## InjuryNatureVascular
## InjuryNatureVDT-Related Disease
## InjuryNatureVision Loss
## IsDenied1 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21190 on 84176 degrees of freedom
## (102388 observations deleted due to missingness)
## Multiple R-squared: 0.126, Adjusted R-squared: 0.1248
## F-statistic: 108.3 on 112 and 84176 DF, p-value: < 2.2e-16
# Single hypothetical claim used to obtain an example prediction from the
# fitted regression; every predictor of the model is given one value.
data <- data.frame(
  BodyPartRegion = "Trunk",
  BodyPart = "Lumbar and/or Sacral Vertebrae (Vertebra NOC Trunk)",
  ClaimantType = "Medical Only",
  Gender = "Male",
  ClaimantAge_at_DOI = 23,
  TimeProcesses = 381,
  InjuryNature = "Cancer",
  IsDenied = "0"
)
# Predicted TotalIncurredCost for that claim.
predict(regresion, newdata = data)
## 1
## 25564.86
# New data sets: gender paired with the body region, and gender paired with
# the denial flag.
cuerpo <- merged_df %>% select(Gender, BodyPartRegion)
denied <- merged_df %>% select(Gender, IsDenied)

# Prediction tree: Gender modelled from every other column of `cuerpo`.
arbol <- rpart(formula = Gender ~ ., data = cuerpo)
rpart.plot(arbol)

# Clustering input: cost and process duration only, keeping complete rows
# whose cost is strictly positive.
# (This assignment used to be fused onto the rpart.plot() line, which made the
# script fail to parse.)
cluster <- merged_df %>%
  select(TotalIncurredCost, TimeProcesses) %>%
  na.omit() %>%
  filter(TotalIncurredCost > 0)
summary(cluster)
## TotalIncurredCost TimeProcesses
## Min. : 0.0 Min. : 0
## 1st Qu.: 146.8 1st Qu.: 194
## Median : 332.4 Median : 992
## Mean : 5122.2 Mean : 1306
## 3rd Qu.: 1201.6 3rd Qu.: 2057
## Max. :1758919.5 Max. :16428
# Outlier removal with the 1.5 * IQR rule. Values are treated as outliers when
# they fall outside these limits:
#   lower limit = Q1 - 1.5 * IQR
#   upper limit = Q3 + 1.5 * IQR
# where Q1 and Q3 are the first and third quartiles.
# The quartiles and limits are computed from the data instead of being typed
# in by hand, so the filter stays correct if the input data changes.

# Cost limits.
cuartiles_c <- quantile(
  cluster$TotalIncurredCost,
  probs = c(0.25, 0.75),
  names = FALSE
)
IQR_C <- IQR(cluster$TotalIncurredCost)
LI_C <- cuartiles_c[1] - 1.5 * IQR_C
LS_C <- cuartiles_c[2] + 1.5 * IQR_C
cluster <- cluster[
  cluster$TotalIncurredCost >= LI_C & cluster$TotalIncurredCost <= LS_C,
]

# Duration limits, computed on the rows that survived the cost filter.
cuartiles_t <- quantile(
  cluster$TimeProcesses,
  probs = c(0.25, 0.75),
  names = FALSE
)
IQR_T <- IQR(cluster$TimeProcesses)
LI_T <- cuartiles_t[1] - 1.5 * IQR_T
LS_T <- cuartiles_t[2] + 1.5 * IQR_T
cluster <- cluster[
  cluster$TimeProcesses >= LI_T & cluster$TimeProcesses <= LS_T,
]

# Standardise both variables so neither dominates the distance computation.
cluster <- as.data.frame(scale(cluster))

# k-means with 3 groups. The starting centres are random, so the seed is fixed
# to make the segments reproducible between runs.
grupos <- 3
set.seed(123)
segmentos <- kmeans(cluster, grupos)
# Attach the segment assigned to each row.
asignación <- cbind(cluster, cluster = segmentos$cluster)

# To tune the cluster plot, the Elbow Method was used. The computation was
# attempted in RStudio, RCloud and Python to see which of the three could
# handle the size of the data. In the end the result could be obtained from
# both RCloud and Python.
# El procedimiento seguido para obtener el resultado fue el siguiente:
# Se trasladaron solo los datos necesarios con la siguiente fórmula, la cual nos permite pasar un dataframe de R a un Excel.
#write.xlsx(cluster, "Cluster.xlsx")
# Utiliza la visualización del codo para determinar el número óptimo de clústeres
#visualizer = KElbowVisualizer(model, k=(1, 10), metric='distortion', timings=False)
#visualizer.fit(datos) # Ajusta el modelo a tus datos
# Muestra el gráfico
#visualizer.show()
# Resultado: