# file.choose()
# Path of the claims CSV. Set the SEGUROS_CSV environment variable to read the
# file from another location without editing this script; when it is unset or
# empty, the original author's absolute path is used.
ruta_seguros <- Sys.getenv("SEGUROS_CSV")
if (!nzchar(ruta_seguros)) {
  ruta_seguros <- "/Users/genarorodriguezalcantara/Desktop/R Files Manipulación de Datos/BDD/seguros.csv"
}
bd <- read.csv(ruta_seguros)
# Per-column overview of the loaded data frame (printed output follows below).
summary(bd)
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 777632 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 800748 1st Qu.: 83 1st Qu.: 0 1st Qu.: 0.00
## Median : 812128 Median : 271 Median : 0 Median : 0.00
## Mean : 1864676 Mean : 10404 Mean : 3368 Mean : 66.05
## 3rd Qu.: 824726 3rd Qu.: 1122 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203364 Max. :4527291 Max. :1529053 Max. :100000.00
##
## IndemnityPaid OtherPaid TotalIncurredCost ClaimStatus
## Min. : 0 Min. : 0 Min. : -10400 Length:31619
## 1st Qu.: 0 1st Qu.: 80 1st Qu.: 80 Class :character
## Median : 0 Median : 265 Median : 266 Mode :character
## Mean : 4977 Mean : 5427 Mean : 13706
## 3rd Qu.: 0 3rd Qu.: 1023 3rd Qu.: 1098
## Max. :640732 Max. :4129915 Max. :4734750
##
## IncidentDate IncidentDescription ReturnToWorkDate ClaimantOpenedDate
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## IsDenied Transaction_Time Procesing_Time ClaimantAge_at_DOI
## Min. :0.00000 Min. : 0 Min. : 0.00 Min. :14.0
## 1st Qu.:0.00000 1st Qu.: 211 1st Qu.: 4.00 1st Qu.:33.0
## Median :0.00000 Median : 780 Median : 10.00 Median :42.0
## Mean :0.04463 Mean : 1004 Mean : 62.99 Mean :41.6
## 3rd Qu.:0.00000 3rd Qu.: 1440 3rd Qu.: 24.00 3rd Qu.:50.0
## Max. :1.00000 Max. :16428 Max. :11558.00 Max. :94.0
## NA's :614
## Gender ClaimantType InjuryNature BodyPartRegion
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BodyPart AverageWeeklyWage1 ClaimID1 BillReviewALE
## Length:31619 Min. : 100.0 Min. : 777632 Min. : -448.0
## Class :character 1st Qu.: 492.0 1st Qu.: 800748 1st Qu.: 16.0
## Mode :character Median : 492.0 Median : 812128 Median : 24.0
## Mean : 536.5 Mean : 1864676 Mean : 188.7
## 3rd Qu.: 492.0 3rd Qu.: 824726 3rd Qu.: 64.1
## Max. :8613.5 Max. :62203364 Max. :46055.3
## NA's :14912
## Hospital PhysicianOutpatient Rx
## Min. : -12570.4 Min. : -549.5 Min. : -160.7
## 1st Qu.: 210.5 1st Qu.: 105.8 1st Qu.: 22.9
## Median : 613.9 Median : 218.0 Median : 61.5
## Mean : 5113.2 Mean : 1813.2 Mean : 1695.2
## 3rd Qu.: 2349.1 3rd Qu.: 680.6 3rd Qu.: 189.0
## Max. :2759604.0 Max. :1219766.6 Max. :631635.5
## NA's :19655 NA's :2329 NA's :20730
# install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Tally the rows of bd by ClaimStatus, most frequent status first.
bd %>% count(ClaimStatus, sort = TRUE)
## ClaimStatus n
## 1 C 31005
## 2 O 383
## 3 R 231
# count(bd, IncidentDate, sort = TRUE)
# count(bd, IncidentDescription, sort = TRUE)
# count(bd, ReturnToWorkDate, sort = TRUE)
# count(bd, ClaimantOpenedDate, sort = TRUE)
# count(bd, ClaimantClosedDate, sort = TRUE)
# count(bd, EmployerNotificationDate, sort = TRUE)
# count(bd, ReceivedDate, sort = TRUE)
# count(bd, Gender, sort = TRUE)
# count(bd, ClaimantType, sort = TRUE)
# count(bd, InjuryNature, sort = TRUE)
# count(bd, BodyPartRegion, sort = TRUE)
# count(bd, BodyPart, sort = TRUE)
# regresion <- lm(TotalIncurredCost ~ ClaimantAge_at_DOI + Gender + ClaimantType + InjuryNature + BodyPartRegion + BodyPart + AverageWeeklyWage1, data=bd)
# summary(regresion)
# Linear model of TotalIncurredCost on claimant age, gender, claimant type,
# body-part region, body part and average weekly wage, fitted on bd.
# NOTE(review): the summary pasted below reports coefficients "not defined
# because of singularities", i.e. the fit is rank-deficient; BodyPartRegion and
# BodyPart presumably overlap -- confirm before interpreting coefficients.
regresion_limpia <- lm(
  formula = TotalIncurredCost ~ ClaimantAge_at_DOI + Gender + ClaimantType +
    BodyPartRegion + BodyPart + AverageWeeklyWage1,
  data = bd
)
# Coefficient table and goodness-of-fit statistics for the model.
summary(regresion_limpia)
##
## Call:
## lm(formula = TotalIncurredCost ~ ClaimantAge_at_DOI + Gender +
## ClaimantType + BodyPartRegion + BodyPart + AverageWeeklyWage1,
## data = bd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -450567 -12736 -580 5092 4678170
##
## Coefficients: (6 not defined because of singularities)
## Estimate
## (Intercept) 2.857e+04
## ClaimantAge_at_DOI 1.518e+02
## GenderMale 1.225e+03
## GenderNot Available 6.818e+04
## ClaimantTypeMedical Only -5.519e+04
## ClaimantTypeReport Only -4.955e+04
## BodyPartRegionLower Extremities 1.674e+04
## BodyPartRegionMultiple Body Parts 2.883e+05
## BodyPartRegionNeck 4.859e+04
## BodyPartRegionNon-Standard Code -9.270e+03
## BodyPartRegionTrunk 3.303e+03
## BodyPartRegionUpper Extremities 1.047e+04
## BodyPartAnkle -7.020e+03
## BodyPartArtificial Appliance -2.826e+05
## BodyPartBody Systems and Multiple Body Systems -2.720e+05
## BodyPartBrain 4.070e+05
## BodyPartButtocks 2.449e+04
## BodyPartChest 8.235e+03
## BodyPartDisc-Trunk 1.957e+04
## BodyPartEar(S) 8.287e+03
## BodyPartElbow 1.819e+03
## BodyPartEyes 1.521e+04
## BodyPartFacial Bones 1.486e+04
## BodyPartFinger(S) 1.015e+02
## BodyPartFoot -7.970e+03
## BodyPartGreat Toe -7.687e+03
## BodyPartHand -2.855e+02
## BodyPartHeart -9.176e+03
## BodyPartHip 6.602e+03
## BodyPartInsufficient Info to Properly Identify?Unclassified -2.790e+05
## BodyPartInternal Organs 1.037e+04
## BodyPartKnee -1.508e+03
## BodyPartLarynx -8.191e+04
## BodyPartLower Arm -3.781e+02
## BodyPartLower Back Area 2.678e+04
## BodyPartLower Leg 6.323e+02
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 1.931e+04
## BodyPartLungs 5.695e+03
## BodyPartMouth 1.490e+04
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) -2.676e+05
## BodyPartMultiple Head Injury 1.908e+04
## BodyPartMultiple Lower Extremities 2.444e+03
## BodyPartMultiple Neck Injury -3.221e+04
## BodyPartMultiple Trunk 9.063e+03
## BodyPartMultiple Upper Extremities 5.885e+03
## BodyPartNo Physical Injury -2.756e+05
## BodyPartNon-Standard Code NA
## BodyPartNose 9.969e+03
## BodyPartPelvis 2.573e+03
## BodyPartSacrum And Coccyx 2.383e+04
## BodyPartShoulder(S) 6.237e+03
## BodyPartSkull 4.900e+04
## BodyPartSoft Tissue-Head -1.283e+04
## BodyPartSoft Tissue-Neck -1.506e+04
## BodyPartSpinal Cord-Trunk 9.452e+04
## BodyPartTeeth NA
## BodyPartThumb -5.342e+02
## BodyPartToes -7.005e+03
## BodyPartTrachea -4.832e+04
## BodyPartUpper Arm 4.822e+03
## BodyPartUpper Back Area 8.849e+03
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## AverageWeeklyWage1 7.943e+00
## Std. Error
## (Intercept) 8.643e+03
## ClaimantAge_at_DOI 4.024e+01
## GenderMale 9.170e+02
## GenderNot Available 8.408e+03
## ClaimantTypeMedical Only 1.119e+03
## ClaimantTypeReport Only 2.577e+03
## BodyPartRegionLower Extremities 9.729e+03
## BodyPartRegionMultiple Body Parts 3.137e+04
## BodyPartRegionNeck 4.091e+04
## BodyPartRegionNon-Standard Code 9.015e+03
## BodyPartRegionTrunk 9.497e+03
## BodyPartRegionUpper Extremities 8.636e+03
## BodyPartAnkle 5.460e+03
## BodyPartArtificial Appliance 4.684e+04
## BodyPartBody Systems and Multiple Body Systems 3.106e+04
## BodyPartBrain 3.674e+04
## BodyPartButtocks 9.133e+03
## BodyPartChest 5.719e+03
## BodyPartDisc-Trunk 7.170e+03
## BodyPartEar(S) 1.113e+04
## BodyPartElbow 4.197e+03
## BodyPartEyes 8.642e+03
## BodyPartFacial Bones 9.762e+03
## BodyPartFinger(S) 2.797e+03
## BodyPartFoot 5.672e+03
## BodyPartGreat Toe 1.079e+04
## BodyPartHand 2.900e+03
## BodyPartHeart 2.354e+04
## BodyPartHip 6.743e+03
## BodyPartInsufficient Info to Properly Identify?Unclassified 3.065e+04
## BodyPartInternal Organs 1.264e+04
## BodyPartKnee 5.228e+03
## BodyPartLarynx 8.945e+04
## BodyPartLower Arm 3.665e+03
## BodyPartLower Back Area 4.749e+03
## BodyPartLower Leg 5.880e+03
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 5.745e+03
## BodyPartLungs 7.384e+03
## BodyPartMouth 1.044e+04
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) 3.027e+04
## BodyPartMultiple Head Injury 8.703e+03
## BodyPartMultiple Lower Extremities 6.534e+03
## BodyPartMultiple Neck Injury 4.017e+04
## BodyPartMultiple Trunk 1.180e+04
## BodyPartMultiple Upper Extremities 4.222e+03
## BodyPartNo Physical Injury 3.060e+04
## BodyPartNon-Standard Code NA
## BodyPartNose 1.055e+04
## BodyPartPelvis 1.136e+04
## BodyPartSacrum And Coccyx 1.605e+04
## BodyPartShoulder(S) 3.020e+03
## BodyPartSkull 1.623e+04
## BodyPartSoft Tissue-Head 8.043e+04
## BodyPartSoft Tissue-Neck 4.032e+04
## BodyPartSpinal Cord-Trunk 1.410e+04
## BodyPartTeeth NA
## BodyPartThumb 3.716e+03
## BodyPartToes 7.763e+03
## BodyPartTrachea 5.018e+04
## BodyPartUpper Arm 4.449e+03
## BodyPartUpper Back Area 6.778e+03
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## AverageWeeklyWage1 2.074e+00
## t value
## (Intercept) 3.306
## ClaimantAge_at_DOI 3.772
## GenderMale 1.336
## GenderNot Available 8.109
## ClaimantTypeMedical Only -49.337
## ClaimantTypeReport Only -19.231
## BodyPartRegionLower Extremities 1.720
## BodyPartRegionMultiple Body Parts 9.190
## BodyPartRegionNeck 1.188
## BodyPartRegionNon-Standard Code -1.028
## BodyPartRegionTrunk 0.348
## BodyPartRegionUpper Extremities 1.212
## BodyPartAnkle -1.286
## BodyPartArtificial Appliance -6.032
## BodyPartBody Systems and Multiple Body Systems -8.760
## BodyPartBrain 11.079
## BodyPartButtocks 2.682
## BodyPartChest 1.440
## BodyPartDisc-Trunk 2.729
## BodyPartEar(S) 0.745
## BodyPartElbow 0.434
## BodyPartEyes 1.760
## BodyPartFacial Bones 1.522
## BodyPartFinger(S) 0.036
## BodyPartFoot -1.405
## BodyPartGreat Toe -0.712
## BodyPartHand -0.098
## BodyPartHeart -0.390
## BodyPartHip 0.979
## BodyPartInsufficient Info to Properly Identify?Unclassified -9.104
## BodyPartInternal Organs 0.821
## BodyPartKnee -0.288
## BodyPartLarynx -0.916
## BodyPartLower Arm -0.103
## BodyPartLower Back Area 5.640
## BodyPartLower Leg 0.108
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 3.362
## BodyPartLungs 0.771
## BodyPartMouth 1.426
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) -8.840
## BodyPartMultiple Head Injury 2.193
## BodyPartMultiple Lower Extremities 0.374
## BodyPartMultiple Neck Injury -0.802
## BodyPartMultiple Trunk 0.768
## BodyPartMultiple Upper Extremities 1.394
## BodyPartNo Physical Injury -9.004
## BodyPartNon-Standard Code NA
## BodyPartNose 0.945
## BodyPartPelvis 0.226
## BodyPartSacrum And Coccyx 1.485
## BodyPartShoulder(S) 2.065
## BodyPartSkull 3.019
## BodyPartSoft Tissue-Head -0.160
## BodyPartSoft Tissue-Neck -0.374
## BodyPartSpinal Cord-Trunk 6.704
## BodyPartTeeth NA
## BodyPartThumb -0.144
## BodyPartToes -0.902
## BodyPartTrachea -0.963
## BodyPartUpper Arm 1.084
## BodyPartUpper Back Area 1.306
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## AverageWeeklyWage1 3.829
## Pr(>|t|)
## (Intercept) 0.000948
## ClaimantAge_at_DOI 0.000162
## GenderMale 0.181507
## GenderNot Available 5.29e-16
## ClaimantTypeMedical Only < 2e-16
## ClaimantTypeReport Only < 2e-16
## BodyPartRegionLower Extremities 0.085403
## BodyPartRegionMultiple Body Parts < 2e-16
## BodyPartRegionNeck 0.234896
## BodyPartRegionNon-Standard Code 0.303834
## BodyPartRegionTrunk 0.727996
## BodyPartRegionUpper Extremities 0.225378
## BodyPartAnkle 0.198534
## BodyPartArtificial Appliance 1.64e-09
## BodyPartBody Systems and Multiple Body Systems < 2e-16
## BodyPartBrain < 2e-16
## BodyPartButtocks 0.007325
## BodyPartChest 0.149887
## BodyPartDisc-Trunk 0.006349
## BodyPartEar(S) 0.456362
## BodyPartElbow 0.664637
## BodyPartEyes 0.078417
## BodyPartFacial Bones 0.128074
## BodyPartFinger(S) 0.971047
## BodyPartFoot 0.160000
## BodyPartGreat Toe 0.476334
## BodyPartHand 0.921572
## BodyPartHeart 0.696661
## BodyPartHip 0.327556
## BodyPartInsufficient Info to Properly Identify?Unclassified < 2e-16
## BodyPartInternal Organs 0.411798
## BodyPartKnee 0.773028
## BodyPartLarynx 0.359853
## BodyPartLower Arm 0.917818
## BodyPartLower Back Area 1.71e-08
## BodyPartLower Leg 0.914363
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) 0.000775
## BodyPartLungs 0.440541
## BodyPartMouth 0.153833
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) < 2e-16
## BodyPartMultiple Head Injury 0.028333
## BodyPartMultiple Lower Extremities 0.708377
## BodyPartMultiple Neck Injury 0.422687
## BodyPartMultiple Trunk 0.442326
## BodyPartMultiple Upper Extremities 0.163291
## BodyPartNo Physical Injury < 2e-16
## BodyPartNon-Standard Code NA
## BodyPartNose 0.344904
## BodyPartPelvis 0.820847
## BodyPartSacrum And Coccyx 0.137635
## BodyPartShoulder(S) 0.038919
## BodyPartSkull 0.002541
## BodyPartSoft Tissue-Head 0.873264
## BodyPartSoft Tissue-Neck 0.708706
## BodyPartSpinal Cord-Trunk 2.07e-11
## BodyPartTeeth NA
## BodyPartThumb 0.885687
## BodyPartToes 0.366862
## BodyPartTrachea 0.335587
## BodyPartUpper Arm 0.278439
## BodyPartUpper Back Area 0.191720
## BodyPartUpper Leg NA
## BodyPartVertebrae NA
## BodyPartWhole Body NA
## BodyPartWrist NA
## AverageWeeklyWage1 0.000129
##
## (Intercept) ***
## ClaimantAge_at_DOI ***
## GenderMale
## GenderNot Available ***
## ClaimantTypeMedical Only ***
## ClaimantTypeReport Only ***
## BodyPartRegionLower Extremities .
## BodyPartRegionMultiple Body Parts ***
## BodyPartRegionNeck
## BodyPartRegionNon-Standard Code
## BodyPartRegionTrunk
## BodyPartRegionUpper Extremities
## BodyPartAnkle
## BodyPartArtificial Appliance ***
## BodyPartBody Systems and Multiple Body Systems ***
## BodyPartBrain ***
## BodyPartButtocks **
## BodyPartChest
## BodyPartDisc-Trunk **
## BodyPartEar(S)
## BodyPartElbow
## BodyPartEyes .
## BodyPartFacial Bones
## BodyPartFinger(S)
## BodyPartFoot
## BodyPartGreat Toe
## BodyPartHand
## BodyPartHeart
## BodyPartHip
## BodyPartInsufficient Info to Properly Identify?Unclassified ***
## BodyPartInternal Organs
## BodyPartKnee
## BodyPartLarynx
## BodyPartLower Arm
## BodyPartLower Back Area ***
## BodyPartLower Leg
## BodyPartLumbar and/or Sacral Vertebrae (Vertebra NOC Trunk) ***
## BodyPartLungs
## BodyPartMouth
## BodyPartMultiple Body Parts (Including Body Systems and Body Parts) ***
## BodyPartMultiple Head Injury *
## BodyPartMultiple Lower Extremities
## BodyPartMultiple Neck Injury
## BodyPartMultiple Trunk
## BodyPartMultiple Upper Extremities
## BodyPartNo Physical Injury ***
## BodyPartNon-Standard Code
## BodyPartNose
## BodyPartPelvis
## BodyPartSacrum And Coccyx
## BodyPartShoulder(S) *
## BodyPartSkull **
## BodyPartSoft Tissue-Head
## BodyPartSoft Tissue-Neck
## BodyPartSpinal Cord-Trunk ***
## BodyPartTeeth
## BodyPartThumb
## BodyPartToes
## BodyPartTrachea
## BodyPartUpper Arm
## BodyPartUpper Back Area
## BodyPartUpper Leg
## BodyPartVertebrae
## BodyPartWhole Body
## BodyPartWrist
## AverageWeeklyWage1 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 79980 on 31559 degrees of freedom
## Multiple R-squared: 0.1008, Adjusted R-squared: 0.09914
## F-statistic: 59.98 on 59 and 31559 DF, p-value: < 2.2e-16
# Scenario grid: one row per claimant age from 20 to 90 in steps of 10. The
# other predictors are single values that data.frame() recycles to every row.
datos_nuevos <- data.frame(
  ClaimantAge_at_DOI = seq(20, 90, by = 10),
  Gender = "Male",
  ClaimantType = "Medical Only",
  BodyPartRegion = "Head",
  BodyPart = "Brain",
  AverageWeeklyWage1 = 550
)
# Predicted TotalIncurredCost from regresion_limpia for each row of the grid.
predict(regresion_limpia, datos_nuevos)
## Warning in predict.lm(regresion_limpia, datos_nuevos): prediction from a
## rank-deficient fit may be misleading
## 1 2 3 4 5 6 7 8
## 389027.8 390545.9 392064.0 393582.1 395100.3 396618.4 398136.5 399654.6