# Load the claims data from the local CSV file.
ruta_csv <- "C:\\Users\\gamas\\Downloads\\seguros.csv"
base_de_datos <- read.csv(file = ruta_csv)

# Per-column descriptive summary (quartiles for numeric columns, length/class
# for character columns); the bare name on the last line prints it.
resumen <- summary(base_de_datos)
resumen
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 777632 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 800748 1st Qu.: 83 1st Qu.: 0 1st Qu.: 0.00
## Median : 812128 Median : 271 Median : 0 Median : 0.00
## Mean : 1864676 Mean : 10404 Mean : 3368 Mean : 66.05
## 3rd Qu.: 824726 3rd Qu.: 1122 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203364 Max. :4527291 Max. :1529053 Max. :100000.00
##
## IndemnityPaid OtherPaid TotalIncurredCost ClaimStatus
## Min. : 0 Min. : 0 Min. : -10400 Length:31619
## 1st Qu.: 0 1st Qu.: 80 1st Qu.: 80 Class :character
## Median : 0 Median : 265 Median : 266 Mode :character
## Mean : 4977 Mean : 5427 Mean : 13706
## 3rd Qu.: 0 3rd Qu.: 1023 3rd Qu.: 1098
## Max. :640732 Max. :4129915 Max. :4734750
##
## IncidentDate IncidentDescription ReturnToWorkDate ClaimantOpenedDate
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## IsDenied Transaction_Time Procesing_Time ClaimantAge_at_DOI
## Min. :0.00000 Min. : 0 Min. : 0.00 Min. :14.0
## 1st Qu.:0.00000 1st Qu.: 211 1st Qu.: 4.00 1st Qu.:33.0
## Median :0.00000 Median : 780 Median : 10.00 Median :42.0
## Mean :0.04463 Mean : 1004 Mean : 62.99 Mean :41.6
## 3rd Qu.:0.00000 3rd Qu.: 1440 3rd Qu.: 24.00 3rd Qu.:50.0
## Max. :1.00000 Max. :16428 Max. :11558.00 Max. :94.0
## NA's :614
## Gender ClaimantType InjuryNature BodyPartRegion
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BodyPart AverageWeeklyWage1 ClaimID1 BillReviewALE
## Length:31619 Min. : 100.0 Min. : 777632 Min. : -448.0
## Class :character 1st Qu.: 492.0 1st Qu.: 800748 1st Qu.: 16.0
## Mode :character Median : 492.0 Median : 812128 Median : 24.0
## Mean : 536.5 Mean : 1864676 Mean : 188.7
## 3rd Qu.: 492.0 3rd Qu.: 824726 3rd Qu.: 64.1
## Max. :8613.5 Max. :62203364 Max. :46055.3
## NA's :14912
## Hospital PhysicianOutpatient Rx
## Min. : -12570.4 Min. : -549.5 Min. : -160.7
## 1st Qu.: 210.5 1st Qu.: 105.8 1st Qu.: 22.9
## Median : 613.9 Median : 218.0 Median : 61.5
## Mean : 5113.2 Mean : 1813.2 Mean : 1695.2
## 3rd Qu.: 2349.1 3rd Qu.: 680.6 3rd Qu.: 189.0
## Max. :2759604.0 Max. :1219766.6 Max. :631635.5
## NA's :19655 NA's :2329 NA's :20730
# Full linear model: TotalIncurredCost regressed on claim status, denial flag,
# processing time, claimant age/gender/type, injury nature, body-part region,
# weekly wage and the BillReviewALE, Hospital, PhysicianOutpatient and Rx
# columns.
regresion <- lm(
  TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time +
    ClaimantAge_at_DOI + Gender + ClaimantType + InjuryNature +
    BodyPartRegion + AverageWeeklyWage1 + BillReviewALE + Hospital +
    PhysicianOutpatient + Rx,
  data = base_de_datos
)
# Rows with an NA in any model variable are dropped before fitting, so check
# the "observations deleted due to missingness" line in the summary.
summary(regresion)
##
## Call:
## lm(formula = TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time +
## ClaimantAge_at_DOI + Gender + ClaimantType + InjuryNature +
## BodyPartRegion + AverageWeeklyWage1 + BillReviewALE + Hospital +
## PhysicianOutpatient + Rx, data = base_de_datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -454179 -3153 24 2285 1063665
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.701e+04 6.025e+03
## ClaimStatusO 4.490e+05 1.171e+04
## ClaimStatusR 3.190e+05 1.305e+04
## IsDenied -8.474e+03 4.366e+03
## Procesing_Time 4.647e+01 4.818e+00
## ClaimantAge_at_DOI -2.612e+01 6.833e+01
## GenderMale 3.149e+02 1.538e+03
## GenderNot Available 2.031e+03 1.205e+04
## ClaimantTypeMedical Only -2.477e+04 2.017e+03
## ClaimantTypeReport Only -2.431e+04 4.829e+03
## InjuryNatureAsbestosis 6.366e+03 3.063e+04
## InjuryNatureBurn 1.883e+02 7.904e+03
## InjuryNatureCarpal Tunnel Syndrome -3.840e+03 1.039e+04
## InjuryNatureConcussion 2.100e+04 1.679e+04
## InjuryNatureContagious Disease 5.091e+03 1.674e+04
## InjuryNatureContusion 3.604e+03 4.430e+03
## InjuryNatureCrushing 3.074e+03 1.492e+04
## InjuryNatureDermatitis 4.036e+03 7.050e+03
## InjuryNatureDislocation -3.692e+04 1.423e+04
## InjuryNatureDust Disease, NOC 7.068e+02 4.304e+04
## InjuryNatureElectric Shock 5.065e+03 2.512e+04
## InjuryNatureForeign Body 4.459e+03 6.199e+03
## InjuryNatureFracture 4.540e+03 6.567e+03
## InjuryNatureHearing Loss Or Impairment -8.563e+02 2.521e+04
## InjuryNatureHeat Prostration -1.972e+03 2.513e+04
## InjuryNatureHernia -9.356e+03 2.192e+04
## InjuryNatureInfection -2.409e+03 1.674e+04
## InjuryNatureInflammation 4.660e+03 9.310e+03
## InjuryNatureLaceration 3.311e+03 5.143e+03
## InjuryNatureLoss of Hearing -1.943e+04 2.528e+04
## InjuryNatureMental Stress -1.419e+04 1.978e+04
## InjuryNatureMultiple Physical Injuries Only 1.472e+04 7.683e+03
## InjuryNatureMyocardial Infarction -1.661e+04 3.091e+04
## InjuryNatureNo Physical Injury 3.799e+03 8.442e+03
## InjuryNatureNon-Standard Code -1.644e+03 2.187e+04
## InjuryNaturePoisoning?Chemical (Other Than Metals) 4.469e+01 1.979e+04
## InjuryNaturePuncture 4.340e+03 5.646e+03
## InjuryNatureRespiratory Disorders -2.827e+03 8.525e+03
## InjuryNatureRupture -2.115e+04 3.065e+04
## InjuryNatureSeverance 8.548e+03 3.071e+04
## InjuryNatureSprain -5.494e+03 5.390e+03
## InjuryNatureStrain 4.182e+03 4.543e+03
## InjuryNatureSyncope 3.808e+03 1.968e+04
## BodyPartRegionLower Extremities 4.185e+03 3.199e+03
## BodyPartRegionMultiple Body Parts 6.375e+02 3.339e+03
## BodyPartRegionNeck 1.189e+02 5.586e+03
## BodyPartRegionNon-Standard Code -1.555e+03 2.331e+04
## BodyPartRegionTrunk 4.658e+03 3.423e+03
## BodyPartRegionUpper Extremities 9.662e+02 2.994e+03
## AverageWeeklyWage1 4.569e+00 3.241e+00
## BillReviewALE 3.181e-02 6.144e-01
## Hospital -7.270e-04 2.014e-02
## PhysicianOutpatient -1.361e-02 5.445e-02
## Rx 7.366e-03 4.369e-02
## t value Pr(>|t|)
## (Intercept) 2.822 0.00480 **
## ClaimStatusO 38.348 < 2e-16 ***
## ClaimStatusR 24.432 < 2e-16 ***
## IsDenied -1.941 0.05236 .
## Procesing_Time 9.645 < 2e-16 ***
## ClaimantAge_at_DOI -0.382 0.70227
## GenderMale 0.205 0.83777
## GenderNot Available 0.169 0.86613
## ClaimantTypeMedical Only -12.280 < 2e-16 ***
## ClaimantTypeReport Only -5.034 5.07e-07 ***
## InjuryNatureAsbestosis 0.208 0.83536
## InjuryNatureBurn 0.024 0.98100
## InjuryNatureCarpal Tunnel Syndrome -0.369 0.71180
## InjuryNatureConcussion 1.251 0.21111
## InjuryNatureContagious Disease 0.304 0.76103
## InjuryNatureContusion 0.814 0.41599
## InjuryNatureCrushing 0.206 0.83672
## InjuryNatureDermatitis 0.572 0.56707
## InjuryNatureDislocation -2.594 0.00953 **
## InjuryNatureDust Disease, NOC 0.016 0.98690
## InjuryNatureElectric Shock 0.202 0.84025
## InjuryNatureForeign Body 0.719 0.47198
## InjuryNatureFracture 0.691 0.48939
## InjuryNatureHearing Loss Or Impairment -0.034 0.97290
## InjuryNatureHeat Prostration -0.078 0.93744
## InjuryNatureHernia -0.427 0.66957
## InjuryNatureInfection -0.144 0.88562
## InjuryNatureInflammation 0.501 0.61674
## InjuryNatureLaceration 0.644 0.51980
## InjuryNatureLoss of Hearing -0.768 0.44229
## InjuryNatureMental Stress -0.717 0.47332
## InjuryNatureMultiple Physical Injuries Only 1.915 0.05554 .
## InjuryNatureMyocardial Infarction -0.537 0.59102
## InjuryNatureNo Physical Injury 0.450 0.65276
## InjuryNatureNon-Standard Code -0.075 0.94008
## InjuryNaturePoisoning?Chemical (Other Than Metals) 0.002 0.99820
## InjuryNaturePuncture 0.769 0.44208
## InjuryNatureRespiratory Disorders -0.332 0.74022
## InjuryNatureRupture -0.690 0.49038
## InjuryNatureSeverance 0.278 0.78075
## InjuryNatureSprain -1.019 0.30811
## InjuryNatureStrain 0.921 0.35736
## InjuryNatureSyncope 0.193 0.84659
## BodyPartRegionLower Extremities 1.308 0.19081
## BodyPartRegionMultiple Body Parts 0.191 0.84862
## BodyPartRegionNeck 0.021 0.98302
## BodyPartRegionNon-Standard Code -0.067 0.94682
## BodyPartRegionTrunk 1.360 0.17377
## BodyPartRegionUpper Extremities 0.323 0.74697
## AverageWeeklyWage1 1.410 0.15871
## BillReviewALE 0.052 0.95871
## Hospital -0.036 0.97120
## PhysicianOutpatient -0.250 0.80267
## Rx 0.169 0.86612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42770 on 3191 degrees of freedom
## (28374 observations deleted due to missingness)
## Multiple R-squared: 0.6116, Adjusted R-squared: 0.6051
## F-statistic: 94.8 on 53 and 3191 DF, p-value: < 2.2e-16
# Reduced linear model with a smaller predictor set. This reassigns
# `regresion`, so any later use of that name refers to this fit.
regresion <- lm(
  TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time +
    Gender + ClaimantType + Rx,
  data = base_de_datos
)
summary(regresion)
##
## Call:
## lm(formula = TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time +
## Gender + ClaimantType + Rx, data = base_de_datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -613611 -1298 332 925 2635418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.171e+04 1.347e+03 16.118 < 2e-16 ***
## ClaimStatusO 4.357e+05 6.002e+03 72.589 < 2e-16 ***
## ClaimStatusR 2.543e+05 7.006e+03 36.296 < 2e-16 ***
## IsDenied -9.765e+03 2.724e+03 -3.585 0.000339 ***
## Procesing_Time 3.809e+01 2.242e+00 16.991 < 2e-16 ***
## GenderMale 6.080e+02 1.097e+03 0.554 0.579469
## GenderNot Available -6.497e+03 9.556e+03 -0.680 0.496640
## ClaimantTypeMedical Only -2.263e+04 1.366e+03 -16.562 < 2e-16 ***
## ClaimantTypeReport Only -2.315e+04 3.028e+03 -7.648 2.22e-14 ***
## Rx 1.172e-02 4.309e-02 0.272 0.785670
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56980 on 10879 degrees of freedom
## (20730 observations deleted due to missingness)
## Multiple R-squared: 0.525, Adjusted R-squared: 0.5246
## F-statistic: 1336 on 9 and 10879 DF, p-value: < 2.2e-16
# One new observation to score. It supplies a column for every predictor in
# the model formula, with names matching the formula terms.
datos_nuevos <- data.frame(
  Procesing_Time = 12,
  ClaimStatus = "C",
  ClaimantType = "Indemnity",
  IsDenied = 0,
  Gender = "Female",
  Rx = 154.97
)

# Predicted TotalIncurredCost for the new observation.
predict(regresion, newdata = datos_nuevos)
## 1
## 22172.82
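El modelo de regresión lineal es muy sencillo de utilizar, y es posible conocer qué tan bien se ajusta a los datos por medio del análisis de la R cuadrada (en el modelo reducido, R cuadrada ajustada ≈ 0.52), mientras que la significancia global del modelo se evalúa con el estadístico F y su valor p.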