# Load the data set from its CSV file into a data frame.
# NOTE(review): the path is absolute and points into one user's Downloads
# folder, so the script only runs on a machine where that exact file exists.
base_de_datos <- read.csv(
  file = "/Users/josemarentes/Downloads/seguros.csv"
)

# Get a first look at the data: compute the per-column summary, keep it in
# `resumen`, and print it.
resumen <- summary(object = base_de_datos)
print(x = resumen)
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 777632 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 800748 1st Qu.: 83 1st Qu.: 0 1st Qu.: 0.00
## Median : 812128 Median : 271 Median : 0 Median : 0.00
## Mean : 1864676 Mean : 10404 Mean : 3368 Mean : 66.05
## 3rd Qu.: 824726 3rd Qu.: 1122 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203364 Max. :4527291 Max. :1529053 Max. :100000.00
##
## IndemnityPaid OtherPaid TotalIncurredCost ClaimStatus
## Min. : 0 Min. : 0 Min. : -10400 Length:31619
## 1st Qu.: 0 1st Qu.: 80 1st Qu.: 80 Class :character
## Median : 0 Median : 265 Median : 266 Mode :character
## Mean : 4977 Mean : 5427 Mean : 13706
## 3rd Qu.: 0 3rd Qu.: 1023 3rd Qu.: 1098
## Max. :640732 Max. :4129915 Max. :4734750
##
## IncidentDate IncidentDescription ReturnToWorkDate ClaimantOpenedDate
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## IsDenied Transaction_Time Procesing_Time ClaimantAge_at_DOI
## Min. :0.00000 Min. : 0 Min. : 0.00 Min. :14.0
## 1st Qu.:0.00000 1st Qu.: 211 1st Qu.: 4.00 1st Qu.:33.0
## Median :0.00000 Median : 780 Median : 10.00 Median :42.0
## Mean :0.04463 Mean : 1004 Mean : 62.99 Mean :41.6
## 3rd Qu.:0.00000 3rd Qu.: 1440 3rd Qu.: 24.00 3rd Qu.:50.0
## Max. :1.00000 Max. :16428 Max. :11558.00 Max. :94.0
## NA's :614
## Gender ClaimantType InjuryNature BodyPartRegion
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BodyPart AverageWeeklyWage1 ClaimID1 BillReviewALE
## Length:31619 Min. : 100.0 Min. : 777632 Min. : -448.0
## Class :character 1st Qu.: 492.0 1st Qu.: 800748 1st Qu.: 16.0
## Mode :character Median : 492.0 Median : 812128 Median : 24.0
## Mean : 536.5 Mean : 1864676 Mean : 188.7
## 3rd Qu.: 492.0 3rd Qu.: 824726 3rd Qu.: 64.1
## Max. :8613.5 Max. :62203364 Max. :46055.3
## NA's :14912
## Hospital PhysicianOutpatient Rx
## Min. : -12570.4 Min. : -549.5 Min. : -160.7
## 1st Qu.: 210.5 1st Qu.: 105.8 1st Qu.: 22.9
## Median : 613.9 Median : 218.0 Median : 61.5
## Mean : 5113.2 Mean : 1813.2 Mean : 1695.2
## 3rd Qu.: 2349.1 3rd Qu.: 680.6 3rd Qu.: 189.0
## Max. :2759604.0 Max. :1219766.6 Max. :631635.5
## NA's :19655 NA's :2329 NA's :20730
# Plot TotalPaid (y axis) against TotalReserves (x axis) to eyeball how the
# two columns relate. Title and axis labels are set explicitly.
plot(
  x = base_de_datos[["TotalReserves"]],
  y = base_de_datos[["TotalPaid"]],
  xlab = "Reservas Totales",
  ylab = "Total Pagado",
  main = "Influencia de las Reservas Totales sobre el Total Pagado"
)

# Fit an initial linear model of TotalPaid on six candidate predictors and
# show its summary.
# NOTE(review): in the summary recorded below, IndemnityPaid and OtherPaid
# both get a coefficient of 1 and R-squared is 1, which suggests TotalPaid is
# simply their sum -- confirm whether they belong in the model at all.
# NOTE(review): ClaimID looks like an identifier rather than a measurement,
# and Hospital has many NAs (the recorded fit drops 19655 rows) -- confirm.
regresion <- lm(
  formula = TotalPaid ~
    ClaimID +
    TotalReserves +
    TotalRecovery +
    IndemnityPaid +
    OtherPaid +
    Hospital,
  data = base_de_datos
)
summary(regresion)
##
## Call:
## lm(formula = TotalPaid ~ ClaimID + TotalReserves + TotalRecovery +
## IndemnityPaid + OtherPaid + Hospital, data = base_de_datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.511e-09 7.000e-13 1.400e-12 2.100e-12 1.953e-10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.836e-11 9.259e-13 -6.303e+01 <2e-16 ***
## ClaimID -1.511e-19 1.731e-19 -8.730e-01 0.383
## TotalReserves -6.845e-16 2.722e-17 -2.515e+01 <2e-16 ***
## TotalRecovery -4.003e-16 9.719e-16 -4.120e-01 0.680
## IndemnityPaid 1.000e+00 4.276e-17 2.338e+16 <2e-16 ***
## OtherPaid 1.000e+00 3.370e-17 2.967e+16 <2e-16 ***
## Hospital -7.220e-18 2.565e-17 -2.810e-01 0.778
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.153e-11 on 11957 degrees of freedom
## (19655 observations deleted due to missingness)
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 6.342e+32 on 6 and 11957 DF, p-value: < 2.2e-16
# Refit the model after evaluation: same response and predictors as before
# except that ClaimID is left out. This overwrites the earlier `regresion`.
# NOTE(review): the summary recorded below still shows coefficients of 1 for
# IndemnityPaid and OtherPaid with R-squared of 1, and 19655 rows are dropped
# for missingness -- the fit looks like an accounting identity; confirm the
# intended predictor set.
regresion <- lm(
  formula = TotalPaid ~
    TotalReserves +
    TotalRecovery +
    IndemnityPaid +
    OtherPaid +
    Hospital,
  data = base_de_datos
)
summary(regresion)
##
## Call:
## lm(formula = TotalPaid ~ TotalReserves + TotalRecovery + IndemnityPaid +
## OtherPaid + Hospital, data = base_de_datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.527e-08 -2.400e-11 -2.400e-11 -2.300e-11 2.269e-07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.268e-10 2.350e-11 1.390e+01 < 2e-16 ***
## TotalReserves 6.457e-14 7.396e-16 8.729e+01 < 2e-16 ***
## TotalRecovery -1.090e-13 2.640e-14 -4.128e+00 3.68e-05 ***
## IndemnityPaid 1.000e+00 1.162e-15 8.604e+14 < 2e-16 ***
## OtherPaid 1.000e+00 9.159e-16 1.092e+15 < 2e-16 ***
## Hospital -1.166e-16 6.972e-16 -1.670e-01 0.867
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.488e-09 on 11958 degrees of freedom
## (19655 observations deleted due to missingness)
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.03e+30 on 5 and 11958 DF, p-value: < 2.2e-16
# Score one hypothetical new observation with the fitted model.
# The data frame supplies one value for every predictor in the model formula.
datos_nuevos <- data.frame(
  TotalReserves = 1000,
  TotalRecovery = 200,
  IndemnityPaid = 500,
  OtherPaid = 300,
  Hospital = 150
)

# NOTE(review): the recorded result (800) equals IndemnityPaid + OtherPaid
# (500 + 300), consistent with the unit coefficients in the recorded summary.
predicciones <- predict(object = regresion, newdata = datos_nuevos)
print(predicciones)
## 1
## 800
LS0tCnRpdGxlOiAiVW50aXRsZWQiCmF1dGhvcjogIkpvc2UgQW5nZWwgTWFyZW50ZXMiCmRhdGU6ICIyMDI0LTA4LTE5IgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogVFJVRQogICAgdG9jX2Zsb2F0OiBUUlVFCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFCiAgICB0aGVtZTogY29zbW8KLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCmBgYHtyfQojIExlZXIgbGEgYmFzZSBkZSBkYXRvcyBkZXNkZSB1biBhcmNoaXZvIENTVgpiYXNlX2RlX2RhdG9zIDwtIHJlYWQuY3N2KCIvVXNlcnMvam9zZW1hcmVudGVzL0Rvd25sb2Fkcy9zZWd1cm9zLmNzdiIpCgojIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MKcmVzdW1lbiA8LSBzdW1tYXJ5KGJhc2VfZGVfZGF0b3MpCnByaW50KHJlc3VtZW4pCgojIEdyYWZpY2FyIGxhIGluZmx1ZW5jaWEgZGUgdW5hIHZhcmlhYmxlIHNvYnJlIGVsIHRvdGFsIHBhZ2FkbwpwbG90KGJhc2VfZGVfZGF0b3MkVG90YWxSZXNlcnZlcywgYmFzZV9kZV9kYXRvcyRUb3RhbFBhaWQsIAogICAgIG1haW4gPSAiSW5mbHVlbmNpYSBkZSBsYXMgUmVzZXJ2YXMgVG90YWxlcyBzb2JyZSBlbCBUb3RhbCBQYWdhZG8iLCAKICAgICB4bGFiID0gIlJlc2VydmFzIFRvdGFsZXMiLCB5bGFiID0gIlRvdGFsIFBhZ2FkbyIpCgojIEdlbmVyYXIgdW4gbW9kZWxvIGRlIHJlZ3Jlc2nDs24gKG1vZGVsbyBsaW5lYWwpCnJlZ3Jlc2lvbiA8LSBsbShUb3RhbFBhaWQgfiBDbGFpbUlEICsgVG90YWxSZXNlcnZlcyArIFRvdGFsUmVjb3ZlcnkgKyBJbmRlbW5pdHlQYWlkICsgT3RoZXJQYWlkICsgSG9zcGl0YWwsIAogICAgICAgICAgICAgICAgZGF0YSA9IGJhc2VfZGVfZGF0b3MpCnN1bW1hcnkocmVncmVzaW9uKQoKIyBFdmFsdWFyIHkgYWp1c3RhciBlbCBtb2RlbG8gZGUgcmVncmVzacOzbiwgc2kgZXMgbmVjZXNhcmlvCnJlZ3Jlc2lvbiA8LSBsbShUb3RhbFBhaWQgfiBUb3RhbFJlc2VydmVzICsgVG90YWxSZWNvdmVyeSArIEluZGVtbml0eVBhaWQgKyBPdGhlclBhaWQgKyBIb3NwaXRhbCwgCiAgICAgICAgICAgICAgICBkYXRhID0gYmFzZV9kZV9kYXRvcykKc3VtbWFyeShyZWdyZXNpb24pCgojIENvbnN0cnVpciB1biBtb2RlbG8gZGUgcHJlZGljY2nDs24gY29uIG51ZXZvcyBkYXRvcwpkYXRvc19udWV2b3MgPC0gZGF0YS5mcmFtZShUb3RhbFJlc2VydmVzID0gMTAwMCwgVG90YWxSZWNvdmVyeSA9IDIwMCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIEluZGVtbml0eVBhaWQgPSA1MDAsIE90aGVyUGFpZCA9IDMwMCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIEhvc3BpdGFsID0gMTUwKQpwcmVkaWNjaW9uZXMgPC0gcHJlZGljdChyZWdyZXNpb24sIGRhdG9zX251ZXZvcykKcHJpbnQocHJlZGljY2lvbmVzKQoKYGBgCgo=