# Load the claims data set from a CSV file into a data frame.
# NOTE(review): the path is absolute and user-specific, so the script only runs
# where this exact file exists — consider a relative or configurable path.
base_de_datos <- read.csv(file = "/Users/josemarentes/Downloads/seguros.csv")

# Column-by-column summary of the data, stored in `resumen` and then printed.
resumen <- summary(object = base_de_datos)
print(x = resumen)
##     ClaimID           TotalPaid       TotalReserves     TotalRecovery      
##  Min.   :  777632   Min.   :      0   Min.   :      0   Min.   :     0.00  
##  1st Qu.:  800748   1st Qu.:     83   1st Qu.:      0   1st Qu.:     0.00  
##  Median :  812128   Median :    271   Median :      0   Median :     0.00  
##  Mean   : 1864676   Mean   :  10404   Mean   :   3368   Mean   :    66.05  
##  3rd Qu.:  824726   3rd Qu.:   1122   3rd Qu.:      0   3rd Qu.:     0.00  
##  Max.   :62203364   Max.   :4527291   Max.   :1529053   Max.   :100000.00  
##                                                                            
##  IndemnityPaid      OtherPaid       TotalIncurredCost ClaimStatus       
##  Min.   :     0   Min.   :      0   Min.   : -10400   Length:31619      
##  1st Qu.:     0   1st Qu.:     80   1st Qu.:     80   Class :character  
##  Median :     0   Median :    265   Median :    266   Mode  :character  
##  Mean   :  4977   Mean   :   5427   Mean   :  13706                     
##  3rd Qu.:     0   3rd Qu.:   1023   3rd Qu.:   1098                     
##  Max.   :640732   Max.   :4129915   Max.   :4734750                     
##                                                                         
##  IncidentDate       IncidentDescription ReturnToWorkDate   ClaimantOpenedDate
##  Length:31619       Length:31619        Length:31619       Length:31619      
##  Class :character   Class :character    Class :character   Class :character  
##  Mode  :character   Mode  :character    Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:31619       Length:31619             Length:31619      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       Transaction_Time Procesing_Time     ClaimantAge_at_DOI
##  Min.   :0.00000   Min.   :    0    Min.   :    0.00   Min.   :14.0      
##  1st Qu.:0.00000   1st Qu.:  211    1st Qu.:    4.00   1st Qu.:33.0      
##  Median :0.00000   Median :  780    Median :   10.00   Median :42.0      
##  Mean   :0.04463   Mean   : 1004    Mean   :   62.99   Mean   :41.6      
##  3rd Qu.:0.00000   3rd Qu.: 1440    3rd Qu.:   24.00   3rd Qu.:50.0      
##  Max.   :1.00000   Max.   :16428    Max.   :11558.00   Max.   :94.0      
##                    NA's   :614                                           
##     Gender          ClaimantType       InjuryNature       BodyPartRegion    
##  Length:31619       Length:31619       Length:31619       Length:31619      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    BodyPart         AverageWeeklyWage1    ClaimID1        BillReviewALE    
##  Length:31619       Min.   : 100.0     Min.   :  777632   Min.   : -448.0  
##  Class :character   1st Qu.: 492.0     1st Qu.:  800748   1st Qu.:   16.0  
##  Mode  :character   Median : 492.0     Median :  812128   Median :   24.0  
##                     Mean   : 536.5     Mean   : 1864676   Mean   :  188.7  
##                     3rd Qu.: 492.0     3rd Qu.:  824726   3rd Qu.:   64.1  
##                     Max.   :8613.5     Max.   :62203364   Max.   :46055.3  
##                                                           NA's   :14912    
##     Hospital         PhysicianOutpatient       Rx          
##  Min.   : -12570.4   Min.   :   -549.5   Min.   :  -160.7  
##  1st Qu.:    210.5   1st Qu.:    105.8   1st Qu.:    22.9  
##  Median :    613.9   Median :    218.0   Median :    61.5  
##  Mean   :   5113.2   Mean   :   1813.2   Mean   :  1695.2  
##  3rd Qu.:   2349.1   3rd Qu.:    680.6   3rd Qu.:   189.0  
##  Max.   :2759604.0   Max.   :1219766.6   Max.   :631635.5  
##  NA's   :19655       NA's   :2329        NA's   :20730
# Scatter plot of TotalPaid (y axis) against TotalReserves (x axis).
# The columns are looked up inside `base_de_datos` via with().
with(
  base_de_datos,
  plot(
    x = TotalReserves,
    y = TotalPaid,
    main = "Influencia de las Reservas Totales sobre el Total Pagado",
    xlab = "Reservas Totales",
    ylab = "Total Pagado"
  )
)

# Fit a first linear model for TotalPaid and show its summary.
# NOTE(review): the recorded summary of this fit reports R-squared = 1 and
# coefficients of exactly 1 on IndemnityPaid and OtherPaid, which suggests
# TotalPaid is simply their sum (target leakage) — confirm before relying on
# this model. ClaimID looks like a record identifier rather than a meaningful
# predictor — verify. The recorded output also reports 19655 observations
# deleted due to missingness, matching the NA count shown for Hospital.
regresion <- lm(
  formula = TotalPaid ~ ClaimID + TotalReserves + TotalRecovery +
    IndemnityPaid + OtherPaid + Hospital,
  data = base_de_datos
)
summary(regresion)
## 
## Call:
## lm(formula = TotalPaid ~ ClaimID + TotalReserves + TotalRecovery + 
##     IndemnityPaid + OtherPaid + Hospital, data = base_de_datos)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.511e-09  7.000e-13  1.400e-12  2.100e-12  1.953e-10 
## 
## Coefficients:
##                 Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)   -5.836e-11  9.259e-13 -6.303e+01   <2e-16 ***
## ClaimID       -1.511e-19  1.731e-19 -8.730e-01    0.383    
## TotalReserves -6.845e-16  2.722e-17 -2.515e+01   <2e-16 ***
## TotalRecovery -4.003e-16  9.719e-16 -4.120e-01    0.680    
## IndemnityPaid  1.000e+00  4.276e-17  2.338e+16   <2e-16 ***
## OtherPaid      1.000e+00  3.370e-17  2.967e+16   <2e-16 ***
## Hospital      -7.220e-18  2.565e-17 -2.810e-01    0.778    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.153e-11 on 11957 degrees of freedom
##   (19655 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 6.342e+32 on 6 and 11957 DF,  p-value: < 2.2e-16
# Refit the linear model without ClaimID and show the new summary.
# This overwrites the previous `regresion` object.
# NOTE(review): IndemnityPaid and OtherPaid are still predictors, and the
# recorded summary still shows R-squared = 1 with coefficients of exactly 1 on
# both, so the apparent leakage into TotalPaid remains — confirm. Hospital is
# not significant in the recorded output, yet rows where it is missing are
# still excluded from the fit (19655 observations deleted).
regresion <- lm(
  formula = TotalPaid ~ TotalReserves + TotalRecovery + IndemnityPaid +
    OtherPaid + Hospital,
  data = base_de_datos
)
summary(regresion)
## 
## Call:
## lm(formula = TotalPaid ~ TotalReserves + TotalRecovery + IndemnityPaid + 
##     OtherPaid + Hospital, data = base_de_datos)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.527e-08 -2.400e-11 -2.400e-11 -2.300e-11  2.269e-07 
## 
## Coefficients:
##                 Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)    3.268e-10  2.350e-11  1.390e+01  < 2e-16 ***
## TotalReserves  6.457e-14  7.396e-16  8.729e+01  < 2e-16 ***
## TotalRecovery -1.090e-13  2.640e-14 -4.128e+00 3.68e-05 ***
## IndemnityPaid  1.000e+00  1.162e-15  8.604e+14  < 2e-16 ***
## OtherPaid      1.000e+00  9.159e-16  1.092e+15  < 2e-16 ***
## Hospital      -1.166e-16  6.972e-16 -1.670e-01    0.867    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.488e-09 on 11958 degrees of freedom
##   (19655 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.03e+30 on 5 and 11958 DF,  p-value: < 2.2e-16
# Build a one-row data frame describing a hypothetical claim and predict its
# TotalPaid with the fitted model `regresion`, then print the prediction.
# NOTE(review): the recorded prediction (800) equals IndemnityPaid + OtherPaid
# (500 + 300), consistent with the model reproducing that sum — confirm.
datos_nuevos <- data.frame(
  TotalReserves = 1000,
  TotalRecovery = 200,
  IndemnityPaid = 500,
  OtherPaid = 300,
  Hospital = 150
)
predicciones <- predict(object = regresion, newdata = datos_nuevos)
print(x = predicciones)
##   1 
## 800
LS0tCnRpdGxlOiAiVW50aXRsZWQiCmF1dGhvcjogIkpvc2UgQW5nZWwgTWFyZW50ZXMiCmRhdGU6ICIyMDI0LTA4LTE5IgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogVFJVRQogICAgdG9jX2Zsb2F0OiBUUlVFCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFCiAgICB0aGVtZTogY29zbW8KLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCmBgYHtyfQojIExlZXIgbGEgYmFzZSBkZSBkYXRvcyBkZXNkZSB1biBhcmNoaXZvIENTVgpiYXNlX2RlX2RhdG9zIDwtIHJlYWQuY3N2KCIvVXNlcnMvam9zZW1hcmVudGVzL0Rvd25sb2Fkcy9zZWd1cm9zLmNzdiIpCgojIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MKcmVzdW1lbiA8LSBzdW1tYXJ5KGJhc2VfZGVfZGF0b3MpCnByaW50KHJlc3VtZW4pCgojIEdyYWZpY2FyIGxhIGluZmx1ZW5jaWEgZGUgdW5hIHZhcmlhYmxlIHNvYnJlIGVsIHRvdGFsIHBhZ2FkbwpwbG90KGJhc2VfZGVfZGF0b3MkVG90YWxSZXNlcnZlcywgYmFzZV9kZV9kYXRvcyRUb3RhbFBhaWQsIAogICAgIG1haW4gPSAiSW5mbHVlbmNpYSBkZSBsYXMgUmVzZXJ2YXMgVG90YWxlcyBzb2JyZSBlbCBUb3RhbCBQYWdhZG8iLCAKICAgICB4bGFiID0gIlJlc2VydmFzIFRvdGFsZXMiLCB5bGFiID0gIlRvdGFsIFBhZ2FkbyIpCgojIEdlbmVyYXIgdW4gbW9kZWxvIGRlIHJlZ3Jlc2nDs24gKG1vZGVsbyBsaW5lYWwpCnJlZ3Jlc2lvbiA8LSBsbShUb3RhbFBhaWQgfiBDbGFpbUlEICsgVG90YWxSZXNlcnZlcyArIFRvdGFsUmVjb3ZlcnkgKyBJbmRlbW5pdHlQYWlkICsgT3RoZXJQYWlkICsgSG9zcGl0YWwsIAogICAgICAgICAgICAgICAgZGF0YSA9IGJhc2VfZGVfZGF0b3MpCnN1bW1hcnkocmVncmVzaW9uKQoKIyBFdmFsdWFyIHkgYWp1c3RhciBlbCBtb2RlbG8gZGUgcmVncmVzacOzbiwgc2kgZXMgbmVjZXNhcmlvCnJlZ3Jlc2lvbiA8LSBsbShUb3RhbFBhaWQgfiBUb3RhbFJlc2VydmVzICsgVG90YWxSZWNvdmVyeSArIEluZGVtbml0eVBhaWQgKyBPdGhlclBhaWQgKyBIb3NwaXRhbCwgCiAgICAgICAgICAgICAgICBkYXRhID0gYmFzZV9kZV9kYXRvcykKc3VtbWFyeShyZWdyZXNpb24pCgojIENvbnN0cnVpciB1biBtb2RlbG8gZGUgcHJlZGljY2nDs24gY29uIG51ZXZvcyBkYXRvcwpkYXRvc19udWV2b3MgPC0gZGF0YS5mcmFtZShUb3RhbFJlc2VydmVzID0gMTAwMCwgVG90YWxSZWNvdmVyeSA9IDIwMCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIEluZGVtbml0eVBhaWQgPSA1MDAsIE90aGVyUGFpZCA9IDMwMCwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIEhvc3BpdGFsID0gMTUwKQpwcmVkaWNjaW9uZXMgPC0gcHJlZGljdChyZWdyZXNpb24sIGRhdG9zX251ZXZvcykKcHJpbnQocHJlZGljY2lvbmVzKQoKYGBgCgo=