Teoría

lm() es la función de R para ajustar modelos lineales. Es el modelo estadístico más básico que existe y el más fácil de interpretar. Para evaluarlo se usa la medida de R-cuadrada, que indica qué tan cerca están los datos de la línea de regresión ajustada (va de 0 a 1, donde 1 significa que el modelo explica toda la variabilidad).

Modelo Predictivo

# Predictive model ----

# Load the data from seguros.csv.
# NOTE(review): the path is relative, so it resolves against the current
# working directory -- confirm where this script is meant to be run from.
base_de_datos <- read.csv(file = "Desktop/seguros.csv")
# View(base_de_datos)  # uncomment for an interactive look at the table

# Get a first overview of every column; the outer parentheses make the
# assigned value print as well.
(resumen <- summary(base_de_datos))
##     ClaimID           TotalPaid       TotalReserves     TotalRecovery      
##  Min.   :  777632   Min.   :      0   Min.   :      0   Min.   :     0.00  
##  1st Qu.:  800748   1st Qu.:     83   1st Qu.:      0   1st Qu.:     0.00  
##  Median :  812128   Median :    271   Median :      0   Median :     0.00  
##  Mean   : 1864676   Mean   :  10404   Mean   :   3368   Mean   :    66.05  
##  3rd Qu.:  824726   3rd Qu.:   1122   3rd Qu.:      0   3rd Qu.:     0.00  
##  Max.   :62203364   Max.   :4527291   Max.   :1529053   Max.   :100000.00  
##                                                                            
##  IndemnityPaid      OtherPaid       TotalIncurredCost ClaimStatus       
##  Min.   :     0   Min.   :      0   Min.   : -10400   Length:31619      
##  1st Qu.:     0   1st Qu.:     80   1st Qu.:     80   Class :character  
##  Median :     0   Median :    265   Median :    266   Mode  :character  
##  Mean   :  4977   Mean   :   5427   Mean   :  13706                     
##  3rd Qu.:     0   3rd Qu.:   1023   3rd Qu.:   1098                     
##  Max.   :640732   Max.   :4129915   Max.   :4734750                     
##                                                                         
##  IncidentDate       IncidentDescription ReturnToWorkDate   ClaimantOpenedDate
##  Length:31619       Length:31619        Length:31619       Length:31619      
##  Class :character   Class :character    Class :character   Class :character  
##  Mode  :character   Mode  :character    Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:31619       Length:31619             Length:31619      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       Transaction_Time Procesing_Time     ClaimantAge_at_DOI
##  Min.   :0.00000   Min.   :    0    Min.   :    0.00   Min.   :14.0      
##  1st Qu.:0.00000   1st Qu.:  211    1st Qu.:    4.00   1st Qu.:33.0      
##  Median :0.00000   Median :  780    Median :   10.00   Median :42.0      
##  Mean   :0.04463   Mean   : 1004    Mean   :   62.99   Mean   :41.6      
##  3rd Qu.:0.00000   3rd Qu.: 1440    3rd Qu.:   24.00   3rd Qu.:50.0      
##  Max.   :1.00000   Max.   :16428    Max.   :11558.00   Max.   :94.0      
##                    NA's   :614                                           
##     Gender          ClaimantType       InjuryNature       BodyPartRegion    
##  Length:31619       Length:31619       Length:31619       Length:31619      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    BodyPart         AverageWeeklyWage1    ClaimID1        BillReviewALE    
##  Length:31619       Min.   : 100.0     Min.   :  777632   Min.   : -448.0  
##  Class :character   1st Qu.: 492.0     1st Qu.:  800748   1st Qu.:   16.0  
##  Mode  :character   Median : 492.0     Median :  812128   Median :   24.0  
##                     Mean   : 536.5     Mean   : 1864676   Mean   :  188.7  
##                     3rd Qu.: 492.0     3rd Qu.:  824726   3rd Qu.:   64.1  
##                     Max.   :8613.5     Max.   :62203364   Max.   :46055.3  
##                                                           NA's   :14912    
##     Hospital         PhysicianOutpatient       Rx          
##  Min.   : -12570.4   Min.   :   -549.5   Min.   :  -160.7  
##  1st Qu.:    210.5   1st Qu.:    105.8   1st Qu.:    22.9  
##  Median :    613.9   Median :    218.0   Median :    61.5  
##  Mean   :   5113.2   Mean   :   1813.2   Mean   :  1695.2  
##  3rd Qu.:   2349.1   3rd Qu.:    680.6   3rd Qu.:   189.0  
##  Max.   :2759604.0   Max.   :1219766.6   Max.   :631635.5  
##  NA's   :19655       NA's   :2329        NA's   :20730

Modelo Lineal

# Fit the first linear model: TotalIncurredCost against the cost, timing and
# wage predictors listed in the formula.
# NOTE(review): the printed summary reports two coefficients "not defined
# because of singularities", an R-squared of 1, and 614 observations deleted
# due to missingness.
regresion <- lm(
  TotalIncurredCost ~ TotalPaid + TotalReserves + TotalRecovery +
    IndemnityPaid + OtherPaid + Transaction_Time + Procesing_Time +
    AverageWeeklyWage1,
  data = base_de_datos
)
summary(regresion)
## 
## Call:
## lm(formula = TotalIncurredCost ~ TotalPaid + TotalReserves + 
##     TotalRecovery + IndemnityPaid + OtherPaid + Transaction_Time + 
##     Procesing_Time + AverageWeeklyWage1, data = base_de_datos)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.974e-07 -3.000e-11 -1.000e-11  1.000e-11  9.110e-09 
## 
## Coefficients: (2 not defined because of singularities)
##                      Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)        -5.281e-10  4.723e-11 -1.118e+01   <2e-16 ***
## TotalPaid           1.000e+00  1.248e-15  8.014e+14   <2e-16 ***
## TotalReserves              NA         NA         NA       NA    
## TotalRecovery      -1.000e+00  1.547e-14 -6.464e+13   <2e-16 ***
## IndemnityPaid       1.336e-15  2.254e-15  5.930e-01    0.553    
## OtherPaid                  NA         NA         NA       NA    
## Transaction_Time    2.212e-14  1.678e-14  1.318e+00    0.188    
## Procesing_Time     -1.098e-12  8.847e-14 -1.241e+01   <2e-16 ***
## AverageWeeklyWage1 -4.563e-14  7.368e-14 -6.190e-01    0.536    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.834e-09 on 30998 degrees of freedom
##   (614 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 4.999e+29 on 6 and 30998 DF,  p-value: < 2.2e-16

Evaluar y, en caso necesario, ajustar la regresión

# Evaluate the fit and, if needed, adjust the regression: refit with a
# reduced predictor set (TotalReserves, OtherPaid, Transaction_Time and
# Procesing_Time are left out) and inspect the new summary.
regresion <- lm(
  TotalIncurredCost ~ TotalPaid + TotalRecovery + IndemnityPaid +
    AverageWeeklyWage1,
  data = base_de_datos
)
summary(regresion)
## 
## Call:
## lm(formula = TotalIncurredCost ~ TotalPaid + TotalRecovery + 
##     IndemnityPaid + AverageWeeklyWage1, data = base_de_datos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -888407     597     800     851 1303964 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        230.420508 449.635356   0.512  0.60833    
## TotalPaid            1.204439   0.005196 231.805  < 2e-16 ***
## TotalRecovery       -0.250959   0.131256  -1.912  0.05589 .  
## IndemnityPaid        0.430447   0.011306  38.073  < 2e-16 ***
## AverageWeeklyWage1  -2.201917   0.778035  -2.830  0.00466 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30500 on 31614 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.869 
## F-statistic: 5.244e+04 on 4 and 31614 DF,  p-value: < 2.2e-16

Construir un modelo de predicción

# Predict TotalIncurredCost for a single new observation.
# The input values match the column means printed by summary(base_de_datos).
datos_nuevos <- data.frame(
  TotalPaid = 10404,
  TotalRecovery = 66.05,
  IndemnityPaid = 4977,
  AverageWeeklyWage1 = 536.5
)
predict(regresion, newdata = datos_nuevos)
##        1 
## 13705.84

Conclusión

El modelo de regresión lineal ajustado explica el 86.9% de la variabilidad del costo total incurrido de los siniestros (TotalIncurredCost). Este modelo se puede mejorar agregando otras variables que impacten el negocio.

LS0tCnRpdGxlOiAiUmVncmVzaW9uIExpbmVhbCAtIFNlZ3Vyb3MiCmF1dGhvcjogIlJlbmUgT2xlYSIKZGF0ZTogIjIwMjQtMDgtMTkiCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogVFJVRQogICAgdG9jX2Zsb2F0OiBUUlVFCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFCiAgICB0aGVtZTogInNwYWNlbGFiIgogICAgaGlnaGxpZ2h0OiAia2F0ZSIKICAgICAgICAgIAotLS0KCiMjIDxzcGFuIHN0eWxlPSJjb2xvcjogZ3JheTsiPlRlb3LDrWE8L3NwYW4+CioqbG0oKSoqIGVzIGxhIGZ1bmNpw7NuIGRlIFIgcGFyYSBhanVzdGFyIG1vZGVsb3MgbGluZWFsZXMuCkVzIGVsIG1vZGVsbyBlc3RhZGlzdGljbyBtw6FzIGJhc2ljbyBxdWUgZXhpc3RlIHkgbcOhcyBmYWNpbCBkZSBpbnRlcnByZXRhci4KUGFyYSBpbnRlcnByZXRhcmxvIHNlIHVzYSBsYSBtZWRpZGEgZGUgUi1jdWFkcmFkYSwgcXVlIHNpZ25pZmNpYSBxdWUgdGFuIGNlcmNhIGVzdMOhbiBsb3MgZGF0b3MgZGUgbGEgbGluZWEgZGUgcmVncmVzacOzbiBhanVzdGFkYSAKKFZhIGRlIDAgYSAxLCBkb25kZSAxIGVzIHF1ZSBlbCBtb2RlbG8gZXhwbGljYSB0b2RhIGxhIHZhcmlhYmlsaWRhZCkKCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6IGdyYXk7Ij5Nb2RlbG8gUHJlZGljdGl2bzwvc3Bhbj4KYGBge3J9CiNNb2RlbG8gUHJlZGljdGl2bwoKIyBJbXBvcnRhciBiYXNlIGRlIGRhdG9zCmJhc2VfZGVfZGF0b3MgPC0gcmVhZC5jc3YoIkRlc2t0b3Avc2VndXJvcy5jc3YiKQojVmlldyhiYXNlX2RlX2RhdG9zKQoKIyBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zCnJlc3VtZW4gPC0gc3VtbWFyeShiYXNlX2RlX2RhdG9zKQpyZXN1bWVuCgpgYGAKCiMjIDxzcGFuIHN0eWxlPSJjb2xvcjogZ3JheTsiPk1vZGVsbyBMaW5lYWw8L3NwYW4+CmBgYHtyfQojIEdlbmVyYXIgcmVncmVzaW9uIChtb2RlbG8gbGluZWFsKQpyZWdyZXNpb24gPC0gbG0oVG90YWxJbmN1cnJlZENvc3QgfiBUb3RhbFBhaWQgK1RvdGFsUmVzZXJ2ZXMgKyBUb3RhbFJlY292ZXJ5ICsgSW5kZW1uaXR5UGFpZCArIE90aGVyUGFpZCArIFRyYW5zYWN0aW9uX1RpbWUgKyBQcm9jZXNpbmdfVGltZSArIEF2ZXJhZ2VXZWVrbHlXYWdlMSwgZGF0YT1iYXNlX2RlX2RhdG9zKQpzdW1tYXJ5KHJlZ3Jlc2lvbikKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6IGdyYXk7Ij5FdmFsdWFyLCB5IGVuIGNhc28gbmVjZXNhcmlvLCBhanVzdGFyIGxhIHJlZ3Jlc2lvbiA8L3NwYW4+CmBgYHtyfQojIEV2YWx1YXIsIHkgZW4gY2FzbyBuZWNlc2FyaW8sIGFqdXN0YXIgbGEgcmVncmVzaW9uIApyZWdyZXNpb24gPC0gbG0oVG90YWxJbmN1cnJlZENvc3QgfiBUb3RhbFBhaWQgKyBUb3RhbFJlY292ZXJ5ICsgSW5kZW1uaXR5UGFpZCArIEF2ZXJhZ2VXZWVrbHlXYWdlMSwgZGF0YT1iYXNlX2RlX2RhdG9zKQpzdW1tYXJ5KHJlZ3Jlc2lvbikKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6IGdyYXk7Ij5Db25zdHJ1aXIgdW4g
bW9kZWxvIGRlIHByZWRpY2Npw7NuPC9zcGFuPgpgYGB7cn0KIyBDb25zdHJ1aXIgdW4gbW9kZWxvIGRlIHByZWRpY2Npb24KZGF0b3NfbnVldm9zIDwtIGRhdGEuZnJhbWUoVG90YWxQYWlkPSAxMDQwNCwgVG90YWxSZWNvdmVyeT0gNjYuMDUsIEluZGVtbml0eVBhaWQ9IDQ5NzcsIEF2ZXJhZ2VXZWVrbHlXYWdlMT0gNTM2LjUpCnByZWRpY3QocmVncmVzaW9uLGRhdG9zX251ZXZvcykKCmBgYAoKCiMjIDxzcGFuIHN0eWxlPSJjb2xvcjogZ3JheTsiPkNvbmNsdXNpw7NuPC9zcGFuPgoKRWwgbW9kZWxvIGRlIHJlZ3Jlc2nDs24gbGluZWFsIGV4cGxpY2EgZWwgMzklIGRlbCBjb21wb3J0YW1pZW50byBkZSBsYXMgcmVudGFzIGRlIGJpY2ljbGV0YXMuIEVzdGUgbW9kZWxvLiBzZSBwdWVkZSBtZWpvcmFyIGFnZXJlZ2FuZG8gb3RyYXMgdmFyaWFibGVzIHF1ZSBpbXBhY3RlbiBlbCBuZWdvY2lvLiAK