Importar base de datos

# Load seguros.csv into the data frame used by every later step.
# NOTE(review): this is an absolute, machine-specific Windows path; the script
# only runs where the file exists at exactly this location.
base_de_datos <- read.csv(
  file = "C:\\Users\\gamas\\Downloads\\seguros.csv"
)

Entender la base de datos

# Compute column-by-column summary statistics of the loaded data and show them.
# Numeric columns report quartiles, mean and NA counts; character columns only
# report their length and class.
resumen <- summary(object = base_de_datos)
print(resumen)
##     ClaimID           TotalPaid       TotalReserves     TotalRecovery      
##  Min.   :  777632   Min.   :      0   Min.   :      0   Min.   :     0.00  
##  1st Qu.:  800748   1st Qu.:     83   1st Qu.:      0   1st Qu.:     0.00  
##  Median :  812128   Median :    271   Median :      0   Median :     0.00  
##  Mean   : 1864676   Mean   :  10404   Mean   :   3368   Mean   :    66.05  
##  3rd Qu.:  824726   3rd Qu.:   1122   3rd Qu.:      0   3rd Qu.:     0.00  
##  Max.   :62203364   Max.   :4527291   Max.   :1529053   Max.   :100000.00  
##                                                                            
##  IndemnityPaid      OtherPaid       TotalIncurredCost ClaimStatus       
##  Min.   :     0   Min.   :      0   Min.   : -10400   Length:31619      
##  1st Qu.:     0   1st Qu.:     80   1st Qu.:     80   Class :character  
##  Median :     0   Median :    265   Median :    266   Mode  :character  
##  Mean   :  4977   Mean   :   5427   Mean   :  13706                     
##  3rd Qu.:     0   3rd Qu.:   1023   3rd Qu.:   1098                     
##  Max.   :640732   Max.   :4129915   Max.   :4734750                     
##                                                                         
##  IncidentDate       IncidentDescription ReturnToWorkDate   ClaimantOpenedDate
##  Length:31619       Length:31619        Length:31619       Length:31619      
##  Class :character   Class :character    Class :character   Class :character  
##  Mode  :character   Mode  :character    Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:31619       Length:31619             Length:31619      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       Transaction_Time Procesing_Time     ClaimantAge_at_DOI
##  Min.   :0.00000   Min.   :    0    Min.   :    0.00   Min.   :14.0      
##  1st Qu.:0.00000   1st Qu.:  211    1st Qu.:    4.00   1st Qu.:33.0      
##  Median :0.00000   Median :  780    Median :   10.00   Median :42.0      
##  Mean   :0.04463   Mean   : 1004    Mean   :   62.99   Mean   :41.6      
##  3rd Qu.:0.00000   3rd Qu.: 1440    3rd Qu.:   24.00   3rd Qu.:50.0      
##  Max.   :1.00000   Max.   :16428    Max.   :11558.00   Max.   :94.0      
##                    NA's   :614                                           
##     Gender          ClaimantType       InjuryNature       BodyPartRegion    
##  Length:31619       Length:31619       Length:31619       Length:31619      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    BodyPart         AverageWeeklyWage1    ClaimID1        BillReviewALE    
##  Length:31619       Min.   : 100.0     Min.   :  777632   Min.   : -448.0  
##  Class :character   1st Qu.: 492.0     1st Qu.:  800748   1st Qu.:   16.0  
##  Mode  :character   Median : 492.0     Median :  812128   Median :   24.0  
##                     Mean   : 536.5     Mean   : 1864676   Mean   :  188.7  
##                     3rd Qu.: 492.0     3rd Qu.:  824726   3rd Qu.:   64.1  
##                     Max.   :8613.5     Max.   :62203364   Max.   :46055.3  
##                                                           NA's   :14912    
##     Hospital         PhysicianOutpatient       Rx          
##  Min.   : -12570.4   Min.   :   -549.5   Min.   :  -160.7  
##  1st Qu.:    210.5   1st Qu.:    105.8   1st Qu.:    22.9  
##  Median :    613.9   Median :    218.0   Median :    61.5  
##  Mean   :   5113.2   Mean   :   1813.2   Mean   :  1695.2  
##  3rd Qu.:   2349.1   3rd Qu.:    680.6   3rd Qu.:   189.0  
##  Max.   :2759604.0   Max.   :1219766.6   Max.   :631635.5  
##  NA's   :19655       NA's   :2329        NA's   :20730

Generar regresión (modelo lineal)

# Fit the full linear model of TotalIncurredCost on every candidate predictor.
# The formula is written inline (not stored in a variable) so that the call
# echoed by summary() shows the actual terms.
# Only rows with no NA in any formula term are used, so the columns that
# contain NA's (BillReviewALE, Hospital, PhysicianOutpatient, Rx) reduce the
# number of observations that enter the fit.
regresion <- lm(
  formula = TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time +
    ClaimantAge_at_DOI + Gender + ClaimantType + InjuryNature +
    BodyPartRegion + AverageWeeklyWage1 + BillReviewALE + Hospital +
    PhysicianOutpatient + Rx,
  data = base_de_datos
)

# Report coefficients, standard errors, p-values and goodness of fit.
summary(object = regresion)
## 
## Call:
## lm(formula = TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time + 
##     ClaimantAge_at_DOI + Gender + ClaimantType + InjuryNature + 
##     BodyPartRegion + AverageWeeklyWage1 + BillReviewALE + Hospital + 
##     PhysicianOutpatient + Rx, data = base_de_datos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -454179   -3153      24    2285 1063665 
## 
## Coefficients:
##                                                      Estimate Std. Error
## (Intercept)                                         1.701e+04  6.025e+03
## ClaimStatusO                                        4.490e+05  1.171e+04
## ClaimStatusR                                        3.190e+05  1.305e+04
## IsDenied                                           -8.474e+03  4.366e+03
## Procesing_Time                                      4.647e+01  4.818e+00
## ClaimantAge_at_DOI                                 -2.612e+01  6.833e+01
## GenderMale                                          3.149e+02  1.538e+03
## GenderNot Available                                 2.031e+03  1.205e+04
## ClaimantTypeMedical Only                           -2.477e+04  2.017e+03
## ClaimantTypeReport Only                            -2.431e+04  4.829e+03
## InjuryNatureAsbestosis                              6.366e+03  3.063e+04
## InjuryNatureBurn                                    1.883e+02  7.904e+03
## InjuryNatureCarpal Tunnel Syndrome                 -3.840e+03  1.039e+04
## InjuryNatureConcussion                              2.100e+04  1.679e+04
## InjuryNatureContagious Disease                      5.091e+03  1.674e+04
## InjuryNatureContusion                               3.604e+03  4.430e+03
## InjuryNatureCrushing                                3.074e+03  1.492e+04
## InjuryNatureDermatitis                              4.036e+03  7.050e+03
## InjuryNatureDislocation                            -3.692e+04  1.423e+04
## InjuryNatureDust Disease, NOC                       7.068e+02  4.304e+04
## InjuryNatureElectric Shock                          5.065e+03  2.512e+04
## InjuryNatureForeign Body                            4.459e+03  6.199e+03
## InjuryNatureFracture                                4.540e+03  6.567e+03
## InjuryNatureHearing Loss Or Impairment             -8.563e+02  2.521e+04
## InjuryNatureHeat Prostration                       -1.972e+03  2.513e+04
## InjuryNatureHernia                                 -9.356e+03  2.192e+04
## InjuryNatureInfection                              -2.409e+03  1.674e+04
## InjuryNatureInflammation                            4.660e+03  9.310e+03
## InjuryNatureLaceration                              3.311e+03  5.143e+03
## InjuryNatureLoss of Hearing                        -1.943e+04  2.528e+04
## InjuryNatureMental Stress                          -1.419e+04  1.978e+04
## InjuryNatureMultiple Physical Injuries Only         1.472e+04  7.683e+03
## InjuryNatureMyocardial Infarction                  -1.661e+04  3.091e+04
## InjuryNatureNo Physical Injury                      3.799e+03  8.442e+03
## InjuryNatureNon-Standard Code                      -1.644e+03  2.187e+04
## InjuryNaturePoisoning?Chemical (Other Than Metals)  4.469e+01  1.979e+04
## InjuryNaturePuncture                                4.340e+03  5.646e+03
## InjuryNatureRespiratory Disorders                  -2.827e+03  8.525e+03
## InjuryNatureRupture                                -2.115e+04  3.065e+04
## InjuryNatureSeverance                               8.548e+03  3.071e+04
## InjuryNatureSprain                                 -5.494e+03  5.390e+03
## InjuryNatureStrain                                  4.182e+03  4.543e+03
## InjuryNatureSyncope                                 3.808e+03  1.968e+04
## BodyPartRegionLower Extremities                     4.185e+03  3.199e+03
## BodyPartRegionMultiple Body Parts                   6.375e+02  3.339e+03
## BodyPartRegionNeck                                  1.189e+02  5.586e+03
## BodyPartRegionNon-Standard Code                    -1.555e+03  2.331e+04
## BodyPartRegionTrunk                                 4.658e+03  3.423e+03
## BodyPartRegionUpper Extremities                     9.662e+02  2.994e+03
## AverageWeeklyWage1                                  4.569e+00  3.241e+00
## BillReviewALE                                       3.181e-02  6.144e-01
## Hospital                                           -7.270e-04  2.014e-02
## PhysicianOutpatient                                -1.361e-02  5.445e-02
## Rx                                                  7.366e-03  4.369e-02
##                                                    t value Pr(>|t|)    
## (Intercept)                                          2.822  0.00480 ** 
## ClaimStatusO                                        38.348  < 2e-16 ***
## ClaimStatusR                                        24.432  < 2e-16 ***
## IsDenied                                            -1.941  0.05236 .  
## Procesing_Time                                       9.645  < 2e-16 ***
## ClaimantAge_at_DOI                                  -0.382  0.70227    
## GenderMale                                           0.205  0.83777    
## GenderNot Available                                  0.169  0.86613    
## ClaimantTypeMedical Only                           -12.280  < 2e-16 ***
## ClaimantTypeReport Only                             -5.034 5.07e-07 ***
## InjuryNatureAsbestosis                               0.208  0.83536    
## InjuryNatureBurn                                     0.024  0.98100    
## InjuryNatureCarpal Tunnel Syndrome                  -0.369  0.71180    
## InjuryNatureConcussion                               1.251  0.21111    
## InjuryNatureContagious Disease                       0.304  0.76103    
## InjuryNatureContusion                                0.814  0.41599    
## InjuryNatureCrushing                                 0.206  0.83672    
## InjuryNatureDermatitis                               0.572  0.56707    
## InjuryNatureDislocation                             -2.594  0.00953 ** 
## InjuryNatureDust Disease, NOC                        0.016  0.98690    
## InjuryNatureElectric Shock                           0.202  0.84025    
## InjuryNatureForeign Body                             0.719  0.47198    
## InjuryNatureFracture                                 0.691  0.48939    
## InjuryNatureHearing Loss Or Impairment              -0.034  0.97290    
## InjuryNatureHeat Prostration                        -0.078  0.93744    
## InjuryNatureHernia                                  -0.427  0.66957    
## InjuryNatureInfection                               -0.144  0.88562    
## InjuryNatureInflammation                             0.501  0.61674    
## InjuryNatureLaceration                               0.644  0.51980    
## InjuryNatureLoss of Hearing                         -0.768  0.44229    
## InjuryNatureMental Stress                           -0.717  0.47332    
## InjuryNatureMultiple Physical Injuries Only          1.915  0.05554 .  
## InjuryNatureMyocardial Infarction                   -0.537  0.59102    
## InjuryNatureNo Physical Injury                       0.450  0.65276    
## InjuryNatureNon-Standard Code                       -0.075  0.94008    
## InjuryNaturePoisoning?Chemical (Other Than Metals)   0.002  0.99820    
## InjuryNaturePuncture                                 0.769  0.44208    
## InjuryNatureRespiratory Disorders                   -0.332  0.74022    
## InjuryNatureRupture                                 -0.690  0.49038    
## InjuryNatureSeverance                                0.278  0.78075    
## InjuryNatureSprain                                  -1.019  0.30811    
## InjuryNatureStrain                                   0.921  0.35736    
## InjuryNatureSyncope                                  0.193  0.84659    
## BodyPartRegionLower Extremities                      1.308  0.19081    
## BodyPartRegionMultiple Body Parts                    0.191  0.84862    
## BodyPartRegionNeck                                   0.021  0.98302    
## BodyPartRegionNon-Standard Code                     -0.067  0.94682    
## BodyPartRegionTrunk                                  1.360  0.17377    
## BodyPartRegionUpper Extremities                      0.323  0.74697    
## AverageWeeklyWage1                                   1.410  0.15871    
## BillReviewALE                                        0.052  0.95871    
## Hospital                                            -0.036  0.97120    
## PhysicianOutpatient                                 -0.250  0.80267    
## Rx                                                   0.169  0.86612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42770 on 3191 degrees of freedom
##   (28374 observations deleted due to missingness)
## Multiple R-squared:  0.6116, Adjusted R-squared:  0.6051 
## F-statistic:  94.8 on 53 and 3191 DF,  p-value: < 2.2e-16

Evaluar y, en caso necesario, ajustar la regresión

# Refit with a reduced set of predictors. This overwrites `regresion`, so the
# prediction step further down uses this smaller model.
# Rx is still part of the formula and contains NA's, so rows without an Rx
# value are left out of this fit as well.
regresion <- lm(
  formula = TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time +
    Gender + ClaimantType + Rx,
  data = base_de_datos
)

# Report coefficients, standard errors, p-values and goodness of fit.
summary(object = regresion)
## 
## Call:
## lm(formula = TotalIncurredCost ~ ClaimStatus + IsDenied + Procesing_Time + 
##     Gender + ClaimantType + Rx, data = base_de_datos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -613611   -1298     332     925 2635418 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               2.171e+04  1.347e+03  16.118  < 2e-16 ***
## ClaimStatusO              4.357e+05  6.002e+03  72.589  < 2e-16 ***
## ClaimStatusR              2.543e+05  7.006e+03  36.296  < 2e-16 ***
## IsDenied                 -9.765e+03  2.724e+03  -3.585 0.000339 ***
## Procesing_Time            3.809e+01  2.242e+00  16.991  < 2e-16 ***
## GenderMale                6.080e+02  1.097e+03   0.554 0.579469    
## GenderNot Available      -6.497e+03  9.556e+03  -0.680 0.496640    
## ClaimantTypeMedical Only -2.263e+04  1.366e+03 -16.562  < 2e-16 ***
## ClaimantTypeReport Only  -2.315e+04  3.028e+03  -7.648 2.22e-14 ***
## Rx                        1.172e-02  4.309e-02   0.272 0.785670    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 56980 on 10879 degrees of freedom
##   (20730 observations deleted due to missingness)
## Multiple R-squared:  0.525,  Adjusted R-squared:  0.5246 
## F-statistic:  1336 on 9 and 10879 DF,  p-value: < 2.2e-16

Construir un modelo de predicción

# Describe one hypothetical claim, supplying a value for each predictor that
# the fitted model in `regresion` uses.
datos_nuevos <- data.frame(
  Procesing_Time = 12,
  ClaimStatus = "C",
  ClaimantType = "Indemnity",
  IsDenied = 0,
  Gender = "Female",
  Rx = 154.97
)

# Predicted TotalIncurredCost for that single claim.
predict(object = regresion, newdata = datos_nuevos)
##        1 
## 22172.82

Conclusiones

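El modelo de regresión es muy sencillo de utilizar y es posible conocer qué tan bien se ajusta el modelo a los datos por medio del análisis de R cuadrada, y qué tan significativo es por medio del estadístico F y de los valores p de los coeficientes.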

LS0tDQp0aXRsZTogIlNlZ3Vyb3MiDQphdXRob3I6ICJHYW1hbGllbCBPc3RvcyAtIEEwMTI3NzAyMyINCmRhdGU6ICIyMDI0LTA4LTIyIg0Kb3V0cHV0OiANCiBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogdHJ1ZQ0KICAgIHRvY19mbG9hdDogdHJ1ZQ0KICAgIGNvZGVfZG93bmxvYWQ6IHRydWUgDQotLS0NCg0KIVtdKEM6XFxVc2Vyc1xcZ2FtYXNcXFBpY3R1cmVzXFxzZWd1cm9zLmdpZikNCg0KIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+SW1wb3J0YXIgYmFzZSBkZSBkYXRvczwvc3Bhbj4NCmBgYHtyfQ0KYmFzZV9kZV9kYXRvcyA8LSByZWFkLmNzdigiQzpcXFVzZXJzXFxnYW1hc1xcRG93bmxvYWRzXFxzZWd1cm9zLmNzdiIpDQpgYGANCg0KIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+RW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvczwvc3Bhbj4NCmBgYHtyfQ0KcmVzdW1lbiA8LSBzdW1tYXJ5KGJhc2VfZGVfZGF0b3MpDQpyZXN1bWVuDQpgYGANCg0KIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+R2VuZXJhciByZWdyZXNpb24gKG1vZGVsbyBsaW5lYWwpPC9zcGFuPg0KYGBge3J9DQpyZWdyZXNpb24gPC0gbG0oVG90YWxJbmN1cnJlZENvc3QgfiBDbGFpbVN0YXR1cyArIElzRGVuaWVkICsgUHJvY2VzaW5nX1RpbWUgKyBDbGFpbWFudEFnZV9hdF9ET0kgKyBHZW5kZXIgKyBDbGFpbWFudFR5cGUgKyBJbmp1cnlOYXR1cmUgKyBCb2R5UGFydFJlZ2lvbiArIEF2ZXJhZ2VXZWVrbHlXYWdlMSArIEJpbGxSZXZpZXdBTEUgKyBIb3NwaXRhbCArIFBoeXNpY2lhbk91dHBhdGllbnQgKyBSeCwgZGF0YT1iYXNlX2RlX2RhdG9zKQ0Kc3VtbWFyeShyZWdyZXNpb24pDQpgYGANCg0KIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+RXZhbHVhciwgeSBlbiBjYXNvIG5lY2VzYXJpbywgYWp1c3RhciBsYSByZWdyZXNpb248L3NwYW4+DQpgYGB7cn0NCnJlZ3Jlc2lvbiA8LSBsbShUb3RhbEluY3VycmVkQ29zdCB+IENsYWltU3RhdHVzICsgSXNEZW5pZWQgKyBQcm9jZXNpbmdfVGltZSArIEdlbmRlciArIENsYWltYW50VHlwZSArIFJ4LCBkYXRhPWJhc2VfZGVfZGF0b3MpDQpzdW1tYXJ5KHJlZ3Jlc2lvbikNCmBgYA0KDQojIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij5Db25zdHJ1aXIgdW4gbW9kZWxvIGRlIHByZWRpY2Npb248L3NwYW4+DQpgYGB7cn0NCmRhdG9zX251ZXZvcyA8LSBkYXRhLmZyYW1lKFByb2Nlc2luZ19UaW1lPTEyLCBDbGFpbVN0YXR1cz0iQyIsIENsYWltYW50VHlwZT0iSW5kZW1uaXR5IiwgSXNEZW5pZWQ9MCwgR2VuZGVyPSJGZW1hbGUiLCBSeD0xNTQuOTcpDQpwcmVkaWN0KHJlZ3Jlc2lvbixkYXRvc19udWV2b3MpDQpgYGANCg0KIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+Q29uY2x1c2lvbmVzPC9zcGFuPg0KRWwgbW9kZWxvIGRlIHJlZ3Jlc2nDs24gZXMgbXV5IHNlbmNpbGxvIGRlIHV0aWxpemFyIHkgZXMgcG9zaWJsZSBjb25vY2VyIHF1w6kgdGFuIHNpZ25pZmljYXRpdm8g
ZXMgZWwgbW9kZWxvIHBvciBtZWRpbyBkZWwgYW7DoWxpc2lzIGRlIFIgY3VhZHJhZGEu