library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ stringr 1.4.0
## ✔ tidyr   1.2.0     ✔ forcats 0.5.1
## ✔ readr   2.1.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggplot2)

# Show the current working directory (the base for any relative file paths).
getwd()

Importar Base de Datos

# Load the insurance-claims CSV into `seguros`.
# To locate the file interactively instead, run: file.choose()
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact location exists.
seguros <- readr::read_csv(
  file = "/Users/danieltrevino/Documents/Quinto Semestre TEC/Bootcamp de Programación/seguros.csv"
)
## Rows: 31619 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): ClaimStatus, IncidentDate, IncidentDescription, ReturnToWorkDate, ...
## dbl (17): ClaimID, TotalPaid, TotalReserves, TotalRecovery, IndemnityPaid, O...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Entender Base de Datos

# Per-column descriptive summary of the claims data; the outer parentheses
# make the assignment visible so the stored summary is also printed.
(resumen <- summary(object = seguros))
##     ClaimID           TotalPaid       TotalReserves     TotalRecovery      
##  Min.   :  777632   Min.   :      0   Min.   :      0   Min.   :     0.00  
##  1st Qu.:  800748   1st Qu.:     83   1st Qu.:      0   1st Qu.:     0.00  
##  Median :  812128   Median :    271   Median :      0   Median :     0.00  
##  Mean   : 1864676   Mean   :  10404   Mean   :   3368   Mean   :    66.05  
##  3rd Qu.:  824726   3rd Qu.:   1122   3rd Qu.:      0   3rd Qu.:     0.00  
##  Max.   :62203364   Max.   :4527291   Max.   :1529053   Max.   :100000.00  
##                                                                            
##  IndemnityPaid      OtherPaid       TotalIncurredCost ClaimStatus       
##  Min.   :     0   Min.   :      0   Min.   : -10400   Length:31619      
##  1st Qu.:     0   1st Qu.:     80   1st Qu.:     80   Class :character  
##  Median :     0   Median :    265   Median :    266   Mode  :character  
##  Mean   :  4977   Mean   :   5427   Mean   :  13706                     
##  3rd Qu.:     0   3rd Qu.:   1023   3rd Qu.:   1098                     
##  Max.   :640732   Max.   :4129915   Max.   :4734750                     
##                                                                         
##  IncidentDate       IncidentDescription ReturnToWorkDate   ClaimantOpenedDate
##  Length:31619       Length:31619        Length:31619       Length:31619      
##  Class :character   Class :character    Class :character   Class :character  
##  Mode  :character   Mode  :character    Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:31619       Length:31619             Length:31619      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       Transaction_Time Procesing_Time     ClaimantAge_at_DOI
##  Min.   :0.00000   Min.   :    0    Min.   :    0.00   Min.   :14.0      
##  1st Qu.:0.00000   1st Qu.:  211    1st Qu.:    4.00   1st Qu.:33.0      
##  Median :0.00000   Median :  780    Median :   10.00   Median :42.0      
##  Mean   :0.04463   Mean   : 1004    Mean   :   62.99   Mean   :41.6      
##  3rd Qu.:0.00000   3rd Qu.: 1440    3rd Qu.:   24.00   3rd Qu.:50.0      
##  Max.   :1.00000   Max.   :16428    Max.   :11558.00   Max.   :94.0      
##                    NA's   :614                                           
##     Gender          ClaimantType       InjuryNature       BodyPartRegion    
##  Length:31619       Length:31619       Length:31619       Length:31619      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    BodyPart         AverageWeeklyWage1    ClaimID1        BillReviewALE    
##  Length:31619       Min.   : 100.0     Min.   :  777632   Min.   : -448.0  
##  Class :character   1st Qu.: 492.0     1st Qu.:  800748   1st Qu.:   16.0  
##  Mode  :character   Median : 492.0     Median :  812128   Median :   24.0  
##                     Mean   : 536.5     Mean   : 1864676   Mean   :  188.7  
##                     3rd Qu.: 492.0     3rd Qu.:  824726   3rd Qu.:   64.1  
##                     Max.   :8613.5     Max.   :62203364   Max.   :46055.3  
##                                                           NA's   :14912    
##     Hospital         PhysicianOutpatient       Rx          
##  Min.   : -12570.4   Min.   :   -549.5   Min.   :  -160.7  
##  1st Qu.:    210.5   1st Qu.:    105.8   1st Qu.:    22.9  
##  Median :    613.9   Median :    218.0   Median :    61.5  
##  Mean   :   5113.2   Mean   :   1813.2   Mean   :  1695.2  
##  3rd Qu.:   2349.1   3rd Qu.:    680.6   3rd Qu.:   189.0  
##  Max.   :2759604.0   Max.   :1219766.6   Max.   :631635.5  
##  NA's   :19655       NA's   :2329        NA's   :20730
# Scatter plot of total incurred cost (y) against processing time (x).
plot(
  x = seguros$Procesing_Time,
  y = seguros$TotalIncurredCost,
  main = "Influencia del tiempo de proceso en el gasto total del reclamo",
  xlab = " Tiempo de proceso",
  ylab = "Gasto por reclamo"
)

Generar regresión (modelo lineal)

# Fit an ordinary least-squares model of total incurred cost on claimant age,
# gender, claimant type, injury nature and body-part region.
regresion <- lm(
  formula = TotalIncurredCost ~
    ClaimantAge_at_DOI + Gender + ClaimantType + InjuryNature + BodyPartRegion,
  data = seguros
)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(object = regresion)
## 
## Call:
## lm(formula = TotalIncurredCost ~ ClaimantAge_at_DOI + Gender + 
##     ClaimantType + InjuryNature + BodyPartRegion, data = seguros)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -177721   -9387    -507    4058 4675138 
## 
## Coefficients:
##                                                                          Estimate
## (Intercept)                                                              -9243.57
## ClaimantAge_at_DOI                                                         184.64
## GenderMale                                                                1482.94
## GenderNot Available                                                      68781.81
## ClaimantTypeMedical Only                                                -57672.76
## ClaimantTypeReport Only                                                 -51749.47
## InjuryNatureAll Other Specific Injuries, Noc                             61382.92
## InjuryNatureAmputation                                                   43083.33
## InjuryNatureAngina Pectoris                                             -13366.72
## InjuryNatureAsbestosis                                                   59636.03
## InjuryNatureAsphyxiation                                                 48841.51
## InjuryNatureBlack Lung                                                   47796.94
## InjuryNatureBurn                                                         62135.08
## InjuryNatureCancer                                                       65309.26
## InjuryNatureCarpal Tunnel Syndrome                                       44720.42
## InjuryNatureConcussion                                                   99874.71
## InjuryNatureContagious Disease                                           76065.95
## InjuryNatureContusion                                                    58841.13
## InjuryNatureCrushing                                                     59202.96
## InjuryNatureDermatitis                                                   56523.81
## InjuryNatureDislocation                                                  46941.34
## InjuryNatureDust Disease, NOC                                            48084.12
## InjuryNatureElectric Shock                                               68432.89
## InjuryNatureForeign Body                                                 57047.07
## InjuryNatureFracture                                                     53193.95
## InjuryNatureFreezing                                                     56150.37
## InjuryNatureHearing Loss Or Impairment                                   46400.16
## InjuryNatureHeat Prostration                                             46692.30
## InjuryNatureHernia                                                       14895.51
## InjuryNatureInfection                                                    50549.31
## InjuryNatureInflammation                                                 66189.42
## InjuryNatureLaceration                                                   57768.32
## InjuryNatureLoss of Hearing                                              44103.86
## InjuryNatureMental Disorder                                              45970.68
## InjuryNatureMental Stress                                                59827.85
## InjuryNatureMultiple Injuries Including Both Physical and Psychological  44909.60
## InjuryNatureMultiple Physical Injuries Only                              66545.07
## InjuryNatureMyocardial Infarction                                        25844.86
## InjuryNatureNo Physical Injury                                           50345.71
## InjuryNatureNon-Standard Code                                            45282.65
## InjuryNatureNot Available                                                55162.24
## InjuryNaturePoisoning?Chemical (Other Than Metals)                       61123.14
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury)              53185.04
## InjuryNaturePuncture                                                     57737.12
## InjuryNatureRadiation                                                    61148.54
## InjuryNatureRespiratory Disorders                                        44778.83
## InjuryNatureRupture                                                      70663.32
## InjuryNatureSeverance                                                    90327.74
## InjuryNatureSprain                                                       57408.69
## InjuryNatureStrain                                                       55726.80
## InjuryNatureSyncope                                                      48646.50
## InjuryNatureVascular                                                      8474.36
## InjuryNatureVDT-Related Disease                                           6970.06
## InjuryNatureVision Loss                                                  58859.46
## BodyPartRegionLower Extremities                                          -1211.71
## BodyPartRegionMultiple Body Parts                                         3942.18
## BodyPartRegionNeck                                                        6740.15
## BodyPartRegionNon-Standard Code                                         -13078.91
## BodyPartRegionTrunk                                                      10744.80
## BodyPartRegionUpper Extremities                                          -2847.20
##                                                                         Std. Error
## (Intercept)                                                               80367.51
## ClaimantAge_at_DOI                                                           40.15
## GenderMale                                                                  924.64
## GenderNot Available                                                        8431.63
## ClaimantTypeMedical Only                                                   1138.83
## ClaimantTypeReport Only                                                    2602.56
## InjuryNatureAll Other Specific Injuries, Noc                              80363.42
## InjuryNatureAmputation                                                    85208.11
## InjuryNatureAngina Pectoris                                               98393.08
## InjuryNatureAsbestosis                                                    82060.13
## InjuryNatureAsphyxiation                                                 113611.24
## InjuryNatureBlack Lung                                                   113611.36
## InjuryNatureBurn                                                          80457.92
## InjuryNatureCancer                                                        89823.29
## InjuryNatureCarpal Tunnel Syndrome                                        80518.82
## InjuryNatureConcussion                                                    80722.43
## InjuryNatureContagious Disease                                            80880.03
## InjuryNatureContusion                                                     80341.88
## InjuryNatureCrushing                                                      80612.27
## InjuryNatureDermatitis                                                    80430.56
## InjuryNatureDislocation                                                   80594.98
## InjuryNatureDust Disease, NOC                                             92772.21
## InjuryNatureElectric Shock                                                81357.04
## InjuryNatureForeign Body                                                  80392.39
## InjuryNatureFracture                                                      80381.92
## InjuryNatureFreezing                                                      92763.12
## InjuryNatureHearing Loss Or Impairment                                    82334.64
## InjuryNatureHeat Prostration                                              81279.43
## InjuryNatureHernia                                                        80976.09
## InjuryNatureInfection                                                     80733.48
## InjuryNatureInflammation                                                  80455.47
## InjuryNatureLaceration                                                    80359.22
## InjuryNatureLoss of Hearing                                               82823.69
## InjuryNatureMental Disorder                                               87993.39
## InjuryNatureMental Stress                                                 81139.40
## InjuryNatureMultiple Injuries Including Both Physical and Psychological   82962.75
## InjuryNatureMultiple Physical Injuries Only                               80401.30
## InjuryNatureMyocardial Infarction                                         83375.86
## InjuryNatureNo Physical Injury                                            80449.48
## InjuryNatureNon-Standard Code                                             80979.46
## InjuryNatureNot Available                                                113637.58
## InjuryNaturePoisoning?Chemical (Other Than Metals)                        81012.63
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury)               82807.69
## InjuryNaturePuncture                                                      80369.74
## InjuryNatureRadiation                                                     83159.02
## InjuryNatureRespiratory Disorders                                         80489.66
## InjuryNatureRupture                                                       81359.25
## InjuryNatureSeverance                                                     83363.74
## InjuryNatureSprain                                                        80356.68
## InjuryNatureStrain                                                        80343.56
## InjuryNatureSyncope                                                       81162.19
## InjuryNatureVascular                                                     113607.62
## InjuryNatureVDT-Related Disease                                          113606.99
## InjuryNatureVision Loss                                                   83633.67
## BodyPartRegionLower Extremities                                            1939.44
## BodyPartRegionMultiple Body Parts                                          2072.33
## BodyPartRegionNeck                                                         3068.76
## BodyPartRegionNon-Standard Code                                           10840.41
## BodyPartRegionTrunk                                                        2088.57
## BodyPartRegionUpper Extremities                                            1830.23
##                                                                         t value
## (Intercept)                                                              -0.115
## ClaimantAge_at_DOI                                                        4.599
## GenderMale                                                                1.604
## GenderNot Available                                                       8.158
## ClaimantTypeMedical Only                                                -50.642
## ClaimantTypeReport Only                                                 -19.884
## InjuryNatureAll Other Specific Injuries, Noc                              0.764
## InjuryNatureAmputation                                                    0.506
## InjuryNatureAngina Pectoris                                              -0.136
## InjuryNatureAsbestosis                                                    0.727
## InjuryNatureAsphyxiation                                                  0.430
## InjuryNatureBlack Lung                                                    0.421
## InjuryNatureBurn                                                          0.772
## InjuryNatureCancer                                                        0.727
## InjuryNatureCarpal Tunnel Syndrome                                        0.555
## InjuryNatureConcussion                                                    1.237
## InjuryNatureContagious Disease                                            0.940
## InjuryNatureContusion                                                     0.732
## InjuryNatureCrushing                                                      0.734
## InjuryNatureDermatitis                                                    0.703
## InjuryNatureDislocation                                                   0.582
## InjuryNatureDust Disease, NOC                                             0.518
## InjuryNatureElectric Shock                                                0.841
## InjuryNatureForeign Body                                                  0.710
## InjuryNatureFracture                                                      0.662
## InjuryNatureFreezing                                                      0.605
## InjuryNatureHearing Loss Or Impairment                                    0.564
## InjuryNatureHeat Prostration                                              0.574
## InjuryNatureHernia                                                        0.184
## InjuryNatureInfection                                                     0.626
## InjuryNatureInflammation                                                  0.823
## InjuryNatureLaceration                                                    0.719
## InjuryNatureLoss of Hearing                                               0.533
## InjuryNatureMental Disorder                                               0.522
## InjuryNatureMental Stress                                                 0.737
## InjuryNatureMultiple Injuries Including Both Physical and Psychological   0.541
## InjuryNatureMultiple Physical Injuries Only                               0.828
## InjuryNatureMyocardial Infarction                                         0.310
## InjuryNatureNo Physical Injury                                            0.626
## InjuryNatureNon-Standard Code                                             0.559
## InjuryNatureNot Available                                                 0.485
## InjuryNaturePoisoning?Chemical (Other Than Metals)                        0.754
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury)               0.642
## InjuryNaturePuncture                                                      0.718
## InjuryNatureRadiation                                                     0.735
## InjuryNatureRespiratory Disorders                                         0.556
## InjuryNatureRupture                                                       0.869
## InjuryNatureSeverance                                                     1.084
## InjuryNatureSprain                                                        0.714
## InjuryNatureStrain                                                        0.694
## InjuryNatureSyncope                                                       0.599
## InjuryNatureVascular                                                      0.075
## InjuryNatureVDT-Related Disease                                           0.061
## InjuryNatureVision Loss                                                   0.704
## BodyPartRegionLower Extremities                                          -0.625
## BodyPartRegionMultiple Body Parts                                         1.902
## BodyPartRegionNeck                                                        2.196
## BodyPartRegionNon-Standard Code                                          -1.206
## BodyPartRegionTrunk                                                       5.145
## BodyPartRegionUpper Extremities                                          -1.556
##                                                                         Pr(>|t|)
## (Intercept)                                                               0.9084
## ClaimantAge_at_DOI                                                      4.26e-06
## GenderMale                                                                0.1088
## GenderNot Available                                                     3.54e-16
## ClaimantTypeMedical Only                                                 < 2e-16
## ClaimantTypeReport Only                                                  < 2e-16
## InjuryNatureAll Other Specific Injuries, Noc                              0.4450
## InjuryNatureAmputation                                                    0.6131
## InjuryNatureAngina Pectoris                                               0.8919
## InjuryNatureAsbestosis                                                    0.4674
## InjuryNatureAsphyxiation                                                  0.6673
## InjuryNatureBlack Lung                                                    0.6740
## InjuryNatureBurn                                                          0.4400
## InjuryNatureCancer                                                        0.4672
## InjuryNatureCarpal Tunnel Syndrome                                        0.5786
## InjuryNatureConcussion                                                    0.2160
## InjuryNatureContagious Disease                                            0.3470
## InjuryNatureContusion                                                     0.4639
## InjuryNatureCrushing                                                      0.4627
## InjuryNatureDermatitis                                                    0.4822
## InjuryNatureDislocation                                                   0.5603
## InjuryNatureDust Disease, NOC                                             0.6043
## InjuryNatureElectric Shock                                                0.4003
## InjuryNatureForeign Body                                                  0.4780
## InjuryNatureFracture                                                      0.5081
## InjuryNatureFreezing                                                      0.5450
## InjuryNatureHearing Loss Or Impairment                                    0.5731
## InjuryNatureHeat Prostration                                              0.5657
## InjuryNatureHernia                                                        0.8541
## InjuryNatureInfection                                                     0.5312
## InjuryNatureInflammation                                                  0.4107
## InjuryNatureLaceration                                                    0.4722
## InjuryNatureLoss of Hearing                                               0.5944
## InjuryNatureMental Disorder                                               0.6014
## InjuryNatureMental Stress                                                 0.4609
## InjuryNatureMultiple Injuries Including Both Physical and Psychological   0.5883
## InjuryNatureMultiple Physical Injuries Only                               0.4079
## InjuryNatureMyocardial Infarction                                         0.7566
## InjuryNatureNo Physical Injury                                            0.5314
## InjuryNatureNon-Standard Code                                             0.5760
## InjuryNatureNot Available                                                 0.6274
## InjuryNaturePoisoning?Chemical (Other Than Metals)                        0.4506
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury)               0.5207
## InjuryNaturePuncture                                                      0.4725
## InjuryNatureRadiation                                                     0.4621
## InjuryNatureRespiratory Disorders                                         0.5780
## InjuryNatureRupture                                                       0.3851
## InjuryNatureSeverance                                                     0.2786
## InjuryNatureSprain                                                        0.4750
## InjuryNatureStrain                                                        0.4879
## InjuryNatureSyncope                                                       0.5489
## InjuryNatureVascular                                                      0.9405
## InjuryNatureVDT-Related Disease                                           0.9511
## InjuryNatureVision Loss                                                   0.4816
## BodyPartRegionLower Extremities                                           0.5321
## BodyPartRegionMultiple Body Parts                                         0.0571
## BodyPartRegionNeck                                                        0.0281
## BodyPartRegionNon-Standard Code                                           0.2276
## BodyPartRegionTrunk                                                     2.70e-07
## BodyPartRegionUpper Extremities                                           0.1198
##                                                                            
## (Intercept)                                                                
## ClaimantAge_at_DOI                                                      ***
## GenderMale                                                                 
## GenderNot Available                                                     ***
## ClaimantTypeMedical Only                                                ***
## ClaimantTypeReport Only                                                 ***
## InjuryNatureAll Other Specific Injuries, Noc                               
## InjuryNatureAmputation                                                     
## InjuryNatureAngina Pectoris                                                
## InjuryNatureAsbestosis                                                     
## InjuryNatureAsphyxiation                                                   
## InjuryNatureBlack Lung                                                     
## InjuryNatureBurn                                                           
## InjuryNatureCancer                                                         
## InjuryNatureCarpal Tunnel Syndrome                                         
## InjuryNatureConcussion                                                     
## InjuryNatureContagious Disease                                             
## InjuryNatureContusion                                                      
## InjuryNatureCrushing                                                       
## InjuryNatureDermatitis                                                     
## InjuryNatureDislocation                                                    
## InjuryNatureDust Disease, NOC                                              
## InjuryNatureElectric Shock                                                 
## InjuryNatureForeign Body                                                   
## InjuryNatureFracture                                                       
## InjuryNatureFreezing                                                       
## InjuryNatureHearing Loss Or Impairment                                     
## InjuryNatureHeat Prostration                                               
## InjuryNatureHernia                                                         
## InjuryNatureInfection                                                      
## InjuryNatureInflammation                                                   
## InjuryNatureLaceration                                                     
## InjuryNatureLoss of Hearing                                                
## InjuryNatureMental Disorder                                                
## InjuryNatureMental Stress                                                  
## InjuryNatureMultiple Injuries Including Both Physical and Psychological    
## InjuryNatureMultiple Physical Injuries Only                                
## InjuryNatureMyocardial Infarction                                          
## InjuryNatureNo Physical Injury                                             
## InjuryNatureNon-Standard Code                                              
## InjuryNatureNot Available                                                  
## InjuryNaturePoisoning?Chemical (Other Than Metals)                         
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury)                
## InjuryNaturePuncture                                                       
## InjuryNatureRadiation                                                      
## InjuryNatureRespiratory Disorders                                          
## InjuryNatureRupture                                                        
## InjuryNatureSeverance                                                      
## InjuryNatureSprain                                                         
## InjuryNatureStrain                                                         
## InjuryNatureSyncope                                                        
## InjuryNatureVascular                                                       
## InjuryNatureVDT-Related Disease                                            
## InjuryNatureVision Loss                                                    
## BodyPartRegionLower Extremities                                            
## BodyPartRegionMultiple Body Parts                                       .  
## BodyPartRegionNeck                                                      *  
## BodyPartRegionNon-Standard Code                                            
## BodyPartRegionTrunk                                                     ***
## BodyPartRegionUpper Extremities                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 80320 on 31559 degrees of freedom
## Multiple R-squared:  0.09318,    Adjusted R-squared:  0.09148 
## F-statistic: 54.96 on 59 and 31559 DF,  p-value: < 2.2e-16
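# tres * = la variable tiene una relación estadísticamente significativa (p < 0.001) con lo que queremos predecir
# Adjusted R-squared: proporción de la variabilidad del costo del reclamo que explica el modelo, ajustada por el número de predictores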

Modelo predictivo

# One hypothetical claim, described with the same predictors used to fit
# `regresion`.
datos_nuevos <- data.frame(
  ClaimantAge_at_DOI = 40,
  Gender = "Male",
  ClaimantType = "Medical Only",
  InjuryNature = "Contusion",
  BodyPartRegion = "Multiple Body Parts"
)

# Predicted total incurred cost for that claim.
predict(object = regresion, newdata = datos_nuevos)
##        1 
## 4735.462
# Según el modelo, este tipo de reclamo costaría aproximadamente 4735.46

Conclusiones

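En esta actividad aprendimos a construir un modelo de regresión lineal y a hacer predicciones a partir de él. En este caso, estimamos cuánto costaría un reclamo de seguro según el tipo de lesión, la edad del reclamante y otras variables de la base de datos. Cabe señalar que el modelo explica solo alrededor del 9 % de la variabilidad del costo (R² ajustado ≈ 0.09), por lo que sus predicciones deben interpretarse con cautela.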