library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.5 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ stringr 1.4.0
## ✔ tidyr 1.2.0 ✔ forcats 0.5.1
## ✔ readr 2.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
# Print the current working directory so the run log shows where R is running.
getwd()
# Location of the claims CSV. The author's original absolute path is kept as
# the default so existing runs behave as before; set the SEGUROS_CSV
# environment variable to read a copy stored elsewhere (interactively,
# file.choose() can be used to find the path).
ruta_seguros <- Sys.getenv("SEGUROS_CSV", unset = NA)
if (is.na(ruta_seguros) || !nzchar(ruta_seguros)) {
  ruta_seguros <- "/Users/danieltrevino/Documents/Quinto Semestre TEC/Bootcamp de Programación/seguros.csv"
}
# Fail early with an actionable message instead of a generic reader error.
if (!file.exists(ruta_seguros)) {
  stop(
    "Claims data file not found: ", ruta_seguros,
    " (set the SEGUROS_CSV environment variable to its location)",
    call. = FALSE
  )
}
# Load the claims table; column types are guessed by read_csv().
seguros <- read_csv(ruta_seguros)
## Rows: 31619 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): ClaimStatus, IncidentDate, IncidentDescription, ReturnToWorkDate, ...
## dbl (17): ClaimID, TotalPaid, TotalReserves, TotalRecovery, IndemnityPaid, O...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column descriptive statistics of the claims table, stored in `resumen`
# and then printed.
resumen <- seguros %>% summary()
print(resumen)
## ClaimID TotalPaid TotalReserves TotalRecovery
## Min. : 777632 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 800748 1st Qu.: 83 1st Qu.: 0 1st Qu.: 0.00
## Median : 812128 Median : 271 Median : 0 Median : 0.00
## Mean : 1864676 Mean : 10404 Mean : 3368 Mean : 66.05
## 3rd Qu.: 824726 3rd Qu.: 1122 3rd Qu.: 0 3rd Qu.: 0.00
## Max. :62203364 Max. :4527291 Max. :1529053 Max. :100000.00
##
## IndemnityPaid OtherPaid TotalIncurredCost ClaimStatus
## Min. : 0 Min. : 0 Min. : -10400 Length:31619
## 1st Qu.: 0 1st Qu.: 80 1st Qu.: 80 Class :character
## Median : 0 Median : 265 Median : 266 Mode :character
## Mean : 4977 Mean : 5427 Mean : 13706
## 3rd Qu.: 0 3rd Qu.: 1023 3rd Qu.: 1098
## Max. :640732 Max. :4129915 Max. :4734750
##
## IncidentDate IncidentDescription ReturnToWorkDate ClaimantOpenedDate
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## ClaimantClosedDate EmployerNotificationDate ReceivedDate
## Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## IsDenied Transaction_Time Procesing_Time ClaimantAge_at_DOI
## Min. :0.00000 Min. : 0 Min. : 0.00 Min. :14.0
## 1st Qu.:0.00000 1st Qu.: 211 1st Qu.: 4.00 1st Qu.:33.0
## Median :0.00000 Median : 780 Median : 10.00 Median :42.0
## Mean :0.04463 Mean : 1004 Mean : 62.99 Mean :41.6
## 3rd Qu.:0.00000 3rd Qu.: 1440 3rd Qu.: 24.00 3rd Qu.:50.0
## Max. :1.00000 Max. :16428 Max. :11558.00 Max. :94.0
## NA's :614
## Gender ClaimantType InjuryNature BodyPartRegion
## Length:31619 Length:31619 Length:31619 Length:31619
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## BodyPart AverageWeeklyWage1 ClaimID1 BillReviewALE
## Length:31619 Min. : 100.0 Min. : 777632 Min. : -448.0
## Class :character 1st Qu.: 492.0 1st Qu.: 800748 1st Qu.: 16.0
## Mode :character Median : 492.0 Median : 812128 Median : 24.0
## Mean : 536.5 Mean : 1864676 Mean : 188.7
## 3rd Qu.: 492.0 3rd Qu.: 824726 3rd Qu.: 64.1
## Max. :8613.5 Max. :62203364 Max. :46055.3
## NA's :14912
## Hospital PhysicianOutpatient Rx
## Min. : -12570.4 Min. : -549.5 Min. : -160.7
## 1st Qu.: 210.5 1st Qu.: 105.8 1st Qu.: 22.9
## Median : 613.9 Median : 218.0 Median : 61.5
## Mean : 5113.2 Mean : 1813.2 Mean : 1695.2
## 3rd Qu.: 2349.1 3rd Qu.: 680.6 3rd Qu.: 189.0
## Max. :2759604.0 Max. :1219766.6 Max. :631635.5
## NA's :19655 NA's :2329 NA's :20730
# Scatter plot of each claim's total incurred cost against its processing
# time. Title and axis labels are user-facing text and stay in Spanish.
plot(
  x = seguros$Procesing_Time,
  y = seguros$TotalIncurredCost,
  main = "Influencia del tiempo de proceso en el gasto total del reclamo",
  xlab = " Tiempo de proceso",
  ylab = "Gasto por reclamo"
)
# Fit a linear model of the total incurred cost on claimant age, gender,
# claimant type, injury nature and body-part region. The text predictors are
# expanded by lm() into one indicator coefficient per level (e.g. GenderMale).
regresion <- lm(
  TotalIncurredCost ~ ClaimantAge_at_DOI + Gender + ClaimantType +
    InjuryNature + BodyPartRegion,
  data = seguros
)
# Show the coefficient table, residual summary and R-squared statistics.
print(summary(regresion))
##
## Call:
## lm(formula = TotalIncurredCost ~ ClaimantAge_at_DOI + Gender +
## ClaimantType + InjuryNature + BodyPartRegion, data = seguros)
##
## Residuals:
## Min 1Q Median 3Q Max
## -177721 -9387 -507 4058 4675138
##
## Coefficients:
## Estimate
## (Intercept) -9243.57
## ClaimantAge_at_DOI 184.64
## GenderMale 1482.94
## GenderNot Available 68781.81
## ClaimantTypeMedical Only -57672.76
## ClaimantTypeReport Only -51749.47
## InjuryNatureAll Other Specific Injuries, Noc 61382.92
## InjuryNatureAmputation 43083.33
## InjuryNatureAngina Pectoris -13366.72
## InjuryNatureAsbestosis 59636.03
## InjuryNatureAsphyxiation 48841.51
## InjuryNatureBlack Lung 47796.94
## InjuryNatureBurn 62135.08
## InjuryNatureCancer 65309.26
## InjuryNatureCarpal Tunnel Syndrome 44720.42
## InjuryNatureConcussion 99874.71
## InjuryNatureContagious Disease 76065.95
## InjuryNatureContusion 58841.13
## InjuryNatureCrushing 59202.96
## InjuryNatureDermatitis 56523.81
## InjuryNatureDislocation 46941.34
## InjuryNatureDust Disease, NOC 48084.12
## InjuryNatureElectric Shock 68432.89
## InjuryNatureForeign Body 57047.07
## InjuryNatureFracture 53193.95
## InjuryNatureFreezing 56150.37
## InjuryNatureHearing Loss Or Impairment 46400.16
## InjuryNatureHeat Prostration 46692.30
## InjuryNatureHernia 14895.51
## InjuryNatureInfection 50549.31
## InjuryNatureInflammation 66189.42
## InjuryNatureLaceration 57768.32
## InjuryNatureLoss of Hearing 44103.86
## InjuryNatureMental Disorder 45970.68
## InjuryNatureMental Stress 59827.85
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 44909.60
## InjuryNatureMultiple Physical Injuries Only 66545.07
## InjuryNatureMyocardial Infarction 25844.86
## InjuryNatureNo Physical Injury 50345.71
## InjuryNatureNon-Standard Code 45282.65
## InjuryNatureNot Available 55162.24
## InjuryNaturePoisoning?Chemical (Other Than Metals) 61123.14
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 53185.04
## InjuryNaturePuncture 57737.12
## InjuryNatureRadiation 61148.54
## InjuryNatureRespiratory Disorders 44778.83
## InjuryNatureRupture 70663.32
## InjuryNatureSeverance 90327.74
## InjuryNatureSprain 57408.69
## InjuryNatureStrain 55726.80
## InjuryNatureSyncope 48646.50
## InjuryNatureVascular 8474.36
## InjuryNatureVDT-Related Disease 6970.06
## InjuryNatureVision Loss 58859.46
## BodyPartRegionLower Extremities -1211.71
## BodyPartRegionMultiple Body Parts 3942.18
## BodyPartRegionNeck 6740.15
## BodyPartRegionNon-Standard Code -13078.91
## BodyPartRegionTrunk 10744.80
## BodyPartRegionUpper Extremities -2847.20
## Std. Error
## (Intercept) 80367.51
## ClaimantAge_at_DOI 40.15
## GenderMale 924.64
## GenderNot Available 8431.63
## ClaimantTypeMedical Only 1138.83
## ClaimantTypeReport Only 2602.56
## InjuryNatureAll Other Specific Injuries, Noc 80363.42
## InjuryNatureAmputation 85208.11
## InjuryNatureAngina Pectoris 98393.08
## InjuryNatureAsbestosis 82060.13
## InjuryNatureAsphyxiation 113611.24
## InjuryNatureBlack Lung 113611.36
## InjuryNatureBurn 80457.92
## InjuryNatureCancer 89823.29
## InjuryNatureCarpal Tunnel Syndrome 80518.82
## InjuryNatureConcussion 80722.43
## InjuryNatureContagious Disease 80880.03
## InjuryNatureContusion 80341.88
## InjuryNatureCrushing 80612.27
## InjuryNatureDermatitis 80430.56
## InjuryNatureDislocation 80594.98
## InjuryNatureDust Disease, NOC 92772.21
## InjuryNatureElectric Shock 81357.04
## InjuryNatureForeign Body 80392.39
## InjuryNatureFracture 80381.92
## InjuryNatureFreezing 92763.12
## InjuryNatureHearing Loss Or Impairment 82334.64
## InjuryNatureHeat Prostration 81279.43
## InjuryNatureHernia 80976.09
## InjuryNatureInfection 80733.48
## InjuryNatureInflammation 80455.47
## InjuryNatureLaceration 80359.22
## InjuryNatureLoss of Hearing 82823.69
## InjuryNatureMental Disorder 87993.39
## InjuryNatureMental Stress 81139.40
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 82962.75
## InjuryNatureMultiple Physical Injuries Only 80401.30
## InjuryNatureMyocardial Infarction 83375.86
## InjuryNatureNo Physical Injury 80449.48
## InjuryNatureNon-Standard Code 80979.46
## InjuryNatureNot Available 113637.58
## InjuryNaturePoisoning?Chemical (Other Than Metals) 81012.63
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 82807.69
## InjuryNaturePuncture 80369.74
## InjuryNatureRadiation 83159.02
## InjuryNatureRespiratory Disorders 80489.66
## InjuryNatureRupture 81359.25
## InjuryNatureSeverance 83363.74
## InjuryNatureSprain 80356.68
## InjuryNatureStrain 80343.56
## InjuryNatureSyncope 81162.19
## InjuryNatureVascular 113607.62
## InjuryNatureVDT-Related Disease 113606.99
## InjuryNatureVision Loss 83633.67
## BodyPartRegionLower Extremities 1939.44
## BodyPartRegionMultiple Body Parts 2072.33
## BodyPartRegionNeck 3068.76
## BodyPartRegionNon-Standard Code 10840.41
## BodyPartRegionTrunk 2088.57
## BodyPartRegionUpper Extremities 1830.23
## t value
## (Intercept) -0.115
## ClaimantAge_at_DOI 4.599
## GenderMale 1.604
## GenderNot Available 8.158
## ClaimantTypeMedical Only -50.642
## ClaimantTypeReport Only -19.884
## InjuryNatureAll Other Specific Injuries, Noc 0.764
## InjuryNatureAmputation 0.506
## InjuryNatureAngina Pectoris -0.136
## InjuryNatureAsbestosis 0.727
## InjuryNatureAsphyxiation 0.430
## InjuryNatureBlack Lung 0.421
## InjuryNatureBurn 0.772
## InjuryNatureCancer 0.727
## InjuryNatureCarpal Tunnel Syndrome 0.555
## InjuryNatureConcussion 1.237
## InjuryNatureContagious Disease 0.940
## InjuryNatureContusion 0.732
## InjuryNatureCrushing 0.734
## InjuryNatureDermatitis 0.703
## InjuryNatureDislocation 0.582
## InjuryNatureDust Disease, NOC 0.518
## InjuryNatureElectric Shock 0.841
## InjuryNatureForeign Body 0.710
## InjuryNatureFracture 0.662
## InjuryNatureFreezing 0.605
## InjuryNatureHearing Loss Or Impairment 0.564
## InjuryNatureHeat Prostration 0.574
## InjuryNatureHernia 0.184
## InjuryNatureInfection 0.626
## InjuryNatureInflammation 0.823
## InjuryNatureLaceration 0.719
## InjuryNatureLoss of Hearing 0.533
## InjuryNatureMental Disorder 0.522
## InjuryNatureMental Stress 0.737
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 0.541
## InjuryNatureMultiple Physical Injuries Only 0.828
## InjuryNatureMyocardial Infarction 0.310
## InjuryNatureNo Physical Injury 0.626
## InjuryNatureNon-Standard Code 0.559
## InjuryNatureNot Available 0.485
## InjuryNaturePoisoning?Chemical (Other Than Metals) 0.754
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 0.642
## InjuryNaturePuncture 0.718
## InjuryNatureRadiation 0.735
## InjuryNatureRespiratory Disorders 0.556
## InjuryNatureRupture 0.869
## InjuryNatureSeverance 1.084
## InjuryNatureSprain 0.714
## InjuryNatureStrain 0.694
## InjuryNatureSyncope 0.599
## InjuryNatureVascular 0.075
## InjuryNatureVDT-Related Disease 0.061
## InjuryNatureVision Loss 0.704
## BodyPartRegionLower Extremities -0.625
## BodyPartRegionMultiple Body Parts 1.902
## BodyPartRegionNeck 2.196
## BodyPartRegionNon-Standard Code -1.206
## BodyPartRegionTrunk 5.145
## BodyPartRegionUpper Extremities -1.556
## Pr(>|t|)
## (Intercept) 0.9084
## ClaimantAge_at_DOI 4.26e-06
## GenderMale 0.1088
## GenderNot Available 3.54e-16
## ClaimantTypeMedical Only < 2e-16
## ClaimantTypeReport Only < 2e-16
## InjuryNatureAll Other Specific Injuries, Noc 0.4450
## InjuryNatureAmputation 0.6131
## InjuryNatureAngina Pectoris 0.8919
## InjuryNatureAsbestosis 0.4674
## InjuryNatureAsphyxiation 0.6673
## InjuryNatureBlack Lung 0.6740
## InjuryNatureBurn 0.4400
## InjuryNatureCancer 0.4672
## InjuryNatureCarpal Tunnel Syndrome 0.5786
## InjuryNatureConcussion 0.2160
## InjuryNatureContagious Disease 0.3470
## InjuryNatureContusion 0.4639
## InjuryNatureCrushing 0.4627
## InjuryNatureDermatitis 0.4822
## InjuryNatureDislocation 0.5603
## InjuryNatureDust Disease, NOC 0.6043
## InjuryNatureElectric Shock 0.4003
## InjuryNatureForeign Body 0.4780
## InjuryNatureFracture 0.5081
## InjuryNatureFreezing 0.5450
## InjuryNatureHearing Loss Or Impairment 0.5731
## InjuryNatureHeat Prostration 0.5657
## InjuryNatureHernia 0.8541
## InjuryNatureInfection 0.5312
## InjuryNatureInflammation 0.4107
## InjuryNatureLaceration 0.4722
## InjuryNatureLoss of Hearing 0.5944
## InjuryNatureMental Disorder 0.6014
## InjuryNatureMental Stress 0.4609
## InjuryNatureMultiple Injuries Including Both Physical and Psychological 0.5883
## InjuryNatureMultiple Physical Injuries Only 0.4079
## InjuryNatureMyocardial Infarction 0.7566
## InjuryNatureNo Physical Injury 0.5314
## InjuryNatureNon-Standard Code 0.5760
## InjuryNatureNot Available 0.6274
## InjuryNaturePoisoning?Chemical (Other Than Metals) 0.4506
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury) 0.5207
## InjuryNaturePuncture 0.4725
## InjuryNatureRadiation 0.4621
## InjuryNatureRespiratory Disorders 0.5780
## InjuryNatureRupture 0.3851
## InjuryNatureSeverance 0.2786
## InjuryNatureSprain 0.4750
## InjuryNatureStrain 0.4879
## InjuryNatureSyncope 0.5489
## InjuryNatureVascular 0.9405
## InjuryNatureVDT-Related Disease 0.9511
## InjuryNatureVision Loss 0.4816
## BodyPartRegionLower Extremities 0.5321
## BodyPartRegionMultiple Body Parts 0.0571
## BodyPartRegionNeck 0.0281
## BodyPartRegionNon-Standard Code 0.2276
## BodyPartRegionTrunk 2.70e-07
## BodyPartRegionUpper Extremities 0.1198
##
## (Intercept)
## ClaimantAge_at_DOI ***
## GenderMale
## GenderNot Available ***
## ClaimantTypeMedical Only ***
## ClaimantTypeReport Only ***
## InjuryNatureAll Other Specific Injuries, Noc
## InjuryNatureAmputation
## InjuryNatureAngina Pectoris
## InjuryNatureAsbestosis
## InjuryNatureAsphyxiation
## InjuryNatureBlack Lung
## InjuryNatureBurn
## InjuryNatureCancer
## InjuryNatureCarpal Tunnel Syndrome
## InjuryNatureConcussion
## InjuryNatureContagious Disease
## InjuryNatureContusion
## InjuryNatureCrushing
## InjuryNatureDermatitis
## InjuryNatureDislocation
## InjuryNatureDust Disease, NOC
## InjuryNatureElectric Shock
## InjuryNatureForeign Body
## InjuryNatureFracture
## InjuryNatureFreezing
## InjuryNatureHearing Loss Or Impairment
## InjuryNatureHeat Prostration
## InjuryNatureHernia
## InjuryNatureInfection
## InjuryNatureInflammation
## InjuryNatureLaceration
## InjuryNatureLoss of Hearing
## InjuryNatureMental Disorder
## InjuryNatureMental Stress
## InjuryNatureMultiple Injuries Including Both Physical and Psychological
## InjuryNatureMultiple Physical Injuries Only
## InjuryNatureMyocardial Infarction
## InjuryNatureNo Physical Injury
## InjuryNatureNon-Standard Code
## InjuryNatureNot Available
## InjuryNaturePoisoning?Chemical (Other Than Metals)
## InjuryNaturePoisoning?General (NOT OD or Cumulative Injury)
## InjuryNaturePuncture
## InjuryNatureRadiation
## InjuryNatureRespiratory Disorders
## InjuryNatureRupture
## InjuryNatureSeverance
## InjuryNatureSprain
## InjuryNatureStrain
## InjuryNatureSyncope
## InjuryNatureVascular
## InjuryNatureVDT-Related Disease
## InjuryNatureVision Loss
## BodyPartRegionLower Extremities
## BodyPartRegionMultiple Body Parts .
## BodyPartRegionNeck *
## BodyPartRegionNon-Standard Code
## BodyPartRegionTrunk ***
## BodyPartRegionUpper Extremities
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 80320 on 31559 degrees of freedom
## Multiple R-squared: 0.09318, Adjusted R-squared: 0.09148
## F-statistic: 54.96 on 59 and 31559 DF, p-value: < 2.2e-16
# Tres asteriscos (***) = el coeficiente es estadísticamente muy significativo (p < 0.001) para lo que queremos predecir
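# Adjusted R-squared: proporción de la variabilidad del costo del reclamo que explica el modelo, ajustada por el número de predictores (aquí ≈ 0.09, es decir, cerca del 9 %)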
# Describe one hypothetical claim using the same predictor columns the model
# was fitted on.
datos_nuevos <- data.frame(
  ClaimantAge_at_DOI = 40,
  Gender = "Male",
  ClaimantType = "Medical Only",
  InjuryNature = "Contusion",
  BodyPartRegion = "Multiple Body Parts"
)
# Estimate the total incurred cost of that claim with the fitted model.
predict(regresion, newdata = datos_nuevos)
## 1
## 4735.462
# Según el modelo, este tipo de reclamo costaría aproximadamente 4735.46
En esta actividad aprendimos a construir un modelo de regresión lineal y a hacer predicciones a partir de él. En este caso, estimamos cuánto costaría un reclamo de seguro según el tipo de lesión, la edad del reclamante y otras variables de la base de datos.