library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.3     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 1. Import the dataset
# The CSV location can be overridden with the CLAIMS_CSV environment variable
# so the script is not tied to a single machine; when the variable is unset or
# empty, the original path is used. (Interactive alternative: file.choose())
claims_path <- Sys.getenv("CLAIMS_CSV")
if (!nzchar(claims_path)) {
  claims_path <- "/Users/alejandromontano/Desktop/Claims_limpia.csv"
}
# Fail early with an actionable message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(claims_path)) {
  stop(
    "Claims CSV not found at '", claims_path,
    "'. Set the CLAIMS_CSV environment variable to its location.",
    call. = FALSE
  )
}
df <- read.csv(claims_path)
## 2. Understand the dataset
summary(df)
##     ClaimID           TotalPaid       TotalReserves     TotalRecovery      
##  Min.   :  650915   Min.   :   -270   Min.   :      0   Min.   :     0.00  
##  1st Qu.:  811125   1st Qu.:     60   1st Qu.:      0   1st Qu.:     0.00  
##  Median :  844626   Median :    235   Median :      0   Median :     0.00  
##  Mean   :10149151   Mean   :   6746   Mean   :   2233   Mean   :    68.88  
##  3rd Qu.:22716506   3rd Qu.:    938   3rd Qu.:      0   3rd Qu.:     0.00  
##  Max.   :62203891   Max.   :4527291   Max.   :2069575   Max.   :130541.03  
##                                                                            
##  IndemnityPaid      OtherPaid       ClaimStatus        IncidentDate      
##  Min.   :  -475   Min.   :  -7820   Length:134004      Length:134004     
##  1st Qu.:     0   1st Qu.:     58   Class :character   Class :character  
##  Median :     0   Median :    230   Mode  :character   Mode  :character  
##  Mean   :  3061   Mean   :   3685                                        
##  3rd Qu.:     0   3rd Qu.:    855                                        
##  Max.   :640732   Max.   :4129915                                        
##                                                                          
##  IncidentDescription AverageWeeklyWage  ReceivedDate          IsDenied      
##  Length:134004       Length:134004      Length:134004      Min.   :0.00000  
##  Class :character    Class :character   Class :character   1st Qu.:0.00000  
##  Mode  :character    Mode  :character   Mode  :character   Median :0.00000  
##                                                            Mean   :0.04474  
##                                                            3rd Qu.:0.00000  
##                                                            Max.   :1.00000  
##                                                                             
##     Gender          ClaimantType       InjuryNature       BodyPartRegion    
##  Length:134004      Length:134004      Length:134004      Length:134004     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    BodyPart             semana      ClaimantClosedDate2 ClaimantOpenedDate2
##  Length:134004      Min.   : 0.00   Min.   : 1.00       Min.   : 1.00      
##  Class :character   1st Qu.:13.00   1st Qu.:13.00       1st Qu.:14.00      
##  Mode  :character   Median :25.00   Median :14.00       Median :26.00      
##                     Mean   :25.62   Mean   :21.55       Mean   :25.95      
##                     3rd Qu.:38.00   3rd Qu.:30.00       3rd Qu.:38.00      
##                     Max.   :53.00   Max.   :53.00       Max.   :53.00      
##                     NA's   :58637   NA's   :4678                           
##  EmployerNotificationDate2 ReturnToWorkDate2 Total_Incurred_Cost_Claim
##  Min.   : 1.00             Min.   : 1.00     Min.   : -11775          
##  1st Qu.:14.00             1st Qu.:14.00     1st Qu.:     59          
##  Median :25.00             Median :26.00     Median :    234          
##  Mean   :25.77             Mean   :26.02     Mean   :   8910          
##  3rd Qu.:38.00             3rd Qu.:38.00     3rd Qu.:    965          
##  Max.   :53.00             Max.   :53.00     Max.   :5054823          
##  NA's   :22288             NA's   :58637
# Keep only the claims for which both the return-to-work value and the
# claimant-closed value are present, then re-inspect the reduced data.
df <- filter(
  df,
  !is.na(ReturnToWorkDate2),
  !is.na(ClaimantClosedDate2)
)

summary(df)
##     ClaimID           TotalPaid        TotalReserves TotalRecovery     
##  Min.   :  650915   Min.   :  -270.3   Min.   :0     Min.   :    0.00  
##  1st Qu.:  820013   1st Qu.:    45.6   1st Qu.:0     1st Qu.:    0.00  
##  Median :  842520   Median :   252.7   Median :0     Median :    0.00  
##  Mean   : 9072170   Mean   :  4232.0   Mean   :0     Mean   :   55.52  
##  3rd Qu.:11042817   3rd Qu.:  1139.4   3rd Qu.:0     3rd Qu.:    0.00  
##  Max.   :62203889   Max.   :808619.3   Max.   :0     Max.   :80049.48  
##                                                                        
##  IndemnityPaid      OtherPaid        ClaimStatus        IncidentDate      
##  Min.   :  -475   Min.   :  -270.3   Length:71934       Length:71934      
##  1st Qu.:     0   1st Qu.:    40.0   Class :character   Class :character  
##  Median :     0   Median :   246.2   Mode  :character   Mode  :character  
##  Mean   :  1801   Mean   :  2431.2                                        
##  3rd Qu.:     0   3rd Qu.:  1020.8                                        
##  Max.   :371920   Max.   :745818.7                                        
##                                                                           
##  IncidentDescription AverageWeeklyWage  ReceivedDate          IsDenied      
##  Length:71934        Length:71934       Length:71934       Min.   :0.00000  
##  Class :character    Class :character   Class :character   1st Qu.:0.00000  
##  Mode  :character    Mode  :character   Mode  :character   Median :0.00000  
##                                                            Mean   :0.04654  
##                                                            3rd Qu.:0.00000  
##                                                            Max.   :1.00000  
##                                                                             
##     Gender          ClaimantType       InjuryNature       BodyPartRegion    
##  Length:71934       Length:71934       Length:71934       Length:71934      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    BodyPart             semana      ClaimantClosedDate2 ClaimantOpenedDate2
##  Length:71934       Min.   : 0.00   Min.   : 1.00       Min.   : 1.00      
##  Class :character   1st Qu.:14.00   1st Qu.:13.00       1st Qu.:14.00      
##  Mode  :character   Median :26.00   Median :15.00       Median :26.00      
##                     Mean   :25.78   Mean   :22.47       Mean   :26.17      
##                     3rd Qu.:38.00   3rd Qu.:35.00       3rd Qu.:38.00      
##                     Max.   :53.00   Max.   :53.00       Max.   :53.00      
##                                                                            
##  EmployerNotificationDate2 ReturnToWorkDate2 Total_Incurred_Cost_Claim
##  Min.   : 1.00             Min.   : 1.00     Min.   :-10681.7         
##  1st Qu.:14.00             1st Qu.:14.00     1st Qu.:    39.7         
##  Median :26.00             Median :26.00     Median :   246.8         
##  Mean   :26.09             Mean   :26.18     Mean   :  4176.5         
##  3rd Qu.:38.00             3rd Qu.:38.00     3rd Qu.:  1100.8         
##  Max.   :53.00             Max.   :53.00     Max.   :806695.1         
##  NA's   :316
# Make sure the model predictors are stored as doubles.
df$ClaimantClosedDate2 <- as.numeric(df$ClaimantClosedDate2)
df$ReturnToWorkDate2 <- as.numeric(df$ReturnToWorkDate2)

# AverageWeeklyWage is read from the CSV as character. A bare as.numeric()
# turns every value that is not a plain number into NA, and lm() then drops
# those rows. Strip the usual formatting characters before converting.
# NOTE(review): assumes "," is a thousands separator and "." the decimal mark
# (e.g. "$1,234.50") -- confirm against the raw file.
wage_raw <- trimws(as.character(df$AverageWeeklyWage))
wage_clean <- gsub("[$,[:space:]]", "", wage_raw)
# The coercion warning is replaced by the explicit, more informative report
# below, so it is silenced only for this single call.
wage_num <- suppressWarnings(as.numeric(wage_clean))

# Values that were present in the file but still could not be parsed.
wage_failed <- is.na(wage_num) & !is.na(wage_raw) & nzchar(wage_raw)
if (any(wage_failed)) {
  warning(
    sum(wage_failed),
    " AverageWeeklyWage value(s) could not be parsed and were set to NA, ",
    "e.g.: ",
    paste(utils::head(unique(wage_raw[wage_failed]), 5L), collapse = ", "),
    call. = FALSE
  )
}
df$AverageWeeklyWage <- wage_num
## Warning: NAs introduced by coercion
# Linear model of the total incurred cost of a claim on all five candidate
# predictors. Rows with a missing value in any model variable are dropped by
# lm()'s default NA handling.
# NOTE(review): `regresion` is reassigned by the reduced model fitted further
# down, so this fit is not retained afterwards.
regresion <- lm(
  formula = Total_Incurred_Cost_Claim ~
    ReturnToWorkDate2 + AverageWeeklyWage +
    ClaimantOpenedDate2 + ClaimantClosedDate2 + IsDenied,
  data = df
)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ReturnToWorkDate2 + 
##     AverageWeeklyWage + ClaimantOpenedDate2 + ClaimantClosedDate2 + 
##     IsDenied, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -19409  -7645  -6707  -2096 738738 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          9.521e+03  3.550e+02  26.818  < 2e-16 ***
## ReturnToWorkDate2    3.045e+00  1.134e+01   0.269   0.7883    
## AverageWeeklyWage    8.016e-04  1.014e-02   0.079   0.9370    
## ClaimantOpenedDate2 -5.054e+01  1.145e+01  -4.414 1.02e-05 ***
## ClaimantClosedDate2 -1.732e+01  8.783e+00  -1.972   0.0486 *  
## IsDenied            -6.063e+03  4.871e+02 -12.448  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25020 on 38585 degrees of freedom
##   (33343 observations deleted due to missingness)
## Multiple R-squared:  0.004913,   Adjusted R-squared:  0.004784 
## F-statistic:  38.1 on 5 and 38585 DF,  p-value: < 2.2e-16
# Reduced linear model: total incurred cost of a claim on the opened/closed
# values and the denial indicator only. Replaces the object stored in
# `regresion`.
regresion <- lm(
  formula = Total_Incurred_Cost_Claim ~
    ClaimantOpenedDate2 + ClaimantClosedDate2 + IsDenied,
  data = df
)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimantOpenedDate2 + 
##     ClaimantClosedDate2 + IsDenied, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -15310  -4311  -3818  -2681 802647 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          4875.974    183.772  26.533  < 2e-16 ***
## ClaimantOpenedDate2   -26.518      4.908  -5.403 6.57e-08 ***
## ClaimantClosedDate2     5.417      4.895   1.107    0.268    
## IsDenied            -2736.445    336.147  -8.141 4.00e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18990 on 71930 degrees of freedom
## Multiple R-squared:  0.001343,   Adjusted R-squared:  0.001301 
## F-statistic: 32.24 on 3 and 71930 DF,  p-value: < 2.2e-16