Contexto

1. Importar y juntar bases de datos

# Read the two CSV exports and combine them with a full outer join on ClaimID,
# so claims that appear in only one of the files are still kept (all = TRUE).
# Tip: run file.choose() interactively to locate the files.
bd1 <- read.csv(file = "C:\\Users\\ximen\\Downloads\\ClaimsData2018.csv")
bd2 <- read.csv(file = "C:\\Users\\ximen\\Downloads\\TransactionsSummary.csv")
bd <- merge(x = bd1, y = bd2, by = "ClaimID", all = TRUE)

2. Crear nueva columna para Total Incurred Cost

# install.packages("dplyr")
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total incurred cost per claim: reserves plus both payment columns, minus
# whatever was recovered.
bd <- mutate(
  bd,
  Total_Incurred_Cost_Claim =
    TotalReserves + IndemnityPaid + OtherPaid - TotalRecovery
)

3. Filtrar base de datos por sólo mujeres

# Keep only the rows whose Gender is "Female". which() discards rows where the
# comparison is NA, which is also what subset() does.
bd_mujeres1 <- bd[which(bd$Gender == "Female"), , drop = FALSE]
# View(bd_mujeres1)

4. Eliminar columnas de X

library(dplyr)
# Drop the contiguous run of columns from X through X.22.
bd_mujeres1 <- select(bd_mujeres1, -(X:X.22))
# View(bd_mujeres1)

5. Descargar base de datos limpia como CSV

# Export the cleaned female-only table (without a row-name column) and print
# summary statistics for every column.
write.csv(
  x = bd_mujeres1,
  file = "bd_mujeres limpia.csv",
  row.names = FALSE
)
summary(bd_mujeres1)
##     ClaimID           TotalPaid         TotalReserves     TotalRecovery     
##  Min.   :  650919   Min.   :    -81.8   Min.   :      0   Min.   :    0.00  
##  1st Qu.:  806228   1st Qu.:     20.1   1st Qu.:      0   1st Qu.:    0.00  
##  Median :  833851   Median :    223.0   Median :      0   Median :    0.00  
##  Mean   : 8053898   Mean   :   6504.3   Mean   :   2423   Mean   :   31.13  
##  3rd Qu.: 7143280   3rd Qu.:    932.1   3rd Qu.:      0   3rd Qu.:    0.00  
##  Max.   :62203889   Max.   :2985247.9   Max.   :2069575   Max.   :90357.52  
##                                                                             
##  IndemnityPaid        OtherPaid         ClaimStatus        IncidentDate      
##  Min.   :    -1.2   Min.   :    -81.8   Length:59197       Length:59197      
##  1st Qu.:     0.0   1st Qu.:     16.4   Class :character   Class :character  
##  Median :     0.0   Median :    218.7   Mode  :character   Mode  :character  
##  Mean   :  2945.2   Mean   :   3559.1                                        
##  3rd Qu.:     0.0   3rd Qu.:    857.8                                        
##  Max.   :492934.8   Max.   :2700073.4                                        
##                                                                              
##  IncidentDescription ReturnToWorkDate   AverageWeeklyWage  ClaimantOpenedDate
##  Length:59197        Length:59197       Length:59197       Length:59197      
##  Class :character    Class :character   Class :character   Class :character  
##  Mode  :character    Mode  :character   Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:59197       Length:59197             Length:59197      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       ClaimantAge_at_DOI    Gender          ClaimantType      
##  Min.   :0.00000   Length:59197       Length:59197       Length:59197      
##  1st Qu.:0.00000   Class :character   Class :character   Class :character  
##  Median :0.00000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.05688                                                           
##  3rd Qu.:0.00000                                                           
##  Max.   :1.00000                                                           
##                                                                            
##  InjuryNature       BodyPartRegion       BodyPart         BillReviewALE     
##  Length:59197       Length:59197       Length:59197       Min.   : -456.00  
##  Class :character   Class :character   Class :character   1st Qu.:    8.25  
##  Mode  :character   Mode  :character   Mode  :character   Median :   24.00  
##                                                           Mean   :  174.80  
##                                                           3rd Qu.:   64.00  
##                                                           Max.   :20730.77  
##                                                           NA's   :46628     
##     Hospital        PhysicianOutpatient       Rx          
##  Min.   :-12570.4   Min.   :   -162.9   Min.   :  -160.7  
##  1st Qu.:   203.1   1st Qu.:    106.8   1st Qu.:    23.4  
##  Median :   572.9   Median :    220.2   Median :    61.1  
##  Mean   :  4580.8   Mean   :   1700.4   Mean   :  1357.1  
##  3rd Qu.:  2213.5   3rd Qu.:    667.2   3rd Qu.:   176.5  
##  Max.   :667973.0   Max.   :1481468.5   Max.   :380924.3  
##  NA's   :49187      NA's   :34369       NA's   :49906     
##  Total_Incurred_Cost_Claim
##  Min.   :  -2961          
##  1st Qu.:     22          
##  Median :    226          
##  Mean   :   8897          
##  3rd Qu.:    976          
##  Max.   :5054823          
## 

6. Entender la base de datos

# Summary statistics for every column of the female-only data set (the same
# call that is made right after the CSV export).
summary(bd_mujeres1)
##     ClaimID           TotalPaid         TotalReserves     TotalRecovery     
##  Min.   :  650919   Min.   :    -81.8   Min.   :      0   Min.   :    0.00  
##  1st Qu.:  806228   1st Qu.:     20.1   1st Qu.:      0   1st Qu.:    0.00  
##  Median :  833851   Median :    223.0   Median :      0   Median :    0.00  
##  Mean   : 8053898   Mean   :   6504.3   Mean   :   2423   Mean   :   31.13  
##  3rd Qu.: 7143280   3rd Qu.:    932.1   3rd Qu.:      0   3rd Qu.:    0.00  
##  Max.   :62203889   Max.   :2985247.9   Max.   :2069575   Max.   :90357.52  
##                                                                             
##  IndemnityPaid        OtherPaid         ClaimStatus        IncidentDate      
##  Min.   :    -1.2   Min.   :    -81.8   Length:59197       Length:59197      
##  1st Qu.:     0.0   1st Qu.:     16.4   Class :character   Class :character  
##  Median :     0.0   Median :    218.7   Mode  :character   Mode  :character  
##  Mean   :  2945.2   Mean   :   3559.1                                        
##  3rd Qu.:     0.0   3rd Qu.:    857.8                                        
##  Max.   :492934.8   Max.   :2700073.4                                        
##                                                                              
##  IncidentDescription ReturnToWorkDate   AverageWeeklyWage  ClaimantOpenedDate
##  Length:59197        Length:59197       Length:59197       Length:59197      
##  Class :character    Class :character   Class :character   Class :character  
##  Mode  :character    Mode  :character   Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:59197       Length:59197             Length:59197      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       ClaimantAge_at_DOI    Gender          ClaimantType      
##  Min.   :0.00000   Length:59197       Length:59197       Length:59197      
##  1st Qu.:0.00000   Class :character   Class :character   Class :character  
##  Median :0.00000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.05688                                                           
##  3rd Qu.:0.00000                                                           
##  Max.   :1.00000                                                           
##                                                                            
##  InjuryNature       BodyPartRegion       BodyPart         BillReviewALE     
##  Length:59197       Length:59197       Length:59197       Min.   : -456.00  
##  Class :character   Class :character   Class :character   1st Qu.:    8.25  
##  Mode  :character   Mode  :character   Mode  :character   Median :   24.00  
##                                                           Mean   :  174.80  
##                                                           3rd Qu.:   64.00  
##                                                           Max.   :20730.77  
##                                                           NA's   :46628     
##     Hospital        PhysicianOutpatient       Rx          
##  Min.   :-12570.4   Min.   :   -162.9   Min.   :  -160.7  
##  1st Qu.:   203.1   1st Qu.:    106.8   1st Qu.:    23.4  
##  Median :   572.9   Median :    220.2   Median :    61.1  
##  Mean   :  4580.8   Mean   :   1700.4   Mean   :  1357.1  
##  3rd Qu.:  2213.5   3rd Qu.:    667.2   3rd Qu.:   176.5  
##  Max.   :667973.0   Max.   :1481468.5   Max.   :380924.3  
##  NA's   :49187      NA's   :34369       NA's   :49906     
##  Total_Incurred_Cost_Claim
##  Min.   :  -2961          
##  1st Qu.:     22          
##  Median :    226          
##  Mean   :   8897          
##  3rd Qu.:    976          
##  Max.   :5054823          
## 
#count(bd_mujeres1, ClaimStatus, sort= TRUE)
#count(bd_mujeres1, IncidentDate, sort= TRUE)
#count(bd_mujeres1, IncidentDescription, sort= TRUE)
#count(bd_mujeres1, ReturnToWorkDate, sort= TRUE)
#count(bd_mujeres1, AverageWeeklyWage, sort= TRUE)
#count(bd_mujeres1, ClaimantOpenedDate, sort= TRUE)
#count(bd_mujeres1, ClaimantClosedDate, sort= TRUE)
#count(bd_mujeres1, EmployerNotificationDate, sort= TRUE)
#count(bd_mujeres1, ReceivedDate, sort= TRUE)
#count(bd_mujeres1, ClaimantAge_at_DOI, sort= TRUE)
#count(bd_mujeres1, Gender, sort= TRUE)
#count(bd_mujeres1, ClaimantType, sort= TRUE)
#count(bd_mujeres1, InjuryNature, sort= TRUE)
#count(bd_mujeres1, BodyPartRegion, sort= TRUE)
#count(bd_mujeres1, BodyPart, sort= TRUE)

Observaciones:
1. Tenemos muchos “#VALUE!” en la variable de Return to Work Date
2. Muchos NULL en Average Weekly Wage
3. Tenemos muchos “#VALUE!” en la variable de EmployerNotificationDate
4. Muchos NULL en ClaimantAge_at_DOI

# Extract the categorical variables of interest into their own data frame.
Mujeres <- bd_mujeres1[
  c("ClaimStatus", "ClaimantType", "BodyPartRegion", "InjuryNature")
]

Regresión Lineal

1. Importar base de datos limpia

# file.choose()
# Re-import the cleaned, female-only data set.
# NOTE(review): the export step writes "bd_mujeres limpia.csv" with a relative
# path; this absolute path points at that same file only if the working
# directory was the Downloads folder -- confirm.
bd_mujereslimpia <- read.csv(
  file = "C:\\Users\\ximen\\Downloads\\bd_mujeres limpia.csv"
)

2. Identificar las variables de interés

library(dplyr)
# Build the regression table with only the columns the models need.
bd_mujeresrl <- select(
  bd_mujereslimpia,
  ClaimID,
  ClaimStatus,
  BodyPartRegion,
  ClaimantAge_at_DOI,
  Gender,
  ClaimantType,
  ClaimantOpenedDate,
  ClaimantClosedDate,
  Total_Incurred_Cost_Claim
)
# View(bd_mujeresrl)
summary(bd_mujeresrl)
##     ClaimID         ClaimStatus        BodyPartRegion     ClaimantAge_at_DOI
##  Min.   :  650919   Length:59197       Length:59197       Length:59197      
##  1st Qu.:  806228   Class :character   Class :character   Class :character  
##  Median :  833851   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 8053898                                                           
##  3rd Qu.: 7143280                                                           
##  Max.   :62203889                                                           
##     Gender          ClaimantType       ClaimantOpenedDate ClaimantClosedDate
##  Length:59197       Length:59197       Length:59197       Length:59197      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Total_Incurred_Cost_Claim
##  Min.   :  -2961          
##  1st Qu.:     22          
##  Median :    226          
##  Mean   :   8897          
##  3rd Qu.:    976          
##  Max.   :5054823

3. Conversión de fechas a días

# Parse both date columns (month/day/two-digit-year text) into Date objects.
# NOTE(review): the summary that follows reports 54104 NA processing times out
# of 59197 rows, so most values do not parse with this format -- confirm the
# raw date layout.
bd_mujeresrl[c("ClaimantOpenedDate", "ClaimantClosedDate")] <- lapply(
  bd_mujeresrl[c("ClaimantOpenedDate", "ClaimantClosedDate")],
  as.Date,
  format = "%m/%d/%y"
)

# Processing time in days: closing date minus opening date.
bd_mujeresrl$TiempoDeProcesamientoDias <- as.numeric(
  bd_mujeresrl$ClaimantClosedDate - bd_mujeresrl$ClaimantOpenedDate,
  units = "days"
)

# The raw date columns are no longer needed once the duration exists.
bd_mujeresrl$ClaimantOpenedDate <- NULL
bd_mujeresrl$ClaimantClosedDate <- NULL
# View(bd_mujeresrl)
summary(bd_mujeresrl)
##     ClaimID         ClaimStatus        BodyPartRegion     ClaimantAge_at_DOI
##  Min.   :  650919   Length:59197       Length:59197       Length:59197      
##  1st Qu.:  806228   Class :character   Class :character   Class :character  
##  Median :  833851   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 8053898                                                           
##  3rd Qu.: 7143280                                                           
##  Max.   :62203889                                                           
##                                                                             
##     Gender          ClaimantType       Total_Incurred_Cost_Claim
##  Length:59197       Length:59197       Min.   :  -2961          
##  Class :character   Class :character   1st Qu.:     22          
##  Mode  :character   Mode  :character   Median :    226          
##                                        Mean   :   8897          
##                                        3rd Qu.:    976          
##                                        Max.   :5054823          
##                                                                 
##  TiempoDeProcesamientoDias
##  Min.   :-333.0           
##  1st Qu.:   0.0           
##  Median : 245.0           
##  Mean   : 806.1           
##  3rd Qu.: 920.0           
##  Max.   :6912.0           
##  NA's   :54104

4. Conversión de carácter a números

# ClaimStatus stays a factor; levels "C", "O", "R" are relabelled "1", "2", "3".
bd_mujeresrl$ClaimStatus <- factor(
  bd_mujeresrl$ClaimStatus,
  levels = c("C", "O", "R"),
  labels = c("1", "2", "3")
)
# Age arrives as text; entries that are not numbers become NA, which is what
# triggers the coercion warning.
bd_mujeresrl$ClaimantAge_at_DOI <- as.numeric(bd_mujeresrl$ClaimantAge_at_DOI)
## Warning: NAs introducidos por coerción
# Gender and ClaimantType become the position of each value in its level list
# (1, 2 or 3); values outside the list become NA.
bd_mujeresrl$Gender <- as.numeric(
  match(bd_mujeresrl$Gender, c("Male", "Female", "Not Provided"))
)
bd_mujeresrl$ClaimantType <- as.numeric(
  match(bd_mujeresrl$ClaimantType, c("Medical Only", "Indemnity", "Report Only"))
)

5. Eliminar NA

# Drop every row that has an NA in any column and store the result back into
# bd_mujeresrl. The previous version assigned it to `bd`, which overwrote the
# merged source table and left bd_mujeresrl (the table summarised and modelled
# afterwards) with all of its NAs.
# NOTE: the summary output shown right after this call in the rendered report
# was produced before this fix and still lists NA counts; re-knit to refresh.
bd_mujeresrl <- na.omit(bd_mujeresrl)
summary(bd_mujeresrl)
##     ClaimID         ClaimStatus BodyPartRegion     ClaimantAge_at_DOI
##  Min.   :  650919   1:56900     Length:59197       Min.   :-8000.00  
##  1st Qu.:  806228   2: 1786     Class :character   1st Qu.:   33.00  
##  Median :  833851   3:  511     Mode  :character   Median :   43.00  
##  Mean   : 8053898                                  Mean   :   39.75  
##  3rd Qu.: 7143280                                  3rd Qu.:   52.00  
##  Max.   :62203889                                  Max.   :   89.00  
##                                                    NA's   :17097     
##      Gender   ClaimantType   Total_Incurred_Cost_Claim
##  Min.   :2   Min.   :1.000   Min.   :  -2961          
##  1st Qu.:2   1st Qu.:1.000   1st Qu.:     22          
##  Median :2   Median :1.000   Median :    226          
##  Mean   :2   Mean   :1.357   Mean   :   8897          
##  3rd Qu.:2   3rd Qu.:2.000   3rd Qu.:    976          
##  Max.   :2   Max.   :3.000   Max.   :5054823          
##                                                       
##  TiempoDeProcesamientoDias
##  Min.   :-333.0           
##  1st Qu.:   0.0           
##  Median : 245.0           
##  Mean   : 806.1           
##  3rd Qu.: 920.0           
##  Max.   :6912.0           
##  NA's   :54104

5.Eliminar valores negativos

# Keep only rows where age, total incurred cost and processing time are all
# non-negative. filter() also drops rows where any of these comparisons is NA.
bd_mujeresrl <- filter(
  bd_mujeresrl,
  ClaimantAge_at_DOI >= 0,
  Total_Incurred_Cost_Claim >= 0,
  TiempoDeProcesamientoDias >= 0
)

summary(bd_mujeresrl)
##     ClaimID         ClaimStatus BodyPartRegion     ClaimantAge_at_DOI
##  Min.   :  650919   1:4015      Length:4018        Min.   : 1.00     
##  1st Qu.:  823404   2:   3      Class :character   1st Qu.:34.00     
##  Median : 5970814   3:   0      Mode  :character   Median :44.00     
##  Mean   :15622363                                  Mean   :43.14     
##  3rd Qu.:30288888                                  3rd Qu.:52.00     
##  Max.   :61592860                                  Max.   :87.00     
##      Gender   ClaimantType  Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Min.   :2   Min.   :1.00   Min.   :     0.0          Min.   :   0.0           
##  1st Qu.:2   1st Qu.:1.00   1st Qu.:     0.0          1st Qu.:  33.0           
##  Median :2   Median :1.00   Median :   171.3          Median : 301.0           
##  Mean   :2   Mean   :1.68   Mean   :  4459.3          Mean   : 717.1           
##  3rd Qu.:2   3rd Qu.:2.00   3rd Qu.:  1125.3          3rd Qu.: 844.0           
##  Max.   :2   Max.   :3.00   Max.   :388620.8          Max.   :6912.0

6.Regresión Lineal

# First linear model: total incurred cost explained by claim ID, claimant age,
# body-part region and processing time.
regresion <- lm(
  formula = Total_Incurred_Cost_Claim ~ ClaimID + ClaimantAge_at_DOI +
    BodyPartRegion + TiempoDeProcesamientoDias,
  data = bd_mujeresrl
)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimID + ClaimantAge_at_DOI + 
##     BodyPartRegion + TiempoDeProcesamientoDias, data = bd_mujeresrl)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -23086  -4840  -2286    -14 363016 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -4.842e+03  1.676e+03  -2.889  0.00388 ** 
## ClaimID                           -5.826e-05  2.034e-05  -2.864  0.00421 ** 
## ClaimantAge_at_DOI                 1.269e+02  2.770e+01   4.580 4.78e-06 ***
## BodyPartRegionLower Extremities    3.031e+03  1.231e+03   2.461  0.01389 *  
## BodyPartRegionMultiple Body Parts  1.952e+03  1.403e+03   1.391  0.16417    
## BodyPartRegionNeck                 3.727e+03  1.893e+03   1.969  0.04903 *  
## BodyPartRegionNon-Standard Code   -8.554e+02  2.703e+03  -0.316  0.75166    
## BodyPartRegionNot Available       -3.580e+00  2.056e+04   0.000  0.99986    
## BodyPartRegionTrunk                3.408e+03  1.336e+03   2.551  0.01076 *  
## BodyPartRegionUpper Extremities    1.794e+03  1.170e+03   1.533  0.12547    
## TiempoDeProcesamientoDias          3.543e+00  3.343e-01  10.599  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20530 on 4007 degrees of freedom
## Multiple R-squared:  0.04572,    Adjusted R-squared:  0.04334 
## F-statistic:  19.2 on 10 and 4007 DF,  p-value: < 2.2e-16

7. Ajuste de variables para intentar aumentar R² (se elimina ClaimID; el R² ajustado baja de 0.04334 a 0.04162)

# Second linear model: same response, with ClaimID removed from the
# predictors.
regresion <- lm(
  formula = Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI +
    BodyPartRegion + TiempoDeProcesamientoDias,
  data = bd_mujeresrl
)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI + 
##     BodyPartRegion + TiempoDeProcesamientoDias, data = bd_mujeresrl)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -25854  -4494  -2357   -250 362038 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -6264.9460  1602.2526  -3.910 9.38e-05 ***
## ClaimantAge_at_DOI                  126.0115    27.7233   4.545 5.65e-06 ***
## BodyPartRegionLower Extremities    3348.0154  1227.5452   2.727  0.00641 ** 
## BodyPartRegionMultiple Body Parts  2561.3990  1387.9538   1.845  0.06505 .  
## BodyPartRegionNeck                 3727.3213  1894.4594   1.967  0.04920 *  
## BodyPartRegionNon-Standard Code   -1174.6409  2702.9742  -0.435  0.66390    
## BodyPartRegionNot Available        -665.6847 20574.4218  -0.032  0.97419    
## BodyPartRegionTrunk                3846.9340  1328.2441   2.896  0.00380 ** 
## BodyPartRegionUpper Extremities    2010.1022  1168.9276   1.720  0.08558 .  
## TiempoDeProcesamientoDias             3.9175     0.3079  12.724  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20550 on 4008 degrees of freedom
## Multiple R-squared:  0.04377,    Adjusted R-squared:  0.04162 
## F-statistic: 20.38 on 9 and 4008 DF,  p-value: < 2.2e-16

8.Modelo Predictivo

# Predict the cost for one hypothetical claim: age and processing time are set
# to the sample means reported by the earlier summary, with a lower-extremity
# injury.
datos <- data.frame(
  ClaimantAge_at_DOI = 43.14,
  TiempoDeProcesamientoDias = 717.1,
  BodyPartRegion = "Lower Extremities"
)
predict(object = regresion, newdata = datos)
##        1 
## 5328.477

9.Regresión Lineal Ajustada

# Third linear model: processing time explained by claimant age, claimant type
# and total incurred cost.
# NOTE(review): ClaimantType enters as the numeric code 1/2/3 rather than as a
# factor, so the model fits a single slope across the three types -- confirm
# this is intended.
regresion <- lm(
  formula = TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI +
    ClaimantType + Total_Incurred_Cost_Claim,
  data = bd_mujeresrl
)
summary(regresion)
## 
## Call:
## lm(formula = TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI + 
##     ClaimantType + Total_Incurred_Cost_Claim, data = bd_mujeresrl)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2719.3  -569.3  -289.7    48.3  6332.3 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.716e+03  6.655e+01  25.789   <2e-16 ***
## ClaimantAge_at_DOI        -1.229e+01  1.351e+00  -9.099   <2e-16 ***
## ClaimantType              -3.075e+02  1.961e+01 -15.682   <2e-16 ***
## Total_Incurred_Cost_Claim  1.071e-02  7.620e-04  14.061   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1011 on 4014 degrees of freedom
## Multiple R-squared:  0.1133, Adjusted R-squared:  0.1126 
## F-statistic:   171 on 3 and 4014 DF,  p-value: < 2.2e-16

10.Conclusión

Clústers

1.Identificar Outliers

# Horizontal boxplots to eyeball outliers in cost and in processing time.
boxplot(x = bd_mujeresrl$Total_Incurred_Cost_Claim, horizontal = TRUE)

boxplot(x = bd_mujeresrl$TiempoDeProcesamientoDias, horizontal = TRUE)

2. Eliminar Outliers

# Clustering table: only the two numeric variables, with each row labelled by
# its ClaimID instead of carrying ClaimID as a column.
bd_mujerescl <- bd_mujeresrl[
  c("TiempoDeProcesamientoDias", "Total_Incurred_Cost_Claim")
]
rownames(bd_mujerescl) <- bd_mujeresrl$ClaimID
# View(bd_mujerescl)

# Outlier limits for TiempoDeProcesamientoDias: Q1 - 1.5 * IQR and
# Q3 + 1.5 * IQR.
IQR_TiempoDeProcesamientoDias <- IQR(bd_mujerescl$TiempoDeProcesamientoDias)
IQR_TiempoDeProcesamientoDias
## [1] 811
summary(bd_mujerescl)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :     0.0         
##  1st Qu.:  33.0            1st Qu.:     0.0         
##  Median : 301.0            Median :   171.3         
##  Mean   : 717.1            Mean   :  4459.3         
##  3rd Qu.: 844.0            3rd Qu.:  1125.3         
##  Max.   :6912.0            Max.   :388620.8
# Take Q1 and Q3 from the data instead of copying them by hand from the
# summary above, so the limits stay correct if the data changes.
cuartiles_tiempo <- quantile(
  bd_mujerescl$TiempoDeProcesamientoDias,
  probs = c(0.25, 0.75),
  names = FALSE
)
LI_TiempoDeProcesamientoDias <- cuartiles_tiempo[1] - 1.5 * IQR_TiempoDeProcesamientoDias
LI_TiempoDeProcesamientoDias
## [1] -1183.5
LS_TiempoDeProcesamientoDias <- cuartiles_tiempo[2] + 1.5 * IQR_TiempoDeProcesamientoDias
LS_TiempoDeProcesamientoDias
## [1] 2060.5
cat("LI_TiempoDeProcesamientoDias:", LI_TiempoDeProcesamientoDias, "\n")
## LI_TiempoDeProcesamientoDias: -1183.5
cat("LS_TiempoDeProcesamientoDias:", LS_TiempoDeProcesamientoDias, "\n")
## LS_TiempoDeProcesamientoDias: 2060.5
# Filter with the computed upper limit itself. The earlier version rounded it
# up to 2061, which would let through a value above the limit.
bd_mujerescl <- bd_mujerescl[
  bd_mujerescl$TiempoDeProcesamientoDias <= LS_TiempoDeProcesamientoDias, ,
  drop = FALSE
]

# Outlier limits for Total_Incurred_Cost_Claim, computed on the rows that are
# left after the processing-time filter.
IQR_Total_Incurred_Cost_Claim <- IQR(bd_mujerescl$Total_Incurred_Cost_Claim)
IQR_Total_Incurred_Cost_Claim
## [1] 1155.71
summary(bd_mujerescl)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :     0.0         
##  1st Qu.:   4.0            1st Qu.:     0.0         
##  Median : 235.0            Median :   177.8         
##  Mean   : 398.6            Mean   :  3707.7         
##  3rd Qu.: 548.0            3rd Qu.:  1155.7         
##  Max.   :2058.0            Max.   :246847.9
# Take Q1 and Q3 from the current data. The earlier version typed them in by
# hand and used Q3 = 1125.3, which is the third quartile from BEFORE the
# processing-time filter; the summary above shows the current Q3 is 1155.7.
cuartiles_costo <- quantile(
  bd_mujerescl$Total_Incurred_Cost_Claim,
  probs = c(0.25, 0.75),
  names = FALSE
)
LI_Total_Incurred_Cost_Claim <- cuartiles_costo[1] - 1.5 * IQR_Total_Incurred_Cost_Claim
LI_Total_Incurred_Cost_Claim
## [1] -1733.565
LS_Total_Incurred_Cost_Claim <- cuartiles_costo[2] + 1.5 * IQR_Total_Incurred_Cost_Claim
LS_Total_Incurred_Cost_Claim
# (The rendered report printed 2858.865 here, derived from the stale Q3;
# re-knit to refresh this value.)
cat("LI_Total_Incurred_Cost_Claim:", LI_Total_Incurred_Cost_Claim, "\n")
## LI_Total_Incurred_Cost_Claim: -1733.565
cat("LS_Total_Incurred_Cost_Claim:", LS_Total_Incurred_Cost_Claim, "\n")
# (Previously printed 2858.865; re-knit to refresh.)
# Filter with the computed upper limit itself rather than a hand-rounded 2859.
bd_mujerescl <- bd_mujerescl[
  bd_mujerescl$Total_Incurred_Cost_Claim <= LS_Total_Incurred_Cost_Claim, ,
  drop = FALSE
]
# NOTE: the summary output shown after this call in the rendered report was
# produced with the old cut-off of 2859; re-knit to refresh it.
summary(bd_mujerescl)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :   0.00          
##  1st Qu.:   0.0            1st Qu.:   0.00          
##  Median : 175.0            Median :  98.62          
##  Mean   : 337.7            Mean   : 372.37          
##  3rd Qu.: 460.5            3rd Qu.: 428.27          
##  Max.   :2058.0            Max.   :2851.58

3. Crear grupos

# 0. Standardise both variables (centre and scale) so neither dominates the
#    distance computation.
bd_mujerescl <- as.data.frame(scale(bd_mujerescl))

# 1. Data set used for clustering
bdmujeresCLUSTER <- bd_mujerescl

# 2. Number of groups
grupos <- 10

# 3. Run k-means. The starting centres are chosen at random, so the seed is
#    fixed first; without it every run yields different cluster labels.
set.seed(123)
segmentos <- kmeans(bdmujeresCLUSTER, centers = grupos)

# 4. Attach the assigned cluster to each observation
asignacion <- cbind(bdmujeresCLUSTER, cluster = segmentos$cluster)

# 5. Graficar asignaciones
# install.packages("ggplot2")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
# install.packages("factoextra")
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.1
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the cluster assignment. TRUE is spelled out because T is an ordinary
# variable that can be reassigned, whereas TRUE is a reserved constant.
fviz_cluster(
  segmentos,
  data = bdmujeresCLUSTER,
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme()
)

4.Optimizar la cantidad de grupos creados

library(cluster)
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.1
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
set.seed(123)
# Gap statistic for K = 1..10. Changes from the earlier version:
#  - FUNcluster is spelled out; `FUN =` only worked through partial argument
#    matching.
#  - iter.max is passed on to kmeans and raised above its default of 10,
#    which had produced "did not converge in 10 iterations" warnings.
optimizacion <- clusGap(
  bdmujeresCLUSTER,
  FUNcluster = kmeans,
  nstart = 1,
  iter.max = 50,
  K.max = 10
)
plot(optimizacion, xlab = "Número de clusters K")

# Árbol

# Pick the response and the candidate predictors for the regression tree.
mujeresarbol <- bd_mujeresrl[
  c(
    "Total_Incurred_Cost_Claim", "ClaimStatus", "ClaimantAge_at_DOI",
    "Gender", "ClaimantType", "TiempoDeProcesamientoDias"
  )
]

sum(is.na(mujeresarbol))
## [1] 0
# Make the column types explicit: the three measurements as numeric, the three
# categorical codes as factors.
mujeresarbol[
  c("Total_Incurred_Cost_Claim", "ClaimantAge_at_DOI", "TiempoDeProcesamientoDias")
] <- lapply(
  mujeresarbol[
    c("Total_Incurred_Cost_Claim", "ClaimantAge_at_DOI", "TiempoDeProcesamientoDias")
  ],
  as.numeric
)
mujeresarbol[c("ClaimStatus", "Gender", "ClaimantType")] <- lapply(
  mujeresarbol[c("ClaimStatus", "Gender", "ClaimantType")],
  as.factor
)
str(mujeresarbol)
## 'data.frame':    4018 obs. of  6 variables:
##  $ Total_Incurred_Cost_Claim: num  43108 390.2 0 106.4 19.6 ...
##  $ ClaimStatus              : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ClaimantAge_at_DOI       : num  41 38 51 35 42 51 47 36 47 47 ...
##  $ Gender                   : Factor w/ 1 level "2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ClaimantType             : Factor w/ 3 levels "1","2","3": 2 1 1 1 1 1 1 2 1 1 ...
##  $ TiempoDeProcesamientoDias: num  848 2464 2856 2679 3256 ...
library(rpart)
## Warning: package 'rpart' was built under R version 4.3.1
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.1
# Fit a tree for processing time (in days) using every other column of
# mujeresarbol as a candidate predictor, then print its splits.
arbol <- rpart(TiempoDeProcesamientoDias ~ ., data = mujeresarbol)
print(arbol)
## n= 4018 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 4018 4623308000  717.11470  
##    2) ClaimantType=3 904  330852900  129.85840  
##      4) Total_Incurred_Cost_Claim< 25 890  219013300   89.88764 *
##      5) Total_Incurred_Cost_Claim>=25 14   20024210 2670.85700 *
##    3) ClaimantType=1,2 3114 3890188000  887.59630  
##      6) Total_Incurred_Cost_Claim>=51.975 2443 2068233000  787.16950  
##       12) Total_Incurred_Cost_Claim< 208180.3 2430 1927711000  772.19140  
##         24) Total_Incurred_Cost_Claim< 12823.46 2155 1650736000  718.96470 *
##         25) Total_Incurred_Cost_Claim>=12823.46 275  223025800 1189.29500 *
##       13) Total_Incurred_Cost_Claim>=208180.3 13   38075070 3586.92300 *
##      7) Total_Incurred_Cost_Claim< 51.975 671 1707609000 1253.23400  
##       14) ClaimantAge_at_DOI>=48.5 196  247688700  726.56120 *
##       15) ClaimantAge_at_DOI< 48.5 475 1383120000 1470.55600 *
# Draw the fitted tree with both plotting functions of the rpart.plot
# package: rpart.plot() first, then prp().
rpart.plot::rpart.plot(arbol)

rpart.plot::prp(arbol)

LS0tDQp0aXRsZTogIkFjdGl2aWRhZDQuNiBNdWplcmVzIg0KYXV0aG9yOiAiWGltZW5hIENhc3RpbGxvIEEwMTM2OTk0OSINCmRhdGU6ICIyMDIzLTA5LTMwIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IFRSVUUNCiAgICB0b2NfZmxvYXQ6IFRSVUUNCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFDQogICAgdGhlbWU6ICJ5ZXRpIg0KICAgIGhpZ2hsaWdodDogInRhbmdvIg0KDQotLS0NCjxjZW50ZXI+DQojIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPkNvbnRleHRvPC9zcGFuPg0KDQohW10oQzpcXFVzZXJzXFx4aW1lblxcRG93bmxvYWRzXFxzZWd1cm9zZ2lmLmdpZikNCg0KPGNlbnRlcj4NCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij4xLiBJbXBvcnRhciB5IGp1bnRhciBiYXNlcyBkZSBkYXRvczwvc3Bhbj4NCg0KYGBge3J9DQojIGZpbGUuY2hvb3NlKCkNCmJkMSA8LSByZWFkLmNzdigiQzpcXFVzZXJzXFx4aW1lblxcRG93bmxvYWRzXFxDbGFpbXNEYXRhMjAxOC5jc3YiKQ0KYmQyIDwtIHJlYWQuY3N2KCJDOlxcVXNlcnNcXHhpbWVuXFxEb3dubG9hZHNcXFRyYW5zYWN0aW9uc1N1bW1hcnkuY3N2IikNCmJkIDwtIG1lcmdlKGJkMSwgYmQyLCBieT0iQ2xhaW1JRCIsYWxsPVRSVUUpDQoNCmBgYA0KDQo8Y2VudGVyPg0KIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjIuIENyZWFyIG51ZXZhIGNvbHVtbmEgcGFyYSBUb3RhbCBJbmN1cnJlZCBDb3N0PC9zcGFuPg0KYGBge3J9DQojIGluc3RhbGwucGFja2FnZXMoImRwbHlyIikNCmxpYnJhcnkoZHBseXIpDQoNCmJkIDwtIGJkICU+JSANCiAgbXV0YXRlKFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPSBUb3RhbFJlc2VydmVzICsgSW5kZW1uaXR5UGFpZCArIE90aGVyUGFpZCAtIFRvdGFsUmVjb3ZlcnkpDQoNCmBgYA0KDQo8Y2VudGVyPg0KIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjMuIEZpbHRyYXIgYmFzZSBkZSBkYXRvcyBwb3Igc8OzbG8gbXVqZXJlczwvc3Bhbj4NCmBgYHtyfQ0KYmRfbXVqZXJlczEgPC0gc3Vic2V0KGJkLCBHZW5kZXIgPT0gIkZlbWFsZSIpDQojIFZpZXcoYmRfbXVqZXJlczEpDQoNCmBgYA0KDQo8Y2VudGVyPg0KIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjQuIEVsaW1pbmFyIGNvbHVtYXMgZGUgWDwvc3Bhbj4NCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCmJkX211amVyZXMxIDwtIGJkX211amVyZXMxICU+JQ0KICBzZWxlY3QoLVg6LVguMjIpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KIyBWaWV3KGJkX211amVyZXMxKQ0KDQpgYGANCg0KPGNlbnRlcj4NCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij41LiBEZXNjYXJnYXIgYmFzZSBkZSBkYXRvcyBsaW1waWEgY29tbyBDU1Y8L3NwYW4+DQpgYGB7cn0NCndyaXRlLmNzdihiZF9tdWplcmVzMSwgImJkX211amVyZXMgbGltcGlhLmNzdiIsIHJvdy5uYW1lcz1GQUxTRSkNCnN1bW1hcnkoYmRfbXVqZXJlczEpDQoNCmBgYA0KDQojIyMg
PHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+Ni4gRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvczwvc3Bhbj4NCmBgYHtyfQ0Kc3VtbWFyeShiZF9tdWplcmVzMSkNCiNjb3VudChiZF9tdWplcmVzMSwgQ2xhaW1TdGF0dXMsIHNvcnQ9IFRSVUUpDQojY291bnQoYmRfbXVqZXJlczEsIEluY2lkZW50RGF0ZSwgc29ydD0gVFJVRSkNCiNjb3VudChiZF9tdWplcmVzMSwgSW5jaWRlbnREZXNjcmlwdGlvbiwgc29ydD0gVFJVRSkNCiNjb3VudChiZF9tdWplcmVzMSwgUmV0dXJuVG9Xb3JrRGF0ZSwgc29ydD0gVFJVRSkNCiNjb3VudChiZF9tdWplcmVzMSwgQXZlcmFnZVdlZWtseVdhZ2UsIHNvcnQ9IFRSVUUpDQojY291bnQoYmRfbXVqZXJlczEsIENsYWltYW50T3BlbmVkRGF0ZSwgc29ydD0gVFJVRSkNCiNjb3VudChiZF9tdWplcmVzMSwgQ2xhaW1hbnRDbG9zZWREYXRlLCBzb3J0PSBUUlVFKQ0KI2NvdW50KGJkX211amVyZXMxLCBFbXBsb3llck5vdGlmaWNhdGlvbkRhdGUsIHNvcnQ9IFRSVUUpDQojY291bnQoYmRfbXVqZXJlczEsIFJlY2VpdmVkRGF0ZSwgc29ydD0gVFJVRSkNCiNjb3VudChiZF9tdWplcmVzMSwgQ2xhaW1hbnRBZ2VfYXRfRE9JLCBzb3J0PSBUUlVFKQ0KI2NvdW50KGJkX211amVyZXMxLCBHZW5kZXIsIHNvcnQ9IFRSVUUpDQojY291bnQoYmRfbXVqZXJlczEsIENsYWltYW50VHlwZSwgc29ydD0gVFJVRSkNCiNjb3VudChiZF9tdWplcmVzMSwgSW5qdXJ5TmF0dXJlLCBzb3J0PSBUUlVFKQ0KI2NvdW50KGJkX211amVyZXMxLCBCb2R5UGFydFJlZ2lvbiwgc29ydD0gVFJVRSkNCiNjb3VudChiZF9tdWplcmVzMSwgQm9keVBhcnQsIHNvcnQ9IFRSVUUpDQoNCg0KYGBgDQpPYnNlcnZhY2lvbmVzOiAgIA0KMS4gVGVuZW1vcyBtdWNob3MgIiNWQUxVRSEiIGVuIGxhIHZhcmlhYmxlIGRlIFJldHVybiB0byBXb3JrIERhdGUgIA0KMi4gTXVjaG9zIE5VTEwgZW4gQXZlcmFnZSBXZWVrbHkgV2FnZSAgDQozLiBUZW5lbW9zIG11Y2hvcyAiI1ZBTFVFISIgZW4gbGEgdmFyaWFibGUgZGUgRW1wbG95ZXJOb3RpZmljYXRpb25EYXRlICANCjQuIE11Y2hvcyBOVUxMIGVuIENsYWltYW50QUdFX2F0X0RPSSAgDQoNCg0KYGBge3J9DQoNCiMgRXh0cmFlciBsYXMgdmFyaWFibGVzIGRlIGludGVyw6lzDQpNdWplcmVzIDwtIGJkX211amVyZXMxWyxjKCJDbGFpbVN0YXR1cyIsIkNsYWltYW50VHlwZSIsIkJvZHlQYXJ0UmVnaW9uIiwiSW5qdXJ5TmF0dXJlIildDQpgYGANCg0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMgIA0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMgIA0KDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBvcmFuZ2U7Ij5SZWdyZXNpw7NuIExpbmVhbDwvc3Bhbj4NCg0KIVtdKEM6XFxVc2Vyc1xceGltZW5cXERvd25sb2Fkc1xccmVncmVzaW9uZ2lm
LmdpZikNCg0KIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogb3JhbmdlOyI+MS4gSW1wb3J0YXIgYmFzZSBkZSBkYXRvcyBsaW1waWE8L3NwYW4+DQoNCmBgYHtyfQ0KI2ZpbGUuY2hvb3NlKCkNCmJkX211amVyZXNsaW1waWEgPC0gcmVhZC5jc3YoIkM6XFxVc2Vyc1xceGltZW5cXERvd25sb2Fkc1xcYmRfbXVqZXJlcyBsaW1waWEuY3N2IikNCmBgYA0KDQojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBvcmFuZ2U7Ij4yLiBJZGVudGlmaWNhciBsYXMgdmFyaWFibGVzIGRlIGludGVyw6lzPC9zcGFuPg0KDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQojIFRlbmVyIHVuYSBiYXNlIGRlIGRhdG9zIGNvbiBsYXMgY29sdW1uYXMgbmVjZXNhcmlhcw0KYmRfbXVqZXJlc3JsIDwtIGJkX211amVyZXNsaW1waWEgJT4lDQogIHNlbGVjdChDbGFpbUlELCBDbGFpbVN0YXR1cywgQm9keVBhcnRSZWdpb24sIENsYWltYW50QWdlX2F0X0RPSSwgR2VuZGVyLCBDbGFpbWFudFR5cGUsIENsYWltYW50T3BlbmVkRGF0ZSwgQ2xhaW1hbnRDbG9zZWREYXRlLCBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltKQ0KIyBWaWV3KGJkX211amVyZXNybCkNCnN1bW1hcnkoYmRfbXVqZXJlc3JsKQ0KYGBgDQoNCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IG9yYW5nZTsiPjMuIENvbnZlcnNpw7NuIGRlIGZlY2hhcyBhIGTDrWFzPC9zcGFuPg0KDQpgYGB7cn0NCmJkX211amVyZXNybCRDbGFpbWFudE9wZW5lZERhdGUgPC0gYXMuRGF0ZShiZF9tdWplcmVzcmwkQ2xhaW1hbnRPcGVuZWREYXRlLCBmb3JtYXQgPSAiJW0vJWQvJXkiKQ0KYmRfbXVqZXJlc3JsJENsYWltYW50Q2xvc2VkRGF0ZSA8LSBhcy5EYXRlKGJkX211amVyZXNybCRDbGFpbWFudENsb3NlZERhdGUsIGZvcm1hdCA9ICIlbS8lZC8leSIpDQoNCiMgQ2FsY3VsYXIgbGEgZGlmZXJlbmNpYSBlbiBkw61hcyBlbnRyZSBsYXMgZmVjaGFzDQpiZF9tdWplcmVzcmwkVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSBhcy5udW1lcmljKGRpZmZ0aW1lKGJkX211amVyZXNybCRDbGFpbWFudENsb3NlZERhdGUsIGJkX211amVyZXNybCRDbGFpbWFudE9wZW5lZERhdGUsIHVuaXRzID0gImRheXMiKSkNCg0KIyBFbGltaW5hciBsYXMgY29sdW1uYXMgb3JpZ2luYWxlcyBkZSBmZWNoYQ0KYmRfbXVqZXJlc3JsIDwtIGJkX211amVyZXNybFssICEobmFtZXMoYmRfbXVqZXJlc3JsKSAlaW4lIGMoIkNsYWltYW50T3BlbmVkRGF0ZSIsICJDbGFpbWFudENsb3NlZERhdGUiKSldDQojIFZpZXcoYmRfbXVqZXJlc0FSKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShiZF9tdWplcmVzcmwpDQpgYGANCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IG9yYW5nZTsiPjQuIENvbnZlcnNpw7NuIGRlIGNhcsOhY3RlciBhIG7Dum1lcm9zPC9zcGFuPg0KDQpgYGB7cn0NCmJkX211amVyZXNybCRDbGFpbVN0YXR1cyA8LSBmYWN0b3IoYmRfbXVqZXJlc3JsJENsYWltU3RhdHVzLCBsZXZlbHMgPSBjKCJDIiwgIk8iLCAiUiIpLCBsYWJlbHMgPSBjKDEs
IDIsIDMpKQ0KYmRfbXVqZXJlc3JsJENsYWltYW50QWdlX2F0X0RPSSA8LSBhcy5udW1lcmljKGJkX211amVyZXNybCRDbGFpbWFudEFnZV9hdF9ET0kpDQpgYGANCmBgYHtyfQ0KYmRfbXVqZXJlc3JsJEdlbmRlciA8LSBhcy5udW1lcmljKGZhY3RvcihiZF9tdWplcmVzcmwkR2VuZGVyLCBsZXZlbHMgPSBjKCJNYWxlIiwgIkZlbWFsZSIsICJOb3QgUHJvdmlkZWQiKSwgbGFiZWxzID0gYygxLCAyLCAzKSkpDQpiZF9tdWplcmVzcmwkQ2xhaW1hbnRUeXBlIDwtIGFzLm51bWVyaWMoZmFjdG9yKGJkX211amVyZXNybCRDbGFpbWFudFR5cGUsIGxldmVscyA9IGMoIk1lZGljYWwgT25seSIsICJJbmRlbW5pdHkiLCAiUmVwb3J0IE9ubHkiKSwgbGFiZWxzID0gYygxLCAyLCAzKSkpDQoNCmBgYA0KDQojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBvcmFuZ2U7Ij41LkVsaW1pbmFyIE5hJ3M8L3NwYW4+DQoNCmBgYHtyfQ0KYmQgPC0gbmEub21pdChiZF9tdWplcmVzcmwpDQpzdW1tYXJ5KGJkX211amVyZXNybCkNCmBgYA0KIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogb3JhbmdlOyI+NS5FbGltaW5hciB2YWxvcmVzIG5lZ2F0aXZvczwvc3Bhbj4NCg0KYGBge3J9DQojIEVsaW1pbmFyIHZhbG9yZXMgbmVnYXRpdm9zIGVuIENsYWltYW50QWdlX2F0X0RPSQ0KYmRfbXVqZXJlc3JsIDwtIGJkX211amVyZXNybCAlPiUNCiAgZmlsdGVyKENsYWltYW50QWdlX2F0X0RPSSA+PSAwKQ0KIyBFbGltaW5hciB2YWxvcmVzIG5lZ2F0aXZvcyBlbiBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltDQpiZF9tdWplcmVzcmwgPC0gYmRfbXVqZXJlc3JsICU+JQ0KICBmaWx0ZXIoVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA+PSAwKQ0KIyBFbGltaW5hciB2YWxvcmVzIG5lZ2F0aXZvcyBlbiBUaWVtcG9EZVByb2Nlc2FtaWVudG9Ew61hcw0KYmRfbXVqZXJlc3JsIDwtIGJkX211amVyZXNybCAlPiUNCiAgZmlsdGVyKFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMgPj0gMCkNCg0Kc3VtbWFyeShiZF9tdWplcmVzcmwpDQpgYGANCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IG9yYW5nZTsiPjYuUmVncmVzacOzbiBMaW5lYWw8L3NwYW4+DQoNCmBgYHtyfQ0KcmVncmVzaW9uIDwtIGxtKFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gfiBDbGFpbUlEICsgQ2xhaW1hbnRBZ2VfYXRfRE9JICsgQm9keVBhcnRSZWdpb24gKyBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzLCANCiAgICAgICAgICAgICAgICBkYXRhID0gYmRfbXVqZXJlc3JsKQ0Kc3VtbWFyeShyZWdyZXNpb24pDQpgYGANCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IG9yYW5nZTsiPjcuIEFqdXN0ZSBkZSB2YXJpYWJsZXMgcGFyYSBhdW1lbnRhciBSPC9zcGFuPg0KDQpgYGB7cn0NCnJlZ3Jlc2lvbiA8LSBsbShUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltIH4gQ2xhaW1hbnRBZ2VfYXRfRE9JICsgQm9keVBhcnRSZWdpb24gKyBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzLCANCiAgICAgICAgICAgICAgICBkYXRhID0g
YmRfbXVqZXJlc3JsKQ0Kc3VtbWFyeShyZWdyZXNpb24pDQpgYGANCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IG9yYW5nZTsiPjguTW9kZWxvIFByZWRpY3Rpdm88L3NwYW4+DQoNCmBgYHtyfQ0KZGF0b3MgPC0gZGF0YS5mcmFtZShDbGFpbWFudEFnZV9hdF9ET0kgPSA0My4xNCwgVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA9IDcxNy4xLCBCb2R5UGFydFJlZ2lvbiA9ICJMb3dlciBFeHRyZW1pdGllcyIpDQpwcmVkaWN0KHJlZ3Jlc2lvbiwgZGF0b3MpDQpgYGANCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IG9yYW5nZTsiPjkuUmVncmVzacOzbiBMaW5lYWwgQWp1c3RhZGE8L3NwYW4+DQoNCmBgYHtyfQ0KcmVncmVzaW9uIDwtIGxtKFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMgfiBDbGFpbWFudEFnZV9hdF9ET0kgKyBDbGFpbWFudFR5cGUgKyBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltLCANCiAgICAgICAgICAgICAgICBkYXRhID0gYmRfbXVqZXJlc3JsKQ0Kc3VtbWFyeShyZWdyZXNpb24pDQpgYGANCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IG9yYW5nZTsiPjEwLkNvbmNsdXNpw7NuPC9zcGFuPg0KDQoNCg0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMgIA0KIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMgIA0KDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBwdXJwbGU7Ij5DbMO6c3RlcnM8L3NwYW4+DQoNCg0KIVtdKEM6XFxVc2Vyc1xceGltZW5cXERvd25sb2Fkc1xcQ2x1c3RlcnNnaWYuZ2lmKQ0KDQojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBwdXJwbGU7Ij4xLklkZW50aWZpY2FyIE91dGxpZXJzPC9zcGFuPg0KDQoNCg0KYGBge3J9DQpib3hwbG90KGJkX211amVyZXNybCRUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltLCBob3Jpem9udGFsID0gVFJVRSkNCmBgYA0KYGBge3J9DQpib3hwbG90KGJkX211amVyZXNybCRUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzLCBob3Jpem9udGFsID0gVFJVRSkNCmBgYA0KDQojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBwdXJwbGU7Ij4yLiBFbGltaW5hciBPdXRsaWVyczwvc3Bhbj4NCg0KDQoNCmBgYHtyfQ0KIyBDcmVhciB1bmEgbnVldmEgYmFzZSBkZSBkYXRvcyBjb24gbGFzIGNvbHVtbmFzIGRlc2VhZGFzDQpiZF9tdWplcmVzY2wgPC0gYmRfbXVqZXJlc3JsWywgYygiQ2xhaW1JRCIsICJUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIiwgIlRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0iKV0NCg0KIyBMbGFtYXIgYSBsb3MgcmVuZ2xvbmVzIGNvbW8gQ2xhaW1JRA0Kcm93bmFtZXMoYmRfbXVqZXJlc2NsKSA8LSBiZF9tdWplcmVzY2wkQ2xhaW1JRA0KYmRfbXVqZXJlc2NsIDwtIHN1YnNldChiZF9tdWplcmVzY2wsIHNlbGVjdCA9IC1jKENsYWltSUQpKQ0KIyBWaWV3KGJkX211amVyZXNDTCkNCg0KIyBDb2x1bW5h
IGRlIFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMNCklRUl9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIDwtIElRUihiZF9tdWplcmVzY2wkVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcykNCklRUl9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzDQpgYGANCmBgYHtyfQ0Kc3VtbWFyeShiZF9tdWplcmVzY2wpDQpgYGANCmBgYHtyfQ0KTElfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSAzMyAtIDEuNSpJUVJfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcw0KTElfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcw0KYGBgDQpgYGB7cn0NCkxTX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMgPC0gODQ0ICsgMS41KklRUl9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzDQpMU19UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzDQpgYGANCmBgYHtyfQ0KY2F0KCJMSV9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzOiIsIExJX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMsICJcbiIpDQpgYGANCmBgYHtyfQ0KY2F0KCJMU19UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzOiIsIExTX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMsICJcbiIpDQpgYGANCmBgYHtyfQ0KYmRfbXVqZXJlc2NsIDwtIGJkX211amVyZXNjbFtiZF9tdWplcmVzY2wkVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8PSAyMDYxLCBdDQojIyMgTm90YTogc2UgcmVkb25kZcOzIGEgMjA2MSBwb3JxdWUgZWwgTFMgZGnDsyB1biByZXN1bHRhZG8gZGUgMjA2MC41Lg0KDQojQ29sdW1uYSBkZSBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltDQpJUVJfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8LSBJUVIoYmRfbXVqZXJlc2NsJFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0pDQpJUVJfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbQ0KYGBgDQpgYGB7cn0NCnN1bW1hcnkoYmRfbXVqZXJlc2NsKQ0KYGBgDQpgYGB7cn0NCkxJX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPC0gMCAtIDEuNSpJUVJfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbQ0KTElfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbQ0KYGBgDQpgYGB7cn0NCkxTX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPC0gMTEyNS4zICsgMS41KklRUl9Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltDQpMU19Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltDQpgYGANCmBgYHtyfQ0KY2F0KCJMSV9Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltOiIsIExJX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0sICJcbiIpDQpgYGANCmBgYHtyfQ0KY2F0KCJMU19Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltOiIsIExTX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0sICJcbiIpDQpgYGANCmBgYHtyfQ0KYmRfbXVqZXJlc2NsIDwtIGJkX211amVyZXNjbFtiZF9tdWplcmVzY2wkVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8PSAyODU5LCBdDQojIyMgTm90YTogc2UgcmVkb25kZcOzIGEgMjg1OSBwb3JxdWUgZWwgTFMgZGnDsyB1biByZXN1bHRhZG8gZGUgMjg1
OC44NjUuDQpzdW1tYXJ5KGJkX211amVyZXNjbCkNCmBgYA0KDQojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBwdXJwbGU7Ij4zLiBDcmVhciBncnVwb3M8L3NwYW4+DQoNCg0KYGBge3J9DQojIE8uIE5vcm1hbGl6YXIgdmFyaWFibGVzDQpiZF9tdWplcmVzY2wgPC0gYXMuZGF0YS5mcmFtZShzY2FsZShiZF9tdWplcmVzY2wpKQ0KDQojIDEuIENyZWFyIGJhc2UgZGUgZGF0b3MNCmJkbXVqZXJlc0NMVVNURVIgPC0gYmRfbXVqZXJlc2NsDQoNCiMgMi4gRGV0ZXJtaW5hciBlbCBuw7ptZXJvIGRlIGdydXBvcw0KZ3J1cG9zIDwtIDEwDQoNCiMgMy4gUmVhbGl6YXIgbGEgY2xhc2lmaWNhY2nDs24NCnNlZ21lbnRvcyA8LSBrbWVhbnMoYmRtdWplcmVzQ0xVU1RFUixncnVwb3MpDQoNCiMgNC4gUmV2aXNhciBsYSBhc2lnbmFjacOzbiBkZSBncnVwb3MNCmFzaWduYWNpb24gPC0gY2JpbmQoYmRtdWplcmVzQ0xVU1RFUiwgY2x1c3Rlcj1zZWdtZW50b3MkY2x1c3RlcikNCg0KIyA1LiBHcmFmaWNhciBhc2lnbmFjaW9uZXMNCiMgaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpDQpsaWJyYXJ5KGdncGxvdDIpDQojIGluc3RhbGwucGFja2FnZXMoImZhY3RvZXh0cmEiKQ0KbGlicmFyeShmYWN0b2V4dHJhKQ0KYGBgDQoNCmBgYHtyLCBmaWcud2lkdGg9MTAsIGZpZy5oZWlnaHQ9MTB9DQpmdml6X2NsdXN0ZXIoc2VnbWVudG9zLCBkYXRhID0gYmRtdWplcmVzQ0xVU1RFUiwNCiAgICAgICAgICAgICBlbGxpcHNlLnR5cGUgPSAiZXVjbGlkIiwNCiAgICAgICAgICAgICBzdGFyLnBsb3QgPSBULA0KICAgICAgICAgICAgIHJlcGVsID0gVCwNCiAgICAgICAgICAgICBnZ3RoZW1lID0gdGhlbWUoKSkNCmBgYA0KDQojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBwdXJwbGU7Ij40Lk9wdGltaXphciBsYSBjYW50aWRhZCBkZSBncnVwb3MgY3JlYWRvczwvc3Bhbj4NCg0KDQpgYGB7cn0NCmxpYnJhcnkoY2x1c3RlcikNCmxpYnJhcnkoZGF0YS50YWJsZSkNCmBgYA0KDQpgYGB7cn0NCnNldC5zZWVkKDEyMykNCm9wdGltaXphY2lvbiA8LSBjbHVzR2FwKGJkbXVqZXJlc0NMVVNURVIsIEZVTiA9IGttZWFucywgbnN0YXJ0ID0gMSwgSy5tYXggPSAxMCkNCmBgYA0KYGBge3J9DQpwbG90KG9wdGltaXphY2lvbiwgeGxhYiA9ICJOw7ptZXJvIGRlIGNsdXN0ZXJzIEsiKQ0KYGBgDQojIDxzcGFuIHN0eWxlPSJjb2xvcjogcGluazsiPsOBcmJvbDwvc3Bhbj4NCg0KYGBge3J9DQojRXh0cmFlciBsYXMgdmFyaWFibGVzIGRlIGludGVyw6lzDQptdWplcmVzYXJib2wgPC0gYmRfbXVqZXJlc3JsWyAsYygiVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSIsIkNsYWltU3RhdHVzIiwgIkNsYWltYW50QWdlX2F0X0RPSSIsIkdlbmRlciIsIkNsYWltYW50VHlwZSIsICJUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIildDQoNCnN1bShpcy5uYShtdWplcmVzYXJib2wpKQ0KDQojQ29udmVyc2nDs24gZGUgdmFyaWFibGVzIGNhdGVnw7NyaWNhcyBhIGZhY3RvcmVzDQptdWplcmVzYXJi
b2wkVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8LSBhcy5udW1lcmljKG11amVyZXNhcmJvbCRUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltKQ0KbXVqZXJlc2FyYm9sJENsYWltU3RhdHVzIDwtIGFzLmZhY3RvcihtdWplcmVzYXJib2wkQ2xhaW1TdGF0dXMpDQptdWplcmVzYXJib2wkQ2xhaW1hbnRBZ2VfYXRfRE9JIDwtIGFzLm51bWVyaWMobXVqZXJlc2FyYm9sJENsYWltYW50QWdlX2F0X0RPSSkNCm11amVyZXNhcmJvbCRHZW5kZXIgPC0gYXMuZmFjdG9yKG11amVyZXNhcmJvbCRHZW5kZXIpDQptdWplcmVzYXJib2wkQ2xhaW1hbnRUeXBlIDwtIGFzLmZhY3RvcihtdWplcmVzYXJib2wkQ2xhaW1hbnRUeXBlKQ0KbXVqZXJlc2FyYm9sJFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMgPC0gYXMubnVtZXJpYyhtdWplcmVzYXJib2wkVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcykNCnN0cihtdWplcmVzYXJib2wpDQpgYGANCg0KDQpgYGB7cn0NCmxpYnJhcnkocnBhcnQpDQpsaWJyYXJ5KHJwYXJ0LnBsb3QpDQoNCmFyYm9sIDwtIHJwYXJ0KGZvcm11bGE9IFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMgfiAuLCBkYXRhPSBtdWplcmVzYXJib2wpDQphcmJvbA0KcnBhcnQucGxvdChhcmJvbCkNCnBycChhcmJvbCkNCmBgYA0KDQoNCg0KDQo=