Contexto

1. Importar y juntar bases de datos

# Load the two raw 2018 exports. The paths are specific to the author's
# machine; run file.choose() interactively to locate the files elsewhere.
bd1 <- read.csv(file = "/Users/dannaleal/Downloads/ClaimsData2018.csv")
bd2 <- read.csv(file = "/Users/dannaleal/Downloads/TransactionsSummary2018.csv")
# Full outer join on ClaimID: rows without a match in the other table are kept.
bd <- merge(x = bd1, y = bd2, by = "ClaimID", all = TRUE)

2. Crear nueva columna para Total Incurred Cost

# install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total incurred cost per claim: reserves plus both paid amounts, net of
# whatever was recovered.
bd <- mutate(
  bd,
  Total_Incurred_Cost_Claim =
    TotalReserves + IndemnityPaid + OtherPaid - TotalRecovery
)

3. Filtrar base de datos por sólo mujeres

# Keep only the rows whose Gender is exactly "Female"; which() discards the
# rows where the comparison is NA, so missing genders are dropped as well.
bd_mujeres1 <- bd[which(bd$Gender == "Female"), ]
# View(bd_mujeres1)

4. Eliminar columnas de X

library(dplyr)
# Remove the contiguous run of columns from X through X.22
# (presumably unnamed columns of the CSV export; verify against the raw file).
bd_mujeres1 <- select(bd_mujeres1, -X:-X.22)
# View(bd_mujeres1)

5. Descargar base de datos limpia como CSV

# Save the cleaned, women-only data set in the working directory, without the
# row-name column.
write.csv(
  x = bd_mujeres1,
  file = "bd_mujeres limpia.csv",
  row.names = FALSE
)

Regresión Lineal

1. Importar la base de datos

# Re-load the cleaned, women-only data set. The file is read from the same
# relative path that write.csv() used when exporting it, so this step always
# picks up the file that was just written, whatever the working directory is
# (the previous absolute Downloads path only matched when the report was
# knitted from that folder). Use file.choose() to pick another file by hand.
bd_mujereslimpia <- read.csv("bd_mujeres limpia.csv")

2. Entender la base de datos

# Per-column overview of the re-imported data: quartiles and NA counts for
# numeric columns, length/class for character columns.
summary(bd_mujereslimpia)
##     ClaimID           TotalPaid         TotalReserves     TotalRecovery     
##  Min.   :  650919   Min.   :    -81.8   Min.   :      0   Min.   :    0.00  
##  1st Qu.:  806228   1st Qu.:     20.1   1st Qu.:      0   1st Qu.:    0.00  
##  Median :  833851   Median :    223.0   Median :      0   Median :    0.00  
##  Mean   : 8053898   Mean   :   6504.3   Mean   :   2423   Mean   :   31.13  
##  3rd Qu.: 7143280   3rd Qu.:    932.1   3rd Qu.:      0   3rd Qu.:    0.00  
##  Max.   :62203889   Max.   :2985247.9   Max.   :2069575   Max.   :90357.52  
##                                                                             
##  IndemnityPaid        OtherPaid         ClaimStatus        IncidentDate      
##  Min.   :    -1.2   Min.   :    -81.8   Length:59197       Length:59197      
##  1st Qu.:     0.0   1st Qu.:     16.4   Class :character   Class :character  
##  Median :     0.0   Median :    218.7   Mode  :character   Mode  :character  
##  Mean   :  2945.2   Mean   :   3559.1                                        
##  3rd Qu.:     0.0   3rd Qu.:    857.8                                        
##  Max.   :492934.8   Max.   :2700073.4                                        
##                                                                              
##  IncidentDescription ReturnToWorkDate   AverageWeeklyWage  ClaimantOpenedDate
##  Length:59197        Length:59197       Length:59197       Length:59197      
##  Class :character    Class :character   Class :character   Class :character  
##  Mode  :character    Mode  :character   Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:59197       Length:59197             Length:59197      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       ClaimantAge_at_DOI    Gender          ClaimantType      
##  Min.   :0.00000   Length:59197       Length:59197       Length:59197      
##  1st Qu.:0.00000   Class :character   Class :character   Class :character  
##  Median :0.00000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.05688                                                           
##  3rd Qu.:0.00000                                                           
##  Max.   :1.00000                                                           
##                                                                            
##  InjuryNature       BodyPartRegion       BodyPart         BillReviewALE     
##  Length:59197       Length:59197       Length:59197       Min.   : -456.00  
##  Class :character   Class :character   Class :character   1st Qu.:    8.25  
##  Mode  :character   Mode  :character   Mode  :character   Median :   24.00  
##                                                           Mean   :  174.80  
##                                                           3rd Qu.:   64.00  
##                                                           Max.   :20730.77  
##                                                           NA's   :46628     
##     Hospital        PhysicianOutpatient       Rx          
##  Min.   :-12570.4   Min.   :   -162.9   Min.   :  -160.7  
##  1st Qu.:   203.1   1st Qu.:    106.8   1st Qu.:    23.4  
##  Median :   572.9   Median :    220.2   Median :    61.1  
##  Mean   :  4580.8   Mean   :   1700.4   Mean   :  1357.1  
##  3rd Qu.:  2213.5   3rd Qu.:    667.2   3rd Qu.:   176.5  
##  Max.   :667973.0   Max.   :1481468.5   Max.   :380924.3  
##  NA's   :49187      NA's   :34369       NA's   :49906     
##  Total_Incurred_Cost_Claim
##  Min.   :  -2961          
##  1st Qu.:     22          
##  Median :    226          
##  Mean   :   8897          
##  3rd Qu.:    976          
##  Max.   :5054823          
## 

3. Crear una nueva base de datos para las variables que se tomarán en cuenta para la regresión

library(dplyr)
# Regression working set: claim identifier, the candidate predictors, the two
# dates needed to derive the processing time, and the incurred cost.
bd_mujeresAR <- bd_mujereslimpia[, c(
  "ClaimID",
  "ClaimStatus",
  "ClaimantAge_at_DOI",
  "Gender",
  "ClaimantType",
  "ClaimantOpenedDate",
  "ClaimantClosedDate",
  "Total_Incurred_Cost_Claim"
)]
# View(bd_mujeresAR)

4. Convertir las fechas a días y crear la nueva columna TiempoDeProcesamientoDias

# Parse both date columns with the month/day/two-digit-year pattern.
# NOTE(review): values that do not match "%m/%d/%y" become NA -- confirm the
# raw date format against the CSV.
bd_mujeresAR[c("ClaimantOpenedDate", "ClaimantClosedDate")] <- lapply(
  bd_mujeresAR[c("ClaimantOpenedDate", "ClaimantClosedDate")],
  as.Date,
  format = "%m/%d/%y"
)

# Processing time = closed date minus opened date, expressed in days
bd_mujeresAR$TiempoDeProcesamientoDias <- as.numeric(
  difftime(
    time1 = bd_mujeresAR$ClaimantClosedDate,
    time2 = bd_mujeresAR$ClaimantOpenedDate,
    units = "days"
  )
)

# The original date columns are no longer needed once the difference exists
bd_mujeresAR$ClaimantOpenedDate <- NULL
bd_mujeresAR$ClaimantClosedDate <- NULL
# View(bd_mujeresAR)
summary(bd_mujeresAR)
##     ClaimID         ClaimStatus        ClaimantAge_at_DOI    Gender         
##  Min.   :  650919   Length:59197       Length:59197       Length:59197      
##  1st Qu.:  806228   Class :character   Class :character   Class :character  
##  Median :  833851   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 8053898                                                           
##  3rd Qu.: 7143280                                                           
##  Max.   :62203889                                                           
##                                                                             
##  ClaimantType       Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Length:59197       Min.   :  -2961           Min.   :-333.0           
##  Class :character   1st Qu.:     22           1st Qu.:   0.0           
##  Mode  :character   Median :    226           Median : 245.0           
##                     Mean   :   8897           Mean   : 806.1           
##                     3rd Qu.:    976           3rd Qu.: 920.0           
##                     Max.   :5054823           Max.   :6912.0           
##                                               NA's   :54104

5. Convertir las variables de carácter a numérico

# Claim status stays a factor; the codes C / O / R are relabelled "1" / "2" / "3"
bd_mujeresAR$ClaimStatus <- factor(
  bd_mujeresAR$ClaimStatus,
  levels = c("C", "O", "R"),
  labels = c("1", "2", "3")
)
# Age arrives as text; entries that are not numbers turn into NA (hence the
# coercion warning echoed below)
bd_mujeresAR$ClaimantAge_at_DOI <- as.numeric(bd_mujeresAR$ClaimantAge_at_DOI)
## Warning: NAs introduced by coercion
# Gender and claimant type become the numeric position (1, 2, 3) of each value
# in the listed order; values outside the list become NA
bd_mujeresAR$Gender <- as.numeric(
  match(bd_mujeresAR$Gender, c("Male", "Female", "Not Provided"))
)
bd_mujeresAR$ClaimantType <- as.numeric(
  match(bd_mujeresAR$ClaimantType, c("Medical Only", "Indemnity", "Report Only"))
)

# View(bd_mujeresAR)

6. Eliminar NA’s

# Drop every row that has a missing value in any column. The result is stored
# back in bd_mujeresAR: it used to be assigned to `bd`, which left
# bd_mujeresAR unchanged (its NA's were still listed by summary()) and
# overwrote the merged raw data set.
bd_mujeresAR <- na.omit(bd_mujeresAR)
# View(bd_mujeresAR)
summary(bd_mujeresAR)
## NOTE(review): the output below was rendered before this fix, so it still
## reports NA's; re-knit the report to refresh it.
##     ClaimID         ClaimStatus ClaimantAge_at_DOI     Gender   ClaimantType  
##  Min.   :  650919   1:56900     Min.   :-8000.00   Min.   :2   Min.   :1.000  
##  1st Qu.:  806228   2: 1786     1st Qu.:   33.00   1st Qu.:2   1st Qu.:1.000  
##  Median :  833851   3:  511     Median :   43.00   Median :2   Median :1.000  
##  Mean   : 8053898               Mean   :   39.75   Mean   :2   Mean   :1.357  
##  3rd Qu.: 7143280               3rd Qu.:   52.00   3rd Qu.:2   3rd Qu.:2.000  
##  Max.   :62203889               Max.   :   89.00   Max.   :2   Max.   :3.000  
##                                 NA's   :17097                                 
##  Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Min.   :  -2961           Min.   :-333.0           
##  1st Qu.:     22           1st Qu.:   0.0           
##  Median :    226           Median : 245.0           
##  Mean   :   8897           Mean   : 806.1           
##  3rd Qu.:    976           3rd Qu.: 920.0           
##  Max.   :5054823           Max.   :6912.0           
##                            NA's   :54104

7. Eliminar valores negativos en ClaimantAge_at_DOI, Total_Incurred_Cost_Claim y TiempoDeProcesamientoDias

# Keep only rows where age, incurred cost and processing time are all
# non-negative. filter() also drops rows where any of the three comparisons
# is NA.
bd_mujeresAR <- filter(
  bd_mujeresAR,
  ClaimantAge_at_DOI >= 0,
  Total_Incurred_Cost_Claim >= 0,
  TiempoDeProcesamientoDias >= 0
)

# View(bd_mujeresAR)
summary(bd_mujeresAR)
##     ClaimID         ClaimStatus ClaimantAge_at_DOI     Gender   ClaimantType 
##  Min.   :  650919   1:4015      Min.   : 1.00      Min.   :2   Min.   :1.00  
##  1st Qu.:  823404   2:   3      1st Qu.:34.00      1st Qu.:2   1st Qu.:1.00  
##  Median : 5970814   3:   0      Median :44.00      Median :2   Median :1.00  
##  Mean   :15622363               Mean   :43.14      Mean   :2   Mean   :1.68  
##  3rd Qu.:30288888               3rd Qu.:52.00      3rd Qu.:2   3rd Qu.:2.00  
##  Max.   :61592860               Max.   :87.00      Max.   :2   Max.   :3.00  
##  Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Min.   :     0.0          Min.   :   0.0           
##  1st Qu.:     0.0          1st Qu.:  33.0           
##  Median :   171.3          Median : 301.0           
##  Mean   :  4459.3          Mean   : 717.1           
##  3rd Qu.:  1125.3          3rd Qu.: 844.0           
##  Max.   :388620.8          Max.   :6912.0

8. Generar la primera regresión lineal

# First model: incurred cost explained by every candidate predictor.
# Gender has a single value (2) in this data set, so lm() cannot estimate its
# coefficient and reports it as NA ("not defined because of singularities").
# NOTE(review): ClaimantType is a category coded 1/2/3 but enters here as a
# numeric slope -- confirm that is intended.
regresion <- lm(Total_Incurred_Cost_Claim ~ ClaimStatus + ClaimantAge_at_DOI + 
                Gender + ClaimantType + TiempoDeProcesamientoDias, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimStatus + ClaimantAge_at_DOI + 
##     Gender + ClaimantType + TiempoDeProcesamientoDias, data = bd_mujeresAR)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28757  -5382  -2119    625 359883 
## 
## Coefficients: (1 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -8599.8188  1447.1614  -5.943 3.04e-09 ***
## ClaimStatus2               -145.6059 11808.9879  -0.012     0.99    
## ClaimantAge_at_DOI          117.6975    27.5373   4.274 1.96e-05 ***
## Gender                            NA         NA      NA       NA    
## ClaimantType               2881.0214   406.1079   7.094 1.53e-12 ***
## TiempoDeProcesamientoDias     4.3814     0.3117  14.055  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20440 on 4013 degrees of freedom
## Multiple R-squared:  0.0524, Adjusted R-squared:  0.05145 
## F-statistic: 55.48 on 4 and 4013 DF,  p-value: < 2.2e-16

9. Ajustar la regresión lineal

# Adjusted cost model: ClaimStatus and Gender are left out of the formula.
# The fit is stored in the same `regresion` object, replacing the previous one.
regresion <- lm(Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI + ClaimantType + TiempoDeProcesamientoDias, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI + 
##     ClaimantType + TiempoDeProcesamientoDias, data = bd_mujeresAR)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28757  -5382  -2119    625 359883 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -8600.2016  1446.6483  -5.945 3.00e-09 ***
## ClaimantAge_at_DOI          117.6993    27.5335   4.275 1.96e-05 ***
## ClaimantType               2881.1015   406.0053   7.096 1.51e-12 ***
## TiempoDeProcesamientoDias     4.3815     0.3116  14.061  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20440 on 4014 degrees of freedom
## Multiple R-squared:  0.0524, Adjusted R-squared:  0.05169 
## F-statistic: 73.99 on 3 and 4014 DF,  p-value: < 2.2e-16

10. Construir un modelo predictivo

# Predict the incurred cost for one hypothetical claim. The predictor values
# are typed in by hand.
datos <- data.frame(
  ClaimantAge_at_DOI = 43.14,
  ClaimantType = 1.68,
  TiempoDeProcesamientoDias = 717.1
)
predict(regresion, newdata = datos)
##        1 
## 4459.569

11. Generar la segunda regresión lineal

# Second model: processing time (days) explained by every candidate
# predictor, with the incurred cost now on the right-hand side.
# Gender has a single value (2) in this data set, so its coefficient is
# reported as NA ("not defined because of singularities").
# The fit overwrites the `regresion` object of the cost model.
regresion <- lm(TiempoDeProcesamientoDias ~ ClaimStatus + ClaimantAge_at_DOI + 
                Gender + ClaimantType + Total_Incurred_Cost_Claim, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = TiempoDeProcesamientoDias ~ ClaimStatus + ClaimantAge_at_DOI + 
##     Gender + ClaimantType + Total_Incurred_Cost_Claim, data = bd_mujeresAR)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2718.6  -569.1  -289.9    50.1  6331.8 
## 
## Coefficients: (1 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.717e+03  6.655e+01  25.808   <2e-16 ***
## ClaimStatus2              -7.947e+02  5.837e+02  -1.362    0.173    
## ClaimantAge_at_DOI        -1.229e+01  1.351e+00  -9.103   <2e-16 ***
## Gender                            NA         NA      NA       NA    
## ClaimantType              -3.078e+02  1.961e+01 -15.697   <2e-16 ***
## Total_Incurred_Cost_Claim  1.071e-02  7.619e-04  14.055   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1010 on 4013 degrees of freedom
## Multiple R-squared:  0.1137, Adjusted R-squared:  0.1128 
## F-statistic: 128.7 on 4 and 4013 DF,  p-value: < 2.2e-16

12. Ajustar la regresión lineal

# Adjusted processing-time model: ClaimStatus and Gender are left out of the
# formula. The fit is stored in the same `regresion` object.
regresion <- lm(TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI + ClaimantType + Total_Incurred_Cost_Claim, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI + 
##     ClaimantType + Total_Incurred_Cost_Claim, data = bd_mujeresAR)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2719.3  -569.3  -289.7    48.3  6332.3 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.716e+03  6.655e+01  25.789   <2e-16 ***
## ClaimantAge_at_DOI        -1.229e+01  1.351e+00  -9.099   <2e-16 ***
## ClaimantType              -3.075e+02  1.961e+01 -15.682   <2e-16 ***
## Total_Incurred_Cost_Claim  1.071e-02  7.620e-04  14.061   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1011 on 4014 degrees of freedom
## Multiple R-squared:  0.1133, Adjusted R-squared:  0.1126 
## F-statistic:   171 on 3 and 4014 DF,  p-value: < 2.2e-16

13. Construir un modelo predictivo

# Predict the processing time (days) for one hypothetical claim. The predictor
# values are typed in by hand.
datos <- data.frame(
  ClaimantAge_at_DOI = 43.14,
  ClaimantType = 1.68,
  Total_Incurred_Cost_Claim = 4459.3
)
predict(regresion, newdata = datos)
##        1 
## 717.0795

Conclusiones

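Como se puede observar, en el segundo análisis de regresión se obtuvo una R cuadrada ajustada del 11.26%, mientras que en el primero se obtuvo una R cuadrada ajustada del 5.17%. Esto indica que el segundo modelo explica una mayor proporción de la variabilidad de su variable dependiente. Sin embargo, como cada regresión tiene una variable dependiente distinta (Total_Incurred_Cost_Claim en la primera y TiempoDeProcesamientoDias en la segunda), sus R cuadradas no son directamente comparables.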

Análisis Clusters

1. Entender la base de datos

# Overview of the filtered regression data that the cluster analysis starts from
summary(bd_mujeresAR)
##     ClaimID         ClaimStatus ClaimantAge_at_DOI     Gender   ClaimantType 
##  Min.   :  650919   1:4015      Min.   : 1.00      Min.   :2   Min.   :1.00  
##  1st Qu.:  823404   2:   3      1st Qu.:34.00      1st Qu.:2   1st Qu.:1.00  
##  Median : 5970814   3:   0      Median :44.00      Median :2   Median :1.00  
##  Mean   :15622363               Mean   :43.14      Mean   :2   Mean   :1.68  
##  3rd Qu.:30288888               3rd Qu.:52.00      3rd Qu.:2   3rd Qu.:2.00  
##  Max.   :61592860               Max.   :87.00      Max.   :2   Max.   :3.00  
##  Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Min.   :     0.0          Min.   :   0.0           
##  1st Qu.:     0.0          1st Qu.:  33.0           
##  Median :   171.3          Median : 301.0           
##  Mean   :  4459.3          Mean   : 717.1           
##  3rd Qu.:  1125.3          3rd Qu.: 844.0           
##  Max.   :388620.8          Max.   :6912.0

2. Identificar outliers

# Horizontal box plots of the two clustering variables to spot outliers
boxplot(x = bd_mujeresAR[["Total_Incurred_Cost_Claim"]], horizontal = TRUE)

boxplot(x = bd_mujeresAR[["TiempoDeProcesamientoDias"]], horizontal = TRUE)

3. Crear una nueva base de datos con ClaimID, TiempoDeProcesamientoDias y Total_Incurred_Cost_Claim y eliminar datos fuera de lo normal

# Clustering working set: claim id plus the two variables to cluster on
bd_mujeresCL <- bd_mujeresAR[
  c("ClaimID", "TiempoDeProcesamientoDias", "Total_Incurred_Cost_Claim")
]

# Use ClaimID as the row label, then drop it so only the two numeric
# variables remain as columns
rownames(bd_mujeresCL) <- bd_mujeresCL$ClaimID
bd_mujeresCL$ClaimID <- NULL
# View(bd_mujeresCL)

# TiempoDeProcesamientoDias column: remove outliers with the 1.5 * IQR rule.
# The quartiles are computed from the data (they used to be typed in by hand
# from the summary), so the fences stay correct if the data change.
IQR_TiempoDeProcesamientoDias <- IQR(bd_mujeresCL$TiempoDeProcesamientoDias)
IQR_TiempoDeProcesamientoDias
## [1] 811
summary(bd_mujeresCL)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :     0.0         
##  1st Qu.:  33.0            1st Qu.:     0.0         
##  Median : 301.0            Median :   171.3         
##  Mean   : 717.1            Mean   :  4459.3         
##  3rd Qu.: 844.0            3rd Qu.:  1125.3         
##  Max.   :6912.0            Max.   :388620.8
# Lower fence: Q1 - 1.5 * IQR (names = FALSE keeps the printed value unlabelled)
LI_TiempoDeProcesamientoDias <-
  quantile(bd_mujeresCL$TiempoDeProcesamientoDias, probs = 0.25, names = FALSE) -
  1.5 * IQR_TiempoDeProcesamientoDias
LI_TiempoDeProcesamientoDias
## [1] -1183.5
# Upper fence: Q3 + 1.5 * IQR
LS_TiempoDeProcesamientoDias <-
  quantile(bd_mujeresCL$TiempoDeProcesamientoDias, probs = 0.75, names = FALSE) +
  1.5 * IQR_TiempoDeProcesamientoDias
LS_TiempoDeProcesamientoDias
## [1] 2060.5
cat("LI_TiempoDeProcesamientoDias:", LI_TiempoDeProcesamientoDias, "\n")
## LI_TiempoDeProcesamientoDias: -1183.5
cat("LS_TiempoDeProcesamientoDias:", LS_TiempoDeProcesamientoDias, "\n")
## LS_TiempoDeProcesamientoDias: 2060.5
# Keep the rows at or below the upper fence. The computed fence is used
# directly instead of a hand-rounded 2061, which sat above the fence itself.
bd_mujeresCL <- bd_mujeresCL[
  bd_mujeresCL$TiempoDeProcesamientoDias <= LS_TiempoDeProcesamientoDias,
]

# Total_Incurred_Cost_Claim column: remove outliers with the 1.5 * IQR rule.
# The quartiles are computed from the current data. The upper fence used to
# add 1.5 * IQR to a hand-typed Q3 of 1125.3, which was the value from before
# the processing-time filter; the current Q3 is the 1155.7 shown in the
# summary below.
IQR_Total_Incurred_Cost_Claim <- IQR(bd_mujeresCL$Total_Incurred_Cost_Claim)
IQR_Total_Incurred_Cost_Claim
## [1] 1155.71
summary(bd_mujeresCL)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :     0.0         
##  1st Qu.:   4.0            1st Qu.:     0.0         
##  Median : 235.0            Median :   177.8         
##  Mean   : 398.6            Mean   :  3707.7         
##  3rd Qu.: 548.0            3rd Qu.:  1155.7         
##  Max.   :2058.0            Max.   :246847.9
# Lower fence: Q1 - 1.5 * IQR (names = FALSE keeps the printed value unlabelled)
LI_Total_Incurred_Cost_Claim <-
  quantile(bd_mujeresCL$Total_Incurred_Cost_Claim, probs = 0.25, names = FALSE) -
  1.5 * IQR_Total_Incurred_Cost_Claim
LI_Total_Incurred_Cost_Claim
## [1] -1733.565
# Upper fence: Q3 + 1.5 * IQR
LS_Total_Incurred_Cost_Claim <-
  quantile(bd_mujeresCL$Total_Incurred_Cost_Claim, probs = 0.75, names = FALSE) +
  1.5 * IQR_Total_Incurred_Cost_Claim
LS_Total_Incurred_Cost_Claim
## NOTE(review): not re-rendered after the fix. The earlier run printed
## [1] 2858.865 because it used the stale Q3 of 1125.3.
cat("LI_Total_Incurred_Cost_Claim:", LI_Total_Incurred_Cost_Claim, "\n")
## LI_Total_Incurred_Cost_Claim: -1733.565
cat("LS_Total_Incurred_Cost_Claim:", LS_Total_Incurred_Cost_Claim, "\n")
## NOTE(review): not re-rendered after the fix (earlier run: 2858.865).
# Keep the rows at or below the computed upper fence (no hand-rounded cutoff)
bd_mujeresCL <- bd_mujeresCL[
  bd_mujeresCL$Total_Incurred_Cost_Claim <= LS_Total_Incurred_Cost_Claim,
]
summary(bd_mujeresCL)
## NOTE(review): the summary below comes from the run made before the fix
## (cutoff 2859); re-knit the report to refresh it.
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :   0.00          
##  1st Qu.:   0.0            1st Qu.:   0.00          
##  Median : 175.0            Median :  98.62          
##  Mean   : 337.7            Mean   : 372.37          
##  3rd Qu.: 460.5            3rd Qu.: 428.27          
##  Max.   :2058.0            Max.   :2851.58

4. Asignación de grupos

# 0. Standardize both variables with scale() so they are on a comparable
#    scale before distances are computed
bd_mujeresCL <- as.data.frame(scale(bd_mujeresCL))

# 1. Copy used as the clustering input
bdmujeresCLUSTER <- bd_mujeresCL

# 2. Number of groups
grupos <- 10

# 3. Run k-means. kmeans() picks its starting centers at random, so the seed
#    is fixed first; without it every knit produces a different partition.
set.seed(123)
segmentos <- kmeans(bdmujeresCLUSTER, centers = grupos)

# 4. Attach the assigned cluster to every observation
asignacion <- cbind(bdmujeresCLUSTER, cluster = segmentos$cluster)

# 5. Plot the assignments
# install.packages("ggplot2")
library(ggplot2)
# install.packages("factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# TRUE is spelled out: T is an ordinary variable that can be reassigned
fviz_cluster(segmentos, data = bdmujeresCLUSTER,
             palette = c("darkorchid", "darkorange2", "aquamarine2", "pink", "blue", "darkolivegreen1", "salmon1", "skyblue3", "slategray2", "yellow"),
             ellipse.type = "euclid",
             star.plot = TRUE,
             repel = TRUE,
             ggtheme = theme())

# 6. Optimizar la cantidad de grupos
library(cluster)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# Gap statistic for K = 1..10. The seed makes the bootstrap reproducible.
set.seed(123)
# iter.max is forwarded to kmeans() through clusGap()'s `...`. With the
# default limit the earlier run warned "did not converge in 10 iterations",
# i.e. the gap values were computed from unfinished clusterings.
# FUNcluster is spelled out instead of relying on partial matching of `FUN`.
optimizacion <- clusGap(bdmujeresCLUSTER, FUNcluster = kmeans, nstart = 1,
                        iter.max = 100, K.max = 10)
plot(optimizacion, xlab = "Número de clusters K")

5. Conclusiones

Dado que el punto más alto de la gráfica corresponde a K = 10, la cantidad óptima de grupos es 10.

Árbol de Decisiones

1. Extraer las variables de interés

# Variables used by the decision tree (ClaimID is left out)
mujeresarbol <- subset(
  bd_mujeresAR,
  select = c(
    Total_Incurred_Cost_Claim,
    ClaimStatus,
    ClaimantAge_at_DOI,
    Gender,
    ClaimantType,
    TiempoDeProcesamientoDias
  )
)

# Count of missing cells across the whole table
sum(is.na(mujeresarbol))
## [1] 0

2. Conversión de variables categóricas a factores

# Continuous variables are stored as numeric ...
columnas_numericas <- c(
  "Total_Incurred_Cost_Claim", "ClaimantAge_at_DOI", "TiempoDeProcesamientoDias"
)
mujeresarbol[columnas_numericas] <- lapply(
  mujeresarbol[columnas_numericas], as.numeric
)
# ... and the coded categories as factors, so the tree treats them as classes
columnas_factor <- c("ClaimStatus", "Gender", "ClaimantType")
mujeresarbol[columnas_factor] <- lapply(
  mujeresarbol[columnas_factor], as.factor
)
str(mujeresarbol)
## 'data.frame':    4018 obs. of  6 variables:
##  $ Total_Incurred_Cost_Claim: num  43108 390.2 0 106.4 19.6 ...
##  $ ClaimStatus              : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ClaimantAge_at_DOI       : num  41 38 51 35 42 51 47 36 47 47 ...
##  $ Gender                   : Factor w/ 1 level "2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ClaimantType             : Factor w/ 3 levels "1","2","3": 2 1 1 1 1 1 1 2 1 1 ...
##  $ TiempoDeProcesamientoDias: num  848 2464 2856 2679 3256 ...

3. Generar el árbol de decisión

library(rpart)
library(rpart.plot)

# Regression tree for the processing time, using every other column of
# mujeresarbol as a candidate predictor
arbol <- rpart(TiempoDeProcesamientoDias ~ ., data = mujeresarbol)
print(arbol)
## n= 4018 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 4018 4623308000  717.11470  
##    2) ClaimantType=3 904  330852900  129.85840  
##      4) Total_Incurred_Cost_Claim< 25 890  219013300   89.88764 *
##      5) Total_Incurred_Cost_Claim>=25 14   20024210 2670.85700 *
##    3) ClaimantType=1,2 3114 3890188000  887.59630  
##      6) Total_Incurred_Cost_Claim>=51.975 2443 2068233000  787.16950  
##       12) Total_Incurred_Cost_Claim< 208180.3 2430 1927711000  772.19140  
##         24) Total_Incurred_Cost_Claim< 12823.46 2155 1650736000  718.96470 *
##         25) Total_Incurred_Cost_Claim>=12823.46 275  223025800 1189.29500 *
##       13) Total_Incurred_Cost_Claim>=208180.3 13   38075070 3586.92300 *
##      7) Total_Incurred_Cost_Claim< 51.975 671 1707609000 1253.23400  
##       14) ClaimantAge_at_DOI>=48.5 196  247688700  726.56120 *
##       15) ClaimantAge_at_DOI< 48.5 475 1383120000 1470.55600 *
# Draw the fitted tree twice, once with each plotting function, both with
# their default settings
rpart.plot(arbol)

prp(arbol)

LS0tCnRpdGxlOiAiTTQgQWN0aXZpZGFkIE1lZGlvIFTDqXJtaW5vIgphdXRob3I6ICJEYW5uYSBMZWFsIEEwMDgzMTY5OCIKZGF0ZTogIjI4LzkvMjAyMyIKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiBUUlVFCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIHRoZW1lOiAiYm9vdHN0cmFwIgogICAgaGlnaGxpZ2h0OiAiZXNwcmVzc28iCi0tLQojIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPkNvbnRleHRvPC9zcGFuPgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+MS4gSW1wb3J0YXIgeSBqdW50YXIgYmFzZXMgZGUgZGF0b3M8L3NwYW4+CmBgYHtyfQojIGZpbGUuY2hvb3NlKCkKYmQxIDwtIHJlYWQuY3N2KCIvVXNlcnMvZGFubmFsZWFsL0Rvd25sb2Fkcy9DbGFpbXNEYXRhMjAxOC5jc3YiKQpiZDIgPC0gcmVhZC5jc3YoIi9Vc2Vycy9kYW5uYWxlYWwvRG93bmxvYWRzL1RyYW5zYWN0aW9uc1N1bW1hcnkyMDE4LmNzdiIpCmJkIDwtIG1lcmdlKGJkMSwgYmQyLCBieT0iQ2xhaW1JRCIsYWxsPVRSVUUpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjIuIENyZWFyIG51ZXZhIGNvbHVtbmEgcGFyYSBUb3RhbCBJbmN1cnJlZCBDb3N0PC9zcGFuPgpgYGB7cn0KIyBpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpCmxpYnJhcnkoZHBseXIpCgpiZCA8LSBiZCAlPiUgCiAgbXV0YXRlKFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPSBUb3RhbFJlc2VydmVzICsgSW5kZW1uaXR5UGFpZCArIE90aGVyUGFpZCAtIFRvdGFsUmVjb3ZlcnkpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjMuIEZpbHRyYXIgYmFzZSBkZSBkYXRvcyBwb3Igc8OzbG8gbXVqZXJlczwvc3Bhbj4KYGBge3J9CmJkX211amVyZXMxIDwtIHN1YnNldChiZCwgR2VuZGVyID09ICJGZW1hbGUiKQojIFZpZXcoYmRfbXVqZXJlczEpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjQuIEVsaW1pbmFyIGNvbHVtbmFzIGRlIFg8L3NwYW4+CmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQpiZF9tdWplcmVzMSA8LSBiZF9tdWplcmVzMSAlPiUKICBzZWxlY3QoLVg6LVguMjIpCmBgYAoKYGBge3J9CiMgVmlldyhiZF9tdWplcmVzMSkKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+NS4gRGVzY2FyZ2FyIGJhc2UgZGUgZGF0b3MgbGltcGlhIGNvbW8gQ1NWPC9zcGFuPgpgYGB7cn0Kd3JpdGUuY3N2KGJkX211amVyZXMxLCAiYmRfbXVqZXJlcyBsaW1waWEuY3N2Iiwgcm93Lm5hbWVzID0gRkFMU0UpCmBgYAoKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+UmVncmVzacOzbiBMaW5lYWw8L3NwYW4+CiFbXSgvVXNlcnMvZGFubmFsZWFsL0Rlc2t0b3AvYW5hbHlzaXNnaWYuZ2lmKSAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij4xLiBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zPC9zcGFuPgpgYGB7cn0KIyBmaWxlLmNob29zZSgp
CmJkX211amVyZXNsaW1waWEgPC0gcmVhZC5jc3YoIi9Vc2Vycy9kYW5uYWxlYWwvRG93bmxvYWRzL2JkX211amVyZXMgbGltcGlhLmNzdiIpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogI0ZGMTQ5MzsiPjIuIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3M8L3NwYW4+CmBgYHtyfQpzdW1tYXJ5KGJkX211amVyZXNsaW1waWEpCmBgYAojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+My4gQ3JlYXIgdW5hIG51ZXZhIGJhc2UgZGUgZGF0b3MgcGFyYSBsYXMgdmFyaWFibGVzIHF1ZSBzZSB0b21hcsOhbiBlbiBjdWVudGEgcGFyYSBsYSByZWdyZXNpw7NuPC9zcGFuPgpgYGB7cn0KbGlicmFyeShkcGx5cikKIyBDcmVhciB1bmEgbnVldmEgYmFzZSBkZSBkYXRvcyBjb24gbGFzIGNvbHVtbmFzIGRlc2VhZGFzCmJkX211amVyZXNBUiA8LSBiZF9tdWplcmVzbGltcGlhICU+JQogIHNlbGVjdChDbGFpbUlELCBDbGFpbVN0YXR1cywgQ2xhaW1hbnRBZ2VfYXRfRE9JLCBHZW5kZXIsIENsYWltYW50VHlwZSwgQ2xhaW1hbnRPcGVuZWREYXRlLCBDbGFpbWFudENsb3NlZERhdGUsIFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0pCiMgVmlldyhiZF9tdWplcmVzQVIpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogI0ZGMTQ5MzsiPjQuIENvbnZlcnRpciBsYXMgZmVjaGFzIGEgZMOtYXMgeSBjcmVhciBudWV2YSBjb2x1bW5hIGRlIFRpZW1wb2RlUHJvY2VzYW1pZW50b0RpYXM8L3NwYW4+CmBgYHtyfQpiZF9tdWplcmVzQVIkQ2xhaW1hbnRPcGVuZWREYXRlIDwtIGFzLkRhdGUoYmRfbXVqZXJlc0FSJENsYWltYW50T3BlbmVkRGF0ZSwgZm9ybWF0ID0gIiVtLyVkLyV5IikKYmRfbXVqZXJlc0FSJENsYWltYW50Q2xvc2VkRGF0ZSA8LSBhcy5EYXRlKGJkX211amVyZXNBUiRDbGFpbWFudENsb3NlZERhdGUsIGZvcm1hdCA9ICIlbS8lZC8leSIpCgojIENhbGN1bGFyIGxhIGRpZmVyZW5jaWEgZW4gZMOtYXMgZW50cmUgbGFzIGZlY2hhcwpiZF9tdWplcmVzQVIkVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSBhcy5udW1lcmljKGRpZmZ0aW1lKGJkX211amVyZXNBUiRDbGFpbWFudENsb3NlZERhdGUsIGJkX211amVyZXNBUiRDbGFpbWFudE9wZW5lZERhdGUsIHVuaXRzID0gImRheXMiKSkKCiMgRWxpbWluYXIgbGFzIGNvbHVtbmFzIG9yaWdpbmFsZXMgZGUgZmVjaGEKYmRfbXVqZXJlc0FSIDwtIGJkX211amVyZXNBUlssICEobmFtZXMoYmRfbXVqZXJlc0FSKSAlaW4lIGMoIkNsYWltYW50T3BlbmVkRGF0ZSIsICJDbGFpbWFudENsb3NlZERhdGUiKSldCiMgVmlldyhiZF9tdWplcmVzQVIpCmBgYAoKYGBge3J9CnN1bW1hcnkoYmRfbXVqZXJlc0FSKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij41LiBDb252ZXJ0aXIgbGFzIHZhcmlhYmxlcyBkZSBjYXLDoWN0ZXIgYSBuw7ptZXJpY288L3NwYW4+CmBgYHtyfQpiZF9tdWplcmVzQVIkQ2xhaW1TdGF0dXMgPC0gZmFjdG9yKGJkX211amVyZXNBUiRDbGFp
bVN0YXR1cywgbGV2ZWxzID0gYygiQyIsICJPIiwgIlIiKSwgbGFiZWxzID0gYygxLCAyLCAzKSkKYmRfbXVqZXJlc0FSJENsYWltYW50QWdlX2F0X0RPSSA8LSBhcy5udW1lcmljKGJkX211amVyZXNBUiRDbGFpbWFudEFnZV9hdF9ET0kpCmJkX211amVyZXNBUiRHZW5kZXIgPC0gYXMubnVtZXJpYyhmYWN0b3IoYmRfbXVqZXJlc0FSJEdlbmRlciwgbGV2ZWxzID0gYygiTWFsZSIsICJGZW1hbGUiLCAiTm90IFByb3ZpZGVkIiksIGxhYmVscyA9IGMoMSwgMiwgMykpKQpiZF9tdWplcmVzQVIkQ2xhaW1hbnRUeXBlIDwtIGFzLm51bWVyaWMoZmFjdG9yKGJkX211amVyZXNBUiRDbGFpbWFudFR5cGUsIGxldmVscyA9IGMoIk1lZGljYWwgT25seSIsICJJbmRlbW5pdHkiLCAiUmVwb3J0IE9ubHkiKSwgbGFiZWxzID0gYygxLCAyLCAzKSkpCgojIFZpZXcoYmRfbXVqZXJlc0FSKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij42LiBFbGltaW5hciBOQSdzPC9zcGFuPgpgYGB7cn0KYmQgPC0gbmEub21pdChiZF9tdWplcmVzQVIpCiMgVmlldyhiZF9tdWplcmVzQVIpCmBgYAoKYGBge3J9CnN1bW1hcnkoYmRfbXVqZXJlc0FSKQpgYGAKCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+Ny4gRWxpbWluYXIgdmFsb3JlcyBuZWdhdGl2b3MgZW4gQ2xhaW1hbnRBZ2VfYXRfRE9JLCAgVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSB5IFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXM8L3NwYW4+CmBgYHtyfQojIEVsaW1pbmFyIHZhbG9yZXMgbmVnYXRpdm9zIGVuIENsYWltYW50QWdlX2F0X0RPSQpiZF9tdWplcmVzQVIgPC0gYmRfbXVqZXJlc0FSICU+JQogIGZpbHRlcihDbGFpbWFudEFnZV9hdF9ET0kgPj0gMCkKIyBFbGltaW5hciB2YWxvcmVzIG5lZ2F0aXZvcyBlbiBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltCmJkX211amVyZXNBUiA8LSBiZF9tdWplcmVzQVIgJT4lCiAgZmlsdGVyKFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPj0gMCkKIyBFbGltaW5hciB2YWxvcmVzIG5lZ2F0aXZvcyBlbiBUaWVtcG9EZVByb2Nlc2FtaWVudG9Ew61hcwpiZF9tdWplcmVzQVIgPC0gYmRfbXVqZXJlc0FSICU+JQogIGZpbHRlcihUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzID49IDApCgojIFZpZXcoYmRfbXVqZXJlc0FSKQpzdW1tYXJ5KGJkX211amVyZXNBUikKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+OC4gR2VuZXJhciBsYSBwcmltZXIgcmVncmVzacOzbiBsaW5lYWw8L3NwYW4+CmBgYHtyfQpyZWdyZXNpb24gPC0gbG0oVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSB+IENsYWltU3RhdHVzICsgQ2xhaW1hbnRBZ2VfYXRfRE9JICsgCiAgICAgICAgICAgICAgICBHZW5kZXIgKyBDbGFpbWFudFR5cGUgKyBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzLCAKICAgICAgICAgICAgICAgIGRhdGEgPSBiZF9tdWplcmVzQVIpCnN1bW1hcnkocmVncmVzaW9uKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0i
Y29sb3I6ICNGRjE0OTM7Ij45LiBBanVzdGFyIGxhIHJlZ3Jlc2nDs24gbGluZWFsPC9zcGFuPgpgYGB7cn0KcmVncmVzaW9uIDwtIGxtKFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gfiBDbGFpbWFudEFnZV9hdF9ET0kgKyBDbGFpbWFudFR5cGUgKyBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzLCAKICAgICAgICAgICAgICAgIGRhdGEgPSBiZF9tdWplcmVzQVIpCnN1bW1hcnkocmVncmVzaW9uKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij4xMC4gQ29uc3RydWlyIHVuIG1vZGVsbyBwcmVkaWN0aXZvPC9zcGFuPgpgYGB7cn0KZGF0b3MgPC0gZGF0YS5mcmFtZShDbGFpbWFudEFnZV9hdF9ET0kgPSA0My4xNCwgQ2xhaW1hbnRUeXBlID0gMS42OCwgCiAgICAgICAgICAgICAgICAgICAgVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA9IDcxNy4xKQpwcmVkaWN0KHJlZ3Jlc2lvbiwgZGF0b3MpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogI0ZGMTQ5MzsiPjExLiBHZW5lcmFyIGxhIHNlZ3VuZGEgcmVncmVzacOzbiBsaW5lYWw8L3NwYW4+CmBgYHtyfQpyZWdyZXNpb24gPC0gbG0oVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyB+IENsYWltU3RhdHVzICsgQ2xhaW1hbnRBZ2VfYXRfRE9JICsgCiAgICAgICAgICAgICAgICBHZW5kZXIgKyBDbGFpbWFudFR5cGUgKyBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltLCAKICAgICAgICAgICAgICAgIGRhdGEgPSBiZF9tdWplcmVzQVIpCnN1bW1hcnkocmVncmVzaW9uKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij4xMi4gQWp1c3RhciBsYSByZWdyZXNpw7NuIGxpbmVhbDwvc3Bhbj4KYGBge3J9CnJlZ3Jlc2lvbiA8LSBsbShUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIH4gQ2xhaW1hbnRBZ2VfYXRfRE9JICsgQ2xhaW1hbnRUeXBlICsgVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSwgCiAgICAgICAgICAgICAgICBkYXRhID0gYmRfbXVqZXJlc0FSKQpzdW1tYXJ5KHJlZ3Jlc2lvbikKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+MTMuIENvbnN0cnVpciB1biBtb2RlbG8gcHJlZGljdGl2bzwvc3Bhbj4KYGBge3J9CmRhdG9zIDwtIGRhdGEuZnJhbWUoQ2xhaW1hbnRBZ2VfYXRfRE9JID0gNDMuMTQsIENsYWltYW50VHlwZSA9IDEuNjgsIAogICAgICAgICAgICAgICAgICAgIFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPSA0NDU5LjMpCnByZWRpY3QocmVncmVzaW9uLCBkYXRvcykKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+Q29uY2x1c2lvbmVzPC9zcGFuPgpDb21vIHNlIHB1ZWRlIG9ic2VydmFyLCBlbiBlbCBzZWd1bmRvIGFuw6FsaXNpcyBkZSByZWdyZXNpw7NuIHNlIG9idHV2byB1bmEgClIgY3VhZHJhZGEgYWp1c3RhZGEgZGVsIDExLjI2JSwgbWllbnRyYXMgcXVlIGVuIGxhIHByaW1lcmEgc2Ugb2J0dXZvIHVuYSAKUiBjdWFkcmFkYSBhanVzdGFkYSBkZWwgNS4xNiUuIEVzdG8g
bm9zIGluZGljYSBxdWUgZWwgc2VndW5kbyBhbsOhbGlzaXMgZGUgCnJlZ3Jlc2nDs24gc2UgYWp1c3RhIG1lam9yIGFsIG1vZGVsbyBkZSBsb3MgZGF0b3MuCgoKIyA8c3BhbiBzdHlsZT0iY29sb3I6ICM2NkNEQUE7Ij5BbsOhbGlzaXMgQ2x1c3RlcnM8L3NwYW4+CiFbXSgvVXNlcnMvZGFubmFsZWFsL0Rlc2t0b3AvY2x1c3RlcmdpZi5naWYpIAoKIyMjIDxzcGFuIHN0eWxlID0gImNvbG9yOiAjNjZDREFBOyI+MS4gRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvczwvc3Bhbj4KCmBgYHtyfQpzdW1tYXJ5IChiZF9tdWplcmVzQVIpCmBgYAoKIyMjIDxzcGFuIHN0eWxlID0gImNvbG9yOiAjNjZDREFBOyI+Mi4gSWRlbnRpZmljYXIgb3V0bGllcnM8L3NwYW4+IApgYGB7cn0KYm94cGxvdChiZF9tdWplcmVzQVIkVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSwgaG9yaXpvbnRhbCA9IFRSVUUpCmJveHBsb3QoYmRfbXVqZXJlc0FSJFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMsIGhvcml6b250YWwgPSBUUlVFKQpgYGAKCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogIzY2Q0RBQTsiPjMuIENyZWFyIHVuYSBudWV2YSBiYXNlIGRlIGRhdG9zIGNvbiBDbGFpbUlELCBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIHkgVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSB5IGVsaW1pbmFyIGRhdG9zIGZ1ZXJhIGRlIGxvIG5vcm1hbDwvc3Bhbj4KYGBge3J9CiMgQ3JlYXIgdW5hIG51ZXZhIGJhc2UgZGUgZGF0b3MgY29uIGxhcyBjb2x1bW5hcyBkZXNlYWRhcwpiZF9tdWplcmVzQ0wgPC0gYmRfbXVqZXJlc0FSWywgYygiQ2xhaW1JRCIsICJUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIiwgIlRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0iKV0KCiMgTGxhbWFyIGEgbG9zIHJlbmdsb25lcyBjb21vIENsYWltSUQKcm93bmFtZXMoYmRfbXVqZXJlc0NMKSA8LSBiZF9tdWplcmVzQ0wkQ2xhaW1JRApiZF9tdWplcmVzQ0wgPC0gc3Vic2V0KGJkX211amVyZXNDTCwgc2VsZWN0ID0gLWMoQ2xhaW1JRCkpCiMgVmlldyhiZF9tdWplcmVzQ0wpCgojIENvbHVtbmEgZGUgVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcwpJUVJfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSBJUVIoYmRfbXVqZXJlc0NMJFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMpCklRUl9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzCnN1bW1hcnkoYmRfbXVqZXJlc0NMKQpMSV9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIDwtIDMzIC0gMS41KklRUl9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzCkxJX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMKTFNfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSA4NDQgKyAxLjUqSVFSX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMKTFNfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcwpjYXQoIkxJX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXM6IiwgTElfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcywgIlxuIikKY2F0KCJMU19UaWVtcG9EZVByb2Nlc2Ft
aWVudG9EaWFzOiIsIExTX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMsICJcbiIpCgpiZF9tdWplcmVzQ0wgPC0gYmRfbXVqZXJlc0NMW2JkX211amVyZXNDTCRUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIDw9IDIwNjEsIF0KIyMjIE5vdGE6IHNlIHJlZG9uZGXDsyBhIDIwNjEgcG9ycXVlIGVsIExTIGRpw7MgdW4gcmVzdWx0YWRvIGRlIDIwNjAuNS4KCiNDb2x1bW5hIGRlIFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0KSVFSX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPC0gSVFSKGJkX211amVyZXNDTCRUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltKQpJUVJfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbQpzdW1tYXJ5KGJkX211amVyZXNDTCkKTElfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8LSAwIC0gMS41KklRUl9Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltCkxJX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0KTFNfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8LSAxMTI1LjMgKyAxLjUqSVFSX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0KTFNfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbQpjYXQoIkxJX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW06IiwgTElfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSwgIlxuIikKY2F0KCJMU19Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltOiIsIExTX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0sICJcbiIpCgpiZF9tdWplcmVzQ0wgPC0gYmRfbXVqZXJlc0NMW2JkX211amVyZXNDTCRUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltIDw9IDI4NTksIF0KIyMjIE5vdGE6IHNlIHJlZG9uZGXDsyBhIDI4NTkgcG9ycXVlIGVsIExTIGRpw7MgdW4gcmVzdWx0YWRvIGRlIDI4NTguODY1LgpzdW1tYXJ5KGJkX211amVyZXNDTCkKYGBgCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogIzY2Q0RBQTsiPjQuIEFzaWduYWNpw7NuIGRlIGdydXBvczwvc3Bhbj4KYGBge3J9CiMgTy4gTm9ybWFsaXphciB2YXJpYWJsZXMKYmRfbXVqZXJlc0NMIDwtIGFzLmRhdGEuZnJhbWUoc2NhbGUoYmRfbXVqZXJlc0NMKSkKCiMgMS4gQ3JlYXIgYmFzZSBkZSBkYXRvcwpiZG11amVyZXNDTFVTVEVSIDwtIGJkX211amVyZXNDTAoKIyAyLiBEZXRlcm1pbmFyIGVsIG7Dum1lcm8gZGUgZ3J1cG9zCmdydXBvcyA8LSAxMAoKIyAzLiBSZWFsaXphciBsYSBjbGFzaWZpY2FjacOzbgpzZWdtZW50b3MgPC0ga21lYW5zKGJkbXVqZXJlc0NMVVNURVIsZ3J1cG9zKQoKIyA0LiBSZXZpc2FyIGxhIGFzaWduYWNpw7NuIGRlIGdydXBvcwphc2lnbmFjaW9uIDwtIGNiaW5kKGJkbXVqZXJlc0NMVVNURVIsIGNsdXN0ZXI9c2VnbWVudG9zJGNsdXN0ZXIpCgojIDUuIEdyYWZpY2FyIGFzaWduYWNpb25lcwojIGluc3RhbGwucGFja2FnZXMoImdncGxvdDIiKQpsaWJyYXJ5KGdncGxvdDIpCiMgaW5zdGFsbC5wYWNrYWdlcygiZmFjdG9leHRyYSIpCmxpYnJhcnkoZmFjdG9leHRyYSkKCmZ2aXpfY2x1c3RlcihzZWdtZW50b3MsIGRhdGEg
PSBiZG11amVyZXNDTFVTVEVSLAogICAgICAgICAgICAgcGFsZXR0ZSA9IGMoImRhcmtvcmNoaWQiLCAiZGFya29yYW5nZTIiLCAiYXF1YW1hcmluZTIiLCAicGluayIsICJibHVlIiwgImRhcmtvbGl2ZWdyZWVuMSIsICJzYWxtb24xIiwgInNreWJsdWUzIiwgInNsYXRlZ3JheTIiLCAieWVsbG93IiksCiAgICAgICAgICAgICBlbGxpcHNlLnR5cGUgPSAiZXVjbGlkIiwKICAgICAgICAgICAgIHN0YXIucGxvdCA9IFQsCiAgICAgICAgICAgICByZXBlbCA9IFQsCiAgICAgICAgICAgICBnZ3RoZW1lID0gdGhlbWUoKSkKCiMgNi4gT3B0aW1pemFyIGxhIGNhbnRpZGFkIGRlIGdydXBvcwpsaWJyYXJ5KGNsdXN0ZXIpCmxpYnJhcnkoZGF0YS50YWJsZSkKCnNldC5zZWVkKDEyMykKb3B0aW1pemFjaW9uIDwtIGNsdXNHYXAoYmRtdWplcmVzQ0xVU1RFUiwgRlVOID0ga21lYW5zLCBuc3RhcnQgPSAxLCBLLm1heCA9IDEwKSAKcGxvdChvcHRpbWl6YWNpb24sIHhsYWIgPSAiTsO6bWVybyBkZSBjbHVzdGVycyBLIikKYGBgCgojIyMgPHNwYW4gc3R5bGUgPSAiY29sb3I6ICM2NkNEQUE7Ij41LiBDb25jbHVzaW9uZXM8L3NwYW4+CkRhZG8gcXVlIGVsIHB1bnRvIG3DoXMgYWx0byBlbiBsYSBncsOhZmljYSBlcyAxMCwgZXNvIG5vcyBpbmRpY2EgcXVlIGxhIGNhbnRpZGFkCmRlIGdydXBvcyDDs3B0aW1vIGVzIDEwLgoKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjQkYzRUZGOyI+w4FyYm9sIGRlIERlY2lzaW9uZXM8L3NwYW4+CiFbXSgvVXNlcnMvZGFubmFsZWFsL0Rlc2t0b3AvYXJib2xkZWNpc2lvbmVzLmdpZikgCgojIyMgPHNwYW4gc3R5bGUgPSAiY29sb3I6ICNCRjNFRkY7Ij4xLiBFeHRyYWVyIGxhcyB2YXJpYWJsZXMgZGUgaW50ZXLDqXM8L3NwYW4+CmBgYHtyfQptdWplcmVzYXJib2wgPC0gYmRfbXVqZXJlc0FSWyAsYygiVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSIsIkNsYWltU3RhdHVzIiwgIkNsYWltYW50QWdlX2F0X0RPSSIsIkdlbmRlciIsIkNsYWltYW50VHlwZSIsICJUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIildCgpzdW0oaXMubmEobXVqZXJlc2FyYm9sKSkKYGBgCgojIyMgPHNwYW4gc3R5bGUgPSAiY29sb3I6ICNCRjNFRkY7Ij4yLiBDb252ZXJzacOzbiBkZSB2YXJpYWJsZXMgY2F0ZWfDs3JpY2FzIGEgZmFjdG9yZXM8L3NwYW4+CmBgYHtyfQptdWplcmVzYXJib2wkVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8LSBhcy5udW1lcmljKG11amVyZXNhcmJvbCRUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltKQptdWplcmVzYXJib2wkQ2xhaW1TdGF0dXMgPC0gYXMuZmFjdG9yKG11amVyZXNhcmJvbCRDbGFpbVN0YXR1cykKbXVqZXJlc2FyYm9sJENsYWltYW50QWdlX2F0X0RPSSA8LSBhcy5udW1lcmljKG11amVyZXNhcmJvbCRDbGFpbWFudEFnZV9hdF9ET0kpCm11amVyZXNhcmJvbCRHZW5kZXIgPC0gYXMuZmFjdG9yKG11amVyZXNhcmJvbCRHZW5kZXIpCm11amVyZXNhcmJvbCRDbGFpbWFudFR5cGUgPC0gYXMuZmFjdG9y
KG11amVyZXNhcmJvbCRDbGFpbWFudFR5cGUpCm11amVyZXNhcmJvbCRUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIDwtIGFzLm51bWVyaWMobXVqZXJlc2FyYm9sJFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMpCnN0cihtdWplcmVzYXJib2wpCmBgYAoKIyMjIDxzcGFuIHN0eWxlID0gImNvbG9yOiAjQkYzRUZGOyI+My4gR2VuZXJhciBlbCDDoXJib2wgZGUgZGVjaXNpw7NuPC9zcGFuPgpgYGB7cn0KbGlicmFyeShycGFydCkKbGlicmFyeShycGFydC5wbG90KQoKYXJib2wgPC0gcnBhcnQoZm9ybXVsYT0gVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyB+IC4sIGRhdGE9IG11amVyZXNhcmJvbCkKYXJib2wKcnBhcnQucGxvdChhcmJvbCkKcHJwKGFyYm9sKQpgYGAKCgoKCgoKCgoKCgoKCgoK