Contexto

1. Importar y juntar bases de datos

# file.choose()  # interactive helper for locating the files; kept disabled
# Load both 2018 source tables and join them on ClaimID. all = TRUE makes
# this a full outer join, so claims present in only one file are kept.
bd1 <- read.csv(file = "/Users/dannaleal/Downloads/ClaimsData2018.csv")
bd2 <- read.csv(file = "/Users/dannaleal/Downloads/TransactionsSummary2018.csv")
bd <- merge(x = bd1, y = bd2, by = "ClaimID", all = TRUE)

2. Crear nueva columna para Total Incurred Cost

# install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total incurred cost per claim: reserves plus both paid amounts, minus
# recoveries. Stored as a new column of bd.
bd$Total_Incurred_Cost_Claim <- with(
  bd,
  TotalReserves + IndemnityPaid + OtherPaid - TotalRecovery
)

3. Filtrar base de datos por sólo mujeres

# Keep only the rows whose Gender is exactly "Female". which() discards rows
# where the comparison is NA, matching what subset() does.
bd_mujeres1 <- bd[which(bd$Gender == "Female"), ]
# View(bd_mujeres1)

4. Eliminar columnas de X

library(dplyr)
# Drop the contiguous run of columns from X through X.22.
# (Presumably unnamed/empty columns picked up on CSV import — confirm.)
bd_mujeres1 <- select(bd_mujeres1, -(X:X.22))
# View(bd_mujeres1)

5. Descargar base de datos limpia como CSV

# Export the female-only table as CSV without a row-name column. The path is
# relative, so the file lands in the current working directory.
write.csv(bd_mujeres1, "bd_mujeres limpia.csv", row.names = FALSE)

Regresión Lineal

1. Importar la base de datos

# file.choose()
# Reload the cleaned table from disk.
# NOTE(review): the export step writes "bd_mujeres limpia.csv" relative to the
# working directory, while this reads an absolute Downloads path — confirm
# both refer to the same file.
bd_mujereslimpia <- read.csv("/Users/dannaleal/Downloads/bd_mujeres limpia.csv")

2. Entender la base de datos

summary(bd_mujereslimpia)
##     ClaimID           TotalPaid         TotalReserves     TotalRecovery     
##  Min.   :  650919   Min.   :    -81.8   Min.   :      0   Min.   :    0.00  
##  1st Qu.:  806228   1st Qu.:     20.1   1st Qu.:      0   1st Qu.:    0.00  
##  Median :  833851   Median :    223.0   Median :      0   Median :    0.00  
##  Mean   : 8053898   Mean   :   6504.3   Mean   :   2423   Mean   :   31.13  
##  3rd Qu.: 7143280   3rd Qu.:    932.1   3rd Qu.:      0   3rd Qu.:    0.00  
##  Max.   :62203889   Max.   :2985247.9   Max.   :2069575   Max.   :90357.52  
##                                                                             
##  IndemnityPaid        OtherPaid         ClaimStatus        IncidentDate      
##  Min.   :    -1.2   Min.   :    -81.8   Length:59197       Length:59197      
##  1st Qu.:     0.0   1st Qu.:     16.4   Class :character   Class :character  
##  Median :     0.0   Median :    218.7   Mode  :character   Mode  :character  
##  Mean   :  2945.2   Mean   :   3559.1                                        
##  3rd Qu.:     0.0   3rd Qu.:    857.8                                        
##  Max.   :492934.8   Max.   :2700073.4                                        
##                                                                              
##  IncidentDescription ReturnToWorkDate   AverageWeeklyWage  ClaimantOpenedDate
##  Length:59197        Length:59197       Length:59197       Length:59197      
##  Class :character    Class :character   Class :character   Class :character  
##  Mode  :character    Mode  :character   Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  ClaimantClosedDate EmployerNotificationDate ReceivedDate      
##  Length:59197       Length:59197             Length:59197      
##  Class :character   Class :character         Class :character  
##  Mode  :character   Mode  :character         Mode  :character  
##                                                                
##                                                                
##                                                                
##                                                                
##     IsDenied       ClaimantAge_at_DOI    Gender          ClaimantType      
##  Min.   :0.00000   Length:59197       Length:59197       Length:59197      
##  1st Qu.:0.00000   Class :character   Class :character   Class :character  
##  Median :0.00000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.05688                                                           
##  3rd Qu.:0.00000                                                           
##  Max.   :1.00000                                                           
##                                                                            
##  InjuryNature       BodyPartRegion       BodyPart         BillReviewALE     
##  Length:59197       Length:59197       Length:59197       Min.   : -456.00  
##  Class :character   Class :character   Class :character   1st Qu.:    8.25  
##  Mode  :character   Mode  :character   Mode  :character   Median :   24.00  
##                                                           Mean   :  174.80  
##                                                           3rd Qu.:   64.00  
##                                                           Max.   :20730.77  
##                                                           NA's   :46628     
##     Hospital        PhysicianOutpatient       Rx          
##  Min.   :-12570.4   Min.   :   -162.9   Min.   :  -160.7  
##  1st Qu.:   203.1   1st Qu.:    106.8   1st Qu.:    23.4  
##  Median :   572.9   Median :    220.2   Median :    61.1  
##  Mean   :  4580.8   Mean   :   1700.4   Mean   :  1357.1  
##  3rd Qu.:  2213.5   3rd Qu.:    667.2   3rd Qu.:   176.5  
##  Max.   :667973.0   Max.   :1481468.5   Max.   :380924.3  
##  NA's   :49187      NA's   :34369       NA's   :49906     
##  Total_Incurred_Cost_Claim
##  Min.   :  -2961          
##  1st Qu.:     22          
##  Median :    226          
##  Mean   :   8897          
##  3rd Qu.:    976          
##  Max.   :5054823          
## 

3. Crear una nueva base de datos para las variables que se tomarán en cuenta para la regresión

library(dplyr)
# Build the regression table: claim identifier, candidate predictors, the two
# dates needed to derive processing time, and the cost variable.
bd_mujeresAR <- bd_mujereslimpia[, c(
  "ClaimID",
  "ClaimStatus",
  "ClaimantAge_at_DOI",
  "Gender",
  "ClaimantType",
  "ClaimantOpenedDate",
  "ClaimantClosedDate",
  "Total_Incurred_Cost_Claim"
)]
# View(bd_mujeresAR)

4. Convertir las fechas a días y crear nueva columna de TiempodeProcesamientoDias

# Parse both date columns in place (month/day/two-digit-year text -> Date).
# NOTE(review): "%y" assumes two-digit years; the rendered summary reports
# NA processing time for 54104 of 59197 rows — confirm whether that reflects
# missing close dates or strings that do not match this format.
bd_mujeresAR <- transform(
  bd_mujeresAR,
  ClaimantOpenedDate = as.Date(ClaimantOpenedDate, format = "%m/%d/%y"),
  ClaimantClosedDate = as.Date(ClaimantClosedDate, format = "%m/%d/%y")
)

# Processing time in days: closed date minus opened date. Subtracting two
# Date vectors yields a difftime measured in days.
bd_mujeresAR$TiempoDeProcesamientoDias <- as.numeric(
  bd_mujeresAR$ClaimantClosedDate - bd_mujeresAR$ClaimantOpenedDate
)

# The raw date columns are no longer needed once the duration exists.
bd_mujeresAR[c("ClaimantOpenedDate", "ClaimantClosedDate")] <- NULL
# View(bd_mujeresAR)
summary(bd_mujeresAR)
##     ClaimID         ClaimStatus        ClaimantAge_at_DOI    Gender         
##  Min.   :  650919   Length:59197       Length:59197       Length:59197      
##  1st Qu.:  806228   Class :character   Class :character   Class :character  
##  Median :  833851   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 8053898                                                           
##  3rd Qu.: 7143280                                                           
##  Max.   :62203889                                                           
##                                                                             
##  ClaimantType       Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Length:59197       Min.   :  -2961           Min.   :-333.0           
##  Class :character   1st Qu.:     22           1st Qu.:   0.0           
##  Mode  :character   Median :    226           Median : 245.0           
##                     Mean   :   8897           Mean   : 806.1           
##                     3rd Qu.:    976           3rd Qu.: 920.0           
##                     Max.   :5054823           Max.   :6912.0           
##                                               NA's   :54104

5. Convertir las variables de carácter a numérico

# ClaimStatus stays a factor; codes C/O/R are relabelled 1/2/3.
bd_mujeresAR$ClaimStatus <- factor(
  bd_mujeresAR$ClaimStatus,
  levels = c("C", "O", "R"),
  labels = c(1, 2, 3)
)

# Age arrives as text; entries that are not numbers become NA (with a warning).
bd_mujeresAR$ClaimantAge_at_DOI <- as.numeric(bd_mujeresAR$ClaimantAge_at_DOI)
## Warning: NAs introduced by coercion

# Gender and ClaimantType become the numeric position of each value in the
# listed categories (1, 2, 3); values outside the list become NA.
# NOTE(review): this treats ClaimantType as an ordered numeric scale in the
# regressions below — confirm that is intended rather than a factor.
bd_mujeresAR$Gender <- as.numeric(
  match(bd_mujeresAR$Gender, c("Male", "Female", "Not Provided"))
)
bd_mujeresAR$ClaimantType <- as.numeric(
  match(bd_mujeresAR$ClaimantType, c("Medical Only", "Indemnity", "Report Only"))
)

# View(bd_mujeresAR)

6. Eliminar NA’s

# Drop every row that has an NA in any column and store the result back in
# bd_mujeresAR, the table every later step works on. Assigning to `bd`
# instead would leave bd_mujeresAR unchanged and overwrite the merged source
# table.
# NOTE: the summary rendered right after this step was produced before this
# fix and still lists NA's; re-knit to refresh it.
bd_mujeresAR <- na.omit(bd_mujeresAR)
# View(bd_mujeresAR)
summary(bd_mujeresAR)
##     ClaimID         ClaimStatus ClaimantAge_at_DOI     Gender   ClaimantType  
##  Min.   :  650919   1:56900     Min.   :-8000.00   Min.   :2   Min.   :1.000  
##  1st Qu.:  806228   2: 1786     1st Qu.:   33.00   1st Qu.:2   1st Qu.:1.000  
##  Median :  833851   3:  511     Median :   43.00   Median :2   Median :1.000  
##  Mean   : 8053898               Mean   :   39.75   Mean   :2   Mean   :1.357  
##  3rd Qu.: 7143280               3rd Qu.:   52.00   3rd Qu.:2   3rd Qu.:2.000  
##  Max.   :62203889               Max.   :   89.00   Max.   :2   Max.   :3.000  
##                                 NA's   :17097                                 
##  Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Min.   :  -2961           Min.   :-333.0           
##  1st Qu.:     22           1st Qu.:   0.0           
##  Median :    226           Median : 245.0           
##  Mean   :   8897           Mean   : 806.1           
##  3rd Qu.:    976           3rd Qu.: 920.0           
##  Max.   :5054823           Max.   :6912.0           
##                            NA's   :54104

7. Eliminar valores negativos en ClaimantAge_at_DOI, Total_Incurred_Cost_Claim y TiempoDeProcesamientoDias

# Keep only rows where age, incurred cost and processing time are all
# non-negative. filter() combines the conditions with AND and also drops rows
# where any of them evaluates to NA.
bd_mujeresAR <- bd_mujeresAR %>%
  filter(
    ClaimantAge_at_DOI >= 0,
    Total_Incurred_Cost_Claim >= 0,
    TiempoDeProcesamientoDias >= 0
  )

# View(bd_mujeresAR)
summary(bd_mujeresAR)
##     ClaimID         ClaimStatus ClaimantAge_at_DOI     Gender   ClaimantType 
##  Min.   :  650919   1:4015      Min.   : 1.00      Min.   :2   Min.   :1.00  
##  1st Qu.:  823404   2:   3      1st Qu.:34.00      1st Qu.:2   1st Qu.:1.00  
##  Median : 5970814   3:   0      Median :44.00      Median :2   Median :1.00  
##  Mean   :15622363               Mean   :43.14      Mean   :2   Mean   :1.68  
##  3rd Qu.:30288888               3rd Qu.:52.00      3rd Qu.:2   3rd Qu.:2.00  
##  Max.   :61592860               Max.   :87.00      Max.   :2   Max.   :3.00  
##  Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Min.   :     0.0          Min.   :   0.0           
##  1st Qu.:     0.0          1st Qu.:  33.0           
##  Median :   171.3          Median : 301.0           
##  Mean   :  4459.3          Mean   : 717.1           
##  3rd Qu.:  1125.3          3rd Qu.: 844.0           
##  Max.   :388620.8          Max.   :6912.0

8. Generar la primera regresión lineal

# First model: incurred cost as a linear function of claim status, age,
# gender, claimant type and processing time.
# NOTE(review): the table was restricted to Gender == "Female" upstream, so
# Gender is constant here — presumably why its coefficient is reported as NA
# ("not defined because of singularities") in the summary.
regresion <- lm(Total_Incurred_Cost_Claim ~ ClaimStatus + ClaimantAge_at_DOI + 
                Gender + ClaimantType + TiempoDeProcesamientoDias, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimStatus + ClaimantAge_at_DOI + 
##     Gender + ClaimantType + TiempoDeProcesamientoDias, data = bd_mujeresAR)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28757  -5382  -2119    625 359883 
## 
## Coefficients: (1 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -8599.8188  1447.1614  -5.943 3.04e-09 ***
## ClaimStatus2               -145.6059 11808.9879  -0.012     0.99    
## ClaimantAge_at_DOI          117.6975    27.5373   4.274 1.96e-05 ***
## Gender                            NA         NA      NA       NA    
## ClaimantType               2881.0214   406.1079   7.094 1.53e-12 ***
## TiempoDeProcesamientoDias     4.3814     0.3117  14.055  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20440 on 4013 degrees of freedom
## Multiple R-squared:  0.0524, Adjusted R-squared:  0.05145 
## F-statistic: 55.48 on 4 and 4013 DF,  p-value: < 2.2e-16

9. Ajustar la regresión lineal

# Refit the cost model without ClaimStatus and Gender, keeping age, claimant
# type and processing time. This overwrites the previous `regresion` object.
regresion <- lm(Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI + ClaimantType + TiempoDeProcesamientoDias, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = Total_Incurred_Cost_Claim ~ ClaimantAge_at_DOI + 
##     ClaimantType + TiempoDeProcesamientoDias, data = bd_mujeresAR)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28757  -5382  -2119    625 359883 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -8600.2016  1446.6483  -5.945 3.00e-09 ***
## ClaimantAge_at_DOI          117.6993    27.5335   4.275 1.96e-05 ***
## ClaimantType               2881.1015   406.0053   7.096 1.51e-12 ***
## TiempoDeProcesamientoDias     4.3815     0.3116  14.061  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20440 on 4014 degrees of freedom
## Multiple R-squared:  0.0524, Adjusted R-squared:  0.05169 
## F-statistic: 73.99 on 3 and 4014 DF,  p-value: < 2.2e-16

10. Construir un modelo predictivo

# Predict incurred cost for one hypothetical claim. The inputs equal the
# column means shown in the summary of bd_mujeresAR.
datos <- data.frame(
  ClaimantAge_at_DOI = 43.14,
  ClaimantType = 1.68,
  TiempoDeProcesamientoDias = 717.1
)
predict(object = regresion, newdata = datos)
##        1 
## 4459.569

11. Generar la segunda regresión lineal

# Second model: processing time (days) as a linear function of claim status,
# age, gender, claimant type and incurred cost. Reuses the name `regresion`,
# replacing the cost model.
# NOTE(review): Gender is constant in this female-only table — presumably why
# its coefficient is NA in the summary.
regresion <- lm(TiempoDeProcesamientoDias ~ ClaimStatus + ClaimantAge_at_DOI + 
                Gender + ClaimantType + Total_Incurred_Cost_Claim, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = TiempoDeProcesamientoDias ~ ClaimStatus + ClaimantAge_at_DOI + 
##     Gender + ClaimantType + Total_Incurred_Cost_Claim, data = bd_mujeresAR)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2718.6  -569.1  -289.9    50.1  6331.8 
## 
## Coefficients: (1 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.717e+03  6.655e+01  25.808   <2e-16 ***
## ClaimStatus2              -7.947e+02  5.837e+02  -1.362    0.173    
## ClaimantAge_at_DOI        -1.229e+01  1.351e+00  -9.103   <2e-16 ***
## Gender                            NA         NA      NA       NA    
## ClaimantType              -3.078e+02  1.961e+01 -15.697   <2e-16 ***
## Total_Incurred_Cost_Claim  1.071e-02  7.619e-04  14.055   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1010 on 4013 degrees of freedom
## Multiple R-squared:  0.1137, Adjusted R-squared:  0.1128 
## F-statistic: 128.7 on 4 and 4013 DF,  p-value: < 2.2e-16

12. Ajustar la regresión lineal

# Refit the processing-time model without ClaimStatus and Gender, keeping
# age, claimant type and incurred cost. Overwrites `regresion` again.
regresion <- lm(TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI + ClaimantType + Total_Incurred_Cost_Claim, 
                data = bd_mujeresAR)
summary(regresion)
## 
## Call:
## lm(formula = TiempoDeProcesamientoDias ~ ClaimantAge_at_DOI + 
##     ClaimantType + Total_Incurred_Cost_Claim, data = bd_mujeresAR)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2719.3  -569.3  -289.7    48.3  6332.3 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.716e+03  6.655e+01  25.789   <2e-16 ***
## ClaimantAge_at_DOI        -1.229e+01  1.351e+00  -9.099   <2e-16 ***
## ClaimantType              -3.075e+02  1.961e+01 -15.682   <2e-16 ***
## Total_Incurred_Cost_Claim  1.071e-02  7.620e-04  14.061   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1011 on 4014 degrees of freedom
## Multiple R-squared:  0.1133, Adjusted R-squared:  0.1126 
## F-statistic:   171 on 3 and 4014 DF,  p-value: < 2.2e-16

13. Construir un modelo predictivo

# Predict processing time for one hypothetical claim. The inputs equal the
# column means shown in the summary of bd_mujeresAR.
datos <- data.frame(
  ClaimantAge_at_DOI = 43.14,
  ClaimantType = 1.68,
  Total_Incurred_Cost_Claim = 4459.3
)
predict(object = regresion, newdata = datos)
##        1 
## 717.0795

Conclusiones

Como se puede observar, el segundo modelo de regresión obtuvo una R cuadrada ajustada de 11.26%, mientras que el primero obtuvo una R cuadrada ajustada de 5.17%. Esto indica que el segundo modelo se ajusta mejor a sus datos, aunque la comparación debe tomarse con cautela porque los dos modelos explican variables de respuesta distintas (tiempo de procesamiento y costo total incurrido, respectivamente).

Análisis Clusters

1. Entender la base de datos

summary (bd_mujeresAR)
##     ClaimID         ClaimStatus ClaimantAge_at_DOI     Gender   ClaimantType 
##  Min.   :  650919   1:4015      Min.   : 1.00      Min.   :2   Min.   :1.00  
##  1st Qu.:  823404   2:   3      1st Qu.:34.00      1st Qu.:2   1st Qu.:1.00  
##  Median : 5970814   3:   0      Median :44.00      Median :2   Median :1.00  
##  Mean   :15622363               Mean   :43.14      Mean   :2   Mean   :1.68  
##  3rd Qu.:30288888               3rd Qu.:52.00      3rd Qu.:2   3rd Qu.:2.00  
##  Max.   :61592860               Max.   :87.00      Max.   :2   Max.   :3.00  
##  Total_Incurred_Cost_Claim TiempoDeProcesamientoDias
##  Min.   :     0.0          Min.   :   0.0           
##  1st Qu.:     0.0          1st Qu.:  33.0           
##  Median :   171.3          Median : 301.0           
##  Mean   :  4459.3          Mean   : 717.1           
##  3rd Qu.:  1125.3          3rd Qu.: 844.0           
##  Max.   :388620.8          Max.   :6912.0

2. Identificar outliers

boxplot(bd_mujeresAR$Total_Incurred_Cost_Claim, horizontal = TRUE)

boxplot(bd_mujeresAR$TiempoDeProcesamientoDias, horizontal = TRUE)

3. Crear una nueva base de datos con ClaimID, TiempoDeProcesamientoDias y Total_Incurred_Cost_Claim y eliminar datos fuera de lo normal

# Clustering table: only the two numeric measures, one row per claim.
bd_mujeresCL <- bd_mujeresAR[, c("TiempoDeProcesamientoDias", "Total_Incurred_Cost_Claim")]

# Label each row with its ClaimID so claims stay identifiable without keeping
# the identifier as a clustering variable.
rownames(bd_mujeresCL) <- bd_mujeresAR$ClaimID
# View(bd_mujeresCL)

# Columna de TiempoDeProcesamientoDias
# Outlier fences for TiempoDeProcesamientoDias: Q1 - 1.5 * IQR and
# Q3 + 1.5 * IQR. The quartiles are computed from the data instead of being
# copied by hand from the printed summary, so the fences stay correct if the
# input table changes.
IQR_TiempoDeProcesamientoDias <- IQR(bd_mujeresCL$TiempoDeProcesamientoDias)
IQR_TiempoDeProcesamientoDias
## [1] 811
summary(bd_mujeresCL)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :     0.0         
##  1st Qu.:  33.0            1st Qu.:     0.0         
##  Median : 301.0            Median :   171.3         
##  Mean   : 717.1            Mean   :  4459.3         
##  3rd Qu.: 844.0            3rd Qu.:  1125.3         
##  Max.   :6912.0            Max.   :388620.8
# names = FALSE keeps the fences unnamed so they print as plain numbers.
cuartiles_tiempo <- quantile(
  bd_mujeresCL$TiempoDeProcesamientoDias,
  probs = c(0.25, 0.75),
  names = FALSE
)
LI_TiempoDeProcesamientoDias <- cuartiles_tiempo[1] - 1.5 * IQR_TiempoDeProcesamientoDias
LI_TiempoDeProcesamientoDias
## [1] -1183.5
LS_TiempoDeProcesamientoDias <- cuartiles_tiempo[2] + 1.5 * IQR_TiempoDeProcesamientoDias
LS_TiempoDeProcesamientoDias
## [1] 2060.5
cat("LI_TiempoDeProcesamientoDias:", LI_TiempoDeProcesamientoDias, "\n")
## LI_TiempoDeProcesamientoDias: -1183.5
cat("LS_TiempoDeProcesamientoDias:", LS_TiempoDeProcesamientoDias, "\n")
## LS_TiempoDeProcesamientoDias: 2060.5
# Only the upper fence is applied: the lower fence is negative and negative
# durations were already removed. Durations are whole days, so comparing
# against the computed fence keeps the same rows as a cutoff of 2061.
bd_mujeresCL <- bd_mujeresCL[bd_mujeresCL$TiempoDeProcesamientoDias <= LS_TiempoDeProcesamientoDias, ]

# Outlier fences for Total_Incurred_Cost_Claim, computed on the rows that
# survived the processing-time filter.
IQR_Total_Incurred_Cost_Claim <- IQR(bd_mujeresCL$Total_Incurred_Cost_Claim)
IQR_Total_Incurred_Cost_Claim
## [1] 1155.71
summary(bd_mujeresCL)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :     0.0         
##  1st Qu.:   4.0            1st Qu.:     0.0         
##  Median : 235.0            Median :   177.8         
##  Mean   : 398.6            Mean   :  3707.7         
##  3rd Qu.: 548.0            3rd Qu.:  1155.7         
##  Max.   :2058.0            Max.   :246847.9
# Quartiles come from the current data. A hand-copied Q3 of 1125.3 is the
# third quartile of the table *before* the processing-time filter; the
# summary just above shows the current Q3 is 1155.7, so the upper fence is
# about 2889.3 rather than 2858.865.
cuartiles_costo <- quantile(
  bd_mujeresCL$Total_Incurred_Cost_Claim,
  probs = c(0.25, 0.75),
  names = FALSE
)
LI_Total_Incurred_Cost_Claim <- cuartiles_costo[1] - 1.5 * IQR_Total_Incurred_Cost_Claim
LI_Total_Incurred_Cost_Claim
## [1] -1733.565
LS_Total_Incurred_Cost_Claim <- cuartiles_costo[2] + 1.5 * IQR_Total_Incurred_Cost_Claim
LS_Total_Incurred_Cost_Claim
cat("LI_Total_Incurred_Cost_Claim:", LI_Total_Incurred_Cost_Claim, "\n")
## LI_Total_Incurred_Cost_Claim: -1733.565
cat("LS_Total_Incurred_Cost_Claim:", LS_Total_Incurred_Cost_Claim, "\n")
# Only the upper fence is applied: the lower fence is negative and negative
# costs were already removed.
bd_mujeresCL <- bd_mujeresCL[bd_mujeresCL$Total_Incurred_Cost_Claim <= LS_Total_Incurred_Cost_Claim, ]
# NOTE: results rendered after this point were produced with the old cutoff
# (2859) and must be regenerated by re-knitting.
summary(bd_mujeresCL)
##  TiempoDeProcesamientoDias Total_Incurred_Cost_Claim
##  Min.   :   0.0            Min.   :   0.00          
##  1st Qu.:   0.0            1st Qu.:   0.00          
##  Median : 175.0            Median :  98.62          
##  Mean   : 337.7            Mean   : 372.37          
##  3rd Qu.: 460.5            3rd Qu.: 428.27          
##  Max.   :2058.0            Max.   :2851.58

4. Asignación de grupos

# 0. Standardize both variables (mean 0, sd 1) so neither dominates the
#    Euclidean distances used by k-means.
bd_mujeresCL <- as.data.frame(scale(bd_mujeresCL))

# 1. Working copy used for clustering
bdmujeresCLUSTER <- bd_mujeresCL

# 2. Number of groups
grupos <- 10

# 3. Run the classification. k-means starts from random centers, so the seed
#    is fixed first; without it the cluster assignment changes on every run.
set.seed(123)
segmentos <- kmeans(bdmujeresCLUSTER, grupos)

# 4. Attach the assigned cluster to each claim
asignacion <- cbind(bdmujeresCLUSTER, cluster = segmentos$cluster)

# 5. Plot the assignments
# install.packages("ggplot2")
library(ggplot2)
# install.packages("factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
fviz_cluster(segmentos, data = bdmujeresCLUSTER,
             palette = c("darkorchid", "darkorange2", "aquamarine2", "pink", "blue", "darkolivegreen1", "salmon1", "skyblue3", "slategray2", "yellow"),
             ellipse.type = "euclid",
             star.plot = TRUE,
             repel = TRUE,
             ggtheme = theme())

# 6. Choose the number of groups with the gap statistic
library(cluster)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# The seed is reset here, so the gap-statistic run does not depend on the
# k-means call above.
# NOTE(review): the run warns that k-means did not converge in 10 iterations;
# consider a larger iter.max / nstart — confirm the effect on the gap curve.
set.seed(123)
optimizacion <- clusGap(bdmujeresCLUSTER, FUN = kmeans, nstart = 1, K.max = 10) 
## Warning: did not converge in 10 iterations
plot(optimizacion, xlab = "Número de clusters K")

5. Conclusiones

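Dado que el punto más alto de la gráfica se alcanza en K = 10, se toma 10 como la cantidad óptima de grupos. Cabe señalar que 10 es también el máximo evaluado (K.max = 10), por lo que el óptimo podría encontrarse por encima de ese valor.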

LS0tCnRpdGxlOiAiTTQgQWN0aXZpZGFkIE1lZGlvIFTDqXJtaW5vIgphdXRob3I6ICJEYW5uYSBMZWFsIEEwMDgzMTY5OCIKZGF0ZTogIjI4LzkvMjAyMyIKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiBUUlVFCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIHRoZW1lOiAieWV0aSIKICAgIGhpZ2hsaWdodDogInRhbmdvIgotLS0KIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij5Db250ZXh0bzwvc3Bhbj4KIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjEuIEltcG9ydGFyIHkganVudGFyIGJhc2VzIGRlIGRhdG9zPC9zcGFuPgpgYGB7cn0KIyBmaWxlLmNob29zZSgpCmJkMSA8LSByZWFkLmNzdigiL1VzZXJzL2Rhbm5hbGVhbC9Eb3dubG9hZHMvQ2xhaW1zRGF0YTIwMTguY3N2IikKYmQyIDwtIHJlYWQuY3N2KCIvVXNlcnMvZGFubmFsZWFsL0Rvd25sb2Fkcy9UcmFuc2FjdGlvbnNTdW1tYXJ5MjAxOC5jc3YiKQpiZCA8LSBtZXJnZShiZDEsIGJkMiwgYnk9IkNsYWltSUQiLGFsbD1UUlVFKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij4yLiBDcmVhciBudWV2YSBjb2x1bW5hIHBhcmEgVG90YWwgSW5jdXJyZWQgQ29zdDwvc3Bhbj4KYGBge3J9CiMgaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQpsaWJyYXJ5KGRwbHlyKQoKYmQgPC0gYmQgJT4lIAogIG11dGF0ZShUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltID0gVG90YWxSZXNlcnZlcyArIEluZGVtbml0eVBhaWQgKyBPdGhlclBhaWQgLSBUb3RhbFJlY292ZXJ5KQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij4zLiBGaWx0cmFyIGJhc2UgZGUgZGF0b3MgcG9yIHPDs2xvIG11amVyZXM8L3NwYW4+CmBgYHtyfQpiZF9tdWplcmVzMSA8LSBzdWJzZXQoYmQsIEdlbmRlciA9PSAiRmVtYWxlIikKIyBWaWV3KGJkX211amVyZXMxKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij40LiBFbGltaW5hciBjb2x1bWFzIGRlIFg8L3NwYW4+CmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQpiZF9tdWplcmVzMSA8LSBiZF9tdWplcmVzMSAlPiUKICBzZWxlY3QoLVg6LVguMjIpCmBgYAoKYGBge3J9CiMgVmlldyhiZF9tdWplcmVzMSkKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+NS4gRGVzY2FyZ2FyIGJhc2UgZGUgZGF0b3MgbGltcGlhIGNvbW8gQ1NWPC9zcGFuPgpgYGB7cn0Kd3JpdGUuY3N2KGJkX211amVyZXMxLCAiYmRfbXVqZXJlcyBsaW1waWEuY3N2Iiwgcm93Lm5hbWVzID0gRkFMU0UpCmBgYAoKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+UmVncmVzacOzbiBMaW5lYWw8L3NwYW4+CiFbXSgvVXNlcnMvZGFubmFsZWFsL0Rlc2t0b3AvYW5hbHlzaXNnaWYuZ2lmKSAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij4xLiBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zPC9zcGFuPgpgYGB7cn0KIyBmaWxlLmNob29zZSgpCmJkX211amVy
ZXNsaW1waWEgPC0gcmVhZC5jc3YoIi9Vc2Vycy9kYW5uYWxlYWwvRG93bmxvYWRzL2JkX211amVyZXMgbGltcGlhLmNzdiIpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogI0ZGMTQ5MzsiPjIuIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3M8L3NwYW4+CmBgYHtyfQpzdW1tYXJ5KGJkX211amVyZXNsaW1waWEpCmBgYAojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+My4gQ3JlYXIgdW5hIG51ZXZhIGJhc2UgZGUgZGF0b3MgcGFyYSBsYXMgdmFyaWFibGVzIHF1ZSBzZSB0b21hcsOhbiBlbiBjdWVudGEgcGFyYSBsYSByZWdyZXNpw7NuPC9zcGFuPgpgYGB7cn0KbGlicmFyeShkcGx5cikKIyBDcmVhciB1bmEgbnVldmEgYmFzZSBkZSBkYXRvcyBjb24gbGFzIGNvbHVtbmFzIGRlc2VhZGFzCmJkX211amVyZXNBUiA8LSBiZF9tdWplcmVzbGltcGlhICU+JQogIHNlbGVjdChDbGFpbUlELCBDbGFpbVN0YXR1cywgQ2xhaW1hbnRBZ2VfYXRfRE9JLCBHZW5kZXIsIENsYWltYW50VHlwZSwgQ2xhaW1hbnRPcGVuZWREYXRlLCBDbGFpbWFudENsb3NlZERhdGUsIFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0pCiMgVmlldyhiZF9tdWplcmVzQVIpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogI0ZGMTQ5MzsiPjQuIENvbnZlcnRpciBsYXMgZmVjaGFzIGEgZMOtYXMgeSBjcmVhciBudWV2YSBjb2x1bW5hIGRlIFRpZW1wb2RlUHJvY2VzYW1pZW50b0RpYXM8L3NwYW4+CmBgYHtyfQpiZF9tdWplcmVzQVIkQ2xhaW1hbnRPcGVuZWREYXRlIDwtIGFzLkRhdGUoYmRfbXVqZXJlc0FSJENsYWltYW50T3BlbmVkRGF0ZSwgZm9ybWF0ID0gIiVtLyVkLyV5IikKYmRfbXVqZXJlc0FSJENsYWltYW50Q2xvc2VkRGF0ZSA8LSBhcy5EYXRlKGJkX211amVyZXNBUiRDbGFpbWFudENsb3NlZERhdGUsIGZvcm1hdCA9ICIlbS8lZC8leSIpCgojIENhbGN1bGFyIGxhIGRpZmVyZW5jaWEgZW4gZMOtYXMgZW50cmUgbGFzIGZlY2hhcwpiZF9tdWplcmVzQVIkVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSBhcy5udW1lcmljKGRpZmZ0aW1lKGJkX211amVyZXNBUiRDbGFpbWFudENsb3NlZERhdGUsIGJkX211amVyZXNBUiRDbGFpbWFudE9wZW5lZERhdGUsIHVuaXRzID0gImRheXMiKSkKCiMgRWxpbWluYXIgbGFzIGNvbHVtbmFzIG9yaWdpbmFsZXMgZGUgZmVjaGEKYmRfbXVqZXJlc0FSIDwtIGJkX211amVyZXNBUlssICEobmFtZXMoYmRfbXVqZXJlc0FSKSAlaW4lIGMoIkNsYWltYW50T3BlbmVkRGF0ZSIsICJDbGFpbWFudENsb3NlZERhdGUiKSldCiMgVmlldyhiZF9tdWplcmVzQVIpCmBgYAoKYGBge3J9CnN1bW1hcnkoYmRfbXVqZXJlc0FSKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij41LiBDb252ZXJ0aXIgbGFzIHZhcmlhYmxlcyBkZSBjYXLDoWN0ZXIgYSBuw7ptZXJpY288L3NwYW4+CmBgYHtyfQpiZF9tdWplcmVzQVIkQ2xhaW1TdGF0dXMgPC0gZmFjdG9yKGJkX211amVyZXNBUiRDbGFpbVN0YXR1cywg
bGV2ZWxzID0gYygiQyIsICJPIiwgIlIiKSwgbGFiZWxzID0gYygxLCAyLCAzKSkKYmRfbXVqZXJlc0FSJENsYWltYW50QWdlX2F0X0RPSSA8LSBhcy5udW1lcmljKGJkX211amVyZXNBUiRDbGFpbWFudEFnZV9hdF9ET0kpCmJkX211amVyZXNBUiRHZW5kZXIgPC0gYXMubnVtZXJpYyhmYWN0b3IoYmRfbXVqZXJlc0FSJEdlbmRlciwgbGV2ZWxzID0gYygiTWFsZSIsICJGZW1hbGUiLCAiTm90IFByb3ZpZGVkIiksIGxhYmVscyA9IGMoMSwgMiwgMykpKQpiZF9tdWplcmVzQVIkQ2xhaW1hbnRUeXBlIDwtIGFzLm51bWVyaWMoZmFjdG9yKGJkX211amVyZXNBUiRDbGFpbWFudFR5cGUsIGxldmVscyA9IGMoIk1lZGljYWwgT25seSIsICJJbmRlbW5pdHkiLCAiUmVwb3J0IE9ubHkiKSwgbGFiZWxzID0gYygxLCAyLCAzKSkpCgojIFZpZXcoYmRfbXVqZXJlc0FSKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij42LiBFbGltaW5hciBOQSdzPC9zcGFuPgpgYGB7cn0KYmQgPC0gbmEub21pdChiZF9tdWplcmVzQVIpCiMgVmlldyhiZF9tdWplcmVzQVIpCmBgYAoKYGBge3J9CnN1bW1hcnkoYmRfbXVqZXJlc0FSKQpgYGAKCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+Ny4gRWxpbWluYXIgdmFsb3JlcyBuZWdhdGl2b3MgZW4gQ2xhaW1hbnRBZ2VfYXRfRE9JLCAgVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSB5IFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXM8L3NwYW4+CmBgYHtyfQojIEVsaW1pbmFyIHZhbG9yZXMgbmVnYXRpdm9zIGVuIENsYWltYW50QWdlX2F0X0RPSQpiZF9tdWplcmVzQVIgPC0gYmRfbXVqZXJlc0FSICU+JQogIGZpbHRlcihDbGFpbWFudEFnZV9hdF9ET0kgPj0gMCkKIyBFbGltaW5hciB2YWxvcmVzIG5lZ2F0aXZvcyBlbiBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltCmJkX211amVyZXNBUiA8LSBiZF9tdWplcmVzQVIgJT4lCiAgZmlsdGVyKFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPj0gMCkKIyBFbGltaW5hciB2YWxvcmVzIG5lZ2F0aXZvcyBlbiBUaWVtcG9EZVByb2Nlc2FtaWVudG9Ew61hcwpiZF9tdWplcmVzQVIgPC0gYmRfbXVqZXJlc0FSICU+JQogIGZpbHRlcihUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzID49IDApCgojIFZpZXcoYmRfbXVqZXJlc0FSKQpzdW1tYXJ5KGJkX211amVyZXNBUikKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+OC4gR2VuZXJhciBsYSBwcmltZXIgcmVncmVzacOzbiBsaW5lYWw8L3NwYW4+CmBgYHtyfQpyZWdyZXNpb24gPC0gbG0oVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSB+IENsYWltU3RhdHVzICsgQ2xhaW1hbnRBZ2VfYXRfRE9JICsgCiAgICAgICAgICAgICAgICBHZW5kZXIgKyBDbGFpbWFudFR5cGUgKyBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzLCAKICAgICAgICAgICAgICAgIGRhdGEgPSBiZF9tdWplcmVzQVIpCnN1bW1hcnkocmVncmVzaW9uKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNG
RjE0OTM7Ij45LiBBanVzdGFyIGxhIHJlZ3Jlc2nDs24gbGluZWFsPC9zcGFuPgpgYGB7cn0KcmVncmVzaW9uIDwtIGxtKFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gfiBDbGFpbWFudEFnZV9hdF9ET0kgKyBDbGFpbWFudFR5cGUgKyBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzLCAKICAgICAgICAgICAgICAgIGRhdGEgPSBiZF9tdWplcmVzQVIpCnN1bW1hcnkocmVncmVzaW9uKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij4xMC4gQ29uc3RydWlyIHVuIG1vZGVsbyBwcmVkaWN0aXZvPC9zcGFuPgpgYGB7cn0KZGF0b3MgPC0gZGF0YS5mcmFtZShDbGFpbWFudEFnZV9hdF9ET0kgPSA0My4xNCwgQ2xhaW1hbnRUeXBlID0gMS42OCwgCiAgICAgICAgICAgICAgICAgICAgVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA9IDcxNy4xKQpwcmVkaWN0KHJlZ3Jlc2lvbiwgZGF0b3MpCmBgYAoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjogI0ZGMTQ5MzsiPjExLiBHZW5lcmFyIGxhIHNlZ3VuZGEgcmVncmVzacOzbiBsaW5lYWw8L3NwYW4+CmBgYHtyfQpyZWdyZXNpb24gPC0gbG0oVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyB+IENsYWltU3RhdHVzICsgQ2xhaW1hbnRBZ2VfYXRfRE9JICsgCiAgICAgICAgICAgICAgICBHZW5kZXIgKyBDbGFpbWFudFR5cGUgKyBUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltLCAKICAgICAgICAgICAgICAgIGRhdGEgPSBiZF9tdWplcmVzQVIpCnN1bW1hcnkocmVncmVzaW9uKQpgYGAKCiMjIyA8c3BhbiBzdHlsZT0iY29sb3I6ICNGRjE0OTM7Ij4xMi4gQWp1c3RhciBsYSByZWdyZXNpw7NuIGxpbmVhbDwvc3Bhbj4KYGBge3J9CnJlZ3Jlc2lvbiA8LSBsbShUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIH4gQ2xhaW1hbnRBZ2VfYXRfRE9JICsgQ2xhaW1hbnRUeXBlICsgVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSwgCiAgICAgICAgICAgICAgICBkYXRhID0gYmRfbXVqZXJlc0FSKQpzdW1tYXJ5KHJlZ3Jlc2lvbikKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+MTMuIENvbnN0cnVpciB1biBtb2RlbG8gcHJlZGljdGl2bzwvc3Bhbj4KYGBge3J9CmRhdG9zIDwtIGRhdGEuZnJhbWUoQ2xhaW1hbnRBZ2VfYXRfRE9JID0gNDMuMTQsIENsYWltYW50VHlwZSA9IDEuNjgsIAogICAgICAgICAgICAgICAgICAgIFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPSA0NDU5LjMpCnByZWRpY3QocmVncmVzaW9uLCBkYXRvcykKYGBgCgojIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiAjRkYxNDkzOyI+Q29uY2x1c2lvbmVzPC9zcGFuPgpDb21vIHNlIHB1ZWRlIG9ic2VydmFyLCBlbiBlbCBzZWd1bmRvIGFuw6FsaXNpcyBkZSByZWdyZXNpw7NuIHNlIG9idHV2byB1bmEgClIgY3VhZHJhZGEgYWp1c3RhZGEgZGVsIDExLjI2JSwgbWllbnRyYXMgcXVlIGVuIGxhIHByaW1lcmEgc2Ugb2J0dXZvIHVuYSAKUiBjdWFkcmFkYSBhanVzdGFkYSBkZWwgNS4xNiUuIEVzdG8gbm9zIGluZGlj
YSBxdWUgZWwgc2VndW5kbyBhbsOhbGlzaXMgZGUgCnJlZ3Jlc2nDs24gc2UgYWp1c3RhIG1lam9yIGFsIG1vZGVsbyBkZSBsb3MgZGF0b3MuCgoKIyA8c3BhbiBzdHlsZT0iY29sb3I6ICM2NkNEQUE7Ij5BbsOhbGlzaXMgQ2x1c3RlcnM8L3NwYW4+CiFbXSgvVXNlcnMvZGFubmFsZWFsL0Rlc2t0b3AvY2x1c3RlcmdpZi5naWYpIAoKIyMjIDxzcGFuIHN0eWxlID0gImNvbG9yOiAjNjZDREFBOyI+MS4gRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvczwvc3Bhbj4KCmBgYHtyfQpzdW1tYXJ5IChiZF9tdWplcmVzQVIpCmBgYAoKIyMjIDxzcGFuIHN0eWxlID0gImNvbG9yOiAjNjZDREFBOyI+Mi4gSWRlbnRpZmljYXIgb3V0bGllcnM8L3NwYW4+IApgYGB7cn0KYm94cGxvdChiZF9tdWplcmVzQVIkVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSwgaG9yaXpvbnRhbCA9IFRSVUUpCmJveHBsb3QoYmRfbXVqZXJlc0FSJFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMsIGhvcml6b250YWwgPSBUUlVFKQpgYGAKCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogIzY2Q0RBQTsiPjMuIENyZWFyIHVuYSBudWV2YSBiYXNlIGRlIGRhdG9zIGNvbiBDbGFpbUlELCBUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIHkgVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSB5IGVsaW1pbmFyIGRhdG9zIGZ1ZXJhIGRlIGxvIG5vcm1hbDwvc3Bhbj4KYGBge3J9CiMgQ3JlYXIgdW5hIG51ZXZhIGJhc2UgZGUgZGF0b3MgY29uIGxhcyBjb2x1bW5hcyBkZXNlYWRhcwpiZF9tdWplcmVzQ0wgPC0gYmRfbXVqZXJlc0FSWywgYygiQ2xhaW1JRCIsICJUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIiwgIlRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0iKV0KCiMgTGxhbWFyIGEgbG9zIHJlbmdsb25lcyBjb21vIENsYWltSUQKcm93bmFtZXMoYmRfbXVqZXJlc0NMKSA8LSBiZF9tdWplcmVzQ0wkQ2xhaW1JRApiZF9tdWplcmVzQ0wgPC0gc3Vic2V0KGJkX211amVyZXNDTCwgc2VsZWN0ID0gLWMoQ2xhaW1JRCkpCiMgVmlldyhiZF9tdWplcmVzQ0wpCgojIENvbHVtbmEgZGUgVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcwpJUVJfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSBJUVIoYmRfbXVqZXJlc0NMJFRpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMpCklRUl9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzCnN1bW1hcnkoYmRfbXVqZXJlc0NMKQpMSV9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIDwtIDMzIC0gMS41KklRUl9UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzCkxJX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMKTFNfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcyA8LSA4NDQgKyAxLjUqSVFSX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMKTFNfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcwpjYXQoIkxJX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXM6IiwgTElfVGllbXBvRGVQcm9jZXNhbWllbnRvRGlhcywgIlxuIikKY2F0KCJMU19UaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFz
OiIsIExTX1RpZW1wb0RlUHJvY2VzYW1pZW50b0RpYXMsICJcbiIpCgpiZF9tdWplcmVzQ0wgPC0gYmRfbXVqZXJlc0NMW2JkX211amVyZXNDTCRUaWVtcG9EZVByb2Nlc2FtaWVudG9EaWFzIDw9IDIwNjEsIF0KIyMjIE5vdGE6IHNlIHJlZG9uZGXDsyBhIDIwNjEgcG9ycXVlIGVsIExTIGRpw7MgdW4gcmVzdWx0YWRvIGRlIDIwNjAuNS4KCiNDb2x1bW5hIGRlIFRvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0KSVFSX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0gPC0gSVFSKGJkX211amVyZXNDTCRUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltKQpJUVJfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbQpzdW1tYXJ5KGJkX211amVyZXNDTCkKTElfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8LSAwIC0gMS41KklRUl9Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltCkxJX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0KTFNfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSA8LSAxMTI1LjMgKyAxLjUqSVFSX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0KTFNfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbQpjYXQoIkxJX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW06IiwgTElfVG90YWxfSW5jdXJyZWRfQ29zdF9DbGFpbSwgIlxuIikKY2F0KCJMU19Ub3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltOiIsIExTX1RvdGFsX0luY3VycmVkX0Nvc3RfQ2xhaW0sICJcbiIpCgpiZF9tdWplcmVzQ0wgPC0gYmRfbXVqZXJlc0NMW2JkX211amVyZXNDTCRUb3RhbF9JbmN1cnJlZF9Db3N0X0NsYWltIDw9IDI4NTksIF0KIyMjIE5vdGE6IHNlIHJlZG9uZGXDsyBhIDI4NTkgcG9ycXVlIGVsIExTIGRpw7MgdW4gcmVzdWx0YWRvIGRlIDI4NTguODY1LgpzdW1tYXJ5KGJkX211amVyZXNDTCkKYGBgCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogIzY2Q0RBQTsiPjQuIEFzaWduYWNpw7NuIGRlIGdydXBvczwvc3Bhbj4KYGBge3J9CiMgTy4gTm9ybWFsaXphciB2YXJpYWJsZXMKYmRfbXVqZXJlc0NMIDwtIGFzLmRhdGEuZnJhbWUoc2NhbGUoYmRfbXVqZXJlc0NMKSkKCiMgMS4gQ3JlYXIgYmFzZSBkZSBkYXRvcwpiZG11amVyZXNDTFVTVEVSIDwtIGJkX211amVyZXNDTAoKIyAyLiBEZXRlcm1pbmFyIGVsIG7Dum1lcm8gZGUgZ3J1cG9zCmdydXBvcyA8LSAxMAoKIyAzLiBSZWFsaXphciBsYSBjbGFzaWZpY2FjacOzbgpzZWdtZW50b3MgPC0ga21lYW5zKGJkbXVqZXJlc0NMVVNURVIsZ3J1cG9zKQoKIyA0LiBSZXZpc2FyIGxhIGFzaWduYWNpw7NuIGRlIGdydXBvcwphc2lnbmFjaW9uIDwtIGNiaW5kKGJkbXVqZXJlc0NMVVNURVIsIGNsdXN0ZXI9c2VnbWVudG9zJGNsdXN0ZXIpCgojIDUuIEdyYWZpY2FyIGFzaWduYWNpb25lcwojIGluc3RhbGwucGFja2FnZXMoImdncGxvdDIiKQpsaWJyYXJ5KGdncGxvdDIpCiMgaW5zdGFsbC5wYWNrYWdlcygiZmFjdG9leHRyYSIpCmxpYnJhcnkoZmFjdG9leHRyYSkKCmZ2aXpfY2x1c3RlcihzZWdtZW50b3MsIGRhdGEgPSBiZG11amVy
ZXNDTFVTVEVSLAogICAgICAgICAgICAgcGFsZXR0ZSA9IGMoImRhcmtvcmNoaWQiLCAiZGFya29yYW5nZTIiLCAiYXF1YW1hcmluZTIiLCAicGluayIsICJibHVlIiwgImRhcmtvbGl2ZWdyZWVuMSIsICJzYWxtb24xIiwgInNreWJsdWUzIiwgInNsYXRlZ3JheTIiLCAieWVsbG93IiksCiAgICAgICAgICAgICBlbGxpcHNlLnR5cGUgPSAiZXVjbGlkIiwKICAgICAgICAgICAgIHN0YXIucGxvdCA9IFQsCiAgICAgICAgICAgICByZXBlbCA9IFQsCiAgICAgICAgICAgICBnZ3RoZW1lID0gdGhlbWUoKSkKCiMgNi4gT3B0aW1pemFyIGxhIGNhbnRpZGFkIGRlIGdydXBvcwpsaWJyYXJ5KGNsdXN0ZXIpCmxpYnJhcnkoZGF0YS50YWJsZSkKCnNldC5zZWVkKDEyMykKb3B0aW1pemFjaW9uIDwtIGNsdXNHYXAoYmRtdWplcmVzQ0xVU1RFUiwgRlVOID0ga21lYW5zLCBuc3RhcnQgPSAxLCBLLm1heCA9IDEwKSAKcGxvdChvcHRpbWl6YWNpb24sIHhsYWIgPSAiTsO6bWVybyBkZSBjbHVzdGVycyBLIikKYGBgCgojIyMgPHNwYW4gc3R5bGUgPSAiY29sb3I6ICM2NkNEQUE7Ij41LiBDb25jbHVzaW9uZXM8L3NwYW4+CkRhZG8gcXVlIGVsIHB1bnRvIG3DoXMgYWx0byBlbiBsYSBncsOhZmljYSBlcyAxMCwgZXNvIG5vcyBpbmRpY2EgcXVlIGxhIGNhbnRpZGFkCmRlIGdydXBvcyDDs3B0aW1vIGVzIDEwLg==