Motivations

According to an article in The New York Times:

Data scientists, according to interviews and expert estimates, spend from 50 percent to 80 percent of their time mired in this more mundane labor of collecting and preparing unruly digital data, before it can be explored for useful nuggets.

A case from Kalapa Credit Scoring Challenge

# Start from an empty workspace so objects left over from an earlier session
# cannot leak into this analysis.
rm(list = ls())

# Packages used throughout the document.
library(tidyverse)
library(stringi)
library(knitr)

# Read the raw train and test sets.
trainData <- read_csv("C:/Users/Admin/Documents/train.csv")
testData <- read_csv("C:/Users/Admin/Documents/test.csv")

# Put the column classes of train (without the target `label`) and test
# side by side, one row per column.
train_classes <- sapply(select(trainData, -label), class)
test_classes <- sapply(testData, class)

df_dataType <- data.frame(fromTrain = train_classes, fromTest = test_classes)
df_dataType <- mutate(df_dataType, col_name = row.names(df_dataType))

# Preview the first six rows of the comparison table.
kable(slice(df_dataType, 1:6))

# Columns whose class differs between train and test.
type_mismatches <- filter(df_dataType, fromTrain != fromTest)
kable(type_mismatches)

# Look at the distinct raw values of FIELD_36 in each set.
unique(trainData$FIELD_36)
unique(testData$FIELD_36)


# FIELD_36 in the test set: map the strings "TRUE"/"FALSE" to logical values;
# every other value (including missing) becomes NA.
testData <- testData %>%
  mutate(
    FIELD_36 = case_when(
      FIELD_36 == "TRUE" ~ TRUE,
      FIELD_36 == "FALSE" ~ FALSE,
      TRUE ~ NA
    )
  )

# Lower-case a character vector, then apply the "Latin-ASCII" transliteration.
normalize_text <- function(x) {
  stri_trans_general(str_to_lower(x), "Latin-ASCII")
}

# Apply the same text normalization to every character column of both sets.
trainData <- mutate_if(trainData, is.character, normalize_text)
testData <- mutate_if(testData, is.character, normalize_text)


# Check intersection: 

# Report whether a column holds the same set of distinct values in the train
# and in the test set.
#
# Args:
#   col_name: name (string) of a column present in both `trainData` and
#     `testData`; both tables are read from the enclosing environment.
#
# Returns:
#   A one-row data.frame with two character columns: `feature` (the column
#   name) and `in_train` ("yes" when both sets contain exactly the same
#   distinct values, "no" otherwise).
intersect_between <- function(col_name) {

  from_train <- unique(trainData[[col_name]])
  from_test <- unique(testData[[col_name]])

  # Compare the values themselves: an equal *number* of distinct values does
  # not imply that train and test share the same categories.
  same_values <- setequal(from_train, from_test)

  data.frame(
    feature = col_name,
    in_train = if (same_values) "yes" else "no",
    stringsAsFactors = FALSE
  )

}


# Names of all character (categorical) columns in the training set.
all_categoricals <- names(select_if(trainData, is.character))


# Run the train/test comparison for every categorical column and stack the
# one-row results into a single table.
per_feature_checks <- lapply(all_categoricals, intersect_between)
df_check_diff <- bind_rows(per_feature_checks)

# Keep the features flagged as differing between train and test.
df_diff <- filter(df_check_diff, in_train == "no")

kable(df_diff)

features_diff <- df_diff$feature

# Number of distinct values per flagged feature, in train and in test.

sapply(select(trainData, features_diff), n_distinct)
sapply(select(testData, features_diff), n_distinct)

# Function harmonizes for train: 

# Keep, for one column of the training set, only the values that also occur
# in the same column of the test set; every other entry becomes NA.
#
# Args:
#   col: name (string) of a character column present in both `trainData` and
#     `testData`; both tables are read from the enclosing environment.
#
# Returns:
#   A character vector with one element per row of `trainData`.
harmonize_for_train <- function(col) {

  train_values <- trainData[[col]]
  test_values <- testData[[col]]

  # Values observed in both sets.
  shared_values <- dplyr::intersect(train_values, test_values)

  keep <- train_values %in% shared_values

  case_when(keep ~ train_values, TRUE ~ NA_character_)

}

# Function harmonizes for test: 

# Keep, for one column of the test set, only the values that also occur in
# the same column of the training set; every other entry becomes NA.
#
# Args:
#   col: name (string) of a character column present in both `trainData` and
#     `testData`; both tables are read from the enclosing environment.
#
# Returns:
#   A character vector with one element per row of `testData`.
harmonize_for_test <- function(col) {

  train_values <- trainData[[col]]
  test_values <- testData[[col]]

  # Values observed in both sets.
  shared_values <- dplyr::intersect(train_values, test_values)

  keep <- test_values %in% shared_values

  case_when(keep ~ test_values, TRUE ~ NA_character_)

}



# Harmonize every flagged feature in both sets.
#
# Both replacement tables are built BEFORE trainData/testData are overwritten,
# because harmonize_for_train() and harmonize_for_test() each read both tables.
harmonized_train <- setNames(lapply(features_diff, harmonize_for_train), features_diff)
harmonized_test <- setNames(lapply(features_diff, harmonize_for_test), features_diff)

# Build the data frames explicitly: keep the columns as character (not factor,
# which is the data.frame() default before R 4.0) and keep the feature names
# exactly as they are (no check.names rewriting).
frame_options <- list(stringsAsFactors = FALSE, check.names = FALSE)
df_harmonized_train <- do.call(data.frame, c(harmonized_train, frame_options))
df_harmonized_test <- do.call(data.frame, c(harmonized_test, frame_options))

# Replace the original columns by their harmonized versions; as before, the
# harmonized columns end up after the untouched ones. Nothing to do when no
# feature was flagged.
if (length(features_diff) > 0) {
  trainData <- trainData[setdiff(names(trainData), features_diff)] %>%
    bind_cols(df_harmonized_train)
  testData <- testData[setdiff(names(testData), features_diff)] %>%
    bind_cols(df_harmonized_test)
}

# Check: distinct values per flagged feature after harmonization.

sapply(trainData[features_diff], n_distinct)
sapply(testData[features_diff], n_distinct)

# Store logical columns as integers in both sets.
trainData <- mutate_if(trainData, is.logical, as.integer)
testData <- mutate_if(testData, is.logical, as.integer)

# Stack train and test into one table.
totalData <- bind_rows(trainData, testData)


# Number of NaN entries in FIELD_54 before the clean-up.
sum(is.nan(totalData$FIELD_54))

# Helpers replacing NaN by the NA of the matching type.
nan_to_na_double <- function(x) {
  case_when(is.nan(x) ~ NA_real_, TRUE ~ x)
}
nan_to_na_integer <- function(x) {
  case_when(is.nan(x) ~ NA_integer_, TRUE ~ x)
}

totalData <- totalData %>%
  mutate_if(is.double, nan_to_na_double) %>%
  mutate_if(is.integer, nan_to_na_integer)

# Number of NaN entries in FIELD_54 after the clean-up.
sum(is.nan(totalData$FIELD_54))

# In character columns the string "none" stands for a missing value.
none_to_na <- function(x) {
  case_when(x == "none" ~ NA_character_, TRUE ~ x)
}

totalData <- mutate_if(totalData, is.character, none_to_na)


# Some special columns: character columns that contain the strings
# "true" / "false".

character_columns <- select_if(totalData, is.character)

# TRUE for every character column holding at least one "true" or "false".
logical_condition <- vapply(
  character_columns,
  function(x) sum(x %in% c("true", "false")) > 0,
  logical(1)
)
all_columns <- names(character_columns)
var_logical_character <- all_columns[logical_condition]

# Random sample of 20 rows of these columns before recoding.
kable(sample_n(select(totalData, var_logical_character), 20))


# Recode "true" -> 1, "false" -> 0, anything else -> NA.
true_false_to_int <- function(x) {
  case_when(x == "true" ~ 1L, x == "false" ~ 0L, TRUE ~ NA_integer_)
}

totalData <- mutate_at(totalData, var_logical_character, true_false_to_int)


# Random sample of 20 rows of the same columns after recoding.
kable(sample_n(select(totalData, var_logical_character), 20))


# One-hot encode the remaining character (categorical) features.

library(caret)

categorical_part <- select_if(totalData, is.character)
dummies <- dummyVars("~ .", categorical_part)
features_oneHot <- as.data.frame(predict(dummies, totalData))

# Final modelling table: numeric columns followed by the dummy columns.

numeric_part <- select_if(totalData, is.numeric)
df_final <- bind_cols(numeric_part, features_oneHot)

# Replace the characters [ ] , < ' and space in column names by "_".
all_names <- names(df_final)

names(df_final) <- str_replace_all(all_names, "\\[|\\]|\\,|\\<|\\'| ", "_")

# Save the table for the modelling steps.

write_csv(df_final, "C:/Users/Admin/Documents/df_final.csv")

XGBoost (Python/R)

R code:

# Split df_final into labelled rows (training) and unlabelled rows (to score).

train_rows <- df_final %>% filter(!is.na(label))
test_rows <- df_final %>% filter(is.na(label))

# Feature matrices (without id and target) and the target vector.

X_train <- train_rows %>%
  select(-id, -label) %>%
  as.matrix()


Y_train <- train_rows %>%
  pull(label)

X_test <- test_rows %>%
  select(-id, -label) %>%
  as.matrix()


#------------------------------------------
#   Train XGBoost with default parameters
#------------------------------------------
library(xgboost)

# Convert to DMatrix form for train data:
dtrain <- xgb.DMatrix(data = X_train, label = Y_train)

# Train a default XGBoost.
# - `nrounds` is spelled out instead of relying on partial argument matching.
# - `eval_metric = "error"` is requested explicitly so that the evaluation log
#   contains the `train_error` column plotted below (recent xgboost versions
#   log logloss by default for binary:logistic). The metric only affects the
#   log, not the fitted model.
xgb1 <- xgboost(data = dtrain,
                objective = "binary:logistic",
                eval_metric = "error",
                verbose = 1,
                nrounds = 250)


# Training error per boosting iteration.
xgb1$evaluation_log %>%
  data.frame() %>%
  ggplot(aes(iter, train_error)) +
  geom_line() +
  labs(title = "Figure 1: XGBoost Training Process with missing data")


# Make prediction:
prediction <- predict(xgb1, X_test)

# DF of results: ids of the unlabelled rows with their predicted values.
df_sub <- data.frame(id = test_rows$id, label = prediction)

# Save for submission:
write_csv(df_sub, "df_subR.csv")

Python code:

# Load data:
import pandas as pd

df = pd.read_csv("C:/Users/Admin/Documents/df_final.csv")

# Split data: rows with a label are used for training, rows without a label
# are the ones to score.

df_train = df[df['label'].notnull()]
X = df_train.drop(labels=["id", "label"], axis=1)
Y = df_train["label"]

# Keep the full test rows (with id) for the submission and a separate
# feature-only frame for prediction, so the rows are filtered only once.
df_test = df[df['label'].isnull()]
X_test = df_test.drop(labels=["id", "label"], axis=1)


# =================
#  Train XGboost
# =================

# Train XGBClassifier with cross-validation:
from xgboost import XGBClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score

cv = RepeatedStratifiedKFold(n_splits=3, n_repeats=3, random_state=29)
xgb1 = XGBClassifier(random_state=29)
auc_scores1 = cross_val_score(xgb1, X, Y, cv=cv, scoring="roc_auc", n_jobs=-1)
# Mean ROC AUC over all folds and repeats (printed so it also shows up when
# the code is run as a script).
print(auc_scores1.mean())


# Fit on all labelled rows and predict the probability of class 1.
# The result is NOT stored in a variable called `pd`: that would overwrite
# the pandas module alias imported above.
xgb1.fit(X, Y)
pd_test = xgb1.predict_proba(X_test)[:, 1]


df_submission = df_test.assign(label=pd_test)
df_submission = df_submission[['id', 'label']]
# index=False: write only the id and label columns, without the row index.
df_submission.to_csv("C:/Users/Admin/Documents/df_subPython.csv", index=False)

WoE Solution

Code for Random Forest:

# Replace the characters [ ] , < ' and space inside categorical values by "_".
clean_category_text <- function(x) {
  str_replace_all(x, "\\[|\\]|\\,|\\<|\\'| ", "_")
}

totalData <- mutate_if(totalData, is.character, clean_category_text)


# Labelled rows form the training table (target kept, id dropped).
df_train <- select(filter(totalData, !is.na(label)), -id)


# Unlabelled rows form the test table (id and target dropped).
df_test <- select(filter(totalData, is.na(label)), -id, -label)


# Generate the binning for all variables/features:
library(scorecard)
bins_var <- woebin(df_train, y = "label", no_cores = 8, positive = "label|1")

# Information value (IV) per variable: stack the per-variable binning tables,
# keep one row per variable, and order the variables by increasing IV.

all_bins <- as.data.frame(do.call("rbind", bins_var))

iv_values <- all_bins %>%
  filter(!duplicated(variable)) %>%
  rename(iv_var = total_iv) %>%
  arrange(iv_var) %>%
  mutate(variable = factor(variable, levels = variable))

# Features with IV >= 0. Note: despite the name `var_IV_10`, the cut-off used
# here is 0, not 0.1.

var_IV_10 <- as.character(pull(filter(iv_values, iv_var >= 0), variable))

# Names of the WoE-transformed columns of the selected features.
woe_columns <- paste0(var_IV_10, "_", "woe")


# WoE-transform the training table; keep the target and the selected columns.

train_woe <- as.data.frame(woebin_ply(df_train, bins_var))
train_woe <- train_woe[c("label", woe_columns)]

write_csv(train_woe, "C:/Users/Admin/Documents/train_woe.csv")


# WoE-transform the test table with the same bins; keep the selected columns.
test_woe <- as.data.frame(woebin_ply(df_test, bins_var))
test_woe <- test_woe[woe_columns]

# Relabel the target: 1 -> "Bad", everything else -> "Good", stored as factor.

df_forGBM <- train_woe %>%
  mutate(label = case_when(label == 1 ~ "Bad", TRUE ~ "Good")) %>%
  mutate(label = as.factor(label))

# Treat every WoE value as a category (the min-max scaling that gave these
# objects their "_Scaled" names is not applied).

df_forGBM_Scaled <- df_forGBM %>%
  mutate_if(is.numeric, as.factor)

# Convert the test columns with the levels of the matching training column.
# Separate as.factor() calls would derive the levels from each set's own
# values, so the same WoE value could end up at a different level position
# (integer code) in train and test.
df_test_Scaled <- test_woe

for (woe_col in names(test_woe)) {
  if (!is.numeric(test_woe[[woe_col]])) {
    next
  }
  train_col <- df_forGBM_Scaled[[woe_col]]
  df_test_Scaled[[woe_col]] <- if (is.factor(train_col)) {
    factor(test_woe[[woe_col]], levels = levels(train_col))
  } else {
    # No matching training factor: fall back to the original conversion.
    as.factor(test_woe[[woe_col]])
  }
}

# Train Random Forest:
library(ranger)
RF_default <- ranger(label ~ ., data = df_forGBM_Scaled, probability = TRUE, seed = 29)

# Use the RF Classifier for predicting PD (Probability of Default):
pd_sub_RF <- predict(RF_default, df_test_Scaled, type = "response")

# Keep the predicted probability of class "Bad".
pd_sub_RF <- as.data.frame(pd_sub_RF$predictions)[["Bad"]]

# Save results for submission.
# NOTE(review): the ids are hard-coded as 30000:49999 and assumed to match the
# row order of the test set - confirm against the data.
df_sub <- data.frame(id = 30000:49999, label = pd_sub_RF)
write_csv(df_sub, "submission_RandomForest_ScaledData.csv")

Code for XGBoost:

df_final <- read_csv("C:/Users/Admin/Documents/train_woe.csv")


# Labelled rows of the WoE-transformed training table.
labelled_rows <- df_final %>%
  filter(!is.na(label))

# Feature matrix and target, both taken from the SAME table so that rows and
# labels are guaranteed to line up (previously `Y_train` was a leftover from
# an earlier chunk).

X_train <- labelled_rows %>%
  select(-label) %>%
  as.matrix()

Y_train <- labelled_rows %>%
  pull(label)

X_test <- test_woe %>% as.matrix()

#------------------------------------------
#   Train XGBoost with default parameters
#------------------------------------------
library(xgboost)

# Convert to DMatrix form for train data:
dtrain <- xgb.DMatrix(data = X_train, label = Y_train)

# Train a default XGBoost.
# - `nrounds` is spelled out instead of relying on partial argument matching.
# - `eval_metric = "error"` is requested explicitly so that the evaluation log
#   contains the `train_error` column plotted below (recent xgboost versions
#   log logloss by default for binary:logistic). The metric only affects the
#   log, not the fitted model.
xgb2 <- xgboost(data = dtrain,
                objective = "binary:logistic",
                eval_metric = "error",
                verbose = 1,
                nrounds = 1000)

# Training error per boosting iteration:

xgb2$evaluation_log %>%
  data.frame() %>%
  ggplot(aes(iter, train_error)) +
  geom_line() +
  labs(title = "Figure 2: XGBoost Training Process, WOE Transformation")

# Use for predicting:

pd_xgb_woe <- predict(xgb2, X_test)

# Save result for submission.
# NOTE(review): the ids are hard-coded as 30000:49999 and assumed to match the
# row order of `test_woe` - confirm against the data.

df_sub_xgb_woe <- data.frame(id = 30000:49999, label = pd_xgb_woe)
write_csv(df_sub_xgb_woe, "df_sub_xgb_woe.csv")
LS0tDQp0aXRsZTogJ0RhdGEgcHJlLXByb2Nlc3NpbmcgZm9yIEthbGFwYSBDcmVkaXQgU2NvcmluZyBDaGFsbGVuZ2UnDQphdXRob3I6ICdBdXRob3I6IE5ndXllbiBDaGkgRHVuZycNCnN1YnRpdGxlOiAiUiBNYWNoaW5lIExlYXJuaW5nIFNlcmllcyINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDogDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgICMgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgaGlnaGxpZ2h0OiB6ZW5idXJuDQogICAgIyBudW1iZXJfc2VjdGlvbnM6IHllcw0KICAgIHRoZW1lOiAiZmxhdGx5Ig0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KLS0tDQoNCmBgYHtyIHNldHVwLGluY2x1ZGU9RkFMU0V9DQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUsIHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFLCBjYWNoZSA9IFRSVUUsIGV2YWwgPSBGQUxTRSkNCg0KYGBgDQoNCg0KIyBNb3RpdmF0aW9ucw0KDQpBY2NvcmRpbmcgdG8gYW4gYXJ0aWNsZSBieSBbTmV3IFlvcmsgVGltZXNdKGh0dHBzOi8vd3d3Lm55dGltZXMuY29tLzIwMTQvMDgvMTgvdGVjaG5vbG9neS9mb3ItYmlnLWRhdGEtc2NpZW50aXN0cy1odXJkbGUtdG8taW5zaWdodHMtaXMtamFuaXRvci13b3JrLmh0bWwpOiANCg0KKkRhdGEgc2NpZW50aXN0cywgYWNjb3JkaW5nIHRvIGludGVydmlld3MgYW5kIGV4cGVydCBlc3RpbWF0ZXMsIHNwZW5kIGZyb20gNTAgcGVyY2VudCB0byA4MCBwZXJjZW50IG9mIHRoZWlyIHRpbWUgbWlyZWQgaW4gdGhpcyBtb3JlIG11bmRhbmUgbGFib3Igb2YgY29sbGVjdGluZyBhbmQgcHJlcGFyaW5nIHVucnVseSBkaWdpdGFsIGRhdGEsIGJlZm9yZSBpdCBjYW4gYmUgZXhwbG9yZWQgZm9yIHVzZWZ1bCBudWdnZXRzLioNCg0KIyBBIGNhc2UgZnJvbSBLYWxhcGEgQ3JlZGl0IFNjb3JpbmcgQ2hhbGxlbmdlDQoNCmBgYHtyLCBldmFsPUZBTFNFfQ0KDQojIENsZWFyIHdvcmtzcGFjZTogDQpybShsaXN0ID0gbHMoKSkNCg0KIyBsb2FkIGRhdGE6IA0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHN0cmluZ2kpDQpsaWJyYXJ5KGtuaXRyKQ0KDQpyZWFkX2NzdigiQzovVXNlcnMvQWRtaW4vRG9jdW1lbnRzL3RyYWluLmNzdiIpIC0+IHRyYWluRGF0YQ0KcmVhZF9jc3YoIkM6L1VzZXJzL0FkbWluL0RvY3VtZW50cy90ZXN0LmNzdiIpIC0+IHRlc3REYXRhDQoNCiMgQ2hlY2sgZGF0YSB0eXBlIGZvciB0cmFpbiBhbmQgdGVzdDogDQoNCmRhdGEuZnJhbWUoZnJvbVRyYWluID0gc2FwcGx5KHRyYWluRGF0YSAlPiUgc2VsZWN0KC1sYWJlbCksIGNsYXNzKSwgZnJvbVRlc3QgPSBzYXBwbHkodGVzdERhdGEsIGNsYXNzKSkgLT4gZGZfZGF0YVR5cGUNCmRmX2RhdGFUeXBlICU+JSBtdXRhdGUoY29sX25hbWUgPSByb3cubmFtZXMoLikpIC0+IGRmX2RhdGFUeXBlDQoNCmthYmxlKGRmX2RhdGFUeXBlICU+JSBzbGljZSgxOjYpKQ0KDQpkZl9kYXRhVHlwZSAlPiUgDQogIGZpbHRlcihmcm9t
VHJhaW4gIT0gZnJvbVRlc3QpICU+JSANCiAga2FibGUoKQ0KDQp0cmFpbkRhdGEkRklFTERfMzYgJT4lIHVuaXF1ZSgpDQp0ZXN0RGF0YSRGSUVMRF8zNiAlPiUgdW5pcXVlKCkNCg0KDQojIENvbnZlcnQgdG8gbG9naWNhbDogDQoNCnRlc3REYXRhICU+JSANCiAgbXV0YXRlKEZJRUxEXzM2ID0gY2FzZV93aGVuKEZJRUxEXzM2ID09ICJGQUxTRSIgfiBGQUxTRSwgRklFTERfMzYgPT0gIlRSVUUiIH4gVFJVRSwgVFJVRSB+IE5BKSkgLT4gdGVzdERhdGENCg0KIyBDb252ZXJ0IGFsbCBzdHJpbmcgY29sdW1ucyB0byBsb3dlcjogDQoNCnRyYWluRGF0YSAlPiUgbXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgZnVuY3Rpb24oeCkge3N0cl90b19sb3dlcih4KSAlPiUgc3RyaV90cmFuc19nZW5lcmFsKCJMYXRpbi1BU0NJSSIpfSkgLT4gdHJhaW5EYXRhDQp0ZXN0RGF0YSAlPiUgbXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgZnVuY3Rpb24oeCkge3N0cl90b19sb3dlcih4KSAlPiUgc3RyaV90cmFuc19nZW5lcmFsKCJMYXRpbi1BU0NJSSIpfSkgLT4gdGVzdERhdGENCg0KDQojIENoZWNrIGludGVyc2VjdGlvbjogDQoNCmludGVyc2VjdF9iZXR3ZWVuIDwtIGZ1bmN0aW9uKGNvbF9uYW1lKSB7DQogIA0KICBmcm9tX3RyYWluIDwtIHRyYWluRGF0YSAlPiUgcHVsbChjb2xfbmFtZSkgJT4lIHVuaXF1ZSgpDQogIGZyb21fdGVzdCA8LSB0ZXN0RGF0YSAlPiUgcHVsbChjb2xfbmFtZSkgJT4lIHVuaXF1ZSgpIA0KICANCiAgY29uZGl0aW9uIDwtIHN1bShsZW5ndGgoZnJvbV90ZXN0KSkgPT0gc3VtKGxlbmd0aChmcm9tX3RyYWluKSkNCiAgDQogIGlmIChjb25kaXRpb24pIHsNCiAgICBjb25jbHVzaW9uIDwtICJ5ZXMiDQogIH0gZWxzZSB7DQogICAgY29uY2x1c2lvbiA8LSAibm8iDQogIH0NCiAgDQogIHJldHVybihkYXRhLmZyYW1lKGZlYXR1cmUgPSBjb2xfbmFtZSwgaW5fdHJhaW4gPSBjb25jbHVzaW9uKSkNCiAgDQp9DQoNCg0KIyBBbGwgY2F0ZWdvcmljYWwgZmVhdHVyZXM6IA0KdHJhaW5EYXRhICU+JSBzZWxlY3RfaWYoaXMuY2hhcmFjdGVyKSAlPiUgbmFtZXMoKSAtPiBhbGxfY2F0ZWdvcmljYWxzDQoNCg0KIyBVc2UgdGhlIGZ1bmN0aW9uOiANCmRvLmNhbGwoImJpbmRfcm93cyIsIGxhcHBseShhbGxfY2F0ZWdvcmljYWxzLCBpbnRlcnNlY3RfYmV0d2VlbikpIC0+IGRmX2NoZWNrX2RpZmYNCg0KZGZfY2hlY2tfZGlmZiAlPiUgZmlsdGVyKGluX3RyYWluID09ICJubyIpIC0+IGRmX2RpZmYNCg0KZGZfZGlmZiAlPiUga2FibGUoKQ0KDQpmZWF0dXJlc19kaWZmIDwtIGRmX2RpZmYkZmVhdHVyZQ0KDQojIEFuIGV4YW1wbGU6IA0KDQpzYXBwbHkodHJhaW5EYXRhICU+JSBzZWxlY3QoZmVhdHVyZXNfZGlmZiksIG5fZGlzdGluY3QpDQpzYXBwbHkodGVzdERhdGEgJT4lIHNlbGVjdChmZWF0dXJlc19kaWZmKSwgbl9kaXN0aW5jdCkNCg0KIyBGdW5jdGlvbiBoYXJtb25pemVzIGZvciB0cmFpbjogDQoNCmhhcm1vbml6ZV9mb3JfdHJhaW4gPC0g
ZnVuY3Rpb24oY29sKSB7DQogIA0KICBmcm9tX1RyYWluIDwtIHRyYWluRGF0YVssIGNvbCwgZHJvcCA9IFRSVUVdDQogIA0KICBmcm9tX1Rlc3QgPC0gdGVzdERhdGFbLCBjb2wsIGRyb3AgPSBUUlVFXSANCiAgDQogIGRwbHlyOjppbnRlcnNlY3QoZnJvbV9UcmFpbiwgZnJvbV9UZXN0KSAtPiBpbnRlcnNlY3Rpb25zDQogIA0KICBuZXdfdmFsdWVzX2ZvclRyYWluIDwtIGNhc2Vfd2hlbihmcm9tX1RyYWluICVpbiUgaW50ZXJzZWN0aW9ucyB+IGZyb21fVHJhaW4sIFRSVUUgfiBOQV9jaGFyYWN0ZXJfKQ0KICANCiAgIyBuZXdfdmFsdWVzX2ZvclRlc3QgPC0gY2FzZV93aGVuKGZyb21fVGVzdCAlaW4lIGludGVyc2VjdGlvbnMgfiBmcm9tX1Rlc3QsIFRSVUUgfiBOQV9jaGFyYWN0ZXJfKQ0KICANCiAgcmV0dXJuKG5ld192YWx1ZXNfZm9yVHJhaW4pDQogIA0KfQ0KDQojIEZ1bmN0aW9uIGhhcm1vbml6ZXMgZm9yIHRlc3Q6IA0KDQpoYXJtb25pemVfZm9yX3Rlc3QgPC0gZnVuY3Rpb24oY29sKSB7DQogIA0KICBmcm9tX1RyYWluIDwtIHRyYWluRGF0YVssIGNvbCwgZHJvcCA9IFRSVUVdDQogIA0KICBmcm9tX1Rlc3QgPC0gdGVzdERhdGFbLCBjb2wsIGRyb3AgPSBUUlVFXSANCiAgDQogIGRwbHlyOjppbnRlcnNlY3QoZnJvbV9UcmFpbiwgZnJvbV9UZXN0KSAtPiBpbnRlcnNlY3Rpb25zDQogIA0KICAjIG5ld192YWx1ZXNfZm9yVHJhaW4gPC0gY2FzZV93aGVuKGZyb21fVHJhaW4gJWluJSBpbnRlcnNlY3Rpb25zIH4gZnJvbV9UcmFpbiwgVFJVRSB+IE5BX2NoYXJhY3Rlcl8pDQogIA0KICBuZXdfdmFsdWVzX2ZvclRlc3QgPC0gY2FzZV93aGVuKGZyb21fVGVzdCAlaW4lIGludGVyc2VjdGlvbnMgfiBmcm9tX1Rlc3QsIFRSVUUgfiBOQV9jaGFyYWN0ZXJfKQ0KICANCiAgcmV0dXJuKG5ld192YWx1ZXNfZm9yVGVzdCkNCiAgDQp9DQoNCg0KDQpzYXBwbHkoZmVhdHVyZXNfZGlmZiwgaGFybW9uaXplX2Zvcl90cmFpbikgJT4lIGRhdGEuZnJhbWUoKSAtPiBkZl9oYXJtb25pemVkX3RyYWluDQoNCnNhcHBseShmZWF0dXJlc19kaWZmLCBoYXJtb25pemVfZm9yX3Rlc3QpICU+JSBkYXRhLmZyYW1lKCkgLT4gZGZfaGFybW9uaXplZF90ZXN0DQoNCg0KdHJhaW5EYXRhICU+JSBzZWxlY3QoLWZlYXR1cmVzX2RpZmYpICU+JSBiaW5kX2NvbHMoZGZfaGFybW9uaXplZF90cmFpbikgLT4gdHJhaW5EYXRhDQp0ZXN0RGF0YSAlPiUgc2VsZWN0KC1mZWF0dXJlc19kaWZmKSAlPiUgYmluZF9jb2xzKGRmX2hhcm1vbml6ZWRfdGVzdCkgLT4gdGVzdERhdGENCg0KIyBDaGVjazogDQoNCnNhcHBseSh0cmFpbkRhdGEgJT4lIHNlbGVjdChmZWF0dXJlc19kaWZmKSwgbl9kaXN0aW5jdCkNCnNhcHBseSh0ZXN0RGF0YSAlPiUgc2VsZWN0KGZlYXR1cmVzX2RpZmYpLCBuX2Rpc3RpbmN0KQ0KDQojIENvbnZlcnQgbG9naWNhbCB0byBpbnRlZ2VyOiANCg0KdHJhaW5EYXRhICU+JSBtdXRhdGVfaWYoaXMubG9naWNhbCwgYXMuaW50ZWdlcikgLT4gdHJhaW5EYXRh
DQoNCnRlc3REYXRhICU+JSBtdXRhdGVfaWYoaXMubG9naWNhbCwgYXMuaW50ZWdlcikgLT4gdGVzdERhdGENCg0KIyBDb21iaW5lIHRyYWluIGFuZCB0ZXN0IHNldDogDQoNCnRvdGFsRGF0YSA8LSBiaW5kX3Jvd3ModHJhaW5EYXRhLCB0ZXN0RGF0YSkNCg0KDQojIENvbnZlcnQgTmFuIHRvIG1pc3Npbmc6IA0KDQpzdW0oaXMubmFuKHRvdGFsRGF0YSRGSUVMRF81NCkpDQoNCg0KdG90YWxEYXRhICU+JSANCiAgbXV0YXRlX2lmKGlzLmRvdWJsZSwgZnVuY3Rpb24oeCkge2Nhc2Vfd2hlbihpcy5uYW4oeCkgfiBOQV9yZWFsXywgVFJVRSB+IHgpfSkgJT4lIA0KICBtdXRhdGVfaWYoaXMuaW50ZWdlciwgZnVuY3Rpb24oeCkge2Nhc2Vfd2hlbihpcy5uYW4oeCkgfiBOQV9pbnRlZ2VyXywgVFJVRSB+IHgpfSkgLT4gdG90YWxEYXRhDQoNCiMgQ2hlY2sgYWdhaW46IA0Kc3VtKGlzLm5hbih0b3RhbERhdGEkRklFTERfNTQpKQ0KDQojIENvbnZlcnQgIm5vbmUiIHRvIG1pc3NpbmcgZm9yIGNhdGVnb3JpY2FsczogDQoNCnRvdGFsRGF0YSAlPiUgbXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgZnVuY3Rpb24oeCkge2Nhc2Vfd2hlbih4ID09ICJub25lIiB+IE5BX2NoYXJhY3Rlcl8sIFRSVUUgfiB4KX0pIC0+IHRvdGFsRGF0YQ0KDQoNCiMgU29tZSBzcGVjaWFsIGN1bHVtbnM6IA0KDQpsb2dpY2FsX2NvbmRpdGlvbiA8LSBzYXBwbHkodG90YWxEYXRhICU+JSBzZWxlY3RfaWYoaXMuY2hhcmFjdGVyKSwgZnVuY3Rpb24oeCkge3N1bSh4ICVpbiUgYygidHJ1ZSIsICJmYWxzZSIpKX0pID4gMA0KYWxsX2NvbHVtbnMgPC0gbmFtZXModG90YWxEYXRhICU+JSBzZWxlY3RfaWYoaXMuY2hhcmFjdGVyKSkNCnZhcl9sb2dpY2FsX2NoYXJhY3RlciA8LSBhbGxfY29sdW1uc1tsb2dpY2FsX2NvbmRpdGlvbl0NCg0KdG90YWxEYXRhICU+JSANCiAgc2VsZWN0KHZhcl9sb2dpY2FsX2NoYXJhY3RlcikgJT4lIA0KICBzYW1wbGVfbigyMCkgJT4lIA0KICBrYWJsZSgpDQoNCg0KdG90YWxEYXRhICU+JSBtdXRhdGVfYXQodmFyX2xvZ2ljYWxfY2hhcmFjdGVyLCBmdW5jdGlvbih4KSB7Y2FzZV93aGVuKHggPT0gInRydWUiIH4gMUwsIHggPT0gImZhbHNlIiB+IDBMLCBUUlVFIH4gTkFfaW50ZWdlcl8pfSkgLT4gdG90YWxEYXRhDQoNCg0KIyBDaGVjayBhZ2FpbjogDQoNCnRvdGFsRGF0YSAlPiUgDQogIHNlbGVjdCh2YXJfbG9naWNhbF9jaGFyYWN0ZXIpICU+JSANCiAgc2FtcGxlX24oMjApICU+JSANCiAga2FibGUoKQ0KDQoNCiMgQ29udmVydCB0byBkdW1taWVzIGZvciBjYXRlZ29yaWNhbCBmZWF0dXJlczogDQoNCmxpYnJhcnkoY2FyZXQpDQoNCmR1bW1pZXMgPC0gZHVtbXlWYXJzKCJ+IC4iLCB0b3RhbERhdGEgJT4lIHNlbGVjdF9pZihpcy5jaGFyYWN0ZXIpKQ0KcHJlZGljdChkdW1taWVzLCB0b3RhbERhdGEpICU+JSBhcy5kYXRhLmZyYW1lKCkgLT4gZmVhdHVyZXNfb25lSG90DQoNCiMgRmluYWwgZGF0YSBmb3IgbW9kZWxsaW5nOiANCg0KdG90YWxEYXRh
ICU+JSANCiAgc2VsZWN0X2lmKGlzLm51bWVyaWMpICU+JSANCiAgYmluZF9jb2xzKGZlYXR1cmVzX29uZUhvdCkgLT4gZGZfZmluYWwNCg0KIyBSZW5hbWUgZm9yIGNvbHVtbnM6IA0KYWxsX25hbWVzIDwtIG5hbWVzKGRmX2ZpbmFsKQ0KDQpuYW1lcyhkZl9maW5hbCkgPC0gYWxsX25hbWVzICU+JSANCiAgc3RyX3JlcGxhY2VfYWxsKCJcXFt8XFxdfFxcLHxcXDx8XFwnfCAiLCAiXyIpDQoNCiMgU2F2ZSBkYXRhOiANCg0Kd3JpdGVfY3N2KGRmX2ZpbmFsLCAiQzovVXNlcnMvQWRtaW4vRG9jdW1lbnRzL2RmX2ZpbmFsLmNzdiIpDQoNCmBgYA0KDQojIFhHQm9vc3QgKFB5dGhvbi9SKQ0KDQpSIENvZGVzOiANCg0KYGBge3J9DQoNCiMgQ29udmVydCBmZWF0dXJlcyB0byBETWF0cml4IGZvcm06IA0KDQpYX3RyYWluIDwtIGRmX2ZpbmFsICU+JSANCiAgZmlsdGVyKCFpcy5uYShsYWJlbCkpICU+JSANCiAgc2VsZWN0KC1pZCwgLWxhYmVsKSAlPiUgDQogIGFzLm1hdHJpeCgpDQoNCg0KWV90cmFpbiA8LSBkZl9maW5hbCAlPiUgDQogIGZpbHRlcighaXMubmEobGFiZWwpKSAlPiUgDQogIHB1bGwobGFiZWwpDQoNClhfdGVzdCA8LSBkZl9maW5hbCAlPiUgDQogIGZpbHRlcihpcy5uYShsYWJlbCkpICU+JSANCiAgc2VsZWN0KC1pZCwgLWxhYmVsKSAlPiUgDQogIGFzLm1hdHJpeCgpDQoNCg0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KIyAgIFRyYWluIFhHQm9vc3Qgd2l0aCBkZWZhdWx0IHBhcmFtZXRlcnMNCiMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCmxpYnJhcnkoeGdib29zdCkNCg0KIyBDb252ZXJ0IHRvIERNYXRyaXggZm9ybSBmb3IgdHJhaW4gZGF0YTogDQpkdHJhaW4gPC0geGdiLkRNYXRyaXgoZGF0YSA9IFhfdHJhaW4sIGxhYmVsID0gWV90cmFpbikNCg0KIyBUcmFpbiBhIGRlZmF1bHQgWEdCb29zdDogDQp4Z2IxIDwtIHhnYm9vc3QoZGF0YSA9IGR0cmFpbiwgDQogICAgICAgICAgICAgICAgb2JqZWN0aXZlID0gImJpbmFyeTpsb2dpc3RpYyIsIA0KICAgICAgICAgICAgICAgIHZlcmJvc2UgPSAxLCANCiAgICAgICAgICAgICAgICBucm91bmQgPSAyNTApDQoNCg0KeGdiMSRldmFsdWF0aW9uX2xvZyAlPiUgDQogIGRhdGEuZnJhbWUoKSAlPiUgDQogIGdncGxvdChhZXMoaXRlciwgdHJhaW5fZXJyb3IpKSArIA0KICBnZW9tX2xpbmUoKSArIA0KICBsYWJzKHRpdGxlID0gIkZpZ3VyZSAxOiBYR0Jvb3N0IFRyYWluaW5nIFByb2Nlc3Mgd2l0aCBtaXNzaW5nIGRhdGEiKQ0KDQoNCiMgTWFrZSBwcmVkaWN0aW9uOiANCnByZWRpY3Rpb24gPC0gcHJlZGljdCh4Z2IxLCBYX3Rlc3QpDQoNCiMgREYgb2YgcmVzdWx0czogDQpkYXRhLmZyYW1lKGlkID0gZGZfZmluYWwgJT4lIGZpbHRlcihpcy5uYShsYWJlbCkpICU+JSBwdWxsKGlkKSwgbGFiZWwgPSBwcmVkaWN0aW9uKSAtPiBkZl9zdWINCg0KIyBTYXZlIGZvciBzdWJtaXNzaW9uOiANCndyaXRlX2NzdihkZl9z
dWIsICJkZl9zdWJSLmNzdiIpDQoNCmBgYA0KDQoNClB5dGhvbiBjb2RlczogDQoNCmBgYHtyLCBldmFsPUZBTFNFfQ0KIyBMb2FkIGRhdGE6DQppbXBvcnQgcGFuZGFzIGFzIHBkDQoNCmRmID0gcGQucmVhZF9jc3YoIkM6L1VzZXJzL0FkbWluL0RvY3VtZW50cy9kZl9maW5hbC5jc3YiKQ0KDQojIFNwbGl0IGRhdGE6IA0KDQpkZl90cmFpbiA9IGRmW2RmWydsYWJlbCddLm5vdG51bGwoKV0NClggPSBkZl90cmFpbi5kcm9wKGxhYmVscz1bImlkIiwgImxhYmVsIl0sIGF4aXM9MSkNClkgPSBkZl90cmFpblsibGFiZWwiXQ0KDQpkZl90ZXN0ID0gZGZbZGZbJ2xhYmVsJ10uaXNudWxsKCldDQpkZl90ZXN0ID0gZGZfdGVzdC5kcm9wKGxhYmVscz1bImlkIiwgImxhYmVsIl0sIGF4aXM9MSkNCg0KDQojID09PT09PT09PT09PT09PT09DQojICBUcmFpbiBYR2Jvb3N0DQojID09PT09PT09PT09PT09PT09DQoNCiMgVHJhaW4gWEdCQ2xhc3NpZmllciB3aXRoIGNyb3NzLXZhbGlkYXRpb246DQpmcm9tIHhnYm9vc3QgaW1wb3J0IFhHQkNsYXNzaWZpZXINCmZyb20gc2tsZWFybi5tb2RlbF9zZWxlY3Rpb24gaW1wb3J0IFJlcGVhdGVkU3RyYXRpZmllZEtGb2xkLCBjcm9zc192YWxfc2NvcmUNCg0KY3YgPSBSZXBlYXRlZFN0cmF0aWZpZWRLRm9sZChuX3NwbGl0cz0zLCBuX3JlcGVhdHM9MywgcmFuZG9tX3N0YXRlPTI5KQ0KeGdiMSA9IFhHQkNsYXNzaWZpZXIocmFuZG9tX3N0YXRlPTI5KQ0KYXVjX3Njb3JlczEgPSBjcm9zc192YWxfc2NvcmUoeGdiMSwgWCwgWSwgY3Y9Y3YsIHNjb3Jpbmc9InJvY19hdWMiLCBuX2pvYnM9LTEpDQphdWNfc2NvcmVzMS5tZWFuKCkNCg0KDQp4Z2IxLmZpdChYLCBZKQ0KcGQgPSB4Z2IxLnByZWRpY3RfcHJvYmEoZGZfdGVzdClbOiwgMV0NCg0KDQpkZl90ZXN0ID0gZGZbZGZbJ2xhYmVsJ10uaXNudWxsKCldDQoNCmRmX3N1Ym1pc3Npb24gPSBkZl90ZXN0LmFzc2lnbihsYWJlbD1wZCkNCmRmX3N1Ym1pc3Npb24gPSBkZl9zdWJtaXNzaW9uW1snaWQnLCAnbGFiZWwnXV0NCmRmX3N1Ym1pc3Npb24udG9fY3N2KCJDOi9Vc2Vycy9BZG1pbi9Eb2N1bWVudHMvZGZfc3ViUHl0aG9uLmNzdiIpDQoNCg0KYGBgDQoNCg0KIyBXb0UgU29sdXRpb24NCg0KQ29kZXMgZm9yIFJhbmRvbSBGb3Jlc3Q6IA0KDQoNCmBgYHtyfQ0KdG90YWxEYXRhICU+JSANCiAgbXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgZnVuY3Rpb24oeCkge3ggJT4lIHN0cl9yZXBsYWNlX2FsbCgiXFxbfFxcXXxcXCx8XFw8fFxcJ3wgIiwgIl8iKX0pIC0+IHRvdGFsRGF0YQ0KDQoNCmRmX3RyYWluIDwtIHRvdGFsRGF0YSAlPiUgDQogIGZpbHRlcighaXMubmEobGFiZWwpKSAlPiUgDQogIHNlbGVjdCgtaWQpDQoNCg0KZGZfdGVzdCA8LSB0b3RhbERhdGEgJT4lIA0KICBmaWx0ZXIoaXMubmEobGFiZWwpKSAlPiUgDQogIHNlbGVjdCgtaWQsIC1sYWJlbCkNCg0KDQojIEdlbmVyYXRlcyBvcHRpbWFsIGJpbm5pbmcgZm9yIGFsbCB2YXJpYWJsZXMvZmVhdHVyZXM6
IA0KbGlicmFyeShzY29yZWNhcmQpDQpiaW5zX3ZhciA8LSB3b2ViaW4oZGZfdHJhaW4sIHkgPSAibGFiZWwiLCBub19jb3JlcyA9IDgsIHBvc2l0aXZlID0gImxhYmVsfDEiKQ0KDQojIElWIGZvciB2YXJpYWJsZXMvZmVhdHVyZXM6IA0KDQpkby5jYWxsKCJyYmluZCIsIGJpbnNfdmFyKSAlPiUgDQogIGFzLmRhdGEuZnJhbWUoKSAlPiUgDQogIGZpbHRlcighZHVwbGljYXRlZCh2YXJpYWJsZSkpICU+JSANCiAgcmVuYW1lKGl2X3ZhciA9IHRvdGFsX2l2KSAlPiUgDQogIGFycmFuZ2UoaXZfdmFyKSAlPiUgDQogIG11dGF0ZSh2YXJpYWJsZSA9IGZhY3Rvcih2YXJpYWJsZSwgbGV2ZWxzID0gdmFyaWFibGUpKSAtPiBpdl92YWx1ZXMNCg0KIyBGZWF0dXJlcyBoYXZlIElWID49IDA6IA0KDQppdl92YWx1ZXMgJT4lIA0KICBmaWx0ZXIoaXZfdmFyID49IDApICU+JSANCiAgcHVsbCh2YXJpYWJsZSkgJT4lIA0KICBhcy5jaGFyYWN0ZXIoKSAtPiB2YXJfSVZfMTANCg0KDQojIENvbmR1Y3QgZGF0YSB0cmFuc2Zvcm1hdGlvbiBiYXNlZCBvbiBJVi9Xb0UgYW5kIGZpbHRlciBmZWF0dXJlcyB3aXRoIElWID4gMC4xOiANCg0KdHJhaW5fd29lIDwtIHdvZWJpbl9wbHkoZGZfdHJhaW4sIGJpbnNfdmFyKSAlPiUgDQogIGFzLmRhdGEuZnJhbWUoKSAlPiUgDQogIHNlbGVjdChjKCJsYWJlbCIsIHBhc3RlMCh2YXJfSVZfMTAsICJfIiwgIndvZSIpKSkNCg0Kd3JpdGVfY3N2KHRyYWluX3dvZSwgIkM6L1VzZXJzL0FkbWluL0RvY3VtZW50cy90cmFpbl93b2UuY3N2IikgDQoNCg0KdGVzdF93b2UgPC0gd29lYmluX3BseShkZl90ZXN0LCBiaW5zX3ZhcikgJT4lIA0KICBhcy5kYXRhLmZyYW1lKCkgJT4lIA0KICBzZWxlY3QocGFzdGUwKHZhcl9JVl8xMCwgIl8iLCAid29lIikpDQoNCiMgUmVsYWJlbCBmb3IgdGFyZ2V0OiANCg0KdHJhaW5fd29lICU+JSANCiAgbXV0YXRlKGxhYmVsID0gY2FzZV93aGVuKGxhYmVsID09IDEgfiAiQmFkIiwgVFJVRSB+ICJHb29kIikpICU+JSANCiAgbXV0YXRlKGxhYmVsID0gYXMuZmFjdG9yKGxhYmVsKSkgLT4gZGZfZm9yR0JNIA0KDQojIFNjYWxlIG91ciBkYXRhOiANCg0KZGZfZm9yR0JNICU+JSANCiAgIyBtdXRhdGVfaWYoaXMubnVtZXJpYywgZnVuY3Rpb24oeCkgeyh4IC0gbWluKHgpKSAvIChtYXgoeCkgLSBtaW4oeCkpfSkgJT4lIA0KICBtdXRhdGVfaWYoaXMubnVtZXJpYywgYXMuZmFjdG9yKSAtPiBkZl9mb3JHQk1fU2NhbGVkDQoNCnRlc3Rfd29lICU+JSANCiAgIyBtdXRhdGVfaWYoaXMubnVtZXJpYywgZnVuY3Rpb24oeCkgeyh4IC0gbWluKHgpKSAvIChtYXgoeCkgLSBtaW4oeCkpfSkgJT4lIA0KICBtdXRhdGVfaWYoaXMubnVtZXJpYywgYXMuZmFjdG9yKSAtPiBkZl90ZXN0X1NjYWxlZA0KDQojIFRyYWluIFJhbmRvbSBGb3Jlc3Q6IA0KbGlicmFyeShyYW5nZXIpDQpSRl9kZWZhdWx0IDwtIHJhbmdlcihsYWJlbCB+IC4sIGRhdGEgPSBkZl9mb3JHQk1fU2NhbGVkLCBwcm9iYWJpbGl0eSA9IFRSVUUs
IHNlZWQgPSAyOSkNCg0KIyBVc2UgdGhlIFJGIENsYXNzaWZpZXIgZm9yIHByZWRpY3RpbmcgUEQgKFByb2JhYmlsaXR5IG9mIERlZmF1bHQpOiANCnBkX3N1Yl9SRiA8LSBwcmVkaWN0KFJGX2RlZmF1bHQsIGRmX3Rlc3RfU2NhbGVkLCB0eXBlID0gInJlc3BvbnNlIikNCg0KIyBTYXZlIHJlc3VsdHMgZm9yIHN1Ym1pc3Npb246IA0KcGRfc3ViX1JGJHByZWRpY3Rpb25zICU+JSBhcy5kYXRhLmZyYW1lKCkgJT4lIHB1bGwoQmFkKSAtPiBwZF9zdWJfUkYNCg0KZGZfc3ViIDwtIGRhdGEuZnJhbWUoaWQgPSAzMDAwMDo0OTk5OSwgbGFiZWwgPSBwZF9zdWJfUkYpDQp3cml0ZV9jc3YoZGZfc3ViLCAic3VibWlzc2lvbl9SYW5kb21Gb3Jlc3RfU2NhbGVkRGF0YS5jc3YiKQ0KDQpgYGANCg0KQ29kZXMgZm9yIFhHQm9vc3Q6IA0KDQpgYGB7cn0NCmRmX2ZpbmFsIDwtIHJlYWRfY3N2KCJDOi9Vc2Vycy9BZG1pbi9Eb2N1bWVudHMvdHJhaW5fd29lLmNzdiIpDQoNCg0KIyBDb252ZXJ0IGZlYXR1cmVzIHRvIERNYXRyaXggZm9ybTogDQoNClhfdHJhaW4gPC0gZGZfZmluYWwgJT4lIA0KICBmaWx0ZXIoIWlzLm5hKGxhYmVsKSkgJT4lIA0KICBzZWxlY3QoLWxhYmVsKSAlPiUgDQogIGFzLm1hdHJpeCgpDQoNClhfdGVzdCA8LSB0ZXN0X3dvZSAlPiUgYXMubWF0cml4KCkNCg0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KIyAgIFRyYWluIFhHQm9vc3Qgd2l0aCBkZWZhdWx0IHBhcmFtZXRlcnMNCiMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCmxpYnJhcnkoeGdib29zdCkNCg0KIyBDb252ZXJ0IHRvIERNYXRyaXggZm9ybSBmb3IgdHJhaW4gZGF0YTogDQpkdHJhaW4gPC0geGdiLkRNYXRyaXgoZGF0YSA9IFhfdHJhaW4sIGxhYmVsID0gWV90cmFpbikNCg0KIyBUcmFpbiBhIGRlZmF1bHQgWEdCb29zdDogDQp4Z2IyIDwtIHhnYm9vc3QoZGF0YSA9IGR0cmFpbiwgDQogICAgICAgICAgICAgICAgb2JqZWN0aXZlID0gImJpbmFyeTpsb2dpc3RpYyIsIA0KICAgICAgICAgICAgICAgIHZlcmJvc2UgPSAxLCANCiAgICAgICAgICAgICAgICBucm91bmQgPSAxMDAwKQ0KDQojIFRyYWluaW5nIHByb2Nlc3M6IA0KDQp4Z2IyJGV2YWx1YXRpb25fbG9nICU+JSANCiAgZGF0YS5mcmFtZSgpICU+JSANCiAgZ2dwbG90KGFlcyhpdGVyLCB0cmFpbl9lcnJvcikpICsgDQogIGdlb21fbGluZSgpICsgDQogIGxhYnModGl0bGUgPSAiRmlndXJlIDI6IFhHQm9vc3QgVHJhaW5pbmcgUHJvY2VzcywgV09FIFRyYW5zZm9ybWF0aW9uIikNCg0KIyBVc2UgZm9yIHByZWRpY3Rpbmc6IA0KDQpwZF94Z2Jfd29lIDwtIHByZWRpY3QoeGdiMiwgWF90ZXN0KQ0KDQojIFNhdmUgcmVzdWx0IGZvciBzdWJtaXNzaW9uOiANCg0KZGZfc3ViX3hnYl93b2UgPC0gZGF0YS5mcmFtZShpZCA9IDMwMDAwOjQ5OTk5LCBsYWJlbCA9IHBkX3hnYl93b2UpDQp3cml0ZV9jc3YoZGZfc3ViX3hnYl93b2UsICJkZl9zdWJfeGdi
X3dvZS5jc3YiKQ0KDQpgYGANCg0K