Library

library(readr)
library(DataExplorer)
library(skimr)
library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(skimr)
library(rpart)
library(rpart.plot)
library(DMwR)
## Loading required package: lattice
## Loading required package: grid
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(randomForest)
## randomForest 4.7-1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(caret)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.6     ✓ stringr 1.4.0
## ✓ tidyr   1.2.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x randomForest::combine() masks dplyr::combine()
## x dplyr::filter()         masks stats::filter()
## x dplyr::lag()            masks stats::lag()
## x purrr::lift()           masks caret::lift()
## x randomForest::margin()  masks ggplot2::margin()
library(e1071)
library(ipred)

Data Description & Project Overview

The data that I’m going to analyze is from the Kaggle’s website. https://www.kaggle.com/datasets/rupakroy/online-payments-fraud-detection-dataset This data set is considered as big data set, It consists with 6362620 observations and 11 variables. The data set has too many observations, therefore, it make sense to narrow the data observations in terms of efficiency. The purpose of this project is building models for detecting whether the online payment is fraud or not to provide a better insight for financial institutions. The selected models are random forest model and Bootstrap Aggregating Model. The attributes/variables are presented as below.

step: represents a unit of time where 1 step equals 1 hour

type: type of online transaction (have 5 different types, included PAYMENT, TRANSFER, CASH OUT, CASH IN, DEBIT)

amount: the amount of the transaction

nameOrig: customer starting the transaction

oldbalanceOrg: balance before the transaction

newbalanceOrig: balance after the transaction

nameDest: recipient of the transaction

oldbalanceDest: initial balance of recipient before the transaction

newbalanceDest: the new balance of recipient after the transaction()

isFraud: fraud transaction (0 represents the transaction is fraud, 1 represents the transaction is not fraud)

Load data

data<-read_csv('data.csv', col_types = "nfnfnnfnnff")

Explotary Analysis

dim(data)
## [1] 6362620      11
head(data)
## # A tibble: 6 × 11
##    step type     amount nameOrig    oldbalanceOrg newbalanceOrig nameDest   
##   <dbl> <fct>     <dbl> <fct>               <dbl>          <dbl> <fct>      
## 1     1 PAYMENT   9840. C1231006815        170136        160296. M1979787155
## 2     1 PAYMENT   1864. C1666544295         21249         19385. M2044282225
## 3     1 TRANSFER   181  C1305486145           181             0  C553264065 
## 4     1 CASH_OUT   181  C840083671            181             0  C38997010  
## 5     1 PAYMENT  11668. C2048537720         41554         29886. M1230701703
## 6     1 PAYMENT   7818. C90045638           53860         46042. M573487274 
## # … with 4 more variables: oldbalanceDest <dbl>, newbalanceDest <dbl>,
## #   isFraud <fct>, isFlaggedFraud <fct>
summary(data)
##       step             type             amount                nameOrig      
##  Min.   :  1.0   PAYMENT :2151495   Min.   :       0   C2098525306:      3  
##  1st Qu.:156.0   TRANSFER: 532909   1st Qu.:   13390   C400299098 :      3  
##  Median :239.0   CASH_OUT:2237500   Median :   74872   C1999539787:      3  
##  Mean   :243.4   DEBIT   :  41432   Mean   :  179862   C1065307291:      3  
##  3rd Qu.:335.0   CASH_IN :1399284   3rd Qu.:  208721   C545315117 :      3  
##  Max.   :743.0                      Max.   :92445517   C1976208114:      3  
##                                                        (Other)    :6362602  
##  oldbalanceOrg      newbalanceOrig            nameDest      
##  Min.   :       0   Min.   :       0   C1286084959:    113  
##  1st Qu.:       0   1st Qu.:       0   C985934102 :    109  
##  Median :   14208   Median :       0   C665576141 :    105  
##  Mean   :  833883   Mean   :  855114   C2083562754:    102  
##  3rd Qu.:  107315   3rd Qu.:  144258   C248609774 :    101  
##  Max.   :59585040   Max.   :49585040   C1590550415:    101  
##                                        (Other)    :6361989  
##  oldbalanceDest      newbalanceDest      isFraud     isFlaggedFraud
##  Min.   :        0   Min.   :        0   0:6354407   0:6362604     
##  1st Qu.:        0   1st Qu.:        0   1:   8213   1:     16     
##  Median :   132706   Median :   214661                             
##  Mean   :  1100702   Mean   :  1224996                             
##  3rd Qu.:   943037   3rd Qu.:  1111909                             
##  Max.   :356015889   Max.   :356179279                             
## 
str(data)
## spec_tbl_df [6,362,620 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ step          : num [1:6362620] 1 1 1 1 1 1 1 1 1 1 ...
##  $ type          : Factor w/ 5 levels "PAYMENT","TRANSFER",..: 1 1 2 3 1 1 1 1 1 4 ...
##  $ amount        : num [1:6362620] 9840 1864 181 181 11668 ...
##  $ nameOrig      : Factor w/ 6353307 levels "C1231006815",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ oldbalanceOrg : num [1:6362620] 170136 21249 181 181 41554 ...
##  $ newbalanceOrig: num [1:6362620] 160296 19385 0 0 29886 ...
##  $ nameDest      : Factor w/ 2722362 levels "M1979787155",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ oldbalanceDest: num [1:6362620] 0 0 0 21182 0 ...
##  $ newbalanceDest: num [1:6362620] 0 0 0 0 0 ...
##  $ isFraud       : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 1 ...
##  $ isFlaggedFraud: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   step = col_number(),
##   ..   type = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
##   ..   amount = col_number(),
##   ..   nameOrig = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
##   ..   oldbalanceOrg = col_number(),
##   ..   newbalanceOrig = col_number(),
##   ..   nameDest = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
##   ..   oldbalanceDest = col_number(),
##   ..   newbalanceDest = col_number(),
##   ..   isFraud = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
##   ..   isFlaggedFraud = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE)
##   .. )
##  - attr(*, "problems")=<externalptr>

As the summary() and str() functions show, the data set seems doesn’t have any NA or missing values.

NA or Missing Values

plot_missing(data)

Fortunately, as the plot shows, the data set doesn’t have any missing values as the above str() function shows.

skim(data)
Data summary
Name data
Number of rows 6362620
Number of columns 11
_______________________
Column type frequency:
factor 5
numeric 6
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
type 0 1 FALSE 5 CAS: 2237500, PAY: 2151495, CAS: 1399284, TRA: 532909
nameOrig 0 1 FALSE 6353307 C20: 3, C40: 3, C19: 3, C10: 3
nameDest 0 1 FALSE 2722362 C12: 113, C98: 109, C66: 105, C20: 102
isFraud 0 1 FALSE 2 0: 6354407, 1: 8213
isFlaggedFraud 0 1 FALSE 2 0: 6362604, 1: 16

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
step 0 1 243.4 142.33 1 156.00 239.00 335.0 743 ▅▇▆▁▁
amount 0 1 179861.9 603858.23 0 13389.57 74871.94 208721.5 92445517 ▇▁▁▁▁
oldbalanceOrg 0 1 833883.1 2888242.67 0 0.00 14208.00 107315.2 59585040 ▇▁▁▁▁
newbalanceOrig 0 1 855113.7 2924048.50 0 0.00 0.00 144258.4 49585040 ▇▁▁▁▁
oldbalanceDest 0 1 1100701.7 3399180.11 0 0.00 132705.66 943036.7 356015889 ▇▁▁▁▁
newbalanceDest 0 1 1224996.4 3674128.94 0 0.00 214661.44 1111909.2 356179279 ▇▁▁▁▁

The skim() function provides overall statistic summaries, and it tells that the variable - isFraud has class imbalance problem. Therefore, should fix the class imbalance problem before building the models to avoid any bias.

Visualizations

There are more Not Fraud transactions than Fraud transactions.

ggplot(data, aes(x=isFraud, fill=isFraud))+geom_bar()+scale_y_log10()+
  ggtitle('The Fraud Transactions')+xlab('Fraud')+ylab('Log Count')+scale_fill_discrete(labels=c('Not Fraud', 'Fraud'))

Among the types of transactions,CASH_OUT ranks the first, and followed by PAYMENT AND CASH_IN. DEBIT is the least type of the transactions.

ggplot(data, aes(x=type, fill=type))+geom_bar()+ggtitle('The Type of Transactions')

According the plot, I can see that most of the Fraud is from the transaction type of TRANSFER and CASH_OUT.

ggplot(data, aes(x=type, y= isFraud, fill = isFraud)) +
  geom_bar(stat="identity")+scale_fill_discrete(labels=c('Not Fraud', 'Fraud'))

The transaction histogram expresses that the distribution of transaction amount is rightly skewed, it means most of transactions are made in low values.

ggplot(data, aes(x=amount))+geom_histogram()+scale_y_log10()+ggtitle('Histogram of Transactions')+ylab('Log Count')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 5 rows containing missing values (geom_bar).

According to the correlation plot, the numeric variables in the data set the new balance origin is highly correlated with the new balance origin, also the nw balance dest is slightly correlated with amount.

numeric<-data%>%
  select(step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest)
corrplot(cor(numeric))

Data Split and Feature Engineering

For this analysis, I decide to split the data in two groups. One for training data, and the other for testing the model accuracy as a holdout set. The data is split into 75/20 ratio.Due to the variable-isFraud has class imbalance problem. Therefore, use SMOTE() to fix the class imbalance problem.Before fixing the class imbalance problem, convert the name of level of isFraud from 0 and 1 to Not_Fraud and Fraud.

Fix Class Imbalance

data<-SMOTE(factor(isFraud)~., data.frame(data),perc.over = 100,perc.under = 200)
## Warning in names(data) == as.character(form[[2]]): longer object length is not a
## multiple of shorter object length
round(prop.table(table(select(data,isFraud),exclude = NULL)),4)*100
## 
##  0  1 
## 50 50

Perfectly, the ratio of Fraud and Not_Fraud becomes 50/50 roughly. Successfully fixed the class imbalance problem. Therefore, it is good to continue to building models. Since it is required to use one from week 1 to 10, and one from week 11 to week 15, I’d like to use the random forest model and bagging model for the further analysis.

Data Split

set.seed(1000)
data$isFraud <-ifelse(data$isFraud==0,'Not_Fraud','Fraud')
data$isFraud<-as.factor(data$isFraud)
levels(data$isFraud)<-c("Fraud","Not_Fraud")
split <- sample(nrow(data), round(nrow(data)*0.75), replace = F) 
train <- data[split,]
test <- data[-split,]
round(prop.table(table(select(train, isFraud),exclude = NULL)),4)*100
## 
##     Fraud Not_Fraud 
##     50.02     49.98

Building Models

Random Forest Model (from week1-week10)

set.seed(123)
forest <- randomForest(isFraud~ step+type+amount+oldbalanceOrg+newbalanceOrig+oldbalanceDest+newbalanceDest+isFlaggedFraud, data = train, importance=TRUE, ntree=1000)

# display model details
forest
## 
## Call:
##  randomForest(formula = isFraud ~ step + type + amount + oldbalanceOrg +      newbalanceOrig + oldbalanceDest + newbalanceDest + isFlaggedFraud,      data = train, importance = TRUE, ntree = 1000) 
##                Type of random forest: classification
##                      Number of trees: 1000
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 1.14%
## Confusion matrix:
##           Fraud Not_Fraud class.error
## Fraud     12246        78 0.006329114
## Not_Fraud   203     12112 0.016483963
plot(forest)

RF Variable Importance

varImpPlot(forest)

RF Prediction

forest_pred<-predict(forest, newdata = test, type='class')
forest_cm<-confusionMatrix(forest_pred,test$isFraud)
forest_cm
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Fraud Not_Fraud
##   Fraud      4074        68
##   Not_Fraud    28      4043
##                                           
##                Accuracy : 0.9883          
##                  95% CI : (0.9857, 0.9905)
##     No Information Rate : 0.5005          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9766          
##                                           
##  Mcnemar's Test P-Value : 6.879e-05       
##                                           
##             Sensitivity : 0.9932          
##             Specificity : 0.9835          
##          Pos Pred Value : 0.9836          
##          Neg Pred Value : 0.9931          
##              Prevalence : 0.4995          
##          Detection Rate : 0.4960          
##    Detection Prevalence : 0.5043          
##       Balanced Accuracy : 0.9883          
##                                           
##        'Positive' Class : Fraud           
## 

Bootstrap Aggregating Model/bagging (week 11 to week 15)

set.seed(1000)
bag<-bagging(isFraud~ step+type+amount+oldbalanceOrg+newbalanceOrig+oldbalanceDest+newbalanceDest+isFlaggedFraud, data = train, nbagg=150,coob=T,control=rpart.control(minsplit = 2, cp = 0))

Bag Variable Importance

varImp(bag)
##                  Overall
## amount         7399.9918
## isFlaggedFraud  952.6310
## newbalanceDest 2515.7845
## newbalanceOrig  506.5196
## oldbalanceDest 2577.1155
## oldbalanceOrg  9763.2609
## step           5081.1263
## type           5437.8756

Bag Prediction

bag_pred<-predict(bag, newdata = test, type='class')
bag_cm<-confusionMatrix(bag_pred,test$isFraud)
bag_cm
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  Fraud Not_Fraud
##   Fraud      4081        35
##   Not_Fraud    21      4076
##                                           
##                Accuracy : 0.9932          
##                  95% CI : (0.9912, 0.9948)
##     No Information Rate : 0.5005          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.9864          
##                                           
##  Mcnemar's Test P-Value : 0.08235         
##                                           
##             Sensitivity : 0.9949          
##             Specificity : 0.9915          
##          Pos Pred Value : 0.9915          
##          Neg Pred Value : 0.9949          
##              Prevalence : 0.4995          
##          Detection Rate : 0.4969          
##    Detection Prevalence : 0.5012          
##       Balanced Accuracy : 0.9932          
##                                           
##        'Positive' Class : Fraud           
## 

In conclusion, the Bootstrap Aggregating Model performs a better accuracy compare to the random forest model with 99.37% of accuracy rate. Especially, it not only provides more accuracy performance but also predicted less incorrectly the the Fraud as Not_Fraud. As the confusion Matrix shows, Bootstrap Aggregating Model predicted the Fraud as Not_Fraud incorrectly with 39 cases. Random forest model predicted the Fraud as Not_Fraud incorrectly with 57 cases. It is important for a financial institution to correctly determine the Fraud case since it may link to a tremendous disaster and risk of losing properties for the customers. Therefore, I’d like to recommend to use the Bootstrap Aggregating Model to the financial institutions to detect the fraud transactions.

LS0tCnRpdGxlOiAiREFUQTYyMl9GaW5hbF9DaHVuamllTmFuIgphdXRob3I6ICJDaHVuamllIE5hbiIKZGF0ZTogIjUvNC8yMDIyIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIGNvZGVfZG93bmxvYWQ6IHllcwogICAgY29kZV9mb2xkaW5nOiBoaWRlCiAgICBoaWdobGlnaHQ6IHB5Z21lbnRzCiAgICBudW1iZXJfc2VjdGlvbnM6IG5vCiAgICB0aGVtZTogZmxhdGx5CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICBwZGZfZG9jdW1lbnQ6CiAgICB0b2M6IHllcwotLS0KCiMjIExpYnJhcnkKCmBgYHtyfQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KERhdGFFeHBsb3JlcikKbGlicmFyeShza2ltcikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGNvcnJwbG90KQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KHNraW1yKQpsaWJyYXJ5KHJwYXJ0KQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCmxpYnJhcnkoRE13UikKbGlicmFyeShyYW5kb21Gb3Jlc3QpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGUxMDcxKQpsaWJyYXJ5KGlwcmVkKQpgYGAKCgoKIyMgRGF0YSBEZXNjcmlwdGlvbiAmIFByb2plY3QgT3ZlcnZpZXcKClRoZSBkYXRhIHRoYXQgSSdtIGdvaW5nIHRvIGFuYWx5emUgaXMgZnJvbSB0aGUgS2FnZ2xlJ3Mgd2Vic2l0ZS4gCmh0dHBzOi8vd3d3LmthZ2dsZS5jb20vZGF0YXNldHMvcnVwYWtyb3kvb25saW5lLXBheW1lbnRzLWZyYXVkLWRldGVjdGlvbi1kYXRhc2V0ClRoaXMgZGF0YSBzZXQgaXMgY29uc2lkZXJlZCBhcyBiaWcgZGF0YSBzZXQsIEl0IGNvbnNpc3RzIHdpdGggNjM2MjYyMCBvYnNlcnZhdGlvbnMgYW5kIDExIHZhcmlhYmxlcy4gVGhlIGRhdGEgc2V0IGhhcyB0b28gbWFueSBvYnNlcnZhdGlvbnMsIHRoZXJlZm9yZSwgaXQgbWFrZSBzZW5zZSB0byBuYXJyb3cgdGhlIGRhdGEgb2JzZXJ2YXRpb25zIGluIHRlcm1zIG9mIGVmZmljaWVuY3kuIFRoZSBwdXJwb3NlIG9mIHRoaXMgcHJvamVjdCBpcyAgYnVpbGRpbmcgbW9kZWxzIGZvciBkZXRlY3Rpbmcgd2hldGhlciB0aGUgb25saW5lIHBheW1lbnQgaXMgZnJhdWQgb3Igbm90IHRvIHByb3ZpZGUgYSBiZXR0ZXIgaW5zaWdodCBmb3IgZmluYW5jaWFsIGluc3RpdHV0aW9ucy4gVGhlIHNlbGVjdGVkIG1vZGVscyBhcmUgcmFuZG9tIGZvcmVzdCBtb2RlbCBhbmQgQm9vdHN0cmFwIEFnZ3JlZ2F0aW5nIE1vZGVsLiBUaGUgYXR0cmlidXRlcy92YXJpYWJsZXMgYXJlIHByZXNlbnRlZCBhcyBiZWxvdy4KCgpzdGVwOiByZXByZXNlbnRzIGEgdW5pdCBvZiB0aW1lIHdoZXJlIDEgc3RlcCBlcXVhbHMgMSBob3VyCgp0eXBlOiB0eXBlIG9mIG9ubGluZSB0cmFuc2FjdGlvbiAoaGF2ZSA1IGRpZmZlcmVudCB0eXBlcywgaW5jbHVkZWQgUEFZTUVOVCwgVFJBTlNGRVIsIENBU0ggT1VULCBDQVNIIElOLCBERUJJVCkKCmFtb3VudDogdGhlIGFtb3VudCBvZiB0aGUgdHJhbnNhY3Rpb24KCm5hbWVPcmlnOiBjdXN0b21lciBzdGFydGluZyB0aGUgdHJhbnNhY3Rpb24KCm9sZGJhbGFuY2VPcmc6IGJhbGFuY2UgYmVmb3JlIHRoZSB0cmFuc2FjdGlvbgoKbmV3YmFsYW5jZU9yaWc6IGJhbGFuY2UgYWZ0ZXIgdGhlIHRyYW5zYWN0aW9uCgpuYW1lRGVzdDogcmVjaXBpZW50IG9mIHRoZSB0cmFuc2FjdGlvbgoKb2xkYmFsYW5jZURlc3Q6IGluaXRpYWwgYmFsYW5jZSBvZiByZWNpcGllbnQgYmVmb3JlIHRoZSB0cmFuc2FjdGlvbgoKbmV3YmFsYW5jZURlc3Q6IHRoZSBuZXcgYmFsYW5jZSBvZiByZWNpcGllbnQgYWZ0ZXIgdGhlIHRyYW5zYWN0aW9uKCkKCmlzRnJhdWQ6IGZyYXVkIHRyYW5zYWN0aW9uICgwIHJlcHJlc2VudHMgdGhlIHRyYW5zYWN0aW9uIGlzIGZyYXVkLCAxIHJlcHJlc2VudHMgdGhlIHRyYW5zYWN0aW9uIGlzIG5vdCBmcmF1ZCkKCgoKCiMjIyBMb2FkIGRhdGEgCgpgYGB7cn0KZGF0YTwtcmVhZF9jc3YoJ2RhdGEuY3N2JywgY29sX3R5cGVzID0gIm5mbmZubmZubmZmIikKCmBgYAoKCiMjIyBFeHBsb3RhcnkgQW5hbHlzaXMKCmBgYHtyfQpkaW0oZGF0YSkKaGVhZChkYXRhKQpzdW1tYXJ5KGRhdGEpCnN0cihkYXRhKQpgYGAKCkFzIHRoZSBzdW1tYXJ5KCkgYW5kIHN0cigpIGZ1bmN0aW9ucyBzaG93LCB0aGUgZGF0YSBzZXQgc2VlbXMgZG9lc24ndCBoYXZlIGFueSBOQSBvciBtaXNzaW5nIHZhbHVlcy4gCgoKCgojIyMgTkEgb3IgTWlzc2luZyBWYWx1ZXMKCmBgYHtyfQpwbG90X21pc3NpbmcoZGF0YSkKYGBgCgpGb3J0dW5hdGVseSwgYXMgdGhlIHBsb3Qgc2hvd3MsIHRoZSBkYXRhIHNldCBkb2Vzbid0IGhhdmUgYW55IG1pc3NpbmcgdmFsdWVzIGFzIHRoZSBhYm92ZSBzdHIoKSBmdW5jdGlvbiBzaG93cy4KCgoKYGBge3J9CnNraW0oZGF0YSkKYGBgCgpUaGUgc2tpbSgpIGZ1bmN0aW9uIHByb3ZpZGVzIG92ZXJhbGwgc3RhdGlzdGljIHN1bW1hcmllcywgYW5kIGl0IHRlbGxzIHRoYXQgdGhlIHZhcmlhYmxlIC0gaXNGcmF1ZCBoYXMgY2xhc3MgaW1iYWxhbmNlIHByb2JsZW0uIFRoZXJlZm9yZSwgc2hvdWxkIGZpeCB0aGUgY2xhc3MgaW1iYWxhbmNlIHByb2JsZW0gYmVmb3JlIGJ1aWxkaW5nIHRoZSBtb2RlbHMgdG8gYXZvaWQgYW55IGJpYXMuCgoKCgojIyMgVmlzdWFsaXphdGlvbnMKClRoZXJlIGFyZSBtb3JlIE5vdCBGcmF1ZCB0cmFuc2FjdGlvbnMgdGhhbiBGcmF1ZCB0cmFuc2FjdGlvbnMuCgpgYGB7cn0KZ2dwbG90KGRhdGEsIGFlcyh4PWlzRnJhdWQsIGZpbGw9aXNGcmF1ZCkpK2dlb21fYmFyKCkrc2NhbGVfeV9sb2cxMCgpKwogIGdndGl0bGUoJ1RoZSBGcmF1ZCBUcmFuc2FjdGlvbnMnKSt4bGFiKCdGcmF1ZCcpK3lsYWIoJ0xvZyBDb3VudCcpK3NjYWxlX2ZpbGxfZGlzY3JldGUobGFiZWxzPWMoJ05vdCBGcmF1ZCcsICdGcmF1ZCcpKQoKYGBgCgpBbW9uZyB0aGUgdHlwZXMgb2YgdHJhbnNhY3Rpb25zLENBU0hfT1VUIHJhbmtzIHRoZSBmaXJzdCwgYW5kIGZvbGxvd2VkIGJ5IFBBWU1FTlQgQU5EIENBU0hfSU4uIERFQklUIGlzIHRoZSBsZWFzdCB0eXBlIG9mIHRoZSB0cmFuc2FjdGlvbnMuCgpgYGB7cn0KZ2dwbG90KGRhdGEsIGFlcyh4PXR5cGUsIGZpbGw9dHlwZSkpK2dlb21fYmFyKCkrZ2d0aXRsZSgnVGhlIFR5cGUgb2YgVHJhbnNhY3Rpb25zJykKCmBgYAoKQWNjb3JkaW5nIHRoZSBwbG90LCBJIGNhbiBzZWUgdGhhdCBtb3N0IG9mIHRoZSBGcmF1ZCBpcyBmcm9tIHRoZSB0cmFuc2FjdGlvbiB0eXBlIG9mIFRSQU5TRkVSIGFuZCBDQVNIX09VVC4KCmBgYHtyfQpnZ3Bsb3QoZGF0YSwgYWVzKHg9dHlwZSwgeT0gaXNGcmF1ZCwgZmlsbCA9IGlzRnJhdWQpKSArCiAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiKStzY2FsZV9maWxsX2Rpc2NyZXRlKGxhYmVscz1jKCdOb3QgRnJhdWQnLCAnRnJhdWQnKSkKYGBgCiAKClRoZSB0cmFuc2FjdGlvbiBoaXN0b2dyYW0gZXhwcmVzc2VzIHRoYXQgdGhlIGRpc3RyaWJ1dGlvbiBvZiB0cmFuc2FjdGlvbiBhbW91bnQgaXMgcmlnaHRseSBza2V3ZWQsIGl0IG1lYW5zIG1vc3Qgb2YgdHJhbnNhY3Rpb25zIGFyZSBtYWRlIGluIGxvdyB2YWx1ZXMuCgpgYGB7cn0KZ2dwbG90KGRhdGEsIGFlcyh4PWFtb3VudCkpK2dlb21faGlzdG9ncmFtKCkrc2NhbGVfeV9sb2cxMCgpK2dndGl0bGUoJ0hpc3RvZ3JhbSBvZiBUcmFuc2FjdGlvbnMnKSt5bGFiKCdMb2cgQ291bnQnKQoKYGBgCgoKQWNjb3JkaW5nIHRvIHRoZSBjb3JyZWxhdGlvbiBwbG90LCB0aGUgbnVtZXJpYyB2YXJpYWJsZXMgaW4gdGhlIGRhdGEgc2V0IHRoZSBuZXcgYmFsYW5jZSBvcmlnaW4gaXMgaGlnaGx5IGNvcnJlbGF0ZWQgd2l0aCB0aGUgbmV3IGJhbGFuY2Ugb3JpZ2luLCBhbHNvIHRoZSBudyBiYWxhbmNlIGRlc3QgaXMgc2xpZ2h0bHkgY29ycmVsYXRlZCB3aXRoIGFtb3VudC4KCmBgYHtyfQpudW1lcmljPC1kYXRhJT4lCiAgc2VsZWN0KHN0ZXAsYW1vdW50LG9sZGJhbGFuY2VPcmcsbmV3YmFsYW5jZU9yaWcsb2xkYmFsYW5jZURlc3QsbmV3YmFsYW5jZURlc3QpCmNvcnJwbG90KGNvcihudW1lcmljKSkKYGBgCgoKCgojIyBEYXRhIFNwbGl0IGFuZCBGZWF0dXJlIEVuZ2luZWVyaW5nCgoKRm9yIHRoaXMgYW5hbHlzaXMsIEkgZGVjaWRlIHRvIHNwbGl0IHRoZSBkYXRhIGluIHR3byBncm91cHMuIE9uZSBmb3IgdHJhaW5pbmcgZGF0YSwgYW5kIHRoZSBvdGhlciBmb3IgdGVzdGluZyB0aGUgbW9kZWwgYWNjdXJhY3kgYXMgYSBob2xkb3V0IHNldC4gVGhlIGRhdGEgaXMgc3BsaXQgaW50byA3NS8yMCByYXRpby5EdWUgdG8gdGhlIHZhcmlhYmxlLWlzRnJhdWQgaGFzIGNsYXNzIGltYmFsYW5jZSBwcm9ibGVtLiAgVGhlcmVmb3JlLCB1c2UgU01PVEUoKSB0byBmaXggdGhlIGNsYXNzIGltYmFsYW5jZSBwcm9ibGVtLkJlZm9yZSBmaXhpbmcgdGhlIGNsYXNzIGltYmFsYW5jZSBwcm9ibGVtLCBjb252ZXJ0IHRoZSBuYW1lIG9mIGxldmVsIG9mIGlzRnJhdWQgZnJvbSAwIGFuZCAxIHRvIE5vdF9GcmF1ZCBhbmQgRnJhdWQuCgoKIyMjIEZpeCBDbGFzcyBJbWJhbGFuY2UKCmBgYHtyfQpkYXRhPC1TTU9URShmYWN0b3IoaXNGcmF1ZCl+LiwgZGF0YS5mcmFtZShkYXRhKSxwZXJjLm92ZXIgPSAxMDAscGVyYy51bmRlciA9IDIwMCkKcm91bmQocHJvcC50YWJsZSh0YWJsZShzZWxlY3QoZGF0YSxpc0ZyYXVkKSxleGNsdWRlID0gTlVMTCkpLDQpKjEwMAoKYGBgCgoKUGVyZmVjdGx5LCB0aGUgcmF0aW8gb2YgRnJhdWQgYW5kIE5vdF9GcmF1ZCBiZWNvbWVzIDUwLzUwIHJvdWdobHkuIFN1Y2Nlc3NmdWxseSBmaXhlZCB0aGUgY2xhc3MgaW1iYWxhbmNlIHByb2JsZW0uIFRoZXJlZm9yZSwgaXQgaXMgZ29vZCB0byBjb250aW51ZSB0byBidWlsZGluZyBtb2RlbHMuIFNpbmNlIGl0IGlzIHJlcXVpcmVkIHRvIHVzZSBvbmUgZnJvbSB3ZWVrIDEgdG8gMTAsIGFuZCBvbmUgZnJvbSB3ZWVrIDExIHRvIHdlZWsgMTUsIEknZCBsaWtlIHRvIHVzZSB0aGUgcmFuZG9tIGZvcmVzdCBtb2RlbCBhbmQgYmFnZ2luZyBtb2RlbCBmb3IgdGhlIGZ1cnRoZXIgYW5hbHlzaXMuIAoKCiMjIyBEYXRhIFNwbGl0CgpgYGB7cn0Kc2V0LnNlZWQoMTAwMCkKZGF0YSRpc0ZyYXVkIDwtaWZlbHNlKGRhdGEkaXNGcmF1ZD09MCwnTm90X0ZyYXVkJywnRnJhdWQnKQpkYXRhJGlzRnJhdWQ8LWFzLmZhY3RvcihkYXRhJGlzRnJhdWQpCmxldmVscyhkYXRhJGlzRnJhdWQpPC1jKCJGcmF1ZCIsIk5vdF9GcmF1ZCIpCnNwbGl0IDwtIHNhbXBsZShucm93KGRhdGEpLCByb3VuZChucm93KGRhdGEpKjAuNzUpLCByZXBsYWNlID0gRikgCnRyYWluIDwtIGRhdGFbc3BsaXQsXQp0ZXN0IDwtIGRhdGFbLXNwbGl0LF0Kcm91bmQocHJvcC50YWJsZSh0YWJsZShzZWxlY3QodHJhaW4sIGlzRnJhdWQpLGV4Y2x1ZGUgPSBOVUxMKSksNCkqMTAwCmBgYAoKCgojIyBCdWlsZGluZyBNb2RlbHMKCiMjIyBSYW5kb20gRm9yZXN0IE1vZGVsIChmcm9tIHdlZWsxLXdlZWsxMCkKCmBgYHtyfQpzZXQuc2VlZCgxMjMpCmZvcmVzdCA8LSByYW5kb21Gb3Jlc3QoaXNGcmF1ZH4gc3RlcCt0eXBlK2Ftb3VudCtvbGRiYWxhbmNlT3JnK25ld2JhbGFuY2VPcmlnK29sZGJhbGFuY2VEZXN0K25ld2JhbGFuY2VEZXN0K2lzRmxhZ2dlZEZyYXVkLCBkYXRhID0gdHJhaW4sIGltcG9ydGFuY2U9VFJVRSwgbnRyZWU9MTAwMCkKCiMgZGlzcGxheSBtb2RlbCBkZXRhaWxzCmZvcmVzdApwbG90KGZvcmVzdCkKYGBgCgojIyMgUkYgVmFyaWFibGUgSW1wb3J0YW5jZSAKCmBgYHtyfQp2YXJJbXBQbG90KGZvcmVzdCkKYGBgCgojIyMgUkYgUHJlZGljdGlvbgpgYGB7cn0KZm9yZXN0X3ByZWQ8LXByZWRpY3QoZm9yZXN0LCBuZXdkYXRhID0gdGVzdCwgdHlwZT0nY2xhc3MnKQpmb3Jlc3RfY208LWNvbmZ1c2lvbk1hdHJpeChmb3Jlc3RfcHJlZCx0ZXN0JGlzRnJhdWQpCmZvcmVzdF9jbQpgYGAKCiMjIyBCb290c3RyYXAgQWdncmVnYXRpbmcgTW9kZWwvYmFnZ2luZyAod2VlayAxMSB0byB3ZWVrIDE1KQoKYGBge3J9CnNldC5zZWVkKDEwMDApCmJhZzwtYmFnZ2luZyhpc0ZyYXVkfiBzdGVwK3R5cGUrYW1vdW50K29sZGJhbGFuY2VPcmcrbmV3YmFsYW5jZU9yaWcrb2xkYmFsYW5jZURlc3QrbmV3YmFsYW5jZURlc3QraXNGbGFnZ2VkRnJhdWQsIGRhdGEgPSB0cmFpbiwgbmJhZ2c9MTUwLGNvb2I9VCxjb250cm9sPXJwYXJ0LmNvbnRyb2wobWluc3BsaXQgPSAyLCBjcCA9IDApKQpgYGAKCgojIyMgQmFnIFZhcmlhYmxlIEltcG9ydGFuY2UKCmBgYHtyfQp2YXJJbXAoYmFnKQpgYGAKCgojIyMgQmFnIFByZWRpY3Rpb24KCmBgYHtyfQpiYWdfcHJlZDwtcHJlZGljdChiYWcsIG5ld2RhdGEgPSB0ZXN0LCB0eXBlPSdjbGFzcycpCmJhZ19jbTwtY29uZnVzaW9uTWF0cml4KGJhZ19wcmVkLHRlc3QkaXNGcmF1ZCkKYmFnX2NtCgpgYGAKCkluIGNvbmNsdXNpb24sIHRoZSBCb290c3RyYXAgQWdncmVnYXRpbmcgTW9kZWwgcGVyZm9ybXMgYSBiZXR0ZXIgYWNjdXJhY3kgY29tcGFyZSB0byB0aGUgcmFuZG9tIGZvcmVzdCBtb2RlbCB3aXRoIDk5LjM3JSBvZiBhY2N1cmFjeSByYXRlLiBFc3BlY2lhbGx5LCBpdCBub3Qgb25seSBwcm92aWRlcyBtb3JlIGFjY3VyYWN5IHBlcmZvcm1hbmNlIGJ1dCBhbHNvIHByZWRpY3RlZCBsZXNzIGluY29ycmVjdGx5IHRoZSB0aGUgRnJhdWQgYXMgTm90X0ZyYXVkLgpBcyB0aGUgY29uZnVzaW9uIE1hdHJpeCBzaG93cywgQm9vdHN0cmFwIEFnZ3JlZ2F0aW5nIE1vZGVsIHByZWRpY3RlZCB0aGUgRnJhdWQgYXMgTm90X0ZyYXVkIGluY29ycmVjdGx5IHdpdGggMzkgY2FzZXMuIFJhbmRvbSBmb3Jlc3QgbW9kZWwgcHJlZGljdGVkIHRoZSBGcmF1ZCBhcyBOb3RfRnJhdWQgaW5jb3JyZWN0bHkgd2l0aCA1NyBjYXNlcy4gSXQgaXMgaW1wb3J0YW50IGZvciBhIGZpbmFuY2lhbCBpbnN0aXR1dGlvbiB0byBjb3JyZWN0bHkgZGV0ZXJtaW5lIHRoZSBGcmF1ZCBjYXNlIHNpbmNlIGl0IG1heSBsaW5rIHRvIGEgdHJlbWVuZG91cyBkaXNhc3RlciBhbmQgcmlzayBvZiBsb3NpbmcgcHJvcGVydGllcyBmb3IgdGhlIGN1c3RvbWVycy4gVGhlcmVmb3JlLCBJJ2QgbGlrZSB0byByZWNvbW1lbmQgdG8gdXNlIHRoZSBCb290c3RyYXAgQWdncmVnYXRpbmcgTW9kZWwgdG8gdGhlIGZpbmFuY2lhbCBpbnN0aXR1dGlvbnMgdG8gZGV0ZWN0IHRoZSBmcmF1ZCB0cmFuc2FjdGlvbnMuIAoKCg==