Required packages

library(dplyr)     # for pipe operator
library(ggplot2)   # for display of plots
library(readr)     # to import datasets
library(lattice)   # to display graphs
library(lubridate) # for operations on date and time variables
library(tidyr)     # used to tidy data
library(forecast)  # used for Box-Cox transformation
library(validate)  # to use functions on dataframes
library(outliers)

Executive Summary

Data

Importing and Reading the data set

library(readr)
dataset_1 <- read_csv("olist_order_items_dataset.csv")
Parsed with column specification:
cols(
  order_id = col_character(),
  order_item_id = col_double(),
  product_id = col_character(),
  seller_id = col_character(),
  shipping_limit_date = col_datetime(format = ""),
  price = col_double(),
  freight_value = col_double()
)
# View() opens an interactive data viewer, so it is only called when R is
# running interactively; scripted or knitted runs skip it.
if (interactive()) {
  View(dataset_1)
}

library(readr)
dataset_2 <- read_csv("olist_orders_dataset.csv")
Parsed with column specification:
cols(
  order_id = col_character(),
  customer_id = col_character(),
  order_status = col_character(),
  order_purchase_timestamp = col_datetime(format = ""),
  order_approved_at = col_datetime(format = ""),
  order_delivered_carrier_date = col_datetime(format = ""),
  order_delivered_customer_date = col_datetime(format = ""),
  order_estimated_delivery_date = col_datetime(format = "")
)
# View() opens an interactive data viewer, so it is only called when R is
# running interactively; scripted or knitted runs skip it.
if (interactive()) {
  View(dataset_2)
}

Order Items Dataset


head(dataset_1)
NA
NA

Overall Orders Dataset


head(dataset_2)
NA
NA

Joining Data set


# Keep every row of the orders table (dataset_2) and attach the matching
# order-item rows (dataset_1) by order_id, then preview the first 10 rows.
joined_data <- dataset_2 %>%
  left_join(dataset_1, by = "order_id")
joined_data %>%
  head(10)
NA

Understand

summary(joined_data)
   order_id         customer_id        order_status       order_purchase_timestamp     
 Length:113425      Length:113425      Length:113425      Min.   :2016-09-04 21:15:19  
 Class :character   Class :character   Class :character   1st Qu.:2017-09-13 11:05:49  
 Mode  :character   Mode  :character   Mode  :character   Median :2018-01-19 10:37:45  
                                                          Mean   :2017-12-31 12:25:41  
                                                          3rd Qu.:2018-05-04 14:22:16  
                                                          Max.   :2018-10-17 17:30:18  
                                                                                       
 order_approved_at             order_delivered_carrier_date  order_delivered_customer_date
 Min.   :2016-09-15 12:16:38   Min.   :2016-10-08 10:34:01   Min.   :2016-10-11 13:46:32  
 1st Qu.:2017-09-13 20:25:10   1st Qu.:2017-09-18 20:37:00   1st Qu.:2017-09-26 20:09:44  
 Median :2018-01-19 16:59:52   Median :2018-01-24 18:44:33   Median :2018-02-02 20:57:23  
 Mean   :2017-12-31 22:40:56   Mean   :2018-01-05 02:17:27   Mean   :2018-01-14 13:25:24  
 3rd Qu.:2018-05-04 18:31:38   3rd Qu.:2018-05-08 13:20:00   3rd Qu.:2018-05-15 20:09:21  
 Max.   :2018-09-03 17:40:06   Max.   :2018-09-11 19:48:28   Max.   :2018-10-17 13:22:46  
 NA's   :161                   NA's   :1968                  NA's   :3229                 
 order_estimated_delivery_date order_item_id     product_id         seller_id        
 Min.   :2016-09-30 00:00:00   Min.   : 1.000   Length:113425      Length:113425     
 1st Qu.:2017-10-04 00:00:00   1st Qu.: 1.000   Class :character   Class :character  
 Median :2018-02-15 00:00:00   Median : 1.000   Mode  :character   Mode  :character  
 Mean   :2018-01-24 08:22:34   Mean   : 1.198                                        
 3rd Qu.:2018-05-25 00:00:00   3rd Qu.: 1.000                                        
 Max.   :2018-11-12 00:00:00   Max.   :21.000                                        
                               NA's   :775                                           
 shipping_limit_date               price         freight_value   
 Min.   :2016-09-19 00:15:34   Min.   :   0.85   Min.   :  0.00  
 1st Qu.:2017-09-20 20:57:27   1st Qu.:  39.90   1st Qu.: 13.08  
 Median :2018-01-26 13:59:35   Median :  74.99   Median : 16.26  
 Mean   :2018-01-07 15:36:52   Mean   : 120.65   Mean   : 19.99  
 3rd Qu.:2018-05-10 14:34:00   3rd Qu.: 134.90   3rd Qu.: 21.15  
 Max.   :2020-04-09 22:35:08   Max.   :6735.00   Max.   :409.68  
 NA's   :775                   NA's   :775       NA's   :775     
# Convert order_status from character to an ordered factor.
# factor() silently turns any value that is not listed in `levels` into NA, so
# every status must be spelled exactly as it appears in the data. The Olist
# data uses the spelling "canceled" (one "l"); listing "cancelled" instead
# turns those orders into NA, and they are then dropped by na.omit().
joined_data$order_status <- factor(
  joined_data$order_status,
  levels = c(
    "unavailable", "created", "canceled", "approved",
    "processing", "invoiced", "shipped", "delivered"
  ),
  ordered = TRUE
)

class(joined_data$order_status) # Checking class of variable after conversion
[1] "ordered" "factor" 
levels(joined_data$order_status) # Displaying the factor levels of order_status, lowest to highest
[1] "unavailable" "created"     "cancelled"   "approved"    "processing"  "invoiced"    "shipped"    
[8] "delivered"  

str(joined_data)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame':    113425 obs. of  14 variables:
 $ order_id                     : chr  "e481f51cbdc54678b7cc49136f2d6af7" "53cdb2fc8bc7dce0b6741e2150273451" "47770eb9100c2d0c44946d9cf07ec65d" "949d5b44dbf5de918fe9c16f97b45f8a" ...
 $ customer_id                  : chr  "9ef432eb6251297304e76186b10a928d" "b0830fb4747a6c6d20dea0b8c802d7ef" "41ce2a54c0b03bf3443c3d931a367089" "f88197465ea7920adcdbec7375364d82" ...
 $ order_status                 : Ord.factor w/ 8 levels "unavailable"<..: 8 8 8 8 8 8 6 8 8 8 ...
 $ order_purchase_timestamp     : POSIXct, format: "2017-10-02 10:56:33" "2018-07-24 20:41:37" "2018-08-08 08:38:49" ...
 $ order_approved_at            : POSIXct, format: "2017-10-02 11:07:15" "2018-07-26 03:24:27" "2018-08-08 08:55:23" ...
 $ order_delivered_carrier_date : POSIXct, format: "2017-10-04 19:55:00" "2018-07-26 14:31:00" "2018-08-08 13:50:00" ...
 $ order_delivered_customer_date: POSIXct, format: "2017-10-10 21:25:13" "2018-08-07 15:27:45" "2018-08-17 18:06:29" ...
 $ order_estimated_delivery_date: POSIXct, format: "2017-10-18" "2018-08-13" "2018-09-04" ...
 $ order_item_id                : num  1 1 1 1 1 1 1 1 1 1 ...
 $ product_id                   : chr  "87285b34884572647811a353c7ac498a" "595fac2a385ac33a80bd5114aec74eb8" "aa4383b373c6aca5d8797843e5594415" "d0b61bfb1de832b15ba9d266ca96e5b0" ...
 $ seller_id                    : chr  "3504c0cb71d7fa48d967e0e4c94d59d9" "289cdb325fb7e7f891c38608bf9e0962" "4869f7a5dfa277a7dca6462dcf3b52b2" "66922902710d126a0e7d26b0e3805106" ...
 $ shipping_limit_date          : POSIXct, format: "2017-10-06 11:07:15" "2018-07-30 03:24:27" "2018-08-13 08:55:23" ...
 $ price                        : num  30 118.7 159.9 45 19.9 ...
 $ freight_value                : num  8.72 22.76 19.22 27.2 8.72 ...

Tidy & Manipulate Data I

# Split every date-time column into a date column and a time column, cutting
# each value at the space between the date part and the time part.
tidy_data <- joined_data %>%
  separate(
    order_purchase_timestamp,
    into = c("purchase_Date", "purchase_Time"),
    sep = " "
  ) %>%
  separate(
    order_approved_at,
    into = c("approved_Date", "approved_Time"),
    sep = " "
  ) %>%
  separate(
    order_delivered_carrier_date,
    into = c("delivered_carrier_Date", "delivered_carrier_Time"),
    sep = " "
  ) %>%
  separate(
    order_delivered_customer_date,
    into = c("delivered_customer_Date", "delivered_customer_Time"),
    sep = " "
  ) %>%
  separate(
    shipping_limit_date,
    into = c("shipping_limit_date", "shipping_limit_Time"),
    sep = " "
  )

# Preview only the newly created date and time columns.
tidy_data %>%
  select(
    purchase_Date, purchase_Time,
    approved_Date, approved_Time,
    delivered_carrier_Date, delivered_carrier_Time,
    delivered_customer_Date, delivered_customer_Time,
    shipping_limit_date, shipping_limit_Time
  ) %>%
  head()
NA
NA

Tidy & Manipulate Data II

# Add the total amount to pay for each item: item price plus freight value.
mutated_data <- tidy_data %>%
  mutate(Total_valueTopay = price + freight_value)

# Preview the new column next to the two columns it is computed from.
mutated_data %>%
  select(order_id, price, freight_value, Total_valueTopay) %>%
  head()
NA
NA

Scan I to check missing values

colSums(is.na(mutated_data))
                     order_id                   customer_id                  order_status 
                            0                             0                           706 
                purchase_Date                 purchase_Time                 approved_Date 
                            0                             0                           161 
                approved_Time        delivered_carrier_Date        delivered_carrier_Time 
                          161                          1968                          1968 
      delivered_customer_Date       delivered_customer_Time order_estimated_delivery_date 
                         3229                          3229                             0 
                order_item_id                    product_id                     seller_id 
                          775                           775                           775 
          shipping_limit_date           shipping_limit_Time                         price 
                          775                           775                           775 
                freight_value              Total_valueTopay 
                          775                           775 
sum(is.na(mutated_data))
[1] 17622
scanned_data1 <- na.omit(mutated_data) # removing NA values and keeping only rows with complete cases
colSums(is.na(scanned_data1)) # Checking for removed NA values
                     order_id                   customer_id                  order_status 
                            0                             0                             0 
                purchase_Date                 purchase_Time                 approved_Date 
                            0                             0                             0 
                approved_Time        delivered_carrier_Date        delivered_carrier_Time 
                            0                             0                             0 
      delivered_customer_Date       delivered_customer_Time order_estimated_delivery_date 
                            0                             0                             0 
                order_item_id                    product_id                     seller_id 
                            0                             0                             0 
          shipping_limit_date           shipping_limit_Time                         price 
                            0                             0                             0 
                freight_value              Total_valueTopay 
                            0                             0 
sum(is.na(scanned_data1))
[1] 0

Scan II

# Box plots of the three numeric variables, used to look for outliers.
# Each variable is plotted once, and the title and y-axis label of every plot
# name the variable that is actually drawn.
scanned_data1$price %>%
  boxplot(main = "Box Plot of Price", ylab = "Price", col = "grey")

scanned_data1$freight_value %>%
  boxplot(
    main = "Box Plot of freight_value",
    ylab = "freight_value",
    col = "grey"
  )

scanned_data1$Total_valueTopay %>%
  boxplot(
    main = "Box Plot of Total_valueTopay",
    ylab = "Total_valueTopay",
    col = "grey"
  )

NA
NA
NA

Finding the z-score for numeric variables


z.scores_price <- scanned_data1$price %>%  scores(type = "z")
z.scores_price %>% summary()
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.65342 -0.43923 -0.24726  0.00000  0.07784 36.28366 
z.scores_freight <- scanned_data1$freight_value %>%  scores(type = "z")
z.scores_freight %>% summary()
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-1.2707 -0.4375 -0.2350  0.0000  0.0765 24.8244 
z.scores_total <- scanned_data1$Total_valueTopay %>%  scores(type = "z")
z.scores_total %>% summary()
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.70694 -0.44766 -0.25250  0.00000  0.09265 35.85921 

Finding the total number of outliers according to the z-score


length (which( abs(z.scores_price) >3 ))
[1] 1930
length (which( abs(z.scores_freight) >3 ))
[1] 2004
length (which( abs(z.scores_total) >3 ))
[1] 1931

Handling outliers

  • To handle outliers, we use a capping method: values below the lower fence (Q1 − 1.5 × IQR) are replaced by the 5th percentile, and values above the upper fence (Q3 + 1.5 × IQR) are replaced by the 95th percentile.
  • After capping, the variables are assigned to new_price, new_freight_value and Total_price.


#' Cap outliers in a numeric vector
#'
#' Values below `Q1 - coef * IQR` are replaced by the `lower_prob` quantile of
#' `x`, and values above `Q3 + coef * IQR` are replaced by the `upper_prob`
#' quantile. All other values, including missing values, are left unchanged.
#'
#' @param x A numeric vector. Missing values are ignored when the quantiles
#'   are computed and are returned untouched.
#' @param lower_prob Probability of the quantile that replaces low outliers.
#' @param upper_prob Probability of the quantile that replaces high outliers.
#' @param coef Multiplier of the inter-quartile range used to build the fences.
#' @return A vector of the same length as `x` with the outliers capped.
cap <- function(x, lower_prob = 0.05, upper_prob = 0.95, coef = 1.5) {
  qs <- quantile(
    x,
    probs = c(lower_prob, 0.25, 0.75, upper_prob),
    na.rm = TRUE,
    names = FALSE
  )
  # Both fences are computed from the untouched data, once, before any value
  # is replaced.
  spread <- coef * (qs[3] - qs[2])
  lower_fence <- qs[2] - spread
  upper_fence <- qs[3] + spread

  # which() drops the NA comparisons, so missing values are never assigned to.
  x[which(x < lower_fence)] <- qs[1]
  x[which(x > upper_fence)] <- qs[4]
  x
}

# Cap the outliers of each numeric variable with cap().
new_price <- cap(scanned_data1$price)
new_freight_value <- cap(scanned_data1$freight_value)
Total_price <- cap(scanned_data1$Total_valueTopay)

# Draw the three capped variables side by side to check the result.
boxplot(
  new_price, new_freight_value, Total_price,
  main = "Multiple boxplots without outliers",
  at = c(1, 2, 3),
  names = c("price", "freight", "total"),
  col = "orange",
  border = "black"
)

Before Transform

  • Histograms of new_price and Total_price are plotted.

  • The histogram below of the capped price variable shows that the data are right-skewed. Appropriate measures are therefore taken to reduce the skewness of the data.
# Histograms of the capped variables, drawn before any transformation.
# new_price and Total_price were produced by capping outliers, not by
# filtering at a fixed value, so the titles describe them as capped.
hist(
  new_price,
  xlab = "Price",
  main = "Histogram of Price after capping outliers"
)
grid()

hist(
  Total_price,
  xlab = "Total_price",
  main = "Histogram of Total_price after capping outliers"
)
grid()

NA
NA

After Transformation


# Square-root transform of the capped variables to reduce their right skew.
# Each transformed variable gets its own name so that the second transform
# does not overwrite the first.
sqrt_price <- sqrt(new_price)
hist(
  sqrt_price,
  xlab = "Square Root (Price)",
  main = "Histogram Plot for Normalised Data using Sqrt Transformation"
)

sqrt_total_price <- sqrt(Total_price)
hist(
  sqrt_total_price,
  xlab = "Square Root (Total_price)",
  main = "Histogram Plot for Normalised Data using Sqrt Transformation"
)

# Kept so that any later code still finds `squareroottransform` holding the
# square root of Total_price, as before.
squareroottransform <- sqrt_total_price

Conclusion

  • Hence, by performing various operations on the datasets, such as filtering, mutation, data type conversions and capping of outliers, we conclude that the resulting dataset can now be used to interpret the data and derive inferences from it, as we have successfully pre-processed the data for further analysis as and when required.
LS0tDQp0aXRsZTogIk1BVEgyMzQ5IFNlbWVzdGVyIDIsIDIwMTkiDQphdXRob3I6ICJIYXJzaGFtIFN1bmRlciAoczM3NzMxNjEpICwgQ2luaXRhIE1hcnkgVmFyZ2hlc2UgKHMzNzk3NjM1KSAiDQpzdWJ0aXRsZTogQXNzaWdubWVudCAzDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCi0tLQ0KDQoNCiMjIFJlcXVpcmVkIHBhY2thZ2VzIA0KDQoNCg0KYGBge3J9DQpsaWJyYXJ5KGRwbHlyKSAgICAgIyBmb3IgcGlwZSBvcGVyYXRvcg0KbGlicmFyeShnZ3Bsb3QyKSAgICMgZm9yIGRpc3BsYXkgb2YgcGxvdHMNCmxpYnJhcnkocmVhZHIpICAgICAjIHRvIGltcG9ydCBkYXRhc2V0cw0KbGlicmFyeShsYXR0aWNlKSAgICMgdG8gZGlzcGxheSBncmFwaHMNCmxpYnJhcnkobHVicmlkYXRlKSAjIGZvciBvcGVyYXRpb25zIG9uIGRhdGUgYW5kIHRpbWUgdmFyaWFibGVzDQpsaWJyYXJ5KHRpZHlyKSAgICAgIyB1c2VkIHRvIHRpZHkgZGF0YQ0KbGlicmFyeShmb3JlY2FzdCkgICMgdXNlZCBmb3IgQm94LUNveCB0cmFuc2Zvcm1hdGlvbg0KbGlicmFyeSh2YWxpZGF0ZSkgICMgdG8gdXNlIGZ1bmN0aW9ucyBvbiBkYXRhZnJhbWVzDQpsaWJyYXJ5KG91dGxpZXJzKQ0KDQpgYGANCg0KDQojIyBFeGVjdXRpdmUgU3VtbWFyeSANCg0KDQoqIEZpcnN0LCB0aGUgcmVxdWlyZWQgcGFja2FnZXMgYXJlIGluc3RhbGxlZCBhbmQgbG9hZGVkIHRvIHVzZSByZXF1aXJlZCBmdW5jdGlvbiBhbmQgb3BlcmF0b3JzIGZvciBkYXRhIHByZS1wcm9jZXNzaW5nLiBUaGVuIHRoZSB0d28gZGF0YXNldHMgb2YgaW50cmlndWVkIGFyZSAgaW1wb3J0ZWQgaW50byBSIHdvcmtzcGFjZSB0byBwZXJmb3JtIHJlcXVpcmVkIG9wZXJhdGlvbnMuPGJyPg0KDQoqIEJlZm9yZSBjYXJyeWluZyBvdXQgdGhlIHByZS1wcm9jZXNzaW5nIHRhc2ssIHRoZSBsZWZ0IGpvaW4gY29uZGl0aW9uIGlzIHVzZWQgdG8gbWVyZ2UgdGhlIHR3byBkYXRhc2V0cyBpbiBvcmRlciB0byBnYXRoZXIgdGhlIGRldGFpbHMgb2Ygb25seSB0aG9zZSBvcmRlcnMgd2hpY2ggaGFzIGRlbGl2ZXJ5IHN0YXR1cy48YnI+DQoNCiogVGhlIHJ1bmRvd24gb2YgY29tYmluZWQgZGF0YXNldCBpcyBkaXNwbGF5ZWQgd2hpY2ggc2hvd3Mgc3RhdGlzdGljYWwgYW5kIGdlbmVyYWwgaW5mb3JtYXRpb24gb2YgYWxsIHRoZSB2YXJpYWJsZXMuIDxicj4NCg0KKiBUaGUgZGVzaXJlZCB2YXJpYWJsZXMgYXJlIGNoYW5nZWQgb3ZlciBpbnRvIHJlYXNvbmFibGUgZGF0YSB0eXBlIHN1Y2ggYXMgZmFjdG9yIGFzIHJlcXVpcmVkPGJyPg0KDQoqIFBvc3QgZGF0YXR5cGUgY29udmVyc2lvbnMsIHRoZSBOQSB2YWx1ZXMgYXJlIGRldGVybWluZWQgYW5kIG9taXR0ZWQgdG8gY2Fycnkgb3V0IG5lY2Vzc2FyeSBvcGVyYXRpb25zLiA8YnI+DQoNCiogVGhlIGRhdGFzZXQgaXMgaW4gYSBVTlRJRFkgZm9ybWF0LiBUaGUgZGF0ZSBhbmQgdGltZSBhcmUgaW4gdGhlIHNhbWUgdmFyaWFibGUsU28gd2Ugc2VwYXJhdGVk
IGl0Ljxicj4NCg0KKiBBbm90aGVyIG5ldyB2YXJpYWJsZSBuYW1lZCBUb3RhbF92YWx1ZVRvcGF5IGlzIG11dGF0ZWQgdG8gZGlzcGxheSB0aGUgdG90YWwgY29zdCBvZiB0aGUgcHJvZHVjdCBpbmNsdWRpbmcgZGVsaXZleSBjaGFyZ2VzLjxicj4NCg0KKiBUaGUgb3V0bGllcnMgb2YgdGhlIDMgbnVtZXJpYyB2YXJpYWJsZXMsIHByaWNlLCBmcmVpZ2h0X3ZhbHVlIGFuZCBUb3RhbF92YWx1ZVRvcGF5IGFyZSAgY2hlY2tlZCBieSB1c2luZyBib3hwbG90LiBUaGUgb3V0bGllcnMgcmVjb2duaXplZCBhcmUgbWFuYWdlZCB1c2luZyB0aGUgQ0FQUElORyBtZXRob2QgdmlhIGEgdXNlci1kZWZpbmVkIGZ1bmN0aW9uIHdoaWNoIGNhcHMgdGhlIHZhbHVlcyBhYm92ZSA5NXBlcmNlbnRpbGUgb2YgdGhlIGRhdGEuPGJyPg0KDQoqIFdlIG5lZWQgdG8gTk9STUFMSVpFIHRoZSBoaXN0b2dyYW0gZGlzcGxheWVkIGFmdGVyIHJlbW92aW5nIHRoZSBvdXRsaWVycyB3aGljaCBpbmRpY2F0ZXMgdGhlIGRhdGEgaXMgcmlnaHQgc2tld2VkLjxicj4NCg0KKiBGaW5hbGx5LCB0byByZWR1Y2UgdGhlIHNrZXduZXNzIG9mIHRoZSBwcmljZSBhbmQgVG90YWxfcHJpY2UgdmFyaWFibGVzLCBkYXRhIHRyYW5zZm9ybWF0aW9uIHVzaW5nIFNRVUFSRSBST09UIE1FVEhPRCBpcyB1c2VkLjxicj4NCg0KDQoNCiMjIERhdGEgDQoNCiogVGhlIGRhdGFzZXQgbmFtZWQgIkJyYXppbGlhbiBFLUNvbW1lcmNlIFB1YmxpYyBEYXRhc2V0IGJ5IE9saXN0IiwgaGFzIG92ZXIgMTAwLDAwMCBPcmRlcnMgd2l0aCByZXZpZXdzIGluZm8scHJvZHVjdCBhbmQgY3VzdG9tZXIuPGJyPg0KDQoqIFNvdXJjZSA6IGh0dHBzOi8vd3d3LmthZ2dsZS5jb20vb2xpc3Rici9icmF6aWxpYW4tZWNvbW1lcmNlDQo8YnI+DQoNCiogQW1vbmdzdCB0aGVzZSB3aG9sZSBkYXRhc2V0IFdlIGhhdmUgb25seSBjb25zaWRlcmVkIDIgZmlsZXMgYXMgZGVzaXJlZCBieSB0aGUgYXNzaWdubWVudCByZXF1aXJlbWVudHMgd2hpY2ggYXJlIG9saXN0X29yZGVyc19kYXRhc2V0IGFuZCBvbGlzdF9vcmRlcl9pdGVtX2RhdGFzZXQuPGJyPg0KDQoqIFRoZXNlIHR3byBkYXRhIGZpbGVzIGhhdmUgdGhlIHZhcmlhYmxlcyBhcyBPcmRlcl9pZCwgcHJvZHVjdF9pZCBhbmQgc2VsbGVyX2lkIHdoaWNoIGFyZSBhbHBoYW51bWVyaWMsIG9yZGVyX2l0ZW1faWQsIHByaWNlIGFuZCBmcmVpZ2h0X3ZhbHVlIHdoaWNoIGFyZSBudW1lcmljLCBvcmRlcl9zdGF0dXMgaXMgY2hhcmFjdGVyIGJ1dCB3aWxsIGJlIGNvbnZlcnRlZCBpbnRvIGZhY3Rvciwgb3JkZXJfcHVyY2hhc2VfdGltZXN0YW1wLG9yZGVyX2FwcHJvdmVkX2F0LG9yZGVyX2RlbGl2ZXJlZF9jYXJyaWVyX2RhdGUsb3JkZXJfZGVsaXZlcmVkX2N1c3RvbWVyX2RhdGUsIGFuZCBzaGlwcGluZ19saW1pdF9kYXRlIGFyZSBkYXRlIHRpbWUgdmFyaWFibGVzLjxicj4NCg0KDQojIyBJbXBvcnRpbmcgYW5kIFJlYWRpbmcgdGhlIGRhdGEgc2V0DQoNCg0KYGBge3J9
DQpsaWJyYXJ5KHJlYWRyKQ0KZGF0YXNldF8xIDwtIHJlYWRfY3N2KCJvbGlzdF9vcmRlcl9pdGVtc19kYXRhc2V0LmNzdiIpDQpWaWV3KGRhdGFzZXRfMSkNCg0KbGlicmFyeShyZWFkcikNCmRhdGFzZXRfMiA8LSByZWFkX2Nzdigib2xpc3Rfb3JkZXJzX2RhdGFzZXQuY3N2IikNClZpZXcoZGF0YXNldF8yKQ0KDQoNCg0KYGBgDQoNCg0KIyMgT3JkZXIgSXRlbXMgRGF0YXNldA0KDQpgYGB7cn0NCg0KaGVhZChkYXRhc2V0XzEpDQoNCg0KYGBgDQoNCiMjIE92ZXJhbGwgT3JkZXJzIERhdGFzZXQNCg0KDQpgYGB7cn0NCg0KaGVhZChkYXRhc2V0XzIpDQoNCg0KYGBgDQoNCiMjIEpvaW5pbmcgRGF0YSBzZXQNCg0KKiBEYXRhIGlzIGpvaW5lZCBieSB1c2luZyBsZWZ0X2pvaW4gZnVuY3Rpb24gYW5kIG5ld2x5IGZvcm1lZCBkYXRhc2V0IGlzIG5hbWVkIGFzIGpvaW5lZF9kYXRhLjxicj4NCmBgYHtyfQ0KDQpqb2luZWRfZGF0YSA8LSBsZWZ0X2pvaW4oZGF0YXNldF8yLCBkYXRhc2V0XzEsIGJ5ID0gIm9yZGVyX2lkIikgIA0KaGVhZChqb2luZWRfZGF0YSwgMTApDQoNCmBgYA0KDQojIyBVbmRlcnN0YW5kIA0KDQoqIFdlIGFyZSB1c2luZyBzdW1tYXJ5KCkgZnVuY3Rpb24gdG8gdW5kZXJzdGFuZCBkYXRhdHlwZXMgcHJlc2VudGVkIGluIHRoZSBkYXRhc2V0Lg0KDQoNCg0KDQpgYGB7cn0NCnN1bW1hcnkoam9pbmVkX2RhdGEpDQoNCmBgYA0KDQoqICBUaGUgdmFyaWFibGUgb3JkZXJfc3RhdHVzIGlzIGluIGNoYXJhY3Rlci4gSXQgaXMgdGhlbiBjb252ZXJ0ZWQgaW50byBmYWN0b3IgYW5kIGxldmVsIGlzIG9yZGVyZWQuPGJyPg0KDQpgYGB7cn0NCmpvaW5lZF9kYXRhJG9yZGVyX3N0YXR1cyA8LSBqb2luZWRfZGF0YSRvcmRlcl9zdGF0dXMgJT4lIGZhY3RvcihsZXZlbHMgPSBjKCJ1bmF2YWlsYWJsZSIsICJjcmVhdGVkIiwgImNhbmNlbGxlZCIsICJhcHByb3ZlZCIsICJwcm9jZXNzaW5nIiwiaW52b2ljZWQiLCAic2hpcHBlZCIsICJkZWxpdmVyZWQiKSwgb3JkZXJlZCA9IFRSVUUpIA0KDQpjbGFzcyhqb2luZWRfZGF0YSRvcmRlcl9zdGF0dXMpICMgQ2hlY2tpbmcgY2xhc3Mgb2YgdmFyaWFibGUgYWZ0ZXIgY29udmVyc2lvbg0KDQpsZXZlbHMoam9pbmVkX2RhdGEkb3JkZXJfc3RhdHVzKSAjIGxldmVsbGluZyBvcmRlcl9zdGF0dXMgdmFyaWFibGUNCmBgYA0KDQoqIFRoZSBzdHJ1Y3R1cmUgb2YgdGhlIGpvaW5lZCBEYXRhIGlzIGZvdW5kIG91dC48YnI+DQoNCmBgYHtyfQ0KDQpzdHIoam9pbmVkX2RhdGEpDQoNCmBgYA0KDQoNCg0KDQojIwlUaWR5ICYgTWFuaXB1bGF0ZSBEYXRhIEkgDQoNCiogVGhlIHZhcmlhYmxlcyBvcmRlcl9wdXJjaGFzZV90aW1lc3RhbXAsb3JkZXJfYXBwcm92ZWRfYXQsb3JkZXJfZGVsaXZlcmVkX2NhcnJpZXJfZGF0ZSxvcmRlcl9kZWxpdmVyZWRfY3VzdG9tZXJfZGF0ZSwgYW5kIHNoaXBwaW5nX2xpbWl0X2RhdGUgYXJlIGluIHVudGlkeSBmb3JtYXQgYmVjYXVzZSBkYXRlIGFuZCB0aW1lIGlzIGluIHNh
bWUgY2VsbC4gU28gaXQgaXMgc2VwYXJhdGVkIHRvIHRpZHkgdGhlIGRhdGEuPGJyPg0KKiBUaWRpZWQgZGF0YSBpcyB0aGVuIHNob3duLjxicj4NCg0KYGBge3J9DQp0aWR5X2RhdGEgPC0gam9pbmVkX2RhdGEgJT4lIHNlcGFyYXRlIChvcmRlcl9wdXJjaGFzZV90aW1lc3RhbXAsIGludG8gPSBjKCJwdXJjaGFzZV9EYXRlIiwgInB1cmNoYXNlX1RpbWUiKSwgc2VwID0gIiAiKQ0KDQp0aWR5X2RhdGEgPC0gdGlkeV9kYXRhICU+JSBzZXBhcmF0ZSAob3JkZXJfYXBwcm92ZWRfYXQsIGludG8gPSBjKCJhcHByb3ZlZF9EYXRlIiwgImFwcHJvdmVkX1RpbWUiKSwgc2VwID0gIiAiKQ0KDQp0aWR5X2RhdGEgPC0gdGlkeV9kYXRhICU+JSBzZXBhcmF0ZSAob3JkZXJfZGVsaXZlcmVkX2NhcnJpZXJfZGF0ZSwgaW50byA9IGMoImRlbGl2ZXJlZF9jYXJyaWVyX0RhdGUiLCAiZGVsaXZlcmVkX2NhcnJpZXJfVGltZSIpLCBzZXAgPSAiICIpDQoNCg0KdGlkeV9kYXRhIDwtIHRpZHlfZGF0YSAlPiUgc2VwYXJhdGUgKG9yZGVyX2RlbGl2ZXJlZF9jdXN0b21lcl9kYXRlLCBpbnRvID0gYygiZGVsaXZlcmVkX2N1c3RvbWVyX0RhdGUiLCAiZGVsaXZlcmVkX2N1c3RvbWVyX1RpbWUiKSwgc2VwID0gIiAiKQ0KDQp0aWR5X2RhdGEgPC0gdGlkeV9kYXRhICU+JSBzZXBhcmF0ZSAoc2hpcHBpbmdfbGltaXRfZGF0ZSwgaW50byA9IGMoInNoaXBwaW5nX2xpbWl0X2RhdGUiLCAic2hpcHBpbmdfbGltaXRfVGltZSIpLCBzZXAgPSAiICIpDQoNCg0Kc2VsZWN0KHRpZHlfZGF0YSxwdXJjaGFzZV9EYXRlLHB1cmNoYXNlX1RpbWUsYXBwcm92ZWRfRGF0ZSxhcHByb3ZlZF9UaW1lLGRlbGl2ZXJlZF9jYXJyaWVyX0RhdGUsZGVsaXZlcmVkX2NhcnJpZXJfVGltZSxkZWxpdmVyZWRfY3VzdG9tZXJfRGF0ZSxkZWxpdmVyZWRfY3VzdG9tZXJfVGltZSxzaGlwcGluZ19saW1pdF9kYXRlLHNoaXBwaW5nX2xpbWl0X1RpbWUpICU+JSBoZWFkKCkNCg0KDQpgYGANCg0KIyMJVGlkeSAmIE1hbmlwdWxhdGUgRGF0YSBJSSANCg0KKiBBIG5ldyB2YXJpYWJsZSBUb3RhbF92YWx1ZVRvcGF5IGlzIG1hZGUgYnkgYWRkaW5nIHByaWNlIGFuZCBmcmVpZ2h0IHZhbHVlcy4NCg0KYGBge3J9DQptdXRhdGVkX2RhdGEgPC0gbXV0YXRlKHRpZHlfZGF0YSwgVG90YWxfdmFsdWVUb3BheSA9KHByaWNlKSArIChmcmVpZ2h0X3ZhbHVlKSkNCnNlbGVjdChtdXRhdGVkX2RhdGEsb3JkZXJfaWQscHJpY2UsZnJlaWdodF92YWx1ZSxUb3RhbF92YWx1ZVRvcGF5KSAlPiUgaGVhZCgpDQoNCg0KYGBgDQoNCg0KIyMJU2NhbiBJICB0byBjaGVjayBtaXNzaW5nIHZhbHVlcw0KIA0KDQoqIFRoZSBtaXNzaW5nIHZhbHVlcyBvZiBkYXRhc2V0IGhhcyBhIHRvdGFsIG9mIDE3NjIyLjxicj4NCg0KKiB3ZSB3aWxsIHJlbW92ZSB0aGVzZSBOQSB2YWx1ZXMuPGJyPg0KDQoNCg0KYGBge3J9DQpjb2xTdW1zKGlzLm5hKG11dGF0ZWRfZGF0YSkpDQpzdW0oaXMubmEobXV0YXRlZF9kYXRhKSkNCg0KYGBgDQoNCg0K
YGBge3J9DQpzY2FubmVkX2RhdGExIDwtIG5hLm9taXQobXV0YXRlZF9kYXRhKSAjIHJlbW92aW5nIE5BIHZhbHVlcyBhbmQga2VlcGluZyBvbmx5IHJvd3Mgd2l0aCBjb21wbGV0ZSBjYXNlcw0KY29sU3Vtcyhpcy5uYShzY2FubmVkX2RhdGExKSkgIyBDaGVja2luZyBmb3IgcmVtb3ZlZCBOQSB2YWx1ZXMNCnN1bShpcy5uYShzY2FubmVkX2RhdGExKSkNCg0KDQpgYGANCg0KIyMJU2NhbiBJSQ0KDQoqIEJhc2VkIG9uIGJlbG93IGJveCBwbG90LCB0aGUgVFVLRVkncyBNZXRob2Qgb2YgT3V0bGllciBkZXRlY3Rpb24gaXMgdXNlZCB0byBkZXRlY3QgdGhlIG91dGxpZXJzICBvZiB0aGUgbnVtZXJpYyB2YXJpYWJsZXMgcHJpY2UsZnJlaWdodF92YWx1ZSBhbmQgVG90YWxfdmFsdWVUb3BheS4NCg0KDQpgYGB7cn0NCg0Kc2Nhbm5lZF9kYXRhMSRwcmljZSAlPiUgIGJveHBsb3QoIG1haW49IkJveCBQbG90IG9mIFByaWNlIiwgeWxhYj0iUHJpY2UiLCBjb2wgPSAiZ3JleSIpDQpzY2FubmVkX2RhdGExJGZyZWlnaHRfdmFsdWUgJT4lICBib3hwbG90KCBtYWluPSJCb3ggUGxvdCBvZiBQcmljZSIsIHlsYWI9ImZyZWlnaHRfdmFsdWUiLCBjb2wgPSAiZ3JleSIpDQpzY2FubmVkX2RhdGExJFRvdGFsX3ZhbHVlVG9wYXkgJT4lICBib3hwbG90KCBtYWluPSJCb3ggUGxvdCBvZiBUb3RhbF92YWx1ZVRvcGF5IiwgeWxhYj0iUHJpY2UiLCBjb2wgPSAiZ3JleSIpDQoNCg0KDQpgYGANCg0KIyBGaW5kaW5nIHRoZSB6LXNjb3JlIGZvciBudW1lcmljIHZhcmlhYmxlcw0KDQpgYGB7cn0NCg0Kei5zY29yZXNfcHJpY2UgPC0gc2Nhbm5lZF9kYXRhMSRwcmljZSAlPiUgIHNjb3Jlcyh0eXBlID0gInoiKQ0Kei5zY29yZXNfcHJpY2UgJT4lIHN1bW1hcnkoKQ0KDQp6LnNjb3Jlc19mcmVpZ2h0IDwtIHNjYW5uZWRfZGF0YTEkZnJlaWdodF92YWx1ZSAlPiUgIHNjb3Jlcyh0eXBlID0gInoiKQ0Kei5zY29yZXNfZnJlaWdodCAlPiUgc3VtbWFyeSgpDQoNCnouc2NvcmVzX3RvdGFsIDwtIHNjYW5uZWRfZGF0YTEkVG90YWxfdmFsdWVUb3BheSAlPiUgIHNjb3Jlcyh0eXBlID0gInoiKQ0Kei5zY29yZXNfdG90YWwgJT4lIHN1bW1hcnkoKQ0KDQpgYGANCg0KDQojIEZpbmRpbmcgdGhlIHRvdGFsIG51bWJlciBvZiBvdXRsaWVycyBhY2NvcmRpbmcgdG8gdGhlIHotc2NvcmUgDQoNCg0KYGBge3J9DQoNCmxlbmd0aCAod2hpY2goIGFicyh6LnNjb3Jlc19wcmljZSkgPjMgKSkNCmxlbmd0aCAod2hpY2goIGFicyh6LnNjb3Jlc19mcmVpZ2h0KSA+MyApKQ0KbGVuZ3RoICh3aGljaCggYWJzKHouc2NvcmVzX3RvdGFsKSA+MyApKQ0KDQoNCmBgYA0KDQoNCiMjIEhhbmRsaW5nIG91dGxpZXJzDQoNCiogVG8gcmVtb3ZlIG91dGxpZXJzLCB3ZSBhcmUgdXNpbmcgY2FwcGluZyBtZXRob2Qgd2hlcmUgdGhlIHZhbHVlcyBhYm92ZSB0aGUgdXBwZXIgbGltaXQgYXJlIHJlcGxhY2VkIGJ5IDk1IHBlcmNlbnRpbGUuPGJyPg0KKiBBZnRlciByZW1vdmluZyBvdXRsaWVycywgdGhl
IHZhcmlhYmxlcyBhcmUgYXNzaWduZWQgdG8gbmV3X3ByaWNlLCBuZXdfZnJlaWdodF92YWx1ZSBhbmQgVG90YWxfcHJpY2UNCg0KYGBge3J9DQoNCg0KY2FwPC0gZnVuY3Rpb24gKHgpeyAgICAgICAgICMjIENhcHBpbmcgdGhlIGRhdGFzZXQgdG8gaGFuZGxlIG91dGxpZXJzDQogIHF1YW50aWxlcyA8LSBxdWFudGlsZSh4LGMoLjA1LCAwLjI1LCAwLjc1LCAwLjk1KSkNCiAgeFsgeCA8IHF1YW50aWxlc1syXSAtIDEuNSpJUVIoeCkgXSA8LSBxdWFudGlsZXNbMV0NCiAgeFt4ID4gcXVhbnRpbGVzWzNdICsgMS41KklRUih4KSBdIDwtcXVhbnRpbGVzWzRdDQogIHgNCn0NCg0KbmV3X3ByaWNlPC0gc2Nhbm5lZF9kYXRhMSRwcmljZSAlPiUgY2FwKCkNCm5ld19mcmVpZ2h0X3ZhbHVlPC0gc2Nhbm5lZF9kYXRhMSRmcmVpZ2h0X3ZhbHVlICU+JSBjYXAoKQ0KVG90YWxfcHJpY2U8LSBzY2FubmVkX2RhdGExJFRvdGFsX3ZhbHVlVG9wYXkgJT4lIGNhcCgpDQoNCg0KDQpib3hwbG90KG5ld19wcmljZSwgbmV3X2ZyZWlnaHRfdmFsdWUsIFRvdGFsX3ByaWNlLCBtYWluID0gIk11bHRpcGxlIGJveHBsb3RzIHdpdGhvdXQgb3V0bGllcnMiLCBhdCA9IGMoMSwyLDMpLA0KICAgICAgICBuYW1lcyA9IGMoInByaWNlIiwgImZyZWlnaHQiLCAidG90YWwiKSwgIGNvbCA9Im9yYW5nZSIsIGJvcmRlciA9ICJibGFjayIpDQoNCmBgYA0KDQoNCiMjIEJlZm9yZQlUcmFuc2Zvcm0gDQoNCipUaGUgbmV3X3ByaWNlIGFuZCB0b3RhbCBwcmljZSBoaXN0b2dyYW0gaXMgcGxvdHRlZC48YnI+DQoNCiogVGhlIGJlbG93IGhpc3RvZ3JhbSBwbG90IG9mIGZpbHRlcmVkIHByaWNlIHZhcmlhYmxlICh2YWx1ZSBsZXNzIHRoYW4gNTAwKSBzaG93cyB0aGF0IHRoZSBkYXRhIGlzIHJpZ2h0IHNrZXdlZC4gVGh1cywgYXBwcm9wcmlhdGUgbWVhc3VyZXMgYXJlIHRha2VuIHRvIHJlZHVjZSBza2V3bmVzcyBvZiB0aGUgZGF0YXNldDxicj4NCg0KYGBge3J9DQpoaXN0KG5ld19wcmljZSwgeGxhYiA9ICJQcmljZSIsIG1haW49Ikhpc3RvZ3JhbSBhZnRlciBmaWx0ZXJpbmcgUHJpY2UgPD0gNTAwIikNCmdyaWQoKQ0KDQoNCmhpc3QoVG90YWxfcHJpY2UsIHhsYWIgPSAiVG90YWxfcHJpY2UiLCBtYWluPSJIaXN0b2dyYW0gYWZ0ZXIgZmlsdGVyaW5nIFRvdGFsX3ByaWNlIDw9IDUwMCIpDQpncmlkKCkNCg0KDQpgYGANCg0KDQojIEFmdGVyIFRyYW5zZm9ybWF0aW9uDQoNCiogQWZ0ZXIgdHJ5aW5nIHNldmVyYWwgdHJhbnNmb3JtYXRpb25zIHN1Y2ggYm94LWNveCwgbG9nMTAsIGxvZywgbWluIG1heCBub3JtYWxpc2F0aW9uIGFuZCB6LXNjb3JlLCB3ZSBmb3VuZCBTcXJ0IG1ldGhvZCBhcyB0aGUgbW9zdCBhcHByb3ByaWF0ZSBtZXRob2QgZm9yIHRoZSB0cmFuc2Zvcm1hdGlvbi4NCg0KDQpgYGB7cn0NCg0Kc3F1YXJlcm9vdHRyYW5zZm9ybSA8LSBzcXJ0KG5ld19wcmljZSkNCmhpc3Qoc3F1YXJlcm9vdHRyYW5zZm9ybSwgeGxhYiA9ICJTcXVhcmUgUm9vdCAoUHJpY2UpIiwg
bWFpbiA9ICJIaXN0b2dyYW0gUGxvdCBmb3IgTm9ybWFsaXNlZCBEYXRhIHVzaW5nIFNxcnQgVHJhbnNmb3JtYXRpb24iKQ0KDQoNCg0Kc3F1YXJlcm9vdHRyYW5zZm9ybSA8LSBzcXJ0KFRvdGFsX3ByaWNlKQ0KaGlzdChzcXVhcmVyb290dHJhbnNmb3JtLCB4bGFiID0gIlNxdWFyZSBSb290IChUb3RhbF9wcmljZSkiLCBtYWluID0gIkhpc3RvZ3JhbSBQbG90IGZvciBOb3JtYWxpc2VkIERhdGEgdXNpbmcgU3FydCBUcmFuc2Zvcm1hdGlvbiIpDQpgYGANCg0KIyMgQ29uY2x1c2lvbg0KDQoqIEhlbmNlLCBieSBwZXJmb3JtaW5nIHZhcmlvdXMgb3BlcmF0aW9ucyBvbiB0aGUgZGF0YXNldHMgbGlrZSBmaWx0ZXJpbmcsIG11dGF0aW9uLCBkYXRhdHlwZSBjb252ZXJzaW9ucywgdXNpbmcgY2FwcGluZyBmdW5jdGlvbnMgZXRjLiwgd2UgY29uY2x1ZGUgdGhhdCB0aGUgb2J0YWluZWQgZGF0YXNldCBjYW4gbm93IGJlIHVzZWQgdG8gaW50ZXJwcmV0IGRhdGEgYW5kIGRlcml2ZSBpbmZlcmVuY2VzIGZyb20gaXQgYXMgd2UgaGF2ZSBzdWNjZXNzZnVsbHkgcHJlLXByZXByb2Nlc3NlZCB0aGUgZGF0YSBmb3IgZnVydGhlciBhbmFseXNpcyBhcyBhbmQgd2hlbiByZXF1aXJlZC4NCjxicj4NCg==