Data Originally from Kaggle at: https://www.kaggle.com/backblaze/hard-drive-test-data.

But after experiencing issues with 64bit values, the data were obtained from the original source here: https://www.backblaze.com/b2/hard-drive-test-data.html.

Setup

Load the relevant libraries.


# rm(list = ls())
# .rs.restartR()


# data munging
library("tidyverse")
library("data.table")

# munge dates
library("lubridate")

# feature engineering and data prep
library("recipes")
library("caret")
library("DMwR")

# to explore missing data
library("visdat")
library("naniar")

# modeling
library("h2o")

Session Info.


sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] h2o_3.24.0.1      naniar_0.4.2      visdat_0.5.3      DMwR_0.4.1       
 [5] caret_6.0-84      lattice_0.20-38   recipes_0.1.5     lubridate_1.7.4  
 [9] data.table_1.12.2 forcats_0.4.0     stringr_1.4.0     dplyr_0.8.1      
[13] purrr_0.3.2       readr_1.3.1       tidyr_0.8.3       tibble_2.1.1     
[17] ggplot2_3.1.1     tidyverse_1.2.1  

loaded via a namespace (and not attached):
 [1] httr_1.4.0         jsonlite_1.6       splines_3.6.0      foreach_1.4.4     
 [5] gtools_3.8.1       prodlim_2018.04.18 modelr_0.1.4       assertthat_0.2.1  
 [9] TTR_0.23-4         stats4_3.6.0       cellranger_1.1.0   yaml_2.2.0        
[13] ipred_0.9-9        pillar_1.4.0       backports_1.1.4    glue_1.3.1        
[17] digest_0.6.19      rvest_0.3.4        colorspace_1.4-1   htmltools_0.3.6   
[21] Matrix_1.2-17      plyr_1.8.4         timeDate_3043.102  pkgconfig_2.0.2   
[25] broom_0.5.2        haven_2.1.0        scales_1.0.0       gdata_2.18.0      
[29] gower_0.2.1        lava_1.6.5         generics_0.0.2     withr_2.1.2       
[33] ROCR_1.0-7         nnet_7.3-12        lazyeval_0.2.2     cli_1.1.0         
[37] quantmod_0.4-14    survival_2.44-1.1  magrittr_1.5       crayon_1.3.4      
[41] readxl_1.3.1       evaluate_0.13      nlme_3.1-140       MASS_7.3-51.4     
[45] gplots_3.0.1.1     xts_0.11-2         xml2_1.2.0         class_7.3-15      
[49] tools_3.6.0        hms_0.4.2          munsell_0.5.0      packrat_0.5.0     
[53] compiler_3.6.0     caTools_1.17.1.2   rlang_0.3.4        RCurl_1.95-4.12   
[57] iterators_1.0.10   rstudioapi_0.10    bitops_1.0-6       base64enc_0.1-3   
[61] rmarkdown_1.12     gtable_0.3.0       ModelMetrics_1.2.2 codetools_0.2-16  
[65] abind_1.4-5        curl_3.3           reshape2_1.4.3     R6_2.4.0          
[69] zoo_1.8-5          knitr_1.23         KernSmooth_2.23-15 stringi_1.4.3     
[73] Rcpp_1.0.1         rpart_4.1-15       tidyselect_0.2.5   xfun_0.7          

Setup the root directory.

Setting wd as the working directory.


wd <- getwd()

wd
[1] "/Users/mdturse/Desktop/Analytics/hard_drive_failure"

Get the data

The site where the data are made available is https://www.backblaze.com/b2/hard-drive-test-data.html.

Download the .zip file.


# base url
url <- "https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q4_2018.zip"


# create a temporary directory
td <- tempdir()


# create the placeholder file
tf <- tempfile(tmpdir = td, fileext = ".zip")


# download into the placeholder file
download.file(url, tf)
trying URL 'https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q4_2018.zip'
Content type 'application/zip' length 547619262 bytes (522.3 MB)
==================================================
downloaded 522.3 MB

Download just the relevant .csv files.


# get the names of the files
fname <- unzip(tf, list = TRUE)
head(fname)

fname_csv <- 
  fname %>% 
  filter(str_detect(Name, "data_Q4_2018/2018-\\d{2}-\\d{2}\\.csv$")) %>% 
  arrange(Name) %>% 
  # head(5) %>%
  pull(Name)


# unzip the files to the temporary directory
fname_csv %>% 
  map(~unzip(tf,
             files = .x,
             exdir = td,
             overwrite = TRUE
             )
      )
[[1]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-01.csv"

[[2]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-02.csv"

[[3]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-03.csv"

[[4]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-04.csv"

[[5]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-05.csv"

[[6]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-06.csv"

[[7]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-07.csv"

[[8]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-08.csv"

[[9]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-09.csv"

[[10]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-10.csv"

[[11]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-11.csv"

[[12]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-12.csv"

[[13]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-13.csv"

[[14]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-14.csv"

[[15]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-15.csv"

[[16]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-16.csv"

[[17]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-17.csv"

[[18]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-18.csv"

[[19]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-19.csv"

[[20]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-20.csv"

[[21]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-21.csv"

[[22]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-22.csv"

[[23]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-23.csv"

[[24]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-24.csv"

[[25]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-25.csv"

[[26]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-26.csv"

[[27]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-27.csv"

[[28]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-28.csv"

[[29]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-29.csv"

[[30]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-30.csv"

[[31]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-10-31.csv"

[[32]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-01.csv"

[[33]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-02.csv"

[[34]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-03.csv"

[[35]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-04.csv"

[[36]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-05.csv"

[[37]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-06.csv"

[[38]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-07.csv"

[[39]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-08.csv"

[[40]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-09.csv"

[[41]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-10.csv"

[[42]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-11.csv"

[[43]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-12.csv"

[[44]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-13.csv"

[[45]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-14.csv"

[[46]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-15.csv"

[[47]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-16.csv"

[[48]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-17.csv"

[[49]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-18.csv"

[[50]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-19.csv"

[[51]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-20.csv"

[[52]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-21.csv"

[[53]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-22.csv"

[[54]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-23.csv"

[[55]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-24.csv"

[[56]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-25.csv"

[[57]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-26.csv"

[[58]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-27.csv"

[[59]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-28.csv"

[[60]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-29.csv"

[[61]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-11-30.csv"

[[62]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-01.csv"

[[63]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-02.csv"

[[64]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-03.csv"

[[65]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-04.csv"

[[66]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-05.csv"

[[67]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-06.csv"

[[68]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-07.csv"

[[69]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-08.csv"

[[70]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-09.csv"

[[71]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-10.csv"

[[72]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-11.csv"

[[73]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-12.csv"

[[74]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-13.csv"

[[75]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-14.csv"

[[76]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-15.csv"

[[77]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-16.csv"

[[78]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-17.csv"

[[79]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-18.csv"

[[80]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-19.csv"

[[81]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-20.csv"

[[82]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-21.csv"

[[83]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-22.csv"

[[84]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-23.csv"

[[85]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-24.csv"

[[86]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-25.csv"

[[87]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-26.csv"

[[88]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-27.csv"

[[89]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-28.csv"

[[90]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-29.csv"

[[91]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-30.csv"

[[92]]
[1] "/var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/data_Q4_2018/2018-12-31.csv"
# fpath is the full path to the extracted files
fpath <-
  fname_csv %>% 
  map(~file.path(td, .x)
      )

Read in the .csv files and stack them into a single .rds file.


hd <-
  fpath %>% 
  map(~fread(.x, verbose = FALSE)
      )
Previous fread() session was not cleaned up properly. Cleaned up ok at the beginning of this fread() call.|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
|--------------------------------------------------|
|==================================================|
hd_dt <- bind_rows(hd)


# fwrite(hd_dt,
#        paste0(wd,
#                "/Data/Interim/",
#                "hd_dt.csv"
#                )
#        )

# saveRDS(hd_dt,
#         paste0(wd,
#                "/Data/Interim/",
#                "hd_dt.Rds"
#                )
#         )

# hd_dt <-
#   readRDS(paste0(wd,
#                  "/Data/Interim/",
#                  "hd_dt.Rds"
#                  )
#           )


# message("hd")
# class(hd)
# glimpse(hd[1])
# 
# message("hd_dt")
# glimpse(hd_dt)


rm(fname, fpath, fname_csv, td, tf, url, hd)

Experimenting with converting integer64 values to smaller numeric values.


# class(hd_dt)
# hd_dt2 <- hd_dt

# 12,000,138,625,024
# hd_dt$capacity_bytes <- as.numeric(hd_dt$capacity_bytes / 1000000)# * 1000000
# hd_dt$smart_7_raw <- as.numeric(hd_dt$smart_7_raw / 1000000)# * 1000000
# hd_dt$smart_188_raw <- as.numeric(hd_dt$smart_188_raw / 1000000)# * 1000000
# hd_dt$smart_240_raw <- as.numeric(hd_dt$smart_240_raw / 1000000)# * 1000000
# hd_dt$smart_241_raw <- as.numeric(hd_dt$smart_241_raw / 1000000)# * 1000000
# hd_dt$smart_242_raw <- as.numeric(hd_dt$smart_242_raw / 1000000)# * 1000000

# class(hd_dt)
# glimpse(hd_dt)

# hd_dt$capacity_bytes3 <- as.integer(hd_dt$capacity_bytes)

Data Prep

Separate model into pieces - manufacturer and model.


hd2 <-
  hd_dt %>% 
  # head(1000) %>%
  separate(col = model,
           into = c("manu", "model2"),
           sep = "\\s",
           fill = "left",
           remove = TRUE
           ) %>%
  mutate(date = as_date(date),
         serial_number = factor(serial_number),
         manu = case_when(is.na(manu) ~ "(Missing)",
                          TRUE ~ manu
                          ) %>% 
           factor(),
         model2 = factor(model2),
         failure = factor(failure)
         ) %>% 
  select(-matches("normalized")
         ) %>% 
  as.data.table %>% 
  setkey(manu, model2, serial_number, date)
Expected 2 pieces. Additional pieces discarded in 500 rows [3037766, 3139107, 3240409, 3341661, 3442910, 3544156, 3582868, 3645402, 3684079, 3709513, 3746563, 3746576, 3785233, 3810667, 3847716, 3847729, 3886390, 3911822, 3918813, 3948873, ...].
class(hd2)
[1] "data.table" "data.frame"
glimpse(hd2)
Observations: 9,357,609
Variables: 68
$ date           <date> 2018-10-01, 2018-10-02, 2018-10-03, 2018-10-04, 2018-10-05, 2…
$ serial_number  <fct> ZA20NH66, ZA20NH66, ZA20NH66, ZA20NH66, ZA20NH66, ZA20NH66, ZA…
$ manu           <fct> (Missing), (Missing), (Missing), (Missing), (Missing), (Missin…
$ model2         <fct> ST10000NM0086, ST10000NM0086, ST10000NM0086, ST10000NM0086, ST…
$ capacity_bytes <dbl> 4.941067e-311, 4.941067e-311, 4.941067e-311, 4.941067e-311, 4.…
$ failure        <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_1_raw    <int> 98368032, 78733432, 74038736, 77143848, 117522120, 68864496, 5…
$ smart_2_raw    <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_3_raw    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_4_raw    <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ smart_5_raw    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_7_raw    <integr64> 1543760090, 1547844538, 1552162785, 1557445945, 156214729…
$ smart_8_raw    <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_9_raw    <int> 9635, 9659, 9683, 9707, 9731, 9755, 9779, 9803, 9819, 9852, 98…
$ smart_10_raw   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_11_raw   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_12_raw   <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
$ smart_13_raw   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_15_raw   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_16_raw   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_17_raw   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_22_raw   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_23_raw   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_24_raw   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_168_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_170_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_173_raw  <integr64> 9218868437227407266, 9218868437227407266, 921886843722740…
$ smart_174_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_177_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_179_raw  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_181_raw  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_182_raw  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_183_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_184_raw  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_187_raw  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_188_raw  <integr64> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ smart_189_raw  <int> 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, …
$ smart_190_raw  <int> 23, 23, 24, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 23, 23…
$ smart_191_raw  <int> 105566, 105752, 105987, 106238, 106501, 106664, 106947, 107166…
$ smart_192_raw  <int> 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 68…
$ smart_193_raw  <int> 669, 670, 671, 672, 673, 674, 675, 676, 677, 681, 682, 685, 68…
$ smart_194_raw  <int> 23, 23, 24, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 23, 23…
$ smart_195_raw  <int> 98368032, 78733432, 74038736, 77143848, 117522120, 68864496, 5…
$ smart_196_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_197_raw  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_198_raw  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_199_raw  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_200_raw  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ smart_201_raw  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_218_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_220_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_222_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_223_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_224_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_225_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_226_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_231_raw  <integr64> 9218868437227407266, 9218868437227407266, 921886843722740…
$ smart_232_raw  <integr64> 9218868437227407266, 9218868437227407266, 921886843722740…
$ smart_233_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_235_raw  <integr64> 9218868437227407266, 9218868437227407266, 921886843722740…
$ smart_240_raw  <integr64> 9546, 9570, 9594, 9618, 9642, 9665, 9690, 9714, 9729, 976…
$ smart_241_raw  <integr64> 46154421893, 46193919421, 46245435021, 46307985421, 46380…
$ smart_242_raw  <integr64> 97015934425, 97203507833, 97391775273, 97577094617, 97791…
$ smart_250_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_251_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_252_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_254_raw  <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_255_raw  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
summary(hd2)
      date              serial_number             manu        
 Min.   :2018-10-01   175PP3HDT:     92   (Missing) :7096719  
 1st Qu.:2018-10-24   175PP3I4T:     92   HGST      :1929277  
 Median :2018-11-17   175PP3I5T:     92   TOSHIBA   : 206129  
 Mean   :2018-11-16   175PP3I6T:     92   WDC       :  68637  
 3rd Qu.:2018-12-09   175PP3I8T:     92   ST500LM012:  54777  
 Max.   :2018-12-31   175PP3I9T:     92   Hitachi   :   1388  
                      (Other)  :9357057   (Other)   :    682  
             model2        capacity_bytes failure      smart_1_raw       
 ST12000NM0007  :2460704   Min.   :0      0:9357216   Min.   :        0  
 ST4000DM000    :2156756   1st Qu.:0      1:    393   1st Qu.:  2389506  
 HMS5C4040BLE640:1335577   Median :0                  Median : 83386312  
 ST8000NM0055   :1312180   Mean   :0                  Mean   : 92613745  
 ST8000DM002    : 901613   3rd Qu.:0                  3rd Qu.:164073018  
 HMS5C4040ALE640: 426449   Max.   :0                  Max.   :844902462  
 (Other)        : 764330   NA's   :363                NA's   :363        
  smart_2_raw       smart_3_raw       smart_4_raw         smart_5_raw      
 Min.   :   0      Min.   :    0.0   Min.   :    1.000   Min.   :    0.00  
 1st Qu.: 100      1st Qu.:    0.0   1st Qu.:    3.000   1st Qu.:    0.00  
 Median : 102      Median :    0.0   Median :    5.000   Median :    0.00  
 Mean   :  88      Mean   :  229.9   Mean   :    8.638   Mean   :    5.76  
 3rd Qu.: 104      3rd Qu.:    0.0   3rd Qu.:    8.000   3rd Qu.:    0.00  
 Max.   :1061      Max.   :11042.0   Max.   :25118.000   Max.   :55320.00  
 NA's   :7165913   NA's   :863       NA's   :863         NA's   :863       
  smart_7_raw              smart_8_raw       smart_9_raw     smart_10_raw   
 Min.   :             0   Min.   : 0        Min.   :    0   Min.   :     0  
 1st Qu.:       6874406   1st Qu.:41        1st Qu.: 8913   1st Qu.:     0  
 Median :     371331439   Median :42        Median :15247   Median :     0  
 Mean   :    1668407196   Mean   :35        Mean   :17252   Mean   :    22  
 3rd Qu.:     942362808   3rd Qu.:42        3rd Qu.:25131   3rd Qu.:     0  
 Max.   :10649354203107   Max.   :45        Max.   :70441   Max.   :327680  
 NA's   :           863   NA's   :7165913   NA's   :363     NA's   :863     
  smart_11_raw      smart_12_raw      smart_13_raw   smart_15_raw    smart_16_raw    
 Min.   :   0      Min.   :   0.000   Mode:logical   Mode:logical   Min.   : 62      
 1st Qu.:   0      1st Qu.:   2.000   NA's:9357609   NA's:9357609   1st Qu.: 68      
 Median :   0      Median :   4.000                                 Median : 79      
 Mean   : 349      Mean   :   5.949                                 Mean   : 79      
 3rd Qu.: 507      3rd Qu.:   7.000                                 3rd Qu.: 83      
 Max.   :9225      Max.   :1231.000                                 Max.   :171      
 NA's   :9234013   NA's   :363                                      NA's   :9357109  
  smart_17_raw      smart_22_raw      smart_23_raw      smart_24_raw    
 Min.   : 62       Min.   : 68       Min.   :0         Min.   :0        
 1st Qu.: 68       1st Qu.:100       1st Qu.:0         1st Qu.:0        
 Median : 79       Median :100       Median :0         Median :0        
 Mean   : 79       Mean   :100       Mean   :0         Mean   :0        
 3rd Qu.: 83       3rd Qu.:100       3rd Qu.:0         3rd Qu.:0        
 Max.   :171       Max.   :100       Max.   :0         Max.   :0        
 NA's   :9357109   NA's   :9197269   NA's   :9251432   NA's   :9251432  
 smart_168_raw     smart_170_raw     smart_173_raw                 smart_174_raw    
 Min.   :0         Min.   : 165      Min.   :         4294967297   Min.   :1        
 1st Qu.:0         1st Qu.: 319      1st Qu.:9218868437227407266   1st Qu.:1        
 Median :0         Median : 391      Median :9218868437227407266   Median :1        
 Mean   :0         Mean   : 392      Mean   :9217302923573511133   Mean   :2        
 3rd Qu.:0         3rd Qu.: 411      3rd Qu.:9218868437227407266   3rd Qu.:2        
 Max.   :0         Max.   :1397      Max.   :9218868437227407266   Max.   :3        
 NA's   :9357109   NA's   :9357109   NA's   :            6413250   NA's   :9357109  
 smart_177_raw     smart_179_raw  smart_181_raw  smart_182_raw  smart_183_raw    
 Min.   :0         Mode:logical   Mode:logical   Mode:logical   Min.   :    0    
 1st Qu.:0         NA's:9357609   NA's:9357609   NA's:9357609   1st Qu.:    0    
 Median :1                                                      Median :    0    
 Mean   :1                                                      Mean   :    3    
 3rd Qu.:1                                                      3rd Qu.:    0    
 Max.   :4                                                      Max.   :37728    
 NA's   :9357109                                                NA's   :7055154  
 smart_184_raw     smart_187_raw     smart_188_raw          smart_189_raw    
 Min.   : 0        Min.   :  0.0     Min.   :           0   Min.   :    0    
 1st Qu.: 0        1st Qu.:  0.0     1st Qu.:           0   1st Qu.:    0    
 Median : 0        Median :  0.0     Median :           0   Median :    0    
 Mean   : 0        Mean   :  0.1     Mean   :    80816376   Mean   :    7    
 3rd Qu.: 0        3rd Qu.:  0.0     3rd Qu.:           0   3rd Qu.:    0    
 Max.   :72        Max.   :524.0     Max.   :601305711020   Max.   :65535    
 NA's   :4721683   NA's   :2261196   NA's   :     2261196   NA's   :4721683  
 smart_190_raw     smart_191_raw     smart_192_raw     smart_193_raw    
 Min.   :14.0      Min.   :      0   Min.   :    0.0   Min.   :      1  
 1st Qu.:24.0      1st Qu.:      0   1st Qu.:    0.0   1st Qu.:    361  
 Median :29.0      Median :      1   Median :   12.0   Median :   2426  
 Mean   :29.2      Mean   :  10960   Mean   :  167.7   Mean   :  15292  
 3rd Qu.:34.0      3rd Qu.:   9318   3rd Qu.:   93.0   3rd Qu.:  13107  
 Max.   :56.0      Max.   :4752298   Max.   :65535.0   Max.   :1104852  
 NA's   :2261196   NA's   :4448060   NA's   :762       NA's   :56221    
 smart_194_raw   smart_195_raw       smart_196_raw     smart_197_raw      
 Min.   :12.00   Min.   :        0   Min.   :   0      Min.   :    0.000  
 1st Qu.:25.00   1st Qu.: 59043538   1st Qu.:   0      1st Qu.:    0.000  
 Median :29.00   Median :121289036   Median :   0      Median :    0.000  
 Mean   :29.12   Mean   :120787181   Mean   :   1      Mean   :    0.097  
 3rd Qu.:33.00   3rd Qu.:181994984   3rd Qu.:   0      3rd Qu.:    0.000  
 Max.   :56.00   Max.   :244140616   Max.   :1759      Max.   :10016.000  
 NA's   :363     NA's   :4373803     NA's   :7097276   NA's   :863        
 smart_198_raw       smart_199_raw      smart_200_raw     smart_201_raw 
 Min.   :    0.000   Min.   :   0.000   Min.   :     0    Mode:logical  
 1st Qu.:    0.000   1st Qu.:   0.000   1st Qu.:     0    NA's:9357609  
 Median :    0.000   Median :   0.000   Median :     0                  
 Mean   :    0.092   Mean   :   0.616   Mean   :  4771                  
 3rd Qu.:    0.000   3rd Qu.:   0.000   3rd Qu.:     0                  
 Max.   :10016.000   Max.   :4139.000   Max.   :850585                  
 NA's   :863         NA's   :863        NA's   :6663044                 
 smart_218_raw     smart_220_raw       smart_222_raw     smart_223_raw    
 Min.   :0         Min.   :        0   Min.   :    3     Min.   :   0     
 1st Qu.:0         1st Qu.:        0   1st Qu.: 1078     1st Qu.:   0     
 Median :0         Median :   393217   Median : 2079     Median :   0     
 Mean   :0         Mean   : 47079213   Mean   : 7378     Mean   : 165     
 3rd Qu.:0         3rd Qu.: 68681730   3rd Qu.:11085     3rd Qu.:   0     
 Max.   :0         Max.   :287440904   Max.   :33653     Max.   :9225     
 NA's   :9357109   NA's   :9151485     NA's   :9151485   NA's   :9096526  
 smart_224_raw     smart_225_raw     smart_226_raw     smart_231_raw                
 Min.   :0         Min.   :  34185   Min.   :159       Min.   :    109951162777699  
 1st Qu.:0         1st Qu.: 103221   1st Qu.:261       1st Qu.:9218868437227407266  
 Median :0         Median : 164134   Median :529       Median :9218868437227407266  
 Mean   :0         Mean   : 397460   Mean   :410       Mean   :9217302942244275773  
 3rd Qu.:0         3rd Qu.: 639641   3rd Qu.:534       3rd Qu.:9218868437227407266  
 Max.   :0         Max.   :2390237   Max.   :650       Max.   :9218868437227407266  
 NA's   :9151485   NA's   :9302650   NA's   :9151485   NA's   :            6413250  
 smart_232_raw                 smart_233_raw     smart_235_raw                
 Min.   :       270582939648   Min.   : 480      Min.   :         1015083328  
 1st Qu.:9218868437227407266   1st Qu.: 653      1st Qu.:9218868437227407266  
 Median :9218868437227407266   Median : 902      Median :9218868437227407266  
 Mean   :9217302923633694609   Mean   :1178      Mean   :9217949159185155570  
 3rd Qu.:9218868437227407266   3rd Qu.:1380      3rd Qu.:9218868437227407266  
 Max.   :9218868437227407266   Max.   :5355      Max.   :9218868437227407266  
 NA's   :            6413250   NA's   :9357109   NA's   :            4689096  
 smart_240_raw             smart_241_raw          smart_242_raw           
 Min.   :              0   Min.   :           0   Min.   :             1  
 1st Qu.:           7552   1st Qu.: 37510271200   1st Qu.:   67912675784  
 Median :          12571   Median : 43794509624   Median :   92947379492  
 Mean   :  1796550365729   Mean   : 42077709475   Mean   :   94024240191  
 3rd Qu.:          24691   3rd Qu.: 49428172152   3rd Qu.:  111958104848  
 Max.   :281462091840688   Max.   :179003923936   Max.   :32377330241963  
 NA's   :        2048040   NA's   :     2259950   NA's   :       2259950  
 smart_250_raw      smart_251_raw     smart_252_raw     smart_254_raw    
 Min.   : 3153310   Min.   : 14518    Min.   :0         Min.   :0        
 1st Qu.: 7663916   1st Qu.: 36075    1st Qu.:0         1st Qu.:0        
 Median :10025026   Median : 49629    Median :0         Median :0        
 Mean   :15392228   Mean   : 60436    Mean   :0         Mean   :0        
 3rd Qu.:15888884   3rd Qu.: 76800    3rd Qu.:0         3rd Qu.:0        
 Max.   :54639907   Max.   :154487    Max.   :0         Max.   :0        
 NA's   :9357463    NA's   :9357463   NA's   :9357463   NA's   :9350964  
 smart_255_raw 
 Mode:logical  
 NA's:9357609  
               
               
               
               
               
# View(hd2 %>% filter(model2 == "HDS5C3030ALA630"))
# View(head(hd2, 1000))

Get some basic counts for manufacturer, model, and serial number.


hd2 %>% 
  count(manu, model2, serial_number) %>% 
  arrange(desc(n))

hd2 %>% 
  count(manu) %>% 
  arrange(desc(n))

hd2 %>% 
  count(model2) %>% 
  arrange(desc(n))
NA

Explore failure rates.


table(hd2$failure)

      0       1 
9357216     393 
prop.table(table(hd2$failure))

           0            1 
9.999580e-01 4.199791e-05 
cnts_manu <-
  hd2 %>% 
  count(manu)
  
cnts_manu_failure <-
  hd2 %>% 
  filter(failure == "1") %>% 
  count(manu)

manu_failures <-
  cnts_manu %>% 
  rename(cnt_overall = n) %>% 
  inner_join(y = cnts_manu_failure %>% 
               rename(cnt_failure = n),
             by = c("manu" = "manu")
             ) %>% 
  mutate(failure_pct = cnt_failure / cnt_overall) %>% 
  arrange(desc(failure_pct))
  
manu_failures


rm(cnts_manu, cnts_manu_failure)

Explore modeling just for Toshiba

Create train_valid and test datasets.


toshiba <-
  hd2 %>% 
  filter(manu == "TOSHIBA") %>% 
  mutate_if(is.factor, factor) %>% 
  select_if(negate(is.logical))


fwrite(toshiba,
       paste0(wd,
              "/Data/Interim/",
              "toshiba.csv"
              )
       )

# toshiba <-
#   fread(paste0(wd,
#                  "/Data/Interim/",
#                  "toshiba.csv"
#                  )
#           )

# toshiba[ , date := as_date(date)]
# toshiba[ , serial_number := factor(serial_number)]
# toshiba[ , manu := factor(manu)]
# toshiba[ , model2 := factor(model2)]


str(toshiba)
'data.frame':   206129 obs. of  61 variables:
 $ date          : Date, format: "2018-10-01" "2018-10-02" ...
 $ serial_number : Factor w/ 2343 levels "175PP3HDT","175PP3I4T",..: 1410 1410 1410 1410 1410 1410 1410 1410 1410 1410 ...
 $ manu          : Factor w/ 1 level "TOSHIBA": 1 1 1 1 1 1 1 1 1 1 ...
 $ model2        : Factor w/ 7 levels "HDWE160","HDWF180",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ capacity_bytes: num  2.96e-311 2.96e-311 2.96e-311 2.96e-311 2.96e-311 ...
 $ failure       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ smart_1_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_2_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_3_raw   : int  10345 10345 10345 10345 10345 10345 10345 10345 10345 10345 ...
 $ smart_4_raw   : int  2 2 2 2 2 2 2 2 2 2 ...
 $ smart_5_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_7_raw   :integer64 0 0 0 0 0 0 0 0 ... 
 $ smart_8_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_9_raw   : int  4254 4278 4302 4326 4350 4374 4397 4421 4446 4469 ...
 $ smart_10_raw  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_11_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_12_raw  : int  2 2 2 2 2 2 2 2 2 2 ...
 $ smart_16_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_17_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_22_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_23_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_24_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_168_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_170_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_173_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_174_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_177_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_183_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_184_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_187_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_188_raw :integer64 NA NA NA NA NA NA NA NA ... 
 $ smart_189_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_190_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_191_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_192_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_193_raw : int  4 4 4 4 4 4 4 4 4 4 ...
 $ smart_194_raw : int  22 22 22 22 22 22 22 22 22 22 ...
 $ smart_195_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_196_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_197_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_198_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_199_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_200_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_218_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_220_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_222_raw : int  4254 4278 4302 4326 4350 4373 4397 4421 4445 4469 ...
 $ smart_223_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_224_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_225_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_226_raw : int  630 630 630 630 630 630 630 630 630 630 ...
 $ smart_231_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_232_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_233_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_235_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_240_raw :integer64 0 0 0 0 0 0 0 0 ... 
 $ smart_241_raw :integer64 NA NA NA NA NA NA NA NA ... 
 $ smart_242_raw :integer64 NA NA NA NA NA NA NA NA ... 
 $ smart_250_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_251_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_252_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_254_raw : int  NA NA NA NA NA NA NA NA NA NA ...
summary(toshiba)
      date              serial_number         manu                model2      
 Min.   :2018-10-01   175PP3HDT:    92   TOSHIBA:206129   HDWE160    :   914  
 1st Qu.:2018-10-26   175PP3I4T:    92                    HDWF180    :  1820  
 Median :2018-11-17   175PP3I5T:    92                    MD04ABA400V: 13188  
 Mean   :2018-11-16   175PP3I6T:    92                    MD04ABA500V:  4095  
 3rd Qu.:2018-12-09   175PP3I8T:    92                    MG07ACA14TA:106177  
 Max.   :2018-12-31   175PP3I9T:    92                    MQ01ABF050 : 48732  
                      (Other)  :205577                    MQ01ABF050M: 31203  
 capacity_bytes failure     smart_1_raw  smart_2_raw  smart_3_raw     smart_4_raw     
 Min.   :0      0:206108   Min.   :0    Min.   :0    Min.   :  517   Min.   :  1.000  
 1st Qu.:0      1:    21   1st Qu.:0    1st Qu.:0    1st Qu.: 1508   1st Qu.:  2.000  
 Median :0                 Median :0    Median :0    Median : 7562   Median :  3.000  
 Mean   :0                 Mean   :0    Mean   :0    Mean   : 5155   Mean   :  4.388  
 3rd Qu.:0                 3rd Qu.:0    3rd Qu.:0    3rd Qu.: 7816   3rd Qu.:  6.000  
 Max.   :0                 Max.   :0    Max.   :0    Max.   :11042   Max.   :380.000  
 NA's   :5                 NA's   :5    NA's   :5    NA's   :5       NA's   :5        
  smart_5_raw        smart_7_raw  smart_8_raw  smart_9_raw     smart_10_raw
 Min.   :   0.000   Min.   :0    Min.   :0    Min.   :    3   Min.   :0    
 1st Qu.:   0.000   1st Qu.:0    1st Qu.:0    1st Qu.: 1097   1st Qu.:0    
 Median :   0.000   Median :0    Median :0    Median : 2102   Median :0    
 Mean   :   2.699   Mean   :0    Mean   :0    Mean   : 7404   Mean   :0    
 3rd Qu.:   0.000   3rd Qu.:0    3rd Qu.:0    3rd Qu.:11093   3rd Qu.:0    
 Max.   :2752.000   Max.   :0    Max.   :0    Max.   :33674   Max.   :0    
 NA's   :5          NA's   :5    NA's   :5    NA's   :5       NA's   :5    
  smart_11_raw     smart_12_raw     smart_16_raw     smart_17_raw     smart_22_raw   
 Min.   : NA      Min.   : 1.000   Min.   : NA      Min.   : NA      Min.   : NA     
 1st Qu.: NA      1st Qu.: 2.000   1st Qu.: NA      1st Qu.: NA      1st Qu.: NA     
 Median : NA      Median : 3.000   Median : NA      Median : NA      Median : NA     
 Mean   :NaN      Mean   : 4.201   Mean   :NaN      Mean   :NaN      Mean   :NaN     
 3rd Qu.: NA      3rd Qu.: 6.000   3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: NA     
 Max.   : NA      Max.   :67.000   Max.   : NA      Max.   : NA      Max.   : NA     
 NA's   :206129   NA's   :5        NA's   :206129   NA's   :206129   NA's   :206129  
  smart_23_raw    smart_24_raw   smart_168_raw    smart_170_raw   
 Min.   :0       Min.   :0       Min.   : NA      Min.   : NA     
 1st Qu.:0       1st Qu.:0       1st Qu.: NA      1st Qu.: NA     
 Median :0       Median :0       Median : NA      Median : NA     
 Mean   :0       Mean   :0       Mean   :NaN      Mean   :NaN     
 3rd Qu.:0       3rd Qu.:0       3rd Qu.: NA      3rd Qu.: NA     
 Max.   :0       Max.   :0       Max.   : NA      Max.   : NA     
 NA's   :99952   NA's   :99952   NA's   :206129   NA's   :206129  
 smart_173_raw                 smart_174_raw    smart_177_raw    smart_183_raw   
 Min.   :9218868437227407266   Min.   : NA      Min.   : NA      Min.   : NA     
 1st Qu.:9218868437227407266   1st Qu.: NA      1st Qu.: NA      1st Qu.: NA     
 Median :9218868437227407266   Median : NA      Median : NA      Median : NA     
 Mean   :9218868437227405446   Mean   :NaN      Mean   :NaN      Mean   :NaN     
 3rd Qu.:9218868437227407266   3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: NA     
 Max.   :9218868437227407266   Max.   : NA      Max.   : NA      Max.   : NA     
 NA's   :             143538   NA's   :206129   NA's   :206129   NA's   :206129  
 smart_184_raw    smart_187_raw    smart_188_raw                 smart_189_raw   
 Min.   : NA      Min.   : NA      Min.   :9218868437227407266   Min.   : NA     
 1st Qu.: NA      1st Qu.: NA      1st Qu.:9218868437227407266   1st Qu.: NA     
 Median : NA      Median : NA      Median :                 NA   Median : NA     
 Mean   :NaN      Mean   :NaN      Mean   :                 NA   Mean   :NaN     
 3rd Qu.: NA      3rd Qu.: NA      3rd Qu.:                 NA   3rd Qu.: NA     
 Max.   : NA      Max.   : NA      Max.   :                 NA   Max.   : NA     
 NA's   :206129   NA's   :206129   NA's   :             206129   NA's   :206129  
 smart_190_raw    smart_191_raw     smart_192_raw     smart_193_raw    smart_194_raw  
 Min.   : NA      Min.   :      0   Min.   : 0.0000   Min.   :     2   Min.   :12.00  
 1st Qu.: NA      1st Qu.:      0   1st Qu.: 0.0000   1st Qu.:    59   1st Qu.:27.00  
 Median : NA      Median :      0   Median : 0.0000   Median :    80   Median :30.00  
 Mean   :NaN      Mean   :   2683   Mean   : 0.6801   Mean   :  2626   Mean   :29.74  
 3rd Qu.: NA      3rd Qu.:      2   3rd Qu.: 1.0000   3rd Qu.:  1487   3rd Qu.:33.00  
 Max.   : NA      Max.   :4752298   Max.   :60.0000   Max.   :108361   Max.   :49.00  
 NA's   :206129   NA's   :5         NA's   :5         NA's   :5        NA's   :5      
 smart_195_raw    smart_196_raw      smart_197_raw       smart_198_raw     
 Min.   : NA      Min.   :  0.0000   Min.   :  0.00000   Min.   : 0.00000  
 1st Qu.: NA      1st Qu.:  0.0000   1st Qu.:  0.00000   1st Qu.: 0.00000  
 Median : NA      Median :  0.0000   Median :  0.00000   Median : 0.00000  
 Mean   :NaN      Mean   :  0.4245   Mean   :  0.02391   Mean   : 0.00249  
 3rd Qu.: NA      3rd Qu.:  0.0000   3rd Qu.:  0.00000   3rd Qu.: 0.00000  
 Max.   : NA      Max.   :497.0000   Max.   :256.00000   Max.   :57.00000  
 NA's   :206129   NA's   :5          NA's   :5           NA's   :5         
 smart_199_raw      smart_200_raw    smart_218_raw    smart_220_raw      
 Min.   : 0.00000   Min.   : NA      Min.   : NA      Min.   :        0  
 1st Qu.: 0.00000   1st Qu.: NA      1st Qu.: NA      1st Qu.:        0  
 Median : 0.00000   Median : NA      Median : NA      Median :   393217  
 Mean   : 0.04074   Mean   :NaN      Mean   :NaN      Mean   : 47079213  
 3rd Qu.: 0.00000   3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: 68681730  
 Max.   :32.00000   Max.   : NA      Max.   : NA      Max.   :287440904  
 NA's   :5          NA's   :206129   NA's   :206129   NA's   :5          
 smart_222_raw   smart_223_raw smart_224_raw smart_225_raw    smart_226_raw  
 Min.   :    3   Min.   :0     Min.   :0     Min.   : NA      Min.   :159.0  
 1st Qu.: 1078   1st Qu.:0     1st Qu.:0     1st Qu.: NA      1st Qu.:261.0  
 Median : 2079   Median :0     Median :0     Median : NA      Median :529.0  
 Mean   : 7378   Mean   :0     Mean   :0     Mean   :NaN      Mean   :409.9  
 3rd Qu.:11085   3rd Qu.:0     3rd Qu.:0     3rd Qu.: NA      3rd Qu.:534.0  
 Max.   :33653   Max.   :0     Max.   :0     Max.   : NA      Max.   :650.0  
 NA's   :5       NA's   :5     NA's   :5     NA's   :206129   NA's   :5      
 smart_231_raw                 smart_232_raw                 smart_233_raw   
 Min.   :9218868437227407266   Min.   :9218868437227407266   Min.   : NA     
 1st Qu.:9218868437227407266   1st Qu.:9218868437227407266   1st Qu.: NA     
 Median :9218868437227407266   Median :9218868437227407266   Median : NA     
 Mean   :9218868437227405446   Mean   :9218868437227405446   Mean   :NaN     
 3rd Qu.:9218868437227407266   3rd Qu.:9218868437227407266   3rd Qu.: NA     
 Max.   :9218868437227407266   Max.   :9218868437227407266   Max.   : NA     
 NA's   :             143538   NA's   :             143538   NA's   :206129  
 smart_235_raw                 smart_240_raw smart_241_raw                
 Min.   :9218868437227407266   Min.   :0     Min.   :9218868437227407266  
 1st Qu.:9218868437227407266   1st Qu.:0     1st Qu.:9218868437227407266  
 Median :9218868437227407266   Median :0     Median :                 NA  
 Mean   :9218868437227405394   Mean   :0     Mean   :                 NA  
 3rd Qu.:9218868437227407266   3rd Qu.:0     3rd Qu.:                 NA  
 Max.   :9218868437227407266   Max.   :0     Max.   :                 NA  
 NA's   :             104252   NA's   :5     NA's   :             206129  
 smart_242_raw                 smart_250_raw    smart_251_raw    smart_252_raw   
 Min.   :9218868437227407266   Min.   : NA      Min.   : NA      Min.   : NA     
 1st Qu.:9218868437227407266   1st Qu.: NA      1st Qu.: NA      1st Qu.: NA     
 Median :                 NA   Median : NA      Median : NA      Median : NA     
 Mean   :                 NA   Mean   :NaN      Mean   :NaN      Mean   :NaN     
 3rd Qu.:                 NA   3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: NA     
 Max.   :                 NA   Max.   : NA      Max.   : NA      Max.   : NA     
 NA's   :             206129   NA's   :206129   NA's   :206129   NA's   :206129  
 smart_254_raw   
 Min.   : NA     
 1st Qu.: NA     
 Median : NA     
 Mean   :NaN     
 3rd Qu.: NA     
 Max.   : NA     
 NA's   :206129  
  

Feature Engineering

One-hot encode the data.


message("toshiba")
toshiba
str(toshiba)
'data.frame':   206129 obs. of  61 variables:
 $ date          : Date, format: "2018-10-01" "2018-10-02" ...
 $ serial_number : Factor w/ 2343 levels "175PP3HDT","175PP3I4T",..: 1410 1410 1410 1410 1410 1410 1410 1410 1410 1410 ...
 $ manu          : Factor w/ 1 level "TOSHIBA": 1 1 1 1 1 1 1 1 1 1 ...
 $ model2        : Factor w/ 7 levels "HDWE160","HDWF180",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ capacity_bytes: num  2.96e-311 2.96e-311 2.96e-311 2.96e-311 2.96e-311 ...
 $ failure       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ smart_1_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_2_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_3_raw   : int  10345 10345 10345 10345 10345 10345 10345 10345 10345 10345 ...
 $ smart_4_raw   : int  2 2 2 2 2 2 2 2 2 2 ...
 $ smart_5_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_7_raw   :integer64 0 0 0 0 0 0 0 0 ... 
 $ smart_8_raw   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_9_raw   : int  4254 4278 4302 4326 4350 4374 4397 4421 4446 4469 ...
 $ smart_10_raw  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_11_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_12_raw  : int  2 2 2 2 2 2 2 2 2 2 ...
 $ smart_16_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_17_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_22_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_23_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_24_raw  : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_168_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_170_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_173_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_174_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_177_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_183_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_184_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_187_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_188_raw :integer64 NA NA NA NA NA NA NA NA ... 
 $ smart_189_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_190_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_191_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_192_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_193_raw : int  4 4 4 4 4 4 4 4 4 4 ...
 $ smart_194_raw : int  22 22 22 22 22 22 22 22 22 22 ...
 $ smart_195_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_196_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_197_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_198_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_199_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_200_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_218_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_220_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_222_raw : int  4254 4278 4302 4326 4350 4373 4397 4421 4445 4469 ...
 $ smart_223_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_224_raw : int  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_225_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_226_raw : int  630 630 630 630 630 630 630 630 630 630 ...
 $ smart_231_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_232_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_233_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_235_raw :integer64 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 9218868437227407266 ... 
 $ smart_240_raw :integer64 0 0 0 0 0 0 0 0 ... 
 $ smart_241_raw :integer64 NA NA NA NA NA NA NA NA ... 
 $ smart_242_raw :integer64 NA NA NA NA NA NA NA NA ... 
 $ smart_250_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_251_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_252_raw : int  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_254_raw : int  NA NA NA NA NA NA NA NA NA NA ...
one_hot <- dummyVars(failure ~ .,
                     data = toshiba %>% 
                       select(-date,
                              -serial_number,
                              -manu
                              ) %>%
                       select_if(negate(is.logical)
                                 )
                     )


toshiba_one_hot <-
  toshiba %>% 
  select(failure,
         date,
         # serial_number,
         manu) %>% 
  bind_cols(predict(object = one_hot,
                    newdata = toshiba %>% 
                      select(-date,
                             -serial_number,
                             -manu
                             ) %>%
                       select_if(negate(is.logical)
                                 )
                    ) %>% 
              as.data.frame()
            )
variable 'failure' is not a factor
message("toshiba_one_hot")
toshiba_one_hot
str(toshiba_one_hot)
'data.frame':   206129 obs. of  66 variables:
 $ failure           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ date              : Date, format: "2018-10-01" "2018-10-02" ...
 $ manu              : Factor w/ 1 level "TOSHIBA": 1 1 1 1 1 1 1 1 1 1 ...
 $ model2.HDWE160    : num  1 1 1 1 1 1 1 1 1 1 ...
 $ model2.HDWF180    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ model2.MD04ABA400V: num  0 0 0 0 0 0 0 0 0 0 ...
 $ model2.MD04ABA500V: num  0 0 0 0 0 0 0 0 0 0 ...
 $ model2.MG07ACA14TA: num  0 0 0 0 0 0 0 0 0 0 ...
 $ model2.MQ01ABF050 : num  0 0 0 0 0 0 0 0 0 0 ...
 $ model2.MQ01ABF050M: num  0 0 0 0 0 0 0 0 0 0 ...
 $ capacity_bytes    : num  2.96e-311 2.96e-311 2.96e-311 2.96e-311 2.96e-311 ...
 $ smart_1_raw       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_2_raw       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_3_raw       : num  10345 10345 10345 10345 10345 ...
 $ smart_4_raw       : num  2 2 2 2 2 2 2 2 2 2 ...
 $ smart_5_raw       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_7_raw       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_8_raw       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_9_raw       : num  4254 4278 4302 4326 4350 ...
 $ smart_10_raw      : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_11_raw      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_12_raw      : num  2 2 2 2 2 2 2 2 2 2 ...
 $ smart_16_raw      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_17_raw      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_22_raw      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_23_raw      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_24_raw      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_168_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_170_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_173_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_174_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_177_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_183_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_184_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_187_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_188_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_189_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_190_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_191_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_192_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_193_raw     : num  4 4 4 4 4 4 4 4 4 4 ...
 $ smart_194_raw     : num  22 22 22 22 22 22 22 22 22 22 ...
 $ smart_195_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_196_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_197_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_198_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_199_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_200_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_218_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_220_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_222_raw     : num  4254 4278 4302 4326 4350 ...
 $ smart_223_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_224_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_225_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_226_raw     : num  630 630 630 630 630 630 630 630 630 630 ...
 $ smart_231_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_232_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_233_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_235_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_240_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_241_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_242_raw     : num  0 0 0 0 0 0 0 0 0 0 ...
 $ smart_250_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_251_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_252_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_254_raw     : num  NA NA NA NA NA NA NA NA NA NA ...
summary(toshiba_one_hot)
 failure         date                 manu        model2.HDWE160     model2.HDWF180    
 0:206108   Min.   :2018-10-01   TOSHIBA:206129   Min.   :0.000000   Min.   :0.000000  
 1:    21   1st Qu.:2018-10-26                    1st Qu.:0.000000   1st Qu.:0.000000  
            Median :2018-11-17                    Median :0.000000   Median :0.000000  
            Mean   :2018-11-16                    Mean   :0.004434   Mean   :0.008829  
            3rd Qu.:2018-12-09                    3rd Qu.:0.000000   3rd Qu.:0.000000  
            Max.   :2018-12-31                    Max.   :1.000000   Max.   :1.000000  
                                                                                       
 model2.MD04ABA400V model2.MD04ABA500V model2.MG07ACA14TA model2.MQ01ABF050
 Min.   :0.00000    Min.   :0.00000    Min.   :0.0000     Min.   :0.0000   
 1st Qu.:0.00000    1st Qu.:0.00000    1st Qu.:0.0000     1st Qu.:0.0000   
 Median :0.00000    Median :0.00000    Median :1.0000     Median :0.0000   
 Mean   :0.06398    Mean   :0.01987    Mean   :0.5151     Mean   :0.2364   
 3rd Qu.:0.00000    3rd Qu.:0.00000    3rd Qu.:1.0000     3rd Qu.:0.0000   
 Max.   :1.00000    Max.   :1.00000    Max.   :1.0000     Max.   :1.0000   
                                                                           
 model2.MQ01ABF050M capacity_bytes  smart_1_raw  smart_2_raw  smart_3_raw   
 Min.   :0.0000     Min.   :0      Min.   :0    Min.   :0    Min.   :  517  
 1st Qu.:0.0000     1st Qu.:0      1st Qu.:0    1st Qu.:0    1st Qu.: 1508  
 Median :0.0000     Median :0      Median :0    Median :0    Median : 7562  
 Mean   :0.1514     Mean   :0      Mean   :0    Mean   :0    Mean   : 5155  
 3rd Qu.:0.0000     3rd Qu.:0      3rd Qu.:0    3rd Qu.:0    3rd Qu.: 7816  
 Max.   :1.0000     Max.   :0      Max.   :0    Max.   :0    Max.   :11042  
                    NA's   :5      NA's   :5    NA's   :5    NA's   :5      
  smart_4_raw       smart_5_raw        smart_7_raw  smart_8_raw  smart_9_raw   
 Min.   :  1.000   Min.   :   0.000   Min.   :0    Min.   :0    Min.   :    3  
 1st Qu.:  2.000   1st Qu.:   0.000   1st Qu.:0    1st Qu.:0    1st Qu.: 1097  
 Median :  3.000   Median :   0.000   Median :0    Median :0    Median : 2102  
 Mean   :  4.388   Mean   :   2.699   Mean   :0    Mean   :0    Mean   : 7404  
 3rd Qu.:  6.000   3rd Qu.:   0.000   3rd Qu.:0    3rd Qu.:0    3rd Qu.:11093  
 Max.   :380.000   Max.   :2752.000   Max.   :0    Max.   :0    Max.   :33674  
 NA's   :5         NA's   :5                       NA's   :5    NA's   :5      
  smart_10_raw  smart_11_raw     smart_12_raw     smart_16_raw     smart_17_raw   
 Min.   :0     Min.   : NA      Min.   : 1.000   Min.   : NA      Min.   : NA     
 1st Qu.:0     1st Qu.: NA      1st Qu.: 2.000   1st Qu.: NA      1st Qu.: NA     
 Median :0     Median : NA      Median : 3.000   Median : NA      Median : NA     
 Mean   :0     Mean   :NaN      Mean   : 4.201   Mean   :NaN      Mean   :NaN     
 3rd Qu.:0     3rd Qu.: NA      3rd Qu.: 6.000   3rd Qu.: NA      3rd Qu.: NA     
 Max.   :0     Max.   : NA      Max.   :67.000   Max.   : NA      Max.   : NA     
 NA's   :5     NA's   :206129   NA's   :5        NA's   :206129   NA's   :206129  
  smart_22_raw     smart_23_raw    smart_24_raw   smart_168_raw    smart_170_raw   
 Min.   : NA      Min.   :0       Min.   :0       Min.   : NA      Min.   : NA     
 1st Qu.: NA      1st Qu.:0       1st Qu.:0       1st Qu.: NA      1st Qu.: NA     
 Median : NA      Median :0       Median :0       Median : NA      Median : NA     
 Mean   :NaN      Mean   :0       Mean   :0       Mean   :NaN      Mean   :NaN     
 3rd Qu.: NA      3rd Qu.:0       3rd Qu.:0       3rd Qu.: NA      3rd Qu.: NA     
 Max.   : NA      Max.   :0       Max.   :0       Max.   : NA      Max.   : NA     
 NA's   :206129   NA's   :99952   NA's   :99952   NA's   :206129   NA's   :206129  
 smart_173_raw   smart_174_raw    smart_177_raw    smart_183_raw    smart_184_raw   
 Min.   :0       Min.   : NA      Min.   : NA      Min.   : NA      Min.   : NA     
 1st Qu.:0       1st Qu.: NA      1st Qu.: NA      1st Qu.: NA      1st Qu.: NA     
 Median :0       Median : NA      Median : NA      Median : NA      Median : NA     
 Mean   :0       Mean   :NaN      Mean   :NaN      Mean   :NaN      Mean   :NaN     
 3rd Qu.:0       3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: NA     
 Max.   :0       Max.   : NA      Max.   : NA      Max.   : NA      Max.   : NA     
 NA's   :62591   NA's   :206129   NA's   :206129   NA's   :206129   NA's   :206129  
 smart_187_raw    smart_188_raw smart_189_raw    smart_190_raw    smart_191_raw    
 Min.   : NA      Min.   :0     Min.   : NA      Min.   : NA      Min.   :      0  
 1st Qu.: NA      1st Qu.:0     1st Qu.: NA      1st Qu.: NA      1st Qu.:      0  
 Median : NA      Median :0     Median : NA      Median : NA      Median :      0  
 Mean   :NaN      Mean   :0     Mean   :NaN      Mean   :NaN      Mean   :   2683  
 3rd Qu.: NA      3rd Qu.:0     3rd Qu.: NA      3rd Qu.: NA      3rd Qu.:      2  
 Max.   : NA      Max.   :0     Max.   : NA      Max.   : NA      Max.   :4752298  
 NA's   :206129                 NA's   :206129   NA's   :206129   NA's   :5        
 smart_192_raw     smart_193_raw    smart_194_raw   smart_195_raw    smart_196_raw     
 Min.   : 0.0000   Min.   :     2   Min.   :12.00   Min.   : NA      Min.   :  0.0000  
 1st Qu.: 0.0000   1st Qu.:    59   1st Qu.:27.00   1st Qu.: NA      1st Qu.:  0.0000  
 Median : 0.0000   Median :    80   Median :30.00   Median : NA      Median :  0.0000  
 Mean   : 0.6801   Mean   :  2626   Mean   :29.74   Mean   :NaN      Mean   :  0.4245  
 3rd Qu.: 1.0000   3rd Qu.:  1487   3rd Qu.:33.00   3rd Qu.: NA      3rd Qu.:  0.0000  
 Max.   :60.0000   Max.   :108361   Max.   :49.00   Max.   : NA      Max.   :497.0000  
 NA's   :5         NA's   :5        NA's   :5       NA's   :206129   NA's   :5         
 smart_197_raw       smart_198_raw      smart_199_raw      smart_200_raw   
 Min.   :  0.00000   Min.   : 0.00000   Min.   : 0.00000   Min.   : NA     
 1st Qu.:  0.00000   1st Qu.: 0.00000   1st Qu.: 0.00000   1st Qu.: NA     
 Median :  0.00000   Median : 0.00000   Median : 0.00000   Median : NA     
 Mean   :  0.02391   Mean   : 0.00249   Mean   : 0.04074   Mean   :NaN     
 3rd Qu.:  0.00000   3rd Qu.: 0.00000   3rd Qu.: 0.00000   3rd Qu.: NA     
 Max.   :256.00000   Max.   :57.00000   Max.   :32.00000   Max.   : NA     
 NA's   :5           NA's   :5          NA's   :5          NA's   :206129  
 smart_218_raw    smart_220_raw       smart_222_raw   smart_223_raw smart_224_raw
 Min.   : NA      Min.   :        0   Min.   :    3   Min.   :0     Min.   :0    
 1st Qu.: NA      1st Qu.:        0   1st Qu.: 1078   1st Qu.:0     1st Qu.:0    
 Median : NA      Median :   393217   Median : 2079   Median :0     Median :0    
 Mean   :NaN      Mean   : 47079213   Mean   : 7378   Mean   :0     Mean   :0    
 3rd Qu.: NA      3rd Qu.: 68681730   3rd Qu.:11085   3rd Qu.:0     3rd Qu.:0    
 Max.   : NA      Max.   :287440904   Max.   :33653   Max.   :0     Max.   :0    
 NA's   :206129   NA's   :5           NA's   :5       NA's   :5     NA's   :5    
 smart_225_raw    smart_226_raw   smart_231_raw   smart_232_raw   smart_233_raw   
 Min.   : NA      Min.   :159.0   Min.   :0       Min.   :0       Min.   : NA     
 1st Qu.: NA      1st Qu.:261.0   1st Qu.:0       1st Qu.:0       1st Qu.: NA     
 Median : NA      Median :529.0   Median :0       Median :0       Median : NA     
 Mean   :NaN      Mean   :409.9   Mean   :0       Mean   :0       Mean   :NaN     
 3rd Qu.: NA      3rd Qu.:534.0   3rd Qu.:0       3rd Qu.:0       3rd Qu.: NA     
 Max.   : NA      Max.   :650.0   Max.   :0       Max.   :0       Max.   : NA     
 NA's   :206129   NA's   :5       NA's   :62591   NA's   :62591   NA's   :206129  
 smart_235_raw    smart_240_raw smart_241_raw smart_242_raw smart_250_raw   
 Min.   :0        Min.   :0     Min.   :0     Min.   :0     Min.   : NA     
 1st Qu.:0        1st Qu.:0     1st Qu.:0     1st Qu.:0     1st Qu.: NA     
 Median :0        Median :0     Median :0     Median :0     Median : NA     
 Mean   :0        Mean   :0     Mean   :0     Mean   :0     Mean   :NaN     
 3rd Qu.:0        3rd Qu.:0     3rd Qu.:0     3rd Qu.:0     3rd Qu.: NA     
 Max.   :0        Max.   :0     Max.   :0     Max.   :0     Max.   : NA     
 NA's   :101877                                             NA's   :206129  
 smart_251_raw    smart_252_raw    smart_254_raw   
 Min.   : NA      Min.   : NA      Min.   : NA     
 1st Qu.: NA      1st Qu.: NA      1st Qu.: NA     
 Median : NA      Median : NA      Median : NA     
 Mean   :NaN      Mean   :NaN      Mean   :NaN     
 3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: NA     
 Max.   : NA      Max.   : NA      Max.   : NA     
 NA's   :206129   NA's   :206129   NA's   :206129  
rm(one_hot, toshiba)

Create separate train_valid and the test data sets.


set.seed(123456789)
train_index <-
  createDataPartition(y = toshiba_one_hot$failure,
                      p = .70,
                      times = 1,
                      list = FALSE
                      )


hd2_train_valid <-
  toshiba_one_hot[train_index, ] %>% 
  select(-date)

hd2_test <-
  toshiba_one_hot[-train_index, ] %>%
  select(-date)

Create separate train and valid datasets.


set.seed(123456789)
train_index <-
  createDataPartition(y = hd2_train_valid$failure,
                      p = .70,
                      times = 1,
                      list = FALSE
                      )


hd2_train <- hd2_train_valid[train_index, ]
hd2_valid <- hd2_train_valid[-train_index, ]

message("hd2_train$failure")
hd2_train$failure
table(hd2_train$failure)

     0      1 
100994     11 
prop.table(table(hd2_train$failure))

           0            1 
0.9998910945 0.0001089055 
class(hd2_train)
[1] "data.frame"
glimpse(hd2_train)
Observations: 101,005
Variables: 65
$ failure            <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ manu               <fct> TOSHIBA, TOSHIBA, TOSHIBA, TOSHIBA, TOSHIBA, TOSHIBA, TOSH…
$ model2.HDWE160     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ model2.HDWF180     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ model2.MD04ABA400V <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ model2.MD04ABA500V <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ model2.MG07ACA14TA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ model2.MQ01ABF050  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ model2.MQ01ABF050M <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ capacity_bytes     <dbl> 2.964974e-311, 2.964974e-311, 2.964974e-311, 2.964974e-311…
$ smart_1_raw        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_2_raw        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_3_raw        <dbl> 10345, 10345, 10345, 10345, 10345, 10345, 10345, 10345, 10…
$ smart_4_raw        <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
$ smart_5_raw        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_7_raw        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_8_raw        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_9_raw        <dbl> 4302, 4326, 4350, 4374, 4421, 4446, 4469, 4494, 4518, 4542…
$ smart_10_raw       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_11_raw       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_12_raw       <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
$ smart_16_raw       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_17_raw       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_22_raw       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_23_raw       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_24_raw       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_168_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_170_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_173_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_174_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_177_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_183_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_184_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_187_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_188_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_189_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_190_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_191_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_192_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_193_raw      <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4…
$ smart_194_raw      <dbl> 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22…
$ smart_195_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_196_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_197_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_198_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_199_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_200_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_218_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_220_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_222_raw      <dbl> 4302, 4326, 4350, 4373, 4421, 4445, 4469, 4494, 4518, 4541…
$ smart_223_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_224_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_225_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_226_raw      <dbl> 630, 630, 630, 630, 630, 630, 630, 630, 630, 630, 630, 630…
$ smart_231_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_232_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_233_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_235_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_240_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_241_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_242_raw      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ smart_250_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_251_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_252_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ smart_254_raw      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

Use the recipe package for multiple feature engineering steps.


# create the recipe object
rec_obj <- recipe(failure ~ ., data = hd2_train)


# implement each recipe step
rec_steps <-
  rec_obj %>% 
  step_shuffle(all_predictors()) %>% 
  step_nzv(all_predictors()) %>%  # helps reduce the number of variables with near zero variance (including NA values)
  step_log(all_predictors(), offset = 1) %>%  # puts less emphasis on "outliers"
  step_center(all_predictors()) %>%
  step_scale(all_predictors()) %>% 
  step_medianimpute(all_predictors())

rec_steps
Data Recipe

Inputs:

Operations:

Shuffled all_predictors()
Sparse, unbalanced variable filter on all_predictors()
Log transformation on all_predictors()
Centering for all_predictors()
Scaling for all_predictors()
Median Imputation for all_predictors()
# create the recipe based on the TRAIN data set
trained_rec <- prep(rec_steps, training = hd2_train)

trained_rec
Data Recipe

Inputs:

Training data contained 101005 data points and 101005 incomplete rows. 

Operations:

Shuffled manu, model2.HDWE160, model2.HDWF180, ... [trained]
Sparse, unbalanced variable filter removed manu, model2.HDWE160, ... [trained]
Log transformation on model2.MD04ABA400V, ... [trained]
Centering for model2.MD04ABA400V, ... [trained]
Scaling for model2.MD04ABA400V, ... [trained]
Median Imputation for model2.MD04ABA400V, ... [trained]
# apply the recipe to the the train, valid, and test datasets
train_data <- bake(trained_rec, new_data = hd2_train)
valid_data  <- bake(trained_rec, new_data = hd2_valid)
test_data  <- bake(trained_rec, new_data = hd2_test)


# View(hd2_train %>% head(1000))
# View(train_data %>% head(1000))


message("train_data")
train_data
dim(train_data)
[1] 101005     16
str(train_data)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   101005 obs. of  16 variables:
 $ failure           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ model2.MD04ABA400V: num  -0.262 -0.262 -0.262 3.822 -0.262 ...
 $ model2.MG07ACA14TA: num  -1.032 -1.032 0.969 0.969 0.969 ...
 $ model2.MQ01ABF050 : num  -0.557 -0.557 -0.557 -0.557 -0.557 ...
 $ model2.MQ01ABF050M: num  -0.42 -0.42 -0.42 -0.42 -0.42 ...
 $ capacity_bytes    : num  NA NA NA NA NA NA NA NA NA NA ...
 $ smart_3_raw       : num  0.855 -0.937 0.866 0.86 0.84 ...
 $ smart_4_raw       : num  1.033 3.518 1.033 0.241 0.548 ...
 $ smart_9_raw       : num  1.149 0.53 -1.952 1.272 -0.429 ...
 $ smart_12_raw      : num  -1.275 -0.596 0.822 1.046 -0.596 ...
 $ smart_191_raw     : num  1.1738 1.398 -0.5619 0.0564 -0.5619 ...
 $ smart_192_raw     : num  -0.728 -0.728 0.675 -0.728 0.675 ...
 $ smart_193_raw     : num  -0.62 -0.872 -0.896 -0.404 1.154 ...
 $ smart_194_raw     : num  0.137 0.903 -0.205 1.18 0.61 ...
 $ smart_222_raw     : num  -0.509 -0.506 -0.79 1.125 -0.405 ...
 $ smart_226_raw     : num  0.837 -0.958 -0.911 0.814 1.081 ...
summary(train_data)
 failure    model2.MD04ABA400V model2.MG07ACA14TA model2.MQ01ABF050 model2.MQ01ABF050M
 0:100994   Min.   :-0.2617    Min.   :-1.0324    Min.   :-0.5565   Min.   :-0.4204   
 1:    11   1st Qu.:-0.2617    1st Qu.:-1.0324    1st Qu.:-0.5565   1st Qu.:-0.4204   
            Median :-0.2617    Median : 0.9686    Median :-0.5565   Median :-0.4204   
            Mean   : 0.0000    Mean   : 0.0000    Mean   : 0.0000   Mean   : 0.0000   
            3rd Qu.:-0.2617    3rd Qu.: 0.9686    3rd Qu.:-0.5565   3rd Qu.:-0.4204   
            Max.   : 3.8218    Max.   : 0.9686    Max.   : 1.7968   Max.   : 2.3786   
                                                                                      
 capacity_bytes    smart_3_raw          smart_4_raw         smart_9_raw       
 Min.   : NA      Min.   :-2.6661165   Min.   :-1.301765   Min.   :-4.522747  
 1st Qu.: NA      1st Qu.:-1.2737431   1st Qu.:-0.618948   1st Qu.:-0.691515  
 Median : NA      Median : 0.8187356   Median :-0.134482   Median :-0.260400  
 Mean   :NaN      Mean   : 0.0000162   Mean   :-0.000003   Mean   :-0.000005  
 3rd Qu.: NA      3rd Qu.: 0.8615058   3rd Qu.: 0.807930   3rd Qu.: 0.873419  
 Max.   : NA      Max.   : 1.3107509   Max.   : 7.538825   Max.   : 1.627442  
 NA's   :101005                                                               
  smart_12_raw       smart_191_raw       smart_192_raw       smart_193_raw      
 Min.   :-1.274805   Min.   :-0.561869   Min.   :-0.728452   Min.   :-2.210091  
 1st Qu.:-0.596015   1st Qu.:-0.561869   1st Qu.:-0.728452   1st Qu.:-0.704389  
 Median :-0.114405   Median :-0.561869   Median :-0.728452   Median :-0.553552  
 Mean   :-0.000002   Mean   :-0.000011   Mean   :-0.000014   Mean   :-0.000011  
 3rd Qu.: 0.822450   3rd Qu.: 0.418071   3rd Qu.: 0.675337   3rd Qu.: 0.906041  
 Max.   : 4.628689   Max.   :13.151356   Max.   : 7.597056   Max.   : 3.064668  
                                                                                
 smart_194_raw       smart_222_raw       smart_226_raw      
 Min.   :-4.314398   Min.   :-4.390568   Min.   :-2.131422  
 1st Qu.:-0.384310   1st Qu.:-0.677479   1st Qu.:-0.920394  
 Median : 0.137049   Median :-0.243378   Median : 0.809652  
 Mean   : 0.000003   Mean   :-0.000005   Mean   : 0.000016  
 3rd Qu.: 0.610210   3rd Qu.: 0.869443   3rd Qu.: 0.832710  
 Max.   : 2.585678   Max.   : 1.604504   Max.   : 1.307047  
                                                            
message("valid_data")
valid_data
summary(valid_data)
 failure   model2.MD04ABA400V  model2.MG07ACA14TA  model2.MQ01ABF050   
 0:43282   Min.   :-0.261653   Min.   :-1.032430   Min.   :-0.5565456  
 1:    4   1st Qu.:-0.261653   1st Qu.:-1.032430   1st Qu.:-0.5565456  
           Median :-0.261653   Median : 0.968579   Median :-0.5565456  
           Mean   :-0.002132   Mean   :-0.008719   Mean   : 0.0001171  
           3rd Qu.:-0.261653   3rd Qu.: 0.968579   3rd Qu.:-0.5565456  
           Max.   : 3.821821   Max.   : 0.968579   Max.   : 1.7967801  
                                                                       
 model2.MQ01ABF050M capacity_bytes   smart_3_raw         smart_4_raw       
 Min.   :-0.42041   Min.   : NA     Min.   :-2.666116   Min.   :-1.301765  
 1st Qu.:-0.42041   1st Qu.: NA     1st Qu.:-1.282368   1st Qu.:-0.618948  
 Median :-0.42041   Median : NA     Median : 0.699483   Median :-0.134482  
 Mean   : 0.01374   Mean   :NaN     Mean   :-0.009821   Mean   :-0.001311  
 3rd Qu.:-0.42041   3rd Qu.: NA     3rd Qu.: 0.861672   3rd Qu.: 0.807930  
 Max.   : 2.37859   Max.   : NA     Max.   : 1.310751   Max.   : 7.538825  
                    NA's   :43286                                          
  smart_9_raw         smart_12_raw       smart_191_raw       smart_192_raw      
 Min.   :-3.720713   Min.   :-1.274805   Min.   :-0.561869   Min.   :-0.728452  
 1st Qu.:-0.703210   1st Qu.:-0.596015   1st Qu.:-0.561869   1st Qu.:-0.728452  
 Median :-0.254274   Median :-0.114405   Median :-0.561869   Median :-0.728452  
 Mean   : 0.000725   Mean   :-0.001094   Mean   : 0.005503   Mean   :-0.003547  
 3rd Qu.: 0.870413   3rd Qu.: 0.822450   3rd Qu.: 0.418071   3rd Qu.: 0.675337  
 Max.   : 1.627422   Max.   : 4.628689   Max.   :13.151554   Max.   : 7.597056  
                                                                                
 smart_193_raw       smart_194_raw       smart_222_raw       smart_226_raw      
 Min.   :-2.210091   Min.   :-4.314398   Min.   :-3.608709   Min.   :-2.131422  
 1st Qu.:-0.704389   1st Qu.:-0.384310   1st Qu.:-0.677479   1st Qu.:-0.920394  
 Median :-0.553552   Median : 0.137049   Median :-0.239555   Median : 0.809652  
 Mean   : 0.003183   Mean   : 0.008034   Mean   : 0.000771   Mean   :-0.009393  
 3rd Qu.: 0.919132   3rd Qu.: 0.758692   3rd Qu.: 0.866452   3rd Qu.: 0.832710  
 Max.   : 3.064668   Max.   : 2.482194   Max.   : 1.604484   Max.   : 1.295670  
                                                                                
message("test_data")
test_data
summary(test_data)
 failure   model2.MD04ABA400V  model2.MG07ACA14TA   model2.MQ01ABF050   
 0:61832   Min.   :-0.261653   Min.   :-1.0324299   Min.   :-0.5565456  
 1:    6   1st Qu.:-0.261653   1st Qu.:-1.0324299   1st Qu.:-0.5565456  
           Median :-0.261653   Median : 0.9685791   Median :-0.5565456  
           Mean   : 0.000176   Mean   : 0.0004011   Mean   :-0.0006953  
           3rd Qu.:-0.261653   3rd Qu.: 0.9685791   3rd Qu.:-0.5565456  
           Max.   : 3.821821   Max.   : 0.9685791   Max.   : 1.7967801  
                                                                        
 model2.MQ01ABF050M  capacity_bytes   smart_3_raw         smart_4_raw       
 Min.   :-0.420412   Min.   : NA     Min.   :-2.666116   Min.   :-1.301765  
 1st Qu.:-0.420412   1st Qu.: NA     1st Qu.:-1.276325   1st Qu.:-0.618948  
 Median :-0.420412   Median : NA     Median : 0.819079   Median :-0.134482  
 Mean   : 0.001354   Mean   :NaN     Mean   :-0.001042   Mean   :-0.003516  
 3rd Qu.:-0.420412   3rd Qu.: NA     3rd Qu.: 0.861672   3rd Qu.: 0.807930  
 Max.   : 2.378595   Max.   : NA     Max.   : 1.310751   Max.   : 7.538825  
                     NA's   :61838                                          
  smart_9_raw          smart_12_raw       smart_191_raw       smart_192_raw    
 Min.   :-3.1516761   Min.   :-1.274805   Min.   :-0.561869   Min.   :-0.7285  
 1st Qu.:-0.7038315   1st Qu.:-0.596015   1st Qu.:-0.561869   1st Qu.:-0.7285  
 Median :-0.2607243   Median :-0.114405   Median :-0.561869   Median :-0.7285  
 Mean   :-0.0009615   Mean   :-0.004237   Mean   : 0.001789   Mean   : 0.0011  
 3rd Qu.: 0.8703519   3rd Qu.: 0.822450   3rd Qu.: 0.418071   3rd Qu.: 0.6753  
 Max.   : 1.6274017   Max.   : 4.628689   Max.   :13.151357   Max.   : 7.5971  
                                                                               
 smart_193_raw        smart_194_raw       smart_222_raw       smart_226_raw      
 Min.   :-2.2100905   Min.   :-4.314398   Min.   :-3.430757   Min.   :-2.131422  
 1st Qu.:-0.7043892   1st Qu.:-0.384310   1st Qu.:-0.679943   1st Qu.:-0.920394  
 Median :-0.5535520   Median : 0.137049   Median :-0.243698   Median : 0.809652  
 Mean   :-0.0004676   Mean   : 0.002912   Mean   :-0.000962   Mean   : 0.001625  
 3rd Qu.: 0.9005704   3rd Qu.: 0.610210   3rd Qu.: 0.866392   3rd Qu.: 0.837295  
 Max.   : 3.0646676   Max.   : 2.585678   Max.   : 1.604484   Max.   : 1.314603  
                                                                                 

Add weight_col, as this is a potential parameter to be used. The vlaue of 100 is arbitrarily chosen to make predicting failure 100 times more important than predicting non_failure.


train_data <-
  train_data %>%
  mutate(weight_col = case_when(failure == 1 ~ 100,
                                failure == 0 ~ 1,
                                TRUE ~ 0
                                )
         )

valid_data <-
  valid_data %>%
  mutate(weight_col = case_when(failure == 1 ~ 100,
                                failure == 0 ~ 1,
                                TRUE ~ 0
                                )
         )

test_data <-
  test_data %>%
  mutate(weight_col = case_when(failure == 1 ~ 100,
                                failure == 0 ~ 1,
                                TRUE ~ 0
                                )
         )

message("train_data")
train_data
dim(train_data)
[1] 101005     17
summary(train_data)
 failure    model2.MD04ABA400V model2.MG07ACA14TA model2.MQ01ABF050 model2.MQ01ABF050M
 0:100994   Min.   :-0.2617    Min.   :-1.0324    Min.   :-0.5565   Min.   :-0.4204   
 1:    11   1st Qu.:-0.2617    1st Qu.:-1.0324    1st Qu.:-0.5565   1st Qu.:-0.4204   
            Median :-0.2617    Median : 0.9686    Median :-0.5565   Median :-0.4204   
            Mean   : 0.0000    Mean   : 0.0000    Mean   : 0.0000   Mean   : 0.0000   
            3rd Qu.:-0.2617    3rd Qu.: 0.9686    3rd Qu.:-0.5565   3rd Qu.:-0.4204   
            Max.   : 3.8218    Max.   : 0.9686    Max.   : 1.7968   Max.   : 2.3786   
                                                                                      
 capacity_bytes    smart_3_raw          smart_4_raw         smart_9_raw       
 Min.   : NA      Min.   :-2.6661165   Min.   :-1.301765   Min.   :-4.522747  
 1st Qu.: NA      1st Qu.:-1.2737431   1st Qu.:-0.618948   1st Qu.:-0.691515  
 Median : NA      Median : 0.8187356   Median :-0.134482   Median :-0.260400  
 Mean   :NaN      Mean   : 0.0000162   Mean   :-0.000003   Mean   :-0.000005  
 3rd Qu.: NA      3rd Qu.: 0.8615058   3rd Qu.: 0.807930   3rd Qu.: 0.873419  
 Max.   : NA      Max.   : 1.3107509   Max.   : 7.538825   Max.   : 1.627442  
 NA's   :101005                                                               
  smart_12_raw       smart_191_raw       smart_192_raw       smart_193_raw      
 Min.   :-1.274805   Min.   :-0.561869   Min.   :-0.728452   Min.   :-2.210091  
 1st Qu.:-0.596015   1st Qu.:-0.561869   1st Qu.:-0.728452   1st Qu.:-0.704389  
 Median :-0.114405   Median :-0.561869   Median :-0.728452   Median :-0.553552  
 Mean   :-0.000002   Mean   :-0.000011   Mean   :-0.000014   Mean   :-0.000011  
 3rd Qu.: 0.822450   3rd Qu.: 0.418071   3rd Qu.: 0.675337   3rd Qu.: 0.906041  
 Max.   : 4.628689   Max.   :13.151356   Max.   : 7.597056   Max.   : 3.064668  
                                                                                
 smart_194_raw       smart_222_raw       smart_226_raw         weight_col     
 Min.   :-4.314398   Min.   :-4.390568   Min.   :-2.131422   Min.   :  1.000  
 1st Qu.:-0.384310   1st Qu.:-0.677479   1st Qu.:-0.920394   1st Qu.:  1.000  
 Median : 0.137049   Median :-0.243378   Median : 0.809652   Median :  1.000  
 Mean   : 0.000003   Mean   :-0.000005   Mean   : 0.000016   Mean   :  1.011  
 3rd Qu.: 0.610210   3rd Qu.: 0.869443   3rd Qu.: 0.832710   3rd Qu.:  1.000  
 Max.   : 2.585678   Max.   : 1.604504   Max.   : 1.307047   Max.   :100.000  
                                                                              
message("valid_data")
valid_data
summary(valid_data)
 failure   model2.MD04ABA400V  model2.MG07ACA14TA  model2.MQ01ABF050   
 0:43282   Min.   :-0.261653   Min.   :-1.032430   Min.   :-0.5565456  
 1:    4   1st Qu.:-0.261653   1st Qu.:-1.032430   1st Qu.:-0.5565456  
           Median :-0.261653   Median : 0.968579   Median :-0.5565456  
           Mean   :-0.002132   Mean   :-0.008719   Mean   : 0.0001171  
           3rd Qu.:-0.261653   3rd Qu.: 0.968579   3rd Qu.:-0.5565456  
           Max.   : 3.821821   Max.   : 0.968579   Max.   : 1.7967801  
                                                                       
 model2.MQ01ABF050M capacity_bytes   smart_3_raw         smart_4_raw       
 Min.   :-0.42041   Min.   : NA     Min.   :-2.666116   Min.   :-1.301765  
 1st Qu.:-0.42041   1st Qu.: NA     1st Qu.:-1.282368   1st Qu.:-0.618948  
 Median :-0.42041   Median : NA     Median : 0.699483   Median :-0.134482  
 Mean   : 0.01374   Mean   :NaN     Mean   :-0.009821   Mean   :-0.001311  
 3rd Qu.:-0.42041   3rd Qu.: NA     3rd Qu.: 0.861672   3rd Qu.: 0.807930  
 Max.   : 2.37859   Max.   : NA     Max.   : 1.310751   Max.   : 7.538825  
                    NA's   :43286                                          
  smart_9_raw         smart_12_raw       smart_191_raw       smart_192_raw      
 Min.   :-3.720713   Min.   :-1.274805   Min.   :-0.561869   Min.   :-0.728452  
 1st Qu.:-0.703210   1st Qu.:-0.596015   1st Qu.:-0.561869   1st Qu.:-0.728452  
 Median :-0.254274   Median :-0.114405   Median :-0.561869   Median :-0.728452  
 Mean   : 0.000725   Mean   :-0.001094   Mean   : 0.005503   Mean   :-0.003547  
 3rd Qu.: 0.870413   3rd Qu.: 0.822450   3rd Qu.: 0.418071   3rd Qu.: 0.675337  
 Max.   : 1.627422   Max.   : 4.628689   Max.   :13.151554   Max.   : 7.597056  
                                                                                
 smart_193_raw       smart_194_raw       smart_222_raw       smart_226_raw      
 Min.   :-2.210091   Min.   :-4.314398   Min.   :-3.608709   Min.   :-2.131422  
 1st Qu.:-0.704389   1st Qu.:-0.384310   1st Qu.:-0.677479   1st Qu.:-0.920394  
 Median :-0.553552   Median : 0.137049   Median :-0.239555   Median : 0.809652  
 Mean   : 0.003183   Mean   : 0.008034   Mean   : 0.000771   Mean   :-0.009393  
 3rd Qu.: 0.919132   3rd Qu.: 0.758692   3rd Qu.: 0.866452   3rd Qu.: 0.832710  
 Max.   : 3.064668   Max.   : 2.482194   Max.   : 1.604484   Max.   : 1.295670  
                                                                                
   weight_col     
 Min.   :  1.000  
 1st Qu.:  1.000  
 Median :  1.000  
 Mean   :  1.009  
 3rd Qu.:  1.000  
 Max.   :100.000  
                  
message("test_data")
test_data
summary(test_data)
 failure   model2.MD04ABA400V  model2.MG07ACA14TA   model2.MQ01ABF050   
 0:61832   Min.   :-0.261653   Min.   :-1.0324299   Min.   :-0.5565456  
 1:    6   1st Qu.:-0.261653   1st Qu.:-1.0324299   1st Qu.:-0.5565456  
           Median :-0.261653   Median : 0.9685791   Median :-0.5565456  
           Mean   : 0.000176   Mean   : 0.0004011   Mean   :-0.0006953  
           3rd Qu.:-0.261653   3rd Qu.: 0.9685791   3rd Qu.:-0.5565456  
           Max.   : 3.821821   Max.   : 0.9685791   Max.   : 1.7967801  
                                                                        
 model2.MQ01ABF050M  capacity_bytes   smart_3_raw         smart_4_raw       
 Min.   :-0.420412   Min.   : NA     Min.   :-2.666116   Min.   :-1.301765  
 1st Qu.:-0.420412   1st Qu.: NA     1st Qu.:-1.276325   1st Qu.:-0.618948  
 Median :-0.420412   Median : NA     Median : 0.819079   Median :-0.134482  
 Mean   : 0.001354   Mean   :NaN     Mean   :-0.001042   Mean   :-0.003516  
 3rd Qu.:-0.420412   3rd Qu.: NA     3rd Qu.: 0.861672   3rd Qu.: 0.807930  
 Max.   : 2.378595   Max.   : NA     Max.   : 1.310751   Max.   : 7.538825  
                     NA's   :61838                                          
  smart_9_raw          smart_12_raw       smart_191_raw       smart_192_raw    
 Min.   :-3.1516761   Min.   :-1.274805   Min.   :-0.561869   Min.   :-0.7285  
 1st Qu.:-0.7038315   1st Qu.:-0.596015   1st Qu.:-0.561869   1st Qu.:-0.7285  
 Median :-0.2607243   Median :-0.114405   Median :-0.561869   Median :-0.7285  
 Mean   :-0.0009615   Mean   :-0.004237   Mean   : 0.001789   Mean   : 0.0011  
 3rd Qu.: 0.8703519   3rd Qu.: 0.822450   3rd Qu.: 0.418071   3rd Qu.: 0.6753  
 Max.   : 1.6274017   Max.   : 4.628689   Max.   :13.151357   Max.   : 7.5971  
                                                                               
 smart_193_raw        smart_194_raw       smart_222_raw       smart_226_raw      
 Min.   :-2.2100905   Min.   :-4.314398   Min.   :-3.430757   Min.   :-2.131422  
 1st Qu.:-0.7043892   1st Qu.:-0.384310   1st Qu.:-0.679943   1st Qu.:-0.920394  
 Median :-0.5535520   Median : 0.137049   Median :-0.243698   Median : 0.809652  
 Mean   :-0.0004676   Mean   : 0.002912   Mean   :-0.000962   Mean   : 0.001625  
 3rd Qu.: 0.9005704   3rd Qu.: 0.610210   3rd Qu.: 0.866392   3rd Qu.: 0.837295  
 Max.   : 3.0646676   Max.   : 2.585678   Max.   : 1.604484   Max.   : 1.314603  
                                                                                 
   weight_col    
 Min.   :  1.00  
 1st Qu.:  1.00  
 Median :  1.00  
 Mean   :  1.01  
 3rd Qu.:  1.00  
 Max.   :100.00  
                 

Explore missing data, and remove any variables where more more than 50% of the values are missing.


# Visualize missing data
vis_miss(train_data %>% sample_frac(size = .10))

miss_var_summary(train_data)


# Remove variables where more than 50% of the values are missing. This is done for train, valid, and test datasets.
miss_50pct_below <-
  miss_var_summary(train_data) %>% 
  filter(pct_miss <= 50) %>% 
  pull(variable)

miss_50pct_below
 [1] "failure"            "model2.MD04ABA400V" "model2.MG07ACA14TA" "model2.MQ01ABF050" 
 [5] "model2.MQ01ABF050M" "smart_3_raw"        "smart_4_raw"        "smart_9_raw"       
 [9] "smart_12_raw"       "smart_191_raw"      "smart_192_raw"      "smart_193_raw"     
[13] "smart_194_raw"      "smart_222_raw"      "smart_226_raw"      "weight_col"        
train_data <-
  train_data %>%
  select(one_of(miss_50pct_below)
         )

valid_data <-
  valid_data %>%
  select(one_of(miss_50pct_below)
         )

test_data <-
  test_data %>%
  select(one_of(miss_50pct_below)
         )


dim(train_data)
[1] 101005     16
dim(valid_data)
[1] 43286    16
dim(test_data)
[1] 61838    16

Use DMwR::SMOTE to rebalance the clasess of failure.


message("train_data$failure")
train_data$failure
str(train_data)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   101005 obs. of  16 variables:
 $ failure           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ model2.MD04ABA400V: num  -0.262 -0.262 -0.262 3.822 -0.262 ...
 $ model2.MG07ACA14TA: num  -1.032 -1.032 0.969 0.969 0.969 ...
 $ model2.MQ01ABF050 : num  -0.557 -0.557 -0.557 -0.557 -0.557 ...
 $ model2.MQ01ABF050M: num  -0.42 -0.42 -0.42 -0.42 -0.42 ...
 $ smart_3_raw       : num  0.855 -0.937 0.866 0.86 0.84 ...
 $ smart_4_raw       : num  1.033 3.518 1.033 0.241 0.548 ...
 $ smart_9_raw       : num  1.149 0.53 -1.952 1.272 -0.429 ...
 $ smart_12_raw      : num  -1.275 -0.596 0.822 1.046 -0.596 ...
 $ smart_191_raw     : num  1.1738 1.398 -0.5619 0.0564 -0.5619 ...
 $ smart_192_raw     : num  -0.728 -0.728 0.675 -0.728 0.675 ...
 $ smart_193_raw     : num  -0.62 -0.872 -0.896 -0.404 1.154 ...
 $ smart_194_raw     : num  0.137 0.903 -0.205 1.18 0.61 ...
 $ smart_222_raw     : num  -0.509 -0.506 -0.79 1.125 -0.405 ...
 $ smart_226_raw     : num  0.837 -0.958 -0.911 0.814 1.081 ...
 $ weight_col        : num  1 1 1 1 1 1 1 1 1 1 ...
summary(train_data)
 failure    model2.MD04ABA400V model2.MG07ACA14TA model2.MQ01ABF050 model2.MQ01ABF050M
 0:100994   Min.   :-0.2617    Min.   :-1.0324    Min.   :-0.5565   Min.   :-0.4204   
 1:    11   1st Qu.:-0.2617    1st Qu.:-1.0324    1st Qu.:-0.5565   1st Qu.:-0.4204   
            Median :-0.2617    Median : 0.9686    Median :-0.5565   Median :-0.4204   
            Mean   : 0.0000    Mean   : 0.0000    Mean   : 0.0000   Mean   : 0.0000   
            3rd Qu.:-0.2617    3rd Qu.: 0.9686    3rd Qu.:-0.5565   3rd Qu.:-0.4204   
            Max.   : 3.8218    Max.   : 0.9686    Max.   : 1.7968   Max.   : 2.3786   
  smart_3_raw          smart_4_raw         smart_9_raw         smart_12_raw      
 Min.   :-2.6661165   Min.   :-1.301765   Min.   :-4.522747   Min.   :-1.274805  
 1st Qu.:-1.2737431   1st Qu.:-0.618948   1st Qu.:-0.691515   1st Qu.:-0.596015  
 Median : 0.8187356   Median :-0.134482   Median :-0.260400   Median :-0.114405  
 Mean   : 0.0000162   Mean   :-0.000003   Mean   :-0.000005   Mean   :-0.000002  
 3rd Qu.: 0.8615058   3rd Qu.: 0.807930   3rd Qu.: 0.873419   3rd Qu.: 0.822450  
 Max.   : 1.3107509   Max.   : 7.538825   Max.   : 1.627442   Max.   : 4.628689  
 smart_191_raw       smart_192_raw       smart_193_raw       smart_194_raw      
 Min.   :-0.561869   Min.   :-0.728452   Min.   :-2.210091   Min.   :-4.314398  
 1st Qu.:-0.561869   1st Qu.:-0.728452   1st Qu.:-0.704389   1st Qu.:-0.384310  
 Median :-0.561869   Median :-0.728452   Median :-0.553552   Median : 0.137049  
 Mean   :-0.000011   Mean   :-0.000014   Mean   :-0.000011   Mean   : 0.000003  
 3rd Qu.: 0.418071   3rd Qu.: 0.675337   3rd Qu.: 0.906041   3rd Qu.: 0.610210  
 Max.   :13.151356   Max.   : 7.597056   Max.   : 3.064668   Max.   : 2.585678  
 smart_222_raw       smart_226_raw         weight_col     
 Min.   :-4.390568   Min.   :-2.131422   Min.   :  1.000  
 1st Qu.:-0.677479   1st Qu.:-0.920394   1st Qu.:  1.000  
 Median :-0.243378   Median : 0.809652   Median :  1.000  
 Mean   :-0.000005   Mean   : 0.000016   Mean   :  1.011  
 3rd Qu.: 0.869443   3rd Qu.: 0.832710   3rd Qu.:  1.000  
 Max.   : 1.604504   Max.   : 1.307047   Max.   :100.000  
table(train_data$failure)

     0      1 
100994     11 
prop.table(table(train_data$failure))

           0            1 
0.9998910945 0.0001089055 
set.seed(123456789)
train_data_SMOTE <-
  SMOTE(failure ~ .,
        data  = as.data.frame(train_data),
        k = 10,
        perc.over = 100000,
        perc.under = 1300
        )

message("train_data_SMOTE$failure")
train_data_SMOTE$failure
table(train_data_SMOTE$failure)

     0      1 
143000  11011 
prop.table(table(train_data_SMOTE$failure))

         0          1 
0.92850511 0.07149489 
class(train_data_SMOTE)
[1] "data.frame"
glimpse(train_data_SMOTE)
Observations: 154,011
Variables: 16
$ failure            <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ model2.MD04ABA400V <dbl> -0.2616528, -0.2616528, -0.2616528, -0.2616528, -0.2616528…
$ model2.MG07ACA14TA <dbl> -1.0324299, -1.0324299, 0.9685791, 0.9685791, -1.0324299, …
$ model2.MQ01ABF050  <dbl> -0.5565456, -0.5565456, 1.7967801, 1.7967801, -0.5565456, …
$ model2.MQ01ABF050M <dbl> -0.4204122, -0.4204122, -0.4204122, -0.4204122, 2.3785946,…
$ smart_3_raw        <dbl> 0.8432514, 0.8975866, -1.2980393, 0.8353004, -1.3981441, 0…
$ smart_4_raw        <dbl> 0.5483349, -0.6189480, 0.8079298, -0.1344818, -1.3017648, …
$ smart_9_raw        <dbl> -0.40651706, 0.04642314, -0.70507547, -0.53517251, 1.61166…
$ smart_12_raw       <dbl> 1.4195612, -0.5960145, 0.2591607, 0.8224503, -1.2748053, 0…
$ smart_191_raw      <dbl> -0.56186948, -0.56186948, -0.56186948, -0.56186948, -0.561…
$ smart_192_raw      <dbl> -0.7284523, -0.7284523, -0.7284523, -0.7284523, 1.4965019,…
$ smart_193_raw      <dbl> -0.61275161, -0.14665595, 1.33890134, -0.61978132, 0.13065…
$ smart_194_raw      <dbl> 0.2996742, 0.1370488, 0.2996742, -0.2045625, 0.2996742, -0…
$ smart_222_raw      <dbl> -0.7689613, -0.5777751, 1.5570162, -0.9099636, -0.6433306,…
$ smart_226_raw      <dbl> -1.9681499022, 0.8189011527, -1.5469111861, 0.8510008909, …
$ weight_col         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
summary(train_data_SMOTE)
 failure    model2.MD04ABA400V model2.MG07ACA14TA model2.MQ01ABF050 model2.MQ01ABF050M 
 0:143000   Min.   :-0.26165   Min.   :-1.03243   Min.   :-0.5565   Min.   :-0.420412  
 1: 11011   1st Qu.:-0.26165   1st Qu.:-1.03243   1st Qu.:-0.5565   1st Qu.:-0.420412  
            Median :-0.26165   Median : 0.96858   Median :-0.5565   Median :-0.420412  
            Mean   :-0.01743   Mean   : 0.01308   Mean   :-0.0050   Mean   :-0.009432  
            3rd Qu.:-0.26165   3rd Qu.: 0.96858   3rd Qu.:-0.5565   3rd Qu.:-0.420412  
            Max.   : 3.82182   Max.   : 0.96858   Max.   : 1.7968   Max.   : 2.378595  
  smart_3_raw         smart_4_raw        smart_9_raw        smart_12_raw     
 Min.   :-2.666116   Min.   :-1.30177   Min.   :-4.52275   Min.   :-1.27480  
 1st Qu.:-0.986453   1st Qu.:-0.61895   1st Qu.:-0.66166   1st Qu.:-0.59601  
 Median : 0.617853   Median :-0.13448   Median :-0.15333   Median :-0.11441  
 Mean   :-0.004027   Mean   :-0.03874   Mean   : 0.03135   Mean   : 0.01031  
 3rd Qu.: 0.860508   3rd Qu.: 0.54834   3rd Qu.: 0.88166   3rd Qu.: 0.82245  
 Max.   : 1.310751   Max.   : 7.53883   Max.   : 1.62742   Max.   : 4.62869  
 smart_191_raw      smart_192_raw       smart_193_raw      smart_194_raw     
 Min.   :-0.56187   Min.   :-0.728452   Min.   :-2.21009   Min.   :-4.31440  
 1st Qu.:-0.56187   1st Qu.:-0.728452   1st Qu.:-0.70439   1st Qu.:-0.57059  
 Median :-0.56187   Median :-0.728452   Median :-0.55355   Median : 0.13705  
 Mean   : 0.04032   Mean   : 0.000833   Mean   :-0.02579   Mean   :-0.00806  
 3rd Qu.: 0.41807   3rd Qu.: 0.675337   3rd Qu.: 0.81290   3rd Qu.: 0.61021  
 Max.   :13.15136   Max.   : 7.597056   Max.   : 3.06467   Max.   : 2.48219  
 smart_222_raw      smart_226_raw         weight_col     
 Min.   :-3.25973   Min.   :-2.131422   Min.   :  1.000  
 1st Qu.:-0.69114   1st Qu.:-0.911040   1st Qu.:  1.000  
 Median :-0.25596   Median : 0.805015   Median :  1.000  
 Mean   :-0.02421   Mean   :-0.009757   Mean   :  8.078  
 3rd Qu.: 0.85049   3rd Qu.: 0.832710   3rd Qu.:  1.000  
 Max.   : 1.60450   Max.   : 1.295670   Max.   :100.000  

Modeling with H2O

Start h2o.


h2o.init()

H2O is not running yet, starting it now...

Note:  In case of errors look at the following log files:
    /var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/h2o_mdturse_started_from_r.out
    /var/folders/yb/g384t9010db3z533jmxcp5280000gn/T//Rtmpco3HiE/h2o_mdturse_started_from_r.err
java version "1.8.0_121"
Java(TM) SE Runtime Environment (build 1.8.0_121-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.121-b13, mixed mode)

Starting H2O JVM and connecting: ... Connection successful!

R is connected to the H2O cluster: 
    H2O cluster uptime:         4 seconds 9 milliseconds 
    H2O cluster timezone:       America/New_York 
    H2O data parsing timezone:  UTC 
    H2O cluster version:        3.24.0.1 
    H2O cluster version age:    1 month and 21 days  
    H2O cluster name:           H2O_started_from_R_mdturse_qhg316 
    H2O cluster total nodes:    1 
    H2O cluster total memory:   1.78 GB 
    H2O cluster total cores:    4 
    H2O cluster allowed cores:  4 
    H2O cluster healthy:        TRUE 
    H2O Connection ip:          localhost 
    H2O Connection port:        54321 
    H2O Connection proxy:       NA 
    H2O Internal Security:      FALSE 
    H2O API Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, Core V4 
    R Version:                  R version 3.6.0 (2019-04-26) 
h2o.no_progress() # Turn off progress bars

Convert to the datasets to h2o objects.


# Multiple sets (train, train_no_weight, SMOTE, SMOTE_no_weight) are used to test the effects of these different datasets.
train_h2o <- as.h2o(train_data)
train_h2o_no_weight <- as.h2o(train_data %>% select(-weight_col))
train_h2o_SMOTE <- as.h2o(train_data_SMOTE)
train_h2o_SMOTE_no_weight <- as.h2o(train_data_SMOTE %>% select(-weight_col))

valid_h2o <- as.h2o(valid_data)
valid_h2o_no_weight <- as.h2o(valid_data %>% select(-weight_col))

test_h2o  <- as.h2o(test_data)
test_h2o_no_weight  <- as.h2o(test_data %>% select(-weight_col))


# Save the data
saveRDS(train_data,
        paste0(wd,
               "/Data/Processed/",
               "train_data.rds"
               )
        )


saveRDS(train_data %>% 
          select(-weight_col),
        paste0(wd,
               "/Data/Processed/",
               "train_data_no_weight.rds"
               )
        )


saveRDS(valid_data,
        paste0(wd,
               "/Data/Processed/",
               "valid_data.rds"
               )
        )

saveRDS(valid_data %>% 
          select(-weight_col),
        paste0(wd,
               "/Data/Processed/",
               "valid_data_no_weight.rds"
               )
        )


saveRDS(test_data,
        paste0(wd,
               "/Data/Processed/",
               "test_data.rds"
               )
        )

saveRDS(test_data %>% 
          select(-weight_col),
        paste0(wd,
               "/Data/Processed/",
               "test_data_no_weight.rds"
               )
        )

Set the relevant variable names.


y <- "failure"

x <-
  setdiff(names(train_h2o),
  # setdiff(names(train_h2o_no_weight),
  # setdiff(names(train_h2o_SMOTE),
  # setdiff(names(train_h2o_SMOTE_no_weight),
          y
          )

Run h2o.automl.

NOTES

  1. train_h2o using weight_col & balance_classes = TRUE does not function - finds a quick “solution”

  2. train_h2o NOT using weight_col & balance_classes = TRUE does function

MODEL USED: 3) train_h2o using weight_col & balance_classes = FALSE

  1. train_h2o_SMOTE using weight_col & balance_classes = FALSE
  1. train_h2o_SMOTE_no_weight NOT using weight_col & balance_classes = TRUE

# user   system  elapsed 
#   22.355    7.826 1310.738
# ~ 22 min

start <- proc.time()
automl_models_h2o <-
  h2o.automl(x = x,
             y = y,
             training_frame = train_h2o,
             # training_frame = train_h2o_SMOTE_no_weight,
             validation_frame = valid_h2o,
             leaderboard_frame = test_h2o,
             nfolds = 10,
             # balance_classes = TRUE,
             balance_classes = FALSE,
             weights_column = "weight_col",
             max_runtime_secs = 3600, # 1 hour
             # max_runtime_secs = 60,
             max_models = 10,
             stopping_metric = "AUTO",
             seed = 123456789
             )

h2o.time <- proc.time() - start
h2o.time
    user   system  elapsed 
  20.010    7.500 1303.079 
rm(start)

Pull out the “leader” model.


automl_leader <- automl_models_h2o@leader

automl_leader
Model Details:
==============

H2OBinomialModel: gbm
Model ID:  GBM_4_AutoML_20190522_081032 
Model Summary: 


H2OBinomialMetrics: gbm
** Reported on training data. **

MSE:  1.121729e-05
RMSE:  0.003349223
LogLoss:  0.0007321303
Mean Per-Class Error:  0
AUC:  1
pr_auc:  0.9090909
Gini:  1

Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:

Maximum Metrics: Maximum metrics at their respective thresholds

Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
H2OBinomialMetrics: gbm
** Reported on validation data. **

MSE:  0.009422029
RMSE:  0.09706713
LogLoss:  0.0699779
Mean Per-Class Error:  0.1479252
AUC:  0.7775172
pr_auc:  0.01888206
Gini:  0.5550344

Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:

Maximum Metrics: Maximum metrics at their respective thresholds

Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
H2OBinomialMetrics: gbm
** Reported on cross-validation data. **
** 10-fold cross-validation on training data (Metrics computed for combined holdout predictions) **

MSE:  0.0109958
RMSE:  0.1048609
LogLoss:  0.08324178
Mean Per-Class Error:  0.427777
AUC:  0.4566617
pr_auc:  0.008994093
Gini:  -0.08667662

Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:

Maximum Metrics: Maximum metrics at their respective thresholds

Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
Cross-Validation Metrics Summary: 
# save each individual model
model_path_h2o <-
   h2o.saveModel(object = automl_leader,
                     path = paste0(wd,
                                   "/Models/"
                                   ),
                     force = TRUE
                     )


saveRDS(model_path_h2o,
        paste0(wd,
               "/Models/",
               "model_path_h2o.rds"
               )
        )



# load the model
# model_path_h2o <-
#   read_rds(path = paste0(wd,
#                          "/Models/",
#                          "model_path_h2o.rds"
#                          )
#            )
# 
# print(model_path_h2o)
# 
# automl_leader <-
#   h2o.loadModel(path = model_path_h2o)

Inspect the leaderboard.


automl_models_h2o@leaderboard

[12 rows x 6 columns] 

Investigate variable importance of the leader model.


leader_models_varimp <- h2o.varimp(object = automl_leader)

leader_models_varimp_plot <- h2o.varimp_plot(model = automl_leader, num_of_features = 10)



leader_models_varimp
Variable Importances: 
leader_models_varimp_plot
      [,1]
 [1,]  1.5
 [2,]  3.5
 [3,]  5.5
 [4,]  7.5
 [5,]  9.5
 [6,] 11.5
 [7,] 13.5
 [8,] 15.5
 [9,] 17.5
[10,] 19.5

Create predictions from the models.


pred_h2o <- h2o.predict(automl_leader, newdata = test_h2o)


# save as a .rds file
saveRDS(pred_h2o,
        paste0(wd,
               "/Models/",
               "pred_h2o.rds"
               )
        )

# pred_h2o <-
#   read_rds(path = paste0(wd,
#                          "/Models/",
#                          "pred_h2o.rds"
#                          )
#            )

Look at performance stats using the test data.


perf_stats_test_h2o <- h2o.performance(automl_leader, newdata = test_h2o)

perf_stats_test_h2o
H2OBinomialMetrics: gbm

MSE:  0.009857633
RMSE:  0.09928561
LogLoss:  0.07323695
Mean Per-Class Error:  0.3187832
AUC:  0.7210951
pr_auc:  0.0195803
Gini:  0.4421901

Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:

Maximum Metrics: Maximum metrics at their respective thresholds

Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
# save as a .rds file
saveRDS(perf_stats_test_h2o,
        paste0(wd,
               "/Models/",
               "perf_stats_test_h2o.rds"
               )
        )

# perf_stats_test_h2o <-
#   read_rds(path = paste0(wd,
#                          "/Models/",
#                          "perf_stats_test_h2o.rds"
#                          )
#            )

Investigate test error.


error_tbl_h2o <-
  test_data %>% 
  bind_cols(pred_h2o %>% as_tibble()
            ) %>% 
  rename(obs = failure,
         pred = predict
         )


confusionMatrix(data = error_tbl_h2o$pred,
                reference = error_tbl_h2o$obs,
                positive = "1",
                mode = "everything"
                )
Confusion Matrix and Statistics

          Reference
Prediction     0     1
         0 43901     2
         1 17931     4
                                          
               Accuracy : 0.71            
                 95% CI : (0.7064, 0.7136)
    No Information Rate : 0.9999          
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : 3e-04           
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 6.667e-01       
            Specificity : 7.100e-01       
         Pos Pred Value : 2.230e-04       
         Neg Pred Value : 1.000e+00       
              Precision : 2.230e-04       
                 Recall : 6.667e-01       
                     F1 : 4.459e-04       
             Prevalence : 9.703e-05       
         Detection Rate : 6.469e-05       
   Detection Prevalence : 2.900e-01       
      Balanced Accuracy : 6.883e-01       
                                          
       'Positive' Class : 1               
                                          
# save as a .rds file
saveRDS(error_tbl_h2o,
        paste0(wd,
               "/Models/",
               "error_tbl_h2o.rds"
               )
        )

# error_tbl_h2o <-
#   read_rds(path = paste0(wd,
#                          "/Models/",
#                          "error_tbl_h2o.rds"
#                          )
#            )
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayAgLS0gIEV4cGxvcmluZy9QcmVkaWN0aW5nIEhhcmQgRHJpdmUgRmFpbHVyZSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKRGF0YSBPcmlnaW5hbGx5IGZyb20gS2FnZ2xlIGF0OiAgW2h0dHBzOi8vd3d3LmthZ2dsZS5jb20vYmFja2JsYXplL2hhcmQtZHJpdmUtdGVzdC1kYXRhXShodHRwczovL3d3dy5rYWdnbGUuY29tL2JhY2tibGF6ZS9oYXJkLWRyaXZlLXRlc3QtZGF0YSkuCgpCdXQgYWZ0ZXIgZXhwZXJpZW5jaW5nIGlzc3VlcyB3aXRoIDY0Yml0IHZhbHVlcywgdGhlIGRhdGEgd2VyZSBvYnRhaW5lZCBmcm9tIHRoZSBvcmlnaW5hbCBzb3VyY2UgaGVyZTogIFtodHRwczovL3d3dy5iYWNrYmxhemUuY29tL2IyL2hhcmQtZHJpdmUtdGVzdC1kYXRhLmh0bWxdKGh0dHBzOi8vd3d3LmJhY2tibGF6ZS5jb20vYjIvaGFyZC1kcml2ZS10ZXN0LWRhdGEuaHRtbCkuICAKCgojIyBTZXR1cCAgICAKICBMb2FkIHRoZSByZWxldmFudCBsaWJyYXJpZXMuCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CgojIHJtKGxpc3QgPSBscygpKQojIC5ycy5yZXN0YXJ0UigpCgoKIyBkYXRhIG11bmdpbmcKbGlicmFyeSgidGlkeXZlcnNlIikKbGlicmFyeSgiZGF0YS50YWJsZSIpCgojIG11bmdlIGRhdGVzCmxpYnJhcnkoImx1YnJpZGF0ZSIpCgojIGZlYXR1cmUgZW5naW5lZXJpbmcgYW5kIGRhdGEgcHJlcApsaWJyYXJ5KCJyZWNpcGVzIikKbGlicmFyeSgiY2FyZXQiKQpsaWJyYXJ5KCJETXdSIikKCiMgdG8gZXhwbG9yZSBtaXNzaW5nIGRhdGEKbGlicmFyeSgidmlzZGF0IikKbGlicmFyeSgibmFuaWFyIikKCiMgbW9kZWxpbmcKbGlicmFyeSgiaDJvIikKCmBgYAogIAogICAgCiAgU2Vzc2lvbiBJbmZvLgpgYGB7cn0KCnNlc3Npb25JbmZvKCkKCmBgYAoKCiAgU2V0dXAgdGhlIHJvb3QgZGlyZWN0b3J5LgpgYGB7ciAic2V0dXAiLCBpbmNsdWRlID0gRkFMU0V9CgpyZXF1aXJlKCJrbml0ciIpCgpvcHRzX2tuaXQkc2V0KHJvb3QuZGlyID0gIi9Vc2Vycy9tZHR1cnNlL0Rlc2t0b3AvQW5hbHl0aWNzL2hhcmRfZHJpdmVfZmFpbHVyZS8iKQoKYGBgCiAgCiAgICAKICBTZXR0aW5nIGB3ZGAgYXMgdGhlIHdvcmtpbmcgZGlyZWN0b3J5LgpgYGB7cn0KCndkIDwtIGdldHdkKCkKCndkCgpgYGAKCgojIyBHZXQgdGhlIGRhdGEgIAogIAogIFRoZSBzaXRlIHdoZXJlIHRoZSBkYXRhIGFyZSBtYWRlIGF2YWlsYWJsZSBpcyBbaHR0cHM6Ly93d3cuYmFja2JsYXplLmNvbS9iMi9oYXJkLWRyaXZlLXRlc3QtZGF0YS5odG1sXShodHRwczovL3d3dy5iYWNrYmxhemUuY29tL2IyL2hhcmQtZHJpdmUtdGVzdC1kYXRhLmh0bWwpLiAgCiAgICAKICBEb3dubG9hZCB0aGUgLnppcCBmaWxlLgpgYGB7cn0KCiMgYmFzZSB1cmwKdXJsIDwtICJodHRwczovL2YwMDEuYmFja2JsYXplYjIuY29tL2ZpbGUvQmFja2JsYXplLUhhcmQtRHJpdmUtRGF0YS9kYXRhX1E0XzIwMTguemlwIgoKCiMgY3JlYXRlIGEgdGVtcG9yYXJ5IGRpcmVjdG9yeQp0ZCA8LSB0ZW1wZGlyKCkKCgojIGNyZWF0ZSB0aGUgcGxhY2Vob2xkZXIgZmlsZQp0ZiA8LSB0ZW1wZmlsZSh0bXBkaXIgPSB0ZCwgZmlsZWV4dCA9ICIuemlwIikKCgojIGRvd25sb2FkIGludG8gdGhlIHBsYWNlaG9sZGVyIGZpbGUKZG93bmxvYWQuZmlsZSh1cmwsIHRmKQoKYGBgCiAgCiAgICAKICBEb3dubG9hZCBqdXN0IHRoZSByZWxldmFudCAuY3N2IGZpbGVzLgpgYGB7ciBtZXNzYWdlPUZBTFNFfQoKIyBnZXQgdGhlIG5hbWVzIG9mIHRoZSBmaWxlcwpmbmFtZSA8LSB1bnppcCh0ZiwgbGlzdCA9IFRSVUUpCmhlYWQoZm5hbWUpCgpmbmFtZV9jc3YgPC0gCiAgZm5hbWUgJT4lIAogIGZpbHRlcihzdHJfZGV0ZWN0KE5hbWUsICJkYXRhX1E0XzIwMTgvMjAxOC1cXGR7Mn0tXFxkezJ9XFwuY3N2JCIpKSAlPiUgCiAgYXJyYW5nZShOYW1lKSAlPiUgCiAgIyBoZWFkKDUpICU+JQogIHB1bGwoTmFtZSkKCgojIHVuemlwIHRoZSBmaWxlcyB0byB0aGUgdGVtcG9yYXJ5IGRpcmVjdG9yeQpmbmFtZV9jc3YgJT4lIAogIG1hcCh+dW56aXAodGYsCiAgICAgICAgICAgICBmaWxlcyA9IC54LAogICAgICAgICAgICAgZXhkaXIgPSB0ZCwKICAgICAgICAgICAgIG92ZXJ3cml0ZSA9IFRSVUUKICAgICAgICAgICAgICkKICAgICAgKQoKCiMgZnBhdGggaXMgdGhlIGZ1bGwgcGF0aCB0byB0aGUgZXh0cmFjdGVkIGZpbGVzCmZwYXRoIDwtCiAgZm5hbWVfY3N2ICU+JSAKICBtYXAofmZpbGUucGF0aCh0ZCwgLngpCiAgICAgICkKCmBgYAogIAogICAgCiAgUmVhZCBpbiB0aGUgLmNzdiBmaWxlcyBhbmQgc3RhY2sgdGhlbSBpbnRvIGEgc2luZ2xlIC5yZHMgZmlsZS4KYGBge3IgbWVzc2FnZT1GQUxTRX0KCmhkIDwtCiAgZnBhdGggJT4lIAogIG1hcCh+ZnJlYWQoLngsIHZlcmJvc2UgPSBGQUxTRSkKICAgICAgKQoKCmhkX2R0IDwtIGJpbmRfcm93cyhoZCkKCgojIGZ3cml0ZShoZF9kdCwKIyAgICAgICAgcGFzdGUwKHdkLAojICAgICAgICAgICAgICAgICIvRGF0YS9JbnRlcmltLyIsCiMgICAgICAgICAgICAgICAgImhkX2R0LmNzdiIKIyAgICAgICAgICAgICAgICApCiMgICAgICAgICkKCiMgc2F2ZVJEUyhoZF9kdCwKIyAgICAgICAgIHBhc3RlMCh3ZCwKIyAgICAgICAgICAgICAgICAiL0RhdGEvSW50ZXJpbS8iLAojICAgICAgICAgICAgICAgICJoZF9kdC5SZHMiCiMgICAgICAgICAgICAgICAgKQojICAgICAgICAgKQoKIyBoZF9kdCA8LQojICAgcmVhZFJEUyhwYXN0ZTAod2QsCiMgICAgICAgICAgICAgICAgICAiL0RhdGEvSW50ZXJpbS8iLAojICAgICAgICAgICAgICAgICAgImhkX2R0LlJkcyIKIyAgICAgICAgICAgICAgICAgICkKIyAgICAgICAgICAgKQoKCiMgbWVzc2FnZSgiaGQiKQojIGNsYXNzKGhkKQojIGdsaW1wc2UoaGRbMV0pCiMgCiMgbWVzc2FnZSgiaGRfZHQiKQojIGdsaW1wc2UoaGRfZHQpCgoKcm0oZm5hbWUsIGZwYXRoLCBmbmFtZV9jc3YsIHRkLCB0ZiwgdXJsLCBoZCkKCmBgYAogIAogICAgCiAgRXhwZXJpbWVudGluZyB3aXRoIGNvbnZlcnRpbmcgYGludGVnZXI2NGAgdmFsdWVzIHRvIHNtYWxsZXIgbnVtZXJpYyB2YWx1ZXMuCmBgYHtyfQoKIyBjbGFzcyhoZF9kdCkKIyBoZF9kdDIgPC0gaGRfZHQKCiMgMTIsMDAwLDEzOCw2MjUsMDI0CiMgaGRfZHQkY2FwYWNpdHlfYnl0ZXMgPC0gYXMubnVtZXJpYyhoZF9kdCRjYXBhY2l0eV9ieXRlcyAvIDEwMDAwMDApIyAqIDEwMDAwMDAKIyBoZF9kdCRzbWFydF83X3JhdyA8LSBhcy5udW1lcmljKGhkX2R0JHNtYXJ0XzdfcmF3IC8gMTAwMDAwMCkjICogMTAwMDAwMAojIGhkX2R0JHNtYXJ0XzE4OF9yYXcgPC0gYXMubnVtZXJpYyhoZF9kdCRzbWFydF8xODhfcmF3IC8gMTAwMDAwMCkjICogMTAwMDAwMAojIGhkX2R0JHNtYXJ0XzI0MF9yYXcgPC0gYXMubnVtZXJpYyhoZF9kdCRzbWFydF8yNDBfcmF3IC8gMTAwMDAwMCkjICogMTAwMDAwMAojIGhkX2R0JHNtYXJ0XzI0MV9yYXcgPC0gYXMubnVtZXJpYyhoZF9kdCRzbWFydF8yNDFfcmF3IC8gMTAwMDAwMCkjICogMTAwMDAwMAojIGhkX2R0JHNtYXJ0XzI0Ml9yYXcgPC0gYXMubnVtZXJpYyhoZF9kdCRzbWFydF8yNDJfcmF3IC8gMTAwMDAwMCkjICogMTAwMDAwMAoKIyBjbGFzcyhoZF9kdCkKIyBnbGltcHNlKGhkX2R0KQoKIyBoZF9kdCRjYXBhY2l0eV9ieXRlczMgPC0gYXMuaW50ZWdlcihoZF9kdCRjYXBhY2l0eV9ieXRlcykKCmBgYAoKCiMjIERhdGEgUHJlcCAgCiAgCiAgU2VwYXJhdGUgbW9kZWwgaW50byBwaWVjZXMgLSBtYW51ZmFjdHVyZXIgYW5kIG1vZGVsLgpgYGB7cn0KCmhkMiA8LQogIGhkX2R0ICU+JSAKICAjIGhlYWQoMTAwMCkgJT4lCiAgc2VwYXJhdGUoY29sID0gbW9kZWwsCiAgICAgICAgICAgaW50byA9IGMoIm1hbnUiLCAibW9kZWwyIiksCiAgICAgICAgICAgc2VwID0gIlxccyIsCiAgICAgICAgICAgZmlsbCA9ICJsZWZ0IiwKICAgICAgICAgICByZW1vdmUgPSBUUlVFCiAgICAgICAgICAgKSAlPiUKICBtdXRhdGUoZGF0ZSA9IGFzX2RhdGUoZGF0ZSksCiAgICAgICAgIHNlcmlhbF9udW1iZXIgPSBmYWN0b3Ioc2VyaWFsX251bWJlciksCiAgICAgICAgIG1hbnUgPSBjYXNlX3doZW4oaXMubmEobWFudSkgfiAiKE1pc3NpbmcpIiwKICAgICAgICAgICAgICAgICAgICAgICAgICBUUlVFIH4gbWFudQogICAgICAgICAgICAgICAgICAgICAgICAgICkgJT4lIAogICAgICAgICAgIGZhY3RvcigpLAogICAgICAgICBtb2RlbDIgPSBmYWN0b3IobW9kZWwyKSwKICAgICAgICAgZmFpbHVyZSA9IGZhY3RvcihmYWlsdXJlKQogICAgICAgICApICU+JSAKICBzZWxlY3QoLW1hdGNoZXMoIm5vcm1hbGl6ZWQiKQogICAgICAgICApICU+JSAKICBhcy5kYXRhLnRhYmxlICU+JSAKICBzZXRrZXkobWFudSwgbW9kZWwyLCBzZXJpYWxfbnVtYmVyLCBkYXRlKQoKCmNsYXNzKGhkMikKZ2xpbXBzZShoZDIpCnN1bW1hcnkoaGQyKQoKIyBWaWV3KGhkMiAlPiUgZmlsdGVyKG1vZGVsMiA9PSAiSERTNUMzMDMwQUxBNjMwIikpCiMgVmlldyhoZWFkKGhkMiwgMTAwMCkpCgpgYGAKICAKICAgIAogIEdldCBzb21lIGJhc2ljIGNvdW50cyBmb3IgbWFudWZhY3R1cmVyLCBtb2RlbCwgYW5kIHNlcmlhbCBudW1iZXIuCmBgYHtyfQoKaGQyICU+JSAKICBjb3VudChtYW51LCBtb2RlbDIsIHNlcmlhbF9udW1iZXIpICU+JSAKICBhcnJhbmdlKGRlc2MobikpCgpoZDIgJT4lIAogIGNvdW50KG1hbnUpICU+JSAKICBhcnJhbmdlKGRlc2MobikpCgpoZDIgJT4lIAogIGNvdW50KG1vZGVsMikgJT4lIAogIGFycmFuZ2UoZGVzYyhuKSkKCmBgYAogIAogICAgCiAgRXhwbG9yZSBmYWlsdXJlIHJhdGVzLgpgYGB7cn0KCnRhYmxlKGhkMiRmYWlsdXJlKQpwcm9wLnRhYmxlKHRhYmxlKGhkMiRmYWlsdXJlKSkKCmNudHNfbWFudSA8LQogIGhkMiAlPiUgCiAgY291bnQobWFudSkKICAKY250c19tYW51X2ZhaWx1cmUgPC0KICBoZDIgJT4lIAogIGZpbHRlcihmYWlsdXJlID09ICIxIikgJT4lIAogIGNvdW50KG1hbnUpCgptYW51X2ZhaWx1cmVzIDwtCiAgY250c19tYW51ICU+JSAKICByZW5hbWUoY250X292ZXJhbGwgPSBuKSAlPiUgCiAgaW5uZXJfam9pbih5ID0gY250c19tYW51X2ZhaWx1cmUgJT4lIAogICAgICAgICAgICAgICByZW5hbWUoY250X2ZhaWx1cmUgPSBuKSwKICAgICAgICAgICAgIGJ5ID0gYygibWFudSIgPSAibWFudSIpCiAgICAgICAgICAgICApICU+JSAKICBtdXRhdGUoZmFpbHVyZV9wY3QgPSBjbnRfZmFpbHVyZSAvIGNudF9vdmVyYWxsKSAlPiUgCiAgYXJyYW5nZShkZXNjKGZhaWx1cmVfcGN0KSkKICAKbWFudV9mYWlsdXJlcwoKCnJtKGNudHNfbWFudSwgY250c19tYW51X2ZhaWx1cmUpCgpgYGAKCgojIyBFeHBsb3JlIG1vZGVsaW5nIGp1c3QgZm9yIFRvc2hpYmEgIAogICAKICBDcmVhdGUgdHJhaW5fdmFsaWQgYW5kIHRlc3QgZGF0YXNldHMuCmBgYHtyfQoKdG9zaGliYSA8LQogIGhkMiAlPiUgCiAgZmlsdGVyKG1hbnUgPT0gIlRPU0hJQkEiKSAlPiUgCiAgbXV0YXRlX2lmKGlzLmZhY3RvciwgZmFjdG9yKSAlPiUgCiAgc2VsZWN0X2lmKG5lZ2F0ZShpcy5sb2dpY2FsKSkKCgpmd3JpdGUodG9zaGliYSwKICAgICAgIHBhc3RlMCh3ZCwKICAgICAgICAgICAgICAiL0RhdGEvSW50ZXJpbS8iLAogICAgICAgICAgICAgICJ0b3NoaWJhLmNzdiIKICAgICAgICAgICAgICApCiAgICAgICApCgojIHRvc2hpYmEgPC0KIyAgIGZyZWFkKHBhc3RlMCh3ZCwKIyAgICAgICAgICAgICAgICAgICIvRGF0YS9JbnRlcmltLyIsCiMgICAgICAgICAgICAgICAgICAidG9zaGliYS5jc3YiCiMgICAgICAgICAgICAgICAgICApCiMgICAgICAgICAgICkKCiMgdG9zaGliYVsgLCBkYXRlIDo9IGFzX2RhdGUoZGF0ZSldCiMgdG9zaGliYVsgLCBzZXJpYWxfbnVtYmVyIDo9IGZhY3RvcihzZXJpYWxfbnVtYmVyKV0KIyB0b3NoaWJhWyAsIG1hbnUgOj0gZmFjdG9yKG1hbnUpXQojIHRvc2hpYmFbICwgbW9kZWwyIDo9IGZhY3Rvcihtb2RlbDIpXQoKCnN0cih0b3NoaWJhKQpzdW1tYXJ5KHRvc2hpYmEpCiAgCmBgYAoKCiMjIyBGZWF0dXJlIEVuZ2luZWVyaW5nICAKICAgIAogIE9uZS1ob3QgZW5jb2RlIHRoZSBkYXRhLgpgYGB7cn0KCm1lc3NhZ2UoInRvc2hpYmEiKQpzdHIodG9zaGliYSkKCm9uZV9ob3QgPC0gZHVtbXlWYXJzKGZhaWx1cmUgfiAuLAogICAgICAgICAgICAgICAgICAgICBkYXRhID0gdG9zaGliYSAlPiUgCiAgICAgICAgICAgICAgICAgICAgICAgc2VsZWN0KC1kYXRlLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAtc2VyaWFsX251bWJlciwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLW1hbnUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKSAlPiUKICAgICAgICAgICAgICAgICAgICAgICBzZWxlY3RfaWYobmVnYXRlKGlzLmxvZ2ljYWwpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICkKICAgICAgICAgICAgICAgICAgICAgKQoKCnRvc2hpYmFfb25lX2hvdCA8LQogIHRvc2hpYmEgJT4lIAogIHNlbGVjdChmYWlsdXJlLAogICAgICAgICBkYXRlLAogICAgICAgICAjIHNlcmlhbF9udW1iZXIsCiAgICAgICAgIG1hbnUpICU+JSAKICBiaW5kX2NvbHMocHJlZGljdChvYmplY3QgPSBvbmVfaG90LAogICAgICAgICAgICAgICAgICAgIG5ld2RhdGEgPSB0b3NoaWJhICU+JSAKICAgICAgICAgICAgICAgICAgICAgIHNlbGVjdCgtZGF0ZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAtc2VyaWFsX251bWJlciwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAtbWFudQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICkgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgc2VsZWN0X2lmKG5lZ2F0ZShpcy5sb2dpY2FsKQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApCiAgICAgICAgICAgICAgICAgICAgKSAlPiUgCiAgICAgICAgICAgICAgYXMuZGF0YS5mcmFtZSgpCiAgICAgICAgICAgICkKCm1lc3NhZ2UoInRvc2hpYmFfb25lX2hvdCIpCnN0cih0b3NoaWJhX29uZV9ob3QpCnN1bW1hcnkodG9zaGliYV9vbmVfaG90KQoKCnJtKG9uZV9ob3QsIHRvc2hpYmEpCgpgYGAKICAKICAgIAogIENyZWF0ZSBzZXBhcmF0ZSB0cmFpbl92YWxpZCBhbmQgdGhlIHRlc3QgZGF0YSBzZXRzLgpgYGB7cn0KCnNldC5zZWVkKDEyMzQ1Njc4OSkKdHJhaW5faW5kZXggPC0KICBjcmVhdGVEYXRhUGFydGl0aW9uKHkgPSB0b3NoaWJhX29uZV9ob3QkZmFpbHVyZSwKICAgICAgICAgICAgICAgICAgICAgIHAgPSAuNzAsCiAgICAgICAgICAgICAgICAgICAgICB0aW1lcyA9IDEsCiAgICAgICAgICAgICAgICAgICAgICBsaXN0ID0gRkFMU0UKICAgICAgICAgICAgICAgICAgICAgICkKCgpoZDJfdHJhaW5fdmFsaWQgPC0KICB0b3NoaWJhX29uZV9ob3RbdHJhaW5faW5kZXgsIF0gJT4lIAogIHNlbGVjdCgtZGF0ZSkKCmhkMl90ZXN0IDwtCiAgdG9zaGliYV9vbmVfaG90Wy10cmFpbl9pbmRleCwgXSAlPiUKICBzZWxlY3QoLWRhdGUpCgpgYGAKICAKICAgIAogIENyZWF0ZSBzZXBhcmF0ZSB0cmFpbiBhbmQgdmFsaWQgZGF0YXNldHMuCmBgYHtyfQoKc2V0LnNlZWQoMTIzNDU2Nzg5KQp0cmFpbl9pbmRleCA8LQogIGNyZWF0ZURhdGFQYXJ0aXRpb24oeSA9IGhkMl90cmFpbl92YWxpZCRmYWlsdXJlLAogICAgICAgICAgICAgICAgICAgICAgcCA9IC43MCwKICAgICAgICAgICAgICAgICAgICAgIHRpbWVzID0gMSwKICAgICAgICAgICAgICAgICAgICAgIGxpc3QgPSBGQUxTRQogICAgICAgICAgICAgICAgICAgICAgKQoKCmhkMl90cmFpbiA8LSBoZDJfdHJhaW5fdmFsaWRbdHJhaW5faW5kZXgsIF0KaGQyX3ZhbGlkIDwtIGhkMl90cmFpbl92YWxpZFstdHJhaW5faW5kZXgsIF0KCm1lc3NhZ2UoImhkMl90cmFpbiRmYWlsdXJlIikKdGFibGUoaGQyX3RyYWluJGZhaWx1cmUpCnByb3AudGFibGUodGFibGUoaGQyX3RyYWluJGZhaWx1cmUpKQpjbGFzcyhoZDJfdHJhaW4pCmdsaW1wc2UoaGQyX3RyYWluKQoKYGBgCiAgCiAgICAKICBVc2UgdGhlIGByZWNpcGVgIHBhY2thZ2UgZm9yIG11bHRpcGxlIGZlYXR1cmUgZW5naW5lZXJpbmcgc3RlcHMuCmBgYHtyfQoKIyBjcmVhdGUgdGhlIHJlY2lwZSBvYmplY3QKcmVjX29iaiA8LSByZWNpcGUoZmFpbHVyZSB+IC4sIGRhdGEgPSBoZDJfdHJhaW4pCgoKIyBpbXBsZW1lbnQgZWFjaCByZWNpcGUgc3RlcApyZWNfc3RlcHMgPC0KICByZWNfb2JqICU+JSAKICBzdGVwX3NodWZmbGUoYWxsX3ByZWRpY3RvcnMoKSkgJT4lIAogIHN0ZXBfbnp2KGFsbF9wcmVkaWN0b3JzKCkpICU+JSAgIyBoZWxwcyByZWR1Y2UgdGhlIG51bWJlciBvZiB2YXJpYWJsZXMgd2l0aCBuZWFyIHplcm8gdmFyaWFuY2UgKGluY2x1ZGluZyBOQSB2YWx1ZXMpCiAgc3RlcF9sb2coYWxsX3ByZWRpY3RvcnMoKSwgb2Zmc2V0ID0gMSkgJT4lICAjIHB1dHMgbGVzcyBlbXBoYXNpcyBvbiAib3V0bGllcnMiCiAgc3RlcF9jZW50ZXIoYWxsX3ByZWRpY3RvcnMoKSkgJT4lCiAgc3RlcF9zY2FsZShhbGxfcHJlZGljdG9ycygpKSAlPiUgCiAgc3RlcF9tZWRpYW5pbXB1dGUoYWxsX3ByZWRpY3RvcnMoKSkKCnJlY19zdGVwcwoKCiMgY3JlYXRlIHRoZSByZWNpcGUgYmFzZWQgb24gdGhlIFRSQUlOIGRhdGEgc2V0CnRyYWluZWRfcmVjIDwtIHByZXAocmVjX3N0ZXBzLCB0cmFpbmluZyA9IGhkMl90cmFpbikKCnRyYWluZWRfcmVjCgoKIyBhcHBseSB0aGUgcmVjaXBlIHRvIHRoZSB0aGUgdHJhaW4sIHZhbGlkLCBhbmQgdGVzdCBkYXRhc2V0cwp0cmFpbl9kYXRhIDwtIGJha2UodHJhaW5lZF9yZWMsIG5ld19kYXRhID0gaGQyX3RyYWluKQp2YWxpZF9kYXRhICA8LSBiYWtlKHRyYWluZWRfcmVjLCBuZXdfZGF0YSA9IGhkMl92YWxpZCkKdGVzdF9kYXRhICA8LSBiYWtlKHRyYWluZWRfcmVjLCBuZXdfZGF0YSA9IGhkMl90ZXN0KQoKCiMgVmlldyhoZDJfdHJhaW4gJT4lIGhlYWQoMTAwMCkpCiMgVmlldyh0cmFpbl9kYXRhICU+JSBoZWFkKDEwMDApKQoKCm1lc3NhZ2UoInRyYWluX2RhdGEiKQpkaW0odHJhaW5fZGF0YSkKc3RyKHRyYWluX2RhdGEpCnN1bW1hcnkodHJhaW5fZGF0YSkKCm1lc3NhZ2UoInZhbGlkX2RhdGEiKQpzdW1tYXJ5KHZhbGlkX2RhdGEpCgptZXNzYWdlKCJ0ZXN0X2RhdGEiKQpzdW1tYXJ5KHRlc3RfZGF0YSkKCmBgYAogIAogICAgCiAgQWRkIGB3ZWlnaHRfY29sYCwgYXMgdGhpcyBpcyBhIHBvdGVudGlhbCBwYXJhbWV0ZXIgdG8gYmUgdXNlZC4gVGhlIHZsYXVlIG9mIDEwMCBpcyBhcmJpdHJhcmlseSBjaG9zZW4gdG8gbWFrZSBwcmVkaWN0aW5nIGBmYWlsdXJlYCAxMDAgdGltZXMgbW9yZSBpbXBvcnRhbnQgdGhhbiBwcmVkaWN0aW5nIGBub25fZmFpbHVyZWAuCmBgYHtyfQoKdHJhaW5fZGF0YSA8LQogIHRyYWluX2RhdGEgJT4lCiAgbXV0YXRlKHdlaWdodF9jb2wgPSBjYXNlX3doZW4oZmFpbHVyZSA9PSAxIH4gMTAwLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZhaWx1cmUgPT0gMCB+IDEsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgVFJVRSB+IDAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApCiAgICAgICAgICkKCnZhbGlkX2RhdGEgPC0KICB2YWxpZF9kYXRhICU+JQogIG11dGF0ZSh3ZWlnaHRfY29sID0gY2FzZV93aGVuKGZhaWx1cmUgPT0gMSB+IDEwMCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmYWlsdXJlID09IDAgfiAxLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFRSVUUgfiAwCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKQogICAgICAgICApCgp0ZXN0X2RhdGEgPC0KICB0ZXN0X2RhdGEgJT4lCiAgbXV0YXRlKHdlaWdodF9jb2wgPSBjYXNlX3doZW4oZmFpbHVyZSA9PSAxIH4gMTAwLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZhaWx1cmUgPT0gMCB+IDEsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgVFJVRSB+IDAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApCiAgICAgICAgICkKCm1lc3NhZ2UoInRyYWluX2RhdGEiKQpkaW0odHJhaW5fZGF0YSkKc3VtbWFyeSh0cmFpbl9kYXRhKQoKbWVzc2FnZSgidmFsaWRfZGF0YSIpCnN1bW1hcnkodmFsaWRfZGF0YSkKCm1lc3NhZ2UoInRlc3RfZGF0YSIpCnN1bW1hcnkodGVzdF9kYXRhKQoKYGBgCiAgCiAgICAKICBFeHBsb3JlIG1pc3NpbmcgZGF0YSwgYW5kIHJlbW92ZSBhbnkgdmFyaWFibGVzIHdoZXJlIG1vcmUgbW9yZSB0aGFuIDUwJSBvZiB0aGUgdmFsdWVzIGFyZSBtaXNzaW5nLgpgYGB7cn0KCiMgVmlzdWFsaXplIG1pc3NpbmcgZGF0YQp2aXNfbWlzcyh0cmFpbl9kYXRhICU+JSBzYW1wbGVfZnJhYyhzaXplID0gLjEwKSkKbWlzc192YXJfc3VtbWFyeSh0cmFpbl9kYXRhKQoKCiMgUmVtb3ZlIHZhcmlhYmxlcyB3aGVyZSBtb3JlIHRoYW4gNTAlIG9mIHRoZSB2YWx1ZXMgYXJlIG1pc3NpbmcuIFRoaXMgaXMgZG9uZSBmb3IgdHJhaW4sIHZhbGlkLCBhbmQgdGVzdCBkYXRhc2V0cy4KbWlzc181MHBjdF9iZWxvdyA8LQogIG1pc3NfdmFyX3N1bW1hcnkodHJhaW5fZGF0YSkgJT4lIAogIGZpbHRlcihwY3RfbWlzcyA8PSA1MCkgJT4lIAogIHB1bGwodmFyaWFibGUpCgptaXNzXzUwcGN0X2JlbG93CgoKdHJhaW5fZGF0YSA8LQogIHRyYWluX2RhdGEgJT4lCiAgc2VsZWN0KG9uZV9vZihtaXNzXzUwcGN0X2JlbG93KQogICAgICAgICApCgp2YWxpZF9kYXRhIDwtCiAgdmFsaWRfZGF0YSAlPiUKICBzZWxlY3Qob25lX29mKG1pc3NfNTBwY3RfYmVsb3cpCiAgICAgICAgICkKCnRlc3RfZGF0YSA8LQogIHRlc3RfZGF0YSAlPiUKICBzZWxlY3Qob25lX29mKG1pc3NfNTBwY3RfYmVsb3cpCiAgICAgICAgICkKCgpkaW0odHJhaW5fZGF0YSkKZGltKHZhbGlkX2RhdGEpCmRpbSh0ZXN0X2RhdGEpCgpgYGAKICAKICAgIAogIFVzZSBgRE13Ujo6U01PVEVgIHRvIHJlYmFsYW5jZSB0aGUgY2xhc2VzcyBvZiBgZmFpbHVyZWAuCmBgYHtyfQoKbWVzc2FnZSgidHJhaW5fZGF0YSRmYWlsdXJlIikKc3RyKHRyYWluX2RhdGEpCnN1bW1hcnkodHJhaW5fZGF0YSkKdGFibGUodHJhaW5fZGF0YSRmYWlsdXJlKQpwcm9wLnRhYmxlKHRhYmxlKHRyYWluX2RhdGEkZmFpbHVyZSkpCgpzZXQuc2VlZCgxMjM0NTY3ODkpCnRyYWluX2RhdGFfU01PVEUgPC0KICBTTU9URShmYWlsdXJlIH4gLiwKICAgICAgICBkYXRhICA9IGFzLmRhdGEuZnJhbWUodHJhaW5fZGF0YSksCiAgICAgICAgayA9IDEwLAogICAgICAgIHBlcmMub3ZlciA9IDEwMDAwMCwKICAgICAgICBwZXJjLnVuZGVyID0gMTMwMAogICAgICAgICkKCm1lc3NhZ2UoInRyYWluX2RhdGFfU01PVEUkZmFpbHVyZSIpCnRhYmxlKHRyYWluX2RhdGFfU01PVEUkZmFpbHVyZSkKcHJvcC50YWJsZSh0YWJsZSh0cmFpbl9kYXRhX1NNT1RFJGZhaWx1cmUpKQpjbGFzcyh0cmFpbl9kYXRhX1NNT1RFKQpnbGltcHNlKHRyYWluX2RhdGFfU01PVEUpCnN1bW1hcnkodHJhaW5fZGF0YV9TTU9URSkKCmBgYAoKCiMjIyBNb2RlbGluZyB3aXRoIEgyTyAgCiAgCiAgU3RhcnQgaDJvLgpgYGB7cn0KCmgyby5pbml0KCkKCmgyby5ub19wcm9ncmVzcygpICMgVHVybiBvZmYgcHJvZ3Jlc3MgYmFycwoKYGBgCiAgCiAgICAKICBDb252ZXJ0IHRvIHRoZSBkYXRhc2V0cyB0byBoMm8gb2JqZWN0cy4KYGBge3J9CgojIE11bHRpcGxlIHNldHMgKHRyYWluLCB0cmFpbl9ub193ZWlnaHQsIFNNT1RFLCBTTU9URV9ub193ZWlnaHQpIGFyZSB1c2VkIHRvIHRlc3QgdGhlIGVmZmVjdHMgb2YgdGhlc2UgZGlmZmVyZW50IGRhdGFzZXRzLgp0cmFpbl9oMm8gPC0gYXMuaDJvKHRyYWluX2RhdGEpCnRyYWluX2gyb19ub193ZWlnaHQgPC0gYXMuaDJvKHRyYWluX2RhdGEgJT4lIHNlbGVjdCgtd2VpZ2h0X2NvbCkpCnRyYWluX2gyb19TTU9URSA8LSBhcy5oMm8odHJhaW5fZGF0YV9TTU9URSkKdHJhaW5faDJvX1NNT1RFX25vX3dlaWdodCA8LSBhcy5oMm8odHJhaW5fZGF0YV9TTU9URSAlPiUgc2VsZWN0KC13ZWlnaHRfY29sKSkKCnZhbGlkX2gybyA8LSBhcy5oMm8odmFsaWRfZGF0YSkKdmFsaWRfaDJvX25vX3dlaWdodCA8LSBhcy5oMm8odmFsaWRfZGF0YSAlPiUgc2VsZWN0KC13ZWlnaHRfY29sKSkKCnRlc3RfaDJvICA8LSBhcy5oMm8odGVzdF9kYXRhKQp0ZXN0X2gyb19ub193ZWlnaHQgIDwtIGFzLmgybyh0ZXN0X2RhdGEgJT4lIHNlbGVjdCgtd2VpZ2h0X2NvbCkpCgoKIyBTYXZlIHRoZSBkYXRhCnNhdmVSRFModHJhaW5fZGF0YSwKICAgICAgICBwYXN0ZTAod2QsCiAgICAgICAgICAgICAgICIvRGF0YS9Qcm9jZXNzZWQvIiwKICAgICAgICAgICAgICAgInRyYWluX2RhdGEucmRzIgogICAgICAgICAgICAgICApCiAgICAgICAgKQoKCnNhdmVSRFModHJhaW5fZGF0YSAlPiUgCiAgICAgICAgICBzZWxlY3QoLXdlaWdodF9jb2wpLAogICAgICAgIHBhc3RlMCh3ZCwKICAgICAgICAgICAgICAgIi9EYXRhL1Byb2Nlc3NlZC8iLAogICAgICAgICAgICAgICAidHJhaW5fZGF0YV9ub193ZWlnaHQucmRzIgogICAgICAgICAgICAgICApCiAgICAgICAgKQoKCnNhdmVSRFModmFsaWRfZGF0YSwKICAgICAgICBwYXN0ZTAod2QsCiAgICAgICAgICAgICAgICIvRGF0YS9Qcm9jZXNzZWQvIiwKICAgICAgICAgICAgICAgInZhbGlkX2RhdGEucmRzIgogICAgICAgICAgICAgICApCiAgICAgICAgKQoKc2F2ZVJEUyh2YWxpZF9kYXRhICU+JSAKICAgICAgICAgIHNlbGVjdCgtd2VpZ2h0X2NvbCksCiAgICAgICAgcGFzdGUwKHdkLAogICAgICAgICAgICAgICAiL0RhdGEvUHJvY2Vzc2VkLyIsCiAgICAgICAgICAgICAgICJ2YWxpZF9kYXRhX25vX3dlaWdodC5yZHMiCiAgICAgICAgICAgICAgICkKICAgICAgICApCgoKc2F2ZVJEUyh0ZXN0X2RhdGEsCiAgICAgICAgcGFzdGUwKHdkLAogICAgICAgICAgICAgICAiL0RhdGEvUHJvY2Vzc2VkLyIsCiAgICAgICAgICAgICAgICJ0ZXN0X2RhdGEucmRzIgogICAgICAgICAgICAgICApCiAgICAgICAgKQoKc2F2ZVJEUyh0ZXN0X2RhdGEgJT4lIAogICAgICAgICAgc2VsZWN0KC13ZWlnaHRfY29sKSwKICAgICAgICBwYXN0ZTAod2QsCiAgICAgICAgICAgICAgICIvRGF0YS9Qcm9jZXNzZWQvIiwKICAgICAgICAgICAgICAgInRlc3RfZGF0YV9ub193ZWlnaHQucmRzIgogICAgICAgICAgICAgICApCiAgICAgICAgKQoKYGBgCiAgCiAgICAKICBTZXQgdGhlIHJlbGV2YW50IHZhcmlhYmxlIG5hbWVzLgpgYGB7cn0KCnkgPC0gImZhaWx1cmUiCgp4IDwtCiAgc2V0ZGlmZihuYW1lcyh0cmFpbl9oMm8pLAogICMgc2V0ZGlmZihuYW1lcyh0cmFpbl9oMm9fbm9fd2VpZ2h0KSwKICAjIHNldGRpZmYobmFtZXModHJhaW5faDJvX1NNT1RFKSwKICAjIHNldGRpZmYobmFtZXModHJhaW5faDJvX1NNT1RFX25vX3dlaWdodCksCiAgICAgICAgICB5CiAgICAgICAgICApCgpgYGAKICAKICAgIAogIFJ1biBgaDJvLmF1dG9tbGAuICAKICAgIAogICoqTk9URVMqKiAgCiAgICAKICAxKSBgdHJhaW5faDJvYCB1c2luZyBgd2VpZ2h0X2NvbGAgJiBgYmFsYW5jZV9jbGFzc2VzYCA9IFRSVUUgZG9lcyBub3QgZnVuY3Rpb24gLSBmaW5kcyBhIHF1aWNrICJzb2x1dGlvbiIgIAogICAgCiAgMikgYHRyYWluX2gyb2AgTk9UIHVzaW5nIGB3ZWlnaHRfY29sYCAmIGBiYWxhbmNlX2NsYXNzZXNgID0gVFJVRSBkb2VzIGZ1bmN0aW9uCjwhLS0gQ29uZnVzaW9uIE1hdHJpeCBhbmQgU3RhdGlzdGljcyAtLT4KCjwhLS0gICAgICAgICAgIFJlZmVyZW5jZSAtLT4KPCEtLSBQcmVkaWN0aW9uICAgICAwICAgICAxIC0tPgo8IS0tICAgICAgICAgIDAgNTM2MDMgICAgIDQgLS0+CjwhLS0gICAgICAgICAgMSAgODIyOSAgICAgMiAtLT4KCjwhLS0gICAgICAgICAgICAgICAgQWNjdXJhY3kgOiAwLjg2NjkgICAgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICAgICAgOTUlIENJIDogKDAuODY0MiwgMC44Njk1KSAtLT4KPCEtLSAgICAgTm8gSW5mb3JtYXRpb24gUmF0ZSA6IDAuOTk5OSAgICAgICAgICAgLS0+CjwhLS0gICAgIFAtVmFsdWUgW0FjYyA+IE5JUl0gOiAxICAgICAgICAgICAgICAgIC0tPgoKPCEtLSAgICAgICAgICAgICAgICAgICBLYXBwYSA6IDNlLTA0ICAgICAgICAgICAgLS0+Cgo8IS0tICBNY25lbWFyJ3MgVGVzdCBQLVZhbHVlIDogPDJlLTE2ICAgICAgICAgICAtLT4KCjwhLS0gICAgICAgICAgICAgU2Vuc2l0aXZpdHkgOiAzLjMzM2UtMDEgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgIFNwZWNpZmljaXR5IDogOC42NjllLTAxICAgICAgICAtLT4KPCEtLSAgICAgICAgICBQb3MgUHJlZCBWYWx1ZSA6IDIuNDMwZS0wNCAgICAgICAgLS0+CjwhLS0gICAgICAgICAgTmVnIFByZWQgVmFsdWUgOiA5Ljk5OWUtMDEgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICAgUHJlY2lzaW9uIDogMi40MzBlLTA0ICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgICAgIFJlY2FsbCA6IDMuMzMzZS0wMSAgICAgICAgLS0+CjwhLS0gICAgICAgICAgICAgICAgICAgICAgRjEgOiA0Ljg1NmUtMDQgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICBQcmV2YWxlbmNlIDogOS43MDNlLTA1ICAgICAgICAtLT4KPCEtLSAgICAgICAgICBEZXRlY3Rpb24gUmF0ZSA6IDMuMjM0ZS0wNSAgICAgICAgLS0+CjwhLS0gICAgRGV0ZWN0aW9uIFByZXZhbGVuY2UgOiAxLjMzMWUtMDEgICAgICAgIC0tPgo8IS0tICAgICAgIEJhbGFuY2VkIEFjY3VyYWN5IDogNi4wMDFlLTAxICAgICAgICAtLT4KCjwhLS0gICAgICAgICdQb3NpdGl2ZScgQ2xhc3MgOiAxICAtLT4gIAogIAogICoqTU9ERUwgVVNFRDogMykgYHRyYWluX2gyb2AgdXNpbmcgYHdlaWdodF9jb2xgICYgYGJhbGFuY2VfY2xhc3Nlc2AgPSBGQUxTRSoqCjwhLS0gQ29uZnVzaW9uIE1hdHJpeCBhbmQgU3RhdGlzdGljcyAtLT4KCjwhLS0gICAgICAgICAgIFJlZmVyZW5jZSAtLT4KPCEtLSBQcmVkaWN0aW9uICAgICAwICAgICAxIC0tPgo8IS0tICAgICAgICAgIDAgNDM5MDEgICAgIDIgLS0+CjwhLS0gICAgICAgICAgMSAxNzkzMSAgICAgNCAtLT4KCjwhLS0gICAgICAgICAgICAgICAgQWNjdXJhY3kgOiAwLjcxICAgICAgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICAgICAgOTUlIENJIDogKDAuNzA2NCwgMC43MTM2KSAtLT4KPCEtLSAgICAgTm8gSW5mb3JtYXRpb24gUmF0ZSA6IDAuOTk5OSAgICAgICAgICAgLS0+CjwhLS0gICAgIFAtVmFsdWUgW0FjYyA+IE5JUl0gOiAxICAgICAgICAgICAgICAgIC0tPgoKPCEtLSAgICAgICAgICAgICAgICAgICBLYXBwYSA6IDNlLTA0ICAgICAgICAgICAgLS0+Cgo8IS0tICBNY25lbWFyJ3MgVGVzdCBQLVZhbHVlIDogPDJlLTE2ICAgICAgICAgICAtLT4KCjwhLS0gICAgICAgICAgICAgU2Vuc2l0aXZpdHkgOiA2LjY2N2UtMDEgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgIFNwZWNpZmljaXR5IDogNy4xMDBlLTAxICAgICAgICAtLT4KPCEtLSAgICAgICAgICBQb3MgUHJlZCBWYWx1ZSA6IDIuMjMwZS0wNCAgICAgICAgLS0+CjwhLS0gICAgICAgICAgTmVnIFByZWQgVmFsdWUgOiAxLjAwMGUrMDAgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICAgUHJlY2lzaW9uIDogMi4yMzBlLTA0ICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgICAgIFJlY2FsbCA6IDYuNjY3ZS0wMSAgICAgICAgLS0+CjwhLS0gICAgICAgICAgICAgICAgICAgICAgRjEgOiA0LjQ1OWUtMDQgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICBQcmV2YWxlbmNlIDogOS43MDNlLTA1ICAgICAgICAtLT4KPCEtLSAgICAgICAgICBEZXRlY3Rpb24gUmF0ZSA6IDYuNDY5ZS0wNSAgICAgICAgLS0+CjwhLS0gICAgRGV0ZWN0aW9uIFByZXZhbGVuY2UgOiAyLjkwMGUtMDEgICAgICAgIC0tPgo8IS0tICAgICAgIEJhbGFuY2VkIEFjY3VyYWN5IDogNi44ODNlLTAxICAgICAgICAtLT4KCjwhLS0gICAgICAgICdQb3NpdGl2ZScgQ2xhc3MgOiAxIC0tPiAgCiAgCiAgNCkgYHRyYWluX2gyb19TTU9URWAgdXNpbmcgYHdlaWdodF9jb2xgICYgYGJhbGFuY2VfY2xhc3Nlc2AgPSBGQUxTRQo8IS0tIENvbmZ1c2lvbiBNYXRyaXggYW5kIFN0YXRpc3RpY3MgLS0+Cgo8IS0tICAgICAgICAgICBSZWZlcmVuY2UgLS0+CjwhLS0gUHJlZGljdGlvbiAgICAgMCAgICAgMSAtLT4KPCEtLSAgICAgICAgICAwIDM4NjY4ICAgICAzIC0tPgo8IS0tICAgICAgICAgIDEgMjMxNjQgICAgIDMgLS0+Cgo8IS0tICAgICAgICAgICAgICAgIEFjY3VyYWN5IDogMC42MjU0ICAgICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgICAgIDk1JSBDSSA6ICgwLjYyMTUsIDAuNjI5MikgLS0+CjwhLS0gICAgIE5vIEluZm9ybWF0aW9uIFJhdGUgOiAwLjk5OTkgICAgICAgICAgIC0tPgo8IS0tICAgICBQLVZhbHVlIFtBY2MgPiBOSVJdIDogMSAgICAgICAgICAgICAgICAtLT4KCjwhLS0gICAgICAgICAgICAgICAgICAgS2FwcGEgOiAxZS0wNCAgICAgICAgICAgIC0tPgoKPCEtLSAgTWNuZW1hcidzIFRlc3QgUC1WYWx1ZSA6IDwyZS0xNiAgICAgICAgICAgLS0+Cgo8IS0tICAgICAgICAgICAgIFNlbnNpdGl2aXR5IDogNS4wMDBlLTAxICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICBTcGVjaWZpY2l0eSA6IDYuMjU0ZS0wMSAgICAgICAgLS0+CjwhLS0gICAgICAgICAgUG9zIFByZWQgVmFsdWUgOiAxLjI5NWUtMDQgICAgICAgIC0tPgo8IS0tICAgICAgICAgIE5lZyBQcmVkIFZhbHVlIDogOS45OTllLTAxICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgIFByZWNpc2lvbiA6IDEuMjk1ZS0wNCAgICAgICAgLS0+CjwhLS0gICAgICAgICAgICAgICAgICBSZWNhbGwgOiA1LjAwMGUtMDEgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICAgICAgICAgIEYxIDogMi41ODllLTA0ICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgUHJldmFsZW5jZSA6IDkuNzAzZS0wNSAgICAgICAgLS0+CjwhLS0gICAgICAgICAgRGV0ZWN0aW9uIFJhdGUgOiA0Ljg1MWUtMDUgICAgICAgIC0tPgo8IS0tICAgIERldGVjdGlvbiBQcmV2YWxlbmNlIDogMy43NDZlLTAxICAgICAgICAtLT4KPCEtLSAgICAgICBCYWxhbmNlZCBBY2N1cmFjeSA6IDUuNjI3ZS0wMSAgICAgICAgLS0+Cgo8IS0tICAgICAgICAnUG9zaXRpdmUnIENsYXNzIDogMSAgLS0+ICAKICAKICA1KSBgdHJhaW5faDJvX1NNT1RFX25vX3dlaWdodGAgTk9UIHVzaW5nIGB3ZWlnaHRfY29sYCAmIGBiYWxhbmNlX2NsYXNzZXNgID0gVFJVRQo8IS0tIENvbmZ1c2lvbiBNYXRyaXggYW5kIFN0YXRpc3RpY3MgLS0+Cgo8IS0tICAgICAgICAgICBSZWZlcmVuY2UgLS0+CjwhLS0gUHJlZGljdGlvbiAgICAgMCAgICAgMSAtLT4KPCEtLSAgICAgICAgICAwICAzODk5ICAgICAwIC0tPgo8IS0tICAgICAgICAgIDEgNTc5MzMgICAgIDYgLS0+Cgo8IS0tICAgICAgICAgICAgICAgIEFjY3VyYWN5IDogMC4wNjMxICAgICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgICAgIDk1JSBDSSA6ICgwLjA2MTIsIDAuMDY1MSkgLS0+CjwhLS0gICAgIE5vIEluZm9ybWF0aW9uIFJhdGUgOiAwLjk5OTkgICAgICAgICAgIC0tPgo8IS0tICAgICBQLVZhbHVlIFtBY2MgPiBOSVJdIDogMSAgICAgICAgICAgICAgICAtLT4KCjwhLS0gICAgICAgICAgICAgICAgICAgS2FwcGEgOiAwICAgICAgICAgICAgICAgIC0tPgoKPCEtLSAgTWNuZW1hcidzIFRlc3QgUC1WYWx1ZSA6IDwyZS0xNiAgICAgICAgICAgLS0+Cgo8IS0tICAgICAgICAgICAgIFNlbnNpdGl2aXR5IDogMS4wMDBlKzAwICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICBTcGVjaWZpY2l0eSA6IDYuMzA2ZS0wMiAgICAgICAgLS0+CjwhLS0gICAgICAgICAgUG9zIFByZWQgVmFsdWUgOiAxLjAzNmUtMDQgICAgICAgIC0tPgo8IS0tICAgICAgICAgIE5lZyBQcmVkIFZhbHVlIDogMS4wMDBlKzAwICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgIFByZWNpc2lvbiA6IDEuMDM2ZS0wNCAgICAgICAgLS0+CjwhLS0gICAgICAgICAgICAgICAgICBSZWNhbGwgOiAxLjAwMGUrMDAgICAgICAgIC0tPgo8IS0tICAgICAgICAgICAgICAgICAgICAgIEYxIDogMi4wNzFlLTA0ICAgICAgICAtLT4KPCEtLSAgICAgICAgICAgICAgUHJldmFsZW5jZSA6IDkuNzAzZS0wNSAgICAgICAgLS0+CjwhLS0gICAgICAgICAgRGV0ZWN0aW9uIFJhdGUgOiA5LjcwM2UtMDUgICAgICAgIC0tPgo8IS0tICAgIERldGVjdGlvbiBQcmV2YWxlbmNlIDogOS4zNjllLTAxICAgICAgICAtLT4KPCEtLSAgICAgICBCYWxhbmNlZCBBY2N1cmFjeSA6IDUuMzE1ZS0wMSAgICAgICAgLS0+Cgo8IS0tICAgICAgICAnUG9zaXRpdmUnIENsYXNzIDogMSAgIC0tPgogICAgICAgCmBgYHtyfQoKIyB1c2VyICAgc3lzdGVtICBlbGFwc2VkIAojICAgMjIuMzU1ICAgIDcuODI2IDEzMTAuNzM4CiMgfiAyMiBtaW4KCnN0YXJ0IDwtIHByb2MudGltZSgpCmF1dG9tbF9tb2RlbHNfaDJvIDwtCiAgaDJvLmF1dG9tbCh4ID0geCwKICAgICAgICAgICAgIHkgPSB5LAogICAgICAgICAgICAgdHJhaW5pbmdfZnJhbWUgPSB0cmFpbl9oMm8sCiAgICAgICAgICAgICAjIHRyYWluaW5nX2ZyYW1lID0gdHJhaW5faDJvX1NNT1RFX25vX3dlaWdodCwKICAgICAgICAgICAgIHZhbGlkYXRpb25fZnJhbWUgPSB2YWxpZF9oMm8sCiAgICAgICAgICAgICBsZWFkZXJib2FyZF9mcmFtZSA9IHRlc3RfaDJvLAogICAgICAgICAgICAgbmZvbGRzID0gMTAsCiAgICAgICAgICAgICAjIGJhbGFuY2VfY2xhc3NlcyA9IFRSVUUsCiAgICAgICAgICAgICBiYWxhbmNlX2NsYXNzZXMgPSBGQUxTRSwKICAgICAgICAgICAgIHdlaWdodHNfY29sdW1uID0gIndlaWdodF9jb2wiLAogICAgICAgICAgICAgbWF4X3J1bnRpbWVfc2VjcyA9IDM2MDAsICMgMSBob3VyCiAgICAgICAgICAgICAjIG1heF9ydW50aW1lX3NlY3MgPSA2MCwKICAgICAgICAgICAgIG1heF9tb2RlbHMgPSAxMCwKICAgICAgICAgICAgIHN0b3BwaW5nX21ldHJpYyA9ICJBVVRPIiwKICAgICAgICAgICAgIHNlZWQgPSAxMjM0NTY3ODkKICAgICAgICAgICAgICkKCmgyby50aW1lIDwtIHByb2MudGltZSgpIC0gc3RhcnQKaDJvLnRpbWUKCgpybShzdGFydCkKCmBgYAogIAogICAgCiAgUHVsbCBvdXQgdGhlICJsZWFkZXIiIG1vZGVsLgpgYGB7cn0KCmF1dG9tbF9sZWFkZXIgPC0gYXV0b21sX21vZGVsc19oMm9AbGVhZGVyCgphdXRvbWxfbGVhZGVyCgoKIyBzYXZlIGVhY2ggaW5kaXZpZHVhbCBtb2RlbAptb2RlbF9wYXRoX2gybyA8LQogICBoMm8uc2F2ZU1vZGVsKG9iamVjdCA9IGF1dG9tbF9sZWFkZXIsCiAgICAgICAgICAgICAgICAgICAgIHBhdGggPSBwYXN0ZTAod2QsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIi9Nb2RlbHMvIgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICksCiAgICAgICAgICAgICAgICAgICAgIGZvcmNlID0gVFJVRQogICAgICAgICAgICAgICAgICAgICApCgoKc2F2ZVJEUyhtb2RlbF9wYXRoX2gybywKICAgICAgICBwYXN0ZTAod2QsCiAgICAgICAgICAgICAgICIvTW9kZWxzLyIsCiAgICAgICAgICAgICAgICJtb2RlbF9wYXRoX2gyby5yZHMiCiAgICAgICAgICAgICAgICkKICAgICAgICApCgoKCiMgbG9hZCB0aGUgbW9kZWwKIyBtb2RlbF9wYXRoX2gybyA8LQojICAgcmVhZF9yZHMocGF0aCA9IHBhc3RlMCh3ZCwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgIi9Nb2RlbHMvIiwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgIm1vZGVsX3BhdGhfaDJvLnJkcyIKIyAgICAgICAgICAgICAgICAgICAgICAgICAgKQojICAgICAgICAgICAgKQojIAojIHByaW50KG1vZGVsX3BhdGhfaDJvKQojIAojIGF1dG9tbF9sZWFkZXIgPC0KIyAgIGgyby5sb2FkTW9kZWwocGF0aCA9IG1vZGVsX3BhdGhfaDJvKQoKYGBgCiAgCiAgICAKICBJbnNwZWN0IHRoZSBsZWFkZXJib2FyZC4KYGBge3J9CgphdXRvbWxfbW9kZWxzX2gyb0BsZWFkZXJib2FyZAoKYGBgCgoKICBJbnZlc3RpZ2F0ZSB2YXJpYWJsZSBpbXBvcnRhbmNlIG9mIHRoZSBsZWFkZXIgbW9kZWwuCmBgYHtyfQoKbGVhZGVyX21vZGVsc192YXJpbXAgPC0gaDJvLnZhcmltcChvYmplY3QgPSBhdXRvbWxfbGVhZGVyKQoKbGVhZGVyX21vZGVsc192YXJpbXBfcGxvdCA8LSBoMm8udmFyaW1wX3Bsb3QobW9kZWwgPSBhdXRvbWxfbGVhZGVyLCBudW1fb2ZfZmVhdHVyZXMgPSAxMCkKCgpsZWFkZXJfbW9kZWxzX3ZhcmltcAoKbGVhZGVyX21vZGVsc192YXJpbXBfcGxvdAoKYGBgCiAKIAogQ3JlYXRlIHByZWRpY3Rpb25zIGZyb20gdGhlIG1vZGVscy4KYGBge3J9CgpwcmVkX2gybyA8LSBoMm8ucHJlZGljdChhdXRvbWxfbGVhZGVyLCBuZXdkYXRhID0gdGVzdF9oMm8pCgoKIyBzYXZlIGFzIGEgLnJkcyBmaWxlCnNhdmVSRFMocHJlZF9oMm8sCiAgICAgICAgcGFzdGUwKHdkLAogICAgICAgICAgICAgICAiL01vZGVscy8iLAogICAgICAgICAgICAgICAicHJlZF9oMm8ucmRzIgogICAgICAgICAgICAgICApCiAgICAgICAgKQoKIyBwcmVkX2gybyA8LQojICAgcmVhZF9yZHMocGF0aCA9IHBhc3RlMCh3ZCwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgIi9Nb2RlbHMvIiwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgInByZWRfaDJvLnJkcyIKIyAgICAgICAgICAgICAgICAgICAgICAgICAgKQojICAgICAgICAgICAgKQoKYGBgCiAgCiAgICAKICBMb29rIGF0IHBlcmZvcm1hbmNlIHN0YXRzIHVzaW5nIHRoZSB0ZXN0IGRhdGEuCmBgYHtyfQoKcGVyZl9zdGF0c190ZXN0X2gybyA8LSBoMm8ucGVyZm9ybWFuY2UoYXV0b21sX2xlYWRlciwgbmV3ZGF0YSA9IHRlc3RfaDJvKQoKcGVyZl9zdGF0c190ZXN0X2gybwoKCiMgc2F2ZSBhcyBhIC5yZHMgZmlsZQpzYXZlUkRTKHBlcmZfc3RhdHNfdGVzdF9oMm8sCiAgICAgICAgcGFzdGUwKHdkLAogICAgICAgICAgICAgICAiL01vZGVscy8iLAogICAgICAgICAgICAgICAicGVyZl9zdGF0c190ZXN0X2gyby5yZHMiCiAgICAgICAgICAgICAgICkKICAgICAgICApCgojIHBlcmZfc3RhdHNfdGVzdF9oMm8gPC0KIyAgIHJlYWRfcmRzKHBhdGggPSBwYXN0ZTAod2QsCiMgICAgICAgICAgICAgICAgICAgICAgICAgICIvTW9kZWxzLyIsCiMgICAgICAgICAgICAgICAgICAgICAgICAgICJwZXJmX3N0YXRzX3Rlc3RfaDJvLnJkcyIKIyAgICAgICAgICAgICAgICAgICAgICAgICAgKQojICAgICAgICAgICAgKQoKYGBgCiAgCiAgICAKICBJbnZlc3RpZ2F0ZSB0ZXN0IGVycm9yLgpgYGB7cn0KCmVycm9yX3RibF9oMm8gPC0KICB0ZXN0X2RhdGEgJT4lIAogIGJpbmRfY29scyhwcmVkX2gybyAlPiUgYXNfdGliYmxlKCkKICAgICAgICAgICAgKSAlPiUgCiAgcmVuYW1lKG9icyA9IGZhaWx1cmUsCiAgICAgICAgIHByZWQgPSBwcmVkaWN0CiAgICAgICAgICkKCgpjb25mdXNpb25NYXRyaXgoZGF0YSA9IGVycm9yX3RibF9oMm8kcHJlZCwKICAgICAgICAgICAgICAgIHJlZmVyZW5jZSA9IGVycm9yX3RibF9oMm8kb2JzLAogICAgICAgICAgICAgICAgcG9zaXRpdmUgPSAiMSIsCiAgICAgICAgICAgICAgICBtb2RlID0gImV2ZXJ5dGhpbmciCiAgICAgICAgICAgICAgICApCgoKIyBzYXZlIGFzIGEgLnJkcyBmaWxlCnNhdmVSRFMoZXJyb3JfdGJsX2gybywKICAgICAgICBwYXN0ZTAod2QsCiAgICAgICAgICAgICAgICIvTW9kZWxzLyIsCiAgICAgICAgICAgICAgICJlcnJvcl90YmxfaDJvLnJkcyIKICAgICAgICAgICAgICAgKQogICAgICAgICkKCiMgZXJyb3JfdGJsX2gybyA8LQojICAgcmVhZF9yZHMocGF0aCA9IHBhc3RlMCh3ZCwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgIi9Nb2RlbHMvIiwKIyAgICAgICAgICAgICAgICAgICAgICAgICAgImVycm9yX3RibF9oMm8ucmRzIgojICAgICAgICAgICAgICAgICAgICAgICAgICApCiMgICAgICAgICAgICApCgpgYGAKCgo=