Investigating missing values

The target of this notebook is to show a way of dealing with missing values on the Housing Price dataset from kaggle.

Looking at the dataset

The first step before dealing with the missing values is to have a quick look into the dataset.

First, we load the tidyverse package:

## Importing packages
library(tidyverse)

And our datasets:

#load data
test = read_csv("house_test.csv")
train = read_csv("house_train.csv")

For personal preference, I convert all variables to lowercase (helps typing it quicker!):

# View the column names
names(train)<-tolower(names(train))
names(test)<-tolower(names(test))

And we combine both train and test datasets to analyse the missing values all at once, excluding our dependant variable:

train_variables <- train %>% 
  select(-saleprice)
all_data <- rbind(train_variables, test)

Additionally, it is good to have a perspective of the dimension of the dataset:

dim(all_data)
[1] 2919   80

We have 80 variables and 2919 observations. Now, it is time to check the structure of the data:

# Look at the structure using dplyr's glimpse()
glimpse(all_data)
Rows: 2,919
Columns: 80
$ id            <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, …
$ mssubclass    <dbl> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60, 20, 20, 20, 45, 20, 90…
$ mszoning      <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RM", "RL", "RL", "RL",…
$ lotfrontage   <dbl> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, NA, 91, NA, 51, NA, 72,…
$ lotarea       <dbl> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10382, 6120, 7420, 11200,…
$ street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave",…
$ alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ lotshape      <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg", "IR1", "Reg", "Reg", "…
$ landcontour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "…
$ utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "…
$ lotconfig     <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Inside", "Inside", "Corner…
$ landslope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "…
$ neighborhood  <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoRidge", "Mitchel", "Some…
$ condition1    <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm", "PosN", "Arter…
$ condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm",…
$ bldgtype      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam",…
$ housestyle    <chr> "2Story", "1Story", "2Story", "2Story", "2Story", "1.5Fin", "1Story", "…
$ overallqual   <dbl> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, 6, 4, 5, 5, 8, 7, 8, 5,…
$ overallcond   <dbl> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, 7, 5, 5, 6, 5, 7, 5, 7,…
$ yearbuilt     <dbl> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, 1931, 1939, 1965, 2005,…
$ yearremodadd  <dbl> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, 1950, 1950, 1965, 2006,…
$ roofstyle     <chr> "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable",…
$ roofmatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "Comp…
$ exterior1st   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "VinylSd", "VinylSd", "Viny…
$ exterior2nd   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "VinylSd", "VinylSd", "Viny…
$ masvnrtype    <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace", "None", "Stone", "Ston…
$ masvnrarea    <dbl> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, 0, 306, 212, 0, 180, 0,…
$ exterqual     <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", "TA", "TA", "TA", "Ex",…
$ extercond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",…
$ foundation    <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "Wood", "PConc", "CBlock…
$ bsmtqual      <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", "TA", "TA", "TA", "Ex",…
$ bsmtcond      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",…
$ bsmtexposure  <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", "No", "No", "No", "No",…
$ bsmtfintype1  <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ", "Unf", "GLQ", "…
$ bsmtfinsf1    <dbl> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851, 906, 998, 737, 0, 733,…
$ bsmtfintype2  <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "BLQ", "Unf", "Unf", "…
$ bsmtfinsf2    <dbl> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ bsmtunfsf     <dbl> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140, 134, 177, 175, 1494, 5…
$ totalbsmtsf   <dbl> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952, 991, 1040, 1175, 912, …
$ heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA",…
$ heatingqc     <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex",…
$ centralair    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "…
$ electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr",…
$ `1stflrsf`    <dbl> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022, 1077, 1040, 1182, 912…
$ `2ndflrsf`    <dbl> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, 1142, 0, 0, 0, 0, 0, 0,…
$ lowqualfinsf  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ grlivarea     <dbl> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, 1774, 1077, 1040, 2324,…
$ bsmtfullbath  <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,…
$ bsmthalfbath  <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ fullbath      <dbl> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, 1, 2, 1, 1, 3, 1, 2, 1,…
$ halfbath      <dbl> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,…
$ bedroomabvgr  <dbl> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, 2, 2, 3, 3, 4, 3, 3, 3,…
$ kitchenabvgr  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,…
$ kitchenqual   <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", "TA", "TA", "TA", "Ex",…
$ totrmsabvgrd  <dbl> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5, 5, 6, 6, 6, 9, 6, 7, 6…
$ functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Min1", "Typ", …
$ fireplaces    <dbl> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1,…
$ fireplacequ   <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA", "TA", NA, "Gd", NA, "…
$ garagetype    <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd", "Attchd", "Attchd", "…
$ garageyrblt   <dbl> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, 1931, 1939, 1965, 2005,…
$ garagefinish  <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn", "RFn", "Unf", "RFn", "…
$ garagecars    <dbl> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, 2, 2, 2, 1, 3, 1, 2, 2,…
$ garagearea    <dbl> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205, 384, 736, 352, 840, 3…
$ garagequal    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "Fa", "Gd", "TA", "TA",…
$ garagecond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",…
$ paveddrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "…
$ wooddecksf    <dbl> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, 140, 160, 0, 48, 0, 0, …
$ openporchsf   <dbl> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, 33, 213, 112, 0, 0, 102…
$ enclosedporch <dbl> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, …
$ `3ssnporch`   <dbl> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ screenporch   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ poolarea      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ poolqc        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ fence         <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, NA, NA, "GdWo", "G…
$ miscfeature   <chr> NA, NA, NA, NA, NA, "Shed", NA, "Shed", NA, NA, NA, NA, NA, NA, NA, NA,…
$ miscval       <dbl> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0, 0, 700, 500, 0, 0, 0, …
$ mosold        <dbl> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, 7, 3, 10, 6, 5, 11, 6, …
$ yrsold        <dbl> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, 2008, 2008, 2008, 2006,…
$ saletype      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "New"…
$ salecondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal", "Normal", "Normal", …

It seems we have some numerical data that is not numerical really (like “mssubclass”) but we won’t be analysing it in this notebook.

Next step is to get a summary of all the data, to see if we find any inconsistency:

summary(all_data)
       id           mssubclass       mszoning          lotfrontage        lotarea      
 Min.   :   1.0   Min.   : 20.00   Length:2919        Min.   : 21.00   Min.   :  1300  
 1st Qu.: 730.5   1st Qu.: 20.00   Class :character   1st Qu.: 59.00   1st Qu.:  7478  
 Median :1460.0   Median : 50.00   Mode  :character   Median : 68.00   Median :  9453  
 Mean   :1460.0   Mean   : 57.14                      Mean   : 69.31   Mean   : 10168  
 3rd Qu.:2189.5   3rd Qu.: 70.00                      3rd Qu.: 80.00   3rd Qu.: 11570  
 Max.   :2919.0   Max.   :190.00                      Max.   :313.00   Max.   :215245  
                                                      NA's   :486                      
    street             alley             lotshape         landcontour         utilities        
 Length:2919        Length:2919        Length:2919        Length:2919        Length:2919       
 Class :character   Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                                               
                                                                                               
                                                                                               
                                                                                               
  lotconfig          landslope         neighborhood        condition1         condition2       
 Length:2919        Length:2919        Length:2919        Length:2919        Length:2919       
 Class :character   Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                                               
                                                                                               
                                                                                               
                                                                                               
   bldgtype          housestyle         overallqual      overallcond      yearbuilt   
 Length:2919        Length:2919        Min.   : 1.000   Min.   :1.000   Min.   :1872  
 Class :character   Class :character   1st Qu.: 5.000   1st Qu.:5.000   1st Qu.:1954  
 Mode  :character   Mode  :character   Median : 6.000   Median :5.000   Median :1973  
                                       Mean   : 6.089   Mean   :5.565   Mean   :1971  
                                       3rd Qu.: 7.000   3rd Qu.:6.000   3rd Qu.:2001  
                                       Max.   :10.000   Max.   :9.000   Max.   :2010  
                                                                                      
  yearremodadd   roofstyle           roofmatl         exterior1st        exterior2nd       
 Min.   :1950   Length:2919        Length:2919        Length:2919        Length:2919       
 1st Qu.:1965   Class :character   Class :character   Class :character   Class :character  
 Median :1993   Mode  :character   Mode  :character   Mode  :character   Mode  :character  
 Mean   :1984                                                                              
 3rd Qu.:2004                                                                              
 Max.   :2010                                                                              
                                                                                           
  masvnrtype          masvnrarea      exterqual          extercond          foundation       
 Length:2919        Min.   :   0.0   Length:2919        Length:2919        Length:2919       
 Class :character   1st Qu.:   0.0   Class :character   Class :character   Class :character  
 Mode  :character   Median :   0.0   Mode  :character   Mode  :character   Mode  :character  
                    Mean   : 102.2                                                           
                    3rd Qu.: 164.0                                                           
                    Max.   :1600.0                                                           
                    NA's   :23                                                               
   bsmtqual           bsmtcond         bsmtexposure       bsmtfintype1         bsmtfinsf1    
 Length:2919        Length:2919        Length:2919        Length:2919        Min.   :   0.0  
 Class :character   Class :character   Class :character   Class :character   1st Qu.:   0.0  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median : 368.5  
                                                                             Mean   : 441.4  
                                                                             3rd Qu.: 733.0  
                                                                             Max.   :5644.0  
                                                                             NA's   :1       
 bsmtfintype2         bsmtfinsf2        bsmtunfsf       totalbsmtsf       heating         
 Length:2919        Min.   :   0.00   Min.   :   0.0   Min.   :   0.0   Length:2919       
 Class :character   1st Qu.:   0.00   1st Qu.: 220.0   1st Qu.: 793.0   Class :character  
 Mode  :character   Median :   0.00   Median : 467.0   Median : 989.5   Mode  :character  
                    Mean   :  49.58   Mean   : 560.8   Mean   :1051.8                     
                    3rd Qu.:   0.00   3rd Qu.: 805.5   3rd Qu.:1302.0                     
                    Max.   :1526.00   Max.   :2336.0   Max.   :6110.0                     
                    NA's   :1         NA's   :1        NA's   :1                          
  heatingqc          centralair         electrical           1stflrsf       2ndflrsf     
 Length:2919        Length:2919        Length:2919        Min.   : 334   Min.   :   0.0  
 Class :character   Class :character   Class :character   1st Qu.: 876   1st Qu.:   0.0  
 Mode  :character   Mode  :character   Mode  :character   Median :1082   Median :   0.0  
                                                          Mean   :1160   Mean   : 336.5  
                                                          3rd Qu.:1388   3rd Qu.: 704.0  
                                                          Max.   :5095   Max.   :2065.0  
                                                                                         
  lowqualfinsf        grlivarea     bsmtfullbath     bsmthalfbath        fullbath    
 Min.   :   0.000   Min.   : 334   Min.   :0.0000   Min.   :0.00000   Min.   :0.000  
 1st Qu.:   0.000   1st Qu.:1126   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:1.000  
 Median :   0.000   Median :1444   Median :0.0000   Median :0.00000   Median :2.000  
 Mean   :   4.694   Mean   :1501   Mean   :0.4299   Mean   :0.06136   Mean   :1.568  
 3rd Qu.:   0.000   3rd Qu.:1744   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:2.000  
 Max.   :1064.000   Max.   :5642   Max.   :3.0000   Max.   :2.00000   Max.   :4.000  
                                   NA's   :2        NA's   :2                        
    halfbath       bedroomabvgr   kitchenabvgr   kitchenqual         totrmsabvgrd   
 Min.   :0.0000   Min.   :0.00   Min.   :0.000   Length:2919        Min.   : 2.000  
 1st Qu.:0.0000   1st Qu.:2.00   1st Qu.:1.000   Class :character   1st Qu.: 5.000  
 Median :0.0000   Median :3.00   Median :1.000   Mode  :character   Median : 6.000  
 Mean   :0.3803   Mean   :2.86   Mean   :1.045                      Mean   : 6.452  
 3rd Qu.:1.0000   3rd Qu.:3.00   3rd Qu.:1.000                      3rd Qu.: 7.000  
 Max.   :2.0000   Max.   :8.00   Max.   :3.000                      Max.   :15.000  
                                                                                    
  functional          fireplaces     fireplacequ         garagetype         garageyrblt  
 Length:2919        Min.   :0.0000   Length:2919        Length:2919        Min.   :1895  
 Class :character   1st Qu.:0.0000   Class :character   Class :character   1st Qu.:1960  
 Mode  :character   Median :1.0000   Mode  :character   Mode  :character   Median :1979  
                    Mean   :0.5971                                         Mean   :1978  
                    3rd Qu.:1.0000                                         3rd Qu.:2002  
                    Max.   :4.0000                                         Max.   :2207  
                                                                           NA's   :159   
 garagefinish         garagecars      garagearea      garagequal         garagecond       
 Length:2919        Min.   :0.000   Min.   :   0.0   Length:2919        Length:2919       
 Class :character   1st Qu.:1.000   1st Qu.: 320.0   Class :character   Class :character  
 Mode  :character   Median :2.000   Median : 480.0   Mode  :character   Mode  :character  
                    Mean   :1.767   Mean   : 472.9                                        
                    3rd Qu.:2.000   3rd Qu.: 576.0                                        
                    Max.   :5.000   Max.   :1488.0                                        
                    NA's   :1       NA's   :1                                             
  paveddrive          wooddecksf       openporchsf     enclosedporch      3ssnporch      
 Length:2919        Min.   :   0.00   Min.   :  0.00   Min.   :   0.0   Min.   :  0.000  
 Class :character   1st Qu.:   0.00   1st Qu.:  0.00   1st Qu.:   0.0   1st Qu.:  0.000  
 Mode  :character   Median :   0.00   Median : 26.00   Median :   0.0   Median :  0.000  
                    Mean   :  93.71   Mean   : 47.49   Mean   :  23.1   Mean   :  2.602  
                    3rd Qu.: 168.00   3rd Qu.: 70.00   3rd Qu.:   0.0   3rd Qu.:  0.000  
                    Max.   :1424.00   Max.   :742.00   Max.   :1012.0   Max.   :508.000  
                                                                                         
  screenporch        poolarea          poolqc             fence           miscfeature       
 Min.   :  0.00   Min.   :  0.000   Length:2919        Length:2919        Length:2919       
 1st Qu.:  0.00   1st Qu.:  0.000   Class :character   Class :character   Class :character  
 Median :  0.00   Median :  0.000   Mode  :character   Mode  :character   Mode  :character  
 Mean   : 16.06   Mean   :  2.252                                                           
 3rd Qu.:  0.00   3rd Qu.:  0.000                                                           
 Max.   :576.00   Max.   :800.000                                                           
                                                                                            
    miscval             mosold           yrsold       saletype         salecondition     
 Min.   :    0.00   Min.   : 1.000   Min.   :2006   Length:2919        Length:2919       
 1st Qu.:    0.00   1st Qu.: 4.000   1st Qu.:2007   Class :character   Class :character  
 Median :    0.00   Median : 6.000   Median :2008   Mode  :character   Mode  :character  
 Mean   :   50.83   Mean   : 6.213   Mean   :2008                                        
 3rd Qu.:    0.00   3rd Qu.: 8.000   3rd Qu.:2009                                        
 Max.   :17000.00   Max.   :12.000   Max.   :2010                                        
                                                                                         

If we have a closer look, it seems there is a typo in one observation, regarding “garageyrblt”:

all_data %>% 
  select(garageyrblt) %>% 
  summary()
  garageyrblt  
 Min.   :1895  
 1st Qu.:1960  
 Median :1979  
 Mean   :1978  
 3rd Qu.:2002  
 Max.   :2207  
 NA's   :159   

This variable is telling us the year where the garage was build. As we can see, the maximun value is 2207, which it is obviusly a typo.

Typo in garageyrblt

As we saw, there is a typo in the data. In order to find out where the typo is, we can just filter the data for any year after 2020 (I will also compare it versus the year that the property was built to get more perspective):

all_data %>% 
  filter(garageyrblt>2020) %>% 
  select(yearbuilt, garageyrblt)

We can see that the year when the property was built was 2006 and so the garage probably was built in 2007. We will amend the value to 2007.

all_data <- all_data %>%
  mutate(garageyrblt = ifelse(garageyrblt > 2020, 2007, garageyrblt))

Missing values

Now, it is our turn to get more deeply into the missing values. A good management of them, should help our model to predict the sales price more accurate (in theory!).

We can visualise the missing values:

all_data %>%
  gather(key = "key", value = "val") %>% 
  mutate(is.missing = is.na(val)) %>% 
  group_by(key, is.missing) %>%
  summarise(num_missing = n())%>%
  filter(is.missing == TRUE, num_missing > 1) %>% 
  select(-is.missing) %>%
  arrange(desc(num_missing)) %>% 
  ggplot(aes(x = reorder(key, num_missing), y = num_missing, fill = key)) +
  geom_col() +
  coord_flip() +
  xlab("Variable") +
  ylab("Missing values")+
  theme(legend.position='none')
`summarise()` regrouping output by 'key' (override with `.groups` argument)

It seems we have a good quantity of variables with missing values, specially the top 6.

Let’s see the actual amount of missing values per variable:

NAcol <- which(colSums(is.na(all_data)) > 0)
sort(colSums(sapply(all_data[NAcol], is.na)), decreasing = TRUE)
      poolqc  miscfeature        alley        fence  fireplacequ  lotfrontage  garageyrblt 
        2909         2814         2721         2348         1420          486          159 
garagefinish   garagequal   garagecond   garagetype     bsmtcond bsmtexposure     bsmtqual 
         159          159          159          157           82           82           81 
bsmtfintype2 bsmtfintype1   masvnrtype   masvnrarea     mszoning    utilities bsmtfullbath 
          80           79           24           23            4            2            2 
bsmthalfbath   functional  exterior1st  exterior2nd   bsmtfinsf1   bsmtfinsf2    bsmtunfsf 
           2            2            1            1            1            1            1 
 totalbsmtsf   electrical  kitchenqual   garagecars   garagearea     saletype 
           1            1            1            1            1            1 

We are going to work from the top to the bottom.

I have created the following funciton to have a visual look of most common observation, so that will help us when filling the missing values.

view_group_data <- function(data, col){   
  data %>% 
    count({{col}}) %>%
    arrange(desc(n)) %>%
    top_n(5)
}

Pool

Does the property have a swimming pool?

view_group_data(all_data, poolqc)
Selecting by n

The majority of the observations (2909) are telling us that the properties don’t have a swimming pool. However, if we compare the pool quality versus the area, we see some surprises:

pool.cols <- c('poolarea', 'poolqc')

all_data %>%
  subset(select = pool.cols) %>% 
  filter(poolarea > 0, is.na(poolqc))

It seems that 3 rows have a pool area bigger that 0, which means that 3 missing values seem to be incorrect.

For simplicity, we will fill those 3 values with “Gd” as seems to be the most frequent value. We will change the other missing values to “No”

all_data <- all_data %>%
  mutate(poolqc = case_when(
                           poolarea > 0 & is.na(poolqc) ~ "Gd",
                           is.na(poolqc) ~ "No",
                           TRUE ~ poolqc
                           )
         )

Miscfeature

Here are included those features not covered in other categories.

view_group_data(all_data, miscfeature)
Selecting by n

For now, we can fill the missing values with “No” for those observations with no extra features:

all_data <- all_data %>%
  mutate(miscfeature = ifelse(is.na(miscfeature), "No", miscfeature))

Alley

As per the documentation, those NA mean that they dont have alley access. We can fill them with “No”

all_data <- all_data %>%
  mutate(alley = ifelse(is.na(alley), "No", alley))

Fence

Similar to the previous variable, NA means “No” fence. We will fill the missing values with “No”.

all_data <- all_data %>%
  mutate(fence = ifelse(is.na(fence), "No", fence))

view_group_data(all_data, fence)
Selecting by n

Fireplace

For fireplace quality missing values, a good thing to check is to see if any missing value is aligned with a fireplace:

all_data %>% 
  select(fireplaces, fireplacequ) %>% 
  filter(fireplaces > 0, is.na(fireplacequ))

Seems all to be correct, so we are sure that missing value means “No” fireplace:

all_data <- all_data %>%
  mutate(fireplacequ = ifelse(is.na(fireplacequ), "No", fireplacequ))

Lotfrontage

For the linear feet of street connected to the property, we can assume that missing values means 0 linear feet:

all_data <- all_data %>%
  mutate(lotfrontage = ifelse(is.na(lotfrontage), 0, lotfrontage))

Garage

For Garage variables, we will filter our data set with all the related variables:

garage.cols <- c('id', 'garageyrblt', 'garagearea', 'garagecars', 'garagequal', 'garagefinish', 'garagecond', 'garagetype')

It seems some columns have 159 missing values (“no garage”) and garagetype have 157 missing values.

A good chech is to filter the data to see if any observation has garage area and also missing values on other attributes:

all_data %>%
  subset(select = garage.cols) %>% 
  filter(garagearea > 0, is.na(garagequal))

We can see that in this case the data seems to be incomplete, as the area is 360 and the garage have space for 1 car.

We can check for detached garages with similar area in order to fill the missing values:

all_data %>%
  filter(garagecars == 1,garagetype == "Detchd") %>% 
  select(garagequal, garagefinish, garagecond) %>% 
  group_by(garagequal, garagefinish, garagecond) %>% 
  summarise(freq = n()) %>% 
  arrange(desc(freq))
`summarise()` regrouping output by 'garagequal', 'garagefinish' (override with `.groups` argument)

I will use the most frequent set to fill the missing values of the observation number 2127:

all_data <- all_data %>% 
  mutate(garagequal = ifelse(id == 2127, "TA", garagequal),
         garagefinish = ifelse(id == 2127, "Unf", garagefinish),
         garagecond = ifelse(id == 2127, "TA", garagecond),
         garageyrblt = ifelse(id == 2127, yearbuilt, garageyrblt))

all_data %>%
  subset(select = garage.cols) %>% 
  filter(id ==  2127)

Regarding observation 2577, we have a detached garage but no more information, it sounds like no garage at all

all_data %>%
  filter(garagetype == "Detchd", is.na(garagecars)) %>% 
  subset(select = garage.cols)
all_data <- all_data %>% 
  mutate(garagetype = ifelse(id == 2577, "No", garagetype))

Now we can fill all the rest of missing values. For garageyrblt we will use yearbuilt. For categorical data we will use “No” and for numerical 0. Also, we will add a column “hasgarage” that should be helpful when modeling the data.

all_data <- all_data %>%
  mutate(hasgarage = ifelse(is.na(garagequal), "No", "Yes"),
         garageyrblt = ifelse(is.na(garageyrblt), yearbuilt, garageyrblt),
         garagearea = ifelse(is.na(garagearea), 0, garagearea),
         garagecars = ifelse(is.na(garagecars), 0, garagecars),
         garagequal = ifelse(is.na(garagequal), "No", garagequal),
         garagefinish = ifelse(is.na(garagefinish), "No", garagefinish),
         garagecond = ifelse(is.na(garagecond), "No", garagecond),
         garagetype = ifelse(is.na(garagetype), "No", garagetype)
          )

Basement

We will use a similar approach to fill the missing values on the basement variables. So, first we get all the columns from the data.

bsmt.cols <- c('id', 'bsmtqual', 'bsmtexposure', 'bsmtcond', 'bsmtfintype1', 'bsmtfinsf1', 'bsmtfintype2', 'bsmtfinsf2', 'bsmtunfsf', 'totalbsmtsf', 'bsmtfullbath', 'bsmthalfbath')

We can have a quick look to see the missing values:

all_data %>%
  subset(select = bsmt.cols) %>% 
  summarise_all(funs(sum(is.na(.)))) %>% 
  arrange()
`funs()` is deprecated as of dplyr 0.8.0.
Please use a list of either functions or lambdas: 

  # Simple named list: 
  list(mean = mean, median = median)

  # Auto named with `tibble::lst()`: 
  tibble::lst(mean, median)

  # Using lambdas
  list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.

Comparing some basement variables with the total basement area greater than zero, we found the following variables where their missing values should not mean “No basement”:

  • bsmtexposure: we can assume that instead of “No basement” they should be “No exposure” so we are going to assing “No” value.
  • bsmtqual: we’ll evaluate the height as “TA” (Typical) for simplicity, as we cannot really know the height of these basements.
  • bsmtcond: we will use bsmtqual value to fill them in this case
  • bsmtfintype2: we can assume that the rating of the finished area of the type2 is the same than type1

For those with no basement, we will mark them as “NB” for categorical variables and 0 for numerical.

all_data <- all_data %>%
  mutate(bsmtexposure = ifelse(is.na(bsmtexposure) & totalbsmtsf > 0, "No", bsmtexposure),
         bsmtexposure = ifelse(is.na(bsmtexposure), "NB", bsmtexposure),
         bsmtcond= ifelse(is.na(bsmtcond) & totalbsmtsf > 0,  bsmtqual, bsmtcond),
         bsmtcond= ifelse(is.na(bsmtcond),  "NB", bsmtcond),
         bsmtqual = ifelse(is.na(bsmtqual)& totalbsmtsf > 0, "TA", bsmtqual),
         bsmtqual = ifelse(is.na(bsmtqual), "NB", bsmtqual),
         bsmtfintype2 = ifelse(is.na(bsmtfintype2)& totalbsmtsf > 0, bsmtfintype1, bsmtfintype2),
         bsmtfintype2 = ifelse(is.na(bsmtfintype2), "NB", bsmtfintype2),
         bsmtfintype1 = ifelse(is.na(bsmtfintype1), "NB", bsmtfintype1),
         bsmtfinsf1 = ifelse(is.na(bsmtfinsf1), 0, bsmtfinsf1),
         bsmtunfsf = ifelse(is.na(bsmtunfsf), 0, bsmtunfsf),
         bsmtfinsf2 = ifelse(is.na(bsmtfinsf2), 0, bsmtfinsf2),
         totalbsmtsf = ifelse(is.na(totalbsmtsf), 0, totalbsmtsf),
         bsmtfullbath = ifelse(is.na(bsmtfullbath), 0, bsmtfullbath),
         bsmthalfbath = ifelse(is.na(bsmthalfbath), 0, bsmthalfbath))

Masonry

One of the missing values for masvnrtype seems to have an area greater than zero. We can fill this missing value with the foundation value. For the rest of the missing values, we will fill them with “none” and 0.

all_data %>% 
  filter(is.na(masvnrtype), masvnrarea > 0)
all_data <- all_data %>%
  mutate(masvnrtype = ifelse(is.na(masvnrtype) & masvnrarea > 0, foundation, masvnrtype),
         masvnrtype = ifelse(is.na(masvnrtype), "None", masvnrtype),
         masvnrarea = ifelse(is.na(masvnrarea), 0, masvnrarea))

Mszoning

We will fill the missing values with the most common value, “RL” in this case.

view_group_data(all_data, mszoning)
Selecting by n
#updated missing value with "RL"
all_data <- all_data %>%
  mutate(mszoning = ifelse(is.na(mszoning), "RL", mszoning))

Utilities

We will fill the missing values with the most common value, “AllPub” in this case.

view_group_data(all_data, utilities)
Selecting by n
#updated missing value with "AllPub"
all_data <- all_data %>%
  mutate(utilities = ifelse(is.na(utilities), "AllPub", utilities))

Functional

As per the data description, we will assume “Typ” (Typical) for missing values:

all_data <- all_data %>%
  mutate(functional = ifelse(is.na(functional), "Typ", functional))

Exterior covering

We will fill the missing values with the most common value, “VinylSd” in this case.

view_group_data(all_data, exterior2nd)
Selecting by n
#updated missing values with "VinylSd"
all_data <- all_data %>%
  mutate(exterior1st = ifelse(is.na(exterior1st), "VinylSd", exterior1st),
         exterior2nd = ifelse(is.na(exterior2nd), "VinylSd", exterior2nd))

electrical

This must be an error, so we will use the most common value as well, “SBrkr”:

view_group_data(all_data, electrical)
Selecting by n
#updated missing value with "SBrkr"
all_data <- all_data %>%
  mutate(electrical = ifelse(is.na(electrical), "SBrkr", electrical))

kitchenqual

We will fill the missing values with the most common value, “TA” in this case.

view_group_data(all_data, kitchenqual)
Selecting by n
#updated missing value with "TA"
all_data <- all_data %>%
  mutate(kitchenqual = ifelse(is.na(kitchenqual), "TA", kitchenqual))

saletype

We will fill the missing values with the most common value, “WD” in this case.

view_group_data(all_data, saletype)
Selecting by n
#updated missing value with "WD"
all_data <- all_data %>%
  mutate(saletype = ifelse(is.na(saletype), "WD", saletype))
LS0tCnRpdGxlOiAiSG91c2luZyBQcmljZXMgLSBNaXNzaW5nIFZhbHVlcyIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgdG9jX2NvbGxhcHNlZDogZmFsc2UKLS0tCiMgSW52ZXN0aWdhdGluZyBtaXNzaW5nIHZhbHVlcwoKVGhlIHRhcmdldCBvZiB0aGlzIG5vdGVib29rIGlzIHRvIHNob3cgYSB3YXkgb2YgZGVhbGluZyB3aXRoIG1pc3NpbmcgdmFsdWVzIG9uIHRoZSBbSG91c2luZyBQcmljZV0oaHR0cHM6Ly93d3cua2FnZ2xlLmNvbS9jL2hvdXNlLXByaWNlcy1hZHZhbmNlZC1yZWdyZXNzaW9uLXRlY2huaXF1ZXMpIGRhdGFzZXQgZnJvbSBrYWdnbGUuCgojIyBMb29raW5nIGF0IHRoZSBkYXRhc2V0CgpUaGUgZmlyc3Qgc3RlcCBiZWZvcmUgZGVhbGluZyB3aXRoIHRoZSBtaXNzaW5nIHZhbHVlcyBpcyB0byBoYXZlIGEgcXVpY2sgbG9vayBpbnRvIHRoZSBkYXRhc2V0LgoKRmlyc3QsIHdlIGxvYWQgdGhlIHRpZHl2ZXJzZSBwYWNrYWdlOgpgYGB7cn0KIyMgSW1wb3J0aW5nIHBhY2thZ2VzCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCkFuZCBvdXIgZGF0YXNldHM6CmBgYHtyIG1lc3NhZ2U9RkFMU0V9CiNsb2FkIGRhdGEKdGVzdCA9IHJlYWRfY3N2KCJob3VzZV90ZXN0LmNzdiIpCnRyYWluID0gcmVhZF9jc3YoImhvdXNlX3RyYWluLmNzdiIpCmBgYAoKRm9yIHBlcnNvbmFsIHByZWZlcmVuY2UsIEkgY29udmVydCBhbGwgdmFyaWFibGVzIHRvIGxvd2VyY2FzZSAoaGVscHMgdHlwaW5nIGl0IHF1aWNrZXIhKToKYGBge3J9CiMgVmlldyB0aGUgY29sdW1uIG5hbWVzCm5hbWVzKHRyYWluKTwtdG9sb3dlcihuYW1lcyh0cmFpbikpCm5hbWVzKHRlc3QpPC10b2xvd2VyKG5hbWVzKHRlc3QpKQpgYGAKCkFuZCB3ZSBjb21iaW5lIGJvdGggdHJhaW4gYW5kIHRlc3QgZGF0YXNldHMgdG8gYW5hbHlzZSB0aGUgbWlzc2luZyB2YWx1ZXMgYWxsIGF0IG9uY2UsIGV4Y2x1ZGluZyBvdXIgZGVwZW5kYW50IHZhcmlhYmxlOgpgYGB7cn0KdHJhaW5fdmFyaWFibGVzIDwtIHRyYWluICU+JSAKICBzZWxlY3QoLXNhbGVwcmljZSkKYWxsX2RhdGEgPC0gcmJpbmQodHJhaW5fdmFyaWFibGVzLCB0ZXN0KQpgYGAKCkFkZGl0aW9uYWxseSwgaXQgaXMgZ29vZCB0byBoYXZlIGEgcGVyc3BlY3RpdmUgb2YgdGhlIGRpbWVuc2lvbiBvZiB0aGUgZGF0YXNldDoKYGBge3J9CmRpbShhbGxfZGF0YSkKYGBgCldlIGhhdmUgODAgdmFyaWFibGVzIGFuZCAyOTE5IG9ic2VydmF0aW9ucy4gTm93LCBpdCBpcyB0aW1lIHRvIGNoZWNrIHRoZSBzdHJ1Y3R1cmUgb2YgdGhlIGRhdGE6CmBgYHtyfQojIExvb2sgYXQgdGhlIHN0cnVjdHVyZSB1c2luZyBkcGx5cidzIGdsaW1wc2UoKQpnbGltcHNlKGFsbF9kYXRhKQpgYGAKSXQgc2VlbXMgd2UgaGF2ZSBzb21lIG51bWVyaWNhbCBkYXRhIHRoYXQgaXMgbm90IG51bWVyaWNhbCByZWFsbHkgKGxpa2UgIm1zc3ViY2xhc3MiKSBidXQgd2Ugd29uJ3QgYmUgYW5hbHlzaW5nIGl0IGluIHRoaXMgbm90ZWJvb2suCgpOZXh0IHN0ZXAgaXMgdG8gZ2V0IGEgc3VtbWFyeSBvZiBhbGwgdGhlIGRhdGEsIHRvIHNlZSBpZiB3ZSBmaW5kIGFueSBpbmNvbnNpc3RlbmN5OgpgYGB7cn0Kc3VtbWFyeShhbGxfZGF0YSkKYGBgCklmIHdlIGhhdmUgYSBjbG9zZXIgbG9vaywgaXQgc2VlbXMgdGhlcmUgaXMgYSB0eXBvIGluIG9uZSBvYnNlcnZhdGlvbiwgcmVnYXJkaW5nICJnYXJhZ2V5cmJsdCI6CgpgYGB7cn0KYWxsX2RhdGEgJT4lIAogIHNlbGVjdChnYXJhZ2V5cmJsdCkgJT4lIAogIHN1bW1hcnkoKQpgYGAKVGhpcyB2YXJpYWJsZSBpcyB0ZWxsaW5nIHVzIHRoZSB5ZWFyIHdoZXJlIHRoZSBnYXJhZ2Ugd2FzIGJ1aWxkLiBBcyB3ZSBjYW4gc2VlLCB0aGUgbWF4aW11biB2YWx1ZSBpcyAyMjA3LCB3aGljaCBpdCBpcyBvYnZpdXNseSBhIHR5cG8uCgojIyMgVHlwbyBpbiBnYXJhZ2V5cmJsdAoKQXMgd2Ugc2F3LCB0aGVyZSBpcyBhIHR5cG8gaW4gdGhlIGRhdGEuIEluIG9yZGVyIHRvIGZpbmQgb3V0IHdoZXJlIHRoZSB0eXBvIGlzLCB3ZSBjYW4ganVzdCBmaWx0ZXIgdGhlIGRhdGEgZm9yIGFueSB5ZWFyIGFmdGVyIDIwMjAgKEkgd2lsbCBhbHNvIGNvbXBhcmUgaXQgdmVyc3VzIHRoZSB5ZWFyIHRoYXQgdGhlIHByb3BlcnR5IHdhcyBidWlsdCB0byBnZXQgbW9yZSBwZXJzcGVjdGl2ZSk6CmBgYHtyfQphbGxfZGF0YSAlPiUgCiAgZmlsdGVyKGdhcmFnZXlyYmx0PjIwMjApICU+JSAKICBzZWxlY3QoeWVhcmJ1aWx0LCBnYXJhZ2V5cmJsdCkKYGBgCldlIGNhbiBzZWUgdGhhdCB0aGUgeWVhciB3aGVuIHRoZSBwcm9wZXJ0eSB3YXMgYnVpbHQgd2FzIDIwMDYgYW5kIHNvIHRoZSBnYXJhZ2UgcHJvYmFibHkgd2FzIGJ1aWx0IGluIDIwMDcuIFdlIHdpbGwgYW1lbmQgdGhlIHZhbHVlIHRvIDIwMDcuCmBgYHtyfQphbGxfZGF0YSA8LSBhbGxfZGF0YSAlPiUKICBtdXRhdGUoZ2FyYWdleXJibHQgPSBpZmVsc2UoZ2FyYWdleXJibHQgPiAyMDIwLCAyMDA3LCBnYXJhZ2V5cmJsdCkpCmBgYAoKIyMgTWlzc2luZyB2YWx1ZXMKCk5vdywgaXQgaXMgb3VyIHR1cm4gdG8gZ2V0IG1vcmUgZGVlcGx5IGludG8gdGhlIG1pc3NpbmcgdmFsdWVzLiBBIGdvb2QgbWFuYWdlbWVudCBvZiB0aGVtLCBzaG91bGQgaGVscCBvdXIgbW9kZWwgdG8gcHJlZGljdCB0aGUgc2FsZXMgcHJpY2UgbW9yZSBhY2N1cmF0ZSAoaW4gdGhlb3J5ISkuCgpXZSBjYW4gdmlzdWFsaXNlIHRoZSBtaXNzaW5nIHZhbHVlczoKYGBge3J9CmFsbF9kYXRhICU+JQogIGdhdGhlcihrZXkgPSAia2V5IiwgdmFsdWUgPSAidmFsIikgJT4lIAogIG11dGF0ZShpcy5taXNzaW5nID0gaXMubmEodmFsKSkgJT4lIAogIGdyb3VwX2J5KGtleSwgaXMubWlzc2luZykgJT4lCiAgc3VtbWFyaXNlKG51bV9taXNzaW5nID0gbigpKSU+JQogIGZpbHRlcihpcy5taXNzaW5nID09IFRSVUUsIG51bV9taXNzaW5nID4gMSkgJT4lIAogIHNlbGVjdCgtaXMubWlzc2luZykgJT4lCiAgYXJyYW5nZShkZXNjKG51bV9taXNzaW5nKSkgJT4lIAogIGdncGxvdChhZXMoeCA9IHJlb3JkZXIoa2V5LCBudW1fbWlzc2luZyksIHkgPSBudW1fbWlzc2luZywgZmlsbCA9IGtleSkpICsKICBnZW9tX2NvbCgpICsKICBjb29yZF9mbGlwKCkgKwogIHhsYWIoIlZhcmlhYmxlIikgKwogIHlsYWIoIk1pc3NpbmcgdmFsdWVzIikrCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSdub25lJykKYGBgCkl0IHNlZW1zIHdlIGhhdmUgYSBnb29kIHF1YW50aXR5IG9mIHZhcmlhYmxlcyB3aXRoIG1pc3NpbmcgdmFsdWVzLCBzcGVjaWFsbHkgdGhlIHRvcCA2LgoKTGV0J3Mgc2VlIHRoZSBhY3R1YWwgYW1vdW50IG9mIG1pc3NpbmcgdmFsdWVzIHBlciB2YXJpYWJsZToKYGBge3J9Ck5BY29sIDwtIHdoaWNoKGNvbFN1bXMoaXMubmEoYWxsX2RhdGEpKSA+IDApCnNvcnQoY29sU3VtcyhzYXBwbHkoYWxsX2RhdGFbTkFjb2xdLCBpcy5uYSkpLCBkZWNyZWFzaW5nID0gVFJVRSkKYGBgCldlIGFyZSBnb2luZyB0byB3b3JrIGZyb20gdGhlIHRvcCB0byB0aGUgYm90dG9tLiAKCkkgaGF2ZSBjcmVhdGVkIHRoZSBmb2xsb3dpbmcgZnVuY2l0b24gdG8gaGF2ZSBhIHZpc3VhbCBsb29rIG9mIG1vc3QgY29tbW9uIG9ic2VydmF0aW9uLCBzbyB0aGF0IHdpbGwgaGVscCB1cyB3aGVuIGZpbGxpbmcgdGhlIG1pc3NpbmcgdmFsdWVzLgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQpyZXBsYWNlX21pc3NpbmcgPC0gZnVuY3Rpb24oZGF0YSwgY29sLCB2YWx1ZSl7CgogIGRhdGEgJT4lCiAgICBtdXRhdGUoInt7Y29sfX0iIDo9IGlmZWxzZShpcy5uYSh7e2NvbH19ICksIHZhbHVlLCB7e2NvbH19KSkKfQpgYGAKCgpgYGB7cn0Kdmlld19ncm91cF9kYXRhIDwtIGZ1bmN0aW9uKGRhdGEsIGNvbCl7ICAgCiAgZGF0YSAlPiUgCiAgICBjb3VudCh7e2NvbH19KSAlPiUKICAgIGFycmFuZ2UoZGVzYyhuKSkgJT4lCiAgICB0b3Bfbig1KQp9CmBgYAojIyMgUG9vbAoKRG9lcyB0aGUgcHJvcGVydHkgaGF2ZSBhIHN3aW1taW5nIHBvb2w/CmBgYHtyfQp2aWV3X2dyb3VwX2RhdGEoYWxsX2RhdGEsIHBvb2xxYykKYGBgClRoZSBtYWpvcml0eSBvZiB0aGUgb2JzZXJ2YXRpb25zICgyOTA5KSBhcmUgdGVsbGluZyB1cyB0aGF0IHRoZSBwcm9wZXJ0aWVzIGRvbid0IGhhdmUgYSBzd2ltbWluZyBwb29sLiBIb3dldmVyLCBpZiB3ZSBjb21wYXJlIHRoZSBwb29sIHF1YWxpdHkgdmVyc3VzIHRoZSBhcmVhLCB3ZSBzZWUgc29tZSBzdXJwcmlzZXM6CmBgYHtyfQpwb29sLmNvbHMgPC0gYygncG9vbGFyZWEnLCAncG9vbHFjJykKCmFsbF9kYXRhICU+JQogIHN1YnNldChzZWxlY3QgPSBwb29sLmNvbHMpICU+JSAKICBmaWx0ZXIocG9vbGFyZWEgPiAwLCBpcy5uYShwb29scWMpKQpgYGAKSXQgc2VlbXMgdGhhdCAzIHJvd3MgaGF2ZSBhIHBvb2wgYXJlYSBiaWdnZXIgdGhhdCAwLCB3aGljaCBtZWFucyB0aGF0IDMgbWlzc2luZyB2YWx1ZXMgc2VlbSB0byBiZSBpbmNvcnJlY3QuCgpGb3Igc2ltcGxpY2l0eSwgd2Ugd2lsbCBmaWxsIHRob3NlIDMgdmFsdWVzIHdpdGggIkdkIiBhcyBzZWVtcyB0byBiZSB0aGUgbW9zdCBmcmVxdWVudCB2YWx1ZS4gV2Ugd2lsbCBjaGFuZ2UgdGhlIG90aGVyIG1pc3NpbmcgdmFsdWVzIHRvICJObyIKYGBge3J9CmFsbF9kYXRhIDwtIGFsbF9kYXRhICU+JQogIG11dGF0ZShwb29scWMgPSBjYXNlX3doZW4oCiAgICAgICAgICAgICAgICAgICAgICAgICAgIHBvb2xhcmVhID4gMCAmIGlzLm5hKHBvb2xxYykgfiAiR2QiLAogICAgICAgICAgICAgICAgICAgICAgICAgICBpcy5uYShwb29scWMpIH4gIk5vIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgVFJVRSB+IHBvb2xxYwogICAgICAgICAgICAgICAgICAgICAgICAgICApCiAgICAgICAgICkKYGBgCgojIyMgTWlzY2ZlYXR1cmUgICAgICAgIAoKSGVyZSBhcmUgaW5jbHVkZWQgdGhvc2UgZmVhdHVyZXMgbm90IGNvdmVyZWQgaW4gb3RoZXIgY2F0ZWdvcmllcy4KCmBgYHtyfQp2aWV3X2dyb3VwX2RhdGEoYWxsX2RhdGEsIG1pc2NmZWF0dXJlKQpgYGAKRm9yIG5vdywgd2UgY2FuIGZpbGwgdGhlIG1pc3NpbmcgdmFsdWVzIHdpdGggIk5vIiBmb3IgdGhvc2Ugb2JzZXJ2YXRpb25zIHdpdGggbm8gZXh0cmEgZmVhdHVyZXM6CgpgYGB7cn0KYWxsX2RhdGEgPC0gYWxsX2RhdGEgJT4lCiAgbXV0YXRlKG1pc2NmZWF0dXJlID0gaWZlbHNlKGlzLm5hKG1pc2NmZWF0dXJlKSwgIk5vIiwgbWlzY2ZlYXR1cmUpKQpgYGAKCiMjIyBBbGxleSAgICAgICAgCgpBcyBwZXIgdGhlIGRvY3VtZW50YXRpb24sIHRob3NlIE5BIG1lYW4gdGhhdCB0aGV5IGRvbnQgaGF2ZSBhbGxleSBhY2Nlc3MuIFdlIGNhbiBmaWxsIHRoZW0gd2l0aCAiTm8iCgpgYGB7cn0KYWxsX2RhdGEgPC0gYWxsX2RhdGEgJT4lCiAgbXV0YXRlKGFsbGV5ID0gaWZlbHNlKGlzLm5hKGFsbGV5KSwgIk5vIiwgYWxsZXkpKQpgYGAKCiMjIyBGZW5jZQoKU2ltaWxhciB0byB0aGUgcHJldmlvdXMgdmFyaWFibGUsIE5BIG1lYW5zICJObyIgZmVuY2UuIFdlIHdpbGwgZmlsbCB0aGUgbWlzc2luZyB2YWx1ZXMgd2l0aCAiTm8iLgoKYGBge3J9CmFsbF9kYXRhIDwtIGFsbF9kYXRhICU+JQogIG11dGF0ZShmZW5jZSA9IGlmZWxzZShpcy5uYShmZW5jZSksICJObyIsIGZlbmNlKSkKCnZpZXdfZ3JvdXBfZGF0YShhbGxfZGF0YSwgZmVuY2UpCmBgYAojIyMgRmlyZXBsYWNlCgpGb3IgZmlyZXBsYWNlIHF1YWxpdHkgbWlzc2luZyB2YWx1ZXMsIGEgZ29vZCB0aGluZyB0byBjaGVjayBpcyB0byBzZWUgaWYgYW55IG1pc3NpbmcgdmFsdWUgaXMgYWxpZ25lZCB3aXRoIGEgZmlyZXBsYWNlOgpgYGB7cn0KYWxsX2RhdGEgJT4lIAogIHNlbGVjdChmaXJlcGxhY2VzLCBmaXJlcGxhY2VxdSkgJT4lIAogIGZpbHRlcihmaXJlcGxhY2VzID4gMCwgaXMubmEoZmlyZXBsYWNlcXUpKQpgYGAKU2VlbXMgYWxsIHRvIGJlIGNvcnJlY3QsIHNvIHdlIGFyZSBzdXJlIHRoYXQgbWlzc2luZyB2YWx1ZSBtZWFucyAiTm8iIGZpcmVwbGFjZToKYGBge3J9CmFsbF9kYXRhIDwtIGFsbF9kYXRhICU+JQogIG11dGF0ZShmaXJlcGxhY2VxdSA9IGlmZWxzZShpcy5uYShmaXJlcGxhY2VxdSksICJObyIsIGZpcmVwbGFjZXF1KSkKYGBgCiMjIyBMb3Rmcm9udGFnZQoKRm9yIHRoZSBsaW5lYXIgZmVldCBvZiBzdHJlZXQgY29ubmVjdGVkIHRvIHRoZSBwcm9wZXJ0eSwgd2UgY2FuIGFzc3VtZSB0aGF0IG1pc3NpbmcgdmFsdWVzIG1lYW5zIDAgbGluZWFyIGZlZXQ6CmBgYHtyfQphbGxfZGF0YSA8LSBhbGxfZGF0YSAlPiUKICBtdXRhdGUobG90ZnJvbnRhZ2UgPSBpZmVsc2UoaXMubmEobG90ZnJvbnRhZ2UpLCAwLCBsb3Rmcm9udGFnZSkpCmBgYAojIyMgR2FyYWdlCgpGb3IgR2FyYWdlIHZhcmlhYmxlcywgd2Ugd2lsbCBmaWx0ZXIgb3VyIGRhdGEgc2V0IHdpdGggYWxsIHRoZSByZWxhdGVkIHZhcmlhYmxlczoKCmBgYHtyfQpnYXJhZ2UuY29scyA8LSBjKCdpZCcsICdnYXJhZ2V5cmJsdCcsICdnYXJhZ2VhcmVhJywgJ2dhcmFnZWNhcnMnLCAnZ2FyYWdlcXVhbCcsICdnYXJhZ2VmaW5pc2gnLCAnZ2FyYWdlY29uZCcsICdnYXJhZ2V0eXBlJykKYGBgCkl0IHNlZW1zIHNvbWUgY29sdW1ucyBoYXZlIDE1OSBtaXNzaW5nIHZhbHVlcyAoIm5vIGdhcmFnZSIpIGFuZCBnYXJhZ2V0eXBlIGhhdmUgMTU3IG1pc3NpbmcgdmFsdWVzLgoKQSBnb29kIGNoZWNoIGlzIHRvIGZpbHRlciB0aGUgZGF0YSB0byBzZWUgaWYgYW55IG9ic2VydmF0aW9uIGhhcyBnYXJhZ2UgYXJlYSBhbmQgYWxzbyBtaXNzaW5nIHZhbHVlcyBvbiBvdGhlciBhdHRyaWJ1dGVzOgpgYGB7cn0KYWxsX2RhdGEgJT4lCiAgc3Vic2V0KHNlbGVjdCA9IGdhcmFnZS5jb2xzKSAlPiUgCiAgZmlsdGVyKGdhcmFnZWFyZWEgPiAwLCBpcy5uYShnYXJhZ2VxdWFsKSkKYGBgCldlIGNhbiBzZWUgdGhhdCBpbiB0aGlzIGNhc2UgdGhlIGRhdGEgc2VlbXMgdG8gYmUgaW5jb21wbGV0ZSwgYXMgdGhlIGFyZWEgaXMgMzYwIGFuZCB0aGUgZ2FyYWdlIGhhdmUgc3BhY2UgZm9yIDEgY2FyLgoKV2UgY2FuIGNoZWNrIGZvciBkZXRhY2hlZCBnYXJhZ2VzIHdpdGggc2ltaWxhciBhcmVhIGluIG9yZGVyIHRvIGZpbGwgdGhlIG1pc3NpbmcgdmFsdWVzOgpgYGB7cn0KYWxsX2RhdGEgJT4lCiAgZmlsdGVyKGdhcmFnZWNhcnMgPT0gMSxnYXJhZ2V0eXBlID09ICJEZXRjaGQiKSAlPiUgCiAgc2VsZWN0KGdhcmFnZXF1YWwsIGdhcmFnZWZpbmlzaCwgZ2FyYWdlY29uZCkgJT4lIAogIGdyb3VwX2J5KGdhcmFnZXF1YWwsIGdhcmFnZWZpbmlzaCwgZ2FyYWdlY29uZCkgJT4lIAogIHN1bW1hcmlzZShmcmVxID0gbigpKSAlPiUgCiAgYXJyYW5nZShkZXNjKGZyZXEpKQpgYGAKSSB3aWxsIHVzZSB0aGUgbW9zdCBmcmVxdWVudCBzZXQgdG8gZmlsbCB0aGUgbWlzc2luZyB2YWx1ZXMgb2YgdGhlIG9ic2VydmF0aW9uIG51bWJlciAyMTI3OgpgYGB7cn0KYWxsX2RhdGEgPC0gYWxsX2RhdGEgJT4lIAogIG11dGF0ZShnYXJhZ2VxdWFsID0gaWZlbHNlKGlkID09IDIxMjcsICJUQSIsIGdhcmFnZXF1YWwpLAogICAgICAgICBnYXJhZ2VmaW5pc2ggPSBpZmVsc2UoaWQgPT0gMjEyNywgIlVuZiIsIGdhcmFnZWZpbmlzaCksCiAgICAgICAgIGdhcmFnZWNvbmQgPSBpZmVsc2UoaWQgPT0gMjEyNywgIlRBIiwgZ2FyYWdlY29uZCksCiAgICAgICAgIGdhcmFnZXlyYmx0ID0gaWZlbHNlKGlkID09IDIxMjcsIHllYXJidWlsdCwgZ2FyYWdleXJibHQpKQoKYWxsX2RhdGEgJT4lCiAgc3Vic2V0KHNlbGVjdCA9IGdhcmFnZS5jb2xzKSAlPiUgCiAgZmlsdGVyKGlkID09ICAyMTI3KQpgYGAKClJlZ2FyZGluZyBvYnNlcnZhdGlvbiAyNTc3LCB3ZSBoYXZlIGEgZGV0YWNoZWQgZ2FyYWdlIGJ1dCBubyBtb3JlIGluZm9ybWF0aW9uLCBpdCBzb3VuZHMgbGlrZSBubyBnYXJhZ2UgYXQgYWxsCmBgYHtyfQphbGxfZGF0YSAlPiUKICBmaWx0ZXIoZ2FyYWdldHlwZSA9PSAiRGV0Y2hkIiwgaXMubmEoZ2FyYWdlY2FycykpICU+JSAKICBzdWJzZXQoc2VsZWN0ID0gZ2FyYWdlLmNvbHMpCmBgYApgYGB7cn0KYWxsX2RhdGEgPC0gYWxsX2RhdGEgJT4lIAogIG11dGF0ZShnYXJhZ2V0eXBlID0gaWZlbHNlKGlkID09IDI1NzcsICJObyIsIGdhcmFnZXR5cGUpKQpgYGAKTm93IHdlIGNhbiBmaWxsIGFsbCB0aGUgcmVzdCBvZiBtaXNzaW5nIHZhbHVlcy4gRm9yIGdhcmFnZXlyYmx0IHdlIHdpbGwgdXNlIHllYXJidWlsdC4gRm9yIGNhdGVnb3JpY2FsIGRhdGEgd2Ugd2lsbCB1c2UgIk5vIiBhbmQgZm9yIG51bWVyaWNhbCAwLiBBbHNvLCB3ZSB3aWxsIGFkZCBhIGNvbHVtbiAiaGFzZ2FyYWdlIiB0aGF0IHNob3VsZCBiZSBoZWxwZnVsIHdoZW4gbW9kZWxpbmcgdGhlIGRhdGEuCmBgYHtyfQphbGxfZGF0YSA8LSBhbGxfZGF0YSAlPiUKICBtdXRhdGUoaGFzZ2FyYWdlID0gaWZlbHNlKGlzLm5hKGdhcmFnZXF1YWwpLCAiTm8iLCAiWWVzIiksCiAgICAgICAgIGdhcmFnZXlyYmx0ID0gaWZlbHNlKGlzLm5hKGdhcmFnZXlyYmx0KSwgeWVhcmJ1aWx0LCBnYXJhZ2V5cmJsdCksCiAgICAgICAgIGdhcmFnZWFyZWEgPSBpZmVsc2UoaXMubmEoZ2FyYWdlYXJlYSksIDAsIGdhcmFnZWFyZWEpLAogICAgICAgICBnYXJhZ2VjYXJzID0gaWZlbHNlKGlzLm5hKGdhcmFnZWNhcnMpLCAwLCBnYXJhZ2VjYXJzKSwKICAgICAgICAgZ2FyYWdlcXVhbCA9IGlmZWxzZShpcy5uYShnYXJhZ2VxdWFsKSwgIk5vIiwgZ2FyYWdlcXVhbCksCiAgICAgICAgIGdhcmFnZWZpbmlzaCA9IGlmZWxzZShpcy5uYShnYXJhZ2VmaW5pc2gpLCAiTm8iLCBnYXJhZ2VmaW5pc2gpLAogICAgICAgICBnYXJhZ2Vjb25kID0gaWZlbHNlKGlzLm5hKGdhcmFnZWNvbmQpLCAiTm8iLCBnYXJhZ2Vjb25kKSwKICAgICAgICAgZ2FyYWdldHlwZSA9IGlmZWxzZShpcy5uYShnYXJhZ2V0eXBlKSwgIk5vIiwgZ2FyYWdldHlwZSkKICAgICAgICAgICkKYGBgCgoKIyMjIEJhc2VtZW50CgpXZSB3aWxsIHVzZSBhIHNpbWlsYXIgYXBwcm9hY2ggdG8gZmlsbCB0aGUgbWlzc2luZyB2YWx1ZXMgb24gdGhlIGJhc2VtZW50IHZhcmlhYmxlcy4gU28sIGZpcnN0IHdlIGdldCBhbGwgdGhlIGNvbHVtbnMgZnJvbSB0aGUgZGF0YS4KYGBge3J9CmJzbXQuY29scyA8LSBjKCdpZCcsICdic210cXVhbCcsICdic210ZXhwb3N1cmUnLCAnYnNtdGNvbmQnLCAnYnNtdGZpbnR5cGUxJywgJ2JzbXRmaW5zZjEnLCAnYnNtdGZpbnR5cGUyJywgJ2JzbXRmaW5zZjInLCAnYnNtdHVuZnNmJywgJ3RvdGFsYnNtdHNmJywgJ2JzbXRmdWxsYmF0aCcsICdic210aGFsZmJhdGgnKQpgYGAKV2UgY2FuIGhhdmUgYSBxdWljayBsb29rIHRvIHNlZSB0aGUgbWlzc2luZyB2YWx1ZXM6CmBgYHtyfQphbGxfZGF0YSAlPiUKICBzdWJzZXQoc2VsZWN0ID0gYnNtdC5jb2xzKSAlPiUgCiAgc3VtbWFyaXNlX2FsbChmdW5zKHN1bShpcy5uYSguKSkpKSAlPiUgCiAgYXJyYW5nZSgpCmBgYApDb21wYXJpbmcgc29tZSBiYXNlbWVudCB2YXJpYWJsZXMgd2l0aCB0aGUgdG90YWwgYmFzZW1lbnQgYXJlYSBncmVhdGVyIHRoYW4gemVybywgd2UgZm91bmQgdGhlIGZvbGxvd2luZyB2YXJpYWJsZXMgd2hlcmUgdGhlaXIgbWlzc2luZyB2YWx1ZXMgc2hvdWxkIG5vdCBtZWFuICJObyBiYXNlbWVudCI6CgotIGJzbXRleHBvc3VyZTogd2UgY2FuIGFzc3VtZSB0aGF0IGluc3RlYWQgb2YgIk5vIGJhc2VtZW50IiB0aGV5IHNob3VsZCBiZSAiTm8gZXhwb3N1cmUiIHNvIHdlIGFyZSBnb2luZyB0byBhc3NpbmcgIk5vIiB2YWx1ZS4KLSBic210cXVhbDogd2UnbGwgZXZhbHVhdGUgdGhlIGhlaWdodCBhcyAiVEEiIChUeXBpY2FsKSBmb3Igc2ltcGxpY2l0eSwgYXMgd2UgY2Fubm90IHJlYWxseSBrbm93IHRoZSBoZWlnaHQgb2YgdGhlc2UgYmFzZW1lbnRzLgotIGJzbXRjb25kOiB3ZSB3aWxsIHVzZSBic210cXVhbCB2YWx1ZSB0byBmaWxsIHRoZW0gaW4gdGhpcyBjYXNlCi0gYnNtdGZpbnR5cGUyOiB3ZSBjYW4gYXNzdW1lIHRoYXQgdGhlIHJhdGluZyBvZiB0aGUgZmluaXNoZWQgYXJlYSBvZiB0aGUgdHlwZTIgaXMgdGhlIHNhbWUgdGhhbiB0eXBlMQoKRm9yIHRob3NlIHdpdGggbm8gYmFzZW1lbnQsIHdlIHdpbGwgbWFyayB0aGVtIGFzICJOQiIgZm9yIGNhdGVnb3JpY2FsIHZhcmlhYmxlcyBhbmQgMCBmb3IgbnVtZXJpY2FsLgpgYGB7cn0KYWxsX2RhdGEgPC0gYWxsX2RhdGEgJT4lCiAgbXV0YXRlKGJzbXRleHBvc3VyZSA9IGlmZWxzZShpcy5uYShic210ZXhwb3N1cmUpICYgdG90YWxic210c2YgPiAwLCAiTm8iLCBic210ZXhwb3N1cmUpLAogICAgICAgICBic210ZXhwb3N1cmUgPSBpZmVsc2UoaXMubmEoYnNtdGV4cG9zdXJlKSwgIk5CIiwgYnNtdGV4cG9zdXJlKSwKICAgICAgICAgYnNtdGNvbmQ9IGlmZWxzZShpcy5uYShic210Y29uZCkgJiB0b3RhbGJzbXRzZiA+IDAsICBic210cXVhbCwgYnNtdGNvbmQpLAogICAgICAgICBic210Y29uZD0gaWZlbHNlKGlzLm5hKGJzbXRjb25kKSwgICJOQiIsIGJzbXRjb25kKSwKICAgICAgICAgYnNtdHF1YWwgPSBpZmVsc2UoaXMubmEoYnNtdHF1YWwpJiB0b3RhbGJzbXRzZiA+IDAsICJUQSIsIGJzbXRxdWFsKSwKICAgICAgICAgYnNtdHF1YWwgPSBpZmVsc2UoaXMubmEoYnNtdHF1YWwpLCAiTkIiLCBic210cXVhbCksCiAgICAgICAgIGJzbXRmaW50eXBlMiA9IGlmZWxzZShpcy5uYShic210ZmludHlwZTIpJiB0b3RhbGJzbXRzZiA+IDAsIGJzbXRmaW50eXBlMSwgYnNtdGZpbnR5cGUyKSwKICAgICAgICAgYnNtdGZpbnR5cGUyID0gaWZlbHNlKGlzLm5hKGJzbXRmaW50eXBlMiksICJOQiIsIGJzbXRmaW50eXBlMiksCiAgICAgICAgIGJzbXRmaW50eXBlMSA9IGlmZWxzZShpcy5uYShic210ZmludHlwZTEpLCAiTkIiLCBic210ZmludHlwZTEpLAogICAgICAgICBic210Zmluc2YxID0gaWZlbHNlKGlzLm5hKGJzbXRmaW5zZjEpLCAwLCBic210Zmluc2YxKSwKICAgICAgICAgYnNtdHVuZnNmID0gaWZlbHNlKGlzLm5hKGJzbXR1bmZzZiksIDAsIGJzbXR1bmZzZiksCiAgICAgICAgIGJzbXRmaW5zZjIgPSBpZmVsc2UoaXMubmEoYnNtdGZpbnNmMiksIDAsIGJzbXRmaW5zZjIpLAogICAgICAgICB0b3RhbGJzbXRzZiA9IGlmZWxzZShpcy5uYSh0b3RhbGJzbXRzZiksIDAsIHRvdGFsYnNtdHNmKSwKICAgICAgICAgYnNtdGZ1bGxiYXRoID0gaWZlbHNlKGlzLm5hKGJzbXRmdWxsYmF0aCksIDAsIGJzbXRmdWxsYmF0aCksCiAgICAgICAgIGJzbXRoYWxmYmF0aCA9IGlmZWxzZShpcy5uYShic210aGFsZmJhdGgpLCAwLCBic210aGFsZmJhdGgpKQpgYGAKIyMjIE1hc29ucnkKCk9uZSBvZiB0aGUgbWlzc2luZyB2YWx1ZXMgZm9yIG1hc3ZucnR5cGUgc2VlbXMgdG8gaGF2ZSBhbiBhcmVhIGdyZWF0ZXIgdGhhbiB6ZXJvLiBXZSBjYW4gZmlsbCB0aGlzIG1pc3NpbmcgdmFsdWUgd2l0aCB0aGUgZm91bmRhdGlvbiB2YWx1ZS4gRm9yIHRoZSByZXN0IG9mIHRoZSBtaXNzaW5nIHZhbHVlcywgd2Ugd2lsbCBmaWxsIHRoZW0gd2l0aCAibm9uZSIgYW5kIDAuCgpgYGB7cn0KYWxsX2RhdGEgJT4lIAogIGZpbHRlcihpcy5uYShtYXN2bnJ0eXBlKSwgbWFzdm5yYXJlYSA+IDApCmBgYAoKYGBge3J9CmFsbF9kYXRhIDwtIGFsbF9kYXRhICU+JQogIG11dGF0ZShtYXN2bnJ0eXBlID0gaWZlbHNlKGlzLm5hKG1hc3ZucnR5cGUpICYgbWFzdm5yYXJlYSA+IDAsIGZvdW5kYXRpb24sIG1hc3ZucnR5cGUpLAogICAgICAgICBtYXN2bnJ0eXBlID0gaWZlbHNlKGlzLm5hKG1hc3ZucnR5cGUpLCAiTm9uZSIsIG1hc3ZucnR5cGUpLAogICAgICAgICBtYXN2bnJhcmVhID0gaWZlbHNlKGlzLm5hKG1hc3ZucmFyZWEpLCAwLCBtYXN2bnJhcmVhKSkKYGBgCiMjIyBNc3pvbmluZyAgICAKCldlIHdpbGwgZmlsbCB0aGUgbWlzc2luZyB2YWx1ZXMgd2l0aCB0aGUgbW9zdCBjb21tb24gdmFsdWUsICJSTCIgaW4gdGhpcyBjYXNlLgpgYGB7cn0Kdmlld19ncm91cF9kYXRhKGFsbF9kYXRhLCBtc3pvbmluZykKYGBgCgpgYGB7cn0KI3VwZGF0ZWQgbWlzc2luZyB2YWx1ZSB3aXRoICJSTCIKYWxsX2RhdGEgPC0gYWxsX2RhdGEgJT4lCiAgbXV0YXRlKG1zem9uaW5nID0gaWZlbHNlKGlzLm5hKG1zem9uaW5nKSwgIlJMIiwgbXN6b25pbmcpKQpgYGAKCiMjIyBVdGlsaXRpZXMKCldlIHdpbGwgZmlsbCB0aGUgbWlzc2luZyB2YWx1ZXMgd2l0aCB0aGUgbW9zdCBjb21tb24gdmFsdWUsICJBbGxQdWIiIGluIHRoaXMgY2FzZS4KYGBge3J9CnZpZXdfZ3JvdXBfZGF0YShhbGxfZGF0YSwgdXRpbGl0aWVzKQpgYGAKYGBge3J9CiN1cGRhdGVkIG1pc3NpbmcgdmFsdWUgd2l0aCAiQWxsUHViIgphbGxfZGF0YSA8LSBhbGxfZGF0YSAlPiUKICBtdXRhdGUodXRpbGl0aWVzID0gaWZlbHNlKGlzLm5hKHV0aWxpdGllcyksICJBbGxQdWIiLCB1dGlsaXRpZXMpKQpgYGAKIyMjIEZ1bmN0aW9uYWwKCkFzIHBlciB0aGUgZGF0YSBkZXNjcmlwdGlvbiwgd2Ugd2lsbCBhc3N1bWUgIlR5cCIgKFR5cGljYWwpIGZvciBtaXNzaW5nIHZhbHVlczoKCmBgYHtyfQphbGxfZGF0YSA8LSBhbGxfZGF0YSAlPiUKICBtdXRhdGUoZnVuY3Rpb25hbCA9IGlmZWxzZShpcy5uYShmdW5jdGlvbmFsKSwgIlR5cCIsIGZ1bmN0aW9uYWwpKQpgYGAKCiMjIyBFeHRlcmlvciBjb3ZlcmluZwoKV2Ugd2lsbCBmaWxsIHRoZSBtaXNzaW5nIHZhbHVlcyB3aXRoIHRoZSBtb3N0IGNvbW1vbiB2YWx1ZSwgIlZpbnlsU2QiIGluIHRoaXMgY2FzZS4KCmBgYHtyfQp2aWV3X2dyb3VwX2RhdGEoYWxsX2RhdGEsIGV4dGVyaW9yMm5kKQpgYGAKYGBge3J9CiN1cGRhdGVkIG1pc3NpbmcgdmFsdWVzIHdpdGggIlZpbnlsU2QiCmFsbF9kYXRhIDwtIGFsbF9kYXRhICU+JQogIG11dGF0ZShleHRlcmlvcjFzdCA9IGlmZWxzZShpcy5uYShleHRlcmlvcjFzdCksICJWaW55bFNkIiwgZXh0ZXJpb3Ixc3QpLAogICAgICAgICBleHRlcmlvcjJuZCA9IGlmZWxzZShpcy5uYShleHRlcmlvcjJuZCksICJWaW55bFNkIiwgZXh0ZXJpb3IybmQpKQpgYGAKIyMjIGVsZWN0cmljYWwgCgpUaGlzIG11c3QgYmUgYW4gZXJyb3IsIHNvIHdlIHdpbGwgdXNlIHRoZSBtb3N0IGNvbW1vbiB2YWx1ZSBhcyB3ZWxsLCAiU0Jya3IiOgoKYGBge3J9CnZpZXdfZ3JvdXBfZGF0YShhbGxfZGF0YSwgZWxlY3RyaWNhbCkKYGBgCmBgYHtyfQojdXBkYXRlZCBtaXNzaW5nIHZhbHVlIHdpdGggIlNCcmtyIgphbGxfZGF0YSA8LSBhbGxfZGF0YSAlPiUKICBtdXRhdGUoZWxlY3RyaWNhbCA9IGlmZWxzZShpcy5uYShlbGVjdHJpY2FsKSwgIlNCcmtyIiwgZWxlY3RyaWNhbCkpCmBgYAojIyMga2l0Y2hlbnF1YWwKCldlIHdpbGwgZmlsbCB0aGUgbWlzc2luZyB2YWx1ZXMgd2l0aCB0aGUgbW9zdCBjb21tb24gdmFsdWUsICJUQSIgaW4gdGhpcyBjYXNlLgpgYGB7cn0Kdmlld19ncm91cF9kYXRhKGFsbF9kYXRhLCBraXRjaGVucXVhbCkKYGBgCmBgYHtyfQojdXBkYXRlZCBtaXNzaW5nIHZhbHVlIHdpdGggIlRBIgphbGxfZGF0YSA8LSBhbGxfZGF0YSAlPiUKICBtdXRhdGUoa2l0Y2hlbnF1YWwgPSBpZmVsc2UoaXMubmEoa2l0Y2hlbnF1YWwpLCAiVEEiLCBraXRjaGVucXVhbCkpCmBgYAoKIyMjIHNhbGV0eXBlIAoKV2Ugd2lsbCBmaWxsIHRoZSBtaXNzaW5nIHZhbHVlcyB3aXRoIHRoZSBtb3N0IGNvbW1vbiB2YWx1ZSwgIldEIiBpbiB0aGlzIGNhc2UuCmBgYHtyfQp2aWV3X2dyb3VwX2RhdGEoYWxsX2RhdGEsIHNhbGV0eXBlKQpgYGAKYGBge3J9CiN1cGRhdGVkIG1pc3NpbmcgdmFsdWUgd2l0aCAiV0QiCmFsbF9kYXRhIDwtIGFsbF9kYXRhICU+JQogIG11dGF0ZShzYWxldHlwZSA9IGlmZWxzZShpcy5uYShzYWxldHlwZSksICJXRCIsIHNhbGV0eXBlKSkKYGBgCgoK