library(h2o)
h2o.init(nthreads=-1, max_mem_size="16G")

H2O is not running yet, starting it now...

Note:  In case of errors look at the following log files:
    C:\Users\r631758\AppData\Local\Temp\1\RtmpQTFvmG/h2o_r631758_started_from_r.out
    C:\Users\r631758\AppData\Local\Temp\1\RtmpQTFvmG/h2o_r631758_started_from_r.err

java version "1.8.0_144"
Java(TM) SE Runtime Environment (build 1.8.0_144-b01)
Java HotSpot(TM) 64-Bit Server VM (build 25.144-b01, mixed mode)

Starting H2O JVM and connecting: . Connection successful!

R is connected to the H2O cluster: 
    H2O cluster uptime:         1 seconds 986 milliseconds 
    H2O cluster version:        3.14.0.3 
    H2O cluster version age:    10 days  
    H2O cluster name:           H2O_started_from_R_r631758_qhp284 
    H2O cluster total nodes:    1 
    H2O cluster total memory:   14.22 GB 
    H2O cluster total cores:    8 
    H2O cluster allowed cores:  8 
    H2O cluster healthy:        TRUE 
    H2O Connection ip:          localhost 
    H2O Connection port:        54321 
    H2O Connection proxy:       NA 
    H2O Internal Security:      FALSE 
    H2O API Extensions:         Algos, AutoML, Core V3, Core V4 
    R Version:                  R version 3.4.2 (2017-09-28) 
h2o.removeAll()
[1] 0

Load the forest cover type file.

forest.h2o<-h2o.importFile("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/covtype/covtype.full.csv")

  |=======================================================================| 100%
forest.R<-as.data.frame(forest.h2o)
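
As an optional sanity check (not part of the original run), we can confirm the frame loaded with the expected shape and look at the class balance of the response; the column name Cover_Type matches the data preview shown below.

dim(forest.h2o)                        ## rows and columns held in the H2O cluster
h2o.table(forest.h2o["Cover_Type"])    ## count of rows per cover-type class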

Split the data into training, validation, and test frames.

splits<-h2o.splitFrame(forest.h2o,c(0.6,0.2), seed=1234)
train<-h2o.assign(splits[[1]],"train.hex")
valid<-h2o.assign(splits[[2]],"valid.hex")
test<-h2o.assign(splits[[3]],"test.hex")
train[1:5,]
  Elevation Aspect Slope Horizontal_Distance_To_Hydrology
1      3136     32    20                              450
2      3217     80    13                               30
3      3119    293    13                               30
4      2679     48     7                              150
5      3261    322    13                               30
  Vertical_Distance_To_Hydrology Horizontal_Distance_To_Roadways Hillshade_9am
1                            -38                            1290           211
2                              1                            3901           237
3                             10                            4810           182
4                             24                            1588           223
5                              5                            5701           186
  Hillshade_Noon Hillshade_3pm Horizontal_Distance_To_Fire_Points
1            193           111                               1112
2            217           109                               2859
3            237           194                               1200
4            224           136                               6265
5            226           180                                769
  Wilderness_Area Soil_Type Cover_Type
1          area_0   type_28    class_1
2          area_0   type_22    class_7
3          area_0   type_21    class_1
4          area_0   type_11    class_2
5          area_0   type_21    class_1

[5 rows x 13 columns] 
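
h2o.splitFrame splits approximately rather than exactly, so the frames will be close to, but not exactly, 60/20/20. A quick optional check of the row counts:

nrow(train)    ## roughly 60% of the rows
nrow(valid)    ## roughly 20%
nrow(test)     ## the remaining ~20%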

Run a random forest.

starttime<-Sys.time()
model.rf1<-h2o.randomForest(x=1:12, y=13, training_frame = train, validation_frame = valid, model_id="rf_covType_v1", ntrees=2000, stopping_rounds=2, score_each_iteration=T, seed=1000000)

  |=======================================================================| 100%
rf1_time<-Sys.time()-starttime
print(paste("Took", round(rf1_time, digits=2), units(rf1_time), "to build random forest model."))
[1] "Took 1.01 mins to build random forest model."
summary(model.rf1)
Model Details:
==============

H2OMultinomialModel: drf
Model Key:  rf_covType_v1 
Model Summary: 
  number_of_trees number_of_internal_trees model_size_in_bytes min_depth
1              24                      168            11813083        17
  max_depth mean_depth min_leaves max_leaves mean_leaves
1        20   19.95238        527      16103  5504.44630

H2OMultinomialMetrics: drf
** Reported on training data. **
** Metrics reported on Out-Of-Bag training samples **

Training Set Metrics: 
=====================

Extract training frame with `h2o.getFrame("train.hex")`
MSE: (Extract with `h2o.mse`) 0.05607684
RMSE: (Extract with `h2o.rmse`) 0.2368055
Logloss: (Extract with `h2o.logloss`) 0.238433
Mean Per-Class Error: 0.1110219
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1  117176    9534       5       0      53      11     338 0.0782
class_2    5414  164066     321       3     240     244      50 0.0368
class_3      32     413   20370      93      22     512       0 0.0500
class_4       0      32     178    1390       0      58       0 0.1616
class_5      93    1386      63       0    4161      17       0 0.2726
class_6      38     368     739      42       7    9239       0 0.1144
class_7     709      70       0       0       2       0   11519 0.0635
Totals   123462  175869   21676    1528    4485   10081   11907 0.0604
                      Rate
class_1 =  9,941 / 127,117
class_2 =  6,272 / 170,338
class_3 =   1,072 / 21,442
class_4 =      268 / 1,658
class_5 =    1,559 / 5,720
class_6 =   1,194 / 10,433
class_7 =     781 / 12,300
Totals  = 21,087 / 349,008

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.939580
2 2  0.996241
3 3  0.998201
4 4  0.998252
5 5  0.998252
6 6  0.998252
7 7  1.000000


H2OMultinomialMetrics: drf
** Reported on validation data. **

Validation Set Metrics: 
=====================

Extract validation frame with `h2o.getFrame("valid.hex")`
MSE: (Extract with `h2o.mse`) 0.05314141
RMSE: (Extract with `h2o.rmse`) 0.2305242
Logloss: (Extract with `h2o.logloss`) 0.2003041
Mean Per-Class Error: 0.1025131
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,valid = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   39403    2998       0       0      15       2      82 0.0729
class_2    1589   54529     104       0      83      60      15 0.0328
class_3       0     131    6844      30       3     135       0 0.0419
class_4       1       1      61     479       0      20       0 0.1477
class_5      29     432      24       0    1377       8       0 0.2636
class_6       0     129     212      19       3    3101       0 0.1048
class_7     204      16       0       0       1       0    3878 0.0539
Totals    41226   58236    7245     528    1482    3326    3975 0.0552
                     Rate
class_1 =  3,097 / 42,500
class_2 =  1,851 / 56,380
class_3 =     299 / 7,143
class_4 =        83 / 562
class_5 =     493 / 1,870
class_6 =     363 / 3,464
class_7 =     221 / 4,099
Totals  = 6,407 / 116,018

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,valid = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.944776
2 2  0.997845
3 3  0.999681
4 4  0.999784
5 5  0.999793
6 6  0.999793
7 7  1.000000




Scoring History: 
            timestamp   duration number_of_trees training_rmse training_logloss
1 2017-10-03 11:44:55  0.022 sec               0                               
2 2017-10-03 11:44:57  2.319 sec               1       0.33564          2.49674
3 2017-10-03 11:44:59  4.109 sec               2       0.31925          2.16527
4 2017-10-03 11:45:00  5.308 sec               3       0.30618          1.81911
5 2017-10-03 11:45:02  6.728 sec               4       0.29535          1.50134
  training_classification_error validation_rmse validation_logloss
1                                                                 
2                       0.12395         0.33424            2.44479
3                       0.11354         0.26638            0.79153
4                       0.10557         0.25065            0.46030
5                       0.09925         0.24492            0.34187
  validation_classification_error
1                                
2                         0.12729
3                         0.08501
4                         0.07233
5                         0.06740

---
             timestamp   duration number_of_trees training_rmse training_logloss
20 2017-10-03 11:45:33 38.323 sec              19       0.23949          0.27068
21 2017-10-03 11:45:36 41.281 sec              20       0.23857          0.26135
22 2017-10-03 11:45:39 44.355 sec              21       0.23803          0.25378
23 2017-10-03 11:45:43 47.683 sec              22       0.23737          0.24814
24 2017-10-03 11:45:46 51.026 sec              23       0.23720          0.24367
25 2017-10-03 11:45:50 54.591 sec              24       0.23681          0.23843
   training_classification_error validation_rmse validation_logloss
20                       0.06279         0.23107            0.20225
21                       0.06201         0.23071            0.20166
22                       0.06156         0.23063            0.20089
23                       0.06116         0.23034            0.20031
24                       0.06090         0.23053            0.20089
25                       0.06042         0.23052            0.20030
   validation_classification_error
20                         0.05619
21                         0.05609
22                         0.05566
23                         0.05545
24                         0.05535
25                         0.05522

Variable Importances: (Extract with `h2o.varimp`) 
=================================================

Variable Importances: 
                             variable relative_importance scaled_importance
1                           Soil_Type       784024.125000          1.000000
2                           Elevation       738730.625000          0.942229
3     Horizontal_Distance_To_Roadways       327317.500000          0.417484
4  Horizontal_Distance_To_Fire_Points       317907.875000          0.405482
5                     Wilderness_Area       178298.437500          0.227414
6    Horizontal_Distance_To_Hydrology       159165.671875          0.203011
7      Vertical_Distance_To_Hydrology       134396.140625          0.171418
8                              Aspect       106050.000000          0.135264
9                      Hillshade_Noon        99759.429688          0.127240
10                      Hillshade_9am        92490.742188          0.117969
11                      Hillshade_3pm        87322.453125          0.111377
12                              Slope        80532.898438          0.102717
   percentage
1    0.252423
2    0.237840
3    0.105382
4    0.102353
5    0.057405
6    0.051245
7    0.043270
8    0.034144
9    0.032118
10   0.029778
11   0.028114
12   0.025928
model.rf1@model$validation_metrics
H2OMultinomialMetrics: drf
** Reported on validation data. **

Validation Set Metrics: 
=====================

Extract validation frame with `h2o.getFrame("valid.hex")`
MSE: (Extract with `h2o.mse`) 0.05314141
RMSE: (Extract with `h2o.rmse`) 0.2305242
Logloss: (Extract with `h2o.logloss`) 0.2003041
Mean Per-Class Error: 0.1025131
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,valid = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   39403    2998       0       0      15       2      82 0.0729
class_2    1589   54529     104       0      83      60      15 0.0328
class_3       0     131    6844      30       3     135       0 0.0419
class_4       1       1      61     479       0      20       0 0.1477
class_5      29     432      24       0    1377       8       0 0.2636
class_6       0     129     212      19       3    3101       0 0.1048
class_7     204      16       0       0       1       0    3878 0.0539
Totals    41226   58236    7245     528    1482    3326    3975 0.0552
                     Rate
class_1 =  3,097 / 42,500
class_2 =  1,851 / 56,380
class_3 =     299 / 7,143
class_4 =        83 / 562
class_5 =     493 / 1,870
class_6 =     363 / 3,464
class_7 =     221 / 4,099
Totals  = 6,407 / 116,018

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,valid = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.944776
2 2  0.997845
3 3  0.999681
4 4  0.999784
5 5  0.999793
6 6  0.999793
7 7  1.000000
h2o.hit_ratio_table(model.rf1,valid=T)[1,2]
[1] 0.9447758
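
That hit ratio is measured on the validation frame. The test frame created above has not been touched yet; a sketch of scoring the model on it (useful once we settle on a final model) could look like this:

perf.rf1 <- h2o.performance(model.rf1, newdata = test)   ## score on the held-out test frame
h2o.hit_ratio_table(perf.rf1)[1,2]                       ## top-1 hit ratio = overall test accuracy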

Try a GBM with default settings.

starttime<-Sys.time()
model.gbm1<-h2o.gbm(x=1:12, y=13, training_frame = train, validation_frame = valid, model_id="gbm_covType1", seed=2000000)

  |=======================================================================| 100%
gbm1_time<-Sys.time()-starttime
print(paste("Took", round(gbm1_time, digits=2), units(gbm1_time), "to build GBM model."))
[1] "Took 39.24 secs to build GBM model."
model.gbm1
Model Details:
==============

H2OMultinomialModel: gbm
Model ID:  gbm_covType1 
Model Summary: 
  number_of_trees number_of_internal_trees model_size_in_bytes min_depth
1              50                      350              173814         5
  max_depth mean_depth min_leaves max_leaves mean_leaves
1         5    5.00000         22         32    31.14000


H2OMultinomialMetrics: gbm
** Reported on training data. **

Training Set Metrics: 
=====================

Extract training frame with `h2o.getFrame("train.hex")`
MSE: (Extract with `h2o.mse`) 0.1435669
RMSE: (Extract with `h2o.rmse`) 0.3789023
Logloss: (Extract with `h2o.logloss`) 0.4560168
Mean Per-Class Error: 0.2697606
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   99691   26454      19       0      35      21     900 0.2158
class_2   21964  146151    1049       0     241     873      64 0.1420
class_3       0    1517   18811     111       1    1002       0 0.1227
class_4       0       0     224    1400       0      34       0 0.1556
class_5      45    3689     184       0    1787      15       0 0.6876
class_6      18    1392    2243      37       4    6739       0 0.3541
class_7    2502      88       0       0       0       0    9710 0.2106
Totals   124220  179291   22530    1548    2068    8684   10674 0.1855
                      Rate
class_1 = 27,429 / 127,120
class_2 = 24,191 / 170,342
class_3 =   2,631 / 21,442
class_4 =      258 / 1,658
class_5 =    3,933 / 5,720
class_6 =   3,694 / 10,433
class_7 =   2,590 / 12,300
Totals  = 64,726 / 349,015

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.814547
2 2  0.984293
3 3  0.998123
4 4  0.999734
5 5  0.999994
6 6  1.000000
7 7  1.000000


H2OMultinomialMetrics: gbm
** Reported on validation data. **

Validation Set Metrics: 
=====================

Extract validation frame with `h2o.getFrame("valid.hex")`
MSE: (Extract with `h2o.mse`) 0.1469814
RMSE: (Extract with `h2o.rmse`) 0.3833815
Logloss: (Extract with `h2o.logloss`) 0.4652781
Mean Per-Class Error: 0.281651
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,valid = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   33108    9074       0       0       5      10     303 0.2210
class_2    7333   48242     373       0      98     301      33 0.1443
class_3       1     514    6193      51       0     384       0 0.1330
class_4       0       0      90     452       0      20       0 0.1957
class_5      12    1196      59       0     598       5       0 0.6802
class_6       4     506     747      21       0    2186       0 0.3689
class_7     902      34       0       0       0       0    3163 0.2283
Totals    41360   59566    7462     524     701    2906    3499 0.1903
                      Rate
class_1 =   9,392 / 42,500
class_2 =   8,138 / 56,380
class_3 =      950 / 7,143
class_4 =        110 / 562
class_5 =    1,272 / 1,870
class_6 =    1,278 / 3,464
class_7 =      936 / 4,099
Totals  = 22,076 / 116,018

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,valid = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.809719
2 2  0.983244
3 3  0.998061
4 4  0.999698
5 5  0.999983
6 6  0.999991
7 7  1.000000

Judging by overall accuracy, this default GBM is much worse than our original random forest.

The GBM is far from converging, so there are three primary knobs to adjust to get our performance up while keeping a similar run time:

1: Adding trees will help. The default is 50.

2: Increasing the learning rate will also help. The contribution of each tree will be stronger, so the model will move further away from the overall mean.

3: Increasing the depth will help. This is the least straightforward parameter. Tuning trees and learning rate both have a direct impact that is easy to understand, while changing the depth adjusts the “weakness” of each learner; adding depth makes each tree fit the data more closely.

The first configuration will attack depth the most, since we have seen the random forest focus most on a continuous variable (elevation) and a 40-class factor (soil type).

We will also take a look at how to review a model while it is running.

starttime<-Sys.time()
model.gbm2<-h2o.gbm(x=1:12, y=13, training_frame = train, validation_frame = valid, ntrees=20, learn_rate=0.2, max_depth = 10, stopping_rounds = 2, stopping_tolerance = 0.01, score_each_iteration = T, model_id="gbm_covType2", seed=2000000)

  |=======================================================================| 100%
gbm2_time=Sys.time()-starttime
print(paste("Took", round(gbm2_time, digits=2), units(gbm2_time), "to build GBM model."))
[1] "Took 48.66 secs to build GBM model."
model.gbm2
Model Details:
==============

H2OMultinomialModel: gbm
Model ID:  gbm_covType2 
Model Summary: 
  number_of_trees number_of_internal_trees model_size_in_bytes min_depth
1              20                      140             1097498        10
  max_depth mean_depth min_leaves max_leaves mean_leaves
1        10   10.00000        167        855   600.65717


H2OMultinomialMetrics: gbm
** Reported on training data. **

Training Set Metrics: 
=====================

Extract training frame with `h2o.getFrame("train.hex")`
MSE: (Extract with `h2o.mse`) 0.05351069
RMSE: (Extract with `h2o.rmse`) 0.2313238
Logloss: (Extract with `h2o.logloss`) 0.1961796
Mean Per-Class Error: 0.05628537
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1  115685   11234       1       0      24       2     174 0.0900
class_2    7733  162200     139       0     171      75      24 0.0478
class_3       0     185   20945       9       4     299       0 0.0232
class_4       0       0      15    1639       0       4       0 0.0115
class_5      26     790      42       0    4850      12       0 0.1521
class_6       1     129     319       6       3    9975       0 0.0439
class_7     294      21       0       0       0       0   11985 0.0256
Totals   123739  174559   21461    1654    5052   10367   12183 0.0623
                      Rate
class_1 = 11,435 / 127,120
class_2 =  8,142 / 170,342
class_3 =     497 / 21,442
class_4 =       19 / 1,658
class_5 =      870 / 5,720
class_6 =     458 / 10,433
class_7 =     315 / 12,300
Totals  = 21,736 / 349,015

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.937722
2 2  0.999123
3 3  0.999971
4 4  0.999997
5 5  1.000000
6 6  1.000000
7 7  1.000000


H2OMultinomialMetrics: gbm
** Reported on validation data. **

Validation Set Metrics: 
=====================

Extract validation frame with `h2o.getFrame("valid.hex")`
MSE: (Extract with `h2o.mse`) 0.06612896
RMSE: (Extract with `h2o.rmse`) 0.2571555
Logloss: (Extract with `h2o.logloss`) 0.2308381
Mean Per-Class Error: 0.1138421
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,valid = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   37964    4406       0       0      20       2     108 0.1067
class_2    3080   52961     103       0     132      76      28 0.0606
class_3       1     152    6739      36       5     210       0 0.0566
class_4       0       0      56     482       0      24       0 0.1423
class_5      32     418      31       0    1382       7       0 0.2610
class_6       4     131     240      18       2    3069       0 0.1140
class_7     210      17       0       0       1       0    3871 0.0556
Totals    41291   58085    7169     536    1542    3388    4007 0.0823
                     Rate
class_1 =  4,536 / 42,500
class_2 =  3,419 / 56,380
class_3 =     404 / 7,143
class_4 =        80 / 562
class_5 =     488 / 1,870
class_6 =     395 / 3,464
class_7 =     228 / 4,099
Totals  = 9,550 / 116,018

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,valid = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.917685
2 2  0.996932
3 3  0.999853
4 4  0.999983
5 5  1.000000
6 6  1.000000
7 7  1.000000

While this is running, we can actually look at the model. To do this we simply need a new connection to H2O. This R console is busy running the model, so we need either another R console or the web browser (or Python, etc.). In the demo we will use Flow in the web browser, and the focus will be on model performance, since we are using R to control H2O. In Flow we can simply type:

getModel “gbm_covType2”
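
The same peek can be taken from a second R session instead of Flow; a rough sketch (connection details match the cluster started above):

library(h2o)
h2o.connect(ip = "localhost", port = 54321)   ## attach to the already-running cluster
partial <- h2o.getModel("gbm_covType2")       ## retrieve the (possibly still-building) model by id
h2o.scoreHistory(partial)                     ## scoring history accumulated so far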

Review both GBM models.

summary(model.gbm2)
Model Details:
==============

H2OMultinomialModel: gbm
Model Key:  gbm_covType2 
Model Summary: 
  number_of_trees number_of_internal_trees model_size_in_bytes min_depth
1              20                      140             1097498        10
  max_depth mean_depth min_leaves max_leaves mean_leaves
1        10   10.00000        167        855   600.65717

H2OMultinomialMetrics: gbm
** Reported on training data. **

Training Set Metrics: 
=====================

Extract training frame with `h2o.getFrame("train.hex")`
MSE: (Extract with `h2o.mse`) 0.05351069
RMSE: (Extract with `h2o.rmse`) 0.2313238
Logloss: (Extract with `h2o.logloss`) 0.1961796
Mean Per-Class Error: 0.05628537
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1  115685   11234       1       0      24       2     174 0.0900
class_2    7733  162200     139       0     171      75      24 0.0478
class_3       0     185   20945       9       4     299       0 0.0232
class_4       0       0      15    1639       0       4       0 0.0115
class_5      26     790      42       0    4850      12       0 0.1521
class_6       1     129     319       6       3    9975       0 0.0439
class_7     294      21       0       0       0       0   11985 0.0256
Totals   123739  174559   21461    1654    5052   10367   12183 0.0623
                      Rate
class_1 = 11,435 / 127,120
class_2 =  8,142 / 170,342
class_3 =     497 / 21,442
class_4 =       19 / 1,658
class_5 =      870 / 5,720
class_6 =     458 / 10,433
class_7 =     315 / 12,300
Totals  = 21,736 / 349,015

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.937722
2 2  0.999123
3 3  0.999971
4 4  0.999997
5 5  1.000000
6 6  1.000000
7 7  1.000000


H2OMultinomialMetrics: gbm
** Reported on validation data. **

Validation Set Metrics: 
=====================

Extract validation frame with `h2o.getFrame("valid.hex")`
MSE: (Extract with `h2o.mse`) 0.06612896
RMSE: (Extract with `h2o.rmse`) 0.2571555
Logloss: (Extract with `h2o.logloss`) 0.2308381
Mean Per-Class Error: 0.1138421
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,valid = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   37964    4406       0       0      20       2     108 0.1067
class_2    3080   52961     103       0     132      76      28 0.0606
class_3       1     152    6739      36       5     210       0 0.0566
class_4       0       0      56     482       0      24       0 0.1423
class_5      32     418      31       0    1382       7       0 0.2610
class_6       4     131     240      18       2    3069       0 0.1140
class_7     210      17       0       0       1       0    3871 0.0556
Totals    41291   58085    7169     536    1542    3388    4007 0.0823
                     Rate
class_1 =  4,536 / 42,500
class_2 =  3,419 / 56,380
class_3 =     404 / 7,143
class_4 =        80 / 562
class_5 =     488 / 1,870
class_6 =     395 / 3,464
class_7 =     228 / 4,099
Totals  = 9,550 / 116,018

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,valid = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.917685
2 2  0.996932
3 3  0.999853
4 4  0.999983
5 5  1.000000
6 6  1.000000
7 7  1.000000




Scoring History: 
            timestamp   duration number_of_trees training_rmse training_logloss
1 2017-10-03 11:46:36  0.004 sec               0       0.85714          1.94591
2 2017-10-03 11:46:38  1.259 sec               1       0.69862          1.22691
3 2017-10-03 11:46:39  2.797 sec               2       0.59806          0.94045
4 2017-10-03 11:46:41  4.413 sec               3       0.52151          0.75984
5 2017-10-03 11:46:42  6.093 sec               4       0.46291          0.63428
  training_classification_error validation_rmse validation_logloss
1                       0.62247         0.85714            1.94591
2                       0.16357         0.70052            1.23468
3                       0.15296         0.60170            0.95177
4                       0.14244         0.52703            0.77435
5                       0.13394         0.47023            0.65121
  validation_classification_error
1                         0.62143
2                         0.17170
3                         0.16229
4                         0.15360
5                         0.14518

---
             timestamp   duration number_of_trees training_rmse training_logloss
16 2017-10-03 11:47:07 30.949 sec              15       0.25733          0.23749
17 2017-10-03 11:47:10 33.739 sec              16       0.25153          0.22779
18 2017-10-03 11:47:13 36.592 sec              17       0.24541          0.21805
19 2017-10-03 11:47:16 39.570 sec              18       0.24007          0.20959
20 2017-10-03 11:47:19 42.651 sec              19       0.23585          0.20293
21 2017-10-03 11:47:22 45.830 sec              20       0.23132          0.19618
   training_classification_error validation_rmse validation_logloss
16                       0.07602         0.27834            0.26820
17                       0.07288         0.27344            0.25927
18                       0.06961         0.26838            0.25031
19                       0.06670         0.26407            0.24273
20                       0.06478         0.26075            0.23687
21                       0.06228         0.25716            0.23084
   validation_classification_error
16                         0.09342
17                         0.09074
18                         0.08821
19                         0.08584
20                         0.08427
21                         0.08231

Variable Importances: (Extract with `h2o.varimp`) 
=================================================

Variable Importances: 
                             variable relative_importance scaled_importance
1                           Elevation       103639.062500          1.000000
2                           Soil_Type        96176.960938          0.927999
3     Horizontal_Distance_To_Roadways        40708.468750          0.392791
4  Horizontal_Distance_To_Fire_Points        39770.984375          0.383745
5    Horizontal_Distance_To_Hydrology        14269.908203          0.137689
6                     Wilderness_Area        10816.267578          0.104365
7      Vertical_Distance_To_Hydrology         9111.434570          0.087915
8                      Hillshade_Noon         9095.528320          0.087762
9                              Aspect         6482.574707          0.062550
10                      Hillshade_9am         6397.168945          0.061725
11                      Hillshade_3pm         4135.834961          0.039906
12                              Slope         3477.522461          0.033554
   percentage
1    0.301205
2    0.279518
3    0.118310
4    0.115586
5    0.041472
6    0.031435
7    0.026480
8    0.026434
9    0.018840
10   0.018592
11   0.012020
12   0.010107
h2o.hit_ratio_table(model.gbm1,valid = T)[1,2]    ## review the first model's accuracy
[1] 0.8097192
h2o.hit_ratio_table(model.gbm2,valid = T)[1,2]    ## review the new model's accuracy
[1] 0.9176852

Try the GBM again.

This has moved us in the right direction, but the accuracy is still lower than the random forest's. It also has not converged yet, so we can make it more aggressive. We can now add the stochastic nature of random forest into the GBM using some of the newer H2O settings. This will help generalization and also give a quicker runtime, so we can add a few more trees.

model.gbm3 <- h2o.gbm(
  training_frame = train,     ##
  validation_frame = valid,   ##
  x=1:12,                     ##
  y=13,                       ## 
  ntrees = 30,                ## add a few trees (from 20, though default is 50)
  learn_rate = 0.3,           ## increase the learning rate even further
  max_depth = 10,             ## 
  sample_rate = 0.7,          ## use a random 70% of the rows to fit each tree
  col_sample_rate = 0.7,       ## use 70% of the columns to fit each tree
  stopping_rounds = 2,        ## 
  stopping_tolerance = 0.01,  ##
  score_each_iteration = T,   ##
  model_id = "gbm_covType3",  ##
  seed = 2000000)             ##

  |=======================================================================| 100%
summary(model.gbm3)
Model Details:
==============

H2OMultinomialModel: gbm
Model Key:  gbm_covType3 
Model Summary: 
  number_of_trees number_of_internal_trees model_size_in_bytes min_depth
1              30                      210             1375695        10
  max_depth mean_depth min_leaves max_leaves mean_leaves
1        10   10.00000        160        800   496.31906

H2OMultinomialMetrics: gbm
** Reported on training data. **

Training Set Metrics: 
=====================

Extract training frame with `h2o.getFrame("train.hex")`
MSE: (Extract with `h2o.mse`) 0.02767722
RMSE: (Extract with `h2o.rmse`) 0.1663647
Logloss: (Extract with `h2o.logloss`) 0.1113528
Mean Per-Class Error: 0.02172058
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1  121209    5849       2       0      19       0      41 0.0465
class_2    3779  166343      77       0      86      45      12 0.0235
class_3       0      98   21292       0       3      49       0 0.0070
class_4       0       0       0    1658       0       0       0 0.0000
class_5      15     276      11       0    5411       7       0 0.0540
class_6       2      60      54       0       1   10316       0 0.0112
class_7     113       8       0       0       0       0   12179 0.0098
Totals   125118  172634   21436    1658    5520   10417   12232 0.0304
                      Rate
class_1 =  5,911 / 127,120
class_2 =  3,999 / 170,342
class_3 =     150 / 21,442
class_4 =        0 / 1,658
class_5 =      309 / 5,720
class_6 =     117 / 10,433
class_7 =     121 / 12,300
Totals  = 10,607 / 349,015

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.969609
2 2  0.999668
3 3  0.999991
4 4  0.999997
5 5  0.999997
6 6  0.999997
7 7  1.000000


H2OMultinomialMetrics: gbm
** Reported on validation data. **

Validation Set Metrics: 
=====================

Extract validation frame with `h2o.getFrame("valid.hex")`
MSE: (Extract with `h2o.mse`) 0.04525272
RMSE: (Extract with `h2o.rmse`) 0.2127269
Logloss: (Extract with `h2o.logloss`) 0.1619933
Mean Per-Class Error: 0.0903014
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,valid = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   39524    2876       0       0      19       3      78 0.0700
class_2    2027   54023      87       0     149      71      23 0.0418
class_3       0     135    6823      33      10     142       0 0.0448
class_4       0       0      58     478       0      26       0 0.1495
class_5      25     294      24       0    1518       9       0 0.1882
class_6       4     121     179      19       3    3138       0 0.0941
class_7     168      10       0       0       1       0    3920 0.0437
Totals    41748   57459    7171     530    1700    3389    4021 0.0568
                     Rate
class_1 =  2,976 / 42,500
class_2 =  2,357 / 56,380
class_3 =     320 / 7,143
class_4 =        84 / 562
class_5 =     352 / 1,870
class_6 =     326 / 3,464
class_7 =     179 / 4,099
Totals  = 6,594 / 116,018

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,valid = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.943164
2 2  0.997725
3 3  0.999828
4 4  0.999983
5 5  0.999983
6 6  0.999983
7 7  1.000000




Scoring History: 
            timestamp   duration number_of_trees training_rmse training_logloss
1 2017-10-03 11:47:26  0.002 sec               0       0.85714          1.94591
2 2017-10-03 11:47:27  1.037 sec               1       0.61321          0.99140
3 2017-10-03 11:47:29  2.088 sec               2       0.50243          0.72338
4 2017-10-03 11:47:30  3.288 sec               3       0.42976          0.56747
5 2017-10-03 11:47:31  4.611 sec               4       0.38287          0.47130
  training_classification_error validation_rmse validation_logloss
1                       0.62247         0.85714            1.94591
2                       0.17880         0.61639            1.00202
3                       0.15876         0.50813            0.73836
4                       0.14303         0.43776            0.58550
5                       0.13322         0.39288            0.49164
  validation_classification_error
1                         0.62143
2                         0.18642
3                         0.16820
4                         0.15305
5                         0.14418

---
             timestamp          duration number_of_trees training_rmse
26 2017-10-03 11:48:20        53.104 sec              25       0.18490
27 2017-10-03 11:48:23        56.417 sec              26       0.18087
28 2017-10-03 11:48:28  1 min  1.677 sec              27       0.17751
29 2017-10-03 11:48:32  1 min  5.293 sec              28       0.17327
30 2017-10-03 11:48:35  1 min  8.927 sec              29       0.16997
31 2017-10-03 11:48:39  1 min 12.702 sec              30       0.16636
   training_logloss training_classification_error validation_rmse
26          0.13258                       0.03876         0.22431
27          0.12787                       0.03690         0.22160
28          0.12390                       0.03520         0.21946
29          0.11914                       0.03347         0.21707
30          0.11546                       0.03186         0.21507
31          0.11135                       0.03039         0.21273
   validation_logloss validation_classification_error
26            0.17888                         0.06321
27            0.17497                         0.06184
28            0.17190                         0.06061
29            0.16824                         0.05934
30            0.16538                         0.05835
31            0.16199                         0.05684

Variable Importances: (Extract with `h2o.varimp`) 
=================================================

Variable Importances: 
                             variable relative_importance scaled_importance
1                           Elevation        82004.531250          1.000000
2                           Soil_Type        36520.035156          0.445342
3     Horizontal_Distance_To_Roadways        19911.322266          0.242808
4  Horizontal_Distance_To_Fire_Points        19259.638672          0.234861
5    Horizontal_Distance_To_Hydrology         8144.814941          0.099322
6                     Wilderness_Area         5873.890137          0.071629
7      Vertical_Distance_To_Hydrology         5433.041504          0.066253
8                      Hillshade_Noon         4849.979980          0.059143
9                              Aspect         4088.666748          0.049859
10                      Hillshade_9am         3875.101807          0.047255
11                      Hillshade_3pm         2898.975830          0.035351
12                              Slope         2387.373047          0.029113
   percentage
1    0.420003
2    0.187045
3    0.101980
4    0.098642
5    0.041715
6    0.030084
7    0.027826
8    0.024840
9    0.020941
10   0.019847
11   0.014848
12   0.012227
h2o.hit_ratio_table(model.rf1,valid = T)[1,2]     ## review the random forest accuracy
[1] 0.9447758
h2o.hit_ratio_table(model.gbm1,valid = T)[1,2]    ## review the first model's accuracy
[1] 0.8097192
h2o.hit_ratio_table(model.gbm2,valid = T)[1,2]    ## review the second model's accuracy
[1] 0.9176852
h2o.hit_ratio_table(model.gbm3,valid = T)[1,2]    ## review the newest model's accuracy
[1] 0.943164
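
To see all four numbers side by side, the same calls can be collected into a single summary (optional):

models <- list(rf1 = model.rf1, gbm1 = model.gbm1, gbm2 = model.gbm2, gbm3 = model.gbm3)
sapply(models, function(m) h2o.hit_ratio_table(m, valid = TRUE)[1,2])   ## validation accuracy per model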

Tune the random forest.

starttime<-Sys.time()
model.rf2<-h2o.randomForest(x=1:12, y=13, training_frame = train, validation_frame = valid, model_id="rf_covType2", ntrees=2000, max_depth=30, stopping_rounds=2, stopping_tolerance = 1e-2, score_each_iteration=T, seed=3000000)

  |=======================================================================| 100%
rf2_time=Sys.time()-starttime
print(paste("Took", round(rf2_time, digits=2), units(rf2_time), "to build random forest model."))
[1] "Took 1.04 mins to build random forest model."
summary(model.rf2)
Model Details:
==============

H2OMultinomialModel: drf
Model Key:  rf_covType2 
Model Summary: 
  number_of_trees number_of_internal_trees model_size_in_bytes min_depth
1              22                      154            16197330        18
  max_depth mean_depth min_leaves max_leaves mean_leaves
1        30   28.03896        538      25147  8261.27900

H2OMultinomialMetrics: drf
** Reported on training data. **
** Metrics reported on Out-Of-Bag training samples **

Training Set Metrics: 
=====================

Extract training frame with `h2o.getFrame("train.hex")`
MSE: (Extract with `h2o.mse`) 0.04415075
RMSE: (Extract with `h2o.rmse`) 0.2101208
Logloss: (Extract with `h2o.logloss`) 0.2736372
Mean Per-Class Error: 0.1031423
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1  119338    7439       3       0      43       9     284 0.0612
class_2    4032  165551     275       1     255     176      42 0.0281
class_3     170     450   20333      81      22     385       0 0.0517
class_4       3      71     176    1363       0      45       0 0.1779
class_5     312     887      51       0    4448      22       0 0.2224
class_6     168     452     605      41      10    9157       0 0.1223
class_7     645      72       0       0       2       0   11581 0.0585
Totals   124668  174922   21443    1486    4780    9794   11907 0.0494
                      Rate
class_1 =  7,778 / 127,116
class_2 =  4,781 / 170,332
class_3 =   1,108 / 21,441
class_4 =      295 / 1,658
class_5 =    1,272 / 5,720
class_6 =   1,276 / 10,433
class_7 =     719 / 12,300
Totals  = 17,229 / 349,000

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.950633
2 2  0.991731
3 3  0.992776
4 4  0.992814
5 5  0.992814
6 6  0.992814
7 7  1.000000


H2OMultinomialMetrics: drf
** Reported on validation data. **

Validation Set Metrics: 
=====================

Extract validation frame with `h2o.getFrame("valid.hex")`
MSE: (Extract with `h2o.mse`) 0.03989236
RMSE: (Extract with `h2o.rmse`) 0.1997307
Logloss: (Extract with `h2o.logloss`) 0.1646399
Mean Per-Class Error: 0.08603375
Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,valid = TRUE)`)
=========================================================================
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
        class_1 class_2 class_3 class_4 class_5 class_6 class_7  Error
class_1   40394    2013       0       0      13       1      79 0.0496
class_2    1252   54865      88       0      98      56      21 0.0269
class_3      18     114    6856      34       4     117       0 0.0402
class_4       2       5      56     480       0      19       0 0.1459
class_5      42     292      25       0    1499      12       0 0.1984
class_6      17      93     196      19       4    3135       0 0.0950
class_7     169      20       0       0       1       0    3909 0.0464
Totals    41894   57402    7221     533    1619    3340    4009 0.0421
                     Rate
class_1 =  2,106 / 42,500
class_2 =  1,515 / 56,380
class_3 =     287 / 7,143
class_4 =        82 / 562
class_5 =     371 / 1,870
class_6 =     329 / 3,464
class_7 =     190 / 4,099
Totals  = 4,880 / 116,018

Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,valid = TRUE)`
=======================================================================
Top-7 Hit Ratios: 
  k hit_ratio
1 1  0.957938
2 2  0.997130
3 3  0.998492
4 4  0.998526
5 5  0.998535
6 6  0.998535
7 7  1.000000




Scoring History: 
            timestamp   duration number_of_trees training_rmse training_logloss
1 2017-10-03 11:48:44  0.001 sec               0                               
2 2017-10-03 11:48:45  1.441 sec               1       0.36401          3.95096
3 2017-10-03 11:48:47  3.017 sec               2       0.34539          3.39327
4 2017-10-03 11:48:48  4.657 sec               3       0.32529          2.85289
5 2017-10-03 11:48:50  6.432 sec               4       0.30838          2.41212
  training_classification_error validation_rmse validation_logloss
1                                                                 
2                       0.12640         0.36551            3.97857
3                       0.11751         0.26355            1.34657
4                       0.10701         0.23660            0.75357
5                       0.09891         0.22490            0.51062
  validation_classification_error
1                                
2                         0.13315
3                         0.08370
4                         0.06657
5                         0.05871

---
             timestamp   duration number_of_trees training_rmse training_logloss
18 2017-10-03 11:49:22 38.629 sec              17       0.21713          0.37665
19 2017-10-03 11:49:25 41.916 sec              18       0.21542          0.34832
20 2017-10-03 11:49:29 45.436 sec              19       0.21375          0.32388
21 2017-10-03 11:49:33 48.950 sec              20       0.21253          0.30567
22 2017-10-03 11:49:36 52.585 sec              21       0.21116          0.28735
23 2017-10-03 11:49:40 56.335 sec              22       0.21012          0.27364
   training_classification_error validation_rmse validation_logloss
18                       0.05379         0.20175            0.17190
19                       0.05266         0.20127            0.16925
20                       0.05163         0.20059            0.16632
21                       0.05091         0.20031            0.16575
22                       0.05011         0.19980            0.16480
23                       0.04937         0.19973            0.16464
   validation_classification_error
18                         0.04305
19                         0.04287
20                         0.04252
21                         0.04254
22                         0.04215
23                         0.04206

Variable Importances: (Extract with `h2o.varimp`) 
=================================================

Variable Importances: 
                             variable relative_importance scaled_importance
1                           Elevation       807548.937500          1.000000
2                           Soil_Type       679147.000000          0.840998
3     Horizontal_Distance_To_Roadways       364836.187500          0.451782
4  Horizontal_Distance_To_Fire_Points       343292.375000          0.425104
5    Horizontal_Distance_To_Hydrology       179493.984375          0.222270
6      Vertical_Distance_To_Hydrology       158505.203125          0.196279
7                     Wilderness_Area       141778.484375          0.175566
8                              Aspect       123543.703125          0.152986
9                      Hillshade_Noon       116247.953125          0.143952
10                      Hillshade_9am       110365.007812          0.136667
11                      Hillshade_3pm       104382.250000          0.129258
12                              Slope        93387.914062          0.115644
   percentage
1    0.250595
2    0.210750
3    0.113214
4    0.106529
5    0.055700
6    0.049187
7    0.043996
8    0.038337
9    0.036074
10   0.034248
11   0.032391
12   0.028980
h2o.hit_ratio_table(model.gbm3,valid = T)[1,2]    ## review the newest GBM accuracy
[1] 0.943164
h2o.hit_ratio_table(model.rf1,valid = T)[1,2]     ## original random forest accuracy
[1] 0.9447758
h2o.hit_ratio_table(model.rf2,valid = T)[1,2]     ## newest random forest accuracy
[1] 0.9579375

So we now have our accuracy up beyond 95%.

We have withheld an extra test set to ensure that, after all the parameter

tuning repeatedly applied to the validation data, our model produces

similar results against a third data set.

Create predictions using latest RF model against the test set

finalRf_predictions<-h2o.predict(object=model.rf2, newdata=test)

  |                                                                             
  |                                                                       |   0%
  |                                                                             
  |=======================================================================| 100%

Glance at what that prediction set looks like

We see a final prediction in the “predict” column,

and then the predicted probabilities per class.

finalRf_predictions
  predict   class_1   class_2 class_3 class_4   class_5 class_6 class_7
1 class_2 0.3000000 0.7000000       0       0 0.0000000       0       0
2 class_1 1.0000000 0.0000000       0       0 0.0000000       0       0
3 class_1 0.7777778 0.2222222       0       0 0.0000000       0       0
4 class_1 0.8450704 0.1549296       0       0 0.0000000       0       0
5 class_2 0.2052980 0.7947020       0       0 0.0000000       0       0
6 class_5 0.0000000 0.3333333       0       0 0.6666667       0       0

[115979 rows x 8 columns] 

Compare these predictions to the accuracy we got from our experimentation

h2o.hit_ratio_table(model.rf2, valid=T)[1,2] ## validation set accuracy
[1] 0.9579375
mean(finalRf_predictions$predict==test$Cover_Type) ## test set accuracy
[1] 0.9579752

We have very similar error rates on both sets, so it would not seem

that we have overfit the validation set through our experimentation.
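
As a minimal sketch (assuming model.rf2 and the test frame are still loaded;

perf.test is a name introduced here, not part of the original demo), we could

also pull the full test-set metrics rather than just the top-1 accuracy:

perf.test <- h2o.performance(model.rf2, newdata = test)  ## score on the held-out test frame
h2o.confusionMatrix(perf.test)                           ## per-class errors, analogous to the validation matrix
h2o.logloss(perf.test)                                   ## compare with the validation logloss reported earlier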

This concludes the demo, but what might we try next, if we were to continue?

We could further experiment with deeper trees or a higher percentage of

columns used (mtries).

Also we could experiment with the nbins and nbins_cats settings to control

the H2O splitting.

The general guidance is to lower these values to increase generalization

(avoid overfitting), and to raise them to fit the distribution more closely.

A good example is increasing nbins_cats to match the number of levels in a

categorical predictor. Though usually unnecessary, this can improve

performance when a problem has a very important categorical predictor.
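
As a hedged sketch of that experiment (hypothetical follow-up model and values,

not built in this demo; Soil_Type has 40 levels in this data set), we might

retrain the forest with more columns sampled per split and nbins_cats raised:

model.rf3 <- h2o.randomForest(x = 1:12, y = 13, training_frame = train, validation_frame = valid,
                              model_id = "rf_covType3",            ## hypothetical follow-up model
                              ntrees = 2000, max_depth = 30,
                              mtries = 6,                          ## sample roughly half of the 12 predictors per split
                              nbins_cats = 40,                     ## one bin per Soil_Type level
                              stopping_rounds = 2, stopping_tolerance = 1e-2,
                              score_each_iteration = T, seed = 4000000)
h2o.hit_ratio_table(model.rf3, valid = T)[1, 2]                    ## compare against model.rf2 above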

Also, we can tune our GBM further and likely get better performance.

The GBM converges to its optimal accuracy more slowly, so if we relax our

runtime requirements a little, we can balance the learning rate against

the number of trees used.

In a production setting where fine-grained accuracy is beneficial, it is

common to set the learn rate to a very small number, such as 0.01 or less,

and add trees to match. Early stopping is very powerful here: it lets us set

a low learning rate and then build as many trees as needed until the

desired convergence is reached.
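
A minimal sketch of that setup (hypothetical model id and values, not run in

this demo): a small learn_rate, a large ntrees budget, and early stopping on

the validation logloss deciding when to stop:

model.gbm4 <- h2o.gbm(x = 1:12, y = 13, training_frame = train, validation_frame = valid,
                      model_id = "gbm_covType4",        ## hypothetical follow-up model
                      ntrees = 10000,                   ## large budget; early stopping picks the real count
                      learn_rate = 0.01,                ## very small learning rate
                      max_depth = 10,
                      sample_rate = 0.7, col_sample_rate = 0.7,
                      stopping_rounds = 5, stopping_tolerance = 1e-4, stopping_metric = "logloss",
                      score_tree_interval = 10,         ## score every 10 trees to keep scoring overhead down
                      seed = 2000000)
h2o.hit_ratio_table(model.gbm4, valid = T)[1, 2]        ## compare against model.gbm3 and model.rf2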

As with random forest, we can also adjust nbins and nbins_cats.

All done, shutdown H2O

h2o.shutdown(prompt=FALSE)
[1] TRUE
---
title: "GBM & random forest"
output: html_notebook
---

```{r}
library(h2o)
h2o.init(nthreads=-1,max_mem_size = "16G" )
h2o.removeAll()
```

#load the forest cover type file
```{r}
forest.h2o<-h2o.importFile("https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/covtype/covtype.full.csv")
forest.R<-as.data.frame(forest.h2o)
```

#split data
```{r}
splits<-h2o.splitFrame(forest.h2o,c(0.6,0.2), seed=1234)
train<-h2o.assign(splits[[1]],"train.hex")
valid<-h2o.assign(splits[[2]],"valid.hex")
test<-h2o.assign(splits[[3]],"test.hex")
train[1:5,]
```

#run random forest
```{r}
starttime<-Sys.time()
model.rf1<-h2o.randomForest(x=1:12, y=13, training_frame = train, validation_frame = valid, model_id="rf_covType_v1", ntrees=2000, stopping_rounds=2, score_each_iteration = T, seed=1000000)
rf1_time<-Sys.time()-starttime
print(paste("Took", round(rf1_time, digits=2), units(rf1_time), "to build random forest model."))
summary(model.rf1)
model.rf1@model$validation_metrics
h2o.hit_ratio_table(model.rf1,valid=T)[1,2]
```

#try GBM
```{r}
starttime<-Sys.time()
model.gbm1<-h2o.gbm(x=1:12,y=13, training_frame = train, validation_frame = valid,model_id="gbm_covType1", seed=2000000)
gbm1_time<-Sys.time()-starttime
print(paste("Took", round(gbm1_time, digits=2), units(gbm1_time), "to build GBM model."))
model.gbm1

```

## Overall accuracy.

## This default GBM is much worse than our original random forest.
## The GBM is far from converging, so there are three primary knobs to adjust
##  to get our performance up if we want to keep a similar run time.
## 1: Adding trees will help. The default is 50.
## 2: Increasing the learning rate will also help. The contribution of each
##  tree will be stronger, so the model will move further away from the
##  overall mean.
## 3: Increasing the depth will help. This is the parameter that is the least
##  straightforward. Tuning trees and learning rate both have direct impact
##  that is easy to understand. Changing the depth means you are adjusting
##  the "weakness" of each learner. Adding depth makes each tree fit the data
##  closer. 

## The first configuration will attack depth the most, since we've seen the
##  random forest focus on a continuous variable (elevation) and 40-class factor
##  (soil type) the most.

## Also we will take a look at how to review a model while it is running.


```{r}
starttime<-Sys.time()
model.gbm2<-h2o.gbm(x=1:12, y=13, training_frame = train, validation_frame = valid, ntrees=20, learn_rate=0.2, max_depth = 10, stopping_rounds = 2, stopping_tolerance = 0.01, score_each_iteration = T, model_id="gbm_covType2", seed=2000000)
gbm2_time=Sys.time()-starttime
print(paste("Took", round(gbm2_time, digits=2), units(gbm2_time), "to build GBM model."))
model.gbm2
```
#### While this is running, we can actually look at the model.
#### To do this we simply need a new connection to H2O.
#### This R console will run the model, so we need either another R console
####   or the web browser (or python, etc.).
#### In the demo, we will use Flow in our web browser
####  http://localhost:54321
#### And the focus will be to look at model performance, since we are using R to 
####  control H2O. So we can simply type in:
####  getModel "gbm_covType2"
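#### A minimal sketch of the same check from a second R session (hedged: run it
####  in another console while gbm_covType2 is still building; not evaluated here):
```{r, eval=FALSE}
library(h2o)
h2o.init()                                 ## attaches to the cluster already running on localhost:54321
gbm2.view <- h2o.getModel("gbm_covType2")  ## fetch the in-progress model by its id
h2o.scoreHistory(gbm2.view)                ## scoring history grows as more trees are added
```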

#review both gbm models
```{r}
summary(model.gbm2)
h2o.hit_ratio_table(model.gbm1,valid = T)[1,2]    ## review the first model's accuracy
h2o.hit_ratio_table(model.gbm2,valid = T)[1,2]    ## review the new model's accuracy
```

#try gbm again
## This has moved us in the right direction, but accuracy is still lower
##  than the random forest's.
## And it still has not converged, so we can make it more aggressive.
## We can now add the stochastic nature of random forest into the GBM
##  using some of the new H2O settings. This will help generalize 
##  and also provide a quicker runtime, so we can add a few more trees.
```{r}
model.gbm3 <- h2o.gbm(
  training_frame = train,     ##
  validation_frame = valid,   ##
  x=1:12,                     ##
  y=13,                       ## 
  ntrees = 30,                ## add a few trees (from 20, though default is 50)
  learn_rate = 0.3,           ## increase the learning rate even further
  max_depth = 10,             ## 
  sample_rate = 0.7,          ## use a random 70% of the rows to fit each tree
  col_sample_rate = 0.7,       ## use 70% of the columns to fit each tree
  stopping_rounds = 2,        ## 
  stopping_tolerance = 0.01,  ##
  score_each_iteration = T,   ##
  model_id = "gbm_covType3",  ##
  seed = 2000000)             ##

summary(model.gbm3)
h2o.hit_ratio_table(model.rf1,valid = T)[1,2]     ## review the random forest accuracy
h2o.hit_ratio_table(model.gbm1,valid = T)[1,2]    ## review the first model's accuracy
h2o.hit_ratio_table(model.gbm2,valid = T)[1,2]    ## review the second model's accuracy
h2o.hit_ratio_table(model.gbm3,valid = T)[1,2]    ## review the newest model's accuracy
```

#tuning random forest 
```{r}
starttime<-Sys.time()
model.rf2<-h2o.randomForest(x=1:12, y=13, training_frame = train, validation_frame = valid, model_id="rf_covType2", ntrees=2000, max_depth=30, stopping_rounds=2, stopping_tolerance = 1e-2, score_each_iteration=T, seed=3000000)
rf2_time=Sys.time()-starttime
print(paste("Took", round(rf2_time, digits=2), units(rf2_time), "to build random forest model."))
summary(model.rf2)
h2o.hit_ratio_table(model.gbm3,valid = T)[1,2]    ## review the newest GBM accuracy
h2o.hit_ratio_table(model.rf1,valid = T)[1,2]     ## original random forest accuracy
h2o.hit_ratio_table(model.rf2,valid = T)[1,2]     ## newest random forest accuracy

```


## So we now have our accuracy up beyond 95%. 
## We have withheld an extra test set to ensure that, after all the parameter
##  tuning repeatedly applied to the validation data, our model produces
##  similar results against a third data set.

# Create predictions using latest RF model against the test set
```{r}
finalRf_predictions<-h2o.predict(object=model.rf2, newdata=test)
```

## Glance at what that prediction set looks like
## We see a final prediction in the "predict" column,
##  and then the predicted probabilities per class.
```{r}
finalRf_predictions
```

## Compare these predictions to the accuracy we got from our experimentation
```{r}
h2o.hit_ratio_table(model.rf2, valid=T)[1,2] ## validation set accuracy
mean(finalRf_predictions$predict==test$Cover_Type) ## test set accuracy
```

## We have very similar error rates on both sets, so it would not seem
##  that we have overfit the validation set through our experimentation.

## This concludes the demo, but what might we try next, if we were to continue?
##
## We could further experiment with deeper trees or a higher percentage of
##  columns used (mtries).
## Also we could experiment with the nbins and nbins_cats settings to control
##  the H2O splitting.
## The general guidance is to lower these values to increase generalization
##  (avoid overfitting), and to raise them to fit the distribution more closely.
## A good example is increasing nbins_cats to match the number of levels in a
##  categorical predictor. Though usually unnecessary, this can improve
##  performance when a problem has a very important categorical predictor.
##
## Also, we can tune our GBM further and likely get better performance.
## The GBM converges to its optimal accuracy more slowly, so if we relax our
##  runtime requirements a little, we can balance the learning rate against
##  the number of trees used.
## In a production setting where fine-grained accuracy is beneficial, it is
##  common to set the learn rate to a very small number, such as 0.01 or less,
##  and add trees to match. Early stopping is very powerful here: it lets us
##  set a low learning rate and then build as many trees as needed until the
##  desired convergence is reached.
## As with random forest, we can also adjust nbins and nbins_cats.


### All done, shutdown H2O    
```{r}
h2o.shutdown(prompt=FALSE)
```

