library(h2o)
# Start a local H2O instance (or connect to one that is already running) and
# keep the value returned by h2o.init() in `localH2O`.
localH2O <- h2o.init()
## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     /tmp/Rtmp9vKuTg/h2o_rinor_started_from_r.out
##     /tmp/Rtmp9vKuTg/h2o_rinor_started_from_r.err
## 
## 
## Starting H2O JVM and connecting: .. Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 450 milliseconds 
##     H2O cluster version:        3.8.3.3 
##     H2O cluster name:           H2O_started_from_R_rinor_myf984 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.47 GB 
##     H2O cluster total cores:    1 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     R Version:                  R version 3.2.2 (2015-08-14) 
## 
## Note:  As started, H2O is limited to the CRAN default of 2 CPUs.
##        Shut down and restart H2O as shown below to use all your CPUs.
##            > h2o.shutdown()
##            > h2o.init(nthreads = -1)
# Run the GLM demo. As echoed in the output below, it uploads prostate.csv,
# prints a summary, fits a binomial GLM on CAPSULE and plots the coefficients.
demo(topic = "h2o.glm")
## 
## 
##  demo(h2o.glm)
##  ---- ~~~~~~~
## 
## > # This is a demo of H2O's GLM function
## > # It imports a data set, parses it, and prints a summary
## > # Then, it runs GLM with a binomial link function using 10-fold cross-validation
## > # Note: This demo runs H2O on localhost:54321
## > library(h2o)
## 
## > h2o.init()
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 713 milliseconds 
##     H2O cluster version:        3.8.3.3 
##     H2O cluster name:           H2O_started_from_R_rinor_myf984 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.47 GB 
##     H2O cluster total cores:    1 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     R Version:                  R version 3.2.2 (2015-08-14) 
## 
## 
## > prostate.hex = h2o.uploadFile(path = system.file("extdata", "prostate.csv", package="h2o"), destination_frame = "prostate.hex")
## 
##   |                                                                       
##   |                                                                 |   0%
##   |                                                                       
##   |=================================================================| 100%
## 
## > summary(prostate.hex)
## Warning in summary.H2OFrame(prostate.hex): Approximated quantiles
## computed! If you are interested in exact quantiles, please pass the
## `exact_quantiles=TRUE` parameter.
##  ID               CAPSULE          AGE             RACE           
##  Min.   :  1.00   Min.   :0.0000   Min.   :43.00   Min.   :0.000  
##  1st Qu.: 95.75   1st Qu.:0.0000   1st Qu.:62.00   1st Qu.:1.000  
##  Median :190.50   Median :0.0000   Median :67.00   Median :1.000  
##  Mean   :190.50   Mean   :0.4026   Mean   :66.04   Mean   :1.087  
##  3rd Qu.:285.25   3rd Qu.:1.0000   3rd Qu.:71.00   3rd Qu.:1.000  
##  Max.   :380.00   Max.   :1.0000   Max.   :79.00   Max.   :2.000  
##  DPROS           DCAPS           PSA               VOL            
##  Min.   :1.000   Min.   :1.000   Min.   :  0.300   Min.   : 0.00  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:  4.900   1st Qu.: 0.00  
##  Median :2.000   Median :1.000   Median :  8.664   Median :14.20  
##  Mean   :2.271   Mean   :1.108   Mean   : 15.409   Mean   :15.81  
##  3rd Qu.:3.000   3rd Qu.:1.000   3rd Qu.: 17.063   3rd Qu.:26.40  
##  Max.   :4.000   Max.   :2.000   Max.   :139.700   Max.   :97.60  
##  GLEASON        
##  Min.   :0.000  
##  1st Qu.:6.000  
##  Median :6.000  
##  Mean   :6.384  
##  3rd Qu.:7.000  
##  Max.   :9.000  
## 
## > prostate.glm = h2o.glm(x = c("AGE","RACE","PSA","DCAPS"), y = "CAPSULE", training_frame = prostate.hex, family = "binomial", alpha = 0.5)
## 
##   |                                                                       
##   |                                                                 |   0%
##   |                                                                       
##   |========                                                         |  12%
##   |                                                                       
##   |=================================================================| 100%
## 
## > print(prostate.glm)
## Model Details:
## ==============
## 
## H2OBinomialModel: glm
## Model ID:  GLM_model_R_1468420523821_1 
## GLM Model: summary
##     family  link                                regularization
## 1 binomial logit Elastic Net (alpha = 0.5, lambda = 3.247E-4 )
##   number_of_predictors_total number_of_active_predictors
## 1                          4                           4
##   number_of_iterations training_frame
## 1                    4   prostate.hex
## 
## Coefficients: glm coefficients
##       names coefficients standardized_coefficients
## 1 Intercept    -1.112897                 -0.337684
## 2       AGE    -0.010981                 -0.071671
## 3      RACE    -0.624682                 -0.192885
## 4     DCAPS     1.314730                  0.408429
## 5       PSA     0.046903                  0.937948
## 
## H2OBinomialMetrics: glm
## ** Reported on training data. **
## 
## MSE:  0.2027051
## R^2:  0.1572193
## LogLoss:  0.5914632
## Mean Per-Class Error:  0.3826121
## AUC:  0.7175722
## Gini:  0.4351444
## Null Deviance:  512.2888
## Residual Deviance:  449.5121
## AIC:  459.5121
## 
## Confusion Matrix for F1-optimal threshold:
##         0   1    Error      Rate
## 0      80 147 0.647577  =147/227
## 1      18 135 0.117647   =18/153
## Totals 98 282 0.434211  =165/380
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold    value idx
## 1                       max f1  0.284048 0.620690 274
## 2                       max f2  0.206863 0.778230 360
## 3                 max f0point5  0.413304 0.636672 108
## 4                 max accuracy  0.413304 0.705263 108
## 5                max precision  0.998481 1.000000   0
## 6                   max recall  0.206863 1.000000 360
## 7              max specificity  0.998481 1.000000   0
## 8             max absolute_MCC  0.413304 0.369123 108
## 9   max min_per_class_accuracy  0.331823 0.647577 176
## 10 max mean_per_class_accuracy  0.372893 0.672123 126
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## 
## 
## 
## > myLabels = c(prostate.glm@model$x, "Intercept")
## 
## > plot(prostate.glm@model$coefficients, xaxt = "n", xlab = "Coefficients", ylab = "Values")

## 
## > axis(1, at = 1:length(myLabels), labels = myLabels)
## 
## > abline(h = 0, col = 2, lty = 2)
## 
## > title("Coefficients from Logistic Regression\n of Prostate Cancer Data")
## 
## > barplot(prostate.glm@model$coefficients, main = "Coefficients from Logistic Regression\n of Prostate Cancer Data")

# Run the GBM demo. As echoed in the output below, it uploads prostate.csv,
# fits two GBM models on CAPSULE, then uploads iris.csv and fits a GBM with
# default parameters on the three-class target.
demo(topic = "h2o.gbm")
## 
## 
##  demo(h2o.gbm)
##  ---- ~~~~~~~
## 
## > # This is a demo of H2O's GBM function
## > # It imports a data set, parses it, and prints a summary
## > # Then, it runs GBM on a subset of the dataset
## > # Note: This demo runs H2O on localhost:54321
## > library(h2o)
## 
## > h2o.init()
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         7 seconds 642 milliseconds 
##     H2O cluster version:        3.8.3.3 
##     H2O cluster name:           H2O_started_from_R_rinor_myf984 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.46 GB 
##     H2O cluster total cores:    1 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     R Version:                  R version 3.2.2 (2015-08-14) 
## 
## 
## > prostate.hex = h2o.uploadFile(path = system.file("extdata", "prostate.csv", package="h2o"), destination_frame = "prostate.hex")
## 
##   |                                                                       
##   |                                                                 |   0%
##   |                                                                       
##   |=================================================================| 100%
## 
## > summary(prostate.hex)
## Warning in summary.H2OFrame(prostate.hex): Approximated quantiles
## computed! If you are interested in exact quantiles, please pass the
## `exact_quantiles=TRUE` parameter.
##  ID               CAPSULE          AGE             RACE           
##  Min.   :  1.00   Min.   :0.0000   Min.   :43.00   Min.   :0.000  
##  1st Qu.: 95.75   1st Qu.:0.0000   1st Qu.:62.00   1st Qu.:1.000  
##  Median :190.50   Median :0.0000   Median :67.00   Median :1.000  
##  Mean   :190.50   Mean   :0.4026   Mean   :66.04   Mean   :1.087  
##  3rd Qu.:285.25   3rd Qu.:1.0000   3rd Qu.:71.00   3rd Qu.:1.000  
##  Max.   :380.00   Max.   :1.0000   Max.   :79.00   Max.   :2.000  
##  DPROS           DCAPS           PSA               VOL            
##  Min.   :1.000   Min.   :1.000   Min.   :  0.300   Min.   : 0.00  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:  4.900   1st Qu.: 0.00  
##  Median :2.000   Median :1.000   Median :  8.664   Median :14.20  
##  Mean   :2.271   Mean   :1.108   Mean   : 15.409   Mean   :15.81  
##  3rd Qu.:3.000   3rd Qu.:1.000   3rd Qu.: 17.063   3rd Qu.:26.40  
##  Max.   :4.000   Max.   :2.000   Max.   :139.700   Max.   :97.60  
##  GLEASON        
##  Min.   :0.000  
##  1st Qu.:6.000  
##  Median :6.000  
##  Mean   :6.384  
##  3rd Qu.:7.000  
##  Max.   :9.000  
## 
## > prostate.gbm = h2o.gbm(x = setdiff(colnames(prostate.hex), "CAPSULE"), y = "CAPSULE", training_frame = prostate.hex, ntrees = 10, max_depth = 5, learn_rate = 0.1)
## 
##   |                                                                       
##   |                                                                 |   0%
##   |                                                                       
##   |=================================================================| 100%
## 
## > print(prostate.gbm)
## Model Details:
## ==============
## 
## H2ORegressionModel: gbm
## Model ID:  GBM_model_R_1468420523821_3 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              10                       10                3311         5
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1         5    5.00000         17         24    21.40000
## 
## 
## H2ORegressionMetrics: gbm
## ** Reported on training data. **
## 
## MSE:  0.1358996
## R2 :  0.4349746
## Mean Residual Deviance :  0.1358996
## 
## 
## 
## 
## 
## > prostate.gbm2 = h2o.gbm(x = c("AGE", "RACE", "PSA", "VOL", "GLEASON"), y = "CAPSULE", training_frame = prostate.hex, ntrees = 10, max_depth = 8, min_rows = 10, learn_rate = 0.2)
## 
##   |                                                                       
##   |                                                                 |   0%
##   |                                                                       
##   |=================================================================| 100%
## 
## > print(prostate.gbm2)
## Model Details:
## ==============
## 
## H2ORegressionModel: gbm
## Model ID:  GBM_model_R_1468420523821_4 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              10                       10                3984         6
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1         8    7.50000         19         30    26.60000
## 
## 
## H2ORegressionMetrics: gbm
## ** Reported on training data. **
## 
## MSE:  0.1051889
## R2 :  0.5626592
## Mean Residual Deviance :  0.1051889
## 
## 
## 
## 
## 
## > # This is a demo of H2O's GBM use of default parameters on iris dataset (three classes)
## > iris.hex = h2o.uploadFile(path = system.file("extdata", "iris.csv", package="h2o"), destination_frame = "iris.hex")
## 
##   |                                                                       
##   |                                                                 |   0%
##   |                                                                       
##   |=================================================================| 100%
## 
## > summary(iris.hex)
## Warning in summary.H2OFrame(iris.hex): Approximated quantiles
## computed! If you are interested in exact quantiles, please pass the
## `exact_quantiles=TRUE` parameter.
##  C1              C2              C3              C4              
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.1000  
##  1st Qu.:5.099   1st Qu.:2.799   1st Qu.:1.596   1st Qu.:0.2992  
##  Median :5.798   Median :2.998   Median :4.348   Median :1.3000  
##  Mean   :5.843   Mean   :3.054   Mean   :3.759   Mean   :1.1987  
##  3rd Qu.:6.399   3rd Qu.:3.298   3rd Qu.:5.095   3rd Qu.:1.7992  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.5000  
##  C5                 
##  Iris-setosa    :50 
##  Iris-versicolor:50 
##  Iris-virginica :50 
##                     
##                     
##                     
## 
## > iris.gbm = h2o.gbm(x = 1:4, y = 5, training_frame = iris.hex)
## 
##   |                                                                       
##   |                                                                 |   0%
##   |                                                                       
##   |======                                                           |  10%
##   |                                                                       
##   |=============================                                    |  44%
##   |                                                                       
##   |===============================================                  |  72%
##   |                                                                       
##   |=================================================================| 100%
## 
## > print(iris.gbm)
## Model Details:
## ==============
## 
## H2OMultinomialModel: gbm
## Model ID:  GBM_model_R_1468420523821_5 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              50                      150               28325         1
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1         5    4.92000          2         12    10.07333
## 
## 
## H2OMultinomialMetrics: gbm
## ** Reported on training data. **
## 
## Training Set Metrics: 
## =====================
## 
## Extract training frame with `h2o.getFrame("iris.hex")`
## MSE: (Extract with `h2o.mse`) 0.00283639
## R^2: (Extract with `h2o.r2`) 0.9957454
## Logloss: (Extract with `h2o.logloss`) 0.01881246
## Mean Per-Class Error: 0
## Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
## =========================================================================
## Confusion Matrix: vertical: actual; across: predicted
##                 Iris-setosa Iris-versicolor Iris-virginica  Error
## Iris-setosa              50               0              0 0.0000
## Iris-versicolor           0              50              0 0.0000
## Iris-virginica            0               0             50 0.0000
## Totals                   50              50             50 0.0000
##                      Rate
## Iris-setosa     =  0 / 50
## Iris-versicolor =  0 / 50
## Iris-virginica  =  0 / 50
## Totals          = 0 / 150
## 
## Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
## =======================================================================
## Top-3 Hit Ratios: 
##   k hit_ratio
## 1 1  1.000000
## 2 2  1.000000
## 3 3  1.000000