library(h2o)
# Connect to a local H2O cluster, launching a JVM first if none is running
# (the log below shows it listening on localhost:54321). With no `nthreads`
# argument the cluster is capped at the CRAN default of 2 CPUs; see the note
# at the end of the startup log for how to restart with all cores.
localH2O <- h2o.init()
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## /tmp/Rtmp9vKuTg/h2o_rinor_started_from_r.out
## /tmp/Rtmp9vKuTg/h2o_rinor_started_from_r.err
##
##
## Starting H2O JVM and connecting: .. Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 450 milliseconds
## H2O cluster version: 3.8.3.3
## H2O cluster name: H2O_started_from_R_rinor_myf984
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.47 GB
## H2O cluster total cores: 1
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.2.2 (2015-08-14)
##
## Note: As started, H2O is limited to the CRAN default of 2 CPUs.
## Shut down and restart H2O as shown below to use all your CPUs.
## > h2o.shutdown()
## > h2o.init(nthreads = -1)
# Run the GLM demo: as echoed below, it uploads the bundled prostate.csv,
# prints a summary, fits a binomial GLM of CAPSULE on AGE, RACE, PSA and
# DCAPS, prints the model, and plots the coefficients.
demo(topic = "h2o.glm")
##
##
## demo(h2o.glm)
## ---- ~~~~~~~
##
## > # This is a demo of H2O's GLM function
## > # It imports a data set, parses it, and prints a summary
## > # Then, it runs GLM with a binomial link function using 10-fold cross-validation
## > # Note: This demo runs H2O on localhost:54321
## > library(h2o)
##
## > h2o.init()
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 713 milliseconds
## H2O cluster version: 3.8.3.3
## H2O cluster name: H2O_started_from_R_rinor_myf984
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.47 GB
## H2O cluster total cores: 1
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.2.2 (2015-08-14)
##
##
## > prostate.hex = h2o.uploadFile(path = system.file("extdata", "prostate.csv", package="h2o"), destination_frame = "prostate.hex")
##
## |
## | | 0%
## |
## |=================================================================| 100%
##
## > summary(prostate.hex)
## Warning in summary.H2OFrame(prostate.hex): Approximated quantiles
## computed! If you are interested in exact quantiles, please pass the
## `exact_quantiles=TRUE` parameter.
## ID CAPSULE AGE RACE
## Min. : 1.00 Min. :0.0000 Min. :43.00 Min. :0.000
## 1st Qu.: 95.75 1st Qu.:0.0000 1st Qu.:62.00 1st Qu.:1.000
## Median :190.50 Median :0.0000 Median :67.00 Median :1.000
## Mean :190.50 Mean :0.4026 Mean :66.04 Mean :1.087
## 3rd Qu.:285.25 3rd Qu.:1.0000 3rd Qu.:71.00 3rd Qu.:1.000
## Max. :380.00 Max. :1.0000 Max. :79.00 Max. :2.000
## DPROS DCAPS PSA VOL
## Min. :1.000 Min. :1.000 Min. : 0.300 Min. : 0.00
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 4.900 1st Qu.: 0.00
## Median :2.000 Median :1.000 Median : 8.664 Median :14.20
## Mean :2.271 Mean :1.108 Mean : 15.409 Mean :15.81
## 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.: 17.063 3rd Qu.:26.40
## Max. :4.000 Max. :2.000 Max. :139.700 Max. :97.60
## GLEASON
## Min. :0.000
## 1st Qu.:6.000
## Median :6.000
## Mean :6.384
## 3rd Qu.:7.000
## Max. :9.000
##
## > prostate.glm = h2o.glm(x = c("AGE","RACE","PSA","DCAPS"), y = "CAPSULE", training_frame = prostate.hex, family = "binomial", alpha = 0.5)
##
## |
## | | 0%
## |
## |======== | 12%
## |
## |=================================================================| 100%
##
## > print(prostate.glm)
## Model Details:
## ==============
##
## H2OBinomialModel: glm
## Model ID: GLM_model_R_1468420523821_1
## GLM Model: summary
## family link regularization
## 1 binomial logit Elastic Net (alpha = 0.5, lambda = 3.247E-4 )
## number_of_predictors_total number_of_active_predictors
## 1 4 4
## number_of_iterations training_frame
## 1 4 prostate.hex
##
## Coefficients: glm coefficients
## names coefficients standardized_coefficients
## 1 Intercept -1.112897 -0.337684
## 2 AGE -0.010981 -0.071671
## 3 RACE -0.624682 -0.192885
## 4 DCAPS 1.314730 0.408429
## 5 PSA 0.046903 0.937948
##
## H2OBinomialMetrics: glm
## ** Reported on training data. **
##
## MSE: 0.2027051
## R^2: 0.1572193
## LogLoss: 0.5914632
## Mean Per-Class Error: 0.3826121
## AUC: 0.7175722
## Gini: 0.4351444
## Null Deviance: 512.2888
## Residual Deviance: 449.5121
## AIC: 459.5121
##
## Confusion Matrix for F1-optimal threshold:
## 0 1 Error Rate
## 0 80 147 0.647577 =147/227
## 1 18 135 0.117647 =18/153
## Totals 98 282 0.434211 =165/380
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.284048 0.620690 274
## 2 max f2 0.206863 0.778230 360
## 3 max f0point5 0.413304 0.636672 108
## 4 max accuracy 0.413304 0.705263 108
## 5 max precision 0.998481 1.000000 0
## 6 max recall 0.206863 1.000000 360
## 7 max specificity 0.998481 1.000000 0
## 8 max absolute_MCC 0.413304 0.369123 108
## 9 max min_per_class_accuracy 0.331823 0.647577 176
## 10 max mean_per_class_accuracy 0.372893 0.672123 126
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
##
##
##
## > myLabels = c(prostate.glm@model$x, "Intercept")
##
## > plot(prostate.glm@model$coefficients, xaxt = "n", xlab = "Coefficients", ylab = "Values")

##
## > axis(1, at = 1:length(myLabels), labels = myLabels)
##
## > abline(h = 0, col = 2, lty = 2)
##
## > title("Coefficients from Logistic Regression\n of Prostate Cancer Data")
##
## > barplot(prostate.glm@model$coefficients, main = "Coefficients from Logistic Regression\n of Prostate Cancer Data")

# Run the GBM demo: as echoed below, it uploads the bundled prostate.csv,
# fits two GBM models for CAPSULE with different tree settings, then uploads
# iris.csv and fits a three-class GBM with default parameters.
demo(topic = "h2o.gbm")
##
##
## demo(h2o.gbm)
## ---- ~~~~~~~
##
## > # This is a demo of H2O's GBM function
## > # It imports a data set, parses it, and prints a summary
## > # Then, it runs GBM on a subset of the dataset
## > # Note: This demo runs H2O on localhost:54321
## > library(h2o)
##
## > h2o.init()
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 7 seconds 642 milliseconds
## H2O cluster version: 3.8.3.3
## H2O cluster name: H2O_started_from_R_rinor_myf984
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.46 GB
## H2O cluster total cores: 1
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.2.2 (2015-08-14)
##
##
## > prostate.hex = h2o.uploadFile(path = system.file("extdata", "prostate.csv", package="h2o"), destination_frame = "prostate.hex")
##
## |
## | | 0%
## |
## |=================================================================| 100%
##
## > summary(prostate.hex)
## Warning in summary.H2OFrame(prostate.hex): Approximated quantiles
## computed! If you are interested in exact quantiles, please pass the
## `exact_quantiles=TRUE` parameter.
## ID CAPSULE AGE RACE
## Min. : 1.00 Min. :0.0000 Min. :43.00 Min. :0.000
## 1st Qu.: 95.75 1st Qu.:0.0000 1st Qu.:62.00 1st Qu.:1.000
## Median :190.50 Median :0.0000 Median :67.00 Median :1.000
## Mean :190.50 Mean :0.4026 Mean :66.04 Mean :1.087
## 3rd Qu.:285.25 3rd Qu.:1.0000 3rd Qu.:71.00 3rd Qu.:1.000
## Max. :380.00 Max. :1.0000 Max. :79.00 Max. :2.000
## DPROS DCAPS PSA VOL
## Min. :1.000 Min. :1.000 Min. : 0.300 Min. : 0.00
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.: 4.900 1st Qu.: 0.00
## Median :2.000 Median :1.000 Median : 8.664 Median :14.20
## Mean :2.271 Mean :1.108 Mean : 15.409 Mean :15.81
## 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.: 17.063 3rd Qu.:26.40
## Max. :4.000 Max. :2.000 Max. :139.700 Max. :97.60
## GLEASON
## Min. :0.000
## 1st Qu.:6.000
## Median :6.000
## Mean :6.384
## 3rd Qu.:7.000
## Max. :9.000
##
## > prostate.gbm = h2o.gbm(x = setdiff(colnames(prostate.hex), "CAPSULE"), y = "CAPSULE", training_frame = prostate.hex, ntrees = 10, max_depth = 5, learn_rate = 0.1)
##
## |
## | | 0%
## |
## |=================================================================| 100%
##
## > print(prostate.gbm)
## Model Details:
## ==============
##
## H2ORegressionModel: gbm
## Model ID: GBM_model_R_1468420523821_3
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 10 10 3311 5
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 5 5.00000 17 24 21.40000
##
##
## H2ORegressionMetrics: gbm
## ** Reported on training data. **
##
## MSE: 0.1358996
## R2 : 0.4349746
## Mean Residual Deviance : 0.1358996
##
##
##
##
##
## > prostate.gbm2 = h2o.gbm(x = c("AGE", "RACE", "PSA", "VOL", "GLEASON"), y = "CAPSULE", training_frame = prostate.hex, ntrees = 10, max_depth = 8, min_rows = 10, learn_rate = 0.2)
##
## |
## | | 0%
## |
## |=================================================================| 100%
##
## > print(prostate.gbm2)
## Model Details:
## ==============
##
## H2ORegressionModel: gbm
## Model ID: GBM_model_R_1468420523821_4
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 10 10 3984 6
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 8 7.50000 19 30 26.60000
##
##
## H2ORegressionMetrics: gbm
## ** Reported on training data. **
##
## MSE: 0.1051889
## R2 : 0.5626592
## Mean Residual Deviance : 0.1051889
##
##
##
##
##
## > # This is a demo of H2O's GBM use of default parameters on iris dataset (three classes)
## > iris.hex = h2o.uploadFile(path = system.file("extdata", "iris.csv", package="h2o"), destination_frame = "iris.hex")
##
## |
## | | 0%
## |
## |=================================================================| 100%
##
## > summary(iris.hex)
## Warning in summary.H2OFrame(iris.hex): Approximated quantiles
## computed! If you are interested in exact quantiles, please pass the
## `exact_quantiles=TRUE` parameter.
## C1 C2 C3 C4
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.1000
## 1st Qu.:5.099 1st Qu.:2.799 1st Qu.:1.596 1st Qu.:0.2992
## Median :5.798 Median :2.998 Median :4.348 Median :1.3000
## Mean :5.843 Mean :3.054 Mean :3.759 Mean :1.1987
## 3rd Qu.:6.399 3rd Qu.:3.298 3rd Qu.:5.095 3rd Qu.:1.7992
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.5000
## C5
## Iris-setosa :50
## Iris-versicolor:50
## Iris-virginica :50
##
##
##
##
## > iris.gbm = h2o.gbm(x = 1:4, y = 5, training_frame = iris.hex)
##
## |
## | | 0%
## |
## |====== | 10%
## |
## |============================= | 44%
## |
## |=============================================== | 72%
## |
## |=================================================================| 100%
##
## > print(iris.gbm)
## Model Details:
## ==============
##
## H2OMultinomialModel: gbm
## Model ID: GBM_model_R_1468420523821_5
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 50 150 28325 1
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 5 4.92000 2 12 10.07333
##
##
## H2OMultinomialMetrics: gbm
## ** Reported on training data. **
##
## Training Set Metrics:
## =====================
##
## Extract training frame with `h2o.getFrame("iris.hex")`
## MSE: (Extract with `h2o.mse`) 0.00283639
## R^2: (Extract with `h2o.r2`) 0.9957454
## Logloss: (Extract with `h2o.logloss`) 0.01881246
## Mean Per-Class Error: 0
## Confusion Matrix: Extract with `h2o.confusionMatrix(<model>,train = TRUE)`)
## =========================================================================
## Confusion Matrix: vertical: actual; across: predicted
## Iris-setosa Iris-versicolor Iris-virginica Error
## Iris-setosa 50 0 0 0.0000
## Iris-versicolor 0 50 0 0.0000
## Iris-virginica 0 0 50 0.0000
## Totals 50 50 50 0.0000
## Rate
## Iris-setosa = 0 / 50
## Iris-versicolor = 0 / 50
## Iris-virginica = 0 / 50
## Totals = 0 / 150
##
## Hit Ratio Table: Extract with `h2o.hit_ratio_table(<model>,train = TRUE)`
## =======================================================================
## Top-3 Hit Ratios:
## k hit_ratio
## 1 1 1.000000
## 2 2 1.000000
## 3 3 1.000000