library(h2o)
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
# Connect R to an H2O cluster. With no arguments this targets localhost:54321
# and, when no cluster is running there yet, starts a local H2O JVM first
# (see the startup log echoed below).
h2o.init()
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## /tmp/RtmpSwOIBb/h2o_rstudio_user_started_from_r.out
## /tmp/RtmpSwOIBb/h2o_rstudio_user_started_from_r.err
##
##
## Starting H2O JVM and connecting: . Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 517 milliseconds
## H2O cluster timezone: Etc/UTC
## H2O data parsing timezone: UTC
## H2O cluster version: 3.20.0.8
## H2O cluster version age: 2 months and 8 days
## H2O cluster name: H2O_started_from_R_rstudio-user_hiw456
## H2O cluster total nodes: 1
## H2O cluster total memory: 6.54 GB
## H2O cluster total cores: 16
## H2O cluster allowed cores: 16
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## H2O API Extensions: XGBoost, Algos, AutoML, Core V3, Core V4
## R Version: R version 3.5.0 (2018-04-23)
# Load the prostate dataset from the public H2O test-data bucket into the
# cluster; `df` is the handle used by every step that follows.
df <- h2o.importFile(
  path = "https://h2o-public-test-data.s3.amazonaws.com/smalldata/prostate/prostate.csv"
)
##
## |
## | | 0%
## |
## |=================================================================| 100%
# Convert the response and the categorical columns to factors so they are
# modelled as categories rather than as numeric values.
df$CAPSULE <- as.factor(df$CAPSULE)
df$RACE    <- as.factor(df$RACE)
df$DCAPS   <- as.factor(df$DCAPS)
df$DPROS   <- as.factor(df$DPROS)

# Model specification: predictor columns and the binary response column.
predictors <- c("AGE", "RACE", "VOL", "GLEASON")
response   <- "CAPSULE"

# Fit a binomial GLM with regularization switched off (lambda = 0) and
# p-value computation switched on, so that the coefficients table carries
# the std_error, z_value and p_value columns.
prostate.glm <- h2o.glm(
  x = predictors,
  y = response,
  training_frame = df,
  family = "binomial",
  lambda = 0,
  compute_p_values = TRUE
)
##
## |
## | | 0%
## |
## |= | 2%
## |
## |=================================================================| 100%
# Coefficients on the original scale, i.e. applicable to non-standardized data
h2o.coef(prostate.glm)
## Intercept RACE.1 RACE.2 AGE VOL GLEASON
## -6.67515539 -0.44278752 -0.58992326 -0.01788870 -0.01278335 1.25035939
# Coefficients on the standardized scale (requires standardize=TRUE, which is on by default)
h2o.coef_norm(prostate.glm)
## Intercept RACE.1 RACE.2 AGE VOL GLEASON
## -0.07610006 -0.44278752 -0.58992326 -0.11676080 -0.23454402 1.36533415
# Extract the coefficients table once; the lookups below read its columns
coef_table <- prostate.glm@model$coefficients_table
# Print the full coefficients table
coef_table
# Print the standard errors
coef_table$std_error
## [1] 1.931760363 1.324230832 1.373465793 0.018701933 0.007514354 0.156156271
# Print the p-values
coef_table$p_value
## [1] 5.493181e-04 7.380978e-01 6.675490e-01 3.388116e-01 8.890718e-02
## [6] 1.221245e-15
# Print the z-values
coef_table$z_value
## [1] -3.4554780 -0.3343734 -0.4295143 -0.9565159 -1.7011907 8.0071033
# Draw a plot of the standardized coefficient magnitudes
h2o.std_coef_plot(prostate.glm)
