deeplearningh2o_breast_cancer.utf8.md

library(mlbench)
library(h2o)

## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------

## 
## Attaching package: 'h2o'

## The following objects are masked from 'package:stats':
## 
##     cor, sd, var

## The following objects are masked from 'package:base':
## 
##     %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc

# Start H2O (or connect to an already running local cluster) before any
# H2O frames or models are created.
h2o.init()

##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         1 hours 2 minutes 
##     H2O cluster timezone:       Asia/Kolkata 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.26.0.4804 
##     H2O cluster version age:    24 days  
##     H2O cluster name:           H2O_started_from_R_somy_jde424 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   1.45 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     H2O API Extensions:         Amazon S3, Algos, AutoML, Core V3, TargetEncoder, Core V4 
##     R Version:                  R version 3.6.1 (2019-07-05)

# Load the BreastCancer data set shipped with mlbench.
data(BreastCancer)

# Drop the first column so it is not used as a feature
# (presumably the sample Id -- confirm with names(BreastCancer)).
data <- BreastCancer[, -1]

# Convert every predictor to numeric through its *label*, not its level code.
# as.numeric() applied directly to a factor returns the internal integer codes,
# which only equal the recorded values when the levels are exactly 1..k with
# none missing. NOTE(review): Mitoses is believed to have no "9" level, so the
# direct conversion would silently turn the value 10 into 9 -- verify with
# levels(BreastCancer$Mitoses).
predictor_cols <- setdiff(names(data), "Class")
data[predictor_cols] <- lapply(
  data[predictor_cols],
  function(column) as.numeric(as.character(column))
)

# Keep the response as a two-level factor labelled "1"/"2" (the level codes of
# the original Class factor), so downstream output keeps the same labels.
data[["Class"]] <- as.factor(as.integer(data[["Class"]]))

# Randomly assign each row to partition 1, 2 or 3 with probabilities
# 0.6 / 0.2 / 0.2. The seed makes the split reproducible, matching the fixed
# seed used for the model.
set.seed(0)
splitSample <- sample(
  1:3,
  size = nrow(data),
  prob = c(0.6, 0.2, 0.2),
  replace = TRUE
)


# Upload the rows assigned to partition 1 to H2O as the training frame.
train_h2o <- as.h2o(data[splitSample == 1, ])

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

# Upload the rows assigned to partition 2 to H2O as the validation frame.
val <- as.h2o(data[splitSample == 2, ])

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

# Upload the rows assigned to partition 3 to H2O as the test frame.
test <- as.h2o(data[splitSample == 3, ])

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

# Train a feed-forward network that predicts Class from all other columns.
# Columns are selected by name rather than by position, so the call does not
# depend on column order.
#   - two hidden layers of 10 units, tanh activation with dropout
#   - 20% dropout on the inputs, 30% on each hidden layer
#   - balance_classes asks H2O to balance the class counts in the training data
#   - a fixed seed for the H2O side of the run
model <- h2o.deeplearning(
  x = setdiff(colnames(train_h2o), "Class"),
  y = "Class",
  training_frame = train_h2o,
  activation = "TanhWithDropout",
  input_dropout_ratio = 0.2,
  balance_classes = TRUE,
  hidden = c(10, 10),
  hidden_dropout_ratios = c(0.3, 0.3),
  epochs = 10,
  seed = 0
)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

# Confusion matrix from the metrics stored with the model; no new data is
# supplied, so this presumably reflects the training run.
h2o.confusionMatrix(model)

## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.0290576086066156:
##          1   2    Error    Rate
## 1      266   9 0.032727  =9/275
## 2        0 273 0.000000  =0/273
## Totals 266 282 0.016423  =9/548

# Confusion matrix of the model scored on the validation frame.
h2o.confusionMatrix(model, newdata = val)

## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.256976791735646:
##         1  2    Error    Rate
## 1      86  4 0.044444   =4/90
## 2       0 48 0.000000   =0/48
## Totals 86 52 0.028986  =4/138

# Confusion matrix of the model scored on the held-out test frame.
h2o.confusionMatrix(model, newdata = test)

## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.954494725478634:
##         1  2    Error    Rate
## 1      91  2 0.021505   =2/93
## 2       1 48 0.020408   =1/49
## Totals 92 50 0.021127  =3/142

deeplearningh2o_breast_cancer.R

somy

2019-10-25