This data set contains details of a bank's customers. The target variable is binary and indicates whether the customer left the bank (closed their account) or remains a customer.
Dataset
# Load the raw churn data and keep only columns 4 through 14; the first
# three columns are dropped. Column 11 of the result is the target `Exited`.
raw_data <- read.csv("Churn_Modelling.csv")
data <- raw_data[4:14]
Converting categorical variables to numeric codes
# Encode the categorical columns as numeric codes in an explicit level order.
#
# The level vectors must be passed to factor(), not as.numeric(): as.numeric()
# silently ignores extra arguments, which previously left the coding in
# alphabetical order regardless of what was written here.
# NOTE(review): level spellings are assumed to match the values in
# Churn_Modelling.csv ("Female"/"Male", "France"/"Spain"/"Germany") - confirm.
gender_levels <- c("Female", "Male")
geography_levels <- c("France", "Spain", "Germany")

# Fail loudly on an unexpected category instead of silently producing NA codes.
stopifnot(
  all(data$Gender %in% c(gender_levels, NA)),
  all(data$Geography %in% c(geography_levels, NA))
)

# Resulting codes: Female = 1, Male = 2; France = 1, Spain = 2, Germany = 3.
data$Gender <- as.numeric(factor(data$Gender, levels = gender_levels))
data$Geography <- as.numeric(factor(data$Geography, levels = geography_levels))
Building the training and test sets
library(caTools)

# Fix the RNG seed so the split is reproducible.
set.seed(101)

# sample.split() returns a logical mask over the rows of `data`, drawn on the
# `Exited` column with 80% of rows flagged TRUE (training) and the rest FALSE.
# The mask is named `is_train` rather than `sample` so it does not mask
# base::sample().
is_train <- sample.split(data$Exited, SplitRatio = 0.80)

train_data <- subset(data, is_train)
test_data <- subset(data, !is_train)
Standardizing the features
# Standardize every feature column (all columns except column 11, the target).
#
# The centering and scaling parameters are estimated on the training set only
# and then reused for the test set. Scaling the test set with its own mean/sd
# would put the two sets on different scales and leak test-set statistics into
# the preprocessing.
train_scaled <- scale(train_data[-11])
train_center <- attr(train_scaled, "scaled:center")
train_spread <- attr(train_scaled, "scaled:scale")

test_data[-11] <- scale(
  test_data[-11],
  center = train_center,
  scale = train_spread
)
train_data[-11] <- train_scaled
Connecting to the H2O server
library(h2o)
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
# Start (or connect to) the local H2O cluster. nthreads = -1 asks H2O to use
# all available cores; the connection log below reports 8 of 8 cores allowed.
h2o.init(nthreads = -1)
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 6 minutes 53 seconds
## H2O cluster timezone: Asia/Kolkata
## H2O data parsing timezone: UTC
## H2O cluster version: 3.30.0.7
## H2O cluster version age: 19 days
## H2O cluster name: H2O_started_from_R_manor_lpc090
## H2O cluster total nodes: 1
## H2O cluster total memory: 1.47 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## H2O API Extensions: Amazon S3, Algos, AutoML, Core V3, TargetEncoder, Core V4
## R Version: R version 4.0.2 (2020-06-22)
library(h2o)
# Train a feed-forward network on the training set to predict `Exited`:
# two hidden layers of 6 units each, rectifier activation, 100 epochs.
# train_samples_per_iteration = -2 selects H2O's automatic tuning
# (per the h2o.deeplearning documentation).
model <- h2o.deeplearning(
  y = "Exited",
  training_frame = as.h2o(train_data),
  activation = "Rectifier",
  hidden = c(6, 6),
  epochs = 100,
  train_samples_per_iteration = -2
)
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|============================ | 40%
|
|======================================================================| 100%
# Score the test set with the trained model. Column 11 (the target) is
# dropped so only the feature columns are sent to H2O.
pro_pre <- h2o.predict(
  object = model,
  newdata = as.h2o(test_data[-11])
)
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
# Threshold the model output at 0.5 and pull the result back into a plain
# R vector.
y_pred <- as.vector(pro_pre > 0.5)

# Confusion matrix: rows are the actual values of column 11 of the test set,
# columns are the thresholded predictions.
cm <- table(test_data[[11]], y_pred)
cm
## y_pred
## 0 1
## 0 1504 89
## 1 195 212
# Accuracy score: correctly classified cases (the diagonal of the confusion
# matrix) divided by all cases. It is computed from `cm` instead of hand-typed
# counts so it cannot drift from the matrix; the previous hard-coded
# (1508+189)/2000 did not match the printed diagonal of 1504 and 212.
# Assumes `cm` is square, i.e. both classes appear among the predictions.
sum(diag(cm)) / sum(cm)
## [1] 0.858