# Creation of the Artificial Neural Network
# Load the data and keep only the modelling columns (drop the identifier
# columns RowNumber, CustomerId and Surname)
dataset <- read.csv('Churn_Modelling.csv')
dataset <- dataset[4:14]
str(dataset)
## 'data.frame': 10000 obs. of 11 variables:
## $ CreditScore : int 619 608 502 699 850 645 822 376 501 684 ...
## $ Geography : Factor w/ 3 levels "France","Germany",..: 1 3 1 1 3 3 1 2 1 1 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 2 2 1 2 2 ...
## $ Age : int 42 41 42 39 43 44 50 29 44 27 ...
## $ Tenure : int 2 1 8 1 2 8 7 4 4 2 ...
## $ Balance : num 0 83808 159661 0 125511 ...
## $ NumOfProducts : int 1 1 3 2 1 2 2 4 2 1 ...
## $ HasCrCard : int 1 0 1 0 1 1 1 1 0 1 ...
## $ IsActiveMember : int 1 1 0 0 1 0 1 0 1 1 ...
## $ EstimatedSalary: num 101349 112543 113932 93827 79084 ...
## $ Exited : int 1 0 1 0 0 1 0 1 0 0 ...
# Encode the categorical variables (Geography, Gender) as numeric codes
dataset$Geography <- as.numeric(factor(dataset$Geography,
                                       levels = c('France', 'Spain', 'Germany'),
                                       labels = c(1, 2, 3)))
dataset$Gender <- as.numeric(factor(dataset$Gender,
                                    levels = c('Female', 'Male'),
                                    labels = c(1, 2)))
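# Note: 'Exited' is left numeric here, so h2o.deeplearning below fits a
# regression and h2o.predict returns a numeric score that is thresholded
# manually. An alternative (not used in this run) that makes H2O perform
# binary classification is to encode the target as a factor:
# dataset$Exited <- as.factor(dataset$Exited)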
# Splitting the dataset into the Training set and Test set
library(caTools)
set.seed(123)
split <- sample.split(dataset$Exited, SplitRatio = 0.8)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)
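# Quick check (an addition, not in the original run): sample.split stratifies
# on 'Exited', so the churn rate should be roughly the same in both subsets
# prop.table(table(training_set$Exited))
# prop.table(table(test_set$Exited))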
# Feature scaling: standardize all predictors (column 11 is the target 'Exited')
training_set[-11] <- scale(training_set[-11])
test_set[-11] <- scale(test_set[-11])
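# Note: the two lines above standardize the test set with its own means and
# standard deviations. A common alternative (a sketch, not what this script
# does) reuses the training-set parameters via the attributes scale() attaches:
# scaled_train <- scale(training_set[-11])
# test_set[-11] <- scale(test_set[-11],
#                        center = attr(scaled_train, 'scaled:center'),
#                        scale  = attr(scaled_train, 'scaled:scale'))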
# Fitting ANN to the Training set
# install.packages('h2o')
library(h2o)
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
# Connect to a local H2O cluster, using all available cores (nthreads = -1)
h2o.init(nthreads = -1)
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 9 minutes 45 seconds
## H2O cluster version: 3.10.4.6
## H2O cluster version age: 24 days
## H2O cluster name: H2O_started_from_R_Kamalm_iiw313
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.71 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 4
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 3.3.3 (2017-03-06)
model <- h2o.deeplearning(y = 'Exited',
                          training_frame = as.h2o(training_set),
                          activation = 'Rectifier',
                          hidden = c(9, 9),
                          epochs = 100,
                          train_samples_per_iteration = -2)
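# Optional model inspection (an addition, not in the original run):
# summary(model) prints the network layout and training metrics. Because
# 'Exited' was kept numeric, H2O fits this as a regression model, so the
# reported metrics are MSE/RMSE/deviance rather than AUC.
# summary(model)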
# Predicting the Test set results
y_pred <- h2o.predict(model, newdata = as.h2o(test_set[-11]))
# Convert the predicted scores to class labels with a 0.5 threshold; the
# threshold can be adjusted to trade sensitivity against specificity
y_pred <- (y_pred > 0.5)
y_pred <- as.vector(y_pred)
# Create the confusion matrix (rows = actual, columns = predicted)
cm <- table(test_set[, 11], y_pred)
cm
##    y_pred
##        0    1
##   0 1507   86
##   1  190  217
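# The thresholding comment above mentions sensitivity; with rows = actual and
# columns = predicted, sensitivity and specificity follow directly from cm
# (these two lines are an addition, not part of the original output):
sensitivity <- cm[2, 2] / sum(cm[2, ])   # TP / (TP + FN) for the churn class
specificity <- cm[1, 1] / sum(cm[1, ])   # TN / (TN + FP) for the retained class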
print('Accuracy')
## [1] "Accuracy"
(cm[1, 1] + cm[2, 2]) / sum(cm)
## [1] 0.862
# h2o.shutdown()  # shut down the H2O cluster when finished