# Creation of the Artificial Neural Network
# Load the data and keep only the modelling columns (drop the identifier
# columns RowNumber, CustomerId and Surname)
dataset <- read.csv('Churn_Modelling.csv')
dataset <- dataset[4:14]
str(dataset)
## 'data.frame': 10000 obs. of 11 variables:
## $ CreditScore : int 619 608 502 699 850 645 822 376 501 684 ...
## $ Geography : Factor w/ 3 levels "France","Germany",..: 1 3 1 1 3 3 1 2 1 1 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 2 2 1 2 2 ...
## $ Age : int 42 41 42 39 43 44 50 29 44 27 ...
## $ Tenure : int 2 1 8 1 2 8 7 4 4 2 ...
## $ Balance : num 0 83808 159661 0 125511 ...
## $ NumOfProducts : int 1 1 3 2 1 2 2 4 2 1 ...
## $ HasCrCard : int 1 0 1 0 1 1 1 1 0 1 ...
## $ IsActiveMember : int 1 1 0 0 1 0 1 0 1 1 ...
## $ EstimatedSalary: num 101349 112543 113932 93827 79084 ...
## $ Exited : int 1 0 1 0 0 1 0 1 0 0 ...
# Encode the categorical variables (Geography, Gender) as numeric codes
dataset$Geography <- as.numeric(factor(dataset$Geography,
                                       levels = c('France', 'Spain', 'Germany'),
                                       labels = c(1, 2, 3)))
dataset$Gender <- as.numeric(factor(dataset$Gender,
                                    levels = c('Female', 'Male'),
                                    labels = c(1, 2)))
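# Note: 'Exited' is left numeric here, so h2o.deeplearning below fits a
# regression and h2o.predict returns a numeric score that is thresholded
# manually. An alternative (not used in this run) that makes H2O perform
# binary classification is to encode the target as a factor:
# dataset$Exited <- as.factor(dataset$Exited)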
# Splitting the dataset into the Training set and Test set
library(caTools)
set.seed(123)
split <- sample.split(dataset$Exited, SplitRatio = 0.8)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)
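# Quick check (an addition, not in the original run): sample.split stratifies
# on 'Exited', so the churn rate should be roughly the same in both subsets
# prop.table(table(training_set$Exited))
# prop.table(table(test_set$Exited))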
# Feature scaling: standardize all predictors (column 11 is the target 'Exited')
training_set[-11] <- scale(training_set[-11])
test_set[-11] <- scale(test_set[-11])
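# Note: the two lines above standardize the test set with its own means and
# standard deviations. A common alternative (a sketch, not what this script
# does) reuses the training-set parameters via the attributes scale() attaches:
# scaled_train <- scale(training_set[-11])
# test_set[-11] <- scale(test_set[-11],
#                        center = attr(scaled_train, 'scaled:center'),
#                        scale  = attr(scaled_train, 'scaled:scale'))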
# Fitting ANN to the Training set
# install.packages('h2o')
library(h2o)
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
# Connect to a local H2O cluster, using all available cores (nthreads = -1)
h2o.init(nthreads = -1)
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 9 minutes 45 seconds
## H2O cluster version: 3.10.4.6
## H2O cluster version age: 24 days
## H2O cluster name: H2O_started_from_R_Kamalm_iiw313
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.71 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 4
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 3.3.3 (2017-03-06)
model <- h2o.deeplearning(y = 'Exited',
                          training_frame = as.h2o(training_set),
                          activation = 'Rectifier',
                          hidden = c(9, 9),
                          epochs = 100,
                          train_samples_per_iteration = -2)
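# Optional model inspection (an addition, not in the original run):
# summary(model) prints the network layout and training metrics. Because
# 'Exited' was kept numeric, H2O fits this as a regression model, so the
# reported metrics are MSE/RMSE/deviance rather than AUC.
# summary(model)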
# Predicting the Test set results
y_pred <- h2o.predict(model, newdata = as.h2o(test_set[-11]))
# Convert the predicted scores to class labels with a 0.5 threshold; the
# threshold can be adjusted to trade sensitivity against specificity
y_pred <- (y_pred > 0.5)
y_pred <- as.vector(y_pred)
# Create the confusion matrix (rows = actual, columns = predicted)
cm <- table(test_set[, 11], y_pred)
cm
##    y_pred
##        0    1
##   0 1507   86
##   1  190  217
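# The thresholding comment above mentions sensitivity; with rows = actual and
# columns = predicted, sensitivity and specificity follow directly from cm
# (these two lines are an addition, not part of the original output):
sensitivity <- cm[2, 2] / sum(cm[2, ])   # TP / (TP + FN) for the churn class
specificity <- cm[1, 1] / sum(cm[1, ])   # TN / (TN + FP) for the retained class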
print('Accuracy')
## [1] "Accuracy"
(cm[1, 1] + cm[2, 2]) / sum(cm)
## [1] 0.862
# h2o.shutdown()  # shut down the H2O cluster when finished