# Load the churn data set from its local CSV export and preview the first rows.
df <- read.csv(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 8 - Deep Learning\\Section 39 - Artificial Neural Networks (ANN)\\Artificial_Neural_Networks\\Churn_Modelling.csv"
)
head(df)

The goal is to predict the ‘exited’ field (the dependent variable): 1 = exited, 0 = stayed.
Columns that are not needed for the analysis (the rownumber, customerID and surname) are removed.

Select the fields that we will be working with

# Keep columns 4 through 14 only; this range contains the predictors
# and the dependent variable.
df <- df[, seq.int(4L, 14L)]
head(df)

Encode the categorical independent variables (Geography and Gender) as numeric codes

# Encode the two categorical predictors as numeric level codes.
# as.numeric() on a factor returns each value's position in `levels`, so
# Geography becomes France = 1, Spain = 2, Germany = 3 and
# Gender becomes Female = 1, Male = 2.
df$Geography <- as.numeric(
  factor(df$Geography, levels = c("France", "Spain", "Germany"))
)
df$Gender <- as.numeric(
  factor(df$Gender, levels = c("Female", "Male"))
)
# Echo the encoded columns as a quick visual check.
df$Gender
df$Geography

Split the dataset into a training set and a test set (80% training, 20% test: 8,000 and 2,000 rows)

library(caTools)
package 'caTools' was built under R version 3.3.3
# Fix the RNG seed so the split is reproducible.
set.seed(1234)
# sample.split() returns a logical vector marking the rows assigned to the
# training portion (80% here); it is given the target column to split on.
split <- sample.split(df$Exited, SplitRatio = 0.80)
training_set <- subset(df, split)
test_set <- subset(df, !split)

For a neural network, feature scaling (normalization) is mandatory

# Feature scaling: standardise every predictor column; column 11 (the
# dependent variable) is excluded and left unchanged.
# The centring and scaling values are estimated on the training set only and
# then reused for the test set, so both sets go through the same
# transformation and no test-set statistics leak into preprocessing.
scaled_training <- scale(training_set[, -11])
test_set[, -11] <- scale(
  test_set[, -11],
  center = attr(scaled_training, "scaled:center"),
  scale = attr(scaled_training, "scaled:scale")
)
training_set[, -11] <- scaled_training

Fitting ANN to the Training Set

# Build the classifier with H2O.
# One-time setup if the package is missing: install.packages("h2o")
library(h2o)
# Start (or connect to) the local H2O instance; nthreads = -1 lets H2O use
# all available cores.
h2o.init(nthreads = -1)
 Connection successful!

R is connected to the H2O cluster: 
    H2O cluster uptime:         53 minutes 21 seconds 
    H2O cluster version:        3.10.4.6 
    H2O cluster version age:    1 month and 21 days  
    H2O cluster name:           H2O_started_from_R_wilsonpc_fmq912 
    H2O cluster total nodes:    1 
    H2O cluster total memory:   0.66 GB 
    H2O cluster total cores:    8 
    H2O cluster allowed cores:  8 
    H2O cluster healthy:        TRUE 
    H2O Connection ip:          localhost 
    H2O Connection port:        54321 
    H2O Connection proxy:       NA 
    H2O Internal Security:      FALSE 
    R Version:                  R version 3.3.2 (2016-10-31) 
# Fit a feed-forward network on the training set with "Exited" as the response.
# - hidden = c(6, 6): two hidden layers of 6 units each, Rectifier activation.
# - epochs = 100: number of passes over the training data.
# - train_samples_per_iteration = -2: let H2O auto-tune this value.
# NOTE(review): Exited is numeric here, so H2O fits a regression model and
# returns a continuous score; the later 0.5 threshold depends on that. Convert
# the target to a factor (and adapt the thresholding) for a true classifier.
classifier <- h2o.deeplearning(
  y = "Exited",
  training_frame = as.h2o(training_set),
  hidden = c(6, 6),
  activation = "Rectifier",
  epochs = 100,
  train_samples_per_iteration = -2
)

  |                                                                                                            
  |                                                                                                      |   0%
  |                                                                                                            
  |======================================================================================================| 100%

  |                                                                                                            
  |                                                                                                      |   0%
  |                                                                                                            
  |===================================================                                                   |  50%
  |                                                                                                            
  |======================================================================================================| 100%
# Print the fitted model's layer details, training metrics and scoring history.
summary(classifier)
Model Details:
==============

H2ORegressionModel: deeplearning
Model Key:  DeepLearning_model_R_1497675052674_3 
Status of Neuron Layers: predicting Exited, regression, gaussian distribution, Quadratic loss, 115 weights/biases, 5.6 KB, 800,000 training samples, mini-batch size 1
  layer units      type dropout       l1       l2 mean_rate rate_rms momentum mean_weight weight_rms mean_bias
1     1    10     Input  0.00 %                                                                               
2     2     6 Rectifier  0.00 % 0.000000 0.000000  0.000735 0.000360 0.000000    0.109288   0.397864  0.048861
3     3     6 Rectifier  0.00 % 0.000000 0.000000  0.001545 0.004108 0.000000   -0.117767   0.493791  0.609235
4     4     1    Linear         0.000000 0.000000  0.000328 0.000126 0.000000   -0.351699   0.801255  0.379231
  bias_rms
1         
2 0.493353
3 1.348580
4 0.000000

H2ORegressionMetrics: deeplearning
** Reported on training data. **
** Metrics reported on full training frame **

MSE:  0.1044161
RMSE:  0.3231348
MAE:  0.2002827
RMSLE:  0.2263449
Mean Residual Deviance :  0.1044161





Scoring History: 
            timestamp   duration training_speed    epochs iterations       samples training_rmse
1 2017-06-17 13:44:17  0.000 sec                  0.00000          0      0.000000              
2 2017-06-17 13:44:17  0.686 sec 141342 obs/sec  10.00000          1  80000.000000       0.32638
3 2017-06-17 13:44:20  2.986 sec 280898 obs/sec 100.00000         10 800000.000000       0.32313
  training_deviance training_mae
1                               
2           0.10653      0.21030
3           0.10442      0.20028

Predicting the test set results

# Score the test set with the trained network. Column 11 (the target) is
# dropped so that only the predictor columns are uploaded to H2O.
y_pred <- h2o.predict(
  object = classifier,
  newdata = as.h2o(test_set[-11])
)

  |                                                                                                            
  |                                                                                                      |   0%
  |                                                                                                            
  |======================================================================================================| 100%

  |                                                                                                            
  |                                                                                                      |   0%
  |                                                                                                            
  |======================================================================================================| 100%
# Turn the numeric predictions into 0/1 labels with a 0.5 cut-off, then pull
# the result out of H2O into a plain R vector.
y_pred <- as.vector(y_pred > 0.5)

Evaluate the predictions using a confusion matrix.

# Build the confusion matrix: rows are the actual outcomes (the Exited
# column, i.e. column 11 of test_set), columns are the predicted labels.
cm <- table(test_set[["Exited"]], y_pred)
cm
   y_pred
       0    1
  0 1529   64
  1  211  196
# Shut down the H2O instance this session is connected to.
h2o.shutdown()
[1] TRUE
LS0tDQp0aXRsZTogIk1MUiBTZWN0aW9uIDMxIEFydGlmaWNpYWwgTmV1cmFsIE5ldHdvcmtzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQpgYGB7cn0NCmRmIDwtICByZWFkLmNzdigiRzpcXFJTdHVkaW9cXHVkZW15XFxtbFxcTWFjaGluZSBMZWFybmluZyBBWlxcUGFydCA4IC0gRGVlcCBMZWFybmluZ1xcU2VjdGlvbiAzOSAtIEFydGlmaWNpYWwgTmV1cmFsIE5ldHdvcmtzIChBTk4pXFxBcnRpZmljaWFsX05ldXJhbF9OZXR3b3Jrc1xcQ2h1cm5fTW9kZWxsaW5nLmNzdiIpDQpoZWFkKGRmKQ0KYGBgDQpHb2FsIGlzIHRvIHByZWRpY3QgdGhlICdleGl0ZWQnIChkZXBlbmRlbnQgdmFyaWFibGUpIDEgPSBleGl0ZWQsIDA9IHN0YXllZDwvYnI+DQpUaGUgaXRlbXMgbm90IG5lZWRlZCBmb3IgdGhlIGFuYWx5c2lzIGFyZSB0YWtlbiBvdXQuIFRoaW5ncyBsaWtlIHRoZSByb3dudW1iZXIsIGN1c3RvbWVySUQsIHN1cm5hbWUuIDwvYnI+DQoNCg0KIyBTZWxlY3QgdGhlIGZpZWxkcyB0aGF0IHdlIHdpbGwgYmUgd29ya2luZyB3aXRoDQoNCmBgYHtyfQ0KIyB3ZSBpbmNsdWRlIHRoZSBkZXBlbmRlbnQgdmFyaWFibGUuIA0KZGYgPC0gZGZbLDQ6MTRdDQpoZWFkKGRmKQ0KYGBgDQojIENvbnZlcnQgc29tZSBvZiB0aGUgaW5kZXBlbmRlbnQgdmFyaWFibGVzIGFzIG51bWVyaWMgZmFjdG9ycw0KDQpgYGB7cn0NCmRmJEdlb2dyYXBoeSA9IGFzLm51bWVyaWMoZmFjdG9yKGRmJEdlb2dyYXBoeSwgbGV2ZWxzID0gYygiRnJhbmNlIiwiU3BhaW4iLCJHZXJtYW55IiksIGxhYmVscyA9IGMoMSwyLDMpKSkNCmRmJEdlbmRlciA9IGFzLm51bWVyaWMoZmFjdG9yKGRmJEdlbmRlciwgbGV2ZWxzID0gYygiRmVtYWxlIiwgIk1hbGUiKSwgbGFiZWxzID0gYygxLDIpKSkNCmRmJEdlbmRlcg0KZGYkR2VvZ3JhcGh5DQpgYGANCg0KDQojIFNwbGl0IGRhdGFzZXQgaW50byB0cmFpbmluZyBhbmQgdGVzdCBzZXQgKDMwMCB0cmFpbmluZywgMTAwIHRlc3QpDQpgYGB7cn0NCmxpYnJhcnkoY2FUb29scykNCnNldC5zZWVkKDEyMzQpDQpzcGxpdCA8LSBzYW1wbGUuc3BsaXQoZGYkRXhpdGVkLCBTcGxpdFJhdGlvID0gMC44MCkNCnRyYWluaW5nX3NldCA8LSBzdWJzZXQoZGYsIHNwbGl0ID09IFRSVUUpDQp0ZXN0X3NldCA8LSBzdWJzZXQoZGYsIHNwbGl0ID09IEZBTFNFKQ0KDQpgYGANCg0KDQoNCiMgRm9yIE5ldXJhbCBOZXR3b3JrLCBpdCBpcyBtYW5kYXRvcnkgdG8gZG8gZmVhdHVyZSBzY2FsaW5nIChub3JtYWxpemF0aW9uKQ0KDQpgYGB7cn0NCiMgRmVhdHVyZSBTY2FsaW5nIA0KdHJhaW5pbmdfc2V0WywtMTFdIDwtICBzY2FsZSh0cmFpbmluZ19zZXRbLC0xMV0pDQp0ZXN0X3NldFssLTExXSA8LSAgc2NhbGUodGVzdF9zZXRbLC0xMV0pDQpgYGANCg0KIyBGaXR0aW5nIEFOTiB0byB0aGUgVHJhaW5pbmcgU2V0DQoNCmBgYHtyfQ0KIyBDcmVhdGUgdGhlIGNsYXNzaWZpZXIgaGVyZQ0KIyBpbnN0YWxsLnBhY2thZ2VzKCJoMm8iKQ0KDQpsaWJy
YXJ5KGgybykNCiMgZXN0YWJsaXNoIHRoZSBjb25uZWN0aW9uDQpoMm8uaW5pdChudGhyZWFkcyA9IC0xKQ0KY2xhc3NpZmllciA8LSBoMm8uZGVlcGxlYXJuaW5nKHkgPSAiRXhpdGVkIiAsIHRyYWluaW5nX2ZyYW1lID0gYXMuaDJvKHRyYWluaW5nX3NldCksDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWN0aXZhdGlvbiA9ICJSZWN0aWZpZXIiLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGhpZGRlbiA9IGMoNiw2KSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlcG9jaHMgPSAxMDAsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHJhaW5fc2FtcGxlc19wZXJfaXRlcmF0aW9uID0gLTIpDQpzdW1tYXJ5KGNsYXNzaWZpZXIpDQoNCmBgYA0KDQojIFByZWRpY3RpbmcgdGhlIHRlc3Qgc2V0IHJlc3VsdHMNCg0KYGBge3J9DQoNCnlfcHJlZCA8LSAgaDJvLnByZWRpY3QoY2xhc3NpZmllciwgbmV3ZGF0YSA9IGFzLmgybyh0ZXN0X3NldFstMTFdKSkNCiMgY29udmVydCBpbnRvIDEgb3IgMCBkZXBlbmRpbmcgb24gYSB0aHJlc2hvbGQgdmFsdWUNCnlfcHJlZCA8LSAgeV9wcmVkID4gMC41DQojIGNvbnZlcnQgaDJvIG9iamVjdCBiYWNrIGludG8gYSB2ZWN0b3INCnlfcHJlZCA8LSAgYXMudmVjdG9yKHlfcHJlZCkNCg0KYGBgDQoNCiMgRXZhbHVhdGUgdGhlIHByZWRpY3Rpb24gdXNpbmcgY29uZnVzaW9uIE1hdHJpeC4NCg0KYGBge3J9DQojIE1ha2luZyB0aGUgY29uZnVzaW9uIG1hdHJpeA0KIyBbM10gcmVmZXJzIHRvIHRoZSBvdXRjb21lDQoNCmNtIDwtIHRhYmxlKHRlc3Rfc2V0WywxMV0sIHlfcHJlZCkNCmNtDQpgYGANCmBgYHtyfQ0KDQpgYGANCg0KYGBge3J9DQojIHNodXRkb3duIHRoZSBoMm8NCmgyby5zaHV0ZG93bigpeQ0KDQpgYGANCg0K