Load Data and Change Attrition to 1 and 0

#opts_knit$set(root.dir = "C:/Users/willi/Desktop/Georgetown/RStudio Datasource")

Employee = read.csv("C:/Users/willi/Desktop/Georgetown/RStudio Datasource/Employee_Data_Project.csv")

Employee1 <- Employee %>%  #Change categorical to numeric
  mutate(Attrition = if_else(Attrition == "Yes", 1, 0)) #%>%    #Attrition 1 = "No", 0 = "Yes"
  #select(-"StandardHours", "EmployeeID")) #remove standard hours bc they are all the same 
Employee1 <- subset(Employee1, select = -StandardHours)
Employee1 <- subset(Employee1, select = -EmployeeID)
  
###EDA
dim(Employee1) #4410 Observations, 18 variables
## [1] 4410   16

Visualize Data

head(Employee1)
##   Age Attrition    BusinessTravel DistanceFromHome Education Gender JobLevel
## 1  51         0     Travel_Rarely                6         2 Female        1
## 2  31         1 Travel_Frequently               10         1 Female        1
## 3  32         0 Travel_Frequently               17         4   Male        4
## 4  38         0        Non-Travel                2         5   Male        3
## 5  32         0     Travel_Rarely               10         1   Male        1
## 6  46         0     Travel_Rarely                8         3 Female        4
##   MaritalStatus Income NumCompaniesWorked TotalWorkingYears
## 1       Married 131160                  1                 1
## 2        Single  41890                  0                 6
## 3       Married 193280                  1                 5
## 4       Married  83210                  3                13
## 5        Single  23420                  4                 9
## 6       Married  40710                  3                28
##   TrainingTimesLastYear YearsAtCompany YearsWithCurrManager
## 1                     6              1                    0
## 2                     3              5                    4
## 3                     2              5                    3
## 4                     5              8                    5
## 5                     2              6                    4
## 6                     5              7                    7
##   EnvironmentSatisfaction JobSatisfaction
## 1                       3               4
## 2                       3               2
## 3                       2               2
## 4                       4               4
## 5                       4               1
## 6                       3               2

Imbalanced Dataset Analysis

prop.table(table(Employee1$Attrition)) ##Imbalanced dataset with only 16.1% attrition rate (711/3699)
## 
##         0         1 
## 0.8387755 0.1612245

Data Partitioning

set.seed(123)  # for reproducibility
part <-sample(1:3, size=nrow(Employee1), prob=c(0.6, 0.20, 0.20), replace=TRUE)

#Create a train, validation and test from original data frame
train <-Employee1[part == 1, ] 
valid <-Employee1[part == 2, ]
test <-Employee1[part == 3, ] 


#observe distribution of partitioned data

table(train$Attrition)
## 
##    0    1 
## 2215  434
table(valid$Attrition)
## 
##   0   1 
## 742 153
table(test$Attrition)
## 
##   0   1 
## 742 124

Check missing values

sapply(train,function(x) sum(is.na(x)))
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                      13                       3                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                      14 
##         JobSatisfaction 
##                      13
sapply(valid,function(x) sum(is.na(x)))
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       1                       3                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       5 
##         JobSatisfaction 
##                       4
sapply(test,function(x) sum(is.na(x)))
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       5                       3                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       6 
##         JobSatisfaction 
##                       3

Replace Missing Values

Missing values introduced by coercion to be transformed

#replace Number of Companies Worked 
train$NumCompaniesWorked[is.na(train$NumCompaniesWorked)] = median(train$NumCompaniesWorked, na.rm = TRUE)
test$NumCompaniesWorked[is.na(test$NumCompaniesWorked)] = median(test$NumCompaniesWorked, na.rm = TRUE)
valid$NumCompaniesWorked[is.na(valid$NumCompaniesWorked)] = median(valid$NumCompaniesWorked, na.rm = TRUE)

#Replacing TotalWorkingYears with median
train$TotalWorkingYears[is.na(train$TotalWorkingYears)] = median(train$TotalWorkingYears, na.rm = TRUE)
test$TotalWorkingYears[is.na(test$TotalWorkingYears)] = median(test$TotalWorkingYears, na.rm = TRUE)
valid$TotalWorkingYears[is.na(valid$TotalWorkingYears)] = median(valid$TotalWorkingYears, na.rm = TRUE)

#Replacing EnvironmentSatisfaction with median
train$EnvironmentSatisfaction[is.na(train$EnvironmentSatisfaction)] = median(train$EnvironmentSatisfaction, na.rm = TRUE)
test$EnvironmentSatisfaction[is.na(test$EnvironmentSatisfaction)] = median(test$EnvironmentSatisfaction, na.rm = TRUE)
valid$EnvironmentSatisfaction[is.na(valid$EnvironmentSatisfaction)] = median(valid$EnvironmentSatisfaction, na.rm = TRUE)


#Replacing JobSatisfaction with median
train$JobSatisfaction[is.na(train$JobSatisfaction)] = median(train$JobSatisfaction, na.rm = TRUE)
test$JobSatisfaction[is.na(test$JobSatisfaction)] = median(test$JobSatisfaction, na.rm = TRUE)
valid$JobSatisfaction[is.na(valid$JobSatisfaction)] = median(valid$JobSatisfaction, na.rm = TRUE)


#Check again to make sure all replacements are done
sapply(train,function(x) sum(is.na(x)))
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       0                       0                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       0 
##         JobSatisfaction 
##                       0
sapply(valid,function(x) sum(is.na(x)))
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       0                       0                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       0 
##         JobSatisfaction 
##                       0
sapply(test,function(x) sum(is.na(x)))
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       0                       0                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       0 
##         JobSatisfaction 
##                       0
#Check for values that are blank

sapply(train, function(x){sum(x=='')})
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       0                       0                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       0 
##         JobSatisfaction 
##                       0
sapply(valid, function(x){sum(x=='')})
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       0                       0                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       0 
##         JobSatisfaction 
##                       0
sapply(test, function(x){sum(x=='')})
##                     Age               Attrition          BusinessTravel 
##                       0                       0                       0 
##        DistanceFromHome               Education                  Gender 
##                       0                       0                       0 
##                JobLevel           MaritalStatus                  Income 
##                       0                       0                       0 
##      NumCompaniesWorked       TotalWorkingYears   TrainingTimesLastYear 
##                       0                       0                       0 
##          YearsAtCompany    YearsWithCurrManager EnvironmentSatisfaction 
##                       0                       0                       0 
##         JobSatisfaction 
##                       0

Changing Variable Names

#Changing categorical variables to factor and leaving integers
train <- train %>% 
  mutate(Attrition = as.factor(Attrition),
         BusinessTravel = as.numeric(as.factor(BusinessTravel)), #change character variables to numeric
         DistanceFromHome = as.integer(DistanceFromHome),
         Education = as.factor(Education),
         Gender = as.numeric(as.factor(Gender)), #change character variables to numeric
         JobLevel = as.factor(JobLevel),
         MaritalStatus = as.numeric(as.factor(MaritalStatus)), #change character variables to numeric
         Income = as.integer(Income), 
         NumCompaniesWorked = as.integer(NumCompaniesWorked),
         TotalWorkingYears = as.integer(TotalWorkingYears),
         TrainingTimesLastYear = as.integer(TrainingTimesLastYear),
         YearsAtCompany = as.integer(YearsAtCompany),
         YearsWithCurrManager = as.integer(YearsWithCurrManager),
         EnvironmentSatisfaction = as.factor(EnvironmentSatisfaction),
         JobSatisfaction = as.factor(JobSatisfaction))

valid <- valid %>% 
  mutate(Attrition = as.factor(Attrition),
         BusinessTravel = as.numeric(as.factor(BusinessTravel)), #change character variables to numeric
         DistanceFromHome = as.integer(DistanceFromHome),
         Education = as.factor(Education),
         Gender = as.numeric(as.factor(Gender)), #change character variables to numeric
         JobLevel = as.factor(JobLevel),
         MaritalStatus = as.numeric(as.factor(MaritalStatus)), #change character variables to numeric
         Income = as.integer(Income), 
         NumCompaniesWorked = as.integer(NumCompaniesWorked),
         TotalWorkingYears = as.integer(TotalWorkingYears),
         TrainingTimesLastYear = as.integer(TrainingTimesLastYear),
         YearsAtCompany = as.integer(YearsAtCompany),
         YearsWithCurrManager = as.integer(YearsWithCurrManager),
         EnvironmentSatisfaction = as.factor(EnvironmentSatisfaction),
         JobSatisfaction = as.factor(JobSatisfaction))

test <- test %>% 
  mutate(Attrition = as.factor(Attrition),
         BusinessTravel = as.numeric(as.factor(BusinessTravel)), #change character variables to numeric
         DistanceFromHome = as.integer(DistanceFromHome),
         Education = as.factor(Education),
         Gender = as.numeric(as.factor(Gender)), #change character variables to numeric
         JobLevel = as.factor(JobLevel),
         MaritalStatus = as.numeric(as.factor(MaritalStatus)), #change character variables to numeric
         Income = as.integer(Income), 
         NumCompaniesWorked = as.integer(NumCompaniesWorked),
         TotalWorkingYears = as.integer(TotalWorkingYears),
         TrainingTimesLastYear = as.integer(TrainingTimesLastYear),
         YearsAtCompany = as.integer(YearsAtCompany),
         YearsWithCurrManager = as.integer(YearsWithCurrManager),
         EnvironmentSatisfaction = as.factor(EnvironmentSatisfaction),
         JobSatisfaction = as.factor(JobSatisfaction))

Downsampling

set.seed(123)
data_balanced_over <- ovun.sample(Attrition ~ ., data = train, method = "under", N= 860)$data
table(data_balanced_over$Attrition)
## 
##   0   1 
## 426 434

MODEL LEVERAGING ALL DATA

The initial model developed leveraged all variables provided within the Employee dataset inclusive of both factor and numeric. Factor variables were transformed using dummy coding and incorporated into the model. In addition, variables which were in charachter form were turned to numeric and also dummy coded.

K-Nearest Neighbors

Transform non numeric data to dummy variables and character data. Data that was in character form was already transformed to ‘numeric’

#Create dummy coding of all variables that are factors

#training dataset

train2 <- dummy_cols(train, select_columns = 'Education')
train2 <- dummy_cols(train, select_columns = 'JobLevel')
train2 <- dummy_cols(train, select_columns = 'EnvironmentSatisfaction')
train2 <- dummy_cols(train, select_columns = 'JobSatisfaction')
train2 <- dummy_cols(train, select_columns = 'Gender')
train2 <- dummy_cols(train, select_columns = 'MaritalStatus')
train2 <- dummy_cols(train, select_columns = 'BusinessTravel')

#validation dataset

valid2 <- dummy_cols(valid, select_columns = 'Education')
valid2 <- dummy_cols(valid, select_columns = 'JobLevel')
valid2 <- dummy_cols(valid, select_columns = 'EnvironmentSatisfaction')
valid2 <- dummy_cols(valid, select_columns = 'JobSatisfaction')
valid2 <- dummy_cols(valid, select_columns = 'Gender')
valid2 <- dummy_cols(valid, select_columns = 'MaritalStatus')
valid2 <- dummy_cols(valid, select_columns = 'BusinessTravel')

#test dataset

test2 <- dummy_cols(test, select_columns = 'Education')
test2 <- dummy_cols(test, select_columns = 'JobLevel')
test2 <- dummy_cols(test, select_columns = 'EnvironmentSatisfaction')
test2 <- dummy_cols(test, select_columns = 'JobSatisfaction')
test2 <- dummy_cols(test, select_columns = 'Gender')
test2 <- dummy_cols(test, select_columns = 'MaritalStatus')
test2 <- dummy_cols(test, select_columns = 'BusinessTravel')

str(train2)
## 'data.frame':    2649 obs. of  19 variables:
##  $ Age                    : int  51 32 46 28 31 25 36 47 28 21 ...
##  $ Attrition              : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 2 1 1 ...
##  $ BusinessTravel         : num  3 2 3 3 3 1 3 1 3 3 ...
##  $ DistanceFromHome       : int  6 17 8 11 1 7 28 1 1 3 ...
##  $ Education              : Factor w/ 5 levels "1","2","3","4",..: 2 4 3 2 3 4 1 1 3 2 ...
##  $ Gender                 : num  1 2 1 2 2 1 2 2 2 2 ...
##  $ JobLevel               : Factor w/ 5 levels "1","2","3","4",..: 1 4 4 2 3 4 1 1 1 1 ...
##  $ MaritalStatus          : num  2 2 2 3 2 1 2 2 2 3 ...
##  $ Income                 : int  131160 193280 40710 58130 20440 134640 33770 57620 25920 42130 ...
##  $ NumCompaniesWorked     : int  1 1 3 2 0 1 0 1 1 1 ...
##  $ TotalWorkingYears      : int  1 5 28 5 10 6 16 10 5 3 ...
##  $ TrainingTimesLastYear  : int  6 2 5 2 2 2 2 4 2 3 ...
##  $ YearsAtCompany         : int  1 5 7 0 9 6 15 10 5 3 ...
##  $ YearsWithCurrManager   : int  0 3 7 0 8 5 11 9 4 0 ...
##  $ EnvironmentSatisfaction: Factor w/ 4 levels "1","2","3","4": 3 2 3 1 2 2 3 1 4 4 ...
##  $ JobSatisfaction        : Factor w/ 4 levels "1","2","3","4": 4 2 2 3 4 1 4 2 4 3 ...
##  $ BusinessTravel_1       : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ BusinessTravel_2       : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ BusinessTravel_3       : int  1 0 1 1 1 0 1 0 1 1 ...
#remove variables

train2 <- dummy_cols(train, select_columns = c("Education", "JobLevel", "EnvironmentSatisfaction", "JobSatisfaction", "BusinessTravel", "MaritalStatus", "Gender"),
           remove_selected_columns = TRUE)

valid2 <- dummy_cols(valid, select_columns = c("Education", "JobLevel", "EnvironmentSatisfaction", "JobSatisfaction", "BusinessTravel", "MaritalStatus", "Gender"),
           remove_selected_columns = TRUE)

test2 <- dummy_cols(valid, select_columns = c("Education", "JobLevel", "EnvironmentSatisfaction", "JobSatisfaction", "BusinessTravel", "MaritalStatus", "Gender"),
           remove_selected_columns = TRUE)
str(train2)
## 'data.frame':    2649 obs. of  35 variables:
##  $ Age                      : int  51 32 46 28 31 25 36 47 28 21 ...
##  $ Attrition                : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 2 1 1 ...
##  $ DistanceFromHome         : int  6 17 8 11 1 7 28 1 1 3 ...
##  $ Income                   : int  131160 193280 40710 58130 20440 134640 33770 57620 25920 42130 ...
##  $ NumCompaniesWorked       : int  1 1 3 2 0 1 0 1 1 1 ...
##  $ TotalWorkingYears        : int  1 5 28 5 10 6 16 10 5 3 ...
##  $ TrainingTimesLastYear    : int  6 2 5 2 2 2 2 4 2 3 ...
##  $ YearsAtCompany           : int  1 5 7 0 9 6 15 10 5 3 ...
##  $ YearsWithCurrManager     : int  0 3 7 0 8 5 11 9 4 0 ...
##  $ Education_1              : int  0 0 0 0 0 0 1 1 0 0 ...
##  $ Education_2              : int  1 0 0 1 0 0 0 0 0 1 ...
##  $ Education_3              : int  0 0 1 0 1 0 0 0 1 0 ...
##  $ Education_4              : int  0 1 0 0 0 1 0 0 0 0 ...
##  $ Education_5              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ JobLevel_1               : int  1 0 0 0 0 0 1 1 1 1 ...
##  $ JobLevel_2               : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ JobLevel_3               : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ JobLevel_4               : int  0 1 1 0 0 1 0 0 0 0 ...
##  $ JobLevel_5               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ EnvironmentSatisfaction_1: int  0 0 0 1 0 0 0 1 0 0 ...
##  $ EnvironmentSatisfaction_2: int  0 1 0 0 1 1 0 0 0 0 ...
##  $ EnvironmentSatisfaction_3: int  1 0 1 0 0 0 1 0 0 0 ...
##  $ EnvironmentSatisfaction_4: int  0 0 0 0 0 0 0 0 1 1 ...
##  $ JobSatisfaction_1        : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ JobSatisfaction_2        : int  0 1 1 0 0 0 0 1 0 0 ...
##  $ JobSatisfaction_3        : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ JobSatisfaction_4        : int  1 0 0 0 1 0 1 0 1 0 ...
##  $ BusinessTravel_1         : int  0 0 0 0 0 1 0 1 0 0 ...
##  $ BusinessTravel_2         : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ BusinessTravel_3         : int  1 0 1 1 1 0 1 0 1 1 ...
##  $ MaritalStatus_1          : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ MaritalStatus_2          : int  1 1 1 0 1 0 1 1 1 0 ...
##  $ MaritalStatus_3          : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ Gender_1                 : int  1 0 1 0 0 1 0 0 0 0 ...
##  $ Gender_2                 : int  0 1 0 1 1 0 1 1 1 1 ...

Create neural net dataframes and convert attrition to integer from factor

train_nn = train2 %>% 
  mutate(Attrition = as.integer(Attrition))
test_nn = test2 %>% 
  mutate(Attrition = as.integer(Attrition))
valid_nn = valid2 %>% 
  mutate(Attrition = as.integer(Attrition))


glimpse(train_nn)
## Rows: 2,649
## Columns: 35
## $ Age                       <int> 51, 32, 46, 28, 31, 25, 36, 47, 28, 21, 37, ~
## $ Attrition                 <int> 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2,~
## $ DistanceFromHome          <int> 6, 17, 8, 11, 1, 7, 28, 1, 1, 3, 1, 7, 9, 5,~
## $ Income                    <int> 131160, 193280, 40710, 58130, 20440, 134640,~
## $ NumCompaniesWorked        <int> 1, 1, 3, 2, 0, 1, 0, 1, 1, 1, 2, 7, 1, 1, 3,~
## $ TotalWorkingYears         <int> 1, 5, 28, 5, 10, 6, 16, 10, 5, 3, 15, 10, 5,~
## $ TrainingTimesLastYear     <int> 6, 2, 5, 2, 2, 2, 2, 4, 2, 3, 2, 5, 3, 2, 2,~
## $ YearsAtCompany            <int> 1, 5, 7, 0, 9, 6, 15, 10, 5, 3, 5, 7, 5, 17,~
## $ YearsWithCurrManager      <int> 0, 3, 7, 0, 8, 5, 11, 9, 4, 0, 2, 2, 3, 7, 0~
## $ Education_1               <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,~
## $ Education_2               <int> 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,~
## $ Education_3               <int> 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,~
## $ Education_4               <int> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,~
## $ Education_5               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_1                <int> 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,~
## $ JobLevel_2                <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,~
## $ JobLevel_3                <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_4                <int> 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_5                <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_1 <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,~
## $ EnvironmentSatisfaction_2 <int> 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,~
## $ EnvironmentSatisfaction_3 <int> 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_4 <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,~
## $ JobSatisfaction_1         <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ JobSatisfaction_2         <int> 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,~
## $ JobSatisfaction_3         <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,~
## $ JobSatisfaction_4         <int> 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,~
## $ BusinessTravel_1          <int> 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,~
## $ BusinessTravel_2          <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,~
## $ BusinessTravel_3          <int> 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,~
## $ MaritalStatus_1           <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,~
## $ MaritalStatus_2           <int> 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0,~
## $ MaritalStatus_3           <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,~
## $ Gender_1                  <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ Gender_2                  <int> 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,~
glimpse(test_nn)
## Rows: 895
## Columns: 35
## $ Age                       <int> 38, 32, 29, 45, 37, 38, 26, 42, 26, 28, 38, ~
## $ Attrition                 <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,~
## $ DistanceFromHome          <int> 2, 10, 18, 17, 1, 8, 1, 4, 4, 7, 5, 1, 7, 13~
## $ Income                    <int> 83210, 23420, 31430, 79910, 53460, 68700, 10~
## $ NumCompaniesWorked        <int> 3, 4, 2, 0, 4, 1, 1, 1, 2, 1, 3, 7, 7, 1, 1,~
## $ TotalWorkingYears         <int> 13, 9, 10, 21, 7, 8, 6, 9, 5, 5, 19, 25, 10,~
## $ TrainingTimesLastYear     <int> 5, 2, 2, 2, 2, 5, 3, 4, 5, 6, 4, 2, 2, 3, 5,~
## $ YearsAtCompany            <int> 8, 6, 0, 20, 5, 8, 6, 20, 3, 5, 10, 7, 7, 10~
## $ YearsWithCurrManager      <int> 5, 4, 0, 10, 1, 7, 4, 6, 2, 2, 1, 7, 7, 9, 9~
## $ Education_1               <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ Education_2               <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,~
## $ Education_3               <int> 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0,~
## $ Education_4               <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ Education_5               <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_1                <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,~
## $ JobLevel_2                <int> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ JobLevel_3                <int> 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,~
## $ JobLevel_4                <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,~
## $ JobLevel_5                <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_1 <int> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,~
## $ EnvironmentSatisfaction_2 <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_3 <int> 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,~
## $ EnvironmentSatisfaction_4 <int> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,~
## $ JobSatisfaction_1         <int> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobSatisfaction_2         <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,~
## $ JobSatisfaction_3         <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,~
## $ JobSatisfaction_4         <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,~
## $ BusinessTravel_1          <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ BusinessTravel_2          <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ BusinessTravel_3          <int> 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,~
## $ MaritalStatus_1           <int> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,~
## $ MaritalStatus_2           <int> 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,~
## $ MaritalStatus_3           <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,~
## $ Gender_1                  <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,~
## $ Gender_2                  <int> 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0,~
glimpse(valid_nn)
## Rows: 895
## Columns: 35
## $ Age                       <int> 38, 32, 29, 45, 37, 38, 26, 42, 26, 28, 38, ~
## $ Attrition                 <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,~
## $ DistanceFromHome          <int> 2, 10, 18, 17, 1, 8, 1, 4, 4, 7, 5, 1, 7, 13~
## $ Income                    <int> 83210, 23420, 31430, 79910, 53460, 68700, 10~
## $ NumCompaniesWorked        <int> 3, 4, 2, 0, 4, 1, 1, 1, 2, 1, 3, 7, 7, 1, 1,~
## $ TotalWorkingYears         <int> 13, 9, 10, 21, 7, 8, 6, 9, 5, 5, 19, 25, 10,~
## $ TrainingTimesLastYear     <int> 5, 2, 2, 2, 2, 5, 3, 4, 5, 6, 4, 2, 2, 3, 5,~
## $ YearsAtCompany            <int> 8, 6, 0, 20, 5, 8, 6, 20, 3, 5, 10, 7, 7, 10~
## $ YearsWithCurrManager      <int> 5, 4, 0, 10, 1, 7, 4, 6, 2, 2, 1, 7, 7, 9, 9~
## $ Education_1               <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ Education_2               <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,~
## $ Education_3               <int> 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0,~
## $ Education_4               <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ Education_5               <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_1                <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,~
## $ JobLevel_2                <int> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ JobLevel_3                <int> 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,~
## $ JobLevel_4                <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,~
## $ JobLevel_5                <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_1 <int> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,~
## $ EnvironmentSatisfaction_2 <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_3 <int> 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,~
## $ EnvironmentSatisfaction_4 <int> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,~
## $ JobSatisfaction_1         <int> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobSatisfaction_2         <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,~
## $ JobSatisfaction_3         <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,~
## $ JobSatisfaction_4         <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,~
## $ BusinessTravel_1          <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ BusinessTravel_2          <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ BusinessTravel_3          <int> 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,~
## $ MaritalStatus_1           <int> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,~
## $ MaritalStatus_2           <int> 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,~
## $ MaritalStatus_3           <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,~
## $ Gender_1                  <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,~
## $ Gender_2                  <int> 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0,~

As neural networks use activation functions between 1 and +1, it’s important to scale your variables down

Otherwise, the neural network will have to spend training iterations doing that scaling for you

scale01 = function(x)((x-min(x))/(max(x)-min(x)))

train_nn_scale = train_nn %>% mutate_all(scale01)

test_nn_scale = test_nn %>% mutate_all(scale01)

valid_nn_scale = valid_nn %>% mutate_all(scale01)

Train the neural network model

# Tuning with different activation functions

set.seed(2890)

emp_nn6 <- neuralnet(Attrition ~., data=train_nn_scale, 
                      hidden = c(12, 12), linear.output=FALSE, 
                      threshold=0.01,
                      act.fct = "logistic")

Plot by weighted values

plot(emp_nn6)
emp_nn6$result.matrix
##                                                 [,1]
## error                                   39.018757127
## reached.threshold                        0.009755535
## steps                                  940.000000000
## Intercept.to.1layhid1                    0.434740539
## Age.to.1layhid1                          1.389763989
## DistanceFromHome.to.1layhid1            -1.026027556
## Income.to.1layhid1                      -0.044232059
## NumCompaniesWorked.to.1layhid1          -1.958066159
## TotalWorkingYears.to.1layhid1           -3.075845950
## TrainingTimesLastYear.to.1layhid1        4.522248122
## YearsAtCompany.to.1layhid1              -3.835486953
## YearsWithCurrManager.to.1layhid1         3.067150823
## Education_1.to.1layhid1                  4.228482476
## Education_2.to.1layhid1                  0.208646372
## Education_3.to.1layhid1                 -0.702147547
## Education_4.to.1layhid1                  0.540708184
## Education_5.to.1layhid1                  1.555915522
## JobLevel_1.to.1layhid1                  -0.571056763
## JobLevel_2.to.1layhid1                   0.189275705
## JobLevel_3.to.1layhid1                   3.442092700
## JobLevel_4.to.1layhid1                   5.648673629
## JobLevel_5.to.1layhid1                  -2.171466778
## EnvironmentSatisfaction_1.to.1layhid1   -1.249579058
## EnvironmentSatisfaction_2.to.1layhid1    1.111057783
## EnvironmentSatisfaction_3.to.1layhid1   -0.073886489
## EnvironmentSatisfaction_4.to.1layhid1    0.979889042
## JobSatisfaction_1.to.1layhid1           -1.324995008
## JobSatisfaction_2.to.1layhid1            0.915169760
## JobSatisfaction_3.to.1layhid1            6.933164985
## JobSatisfaction_4.to.1layhid1            3.176353198
## BusinessTravel_1.to.1layhid1             0.829675033
## BusinessTravel_2.to.1layhid1            39.035199614
## BusinessTravel_3.to.1layhid1            -0.083371797
## MaritalStatus_1.to.1layhid1             -0.131945136
## MaritalStatus_2.to.1layhid1              2.459167058
## MaritalStatus_3.to.1layhid1             -0.629119674
## Gender_1.to.1layhid1                    -0.604408325
## Gender_2.to.1layhid1                    -1.238266308
## Intercept.to.1layhid2                    1.599980629
## Age.to.1layhid2                          6.843607867
## DistanceFromHome.to.1layhid2            -2.602525535
## Income.to.1layhid2                      -1.041364098
## NumCompaniesWorked.to.1layhid2          -4.876330980
## TotalWorkingYears.to.1layhid2            2.167928191
## TrainingTimesLastYear.to.1layhid2        4.732866792
## YearsAtCompany.to.1layhid2              -0.744365696
## YearsWithCurrManager.to.1layhid2        -5.222960727
## Education_1.to.1layhid2                 14.170853736
## Education_2.to.1layhid2                  7.939741164
## Education_3.to.1layhid2                 -1.551077723
## Education_4.to.1layhid2                 -1.173125950
## Education_5.to.1layhid2                 -1.080287816
## JobLevel_1.to.1layhid2                   5.287624474
## JobLevel_2.to.1layhid2                  -0.442085780
## JobLevel_3.to.1layhid2                  -9.619869686
## JobLevel_4.to.1layhid2                  -7.611922970
## JobLevel_5.to.1layhid2                   7.286342957
## EnvironmentSatisfaction_1.to.1layhid2   -2.483204989
## EnvironmentSatisfaction_2.to.1layhid2   -0.296373767
## EnvironmentSatisfaction_3.to.1layhid2    6.009044811
## EnvironmentSatisfaction_4.to.1layhid2   -1.448770997
## JobSatisfaction_1.to.1layhid2           -1.491467575
## JobSatisfaction_2.to.1layhid2           -5.527640415
## JobSatisfaction_3.to.1layhid2            8.556758168
## JobSatisfaction_4.to.1layhid2           43.989077463
## BusinessTravel_1.to.1layhid2            11.693643301
## BusinessTravel_2.to.1layhid2             2.092818043
## BusinessTravel_3.to.1layhid2            -0.512488195
## MaritalStatus_1.to.1layhid2             55.623592020
## MaritalStatus_2.to.1layhid2              0.960565223
## MaritalStatus_3.to.1layhid2              0.453408509
## Gender_1.to.1layhid2                    -3.486940350
## Gender_2.to.1layhid2                     4.000614554
## Intercept.to.1layhid3                    0.700517892
## Age.to.1layhid3                         -0.558053026
## DistanceFromHome.to.1layhid3             4.054490058
## Income.to.1layhid3                      -3.711373959
## NumCompaniesWorked.to.1layhid3          -1.167471860
## TotalWorkingYears.to.1layhid3           -0.938465624
## TrainingTimesLastYear.to.1layhid3       -0.011738864
## YearsAtCompany.to.1layhid3               5.137325288
## YearsWithCurrManager.to.1layhid3         2.158099540
## Education_1.to.1layhid3                 -5.162445699
## Education_2.to.1layhid3                  0.770033494
## Education_3.to.1layhid3                  0.326124692
## Education_4.to.1layhid3                  1.942581186
## Education_5.to.1layhid3                 -2.518913653
## JobLevel_1.to.1layhid3                  -0.145393422
## JobLevel_2.to.1layhid3                  -0.654723572
## JobLevel_3.to.1layhid3                  -0.825744983
## JobLevel_4.to.1layhid3                   1.478614271
## JobLevel_5.to.1layhid3                  -2.514700156
## EnvironmentSatisfaction_1.to.1layhid3    2.130042973
## EnvironmentSatisfaction_2.to.1layhid3    0.514606936
## EnvironmentSatisfaction_3.to.1layhid3    0.910918145
## EnvironmentSatisfaction_4.to.1layhid3   -0.943206059
## JobSatisfaction_1.to.1layhid3            0.790299259
## JobSatisfaction_2.to.1layhid3           -3.491863885
## JobSatisfaction_3.to.1layhid3           -1.413854517
## JobSatisfaction_4.to.1layhid3            2.791891196
## BusinessTravel_1.to.1layhid3            -0.117834650
## BusinessTravel_2.to.1layhid3            -1.385882026
## BusinessTravel_3.to.1layhid3             0.781585333
## MaritalStatus_1.to.1layhid3             -2.329744842
## MaritalStatus_2.to.1layhid3             -1.063478298
## MaritalStatus_3.to.1layhid3              3.073693229
## Gender_1.to.1layhid3                    -3.576824921
## Gender_2.to.1layhid3                     2.646360944
## Intercept.to.1layhid4                    0.461857461
## Age.to.1layhid4                         -3.591764737
## DistanceFromHome.to.1layhid4             0.013229833
## Income.to.1layhid4                      -5.230440365
## NumCompaniesWorked.to.1layhid4           1.072333127
## TotalWorkingYears.to.1layhid4           -5.742679925
## TrainingTimesLastYear.to.1layhid4        0.196271965
## YearsAtCompany.to.1layhid4               2.867384123
## YearsWithCurrManager.to.1layhid4        -2.810189961
## Education_1.to.1layhid4                  0.342014063
## Education_2.to.1layhid4                 -1.160391305
## Education_3.to.1layhid4                 -0.436147344
## Education_4.to.1layhid4                 -0.053017116
## Education_5.to.1layhid4                -87.248234905
## JobLevel_1.to.1layhid4                   0.873180473
## JobLevel_2.to.1layhid4                  -0.805332154
## JobLevel_3.to.1layhid4                  -2.487648796
## JobLevel_4.to.1layhid4                   0.989819565
## JobLevel_5.to.1layhid4                   1.253411156
## EnvironmentSatisfaction_1.to.1layhid4    0.114369251
## EnvironmentSatisfaction_2.to.1layhid4   -0.137137213
## EnvironmentSatisfaction_3.to.1layhid4    0.816031777
## EnvironmentSatisfaction_4.to.1layhid4  -60.072056270
## JobSatisfaction_1.to.1layhid4            1.244590905
## JobSatisfaction_2.to.1layhid4            1.780786158
## JobSatisfaction_3.to.1layhid4            0.757845432
## JobSatisfaction_4.to.1layhid4           -2.350116095
## BusinessTravel_1.to.1layhid4           -13.267042613
## BusinessTravel_2.to.1layhid4             1.302671366
## BusinessTravel_3.to.1layhid4            -0.238626918
## MaritalStatus_1.to.1layhid4              0.329444978
## MaritalStatus_2.to.1layhid4             -1.070369959
## MaritalStatus_3.to.1layhid4              0.688978247
## Gender_1.to.1layhid4                    -0.931433074
## Gender_2.to.1layhid4                    -0.420545575
## Intercept.to.1layhid5                    0.837475543
## Age.to.1layhid5                         -0.441116460
## DistanceFromHome.to.1layhid5            -2.231849149
## Income.to.1layhid5                       4.857567579
## NumCompaniesWorked.to.1layhid5           6.344842381
## TotalWorkingYears.to.1layhid5           -1.762127435
## TrainingTimesLastYear.to.1layhid5       -2.040972735
## YearsAtCompany.to.1layhid5               1.222684681
## YearsWithCurrManager.to.1layhid5        -2.939317625
## Education_1.to.1layhid5                  2.806709776
## Education_2.to.1layhid5                 -0.172682119
## Education_3.to.1layhid5                  1.241585106
## Education_4.to.1layhid5                 -3.999001596
## Education_5.to.1layhid5                 -0.292421294
## JobLevel_1.to.1layhid5                  -0.235747758
## JobLevel_2.to.1layhid5                   1.720381636
## JobLevel_3.to.1layhid5                  -4.864680337
## JobLevel_4.to.1layhid5                  -4.846888479
## JobLevel_5.to.1layhid5                   3.719340273
## EnvironmentSatisfaction_1.to.1layhid5    2.973707549
## EnvironmentSatisfaction_2.to.1layhid5    0.663127913
## EnvironmentSatisfaction_3.to.1layhid5    0.838779805
## EnvironmentSatisfaction_4.to.1layhid5   -0.759461185
## JobSatisfaction_1.to.1layhid5            0.165776163
## JobSatisfaction_2.to.1layhid5            1.813314342
## JobSatisfaction_3.to.1layhid5            0.404944440
## JobSatisfaction_4.to.1layhid5           -0.103988522
## BusinessTravel_1.to.1layhid5            -7.701689140
## BusinessTravel_2.to.1layhid5             0.055895501
## BusinessTravel_3.to.1layhid5             0.143354779
## MaritalStatus_1.to.1layhid5             -3.227951064
## MaritalStatus_2.to.1layhid5              9.628304882
## MaritalStatus_3.to.1layhid5              0.061680647
## Gender_1.to.1layhid5                    -2.088948656
## Gender_2.to.1layhid5                    -0.678661752
## Intercept.to.1layhid6                    0.474400937
## Age.to.1layhid6                         -1.629109757
## DistanceFromHome.to.1layhid6             0.910634092
## Income.to.1layhid6                      -1.273013521
## NumCompaniesWorked.to.1layhid6          -3.216942043
## TotalWorkingYears.to.1layhid6           -2.935760067
## TrainingTimesLastYear.to.1layhid6        0.809014985
## YearsAtCompany.to.1layhid6               0.370145954
## YearsWithCurrManager.to.1layhid6        -4.333245437
## Education_1.to.1layhid6                  0.918784646
## Education_2.to.1layhid6                 -1.762875235
## Education_3.to.1layhid6                 -3.174130538
## Education_4.to.1layhid6                  1.592934225
## Education_5.to.1layhid6                  2.214149554
## JobLevel_1.to.1layhid6                   0.431818953
## JobLevel_2.to.1layhid6                  -0.846082599
## JobLevel_3.to.1layhid6                  -1.414199148
## JobLevel_4.to.1layhid6                  -0.557306305
## JobLevel_5.to.1layhid6                   0.520577737
## EnvironmentSatisfaction_1.to.1layhid6   -1.036591264
## EnvironmentSatisfaction_2.to.1layhid6   -1.031896502
## EnvironmentSatisfaction_3.to.1layhid6   -0.470906038
## EnvironmentSatisfaction_4.to.1layhid6   -1.764328390
## JobSatisfaction_1.to.1layhid6            0.859664034
## JobSatisfaction_2.to.1layhid6            0.700416770
## JobSatisfaction_3.to.1layhid6           -4.078520854
## JobSatisfaction_4.to.1layhid6           -3.582747876
## BusinessTravel_1.to.1layhid6             2.188763156
## BusinessTravel_2.to.1layhid6             2.238974648
## BusinessTravel_3.to.1layhid6            -7.835396713
## MaritalStatus_1.to.1layhid6             -3.519891625
## MaritalStatus_2.to.1layhid6             -3.362365783
## MaritalStatus_3.to.1layhid6              0.510597889
## Gender_1.to.1layhid6                    -1.600404526
## Gender_2.to.1layhid6                     0.018397842
## Intercept.to.1layhid7                   -0.394239528
## Age.to.1layhid7                         -5.862008899
## DistanceFromHome.to.1layhid7            12.271565721
## Income.to.1layhid7                       1.409925262
## NumCompaniesWorked.to.1layhid7           7.755818447
## TotalWorkingYears.to.1layhid7            7.322579546
## TrainingTimesLastYear.to.1layhid7        3.501640760
## YearsAtCompany.to.1layhid7              -4.986516607
## YearsWithCurrManager.to.1layhid7         8.138612483
## Education_1.to.1layhid7                 -8.852859223
## Education_2.to.1layhid7                 -4.884783180
## Education_3.to.1layhid7                  0.036036320
## Education_4.to.1layhid7                  4.161931907
## Education_5.to.1layhid7                 -4.528991574
## JobLevel_1.to.1layhid7                  -2.210118429
## JobLevel_2.to.1layhid7                   0.565900903
## JobLevel_3.to.1layhid7                   0.163878770
## JobLevel_4.to.1layhid7                  -0.222870532
## JobLevel_5.to.1layhid7                  -2.914994638
## EnvironmentSatisfaction_1.to.1layhid7    5.893181793
## EnvironmentSatisfaction_2.to.1layhid7   49.462151972
## EnvironmentSatisfaction_3.to.1layhid7   -3.735164588
## EnvironmentSatisfaction_4.to.1layhid7    0.518647471
## JobSatisfaction_1.to.1layhid7           10.544979717
## JobSatisfaction_2.to.1layhid7           -3.863232891
## JobSatisfaction_3.to.1layhid7           -1.940030973
## JobSatisfaction_4.to.1layhid7           -0.052280881
## BusinessTravel_1.to.1layhid7            -8.686987152
## BusinessTravel_2.to.1layhid7            -9.320081162
## BusinessTravel_3.to.1layhid7             1.199220609
## MaritalStatus_1.to.1layhid7              3.505475207
## MaritalStatus_2.to.1layhid7              0.276291631
## MaritalStatus_3.to.1layhid7             -1.014054205
## Gender_1.to.1layhid7                    -1.339371497
## Gender_2.to.1layhid7                     1.420611739
## Intercept.to.1layhid8                    0.046896415
## Age.to.1layhid8                          1.205132426
## DistanceFromHome.to.1layhid8            -0.436394495
## Income.to.1layhid8                      -0.104943040
## NumCompaniesWorked.to.1layhid8           1.904752374
## TotalWorkingYears.to.1layhid8            0.194911594
## TrainingTimesLastYear.to.1layhid8       -1.550909772
## YearsAtCompany.to.1layhid8              -5.212371517
## YearsWithCurrManager.to.1layhid8        -1.258501392
## Education_1.to.1layhid8                 -0.222054459
## Education_2.to.1layhid8                 -0.862559565
## Education_3.to.1layhid8                 -0.645586096
## Education_4.to.1layhid8                  0.996638029
## Education_5.to.1layhid8                 34.072031510
## JobLevel_1.to.1layhid8                   1.129282693
## JobLevel_2.to.1layhid8                  -0.272377605
## JobLevel_3.to.1layhid8                  -2.710779209
## JobLevel_4.to.1layhid8                   1.071101183
## JobLevel_5.to.1layhid8                 -36.317396882
## EnvironmentSatisfaction_1.to.1layhid8   -0.473694805
## EnvironmentSatisfaction_2.to.1layhid8    1.721545514
## EnvironmentSatisfaction_3.to.1layhid8   -2.284525743
## EnvironmentSatisfaction_4.to.1layhid8   -0.714960128
## JobSatisfaction_1.to.1layhid8            1.011446643
## JobSatisfaction_2.to.1layhid8          -86.466874515
## JobSatisfaction_3.to.1layhid8            0.979183120
## JobSatisfaction_4.to.1layhid8           -1.029221731
## BusinessTravel_1.to.1layhid8            -1.279988438
## BusinessTravel_2.to.1layhid8             0.009906306
## BusinessTravel_3.to.1layhid8            -1.618318785
## MaritalStatus_1.to.1layhid8              0.370750809
## MaritalStatus_2.to.1layhid8             -0.262334441
## MaritalStatus_3.to.1layhid8             -0.615673668
## Gender_1.to.1layhid8                     0.640481095
## Gender_2.to.1layhid8                    -0.501681840
## Intercept.to.1layhid9                    0.004553357
## Age.to.1layhid9                          4.175136709
## DistanceFromHome.to.1layhid9             2.962993113
## Income.to.1layhid9                      -7.521437723
## NumCompaniesWorked.to.1layhid9           1.022792255
## TotalWorkingYears.to.1layhid9            0.231663415
## TrainingTimesLastYear.to.1layhid9        0.231584110
## YearsAtCompany.to.1layhid9              -1.755353770
## YearsWithCurrManager.to.1layhid9         1.649556483
## Education_1.to.1layhid9                  2.233546740
## Education_2.to.1layhid9                  1.291078581
## Education_3.to.1layhid9                  0.403775915
## Education_4.to.1layhid9                  2.211539333
## Education_5.to.1layhid9                  6.302040848
## JobLevel_1.to.1layhid9                   0.611993439
## JobLevel_2.to.1layhid9                  -0.194844583
## JobLevel_3.to.1layhid9                  -7.990291589
## JobLevel_4.to.1layhid9                   1.464760940
## JobLevel_5.to.1layhid9                  -1.124952230
## EnvironmentSatisfaction_1.to.1layhid9   -2.113730716
## EnvironmentSatisfaction_2.to.1layhid9    1.346890926
## EnvironmentSatisfaction_3.to.1layhid9   -0.664987308
## EnvironmentSatisfaction_4.to.1layhid9   -0.622473612
## JobSatisfaction_1.to.1layhid9            6.800359599
## JobSatisfaction_2.to.1layhid9           -2.961554769
## JobSatisfaction_3.to.1layhid9           -1.038842113
## JobSatisfaction_4.to.1layhid9            0.223550454
## BusinessTravel_1.to.1layhid9             3.210765243
## BusinessTravel_2.to.1layhid9             1.295864855
## BusinessTravel_3.to.1layhid9            -0.989724932
## MaritalStatus_1.to.1layhid9             -2.974856965
## MaritalStatus_2.to.1layhid9              0.251519599
## MaritalStatus_3.to.1layhid9             -1.152112580
## Gender_1.to.1layhid9                    -6.670298060
## Gender_2.to.1layhid9                     1.188025142
## Intercept.to.1layhid10                   1.252096252
## Age.to.1layhid10                        -1.218872612
## DistanceFromHome.to.1layhid10           -1.941790801
## Income.to.1layhid10                     -2.452922472
## NumCompaniesWorked.to.1layhid10         -5.071689746
## TotalWorkingYears.to.1layhid10           5.091282816
## TrainingTimesLastYear.to.1layhid10      -0.535692910
## YearsAtCompany.to.1layhid10             14.554081620
## YearsWithCurrManager.to.1layhid10        6.082110536
## Education_1.to.1layhid10                 9.332496385
## Education_2.to.1layhid10                 1.442109196
## Education_3.to.1layhid10                -2.285398658
## Education_4.to.1layhid10                 3.567799305
## Education_5.to.1layhid10                 3.877699803
## JobLevel_1.to.1layhid10                  6.444811455
## JobLevel_2.to.1layhid10                 -2.762630795
## JobLevel_3.to.1layhid10                 -1.376472002
## JobLevel_4.to.1layhid10                  0.293026004
## JobLevel_5.to.1layhid10                 52.575193940
## EnvironmentSatisfaction_1.to.1layhid10   2.078864662
## EnvironmentSatisfaction_2.to.1layhid10  -2.572669213
## EnvironmentSatisfaction_3.to.1layhid10   0.434968962
## EnvironmentSatisfaction_4.to.1layhid10  -4.041986274
## JobSatisfaction_1.to.1layhid10          -0.485626009
## JobSatisfaction_2.to.1layhid10           2.758962309
## JobSatisfaction_3.to.1layhid10          -0.254162971
## JobSatisfaction_4.to.1layhid10          -7.205131520
## BusinessTravel_1.to.1layhid10            8.937612186
## BusinessTravel_2.to.1layhid10            1.230306651
## BusinessTravel_3.to.1layhid10            0.229043167
## MaritalStatus_1.to.1layhid10             0.708152292
## MaritalStatus_2.to.1layhid10            -2.804196265
## MaritalStatus_3.to.1layhid10             1.015336884
## Gender_1.to.1layhid10                   -0.168371850
## Gender_2.to.1layhid10                   -0.184478373
## Intercept.to.1layhid11                  -0.727410012
## Age.to.1layhid11                        -1.422950961
## DistanceFromHome.to.1layhid11           -0.023811446
## Income.to.1layhid11                     -1.125448285
## NumCompaniesWorked.to.1layhid11          2.622412831
## TotalWorkingYears.to.1layhid11          -1.803563938
## TrainingTimesLastYear.to.1layhid11       1.791582889
## YearsAtCompany.to.1layhid11              4.445202788
## YearsWithCurrManager.to.1layhid11       -5.751107481
## Education_1.to.1layhid11               -20.543100260
## Education_2.to.1layhid11                 1.920818169
## Education_3.to.1layhid11                 1.106227228
## Education_4.to.1layhid11                 0.136654649
## Education_5.to.1layhid11                -4.460647709
## JobLevel_1.to.1layhid11                  0.515429197
## JobLevel_2.to.1layhid11                  0.120245522
## JobLevel_3.to.1layhid11                 -0.601765136
## JobLevel_4.to.1layhid11                  1.661226109
## JobLevel_5.to.1layhid11                -89.648634602
## EnvironmentSatisfaction_1.to.1layhid11   1.014351719
## EnvironmentSatisfaction_2.to.1layhid11  -2.403109005
## EnvironmentSatisfaction_3.to.1layhid11  -0.556491289
## EnvironmentSatisfaction_4.to.1layhid11   1.728526300
## JobSatisfaction_1.to.1layhid11           0.115280535
## JobSatisfaction_2.to.1layhid11          -5.077145212
## JobSatisfaction_3.to.1layhid11          -0.636504588
## JobSatisfaction_4.to.1layhid11          -1.375346521
## BusinessTravel_1.to.1layhid11            1.384742820
## BusinessTravel_2.to.1layhid11            1.327353341
## BusinessTravel_3.to.1layhid11           -0.949166236
## MaritalStatus_1.to.1layhid11             1.656053266
## MaritalStatus_2.to.1layhid11            -0.291433769
## MaritalStatus_3.to.1layhid11            -0.053767533
## Gender_1.to.1layhid11                   -2.149311013
## Gender_2.to.1layhid11                    1.652788593
## Intercept.to.1layhid12                   0.358174281
## Age.to.1layhid12                         3.730811217
## DistanceFromHome.to.1layhid12           -1.525453303
## Income.to.1layhid12                      2.163734690
## NumCompaniesWorked.to.1layhid12          1.152214256
## TotalWorkingYears.to.1layhid12           7.327969300
## TrainingTimesLastYear.to.1layhid12      -0.045165918
## YearsAtCompany.to.1layhid12             -3.063124869
## YearsWithCurrManager.to.1layhid12       -9.479770369
## Education_1.to.1layhid12                -6.724594699
## Education_2.to.1layhid12                 0.359769869
## Education_3.to.1layhid12                16.233527399
## Education_4.to.1layhid12                 0.437973375
## Education_5.to.1layhid12                 0.890020785
## JobLevel_1.to.1layhid12                  1.045179812
## JobLevel_2.to.1layhid12                 -0.633047868
## JobLevel_3.to.1layhid12                 -0.769159638
## JobLevel_4.to.1layhid12                 45.696529509
## JobLevel_5.to.1layhid12                 -6.439470580
## EnvironmentSatisfaction_1.to.1layhid12   0.793415601
## EnvironmentSatisfaction_2.to.1layhid12  -1.621695767
## EnvironmentSatisfaction_3.to.1layhid12   0.328387421
## EnvironmentSatisfaction_4.to.1layhid12  -0.136245747
## JobSatisfaction_1.to.1layhid12           1.254265774
## JobSatisfaction_2.to.1layhid12          -1.858892771
## JobSatisfaction_3.to.1layhid12          -1.812411788
## JobSatisfaction_4.to.1layhid12          -1.713577137
## BusinessTravel_1.to.1layhid12            4.899525205
## BusinessTravel_2.to.1layhid12            2.965989563
## BusinessTravel_3.to.1layhid12            0.656410156
## MaritalStatus_1.to.1layhid12            -0.178455232
## MaritalStatus_2.to.1layhid12             0.535580011
## MaritalStatus_3.to.1layhid12            -1.428465097
## Gender_1.to.1layhid12                   -1.614100305
## Gender_2.to.1layhid12                    0.877750989
## Intercept.to.2layhid1                    0.057815998
## 1layhid1.to.2layhid1                     0.674569700
## 1layhid2.to.2layhid1                    -0.587130490
## 1layhid3.to.2layhid1                    -0.825550995
## 1layhid4.to.2layhid1                    -7.553679539
## 1layhid5.to.2layhid1                    -2.182153269
## 1layhid6.to.2layhid1                    -2.466232065
## 1layhid7.to.2layhid1                     3.824892539
## 1layhid8.to.2layhid1                     0.292636130
## 1layhid9.to.2layhid1                    15.084252184
## 1layhid10.to.2layhid1                    1.224552960
## 1layhid11.to.2layhid1                   -2.692936821
## 1layhid12.to.2layhid1                   -0.240378758
## Intercept.to.2layhid2                    1.222936913
## 1layhid1.to.2layhid2                    -2.075862218
## 1layhid2.to.2layhid2                     0.009488240
## 1layhid3.to.2layhid2                    -0.441697573
## 1layhid4.to.2layhid2                     2.771248429
## 1layhid5.to.2layhid2                    -0.165124762
## 1layhid6.to.2layhid2                     6.602841301
## 1layhid7.to.2layhid2                     0.938749223
## 1layhid8.to.2layhid2                     1.804913726
## 1layhid9.to.2layhid2                     0.039935305
## 1layhid10.to.2layhid2                   -1.918654046
## 1layhid11.to.2layhid2                    2.415135721
## 1layhid12.to.2layhid2                   -1.836166652
## Intercept.to.2layhid3                   -0.229977002
## 1layhid1.to.2layhid3                     1.652848603
## 1layhid2.to.2layhid3                     1.174945298
## 1layhid3.to.2layhid3                    -0.503708650
## 1layhid4.to.2layhid3                    -3.864295244
## 1layhid5.to.2layhid3                    -1.843449649
## 1layhid6.to.2layhid3                    -5.392544988
## 1layhid7.to.2layhid3                     0.590437636
## 1layhid8.to.2layhid3                    -1.391825631
## 1layhid9.to.2layhid3                     0.919346699
## 1layhid10.to.2layhid3                    0.513684171
## 1layhid11.to.2layhid3                   -0.576769639
## 1layhid12.to.2layhid3                   -0.003670397
## Intercept.to.2layhid4                    1.348372379
## 1layhid1.to.2layhid4                     1.551474242
## 1layhid2.to.2layhid4                     1.858382160
## 1layhid3.to.2layhid4                     1.419966215
## 1layhid4.to.2layhid4                    -5.777316038
## 1layhid5.to.2layhid4                    -3.071313438
## 1layhid6.to.2layhid4                    -1.932326533
## 1layhid7.to.2layhid4                     1.546759445
## 1layhid8.to.2layhid4                    -3.425978432
## 1layhid9.to.2layhid4                    -2.398879893
## 1layhid10.to.2layhid4                   -1.257115148
## 1layhid11.to.2layhid4                   -1.771480993
## 1layhid12.to.2layhid4                    1.911615455
## Intercept.to.2layhid5                    1.766089959
## 1layhid1.to.2layhid5                    -0.329230607
## 1layhid2.to.2layhid5                     1.538846820
## 1layhid3.to.2layhid5                    -4.202986015
## 1layhid4.to.2layhid5                    -3.864286126
## 1layhid5.to.2layhid5                    -1.818347556
## 1layhid6.to.2layhid5                    -1.005359466
## 1layhid7.to.2layhid5                     1.723856572
## 1layhid8.to.2layhid5                     0.076832497
## 1layhid9.to.2layhid5                    -0.645115592
## 1layhid10.to.2layhid5                    2.367598012
## 1layhid11.to.2layhid5                    0.592196196
## 1layhid12.to.2layhid5                   -1.089197342
## Intercept.to.2layhid6                    1.274730570
## 1layhid1.to.2layhid6                     1.240341643
## 1layhid2.to.2layhid6                    -0.313175373
## 1layhid3.to.2layhid6                     0.645066119
## 1layhid4.to.2layhid6                    -5.677724241
## 1layhid5.to.2layhid6                    -1.421422837
## 1layhid6.to.2layhid6                    -8.180485860
## 1layhid7.to.2layhid6                    -0.008429381
## 1layhid8.to.2layhid6                    -1.449470009
## 1layhid9.to.2layhid6                     0.967375251
## 1layhid10.to.2layhid6                    0.180707619
## 1layhid11.to.2layhid6                   -0.915076111
## 1layhid12.to.2layhid6                    0.143130101
## Intercept.to.2layhid7                    1.004351658
## 1layhid1.to.2layhid7                    -1.146536406
## 1layhid2.to.2layhid7                    -1.355934125
## 1layhid3.to.2layhid7                     0.799850254
## 1layhid4.to.2layhid7                    -0.604847228
## 1layhid5.to.2layhid7                     0.933446389
## 1layhid6.to.2layhid7                     1.768515139
## 1layhid7.to.2layhid7                     0.335922213
## 1layhid8.to.2layhid7                     1.110871090
## 1layhid9.to.2layhid7                     1.681607074
## 1layhid10.to.2layhid7                   -0.322476176
## 1layhid11.to.2layhid7                   -0.878984023
## 1layhid12.to.2layhid7                   -2.607971805
## Intercept.to.2layhid8                   -0.292592130
## 1layhid1.to.2layhid8                    -1.840614642
## 1layhid2.to.2layhid8                     0.977641797
## 1layhid3.to.2layhid8                    -1.884970580
## 1layhid4.to.2layhid8                    -6.572789519
## 1layhid5.to.2layhid8                    -0.672038777
## 1layhid6.to.2layhid8                    -1.175080809
## 1layhid7.to.2layhid8                     1.763570440
## 1layhid8.to.2layhid8                    -1.653196500
## 1layhid9.to.2layhid8                     8.608387238
## 1layhid10.to.2layhid8                    1.485866934
## 1layhid11.to.2layhid8                    0.084389728
## 1layhid12.to.2layhid8                    0.694561184
## Intercept.to.2layhid9                    1.859122552
## 1layhid1.to.2layhid9                    -2.242048963
## 1layhid2.to.2layhid9                     0.311373369
## 1layhid3.to.2layhid9                    -0.408972629
## 1layhid4.to.2layhid9                     2.654800123
## 1layhid5.to.2layhid9                     1.373827975
## 1layhid6.to.2layhid9                     2.165314430
## 1layhid7.to.2layhid9                    -0.738787464
## 1layhid8.to.2layhid9                     1.597292239
## 1layhid9.to.2layhid9                     0.235448926
## 1layhid10.to.2layhid9                   -1.059953638
## 1layhid11.to.2layhid9                   -1.358326117
## 1layhid12.to.2layhid9                    0.131296088
## Intercept.to.2layhid10                  -0.228529330
## 1layhid1.to.2layhid10                   -0.639343962
## 1layhid2.to.2layhid10                   -1.170062748
## 1layhid3.to.2layhid10                    5.491049169
## 1layhid4.to.2layhid10                   18.871623170
## 1layhid5.to.2layhid10                    0.187857362
## 1layhid6.to.2layhid10                   48.210103555
## 1layhid7.to.2layhid10                   -2.741781319
## 1layhid8.to.2layhid10                    2.279345023
## 1layhid9.to.2layhid10                   -1.764126490
## 1layhid10.to.2layhid10                  60.983531482
## 1layhid11.to.2layhid10                   1.791077135
## 1layhid12.to.2layhid10                  -2.154817861
## Intercept.to.2layhid11                   0.310184621
## 1layhid1.to.2layhid11                    0.929315128
## 1layhid2.to.2layhid11                    1.733396535
## 1layhid3.to.2layhid11                   -1.738477589
## 1layhid4.to.2layhid11                   -0.461068387
## 1layhid5.to.2layhid11                   -3.026813295
## 1layhid6.to.2layhid11                  -10.374164168
## 1layhid7.to.2layhid11                    2.396163495
## 1layhid8.to.2layhid11                   -2.170183053
## 1layhid9.to.2layhid11                    2.098585031
## 1layhid10.to.2layhid11                   0.343034781
## 1layhid11.to.2layhid11                  -1.645528101
## 1layhid12.to.2layhid11                   0.164578193
## Intercept.to.2layhid12                  -0.819019587
## 1layhid1.to.2layhid12                    1.244423810
## 1layhid2.to.2layhid12                    3.253053457
## 1layhid3.to.2layhid12                   -4.464585978
## 1layhid4.to.2layhid12                  -19.421357033
## 1layhid5.to.2layhid12                    0.774749485
## 1layhid6.to.2layhid12                   -0.471638330
## 1layhid7.to.2layhid12                   -0.009282565
## 1layhid8.to.2layhid12                   -1.060261936
## 1layhid9.to.2layhid12                    5.951380358
## 1layhid10.to.2layhid12                  -2.000668334
## 1layhid11.to.2layhid12                  -1.295409172
## 1layhid12.to.2layhid12                   0.055991751
## Intercept.to.Attrition                   0.836625365
## 2layhid1.to.Attrition                   -0.569625117
## 2layhid2.to.Attrition                   35.783265394
## 2layhid3.to.Attrition                   -1.803498662
## 2layhid4.to.Attrition                   -0.898646145
## 2layhid5.to.Attrition                   -0.452126282
## 2layhid6.to.Attrition                   -0.227631586
## 2layhid7.to.Attrition                   15.900752803
## 2layhid8.to.Attrition                  -16.665026013
## 2layhid9.to.Attrition                    4.799038931
## 2layhid10.to.Attrition                  11.821454166
## 2layhid11.to.Attrition                 -30.114904967
## 2layhid12.to.Attrition                  -5.471753984

Variable importance

testy = test_nn_scale$Attrition
testx = -(test_nn_scale$Attrition)

Prediction on the test set

empnn6.results <- compute(emp_nn6, test_nn_scale)
results6 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn6.results$net.result)
head(results6)
##   actual   prediction
## 1      0 1.556773e-13
## 2      0 1.447750e-09
## 3      0 9.999842e-01
## 4      0 5.225753e-12
## 5      0 8.070033e-15
## 6      0 4.543550e-12

Compute RMSE

#Compute error statistics
resultnnfit6 <- empnn6.results$net.result
results6 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn6.results$net.result)
head(results6)
##   actual   prediction
## 1      0 1.556773e-13
## 2      0 1.447750e-09
## 3      0 9.999842e-01
## 4      0 5.225753e-12
## 5      0 8.070033e-15
## 6      0 4.543550e-12
RMSE6 = sqrt(sum(abs(test_nn_scale - empnn6.results$net.result))^2)
RMSE6
## [1] 10673.91
resultnnfit6 <- empnn6.results$net.result
resultnnfit6 = ifelse(resultnnfit6>0.5, 1, 0)

confusionMatrix(data = as.factor(resultnnfit6), reference = as.factor(testy), 
                positive="1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 720  40
##          1  22 113
##                                           
##                Accuracy : 0.9307          
##                  95% CI : (0.9121, 0.9465)
##     No Information Rate : 0.8291          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.7436          
##                                           
##  Mcnemar's Test P-Value : 0.03085         
##                                           
##             Sensitivity : 0.7386          
##             Specificity : 0.9704          
##          Pos Pred Value : 0.8370          
##          Neg Pred Value : 0.9474          
##              Prevalence : 0.1709          
##          Detection Rate : 0.1263          
##    Detection Prevalence : 0.1508          
##       Balanced Accuracy : 0.8545          
##                                           
##        'Positive' Class : 1               
## 
#ROC Curve#
library(ROSE)

roc.curve(test_nn_scale$Attrition, empnn6.results$net.result)
## Warning in if (cl <- class(predicted) == "factor" | class(predicted) == : the
## condition has length > 1 and only the first element will be used

## Area under the curve (AUC): 0.835
#ROC
roc_comp <- ROCR::prediction(predictions = empnn6.results$net.result, labels = test_nn_scale$Attrition)
roc_comp <- ROCR::performance(roc_comp , "tpr" , "fpr")
plot(roc_comp,
     colorize = TRUE,
     print.cutoffs.at= seq(0,1,0.05),
     text.adj=c(-0.2,1.7))

Tune the model using backpropagation and learning rate

# Tuning with different activation functions



emp_nn9 <- neuralnet(Attrition ~., data=train_nn_scale, 
                      hidden = c(12, 12), linear.output=FALSE, 
                      threshold=0.01,
                      act.fct = "logistic",
                      algorithm = "backprop",
                      learningrate = .01)

Plot by weighted values

plot(emp_nn9)
testy = test_nn_scale$Attrition
testx = -(test_nn_scale$Attrition)

Prediction on the test set

empnn9.results <- compute(emp_nn9, test_nn_scale)
results9 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn6.results$net.result)
head(results6)
##   actual   prediction
## 1      0 1.556773e-13
## 2      0 1.447750e-09
## 3      0 9.999842e-01
## 4      0 5.225753e-12
## 5      0 8.070033e-15
## 6      0 4.543550e-12

Compute RMSE

#Compute error statistics
resultnnfit9 <- empnn9.results$net.result
results9 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn9.results$net.result)
head(results9)
##   actual   prediction
## 1      0 1.577895e-06
## 2      0 3.074286e-04
## 3      0 1.000000e+00
## 4      0 1.074922e-05
## 5      0 2.284982e-03
## 6      0 2.869911e-08
RMSE9 = sqrt(sum(abs(test_nn_scale - empnn6.results$net.result))^2)
RMSE9
## [1] 10673.91
resultnnfit9 <- empnn9.results$net.result
resultnnfit9 = ifelse(resultnnfit9>0.5, 1, 0)

confusionMatrix(data = as.factor(resultnnfit9), reference = as.factor(testy), 
                positive="1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 727  30
##          1  15 123
##                                           
##                Accuracy : 0.9497          
##                  95% CI : (0.9333, 0.9631)
##     No Information Rate : 0.8291          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.8154          
##                                           
##  Mcnemar's Test P-Value : 0.03689         
##                                           
##             Sensitivity : 0.8039          
##             Specificity : 0.9798          
##          Pos Pred Value : 0.8913          
##          Neg Pred Value : 0.9604          
##              Prevalence : 0.1709          
##          Detection Rate : 0.1374          
##    Detection Prevalence : 0.1542          
##       Balanced Accuracy : 0.8919          
##                                           
##        'Positive' Class : 1               
## 
#ROC Curve#
library(ROSE)

roc.curve(test_nn_scale$Attrition, empnn9.results$net.result)
## Warning in if (cl <- class(predicted) == "factor" | class(predicted) == : the
## condition has length > 1 and only the first element will be used

## Area under the curve (AUC): 0.865
#ROC
roc_comp <- ROCR::prediction(predictions = empnn9.results$net.result, labels = test_nn_scale$Attrition)
roc_comp <- ROCR::performance(roc_comp , "tpr" , "fpr")
plot(roc_comp,
     colorize = TRUE,
     print.cutoffs.at= seq(0,1,0.05),
     text.adj=c(-0.2,1.7))

## Attempt number 1

#set.seed(2890)
#emp_nn1 <- neuralnet(Attrition ~ ., data = train_nn_scale, hidden = 1, linear.output=FALSE, threshold=0.01)
#result.matrix, a matrix containing the error, weights between input, hidden and output for each replication 
#It also includes reached threshold, needed steps, AIC and BIC computed if likelihood=TRUE). 
#Each column represents one replication.
#emp_nn1$result.matrix
#plot(emp_nn1)
#empnn1.results <- compute(emp_nn1, test_nn_scale)
#head(empnn1.results)
#Compute error statistics
#resultnnfit1 <- empnn1.results$net.result
#results <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn1.results$net.result)
#head(results)

#RMSE1 = sqrt(sum(abs(test_nn - empnn1.results$net.result))^2)
#RMSE1
#resultnnfit1 <- empnn1.results$net.result
#resultnnfit1 = ifelse(resultnnfit1>0.5, 1, 0)

#confusionMatrix(data = as.factor(resultnnfit1), reference = as.factor(testy), 
#                positive="1")

Attempt number 2

#set.seed(2890)

# Tuning with a 3, 2 hidden layer configuration
#emp_nn2 <- neuralnet(Attrition ~ ., data = train_nn_scale, hidden = c(2), linear.output=FALSE, threshold=0.01)
#emp_nn2$result.matrix
#plot(emp_nn2)
#empnn2.results <- compute(emp_nn2, test_nn_scale)
#head(empnn2.results)

#results2 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn2.results$net.result)
#head(results2)
#Compute error statistics
#resultnnfit2 <- empnn2.results$net.result
#results2 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn2.results$net.result)
#head(results2)

#RMSE2 = sqrt(sum(abs(test_nn_scale - empnn2.results$net.result))^2)
#RMSE2
#resultnnfit2 <- empnn2.results$net.result
#resultnnfit2 = ifelse(resultnnfit3>0.5, 1, 0)

#confusionMatrix(data = as.factor(resultnnfit2), reference = as.factor(testy), 
#                positive="1")

Attempt number 3

# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation

#set.seed(2890)

#emp_nn3 <- neuralnet(Attrition ~., data=train_nn_scale, 
#                      hidden = c(4, 1), linear.output=FALSE, 
#                      threshold=0.01,
#                      act.fct = "logistic")
#plot(emp_nn3)
#empnn3.results <- compute(emp_nn3, test_nn_scale)
#results3 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn3.results$net.result)
#head(results3)
#Compute error statistics
#resultnnfit3 <- empnn3.results$net.result
#results3 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn3.results$net.result)
#head(results3)

#RMSE3 = sqrt(sum(abs(test_nn_scale - empnn3.results$net.result))^2)
#RMSE3
#resultnnfit3 <- empnn3.results$net.result
#resultnnfit3 = ifelse(resultnnfit3>0.5, 1, 0)

#confusionMatrix(data = as.factor(resultnnfit3), reference = as.factor(testy), 
#                positive="1")

Attempt number 4

# Tuning with different activation functions
#set.seed(2890)

#emp_nn4 <- neuralnet(Attrition ~., data=train_nn_scale, 
#                      hidden = c(4, 4), linear.output=FALSE, 
#                      threshold=0.01,
#                      act.fct = "logistic")
#plot(emp_nn4)
#empnn4.results <- compute(emp_nn4, test_nn_scale)
#results4 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn4.results$net.result)
#head(results4)
#Compute error statistics
#resultnnfit4 <- empnn4.results$net.result
#results4 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn4.results$net.result)
#head(results4)

#RMSE4 = sqrt(sum(abs(test_nn_scale - empnn4.results$net.result))^2)
#RMSE4
#resultnnfit4 <- empnn4.results$net.result
#resultnnfit4 = ifelse(resultnnfit4>0.5, 1, 0)

#confusionMatrix(data = as.factor(resultnnfit4), reference = as.factor(testy), 
#                positive="1")

Attempt number 5

# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation
#set.seed(2890)


#emp_nn5 <- neuralnet(Attrition ~., data=train_nn_scale, 
#                      hidden = c(8, 8), linear.output=FALSE, 
#                      threshold=0.01,
#                      act.fct = "logistic")
#plot(emp_nn5)
#empnn5.results <- compute(emp_nn5, test_nn_scale)
#results5 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn5.results$net.result)
#head(results5)
#Compute error statistics
#resultnnfit5 <- empnn5.results$net.result
#results5 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn5.results$net.result)
#head(results5)

#RMSE5 = sqrt(sum(abs(test_nn_scale - empnn5.results$net.result))^2)
#RMSE5
#resultnnfit5 <- empnn5.results$net.result
#resultnnfit5 = ifelse(resultnnfit5>0.5, 1, 0)

#confusionMatrix(data = as.factor(resultnnfit5), reference = as.factor(testy), 
#                positive="1")

Attempt number 7

# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation
#set.seed(2890)


#emp_nn7 <- neuralnet(Attrition ~., data=train_nn_scale, 
#                      hidden = c(12, 12), linear.output=FALSE, 
#                      threshold=0.01,
#                      act.fct = "logistic")
#plot(emp_nn7)
#empnn7.results <- compute(emp_nn7, test_nn_scale)
#results7 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn7.results$net.result)
#head(results7)
#Compute error statistics
#resultnnfit7 <- empnn7.results$net.result
#results7 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn7.results$net.result)
#head(results7)

#RMSE7 = sqrt(sum(abs(test_nn_scale - empnn7.results$net.result))^2)
#RMSE7
#resultnnfit7 <- empnn7.results$net.result
#resultnnfit7 = ifelse(resultnnfit7>0.5, 1, 0)

#confusionMatrix(data = as.factor(resultnnfit7), reference = as.factor(testy), 
#                positive="1")

Attempt number 8

# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation
#set.seed(2890)


#emp_nn8 <- neuralnet(Attrition ~., data=train_nn_scale, 
#                      hidden = c(11, 11), linear.output=FALSE, 
#                      threshold=0.01,
#                      act.fct = "logistic")
#plot(emp_nn8)
#empnn8.results <- compute(emp_nn8, test_nn_scale)
#results8 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn8.results$net.result)
#head(results8)
#Compute error statistics
#resultnnfit8 <- empnn7.results$net.result
#results8 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn8.results$net.result)
#head(results8)

#RMSE8 = sqrt(sum(abs(test_nn_scale - empnn8.results$net.result))^2)
#RMSE8
#resultnnfit8 <- empnn8.results$net.result
#resultnnfit8 = ifelse(resultnnfit7>0.5, 1, 0)

#confusionMatrix(data = as.factor(resultnnfit8), reference = as.factor(testy), 
#                positive="1")