#opts_knit$set(root.dir = "C:/Users/willi/Desktop/Georgetown/RStudio Datasource")
Employee = read.csv("C:/Users/willi/Desktop/Georgetown/RStudio Datasource/Employee_Data_Project.csv")
Employee1 <- Employee %>% #Change categorical to numeric
mutate(Attrition = if_else(Attrition == "Yes", 1, 0)) #%>% #Attrition 1 = "No", 0 = "Yes"
#select(-"StandardHours", "EmployeeID")) #remove standard hours bc they are all the same
Employee1 <- subset(Employee1, select = -StandardHours)
Employee1 <- subset(Employee1, select = -EmployeeID)
###EDA
dim(Employee1) #4410 Observations, 18 variables
## [1] 4410 16
head(Employee1)
## Age Attrition BusinessTravel DistanceFromHome Education Gender JobLevel
## 1 51 0 Travel_Rarely 6 2 Female 1
## 2 31 1 Travel_Frequently 10 1 Female 1
## 3 32 0 Travel_Frequently 17 4 Male 4
## 4 38 0 Non-Travel 2 5 Male 3
## 5 32 0 Travel_Rarely 10 1 Male 1
## 6 46 0 Travel_Rarely 8 3 Female 4
## MaritalStatus Income NumCompaniesWorked TotalWorkingYears
## 1 Married 131160 1 1
## 2 Single 41890 0 6
## 3 Married 193280 1 5
## 4 Married 83210 3 13
## 5 Single 23420 4 9
## 6 Married 40710 3 28
## TrainingTimesLastYear YearsAtCompany YearsWithCurrManager
## 1 6 1 0
## 2 3 5 4
## 3 2 5 3
## 4 5 8 5
## 5 2 6 4
## 6 5 7 7
## EnvironmentSatisfaction JobSatisfaction
## 1 3 4
## 2 3 2
## 3 2 2
## 4 4 4
## 5 4 1
## 6 3 2
prop.table(table(Employee1$Attrition)) ##Imbalanced dataset with only 16.1% attrition rate (711/3699)
##
## 0 1
## 0.8387755 0.1612245
set.seed(123) # for reproducibility
part <-sample(1:3, size=nrow(Employee1), prob=c(0.6, 0.20, 0.20), replace=TRUE)
#Create a train, validation and test from original data frame
train <-Employee1[part == 1, ]
valid <-Employee1[part == 2, ]
test <-Employee1[part == 3, ]
#observe distribution of partitioned data
table(train$Attrition)
##
## 0 1
## 2215 434
table(valid$Attrition)
##
## 0 1
## 742 153
table(test$Attrition)
##
## 0 1
## 742 124
sapply(train,function(x) sum(is.na(x)))
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 13 3 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 14
## JobSatisfaction
## 13
sapply(valid,function(x) sum(is.na(x)))
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 1 3 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 5
## JobSatisfaction
## 4
sapply(test,function(x) sum(is.na(x)))
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 5 3 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 6
## JobSatisfaction
## 3
Missing values introduced by coercion to be transformed
#replace Number of Companies Worked
train$NumCompaniesWorked[is.na(train$NumCompaniesWorked)] = median(train$NumCompaniesWorked, na.rm = TRUE)
test$NumCompaniesWorked[is.na(test$NumCompaniesWorked)] = median(test$NumCompaniesWorked, na.rm = TRUE)
valid$NumCompaniesWorked[is.na(valid$NumCompaniesWorked)] = median(valid$NumCompaniesWorked, na.rm = TRUE)
#Replacing TotalWorkingYears with median
train$TotalWorkingYears[is.na(train$TotalWorkingYears)] = median(train$TotalWorkingYears, na.rm = TRUE)
test$TotalWorkingYears[is.na(test$TotalWorkingYears)] = median(test$TotalWorkingYears, na.rm = TRUE)
valid$TotalWorkingYears[is.na(valid$TotalWorkingYears)] = median(valid$TotalWorkingYears, na.rm = TRUE)
#Replacing EnvironmentSatisfaction with median
train$EnvironmentSatisfaction[is.na(train$EnvironmentSatisfaction)] = median(train$EnvironmentSatisfaction, na.rm = TRUE)
test$EnvironmentSatisfaction[is.na(test$EnvironmentSatisfaction)] = median(test$EnvironmentSatisfaction, na.rm = TRUE)
valid$EnvironmentSatisfaction[is.na(valid$EnvironmentSatisfaction)] = median(valid$EnvironmentSatisfaction, na.rm = TRUE)
#Replacing JobSatisfaction with median
train$JobSatisfaction[is.na(train$JobSatisfaction)] = median(train$JobSatisfaction, na.rm = TRUE)
test$JobSatisfaction[is.na(test$JobSatisfaction)] = median(test$JobSatisfaction, na.rm = TRUE)
valid$JobSatisfaction[is.na(valid$JobSatisfaction)] = median(valid$JobSatisfaction, na.rm = TRUE)
#Check again to make sure all replacements are done
sapply(train,function(x) sum(is.na(x)))
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 0
## JobSatisfaction
## 0
sapply(valid,function(x) sum(is.na(x)))
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 0
## JobSatisfaction
## 0
sapply(test,function(x) sum(is.na(x)))
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 0
## JobSatisfaction
## 0
#Check for values that are blank
sapply(train, function(x){sum(x=='')})
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 0
## JobSatisfaction
## 0
sapply(valid, function(x){sum(x=='')})
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 0
## JobSatisfaction
## 0
sapply(test, function(x){sum(x=='')})
## Age Attrition BusinessTravel
## 0 0 0
## DistanceFromHome Education Gender
## 0 0 0
## JobLevel MaritalStatus Income
## 0 0 0
## NumCompaniesWorked TotalWorkingYears TrainingTimesLastYear
## 0 0 0
## YearsAtCompany YearsWithCurrManager EnvironmentSatisfaction
## 0 0 0
## JobSatisfaction
## 0
#Changing categorical variables to factor and leaving integers
train <- train %>%
mutate(Attrition = as.factor(Attrition),
BusinessTravel = as.numeric(as.factor(BusinessTravel)), #change character variables to numeric
DistanceFromHome = as.integer(DistanceFromHome),
Education = as.factor(Education),
Gender = as.numeric(as.factor(Gender)), #change character variables to numeric
JobLevel = as.factor(JobLevel),
MaritalStatus = as.numeric(as.factor(MaritalStatus)), #change character variables to numeric
Income = as.integer(Income),
NumCompaniesWorked = as.integer(NumCompaniesWorked),
TotalWorkingYears = as.integer(TotalWorkingYears),
TrainingTimesLastYear = as.integer(TrainingTimesLastYear),
YearsAtCompany = as.integer(YearsAtCompany),
YearsWithCurrManager = as.integer(YearsWithCurrManager),
EnvironmentSatisfaction = as.factor(EnvironmentSatisfaction),
JobSatisfaction = as.factor(JobSatisfaction))
valid <- valid %>%
mutate(Attrition = as.factor(Attrition),
BusinessTravel = as.numeric(as.factor(BusinessTravel)), #change character variables to numeric
DistanceFromHome = as.integer(DistanceFromHome),
Education = as.factor(Education),
Gender = as.numeric(as.factor(Gender)), #change character variables to numeric
JobLevel = as.factor(JobLevel),
MaritalStatus = as.numeric(as.factor(MaritalStatus)), #change character variables to numeric
Income = as.integer(Income),
NumCompaniesWorked = as.integer(NumCompaniesWorked),
TotalWorkingYears = as.integer(TotalWorkingYears),
TrainingTimesLastYear = as.integer(TrainingTimesLastYear),
YearsAtCompany = as.integer(YearsAtCompany),
YearsWithCurrManager = as.integer(YearsWithCurrManager),
EnvironmentSatisfaction = as.factor(EnvironmentSatisfaction),
JobSatisfaction = as.factor(JobSatisfaction))
test <- test %>%
mutate(Attrition = as.factor(Attrition),
BusinessTravel = as.numeric(as.factor(BusinessTravel)), #change character variables to numeric
DistanceFromHome = as.integer(DistanceFromHome),
Education = as.factor(Education),
Gender = as.numeric(as.factor(Gender)), #change character variables to numeric
JobLevel = as.factor(JobLevel),
MaritalStatus = as.numeric(as.factor(MaritalStatus)), #change character variables to numeric
Income = as.integer(Income),
NumCompaniesWorked = as.integer(NumCompaniesWorked),
TotalWorkingYears = as.integer(TotalWorkingYears),
TrainingTimesLastYear = as.integer(TrainingTimesLastYear),
YearsAtCompany = as.integer(YearsAtCompany),
YearsWithCurrManager = as.integer(YearsWithCurrManager),
EnvironmentSatisfaction = as.factor(EnvironmentSatisfaction),
JobSatisfaction = as.factor(JobSatisfaction))
set.seed(123)
data_balanced_over <- ovun.sample(Attrition ~ ., data = train, method = "under", N= 860)$data
table(data_balanced_over$Attrition)
##
## 0 1
## 426 434
The initial model developed leveraged all variables provided within the Employee dataset inclusive of both factor and numeric. Factor variables were transformed using dummy coding and incorporated into the model. In addition, variables which were in charachter form were turned to numeric and also dummy coded.
Transform non numeric data to dummy variables and character data. Data that was in character form was already transformed to ‘numeric’
#Create dummy coding of all variables that are factors
#training dataset
train2 <- dummy_cols(train, select_columns = 'Education')
train2 <- dummy_cols(train, select_columns = 'JobLevel')
train2 <- dummy_cols(train, select_columns = 'EnvironmentSatisfaction')
train2 <- dummy_cols(train, select_columns = 'JobSatisfaction')
train2 <- dummy_cols(train, select_columns = 'Gender')
train2 <- dummy_cols(train, select_columns = 'MaritalStatus')
train2 <- dummy_cols(train, select_columns = 'BusinessTravel')
#validation dataset
valid2 <- dummy_cols(valid, select_columns = 'Education')
valid2 <- dummy_cols(valid, select_columns = 'JobLevel')
valid2 <- dummy_cols(valid, select_columns = 'EnvironmentSatisfaction')
valid2 <- dummy_cols(valid, select_columns = 'JobSatisfaction')
valid2 <- dummy_cols(valid, select_columns = 'Gender')
valid2 <- dummy_cols(valid, select_columns = 'MaritalStatus')
valid2 <- dummy_cols(valid, select_columns = 'BusinessTravel')
#test dataset
test2 <- dummy_cols(test, select_columns = 'Education')
test2 <- dummy_cols(test, select_columns = 'JobLevel')
test2 <- dummy_cols(test, select_columns = 'EnvironmentSatisfaction')
test2 <- dummy_cols(test, select_columns = 'JobSatisfaction')
test2 <- dummy_cols(test, select_columns = 'Gender')
test2 <- dummy_cols(test, select_columns = 'MaritalStatus')
test2 <- dummy_cols(test, select_columns = 'BusinessTravel')
str(train2)
## 'data.frame': 2649 obs. of 19 variables:
## $ Age : int 51 32 46 28 31 25 36 47 28 21 ...
## $ Attrition : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 2 1 1 ...
## $ BusinessTravel : num 3 2 3 3 3 1 3 1 3 3 ...
## $ DistanceFromHome : int 6 17 8 11 1 7 28 1 1 3 ...
## $ Education : Factor w/ 5 levels "1","2","3","4",..: 2 4 3 2 3 4 1 1 3 2 ...
## $ Gender : num 1 2 1 2 2 1 2 2 2 2 ...
## $ JobLevel : Factor w/ 5 levels "1","2","3","4",..: 1 4 4 2 3 4 1 1 1 1 ...
## $ MaritalStatus : num 2 2 2 3 2 1 2 2 2 3 ...
## $ Income : int 131160 193280 40710 58130 20440 134640 33770 57620 25920 42130 ...
## $ NumCompaniesWorked : int 1 1 3 2 0 1 0 1 1 1 ...
## $ TotalWorkingYears : int 1 5 28 5 10 6 16 10 5 3 ...
## $ TrainingTimesLastYear : int 6 2 5 2 2 2 2 4 2 3 ...
## $ YearsAtCompany : int 1 5 7 0 9 6 15 10 5 3 ...
## $ YearsWithCurrManager : int 0 3 7 0 8 5 11 9 4 0 ...
## $ EnvironmentSatisfaction: Factor w/ 4 levels "1","2","3","4": 3 2 3 1 2 2 3 1 4 4 ...
## $ JobSatisfaction : Factor w/ 4 levels "1","2","3","4": 4 2 2 3 4 1 4 2 4 3 ...
## $ BusinessTravel_1 : int 0 0 0 0 0 1 0 1 0 0 ...
## $ BusinessTravel_2 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ BusinessTravel_3 : int 1 0 1 1 1 0 1 0 1 1 ...
#remove variables
train2 <- dummy_cols(train, select_columns = c("Education", "JobLevel", "EnvironmentSatisfaction", "JobSatisfaction", "BusinessTravel", "MaritalStatus", "Gender"),
remove_selected_columns = TRUE)
valid2 <- dummy_cols(valid, select_columns = c("Education", "JobLevel", "EnvironmentSatisfaction", "JobSatisfaction", "BusinessTravel", "MaritalStatus", "Gender"),
remove_selected_columns = TRUE)
test2 <- dummy_cols(valid, select_columns = c("Education", "JobLevel", "EnvironmentSatisfaction", "JobSatisfaction", "BusinessTravel", "MaritalStatus", "Gender"),
remove_selected_columns = TRUE)
str(train2)
## 'data.frame': 2649 obs. of 35 variables:
## $ Age : int 51 32 46 28 31 25 36 47 28 21 ...
## $ Attrition : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 2 1 1 ...
## $ DistanceFromHome : int 6 17 8 11 1 7 28 1 1 3 ...
## $ Income : int 131160 193280 40710 58130 20440 134640 33770 57620 25920 42130 ...
## $ NumCompaniesWorked : int 1 1 3 2 0 1 0 1 1 1 ...
## $ TotalWorkingYears : int 1 5 28 5 10 6 16 10 5 3 ...
## $ TrainingTimesLastYear : int 6 2 5 2 2 2 2 4 2 3 ...
## $ YearsAtCompany : int 1 5 7 0 9 6 15 10 5 3 ...
## $ YearsWithCurrManager : int 0 3 7 0 8 5 11 9 4 0 ...
## $ Education_1 : int 0 0 0 0 0 0 1 1 0 0 ...
## $ Education_2 : int 1 0 0 1 0 0 0 0 0 1 ...
## $ Education_3 : int 0 0 1 0 1 0 0 0 1 0 ...
## $ Education_4 : int 0 1 0 0 0 1 0 0 0 0 ...
## $ Education_5 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ JobLevel_1 : int 1 0 0 0 0 0 1 1 1 1 ...
## $ JobLevel_2 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ JobLevel_3 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ JobLevel_4 : int 0 1 1 0 0 1 0 0 0 0 ...
## $ JobLevel_5 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ EnvironmentSatisfaction_1: int 0 0 0 1 0 0 0 1 0 0 ...
## $ EnvironmentSatisfaction_2: int 0 1 0 0 1 1 0 0 0 0 ...
## $ EnvironmentSatisfaction_3: int 1 0 1 0 0 0 1 0 0 0 ...
## $ EnvironmentSatisfaction_4: int 0 0 0 0 0 0 0 0 1 1 ...
## $ JobSatisfaction_1 : int 0 0 0 0 0 1 0 0 0 0 ...
## $ JobSatisfaction_2 : int 0 1 1 0 0 0 0 1 0 0 ...
## $ JobSatisfaction_3 : int 0 0 0 1 0 0 0 0 0 1 ...
## $ JobSatisfaction_4 : int 1 0 0 0 1 0 1 0 1 0 ...
## $ BusinessTravel_1 : int 0 0 0 0 0 1 0 1 0 0 ...
## $ BusinessTravel_2 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ BusinessTravel_3 : int 1 0 1 1 1 0 1 0 1 1 ...
## $ MaritalStatus_1 : int 0 0 0 0 0 1 0 0 0 0 ...
## $ MaritalStatus_2 : int 1 1 1 0 1 0 1 1 1 0 ...
## $ MaritalStatus_3 : int 0 0 0 1 0 0 0 0 0 1 ...
## $ Gender_1 : int 1 0 1 0 0 1 0 0 0 0 ...
## $ Gender_2 : int 0 1 0 1 1 0 1 1 1 1 ...
train_nn = train2 %>%
mutate(Attrition = as.integer(Attrition))
test_nn = test2 %>%
mutate(Attrition = as.integer(Attrition))
valid_nn = valid2 %>%
mutate(Attrition = as.integer(Attrition))
glimpse(train_nn)
## Rows: 2,649
## Columns: 35
## $ Age <int> 51, 32, 46, 28, 31, 25, 36, 47, 28, 21, 37, ~
## $ Attrition <int> 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2,~
## $ DistanceFromHome <int> 6, 17, 8, 11, 1, 7, 28, 1, 1, 3, 1, 7, 9, 5,~
## $ Income <int> 131160, 193280, 40710, 58130, 20440, 134640,~
## $ NumCompaniesWorked <int> 1, 1, 3, 2, 0, 1, 0, 1, 1, 1, 2, 7, 1, 1, 3,~
## $ TotalWorkingYears <int> 1, 5, 28, 5, 10, 6, 16, 10, 5, 3, 15, 10, 5,~
## $ TrainingTimesLastYear <int> 6, 2, 5, 2, 2, 2, 2, 4, 2, 3, 2, 5, 3, 2, 2,~
## $ YearsAtCompany <int> 1, 5, 7, 0, 9, 6, 15, 10, 5, 3, 5, 7, 5, 17,~
## $ YearsWithCurrManager <int> 0, 3, 7, 0, 8, 5, 11, 9, 4, 0, 2, 2, 3, 7, 0~
## $ Education_1 <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,~
## $ Education_2 <int> 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,~
## $ Education_3 <int> 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,~
## $ Education_4 <int> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,~
## $ Education_5 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_1 <int> 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,~
## $ JobLevel_2 <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,~
## $ JobLevel_3 <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_4 <int> 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_5 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_1 <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,~
## $ EnvironmentSatisfaction_2 <int> 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,~
## $ EnvironmentSatisfaction_3 <int> 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_4 <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,~
## $ JobSatisfaction_1 <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ JobSatisfaction_2 <int> 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,~
## $ JobSatisfaction_3 <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,~
## $ JobSatisfaction_4 <int> 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,~
## $ BusinessTravel_1 <int> 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,~
## $ BusinessTravel_2 <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,~
## $ BusinessTravel_3 <int> 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,~
## $ MaritalStatus_1 <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,~
## $ MaritalStatus_2 <int> 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0,~
## $ MaritalStatus_3 <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,~
## $ Gender_1 <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ Gender_2 <int> 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,~
glimpse(test_nn)
## Rows: 895
## Columns: 35
## $ Age <int> 38, 32, 29, 45, 37, 38, 26, 42, 26, 28, 38, ~
## $ Attrition <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,~
## $ DistanceFromHome <int> 2, 10, 18, 17, 1, 8, 1, 4, 4, 7, 5, 1, 7, 13~
## $ Income <int> 83210, 23420, 31430, 79910, 53460, 68700, 10~
## $ NumCompaniesWorked <int> 3, 4, 2, 0, 4, 1, 1, 1, 2, 1, 3, 7, 7, 1, 1,~
## $ TotalWorkingYears <int> 13, 9, 10, 21, 7, 8, 6, 9, 5, 5, 19, 25, 10,~
## $ TrainingTimesLastYear <int> 5, 2, 2, 2, 2, 5, 3, 4, 5, 6, 4, 2, 2, 3, 5,~
## $ YearsAtCompany <int> 8, 6, 0, 20, 5, 8, 6, 20, 3, 5, 10, 7, 7, 10~
## $ YearsWithCurrManager <int> 5, 4, 0, 10, 1, 7, 4, 6, 2, 2, 1, 7, 7, 9, 9~
## $ Education_1 <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ Education_2 <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,~
## $ Education_3 <int> 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0,~
## $ Education_4 <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ Education_5 <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_1 <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,~
## $ JobLevel_2 <int> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ JobLevel_3 <int> 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,~
## $ JobLevel_4 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,~
## $ JobLevel_5 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_1 <int> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,~
## $ EnvironmentSatisfaction_2 <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_3 <int> 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,~
## $ EnvironmentSatisfaction_4 <int> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,~
## $ JobSatisfaction_1 <int> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobSatisfaction_2 <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,~
## $ JobSatisfaction_3 <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,~
## $ JobSatisfaction_4 <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,~
## $ BusinessTravel_1 <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ BusinessTravel_2 <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ BusinessTravel_3 <int> 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,~
## $ MaritalStatus_1 <int> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,~
## $ MaritalStatus_2 <int> 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,~
## $ MaritalStatus_3 <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,~
## $ Gender_1 <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,~
## $ Gender_2 <int> 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0,~
glimpse(valid_nn)
## Rows: 895
## Columns: 35
## $ Age <int> 38, 32, 29, 45, 37, 38, 26, 42, 26, 28, 38, ~
## $ Attrition <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,~
## $ DistanceFromHome <int> 2, 10, 18, 17, 1, 8, 1, 4, 4, 7, 5, 1, 7, 13~
## $ Income <int> 83210, 23420, 31430, 79910, 53460, 68700, 10~
## $ NumCompaniesWorked <int> 3, 4, 2, 0, 4, 1, 1, 1, 2, 1, 3, 7, 7, 1, 1,~
## $ TotalWorkingYears <int> 13, 9, 10, 21, 7, 8, 6, 9, 5, 5, 19, 25, 10,~
## $ TrainingTimesLastYear <int> 5, 2, 2, 2, 2, 5, 3, 4, 5, 6, 4, 2, 2, 3, 5,~
## $ YearsAtCompany <int> 8, 6, 0, 20, 5, 8, 6, 20, 3, 5, 10, 7, 7, 10~
## $ YearsWithCurrManager <int> 5, 4, 0, 10, 1, 7, 4, 6, 2, 2, 1, 7, 7, 9, 9~
## $ Education_1 <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ Education_2 <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,~
## $ Education_3 <int> 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0,~
## $ Education_4 <int> 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ Education_5 <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobLevel_1 <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,~
## $ JobLevel_2 <int> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,~
## $ JobLevel_3 <int> 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,~
## $ JobLevel_4 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,~
## $ JobLevel_5 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_1 <int> 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,~
## $ EnvironmentSatisfaction_2 <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,~
## $ EnvironmentSatisfaction_3 <int> 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,~
## $ EnvironmentSatisfaction_4 <int> 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,~
## $ JobSatisfaction_1 <int> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ JobSatisfaction_2 <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,~
## $ JobSatisfaction_3 <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,~
## $ JobSatisfaction_4 <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,~
## $ BusinessTravel_1 <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ BusinessTravel_2 <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,~
## $ BusinessTravel_3 <int> 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,~
## $ MaritalStatus_1 <int> 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,~
## $ MaritalStatus_2 <int> 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,~
## $ MaritalStatus_3 <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,~
## $ Gender_1 <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,~
## $ Gender_2 <int> 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0,~
scale01 = function(x)((x-min(x))/(max(x)-min(x)))
train_nn_scale = train_nn %>% mutate_all(scale01)
test_nn_scale = test_nn %>% mutate_all(scale01)
valid_nn_scale = valid_nn %>% mutate_all(scale01)
# Tuning with different activation functions
set.seed(2890)
emp_nn6 <- neuralnet(Attrition ~., data=train_nn_scale,
hidden = c(12, 12), linear.output=FALSE,
threshold=0.01,
act.fct = "logistic")
plot(emp_nn6)
emp_nn6$result.matrix
## [,1]
## error 39.018757127
## reached.threshold 0.009755535
## steps 940.000000000
## Intercept.to.1layhid1 0.434740539
## Age.to.1layhid1 1.389763989
## DistanceFromHome.to.1layhid1 -1.026027556
## Income.to.1layhid1 -0.044232059
## NumCompaniesWorked.to.1layhid1 -1.958066159
## TotalWorkingYears.to.1layhid1 -3.075845950
## TrainingTimesLastYear.to.1layhid1 4.522248122
## YearsAtCompany.to.1layhid1 -3.835486953
## YearsWithCurrManager.to.1layhid1 3.067150823
## Education_1.to.1layhid1 4.228482476
## Education_2.to.1layhid1 0.208646372
## Education_3.to.1layhid1 -0.702147547
## Education_4.to.1layhid1 0.540708184
## Education_5.to.1layhid1 1.555915522
## JobLevel_1.to.1layhid1 -0.571056763
## JobLevel_2.to.1layhid1 0.189275705
## JobLevel_3.to.1layhid1 3.442092700
## JobLevel_4.to.1layhid1 5.648673629
## JobLevel_5.to.1layhid1 -2.171466778
## EnvironmentSatisfaction_1.to.1layhid1 -1.249579058
## EnvironmentSatisfaction_2.to.1layhid1 1.111057783
## EnvironmentSatisfaction_3.to.1layhid1 -0.073886489
## EnvironmentSatisfaction_4.to.1layhid1 0.979889042
## JobSatisfaction_1.to.1layhid1 -1.324995008
## JobSatisfaction_2.to.1layhid1 0.915169760
## JobSatisfaction_3.to.1layhid1 6.933164985
## JobSatisfaction_4.to.1layhid1 3.176353198
## BusinessTravel_1.to.1layhid1 0.829675033
## BusinessTravel_2.to.1layhid1 39.035199614
## BusinessTravel_3.to.1layhid1 -0.083371797
## MaritalStatus_1.to.1layhid1 -0.131945136
## MaritalStatus_2.to.1layhid1 2.459167058
## MaritalStatus_3.to.1layhid1 -0.629119674
## Gender_1.to.1layhid1 -0.604408325
## Gender_2.to.1layhid1 -1.238266308
## Intercept.to.1layhid2 1.599980629
## Age.to.1layhid2 6.843607867
## DistanceFromHome.to.1layhid2 -2.602525535
## Income.to.1layhid2 -1.041364098
## NumCompaniesWorked.to.1layhid2 -4.876330980
## TotalWorkingYears.to.1layhid2 2.167928191
## TrainingTimesLastYear.to.1layhid2 4.732866792
## YearsAtCompany.to.1layhid2 -0.744365696
## YearsWithCurrManager.to.1layhid2 -5.222960727
## Education_1.to.1layhid2 14.170853736
## Education_2.to.1layhid2 7.939741164
## Education_3.to.1layhid2 -1.551077723
## Education_4.to.1layhid2 -1.173125950
## Education_5.to.1layhid2 -1.080287816
## JobLevel_1.to.1layhid2 5.287624474
## JobLevel_2.to.1layhid2 -0.442085780
## JobLevel_3.to.1layhid2 -9.619869686
## JobLevel_4.to.1layhid2 -7.611922970
## JobLevel_5.to.1layhid2 7.286342957
## EnvironmentSatisfaction_1.to.1layhid2 -2.483204989
## EnvironmentSatisfaction_2.to.1layhid2 -0.296373767
## EnvironmentSatisfaction_3.to.1layhid2 6.009044811
## EnvironmentSatisfaction_4.to.1layhid2 -1.448770997
## JobSatisfaction_1.to.1layhid2 -1.491467575
## JobSatisfaction_2.to.1layhid2 -5.527640415
## JobSatisfaction_3.to.1layhid2 8.556758168
## JobSatisfaction_4.to.1layhid2 43.989077463
## BusinessTravel_1.to.1layhid2 11.693643301
## BusinessTravel_2.to.1layhid2 2.092818043
## BusinessTravel_3.to.1layhid2 -0.512488195
## MaritalStatus_1.to.1layhid2 55.623592020
## MaritalStatus_2.to.1layhid2 0.960565223
## MaritalStatus_3.to.1layhid2 0.453408509
## Gender_1.to.1layhid2 -3.486940350
## Gender_2.to.1layhid2 4.000614554
## Intercept.to.1layhid3 0.700517892
## Age.to.1layhid3 -0.558053026
## DistanceFromHome.to.1layhid3 4.054490058
## Income.to.1layhid3 -3.711373959
## NumCompaniesWorked.to.1layhid3 -1.167471860
## TotalWorkingYears.to.1layhid3 -0.938465624
## TrainingTimesLastYear.to.1layhid3 -0.011738864
## YearsAtCompany.to.1layhid3 5.137325288
## YearsWithCurrManager.to.1layhid3 2.158099540
## Education_1.to.1layhid3 -5.162445699
## Education_2.to.1layhid3 0.770033494
## Education_3.to.1layhid3 0.326124692
## Education_4.to.1layhid3 1.942581186
## Education_5.to.1layhid3 -2.518913653
## JobLevel_1.to.1layhid3 -0.145393422
## JobLevel_2.to.1layhid3 -0.654723572
## JobLevel_3.to.1layhid3 -0.825744983
## JobLevel_4.to.1layhid3 1.478614271
## JobLevel_5.to.1layhid3 -2.514700156
## EnvironmentSatisfaction_1.to.1layhid3 2.130042973
## EnvironmentSatisfaction_2.to.1layhid3 0.514606936
## EnvironmentSatisfaction_3.to.1layhid3 0.910918145
## EnvironmentSatisfaction_4.to.1layhid3 -0.943206059
## JobSatisfaction_1.to.1layhid3 0.790299259
## JobSatisfaction_2.to.1layhid3 -3.491863885
## JobSatisfaction_3.to.1layhid3 -1.413854517
## JobSatisfaction_4.to.1layhid3 2.791891196
## BusinessTravel_1.to.1layhid3 -0.117834650
## BusinessTravel_2.to.1layhid3 -1.385882026
## BusinessTravel_3.to.1layhid3 0.781585333
## MaritalStatus_1.to.1layhid3 -2.329744842
## MaritalStatus_2.to.1layhid3 -1.063478298
## MaritalStatus_3.to.1layhid3 3.073693229
## Gender_1.to.1layhid3 -3.576824921
## Gender_2.to.1layhid3 2.646360944
## Intercept.to.1layhid4 0.461857461
## Age.to.1layhid4 -3.591764737
## DistanceFromHome.to.1layhid4 0.013229833
## Income.to.1layhid4 -5.230440365
## NumCompaniesWorked.to.1layhid4 1.072333127
## TotalWorkingYears.to.1layhid4 -5.742679925
## TrainingTimesLastYear.to.1layhid4 0.196271965
## YearsAtCompany.to.1layhid4 2.867384123
## YearsWithCurrManager.to.1layhid4 -2.810189961
## Education_1.to.1layhid4 0.342014063
## Education_2.to.1layhid4 -1.160391305
## Education_3.to.1layhid4 -0.436147344
## Education_4.to.1layhid4 -0.053017116
## Education_5.to.1layhid4 -87.248234905
## JobLevel_1.to.1layhid4 0.873180473
## JobLevel_2.to.1layhid4 -0.805332154
## JobLevel_3.to.1layhid4 -2.487648796
## JobLevel_4.to.1layhid4 0.989819565
## JobLevel_5.to.1layhid4 1.253411156
## EnvironmentSatisfaction_1.to.1layhid4 0.114369251
## EnvironmentSatisfaction_2.to.1layhid4 -0.137137213
## EnvironmentSatisfaction_3.to.1layhid4 0.816031777
## EnvironmentSatisfaction_4.to.1layhid4 -60.072056270
## JobSatisfaction_1.to.1layhid4 1.244590905
## JobSatisfaction_2.to.1layhid4 1.780786158
## JobSatisfaction_3.to.1layhid4 0.757845432
## JobSatisfaction_4.to.1layhid4 -2.350116095
## BusinessTravel_1.to.1layhid4 -13.267042613
## BusinessTravel_2.to.1layhid4 1.302671366
## BusinessTravel_3.to.1layhid4 -0.238626918
## MaritalStatus_1.to.1layhid4 0.329444978
## MaritalStatus_2.to.1layhid4 -1.070369959
## MaritalStatus_3.to.1layhid4 0.688978247
## Gender_1.to.1layhid4 -0.931433074
## Gender_2.to.1layhid4 -0.420545575
## Intercept.to.1layhid5 0.837475543
## Age.to.1layhid5 -0.441116460
## DistanceFromHome.to.1layhid5 -2.231849149
## Income.to.1layhid5 4.857567579
## NumCompaniesWorked.to.1layhid5 6.344842381
## TotalWorkingYears.to.1layhid5 -1.762127435
## TrainingTimesLastYear.to.1layhid5 -2.040972735
## YearsAtCompany.to.1layhid5 1.222684681
## YearsWithCurrManager.to.1layhid5 -2.939317625
## Education_1.to.1layhid5 2.806709776
## Education_2.to.1layhid5 -0.172682119
## Education_3.to.1layhid5 1.241585106
## Education_4.to.1layhid5 -3.999001596
## Education_5.to.1layhid5 -0.292421294
## JobLevel_1.to.1layhid5 -0.235747758
## JobLevel_2.to.1layhid5 1.720381636
## JobLevel_3.to.1layhid5 -4.864680337
## JobLevel_4.to.1layhid5 -4.846888479
## JobLevel_5.to.1layhid5 3.719340273
## EnvironmentSatisfaction_1.to.1layhid5 2.973707549
## EnvironmentSatisfaction_2.to.1layhid5 0.663127913
## EnvironmentSatisfaction_3.to.1layhid5 0.838779805
## EnvironmentSatisfaction_4.to.1layhid5 -0.759461185
## JobSatisfaction_1.to.1layhid5 0.165776163
## JobSatisfaction_2.to.1layhid5 1.813314342
## JobSatisfaction_3.to.1layhid5 0.404944440
## JobSatisfaction_4.to.1layhid5 -0.103988522
## BusinessTravel_1.to.1layhid5 -7.701689140
## BusinessTravel_2.to.1layhid5 0.055895501
## BusinessTravel_3.to.1layhid5 0.143354779
## MaritalStatus_1.to.1layhid5 -3.227951064
## MaritalStatus_2.to.1layhid5 9.628304882
## MaritalStatus_3.to.1layhid5 0.061680647
## Gender_1.to.1layhid5 -2.088948656
## Gender_2.to.1layhid5 -0.678661752
## Intercept.to.1layhid6 0.474400937
## Age.to.1layhid6 -1.629109757
## DistanceFromHome.to.1layhid6 0.910634092
## Income.to.1layhid6 -1.273013521
## NumCompaniesWorked.to.1layhid6 -3.216942043
## TotalWorkingYears.to.1layhid6 -2.935760067
## TrainingTimesLastYear.to.1layhid6 0.809014985
## YearsAtCompany.to.1layhid6 0.370145954
## YearsWithCurrManager.to.1layhid6 -4.333245437
## Education_1.to.1layhid6 0.918784646
## Education_2.to.1layhid6 -1.762875235
## Education_3.to.1layhid6 -3.174130538
## Education_4.to.1layhid6 1.592934225
## Education_5.to.1layhid6 2.214149554
## JobLevel_1.to.1layhid6 0.431818953
## JobLevel_2.to.1layhid6 -0.846082599
## JobLevel_3.to.1layhid6 -1.414199148
## JobLevel_4.to.1layhid6 -0.557306305
## JobLevel_5.to.1layhid6 0.520577737
## EnvironmentSatisfaction_1.to.1layhid6 -1.036591264
## EnvironmentSatisfaction_2.to.1layhid6 -1.031896502
## EnvironmentSatisfaction_3.to.1layhid6 -0.470906038
## EnvironmentSatisfaction_4.to.1layhid6 -1.764328390
## JobSatisfaction_1.to.1layhid6 0.859664034
## JobSatisfaction_2.to.1layhid6 0.700416770
## JobSatisfaction_3.to.1layhid6 -4.078520854
## JobSatisfaction_4.to.1layhid6 -3.582747876
## BusinessTravel_1.to.1layhid6 2.188763156
## BusinessTravel_2.to.1layhid6 2.238974648
## BusinessTravel_3.to.1layhid6 -7.835396713
## MaritalStatus_1.to.1layhid6 -3.519891625
## MaritalStatus_2.to.1layhid6 -3.362365783
## MaritalStatus_3.to.1layhid6 0.510597889
## Gender_1.to.1layhid6 -1.600404526
## Gender_2.to.1layhid6 0.018397842
## Intercept.to.1layhid7 -0.394239528
## Age.to.1layhid7 -5.862008899
## DistanceFromHome.to.1layhid7 12.271565721
## Income.to.1layhid7 1.409925262
## NumCompaniesWorked.to.1layhid7 7.755818447
## TotalWorkingYears.to.1layhid7 7.322579546
## TrainingTimesLastYear.to.1layhid7 3.501640760
## YearsAtCompany.to.1layhid7 -4.986516607
## YearsWithCurrManager.to.1layhid7 8.138612483
## Education_1.to.1layhid7 -8.852859223
## Education_2.to.1layhid7 -4.884783180
## Education_3.to.1layhid7 0.036036320
## Education_4.to.1layhid7 4.161931907
## Education_5.to.1layhid7 -4.528991574
## JobLevel_1.to.1layhid7 -2.210118429
## JobLevel_2.to.1layhid7 0.565900903
## JobLevel_3.to.1layhid7 0.163878770
## JobLevel_4.to.1layhid7 -0.222870532
## JobLevel_5.to.1layhid7 -2.914994638
## EnvironmentSatisfaction_1.to.1layhid7 5.893181793
## EnvironmentSatisfaction_2.to.1layhid7 49.462151972
## EnvironmentSatisfaction_3.to.1layhid7 -3.735164588
## EnvironmentSatisfaction_4.to.1layhid7 0.518647471
## JobSatisfaction_1.to.1layhid7 10.544979717
## JobSatisfaction_2.to.1layhid7 -3.863232891
## JobSatisfaction_3.to.1layhid7 -1.940030973
## JobSatisfaction_4.to.1layhid7 -0.052280881
## BusinessTravel_1.to.1layhid7 -8.686987152
## BusinessTravel_2.to.1layhid7 -9.320081162
## BusinessTravel_3.to.1layhid7 1.199220609
## MaritalStatus_1.to.1layhid7 3.505475207
## MaritalStatus_2.to.1layhid7 0.276291631
## MaritalStatus_3.to.1layhid7 -1.014054205
## Gender_1.to.1layhid7 -1.339371497
## Gender_2.to.1layhid7 1.420611739
## Intercept.to.1layhid8 0.046896415
## Age.to.1layhid8 1.205132426
## DistanceFromHome.to.1layhid8 -0.436394495
## Income.to.1layhid8 -0.104943040
## NumCompaniesWorked.to.1layhid8 1.904752374
## TotalWorkingYears.to.1layhid8 0.194911594
## TrainingTimesLastYear.to.1layhid8 -1.550909772
## YearsAtCompany.to.1layhid8 -5.212371517
## YearsWithCurrManager.to.1layhid8 -1.258501392
## Education_1.to.1layhid8 -0.222054459
## Education_2.to.1layhid8 -0.862559565
## Education_3.to.1layhid8 -0.645586096
## Education_4.to.1layhid8 0.996638029
## Education_5.to.1layhid8 34.072031510
## JobLevel_1.to.1layhid8 1.129282693
## JobLevel_2.to.1layhid8 -0.272377605
## JobLevel_3.to.1layhid8 -2.710779209
## JobLevel_4.to.1layhid8 1.071101183
## JobLevel_5.to.1layhid8 -36.317396882
## EnvironmentSatisfaction_1.to.1layhid8 -0.473694805
## EnvironmentSatisfaction_2.to.1layhid8 1.721545514
## EnvironmentSatisfaction_3.to.1layhid8 -2.284525743
## EnvironmentSatisfaction_4.to.1layhid8 -0.714960128
## JobSatisfaction_1.to.1layhid8 1.011446643
## JobSatisfaction_2.to.1layhid8 -86.466874515
## JobSatisfaction_3.to.1layhid8 0.979183120
## JobSatisfaction_4.to.1layhid8 -1.029221731
## BusinessTravel_1.to.1layhid8 -1.279988438
## BusinessTravel_2.to.1layhid8 0.009906306
## BusinessTravel_3.to.1layhid8 -1.618318785
## MaritalStatus_1.to.1layhid8 0.370750809
## MaritalStatus_2.to.1layhid8 -0.262334441
## MaritalStatus_3.to.1layhid8 -0.615673668
## Gender_1.to.1layhid8 0.640481095
## Gender_2.to.1layhid8 -0.501681840
## Intercept.to.1layhid9 0.004553357
## Age.to.1layhid9 4.175136709
## DistanceFromHome.to.1layhid9 2.962993113
## Income.to.1layhid9 -7.521437723
## NumCompaniesWorked.to.1layhid9 1.022792255
## TotalWorkingYears.to.1layhid9 0.231663415
## TrainingTimesLastYear.to.1layhid9 0.231584110
## YearsAtCompany.to.1layhid9 -1.755353770
## YearsWithCurrManager.to.1layhid9 1.649556483
## Education_1.to.1layhid9 2.233546740
## Education_2.to.1layhid9 1.291078581
## Education_3.to.1layhid9 0.403775915
## Education_4.to.1layhid9 2.211539333
## Education_5.to.1layhid9 6.302040848
## JobLevel_1.to.1layhid9 0.611993439
## JobLevel_2.to.1layhid9 -0.194844583
## JobLevel_3.to.1layhid9 -7.990291589
## JobLevel_4.to.1layhid9 1.464760940
## JobLevel_5.to.1layhid9 -1.124952230
## EnvironmentSatisfaction_1.to.1layhid9 -2.113730716
## EnvironmentSatisfaction_2.to.1layhid9 1.346890926
## EnvironmentSatisfaction_3.to.1layhid9 -0.664987308
## EnvironmentSatisfaction_4.to.1layhid9 -0.622473612
## JobSatisfaction_1.to.1layhid9 6.800359599
## JobSatisfaction_2.to.1layhid9 -2.961554769
## JobSatisfaction_3.to.1layhid9 -1.038842113
## JobSatisfaction_4.to.1layhid9 0.223550454
## BusinessTravel_1.to.1layhid9 3.210765243
## BusinessTravel_2.to.1layhid9 1.295864855
## BusinessTravel_3.to.1layhid9 -0.989724932
## MaritalStatus_1.to.1layhid9 -2.974856965
## MaritalStatus_2.to.1layhid9 0.251519599
## MaritalStatus_3.to.1layhid9 -1.152112580
## Gender_1.to.1layhid9 -6.670298060
## Gender_2.to.1layhid9 1.188025142
## Intercept.to.1layhid10 1.252096252
## Age.to.1layhid10 -1.218872612
## DistanceFromHome.to.1layhid10 -1.941790801
## Income.to.1layhid10 -2.452922472
## NumCompaniesWorked.to.1layhid10 -5.071689746
## TotalWorkingYears.to.1layhid10 5.091282816
## TrainingTimesLastYear.to.1layhid10 -0.535692910
## YearsAtCompany.to.1layhid10 14.554081620
## YearsWithCurrManager.to.1layhid10 6.082110536
## Education_1.to.1layhid10 9.332496385
## Education_2.to.1layhid10 1.442109196
## Education_3.to.1layhid10 -2.285398658
## Education_4.to.1layhid10 3.567799305
## Education_5.to.1layhid10 3.877699803
## JobLevel_1.to.1layhid10 6.444811455
## JobLevel_2.to.1layhid10 -2.762630795
## JobLevel_3.to.1layhid10 -1.376472002
## JobLevel_4.to.1layhid10 0.293026004
## JobLevel_5.to.1layhid10 52.575193940
## EnvironmentSatisfaction_1.to.1layhid10 2.078864662
## EnvironmentSatisfaction_2.to.1layhid10 -2.572669213
## EnvironmentSatisfaction_3.to.1layhid10 0.434968962
## EnvironmentSatisfaction_4.to.1layhid10 -4.041986274
## JobSatisfaction_1.to.1layhid10 -0.485626009
## JobSatisfaction_2.to.1layhid10 2.758962309
## JobSatisfaction_3.to.1layhid10 -0.254162971
## JobSatisfaction_4.to.1layhid10 -7.205131520
## BusinessTravel_1.to.1layhid10 8.937612186
## BusinessTravel_2.to.1layhid10 1.230306651
## BusinessTravel_3.to.1layhid10 0.229043167
## MaritalStatus_1.to.1layhid10 0.708152292
## MaritalStatus_2.to.1layhid10 -2.804196265
## MaritalStatus_3.to.1layhid10 1.015336884
## Gender_1.to.1layhid10 -0.168371850
## Gender_2.to.1layhid10 -0.184478373
## Intercept.to.1layhid11 -0.727410012
## Age.to.1layhid11 -1.422950961
## DistanceFromHome.to.1layhid11 -0.023811446
## Income.to.1layhid11 -1.125448285
## NumCompaniesWorked.to.1layhid11 2.622412831
## TotalWorkingYears.to.1layhid11 -1.803563938
## TrainingTimesLastYear.to.1layhid11 1.791582889
## YearsAtCompany.to.1layhid11 4.445202788
## YearsWithCurrManager.to.1layhid11 -5.751107481
## Education_1.to.1layhid11 -20.543100260
## Education_2.to.1layhid11 1.920818169
## Education_3.to.1layhid11 1.106227228
## Education_4.to.1layhid11 0.136654649
## Education_5.to.1layhid11 -4.460647709
## JobLevel_1.to.1layhid11 0.515429197
## JobLevel_2.to.1layhid11 0.120245522
## JobLevel_3.to.1layhid11 -0.601765136
## JobLevel_4.to.1layhid11 1.661226109
## JobLevel_5.to.1layhid11 -89.648634602
## EnvironmentSatisfaction_1.to.1layhid11 1.014351719
## EnvironmentSatisfaction_2.to.1layhid11 -2.403109005
## EnvironmentSatisfaction_3.to.1layhid11 -0.556491289
## EnvironmentSatisfaction_4.to.1layhid11 1.728526300
## JobSatisfaction_1.to.1layhid11 0.115280535
## JobSatisfaction_2.to.1layhid11 -5.077145212
## JobSatisfaction_3.to.1layhid11 -0.636504588
## JobSatisfaction_4.to.1layhid11 -1.375346521
## BusinessTravel_1.to.1layhid11 1.384742820
## BusinessTravel_2.to.1layhid11 1.327353341
## BusinessTravel_3.to.1layhid11 -0.949166236
## MaritalStatus_1.to.1layhid11 1.656053266
## MaritalStatus_2.to.1layhid11 -0.291433769
## MaritalStatus_3.to.1layhid11 -0.053767533
## Gender_1.to.1layhid11 -2.149311013
## Gender_2.to.1layhid11 1.652788593
## Intercept.to.1layhid12 0.358174281
## Age.to.1layhid12 3.730811217
## DistanceFromHome.to.1layhid12 -1.525453303
## Income.to.1layhid12 2.163734690
## NumCompaniesWorked.to.1layhid12 1.152214256
## TotalWorkingYears.to.1layhid12 7.327969300
## TrainingTimesLastYear.to.1layhid12 -0.045165918
## YearsAtCompany.to.1layhid12 -3.063124869
## YearsWithCurrManager.to.1layhid12 -9.479770369
## Education_1.to.1layhid12 -6.724594699
## Education_2.to.1layhid12 0.359769869
## Education_3.to.1layhid12 16.233527399
## Education_4.to.1layhid12 0.437973375
## Education_5.to.1layhid12 0.890020785
## JobLevel_1.to.1layhid12 1.045179812
## JobLevel_2.to.1layhid12 -0.633047868
## JobLevel_3.to.1layhid12 -0.769159638
## JobLevel_4.to.1layhid12 45.696529509
## JobLevel_5.to.1layhid12 -6.439470580
## EnvironmentSatisfaction_1.to.1layhid12 0.793415601
## EnvironmentSatisfaction_2.to.1layhid12 -1.621695767
## EnvironmentSatisfaction_3.to.1layhid12 0.328387421
## EnvironmentSatisfaction_4.to.1layhid12 -0.136245747
## JobSatisfaction_1.to.1layhid12 1.254265774
## JobSatisfaction_2.to.1layhid12 -1.858892771
## JobSatisfaction_3.to.1layhid12 -1.812411788
## JobSatisfaction_4.to.1layhid12 -1.713577137
## BusinessTravel_1.to.1layhid12 4.899525205
## BusinessTravel_2.to.1layhid12 2.965989563
## BusinessTravel_3.to.1layhid12 0.656410156
## MaritalStatus_1.to.1layhid12 -0.178455232
## MaritalStatus_2.to.1layhid12 0.535580011
## MaritalStatus_3.to.1layhid12 -1.428465097
## Gender_1.to.1layhid12 -1.614100305
## Gender_2.to.1layhid12 0.877750989
## Intercept.to.2layhid1 0.057815998
## 1layhid1.to.2layhid1 0.674569700
## 1layhid2.to.2layhid1 -0.587130490
## 1layhid3.to.2layhid1 -0.825550995
## 1layhid4.to.2layhid1 -7.553679539
## 1layhid5.to.2layhid1 -2.182153269
## 1layhid6.to.2layhid1 -2.466232065
## 1layhid7.to.2layhid1 3.824892539
## 1layhid8.to.2layhid1 0.292636130
## 1layhid9.to.2layhid1 15.084252184
## 1layhid10.to.2layhid1 1.224552960
## 1layhid11.to.2layhid1 -2.692936821
## 1layhid12.to.2layhid1 -0.240378758
## Intercept.to.2layhid2 1.222936913
## 1layhid1.to.2layhid2 -2.075862218
## 1layhid2.to.2layhid2 0.009488240
## 1layhid3.to.2layhid2 -0.441697573
## 1layhid4.to.2layhid2 2.771248429
## 1layhid5.to.2layhid2 -0.165124762
## 1layhid6.to.2layhid2 6.602841301
## 1layhid7.to.2layhid2 0.938749223
## 1layhid8.to.2layhid2 1.804913726
## 1layhid9.to.2layhid2 0.039935305
## 1layhid10.to.2layhid2 -1.918654046
## 1layhid11.to.2layhid2 2.415135721
## 1layhid12.to.2layhid2 -1.836166652
## Intercept.to.2layhid3 -0.229977002
## 1layhid1.to.2layhid3 1.652848603
## 1layhid2.to.2layhid3 1.174945298
## 1layhid3.to.2layhid3 -0.503708650
## 1layhid4.to.2layhid3 -3.864295244
## 1layhid5.to.2layhid3 -1.843449649
## 1layhid6.to.2layhid3 -5.392544988
## 1layhid7.to.2layhid3 0.590437636
## 1layhid8.to.2layhid3 -1.391825631
## 1layhid9.to.2layhid3 0.919346699
## 1layhid10.to.2layhid3 0.513684171
## 1layhid11.to.2layhid3 -0.576769639
## 1layhid12.to.2layhid3 -0.003670397
## Intercept.to.2layhid4 1.348372379
## 1layhid1.to.2layhid4 1.551474242
## 1layhid2.to.2layhid4 1.858382160
## 1layhid3.to.2layhid4 1.419966215
## 1layhid4.to.2layhid4 -5.777316038
## 1layhid5.to.2layhid4 -3.071313438
## 1layhid6.to.2layhid4 -1.932326533
## 1layhid7.to.2layhid4 1.546759445
## 1layhid8.to.2layhid4 -3.425978432
## 1layhid9.to.2layhid4 -2.398879893
## 1layhid10.to.2layhid4 -1.257115148
## 1layhid11.to.2layhid4 -1.771480993
## 1layhid12.to.2layhid4 1.911615455
## Intercept.to.2layhid5 1.766089959
## 1layhid1.to.2layhid5 -0.329230607
## 1layhid2.to.2layhid5 1.538846820
## 1layhid3.to.2layhid5 -4.202986015
## 1layhid4.to.2layhid5 -3.864286126
## 1layhid5.to.2layhid5 -1.818347556
## 1layhid6.to.2layhid5 -1.005359466
## 1layhid7.to.2layhid5 1.723856572
## 1layhid8.to.2layhid5 0.076832497
## 1layhid9.to.2layhid5 -0.645115592
## 1layhid10.to.2layhid5 2.367598012
## 1layhid11.to.2layhid5 0.592196196
## 1layhid12.to.2layhid5 -1.089197342
## Intercept.to.2layhid6 1.274730570
## 1layhid1.to.2layhid6 1.240341643
## 1layhid2.to.2layhid6 -0.313175373
## 1layhid3.to.2layhid6 0.645066119
## 1layhid4.to.2layhid6 -5.677724241
## 1layhid5.to.2layhid6 -1.421422837
## 1layhid6.to.2layhid6 -8.180485860
## 1layhid7.to.2layhid6 -0.008429381
## 1layhid8.to.2layhid6 -1.449470009
## 1layhid9.to.2layhid6 0.967375251
## 1layhid10.to.2layhid6 0.180707619
## 1layhid11.to.2layhid6 -0.915076111
## 1layhid12.to.2layhid6 0.143130101
## Intercept.to.2layhid7 1.004351658
## 1layhid1.to.2layhid7 -1.146536406
## 1layhid2.to.2layhid7 -1.355934125
## 1layhid3.to.2layhid7 0.799850254
## 1layhid4.to.2layhid7 -0.604847228
## 1layhid5.to.2layhid7 0.933446389
## 1layhid6.to.2layhid7 1.768515139
## 1layhid7.to.2layhid7 0.335922213
## 1layhid8.to.2layhid7 1.110871090
## 1layhid9.to.2layhid7 1.681607074
## 1layhid10.to.2layhid7 -0.322476176
## 1layhid11.to.2layhid7 -0.878984023
## 1layhid12.to.2layhid7 -2.607971805
## Intercept.to.2layhid8 -0.292592130
## 1layhid1.to.2layhid8 -1.840614642
## 1layhid2.to.2layhid8 0.977641797
## 1layhid3.to.2layhid8 -1.884970580
## 1layhid4.to.2layhid8 -6.572789519
## 1layhid5.to.2layhid8 -0.672038777
## 1layhid6.to.2layhid8 -1.175080809
## 1layhid7.to.2layhid8 1.763570440
## 1layhid8.to.2layhid8 -1.653196500
## 1layhid9.to.2layhid8 8.608387238
## 1layhid10.to.2layhid8 1.485866934
## 1layhid11.to.2layhid8 0.084389728
## 1layhid12.to.2layhid8 0.694561184
## Intercept.to.2layhid9 1.859122552
## 1layhid1.to.2layhid9 -2.242048963
## 1layhid2.to.2layhid9 0.311373369
## 1layhid3.to.2layhid9 -0.408972629
## 1layhid4.to.2layhid9 2.654800123
## 1layhid5.to.2layhid9 1.373827975
## 1layhid6.to.2layhid9 2.165314430
## 1layhid7.to.2layhid9 -0.738787464
## 1layhid8.to.2layhid9 1.597292239
## 1layhid9.to.2layhid9 0.235448926
## 1layhid10.to.2layhid9 -1.059953638
## 1layhid11.to.2layhid9 -1.358326117
## 1layhid12.to.2layhid9 0.131296088
## Intercept.to.2layhid10 -0.228529330
## 1layhid1.to.2layhid10 -0.639343962
## 1layhid2.to.2layhid10 -1.170062748
## 1layhid3.to.2layhid10 5.491049169
## 1layhid4.to.2layhid10 18.871623170
## 1layhid5.to.2layhid10 0.187857362
## 1layhid6.to.2layhid10 48.210103555
## 1layhid7.to.2layhid10 -2.741781319
## 1layhid8.to.2layhid10 2.279345023
## 1layhid9.to.2layhid10 -1.764126490
## 1layhid10.to.2layhid10 60.983531482
## 1layhid11.to.2layhid10 1.791077135
## 1layhid12.to.2layhid10 -2.154817861
## Intercept.to.2layhid11 0.310184621
## 1layhid1.to.2layhid11 0.929315128
## 1layhid2.to.2layhid11 1.733396535
## 1layhid3.to.2layhid11 -1.738477589
## 1layhid4.to.2layhid11 -0.461068387
## 1layhid5.to.2layhid11 -3.026813295
## 1layhid6.to.2layhid11 -10.374164168
## 1layhid7.to.2layhid11 2.396163495
## 1layhid8.to.2layhid11 -2.170183053
## 1layhid9.to.2layhid11 2.098585031
## 1layhid10.to.2layhid11 0.343034781
## 1layhid11.to.2layhid11 -1.645528101
## 1layhid12.to.2layhid11 0.164578193
## Intercept.to.2layhid12 -0.819019587
## 1layhid1.to.2layhid12 1.244423810
## 1layhid2.to.2layhid12 3.253053457
## 1layhid3.to.2layhid12 -4.464585978
## 1layhid4.to.2layhid12 -19.421357033
## 1layhid5.to.2layhid12 0.774749485
## 1layhid6.to.2layhid12 -0.471638330
## 1layhid7.to.2layhid12 -0.009282565
## 1layhid8.to.2layhid12 -1.060261936
## 1layhid9.to.2layhid12 5.951380358
## 1layhid10.to.2layhid12 -2.000668334
## 1layhid11.to.2layhid12 -1.295409172
## 1layhid12.to.2layhid12 0.055991751
## Intercept.to.Attrition 0.836625365
## 2layhid1.to.Attrition -0.569625117
## 2layhid2.to.Attrition 35.783265394
## 2layhid3.to.Attrition -1.803498662
## 2layhid4.to.Attrition -0.898646145
## 2layhid5.to.Attrition -0.452126282
## 2layhid6.to.Attrition -0.227631586
## 2layhid7.to.Attrition 15.900752803
## 2layhid8.to.Attrition -16.665026013
## 2layhid9.to.Attrition 4.799038931
## 2layhid10.to.Attrition 11.821454166
## 2layhid11.to.Attrition -30.114904967
## 2layhid12.to.Attrition -5.471753984
testy = test_nn_scale$Attrition
testx = -(test_nn_scale$Attrition)
empnn6.results <- compute(emp_nn6, test_nn_scale)
results6 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn6.results$net.result)
head(results6)
## actual prediction
## 1 0 1.556773e-13
## 2 0 1.447750e-09
## 3 0 9.999842e-01
## 4 0 5.225753e-12
## 5 0 8.070033e-15
## 6 0 4.543550e-12
#Compute error statistics
resultnnfit6 <- empnn6.results$net.result
results6 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn6.results$net.result)
head(results6)
## actual prediction
## 1 0 1.556773e-13
## 2 0 1.447750e-09
## 3 0 9.999842e-01
## 4 0 5.225753e-12
## 5 0 8.070033e-15
## 6 0 4.543550e-12
RMSE6 = sqrt(sum(abs(test_nn_scale - empnn6.results$net.result))^2)
RMSE6
## [1] 10673.91
resultnnfit6 <- empnn6.results$net.result
resultnnfit6 = ifelse(resultnnfit6>0.5, 1, 0)
confusionMatrix(data = as.factor(resultnnfit6), reference = as.factor(testy),
positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 720 40
## 1 22 113
##
## Accuracy : 0.9307
## 95% CI : (0.9121, 0.9465)
## No Information Rate : 0.8291
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.7436
##
## Mcnemar's Test P-Value : 0.03085
##
## Sensitivity : 0.7386
## Specificity : 0.9704
## Pos Pred Value : 0.8370
## Neg Pred Value : 0.9474
## Prevalence : 0.1709
## Detection Rate : 0.1263
## Detection Prevalence : 0.1508
## Balanced Accuracy : 0.8545
##
## 'Positive' Class : 1
##
#ROC Curve#
library(ROSE)
roc.curve(test_nn_scale$Attrition, empnn6.results$net.result)
## Warning in if (cl <- class(predicted) == "factor" | class(predicted) == : the
## condition has length > 1 and only the first element will be used
## Area under the curve (AUC): 0.835
#ROC
roc_comp <- ROCR::prediction(predictions = empnn6.results$net.result, labels = test_nn_scale$Attrition)
roc_comp <- ROCR::performance(roc_comp , "tpr" , "fpr")
plot(roc_comp,
colorize = TRUE,
print.cutoffs.at= seq(0,1,0.05),
text.adj=c(-0.2,1.7))
Tune the model using backpropagation and learning rate
# Tuning with different activation functions
emp_nn9 <- neuralnet(Attrition ~., data=train_nn_scale,
hidden = c(12, 12), linear.output=FALSE,
threshold=0.01,
act.fct = "logistic",
algorithm = "backprop",
learningrate = .01)
plot(emp_nn9)
testy = test_nn_scale$Attrition
testx = -(test_nn_scale$Attrition)
empnn9.results <- compute(emp_nn9, test_nn_scale)
results9 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn6.results$net.result)
head(results6)
## actual prediction
## 1 0 1.556773e-13
## 2 0 1.447750e-09
## 3 0 9.999842e-01
## 4 0 5.225753e-12
## 5 0 8.070033e-15
## 6 0 4.543550e-12
#Compute error statistics
resultnnfit9 <- empnn9.results$net.result
results9 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn9.results$net.result)
head(results9)
## actual prediction
## 1 0 1.577895e-06
## 2 0 3.074286e-04
## 3 0 1.000000e+00
## 4 0 1.074922e-05
## 5 0 2.284982e-03
## 6 0 2.869911e-08
RMSE9 = sqrt(sum(abs(test_nn_scale - empnn6.results$net.result))^2)
RMSE9
## [1] 10673.91
resultnnfit9 <- empnn9.results$net.result
resultnnfit9 = ifelse(resultnnfit9>0.5, 1, 0)
confusionMatrix(data = as.factor(resultnnfit9), reference = as.factor(testy),
positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 727 30
## 1 15 123
##
## Accuracy : 0.9497
## 95% CI : (0.9333, 0.9631)
## No Information Rate : 0.8291
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.8154
##
## Mcnemar's Test P-Value : 0.03689
##
## Sensitivity : 0.8039
## Specificity : 0.9798
## Pos Pred Value : 0.8913
## Neg Pred Value : 0.9604
## Prevalence : 0.1709
## Detection Rate : 0.1374
## Detection Prevalence : 0.1542
## Balanced Accuracy : 0.8919
##
## 'Positive' Class : 1
##
#ROC Curve#
library(ROSE)
roc.curve(test_nn_scale$Attrition, empnn9.results$net.result)
## Warning in if (cl <- class(predicted) == "factor" | class(predicted) == : the
## condition has length > 1 and only the first element will be used
## Area under the curve (AUC): 0.865
#ROC
roc_comp <- ROCR::prediction(predictions = empnn9.results$net.result, labels = test_nn_scale$Attrition)
roc_comp <- ROCR::performance(roc_comp , "tpr" , "fpr")
plot(roc_comp,
colorize = TRUE,
print.cutoffs.at= seq(0,1,0.05),
text.adj=c(-0.2,1.7))
## Attempt number 1
#set.seed(2890)
#emp_nn1 <- neuralnet(Attrition ~ ., data = train_nn_scale, hidden = 1, linear.output=FALSE, threshold=0.01)
#result.matrix, a matrix containing the error, weights between input, hidden and output for each replication
#It also includes reached threshold, needed steps, AIC and BIC computed if likelihood=TRUE).
#Each column represents one replication.
#emp_nn1$result.matrix
#plot(emp_nn1)
#empnn1.results <- compute(emp_nn1, test_nn_scale)
#head(empnn1.results)
#Compute error statistics
#resultnnfit1 <- empnn1.results$net.result
#results <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn1.results$net.result)
#head(results)
#RMSE1 = sqrt(sum(abs(test_nn - empnn1.results$net.result))^2)
#RMSE1
#resultnnfit1 <- empnn1.results$net.result
#resultnnfit1 = ifelse(resultnnfit1>0.5, 1, 0)
#confusionMatrix(data = as.factor(resultnnfit1), reference = as.factor(testy),
# positive="1")
#set.seed(2890)
# Tuning with a 3, 2 hidden layer configuration
#emp_nn2 <- neuralnet(Attrition ~ ., data = train_nn_scale, hidden = c(2), linear.output=FALSE, threshold=0.01)
#emp_nn2$result.matrix
#plot(emp_nn2)
#empnn2.results <- compute(emp_nn2, test_nn_scale)
#head(empnn2.results)
#results2 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn2.results$net.result)
#head(results2)
#Compute error statistics
#resultnnfit2 <- empnn2.results$net.result
#results2 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn2.results$net.result)
#head(results2)
#RMSE2 = sqrt(sum(abs(test_nn_scale - empnn2.results$net.result))^2)
#RMSE2
#resultnnfit2 <- empnn2.results$net.result
#resultnnfit2 = ifelse(resultnnfit3>0.5, 1, 0)
#confusionMatrix(data = as.factor(resultnnfit2), reference = as.factor(testy),
# positive="1")
# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation
#set.seed(2890)
#emp_nn3 <- neuralnet(Attrition ~., data=train_nn_scale,
# hidden = c(4, 1), linear.output=FALSE,
# threshold=0.01,
# act.fct = "logistic")
#plot(emp_nn3)
#empnn3.results <- compute(emp_nn3, test_nn_scale)
#results3 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn3.results$net.result)
#head(results3)
#Compute error statistics
#resultnnfit3 <- empnn3.results$net.result
#results3 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn3.results$net.result)
#head(results3)
#RMSE3 = sqrt(sum(abs(test_nn_scale - empnn3.results$net.result))^2)
#RMSE3
#resultnnfit3 <- empnn3.results$net.result
#resultnnfit3 = ifelse(resultnnfit3>0.5, 1, 0)
#confusionMatrix(data = as.factor(resultnnfit3), reference = as.factor(testy),
# positive="1")
# Tuning with different activation functions
#set.seed(2890)
#emp_nn4 <- neuralnet(Attrition ~., data=train_nn_scale,
# hidden = c(4, 4), linear.output=FALSE,
# threshold=0.01,
# act.fct = "logistic")
#plot(emp_nn4)
#empnn4.results <- compute(emp_nn4, test_nn_scale)
#results4 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn4.results$net.result)
#head(results4)
#Compute error statistics
#resultnnfit4 <- empnn4.results$net.result
#results4 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn4.results$net.result)
#head(results4)
#RMSE4 = sqrt(sum(abs(test_nn_scale - empnn4.results$net.result))^2)
#RMSE4
#resultnnfit4 <- empnn4.results$net.result
#resultnnfit4 = ifelse(resultnnfit4>0.5, 1, 0)
#confusionMatrix(data = as.factor(resultnnfit4), reference = as.factor(testy),
# positive="1")
# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation
#set.seed(2890)
#emp_nn5 <- neuralnet(Attrition ~., data=train_nn_scale,
# hidden = c(8, 8), linear.output=FALSE,
# threshold=0.01,
# act.fct = "logistic")
#plot(emp_nn5)
#empnn5.results <- compute(emp_nn5, test_nn_scale)
#results5 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn5.results$net.result)
#head(results5)
#Compute error statistics
#resultnnfit5 <- empnn5.results$net.result
#results5 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn5.results$net.result)
#head(results5)
#RMSE5 = sqrt(sum(abs(test_nn_scale - empnn5.results$net.result))^2)
#RMSE5
#resultnnfit5 <- empnn5.results$net.result
#resultnnfit5 = ifelse(resultnnfit5>0.5, 1, 0)
#confusionMatrix(data = as.factor(resultnnfit5), reference = as.factor(testy),
# positive="1")
# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation
#set.seed(2890)
#emp_nn7 <- neuralnet(Attrition ~., data=train_nn_scale,
# hidden = c(12, 12), linear.output=FALSE,
# threshold=0.01,
# act.fct = "logistic")
#plot(emp_nn7)
#empnn7.results <- compute(emp_nn7, test_nn_scale)
#results7 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn7.results$net.result)
#head(results7)
#Compute error statistics
#resultnnfit7 <- empnn7.results$net.result
#results7 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn7.results$net.result)
#head(results7)
#RMSE7 = sqrt(sum(abs(test_nn_scale - empnn7.results$net.result))^2)
#RMSE7
#resultnnfit7 <- empnn7.results$net.result
#resultnnfit7 = ifelse(resultnnfit7>0.5, 1, 0)
#confusionMatrix(data = as.factor(resultnnfit7), reference = as.factor(testy),
# positive="1")
# Tuning with different activation functions
# 2-Hidden Layers, Layer-1 4-neurons, Layer-2, 1-neuron, logistic activation
#set.seed(2890)
#emp_nn8 <- neuralnet(Attrition ~., data=train_nn_scale,
# hidden = c(11, 11), linear.output=FALSE,
# threshold=0.01,
# act.fct = "logistic")
#plot(emp_nn8)
#empnn8.results <- compute(emp_nn8, test_nn_scale)
#results8 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn8.results$net.result)
#head(results8)
#Compute error statistics
#resultnnfit8 <- empnn7.results$net.result
#results8 <- data.frame(actual = test_nn_scale$Attrition, prediction = empnn8.results$net.result)
#head(results8)
#RMSE8 = sqrt(sum(abs(test_nn_scale - empnn8.results$net.result))^2)
#RMSE8
#resultnnfit8 <- empnn8.results$net.result
#resultnnfit8 = ifelse(resultnnfit7>0.5, 1, 0)
#confusionMatrix(data = as.factor(resultnnfit8), reference = as.factor(testy),
# positive="1")