# Read the first worksheet of the HR workbook into a data frame.
# NOTE: the path is a hard-coded local Windows location.
library(openxlsx)
HR_Analytics <- read.xlsx(
  xlsxFile = "H:/Yashwanth/Kaggle/HR_comma_sep.xlsx",
  sheet = 1
)
# Handy for exploring the package interactively:
# lsf.str("package:openxlsx"); help(package="openxlsx")
Gradient Boosting
Objective: H(x) = {set of all possible hypotheses} Reference: https://www.quora.com/What-is-hypothesis-in-machine-learning
# Summary
# Percentage of rows at each level of the three flag columns: one row per
# flag, one column per level. Proportions are bound first and scaled to
# percent in a single step.
100 * rbind(
  Workaccident = prop.table(table(HR_Analytics[["Work_accident"]])),
  Promotion = prop.table(table(HR_Analytics[["promotion_last_5years"]])),
  Left = prop.table(table(HR_Analytics[["left"]]))
)
0 1
Workaccident 85.53904 14.460964
Promotion 97.87319 2.126808
Left 76.19175 23.808254
Basic summary (insights) of the data:
- ~14% of employees fall into the Workaccident bucket
- ~2% of employees fall into the promotion bucket
- ~24% of employees fall into the left bucket
Distribution of Variables
# Distribution
# Peek at the first rows; View() needs an interactive session, so skip it
# when the script is run non-interactively.
if (interactive()) {
  View(head(HR_Analytics))
}
library(ggplot2)

# Draw a density-scaled histogram of one numeric column, faceted by `left`,
# with a normal curve overlaid in red.
#
# The normal curve uses the mean and standard deviation of the whole column
# (not per facet), so the same reference curve appears in every panel.
#
# Arguments:
#   data   - data frame containing `column` and a `left` column.
#   column - name of the numeric column to plot, as a string.
# Returns:
#   A ggplot object; it is drawn when printed (auto-printed at top level).
#
# Columns are read from `data` explicitly instead of via attach(), so a
# same-named object in the global environment cannot shadow them.
# after_stat() replaces the deprecated `..density..` notation
# (requires ggplot2 >= 3.3.0).
plot_distribution <- function(data, column) {
  values <- data[[column]]
  ggplot(data, aes(x = .data[[column]])) +
    facet_wrap(~ left) +
    geom_histogram(aes(y = after_stat(density)), colour = "blue2") +
    stat_function(
      fun = dnorm,
      args = list(
        mean = mean(values, na.rm = TRUE),
        sd = sd(values, na.rm = TRUE)
      ),
      colour = "red"
    ) +
    # Set the x label explicitly so it shows the column name.
    labs(title = paste(column, "Distribution"), x = column)
}

plot_distribution(HR_Analytics, "satisfaction_level")

plot_distribution(HR_Analytics, "last_evaluation")

plot_distribution(HR_Analytics, "number_project")

plot_distribution(HR_Analytics, "average_montly_hours")

plot_distribution(HR_Analytics, "time_spend_company")
LS0tDQp0aXRsZTogIlIgRGF0YSBTY2llbmNlIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQpsaWJyYXJ5KG9wZW54bHN4KQ0KSFJfQW5hbHl0aWNzIDwtIHJlYWQueGxzeCgiSDovWWFzaHdhbnRoL0thZ2dsZS9IUl9jb21tYV9zZXAueGxzeCIsc2hlZXQgPSAxKQ0KIyBsc2Yuc3RyKCJwYWNrYWdlOm9wZW54bHN4Iik7IGhlbHAocGFja2FnZT0ib3Blbnhsc3giKQ0KYGBgDQoNCipHcmFkaWVudCBCb29zdGluZyoNCg0KT2JqZWN0aXZlIDogSCh4KSA9IHtTZXQgb2YgYWxsIHBvc3NpYmxlIGh5cG90aGVzaXN9IFtSZWZlcmVuY2VdKGh0dHBzOi8vd3d3LnF1b3JhLmNvbS9XaGF0LWlzLWh5cG90aGVzaXMtaW4tbWFjaGluZS1sZWFybmluZykgDQoNCmBgYHtyfQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoZ2JtKQ0KDQojIFN1bW1hcnkNCnJiaW5kKFdvcmthY2NpZGVudD1wcm9wLnRhYmxlKHRhYmxlKEhSX0FuYWx5dGljcyRXb3JrX2FjY2lkZW50KSkqMTAwLA0KICAgICAgUHJvbW90aW9uPXByb3AudGFibGUodGFibGUoSFJfQW5hbHl0aWNzJHByb21vdGlvbl9sYXN0XzV5ZWFycykpKjEwMCwNCiAgICAgIExlZnQ9cHJvcC50YWJsZSh0YWJsZShIUl9BbmFseXRpY3MkbGVmdCkpKjEwMCkNCmBgYA0KQmFzaWMgc3VtbWFyeShJbnNpZ2h0cykgb2YgdGhlIGRhdGE6DQoNCjEuIH4xNCUgb2YgZW1wbG95ZWVzIGZhbGxzIGludG8gV29ya2FjY2lkZW50IGJ1Y2tldA0KMi4gfjIlIG9mIGVtcGxveWVlcyBmYWxscyBpbnRvIHByb21vdGlvbiBidWNrZXQNCjMuIH4yMyUgb2YgZW1wbG95ZWVzIGZhbGxzIGludG8gbGVmdCBidWNrZXQNCg0KKkRpc3RyaWJ1dGlvbiBvZiBWYXJpYWJsZXMqDQoNCmBgYHtyfQ0KIyBEaXN0cmlidXRpb24NClZpZXcoaGVhZChIUl9BbmFseXRpY3MpKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KDQphdHRhY2goSFJfQW5hbHl0aWNzKQ0KZ2dwbG90KEhSX0FuYWx5dGljcyxhZXMoeD1zYXRpc2ZhY3Rpb25fbGV2ZWwpKSArDQogIGZhY2V0X3dyYXAofiBsZWZ0KSsNCiAgZ2VvbV9oaXN0b2dyYW0oYWVzKHk9Li5kZW5zaXR5Li4pLGNvbCA9ICJibHVlMiIpICsNCiAgc3RhdF9mdW5jdGlvbihmdW4gPSBkbm9ybSxhcmdzID0gbGlzdChtZWFuPW1lYW4oc2F0aXNmYWN0aW9uX2xldmVsKSxzZD1zZChzYXRpc2ZhY3Rpb25fbGV2ZWwpKSxjb2xvdXIgPSAicmVkIikgKw0KICBsYWJzKHRpdGxlID0gInNhdGlzZmFjdGlvbl9sZXZlbCBEaXN0cmlidXRpb24iKQ0KDQpnZ3Bsb3QoSFJfQW5hbHl0aWNzLGFlcyh4PWxhc3RfZXZhbHVhdGlvbikpICsNCiAgZmFjZXRfd3JhcCh+IGxlZnQpKw0KICBnZW9tX2hpc3RvZ3JhbShhZXMoeT0uLmRlbnNpdHkuLiksY29sID0gImJsdWUyIikgKw0KICBzdGF0X2Z1bmN0aW9uKGZ1biA9IGRub3JtLGFyZ3MgPSBsaXN0KG1lYW49bWVhbihsYXN0X2V2YWx1YXRpb24pLHNkPXNkKGxhc3RfZXZhbHVhdGlvbikpLGNvbG91ciA9ICJyZWQiKSArDQogIGxhYnModGl0bGUgPSAibGFz
dF9ldmFsdWF0aW9uIERpc3RyaWJ1dGlvbiIpDQoNCmdncGxvdChIUl9BbmFseXRpY3MsYWVzKHg9bnVtYmVyX3Byb2plY3QpKSArDQogIGZhY2V0X3dyYXAofiBsZWZ0KSsNCiAgZ2VvbV9oaXN0b2dyYW0oYWVzKHk9Li5kZW5zaXR5Li4pLGNvbCA9ICJibHVlMiIpICsNCiAgc3RhdF9mdW5jdGlvbihmdW4gPSBkbm9ybSxhcmdzID0gbGlzdChtZWFuPW1lYW4obnVtYmVyX3Byb2plY3QpLHNkPXNkKG51bWJlcl9wcm9qZWN0KSksY29sb3VyID0gInJlZCIpICsNCiAgbGFicyh0aXRsZSA9ICJudW1iZXJfcHJvamVjdCBEaXN0cmlidXRpb24iKQ0KDQpnZ3Bsb3QoSFJfQW5hbHl0aWNzLGFlcyh4PWF2ZXJhZ2VfbW9udGx5X2hvdXJzKSkgKw0KICBmYWNldF93cmFwKH4gbGVmdCkrDQogIGdlb21faGlzdG9ncmFtKGFlcyh5PS4uZGVuc2l0eS4uKSxjb2wgPSAiYmx1ZTIiKSArDQogIHN0YXRfZnVuY3Rpb24oZnVuID0gZG5vcm0sYXJncyA9IGxpc3QobWVhbj1tZWFuKGF2ZXJhZ2VfbW9udGx5X2hvdXJzKSxzZD1zZChhdmVyYWdlX21vbnRseV9ob3VycykpLA0KICAgICAgICAgICAgICAgIGNvbG91ciA9ICJyZWQiKSArDQogIGxhYnModGl0bGUgPSAiYXZlcmFnZV9tb250bHlfaG91cnMgRGlzdHJpYnV0aW9uIikNCg0KZ2dwbG90KEhSX0FuYWx5dGljcyxhZXMoeD10aW1lX3NwZW5kX2NvbXBhbnkpKSArDQogIGZhY2V0X3dyYXAofiBsZWZ0KSsNCiAgZ2VvbV9oaXN0b2dyYW0oYWVzKHk9Li5kZW5zaXR5Li4pLGNvbCA9ICJibHVlMiIpICsNCiAgc3RhdF9mdW5jdGlvbihmdW4gPSBkbm9ybSxhcmdzID0gbGlzdChtZWFuPW1lYW4odGltZV9zcGVuZF9jb21wYW55KSxzZD1zZCh0aW1lX3NwZW5kX2NvbXBhbnkpKSxjb2xvdXIgPSAicmVkIikgKw0KICBsYWJzKHRpdGxlID0gInRpbWVfc3BlbmRfY29tcGFueSBEaXN0cmlidXRpb24iKQ0KZGV0YWNoKEhSX0FuYWx5dGljcykNCmBgYA0KDQo=