---
title: "Отток молодых сотрудников из компании"
output:
  flexdashboard::flex_dashboard:
    theme: flatly
    source_code: embed
    orientation: rows
---
```{r setup, include=FALSE}
library(flexdashboard)
library(dplyr)
library(plotly)
library(crosstalk)
library(DBI)
library(d3scatter)
library(DBI)
library(kableExtra)
library(vip)
library(rsample)
library(parsnip)
```
```{r}
# Open a ClickHouse connection (HTTPS, port 8443) to the `employee` database.
# The user and password can be overridden with the CLICKHOUSE_USER and
# CLICKHOUSE_PASSWORD environment variables; when those are unset the
# original literals are used, so existing setups keep working unchanged.
# NOTE(review): the YAML header sets `source_code: embed`, so everything
# written in this chunk, including the fallback password, is shipped inside
# the rendered dashboard. Consider removing the fallbacks.
# NOTE(review): ssl_verifypeer = FALSE turns off TLS certificate
# verification; re-enable it once the host's CA chain is available.
con <- dbConnect(
  ClickHouseHTTP::ClickHouseHTTP(),
  user = Sys.getenv("CLICKHOUSE_USER", unset = "studentminor"),
  password = Sys.getenv("CLICKHOUSE_PASSWORD", unset = "DataMinorHSE!2023"),
  dbname = "employee",
  host = "rc1a-i6ui9dhblsq8rgdo.mdb.yandexcloud.net",
  port = 8443,
  https = TRUE,
  ssl_verifypeer = FALSE
)
# Load the joined profile + portfolio rows for employees aged 18 to 35
# (BETWEEN is inclusive on both ends).
youth <- dbGetQuery(con, "SELECT * FROM profile
INNER JOIN portfolio ON profile.EmployeeNumber = portfolio.EmployeeNumber
WHERE Age BETWEEN 18 AND 35")

# Data preparation
# Recode the attrition flag in one vectorised step: 1 -> "Yes", anything
# else that is not missing -> "No". This avoids relying on the implicit
# numeric-to-character coercion that a two-step in-place recode needs.
youth$Attrition <- ifelse(youth$Attrition == 1, "Yes", "No")

# Drop the duplicated join key and turn every character column (including
# the recoded Attrition) into a factor for modelling.
youth <- youth %>%
  select(-portfolio.EmployeeNumber) %>%
  mutate(across(where(is.character), as.factor))
# Split the data into training (80%) and test (20%) sets; the seed makes
# the split reproducible.
set.seed(100)
ind <- initial_split(youth, prop = 0.8)
train <- training(ind)
test <- testing(ind)

# Fit a logistic regression of Attrition on all remaining columns and
# predict on the untouched test set.
model <- logistic_reg()
logreg <- fit(model, Attrition ~ ., data = train)
predlog <- predict(logreg, test)
# Replace every entry of `column` that equals `level` with a random draw
# from `choices` (with replacement, using probabilities `prob`).
# `which()` ignores NA comparisons, so missing values are left untouched
# instead of breaking the subscripted assignment.
resample_level <- function(column, level, choices, prob) {
  hit <- which(column == level)
  column[hit] <- sample(choices, size = length(hit), replace = TRUE, prob = prob)
  column
}

travel_choices <- c("Travel_Rarely", "Non-Travel", "Travel_Frequently")
travel_prob <- c(0.4, 0.2, 0.4)

# NOTE: the three resampling calls below must stay in this order; they all
# draw from the random stream seeded earlier, so reordering changes results.

# Simulation 1: employees with OverTime == "Yes" keep it with probability
# 0.9 and are switched to "No" with probability 0.1.
test2 <- test
test2$OverTime <- resample_level(
  test2$OverTime, "Yes", c("Yes", "No"), c(0.9, 0.1)
)
predTest <- predict(logreg, test2)$.pred_class

# Baseline: predictions on the unmodified test set.
predInitial <- predict(logreg, test)$.pred_class

# Simulation 2: frequent travellers are redistributed across the three
# BusinessTravel levels (original test set otherwise unchanged).
test3 <- test
test3$BusinessTravel <- resample_level(
  test3$BusinessTravel, "Travel_Frequently", travel_choices, travel_prob
)

# Simulation 3: the same travel redistribution applied on top of
# simulation 1 (test2 already has the modified OverTime column).
test2$BusinessTravel <- resample_level(
  test2$BusinessTravel, "Travel_Frequently", travel_choices, travel_prob
)
predTest_new <- predict(logreg, test2)$.pred_class
predTest3 <- predict(logreg, test3)$.pred_class
# Overlay the class counts of three prediction vectors as semi-transparent
# bars: red = combined simulation (predTest_new), blue = travel-only
# simulation (predTest3), yellow = predictions on the original test set.
# Each layer gets its own one-column data frame, so no layer depends on an
# unrelated data frame merely having the right number of rows.
graph <- ggplot(data.frame(predTest_new)) +
  geom_bar(aes(x = predTest_new), alpha = 0.5, fill = "red") +
  geom_bar(
    data = data.frame(predTest3),
    aes(x = predTest3), alpha = 0.5, fill = "blue"
  ) +
  geom_bar(
    data = data.frame(predInitial),
    aes(x = predInitial), alpha = 0.5, fill = "yellow"
  ) +
  labs(x = "Предсказание", y = "Количество")
# Wrap the young-employee data in a crosstalk SharedData object so the
# sidebar filters and the linked widgets operate on the same rows.
data <- SharedData$new(youth)
```
Параметры {.sidebar}
-------------------------------------
```{r}
# Sidebar controls: a checkbox group for overtime and a select box for
# business travel, both filtering the shared `data` object.
filter_checkbox(
  id = "OverTime",
  label = "Переработки",
  sharedData = data,
  group = ~OverTime
)
filter_select(
  id = "BusinessTravel",
  label = "Командировки",
  sharedData = data,
  group = ~BusinessTravel
)
```
Row {data-height=100}
-------------------------------------
### Процент уволившихся сотрудников в общих данных
```{r}
# Share of rows with Attrition = 1 in the whole `portfolio` table, as a
# percentage rounded to two decimals (computed by the database).
overall_attrition_percent <- dbGetQuery(con, "SELECT
ROUND(SUM(CASE WHEN Attrition = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS attrition_percent
FROM portfolio")

# dbGetQuery() returns a data frame; pull the single value out explicitly
# instead of letting paste0() coerce the whole data frame to text.
overall_percent <- paste0(overall_attrition_percent[[1]][1], "%")
valueBox(overall_percent, icon = "fas fa-chart-pie", color = "primary")
```
### Процент уволившихся молодых сотрудников
```{r}
# Share of rows with Attrition = 1 among employees aged 18 to 35
# (inclusive), as a percentage rounded to two decimals.
young_attrition_percent <- dbGetQuery(con, "SELECT
ROUND(SUM(CASE WHEN Attrition = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS young_attrition_percent
FROM portfolio
INNER JOIN profile ON portfolio.EmployeeNumber = profile.EmployeeNumber
WHERE profile.Age BETWEEN 18 AND 35")

# dbGetQuery() returns a data frame; pull the single value out explicitly
# instead of letting paste0() coerce the whole data frame to text.
young_percent <- paste0(young_attrition_percent[[1]][1], "%")
valueBox(young_percent, icon = "fas fa-chart-pie", color = "primary")
```
Row {data-height=400}
-----------------------------------------------------------------------
### Молодые сотрудники компании (18–35 лет)
```{r}
# Histogram of Attrition values, drawn from the shared `data` object so it
# reacts to the sidebar filters.
plot_ly(
  data,
  x = ~Attrition,
  colors = c("#6AB187", "#484848"),
  type = "histogram"
) %>%
  layout(
    title = " ",
    xaxis = list(title = "Да - ушел, нет - остался"),
    yaxis = list(title = "Количество")
  )
```
Row {data-height=200}
-----------------------------------------------------------------------
```{r}
# Render the overlaid prediction bar chart as an interactive plotly widget
# with a fixed 600 x 300 size.
ggplotly(p = graph, width = 600, height = 300)
```
### Результаты предсказания
```{r}
# Number of "No" predictions (employee predicted to stay) for the baseline
# and for each of the three simulated scenarios.
first <- sum(predInitial == "No")
second <- sum(predTest == "No")
third <- sum(predTest3 == "No")
final <- sum(predTest_new == "No")

# One-column matrix, one row per scenario. The labels fix a typo
# ("симулации") and call the last scenario the third one, since only three
# simulations are run.
table <- rbind(first, second, third, final)
rownames(table) <- c(
  "Предсказание на начальных данных",
  "Предсказание после первой симуляции",
  "Предсказание после второй симуляции",
  "Предсказание после третьей (дополненной первой) симуляции"
)

kable(table) %>%
  kable_styling(
    bootstrap_options = c("bordered", "responsive", "striped"),
    full_width = FALSE
  )
```