Row

Процент уволившихся сотрудников в общих данных

16.92%

Процент уволившихся молодых сотрудников

22.75%

Row

Все сотрудники компании

Row

Результаты предсказания

Предсказание на начальных данных 75
Предсказание после первой симуляции 78
Предсказание после второй симуляции 78
Предсказание после четвертой(дополненной первой) симуляции 80
---
title: "Отток молодых сотрудников из компании"
output: 
  flexdashboard::flex_dashboard:
    theme: flatly
    source_code: embed
    orientation: rows
---

```{r setup, include=FALSE}
library(flexdashboard)
library(dplyr)
library(plotly)
library(crosstalk)
library(DBI)
library(d3scatter)
library(DBI)
library(kableExtra)
library(vip)
library(rsample)
library(parsnip)
```

```{r}
# Open a DBI connection to the ClickHouse `employee` database over HTTPS.
#
# NOTE(review): the fallback credentials below are committed in plain text,
# and the YAML header sets `source_code: embed`, which ships this chunk
# (password included) inside the rendered HTML. The literals are kept only as
# defaults so existing renders keep working; prefer setting CLICKHOUSE_USER /
# CLICKHOUSE_PASSWORD / CLICKHOUSE_HOST in the environment, then rotate the
# password and remove the literals.
# NOTE(review): `ssl_verifypeer = FALSE` appears to switch off TLS certificate
# verification -- confirm it is really required for this host.
con <- dbConnect(
  ClickHouseHTTP::ClickHouseHTTP(),
  user = Sys.getenv("CLICKHOUSE_USER", unset = "studentminor"),
  password = Sys.getenv("CLICKHOUSE_PASSWORD", unset = "DataMinorHSE!2023"),
  dbname = "employee",
  host = Sys.getenv(
    "CLICKHOUSE_HOST",
    unset = "rc1a-i6ui9dhblsq8rgdo.mdb.yandexcloud.net"
  ),
  port = 8443,
  https = TRUE,
  ssl_verifypeer = FALSE
)

# Pull profile + portfolio rows for employees aged 18-35 (inclusive).
# `SELECT *` over the join returns the join key twice; the duplicate
# `portfolio.EmployeeNumber` column is dropped later in this chunk.
youth <- dbGetQuery(con, "SELECT * FROM profile
           INNER JOIN portfolio ON profile.EmployeeNumber = portfolio.EmployeeNumber
           WHERE Age BETWEEN 18 AND 35")

# Data preparation ----
# Recode the Attrition flag into "Yes"/"No" labels. Assuming the column
# arrives as numeric 0/1, the first assignment coerces it to character, so the
# second comparison still matches the remaining "0" entries.
youth$Attrition[youth$Attrition == 1] <- "Yes"
youth$Attrition[youth$Attrition == 0] <- "No"

# Drop the duplicated join key and convert every character column to a
# factor (`across(where(...))` replaces the superseded `mutate_if()`).
youth <- youth %>%
  select(-portfolio.EmployeeNumber) %>%
  mutate(across(where(is.character), as.factor))

# Train/test split (80/20) ----
# The seed fixes both this split and the random resampling done later.
set.seed(100)
ind <- initial_split(youth, prop = 0.8)
train <- training(ind)
test <- testing(ind)

# Logistic regression model ----
# NOTE(review): `Attrition ~ .` uses every remaining column as a predictor;
# presumably that includes the `EmployeeNumber` identifier kept from
# `profile` -- confirm it is meant to be a feature.
model <- logistic_reg()
logreg <- model %>% fit(Attrition ~ ., data = train)
predlog <- predict(logreg, test)

# Replace every element of `x` equal to `from` with a random draw from `to`.
#
# x     factor (or character) vector to modify
# from  level whose occurrences are re-drawn
# to    candidate replacement values (must be existing levels of `x`)
# prob  sampling probabilities, one per element of `to`
#
# Returns `x` with the matching positions overwritten. `which()` skips NA
# entries, which a logical mask would reject in a subscripted assignment.
resample_level <- function(x, from, to, prob) {
  hits <- which(x == from)
  x[hits] <- sample(to, size = length(hits), replace = TRUE, prob = prob)
  x
}

travel_levels <- c("Travel_Rarely", "Non-Travel", "Travel_Frequently")
travel_prob <- c(0.4, 0.2, 0.4)

# Baseline: predictions on the untouched test set (already computed as
# `predlog`, so the model is not run a second time on the same data).
predInitial <- predlog$.pred_class

# Scenario 1: each "Yes" in OverTime stays "Yes" with probability 0.9 and
# becomes "No" with probability 0.1.
test2 <- test
test2$OverTime <- resample_level(
  test2$OverTime, "Yes", c("Yes", "No"),
  prob = c(0.9, 0.1)
)
predTest <- predict(logreg, test2)$.pred_class

# Scenario 2: frequent travellers are re-drawn across the three travel levels.
test3 <- test
test3$BusinessTravel <- resample_level(
  test3$BusinessTravel, "Travel_Frequently", travel_levels,
  prob = travel_prob
)

# Scenario 3: the same travel re-draw applied on top of scenario 1.
# The three sample() draws happen in the same order as before (OverTime,
# test3 travel, test2 travel), so results under set.seed(100) are unchanged.
test2$BusinessTravel <- resample_level(
  test2$BusinessTravel, "Travel_Frequently", travel_levels,
  prob = travel_prob
)

predTest_new <- predict(logreg, test2)$.pred_class
predTest3 <- predict(logreg, test3)$.pred_class



# Overlay three semi-transparent bar charts of predicted classes:
#   red    - predTest_new (test2 after both resampling steps)
#   blue   - predTest3    (test3, BusinessTravel resampled)
#   yellow - predInitial  (unmodified test set)
# Every layer gets its own one-column data frame, so each aesthetic resolves
# to a real column instead of falling back to a global vector.
# NOTE(review): fills are set as constants, so the plot has no legend
# explaining the colours.
graph <- ggplot(data.frame(predTest_new)) +
  geom_bar(aes(x = predTest_new), alpha = 0.5, fill = "red") +
  geom_bar(
    data = data.frame(predTest3),
    aes(x = predTest3),
    alpha = 0.5,
    fill = "blue"
  ) +
  geom_bar(
    data = data.frame(predInitial),
    aes(x = predInitial),
    alpha = 0.5,
    fill = "yellow"
  ) +
  labs(x = "Предсказание", y = "Количество")

# Crosstalk wrapper around `youth`, shared by the sidebar filters and the
# plotly histogram below.
data <- SharedData$new(youth)

```

Параметры {.sidebar}
-------------------------------------

```{r}
# Crosstalk filter widgets bound to the shared `data` object:
# a checkbox group over OverTime and a select box over BusinessTravel.
filter_checkbox(
  id = "OverTime",
  label = "Переработки",
  sharedData = data,
  group = ~OverTime
)
filter_select(
  id = "BusinessTravel",
  label = "Командировки",
  sharedData = data,
  group = ~BusinessTravel
)
```

Row {data-height=100}
-------------------------------------

### Процент уволившихся сотрудников в общих данных

```{r}
# Share (in percent, rounded to 2 decimals) of `portfolio` rows with
# Attrition = 1, computed on the database side.
overall_attrition_percent <- dbGetQuery(con, "SELECT 
                                        ROUND(SUM(CASE WHEN Attrition = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS attrition_percent
                                        FROM portfolio")

# The query yields a one-row, one-column data frame; extract the scalar
# explicitly instead of letting paste0() coerce the whole data frame.
overall_percent <- paste0(overall_attrition_percent[[1]][1], "%")

valueBox(overall_percent, icon = "fas fa-chart-pie", color = "primary")
```


### Процент уволившихся молодых сотрудников
```{r}
# Share (in percent, rounded to 2 decimals) of employees aged 18-35 with
# Attrition = 1, computed on the database side.
young_attrition_percent <- dbGetQuery(con, "SELECT 
                                       ROUND(SUM(CASE WHEN Attrition = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS young_attrition_percent
                                       FROM portfolio
                                       INNER JOIN profile ON portfolio.EmployeeNumber = profile.EmployeeNumber
                                       WHERE profile.Age BETWEEN 18 AND 35")

# The query yields a one-row, one-column data frame; extract the scalar
# explicitly instead of letting paste0() coerce the whole data frame.
young_percent <- paste0(young_attrition_percent[[1]][1], "%")

valueBox(young_percent, icon = "fas fa-chart-pie", color = "primary")
```

Row {data-height=400}
-----------------------------------------------------------------------

### Молодые сотрудники компании (18–35 лет)

```{r}
# Histogram of Attrition built on the crosstalk-shared `data`, so it reacts
# to the sidebar filters.
# NOTE(review): `colors` is supplied without a `color` mapping -- presumably
# it has no visible effect; confirm whether `color = ~Attrition` was intended.
attrition_hist <- plot_ly(
  data,
  x = ~Attrition,
  colors = c("#6AB187", "#484848"),
  type = "histogram"
)

layout(
  attrition_hist,
  title = " ",
  xaxis = list(title = "Да - ушел, нет - остался"),
  yaxis = list(title = "Количество")
)
```

Row {data-height=200}
-----------------------------------------------------------------------

```{r}
# Render the overlaid prediction bar chart as an interactive plotly widget
# with a fixed 600 x 300 size.
prediction_plot <- ggplotly(p = graph, width = 600, height = 300)
prediction_plot
```

### Результаты предсказания 
```{r}
# Number of test-set employees predicted as "No" under each scenario:
# baseline, OverTime resampled, BusinessTravel resampled, and both combined.
stay_counts <- c(
  sum(predInitial == "No"),
  sum(predTest == "No"),
  sum(predTest3 == "No"),
  sum(predTest_new == "No")
)

# Row labels shown in the dashboard (typo "симулации" corrected).
scenario_labels <- c(
  "Предсказание на начальных данных",
  "Предсказание после первой симуляции",
  "Предсказание после второй симуляции",
  "Предсказание после четвертой(дополненной первой) симуляции"
)

# One-column matrix with row names only -- the same shape the previous
# rbind() + rownames<- produced, but without masking base::table() or
# dplyr::first() with local variables.
prediction_table <- matrix(
  stay_counts,
  ncol = 1,
  dimnames = list(scenario_labels, NULL)
)

kable(prediction_table) %>%
  kable_styling(
    bootstrap_options = c("bordered", "responsive", "striped"),
    full_width = FALSE
  )
```