---
title: "Банковские клиенты: анализ оттока немецкого департамента"
output:
flexdashboard::flex_dashboard:
theme: flatly
source_code: embed
orientation: rows
---
```{r setup, include=FALSE}
library(flexdashboard)
library(plotly)
library(crosstalk)
library(tidyverse)
library(DBI)
library(RMariaDB)
library(ggplot2)
library(dplyr)
library(tidymodels)
con <- dbConnect(RMariaDB::MariaDB(),
user='studentminor',
password='DataMinorHSE!2020',
dbname='bank',
host='34.88.193.134',
port = 3306)
germ = dbGetQuery(con, "SELECT Exited, IsActiveMember, HasCrCard, Gender,
Age, EstimatedSalary, CreditScore, Tenure, Balance, NumOfProducts
FROM
(portfolio INNER JOIN profile ON portfolio.CustomerId = profile.CustomerId)
WHERE profile.CountryId = 'country3'
")
germ$Exited <- ifelse(germ$Exited == 1, "yes", "no") #1=yes
germ$Exited = as.factor(germ$Exited)
germ$Exited = relevel(germ$Exited, "yes", "no")
germ$IsActiveMember = as.factor(germ$IsActiveMember)
germ$HasCrCard = as.factor(germ$HasCrCard)
germ$Gender = as.factor(germ$Gender)
germ_model = germ
germ <- SharedData$new(germ)
```
Inputs {.sidebar}
-------------------------------------
```{r}
filter_checkbox("Exited", "Ушел ли клиент", germ, ~ Exited)
filter_slider("Age", "Возраст", germ, ~ Age)
filter_select("NumOfProducts", "Количество продуктов", germ, ~ NumOfProducts)
```
Row {data-height=200}
-------------------------------------
### Точность предиктивной модели
```{r}
valueBox(0.8, icon = "fas fa-bullseye", color = "primary")
```
### Общий процент ушедших клиентов
```{r}
value2 = dbGetQuery(con, "SELECT Exited, COUNT(Exited) AS n FROM portfolio
GROUP BY Exited")
value2 = value2 %>% mutate(pct = n/sum(n))
value2 = value2 %>% filter(Exited == "1")
value2 <- value2$pct * 100
valueBox(value2, icon = "fas fa-percent", color = "primary")
```
### Процент ушедших клиентов из немецкого отдела банка
```{r}
value3 = dbGetQuery(con, "SELECT Exited, profile.CountryId
FROM
(portfolio LEFT JOIN profile ON portfolio.CustomerId = profile.CustomerId)
")
value3 <- value3 %>% filter(CountryId == "country3") %>% group_by(Exited) %>% summarise(n = n()) %>% mutate(pct=n/sum(n))
value3 = value3 %>% filter(Exited == "1")
value3 <- round((value3$pct * 100), 2)
valueBox(value3, icon = "fas fa-percent", color = "primary")
dbDisconnect(con)
```
Row {data-height=500}
-------------------------------------
### Распределение количества клиентов относительно количества продутов
```{r}
plot_ly(germ, x = ~NumOfProducts, color = ~Exited, colors = c("#484848", "#6AB187")) %>%
layout(xaxis = list(title = "Количетво продуктов"),
yaxis = list(title = "Количество клиентов"))
```
### Соотношение ушедших и оставшихся клиентов после кампания по повышению активноти
```{r}
# модель
set.seed(444)
split1 = initial_split(germ_model, prop = 0.8)
train1 = training(split1)
test1 = testing(split1)
set.seed(444)
library(rpart.plot)
tree <- decision_tree(
mode = "classification") %>%
set_engine("rpart")
tree.wf <- workflow() %>%
add_formula(Exited ~.) %>%
add_model(tree) %>%
fit(train1)
####################################### симуляция
test_sim = test1
set.seed(333)
test_sim$NumOfProducts[test_sim$NumOfProducts == 1] =
sample(c(1,2),
size = length(test_sim$NumOfProducts[test_sim$NumOfProducts == 1]),
replace = T, prob = c(0.9, 0.1))
predTest_sim_active = predict(tree.wf, test_sim)$.pred_class
library(ggplot2)
ggplot(test_sim) + geom_bar(aes(x = Exited), alpha = 0.8) +
geom_bar(data = data.frame(predTest_sim_active), aes(x = predTest_sim_active), alpha = 0.8, fill = "#6AB187") +
scale_x_discrete(labels = c("Да", "Нет")) +
ggtitle(" ") +
xlab("Ушел ли клиент") +
ylab("Количество клиентов") +
theme_minimal()
```
Row {data-height=450}
-------------------------------------
### Распределение оставшихся и ушедших клиентов по возрасту
```{r}
germ %>%
plot_ly(alpha = 0.9, color = ~Exited, colors = c("#484848", "#6AB187"), hoverinfo = 'x') %>%
add_histogram(x = ~Age) %>%
layout(barmode = "stack",
xaxis = list(title = "Возраст"),
yaxis = list(title = "Количество клиентов"))
```