Prerequisites:

Packages

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom        1.0.0     ✔ rsample      1.1.0
## ✔ dials        1.0.0     ✔ tune         1.0.1
## ✔ infer        1.0.3     ✔ workflows    1.1.0
## ✔ modeldata    1.0.1     ✔ workflowsets 1.0.0
## ✔ parsnip      1.0.2     ✔ yardstick    1.1.0
## ✔ recipes      1.0.1     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
library(vip)
## 
## Attaching package: 'vip'
## 
## The following object is masked from 'package:utils':
## 
##     vi
library(ggplot2)
library(kernlab)
## 
## Attaching package: 'kernlab'
## 
## The following object is masked from 'package:scales':
## 
##     alpha
## 
## The following object is masked from 'package:purrr':
## 
##     cross
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha

File Path

# Build the CSV location relative to the project root (via here::here), then
# load it into a tibble.
customer_retention_path <- here::here(
  "Final Project",
  "customer_retention.csv"
)
cust_retention <- customer_retention_path %>%
  read_csv()
## Rows: 6999 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): Gender, Partner, Dependents, PhoneService, MultipleLines, Internet...
## dbl  (4): SeniorCitizen, Tenure, MonthlyCharges, TotalCharges
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Mutate Data

# Status is read in as character; the classification model needs a factor
# outcome.
cust_retention <- cust_retention %>%
  mutate(Status = factor(Status))

Exploratory Analysis

# Bar chart of client counts per TechSupport value, one panel per Status.
ggplot(data = cust_retention, mapping = aes(x = TechSupport)) +
  geom_bar(width = 0.7, size = 1.5) +
  facet_wrap(~ Status) +
  labs(
    x = "Have Tech Support",
    y = "Number of Clients",
    title = "The Number of Clients that have Stayed or Left due to Tech Support"
  )

Preparing Data

# Fix the RNG seed so the train/test partition is reproducible.
set.seed(123)

# 70/30 split, stratified on the outcome so both partitions keep a similar
# proportion of each Status level.
cust_retention_split <- initial_split(
  cust_retention,
  prop = 0.7,
  strata = "Status"
)

# Drop incomplete rows from BOTH partitions. Previously only the training set
# was filtered, so the test set could still contain rows with missing
# predictors that the fitted model cannot score.
cust_retention_train <- na.omit(training(cust_retention_split))
cust_retention_test <- na.omit(testing(cust_retention_split))

Logistic Regression Model

# Fit a logistic regression of Status on every other column of the training
# data.
log_reg_mod <- fit(
  logistic_reg(),
  Status ~ .,
  data = cust_retention_train
)

# 5-fold cross-validation folds drawn from the training data.
kfold <- vfold_cv(cust_retention_train, v = 5)

# Refit the same model specification on each fold to estimate
# out-of-sample performance.
results <- fit_resamples(
  logistic_reg(),
  preprocessor = Status ~ .,
  resamples = kfold
)
## ! Fold1: preprocessor 1/1, model 1/1 (predictions): prediction from a rank-deficient fit may be misleading
## ! Fold2: preprocessor 1/1, model 1/1 (predictions): prediction from a rank-deficient fit may be misleading
## ! Fold3: preprocessor 1/1, model 1/1 (predictions): prediction from a rank-deficient fit may be misleading
## ! Fold4: preprocessor 1/1, model 1/1 (predictions): prediction from a rank-deficient fit may be misleading
## ! Fold5: preprocessor 1/1, model 1/1 (predictions): prediction from a rank-deficient fit may be misleading
# Summarise the per-fold metrics into one row per metric.
results %>%
  collect_metrics()
## # A tibble: 2 × 6
##   .metric  .estimator  mean     n std_err .config             
##   <chr>    <chr>      <dbl> <int>   <dbl> <chr>               
## 1 accuracy binary     0.799     5 0.00422 Preprocessor1_Model1
## 2 roc_auc  binary     0.842     5 0.00747 Preprocessor1_Model1
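  • The logistic regression model has a cross-validated AUC of 0.842, which means that, given one randomly chosen current client and one randomly chosen client who left, the model ranks the pair correctly about 84.2% of the time. Its cross-validated accuracy is 0.799, i.e. it classifies about 79.9% of clients correctly.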

Confusion Matrix

# Predict a class for every test-set row, attach the true Status, and
# cross-tabulate predictions against the truth.
predict(log_reg_mod, new_data = cust_retention_test) %>%
  bind_cols(select(cust_retention_test, Status)) %>%
  conf_mat(truth = Status, estimate = .pred_class)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
##           Truth
## Prediction Current Left
##    Current    1389  247
##    Left        153  310
  • The confusion matrix tells us that the model correctly predicts 1389 current clients and 310 clients leaving. It also shows us that the model falsely predicted that 153 current clients would leave, and that 247 clients who actually left would stay.