L_Churn

Clear

#| output: false

rm(list = ls())   # Clear environment 
gc()              # Clear unused memory
          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells  576222 30.8    1311724 70.1         NA   669248 35.8
Vcells 1050139  8.1    8388608 64.0      32768  1840578 14.1
cat("\f")         # Clear the console
graphics.off()    # Clear the charts

Import Data

library(readxl)
df <- read_excel("../raw_data/Churn Analysis_Data.xlsx", 
                 n_max = 79
                 )

df<- as.data.frame(df)

# df$account_number<- NULL

Visualize Data

Summary Stasistics

library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
stargazer::stargazer(df, 
                     type = "text", 
                     title = "Summary Statistics", 
                     digits = 2,
                     nobs = FALSE,
                     notes = "n=79"
                      
                     )

Summary Statistics
==================================================================================
Statistic                                           Mean    St. Dev.  Min    Max  
----------------------------------------------------------------------------------
Account #                                           40.00     22.95    1     79   
License Count                                       3.70      5.00     0     29   
Active Users (Based on last 2 months of Activity)   17.19     23.05    0     130  
Total Internal Users                                29.56     39.24    2     255  
% of Active Users                                   0.60      0.35    0.00  1.00  
Authentication Required                             0.44      0.50     0      1   
Avg Monthly User Engagement - Last month          26,205.08 80,127.04  0   567,405
Explorer Functionality                              0.91      0.29     0      1   
Sandbox Feature                                     0.28      0.45     0      1   
Custom Auth                                         0.65      0.48     0      1   
Avg CX Interactions/month past 6 months             2.63      3.34     0     20   
Expansions                                          0.39      0.65     0      3   
Downgrades (if churned)?                            0.06      0.25     0      1   
Renewal Cycles                                      3.18      1.41     1      7   
----------------------------------------------------------------------------------
n=79                                                                              

Clean Data

For regression


Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
df_raw<- df
table(df_raw$Status)

 Churned Customer 
      33       46 
df <- clean_names(df)

df$status <- ifelse(test = (df$status=="Customer"), yes = 1, no = 0)
table(df$status)

 0  1 
33 46 
# Summary statistics by category
summary_stats <- aggregate(license_count ~ status, data = df, FUN = mean)
print(summary_stats)

# Compute summary statistics by category
summary_stats <- by(data = df, 
                    INDICES = df$status, 
                    summary
                    )
summary_stats

Balance Tables

Loading required package: kableExtra
sumtable(data = df, group = "status", group.test = TRUE)
Summary Statistics
status
0
1
Variable N Mean SD N Mean SD Test
account_number 33 63 10 46 24 14 F=196.355<sup>***</sup>
license_count 33 3 5.4 46 4.2 4.7 F=1.1<sup></sup>
active_users_based_on_last_2_months_of_activity 33 6.5 10 46 25 27 F=14.121<sup>***</sup>
total_internal_users 33 22 29 46 35 45 F=2.132<sup></sup>
percent_of_active_users 33 0.32 0.28 46 0.8 0.24 F=63.677<sup>***</sup>
authentication_required 33 0.36 0.49 46 0.5 0.51 F=1.438<sup></sup>
avg_monthly_user_engagement_last_month 33 3516 6412 46 42482 102230 F=4.764<sup>**</sup>
explorer_functionality 33 0.91 0.29 46 0.91 0.28 F=0.004<sup></sup>
sandbox_feature 33 0.12 0.33 46 0.39 0.49 F=7.458<sup>***</sup>
custom_auth 33 0.48 0.51 46 0.76 0.43 F=6.786<sup>**</sup>
avg_cx_interactions_month_past_6_months 33 1.4 1.8 46 3.5 3.9 F=8.115<sup>***</sup>
expansions 33 0.18 0.46 46 0.54 0.72 F=6.382<sup>**</sup>
downgrades_if_churned 33 0.15 0.36 46 0 0 F=8.006<sup>***</sup>
renewal_cycles 33 3 1.3 46 3.3 1.5 F=0.61<sup></sup>
Statistical significance markers: * p<0.1; ** p<0.05; *** p<0.01

Correlation Analysis

corrplot 0.92 loaded
library(polycor)

# Calculate correlation matrix with p-values
correlation_matrix <- cor(df)
p_values <- cor.mtest(correlation_matrix)$p

?corrplot
# Display correlation matrix with statistical significance
corrplot(correlation_matrix, 
         method = "shade", 
         type = "lower",
         p.mat = p_values, 
         sig.level = 0.05, 
         tl.cex = 0.5  # Adjust label size as needed
         )

Regressions

  • Need variable definitions

Logistic Models

?glm()

logit_1 <-
glm(data = df, 
    formula = status ~ active_users_based_on_last_2_months_of_activity, 
    family = "binomial")
# summary(logit_1)

logit_2 <-
glm(data = df, 
    formula = status ~ active_users_based_on_last_2_months_of_activity + percent_of_active_users, 
    family = "binomial")
# summary(logit_2) # 1 multicollinearity

logit_3 <-
glm(data = df, 
    formula = status ~ percent_of_active_users+sandbox_feature, 
    family = "binomial")
# summary(logit_3) 


logit_4 <-
glm(data = df, 
    formula = status ~ percent_of_active_users + sandbox_feature + renewal_cycles + avg_monthly_user_engagement_last_month, 
    family = "binomial")
# summary(logit_4) 

stargazer(logit_1, logit_2, logit_3,logit_4,
          type="text")

=======================================================================================
                                                          Dependent variable:          
                                                ---------------------------------------
                                                                status                 
                                                   (1)       (2)       (3)       (4)   
---------------------------------------------------------------------------------------
active_users_based_on_last_2_months_of_activity 0.129***    0.036                      
                                                 (0.037)   (0.030)                     
                                                                                       
percent_of_active_users                                   4.604***  5.229***  4.571*** 
                                                           (1.181)   (1.104)   (1.174) 
                                                                                       
sandbox_feature                                                       1.232     1.187  
                                                                     (0.805)   (0.813) 
                                                                                       
renewal_cycles                                                                  0.109  
                                                                               (0.239) 
                                                                                       
avg_monthly_user_engagement_last_month                                         0.00003 
                                                                              (0.00003)
                                                                                       
Constant                                        -1.150*** -2.816*** -2.913*** -3.237***
                                                 (0.430)   (0.692)   (0.711)   (1.092) 
                                                                                       
---------------------------------------------------------------------------------------
Observations                                       79        79        79        79    
Log Likelihood                                   -41.106   -31.684   -32.048   -30.511 
Akaike Inf. Crit.                                86.213    69.367    70.097    71.022  
=======================================================================================
Note:                                                       *p<0.1; **p<0.05; ***p<0.01
df$probabilty<-
predict(object = logit_4, 
        newdata = df, 
        type = "response"
        )

df$log_odds<-
predict(object = logit_4, 
        newdata = df
        )

df$probabilty2<- plogis(df$log_odds)

?plogis


df$churn_probability <-  1 - as.vector(df$probabilty)

Outcome varibale

  • Probability of account not churning
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()     masks stats::filter()
✖ dplyr::group_rows() masks kableExtra::group_rows()
✖ dplyr::lag()        masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
df %>% select(account_number, status, churn_probability)
   account_number status churn_probability
1               1      1      1.188074e-03
2               2      1      2.574224e-01
3               3      1      6.911856e-01
4               4      1      4.077103e-01
5               5      1      2.172633e-01
6               6      1      2.892200e-01
7               7      1      2.116734e-02
8               8      1      4.579936e-02
9               9      1      1.910761e-01
10             10      1      1.327845e-01
11             11      1      1.490410e-01
12             12      1      2.699755e-02
13             13      1      1.836038e-02
14             14      1      6.039882e-02
15             15      1      8.785048e-01
16             16      1      8.084099e-03
17             17      1      3.336580e-01
18             18      1      3.458956e-01
19             19      1      1.532644e-01
20             20      1      3.405806e-01
21             21      1      7.331098e-01
22             22      1      6.358018e-01
23             23      1      2.224204e-02
24             24      1      5.009823e-02
25             25      1      1.107220e-01
26             26      1      7.822882e-02
27             27      1      4.867392e-01
28             28      1      1.991833e-02
29             29      1      7.557248e-08
30             30      1      1.511767e-01
31             31      1      2.023825e-09
32             32      1      7.958913e-03
33             33      1      1.857039e-01
34             34      1      1.344411e-01
35             35      1      2.016549e-01
36             36      1      1.570622e-01
37             37      1      5.193490e-01
38             38      1      3.705125e-02
39             39      1      2.981764e-01
40             40      1      2.106362e-02
41             41      0      9.426751e-01
42             42      1      3.165200e-02
43             43      1      2.491232e-02
44             44      1      1.482619e-01
45             45      1      1.213626e-01
46             46      1      6.032286e-01
47             47      1      1.889661e-01
48             48      0      7.151515e-01
49             49      0      2.552433e-01
50             50      0      1.348354e-01
51             51      0      8.798097e-01
52             52      0      4.159130e-01
53             53      0      9.052073e-01
54             54      0      7.340894e-01
55             55      0      8.093086e-01
56             56      0      9.364748e-01
57             57      0      7.847068e-01
58             58      0      5.783046e-01
59             59      0      6.113407e-01
60             60      0      8.306432e-01
61             61      0      1.521418e-01
62             62      0      9.207343e-01
63             63      0      8.671552e-01
64             64      0      9.224299e-01
65             65      0      4.317664e-02
66             66      0      6.523988e-01
67             67      0      8.671286e-01
68             68      0      8.385520e-01
69             69      0      9.427313e-01
70             70      0      9.533775e-01
71             71      0      8.769371e-01
72             72      0      5.669235e-01
73             73      0      8.780352e-01
74             74      0      6.755336e-01
75             75      0      9.534141e-01
76             76      0      9.364999e-01
77             77      0      8.148408e-01
78             78      0      9.523018e-01
79             79      0      1.135002e-01