L_Churn

Clear

#| output: false

rm(list = ls())   # Clear environment 
gc()              # Clear unused memory

          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells  576222 30.8    1311724 70.1         NA   669248 35.8
Vcells 1050139  8.1    8388608 64.0      32768  1840578 14.1

cat("\f")         # Clear the console

graphics.off()    # Clear the charts

Import Data

library(readxl)
df <- read_excel("../raw_data/Churn Analysis_Data.xlsx", 
                 n_max = 79
                 )

df<- as.data.frame(df)

# df$account_number<- NULL

Visualize Data

library(visdat)
vis_dat(df)

Summary Stasistics

library(stargazer)


Please cite as:

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

stargazer::stargazer(df, 
                     type = "text", 
                     title = "Summary Statistics", 
                     digits = 2,
                     nobs = FALSE,
                     notes = "n=79"
                      
                     )


Summary Statistics
==================================================================================
Statistic                                           Mean    St. Dev.  Min    Max  
----------------------------------------------------------------------------------
Account #                                           40.00     22.95    1     79   
License Count                                       3.70      5.00     0     29   
Active Users (Based on last 2 months of Activity)   17.19     23.05    0     130  
Total Internal Users                                29.56     39.24    2     255  
% of Active Users                                   0.60      0.35    0.00  1.00  
Authentication Required                             0.44      0.50     0      1   
Avg Monthly User Engagement - Last month          26,205.08 80,127.04  0   567,405
Explorer Functionality                              0.91      0.29     0      1   
Sandbox Feature                                     0.28      0.45     0      1   
Custom Auth                                         0.65      0.48     0      1   
Avg CX Interactions/month past 6 months             2.63      3.34     0     20   
Expansions                                          0.39      0.65     0      3   
Downgrades (if churned)?                            0.06      0.25     0      1   
Renewal Cycles                                      3.18      1.41     1      7   
----------------------------------------------------------------------------------
n=79

Clean Data

For regression

library(janitor)


Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

df_raw<- df
table(df_raw$Status)


 Churned Customer 
      33       46

df <- clean_names(df)

df$status <- ifelse(test = (df$status=="Customer"), yes = 1, no = 0)
table(df$status)


 0  1 
33 46

# Summary statistics by category
summary_stats <- aggregate(license_count ~ status, data = df, FUN = mean)
print(summary_stats)

# Compute summary statistics by category
summary_stats <- by(data = df, 
                    INDICES = df$status, 
                    summary
                    )
summary_stats

Balance Tables

https://www.youtube.com/watch?v=tQredW74x_o

library(vtable)

Loading required package: kableExtra

sumtable(data = df, group = "status", group.test = TRUE)

Summary Statistics
status	0			1
Variable	N	Mean	SD	N	Mean	SD	Test
account_number	33	63	10	46	24	14	F=196.355<sup>***</sup>
license_count	33	3	5.4	46	4.2	4.7	F=1.1<sup></sup>
active_users_based_on_last_2_months_of_activity	33	6.5	10	46	25	27	F=14.121<sup>***</sup>
total_internal_users	33	22	29	46	35	45	F=2.132<sup></sup>
percent_of_active_users	33	0.32	0.28	46	0.8	0.24	F=63.677<sup>***</sup>
authentication_required	33	0.36	0.49	46	0.5	0.51	F=1.438<sup></sup>
avg_monthly_user_engagement_last_month	33	3516	6412	46	42482	102230	F=4.764<sup>**</sup>
explorer_functionality	33	0.91	0.29	46	0.91	0.28	F=0.004<sup></sup>
sandbox_feature	33	0.12	0.33	46	0.39	0.49	F=7.458<sup>***</sup>
custom_auth	33	0.48	0.51	46	0.76	0.43	F=6.786<sup>**</sup>
avg_cx_interactions_month_past_6_months	33	1.4	1.8	46	3.5	3.9	F=8.115<sup>***</sup>
expansions	33	0.18	0.46	46	0.54	0.72	F=6.382<sup>**</sup>
downgrades_if_churned	33	0.15	0.36	46	0	0	F=8.006<sup>***</sup>
renewal_cycles	33	3	1.3	46	3.3	1.5	F=0.61<sup></sup>
Statistical significance markers: * p<0.1; p<0.05; * p<0.01

Correlation Analysis

library(corrplot)

corrplot 0.92 loaded

library(polycor)

# Calculate correlation matrix with p-values
correlation_matrix <- cor(df)
p_values <- cor.mtest(correlation_matrix)$p

?corrplot
# Display correlation matrix with statistical significance
corrplot(correlation_matrix, 
         method = "shade", 
         type = "lower",
         p.mat = p_values, 
         sig.level = 0.05, 
         tl.cex = 0.5  # Adjust label size as needed
         )

Regressions

Need variable definitions

Logistic Models

?glm()

logit_1 <-
glm(data = df, 
    formula = status ~ active_users_based_on_last_2_months_of_activity, 
    family = "binomial")
# summary(logit_1)

logit_2 <-
glm(data = df, 
    formula = status ~ active_users_based_on_last_2_months_of_activity + percent_of_active_users, 
    family = "binomial")
# summary(logit_2) # 1 multicollinearity

logit_3 <-
glm(data = df, 
    formula = status ~ percent_of_active_users+sandbox_feature, 
    family = "binomial")
# summary(logit_3) 


logit_4 <-
glm(data = df, 
    formula = status ~ percent_of_active_users + sandbox_feature + renewal_cycles + avg_monthly_user_engagement_last_month, 
    family = "binomial")
# summary(logit_4) 

stargazer(logit_1, logit_2, logit_3,logit_4,
          type="text")


=======================================================================================
                                                          Dependent variable:          
                                                ---------------------------------------
                                                                status                 
                                                   (1)       (2)       (3)       (4)   
---------------------------------------------------------------------------------------
active_users_based_on_last_2_months_of_activity 0.129***    0.036                      
                                                 (0.037)   (0.030)                     
                                                                                       
percent_of_active_users                                   4.604***  5.229***  4.571*** 
                                                           (1.181)   (1.104)   (1.174) 
                                                                                       
sandbox_feature                                                       1.232     1.187  
                                                                     (0.805)   (0.813) 
                                                                                       
renewal_cycles                                                                  0.109  
                                                                               (0.239) 
                                                                                       
avg_monthly_user_engagement_last_month                                         0.00003 
                                                                              (0.00003)
                                                                                       
Constant                                        -1.150*** -2.816*** -2.913*** -3.237***
                                                 (0.430)   (0.692)   (0.711)   (1.092) 
                                                                                       
---------------------------------------------------------------------------------------
Observations                                       79        79        79        79    
Log Likelihood                                   -41.106   -31.684   -32.048   -30.511 
Akaike Inf. Crit.                                86.213    69.367    70.097    71.022  
=======================================================================================
Note:                                                       *p<0.1; **p<0.05; ***p<0.01

df$probabilty<-
predict(object = logit_4, 
        newdata = df, 
        type = "response"
        )

df$log_odds<-
predict(object = logit_4, 
        newdata = df
        )

df$probabilty2<- plogis(df$log_odds)

?plogis


df$churn_probability <-  1 - as.vector(df$probabilty)

Outcome varibale

Probability of account not churning

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()     masks stats::filter()
✖ dplyr::group_rows() masks kableExtra::group_rows()
✖ dplyr::lag()        masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

df %>% select(account_number, status, churn_probability)

   account_number status churn_probability
1               1      1      1.188074e-03
2               2      1      2.574224e-01
3               3      1      6.911856e-01
4               4      1      4.077103e-01
5               5      1      2.172633e-01
6               6      1      2.892200e-01
7               7      1      2.116734e-02
8               8      1      4.579936e-02
9               9      1      1.910761e-01
10             10      1      1.327845e-01
11             11      1      1.490410e-01
12             12      1      2.699755e-02
13             13      1      1.836038e-02
14             14      1      6.039882e-02
15             15      1      8.785048e-01
16             16      1      8.084099e-03
17             17      1      3.336580e-01
18             18      1      3.458956e-01
19             19      1      1.532644e-01
20             20      1      3.405806e-01
21             21      1      7.331098e-01
22             22      1      6.358018e-01
23             23      1      2.224204e-02
24             24      1      5.009823e-02
25             25      1      1.107220e-01
26             26      1      7.822882e-02
27             27      1      4.867392e-01
28             28      1      1.991833e-02
29             29      1      7.557248e-08
30             30      1      1.511767e-01
31             31      1      2.023825e-09
32             32      1      7.958913e-03
33             33      1      1.857039e-01
34             34      1      1.344411e-01
35             35      1      2.016549e-01
36             36      1      1.570622e-01
37             37      1      5.193490e-01
38             38      1      3.705125e-02
39             39      1      2.981764e-01
40             40      1      2.106362e-02
41             41      0      9.426751e-01
42             42      1      3.165200e-02
43             43      1      2.491232e-02
44             44      1      1.482619e-01
45             45      1      1.213626e-01
46             46      1      6.032286e-01
47             47      1      1.889661e-01
48             48      0      7.151515e-01
49             49      0      2.552433e-01
50             50      0      1.348354e-01
51             51      0      8.798097e-01
52             52      0      4.159130e-01
53             53      0      9.052073e-01
54             54      0      7.340894e-01
55             55      0      8.093086e-01
56             56      0      9.364748e-01
57             57      0      7.847068e-01
58             58      0      5.783046e-01
59             59      0      6.113407e-01
60             60      0      8.306432e-01
61             61      0      1.521418e-01
62             62      0      9.207343e-01
63             63      0      8.671552e-01
64             64      0      9.224299e-01
65             65      0      4.317664e-02
66             66      0      6.523988e-01
67             67      0      8.671286e-01
68             68      0      8.385520e-01
69             69      0      9.427313e-01
70             70      0      9.533775e-01
71             71      0      8.769371e-01
72             72      0      5.669235e-01
73             73      0      8.780352e-01
74             74      0      6.755336e-01
75             75      0      9.534141e-01
76             76      0      9.364999e-01
77             77      0      8.148408e-01
78             78      0      9.523018e-01
79             79      0      1.135002e-01