Projecting Customer Retention in R

In the paper “How to project customer retention” by Fader and Hardie there is a neat example of using shifted Beta Geometric distribution (sBG) to project customer retention in Excel. Below you can see the same model being replicated in R.

Code

library(plotly)

churnBG <- Vectorize(function(alpha,beta,period){
  # Computes churn probabilities based on sBG distribution
  # Equation (7) in Paper 1
  #
  # Args:
  #  alpha: numeric
  #  beta: numeric
  #  period: integer or vector of integers
  #
  # Returns:
  #  Vector of churn probabilities for period(s) 
  t1 = alpha/(alpha+beta)
  result = t1
  if (period > 1) {
    result = churnBG(alpha,beta,period-1)*(beta+period-2)/(alpha+beta+period-1)
  }
  
  return(result)
}, vectorize.args = c("period"))

# values
churnBG(1,1,1:10)

##  [1] 0.500000000 0.166666667 0.083333333 0.050000000 0.033333333 0.023809524
##  [7] 0.017857143 0.013888889 0.011111111 0.009090909

# plot
t <- 1:10
churns.prob <- churnBG(1,1,1:10)
data <- data.frame(t, churns.prob)

plot_ly(data, x = ~t, y = ~churns.prob, type = 'scatter', mode = 'lines') %>%
  layout(title = "Density plot for P(T=t | aplha, beta) for t=10",
                     xaxis = list(title = "Period, t"),
                     yaxis = list(title = "P(T=t | aplha, beta)"))

Survival Probabilities

survivalBG = Vectorize(function(alpha, beta, period){
  # Computes survival probabilites based on a sBG distribution
  #
  # Args:
  #  alpha: numeric
  #  beta: numeric
  #  period: integer or vector of integers
  #
  # Returns:
  #  Vector of survival probabilities for period(s) 
  t1 = 1-churnBG(alpha,beta,1)
  result = t1
  if(period>1){
    result = survivalBG(alpha,beta,period-1)-churnBG(alpha,beta,period)
  }
  return(result)
}, vectorize.args = c("period"))

# values
survivalBG(1,1,1:10)

##  [1] 0.50000000 0.33333333 0.25000000 0.20000000 0.16666667 0.14285714
##  [7] 0.12500000 0.11111111 0.10000000 0.09090909

# plot
t <- 1:10
survival.probs <- survivalBG(1,1,1:10)
data <- data.frame(t, survival.probs)

plot_ly(data, x = ~t, y = ~survival.probs, type = 'scatter', mode = 'lines') %>%
  layout(title = "Density plot for S(T=t | aplha, beta) for t=10",
                     xaxis = list(title = "Period, t"),
                     yaxis = list(title = "S(T=t | aplha, beta)"))

Likelihood function

MLL = function(alphabeta){
  # Computesl ikelihood. Equation (B3) in Paper 1
  #
  # Args:
  #  alphabeta: vector with alpha being the first and beta being the second elements, c(a,b)
  #
  # Returns:
  #  Vector of churn probabilities for period(s) 
  #
  # Error handling
  if(length(activeCust) != length(lostCust)){
    stop("Variables activeCust and lostCust have different lengths: ",
         length(activeCust), " and ", length(lostCust), ".")
  }
  # Example data for seven periods
  # activeCust = c(869,743,653,593,551,517,491)
  # lostCust = c(131,126,90,60,42,34,26)
  t=length(activeCust) # number of periods
  alpha = alphabeta[1]
  beta = alphabeta[2]
  return(-as.numeric(
    sum(lostCust*log(churnBG(alpha,beta,1:t))) +
      activeCust[t]*log(survivalBG(alpha,beta,t))
  ))
}

# Data from paper
activeCust = c(869,743,653,593,551,517,491)
lostCust = c(131,126,90,60,42,34,26)

MLL(c(1,1))

## [1] 2115.546

Example From Paper

Below we recreate the example from the paper using the same data.

# Data
activeCust = c(869,743,653,593,551,517,491)
lostCust = c(131,126,90,60,42,34,26)

# MLL optimization. Result are same as in the paper alpha=0.668, beta = 3.806. Good to go
# Obj. value = -1611.2
optim(c(1,1),MLL)

## Warning in log(churnBG(alpha, beta, 1:t)): NaNs produced

## $par
## [1] 0.6677123 3.8024773
## 
## $value
## [1] 1611.158
## 
## $counts
## function gradient 
##       93       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

# plot of churn probabilities
plot_ly(x = 1:7, y = churnBG(alpha = 0.668, beta = 3.806, 1:7),
        type = 'scatter', mode = 'lines') %>%
  layout(title = "Density plot for P(T=t | aplha, beta) for t=10 fitted to the example data",
         xaxis = list(title = "Period, t"),
         yaxis = list(title = "P(T=t | aplha, beta)"))

# plot of survival probabilities
plot_ly(x = 0:7, y = c(1,survivalBG(alpha = 0.668, beta = 3.806, 1:7)),
        type = 'scatter', mode = 'lines') %>%
  layout(title = "Density plot for S(T=t | aplha, beta) for t=10 fitted to the example data",
         xaxis = list(title = "Period, t"),
         yaxis = list(title = "S(T=t | aplha, beta)"))

Projecting Customer Retention in R

October 18, 2022

Model

Code

Survival Probabilities

Likelihood function

Example From Paper