Conversion Rate Analysis

Nir Regev
Chief Data Scientist
Sisense Ltd.

May 16th, 2016

Conversion Anomaly Detection

The freqentist statistics threshold approach

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.5
library(Rmpfr)
# create data from binomail distribution
mass.traffic <- data.frame(conversion = rbinom(10000,10000,.05))
# create cdf for mass.traffic
mass.traffic.cdf <- ecdf(rbinom(10000,10000,.05))
# let's plot the binomial distribution
a <- ggplot(mass.traffic, aes(conversion))
a + geom_density(kernel="gaussian")

# let's plot the cdf
ggplot(mass.traffic, aes(conversion)) + stat_ecdf(geom = "step")

# the probability of P(conversions < 400)
mass.traffic.cdf(400)
## [1] 0
mass.traffic <- data.frame(conversion = rbinom(1000,1000,.05))
# create cdf for mass.traffic
mass.traffic.cdf <- ecdf(rbinom(1000,1000,.05))
# let's plot the binomial distribution
a <- ggplot(mass.traffic, aes(conversion))
a + geom_density(kernel="gaussian")

# let's plot the cdf
ggplot(mass.traffic, aes(conversion)) + stat_ecdf(geom = "step")

# the probability of P(conversions < 40)
mass.traffic.cdf(40)
## [1] 0.073

Some Bayesian Theory

log_likelihood <- function(n, c, theta){
  
  return (sum(log(dbinom(x = c, size = n, prob = theta))))
}
bayesian_anomaly_detector <- function(n, c, base_cr=0.05, null_prior=0.98, post_jump_cr=0.03){
  # Returns a posterior representing the beliefs on the probability of a
  # change in conversion rate
  # First value in the returned vector - represent the null hypothesis prior (our belief that there will be no change in conversion rate)
  # Then, 2nd value represents our belief that conversion rate changed at time 1, 3rd value at time 2 ...
  theta = rep(base_cr, length(n))
  likelihood =  rep(0 , length(n)+1) #First element represents the probability of no change
  likelihood[1] = null_prior #Set likelihood equal to prior
  likelihood[2:length(likelihood)] = (1.0-null_prior) / length(n) #Remainder represents probability of a change in c. rate
  
  likelihood[1] = likelihood[1] * exp(log_likelihood(n, c, theta))
  for (i in 1:(length(n))){
    theta[] = base_cr
    theta[i:length(n)] = post_jump_cr
    likelihood[i+1] = likelihood[i+1] * exp(log_likelihood(n, c, theta))
    
  }
  likelihood = likelihood / sum(likelihood)
  return (c(likelihood[1], likelihood[2:(length(likelihood))]))
}

Let’s simulate some data and see how model works

# traffic for 20 days
traffic = c( 1000,  1000,  1000,  1000,  1000,
       1000,  1000,  1000,  1000,  1000,
       1000,  1000,  1000,  1000,  1000,
       1000,  1000,  1000,  1000,  1000)
# conversions for 20 days
conversions = c(51, 40, 51, 41, 44,
      52, 40, 51, 42, 44,
      50, 41, 54, 41, 44,
      53, 40, 50, 40, 44)
# priors for a change in conversion rate (a fall from 5% to 4%)
theta = c( 0.05,  0.05,  0.05,  0.05,  0.05,
           0.05,  0.05,  0.05,  0.05,  0.05,
           0.05,  0.05,  0.05,  0.05,  0.05,
           0.05,  0.05,  0.05, 0.05,   0.05)

#n = rep(100, 20)
#c = rbinom(20,100,.05)
#c[14:length(c)] = rbinom(7,100,.03) #Jump occurs at t=13
#base_cr - base conversion rate
#post_change_cr - conversion rate after a change
# null_prior - prior belief that no change occurs
base_cr=0.05; null_prior=0.9; post_change_cr=0.03
posterior <- bayesian_anomaly_detector(traffic, conversions,base_cr, null_prior, post_change_cr)
posterior
##  [1] 9.992901e-01 7.909490e-33 4.224354e-30 6.508883e-30 3.476308e-27
##  [6] 9.115098e-27 1.177864e-25 1.070542e-22 1.649491e-22 8.809712e-20
## [11] 3.930987e-19 5.079668e-18 1.594227e-15 4.180164e-15 1.100261e-11
## [16] 2.884954e-11 3.727971e-10 5.766046e-07 8.884322e-07 2.788298e-04
## [21] 4.296209e-04

Model Evaluation

setwd("C:/Users/Nir.Regev/Documents/Target")
source("bayesian_anomaly_detector.R")
time <- 40
traffic.mean <- 1000
traffic.sd <- 100
traffic = rep(traffic.mean, time)
conversions = rbinom(time,traffic.mean,.05)
#c[14:length(c)] = rbinom(7,100,.03) #Jump occurs at t=13
base_cr=0.05; null_prior=0.9; post_jump_cr=0.03
posterior <- bayesian_anomaly_detector(traffic,conversions,base_cr, null_prior, post_jump_cr)
posterior
##  [1]  1.000000e+00 6.397294e-103 4.857727e-102  6.301242e-98  3.352576e-98
##  [6]  6.182963e-96  5.619600e-93  6.090130e-91  1.602983e-87  5.030880e-85
## [11]  9.278165e-83  1.198935e-81  2.636491e-80  4.077856e-77  2.606848e-72
## [16]  8.212777e-67  1.806012e-65  1.154528e-60  6.166179e-58  1.132856e-58
## [21]  3.541851e-59  7.788627e-58  1.719297e-53  2.221696e-52  3.423188e-52
## [26]  5.294638e-49  9.764598e-47  1.062270e-41  5.673445e-39  5.156506e-36
## [31]  4.686669e-33  2.984617e-31  4.598696e-31  2.051987e-30  3.173805e-27
## [36]  4.908918e-24  5.340310e-19  2.392025e-15  1.067347e-14  2.809365e-11
## [41]  3.044594e-09
df.posterior <- data.frame(time_of_jump = c(1:time), probability = posterior[2:length(posterior)])
df.prior <- data.frame(time_of_jump = c(1:time), probability = rep((1-null_prior)/time,time))
ymin <- min(min(posterior[2:length(posterior)]),min(df.prior$probability))
ymax <- max(max(posterior[2:length(posterior)]),max(df.prior$probability))
three_peaks <- sort(mpfr(posterior,1024),decreasing = T)[2:4]
p <- ggplot() + 
  geom_line(data = df.posterior, aes(x = time_of_jump, y = probability, color = "Posterior")) +
  geom_line(data = df.prior, aes(x = time_of_jump, y = probability, color = "Prior"))  +
  xlab('Time of Jump') +
  ylab('Probability') +
  ylim(ymin,ymax)
p

# p +  annotate("text", time-2, y = as.numeric(three_peaks[1]), label = "Conversion Dropped ?",
#               colour = "blue", size = 3)
time <- 40
traffic.mean <- 1000
traffic.sd <- 100
conversions = rbinom(time,traffic.mean,.05)
traffic = abs(floor(rnorm(time,traffic.mean,traffic.sd))) # generate random traffic from gaussian distribution [mean=10000,sd=5000]
conversions = rbinom(time,traffic.mean,.05)
# jump in time time - 5
fill.jump <- length(conversions) - (time-5)
conversions[(time-5):length(conversions)] = rbinom(fill.jump+1,traffic.mean,.03)

base_cr=0.05; null_prior=0.8; post_jump_cr=0.03
posterior <- bayesian_anomaly_detector(traffic,conversions,base_cr,null_prior,post_jump_cr)
df.posterior <- data.frame(time_of_jump = c(1:time), probability = posterior[2:length(posterior)])
df.prior <- data.frame(time_of_jump = c(1:time), probability = rep((1-null_prior)/time,time))
ymin <- min(min(posterior[2:length(posterior)]),min(df.prior$probability))
ymax <- max(max(posterior[2:length(posterior)]),max(df.prior$probability))
p <- ggplot() + 
  geom_line(data = df.posterior, aes(x = time_of_jump, y = probability, color = "Posterior")) +
  geom_line(data = df.prior, aes(x = time_of_jump, y = probability, color = "Prior"))  +
  xlab('Time of Jump') +
  ylab('Probability') +
  ylim(ymin,ymax)
three_peaks <- sort(mpfr(posterior,1024),decreasing = T)[2:4]
p +  annotate("text", time-5, y = as.numeric(three_peaks[1]), label = "Conversion Dropped",
              colour = "blue", size = 4)

posterior <- mpfr(posterior,1024)
prior <- df.prior$probability  
for (i in 1:20){
  time <- 40
  traffic.mean <- 1000
  traffic.sd <- 100
  conversions = rbinom(time,traffic.mean,.05)
  traffic = abs(floor(rnorm(time,traffic.mean,traffic.sd))) # generate random traffic from gaussian distribution [mean=10000,sd=5000]
  # jump in time time - 5
  fill.jump <- length(conversions) - (time-5)
  conversions[(time-5):length(conversions)] = rbinom(fill.jump+1,traffic.mean,.04) #Jump occurs at t=13
  base_cr=0.05; null_prior=0.8; post_jump_cr=0.03
  posterior <- bayesian_anomaly_detector(traffic,conversions,base_cr,null_prior,post_jump_cr)
  df.posterior <- data.frame(time_of_jump = c(1:time), probability = posterior[2:length(posterior)])
  df.prior <- data.frame(time_of_jump = c(1:time), probability = rep((1-null_prior)/time,time))
  ymin <- min(min(posterior[2:length(posterior)]),min(df.prior$probability))
  ymax <- max(max(posterior[2:length(posterior)]),max(df.prior$probability))
  p <- ggplot() + 
    geom_line(data = df.posterior, aes(x = time_of_jump, y = probability, color = "Posterior")) +
    geom_line(data = df.prior, aes(x = time_of_jump, y = probability, color = "Prior"))  +
    xlab('Time of Jump') +
    ylab('Probability') +
    ylim(ymin,ymax) #+
    #theme_grey(base_size = 15)
  three_peaks <- sort(mpfr(posterior,1024),decreasing = T)[2:4]
  print(p +  annotate("text", time-5, y = as.numeric(three_peaks[1]), label = "Conversion Dropped",
                colour = "blue", size = 4))
 
  posterior <- mpfr(posterior,1024)
  prior <- df.prior$probability
  
}

Summary

Prior