G. Games Customer LTV Model

Nir Regev
Principal Data Scientist
Sisense Ltd.

April 16, 2016

G Games Use-Case

G Games uses daily customer revenues per country and fits a linear regression model to predict the lifetime value of each customer segment (per country in this case).

Agenda

Generating data for demo

# Demo input: a list of five numeric vectors holding two values each.
# NOTE: the name `args` shadows base::args() for the rest of the session.
args <- list(
  c(2, 4),
  c(3, 5),
  c(2, 3),
  c(4, 5),
  c(4, 1)
)
print(args)
## [[1]]
## [1] 2 4
## 
## [[2]]
## [1] 3 5
## 
## [[3]]
## [1] 2 3
## 
## [[4]]
## [1] 4 5
## 
## [[5]]
## [1] 4 1

Defining a parsing wrapper function

# Wrapper that regroups the incoming list into one series per position and
# scores each series with tlv_model().
#
# args: list whose first five elements are vectors; element k of each of
#       those vectors is combined, in list order, into series k. Only
#       positions 1..5 are read, so any further list elements are ignored.
# Returns the tlv_model() results concatenated in series order.
tlv <- function (args) {
  # Fixed, positional extraction of the five inputs.
  slots <- list(args[[1]], args[[2]], args[[3]], args[[4]], args[[5]])

  scores <- c()
  # The first slot decides how many series are scored.
  for (k in 1:length(slots[[1]])) {
    # k-th value of every slot forms the series handed to the model.
    series <- do.call(c, lapply(slots, function(slot) slot[k]))
    scores <- c(scores, tlv_model(series))
  }
  return (scores)
}

Defining a model (linear regression) function

# Fits a power-law curve to one series observed on days 1, 7, 14, 21 and 28
# and returns 0.55 times the summed fitted curve over days 1..360.
#
# res: numeric vector of positive values, one per observation day.
# Returns a single unnamed number.
tlv_model <- function(res) {
  # Natural logarithm, written with an explicit base.
  ln <- function(v) log(v, base = exp(1))

  observed <- data.frame(day = c(1, 7, 14, 21, 28), retention = res)
  log_retention <- ln(observed$retention)
  log_day <- ln(observed$day)

  # Straight line on the log-log scale: log(retention) = log(a) + m * log(day).
  fit <- lm(log_retention ~ log_day)
  estimates <- coef(fit)
  a <- exp(estimates[[1]])
  m <- estimates[[2]]

  # Evaluate a * day^m for every day in 1..360, then total and scale.
  horizon <- 1:360
  fitted_curve <- a * horizon^m
  sum(fitted_curve) * 0.55
}

simulation of entire model execution

# Score the demo input and display the result for each series.
first_results <- tlv(args)
print(first_results)
## [1] 936.4313 349.0783

Simulation with a different number of input vectors in args

# Same demo input with a sixth vector, c(2, 7), appended; rerun and show
# the earlier and the new results one after the other.
args <- list(
  c(2, 4), c(3, 5), c(2, 3),
  c(4, 5), c(4, 1), c(2, 7)
)
second_results <- tlv(args)
print(first_results)
print(second_results)
## [1] 936.4313 349.0783
## [1] 936.4313 349.0783

Same results as before - how is that possible? The new data point (2, 7) is never parsed, because the wrapper hard-codes exactly five input vectors.

Make the code more generic and robust

# Generic wrapper: scores every series in `args` with tlv_model(), however
# many observation vectors the list holds.
#
# args: non-empty list of equal-length numeric vectors. Vector j holds the
#       j-th observation of every series, so element i of each vector belongs
#       to series i.
# Returns a list with one tlv_model() result per series, named X1, X2, ...
# (or after the vectors' own element names when they carry any).
tlv <- function (args) {
  if (!is.list(args) || length(args) == 0) {
    stop("args must be a non-empty list of numeric vectors", call. = FALSE)
  }
  if (length(unique(lengths(args))) != 1) {
    stop("every element of args must have the same length", call. = FALSE)
  }

  # rbind() always yields an observations x series matrix, even when each
  # vector holds a single value; a simplifying mapply() would collapse that
  # case to a plain vector and split one series into many one-point series.
  # unname() keeps list names from being read as rbind() arguments.
  observations <- do.call(rbind, unname(args))
  if (is.null(colnames(observations))) {
    colnames(observations) <- paste0("X", seq_len(ncol(observations)))
  }

  # A data frame is a list of columns, so lapply() visits one series at a time.
  retention <- data.frame(observations)
  lapply(X = retention, FUN = tlv_model)
}

# Fits a power-law curve to one observed series and returns 0.55 times the
# summed fitted curve over days 1..360.
#
# res: numeric vector of positive observations for a single series, treated
#      as evenly spaced in time starting on day 1, with a spacing of about
#      30 / length(res) days.
# Returns a single unnamed number.
tlv_model <- function(res) {
  stopifnot(is.numeric(res), length(res) > 0)

  # The day grid is sized from the series itself, not from a global `args`,
  # so the result does not depend on caller state. Building exactly
  # length(res) days also avoids seq(1, 30, by = step) returning more days
  # than observations (e.g. 8 days for 7 observations).
  n_obs <- length(res)
  step <- max(1, round(30 / n_obs))
  retentiondf <- data.frame(
    day = 1 + step * (seq_len(n_obs) - 1),
    retention = res
  )

  # Linear fit on the log-log scale: log(retention) = log(a) + m * log(day).
  model <- lm(log(retention) ~ log(day), data = retentiondf)
  a <- exp(model$coefficients[[1]])
  m <- model$coefficients[[2]]

  # Evaluate a * day^m on days 1..360, then total and scale by 0.55.
  predicted <- a * (1:360)^m
  sum(predicted) * 0.55
}

Let’s run the new code

# Six observation vectors this time; score them and show the result.
args <- list(
  c(2, 4), c(3, 5), c(2, 3),
  c(4, 5), c(4, 1), c(4, 5)
)
print(tlv(args))
## $X1
## [1] 1077.17
## 
## $X2
## [1] 475.286