The method introduced below allows us to assess incrementality after implementing the winning variation, when the control data is no longer available to be compared against, since after the test, 100% of traffic is directed to the winning variation.

By using Facebook’s open sourcing forecasting tool Prophet, we were able to forecast the future conversion rate as if no treatment was applied (94.4% accuracy rate according to Y2016 data), based on historial control data from 2012.01.01 to 2017.04.19.

 

Data Preparation

Prepare Observed Data

Load aggregated daily sim order conversion rate from BigQuery for date range: 2014.04.09 - 2017.04.19

observed_17 <- read.csv("gg_simOrder_2017.csv")
observed_1516 <- read.csv("gg_simOrder_2015-16.csv")
observed_14 <- read.csv("gg_simOrder_2014.csv")

 

Fetch aggregated daily sim order conversion rate from GA API for date range: 2012.01.01 - 2014.04.08

Noted that sim order conversion rate is calculated based on sessions that has viewed the test page (sim order page):

  • From Jan 2012 to Apr 2016: giffgaff.com/orders/free-sim
  • From May 2016 onwards: giffgaff.com/free-sim-cards

Query every half a year to mitigate sampling.

#authorisation
ga_auth()

viewID <- 20792028 #0.1 Giffgaff

#Apply a segment of sessions that has viewed the test page (sim order page)
segment_rhydon2_visited <- segment_ga4("gg - visit sim order page", 
                                       segment_id="gaid::ri14N0vtRvOM_Vm5Er421Q") 
#2012-first half
start_date <- "2012-01-01"
end_date <- "2012-05-31"

gg_observed_2012_a <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                                         metrics = c('ga:goal1Completions', 'ga:goal1ConversionRate'), 
                                         dimensions = c('ga:Date', 'ga:segment'),
                                         segments = segment_rhydon2_visited,
                                         anti_sample = TRUE)
#2012-sec half
start_date <- "2012-06-01"
end_date <- "2012-12-31"

gg_observed_2012_b <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                                         metrics = c('ga:goal1Completions', 'ga:goal1ConversionRate'), 
                                         dimensions = c('ga:Date', 'ga:segment'),
                                         segments = segment_rhydon2_visited,
                                         anti_sample = TRUE)

#2013-first half
start_date <- "2013-01-01"
end_date <- "2013-05-31"

gg_observed_2013_a <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                                         metrics = c('ga:goal1Completions', 'ga:goal1ConversionRate'), 
                                         dimensions = c('ga:Date', 'ga:segment'),
                                         segments = segment_rhydon2_visited,
                                         anti_sample = TRUE)

#2013-sec half
start_date <- "2013-06-01"
end_date <- "2013-12-31"

gg_observed_2013_b <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                                         metrics = c('ga:goal1Completions', 'ga:goal1ConversionRate'), 
                                         dimensions = c('ga:Date', 'ga:segment'),
                                         segments = segment_rhydon2_visited,
                                         anti_sample = TRUE)

#2014.01.01 - 2014.04.08
start_date <- "2014-01-01"
end_date <- "2014-04-08"

gg_observed_2014_a <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                                         metrics = c('ga:goal1Completions', 'ga:goal1ConversionRate'), 
                                         dimensions = c('ga:Date', 'ga:segment'),
                                         segments = segment_rhydon2_visited,
                                         anti_sample = TRUE)

#Stitch together data from 2012.01.01 to 2014.04.08
gg_observed_12_14.4 <- rbind(gg_observed_2012_a, gg_observed_2012_b,
                            gg_observed_2013_a, gg_observed_2013_b,
                            gg_observed_2014_a)

 

Reformat BQ data from GA API data and stitch them together

gg_observed_ga <- gg_observed_12_14.4 %>%
  select(date=Date, conversionRate=goal1ConversionRate)

gg_observed_bq <- observed_14 %>%
  rbind(observed_1516, observed_17) %>%
  select(date, conversionRate) %>%
  mutate(conversionRate = conversionRate*100)

gg_observed_whole <- rbind(gg_observed_ga, gg_observed_bq)
gg_observed_whole$date <- as.Date(gg_observed_whole$date, format = "%Y%m%d")
gg_observed_whole <- arrange(gg_observed_whole, date)

 

Prepare Data During the Rhydon2 Test

Fetch data from GA API and then reformat:

#authorisation
ga_auth()

viewID <- 20792028 #0.1 Giffgaff
start_date <- "2017-02-15"
end_date <- "2017-02-28"

# Segments
segment_rhydon2_original <- segment_ga4("gg - rhydon II Original", 
                                         segment_id="gaid::Yrbq9Do3TWyBlHdIgIP5Rw") 
segment_rhydon2_treatment <- segment_ga4("gg - rhydon II No thanks CTA", 
                                          segment_id="gaid::2z6IzDmXQv2mDOVZjYt2IQ") 

#Fetch raw data
# Original:
gg_rhydon2_original <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                                           metrics = c('ga:goal1Completions', 'ga:goal1ConversionRate'), 
                                           dimensions = c('ga:Date', 'ga:segment'),
                                           segments = segment_rhydon2_original,
                                           anti_sample = TRUE)
gg_rhydon2_original$Date <- as.Date(gg_rhydon2_original$Date, format = "%Y%m%d")
save(gg_rhydon2_original, file="gg_rhydon2_original_goal1")

# No thanks:
gg_rhydon2_treatment <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                                            metrics = c("ga:goal1Completions", 'ga:goal1ConversionRate'), 
                                            dimensions = c('ga:Date', 'ga:segment'),
                                            segments = segment_rhydon2_treatment,
                                            anti_sample = TRUE)
gg_rhydon2_treatment$Date <- as.Date(gg_rhydon2_treatment$Date, format = "%Y%m%d")
save(gg_rhydon2_treatment, file="gg_rhydon2_treatment_goal1")

 

Prepare Obersved Data After the Test

#authorisation
ga_auth()

viewID <- 20792028 #0.1 Giffgaff
start_date <- "2017-03-01"
end_date <- "2017-04-19"

#Apply a segment of sessions that has viewed the test page (sim order page)
segment_rhydon2_visited <- segment_ga4("gg - visit sim order page", 
                                       segment_id="gaid::ri14N0vtRvOM_Vm5Er421Q")

observed_301_419 <- google_analytics_4(viewID, date_range = c(start_date, end_date),
                               metrics = c('ga:goal1Completions', 'ga:goal1ConversionRate'), 
                               dimensions = c('ga:Date', 'ga:segment'),
                               segments = segment_rhydon2_visited,
                               anti_sample = TRUE)

Forecast (if no treatment)

Forecast future sim order conversion rate as if the winning variation (rhydon2) was not implemented to all sessions, using control(original) data from 2012.01.01 - 2017.04.19

gg_observed_whole <- read.csv(file="gg_observed_whole.csv")

gg_observed_whole$ds <- gg_observed_whole$date
gg_observed_whole$y <- gg_observed_whole$conversionRate

gg_observed_whole_mdl <- prophet(gg_observed_whole)
gg_observed_whole_future <- make_future_dataframe(gg_observed_whole_mdl, periods = 365)

gg_observed_whole_forecast <- predict(gg_observed_whole_mdl, gg_observed_whole_future,
                                       level = 0.99)
write.csv(gg_observed_whole_forecast, file="gg_observed_whole_forecast.csv", row.names = F)
#tail(gg_observed_whole_forecast[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')])

Plot the control data - historical (2012.01.01 - 2017.04.19) and forecasted (2017.04.20 - 2018.4.19).

plot(gg_observed_whole_mdl, gg_observed_whole_forecast)

Trend, day of the week seasonality and yearly seasonality:

prophet_plot_components(gg_observed_whole_mdl, gg_observed_whole_forecast)

 

Forecast (if treatment)

Forecast future sim order conversion rate based on the historic data (2012.01.01 - 2017.02.14) and treatment data (2017.02.15 - 2017.04.19)

gg_observed_whole <- read.csv(file="gg_observed_whole.csv")

prior_end <- which(gg_observed_whole$date == "2017-02-14")
gg_prior_treatment <- gg_observed_whole[1:prior_end,]
gg_prior_treatment$date <- as.Date(gg_prior_treatment$date, format = "%Y-%m-%d")

after_start <- which(gg_observed_whole$date == "2017-03-01")
gg_after_treatment <- gg_observed_whole[after_start:nrow(gg_observed_whole),]
gg_after_treatment$date <- as.Date(gg_after_treatment$date, format = "%Y-%m-%d")

load("gg_rhydon2_treatment_goal1")
gg_treatment <- gg_rhydon2_treatment %>%
  dplyr::select(date=Date, conversionRate=goal1ConversionRate)

treatment_forecast <- rbind(gg_prior_treatment, gg_treatment, gg_after_treatment)

treatment_forecast$ds <- treatment_forecast$date
treatment_forecast$y <- treatment_forecast$conversionRate

treatment_forecast_mdl <- prophet(treatment_forecast)
treatment_forecast_future <- make_future_dataframe(treatment_forecast_mdl, periods = 365)

treatment_forecast_forecast <- predict(treatment_forecast_mdl, treatment_forecast_future,
                                       level = 0.99)
write.csv(treatment_forecast_forecast, file="treatment_forecast_forecast.csv", row.names = F)
#tail(treatment_forecast_forecast[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')])

 

Plot the historical data (2012.01.01 - 2017.02.14), treatment (2017.02.15 - 2017.04.19) and forecasted data after implementing the treatment (2017.04.20 - 2018.4.19).

plot(treatment_forecast_mdl, treatment_forecast_forecast)

 

Trend, day of the week seasonality and yearly seasonality:

prophet_plot_components(treatment_forecast_mdl, treatment_forecast_forecast)

 

Forecasted Control vs. Treatment

Hard to tell if there is a uplift from forecasted treatment comparing to control.

Thus we visualise it in a different way…

forecast_control <- read.csv("gg_observed_whole_forecast.csv") 
forecast_treatment <- read.csv("treatment_forecast_forecast.csv")
forecast_control$ds <- as.Date(forecast_control$ds, format="%Y-%m-%d")
forecast_treatment$ds <- as.Date(forecast_treatment$ds, format="%Y-%m-%d")
forecast_control <- forecast_control %>%
  filter(ds > "2017-02-28")
forecast_treatment <- forecast_treatment %>%
  filter(ds > "2017-02-28")

compare2 <- data.frame(date = forecast_control$ds,
                      original = forecast_control$yhat,
                      treatment = forecast_treatment$yhat,
                      original_lower = forecast_control$yhat_lower,
                      original_upper = forecast_control$yhat_upper,
                      treatment_lower = forecast_treatment$yhat_lower,
                      treatment_upper = forecast_treatment$yhat_upper,
                      original_trend_lower = forecast_control$trend_lower,
                      original_trend_upper = forecast_control$trend_upper,
                      treatment_trend_lower = forecast_treatment$trend_lower,
                      treatment_trend_upper = forecast_treatment$trend_upper) %>%
  mutate(diff_yhat = treatment - original)
                     

ggplot(compare2, aes(date)) + 
  geom_line(aes(y = diff_yhat), size=1.05) +
  ylab('Forecasted Uplift in CR') +
  ggtitle("Sim Order Conversion Rate Forecasted Uplift")

On April 19th 2018, the control was forecasted to have a conversiton rate between 20.39% and 35.09%. The test moved up this forecast to a band between 21.07% and 36.49%.

Noted that the forecasted figure are slightly different each time we run the script, but overall the conclusion that treatment had an uplift in CR stays the same. The difference in forecasted figures are normal because Prophet is simulating the future CR based on trend, seasonality, variance and potantially other stuff, but there is still some randomness in there.

 

Evaluate Forecast Accuracy

Evaluation - Forecast Accuracy Y2016

Here we evaluate the forecasting accuracy of Prophet by using Y2012-Y2015 daily conversion rate to predict Y2016’s daily conversion rate (pretend we don’t know the 2016 data), then comparing the forecasted Y2016 data with the real Y2016 data.

#Get the real Y2016 daily conversion rate.
start1 <- which(gg_observed_whole$date=="2016-01-01")
end1 <- which(gg_observed_whole$date=="2016-12-31")
gg_rhydon2_whole_check <- gg_observed_whole[start1:end1,]

#Use Y2012-15 data to forecast Y2016
start2 <- which(gg_observed_whole_forecast$ds=="2016-01-01")
end2 <- which(gg_observed_whole_forecast$ds=="2016-12-31")
gg_observed_whole$ds <- gg_observed_whole$date
gg_observed_whole$y <- gg_observed_whole$conversionRate
gg_observed_whole_mdl_check <- prophet(gg_observed_whole[1:(start2-1),])
gg_observed_whole_future_check <- make_future_dataframe(gg_observed_whole_mdl_check, periods = 367)

gg_observed_whole_forecast_check <- predict(gg_observed_whole_mdl_check, gg_observed_whole_future_check)

#Compare the forecast against real data
accuracy <- data.frame(date = gg_observed_whole_forecast_check$ds[start2:end2],
                       forecast = gg_observed_whole_forecast_check$yhat[start2:end2], 
                       real = gg_rhydon2_whole_check$conversionRate) %>%
  mutate(accuracy = abs(forecast-real)/real)
accuracy_avg_original <- mean(accuracy$accuracy)
cat("Original data Y2016 average forecasting accuracy for Prophet is:", 
    sprintf("%1.2f%%", 100*(1-accuracy_avg_original)))

 

Visualisation - Forecast Accuracy Y2016

ggplot(accuracy, aes(date)) + 
  geom_line(aes(y = forecast, colour = "Forecasted"), size=1.02) +
  geom_line(aes(y = real, colour = "Real"), size=1.02) + 
  xlab('Date') +
  ylab('Sim Order Conversion Rate') +
  ggtitle(paste("Average Forecasting Accuracy: ", 
                sprintf("%1.2f%%", 100*(1-accuracy_avg_original))))

 

Evaluation - Forecast Accuracy 2017.03.01 - 2017.04.19

#Get the real daily conversion rate.
start1 <- which(gg_observed_whole$date=="2017-03-01")
end1 <- which(gg_observed_whole$date=="2017-04-19")
gg_treatment_check <- gg_observed_whole[start1:end1,]

#Get the predicted daily conversion rate after implementing the treatment
start2 <- which(treatment_forecast_forecast$ds=="2017-03-01")
end2 <- which(treatment_forecast_forecast$ds=="2017-04-19")
gg_treatment_predicted_check <- treatment_forecast_forecast[start2:end2,]

#Compare the forecast against real data
accuracy <- data.frame(date = gg_treatment_check$date,
                       forecast = gg_treatment_predicted_check$yhat, 
                       real = gg_treatment_check$conversionRate) %>%
  mutate(accuracy = abs(forecast-real)/real)
accuracy_avg_original <- mean(accuracy$accuracy)
cat("2017.03.01 - 2017.04.19 treatment forecasting average accuracy for Prophet is:", 
    sprintf("%1.2f%%", 100*(1-accuracy_avg_original)))

 

Visualisation - Forecast Accuracy 2017.03.01 - 2017.04.19

accuracy$date <- as.Date(accuracy$date, format = "%Y-%m-%d")
ggplot(accuracy, aes(date)) + 
  geom_line(aes(y = forecast, colour = "Forecasted"), size=1.02) +
  geom_line(aes(y = real, colour = "Real"), size=1.02) + 
  xlab('Date') +
  ylab('Sim Order Conversion Rate') +
  ggtitle(paste("Average Forecasting Accuracy: ", 
                sprintf("%1.2f%%", 100*(1-accuracy_avg_original))))

 

Assess Incrementality the new way

Calculate incrementality by comparing observed(after deploying treatment 100%) against forecast(if no treatment is applied).

#Retrive data prepared in the first section
load("gg_rhydon2_original_goal1")
load("gg_rhydon2_treatment_goal1")
observed_301_419 <- read.csv(file="gg_observed_306-419.csv")

#Extract and reformat forecast data from 2017.03.01 to 2017.04.19
start_index <- which(gg_observed_whole_forecast$ds=="2017-03-01")
end_index <- which(gg_observed_whole_forecast$ds=="2017-04-19")
forecast <- gg_observed_whole_forecast[start_index:end_index,]
forecast2 <- forecast[,c("ds", "yhat")] 
colnames(forecast2) <- c("Date", "goal1ConversionRate")

#Extract and reformat observed data from 2017.03.01 to 2017.04.19
observed_301_419$Date <- as.Date(as.character(observed_301_419$Date), format = "%Y%m%d")
observed2 <- observed_301_419[,c("Date", "goal1ConversionRate")]

#Stitch up control/treatment with after test data (forecasted/observed)
control_forecast <- rbind(gg_rhydon2_original[,c(1,4)], forecast2) %>%
  mutate(group = "control") 

treatment_observed <- rbind(gg_rhydon2_treatment[,c(1,4)], observed2) %>%
  mutate(group = "treatment") 

#Calculate incrementality by comparing observed(after deploying treatment 100%) against forecast(if no treatment is applied)
compare <- control_forecast[,1:2] %>%
  inner_join(treatment_observed[,1:2], by="Date", suffix = c(".control", ".treatment")) %>%
  mutate(incrementality = goal1ConversionRate.treatment - goal1ConversionRate.control,
         cumDiff = cumsum(incrementality)) %>%
  dplyr::select(date = Date,
         treatment = goal1ConversionRate.treatment,
         control = goal1ConversionRate.control,
         incrementality, cumDiff)

 

Visualise Incrementality

Control+Forecasted vs.Treatment+Observed:

forecast_upper <- c(gg_rhydon2_original$goal1ConversionRate, forecast$yhat_upper)
forecast_lower <- c(gg_rhydon2_original$goal1ConversionRate, forecast$yhat_lower)
forecast_band <- data.frame(date=control_forecast$Date,
                            forecast_lower,
                            forecastCR = control_forecast$goal1ConversionRate,
                            forecast_upper)

ggplot(compare, aes(date)) + 
  geom_line(aes(y = control, colour = "Control/Forecasted"), size=1.05) +
  geom_ribbon(data=forecast_band,aes(ymin=forecast_lower,ymax=forecast_upper),alpha=0.2) +
  geom_line(aes(y = treatment, colour = "Treatment/Observed"), size=1.05) + 
  geom_vline(xintercept=as.numeric(compare$date[which(compare$date=="2017-03-01")]), 
             linetype="dashed", color = "#99A3A4", size=1.1) +
  xlab('Date') +
  ylab('Sim Order Conversion Rate') +
  ggtitle("Sim Order Conversion Rate: Control+Forecasted vs.Treatment+Observed") 

 

Incrementality:

ggplot(compare, aes(x=date, y=incrementality)) +
  geom_line(color="#2E86C1") + 
  geom_point(alpha=.3) +
  geom_smooth(alpha=.2, size=1, color="#7FB3D5") +
  geom_vline(xintercept=as.numeric(compare$date[which(compare$date=="2017-03-01")]), 
             linetype="dashed", color = "#99A3A4", size=1.1) +
  geom_hline(yintercept=0, linetype="dotdash", color="#CD6155", size=1.05) + 
  xlab('Date') +
  ylab('Incrementality') +
  ggtitle("Sim Order Incrementality: Predicted - Observed")

 

Accumulated Incrementality:

ggplot(compare, aes(x=date, y=cumDiff, colour=cumDiff)) +
  geom_line(color="#2E86C1") + 
  geom_vline(xintercept=as.numeric(compare$date[which(compare$date=="2017-03-01")]), 
             linetype="dashed", color = "#99A3A4", size=1.1) +
  xlab('Date') +
  ylab('Accumulated Incrementality') +
  ggtitle("Sim Order Accumulated Incrementality")

 

Statistical Significance

Here we perform a one-sided two sample t test to assess whether the uplift in SIM order conversion rate after implementing the Rhydon2 is statistically significant.

Null hypothesis: SIM order conversion rate after implementing Rhydon2 is not significantally better than the forecasted original.

Alternative hypothesis: SIM order conversion rate after implementing Rhydon2 is significantally better than the forecasted original.

mdl <- t.test(observed2$goal1ConversionRate, forecast2$goal1ConversionRate,
              alternative = "greater")
mdl
## 
##  Welch Two Sample t-test
## 
## data:  observed2$goal1ConversionRate and forecast2$goal1ConversionRate
## t = 2.6107, df = 79.451, p-value = 0.005399
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.3630795       Inf
## sample estimates:
## mean of x mean of y 
##  33.27703  32.27549

P value is 0.0053988, which means there is a 0.54% probablity that the treatment uplift occurred by chance. Since p value is small enough, we can safely reject the null hypothesis and conclude that the incremental uplift after implementing the winning varition is statistically significant.