A/B testing means comparing two marketing strategies to determine which one converts more traffic into sales (or into whatever goal you have set) more effectively and efficiently. It is a valuable concept that every data science professional should know. In this article, I will take you through the task of A/B testing using Python.
import pandas as pd
import datetime
from datetime import date, timedelta
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
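The analysis uses two datasets, both in CSV format, loaded into the pandas DataFrames `control_data` (control campaign) and `test_data` (test campaign):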
# Echo the control-campaign DataFrame; a bare expression like this is only
# rendered in an interactive/notebook session.
control_data
## Campaign Name Date ... # of Add to Cart # of Purchase
## 0 Control Campaign 1.08.2019 ... 1819.0 618.0
## 1 Control Campaign 2.08.2019 ... 1219.0 511.0
## 2 Control Campaign 3.08.2019 ... 1134.0 372.0
## 3 Control Campaign 4.08.2019 ... 1183.0 340.0
## 4 Control Campaign 5.08.2019 ... NaN NaN
## 5 Control Campaign 6.08.2019 ... 784.0 764.0
## 6 Control Campaign 7.08.2019 ... 1166.0 499.0
## 7 Control Campaign 8.08.2019 ... 930.0 462.0
## 8 Control Campaign 9.08.2019 ... 645.0 501.0
## 9 Control Campaign 10.08.2019 ... 1629.0 734.0
## 10 Control Campaign 11.08.2019 ... 1887.0 475.0
## 11 Control Campaign 12.08.2019 ... 1439.0 794.0
## 12 Control Campaign 13.08.2019 ... 1794.0 766.0
## 13 Control Campaign 14.08.2019 ... 1339.0 788.0
## 14 Control Campaign 15.08.2019 ... 1641.0 366.0
## 15 Control Campaign 16.08.2019 ... 1613.0 438.0
## 16 Control Campaign 17.08.2019 ... 878.0 222.0
## 17 Control Campaign 18.08.2019 ... 1695.0 243.0
## 18 Control Campaign 19.08.2019 ... 908.0 542.0
## 19 Control Campaign 20.08.2019 ... 1709.0 299.0
## 20 Control Campaign 21.08.2019 ... 1460.0 800.0
## 21 Control Campaign 22.08.2019 ... 819.0 387.0
## 22 Control Campaign 23.08.2019 ... 1913.0 766.0
## 23 Control Campaign 24.08.2019 ... 1146.0 585.0
## 24 Control Campaign 25.08.2019 ... 883.0 386.0
## 25 Control Campaign 26.08.2019 ... 1448.0 251.0
## 26 Control Campaign 27.08.2019 ... 980.0 605.0
## 27 Control Campaign 28.08.2019 ... 1711.0 643.0
## 28 Control Campaign 29.08.2019 ... 1486.0 334.0
## 29 Control Campaign 30.08.2019 ... 442.0 670.0
##
## [30 rows x 10 columns]
# Echo the test-campaign DataFrame; a bare expression like this is only
# rendered in an interactive/notebook session.
test_data
## Campaign Name Date ... # of Add to Cart # of Purchase
## 0 Test Campaign 1.08.2019 ... 894 255
## 1 Test Campaign 2.08.2019 ... 879 677
## 2 Test Campaign 3.08.2019 ... 1268 578
## 3 Test Campaign 4.08.2019 ... 566 340
## 4 Test Campaign 5.08.2019 ... 956 768
## 5 Test Campaign 6.08.2019 ... 882 488
## 6 Test Campaign 7.08.2019 ... 1301 890
## 7 Test Campaign 8.08.2019 ... 1240 431
## 8 Test Campaign 9.08.2019 ... 1200 845
## 9 Test Campaign 10.08.2019 ... 424 275
## 10 Test Campaign 11.08.2019 ... 1075 668
## 11 Test Campaign 12.08.2019 ... 1382 709
## 12 Test Campaign 13.08.2019 ... 1391 812
## 13 Test Campaign 14.08.2019 ... 779 340
## 14 Test Campaign 15.08.2019 ... 1090 398
## 15 Test Campaign 16.08.2019 ... 1059 487
## 16 Test Campaign 17.08.2019 ... 383 238
## 17 Test Campaign 18.08.2019 ... 461 257
## 18 Test Campaign 19.08.2019 ... 788 512
## 19 Test Campaign 20.08.2019 ... 1047 730
## 20 Test Campaign 21.08.2019 ... 278 245
## 21 Test Campaign 22.08.2019 ... 367 276
## 22 Test Campaign 23.08.2019 ... 632 473
## 23 Test Campaign 24.08.2019 ... 327 269
## 24 Test Campaign 25.08.2019 ... 1228 651
## 25 Test Campaign 26.08.2019 ... 346 284
## 26 Test Campaign 27.08.2019 ... 992 771
## 27 Test Campaign 28.08.2019 ... 1009 721
## 28 Test Campaign 29.08.2019 ... 1168 677
## 29 Test Campaign 30.08.2019 ... 1034 572
##
## [30 rows x 10 columns]
# Preview the first five rows of the control-campaign data.
print(control_data.head(n=5))
## Campaign Name Date ... # of Add to Cart # of Purchase
## 0 Control Campaign 1.08.2019 ... 1819.0 618.0
## 1 Control Campaign 2.08.2019 ... 1219.0 511.0
## 2 Control Campaign 3.08.2019 ... 1134.0 372.0
## 3 Control Campaign 4.08.2019 ... 1183.0 340.0
## 4 Control Campaign 5.08.2019 ... NaN NaN
##
## [5 rows x 10 columns]
# Give both frames the same ten descriptive column names, replacing raw CSV
# headers such as "# of Add to Cart" and "# of Purchase".
control_data.columns = test_data.columns = [
    "Campaign Name",
    "Date",
    "Amount Spent",
    "Number of Impressions",
    "Reach",
    "Website Clicks",
    "Search Received",
    "Content Viewed",
    "Added to Cart",
    "Purchases",
]
# Count the missing values in each column of the control-campaign data.
control_data.isna().sum()
## Campaign Name 0
## Date 0
## Amount Spent 0
## Number of Impressions 1
## Reach 1
## Website Clicks 1
## Search Received 1
## Content Viewed 1
## Added to Cart 1
## Purchases 1
## dtype: int64
# Count the missing values in each column of the test-campaign data.
test_data.isna().sum()
## Campaign Name 0
## Date 0
## Amount Spent 0
## Number of Impressions 0
## Reach 0
## Website Clicks 0
## Search Received 0
## Content Viewed 0
## Added to Cart 0
## Purchases 0
## dtype: int64
# Impute the one missing day in the control campaign with each column's mean.
# The filled column is assigned back to the DataFrame instead of calling
# fillna(inplace=True) on a column selection: that chained in-place form
# triggers a FutureWarning on recent pandas 2.x releases and leaves the
# DataFrame unchanged once Copy-on-Write is enabled (the default in pandas 3.0).
for column in [
    "Number of Impressions",
    "Reach",
    "Website Clicks",
    "Search Received",
    "Content Viewed",
    "Added to Cart",
    "Purchases",
]:
    control_data[column] = control_data[column].fillna(control_data[column].mean())
# Re-count missing values per column to confirm the control data has none left.
control_data.isna().sum()
## Campaign Name 0
## Date 0
## Amount Spent 0
## Number of Impressions 0
## Reach 0
## Website Clicks 0
## Search Received 0
## Content Viewed 0
## Added to Cart 0
## Purchases 0
## dtype: int64
# Stack the two campaigns into one frame. pd.concat is used rather than an
# outer merge on every shared column: the rows are simply appended, so no
# int/float key comparison (and its UserWarning) is involved.
ab_data = pd.concat([control_data, test_data], ignore_index=True)
# "Date" holds day.month.year strings, so sorting the raw text would place
# "10.08.2019" directly after "1.08.2019". Order by the parsed dates instead;
# mergesort is stable, so Control stays ahead of Test within each day.
date_order = pd.to_datetime(ab_data["Date"], format="%d.%m.%Y").sort_values(kind="mergesort").index
ab_data = ab_data.loc[date_order].reset_index(drop=True)
print(ab_data.head())
## Campaign Name Date ... Added to Cart Purchases
## 0 Control Campaign 1.08.2019 ... 1819.0 618.0
## 1 Test Campaign 1.08.2019 ... 894.0 255.0
## 2 Control Campaign 2.08.2019 ... 1219.0 511.0
## 3 Test Campaign 2.08.2019 ... 879.0 677.0
## 4 Control Campaign 3.08.2019 ... 1134.0 372.0
##
## [5 rows x 10 columns]
# Count how many rows each campaign contributes to the combined frame.
campaign_counts = ab_data["Campaign Name"].value_counts()
print(campaign_counts)
## Control Campaign 30
## Test Campaign 30
## Name: Campaign Name, dtype: int64
# Scatter plot of amount spent against number of impressions, colored by
# campaign, with marker size scaled by amount spent and an ordinary
# least-squares ("ols") trendline.
figure = px.scatter(
    ab_data,
    x="Number of Impressions",
    y="Amount Spent",
    color="Campaign Name",
    size="Amount Spent",
    trendline="ols",
)
figure.show()