library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(knitr)
# Load the campaign worksheet from the Excel workbook (first row holds the
# column names) and print the structure of the resulting tibble.
campaignData <- read_xlsx(
  path = "Rocket fuel excel B5896-XLS-ENG.xlsx",
  sheet = "2017_9_rocketfuel_B5896",
  col_names = TRUE
)
str(campaignData)
## tibble [588,101 x 6] (S3: tbl_df/tbl/data.frame)
## $ user_id : num [1:588101] 1069124 1119715 1144181 1435133 1015700 ...
## $ test : num [1:588101] 1 1 1 1 1 1 1 1 1 1 ...
## $ converted : num [1:588101] 0 0 0 0 0 0 0 0 0 0 ...
## $ tot_impr : num [1:588101] 130 93 21 355 276 734 264 17 21 142 ...
## $ mode_impr_day : num [1:588101] 1 2 2 2 5 6 3 7 2 1 ...
## $ mode_impr_hour: num [1:588101] 20 22 18 10 14 10 13 18 19 14 ...
# Recode the numeric indicator columns as labelled factors.
# Every code is mapped explicitly, so a missing or unexpected code becomes NA
# instead of being silently counted under a default label ("Exposed Group",
# "Yes" or "Sunday").

# test: 0 = Control Group, 1 = Exposed Group
campaignData$userType <- factor(
  campaignData$test,
  levels = c(0, 1),
  labels = c("Control Group", "Exposed Group")
)

# converted: 1 = Yes, 0 = No (level order "Yes", "No" is kept)
campaignData$userBought <- factor(
  campaignData$converted,
  levels = c(1, 0),
  labels = c("Yes", "No")
)

# mode_impr_day: 1 = Monday, ..., 6 = Saturday, 7 = Sunday
campaignData$dayOfMostImpressions <- factor(
  campaignData$mode_impr_day,
  levels = 1:7,
  labels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
# One-sided, two-sample t-test (pooled variance, var.equal = TRUE) on the 0/1
# `converted` flag, split by userType. "Control Group" is the first factor
# level, so alternative = "less" tests whether
# mean(Control Group) - mean(Exposed Group) < 0.
t.test(campaignData$converted~campaignData$userType, alternative = "less", var.equal = TRUE)
##
## Two Sample t-test
##
## data: campaignData$converted by campaignData$userType
## t = -7.3704, df = 588099, p-value = 8.517e-14
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -0.005975726
## sample estimates:
## mean in group Control Group mean in group Exposed Group
## 0.01785411 0.02554656
# Bar chart: number of converted users in each group, labelled with the count.
ggplot(
  data = filter(campaignData, userBought %in% "Yes"),
  mapping = aes(x = userType)
) +
  geom_bar(fill = "#ef562d") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = "inward"
  ) +
  labs(title = "Converted Users by Group") +
  theme_minimal()
# Headline numbers of the campaign: group sizes, conversions per group and the
# resulting conversion percentages.

# Total users targeted in the campaign
totalUsers <- nrow(campaignData)

# Exposed Group: size, conversions and conversion percentage
noOfEGUsers <- sum(campaignData$test %in% 1)
noOfConvertedEGUsers <- sum(
  campaignData$test %in% 1 & campaignData$converted %in% 1
)
pctConvertedinEG <- 100 * (noOfConvertedEGUsers / noOfEGUsers)

# Control Group: size (everyone not in the Exposed Group), conversions and
# conversion percentage
noOfCGUsers <- totalUsers - noOfEGUsers
noOfConvertedCGUsers <- sum(
  campaignData$test %in% 0 & campaignData$converted %in% 1
)
pctConvertedinCG <- 100 * (noOfConvertedCGUsers / noOfCGUsers)

# Lift in conversion percentage (Exposed minus Control) and the number of
# Exposed Group users that lift corresponds to
diffConversionPct <- pctConvertedinEG - pctConvertedinCG
noOfEGUsersConvertedDueToCampaign <- noOfEGUsers * (diffConversionPct / 100)

# Share of all users that were held out in the Control Group, in percent
pctControlGroup <- 100 * (noOfCGUsers / totalUsers)
As we can see from the above numbers, the Exposed Group has extra converted users due to the campaign. Hence, we can again conclude that the campaign was effective.
Assuming that, without the campaign, the Exposed Group would have converted at the same rate as the Control Group, the value of the extra converted users in the Exposed Group is the additional money that TaskaBella made because of the campaign.
# Revenue attributed to the campaign: each incremental conversion is valued
# at 40
moneyMadeFromCampaign <- 40 * noOfEGUsersConvertedDueToCampaign

# Cost of the campaign: all impressions served, billed at a cost per thousand
# impressions (CPM) of $9
totalImpressions <- sum(campaignData$tot_impr)
costOfCampaign <- 9 * (totalImpressions / 1000)

# Profit and return on investment (in percent of the campaign cost)
profitFromCampaign <- moneyMadeFromCampaign - costOfCampaign
roiOfCampaign <- 100 * (profitFromCampaign / costOfCampaign)
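If there had been no control group (i.e., if these users had also been shown the campaign ads), an additional 0.77% of the users in the control group would have converted. The value of these missed conversions is the opportunity cost of holding out the control group.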
# Opportunity cost of the control group: the conversion lift applied to the
# number of Control Group users, with each conversion valued at 40
opportunityCost <- 40 * ((diffConversionPct / 100) * noOfCGUsers)
# Histogram of total impressions served to converted users (both groups).
ggplot(
  filter(campaignData, userBought %in% "Yes"),
  aes(x = tot_impr)
) +
  geom_histogram(fill = "#ef562d", bins = 30L) +
  labs(
    title = "Histogram of Total Impressions vs Converted Users",
    subtitle = "Irrespective of the Group",
    x = "Total Impressions",
    y = "Converted Users"
  ) +
  theme_minimal()
As we can see, most of the conversions happened within the first 250 impressions. So, let us zoom in and look at a histogram restricted to that range.
# Zoomed histogram: converted users (both groups) who were served at most 250
# impressions.
# The rows are filtered before plotting instead of clipping the axis with
# xlim(): with scale limits, ggplot2 discards bins that extend past the limits
# (the "Removed 2 rows containing missing values (geom_bar)" warning), so the
# lowest and highest impression bins were missing from the chart.
campaignData %>%
  filter(userBought %in% "Yes", between(tot_impr, 0, 250)) %>%
  ggplot() +
  aes(x = tot_impr) +
  geom_histogram(bins = 30L, fill = "#ef562d") +
  labs(x = "Total Impressions", y = "Converted Users", title = "Histogram of Total Impressions vs Converted Users", subtitle = "Irrespective of the Group with up to 250 impressions served") +
  theme_minimal()
## Warning: Removed 577 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
We can see that most of the conversions happened within the range of 1 - 100 impressions.
Now, let us see the conversions by Group.
Histogram of converted users in Control Group
# Histogram of total impressions for converted users of the Control Group.
ggplot(
  filter(
    campaignData,
    userType %in% "Control Group",
    userBought %in% "Yes"
  ),
  aes(x = tot_impr)
) +
  geom_histogram(fill = "#ef562d", bins = 30L) +
  labs(
    title = "Histogram of Total Impressions vs Converted Users",
    subtitle = "Control Group",
    x = "Total Impressions",
    y = "Converted Users"
  ) +
  theme_minimal()
Histogram of converted users in Exposed Group with up to 250 impressions
# Zoomed histogram: converted users of the Exposed Group who were served at
# most 250 impressions.
# The rows are filtered before plotting instead of clipping the axis with
# xlim(): with scale limits, ggplot2 discards bins that extend past the limits
# (the "Removed 2 rows containing missing values (geom_bar)" warning), so the
# lowest and highest impression bins were missing from the chart.
campaignData %>%
  filter(
    userType %in% "Exposed Group",
    userBought %in% "Yes",
    between(tot_impr, 0, 250)
  ) %>%
  ggplot() +
  aes(x = tot_impr) +
  geom_histogram(bins = 30L, fill = "#ef562d") +
  labs(x = "Total Impressions", y = "Converted Users", title = "Histogram of Total Impressions vs Converted Users", subtitle = "Exposed Group with up to 250 impressions") +
  theme_minimal()
## Warning: Removed 554 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
As we can see, in both groups most of the conversions happen within the first 250 impressions. So, let us create a data frame that holds, for each impression range (in steps of 30 impressions), the number of users in that range, the number of converted users in that range, and the total number of impressions served in that range. Using it, let us understand the conversion rates and further analyze the campaign results.
# Bucket every user by total impressions served, in steps of 30 impressions.
# cut() with right-closed intervals reproduces the hand-written ranges:
# (-Inf, 30] -> "1-30", (30, 60] -> "31-60", ..., (270, Inf) -> "271+".
# The result is a factor whose levels are already in ascending range order.
# A missing tot_impr stays NA instead of being counted in "271+".
campaignData$ImpressionRange <- cut(
  campaignData$tot_impr,
  breaks = c(-Inf, seq(30, 270, by = 30), Inf),
  labels = c(
    "1-30", "31-60", "61-90", "91-120", "121-150",
    "151-180", "181-210", "211-240", "241-270", "271+"
  ),
  right = TRUE
)
# Per impression range (all users): number of users, number of conversions and
# total impressions served.
impressionGroups <- campaignData %>%
  group_by(ImpressionRange)
campaignSummary <- impressionGroups %>%
  summarise(
    Users = n(),
    Conversions = sum(converted),
    TotalNoOfImpressions = sum(tot_impr)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
campaignSummary <- campaignSummary %>%
  mutate(
    # Percentage of the users in the impression range who converted
    ConversionRate = 100 * (Conversions / Users),
    # Impressions served per conversion within the impression range
    ImprPerConversion = TotalNoOfImpressions / Conversions
  )
kable(campaignSummary, format = "html")
| ImpressionRange | Users | Conversions | TotalNoOfImpressions | ConversionRate | ImprPerConversion |
|---|---|---|---|---|---|
| 1-30 | 457381 | 3151 | 4770365 | 0.6889224 | 1513.9210 |
| 31-60 | 77335 | 3823 | 3261125 | 4.9434279 | 853.0277 |
| 61-90 | 25590 | 3187 | 1875987 | 12.4540836 | 588.6373 |
| 91-120 | 11293 | 1926 | 1171092 | 17.0548127 | 608.0436 |
| 121-150 | 5825 | 1037 | 779726 | 17.8025751 | 751.9055 |
| 151-180 | 3304 | 567 | 543154 | 17.1610169 | 957.9436 |
| 181-210 | 2024 | 323 | 394103 | 15.9584980 | 1220.1331 |
| 211-240 | 1393 | 202 | 312790 | 14.5010768 | 1548.4653 |
| 241-270 | 878 | 139 | 223135 | 15.8314351 | 1605.2878 |
| 271+ | 3078 | 488 | 1265705 | 15.8544509 | 2593.6578 |
# Bars: conversions per impression range. Black line: impressions served per
# conversion in the same range. Both series share one y scale; the secondary
# axis is an untransformed copy that only carries the second label.
ggplot(
  campaignSummary,
  aes(x = ImpressionRange, weight = Conversions)
) +
  geom_bar(fill = "#ef562d") +
  geom_text(
    aes(y = Conversions, label = Conversions),
    vjust = "inward"
  ) +
  geom_line(
    aes(y = ImprPerConversion),
    group = 1,
    colour = "black",
    size = 1
  ) +
  geom_text(
    aes(y = ImprPerConversion, label = round(ImprPerConversion)),
    vjust = "outward"
  ) +
  scale_y_continuous(
    name = "Conversions",
    sec.axis = sec_axis(~., name = "Impressions per Conversion")
  ) +
  labs(title = "Impressions Range vs Converted Users and Impressions per Conversion") +
  theme_minimal()
# Line chart of the conversion rate (%) per impression range, all users.
# Labels use formatC(): `format = "f"` is a formatC() argument, not a format()
# argument, so in the former format() call it had no effect and `digits = 2`
# meant significant digits with padded output. formatC() yields exactly two
# decimals per label and no padding.
ggplot(campaignSummary) +
  geom_line(aes(x = ImpressionRange, y = ConversionRate), size = 1, color = "#ef562d", group = 1) +
  geom_text(
    aes(
      x = ImpressionRange,
      y = ConversionRate,
      label = formatC(ConversionRate, digits = 2, format = "f")
    ),
    vjust = "inward"
  ) +
  labs(title = "Impressions Range vs Conversion Rate") +
  theme_minimal()
The above analysis / charts don’t distinguish between Exposed Group and Control Group. Let us do this by group and interpret the results.
# Exposed Group only (test == 1): per impression range, the number of users,
# conversions and total impressions served.
exposedGroupData <- filter(campaignData, test == 1)
exposedImpressionGroups <- exposedGroupData %>%
  group_by(ImpressionRange)
exposedCampaignSummary <- exposedImpressionGroups %>%
  summarise(
    Users = n(),
    Conversions = sum(converted),
    TotalNoOfImpressions = sum(tot_impr)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
exposedCampaignSummary <- exposedCampaignSummary %>%
  mutate(
    # Percentage of the users in the impression range who converted
    ConversionRate = 100 * (Conversions / Users),
    # Impressions served per conversion within the impression range
    ImprPerConversion = TotalNoOfImpressions / Conversions
  )
kable(exposedCampaignSummary, format = "html")
| ImpressionRange | Users | Conversions | TotalNoOfImpressions | ConversionRate | ImprPerConversion |
|---|---|---|---|---|---|
| 1-30 | 438983 | 3026 | 4586696 | 0.6893205 | 1515.7621 |
| 31-60 | 74470 | 3726 | 3139252 | 5.0033571 | 842.5260 |
| 61-90 | 24538 | 3125 | 1798915 | 12.7353493 | 575.6528 |
| 91-120 | 10819 | 1885 | 1121799 | 17.4230520 | 595.1188 |
| 121-150 | 5567 | 1005 | 745123 | 18.0528112 | 741.4159 |
| 151-180 | 3151 | 550 | 517931 | 17.4547763 | 941.6927 |
| 181-210 | 1918 | 312 | 373293 | 16.2669447 | 1196.4519 |
| 211-240 | 1334 | 192 | 299489 | 14.3928036 | 1559.8385 |
| 241-270 | 842 | 131 | 214014 | 15.5581948 | 1633.6947 |
| 271+ | 2955 | 471 | 1218189 | 15.9390863 | 2586.3885 |
# Exposed Group. Bars: conversions per impression range. Black line:
# impressions served per conversion. Both series share one y scale; the
# secondary axis is an untransformed copy that only carries the second label.
ggplot(
  exposedCampaignSummary,
  aes(x = ImpressionRange, weight = Conversions)
) +
  geom_bar(fill = "#ef562d") +
  geom_text(
    aes(y = Conversions, label = Conversions),
    vjust = "inward"
  ) +
  geom_line(
    aes(y = ImprPerConversion),
    group = 1,
    colour = "black",
    size = 1
  ) +
  geom_text(
    aes(y = ImprPerConversion, label = round(ImprPerConversion)),
    vjust = "outward"
  ) +
  scale_y_continuous(
    name = "Conversions",
    sec.axis = sec_axis(~., name = "Impressions per Conversion")
  ) +
  labs(
    title = "Impressions Range vs Converted Users and Impressions per Conversion",
    subtitle = "Exposed Group"
  ) +
  theme_minimal()
# Line chart of the conversion rate (%) per impression range, Exposed Group.
# Labels use formatC(): `format = "f"` is a formatC() argument, not a format()
# argument, so in the former format() call it had no effect and `digits = 2`
# meant significant digits with padded output. formatC() yields exactly two
# decimals per label and no padding.
ggplot(exposedCampaignSummary) +
  geom_line(aes(x = ImpressionRange, y = ConversionRate), size = 1, color = "#ef562d", group = 1) +
  geom_text(
    aes(
      x = ImpressionRange,
      y = ConversionRate,
      label = formatC(ConversionRate, digits = 2, format = "f")
    ),
    vjust = "inward"
  ) +
  labs(title = "Impressions Range vs Conversion Rate", subtitle = "Exposed Group") +
  theme_minimal()
# Control Group only (test == 0): per impression range, the number of users,
# conversions and total impressions served.
controlGroupData <- filter(campaignData, test == 0)
controlImpressionGroups <- controlGroupData %>%
  group_by(ImpressionRange)
controlCampaignSummary <- controlImpressionGroups %>%
  summarise(
    Users = n(),
    Conversions = sum(converted),
    TotalNoOfImpressions = sum(tot_impr)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
controlCampaignSummary <- controlCampaignSummary %>%
  mutate(
    # Percentage of the users in the impression range who converted
    ConversionRate = 100 * (Conversions / Users),
    # Impressions served per conversion within the impression range
    ImprPerConversion = TotalNoOfImpressions / Conversions
  )
kable(controlCampaignSummary, format = "html")
| ImpressionRange | Users | Conversions | TotalNoOfImpressions | ConversionRate | ImprPerConversion |
|---|---|---|---|---|---|
| 1-30 | 18398 | 125 | 183669 | 0.6794217 | 1469.352 |
| 31-60 | 2865 | 97 | 121873 | 3.3856894 | 1256.423 |
| 61-90 | 1052 | 62 | 77072 | 5.8935361 | 1243.097 |
| 91-120 | 474 | 41 | 49293 | 8.6497890 | 1202.268 |
| 121-150 | 258 | 32 | 34603 | 12.4031008 | 1081.344 |
| 151-180 | 153 | 17 | 25223 | 11.1111111 | 1483.706 |
| 181-210 | 106 | 11 | 20810 | 10.3773585 | 1891.818 |
| 211-240 | 59 | 10 | 13301 | 16.9491525 | 1330.100 |
| 241-270 | 36 | 8 | 9121 | 22.2222222 | 1140.125 |
| 271+ | 123 | 17 | 47516 | 13.8211382 | 2795.059 |
# Control Group. Bars: conversions per impression range. Black line:
# impressions served per conversion. Both series share one y scale; the
# secondary axis is an untransformed copy that only carries the second label.
ggplot(
  controlCampaignSummary,
  aes(x = ImpressionRange, weight = Conversions)
) +
  geom_bar(fill = "#ef562d") +
  geom_text(
    aes(y = Conversions, label = Conversions),
    vjust = "inward"
  ) +
  geom_line(
    aes(y = ImprPerConversion),
    group = 1,
    colour = "black",
    size = 1
  ) +
  geom_text(
    aes(y = ImprPerConversion, label = round(ImprPerConversion)),
    vjust = "outward"
  ) +
  scale_y_continuous(
    name = "Conversions",
    sec.axis = sec_axis(~., name = "Impressions per Conversion")
  ) +
  labs(
    title = "Impressions Range vs Converted Users and Impressions per Conversion",
    subtitle = "Control Group"
  ) +
  theme_minimal()
# Line chart of the conversion rate (%) per impression range, Control Group.
# Labels use formatC(): `format = "f"` is a formatC() argument, not a format()
# argument, so in the former format() call it had no effect and `digits = 2`
# meant significant digits with padded output. formatC() yields exactly two
# decimals per label and no padding.
ggplot(controlCampaignSummary) +
  geom_line(aes(x = ImpressionRange, y = ConversionRate), size = 1, color = "#ef562d", group = 1) +
  geom_text(
    aes(
      x = ImpressionRange,
      y = ConversionRate,
      label = formatC(ConversionRate, digits = 2, format = "f")
    ),
    vjust = "inward"
  ) +
  labs(title = "Impressions Range vs Conversion Rate", subtitle = "Control Group") +
  theme_minimal()
# Bar chart: converted users (both groups) by the weekday on which they saw
# the most impressions, labelled with the count.
ggplot(
  filter(campaignData, userBought %in% "Yes"),
  aes(x = dayOfMostImpressions)
) +
  geom_bar(fill = "#238b45") +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = "inward"
  ) +
  labs(
    title = "Converted Users by Day of Week",
    subtitle = "Irrespective of the Group"
  ) +
  theme_minimal()
# Converted users (both groups) by the hour of day in which they saw the most
# impressions, labelled with the count.
# mode_impr_hour takes whole-hour values, so geom_bar() draws one bar per hour,
# centred on that hour. The former 50-bin histogram had bin centres that did
# not fall on whole hours, so the count labels (placed at the exact hour by
# stat = "count") were offset from their bars.
campaignData %>%
  filter(userBought %in% "Yes") %>%
  ggplot() +
  aes(x = mode_impr_hour) +
  geom_bar(fill = "#238b45") +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = "inward") +
  labs(title = "Conversions by Time", subtitle = "Irrespective of the Group") +
  theme_minimal()
# Box plot: for converted users (both groups), the distribution of the hour of
# most impressions within each weekday of most impressions.
ggplot(
  filter(campaignData, userBought %in% "Yes"),
  aes(x = dayOfMostImpressions, y = mode_impr_hour)
) +
  geom_boxplot(fill = "#238b45") +
  labs(
    title = "Converted Users by Day of Week and Time during the day",
    subtitle = "Irrespective of the Group"
  ) +
  theme_minimal()
# Same box plot restricted to hours 8-23. The y scale limits remove the
# out-of-range rows before the box statistics are computed, which is the
# exclusion the subtitle describes.
ggplot(
  filter(campaignData, userBought %in% "Yes"),
  aes(x = dayOfMostImpressions, y = mode_impr_hour)
) +
  geom_boxplot(fill = "#238b45") +
  scale_y_continuous(limits = c(8L, 23L)) +
  labs(
    title = "Converted Users by Day of Week and Time during the day",
    subtitle = "Irrespective of the Group (excluding midnight to 8 AM)"
  ) +
  theme_minimal()
## Warning: Removed 420 rows containing non-finite values (stat_boxplot).
The above charts aren’t segregated by group. Let us segregate the data by group and analyze the conversion rates as well.
# Exposed Group summarised by the weekday of most impressions: users,
# conversions and total impressions served.
exposedGroupWeekData <- exposedGroupData %>%
  group_by(dayOfMostImpressions)
exposedGroupWeekSummary <- exposedGroupWeekData %>%
  summarise(
    Users = n(),
    Conversions = sum(converted),
    TotalNoOfImpressions = sum(tot_impr)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
exposedGroupWeekSummary <- exposedGroupWeekSummary %>%
  mutate(
    # Percentage of the users for that weekday who converted
    ConversionRate = 100 * (Conversions / Users),
    # Impressions served per conversion for that weekday
    ImprPerConversion = TotalNoOfImpressions / Conversions
  )

# Exposed Group summarised by the hour of most impressions.
exposedGroupTimeData <- exposedGroupData %>%
  group_by(mode_impr_hour)
exposedGroupTimeSummary <- exposedGroupTimeData %>%
  summarise(
    Users = n(),
    Conversions = sum(converted),
    TotalNoOfImpressions = sum(tot_impr)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
exposedGroupTimeSummary <- exposedGroupTimeSummary %>%
  mutate(
    # Percentage of the users for that hour who converted
    ConversionRate = 100 * (Conversions / Users),
    # Impressions served per conversion for that hour
    ImprPerConversion = TotalNoOfImpressions / Conversions
  )
# Exposed Group. Bars: conversions per weekday. Red line: impressions served
# per conversion.
# The line is drawn from the raw ImprPerConversion values on the primary y
# scale, so the secondary axis must be an untransformed copy. The previous
# `~ . / 2` transformation made the right-hand axis read half of the values
# the line and its labels actually represent.
ggplot(exposedGroupWeekSummary) +
  aes(x = dayOfMostImpressions, weight = Conversions) +
  geom_bar(fill = "#238b45") +
  geom_text(aes(y = Conversions, label = Conversions), vjust = "inward") +
  geom_line(aes(x = dayOfMostImpressions, y = ImprPerConversion), size = 1, color = "red", group = 1) +
  scale_y_continuous(name = "Conversions", sec.axis = sec_axis(~ . * 1, name = "Impressions per Conversion")) +
  geom_text(aes(y = ImprPerConversion, label = round(ImprPerConversion)), vjust = "outward") +
  labs(title = "Conversions and Impressions per Conversion by Week Day", subtitle = "Exposed Group", x = "Week Day", y = "Conversions") +
  theme_minimal()
# Line chart of the conversion rate (%) per weekday, Exposed Group.
# Labels use formatC(): `format = "f"` is a formatC() argument, not a format()
# argument, so in the former format() call it had no effect and `digits = 2`
# meant significant digits with padded output. formatC() yields exactly two
# decimals per label and no padding. The `weight` aesthetic was dropped
# because neither geom_line() nor geom_text() uses it.
ggplot(exposedGroupWeekSummary) +
  aes(x = dayOfMostImpressions, y = ConversionRate) +
  geom_line(size = 1, color = "red", group = 1) +
  geom_text(
    aes(label = formatC(ConversionRate, digits = 2, format = "f")),
    vjust = "inward"
  ) +
  labs(title = "Conversion Rate by Week Day", subtitle = "Exposed Group", x = "Week Day", y = "Conversion Rate") +
  theme_minimal()
# Exposed Group. Bars: conversions per hour of most impressions. Red line:
# impressions served per conversion. Both series share one y scale; the
# secondary axis is an untransformed copy that only carries the second label.
ggplot(
  exposedGroupTimeSummary,
  aes(x = mode_impr_hour, weight = Conversions)
) +
  geom_bar(fill = "#238b45") +
  geom_text(
    aes(y = Conversions, label = Conversions),
    vjust = "inward"
  ) +
  geom_line(
    aes(y = ImprPerConversion),
    group = 1,
    colour = "red",
    size = 1
  ) +
  geom_text(
    aes(y = ImprPerConversion, label = round(ImprPerConversion)),
    vjust = "outward"
  ) +
  scale_y_continuous(
    name = "Conversions",
    sec.axis = sec_axis(~., name = "Impressions per Conversion")
  ) +
  labs(
    title = "Conversions by Hour of the Day",
    subtitle = "Exposed Group"
  ) +
  theme_minimal()
# Exposed Group, hours 8-23 only. Bars: conversions per hour of most
# impressions. Red line: impressions served per conversion.
# The hours are selected by filtering the summary rather than with
# xlim(8, 23): each bar is wider than its hour, so with scale limits the bars
# for hours 8 and 23 reached past the limits and ggplot2 dropped them (the
# "Removed 2 rows containing missing values (geom_bar)" warning).
exposedGroupTimeSummary %>%
  filter(between(mode_impr_hour, 8, 23)) %>%
  ggplot() +
  aes(x = mode_impr_hour, weight = Conversions) +
  geom_bar(fill = "#238b45") +
  geom_text(aes(y = Conversions, label = Conversions), vjust = "inward") +
  geom_line(aes(x = mode_impr_hour, y = ImprPerConversion), size = 1, color = "red", group = 1) +
  scale_y_continuous(name = "Conversions", sec.axis = sec_axis(~ . * 1, name = "Impressions per Conversion")) +
  geom_text(aes(y = ImprPerConversion, label = round(ImprPerConversion)), vjust = "outward") +
  labs(title = "Conversions by Hour of the Day", subtitle = "Exposed Group") +
  theme_minimal()
## Warning: Removed 8 rows containing non-finite values (stat_count).
## Warning: Removed 2 rows containing missing values (geom_bar).
## Warning: Removed 8 rows containing missing values (geom_text).
## Warning: Removed 8 row(s) containing missing values (geom_path).
## Warning: Removed 8 rows containing missing values (geom_text).
# Line chart of the conversion rate (%) per hour of most impressions,
# Exposed Group.
# Labels use formatC(): `format = "f"` is a formatC() argument, not a format()
# argument, so in the former format() call it had no effect and `digits = 2`
# meant significant digits with padded output. formatC() yields exactly two
# decimals per label and no padding.
ggplot(exposedGroupTimeSummary) +
  aes(x = mode_impr_hour, y = ConversionRate) +
  geom_line(size = 1L, colour = "#cb181d") +
  geom_text(
    aes(label = formatC(ConversionRate, digits = 2, format = "f")),
    vjust = "outward"
  ) +
  labs(x = "Hour", y = "Conversion Rate", title = "Conversion Rate by Hour of the Day", subtitle = "Exposed Group") +
  theme_minimal()
# Control Group summarised by the weekday of most impressions: users,
# conversions and total impressions served.
controlGroupWeekData <- controlGroupData %>%
  group_by(dayOfMostImpressions)
controlGroupWeekSummary <- controlGroupWeekData %>%
  summarise(
    Users = n(),
    Conversions = sum(converted),
    TotalNoOfImpressions = sum(tot_impr)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
controlGroupWeekSummary <- controlGroupWeekSummary %>%
  mutate(
    # Percentage of the users for that weekday who converted
    ConversionRate = 100 * (Conversions / Users),
    # Impressions served per conversion for that weekday
    ImprPerConversion = TotalNoOfImpressions / Conversions
  )

# Control Group summarised by the hour of most impressions.
controlGroupTimeData <- controlGroupData %>%
  group_by(mode_impr_hour)
controlGroupTimeSummary <- controlGroupTimeData %>%
  summarise(
    Users = n(),
    Conversions = sum(converted),
    TotalNoOfImpressions = sum(tot_impr)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
controlGroupTimeSummary <- controlGroupTimeSummary %>%
  mutate(
    # Percentage of the users for that hour who converted
    ConversionRate = 100 * (Conversions / Users),
    # Impressions served per conversion for that hour
    ImprPerConversion = TotalNoOfImpressions / Conversions
  )
# Control Group. Bars: conversions per weekday. Red line: impressions served
# per conversion.
# The line is drawn from the raw ImprPerConversion values on the primary y
# scale, so the secondary axis must be an untransformed copy. The previous
# `~ . / 2` transformation made the right-hand axis read half of the values
# the line and its labels actually represent.
ggplot(controlGroupWeekSummary) +
  aes(x = dayOfMostImpressions, weight = Conversions) +
  geom_bar(fill = "#238b45") +
  geom_text(aes(y = Conversions, label = Conversions), vjust = "inward") +
  geom_line(aes(x = dayOfMostImpressions, y = ImprPerConversion), size = 1, color = "red", group = 1) +
  scale_y_continuous(name = "Conversions", sec.axis = sec_axis(~ . * 1, name = "Impressions per Conversion")) +
  geom_text(aes(y = ImprPerConversion, label = round(ImprPerConversion)), vjust = "outward") +
  labs(title = "Conversions and Impressions per Conversion by Week Day", subtitle = "Control Group", x = "Week Day", y = "Conversions") +
  theme_minimal()
# Line chart of the conversion rate (%) per weekday, Control Group.
# Labels use formatC(): `format = "f"` is a formatC() argument, not a format()
# argument, so in the former format() call it had no effect and `digits = 2`
# meant significant digits with padded output. formatC() yields exactly two
# decimals per label and no padding. The `weight` aesthetic was dropped
# because neither geom_line() nor geom_text() uses it.
ggplot(controlGroupWeekSummary) +
  aes(x = dayOfMostImpressions, y = ConversionRate) +
  geom_line(size = 1, color = "red", group = 1) +
  geom_text(
    aes(label = formatC(ConversionRate, digits = 2, format = "f")),
    vjust = "inward"
  ) +
  labs(title = "Conversion Rate by Week Day", subtitle = "Control Group", x = "Week Day", y = "Conversion Rate") +
  theme_minimal()
# Control Group. Bars: conversions per hour of most impressions. Red line:
# impressions served per conversion. Both series share one y scale; the
# secondary axis is an untransformed copy that only carries the second label.
ggplot(
  controlGroupTimeSummary,
  aes(x = mode_impr_hour, weight = Conversions)
) +
  geom_bar(fill = "#238b45") +
  geom_text(
    aes(y = Conversions, label = Conversions),
    vjust = "inward"
  ) +
  geom_line(
    aes(y = ImprPerConversion),
    group = 1,
    colour = "red",
    size = 1
  ) +
  geom_text(
    aes(y = ImprPerConversion, label = round(ImprPerConversion)),
    vjust = "outward"
  ) +
  scale_y_continuous(
    name = "Conversions",
    sec.axis = sec_axis(~., name = "Impressions per Conversion")
  ) +
  labs(
    title = "Conversions by Hour of the Day",
    subtitle = "Control Group"
  ) +
  theme_minimal()
# Control Group, hours 8-23 only. Bars: conversions per hour of most
# impressions. Red line: impressions served per conversion.
# The hours are selected by filtering the summary rather than with
# xlim(8, 23): each bar is wider than its hour, so with scale limits the bars
# for hours 8 and 23 reached past the limits and ggplot2 dropped them (the
# "Removed 2 rows containing missing values (geom_bar)" warning).
controlGroupTimeSummary %>%
  filter(between(mode_impr_hour, 8, 23)) %>%
  ggplot() +
  aes(x = mode_impr_hour, weight = Conversions) +
  geom_bar(fill = "#238b45") +
  geom_text(aes(y = Conversions, label = Conversions), vjust = "inward") +
  geom_line(aes(x = mode_impr_hour, y = ImprPerConversion), size = 1, color = "red", group = 1) +
  scale_y_continuous(name = "Conversions", sec.axis = sec_axis(~ . * 1, name = "Impressions per Conversion")) +
  geom_text(aes(y = ImprPerConversion, label = round(ImprPerConversion)), vjust = "outward") +
  labs(title = "Conversions by Hour of the Day", subtitle = "Control Group") +
  theme_minimal()
## Warning: Removed 8 rows containing non-finite values (stat_count).
## Warning: Removed 2 rows containing missing values (geom_bar).
## Warning: Removed 8 rows containing missing values (geom_text).
## Warning: Removed 8 row(s) containing missing values (geom_path).
## Warning: Removed 8 rows containing missing values (geom_text).
# Line chart of the conversion rate (%) per hour of most impressions,
# Control Group.
# Labels use formatC(): `format = "f"` is a formatC() argument, not a format()
# argument, so in the former format() call it had no effect and `digits = 2`
# meant significant digits with padded output. formatC() yields exactly two
# decimals per label and no padding.
ggplot(controlGroupTimeSummary) +
  aes(x = mode_impr_hour, y = ConversionRate) +
  geom_line(size = 1L, colour = "#cb181d") +
  geom_text(
    aes(label = formatC(ConversionRate, digits = 2, format = "f")),
    vjust = "outward"
  ) +
  labs(x = "Hour", y = "Conversion Rate", title = "Conversion Rate by Hour of the Day", subtitle = "Control Group") +
  theme_minimal()