The company has launched a redesigned landing page and wants to assess its effectiveness compared with the old page. The aim is to test whether the new page leads to more time spent on the page and a higher conversion rate, and whether language preference plays a role in these outcomes.
# Global knitr chunk options: echo the code, hide warnings and messages.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
# Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(tidyr)
library(broom)
## Warning: package 'broom' was built under R version 4.5.2
library(car)
## Warning: package 'car' was built under R version 4.5.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.2
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.5.2
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.5.2
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# 2. READ DATA
# Load the A/B test export into a data frame and display every row.
data <- read.csv(file = "C:/Users/Administrator/Documents/abtest.csv")
print(data)
## user_id group landing_page time_spent_on_the_page converted
## 1 546592 control old 3.48 no
## 2 546468 treatment new 7.13 yes
## 3 546462 treatment new 4.40 no
## 4 546567 control old 3.02 no
## 5 546459 treatment new 4.75 yes
## 6 546558 control old 5.28 yes
## 7 546448 treatment new 5.25 yes
## 8 546581 control old 6.53 yes
## 9 546461 treatment new 10.71 yes
## 10 546548 control old 2.08 no
## 11 546588 control old 6.21 yes
## 12 546546 control old 2.58 yes
## 13 546491 treatment new 5.86 yes
## 14 546478 treatment new 6.03 yes
## 15 546578 control old 8.72 yes
## 16 546466 treatment new 6.27 yes
## 17 546443 treatment new 8.73 no
## 18 546555 control old 0.40 no
## 19 546493 control old 10.30 yes
## 20 546549 control old 3.88 yes
## 21 546560 control old 2.66 no
## 22 546584 control old 7.03 yes
## 23 546450 treatment new 3.65 no
## 24 546475 treatment new 7.02 yes
## 25 546456 treatment new 6.18 no
## 26 546455 treatment new 4.39 no
## 27 546469 treatment new 9.49 yes
## 28 546586 control old 4.05 no
## 29 546471 treatment new 7.81 yes
## 30 546575 control old 4.28 no
## 31 546464 treatment new 5.41 yes
## 32 546556 control old 3.52 yes
## 33 546585 control old 5.39 yes
## 34 546577 control old 4.52 no
## 35 546587 control old 4.46 no
## 36 546552 control old 8.50 yes
## 37 546551 control old 3.13 no
## 38 546557 control old 6.04 yes
## 39 546487 treatment new 1.65 no
## 40 546589 control old 0.19 no
## 41 546559 control old 8.46 yes
## 42 546570 control old 1.92 no
## 43 546489 treatment new 7.16 yes
## 44 546453 treatment new 7.16 yes
## 45 546488 treatment new 3.91 no
## 46 546565 control old 8.02 no
## 47 546460 treatment new 5.37 yes
## 48 546458 treatment new 7.23 yes
## 49 546492 treatment new 8.08 yes
## 50 546473 treatment new 10.50 yes
## 51 546554 control old 0.22 no
## 52 546457 treatment new 5.65 no
## 53 546479 treatment new 6.47 yes
## 54 546576 control old 4.71 no
## 55 546482 treatment new 6.41 yes
## 56 546563 control old 0.93 no
## 57 546569 control old 1.81 no
## 58 546454 treatment new 8.30 yes
## 59 546562 control old 7.40 yes
## 60 546574 control old 9.15 no
## 61 546470 treatment new 6.01 yes
## 62 546467 treatment new 6.79 yes
## 63 546572 control old 4.18 no
## 64 546590 control old 5.47 no
## 65 546553 control old 5.96 yes
## 66 546445 treatment new 7.27 yes
## 67 546545 control old 6.60 yes
## 68 546582 control old 4.75 yes
## 69 546484 treatment new 6.70 no
## 70 546579 control old 2.23 no
## 71 546568 control old 0.40 no
## 72 546476 treatment new 5.42 yes
## 73 546452 treatment new 5.08 yes
## 74 546444 treatment new 7.46 yes
## 75 546591 control old 4.87 no
## 76 546583 control old 6.57 yes
## 77 546573 control old 1.44 no
## 78 546485 treatment new 3.88 no
## 79 546486 treatment new 9.12 yes
## 80 546547 control old 3.21 yes
## 81 546490 treatment new 4.68 no
## 82 546449 treatment new 5.26 yes
## 83 546463 treatment new 5.74 yes
## 84 546580 control old 2.90 no
## 85 546571 control old 4.30 no
## 86 546564 control old 0.91 no
## 87 546465 treatment new 6.71 no
## 88 546480 treatment new 3.68 no
## 89 546447 treatment new 3.30 no
## 90 546561 control old 6.04 yes
## 91 546477 treatment new 5.40 no
## 92 546451 treatment new 8.47 yes
## 93 546566 control old 8.35 no
## 94 546474 treatment new 4.94 no
## 95 546550 control old 3.05 no
## 96 546446 treatment new 5.15 no
## 97 546544 control old 6.52 yes
## 98 546472 treatment new 7.07 yes
## 99 546481 treatment new 6.20 yes
## 100 546483 treatment new 5.86 yes
## language_preferred
## 1 Spanish
## 2 English
## 3 Spanish
## 4 French
## 5 Spanish
## 6 English
## 7 French
## 8 Spanish
## 9 French
## 10 English
## 11 Spanish
## 12 English
## 13 Spanish
## 14 French
## 15 Spanish
## 16 Spanish
## 17 English
## 18 English
## 19 English
## 20 English
## 21 French
## 22 Spanish
## 23 English
## 24 English
## 25 Spanish
## 26 English
## 27 English
## 28 Spanish
## 29 French
## 30 French
## 31 English
## 32 English
## 33 Spanish
## 34 Spanish
## 35 Spanish
## 36 English
## 37 English
## 38 English
## 39 Spanish
## 40 Spanish
## 41 French
## 42 French
## 43 Spanish
## 44 English
## 45 English
## 46 French
## 47 French
## 48 Spanish
## 49 Spanish
## 50 English
## 51 English
## 52 English
## 53 Spanish
## 54 Spanish
## 55 Spanish
## 56 French
## 57 French
## 58 French
## 59 French
## 60 French
## 61 French
## 62 French
## 63 French
## 64 Spanish
## 65 English
## 66 French
## 67 English
## 68 Spanish
## 69 Spanish
## 70 Spanish
## 71 French
## 72 French
## 73 English
## 74 English
## 75 Spanish
## 76 Spanish
## 77 French
## 78 Spanish
## 79 French
## 80 English
## 81 French
## 82 French
## 83 Spanish
## 84 Spanish
## 85 French
## 86 French
## 87 English
## 88 French
## 89 French
## 90 French
## 91 French
## 92 English
## 93 French
## 94 French
## 95 English
## 96 Spanish
## 97 English
## 98 Spanish
## 99 Spanish
## 100 English
# 3. EXPLORATORY DATA ANALYSIS
# First six rows of the data set.
head(data, n = 6L)
## user_id group landing_page time_spent_on_the_page converted
## 1 546592 control old 3.48 no
## 2 546468 treatment new 7.13 yes
## 3 546462 treatment new 4.40 no
## 4 546567 control old 3.02 no
## 5 546459 treatment new 4.75 yes
## 6 546558 control old 5.28 yes
## language_preferred
## 1 Spanish
## 2 English
## 3 Spanish
## 4 French
## 5 Spanish
## 6 English
# Last six rows of the data set.
tail(data, n = 6L)
## user_id group landing_page time_spent_on_the_page converted
## 95 546550 control old 3.05 no
## 96 546446 treatment new 5.15 no
## 97 546544 control old 6.52 yes
## 98 546472 treatment new 7.07 yes
## 99 546481 treatment new 6.20 yes
## 100 546483 treatment new 5.86 yes
## language_preferred
## 95 English
## 96 Spanish
## 97 English
## 98 Spanish
## 99 Spanish
## 100 English
# Per-column summary: quantiles for numeric columns, length/class for text.
summary(object = data)
## user_id group landing_page time_spent_on_the_page
## Min. :546443 Length:100 Length:100 Min. : 0.190
## 1st Qu.:546468 Class :character Class :character 1st Qu.: 3.880
## Median :546493 Mode :character Mode :character Median : 5.415
## Mean :546517 Mean : 5.378
## 3rd Qu.:546567 3rd Qu.: 7.022
## Max. :546592 Max. :10.710
## converted language_preferred
## Length:100 Length:100
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Shape of the data set: number of rows, then number of columns.
c(nrow(data), ncol(data))
## [1] 100 6
# Count of NA values in each column.
data |>
  is.na() |>
  colSums()
## user_id group landing_page
## 0 0 0
## time_spent_on_the_page converted language_preferred
## 0 0 0
# Number of rows that exactly repeat an earlier row.
data |>
  duplicated() |>
  sum()
## [1] 0
# 4. UNIVARIATE ANALYSIS
# Histogram of time spent on the page.
# The values run from roughly 0.2 to 10.7, so a bin width of 5 squeezes the
# whole sample into about three bars. One-unit bins aligned at 0 show the
# actual shape of the distribution.
ggplot(data, aes(x = time_spent_on_the_page)) +
  geom_histogram(binwidth = 1, boundary = 0, fill = "blue", color = "black") +
  labs(
    title = "Distribution of Time Spent on Page",
    x = "Time spent on the page",
    y = "Number of users"
  )
# Frequency of conversions (count of each `converted` label)
table(data[["converted"]])
##
## no yes
## 46 54
# 5. BIVARIATE ANALYSIS
# Box plot of time on page, one box per landing-page version.
data |>
  ggplot(
    aes(x = landing_page, y = time_spent_on_the_page, fill = landing_page)
  ) +
  geom_boxplot() +
  ggtitle("Time Spent by Landing Page")

# Stacked bars scaled to 1, so each bar shows the converted / not-converted
# share within a landing-page version.
data |>
  ggplot(aes(x = landing_page, fill = as.factor(converted))) +
  geom_bar(position = "fill") +
  ggtitle("Conversion Rate by Landing Page") +
  ylab("Proportion")
# 6. HYPOTHESIS TESTING
# 6.1 Time Spent: Old vs New Page
# One-sided two-sample t-test on time spent, grouped by landing page.
# The groups are compared as new minus old, so alternative = "greater"
# tests whether mean time on the new page exceeds the old page.
# Insight:
# If p-value < 0.05, users spend significantly more time on the new page than the old page.
# If p-value > 0.05, there is no evidence that the new page increases time spent.
t_out <- t.test(
  time_spent_on_the_page ~ landing_page,
  data = data,
  alternative = "greater"
)
print(t_out)
##
## Welch Two Sample t-test
##
## data: time_spent_on_the_page by landing_page
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means between group new and group old is greater than 0
## 95 percent confidence interval:
## 0.9485536 Inf
## sample estimates:
## mean in group new mean in group old
## 6.2232 4.5324
# Observation
# The p-value is < 0.05, which shows that users spend significantly more time on the new page than on the old page.
# 6.2 Conversion Rate: Old vs New Page
# One-sided two-sample test of proportions: is the conversion rate on the
# new page greater than on the old page?
# Insight:
# If p-value < 0.05, the new page has a significantly higher conversion rate than the old page.
# If p-value > 0.05, there is no evidence that the new page converts better than the old page.

# Recode the outcome as 0/1. The CSV stores lowercase "yes"/"no", so the
# labels are normalised before comparing; matching against "Yes" coded every
# row as 0 and left prop.test() with two all-zero groups (p-value = NA).
# The is.numeric() guard keeps a second run from wiping the recoded column.
if (!is.numeric(data$converted)) {
  converted_label <- tolower(trimws(as.character(data$converted)))
  stopifnot(
    "`converted` must contain only yes/no labels" =
      all(converted_label %in% c("yes", "no"))
  )
  data$converted <- ifelse(converted_label == "yes", 1, 0)
}

# Successes and sample sizes per page; "new" goes first so that
# alternative = "greater" means new > old.
conv_new <- sum(data$converted[data$landing_page == "new"])
conv_old <- sum(data$converted[data$landing_page == "old"])
n_new <- sum(data$landing_page == "new")
n_old <- sum(data$landing_page == "old")
prop.test(
  x = c(conv_new, conv_old),
  n = c(n_new, n_old),
  alternative = "greater",
  correct = FALSE
)
## Warning in prop.test(x = c(conv_new, conv_old), n = c(n_new, n_old),
## alternative = "greater", : Chi-squared approximation may be incorrect
##
## 2-sample test for equality of proportions without continuity correction
##
## data: c(conv_new, conv_old) out of c(n_new, n_old)
## X-squared = NaN, df = 1, p-value = NA
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0 1
## sample estimates:
## prop 1 prop 2
## 0 0
# Observation
# The test returned p-value = NA because both sample proportions are 0, so this output supports no conclusion about the conversion rates.
# 6.3 Conversion vs Language Independence
# Cross-tabulate the conversion flag against preferred language and run a
# chi-squared test on the resulting table.
# NOTE(review): this is a test of independence only when `converted` holds
# both outcomes; if it holds a single value the table has one row and
# chisq.test() reports a "given probabilities" test on the language counts.
# Insight:
# If p-value < 0.05, conversion rates differ significantly across languages.
# If p-value > 0.05, language preference does not significantly affect conversion.
table_lang <- table(
  data[["converted"]],
  data[["language_preferred"]]
)
print(chisq.test(table_lang))
##
## Chi-squared test for given probabilities
##
## data: table_lang
## X-squared = 0.08, df = 2, p-value = 0.9608
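# Observation
# The output above is a chi-squared test for given probabilities (a goodness-of-fit test on the language counts), not a test of independence. Its p-value > 0.05 only shows that the three languages are about equally represented; it says nothing about whether language preference affects conversion.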
# 6.4 Time Spent Across Languages (New Page Only)
# One-way ANOVA of time spent by preferred language, using only the rows
# served the new landing page. filter() is namespaced because several
# attached packages provide a function of that name.
# Insight:
# If p-value < 0.05, average time spent differs significantly across languages on the new page.
# If p-value > 0.05, time spent is consistent across languages.
anova_result <- aov(
  time_spent_on_the_page ~ language_preferred,
  data = dplyr::filter(data, landing_page == "new")
)
print(summary(anova_result))
## Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred 2 5.68 2.838 0.854 0.432
## Residuals 47 156.10 3.321
# Observation
# The p-value is > 0.05, so there is no evidence that mean time spent on the new page differs across languages.