# Global knitr chunk option: echo = TRUE shows each chunk's source code in the
# rendered report alongside its output.
knitr::opts_chunk$set(echo = TRUE)
#Problem Statement
#E‑News Express wants to test whether a newly designed landing page leads to higher user engagement and more subscriber conversions than the existing page, using A/B test data on time spent, conversion, and preferred language.
#Objectives
#Use statistical analysis to answer:
#Time Spent: Do users stay longer on the new page?
#Conversion: Does the new page get more subscribers?
#Language vs. Conversion: Does language choice affect subscription rates?
#Language vs. Time: Do different language users spend the same amount of time on the new page?
# Import the dataset ----
# NOTE(review): both paths are absolute and specific to one Windows machine, so
# the script will not run elsewhere as-is. read.csv() below is given the full
# path, so it does not appear to depend on the setwd() call -- confirm before
# removing it.
setwd("C:\\Users\\ADMIN\\OneDrive\\Desktop\\data folder")
data<- read.csv("C:\\Users\\ADMIN\\OneDrive\\Desktop\\data folder\\abtest[1].csv")
# Print the full data frame (all rows) for a first visual inspection.
data
## user_id group landing_page time_spent_on_the_page converted
## 1 546592 control old 3.48 no
## 2 546468 treatment new 7.13 yes
## 3 546462 treatment new 4.40 no
## 4 546567 control old 3.02 no
## 5 546459 treatment new 4.75 yes
## 6 546558 control old 5.28 yes
## 7 546448 treatment new 5.25 yes
## 8 546581 control old 6.53 yes
## 9 546461 treatment new 10.71 yes
## 10 546548 control old 2.08 no
## 11 546588 control old 6.21 yes
## 12 546546 control old 2.58 yes
## 13 546491 treatment new 5.86 yes
## 14 546478 treatment new 6.03 yes
## 15 546578 control old 8.72 yes
## 16 546466 treatment new 6.27 yes
## 17 546443 treatment new 8.73 no
## 18 546555 control old 0.40 no
## 19 546493 control old 10.30 yes
## 20 546549 control old 3.88 yes
## 21 546560 control old 2.66 no
## 22 546584 control old 7.03 yes
## 23 546450 treatment new 3.65 no
## 24 546475 treatment new 7.02 yes
## 25 546456 treatment new 6.18 no
## 26 546455 treatment new 4.39 no
## 27 546469 treatment new 9.49 yes
## 28 546586 control old 4.05 no
## 29 546471 treatment new 7.81 yes
## 30 546575 control old 4.28 no
## 31 546464 treatment new 5.41 yes
## 32 546556 control old 3.52 yes
## 33 546585 control old 5.39 yes
## 34 546577 control old 4.52 no
## 35 546587 control old 4.46 no
## 36 546552 control old 8.50 yes
## 37 546551 control old 3.13 no
## 38 546557 control old 6.04 yes
## 39 546487 treatment new 1.65 no
## 40 546589 control old 0.19 no
## 41 546559 control old 8.46 yes
## 42 546570 control old 1.92 no
## 43 546489 treatment new 7.16 yes
## 44 546453 treatment new 7.16 yes
## 45 546488 treatment new 3.91 no
## 46 546565 control old 8.02 no
## 47 546460 treatment new 5.37 yes
## 48 546458 treatment new 7.23 yes
## 49 546492 treatment new 8.08 yes
## 50 546473 treatment new 10.50 yes
## 51 546554 control old 0.22 no
## 52 546457 treatment new 5.65 no
## 53 546479 treatment new 6.47 yes
## 54 546576 control old 4.71 no
## 55 546482 treatment new 6.41 yes
## 56 546563 control old 0.93 no
## 57 546569 control old 1.81 no
## 58 546454 treatment new 8.30 yes
## 59 546562 control old 7.40 yes
## 60 546574 control old 9.15 no
## 61 546470 treatment new 6.01 yes
## 62 546467 treatment new 6.79 yes
## 63 546572 control old 4.18 no
## 64 546590 control old 5.47 no
## 65 546553 control old 5.96 yes
## 66 546445 treatment new 7.27 yes
## 67 546545 control old 6.60 yes
## 68 546582 control old 4.75 yes
## 69 546484 treatment new 6.70 no
## 70 546579 control old 2.23 no
## 71 546568 control old 0.40 no
## 72 546476 treatment new 5.42 yes
## 73 546452 treatment new 5.08 yes
## 74 546444 treatment new 7.46 yes
## 75 546591 control old 4.87 no
## 76 546583 control old 6.57 yes
## 77 546573 control old 1.44 no
## 78 546485 treatment new 3.88 no
## 79 546486 treatment new 9.12 yes
## 80 546547 control old 3.21 yes
## 81 546490 treatment new 4.68 no
## 82 546449 treatment new 5.26 yes
## 83 546463 treatment new 5.74 yes
## 84 546580 control old 2.90 no
## 85 546571 control old 4.30 no
## 86 546564 control old 0.91 no
## 87 546465 treatment new 6.71 no
## 88 546480 treatment new 3.68 no
## 89 546447 treatment new 3.30 no
## 90 546561 control old 6.04 yes
## 91 546477 treatment new 5.40 no
## 92 546451 treatment new 8.47 yes
## 93 546566 control old 8.35 no
## 94 546474 treatment new 4.94 no
## 95 546550 control old 3.05 no
## 96 546446 treatment new 5.15 no
## 97 546544 control old 6.52 yes
## 98 546472 treatment new 7.07 yes
## 99 546481 treatment new 6.20 yes
## 100 546483 treatment new 5.86 yes
## language_preferred
## 1 Spanish
## 2 English
## 3 Spanish
## 4 French
## 5 Spanish
## 6 English
## 7 French
## 8 Spanish
## 9 French
## 10 English
## 11 Spanish
## 12 English
## 13 Spanish
## 14 French
## 15 Spanish
## 16 Spanish
## 17 English
## 18 English
## 19 English
## 20 English
## 21 French
## 22 Spanish
## 23 English
## 24 English
## 25 Spanish
## 26 English
## 27 English
## 28 Spanish
## 29 French
## 30 French
## 31 English
## 32 English
## 33 Spanish
## 34 Spanish
## 35 Spanish
## 36 English
## 37 English
## 38 English
## 39 Spanish
## 40 Spanish
## 41 French
## 42 French
## 43 Spanish
## 44 English
## 45 English
## 46 French
## 47 French
## 48 Spanish
## 49 Spanish
## 50 English
## 51 English
## 52 English
## 53 Spanish
## 54 Spanish
## 55 Spanish
## 56 French
## 57 French
## 58 French
## 59 French
## 60 French
## 61 French
## 62 French
## 63 French
## 64 Spanish
## 65 English
## 66 French
## 67 English
## 68 Spanish
## 69 Spanish
## 70 Spanish
## 71 French
## 72 French
## 73 English
## 74 English
## 75 Spanish
## 76 Spanish
## 77 French
## 78 Spanish
## 79 French
## 80 English
## 81 French
## 82 French
## 83 Spanish
## 84 Spanish
## 85 French
## 86 French
## 87 English
## 88 French
## 89 French
## 90 French
## 91 French
## 92 English
## 93 French
## 94 French
## 95 English
## 96 Spanish
## 97 English
## 98 Spanish
## 99 Spanish
## 100 English
# View the first 6 rows
head(data, 6)
## user_id group landing_page time_spent_on_the_page converted
## 1 546592 control old 3.48 no
## 2 546468 treatment new 7.13 yes
## 3 546462 treatment new 4.40 no
## 4 546567 control old 3.02 no
## 5 546459 treatment new 4.75 yes
## 6 546558 control old 5.28 yes
## language_preferred
## 1 Spanish
## 2 English
## 3 Spanish
## 4 French
## 5 Spanish
## 6 English
# View the last 6 rows
tail(data, 6)
## user_id group landing_page time_spent_on_the_page converted
## 95 546550 control old 3.05 no
## 96 546446 treatment new 5.15 no
## 97 546544 control old 6.52 yes
## 98 546472 treatment new 7.07 yes
## 99 546481 treatment new 6.20 yes
## 100 546483 treatment new 5.86 yes
## language_preferred
## 95 English
## 96 Spanish
## 97 English
## 98 Spanish
## 99 Spanish
## 100 English
# Check the dataset shape: number of rows first, then number of columns
nrow(data)
## [1] 100
ncol(data)
## [1] 6
# Statistical summary of every column: quartiles and mean for the numeric
# columns, length/class/mode for the character columns
summary(data)
## user_id group landing_page time_spent_on_the_page
## Min. :546443 Length:100 Length:100 Min. : 0.190
## 1st Qu.:546468 Class :character Class :character 1st Qu.: 3.880
## Median :546493 Mode :character Mode :character Median : 5.415
## Mean :546517 Mean : 5.378
## 3rd Qu.:546567 3rd Qu.: 7.022
## Max. :546592 Max. :10.710
## converted language_preferred
## Length:100 Length:100
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Count missing values (NA) in each column
colSums(is.na(data))
## user_id group landing_page
## 0 0 0
## time_spent_on_the_page converted language_preferred
## 0 0 0
# Count rows that are exact duplicates of an earlier row
sum(duplicated(data))
## [1] 0
# List the duplicated rows themselves (none in this dataset)
data[duplicated(data), ]
## [1] user_id group landing_page
## [4] time_spent_on_the_page converted language_preferred
## <0 rows> (or 0-length row.names)
# Convert to factors
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# Build a copy of the data with the categorical columns stored as factors.
# `converted` holds the strings "no"/"yes", so those are the levels to map
# from; mapping from c(0, 1) matches nothing and turns every value into NA.
df <- data %>%
  mutate(
    group = factor(group),
    landing_page = factor(landing_page),
    converted = factor(
      converted,
      levels = c("no", "yes"),
      labels = c("No", "Yes")
    ),
    language_preferred = factor(language_preferred)
  )
# Univariate summaries ----
# Landing page: counts, then shares
table(data[["landing_page"]])
##
## new old
## 50 50
prop.table(table(data[["landing_page"]]))
##
## new old
## 0.5 0.5
# Conversion outcome: counts, then shares
table(data[["converted"]])
##
## no yes
## 46 54
prop.table(table(data[["converted"]]))
##
## no yes
## 0.46 0.54
# Preferred language: counts, then shares
table(data[["language_preferred"]])
##
## English French Spanish
## 32 34 34
prop.table(table(data[["language_preferred"]]))
##
## English French Spanish
## 0.32 0.34 0.34
# Five-number summary plus mean of the time spent on the page
summary(data[["time_spent_on_the_page"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.190 3.880 5.415 5.378 7.022 10.710
# Univariate: distribution of time spent on the page ----
# Histogram (bin width 1) with a density curve overlaid. The density is mapped
# to counts via after_stat(count) so it shares the histogram's y-axis.
# after_stat() and `linewidth` replace the `..count..` notation and the `size`
# line aesthetic, both deprecated since ggplot2 3.4.0.
ggplot(data, aes(x = time_spent_on_the_page)) +
  geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
  geom_density(aes(y = after_stat(count)), color = "red", linewidth = 1) +
  ggtitle("Distribution of Time Spent on the Page")

# Single boxplot of time spent on the page across all users; the x aesthetic is
# an empty string so there is one box, and the x-axis label is blanked.
ggplot(data = data, mapping = aes(x = "", y = time_spent_on_the_page)) +
  geom_boxplot(fill = "blue") +
  labs(title = "Boxplot of Time Spent on the Page", x = "")

# Bivariate: time spent and conversion rate by landing page ----
# `converted` holds lowercase "no"/"yes", so the rate must compare against
# "yes"; comparing against "Yes" matches nothing and reports a rate of 0.
data %>%
  group_by(landing_page) %>%
  summarise(
    mean_time = mean(time_spent_on_the_page),
    sd_time = sd(time_spent_on_the_page),
    conv_rate = mean(converted == "yes")
  )
## # A tibble: 2 × 4
## landing_page mean_time sd_time conv_rate
## <chr> <dbl> <dbl> <dbl>
## 1 new 6.22 1.82 0.66
## 2 old 4.53 2.58 0.42
# Conversion rate by preferred language.
# `converted` holds lowercase "no"/"yes", so the rate must compare against
# "yes"; comparing against "Yes" matches nothing and reports a rate of 0.
data %>%
  group_by(language_preferred) %>%
  summarise(conv_rate = mean(converted == "yes"))
## # A tibble: 3 × 2
## language_preferred conv_rate
## <chr> <dbl>
## 1 English 0.656
## 2 French 0.441
## 3 Spanish 0.529
# Bivariate: time spent on the page by landing page ----
# The title layer has to be chained with `+`; written as a separate statement
# it is never attached to the plot and is only printed as a labels object.
ggplot(data, aes(x = landing_page, y = time_spent_on_the_page)) +
  geom_boxplot() +
  ggtitle("Time Spent vs Landing Page")
# Question 1: do users spend more time on the new page than on the old one?
# Extract the time-spent values for each landing page.
time_new <- pull(filter(data, landing_page == "new"), time_spent_on_the_page)
time_old <- pull(filter(data, landing_page == "old"), time_spent_on_the_page)
# One-sided Welch two-sample t-test (H1: mean of new > mean of old);
# var.equal = FALSE keeps the group variances unpooled.
tt <- t.test(
  x = time_new,
  y = time_old,
  var.equal = FALSE,
  alternative = "greater"
)
tt
##
## Welch Two Sample t-test
##
## data: time_new and time_old
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.9485536 Inf
## sample estimates:
## mean of x mean of y
## 6.2232 4.5324
# Question 2: is the conversion rate higher on the new page than on the old?
# Cross-tabulate landing page (rows) against conversion outcome (columns).
tab_page_conv <- table(data[["landing_page"]], data[["converted"]])
tab_page_conv
##
## no yes
## new 17 33
## old 29 21
rownames(tab_page_conv)
## [1] "new" "old"
colnames(tab_page_conv)
## [1] "no" "yes"
# Visitors shown each page, and how many of them converted.
n_new <- with(data, sum(landing_page == "new"))
n_old <- with(data, sum(landing_page == "old"))
conv_new <- with(data, sum(landing_page == "new" & converted == "yes"))
conv_old <- with(data, sum(landing_page == "old" & converted == "yes"))
# One-sided two-sample proportion test (H1: new-page rate > old-page rate).
prop_test_result <- prop.test(
  x = c(conv_new, conv_old),
  n = c(n_new, n_old),
  alternative = "greater"
)
prop_test_result
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(conv_new, conv_old) out of c(n_new, n_old)
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0.06086519 1.00000000
## sample estimates:
## prop 1 prop 2
## 0.66 0.42
# Question 3: is conversion independent of preferred language?
# Contingency table: conversion outcome (rows) by preferred language (columns).
cont_table <- table(data[["converted"]], data[["language_preferred"]])
cont_table
##
## English French Spanish
## no 11 19 16
## yes 21 15 18
# Pearson chi-squared test of independence on the contingency table.
chisq_result <- chisq.test(x = cont_table)
chisq_result
##
## Pearson's Chi-squared test
##
## data: cont_table
## X-squared = 3.093, df = 2, p-value = 0.213
# Question 4: does time spent on the new page differ across languages? (ANOVA)
# Keep only the users who were shown the new landing page.
data_new <- filter(data, landing_page == "new")
# Check the group size for each language
table(data_new[["language_preferred"]])
##
## English French Spanish
## 16 17 17
# One-way ANOVA model: time spent explained by preferred language
anova_model <- aov(time_spent_on_the_page ~ language_preferred, data = data_new)
anova_summary <- summary(anova_model)
anova_summary
## Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred 2 5.68 2.838 0.854 0.432
## Residuals 47 156.10 3.321
#Conclusions and Recommendations
#People spend significantly more time on the new landing page than on the old one, which indicates that the new design is more engaging.
#A higher share of visitors who see the new page end up converting, and this improvement is unlikely to be due to chance.
#Neither the conversion rate nor the time spent on the new page differs significantly across the different languages, so language does not seem to be a major driver here.
#Recommendations
#Make the new landing page the main page for all users because it keeps people longer and converts better.
#Push more traffic to this new page through ads, promotions, and email campaigns to get the better performance.
#Keep the current language setup, but keep tracking results by language so that language‑specific tweaks can be added later if needed.