# Global knitr chunk option: echo = TRUE, so each chunk's source code is shown
# in the rendered report alongside its output.
knitr::opts_chunk$set(echo = TRUE)
#Problem Statement
#E-news Express has declining subscriber growth. They need to verify if their New Landing Page is more effective than the old one at engaging users and getting new subscribers.
#Objectives
#Use statistical analysis to answer:
#Time Spent: Do users stay longer on the new page?
#Conversion: Does the new page get more subscribers?
#Language vs. Conversion: Does language choice affect subscription rates?
#Language vs. Time: Do different language users spend the same amount of time on the new page?
# Install ----
# Install only the packages that are not yet available. Re-installing on every
# run re-downloads the CRAN binaries and tries to overwrite packages that may
# already be loaded in the session (likely the source of the
# "cannot remove prior installation" warning seen in earlier runs).
required_pkgs <- c("tidyverse", "ggplot2", "broom", "dplyr", "effectsize")
# system.file() returns "" for a package that is not installed; unlike
# library()/requireNamespace() it does not load anything.
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  # Use an HTTPS mirror rather than plain HTTP for package downloads.
  install.packages(missing_pkgs, repos = "https://cloud.r-project.org")
}
## Installing packages into 'C:/Users/ADMIN/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## also installing the dependency 'performance'
## Warning in download.file(urls, destfiles, "libcurl", mode = "wb", ...): URL
## 'https://lib.stat.cmu.edu/R/CRAN/bin/windows/contrib/4.5/ggplot2_4.0.1.zip':
## Timeout of 60 seconds was reached
## Warning in download.file(urls, destfiles, "libcurl", mode = "wb", ...): URL
## 'https://lib.stat.cmu.edu/R/CRAN/bin/windows/contrib/4.5/broom_1.0.11.zip':
## Timeout of 60 seconds was reached
## Warning in download.file(urls, destfiles, "libcurl", mode = "wb", ...): some
## files were not downloaded
## Warning in download.packages(pkgs, destdir = tmpd, available = available, :
## download of package 'ggplot2' failed
## Warning in download.packages(pkgs, destdir = tmpd, available = available, :
## download of package 'broom' failed
## package 'performance' successfully unpacked and MD5 sums checked
## package 'tidyverse' successfully unpacked and MD5 sums checked
## package 'dplyr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'dplyr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\ADMIN\AppData\Local\R\win-library\4.5\00LOCK\dplyr\libs\x64\dplyr.dll
## to C:\Users\ADMIN\AppData\Local\R\win-library\4.5\dplyr\libs\x64\dplyr.dll:
## Permission denied
## Warning: restored 'dplyr'
## package 'effectsize' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\ADMIN\AppData\Local\Temp\RtmpoliMti\downloaded_packages
# Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
## Warning: package 'broom' was built under R version 4.5.2
library(ggplot2)
library(dplyr)
# Reading the data into a data frame.
# The CSV is addressed by its absolute path, so the working directory is not
# changed: setwd() is a global side effect, and its return value (the previous
# working directory) must not be stored in `data`.
data_path <- "C:\\Users\\ADMIN\\OneDrive\\Desktop\\data folder\\abtest[1].csv"
data <- read.csv(data_path)
# Print the full data frame (100 rows) in the report.
data
## user_id group landing_page time_spent_on_the_page converted
## 1 546592 control old 3.48 no
## 2 546468 treatment new 7.13 yes
## 3 546462 treatment new 4.40 no
## 4 546567 control old 3.02 no
## 5 546459 treatment new 4.75 yes
## 6 546558 control old 5.28 yes
## 7 546448 treatment new 5.25 yes
## 8 546581 control old 6.53 yes
## 9 546461 treatment new 10.71 yes
## 10 546548 control old 2.08 no
## 11 546588 control old 6.21 yes
## 12 546546 control old 2.58 yes
## 13 546491 treatment new 5.86 yes
## 14 546478 treatment new 6.03 yes
## 15 546578 control old 8.72 yes
## 16 546466 treatment new 6.27 yes
## 17 546443 treatment new 8.73 no
## 18 546555 control old 0.40 no
## 19 546493 control old 10.30 yes
## 20 546549 control old 3.88 yes
## 21 546560 control old 2.66 no
## 22 546584 control old 7.03 yes
## 23 546450 treatment new 3.65 no
## 24 546475 treatment new 7.02 yes
## 25 546456 treatment new 6.18 no
## 26 546455 treatment new 4.39 no
## 27 546469 treatment new 9.49 yes
## 28 546586 control old 4.05 no
## 29 546471 treatment new 7.81 yes
## 30 546575 control old 4.28 no
## 31 546464 treatment new 5.41 yes
## 32 546556 control old 3.52 yes
## 33 546585 control old 5.39 yes
## 34 546577 control old 4.52 no
## 35 546587 control old 4.46 no
## 36 546552 control old 8.50 yes
## 37 546551 control old 3.13 no
## 38 546557 control old 6.04 yes
## 39 546487 treatment new 1.65 no
## 40 546589 control old 0.19 no
## 41 546559 control old 8.46 yes
## 42 546570 control old 1.92 no
## 43 546489 treatment new 7.16 yes
## 44 546453 treatment new 7.16 yes
## 45 546488 treatment new 3.91 no
## 46 546565 control old 8.02 no
## 47 546460 treatment new 5.37 yes
## 48 546458 treatment new 7.23 yes
## 49 546492 treatment new 8.08 yes
## 50 546473 treatment new 10.50 yes
## 51 546554 control old 0.22 no
## 52 546457 treatment new 5.65 no
## 53 546479 treatment new 6.47 yes
## 54 546576 control old 4.71 no
## 55 546482 treatment new 6.41 yes
## 56 546563 control old 0.93 no
## 57 546569 control old 1.81 no
## 58 546454 treatment new 8.30 yes
## 59 546562 control old 7.40 yes
## 60 546574 control old 9.15 no
## 61 546470 treatment new 6.01 yes
## 62 546467 treatment new 6.79 yes
## 63 546572 control old 4.18 no
## 64 546590 control old 5.47 no
## 65 546553 control old 5.96 yes
## 66 546445 treatment new 7.27 yes
## 67 546545 control old 6.60 yes
## 68 546582 control old 4.75 yes
## 69 546484 treatment new 6.70 no
## 70 546579 control old 2.23 no
## 71 546568 control old 0.40 no
## 72 546476 treatment new 5.42 yes
## 73 546452 treatment new 5.08 yes
## 74 546444 treatment new 7.46 yes
## 75 546591 control old 4.87 no
## 76 546583 control old 6.57 yes
## 77 546573 control old 1.44 no
## 78 546485 treatment new 3.88 no
## 79 546486 treatment new 9.12 yes
## 80 546547 control old 3.21 yes
## 81 546490 treatment new 4.68 no
## 82 546449 treatment new 5.26 yes
## 83 546463 treatment new 5.74 yes
## 84 546580 control old 2.90 no
## 85 546571 control old 4.30 no
## 86 546564 control old 0.91 no
## 87 546465 treatment new 6.71 no
## 88 546480 treatment new 3.68 no
## 89 546447 treatment new 3.30 no
## 90 546561 control old 6.04 yes
## 91 546477 treatment new 5.40 no
## 92 546451 treatment new 8.47 yes
## 93 546566 control old 8.35 no
## 94 546474 treatment new 4.94 no
## 95 546550 control old 3.05 no
## 96 546446 treatment new 5.15 no
## 97 546544 control old 6.52 yes
## 98 546472 treatment new 7.07 yes
## 99 546481 treatment new 6.20 yes
## 100 546483 treatment new 5.86 yes
## language_preferred
## 1 Spanish
## 2 English
## 3 Spanish
## 4 French
## 5 Spanish
## 6 English
## 7 French
## 8 Spanish
## 9 French
## 10 English
## 11 Spanish
## 12 English
## 13 Spanish
## 14 French
## 15 Spanish
## 16 Spanish
## 17 English
## 18 English
## 19 English
## 20 English
## 21 French
## 22 Spanish
## 23 English
## 24 English
## 25 Spanish
## 26 English
## 27 English
## 28 Spanish
## 29 French
## 30 French
## 31 English
## 32 English
## 33 Spanish
## 34 Spanish
## 35 Spanish
## 36 English
## 37 English
## 38 English
## 39 Spanish
## 40 Spanish
## 41 French
## 42 French
## 43 Spanish
## 44 English
## 45 English
## 46 French
## 47 French
## 48 Spanish
## 49 Spanish
## 50 English
## 51 English
## 52 English
## 53 Spanish
## 54 Spanish
## 55 Spanish
## 56 French
## 57 French
## 58 French
## 59 French
## 60 French
## 61 French
## 62 French
## 63 French
## 64 Spanish
## 65 English
## 66 French
## 67 English
## 68 Spanish
## 69 Spanish
## 70 Spanish
## 71 French
## 72 French
## 73 English
## 74 English
## 75 Spanish
## 76 Spanish
## 77 French
## 78 Spanish
## 79 French
## 80 English
## 81 French
## 82 French
## 83 Spanish
## 84 Spanish
## 85 French
## 86 French
## 87 English
## 88 French
## 89 French
## 90 French
## 91 French
## 92 English
## 93 French
## 94 French
## 95 English
## 96 Spanish
## 97 English
## 98 Spanish
## 99 Spanish
## 100 English
# Simulated A/B-test dataset (`df`) ----
# NOTE: `df` is synthetic and unrelated to the real observations in `data`.
# Its columns must never be combined with columns of `data`: the rows do not
# correspond to the same users.
set.seed(123) # fixed seed so the simulated values are reproducible across runs
n <- 100
stopifnot(n %% 2 == 0) # the design uses two equally sized groups
n_per_group <- n / 2
user_id <- seq_len(n)
# First half of the rows is control/old page, second half treatment/new page.
group <- rep(c("control", "treatment"), each = n_per_group)
landing_page <- rep(c("old", "new"), each = n_per_group)
# Generate Time Spent (Normal distribution)
# Control: mean=4.5, sd=1.5 | Treatment: mean=6.2, sd=1.2
time_spent <- c(
  rnorm(n_per_group, mean = 4.5, sd = 1.5),
  rnorm(n_per_group, mean = 6.2, sd = 1.2)
)
# Generate Conversion (Binomial/categorical)
# Control: ~42% yes | Treatment: ~65% yes
outcomes <- c("no", "yes")
converted_control <- sample(outcomes, n_per_group, replace = TRUE, prob = c(0.58, 0.42))
converted_treat <- sample(outcomes, n_per_group, replace = TRUE, prob = c(0.35, 0.65))
converted <- c(converted_control, converted_treat)
# Generate Language Preference (each language equally likely)
languages <- c("English", "French", "Spanish")
language_preferred <- sample(languages, n, replace = TRUE)
# Combine into a Data Frame.
# Levels are given explicitly for the sampled columns so the factor always has
# the full set of levels, even if a value happens not to be drawn.
df <- data.frame(
  user_id = user_id,
  group = as.factor(group),
  landing_page = as.factor(landing_page),
  time_spent_on_the_page = time_spent,
  converted = factor(converted, levels = outcomes),
  language_preferred = factor(language_preferred, levels = languages)
)
# View Data structure
str(df)
## 'data.frame': 100 obs. of 6 variables:
## $ user_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ group : Factor w/ 2 levels "control","treatment": 1 1 1 1 1 1 1 1 1 1 ...
## $ landing_page : Factor w/ 2 levels "new","old": 2 2 2 2 2 2 2 2 2 2 ...
## $ time_spent_on_the_page: num 4.3 4.4 4.84 4.07 5.05 ...
## $ converted : Factor w/ 2 levels "no","yes": 1 1 2 1 1 1 1 2 2 1 ...
## $ language_preferred : Factor w/ 3 levels "English","French",..: 2 1 1 1 1 1 2 2 1 2 ...
# Preview the first rows of the real A/B-test data.
head(data)
## user_id group landing_page time_spent_on_the_page converted
## 1 546592 control old 3.48 no
## 2 546468 treatment new 7.13 yes
## 3 546462 treatment new 4.40 no
## 4 546567 control old 3.02 no
## 5 546459 treatment new 4.75 yes
## 6 546558 control old 5.28 yes
## language_preferred
## 1 Spanish
## 2 English
## 3 Spanish
## 4 French
## 5 Spanish
## 6 English
# Preview the last rows of the real A/B-test data.
tail(data)
## user_id group landing_page time_spent_on_the_page converted
## 95 546550 control old 3.05 no
## 96 546446 treatment new 5.15 no
## 97 546544 control old 6.52 yes
## 98 546472 treatment new 7.07 yes
## 99 546481 treatment new 6.20 yes
## 100 546483 treatment new 5.86 yes
## language_preferred
## 95 English
## 96 Spanish
## 97 English
## 98 Spanish
## 99 Spanish
## 100 English
# Compact overview of `data`: row/column counts plus each column's type and first values.
glimpse(data)
## Rows: 100
## Columns: 6
## $ user_id <int> 546592, 546468, 546462, 546567, 546459, 546558,…
## $ group <chr> "control", "treatment", "treatment", "control",…
## $ landing_page <chr> "old", "new", "new", "old", "new", "old", "new"…
## $ time_spent_on_the_page <dbl> 3.48, 7.13, 4.40, 3.02, 4.75, 5.28, 5.25, 6.53,…
## $ converted <chr> "no", "yes", "no", "no", "yes", "yes", "yes", "…
## $ language_preferred <chr> "Spanish", "English", "Spanish", "French", "Spa…
# Per-column summary: quantiles and mean for numeric columns, length/class/mode
# for character columns.
summary(data)
## user_id group landing_page time_spent_on_the_page
## Min. :546443 Length:100 Length:100 Min. : 0.190
## 1st Qu.:546468 Class :character Class :character 1st Qu.: 3.880
## Median :546493 Mode :character Mode :character Median : 5.415
## Mean :546517 Mean : 5.378
## 3rd Qu.:546567 3rd Qu.: 7.022
## Max. :546592 Max. :10.710
## converted language_preferred
## Length:100 Length:100
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Count of missing (NA) entries in every column.
data %>%
  is.na() %>%
  colSums()
## user_id group landing_page
## 0 0 0
## time_spent_on_the_page converted language_preferred
## 0 0 0
# Number of rows that are exact repeats of an earlier row.
data %>%
  duplicated() %>%
  sum()
## [1] 0
# Univariate analysis ----
# Histogram of time spent on the page, using one-minute bins.
ggplot(data = data, mapping = aes(x = time_spent_on_the_page)) +
  geom_histogram(binwidth = 1, colour = "white", fill = "orange") +
  labs(x = "Minutes", title = "Distribution of Time Spent on Page")
# Frequency of each conversion outcome.
table(data[["converted"]])
##
## no yes
## 46 54
# Bar chart: number of users per conversion outcome.
ggplot(data = data, mapping = aes(x = converted)) +
  geom_bar(fill = "purple") +
  labs(x = "Converted", y = "Count", title = "Conversion Counts")
# Number of users per preferred language.
table(data[["language_preferred"]])
##
## English French Spanish
## 32 34 34
# Bar chart: number of users per preferred language.
ggplot(data = data, mapping = aes(x = language_preferred)) +
  geom_bar(fill = "blue") +
  labs(x = "Language", y = "Count", title = "Language Distribution")
# Bivariate analysis ----
# Box plot of time on page for each experiment group.
ggplot(
  data = data,
  mapping = aes(x = group, y = time_spent_on_the_page, fill = group)
) +
  geom_boxplot() +
  labs(title = "Time on Page by Group")
# Cross-tabulation: experiment group (rows) by conversion outcome (columns).
table(data[["group"]], data[["converted"]])
##
## no yes
## control 29 21
## treatment 17 33
# Table of percentages: within each experiment group (row), the share of users
# who did / did not convert.
# Both vectors are taken from `data`, so every user's group is paired with that
# same user's conversion outcome. (The simulated `df` has a different row order
# and must not be mixed in here.)
prop.table(table(data$group, data$converted), 1) * 100
##
## no yes
## control 58 42
## treatment 34 66
# Stacked bars scaled to 1: proportion converted within each experiment group.
ggplot(data = data, mapping = aes(x = group, fill = converted)) +
  geom_bar(position = "fill") +
  labs(y = "Proportion", title = "Conversion by Group")
# Cross-tabulation: preferred language (rows) by conversion outcome (columns).
table(data[["language_preferred"]], data[["converted"]])
##
## no yes
## English 11 21
## French 19 15
## Spanish 16 18
# Row percentages: within each language, the share of users per conversion outcome.
100 * prop.table(
  table(data[["language_preferred"]], data[["converted"]]),
  margin = 1
)
##
## no yes
## English 34.37500 65.62500
## French 55.88235 44.11765
## Spanish 47.05882 52.94118
# Side-by-side bars: conversion counts for each preferred language.
ggplot(
  data = data,
  mapping = aes(x = language_preferred, fill = converted)
) +
  geom_bar(position = "dodge") +
  labs(title = "Conversion by Language")
# Quiz 1: do users spend more time on the new landing page?
# Split the time-on-page values by landing page.
time_new <- data[data$landing_page == "new", "time_spent_on_the_page"]
time_old <- data[data$landing_page == "old", "time_spent_on_the_page"]
# F-test comparing the variances of the two samples.
var_test <- var.test(time_new, time_old)
print(
  paste(
    "Variance test p-value:",
    round(var_test$p.value, digits = 4)
  )
)
## [1] "Variance test p-value: 0.0154"
# One-sided two-sample t-test (H1: mean time on new page > mean time on old page).
# Equal variances are assumed only when the F-test above is not significant at
# the 5% level; otherwise the Welch version is used.
t_test_result <- t.test(
  time_new,
  time_old,
  alternative = "greater",
  var.equal = var_test$p.value > 0.05
)
print(t_test_result)
##
## Welch Two Sample t-test
##
## data: time_new and time_old
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.9485536 Inf
## sample estimates:
## mean of x mean of y
## 6.2232 4.5324
# Report the decision at the 5% significance level.
print(
  if (t_test_result$p.value < 0.05) {
    "Result: Reject Null Hypothesis. Users spend significantly more time on the new page."
  } else {
    "Result: Fail to reject Null Hypothesis."
  }
)
## [1] "Result: Reject Null Hypothesis. Users spend significantly more time on the new page."
#Users spend significantly more time on the new landing page.
# Quiz 2: is the conversion rate higher on the new landing page?
# Landing page (rows) by conversion outcome (columns).
table_conv <- table(data[["landing_page"]], data[["converted"]])
print(table_conv)
##
## no yes
## new 17 33
## old 29 21
# Sample size for each landing page.
n_new <- sum(data$landing_page == "new")
n_old <- sum(data$landing_page == "old")
# Number of converted ("yes") users for each landing page.
conv_new <- sum(data$converted == "yes" & data$landing_page == "new")
conv_old <- sum(data$converted == "yes" & data$landing_page == "old")
# One-sided two-sample test of proportions (H1: new-page rate > old-page rate).
prop_test_result <- prop.test(
  x = c(conv_new, conv_old),
  n = c(n_new, n_old),
  alternative = "greater"
)
print(prop_test_result)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(conv_new, conv_old) out of c(n_new, n_old)
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0.06086519 1.00000000
## sample estimates:
## prop 1 prop 2
## 0.66 0.42
# Report the decision at the 5% significance level.
print(
  if (prop_test_result$p.value < 0.05) {
    "Result: Reject Null Hypothesis. The new page has a significantly higher conversion rate."
  } else {
    "Result: Fail to reject Null Hypothesis."
  }
)
## [1] "Result: Reject Null Hypothesis. The new page has a significantly higher conversion rate."
#new page has a higher conversion rate than the old page.
# Quiz 3: does conversion status depend on preferred language?
# Contingency table: conversion outcome (rows) by preferred language (columns).
cont_table <- table(data[["converted"]], data[["language_preferred"]])
print(cont_table)
##
## English French Spanish
## no 11 19 16
## yes 21 15 18
# Pearson chi-squared test of independence on the contingency table.
chisq_result <- chisq.test(cont_table)
print(chisq_result)
##
## Pearson's Chi-squared test
##
## data: cont_table
## X-squared = 3.093, df = 2, p-value = 0.213
# Report the decision at the 5% significance level.
print(
  if (chisq_result$p.value < 0.05) {
    "Result: Reject Null Hypothesis. Conversion status depends on language."
  } else {
    "Result: Fail to reject Null Hypothesis. Conversion status is independent of language."
  }
)
## [1] "Result: Fail to reject Null Hypothesis. Conversion status is independent of language."
# There is no significant evidence that conversion status depends on language (p = 0.213), so independence is not rejected.
# Quiz 4: is mean time on the new page the same for every language?
# Keep only the users who were shown the new landing page.
data_new <- filter(data, landing_page == "new")
# Box plot of time on the new page, one box per preferred language.
ggplot(
  data = data_new,
  mapping = aes(
    x = language_preferred,
    y = time_spent_on_the_page,
    fill = language_preferred
  )
) +
  geom_boxplot() +
  labs(title = "Time Spent on New Page by Language")
# One-way ANOVA: time on page explained by preferred language.
anova_model <- aov(time_spent_on_the_page ~ language_preferred, data = data_new)
anova_summary <- summary(anova_model)
print(anova_summary)
## Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred 2 5.68 2.838 0.854 0.432
## Residuals 47 156.10 3.321
# p-value of the language term: first row of the "Pr(>F)" column in the
# summary's ANOVA table.
p_val_anova <- anova_summary[[1]][1, "Pr(>F)"]
# Report the decision at the 5% significance level.
print(
  if (p_val_anova < 0.05) {
    "Result: Reject Null Hypothesis. Mean time spent varies by language."
  } else {
    "Result: Fail to reject Null Hypothesis. Mean time spent is similar across languages."
  }
)
## [1] "Result: Fail to reject Null Hypothesis. Mean time spent is similar across languages."
# No significant difference in the average time spent on the new page was detected across languages (p = 0.432).
# Conclusion
# The new landing page is a success.
# It works: Users spend significantly more time on the new page (mean 6.22 vs. 4.53 minutes, p < 0.001) and are significantly more likely to subscribe (66% vs. 42%, p = 0.014).
# It's universal: The new design is effective across all language groups (English, French, and Spanish); no significant language effect was detected on conversion (p = 0.213) or on time spent on the new page (p = 0.432).
# Business Recommendations
# Launch Immediately: Switch 100% of website traffic to the new landing page to maximize subscriptions and revenue.
# Keep it Interactive: The multimedia elements are driving the increased engagement. Keep using them in future designs.
# No Customization Needed: Since the design works well for all languages, you can use the same layout globally without extra costs for localization.