# E-News Express wants to test whether a new landing page improves user engagement and conversions compared to the old page.
# Reading the file ----
# The CSV path defaults to the original location; set the ABTEST_CSV
# environment variable to point at the file on another machine.
csv_path <- Sys.getenv(
  "ABTEST_CSV",
  unset = "C:\\Users\\njugu\\Downloads\\New folder\\abtest.csv"
)
data <- read.csv(csv_path)
# View() invokes a spreadsheet-style data viewer, so only call it when the
# session is interactive.
if (interactive()) {
  View(data)
}
head(data)  # first six rows
## user_id group landing_page time_spent_on_the_page converted
## 1 546592 control old 3.48 no
## 2 546468 treatment new 7.13 yes
## 3 546462 treatment new 4.40 no
## 4 546567 control old 3.02 no
## 5 546459 treatment new 4.75 yes
## 6 546558 control old 5.28 yes
## language_preferred
## 1 Spanish
## 2 English
## 3 Spanish
## 4 French
## 5 Spanish
## 6 English
# Basic inspection of the loaded data frame; the `##` lines under each call
# are the console output recorded for this data set.
tail(data)  # last six rows
## user_id group landing_page time_spent_on_the_page converted
## 95 546550 control old 3.05 no
## 96 546446 treatment new 5.15 no
## 97 546544 control old 6.52 yes
## 98 546472 treatment new 7.07 yes
## 99 546481 treatment new 6.20 yes
## 100 546483 treatment new 5.86 yes
## language_preferred
## 95 English
## 96 Spanish
## 97 English
## 98 Spanish
## 99 Spanish
## 100 English
dim(data)  # shape of the data: number of rows, then number of columns
## [1] 100 6
str(data)  # structure of the data: column names, types, and leading values
## 'data.frame': 100 obs. of 6 variables:
## $ user_id : int 546592 546468 546462 546567 546459 546558 546448 546581 546461 546548 ...
## $ group : chr "control" "treatment" "treatment" "control" ...
## $ landing_page : chr "old" "new" "new" "old" ...
## $ time_spent_on_the_page: num 3.48 7.13 4.4 3.02 4.75 ...
## $ converted : chr "no" "yes" "no" "no" ...
## $ language_preferred : chr "Spanish" "English" "Spanish" "French" ...
summary(data)  # summary statistics for every column
## user_id group landing_page time_spent_on_the_page
## Min. :546443 Length:100 Length:100 Min. : 0.190
## 1st Qu.:546468 Class :character Class :character 1st Qu.: 3.880
## Median :546493 Mode :character Mode :character Median : 5.415
## Mean :546517 Mean : 5.378
## 3rd Qu.:546567 3rd Qu.: 7.022
## Max. :546592 Max. :10.710
## converted language_preferred
## Length:100 Length:100
## Class :character Class :character
## Mode :character Mode :character
##
##
##
colSums(is.na(data))  # number of missing values (NA) in each column
## user_id group landing_page
## 0 0 0
## time_spent_on_the_page converted language_preferred
## 0 0 0
sum(duplicated(data))  # number of rows that duplicate an earlier row
## [1] 0
# UNIVARIATE ANALYSIS ----
# Distribution of the time users spent on the page.

# Histogram
hist(
  data[["time_spent_on_the_page"]],
  main = "Histogram of Time Spent",
  xlab = "Minutes",
  col = "skyblue",
  border = "black"
)

# Boxplot
boxplot(
  data[["time_spent_on_the_page"]],
  main = "Boxplot of Time Spent",
  col = "lightgreen"
)
# Landing Page ----
# Frequency table of the landing page shown to each user. The table is built
# once and reused for the counts, the proportions, and the bar plot.
page_counts <- table(data$landing_page)
page_counts
##
## new old
## 50 50
# Proportions
prop.table(page_counts)
##
## new old
## 0.5 0.5
barplot(
  page_counts,
  main = "DISTRIBUTION OF LANDING PAGE",
  col = "lightgreen"
)
# Converted ----
# Counts and proportions of the `converted` column, followed by a bar plot.
converted_counts <- table(data[["converted"]])
converted_counts
##
## no yes
## 46 54
prop.table(converted_counts)
##
## no yes
## 0.46 0.54
barplot(
  converted_counts,
  main = "DISTRIBUTION OF CONVERSION RATE",
  col = "skyblue"
)
# Language Preferred ----
# Counts and proportions of the `language_preferred` column, followed by a
# bar plot with one colour per language.
language_counts <- table(data[["language_preferred"]])
language_counts
##
## English French Spanish
## 32 34 34
prop.table(language_counts)
##
## English French Spanish
## 0.32 0.34 0.34
barplot(language_counts,
        main = "DISTRIBUTION OF LANGUAGE PREFERRED",
        col = c("skyblue", "green", "lightgreen"))
# BIVARIATE ANALYSIS ----
# Do users spend more time on the new landing page?
# Hypotheses:
#   H_0: Users spend the same amount of time on the old and new pages.
#   H_1: Users spend more time on the new page.
# Decision rule: if the p-value is below 0.05, users spend significantly more
# time on the new page.

# Separate groups. Base subsetting is used because no library(dplyr) call is
# visible in this script, so `%>%`, filter() and pull() would not be found.
# `%in%` never returns NA, so rows with a missing landing_page are dropped
# rather than turned into NA entries.
time_old <- data$time_spent_on_the_page[data$landing_page %in% "old"]
time_new <- data$time_spent_on_the_page[data$landing_page %in% "new"]

# Boxplot for comparison
boxplot(time_spent_on_the_page ~ landing_page, data = data,
        main = "Time Spent by Landing Page",
        col = c("lightblue", "lightgreen"))

# One-tailed two-sample t-test (t.test() defaults to the Welch variant):
# is the mean time on the new page greater than on the old page?
t.test(time_new, time_old, alternative = "greater")
##
## Welch Two Sample t-test
##
## data: time_new and time_old
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.9485536 Inf
## sample estimates:
## mean of x mean of y
## 6.2232 4.5324
# Is the conversion rate higher for the new page?
#   H_0: The conversion rate is the same for the old and new pages.
#   H_1: The conversion rate is higher for the new page.

# Contingency table: rows are landing pages, columns are conversion outcomes.
conv_tab <- table(data[["landing_page"]], data[["converted"]])
conv_tab
##
## no yes
## new 17 33
## old 29 21

# One-sided two-sample proportion test (new vs. old). The x and n expressions
# stay inline because prop.test() echoes them in the "data:" line it prints.
prop.test(
  x = c(conv_tab["new", "yes"], conv_tab["old", "yes"]),
  n = c(sum(conv_tab["new", ]), sum(conv_tab["old", ])),
  alternative = "greater"
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(conv_tab["new", "yes"], conv_tab["old", "yes"]) out of c(sum(conv_tab["new", ]), sum(conv_tab["old", ]))
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0.06086519 1.00000000
## sample estimates:
## prop 1 prop 2
## 0.66 0.42
# Is conversion independent of preferred language?
#   H_0: Conversion is independent of language.
#   H_1: Conversion depends on language.

# Contingency table: rows are preferred languages, columns are conversion
# outcomes.
lang_tab <- table(data[["language_preferred"]], data[["converted"]])
lang_tab
##
## no yes
## English 11 21
## French 19 15
## Spanish 16 18

# Pearson chi-squared test on the language-by-conversion table.
chisq.test(lang_tab)
##
## Pearson's Chi-squared test
##
## data: lang_tab
## X-squared = 3.093, df = 2, p-value = 0.213
# Is time spent the same for users of different languages (new page only)?
#   H_0: Mean time spent is the same across languages.
#   H_1: At least one language group has a different mean time spent.

# Keep only the rows for the new landing page. Base subsetting is used because
# no library(dplyr) call is visible in this script; `%in%` never returns NA,
# so rows with a missing landing_page are dropped.
new_page <- data[data$landing_page %in% "new", , drop = FALSE]

# One-way ANOVA of time spent by preferred language.
anova_result <- aov(time_spent_on_the_page ~ language_preferred, data = new_page)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred 2 5.68 2.838 0.854 0.432
## Residuals 47 156.10 3.321
# Since users stay longer on the new page, the business should add more
# personalized recommendations for users on the new page.
# Based on the analysis, the new landing page performs better than the old
# landing page and should be adopted as the primary page, which will help
# E-News Express gain more subscribers and be more competitive.