E-News Express — Business Statistics Project

Problem Statement

E-News Express wants to test whether a new landing page improves user engagement and conversions compared to the old page.

Objectives

  1. To perform univariate analysis
  2. To perform bivariate analysis
  3. To provide insight and business recommendations
# Reading the file
# The CSV location can be overridden with the ABTEST_CSV environment variable;
# when it is not set, the original local path is used.
data_path <- Sys.getenv(
  "ABTEST_CSV",
  unset = "C:\\Users\\njugu\\Downloads\\New folder\\abtest.csv"
)
data <- read.csv(data_path)

# View() opens a spreadsheet-style viewer, so only call it when a user is
# actually at the console (it is of no use in a batch / knitted run).
if (interactive()) {
  View(data)
}
head(data)  # first few rows
##   user_id     group landing_page time_spent_on_the_page converted
## 1  546592   control          old                   3.48        no
## 2  546468 treatment          new                   7.13       yes
## 3  546462 treatment          new                   4.40        no
## 4  546567   control          old                   3.02        no
## 5  546459 treatment          new                   4.75       yes
## 6  546558   control          old                   5.28       yes
##   language_preferred
## 1            Spanish
## 2            English
## 3            Spanish
## 4             French
## 5            Spanish
## 6            English
tail(x = data, n = 6L)  # last six rows (6 is tail()'s default)
##     user_id     group landing_page time_spent_on_the_page converted
## 95   546550   control          old                   3.05        no
## 96   546446 treatment          new                   5.15        no
## 97   546544   control          old                   6.52       yes
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 95             English
## 96             Spanish
## 97             English
## 98             Spanish
## 99             Spanish
## 100            English
# Shape of the data (rows, columns)
print(dim(data))
## [1] 100   6
# Structure of the data: column names, types and leading values
utils::str(data)
## 'data.frame':    100 obs. of  6 variables:
##  $ user_id               : int  546592 546468 546462 546567 546459 546558 546448 546581 546461 546548 ...
##  $ group                 : chr  "control" "treatment" "treatment" "control" ...
##  $ landing_page          : chr  "old" "new" "new" "old" ...
##  $ time_spent_on_the_page: num  3.48 7.13 4.4 3.02 4.75 ...
##  $ converted             : chr  "no" "yes" "no" "no" ...
##  $ language_preferred    : chr  "Spanish" "English" "Spanish" "French" ...
# Summary statistics for every column
print(summary(data))
##     user_id          group           landing_page       time_spent_on_the_page
##  Min.   :546443   Length:100         Length:100         Min.   : 0.190        
##  1st Qu.:546468   Class :character   Class :character   1st Qu.: 3.880        
##  Median :546493   Mode  :character   Mode  :character   Median : 5.415        
##  Mean   :546517                                         Mean   : 5.378        
##  3rd Qu.:546567                                         3rd Qu.: 7.022        
##  Max.   :546592                                         Max.   :10.710        
##   converted         language_preferred
##  Length:100         Length:100        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Count missing values in each column
na_mask <- is.na(data)
colSums(na_mask)
##                user_id                  group           landing_page 
##                      0                      0                      0 
## time_spent_on_the_page              converted     language_preferred 
##                      0                      0                      0
# Count rows that are exact duplicates of an earlier row
dup_rows <- duplicated(data)
sum(dup_rows)
## [1] 0
# UNIVARIATE ANALYSIS
# Pull the numeric column once and reuse it for both plots
time_spent <- data$time_spent_on_the_page

# Histogram
hist(
  time_spent,
  main = "Histogram of Time Spent",
  xlab = "Minutes",
  col = "skyblue",
  border = "black"
)

# Boxplot
boxplot(
  time_spent,
  main = "Boxplot of Time Spent",
  col = "lightgreen"
)

# Landing Page
# Tabulate once and reuse the result for the counts, the proportions and the
# bar chart (the table was previously computed and printed twice).
landing_counts <- table(data$landing_page)

# Frequency table
landing_counts
## 
## new old 
##  50  50
# Proportions
prop.table(landing_counts)
## 
## new old 
## 0.5 0.5
barplot(landing_counts,
        main = "DISTRIBUTION OF LANDING PAGE",
        col = "lightgreen")

# Converted
# Counts of each conversion outcome, reused for shares and the bar chart
conversion_counts <- table(data$converted)
print(conversion_counts)
## 
##  no yes 
##  46  54
print(prop.table(conversion_counts))
## 
##   no  yes 
## 0.46 0.54
barplot(
  conversion_counts,
  main = "DISTRIBUTION OF CONVERSION RATE",
  col = "skyblue"
)

# Language Preferred
# Counts per preferred language, reused for shares and the bar chart
language_counts <- table(data$language_preferred)
print(language_counts)
## 
## English  French Spanish 
##      32      34      34
print(prop.table(language_counts))
## 
## English  French Spanish 
##    0.32    0.34    0.34
bar_colours <- c("skyblue", "green", "lightgreen")
barplot(language_counts,
        main = "DISTRIBUTION OF LANGUAGE PREFERRED",
        col = bar_colours)

# BIVARIATE ANALYSIS
# Do users spend more time on the new landing page?
# hypothesis

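# Null hypothesis (H_0): The mean time spent on the new page equals the mean time spent on the old page.

# Alternative hypothesis (H_1): The mean time spent on the new page is greater than on the old page.

# Decision rule: if the p-value is below 0.05, reject H_0 and conclude that users spend significantly more time on the new page.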

# Separate groups
# Base R indexing is used so this step does not rely on dplyr being attached
# (no library(dplyr) call appears in this script). %in% never returns NA, so
# rows with a missing landing_page are simply excluded.
time_old <- data$time_spent_on_the_page[data$landing_page %in% "old"]
time_new <- data$time_spent_on_the_page[data$landing_page %in% "new"]

# Boxplot for comparison
boxplot(time_spent_on_the_page ~ landing_page, data = data,
        main = "Time Spent by Landing Page",
        col = c("lightblue", "lightgreen"))

# One-tailed Welch two-sample t-test (H_1: mean time on new > mean time on old)
t.test(time_new, time_old, alternative = "greater")
## 
##  Welch Two Sample t-test
## 
## data:  time_new and time_old
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.9485536       Inf
## sample estimates:
## mean of x mean of y 
##    6.2232    4.5324
#Is conversion rate higher for new page?
#H_0: Conversion rate is the same for old and new pages.
#H_1: Conversion rate is higher for the new page.

# Cross-tabulate landing page (rows) against conversion outcome (columns)
conv_tab <- table(data[["landing_page"]], data[["converted"]])
print(conv_tab)
##      
##       no yes
##   new 17  33
##   old 29  21
# One-sided two-sample proportion test: "yes" counts out of each page's total.
# The argument expressions are kept as-is because prop.test() echoes them in
# the "data:" line of its printed result.
prop.test(
  x = c(conv_tab["new", "yes"], conv_tab["old", "yes"]),
  n = c(sum(conv_tab["new", ]), sum(conv_tab["old", ])),
  alternative = "greater"
)
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(conv_tab["new", "yes"], conv_tab["old", "yes"]) out of c(sum(conv_tab["new", ]), sum(conv_tab["old", ]))
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.06086519 1.00000000
## sample estimates:
## prop 1 prop 2 
##   0.66   0.42
#Is conversion independent of preferred language?
#H_0:Conversion is independent of language.
#H_1:Conversion depends on language.


# Cross-tabulate preferred language (rows) against conversion outcome (columns)
lang_tab <- table(data[["language_preferred"]], data[["converted"]])
print(lang_tab)
##          
##           no yes
##   English 11  21
##   French  19  15
##   Spanish 16  18
# Pearson chi-squared test of independence on the contingency table
chisq.test(x = lang_tab)
## 
##  Pearson's Chi-squared test
## 
## data:  lang_tab
## X-squared = 3.093, df = 2, p-value = 0.213
# Is the mean time spent the same across preferred languages (new page only)?
# H_0: Mean time spent is the same across all language groups.
# H_1: At least one language group has a different mean time spent.


# Keep only the users who saw the new landing page. Base R subsetting is used
# so this step does not rely on dplyr being attached; %in% never returns NA,
# so no all-NA rows can be introduced.
new_page <- data[data$landing_page %in% "new", , drop = FALSE]

# One-way ANOVA: does mean time spent differ by preferred language?
anova_result <- aov(
  time_spent_on_the_page ~ language_preferred,
  data = new_page
)
summary(anova_result)
##                    Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred  2   5.68   2.838   0.854  0.432
## Residuals          47 156.10   3.321

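Since the p-value (0.432) is greater than 0.05, we fail to reject H_0: there is no statistically significant difference in mean time spent on the new page across preferred languages.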

CONCLUSION

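The new landing page is more effective at engaging users and encouraging conversions: users spend significantly more time on it (mean 6.22 vs 4.53 minutes, p < 0.001) and convert at a higher rate (66% vs 42%, p = 0.014). Neither conversion nor time spent on the new page differs significantly by preferred language, so it performs consistently across languages. This suggests that the redesign is successful at converting users.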

RECOMMENDATION

The new page should replace the old page

#Since users stay longer on the new page, the business should add more personalized recommendations for users on the new page. ## Based on the analysis, the new landing page performs better than the old landing page and should be adopted as the primary page, which will help E-News Express gain more subscribers and be more competitive.