knitr::opts_chunk$set(echo = TRUE)
library(reticulate)
## Warning: package 'reticulate' was built under R version 4.5.2

Problem:

E-news Express wants to know if the NEW landing page is better than the OLD one.

Objectives:

1. Do users spend more time on the new page?

2. Is conversion rate higher for the new page?

3. Is conversion independent of preferred language?

4. Do different language users spend the same time on the new page?

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(readr)
library(stats)

## Read the Dataset

# Load the A/B test data into a tibble.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where this exact location exists — consider a
# project-relative path.
df <- read_csv("C:\\Users\\ADMIN\\Desktop\\y2s1\\abtest.csv")
## Rows: 100 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): group, landing_page, converted, language_preferred
## dbl (2): user_id, time_spent_on_the_page
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(df)
## # A tibble: 6 × 6
##   user_id group landing_page time_spent_on_the_page converted language_preferred
##     <dbl> <chr> <chr>                         <dbl> <chr>     <chr>             
## 1  546592 cont… old                            3.48 no        Spanish           
## 2  546468 trea… new                            7.13 yes       English           
## 3  546462 trea… new                            4.4  no        Spanish           
## 4  546567 cont… old                            3.02 no        French            
## 5  546459 trea… new                            4.75 yes       Spanish           
## 6  546558 cont… old                            5.28 yes       English

## Exploratory Data Analysis

head(df)
## # A tibble: 6 × 6
##   user_id group landing_page time_spent_on_the_page converted language_preferred
##     <dbl> <chr> <chr>                         <dbl> <chr>     <chr>             
## 1  546592 cont… old                            3.48 no        Spanish           
## 2  546468 trea… new                            7.13 yes       English           
## 3  546462 trea… new                            4.4  no        Spanish           
## 4  546567 cont… old                            3.02 no        French            
## 5  546459 trea… new                            4.75 yes       Spanish           
## 6  546558 cont… old                            5.28 yes       English
tail(df)
## # A tibble: 6 × 6
##   user_id group landing_page time_spent_on_the_page converted language_preferred
##     <dbl> <chr> <chr>                         <dbl> <chr>     <chr>             
## 1  546550 cont… old                            3.05 no        English           
## 2  546446 trea… new                            5.15 no        Spanish           
## 3  546544 cont… old                            6.52 yes       English           
## 4  546472 trea… new                            7.07 yes       Spanish           
## 5  546481 trea… new                            6.2  yes       Spanish           
## 6  546483 trea… new                            5.86 yes       English
dim(df)
## [1] 100   6
summary(df)
##     user_id          group           landing_page       time_spent_on_the_page
##  Min.   :546443   Length:100         Length:100         Min.   : 0.190        
##  1st Qu.:546468   Class :character   Class :character   1st Qu.: 3.880        
##  Median :546493   Mode  :character   Mode  :character   Median : 5.415        
##  Mean   :546517                                         Mean   : 5.378        
##  3rd Qu.:546567                                         3rd Qu.: 7.022        
##  Max.   :546592                                         Max.   :10.710        
##   converted         language_preferred
##  Length:100         Length:100        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
colSums(is.na(df))
##                user_id                  group           landing_page 
##                      0                      0                      0 
## time_spent_on_the_page              converted     language_preferred 
##                      0                      0                      0
sum(duplicated(df))
## [1] 0
# Keep only unique rows; this is a no-op when the data has no exact duplicates.
df <- distinct(df)

## Univariate Analysis

### Time spent

# Histogram of time spent on the page for all users.
# `breaks = 20` is a suggested number of bins, not an exact count.
hist(
  x = df$time_spent_on_the_page,
  breaks = 20,
  col = "lightblue",
  main = "Distribution of Time Spent",
  xlab = "Minutes"
)

### Group distribution

table(df$group)
## 
##   control treatment 
##        50        50
barplot(table(df$group), main="Group Distribution")

### Conversion distribution

table(df$converted)
## 
##  no yes 
##  46  54
barplot(table(df$converted), main="Conversion Distribution")

### Language preference

table(df$language_preferred)
## 
## English  French Spanish 
##      32      34      34
barplot(table(df$language_preferred), main="Language Distribution")

## Bivariate Analysis

### Time spent by group

# Box plot of time spent on the page, one box per experiment group.
ggplot(
  data = df,
  mapping = aes(x = group, y = time_spent_on_the_page)
) +
  geom_boxplot() +
  ggtitle("Time Spent by Group")

### Conversion rate by landing page

prop.table(table(df$landing_page, df$converted), 1) * 100
##      
##       no yes
##   new 34  66
##   old 58  42
### Conversion vs language

prop.table(table(df$language_preferred, df$converted), 1) * 100
##          
##                 no      yes
##   English 34.37500 65.62500
##   French  55.88235 44.11765
##   Spanish 47.05882 52.94118

Do users spend more time on the NEW page?

### Data splitting

# Time-on-page values for each landing page version, as plain numeric vectors.
old_time <- pull(filter(df, landing_page == "old"), time_spent_on_the_page)
new_time <- pull(filter(df, landing_page == "new"), time_spent_on_the_page)

### One-tailed (upper-tailed) Welch two-sample t-test

t.test(new_time, old_time, alternative="greater")
## 
##  Welch Two Sample t-test
## 
## data:  new_time and old_time
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.9485536       Inf
## sample estimates:
## mean of x mean of y 
##    6.2232    4.5324
#### Interpretation

# If p-value < 0.05 → Users spend more time on the new page.
# Here p = 0.0001392 < 0.05, so we reject H0: mean time on the new page is greater.

Is conversion rate higher for the NEW page?

### Convert table

tbl <- table(df$landing_page, df$converted)
tbl
##      
##       no yes
##   new 17  33
##   old 29  21
### Two-proportion z-test
# A "success" is a converted user, i.e. the "yes" column of tbl.
# Select the column by name: column 1 of tbl is "no", so indexing by position
# would test NON-conversion rates and invert the conclusion.
old_success <- tbl["old", "yes"]
old_total <- sum(tbl["old", ])

new_success <- tbl["new", "yes"]
new_total <- sum(tbl["new", ])

# One-sided test of H1: conversion rate (new) > conversion rate (old).
prop.test(
  x = c(new_success, old_success),
  n = c(new_total, old_total),
  alternative = "greater"
)
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(new_success, old_success) out of c(new_total, old_total)
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.0608652 1.0000000
## sample estimates:
## prop 1 prop 2 
##   0.66   0.42
### Interpretation

# p < 0.05 → New page has higher conversion.

Is conversion independent of preferred language?

### Chi-square test

tbl2 <- table(df$language_preferred, df$converted)
chisq.test(tbl2)
## 
##  Pearson's Chi-squared test
## 
## data:  tbl2
## X-squared = 3.093, df = 2, p-value = 0.213
### Interpretation

# p < 0.05 → Reject independence: conversion is associated with language

# p > 0.05 → Fail to reject independence: no evidence that conversion depends on language (here p = 0.213)

For the NEW page, do different languages spend equal time?

### Filtering of the new page rows

newdf <- df %>% filter(landing_page == "new")

### Perform ANOVA test

# One-way ANOVA of time on the new page across preferred languages,
# followed by the ANOVA table.
anova_result <- aov(
  formula = time_spent_on_the_page ~ language_preferred,
  data = newdf
)
summary(anova_result)
##                    Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred  2   5.68   2.838   0.854  0.432
## Residuals          47 156.10   3.321
### Interpretation

# p < 0.05 → Mean time differs for at least one language

# p > 0.05 → Fail to reject H0: no evidence that mean time differs across languages (here p = 0.432)

Conclusion & Business Recommendations

1. Time Spent: ✓ NEW page engages users longer

2. Conversion: ✓ NEW page converts better

3. Language Impact: ✗ No significant association between preferred language and conversion (p = 0.213)

4. New Page Language Effect: ✗ No significant difference in time spent across languages on the new page (p = 0.432)

RECOMMENDATIONS:

STRONGLY RECOMMEND adopting the new landing page.

- Improves both user engagement (time spent) and conversions

- Expected business impact: Higher subscription rates towards E-news Express

```