# Global knitr chunk option: echo = TRUE asks knitr to include each chunk's
# source code in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
#Problem Statement
#E‑News Express wants to test whether a newly designed landing page leads to higher user engagement and more subscriber conversions than the existing page, using A/B test data on time spent, conversion, and preferred language.
#Objectives
#Use statistical analysis to answer:

   #Time Spent: Do users stay longer on the new page?

   #Conversion: Does the new page get more subscribers?

   #Language vs. Conversion: Does language choice affect subscription rates?

   #Language vs. Time: Do different language users spend the same amount of time on the new page?
# Import the dataset ----
# The CSV path is built once instead of being repeated; setwd() was dropped
# because it changes the session's working directory as a lingering side
# effect, and the file is read through an absolute path anyway.
# NOTE(review): the directory is specific to one machine -- adjust `data_dir`
# (or switch to a project-relative path) before running elsewhere.
data_dir <- file.path(
  "C:", "Users", "ADMIN", "OneDrive", "Desktop", "data folder"
)
data <- read.csv(file.path(data_dir, "abtest[1].csv"))
# Print the full data frame
data
##     user_id     group landing_page time_spent_on_the_page converted
## 1    546592   control          old                   3.48        no
## 2    546468 treatment          new                   7.13       yes
## 3    546462 treatment          new                   4.40        no
## 4    546567   control          old                   3.02        no
## 5    546459 treatment          new                   4.75       yes
## 6    546558   control          old                   5.28       yes
## 7    546448 treatment          new                   5.25       yes
## 8    546581   control          old                   6.53       yes
## 9    546461 treatment          new                  10.71       yes
## 10   546548   control          old                   2.08        no
## 11   546588   control          old                   6.21       yes
## 12   546546   control          old                   2.58       yes
## 13   546491 treatment          new                   5.86       yes
## 14   546478 treatment          new                   6.03       yes
## 15   546578   control          old                   8.72       yes
## 16   546466 treatment          new                   6.27       yes
## 17   546443 treatment          new                   8.73        no
## 18   546555   control          old                   0.40        no
## 19   546493   control          old                  10.30       yes
## 20   546549   control          old                   3.88       yes
## 21   546560   control          old                   2.66        no
## 22   546584   control          old                   7.03       yes
## 23   546450 treatment          new                   3.65        no
## 24   546475 treatment          new                   7.02       yes
## 25   546456 treatment          new                   6.18        no
## 26   546455 treatment          new                   4.39        no
## 27   546469 treatment          new                   9.49       yes
## 28   546586   control          old                   4.05        no
## 29   546471 treatment          new                   7.81       yes
## 30   546575   control          old                   4.28        no
## 31   546464 treatment          new                   5.41       yes
## 32   546556   control          old                   3.52       yes
## 33   546585   control          old                   5.39       yes
## 34   546577   control          old                   4.52        no
## 35   546587   control          old                   4.46        no
## 36   546552   control          old                   8.50       yes
## 37   546551   control          old                   3.13        no
## 38   546557   control          old                   6.04       yes
## 39   546487 treatment          new                   1.65        no
## 40   546589   control          old                   0.19        no
## 41   546559   control          old                   8.46       yes
## 42   546570   control          old                   1.92        no
## 43   546489 treatment          new                   7.16       yes
## 44   546453 treatment          new                   7.16       yes
## 45   546488 treatment          new                   3.91        no
## 46   546565   control          old                   8.02        no
## 47   546460 treatment          new                   5.37       yes
## 48   546458 treatment          new                   7.23       yes
## 49   546492 treatment          new                   8.08       yes
## 50   546473 treatment          new                  10.50       yes
## 51   546554   control          old                   0.22        no
## 52   546457 treatment          new                   5.65        no
## 53   546479 treatment          new                   6.47       yes
## 54   546576   control          old                   4.71        no
## 55   546482 treatment          new                   6.41       yes
## 56   546563   control          old                   0.93        no
## 57   546569   control          old                   1.81        no
## 58   546454 treatment          new                   8.30       yes
## 59   546562   control          old                   7.40       yes
## 60   546574   control          old                   9.15        no
## 61   546470 treatment          new                   6.01       yes
## 62   546467 treatment          new                   6.79       yes
## 63   546572   control          old                   4.18        no
## 64   546590   control          old                   5.47        no
## 65   546553   control          old                   5.96       yes
## 66   546445 treatment          new                   7.27       yes
## 67   546545   control          old                   6.60       yes
## 68   546582   control          old                   4.75       yes
## 69   546484 treatment          new                   6.70        no
## 70   546579   control          old                   2.23        no
## 71   546568   control          old                   0.40        no
## 72   546476 treatment          new                   5.42       yes
## 73   546452 treatment          new                   5.08       yes
## 74   546444 treatment          new                   7.46       yes
## 75   546591   control          old                   4.87        no
## 76   546583   control          old                   6.57       yes
## 77   546573   control          old                   1.44        no
## 78   546485 treatment          new                   3.88        no
## 79   546486 treatment          new                   9.12       yes
## 80   546547   control          old                   3.21       yes
## 81   546490 treatment          new                   4.68        no
## 82   546449 treatment          new                   5.26       yes
## 83   546463 treatment          new                   5.74       yes
## 84   546580   control          old                   2.90        no
## 85   546571   control          old                   4.30        no
## 86   546564   control          old                   0.91        no
## 87   546465 treatment          new                   6.71        no
## 88   546480 treatment          new                   3.68        no
## 89   546447 treatment          new                   3.30        no
## 90   546561   control          old                   6.04       yes
## 91   546477 treatment          new                   5.40        no
## 92   546451 treatment          new                   8.47       yes
## 93   546566   control          old                   8.35        no
## 94   546474 treatment          new                   4.94        no
## 95   546550   control          old                   3.05        no
## 96   546446 treatment          new                   5.15        no
## 97   546544   control          old                   6.52       yes
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 1              Spanish
## 2              English
## 3              Spanish
## 4               French
## 5              Spanish
## 6              English
## 7               French
## 8              Spanish
## 9               French
## 10             English
## 11             Spanish
## 12             English
## 13             Spanish
## 14              French
## 15             Spanish
## 16             Spanish
## 17             English
## 18             English
## 19             English
## 20             English
## 21              French
## 22             Spanish
## 23             English
## 24             English
## 25             Spanish
## 26             English
## 27             English
## 28             Spanish
## 29              French
## 30              French
## 31             English
## 32             English
## 33             Spanish
## 34             Spanish
## 35             Spanish
## 36             English
## 37             English
## 38             English
## 39             Spanish
## 40             Spanish
## 41              French
## 42              French
## 43             Spanish
## 44             English
## 45             English
## 46              French
## 47              French
## 48             Spanish
## 49             Spanish
## 50             English
## 51             English
## 52             English
## 53             Spanish
## 54             Spanish
## 55             Spanish
## 56              French
## 57              French
## 58              French
## 59              French
## 60              French
## 61              French
## 62              French
## 63              French
## 64             Spanish
## 65             English
## 66              French
## 67             English
## 68             Spanish
## 69             Spanish
## 70             Spanish
## 71              French
## 72              French
## 73             English
## 74             English
## 75             Spanish
## 76             Spanish
## 77              French
## 78             Spanish
## 79              French
## 80             English
## 81              French
## 82              French
## 83             Spanish
## 84             Spanish
## 85              French
## 86              French
## 87             English
## 88              French
## 89              French
## 90              French
## 91              French
## 92             English
## 93              French
## 94              French
## 95             English
## 96             Spanish
## 97             English
## 98             Spanish
## 99             Spanish
## 100            English
# Preview the first 6 rows
head(data, n = 6L)
##   user_id     group landing_page time_spent_on_the_page converted
## 1  546592   control          old                   3.48        no
## 2  546468 treatment          new                   7.13       yes
## 3  546462 treatment          new                   4.40        no
## 4  546567   control          old                   3.02        no
## 5  546459 treatment          new                   4.75       yes
## 6  546558   control          old                   5.28       yes
##   language_preferred
## 1            Spanish
## 2            English
## 3            Spanish
## 4             French
## 5            Spanish
## 6            English
# Preview the last 6 rows
tail(data, n = 6L)
##     user_id     group landing_page time_spent_on_the_page converted
## 95   546550   control          old                   3.05        no
## 96   546446 treatment          new                   5.15        no
## 97   546544   control          old                   6.52       yes
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 95             English
## 96             Spanish
## 97             English
## 98             Spanish
## 99             Spanish
## 100            English
# Dataset shape: number of rows, then number of columns
dim(data)[1L]
## [1] 100
dim(data)[2L]
## [1] 6
# Statistical summary of every column
summary(data)
##     user_id          group           landing_page       time_spent_on_the_page
##  Min.   :546443   Length:100         Length:100         Min.   : 0.190        
##  1st Qu.:546468   Class :character   Class :character   1st Qu.: 3.880        
##  Median :546493   Mode  :character   Mode  :character   Median : 5.415        
##  Mean   :546517                                         Mean   : 5.378        
##  3rd Qu.:546567                                         3rd Qu.: 7.022        
##  Max.   :546592                                         Max.   :10.710        
##   converted         language_preferred
##  Length:100         Length:100        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Count missing values in each column
colSums(is.na(data))
##                user_id                  group           landing_page 
##                      0                      0                      0 
## time_spent_on_the_page              converted     language_preferred 
##                      0                      0                      0
# Flag duplicated rows once, then report how many there are and list them
dup_rows <- duplicated(data)
sum(dup_rows)
## [1] 0
data[dup_rows, ]
## [1] user_id                group                  landing_page          
## [4] time_spent_on_the_page converted              language_preferred    
## <0 rows> (or 0-length row.names)
# Convert to factors
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# Build an analysis copy with the categorical columns stored as factors.
# `converted` holds the strings "no"/"yes", so the factor levels must be those
# strings; the previous levels = c(0, 1) matched nothing and turned every
# value into NA. The display labels "No"/"Yes" are kept.
df <- data %>%
  mutate(
    group = factor(group),
    landing_page = factor(landing_page),
    converted = factor(
      converted,
      levels = c("no", "yes"),
      labels = c("No", "Yes")
    ),
    language_preferred = factor(language_preferred)
  )

# Univariate ----
# Counts and shares per landing page
page_counts <- table(data$landing_page)
page_counts
## 
## new old 
##  50  50
prop.table(page_counts)
## 
## new old 
## 0.5 0.5
# Counts and shares per conversion outcome
conversion_counts <- table(data$converted)
conversion_counts
## 
##  no yes 
##  46  54
prop.table(conversion_counts)
## 
##   no  yes 
## 0.46 0.54
# Counts and shares per preferred language
language_counts <- table(data$language_preferred)
language_counts
## 
## English  French Spanish 
##      32      34      34
prop.table(language_counts)
## 
## English  French Spanish 
##    0.32    0.34    0.34
# Quartiles, mean and range of time spent on the page
summary(data$time_spent_on_the_page)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.190   3.880   5.415   5.378   7.022  10.710
# Univariate: histogram of time spent with a count-scaled density overlay.
# after_stat(count) and linewidth replace the `..count..` notation and the
# `size` line aesthetic, both deprecated since ggplot2 3.4.0.
ggplot(data, aes(x = time_spent_on_the_page)) +
  geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
  geom_density(aes(y = after_stat(count)), color = "red", linewidth = 1) +
  ggtitle("Distribution of Time Spent on the Page")

# Single boxplot of time spent across all users; the constant x mapping and
# the empty x label leave the x axis unlabelled.
ggplot(data, aes(x = "", y = time_spent_on_the_page)) +
  geom_boxplot(fill = "blue") +
  labs(title = "Boxplot of Time Spent on the Page", x = "")

# Bivariate ----
# Mean/SD of time spent and conversion rate per landing page.
# `converted` holds lowercase "no"/"yes", so the comparison must use "yes";
# comparing against "Yes" matched nothing and reported a conversion rate of 0.
data %>%
  group_by(landing_page) %>%
  summarise(
    mean_time = mean(time_spent_on_the_page),
    sd_time   = sd(time_spent_on_the_page),
    conv_rate = mean(converted == "yes")
  )
## # A tibble: 2 × 4
##   landing_page mean_time sd_time conv_rate
##   <chr>            <dbl>   <dbl>     <dbl>
## 1 new               6.22    1.82      0.66
## 2 old               4.53    2.58      0.42
# Conversion rate by preferred language. "yes" matches the lowercase values
# stored in `converted`; "Yes" never matched and produced 0 for every group.
data %>%
  group_by(language_preferred) %>%
  summarise(conv_rate = mean(converted == "yes"))
## # A tibble: 3 × 2
##   language_preferred conv_rate
##   <chr>                  <dbl>
## 1 English                0.656
## 2 French                 0.441
## 3 Spanish                0.529
# Bivariate: time spent on the page by landing page.
# The title is joined to the plot with `+`; as a separate statement it was
# never applied to the plot and only printed a labels object.
ggplot(data, aes(x = landing_page, y = time_spent_on_the_page)) +
  geom_boxplot() +
  ggtitle("Time Spent vs Landing Page")
# Question 1: do users spend more time on the new page than on the old one?
# Time-spent samples for each landing page
time_old <- data$time_spent_on_the_page[which(data$landing_page == "old")]
time_new <- data$time_spent_on_the_page[which(data$landing_page == "new")]

# One-sided Welch two-sample t-test (H1: mean of new > mean of old);
# unequal variances are requested explicitly via var.equal = FALSE
tt <- t.test(
  x = time_new,
  y = time_old,
  var.equal = FALSE,
  alternative = "greater"
)
tt
## 
##  Welch Two Sample t-test
## 
## data:  time_new and time_old
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.9485536       Inf
## sample estimates:
## mean of x mean of y 
##    6.2232    4.5324
# Question 2: is the conversion rate higher on the new page than the old one?
# Cross-tabulate landing page (rows) against conversion (columns)
tab_page_conv <- table(data$landing_page, data$converted)
tab_page_conv
##      
##       no yes
##   new 17  33
##   old 29  21
rownames(tab_page_conv)
## [1] "new" "old"
colnames(tab_page_conv)
## [1] "no"  "yes"
# Row masks reused for both the visitor counts and the converter counts
on_new_page <- data$landing_page == "new"
on_old_page <- data$landing_page == "old"
did_convert <- data$converted == "yes"
n_new <- sum(on_new_page)
n_old <- sum(on_old_page)
conv_new <- sum(on_new_page & did_convert)
conv_old <- sum(on_old_page & did_convert)

# One-sided two-sample proportion test (H1: new-page rate > old-page rate)
prop_test_result <- prop.test(
  x = c(conv_new, conv_old),
  n = c(n_new, n_old),
  alternative = "greater"
)

prop_test_result
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(conv_new, conv_old) out of c(n_new, n_old)
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.06086519 1.00000000
## sample estimates:
## prop 1 prop 2 
##   0.66   0.42
# Question 3: is conversion independent of preferred language?
# Contingency table: conversion status (rows) by preferred language (columns)
cont_table <- table(data$converted, data$language_preferred)
print(cont_table)
##      
##       English French Spanish
##   no       11     19      16
##   yes      21     15      18
# Pearson chi-squared test of independence on the contingency table
chisq_result <- chisq.test(x = cont_table)
print(chisq_result)
## 
##  Pearson's Chi-squared test
## 
## data:  cont_table
## X-squared = 3.093, df = 2, p-value = 0.213
# Question 4: is mean time on the new page the same across languages?
# Keep only the users who were shown the new landing page
data_new <- filter(data, landing_page == "new")

# Group sizes per language
table(data_new$language_preferred)
## 
## English  French Spanish 
##      16      17      17
# One-way ANOVA model of time spent by preferred language
anova_model <- aov(
  formula = time_spent_on_the_page ~ language_preferred,
  data = data_new
)
anova_summary <- summary(anova_model)
anova_summary
##                    Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred  2   5.68   2.838   0.854  0.432
## Residuals          47 156.10   3.321
#Conclusions and Recommendations
#People are spending more time on the new landing page than on the old one, which suggests the new design is more engaging.
#A higher share of visitors who see the new page end up converting, and this improvement is unlikely to be due to chance.
#The chances of converting and the time spent on the new page look very similar across the different languages, so language does not seem to be a major driver here.
#Recommendations
#Make the new landing page the main page for all users because it keeps people longer and converts better.
#Push more traffic to this new page through ads, promotions, and email campaigns to take advantage of its better performance.
#Keep the current language setup, but keep tracking results and consider adding language‑specific tweaks later.