# Set the knitr chunk option `echo = TRUE` as the default for all chunks.
knitr::opts_chunk$set(echo = TRUE)
#Problem Statement
#E-news Express has declining subscriber growth. They need to verify if their New Landing Page is more effective than the old one at engaging users and getting new subscribers.

#Objectives
#Use statistical analysis to answer:
  
  #Time Spent: Do users stay longer on the new page?
  
  #Conversion: Does the new page get more subscribers?
  
  #Language vs. Conversion: Does language choice affect subscription rates?
  
  #Language vs. Time: Do different language users spend the same amount of time on the new page?

# Install ----
# Install only the packages that are not already present in the library.
# Unconditionally re-installing on every run makes the report depend on the
# network and fails on Windows for packages that are currently loaded (their
# DLLs are locked). The CRAN mirror is contacted over HTTPS, not plain HTTP.
required_pkgs <- c("tidyverse", "ggplot2", "broom", "dplyr", "effectsize")
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = "https://cloud.r-project.org")
}
## Installing packages into 'C:/Users/ADMIN/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## also installing the dependency 'performance'
## Warning in download.file(urls, destfiles, "libcurl", mode = "wb", ...): URL
## 'https://lib.stat.cmu.edu/R/CRAN/bin/windows/contrib/4.5/ggplot2_4.0.1.zip':
## Timeout of 60 seconds was reached
## Warning in download.file(urls, destfiles, "libcurl", mode = "wb", ...): URL
## 'https://lib.stat.cmu.edu/R/CRAN/bin/windows/contrib/4.5/broom_1.0.11.zip':
## Timeout of 60 seconds was reached
## Warning in download.file(urls, destfiles, "libcurl", mode = "wb", ...): some
## files were not downloaded
## Warning in download.packages(pkgs, destdir = tmpd, available = available, :
## download of package 'ggplot2' failed
## Warning in download.packages(pkgs, destdir = tmpd, available = available, :
## download of package 'broom' failed
## package 'performance' successfully unpacked and MD5 sums checked
## package 'tidyverse' successfully unpacked and MD5 sums checked
## package 'dplyr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'dplyr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\ADMIN\AppData\Local\R\win-library\4.5\00LOCK\dplyr\libs\x64\dplyr.dll
## to C:\Users\ADMIN\AppData\Local\R\win-library\4.5\dplyr\libs\x64\dplyr.dll:
## Permission denied
## Warning: restored 'dplyr'
## package 'effectsize' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ADMIN\AppData\Local\Temp\RtmpoliMti\downloaded_packages
# Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.2
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
## Warning: package 'broom' was built under R version 4.5.2
library(ggplot2)
library(dplyr)
# Read the A/B test data into a data frame ----
# The file is addressed by its full path, so no setwd() call is needed.
# setwd() returns the *previous* working directory, so assigning its result to
# `data` stored a path string (immediately overwritten) and changed global
# session state as a side effect.
data_path <- "C:\\Users\\ADMIN\\OneDrive\\Desktop\\data folder\\abtest[1].csv"
data <- read.csv(data_path)
# Print the full data set
data
##     user_id     group landing_page time_spent_on_the_page converted
## 1    546592   control          old                   3.48        no
## 2    546468 treatment          new                   7.13       yes
## 3    546462 treatment          new                   4.40        no
## 4    546567   control          old                   3.02        no
## 5    546459 treatment          new                   4.75       yes
## 6    546558   control          old                   5.28       yes
## 7    546448 treatment          new                   5.25       yes
## 8    546581   control          old                   6.53       yes
## 9    546461 treatment          new                  10.71       yes
## 10   546548   control          old                   2.08        no
## 11   546588   control          old                   6.21       yes
## 12   546546   control          old                   2.58       yes
## 13   546491 treatment          new                   5.86       yes
## 14   546478 treatment          new                   6.03       yes
## 15   546578   control          old                   8.72       yes
## 16   546466 treatment          new                   6.27       yes
## 17   546443 treatment          new                   8.73        no
## 18   546555   control          old                   0.40        no
## 19   546493   control          old                  10.30       yes
## 20   546549   control          old                   3.88       yes
## 21   546560   control          old                   2.66        no
## 22   546584   control          old                   7.03       yes
## 23   546450 treatment          new                   3.65        no
## 24   546475 treatment          new                   7.02       yes
## 25   546456 treatment          new                   6.18        no
## 26   546455 treatment          new                   4.39        no
## 27   546469 treatment          new                   9.49       yes
## 28   546586   control          old                   4.05        no
## 29   546471 treatment          new                   7.81       yes
## 30   546575   control          old                   4.28        no
## 31   546464 treatment          new                   5.41       yes
## 32   546556   control          old                   3.52       yes
## 33   546585   control          old                   5.39       yes
## 34   546577   control          old                   4.52        no
## 35   546587   control          old                   4.46        no
## 36   546552   control          old                   8.50       yes
## 37   546551   control          old                   3.13        no
## 38   546557   control          old                   6.04       yes
## 39   546487 treatment          new                   1.65        no
## 40   546589   control          old                   0.19        no
## 41   546559   control          old                   8.46       yes
## 42   546570   control          old                   1.92        no
## 43   546489 treatment          new                   7.16       yes
## 44   546453 treatment          new                   7.16       yes
## 45   546488 treatment          new                   3.91        no
## 46   546565   control          old                   8.02        no
## 47   546460 treatment          new                   5.37       yes
## 48   546458 treatment          new                   7.23       yes
## 49   546492 treatment          new                   8.08       yes
## 50   546473 treatment          new                  10.50       yes
## 51   546554   control          old                   0.22        no
## 52   546457 treatment          new                   5.65        no
## 53   546479 treatment          new                   6.47       yes
## 54   546576   control          old                   4.71        no
## 55   546482 treatment          new                   6.41       yes
## 56   546563   control          old                   0.93        no
## 57   546569   control          old                   1.81        no
## 58   546454 treatment          new                   8.30       yes
## 59   546562   control          old                   7.40       yes
## 60   546574   control          old                   9.15        no
## 61   546470 treatment          new                   6.01       yes
## 62   546467 treatment          new                   6.79       yes
## 63   546572   control          old                   4.18        no
## 64   546590   control          old                   5.47        no
## 65   546553   control          old                   5.96       yes
## 66   546445 treatment          new                   7.27       yes
## 67   546545   control          old                   6.60       yes
## 68   546582   control          old                   4.75       yes
## 69   546484 treatment          new                   6.70        no
## 70   546579   control          old                   2.23        no
## 71   546568   control          old                   0.40        no
## 72   546476 treatment          new                   5.42       yes
## 73   546452 treatment          new                   5.08       yes
## 74   546444 treatment          new                   7.46       yes
## 75   546591   control          old                   4.87        no
## 76   546583   control          old                   6.57       yes
## 77   546573   control          old                   1.44        no
## 78   546485 treatment          new                   3.88        no
## 79   546486 treatment          new                   9.12       yes
## 80   546547   control          old                   3.21       yes
## 81   546490 treatment          new                   4.68        no
## 82   546449 treatment          new                   5.26       yes
## 83   546463 treatment          new                   5.74       yes
## 84   546580   control          old                   2.90        no
## 85   546571   control          old                   4.30        no
## 86   546564   control          old                   0.91        no
## 87   546465 treatment          new                   6.71        no
## 88   546480 treatment          new                   3.68        no
## 89   546447 treatment          new                   3.30        no
## 90   546561   control          old                   6.04       yes
## 91   546477 treatment          new                   5.40        no
## 92   546451 treatment          new                   8.47       yes
## 93   546566   control          old                   8.35        no
## 94   546474 treatment          new                   4.94        no
## 95   546550   control          old                   3.05        no
## 96   546446 treatment          new                   5.15        no
## 97   546544   control          old                   6.52       yes
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 1              Spanish
## 2              English
## 3              Spanish
## 4               French
## 5              Spanish
## 6              English
## 7               French
## 8              Spanish
## 9               French
## 10             English
## 11             Spanish
## 12             English
## 13             Spanish
## 14              French
## 15             Spanish
## 16             Spanish
## 17             English
## 18             English
## 19             English
## 20             English
## 21              French
## 22             Spanish
## 23             English
## 24             English
## 25             Spanish
## 26             English
## 27             English
## 28             Spanish
## 29              French
## 30              French
## 31             English
## 32             English
## 33             Spanish
## 34             Spanish
## 35             Spanish
## 36             English
## 37             English
## 38             English
## 39             Spanish
## 40             Spanish
## 41              French
## 42              French
## 43             Spanish
## 44             English
## 45             English
## 46              French
## 47              French
## 48             Spanish
## 49             Spanish
## 50             English
## 51             English
## 52             English
## 53             Spanish
## 54             Spanish
## 55             Spanish
## 56              French
## 57              French
## 58              French
## 59              French
## 60              French
## 61              French
## 62              French
## 63              French
## 64             Spanish
## 65             English
## 66              French
## 67             English
## 68             Spanish
## 69             Spanish
## 70             Spanish
## 71              French
## 72              French
## 73             English
## 74             English
## 75             Spanish
## 76             Spanish
## 77              French
## 78             Spanish
## 79              French
## 80             English
## 81              French
## 82              French
## 83             Spanish
## 84             Spanish
## 85              French
## 86              French
## 87             English
## 88              French
## 89              French
## 90              French
## 91              French
## 92             English
## 93              French
## 94              French
## 95             English
## 96             Spanish
## 97             English
## 98             Spanish
## 99             Spanish
## 100            English
# Create a simulated dataset for `n` users ----
# `df` is built entirely from generated values; it is separate from `data`,
# which is read from the CSV file.
# Fix the RNG seed so the simulated values are identical on every run/knit.
set.seed(42)

n <- 100                      # total number of simulated users
n_control <- n %/% 2          # first half: control group / old page
n_treatment <- n - n_control  # remainder: treatment group / new page

user_id <- seq_len(n)
group <- rep(c("control", "treatment"), times = c(n_control, n_treatment))
landing_page <- rep(c("old", "new"), times = c(n_control, n_treatment))

# Generate Time Spent (Normal distribution)
# Control: mean=4.5, sd=1.5 | Treatment: mean=6.2, sd=1.2
time_spent <- c(
  rnorm(n_control, mean = 4.5, sd = 1.5),
  rnorm(n_treatment, mean = 6.2, sd = 1.2)
)

# Generate Conversion (categorical, sampled with the given probabilities)
# Control: ~42% yes | Treatment: ~65% yes
converted_control <- sample(
  c("no", "yes"), n_control, replace = TRUE, prob = c(0.58, 0.42)
)
converted_treat <- sample(
  c("no", "yes"), n_treatment, replace = TRUE, prob = c(0.35, 0.65)
)
converted <- c(converted_control, converted_treat)

# Generate Language Preference (each language equally likely)
languages <- c("English", "French", "Spanish")
language_preferred <- sample(languages, n, replace = TRUE)

# Combine into a data frame; categorical columns are stored as factors
df <- data.frame(
  user_id = user_id,
  group = as.factor(group),
  landing_page = as.factor(landing_page),
  time_spent_on_the_page = time_spent,
  converted = as.factor(converted),
  language_preferred = as.factor(language_preferred)
)

# View the structure of the simulated data frame
str(df)
## 'data.frame':    100 obs. of  6 variables:
##  $ user_id               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ group                 : Factor w/ 2 levels "control","treatment": 1 1 1 1 1 1 1 1 1 1 ...
##  $ landing_page          : Factor w/ 2 levels "new","old": 2 2 2 2 2 2 2 2 2 2 ...
##  $ time_spent_on_the_page: num  4.3 4.4 4.84 4.07 5.05 ...
##  $ converted             : Factor w/ 2 levels "no","yes": 1 1 2 1 1 1 1 2 2 1 ...
##  $ language_preferred    : Factor w/ 3 levels "English","French",..: 2 1 1 1 1 1 2 2 1 2 ...
# First six rows of the loaded A/B data
head(data, n = 6L)
##   user_id     group landing_page time_spent_on_the_page converted
## 1  546592   control          old                   3.48        no
## 2  546468 treatment          new                   7.13       yes
## 3  546462 treatment          new                   4.40        no
## 4  546567   control          old                   3.02        no
## 5  546459 treatment          new                   4.75       yes
## 6  546558   control          old                   5.28       yes
##   language_preferred
## 1            Spanish
## 2            English
## 3            Spanish
## 4             French
## 5            Spanish
## 6            English
# Last six rows of the loaded A/B data
tail(data, n = 6L)
##     user_id     group landing_page time_spent_on_the_page converted
## 95   546550   control          old                   3.05        no
## 96   546446 treatment          new                   5.15        no
## 97   546544   control          old                   6.52       yes
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 95             English
## 96             Spanish
## 97             English
## 98             Spanish
## 99             Spanish
## 100            English
# Compact column-wise overview (types and leading values)
data %>% glimpse()
## Rows: 100
## Columns: 6
## $ user_id                <int> 546592, 546468, 546462, 546567, 546459, 546558,…
## $ group                  <chr> "control", "treatment", "treatment", "control",…
## $ landing_page           <chr> "old", "new", "new", "old", "new", "old", "new"…
## $ time_spent_on_the_page <dbl> 3.48, 7.13, 4.40, 3.02, 4.75, 5.28, 5.25, 6.53,…
## $ converted              <chr> "no", "yes", "no", "no", "yes", "yes", "yes", "…
## $ language_preferred     <chr> "Spanish", "English", "Spanish", "French", "Spa…
# Per-column summary statistics
summary(object = data)
##     user_id          group           landing_page       time_spent_on_the_page
##  Min.   :546443   Length:100         Length:100         Min.   : 0.190        
##  1st Qu.:546468   Class :character   Class :character   1st Qu.: 3.880        
##  Median :546493   Mode  :character   Mode  :character   Median : 5.415        
##  Mean   :546517                                         Mean   : 5.378        
##  3rd Qu.:546567                                         3rd Qu.: 7.022        
##  Max.   :546592                                         Max.   :10.710        
##   converted         language_preferred
##  Length:100         Length:100        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Missing values: number of NA entries in each column
data %>%
  is.na() %>%
  colSums()
##                user_id                  group           landing_page 
##                      0                      0                      0 
## time_spent_on_the_page              converted     language_preferred 
##                      0                      0                      0
# Duplicates: number of rows that exactly repeat an earlier row
data %>%
  duplicated() %>%
  sum()
## [1] 0
# Univariate analysis ----
# Histogram of time spent on the page (bin width of 1)
data %>%
  ggplot(aes(x = time_spent_on_the_page)) +
  geom_histogram(binwidth = 1, fill = "orange", color = "white") +
  ggtitle("Distribution of Time Spent on Page") +
  xlab("Minutes")

# Number of users per conversion outcome
table(data[["converted"]])
## 
##  no yes 
##  46  54
# Bar chart of conversion outcomes
data %>%
  ggplot(aes(x = converted)) +
  geom_bar(fill = "purple") +
  ggtitle("Conversion Counts") +
  xlab("Converted") +
  ylab("Count")

# Number of users per preferred language
table(data[["language_preferred"]])
## 
## English  French Spanish 
##      32      34      34
# Bar chart of preferred languages
data %>%
  ggplot(aes(x = language_preferred)) +
  geom_bar(fill = "blue") +
  ggtitle("Language Distribution") +
  xlab("Language") +
  ylab("Count")

# Bivariate analysis ----
# Box plot of time on page for each experiment group
data %>%
  ggplot(aes(x = group, y = time_spent_on_the_page, fill = group)) +
  geom_boxplot() +
  ggtitle("Time on Page by Group")

# Cross-tabulation: experiment group (rows) by conversion outcome (columns)
table(data[["group"]], data[["converted"]])
##            
##             no yes
##   control   29  21
##   treatment 17  33
# Row percentages: conversion outcome within each experiment group.
# Both variables must come from `data`. Pairing the simulated `df$group` with
# the real `data$converted` lines up unrelated rows and yields meaningless
# percentages.
prop.table(table(data$group, data$converted), 1) * 100
##            
##             no yes
##   control   58  42
##   treatment 34  66
# Bars scaled to proportions: share of each conversion outcome within a group
data %>%
  ggplot(aes(x = group, fill = converted)) +
  geom_bar(position = "fill") +
  ggtitle("Conversion by Group") +
  ylab("Proportion")

# Conversion by language ----
# Cross-tabulation: preferred language (rows) by conversion outcome (columns)
lang_by_conversion <- table(data$language_preferred, data$converted)
lang_by_conversion
##          
##           no yes
##   English 11  21
##   French  19  15
##   Spanish 16  18
# Row percentages: conversion outcome within each language
prop.table(lang_by_conversion, margin = 1) * 100
##          
##                 no      yes
##   English 34.37500 65.62500
##   French  55.88235 44.11765
##   Spanish 47.05882 52.94118
# Side-by-side bars of conversion outcome for each language
data %>%
  ggplot(aes(x = language_preferred, fill = converted)) +
  geom_bar(position = "dodge") +
  ggtitle("Conversion by Language")

# Quiz 1: do users spend more time on the new page than on the old one? ----
# Time-on-page values for each landing page version
time_new <- with(data, time_spent_on_the_page[landing_page == "new"])
time_old <- with(data, time_spent_on_the_page[landing_page == "old"])

# F-test for equality of the two variances
var_test <- var.test(time_new, time_old)
print(paste("Variance test p-value:", round(var_test$p.value, 4)))
## [1] "Variance test p-value: 0.0154"
# One-sided two-sample t-test (H1: mean(new) > mean(old)).
# Variances are pooled only when the F-test does not reject equality at 0.05;
# otherwise t.test() falls back to the Welch test.
equal_variances <- var_test$p.value > 0.05
t_test_result <- t.test(
  time_new,
  time_old,
  alternative = "greater",
  var.equal = equal_variances
)

print(t_test_result)
## 
##  Welch Two Sample t-test
## 
## data:  time_new and time_old
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.9485536       Inf
## sample estimates:
## mean of x mean of y 
##    6.2232    4.5324
# Report the decision at the 5% significance level
quiz1_verdict <- if (t_test_result$p.value < 0.05) {
  "Result: Reject Null Hypothesis. Users spend significantly more time on the new page."
} else {
  "Result: Fail to reject Null Hypothesis."
}
print(quiz1_verdict)
## [1] "Result: Reject Null Hypothesis. Users spend significantly more time on the new page."
# Conclusion: users spend significantly more time on the new landing page.

# Quiz 2: is the conversion rate higher on the new page? ----
# Cross-tabulation: landing page (rows) by conversion outcome (columns)
table_conv <- table(data$landing_page, data$converted)
table_conv
##      
##       no yes
##   new 17  33
##   old 29  21
# Group sizes and number of converted users per landing page
on_new_page <- data$landing_page == "new"
on_old_page <- data$landing_page == "old"
did_convert <- data$converted == "yes"
n_new <- sum(on_new_page)
n_old <- sum(on_old_page)
conv_new <- sum(on_new_page & did_convert)
conv_old <- sum(on_old_page & did_convert)

# One-sided two-sample proportion test (H1: p_new > p_old)
prop_test_result <- prop.test(
  x = c(conv_new, conv_old),
  n = c(n_new, n_old),
  alternative = "greater"
)

prop_test_result
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(conv_new, conv_old) out of c(n_new, n_old)
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.06086519 1.00000000
## sample estimates:
## prop 1 prop 2 
##   0.66   0.42
# Report the decision at the 5% significance level
quiz2_verdict <- if (prop_test_result$p.value < 0.05) {
  "Result: Reject Null Hypothesis. The new page has a significantly higher conversion rate."
} else {
  "Result: Fail to reject Null Hypothesis."
}
print(quiz2_verdict)
## [1] "Result: Reject Null Hypothesis. The new page has a significantly higher conversion rate."
# Conclusion: the new page has a higher conversion rate than the old page.

# Quiz 3: is conversion associated with preferred language? ----
# Contingency table: conversion outcome (rows) by language (columns)
cont_table <- table(data[["converted"]], data[["language_preferred"]])
cont_table
##      
##       English French Spanish
##   no       11     19      16
##   yes      21     15      18
# Pearson chi-squared test of independence on the contingency table
chisq_result <- chisq.test(cont_table)
chisq_result
## 
##  Pearson's Chi-squared test
## 
## data:  cont_table
## X-squared = 3.093, df = 2, p-value = 0.213
# Report the decision at the 5% significance level
quiz3_verdict <- if (chisq_result$p.value < 0.05) {
  "Result: Reject Null Hypothesis. Conversion status depends on language."
} else {
  "Result: Fail to reject Null Hypothesis. Conversion status is independent of language."
}
print(quiz3_verdict)
## [1] "Result: Fail to reject Null Hypothesis. Conversion status is independent of language."
# Conclusion: no statistically significant association between conversion
# status and preferred language was detected (p = 0.213).

# Quiz 4: does time on the new page differ by preferred language? ----
# Keep only the users who saw the new landing page
data_new <- filter(data, landing_page == "new")

# Visual check: time-on-page distribution for each language
data_new %>%
  ggplot(
    aes(
      x = language_preferred,
      y = time_spent_on_the_page,
      fill = language_preferred
    )
  ) +
  geom_boxplot() +
  ggtitle("Time Spent on New Page by Language")

# One-way ANOVA: time spent explained by preferred language
anova_model <- aov(time_spent_on_the_page ~ language_preferred, data = data_new)
anova_summary <- summary(anova_model)

anova_summary
##                    Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred  2   5.68   2.838   0.854  0.432
## Residuals          47 156.10   3.321
# p-value of the language term (first row of the ANOVA table)
p_val_anova <- anova_summary[[1]]$`Pr(>F)`[1]

# Report the decision at the 5% significance level
quiz4_verdict <- if (p_val_anova < 0.05) {
  "Result: Reject Null Hypothesis. Mean time spent varies by language."
} else {
  "Result: Fail to reject Null Hypothesis. Mean time spent is similar across languages."
}
print(quiz4_verdict)
## [1] "Result: Fail to reject Null Hypothesis. Mean time spent is similar across languages."
# Conclusion: no significant difference in mean time spent on the new page
# across languages was detected (p = 0.432).

# Conclusion
# The New Landing Page is a success.

# It works: Users spend significantly more time on the new page (mean 6.22 vs. 4.53) and are significantly more likely to subscribe (66% vs. 42% conversion).

# It’s consistent: No statistically significant differences between language groups (English, French, and Spanish) were detected, either in conversion or in time spent on the new page.

# Business Recommendations
# Launch Immediately: Switch 100% of website traffic to the New Landing Page to maximize subscriptions and revenue.

# Keep it Interactive: The multimedia elements appear to be driving the increased engagement (the test compared the pages as a whole, not individual elements). Keep using them in future designs.

# No Customization Needed: Since no significant language effect was detected, you can use the same layout globally without extra costs for localization.