# Global knitr chunk option: echo = TRUE includes each chunk's R source in the rendered output.
knitr::opts_chunk$set(echo = TRUE)

data importation

knitr::opts_chunk$set(echo = TRUE)
# Load the A/B test data set.
# The file is read through an explicit path instead of setwd(): changing the
# working directory is a global side effect, and knitr restores it after the
# chunk anyway, so later chunks could never rely on it.
data_dir <- "C:\\Users\\Admin\\Desktop\\statistical computing\\data"
abtest <- read.csv(file.path(data_dir, "abtest.csv"))
# View() opens a spreadsheet-style viewer; only do that in an interactive
# session so that knitting / Rscript runs are not affected.
if (interactive()) {
  View(abtest)
}

viewing and shape of data

knitr::opts_chunk$set(echo = TRUE)
# Quick look at the data; the `##` lines are the console output recorded when
# the report was rendered.
# First rows of the data frame.
head(abtest)
##   user_id     group landing_page time_spent_on_the_page converted
## 1  546592   control          old                   3.48        no
## 2  546468 treatment          new                   7.13       yes
## 3  546462 treatment          new                   4.40        no
## 4  546567   control          old                   3.02        no
## 5  546459 treatment          new                   4.75       yes
## 6  546558   control          old                   5.28       yes
##   language_preferred
## 1            Spanish
## 2            English
## 3            Spanish
## 4             French
## 5            Spanish
## 6            English
# Last rows of the data frame.
tail(abtest)
##     user_id     group landing_page time_spent_on_the_page converted
## 95   546550   control          old                   3.05        no
## 96   546446 treatment          new                   5.15        no
## 97   546544   control          old                   6.52       yes
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 95             English
## 96             Spanish
## 97             English
## 98             Spanish
## 99             Spanish
## 100            English
# Shape of the data: number of rows (users) and number of columns (variables).
nrow(abtest)
## [1] 100
ncol(abtest)
## [1] 6

univariate analysis:numerical

knitr::opts_chunk$set(echo = TRUE)
# Summary statistics for the only numeric variable, time spent on the page.
# The column is extracted once by its full name: the original mean() call used
# `abtest$time_spent`, which only worked through `$` partial matching and would
# silently return NULL on a tibble or if another `time_spent*` column appeared.
time_on_page <- abtest$time_spent_on_the_page
# Central tendency.
mean(time_on_page)
## [1] 5.3778
median(time_on_page)
## [1] 5.415
# Spread.
sd(time_on_page)
## [1] 2.378166
IQR(time_on_page)
## [1] 3.1425
var(time_on_page)
## [1] 5.655674
# Five-number summary (min, quartiles, max).
quartile <- quantile(time_on_page)
print(quartile)
##      0%     25%     50%     75%    100% 
##  0.1900  3.8800  5.4150  7.0225 10.7100

visualization

knitr::opts_chunk$set(echo = TRUE)
# Numeric variable: histogram, box plot and a per-observation bar plot of the
# time spent on the page.
hist(abtest$time_spent_on_the_page)

boxplot(abtest$time_spent_on_the_page)

barplot(abtest$time_spent_on_the_page)

# Categorical variables: one frequency bar chart per column, drawn in the
# order listed here.
categorical_cols <- c("landing_page", "converted", "group", "language_preferred")
for (col_name in categorical_cols) {
  barplot(table(abtest[[col_name]]))
}

## bivariate analysis

knitr::opts_chunk$set(echo = TRUE)
# Time on page, split by landing page version.
page_title <- "Time Spent on Page: Old vs. New Landing Page"
boxplot(time_spent_on_the_page ~ landing_page, data = abtest, main = page_title)

# Conversion counts, stacked by landing page version.
page_by_conversion <- table(abtest$landing_page, abtest$converted)
barplot(page_by_conversion)

# Conversion counts, stacked by preferred language.
language_by_conversion <- table(abtest$language_preferred, abtest$converted)
barplot(language_by_conversion)

# Time on page, split by whether the user converted.
conversion_title <- "Time Spent by Conversion Status"
boxplot(time_spent_on_the_page ~ converted, data = abtest, main = conversion_title)

## HYPOTHESIS TESTS
## question one
## one-tailed test
## null hypothesis – The average time users spend on the new landing page is equal to the average time on the existing landing page.
## alternate hypothesis – The average time users spend on the new landing page is greater than the average time on the existing landing page.
## the appropriate test is a one-tailed two-sample t-test

knitr::opts_chunk$set(echo = TRUE)
# Significance level used for the decision.
alpha <- 0.05
# Time on page for each landing page version.
page_version <- abtest$landing_page
time_on_page <- abtest$time_spent_on_the_page
new_time <- time_on_page[page_version == "new"]
old_time <- time_on_page[page_version == "old"]
# One-sided two-sample t-test (Welch, the t.test default): the alternative is
# that the mean of new_time is greater than the mean of old_time.
test_result <- t.test(x = new_time, y = old_time, alternative = "greater")
pvalue <- test_result[["p.value"]]
# Reject the null hypothesis only when the p-value is below alpha.
decision <- ifelse(pvalue < alpha, "reject", "fail to reject")
paste(decision, "the null hypothesis")
## [1] "reject the null hypothesis"
## users spend more time on the new landing page compared to the existing old page

Users spend more time on the new landing page than on the existing (old) page.

question two

null hypothesis – The conversion rate of the new landing page is equal to the conversion rate of the old landing page.

alternate hypothesis – The conversion rate of the new landing page is greater than the conversion rate of the old landing page.

appropriate test is one-tailed two-proportion z-test

knitr::opts_chunk$set(echo = TRUE)
# Significance level used for the decision.
alpha <- 0.05
# Row masks: which page each user saw and whether they converted.
saw_new <- abtest$landing_page == "new"
saw_old <- abtest$landing_page == "old"
did_convert <- abtest$converted == "yes"
# Sample sizes and number of conversions per page.
n_new <- sum(saw_new)
n_old <- sum(saw_old)
x_new <- sum(saw_new & did_convert)
x_old <- sum(saw_old & did_convert)
# Observed conversion rates.
p_new <- x_new / n_new
p_old <- x_old / n_old
cat("New page conversion rate:", p_new, "\n")
## New page conversion rate: 0.66
cat("Old page conversion rate:", p_old, "\n")
## Old page conversion rate: 0.42
# One-sided two-sample test of proportions (new > old). Note that prop.test
# applies Yates' continuity correction by default.
result <- prop.test(x = c(x_new, x_old), n = c(n_new, n_old), alternative = "greater")
pvalue <- result[["p.value"]]
# Reject the null hypothesis only when the p-value is below alpha.
decision <- ifelse(pvalue < alpha, "reject", "fail to reject")
paste(decision, "null hypothesis")
## [1] "reject null hypothesis"

we conclude that the new landing page achieves a significantly higher conversion rate compared to the old page.

question three

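null hypothesis – Conversion and preferred language are independent (there is no relationship between them).

alternate hypothesis – Conversion depends on preferred language.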

the chi square test for independence

knitr::opts_chunk$set(echo = TRUE)
# Significance level used for the decision.
alpha <- 0.05
# Contingency table of conversion status (rows) by preferred language
# (columns); built once and reused for both display and the test.
conversion_by_language <- table(abtest$converted, abtest$language_preferred)
conversion_by_language
##      
##       English French Spanish
##   no       11     19      16
##   yes      21     15      18
# Chi-square test of independence on the contingency table.
result <- chisq.test(conversion_by_language)
pvalue <- result$p.value
# Reject the null hypothesis only when the p-value is BELOW alpha. The
# comparison was previously inverted (`pvalue > alpha`), which reported
# "reject" for a non-significant result (p is roughly 0.21 for this table).
paste(ifelse(pvalue < alpha, "reject", "fail to reject"), "null hypothesis")
## [1] "fail to reject null hypothesis"

We fail to reject the null hypothesis: there is no statistically significant evidence that conversion depends on preferred language.

question four

null hypothesis – The average time spent on the new page is equal across all language groups.

alternate hypothesis– The average time spent on the new page differs for at least one language group.

the anova test

knitr::opts_chunk$set(echo = TRUE)
# Significance level used for the decision.
alpha <- 0.05
# Restrict the analysis to users who saw the new landing page.
new_page_data <- subset(abtest, landing_page == "new")
# One-way ANOVA of time on page by preferred language. The fit is stored so
# that its p-value can be extracted below, and printed to show the model.
anova_fit <- aov(time_spent_on_the_page ~ language_preferred, data = new_page_data)
anova_fit
## Call:
##    aov(formula = time_spent_on_the_page ~ language_preferred, data = new_page_data)
## 
## Terms:
##                 language_preferred Residuals
## Sum of Squares              5.6755  156.1030
## Deg. of Freedom                  2        47
## 
## Residual standard error: 1.822454
## Estimated effects may be unbalanced
# Take the p-value of the language effect from the ANOVA table (first row is
# the language_preferred term). The original read `result$p.value`, i.e. the
# p-value of a previous, unrelated test, because the aov fit was never stored.
anova_table <- summary(anova_fit)[[1]]
pvalue <- anova_table[["Pr(>F)"]][1]
# Reject the null hypothesis only when the p-value is BELOW alpha (the
# comparison was previously inverted). With F = (5.6755 / 2) / (156.1030 / 47),
# which is below 1, the result is not significant.
paste(ifelse(pvalue < alpha, "reject", "fail to reject"), "null hypothesis")
## [1] "fail to reject null hypothesis"

We fail to reject the null hypothesis: there is no evidence that the average time spent on the new page differs across language groups. Thus time spent on the new page does not appear to depend on preferred language.

RECOMMENDATIONS

Adopt the new landing page, because users spend more time on it and it achieves a higher conversion rate.

There is no need for language-specific optimization, because neither the conversion rate nor the time spent is affected by preferred language.

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.