# Setup, data loading, and a first structural look at the A/B-test data.
# Lines starting with `##` are the outputs recorded when the report was knitted.
knitr::opts_chunk$set(echo = TRUE)

# Location of the input CSV. The original hard-coded Windows path stays the
# default; set the ABTEST_CSV environment variable to read the file from
# another location without editing this script.
csv_path <- Sys.getenv(
  "ABTEST_CSV",
  unset = "C:\\Users\\Admin\\Desktop\\statistical computing\\data\\abtest.csv"
)
data <- read.csv(csv_path)

# View() invokes a spreadsheet-style viewer, which only makes sense when a
# person is driving the session, so skip it in non-interactive runs.
if (interactive()) {
  View(data)
}

# Dimensions of the data set.
nrow(data)
## [1] 100
ncol(data)
## [1] 6

# Per-column summary: numeric columns get quantiles, character columns only
# report their length and class.
summary(data)
##     user_id          group           landing_page       time_spent_on_the_page
##  Min.   :546443   Length:100         Length:100         Min.   : 0.190        
##  1st Qu.:546468   Class :character   Class :character   1st Qu.: 3.880        
##  Median :546493   Mode  :character   Mode  :character   Median : 5.415        
##  Mean   :546517                                         Mean   : 5.378        
##  3rd Qu.:546567                                         3rd Qu.: 7.022        
##  Max.   :546592                                         Max.   :10.710        
##   converted         language_preferred
##  Length:100         Length:100        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

# First and last rows.
head(data)
##   user_id     group landing_page time_spent_on_the_page converted
## 1  546592   control          old                   3.48        no
## 2  546468 treatment          new                   7.13       yes
## 3  546462 treatment          new                   4.40        no
## 4  546567   control          old                   3.02        no
## 5  546459 treatment          new                   4.75       yes
## 6  546558   control          old                   5.28       yes
##   language_preferred
## 1            Spanish
## 2            English
## 3            Spanish
## 4             French
## 5            Spanish
## 6            English
tail(data)
##     user_id     group landing_page time_spent_on_the_page converted
## 95   546550   control          old                   3.05        no
## 96   546446 treatment          new                   5.15        no
## 97   546544   control          old                   6.52       yes
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 95             English
## 96             Spanish
## 97             English
## 98             Spanish
## 99             Spanish
## 100            English

# Data-quality checks: total count of missing cells and of fully duplicated rows.
sum(is.na(data))
## [1] 0
sum(duplicated(data))
## [1] 0
# Univariate look at the only continuous variable, time_spent_on_the_page.
print("##Univariate analysis")
## [1] "##Univariate analysis"

# Location and spread.
mean(data[["time_spent_on_the_page"]])
## [1] 5.3778
sd(data[["time_spent_on_the_page"]])
## [1] 2.378166
median(data[["time_spent_on_the_page"]])
## [1] 5.415
quantile(data[["time_spent_on_the_page"]])
##      0%     25%     50%     75%    100% 
##  0.1900  3.8800  5.4150  7.0225 10.7100
IQR(data[["time_spent_on_the_page"]])
## [1] 3.1425

# Distribution plots. The histogram labels are written out explicitly so the
# figure keeps the title and x-axis label hist() derives from a `data$...`
# argument expression.
hist(
  data[["time_spent_on_the_page"]],
  main = "Histogram of data$time_spent_on_the_page",
  xlab = "data$time_spent_on_the_page"
)

barplot(data[["time_spent_on_the_page"]])

boxplot(data[["time_spent_on_the_page"]])

### Bivariate analysis
# Question one: is the mean time spent on the new landing page greater than
# the mean time spent on the old one? Tested with a one-sided two-sample
# t-test at significance level `alpha`.
print("Question one")
## [1] "Question one"
print("DEFINING HYPOTHESIS")
## [1] "DEFINING HYPOTHESIS"
print("Null :time_spent_on_new = time_spent_on_exixting")
## [1] "Null :time_spent_on_new = time_spent_on_exixting"
print("Alternate:time_spent_on_new> time_spent_on_existing")
## [1] "Alternate:time_spent_on_new> time_spent_on_existing"
print("APPROPRIATE TEST")
## [1] "APPROPRIATE TEST"
print("two sampled t-test")
## [1] "two sampled t-test"
print("SIGNIFICANCE LEVEL")
## [1] "SIGNIFICANCE LEVEL"
# Set once for this question; the decision below reuses it.
alpha <- 0.05
print("COLLECTING AND PREPARING DATA")
## [1] "COLLECTING AND PREPARING DATA"
print("Visual analysis")
## [1] "Visual analysis"
boxplot(time_spent_on_the_page ~ landing_page, data = data)

print("Collecting data")
## [1] "Collecting data"
# Split the response by landing page.
new_time_spent_on_page <-
  data$time_spent_on_the_page[data$landing_page == "new"]
old_time_spent_on_page <-
  data$time_spent_on_the_page[data$landing_page == "old"]
print("Calculating p-value")
## [1] "Calculating p-value"
# var.equal = FALSE gives the Welch test; alternative = "greater" matches the
# one-sided alternative mean(new) > mean(old).
t_test <- t.test(
  new_time_spent_on_page,
  old_time_spent_on_page,
  alternative = "greater",
  var.equal = FALSE
)
print(t_test)
## 
##  Welch Two Sample t-test
## 
## data:  new_time_spent_on_page and old_time_spent_on_page
## t = 3.7868, df = 87.975, p-value = 0.0001392
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.9485536       Inf
## sample estimates:
## mean of x mean of y 
##    6.2232    4.5324
t_test$p.value
## [1] 0.0001392381
print("Comparing the p-value with alpha")
## [1] "Comparing the p-value with alpha"
t_test$p.value < alpha
## [1] TRUE
### Drawing inference
# The conclusion is printed only when the null is rejected. Failing to reject
# means the evidence is insufficient; it does not show that the old page
# performs better.
if (t_test$p.value < alpha) {
  print("Reject Null,users spend more time on the new landing page than on the old landing page")
  print("users spend more time on the new landing page than on the old landing page")
} else {
  print("Fail to reject Null,there is not enough evidence that users spend more time on the new landing page than on the old landing page")
}
## [1] "Reject Null,users spend more time on the new landing page than on the old landing page"
## [1] "users spend more time on the new landing page than on the old landing page"
     ### Question Two
# Is the conversion rate on the new page greater than on the old page?
# Tested with a one-sided two-sample test of proportions at level `alpha`.
print("Defining hypothesis")
## [1] "Defining hypothesis"
print("Null: conversion_rate_on_new = conversion_rate_on_old")
## [1] "Null: conversion_rate_on_new = conversion_rate_on_old"
print("Alternate: conversion_rate_on_new > conversion_rate_on_old")
## [1] "Alternate: conversion_rate_on_new > conversion_rate_on_old"
### Appropriate test
print("Two-proportion z-test")
## [1] "Two-proportion z-test"
### Significance level
alpha <- 0.05
### Preparing and collecting data
print("visual analysis")
## [1] "visual analysis"
# Rows: landing page (new/old); columns: converted (no/yes).
conv_table <- table(data$landing_page, data$converted)
# The table viewer is only useful when someone is at the console.
if (interactive()) {
  View(conv_table)
}
### collecting data
# Successes are the "yes" counts per page; totals are the row sums, both in
# the order new, old so that alternative = "greater" means new > old.
success <- c(conv_table["new", "yes"], conv_table["old", "yes"])
total <- c(sum(conv_table["new", ]), sum(conv_table["old", ]))
### calculating p-value
# Stored as `prop_test` so the stats::prop.test function is not masked by its
# own result.
prop_test <- prop.test(success, total, alternative = "greater")
print(prop_test)
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  success out of total
## X-squared = 4.8712, df = 1, p-value = 0.01365
## alternative hypothesis: greater
## 95 percent confidence interval:
##  0.06086519 1.00000000
## sample estimates:
## prop 1 prop 2 
##   0.66   0.42
prop_test$p.value
## [1] 0.01365445
### comparing p-value
# Same strict comparison as the decision below, so both always agree.
prop_test$p.value < alpha
## [1] TRUE
### Drawing inference
# Failing to reject only means the evidence is insufficient; it does not show
# that the old page converts better.
if (prop_test$p.value < alpha) {
  print("Reject Null,the conversion rate on new page is greater than on old page")
} else {
  print("Fail to reject Null,there is not enough evidence that the conversion rate on new page is greater than on old page")
}
## [1] "Reject Null,the conversion rate on new page is greater than on old page"
    ### Question Three
# Does conversion depend on the preferred language? Tested with a chi-square
# test of independence at level `alpha`.
print("Defining hypothesis")
## [1] "Defining hypothesis"
print("Null: Conversion and preffered language are independent")
## [1] "Null: Conversion and preffered language are independent"
print("Alternate: Conversion and preffered language are dependent")
## [1] "Alternate: Conversion and preffered language are dependent"
### Appropriate test
print("Chi-square test of independence")
## [1] "Chi-square test of independence"
### Significance level
alpha <- 0.05
### Preparing and collecting data
print("visual analysis")
## [1] "visual analysis"
# Rows: converted (no/yes); columns: preferred language. This replaces any
# earlier `conv_table`.
conv_table <- table(data$converted, data$language_preferred)
# The table viewer is only useful when someone is at the console.
if (interactive()) {
  View(conv_table)
}
### Calculating p-value
# Stored as `chisq_test` so the stats::chisq.test function is not masked by
# its own result. `correct` requests the continuity correction, which
# chisq.test() applies to 2-by-2 tables only; the recorded df = 2 shows this
# table is larger than that.
chisq_test <- chisq.test(conv_table, correct = TRUE)
print(chisq_test)
## 
##  Pearson's Chi-squared test
## 
## data:  conv_table
## X-squared = 3.093, df = 2, p-value = 0.213
chisq_test$p.value
## [1] 0.2129889
### comparing p-value
# Same strict comparison as the decision below, so both always agree.
chisq_test$p.value < alpha
## [1] FALSE
### Drawing inference
# NOTE: failing to reject means the data give no significant evidence of
# dependence; it does not prove independence.
if (chisq_test$p.value < alpha) {
  print("Reject Null,conversion and preffered language are dependent")
} else {
  print("Fail to reject Null,conversion and prefferd language are independent")
}
## [1] "Fail to reject Null,conversion and prefferd language are independent"
  ### Question Four
# Is the mean time spent on the new page the same for every preferred
# language? Sets up the hypotheses, the significance level, and the data
# subset for a one-way ANOVA.
print("Defining hypothesis")
## [1] "Defining hypothesis"
print("Null: the time spent on new page is same in all languages")
## [1] "Null: the time spent on new page is same in all languages"
# The one-way ANOVA alternative is that at least one group mean differs, not
# that every group differs from every other.
print("Alternate: time spent on new page differs for at least one language")
## [1] "Alternate: time spent on new page differs for at least one language"
### Appropriate test
print("One way ANOVA test")
## [1] "One way ANOVA test"
### Significance level
alpha <- 0.05
### Preparing and collecting data
# Keep only the rows served the new landing page. From here on
# `new_time_spent_on_page` is a data frame with all columns, not a numeric
# vector.
new_time_spent_on_page <- subset(data, landing_page == "new")
print(new_time_spent_on_page)
##     user_id     group landing_page time_spent_on_the_page converted
## 2    546468 treatment          new                   7.13       yes
## 3    546462 treatment          new                   4.40        no
## 5    546459 treatment          new                   4.75       yes
## 7    546448 treatment          new                   5.25       yes
## 9    546461 treatment          new                  10.71       yes
## 13   546491 treatment          new                   5.86       yes
## 14   546478 treatment          new                   6.03       yes
## 16   546466 treatment          new                   6.27       yes
## 17   546443 treatment          new                   8.73        no
## 23   546450 treatment          new                   3.65        no
## 24   546475 treatment          new                   7.02       yes
## 25   546456 treatment          new                   6.18        no
## 26   546455 treatment          new                   4.39        no
## 27   546469 treatment          new                   9.49       yes
## 29   546471 treatment          new                   7.81       yes
## 31   546464 treatment          new                   5.41       yes
## 39   546487 treatment          new                   1.65        no
## 43   546489 treatment          new                   7.16       yes
## 44   546453 treatment          new                   7.16       yes
## 45   546488 treatment          new                   3.91        no
## 47   546460 treatment          new                   5.37       yes
## 48   546458 treatment          new                   7.23       yes
## 49   546492 treatment          new                   8.08       yes
## 50   546473 treatment          new                  10.50       yes
## 52   546457 treatment          new                   5.65        no
## 53   546479 treatment          new                   6.47       yes
## 55   546482 treatment          new                   6.41       yes
## 58   546454 treatment          new                   8.30       yes
## 61   546470 treatment          new                   6.01       yes
## 62   546467 treatment          new                   6.79       yes
## 66   546445 treatment          new                   7.27       yes
## 69   546484 treatment          new                   6.70        no
## 72   546476 treatment          new                   5.42       yes
## 73   546452 treatment          new                   5.08       yes
## 74   546444 treatment          new                   7.46       yes
## 78   546485 treatment          new                   3.88        no
## 79   546486 treatment          new                   9.12       yes
## 81   546490 treatment          new                   4.68        no
## 82   546449 treatment          new                   5.26       yes
## 83   546463 treatment          new                   5.74       yes
## 87   546465 treatment          new                   6.71        no
## 88   546480 treatment          new                   3.68        no
## 89   546447 treatment          new                   3.30        no
## 91   546477 treatment          new                   5.40        no
## 92   546451 treatment          new                   8.47       yes
## 94   546474 treatment          new                   4.94        no
## 96   546446 treatment          new                   5.15        no
## 98   546472 treatment          new                   7.07       yes
## 99   546481 treatment          new                   6.20       yes
## 100  546483 treatment          new                   5.86       yes
##     language_preferred
## 2              English
## 3              Spanish
## 5              Spanish
## 7               French
## 9               French
## 13             Spanish
## 14              French
## 16             Spanish
## 17             English
## 23             English
## 24             English
## 25             Spanish
## 26             English
## 27             English
## 29              French
## 31             English
## 39             Spanish
## 43             Spanish
## 44             English
## 45             English
## 47              French
## 48             Spanish
## 49             Spanish
## 50             English
## 52             English
## 53             Spanish
## 55             Spanish
## 58              French
## 61              French
## 62              French
## 66              French
## 69             Spanish
## 72              French
## 73             English
## 74             English
## 78             Spanish
## 79              French
## 81              French
## 82              French
## 83             Spanish
## 87             English
## 88              French
## 89              French
## 91              French
## 92             English
## 94              French
## 96             Spanish
## 98             Spanish
## 99             Spanish
## 100            English
# One-way ANOVA of time spent on the new page across preferred languages,
# followed by the decision at level `alpha`.
# The data viewer is only useful when someone is at the console.
if (interactive()) {
  View(new_time_spent_on_page)
}
### calculating p-value
anova_test <- aov(
  time_spent_on_the_page ~ language_preferred,
  data = new_time_spent_on_page
)
print(anova_test)
## Call:
##    aov(formula = time_spent_on_the_page ~ language_preferred, data = new_time_spent_on_page)
## 
## Terms:
##                 language_preferred Residuals
## Sum of Squares              5.6755  156.1030
## Deg. of Freedom                  2        47
## 
## Residual standard error: 1.822454
## Estimated effects may be unbalanced
# Compute the ANOVA table once and reuse it for printing and for the p-value.
anova_summary <- summary(anova_test)
anova_summary
##                    Df Sum Sq Mean Sq F value Pr(>F)
## language_preferred  2   5.68   2.838   0.854  0.432
## Residuals          47 156.10   3.321
### comparing p_value
# First row of the table is the language_preferred term; the second row
# (Residuals) has no p-value.
p_value <- anova_summary[[1]][["Pr(>F)"]][1]
print(p_value)
## [1] 0.4320414
p_value < alpha
## [1] FALSE
### Drawing inference
# Rejecting the null supports "at least one language differs", not "all
# languages differ".
if (p_value < alpha) {
  print("Reject Null,the time spent on new page differs for at least one language")
} else {
  print("Fail to reject Null,time spent on new page is same in all languages")
}
## [1] "Fail to reject Null,time spent on new page is same in all languages"