##########################################
# section 3.3 Statistical Methods for Evaluation
##########################################


##########################################
# section 3.3.1 Hypothesis Testing
##########################################

# Draw one independent random sample from each of two normal populations
# that share a standard deviation of 5 but differ in mean.
x <- rnorm(n = 10, mean = 100, sd = 5) # 10 observations, population mean 100
y <- rnorm(n = 20, mean = 105, sd = 5) # 20 observations, population mean 105

# Student's two-sample t-test: var.equal = TRUE treats both samples as
# having the same variance.
t.test(x = x, y = y, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  x and y
## t = -3.0312, df = 28, p-value = 0.0052
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.90850  -2.30399
## sample estimates:
## mean of x mean of y 
##   99.8523  106.9585

# Critical t value for a two-sided test at the 0.05 significance level:
# half of alpha goes in the upper tail; df = 28 as reported by the test above.
qt(p = 0.05 / 2, df = 28, lower.tail = FALSE)
## [1] 2.048407

# Welch's t-test: same comparison, with var.equal = FALSE so the two sample
# variances are not assumed to be equal.
t.test(x = x, y = y, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  x and y
## t = -2.5236, df = 11.911, p-value = 0.02686
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -13.246741  -0.965751
## sample estimates:
## mean of x mean of y 
##   99.8523  106.9585

# Wilcoxon rank-sum test; conf.int = TRUE also reports a confidence interval
# and an estimate for the difference in location.
wilcox.test(x = x, y = y, conf.int = TRUE)
## 
##  Wilcoxon rank sum test
## 
## data:  x and y
## W = 43, p-value = 0.01108
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -13.318635  -1.765371
## sample estimates:
## difference in location 
##              -7.222526
##########################################
# section 3.3.6 ANOVA
##########################################

# Randomly assign one of the three offer options to each of 500 observations.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
offers <- sample(c("offer1", "offer2", "nopromo"), size = 500, replace = TRUE)

# Simulate one purchase size per observation. Each offer option has its own
# mean and all share sd = 30. Indexing the named vector by `offers` gives the
# mean that applies to each observation, so a single rnorm() call draws
# exactly one value per observation.
offer_means <- c(offer1 = 80, offer2 = 85, nopromo = 40)
purchasesize <- rnorm(
  length(offers),
  mean = unname(offer_means[offers]),
  sd = 30
)

# create a data frame of offer option (as a factor) and purchase size
offertest <- data.frame(offer = as.factor(offers),
                        purchase_amt = purchasesize)

# Summarise the rows of offertest whose offer is "offer1"
summary(subset(offertest, offer == "offer1"))
##      offer      purchase_amt    
##  nopromo:  0   Min.   :  4.344  
##  offer1 :171   1st Qu.: 54.322  
##  offer2 :  0   Median : 73.341  
##                Mean   : 76.625  
##                3rd Qu.: 97.037  
##                Max.   :166.014

# Summarise the rows of offertest whose offer is "offer2"
summary(subset(offertest, offer == "offer2"))
##      offer      purchase_amt   
##  nopromo:  0   Min.   : 14.97  
##  offer1 :  0   1st Qu.: 65.81  
##  offer2 :169   Median : 88.32  
##                Mean   : 87.07  
##                3rd Qu.:107.46  
##                Max.   :155.58

# Summarise the rows of offertest whose offer is "nopromo"
summary(subset(offertest, offer == "nopromo"))
##      offer      purchase_amt   
##  nopromo:160   Min.   :-45.66  
##  offer1 :  0   1st Qu.: 12.04  
##  offer2 :  0   Median : 36.15  
##                Mean   : 36.33  
##                3rd Qu.: 58.93  
##                Max.   :105.14
# Fit a one-way ANOVA of purchase amount on offer option.
# The formula uses the `offer` column of offertest, so every variable in the
# model comes from `data` and none is picked up from the global environment.
model <- aov(purchase_amt ~ offer, data = offertest)
summary(model)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## offer         2 234375  117187   129.3 <2e-16 ***
## Residuals   497 450373     906                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey's Honest Significant Difference (HSD) on all
# pair-wise tests for difference of means
TukeyHSD(model)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = purchase_amt ~ offer, data = offertest)
## 
## $offer
##                    diff       lwr      upr     p adj
## offer1-nopromo 40.29274 32.509310 48.07618 0.0000000
## offer2-nopromo 50.73950 42.933841 58.54517 0.0000000
## offer2-offer1  10.44676  2.771144 18.12238 0.0041754