This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:
##########################################
# section 3.3 Statistical Methods for Evaluation
##########################################
##########################################
# section 3.3.1 Hypothesis Testing
##########################################
# Generate random observations from the two populations. The population
# means differ by 5 and the standard deviations are equal, so each test
# below has a genuine location shift to detect.
# NOTE(review): no set.seed() call is visible in this part of the file, so
# each run draws new values; the "##" lines record the output of one run
# and will not be reproduced digit-for-digit.
x <- rnorm(10, mean = 100, sd = 5) # normal distribution centered at 100
y <- rnorm(20, mean = 105, sd = 5) # normal distribution centered at 105
# Student's t-test (var.equal = TRUE pools the two sample variances)
t.test(x, y, var.equal = TRUE) # run the Student's t-test
##
## Two Sample t-test
##
## data: x and y
## t = -2.5849, df = 28, p-value = 0.01524
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.185008 -1.063474
## sample estimates:
## mean of x mean of y
## 98.95722 104.08146
# Obtain the critical t value for a two-sided test at a 0.05 significance
# level. The pooled test has length(x) + length(y) - 2 degrees of freedom
# (28 here, matching the df reported above); deriving it from the samples
# keeps the critical value correct if the sample sizes are changed.
qt(p = 0.05 / 2, df = length(x) + length(y) - 2, lower.tail = FALSE)
## [1] 2.048407
# Welch's t-test (var.equal = FALSE does not assume equal variances, which
# is why the reported df below is fractional)
t.test(x, y, var.equal = FALSE) # run the Welch's t-test
##
## Welch Two Sample t-test
##
## data: x and y
## t = -2.5954, df = 18.302, p-value = 0.01811
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.2672858 -0.9811964
## sample estimates:
## mean of x mean of y
## 98.95722 104.08146
# Wilcoxon Rank-Sum Test (conf.int = TRUE additionally reports a confidence
# interval and a point estimate for the difference in location)
wilcox.test(x, y, conf.int = TRUE)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 57, p-value = 0.06069
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -9.2376939 0.1978693
## sample estimates:
## difference in location
## -4.985041
##########################################
# section 3.3.6 ANOVA
##########################################
# Randomly assign each of the 500 observations to one of the 3 offer options
# (TRUE is spelled out because T is an ordinary, reassignable variable).
offers <- sample(c("offer1", "offer2", "nopromo"), size = 500, replace = TRUE)
# Simulated 500 observations of purchase sizes on the 3 offer options.
# Each rnorm() call draws one candidate value per observation and ifelse()
# keeps the draw that matches the observation's offer. The draw count is
# taken from length(offers) so it cannot drift from the sample size above.
# rnorm() is unbounded, so some simulated purchase amounts are negative
# (see the Min. values in the summaries below).
purchasesize <- ifelse(
  offers == "offer1",
  rnorm(length(offers), mean = 80, sd = 30),
  ifelse(
    offers == "offer2",
    rnorm(length(offers), mean = 85, sd = 30),
    rnorm(length(offers), mean = 40, sd = 30)
  )
)
# create a data frame of offer option and purchase size
offertest <- data.frame(
  offer = as.factor(offers),
  purchase_amt = purchasesize
)
# display a summary of offertest where offer="offer1"
summary(offertest[offertest$offer == "offer1", ])
## offer purchase_amt
## nopromo: 0 Min. :-22.38
## offer1 :159 1st Qu.: 61.28
## offer2 : 0 Median : 78.88
## Mean : 79.44
## 3rd Qu.:100.39
## Max. :162.44
# display a summary of offertest where offer="offer2"
summary(offertest[offertest$offer == "offer2", ])
## offer purchase_amt
## nopromo: 0 Min. : 4.491
## offer1 : 0 1st Qu.: 63.457
## offer2 :177 Median : 83.926
## Mean : 83.867
## 3rd Qu.:106.005
## Max. :160.672
# display a summary of offertest where offer="nopromo"
summary(offertest[offertest$offer == "nopromo", ])
## offer purchase_amt
## nopromo:164 Min. :-51.33
## offer1 : 0 1st Qu.: 16.71
## offer2 : 0 Median : 37.47
## Mean : 36.01
## 3rd Qu.: 55.10
## Max. :117.94
# fit ANOVA test
# The grouping term is the data frame's own `offer` column. The earlier
# spelling `offers` is not a column of offertest, so the formula fell back
# to the global character vector of the same name and the model silently
# ignored `data` for that term. The fitted values are the same because both
# hold the same labels; only the term name in the output changes.
model <- aov(purchase_amt ~ offer, data = offertest)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## offer 2 232431 116216 130.2 <2e-16 ***
## Residuals 497 443670 893
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey's Honest Significant Difference (HSD) on all
# pair-wise tests for difference of means
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = purchase_amt ~ offer, data = offertest)
##
## $offer
## diff lwr upr p adj
## offer1-nopromo 43.430655 35.613661 51.24765 0.0000000
## offer2-nopromo 47.855871 40.243366 55.46838 0.0000000
## offer2-offer1 4.425216 -3.249165 12.09960 0.3652731