R Activity Section_3

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

##########################################
# section 3.3 Statistical Methods for Evaluation
##########################################


##########################################
# section 3.3.1 Hypothesis Testing
##########################################

# Fix the RNG seed so the simulated samples, and every test result derived
# from them, are identical on each run.
# NOTE(review): the output recorded in this file came from an unseeded run
# and will not match these draws exactly.
set.seed(1234)

# generate random observations from the two populations
x <- rnorm(10, mean = 100, sd = 5) # 10 draws, normal centered at 100
y <- rnorm(20, mean = 105, sd = 5) # 20 draws, normal centered at 105

# Student's t-test: two-sample comparison of means with a pooled variance
# estimate (var.equal = TRUE)
t.test(x, y, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  x and y
## t = -2.5849, df = 28, p-value = 0.01524
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.185008 -1.063474
## sample estimates:
## mean of x mean of y 
##  98.95722 104.08146

# Critical t value for a two-sided test at the 0.05 significance level.
# The pooled two-sample t-test has n_x + n_y - 2 degrees of freedom; deriving
# them from the samples keeps this value in step with x and y (28 for the
# 10 and 20 observations generated above).
qt(p = 0.05 / 2, df = length(x) + length(y) - 2, lower.tail = FALSE)

## [1] 2.048407

# Welch's t-test: two-sample comparison of means without assuming that the
# two populations share a common variance (var.equal = FALSE)
t.test(x = x, y = y, var.equal = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  x and y
## t = -2.5954, df = 18.302, p-value = 0.01811
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.2672858 -0.9811964
## sample estimates:
## mean of x mean of y 
##  98.95722 104.08146

# Wilcoxon rank-sum test on the two samples; conf.int = TRUE additionally
# reports a confidence interval and an estimate of the location shift
wilcox.test(x = x, y = y, conf.int = TRUE)

## 
##  Wilcoxon rank sum test
## 
## data:  x and y
## W = 57, p-value = 0.06069
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -9.2376939  0.1978693
## sample estimates:
## difference in location 
##              -4.985041

##########################################
# section 3.3.6 ANOVA
##########################################

# Fix the RNG seed so the simulated offers and purchase sizes are
# reproducible from run to run.
set.seed(2019)

# number of simulated purchases
n_obs <- 500

# randomly assign one of the three offer options to each observation
offers <- sample(c("offer1", "offer2", "nopromo"),
                 size = n_obs, replace = TRUE)

# mean purchase size for each offer option (all options share sd = 30)
offer_means <- c(offer1 = 80, offer2 = 85, nopromo = 40)

# Simulate one purchase size per observation from the normal distribution of
# its offer. rnorm() is vectorized over `mean`, so a single call draws each
# value with the mean looked up for that observation's offer.
purchasesize <- rnorm(n_obs, mean = unname(offer_means[offers]), sd = 30)

# create a data frame of offer option and purchase size
offertest <- data.frame(offer = as.factor(offers),
                        purchase_amt = purchasesize)

# summarise the rows of offertest assigned to "offer1"
summary(subset(offertest, offer == "offer1"))

##      offer      purchase_amt   
##  nopromo:  0   Min.   :-22.38  
##  offer1 :159   1st Qu.: 61.28  
##  offer2 :  0   Median : 78.88  
##                Mean   : 79.44  
##                3rd Qu.:100.39  
##                Max.   :162.44

# summarise the rows of offertest assigned to "offer2"
summary(subset(offertest, offer == "offer2"))

##      offer      purchase_amt    
##  nopromo:  0   Min.   :  4.491  
##  offer1 :  0   1st Qu.: 63.457  
##  offer2 :177   Median : 83.926  
##                Mean   : 83.867  
##                3rd Qu.:106.005  
##                Max.   :160.672

# summarise the rows of offertest assigned to "nopromo"
summary(subset(offertest, offer == "nopromo"))

##      offer      purchase_amt   
##  nopromo:164   Min.   :-51.33  
##  offer1 :  0   1st Qu.: 16.71  
##  offer2 :  0   Median : 37.47  
##                Mean   : 36.01  
##                3rd Qu.: 55.10  
##                Max.   :117.94

# Fit a one-way ANOVA of purchase amount on the offer option.
# The predictor is the `offer` column of offertest rather than the
# free-standing `offers` vector, so every term in the formula is resolved
# from `data` and the fit depends only on the data frame passed in.
model <- aov(purchase_amt ~ offer, data = offertest)
summary(model)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## offer         2 232431  116216   130.2 <2e-16 ***
## Residuals   497 443670     893                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey's Honest Significant Difference (HSD) on all
# pair-wise tests for difference of means
TukeyHSD(model)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = purchase_amt ~ offer, data = offertest)
## 
## $offer
##                     diff       lwr      upr     p adj
## offer1-nopromo 43.430655 35.613661 51.24765 0.0000000
## offer2-nopromo 47.855871 40.243366 55.46838 0.0000000
## offer2-offer1   4.425216 -3.249165 12.09960 0.3652731

R Activity Section_3_3

Raviteja Yerrapati

January 18, 2019

R Markdown