library(readxl)
library(moments)
## Warning: package 'moments' was built under R version 4.1.3
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.1.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:pastecs':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('compute.es')


#RESEARCH QUESTION
#Is there a method that significantly produces more creme?

#Hypothesis:
#H0: there is no difference between methods in terms of producing the most creme.
#HA: there is at least one method that differs than the others in terms of producing creme.  

#get data
espresso <- read.csv("EspressoData.csv",  header = TRUE)
espresso = espresso %>% mutate(brewmethod = brewmethod %>% as.factor())

plot(density(espresso$cereme))

qqnorm(espresso$cereme)

#D'Agostino skewness test:
agostino.test(espresso$cereme)
## 
##  D'Agostino skewness test
## 
## data:  espresso$cereme
## skew = 0.54679, z = 1.32787, p-value = 0.1842
## alternative hypothesis: data have a skewness
#Shapiro skewness
shapiro.test(espresso$cereme)
## 
##  Shapiro-Wilk normality test
## 
## data:  espresso$cereme
## W = 0.92201, p-value = 0.04414
#observation independence
eruption.lm = lm(cereme ~ brewmethod, data=espresso) 
eruption.res = resid(eruption.lm)
plot(espresso$cereme, eruption.res, ylab="Residuals", xlab="brewmethod", main="Espresso") 
abline(0, 0)

#variance equality
bartlett.test(espresso$cereme, espresso$brewmethod)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  espresso$cereme and espresso$brewmethod
## Bartlett's K-squared = 0.96331, df = 2, p-value = 0.6178
tapply(espresso$cereme, espresso$brewmethod, var)
##         1         2         3 
##  53.29088 102.02220  59.30182
#perform model
summary(aov(cereme ~ factor(brewmethod), data = espresso))
##                    Df Sum Sq Mean Sq F value  Pr(>F)    
## factor(brewmethod)  2   4065  2032.6   28.41 4.7e-07 ***
## Residuals          24   1717    71.5                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model <- aov(cereme ~ factor(brewmethod), espresso)

#Results suggest that method affects creme production, p < .001)

#post hoc test
pairwise.t.test(espresso$cereme, espresso$brewmethod, paired = FALSE, 
                p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  espresso$cereme and espresso$brewmethod 
## 
##   1       2      
## 2 5.2e-07 -      
## 3 0.24    4.4e-05
## 
## P value adjustment method: bonferroni
#discussion with specifically which method differed
#Tukey’s Test
TukeyHSD(model)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = cereme ~ factor(brewmethod), data = espresso)
## 
## $`factor(brewmethod)`
##      diff        lwr       upr     p adj
## 2-1  28.9  18.942931  38.85707 0.0000005
## 3-1   7.3  -2.657069  17.25707 0.1811000
## 3-2 -21.6 -31.557069 -11.64293 0.0000419
#method 1&2 and 2&3 differ widely; however, 1&3 don’t

#effect size
t = by(espresso$cereme, espresso$brewmethod, stat.desc)
mes(39.7, 61.3, 7.700768, 10.1006039, 9, 9)
## Mean Differences ES: 
##  
##  d [ 95 %CI] = -2.41 [ -3.62 , -1.19 ] 
##   var(d) = 0.38 
##   p-value(d) = 0 
##   U3(d) = 0.81 % 
##   CLES(d) = 4.45 % 
##   Cliff's Delta = -0.91 
##  
##  g [ 95 %CI] = -2.29 [ -3.45 , -1.14 ] 
##   var(g) = 0.35 
##   p-value(g) = 0 
##   U3(g) = 1.1 % 
##   CLES(g) = 5.27 % 
##  
##  Correlation ES: 
##  
##  r [ 95 %CI] = -0.79 [ -0.92 , -0.51 ] 
##   var(r) = 0.01 
##   p-value(r) = 0 
##  
##  z [ 95 %CI] = -1.06 [ -1.57 , -0.56 ] 
##   var(z) = 0.07 
##   p-value(z) = 0 
##  
##  Odds Ratio ES: 
##  
##  OR [ 95 %CI] = 0.01 [ 0 , 0.12 ] 
##   p-value(OR) = 0 
##  
##  Log OR [ 95 %CI] = -4.36 [ -6.56 , -2.16 ] 
##   var(lOR) = 1.26 
##   p-value(Log OR) = 0 
##  
##  Other: 
##  
##  NNT = -5.01 
##  Total N = 18
aggregate(espresso[, 1], list(espresso$brewmethod), mean)
##   Group.1    x
## 1       1 32.4
## 2       2 61.3
## 3       3 39.7
# Normality: fair
# Independence of observations: fair
# Variance Equality: Good
# Is there any significant result?  Yes.
# Do we need a post hoc? Yes.
# The result of the post hoc? Method 2 produces most
# Effect size?  large.

#SUMMARY
#All assumptions are met for ANOVA. We can see that the second brewing method produces more creme 
#We can also see that method 1&2 and 2&3 differ widely; however, 1&3 don’t.