#Importing the data
espresso <- read.csv("EspressoData.csv")
head(espresso)
## cereme brewmethod
## 1 36.64 1
## 2 39.65 1
## 3 37.74 1
## 4 35.96 1
## 5 38.52 1
## 6 21.02 1
# Research Question:
#Is there any significant difference amongst the 3 different brew methods in terms of the amount of cereme they generate
#HYPOTHESIS:
#H0: There is NO significant difference amongst the 3 different brew methods in terms of the amount of cereme they generate
#Ha: There IS significant difference amongst the 3 different brew methods in terms of the amount of cereme they generate
#Overall Distribution
plot(density(espresso$cereme))
qqnorm(espresso$cereme)
#D’Agostino Skewness Test:
library(moments)
agostino.test(espresso$cereme)
##
## D'Agostino skewness test
##
## data: espresso$cereme
## skew = 0.54679, z = 1.32787, p-value = 0.1842
## alternative hypothesis: data have a skewness
#Shapiro-Wilks Normality Test
shapiro.test(espresso$cereme)
##
## Shapiro-Wilk normality test
##
## data: espresso$cereme
## W = 0.92201, p-value = 0.04414
Since overall the data seems to be normal we can proceed to our next assumption
#Independence of Observations:
eruption.lm = lm(cereme ~ brewmethod, data = espresso)
eruption.res = resid(eruption.lm)
plot(espresso$cereme, eruption.res, xlab = "Brew Methods", ylab = "Residuals", main = "Cereme Quality")
abline(0,0)
#from above we can say that the data somewhat meets the assumptions, so now we will proceed with our next assumption
#Variance Equality ### Bartlett Test
bartlett.test(espresso$cereme, espresso$brewmethod)
##
## Bartlett test of homogeneity of variances
##
## data: espresso$cereme and espresso$brewmethod
## Bartlett's K-squared = 0.96331, df = 2, p-value = 0.6178
tapply(espresso$cereme,espresso$brewmethod, var)
## 1 2 3
## 53.29088 102.02220 59.30182
#we need to check the value for the (largest varience)/(smallest varience) <3 for the data to be balanced and for it to be ok for the ANOVA testing
#102/53.3 = 1.91 which is < 3 hence we can say that the data it is okay for ANOVA testing
#ANOVA TESTING
aov_model <- aov(cereme ~ factor(brewmethod), data = espresso)
summary(aov_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(brewmethod) 2 4065 2032.6 28.41 4.7e-07 ***
## Residuals 24 1717 71.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#POST-HOC TEST
library(pgirmess)
###BONFERRONI TEST
pairwise.t.test(espresso$cereme, espresso$brewmethod, paired = FALSE, p.adjust.method = "bonferroni" )
##
## Pairwise comparisons using t tests with pooled SD
##
## data: espresso$cereme and espresso$brewmethod
##
## 1 2
## 2 5.2e-07 -
## 3 0.24 4.4e-05
##
## P value adjustment method: bonferroni
#Here we can see that METHOD 1 and METHOD 2 has a significant difference
#and METHOD 2 and METHOD 3 has a significance difference
###KRUSKAL WALLIS
kruskalmc(cereme ~factor(brewmethod), data = espresso)
## Multiple comparison test after Kruskal-Wallis
## p.value: 0.05
## Comparisons
## obs.dif critical.dif difference
## 1-2 14.666667 8.957452 TRUE
## 1-3 3.666667 8.957452 FALSE
## 2-3 11.000000 8.957452 TRUE
#here we see that the METHOD 1 and METHOD 2 have TRUE against the difference column showing that there is a significant difference
#Similarly METHOD 2 and METHOD 3 also has a TRUE agaisnt the difference column showing significant difference
###TUKEY TEST
TukeyHSD(aov_model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cereme ~ factor(brewmethod), data = espresso)
##
## $`factor(brewmethod)`
## diff lwr upr p adj
## 2-1 28.9 18.942931 38.85707 0.0000005
## 3-1 7.3 -2.657069 17.25707 0.1811000
## 3-2 -21.6 -31.557069 -11.64293 0.0000419
#here we can see that METHOD 2 and METHOD 1 have a p value<0.001 showing that the are sigificantly different
#Similarly the METHOD 3 and METHOD 2 also have a p-value < 0.001 showing there are significant difference
#EFFECT SIZE
library(pastecs)
library(compute.es)
by(espresso$cereme, espresso$brewmethod, stat.desc)
## espresso$brewmethod: 1
## nbr.val nbr.null nbr.na min max range
## 9.0000000 0.0000000 0.0000000 21.0200000 39.6500000 18.6300000
## sum median mean SE.mean CI.mean.0.95 var
## 291.6000000 35.9600000 32.4000000 2.4333533 5.6113228 53.2908750
## std.dev coef.var
## 7.3000599 0.2253105
## ------------------------------------------------------------
## espresso$brewmethod: 2
## nbr.val nbr.null nbr.na min max range
## 9.0000000 0.0000000 0.0000000 46.6800000 73.1900000 26.5100000
## sum median mean SE.mean CI.mean.0.95 var
## 551.7000000 62.5300000 61.3000000 3.3668680 7.7640115 102.0222000
## std.dev coef.var
## 10.1006039 0.1647733
## ------------------------------------------------------------
## espresso$brewmethod: 3
## nbr.val nbr.null nbr.na min max range
## 9.000000 0.000000 0.000000 32.680000 56.190000 23.510000
## sum median mean SE.mean CI.mean.0.95 var
## 357.300000 37.120000 39.700000 2.566923 5.919334 59.301825
## std.dev coef.var
## 7.700768 0.193974
#METHOD 1: n = 9, M = 32.4, std.dev = 7.3
#METHOD 2: n = 9, M = 61.3, std.dev = 10.1
#METHOD 3: n = 9, M = 39.7, std.dev = 7.7
###mes1-2
mes(61.3,32.4,10.1,7.3,9,9)
## Mean Differences ES:
##
## d [ 95 %CI] = 3.28 [ 1.86 , 4.69 ]
## var(d) = 0.52
## p-value(d) = 0
## U3(d) = 99.95 %
## CLES(d) = 98.98 %
## Cliff's Delta = 0.98
##
## g [ 95 %CI] = 3.12 [ 1.78 , 4.47 ]
## var(g) = 0.47
## p-value(g) = 0
## U3(g) = 99.91 %
## CLES(g) = 98.64 %
##
## Correlation ES:
##
## r [ 95 %CI] = 0.87 [ 0.67 , 0.95 ]
## var(r) = 0
## p-value(r) = 0
##
## z [ 95 %CI] = 1.32 [ 0.81 , 1.83 ]
## var(z) = 0.07
## p-value(z) = 0
##
## Odds Ratio ES:
##
## OR [ 95 %CI] = 383.22 [ 29.45 , 4987.18 ]
## p-value(OR) = 0
##
## Log OR [ 95 %CI] = 5.95 [ 3.38 , 8.51 ]
## var(lOR) = 1.71
## p-value(Log OR) = 0
##
## Other:
##
## NNT = 1.26
## Total N = 18
###mes2-3
mes(61.3,39.7,10.1,7.7,9,9)
## Mean Differences ES:
##
## d [ 95 %CI] = 2.41 [ 1.19 , 3.62 ]
## var(d) = 0.38
## p-value(d) = 0
## U3(d) = 99.19 %
## CLES(d) = 95.55 %
## Cliff's Delta = 0.91
##
## g [ 95 %CI] = 2.29 [ 1.14 , 3.45 ]
## var(g) = 0.35
## p-value(g) = 0
## U3(g) = 98.9 %
## CLES(g) = 94.74 %
##
## Correlation ES:
##
## r [ 95 %CI] = 0.79 [ 0.51 , 0.92 ]
## var(r) = 0.01
## p-value(r) = 0
##
## z [ 95 %CI] = 1.06 [ 0.56 , 1.57 ]
## var(z) = 0.07
## p-value(z) = 0
##
## Odds Ratio ES:
##
## OR [ 95 %CI] = 78.46 [ 8.69 , 707.96 ]
## p-value(OR) = 0
##
## Log OR [ 95 %CI] = 4.36 [ 2.16 , 6.56 ]
## var(lOR) = 1.26
## p-value(Log OR) = 0
##
## Other:
##
## NNT = 1.35
## Total N = 18
##mes1-3
mes(39.7,32.4,7.7,7.3,9,9)
## Mean Differences ES:
##
## d [ 95 %CI] = 0.97 [ 0 , 1.95 ]
## var(d) = 0.25
## p-value(d) = 0.07
## U3(d) = 83.47 %
## CLES(d) = 75.43 %
## Cliff's Delta = 0.51
##
## g [ 95 %CI] = 0.93 [ 0 , 1.86 ]
## var(g) = 0.23
## p-value(g) = 0.07
## U3(g) = 82.29 %
## CLES(g) = 74.38 %
##
## Correlation ES:
##
## r [ 95 %CI] = 0.46 [ -0.01 , 0.76 ]
## var(r) = 0.03
## p-value(r) = 0.07
##
## z [ 95 %CI] = 0.5 [ -0.01 , 1 ]
## var(z) = 0.07
## p-value(z) = 0.07
##
## Odds Ratio ES:
##
## OR [ 95 %CI] = 5.84 [ 0.99 , 34.36 ]
## p-value(OR) = 0.07
##
## Log OR [ 95 %CI] = 1.76 [ -0.01 , 3.54 ]
## var(lOR) = 0.82
## p-value(Log OR) = 0.07
##
## Other:
##
## NNT = 2.84
## Total N = 18
#Cohen's d = 0.97
#Hedge's g = 0.93
#r = 0.46
#since the Cohen's d value is much greater than 0.4 we can say that the effect was large
#SUMMARY:
#From above we can conclude that the the brew methods 1 and 2 had a significant difference and brew method 2 and 3 also had a significant difference as their p- values in the Tukey Test was less than 0.001(p-val<0.001) and in the Kruskal-Wallis test both have TRUE against the difference column showing a significant difference.
#While checking the effect size we see that all the 3 groups : 1-2,2-3,1-3 all have a Cohen's d > 0.4 showing that the effect was large