#Research Question: What is the effect of different brewing techniques on the amount of crème. #H0: Method 1 = Method 2 = Method 3 #H1: At least one pair of methods differ from one another.
espresso <- read.csv("C:/Users/apoor/Downloads/EspressoData (2).csv")
str(espresso)
## 'data.frame': 27 obs. of 2 variables:
## $ cereme : num 36.6 39.6 37.7 36 38.5 ...
## $ brewmethod: int 1 1 1 1 1 1 1 1 1 2 ...
summary(espresso)
## cereme brewmethod
## Min. :21.02 Min. :1
## 1st Qu.:35.66 1st Qu.:1
## Median :38.52 Median :2
## Mean :44.47 Mean :2
## 3rd Qu.:55.23 3rd Qu.:3
## Max. :73.19 Max. :3
#Check if the data is normally distributed
plot(density(espresso$cereme))
qqnorm(espresso$cereme)
#Check if there is any skewness in the data
library("moments")
agostino.test(espresso$cereme)
##
## D'Agostino skewness test
##
## data: espresso$cereme
## skew = 0.54679, z = 1.32787, p-value = 0.1842
## alternative hypothesis: data have a skewness
#Since the p value is > 0.05, we reject the alternative hypothesis.
#Normality test
shapiro.test(espresso$cereme)
##
## Shapiro-Wilk normality test
##
## data: espresso$cereme
## W = 0.92201, p-value = 0.04414
#Scatterplot of residuals
eruption.lm = lm(cereme ~ brewmethod, espresso)
eruption.res = resid(eruption.lm)
plot(espresso$cereme, eruption.res, ylab="Residuals", xlab="brewmethod", main="Title")
abline(0, 0)
bartlett.test(espresso$cereme, espresso$brewmethod)
##
## Bartlett test of homogeneity of variances
##
## data: espresso$cereme and espresso$brewmethod
## Bartlett's K-squared = 0.96331, df = 2, p-value = 0.6178
tapply(espresso$cereme, espresso$brewmethod, var)
## 1 2 3
## 53.29088 102.02220 59.30182
#We need to factorize as there are categorical variables in the data set.
m = (aov(cereme ~ factor(brewmethod), data = espresso))
summary(m)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(brewmethod) 2 4065 2032.6 28.41 4.7e-07 ***
## Residuals 24 1717 71.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#We see that there is high corelation between the brewing method and the amount of cereme. So we will now seehow each brewing technique relates to each other using the pairwise t-test.
library(pgirmess)
pairwise.t.test(espresso$cereme, espresso$brewmethod, p.adjust.method = "bonferroni", paired = FALSE)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: espresso$cereme and espresso$brewmethod
##
## 1 2
## 2 5.2e-07 -
## 3 0.24 4.4e-05
##
## P value adjustment method: bonferroni
#Based on the values here, we can see technique 1 and 3 are similar, while 2 is not.
TukeyHSD(m)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cereme ~ factor(brewmethod), data = espresso)
##
## $`factor(brewmethod)`
## diff lwr upr p adj
## 2-1 28.9 18.942931 38.85707 0.0000005
## 3-1 7.3 -2.657069 17.25707 0.1811000
## 3-2 -21.6 -31.557069 -11.64293 0.0000419
aggregate(espresso[, 1], list(espresso$brewmethod), mean)
## Group.1 x
## 1 1 32.4
## 2 2 61.3
## 3 3 39.7
#Summary: After running the analysis, we can say that the Espresso Data holds all the assumptions for ANOVA. We saw that the second brewing method produces the highest cereme, hence we failed to reject HO as the methods differ from each other.