510 11/5

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

week2 = read.csv("/Users/FrankLin/Desktop/HU 510/EspressoData.csv", header = TRUE)
week2

##    cereme brewmethod
## 1   36.64          1
## 2   39.65          1
## 3   37.74          1
## 4   35.96          1
## 5   38.52          1
## 6   21.02          1
## 7   24.81          1
## 8   34.18          1
## 9   23.08          1
## 10  70.84          2
## 11  46.68          2
## 12  73.19          2
## 13  57.78          2
## 14  48.61          2
## 15  72.77          2
## 16  65.04          2
## 17  62.53          2
## 18  54.26          2
## 19  56.19          3
## 20  36.67          3
## 21  35.35          3
## 22  40.11          3
## 23  33.52          3
## 24  37.12          3
## 25  37.33          3
## 26  32.68          3
## 27  48.33          3

## 1.First, I use brewmethod as my IV. 
## 2.The research question is “There is a significant difference among the three brew method: method 1, method 2 and method 3, in terms of their cereme they can make”
## 3.Hypothesis:
##   H0: there is no difference in terms of cereme it can make among the three methods.
##   H1: there is at least one group differs than the others in terms of cerem it can make among the three methods.

check the assumption of normality

plot(density(week2$cereme))

## to do a D'agostino skewness test. we don't want a skewness. 
library("moments")
agostino.test(week2$cereme)

## 
##  D'Agostino skewness test
## 
## data:  week2$cereme
## skew = 0.54679, z = 1.32787, p-value = 0.1842
## alternative hypothesis: data have a skewness

## we can see the p value and it's not significant, we fail reject the null hypothesis. the data don't have a skewness which is good!

qqnorm(week2$cereme)

shapiro.test(week2$cereme)

## 
##  Shapiro-Wilk normality test
## 
## data:  week2$cereme
## W = 0.92201, p-value = 0.04414

## we want the result to be significant so we can reject the Ho

## we went through all tests and I think overall the data meet the normality assumption. We can move to the next assumption

let’s move to check next assumption Independence of observations

## residual plot
eruption.lm = lm(cereme ~ brewmethod, data = week2)
eruption.res = resid(eruption.lm)

plot(week2$cereme, eruption.res, ylab = "Residuals", xlab = "brewmethod", main = "residual plot")
abline(0,0)

## From the plot, I think it roughly meets the assumption.

check the last assumption :Variance Equality

## equal variance want it fail to reject
bartlett.test(week2$cereme, week2$brewmethod)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  week2$cereme and week2$brewmethod
## Bartlett's K-squared = 0.96331, df = 2, p-value = 0.6178

tapply(week2$cereme, week2$brewmethod, var)

##         1         2         3 
##  53.29088 102.02220  59.30182

## it meets the third assumption

Let’s perform the model

## Anova
summary(aov(cereme ~ factor(brewmethod), data = week2))

##                    Df Sum Sq Mean Sq F value  Pr(>F)    
## factor(brewmethod)  2   4065  2032.6   28.41 4.7e-07 ***
## Residuals          24   1717    71.5                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model <- aov(cereme ~ factor(brewmethod), week2)
model

## Call:
##    aov(formula = cereme ~ factor(brewmethod), data = week2)
## 
## Terms:
##                 factor(brewmethod) Residuals
## Sum of Squares            4065.180  1716.919
## Deg. of Freedom                  2        24
## 
## Residual standard error: 8.458032
## Estimated effects may be unbalanced

summary(model)

##                    Df Sum Sq Mean Sq F value  Pr(>F)    
## factor(brewmethod)  2   4065  2032.6   28.41 4.7e-07 ***
## Residuals          24   1717    71.5                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## from the result we can get F(2,24)=28.41, P value<0.001, so it's significant we can reject the H0.

## post hoc tests
library(pgirmess)
pairwise.t.test(week2$cereme, week2$brewmethod, paired = FALSE, p.adjust.method = "bonferroni" )

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  week2$cereme and week2$brewmethod 
## 
##   1       2      
## 2 5.2e-07 -      
## 3 0.24    4.4e-05
## 
## P value adjustment method: bonferroni

kruskalmc(cereme ~ factor(brewmethod), data = week2)

## Multiple comparison test after Kruskal-Wallis 
## p.value: 0.05 
## Comparisons
##       obs.dif critical.dif difference
## 1-2 14.666667     8.957452       TRUE
## 1-3  3.666667     8.957452      FALSE
## 2-3 11.000000     8.957452       TRUE

TukeyHSD(model)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = cereme ~ factor(brewmethod), data = week2)
## 
## $`factor(brewmethod)`
##      diff        lwr       upr     p adj
## 2-1  28.9  18.942931  38.85707 0.0000005
## 3-1   7.3  -2.657069  17.25707 0.1811000
## 3-2 -21.6 -31.557069 -11.64293 0.0000419

## effect size
library(pastecs)
library(compute.es)
by(week2$cereme, week2$brewmethod, stat.desc)

## week2$brewmethod: 1
##      nbr.val     nbr.null       nbr.na          min          max 
##    9.0000000    0.0000000    0.0000000   21.0200000   39.6500000 
##        range          sum       median         mean      SE.mean 
##   18.6300000  291.6000000   35.9600000   32.4000000    2.4333533 
## CI.mean.0.95          var      std.dev     coef.var 
##    5.6113228   53.2908750    7.3000599    0.2253105 
## -------------------------------------------------------- 
## week2$brewmethod: 2
##      nbr.val     nbr.null       nbr.na          min          max 
##    9.0000000    0.0000000    0.0000000   46.6800000   73.1900000 
##        range          sum       median         mean      SE.mean 
##   26.5100000  551.7000000   62.5300000   61.3000000    3.3668680 
## CI.mean.0.95          var      std.dev     coef.var 
##    7.7640115  102.0222000   10.1006039    0.1647733 
## -------------------------------------------------------- 
## week2$brewmethod: 3
##      nbr.val     nbr.null       nbr.na          min          max 
##     9.000000     0.000000     0.000000    32.680000    56.190000 
##        range          sum       median         mean      SE.mean 
##    23.510000   357.300000    37.120000    39.700000     2.566923 
## CI.mean.0.95          var      std.dev     coef.var 
##     5.919334    59.301825     7.700768     0.193974

mes(61.3,32.4,10.1,7.3,9,9)

## Mean Differences ES: 
##  
##  d [ 95 %CI] = 3.28 [ 1.75 , 4.81 ] 
##   var(d) = 0.52 
##   p-value(d) = 0 
##   U3(d) = 99.95 % 
##   CLES(d) = 98.98 % 
##   Cliff's Delta = 0.98 
##  
##  g [ 95 %CI] = 3.12 [ 1.67 , 4.58 ] 
##   var(g) = 0.47 
##   p-value(g) = 0 
##   U3(g) = 99.91 % 
##   CLES(g) = 98.64 % 
##  
##  Correlation ES: 
##  
##  r [ 95 %CI] = 0.85 [ 0.62 , 0.95 ] 
##   var(r) = 0 
##   p-value(r) = 0 
##  
##  z [ 95 %CI] = 1.27 [ 0.72 , 1.82 ] 
##   var(z) = 0.07 
##   p-value(z) = 0 
##  
##  Odds Ratio ES: 
##  
##  OR [ 95 %CI] = 383.22 [ 23.88 , 6148.86 ] 
##   p-value(OR) = 0 
##  
##  Log OR [ 95 %CI] = 5.95 [ 3.17 , 8.72 ] 
##   var(lOR) = 1.71 
##   p-value(Log OR) = 0 
##  
##  Other: 
##  
##  NNT = 1.26 
##  Total N = 18

mes(39.7,61.3,7.7,10.1,9,9)

## Mean Differences ES: 
##  
##  d [ 95 %CI] = -2.41 [ -3.72 , -1.09 ] 
##   var(d) = 0.38 
##   p-value(d) = 0 
##   U3(d) = 0.81 % 
##   CLES(d) = 4.45 % 
##   Cliff's Delta = -0.91 
##  
##  g [ 95 %CI] = -2.29 [ -3.54 , -1.04 ] 
##   var(g) = 0.35 
##   p-value(g) = 0 
##   U3(g) = 1.1 % 
##   CLES(g) = 5.26 % 
##  
##  Correlation ES: 
##  
##  r [ 95 %CI] = -0.77 [ -0.92 , -0.44 ] 
##   var(r) = 0.01 
##   p-value(r) = 0 
##  
##  z [ 95 %CI] = -1.02 [ -1.56 , -0.47 ] 
##   var(z) = 0.07 
##   p-value(z) = 0 
##  
##  Odds Ratio ES: 
##  
##  OR [ 95 %CI] = 0.01 [ 0 , 0.14 ] 
##   p-value(OR) = 0 
##  
##  Log OR [ 95 %CI] = -4.36 [ -6.74 , -1.98 ] 
##   var(lOR) = 1.26 
##   p-value(Log OR) = 0 
##  
##  Other: 
##  
##  NNT = -5.01 
##  Total N = 18

mes(32.4,39.7,7.3,7.7,9,9)

## Mean Differences ES: 
##  
##  d [ 95 %CI] = -0.97 [ -2.03 , 0.08 ] 
##   var(d) = 0.25 
##   p-value(d) = 0.07 
##   U3(d) = 16.53 % 
##   CLES(d) = 24.57 % 
##   Cliff's Delta = -0.51 
##  
##  g [ 95 %CI] = -0.93 [ -1.93 , 0.08 ] 
##   var(g) = 0.23 
##   p-value(g) = 0.07 
##   U3(g) = 17.71 % 
##   CLES(g) = 25.62 % 
##  
##  Correlation ES: 
##  
##  r [ 95 %CI] = -0.44 [ -0.77 , 0.08 ] 
##   var(r) = 0.03 
##   p-value(r) = 0.09 
##  
##  z [ 95 %CI] = -0.47 [ -1.02 , 0.08 ] 
##   var(z) = 0.07 
##   p-value(z) = 0.09 
##  
##  Odds Ratio ES: 
##  
##  OR [ 95 %CI] = 0.17 [ 0.03 , 1.16 ] 
##   p-value(OR) = 0.07 
##  
##  Log OR [ 95 %CI] = -1.76 [ -3.68 , 0.15 ] 
##   var(lOR) = 0.82 
##   p-value(Log OR) = 0.07 
##  
##  Other: 
##  
##  NNT = -6.05 
##  Total N = 18

## Summary
## Observations from the study were analyzed by conducting a one-way analysis of variance using R version 3.6.1. First, all assumptions are met and there is no adjustment made. Results suggest F(2,24)=28.41, P value<0.001
## Continue the discussion with specifically which groups differed, a Tukey’s hoc test was established. The result suggested that there is a significant difference between method 1 and method 2 (p < .001) and method 2 and method 3 (p< .001).  The method 2 can create the most cereme.