Homework 6

library(ggplot2)
library(agricolae)
library(multcomp)
library(dplyr)
library(ggpubr)

10.

# Problem 10 data in wide form: one column per treatment, 20 observations each.
dat3.10 <- data.frame(
  Sham = c(
    4.51, 7.95, 4.97, 3.00, 7.97, 2.23, 3.95, 5.64, 9.35, 6.52,
    4.96, 6.10, 7.19, 4.03, 2.72, 9.19, 5.17, 5.70, 5.85, 6.45
  ),
  PEMF1h_day = c(
    5.32, 6.00, 5.12, 7.08, 5.48, 6.52, 4.09, 6.28, 7.77, 5.68,
    8.47, 4.58, 4.11, 5.72, 5.91, 6.89, 6.99, 4.98, 9.94, 6.38
  ),
  PEMF2h_day = c(
    4.73, 5.81, 5.69, 3.86, 4.06, 6.56, 8.34, 3.01, 6.71, 6.51,
    1.70, 5.89, 6.55, 5.34, 5.88, 7.50, 3.28, 5.38, 7.30, 5.46
  ),
  PEMF4h_day = c(
    7.03, 4.65, 6.65, 5.49, 6.98, 4.85, 7.26, 5.92, 5.58, 7.91,
    4.90, 4.54, 8.18, 5.42, 6.03, 7.04, 5.17, 7.60, 7.90, 7.91
  ),
  stringsAsFactors = FALSE
)

# Reshape to long form (columns `values` and `ind`) because the data frame
# above separates the factor levels into columns, while aov() and friends
# expect one response column plus one grouping column.
wdat3.10 <- stack(dat3.10)
wdat3.10$ind <- as.factor(wdat3.10$ind)
wdat3.10$values <- as.numeric(wdat3.10$values)

Checking for assumptions:
Normality:

# Normal Q-Q plot of the one-way ANOVA residuals. The normality assumption
# concerns the error terms, so the group means are removed first instead of
# plotting the pooled raw responses.
ggqqplot(as.numeric(residuals(aov(values ~ ind, data = wdat3.10))))

As the Q-Q plot shows, the points fall close to the reference line, so the data appear to be approximately normally distributed.
Equal Variances:

# Side-by-side boxplots, one per column (treatment) of the wide data frame,
# used to compare the spread of the groups visually.
graphics::boxplot(x = dat3.10)

In the boxplot, the variability of each group is represented by the vertical size of its box. Looking at them, we can see that there’s very little difference in variance between the groups.

Independence:
I couldn’t find out online how to make the time plot you mentioned, and whenever I search for how to check independence I get something along the lines of “This assumption can only be satisfied by correctly randomising your experimental design” every time.

Pairwise Comparisons:

# Tukey HSD for all pairwise treatment differences in problem 10.
# The former `dat = wdat3.10` argument was passed to TukeyHSD() (not aov()),
# where it was silently ignored; the formula already references the columns
# explicitly, so the argument is dropped and the printed labels are unchanged.
TukeyHSD(aov(wdat3.10$values ~ wdat3.10$ind))

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = wdat3.10$values ~ wdat3.10$ind)
## 
## $`wdat3.10$ind`
##                          diff        lwr       upr     p adj
## PEMF1h_day-Sham        0.4930 -0.8410774 1.8270774 0.7664479
## PEMF2h_day-Sham       -0.1945 -1.5285774 1.1395774 0.9807516
## PEMF4h_day-Sham        0.6780 -0.6560774 2.0120774 0.5437964
## PEMF2h_day-PEMF1h_day -0.6875 -2.0215774 0.6465774 0.5320811
## PEMF4h_day-PEMF1h_day  0.1850 -1.1490774 1.5190774 0.9833473
## PEMF4h_day-PEMF2h_day  0.8725 -0.4615774 2.2065774 0.3216071

All of the “p adj” values are well above 0.05, which means that none of the pairwise differences between the means are statistically significant.

# Fisher's LSD for problem 10. The error degrees of freedom and the error mean
# square are taken from the fitted one-way ANOVA instead of being typed in by
# hand, so they cannot drift from the data (the hand-entered 2.579 was a
# rounded value, so the printed MSerror/LSD may differ in the later decimals).
fit3.10 <- aov(values ~ ind, data = wdat3.10)
LSDTest <- LSD.test(
  wdat3.10$values,
  wdat3.10$ind,
  DFerror = df.residual(fit3.10),
  MSerror = deviance(fit3.10) / df.residual(fit3.10) # residual SS / residual df
)
LSDTest

## $statistics
##   MSerror Df     Mean       CV  t.value      LSD
##     2.579 76 5.916625 27.14261 1.991673 1.011448
## 
## $parameters
##         test p.ajusted       name.t ntr alpha
##   Fisher-LSD      none wdat3.10$ind   4  0.05
## 
## $means
##            wdat3.10$values      std  r      LCL      UCL  Min  Max    Q25
## PEMF1h_day          6.1655 1.443944 20 5.450298 6.880702 4.09 9.94 5.2700
## PEMF2h_day          5.4780 1.644786 20 4.762798 6.193202 1.70 8.34 4.5625
## PEMF4h_day          6.3505 1.231821 20 5.635298 7.065702 4.54 8.18 5.3575
## Sham                5.6725 2.002422 20 4.957298 6.387702 2.23 9.35 4.3900
##              Q50    Q75
## PEMF1h_day 5.955 6.9150
## PEMF2h_day 5.750 6.5525
## PEMF4h_day 6.340 7.3450
## Sham       5.670 6.6875
## 
## $comparison
## NULL
## 
## $groups
##            wdat3.10$values groups
## PEMF4h_day          6.3505      a
## PEMF1h_day          6.1655      a
## Sham                5.6725      a
## PEMF2h_day          5.4780      a
## 
## attr(,"class")
## [1] "group"

The least significant difference (LSD) between a pair of means is 1.011. Since all the pairwise differences are smaller than that, every treatment is placed in the same group “a”, so the pairs are not significantly different.

11.

# Problem 11 data in wide form: one column per group, 4 observations each.
# The columns are named X1..X4, which is what data.frame() produces from the
# non-syntactic names "1".."4".
dat3.11 <- data.frame(
  X1 = c(3129, 3000, 2865, 2890),
  X2 = c(3200, 3300, 2975, 3150),
  X3 = c(2800, 2900, 2985, 3050),
  X4 = c(2600, 2700, 2600, 2765),
  stringsAsFactors = FALSE
)

# Long form: `values` holds the response, `ind` the group label.
wdat3.11 <- stack(dat3.11)

Checking for assumptions:
Normality:

# Normal Q-Q plot of the one-way ANOVA residuals. The group means are removed
# first because the normality assumption concerns the error terms; pooling raw
# responses from groups with different means would distort the plot.
ggqqplot(as.numeric(residuals(aov(values ~ ind, data = wdat3.11))))

As the Q-Q plot shows, the points fall close to the reference line, so the data appear to be approximately normally distributed.
Equal Variances:

# Side-by-side boxplots, one per column (group) of the wide data frame,
# used to compare the spread of the groups visually.
graphics::boxplot(x = dat3.11)

# Tukey HSD for all pairwise group differences in problem 11.
# The former `dat = wdat3.11` argument went to TukeyHSD() rather than aov()
# and was silently ignored, so it is dropped; the formula is kept as-is so the
# printed labels stay the same.
TukeyHSD(aov(wdat3.11$values ~ as.factor(wdat3.11$ind)))

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = wdat3.11$values ~ as.factor(wdat3.11$ind))
## 
## $`as.factor(wdat3.11$ind)`
##          diff        lwr        upr     p adj
## X2-X1  185.25  -52.50029  423.00029 0.1493561
## X3-X1  -37.25 -275.00029  200.50029 0.9652776
## X4-X1 -304.75 -542.50029  -66.99971 0.0115923
## X3-X2 -222.50 -460.25029   15.25029 0.0693027
## X4-X2 -490.00 -727.75029 -252.24971 0.0002622
## X4-X3 -267.50 -505.25029  -29.74971 0.0261838

From the test, we can tell that the differences X2-X1, X3-X1, and X3-X2 are not statistically significant. But X4-X1, X4-X2, and X4-X3 are statistically significant.

# Pairwise t tests (pooled SD) with Bonferroni-adjusted p-values, for
# comparison with the Tukey HSD results.
pairwise.t.test(
  x = wdat3.11$values,
  g = as.factor(wdat3.11$ind),
  p.adjust.method = "bonferroni"
)

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  wdat3.11$values and as.factor(wdat3.11$ind) 
## 
##    X1      X2      X3     
## X2 0.23544 -       -      
## X3 1.00000 0.10019 -      
## X4 0.01503 0.00031 0.03530
## 
## P value adjustment method: bonferroni

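Here we reach the same conclusions, but the adjusted p-values are different. For X3-X1 the Bonferroni method gives 1.00, while TukeyHSD gives 0.9653. The Bonferroni adjustment multiplies each p-value by the number of comparisons and caps the result at 1, so it is more conservative than Tukey’s method. The difference between these two numbers gives me some idea of why TukeyHSD is usually preferred for all pairwise comparisons: it controls the family-wise error rate without being as conservative.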

16.

# Problem 16 data in wide form: one column per group, 4 observations each.
# The columns are named X20g/X30g/X40g, which is what data.frame() produces
# from the non-syntactic names "20g", "30g" and "40g".
dat3.16 <- data.frame(
  X20g = c(24, 28, 37, 30),
  X30g = c(37, 44, 31, 35),
  X40g = c(42, 47, 52, 38),
  stringsAsFactors = FALSE
)

# Long form: `values` holds the response, `ind` the group label.
wdat3.16 <- stack(dat3.16)
wdat3.16$ind <- as.factor(wdat3.16$ind)
wdat3.16$values <- as.numeric(wdat3.16$values)

Checking for assumptions:
Normality:

# Normal Q-Q plot of the one-way ANOVA residuals. The group means are removed
# first because the normality assumption concerns the error terms; pooling raw
# responses from groups with different means would distort the plot.
ggqqplot(as.numeric(residuals(aov(values ~ ind, data = wdat3.16))))

As the Q-Q plot shows, the points fall close to the reference line, so the data appear to be approximately normally distributed.
Equal Variances:

# Side-by-side boxplots, one per column (group) of the wide data frame,
# used to compare the spread of the groups visually.
graphics::boxplot(x = dat3.16)

# Tukey HSD for all pairwise group differences in problem 16.
# The former `dat = wdat3.16` argument went to TukeyHSD() rather than aov()
# and was silently ignored, so it is dropped; the formula is kept as-is so the
# printed labels stay the same.
TukeyHSD(aov(wdat3.16$values ~ as.factor(wdat3.16$ind)))

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = wdat3.16$values ~ as.factor(wdat3.16$ind))
## 
## $`as.factor(wdat3.16$ind)`
##           diff       lwr      upr     p adj
## X30g-X20g    7 -4.172869 18.17287 0.2402975
## X40g-X20g   15  3.827131 26.17287 0.0114434
## X40g-X30g    8 -3.172869 19.17287 0.1680265

From TukeyHSD test, we get that the differences between 30g and 20g, and 40g and 30g are not statistically significant. While the difference between 40g and 20g is statistically significant.

# Scheffe's test on the one-way ANOVA for problem 16. The treatment name must
# match the term exactly as it is written in the model formula.
ScheffeTest <- scheffe.test(
  y = aov(wdat3.16$values ~ wdat3.16$ind),
  trt = "wdat3.16$ind"
)
ScheffeTest

## $statistics
##    MSerror Df        F     Mean       CV  Scheffe CriticalDifference
##   32.02778  9 4.256495 37.08333 15.26106 2.917703           11.67588
## 
## $parameters
##      test       name.t ntr alpha
##   Scheffe wdat3.16$ind   3  0.05
## 
## $means
##      wdat3.16$values      std r Min Max Q25  Q50   Q75
## X20g           29.75 5.439056 4  24  37  27 29.0 31.75
## X30g           36.75 5.439056 4  31  44  34 36.0 38.75
## X40g           44.75 6.075909 4  38  52  41 44.5 48.25
## 
## $comparison
## NULL
## 
## $groups
##      wdat3.16$values groups
## X40g           44.75      a
## X30g           36.75     ab
## X20g           29.75      b
## 
## attr(,"class")
## [1] "group"

This test tells us that the means of 40g and 20g are significantly different from each other, while the mean of 30g is not significantly different from either 40g or 20g, which again corresponds with Tukey’s test.

17.

# Problem 17 data in wide form: one column per car class, 10 observations each.
dat3.17 <- data.frame(
  Subcompact = c(3, 5, 3, 7, 6, 5, 3, 2, 1, 6),
  Compact = c(1, 3, 4, 7, 5, 6, 3, 2, 1, 7),
  Midsize = c(4, 1, 3, 5, 7, 1, 2, 4, 2, 7),
  Fullsize = c(3, 5, 7, 5, 10, 3, 4, 7, 2, 7),
  stringsAsFactors = FALSE
)

# Long form: `values` holds the response, `ind` the group label.
wdat3.17 <- stack(dat3.17)
# Assign back to wdat3.17: the result was previously stored in wdat3.16 by
# mistake, which overwrote the problem 16 data with the problem 17 data.
wdat3.17 <- transform(
  wdat3.17,
  ind = as.factor(ind),
  values = as.numeric(values)
)

Checking for assumptions:
Normality:

# Normal Q-Q plot of the one-way ANOVA residuals. The normality assumption
# concerns the error terms, so the group means are removed first instead of
# plotting the pooled raw responses.
ggqqplot(as.numeric(residuals(aov(values ~ ind, data = wdat3.17))))

As the Q-Q plot shows, the points fall close to the reference line, so the data appear to be approximately normally distributed.
Equal Variances:

# Side-by-side boxplots, one per column (group) of the wide data frame,
# used to compare the spread of the groups visually.
graphics::boxplot(x = dat3.17)

# Tukey HSD for all pairwise group differences in problem 17.
# The former `dat = wdat3.17` argument went to TukeyHSD() rather than aov()
# and was silently ignored, so it is dropped; the formula is kept as-is so the
# printed labels stay the same.
TukeyHSD(aov(wdat3.17$values ~ wdat3.17$ind))

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = wdat3.17$values ~ wdat3.17$ind)
## 
## $`wdat3.17$ind`
##                     diff        lwr      upr     p adj
## Compact-Subcompact  -0.2 -2.8954706 2.495471 0.9971215
## Midsize-Subcompact  -0.5 -3.1954706 2.195471 0.9586389
## Fullsize-Subcompact  1.2 -1.4954706 3.895471 0.6314346
## Midsize-Compact     -0.3 -2.9954706 2.395471 0.9904787
## Fullsize-Compact     1.4 -1.2954706 4.095471 0.5082612
## Fullsize-Midsize     1.7 -0.9954706 4.395471 0.3392640

Tukey’s HSD test tells us that none of the differences in means are statistically significant.

# Dunnett's test for problem 17: each group is compared with the first factor
# level (Subcompact). The fit is stored under its own name so the aov()
# function is not masked, and `data` is spelled out instead of relying on
# partial argument matching of `dat`.
fit3.17 <- aov(values ~ ind, data = wdat3.17)
summary(glht(fit3.17, linfct = mcp(ind = "Dunnett")))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Dunnett Contrasts
## 
## 
## Fit: aov(formula = values ~ ind, data = wdat3.17)
## 
## Linear Hypotheses:
##                            Estimate Std. Error t value Pr(>|t|)
## Compact - Subcompact == 0    -0.200      1.001  -0.200    0.994
## Midsize - Subcompact == 0    -0.500      1.001  -0.500    0.925
## Fullsize - Subcompact == 0    1.200      1.001   1.199    0.495
## (Adjusted p values reported -- single-step method)

The adjusted p-values suggest that none of the differences from the Subcompact group are statistically significant.

Homework 6

Kevin Torres

March 14, 2019