library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v readr   2.0.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Question 1 (2 sources of variation)

Reading the data

data <- read.csv("C:/Users/mmojoyinola/Downloads/assig2_1.csv")

Exploring and manipulating data

data$color2 <- as.factor(data$color)
data$time2 <- as.factor(data$time)

data %>%
  group_by(color2, time2) %>%
  summarise(N = n(), Mean = mean(outcome, na.rm=TRUE),  SD = sd(outcome, na.rm=TRUE))
## `summarise()` has grouped output by 'color2'. You can override using the `.groups` argument.
## # A tibble: 6 x 5
## # Groups:   color2 [3]
##   color2 time2     N  Mean    SD
##   <fct>  <fct> <int> <dbl> <dbl>
## 1 blue   no       10  8.39 0.977
## 2 blue   yes      10  2.97 1.68 
## 3 green  no       10  5.09 0.820
## 4 green  yes      10  7.08 1.27 
## 5 yellow no       10 11.3  1.05 
## 6 yellow yes      10  1.96 2.31

Model without interaction

modela <- aov(outcome ~ color2 + time2, data = data)
summary(modela)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## color2       2    9.6    4.79   0.602    0.551    
## time2        1  274.2  274.18  34.434 2.49e-07 ***
## Residuals   56  445.9    7.96                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model with interaction

modelb<- aov(outcome ~ color2 + time2 + color2:time2, data = data)
summary(modelb)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## color2        2    9.6    4.79   2.301     0.11    
## time2         1  274.2  274.18 131.600 4.25e-16 ***
## color2:time2  2  333.4  166.69  80.010  < 2e-16 ***
## Residuals    54  112.5    2.08                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD for model with interaction

TukeyHSD(modelb)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = outcome ~ color2 + time2 + color2:time2, data = data)
## 
## $color2
##                diff        lwr      upr     p adj
## green-blue   0.4050 -0.6950245 1.505025 0.6505841
## yellow-blue  0.9745 -0.1255245 2.074525 0.0922706
## yellow-green 0.5695 -0.5305245 1.669525 0.4307567
## 
## $time2
##             diff       lwr       upr p adj
## yes-no -4.275333 -5.022523 -3.528144     0
## 
## $`color2:time2`
##                        diff          lwr        upr     p adj
## green:no-blue:no     -3.301  -5.20814725 -1.3938528 0.0000608
## yellow:no-blue:no     2.956   1.04885275  4.8631472 0.0003846
## blue:yes-blue:no     -5.425  -7.33214725 -3.5178528 0.0000000
## green:yes-blue:no    -1.314  -3.22114725  0.5931472 0.3363986
## yellow:yes-blue:no   -6.432  -8.33914725 -4.5248528 0.0000000
## yellow:no-green:no    6.257   4.34985275  8.1641472 0.0000000
## blue:yes-green:no    -2.124  -4.03114725 -0.2168528 0.0207284
## green:yes-green:no    1.987   0.07985275  3.8941472 0.0365060
## yellow:yes-green:no  -3.131  -5.03814725 -1.2238528 0.0001524
## blue:yes-yellow:no   -8.381 -10.28814725 -6.4738528 0.0000000
## green:yes-yellow:no  -4.270  -6.17714725 -2.3628528 0.0000003
## yellow:yes-yellow:no -9.388 -11.29514725 -7.4808528 0.0000000
## green:yes-blue:yes    4.111   2.20385275  6.0181472 0.0000006
## yellow:yes-blue:yes  -1.007  -2.91414725  0.9001472 0.6277450
## yellow:yes-green:yes -5.118  -7.02514725 -3.2108528 0.0000000

Responce based on my result

  1. The test statistic for the interaction effect is larger than 3.168. ( Test statistics is 80.010)
  2. The test statistic for the main effect of time, for the model with an interaction (131.600), is larger than the main effect of time for a model without an interaction (34.434).
  3. The test statistic for color for the model with an interaction (2.301) is larger than the main effect of color for a model without an interaction (0.602).
  4. The adjusted p-value in TukeyHSD test for the model with an interaction indicates that the largest observed difference is between group yellow and group blue.(0.9745)
  5. The mean squared error for the model with an interaction (2.08), is smaller than the mean squared error for a model without an interaction ( 7.96).

Question 2 (Block Design)

Reading the data

data2 <- read.csv("C:/Users/mmojoyinola/Downloads/assig2_2.csv")

Exploring and manipulating data

head(data2)
##           y trt  block
## 1 1.1926725   a block1
## 2 0.4913769   a block1
## 3 5.3965574   b block1
## 4 6.1324357   b block1
## 5 0.1325303   c block1
## 6 0.4126703   c block1
str(data2)
## 'data.frame':    18 obs. of  3 variables:
##  $ y    : num  1.193 0.491 5.397 6.132 0.133 ...
##  $ trt  : chr  "a" "a" "b" "b" ...
##  $ block: chr  "block1" "block1" "block1" "block1" ...
data2$trt2 <- as.factor(data2$trt)
data2$block2 <- as.factor(data2$block)

data2 %>%
  group_by(trt2,block2) %>%
  summarise(N = n(), Mean = mean(y, na.rm=TRUE),  SD = sd(y, na.rm=TRUE))
## `summarise()` has grouped output by 'trt2'. You can override using the `.groups` argument.
## # A tibble: 9 x 5
## # Groups:   trt2 [3]
##   trt2  block2     N   Mean    SD
##   <fct> <fct>  <int>  <dbl> <dbl>
## 1 a     block1     2  0.842 0.496
## 2 a     block2     2  1.60  0.672
## 3 a     block3     2  0.675 0.361
## 4 b     block1     2  5.76  0.520
## 5 b     block2     2  5.24  0.398
## 6 b     block3     2  4.79  0.632
## 7 c     block1     2  0.273 0.198
## 8 c     block2     2 -0.916 0.964
## 9 c     block3     2 -0.685 0.533

Model without interaction

bmodela <- lm(y ~ block2 + trt2, data = data2)
anova(bmodela)
## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq  F value    Pr(>F)    
## block2     2   1.480   0.740   1.9379    0.1834    
## trt2       2 105.110  52.555 137.6540 1.785e-09 ***
## Residuals 13   4.963   0.382                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model with interaction

bmodelb <- lm(y ~ block2 + trt2 + block2:trt2, data = data2)
anova(bmodelb)
## Analysis of Variance Table
## 
## Response: y
##             Df  Sum Sq Mean Sq  F value    Pr(>F)    
## block2       2   1.480   0.740   2.2875    0.1573    
## trt2         2 105.110  52.555 162.4850 8.658e-08 ***
## block2:trt2  4   2.052   0.513   1.5863    0.2592    
## Residuals    9   2.911   0.323                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD for block model with interaction

library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
summary(glht(bmodelb,  linfct=mcp(trt2="Tukey")))
## Warning in mcp2matrix(model, linfct = linfct): covariate interactions found --
## default contrast might be inappropriate
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = y ~ block2 + trt2 + block2:trt2, data = data2)
## 
## Linear Hypotheses:
##            Estimate Std. Error t value Pr(>|t|)    
## b - a == 0   4.9225     0.5687   8.655   <0.001 ***
## c - a == 0  -0.5694     0.5687  -1.001    0.594    
## c - b == 0  -5.4919     0.5687  -9.657   <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Responce based on my result

  1. The adjusted p-value in TukeyHSD test (<0.001 ***), for the model with an interaction, indicates that the largest observed difference in the main effect of treatment is between b and c group (-5.4919).
  2. The mean squared error for the model with an interaction (0.323), is smaller than the mean squared error for a model without an interaction ( 0.382).
  3. The mean squared error for the model with an interaction suggests that the block is helpful in the model. (since it is smaller, it is helping us reduce variation. Therefore helpful)
  4. The test statistic for the main effect of the treatment, for the model with an interaction(162.4850), is larger than the main effect of the main effect of the treatment for a model without an interaction (137.6540).

Question 3 (Ancova)

Reading the data

data3 <- read.csv("C:/Users/mmojoyinola/Downloads/assig2_3.csv")

Exploring and manipulating data

head(data3)
##     treat      pre      post
## 1 Group A 45.00901  8.211521
## 2 Group A 29.50241 23.563446
## 3 Group A 35.25924 18.953642
## 4 Group A 24.08080 21.328479
## 5 Group A 25.41393 23.719227
## 6 Group A 50.83524  8.650689
str(data3)
## 'data.frame':    40 obs. of  3 variables:
##  $ treat: chr  "Group A" "Group A" "Group A" "Group A" ...
##  $ pre  : num  45 29.5 35.3 24.1 25.4 ...
##  $ post : num  8.21 23.56 18.95 21.33 23.72 ...
data3$treat2 <- as.factor(data3$treat)

data3 %>%
  group_by(treat2) %>%
  summarise(N = n(), Mean = mean(post,na.rm=TRUE),  SD = sd(post, na.rm=TRUE))
## # A tibble: 2 x 4
##   treat2      N   Mean    SD
##   <fct>   <int>  <dbl> <dbl>
## 1 Group A    20 17.8    7.95
## 2 Group B    20  0.413  8.40

Ancova model without interaction

aov1 <- aov(post ~ treat + pre, data = data3)
summary(aov1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## treat        1 3036.7  3036.7   205.0  < 2e-16 ***
## pre          1 1991.6  1991.6   134.5 6.99e-14 ***
## Residuals   37  548.1    14.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#TukeyHSD(aov1)

Ancova model with interaction

aov2 <- aov(post ~ treat * pre, data = data3)
summary(aov2)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## treat        1 3036.7  3036.7 206.067  < 2e-16 ***
## pre          1 1991.6  1991.6 135.150 9.59e-14 ***
## treat:pre    1   17.6    17.6   1.191    0.282    
## Residuals   36  530.5    14.7                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#TukeyHSD(aov2)

Tukey for model without interaction

aov1 <- aov(post ~ treat + pre, data = data3)
TukeyHSD(aov1)
## Warning in replications(paste("~", xx), data = mf): non-factors ignored: pre
## Warning in TukeyHSD.aov(aov1): 'which' specified some non-factors which will be
## dropped
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = post ~ treat + pre, data = data3)
## 
## $treat
##                      diff      lwr       upr p adj
## Group B-Group A -17.42607 -19.8921 -14.96005     0

Tukey for model with interaction

aov2 <- aov(post ~ treat * pre, data = data3)
TukeyHSD(aov2)
## Warning in replications(paste("~", xx), data = mf): non-factors ignored: pre
## Warning in replications(paste("~", xx), data = mf): non-factors ignored: treat,
## pre
## Warning in TukeyHSD.aov(aov2): 'which' specified some non-factors which will be
## dropped
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = post ~ treat * pre, data = data3)
## 
## $treat
##                      diff       lwr      upr p adj
## Group B-Group A -17.42607 -19.88805 -14.9641     0

Responce based on my result

  1. The mean squared error for the model with an interaction (14.7), is smaller than the mean squared error for a model without an interaction (14.8).
  2. The mean squared error for the model with an interaction suggests that the interaction is needed in the model.(It reduced residual error, so it is useful)