library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 2.0.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Question 1 (2 sources of variation)
Reading the data
data <- read.csv("C:/Users/mmojoyinola/Downloads/assig2_1.csv")
Exploring and manipulating data
data$color2 <- as.factor(data$color)
data$time2 <- as.factor(data$time)
data %>%
group_by(color2, time2) %>%
summarise(N = n(), Mean = mean(outcome, na.rm=TRUE), SD = sd(outcome, na.rm=TRUE))
## `summarise()` has grouped output by 'color2'. You can override using the `.groups` argument.
## # A tibble: 6 x 5
## # Groups: color2 [3]
## color2 time2 N Mean SD
## <fct> <fct> <int> <dbl> <dbl>
## 1 blue no 10 8.39 0.977
## 2 blue yes 10 2.97 1.68
## 3 green no 10 5.09 0.820
## 4 green yes 10 7.08 1.27
## 5 yellow no 10 11.3 1.05
## 6 yellow yes 10 1.96 2.31
Model without interaction
modela <- aov(outcome ~ color2 + time2, data = data)
summary(modela)
## Df Sum Sq Mean Sq F value Pr(>F)
## color2 2 9.6 4.79 0.602 0.551
## time2 1 274.2 274.18 34.434 2.49e-07 ***
## Residuals 56 445.9 7.96
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model with interaction
modelb<- aov(outcome ~ color2 + time2 + color2:time2, data = data)
summary(modelb)
## Df Sum Sq Mean Sq F value Pr(>F)
## color2 2 9.6 4.79 2.301 0.11
## time2 1 274.2 274.18 131.600 4.25e-16 ***
## color2:time2 2 333.4 166.69 80.010 < 2e-16 ***
## Residuals 54 112.5 2.08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD for model with interaction
TukeyHSD(modelb)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = outcome ~ color2 + time2 + color2:time2, data = data)
##
## $color2
## diff lwr upr p adj
## green-blue 0.4050 -0.6950245 1.505025 0.6505841
## yellow-blue 0.9745 -0.1255245 2.074525 0.0922706
## yellow-green 0.5695 -0.5305245 1.669525 0.4307567
##
## $time2
## diff lwr upr p adj
## yes-no -4.275333 -5.022523 -3.528144 0
##
## $`color2:time2`
## diff lwr upr p adj
## green:no-blue:no -3.301 -5.20814725 -1.3938528 0.0000608
## yellow:no-blue:no 2.956 1.04885275 4.8631472 0.0003846
## blue:yes-blue:no -5.425 -7.33214725 -3.5178528 0.0000000
## green:yes-blue:no -1.314 -3.22114725 0.5931472 0.3363986
## yellow:yes-blue:no -6.432 -8.33914725 -4.5248528 0.0000000
## yellow:no-green:no 6.257 4.34985275 8.1641472 0.0000000
## blue:yes-green:no -2.124 -4.03114725 -0.2168528 0.0207284
## green:yes-green:no 1.987 0.07985275 3.8941472 0.0365060
## yellow:yes-green:no -3.131 -5.03814725 -1.2238528 0.0001524
## blue:yes-yellow:no -8.381 -10.28814725 -6.4738528 0.0000000
## green:yes-yellow:no -4.270 -6.17714725 -2.3628528 0.0000003
## yellow:yes-yellow:no -9.388 -11.29514725 -7.4808528 0.0000000
## green:yes-blue:yes 4.111 2.20385275 6.0181472 0.0000006
## yellow:yes-blue:yes -1.007 -2.91414725 0.9001472 0.6277450
## yellow:yes-green:yes -5.118 -7.02514725 -3.2108528 0.0000000
Responce based on my result
- The test statistic for the interaction effect is larger than 3.168. ( Test statistics is 80.010)
- The test statistic for the main effect of time, for the model with an interaction (131.600), is larger than the main effect of time for a model without an interaction (34.434).
- The test statistic for color for the model with an interaction (2.301) is larger than the main effect of color for a model without an interaction (0.602).
- The adjusted p-value in TukeyHSD test for the model with an interaction indicates that the largest observed difference is between group yellow and group blue.(0.9745)
- The mean squared error for the model with an interaction (2.08), is smaller than the mean squared error for a model without an interaction ( 7.96).
Question 2 (Block Design)
Reading the data
data2 <- read.csv("C:/Users/mmojoyinola/Downloads/assig2_2.csv")
Exploring and manipulating data
head(data2)
## y trt block
## 1 1.1926725 a block1
## 2 0.4913769 a block1
## 3 5.3965574 b block1
## 4 6.1324357 b block1
## 5 0.1325303 c block1
## 6 0.4126703 c block1
str(data2)
## 'data.frame': 18 obs. of 3 variables:
## $ y : num 1.193 0.491 5.397 6.132 0.133 ...
## $ trt : chr "a" "a" "b" "b" ...
## $ block: chr "block1" "block1" "block1" "block1" ...
data2$trt2 <- as.factor(data2$trt)
data2$block2 <- as.factor(data2$block)
data2 %>%
group_by(trt2,block2) %>%
summarise(N = n(), Mean = mean(y, na.rm=TRUE), SD = sd(y, na.rm=TRUE))
## `summarise()` has grouped output by 'trt2'. You can override using the `.groups` argument.
## # A tibble: 9 x 5
## # Groups: trt2 [3]
## trt2 block2 N Mean SD
## <fct> <fct> <int> <dbl> <dbl>
## 1 a block1 2 0.842 0.496
## 2 a block2 2 1.60 0.672
## 3 a block3 2 0.675 0.361
## 4 b block1 2 5.76 0.520
## 5 b block2 2 5.24 0.398
## 6 b block3 2 4.79 0.632
## 7 c block1 2 0.273 0.198
## 8 c block2 2 -0.916 0.964
## 9 c block3 2 -0.685 0.533
Model without interaction
bmodela <- lm(y ~ block2 + trt2, data = data2)
anova(bmodela)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## block2 2 1.480 0.740 1.9379 0.1834
## trt2 2 105.110 52.555 137.6540 1.785e-09 ***
## Residuals 13 4.963 0.382
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model with interaction
bmodelb <- lm(y ~ block2 + trt2 + block2:trt2, data = data2)
anova(bmodelb)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## block2 2 1.480 0.740 2.2875 0.1573
## trt2 2 105.110 52.555 162.4850 8.658e-08 ***
## block2:trt2 4 2.052 0.513 1.5863 0.2592
## Residuals 9 2.911 0.323
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD for block model with interaction
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
summary(glht(bmodelb, linfct=mcp(trt2="Tukey")))
## Warning in mcp2matrix(model, linfct = linfct): covariate interactions found --
## default contrast might be inappropriate
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lm(formula = y ~ block2 + trt2 + block2:trt2, data = data2)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## b - a == 0 4.9225 0.5687 8.655 <0.001 ***
## c - a == 0 -0.5694 0.5687 -1.001 0.594
## c - b == 0 -5.4919 0.5687 -9.657 <0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
Responce based on my result
- The adjusted p-value in TukeyHSD test (<0.001 ***), for the model with an interaction, indicates that the largest observed difference in the main effect of treatment is between b and c group (-5.4919).
- The mean squared error for the model with an interaction (0.323), is smaller than the mean squared error for a model without an interaction ( 0.382).
- The mean squared error for the model with an interaction suggests that the block is helpful in the model. (since it is smaller, it is helping us reduce variation. Therefore helpful)
- The test statistic for the main effect of the treatment, for the model with an interaction(162.4850), is larger than the main effect of the main effect of the treatment for a model without an interaction (137.6540).
Question 3 (Ancova)
Reading the data
data3 <- read.csv("C:/Users/mmojoyinola/Downloads/assig2_3.csv")
Exploring and manipulating data
head(data3)
## treat pre post
## 1 Group A 45.00901 8.211521
## 2 Group A 29.50241 23.563446
## 3 Group A 35.25924 18.953642
## 4 Group A 24.08080 21.328479
## 5 Group A 25.41393 23.719227
## 6 Group A 50.83524 8.650689
str(data3)
## 'data.frame': 40 obs. of 3 variables:
## $ treat: chr "Group A" "Group A" "Group A" "Group A" ...
## $ pre : num 45 29.5 35.3 24.1 25.4 ...
## $ post : num 8.21 23.56 18.95 21.33 23.72 ...
data3$treat2 <- as.factor(data3$treat)
data3 %>%
group_by(treat2) %>%
summarise(N = n(), Mean = mean(post,na.rm=TRUE), SD = sd(post, na.rm=TRUE))
## # A tibble: 2 x 4
## treat2 N Mean SD
## <fct> <int> <dbl> <dbl>
## 1 Group A 20 17.8 7.95
## 2 Group B 20 0.413 8.40
Ancova model without interaction
aov1 <- aov(post ~ treat + pre, data = data3)
summary(aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treat 1 3036.7 3036.7 205.0 < 2e-16 ***
## pre 1 1991.6 1991.6 134.5 6.99e-14 ***
## Residuals 37 548.1 14.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#TukeyHSD(aov1)
Ancova model with interaction
aov2 <- aov(post ~ treat * pre, data = data3)
summary(aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## treat 1 3036.7 3036.7 206.067 < 2e-16 ***
## pre 1 1991.6 1991.6 135.150 9.59e-14 ***
## treat:pre 1 17.6 17.6 1.191 0.282
## Residuals 36 530.5 14.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#TukeyHSD(aov2)
Tukey for model without interaction
aov1 <- aov(post ~ treat + pre, data = data3)
TukeyHSD(aov1)
## Warning in replications(paste("~", xx), data = mf): non-factors ignored: pre
## Warning in TukeyHSD.aov(aov1): 'which' specified some non-factors which will be
## dropped
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = post ~ treat + pre, data = data3)
##
## $treat
## diff lwr upr p adj
## Group B-Group A -17.42607 -19.8921 -14.96005 0
Tukey for model with interaction
aov2 <- aov(post ~ treat * pre, data = data3)
TukeyHSD(aov2)
## Warning in replications(paste("~", xx), data = mf): non-factors ignored: pre
## Warning in replications(paste("~", xx), data = mf): non-factors ignored: treat,
## pre
## Warning in TukeyHSD.aov(aov2): 'which' specified some non-factors which will be
## dropped
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = post ~ treat * pre, data = data3)
##
## $treat
## diff lwr upr p adj
## Group B-Group A -17.42607 -19.88805 -14.9641 0
Responce based on my result
- The mean squared error for the model with an interaction (14.7), is smaller than the mean squared error for a model without an interaction (14.8).
- The mean squared error for the model with an interaction suggests that the interaction is needed in the model.(It reduced residual error, so it is useful)