Daniel J Wilson
a/b/c: See Word document.
See Word document.
See Word document.
a. 100 participants in 4 groups (2 x2 factorial design with equal sample sizes). No main effects but a significant interaction effect.
# ---------------------
# 4A
# ---------------------
#DV: Depression
#Factor A: Drug
#Factor B: Massage
#AB
AB = rnorm(25, mean = 12, sd = 1.5)
cond1 <- data.frame(AB)
cond1$condition <- "group1"
cond1$drug <- 1
cond1$massage <- 1
#AnoB
AB = rnorm(25, mean = 8, sd = 1.5)
cond2 <- data.frame(AB)
cond2$condition <- "group2"
cond2$drug <- 1
cond2$massage <- -1
#noAB
AB = rnorm(25, mean = 8, sd = 1.5)
cond3 <- data.frame(AB)
cond3$condition <- "group3"
cond3$drug <- -1
cond3$massage <- 1
#noAnoB
AB = rnorm(25, mean = 12, sd = 1.5)
cond4 <- data.frame(AB)
cond4$condition <- "group4"
cond4$drug <- -1
cond4$massage <- -1
dat <- rbind(cond1, cond2, cond3, cond4)
rm(cond1, cond2, cond3, cond4, AB)
head(dat)## AB condition drug massage
## 1 10.78904 group1 1 1
## 2 14.70104 group1 1 1
## 3 14.06172 group1 1 1
## 4 13.59904 group1 1 1
## 5 14.11009 group1 1 1
## 6 13.05987 group1 1 1
b. Compute using R or Excel the full ANOVA table for a one-way anova for these four groups
# --------------------------
# 4B
# --------------------------
# Calculate Cell means
anova.means <- aggregate(dat$AB, list(dat$condition), mean)
# rename the first column
colnames(anova.means)[1] <- "condition"
numGroups <- length(anova.means$x)
# Merge the cell means back into the data
dat2 <- merge(dat, anova.means, by = "condition")
# change cell being renamed
colnames(dat2)[5] <- "grpM"
#Calculate the Grand Mean
dat2$GM <- mean(dat2$AB)
#Calculate SSTotal
dat2$ssGM <- (dat2$AB-dat2$GM)^2
SSTotal <- sum(dat2$ssGM)
SSTotal## [1] 711.7707
#Calculate SSError
dat2$ssError <- (dat2$AB-dat2$grpM)^2
SSError <- sum(dat2$ssError)
SSError## [1] 244.9746
#Calculate SSEffect
dat2$ssEffect <- (dat2$grpM-dat2$GM)^2
SSEffect <- sum(dat2$ssEffect)
SSEffect## [1] 466.7961
#Calculate MSError
MSError <- SSError/(100-4)
MSError## [1] 2.551819
#Calculate MSEffect
MSEffect <- SSEffect/3
MSEffect## [1] 155.5987
#Calculate F
Fval <- MSEffect/MSError
Fval## [1] 60.9756
ANOVA TABLE Note that this is based on one running of the code so the data will not likely exactly match when you run it and generate a new data set.
| Df | Sum Sq | Mean Sq | F value | R^2 | |
|---|---|---|---|---|---|
| drug | 1 | 0.3592924 | 0.3592924 | 0.1395968 | 0.0005792 |
| massage | 1 | 10.6763157 | 10.6763157 | 4.1480969 | 0.0172128 |
| drug:massage | 1 | 362.1328536 | 362.1328536 | 140.7004253 | 0.5838480 |
| Residuals | 96 | 247.0835029 | 2.5737865 | NA | NA |
| Total | 99 | 620.2519646 | NA | NA | NA |
c. Compute the factorial ANOVA by writing out the equations in R or Excel using contrasts
# --------------------------
# 4C
# --------------------------
#contrast 1 (drug)
index <- c("group1", "group2", "group3", "group4")
values <- c(1, 1, -1, -1)
dat2$contrast1 <- values[match(dat2$condition, index)]
#contrast 2 (massage)
index <- c("group1", "group2", "group3", "group4")
values <- c(1, -1, 1, -1)
dat2$contrast2 <- values[match(dat2$condition, index)]
#contrast 3 (interaction)
index <- c("group1", "group2", "group3", "group4")
values <- c(1, -1, -1, 1)
dat2$contrast3 <- values[match(dat2$condition, index)]
# calculate contrasts
dat2$c1 <- dat2$contrast1 * dat2$grpM
dat2$c2 <- dat2$contrast2 * dat2$grpM
dat2$c3 <- dat2$contrast3 * dat2$grpM
# calculate SS and MS
SS_c1 <- sum(dat2$c1)^2/sum((dat2$contrast1)^2)
SS_c2 <- sum(dat2$c2)^2/sum((dat2$contrast2)^2)
SS_c3 <- sum(dat2$c3)^2/sum((dat2$contrast3)^2)
SS_c1## [1] 8.709651
SS_c2## [1] 0.035679
SS_c3## [1] 458.0508
# Find F Value for the drug treatment
FvalDrug_Contrasts = SS_c1/MSError
FvalDrug_Contrasts## [1] 3.413115
# Find F Value for the massage treatment
FvalMassage_Contrasts = SS_c2/MSError
FvalMassage_Contrasts## [1] 0.01398179
# Find F Value for the interaction between treatments
FvalInter_Contrasts = SS_c3/MSError
FvalInter_Contrasts## [1] 179.4997
d. Compute the factorial ANOVA using row, cell, and interaction means in R or Excel
# --------------------------
# 4D
# --------------------------
#means of Drug and no Drug treatment
mDrug <- mean(subset(dat2, drug == 1)$AB)
mNoDrug <- mean(subset(dat2, drug == -1)$AB)
#make new Column containing these means
index <- c(1, -1)
values <- c(mDrug, mNoDrug)
dat2$mDrug <- values[match(dat2$drug, index)]
dat2$ssDrug <- (dat2$mDrug - dat2$GM)^2
ssDrugTotal <- sum(dat2$ssDrug)
ssDrugTotal## [1] 8.709651
#Find F Value for the drug treatment
FvalDrug <- ssDrugTotal/MSError
FvalDrug## [1] 3.413115
#means of Massage and no Massage treatment
mMassage <- mean(subset(dat2, massage == 1)$AB)
mNoMassage <- mean(subset(dat2, massage == -1)$AB)
#make new Column containing these means
index <- c(1, -1)
values <- c(mMassage, mNoMassage)
dat2$mMassage <- values[match(dat2$massage, index)]
dat2$ssMassage <- (dat2$mMassage - dat2$GM)^2
ssMassageTotal <- sum(dat2$ssMassage)
ssMassageTotal## [1] 0.035679
#Find F Value for the massage treatment
FvalMassage <- ssMassageTotal/MSError
FvalMassage## [1] 0.01398179
#means of interaction
mInter1 <- (mean(subset(dat2, condition == "group1")$AB) + mean(subset(dat2, condition == "group4")$AB))/2
mInter2 <- (mean(subset(dat2, condition == "group2")$AB) + mean(subset(dat2, condition == "group3")$AB))/2
#make new Column containing these means
index <- c("group1", "group2", "group3", "group4")
values <- c(mInter1, mInter2, mInter2, mInter1)
dat2$mInter <- values[match(dat2$condition, index)]
dat2$ssInter <- (dat2$mInter - dat2$GM)^2
ssInterTotal <- sum(dat2$ssInter)
ssInterTotal## [1] 458.0508
#Find F Value for the interaction of treatments
FvalInter <- ssInterTotal/MSError
FvalInter## [1] 179.4997
e. Plot the results of the ANOVA (extra points for the prettiness of a ggplot2 graph if you choose).
# ---------------------
# 4E
# ---------------------
library(ggplot2)
boxplot(dat2$AB ~ dat2$drug*dat$massage)dat$massage <- as.factor(dat$massage)
# Showing interaction but no main effect
ggplot(dat, aes(x=drug, y=AB, color=massage)) +
geom_point(shape=1) +
scale_colour_hue(l=50) +
labs(y = "Score", x = "Drug", title = "Interaction but No Main Effect") +
geom_smooth(method=lm,
se=FALSE) f. Check your answers using the built in lm functions in R.
# ---------------------
# 4F USE R FUNCTIONS
# ---------------------
# Answer as a GLM
t <- lm(AB ~ condition, data=dat)
anova(t)## Analysis of Variance Table
##
## Response: AB
## Df Sum Sq Mean Sq F value Pr(>F)
## condition 3 466.80 155.599 60.976 < 2.2e-16 ***
## Residuals 96 244.97 2.552
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t.2 <- lm(AB ~ drug * massage, data=dat)
anova(t.2)## Analysis of Variance Table
##
## Response: AB
## Df Sum Sq Mean Sq F value Pr(>F)
## drug 1 8.71 8.71 3.4131 0.06776 .
## massage 1 0.04 0.04 0.0140 0.90612
## drug:massage 1 458.05 458.05 179.4997 < 2e-16 ***
## Residuals 96 244.97 2.55
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
a. Generate 100 participants (two groups of 50 participants), and one continuous variable. In this example, there should be a significant interaction, but also one of the main effects should be significant.
# ---------------------
# 5A
# ---------------------
#DV: Age
#Factor A: Weight
#Factor B: Sex
#Girls
ages <- rep(9:18, each=5)
cond1 <- data.frame(ages)
cond1$sex <- 1
#heights for girls
cond1$height = 60 + cond1$ages*5.5 + rnorm(50, mean =0, sd = 6)
#Boys
ages <- rep(9:18, each =5)
cond2 <- data.frame(ages)
cond2$sex <- 0
#heights for boys
cond2$height = 60 + cond2$ages*6.5 + rnorm(50, mean =0, sd = 7)
dat <- rbind(cond1, cond2)
rm(cond1, cond2)
head(dat)## ages sex height
## 1 9 1 115.74608
## 2 9 1 96.18628
## 3 9 1 105.05129
## 4 9 1 125.70049
## 5 9 1 120.50665
## 6 10 1 113.13362
b. Compute the multiple regression using just correlations (and adjusting the correlations).
# --------------------------
# 5B
# --------------------------
cor(dat$ages, dat$height)## [1] 0.8535608
cor(dat$ages, dat$sex)## [1] 0
cor(dat$height, dat$sex)## [1] -0.3874709
Correlations calculated in R, the correlation matrix and beta weights are in the word doc.
c. Compare your results to the results from a lm command in R.
# --------------------------
# 5C
# --------------------------
m <- lm(ages ~ sex*height, data=dat)
summary(m)##
## Call:
## lm(formula = ages ~ sex * height, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1717 -0.6621 0.0267 0.6866 2.7539
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.70935 1.29086 -6.747 1.14e-09 ***
## sex 0.39844 1.83561 0.217 0.829
## height 0.14958 0.00863 17.332 < 2e-16 ***
## sex:height 0.01308 0.01296 1.009 0.315
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.102 on 96 degrees of freedom
## Multiple R-squared: 0.8588, Adjusted R-squared: 0.8544
## F-statistic: 194.6 on 3 and 96 DF, p-value: < 2.2e-16
d. Plot your results.
# --------------------------
# 5D
# --------------------------
ggplot(dat, aes(x=height, y=ages, color=sex)) +
geom_point(shape=1) + # Use hollow circles
geom_smooth(method=lm) # Add linear regression line Daniel J Wilson