Homework Assignment Final

Daniel J Wilson

Demonstrate that the tests are the same…

a/b/c: See Word document.

Unequal Sample Sizes and ANOVA.

See Word document.

Beta weights…

See Word document.

Generate the following data using R (or by hand) that has the following characteristics:

a. 100 participants in 4 groups (2 x2 factorial design with equal sample sizes). No main effects but a significant interaction effect.

# ---------------------
# 4A
# ---------------------

#DV: Depression
#Factor A: Drug
#Factor B: Massage

#AB
AB = rnorm(25, mean = 12, sd = 1.5)
cond1 <- data.frame(AB)
cond1$condition <- "group1"
cond1$drug <- 1
cond1$massage <- 1

#AnoB
AB = rnorm(25, mean = 8, sd = 1.5)
cond2 <- data.frame(AB)
cond2$condition <- "group2"
cond2$drug <- 1
cond2$massage <- -1

#noAB
AB = rnorm(25, mean = 8, sd = 1.5)
cond3 <- data.frame(AB)
cond3$condition <- "group3"
cond3$drug <- -1
cond3$massage <- 1

#noAnoB
AB = rnorm(25, mean = 12, sd = 1.5)
cond4 <- data.frame(AB)
cond4$condition <- "group4"
cond4$drug <- -1
cond4$massage <- -1

dat <- rbind(cond1, cond2, cond3, cond4)
rm(cond1, cond2, cond3, cond4, AB)

head(dat)

##         AB condition drug massage
## 1 10.78904    group1    1       1
## 2 14.70104    group1    1       1
## 3 14.06172    group1    1       1
## 4 13.59904    group1    1       1
## 5 14.11009    group1    1       1
## 6 13.05987    group1    1       1

b. Compute using R or Excel the full ANOVA table for a one-way anova for these four groups

# --------------------------
# 4B
# --------------------------

# Calculate Cell means
anova.means <- aggregate(dat$AB, list(dat$condition), mean)
# rename the first column
colnames(anova.means)[1] <- "condition"
numGroups <- length(anova.means$x)

# Merge the cell means back into the data
dat2 <- merge(dat, anova.means, by = "condition")
# change cell being renamed
colnames(dat2)[5] <- "grpM"

#Calculate the Grand Mean
dat2$GM <- mean(dat2$AB)

#Calculate SSTotal
dat2$ssGM <- (dat2$AB-dat2$GM)^2
SSTotal <- sum(dat2$ssGM)
SSTotal

## [1] 711.7707

#Calculate SSError
dat2$ssError <- (dat2$AB-dat2$grpM)^2
SSError <- sum(dat2$ssError)
SSError

## [1] 244.9746

#Calculate SSEffect
dat2$ssEffect <- (dat2$grpM-dat2$GM)^2
SSEffect <- sum(dat2$ssEffect)
SSEffect

## [1] 466.7961

#Calculate MSError
MSError <- SSError/(100-4)
MSError

## [1] 2.551819

#Calculate MSEffect
MSEffect <- SSEffect/3
MSEffect

## [1] 155.5987

#Calculate F
Fval <- MSEffect/MSError
Fval

## [1] 60.9756

ANOVA TABLE
Note that this is based on one running of the code so the data will not likely exactly match when you run it and generate a new data set.

	Df	Sum Sq	Mean Sq	F value	R^2
drug	1	0.3592924	0.3592924	0.1395968	0.0005792
massage	1	10.6763157	10.6763157	4.1480969	0.0172128
drug:massage	1	362.1328536	362.1328536	140.7004253	0.5838480
Residuals	96	247.0835029	2.5737865	NA	NA
Total	99	620.2519646	NA	NA	NA

c. Compute the factorial ANOVA by writing out the equations in R or Excel using contrasts

# --------------------------
# 4C
# --------------------------

#contrast 1 (drug)
index <- c("group1", "group2", "group3", "group4")
values <- c(1, 1, -1, -1)
dat2$contrast1 <- values[match(dat2$condition, index)]

#contrast 2 (massage)
index <- c("group1", "group2", "group3", "group4")
values <- c(1, -1, 1, -1)
dat2$contrast2 <- values[match(dat2$condition, index)]

#contrast 3 (interaction)
index <- c("group1", "group2", "group3", "group4")
values <- c(1, -1, -1, 1)
dat2$contrast3 <- values[match(dat2$condition, index)]

# calculate contrasts
dat2$c1 <- dat2$contrast1 * dat2$grpM
dat2$c2 <- dat2$contrast2 * dat2$grpM
dat2$c3 <- dat2$contrast3 * dat2$grpM

# calculate SS and MS
SS_c1 <- sum(dat2$c1)^2/sum((dat2$contrast1)^2)
SS_c2 <- sum(dat2$c2)^2/sum((dat2$contrast2)^2)
SS_c3 <- sum(dat2$c3)^2/sum((dat2$contrast3)^2)

SS_c1

## [1] 8.709651

SS_c2

## [1] 0.035679

SS_c3

## [1] 458.0508

# Find F Value for the drug treatment
FvalDrug_Contrasts = SS_c1/MSError
FvalDrug_Contrasts

## [1] 3.413115

# Find F Value for the massage treatment
FvalMassage_Contrasts = SS_c2/MSError
FvalMassage_Contrasts

## [1] 0.01398179

# Find F Value for the interaction between treatments
FvalInter_Contrasts = SS_c3/MSError
FvalInter_Contrasts

## [1] 179.4997

d. Compute the factorial ANOVA using row, cell, and interaction means in R or Excel

# --------------------------
# 4D
# --------------------------

#means of Drug and no Drug treatment
mDrug <- mean(subset(dat2, drug == 1)$AB)
mNoDrug <- mean(subset(dat2, drug == -1)$AB)

#make new Column containing these means
index <- c(1, -1)
values <- c(mDrug, mNoDrug)
dat2$mDrug <- values[match(dat2$drug, index)]
dat2$ssDrug <- (dat2$mDrug - dat2$GM)^2
ssDrugTotal <- sum(dat2$ssDrug)
ssDrugTotal

## [1] 8.709651

#Find F Value for the drug treatment
FvalDrug <- ssDrugTotal/MSError
FvalDrug

## [1] 3.413115

#means of Massage and no Massage treatment
mMassage <- mean(subset(dat2, massage == 1)$AB)
mNoMassage <- mean(subset(dat2, massage == -1)$AB)

#make new Column containing these means
index <- c(1, -1)
values <- c(mMassage, mNoMassage)
dat2$mMassage <- values[match(dat2$massage, index)]
dat2$ssMassage <- (dat2$mMassage - dat2$GM)^2
ssMassageTotal <- sum(dat2$ssMassage)
ssMassageTotal

## [1] 0.035679

#Find F Value for the massage treatment
FvalMassage <- ssMassageTotal/MSError
FvalMassage

## [1] 0.01398179

#means of interaction
mInter1 <- (mean(subset(dat2, condition == "group1")$AB) + mean(subset(dat2, condition == "group4")$AB))/2
mInter2 <- (mean(subset(dat2, condition == "group2")$AB) + mean(subset(dat2, condition == "group3")$AB))/2

#make new Column containing these means
index <- c("group1", "group2", "group3", "group4")
values <- c(mInter1, mInter2, mInter2, mInter1)
dat2$mInter <- values[match(dat2$condition, index)]
dat2$ssInter <- (dat2$mInter - dat2$GM)^2
ssInterTotal <- sum(dat2$ssInter)
ssInterTotal

## [1] 458.0508

#Find F Value for the interaction of treatments
FvalInter <- ssInterTotal/MSError
FvalInter

## [1] 179.4997

e. Plot the results of the ANOVA (extra points for the prettiness of a ggplot2 graph if you choose).

# ---------------------
# 4E
# ---------------------

library(ggplot2)

boxplot(dat2$AB ~ dat2$drug*dat$massage)

dat$massage <- as.factor(dat$massage)

# Showing interaction but no main effect
ggplot(dat, aes(x=drug, y=AB, color=massage)) +
    geom_point(shape=1) +
    scale_colour_hue(l=50) + 
    labs(y = "Score", x = "Drug", title = "Interaction but No Main Effect") +
    geom_smooth(method=lm,   
                se=FALSE)

f. Check your answers using the built in lm functions in R.

# ---------------------
# 4F USE R FUNCTIONS
# ---------------------

# Answer as a GLM
t <- lm(AB ~ condition, data=dat)
anova(t)

## Analysis of Variance Table
## 
## Response: AB
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## condition  3 466.80 155.599  60.976 < 2.2e-16 ***
## Residuals 96 244.97   2.552                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

t.2 <- lm(AB ~ drug * massage, data=dat)
anova(t.2)

## Analysis of Variance Table
## 
## Response: AB
##              Df Sum Sq Mean Sq  F value  Pr(>F)    
## drug          1   8.71    8.71   3.4131 0.06776 .  
## massage       1   0.04    0.04   0.0140 0.90612    
## drug:massage  1 458.05  458.05 179.4997 < 2e-16 ***
## Residuals    96 244.97    2.55                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Repeat the exercise in question 4, but have one of the variables be continuous, and the other categorical.

a. Generate 100 participants (two groups of 50 participants), and one continuous variable. In this example, there should be a significant interaction, but also one of the main effects should be significant.

# ---------------------
# 5A
# ---------------------

#DV: Age
#Factor A: Weight
#Factor B: Sex

#Girls
ages <- rep(9:18, each=5)
cond1 <- data.frame(ages)
cond1$sex <- 1
#heights for girls
cond1$height = 60 + cond1$ages*5.5 + rnorm(50, mean =0, sd = 6)

#Boys
ages <- rep(9:18, each =5)
cond2 <- data.frame(ages)
cond2$sex <- 0
#heights for boys
cond2$height = 60 + cond2$ages*6.5 + rnorm(50, mean =0, sd = 7)

dat <- rbind(cond1, cond2)
rm(cond1, cond2)

head(dat)

##   ages sex    height
## 1    9   1 115.74608
## 2    9   1  96.18628
## 3    9   1 105.05129
## 4    9   1 125.70049
## 5    9   1 120.50665
## 6   10   1 113.13362

b. Compute the multiple regression using just correlations (and adjusting the correlations).

# --------------------------
# 5B
# --------------------------

cor(dat$ages, dat$height)

## [1] 0.8535608

cor(dat$ages, dat$sex)

## [1] 0

cor(dat$height, dat$sex)

## [1] -0.3874709

Correlations calculated in R, the correlation matrix and beta weights are in the word doc.

c. Compare your results to the results from a lm command in R.

# --------------------------
# 5C
# --------------------------

m <- lm(ages ~ sex*height, data=dat)
summary(m)

## 
## Call:
## lm(formula = ages ~ sex * height, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1717 -0.6621  0.0267  0.6866  2.7539 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -8.70935    1.29086  -6.747 1.14e-09 ***
## sex          0.39844    1.83561   0.217    0.829    
## height       0.14958    0.00863  17.332  < 2e-16 ***
## sex:height   0.01308    0.01296   1.009    0.315    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.102 on 96 degrees of freedom
## Multiple R-squared:  0.8588, Adjusted R-squared:  0.8544 
## F-statistic: 194.6 on 3 and 96 DF,  p-value: < 2.2e-16

d. Plot your results.

# --------------------------
# 5D
# --------------------------
ggplot(dat, aes(x=height, y=ages, color=sex)) +
    geom_point(shape=1) +    # Use hollow circles
    geom_smooth(method=lm)   # Add linear regression line

Daniel J Wilson