library(foreign); library(reshape2); library(ez); library(knitr); library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
rm1 <- read.spss("612_Lab9_RMA1.sav", to.data.frame=TRUE)
## Warning in read.spss("612_Lab9_RMA1.sav", to.data.frame = TRUE):
## 612_Lab9_RMA1.sav: Unrecognized record type 7, subtype 18 encountered in
## system file
# Make sure subj ID is a variable. Here, it's just implied by the row numbers, but we can make that explicit. It should be a factor.
rm1$id <- as.factor(1:nrow(rm1))
# Your data should be in "long" format (multiple rows for each subject, with each row being a time point for that subj)
# The data are currently in "wide" format (one row for each subject, and each time point is a different variable), so we need to convert it.
rm1.long <- melt(rm1, measure.vars=c("W2", "W4", "W6", "W8"), variable.name="time", value.name="BDI")
# View(rm1.long)
# str(rm1.long)
# make time into an ordered factor (instead of an unordered one, which is the default)
# levels(rm1.long$time)
rm1.long$time <- as.ordered(rm1.long$time)
# set polynomial trend contrasts for time
contrasts(rm1.long$time) <- contr.poly
# There are multiple ways to do this, but we'll use the ezANOVA() command in the ez library.
rm.model1 <- ezANOVA(data=rm1.long, dv=BDI, wid=id, within=time, between=dep, type=3, detailed=TRUE)
rm.model1 # the full results
## $ANOVA
## Effect DFn DFd SSn SSd F p p<.05
## 1 (Intercept) 1 4 345.04167 2.666667 517.5625 2.211323e-05 *
## 2 dep 1 4 57.04167 2.666667 85.5625 7.594191e-04 *
## 3 time 3 12 52.45833 1.333333 157.3750 6.728744e-10 *
## 4 dep:time 3 12 12.45833 1.333333 37.3750 2.292950e-06 *
## ges
## 1 0.9885401
## 2 0.9344710
## 3 0.9291513
## 4 0.7569620
##
## $`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 3 time 0.6328125 0.9441218
## 4 dep:time 0.6328125 0.9441218
##
## $`Sphericity Corrections`
## Effect GGe p[GG] p[GG]<.05 HFe p[HF]
## 3 time 0.7619048 6.147723e-08 * 1.833333 6.728744e-10
## 4 dep:time 0.7619048 3.069155e-05 * 1.833333 2.292950e-06
## p[HF]<.05
## 3 *
## 4 *
mauchlys <- kable(rm.model1[[2]], row.names=FALSE, caption="Mauchly's test for Sphericity")
sphericity.assumed <- kable(rm.model1$ANOVA, row.names=FALSE, caption="Tests of between (dep) and within (time, dep:time) effects, \n with sphericity assumed") # ges is a generalized Eta-Squared measure of effect size
sphericity.violation.corrections <- kable(rm.model1[[3]], row.names=FALSE, caption="Greenhouse-Geisser (GG) and Huynh-Feldt (HF) corrections") # GGe and HFe are the epsilon values for each, p[GG] and p[HF] are the corrected p values.
rm.model1 <- ezANOVA(data=rm1.long, dv=BDI, wid=id, within=time, between=dep, type=3, detailed=TRUE, return_aov=TRUE) # running the same model again just to add the aov format output option to use with contrasts
summary(rm.model1$aov, split = list(time = list(linear=1, quad=2, cubic=3)), expand.split = TRUE) # expand.split tells it to show the contrasts for any interactions involving time as well as the main effect of time.
##
## Error: id
## Df Sum Sq Mean Sq F value Pr(>F)
## dep 1 57.04 57.04 85.56 0.000759 ***
## Residuals 4 2.67 0.67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: id:time
## Df Sum Sq Mean Sq F value Pr(>F)
## time 3 52.46 17.49 157.375 6.73e-10 ***
## time: linear 1 52.01 52.01 468.075 5.56e-11 ***
## time: quad 1 0.37 0.37 3.375 0.0911 .
## time: cubic 1 0.07 0.07 0.675 0.4273
## dep:time 3 12.46 4.15 37.375 2.29e-06 ***
## dep:time: linear 1 0.21 0.21 1.875 0.1960
## dep:time: quad 1 12.04 12.04 108.375 2.32e-07 ***
## dep:time: cubic 1 0.21 0.21 1.875 0.1960
## Residuals 12 1.33 0.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot just BDI over time (collapsing across depression level)
plot(BDI ~ time, data=rm1.long, main="BDI scores over time")

# get plot of interaction
with(rm1.long, interaction.plot(x.factor=time, trace.factor=dep, response=BDI, fun=mean, type="b", legend=T, ylab="BDI scores", main="Interaction Plot \n(time and depression severity)", pch=c(1,19), col=c(4,3)))

rm2 <- read.spss("612_Lab9_RMA2.sav", to.data.frame=TRUE)
## Warning in read.spss("612_Lab9_RMA2.sav", to.data.frame = TRUE):
## 612_Lab9_RMA2.sav: Unrecognized record type 7, subtype 18 encountered in
## system file
# look at your data
# View(rm2)
rm2$id <- as.factor(1:nrow(rm2))
# Your data should be in "long" format
rm2.long <- melt(rm2, measure.vars=c("t1", "t2", "t3", "t4"), variable.name="time", value.name="social.func")
# View(rm2.long)
# str(rm2.long)
# make time into an ordered factor (instead of an unordered one, which is the default)
# levels(rm2.long$time)
rm2.long$time <- as.ordered(rm2.long$time)
# set polynomial trend contrasts for time
contrasts(rm2.long$time) <- contr.poly
summarize(group_by(rm2, extra, selfest), time1=mean(t1), sd1=sd(t1), time2=mean(t2), sd2=sd(t2), time3=mean(t3), sd3=sd(t3), time4=mean(t4), sd4=sd(t4))
## Source: local data frame [4 x 10]
## Groups: extra
##
## extra selfest time1 sd1 time2 sd2 time3 sd3
## 1 Low threat 8.000000 2.0000000 10.666667 1.527525 13.333333 1.1547005
## 2 Low enhance 5.000000 2.0000000 12.000000 2.000000 15.333333 2.8867513
## 3 High threat 1.333333 0.5773503 7.666667 2.081666 11.333333 2.0816660
## 4 High enhance 1.666667 0.5773503 6.000000 1.732051 9.666667 0.5773503
## Variables not shown: time4 (dbl), sd4 (dbl)
summarize(group_by(rm2, selfest), time1=mean(t1), sd1=sd(t1), time2=mean(t2), sd2=sd(t2), time3=mean(t3), sd3=sd(t3), time4=mean(t4), sd4=sd(t4))
## Source: local data frame [2 x 9]
##
## selfest time1 sd1 time2 sd2 time3 sd3 time4
## 1 threat 4.666667 3.881580 9.166667 2.316607 12.33333 1.861899 15.00000
## 2 enhance 3.333333 2.250926 9.000000 3.687818 12.50000 3.619392 15.16667
## Variables not shown: sd4 (dbl)
summarize(group_by(rm2, extra), time1=mean(t1), sd1=sd(t1), time2=mean(t2), sd2=sd(t2), time3=mean(t3), sd3=sd(t3), time4=mean(t4), sd4=sd(t4))
## Source: local data frame [2 x 9]
##
## extra time1 sd1 time2 sd2 time3 sd3 time4
## 1 Low 6.5 2.4289916 11.333333 1.75119 14.33333 2.250926 15.16667
## 2 High 1.5 0.5477226 6.833333 1.94079 10.50000 1.643168 15.00000
## Variables not shown: sd4 (dbl)
# Note how we specify multiple factors for the bewteen-subejcts option. If you had multiple within-subjects factors, you could use the same notation for that.
rm.model2 <- ezANOVA(data=rm2.long, dv=social.func, wid=id, within=time, between= .(extra, selfest), type=3, detailed=TRUE)
# Just checking Mauchly's W first, since that's the first thing we need to look at
names(rm.model2)[[2]]
## [1] "Mauchly's Test for Sphericity"
rm.model2[[2]]
## Effect W p p<.05
## 5 time 0.4717867 0.4141938
## 6 extra:time 0.4717867 0.4141938
## 7 selfest:time 0.4717867 0.4141938
## 8 extra:selfest:time 0.4717867 0.4141938
# model results for sphericity assumed
names(rm.model2)[[1]]
## [1] "ANOVA"
rm.model2[[1]]
## Effect DFn DFd SSn SSd F p
## 1 (Intercept) 1 8 4941.020833 61.83333 639.2695418 6.412624e-09
## 2 extra 1 8 136.687500 61.83333 17.6846361 2.974856e-03
## 3 selfest 1 8 1.020833 61.83333 0.1320755 7.257058e-01
## 5 time 3 24 821.229167 42.16667 155.8063241 7.250137e-16
## 4 extra:selfest 1 8 4.687500 61.83333 0.6064690 4.585338e-01
## 6 extra:time 3 24 43.229167 42.16667 8.2015810 6.241761e-04
## 7 selfest:time 3 24 4.562500 42.16667 0.8656126 4.724459e-01
## 8 extra:selfest:time 3 24 22.562500 42.16667 4.2806324 1.483581e-02
## p<.05 ges
## 1 * 0.979385615
## 2 * 0.567904440
## 3 0.009720294
## 5 * 0.887595416
## 4 0.043128235
## 6 * 0.293618226
## 7 0.042026482
## 8 * 0.178271605
rm.model2 <- ezANOVA(data=rm2.long, dv=social.func, wid=id, within=time, between= .(extra, selfest), type=3, detailed=TRUE, return_aov=TRUE) # running the same model again just to add the aov format output option to use with contrasts
summary(rm.model2$aov, split = list(time = list(linear=1, quad=2, cubic=3)), expand.split = TRUE)
##
## Error: id
## Df Sum Sq Mean Sq F value Pr(>F)
## extra 1 136.69 136.69 17.685 0.00297 **
## selfest 1 1.02 1.02 0.132 0.72571
## extra:selfest 1 4.69 4.69 0.606 0.45853
## Residuals 8 61.83 7.73
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: id:time
## Df Sum Sq Mean Sq F value Pr(>F)
## time 3 821.2 273.7 155.806 7.25e-16 ***
## time: linear 1 803.0 803.0 457.046 < 2e-16 ***
## time: quad 1 17.5 17.5 9.972 0.004252 **
## time: cubic 1 0.7 0.7 0.401 0.532668
## extra:time 3 43.2 14.4 8.202 0.000624 ***
## extra:time: linear 1 34.5 34.5 19.639 0.000176 ***
## extra:time: quad 1 7.5 7.5 4.281 0.049483 *
## extra:time: cubic 1 1.2 1.2 0.685 0.415899
## selfest:time 3 4.6 1.5 0.866 0.472446
## selfest:time: linear 1 3.5 3.5 1.994 0.170711
## selfest:time: quad 1 1.0 1.0 0.581 0.453336
## selfest:time: cubic 1 0.0 0.0 0.021 0.885066
## extra:selfest:time 3 22.6 7.5 4.281 0.014836 *
## extra:selfest:time: linear 1 9.2 9.2 5.239 0.031189 *
## extra:selfest:time: quad 1 13.0 13.0 7.411 0.011882 *
## extra:selfest:time: cubic 1 0.3 0.3 0.192 0.665099
## Residuals 24 42.2 1.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot just social functioning over time (collapsing across extraversion and self esteem)
plot(social.func ~ time, data=rm2.long, main="Social functioning scores over time")

# 2-way interaction plot
with(rm2.long, interaction.plot(x.factor=time, trace.factor=extra, response=social.func, fun=mean, type="b", legend=T, ylab="Social functioning scores", main="Extraversion by Time Interaction \n(collapsing across self-esteem)", pch=c(1,19), col=c(4,3)))

# 3-way interaction plots
par(mfrow=c(1,2)) # change plotting parameters so it puts two plots side by side
threat.only <- filter(rm2.long, selfest=="threat")
with(threat.only, interaction.plot(x.factor=time, trace.factor=extra, response=social.func, fun=mean, type="b", legend=T, ylab="Social functioning scores", main="Extraversion by Time Interaction \n(self-esteem threatened)", pch=c(1,19), col=c(4,3)))
enhance.only <- filter(rm2.long, selfest=="enhance")
with(enhance.only, interaction.plot(x.factor=time, trace.factor=extra, response=social.func, fun=mean, type="b", legend=T, ylab="Social functioning scores", main="Extraversion by Time Interaction \n(self-esteem enhanced)", pch=c(1,19), col=c(4,3)))

par(mfrow=c(1,1)) # change plotting parameters back to one plot at a time
summary(rm.model2$aov)[[1]]
## Df Sum Sq Mean Sq F value Pr(>F)
## extra 1 136.69 136.69 17.685 0.00297 **
## selfest 1 1.02 1.02 0.132 0.72571
## extra:selfest 1 4.69 4.69 0.606 0.45853
## Residuals 8 61.83 7.73
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lsmeans)
est.marginal.means <- lsmeans(rm.model2$aov, specs="time")
## Warning in lsm.basis.aovlist(object, trms, xlev, grid, ...): Some predictors are correlated with the intercept - results are biased.
## May help to re-fit with different contrasts, e.g. 'contr.sum'
est.marginal.means
## time lsmean SE df lower.CL upper.CL
## t1 2.479167 0.8682777 10.86 0.5649836 4.393350
## t2 7.562500 0.8682777 10.86 5.6483169 9.476683
## t3 10.895833 0.8682777 10.86 8.9816502 12.810016
## t4 13.562500 0.8682777 10.86 11.6483169 15.476683
##
## Results are averaged over the levels of: extra, selfest
## Confidence level used: 0.95
# run all pairwise comparisons
pairs(est.marginal.means)
## contrast estimate SE df t.ratio p.value
## t1 - t2 -5.083333 0.5411322 24 -9.394 <.0001
## t1 - t3 -8.416667 0.5411322 24 -15.554 <.0001
## t1 - t4 -11.083333 0.5411322 24 -20.482 <.0001
## t2 - t3 -3.333333 0.5411322 24 -6.160 <.0001
## t2 - t4 -6.000000 0.5411322 24 -11.088 <.0001
## t3 - t4 -2.666667 0.5411322 24 -4.928 0.0003
##
## Results are averaged over the levels of: extra, selfest
## P value adjustment: tukey method for comparing a family of 4 estimates
library(foreign); library(ggplot2)
ac <- read.spss("612_Lab9_Jewelry_Sales.sav", to.data.frame=TRUE)
## re-encoding from CP1252
# look at your data
# View(ac)
# str(ac)
ggplot(ac, aes(y=jewel, x=Time)) +
geom_line() +
labs(title="Jewelry Sales Over Time") # add a title

ggplot(ac, aes(y=jewel, x=quarter, group=as.factor(YEAR))) +
geom_line(aes(color=as.factor(YEAR))) + # color lines by the group factor
labs(title="Jewelry Sales by Quarter") + # add a title
scale_colour_discrete(name = "Year") # this changes the name of the legend

#### Step 1. Create dummy variables (R will do this on its own, but you may want to set the reference group)
# check that quarter is being interpreted as a factor, and force it to be if it's not already
str(ac)
## 'data.frame': 40 obs. of 4 variables:
## $ quarter: num 1 2 3 4 1 2 3 4 1 2 ...
## $ YEAR : num 1989 1989 1989 1989 1990 ...
## $ Time : num 1 2 3 4 5 6 7 8 9 10 ...
## $ jewel : num 14815 12743 14190 25086 13054 ...
## - attr(*, "variable.labels")= Named chr
## ..- attr(*, "names")= chr
## - attr(*, "codepage")= int 1252
ac$quarter <- as.factor(ac$quarter)
#### Step 2. Run a regression checking for a time trend and a seasonal trend.
model1 <- lm(jewel ~ Time, data=ac)
model2 <- lm(jewel ~ Time + relevel(ac$quarter, ref="4"), data=ac) # set the reference group to quarter 4
anova(model1, model2)
## Analysis of Variance Table
##
## Model 1: jewel ~ Time
## Model 2: jewel ~ Time + relevel(ac$quarter, ref = "4")
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 38 834755325
## 2 35 125094074 3 709661251 66.185 1.676e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(x = model2$residuals, main="ACF of jewelry sales \n(residuals)")

# R gives you lag=0 for ACF in addition to the rest. What does lag of 0 mean?
pacf(x = model2$residuals, main="Partial ACF of jewelry sales \n(residuals)")

acf(x = ac$jewel, main="ACF of jewelry sales")

# R gives you lag=0 for ACF in addition to the rest. What does lag of 0 mean?
pacf(x = ac$jewel, main="Partial ACF of jewelry sales")

ac$residuals <- model2$residuals
ggplot(ac, aes(y=residuals, x=Time)) +
geom_line() +
labs(title="Residuals for Jewelry Sales Over Time") # add a title

ggplot(ac, aes(y=residuals, x=quarter, group=as.factor(YEAR))) +
geom_line(aes(color=as.factor(YEAR))) + # color lines by the group factor
labs(title="Residuals for Jewelry Sales by Quarter") + # add a title
scale_colour_discrete(name = "Year") # this changes the name of the legend
