612 Lab9 r code

library(foreign); library(reshape2); library(ez); library(knitr); library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

rm1 <- read.spss("612_Lab9_RMA1.sav", to.data.frame=TRUE)

## Warning in read.spss("612_Lab9_RMA1.sav", to.data.frame = TRUE):
## 612_Lab9_RMA1.sav: Unrecognized record type 7, subtype 18 encountered in
## system file

# Make sure subj ID is a variable. Here, it's just implied by the row numbers, but we can make that explicit. It should be a factor.
rm1$id <- as.factor(1:nrow(rm1))

# Your data should be in "long" format (multiple rows for each subject, with each row being a time point for that subj)
# The data are currently in "wide" format (one row for each subject, and each time point is a different variable), so we need to convert it.
rm1.long <- melt(rm1, measure.vars=c("W2", "W4", "W6", "W8"), variable.name="time", value.name="BDI")
# View(rm1.long)
# str(rm1.long)

# make time into an ordered factor (instead of an unordered one, which is the default)
# levels(rm1.long$time)
rm1.long$time <- as.ordered(rm1.long$time)

# set polynomial trend contrasts for time
contrasts(rm1.long$time) <- contr.poly

# There are multiple ways to do this, but we'll use the ezANOVA() command in the ez library.
rm.model1 <- ezANOVA(data=rm1.long, dv=BDI, wid=id, within=time, between=dep, type=3, detailed=TRUE)

rm.model1 # the full results

## $ANOVA
##        Effect DFn DFd       SSn      SSd        F            p p<.05
## 1 (Intercept)   1   4 345.04167 2.666667 517.5625 2.211323e-05     *
## 2         dep   1   4  57.04167 2.666667  85.5625 7.594191e-04     *
## 3        time   3  12  52.45833 1.333333 157.3750 6.728744e-10     *
## 4    dep:time   3  12  12.45833 1.333333  37.3750 2.292950e-06     *
##         ges
## 1 0.9885401
## 2 0.9344710
## 3 0.9291513
## 4 0.7569620
## 
## $`Mauchly's Test for Sphericity`
##     Effect         W         p p<.05
## 3     time 0.6328125 0.9441218      
## 4 dep:time 0.6328125 0.9441218      
## 
## $`Sphericity Corrections`
##     Effect       GGe        p[GG] p[GG]<.05      HFe        p[HF]
## 3     time 0.7619048 6.147723e-08         * 1.833333 6.728744e-10
## 4 dep:time 0.7619048 3.069155e-05         * 1.833333 2.292950e-06
##   p[HF]<.05
## 3         *
## 4         *

mauchlys <- kable(rm.model1[[2]], row.names=FALSE, caption="Mauchly's test for Sphericity")

sphericity.assumed <- kable(rm.model1$ANOVA, row.names=FALSE, caption="Tests of between (dep) and within (time, dep:time) effects, \n with sphericity assumed") # ges is a generalized Eta-Squared measure of effect size

sphericity.violation.corrections <- kable(rm.model1[[3]], row.names=FALSE, caption="Greenhouse-Geisser (GG) and Huynh-Feldt (HF) corrections") # GGe and HFe are the epsilon values for each, p[GG] and p[HF] are the corrected p values.

rm.model1 <- ezANOVA(data=rm1.long, dv=BDI, wid=id, within=time, between=dep, type=3, detailed=TRUE, return_aov=TRUE) # running the same model again just to add the aov format output option to use with contrasts

summary(rm.model1$aov, split = list(time = list(linear=1, quad=2, cubic=3)), expand.split = TRUE) # expand.split tells it to show the contrasts for any interactions involving time as well as the main effect of time.

## 
## Error: id
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## dep        1  57.04   57.04   85.56 0.000759 ***
## Residuals  4   2.67    0.67                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: id:time
##                    Df Sum Sq Mean Sq F value   Pr(>F)    
## time                3  52.46   17.49 157.375 6.73e-10 ***
##   time: linear      1  52.01   52.01 468.075 5.56e-11 ***
##   time: quad        1   0.37    0.37   3.375   0.0911 .  
##   time: cubic       1   0.07    0.07   0.675   0.4273    
## dep:time            3  12.46    4.15  37.375 2.29e-06 ***
##   dep:time: linear  1   0.21    0.21   1.875   0.1960    
##   dep:time: quad    1  12.04   12.04 108.375 2.32e-07 ***
##   dep:time: cubic   1   0.21    0.21   1.875   0.1960    
## Residuals          12   1.33    0.11                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# plot just BDI over time (collapsing across depression level) 
plot(BDI ~ time, data=rm1.long, main="BDI scores over time")

# get plot of interaction
with(rm1.long, interaction.plot(x.factor=time, trace.factor=dep, response=BDI, fun=mean, type="b", legend=T, ylab="BDI scores", main="Interaction Plot \n(time and depression severity)", pch=c(1,19), col=c(4,3)))

rm2 <- read.spss("612_Lab9_RMA2.sav", to.data.frame=TRUE)

## Warning in read.spss("612_Lab9_RMA2.sav", to.data.frame = TRUE):
## 612_Lab9_RMA2.sav: Unrecognized record type 7, subtype 18 encountered in
## system file

# look at your data
# View(rm2)

rm2$id <- as.factor(1:nrow(rm2))

# Your data should be in "long" format 
rm2.long <- melt(rm2, measure.vars=c("t1", "t2", "t3", "t4"), variable.name="time", value.name="social.func")
# View(rm2.long)
# str(rm2.long)

# make time into an ordered factor (instead of an unordered one, which is the default)
# levels(rm2.long$time)
rm2.long$time <- as.ordered(rm2.long$time)

# set polynomial trend contrasts for time
contrasts(rm2.long$time) <- contr.poly

summarize(group_by(rm2, extra, selfest), time1=mean(t1), sd1=sd(t1), time2=mean(t2), sd2=sd(t2),  time3=mean(t3), sd3=sd(t3), time4=mean(t4), sd4=sd(t4))

## Source: local data frame [4 x 10]
## Groups: extra
## 
##   extra selfest    time1       sd1     time2      sd2     time3       sd3
## 1   Low  threat 8.000000 2.0000000 10.666667 1.527525 13.333333 1.1547005
## 2   Low enhance 5.000000 2.0000000 12.000000 2.000000 15.333333 2.8867513
## 3  High  threat 1.333333 0.5773503  7.666667 2.081666 11.333333 2.0816660
## 4  High enhance 1.666667 0.5773503  6.000000 1.732051  9.666667 0.5773503
## Variables not shown: time4 (dbl), sd4 (dbl)

summarize(group_by(rm2, selfest), time1=mean(t1), sd1=sd(t1), time2=mean(t2), sd2=sd(t2),  time3=mean(t3), sd3=sd(t3), time4=mean(t4), sd4=sd(t4))

## Source: local data frame [2 x 9]
## 
##   selfest    time1      sd1    time2      sd2    time3      sd3    time4
## 1  threat 4.666667 3.881580 9.166667 2.316607 12.33333 1.861899 15.00000
## 2 enhance 3.333333 2.250926 9.000000 3.687818 12.50000 3.619392 15.16667
## Variables not shown: sd4 (dbl)

summarize(group_by(rm2, extra), time1=mean(t1), sd1=sd(t1), time2=mean(t2), sd2=sd(t2),  time3=mean(t3), sd3=sd(t3), time4=mean(t4), sd4=sd(t4))

## Source: local data frame [2 x 9]
## 
##   extra time1       sd1     time2     sd2    time3      sd3    time4
## 1   Low   6.5 2.4289916 11.333333 1.75119 14.33333 2.250926 15.16667
## 2  High   1.5 0.5477226  6.833333 1.94079 10.50000 1.643168 15.00000
## Variables not shown: sd4 (dbl)

# Note how we specify multiple factors for the bewteen-subejcts option. If you had multiple within-subjects factors, you could use the same notation for that.
rm.model2 <- ezANOVA(data=rm2.long, dv=social.func, wid=id, within=time, between= .(extra, selfest), type=3, detailed=TRUE) 


# Just checking Mauchly's W first, since that's the first thing we need to look at
names(rm.model2)[[2]]

## [1] "Mauchly's Test for Sphericity"

rm.model2[[2]]

##               Effect         W         p p<.05
## 5               time 0.4717867 0.4141938      
## 6         extra:time 0.4717867 0.4141938      
## 7       selfest:time 0.4717867 0.4141938      
## 8 extra:selfest:time 0.4717867 0.4141938

# model results for sphericity assumed
names(rm.model2)[[1]]

## [1] "ANOVA"

rm.model2[[1]]

##               Effect DFn DFd         SSn      SSd           F            p
## 1        (Intercept)   1   8 4941.020833 61.83333 639.2695418 6.412624e-09
## 2              extra   1   8  136.687500 61.83333  17.6846361 2.974856e-03
## 3            selfest   1   8    1.020833 61.83333   0.1320755 7.257058e-01
## 5               time   3  24  821.229167 42.16667 155.8063241 7.250137e-16
## 4      extra:selfest   1   8    4.687500 61.83333   0.6064690 4.585338e-01
## 6         extra:time   3  24   43.229167 42.16667   8.2015810 6.241761e-04
## 7       selfest:time   3  24    4.562500 42.16667   0.8656126 4.724459e-01
## 8 extra:selfest:time   3  24   22.562500 42.16667   4.2806324 1.483581e-02
##   p<.05         ges
## 1     * 0.979385615
## 2     * 0.567904440
## 3       0.009720294
## 5     * 0.887595416
## 4       0.043128235
## 6     * 0.293618226
## 7       0.042026482
## 8     * 0.178271605

rm.model2 <- ezANOVA(data=rm2.long, dv=social.func, wid=id, within=time, between= .(extra, selfest), type=3, detailed=TRUE, return_aov=TRUE) # running the same model again just to add the aov format output option to use with contrasts

summary(rm.model2$aov, split = list(time = list(linear=1, quad=2, cubic=3)), expand.split = TRUE)

## 
## Error: id
##               Df Sum Sq Mean Sq F value  Pr(>F)   
## extra          1 136.69  136.69  17.685 0.00297 **
## selfest        1   1.02    1.02   0.132 0.72571   
## extra:selfest  1   4.69    4.69   0.606 0.45853   
## Residuals      8  61.83    7.73                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: id:time
##                              Df Sum Sq Mean Sq F value   Pr(>F)    
## time                          3  821.2   273.7 155.806 7.25e-16 ***
##   time: linear                1  803.0   803.0 457.046  < 2e-16 ***
##   time: quad                  1   17.5    17.5   9.972 0.004252 ** 
##   time: cubic                 1    0.7     0.7   0.401 0.532668    
## extra:time                    3   43.2    14.4   8.202 0.000624 ***
##   extra:time: linear          1   34.5    34.5  19.639 0.000176 ***
##   extra:time: quad            1    7.5     7.5   4.281 0.049483 *  
##   extra:time: cubic           1    1.2     1.2   0.685 0.415899    
## selfest:time                  3    4.6     1.5   0.866 0.472446    
##   selfest:time: linear        1    3.5     3.5   1.994 0.170711    
##   selfest:time: quad          1    1.0     1.0   0.581 0.453336    
##   selfest:time: cubic         1    0.0     0.0   0.021 0.885066    
## extra:selfest:time            3   22.6     7.5   4.281 0.014836 *  
##   extra:selfest:time: linear  1    9.2     9.2   5.239 0.031189 *  
##   extra:selfest:time: quad    1   13.0    13.0   7.411 0.011882 *  
##   extra:selfest:time: cubic   1    0.3     0.3   0.192 0.665099    
## Residuals                    24   42.2     1.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# plot just social functioning over time (collapsing across extraversion and self esteem) 
plot(social.func ~ time, data=rm2.long, main="Social functioning scores over time")

# 2-way interaction plot
with(rm2.long, interaction.plot(x.factor=time, trace.factor=extra, response=social.func, fun=mean, type="b", legend=T, ylab="Social functioning scores", main="Extraversion by Time Interaction \n(collapsing across self-esteem)", pch=c(1,19), col=c(4,3)))

# 3-way interaction plots
par(mfrow=c(1,2)) # change plotting parameters so it puts two plots side by side

threat.only <- filter(rm2.long, selfest=="threat")
with(threat.only, interaction.plot(x.factor=time, trace.factor=extra, response=social.func, fun=mean, type="b", legend=T, ylab="Social functioning scores", main="Extraversion by Time Interaction \n(self-esteem threatened)", pch=c(1,19), col=c(4,3)))

enhance.only <- filter(rm2.long, selfest=="enhance")
with(enhance.only, interaction.plot(x.factor=time, trace.factor=extra, response=social.func, fun=mean, type="b", legend=T, ylab="Social functioning scores", main="Extraversion by Time Interaction \n(self-esteem enhanced)", pch=c(1,19), col=c(4,3)))

par(mfrow=c(1,1)) # change plotting parameters back to one plot at a time

summary(rm.model2$aov)[[1]]

##               Df Sum Sq Mean Sq F value  Pr(>F)   
## extra          1 136.69  136.69  17.685 0.00297 **
## selfest        1   1.02    1.02   0.132 0.72571   
## extra:selfest  1   4.69    4.69   0.606 0.45853   
## Residuals      8  61.83    7.73                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

library(lsmeans)
est.marginal.means <- lsmeans(rm.model2$aov, specs="time")

## Warning in lsm.basis.aovlist(object, trms, xlev, grid, ...): Some predictors are correlated with the intercept - results are biased.
## May help to re-fit with different contrasts, e.g. 'contr.sum'

est.marginal.means

##  time    lsmean        SE    df   lower.CL  upper.CL
##  t1    2.479167 0.8682777 10.86  0.5649836  4.393350
##  t2    7.562500 0.8682777 10.86  5.6483169  9.476683
##  t3   10.895833 0.8682777 10.86  8.9816502 12.810016
##  t4   13.562500 0.8682777 10.86 11.6483169 15.476683
## 
## Results are averaged over the levels of: extra, selfest 
## Confidence level used: 0.95

# run all pairwise comparisons
pairs(est.marginal.means)

##  contrast   estimate        SE df t.ratio p.value
##  t1 - t2   -5.083333 0.5411322 24  -9.394  <.0001
##  t1 - t3   -8.416667 0.5411322 24 -15.554  <.0001
##  t1 - t4  -11.083333 0.5411322 24 -20.482  <.0001
##  t2 - t3   -3.333333 0.5411322 24  -6.160  <.0001
##  t2 - t4   -6.000000 0.5411322 24 -11.088  <.0001
##  t3 - t4   -2.666667 0.5411322 24  -4.928  0.0003
## 
## Results are averaged over the levels of: extra, selfest 
## P value adjustment: tukey method for comparing a family of 4 estimates

library(foreign); library(ggplot2)

ac <- read.spss("612_Lab9_Jewelry_Sales.sav", to.data.frame=TRUE)

## re-encoding from CP1252

# look at your data
# View(ac) 
# str(ac)

ggplot(ac, aes(y=jewel, x=Time)) +
  geom_line() + 
  labs(title="Jewelry Sales Over Time") # add a title

ggplot(ac, aes(y=jewel, x=quarter, group=as.factor(YEAR))) + 
  geom_line(aes(color=as.factor(YEAR))) + # color lines by the group factor
  labs(title="Jewelry Sales by Quarter") +  # add a title
  scale_colour_discrete(name = "Year") # this changes the name of the legend

#### Step 1. Create dummy variables (R will do this on its own, but you may want to set the reference group)
# check that quarter is being interpreted as a factor, and force it to be if it's not already
str(ac)

## 'data.frame':    40 obs. of  4 variables:
##  $ quarter: num  1 2 3 4 1 2 3 4 1 2 ...
##  $ YEAR   : num  1989 1989 1989 1989 1990 ...
##  $ Time   : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ jewel  : num  14815 12743 14190 25086 13054 ...
##  - attr(*, "variable.labels")= Named chr 
##   ..- attr(*, "names")= chr 
##  - attr(*, "codepage")= int 1252

ac$quarter <- as.factor(ac$quarter)

#### Step 2. Run a regression checking for a time trend and a seasonal trend. 
model1 <- lm(jewel ~ Time, data=ac)
model2 <- lm(jewel ~ Time + relevel(ac$quarter, ref="4"), data=ac) # set the reference group to quarter 4
anova(model1, model2)

## Analysis of Variance Table
## 
## Model 1: jewel ~ Time
## Model 2: jewel ~ Time + relevel(ac$quarter, ref = "4")
##   Res.Df       RSS Df Sum of Sq      F    Pr(>F)    
## 1     38 834755325                                  
## 2     35 125094074  3 709661251 66.185 1.676e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

acf(x = model2$residuals, main="ACF of jewelry sales \n(residuals)")

# R gives you lag=0 for ACF in addition to the rest. What does lag of 0 mean?
pacf(x = model2$residuals, main="Partial ACF of jewelry sales \n(residuals)")

acf(x = ac$jewel, main="ACF of jewelry sales")

# R gives you lag=0 for ACF in addition to the rest. What does lag of 0 mean?
pacf(x = ac$jewel, main="Partial ACF of jewelry sales")

ac$residuals <- model2$residuals

ggplot(ac, aes(y=residuals, x=Time)) +
  geom_line() + 
  labs(title="Residuals for Jewelry Sales Over Time") # add a title

ggplot(ac, aes(y=residuals, x=quarter, group=as.factor(YEAR))) + 
  geom_line(aes(color=as.factor(YEAR))) + # color lines by the group factor
  labs(title="Residuals for Jewelry Sales by Quarter") +  # add a title
  scale_colour_discrete(name = "Year") # this changes the name of the legend

612 Lab9 r code

Rose Maier

March 3, 2015