The data is taken from the correction note http://www.sciencedirect.com/science/article/pii/S0022103115000918
to
Brummert Lennings, H. I., & Warburton, W. A. (2011). The effect of auditory versus visual violent media exposure on aggressive behaviour: The role of song lyrics, video clips and musical tone (Journal of Experimental Social Psychology 47 794–799).
See also
https://pubpeer.com/publications/D9E7062DA9161CDA79C1D3E64C2DE9
I downloaded the data from the correction note as a csv file, but I needed to make some minor edits to it before getting this version. Namely, I deleted the first two of the three header rows, and added an empty first field to all the lines except the six which start with the name of a song (or other text). In other words, I added a “comma” at the start of every line which otherwise would begin with “No” or with “Yes”.
Here is my resulting “table.csv”:
Song stimulus,Lyrics present,Video present,N,Mean,SD
Let the bodies hit the floor,No,No,13,.47,.62
,No,Yes,13,.69,.45
,Yes,No,14,.71,.62
,Yes,Yes,13,.83,.52
Fight music,No,No,13,.72,.58
,No,Yes,15,.46,.37
,Yes,No,12,.65,.43
,Yes,Yes,13,.64,.69
You can't bring me down,No,No,14,.61,.50
,No,Yes,13,.67,.35
,Yes,No,15,.94,.40
,Yes,Yes,14,1.00,.53
Overall means averaged across song stimuli,No,No,40,.60,.56
,No,Yes,41,.60,.40
,Yes,No,41,.78,.50
,Yes,Yes,40,.83,.59
Control group (baseline),–,–,32,.50,.29
require(Hmisc) # provides function "errbar()" for adding error bars to a plot
## Loading required package: Hmisc
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
require(splitstackshape) # provides function "expandRows()"
## Loading required package: splitstackshape
## Loading required package: data.table
# expandRows() (from splitstackshape) is used further down to expand the data
# set from one row per cell of the design to one row per subject.
# Read the hand-edited summary table and display it.
chili <- read.csv(file = "table.csv", header = TRUE)
chili
## Song.stimulus Lyrics.present Video.present
## 1 Let the bodies hit the floor No No
## 2 No Yes
## 3 Yes No
## 4 Yes Yes
## 5 Fight music No No
## 6 No Yes
## 7 Yes No
## 8 Yes Yes
## 9 You can't bring me down No No
## 10 No Yes
## 11 Yes No
## 12 Yes Yes
## 13 Overall means averaged across song stimuli No No
## 14 No Yes
## 15 Yes No
## 16 Yes Yes
## 17 Control group (baseline) – –
## N Mean SD
## 1 13 0.47 0.62
## 2 13 0.69 0.45
## 3 14 0.71 0.62
## 4 13 0.83 0.52
## 5 13 0.72 0.58
## 6 15 0.46 0.37
## 7 12 0.65 0.43
## 8 13 0.64 0.69
## 9 14 0.61 0.50
## 10 13 0.67 0.35
## 11 15 0.94 0.40
## 12 14 1.00 0.53
## 13 40 0.60 0.56
## 14 41 0.60 0.40
## 15 41 0.78 0.50
## 16 40 0.83 0.59
## 17 32 0.50 0.29
# Panel 1 of a 2 x 2 grid of plots: first song (table rows 1-4).
par(mfrow = c(2, 2))
song1 <- as.character(chili$Song.stimulus[1])
# Column-major 2 x 2 layout: rows = video (no, yes), columns = lyrics (no, yes).
means1 <- matrix(chili$Mean[1:4], nrow = 2, ncol = 2)
sds1 <- chili$SD[1:4]
Ns1 <- chili$N[1:4]
sem1 <- sds1 / sqrt(Ns1) # standard error of each cell mean
# The y axis starts at 0.35, so bar heights are given relative to that offset.
bars1 <- barplot(
  means1 - 0.35,
  beside = TRUE,
  offset = 0.35,
  ylim = c(0.35, 1.05),
  col = c("white", "black"),
  main = song1,
  xlab = "No lyrics Lyrics",
  ylab = "mean log10 hot sauce"
)
# Overlay mean +/- one standard error at the bar midpoints returned by barplot().
errbar(
  bars1, means1,
  yplus = means1 + sem1,
  yminus = means1 - sem1,
  add = TRUE, errbar.col = "grey", col = "grey"
)
legend("topleft", legend = c("video", "no video"), fill = c("black", "white"), bty = "n")
box()
# Panel 2: second song (table rows 5-8).
song2 <- as.character(chili$Song.stimulus[5])
# Column-major 2 x 2 layout: rows = video (no, yes), columns = lyrics (no, yes).
means2 <- chili$Mean[5:8]
dim(means2) <- c(2, 2)
# The y axis starts at 0.35, so bar heights are given relative to that offset.
bars2 <- barplot(means2 - 0.35, beside = TRUE, offset = 0.35, ylim = c(0.35, 1.05),
col = c("white", "black"),
main = song2,
xlab = "No lyrics Lyrics",
ylab = "mean log10 hot sauce")
sds2 <- chili$SD[5:8]
Ns2 <- chili$N[5:8]
# Error bars: cell mean +/- one standard error (SD / sqrt(N)).
errbar(bars2, means2, yplus = means2 + sds2/sqrt(Ns2), yminus = means2 - sds2/sqrt(Ns2),
add = TRUE, errbar.col = "grey", col = "grey")
legend(x = "topleft", c("video", "no video"), fill = c("black", "white"), bty = "n")
box()
# Panel 3: third song (table rows 9-12).
song3 <- as.character(chili$Song.stimulus[9])
# Column-major 2 x 2 layout: rows = video (no, yes), columns = lyrics (no, yes).
means3 <- chili$Mean[9:12]
dim(means3) <- c(2, 2)
# The y axis starts at 0.35, so bar heights are given relative to that offset.
bars3 <- barplot(means3 - 0.35, beside = TRUE, offset = 0.35, ylim = c(0.35, 1.05),
col = c("white", "black"),
main = song3,
xlab = "No lyrics Lyrics",
ylab = "mean log10 hot sauce")
sds3 <- chili$SD[9:12]
Ns3 <- chili$N[9:12]
# Error bars: cell mean +/- one standard error (SD / sqrt(N)), drawn at this
# panel's own bar midpoints (bars3), not those of another panel.
errbar(bars3, means3, yplus = means3 + sds3/sqrt(Ns3), yminus = means3 - sds3/sqrt(Ns3),
add = TRUE, errbar.col = "grey", col = "grey")
legend(x = "topleft", c("video", "no video"), fill = c("black", "white"), bty = "n")
box()
# Panel 4: means pooled over the three songs (table rows 13-16), preceded by
# the control group (row 17) drawn as a grey baseline bar.
rows4 <- c(17, 13:16)
means4 <- chili$Mean[rows4]
sds4 <- chili$SD[rows4]
Ns4 <- chili$N[rows4]
sem4 <- sds4 / sqrt(Ns4) # standard error of each mean
# The y axis starts at 0.35, so bar heights are given relative to that offset.
bars4 <- barplot(
  means4 - 0.35,
  beside = TRUE,
  offset = 0.35,
  ylim = c(0.35, 1.05),
  col = c("grey", "white", "black", "white", "black"),
  main = "Across all songs, with baseline",
  space = c(0.2, 0.5, 0.2, 0.5, 0.2),
  xlab = "No lyrics Lyrics",
  ylab = "mean log10 hot sauce"
)
# Overlay mean +/- one standard error at the bar midpoints returned by barplot().
errbar(
  bars4, means4,
  yplus = means4 + sem4,
  yminus = means4 - sem4,
  add = TRUE, errbar.col = "grey", col = "grey"
)
legend(
  "topleft",
  legend = c("video", "no video", "no media"),
  fill = c("black", "white", "grey"),
  bty = "n"
)
box()
For comparison, here’s the original graphic.
Now I will perform one of the paper’s F tests on an artificial data set where the number of observations, sample mean, and sample standard deviation within each of the 12 experimental groups are the same as in the newly published summary statistics. I omit the control group so that we have a simple (but slightly unbalanced) 3x2x2 design.
Since the 12 group-wise sample averages and sample standard deviations are sufficient statistics within the standard ANOVA models, any standard F tests which I perform on this artificial data set should exactly coincide with the same tests on the actual data.
# Re-read the summary table and keep only the 12 song x lyrics x video cells
# (rows 1-12); the pooled rows and the control group are left out.
chili <- read.csv("table.csv")
SongNames <- as.character(chili$Song.stimulus[c(1, 5, 9)])
chiliSongs <- chili[1:12, ]
# Only the first row of each song carries its name in the table, so recode the
# song as a factor with levels 1, 2, 3 (four consecutive rows per song).
chiliSongs$Song.stimulus <- factor(rep(1:3, each = 4))
# The one-row-per-subject expansion (chiliExpand) is created further down,
# right before the scores are attached to it, so it is not built here.
## Generates N artificial observations whose sample mean and sample standard
## deviation are exactly Mean and SD ("nep" is Dutch for "artificial").
##   N    : number of observations; must be a single number >= 2, because the
##          sample standard deviation of fewer than two values is undefined
##   Mean : required sample mean
##   SD   : required sample standard deviation
## Returns a numeric vector of length N.
nep <- function(N, Mean, SD) {
  stopifnot(is.numeric(N), length(N) == 1L, N >= 2)
  x <- rnorm(N)
  ## Standardise the draw to sample mean 0 and sample SD 1, then rescale.
  Mean + SD * (x - mean(x)) / sd(x)
}
## Draw the artificial scores cell by cell, in table order, using each cell's
## N, Mean and SD (columns addressed by name rather than by position), and
## concatenate them into one vector.
scores <- unlist(Map(nep, chiliSongs$N, chiliSongs$Mean, chiliSongs$SD))
length(scores)
## [1] 162
## The vector "scores" now contains artificial scores which, per cell of the design, have the same
## sample mean and sample standard deviation as specified in the summary statistics table.
## Consequently, any ANOVA on the 3 x 2 x 2 design using these artificial scores should give
## identical results to the same ANOVA on the original (not available) data.
## Here's a check of this for the first cell: the sample size, mean and SD from the table,
## followed by the mean and SD of the first N artificial scores.
(N <- chiliSongs$N[1])
## [1] 13
chiliSongs$Mean[1]
## [1] 0.47
chiliSongs$SD[1]
## [1] 0.62
mean(scores[1:N])
## [1] 0.47
sd(scores[1:N])
## [1] 0.62
## Total sample size over the 12 cells; must match length(scores).
sum(chiliSongs$N)
## [1] 162
## One row per subject, with the artificial scores attached as a new column.
chiliExpand <- expandRows(chiliSongs, "N")
chiliExpand[["scores"]] <- scores
dim(chiliExpand)
## [1] 162 6
head(chiliExpand, 14)
## Song.stimulus Lyrics.present Video.present Mean SD scores
## 1 1 No No 0.47 0.62 0.53250433
## 1.1 1 No No 0.47 0.62 0.76466032
## 1.2 1 No No 0.47 0.62 -0.02946638
## 1.3 1 No No 0.47 0.62 1.35432891
## 1.4 1 No No 0.47 0.62 0.67923785
## 1.5 1 No No 0.47 0.62 -0.17474301
## 1.6 1 No No 0.47 0.62 0.59723040
## 1.7 1 No No 0.47 0.62 -0.13353991
## 1.8 1 No No 0.47 0.62 1.27512400
## 1.9 1 No No 0.47 0.62 0.60309651
## 1.10 1 No No 0.47 0.62 -0.27023908
## 1.11 1 No No 0.47 0.62 1.30335554
## 1.12 1 No No 0.47 0.62 -0.39154948
## 2 1 No Yes 0.69 0.45 0.61829154
## Test of effect of "song": the lyrics x video model without song is compared
## against the full song x lyrics x video model
fit_without_song <- aov(scores ~ Lyrics.present * Video.present, data = chiliExpand)
fit_with_song <- aov(scores ~ Song.stimulus * Lyrics.present * Video.present, data = chiliExpand)
anova(fit_without_song, fit_with_song)
## Analysis of Variance Table
##
## Model 1: scores ~ Lyrics.present * Video.present
## Model 2: scores ~ Song.stimulus * Lyrics.present * Video.present
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 158 41.997
## 2 150 39.597 8 2.3999 1.1364 0.3422
I confirm that there is no significant effect of “Song” (including no interaction with “Lyrics” or “Video”). My numbers of degrees of freedom differ from those published in the correction note! This is a point for further discussion with the authors.
The subsequent analyses pool the data from the three different songs. So we have a 2x2 factorial design.
## Effect of video in the additive model: lyrics only vs. lyrics + video
fit_lyrics_only <- aov(scores ~ Lyrics.present, data = chiliExpand)
fit_lyrics_plus_video <- aov(scores ~ Lyrics.present + Video.present, data = chiliExpand)
anova(fit_lyrics_only, fit_lyrics_plus_video)
## Analysis of Variance Table
##
## Model 1: scores ~ Lyrics.present
## Model 2: scores ~ Lyrics.present + Video.present
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 160 42.050
## 2 159 42.024 1 0.025743 0.0974 0.7554
## Effect of video when interaction is allowed: lyrics only vs. lyrics * video
fit_lyrics_only <- aov(scores ~ Lyrics.present, data = chiliExpand)
fit_lyrics_by_video <- aov(scores ~ Lyrics.present * Video.present, data = chiliExpand)
anova(fit_lyrics_only, fit_lyrics_by_video)
## Analysis of Variance Table
##
## Model 1: scores ~ Lyrics.present
## Model 2: scores ~ Lyrics.present * Video.present
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 160 42.050
## 2 158 41.997 2 0.053014 0.0997 0.9051
## Effect of lyrics in the additive model: video only vs. lyrics + video
fit_video_only <- aov(scores ~ Video.present, data = chiliExpand)
fit_lyrics_plus_video <- aov(scores ~ Lyrics.present + Video.present, data = chiliExpand)
anova(fit_video_only, fit_lyrics_plus_video)
## Analysis of Variance Table
##
## Model 1: scores ~ Video.present
## Model 2: scores ~ Lyrics.present + Video.present
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 160 43.681
## 2 159 42.024 1 1.657 6.2693 0.01329 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Effect of lyrics when interaction is allowed: video only vs. lyrics * video
fit_video_only <- aov(scores ~ Video.present, data = chiliExpand)
fit_lyrics_by_video <- aov(scores ~ Lyrics.present * Video.present, data = chiliExpand)
anova(fit_video_only, fit_lyrics_by_video)
## Analysis of Variance Table
##
## Model 1: scores ~ Video.present
## Model 2: scores ~ Lyrics.present * Video.present
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 160 43.681
## 2 158 41.997 2 1.6843 3.1683 0.04476 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Test of interaction between video and lyrics: additive model vs. model
## with the lyrics x video interaction
fit_additive <- aov(scores ~ Lyrics.present + Video.present, data = chiliExpand)
fit_interaction <- aov(scores ~ Lyrics.present * Video.present, data = chiliExpand)
anova(fit_additive, fit_interaction)
## Analysis of Variance Table
##
## Model 1: scores ~ Lyrics.present + Video.present
## Model 2: scores ~ Lyrics.present * Video.present
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 159 42.024
## 2 158 41.997 1 0.027271 0.1026 0.7492
## Combined effect of video and lyrics: intercept-only model vs. lyrics * video
fit_null <- aov(scores ~ 1, data = chiliExpand)
fit_lyrics_by_video <- aov(scores ~ Lyrics.present * Video.present, data = chiliExpand)
anova(fit_null, fit_lyrics_by_video)
## Analysis of Variance Table
##
## Model 1: scores ~ 1
## Model 2: scores ~ Lyrics.present * Video.present
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 161 43.702
## 2 158 41.997 3 1.7052 2.1384 0.09754 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Conclusion: “lyrics” has some effect, “video” has little effect (and there’s little interaction)