To replicate Fig. 2, the usual packages were loaded and the data were formatted as in previous weeks.
library(tidyverse)
library(dplyr)
library(ggplot2)
library(car)
library(ggeasy)
# ---- Data preparation ------------------------------------------------------
# Load the raw survey export and keep only the rows where Q62 equals 1.
data <- read.csv("beliefsuperiority_all.csv")
data <- dplyr::filter(data, Q62 == 1)

# Keep rows that pass both attention checks (AC_a == 3 and AC_b == 5), then
# drop every column whose name starts with "AC".
data_attn <- dplyr::filter(data, AC_a == 3, AC_b == 5)
data_attn <- dplyr::select(data_attn, -starts_with("AC"))

# Reverse-score the listed Q37 items (1 <-> 9, 2 <-> 8, ...; 5 is unchanged).
# car::recode() is named explicitly because dplyr also exports a recode() that
# does not understand this "old=new; ..." specification string; an unqualified
# call silently depends on the order in which the packages were attached.
reverse_items <- paste0("Q37_", c(2, 4, 5, 7, 10, 11, 13, 16, 18, 19))
reverse_key <- "1=9; 2=8; 3=7; 4=6; 6=4; 7=3; 8=2; 9=1"
for (item in reverse_items) {
  data_attn[[item]] <- car::recode(data_attn[[item]], reverse_key)
}

# Dogmatism score: row mean over every column whose name starts with "Q37".
dogscale <- dplyr::select(data_attn, starts_with("Q37"))
data_attn$meanDog <- rowMeans(dogscale, na.rm = TRUE)

# Re-map three attitude items: 2 -> 1, 3 -> 2.3333, 4 -> 3.6667, 5 -> 5.
# NOTE(review): presumably this stretches a four-option response set onto the
# 1-5 range used by the other attitude items -- confirm against the codebook.
rescale_key <- "2=1; 3=2.3333; 4=3.6667; 5=5"
for (item in c("vote_a", "torture_a", "affirmaction_a")) {
  data_attn[[item]] <- car::recode(data_attn[[item]], rescale_key)
}

# Attitude score: row mean over every column whose name ends with "_a".
attitudes <- dplyr::select(data_attn, ends_with("_a"))
data_attn$meanAtt <- rowMeans(attitudes, na.rm = TRUE)

# Grand-mean centre the two composite scores ...
data_attn$meanD_c <- data_attn$meanDog - mean(data_attn$meanDog, na.rm = TRUE)
data_attn$meanA_c <- data_attn$meanAtt - mean(data_attn$meanAtt, na.rm = TRUE)

# ... and every individual attitude item, stored as "<item>_c".
attitude_items <- c(
  "immigration_a", "abortion_a", "vote_a", "tax_a",
  "torture_a", "affirmaction_a", "military_a", "covidgov_a"
)
for (item in attitude_items) {
  item_mean <- mean(data_attn[[item]], na.rm = TRUE)
  data_attn[[paste0(item, "_c")]] <- data_attn[[item]] - item_mean
}
The original code used by the researchers was then loaded to see the graph I had to replicate.
# Fig. 2, researchers' original plotting code (kept verbatim): dogmatism
# (meanDog) against mean-centred political orientation (PO_c).
# Centre political orientation (Q12) on its grand mean.
data_attn$PO_c= data_attn$Q12-mean(data_attn$Q12,na.rm=TRUE)
# Point colour is mapped to PO_c here, which is what later colour scales act on.
ggplot(data_attn, aes(x=PO_c, y=meanDog,color=PO_c)) +
# Raw observations, jittered horizontally (width .15) and semi-transparent.
geom_point(aes(y = meanDog), position = position_jitter(width = .15), size = 2.5, alpha = 0.6) +
# Mean of meanDog at each x value, drawn as a black point.
# `fun.y` still works but is deprecated in favour of `fun` (see warning below).
stat_summary(fun.y=mean, geom='point', size=2, color="black") +
# Error bars from mean_cl_boot with conf.int = .95 (bootstrapped 95% CI).
# mean_cl_boot relies on the Hmisc package being installed.
stat_summary(fun.data = mean_cl_boot,geom='errorbar', fun.args=list(conf.int=.95),
size=1.5, aes(width=.3), color="black")+
labs(x='Political Orientation', y='Dogmatism') +
# Quadratic regression line fitted with lm.
stat_smooth(method = "lm", formula = y ~ x + I(x^2), size = 2) +
# Appearance only from here to legend.position: text sizes, axis lines and
# ticks, no grid lines, no legend.
theme_minimal()+
theme(axis.title.y = element_text(size=16, face="bold"))+
theme(axis.title.x = element_text(size=16, face="bold"))+
theme(axis.text.y=element_text(color = "black", size = 14))+
theme(axis.text.x=element_text(color = "black", size = 14))+
theme(legend.text = element_text(color = "black", size = 14))+
theme(legend.title = element_text(color = "black", size = 14))+
theme(axis.line= element_line(color="black")) +
theme(axis.ticks.y = element_line(color="black")) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
theme(strip.text.x=element_text(color = "black", size = 14, face="bold")) +
theme(legend.position = "none")+
# Axis breaks and limits (`lim` is a partial match for `limits`).
# NOTE(review): values outside scale limits are dropped before drawing, which
# is presumably the source of the "Removed ... rows" warnings below -- confirm.
scale_x_continuous(breaks = c(-3,-2,-1,0,1,2,3),lim=c(-3.1,3.1))+
scale_y_continuous(breaks = c(0,1,2,3,4,5,6,7,8,9),lim=c(1,9))
# Output recorded when the chunk above was run:
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 23 rows containing missing values (geom_point).
Just like last week, most of the code (specifically the theme() calls) served to edit the appearance of the graph rather than to format features such as error bars. Hence, I simplified the chunk and used ggeasy functions. The resulting code looked like this:
# Fig. 2 (simplified): dogmatism (meanDog) against mean-centred political
# orientation (PO_c).
# Centre political orientation (Q12) on its grand mean.
data_attn$PO_c <- data_attn$Q12 - mean(data_attn$Q12, na.rm = TRUE)

ggplot(data_attn, aes(x = PO_c, y = meanDog, color = PO_c)) +
  # Raw observations, jittered horizontally and semi-transparent.
  geom_point(position = position_jitter(width = .15), size = 2.5, alpha = 0.6) +
  # Mean dogmatism at each political-orientation value.
  # `fun` replaces `fun.y`, which is deprecated.
  stat_summary(fun = mean, geom = "point", size = 2, color = "black") +
  # Bootstrapped 95% confidence interval around each mean; mean_cl_boot needs
  # the Hmisc package to be installed. `width` is a constant, so it is set as
  # a parameter rather than mapped inside aes().
  stat_summary(
    fun.data = mean_cl_boot, geom = "errorbar",
    fun.args = list(conf.int = .95),
    size = 1.5, width = .3, color = "black"
  ) +
  # Quadratic regression line.
  stat_smooth(method = "lm", formula = y ~ x + I(x^2), size = 2) +
  labs(x = "Political Orientation", y = "Dogmatism") +
  # NOTE(review): scale limits discard out-of-range values before drawing. With
  # x limits of +/-3.1 and error bars 0.3 wide, the outermost whiskers may be
  # dropped this way. coord_cartesian(xlim = ...) would zoom without discarding
  # data, but it also changes which rows enter the fit -- confirm before using.
  scale_x_continuous(breaks = -3:3, limits = c(-3.1, 3.1)) +
  scale_y_continuous(breaks = 0:9, limits = c(1, 9)) +
  scale_colour_gradient2(low = "blue", mid = "black", high = "red") +
  # Appearance: black axis lines and y ticks, no legend, uniform text size.
  theme_minimal() +
  theme(
    axis.line = element_line(color = "black"),
    axis.ticks.y = element_line(color = "black")
  ) +
  easy_remove_legend() +
  easy_all_text_size(size = 16)
# Output recorded when this chunk was run (the geom_point count varies from
# run to run because the jitter is random):
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
Like last week, the first line of the chunk contains mean-centering code that the original researchers ran in a separate chunk. Since Fig. 2 plots the average dogmatism scores across different political orientations, the line that mean-centers political orientation (PO_c) was included. This line creates a new column, PO_c, whose values are obtained by subtracting the grand mean political orientation score from each participant’s political orientation score (Q12).
ggplot() was used to plot the mean-centered political orientation scores against dogmatism scores, with point colour mapped to PO_c via the color= argument inside aes() (a blue gradient by default). geom_point() was used to create a scatterplot: position_jitter() adds a small amount of random noise to the point positions so that overlapping points can be told apart, while size= and alpha= set the size and opacity of the dots.
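The stat_summary() function from the ggplot2 package was used to add the mean points and the error bars to the graph. fun.data specifies a summary function that returns a data frame (here mean_cl_boot, which gives the mean together with its lower and upper limits), whereas fun.y is an older, now-deprecated argument (replaced by fun) that specifies a summary function applied to a vector. fun.args supplies additional arguments to the summary function as a list; in this case, conf.int = .95 requests 95% confidence intervals.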
However, I don’t fully understand all these functions.
The x-axis was labelled ‘Political Orientation’ and the y-axis ‘Dogmatism’ via the labs() function from the ggplot2 package. The stat_smooth() function was used to draw a quadratic regression line using the formula y ~ x + I(x^2) and the ‘lm’ smoothing method; the line size was adjusted via size= in stat_smooth(). The theme() function was used to add solid black axis lines (axis.line=…) and black y-axis ticks (axis.ticks.y=…). ggeasy was used to remove the legend (easy_remove_legend) and adjust the size of all text on the graph (easy_all_text_size). Since both variables are continuous, scale_…_continuous() was used to set the breaks on both axes (-3 to 3 for the x-axis; 0 to 9 for the y-axis) and the limits of both axes (-3.1 to 3.1 for the x-axis; 1 to 9 for the y-axis). Finally, as with last week, scale_colour_gradient2 was used to recreate the gradient in the original graph of the study.
Last week the scale_color_gradient2() function didn’t work for Fig. 1; however, for reasons unknown, it worked for Fig. 2 (see above). Unfortunately, despite copying my code into a new file, scale_color_gradient2() still didn’t work for Fig. 1 (see below). Additionally, other functions such as scale_colour_gradient(), which creates two-colour gradients, also didn’t work.
# Fig. 1: dogmatism (meanDog) against mean-centred average attitude (meanA_c).
# Centre the average attitude score on its grand mean.
data_attn$meanA_c <- data_attn$meanAtt - mean(data_attn$meanAtt, na.rm = TRUE)

# A colour scale only acts on a mapped colour aesthetic. Without
# `colour = meanA_c` in aes(), scale_colour_gradient2() has nothing to colour
# and the points stay black, so the mapping is added here (as in Fig. 2).
Dog_plot2 <- ggplot(
  data_attn,
  aes(x = meanA_c, y = meanDog, colour = meanA_c)
) +
  # Raw observations, jittered horizontally and semi-transparent.
  geom_point(position = position_jitter(width = .15), size = 2.5, alpha = 0.5) +
  # Quadratic regression line.
  stat_smooth(method = "lm", formula = y ~ x + I(x^2), size = 2) +
  labs(x = "Average Attitude", y = "Dogmatism") +
  xlim(c(-2, 2)) +
  scale_y_continuous(breaks = 0:9, limits = c(1, 9)) +
  scale_colour_gradient2(low = "blue", mid = "black", high = "red") +
  # Appearance: black axis lines, no legend.
  theme_minimal() +
  theme(axis.line = element_line(color = "black")) +
  easy_remove_legend()
plot(Dog_plot2)
# Output recorded when this chunk was run:
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
I also encountered error messages about variable x not being numeric, which resulted in a failure to calculate the mean Dogmatism and attitude scores for each participant. After consulting my teammates, I found that I had neglected to load the car package; loading it seemed to solve the issue. However, I’m not sure of the exact function involved in this issue.
Unlike last week, this week was much more smooth-sailing. This was likely due to the similarities between the code chunks for Fig. 1 and Fig. 2. ggeasy was also used again, which made simplifying the code a lot easier. As a group, we did encounter issues with flipping the graph: Fig. 1 and Fig. 2 as shown in the original article are flipped versions of the graphs we obtained from the original code chunk.
original Fig 1. from article
replicated Fig 1.
Unfortunately, flipping the graph also reverses the order of the values on the x-axis. For example, -2, -1, 0, 1, 2 was flipped to 2, 1, 0, -1, -2. At first I thought that this could be a coding mistake on the original researchers’ part. However, upon consulting the original article I found that the researchers state “We sought to directly and conceptually replicate Toner et al.’s (2013) finding regarding dogmatism…”, and Toner et al. found that dogmatism tended to be higher among conservatives. Perhaps, then, the original researchers did some data manipulation without reporting it, as our regression line would demonstrate the opposite result.
Some of my teammates also had trouble getting error bars to appear until they installed the Hmisc package. However, the package did not need to be loaded for the error bars to appear, and the geom = “errorbar” option is part of the ggplot2 package.
There was also an issue with the whiskers not showing on the error bars at either end of the graph, despite following much of the original code. To complicate this issue further, flipping Fig. 2 via scale_x_reverse() fixed this issue but narrowed the set limits on the x-axis.
Next week we aim to finish replicating Fig 3. as well as the descriptive statistics from the pilot study and record our presentation for week 8.
Given that the original graphs in the article are flipped, would it be appropriate for us to also format our graphs so that they fully replicate the originals? Wouldn’t this count as malpractice?
The original article also displayed graphs with complete error bars, i.e. without the ends missing; however, we couldn’t find any explanation on Google for why the ends are missing in ours.
Why does the Hmisc package need to be installed in order for error bars to appear when the errorbar geom is part of the ggplot2 package?
Are there any alternative functions for creating gradients in scatterplots?