The data set that I will be working with for the verification report and group project comes from Harris and Van Bavel’s (2021) replication study on belief superiority and dogmatism across differing political views. The data are available on the Open Science Framework (OSF).
My goals for this week were:
Our group, very luckily, has access to the authors’ final code that they used to get their results. We thus aim to verify this code by understanding the code itself and its output.
# I installed the 'car' package, then loaded the tidyverse, ggplot2, dplyr and car packages into the session.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(dplyr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
# Load the raw survey export. The CSV was downloaded from OSF and saved to the
# Desktop, so the working directory is pointed there before the file is read.
# NOTE(review): setwd() ties this script to one machine's folder layout;
# consider a project-relative path if nothing later relies on the Desktop.
setwd("~/Desktop")
data <- read_csv(file = "beliefsuperiority_all.csv")
## Warning: Duplicated column names deduplicated: 'Q62' => 'Q62_1' [49]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## Q62_1 = col_number(),
## Q58 = col_character(),
## rid = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Preview every column with its type and first few values. Most columns are
# numeric (<dbl>) because the survey items were answered on rating scales; the
# exceptions in the output below are Q58 and rid, which are character (<chr>).
data %>% glimpse()
## Rows: 1,454
## Columns: 65
## $ Q62 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ immigration_a <dbl> 2, 2, 1, 1, 1, 1, 4, 4, 3, 5, 2, 1, 1, 2, 3, 1, 4, 3, …
## $ immigration_b <dbl> 3, 2, 5, 5, 3, 5, 2, 2, 1, 5, 3, 2, 3, 2, 1, 3, 1, 2, …
## $ abortion_a <dbl> 3, 5, 1, 1, 2, 2, 3, 3, 1, 4, 3, 2, 1, 4, 5, 3, 2, 5, …
## $ abortion_b <dbl> 3, 2, 5, 5, 4, 2, 4, 3, 1, 3, 3, 3, 2, 3, 1, 3, 5, 5, …
## $ vote_a <dbl> 3, 3, 2, 2, 2, 2, 3, 4, 3, 5, 3, 2, 2, 3, 2, 2, 2, 2, …
## $ vote_b <dbl> 3, 2, 4, 5, 5, 3, 4, 2, 1, 5, 1, 2, 5, 3, 1, 3, 5, 3, …
## $ tax_a <dbl> 2, 2, 1, 1, 3, 3, 3, 3, 2, 2, 2, 3, 3, 2, 1, 1, 3, 1, …
## $ tax_b <dbl> 3, 2, 4, 1, 1, 3, 1, 2, 1, 2, 1, 5, 3, 3, 2, 3, 1, 5, …
## $ torture_a <dbl> 3, 3, 2, 3, 3, 5, 4, 4, 2, 3, 3, 3, 4, 3, 3, 3, 2, 5, …
## $ torture_b <dbl> 4, 1, 1, 1, 1, 5, 1, 4, 1, 2, 3, 1, 1, 2, 3, 3, 4, 4, …
## $ affirmaction_a <dbl> 4, 4, 2, 3, 3, 2, 4, 3, 4, 5, 4, 2, 2, 2, 4, 3, 2, 2, …
## $ affirmaction_b <dbl> 3, 2, 5, 1, 1, 5, 3, 3, 1, 5, 3, 2, 2, 1, 3, 3, 1, 2, …
## $ military_a <dbl> 3, 2, 1, 1, 5, 3, 4, 2, 2, 4, 3, 2, 2, 3, 5, 2, 3, 3, …
## $ military_b <dbl> 3, 1, 5, 1, 3, 4, 3, 3, 1, 2, 2, 3, 4, 3, 4, 3, 1, 5, …
## $ covidgov_a <dbl> 2, 4, 1, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 4, 3, 4, 2, 2, …
## $ covidgov_b <dbl> 2, 3, 5, 1, 3, 3, 2, 3, 1, 2, 3, 4, 3, 3, 2, 3, 1, 4, …
## $ AC_a <dbl> 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 2, 3, 3, 3, 2, 2, …
## $ AC_b <dbl> 2, 5, 5, 5, 5, 5, 5, 1, 1, 5, 5, 5, 4, 5, 5, 5, 5, 1, …
## $ Q37_1 <dbl> 7, 4, 9, 6, 5, 5, 7, 7, 5, 3, 5, 2, 4, 6, 1, 7, 2, 2, …
## $ Q37_2 <dbl> 6, 9, 9, 9, 9, 6, 4, 4, 5, 4, 5, 2, 7, 5, 9, 8, 4, 6, …
## $ Q37_3 <dbl> 6, 3, 9, 3, 4, 5, 8, 6, 1, 4, 5, 4, 7, 8, 5, 7, 6, 4, …
## $ Q37_4 <dbl> 8, 9, 9, 6, 8, 5, 6, 8, 6, 3, 5, 4, 3, 5, 8, 7, 8, 4, …
## $ Q37_5 <dbl> 5, 7, 9, 5, 8, 5, 3, 8, 4, 9, 5, 8, 6, 6, 9, 7, 6, 7, …
## $ Q37_6 <dbl> 6, 5, 9, 6, 4, 5, 8, 6, 7, 8, 1, 9, 5, 5, 5, 7, 5, 7, …
## $ Q37_7 <dbl> 2, 7, 9, 5, 8, 6, 7, 4, 1, 3, 5, 1, 3, 2, 7, 7, 6, 2, …
## $ Q37_8 <dbl> 5, 4, 9, 5, 6, 6, 9, 4, 3, 6, 5, 8, 2, 7, 6, 7, 3, 3, …
## $ Q37_9 <dbl> 6, 2, 8, 2, 4, 5, 2, 6, 4, 4, 5, 4, 6, 5, 7, 7, 8, 5, …
## $ Q37_10 <dbl> 8, 9, 9, 5, 8, 6, 6, 5, 5, 4, 5, 9, 3, 5, 7, 7, 5, 5, …
## $ Q37_11 <dbl> 7, 6, 8, 7, 9, 7, 6, 7, 7, 4, 5, 4, 2, 5, 9, 7, 4, 2, …
## $ Q37_12 <dbl> 7, 5, 9, 4, 6, 4, 9, 6, 5, 7, 5, 9, 6, 6, 6, 7, 6, 9, …
## $ Q37_13 <dbl> 8, 9, 7, 8, 8, 5, 6, 2, 5, 3, 5, 2, 5, 5, 5, 7, 7, 7, …
## $ Q37_14 <dbl> 1, 2, 8, 2, 5, 4, 5, 5, 1, 3, 3, 9, 6, 5, 5, 7, 6, 2, …
## $ Q37_15 <dbl> 5, 6, 9, 4, 3, 6, 9, 5, 5, 1, 5, 6, 4, 6, 3, 8, 5, 3, …
## $ Q37_16 <dbl> 8, 8, 9, 5, 7, 4, 8, 7, 3, 4, 5, 4, 3, 4, 7, 7, 3, 3, …
## $ Q37_17 <dbl> 7, 7, 9, 7, 2, 5, 1, 7, 4, 1, 5, 3, 5, 5, 2, 7, 6, 1, …
## $ Q37_18 <dbl> 9, 9, 9, 5, 9, 5, 7, 3, 5, 3, 4, 1, 5, 5, 7, 8, 5, 7, …
## $ Q37_19 <dbl> 6, 8, 9, 5, 6, 5, 8, 8, 5, 3, 5, 3, 7, 7, 5, 8, 5, 1, …
## $ Q37_20 <dbl> 7, 5, 9, 2, 3, 3, 5, 7, 5, 1, 1, 4, 8, 5, 1, 7, 2, 2, …
## $ Q8 <dbl> 1, 4, 1, 4, 4, 5, 4, 3, 1, 4, 4, 2, 4, 1, 1, 4, 2, 4, …
## $ Q10 <dbl> 5, 6, 4, 7, 2, 4, 6, 3, 4, 3, 1, 5, 5, 4, 4, 6, 4, 1, …
## $ Q12 <dbl> 5, 1, 4, 1, 4, 4, 2, 3, 3, 5, 2, 2, 5, 4, 4, 6, 3, 4, …
## $ Q39 <dbl> 5, 4, 7, 1, 4, 4, 6, 1, 2, 5, 2, 1, 4, 4, 4, 6, 3, 4, …
## $ Q40 <dbl> 5, 2, 7, 1, 4, 4, 4, 4, 3, 5, 2, 1, 3, 4, 5, 6, 3, 4, …
## $ Q14 <dbl> 6, 5, 7, 4, 6, 6, 6, 5, 6, 6, 5, 6, 4, 6, 5, 6, 5, 4, …
## $ Q16 <dbl> 1, 1, 2, 2, 3, 2, 3, 2, 1, 1, 3, 1, 1, 2, 1, 3, 3, 3, …
## $ Q18 <dbl> 1, 2, 1, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, …
## $ Q20 <dbl> 26, 83, 41, 55, 35, 35, 24, 28, 49, 24, 53, 32, 49, 25…
## $ Q62_1 <dbl> 4, 5, 5, 5, 5, 5, 5, 7, 5, 4, 4, 3, 3, 2, 5, 2, 5, 5, …
## $ Q44_1 <dbl> 5, 5, 1, 6, 5, 7, 6, 6, 4, 7, 4, 3, 2, 4, 4, 6, 4, 1, …
## $ Q44_2 <dbl> 4, 5, 7, 7, 6, 6, 6, 2, 7, 6, 6, 7, 5, 4, 7, 6, 6, 2, …
## $ Q44_3 <dbl> 5, 7, 7, 7, 4, 6, 5, 7, 7, 7, 7, 2, 7, 4, 7, 6, 6, 3, …
## $ Q44_4 <dbl> 6, 7, 1, 7, 4, 6, 4, 2, 7, 7, 7, 5, 3, 4, 4, 7, 6, 4, …
## $ Q44_5 <dbl> 5, 2, 1, 1, 2, 5, 3, 7, 1, 1, 1, 5, 1, 4, 4, 3, 4, 5, …
## $ Q58 <chr> "it was so cool.", "IT SEEMED WELL DESIGNED", "i like …
## $ rid <chr> "5eb32f06-1a93-fad6-f05d-1a2c5bd2c3b3", "5eb3308c-7aa0…
## $ age <dbl> 21, 83, 41, 55, 35, 35, 24, 28, 49, 24, 53, 33, 49, 25…
## $ gender <dbl> 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, …
## $ hhi <dbl> 11, 2, 20, 3, 1, 11, 2, 9, 13, 2, 12, 6, 3, 19, 12, 17…
## $ ethnicity <dbl> 11, 1, 1, 1, 1, 1, 11, 3, 1, 15, 1, 2, 2, 4, 1, 4, 1, …
## $ hispanic <dbl> 8, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ education <dbl> 1, 2, 7, 2, 6, 6, 1, 6, 7, 6, 4, 6, 2, 6, 4, 6, 6, 2, …
## $ political_party <dbl> 9, 1, 10, 10, 4, 9, 7, 5, 2, 2, 3, 1, 2, 9, 2, 5, 4, 7…
## $ region <dbl> 3, 4, 1, 3, 4, 3, 3, 3, 2, 4, 1, 3, 3, 4, 1, 4, 1, 1, …
## $ zip <dbl> 33904, 92346, 13207, 34761, 83617, 30022, 76661, 74820…
# Exclusion step 1: keep only participants who consented. Q62 is the consent
# item (coded 1 = 'yes', 2 = 'no' according to the study materials), so only
# rows with Q62 equal to 1 are retained; rows with a missing Q62 are dropped
# as well, because filter() keeps only rows where the condition is TRUE.
data <- data %>% filter(Q62 == 1)
# Exclusion step 2: build data_attn from the participants who passed BOTH
# attention checks (AC_a answered 3 and AC_b answered 5); anyone failing
# either check is excluded. Once the rows are filtered, the attention-check
# columns themselves (names starting with 'AC') are removed from data_attn.
data_attn <- data %>%
  filter(AC_a == 3, AC_b == 5) %>%
  dplyr::select(-starts_with('AC'))
# Before a mean dogmatism score can be computed, the reverse-keyed Q37 items
# in data_attn must be reverse-scored so that every item points in the same
# direction. The scale runs from 1 to 9, so 1 <-> 9, 2 <-> 8, 3 <-> 7 and
# 4 <-> 6 are swapped; 5 (the midpoint) and missing values are left untouched.
#
# car::recode() is called with an explicit namespace: dplyr also exports a
# recode() that does not understand this 'old=new; ...' specification string,
# and which of the two a bare recode() reaches depends on the order in which
# the packages were attached.
reverse_keyed <- paste0("Q37_", c(2, 4, 5, 7, 10, 11, 13, 16, 18, 19))
reverse_spec <- "1=9; 2=8; 3=7; 4=6; 6=4; 7=3; 8=2; 9=1"
for (item in reverse_keyed) {
  data_attn[[item]] <- car::recode(data_attn[[item]], reverse_spec)
}
# To get rid of an error (which I further explain in my 'challenges' section),
# any factor column is converted to numeric. The conversion goes through
# as.character() so that the factor's labels, not its internal integer codes,
# become the numbers; non-factor columns are returned unchanged.
#
# The conversion is applied to data_attn as well as data: data_attn was
# already derived from data above and is the frame used for the dogmatism
# scores, so converting data alone could not affect the later steps.
factor_to_numeric <- function(x) {
  if (is.factor(x)) as.numeric(as.character(x)) else x
}
data[] <- lapply(data, factor_to_numeric)
data_attn[] <- lapply(data_attn, factor_to_numeric)
# The Q37 items make up the dogmatism scale, so the columns of data_attn whose
# names start with 'Q37' are collected into dogscale. Each participant's mean
# dogmatism score is the mean across those columns in their row (ignoring
# missing answers) and is stored back in data_attn as meanD.
dogscale <- data_attn %>% dplyr::select(starts_with('Q37'))
data_attn$meanD <- rowMeans(dogscale, na.rm = TRUE)
# Plot the distribution of mean dogmatism scores (meanD). Because data_attn is
# used, participants who failed an attention check are not included. The
# expression data_attn$meanD is passed as written, since hist() builds its
# default title and x-axis label from it.
hist(x = data_attn$meanD)