My Coding Goals This Week

  1. Start working on the descriptive statistics for our group verification report
  2. Attend Week 4 Q&A

How I Made Progress Towards My Goals

Start work on our Verification Report

Step 1: Load packages

library (tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.4     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library (dplyr)

Step 2: Load data from CSV files

# Read the Study 8 CSV and give the two key columns friendlier names
expone <- read_csv("Study 8 data.csv") %>%
  rename(recall_score = SC0, condition = FL_10_DO)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

Step 3: Descriptive Statistics - Experiment 1

  • Calculate Mean, SD, Range & Count Males/Females
  • Number of participants: 294 (126 males, 168 females)
  • Age: (Mean: 34.29, SD: 12.97, Range: 18-69)
# The first two rows do not contain participant data, so drop them
expone <- slice(expone, -(1:2))

# Print every Prolific ID that has already appeared earlier in the data
# (59 duplicates found)
expone %>%
  filter(duplicated(Prolific_PID)) %>%
  pull(Prolific_PID)
##  [1] "5cd1836da6f34300017e240c" "5c76f2d92819ab0015b94b4c"
##  [3] "5c72de7b96a9600001870966" "5c3d48955fd1050001a99364"
##  [5] "5eb18525b95d6127da6815ee" "5eaac092d8a78e0172680a0e"
##  [7] "5edff6e7c53e0f33aed588ce" "5d6a55cb78fce300014e078e"
##  [9] "5e8b765e88065404c4d50fe2" "5ed22fc8bc5d7c01191e3f78"
## [11] "5ea4397b9b06662614d6e4f6" "5d371c527a04ed00018ac1c8"
## [13] "5e9c29bb77f86e13d6435f3d" "5ea3ef637e4bd537fe45cf6c"
## [15] "5ebb0f6aff8da30d1f29dc29" "5cd59eea8618af0001bdab4b"
## [17] "5eac3189c4a262061aed16d5" "5e3842b92fdfd2000fc286a4"
## [19] "5ec504eab4fbc3423f29cd97" "5e332f72c43078000cda48dd"
## [21] "5ea5e76a147a4909b3ff0482" "5c28b31a0091e40001ca5030"
## [23] "5eb089604b5931137dd66a19" "5ed0d82c81c0bd1bccc8ceea"
## [25] "5ed94fdd9a2ae04cf21bfa48" "5be1cc598c6a19000137a503"
## [27] "5cd813aca9a8c4001963a0aa" "5e925105a981b55934a34813"
## [29] "5eb30ae9bb223e0e176edecd" "5ec637f5e0bca2000ae6f211"
## [31] "5eb179d6d6fcb726f9275f97" "5dcbd8542bdeaa8740d52630"
## [33] "5b570f4cc146600001b82d8d" "5ed793104268812282fdc90d"
## [35] "5edf50e72ef80a1fe0267aeb" "5d7f598628843a00181eb444"
## [37] "5ec2ec9fdaef0d11e3109f74" "5d1290103b20b0000102e8e3"
## [39] "57eee744e62704000199d5ec" "5e6a839e2fd3b003a0f7f248"
## [41] "5e7365f19674532b961c2bb6" "5eb94e5731298c01178531c0"
## [43] "5e9db2995b38950c6f669b55" "584bb2b8bd873800015531da"
## [45] "5eac351c63858608351866b4" "5ec0260375bf15077a00e645"
## [47] "5cc3289b9e21e200015f0bb3" "5eb16a0602af57258fd3f8e1"
## [49] "5ed7578ef05e671db283844e" "5ebc1ae1ea22c801479541ab"
## [51] "5edfd94cb54d22309545cf06" "559ab96cfdf99b219a612bcf"
## [53] "5eac35df3043c62536d14f14" "5e9ff2a0cf50621a9b17c94f"
## [55] "5e5d7f349238db09c60bcbab" "5ea611dda778214a5e89fbf2"
## [57] "5c62d8e2a34174000187a003" "5ed6a937eb466b1029493c39"
## [59] "5ebfabc7676c2502837188cf"
# Keep only the first attempt for each of the 59 duplicated IDs
expone <- expone %>%
  filter(!duplicated(Prolific_PID))

# After de-duplication n = 312, which corresponds to the paper's total n.
# Apply the pre-registered exclusion criteria: drop participants who did not
# complete the task, declared they did not respond seriously, or failed the
# attention check by recalling fewer than 4 headlines.
exponefinal <- expone %>%
  filter(
    Consent == "1", # keep those who consented
    Finished == "1", # keep those who finished
    Serious_check == "1", # keep those who said they responded seriously
    # recall_score was read in as character, so compare it as a number:
    # a string comparison would sort "10" before "4" and wrongly exclude it
    as.numeric(recall_score) >= 4
  )

# Keep only the columns needed for the descriptive statistics and plots
exponefinaldata <- exponefinal %>%
  select(
    Finished, `Duration (in seconds)`, Gender, Age, Serious_check,
    recall_score, condition, contradiction_1:advancement
  )

# Final participant n in Exp 1 (every remaining row has Serious_check == "1",
# so this single count is the sample size)
count(exponefinaldata, Serious_check)
## # A tibble: 1 x 2
##   Serious_check     n
##   <chr>         <int>
## 1 1               294
# Number of participants per Gender code
count(exponefinaldata, Gender)
## # A tibble: 2 x 2
##   Gender     n
##   <chr>  <int>
## 1 1        126
## 2 2        168
# Other descriptive statistics
# Age was read in as character; convert the column in place, taking the values
# from the same data frame that is being updated so the rows cannot misalign
exponefinaldata$Age <- as.numeric(exponefinaldata$Age)

mean(exponefinaldata$Age) # Mean age for Exp 1
## [1] 34.29252
sd(exponefinaldata$Age) # SD of age for Exp 1
## [1] 12.96633
range(exponefinaldata$Age) # Youngest and oldest age in Exp 1
## [1] 18 69

Yay! Got the descriptive statistics to match!

  • N = 294 (males = 126, females = 168)
  • Mean: 34.29
  • SD: 12.97
  • Range: 18-69

Step 4: Plots of descriptive stats for each variable measured in Experiment 1 (recreating Figure 1)

# Change condition from character to factor to allow grouping in the plot
exponefinaldata$condition <- as.factor(exponefinaldata$condition)

# Contradiction Plot
# Convert the six contradiction items from character to numeric, then add them
# up into a new variable 'contradiction'. The sum is vectorised, so the data
# frame is not left in rowwise() mode for the steps that follow.
exponefinaldata <- exponefinaldata %>%
  mutate(
    across(num_range("contradiction_", 1:6), as.numeric),
    contradiction = contradiction_1 + contradiction_2 + contradiction_3 +
      contradiction_4 + contradiction_5 + contradiction_6
  )
# Copy condition into condition2 so the copy can be split while the original
# column is kept
exponefinaldata <- mutate(exponefinaldata, condition2 = condition)

# Split condition2 into its four components
exponefinaldata <- exponefinaldata %>%
  separate(
    col = condition2,
    into = c("block", "number", "format", "conflict")
  )

# format and conflict must be factors to be used for grouping in the plots
exponefinaldata$format <- as.factor(exponefinaldata$format)
exponefinaldata$conflict <- as.factor(exponefinaldata$conflict)

# Relabel the conflict levels: Conflict -> Conf., Consistent -> Non-Conf.
conflict_levels <- levels(exponefinaldata$conflict)
conflict_levels[conflict_levels == "Conflict"] <- "Conf."
conflict_levels[conflict_levels == "Consistent"] <- "Non-Conf."
levels(exponefinaldata$conflict) <- conflict_levels
# Violin plot of the summed contradiction score for each conflict level,
# with one panel per format (panel labels placed underneath)
contradiction <- ggplot(
  exponefinaldata,
  aes(x = conflict, y = contradiction)
) +
  geom_violin() +
  labs(title = "Contradiction", x = NULL, y = "Perceived Contradiction") +
  scale_y_continuous(limits = c(0, 30)) +
  facet_wrap(~format, strip.position = "bottom") +
  theme(plot.title = element_text(hjust = 0.5)) # center the plot title

print(contradiction)

# Confusion was read in as character; make it numeric before plotting
exponefinaldata$confusion <- as.numeric(exponefinaldata$confusion)

# Violin plot of confusion for each conflict level, one panel per format
confusion <- ggplot(
  exponefinaldata,
  aes(x = conflict, y = confusion)
) +
  geom_violin() +
  labs(title = "Confusion", x = NULL, y = "Perceived Confusion") +
  facet_wrap(~format, strip.position = "bottom") +
  theme(plot.title = element_text(hjust = 0.5)) # center the plot title

print(confusion)

# Advancement was read in as character; make it numeric before plotting
exponefinaldata$advancement <- as.numeric(exponefinaldata$advancement)

# Violin plot of advancement for each conflict level, one panel per format
advancement <- ggplot(
  exponefinaldata,
  aes(x = conflict, y = advancement)
) +
  geom_violin() +
  labs(
    title = "Advancement",
    x = NULL, # no label on the x-axis
    y = "Perceived Scientific Advancement"
  ) +
  facet_wrap(~format, strip.position = "bottom") +
  theme(plot.title = element_text(hjust = 0.5)) # center the plot title

print(advancement)

Wins

  • Coding like caterpillars is FUN - troubleshooting for each other and learning together as we go! (I am really grateful for our group and how organised/committed everyone is!)

Challenges

  • Figuring out whether we are able to replicate the plot without using the same packages the authors used
  • Still need to figure out how to add the labels ‘Conflict’ and ‘Format’ to the x-axis, and how to add the means and CIs to the plot (is it possible?)

Next Steps in My Coding Journey

  • Continue working on our Verification report and figuring out the answer to the above
  • Attempting Jenny’s challenge on Penguins data