My Coding Goals This Week

Start working on the descriptive statistics for our group verification report
Attend Week 4 Q&A

How I Made Progress Towards My Goals

Start work on our Verification Report

Step 1: Load packages

library (tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.4     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library (dplyr)

Step 2: Load data from CSV files

# Read the Study 8 CSV, then give two columns more readable names:
# SC0 becomes recall_score and FL_10_DO becomes condition.
expone <- read_csv("Study 8 data.csv") %>%
  rename(recall_score = SC0, condition = FL_10_DO)

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

Step 3: Descriptive Statistics - Experiment 1

Calculate Mean, SD, Range & Count Males/Females
Number of participants: 294 (126 males, 168 females)
Age: (Mean: 34.29, SD: 12.97, Range: 18-69)

# The first two rows do not include data, so drop them
expone <- slice(expone, -(1:2))

# Print each Prolific ID that repeats an earlier row (59 duplicates found)
expone %>%
  filter(duplicated(Prolific_PID)) %>%
  pull(Prolific_PID)

##  [1] "5cd1836da6f34300017e240c" "5c76f2d92819ab0015b94b4c"
##  [3] "5c72de7b96a9600001870966" "5c3d48955fd1050001a99364"
##  [5] "5eb18525b95d6127da6815ee" "5eaac092d8a78e0172680a0e"
##  [7] "5edff6e7c53e0f33aed588ce" "5d6a55cb78fce300014e078e"
##  [9] "5e8b765e88065404c4d50fe2" "5ed22fc8bc5d7c01191e3f78"
## [11] "5ea4397b9b06662614d6e4f6" "5d371c527a04ed00018ac1c8"
## [13] "5e9c29bb77f86e13d6435f3d" "5ea3ef637e4bd537fe45cf6c"
## [15] "5ebb0f6aff8da30d1f29dc29" "5cd59eea8618af0001bdab4b"
## [17] "5eac3189c4a262061aed16d5" "5e3842b92fdfd2000fc286a4"
## [19] "5ec504eab4fbc3423f29cd97" "5e332f72c43078000cda48dd"
## [21] "5ea5e76a147a4909b3ff0482" "5c28b31a0091e40001ca5030"
## [23] "5eb089604b5931137dd66a19" "5ed0d82c81c0bd1bccc8ceea"
## [25] "5ed94fdd9a2ae04cf21bfa48" "5be1cc598c6a19000137a503"
## [27] "5cd813aca9a8c4001963a0aa" "5e925105a981b55934a34813"
## [29] "5eb30ae9bb223e0e176edecd" "5ec637f5e0bca2000ae6f211"
## [31] "5eb179d6d6fcb726f9275f97" "5dcbd8542bdeaa8740d52630"
## [33] "5b570f4cc146600001b82d8d" "5ed793104268812282fdc90d"
## [35] "5edf50e72ef80a1fe0267aeb" "5d7f598628843a00181eb444"
## [37] "5ec2ec9fdaef0d11e3109f74" "5d1290103b20b0000102e8e3"
## [39] "57eee744e62704000199d5ec" "5e6a839e2fd3b003a0f7f248"
## [41] "5e7365f19674532b961c2bb6" "5eb94e5731298c01178531c0"
## [43] "5e9db2995b38950c6f669b55" "584bb2b8bd873800015531da"
## [45] "5eac351c63858608351866b4" "5ec0260375bf15077a00e645"
## [47] "5cc3289b9e21e200015f0bb3" "5eb16a0602af57258fd3f8e1"
## [49] "5ed7578ef05e671db283844e" "5ebc1ae1ea22c801479541ab"
## [51] "5edfd94cb54d22309545cf06" "559ab96cfdf99b219a612bcf"
## [53] "5eac35df3043c62536d14f14" "5e9ff2a0cf50621a9b17c94f"
## [55] "5e5d7f349238db09c60bcbab" "5ea611dda778214a5e89fbf2"
## [57] "5c62d8e2a34174000187a003" "5ed6a937eb466b1029493c39"
## [59] "5ebfabc7676c2502837188cf"

# Keep only the first row for each Prolific ID, which removes the repeat
# attempts behind the 59 duplicate IDs
expone <- expone %>%
  filter(!duplicated(Prolific_PID))

# This leaves n = 312, which corresponds to the paper's total n

# Apply the pre-registered exclusion criteria: drop participants who did not
# complete the task, declared they did not respond seriously, or failed the
# attention check by recalling fewer than 4 headlines.
exponefinal <- expone %>%
  filter(
    Consent == "1", # keep those who consented
    Finished == "1", # keep those who finished
    Serious_check == "1", # keep those who passed the serious check
    # Every column was read in as character, so compare the recall score as a
    # number. Compared as text, "10" sorts before "4", which would wrongly
    # exclude anyone with a recall score of 10 or more.
    as.numeric(recall_score) >= 4
  )

# Narrow the data to the selected columns
exponefinaldata <- exponefinal %>%
  select(
    Finished, `Duration (in seconds)`, Gender, Age, Serious_check,
    recall_score, condition, contradiction_1:advancement
  )

# Count final participant n in Exp 1
exponefinaldata %>%
  count(Serious_check)

## # A tibble: 1 x 2
##   Serious_check     n
##   <chr>         <int>
## 1 1               294

# Tally males and females (one row per Gender code)
count(exponefinaldata, Gender)

## # A tibble: 2 x 2
##   Gender     n
##   <chr>  <int>
## 1 1        126
## 2 2        168

# Other descriptive statistics
# Age was read in as character, so convert it to numeric before summarising.
# Convert exponefinaldata's own Age column rather than copying exponefinal$Age,
# so the values never depend on the two data frames staying row-aligned.
exponefinaldata$Age <- as.numeric(exponefinaldata$Age)

# Mean age for Exp 1
mean(exponefinaldata$Age)

## [1] 34.29252

# SD of age for Exp 1
exponefinaldata %>%
  pull(Age) %>%
  sd()

## [1] 12.96633

# Minimum and maximum age for Exp 1
with(exponefinaldata, range(Age))

## [1] 18 69

Yay! Got the descriptive statistics to match!

N = 294 (males = 126, females = 168)
Mean: 34.29
SD: 12.97
Range: 18-69

Step 4: Plots of descriptive stats for each variable measured in Experiment 1 (recreating Figure 1)

# Store condition as a factor instead of character so the plot can group by it
exponefinaldata <- exponefinaldata %>%
  mutate(condition = as.factor(condition))

# Contradiction Plot
# Convert all six contradiction items (contradiction_1 to contradiction_6)
# from character to numeric in one step
exponefinaldata <- exponefinaldata %>%
  mutate(across(num_range("contradiction_", 1:6), as.numeric))

# Sum the six items into a new variable 'contradiction'. rowSums() adds
# across the columns of each row directly, so the data frame is not left
# in rowwise() mode for every later step.
exponefinaldata <- exponefinaldata %>%
  mutate(
    contradiction = rowSums(across(num_range("contradiction_", 1:6)))
  )

# Copy condition into condition2, then split the copy into four columns
# (block, number, format, conflict); the original condition column is kept
exponefinaldata <- exponefinaldata %>%
  mutate(condition2 = condition) %>%
  separate(
    col = condition2,
    into = c("block", "number", "format", "conflict")
  )

# Turn format and conflict into factors so the plots can group by them
exponefinaldata$format <- exponefinaldata %>%
  pull(format) %>%
  as.factor()
exponefinaldata$conflict <- exponefinaldata %>%
  pull(conflict) %>%
  as.factor()

# Relabel the conflict levels: Conflict -> Conf., Consistent -> Non-Conf.
levels(exponefinaldata$conflict) <- recode(
  levels(exponefinaldata$conflict),
  "Conflict" = "Conf.",
  "Consistent" = "Non-Conf."
)

# Violin plot of the contradiction score for each conflict level,
# with one panel per level of format
contradiction <- ggplot(exponefinaldata, aes(x = conflict, y = contradiction)) +
  geom_violin() +
  labs(title = "Contradiction") +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the plot title
  scale_x_discrete(name = NULL) + # no label on the x-axis
  scale_y_continuous(name = "Perceived Contradiction", limits = c(0, 30)) +
  facet_wrap(vars(format), strip.position = "bottom")

print(contradiction)

# Confusion was read in as character; make it numeric before plotting
exponefinaldata$confusion <- as.numeric(exponefinaldata[["confusion"]])

# Violin plot of confusion for each conflict level,
# with one panel per level of format
confusion <- ggplot(exponefinaldata, aes(x = conflict, y = confusion)) +
  geom_violin() +
  labs(title = "Confusion") +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the plot title
  scale_x_discrete(name = NULL) + # no label on the x-axis
  scale_y_continuous(name = "Perceived Confusion") +
  facet_wrap(vars(format), strip.position = "bottom")

print(confusion)

# Advancement was read in as character; make it numeric before plotting
exponefinaldata$advancement <- as.numeric(exponefinaldata[["advancement"]])

# Violin plot of advancement for each conflict level,
# with one panel per level of format
advancement <- ggplot(exponefinaldata, aes(x = conflict, y = advancement)) +
  geom_violin() +
  labs(title = "Advancement") +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the plot title
  scale_x_discrete(name = NULL) + # no label on the x-axis
  scale_y_continuous(name = "Perceived Scientific Advancement") +
  facet_wrap(vars(format), strip.position = "bottom")

print(advancement)

Wins

Coding like caterpillars is FUN - troubleshooting for each other and learning together as we go! (I am really grateful for our group and how organised/committed everyone is!)

Challenges

Figuring out whether we are able to replicate the plot without using the same packages the authors used
Still need to figure out how to add the labels ‘Conflict’ and ‘Format’ to the x-axis, and how to add the means and CIs to the plot (is it possible?)

Next Steps in My Coding Journey

Continue working on our Verification report and figuring out the answer to the above
Attempting Jenny’s challenge on Penguins data

Learning Log Week 4

Jia Ni Teo

27/06/2021

My Coding Goals This Week

How I Made Progress Towards My Goals

Start work on our Verification Report

Step 1: Load packages

Step 2: Load data from CSV files

Step 3: Descriptive Statistics - Experiment 1

Yay! Got the descriptive statistics to match!

Step 4: Plots of descriptive stats for each variable measured in Experiment 1 (recreating Figure 1)

Wins

Challenges

Next Steps in My Coding Journey