knitr::opts_chunk$set(message = FALSE, warning = FALSE)

What were the goals for this week?

The goal for this week were to finish figure 2 and then get started on helping Angelina with figure 1.

How did I achieve these goals?

When I last left off, the only things left to do were the error bars for the 3rd plot, the SE for religious condition in the first 2 plots, and minor formatting. I first got started on the error bars.

The first thing I needed to do was to figure out how to get the CIs for each condition based on whether the participant was religious affiliated or non-affiliated. JennyS helped me out here by posting a formula by which I could find the CIs by using a grouped table.

library(tidyverse)
library(janitor)
library(gridExtra)
library(gmodels)


data1 <- read.csv("Nichols_et_al_data.csv")
dataA <- data1 %>%                                                          
  filter(include == 0) %>% 
  
  rename(cond = con,
         claimpercent = claim,
         claimmoney = moneyclaim,
         CT_practice = completion.time..practice.included.,
         CT_payments = completion.time..payments.only.,
         religiosity = relig,
         religion = Religion) %>% 
  
  mutate(religiosity = abs(religiosity - 5),
         ritual = abs(ritual - 7),              # reverse coding
         claimpercent = claimpercent * 100,     #turning this into a percentage value
         affil = as.factor(affil_cong)) %>%      
         
  
  select(cond:ritual, -claimmoney, -sex, -age, -Religion.Text, -religion, -starts_with("CT")) %>% # selecting only the columns required
  
  as_data_frame() 

# Re-order conditions to: religous, secular, noise, and control

dataA$cond[dataA$cond==4] <- 0 # Make religious prime the reference category
dataA$cond[dataA$cond==1] <- 4 # This is in a weird order as R reads the code line by line, so if we go from top to bottom, 
dataA$cond[dataA$cond==3] <- 1 # we're changing the number twice which screws up our dataframe
dataA$cond[dataA$cond==4] <- 3


# treatment variable
dataA$cond <- factor(dataA$cond, levels= c(0,1,2,3),
                labels = c("Religious", "Secular", "Noise","Control"))

  
#Make a new dataframe that contains CI limits for all conditions based on whether participants are religiously affiliated or not

groupA <- dataA %>% 
  group_by(cond, affil) %>% 
  summarise(mean = mean(claimpercent, na.rm = TRUE),
            sd = sd(claimpercent, na.rm = TRUE),
            n = n()) %>% 
  drop_na() %>% 
  mutate(se = sd / sqrt(n),
         lowerCI = mean - qt(1 - (0.05/2), n - 1) * se,
         upperCI = mean + qt(1 - (0.05/2), n - 1) * se) %>% 
  ungroup()
  
groupA
## # A tibble: 8 x 8
##   cond      affil  mean    sd     n    se lowerCI upperCI
##   <fct>     <fct> <dbl> <dbl> <int> <dbl>   <dbl>   <dbl>
## 1 Religious 0      31.2  32.2    79  3.62   24.0     38.4
## 2 Religious 1      14.0  15.2    18  3.57    6.49    21.6
## 3 Secular   0      30.1  31.9    74  3.70   22.7     37.4
## 4 Secular   1      33.3  29.9    26  5.86   21.2     45.4
## 5 Noise     0      30.0  31.1    82  3.43   23.2     36.9
## 6 Noise     1      27.9  32.3    20  7.23   12.8     43.1
## 7 Control   0      21.1  23.7    79  2.66   15.8     26.4
## 8 Control   1      28.5  24.7    20  5.53   16.9     40.1

Now that we have our table of values including the means of each condition depending on levels of affiliation, as well as upper and lower CI limits, we can get started on our graph. I used geom_error() in order to plot the error bars, and geom_line() to plot the lines.

# labelling the levels of religious affiliation 

groupA$affil <- factor(groupA$affil, levels = c(0, 1), 
                       labels = c("Non-affiliated", "Affiliated"))


# plotting the figure

figA <- ggplot(dataA, aes(religiosity, claimpercent, color = cond)) +
  geom_smooth(method = "lm", se = FALSE) + #method = "lm" creates a straight line of best fit
  theme_light() + #Gives white background to plot
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + #Removes gridlines from plot
  coord_cartesian(ylim = c(0, 50)) + #Sets y limit to 50
  labs(x = "Religiosity", y = "Percentage claimed") + #axis labels 
  theme(legend.position = "none") +  #removes legend
  facet_grid(. ~ "Condition*Religiosity") # title, we use facet_grid to put it in a grey box

figB <- ggplot(dataA, aes(ritual, claimpercent, color = cond)) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_light() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  coord_cartesian(ylim = c(0, 50)) +
  labs(x = "Ritual frequency", y = "Percentage claimed") +
  theme(legend.position = "none") +
  facet_grid(. ~ "Condition*Ritual frequency")

figC <- ggplot(groupA, aes(affil, mean, color = cond)) +
  geom_line(aes(group = cond)) + #group our lines by condition
  geom_errorbar(data = groupA, aes(ymin = lowerCI, ymax = upperCI), width = 0.2) +
  theme_light() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  ylim(0, 50) +
  scale_x_discrete() +
  labs(x = "Religious affiliation", y = "Percentage claimed") +
  facet_grid(. ~ "Condition*Religious affiliation")

grid.arrange(figA, figB, figC, ncol = 3)

The last thing to figure out with plot 3 was how to avoid all the points from stacking on each other. Using position = “dodge” helped solve this issue.

figC <- ggplot(groupA, aes(affil, mean, color = cond)) +
  geom_line(aes(group = cond), position = position_dodge(width = 0.5)) + #group our lines by condition, we use position dodge so nothing overlaps
  geom_errorbar(data = groupA, aes(ymin = lowerCI, ymax = upperCI), width = 0.2, position = position_dodge(width = 0.5)) +
  theme_light() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  ylim(0, 50) +
  scale_x_discrete() +
  labs(x = "Religious affiliation", y = "Percentage claimed") +
  facet_grid(. ~ "Condition*Religious affiliation")

grid.arrange(figA, figB, figC, ncol = 3)

Now, the next thing to do was to fix up the colours as well as the x axis tick marks for figure 2.

figA <- ggplot(dataA, aes(religiosity, claimpercent, color = cond)) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) + #method = "lm" creates a straight line of best fit
  theme_light() + #Gives white background to plot
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + #Removes gridlines from plot
  coord_cartesian(ylim = c(0, 50)) + #Sets y limit to 50
  labs(x = "Religiosity", y = "Percentage claimed") + #axis labels 
  theme(legend.position = "none") +  #removes legend
  facet_grid(. ~ "Condition*Religiosity") + # title, we use facet_grid to put it in a grey box
  scale_color_manual(values = c("#dea520", "#5ab3e4", "#094689", "#a4a4a4")) #change the colours of the lines to fit source

figB <- ggplot(dataA, aes(ritual, claimpercent, color = cond)) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  theme_light() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  coord_cartesian(ylim = c(0, 50)) +
  labs(x = "Ritual frequency", y = "Percentage claimed") +
  theme(legend.position = "none") +
  facet_grid(. ~ "Condition*Ritual frequency") +
  scale_x_continuous(breaks = seq(0, 6, 1)) +  #adjusting the tick marks so 7 marks appear
  scale_color_manual(values = c("#dea520", "#5ab3e4", "#094689", "#a4a4a4"))

figC <- ggplot(groupA, aes(affil, mean, color = cond)) +
  geom_line(aes(group = cond), position = position_dodge(width = 0.5), size = 1.5) + #group our lines by condition, we use position dodge so nothing overlaps
  geom_errorbar(data = groupA, aes(ymin = lowerCI, ymax = upperCI), width = 0.2, position = position_dodge(width = 0.5), size = 1.3) +
  theme_light() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  ylim(0, 50) +
  scale_x_discrete() +
  labs(x = "Religious affiliation", y = "Percentage claimed") +
  facet_grid(. ~ "Condition*Religious affiliation") +
  scale_color_manual(values = c("#dea520", "#5ab3e4", "#094689", "#a4a4a4"))


grid.arrange(figA, figB, figC, ncol = 3)

This was done via the scale_color_manual() function, and then using an eyedropper tool to figure out the color values that the authors had used in their original plots. I also used the size argument to make the lines thicker and easier to read. Now the last step was figuring out how to have the SE appear for the religious condition alone in the first 2 plots.

I did this by making a new table, which copied the original table, selecting for only participants in the religious condition. Then, I added another geom_smooth() with the SE argument set to true. Finally, I used the fill argument to set the colour to be identical to the paper.

dataB <- dataA %>% 
  filter(cond == "Religious")

figA <- ggplot(dataA, aes(religiosity, claimpercent, color = cond)) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) + #method = "lm" creates a straight line of best fit
  geom_smooth(data = dataB, size = 0, se = TRUE, method = "lm", fill = "#fbf1d8") + #this creates the SE for the religious condition
  theme_light() + #Gives white background to plot
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + #Removes gridlines from plot
  coord_cartesian(ylim = c(0, 50)) + #Sets y limit to 50
  labs(x = "Religiosity", y = "Percentage claimed") + #axis labels 
  theme(legend.position = "none") +  #removes legend
  facet_grid(. ~ "Condition*Religiosity") + # title, we use facet_grid to put it in a grey box
  scale_color_manual(values = c("#dea520", "#5ab3e4", "#094689", "#a4a4a4")) #change the colours of the lines to fit source

figB <- ggplot(dataA, aes(ritual, claimpercent, color = cond)) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  geom_smooth(data = dataB, size = 0, se = TRUE, method = "lm", fill = "#fbf1d8") +
  theme_light() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  coord_cartesian(ylim = c(0, 50)) +
  labs(x = "Ritual frequency", y = "Percentage claimed") +
  theme(legend.position = "none") +
  facet_grid(. ~ "Condition*Ritual frequency") +
  scale_x_continuous(breaks = seq(0, 6, 1)) +  #adjusting the tick marks so 7 marks appear
  scale_color_manual(values = c("#dea520", "#5ab3e4", "#094689", "#a4a4a4"))

grid.arrange(figA, figB, figC, ncol = 3)

After this, I started work on figure 1 with Angelina. When I first started working on it, this is where the figure was at.

library(tidyverse)
library(janitor)
library(dplyr)
library(ggplot2)
library(ggdist) #used for first raincloud plot
library(data.table)

data1 <- read_csv("Nichols_et_al_data.csv")

data1 <- data1 %>%  
  filter(include == 0) %>% 
  rename(cond = con,
         claimpercent = claim,
         claimmoney = moneyclaim,
         CT_practice = `completion time (practice included)`,
         CT_payments = `completion time (payments only)`,
         religiosity = relig,
         religion = Religion) %>% 
  select(site, claimpercent, cond)

data1$cond[data1$cond==4] <- 0 # Make religious prime the reference category
data1$cond[data1$cond==1] <- 4 # This is in a weird order as R reads the code line by line, so if we go from top to bottom, 
data1$cond[data1$cond==3] <- 1 # we're changing the number twice which screws up our dataframe
data1$cond[data1$cond==4] <- 3


fig1_condition_rp <- ggplot(data1, aes(x = cond, y = claimpercent)) +
  geom_col(
    aes(x = as.numeric(cond)+.5),
    width = .3
  ) +
  ggdist::stat_halfeye(
    adjust = .5,
    width = .5,
    .width = 0,
    point_colour = NA
  ) +
  coord_flip()


plot(fig1_condition_rp)

My first thought was that the halfeye plot was not scaling correctly. However, after some consideration, I realized that it was actually the column graph that did not make sense. As claimpercent had not been converted to a percentage value yet and was still a decimal value, the column graph values should not be > 1. After reading some documentation online, I realized that the column graph formula was additive, meaning that it was adding all of the values of claimpercent up, rather than plotting the mean. In order to solve this, I theorized that I could create a new variable that was a copy of claimpercent, and then divide the value of that value by the number of participants in the condition that the value was referring to. That way, when the column graph added them up, it would end up scaling properly. I first attempted to use the data.table package in order to use more advanced conditional arguments to alter the values.

data2 <- data1

data1 <- data1 %>% 
  mutate(claimpercent = claimpercent * 100,
         claimpercent2 = claimpercent) %>% 
  as_tibble()


setDT(data1)

data1[cond == 0, claimpercent2 := claimpercent2 / 100 ]

This current code would make a data table out of data1, then, if the condition was control, it would take the claimpercent2 value and set it to be equal to claimpercent2 / 100 (the number of participants in the control condition). However, this had an unexpected effect.

data1
##      site claimpercent cond claimpercent2
##   1:    1    0.0000000    0     0.0000000
##   2:    1   10.8333333    1    10.8333333
##   3:    1   19.1666667    2    19.1666667
##   4:    1   71.6666667    3    71.6666667
##   5:    1    0.6583333    0     0.6583333
##  ---                                     
## 404:    3    0.4833333    0     0.4833333
## 405:    3    0.8916667    0     0.8916667
## 406:    3    8.3333333    1     8.3333333
## 407:    3   14.1666667    2    14.1666667
## 408:    3   10.0000000    3    10.0000000

Both claimpercent and claimpercent2 were being replaced with the new value. My theory was that because we made claimpercent2 a copy of claimpercent, any changes applied to claimpercent2 would also apply to claimpercent. I hopped on a call with the rest of my team to work this out and Sam posed a solution by which we would create a new column that would contain the number of participants in that condition. Then, we could divide claimpercent2 by this new variable. By using conditional statements, I was able to make this work.

data2 <- data2 %>% 
  mutate(claimpercent = claimpercent * 100,
         numberOf = (cond == 0) * 100 + (cond == 1 | cond == 2) * 103 + (cond == 3) * 102,
         claimpercent2 = claimpercent / numberOf)

fig1_condition_rp <- ggplot(data2, aes(x = cond)) +
  geom_col(
    aes(x = as.numeric(cond)+.5, y = claimpercent2),
    width = .3
  ) +
  ggdist::stat_halfeye(
    aes(y = claimpercent),
    adjust = .5,
    width = .5,
    .width = 0,
    point_colour = NA
  ) +
  coord_flip()

plot(fig1_condition_rp)

After working all of this out, we found out that there was an argument for geom_col() that changed the formula so that it would graph the means rather than adding all the values up. The value of hindsight!

What are the goals for next week?

The goals for next week are to finish off figure 1. As we solved the biggest issue we were facing so far already, and the second plot in figure 1 is nearly identical to the first plot, I do not foresee too much issue. I will also get started on producing the rmd file for figure 2 which we will use for our report.