Goals

Not a lot of time to spend on R this week, so my goal is to just add comments to all of the code for my reproducability report.

Progress

Setup

library(tidyverse) # Allows for easy data wrangling with shared language across packages
library(ggplot2) # Allows for detailed data visualisation
library(here) # Allows for construction of a path to project files
library(dplyr) # Allows for simple data manipulation
library(papaja) # Allows for APA formatting of results and tables
library(psych) # Includes "describe" function (M and SD)
library(MOTE) # Effect size calculator
library(wesanderson) # Cool colour palettes for graphs
library(ggpubr) # Allows for quick creation of quantile plots


master <- read.csv("AgeAdvantagesEmotionCovid_Data.csv", header = TRUE)

Demographics

Here are the demographics we are trying to reproduce:

# Demographics

## Percentage of females

### Filter female participants
n_female <- master %>%  
  select(gender_bin_f) %>% 
  filter(gender_bin_f == "Female")  

### Calculate percentage of females
100 * nrow(n_female) / nrow(master) 
## [1] 49.20635
## Age range, mean and SD

### Use range function to find age range
age_range <- master$age %>% range() 

### Use summarise function to find age mean and SD
age_mean_sd <- master %>%
  summarise(mean_age = mean(age), sd_age = sd(age)) 

age_mean_sd; age_range
##   mean_age   sd_age
## 1 45.15238 16.78756
## [1] 18 76
## Percentage white

### Filter white participants
n_white <- master %>%
  select(race_bin) %>% 
  filter(race_bin == "White")

### Calculate percentage white
100 * nrow(n_white) / nrow(master)
## [1] 75.44974
## Median household income

### Reorganise and rename income answers
dem_income <- master %>%
  select(income) %>% 
  filter(income != "Decline to answer") %>% 
  separate(income, c("low", "to", "high"), sep = " ") %>%
  select(low, high) %>%
  mutate(low2 = ifelse(low == "Less", 0,
                       ifelse(low == "Greater", 275000, low))) %>% 
  select(-low)

dem_income$high <- gsub('[$]','',dem_income$high) 

dem_income$low2 <- gsub('[$]','',dem_income$low2)

dem_income$high <- as.numeric(gsub(',','',dem_income$high))

dem_income$low2 <- as.numeric(gsub(',','',dem_income$low2))

### Calculate median income
dem_income %>%
  mutate(midpoint = (high+low2)/2) %>%
  summarise(med_income = median(midpoint))
##   med_income
## 1      55000
## Percentage working for pay

### Filter participants that work for pay
n_pay <- master %>%
  select(emp_bin) %>% 
  filter(emp_bin == "Working")

### Calculate percentage working for pay
100 * nrow(n_pay) / nrow(master)
## [1] 56.19048
## Percentage educated

### Filter eductaed participants
n_educated <- master %>%
  select(educ) %>% 
  filter(educ != "Graduated high school (or GED)", 
         educ != "Less than high school")

### Calculate percentage educated
100 * nrow(n_educated) / nrow(master)
## [1] 87.72487
## Percentage living alone

### Filter participants living alone
n_alone <- master %>% 
  select(livealone) %>% 
  filter(livealone == "Yes")

### Calculate percentage living alone
100 * nrow(n_alone) / nrow(master)
## [1] 22.96296

Success! All of the demographic statistics were reproduced.

Summary Stats

We intend to reproduce the means and SD for all of the following summary statistics:

# Summary Statistics

### Select the appropriate variables from the data set
sum_stat <- master %>% 
  select(FTP_av, FTP_Ext, FTP_Opp, FTP_Con, 
         risk_self, risk_comp, risk_pop, emp_affect,
         health, Extraversion, Conscientiousness, avg_f_pos,
         avg_f_neg, avg_i_pos, avg_i_neg) 

### Create a table of summary statistics using the describe function
sum_table <- describe(sum_stat, na.rm = TRUE, interp=FALSE,
         skew = FALSE, ranges = FALSE,trim=.1,
         type=3,check=FALSE,fast=NULL,quant=NULL,
         IQR=FALSE,omit=TRUE,data=NULL) %>%
  select(-starts_with("vars"))

print(sum_table)
##                     n mean   sd   se
## FTP_av            943 4.09 1.35 0.04
## FTP_Ext           943 4.02 1.91 0.06
## FTP_Opp           943 4.73 1.62 0.05
## FTP_Con           943 3.74 1.76 0.06
## risk_self         945 2.36 1.11 0.04
## risk_comp         945 2.45 1.34 0.04
## risk_pop          945 2.97 0.94 0.03
## emp_affect        945 1.41 1.46 0.05
## health            945 3.36 0.99 0.03
## Extraversion      945 3.44 1.65 0.05
## Conscientiousness 945 5.22 1.33 0.04
## avg_f_pos         945 1.97 0.56 0.02
## avg_f_neg         945 1.42 0.66 0.02
## avg_i_pos         945 1.93 0.59 0.02
## avg_i_neg         942 1.77 0.70 0.02

Success! All of the summary statistics were reproduced.

Table 1

Here is the table we are trying to reproduce:

# Table 1 - Mean Frequencies of Emotions

### Select the frequencies of each emotion from master data set.
emot <- master %>%
  select(f_calm, f_qui, f_app, f_int, f_cont, f_hap, f_rela, f_pea,
         f_ener, f_aff, f_amu, f_acc, f_joy, f_pro, f_reli, f_exc,
         f_conc, f_anx, f_bor, f_fru, f_irr, f_sad, f_lon, f_fear,
         f_ang, f_dis, f_gui, f_emb, f_sha)

### Create a new data frame with the descriptives (M and SD) for the emotions.
emot_des <- emot %>%
  summarise(
    across(.cols = everything(), na.rm = TRUE, list(M = mean, SD = sd)))

### Rename the emotions to table-friendly names.
emot_des <- emot_des %>%
  rename(Calm = f_calm_M, Quiet = f_qui_M, Appreciative = f_app_M,
         Interested = f_int_M, Content = f_cont_M, Happy = f_hap_M,
         Relaxed = f_rela_M, Peaceful = f_pea_M, Energetic = f_ener_M,
         Affectionate = f_aff_M, Amused = f_amu_M, Accomplished = f_acc_M,
         Joyful = f_joy_M, Proud = f_pro_M, Relieved = f_reli_M,
         Excited = f_exc_M, Concerned = f_conc_M, "Anxious/worried" = f_anx_M,
         Bored = f_bor_M, Frustrated = f_fru_M, Irritated = f_irr_M,
         Sad = f_sad_M, Lonely = f_lon_M, Fearful = f_fear_M,
         Angry = f_ang_M, Disgusted = f_dis_M, Guilty = f_gui_M,
         Embarrassed = f_emb_M, Ashamed = f_sha_M)

### Pivot the descriptive twice - one creates two variable with the name and mean of each emotion, the other creates the SD.
emot_pivot1 <- emot_des %>%
  select(-starts_with("f_")) %>%
  pivot_longer(cols = everything(), 
               names_to = "Valence and emotion",
               values_to = "M")

emot_pivot2 <- emot_des %>%
  select(starts_with("f_")) %>%
  pivot_longer(cols = everything(),
               names_to = NULL,
               values_to = "SD")

### Bind them together to create table-friendly descriptive stats.
emot_des_table <- bind_cols(emot_pivot1, emot_pivot2)

### Create lower and upper 95% CI limits
n <- 945; se <- emot_des_table$SD / sqrt(n)
emot_des_table <- emot_des_table %>%
  mutate(lower_CI = M - qt(1 - (0.05 / 2), n - 1) * se,
         upper_CI = M + qt(1 - (0.05 / 2), n - 1) * se)

### Round CIs to 2 d.p. (can't do this above because of non-numeric variable)
emot_des_table$lower_CI <- format(round(emot_des_table$lower_CI, 2), nsmall = 2)
emot_des_table$upper_CI <- format(round(emot_des_table$upper_CI, 2), nsmall = 2)

### Combine the CIs into one column
emot_des_table <- emot_des_table %>%
  unite(col = "95% CI", 
        lower_CI:upper_CI,
        sep = ", ") %>%
  mutate("95% CI" = paste0("[", `95% CI`, "]"))

### Split the data into positive/negative emotions
pos_emot_table <- emot_des_table %>%
  slice(1:16)

neg_emot_table <- emot_des_table %>%
  slice(17:29)

### Apply the apa_table function (NOTE: Only produces correct output in APA formatted markdown template which comes with the papaja package)
apa_table(list('Positive emotions' = pos_emot_table,
               'Negative emotions' = neg_emot_table), 
          caption = "Mean Frequencies of Emotions",
          note = "N = 945. CI = confidence interval",
          align = c('l', 'c', 'c', 'c'),
          font_size = "scriptsize")
(#tab:Table 1)
Mean Frequencies of Emotions
Valence and emotion M SD 95% CI
Positive emotions
   Calm 2.44 0.87 [2.39, 2.50]
   Quiet 2.43 0.87 [2.38, 2.49]
   Appreciative 2.40 0.93 [2.35, 2.46]
   Interested 2.28 0.83 [2.23, 2.33]
   Content 2.15 0.94 [2.09, 2.21]
   Happy 2.13 0.80 [2.08, 2.19]
   Relaxed 2.13 0.89 [2.07, 2.19]
   Peaceful 2.05 0.95 [1.99, 2.11]
   Energetic 1.90 0.80 [1.85, 1.95]
   Affectionate 1.89 0.86 [1.83, 1.94]
   Amused 1.87 0.72 [1.83, 1.92]
   Accomplished 1.84 0.87 [1.78, 1.89]
   Joyful 1.71 0.90 [1.65, 1.76]
   Proud 1.67 0.97 [1.61, 1.73]
   Relieved 1.48 0.88 [1.42, 1.53]
   Excited 1.46 0.79 [1.41, 1.51]
Negative emotions
   Concerned 2.23 0.91 [2.17, 2.29]
   Anxious/worried 2.00 1.06 [1.94, 2.07]
   Bored 1.88 1.12 [1.80, 1.95]
   Frustrated 1.85 0.93 [1.79, 1.90]
   Irritated 1.75 0.89 [1.70, 1.81]
   Sad 1.69 0.97 [1.63, 1.75]
   Lonely 1.55 1.25 [1.47, 1.63]
   Fearful 1.38 1.06 [1.31, 1.44]
   Angry 1.35 0.89 [1.29, 1.40]
   Disgusted 1.16 0.99 [1.10, 1.22]
   Guilty 0.63 0.87 [0.58, 0.69]
   Embarrassed 0.51 0.76 [0.47, 0.56]
   Ashamed 0.44 0.76 [0.40, 0.49]

Note. N = 945. CI = confidence interval

 

Success! Table 1 was reproduced.

Scatterplots

Here are the scatterplots we are trying to reproduce:

### Select relevant variables, omit missing data, reorganise data into long form, create new variables for intensity and valence.
plot_data <- master %>% 
  select(age, avg_i_pos, avg_i_neg, avg_f_pos, avg_f_neg) %>% 
  na.omit() %>% 
  pivot_longer(cols = -age,
               names_to = "fivalence",
               values_to = "means") %>% 
  separate(fivalence, c("average", "freqint", "valence", sep = "_")) %>% 
  select(age, freqint, valence, means) %>% 
  mutate(freqint2 = ifelse(freqint == "i", "Intensity of \n Emotional Experience",
                           ifelse(freqint == "f", "Frequency of \n Emotional Experience", freqint))) %>% 
  mutate(valence2 = ifelse(valence == "pos", "Positive Emotions", 
                           ifelse(valence == "neg", "Negative Emotions", valence))) %>% 
  select(valence2, age, freqint2, means)

### Create scatter plots with ggplot and geom_point. geom_smooth adds regression lines and facet_grid arranges the four plots together. Rest of code makes aesthetic changes.
scatter_plot <- ggplot(plot_data, aes(x = age, y = means)) +
  geom_point(alpha = 1/13, size = 2) +
  coord_cartesian(ylim = c(0,2.5)) +
  theme_bw() +
  theme(axis.text = element_text(family="Arial Narrow", size = 12)) +
  theme (panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(panel.background = element_rect(colour = "black", size = 1/40),
        strip.text.x = element_text(family="Arial Narrow", size = 14, face = "bold"),
        strip.text.y = element_text(family="Arial Narrow", size = 13, face = "bold"),
        strip.placement = "outside",
        strip.background = element_blank(),
        axis.title.x = element_text(family="Arial Narrow", size = 14, face = "bold")) +
  geom_smooth(method = "lm", colour = "black", size = 1.5) +
  facet_grid(freqint2 ~ valence2, switch = "y") +
  xlab("Age (Years)") +
  ylab("")

print(scatter_plot)

Challenges and Successes

No real challenges this week. The comments were easy to add and make my code a lot more understandable I hope. I didn’t have a lot of time this week so perhaps the comments are not as detailed as they could be.

Next Step

Not sure what my next step will be. Now that my verification report is essentially finished, I will have to find some interesting things to do in RStudio. Unfortunately, I can’t attend Jenny’s Q&As this week, but perhaps I could read her Rpubs docs and try some new things.