Not a lot of time to spend on R this week, so my goal is to just add comments to all of the code for my reproducability report.
library(tidyverse) # Allows for easy data wrangling with shared language across packages
library(ggplot2) # Allows for detailed data visualisation
library(here) # Allows for construction of a path to project files
library(dplyr) # Allows for simple data manipulation
library(papaja) # Allows for APA formatting of results and tables
library(psych) # Includes "describe" function (M and SD)
library(MOTE) # Effect size calculator
library(wesanderson) # Cool colour palettes for graphs
library(ggpubr) # Allows for quick creation of quantile plots
master <- read.csv("AgeAdvantagesEmotionCovid_Data.csv", header = TRUE)
Here are the demographics we are trying to reproduce:
# Demographics
## Percentage of females
### Filter female participants
n_female <- master %>%
select(gender_bin_f) %>%
filter(gender_bin_f == "Female")
### Calculate percentage of females
100 * nrow(n_female) / nrow(master)
## [1] 49.20635
## Age range, mean and SD
### Use range function to find age range
age_range <- master$age %>% range()
### Use summarise function to find age mean and SD
age_mean_sd <- master %>%
summarise(mean_age = mean(age), sd_age = sd(age))
age_mean_sd; age_range
## mean_age sd_age
## 1 45.15238 16.78756
## [1] 18 76
## Percentage white
### Filter white participants
n_white <- master %>%
select(race_bin) %>%
filter(race_bin == "White")
### Calculate percentage white
100 * nrow(n_white) / nrow(master)
## [1] 75.44974
## Median household income
### Reorganise and rename income answers
dem_income <- master %>%
select(income) %>%
filter(income != "Decline to answer") %>%
separate(income, c("low", "to", "high"), sep = " ") %>%
select(low, high) %>%
mutate(low2 = ifelse(low == "Less", 0,
ifelse(low == "Greater", 275000, low))) %>%
select(-low)
dem_income$high <- gsub('[$]','',dem_income$high)
dem_income$low2 <- gsub('[$]','',dem_income$low2)
dem_income$high <- as.numeric(gsub(',','',dem_income$high))
dem_income$low2 <- as.numeric(gsub(',','',dem_income$low2))
### Calculate median income
dem_income %>%
mutate(midpoint = (high+low2)/2) %>%
summarise(med_income = median(midpoint))
## med_income
## 1 55000
## Percentage working for pay
### Filter participants that work for pay
n_pay <- master %>%
select(emp_bin) %>%
filter(emp_bin == "Working")
### Calculate percentage working for pay
100 * nrow(n_pay) / nrow(master)
## [1] 56.19048
## Percentage educated
### Filter eductaed participants
n_educated <- master %>%
select(educ) %>%
filter(educ != "Graduated high school (or GED)",
educ != "Less than high school")
### Calculate percentage educated
100 * nrow(n_educated) / nrow(master)
## [1] 87.72487
## Percentage living alone
### Filter participants living alone
n_alone <- master %>%
select(livealone) %>%
filter(livealone == "Yes")
### Calculate percentage living alone
100 * nrow(n_alone) / nrow(master)
## [1] 22.96296
Success! All of the demographic statistics were reproduced.
We intend to reproduce the means and SD for all of the following summary statistics:
# Summary Statistics
### Select the appropriate variables from the data set
sum_stat <- master %>%
select(FTP_av, FTP_Ext, FTP_Opp, FTP_Con,
risk_self, risk_comp, risk_pop, emp_affect,
health, Extraversion, Conscientiousness, avg_f_pos,
avg_f_neg, avg_i_pos, avg_i_neg)
### Create a table of summary statistics using the describe function
sum_table <- describe(sum_stat, na.rm = TRUE, interp=FALSE,
skew = FALSE, ranges = FALSE,trim=.1,
type=3,check=FALSE,fast=NULL,quant=NULL,
IQR=FALSE,omit=TRUE,data=NULL) %>%
select(-starts_with("vars"))
print(sum_table)
## n mean sd se
## FTP_av 943 4.09 1.35 0.04
## FTP_Ext 943 4.02 1.91 0.06
## FTP_Opp 943 4.73 1.62 0.05
## FTP_Con 943 3.74 1.76 0.06
## risk_self 945 2.36 1.11 0.04
## risk_comp 945 2.45 1.34 0.04
## risk_pop 945 2.97 0.94 0.03
## emp_affect 945 1.41 1.46 0.05
## health 945 3.36 0.99 0.03
## Extraversion 945 3.44 1.65 0.05
## Conscientiousness 945 5.22 1.33 0.04
## avg_f_pos 945 1.97 0.56 0.02
## avg_f_neg 945 1.42 0.66 0.02
## avg_i_pos 945 1.93 0.59 0.02
## avg_i_neg 942 1.77 0.70 0.02
Success! All of the summary statistics were reproduced.
Here is the table we are trying to reproduce:
# Table 1 - Mean Frequencies of Emotions
### Select the frequencies of each emotion from master data set.
emot <- master %>%
select(f_calm, f_qui, f_app, f_int, f_cont, f_hap, f_rela, f_pea,
f_ener, f_aff, f_amu, f_acc, f_joy, f_pro, f_reli, f_exc,
f_conc, f_anx, f_bor, f_fru, f_irr, f_sad, f_lon, f_fear,
f_ang, f_dis, f_gui, f_emb, f_sha)
### Create a new data frame with the descriptives (M and SD) for the emotions.
emot_des <- emot %>%
summarise(
across(.cols = everything(), na.rm = TRUE, list(M = mean, SD = sd)))
### Rename the emotions to table-friendly names.
emot_des <- emot_des %>%
rename(Calm = f_calm_M, Quiet = f_qui_M, Appreciative = f_app_M,
Interested = f_int_M, Content = f_cont_M, Happy = f_hap_M,
Relaxed = f_rela_M, Peaceful = f_pea_M, Energetic = f_ener_M,
Affectionate = f_aff_M, Amused = f_amu_M, Accomplished = f_acc_M,
Joyful = f_joy_M, Proud = f_pro_M, Relieved = f_reli_M,
Excited = f_exc_M, Concerned = f_conc_M, "Anxious/worried" = f_anx_M,
Bored = f_bor_M, Frustrated = f_fru_M, Irritated = f_irr_M,
Sad = f_sad_M, Lonely = f_lon_M, Fearful = f_fear_M,
Angry = f_ang_M, Disgusted = f_dis_M, Guilty = f_gui_M,
Embarrassed = f_emb_M, Ashamed = f_sha_M)
### Pivot the descriptive twice - one creates two variable with the name and mean of each emotion, the other creates the SD.
emot_pivot1 <- emot_des %>%
select(-starts_with("f_")) %>%
pivot_longer(cols = everything(),
names_to = "Valence and emotion",
values_to = "M")
emot_pivot2 <- emot_des %>%
select(starts_with("f_")) %>%
pivot_longer(cols = everything(),
names_to = NULL,
values_to = "SD")
### Bind them together to create table-friendly descriptive stats.
emot_des_table <- bind_cols(emot_pivot1, emot_pivot2)
### Create lower and upper 95% CI limits
n <- 945; se <- emot_des_table$SD / sqrt(n)
emot_des_table <- emot_des_table %>%
mutate(lower_CI = M - qt(1 - (0.05 / 2), n - 1) * se,
upper_CI = M + qt(1 - (0.05 / 2), n - 1) * se)
### Round CIs to 2 d.p. (can't do this above because of non-numeric variable)
emot_des_table$lower_CI <- format(round(emot_des_table$lower_CI, 2), nsmall = 2)
emot_des_table$upper_CI <- format(round(emot_des_table$upper_CI, 2), nsmall = 2)
### Combine the CIs into one column
emot_des_table <- emot_des_table %>%
unite(col = "95% CI",
lower_CI:upper_CI,
sep = ", ") %>%
mutate("95% CI" = paste0("[", `95% CI`, "]"))
### Split the data into positive/negative emotions
pos_emot_table <- emot_des_table %>%
slice(1:16)
neg_emot_table <- emot_des_table %>%
slice(17:29)
### Apply the apa_table function (NOTE: Only produces correct output in APA formatted markdown template which comes with the papaja package)
apa_table(list('Positive emotions' = pos_emot_table,
'Negative emotions' = neg_emot_table),
caption = "Mean Frequencies of Emotions",
note = "N = 945. CI = confidence interval",
align = c('l', 'c', 'c', 'c'),
font_size = "scriptsize")
| Valence and emotion | M | SD | 95% CI |
|---|---|---|---|
| Positive emotions | |||
| Calm | 2.44 | 0.87 | [2.39, 2.50] |
| Quiet | 2.43 | 0.87 | [2.38, 2.49] |
| Appreciative | 2.40 | 0.93 | [2.35, 2.46] |
| Interested | 2.28 | 0.83 | [2.23, 2.33] |
| Content | 2.15 | 0.94 | [2.09, 2.21] |
| Happy | 2.13 | 0.80 | [2.08, 2.19] |
| Relaxed | 2.13 | 0.89 | [2.07, 2.19] |
| Peaceful | 2.05 | 0.95 | [1.99, 2.11] |
| Energetic | 1.90 | 0.80 | [1.85, 1.95] |
| Affectionate | 1.89 | 0.86 | [1.83, 1.94] |
| Amused | 1.87 | 0.72 | [1.83, 1.92] |
| Accomplished | 1.84 | 0.87 | [1.78, 1.89] |
| Joyful | 1.71 | 0.90 | [1.65, 1.76] |
| Proud | 1.67 | 0.97 | [1.61, 1.73] |
| Relieved | 1.48 | 0.88 | [1.42, 1.53] |
| Excited | 1.46 | 0.79 | [1.41, 1.51] |
| Negative emotions | |||
| Concerned | 2.23 | 0.91 | [2.17, 2.29] |
| Anxious/worried | 2.00 | 1.06 | [1.94, 2.07] |
| Bored | 1.88 | 1.12 | [1.80, 1.95] |
| Frustrated | 1.85 | 0.93 | [1.79, 1.90] |
| Irritated | 1.75 | 0.89 | [1.70, 1.81] |
| Sad | 1.69 | 0.97 | [1.63, 1.75] |
| Lonely | 1.55 | 1.25 | [1.47, 1.63] |
| Fearful | 1.38 | 1.06 | [1.31, 1.44] |
| Angry | 1.35 | 0.89 | [1.29, 1.40] |
| Disgusted | 1.16 | 0.99 | [1.10, 1.22] |
| Guilty | 0.63 | 0.87 | [0.58, 0.69] |
| Embarrassed | 0.51 | 0.76 | [0.47, 0.56] |
| Ashamed | 0.44 | 0.76 | [0.40, 0.49] |
Note. N = 945. CI = confidence interval
Success! Table 1 was reproduced.
Here are the scatterplots we are trying to reproduce:
### Select relevant variables, omit missing data, reorganise data into long form, create new variables for intensity and valence.
plot_data <- master %>%
select(age, avg_i_pos, avg_i_neg, avg_f_pos, avg_f_neg) %>%
na.omit() %>%
pivot_longer(cols = -age,
names_to = "fivalence",
values_to = "means") %>%
separate(fivalence, c("average", "freqint", "valence", sep = "_")) %>%
select(age, freqint, valence, means) %>%
mutate(freqint2 = ifelse(freqint == "i", "Intensity of \n Emotional Experience",
ifelse(freqint == "f", "Frequency of \n Emotional Experience", freqint))) %>%
mutate(valence2 = ifelse(valence == "pos", "Positive Emotions",
ifelse(valence == "neg", "Negative Emotions", valence))) %>%
select(valence2, age, freqint2, means)
### Create scatter plots with ggplot and geom_point. geom_smooth adds regression lines and facet_grid arranges the four plots together. Rest of code makes aesthetic changes.
scatter_plot <- ggplot(plot_data, aes(x = age, y = means)) +
geom_point(alpha = 1/13, size = 2) +
coord_cartesian(ylim = c(0,2.5)) +
theme_bw() +
theme(axis.text = element_text(family="Arial Narrow", size = 12)) +
theme (panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(panel.background = element_rect(colour = "black", size = 1/40),
strip.text.x = element_text(family="Arial Narrow", size = 14, face = "bold"),
strip.text.y = element_text(family="Arial Narrow", size = 13, face = "bold"),
strip.placement = "outside",
strip.background = element_blank(),
axis.title.x = element_text(family="Arial Narrow", size = 14, face = "bold")) +
geom_smooth(method = "lm", colour = "black", size = 1.5) +
facet_grid(freqint2 ~ valence2, switch = "y") +
xlab("Age (Years)") +
ylab("")
print(scatter_plot)
No real challenges this week. The comments were easy to add and make my code a lot more understandable I hope. I didn’t have a lot of time this week so perhaps the comments are not as detailed as they could be.
Not sure what my next step will be. Now that my verification report is essentially finished, I will have to find some interesting things to do in RStudio. Unfortunately, I can’t attend Jenny’s Q&As this week, but perhaps I could read her Rpubs docs and try some new things.