Here we need to load our packages. We will use tidyverse and dplyr to assist in any data manipulation.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

Read the data in and check the dataset is what we think it is using head().

study_2 <- read.csv(file = "study_2_data.csv")
head(study_2)
##   Participant_ID Gender Age Ethnicity Country T1Extraversion T1SWLS T2SWLS
## 1              1   Male  23     White  Canada       4.750000    4.8    5.0
## 2              2 Female  19  Hispanic  Mexico       3.833333    4.4    3.2
## 3              3 Female  20     White      UK       4.000000    5.8    4.0
## 4              4   Male  19     White     USA       3.916667    5.4    6.0
## 5              5   Male  22     White      UK       4.250000    5.6    5.4
## 6              6 Female  21     White      UK       4.333333    4.6    4.2
##   SWLS_Diff T1Lonely T2Lonely Lonely_Diff   T1BMPN   T2BMPN  BMPN_Diff
## 1       0.2 1.421053 1.473684  0.05263158 5.166667 5.500000  0.3333333
## 2      -1.2 3.157895 2.894737 -0.26315789 3.000000 1.666667 -1.3333333
## 3      -1.8 2.684211 2.736842  0.05263158 5.166667 3.833333 -1.3333333
## 4       0.6 1.421053 2.210526  0.78947368 6.000000 5.333333 -0.6666667
## 5      -0.2 1.421053 1.105263 -0.31578947 6.500000 6.166667 -0.3333333
## 6      -0.4 1.842105 1.842105  0.00000000 5.000000 3.500000 -1.5000000
##   SocialDistancing SixFeet
## 1                1       0
## 2                1       4
## 3                0       0
## 4                1       4
## 5                1       2
## 6                1       0

As we had already created a similar histogram for study 1, we were able to replicate the method/s we used and learnt from previous troubleshooting. Therefore, we will go straight to using the hist() function to develop a plot that looks closest to the one in the paper.

We first use par() to set the graph parameters allowing the plots to appear side by side - adjusting the number of rows and columns i.e. 1 row, with 2 columns and the mgp to change graphical location of the axis title, label and line. We then create the basic histograms using hist() and define the limits of each axis with the minimum and maximum scores. Axis labels are also added.

par(mfrow = c(1, 2), mgp = c(1, 0.1, 0))

# Basic plots

hist(study_2$BMPN_Diff,
     main = "Distribution of Relatedness Difference Scores", 
     xlab = "Relatedness Difference Score (T2 - T1)",
     ylab = "Frequency",
     ylim = c(0, 80),
     xlim = c(-5.7, 4))

hist(study_2$Lonely_Diff,
     main = "Distribution of Loneliness Scores", 
     xlab = "Loneliness Difference Score (T2 - T1)",
     ylab = "Frequency",
     ylim = c(0, 50),
     xlim = c(-2, 2))

Stylisation

The x-axis is separated again, so we will use xaxs to connect them. The plots do not require the y-axis to extend beyond the final score but the numbers needs to be horizontal, therefore we will use yaxs and las to modify the histograms. Also, the amount of bins do not match up instantly, therfore, we need to modify the amount of breaks in the histogram.

par(mfrow = c(1, 2), mgp = c(1, 0.1, 0))

hist(study_2$BMPN_Diff,
     main = "Distribution of Relatedness Difference Scores", 
     xlab = "Relatedness Difference Score (T2 - T1)",
     ylab = "Frequency",
     ylim = c(0, 80),
     xlim = c(-5.7, 4),
     xaxs = "i", 
     yaxs = "i", 
     las = 1,
     breaks = 18)

We will further customise the sizing of the main title, axis labels and ticks within the hist() function as well as make a custom x-axis using axis().

par(mfrow = c(1, 2), mgp = c(1, 0.1, 0))

hist(study_2$BMPN_Diff,
     main = "Distribution of Relatedness Difference Scores", 
     xlab = "Relatedness Difference Score (T2 - T1)",
     ylab = "Frequency",
     ylim = c(0, 80),
     xlim = c(-5.7, 4),
     xaxs = "i", 
     yaxs = "i", 
     las = 1,
     breaks = 18,
     col = "grey",
     cex.axis = 0.7, # Size of axis labels
     tck = -0.015,  # Make tick marks shorter
     xaxt = 'n',  # Hide default x-axis
     cex.main = 0.8,  # Decrease size of the main title
     cex.lab = 0.7
     )

axis(1, at = seq(-4, 4, by = 2),
     labels = seq(-4, 4, by = 2),
     cex.axis = 0.7,  # Make the axis annotation smaller
     tck = -0.015  # Make tick marks shorter
)

As the relatedness difference scores graph extends from the last labelled x-axis tick, we then use the segments() function to add a line to the plot which visually ensures the x-axis is continued from -5 to -6. CHECK W/ GROUP WHY NOT ABLINE

par(mfrow = c(1, 2), mgp = c(1, 0.1, 0))

hist(study_2$BMPN_Diff,
     main = "Distribution of Relatedness Difference Scores", 
     xlab = "Relatedness Difference Score (T2 - T1)",
     ylab = "Frequency",
     ylim = c(0, 80),
     xlim = c(-5.7, 4),
     xaxs = "i", 
     yaxs = "i", 
     las = 1,
     breaks = 18,
     col = "grey",
     cex.axis = 0.7, # Size of axis labels
     tck = -0.015,  # Make tick marks shorter
     xaxt = 'n',  # Hide default x-axis
     cex.main = 0.8,  # Decrease size of the main title
     cex.lab = 0.7
     )

axis(1, at = seq(-4, 4, by = 2),
     labels = seq(-4, 4, by = 2),
     cex.axis = 0.7,  # Make the axis annotation smaller
     tck = -0.015  # Make tick marks shorter
)

segments(x0 = -6, y0 = 0, x1 = -5, y1 = 0, col = "black", lwd = 1)
# or
# abline(h = 0, col = "black", lwd = 2)

We apply the same stylisation to the Distribution of Loneliness Scores however, there is no need to replace the x-axis as the data points fall within the range of axis ticks

par(mfrow = c(1, 2), mgp = c(1, 0.1, 0),
    family = "times")
# changed font as it it is different to graph 1

hist(study_2$BMPN_Diff,
     main = "Distribution of Relatedness Difference Scores", 
     xlab = "Relatedness Difference Score (T2 - T1)",
     ylab = "Frequency",
     ylim = c(0, 80),
     xlim = c(-5.7, 4),
     xaxs = "i", 
     yaxs = "i", 
     las = 1,
     breaks = 18,
     col = "grey",
     cex.axis = 0.7, # Size of axis labels
     tck = -0.015,  # Make tick marks shorter
     xaxt = 'n',  # Hide default x-axis
     cex.main = 0.8,  # Decrease size of the main title
     cex.lab = 0.7
     )

axis(1, at = seq(-4, 4, by = 2),
     labels = seq(-4, 4, by = 2),
     cex.axis = 0.7,  # Make the axis annotation smaller
     tck = -0.015  # Make tick marks shorter
)

segments(x0 = -6, y0 = 0, x1 = -5, y1 = 0, col = "black", lwd = 1)


hist(study_2$Lonely_Diff,
     main = "Distribution of Loneliness Scores", 
     xlab = "Loneliness Difference Score (T2 - T1)",
     ylab = "Frequency",
     ylim = c(0, 50),
     xlim = c(-2, 2),
     xaxs = "i", 
     yaxs = "i", 
     las = 1,
     breaks = 28,
     col = "grey",
     tck = -0.015,  
     cex.axis = 0.7, 
     cex.main = 0.8,  
     cex.lab = 0.7,)