The echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Note: CODE SCRIPT ONLY! Please refer to the docs file for the actual assignment content.

Part 2: Verification (35%)

Open Data

Load packages

We loaded the packages needed to reproduce results mentioned in our verification goals.

library(tidyverse) # general data wrangling
library(psych)     # to use describeBy() function
library(flextable) # to format correlation table
library(gridExtra) # to combine two plots into one frame

Read Data

We uploaded and read the open raw data files provided by the authors.

Data1 <- read.csv("Study 1 data.csv")
Data2 <- read.csv("Study 2.csv")

Study 1

Demographic Descriptives

Number of participants (467 participants)
count(Data1)
Data1 %>% 
  group_by(Participant = "Participant.ID") %>% 
  count()
Age mean and SD (M = 20.89, SD = 3.03)
# Filter out no responses (decline to answer)
Data1_Age <- Data1 %>% 
  filter (Age != "[Decline to Answer]")

# Change the age frame from character to numeric
Data1_Age$Age <- as.numeric(Data1_Age$Age)

# Calculate mean and SD
Data1_Age %>% 
  summarise(M = mean(Age),
            SD = sd(Age))
Gender proportion (77% female)
## Filter out the no responses (decline to answer)
Data1_Gender <- Data1 %>% 
  filter (Gender != "[Decline to Answer]") 

## Calculate percentage of woman
Data1_Gender %>% 
  group_by(Gender) %>% 
  filter(Gender == 'Woman') %>% 
  summarise(Percentage = n()/nrow(Data1_Gender)*100)
Proportion of participants who practiced social distancing (98.5%)
Contact within 6 feet with people outside their household (Mode = 0, M = 0.77, SD = 1.39)
# Create new function to calculate mode
find_mode <- function(x) {
  u <- unique(x)
  tab <- tabulate(match(x, u))
  u[tab == max(tab)]}
  
# Calculate mode, mean and SD
Data1 %>% 
  group_by("SixFeet") %>% 
  summarise(Mode = find_mode(SixFeet),
            M = mean(SixFeet), 
            SD = sd(SixFeet))

In-text Mean and SD

Has social connection changed as a result of the COVID-19 pandemic?

Social Connectedness at Time 1 (M = 4.11, SD = 0.88)
Social Connectedness at Time 2 (M = 3.97, SD = 0.85)

Has social connection changed more for extraverts or introverts?

We needed to create a new data frame including only participants who met the most introverted and most extraverted cut-off scores.

quantile(Data1$EXTRAVERSION) # to find quartiles' cut-off scores

Data1_Extraversion <- Data1 %>% 
  mutate(Quartile = case_when(
    EXTRAVERSION >= 4.83333 ~ 'Most Extraverted',
    EXTRAVERSION <= 3.41667 ~ 'Most Introverted')) %>%
  filter(Quartile == 'Most Extraverted' | Quartile == 'Most Introverted') 
Social Connectedness based on Extraversion

Most Introverted at T1 (M = 3.45, SD = 0.70) and at T2 (M = 3.35, SD = 0.66). Most Extraverted at T1 (M = 4.70, SD = 0.72) and at T2 (M = 4.45, SD = 0.84).

Is the effect of change in social connection on well- being different for extraverts and introverts?

Lethargy at Time 1 (M = 2.60, SD = 1.16)
Data1 %>%                              # Base data frame
  group_by("T1 Lethargy") %>%          # Self-explanatory output
  summarise(M = mean(LETHAVERAGE.T1),  # Calculate mean
            SD = sd(LETHAVERAGE.T1))   # Calculate SD
Lethargy at Time 2 (M = 3.16, SD = 1.27)
Data1 %>%                              # Base data frame
  group_by("T2 Lethargy") %>%          # Self-explanatory output
  summarise(M = mean(LETHAVERAGE.T2),  # Calculate mean
            SD = sd(LETHAVERAGE.T2))   # Calculate SD

Tables and Figures

Table 1
# Create a new data frame for correlation 
Corr1 <- Data1 %>% 
  select(LETHAVERAGE.T1:EXTRAVERSION) %>%   # Select variables needed
  cor() %>%                                 # Calculate corr. value between each variable
  round(digits=2) %>%                       # Round corr. value of to 2 dp
  str_remove("^0+") %>%                     # Remove pos leading zeros
  str_remove("(?<=-)(.*?)(.)") %>%          # Remove neg leading zeros
  matrix(nrow=7, ncol=7) %>%                # Turn characters from string back to table
  as.data.frame()                           # Convert to data frame

# Calculate mean and SD for each selected variable
Data1 %>% 
  select(LETHAVERAGE.T1:EXTRAVERSION) %>% 
  describeBy()

# Insert a row of mean and SD values to table
new_row <- c("2.60 (1.16)", "3.16 (1.27)", "0.56 (1.33)", "4.11 (0.88)", 
             "3.97 (0.85)", "-0.14 (0.71)", "4.17 (1.01)") 
Corr1 <- Corr1 %>% rbind(new_row)

# Rename the variables 
rownames(Corr1) <- c("T1 Lethargy", "T2 Lethargy", "Lethargy diff (T2-T1)", 
                     "T1 Social Connectedness", "T2 Social Connectedness", 
                     "Connectedness diff (T2-T1)", "Extraversion", "Mean (SD)")
colnames(Corr1) <- c("T1 Lethargy", "T2 Lethargy", "Lethargy diff (T2-T1)", 
                     "T1 Social Connectedness", "T2 Social Connectedness", 
                     "Connectedness diff (T2-T1)", "Extraversion")

# Remove duplicate values in upper triangular part of table
Corr1[upper.tri(Corr1)]<- ""
# Insert row names 
new_col <- c("T1 Lethargy", "T2 Lethargy", "Lethargy diff (T2-T1)", 
             "T1 Social Connectedness", "T2 Social Connectedness", 
             "Connectedness diff (T2-T1)", "Extraversion", "Mean (SD)")
Corr1 <- Corr1 %>% bind_cols(new_col)
Corr1 <- Corr1[,c(8,1,2,3,4,5,6,7)] 

# Table formatting
Corr1 %>% flextable() %>% 
  font(fontname = "Times New Roman", part = "all") %>%
  bold(bold = TRUE, part = "header") %>% 
  set_header_labels( ...8 = "") %>% 
  border_remove() %>% 
  add_header_row(colwidths = c(8), 
                 values = c("Table 1: Means and Correlations Among Variables at Time 1 and Time 2 (Study 1)")) %>% 
  hline_top()
Figure 1
hist(Data1$SCdiff,
     main = "Distribution of Social Connectedness Difference Score", # title
     xlab ="Social Connectedness Difference Score (T2-T1)",          # x-axis label
     xlim=c(-3,3), ylim = c(0,200),     # x-axis and y-axis limits
     las = 1,                           # y-axis orientation to horizontal
     xaxt = "n")                        # remove existing x-axis
axis(1, pos=0)                          # add axis and positioning

Study 2

Demographic Descriptives

Number of participants (336 participants)
Data2 %>%                                        # Base data frame
  group_by(Participant = "Participant.ID") %>%   # Self-explanatory output
  count()                                        # Calculate n in defined group
Age mean and SD (M = 32.03, SD = 11.94)
# Filter out no responses (decline to answer)
Data2_Age <- Data2 %>%                   # Create new data frame
  filter (Age != "[Decline to Answer]")  # Exclude no-answer participants
  
# Change the age frame from character to numeric
Data2_Age$Age <- as.numeric(Data2_Age$Age)

# Calculate mean and SD
Data2_Age %>%                 # Age data frame
  group_by("Age") %>%         # Self-explanatory output
  summarise(M = mean(Age),    # Calculate mean
            SD = sd(Age))     # Calculate SD
Gender proportion (55% male)
# Have to filter out the no responses (decline to answer)
Data2_Gender <- Data2 %>%                   # Create new data frame
  filter (Gender != "[Decline to Answer]")  # Exclude no-answer participants

# Calculate percentage of male
Data2_Gender %>%                                     # Gender data frame
  group_by(Gender) %>%                               # Self-explanatory output           
  filter(Gender == 'Male') %>%                       # Filter males
  summarise(Percentage = n()/nrow(Data2_Gender)*100) # Calculate percentage
Racial proportion (80% White)
Data2 %>%                                      # Base data frame
  group_by(Ethnicity) %>%                      # Self-explanatory output 
  filter(Ethnicity == 'White') %>%             # Filter whites
  summarise(Percentage = n()/nrow(Data2)*100)  # Calculate percentage
Country of residence proportion (32% U.S., 27% U.K.)
Data2 %>%                                         # Base data frame
  group_by(Country) %>%                           # Self-explanatory output 
  filter(Country == 'USA' | Country == 'UK') %>%  # Filter US, UK residents
  summarise(Percentage = n()/nrow(Data2)*100)     # Calculate percentage
Extraversion score at Time 1 (M = 3.90, SD = 0.79)
Data2 %>%                             # Base data frame
  group_by("Extraversion") %>%        # Self-explanatory output
  summarise(M = mean(T1Extraversion), # Calculate mean
            SD = sd(T1Extraversion))  # Calculate SD
Proportion of participants who practiced social distancing (92.9%)
Contact within 6 feet with people outside their household (Mode = 0, M = 1.11, SD = 0.75)
Data2 %>%                               # Base data frame
  group_by("SixFeet") %>%               # Self-explanatory output
  summarise(Mode = find_mode(SixFeet),  # Calculate mode
            M = mean(SixFeet),          # Calculate mean
            SD = sd(SixFeet))           # Calculate SD

In-text Mean and SD

Has social connection changed as a result of the COVID-19 pandemic?

Relatedness at Time 1 (M = 4.90, SD = 1.11)
Data2 %>%                      # Base data frame
  group_by("T1SC") %>%         # Self-explanatory output
  summarise(M = mean(T1BMPN),  # Calculate mean
            SD = sd(T1BMPN))   # Calculate SD
Relatedness at Time 2 (M = 4.91, SD = 1.15)
Data2 %>%                      # Base data frame
  group_by("T2SC") %>%         # Self-explanatory output
  summarise(M = mean(T2BMPN),  # Calculate mean
            SD = sd(T2BMPN))   # Calculate SD
Loneliness at Time 1 (M = 2.14, SD = 0.67)
Data2 %>%                       # Base data frame
  group_by("T1Lonely") %>%      # Self-explanatory output
  summarise(M = mean(T1Lonely), # Calculate mean
            SD = sd(T1Lonely))  # Calculate SD
Loneliness at Time 2 (M = 2.07, SD = 0.63)
Data2 %>%                       # Base data frame
  group_by("T2Lonely") %>%      # Self-explanatory output
  summarise(M = mean(T2Lonely), # Calculate mean
            SD = sd(T2Lonely))  # Calculate SD

Has social connection changed more for extraverts or introverts?

We needed to create a new data frame including only participants who met the most introverted and most extraverted cut-off scores.

quantile(Data2$T1Extraversion) # to find quartiles' cut-off scores

Data2_Extraversion <- Data2 %>% 
  mutate(Quartile = case_when(
    T1Extraversion >= 4.416667 ~ 'Most Extraverted',
    T1Extraversion <= 3.333333 ~ 'Most Introverted')) %>%
  filter(Quartile == 'Most Extraverted' | Quartile == 'Most Introverted') 
Loneliness

Most Introverted at T1 (M = 2.56, SD = 0.63) and at T2 (M = 2.31, SD = 0.63). Most Extraverted at T1 (M = 1.64, SD = 0.51) and at T2 (M = 1.67, SD = 0.49).

Data2_Extraversion %>% 
  select(T1Lonely, T2Lonely) %>% 
  describeBy(group = Data2_Extraversion$Quartile)

Is the effect of change in social connection on well-being different for extraverts and introverts?

Life Satisfaction at Time 1 (M = 3.96, SD = 1.56)
Data2 %>%                     # Base data frame
  group_by("T1SWLS") %>%      # Self-explanatory output
  summarise(M = mean(T1SWLS), # Calculate mean
            SD = sd(T1SWLS))  # Calculate SD
Life Satisfaction at Time 2 (M = 3.98, SD = 1.46)
Data2 %>%                     # Base data frame
  group_by("T2SWLS") %>%      # Self-explanatory output
  summarise(M = mean(T2SWLS), # Calculate mean
            SD = sd(T2SWLS))  # Calculate SD

Tables and Figures

Figure 2
# Combining two histograms into one frame
par(mfrow = c(1,2))

# Relatedness histogram
histBMPN <- hist(Data2$BMPN_Diff,
                 main = "Distribution of Relatedness Difference Scores", # title
                 xlab = "Relatedness Difference Score (T2-T1)",          # x-axis label
                 xlim = c(-4,4), ylim = c(0,80),     # x-axis and y-axis limits
                 las = 1,                           # y-axis orientation to horizontal
                 breaks = 13,
                 xaxt = "n")                        # remove existing x-axis
axis(1, pos=0)                                      # add axis and positioning

# Loneliness histogram
histLonely <- hist(Data2$Lonely_Diff,
                   main = "Distribution of Loneliness Scores",   # title
                   xlab = "Loneliness Difference Score (T2-T1)", # x-axis label
                   xlim = c(-2,2), ylim = c(0,50),  # x-axis and y-axis limits
                   breaks = 40,                     # define number of bars
                   xaxt = "n", yaxt = "n")          # remove existing x- and y-axis
axis(1, pos = 0)                # add x-axis and positioning
axis(2, pos = -2,               # add y-axis and positioning
     las = 1)                   # y-axis orientation to horizontal
Table 3
# Create a new data frame for correlation 
Corr2 <- Data2 %>% 
  select(T1Extraversion:BMPN_Diff) %>%   # Select variables needed
  cor() %>%                              # Calculate corr. value between each variable
  round(digits=2) %>%                    # Round corr. value of to 2 dp
  str_remove("^0+") %>%                  # Remove pos leading zeros
  str_remove("(?<=-)(.*?)(.)") %>%       # Remove neg leading zeros
  matrix(nrow=10, ncol=10) %>%           # Turn characters from string back to table
  as.data.frame()                        # Convert to data frame

# Rearrange Extraversion and Relatedness variables to match the order in original table 
Corr2 <- Corr2[,c(2,3,4,8,9,10,5,6,7,1)] # Move columns
Corr2 <- Corr2[c(2,3,4,8,9,10,5,6,7,1),] # Move rows

# Calculate mean and SD for each selected variable
Data2 %>% 
  select(T1Extraversion:BMPN_Diff) %>% 
  describeBy()

# Insert a row of mean and SD values to table
new_row2 <- c("3.97 (1.53)", "3.99 (1.45)", "0.02 (0.88)", "4.92 (1.09)", 
              "4.91 (1.14)", "–0.01 (1.11)", "2.12 (0.65)", "2.06 (0.62)", 
              "–0.06 (0.40)", "3.90 (0.79)")
Corr2 <- Corr2 %>% rbind(new_row2)

# Rename the variables 
rownames(Corr2) <- c("T1 Life Satisfaction", "T2 Life Satisfaction", 
                     "Life Satisfaction Change (T2-T1)", "T1 Relatedness", 
                     "T2 Relatedness", "Relatedness change (T2-T1)", "T1 Loneliness", 
                     "T2 Loneliness", "Loneliness change (T2-T1)", 
                     "T1 Extraversion", "Mean (SD)")
colnames(Corr2) <- c("T1 Life Satisfaction", "T2 Life Satisfaction", 
                     "Life Satisfaction change (T2-T1)", "T1 Relatedness", 
                     "T2 Relatedness", "Relatedness change (T2-T1)", 
                     "T1 Loneliness", "T2 Loneliness", 
                     "Loneliness change (T2-T1)", "T1 Extraversion")

# Remove duplicate values in upper triangular part of table
Corr2[upper.tri(Corr2)]<- ""

# Insert row names 
new_col2 <- c("T1 Life Satisfaction", "T2 Life Satisfaction", 
              "Life Satisfaction change (T2-T1)", "T1 Relatedness", 
              "T2 Relatedness", "Relatedness change (T2-T1)", 
              "T1 Loneliness", "T2 Loneliness", "Loneliness change (T2-T1)", 
              "T1 Extraversion", "Mean(SD)")
Corr2 <- Corr2 %>% bind_cols(new_col2)
Corr2 <- Corr2[,c(11,1,2,3,4,5,6,7,8,9,10)] 

# Table formatting
Corr2 %>%  flextable() %>% 
  font(fontname = "Times New Roman", part = "all") %>% 
  bold(bold = TRUE, part = "header") %>% 
  set_header_labels( ...11 = "") %>% 
  border_remove() %>%
  add_header_row(colwidths = c(11), 
                 values = c("Table 3: Correlations Among Variables for Time 1 and Time 2 (Study 2)")) %>% 
  hline_top()
Figure 3
# Social connectedness figure

## Create new data frame
Extraversion <- c("Most Introverted", "Most Introverted", 
              "Most Extraverted", "Most Extraverted")
N <- c("119", "119", "130", "130")
Time <- c("Before Pandemic", "During Pandemic", 
              "Before Pandemic", "During Pandemic")
Mean <- c("3.45", "3.35", "4.70", "4.45")
SE <- c("0.06", "0.06", "0.06", "0.07")
Fig3_SC <- data.frame(Extraversion, N, Time, Mean, SE)

## Set numerical variables to numerics format
Fig3_SC$N <- as.numeric(Fig3_SC$N)
Fig3_SC$Mean <- as.numeric(Fig3_SC$Mean)
Fig3_SC$SE <- as.numeric(Fig3_SC$SE)

## Calculate CI
Fig3_SC <- Fig3_SC %>% 
  mutate(LowerCI = Mean - qt(1 - (0.05 / 2), N - 1) * SE,
         UpperCI = Mean + qt(1 - (0.05 / 2), N - 1) * SE)

## Plot graph
ggplotSC <- ggplot(Fig3_SC, aes(x = Time, y = Mean, group = Extraversion)) +
  geom_point(aes(group = Mean), size = 3.5) +
  geom_errorbar(aes(ymin = LowerCI, ymax = UpperCI),
                width = 0.1) +
  geom_line(aes(linetype = Extraversion), size = 1.65) +
  ggtitle("Social Connectedness Changes based on Extraversion") + 
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "none",
        panel.background = element_blank(),
        axis.line = element_line()) +
  ylab("Mean Social Connectedness") +
  expand_limits(y=3) + expand_limits(y=5)


# Loneliness figure

## Create new data frame
Extraversion <- c("Most Introverted", "Most Introverted", 
              "Most Extraverted", "Most Extraverted")
N <- c("80", "80", "83", "83")
Time <- c("Before Pandemic", "During Pandemic", 
              "Before Pandemic", "During Pandemic")
Mean <- c("2.53", "2.31", "1.63", "1.67")
SE <- c("0.07", "0.07", "0.05", "0.05")
Fig3_Lonely <- data.frame(Extraversion, N, Time, Mean, SE)

## Set numerical variables to numerics format
Fig3_Lonely$N <- as.numeric(Fig3_Lonely$N)
Fig3_Lonely$Mean <- as.numeric(Fig3_Lonely$Mean)
Fig3_Lonely$SE <- as.numeric(Fig3_Lonely$SE)

## Calculate CI
Fig3_Lonely <- Fig3_Lonely %>% 
  mutate(LowerCI = Mean - qt(1 - (0.05 / 2), N - 1) * SE,
         UpperCI = Mean + qt(1 - (0.05 / 2), N - 1) * SE)

## Plot graph
ggplotLonely <- ggplot(Fig3_Lonely, aes(x = Time, 
                                        y = Mean, 
                                        group = Extraversion)) +
  geom_point(aes(group = Mean), size = 3.5) +
  geom_errorbar(aes(ymin = LowerCI, ymax = UpperCI),
                width = 0.1) +
  geom_line(aes(linetype = Extraversion), size = 1.65) +
  ggtitle("Loneliness Changes based on Extraversion") + 
  theme(plot.title = element_text(hjust = 0.5),
        legend.title=element_blank(),
        panel.background = element_blank(), 
        axis.line = element_line()) +
  ylab("Mean Loneliness") + 
  expand_limits(y=1) + expand_limits(y=3)


# Combining two ggplots into one frame
grid.arrange(ggplotSC, ggplotLonely, ncol=2, widths=c(3,4))

Part 3: Exploratory Analysis (40%)

Question 1: Were older populations more lonely than other age group?

# Categorise Age Groups
Data2_AgeGroup <- Data2_Age %>% 
  mutate(AgeGroup = case_when(Age >= 50  & Age <= 72 ~ 'Seniors',  
                              Age >= 36  & Age <= 49 ~ 'Middle Age',  
                              Age >= 23  & Age <= 35 ~ 'Adults', 
                              Age >= 18  & Age <= 22 ~ 'Young Adults'))

# Calculate mean
Data2_AgeGroup %>%               
  group_by(AgeGroup) %>%       
  summarise(T1 = mean(T1Lonely),
            T2 = mean(T2Lonely))  # ANSWER TO QUESTION 1

Question 2: Has loneliness decreased more in older population due to social isolation?

Continuing from previous codes..

# Create new data frame based on descriptive statistics
AgeGroup <- c("Young Adults", "Young Adults", "Adults", "Adults",
              "Middle Aged", "Middle Aged", "Seniors", "Seniors")
Time <- c("Before Pandemic", "During Pandemic", 
          "Before Pandemic", "During Pandemic",
          "Before Pandemic", "During Pandemic", 
          "Before Pandemic", "During Pandemic")
Mean <- c("2.25", "2.15", "2.13", "2.12", 
          "2.03", "1.96", "1.99", "1.80")
Exploratory1 <- data.frame(AgeGroup, Time, Mean)

# Set variable Mean to numerics format
Exploratory1$Mean <- as.numeric(Exploratory1$Mean)

# Plot graph
ggplotOlderAge <- ggplot(Exploratory1, aes(x = Time, y = Mean,
                                           group = AgeGroup)) +
  geom_point(aes(group = Mean, colour = AgeGroup)) +
  geom_line(aes(colour = AgeGroup)) +
  ggtitle("Loneliness Changes based on Age Groups") + 
  theme(plot.title = element_text(hjust = 0.5),
        panel.background = element_blank(), 
        legend.title=element_blank(),
        axis.line = element_line()) +
  ylab("Mean Loneliness") +
  scale_y_continuous(limits = c(1.75, 2.25), 
                     breaks = seq(1.75, 2.25, by = 0.25))
print(ggplotOlderAge)   # ANSWER TO QUESTION 2

Question 3: Has loneliness decreased more in older population due to social isolation?

# Add extraversion variable to data frame
Data2_AgeExtraversion <- Data2_Extraversion %>% 
  select(Quartile, Age, T1Lonely, T2Lonely) %>% 
  mutate(AgeGroup = case_when(Age >= 50  & Age <= 72 ~ 'Seniors',  
                              Age >= 36  & Age <= 49 ~ 'Middle Age',  
                              Age >= 23  & Age <= 35 ~ 'Adults', 
                              Age >= 18  & Age <= 22 ~ 'Young Adults')) %>% 
  filter(AgeGroup == 'Seniors')

# Calculate mean
Data2_AgeExtraversion %>%               
  group_by(Quartile) %>%       
  summarise(T1 = mean(T1Lonely),
            T2 = mean(T2Lonely))

# Create new variables and table
Extraversion <- c("Most introverted", "Most introverted",
                  "Most extraverted", "Most extraverted")
Time2 <- c("Before Pandemic", "During Pandemic", 
          "Before Pandemic", "During Pandemic")
Mean2 <- c("2.60", "2.32", "1.55", "1.40")
Exploratory2 <- data.frame(Extraversion, Time2, Mean2)

# Set variable Mean to numerics format
Exploratory2$Mean2 <- as.numeric(Exploratory2$Mean2)

# Plot graph
ggplotSeniors <- ggplot(Exploratory2, aes(x = Time2, y = Mean2,
                                           group = Extraversion)) +
  geom_point(aes(group = Mean2, colour = Extraversion)) +
  geom_line(aes(colour = Extraversion)) +
  ggtitle("Loneliness Changes in Seniors based on Extraversion") + 
  theme(plot.title = element_text(hjust = 0.5),
        panel.background = element_blank(), 
        legend.title=element_blank(),
        axis.line = element_line()) +
  ylab("Mean Loneliness") + xlab("Time") +
  scale_y_continuous(limits = c(1, 3), 
                     breaks = seq(1, 3, by = 0.5))
print(ggplotSeniors)