Load libraries

# install.packages("tidyverse")
library(tidyverse)
library(jtools) # for theme_apa in figures

Load data

# Read the raw survey export; the path is relative to the working directory.
raw_data <- read_csv(file = "survey.csv")

Clean data

Compute scale items

# Friendship satisfaction: per-row mean of the seven FriendSatis1_* items.
#
# rowMeans(..., na.rm = TRUE) averages over the items that were actually
# answered, so a skipped item is not silently counted as 0 (which is what
# sum(..., na.rm = TRUE) / 7 did, pulling the score downward). A row with all
# seven items missing gets NaN instead of a misleading 0.
#
# all_of() makes the selection fail loudly if any of the seven item columns is
# absent from raw_data.
#
# The score is computed row-wise without group_by(id), so scales_data is left
# ungrouped and no one-row-per-group grouping leaks into later steps.
scales_data <- raw_data %>%
  mutate(
    id = row_number(), # sequential row identifier
    FriendSatis = rowMeans(
      select(., all_of(paste0("FriendSatis1_", 1:7))),
      na.rm = TRUE
    )
  )

Recode relationship status using labels

# Replace the numeric RelStatus code with a two-level text label: code 5
# becomes "Single", every other non-missing code becomes "In Relationship",
# and missing codes stay NA. The label text appears verbatim in the tables and
# on the plot axes, so it is spelled out in full here.
# NOTE(review): RelStatus_Other is kept alongside this column, so there may be
# an "other" code that is being lumped into "In Relationship" -- confirm
# against the survey codebook.
scales_data <- scales_data %>%
  mutate(RelStatus = ifelse(RelStatus == 5, "Single", "In Relationship"))

Select desired columns

# Build the analysis data set: drop rows with no CloseFriends value (these are
# treated as empty rows), then keep only the columns used further down.
clean_data <- scales_data %>%
  filter(!is.na(CloseFriends)) %>%
  select(
    id, age, ethnicity, ethnicity_other,
    gender, gender_other,
    CloseFriends, CloseFriendsBefore,
    RelStatus, RelStatus_Other,
    DesireRel, DesireFriends,
    FriendSatis
  )

Descriptives

Create tables

# Number of rows in each relationship status, within each gender
count(group_by(clean_data, gender), RelStatus)

## # A tibble: 7 x 3
## # Groups:   gender [4]
##   gender     RelStatus          n
##   <chr>      <chr>          <int>
## 1 Female     In Relatioship    21
## 2 Female     Single            45
## 3 Male       In Relatioship    16
## 4 Male       Single            33
## 5 Non-binary In Relatioship     2
## 6 Non-binary Single             2
## 7 Other      Single             1

# Mean number of close friends per gender (missing values ignored)
group_by(clean_data, gender) %>%
  dplyr::summarise(
    meanFriends = mean(CloseFriends, na.rm = TRUE)
  )

## # A tibble: 4 x 2
##   gender     meanFriends
## * <chr>            <dbl>
## 1 Female            5.91
## 2 Male              6.47
## 3 Non-binary        3.75
## 4 Other             7

# Mean of CloseFriendsBefore per gender (missing values ignored)
group_by(clean_data, gender) %>%
  dplyr::summarise(
    meanFriendsBefore = mean(CloseFriendsBefore, na.rm = TRUE)
  )

## # A tibble: 4 x 2
##   gender     meanFriendsBefore
## * <chr>                  <dbl>
## 1 Female                  6.94
## 2 Male                    5.53
## 3 Non-binary              4   
## 4 Other                   7

Visualizations

# Bar chart: number of rows per relationship status, whole sample
clean_data %>%
  ggplot(mapping = aes(RelStatus)) +
  geom_bar() +
  theme_apa()

# Same bar chart, split into one panel per gender
clean_data %>%
  ggplot(mapping = aes(RelStatus)) +
  geom_bar() +
  facet_grid(cols = vars(gender)) +
  theme_apa()

Data analysis

# RESEARCH QUESTION 1:
# Is having more close friends associated with higher friendship satisfaction?
# Simple linear regression of FriendSatis on CloseFriends.
summary(lm(formula = FriendSatis ~ CloseFriends, data = clean_data))

## 
## Call:
## lm(formula = FriendSatis ~ CloseFriends, data = clean_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7661 -0.4084  0.1614  0.7395  1.3723 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.55739    0.17483   43.23  < 2e-16 ***
## CloseFriends  0.07031    0.02450    2.87  0.00486 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.005 on 118 degrees of freedom
## Multiple R-squared:  0.06526,    Adjusted R-squared:  0.05734 
## F-statistic: 8.239 on 1 and 118 DF,  p-value: 0.004861

# RESEARCH QUESTION 2:
# Is having more close friends (quantity) associated with less desire for a
# romantic relationship? Simple linear regression of DesireRel on CloseFriends.
summary(lm(formula = DesireRel ~ CloseFriends, data = clean_data))

## 
## Call:
## lm(formula = DesireRel ~ CloseFriends, data = clean_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7805 -0.7423  0.2450  0.3722  1.4358 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.81866    0.23104  16.528   <2e-16 ***
## CloseFriends -0.01273    0.03190  -0.399    0.691    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.11 on 79 degrees of freedom
##   (39 observations deleted due to missingness)
## Multiple R-squared:  0.00201,    Adjusted R-squared:  -0.01062 
## F-statistic: 0.1591 on 1 and 79 DF,  p-value: 0.6911

# Is higher friendship satisfaction (quality) associated with less desire for
# a romantic relationship? Simple linear regression of DesireRel on FriendSatis.
summary(lm(formula = DesireRel ~ FriendSatis, data = clean_data))

## 
## Call:
## lm(formula = DesireRel ~ FriendSatis, data = clean_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8466 -0.7260  0.2287  0.4247  1.6659 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   2.8969     0.9851   2.941   0.0043 **
## FriendSatis   0.1055     0.1222   0.863   0.3906   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.106 on 79 degrees of freedom
##   (39 observations deleted due to missingness)
## Multiple R-squared:  0.009346,   Adjusted R-squared:  -0.003194 
## F-statistic: 0.7453 on 1 and 79 DF,  p-value: 0.3906

Visualize results above

# Each plot below is a scatter of the two variables from one of the regressions
# above, overlaid with a fitted straight line (method = "lm").

# RESEARCH QUESTION 1:
# Number of close friends vs. friendship satisfaction
clean_data %>%
  ggplot(mapping = aes(CloseFriends, FriendSatis)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_apa()

# RESEARCH QUESTION 2:
# Number of close friends (quantity) vs. desire for a romantic relationship
clean_data %>%
  ggplot(mapping = aes(CloseFriends, DesireRel)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_apa()

# Friendship satisfaction (quality) vs. desire for a romantic relationship
clean_data %>%
  ggplot(mapping = aes(FriendSatis, DesireRel)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_apa()

Extra

# Close friends now (x) against close friends before the pandemic (y).
# geom_abline() is called with no arguments, so it draws its default reference
# line; the smooth layer adds the fitted straight line for comparison.
clean_data %>%
  ggplot(mapping = aes(CloseFriends, CloseFriendsBefore)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_abline() +
  theme_apa()

# Reshape to long format for the before/after comparison: CloseFriends is first
# renamed to CloseFriendsAfter, then the two count columns are stacked so that
# "Time" holds the source column name and "Number" holds the count.
long_data <- pivot_longer(
  rename(clean_data, CloseFriendsAfter = CloseFriends),
  cols = c(CloseFriendsBefore, CloseFriendsAfter),
  names_to = "Time",
  values_to = "Number"
)

# Bar plot of the mean Number at each Time, with error bars computed by
# mean_se. The x axis is ordered Before -> After and relabelled accordingly.
long_data %>%
  ggplot(mapping = aes(Time, Number)) +
  geom_bar(position = "dodge", stat = "summary", fun = "mean") +
  stat_summary(geom = "errorbar", fun.data = mean_se, width = 0.2) +
  scale_x_discrete(
    limits = c("CloseFriendsBefore", "CloseFriendsAfter"),
    labels = c("Before", "After")
  ) +
  theme_apa()

# Statistical comparison of Number between the two Time levels, fitted as a
# linear model on the long-format data.
# NOTE(review): each id contributes both a Before and an After row, and this
# model treats those rows as independent -- consider a paired analysis
# (e.g. on the within-id difference) if the pairing should be respected.
summary(lm(formula = Number ~ Time, data = long_data))

## 
## Call:
## lm(formula = Number ~ Time, data = long_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.267 -2.267 -1.075  1.733 23.733 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              6.0750     0.3589  16.929   <2e-16 ***
## TimeCloseFriendsBefore   0.1917     0.5075   0.378    0.706    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.931 on 238 degrees of freedom
## Multiple R-squared:  0.000599,   Adjusted R-squared:  -0.0036 
## F-statistic: 0.1426 on 1 and 238 DF,  p-value: 0.706

Data Analysis Tutorial

Julie

8/18/2021

Load libraries

Load data

Clean data

Compute scale items

Recode relationship status using labels

Select desired columns

Descriptives

Create tables

Visualizations

Data analysis

Visualize results above

Extra