Load and clean data

Composite <- read.csv("./Winter_2020_Composite.csv")

# filter only meaningful rows
Composite <- Composite %>% 
  dplyr::filter(grepl("@",Q13)) %>% 
  select(age = Q15, 
         ethnicity = Q17,
         gender = Q19,
         attach_closeness = Q31,
         attach_independence = Q32,
         n_girlfriends = Q27_1,
         n_boyfriends = Q27_2,
         rel_status = Q28,
         rel_status_other = Q28_4_TEXT,
         selected_partner = Q29,
         # passionate love scale
         PL1 = Q15.1,
         PL2 = Q16,
         PL3 = Q17.1,
         PL4 = Q18,
         PL5 = Q19.1,
         PL6 = Q20,
         PL7 = Q21,
         PL8 = Q22,
         PL9 = Q23, 
         PL10 = Q24,
         PL11 = Q25,
         PL12 = Q26,
         # family values
         famvalues1 = Q11_1,
         famvalues2 = Q11_2,
         famvalues3 = Q11_3,
         famvalues4 = Q11_4,
         famvalues5 = Q11_5,
         famvalues6 = Q11_6,
         famvalues7 = Q11_7,
         famvalues8 = Q11_8,
         famvalues9 = Q11_9,
         famvalues10 = Q11_10,
         famvalues11 = Q11_11,
         famvalues12 = Q11_12,
         #ideals
         ideal_cute = Q34_1,
         ideal_safe = Q34_2,
         ideal_trustworthy = Q34_3, 
         ideal_sincere = Q34_4,
         ideal_bright = Q34_5,
         ideal_upbeat = Q34_6,
         ideal_fun = Q34_7,
         ideal_funny = Q34_8,
         ideal_honest = Q34_9,
         ideal_respectful = Q34_10,
         ideal_adventurous = Q34_11,
         ideal_curious = Q34_12,
         ideal_social = Q34_13,
         ideal_affectionate = Q34_14,
         ideal_sexy = Q34_15,
         ideal_niceBody = Q34_16,
         ideal_attractiveApp = Q34_17,
         ideal_goodLover = Q34_18,
         ideal_outgoing = Q34_19,
         ideal_kind = Q34_21,
         ideal_supportive = Q34_22,
         ideal_understanding = Q34_23,
         ideal_considerate = Q34_24,
         ideal_sensitive = Q34_25,
         ideal_goodListener = Q34_26,
         ideal_successful = Q34_27,
         ideal_niceHouse = Q34_28,
         ideal_financiallySec = Q34_29,
         ideal_dressWell = Q34_30,
         ideal_goodJob = Q34_31,
         ideal_smart = Q34_32,
         ideal_interesting = Q34_33,
         ideal_weird = Q34_34,
         # other demographic vars
         education = Q12, 
         famSES = Q13.1)

# manually fix text entries
Composite$n_girlfriends[Composite$n_girlfriends == "none"] <- 0
Composite$n_boyfriends[Composite$n_boyfriends == "2 serious, lots of casual"] <- 2
Composite$rel_status[Composite$rel_status_other == "Exclusive"] <- 2
Composite$rel_status[Composite$rel_status == 4] <- 2

# make numeric
Composite <- Composite %>% 
  mutate_at(vars(starts_with("fam")), funs(as.numeric(as.character(.)))) %>%
  mutate_at(vars(starts_with("ideal_")), funs(as.numeric(as.character(.)))) %>% 
  mutate_at(vars(starts_with("PL")), funs(as.numeric(as.character(.)))) %>% 
  mutate_at(vars(age, attach_closeness, attach_independence, n_girlfriends, n_boyfriends), funs(as.numeric(as.character(.))))

# fix scoring of famvalues
Composite <- Composite %>% 
  mutate_at(vars(starts_with("famvalues")), funs(recode(., `5`=4, `6`=5, `7`=6)))

# take out unnecessary rel_status_other
Composite <- Composite %>% 
  select(-(rel_status_other))
# specify factors
Composite$ethnicity <- as.character(Composite$ethnicity) # for step below
Composite$ethnicity[grepl(",", Composite$ethnicity)] <- "7" # mixed ethnicity as 7

Composite <- Composite %>%
  mutate(ethnicity = factor(ethnicity,
                            levels = c(1:7),
                            labels = c("Asian/American",
                                       "Black",
                                       "Hispanic",
                                       "Nat Amer",
                                       "Other",
                                       "White",
                                       "Mixed")),
         gender = factor(gender,
                         levels = c(1, 2),
                         labels = c("Male", "Female")),
         education = factor(education,
                            levels = c(1:9),
                            labels = c("< HS",
                                       "HS/GED",
                                       "College (not curr enrolled)",
                                       "College (curr enrolled)",
                                       "Assoc",
                                       "BA/BS",
                                       "Masters",
                                       "Doct",
                                       "Professional")),
         rel_status = factor(rel_status,
                             levels = c(1,2),
                             labels = c("Single",
                                        "Partnered")),
         selected_partner = factor(selected_partner,
                                   levels = c(1:3),
                                   labels = c("Current partner",
                                              "Past partner",
                                              "Ideal partner")))
# compute passionate love scale
Composite <- Composite %>%
  ungroup() %>% 
  mutate(Passion = rowMeans(dplyr::select(., starts_with("PL")), na.rm = T))

Demographics

Age

ggplot(Composite, aes(x = age)) +
  geom_bar() +
  xlab("Age")

Ethnicity

ggplot(Composite, aes(x = ethnicity)) +
  geom_bar() +
  xlab("Ethnicity")

Education

ggplot(Composite, aes(x = education)) +
  geom_bar() +
  xlab("Education") 

Family SES

ggplot(Composite, aes(x = famSES)) +
  geom_bar() +
  xlab("Family SES") +
  scale_x_discrete(limits=c(1:6),
                   labels=c("Lower",
               "Lower\nMiddle",
               "Middle",
               "Upper\nMiddle",
               "Upper",
               "I don't\nknow"))
## Warning: Removed 3 rows containing non-finite values (stat_count).

Partners

# table

Composite %>%
  group_by(gender) %>% 
  summarize(n_gf = sum(n_girlfriends, na.rm = T),
            n_gf_sd = sd(n_girlfriends, na.rm = T),
            n_bf = sum(n_boyfriends, na.rm = T),
            n_bf_sd = sd(n_boyfriends, na.rm = T)) %>% 
  pander()
gender n_gf n_gf_sd n_bf n_bf_sd
Male 68 2.035 0 0
Female 5 0.3575 104 1.346
# visualize by gender

Composite_Long <- Composite %>% 
  pivot_longer(cols = c(n_girlfriends, n_boyfriends), names_to = "partner_gender", values_to = "number")

ggplot(Composite_Long, aes(x = gender, y = number, fill = partner_gender)) +
  geom_bar(position="dodge", stat = "summary", fun.y = "sum") +
  xlab("Gender of participant") +
  scale_fill_manual(values=c("#94b3d7", "#d99594"))
## Warning: Removed 43 rows containing non-finite values (stat_summary).

# visualize number of partners regardless of gender

Composite <- Composite %>% 
  rowwise() %>% 
  mutate(totalPartners = sum(n_girlfriends + n_boyfriends, na.rm = T)) %>% 
  ungroup()

ggplot(Composite, aes(x = totalPartners)) +
  geom_bar() +
  xlab("Number of total partners") 

Current relationship satus

ggplot(Composite, aes(x = rel_status)) +
  geom_bar() +
  xlab("Relationship Status") 

Passionate Love

# overall distribution
ggplot(Composite, aes(x = Passion)) +
  geom_bar() +
  xlab("Passionate Love") 
## Warning: Removed 3 rows containing non-finite values (stat_count).

# passion by *who* is imagined and gender
ggplot(data = subset(Composite, !is.na(selected_partner)), aes(x = selected_partner, y = Passion, fill = gender)) +
  geom_bar(position="dodge", stat = "summary", fun.y = "mean") +
  xlab("Type of partner selected") +
  ylab("Passionate Love (1-6)") +
  stat_summary(geom = "errorbar", fun.data = mean_se, position=position_dodge(.9), width = 0.2) +
  scale_fill_manual(values=c("#94b3d7", "#d99594")) +
  coord_cartesian(ylim = c(1, 6))

write.csv(Composite, "./Cleaned_Data.csv")