In this analysis, I am working with a data set from fivethiryeight.com which contains survey responses from American adults who are married, in a domestic partnership, in a civil union, or cohabitating with a partner.
I want to answer the following questions:
library(readr)
library(dplyr)
library(tidyr)
# retrieve the csv file from GitHub
urlfile = "https://raw.githubusercontent.com/kristinlussi/607Project2/main/sleeping-alone-data.csv"
# read the csv
coupleSleep <- read_csv(url(urlfile), show_col_types = FALSE)
# show a glimpse of the data, obviously this needs to be cleaned
head(coupleSleep)
## # A tibble: 6 × 31
## StartDate EndDate Which of the following be…¹ How long have you be…²
## <chr> <chr> <chr> <chr>
## 1 <NA> <NA> Response Response
## 2 7/10/14 18:37 7/10/14 18:39 Single, but cohabiting wit… 1-5 years
## 3 7/10/14 15:54 7/10/14 15:56 Single, but cohabiting wit… 1-5 years
## 4 7/10/14 15:44 7/10/14 15:49 Married 1-5 years
## 5 7/10/14 13:47 7/10/14 13:47 Married 1-5 years
## 6 7/10/14 13:13 7/10/14 13:14 Married 1-5 years
## # ℹ abbreviated names:
## # ¹`Which of the following best describes your current relationship status?`,
## # ²`How long have you been in your current relationship? If you are not currently in a relationship, please answer according to your last relationship.`
## # ℹ 27 more variables:
## # `When both you and your partner are at home, how often do you sleep in separate beds?` <chr>,
## # `When you're not sleeping in the same bed as your partner, where do you typically sleep?` <chr>,
## # ...7 <chr>, …
Here, we will use dplyr and tidyr to clean the data.
# remove the empty row
coupleSleep <- coupleSleep[-1,]
# rename the columns
colnames(coupleSleep) <- c("StartDate", "EndDate", "RelationshipStatus",
"RelationshipLength", "SeparateBeds", "selfSleepLocation",
"remove1", "partnerSleepLocation", "remove2", "Snoring",
"BathroomTrips", "Sick", "NoIntimacy", "TemperaturePreference",
"Argument", "Space", "Covers", "SleepWithChild",
"DifferentSleepSchedule", "remove3", "firstTimeSleepDifferentBeds",
"SleepingSeparatelyStayTogether", "SleepBetterSeparately",
"SexLifeImproved", "Occupation", "remove4",
"Gender", "Age", "Income", "Education", "Location")
# SleepingSeparatelyStayTogehter = Sleeping in separate beds helps us stay together
# SleepBetterSeparately = We sleep better sleeping in separate beds
# SexLifeImproved = Our sex life has improved as a result of sleeping in separate beds
# drop rows that have NAs in RelationshipStatus column using tidyr
coupleSleep <- drop_na(coupleSleep, RelationshipStatus)
# select the columns that are useful for analysis
coupleSleep <- coupleSleep %>%
select(RelationshipStatus, RelationshipLength, SeparateBeds, selfSleepLocation,
partnerSleepLocation, Snoring, BathroomTrips, Sick, NoIntimacy,
TemperaturePreference, Argument, Space, Covers, SleepWithChild,
DifferentSleepSchedule, firstTimeSleepDifferentBeds,
SleepingSeparatelyStayTogether, SleepBetterSeparately,
SexLifeImproved, Occupation, Gender, Age, Income, Education, Location) %>%
# rename the contents in the RelationshipStatus column
mutate(RelationshipStatus = case_when(
grepl("single", RelationshipStatus, ignore.case = TRUE) ~ "cohabitant",
grepl("domestic", RelationshipStatus, ignore.case = TRUE) ~ "domesticPartner",
grepl("married", RelationshipStatus, ignore.case = TRUE) ~ "married",
TRUE ~ RelationshipStatus))%>%
# rename the contents in the selfSleepLocation
mutate(selfSleepLocation = case_when(
grepl("couch", selfSleepLocation, ignore.case = TRUE) ~ "couch/chair",
grepl("partner", selfSleepLocation, ignore.case = TRUE) ~ "partner sleeps elsewhere",
grepl("separate", selfSleepLocation, ignore.case = TRUE) ~ "separate room",
grepl("other", selfSleepLocation, ignore.case = TRUE) ~ "other",
grepl("shared bedroom", selfSleepLocation, ignore.case = TRUE) ~ "couch/chair",
grepl("shared bedroom", selfSleepLocation, ignore.case = TRUE) ~ "shared room, different bed",
TRUE ~ selfSleepLocation)) %>%
# rename the contents in the partnerSleepLocation column
mutate(partnerSleepLocation = case_when(
grepl("I'm the one", partnerSleepLocation, ignore.case = TRUE) ~ "partner in bed, I sleep elsewhere",
grepl("other", partnerSleepLocation, ignore.case = TRUE) ~ "other",
grepl("couch", partnerSleepLocation, ignore.case = TRUE) ~ "couch/chair",
grepl("separate bedroom", partnerSleepLocation, ignore.case = TRUE) ~ "separate room",
TRUE ~ partnerSleepLocation)) %>%
## rename reasons columns; 1 = yes, 0 = no
mutate(
# partner snores column
snores = case_when(
grepl("snores", Snoring, ignore.case = TRUE) ~ "1",
is.na(Snoring) ~ "0",
TRUE ~ Snoring),
# frequent bathroom trips column
bathroom = case_when(
grepl("bathroom", BathroomTrips, ignore.case = TRUE) ~ "1",
is.na(BathroomTrips) ~ "0",
TRUE ~ BathroomTrips),
# one of us is sick column
sick = case_when(
grepl("sick", Sick, ignore.case = TRUE) ~ "1",
is.na(Sick) ~ "0",
TRUE ~ Sick),
# not intimate anymore column
noIntimacy = case_when(
grepl("intimate", NoIntimacy, ignore.case = TRUE) ~ "1",
is.na(NoIntimacy) ~ "0",
TRUE ~ NoIntimacy),
# difference temperature preferences column
tempPreference = case_when(
grepl("temperature", TemperaturePreference, ignore.case = TRUE) ~ "1",
is.na(TemperaturePreference) ~ "0",
TRUE ~ TemperaturePreference),
# had a fight/argument column
fight = case_when(
grepl("Argument", Argument, ignore.case = TRUE) ~ "1",
is.na(Argument) ~ "0",
TRUE ~ Argument),
# not enough space column
space = case_when(
grepl("space", Space, ignore.case = TRUE) ~ "1",
is.na(Space) ~ "0",
TRUE ~ Space),
# don't want to share covers column
covers = case_when(
grepl("covers", Covers, ignore.case = TRUE) ~ "1",
is.na(Covers) ~ "0",
TRUE ~ Covers),
# sleep with child column
sleepWithChild = case_when(
grepl("child", SleepWithChild, ignore.case = TRUE) ~ "1",
is.na(SleepWithChild) ~ "0",
TRUE ~ SleepWithChild),
# different sleep schedules column
differentSchedule = case_when(
grepl("night", DifferentSleepSchedule, ignore.case = TRUE) ~ "1",
is.na(DifferentSleepSchedule) ~ "0",
TRUE ~ DifferentSleepSchedule)
) %>%
# removes initial reasons columns
select(-c(Snoring, BathroomTrips, Sick, NoIntimacy,
TemperaturePreference, Argument, Space, Covers,
SleepWithChild, DifferentSleepSchedule)) %>%
# change reasons columns from character to integer
mutate_at(vars(snores, bathroom, sick,
noIntimacy, tempPreference,
fight, space, covers,
sleepWithChild, differentSchedule), as.integer) %>%
## agree/disagree columns
# 1 = strongly disagree
# 2 = somewhat disagree
# 3 = neutral
# 4 = somewhat agree
# 5 = strongly agree
mutate(across(c(SleepingSeparatelyStayTogether, SleepBetterSeparately, SexLifeImproved) , ~ case_when(
grepl("Strongly Disagree", ., ignore.case = TRUE) ~ "1",
grepl("Somewhat Disagree", ., ignore.case = TRUE) ~ "2",
grepl("Neither Agree nor Disagree", ., ignore.case = TRUE) ~ "3",
grepl("Somewhat Agree", ., ignore.case = TRUE) ~ "4",
grepl("Strongly Agree", ., ignore.case = TRUE) ~ "5",
TRUE ~ .
))) %>%
# convert columns from character to integer
mutate_at(vars(SleepingSeparatelyStayTogether, SleepBetterSeparately, SexLifeImproved), as.integer) %>%
mutate(case_when(
grepl("NA", SeparateBeds, ignore.case = TRUE) ~ NA
)) %>%
mutate(
Education = case_when(
grepl("Bachelor", Education, ignore.case = TRUE) ~ "bachelor",
grepl("graduate", Education, ignore.case = TRUE) ~ "graduate",
grepl("High", Education, ignore.case = TRUE) ~ "high school",
grepl("Associate", Education, ignore.case = TRUE) ~ "associate"
)
) %>%
## assign a number to each income range
# $0 - $25k = 1
# $25k - $50k = 2
# $50k - $100k = 3
# $100k - $150k = 4
# $150k and greater = 5
mutate(
IncomeRange = case_when(
grepl("24,999", Income, ignore.case = TRUE) ~ "1",
grepl("49,999", Income, ignore.case = TRUE) ~ "2",
grepl("99,999", Income, ignore.case = TRUE) ~ "3",
grepl("149,999", Income, ignore.case = TRUE) ~ "4",
grepl("150,000", Income, ignore.case = TRUE) ~ "5")
) %>%
as.data.frame()
# convert IncomeRange to integer
coupleSleep$IncomeRange <- as.integer(coupleSleep$IncomeRange)
# rearrange
coupleSleep <- coupleSleep %>%
select(Gender, Age, Income, IncomeRange, Education,
Location, Occupation, RelationshipStatus, RelationshipLength,
SeparateBeds, selfSleepLocation, partnerSleepLocation,
firstTimeSleepDifferentBeds, snores, bathroom,
sick, noIntimacy, tempPreference, fight, space,
covers, sleepWithChild, differentSchedule,
SleepingSeparatelyStayTogether, SleepBetterSeparately,
SexLifeImproved)
head(coupleSleep)
## Gender Age Income IncomeRange Education Location
## 1 Male 18-29 $0 - $24,999 1 associate Pacific
## 2 Male 18-29 <NA> NA bachelor South Atlantic
## 3 Male 18-29 $150,000.00 5 graduate South Atlantic
## 4 Male 18-29 $25,000 - $49,999 2 bachelor Middle Atlantic
## 5 Male 18-29 $100,000 - $149,999 2 graduate South Atlantic
## 6 Male 18-29 $0 - $24,999 1 high school East North Central
## Occupation RelationshipStatus
## 1 Other (please specify) cohabitant
## 2 Healthcare Support Occupations cohabitant
## 3 Legal Occupations married
## 4 Life, Physical, and Social Science Occupations married
## 5 Office and Administrative Support Occupations married
## 6 Arts, Design, Entertainment, Sports, and Media Occupations cohabitant
## RelationshipLength SeparateBeds selfSleepLocation
## 1 1-5 years Once a year or less couch/chair
## 2 1-5 years A few times per month partner sleeps elsewhere
## 3 1-5 years Never <NA>
## 4 1-5 years Never <NA>
## 5 1-5 years Never <NA>
## 6 1-5 years Never <NA>
## partnerSleepLocation
## 1 partner in bed, I sleep elsewhere
## 2 separate room
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## firstTimeSleepDifferentBeds snores bathroom sick
## 1 Within the first 1-5 years of our relationship 0 0 1
## 2 Immediately/ We've always slept in separate beds 1 1 1
## 3 <NA> 0 0 0
## 4 <NA> 0 0 0
## 5 <NA> 0 0 0
## 6 <NA> 0 0 0
## noIntimacy tempPreference fight space covers sleepWithChild differentSchedule
## 1 0 0 0 0 0 0 0
## 2 0 0 1 0 1 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## SleepingSeparatelyStayTogether SleepBetterSeparately SexLifeImproved
## 1 1 4 1
## 2 4 5 1
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
First, we will determine what percentage of surveyors sleep in separate beds from their partners regularly. Regularly will be defined as “a few times per week”, “a few times a month”, and “every night”
library(ggplot2)
# filter out the NAs
sepBeds <- coupleSleep[!is.na(coupleSleep$SeparateBeds), ]
# classify each as "regular" and "irregular/never"
sepBeds <- sepBeds %>%
mutate(
freq_sepBeds =
case_when(
grepl("a few times per week", SeparateBeds, ignore.case = TRUE) ~ "Regularly",
grepl("a few times per month", SeparateBeds, ignore.case = TRUE) ~ "Regularly",
grepl("every night", SeparateBeds, ignore.case = TRUE) ~ "Regularly",
grepl("never", SeparateBeds, ignore.case = TRUE) ~ "Irregular/Never",
grepl("once a month or less", SeparateBeds, ignore.case = TRUE) ~ "Irregular/Never",
grepl("once a year or less", SeparateBeds, ignore.case = TRUE) ~ "Irregular/Never"
)
)
ggplot(sepBeds) +
geom_bar(aes(x = SeparateBeds, fill = freq_sepBeds)) +
ggtitle(
"How Often Do You Sleep in a Separate Bed from Your Partner?"
) +
labs(
x = "",
y = "Frequency") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(
fill = guide_legend(title = NULL))
From the above graph, we can tell that the majority of the surveyors either irregularly or never sleep separately from their partner.
library(gt)
# calculate the count of "regularly"
regularCount <- sepBeds %>%
filter(freq_sepBeds == "Regularly") %>%
summarize(Count = n())
# calculate the count of "irregularly"
irregularCount <- sepBeds %>%
filter(freq_sepBeds == "Irregular/Never") %>%
summarize(Count = n())
nrows <- nrow(sepBeds)
# calculate "regular" count percentage
regCountPercent <- round((regularCount$Count / nrows * 100), digits = 2)
# calculate "irregular/never" count percentage
irregCountPercent <- round((irregularCount$Count / nrows * 100), digits = 2)
# create data frame
freqCount <- data.frame(
Frequency = c("Regularly", "Irregularly/Never"),
Percentage = c(regCountPercent, irregCountPercent)
)
# change data frame to gt table
freqCountTbl <- gt(freqCount)
freqCountTbl <- freqCountTbl |>
tab_header(
title = md("**People Who Sleep in Separate Beds as Their Partner**"),
subtitle = md("From FiveThirtyEight Survey Monkey Survey Data")
) %>%
cols_label(
Frequency = md("**Frequency**"),
Percentage = md("**Percentage (%)**")
)
freqCountTbl
People Who Sleep in Separate Beds as Their Partner | |
From FiveThirtyEight Survey Monkey Survey Data | |
Frequency | Percentage (%) |
---|---|
Regularly | 23.63 |
Irregularly/Never | 76.37 |
From the above table, we can see that about 24% of the surveyors regularly sleep in separate beds as their partner.
Next, we will determine what the most common reasons are for couples sleeping in separate beds.
# create a new data frame with just the columns needed to create a graph showing
# the different reasons for couples sleeping in separate beds
reasonSeparate <- coupleSleep %>%
select(Gender, Age, snores, bathroom, sick, noIntimacy,
tempPreference, fight, space, covers,
sleepWithChild, differentSchedule) %>%
pivot_longer(cols = -c(Gender, Age), names_to = "Reason", values_to = "Count") %>%
mutate(
Reason = case_when(
grepl("bathroom", Reason, ignore.case = TRUE) ~ "Frequent Bathroom Trips",
grepl("covers", Reason, ignore.case = TRUE) ~ "Don't Want to Share Covers",
grepl("differentSchedule", Reason, ignore.case = TRUE) ~ "Different Sleep Schedule",
grepl("fight", Reason, ignore.case = TRUE) ~ "Argument",
grepl("noIntimacy", Reason, ignore.case = TRUE) ~ "No Longer Intimate",
grepl("sick", Reason, ignore.case = TRUE) ~ "Sick",
grepl("sleepWithChild", Reason, ignore.case = TRUE) ~ "Parent Sleeps with Child",
grepl("snores", Reason, ignore.case = TRUE) ~ "One Partner Snores",
grepl("space", Reason, ignore.case = TRUE) ~ "Not Enough Space",
grepl("temp", Reason, ignore.case = TRUE) ~ "Different Temperature Preferences",
TRUE ~ "Other"
)
)
reasonSeparate <- reasonSeparate %>%
filter(Count == 1)
ggplot(reasonSeparate) +
geom_bar(aes(x = Reason, fill = Reason), show.legend = FALSE) +
ggtitle(
"Reasons Couples Sleep in Separate Beds"
) +
labs(
x = "",
y = "Count",
fill = "Reason"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1))
From the above graph, we can see that the most common reasons why couples sleep in separate beds is that one partner is sick or one partner snores.
Lastly, we will determine what the most common benefits are of sleeping in separate beds.
# create a subset of the data that shows only the surveyors that regularly sleep in separate beds
# as their partner. We will then determine what the most common benefits are based on the survey
# questions asked
surveyQuestions <- coupleSleep %>%
select(SeparateBeds, SleepingSeparatelyStayTogether, SleepBetterSeparately, SexLifeImproved) %>%
mutate(
freq_sepBeds =
case_when(
grepl("a few times per week", SeparateBeds, ignore.case = TRUE) ~ "Regularly",
grepl("a few times per month", SeparateBeds, ignore.case = TRUE) ~ "Regularly",
grepl("every night", SeparateBeds, ignore.case = TRUE) ~ "Regularly",
grepl("never", SeparateBeds, ignore.case = TRUE) ~ "Irregular/Never",
grepl("once a month or less", SeparateBeds, ignore.case = TRUE) ~ "Irregular/Never",
grepl("once a year or less", SeparateBeds, ignore.case = TRUE) ~ "Irregular/Never"
)) %>%
filter(
freq_sepBeds == "Regularly"
) %>%
pivot_longer(cols = -c(SeparateBeds, freq_sepBeds), names_to = "Benefit", values_to = "Count") %>%
mutate(
Benefit = case_when(
grepl("stay", Benefit, ignore.case = TRUE) ~ "\"I feel like it helps us stay together\"",
grepl("better", Benefit, ignore.case = TRUE) ~ "\"I feel like we sleep better\"",
grepl("sex", Benefit, ignore.case = TRUE) ~ "\"I feel like our sex life has improved\""
)
) %>%
# filter where "Count" equals 4 or 5, because 4 or 5 means "strongly agree" or "somewhat agree"
filter(
Count %in% c(4, 5)
)
ggplot(surveyQuestions) +
geom_bar(aes(x = Benefit, fill = Benefit), show.legend = FALSE) +
ggtitle(
"Benefits of Sleeping in Separate Beds",
subtitle = "From People who Regularly Sleep in Separate Beds as their Partner"
) +
labs(
x = "",
y = "Count",
fill = "Benefit"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1))
From the above graph, we can see that the most common benefit is that the couples tend to sleep better separate than when they sleep together.
There are many things you could do with this data set. Here are some other things I’d be interested to see in future analyses:
FiveThirtyEight. Dear Mona: How Many Couples Sleep in Separate Beds? https://fivethirtyeight.com/features/dear-mona-how-many-couples-sleep-in-separate-beds/
FiveThirtyEight. fivethirtyeight/data: Sleeping Alone Data. https://github.com/fivethirtyeight/data/tree/master/sleeping-alone-data