## Loading required package: pacman
# Download the survey data set and read it as a CSV into a tibble.
# NOTE(review): import() is presumably rio::import -- the package load is not
# visible in this section; confirm.
covid_well_being <- import(
  "https://docs.google.com/uc?id=1QIzVc9xOwKbPJVZKZwNzWAAUe6MmOfBu&export=download",
  format = "csv",
  setclass = "tibble"
)
# Preview the identifier and time-related columns for the first six rows.
covid_well_being %>%
  select(
    respondent_id,
    survey_number,
    survey_time,
    fell_asleep_time,
    woke_up_time
  ) %>%
  head()
## # A tibble: 6 x 5
## respondent_id survey_number survey_time fell_asleep_time woke_up_time
## <int> <int> <dttm> <int> <int>
## 1 1 1 2020-03-23 16:11:36 82800 16200
## 2 1 2 2020-03-24 10:38:09 83700 25200
## 3 1 3 2020-03-25 17:33:05 81000 16200
## 4 1 4 2020-03-26 20:18:40 82800 24300
## 5 1 5 2020-03-27 09:16:29 84600 20700
## 6 1 6 2020-03-28 17:54:44 80100 26100
# Demographics table: rows from survey number 1 only, restricted to the
# contiguous run of columns from respondent_id through household_income.
covid_demog <- covid_well_being %>%
  # Keep only the rows recorded for the first survey.
  filter(survey_number == 1) %>%
  # Keep the columns from respondent_id up to and including household_income.
  select(respondent_id:household_income)
# Per-gender summary: number of rows in covid_demog and the median of age1
# (missing ages are ignored).
covid_demog %>%
  group_by(gender) %>%
  summarize(
    # Row count within each gender group.
    num_of_people = n(),
    # Median age within each gender group, skipping NA values.
    median_age = median(age1, na.rm = TRUE)
  )
## # A tibble: 3 x 3
## gender num_of_people median_age
## <chr> <int> <dbl>
## 1 Female 193 31
## 2 Male 56 33
## 3 Non-binary 1 21
# Per-state summary, reduced to the three states with the most rows in
# covid_demog.
covid_demog %>%
  group_by(state) %>%
  summarize(
    # Row count within each state.
    num_of_respondents = n(),
    # Median age within each state, skipping NA values.
    median_age = median(age1, na.rm = TRUE)
  ) %>%
  # Largest groups first.
  arrange(desc(num_of_respondents)) %>%
  # Keep only the top three rows.
  head(3)
## # A tibble: 3 x 3
## state num_of_respondents median_age
## <chr> <int> <dbl>
## 1 MA 104 33
## 2 NY 19 28
## 3 IL 13 30
# One row per respondent_id: the mean of num_people_contacted and the mean of
# stress_1_low_7_high across that respondent's rows, ignoring missing values.
social_contact_and_stress <- covid_well_being %>%
  group_by(respondent_id) %>%
  summarize(
    mean_daily_contacts = mean(num_people_contacted, na.rm = TRUE),
    mean_stress_levels = mean(stress_1_low_7_high, na.rm = TRUE)
  )
# Show the first two respondents with every column rounded to 2 significant
# digits. signif() is applied to the whole (all-numeric) table, so taking the
# two rows first yields the same values as rounding first.
social_contact_and_stress %>%
  head(2) %>%
  signif(digits = 2)
## # A tibble: 2 x 3
## respondent_id mean_daily_contacts mean_stress_levels
## <dbl> <dbl> <dbl>
## 1 1 1.8 3.9
## 2 4 1.4 4.2
# Scatter plot: one point per respondent, mean daily contacts on the x axis
# against mean stress level on the y axis.
ggplot(
  social_contact_and_stress,
  aes(x = mean_daily_contacts, y = mean_stress_levels)
) +
  geom_point()

# Correlation (cor() default method) between each respondent's mean daily
# contacts and mean stress level, rounded to 2 significant digits.
#
# use = "complete.obs" drops respondents for whom either mean is missing.
# A respondent with no non-missing answers gets NaN from
# mean(..., na.rm = TRUE), and without this argument a single such value
# would turn the whole correlation into NA. When nothing is missing the
# result is unchanged.
cor(
  x = social_contact_and_stress$mean_daily_contacts,
  y = social_contact_and_stress$mean_stress_levels,
  use = "complete.obs"
) %>%
  signif(digits = 2)
## [1] -0.028
# One row per respondent_id with:
#   mean_daily_minutes_asleep - mean of (minutes_from_sleep_to_wake minus
#                               minutes_awake_at_night) across the
#                               respondent's rows
#   mean_stress_levels        - mean of stress_1_low_7_high
# Missing values are ignored, and both means are stored rounded to 3
# significant digits, so anything that reads result_table later sees the
# rounded values.
result_table <- covid_well_being %>%
  mutate(
    actual_minutes_asleep = minutes_from_sleep_to_wake - minutes_awake_at_night
  ) %>%
  group_by(respondent_id) %>%
  summarize(
    mean_daily_minutes_asleep = signif(
      mean(actual_minutes_asleep, na.rm = TRUE),
      digits = 3
    ),
    mean_stress_levels = signif(
      mean(stress_1_low_7_high, na.rm = TRUE),
      digits = 3
    )
  )

# Print the resulting table.
result_table
## # A tibble: 250 x 3
## respondent_id mean_daily_minutes_asleep mean_stress_levels
## <int> <dbl> <dbl>
## 1 1 407 3.89
## 2 4 496 4.18
## 3 11 688 4.8
## 4 14 503 6.33
## 5 15 397 4.5
## 6 16 529 3
## 7 19 364 6.12
## 8 20 508 3.29
## 9 22 534 4.88
## 10 23 432 5.11
## # i 240 more rows
# Scatter plot: one point per respondent, mean daily minutes asleep on the
# x axis against mean stress level on the y axis, with readable axis titles.
ggplot(
  result_table,
  aes(x = mean_daily_minutes_asleep, y = mean_stress_levels)
) +
  geom_point() +
  labs(
    x = "Mean Daily Minutes Asleep",
    y = "Mean Stress Levels"
  )

# Correlation (cor() default method) between each respondent's mean daily
# minutes asleep and mean stress level, rounded to 2 significant digits.
#
# use = "complete.obs" drops respondents for whom either mean is missing.
# A respondent with no usable sleep or stress answers gets NaN from
# mean(..., na.rm = TRUE), and without this argument a single such value
# would turn the whole correlation into NA. When nothing is missing the
# result is unchanged.
cor(
  x = result_table$mean_daily_minutes_asleep,
  y = result_table$mean_stress_levels,
  use = "complete.obs"
) %>%
  signif(digits = 2)
## [1] 0.039