Time-of-day analysis

Author

Peer, Brandt, Chapkovski

Getting libraries

library(pacman)
p_load(tidyverse, lubridate, readr, ggplot2, scales, lutz,jsonlite, glue, ggpubr, kableExtra)

Time-of-day analysis

# Folder holding the project data. The trailing slash matters because file
# names are appended to it directly via glue().
local_folder <- '/Users/chapkovski/Documents/peer_brandt_paper/'
fn <- glue('{local_folder}full_data.csv')

# Read full_data.csv, then parse StartDate (day-month-year hour:minute text)
# into a date-time interpreted in the Asia/Jerusalem time zone.
data <- read_csv(fn)
data <- mutate(data, StartDate = dmy_hm(StartDate, tz = "Asia/Jerusalem"))
Rows: 2517 Columns: 119
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (20): StartDate, EndDate, IPAddress, RecordedDate, ResponseId, Distribut...
dbl (85): Status, Progress, Duration..in.seconds., Finished, LocationLatitud...
lgl (14): RecipientLastName, RecipientFirstName, RecipientEmail, ExternalRef...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Getting country info

# Merge the country info with the current data.

# Attach country-level information and a time zone name to every response.
#
# Steps:
#   1. Rename `Country` to `country_code` and convert it to a factor.
#   2. Left-join `country_correspondence` (must already exist in the session)
#      on `country_code`. The key is spelled out instead of relying on dplyr's
#      natural-join guess, and `relationship = "many-to-one"` makes the join
#      fail loudly if a country code appears more than once in the lookup
#      table, which would otherwise silently duplicate responses.
#   3. Derive `timezone` from the recorded latitude/longitude. lutz's default
#      "fast" method is used, which can be inaccurate close to time zone
#      boundaries.
df_with_countries <- data %>%
  rename(country_code = Country) %>%
  mutate(country_code = as.factor(country_code)) %>%
  left_join(
    country_correspondence,
    by = "country_code",
    relationship = "many-to-one"
  ) %>%
  mutate(timezone = tz_lookup_coords(LocationLatitude, LocationLongitude))
Joining with `by = join_by(country_code)`
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `timezone = tz_lookup_coords(LocationLatitude,
  LocationLongitude)`.
Caused by warning:
! Using 'fast' method. This can cause inaccuracies in time zones
  near boundaries away from populated ares. Use the 'accurate'
  method if accuracy is more important than speed.
# Convert one instant to the wall-clock time shown in `zone`.
#
# A date-time column can carry only one time zone. When per-row with_tz()
# results are combined back into a single column they end up sharing one
# zone, so hour() would no longer report each respondent's own local hour.
# To keep the local clock reading, the local wall-clock time is re-labelled
# as UTC. The returned value is therefore NOT the original instant; use it
# only for clock fields such as the hour.
#
# instant: a length-one date-time.
# zone:    a length-one time zone name; NA yields a missing date-time.
wall_clock_in_zone <- function(instant, zone) {
  if (is.na(zone)) {
    return(.POSIXct(NA_real_, tz = "UTC"))
  }
  force_tz(with_tz(instant, tzone = zone), tzone = "UTC")
}

# Labels of the three day parts, in display order.
part_of_day_levels <- c(
  'Morning (0 till 12pm)',
  'Afternoon (12 till 18)',
  'Evening (18 till 24)'
)

# Add, per response:
#   local_time  - respondent's local wall-clock start time (labelled UTC,
#                 see wall_clock_in_zone()),
#   hour        - local hour of the day (0-23),
#   part_of_day - ordered factor bucketing `hour` into the three day parts.
df_with_countries <- df_with_countries %>%
  rowwise() %>%
  mutate(local_time = wall_clock_in_zone(StartDate, timezone)) %>%
  ungroup() %>%
  mutate(
    hour = hour(local_time),
    part_of_day = case_when(
      hour >= 0 & hour < 12 ~ part_of_day_levels[1],
      hour >= 12 & hour < 18 ~ part_of_day_levels[2],
      hour >= 18 & hour < 24 ~ part_of_day_levels[3]
    ),
    part_of_day = factor(
      part_of_day,
      levels = part_of_day_levels,
      ordered = TRUE
    )
  )

# Persist the enriched data next to the input file. Note that `local_time`
# is written as local wall-clock time, not as the original instant.
write_csv(df_with_countries, glue('{local_folder}df_with_countries.csv'))


# Table: share of respondents whose attention.level is 'High', with one row
# per sample and one column per part of day.
#
# `.groups = "drop"` makes the result of summarise() explicitly ungrouped so
# the later steps do not depend on leftover grouping by `part_of_day`.
# Shares are formatted as percentages with two decimals before reshaping.
df_with_countries %>%
  mutate(high_attention = if_else(attention.level == 'High', 1, 0)) %>%
  group_by(part_of_day, sample) %>%
  summarise(mean_attention = mean(high_attention), .groups = "drop") %>%
  mutate(mean_attention = percent(mean_attention, accuracy = .01)) %>%
  pivot_wider(names_from = part_of_day, values_from = mean_attention) %>%
  kbl(caption = 'Share of those who paid high attention') %>%
  kable_classic_2()
`summarise()` has grouped output by 'part_of_day'. You can override using the
`.groups` argument.
Share of those who paid high attention
sample Morning (0 till 12pm) Afternoon (12 till 18) Evening (18 till 24)
Besample 41.58% 36.14% 24.36%
CR 85.59% 91.56% 91.20%
Connect 90.91% 92.18% 92.08%
MTurk 42.64% 53.01% 53.45%
Prolific 85.57% 75.00% 83.16%
Toloka 28.37% 57.14% 29.82%
# Table: number of responses per sample (rows) and part of day (columns).
#
# Only the counts are needed here, so no attention indicator is computed;
# count() replaces the group_by() + tally() pair and returns an ungrouped
# result.
df_with_countries %>%
  count(part_of_day, sample) %>%
  pivot_wider(names_from = part_of_day, values_from = n) %>%
  kbl(caption = 'Number of observations') %>%
  kable_classic_2()
Number of observations
sample Morning (0 till 12pm) Afternoon (12 till 18) Evening (18 till 24)
Besample 101 285 78
CR 118 154 125
Connect 121 179 101
MTurk 129 166 116
Prolific 194 4 196
Toloka 215 7 228
# Bar plot of the share of high-attention respondents by part of day, with
# one facet per sample. `add = "mean_ci"` makes each bar the mean of the 0/1
# indicator together with its confidence interval, so the y axis is a share
# (shown on a percent scale), not a count.
p1 <- df_with_countries %>%
  mutate(high_attention = if_else(attention.level == 'High', 1, 0)) %>%
  ggbarplot(
    x = "part_of_day",
    y = "high_attention",
    facet.by = "sample",
    palette = "jco",
    position = position_dodge(),
    add = "mean_ci",
    ylab = "Share with high attention",
    xlab = "Part of Day",
    title = "Part of Day (local time)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent_format())

# Show the plot with the y axis fixed to the full 0-100% range.
ggpar(p1, ylim = c(0, 1))