Time of the day analysis

Author

Peer, Brandt, Chapkovski

Getting libraries

library(pacman)
p_load(tidyverse, lubridate, readr, ggplot2, scales, lutz,jsonlite, glue, ggpubr, kableExtra)

Time of the day analysis

# Root folder of the project data; input and output file paths are built
# from it with glue().
local_folder <- "/Users/chapkovski/Documents/peer_brandt_paper/"
fn <- glue("{local_folder}full_data.csv")

# Load the raw CSV, then parse the StartDate text (day-month-year
# hour:minute) into a date-time interpreted in the Asia/Jerusalem zone.
# NOTE(review): presumably the export was recorded in Israel time -- confirm.
data <- read_csv(fn)
data <- mutate(data, StartDate = dmy_hm(StartDate, tz = "Asia/Jerusalem"))

Rows: 2517 Columns: 119
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (20): StartDate, EndDate, IPAddress, RecordedDate, ResponseId, Distribut...
dbl (85): Status, Progress, Duration..in.seconds., Finished, LocationLatitud...
lgl (14): RecipientLastName, RecipientFirstName, RecipientEmail, ExternalRef...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Getting country info

# Let’s merge the country info with the current data.

# Attach country metadata and a per-respondent time zone to the survey data.
# `country_correspondence` is not created in this section; it must already
# exist and contain a `country_code` column (TODO confirm where it is built).
df_with_countries <- data %>%
  rename(country_code = Country) %>%
  mutate(country_code = as.factor(country_code)) %>%
  # Name the join key explicitly: a natural join would silently start
  # matching on extra columns if the two tables ever share another name.
  left_join(country_correspondence, by = join_by(country_code)) %>%
  # Time zone name looked up from the recorded coordinates. The default
  # lookup method is kept, so lutz still warns that results may be
  # inaccurate near time zone boundaries.
  mutate(timezone = tz_lookup_coords(LocationLatitude, LocationLongitude))

Joining with `by = join_by(country_code)`

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `timezone = tz_lookup_coords(LocationLatitude,
  LocationLongitude)`.
Caused by warning:
! Using 'fast' method. This can cause inaccuracies in time zones
  near boundaries away from populated ares. Use the 'accurate'
  method if accuracy is more important than speed.

# Compute each respondent's local wall-clock time at the start of the survey.
#
# A date-time column can carry only ONE time zone attribute. Row-wise
# with_tz() results that use different zones are therefore coerced to a
# single zone when the rows are recombined, and hour(local_time) would then
# report that one zone's clock for everybody. To keep the per-row clock
# reading, each converted value is re-labelled as UTC with force_tz(), which
# keeps the clock fields (year ... minute) and changes only the zone label.
#
# Consequence: `local_time` holds the local clock reading stored under a UTC
# label; it is no longer the true instant (that remains in `StartDate`).
df_with_countries <- df_with_countries %>%
  rowwise() %>%
  mutate(
    local_time = force_tz(with_tz(StartDate, tzone = timezone), tzone = "UTC")
  ) %>%
  ungroup()
 

 

# Derive the hour of `local_time` and bin it into three ordered parts of the
# day using left-closed intervals: [0, 12), [12, 18) and [18, 24).
df_with_countries <- mutate(
  df_with_countries,
  hour = hour(local_time),
  part_of_day = cut(
    hour,
    breaks = c(0, 12, 18, 24),
    labels = c(
      "Morning (0 till 12pm)",
      "Afternoon (12 till 18)",
      "Evening (18 till 24)"
    ),
    right = FALSE,
    ordered_result = TRUE
  )
)

# Save the enriched data set next to the input file.
write_csv(df_with_countries, glue("{local_folder}df_with_countries.csv"))


# Table: share of respondents with attention.level == "High", one row per
# sample and one column per part of the day.
df_with_countries %>%
  mutate(high_attention = if_else(attention.level == "High", 1, 0)) %>%
  group_by(part_of_day, sample) %>%
  # Drop the grouping right away: nothing downstream needs it, and this also
  # silences the "summarise() has grouped output" message.
  summarise(mean_attention = mean(high_attention), .groups = "drop") %>%
  # Format as a percentage string with two decimals for display.
  mutate(mean_attention = percent(mean_attention, accuracy = .01)) %>%
  pivot_wider(names_from = part_of_day, values_from = mean_attention) %>%
  kbl(caption = "Share of those who paid high attention") %>%
  kable_classic_2()

`summarise()` has grouped output by 'part_of_day'. You can override using the
`.groups` argument.

Share of those who paid high attention
sample	Morning (0 till 12pm)	Afternoon (12 till 18)	Evening (18 till 24)
Besample	41.58%	36.14%	24.36%
CR	85.59%	91.56%	91.20%
Connect	90.91%	92.18%	92.08%
MTurk	42.64%	53.01%	53.45%
Prolific	85.57%	75.00%	83.16%
Toloka	28.37%	57.14%	29.82%

# Table: number of observations, one row per sample and one column per part
# of the day. The count needs no attention indicator, so none is computed.
df_with_countries %>%
  count(part_of_day, sample) %>%
  # A sample/part-of-day combination with no rows means zero observations,
  # so fill absent cells with 0 instead of NA.
  pivot_wider(
    names_from = part_of_day,
    values_from = n,
    values_fill = 0L
  ) %>%
  kbl(caption = "Number of observations") %>%
  kable_classic_2()

Number of observations
sample	Morning (0 till 12pm)	Afternoon (12 till 18)	Evening (18 till 24)
Besample	101	285	78
CR	118	154	125
Connect	121	179	101
MTurk	129	166	116
Prolific	194	4	196
Toloka	215	7	228

# Bar plot of the mean high-attention indicator (with confidence interval,
# via add = "mean_ci") by part of the day, one facet per sample.
p1 <- df_with_countries %>%
  mutate(high_attention = if_else(attention.level == "High", 1, 0)) %>%
  ggbarplot(
    x = "part_of_day",
    y = "high_attention",
    facet.by = "sample",
    palette = "jco",
    position = position_dodge(),
    add = "mean_ci",
    # The bars show the mean of a 0/1 indicator, i.e. a share, not a count.
    ylab = "Share with high attention",
    xlab = "Part of Day",
    title = "Part of Day (local time)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent_format())

# Render with the y axis fixed to the 0-1 range.
ggpar(p1, ylim = c(0, 1))