Dataset

Time spent per user per day was calculated as the difference, in minutes, between that user's first and last recorded event timestamps on that day.

These daily time differences were then summed for each unique user (study_id) to get the total time spent per user.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(lubridate)
library(knitr)

# Load the cleaned event log and tag every event with its calendar date.
# The folder is held in a variable and joined with file.path(), so the
# session's working directory is left untouched (no setwd()).
data_dir <- "/Users/owad/Desktop/Work"
df <- read.csv(file.path(data_dir, "CLEANED_Events Tab.csv")) %>%
    # as.Date() keeps only the date part of `timestamp`.
    # NOTE(review): `date` is derived from `timestamp`, not `ppt_timestamp`;
    # confirm which of the two clocks should define a participant's "day".
    mutate(date = as.Date(timestamp))
head(df)

##   event_id              name           timestamp                       note
## 1      817 StartedEnrollment 2019-10-03 04:41:02   /enrollment/get-started/
## 2      820   ScreenerStarted 2019-10-03 04:41:03     /enrollment/step-one/1
## 3      826 ScreenerCompleted 2019-10-03 04:41:30    /enrollment/step-one/36
## 4      829   AcceptedConsent 2019-10-03 04:42:27       /enrollment/consent/
## 5      832    AccountCreated 2019-10-03 04:43:01 /enrollment/account-setup/
## 6      833      UserLoggedIn 2019-10-03 04:43:01 /enrollment/account-setup/
##   study_id       ppt_timestamp Real       date
## 1        7 2019-10-03 00:41:02    1 2019-10-03
## 2        7 2019-10-03 00:41:03    1 2019-10-03
## 3        7 2019-10-03 00:41:30    1 2019-10-03
## 4        7 2019-10-03 00:42:27    1 2019-10-03
## 5        7 2019-10-03 00:43:01    1 2019-10-03
## 6        7 2019-10-03 00:43:01    1 2019-10-03

Summary Table

# Minutes between the earliest and latest event for each user on each day.
#
# The timestamps are parsed once, with an explicit time zone, so the result
# does not depend on the machine's local zone or its DST rules.
# NOTE(review): "UTC" is used only to get plain wall-clock arithmetic; confirm
# the zone `timestamp` was actually recorded in.
#
# max()/min() are used instead of last()/first() so the duration is correct
# even when the rows are not already sorted by time within a user-day.
# Rows whose timestamp is NA also get an NA `date`, so they form their own
# group and cannot contaminate a real day.
df_summary <- df %>%
    mutate(event_time = as.POSIXct(timestamp, tz = "UTC")) %>%
    group_by(study_id, date) %>%
    summarize(
        time_diff = difftime(max(event_time), min(event_time), units = "mins"),
        # Same grouping as the default (still grouped by study_id), stated
        # explicitly so summarise() no longer prints its grouping message.
        .groups = "drop_last"
    )

## `summarise()` has grouped output by 'study_id'. You can override using the
## `.groups` argument.

# Preview: time spent per day per user (first rows only)
df_summary %>% head()

## # A tibble: 6 × 3
## # Groups:   study_id [1]
##   study_id date       time_diff     
##      <int> <date>     <drtn>        
## 1        7 2019-10-03  24.06667 mins
## 2        7 2020-01-01   0.00000 mins
## 3        7 2020-01-06   0.00000 mins
## 4        7 2020-01-07 788.35000 mins
## 5        7 2020-03-30   0.00000 mins
## 6        7 2020-08-10   0.00000 mins

# Add up each user's daily durations into a single total per study_id.
df2 <- summarise(
    group_by(df_summary, study_id),
    total_time = sum(time_diff)
)

# Preview: total time spent per user (first rows only)
df2 %>% head()

## # A tibble: 6 × 2
##   study_id total_time    
##      <int> <drtn>        
## 1        7 825.03333 mins
## 2        8  12.90000 mins
## 3        9  50.90000 mins
## 4       10 209.26667 mins
## 5       11  82.18333 mins
## 6       12 552.25000 mins

Summary Statistics for User Time

# Mean, standard deviation, and quartiles of the per-user total time,
# rendered as a centered table.
sum_stat <- df2 %>%
    summarize(
        mean = mean(total_time),
        # sd() drops the difftime class and returns a bare number, so it is
        # wrapped back into a difftime with the same units as `total_time`;
        # otherwise `std` is the only column printed without a unit.
        std = as.difftime(
            sd(as.numeric(total_time)),
            units = units(total_time)
        ),
        "25%" = quantile(total_time, probs = 0.25),
        "50%" = quantile(total_time, probs = 0.5),
        "75%" = quantile(total_time, probs = 0.75)
    )
# One alignment entry per column; the previous vector listed four entries
# for a five-column table.
kable(
    sum_stat,
    caption = "Statistics Table",
    align = rep("c", ncol(sum_stat))
)

Statistics Table
mean	std	25%	50%	75%
404.1509 mins	466.0054	25.14167 mins	227.7667 mins	641.1333 mins

Summary Statistics for User Time

Dataset

Summary Table

Summary Statistics for User Time