# Setup

# Load the raw survey export (a CSV on the lab network share) into `D`.
# NOTE(review): the file name suggests this is the "NonDense" export dated
# 2020-10-12 -- confirm it is the intended snapshot.
D <- read.csv(
  file = "//datastore01.psy.miami.edu/Groups/AHeller_Lab/Undergrad/Measuring Daily Life (Exam EMA)/RawData/Premed5000s/Premed5000s_NonDense_2020-10-12.csv"
)

# process dates
# Parse `StartDate` as date-times in US Eastern time, truncate each one to the
# start of its calendar day, and sort the responses chronologically.
# The lubridate functions are namespace-qualified so this step works whether
# or not `library(lubridate)` has been attached earlier in the session.
# NOTE(review): assumes `StartDate` holds "YYYY-MM-DD HH:MM:SS"-style strings;
# anything that fails to parse becomes NA and is sorted to the end by order().
D$Date <- lubridate::floor_date(
  lubridate::ymd_hms(D$StartDate, tz = "America/New_York"),
  unit = "day"
)
D <- D[order(D$Date), ]

# make summary df
# One row per distinct response day: `date` is the day, `nResp` is the number
# of rows of `D` that fall on that day.
# `unique()` is computed once instead of on every iteration, and
# `seq_along()`/`vapply()` keep the step well-defined when `D` has no rows
# (the result is then an empty data frame instead of an error).
resp_dates <- unique(D$Date)
df <- data.frame(
  date = resp_dates,
  nResp = vapply(
    seq_along(resp_dates),
    # `na.rm = TRUE` makes an NA date count as 0 responses, exactly as the
    # original `which(D$Date == date)` lookup did.
    function(k) sum(D$Date == resp_dates[k], na.rm = TRUE),
    integer(1)
  )
)

# bin width = 2 days
# Collapse consecutive pairs of rows (1+2, 3+4, ...) into one bin each. A bin
# keeps the date of its first row and the summed response count of both rows.
# NOTE(review): bins pair consecutive *rows*, i.e. consecutive days that have
# at least one response; if a calendar day is missing from the data, a bin can
# span more than two calendar days.
is_bin_start <- seq_len(nrow(df)) %% 2L == 1L
bin_total <- df$nResp[is_bin_start]
bin_partner <- df$nResp[!is_bin_start]
# With an odd number of rows the final bin has no partner row and keeps its
# own count (the index loop used previously read past the end of the column
# in that case and failed).
has_partner <- seq_along(bin_partner)
bin_total[has_partner] <- bin_total[has_partner] + bin_partner
df <- df[is_bin_start, ]
df$nResp <- bin_total
# Drop bins with no responses. A logical filter is used because
# `df[-which(cond), ]` would return zero rows whenever nothing matches.
df <- df[!(df$nResp %in% 0), ]

# Plots

# Build a line plot of response counts per date bin.
#   counts: data frame with a `date` column (x axis) and an `nResp` column
#           (y axis).
#   title:  plot title.
# Returns the ggplot object; nothing is drawn until it is plotted/printed.
# ggplot2 functions are namespace-qualified so the plots work whether or not
# `library(ggplot2)` has been attached earlier in the session.
plot_response_counts <- function(counts, title) {
  ggplot2::ggplot(counts, ggplot2::aes(x = date, y = nResp)) +
    ggplot2::geom_line() +
    ggplot2::ylab("N Responses") +
    ggplot2::xlab("Date (2-day bins)") +
    ggplot2::ggtitle(title)
}

# plot nResp x date
df.gg <- plot_response_counts(
  df,
  "Number of Responses over time \n Including Dense Days"
)

plot(df.gg)

# exclude sept 12, sept 28, oct 4 (dense dates)
# NOTE(review): these are row positions in the binned `df`, not dates; they
# are only correct for this particular export and bin width and must be
# re-checked if either changes.
dense_bin_rows <- c(6, 13, 16)
df.reg_nd <- df[-dense_bin_rows, ]

df.reg_nd.gg <- plot_response_counts(
  df.reg_nd,
  "Number of Responses over time \n Excluding Dense Days"
)

plot(df.reg_nd.gg)