Load packages.

# Remove every object that ls() reports from the current environment so the
# analysis starts from an empty workspace.
# NOTE(review): this only clears objects -- attached packages and options()
# are untouched; running the document in a fresh R session is more reliable.
rm(list = ls())

library(dplyr)
library(ggplot2)
library(lubridate)
library(broom)
library(knitr)


# Make the black-and-white theme the default for every ggplot in this document.
theme_set(theme_bw())

# Global knitr chunk options: echo the code, hide messages and warnings, stop
# on errors instead of printing them, and disable caching and code tidying.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE,
               error = FALSE, cache = FALSE, tidy = FALSE)

Read in data.

# Load the health export and keep only the interval bounds and the step count.
# The step-count column is read in under the name `Steps..count.`; it is
# renamed to `Steps` while selecting.
d <- read.csv("Health Data.csv")
d <- d %>%
  select(Start, Finish, Steps = Steps..count.)

# Parse the interval timestamps and derive the calendar fields that the
# sections below group by.
#   Start, Finish : parsed with the pattern "%d-%b-%Y %H:%M"; no tz argument is
#                   given, so as.POSIXct() uses the session's time zone
#   weekday, month: labelled values from wday() / month() with label = TRUE
#   year          : factor, so it can be used directly as a discrete aesthetic
#   hour, day     : numeric components of Start
#   date          : calendar date of Start
#   my            : "Mon YYYY" text label built from `date`
# `hour` was previously assigned twice with the same expression; it is now
# computed once (column order and values are unchanged).
d <- d %>%
  mutate(
    Start   = as.POSIXct(as.character(Start), format = "%d-%b-%Y %H:%M"),
    Finish  = as.POSIXct(as.character(Finish), format = "%d-%b-%Y %H:%M"),
    weekday = wday(Start, label = TRUE),
    month   = month(Start, label = TRUE),
    year    = as.factor(year(Start)),
    hour    = hour(Start),
    day     = day(Start),
    date    = date(Start),
    my      = format(date, format = "%b %Y")
  )

Steps by day

# Total steps for each calendar date.
by.day <- summarise(group_by(d, date), Steps = sum(Steps))

# Histogram of the daily totals.
ggplot(data = by.day, mapping = aes(x = Steps)) +
  theme_bw() +
  geom_histogram()

Steps by day + year

# Daily step totals, carrying the year along so the distributions can be
# compared between years.
by.day.year <- d %>%
  group_by(date, year) %>%
  summarise(Steps = sum(Steps))

# Overlaid density curves of the daily totals, one per year.
ggplot(data = by.day.year,
       mapping = aes(x = Steps, fill = year, group = year)) +
  theme_bw() +
  geom_density(alpha = .2)

# Same comparison drawn as semi-transparent histograms.
ggplot(data = by.day.year,
       mapping = aes(x = Steps, fill = year, group = year)) +
  theme_bw() +
  geom_histogram(alpha = .2)

Steps by day over time (linear trend)

# Daily step totals plus `date2`, a running index (1, 2, 3, ...) over the
# summarised rows. seq_len(n()) is used instead of 1:n() so that an empty
# summary produces an empty index rather than c(1, 0).
by.date <- d %>%
  group_by(date) %>%
  summarize(Steps = sum(Steps)) %>%
  mutate(date2 = seq_len(n()))

# Daily totals against the calendar date with a fitted straight line.
ggplot(by.date, aes(x = date, y = Steps)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw()

# Linear trend of the daily totals. Note that the model regresses on the row
# index `date2`, not on the calendar date used in the plot above, so the slope
# is the change in steps per summarised day.
tidy(lm(Steps ~ date2, by.date))
##          term    estimate  std.error statistic      p.value
## 1 (Intercept) 6080.954807 368.584905 16.498111 3.952957e-51
## 2       date2    4.410002   1.004181  4.391642 1.318484e-05

Steps by month + year

# Average daily steps for every "Mon YYYY" label: first total the steps per
# day, then average those daily totals within each label. The joined `date`
# column is the date of the first row of `d` carrying that label; it gives
# each label a position on a date axis.
by.my <- d %>%
  group_by(date, my) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(my) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(my, date) %>%
      group_by(my) %>%
      slice(1),
    by = "my"
  ) %>%
  ungroup()

# Monthly averages as a time series, one tick per month.
ggplot(data = by.my, mapping = aes(y = Steps, x = date)) +
  geom_point() +
  geom_line() +
  labs(x = "") +
  scale_x_date(date_breaks = "month", date_labels = "%b %Y") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, size = rel(1)))

Steps by month

# Average daily steps per calendar month: total the steps per day, then
# average the daily totals within each month label. The joined `date` is the
# date of the first row of `d` that falls in that month and is used only to
# place the month on a date axis.
by.m <- d %>%
  group_by(date, month) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(month) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(month, date) %>%
      group_by(month) %>%
      slice(1),
    by = "month"
  )

# One point per month, labelled with the abbreviated month name.
ggplot(data = by.m, mapping = aes(y = Steps, x = date)) +
  geom_point() +
  geom_line() +
  labs(x = "") +
  scale_x_date(date_breaks = "month", date_labels = "%b") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, size = rel(1)))

Steps by month / year

# Average daily steps for every month/year combination: total the steps per
# day, then average the daily totals within each (month, year) pair. The date
# of the first matching row of `d` is joined on as an extra column.
by.my2 <- d %>%
  group_by(date, month, year) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(month, year) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(month, year, date) %>%
      group_by(month, year) %>%
      slice(1),
    by = c("month", "year")
  )

# One line per year across the months, so years can be compared directly.
ggplot(data = by.my2,
       mapping = aes(x = month, y = Steps, color = year, group = year)) +
  geom_point() +
  geom_line() +
  labs(x = "") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, size = rel(1)))

Steps by day of the week

# Average daily steps per day of the week: total the steps per day, then
# average the daily totals within each weekday. The joined `date` is the date
# of the first row of `d` for that weekday and only serves to position the
# weekday on a date axis.
by.d <- d %>%
  group_by(date, weekday) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(weekday) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(weekday, date) %>%
      group_by(weekday) %>%
      slice(1),
    by = "weekday"
  )

# One point per weekday, labelled with the abbreviated weekday name.
ggplot(data = by.d, mapping = aes(y = Steps, x = date)) +
  geom_point() +
  geom_line() +
  labs(x = "") +
  scale_x_date(date_breaks = "day", date_labels = "%a") +
  theme_bw()

Steps by hour

# Mean of the recorded step counts for each hour of the day (the hour of the
# interval's start time).
by.h <- summarise(group_by(d, hour), Steps = mean(Steps))

# Hourly profile with a tick every two hours.
ggplot(data = by.h, mapping = aes(y = Steps, x = hour)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  labs(x = "") +
  theme_bw()

Steps by hour + weekday

# Mean of the recorded step counts for each hour of the day, split by weekday.
# This reuses (and overwrites) the name `by.h`.
by.h <- d %>%
  group_by(hour, weekday) %>%
  summarize(Steps = mean(Steps))

# One hourly profile per weekday. The aesthetic was previously misspelled as
# `gorup`, which ggplot2 does not recognise; it is now `group`.
# The reference lines at 8, 12 and 17 o'clock are passed as a fixed
# `xintercept` parameter rather than mapped through aes() against the data.
ggplot(by.h, aes(x = hour, y = Steps, group = weekday, color = weekday)) +
  geom_point() +
  geom_line() +
  xlab("") +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  geom_vline(xintercept = c(8, 12, 17)) +
  theme_bw()

Steps by hour + year

# Mean of the recorded step counts for each hour of the day, split by year.
by.hy <- d %>%
  group_by(year, hour) %>%
  summarize(Steps = mean(Steps))

# One hourly profile per year. `year` is already a factor, so it is mapped to
# colour directly; wrapping it in as.factor() again only changed the legend
# title to "as.factor(year)".
# The reference lines at 8, 12 and 17 o'clock are passed as a fixed
# `xintercept` parameter rather than mapped through aes() against the data.
ggplot(by.hy, aes(x = hour, y = Steps, group = year, color = year)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  geom_vline(xintercept = c(8, 12, 17)) +
  xlab("") +
  theme_bw()

Steps by hour - workweek only

# Mean of the recorded step counts per hour and year, restricted to rows whose
# weekday label is neither "Sat" nor "Sun". This reuses (and overwrites) the
# name `by.hy`.
# NOTE(review): the filter compares against English weekday abbreviations, so
# it presumably relies on wday(label = TRUE) producing English labels --
# confirm the session locale.
by.hy <- d %>%
  filter(!weekday %in% c("Sat", "Sun")) %>%
  group_by(year, hour) %>%
  summarize(Steps = mean(Steps))

# One hourly profile per year. `year` is already a factor, so it is mapped to
# colour directly; wrapping it in as.factor() again only changed the legend
# title to "as.factor(year)".
# The reference lines at 8, 12 and 17 o'clock are passed as a fixed
# `xintercept` parameter rather than mapped through aes() against the data.
ggplot(by.hy, aes(x = hour, y = Steps, group = year, color = year)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  geom_vline(xintercept = c(8, 12, 17)) +
  xlab("") +
  theme_bw()