Load packages.
# Remove every (non-hidden) object from the current environment before the
# analysis starts.
# NOTE(review): this deletes whatever the caller had in their workspace and
# does not reset loaded packages or options, so it does not give a truly
# clean session -- consider dropping it and running in a fresh R process.
rm(list = ls())
library(dplyr)
library(ggplot2)
library(lubridate)
library(broom)
library(knitr)
# Make the black-and-white ggplot2 theme the default for all plots.
theme_set(theme_bw())

# knitr chunk defaults: echo the code, suppress messages and warnings, and
# switch off error capture, caching and code tidying.
# Spelled with TRUE/FALSE because T and F are ordinary, reassignable variables.
opts_chunk$set(
  echo = TRUE, message = FALSE, warning = FALSE,
  error = FALSE, cache = FALSE, tidy = FALSE
)
Read in data.
# Import the health export and keep only the interval bounds and step count.
# `Steps..count.` is the column name as it arrives from read.csv()
# (presumably a sanitised "Steps (count)" header -- verify against the file).
d <- read.csv("Health Data.csv") %>%
  rename(Steps = Steps..count.) %>%
  select(Start, Finish, Steps)

# Parse the timestamps (day-abbreviated month-year hour:minute) and derive the
# calendar fields used by the summaries below, all taken from `Start`.
# NOTE(review): %b parsing follows the session locale and no time zone is
# given, so the session time zone is used -- confirm both match the export.
d <- d %>%
  mutate(
    Start = as.POSIXct(as.character(Start), format = "%d-%b-%Y %H:%M"),
    Finish = as.POSIXct(as.character(Finish), format = "%d-%b-%Y %H:%M"),
    weekday = wday(Start, label = TRUE),
    month = month(Start, label = TRUE),
    year = as.factor(year(Start)),
    hour = hour(Start),
    day = day(Start),
    date = date(Start),
    # "Mon YYYY" label identifying the calendar month of each record.
    my = format(date, format = "%b %Y")
  )
# Total steps for each calendar date.
by.day <- summarise(group_by(d, date), Steps = sum(Steps))

# Distribution of the daily totals.
ggplot(data = by.day, mapping = aes(x = Steps)) +
  theme_bw() +
  geom_histogram()
# Daily step totals, carrying the year along so plots can split on it.
by.day.year <- summarise(group_by(d, date, year), Steps = sum(Steps))

# Daily totals by year, drawn as overlapping densities.
ggplot(data = by.day.year, mapping = aes(x = Steps, fill = year, group = year)) +
  theme_bw() +
  geom_density(alpha = .2)

# The same comparison drawn as histograms.
ggplot(data = by.day.year, mapping = aes(x = Steps, fill = year, group = year)) +
  theme_bw() +
  geom_histogram(alpha = .2)
# Daily step totals plus `date2`, a running index (1, 2, ...) over the rows in
# the order summarize() returns them. seq_len() is used instead of 1:n() so an
# empty input yields an empty index rather than c(1, 0).
by.date <- d %>%
  group_by(date) %>%
  summarize(Steps = sum(Steps)) %>%
  mutate(date2 = seq_len(n()))

# Daily totals over time with a fitted linear trend.
ggplot(by.date, aes(x = date, y = Steps)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw()

# Linear regression of the daily total on the day index (not on the calendar
# date itself), reported as a tidy coefficient table.
tidy(lm(Steps ~ date2, data = by.date))
# Output as recorded in the original document:
## term estimate std.error statistic p.value
## 1 (Intercept) 6080.954807 368.584905 16.498111 3.952957e-51
## 2 date2 4.410002 1.004181 4.391642 1.318484e-05
# Mean daily step total for each "Mon YYYY" period: sum to days, average the
# days within each period, then attach the date of the first record seen for
# that period so it can be placed on a date axis.
by.my <- d %>%
  group_by(date, my) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(my) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(my, date) %>%
      distinct(my, .keep_all = TRUE),
    by = "my"
  ) %>%
  ungroup()

# Monthly averages along the calendar, one tick per month.
ggplot(data = by.my, mapping = aes(x = date, y = Steps)) +
  geom_point() +
  geom_line() +
  scale_x_date(date_labels = "%b %Y", date_breaks = "month") +
  labs(x = "") +
  theme_bw() +
  theme(axis.text.x = element_text(size = rel(1), angle = 90))
# Mean daily step total per month of the year (all years pooled), joined to the
# date of the first record seen for that month so it can sit on a date axis.
by.m <- d %>%
  group_by(date, month) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(month) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(month, date) %>%
      distinct(month, .keep_all = TRUE),
    by = "month"
  )

# Month-of-year averages, labelled with the abbreviated month name.
ggplot(data = by.m, mapping = aes(x = date, y = Steps)) +
  geom_point() +
  geom_line() +
  scale_x_date(date_labels = "%b", date_breaks = "month") +
  labs(x = "") +
  theme_bw() +
  theme(axis.text.x = element_text(size = rel(1), angle = 90))
# Mean daily step total for every month/year combination, with the date of the
# first record seen for that combination attached.
by.my2 <- d %>%
  group_by(date, month, year) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(month, year) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(month, year, date) %>%
      distinct(month, year, .keep_all = TRUE),
    by = c("month", "year")
  )

# One line per year across the months, to compare years side by side.
ggplot(
  data = by.my2,
  mapping = aes(x = month, y = Steps, color = year, group = year)
) +
  geom_point() +
  geom_line() +
  labs(x = "") +
  theme_bw() +
  theme(axis.text.x = element_text(size = rel(1), angle = 90))
# Mean daily step total per day of the week, joined to the date of the first
# record seen for that weekday so the points can sit on a date axis.
by.d <- d %>%
  group_by(date, weekday) %>%
  summarise(Steps = sum(Steps)) %>%
  group_by(weekday) %>%
  summarise(Steps = mean(Steps)) %>%
  left_join(
    d %>%
      select(weekday, date) %>%
      distinct(weekday, .keep_all = TRUE),
    by = "weekday"
  )

# Weekday averages, labelled with the abbreviated weekday name.
ggplot(data = by.d, mapping = aes(x = date, y = Steps)) +
  geom_point() +
  geom_line() +
  scale_x_date(date_labels = "%a", date_breaks = "day") +
  labs(x = "") +
  theme_bw()
# Mean step count of the records starting in each hour of the day.
by.h <- summarise(group_by(d, hour), Steps = mean(Steps))

# Average steps across the day, with a tick every two hours.
ggplot(data = by.h, mapping = aes(x = hour, y = Steps)) +
  geom_point() +
  geom_line() +
  labs(x = "") +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  theme_bw()
# Mean step count per hour of the day, split by weekday.
# Note: this reuses (overwrites) the name `by.h`.
by.h <- d %>%
  group_by(hour, weekday) %>%
  summarize(Steps = mean(Steps))

# One line per weekday. The aesthetic is spelled `group` (the original
# `gorup` was not a recognised aesthetic and was ignored).
ggplot(by.h, aes(x = hour, y = Steps, group = weekday, color = weekday)) +
  geom_point() +
  geom_line() +
  xlab("") +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  # Reference lines at hours 8, 12 and 17. Passing the values as a parameter
  # (not inside aes()) draws exactly one line per value and keeps them out of
  # the weekday colour mapping.
  geom_vline(xintercept = c(8, 12, 17)) +
  theme_bw()
# Mean step count per hour of the day, split by year.
by.hy <- d %>%
  group_by(year, hour) %>%
  summarize(Steps = mean(Steps))

# One line per year. `year` is already a factor, so it is mapped directly
# (this also gives the legend the plain title "year").
ggplot(by.hy, aes(x = hour, y = Steps, group = year, color = year)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  # Reference lines at hours 8, 12 and 17. Passing the values as a parameter
  # (not inside aes()) draws exactly one line per value and keeps them out of
  # the year colour mapping.
  geom_vline(xintercept = c(8, 12, 17)) +
  xlab("") +
  theme_bw()
# Mean step count per hour of the day and year, with records whose weekday
# label is "Sat" or "Sun" excluded.
# Note: this reuses (overwrites) the name `by.hy`.
# NOTE(review): the labels come from wday(label = TRUE) and are
# locale-dependent -- confirm the session uses English abbreviations.
by.hy <- d %>%
  filter(!weekday %in% c("Sat", "Sun")) %>%
  group_by(year, hour) %>%
  summarize(Steps = mean(Steps))

# One line per year. `year` is already a factor, so it is mapped directly
# (this also gives the legend the plain title "year").
ggplot(by.hy, aes(x = hour, y = Steps, group = year, color = year)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 24, 2)) +
  # Reference lines at hours 8, 12 and 17. Passing the values as a parameter
  # (not inside aes()) draws exactly one line per value and keeps them out of
  # the year colour mapping.
  geom_vline(xintercept = c(8, 12, 17)) +
  xlab("") +
  theme_bw()