## Import all required libraries
library(XML)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2
## ── Attaching packages ──────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ lubridate::intersect()   masks base::intersect()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ lubridate::setdiff()     masks base::setdiff()
## ✖ lubridate::union()       masks base::union()
library(ggplot2)
library(gganimate)
library(gifski)
library(png)

## NOTE(review): setwd() ties the script to one machine's directory layout;
## kept as-is because later code may rely on the working directory.
setwd("~/Documents/R Projects")
## Parse the health data export (XML) taken from my iPhone
xml <- xmlParse("myHealthData.xml")
summary(xml) ## Node counts per element name, as a quick sanity check
## $nameCounts
## 
##        Record MetadataEntry    ExportDate    HealthData            Me 
##         15783             2             1             1             1 
## 
## $numNodes
## [1] 15788
## Turn the attributes of every <Record> node into one data frame row each.
## xmlAttrsToDataFrame is reached with `:::`, i.e. it is not exported by XML.
myDF <- XML:::xmlAttrsToDataFrame(xml["//Record"])

## Parse the record creation timestamps into date-times expressed in UTC
myDF[["creationDate"]] <- ymd_hms(myDF[["creationDate"]], tz = "UTC")

## Derive calendar columns (as text, except the weekday factor) from the
## parsed creation time so records can be grouped at several granularities
myDF[["month"]] <- format(myDF[["creationDate"]], format = "%m")
myDF[["year"]] <- format(myDF[["creationDate"]], format = "%Y")
myDF[["dayofweek"]] <- wday(
  myDF[["creationDate"]],
  label = TRUE,
  abbr = FALSE
)
myDF[["hour"]] <- format(myDF[["creationDate"]], format = "%H")
myDF[["date"]] <- format(myDF[["creationDate"]], format = "%Y-%m-%d")

## Step count, flights climbed and distance covered are analyzed in turn
## 1. Total Steps Count: one row per calendar date holding that date's total
step_count <- myDF %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  # `value` may be a factor or text, so go through character before numeric
  mutate(value = as.numeric(as.character(value))) %>%
  group_by(date, year, month) %>%
  summarize(step_count = sum(value))

## The date column is still text at this point; turn it into a real Date so
## that scale_x_date() can be used on the x-axis
step_count$date <- as.Date(step_count$date, format = "%Y-%m-%d")

## Line per year of the per-date totals, with a smoothed trend on top
plotSteps <- ggplot(step_count, aes(x = date, y = step_count, group = year)) +
  geom_line(aes(colour = year)) +
  geom_smooth(se = FALSE) +
  scale_x_date(date_labels = "%m-%Y") +
  labs(
    title = "Total number of Steps - Monthly",
    x = "Months-Year",
    y = "Number of Steps"
  )
print(plotSteps)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Check last year vs this year's step counts per week

#' Plot the average daily step count per weekday, month by month, for one year
#'
#' Every row of `myDF` is a single health record, not a whole day, so the
#' step-count rows are first summed into per-date totals and only then
#' averaged. Averaging the raw rows directly would give the mean size of one
#' record, which depends on how the samples happened to be split up.
#'
#' @param myDF Data frame of health records. Must contain the columns `type`,
#'   `value`, `date`, `year`, `month` and `dayofweek`.
#' @param YEAR Year to plot; compared with `==` against the `year` column
#'   (e.g. `"2018"`).
#' @return A ggplot object: month on the x-axis, the mean daily step total on
#'   the y-axis, one coloured line per day of the week.
plot_weekly_steps <- function(myDF, YEAR) {
  required_cols <- c("type", "value", "date", "year", "month", "dayofweek")
  missing_cols <- required_cols[!(required_cols %in% names(myDF))]
  if (length(missing_cols) > 0) {
    stop(
      "myDF is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  weekly_step_count <- myDF %>%
    # `!!YEAR` forces the function argument, even if myDF has a YEAR column
    filter(type == "HKQuantityTypeIdentifierStepCount", year == !!YEAR) %>%
    # Step 1: total steps for each calendar date
    group_by(date, dayofweek, year, month) %>%
    summarize(daily_steps = sum(as.numeric(as.character(value)))) %>%
    # Step 2: average those daily totals per weekday within each month
    group_by(dayofweek, year, month) %>%
    summarize(weekly_step_count = mean(daily_steps)) %>%
    ungroup()

  ggplot(
    weekly_step_count,
    aes(x = month, y = weekly_step_count, group = dayofweek)
  ) +
    geom_line(aes(colour = dayofweek), size = 1) +
    ggtitle(paste("Weekly Average Step Count", "For", YEAR))
}

## Build the per-weekday plot once per year, then show each one
weekly_steps_2018 <- plot_weekly_steps(myDF = myDF, YEAR = "2018")
weekly_steps_2018

weekly_steps_2019 <- plot_weekly_steps(myDF = myDF, YEAR = "2019")
weekly_steps_2019

## Animated versions (gif): the lines are revealed month by month, with a
## point marking each value
weekly_steps_2018 + geom_point() + transition_reveal(as.numeric(month))

weekly_steps_2019 + geom_point() + transition_reveal(as.numeric(month))

## Flights climbed: summed per year and month, drawn as bars stacked by year
totalFlights <- myDF %>%
  filter(type == "HKQuantityTypeIdentifierFlightsClimbed") %>%
  # `value` may be a factor or text, so go through character before numeric
  mutate(value = as.numeric(as.character(value))) %>%
  group_by(year, month) %>%
  summarize(totalFlights = sum(value)) %>%
  ggplot(aes(x = month, y = totalFlights, fill = year)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_y_continuous(labels = scales::comma) +
  theme(panel.grid.major = element_blank()) +
  labs(
    title = "Total Flights climbed per month YoY",
    x = "Months",
    y = "Total Flights Climbed (Count)"
  )
totalFlights

## Walking/running distance: summed per year and month, with the years drawn
## as side-by-side bars within each month
totalDistanceCovered <- myDF %>%
  filter(type == "HKQuantityTypeIdentifierDistanceWalkingRunning") %>%
  # `value` may be a factor or text, so go through character before numeric
  mutate(value = as.numeric(as.character(value))) %>%
  group_by(year, month) %>%
  summarize(totalDistanceCovered = sum(value)) %>%
  ggplot(aes(x = month, y = totalDistanceCovered, fill = year)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::comma) +
  theme(panel.grid.major = element_blank()) +
  # NOTE(review): the axis label assumes the export is in miles - confirm
  # against the records' unit attribute
  labs(
    title = "Total Distance covered per month YoY",
    x = "Months",
    y = "Total Distance (miles)"
  )
totalDistanceCovered