## Import all required libraries
library(XML)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## ── Attaching packages ──────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ lubridate::intersect() masks base::intersect()
## ✖ dplyr::lag() masks stats::lag()
## ✖ lubridate::setdiff() masks base::setdiff()
## ✖ lubridate::union() masks base::union()
library(ggplot2)
library(gganimate)
library(gifski)
library(png)
## Work from the project folder that contains the exported health data
setwd("~/Documents/R Projects")
## Parse the health data exported from the iPhone
xml <- xmlParse("myHealthData.xml")
summary(xml) ## Node counts per element name (output recorded below)
## $nameCounts
##
## Record MetadataEntry ExportDate HealthData Me
## 15783 2 1 1 1
##
## $numNodes
## [1] 15788
## Flatten the attributes of every <Record> node into a data frame, parse
## creationDate as a UTC date-time, and derive the calendar features
## (month, year, weekday name, hour, date string) used by the analyses below.
## Every derived feature is therefore expressed in UTC, not in local time.
## NOTE(review): xmlAttrsToDataFrame is reached through `:::`, i.e. it is not
## part of the XML package's exported interface -- confirm it is still
## available after upgrading the package.
myDF <- XML:::xmlAttrsToDataFrame(xml["//Record"]) %>%
  mutate(
    creationDate = ymd_hms(creationDate, tz = "UTC"),
    month = format(creationDate, "%m"),
    year = format(creationDate, "%Y"),
    dayofweek = wday(creationDate, label = TRUE, abbr = FALSE),
    hour = format(creationDate, "%H"),
    date = format(creationDate, "%Y-%m-%d")
  )
## Analyze total Step count, Total Flights Climbed, and Total Distance Covered
## 1. Total Steps Count
## Sum the step-count records for every calendar date. `year` and `month` are
## kept as grouping keys only so that they survive as columns for the plot.
step_count <- myDF %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  group_by(date, year, month) %>%
  ## Go through as.character() first so that, in case `value` is stored as a
  ## factor, the recorded numbers are summed rather than the level codes
  summarize(step_count = sum(as.numeric(as.character(value)))) %>%
  ## Drop the grouping before rewriting `date`: overwriting a grouping column
  ## on a still-grouped tibble can leave its grouping metadata out of date
  ungroup() %>%
  ## Turn the "YYYY-MM-DD" strings into Date objects (needed by scale_x_date)
  mutate(date = as.Date(date, "%Y-%m-%d"))
## Plot the step totals over time, one coloured line and one smoother per year
## NOTE(review): each plotted value is a per-date total, while the title and
## x-axis label say "Monthly"/"Months" -- confirm which of the two is intended.
plotSteps <- ggplot(step_count, aes(x = date, y = step_count, group = year)) +
  geom_line(aes(colour = year)) +
  geom_smooth(se = FALSE) +
  ggtitle("Total number of Steps - Monthly") +
  xlab("Months-Year") +
  ylab("Number of Steps") +
  scale_x_date(date_labels = "%m-%Y")
print(plotSteps)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Check last year vs this year's step counts per week
## Plot, for one year, the average daily step count of each weekday per month.
##
## Arguments:
##   myDF  data frame of health records; the columns type, value, date, year,
##         month and dayofweek are read
##   YEAR  year to keep, compared with the `year` column (e.g. '2018')
##
## Returns a ggplot object with one line per weekday, month on the x axis and
## the column `weekly_step_count` on the y axis. Callers can keep adding
## layers to it.
plot_weekly_steps <- function(myDF, YEAR) {
  ## First collapse the individual records into one total per calendar date.
  ## Averaging the raw record values directly would give the mean size of a
  ## single record, which is not a step count for a day.
  daily_steps <- myDF %>%
    filter(type == "HKQuantityTypeIdentifierStepCount") %>%
    filter(year == YEAR) %>%
    group_by(date, dayofweek, year, month) %>%
    summarize(daily_total = sum(as.numeric(as.character(value)))) %>%
    ungroup()

  ## Then average those daily totals for every weekday within each month
  weekly_step_count <- daily_steps %>%
    group_by(dayofweek, year, month) %>%
    summarize(weekly_step_count = mean(daily_total)) %>%
    ungroup()

  ggplot(
    weekly_step_count,
    aes(x = month, y = weekly_step_count, group = dayofweek)
  ) +
    geom_line(aes(colour = dayofweek), size = 1) +
    ggtitle(paste("Weekly Average Step Count", "For", YEAR))
}
## Build each year's weekday-by-month line chart once and reuse it below
weekly_plot_2018 <- plot_weekly_steps(myDF, "2018")
weekly_plot_2019 <- plot_weekly_steps(myDF, "2019")

## Show the static charts: 2018 first, then 2019
weekly_plot_2018

weekly_plot_2019

## Create a gif to see progress per week in each month: points are added on
## top of the lines and the data is revealed along the numeric month
weekly_plot_2018 +
  geom_point() +
  transition_reveal(as.numeric(month))
weekly_plot_2019 +
  geom_point() +
  transition_reveal(as.numeric(month))

## Total Flights climbed
## Sum the flights-climbed records per year and month and draw them as bars
## stacked by year. `totalFlights` ends up holding the ggplot object.
totalFlights <- myDF %>%
  filter(type == "HKQuantityTypeIdentifierFlightsClimbed") %>%
  mutate(value = as.numeric(as.character(value))) %>%
  group_by(year, month) %>%
  summarise(totalFlights = sum(value)) %>%
  ggplot(aes(x = month, y = totalFlights, fill = year)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_y_continuous(labels = scales::comma) +
  theme(panel.grid.major = element_blank()) +
  labs(
    title = "Total Flights climbed per month YoY",
    x = "Months",
    y = "Total Flights Climbed (Count)"
  )
## Render the chart
totalFlights

## Total Distance Covered
## Sum the walking/running distance records per year and month and draw them
## as side-by-side (dodged) bars per year. `totalDistanceCovered` ends up
## holding the ggplot object.
## NOTE(review): the y-axis label hard-codes "miles"; the unit of `value` is
## not checked here -- confirm it against the exported records.
totalDistanceCovered <- myDF %>%
  filter(type == "HKQuantityTypeIdentifierDistanceWalkingRunning") %>%
  mutate(value = as.numeric(as.character(value))) %>%
  group_by(year, month) %>%
  summarise(totalDistanceCovered = sum(value)) %>%
  ggplot(aes(x = month, y = totalDistanceCovered, fill = year)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::comma) +
  theme(panel.grid.major = element_blank()) +
  labs(
    title = "Total Distance covered per month YoY",
    x = "Months",
    y = "Total Distance (miles)"
  )
## Render the chart
totalDistanceCovered
