library("rio")
library("data.table")
library("ggplot2")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(chron)
library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.1 v purrr 0.3.2
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::between() masks data.table::between()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
# Load the semicolon-separated sleep-tracker export.
SleepData <- read.csv(
  file = "C:/Users/innymp/Downloads/sleepdata_Final.csv",
  header = TRUE,
  sep = ";"
)
# Earlier chron-based attempt at converting Time.in.bed, kept for reference:
# SleepData$Hour <- as.numeric(SleepData$Hour)
# SleepData$Minutes <- as.numeric(SleepData$Minutes)
# SleepData$Time.in.bed <- times(SleepData$Time.in.bed)
# SleepData$Time.in.bed <- 60 * hours(SleepData$Time.in.bed) + minutes(SleepData$Time.in.bed)
# Convert the start/end stamps to dates, split Time.in.bed into its two
# numeric parts and derive the total time in bed in minutes.
SleepData <- SleepData %>%
  mutate(
    Start = as.Date(Start),
    End = as.Date(End)
  ) %>%
  separate(Time.in.bed, into = c("Hour", "Minutes")) %>%
  mutate(
    Hour = as.numeric(Hour),
    Minutes = as.numeric(Minutes),
    minutes = Hour * 60 + Minutes
  )
# Display only: the result is not assigned, so Hour and Minutes are still
# present in SleepData after this line.
select(SleepData, -c(Hour, Minutes))
## Start End Sleep.quality Wake.up Sleep.Notes Heart.rate
## 1 2019-05-03 2019-05-04 71% NA NA 0
## 2 2019-05-04 2019-05-05 73% NA NA 0
## 3 2019-05-05 2019-05-06 75% NA NA 0
## 4 2019-05-06 2019-05-07 81% NA NA 0
## 5 2019-05-08 2019-05-08 0% NA NA 0
## 6 2019-05-09 2019-05-09 7% NA NA 0
## 7 2019-05-09 2019-05-10 100% NA NA 0
## 8 2019-05-10 2019-05-11 100% NA NA 0
## 9 2019-05-11 2019-05-11 4% NA NA 0
## 10 2019-05-11 2019-05-12 75% NA NA 0
## 11 2019-05-12 2019-05-13 77% NA NA 0
## 12 2019-05-13 2019-05-13 8% NA NA 0
## 13 2019-05-14 2019-05-15 88% NA NA 0
## 14 2019-05-15 2019-05-16 99% NA NA 0
## 15 2019-05-16 2019-05-17 86% NA NA 0
## 16 2019-05-17 2019-05-18 94% NA NA 0
## 17 2019-05-20 2019-05-21 95% NA NA 0
## 18 2019-05-21 2019-05-22 85% NA NA 0
## 19 2019-05-22 2019-05-23 78% NA NA 0
## 20 2019-05-23 2019-05-24 85% NA NA 0
## 21 2019-05-24 2019-05-25 99% NA NA 0
## 22 2019-05-25 2019-05-26 77% NA NA 0
## 23 2019-05-28 2019-05-29 92% NA NA 0
## 24 2019-05-29 2019-05-30 90% NA NA 0
## 25 2019-05-30 2019-05-31 90% NA NA 0
## 26 2019-05-31 2019-06-01 87% NA NA 0
## Activity..steps. minutes
## 1 0 481
## 2 0 485
## 3 0 511
## 4 0 469
## 5 0 0
## 6 0 31
## 7 0 495
## 8 0 522
## 9 0 20
## 10 0 451
## 11 0 522
## 12 0 37
## 13 0 500
## 14 0 504
## 15 0 500
## 16 0 509
## 17 0 520
## 18 0 510
## 19 0 508
## 20 0 492
## 21 0 497
## 22 0 447
## 23 0 528
## 24 0 464
## 25 0 483
## 26 0 463
# Keep only entries longer than an hour; the printed table shows a few
# very short recordings (0, 20, 31 and 37 minutes) that this removes.
SleepData <- filter(SleepData, minutes > 60)
# Drop the helper columns and the columns the plots do not use.
SleepData <- select(
  SleepData,
  -c("Hour", "Minutes", "Wake.up", "Sleep.Notes", "Heart.rate",
     "Activity..steps.")
)
# Sleep quality is stored as text such as "71%"; strip the sign and convert.
SleepData$Sleep.quality <- as.numeric(sub("%", "", SleepData$Sleep.quality))

# Time in bed per night, with each point coloured by that night's quality.
plot1 <- ggplot(
  SleepData,
  aes(x = Start, y = minutes, colour = Sleep.quality)
) +
  geom_jitter(size = 5) +
  labs(
    title = "Minutes of Sleeping During May with Sleeping Quality Colored",
    x = "Date in May",
    y = "Sleeping Time in Minutes"
  )
# Reference line; the caption below describes it as an American average.
line <- geom_hline(yintercept = 437, linetype = "dashed", colour = "red")
# The points are mapped to `colour`, so the legend title has to be set on
# `colour`; a `fill` label would be silently ignored.
plot2 <- plot1 +
  line +
  labs(
    colour = "Sleep Quality in Percent",
    caption = "Red dashed line represents an American average sleeping time."
  ) +
  theme_dark() +
  theme(
    panel.border = element_blank(),
    plot.caption = element_text(hjust = 0.5),
    panel.grid.minor = element_blank()
  )
# Two-digit day of the month, used as the x axis of a later plot.
SleepData$Day <- format(as.Date(SleepData$Start), "%d")
library(dplyr)
library(ggplot2)
# Weekday labels, Monday first.
day <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
# One value per weekday, stored as character strings.
# NOTE(review): the same figures appear later in this file as weekly screen
# time; confirm what this vector is meant to hold.
sleep_minute <- as.character(c(292, 143, 272, 435, 320, 307, 318))
# Character matrix pairing each weekday with its value.
sleep1 <- cbind(day = day, sleep_minute = sleep_minute)
library("XML")
library("methods")
library("tidyverse")
library("lubridate")
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:chron':
##
## days, hours, minutes, seconds, years
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday,
## week, yday, year
## The following object is masked from 'package:base':
##
## date
library("scales")
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library("ggthemes")
library("ggplot2")
# Unpack the health export archive into the download folder.
path <- "C:/Users/innymp/Downloads"
zip <- file.path(path, "export_final.zip")
unzip(zipfile = zip, exdir = path)
# Pause kept from the original script.
# NOTE(review): confirm whether this wait is still needed after unzip().
Sys.sleep(3)
# Show what the archive contained.
export_dir <- file.path(path, "apple_health_export")
list.files(export_dir)
## [1] "export.xml" "export_cda.xml"
# Parse the main export document and print a summary of its nodes.
xml <- xmlParse(file.path(export_dir, "export.xml"))
summary(xml)
## $nameCounts
##
## Record ExportDate HealthData Me
## 14749 1 1 1
##
## $numNodes
## [1] 14752
# Nested-list view of the whole export, flattened further down the script.
xml_data <- xmlToList(xml)
# One row per <Record> node, one column per XML attribute.
df_record <- XML:::xmlAttrsToDataFrame(xml["//Record"])
# Tidy the record table. The steps run in order: endDate is parsed first so
# that every calendar field below is derived from the parsed timestamp.
df <- df_record %>%
  mutate(
    # keep only the text between "name:" and the following comma
    device = gsub(".*(name:)|,.*", "", device),
    value = as.numeric(as.character(value)),
    endDate = ymd_hms(endDate, tz = "America/Los_Angeles")
  ) %>%
  mutate(
    date = date(endDate),
    year = year(endDate),
    month = month(endDate),
    day = day(endDate),
    yday = yday(endDate),
    wday = wday(endDate),
    hour = hour(endDate),
    minute = minute(endDate)
  ) %>%
  mutate(
    # shorten the type by removing the common identifier prefix
    type = str_remove(type, "HKQuantityTypeIdentifier")
  )
## Date in ISO8601 format; converting timezone from UTC to "America/Los_Angeles".
# Flatten the nested export list into one named character vector.
xml_unlist <- unlist(xml_data)
flat_names <- names(xml_unlist)
# Pull every flattened entry whose name matches `pattern`.
record_field <- function(pattern) {
  as.vector(xml_unlist[grep(pattern, flat_names)])
}
Record.type <- record_field("Record.type|Record..attrs.type")
Record.unit <- record_field("Record.unit|Record..attrs.unit")
Record.value <- record_field("Record.value|Record..attrs.value")
Record.creationDate <- record_field(
  "Record.creationDate|Record..attrs.creationDate"
)
# Bind the four attribute vectors column-wise into a character data frame.
healthData <- data.frame(
  cbind(Record.creationDate, Record.type, Record.unit, Record.value),
  stringsAsFactors = FALSE
)
healthData$Record.value <- as.numeric(healthData$Record.value)
# Split the creation stamp on spaces, then the date part on dashes.
healthData <- healthData %>%
  separate(Record.creationDate, c("date", "time", "misc"), sep = " ") %>%
  separate(date, c("year", "month", "day"), sep = "-")
healthData$Record.type <- gsub(
  "HKQuantityTypeIdentifier", "", healthData$Record.type
)
# Daily total per record type, most recent year first.
healthData_summary <- healthData %>%
  group_by(Record.type, year, month, day) %>%
  summarise(sum = sum(Record.value)) %>%
  arrange(desc(year)) %>%
  data.frame()
# NOTE(review): rows 123-151 are picked by position and are presumably the
# May entries of a single record type; confirm against the summary table.
healthData_summary_May <- healthData_summary[123:151, ]
summary(healthData_summary_May)
## Record.type year month
## Length:29 Length:29 Length:29
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## day sum
## Length:29 Min. : 2.037
## Class :character 1st Qu.: 4.453
## Mode :character Median : 5.354
## Mean : 5.779
## 3rd Qu.: 7.099
## Max. :11.239
# Bar chart of the daily totals in healthData_summary_May, faceted by month,
# with each bar labelled by its rounded value.
# The blue colour is a fixed setting, so it is passed to the geoms directly.
# Inside aes() the string "blue" would be treated as a data value and drawn
# in the default palette colour instead.
# The title names distance rather than steps, to agree with the y-axis label
# and with the later use of these values as miles walked.
plot3 <- ggplot(healthData_summary_May, aes(x = day, y = sum)) +
  geom_bar(stat = "identity", colour = "blue", fill = "blue") +
  facet_grid(. ~ month) +
  geom_text(
    aes(label = round(sum, 2)),
    colour = "blue",
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  theme_dark() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "Total Distance Walked per Day for May 2019",
    x = "Day",
    y = "Total Distance (mile)"
  ) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )
#plot4 <- plot3 + geom_density(aes(x = day, y= sum),healthData_summary_May)
#plot4 <-plot3 + geom_density(aes(x=day,y=sum,group = 1,color = "blue"),stat = "identity")
#plot3
# Drop the May rows that have no matching night in SleepData.
# NOTE(review): the removed positions are hard-coded; confirm they still
# line up with the nights filtered out of SleepData.
healthData_summary_May_selected_1 <-
  healthData_summary_May[-c(5, 6, 11, 16, 17, 24, 25), ]
# Daily totals and the matching sleep columns, selected by name rather than
# by column position.
steps_selected <- healthData_summary_May_selected_1$sum
sleep_selected <- SleepData[, c("Sleep.quality", "Day")]
# The two tables are paired purely by row position, so they must have the
# same number of rows; otherwise the values could be recycled silently.
stopifnot(length(steps_selected) == nrow(sleep_selected))
steps_Sleep_selected <- cbind(steps_selected, sleep_selected)
names(steps_Sleep_selected)[1] <- "Miles.Walked"

# Sleep quality per day, coloured by the distance walked that day.
plot5 <- ggplot(
  steps_Sleep_selected,
  aes(x = Day, y = Sleep.quality, colour = Miles.Walked)
) +
  geom_point(size = 5) +
  labs(
    title = "Sleep Quality vs. Number of Miles Walked per Day in May 2019",
    x = "Day in May 2019",
    y = "Sleep Quality in Percent",
    caption = "The dashed line represents the American average sleep quality in percent"
  )
# Reference line described by the caption above.
line2 <- geom_hline(yintercept = 72, linetype = "dashed", color = "red", size = 2)
# theme_dark() is a complete theme and replaces every earlier theme()
# setting, so it is applied first and the grid tweaks follow it.
plot6 <- plot5 +
  line2 +
  theme_dark() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
#plot6
# Mean sleep quality per weekday. The row positions are hard-coded and index
# the nights in steps_Sleep_selected by the weekday of their start date.
# NOTE(review): the positions depend on the current row order; confirm them
# if the filtering above changes.
Mon <- mean(steps_Sleep_selected$Sleep.quality[c(4, 13)])
Tue <- mean(steps_Sleep_selected$Sleep.quality[c(9, 14, 19)])
Wed <- mean(steps_Sleep_selected$Sleep.quality[c(10, 15, 20)])
Thr <- mean(steps_Sleep_selected$Sleep.quality[c(5, 11, 16, 21)])
Fri <- mean(steps_Sleep_selected$Sleep.quality[c(1, 6, 12, 17, 22)])
Sat <- mean(steps_Sleep_selected$Sleep.quality[c(2, 7, 18)])
Sun <- mean(steps_Sleep_selected$Sleep.quality[c(3, 8)])
# Ordered Monday..Sunday to match the `day` vector these means are bound to
# by position further down; the previous Fri..Thu order attached each mean
# to the wrong weekday.
mean_all <- c(Mon, Tue, Wed, Thr, Fri, Sat, Sun)
library(dplyr)
library(ggplot2)
# Weekday labels, Monday first.
day <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
# Average screen time per weekday, as entered (text) and as numbers.
screenTimeWeekly_minute <- c("292", "143", "272", "435", "320", "307", "318")
screenTimeWeekly_minute_number <- as.numeric(screenTimeWeekly_minute)
# Empty 7 x 2 placeholder frame, kept in case later code refers to it.
sleep <- data.frame(matrix(ncol = 2, nrow = 7))
# Character matrix of the raw labels and values.
sleep1 <- cbind(day, screenTimeWeekly_minute)
# Plotting frame: minutes are numeric so the axis and colour scale are
# continuous, and day is a factor in calendar order so the axis is not
# sorted alphabetically.
sleep.data <- data.frame(
  day = factor(day, levels = day),
  screenTimeWeekly_minute = screenTimeWeekly_minute_number
)
# theme_dark() is a complete theme and wipes earlier theme() settings, so it
# comes first and the axis-label rotation is applied after it. The rendered
# look stays dark, as it was before.
plot_screen <- ggplot(sleep.data, aes(x = day, y = screenTimeWeekly_minute)) +
  geom_point(size = 5, col = "blue") +
  theme_dark() +
  labs(
    y = "Average Weekly Screen Time (minutes)",
    x = "Day of the Week",
    title = "Average Weekly Screen Time for Each Day"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
#plot_screen
# mean_all is attached by position, so it must hold one value per weekday in
# the same Monday..Sunday order as `day`.
stopifnot(length(mean_all) == nrow(sleep.data))
screen_sleep_data <- cbind(sleep.data, mean_all)
# Average sleep quality per weekday, coloured by that weekday's screen time.
plot_screen_sleep <- ggplot(
  screen_sleep_data,
  aes(x = day, y = mean_all, colour = screenTimeWeekly_minute)
) +
  geom_point(size = 5) +
  theme_dark() +
  theme(
    panel.border = element_blank(),
    plot.caption = element_text(hjust = 0.5),
    panel.grid.minor = element_blank()
  ) +
  labs(
    y = "Average Sleep Quality (Percent)",
    x = "Day of the Week",
    title = "Average Sleep Quality per Day of the Week (Percent)"
  ) +
  theme(
    panel.background = element_rect(fill = "grey50", colour = "grey50"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )