library("rio")
library("data.table")
library("ggplot2")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(chron)
library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.1     v purrr   0.3.2
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::between()   masks data.table::between()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
# Read the semicolon-delimited sleep log into a data frame.
SleepData <- read.csv(
  file = "C:/Users/innymp/Downloads/sleepdata_Final.csv",
  header = TRUE,
  sep = ";"
)

# Convert the session start and end columns to Date.
SleepData$Start <- as.Date(SleepData$Start)
SleepData$End <- as.Date(SleepData$End)

# Disabled alternative conversion, kept for reference:
# SleepData$Hour <- as.numeric(SleepData$Hour)
# SleepData$Minutes <- as.numeric(SleepData$Minutes)
# SleepData$Time.in.bed <- times(SleepData$Time.in.bed)
# SleepData$Time.in.bed <- 60 * hours(SleepData$Time.in.bed) + minutes(SleepData$Time.in.bed)

# Split Time.in.bed into two pieces, make both numeric, and derive the total
# time in bed in minutes (presumably the raw value looks like "H:MM" --
# confirm against the CSV).
SleepData <- SleepData %>%
  separate(Time.in.bed, into = c("Hour", "Minutes")) %>%
  mutate(
    Hour = as.numeric(Hour),
    Minutes = as.numeric(Minutes),
    minutes = Hour * 60 + Minutes
  )

# Print the table without the intermediate Hour/Minutes columns. The result is
# not assigned, so SleepData itself still carries both columns.
select(SleepData, -c(Hour, Minutes))
##         Start        End Sleep.quality Wake.up Sleep.Notes Heart.rate
## 1  2019-05-03 2019-05-04           71%      NA          NA          0
## 2  2019-05-04 2019-05-05           73%      NA          NA          0
## 3  2019-05-05 2019-05-06           75%      NA          NA          0
## 4  2019-05-06 2019-05-07           81%      NA          NA          0
## 5  2019-05-08 2019-05-08            0%      NA          NA          0
## 6  2019-05-09 2019-05-09            7%      NA          NA          0
## 7  2019-05-09 2019-05-10          100%      NA          NA          0
## 8  2019-05-10 2019-05-11          100%      NA          NA          0
## 9  2019-05-11 2019-05-11            4%      NA          NA          0
## 10 2019-05-11 2019-05-12           75%      NA          NA          0
## 11 2019-05-12 2019-05-13           77%      NA          NA          0
## 12 2019-05-13 2019-05-13            8%      NA          NA          0
## 13 2019-05-14 2019-05-15           88%      NA          NA          0
## 14 2019-05-15 2019-05-16           99%      NA          NA          0
## 15 2019-05-16 2019-05-17           86%      NA          NA          0
## 16 2019-05-17 2019-05-18           94%      NA          NA          0
## 17 2019-05-20 2019-05-21           95%      NA          NA          0
## 18 2019-05-21 2019-05-22           85%      NA          NA          0
## 19 2019-05-22 2019-05-23           78%      NA          NA          0
## 20 2019-05-23 2019-05-24           85%      NA          NA          0
## 21 2019-05-24 2019-05-25           99%      NA          NA          0
## 22 2019-05-25 2019-05-26           77%      NA          NA          0
## 23 2019-05-28 2019-05-29           92%      NA          NA          0
## 24 2019-05-29 2019-05-30           90%      NA          NA          0
## 25 2019-05-30 2019-05-31           90%      NA          NA          0
## 26 2019-05-31 2019-06-01           87%      NA          NA          0
##    Activity..steps. minutes
## 1                 0     481
## 2                 0     485
## 3                 0     511
## 4                 0     469
## 5                 0       0
## 6                 0      31
## 7                 0     495
## 8                 0     522
## 9                 0      20
## 10                0     451
## 11                0     522
## 12                0      37
## 13                0     500
## 14                0     504
## 15                0     500
## 16                0     509
## 17                0     520
## 18                0     510
## 19                0     508
## 20                0     492
## 21                0     497
## 22                0     447
## 23                0     528
## 24                0     464
## 25                0     483
## 26                0     463
# Keep only sessions with more than 60 minutes in bed, then drop the helper
# columns and the columns that are not used in the plots.
SleepData$minutes <- as.numeric(SleepData$minutes)
SleepData <- filter(SleepData, minutes > 60)
SleepData <- select(
  SleepData,
  -c("Hour", "Minutes", "Wake.up", "Sleep.Notes", "Heart.rate",
     "Activity..steps.")
)

# Strip the percent sign so sleep quality becomes a number and can drive a
# continuous colour scale.
SleepData$Sleep.quality <- as.numeric(sub("%", "", SleepData$Sleep.quality))

# Minutes in bed per start date, coloured by sleep quality.
plot1 <- ggplot(SleepData, aes(x = Start, y = minutes, col = Sleep.quality)) +
  geom_jitter(size = 5, stat = "identity") +
  labs(
    title = "Minutes of Sleeping During May with Sleeping Quality Colored",
    x = "Date in May",
    y = "Sleeping Time in Minutes"
  )

# Horizontal reference line at 437 minutes (explained by the plot caption).
line <- geom_hline(yintercept = 437, linetype = "dashed", color = "red")

# Sleep.quality is mapped to the colour aesthetic, so the legend title has to
# be set through `colour`. Labelling `fill` had no effect because nothing is
# mapped to fill in this plot.
plot2 <- plot1 +
  line +
  labs(
    colour = "Sleep Quality in Percent",
    caption = "Red dashed line represents an American average sleeping time."
  ) +
  theme_dark() +
  theme(
    panel.border = element_blank(),
    plot.caption = element_text(hjust = 0.5),
    panel.grid.minor = element_blank()
  )

# Two-digit day of month of each session start, as a string.
SleepData$Day <- format(as.Date(SleepData$Start, format = "%Y-%m-%d"), "%d")

library(dplyr)
library(ggplot2)

# Weekday labels and one minute value per weekday (values kept as strings).
day <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
sleep_minute <- c("292", "143", "272", "435", "320", "307", "318")

# Two-column character matrix with one row per weekday.
sleep1 <- cbind(day = day, sleep_minute = sleep_minute)

library("XML")
library("methods")
library("tidyverse")
library("lubridate")
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:chron':
## 
##     days, hours, minutes, seconds, years
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday,
##     week, yday, year
## The following object is masked from 'package:base':
## 
##     date
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library("ggthemes")
library("ggplot2")
# Extract the health export archive into the download directory.
path <- "C:/Users/innymp/Downloads"
zip <- file.path(path, "export_final.zip")
unzip(zip, exdir = path)
Sys.sleep(3)

# Show what was extracted, then parse the main export document.
list.files(file.path(path, "apple_health_export"))
## [1] "export.xml"     "export_cda.xml"
xml <- xmlParse(file.path(path, "apple_health_export", "export.xml"))
summary(xml)
## $nameCounts
## 
##     Record ExportDate HealthData         Me 
##      14749          1          1          1 
## 
## $numNodes
## [1] 14752
# Nested-list view of the parsed document (flattened further down the script).
xml_data <- xmlToList(xml)

# Attributes of every //Record node collected into a data frame. This relies
# on an unexported helper of the XML package.
df_record <- XML:::xmlAttrsToDataFrame(xml["//Record"])

# Clean up the record table: tidy the device string, make `value` numeric,
# parse `endDate` in the America/Los_Angeles zone, split it into calendar
# parts, and strip the "HKQuantityTypeIdentifier" prefix from `type`.
df <- mutate(
  df_record,
  device = gsub(".*(name:)|,.*", "", device),
  value = as.numeric(as.character(value)),
  endDate = ymd_hms(endDate, tz = "America/Los_Angeles"),
  date = date(endDate),
  year = year(endDate),
  month = month(endDate),
  day = day(endDate),
  yday = yday(endDate),
  wday = wday(endDate),
  hour = hour(endDate),
  minute = minute(endDate),
  type = str_remove(type, "HKQuantityTypeIdentifier")
)
## Date in ISO8601 format; converting timezone from UTC to "America/Los_Angeles".
# Flatten the nested export list into one named vector; element names encode
# the path to each value.
xml_unlist <- unlist(xml_data)
xml_names <- names(xml_unlist)

# Pull each Record attribute out by matching on the flattened element names.
Record.type <- as.vector(
  xml_unlist[grepl("Record.type|Record..attrs.type", xml_names)]
)
Record.unit <- as.vector(
  xml_unlist[grepl("Record.unit|Record..attrs.unit", xml_names)]
)
Record.value <- as.vector(
  xml_unlist[grepl("Record.value|Record..attrs.value", xml_names)]
)
Record.creationDate <- as.vector(
  xml_unlist[grepl("Record.creationDate|Record..attrs.creationDate", xml_names)]
)

# Assemble the attributes column-wise; everything is character at this point,
# so the value column is converted to numeric afterwards.
healthData <- data.frame(
  cbind(Record.creationDate, Record.type, Record.unit, Record.value),
  stringsAsFactors = FALSE
)
healthData$Record.value <- as.numeric(healthData$Record.value)

# Break the creation timestamp into date / time / remainder, then the date
# into year / month / day, and shorten the record type names.
healthData <- healthData %>%
  separate(Record.creationDate, c("date", "time", "misc"), sep = " ") %>%
  separate(date, c("year", "month", "day"), sep = "-")
healthData$Record.type <- gsub(
  "HKQuantityTypeIdentifier", "", healthData$Record.type
)

# Total value per record type and calendar day, latest year first.
healthData_summary <- healthData %>%
  group_by(Record.type, year, month, day) %>%
  summarise(sum = sum(Record.value)) %>%
  arrange(desc(year)) %>%
  data.frame()

# NOTE(review): rows 123-151 are selected by position -- presumably the May
# 2019 rows of a single record type; confirm whenever the export changes.
healthData_summary_May <- healthData_summary[c(123:151), ]
summary(healthData_summary_May)
##  Record.type            year              month          
##  Length:29          Length:29          Length:29         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##      day                 sum        
##  Length:29          Min.   : 2.037  
##  Class :character   1st Qu.: 4.453  
##  Mode  :character   Median : 5.354  
##                     Mean   : 5.779  
##                     3rd Qu.: 7.099  
##                     Max.   :11.239
# Bar chart of the daily totals in healthData_summary_May, faceted by month,
# with each bar labelled by its value rounded to two decimals.
#
# The blue colour is a fixed setting, so it is passed outside aes(). Inside
# aes() the string "blue" is treated as a one-level data value and drawn in
# the default palette colour instead of blue.
#
# The title now says distance rather than steps, matching the y-axis label;
# the same column is renamed "Miles.Walked" further down the script.
plot3 <- ggplot(healthData_summary_May, aes(x = day, y = sum)) +
  geom_bar(stat = "identity", colour = "blue", fill = "blue") +
  facet_grid(. ~ month) +
  geom_text(
    aes(label = round(sum, 2)),
    colour = "blue",
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  theme_dark() +
  labs(
    title = "Total Distance Walked per Day for May 2019",
    x = "Day",
    y = "Total Distance (mile)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )
#plot4 <- plot3 + geom_density(aes(x = day, y= sum),healthData_summary_May)
#plot4 <-plot3 + geom_density(aes(x=day,y=sum,group = 1,color = "blue"),stat = "identity")
#plot3

# Remove the listed May rows so the activity table has one row per remaining
# SleepData row (presumably the days without a usable sleep session -- confirm
# the positions whenever either input changes).
healthData_summary_May_selected_1 <-
  healthData_summary_May[-c(5, 6, 11, 16, 17, 24, 25), ]

# Column 5 of the summary is the daily total; columns 3 and 5 of SleepData are
# taken positionally and are used below as Sleep.quality and Day.
steps_selected <- healthData_summary_May_selected_1[, c(5)]
sleep_selected <- SleepData[, c(3, 5)]
steps_Sleep_selected <- cbind(steps_selected, sleep_selected)
names(steps_Sleep_selected)[1] <- "Miles.Walked"

# Sleep quality per day of month, coloured by distance walked that day.
plot5 <- ggplot(
  steps_Sleep_selected,
  aes(x = Day, y = Sleep.quality, col = Miles.Walked)
) +
  geom_point(size = 5) +
  labs(
    title = "Sleep Quality vs. Number of Miles Walked per Day in May 2019",
    x = "Day in May 2019",
    y = "Sleep Quality in Percent",
    caption = "The dashed line represents the American average sleep quality in percent"
  )
line2 <- geom_hline(yintercept = 72, linetype = "dashed", color = "red", size = 2)

# theme_dark() is a complete theme: it must come before theme(), otherwise it
# replaces the grid-line tweaks instead of being refined by them.
plot6 <- plot5 +
  line2 +
  theme_dark() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
#plot6

# Mean sleep quality (column 2) per weekday; rows are picked by position.
Mon <- mean(steps_Sleep_selected[c(4, 13), 2])
Tue <- mean(steps_Sleep_selected[c(9, 14, 19), 2])
Wed <- mean(steps_Sleep_selected[c(10, 15, 20), 2])
Thr <- mean(steps_Sleep_selected[c(5, 11, 16, 21), 2])
Fri <- mean(steps_Sleep_selected[c(1, 6, 12, 17, 22), 2])
Sat <- mean(steps_Sleep_selected[c(2, 7, 18), 2])
Sun <- mean(steps_Sleep_selected[c(3, 8), 2])

# Monday-to-Sunday order: this vector is later column-bound to the
# Monday-first `day` labels, so a Friday-first order would attach every mean
# to the wrong weekday.
mean_all <- c(Mon, Tue, Wed, Thr, Fri, Sat, Sun)

library(dplyr)
library(ggplot2)

# Weekday labels (Monday first) and the screen-time value in minutes for each.
day <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
screenTimeWeekly_minute <- c("292", "143", "272", "435", "320", "307", "318")
screenTimeWeekly_minute_number <- as.numeric(screenTimeWeekly_minute)

# Character-matrix form and empty placeholder frame kept from the original
# script; neither feeds the plots below.
sleep1 <- cbind(day, screenTimeWeekly_minute)
sleep <- data.frame(matrix(ncol = 2, nrow = 7))

# Plotting frame. Minutes are numeric so the y axis and the colour scale are
# continuous rather than a set of evenly spaced text labels, and `day` is a
# factor with explicit levels so the x axis keeps calendar order instead of
# sorting alphabetically.
sleep.data <- data.frame(
  day = factor(day, levels = day),
  screenTimeWeekly_minute = screenTimeWeekly_minute_number
)

# theme_dark() is a complete theme and was previously added last, which threw
# away the rotated x-axis labels. The dark look that was actually rendered is
# kept, and the label rotation is now applied on top of it.
plot_screen <- ggplot(sleep.data, aes(x = day, y = screenTimeWeekly_minute)) +
  geom_point(size = 5, col = "blue") +
  labs(
    y = "Average Weekly Screen Time (minutes)",
    x = "Day of the Week",
    title = "Average Weekly Screen Time for Each Day"
  ) +
  theme_dark() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
#plot_screen

# Attach the per-weekday mean sleep quality. The pairing is positional, so
# mean_all is expected to follow the same weekday order as `day` -- verify
# where mean_all is built.
screen_sleep_data <- cbind(sleep.data, mean_all)

# Mean sleep quality per weekday, coloured by that weekday's screen time.
plot_screen_sleep <- ggplot(
  screen_sleep_data,
  aes(x = day, y = mean_all, col = screenTimeWeekly_minute)
) +
  geom_point(size = 5) +
  theme_dark() +
  labs(
    y = "Average Sleep Quality (Percent)",
    x = "Day of the Week",
    title = "Average Sleep Quality per Day of the Week (Percent)"
  ) +
  theme(
    panel.border = element_blank(),
    plot.caption = element_text(hjust = 0.5),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "grey50", colour = "grey50"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )