The Sleep Efficiency Dataset contains 452 observations of individuals' sleeping habits, including Age, Gender, Bedtime, Wake-up time, Sleep duration, Sleep efficiency, REM sleep percentage, Deep sleep percentage, and Light sleep percentage. The dataset also includes lifestyle habits that can influence sleeping patterns: Number of awakenings, Caffeine and/or Alcohol consumption, Smoking status, and Exercise frequency. Source: https://www.kaggle.com/datasets/equilibriumm/sleep-efficiency
library(lubridate)
library(dplyr)
library(scales)
library(ggthemes)
library(ggplot2)
library(ggrepel)
library(plotly)
library(reshape2)
# Load the sleep-efficiency records, using the "ID" column as row names,
# then inspect the structure of the resulting data frame.
sleep_df <- read.csv(file = "Sleep_Efficiency.csv", row.names = "ID")
str(object = sleep_df)
## 'data.frame': 452 obs. of 14 variables:
## $ Age : int 65 69 40 40 57 36 27 53 41 11 ...
## $ Gender : chr "Female" "Male" "Female" "Female" ...
## $ Bedtime : chr "2021-03-06 01:00:00" "2021-12-05 02:00:00" "2021-05-25 21:30:00" "2021-11-03 02:30:00" ...
## $ Wakeup.time : chr "2021-03-06 07:00:00" "2021-12-05 09:00:00" "2021-05-25 05:30:00" "2021-11-03 08:30:00" ...
## $ Sleep.duration : num 6 7 8 6 8 7.5 6 10 6 9 ...
## $ Sleep.efficiency : num 0.88 0.66 0.89 0.51 0.76 0.9 0.54 0.9 0.79 0.55 ...
## $ REM.sleep.percentage : int 18 24 20 28 27 28 28 28 28 18 ...
## $ Deep.sleep.percentage : int 70 28 70 25 55 60 25 57 60 35 ...
## $ Light.sleep.percentage: int 10 53 10 52 18 17 52 20 17 45 ...
## $ Awakenings : num 0 3 1 3 3 0 2 0 3 4 ...
## $ Caffeine.consumption : num 0 0 0 50 0 NA 50 50 50 0 ...
## $ Alcohol.consumption : num 0 3 0 5 3 0 0 0 0 3 ...
## $ Smoking.status : chr "Yes" "Yes" "No" "Yes" ...
## $ Exercise.frequency : num 3 3 3 1 3 1 1 3 1 0 ...
# Per-column summary statistics; for numeric columns this also reports how
# many values are missing (NA's).
summary(sleep_df)
## Age Gender Bedtime Wakeup.time
## Min. : 9.00 Length:452 Length:452 Length:452
## 1st Qu.:29.00 Class :character Class :character Class :character
## Median :40.00 Mode :character Mode :character Mode :character
## Mean :40.29
## 3rd Qu.:52.00
## Max. :69.00
##
## Sleep.duration Sleep.efficiency REM.sleep.percentage Deep.sleep.percentage
## Min. : 5.000 Min. :0.5000 Min. :15 Min. :20.00
## 1st Qu.: 7.000 1st Qu.:0.6975 1st Qu.:20 1st Qu.:51.25
## Median : 7.500 Median :0.8200 Median :22 Median :60.00
## Mean : 7.466 Mean :0.7889 Mean :23 Mean :52.96
## 3rd Qu.: 8.000 3rd Qu.:0.9000 3rd Qu.:27 3rd Qu.:63.00
## Max. :10.000 Max. :0.9900 Max. :30 Max. :75.00
##
## Light.sleep.percentage Awakenings Caffeine.consumption
## Min. : 7.00 Min. :0.000 Min. : 0.00
## 1st Qu.:15.00 1st Qu.:1.000 1st Qu.: 0.00
## Median :18.00 Median :1.000 Median : 25.00
## Mean :24.83 Mean :1.641 Mean : 23.65
## 3rd Qu.:27.25 3rd Qu.:3.000 3rd Qu.: 50.00
## Max. :56.00 Max. :4.000 Max. :200.00
## NA's :20 NA's :25
## Alcohol.consumption Smoking.status Exercise.frequency
## Min. :0.000 Length:452 Min. :0.000
## 1st Qu.:0.000 Class :character 1st Qu.:0.000
## Median :0.000 Mode :character Median :2.000
## Mean :1.245 Mean :1.791
## 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :5.000 Max. :5.000
## NA's :16 NA's :6
# Build an all-numeric copy of the data so it can be passed to cor():
#   * Gender:          "Female" -> 0, "Male" -> 1
#   * Smoking.status:  "No" -> 0, "Yes" -> 1
#   * Bedtime / Wakeup.time: hour of day (0-23) extracted from the timestamp
# NOTE(review): the hour is used as a plain number, so minutes are dropped and
# a 21:00 bedtime sorts "after" a 01:00 bedtime; correlations involving these
# two columns should be read with that in mind.
sleep2 <- sleep_df %>%
  mutate(
    Gender = as.numeric(recode(Gender, "Female" = "0", "Male" = "1")),
    Smoking.status = as.numeric(recode(Smoking.status, "Yes" = "1", "No" = "0")),
    Bedtime = hour(ymd_hms(Bedtime)),
    Wakeup.time = hour(ymd_hms(Wakeup.time))
  ) %>%
  data.frame()
str(sleep2)
## 'data.frame': 452 obs. of 14 variables:
## $ Age : int 65 69 40 40 57 36 27 53 41 11 ...
## $ Gender : num 0 1 0 0 1 0 0 1 0 0 ...
## $ Bedtime : int 1 2 21 2 1 21 21 0 2 1 ...
## $ Wakeup.time : int 7 9 5 8 9 4 3 10 8 10 ...
## $ Sleep.duration : num 6 7 8 6 8 7.5 6 10 6 9 ...
## $ Sleep.efficiency : num 0.88 0.66 0.89 0.51 0.76 0.9 0.54 0.9 0.79 0.55 ...
## $ REM.sleep.percentage : int 18 24 20 28 27 28 28 28 28 18 ...
## $ Deep.sleep.percentage : int 70 28 70 25 55 60 25 57 60 35 ...
## $ Light.sleep.percentage: int 10 53 10 52 18 17 52 20 17 45 ...
## $ Awakenings : num 0 3 1 3 3 0 2 0 3 4 ...
## $ Caffeine.consumption : num 0 0 0 50 0 NA 50 50 50 0 ...
## $ Alcohol.consumption : num 0 3 0 5 3 0 0 0 0 3 ...
## $ Smoking.status : num 1 1 0 1 0 0 1 1 0 1 ...
## $ Exercise.frequency : num 3 3 3 1 3 1 1 3 1 0 ...
# Pairwise correlations between all columns of sleep2, rounded to two decimals.
# Awakenings, Caffeine.consumption, Alcohol.consumption and Exercise.frequency
# contain missing values; with cor()'s default (use = "everything") every
# correlation involving them comes out as NA. "pairwise.complete.obs" instead
# computes each coefficient from the rows where both variables are present.
cormat <- round(cor(sleep2, use = "pairwise.complete.obs"), 2)
head(cormat)
## Age Gender Bedtime Wakeup.time Sleep.duration
## Age 1.00 0.24 -0.07 0.08 -0.06
## Gender 0.24 1.00 -0.03 0.01 -0.04
## Bedtime -0.07 -0.03 1.00 -0.77 -0.11
## Wakeup.time 0.08 0.01 -0.77 1.00 0.51
## Sleep.duration -0.06 -0.04 -0.11 0.51 1.00
## Sleep.efficiency 0.10 0.01 0.12 -0.14 -0.03
## Sleep.efficiency REM.sleep.percentage Deep.sleep.percentage
## Age 0.10 -0.02 0.02
## Gender 0.01 -0.15 0.06
## Bedtime 0.12 -0.07 0.06
## Wakeup.time -0.14 -0.05 -0.08
## Sleep.duration -0.03 0.01 -0.03
## Sleep.efficiency 1.00 0.09 0.79
## Light.sleep.percentage Awakenings Caffeine.consumption
## Age -0.04 NA NA
## Gender -0.05 NA NA
## Bedtime -0.09 NA NA
## Wakeup.time 0.11 NA NA
## Sleep.duration 0.03 NA NA
## Sleep.efficiency -0.82 NA NA
## Alcohol.consumption Smoking.status Exercise.frequency
## Age NA -0.04 NA
## Gender NA 0.11 NA
## Bedtime NA -0.23 NA
## Wakeup.time NA 0.21 NA
## Sleep.duration NA 0.03 NA
## Sleep.efficiency NA -0.33 NA
# Reshape the correlation matrix to long form: one row per (Var1, Var2) pair
# with the correlation in `value`.
melted_cormat <- reshape2::melt(cormat)
head(melted_cormat, n = 6)
## Var1 Var2 value
## 1 Age Age 1.00
## 2 Gender Age 0.24
## 3 Bedtime Age -0.07
## 4 Wakeup.time Age 0.08
## 5 Sleep.duration Age -0.06
## 6 Sleep.efficiency Age 0.10
# Blank out the part of a matrix that lies strictly above the diagonal.
#
# cormat: a matrix (here, a correlation matrix).
# Returns the same matrix with every entry above the diagonal set to NA;
# the diagonal and the lower triangle are left untouched.
get_lower_tri <- function(cormat) {
  above_diagonal <- upper.tri(cormat)
  replace(cormat, above_diagonal, NA)
}
# Keep only the diagonal and lower triangle of the correlation matrix, then
# display it.
lower_tri <- get_lower_tri(cormat)
print(lower_tri)
## Age Gender Bedtime Wakeup.time Sleep.duration
## Age 1.00 NA NA NA NA
## Gender 0.24 1.00 NA NA NA
## Bedtime -0.07 -0.03 1.00 NA NA
## Wakeup.time 0.08 0.01 -0.77 1.00 NA
## Sleep.duration -0.06 -0.04 -0.11 0.51 1.00
## Sleep.efficiency 0.10 0.01 0.12 -0.14 -0.03
## REM.sleep.percentage -0.02 -0.15 -0.07 -0.05 0.01
## Deep.sleep.percentage 0.02 0.06 0.06 -0.08 -0.03
## Light.sleep.percentage -0.04 -0.05 -0.09 0.11 0.03
## Awakenings NA NA NA NA NA
## Caffeine.consumption NA NA NA NA NA
## Alcohol.consumption NA NA NA NA NA
## Smoking.status -0.04 0.11 -0.23 0.21 0.03
## Exercise.frequency NA NA NA NA NA
## Sleep.efficiency REM.sleep.percentage
## Age NA NA
## Gender NA NA
## Bedtime NA NA
## Wakeup.time NA NA
## Sleep.duration NA NA
## Sleep.efficiency 1.00 NA
## REM.sleep.percentage 0.09 1.00
## Deep.sleep.percentage 0.79 -0.15
## Light.sleep.percentage -0.82 0.07
## Awakenings NA NA
## Caffeine.consumption NA NA
## Alcohol.consumption NA NA
## Smoking.status -0.33 0.05
## Exercise.frequency NA NA
## Deep.sleep.percentage Light.sleep.percentage Awakenings
## Age NA NA NA
## Gender NA NA NA
## Bedtime NA NA NA
## Wakeup.time NA NA NA
## Sleep.duration NA NA NA
## Sleep.efficiency NA NA NA
## REM.sleep.percentage NA NA NA
## Deep.sleep.percentage 1.00 NA NA
## Light.sleep.percentage -0.99 1.00 NA
## Awakenings NA NA 1
## Caffeine.consumption NA NA NA
## Alcohol.consumption NA NA NA
## Smoking.status -0.25 0.27 NA
## Exercise.frequency NA NA NA
## Caffeine.consumption Alcohol.consumption Smoking.status
## Age NA NA NA
## Gender NA NA NA
## Bedtime NA NA NA
## Wakeup.time NA NA NA
## Sleep.duration NA NA NA
## Sleep.efficiency NA NA NA
## REM.sleep.percentage NA NA NA
## Deep.sleep.percentage NA NA NA
## Light.sleep.percentage NA NA NA
## Awakenings NA NA NA
## Caffeine.consumption 1 NA NA
## Alcohol.consumption NA 1 NA
## Smoking.status NA NA 1
## Exercise.frequency NA NA NA
## Exercise.frequency
## Age NA
## Gender NA
## Bedtime NA
## Wakeup.time NA
## Sleep.duration NA
## Sleep.efficiency NA
## REM.sleep.percentage NA
## Deep.sleep.percentage NA
## Light.sleep.percentage NA
## Awakenings NA
## Caffeine.consumption NA
## Alcohol.consumption NA
## Smoking.status NA
## Exercise.frequency 1
# Long-form lower triangle; na.rm = TRUE drops the blanked-out upper triangle
# (and any other NA correlations) so they are not drawn as tiles.
melted_cormat <- melt(lower_tri, na.rm = TRUE)

# Heatmap of the remaining correlations on a diverging blue-white-red scale
# centred on 0 and fixed to the full [-1, 1] range.
ggplot(data = melted_cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  # `limits` is written out in full instead of relying on partial matching
  # of the abbreviated `limit`.
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Correlation\nValue"
  ) +
  theme_minimal() +
  labs(x = "", y = "", title = "Correlation Heatmap") +
  theme(
    plot.title = element_text(hjust = 0.5),
    # The x-axis carries 14 long variable names; rotate them so they stay legible.
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )
The correlation heatmap is flawed; however, we can still see a strong
positive correlation between sleep efficiency and deep sleep percentage (0.79)
and a weak positive correlation between smoking status and light sleep percentage (0.27).
# Mean sleep efficiency within 15 age bins.
# A plain histogram of Age puts observation counts on the y-axis, which does
# not match the "Sleep Efficiency" label or the title. Here the y aesthetic is
# mapped to Sleep.efficiency and averaged per bin, so the bars show what the
# title says.
ggplot(sleep_df, aes(x = Age, y = Sleep.efficiency)) +
  stat_summary_bin(fun = "mean", geom = "bar", bins = 15) +
  labs(
    x = "Age",
    y = "Mean Sleep Efficiency",
    title = "Sleep Efficiency by Age",
    caption = "Source: https://www.kaggle.com/datasets/equilibriumm/sleep-efficiency"
  ) +
  theme_grey() +
  theme(plot.title = element_text(hjust = 0.5))
From the plot above, it can be inferred that there is a significant drop in sleep
efficiency for individuals in their late 30s. Efficiency peaks for
most individuals in their early 30s. The drop may be attributable to childbirth
and parenting responsibilities, aging, or other factors.
# Mean exercise frequency at each observed sleep-efficiency value.
# With geom_bar(stat = "identity") every row sharing an x value is stacked, so
# bar height becomes a sum that grows with the number of observations; taking
# the mean per x value removes that confound. Rows with a missing
# Exercise.frequency are dropped silently (na.rm = TRUE).
ggplot(sleep_df, aes(x = Sleep.efficiency, y = Exercise.frequency)) +
  stat_summary(
    fun = "mean", geom = "bar",
    colour = "pink", fill = "pink",
    na.rm = TRUE
  ) +
  labs(
    x = "Sleep Efficiency",
    y = "Mean Exercise Frequency",
    # Title previously read "Exercise Efficiency"; the variable is exercise frequency.
    title = "Sleep Efficiency by Exercise Frequency",
    caption = "Source: https://www.kaggle.com/datasets/equilibriumm/sleep-efficiency"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))
From the graph it is clear that those who exercise more frequently
also tend to have a higher sleep efficiency. The relationship is
nearly linear.
# Sleep efficiency against age, drawn as one coloured line per smoking status.
ggplot(
  sleep_df,
  aes(
    x = Age,
    y = Sleep.efficiency,
    group = Smoking.status,
    color = Smoking.status
  )
) +
  geom_line(size = 1) +
  labs(
    title = "Does Smoking Influence Sleep Efficiency?",
    x = "Age",
    y = "Sleep Efficiency",
    color = "Smoking Status",
    caption = "Source: https://www.kaggle.com/datasets/equilibriumm/sleep-efficiency"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
Smoking does appear to influence sleep efficiency, as the peaks of the red line
representing non-smokers are consistently higher than those of the blue
line representing smokers.
# Distribution of deep sleep percentage for each number of awakenings
# (base-graphics box plot, one box per distinct Awakenings value).
boxplot(
  Deep.sleep.percentage ~ Awakenings,
  data = sleep_df,
  col = "darkgreen",
  main = "Deep Sleep Percentage by Number of Awakenings",
  xlab = "Number of Awakenings",
  ylab = "Deep Sleep Percentage",
  sub = "Source: https://www.kaggle.com/datasets/equilibriumm/sleep-efficiency"
)
The box plot above shows that those with fewer awakenings have a higher
deep sleep percentage than those with 2 or more awakenings.
Outliers are represented by the circles.
# Turn an hour of day (0-23) into a 12-hour clock label such as "9:00 pm".
# Computed rather than looked up in a fixed table, so every hour gets a label
# (0 -> "12:00 am", 12 -> "12:00 pm"); a missing hour stays NA.
hour_label <- function(hour24) {
  hour12 <- ifelse(hour24 %% 12 == 0, 12, hour24 %% 12)
  suffix <- ifelse(hour24 < 12, "am", "pm")
  ifelse(is.na(hour24), NA_character_, paste0(hour12, ":00 ", suffix))
}

# Count how many timestamps fall in each hour of the day.
#
# timestamps: character vector of "YYYY-MM-DD HH:MM:SS" date-times.
# Returns a plain data frame with one row per hour that occurs:
#   hour24     - hour of day, 0-23 (minutes are dropped)
#   hour24labs - the matching 12-hour label
#   n          - number of timestamps in that hour
count_by_hour <- function(timestamps) {
  data.frame(hour24 = hour(ymd_hms(timestamps))) %>%
    dplyr::mutate(hour24labs = hour_label(hour24)) %>%
    group_by(hour24, hour24labs) %>%
    dplyr::summarise(n = length(hour24), .groups = 'keep') %>%
    data.frame()
}

# Hourly counts of bedtimes and wake-up times, used for the pie charts.
bedtime_df <- count_by_hour(sleep_df$Bedtime)
wakeup_df <- count_by_hour(sleep_df$Wakeup.time)
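The code above reshapes the data into a more readable format: the number of bedtimes and wake-up times in each hour, labelled with 12-hour clock times.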
# Pie chart of bedtimes by hour, with the data source as a footnote annotation.
# textinfo is a plotly flag list: lowercase flags joined by "+" with no spaces.
# "label+percent" puts both the hour label and its share on each slice.
plot_ly(
  bedtime_df,
  labels = ~hour24labs, values = ~n, type = "pie",
  textposition = "outside", textinfo = "label+percent"
) %>%
  layout(
    title = "Hour of Bedtime",
    annotations = list(
      x = 1, y = -0.1,
      text = "Source: https://www.kaggle.com/datasets/equilibriumm/sleep-efficiency",
      showarrow = FALSE, xref = "paper", yref = "paper",
      xanchor = "right", yanchor = "auto", xshift = 0, yshift = 0,
      font = list(size = 15, color = "black")
    )
  )
The most common bedtime is midnight.
# Pie chart of wake-up times by hour, with the data source as a footnote annotation.
# textinfo is a plotly flag list: lowercase flags joined by "+" with no spaces.
# "label+percent" puts both the hour label and its share on each slice.
plot_ly(
  wakeup_df,
  labels = ~hour24labs, values = ~n, type = "pie",
  textposition = "outside", textinfo = "label+percent"
) %>%
  layout(
    title = "Hour of Wake-up",
    annotations = list(
      x = 1, y = -0.1,
      text = "Source: https://www.kaggle.com/datasets/equilibriumm/sleep-efficiency",
      showarrow = FALSE, xref = "paper", yref = "paper",
      xanchor = "right", yanchor = "auto", xshift = 0, yshift = 0,
      font = list(size = 15, color = "black")
    )
  )
The hours at which individuals wake up have a wider distribution than the bedtime hours, with 5:00 am and 7:00 am being the most common.
In sum, it can be concluded that the best sleep routine can be attained by waking at sunrise, exercising often, and refraining from excessive alcohol, caffeine, and tobacco use.