Introduction

Many people believe that sleep, health, and lifestyle are related to one another.

I was curious to investigate how significant the relationships between these variables are, using the knowledge from this course and RStudio.

The dataset, taken from the Kaggle platform, includes details such as gender, age, occupation, sleep duration, quality of sleep, physical activity level, stress level, BMI category, blood pressure, heart rate, daily steps, and the presence or absence of sleep disorders.

Installing the packages, then loading and glimpsing the data

install.packages("tidyverse")
install.packages("DHARMa")
install.packages("performance")
install.packages("ggeffects")
install.packages("ggrain")
library(tidyverse)
library(performance)
library(DHARMa)
library(ggeffects)
library(ggrain) 

# Read the sleep/health/lifestyle survey CSV into a tibble.
# The file name is spelled exactly as in the second read.csv() call further
# down in this file ("Sleep_..." with a capital S), so that both reads
# resolve to the same path on case-sensitive file systems.
data <- read.csv("Sleep_health_and_lifestyle_dataset.csv") %>%
  as_tibble()

# Print the tibble, then list every column with its type and first values.
data
glimpse(data)
## Rows: 374
## Columns: 13
## $ Person.ID               <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
## $ Gender                  <chr> "Male", "Male", "Male", "Male", "Male", "Male"…
## $ Age                     <int> 27, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29…
## $ Occupation              <chr> "Software Engineer", "Doctor", "Doctor", "Sale…
## $ Sleep.Duration          <dbl> 6.1, 6.2, 6.2, 5.9, 5.9, 5.9, 6.3, 7.8, 7.8, 7…
## $ Quality.of.Sleep        <int> 6, 6, 6, 4, 4, 4, 6, 7, 7, 7, 6, 7, 6, 6, 6, 6…
## $ Physical.Activity.Level <int> 42, 60, 60, 30, 30, 30, 40, 75, 75, 75, 30, 75…
## $ Stress.Level            <int> 6, 8, 8, 8, 8, 8, 7, 6, 6, 6, 8, 6, 8, 8, 8, 8…
## $ BMI.Category            <chr> "Overweight", "Normal", "Normal", "Obese", "Ob…
## $ Blood.Pressure          <chr> "126/83", "125/80", "125/80", "140/90", "140/9…
## $ Heart.Rate              <int> 77, 75, 75, 85, 85, 85, 82, 70, 70, 70, 70, 70…
## $ Daily.Steps             <int> 4200, 10000, 10000, 3000, 3000, 3000, 3500, 80…
## $ Sleep.Disorder          <chr> "None", "None", "None", "Sleep Apnea", "Sleep …
# Scatter plot of stress level against sleep duration, with a straight
# line fitted by `lm` drawn over the points.
ggplot(data, aes(x = Sleep.Duration, y = Stress.Level)) +
  geom_point(color = "black", size = 5) +
  geom_smooth(color = "darkred", method = "lm") +
  labs(
    title = "Association",
    x = "Sleep.Duration",
    y = "Stress.Level"
  ) +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

# Linear regression of sleep duration on stress level, physical activity
# level and age, followed by the coefficient / fit summary.
model1 <- lm(
  formula = Sleep.Duration ~ Stress.Level + Physical.Activity.Level + Age,
  data = data
)

summary(model1)
## 
## Call:
## lm(formula = Sleep.Duration ~ Stress.Level + Physical.Activity.Level + 
##     Age, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.03185 -0.27032  0.01139  0.24488  0.83882 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              8.817344   0.176384  49.989  < 2e-16 ***
## Stress.Level            -0.367458   0.014266 -25.757  < 2e-16 ***
## Physical.Activity.Level  0.007286   0.001120   6.507 2.49e-10 ***
## Age                     -0.003262   0.002965  -1.100    0.272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4427 on 370 degrees of freedom
## Multiple R-squared:  0.6929, Adjusted R-squared:  0.6904 
## F-statistic: 278.3 on 3 and 370 DF,  p-value: < 2.2e-16
# Daily steps by occupation.
# Occupation is a categorical (character) column, so a straight-line `lm`
# smoother across it has no meaning. A box plot per occupation summarises
# the distribution instead, with the raw observations drawn on top.
data %>%
  ggplot(
    aes(
      x = Daily.Steps, y = Occupation
    )
  ) +
  geom_boxplot(
    outlier.shape = NA # every observation is drawn by geom_point() below
  ) +
  geom_point(
    size = 5,
    color = "black",
    alpha = 0.3 # repeated step counts overlap; transparency shows density
  ) +
  theme_bw() +
  labs(
    x = "Daily steps",
    y = "Occupation",
    title = "Association"
  )

# Fit the three-predictor sleep-duration regression and print its summary.
model1 <-
  lm(Sleep.Duration ~ Stress.Level + Physical.Activity.Level + Age,
     data = data)

summary(model1)
## 
## Call:
## lm(formula = Sleep.Duration ~ Stress.Level + Physical.Activity.Level + 
##     Age, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.03185 -0.27032  0.01139  0.24488  0.83882 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              8.817344   0.176384  49.989  < 2e-16 ***
## Stress.Level            -0.367458   0.014266 -25.757  < 2e-16 ***
## Physical.Activity.Level  0.007286   0.001120   6.507 2.49e-10 ***
## Age                     -0.003262   0.002965  -1.100    0.272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4427 on 370 degrees of freedom
## Multiple R-squared:  0.6929, Adjusted R-squared:  0.6904 
## F-statistic: 278.3 on 3 and 370 DF,  p-value: < 2.2e-16
# Collapse Occupation into two groups: "Doctor" and "Nurse" become
# "Medical", every other value becomes "Non-Medical".
data_clean <- mutate(
  data,
  Occupation = if_else(
    Occupation %in% c("Doctor", "Nurse"),
    "Medical",
    "Non-Medical"
  )
)
# Read the survey CSV again, convert it to a tibble and print it.
data <- as_tibble(read.csv("Sleep_health_and_lifestyle_dataset.csv"))
data
## # A tibble: 374 × 13
##    Person.ID Gender   Age Occupation           Sleep.Duration Quality.of.Sleep
##        <int> <chr>  <int> <chr>                         <dbl>            <int>
##  1         1 Male      27 Software Engineer               6.1                6
##  2         2 Male      28 Doctor                          6.2                6
##  3         3 Male      28 Doctor                          6.2                6
##  4         4 Male      28 Sales Representative            5.9                4
##  5         5 Male      28 Sales Representative            5.9                4
##  6         6 Male      28 Software Engineer               5.9                4
##  7         7 Male      29 Teacher                         6.3                6
##  8         8 Male      29 Doctor                          7.8                7
##  9         9 Male      29 Doctor                          7.8                7
## 10        10 Male      29 Doctor                          7.8                7
## # ℹ 364 more rows
## # ℹ 7 more variables: Physical.Activity.Level <int>, Stress.Level <int>,
## #   BMI.Category <chr>, Blood.Pressure <chr>, Heart.Rate <int>,
## #   Daily.Steps <int>, Sleep.Disorder <chr>
# Log-link regression of daily steps on sleep-disorder status.
#
# A plain Poisson family fixes the dispersion parameter at 1, but the
# Poisson fit of this model reported a residual deviance of 127754 on 371
# degrees of freedom: the data are heavily overdispersed, so the Poisson
# standard errors and p-values are far too optimistic. `quasipoisson`
# keeps the same coefficient estimates and estimates the dispersion from
# the data, scaling the standard errors accordingly.
#
# NOTE: summary output printed for the earlier Poisson fit has to be
# regenerated after this change (the coefficients stay the same; the
# standard errors, test statistics and dispersion line do not).
fit <- glm(
  formula = Daily.Steps ~ Sleep.Disorder,
  family = quasipoisson(link = "log"),
  data = data
)

summary(fit)
## 
## Call:
## glm(formula = Daily.Steps ~ Sleep.Disorder, family = poisson, 
##     data = data)
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               8.682928   0.001483  5853.1   <2e-16 ***
## Sleep.DisorderNone        0.149509   0.001693    88.3   <2e-16 ***
## Sleep.DisorderSleep Apnea 0.255503   0.001971   129.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 144812  on 373  degrees of freedom
## Residual deviance: 127754  on 371  degrees of freedom
## AIC: 131738
## 
## Number of Fisher Scoring iterations: 4
# Exponentiate the model coefficients to move them off the log scale.
fit %>%
  coef() %>%
  exp()
##               (Intercept)        Sleep.DisorderNone Sleep.DisorderSleep Apnea 
##               5901.298701                  1.161264                  1.291111
# Recode Occupation: "Doctor" / "Nurse" -> "Medical", anything else ->
# "Non-Medical".
data_clean <- data %>%
  mutate(
    Occupation = case_when(
      Occupation %in% c("Doctor", "Nurse") ~ "Medical",
      .default = "Non-Medical"
    )
  )
# Histogram of daily steps, one panel per occupation group.
# The plot reads `data_clean`, where Occupation has been recoded to
# "Medical" / "Non-Medical" just above. Piping the raw `data` would facet
# by every original job title and leave that recode unused.
data_clean %>%
  ggplot(
    aes(
      x = Daily.Steps
    )
  ) +
  geom_histogram() +
  facet_wrap(~Occupation)
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Poisson regression (log link) of daily steps on sleep-disorder status.
fit_poisson <- glm(
  Daily.Steps ~ Sleep.Disorder,
  family = poisson(link = "log"),
  data = data
)

# A Poisson model assumes the dispersion equals 1. Test that assumption
# before reading the summary: when it is violated, the standard errors and
# p-values reported by summary() are too small.
check_overdispersion(fit_poisson)

summary(fit_poisson)
## 
## Call:
## glm(formula = Daily.Steps ~ Sleep.Disorder, family = poisson(link = "log"), 
##     data = data)
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               8.682928   0.001483  5853.1   <2e-16 ***
## Sleep.DisorderNone        0.149509   0.001693    88.3   <2e-16 ***
## Sleep.DisorderSleep Apnea 0.255503   0.001971   129.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 144812  on 373  degrees of freedom
## Residual deviance: 127754  on 371  degrees of freedom
## AIC: 131738
## 
## Number of Fisher Scoring iterations: 4
# Build, then display, a semi-transparent scatter plot of sleep quality
# against daily steps.
raw <- ggplot(
  data,
  aes(x = Daily.Steps, y = Quality.of.Sleep)
) +
  geom_point(alpha = .5, size = 5) +
  labs(
    title = "Association",
    x = "Daily.Steps",
    y = "Quality.of.Sleep"
  )

raw

Conclusion: I examined several relationships and their significance levels using regression, a method that we covered in class. RStudio makes it possible to create different plots and to check whether the associations are statistically significant or merely due to chance.