# Import the raw workbook from the project's DATA folder.
sleep_health_and_lifestyle_dataset <- read_excel(
  path = "DATA/sleep_health_and_lifestyle_dataset.xlsx"
)

# Keep the raw import untouched and store a copy with standardized column
# names under a second name.
sleep_health_and_lifestyle_dataset2 <- clean_names(
  sleep_health_and_lifestyle_dataset
)

# Show the cleaned column names.
colnames(sleep_health_and_lifestyle_dataset2)
##  [1] "person_id"               "gender"                 
##  [3] "age"                     "occupation"             
##  [5] "duration_of_sleep"       "quality_of_sleep"       
##  [7] "physical_activity_level" "stress_level"           
##  [9] "bmi_category"            "blood_pressure"         
## [11] "heart_rate_while_sleep"  "daily_steps"            
## [13] "sleep_disorder"
library(table1)
# Replace duration_of_sleep with its absolute value so every entry is
# non-negative.
# NOTE(review): presumably negative durations are data-entry errors -- confirm.
sleep_health_and_lifestyle_dataset2 <- mutate(
  sleep_health_and_lifestyle_dataset2,
  duration_of_sleep = abs(duration_of_sleep)
)

# Descriptive summary table for sleep duration over the whole dataset.
table1(~ duration_of_sleep, data = sleep_health_and_lifestyle_dataset2)
##                       Overall
##                       (N=374)
## duration_of_sleep
##   Mean (SD)           7.13 (0.798)
##   Median [Min, Max]   7.20 [5.80, 8.50]
##   Missing             4 (1.1%)
# List the column names again after the duration_of_sleep correction
# (for a data frame, names() returns the column names).
names(sleep_health_and_lifestyle_dataset2)
##  [1] "person_id"               "gender"                 
##  [3] "age"                     "occupation"             
##  [5] "duration_of_sleep"       "quality_of_sleep"       
##  [7] "physical_activity_level" "stress_level"           
##  [9] "bmi_category"            "blood_pressure"         
## [11] "heart_rate_while_sleep"  "daily_steps"            
## [13] "sleep_disorder"
# Report the class of every column as a named character vector.
# vapply() is used instead of sapply() so the result is always a character
# vector: sapply() would silently return a list if any column carried more
# than one class. Only the first class of each column is reported.
vapply(
  sleep_health_and_lifestyle_dataset2,
  function(column) class(column)[1L],
  character(1)
)
##               person_id                  gender                     age 
##               "numeric"             "character"               "numeric" 
##              occupation       duration_of_sleep        quality_of_sleep 
##             "character"               "numeric"               "numeric" 
## physical_activity_level            stress_level            bmi_category 
##               "numeric"               "numeric"             "character" 
##          blood_pressure  heart_rate_while_sleep             daily_steps 
##             "character"               "numeric"               "numeric" 
##          sleep_disorder 
##             "character"
# Column-wise minimum of each numeric measurement over the whole dataset,
# ignoring missing values. The result is a single-row summary.
# No group_by()/ungroup() is used: the summary covers all rows, and an empty
# group_by() does not create any groups.
min_sleep <- sleep_health_and_lifestyle_dataset2 %>%
  summarise(
    across(
      .cols = c(
        age, duration_of_sleep, quality_of_sleep, physical_activity_level,
        stress_level, heart_rate_while_sleep, daily_steps
      ),
      .fns = ~ min(., na.rm = TRUE)
    )
  )

# Absolute value of every minimum, so a negative minimum is reported as a
# positive number.
sleep_fixed <- abs(min_sleep)

# Store person_id as text rather than as a number.
# NOTE(review): presumably because it is an identifier, not a measurement,
# and should not be picked up by numeric summaries -- confirm.
sleep_health_and_lifestyle_dataset2 <- mutate(
  sleep_health_and_lifestyle_dataset2,
  person_id = as.character(person_id)
)
# Per-disorder MEDIAN of every column whose name ends in "_sleep"
# (duration_of_sleep, quality_of_sleep, heart_rate_while_sleep), ignoring
# missing values. Despite its name, mean_sleep2 holds medians, not means.
sleep_by_disorder <- group_by(
  sleep_health_and_lifestyle_dataset2,
  sleep_disorder
)
mean_sleep2 <- ungroup(
  summarise(
    sleep_by_disorder,
    across(
      .cols = ends_with("_sleep"),
      .fns = function(x) median(x, na.rm = TRUE)
    )
  )
)
rm(sleep_by_disorder)  # temporary helper; keep the workspace as before

# Turn sleep_disorder into a factor with an explicit level order.
# Any value that is not exactly one of the listed levels becomes NA.
# NOTE(review): the third level is spelled "Non" -- confirm this matches the
# spelling in the data (e.g. it is not "None"); otherwise those rows turn
# into NA here and are dropped by the filter below.
sleep_health_and_lifestyle_dataset2 <- mutate(
  sleep_health_and_lifestyle_dataset2,
  sleep_disorder = factor(
    sleep_disorder,
    levels = c("Sleep Apnea", "Insomnia", "Non")
  )
)


# Keep only the rows whose sleep_disorder is not missing.
sleep_health_and_lifestyle_dataset_filter <- filter(
  sleep_health_and_lifestyle_dataset2,
  !is.na(sleep_disorder)
)

# Per-disorder mean of every column whose name ends in "_sleep", computed on
# the rows with a known sleep_disorder. Missing values are ignored, and each
# mean is rounded to a whole number (round() is called without `digits`).
# Output columns are named "<original column>_mean".
mean_sleep3 <- sleep_health_and_lifestyle_dataset_filter %>%
  group_by(sleep_disorder) %>%
  summarise(
    across(
      .cols = ends_with("_sleep"),
      .fns = function(x) round(mean(x, na.rm = TRUE)),
      .names = "{.col}_mean"
    )
  ) %>%
  ungroup()

General Function Explanations

clean_names()

Cleans the column names of a data frame to follow a consistent and standard naming convention. It replaces spaces with underscores, converts all letters to lowercase, and ensures valid column names.

mutate()

Creates or modifies columns in a data frame based on a function or transformation. You can use mutate() to add new columns or modify existing ones.

group_by()

Groups the data by one or more columns. It is typically used before performing aggregation or summarization operations (like calculating the mean or sum) on subsets of data.

summarise()

Aggregates data by applying a function (e.g., mean() or median()) to one or more columns, typically after using group_by(). This reduces the dataset to one row per group with summary statistics.

across()

Applies a function (e.g., mean(), min()) to multiple columns in a data frame. It allows you to apply the same operation across a range of columns. It can be used with selection helpers like ends_with() to select columns based on their names.

ends_with()

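Selects columns whose names end with a specific string (e.g., "_sleep"). It is often used inside across() to apply a function to columns that match certain naming patterns. In this dataset, ends_with("_sleep") matches duration_of_sleep, quality_of_sleep, and heart_rate_while_sleep.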

factor()

Converts a variable into a factor with specified levels. This is especially useful for categorical data, allowing you to control the order of levels and perform operations like grouping and summarizing based on those levels. Any value that is not listed in levels becomes NA, so the levels must match the values in the data exactly.

filter()

Keeps the rows that satisfy one or more conditions and drops the rest. For example, filter(!is.na(sleep_disorder)) removes the rows where sleep_disorder is missing. It is useful for subsetting the data based on specific criteria.

round()

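Rounds numbers to a specified number of decimal places. For example, round(mean_value, 2) would round the mean value to two decimal places. When the second argument is omitted, as in round(mean(...)) in the code above, the value is rounded to the nearest whole number.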

ungroup()

Removes the grouping structure from the data frame. It is used after performing group_by() to return to a normal data frame without any grouping applied.