Targel 9

# Import the raw workbook, then standardise its column names.
sleep_health_and_lifestyle_dataset <- read_excel(
  path = "DATA/sleep_health_and_lifestyle_dataset.xlsx"
)
sleep_health_and_lifestyle_dataset2 <- clean_names(
  sleep_health_and_lifestyle_dataset
)

# Show the cleaned column names.
names(sleep_health_and_lifestyle_dataset2)

##  [1] "person_id"               "gender"                 
##  [3] "age"                     "occupation"             
##  [5] "duration_of_sleep"       "quality_of_sleep"       
##  [7] "physical_activity_level" "stress_level"           
##  [9] "bmi_category"            "blood_pressure"         
## [11] "heart_rate_while_sleep"  "daily_steps"            
## [13] "sleep_disorder"

library(table1)

# Replace every sleep duration with its absolute value, so any negative
# entry becomes positive.
# NOTE(review): presumably negative durations are sign/data-entry errors --
# confirm that keeping the magnitude is the intended correction.
sleep_health_and_lifestyle_dataset2 <- mutate(
  sleep_health_and_lifestyle_dataset2,
  duration_of_sleep = abs(duration_of_sleep)
)

# Descriptive summary table for the corrected sleep duration.
table1(~ duration_of_sleep, data = sleep_health_and_lifestyle_dataset2)

	Overall (N=374)
duration_of_sleep
Mean (SD)	7.13 (0.798)
Median [Min, Max]	7.20 [5.80, 8.50]
Missing	4 (1.1%)

# Re-print the column names (unchanged by the abs() step above).
names(sleep_health_and_lifestyle_dataset2)

##  [1] "person_id"               "gender"                 
##  [3] "age"                     "occupation"             
##  [5] "duration_of_sleep"       "quality_of_sleep"       
##  [7] "physical_activity_level" "stress_level"           
##  [9] "bmi_category"            "blood_pressure"         
## [11] "heart_rate_while_sleep"  "daily_steps"            
## [13] "sleep_disorder"

# Inspect the class of each column.
sapply(X = sleep_health_and_lifestyle_dataset2, FUN = class)

##               person_id                  gender                     age 
##               "numeric"             "character"               "numeric" 
##              occupation       duration_of_sleep        quality_of_sleep 
##             "character"               "numeric"               "numeric" 
## physical_activity_level            stress_level            bmi_category 
##               "numeric"               "numeric"             "character" 
##          blood_pressure  heart_rate_while_sleep             daily_steps 
##             "character"               "numeric"               "numeric" 
##          sleep_disorder 
##             "character"

# Smallest observed value of each numeric measure over the whole data set,
# ignoring missing values. No grouping is applied, so the result is a
# single-row summary.
min_sleep <- sleep_health_and_lifestyle_dataset2 %>%
  summarise(
    across(
      .cols = c(
        age, duration_of_sleep, quality_of_sleep, physical_activity_level,
        stress_level, heart_rate_while_sleep, daily_steps
      ),
      .fns = function(column) min(column, na.rm = TRUE)
    )
  )

# Absolute value of every minimum.
sleep_fixed <- abs(min_sleep)

# Store person_id as text rather than as a number.
sleep_health_and_lifestyle_dataset2 <- mutate(
  sleep_health_and_lifestyle_dataset2,
  person_id = as.character(person_id)
)

# Median, per sleep_disorder group, of every column whose name ends in
# "_sleep" (duration_of_sleep, quality_of_sleep, heart_rate_while_sleep).
# Missing values are dropped before the median is taken.
# NOTE(review): the object is named mean_sleep2 but it holds medians.
mean_sleep2 <- sleep_health_and_lifestyle_dataset2 %>%
  group_by(sleep_disorder) %>%
  summarise(
    across(
      .cols = ends_with("_sleep"),
      .fns = function(column) median(column, na.rm = TRUE)
    )
  ) %>%
  ungroup()

# Turn sleep_disorder into a factor with an explicit level order.
# Values that are not one of the listed levels become NA.
# NOTE(review): the third level is spelled "Non". If the data stores this
# category under a different label (e.g. "None"), those rows turn into NA
# here and are removed by the filter below -- confirm against the data.
sleep_health_and_lifestyle_dataset2 <- mutate(
  sleep_health_and_lifestyle_dataset2,
  sleep_disorder = factor(
    sleep_disorder,
    levels = c("Sleep Apnea", "Insomnia", "Non")
  )
)

# Keep only the rows that have a non-missing sleep_disorder.
sleep_health_and_lifestyle_dataset_filter <- filter(
  sleep_health_and_lifestyle_dataset2,
  !is.na(sleep_disorder)
)

# Mean, per sleep_disorder group, of every column whose name ends in
# "_sleep", computed on the NA-filtered data. Missing values are dropped
# before averaging; round() is called without `digits`, so each mean is
# rounded to a whole number. Output columns get a "_mean" suffix.
mean_sleep3 <- sleep_health_and_lifestyle_dataset_filter %>%
  group_by(sleep_disorder) %>%
  summarise(
    across(
      .cols = ends_with("_sleep"),
      .fns = function(column) round(mean(column, na.rm = TRUE)),
      .names = "{.col}_mean"
    )
  ) %>%
  ungroup()

General Function Explanations

`clean_names()`

Cleans the column names of a data frame to follow a consistent and standard naming convention. It replaces spaces with underscores, converts all letters to lowercase, and ensures valid column names.

`mutate()`

Creates or modifies columns in a data frame based on a function or transformation. You can use mutate() to add new columns or modify existing ones.

`group_by()`

Groups the data by one or more columns. It is typically used before performing aggregation or summarization operations (like calculating the mean or sum) on subsets of data.

`summarise()`

Aggregates data by applying a function (e.g., mean() or median()) to one or more columns, typically after using group_by(). This reduces the dataset to one row per group with summary statistics.

`across()`

Applies a function (e.g., mean(), min()) to multiple columns in a data frame. It allows you to apply the same operation across a range of columns. It can be used with selection helpers like ends_with() to select columns based on their names.

`ends_with()`

Selects columns whose names end with a specific string (e.g., “_sleep”). It is often used inside across() to apply a function to columns that match certain naming patterns.

`factor()`

Converts a variable into a factor with specified levels. This is especially useful for categorical data, allowing you to control the order of levels and perform operations like grouping and summarizing based on those levels. Any value that is not listed in `levels` is converted to `NA`.

`filter()`

Filters rows based on conditions. For example, `filter(!is.na(sleep_disorder))` keeps only the rows in which `sleep_disorder` is not missing. It is useful for subsetting the data based on specific criteria.

`round()`

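Rounds numbers to a specified number of decimal places. For example, `round(mean_value, 2)` would round the mean value to two decimal places. When the second argument is omitted, as in `round(mean(., na.rm = TRUE))` above, the value is rounded to zero decimal places (a whole number).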

`ungroup()`

Removes the grouping structure from the data frame. It is used after performing group_by() to return to a normal data frame without any grouping applied.