# Load all required packages in one place using pacman for convenience
pacman::p_load(
  tidyverse, # Core tidyverse packages (dplyr, ggplot2, etc.)
  here,      # For managing file paths easily
  janitor,   # For cleaning variable names
  labelled,  # For handling variable labels
  readxl     # For reading Excel files (e.g., dictionary files)
)

# Data Dictionary
# Packages ----
# (01) Review the raw data ----
# Create the raw data (df): a three-row example tibble with one character
# column (Var1) and two numeric columns (Var2, Var3) that each contain an NA
df <- tibble(
  Var1 = c("a", "b", "c"),
  Var2 = c(2, NA, 3.6),
  Var3 = c(3.6, 8.5, NA)
)

# (02) Import and review the data dictionary ----
# Read the dictionary workbook; here() builds the path 02_dictionary/dictionary.xlsx
# relative to the project root
dict <- read_excel(here("02_dictionary", "dictionary.xlsx"))

# (03) Create named vector for renaming ----
# Build the rename lookup: deframe() turns the two selected columns into a
# named vector whose names come from new_name and whose values come from
# old_name
dict_names <- dict %>%
  select(new_name, old_name) %>%
  deframe()

# (04) Rename variables in df ----
# Rename the columns of df using the lookup (names = new, values = old).
# all_of() is strict: it raises an error if any old name is absent from df.
df <- df %>%
  rename(all_of(dict_names))

# (05) Add the labels ----
# Build a named list of labels: names come from new_name, values from label
dict_labels <- dict %>%
  select(new_name, label) %>%
  deframe() %>%
  as.list()

# Attach the variable labels to df. .strict = FALSE means labels whose name
# does not match a column of df do not raise an error.
df <- df %>%
  set_variable_labels(.labels = dict_labels, .strict = FALSE)

# Housecleaning ----
# Remove the dictionary and the lookup objects derived from it; only df is
# used from here on
rm(dict, dict_names, dict_labels)

# (06) Store the cleaned dataset ----
# Save df as an RDS file at 03_data_clean/df.rds (path built with here())
df |>
  write_rds(here("03_data_clean", "df.rds"))