Data Dictionary

Packages

# Attach every package this script needs in a single pacman call.
pacman::p_load(
  tidyverse, # core tidyverse packages (dplyr, ggplot2, etc.)
  here,      # project-relative file paths
  janitor,   # cleaning variable names
  labelled,  # working with variable labels
  readxl     # reading Excel files (e.g., the dictionary file)
)

(01) Review the raw data

# Build the small raw example dataset (df), laid out row by row.
# Var1 is character; Var2 and Var3 are numeric with one missing value each.
df <- tribble(
  ~Var1, ~Var2, ~Var3,
  "a",   2,     3.6,
  "b",   NA,    8.5,
  "c",   3.6,   NA
)

(02) Import and review the data dictionary

# Read the data dictionary workbook from the project's 02_dictionary folder.
dict <- here("02_dictionary", "dictionary.xlsx") |>
  read_excel()

(03) Create named vector for renaming

# Turn the two dictionary columns into a named character vector:
# names come from new_name, values from old_name (the shape rename() expects).
dict_names <- deframe(select(dict, new_name, old_name))

(04) Rename variables in df

# Apply the new = old mapping to df's columns.
# all_of() keeps this strict: an old name missing from df raises an error.
df <- rename(df, all_of(dict_names))

(05) Add the labels

# Named list of labels, keyed by the (already renamed) variable names.
dict_labels <- as.list(deframe(select(dict, new_name, label)))

# Attach the labels to df. With .strict = FALSE, entries in dict_labels that
# have no matching column in df are skipped instead of raising an error.
df <- set_variable_labels(df, .labels = dict_labels, .strict = FALSE)

Housekeeping

# Drop the dictionary helper objects now that df has been renamed and labelled.
rm(list = c("dict", "dict_names", "dict_labels"))

(06) Store the cleaned dataset

# Save the cleaned, labelled dataset as an RDS file in 03_data_clean.
df |>
  write_rds(here("03_data_clean", "df.rds"))