# Cases: read the first sheet of the workbook, normalise the column names,
# and tag every row as a case with a sequential study id.
df_ntd_cases <-
  readxl::read_xlsx("ntd-data.xlsx", sheet = 1) %>%
  janitor::clean_names() %>%
  mutate(
    # A length-one value is recycled to every row; rep() has no `by`
    # argument, so the earlier rep("Case", by = nrow(.)) was only working
    # through this same recycling.
    case_control = "Case",
    id = row_number(),
    # NOTE(review): presumably coerced to character so the column type agrees
    # with the control sheet when the two tables are combined -- confirm.
    presc = as.character(presc)
  )
# Controls: read the second sheet of the workbook, normalise the column
# names, drop rows with a missing `bw`, and tag every remaining row as a
# control.
df_ntd_control <-
  readxl::read_xlsx("ntd-data.xlsx", sheet = 2) %>%
  janitor::clean_names() %>%
  drop_na(bw) %>%
  mutate(
    # A length-one value is recycled to every row; rep() has no `by`
    # argument, so the earlier rep("Control", by = nrow(.)) was only working
    # through this same recycling.
    case_control = "Control",
    # Control ids start at 101.
    # NOTE(review): case ids are plain row numbers, so ids overlap if the
    # case sheet ever has more than 100 rows -- confirm the offset is enough.
    id = row_number() + 100,
    # NOTE(review): presumably coerced to character so the column types agree
    # with the case sheet when the two tables are combined -- confirm.
    fa_start = as.character(fa_start),
    prev_ano = as.character(prev_ano),
    booking = as.character(booking)
  )
# Stack cases on top of controls. The previous full_join() matched on every
# shared column, but `case_control` is "Case" in one table and "Control" in
# the other, so no rows could ever match and the join was acting as a row
# bind. bind_rows() states that intent directly and avoids the implicit
# join-key message.
df_ntd <-
  bind_rows(df_ntd_cases, df_ntd_control)
# Recode the combined case/control table: rename the birth measurements,
# turn coded columns into labelled factors, parse numeric text columns, and
# derive the anomaly / exposure indicator columns.
df_ntd <-
  df_ntd %>%
  rename(
    bwt = bw,
    gestage = ga
  ) %>%
  mutate(
    # Upper-case first so "f"/"m" are recognised as well as "F"/"M".
    sex = toupper(sex) %>%
      factor(
        levels = c("F", "M"),
        labels = c("Female", "Male")
      ),
    type_gest = factor(
      type_gest,
      levels = c("M", "S"),
      labels = c("Multiple", "Singleton")
    ),
    mode_del = factor(mode_del),
    # prev_ano arrives as text; it must be parsed before the 0/1 recode below.
    prev_ano = parse_integer(prev_ano),
    # Binary 0/1 indicators become No/Yes factors; any other value becomes NA.
    across(
      c(
        consanguinity, feb_ill, rash, herbal, alcohol, illicit_drugs,
        anc_dx, smokin, prev_ano
      ),
      ~ factor(.x, levels = c(0, 1), labels = c("No", "Yes"))
    ),
    edu_level = stringr::str_to_sentence(edu_level) %>%
      factor(),
    booking = parse_integer(booking),
    fa_start = parse_integer(fa_start),
    # Two spellings of non-attendance are merged; any value other than the
    # three listed here is left as NA.
    anc_attendance = case_when(
      anc_att %in% c("non-att", "non Attend") ~ "Non-Attendant",
      anc_att == "reg" ~ "Regular Attendant"
    ) %>%
      factor(),
    # Anomaly indicators: "Yes" when the anomaly text matches, "No" for the
    # remaining cases, and NA for rows that are not cases.
    # NOTE(review): anencephaly uses an exact match while the others use
    # substring matching -- confirm combined diagnoses are not missed.
    anencephaly = case_when(
      anoma_ac1_ac34ly == "anencephaly" ~ "Yes",
      case_control == "Case" ~ "No"
    ),
    myelomen = case_when(
      str_detect(anoma_ac1_ac34ly, "MM") ~ "Yes",
      case_control == "Case" ~ "No"
    ),
    encephalocele = case_when(
      str_detect(anoma_ac1_ac34ly, "locele") ~ "Yes",
      case_control == "Case" ~ "No"
    ),
    hydroceph = case_when(
      str_detect(anoma_ac1_ac34ly, "HC") ~ "Yes",
      case_control == "Case" ~ "No"
    ),
    # NOTE(review): for presc and mat_dx anything other than "0" -- including
    # a missing value -- is coded "Yes"; confirm that is intended.
    presc = case_when(presc == "0" ~ "No", TRUE ~ "Yes"),
    # The residence indicators must be derived before `residence` itself is
    # relabelled below, because they compare against the raw codes.
    resid_mining = case_when(residence == "M" ~ "Yes", TRUE ~ "No"),
    resid_farming = case_when(residence == "F" ~ "Yes", TRUE ~ "No"),
    mat_dx = case_when(mat_dx == "0" ~ "No", TRUE ~ "Yes"),
    residence = factor(
      residence,
      levels = c("C", "F", "M"),
      labels = c("None", "Farming", "Mining")
    ),
    # "Control" is listed first so it becomes the reference level. Converted
    # once, here, after the string comparisons above.
    case_control = factor(case_control, levels = c("Control", "Case"))
  ) %>%
  # The raw attendance column is superseded by anc_attendance.
  select(-anc_att)
# Attach human-readable variable labels to the columns of df_ntd.
# Fixes relative to the previous labels: "Febirle" -> "Febrile", and the
# stray trailing space after "Age at Booking" is removed.
labelled::var_label(df_ntd) <-
  list(
    bwt = "Birth weight",
    gestage = "Gestational age",
    sex = "Sex",
    type_gest = "Gestation type",
    mode_del = "Delivery mode",
    consanguinity = "Consanguinity",
    mat_age = "Mother's age",
    edu_level = "Educational Level",
    booking = "Age at Booking",
    anc_attendance = "ANC attendance",
    id = "Generate study ID",
    case_control = "Case or Control",
    parity = "Parity",
    herbal = "Herbal drug use",
    rash = "Rash",
    fa_start = "Folic Acid start",
    feb_ill = "Febrile illness",
    prev_ano = "Previous Anomalies",
    smokin = "Smoking",
    alcohol = "Alcohol use",
    illicit_drugs = "Illicit drug use",
    encephalocele = "Encephalocele",
    hydroceph = "Hydrocephalus",
    myelomen = "Myelomeningocele",
    anencephaly = "Anencephaly",
    unpres = "Unprescribed meds used",
    presc = "Prescribed meds use",
    resid_mining = "Mining area residence",
    resid_farming = "Farming area residence",
    mat_dx = "Maternal disease",
    residence = "Mining or Farming"
  )