## Global chunk options: echo the source code, hide messages and warnings.
## TRUE/FALSE are spelled out because T and F are ordinary variables that can be reassigned.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  class.source = "tgc-code-block" ## defined in css file
)

if (!require(pacman)) install.packages("pacman")
pacman::p_load(tidyverse,
               rio,
               here,
               highcharter,
               janitor,
               visdat, 
               inspectdf,
               htmltools,
               paletteer,
               scales,
               glue,
               desctable,
               reactable)


#' Star rating as an HTML snippet
#'
#' Builds a `ratings` container holding an `empty-stars` layer and a
#' `full-stars` layer whose width is `rating * 20` percent.
#'
#' @param rating Numeric value between 0 and 5 
#'
#' @return The markup, marked as HTML via `htmltools::HTML()`.
#' 
#' @note The CSS classes (`ratings`, `empty-stars`, `full-stars`) do most of the work and
#'   must be defined in a style file or otherwise.
#'
#' @examples
#' rating_stars(3.4)
rating_stars <- function(rating){
  ## one star corresponds to 20% of the overlay width
  fill_pct <- rating * 20
  markup <- paste0('<div class="ratings">
  <div class="empty-stars"></div>
  <div class="full-stars" style="width:', fill_pct, '%"></div>
  </div>')
  htmltools::HTML(markup)
}

#' Reactable table with a "Download as CSV" button
#'
#' @param df A data frame
#' @param name Character name. Used to build the table's element id
#'   (`<name>-download-table`) and the downloaded file name (`<name>.csv`).
#'
#' @return A browsable tag list containing the download button followed by the
#'   reactable widget
#'
#' @examples 
#' table_and_download(cars, "cars")
table_and_download <- function(df, name){
  table_id <- paste0(name, "-download-table")
  ## JavaScript run on click: export the table with this id to <name>.csv
  download_js <- paste0("Reactable.downloadDataCSV('", table_id, "', '", name, ".csv')")
  
  download_button <- tags$button(
    tagList(fontawesome::fa("download"), "Download as CSV"),
    onclick = download_js
  )
  
  ## shared look for every column and for the table as a whole
  column_defaults <- reactable::colDef(
    align = "left",
    html = TRUE,
    class = "border-left",
    na = "NA"
  )
  table_theme <- reactable::reactableTheme(
    stripedColor = "#f2f7f7",
    cellPadding = "2px 5px 2px 5px",
    borderColor = "#e1f2f2",
    borderWidth = "1.5px"
  )
  
  table_widget <- reactable::reactable(
    df,
    elementId = table_id,
    defaultPageSize = 5,
    striped = TRUE,
    highlight = TRUE,
    resizable = TRUE,
    defaultColDef = column_defaults,
    wrap = FALSE,
    bordered = TRUE,
    theme = table_theme
  )
  
  browsable(tagList(download_button, table_widget))
}


#' Prefix column names with column numbers
#' 
#' Each name becomes `"<position> <name>"`, with the position zero-padded to at
#' least two digits (e.g. `"01 Sepal.Length"`).
#' Sometimes useful for arranging.
#' I use it here to force `inspectdf::inspect_num()` and `inspectdf::inspect_cat()` to arrange
#' the columns properly in their respective graphics.
#'
#' @param df A data frame.
#'
#' @return Data frame with modified column names. A data frame with no columns
#'   is returned unchanged.
#'
#' @examples
#' prefix_col_nums(iris)
prefix_col_nums <- function(df) {
  ## seq_along() is empty for a zero-column data frame, whereas 1:ncol(df)
  ## would give c(1, 0) and make the names<- assignment fail
  positions <- sprintf("%02d", seq_along(df))
  names(df) <- paste(positions, names(df))
  df
}

Introduction: The Global Burden of Disease

Let’s take a quick look at the diseases with the highest global DALY burden, based on IHME estimates, to help target our search.

The goal is to try to find relevant datasets for some of these diseases/causes, with a (partial) focus on LMICs.

The strategy is a manual search through Zenodo (which also searches Dryad). Harvard Dataverse and Figshare are also good sources.

## DALY rows only, keeping the cause name and the (rounded) 2019 value
gbd <- 
  here("data/ihme_gbd_2019_data.csv") %>% 
  read_csv() %>% 
  filter(measure == "DALYs (Disability-Adjusted Life Years)") %>% 
  select(lvl_3_cause = cause, dalys_2019 = val) %>% 
  mutate(dalys_2019 = round(dalys_2019))

## cause -> parent lookup table
hierarchy <- 
  here("data/ihme_gbd_2019_cause_hierarchy.xlsx") %>% 
  rio::import() %>% 
  clean_names() %>% 
  as_tibble() %>% 
  select(cause_name, parent_name)

## look up the parent twice: level 3 -> level 2, then level 2 -> level 1
gbd_joined <- 
  gbd %>% 
  left_join(rename(hierarchy, lvl_2_cause = parent_name), 
            by = c("lvl_3_cause" = "cause_name")) %>% 
  left_join(rename(hierarchy, lvl_1_cause = parent_name), 
            by = c("lvl_2_cause" = "cause_name")) %>% 
  ## largest burden first
  arrange(desc(dalys_2019))

Plot

## Horizontal bar chart of the n_top causes with the largest 2019 DALY burden,
## coloured by level 1 cause.
n_top <- 25
gbd_joined %>% 
  head(n_top) %>% 
  ## custom tooltip and data label: every column of the data is attached to the
  ## chart points, which is how the JS formatters below read them
  mutate(my_tooltip = paste0("<span style='font-size:9.5px'>", lvl_1_cause ,"</span>", "<br>",
                           "<b>", lvl_3_cause, ":</b> ", comma(dalys_2019) ),
         ## the data label formatter reads `data_label`, so it has to exist
         data_label = comma(dalys_2019)) %>%
  hchart("bar", hcaes(x = lvl_3_cause, 
                      y = dalys_2019, 
                      group = lvl_1_cause),
         dataLabels = list(enabled = TRUE,
                           formatter = JS("function(){return(this.point.data_label)}"))
         ) %>%
  ## stack the groups so bars are not dodged; "normal" is the documented stacking value
  hc_plotOptions(bar = list(stacking = "normal")) %>% 
  hc_tooltip(formatter = JS("function(){return(this.point.my_tooltip)}")) %>% 
  hc_xAxis(title = list(text = "Cause (IHME level 3)")) %>% 
  hc_yAxis(title = list(text = "DALYs lost in 2019")) %>% 
  hc_title(text = glue::glue("Top {n_top} causes of DALY loss globally (Highest disease/cause burden)")) %>% 
  hc_add_theme(hc_theme_smpl()) %>% 
  hc_size(height = 550) 

Source data

## Source data behind the chart: cause levels 1-3 plus the 2019 DALYs, with a CSV download button
table_and_download(
  select(gbd_joined, lvl_1_cause, lvl_2_cause, lvl_3_cause, dalys_2019),
  name = "gbd_joined"
)

1 DIARRHEA || MALI & BANGLADESH || External validation of a mobile clinical decision support system for diarrhea etiology prediction in children: A multicenter study in Bangladesh and Mali

https://zenodo.org/record/5487109

Rating: 4/5

Decent combo of datasets. Seems useful for cleaning, joining, plotting.

In this document, we only include the datasets from Mali, but a similar dataset from Bangladesh could be nice for student practice.

1.1 Data

1.1.1 Survey dataset

## Read the survey file, re-parse column types, and clean the Quartier column
diarrhea_mali_leung_survey <-
  here("data/diarrhea_mali_leung_survey.csv") %>%
  rio::import() %>%
  as_tibble() %>%
  type_convert() %>%
  ## remove unrecognized symbols (anything that cannot be converted to ASCII)
  mutate(Quartier = iconv(Quartier, from = "utf-8", to = "ascii", sub = ""))

table_and_download(
  diarrhea_mali_leung_survey,
  name = "diarrhea_mali_leung_survey"
)

1.1.2 mRNA test dataset

## Read the mRNA test file and re-parse its column types
diarrhea_mali_leung_mrna_test <-
  here("data/diarrhea_mali_leung_mrna_test.csv") %>%
  rio::import() %>%
  as_tibble() %>%
  type_convert()

table_and_download(
  diarrhea_mali_leung_mrna_test,
  name = "diarrhea_mali_leung_mrna_test"
)

1.1.3 Weather dataset

## Read the weather file and re-parse its column types
diarrhea_mali_leung_weather <-
  here("data/diarrhea_mali_leung_weather.csv") %>%
  rio::import() %>%
  as_tibble() %>%
  type_convert()

table_and_download(
  diarrhea_mali_leung_weather,
  name = "diarrhea_mali_leung_weather"
)

1.2 Data summary

Survey data:

## Number the column names so the inspectdf graphics keep the original column order
diarrhea_mali_leung_survey_prefixed <- prefix_col_nums(diarrhea_mali_leung_survey)

## numeric columns
show_plot(inspect_num(diarrhea_mali_leung_survey_prefixed))

## categorical columns
show_plot(inspect_cat(diarrhea_mali_leung_survey_prefixed))

1.3 Abstract

Background: Diarrheal illness is a leading cause of antibiotic use for children in low- and middle-income countries. Determination of diarrhea etiology at the point-of-care without reliance on laboratory testing has the potential to reduce inappropriate antibiotic use.

Methods: This prospective observational study aimed to develop and externally validate the accuracy of a mobile software application (‘App’) for the prediction of viral-only etiology of acute diarrhea in children 0-59 months in Bangladesh and Mali. The App used a previously derived and internally validated model consisting of patient-specific (‘present patient’) clinical variables (age, blood in stool, vomiting, breastfeeding status, and mid-upper arm circumference) as well as location-specific viral diarrhea seasonality curves. The performance of additional models using the ‘present patient’ data combined with other external data sources including location-specific climate, data, recent patient data, and historical population-based prevalence were also evaluated in secondary analysis. Diarrhea etiology was determined with TaqMan Array Card using episode-specific attributable fraction (AFe) >0.5.

Results: Of 302 children with acute diarrhea enrolled, 199 had etiologies above the AFe threshold. Viral-only pathogens were detected in 22% of patients in Mali and 63% in Bangladesh. Rotavirus was the most common pathogen detected (16% Mali; 60% Bangladesh). The present patient+ viral seasonality model had an AUC of 0.754 (0.665-0.843) for the sites combined, with calibration-in-the-large α = -0.393 (-0.455–0.331) and calibration slope β = 1.287 (1.207-1.367). By site, the present patient+ recent patient model performed best in Mali with an AUC of 0.783 (0.705-0.86); the present patient+ viral seasonality model performed best in Bangladesh with AUC 0.710 (0.595-0.825).

Conclusions: The App accurately identified children with high likelihood of viral-only diarrhea etiology. Further studies to evaluate the App’s potential use in diagnostic and antimicrobial stewardship are underway.

2 CHOLERA || NIGERIA || Data from: Descriptive epidemiology of the 2018 cholera outbreak in Nigeria: implications for the global roadmap strategies

https://zenodo.org/record/5016772

Rating: 5/5

Outbreak linelist data perfect for the epidemic reporting course!

Quite extensive too. The full dataset has 43,996 rows. (This document only shows a 500-row sample.)

The data dictionary is not available, but we can use the data from Table 2 and 3 in their accompanying paper to figure out the code-value mapping.

2.1 Data

## Read the Stata linelist and keep a random sample of 500 rows
nigeria_cholera_elimian <- 
  here("data/nigeria_cholera_elimian.dta") %>%
  rio::import() %>%
  as_tibble() %>% 
  slice_sample(n = 500)

table_and_download(
  nigeria_cholera_elimian,
  name = "nigeria_cholera_elimian"
)

2.2 Data summary

## Number the column names so the inspectdf graphic keeps the original column order
nigeria_cholera_elimian_prefixed <- prefix_col_nums(nigeria_cholera_elimian)

## numeric columns
show_plot(inspect_num(nigeria_cholera_elimian_prefixed))

2.3 Abstract

Background

The cholera outbreak in 2018 in Nigeria reaffirms its public health threat to the country. Evidence on the current epidemiology of cholera required for the design and implementation of appropriate interventions towards attaining the global roadmap strategic goals for cholera elimination however seems lacking. Thus, this study aimed at addressing this gap by describing the epidemiology of the 2018 cholera outbreak in Nigeria.

Methods

This was a retrospective analysis of surveillance data collected between January 1st and November 19th, 2018. A cholera case was defined as an individual aged 2 years or older presenting with acute watery diarrhoea and severe dehydration or dying from acute watery diarrhoea. Descriptive analyses were performed and presented with respect to person, time and place using appropriate statistics.

Results

There were 43,996 cholera cases and 836 cholera deaths across 20 states in Nigeria during the outbreak period, with an attack rate (AR) of 127.43/100,000 population and a case fatality rate (CFR) of 1.90%. Individuals aged 15 years or older (47.76%) were the most affected age group, but the proportion of affected males and females was about the same (49.00 and 51.00% respectively). The outbreak was characterised by four distinct epidemic waves, with higher number of deaths recorded in the third and fourth waves. States from the north-west and north-east regions of the country recorded the highest ARs while those from the north-central recorded the highest CFRs.

Conclusion

The severity and wide-geographical distribution of cholera cases and deaths during the 2018 outbreak are indicative of an elevated burden, which was more notable in the northern region of the country. Overall, the findings reaffirm the strategic role of a multi-sectoral approach in the design and implementation of public health interventions aimed at preventing and controlling cholera in Nigeria.

3 TYPHOID || UGANDA || Temporal, spatial and household dynamics of typhoid fever in Kasese district, Uganda

https://zenodo.org/record/4958731

Rating: 4/5

Nice dataset. Lots of variables. The main dataset is a household survey. But the Zenodo repo for this study also includes some supplementary datasets used in the analysis. Do take a look. Very ugly column names.

3.1 Data

## Read the household survey file
typhoid_uganda_mirembe <- 
  here("data/typhoid_uganda_mirembe.csv") %>% 
  rio::import() %>% 
  as_tibble() 

table_and_download(
  typhoid_uganda_mirembe,
  name = "typhoid_uganda_mirembe"
)

3.2 Data summary

## Descriptive statistics table for the typhoid survey
typhoid_uganda_mirembe %>%
  type_convert() %>% 
  ## character columns with fewer than 15 distinct values become factors;
  ## both conditions are scalar, hence the short-circuiting `&&`
  mutate(across(where(~ is.character(.x) && 
                        length(unique(.x)) < 15), as.factor)) %>% 
  desctable(stats = stats_auto) %>%
  datatable()

3.3 Abstract

Typhoid fever affects 21 million people globally, 1% of whom succumb to the disease. The social, economic and public health consequences of this disease disproportionately affect people in Africa and Asia. In order to design context specific prevention strategies, we need to holistically characterise outbreaks in these settings. Here we used retrospective data (2013-2016) at national and district level to characterize temporal and spatial dynamics of typhoid fever outbreaks using time series and spatial analysis. We then selected cases matched with controls to investigate household socio-economic drivers using a conditional logistic regression model, in addition to develop a typhoid outbreak-forecasting framework. The incidence rate of typhoid fever at national and district level was ~ 160 and 60 cases per 100,000 persons per year, respectively, predominantly in urban areas. Bwera sub-county registered the highest incidence rate, followed by Kisinga, Kitholhu and Nyakiyumbu sub-counties. The male-female case ratio at district level was at 1.68 and outbreaks occurred between the 20th and 40th week (May and October) each year preceded by seven weeks of precipitation. Our forecasting framework predicts outbreaks better at the district rather than at the national level. We have identified a temporal window associated with typhoid fever outbreaks in Kasese district, which is preceded by precipitation, flooding and displacement of people. We also observed that high typhoid incidence areas also had high environmental contamination with limited water treatment. Taken together with the forecasting framework, this knowledge can inform the development of specific control and preparedness strategies at district and national levels.

4 ENTEROPATHY || ZAMBIA || Adaptation of the small intestine to microbial enteropathogens in Zambian children with stunting

https://zenodo.org/record/4571669

Rating: 3.5/5

Interesting topic. Could practice joining with this. Could practice association tests.

4.1 Data

4.1.1 Biomarkers dataset

## Read the biomarkers file
enteropathy_zambia_kelly_biomarkers <- 
  here("data/enteropathy_zambia_kelly_biomarkers.csv") %>% 
  rio::import() %>% 
  as_tibble()

table_and_download(
  enteropathy_zambia_kelly_biomarkers,
  name = "enteropathy_zambia_kelly_biomarkers"
)

4.1.2 Pathogens dataset

## Read the pathogens file
enteropathy_zambia_kelly_pathogens <- 
  here("data/enteropathy_zambia_kelly_pathogens.csv") %>% 
  rio::import() %>% 
  as_tibble()

table_and_download(
  enteropathy_zambia_kelly_pathogens,
  name = "enteropathy_zambia_kelly_pathogens"
)

4.2 Data summary

## Number the column names so the inspectdf graphics keep the original column order
enteropathy_zambia_kelly_biomarkers_prefixed <- 
  prefix_col_nums(enteropathy_zambia_kelly_biomarkers)

## numeric columns of the biomarkers dataset
show_plot(inspect_num(enteropathy_zambia_kelly_biomarkers_prefixed))

enteropathy_zambia_kelly_pathogens_prefixed <- 
  prefix_col_nums(enteropathy_zambia_kelly_pathogens)

## numeric columns of the pathogens dataset
show_plot(inspect_num(enteropathy_zambia_kelly_pathogens_prefixed))

4.3 Data dictionary

## The data dictionary is included as an image
knitr::include_graphics(
  here("data/enteropathy_zambia_kelly_dictionary.png")
)

4.4 Abstract

Environmental enteropathy is a major contributor to growth faltering in millions of children in Africa and South Asia. We carried out a longitudinal, observational and interventional study in Lusaka, Zambia, of 297 children with stunting (aged 2–17 months at recruitment) and 46 control children who had good growth (aged 1–5 months at recruitment). Control children contributed data only at baseline. Children were provided with nutritional supplementation of daily cornmeal-soy blend, an egg and a micronutrient sprinkle, and were followed up to 24 months of age. Children whose growth did not improve over 4–6 months of nutritional supplementation were classified as having non-responsive stunting. We monitored microbial translocation from the gut lumen to the bloodstream in the cohort with non-responsive stunting (n = 108) by measuring circulating lipopolysaccharide (LPS), LPS-binding protein and soluble CD14 at baseline and when non-response was declared. We found that microbial translocation decreased with increasing age, such that LPS declined in 81 (75%) of 108 children with non-responsive stunting, despite sustained pathogen pressure and ongoing intestinal epithelial damage. We used confocal laser endomicroscopy and found that mucosal leakiness also declined with age. However, expression of brush border enzyme, nutrient transporter and mucosal barrier genes in intestinal biopsies did not change with age or correlate with biomarkers of microbial translocation. We propose that environmental enteropathy arises through adaptation to pathogen-mediated epithelial damage. Although environmental enteropathy reduces microbial translocation, it does so at the cost of impaired growth. The reduced epithelial surface area imposed by villus blunting may explain these findings.

5 FEBRILE DISEASES || BURKINA FASO || Differentiating causes of febrile illness in Burkina Faso: data from an accuracy study comparing gold standard culture techniques with a haemocytometry based algorithm (IMS), procalcitonin (PCT) and C-reactive protein (CRP)

https://zenodo.org/record/4541793

Rating: 4/5

Clean and simple dataset. Very rich. Lots of lab data though, which may not be intelligible to some audiences.

5.1 Data

## Sheet 1: the data, with column names cleaned
febrile_diseases_burkina_faso_post <- 
  rio::import(here("data/febrile_diseases_burkina_faso_post.xlsx"), 
              sheet = 1) %>% 
  as_tibble() %>% 
  clean_names()

## read in and wrangle dictionary (sheet 2) into one row per (variable, code) pair
febrile_diseases_burkina_faso_post_dict <- 
  rio::import(here("data/febrile_diseases_burkina_faso_post.xlsx"), 
              sheet = 2) %>% 
  as_tibble() %>% 
  clean_names() %>% 
  select(variable, labels, x4:x6) %>% 
  ## the label cells are spread over four columns: stack them and drop the empty ones
  pivot_longer(c(labels, x4,x5,x6)) %>% 
  filter(!is.na(value)) %>% 
  select(-name) %>% 
  ## presumably each cell has the form "code:label" -- confirm against the workbook
  separate(value, into = c("from", "to"), sep = ":") %>% 
  select(from, to, variable) %>% 
  ## lower-case so the names line up with the clean_names() output above
  mutate(variable = str_to_lower(variable))

## use recode dictionary 
febrile_diseases_burkina_faso_post <- 
  febrile_diseases_burkina_faso_post %>% 
  matchmaker::match_df(dictionary = febrile_diseases_burkina_faso_post_dict) %>% 
  ## trim every column; `.cols` is given explicitly because calling across()
  ## without it is deprecated since dplyr 1.1.0
  mutate(across(everything(), str_trim)) %>% 
  type_convert() %>% 
  ## character columns with fewer than 15 distinct values become factors;
  ## both conditions are scalar, hence the short-circuiting `&&`
  mutate(across(where(~ is.character(.x) && 
                        length(unique(.x)) < 15), as.factor))

table_and_download(febrile_diseases_burkina_faso_post, name = "febrile_diseases_burkina_faso_post")

5.2 Data summary

## Descriptive statistics table for the recoded dataset
datatable(
  desctable(febrile_diseases_burkina_faso_post, stats = stats_auto)
)

5.3 Abstract

Different causes of acute febrile illness due to different infectious diseases (e.g. bacterial, viral malaria) may present with a similar clinical presentation. We performed a clinical diagnostic study to assess the diagnostic accuracy of a new tool - the Infection Manager System (IMS) - an algorithm which uses haemocytometric data to predict the cause of infection (e.g. bacterial, viral, malaria). The current dataset is a subset of data obtained during this study which was performed in a rural setting in Burkina Faso. The study was registered at ClinicalTrials.org under Identifier NCT02669823. All data used for the manuscript entitled “Infection Manager System (IMS) as a new hemocytometry-based bacteremia detection tool: a diagnostic accuracy study in a malaria-endemic area of Burkina Faso” are included in the current subset of data.

To test the IMS we collected clinical and demographic data from approximately 900 patients aged between 3 months and 100 years presenting with an acute febrile illness. Upon inclusion, 2-5 ml EDTA anticoagulated blood was sampled for haemocytometry, malaria diagnostics (thick- and thin blood films and RDTs) and blood culture. A nasopharyngeal swab and aliquots of residual blood and plasma were stored at -80° for retrospective analyses. 1. A viral panel on nasopharyngeal swabs 2. PCR’s for malaria, Salmonella, S. aureus, H. influenzae, S. pneumoniae on whole blood or plasma samples and 3. C-reactive protein (CRP) and procalcitonin (PCT) levels on plasma samples. Additional diagnostics such as chest X-ray, echography, urinalysis, and culture of urine, stool, pus, or cerebrospinal fluid were performed on clinical indication.

In this cohort we attempted to provide a microbiologically proven diagnosis for all patients admitted with febrile illness using gold standard methods (e.g. blood culture, malaria microscopy and PCR). We then assessed the accuracy of the novel IMS to differentiate causes of infection against these conventional diagnostic methods. We furthermore assessed the accuracy of both CRP and PCT in differentiating causes of infection and compared them to the performance of the IMS.

We found that the IMS had a higher diagnostic accuracy to detect bacteremia than PCT at a cut of value of 0.5 µg/L, and was comparable in sensitivity, but superior in specificity to CRP at a cut of value of 20 mg/L. Subanalysis among patients below the age of five showed that they had a slightly lower accuracy of IMS, PCT and CRP. Combining the IMS and CRP did not significantly improve accuracy due to the high level of overlap between CRP and the IMS. The high negative predictive value of IMS –also in non-bacteremic bacterial infections – suggests that the IMS holds promise to rationalize antimicrobial prescription in healthcare facilities where hematology analyzers are available. The relatively low specificity and PPV demonstrate that it is not (yet) suitable as a diagnostic for bacteremia.

6 HIV || BANGLADESH || Depressive symptoms and their sociodemographic determinants among people living with HIV/AIDS in Bangladesh: a cross-sectional study

https://f1000research.com/articles/11-239

Rating: 3.5/5

Decent data. Not yet peer-reviewed. Would be a fun exercise trying to wrangle the DOCX variable dictionary into a proper machine-readable variable dictionary.

6.1 Data

## Read the survey workbook
hiv_depression_india_rabeya <- 
  here("data/hiv_depression_india_rabeya.xlsx") %>% 
  rio::import() %>% 
  as_tibble()  

table_and_download(
  hiv_depression_india_rabeya,
  name = "hiv_depression_india_rabeya"
)

6.2 Data summary

## Number the column names so the inspectdf graphics keep the original column order
hiv_depression_india_rabeya_prefixed <- prefix_col_nums(hiv_depression_india_rabeya)

## numeric summaries in two graphics: columns 1-18, then columns 19-36
show_plot(inspect_num(hiv_depression_india_rabeya_prefixed[, 1:18]))

show_plot(inspect_num(hiv_depression_india_rabeya_prefixed[, 19:36]))

6.3 Data dictionary

## The data dictionary is included as an image
knitr::include_graphics(
  here("data/hiv_depression_india_rabeya_dictionary.png")
)

6.4 Abstract

Background: This study aimed to determine the prevalence of depression and its associated factors among people living with HIV/AIDS in Bangladesh.

Methods: This cross-sectional study, which took place in Dhaka, Bangladesh, from July to December 2020, included 338 HIV-positive people. The method used was a simple random sampling technique. The Beck Depression Inventory assessed depression in HIV-positive people (BDI).

Results: More than 62 percent of the 338 people surveyed had severe depression, 30.5 percent had moderate depression, 5.6 percent had mild depression, and 1.8 percent had no depression at all. Age, being a man, being married, and having a low monthly income were all significant predictors of depression.

Conclusions: This study found that depressive symptoms are highly prevalent among HIV-positive patients in Bangladesh. The authors recommend that health care providers address depressive disorders for people with HIV/ AIDS comprehensively.

7 MALARIA || NIGERIA || Data from: Long-lasting insecticidal net use and asymptomatic malaria parasitaemia among household members of laboratory-confirmed malaria patients attending selected health facilities in Abuja, Nigeria, 2016: a cross-sectional survey

https://zenodo.org/record/4977228

Rating: 3.5/5

Nice, straightforward survey data. Medium sized (602 rows by 40 columns).

7.1 Data

## Read the survey workbook
malaria_nigeria_onyiah <-   
  here("data/malaria_nigeria_onyiah.xls") %>%
  rio::import() %>%
  as_tibble() 

table_and_download(
  malaria_nigeria_onyiah,
  name = "malaria_nigeria_onyiah"
)

7.2 Data summary

malaria_nigeria_onyiah_prefixed <- 
  malaria_nigeria_onyiah %>% 
  prefix_col_nums() %>% 
  ## convert high-dimensional (> 20 distinct values) non-numeric vars to numbers.
  ## Columns that are already numeric are left alone: recoding them would replace
  ## their values with factor codes and distort the inspect_num() summaries.
  mutate(across(where(~ !is.numeric(.x) && length(unique(.x)) > 20), 
                ~ as.numeric(as.factor(.x)))) 


## numeric columns
inspect_num(malaria_nigeria_onyiah_prefixed) %>%
  show_plot()

## categorical columns in two graphics: columns 1-20, then columns 21-40
inspect_cat(malaria_nigeria_onyiah_prefixed[,1:20]) %>%
  show_plot()

inspect_cat(malaria_nigeria_onyiah_prefixed[,21:40]) %>%
  show_plot()

7.3 Abstract

Introduction In Nigeria, malaria remains a major burden. There is the presupposition that household members could have common exposure to malaria parasite and use of long-lasting insecticidal net (LLIN) could reduce transmission. This study was conducted to identify factors associated with asymptomatic malaria parasitaemia and LLIN use among households of confirmed malaria patients in Abuja, Nigeria.

Methods A cross-sectional survey was conducted from March to August 2016 in twelve health facilities selected from three area councils in Abuja, Nigeria. Participants were selected using multi-stage sampling technique. Overall, we recruited 602 participants from 107 households linked to 107 malaria patients attending the health facilities. Data on LLIN ownership, utilization, and house characteristics were collected using a semi-structured questionnaire. Blood samples of household members were examined for malaria parasitaemia using microscopy. Data were analyzed using descriptive statistics, Chi-square, and logistic regression (α = 0.05).

Results Median age of respondents was 16.5 years (Interquartile range: 23 years); 55.0% were females. Proportions of households that owned and used at least one LLIN were 44.8% and 33.6%, respectively. Parasitaemia was detected in at least one family member of 102 (95.3%) index malaria patients. Prevalence of asymptomatic malaria parasitaemia among study participants was 421/602 (69.9%). No association was found between individual LLIN use and malaria parasitaemia (odds ratio: 0.9, 95% confidence interval (95%CI): 0.6–1.3) among study participants. Having bushes around the homes was associated with having malaria parasitaemia (adjusted OR (aOR): 2.7, 95%CI: 1.7–4.2) and less use of LLIN (aOR: 0.4, 95%CI: 0.2–0.9). Living in Kwali (aOR: 0.1, 95% CI: 0.0–0.2) was associated with less use of LLIN.

Conclusion High prevalence of asymptomatic malaria and low use of LLIN among household members of malaria patients portend the risk of intra-household common source of malaria transmission. We recommend household health education on LLIN use and environmental management. Study to explore the role of preventive treatment of household members of confirmed malaria patient in curbing transmission is suggested. Strategies promoting LLIN use need to be intensified in Kwali.

8 ROAD INJURIES || COLOMBIA || Loss of years of healthy life due to road incidents of motorcyclists in the city of Medellin, 2012 to 2015

https://zenodo.org/record/4836304 Associated paper here: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0256758

Rating: 5/5

Amazing group of datasets! Quite detailed, quite large (e.g. the listing of motorcycle injuries is 80000 rows). Nice for time series analysis.

It is all in Spanish, so may need translation.

8.1 Data

8.1.1 Deaths dataset:

## Read the deaths workbook
motorcycle_accidents_romero_deaths <- 
  here("data/motorcycle_accidents_romero_deaths.xlsx") %>%
  rio::import() %>%
  as_tibble()

table_and_download(
  motorcycle_accidents_romero_deaths,
  name = "motorcycle_accidents_romero_deaths"
)

8.1.2 Injuries from clinic records

The actual dataset is 45,018 rows, but I sample 500 here.

## Read the clinic-records workbook and keep a random sample of 500 rows
motorcycle_accidents_romero_injuries_clinic <- 
  here("data/motorcycle_accidents_romero_injuries_clinic.xlsx") %>%
  rio::import() %>%
  slice_sample(n = 500) %>% 
  as_tibble()

table_and_download(
  motorcycle_accidents_romero_injuries_clinic,
  name = "motorcycle_accidents_romero_injuries_clinic"
)

8.1.3 Injuries from police records

The actual dataset is 87,971 rows, but I sample 500 here.

## Read the police-records workbook and keep a random sample of 500 rows
motorcycle_accidents_romero_injuries_police <- 
  here("data/motorcycle_accidents_romero_injuries_police.xlsx") %>% 
  rio::import() %>% 
  slice_sample(n = 500) %>% 
  as_tibble()

table_and_download(
  motorcycle_accidents_romero_injuries_police,
  name = "motorcycle_accidents_romero_injuries_police"
)

8.2 Data summary

8.2.1 Deaths dataset

motorcycle_accidents_romero_deaths_prefixed <- 
  motorcycle_accidents_romero_deaths %>% 
  prefix_col_nums() %>% 
  ## convert high-dimensional (> 20 distinct values) non-numeric vars to numbers.
  ## Columns that are already numeric are left alone: recoding them would replace
  ## their values with factor codes and distort the inspect_num() summaries.
  mutate(across(where(~ !is.numeric(.x) && length(unique(.x)) > 20), 
                ~ as.numeric(as.factor(.x)))) 

## numeric columns
inspect_num(motorcycle_accidents_romero_deaths_prefixed) %>%
  show_plot()

## categorical columns
inspect_cat(motorcycle_accidents_romero_deaths_prefixed) %>%
  show_plot()

8.2.2 Injuries from clinic records

motorcycle_accidents_romero_injuries_clinic_prefixed <- 
  motorcycle_accidents_romero_injuries_clinic %>% 
  prefix_col_nums() %>% 
  ## convert high-dimensional (> 20 distinct values) non-numeric vars to numbers.
  ## Columns that are already numeric are left alone: recoding them would replace
  ## their values with factor codes and distort the inspect_num() summaries.
  mutate(across(where(~ !is.numeric(.x) && length(unique(.x)) > 20), 
                ~ as.numeric(as.factor(.x)))) 

## numeric columns
inspect_num(motorcycle_accidents_romero_injuries_clinic_prefixed) %>%
  show_plot()

## categorical columns
inspect_cat(motorcycle_accidents_romero_injuries_clinic_prefixed) %>%
  show_plot()

8.2.3 Injuries from police records

motorcycle_accidents_romero_injuries_police_prefixed <- 
  motorcycle_accidents_romero_injuries_police %>% 
  prefix_col_nums() %>% 
  ## convert high-dimensional (> 20 distinct values) non-numeric vars to numbers.
  ## Columns that are already numeric are left alone: recoding them would replace
  ## their values with factor codes and distort the inspect_num() summaries.
  mutate(across(where(~ !is.numeric(.x) && length(unique(.x)) > 20), 
                ~ as.numeric(as.factor(.x)))) 

## numeric columns
inspect_num(motorcycle_accidents_romero_injuries_police_prefixed) %>%
  show_plot()

## categorical columns
inspect_cat(motorcycle_accidents_romero_injuries_police_prefixed) %>%
  show_plot()

8.3 Data dictionary

Please see Zenodo record for the data dictionary: https://zenodo.org/record/4836304

8.4 Abstract

Objective

Determine the loss of years of healthy life due to road incidents of motorcyclists in the city of Medellin from 2012 to 2015.

Methods

Descriptive study with data on health care of injured motorcyclists and deaths adjusted with the Preston and Coale method, and OPS proportional distribution for the period 2012–2015. The years of life lost due to premature death (YLLs), years lived with disability (YLDs), and the disability-adjusted life years (DALYs) were calculated according to the new methodology designed for that purpose.

Results

The loss of years of healthy life due to road incidents of motorcyclists in the four-year period was 80,046 DALYs (823.8 per 100,000 inhabitants), with a higher proportion in men (81.3% and a ratio of 5 to 1 compared to women); the YLDs was 66.6% with marked differences in favor of men. There was nearly a 38% difference in the ages of 15 to 19 as well as a 19% difference from 30 to 49, compared to women. Premature death (YLLs) contributed to 33.4% of DALYs, with significant presentation in the above-mentioned age groups.

Conclusions

The greatest loss of years of healthy life due to road incidents of motorcyclists in Medellin was due to non-fatal injuries and was concentrated in young men. If the trend of motorcycle road incidents continues, both local and national road safety plans will fail to accomplish the expected results, especially among motorcycle users.

9 DIET DIVERSITY || VIETNAM || Retail Diversity for Diet Diversity - Dietary Intake Data

https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/NRNCX0

Rating: 4/5

Survey dataset. Quite unique. Lots of mutate and summarize operations will be needed to make meaning from the data. Good practice for wrangling.

Could be used in parallel with the Retail Outlet Census data and Shopping Practices Household Survey data from the same organization.

9.1 Data

# Read the dietary intake workbook and store it as a tibble.
diet_diversity_vietnam_wertheim_heck <- as_tibble(
  rio::import(here("data/diet_diversity_vietnam_wertheim-heck.xlsx"))
)

# Show the data as a reactable table with a "Download as CSV" button; the
# name is used for both the table element id and the downloaded file name.
table_and_download(
  diet_diversity_vietnam_wertheim_heck,
  name = "diet_diversity_vietnam_wertheim_heck"
)

9.2 Data summary

# Descriptive statistics for every column, rendered as an interactive table.
diet_diversity_vietnam_wertheim_heck %>%
  type_convert() %>%
  # Columns with fewer than 15 distinct values are converted to factors
  # before the statistics are computed.
  mutate(
    across(
      where(function(column) length(unique(column)) < 15),
      as.factor
    )
  ) %>%
  desctable(stats = stats_auto) %>%
  datatable()

9.3 Data dictionary

# Read the data dictionary from the second sheet of the workbook, skipping the
# first three rows, keep only the first three columns, and build a gt table.
diet_diversity_vietnam_wertheim_heck_dictionary <- 
  rio::import(here("data/diet_diversity_vietnam_wertheim-heck.xlsx"), sheet = 2, skip = 3) %>% 
  select(1:3) %>% 
  gt::gt()

# An assignment is evaluated invisibly, so the table has to be printed
# explicitly for it to appear under this section in the rendered document.
diet_diversity_vietnam_wertheim_heck_dictionary

9.4 Abstract

(This is not an abstract. Found it on their page here: https://www.wur.nl/en/project/Retail-Diversity-for-Dietary-Diversity-RD4DD.htm)

Nutrition insecurity among a growing number of urban poor in modernizing Southeast Asian metropolises is a critical issue. Serving to enlarge the capacity of local authorities in planning and implementing all-inclusive food-safe and nutrition-sensitive food retailing infrastructures, our proposed research seeks to answer the question ‘why do the urban poor eat the food they do’, in the context of transformations in the food retail environment and the organization of daily life. We want to understand in what way progressing retail modernization and restructuration policies impact the diet diversity and quality of the urban poor that depend on daily food shopping (purchasing foods on a day-to-day basis) often due to irregular and fluctuating daily income levels due to the nature of employment.

Serving as a case in point for similar developments in SEA, our research focuses on Hanoi, the capital of lower-middle income country (LMIC) Vietnam, listed among the world’s fastest growing economies. Our research specifically focuses on women, since nutrient deficiencies are particularly prevalent among women of reproductive age. Women are often the primary decision maker and mostly responsible for food purchases, meal preparation and household food allocation. They are thus key-actors in understanding and addressing nutrition vulnerability.

Households were randomly selected from the field sites, where women were asked to recall all the foods and drinks they consumed the previous day, and to specify where those foods were sourced from. An adapted quantitative 24-hour recall methodology was applied.

10 DIABETES || CHINA || Association of body mass index and age with incident diabetes in Chinese adults: a population-based cohort study

https://zenodo.org/record/4997196

Rating: 3.5/5

Lots of records! (Over 200,000 rows, but here I only sample 500.) Would be great for linear regression, perhaps survival analysis.

10.1 Data

# Load the cohort workbook and keep a random sample of 500 rows as a tibble.
# NOTE(review): no seed is set in this chunk, so the sample presumably differs
# between renders unless a seed is set elsewhere in the file -- confirm.
diabetes_china_chen <- as_tibble(
  slice_sample(
    rio::import(here("data/diabetes_china_chen.xlsx")),
    n = 500
  )
)

# Show the sampled data as a reactable table with a "Download as CSV" button.
table_and_download(
  diabetes_china_chen,
  name = "diabetes_china_chen"
)

10.2 Data summary

# Run the sampled data through prefix_col_nums() (a helper defined elsewhere
# in this file) before plotting.
diabetes_china_chen_prefixed <- prefix_col_nums(diabetes_china_chen)

# Plot the numeric-column summaries in two batches: columns 1 to 12 first,
# then columns 13 to 25.
show_plot(inspect_num(diabetes_china_chen_prefixed[, 1:12]))

show_plot(inspect_num(diabetes_china_chen_prefixed[, 13:25]))

10.3 Abstract

Objective Type 2 diabetes mellitus is increasing in young adults, and greater adiposity is considered a major risk factor. However, whether there is an association between obesity and diabetes and how this might be impacted by age is not clear. Therefore, we investigated the association between body mass index (BMI) and diabetes across a wide range of age groups (20–30, 30–40, 40–50, 50–60, 60–70 and ≥70 years old).

Design We performed a retrospective cohort study using healthy screening programme data.

Setting A total of 211 833 adult Chinese persons >20 years old across 32 sites and 11 cities in China (Shanghai, Beijing, Nanjing, Suzhou, Shenzhen, Changzhou, Chengdu, Guangzhou, Hefei, Wuhan, Nantong) were selected for the study; these persons were free of diabetes at baseline.

Primary and secondary outcome measures Fasting plasma glucose levels were measured and information regarding the history of diabetes was collected at each visit. Diabetes was diagnosed as fasting plasma glucose ≥7.00 mmol/L and/or self-reported diabetes. Patients were censored at the date of diagnosis or the final visit, whichever came first.

Results With a median follow-up of 3.1 years, 4174 of the 211 833 participants developed diabetes, with an age-adjusted incidence rate of 7.35 per 1000 persons. The risk of incident diabetes increased proportionally with increasing baseline BMI values, with a 23% increased risk of incident diabetes with each kg/m2 increase in BMI (95% CI 1.22 to 1.24). Across all age groups, there was a linear association between BMI and the risk of incident diabetes, although there was a stronger association between BMI and incident diabetes in the younger age groups (age×BMI interaction, p<0.0001).

Conclusions An increased BMI is also independently associated with a higher risk of developing diabetes in young adults and the effects of BMI on incident diabetes were accentuated in younger adults.