## Load libraries

suppressPackageStartupMessages(library(readr))
suppressWarnings(suppressPackageStartupMessages(library(tidyverse)))
## Conflicts with tidy packages ----------------------------------------------
## Print numbers with 2 significant digits by default for the rest of the
## session. NOTE(review): knitr::kable() presumably uses this option as its
## default rounding as well (the rendered table shows two decimals) -- confirm.
options(digits = 2)

## Import files

## Read the GBD 2013 extract. Only the file path is passed, so readr guesses
## the column types; check problems(gbd2013) for rows that failed to parse.
gbd2013 <- read_csv(file = "~/Downloads/download (25).csv")
## Parsed with column specification:
## cols(
##   `Location` = col_character(),
##   Year = col_character(),
##   Age = col_character(),
##   Sex = col_character(),
##   `Cause of death or injury` = col_character(),
##   `Risk factor` = col_character(),
##   Measure = col_character(),
##   Value = col_double(),
##   `Lower bound` = col_double(),
##   `Upper bound` = col_double()
## )
## Warning: 141 parsing failures.
## row col   expected    actual
##   1  -- 10 columns 8 columns
##   3  -- 10 columns 8 columns
##   5  -- 10 columns 8 columns
##   6  -- 10 columns 8 columns
##   8  -- 10 columns 8 columns
## ... ... .......... .........
## See problems(...) for more details.
## Read the GBD 2015 extract; column types are guessed by readr
gbd2015 <- read_csv(file = "~/Downloads/download (26).csv")
## Parsed with column specification:
## cols(
##   `Location` = col_character(),
##   Year = col_character(),
##   Age = col_character(),
##   Sex = col_character(),
##   `Cause of death or injury` = col_character(),
##   `Risk factor` = col_character(),
##   Measure = col_character(),
##   Value = col_double(),
##   `Lower bound` = col_double(),
##   `Upper bound` = col_double()
## )
## Warning: 144 parsing failures.
## row col   expected    actual
##   1  -- 10 columns 8 columns
##   3  -- 10 columns 8 columns
##   5  -- 10 columns 8 columns
##   6  -- 10 columns 8 columns
##   8  -- 10 columns 8 columns
## ... ... .......... .........
## See problems(...) for more details.
## Tag every row with the GBD release it was read from, so the two extracts
## can still be told apart once they are stacked into one table
gbd2013 <- mutate(gbd2013, dataset = "gbd2013")
gbd2015 <- mutate(gbd2015, dataset = "gbd2015")

## Stack the two labelled extracts row-wise into a single table
gbdsum <- gbd2013 %>%
  bind_rows(gbd2015)


## Normalise the column names with janitor so they can be used unquoted
## (the code below refers to risk_factor, measure, value, ...)
gbdsum <- janitor::clean_names(gbdsum)


## Compare risk factor values from GBD 2013 and GBD 2015
##
## Each release is matched to its own reference year (year 2013 from the
## gbd2013 rows, year 2015 from the gbd2015 rows) and "DALY rank" rows are
## dropped. The two releases are then spread into side-by-side columns so the
## difference, absolute difference and ratio can be computed for every
## risk factor / measure pair. The result is printed as a table, largest
## absolute difference first.
gbdsum %>%
  select(year, risk_factor, measure, value, dataset) %>%
  filter(
    (dataset == "gbd2013" & year == "2013") |
      (dataset == "gbd2015" & year == "2015"),
    measure != "DALY rank"
  ) %>%
  ## Drop year so both releases share one row per risk factor and measure;
  ## otherwise spread() would keep the 2013 and 2015 values on separate rows
  select(-year) %>%
  spread(dataset, value) %>%
  mutate(
    diff = gbd2015 - gbd2013,
    abs_diff = abs(diff),
    ## 0 / 0 yields NaN for risk factors that are zero in both releases
    ratio = gbd2015 / gbd2013
  ) %>%
  arrange(desc(abs_diff)) %>%
  knitr::kable()
## | risk_factor | measure | gbd2013 | gbd2015 | diff | abs_diff | ratio |
## |---|---|---|---|---|---|---|
## | High body-mass index | DALYs per 100,000 | 2435.34 | 1530.19 | -905.15 | 905.15 | 0.63 |
## | Impaired kidney function | DALYs per 100,000 | 872.02 | 476.67 | -395.35 | 395.35 | 0.55 |
## | Alcohol use | DALYs per 100,000 | 1032.83 | 656.45 | -376.38 | 376.38 | 0.64 |
## | Diet high in processed meat | DALYs per 100,000 | 489.46 | 188.89 | -300.57 | 300.57 | 0.39 |
## | High fasting plasma glucose | DALYs per 100,000 | 1276.28 | 984.64 | -291.63 | 291.63 | 0.77 |
## | Diet low in fiber | DALYs per 100,000 | 362.16 | 117.79 | -244.37 | 244.37 | 0.33 |
## | Low bone mineral density | DALYs per 100,000 | 399.20 | 170.42 | -228.78 | 228.78 | 0.43 |
## | High total cholesterol | DALYs per 100,000 | 934.90 | 1093.48 | 158.58 | 158.58 | 1.17 |
## | Diet low in seafood omega-3 fatty acids | DALYs per 100,000 | 134.70 | 273.41 | 138.70 | 138.70 | 2.03 |
## | Ambient particulate matter pollution | DALYs per 100,000 | 420.05 | 556.07 | 136.02 | 136.02 | 1.32 |
## | Occupational carcinogens | DALYs per 100,000 | 259.36 | 393.95 | 134.60 | 134.60 | 1.52 |
## | Diet low in whole grains | DALYs per 100,000 | 419.81 | 552.63 | 132.82 | 132.82 | 1.32 |
## | Diet low in fruits | DALYs per 100,000 | 691.67 | 561.43 | -130.23 | 130.23 | 0.81 |
## | Low physical activity | DALYs per 100,000 | 738.75 | 612.15 | -126.60 | 126.60 | 0.83 |
## | Occupational injuries | DALYs per 100,000 | 146.91 | 63.76 | -83.15 | 83.15 | 0.43 |
## | Diet low in polyunsaturated fatty acids | DALYs per 100,000 | 166.60 | 86.35 | -80.25 | 80.25 | 0.52 |
## | Diet high in sodium | DALYs per 100,000 | 468.72 | 389.55 | -79.18 | 79.18 | 0.83 |
## | Drug use | DALYs per 100,000 | 495.13 | 563.98 | 68.86 | 68.86 | 1.14 |
## | Iron deficiency | DALYs per 100,000 | 276.11 | 342.94 | 66.83 | 66.83 | 1.24 |
## | Diet high in red meat | DALYs per 100,000 | 90.42 | 31.44 | -58.98 | 58.98 | 0.35 |
## | Lead exposure | DALYs per 100,000 | 29.82 | 73.83 | 44.01 | 44.01 | 2.48 |
## | Diet high in sugar-sweetened beverages | DALYs per 100,000 | 61.69 | 20.15 | -41.53 | 41.53 | 0.33 |
## | Secondhand smoke | DALYs per 100,000 | 34.25 | 74.61 | 40.36 | 40.36 | 2.18 |
## | Residential radon | DALYs per 100,000 | 52.17 | 12.24 | -39.92 | 39.92 | 0.23 |
## | High systolic blood pressure | DALYs per 100,000 | 1998.22 | 1958.73 | -39.49 | 39.49 | 0.98 |
## | Diet low in nuts and seeds | DALYs per 100,000 | 426.52 | 465.95 | 39.43 | 39.43 | 1.09 |
## | Diet high in trans fatty acids | DALYs per 100,000 | 77.88 | 113.81 | 35.93 | 35.93 | 1.46 |
## | No handwashing with soap | DALYs per 100,000 | 12.93 | 47.04 | 34.11 | 34.11 | 3.64 |
## | Childhood sexual abuse | DALYs per 100,000 | 65.21 | 95.28 | 30.08 | 30.08 | 1.46 |
## | Occupational ergonomic factors | DALYs per 100,000 | 230.21 | 203.06 | -27.15 | 27.15 | 0.88 |
## | Occupational noise | DALYs per 100,000 | 30.59 | 50.09 | 19.50 | 19.50 | 1.64 |
## | Diet suboptimal in calcium | DALYs per 100,000 | 80.03 | 64.12 | -15.91 | 15.91 | 0.80 |
## | Unsafe sex | DALYs per 100,000 | 76.59 | 86.23 | 9.64 | 9.64 | 1.13 |
## | Ambient ozone pollution | DALYs per 100,000 | 31.58 | 23.17 | -8.41 | 8.41 | 0.73 |
## | Smoking | DALYs per 100,000 | 2689.60 | 2695.47 | 5.87 | 5.87 | 1.00 |
## | Diet low in milk | DALYs per 100,000 | 50.82 | 56.15 | 5.34 | 5.34 | 1.11 |
## | Occupational asthmagens | DALYs per 100,000 | 22.61 | 27.02 | 4.41 | 4.41 | 1.19 |
## | Occupational particulate matter, gases, and fumes | DALYs per 100,000 | 34.38 | 31.64 | -2.75 | 2.75 | 0.92 |
## | Diet low in vegetables | DALYs per 100,000 | 468.40 | 466.28 | -2.12 | 2.12 | 1.00 |
## | Suboptimal breastfeeding | DALYs per 100,000 | 10.84 | 9.42 | -1.42 | 1.42 | 0.87 |
## | Intimate partner violence | DALYs per 100,000 | 91.58 | 91.70 | 0.12 | 0.12 | 1.00 |
## | Childhood undernutrition | DALYs per 100,000 | 0.17 | 0.07 | -0.10 | 0.10 | 0.43 |
## | Vitamin A deficiency | DALYs per 100,000 | 0.04 | 0.02 | -0.02 | 0.02 | 0.44 |
## | Zinc deficiency | DALYs per 100,000 | 0.21 | 0.22 | 0.01 | 0.01 | 1.05 |
## | Household air pollution from solid fuels | DALYs per 100,000 | 0.00 | 0.00 | 0.00 | 0.00 | NaN |
## | Unsafe sanitation | DALYs per 100,000 | 0.00 | 0.00 | 0.00 | 0.00 | NaN |
## | Unsafe water source | DALYs per 100,000 | 0.00 | 0.00 | 0.00 | 0.00 | NaN |
## Compare 1990 risk factor values from GBD 2013 and GBD 2015 and plot differences
##
## Both releases carry an estimate for 1990, so the same year is taken from
## each, "DALY rank" rows are dropped, and the releases are spread into
## side-by-side columns. One point is drawn per risk factor at the difference
## (gbd2015 - gbd2013); colour and size both encode the GBD 2013 value, and
## risk factors are ordered by the size of the difference.
gbdsum %>%
  filter(year == "1990" & measure != "DALY rank") %>%
  select(risk_factor, measure, value, dataset) %>%
  spread(dataset, value) %>%
  ## Only the signed difference is plotted, so no other derived columns are kept
  mutate(diff = gbd2015 - gbd2013) %>%
  ggplot(aes(reorder(risk_factor, diff), diff)) +
  geom_point(aes(colour = gbd2013, size  = gbd2013)) +
  ## Reference line at zero: points on either side changed in opposite directions
  geom_hline(yintercept = 0) +
  ## Flip so the long risk factor names read horizontally on the vertical axis
  coord_flip() +
  labs(x = "",
       y = "Difference between 2015 and 2013 based 1990 DALY estimates",
       title = "Changes in absolute DALYs per 100,000 for risk factors between \nGBD 2013 and GBD 2015 for England",
       caption = "Source: 2015 - https://vizhub.healthdata.org/gbd-compare/; \n2013 - https://vizhub.healthdata.org/gbd-compare/england/")

## Write the most recently displayed plot to a PDF in the working directory
## (no plot object is passed, so ggsave() falls back to the last plot)
ggsave(filename = "gbd_comparison.pdf", width = 8, height = 10)