library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'tibble' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'purrr' was built under R version 4.5.3
## Warning: package 'dplyr' was built under R version 4.5.3
## Warning: package 'stringr' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(stringr)
The data set needs to have variables that specify the country and the year.
It also needs to have a variable containing the estimated NMR for that country.
# Report the installed versions of the two packages loaded above.
packageVersion(pkg = "tidyverse")
## [1] '2.0.0'
packageVersion(pkg = "janitor")
## [1] '2.2.1'
# Simulate a data set with the structure described above: one row per
# country-year, with a country name, a year, and an NMR value.
# Fixing the seed makes the random draws reproducible.
set.seed(853)
number_of_years <- 50
simulated_nmr_data <- tibble(
  # Each country name is repeated once per simulated year.
  country = rep(
    c("Argentina", "Australia", "Canada", "Kenya"),
    each = number_of_years
  ),
  # Years 1971 to 2020, repeated once for each of the four countries.
  year = rep(1970 + seq_len(number_of_years), times = 4),
  # One uniform draw between 0 and 100 per country-year.
  nmr = runif(n = number_of_years * 4, min = 0, max = 100)
)
head(simulated_nmr_data)
## # A tibble: 6 × 3
## country year nmr
## <chr> <dbl> <dbl>
## 1 Argentina 1971 35.9
## 2 Argentina 1972 12.0
## 3 Argentina 1973 48.4
## 4 Argentina 1974 31.6
## 5 Argentina 1975 3.74
## 6 Argentina 1976 40.4
# Testing our data
# The distinct country names should be exactly these four, in this order.
unique(simulated_nmr_data$country) ==
  c("Argentina", "Australia", "Canada", "Kenya")
## [1] TRUE TRUE TRUE TRUE
# The years should span 1971 to 2020.
simulated_nmr_data$year |> min() == 1971
## [1] TRUE
simulated_nmr_data$year |> max() == 2020
## [1] TRUE
# NMR is a rate per 1,000 live births, so it must lie between 0 and 1000.
simulated_nmr_data$nmr |> min() >= 0
## [1] TRUE
simulated_nmr_data$nmr |> max() <= 1000
## [1] TRUE
# is.numeric() is the idiomatic type check (rather than comparing class()).
is.numeric(simulated_nmr_data$nmr)
## [1] TRUE
# Read the raw estimates from the downloaded CSV file.
raw_igme_data <- read.csv("data/UNIGME-2021.csv")
# Save a copy of the raw data. row.names = FALSE stops write.csv() from
# adding an unnamed column of row numbers to the saved file.
write.csv(x = raw_igme_data, file = "igme.csv", row.names = FALSE)
# List the column names of the raw data.
names(raw_igme_data)
## [1] "Geographic.area" "Indicator" "Sex"
## [4] "Wealth.Quintile" "Series.Name" "Series.Year"
## [7] "Regional.group" "TIME_PERIOD" "OBS_VALUE"
## [10] "COUNTRY_NOTES" "CONNECTION" "DEATH_CATEGORY"
## [13] "CATEGORY" "Observation.Status" "Unit.of.measure"
## [16] "Series.Category" "Series.Type" "STD_ERR"
## [19] "REF_DATE" "Age.Group.of.Women" "Time.Since.First.Birth"
## [22] "DEFINITION" "INTERVAL" "Series.Method"
## [25] "LOWER_BOUND" "UPPER_BOUND" "STATUS"
## [28] "YEAR_TO_ACHIEVE" "Model.Used"
# Number of rows and columns in the raw data.
dim(raw_igme_data)
## [1] 539365 29
# Print the last six rows of the raw data.
tail(raw_igme_data, n = 6L)
## Geographic.area Indicator Sex Wealth.Quintile Series.Name
## 539360 Zimbabwe Deaths age 5 to 9 Total Total UN IGME estimate
## 539361 Zimbabwe Deaths age 5 to 9 Total Total UN IGME estimate
## 539362 Zimbabwe Deaths age 5 to 9 Total Total UN IGME estimate
## 539363 Zimbabwe Deaths age 5 to 9 Total Total UN IGME estimate
## 539364 Zimbabwe Deaths age 5 to 9 Total Total UN IGME estimate
## 539365 Zimbabwe Deaths age 5 to 9 Total Total UN IGME estimate
## Series.Year Regional.group TIME_PERIOD OBS_VALUE COUNTRY_NOTES
## 539360 2021 2015-06 2229
## 539361 2021 2016-06 2094
## 539362 2021 2017-06 1997
## 539363 2021 2018-06 1921
## 539364 2021 2019-06 1858
## 539365 2021 2020-06 1794
## CONNECTION DEATH_CATEGORY CATEGORY Observation.Status Unit.of.measure
## 539360 NA Normal value Number of deaths
## 539361 NA Normal value Number of deaths
## 539362 NA Normal value Number of deaths
## 539363 NA Normal value Number of deaths
## 539364 NA Normal value Number of deaths
## 539365 NA Normal value Number of deaths
## Series.Category Series.Type STD_ERR REF_DATE Age.Group.of.Women
## 539360 NA 2015.5
## 539361 NA 2016.5
## 539362 NA 2017.5
## 539363 NA 2018.5
## 539364 NA 2019.5
## 539365 NA 2020.5
## Time.Since.First.Birth DEFINITION INTERVAL Series.Method LOWER_BOUND
## 539360 1 1650
## 539361 1 1368
## 539362 1 1097
## 539363 1 877
## 539364 1 716
## 539365 1 592
## UPPER_BOUND STATUS YEAR_TO_ACHIEVE Model.Used
## 539360 2959
## 539361 2957
## 539362 2974
## 539363 2988
## 539364 3002
## 539365 2991
# Standardise the column names, then keep only what the analysis needs:
# rows that are "UN IGME estimate" series of the "Neonatal mortality rate"
# with sex "Total" for the four countries of interest, and the three columns
# holding the country, the time period, and the observed value.
cleaned_igme_data <-
  raw_igme_data |>
  clean_names() |>
  filter(
    indicator == "Neonatal mortality rate",
    series_name == "UN IGME estimate",
    sex == "Total",
    geographic_area %in% c("Argentina", "Australia", "Canada", "Kenya")
  ) |>
  select(geographic_area, time_period, obs_value)
cleaned_igme_data |>
  head()
## geographic_area time_period obs_value
## 1 Argentina 1970-06 24.85574
## 2 Argentina 1971-06 24.74142
## 3 Argentina 1972-06 24.63325
## 4 Argentina 1973-06 24.57691
## 5 Argentina 1974-06 24.45925
## 6 Argentina 1975-06 24.07021
# Strip the "-06" suffix from the period labels and convert them to integer
# years, keep 1971 onwards, and rename the columns to country, year, and nmr.
cleaned_igme_data <-
  cleaned_igme_data |>
  mutate(time_period = as.integer(str_remove(time_period, "-06"))) |>
  filter(time_period >= 1971) |>
  rename(country = geographic_area, year = time_period, nmr = obs_value)
cleaned_igme_data |>
  head()
## country year nmr
## 1 Argentina 1971 24.74142
## 2 Argentina 1972 24.63325
## 3 Argentina 1973 24.57691
## 4 Argentina 1974 24.45925
## 5 Argentina 1975 24.07021
## 6 Argentina 1976 23.33600
# Save the cleaned data, then read it back in from that file.
write_csv(cleaned_igme_data, file = "cleaned_igme_data.csv")
cleaned_igme_data <- read_csv("cleaned_igme_data.csv", show_col_types = FALSE)
# Plot NMR against year as points, with one colour per country and the
# legend placed below the plot.
cleaned_igme_data |>
  ggplot(aes(x = year, y = nmr, color = country)) +
  geom_point() +
  theme_minimal() +
  labs(x = "Year", y = "Neonatal Mortality Rate (NMR)", color = "Country") +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "bottom")
Neonatal mortality refers to a death that occurs within the first month of life. In particular, the neonatal mortality rate (NMR) is the number of neonatal deaths per 1,000 live births. We obtain estimates for NMR for four countries—Argentina, Australia, Canada, and Kenya—over the past 50 years.
The UN Inter-agency Group for Child Mortality Estimation (IGME) provides estimates of the NMR at the website: https://childmortality.org/. We downloaded their estimates then cleaned and tidied the data set using the statistical programming language R (R Core Team 2023).
We found considerable change in the estimated NMR over time and between the four countries of interest (Figure 2.8). We found that the 1970s tended to be associated with reductions in the estimated NMR. Australia and Canada were estimated to have a low NMR at that point and remained there through 2020, with further slight reductions. The estimates for Argentina and Kenya continued to have substantial reductions through 2020.
Results suggest considerable improvements in estimated NMR over time. NMR estimates are based on a statistical model and underlying data. The double burden of data is that often high-quality data are less easily available for groups, in this case countries, with worse outcomes. Our conclusions are subject to the model that underpins the estimates and the quality of the underlying data, and we did not independently verify either of these.