library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'tibble' was built under R version 4.5.3
## Warning: package 'tidyr' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'purrr' was built under R version 4.5.3
## Warning: package 'dplyr' was built under R version 4.5.3
## Warning: package 'stringr' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(stringr)

Neonatal Mortality

Planning

  • The dataset needs to have variables that specify the country and the year

  • It also needs a variable holding the estimated neonatal mortality rate (NMR) for that country in that year

Simulate

Preamble

  • Purpose: Obtain and prepare data about neonatal mortality for four countries for the past fifty years and create a graph.
  • Author: Khalifa Mohamed
  • Email:
  • Date: 13.05.2026
  • Prerequisites: -

Workspace setup

# Record the installed versions of the two packages this analysis loads.
utils::packageVersion("tidyverse")
## [1] '2.0.0'
utils::packageVersion("janitor")
## [1] '2.2.1'

Simulate Data

# Simulate a stand-in for the real data: one NMR value per country per year,
# for four countries over fifty years (1971-2020). The seed makes the random
# draw reproducible.
set.seed(853)
number_of_years <- 50
simulated_nmr_data <- tibble(
  # Country names are spelled exactly as in the filter applied to the IGME
  # data, so the simulated and real data sets use the same labels.
  country = rep(
    c("Argentina", "Australia", "Canada", "Kenya"),
    each = number_of_years
  ),
  # The same run of years (1971-2020) is repeated once per country.
  year = rep(seq_len(number_of_years) + 1970, times = 4),
  # One uniform draw between 0 and 100 per country-year.
  nmr = runif(n = number_of_years * 4, min = 0, max = 100)
)

head(simulated_nmr_data)
## # A tibble: 6 × 3
##   country    year   nmr
##   <chr>     <dbl> <dbl>
## 1 Argentina  1971 35.9 
## 2 Argentina  1972 12.0 
## 3 Argentina  1973 48.4 
## 4 Argentina  1974 31.6 
## 5 Argentina  1975  3.74
## 6 Argentina  1976 40.4
# Testing our data

# The distinct country labels are the four expected names, in order.
simulated_nmr_data$country |>
  unique() == c("Argentina", "Australia", "Canada", "Kenya")
## [1] TRUE TRUE TRUE TRUE
# Years span 1971 to 2020.
simulated_nmr_data$year |> min() == 1971
## [1] TRUE
simulated_nmr_data$year |> max() == 2020
## [1] TRUE
# NMR values are non-negative and no larger than 1000.
simulated_nmr_data$nmr |> min() >= 0
## [1] TRUE
simulated_nmr_data$nmr |> max() <= 1000
## [1] TRUE
# NMR is stored as a number (is.numeric() instead of comparing class strings).
simulated_nmr_data$nmr |> is.numeric()
## [1] TRUE

Acquire the data

# Load the raw UN IGME export from the local data directory.
raw_igme_data <- read.csv(file = "data/UNIGME-2021.csv")

Preserve the csv

# Keep an untouched copy of the raw data. row.names = FALSE stops write.csv()
# from prepending R's row numbers as an extra unnamed column, so the saved
# file has exactly the columns of raw_igme_data.
write.csv(x = raw_igme_data, file = "igme.csv", row.names = FALSE)
# List the column names to see which variables are available.
names(raw_igme_data)
##  [1] "Geographic.area"        "Indicator"              "Sex"                   
##  [4] "Wealth.Quintile"        "Series.Name"            "Series.Year"           
##  [7] "Regional.group"         "TIME_PERIOD"            "OBS_VALUE"             
## [10] "COUNTRY_NOTES"          "CONNECTION"             "DEATH_CATEGORY"        
## [13] "CATEGORY"               "Observation.Status"     "Unit.of.measure"       
## [16] "Series.Category"        "Series.Type"            "STD_ERR"               
## [19] "REF_DATE"               "Age.Group.of.Women"     "Time.Since.First.Birth"
## [22] "DEFINITION"             "INTERVAL"               "Series.Method"         
## [25] "LOWER_BOUND"            "UPPER_BOUND"            "STATUS"                
## [28] "YEAR_TO_ACHIEVE"        "Model.Used"
# Size of the raw data: number of rows, then number of columns.
raw_igme_data |> dim()
## [1] 539365     29
# Look at the last rows to see what a single observation contains.
raw_igme_data |> tail()
##        Geographic.area         Indicator   Sex Wealth.Quintile      Series.Name
## 539360        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539361        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539362        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539363        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539364        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539365        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
##        Series.Year Regional.group TIME_PERIOD OBS_VALUE COUNTRY_NOTES
## 539360        2021                    2015-06      2229              
## 539361        2021                    2016-06      2094              
## 539362        2021                    2017-06      1997              
## 539363        2021                    2018-06      1921              
## 539364        2021                    2019-06      1858              
## 539365        2021                    2020-06      1794              
##        CONNECTION DEATH_CATEGORY CATEGORY Observation.Status  Unit.of.measure
## 539360                        NA                Normal value Number of deaths
## 539361                        NA                Normal value Number of deaths
## 539362                        NA                Normal value Number of deaths
## 539363                        NA                Normal value Number of deaths
## 539364                        NA                Normal value Number of deaths
## 539365                        NA                Normal value Number of deaths
##        Series.Category Series.Type STD_ERR REF_DATE Age.Group.of.Women
## 539360                                  NA   2015.5                   
## 539361                                  NA   2016.5                   
## 539362                                  NA   2017.5                   
## 539363                                  NA   2018.5                   
## 539364                                  NA   2019.5                   
## 539365                                  NA   2020.5                   
##        Time.Since.First.Birth DEFINITION INTERVAL Series.Method LOWER_BOUND
## 539360                                          1                      1650
## 539361                                          1                      1368
## 539362                                          1                      1097
## 539363                                          1                       877
## 539364                                          1                       716
## 539365                                          1                       592
##        UPPER_BOUND STATUS YEAR_TO_ACHIEVE Model.Used
## 539360        2959                                  
## 539361        2957                                  
## 539362        2974                                  
## 539363        2988                                  
## 539364        3002                                  
## 539365        2991
# Narrow the raw data to the NMR series for the four countries of interest:
# standardise the column names, keep only the matching rows, then keep only
# the country, period and value columns.
cleaned_igme_data <-
  raw_igme_data |>
  clean_names() |>
  filter(
    indicator == "Neonatal mortality rate",
    series_name == "UN IGME estimate",
    sex == "Total",
    geographic_area %in% c("Argentina", "Australia", "Canada", "Kenya")
  ) |>
  select(geographic_area, time_period, obs_value)

# Check the first rows of the reduced data.
cleaned_igme_data |> head()
##   geographic_area time_period obs_value
## 1       Argentina     1970-06  24.85574
## 2       Argentina     1971-06  24.74142
## 3       Argentina     1972-06  24.63325
## 4       Argentina     1973-06  24.57691
## 5       Argentina     1974-06  24.45925
## 6       Argentina     1975-06  24.07021
# Tidy the reduced data: strip the "-06" part from periods such as "1970-06"
# and store the year as an integer, give the columns short names, and keep
# only rows from 1971 onwards.
cleaned_igme_data <-
  cleaned_igme_data |>
  mutate(time_period = as.integer(str_remove(time_period, "-06"))) |>
  rename(country = geographic_area, year = time_period, nmr = obs_value) |>
  filter(year >= 1971)

# Check the first rows of the tidied data.
cleaned_igme_data |> head()
##     country year      nmr
## 1 Argentina 1971 24.74142
## 2 Argentina 1972 24.63325
## 3 Argentina 1973 24.57691
## 4 Argentina 1974 24.45925
## 5 Argentina 1975 24.07021
## 6 Argentina 1976 23.33600
# Save the cleaned data to disk.
cleaned_igme_data |>
  write_csv(file = "cleaned_igme_data.csv")

Explore

# Reload the cleaned data from disk; show_col_types = FALSE keeps readr from
# printing the column specification.
cleaned_igme_data <-
  read_csv(
    file = "cleaned_igme_data.csv",
    show_col_types = FALSE
  )

# Plot NMR against year as points, one colour per country, with the legend
# placed below the panel.
cleaned_igme_data |>
  ggplot(aes(x = year, y = nmr, color = country)) +
  geom_point() +
  theme_minimal() +
  labs(
    x = "Year",
    # Axis title written with proper word spacing.
    y = "Neonatal Mortality Rate (NMR)",
    color = "Country"
  ) +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "bottom")

Concluding Remarks