library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.5.3

## Warning: package 'tibble' was built under R version 4.5.3

## Warning: package 'tidyr' was built under R version 4.5.3

## Warning: package 'readr' was built under R version 4.5.3

## Warning: package 'purrr' was built under R version 4.5.3

## Warning: package 'dplyr' was built under R version 4.5.3

## Warning: package 'stringr' was built under R version 4.5.3

## Warning: package 'lubridate' was built under R version 4.5.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(stringr)

Neonatal Mortality

Neonatal mortality refers to a death that occurs within the first month of life. The neonatal mortality rate (NMR) summarises how often this happens.
Here we will create a graph of the estimated NMR over the past 50 years for Argentina, Australia, Canada, and Kenya.

Planning

The dataset needs variables that specify the country and the year.
It also needs a variable holding the estimated NMR for that country in that year.

Simulate

Preamble

Purpose: Obtain and prepare data about neonatal mortality for four countries for the past fifty years and create a graph.
Author: Khalifa Mohamed
Email: caliphmoha@gmail.com
Date: 13.05.2026
Prerequisites: -

Work space setup

packageVersion("tidyverse")

## [1] '2.0.0'

packageVersion("janitor")

## [1] '2.2.1'

Simulate Data

# Fix the seed so the simulated values are reproducible.
set.seed(853)

# Simulate one NMR value per country per year: 50 years (1971-2020) for each
# of the four countries of interest. The country labels must match the names
# used when filtering the real data ("Australia", not "Australi").
number_of_years <- 50
simulated_nmr_data <- tibble(
  country = rep(
    c("Argentina", "Australia", "Canada", "Kenya"),
    each = number_of_years
  ),
  year = rep(seq_len(number_of_years) + 1970, times = 4),
  nmr = runif(n = number_of_years * 4, min = 0, max = 100)
)

head(simulated_nmr_data)

## # A tibble: 6 × 3
##   country    year   nmr
##   <chr>     <dbl> <dbl>
## 1 Argentina  1971 35.9 
## 2 Argentina  1972 12.0 
## 3 Argentina  1973 48.4 
## 4 Argentina  1974 31.6 
## 5 Argentina  1975  3.74
## 6 Argentina  1976 40.4

# Testing our data 

# The distinct country labels should be exactly the four expected names.
simulated_nmr_data$country |>
  unique() == c("Argentina", "Australia", "Canada", "Kenya")

## [1] TRUE TRUE TRUE TRUE

# The simulated years should start at 1971 ...
min(simulated_nmr_data$year) == 1971

## [1] TRUE

# ... and end at 2020.
max(simulated_nmr_data$year) == 2020

## [1] TRUE

# NMR values must be non-negative ...
min(simulated_nmr_data$nmr) >= 0

## [1] TRUE

# ... and no larger than 1000.
max(simulated_nmr_data$nmr) <= 1000

## [1] TRUE

# The NMR column must be stored as a numeric (double) vector.
class(simulated_nmr_data$nmr) == "numeric"

## [1] TRUE

Acquire the data

# Load the UN IGME 2021 extract from the local data/ folder. The resulting
# column names are dotted (e.g. "Geographic.area"); clean_names() is applied
# further down to convert them to snake_case.
raw_igme_data <- read.csv("data/UNIGME-2021.csv")

Preserve the csv

# Save an unmodified copy of the raw data. row.names = FALSE stops write.csv()
# from prepending an unnamed row-number column, so the saved file has the same
# columns as the data that was read in.
write.csv(x = raw_igme_data, file = "igme.csv", row.names = FALSE)

names(raw_igme_data)

##  [1] "Geographic.area"        "Indicator"              "Sex"                   
##  [4] "Wealth.Quintile"        "Series.Name"            "Series.Year"           
##  [7] "Regional.group"         "TIME_PERIOD"            "OBS_VALUE"             
## [10] "COUNTRY_NOTES"          "CONNECTION"             "DEATH_CATEGORY"        
## [13] "CATEGORY"               "Observation.Status"     "Unit.of.measure"       
## [16] "Series.Category"        "Series.Type"            "STD_ERR"               
## [19] "REF_DATE"               "Age.Group.of.Women"     "Time.Since.First.Birth"
## [22] "DEFINITION"             "INTERVAL"               "Series.Method"         
## [25] "LOWER_BOUND"            "UPPER_BOUND"            "STATUS"                
## [28] "YEAR_TO_ACHIEVE"        "Model.Used"

dim(raw_igme_data)

## [1] 539365     29

tail(raw_igme_data)

##        Geographic.area         Indicator   Sex Wealth.Quintile      Series.Name
## 539360        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539361        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539362        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539363        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539364        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
## 539365        Zimbabwe Deaths age 5 to 9 Total           Total UN IGME estimate
##        Series.Year Regional.group TIME_PERIOD OBS_VALUE COUNTRY_NOTES
## 539360        2021                    2015-06      2229              
## 539361        2021                    2016-06      2094              
## 539362        2021                    2017-06      1997              
## 539363        2021                    2018-06      1921              
## 539364        2021                    2019-06      1858              
## 539365        2021                    2020-06      1794              
##        CONNECTION DEATH_CATEGORY CATEGORY Observation.Status  Unit.of.measure
## 539360                        NA                Normal value Number of deaths
## 539361                        NA                Normal value Number of deaths
## 539362                        NA                Normal value Number of deaths
## 539363                        NA                Normal value Number of deaths
## 539364                        NA                Normal value Number of deaths
## 539365                        NA                Normal value Number of deaths
##        Series.Category Series.Type STD_ERR REF_DATE Age.Group.of.Women
## 539360                                  NA   2015.5                   
## 539361                                  NA   2016.5                   
## 539362                                  NA   2017.5                   
## 539363                                  NA   2018.5                   
## 539364                                  NA   2019.5                   
## 539365                                  NA   2020.5                   
##        Time.Since.First.Birth DEFINITION INTERVAL Series.Method LOWER_BOUND
## 539360                                          1                      1650
## 539361                                          1                      1368
## 539362                                          1                      1097
## 539363                                          1                       877
## 539364                                          1                       716
## 539365                                          1                       592
##        UPPER_BOUND STATUS YEAR_TO_ACHIEVE Model.Used
## 539360        2959                                  
## 539361        2957                                  
## 539362        2974                                  
## 539363        2988                                  
## 539364        3002                                  
## 539365        2991

# Standardise the column names, then keep only the UN IGME neonatal mortality
# rate estimates (both sexes combined) for the four countries of interest,
# retaining just the country, period, and value columns.
cleaned_igme_data <-
  raw_igme_data |>
  clean_names() |>
  filter(
    indicator == "Neonatal mortality rate",
    series_name == "UN IGME estimate",
    sex == "Total",
    geographic_area %in% c("Argentina", "Australia", "Canada", "Kenya")
  ) |>
  select(geographic_area, time_period, obs_value)

head(cleaned_igme_data)

##   geographic_area time_period obs_value
## 1       Argentina     1970-06  24.85574
## 2       Argentina     1971-06  24.74142
## 3       Argentina     1972-06  24.63325
## 4       Argentina     1973-06  24.57691
## 5       Argentina     1974-06  24.45925
## 6       Argentina     1975-06  24.07021

# Strip the "-06" mid-year suffix so the period becomes an integer year, drop
# anything before 1971, and give the columns the names used in the simulation.
cleaned_igme_data <-
  cleaned_igme_data |>
  mutate(time_period = as.integer(str_remove(time_period, "-06"))) |>
  filter(time_period >= 1971) |>
  rename(country = geographic_area, year = time_period, nmr = obs_value)

head(cleaned_igme_data)

##     country year      nmr
## 1 Argentina 1971 24.74142
## 2 Argentina 1972 24.63325
## 3 Argentina 1973 24.57691
## 4 Argentina 1974 24.45925
## 5 Argentina 1975 24.07021
## 6 Argentina 1976 23.33600

# Save the cleaned data; the Explore step below reloads it from this file.
write_csv(x = cleaned_igme_data, file = "cleaned_igme_data.csv")

Explore

# Reload the cleaned data from disk, suppressing the column-type message.
cleaned_igme_data <- read_csv("cleaned_igme_data.csv", show_col_types = FALSE)

# Plot the estimated NMR against year, one colour per country, with the legend
# placed underneath the panel.
cleaned_igme_data |>
  ggplot(aes(x = year, y = nmr, color = country)) +
  geom_point() +
  theme_minimal() +
  labs(x = "Year", y = "Neonatal Mortality Rate (NMR)", color = "Country") +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "bottom")

Concluding Remarks

Neonatal mortality refers to a death that occurs within the first month of life. In particular, the neonatal mortality rate (NMR) is the number of neonatal deaths per 1,000 live births. We obtain estimates for NMR for four countries—Argentina, Australia, Canada, and Kenya—over the past 50 years.
The UN Inter-agency Group for Child Mortality Estimation (IGME) provides estimates of the NMR at the website: https://childmortality.org/. We downloaded their estimates then cleaned and tidied the dataset using the statistical programming language R (R Core Team 2023).
We found considerable change in the estimated NMR over time and between the four countries of interest (Figure 2.8). We found that the 1970s tended to be associated with reductions in the estimated NMR. Australia and Canada were estimated to have a low NMR at that point and remained there through 2020, with further slight reductions. The estimates for Argentina and Kenya continued to have substantial reductions through 2020.
Results suggest considerable improvements in estimated NMR over time. NMR estimates are based on a statistical model and underlying data. The double burden of data is that often high-quality data are less easily available for groups, in this case countries, with worse outcomes. Our conclusions are subject to the model that underpins the estimates and the quality of the underlying data, and we did not independently verify either of these.

Portfolio_1

Khalifa Mohamed

Neonatal Mortality

Planning

Simulate

Preamble

Work space setup

Simulate Data

Acquire the data

Preserve the csv

Explore

Concluding Remarks