# Load all required libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggplot2)
library(lme4)
## Warning: package 'lme4' was built under R version 4.4.3
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(broom)
## Warning: package 'broom' was built under R version 4.4.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(readr)
library(dplyr)
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE

2. Neonatal and Under-5 Mortality Rate

The analysis of under-five and neonatal mortality rates in East African Community (EAC) countries showed that, in the latest available data (2023), Somalia had the highest under-five mortality rate at 104.02 deaths per 1,000 live births, while South Sudan had the highest neonatal mortality rate at 40.24 deaths per 1,000 live births. Choropleth maps and trend plots revealed regional declines, although Somalia and South Sudan continue to face substantial healthcare system challenges. Merging the mortality data with the shapefile required standardizing country names so that both sources use the same spelling. The findings indicate critical disparities in healthcare within the EAC, emphasizing the importance of targeted efforts to reduce child mortality in the most vulnerable nations.

# Keep, for every geographic area, only the rows from its most recent year
prepare_data <- function(df) {
  # The first four characters of TIME_PERIOD are parsed as the numeric year
  with_year <- mutate(df, Year = as.numeric(substr(TIME_PERIOD, 1, 4)))
  per_area <- group_by(with_year, Geographic.area)
  latest_rows <- filter(per_area, Year == max(Year, na.rm = TRUE))
  ungroup(latest_rows)
}
# EAC countries to keep; the spellings must match the shapefile's NAME values
# (adjust them if your shapefile uses different names)
eac_countries <- c(
  "Kenya",
  "Uganda",
  "Tanzania",
  "Rwanda",
  "Burundi",
  "South Sudan",
  "Congo DRC",
  "Somalia"
)
# Read shapefile (first pass, used to inspect the raw NAME values; the
# shapefile is read again further down, where Country is standardized)
eac_shape <- st_read("EAC_COUNTRIES.shp")
## Reading layer `EAC_COUNTRIES' from data source 
##   `D:\Data Analysis Projects\CEMA DATA SCIENCE\EAC_COUNTRIES.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1359718 ymin: -1512113 xmax: 5723082 ymax: 1357225
## Projected CRS: WGS 84 / Pseudo-Mercator
# Copy NAME into a character Country column and print it, to see exactly how
# the shapefile spells each country
eac_shape$Country <- as.character(eac_shape$NAME)
eac_shape$Country
## [1] "Burundi"     "Congo DRC"   "Kenya"       "Rwanda"      "Tanzania"   
## [6] "South Sudan" "Uganda"      "Somalia"
# Re-read the shapefile and derive a Country column with harmonised spellings;
# names that need no change are carried over from NAME as-is
eac_shape <- mutate(
  st_read("EAC_COUNTRIES.shp"),
  Country = case_when(
    NAME == "Democratic Republic of the Congo" ~ "Congo DRC",
    NAME == "United Republic of Tanzania" ~ "Tanzania",
    TRUE ~ NAME
  )
)
## Reading layer `EAC_COUNTRIES' from data source 
##   `D:\Data Analysis Projects\CEMA DATA SCIENCE\EAC_COUNTRIES.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1359718 ymin: -1512113 xmax: 5723082 ymax: 1357225
## Projected CRS: WGS 84 / Pseudo-Mercator
# Read a mortality CSV, harmonise its country names and keep selected rows.
#
# Both indicator files go through exactly the same steps, so the pipeline
# lives in one place and the two datasets cannot drift apart.
#
# path:           path to a CSV file with a Geographic.area column (the name
#                 as it appears after read.csv's column-name conversion).
# keep_countries: character vector of country names to retain; defaults to
#                 eac_countries.
# Returns the data frame restricted to keep_countries, with the DRC and
# Tanzania name variants rewritten to "Congo DRC" and "Tanzania".
read_eac_mortality <- function(path, keep_countries = eac_countries) {
  read.csv(path) %>%
    # Standardize country names
    mutate(
      Geographic.area = case_when(
        Geographic.area %in% c("Democratic Republic of the Congo", "Congo, Dem. Rep.") ~ "Congo DRC",
        Geographic.area %in% c("United Republic of Tanzania", "Tanzania, United Rep.") ~ "Tanzania",
        TRUE ~ Geographic.area
      )
    ) %>%
    # Filter to the requested countries
    filter(Geographic.area %in% keep_countries)
}

# Reading csv data
# Neonatal data
neonatal <- read_eac_mortality("Neonatal Mortality Rate.csv")

# Under-5 data
under5 <- read_eac_mortality("Under Five Mortality Rate.csv")

Renaming the Mortality Indicators Data

# Latest-year rows per country, with the value column renamed after its
# indicator and Geographic.area renamed to Country
neonatal_latest <- rename(
  prepare_data(neonatal),
  NeonatalRate = OBS_VALUE,
  Country = Geographic.area
)

under5_latest <- rename(
  prepare_data(under5),
  Under5Rate = OBS_VALUE,
  Country = Geographic.area
)
# Check names in mortality data: Country is the key used to join these tables
# onto the shapefile, so every spelling here must also appear in the
# shapefile list printed below
cat("Neonatal data countries:", unique(neonatal_latest$Country), "\n")
## Neonatal data countries: Kenya Uganda Tanzania South Sudan Somalia Burundi Rwanda Congo DRC
cat("Under5 data countries:", unique(under5_latest$Country), "\n")
## Under5 data countries: Kenya Uganda Tanzania South Sudan Somalia Burundi Rwanda Congo DRC
# Check names in shapefile (same eight spellings expected, order may differ)
cat("Shapefile countries:", unique(eac_shape$Country), "\n")
## Shapefile countries: Burundi Congo DRC Kenya Rwanda Tanzania South Sudan Uganda Somalia
# Attach the columns of `data` to the shapefile features, matching on Country
merge_shape <- function(shape, data) {
  # shape is the left-hand table, so each of its features is kept even when
  # `data` has no matching Country
  left_join(shape, data, by = "Country")
}

Adding Labels in the Countries’ Shapefiles

# Append centroid coordinates (lon, lat) and a text Label to each feature
#
# df:       merged shapefile/data object with a Country column.
# rate_col: name (string) of the rate column used to detect missing data.
# Returns df with lon, lat and Label columns added.
add_centroids <- function(df, rate_col) {
  # Centroid of every feature, flattened to a plain X/Y table and renamed
  xy <- as.data.frame(st_coordinates(st_centroid(df)))
  xy <- rename(xy, lon = X, lat = Y)

  labelled <- cbind(df, xy)

  # Features without a rate get an explicit "(Data Missing)" second line
  rate_missing <- is.na(labelled[[rate_col]])
  labelled$Label <- ifelse(
    rate_missing,
    paste0(labelled$Country, "\n(Data Missing)"),
    as.character(labelled$Country)
  )

  labelled
}

# Build the map-ready layers: join the latest rates onto the shapes, then add
# centroid coordinates and labels
neonatal_map <- add_centroids(
  merge_shape(eac_shape, neonatal_latest),
  rate_col = "NeonatalRate"
)
## Warning: st_centroid assumes attributes are constant over geometries
under5_map <- add_centroids(
  merge_shape(eac_shape, under5_latest),
  rate_col = "Under5Rate"
)
## Warning: st_centroid assumes attributes are constant over geometries

Visualizations of Latest Estimates for Each Indicator

To show the latest estimate of each indicator across the EAC countries, a choropleth map is used: each country is shaded according to its estimate. In the under-5 mortality map below, the legend's colors run from blue (low) to yellow (high); Uganda and Tanzania sit at the lower end of the scale, while South Sudan and Somalia sit at the upper end.

The neonatal mortality rate map shows a broadly similar pattern.

# Generate choropleth maps
#
# df:       spatial data frame drawn with geom_sf(); must contain the column
#           named by rate_var plus lon, lat and Label columns (as added by
#           add_centroids()).
# rate_var: name (string) of the column mapped to the fill colour.
# title:    plot title.
# Returns a ggplot object.
plot_choropleth <- function(df, rate_var, title) {
  rates <- df[[rate_var]]

  # na.rm has to be an argument of range(), not an element of c(): inside c()
  # TRUE is coerced to the number 1, which then becomes the lower limit of the
  # colour scale, and missing rates are not removed at all.
  # When no rate is observed, fall back to ggplot2's default limits (NULL).
  fill_limits <- if (all(is.na(rates))) NULL else range(rates, na.rm = TRUE)

  ggplot(df) +
    geom_sf(aes(fill = .data[[rate_var]]), color = "white") +
    geom_text(aes(x = lon, y = lat, label = Label), size = 3.5, color = "black") +
    scale_fill_viridis_c(
      name = "Deaths per 1,000",
      option = "C",
      na.value = "grey90",
      limits = fill_limits
    ) +
    labs(title = title) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
      legend.position = "right"
    )
}

# Render the two maps; the "(2023)" in each title is fixed text and is not
# derived from the data
plot_choropleth(under5_map, "Under5Rate", "Under-Five Mortality Rate in EAC (2023)")

plot_choropleth(neonatal_map, "NeonatalRate", "Neonatal Mortality Rate in EAC (2023)")

Preparing Data for Trend Analysis

# Build the trend table: rows for shapefile countries, all years, with the
# per-year average rate attached to every row
prepare_trends <- function(df) {
  # The first four characters of TIME_PERIOD are parsed as the numeric year
  with_year <- mutate(df, Year = as.numeric(substr(TIME_PERIOD, 1, 4)))
  # Country spellings are taken from the global eac_shape object
  eac_rows <- filter(with_year, Geographic.area %in% eac_shape$Country)
  # AvgRate is the mean of OBS_VALUE over all rows that share a Year
  by_year <- group_by(eac_rows, Year)
  ungroup(mutate(by_year, AvgRate = mean(OBS_VALUE, na.rm = TRUE)))
}

# Trend tables for both indicators (every year in the data, with AvgRate
# holding the cross-country mean for that year)
neonatal_trend <- prepare_trends(neonatal)
under5_trend <- prepare_trends(under5)

Highest Under 5 Mortality Rate Country

To find the country with the highest under-5 mortality rate in East Africa, the latest data were sorted in descending order, from the country with the highest rate to the one with the lowest. The under-5 mortality rate ranges from 38.81 to 104.02 deaths per 1,000 live births; Somalia has the highest rate, at 104.02 deaths per 1,000 live births.

# Rank countries by their latest under-5 mortality rate, highest first
cat("Country with Highest Under-5 Mortality:\n")
## Country with Highest Under-5 Mortality:
select(
  arrange(under5_latest, desc(Under5Rate)),
  Country, Under5Rate, Year
)
## # A tibble: 8 × 3
##   Country     Under5Rate  Year
##   <chr>            <dbl> <dbl>
## 1 Somalia          104.   2023
## 2 South Sudan       98.7  2023
## 3 Congo DRC         73.2  2023
## 4 Burundi           49.2  2023
## 5 Rwanda            40.0  2023
## 6 Kenya             39.9  2023
## 7 Tanzania          38.9  2023
## 8 Uganda            38.8  2023

Highest Neonatal Mortality Rate Country

Similarly, the neonatal mortality rate ranges from 17.85 to 40.24 deaths per 1,000 live births, with South Sudan exhibiting the highest neonatal mortality rate in East Africa, at 40.24 deaths per 1,000 live births.

# Rank countries by their latest neonatal mortality rate, highest first
cat("\nCountry with Highest Neonatal Mortality:\n")
## 
## Country with Highest Neonatal Mortality:
select(
  arrange(neonatal_latest, desc(NeonatalRate)),
  Country, NeonatalRate, Year
)
## # A tibble: 8 × 3
##   Country     NeonatalRate  Year
##   <chr>              <dbl> <dbl>
## 1 South Sudan         40.2  2023
## 2 Somalia             34.9  2023
## 3 Congo DRC           25.3  2023
## 4 Kenya               21.5  2023
## 5 Tanzania            20.6  2023
## 6 Burundi             19.6  2023
## 7 Rwanda              18.1  2023
## 8 Uganda              17.9  2023