HIV and Multidimensional Poverty Analysis

1. Data Loading

# Read the HIV estimates CSV into a tibble; readr guesses the column types
hiv_data <- readr::read_csv(file = "HIV data 2000-2023.csv")

## Rows: 1552 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): IndicatorCode, Indicator, ValueType, ParentLocationCode, ParentLoc...
## dbl  (1): Period
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Read the poverty workbook, skipping one row before the header row
poverty_data <- readxl::read_excel(
  path = "multidimensional_poverty.xlsx",
  skip = 1
)

## New names:
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

2. Data Cleaning & Preparation

library(dplyr)
library(stringr)
library(readr)

# Cleaning HIV data
# `Value` is read as text, so the point estimate is taken from the leading run
# of digits and whitespace (thousands are space-separated); whatever follows
# that run is ignored. Entries that do not start with a digit become NA.
hiv_data_clean <- hiv_data %>%
  rename(Country = Location, Year = Period) %>%
  mutate(
    # `*` rather than `+`: a bare single-digit estimate such as "7" must still
    # match instead of silently turning into NA
    HIV_Estimate = Value %>%
      str_extract("^\\d[\\d\\s]*") %>%
      str_remove_all("\\s") %>%
      parse_number(),
    Year = as.numeric(Year)
  ) %>%
  select(Country, Year, ParentLocationCode, HIV_Estimate)

# Lookup from region code (names) to the label shown in plots (values)
region_labels <- setNames(
  c(
    "Africa", "Americas", "Eastern Mediterranean", "Europe",
    "South-East Asia", "Western Pacific", "Global", "World"
  ),
  c("AFR", "AMR", "EMR", "EUR", "SEAR", "WPR", "GLOBAL", "WORLD")
)

# Add a readable Region column; codes missing from the lookup pass through
# unchanged
hiv_data_clean <- mutate(
  hiv_data_clean,
  Region = recode(ParentLocationCode, !!!region_labels)
)

# Cleaning poverty data: keep the three columns used downstream, renaming them
# in the same step, and make the reporting year numeric
# NOTE(review): `Monetary_Deprivation` holds the multidimensional poverty
# headcount ratio, not a monetary measure - consider renaming it everywhere.
poverty_clean <- poverty_data %>%
  select(
    Country = Economy,
    Reporting_Year = `Reporting year`,
    Monetary_Deprivation = `Multidimensional poverty headcount ratio (%)`
  ) %>%
  mutate(Reporting_Year = as.numeric(Reporting_Year))

# Merging the datasets: keep only country-year pairs present in both tables
# (HIV `Year` is matched against the poverty `Reporting_Year`)
hiv_poverty <- inner_join(
  hiv_data_clean,
  poverty_clean,
  by = c("Country" = "Country", "Year" = "Reporting_Year")
)

3. Global Top 75% HIV Burden Trends

# Top 75% countries globally: rank countries by their HIV estimates summed over
# all years and keep the leading ones up to the first whose cumulative share
# reaches 75%
top75_global <- local({
  ranked_totals <- hiv_data_clean %>%
    filter(!is.na(HIV_Estimate)) %>%
    group_by(Country) %>%
    summarise(Total_HIV = sum(HIV_Estimate, na.rm = TRUE)) %>%
    arrange(desc(Total_HIV)) %>%
    mutate(Cumulative = cumsum(Total_HIV) / sum(Total_HIV))

  # Position of the first country at which the 75% mark is reached
  cutoff_row <- which(ranked_totals$Cumulative >= 0.75)[1]

  filter(ranked_totals, row_number() <= cutoff_row)
})

# Filtering HIV data down to the rows of those countries
hiv_top_global <- semi_join(hiv_data_clean, top75_global, by = "Country")

# Global visualisation plot: one panel per country, each with its own y-scale
ggplot(data = hiv_top_global, mapping = aes(Year, HIV_Estimate)) +
  geom_line(colour = "steelblue", size = 1) +
  facet_wrap(~ Country, scales = "free_y") +
  ggtitle("Global Top 75% HIV Burden by Country (2000–2023)") +
  xlab("Year") +
  ylab("HIV Estimate") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(size = 8)
  )

4. Regional Top 75% HIV Burden Trends

# Top 75% by region
# Within each region, rank countries by their HIV estimates summed over all
# years and keep the leading countries up to the first one whose cumulative
# regional share reaches 75%.
top75_regional <- hiv_data_clean %>%
  filter(!is.na(HIV_Estimate), !is.na(Region)) %>%
  group_by(Region, Country) %>%
  summarise(Total_HIV = sum(HIV_Estimate), .groups = "drop") %>%
  arrange(Region, desc(Total_HIV)) %>%
  group_by(Region) %>%
  mutate(Cumulative = cumsum(Total_HIV) / sum(Total_HIV)) %>%
  filter(row_number() <= which(Cumulative >= 0.75)[1]) %>%
  # Drop the Region grouping so later verbs on this table are not silently
  # applied per region
  ungroup()


# Joining back to data: all yearly rows of the selected countries
hiv_top_regional <- hiv_data_clean %>%
  filter(Country %in% top75_regional$Country, !is.na(Region))

# Regional visual representation: one chart per region, one line per country
region_list <- unique(hiv_top_regional$Region)

# Builds the HIV trend chart for a single region's top-burden countries
plot_region_trend <- function(region_name) {
  hiv_top_regional %>%
    filter(Region == region_name) %>%
    ggplot(aes(x = Year, y = HIV_Estimate, color = Country)) +
    geom_line(size = 1) +
    labs(
      title = paste("HIV Trends in", region_name, "(Top Burdened Countries)"),
      x = "Year",
      y = "HIV Estimate"
    ) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "bottom",
      legend.title = element_blank(),
      legend.text = element_text(size = 10)
    ) +
    guides(color = guide_legend(ncol = 3))
}

# Plots built inside a loop are not auto-printed, so print each explicitly
for (region_name in region_list) {
  print(plot_region_trend(region_name))
}

5. Mixed Effects Modeling

# Preparing modeling data
# Keep complete rows only, then only countries with more than one matched
# year - presumably so the per-country random intercept is estimated from
# repeated observations. A country matched on a single year is dropped here,
# so this table can be empty even when the merge itself returned rows.
model_data <- hiv_poverty %>%
  select(Country, Year, HIV_Estimate, Monetary_Deprivation) %>%
  drop_na() %>%
  group_by(Country) %>%
  filter(n() > 1) %>%
  ungroup()

# Fitting the model
# A random intercept per country needs at least two countries; lmer() stops
# with an error when the grouping factor has a single level, so that case is
# reported instead of attempted.
if (nrow(model_data) == 0) {
  cat("❌ No complete cases available for modeling.\n")
} else if (n_distinct(model_data$Country) < 2) {
  cat("❌ Fewer than two countries with repeated observations; model not fitted.\n")
} else {
  model <- lmer(
    HIV_Estimate ~ Monetary_Deprivation + Year + (1 | Country),
    data = model_data
  )
  summary(model)
}

## ❌ No complete cases available for modeling.

6. Summary of Findings

- Top HIV burden countries globally (2000–2023) include South Africa, India, Nigeria, Mozambique, Kenya, Brazil, and the United States; the top-ranked countries together account for ~75% of global HIV estimates.
- Regionally, Africa is the most impacted, with countries such as South Africa, Nigeria, Uganda, and Tanzania leading. Other regions show a concentrated burden in India (South-East Asia), Brazil and the USA (Americas), and Russia (Europe).
- Poverty dimensions such as education, electricity, sanitation, and income deprivation are higher in countries with a high HIV burden.
- The mixed-effects model could not be fitted: no complete cases remained in the merged HIV–poverty data after filtering (see Section 5). The link between multidimensional poverty and HIV estimates, and the role of the social determinants of HIV, therefore remains a hypothesis to be tested rather than a demonstrated result.