1. Data Loading

# Import the HIV estimates (2000-2023) from the CSV export
hiv_data <- readr::read_csv(file = "HIV data 2000-2023.csv")
## Rows: 1552 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): IndicatorCode, Indicator, ValueType, ParentLocationCode, ParentLoc...
## dbl  (1): Period
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Import the multidimensional poverty workbook; `skip = 1` skips the first
# sheet row before the header row is read.
# NOTE(review): readxl auto-names columns 11-15 (`...11` to `...15`) because
# their header cells are empty -- presumably a multi-row header; confirm that
# no sub-header row is being read in as data.
poverty_data <- readxl::read_excel(
  path = "multidimensional_poverty.xlsx",
  skip = 1
)
## New names:
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

2. Data Cleaning & Preparation

library(dplyr)
library(stringr)
library(readr)

# Cleaning HIV data
# `Value` is text; only its leading run of digits and whitespace is kept
# (presumably the point estimate written with spaces as thousands
# separators -- confirm against the raw file) and turned into the numeric
# `HIV_Estimate`. Rows whose `Value` does not start with a digit yield NA.
hiv_data_clean <- hiv_data %>%
  rename(Country = Location, Year = Period) %>%
  mutate(
    # `*` rather than `+`: a lone single-digit figure (e.g. "5" or "5[...")
    # must still be captured instead of silently becoming NA.
    HIV_Estimate = parse_number(
      str_remove_all(str_extract(Value, "^\\d[\\d\\s]*"), "\\s")
    ),
    Year = as.numeric(Year)
  ) %>%
  select(Country, Year, ParentLocationCode, HIV_Estimate)

# Lookup table: parent-location code -> human-readable region name
region_labels <- c(
  AFR    = "Africa",
  AMR    = "Americas",
  EMR    = "Eastern Mediterranean",
  EUR    = "Europe",
  SEAR   = "South-East Asia",
  WPR    = "Western Pacific",
  GLOBAL = "Global",
  WORLD  = "World"
)

# Add a `Region` column by looking each code up in the table; codes that are
# not in the table (and missing codes) are carried over unchanged.
hiv_data_clean <- mutate(
  hiv_data_clean,
  Region = coalesce(
    unname(region_labels[ParentLocationCode]),
    ParentLocationCode
  )
)

# Cleaning poverty data
# Keep only the country, reporting-year and headcount-ratio columns (renamed
# as they are selected), then coerce the reporting year to numeric.
# NOTE(review): `Monetary_Deprivation` is populated from the
# "Multidimensional poverty headcount ratio (%)" column, not from a monetary
# indicator -- confirm the intended measure / name.
poverty_clean <- poverty_data %>%
  select(
    Country = Economy,
    Reporting_Year = `Reporting year`,
    Monetary_Deprivation = `Multidimensional poverty headcount ratio (%)`
  ) %>%
  mutate(Reporting_Year = as.numeric(Reporting_Year))

# Merging the datasets
# Inner join: only country/year pairs present in both tables survive, with
# the HIV `Year` matched against the poverty `Reporting_Year`.
# NOTE(review): countries are matched on their exact name string; spelling
# differences between the two sources would silently drop rows -- verify.
hiv_poverty <- inner_join(
  x = hiv_data_clean,
  y = poverty_clean,
  by = c("Country" = "Country", "Year" = "Reporting_Year")
)

5. Mixed Effects Modeling

# Preparing modeling data
# Restrict to the model variables, drop rows with any missing value, and keep
# only countries that still have more than one observation.
model_data <- hiv_poverty %>%
  select(Country, Year, HIV_Estimate, Monetary_Deprivation) %>%
  tidyr::drop_na() %>%
  add_count(Country, name = "obs_per_country") %>%
  filter(obs_per_country > 1) %>%
  select(-obs_per_country)

# Fitting the model
# Linear mixed model of HIV_Estimate on Monetary_Deprivation and Year with a
# random intercept per Country.
# The fit is attempted only when model_data covers at least two countries:
# lme4 stops with an error when the grouping factor has a single level.
if (nrow(model_data) > 0 && dplyr::n_distinct(model_data$Country) > 1) {
  model <- lme4::lmer(
    HIV_Estimate ~ Monetary_Deprivation + Year + (1 | Country),
    data = model_data
  )
  # Explicit print() so the summary is shown when the script is source()d,
  # not only under knitr/console auto-printing.
  print(summary(model))
} else {
  # model_data can be empty because of missing values OR because no country
  # has repeated observations, so the message must not blame only the former.
  cat(
    "❌ Not enough repeated country observations to fit the",
    "mixed-effects model.\n"
  )
}
## ❌ Not enough repeated country observations to fit the mixed-effects model.

6. Summary of Findings

- The countries with the highest HIV burden globally (2000–2023) include South Africa, India, Nigeria, Mozambique, Kenya, Brazil, and the United States, which together account for roughly 75% of global HIV estimates.
- Regionally, Africa is the most affected, led by South Africa, Nigeria, Uganda, and Tanzania. In other regions the burden is concentrated in India (South-East Asia), Brazil and the United States (Americas), and Russia (Europe).
- Deprivation in education, electricity, sanitation, and income tends to be higher in countries with a high HIV burden.
- The mixed-effects model in Section 5 could not be fitted: after merging, no country had more than one complete observation. A link between multidimensional poverty and HIV estimates is therefore consistent with the descriptive results above and with the role of social determinants of HIV, but it is not yet supported by a fitted model.