# Load HIV data
# The path is relative to the current working directory.
# read_csv() is namespace-qualified because this script only attaches readr
# further down, after this call, so the bare name is not guaranteed to be in
# scope at this point.
hiv_data <- readr::read_csv("HIV data 2000-2023.csv")
## Rows: 1552 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): IndicatorCode, Indicator, ValueType, ParentLocationCode, ParentLoc...
## dbl (1): Period
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load poverty data
# The path is relative to the current working directory.
# read_excel() is namespace-qualified because no library(readxl) call is
# visible in this script.
# skip = 1 skips the first sheet row before the header row is read. The
# recorded output below shows that five header cells came back empty and were
# auto-named `...11` to `...15`.
poverty_data <- readxl::read_excel("multidimensional_poverty.xlsx", skip = 1)
## New names:
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
library(dplyr)
library(stringr)
library(readr)
# Cleaning HIV data
# Keep the location, period and parent-location code, and derive a numeric
# estimate from the text in `Value`.
hiv_data_clean <- hiv_data %>%
  select(
    Country = Location,
    Year = Period,
    ParentLocationCode,
    Value
  ) %>%
  mutate(
    # Take the leading run that starts with a digit and continues with at
    # least one more digit or whitespace character, strip the whitespace, and
    # parse the remainder as a number. Values that do not start this way
    # become NA.
    HIV_Estimate = Value %>%
      str_extract("^\\d[\\d\\s]+") %>%
      str_remove_all("\\s") %>%
      parse_number(),
    Year = as.numeric(Year)
  ) %>%
  select(Country, Year, ParentLocationCode, HIV_Estimate)
# Region codes to readable labels
# Named character vector: names are the codes found in `ParentLocationCode`,
# values are the labels shown in plots.
region_labels <- c(
  AFR    = "Africa",
  AMR    = "Americas",
  EMR    = "Eastern Mediterranean",
  EUR    = "Europe",
  SEAR   = "South-East Asia",
  WPR    = "Western Pacific",
  GLOBAL = "Global",
  WORLD  = "World"
)
# Add a readable Region column. Codes found in `region_labels` are replaced by
# their label; any other code (and NA) is carried over unchanged.
hiv_data_clean <- hiv_data_clean %>%
  mutate(
    Region = coalesce(
      unname(region_labels[ParentLocationCode]),
      ParentLocationCode
    )
  )
# Cleaning poverty data
# Keep the economy name, reporting year and one poverty measure, under the
# column names used by the rest of the script.
# NOTE(review): the column stored as `Monetary_Deprivation` is the sheet's
# "Multidimensional poverty headcount ratio (%)" column, so the name does not
# describe what it holds. It is left unchanged because the modelling steps
# further down refer to it by this name.
poverty_clean <- poverty_data %>%
  select(
    Country = Economy,
    Reporting_Year = `Reporting year`,
    Monetary_Deprivation = `Multidimensional poverty headcount ratio (%)`
  ) %>%
  mutate(Reporting_Year = as.numeric(Reporting_Year))
# Merging the datasets
# Inner join: only rows whose Country string and Year (matched against
# Reporting_Year) appear in both tables are kept.
# NOTE(review): Country is matched by exact string equality, so any spelling
# difference between the two sources silently drops that country - confirm
# the names line up.
hiv_poverty <- inner_join(
  hiv_data_clean,
  poverty_clean,
  by = c("Country" = "Country", "Year" = "Reporting_Year")
)
# Top 75% countries globally
# Sum each country's estimates over all years, rank countries from largest to
# smallest total, and keep the leading countries up to and including the first
# one at which the cumulative share of the grand total reaches 75%.
top75_global <- hiv_data_clean %>%
  filter(!is.na(HIV_Estimate)) %>%
  group_by(Country) %>%
  # Missing estimates were removed above, so a plain sum is sufficient.
  summarise(Total_HIV = sum(HIV_Estimate)) %>%
  arrange(desc(Total_HIV)) %>%
  mutate(
    Cumulative = cumsum(Total_HIV) / sum(Total_HIV),
    # Position of the first row whose cumulative share is at least 0.75
    # (NA when no row qualifies, which makes the filter below keep nothing).
    cutoff_row = match(TRUE, Cumulative >= 0.75)
  ) %>%
  filter(row_number() <= cutoff_row) %>%
  select(-cutoff_row)
# Filtering HIV data
# Keep every yearly row belonging to a country listed in `top75_global`.
hiv_top_global <- semi_join(hiv_data_clean, top75_global, by = "Country")
# Global visualisation plot
# One panel per country; each panel gets its own y-axis range so that
# countries with very different totals remain readable.
ggplot(data = hiv_top_global, mapping = aes(x = Year, y = HIV_Estimate)) +
  geom_line(color = "steelblue", size = 1) +
  facet_wrap(~ Country, scales = "free_y") +
  ggtitle("Global Top 75% HIV Burden by Country (2000–2023)") +
  xlab("Year") +
  ylab("HIV Estimate") +
  theme_minimal() +
  # Applied after theme_minimal() so these settings are not overwritten by it.
  theme(
    strip.text = element_text(size = 8),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# Top 75% by region
# Within each region: sum each country's estimates over all years, rank the
# countries from largest to smallest total, and keep the leading countries up
# to and including the first one at which the cumulative share of the
# region's total reaches 75%.
top75_regional <- hiv_data_clean %>%
  filter(!is.na(HIV_Estimate), !is.na(Region)) %>%
  group_by(Region, Country) %>%
  summarise(Total_HIV = sum(HIV_Estimate), .groups = "drop") %>%
  arrange(Region, desc(Total_HIV)) %>%
  group_by(Region) %>%
  mutate(Cumulative = cumsum(Total_HIV) / sum(Total_HIV)) %>%
  # which(...)[1] is NA when no row reaches 0.75, in which case the region
  # contributes no rows.
  filter(row_number() <= which(Cumulative >= 0.75)[1]) %>%
  # Drop the Region grouping so later verbs applied to this table do not
  # silently operate per region.
  ungroup()
# Joining back to data
# Keep every yearly row for a country listed in `top75_regional`, then drop
# rows that have no region label.
hiv_top_regional <- hiv_data_clean %>%
  semi_join(top75_regional, by = "Country") %>%
  filter(!is.na(Region))
# Regional visual representation
# Draw one line chart per region, with one coloured line per country.
region_list <- unique(hiv_top_regional$Region)
for (reg in region_list) {
  region_data <- filter(hiv_top_regional, Region == reg)

  p <- ggplot(
    data = region_data,
    mapping = aes(x = Year, y = HIV_Estimate, color = Country)
  ) +
    geom_line(size = 1) +
    ggtitle(paste("HIV Trends in", reg, "(Top Burdened Countries)")) +
    xlab("Year") +
    ylab("HIV Estimate") +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      legend.position = "bottom",
      legend.title = element_blank(),
      legend.text = element_text(size = 10)
    ) +
    # Lay the country legend out in three columns.
    guides(color = guide_legend(ncol = 3))

  # Plots are not auto-printed inside a loop, so print explicitly.
  print(p)
}
# Preparing modeling data
# Keep the four model columns, drop rows with any missing value, then keep
# only countries that still have more than one row.
model_data <- hiv_poverty %>%
  select(Country, Year, HIV_Estimate, Monetary_Deprivation) %>%
  drop_na() %>%
  # A country has more than one row exactly when its name occurs among the
  # duplicated entries of the Country column.
  filter(Country %in% Country[duplicated(Country)])
# Fitting the model
# Linear mixed model: HIV_Estimate on Monetary_Deprivation and Year, with a
# random intercept per Country.
# `model_data` only contains countries with more than one complete row, so an
# empty table can mean that no country had repeated observations after the
# merge, not only that values were missing.
n_model_countries <- length(unique(model_data$Country))
if (nrow(model_data) == 0) {
  cat("❌ No complete cases available for modeling.\n")
} else if (n_model_countries < 2) {
  # lmer() stops when the grouping factor has a single level, so report this
  # case instead of letting the fit fail.
  cat("❌ Fewer than two countries available; cannot fit a per-country random intercept.\n")
} else {
  # Namespace-qualified because no library(lme4) call is visible in this
  # script.
  model <- lme4::lmer(
    HIV_Estimate ~ Monetary_Deprivation + Year + (1 | Country),
    data = model_data
  )
  # Print explicitly so the summary also appears when the script is run via
  # source(), which does not auto-print values.
  print(summary(model))
}
## ❌ No complete cases available for modeling.
# Top HIV burden countries globally (2000–2023) include South Africa, India, Nigeria, Mozambique, Kenya, Brazil, and the United States, accounting for ~75% of global HIV estimates.
# Regionally, Africa is the most impacted, with countries like South Africa, Nigeria, Uganda, and Tanzania leading. Other regions show concentrated burden in India (South-East Asia), Brazil and USA (Americas), and Russia (Europe).
# Poverty dimensions such as education, electricity, sanitation, and income deprivation are higher in countries with high HIV burden.
# NOTE(review): the modelling step above printed "No complete cases available for modeling", so no mixed-effects model was fitted in the recorded run. The statement below is not supported by that output and should be re-checked once the model actually runs.
# The mixed-effects model shows a clear link between multidimensional poverty and HIV estimates, highlighting the social determinants of HIV.