library(ggplot2)
library(tidyverse)
library(ggridges)
library(dplyr)
library(readr)
library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(viridis)
library(knitr)
# Load the WHO depression table and keep only the three columns used below:
# the location name, its parent location (region), and the reported value.
# NOTE(review): the path is absolute and machine-specific; the script will
# only run where this exact file exists.
WHO_dep <- select(
  read.csv("/Users/melissapacheco/Desktop/INFM316 Data Analytics/Week 4/Assignment2/WHO_depression_table.csv"),
  Location, ParentLocation, Value
)

# ParentLocation becomes a factor (it is the grouping axis of the ridge
# plot); Value is coerced to numeric so it can be plotted on a continuous
# scale. Entries that cannot be parsed as numbers become NA with a warning.
WHO_dep[["ParentLocation"]] <- as.factor(WHO_dep[["ParentLocation"]])
WHO_dep[["Value"]] <- as.numeric(WHO_dep[["Value"]])
| Region | Mean | Median | Min | Max | Std Dev |
|---|---|---|---|---|---|
| Western Pacific | 3.78 | 3.6 | 2.9 | 5.9 | 0.78 |
| South-East Asia | 3.86 | 3.9 | 3.0 | 4.5 | 0.49 |
| Africa | 4.10 | 4.1 | 3.4 | 4.9 | 0.36 |
| Eastern Mediterranean | 4.36 | 4.5 | 3.3 | 5.1 | 0.59 |
| Americas | 4.76 | 4.7 | 3.7 | 5.9 | 0.49 |
| Europe | 4.98 | 5.0 | 3.8 | 6.3 | 0.48 |
# Ridge plot: one density ridge of prevalence values per parent location,
# with each ridge filled by its own parent location.
p1 <- ggplot(
  data = WHO_dep,
  mapping = aes(x = Value, y = ParentLocation, fill = ParentLocation)
) +
  geom_density_ridges(color = "white", alpha = 0.7) +
  labs(
    x = "Prevalence",
    y = "Parent Location",
    title = "Distribution of Depression Prevalence by Continental Location"
  ) +
  theme_ridges() +
  # The fill repeats what the y axis already shows, so the legend is hidden;
  # the title is centred and set in bold.
  theme(
    plot.title = element_text(size = 11, face = "bold", hjust = 0.5),
    legend.position = "none"
  )

p1
# Prepare the per-country table for the map: give the columns descriptive
# names, then rewrite selected WHO location names to shorter spellings so
# they can be matched against the `name` column of the world map in the join
# that follows.
# NOTE(review): the target spellings are presumably the Natural Earth names;
# confirm each against `world$name`, since unmatched names are kept as-is by
# recode() and would not join.
map_df <- WHO_dep %>%
  rename(country = Location, depression_prevalence = Value) %>%
  mutate(
    country = recode(
      country,
      "United States of America" = "United States",
      "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
      "Russian Federation" = "Russia",
      "Viet Nam" = "Vietnam",
      "Republic of Korea" = "South Korea",
      "Democratic People's Republic of Korea" = "North Korea",
      "United Republic of Tanzania" = "Tanzania",
      "Iran (Islamic Republic of)" = "Iran",
      "Syrian Arab Republic" = "Syria",
      "Republic of Moldova" = "Moldova",
      "Venezuela (Bolivarian Republic of)" = "Venezuela",
      "Bolivia (Plurinational State of)" = "Bolivia",
      "Brunei Darussalam" = "Brunei",
      "Lao People's Democratic Republic" = "Laos",
      "Micronesia (Federated States of)" = "Micronesia",
      "Cabo Verde" = "Cape Verde",
      "Congo" = "Republic of the Congo",
      "Democratic Republic of the Congo" = "Dem. Rep. Congo",
      "Gambia" = "The Gambia",
      "Bahamas" = "The Bahamas"
    )
  )
# Country polygons (medium scale) as an sf object.
world <- ne_countries(scale = "medium", returnclass = "sf")

# The join below matches on country name only, so any WHO location whose
# spelling differs from `world$name` would silently end up with an NA
# prevalence and be drawn in the "no data" colour. List those names up front
# so mismatches are visible and can be added to the recode table.
unmatched_countries <- setdiff(map_df$country, world$name)
if (length(unmatched_countries) > 0) {
  message(
    "WHO locations with no match in the world map `name` column: ",
    paste(sort(unmatched_countries), collapse = ", ")
  )
}

# Attach prevalence values to the polygons; polygons without a matching WHO
# row keep NA in the joined columns.
# NOTE(review): `map_data` shadows ggplot2::map_data(); the name is kept
# because the plotting code below refers to it.
map_data <- world %>%
  left_join(map_df, by = c("name" = "country"))
# Choropleth: fill each country polygon by its depression prevalence using
# the continuous "plasma" viridis palette; polygons with no value are drawn
# in light grey, and thin white borders separate the countries.
p2 <- ggplot(data = map_data) +
  geom_sf(
    mapping = aes(fill = depression_prevalence),
    linewidth = 0.1,
    color = "white"
  ) +
  scale_fill_viridis_c(
    name = "Depression Prevalence",
    option = "plasma",
    na.value = "grey90"
  ) +
  labs(
    title = "Global Depression Prevalence by Country",
    subtitle = "Color-coded choropleth map",
    caption = "Source: WHO depression dataset"
  ) +
  theme_minimal()

p2
The summary table shows the mean, median, minimum, maximum, and standard deviation of depression prevalence for each WHO region. This makes it easier to compare regional patterns and see how much variation exists within each area: for example, Europe has the highest mean prevalence (4.98) and the Western Pacific the lowest (3.78).
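The ridge plot shows that depression prevalence is distributed differently across world regions. Some regions have a wider spread of values, which suggests more variation in depression prevalence across countries within those areas; in the summary table, for instance, the Western Pacific has the largest standard deviation (0.78) and Africa the smallest (0.36).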
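The choropleth map shows that depression prevalence varies across countries around the world. On the plasma color scale used here, countries shaded in lighter, yellow tones have higher reported prevalence, while darker purple tones indicate lower prevalence; countries with no matching data are shown in grey. This makes the global pattern easier to see.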