Week4_Assignment2

Load Packages

library(ggplot2)
library(tidyverse)
library(ggridges)
library(dplyr)
library(readr)
library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(viridis)
library(knitr)

Read and Clean the Data

# Load the WHO depression table, keep only the three columns the rest of the
# script uses, and coerce them to the types the plots expect:
#   - ParentLocation becomes a factor (one ridge per level in Plot 1)
#   - Value becomes numeric (entries that cannot be parsed turn into NA)
# NOTE(review): the path is absolute and machine-specific; the script will not
# run on another computer without editing it.
WHO_dep <- read.csv(
  "/Users/melissapacheco/Desktop/INFM316 Data Analytics/Week 4/Assignment2/WHO_depression_table.csv"
) %>%
  select(Location, ParentLocation, Value) %>%
  mutate(
    ParentLocation = as.factor(ParentLocation),
    Value = as.numeric(Value)
  )

Summary Table

Depression Prevalence Distribution by Continental Region
Region	Mean	Median	Min	Max	Std Dev
Western Pacific	3.78	3.6	2.9	5.9	0.78
South-East Asia	3.86	3.9	3.0	4.5	0.49
Africa	4.10	4.1	3.4	4.9	0.36
Eastern Mediterranean	4.36	4.5	3.3	5.1	0.59
Americas	4.76	4.7	3.7	5.9	0.49
Europe	4.98	5.0	3.8	6.3	0.48

Plot 1: Ridge Plot

# Ridge plot: one density ridge of prevalence values per parent location.
# The fill colour repeats the y-axis grouping, so the legend is hidden.
p1 <- ggplot(
  data = WHO_dep,
  mapping = aes(x = Value, y = ParentLocation, fill = ParentLocation)
) +
  geom_density_ridges(color = "white", alpha = 0.7) +
  ggtitle("Distribution of Depression Prevalence by Continental Location") +
  xlab("Prevalence") +
  ylab("Parent Location") +
  theme_ridges() +
  theme(
    # Centre the title and shrink it so the long text fits on one line.
    plot.title = element_text(size = 11, face = "bold", hjust = 0.5),
    legend.position = "none"
  )

# Render the plot.
p1

Plot 2: Choropleth Map

# Prepare the choropleth data: attach WHO prevalence values to Natural Earth
# country polygons. Produces `map_df` (WHO rows keyed by `country`), `world`
# (the polygons) and `map_data` (polygons + prevalence, one row per polygon).

# Rename to the descriptive column names used by the map layer.
map_df <- WHO_dep %>%
  rename(
    country = Location,
    depression_prevalence = Value
  )

# Keep the untouched WHO spelling so it can be tried as a fallback below.
who_names <- as.character(map_df$country)

# Shorten the long official WHO names to common short names.
map_df$country <- recode(who_names,
  "United States of America" = "United States",
  "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
  "Russian Federation" = "Russia",
  "Viet Nam" = "Vietnam",
  "Republic of Korea" = "South Korea",
  "Democratic People's Republic of Korea" = "North Korea",
  "United Republic of Tanzania" = "Tanzania",
  "Iran (Islamic Republic of)" = "Iran",
  "Syrian Arab Republic" = "Syria",
  "Republic of Moldova" = "Moldova",
  "Venezuela (Bolivarian Republic of)" = "Venezuela",
  "Bolivia (Plurinational State of)" = "Bolivia",
  "Brunei Darussalam" = "Brunei",
  "Lao People's Democratic Republic" = "Laos",
  "Micronesia (Federated States of)" = "Micronesia",
  "Cabo Verde" = "Cape Verde",
  "Congo" = "Republic of the Congo",
  "Democratic Republic of the Congo" = "Dem. Rep. Congo",
  "Gambia" = "The Gambia",
  "Bahamas" = "The Bahamas"
)

world <- ne_countries(scale = "medium", returnclass = "sf")

# The join below keys on `world$name`, but the short names above follow a mix
# of Natural Earth spellings (`name`, `name_long`, `admin`), and the exact
# spellings differ between Natural Earth releases. Resolve every country to
# the `name` of its polygon by trying the recoded spelling first and the
# original WHO spelling second, against each of those columns in turn.
ne_row <- rep(NA_integer_, nrow(map_df))
for (candidate in list(map_df$country, who_names)) {
  for (ne_col in c("name", "name_long", "admin")) {
    pending <- is.na(ne_row) & !is.na(candidate)
    ne_row[pending] <- match(candidate[pending], world[[ne_col]])
  }
}
matched <- !is.na(ne_row)
map_df$country[matched] <- world$name[ne_row[matched]]

# A left join silently drops WHO rows without a polygon (they would simply be
# drawn in the "no data" colour), so report them instead of hiding them.
unmatched <- sort(unique(map_df$country[!matched]))
if (length(unmatched) > 0) {
  warning(
    "No Natural Earth polygon matched these WHO locations: ",
    paste(unmatched, collapse = ", "),
    call. = FALSE
  )
}

map_data <- world %>%
  left_join(map_df, by = c("name" = "country"))

# Choropleth: fill each country polygon by its depression prevalence.
# Plasma scale in its default direction (dark purple = low, yellow = high);
# polygons without a prevalence value are drawn in light grey.
p2 <- ggplot(data = map_data) +
  geom_sf(
    mapping = aes(fill = depression_prevalence),
    linewidth = 0.1,
    color = "white"
  ) +
  theme_minimal() +
  scale_fill_viridis_c(
    name = "Depression Prevalence",
    option = "plasma",
    na.value = "grey90"
  ) +
  ggtitle(
    "Global Depression Prevalence by Country",
    subtitle = "Color-coded choropleth map"
  ) +
  labs(caption = "Source: WHO depression dataset")

# Render the map.
p2

Interpretation

The summary table shows the mean, median, minimum, maximum, and standard deviation of depression prevalence for each continental region. This makes it easier to compare regional patterns and see how much variation exists within each area.

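The ridge plot shows that depression prevalence is distributed differently across world regions. Europe and the Americas sit highest (mean prevalence of 4.98 and 4.76), while the Western Pacific and South-East Asia sit lowest (3.78 and 3.86). Some regions also have a wider spread of values than others: the Western Pacific varies the most (standard deviation 0.78, ranging from 2.9 to 5.9), whereas Africa is the most tightly clustered (standard deviation 0.36), which suggests more variation in depression prevalence across countries within the Western Pacific than within Africa.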

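The choropleth map shows that depression prevalence varies across countries around the world. On the plasma color scale used here, brighter yellow shading indicates higher reported prevalence and darker purple shading indicates lower prevalence, which makes the global pattern easier to see. Countries shown in grey have no prevalence value in the joined data.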