# Load the libraries
library(readr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(ggspatial)

# Week 3 Challenge

# 1. Load the dataset and clean any messy data.
# Read the raw export; read_csv() returns a tibble
data <- read_csv("wdr.csv")

# Keep only the country/score cells (rows 4 through 169, columns 6 and 7)
# and give the two columns descriptive names.
# NOTE(review): the slice is positional, so it depends on the exact layout of
# wdr.csv -- confirm that row 169 is the intended last row.
clean <- setNames(data[4:169, 6:7], c("Country", "RSF_Score2020"))

# Coerce the score column to numeric; any value that cannot be parsed as a
# number becomes NA (with a coercion warning)
clean[["RSF_Score2020"]] <- as.numeric(as.character(clean[["RSF_Score2020"]]))

# Preview the first rows of the cleaned table
head(clean)
## # A tibble: 6 × 2
##   Country     RSF_Score2020
##   <chr>               <dbl>
## 1 Afghanistan            38
## 2 Albania                30
## 3 Algeria                46
## 4 Angola                 34
## 5 Argentina              29
## 6 Armenia                29
# 2. Summarize the RSF score and plot the distribution.
# Tally how many countries share each distinct RSF score
summary <- count(clean, RSF_Score2020, name = "Number_of_Countries")

# Bar chart of the tally: one bar per distinct score, bar height is the
# number of countries with that score
ggplot(summary, aes(x = RSF_Score2020, y = Number_of_Countries)) +
  geom_col(fill = "lightblue") +
  labs(
    title = "Distribution of World Press Freedom Scores",
    x = "World Press Freedom Score",
    y = "Number of Countries"
  ) +
  theme_minimal()

# 3. Using world map libraries, plot the cleaned data values on a global map.
# Fetch medium-scale country polygons as an sf object
world <- ne_countries(scale = "medium", returnclass = "sf")

# Attach each country's RSF score to its polygon. all.x = TRUE keeps every
# map feature, so a country with no matching row in `clean` gets an NA score.
# NOTE(review): the join is on the exact name string (`name` vs `Country`);
# countries spelled differently in the two sources will not match -- verify.
map <- merge(
  x = world,
  y = clean,
  by.x = "name",
  by.y = "Country",
  all.x = TRUE
)

# Choropleth: fill each country by its score, light blue (low) to dark blue
# (high), with the legend placed above the map
ggplot(map) +
  geom_sf(mapping = aes(fill = RSF_Score2020)) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "World Press Freedom Scores by Country",
    fill = "World Press Freedom Score"
  ) +
  theme_minimal() +
  theme(legend.position = "top")