# Load the libraries
library(readr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(rnaturalearth)
library(rnaturalearthdata)
library(sf)
library(ggspatial)
Week 3 Challenge
- Load the dataset and clean any messy data.
# Read the raw table from wdr.csv in the working directory
data <- read_csv("wdr.csv")
# Keep only rows 4 through 169 and columns 6 through 7 of the raw table.
# NOTE(review): an earlier comment described this as "Rows 4-170", but the
# index used here is 4:169 -- confirm which row range is intended.
clean <- data[seq(4, 169), c(6, 7)]
# Give the two retained columns descriptive names
names(clean) <- c("Country", "RSF_Score2020")
# Coerce the score column to numeric (going through character first);
# values that cannot be parsed as numbers become NA
clean[["RSF_Score2020"]] <- as.numeric(as.character(clean[["RSF_Score2020"]]))
# Preview the first rows of the cleaned table
head(clean)
## # A tibble: 6 × 2
## Country RSF_Score2020
## <chr> <dbl>
## 1 Afghanistan 38
## 2 Albania 30
## 3 Algeria 46
## 4 Angola 34
## 5 Argentina 29
## 6 Armenia 29
- Summarize the RSF score and plot the distribution.
# Tally how many rows (countries) share each distinct RSF score
summary <- count(clean, RSF_Score2020, name = "Number_of_Countries")
# Bar chart of the tally: one bar per score value, height = number of countries
ggplot(summary, aes(x = RSF_Score2020, y = Number_of_Countries)) +
  geom_col(fill = "lightblue") +
  labs(
    title = "Distribution of World Press Freedom Scores",
    x = "World Press Freedom Score",
    y = "Number of Countries"
  ) +
  theme_minimal()

- Using world map libraries, plot cleaned data values on a global
map.
# Fetch medium-scale country polygons as an sf object
world <- ne_countries(scale = "medium", returnclass = "sf")
# Attach the scores to the polygons by matching the map's `name` column to
# `Country`; all.x = TRUE keeps every polygon, leaving the score NA where no
# match is found.
# NOTE(review): the match is on exact name strings, so countries spelled
# differently in the two sources would end up with NA -- verify coverage.
map <- world %>%
  merge(clean, by.x = "name", by.y = "Country", all.x = TRUE)
# Choropleth: shade each country by its score, light (low) to dark (high)
ggplot(map, aes(fill = RSF_Score2020)) +
  geom_sf() +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    title = "World Press Freedom Scores by Country",
    fill = "World Press Freedom Score"
  ) +
  theme_minimal() +
  theme(legend.position = "top")
