# ---- Load raw inputs ----
# Indicator table (presumably a World Bank bulk-download CSV — the first four
# lines are skipped so that the header row is read as column names).
data_raw <- read_csv(
  file = "API_EG.FEC.RNEW.ZS_DS2_en_csv_v2_23178.csv",
  skip = 4
)

# Per-country metadata; its Region and IncomeGroup columns are joined on later.
country_meta <- read_csv(
  file = "Metadata_Country_API_EG.FEC.RNEW.ZS_DS2_en_csv_v2_23178.csv"
)

# Names of the year columns that get reshaped ("2000", ..., "2023").
value_columns <- as.character(seq(2000L, 2023L))
# Reshape the wide year columns into one row per country-year observation.
# Only the columns named in `value_columns` are pivoted; every other column of
# `data_raw` is carried along unchanged. Rows without a value are dropped.
df_long <- data_raw %>%
  rename(
    country = `Country Name`,
    code = `Country Code`,
    indicator = `Indicator Name`
  ) %>%
  pivot_longer(
    cols = all_of(value_columns),
    names_to = "year",
    values_to = "value"
  ) %>%
  mutate(
    year = as.integer(year),   # column names arrive as character
    value = as.numeric(value)
  ) %>%
  drop_na(value)
# Keep only the join key and the two classification columns from the metadata.
country_meta_clean <- select(
  country_meta,
  `Country Code`, Region, IncomeGroup
)

# Attach Region / IncomeGroup to each observation. left_join() keeps every row
# of df_long, so codes without a metadata match end up with NA in both columns.
wdi <- left_join(
  df_long,
  country_meta_clean,
  by = c("code" = "Country Code")
)
# ---- Top 10 in 2022 ----
# Ten rows with the largest 2022 value, sorted descending.
# NOTE(review): no filter on Region is applied here, so any non-country rows
# present in `wdi` can also be ranked — confirm that is intended.
top2022 <- wdi %>%
  filter(year == 2022) %>%
  arrange(desc(value)) %>%
  head(n = 10)

# Horizontal bar chart; reorder() sorts the bars by value.
top2022 %>%
  ggplot(aes(x = reorder(country, value), y = value)) +
  geom_col(fill = "#2a9d8f") +
  coord_flip() +
  ggtitle("Top 10 Countries by Renewable Energy Share (2022)") +
  xlab(NULL) +
  ylab("Renewables (% of total)") +
  theme_minimal()

# ---- Global average over time ----
# Unweighted mean of the indicator across countries, per year.
# Rows without a Region are excluded first: after the left join these are codes
# with no country classification (presumably the aggregate rows such as "World"
# or income-group totals — confirm against the metadata file). Averaging them
# together with individual countries would count the same countries twice.
global_avg <- wdi %>%
  filter(!is.na(Region)) %>%
  group_by(year) %>%
  summarise(global_avg = mean(value, na.rm = TRUE))

ggplot(global_avg, aes(x = year, y = global_avg)) +
  # `linewidth` replaces `size` for line geoms (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1.2, color = "#e76f51") +
  labs(
    title = "Global Average Renewable Share (2000–2022)",
    x = "Year",
    y = "Average %"
  ) +
  theme_minimal()

# ---- Distribution by region, 2022 ----
# All 2022 observations (kept unfiltered so the object is unchanged for any
# other use).
wdi_2022 <- wdi %>%
  filter(year == 2022)

# One box per Region. Rows whose Region is NA (codes with no metadata match in
# the left join) are dropped for the plot only; otherwise they would be drawn
# as an extra "NA" box next to the real regions.
wdi_2022 %>%
  filter(!is.na(Region)) %>%
  ggplot(aes(x = Region, y = value, fill = Region)) +
  geom_boxplot() +
  labs(
    title = "Renewable Energy Share by Region (2022)",
    x = NULL,
    y = "% of Total Energy"
  ) +
  theme_minimal() +
  # Region names are long: rotate them, and hide the legend because the fill
  # only repeats the x axis.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# ---- Distribution by income group, 2020 ----
# All 2020 observations. Left unfiltered because later code derives its own
# subset from this object.
wdi_2020 <- wdi %>%
  filter(year == 2020)

# Jittered strip plot per income group. Rows with no IncomeGroup are dropped
# for the plot only, so they do not show up as an "NA" category.
wdi_2020 %>%
  filter(!is.na(IncomeGroup)) %>%
  ggplot(aes(x = IncomeGroup, y = value, color = IncomeGroup)) +
  # height = 0: spread points horizontally only. The default also jitters
  # vertically, which would shift the plotted values themselves.
  geom_jitter(width = 0.3, height = 0, alpha = 0.6, size = 2) +
  labs(
    title = "Renewable Share by Income Group (2020)",
    x = "Income Group",
    y = "% of Renewables"
  ) +
  theme_minimal()

# Simulated for demonstration (real version needs more indicators)
# gdp, co2 and life_exp below are uniform random draws, not real data; the
# fixed seed makes the draws repeatable. The draw order (gdp, co2, life_exp)
# determines the generated numbers, so keep it as is.
set.seed(1)
sim_data <- wdi_2020 %>%
  filter(!is.na(Region)) %>%
  select(value) %>%
  mutate(
    gdp = runif(n(), min = 1000, max = 60000),
    co2 = runif(n(), min = 0.5, max = 10),
    life_exp = runif(n(), min = 55, max = 85)
  )

# Pairwise correlations, rounded to two decimals.
corr <- sim_data %>%
  cor(use = "complete.obs") %>%
  round(digits = 2)

# Long form for plotting: as.table() + as.data.frame() yields the columns
# Var1, Var2 and Freq (the correlation value).
corr_df <- corr %>%
  as.table() %>%
  as.data.frame()

# Heatmap with a diverging fill centred on zero.
corr_df %>%
  ggplot(aes(Var1, Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "#d7191c",
    mid = "white",
    high = "#1a9641",
    midpoint = 0
  ) +
  ggtitle("Simulated Correlation Matrix") +
  xlab(NULL) +
  ylab(NULL) +
  theme_minimal()

# ---- 2000 vs 2022 comparison ----
# One row per country with the 2000 and 2022 values side by side
# (columns yr_2000 and yr_2022), limited to the ten largest 2022 values.
dumbbell_data <- wdi %>%
  filter(year %in% c(2000, 2022)) %>%
  pivot_wider(
    names_from = year,
    values_from = value,
    names_prefix = "yr_"
  ) %>%
  # Require both endpoints only. A bare drop_na() checks EVERY column, including
  # the unpivoted columns carried over from the raw file and the metadata
  # columns, so a missing value in any unrelated column would discard the row
  # (and can leave the table empty).
  drop_na(yr_2000, yr_2022) %>%
  arrange(desc(yr_2022)) %>%
  slice_head(n = 10)

# Dumbbell chart: one segment per country from its 2000 value to its 2022
# value, ordered by the 2022 value.
ggplot(dumbbell_data, aes(y = reorder(country, yr_2022))) +
  geom_dumbbell(
    aes(x = yr_2000, xend = yr_2022),
    size = 3, color = "#90a4ae",
    colour_x = "#0077b6", colour_xend = "#00b4d8"
  ) +
  labs(
    title = "Renewable Share: 2000 vs 2022",
    x = "% Renewable",
    y = NULL
  ) +
  theme_minimal()

# ---- Animated top-10 ranking ----
# tidytext provides reorder_within() / scale_x_reordered() used below.
library(tidytext)

# Top 10 values within each year from 2005 onward. slice_max() keeps ties at
# the cutoff, so a year can contribute more than ten rows.
anim_data <- wdi %>%
  filter(year >= 2005) %>%
  group_by(year) %>%
  slice_max(value, n = 10) %>%
  ungroup()

# Bars are ordered within each year; the title shows the year of the current
# animation state.
# NOTE(review): reorder_within() creates a separate axis level for every
# country-year pair — check the rendered GIF to confirm the axis looks as
# intended across frames.
p_anim <- anim_data %>%
  ggplot(aes(
    x = reorder_within(country, value, year),
    y = value,
    fill = country
  )) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_x_reordered() +
  ggtitle("Top 10 Renewable Energy Share by Country: {closest_state}") +
  xlab(NULL) +
  ylab("% of Total Energy") +
  theme_minimal() +
  transition_states(year, transition_length = 3, state_length = 1) +
  ease_aes("linear")

# Render 150 frames at 15 fps as a GIF.
animate(
  p_anim,
  nframes = 150,
  fps = 15,
  renderer = gifski_renderer()
)

# ---- Interactive trend for selected countries ----
# Countries to compare; names must match the `country` column exactly.
selected <- c("United States", "Germany", "China", "Brazil", "India")

plot_df <- wdi %>%
  filter(country %in% selected)

# One line per country over time.
p <- ggplot(plot_df, aes(x = year, y = value, color = country)) +
  # `linewidth` replaces `size` for line geoms (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1.2) +
  labs(
    title = "Renewable Energy Trend: Selected Countries",
    x = "Year",
    y = "% of Total Energy"
  ) +
  theme_minimal()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)