# Load the indicator table (the first four lines of the file are skipped
# before the header row is read) and the accompanying country metadata table.
data_raw <- read_csv(
  "API_EG.FEC.RNEW.ZS_DS2_en_csv_v2_23178.csv",
  skip = 4
)
country_meta <- read_csv(
  "Metadata_Country_API_EG.FEC.RNEW.ZS_DS2_en_csv_v2_23178.csv"
)

# Year columns to stack into long form: "2000" through "2023".
value_columns <- as.character(2000:2023)

# One row per (row of data_raw, year) with a non-missing value. The three
# identifier columns get short names; every column of data_raw that is not
# listed in value_columns is carried along unchanged.
df_long <- data_raw %>%
  rename(
    country = `Country Name`,
    code = `Country Code`,
    indicator = `Indicator Name`
  ) %>%
  pivot_longer(
    cols = all_of(value_columns),
    names_to = "year",
    values_to = "value"
  ) %>%
  mutate(year = as.integer(year)) %>%
  mutate(value = as.numeric(value)) %>%
  drop_na(value)

# Keep only the join key and the two classification columns of the metadata.
country_meta_clean <- select(
  country_meta,
  `Country Code`, Region, IncomeGroup
)

# Attach Region and IncomeGroup to every long-format row. Rows whose code has
# no match in the metadata are kept, with NA in both added columns.
wdi <- left_join(
  df_long,
  country_meta_clean,
  by = c("code" = "Country Code")
)
# Ten highest 2022 values among rows that have a Region. Rows without a Region
# are removed first so that only classified entries can appear in a chart
# titled "Top 10 Countries" (presumably the unclassified rows are aggregate
# entries such as regional or income groupings -- confirm against the
# metadata file).
top2022 <- wdi %>%
  filter(year == 2022, !is.na(Region)) %>%
  arrange(desc(value)) %>%
  slice_head(n = 10)

# Horizontal bar chart; reorder() sorts the bars by value so the largest
# share ends up at the top after coord_flip().
ggplot(top2022, aes(x = reorder(country, value), y = value)) +
  geom_col(fill = "#2a9d8f") +
  coord_flip() +
  labs(title = "Top 10 Countries by Renewable Energy Share (2022)",
       x = NULL, y = "Renewables (% of total)") +
  theme_minimal()

# Unweighted mean of value per year.
# - Rows without a Region are excluded so that unclassified entries
#   (presumably aggregates that already summarise several countries --
#   confirm against the metadata file) are not averaged in alongside the
#   individual countries.
# - Years after 2022 are excluded so the series matches the 2000-2022 window
#   stated in the chart title (value_columns extends to 2023).
global_avg <- wdi %>%
  filter(!is.na(Region), year <= 2022) %>%
  group_by(year) %>%
  summarize(global_avg = mean(value, na.rm = TRUE))

# Line chart of the yearly mean. `linewidth` replaces `size`, which ggplot2
# deprecated for line geoms in version 3.4.0.
ggplot(global_avg, aes(x = year, y = global_avg)) +
  geom_line(linewidth = 1.2, color = "#e76f51") +
  labs(title = "Global Average Renewable Share (2000–2022)",
       x = "Year", y = "Average %") +
  theme_minimal()

# All rows for 2022 (kept unfiltered so other code can still rely on it).
wdi_2022 <- wdi %>% filter(year == 2022)

# Distribution of the 2022 values per Region. Rows with no Region are dropped
# for the plot only; otherwise they would be drawn as an extra "NA" box next
# to the real regions.
wdi_2022 %>%
  filter(!is.na(Region)) %>%
  ggplot(aes(x = Region, y = value, fill = Region)) +
  geom_boxplot() +
  labs(title = "Renewable Energy Share by Region (2022)",
       x = NULL, y = "% of Total Energy") +
  theme_minimal() +
  # Region names are long, so tilt them; the fill legend would only repeat
  # the x axis and is hidden.
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")

# All rows for 2020. Left unfiltered because the simulated-correlation step
# further down reads wdi_2020 and applies its own filter.
wdi_2020 <- wdi %>% filter(year == 2020)

# Jittered strip plot of the 2020 values per income group. Rows with no
# IncomeGroup are dropped for the plot only, so no "NA" category appears on
# the axis.
wdi_2020 %>%
  filter(!is.na(IncomeGroup)) %>%
  ggplot(aes(x = IncomeGroup, y = value, color = IncomeGroup)) +
  geom_jitter(width = 0.3, alpha = 0.6, size = 2) +
  labs(title = "Renewable Share by Income Group (2020)",
       x = "Income Group", y = "% of Renewables") +
  theme_minimal() +
  # The colour legend duplicates the x axis, so hide it (same choice as the
  # by-region boxplot).
  theme(legend.position = "none")

# Demonstration only: gdp, co2 and life_exp below are uniform random numbers,
# not real indicators (a real version needs more indicators).
set.seed(1)

# Start from the 2020 values of rows that have a Region, then append the three
# simulated columns. The draws happen in the order gdp, co2, life_exp.
sim_data <- wdi_2020 %>%
  filter(!is.na(Region)) %>%
  select(value) %>%
  mutate(
    gdp = runif(n(), 1000, 60000),
    co2 = runif(n(), 0.5, 10),
    life_exp = runif(n(), 55, 85)
  )

# Pairwise correlations rounded to two decimals, then unrolled into one row
# per (Var1, Var2) pair with the coefficient in Freq.
corr <- sim_data %>%
  cor(use = "complete.obs") %>%
  round(2)
corr_df <- as.data.frame(as.table(corr))

# Heatmap of the matrix: red for negative, white at zero, green for positive.
ggplot(corr_df, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "#d7191c",
    mid = "white",
    high = "#1a9641",
    midpoint = 0
  ) +
  labs(title = "Simulated Correlation Matrix", x = NULL, y = NULL) +
  theme_minimal()

# Ten entries with the highest 2022 value that also have a 2000 value, in wide
# form (one row per country, columns yr_2000 and yr_2022).
#
# Only the columns needed for the plot are kept before widening, and the NA
# check is restricted to the two year columns. A bare drop_na() would discard
# a row whenever ANY column is NA -- including columns unrelated to this
# comparison that wdi carries along -- which can leave the table empty.
# Rows without a Region are excluded explicitly (the bare drop_na() removed
# them as a side effect).
dumbbell_data <- wdi %>%
  filter(year %in% c(2000, 2022), !is.na(Region)) %>%
  select(country, code, Region, IncomeGroup, year, value) %>%
  pivot_wider(names_from = year, values_from = value, names_prefix = "yr_") %>%
  drop_na(yr_2000, yr_2022) %>%
  arrange(desc(yr_2022)) %>%
  slice_head(n = 10)

# Dumbbell chart: one segment per country from its 2000 value to its 2022
# value, ordered by the 2022 value.
ggplot(dumbbell_data, aes(y = reorder(country, yr_2022))) +
  geom_dumbbell(aes(x = yr_2000, xend = yr_2022),
                size = 3, color = "#90a4ae",
                colour_x = "#0077b6", colour_xend = "#00b4d8") +
  labs(title = "Renewable Share: 2000 vs 2022", x = "% Renewable", y = NULL) +
  theme_minimal()

library(tidytext)

# Per-year top ten from 2005 onward.
# - Rows without a Region are excluded so only classified entries are ranked.
# - with_ties = FALSE caps every year at exactly ten rows; by default
#   slice_max() keeps all rows tied with the tenth, so a frame could show
#   more than ten bars.
anim_data <- wdi %>%
  filter(year >= 2005, !is.na(Region)) %>%
  group_by(year) %>%
  slice_max(order_by = value, n = 10, with_ties = FALSE) %>%
  ungroup()

# Animated horizontal bar chart with one state per year.
# reorder_within() ranks the countries by value separately inside each year,
# and scale_x_reordered() removes the suffix it appends to the axis labels.
# {closest_state} in the title is filled in with the year being shown.
p_anim <- ggplot(anim_data, aes(x = reorder_within(country, value, year), y = value, fill = country)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_x_reordered() +
  labs(title = "Top 10 Renewable Energy Share by Country: {closest_state}",
       x = NULL, y = "% of Total Energy") +
  theme_minimal() +
  transition_states(year, transition_length = 3, state_length = 1) +
  ease_aes('linear')

# Render 150 frames at 15 frames per second as a GIF.
animate(p_anim, nframes = 150, fps = 15, renderer = gifski_renderer())

# Countries to compare; names must match the `country` column exactly.
selected <- c("United States", "Germany", "China", "Brazil", "India")

# All available years for the selected countries.
plot_df <- wdi %>% filter(country %in% selected)

# One line per country over time. `linewidth` replaces `size`, which ggplot2
# deprecated for line geoms in version 3.4.0.
p <- ggplot(plot_df, aes(x = year, y = value, color = country)) +
  geom_line(linewidth = 1.2) +
  labs(title = "Renewable Energy Trend: Selected Countries",
       x = "Year", y = "% of Total Energy") +
  theme_minimal()

# Convert the static chart into an interactive plotly widget.
ggplotly(p)