Susfin_HW4

Homework problem 1:

library(tidyverse) # because, always

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor) # for clean_names() - makes variable names snake_case


载入程辑包：'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

# CSV endpoint for the green debt data (per the variable name, this comes from
# the IMF climate dashboards).
imf_climate_dashboards_green_debt_url <- "https://opendata.arcgis.com/datasets/8e2772e0b65f4e33a80183ce9583d062_0.csv"

# Download and parse the CSV into a tibble; column types are guessed by readr.
green_debt <- read_csv(imf_climate_dashboards_green_debt_url)

Rows: 355 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (30): ObjectId, F1985, F1986, F1987, F1990, F1991, F1992, F1993, F1994, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# The two indicators compared in this problem.
indicators_we_want <- c(
  "Green Bond Issuances by Country",
  "Sovereign Green Bond Issuances"
)

# Build the working subset:
#   1. janitor::clean_names() converts the column names to snake_case,
#   2. only rows whose indicator is one of the two above are kept,
#   3. only the identifying columns and the yearly columns are retained.
green_debt_subset <- clean_names(green_debt) |>
  filter(indicator %in% indicators_we_want) |>
  select(
    country,
    iso3,
    indicator,
    # regex: the letter "f" followed by exactly four digits (e.g. f2015)
    matches("f\\d{4}")
  )
    
    library(countrycode)

# Add a `region` column by converting each value of `country` with
# countrycode(). The "country.name" origin means the lookup is done on
# country names; values that cannot be matched come back as NA.
green_debt_subset[["region"]] <- countrycode(
  sourcevar = green_debt_subset[["country"]],
  origin = "country.name",
  destination = "region"
)
# Reshape from one column per year to one row per (country, indicator, year).
green_bonds_tidy <- pivot_longer(
  green_debt_subset,
  # every column named "f" + four digits is a year column
  cols = matches("f\\d{4}"),
  # former column names go to `year` ...
  names_to = "year",
  # ... and are converted from strings such as "f2222" to the number 2222
  names_transform = readr::parse_number,
  # cell values go to `issuance_bn_usd`
  values_to = "issuance_bn_usd",
  # rows with no recorded value are dropped
  values_drop_na = TRUE
)

# Load necessary libraries
library(tidyverse)
library(ggplot2)

# Keep observations from 2012 onward (2012 itself is included).
green_bonds_data <- green_bonds_tidy |>
  filter(year >= 2012)

# Cumulative issuance by region:
#   1. total issuance per region and year,
#   2. sort by year within region so the running sum accumulates in time order,
#   3. running sum within each region.
# NOTE(review): green_bonds_tidy holds both selected indicators and they are
# summed together here; if sovereign issuance is already part of the
# by-country figure this double counts it -- confirm against the data source.
green_bonds_cumulative <- green_bonds_data |>
  group_by(region, year) |>
  # .groups is set explicitly so the grouping of the result is not implicit
  summarise(annual_issuance = sum(issuance_bn_usd), .groups = "drop") |>
  arrange(region, year) |>
  group_by(region) |>
  mutate(cumulative_issuance = cumsum(annual_issuance)) |>
  # return an ungrouped tibble so later verbs are not silently applied per region
  ungroup()

`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.

# Reshape data if necessary using pivot_longer and pivot_wider

# Line chart of cumulative issuance over time, one coloured line per region.
ggplot(
  green_bonds_cumulative,
  aes(x = year, y = cumulative_issuance, color = region)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Cumulative Green Bond Issuance by Region",
    x = "Year",
    y = "Cumulative Issuance (Billion USD)",
    color = "Region"
  ) +
  # one axis break per year, from the first to the last year in the data
  scale_x_continuous(
    breaks = with(green_bonds_cumulative, seq(min(year), max(year), by = 1))
  ) +
  # format the y axis as dollars with a "B" suffix
  scale_y_continuous(
    labels = scales::dollar_format(suffix = "B", scale = 1)
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Homework problem 2:

# Long-format data for the issuer-type analysis: snake_case the column names,
# drop rows whose issuer type is "Not Applicable", then reshape the yearly
# columns into (year, issuance_bn_usd) pairs.
green_debt_HWP2 <- clean_names(green_debt) |>
  filter(type_of_issuer != "Not Applicable") |>
  pivot_longer(
    # every column named "f" + four digits is a year column
    cols = matches("f\\d{4}"),
    names_to = "year",
    # "f2222" -> 2222
    names_transform = readr::parse_number,
    values_to = "issuance_bn_usd",
    # rows with no recorded value are dropped
    values_drop_na = TRUE
  )
# Load necessary libraries
library(tidyverse)
library(ggplot2)

# Keep only years strictly after 2012.
HWP2_issuer1 <- filter(green_debt_HWP2, year > 2012)

# Total issuance for each issuer type in each year (ungrouped result).
issuer_annual_totals <- HWP2_issuer1 |>
  group_by(type_of_issuer, year) |>
  summarise(
    issuance_bn_usd = sum(issuance_bn_usd, na.rm = TRUE),
    .groups = "drop"
  )

# Line chart of annual issuance, one coloured line per issuer type.
ggplot(
  issuer_annual_totals,
  aes(x = year, y = issuance_bn_usd, color = type_of_issuer)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Annual Green Bond Issuance by Type of Issuer (Post-2012)",
    x = "Year",
    y = "Issuance (Billion USD)",
    color = "Type of Issuer"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Market share and growth by issuer type, for 2012 onward (2012 is included).
#   total_annual_issuance : issuance summed over all issuer types in a year
#   issuance_bn_usd       : issuance summed per issuer type and year
#   market_share          : issuer-type issuance / total issuance of that year
#   year_over_year_growth : percent change versus the previous row of the same
#                           issuer type (NA for each type's first year)
# NOTE(review): growth compares with the previous *available* year; if an
# issuer type has a gap in its years the change spans more than one year.
HWP2_issuer2_filtered <- green_debt_HWP2 |>
  filter(year >= 2012) |>
  # attach the yearly total to every row before collapsing by issuer type
  group_by(year) |>
  mutate(total_annual_issuance = sum(issuance_bn_usd, na.rm = TRUE)) |>
  # group_by() replaces the previous grouping, so no ungroup() is needed here
  group_by(type_of_issuer, year) |>
  summarise(
    issuance_bn_usd = sum(issuance_bn_usd, na.rm = TRUE),
    # constant within a year, so the first value is representative
    total_annual_issuance = first(total_annual_issuance),
    market_share = issuance_bn_usd / total_annual_issuance,
    .groups = "drop"
  ) |>
  # lag() works on row order, so make the chronological order explicit
  arrange(type_of_issuer, year) |>
  group_by(type_of_issuer) |>
  mutate(
    year_over_year_growth = (issuance_bn_usd / lag(issuance_bn_usd) - 1) * 100
  ) |>
  ungroup()


# Dodged bar chart of market share per year, one bar per issuer type.
ggplot(
  HWP2_issuer2_filtered,
  aes(x = year, y = market_share, fill = type_of_issuer)
) +
  # geom_col() draws bars whose heights are the y values themselves
  geom_col(position = "dodge") +
  # place an axis break on every year present in the data
  scale_x_continuous(breaks = unique(HWP2_issuer2_filtered$year)) +
  labs(
    title = "Market Share by Issuer Type (Post-2012)",
    x = "Year",
    y = "Market Share",
    fill = "Type of Issuer"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

Homework problem 3:

Q1: What are green bond proceeds used for?

# Long-format data for the use-of-proceeds analysis: snake_case the column
# names, drop rows whose use of proceed is "Not Applicable", then reshape the
# yearly columns into (year, issuance_bn_usd) pairs.
green_debt_HWP3.1 <- clean_names(green_debt) |>
  filter(use_of_proceed != "Not Applicable") |>
  pivot_longer(
    # every column named "f" + four digits is a year column
    cols = matches("f\\d{4}"),
    names_to = "year",
    # "f2222" -> 2222
    names_transform = readr::parse_number,
    values_to = "issuance_bn_usd",
    # rows with no recorded value are dropped
    values_drop_na = TRUE
  )

# Load necessary libraries
library(tidyverse)
library(ggplot2)


# Keep 2022 rows that are reported in billions of US dollars.
green_debt_HWP3.2 <- green_debt_HWP3.1 |>
  filter(year == 2022, unit == "Billion US Dollars")

# Total 2022 issuance per use of proceed. Summarising over a single grouping
# variable already returns an ungrouped tibble, so no ungroup() is needed.
issuance_summary_2022 <- green_debt_HWP3.2 |>
  group_by(use_of_proceed) |>
  summarise(total_issuance = sum(issuance_bn_usd, na.rm = TRUE))

# The ten largest uses of proceeds by total issuance. slice_max() replaces the
# superseded top_n(); like top_n() it keeps ties, so more than ten rows can be
# returned when values tie at the cut-off.
top_issuance_2022 <- issuance_summary_2022 |>
  slice_max(total_issuance, n = 10)

# Horizontal bar chart of the top uses of proceeds, ordered by total issuance.
# NOTE(review): the title says "Cumulative", but top_issuance_2022 is built
# from 2022 rows only -- confirm the intended wording.
ggplot(
  top_issuance_2022,
  aes(
    x = reorder(use_of_proceed, total_issuance),
    y = total_issuance,
    fill = use_of_proceed
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() + # Flip coordinates to make it a horizontal bar chart
  scale_fill_viridis_d() + # Use a discrete viridis color scale
  # labs() names the aesthetics, not the screen axes: after coord_flip() the
  # x aesthetic (use of proceeds) is drawn vertically and the y aesthetic
  # (issuance) horizontally, so each label must follow its aesthetic.
  labs(
    title = "Top 10 Green Bond Proceeds in 2022 by Cumulative Issuance Volume",
    x = "Use of Proceeds",
    y = "Total Issuance (Billion USD)"
  ) +
  theme_minimal() +
  theme(
    legend.title = element_blank(), # Hide the legend title
    legend.position = "none" # Categories are already labelled on the axis
  )

# Save the most recently displayed plot as a PNG file.
ggsave("top_10_proceeds_2022.png", width = 10, height = 8)

Q2: What do we know about the currency of issuance? Is that changing over time?

# Long-format data for the currency analysis: snake_case the column names,
# drop rows whose principal currency is "Not Applicable", then reshape the
# yearly columns into (year, issuance_bn_usd) pairs.
green_debt_HWP3.3 <- clean_names(green_debt) |>
  filter(principal_currency != "Not Applicable") |>
  pivot_longer(
    # every column named "f" + four digits is a year column
    cols = matches("f\\d{4}"),
    names_to = "year",
    # "f2222" -> 2222
    names_transform = readr::parse_number,
    values_to = "issuance_bn_usd",
    # rows with no recorded value are dropped
    values_drop_na = TRUE
  )

library(ggplot2)
library(dplyr)
library(readr)

# Work on the long-format currency data built above (no file is read here).
green_debt_currency <- green_debt_HWP3.3

# 2022 issuance by principal currency, in billions of US dollars:
#   total_issuance : summed issuance per currency
#   proportion     : share of the total across all currencies, computed
#                    before the table is cut down to the ten largest
currency_summary <- green_debt_currency |>
  filter(year == 2022 & unit == "Billion US Dollars") |>
  group_by(principal_currency) |>
  summarise(total_issuance = sum(issuance_bn_usd, na.rm = TRUE)) |>
  mutate(proportion = total_issuance / sum(total_issuance)) |>
  arrange(desc(total_issuance)) |>
  slice_max(order_by = total_issuance, n = 10)

# Bar chart of the top currencies by 2022 issuance, each bar labelled with the
# currency's share of total issuance.
ggplot(
  currency_summary,
  aes(x = reorder(principal_currency, total_issuance), y = total_issuance)
) +
  # geom_col() draws bars whose heights are the y values themselves
  geom_col(fill = "steelblue") +
  # percentage label placed just above each bar
  geom_text(
    aes(label = scales::percent(proportion)),
    vjust = -0.5,
    size = 3.5
  ) +
  labs(
    title = "Top 10 Currencies for Green Bond Issuance Volume in 2022",
    subtitle = "Volume in Billion USD and Proportion of Total",
    x = "Currency",
    y = "Issuance Volume (Billion USD)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))