I am starting with DFC transaction data (“DFC_Transaction_Data”), which contains data on all active DFC projects as of 12/31/2021, as well as historical OPIC data (“OPIC_Historical_Data”), which is a dataset created by researchers at the Center for Global Development that includes all publicly available information on OPIC projects as of 2016, with supplementary country-level data.
After reviewing both datasets, I realized that I am missing projects that were committed between 2017-2021 but are no longer active. To fill in this missing data gap, I went through DFC’s annual reports and manually created an Excel spreadsheet containing this data. I have titled it “Missing_DFC_Projects_2017_2021”.
Once I clean each dataset, I need to append them and ensure that there are no duplicate project entries. From there, I can begin analyzing data to answer my first question, which is “To what extent did DFC’s financial flows change when it transformed from OPIC to DFC in 2020?”
To answer my second question, I will need to clean Boston University’s Chinese Loans to Africa Database (“Chinese_Loans_Africa”) and then merge it with a subset of my DFC database. It will need to be a subset because I will have to make sure it is comparing the same time period. From there, I should be able to perform analysis to answer my second question, which is “How does DFC financing in Africa compare to that of China’s Development Bank?”
Key variables include year, region, country, project name, NAICS sector/description, type, loan term, commitment level, exposure, and project description. I may keep additional variables for context but not use them in my analysis.
For the DFC and OPIC datasets, I will be appending them. I will first need to identify which columns in each dataset are relevant. Then, I will need to create a subset of each dataset consisting of only relevant variables, making sure they both have the same number of columns and column names so they can be appended properly. Since I am appending them (not merging them), I am using all the variables to combine them horizontally.
I am not yet sure if I will need to do this.
DFC_Transaction_Data <- read_excel("/Users/juliakresky/Documents/SAIS/Fall 2022/Sustainable Finance/Final Project/00_data_raw/DFC Transaction Data.xlsx",
sheet = "DFC Transaction Data", range = "A1:S1135")
OPIC_Historical_Data <- read_excel("/Users/juliakresky/Documents/SAIS/Fall 2022/Sustainable Finance/Final Project/00_data_raw/OPIC-scraped-portfolio-public.xlsx",
range = "A2:BH1638")
Missing_DFC_Projects_2017_2021 <- read_excel("/Users/juliakresky/Documents/SAIS/Fall 2022/Sustainable Finance/Final Project/00_data_raw/Missing DFC Projects 2017-2021.xlsx",
sheet = "Sheet1", range = "A1:G50")
Chinese_Loans_Africa <- read_excel("/Users/juliakresky/Documents/SAIS/Fall 2022/Sustainable Finance/Final Project/00_data_raw/CLA_2022.xlsx")
# Cleaning OPIC Data
OPIC_Historical_Final <- subset(OPIC_Historical_Data, select =c("Year","Region","Country","OPIC Classification","Project Name","NAICS Sector Description","Type", "Loan Term","OPIC Commitment","Exposure / MCL","Project Description (Annual Report)","Project Summary Link","Environmental Risk Category","Labor Category"))
OPIC_Historical_Final$Currency <- "USD"
OPIC_Historical_Final$Dataset <- "OPIC Historical"
OPIC_Historical_Final <- OPIC_Historical_Final %>%
rename("Fiscal Year (FY)" = "Year",
"Country Income Level (DFC Only)" = "OPIC Classification",
"NAICS Category/ Sector" = "NAICS Sector Description" ,
"Support Type" = "Type",
"Estimated Term (yrs)" = "Loan Term",
"Committed" = "OPIC Commitment" ,
"Exposure as of 12/31/21" = "Exposure / MCL",
"Description" = "Project Description (Annual Report)",
"Public Info Summary URL" = "Project Summary Link",
"Environment Risk Category" = "Environmental Risk Category",
"Labor Risk Category" = "Labor Category")
View(OPIC_Historical_Final)
# Cleaning DFC Data
DFC_Transaction_Final<- subset(DFC_Transaction_Data, select =c("Originating Agency","Fiscal Year (FY)","Region","Country", "Country Income Level (DFC Only)", "Project Name","NAICS Category/ Sector","Support Type","Currency", "Estimated Term (yrs)","Committed","Exposure as of 12/31/21","Description","Public Info Summary URL","Environment Risk Category" ,"Labor Risk Category")) %>%
filter(`Originating Agency` != "DCA" & `Originating Agency` != "Legacy USAID")
DFC_Transaction_Final <- subset(DFC_Transaction_Final, select = -c(`Originating Agency`))
DFC_Transaction_Final$Dataset <- "DFC Transaction"
#Combining all DFC data into one database
All_DFC_Data <- rbind(OPIC_Historical_Final, DFC_Transaction_Final)
View(All_DFC_Data)
saveRDS(object = All_DFC_Data, file = "/Users/juliakresky/Documents/SAIS/Fall 2022/Sustainable Finance/Final Project/03_data_processed/All_DFC_Data.rds")
#How has the value of DFC transactions changed over time?
All_DFC_Data <- rename(All_DFC_Data, fiscal_year = "Fiscal Year (FY)", sector = "NAICS Category/ Sector")
All_DFC_Data %>%
drop_na(fiscal_year) %>%
drop_na(Committed) %>%
group_by(fiscal_year) %>%
summarize(Committed = sum(Committed)) %>%
ggplot(aes(x=fiscal_year,y=Committed)) + geom_bar(stat='identity',fill = "#BD1E3F") +
labs(x="Fiscal Year", y="Amount Committed") + scale_y_continuous(labels=scales::dollar_format())
#How do total DFC commitments vary across regions?
All_DFC_Data %>%
drop_na(Region) %>%
drop_na(Committed) %>%
group_by(Region) %>%
summarize(Committed = sum(Committed)) %>%
ggplot(aes(x=reorder(Region, -Committed),y=Committed)) + geom_bar(stat='identity',fill = "#0C1F42") +
labs(x="Region", y="Amount Committed") + scale_y_continuous(labels=scales::dollar_format())
#How do total DFC commitments vary across sectors?
All_DFC_Data %>%
drop_na(sector) %>%
drop_na(Committed) %>%
filter(Region=="Africa") %>%
group_by(sector) %>%
summarize(Committed = sum(Committed)) %>%
ggplot(aes(x=reorder(sector, -Committed),y=Committed)) + geom_bar(stat='identity',fill = "#BD1E3F") +
labs(x="Sector", y="Amount Committed") + scale_y_continuous(labels=scales::dollar_format()) + theme(axis.text.x = element_text(angle = 45, hjust=1))
#How do total DFC commitments vary within Sub-Saharan Africa?
All_DFC_Data %>%
drop_na(Country) %>%
drop_na(Committed) %>%
filter(Region=="Africa") %>%
group_by(Country) %>%
summarize(Committed = sum(Committed)) %>%
ggplot(aes(x=reorder(Country, -Committed),y=Committed)) + geom_bar(stat='identity',fill = "#0C1F42") +
labs(x="Country", y="Amount Committed") + scale_y_continuous(labels=scales::dollar_format())+ theme(axis.text.x = element_text(angle = 45, hjust=1))
saveRDS(object = All_DFC_Data, file = "/Users/juliakresky/Documents/SAIS/Fall 2022/Sustainable Finance/Final Project/03_data_processed/All_DFC_Data.rds")