Installing package into 'C:/Users/alyss/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
package 'xfun' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'xfun'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
C:\Users\alyss\AppData\Local\R\win-library\4.2\00LOCK\xfun\libs\x64\xfun.dll to
C:\Users\alyss\AppData\Local\R\win-library\4.2\xfun\libs\x64\xfun.dll:
Permission denied
Warning: restored 'xfun'
The downloaded binary packages are in
C:\Users\alyss\AppData\Local\Temp\RtmpAnZtCM\downloaded_packages
library(xfun)
Attaching package: 'xfun'
The following objects are masked from 'package:base':
attr, isFALSE
library(esquisse)install.packages("stringr")
Warning: package 'stringr' is in use and will not be installed
library(stringr)library(openxlsx)
Import the data, rename variables that have confusing or similar names, and pivot the data to follow the “tidy” data rules.
#read in dataRaw_World_Investment_Data <-read_excel("C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/00_data_raw/World_Energy_Investment_raw.xlsx", sheet =1)# renaming variables that have the same names Raw_World_Investment_Data[3, "World"] <-"Total Supply"Raw_World_Investment_Data[8, "World"] <-"Total End Use"Raw_World_Investment_Data[12, "World"] <-"Total Fuels"Raw_World_Investment_Data[13, "World"] <-"Fuels_Total Fossil Fuels"Raw_World_Investment_Data[14, "World"] <-"Fuels_Oil"Raw_World_Investment_Data[15, "World"] <-"Fuels_Gas"Raw_World_Investment_Data[16, "World"] <-"Fuels_Coal"Raw_World_Investment_Data[17, "World"] <-"Fuels_Clean Fuels"Raw_World_Investment_Data[18, "World"] <-"Total Power"Raw_World_Investment_Data[19, "World"] <-"Power_Generation"Raw_World_Investment_Data[20, "World"] <-"Power_Coal"Raw_World_Investment_Data[21, "World"] <-"Power_Oil_Natural_Gas"Raw_World_Investment_Data[22, "World"] <-"Power_Nuclear"Raw_World_Investment_Data[23, "World"] <-"Power_Renewables"Raw_World_Investment_Data[24, "World"] <-"Power_Fossil_Fuels_CCUS"Raw_World_Investment_Data[25, "World"] <-"Power_Storage"Raw_World_Investment_Data[2, "World"] <-"Total Clean Energy"Raw_World_Investment_Data[6, "World"] <-"Supply_Electricity Networks"Raw_World_Investment_Data[26, "World"] <-"Power_Electricity Networks"# pivot longer to make Year columnlong_Investment_data <-pivot_longer(Raw_World_Investment_Data, cols =-World, names_to ="Year", values_to ="Value")# pivot wider to make variable columns wide_Investment_data <-pivot_wider(long_Investment_data,names_from = World, values_from = Value)
Repeat manually or create a function for the remaining sheets
Above, I cleaned the first sheet of the IEA data, but the excel file I’m using has 6 tabs of data in a similar format. Here you can create a function to clean all sheets in a similar fashion, or manually run the same code multiple times. As I was updating unique variable names, I copied the code below for the remaining sheets.
Sheet 2:
# Sheet 2 Raw_World_Investment_Data_2 <-read_excel("C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/00_data_raw/World_Energy_Investment_raw.xlsx", sheet =2)# renaming variables that have the same names Raw_World_Investment_Data_2[3, "Emerging market and developing economies"] <-"Total Supply"View(Raw_World_Investment_Data_2)Raw_World_Investment_Data_2[8, "Emerging market and developing economies"] <-"Total End Use"Raw_World_Investment_Data_2[12, "Emerging market and developing economies"] <-"Total Fuels"Raw_World_Investment_Data_2[13, "Emerging market and developing economies"] <-"Fuels_Total Fossil Fuels"Raw_World_Investment_Data_2[14, "Emerging market and developing economies"] <-"Fuels_Oil"Raw_World_Investment_Data_2[15, "Emerging market and developing economies"] <-"Fuels_Gas"Raw_World_Investment_Data_2[16, "Emerging market and developing economies"] <-"Fuels_Coal"Raw_World_Investment_Data_2[17, "Emerging market and developing economies"] <-"Fuels_Clean Fuels"Raw_World_Investment_Data_2[18, "Emerging market and developing economies"] <-"Total Power"Raw_World_Investment_Data_2[19, "Emerging market and developing economies"] <-"Power_Generation"Raw_World_Investment_Data_2[20, "Emerging market and developing economies"] <-"Power_Coal"Raw_World_Investment_Data_2[21, "Emerging market and developing economies"] <-"Power_Oil_Natural_Gas"Raw_World_Investment_Data_2[22, "Emerging market and developing economies"] <-"Power_Nuclear"Raw_World_Investment_Data_2[23, "Emerging market and developing economies"] <-"Power_Renewables"Raw_World_Investment_Data_2[24, "Emerging market and developing economies"] <-"Power_Fossil_Fuels_CCUS"Raw_World_Investment_Data_2[25, "Emerging market and developing economies"] <-"Power_Storage"Raw_World_Investment_Data_2[2, "Emerging market and developing economies"] <-"Total Clean Energy"Raw_World_Investment_Data_2[6, "Emerging market and developing economies"] <-"Supply_Electricity Networks"Raw_World_Investment_Data_2[26, "Emerging market and developing economies"] <-"Power_Electricity Networks"# pivot longer to create year columnlong_Investment_data_2 <-pivot_longer(Raw_World_Investment_Data_2, cols =-`Emerging market and developing economies`, names_to ="Year", values_to ="Value")# pivot wider to make variable columns wide_Investment_data_2 <-pivot_wider(long_Investment_data_2,names_from =`Emerging market and developing economies`, values_from = Value)# delete extraneous rowwide_Investment_data_2_2 <- wide_Investment_data_2[-10, ]
Sheet 3:
# Sheet 3 Raw_World_Investment_Data_3 <-read_excel("C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/00_data_raw/World_Energy_Investment_raw.xlsx", sheet =3)# renaming variables that have the same names Raw_World_Investment_Data_3[3, "Central and South America"] <-"Total Supply"View(Raw_World_Investment_Data_3)Raw_World_Investment_Data_3[8, "Central and South America"] <-"Total End Use"Raw_World_Investment_Data_3[12, "Central and South America"] <-"Total Fuels"Raw_World_Investment_Data_3[13, "Central and South America"] <-"Fuels_Total Fossil Fuels"Raw_World_Investment_Data_3[14, "Central and South America"] <-"Fuels_Oil"Raw_World_Investment_Data_3[15, "Central and South America"] <-"Fuels_Gas"Raw_World_Investment_Data_3[16, "Central and South America"] <-"Fuels_Coal"Raw_World_Investment_Data_3[17, "Central and South America"] <-"Fuels_Clean Fuels"Raw_World_Investment_Data_3[18, "Central and South America"] <-"Total Power"Raw_World_Investment_Data_3[19, "Central and South America"] <-"Power_Generation"Raw_World_Investment_Data_3[20, "Central and South America"] <-"Power_Coal"Raw_World_Investment_Data_3[21, "Central and South America"] <-"Power_Oil_Natural_Gas"Raw_World_Investment_Data_3[22, "Central and South America"] <-"Power_Nuclear"Raw_World_Investment_Data_3[23, "Central and South America"] <-"Power_Renewables"Raw_World_Investment_Data_3[24, "Central and South America"] <-"Power_Fossil_Fuels_CCUS"Raw_World_Investment_Data_3[25, "Central and South America"] <-"Power_Storage"Raw_World_Investment_Data_3[2, "Central and South America"] <-"Total Clean Energy"Raw_World_Investment_Data_3[6, "Central and South America"] <-"Supply_Electricity Networks"Raw_World_Investment_Data_3[26, "Central and South America"] <-"Power_Electricity Networks"# pivot longer to create year columnlong_Investment_data_3 <-pivot_longer(Raw_World_Investment_Data_3, cols =-`Central and South America`, names_to ="Year", values_to ="Value")# pivot wider to make variable columns wide_Investment_data_3 <-pivot_wider(long_Investment_data_3,names_from =`Central and South America`, values_from = Value)
Now it’s time to combine the six data frames into one, and start grouping the data into a usable format for making graphics.
# Combine combined_Energy_Investment <-bind_rows(wide_Investment_data, wide_Investment_data_2_2, wide_Investment_data_3, wide_Investment_data_4, wide_Investment_data_5, wide_Investment_data_6)# Reorder columnscombined_Energy_Investment_1 <- combined_Energy_Investment %>%select(Region, everything())# Cleaning names combined_Energy_Investment_2 <- combined_Energy_Investment_1 |>rename(`total_billion`=`Total (billion $2022)`) |>clean_names()# Making sure the data is numericcombined_Energy_Investment_2$total_billion <-as.numeric(combined_Energy_Investment_2$total_billion)combined_Energy_Investment_2$total_clean_energy <-as.numeric(combined_Energy_Investment_2$total_clean_energy)# Creating a sum total of energy investments and clean energy investments Energy_investment_total_by_region <- combined_Energy_Investment_2 %>%group_by(region) %>%summarise(total_investment =sum(total_billion))Clean_energy_investment_by_region <- combined_Energy_Investment_2 %>%group_by(region) %>%summarise(total_investment =sum(total_clean_energy))Clean_energy_investment_by_region <- Clean_energy_investment_by_region |>rename(total_clean_investment = total_investment)# Merge the sum totals merged_total_invest_region <-merge(Clean_energy_investment_by_region, Energy_investment_total_by_region, by =c("region"), all =TRUE)# Pivot long to get different types of investment in one columntotal_invest_long_1 <-pivot_longer(merged_total_invest_region, cols =c(total_clean_investment, total_investment), names_to ="Investment_Type", values_to ="Investment_Amount")# Graph of investment amounts per regionggplot(total_invest_long_1, aes(x = region, y = Investment_Amount, fill = Investment_Type)) +geom_bar(stat ="identity", position ="dodge") +scale_fill_manual(values =c("total_clean_investment"="forestgreen", "total_investment"="darkblue"),labels =c("total_clean_investment"="Investments in Clean Energy", "total_investment"="Total Energy Investments")) +labs(title ="Comparison of Energy Investments by Region (2015-2023)",x ="",y ="Investment (USD billions)",caption ="Data: International Energy Agency",fill ="Investment Type") +theme_minimal() +theme(axis.text.x =element_text(size =6), axis.text.y =element_text(size =6), # Smaller y-axis titleaxis.title.y =element_text(size =6), # Smaller y-axis titleplot.title =element_text(size =8, hjust =0.5),legend.title =element_text(size =7), # Adjust legend title sizelegend.text =element_text(size =6), # Adjust legend text sizelegend.key.size =unit(0.5, "cm"),plot.caption =element_text(size=6))
# Reordering regions for the graphtotal_invest_long_1$region <-factor(total_invest_long_1$region,levels =c("Africa", "Asia Pacific", "Latin America", "Middle East","Emerging Markets", "World"))# Write out data write.xlsx(total_invest_long_1, file ="C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/03_data_processed.xlsx")
World Bank Data
5. Next I cleaned the World Bank data on access to electricity. After importing in the data, I adjusted the placement of the variables, cleaned the names and pivoted the data.
# Cleaning WB Data #read in dataRaw_WB_Electricity_Data <-read_excel("C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/00_data_raw/API_EG.ELC.ACCS.ZS_DS2_en_excel_v2_42577.xls", sheet =1)
# drop columns before 1990 due to missing dataRaw_WB_Electricity_Data_1 <- Raw_WB_Electricity_Data[, -c(4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34)] # dropping first two rowsRaw_WB_Electricity_Data_2 <- Raw_WB_Electricity_Data_1[-(1:2), ]# making the new first row the column names colnames(Raw_WB_Electricity_Data_2) <-as.character(unlist(Raw_WB_Electricity_Data_2[1, ]))# dropping extra namesRaw_WB_Electricity_Data_2 <- Raw_WB_Electricity_Data_2[-1, ]# pivot longer to create year columnlong_WB_Electricity <-pivot_longer(Raw_WB_Electricity_Data_2, cols =!c(`Country Name`, `Country Code`, `Indicator Name`), names_to ="Year", values_to ="Value")# pivot wider to make variable columns wide_WB_Electricity <-pivot_wider(long_WB_Electricity,names_from =`Indicator Name`, values_from = Value)# clean names wide_WB_Electricity_1 <- wide_WB_Electricity |>rename(`iso_code`=`Country Code`) |>clean_names()# write out the datawrite.xlsx(wide_WB_Electricity_1, file ="C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/03_data_processed")
Our World in Data
Finally, I cleaned the third and final data set from Our World in Data on different types of energy access around the world. I made sure the data classification matched the data and then merged it with the World Bank data set.
# our world in data# read in dataRaw_OWID_Data <-read_excel("C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/00_data_raw/owid-energy-data.xlsx", sheet =1)# make the numbers numericwide_WB_Electricity_1$access_to_electricity_percent_of_population <-as.numeric(wide_WB_Electricity_1$access_to_electricity_percent_of_population)# matching specifications class(Raw_OWID_Data$year)
[1] "numeric"
class(Raw_OWID_Data$iso_code)
[1] "character"
class(wide_WB_Electricity_1$year)
[1] "character"
class(wide_WB_Electricity_1$iso_code)
[1] "character"
# making year integer Raw_OWID_Data$year <-as.integer(Raw_OWID_Data$year)wide_WB_Electricity_1$year <-as.integer(wide_WB_Electricity_1$year)# merge WB and OWIDmerged_WB_OWID_Energy <-merge(Raw_OWID_Data, wide_WB_Electricity_1, by =c("iso_code", "year"), all =TRUE)merged_WB_OWID <-full_join(Raw_OWID_Data, wide_WB_Electricity_1, by =c("iso_code", "year"))"access_to_electricity_percent_of_population"%in%names(wide_WB_Electricity_1)
# Write out the datawrite.xlsx(merged_WB_OWID, file ="C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance/03_data_processed")
Example Graphics
# make some graphs # graph on electricity % of popfiltered_electric <- wide_WB_Electricity_1 %>%filter(country_name %in%c("Africa Eastern and Southern", "Africa Western and Central", "Middle East & North Africa", "Central Europe and the Baltics", "East Asia & Pacific (excluding high income)", "South Asia (IDA & IBRD)", "Latin America & Caribbean"))filtered_electric_2021 <- filtered_electric %>%filter(year ==2021)# graph one option, regular bar chartggplot(filtered_electric_2021) +aes(x = country_name,y = access_to_electricity_percent_of_population,fill = country_name ) +geom_col() +scale_fill_viridis_d(option ="viridis", direction =1) +labs(x ="Region",y ="Access to Electricity (% of population)",title ="Access to Electricity in 2021 by Region",caption ="Data: World Bank Database",fill ="Region" ) +theme_minimal()+theme(axis.text.x =element_text(angle =45, hjust =1))
# graoh 2, hortizontal bar chartggplot(filtered_electric_2021, aes(x = country_name, y = access_to_electricity_percent_of_population, fill = country_name)) +geom_bar(stat ="identity", width =0.5)+coord_flip() +labs(title="Access to Electricity in 2021 by Region", y="Access to Electricity (% of population)", x="Region") +theme_minimal()