scope_1_1 <-read_csv("C:/Users/Lara/Documents/R/Sustainable Finance/Lara's R stuff/Week 1 folder/scope_1_1")
Rows: 10920 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country_name, iso3c
dbl (2): year, domestic_emissions
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
scope_1_2 <-read_csv("C:/Users/Lara/Documents/R/Sustainable Finance/Lara's R stuff/Week 1 folder/scope_1_2")
Rows: 2016 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country_name, iso3c
dbl (2): year, domestic_export_emissions
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
scope_2 <-read_csv("C:/Users/Lara/Documents/R/Sustainable Finance/Lara's R stuff/Week 1 folder/scope_2")
Rows: 2016 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country_name, iso3c
dbl (2): year, imported_grid_emissions
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
scope_3 <-read_csv("C:/Users/Lara/Documents/R/Sustainable Finance/Lara's R stuff/Week 1 folder/scope_3")
Rows: 2016 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country_name, iso3c
dbl (2): year, imported_total_emissions
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nominal_GDP <-read_csv("C:/Users/Lara/Documents/R/Sustainable Finance/Lara's R stuff/Week 1 folder/nominal_GDP")
Rows: 16492 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country_name, iso3c
dbl (2): year, nominal_gdp
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
population <-read_csv("C:/Users/Lara/Documents/R/Sustainable Finance/Lara's R stuff/Week 1 folder/population")
Rows: 16492 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country_name, iso3c
dbl (2): year, population
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ppp_gdp <-read_csv("C:/Users/Lara/Documents/R/Sustainable Finance/Lara's R stuff/Week 1 folder/ppp_gdp")
Rows: 16492 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): country_name, iso3c
dbl (2): year, ppp_gdp
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
create datasets with which to work
#first join scope_1_1 and scope_1_2 since scope 1 emissions include emissions from exported goods and services #also keep scope_1_1 for analysis only based on this because I think there will be much info lost by joining them scope_1_merged <-left_join(scope_1_1, select(scope_1_2, country_name, year, domestic_export_emissions), by =c("country_name", "year"))#lets see how much information I lost by merging the two datasets by checking which countries have no information at all for "domestic_export_emissions" unique_countries_scope_1 <- scope_1_merged |>filter(!is.na(domestic_emissions)) |>group_by(country_name) |>filter(all(is.na(domestic_export_emissions))) |>select(country_name) |>distinct()unique_countries_scope_1
# A tibble: 145 × 1
# Groups: country_name [145]
country_name
<chr>
1 International Aviation
2 International Shipping
3 Afghanistan
4 Albania
5 Algeria
6 Angola
7 Anguilla
8 Antigua & Barbuda
9 Armenia
10 Aruba
# … with 135 more rows
#those are 145 countries for which no export emissions are available. #lets join them with ppp gdp and see which countries are biggest unique_countries_scope_1 <- unique_countries_scope_1 |>left_join(population, by ="country_name")
Warning in left_join(unique_countries_scope_1, population, by = "country_name"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 3 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
unique_countries_scope_1 <-left_join(unique_countries_scope_1, nominal_GDP, by =c("country_name", "iso3c", "year"))unique_countries_scope_1 <-left_join(unique_countries_scope_1, ppp_gdp, by =c("country_name", "iso3c", "year"))# filter joined_data to keep only year 2019cross_section_unique_scope_1_2018 <- unique_countries_scope_1 |>filter(year ==2018)#plot top 10 which countries don't hava data for exported emissions top_10_unique_2018_scope_1 <- cross_section_unique_scope_1_2018 |>group_by(country_name) |>summarize(total_population =sum(population, na.rm =TRUE)) |>arrange(desc(total_population)) |>slice(1:10)#turn this into a graph library(ggplot2)ggplot(top_10_unique_2018_scope_1, aes(x=reorder(country_name, total_population), y = total_population)) +geom_bar(stat ="identity") +labs(x ="Country", y ="Population") +scale_y_continuous(labels = scales::comma) +coord_flip() +theme_minimal() +ggtitle("Countries without data for exported emissions in 2018")
#build a masterdataset #begin with joining the basic data. masterdataset <-left_join(population, nominal_GDP, by =c("country_name", "iso3c", "year"))masterdataset <-left_join(masterdataset, ppp_gdp, by =c("country_name", "iso3c", "year"))masterdataset <-left_join(masterdataset, scope_1_merged, by =c("country_name", "iso3c", "year"))#add domestic emissions and domestic export emissions where it is possible masterdataset <- masterdataset |>mutate(scope_1 =ifelse(is.na(domestic_emissions), 0, domestic_emissions) +ifelse(is.na(domestic_export_emissions), 0, domestic_export_emissions))|>mutate(scope_1 =replace(scope_1, scope_1 ==0, NA))#now add other emissions data masterdataset <-left_join(masterdataset, scope_2, by =c("country_name", "iso3c", "year"))masterdataset <-left_join(masterdataset, scope_3, by =c("country_name", "iso3c", "year"))#create a crossection for 2018 Masterdataset_2018 <- masterdataset|>filter(year ==2018)#see who has the highest scope 1 emmissions Masterdataset_2018_scope1 <- Masterdataset_2018 |>group_by(country_name) |>summarize(total_scope_1 =sum(scope_1, na.rm =TRUE)) |>arrange(desc(total_scope_1)) |>slice(1:20)ggplot(Masterdataset_2018_scope1, aes(x=reorder(country_name, total_scope_1), y = total_scope_1)) +geom_bar(stat ="identity") +labs(x ="Country", y ="Scope 1 Emissions") +scale_y_continuous(labels = scales::comma) +coord_flip() +theme_minimal() +ggtitle("Countries with the highest scope 1 emissions in 2018")
#see who has the highest scope 2 emissions Masterdataset_2018_scope2 <- Masterdataset_2018 |>group_by(country_name) |>summarize(total_scope_2 =sum(imported_grid_emissions, na.rm =TRUE)) |>arrange(desc(total_scope_2)) |>slice(1:20)#see who has the highest scope 1 emmissions ggplot(Masterdataset_2018_scope2, aes(x=reorder(country_name, total_scope_2), y = total_scope_2)) +geom_bar(stat ="identity") +labs(x ="Country", y ="Scope 2 Emissions") +scale_y_continuous(labels = scales::comma) +coord_flip() +theme_minimal() +ggtitle("Countries with the highest scope 2 emissions in 2018")
#see who has the highest scope 3 emissions Masterdataset_2018_scope3 <- Masterdataset_2018 |>group_by(country_name) |>summarize(total_scope_3 =sum(imported_total_emissions, na.rm =TRUE)) |>arrange(desc(total_scope_3)) |>slice(1:20)#see who has the highest scope 1 emmissions ggplot(Masterdataset_2018_scope3, aes(x=reorder(country_name, total_scope_3), y = total_scope_3)) +geom_bar(stat ="identity") +labs(x ="Country", y ="Scope 3 Emissions") +scale_y_continuous(labels = scales::comma) +coord_flip() +theme_minimal() +ggtitle("Countries with the highest scope 3 emissions in 2018")