Data Preparation

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.1     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(readr)
library(readxl)
library(countrycode)
library(ggplot2)
temperature <- 
  read_csv("01_data_cleaning/Annual_Surface_Temperature_Change.csv")
Rows: 225 Columns: 72
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (9): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (63): ObjectId, F1961, F1962, F1963, F1964, F1965, F1966, F1967, F1968, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
temperature_mean <- temperature |> 
  select(Country, F1961:F2022) |> 
  pivot_longer(F1961:F2022, names_to = "year", values_to = "temp", values_drop_na = TRUE) |> 
  group_by(year) |> 
  summarize(avg_temperature = mean(temp))

year <- temperature_mean$year
year <- as.numeric(gsub("F","", year))
temperature_mean$year <- year

temperature_mean
# A tibble: 62 × 2
    year avg_temperature
   <dbl>           <dbl>
 1  1961         0.163  
 2  1962        -0.0135 
 3  1963        -0.00604
 4  1964        -0.0701 
 5  1965        -0.247  
 6  1966         0.106  
 7  1967        -0.111  
 8  1968        -0.199  
 9  1969         0.158  
10  1970         0.0925 
# … with 52 more rows
#view(temperature)
temperature_mean |> 
  ggplot(aes(x=year,y=avg_temperature))+
  geom_line() +
  scale_x_continuous(breaks = c(1960,1980,2000,2020)) +
  xlab("Year") +
  ylab("Mean Surface Temperature Change") +
  ggtitle("Annual Surface Mean Temperature Change Over Time")

disasters_frequency <- read_csv("01_data_cleaning/Climate-related_Disasters_Frequency.csv") |> 
  select(Country, Indicator, F1980:F2022) |> 
  filter(Indicator!="Climate related disasters frequency, Number of Disasters: TOTAL")
Rows: 970 Columns: 53
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (9): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (44): ObjectId, F1980, F1981, F1982, F1983, F1984, F1985, F1986, F1987, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
disasters_frequency$Indicator <- gsub("Climate related disasters frequency, Number of Disasters:", "", disasters_frequency$Indicator)
disasters_frequency
# A tibble: 755 × 45
   Country   Indic…¹ F1980 F1981 F1982 F1983 F1984 F1985 F1986 F1987 F1988 F1989
   <chr>     <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Afghanis… " Drou…    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 2 Afghanis… " Extr…    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 3 Afghanis… " Floo…     1    NA    NA    NA    NA    NA    NA    NA     1    NA
 4 Afghanis… " Land…    NA    NA    NA    NA    NA    NA    NA     1    NA    NA
 5 Afghanis… " Stor…    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 6 Afghanis… " Wild…    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 7 Albania   " Drou…    NA    NA    NA    NA    NA    NA    NA    NA    NA     1
 8 Albania   " Extr…    NA    NA    NA    NA    NA     1    NA    NA    NA    NA
 9 Albania   " Floo…    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
10 Albania   " Land…    NA    NA    NA    NA    NA     1    NA    NA    NA    NA
# … with 745 more rows, 33 more variables: F1990 <dbl>, F1991 <dbl>,
#   F1992 <dbl>, F1993 <dbl>, F1994 <dbl>, F1995 <dbl>, F1996 <dbl>,
#   F1997 <dbl>, F1998 <dbl>, F1999 <dbl>, F2000 <dbl>, F2001 <dbl>,
#   F2002 <dbl>, F2003 <dbl>, F2004 <dbl>, F2005 <dbl>, F2006 <dbl>,
#   F2007 <dbl>, F2008 <dbl>, F2009 <dbl>, F2010 <dbl>, F2011 <dbl>,
#   F2012 <dbl>, F2013 <dbl>, F2014 <dbl>, F2015 <dbl>, F2016 <dbl>,
#   F2017 <dbl>, F2018 <dbl>, F2019 <dbl>, F2020 <dbl>, F2021 <dbl>, …
disasters_frequency <- disasters_frequency |> 
  pivot_longer(F1980:F2022, names_to = "year", values_to = "count", values_drop_na = TRUE) 

disasters_frequency$year <- 
  as.numeric(gsub("F","", disasters_frequency$year))
disasters_frequency
# A tibble: 6,360 × 4
   Country                      Indicator               year count
   <chr>                        <chr>                  <dbl> <dbl>
 1 Afghanistan, Islamic Rep. of " Drought"              2000     1
 2 Afghanistan, Islamic Rep. of " Drought"              2006     1
 3 Afghanistan, Islamic Rep. of " Drought"              2008     1
 4 Afghanistan, Islamic Rep. of " Drought"              2011     1
 5 Afghanistan, Islamic Rep. of " Drought"              2018     1
 6 Afghanistan, Islamic Rep. of " Drought"              2021     1
 7 Afghanistan, Islamic Rep. of " Extreme temperature"  1990     1
 8 Afghanistan, Islamic Rep. of " Extreme temperature"  1991     1
 9 Afghanistan, Islamic Rep. of " Extreme temperature"  2001     2
10 Afghanistan, Islamic Rep. of " Extreme temperature"  2005     1
# … with 6,350 more rows
yearly_disasters <- disasters_frequency |> 
  group_by(year, Indicator) |> 
  summarize(totalDisaster = sum(count))
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
yearly_disasters
# A tibble: 257 × 3
# Groups:   year [43]
    year Indicator              totalDisaster
   <dbl> <chr>                          <dbl>
 1  1980 " Drought"                        24
 2  1980 " Extreme temperature"             3
 3  1980 " Flood"                          39
 4  1980 " Landslide"                       3
 5  1980 " Storm"                          34
 6  1980 " Wildfire"                        1
 7  1981 " Drought"                        18
 8  1981 " Extreme temperature"             1
 9  1981 " Flood"                          38
10  1981 " Landslide"                       7
# … with 247 more rows
yearly_disasters |> 
  group_by(year) |> 
  ggplot(aes(x=year,y=totalDisaster,fill=Indicator)) +
  geom_bar(stat="identity", position = "stack") +
  labs(fill="Types of Disasters") +
  xlab("Year") +
  ylab("Number of Natural Disasters") +
  ggtitle("Frequency of Natural Disasters")

climate_risk_2022 <- read_csv("01_data_cleaning/Climate-driven_INFORM_Risk.csv") |>
  select(Country, ISO3, Indicator, F2022) |> 
  filter(Indicator=="Climate-driven INFORM Risk Indicator")
Rows: 764 Columns: 20
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (9): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (11): ObjectId, F2013, F2014, F2015, F2016, F2017, F2018, F2019, F2020, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
climate_risk_2022
# A tibble: 188 × 4
   Country                      ISO3  Indicator                            F2022
   <chr>                        <chr> <chr>                                <dbl>
 1 Afghanistan, Islamic Rep. of AFG   Climate-driven INFORM Risk Indicator   7.3
 2 Albania                      ALB   Climate-driven INFORM Risk Indicator   3.2
 3 Algeria                      DZA   Climate-driven INFORM Risk Indicator   3.3
 4 Angola                       AGO   Climate-driven INFORM Risk Indicator   4.8
 5 Antigua and Barbuda          ATG   Climate-driven INFORM Risk Indicator   2.9
 6 Argentina                    ARG   Climate-driven INFORM Risk Indicator   3.5
 7 Armenia, Rep. of             ARM   Climate-driven INFORM Risk Indicator   3.4
 8 Australia                    AUS   Climate-driven INFORM Risk Indicator   2.9
 9 Austria                      AUT   Climate-driven INFORM Risk Indicator   2.3
10 Azerbaijan, Rep. of          AZE   Climate-driven INFORM Risk Indicator   4.4
# … with 178 more rows
country_name_regex_to_iso3c <- function(country_name) {
  country_name |>
    countrycode(origin = "country.name", 
                                     destination = "iso3c",
                                     origin_regex = TRUE)
}

world <- map_data("world") |> 
  mutate(ISO3 = country_name_regex_to_iso3c(region))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `ISO3 = country_name_regex_to_iso3c(region)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: Ascension Island, Azores, Barbuda, Bonaire, Canary Islands, Chagos Archipelago, Grenadines, Heard Island, Kosovo, Madeira Islands, Micronesia, Saba, Saint Martin, Siachen Glacier, Sint Eustatius, Virgin Islands
risk_map <- left_join(world, climate_risk_2022, by="ISO3") |> 
  ggplot(aes(x=long, y=lat, group=group)) +
  geom_polygon(aes(fill=F2022), color="#7f7f7f") +
  scale_fill_gradient(low="yellow", high="red", na.value="beige") +
  labs(fill="Risk Index") +
  xlab("") +
  ylab("") +
  ggtitle("Climate-driven Risk")
  
risk_map

world <- map_data(“world”) |> rename(Country = region)

risk_map <- left_join(world, climate_risk_2022, by=“Country”) #filter(!is.na(F2022)) risk_map

ggplot(aes(x=long, y=lat, map_id=Country, fill=F2022)) + geom_map(map=world, color=“#7f7f7f”) + scale_fill_gradient(low=“white”, high=“red”, guide=“legend”) + theme_void()

transition_preparedness <- read_csv("01_data_cleaning/Preparedness_of_Countries_for_a_Low-Carbon_Transition.csv") |> 
  select(Country, ISO3, Indicator, F2019) |> 
  pivot_wider(names_from = Indicator, values_from = F2019)
Rows: 202 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (9): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, C...
dbl (2): ObjectId, F2019

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
transition_preparedness
# A tibble: 101 × 4
   Country             ISO3  Exposure Resilience
   <chr>               <chr>    <dbl>      <dbl>
 1 Albania             ALB      0.265      0.717
 2 Algeria             DZA      0.605      0.800
 3 Angola              AGO      0.417      0.970
 4 Argentina           ARG      0.321      0.713
 5 Australia           AUS      0.353      0.330
 6 Austria             AUT      0.259      0.309
 7 Azerbaijan, Rep. of AZE      0.683      0.650
 8 Bangladesh          BGD      0.349      0.823
 9 Belgium             BEL      0.206      0.357
10 Bolivia             BOL      0.423      0.795
# … with 91 more rows
country_region <- read_csv("01_data_cleaning/Country_region.csv") |> 
  select(`Country Code`, Region, IncomeGroup) |> 
  rename(ISO3=`Country Code`)
New names:
Rows: 265 Columns: 6
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(5): Country Code, Region, IncomeGroup, SpecialNotes, TableName lgl (1): ...6
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...6`
country_region
# A tibble: 265 × 3
   ISO3  Region                     IncomeGroup        
   <chr> <chr>                      <chr>              
 1 ABW   Latin America & Caribbean  High income        
 2 AFE   <NA>                       <NA>               
 3 AFG   South Asia                 Low income         
 4 AFW   <NA>                       <NA>               
 5 AGO   Sub-Saharan Africa         Lower middle income
 6 ALB   Europe & Central Asia      Upper middle income
 7 AND   Europe & Central Asia      High income        
 8 ARB   <NA>                       <NA>               
 9 ARE   Middle East & North Africa High income        
10 ARG   Latin America & Caribbean  Upper middle income
# … with 255 more rows
exposure_resilience <- 
  left_join(transition_preparedness, country_region, by="ISO3") 
exposure_resilience |> 
  ggplot(aes(x=Resilience, y=Exposure, color=IncomeGroup)) +
  geom_point(alpha=0.8,size=2) +
  geom_text(aes(label=ISO3),size=2,color="grey25") +
  labs(fill="Income Group") +
  ggtitle("Exposure and Resilience to Low-Carbon Economy Transition") +
  xlab("Resilience from 0 (high resilience) to 1 (low resilience)") +
  ylab("Exposure from 0 (low exposure) to 1 (high exposure)") +
  theme(axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        title = element_text(size = 10))

GSCIscore_2022 <- read_excel("01_data_cleaning/GSCI_Scores_2022.xlsx", 
    sheet = "Sustainable Score") |> 
  mutate(ISO3 = country_name_regex_to_iso3c(Country)) |> 
  left_join(country_region, by="ISO3") 
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `ISO3 = country_name_regex_to_iso3c(Country)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: Kyrgistan, Micronesia
GSCIscore_2022
# A tibble: 180 × 5
   Country        `Sustainable Competitiveness Score` ISO3  Region       Incom…¹
   <chr>                                        <dbl> <chr> <chr>        <chr>  
 1 Sweden                                        60.7 SWE   Europe & Ce… High i…
 2 Finland                                       59.3 FIN   Europe & Ce… High i…
 3 Switzerland                                   58.3 CHE   Europe & Ce… High i…
 4 Denmark                                       58.1 DNK   Europe & Ce… High i…
 5 Norway                                        57.6 NOR   Europe & Ce… High i…
 6 Iceland                                       57.1 ISL   Europe & Ce… High i…
 7 United Kingdom                                56.4 GBR   Europe & Ce… High i…
 8 France                                        56.3 FRA   Europe & Ce… High i…
 9 Slovenia                                      56.3 SVN   Europe & Ce… High i…
10 Japan                                         56.2 JPN   East Asia &… High i…
# … with 170 more rows, and abbreviated variable name ¹​IncomeGroup

top10

top10_sustainability <- GSCIscore_2022 |> 
  slice_max(order_by = `Sustainable Competitiveness Score`, n = 10)

top10_sustainability
# A tibble: 10 × 5
   Country        `Sustainable Competitiveness Score` ISO3  Region       Incom…¹
   <chr>                                        <dbl> <chr> <chr>        <chr>  
 1 Sweden                                        60.7 SWE   Europe & Ce… High i…
 2 Finland                                       59.3 FIN   Europe & Ce… High i…
 3 Switzerland                                   58.3 CHE   Europe & Ce… High i…
 4 Denmark                                       58.1 DNK   Europe & Ce… High i…
 5 Norway                                        57.6 NOR   Europe & Ce… High i…
 6 Iceland                                       57.1 ISL   Europe & Ce… High i…
 7 United Kingdom                                56.4 GBR   Europe & Ce… High i…
 8 France                                        56.3 FRA   Europe & Ce… High i…
 9 Slovenia                                      56.3 SVN   Europe & Ce… High i…
10 Japan                                         56.2 JPN   East Asia &… High i…
# … with abbreviated variable name ¹​IncomeGroup
top10_sustainability |> 
  ggplot(aes(x = `Sustainable Competitiveness Score`, y = fct_reorder(.f = Country, .x = `Sustainable Competitiveness Score`), fill = IncomeGroup)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = "steelblue") +
  ylab("") +
  ggtitle("Top 10 Countries by Sustainability Index")

FDI_net_inflows <- 
  read_csv("01_data_cleaning/FDI_net inflows.csv", skip = 3) |> 
  select(`Country Name`, `Country Code`, "2021") |> 
  rename(Country = `Country Name`, ISO3 = `Country Code`, FDI = "2021") |> 
  mutate(FDI_billions = FDI/1000000000) 
New names:
Rows: 266 Columns: 67
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(4): Country Name, Country Code, Indicator Name, Indicator Code dbl (52): 1970,
1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, ... lgl (11): 1960,
1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, ...67
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...67`
FDI_net_inflows
# A tibble: 266 × 4
   Country                     ISO3           FDI FDI_billions
   <chr>                       <chr>        <dbl>        <dbl>
 1 Aruba                       ABW     133545121.       0.134 
 2 Africa Eastern and Southern AFE   52974609490.      53.0   
 3 Afghanistan                 AFG      20600976.       0.0206
 4 Africa Western and Central  AFW   20677155529.      20.7   
 5 Angola                      AGO   -4355116553.      -4.36  
 6 Albania                     ALB    1218586452.       1.22  
 7 Andorra                     AND            NA       NA     
 8 Arab World                  ARB   54104486022.      54.1   
 9 United Arab Emirates        ARE   20667120490.      20.7   
10 Argentina                   ARG    6782348887.       6.78  
# … with 256 more rows
GDP_per_capita <- 
  read_csv("01_data_cleaning/GDP_per_capita.csv", skip = 3) |> 
  select(`Country Name`, "2021") |> 
  rename(Country = `Country Name`, GDP_per_capita = "2021") 
New names:
Rows: 266 Columns: 67
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(4): Country Name, Country Code, Indicator Name, Indicator Code dbl (62): 1960,
1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, ... lgl (1): ...67
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...67`
GDP_per_capita
# A tibble: 266 × 2
   Country                     GDP_per_capita
   <chr>                                <dbl>
 1 Aruba                               29342.
 2 Africa Eastern and Southern          1550.
 3 Afghanistan                           369.
 4 Africa Western and Central           1757.
 5 Angola                               1954.
 6 Albania                              6493.
 7 Andorra                             42137.
 8 Arab World                           6271.
 9 United Arab Emirates                44316.
10 Argentina                           10636.
# … with 256 more rows
FDI_GDP <- left_join(FDI_net_inflows, GDP_per_capita, by = "Country")

FDI_GDP
# A tibble: 266 × 5
   Country                     ISO3           FDI FDI_billions GDP_per_capita
   <chr>                       <chr>        <dbl>        <dbl>          <dbl>
 1 Aruba                       ABW     133545121.       0.134          29342.
 2 Africa Eastern and Southern AFE   52974609490.      53.0             1550.
 3 Afghanistan                 AFG      20600976.       0.0206           369.
 4 Africa Western and Central  AFW   20677155529.      20.7             1757.
 5 Angola                      AGO   -4355116553.      -4.36            1954.
 6 Albania                     ALB    1218586452.       1.22            6493.
 7 Andorra                     AND            NA       NA              42137.
 8 Arab World                  ARB   54104486022.      54.1             6271.
 9 United Arab Emirates        ARE   20667120490.      20.7            44316.
10 Argentina                   ARG    6782348887.       6.78           10636.
# … with 256 more rows
sus_score <- GSCIscore_2022 |> 
  select(!Country)
GSCIscore_FDI_GDP <- left_join(sus_score, FDI_GDP, by="ISO3") |> 
  select(Country, everything()) 

GSCIscore_FDI_GDP
# A tibble: 180 × 8
   Country        Sustainable Com…¹ ISO3  Region Incom…²     FDI FDI_b…³ GDP_p…⁴
   <chr>                      <dbl> <chr> <chr>  <chr>     <dbl>   <dbl>   <dbl>
 1 Sweden                      60.7 SWE   Europ… High i… 5.27e10 52.7     61029.
 2 Finland                     59.3 FIN   Europ… High i… 2.39e10 23.9     53655.
 3 Switzerland                 58.3 CHE   Europ… High i… 9.31e 9  9.31    91992.
 4 Denmark                     58.1 DNK   Europ… High i… 1.47e10 14.7     68008.
 5 Norway                      57.6 NOR   Europ… High i… 1.42e10 14.2     89154.
 6 Iceland                     57.1 ISL   Europ… High i… 9.31e 7  0.0931  68728.
 7 United Kingdom              56.4 GBR   Europ… High i… 5.92e 9  5.92    46510.
 8 France                      56.3 FRA   Europ… High i… 8.84e10 88.4     43659.
 9 Slovenia                    56.3 SVN   Europ… High i… 2.15e 9  2.15    29291.
10 Japan                       56.2 JPN   East … High i… 3.31e10 33.1     39313.
# … with 170 more rows, and abbreviated variable names
#   ¹​`Sustainable Competitiveness Score`, ²​IncomeGroup, ³​FDI_billions,
#   ⁴​GDP_per_capita
GSCIscore_FDI_GDP |> 
  na.omit() |> 
  ggplot(aes(x=`Sustainable Competitiveness Score`, y=FDI_billions, 
             color = Region, shape = IncomeGroup)) +
  geom_point(alpha=0.8,size=2) +
  #geom_text(aes(label=ISO3, alpha =FDI_billions),size=2,color="grey25") +
  scale_y_continuous(labels = scales::label_dollar(suffix = "b")) +
  ylab("FDI in billion of dollars") +
  theme(legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
        legend.key.size = unit(3,"mm"),
        title = element_text(size = 9)) +
  ggtitle("Relation between Sustainable Competitiveness Score and FDI")

GSCIscore_FDI_GDP |> 
  na.omit() |> 
  ggplot(aes(x=`Sustainable Competitiveness Score`, y=GDP_per_capita, 
             color = Region, shape = IncomeGroup)) +
  geom_point(alpha=0.8,size=2) +
  #geom_text(aes(label=ISO3, alpha =FDI_billions),size=2,color="grey25") +
  scale_y_continuous(labels = scales::label_dollar()) +
  ylab("GDP per capita in dollars") +
  theme(legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
        legend.key.size = unit(3,"mm"),
        title = element_text(size = 9)) +
  ggtitle("Relation between Sustainable Competitiveness Score and GDP")