Exploratory Data Analysis

## Clear environment and load libraries

# Remove every object returned by ls() from the current environment.
# NOTE(review): this does not detach packages or reset options(); restarting
# the R session is the more reliable way to start clean — consider dropping.
rm(list = ls())

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.1.3

## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.6      v purrr   0.3.4 
## v tibble  3.1.8      v dplyr   1.0.10
## v tidyr   1.2.1      v stringr 1.4.1 
## v readr   2.1.2      v forcats 0.5.1

## Warning: package 'ggplot2' was built under R version 4.1.3

## Warning: package 'tibble' was built under R version 4.1.3

## Warning: package 'tidyr' was built under R version 4.1.3

## Warning: package 'readr' was built under R version 4.1.2

## Warning: package 'purrr' was built under R version 4.1.2

## Warning: package 'dplyr' was built under R version 4.1.3

## Warning: package 'stringr' was built under R version 4.1.3

## Warning: package 'forcats' was built under R version 4.1.2

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(here)

## Warning: package 'here' was built under R version 4.1.3

## here() starts at C:/Users/arink/OneDrive/SAIS/Fall 2022/Sustainable Finance/Final Project

library(janitor)

## Warning: package 'janitor' was built under R version 4.1.3

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(countrycode)

## Warning: package 'countrycode' was built under R version 4.1.3

library(readxl)
library(lubridate)

## Warning: package 'lubridate' was built under R version 4.1.3

## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(forcats)
library(esquisse)

## Warning: package 'esquisse' was built under R version 4.1.3

library(scales)

## Warning: package 'scales' was built under R version 4.1.3

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

# Bias number printing toward fixed notation rather than scientific (e.g. 1e+05).
options(scipen = 10)

## Load the datasets we cleaned last week

library(readr)
# Read the merged climate-expenditure data set from the processed-data folder.
# The path is built with here::here() so it resolves from the project root
# no matter which working directory the script or document is run from.
clim_exp_merged <- read_csv(
  here::here("03_data_processed", "clim_exp_merged.csv")
)

## Rows: 27281 Columns: 15
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (8): form_of_the_state, sector, indicator, measure, country_name, iso3c,...
## dbl (7): year, value, debt_gross_percent_of_gdp, nominal_gdp_bn_ppp, nominal...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Open the data viewer only in interactive sessions; View() is an interactive
# tool and serves no purpose (and may fail) when knitting or running via Rscript.
if (interactive()) {
  View(clim_exp_merged)
}


library(ggplot2) 
# Keep only the two US-dollar measures that are plotted below.
clim_exp_merged_filtered <- clim_exp_merged %>%
  filter(
    measure %in% c(
      "Per capita (US dollars PPP real)",
      "Amount (US dollars PPP real)"
    )
  )

# Stacked bars of `value` by sector, one facet per measure, each with its own
# y scale. `indicator` is mapped to `fill`, not `colour`: in geom_col() the
# `colour` aesthetic only styles the bar outlines, so combining it with a fixed
# dark fill makes the stacked segments look like black bars with thin borders.
ggplot(clim_exp_merged_filtered) +
  aes(x = sector, y = value, fill = indicator) +
  geom_col() +
  scale_y_continuous(labels = comma) +
  scale_fill_hue(direction = 1) +
  labs(
    title = "OECD Country Climate Spending by Level of Government",
    caption = "Data Source: OECD Subnational Government Climate Finance Database"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "right"
  ) +
  facet_wrap(
    vars(measure),
    scales = "free_y",
    labeller = labeller(measure = label_wrap_gen(20))
  )

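## Not sure why I'm getting the black lines? (Likely cause: in geom_col() the `colour` aesthetic only sets the bar outline, so a fixed dark `fill` shows through; mapping `indicator` to `fill` colours the bars themselves.)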

# Look at the top countries by local government spending

  # Top 10 countries by mean per-capita value since 2015.
  # Rows are filtered before grouping. The weighted-average aggregate rows are
  # dropped so that only individual countries are ranked, consistent with the
  # overall-spending ranking computed further down.
  # NOTE(review): the mean is taken over every remaining row of a country (all
  # sectors, indicators and years) — confirm this is the intended average.
  clim_exp_top10_pc <- clim_exp_merged %>%
    filter(
      measure == "Per capita (US dollars PPP real)",
      year >= 2015,
      !country_name %in% c(
        "All (weighted average)",
        "OECD weighted average",
        "European Union (weighted average)"
      )
    ) %>%
    group_by(country_name) %>%
    # na.rm = TRUE: a single missing value must not turn a country's mean into NA.
    summarize(mean_percap_exp = mean(value, na.rm = TRUE)) %>%
    arrange(desc(mean_percap_exp)) %>%
    slice_head(n = 10)

# Horizontal bar chart of the per-capita top 10, largest value at the top.
clim_exp_top10_pc %>%
  ggplot(aes(
    x = fct_reorder(country_name, mean_percap_exp),
    y = mean_percap_exp
  )) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  # n.dodge is a count of label rows/columns and must be a whole number; the
  # previous fractional value is assumed to have acted as 1 — TODO confirm.
  scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
  labs(
    x = "",
    y = "Per capita - USD PPP real",
    title = "Top 10 OECD Countries With Local Government Climate Spending Since 2015",
    subtitle = "Per Capita Spending",
    caption = "Data source: OECD Subnational Government Climate Finance Database"
  ) +
  theme_minimal()

 # Top 10 countries by mean total value ("Amount" measure) since 2015.
 # Rows are filtered before grouping; the weighted-average aggregate rows and
 # rows without a country name are excluded so only individual countries rank.
 # NOTE(review): the mean is taken over every remaining row of a country (all
 # sectors, indicators and years) — confirm this is the intended average.
 clim_exp_top10_overall <- clim_exp_merged %>%
   filter(
     measure == "Amount (US dollars PPP real)",
     year >= 2015,
     !is.na(country_name),
     !country_name %in% c(
       "All (weighted average)",
       "OECD weighted average",
       "European Union (weighted average)"
     )
   ) %>%
   group_by(country_name) %>%
   # na.rm = TRUE: a single missing value must not turn a country's mean into NA.
   summarize(mean_exp = mean(value, na.rm = TRUE)) %>%
   arrange(desc(mean_exp)) %>%
   slice_head(n = 10)

# Horizontal bar chart of the overall-spending top 10, largest value at the top.
clim_exp_top10_overall %>%
  ggplot(aes(
    x = fct_reorder(country_name, mean_exp),
    y = mean_exp
  )) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  # n.dodge is a count of label rows/columns and must be a whole number; the
  # previous fractional value is assumed to have acted as 1 — TODO confirm.
  scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
  labs(
    x = "",
    y = "Climate Expenditures - Real USD - PPP",
    title = "Top 10 OECD Countries With Local Government Climate Spending Since 2015",
    subtitle = "Overall Spending",
    caption = "Data source: OECD Subnational Government Climate Finance Database"
  ) +
  theme_minimal()

# Questions I’m thinking about

## Thinking about how to bring in the urbanization data - compare urbanization levels with local-level spending since 2015 for the top spenders?

## Is it informative to compare spending to population? To national emissions data?

Exploratory Data Analysis

Arin Kerstein

11/10/2022