Question: Which sector has the highest infrastructure investment needs worldwide?

Required libraries

library(readr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import, view, and clean dataset

The dataset describes infrastructure investment needs by sector and world region.

# Read the CSV (path is relative to the working directory). read_csv() returns
# a tibble and guesses the column types, which it reports in the message below.
infrastructure <- read_csv("Infrastructure_spending_needs.csv")
## Rows: 189 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (6): wbregion, scenario, capital vs maintenance, sector, goals, policies...
## dbl (2): Billion USD, Percent of GDP
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Dimensions of the data: number of rows, then number of columns
dim(infrastructure)
## [1] 189   8
# Check for NAs: total count of missing values across every column
sum(is.na(infrastructure))
## [1] 0
# List the original column names before renaming them
names(infrastructure)
## [1] "wbregion"               "scenario"               "capital vs maintenance"
## [4] "sector"                 "goals"                  "policies and tech"     
## [7] "Billion USD"            "Percent of GDP"
# Replace the column names that contain spaces with short, syntactic ones
infrastructure <- infrastructure %>%
  rename(
    type = "capital vs maintenance",
    policies_tech = "policies and tech",
    billionUSD = "Billion USD",
    percentGDP = "Percent of GDP"
  )

# Preview the first rows of the renamed data
head(infrastructure)
## # A tibble: 6 x 8
##   wbregion  scenario  type  sector goals   policies_tech   billionUSD percentGDP
##   <chr>     <chr>     <chr> <chr>  <chr>   <chr>                <dbl>      <dbl>
## 1 East Asi~ High spe~ Capi~ Energy Univer~ No investment ~      557.       3.63 
## 2 East Asi~ High spe~ Capi~ Flood~ Invest~ Let constructi~       63.4      0.399
## 3 East Asi~ High spe~ Capi~ Irrig~ End hu~ Subsidize both~       35.3      0.23 
## 4 East Asi~ High spe~ Capi~ Trans~ Follow~ Let cities spr~      337.       2.25 
## 5 East Asi~ High spe~ Capi~ Water~ Univer~ Sewerage syste~       56.6      0.37 
## 6 East Asi~ High spe~ Main~ Energy Univer~ No investment ~      112.       0.73

Number of investment-need records per sector

# Number of rows recorded for each sector
count(infrastructure, sector)
## # A tibble: 5 x 2
##   sector                          n
##   <chr>                       <int>
## 1 Energy                         42
## 2 Flood protection               42
## 3 Irrigation                     21
## 4 Transport                      42
## 5 Water supply and sanitation    42

Infrastructure investment needs by sector (in billion USD)

# Total investment need per sector, largest first.
# NOTE(review): this adds up every row of a sector, i.e. across all values of
# `wbregion`, `scenario` and `type`. Confirm that summing the scenarios
# together (rather than selecting one) is the intended total.
sector_needs <- infrastructure %>%
  group_by(sector) %>%
  # Name the total inside summarise() so the column label does not rely on a
  # positional names<- assignment afterwards.
  summarise(billionUSD = sum(billionUSD)) %>%
  arrange(desc(billionUSD))

sector_needs
## # A tibble: 5 x 2
##   sector                      billionUSD
##   <chr>                            <dbl>
## 1 Transport                        6692.
## 2 Energy                           5286.
## 3 Water supply and sanitation      1422.
## 4 Flood protection                 1062.
## 5 Irrigation                        387.

Bar graph

# Bar chart of the total investment need per sector, tallest bar first.
sector_plot <- ggplot(
  data = sector_needs,
  # reorder() on the negated total orders the x axis from largest to smallest
  aes(x = reorder(sector, -billionUSD), y = billionUSD)
) +
  # geom_col() takes bar heights directly from y (same as stat = "identity")
  geom_col(width = 0.75, fill = "#4cbea3") +
  labs(
    title = "Infrastructure investment needs by Sector",
    x = "Sector",
    y = "$ billion USD"
  ) +
  # Format the y-axis labels with thousands separators
  scale_y_continuous(labels = scales::comma) +
  # Rotate the x-axis labels 45 degrees, right-aligned to their ticks
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
sector_plot