library(readr)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
The dataset describes infrastructure investment needs by sector and world region.
# Load the infrastructure spending data from the CSV file into a tibble.
infrastructure <- read_csv(file = "Infrastructure_spending_needs.csv")
## Rows: 189 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (6): wbregion, scenario, capital vs maintenance, sector, goals, policies...
## dbl (2): Billion USD, Percent of GDP
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the dimensions of the loaded data (rows, columns)
dim(infrastructure)
## [1] 189 8
# Count missing values across every column of the data frame
sum(is.na(infrastructure))
## [1] 0
# Inspect the original column names
names(infrastructure)
## [1] "wbregion" "scenario" "capital vs maintenance"
## [4] "sector" "goals" "policies and tech"
## [7] "Billion USD" "Percent of GDP"
# Rename the columns whose names contain spaces to short identifiers
infrastructure <- infrastructure %>%
  rename(
    type = "capital vs maintenance",
    policies_tech = "policies and tech",
    billionUSD = "Billion USD",
    percentGDP = "Percent of GDP"
  )
# Preview the first rows of the dataset with the renamed columns
head(infrastructure)
## # A tibble: 6 x 8
## wbregion scenario type sector goals policies_tech billionUSD percentGDP
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 East Asi~ High spe~ Capi~ Energy Univer~ No investment ~ 557. 3.63
## 2 East Asi~ High spe~ Capi~ Flood~ Invest~ Let constructi~ 63.4 0.399
## 3 East Asi~ High spe~ Capi~ Irrig~ End hu~ Subsidize both~ 35.3 0.23
## 4 East Asi~ High spe~ Capi~ Trans~ Follow~ Let cities spr~ 337. 2.25
## 5 East Asi~ High spe~ Capi~ Water~ Univer~ Sewerage syste~ 56.6 0.37
## 6 East Asi~ High spe~ Main~ Energy Univer~ No investment ~ 112. 0.73
# Number of rows recorded for each sector
count(infrastructure, sector)
## # A tibble: 5 x 2
## sector n
## <chr> <int>
## 1 Energy 42
## 2 Flood protection 42
## 3 Irrigation 21
## 4 Transport 42
## 5 Water supply and sanitation 42
# Total investment need (billion USD) per sector, sorted from the largest
# to the smallest total. The summary column is named directly inside
# summarise(), so no positional renaming via names<- is needed afterwards.
sector_needs <- infrastructure %>%
  group_by(sector) %>%
  summarise(billionUSD = sum(billionUSD)) %>%
  arrange(desc(billionUSD))
sector_needs
## # A tibble: 5 x 2
## sector billionUSD
## <chr> <dbl>
## 1 Transport 6692.
## 2 Energy 5286.
## 3 Water supply and sanitation 1422.
## 4 Flood protection 1062.
## 5 Irrigation 387.
# Bar chart of the total investment need per sector. reorder() on the
# negated totals places the bars from the largest to the smallest sector.
sector_plot <- ggplot(
  data = sector_needs,
  aes(x = reorder(sector, -billionUSD), y = billionUSD)
) +
  # geom_col() uses the y values as bar heights, i.e. it is the
  # equivalent of geom_bar(stat = "identity")
  geom_col(width = 0.75, fill = "#4cbea3") +
  labs(
    title = "Infrastructure investment needs by Sector",
    x = "Sector",
    y = "$ billion USD"
  ) +
  # Thousands separators on the y axis
  scale_y_continuous(labels = scales::comma) +
  # Tilt the sector labels so the long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
sector_plot