Visualizing FDI Pakistan

Salahuddin

17 August, 2020


Initial setup.

# Point the session at the folder that holds the CSV. The path is specific to
# the author's machine, so only switch when it exists; otherwise keep the
# current working directory and say so instead of aborting the script.
project_dir <- "C:/Users/Salahuddin/Desktop/R/SBP"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  warning("Directory '", project_dir, "' not found; using ", getwd(),
          call. = FALSE)
}

library(tidyr)
library(dplyr)
library(data.table)
library(ggplot2)

Before importing the data, let’s specify the column types so that the data is imported correctly.

# Column types for the import: one text column followed by six numeric columns.
col_classes <- c("character", rep("numeric", times = 6))

In our data, missing values are denoted by “-”. We need to tell R that these represent missing values by using the na.strings argument. Now let’s import the data.

# Read the CSV, treating "-" as NA and applying the column types in col_classes.
fdi_country <- read.csv(
  file = "foreign_investment_by_country.csv",
  colClasses = col_classes,
  na.strings = "-"
)

Let’s verify that we have imported our data correctly.

# Show the structure of the imported data frame: column names, types, first values.
str(object = fdi_country)
## 'data.frame':    60 obs. of  7 variables:
##  $ Country     : chr  "Argentina" "Australia" "Austria" "Bahamas" ...
##  $ Inflow_FY20 : num  NA 0.1 3.8 NA 20.1 NA 1.9 NA NA 0.3 ...
##  $ Outflow_FY20: num  NA NA NA NA NA NA 0 NA NA 0 ...
##  $ Net_FY20    : num  NA 0.1 3.8 NA 20.1 NA 1.9 NA NA 0.3 ...
##  $ Inflow_FY19 : num  NA 0.5 7.6 22.9 24.4 NA 4.9 NA 4.4 0.6 ...
##  $ Outflow_FY19: num  NA 24.6 NA NA 2.6 NA 2 NA NA 0.1 ...
##  $ Net_FY19    : num  NA -24.1 7.6 22.9 21.9 NA 2.9 NA 4.4 0.5 ...
# Show the last six rows of the imported data.
tail(x = fdi_country, n = 6L)
##            Country Inflow_FY20 Outflow_FY20 Net_FY20 Inflow_FY19 Outflow_FY19
## 55  United Kingdom       179.9         62.6    117.3       225.0         40.0
## 56   United States       106.9          9.7     97.2       127.5         39.4
## 57          Others       221.2        149.5     71.7       137.6        124.4
## 58 Foreign Private      3285.8        724.6   2561.2      2785.2       1422.8
## 59  Foreign Public          NA           NA       NA          NA           NA
## 60           Total      3285.8        724.6   2561.2      2785.2       1422.8
##    Net_FY19
## 55    185.0
## 56     88.1
## 57     13.2
## 58   1362.4
## 59       NA
## 60   1362.4

We don’t need the last three rows since they represent totals. Let’s drop those rows.

# Drop the aggregate rows ("Foreign Private", "Foreign Public", "Total").
# They are matched by name rather than by position, so the step keeps removing
# the right rows if countries are added to or removed from the CSV.
summary_rows <- c("Foreign Private", "Foreign Public", "Total")
fdi_country <- fdi_country[!(fdi_country$Country %in% summary_rows), ]

The two columns that we are interested in, Net_FY20 and Net_FY19, contain missing values. Let’s remove the rows where either of them is missing.

# Keep only the rows where both Net_FY20 and Net_FY19 are present.
fdi_country <- drop_na(fdi_country, Net_FY20, Net_FY19)

We want to work with only a few countries, since most countries have insignificant net investment values. Let’s filter our data.

# Sort by FY20 net investment (ascending) and show the six lowest countries.
head(arrange(fdi_country, Net_FY20))
##       Country Inflow_FY20 Outflow_FY20 Net_FY20 Inflow_FY19 Outflow_FY19
## 1       U.A.E       137.6        181.8    -44.2       160.2         56.4
## 2     Finland         4.0          9.9     -6.0         0.6          3.1
## 3      France         2.9          4.1     -1.2        12.4           NA
## 4 Philippines          NA          0.2     -0.2         2.8          0.3
## 5   Indonesia          NA          0.1     -0.1         0.1           NA
## 6        Iran          NA          0.1     -0.1         0.3          0.4
##   Net_FY19
## 1    103.7
## 2     -2.5
## 3     12.4
## 4      2.6
## 5      0.1
## 6      0.0
# Keep countries whose FY20 net investment is at most -10 or at least 80.
fdi_country <- filter(fdi_country, Net_FY20 <= -10 | Net_FY20 >= 80)

We are only interested in three columns. Let’s drop the columns that we don’t need.

# Columns that are not needed for the chart.
drops <- c("Inflow_FY20", "Outflow_FY20", "Inflow_FY19", "Outflow_FY19")

# Keep every column whose name is not listed in `drops`.
fdi_country <- fdi_country[, setdiff(names(fdi_country), drops)]

head(fdi_country)
##       Country Net_FY20 Net_FY19
## 1       China    844.1    130.8
## 2    Hongkong    190.7    171.0
## 3       Malta    222.2   -139.9
## 4 Netherlands    133.2     69.0
## 5      Norway    401.9    115.8
## 6       U.A.E    -44.2    103.7

Since we’ll be creating a side-by-side bar chart, we need to reshape our data from wide to long format. We will create two new columns, FY and Net_FDI.

# Reshape to long format: one row per country and fiscal year.
# data.table's melt() only has a method for data.tables, so convert explicitly
# (a plain data.frame is otherwise redirected to reshape2 or rejected,
# depending on the data.table version) and convert back afterwards so the
# result is again a plain data.frame.
# The order of measure.vars fixes the factor levels of FY: Net_FY20, Net_FY19.
fdi_country <- melt(
  as.data.table(fdi_country),
  id.vars = "Country",
  measure.vars = c("Net_FY20", "Net_FY19"),
  variable.name = "FY",
  value.name = "Net_FDI"
)
fdi_country <- as.data.frame(fdi_country)

head(fdi_country)
##       Country       FY Net_FDI
## 1       China Net_FY20   844.1
## 2    Hongkong Net_FY20   190.7
## 3       Malta Net_FY20   222.2
## 4 Netherlands Net_FY20   133.2
## 5      Norway Net_FY20   401.9
## 6       U.A.E Net_FY20   -44.2

We are now ready to start plotting our graph.

# Dodged bar chart: one group of bars per country, one bar per FY value.
# stat = "identity" uses Net_FDI directly as the bar height.
p1 <- ggplot(
  data = fdi_country,
  mapping = aes(x = Country, y = Net_FDI, fill = FY)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7)

p1

Let’s add a horizontal line at \(y = 0\).

# Mark the zero baseline with a solid line.
p2 <- p1 +
  geom_hline(yintercept = 0, colour = "#006991", linetype = "solid", size = 1)

p2

R automatically assigns colors to the bars and also assigns legend labels. But we can manually specify the colors and the labels.

# Override the default fill colours and legend text. Both vectors are unnamed,
# so they are matched to the levels of FY in order.
p3 <- p2 +
  scale_fill_manual(
    values = c("#006991", "#0f9fd5"),
    labels = c("Net Investment FY20", "Net Investment FY19")
  )

p3

We can see that the country names are overlapping. We could rename the countries to make them shorter, but here I want the bars to be horizontal instead. Let’s fix that by flipping the coordinates.

# Flip the coordinates so that the bars are drawn horizontally.
p4 <- p3 +
  coord_flip()

p4

At this point, let’s add some labels to our graph.

# Title, subtitle, value-axis label and source caption, set in a single labs()
# call. The "\n" in the strings adds vertical spacing around the text.
p5 <- p4 +
  labs(
    title = "Net Foreign Investment in Pakistan by Countries",
    subtitle = "Net Foreign Investment, in Million US $",
    y = "Net FDI\n",
    caption = "\nSource: State Bank of Pakistan   \n"
  )

p5

Now we can start customizing our graph. We could use one of ggplot2’s built-in themes by adding a theme function such as theme_minimal() to the plot, but I want to build my own theme. Let’s see how it works by removing the lines (including the axis ticks and grid lines) and the background color.

# Start the custom theme by blanking out every line element, the panel
# background, the minor grid and the major y grid.
p6 <- p5 +
  theme(
    line = element_blank(),
    panel.background = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

p6

I want to change the position of the y axis and expand the limits. I also want to add intervals of 100.

# Net_FDI is numeric, so the value axis needs a continuous scale; a discrete
# scale with numeric limits triggers the "Continuous limits supplied to
# discrete scale" warning.
# expand_limits() stretches the axis to cover -200..1000, and breaks are placed
# every 100 from -100 to 800. No `limits` are set on the scale itself, because
# continuous limits would censor values that fall outside them.
p7 <- p6 +
  expand_limits(y = c(-200, 1000)) +
  scale_y_continuous(
    expand = c(0, 8),
    position = "right",
    breaks = seq(-100, 800, by = 100)
  )

p7

Let’s add the axis line and also add some background color.

# Re-introduce an x axis line, give the rect elements and the panel the same
# background fill, and draw the major y grid lines in white.
p8 <- p7 +
  theme(
    rect = element_rect(fill = "#d5e7e8"),
    panel.background = element_rect(fill = "#d5e7e8"),
    panel.grid.major.y = element_line(colour = "white"),
    axis.line.x = element_line()
  )

p8

I want to change the position of the legend and move it to top left.

# Place a horizontal legend at the top with justification c(0, 0), and hide
# the legend title.
p9 <- p8 +
  theme(
    legend.position = "top",
    legend.direction = "horizontal",
    legend.justification = c(0, 0),
    legend.title = element_blank()
  )

p9

The graph title and sub-title can also be customized.

# Title in bold black at size 14; subtitle in black at size 12.
p10 <- p9 +
  theme(
    plot.title = element_text(face = "bold", size = 14, colour = "black"),
    plot.subtitle = element_text(size = 12, colour = "black")
  )

p10

Finally, we can add data labels.

# Add the value of each bar as a text label. Every label shares one position
# on the value axis (100 past the largest Net_FDI), so the labels form a
# single column instead of sitting on the bars.
# The dodge width matches the 0.7 width of the bars, so each label lines up
# with its own bar; a different width would shift the labels off the bars.
p11 <- p10 +
  geom_text(
    aes(y = max(Net_FDI) + 100, label = Net_FDI),
    position = position_dodge(width = 0.7),
    size = 3.2
  )
p11

We can position the labels on the bars but I wanted to keep them on the right.

That’s it!