Overview

In this report I visualise oil production and reserves for selected countries. The data used in the following charts is mainly extracted from the BP Statistical Review of World Energy 2018. In the BP Review, oil production is a relatively broad category that includes crude oil, shale oil, oil sands and natural gas liquids (NGLs). The aim of this report is to show some of the most popular chart types used in the field and some of their customisations.

Packages used in data cleaning and visualisation

library(ggplot2)
library(ggthemes)
library(dplyr)
library(scales)
library(ggthemes)
library(readxl)
library(tidyverse)
library(plotly)

Bar Chart for OPEC countries with the biggest oil reserves

# Load the OPEC reserves workbook from its (machine-specific) absolute path.
oil_reserves <- readxl::read_excel(
  path = "C:/Users/Rami/Desktop/reserves.xlsx"
)
# Preview the first rows to confirm the import worked.
head(oil_reserves, n = 6L)
## # A tibble: 5 x 3
##   Country      Reserves   Cnt
##   <chr>           <dbl> <dbl>
## 1 Venezuela        303.  31.1
## 2 Saudi Arabia     266.  27.4
## 3 Iran             156.  16.0
## 4 Iraq             147.  15.1
## 5 Kuwait           102.  10.4

As we want the chart to show the reserves in ascending order, we should manipulate the data a bit.

# Turn Country into a factor whose levels are the reverse of the row order in
# the sheet. The unnamed colours passed to scale_fill_manual() below are
# assigned in this level order.
oil_reserves$Country <- factor(
  oil_reserves$Country,
  levels = rev(as.character(oil_reserves$Country))
)

# Bar chart of proven reserves per country. The bars are placed along the
# x axis by reorder(Country, Reserves), i.e. in ascending order of Reserves.
ggplot(
  oil_reserves,
  aes(x = reorder(Country, Reserves), y = Reserves, fill = Country)
) +
  # geom_col() draws bar heights straight from the data
  # (the idiomatic form of geom_bar(stat = "identity")).
  geom_col(width = 0.8) +
  scale_y_continuous(
    limits = c(0, 320),
    breaks = seq(0, 320, by = 40),
    labels = scales::comma
  ) +
  scale_fill_manual(
    values = c("#000B29", "#D70026", "azure4", "#EDB83D", "coral")
  ) +
  labs(
    title = "Five OPEC Countries with the Biggest Oil Reserves",
    subtitle = " Billion Barrels",
    caption = "Source: OPEC"
  ) +
  theme_minimal() +
  theme(
    title = element_text(size = 14),
    # Both axis titles are hidden (the unit is stated in the subtitle), so no
    # y-axis label is set at all.
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, face = "bold", colour = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # Country names are already on the x axis, so the fill legend is dropped.
    legend.position = "none"
  )

Pie Chart for the percentage of the reserves

# Fix the factor levels of Country to the reverse of the row order so that
# the stacking order and the unnamed manual fill colours follow that order.
oil_reserves$Country <- factor(
  oil_reserves$Country,
  levels = rev(as.character(oil_reserves$Country))
)

# Pie chart: a single stacked bar of the Cnt column drawn in polar coordinates.
ggplot(oil_reserves, aes(x = "", y = Cnt, fill = Country)) +
  # White outlines separate the slices.
  geom_col(width = 2.5, size = 2, colour = "white") +
  # Write the rounded Cnt value, followed by "%", in the middle of each slice.
  geom_text(
    aes(label = paste0(round(Cnt), "%")),
    position = position_stack(vjust = 0.5),
    color = "white"
  ) +
  coord_polar("y") +
  scale_fill_manual(
    values = c("#000B29", "#D70026", "azure4", "#EDB83D", "coral")
  ) +
  # Reverse the legend entries relative to the factor level order.
  guides(fill = guide_legend(reverse = TRUE)) +
  # Clear both axis labels (the aesthetic name is lower-case `y`).
  labs(
    x = NULL,
    y = NULL,
    title = "Five OPEC Countries with the Biggest Oil Reserves",
    caption = "Source: OPEC"
  ) +
  theme_classic() +
  theme(
    title = element_text(size = 14),
    plot.title = element_text(hjust = 0.5),
    # Axis lines, titles and tick labels carry no information on a pie chart.
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text = element_blank(),
    legend.text = element_text(size = 14),
    legend.title = element_blank(),
    legend.position = "bottom"
  )

Importing and Cleaning BP Data for Oil Production

# Read the sixth sheet of the BP workbook, skipping the first two rows of the
# sheet. The path is absolute and machine-specific.
bp <- readxl::read_excel(
  path = "C:/Users/Rami/Desktop/BPStats.xlsx",
  sheet = 6,
  skip = 2
)
# Show every column with its type and first values.
dplyr::glimpse(bp)
## Observations: 86
## Variables: 57
## $ `Thousand barrels daily` <chr> NA, "US", "Canada", "Mexico", "Total ...
## $ `1965`                   <chr> NA, "9014.148000000001", "920", "362"...
## $ `1966`                   <chr> NA, "9579.241", "1012", "370", "10961...
## $ `1967`                   <chr> NA, "10219.252", "1106", "411", "1173...
## $ `1968`                   <chr> NA, "10599.743", "1194", "439", "1223...
## $ `1969`                   <chr> NA, "10827.674000000001", "1306", "46...
## $ `1970`                   <chr> NA, "11296.849", "1473", "487", "1325...
## $ `1971`                   <chr> NA, "11155.778", "1582", "486", "1322...
## $ `1972`                   <chr> NA, "11184.896000000001", "1829", "50...
## $ `1973`                   <chr> NA, "10945.953", "2114", "525", "1358...
## $ `1974`                   <chr> NA, "10461.205", "1993", "653", "1310...
## $ `1975`                   <chr> NA, "10007.736999999999", "1735", "80...
## $ `1976`                   <chr> NA, "9735.6389999999992", "1598", "89...
## $ `1977`                   <chr> NA, "9862.5619999999999", "1608", "10...
## $ `1978`                   <chr> NA, "10274.441000000001", "1597", "13...
## $ `1979`                   <chr> NA, "10135.534", "1835", "1607", "135...
## $ `1980`                   <chr> NA, "10169.626", "1764", "2129", "140...
## $ `1981`                   <chr> NA, "10180.573", "1610", "2553", "143...
## $ `1982`                   <chr> NA, "10198.534", "1590", "3001", "147...
## $ `1983`                   <chr> NA, "10246.668", "1661", "2930", "148...
## $ `1984`                   <chr> NA, "10508.950999999999", "1775", "29...
## $ `1985`                   <chr> NA, "10580.378000000001", "1812.79521...
## $ `1986`                   <chr> NA, "10231.142", "1805.1174616419273"...
## $ `1987`                   <chr> NA, "9943.9779999999992", "1908.76994...
## $ `1988`                   <chr> NA, "9764.6890000000003", "2002.41851...
## $ `1989`                   <chr> NA, "9159.0770000000011", "1961.45622...
## $ `1990`                   <chr> NA, "8914.3070000000007", "1967.73567...
## $ `1991`                   <chr> NA, "9075.5450000000001", "1983.50534...
## $ `1992`                   <chr> NA, "8868.125", "2065.5790598282069",...
## $ `1993`                   <chr> NA, "8582.6650000000009", "2189.01030...
## $ `1994`                   <chr> NA, "8388.5789999999997", "2281.33478...
## $ `1995`                   <chr> NA, "8321.6389999999992", "2402.12781...
## $ `1996`                   <chr> NA, "8294.527", "2479.9174236024764",...
## $ `1997`                   <chr> NA, "8268.5920000000006", "2587.59028...
## $ `1998`                   <chr> NA, "8010.8329999999996", "2672.38133...
## $ `1999`                   <chr> NA, "7731.4579999999996", "2604.39959...
## $ `2000`                   <chr> NA, "7731.6040000000003", "2703.40190...
## $ `2001`                   <chr> NA, "7669.4009999999998", "2727.95812...
## $ `2002`                   <chr> NA, "7625.0780000000004", "2858.16410...
## $ `2003`                   <chr> NA, "7367.3", "3003.4707919793873", "...
## $ `2004`                   <chr> NA, "7249.9949999999999", "3079.88144...
## $ `2005`                   <chr> NA, "6899.8220000000001", "3040.89602...
## $ `2006`                   <chr> NA, "6824.8639999999996", "3208.38420...
## $ `2007`                   <chr> NA, "6859.6859999999997", "3290.18966...
## $ `2008`                   <chr> NA, "6783.9579999999996", "3207.03529...
## $ `2009`                   <chr> NA, "7263.0339999999997", "3202.39768...
## $ `2010`                   <chr> NA, "7549.1880000000001", "3332.07980...
## $ `2011`                   <chr> NA, "7858.8490000000002", "3514.82206...
## $ `2012`                   <chr> NA, "8903.6180000000004", "3740.23767...
## $ `2013`                   <chr> NA, "10071.217000000001", "4000.41096...
## $ `2014`                   <chr> NA, "11768.192999999999", "4270.52990...
## $ `2015`                   <chr> NA, "12750.268", "4389.1402241510259"...
## $ `2016`                   <chr> NA, "12365.806", "4470.176332139712",...
## $ `2017`                   <chr> NA, "13056.993", "4830.6284506991078"...
## $ `2017__1`                <chr> NA, "5.5895022127955052E-2", "8.06348...
## $ `2006-16`                <chr> NA, "6.1238122797292904E-2", "3.37221...
## $ `2017__2`                <chr> NA, "0.14093023232708649", "5.2139232...

Rename the first column (“Thousand barrels daily”) to “Country”

# The first column holds the country / region labels; give it a usable name.
names(bp)[1] <- "Country"

Removing the last three columns

# Keep every column except the last three.
# seq_len() is used instead of 1:(n - 3) because `1:0` counts down to c(1, 0)
# and would silently keep the first column if only three columns were present.
bp_clean1 <- bp[seq_len(ncol(bp) - 3)]

Removing NA values

# Drop every row that contains at least one NA in any column.
bp_clean2 <- stats::na.omit(object = bp_clean1)

Rearranging the data into long format

# Reshape from one column per year to one row per Country/Year pair:
# the former column names go to `Year`, the cell values to `Production`.
bp_clean3 <- bp_clean2 %>%
  gather(key = Year, value = Production, -Country, na.rm = TRUE)

Replacing n/a values with zeros

# Production is read as text. Convert it to whole numbers; strings that are
# not numeric become NA (R emits a coercion warning), and those NAs are then
# set to 0. Assigning the double 0 leaves the column as a double vector.
# NOTE(review): as.integer() truncates the fractional part (for example
# "9014.148000000001" becomes 9014) - confirm that dropping decimals is intended.
bp_clean3$Production <- local({
  production <- as.integer(bp_clean3$Production)
  replace(production, is.na(production), 0)
})

Removing redundant rows from the dataset

# Labels of aggregate / regional rows ("Total ...", "Other ...", OPEC, OECD,
# ...) that are filtered out so that only individual countries remain.
remove.list <- c(
  "Other Africa", "Total World", "European Union #", "Other Asia Pacific",
  "Total Asia Pacific", "of which: OECD", "Total Europe", "Total Africa",
  "OPEC", "Non-OPEC", "Other Europe", "Total North America", "Total CIS",
  "Non-OECD", "OECD", "Total Middle East", "Other Middle East", "Other CIS",
  "Other S. & Cent. America", "Total S. & Cent. America"
)

# Keep only the rows whose Country label is not one of the aggregate labels.
bp_clean4 <- bp_clean3 %>%
  filter(!(Country %in% remove.list))

Look at tidy data

# Print the first six rows of the tidy table.
head(bp_clean4, n = 6L)
## # A tibble: 6 x 3
##   Country   Year  Production
##   <chr>     <chr>      <dbl>
## 1 US        1965        9014
## 2 Canada    1965         920
## 3 Mexico    1965         362
## 4 Argentina 1965         275
## 5 Brazil    1965          96
## 6 Colombia  1965         203

Line Chart

# Production of the five OPEC countries from the reserves charts, 1997-2017,
# sorted from the highest to the lowest production value.
# Year is stored as text, so it is compared against character years.
# The result is left ungrouped: grouping by Country and Production would
# create one group per distinct value and has no effect on the sort.
Opec_five_prodcution <- bp_clean4 %>%
  filter(
    Country %in% c("Venezuela", "Saudi Arabia", "Iran", "Iraq", "Kuwait"),
    Year %in% as.character(1997:2017)
  ) %>%
  arrange(desc(Production))

head(Opec_five_prodcution)
## # A tibble: 6 x 3
## # Groups:   Country, Production [6]
##   Country      Year  Production
##   <chr>        <chr>      <dbl>
## 1 Saudi Arabia 2016       12401
## 2 Saudi Arabia 2015       11994
## 3 Saudi Arabia 2017       11950
## 4 Saudi Arabia 2012       11634
## 5 Saudi Arabia 2014       11504
## 6 Saudi Arabia 2013       11393
# Fix the order of the countries; it determines both the legend order and
# which of the unnamed manual colours each country receives.
Opec_five_prodcution$Country <- factor(
  Opec_five_prodcution$Country,
  levels = c("Saudi Arabia", "Iran", "Iraq", "Venezuela", "Kuwait")
)

# Line chart: one line per country. Only `colour` and `group` are mapped,
# since a line layer has no fill.
plot_line <- ggplot(
  Opec_five_prodcution,
  aes(x = Year, y = Production, group = Country, colour = Country)
) +
  geom_line(size = 2) +
  scale_color_manual(
    values = c("#000B29", "#D70026", "azure4", "#EDB83D", "coral")
  ) +
  scale_y_continuous(
    limits = c(1000, 13000),
    breaks = seq(1000, 13000, by = 3000),
    labels = scales::comma
  ) +
  labs(
    title = "Oil Production of Five of OPEC Countries",
    subtitle = "Thousand Barrels Daily",
    caption = "Source: BP Statistical Review"
  ) +
  theme_minimal() +
  theme(
    title = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, colour = "black"),
    axis.text.y = element_text(size = 10, colour = "black"),
    # Keep only the horizontal grid lines.
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.key.size = unit(2, "lines"),
    legend.text = element_text(colour = "black", size = 12, face = "bold")
  )

plot_line

Area Chart

# Pin the country order used for stacking, the legend and the manual colours.
Opec_five_prodcution$Country <- Opec_five_prodcution$Country %>%
  factor(levels = c("Saudi Arabia", "Iran", "Iraq", "Venezuela", "Kuwait"))

# Stacked area chart of production per country over the years.
ggplot(
  data = Opec_five_prodcution,
  mapping = aes(x = Year, y = Production, fill = Country, group = Country)
) +
  # The fill mapping is inherited from the plot-level aesthetics.
  geom_area(stat = "identity") +
  scale_fill_manual(
    values = c("#000B29", "#D70026", "azure4", "#EDB83D", "coral")
  ) +
  scale_y_continuous(
    limits = c(0, 30000),
    breaks = seq(0, 30000, by = 6000),
    labels = scales::comma
  ) +
  labs(
    title = "Oil Production of Five of OPEC Countries",
    subtitle = "Thousand Barrels Daily",
    caption = "Source: BP Statistical Review"
  ) +
  theme_minimal() +
  theme(
    title = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, colour = "black"),
    axis.text.y = element_text(size = 10, colour = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    # Wide, flat legend keys.
    legend.key.width = unit(3, "lines"),
    legend.key.height = unit(0.4, "lines"),
    legend.text = element_text(colour = "black", size = 12, face = "bold")
  )

Bar chart with labeled ends

# Work on a plain data frame with a numeric Production column.
bp_clean4 <- bp_clean4 %>%
  as.data.frame() %>%
  mutate(Production = as.numeric(Production))

# Rows for 2017, largest producers first. Despite the object name, the slice
# keeps the first 15 rows, not 10.
top_ten <- bp_clean4 %>%
  filter(Year == "2017") %>%
  arrange(desc(Production)) %>%
  slice(seq_len(15))


# Flag the first row as "two" and every other row as "one", so the leading
# bar gets its own colour. Assumes top_ten is sorted by descending
# Production, so the first row is the largest producer. The length follows
# nrow(top_ten) instead of a hard-coded 15.
highest <- ifelse(seq_len(nrow(top_ten)) == 1, "two", "one")

# Horizontal bar chart, largest producer at the top, with the rounded
# production value written just past the end of each bar.
ggplot(top_ten, aes(x = reorder(Country, Production), y = Production)) +
  geom_col(aes(fill = highest)) +
  # Named values make the colour assignment independent of level order.
  scale_fill_manual(values = c(one = "#813753", two = "#54203F")) +
  coord_flip() +
  # x is inherited from the plot, so each label sits on its own bar without
  # relying on a hand-written position sequence.
  geom_text(
    aes(label = round(Production)),
    nudge_y = 500,
    color = "black",
    size = 5
  ) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # Values are printed on the bars, so the value axis text is hidden.
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 12, colour = "black"),
    legend.position = "none",
    panel.background = element_blank(),
    panel.grid = element_blank()
  ) +
  # The count in the title is taken from the data so it matches the number of
  # bars actually drawn.
  labs(
    title = paste0(
      "Top ", nrow(top_ten), " Oil Producing Countries in the World in 2017"
    ),
    subtitle = "Thousand Barrels Daily",
    caption = "Source: BP Statistical Review"
  ) +
  theme(title = element_text(size = 12))

Stacked bar chart for the top three producers (2012–2017)

# Production of the US, Russian Federation and Saudi Arabia for 2012-2017,
# sorted from highest to lowest.
top_three <- bp_clean4 %>%
  filter(
    Year %in% c(2012:2017),
    Country %in% c("US", "Russian Federation", "Saudi Arabia")
  ) %>%
  arrange(desc(Production))

# Stacked bars: one bar per year, one segment per country.
ggplot(top_three, aes(x = Year, y = Production, fill = Country)) +
  geom_col() +
  scale_fill_manual(values = c("#003B46", "#07575B", "#66A5AD")) +
  scale_y_continuous(
    limits = c(0, 40000),
    breaks = seq(0, 40000, by = 10000),
    labels = scales::comma
  ) +
  labs(
    title = "Top Three Oil Producing Countries in the World (2012-2017)",
    subtitle = "Thousand Barrels Daily",
    caption = "Source: BP Statistical Review"
  ) +
  theme_minimal() +
  theme(
    title = element_text(size = 15),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, colour = "black"),
    axis.text.y = element_text(size = 12, colour = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    legend.key.height = unit(0.8, "lines"),
    legend.key.width = unit(3, "lines"),
    legend.text = element_text(size = 12, colour = "black", face = "bold")
  )

Clustered bar chart for the top five producers (2012–2017)

# Production of five selected countries for 2012-2017, highest values first.
top_five <- bp_clean4 %>%
  filter(
    Year %in% c(2012:2017),
    Country %in% c("US", "Russian Federation", "Saudi Arabia", "Iran", "Canada")
  ) %>%
  arrange(desc(Production))

# Clustered bars: within each year the countries are drawn side by side.
ggplot(top_five, aes(x = Year, y = Production, fill = Country)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("#375E97", "#FB6542", "#FFBB00", "#3F681C", "#98DBC6")
  ) +
  scale_y_continuous(
    limits = c(0, 20000),
    breaks = seq(0, 20000, by = 4000),
    labels = scales::comma
  ) +
  labs(
    title = "Top Five Oil Producing Countries (2012-2017)",
    subtitle = "Thousand Barrels Daily",
    caption = "Source: BP Statistical Review"
  ) +
  theme_minimal() +
  theme(
    title = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, colour = "black"),
    axis.text.y = element_text(size = 12, colour = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "bottom",
    # Square legend keys.
    legend.key.height = unit(0.8, "lines"),
    legend.key.width = unit(0.8, "lines"),
    legend.text = element_text(size = 12, colour = "black", face = "bold")
  )

Bar chart with line

# US rows for 1997-2017 in chronological order.
us_production <- bp_clean4 %>%
  filter(Country == "US", Year %in% c(1997:2017)) %>%
  arrange(Year)

# Bars with a line and point markers drawn over the same values.
ggplot(us_production, aes(x = Year, y = Production)) +
  geom_col(size = 1.5, width = 0.5, fill = "gold") +
  # group = 1 joins all years into one line even though Year is discrete.
  geom_line(aes(group = 1), size = 1, color = "orangered3") +
  geom_point(size = 7, shape = 21, fill = "orangered3") +
  scale_y_continuous(
    limits = c(0, 14000),
    breaks = seq(0, 14000, by = 2000),
    labels = scales::comma
  ) +
  labs(
    title = "US Oil Production (1997-2017)",
    subtitle = "Thousand Barrels Daily",
    caption = "Source: BP Statistical Review"
  ) +
  theme_minimal() +
  theme(
    title = element_text(size = 14),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, colour = "black"),
    axis.text.y = element_text(size = 12, colour = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.title = element_blank()
  )