Main steps
The first step is the installation of the required packages.
install.packages(c("glue", "tidyverse", "cbsodataR", "plotly"))
Now we can download the data, it usually takes a few seconds.
library(tidyverse)
library(glue)
library(cbsodataR)
data <- cbs_get_data("83140ENG") |>
as_tibble()
head(data)
## # A tibble: 6 × 62
## EnergyCommodities Periods TotalPrimaryEnergySupplyTP…¹ IndigenousProduction_2
## <chr> <chr> <dbl> <dbl>
## 1 "T001027 " 1946JJ00 NA NA
## 2 "T001027 " 1947JJ00 NA NA
## 3 "T001027 " 1948JJ00 NA NA
## 4 "T001027 " 1949JJ00 NA NA
## 5 "T001027 " 1950JJ00 NA NA
## 6 "T001027 " 1951JJ00 NA NA
## # ℹ abbreviated name: ¹TotalPrimaryEnergySupplyTPES_1
## # ℹ 58 more variables: Imports_3 <dbl>, Exports_4 <dbl>, NetImports_5 <dbl>,
## # Bunkers_6 <dbl>, StockChange_7 <dbl>, StatisticalDifferences_8 <dbl>,
## # TotalEnergyConsumption_9 <dbl>, TotalEnergyTransformationInput_10 <dbl>,
## # ElectricityAndCHPTransformationInput_11 <dbl>,
## # OtherTransformationInput_12 <dbl>,
## # TotalEnergyTransformationOutput_13 <dbl>, …
Unfortunately, the data is a bit messy and it requires some data wrangling.
Let’s start selecting a specific year.
year <- "2022"
sel <- data |>
filter(
Periods == glue("{year}JJ00")
)
head(sel)
## # A tibble: 6 × 62
## EnergyCommodities Periods TotalPrimaryEnergySupplyTP…¹ IndigenousProduction_2
## <chr> <chr> <dbl> <dbl>
## 1 "T001027 " 2022JJ00 2699. 1035.
## 2 "E006459 " 2022JJ00 232. NA
## 3 "E006460 " 2022JJ00 235. NA
## 4 "E006461 " 2022JJ00 234. NA
## 5 "E006462 " 2022JJ00 0.1 NA
## 6 "E006463 " 2022JJ00 113. NA
## # ℹ abbreviated name: ¹TotalPrimaryEnergySupplyTPES_1
## # ℹ 58 more variables: Imports_3 <dbl>, Exports_4 <dbl>, NetImports_5 <dbl>,
## # Bunkers_6 <dbl>, StockChange_7 <dbl>, StatisticalDifferences_8 <dbl>,
## # TotalEnergyConsumption_9 <dbl>, TotalEnergyTransformationInput_10 <dbl>,
## # ElectricityAndCHPTransformationInput_11 <dbl>,
## # OtherTransformationInput_12 <dbl>,
## # TotalEnergyTransformationOutput_13 <dbl>, …
Now we add a column (label) containing the name of the
energy commodities that are stored in the named column
EnergyCommodities. We use the attribute labels
to extract the name of the commodities (e.g., Natural gas
for E006560).
sel <- sel |>
mutate(
label = names(attr(sel$EnergyCommodities, 'labels'))
) |>
filter(
label %in% c(
"Total coal and coal products", "Total crude and petroleum products",
"Natural gas", "Renewable energy", "Electricity", "Heat",
"Total other energy commodities"
)
) |>
select(-EnergyCommodities, -Periods)
Before continuing it might be worth spending a couple of minutes on the plotly documentation for the Sankey diagrams. The page contains several examples showing how the data should be structured.
Now I will create a data frame (sdata) containing the
list of the links, each of them containing the source node, the target
node, a value and its label.
sdata <- list(
# FIRST STAGE (prod, import, transf output)
sel |>
select(label, value = IndigenousProduction_2) |>
mutate(
from = 'Production',
to = label
) |>
filter(
!is.na(value)
),
sel |>
select(label, value = Imports_3) |>
mutate(
from = 'Import',
to = label
) |>
filter(
!is.na(value)
),
sel |>
filter(
TotalNetEnergyTransformation_16 < 0
) |>
mutate(
value = -1 * TotalNetEnergyTransformation_16,
from = "Transformation Output",
to = label
) |>
select(label, value, from, to),
# SECOND STAGE -----------------------------------------------
sel |>
select(label, value = Exports_4) |>
mutate(
from = label,
to = "Export"
) |>
filter(
!is.na(value)
),
sel |>
select(label, value = Bunkers_6) |>
mutate(
from = label,
to = "Bunker"
) |>
filter(
!is.na(value)
),
sel |>
filter(TotalNetEnergyTransformation_16 > 0) |>
select(label, value = TotalNetEnergyTransformation_16) |>
mutate(
from = label,
to = "Transformation"
) |>
filter(
!is.na(value)
),
sel |>
select(label, value = Total_19) |>
mutate(
from = label,
to = "Own use"
) |>
filter(
!is.na(value)
),
sel |>
select(label, value = DistributionLosses_26) |>
mutate(
from = label,
to = "Distribution losses"
) |>
filter(
!is.na(value)
),
sel |>
select(label, value = TotalFinalConsumption_27) |>
mutate(
from = label,
to = "Total Final Consumption"
) |>
filter(
!is.na(value)
),
# THIRD STAGE (Final consumption) ------------------------------------------
sel |>
select(label, value = Total_29) |>
mutate(
from = "Total Final Consumption",
to = "Industry"
) |>
filter(
!is.na(value)
),
sel |>
select(label, value = Total_43) |>
mutate(
from = "Total Final Consumption",
to = "Transport"
) |>
filter(
!is.na(value)
),
sel |>
select(label, value = Total_50) |>
mutate(
from = "Total Final Consumption",
to = "Other Sectors"
) |>
filter(
!is.na(value)
)
) |>
bind_rows()
head(sdata)
## # A tibble: 6 × 4
## label value from to
## <chr> <dbl> <chr> <chr>
## 1 Total crude and petroleum products 49.9 Production Total crude and petroleu…
## 2 Natural gas 539. Production Natural gas
## 3 Renewable energy 364. Production Renewable energy
## 4 Total other energy commodities 81.9 Production Total other energy commo…
## 5 Total coal and coal products 241. Import Total coal and coal prod…
## 6 Total crude and petroleum products 8118. Import Total crude and petroleu…
For the sake of simplicity I omitted a few fields of the energy balance that can be considered negligible (for example the stock changes and the statistical differences).
Now let’s load the plotly package and define a data
frame with the list of the nodes and their color and a list of the
colors for each energy carrier.
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
node_list <- tribble(
~name, ~color,
"Production", '#dddddd',
"Import", '#dddddd',
"Transformation Output", '#dddddd',
"Total coal and coal products", "#a6761d",
"Total crude and petroleum products","#7570b3" ,
"Natural gas", "#e7298a",
"Renewable energy", "#66a61e",
"Electricity", "#1b9e77",
"Total other energy commodities", "#d95f02",
"Heat", "#e6ab02",
"Total Final Consumption", '#dddddd',
"Export",'#dddddd',
"Bunker", '#dddddd',
"Transformation", '#dddddd',
"Own use", '#dddddd',
"Distribution losses" , '#dddddd',
"Industry", '#dddddd',
"Transport", '#dddddd',
"Other Sectors", '#dddddd'
)
color_map <- c(
"Total crude and petroleum products"= "#7570b390",
"Natural gas" = "#e7298a90",
"Renewable energy" = "#66a61e90",
"Total other energy commodities" = "#d95f0290",
"Total coal and coal products" = "#a6761d90",
"Electricity" = "#1b9e7790",
"Heat" = "#e6ab0290"
)
Here the most important step. A plotly visualisation is created using
the plot_ly function, specifing all the needed fields for a
Sankey diagram, including the nodes and the links. See the documentation
for details and more examples. For the links, sources and targets must
be integer (starting from zero).
fig <- plot_ly(
type = 'sankey',
orientation = 'h',
valueformat = ".0f",
valuesuffix = " PJ",
node = list(
label = node_list$name,
color = node_list$color,
pad = 15,
thickness = 20,
line = list(
color = "black",
width = 1
)
),
link = list(
source = match(sdata$from, node_list$name)-1,
target = match(sdata$to, node_list$name)-1,
value = sdata$value,
label = sdata$label,
color = color_map[sdata$label]
)
)
We add a layout to the visualisation, adding the background colour and a title.
fig <- fig |> layout(
title = list(text = glue('Energy balance for the Netherlands year {year}<br>Data from <a href="https://cbs.nl/en-gb/figures/detail/83140eng">CBS</a>'),
y = 0.95),
font = list(
size = 16,
color = 'black'
),
plot_bgcolor = '#DDDDDD',
paper_bgcolor = '#DDDDDD'
)
fig
The visualisation can be seen typing fig, otherwise I
have uploaded it to the Plotly Chart Studio here