library(dplyr)
library(lubridate)
library(ggplot2)
library(plotly)
library(glue)
library(tidyr)
library(tidytext)
library(scales)
library(hrbrthemes)
For LBB (learning by building) - “Programming for Data Science” assignment , i’m using Export & Import data which classified by SITC (Standard International Trade Classification) with period ranging from the year 2011 up to 2020. Data was taken from Indonesia Central Bureau of Statistics website, export & import section.
Import data and assign it into object named “exim”. Data format is in CSV, but all information are stored in separate columns, thus i use additional import function (“sep=”;“), otherwise data will be imported to RMD and combined into single column.
<- read.csv("exim-menurut-golongan.csv", sep=";") exim
#checking data type
str(exim)
## 'data.frame': 200 obs. of 6 variables:
## $ Exim : chr "Export" "Export" "Export" "Export" ...
## $ nama_item_vertical_variabel: chr "0. Bahan makanan dan binatang hidup" "1. Minuman dan tembakau" "2. Bahan-bahan mentah, tidak untuk dimakan" "3. Bahan bakar pelikan, bahan penyemir dan bahan-bahan yang berkenaan dengan itu" ...
## $ nama_tahun : int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ nama_turunan_tahun : chr "Tahun" "Tahun" "Tahun" "Tahun" ...
## $ nilai : num 14565 1244 14034 25502 19710 ...
## $ volume : num 12862 324 48568 436554 28776 ...
#checking missing value
colSums(is.na(exim))
## Exim nama_item_vertical_variabel
## 0 0
## nama_tahun nama_turunan_tahun
## 0 0
## nilai volume
## 0 0
Based on data checking above, there are several data type “character” that need to be converted into “factor”.
<- exim %>%
exim mutate_if(is.character, as.factor) %>%
mutate(nama_tahun = as.factor(nama_tahun))
nama_item_vertical_variabel
need to be clean and modify to be more intuitive.
#To shorten information in column "nama_item_vertical_variabel"
$nama_item_vertical_variabel <- sapply(as.character(exim$nama_item_vertical_variabel), switch,
exim"0. Bahan makanan dan binatang hidup" = "Food and lives animal",
"1. Minuman dan tembakau" = "Beverages and tobacco",
"2. Bahan-bahan mentah, tidak untuk dimakan" = "Crude materials, inedible, except fuels",
"3. Bahan bakar pelikan, bahan penyemir dan bahan-bahan yang berkenaan dengan itu" = "Minerals fuels, lubricants and related materials",
"4. Lemak serta minyak hewan dan nabati" = "Animal and vegetable oil, fats and waxes",
"5. Bahan-bahan kimia" = "Chemical and related products",
"6. Barang-barang buatan pabrik dirinci menurut bahan" = "Manufactured goods classified by material",
"7. Mesin dan alat pengangkutan" = "Machinery and transport equipment",
"8. Berbagai jenis barang buatan pabrik" = "Miscellaneous manufactured articles",
"9. Barang-barang transaksi tidak dirinci" = "Commodities and transaction not classified")
$nama_item_vertical_variabel <- as.factor(exim$nama_item_vertical_variabel) exim
unique(exim$nama_turunan_tahun) #To check unique value in `nama_turunan_tahun`
## [1] Tahun
## Levels: Tahun
Noted nama_tunuran_tahun
column only contain single information “Tahun”, thus we will delete it from DF.
<- exim[-c(4)] #To delete "nama_turunan_tahun" column, since it contain unnecessary information. exim
#To change columns name to be more understandable / intuitive
names(exim)[1] <- "Category"
names(exim)[2] <- "Classification_SITC"
names(exim)[3] <- "Years"
names(exim)[4] <- "Value"
names(exim)[5] <- "Volume"
#To re-check data type, column name and "classification_SITC" information.
glimpse(exim)
## Rows: 200
## Columns: 5
## $ Category <fct> Export, Export, Export, Export, Export, Export, Ex~
## $ Classification_SITC <fct> "Food and lives animal", "Beverages and tobacco", ~
## $ Years <fct> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 20~
## $ Value <dbl> 14565.1, 1243.6, 14034.4, 25501.8, 19709.5, 12659.~
## $ Volume <dbl> 12861.5, 324.1, 48568.0, 436554.2, 28775.7, 16692.~
Category
: Categorize by Export / Import.
Classification_SITC
: information of Export / Import classified based on SITC.
Years
: Categorize by Years.
Value
: Export / Import value in $ million.
Volume
: Export / Import volume in kilotonne.
The Standard international trade classification, abbreviated as SITC, is a product classification of the United Nations (UN) used for external trade statistics (export and Import values and volumes of goods), allowing for international comparisons of commodities and manufactured goods.
The groupings of SITC reflect:
-the production materials
-the processing stage
-market practices and uses of the products
-the importance of the goods in world trade
-technological changes
<- exim %>%
exim_val group_by(Years ,Category) %>%
summarise(total_value = sum(Value)) %>%
mutate(label = glue("Value = {comma(round(total_value,0))}"))
<- ggplot(exim_val, aes(fill = Category, x = Years, y = total_value, text = label))+
exim_val_plot geom_bar(position = "dodge", stat = "identity")+
scale_fill_manual(values=c("lightseagreen", "navyblue"))+
scale_y_continuous(labels = scales::comma)+
ggtitle("Indonesia Export & Import (Value)")+
labs(x = "Years",
y = "Value in $ Million", fill = "")+
theme_minimal()+
theme(axis.text = element_text(colour = "royalblue"),
plot.title = element_text(hjust = 0.5))
ggplotly(exim_val_plot, tooltip = "label")
<- exim %>%
exim_vol group_by(Years ,Category) %>%
summarise(total_vol = sum(Volume)) %>%
mutate(label = glue("Value = {comma(round(total_vol,0))}"))
<- ggplot(data = exim_vol, aes(fill = Category, x = Years, y = total_vol, text = label))+
exim_vol_plot geom_bar(position = "dodge", stat = "identity")+
scale_fill_manual(values=c("burlywood1", "darkorange"))+
scale_y_continuous(labels = scales::comma)+
ggtitle("Indonesia Export & Import (Volume)")+
labs(x = "Years",
y = "Volume in Killo Tonne", fill = "")+
theme_minimal()+
theme(axis.text = element_text(colour = "royalblue"),
plot.title = element_text(hjust = 0.5))
ggplotly(exim_vol_plot, tooltip = "label")
Interpretations:
Value
tend to decrease from its peak in 2011 to 2016, this is due to government regulation in mining industry related to mineral ore export policy. In 2017, mining export policy is unveiled, whereas under certain condition export of mineral ore is re-allowed. SourceValue
also tend to decrease from its peak in 2012 to 2016, main reason due to decrease of production from import country of origin such as US and China. Source<- exim %>%
exim_scat_ex filter(Category == "Export") %>%
group_by(Category, Years) %>%
summarise(val = sum(Value), vol = sum(Volume)) %>%
mutate(label = glue("Year = {Years}
Value = {comma(round(val,2))}
Volume = {comma(round(vol,2))}"))
<- ggplot(data = exim_scat_ex, aes(x = val, y = vol, colour = Years, text = label)) +
exim_scat_imp_plot geom_point(size = 6)+
scale_y_continuous(labels = scales::comma)+
scale_x_continuous(labels = scales::comma)+
ggtitle("Export Value & Volume Correlation")+
labs(x = "Value in $ Million",
y = "Volume in Kilotonne ")+
theme_minimal()+
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5))
ggplotly(exim_scat_imp_plot, tooltip = "label")
<- exim %>%
exim_scat_im filter(Category == "Import") %>%
group_by(Category, Years) %>%
summarise(val = sum(Value), vol = sum(Volume)) %>%
mutate(label = glue("Year = {Years}
Value = {comma(round(val,2))}
Volume = {comma(round(vol,2))}"))
<- ggplot(data = exim_scat_im, aes(x = val, y = vol, color = Years, text = label)) +
exim_scat_im_plot geom_point(size = 6)+
scale_y_continuous(labels = scales::comma)+
scale_x_continuous(labels = scales::comma)+
ggtitle("Import Value & Volume Correlation")+
labs(x = "Value in $ Million",
y = "Volume in Kilotonne ")+
theme_minimal()+
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5))
ggplotly(exim_scat_im_plot, tooltip = "label")
Interpretations:
Export
chart, year “2013” have the highest Volume
but not in Value
, this has contra condition with the year “2011”.Import
chart, where in the year “2018”, “2012” and “2013” Value
amount are close, but not in Volume
.Due to there is a large range gap between Export and Import Volume
, instead of using facet_wrap
, we decide to split the chart.
<- exim %>%
exim_stack mutate(label = glue("Category = {Classification_SITC}
Value = {comma(round(Value,0))}"))
<- ggplot(data = exim_stack, aes(x = Years, y = Value, fill = Classification_SITC, text = label)) +
exim_stack_plot geom_bar(position = "stack", stat = "identity")+
scale_y_continuous(labels = scales::comma)+
ggtitle("Export - Import by SITC (Value)")+
labs(x = "Year",
y = "Value in $ Million", fill = "")+
theme_minimal()+
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5))+
facet_wrap(~ Category)
ggplotly(exim_stack_plot, tooltip = "label")
<- exim %>%
exim_stack_ex filter(Category == "Export") %>%
group_by(Category, Years, Classification_SITC) %>%
summarise(val_ex = sum(Volume)) %>%
mutate(label = glue("Volume = {comma(round(val_ex,0))}"))
<- ggplot(data = exim_stack_ex, aes(x = Years, y = val_ex, fill = Classification_SITC, text = label)) +
exim_stack_ex_plot geom_bar(position = "stack", stat = "identity")+
scale_y_continuous(labels = scales::comma)+
ggtitle("Export by SITC (Volume)")+
labs(x = "Year",
y = "Volume in Killotonne", fill = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggplotly(exim_stack_ex_plot, tooltip = "label")
<- exim %>%
exim_stack_imp filter(Category == "Import") %>%
group_by(Category, Years, Classification_SITC) %>%
summarise(val_imp = sum(Volume)) %>%
mutate(label = glue("Volume = {comma(round(val_imp,0))}"))
<- ggplot(data = exim_stack_imp, aes(x = Years, y = val_imp, fill = Classification_SITC, text = label)) +
exim_stack_imp_plot geom_bar(position = "stack", stat = "identity")+
scale_y_continuous(labels = scales::comma)+
ggtitle("Import by SITC (Volume)")+
labs(x = "Year",
y = "Volume in Killotonne", fill = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggplotly(exim_stack_imp_plot, tooltip = "label")
Interpretations:
Value
and Volume
. As seen from chart above, if “Minerals fuels, lubricants and related materials” class decrease, it affecting the whole “Bar” in each Years
.Value
but low in Volume
, as machinery part and equipment is considered as an expensive goods.