library(dplyr)
library(lubridate)
library(ggplot2)
library(plotly)
library(glue)
library(tidyr)
library(tidytext)
library(scales)
library(hrbrthemes)For LBB (learning by building) - “Programming for Data Science” assignment , i’m using Export & Import data which classified by SITC (Standard International Trade Classification) with period ranging from the year 2011 up to 2020. Data was taken from Indonesia Central Bureau of Statistics website, export & import section.
Import data and assign it into object named “exim”. Data format is in CSV, but all information are stored in separate columns, thus i use additional import function (“sep=”;“), otherwise data will be imported to RMD and combined into single column.
exim <- read.csv("exim-menurut-golongan.csv", sep=";")#checking data type
str(exim)## 'data.frame': 200 obs. of 6 variables:
## $ Exim : chr "Export" "Export" "Export" "Export" ...
## $ nama_item_vertical_variabel: chr "0. Bahan makanan dan binatang hidup" "1. Minuman dan tembakau" "2. Bahan-bahan mentah, tidak untuk dimakan" "3. Bahan bakar pelikan, bahan penyemir dan bahan-bahan yang berkenaan dengan itu" ...
## $ nama_tahun : int 2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
## $ nama_turunan_tahun : chr "Tahun" "Tahun" "Tahun" "Tahun" ...
## $ nilai : num 14565 1244 14034 25502 19710 ...
## $ volume : num 12862 324 48568 436554 28776 ...
#checking missing value
colSums(is.na(exim))## Exim nama_item_vertical_variabel
## 0 0
## nama_tahun nama_turunan_tahun
## 0 0
## nilai volume
## 0 0
Based on data checking above, there are several data type “character” that need to be converted into “factor”.
exim <- exim %>%
mutate_if(is.character, as.factor) %>%
mutate(nama_tahun = as.factor(nama_tahun))nama_item_vertical_variabel need to be clean and modify to be more intuitive.
#To shorten information in column "nama_item_vertical_variabel"
exim$nama_item_vertical_variabel <- sapply(as.character(exim$nama_item_vertical_variabel), switch,
"0. Bahan makanan dan binatang hidup" = "Food and lives animal",
"1. Minuman dan tembakau" = "Beverages and tobacco",
"2. Bahan-bahan mentah, tidak untuk dimakan" = "Crude materials, inedible, except fuels",
"3. Bahan bakar pelikan, bahan penyemir dan bahan-bahan yang berkenaan dengan itu" = "Minerals fuels, lubricants and related materials",
"4. Lemak serta minyak hewan dan nabati" = "Animal and vegetable oil, fats and waxes",
"5. Bahan-bahan kimia" = "Chemical and related products",
"6. Barang-barang buatan pabrik dirinci menurut bahan" = "Manufactured goods classified by material",
"7. Mesin dan alat pengangkutan" = "Machinery and transport equipment",
"8. Berbagai jenis barang buatan pabrik" = "Miscellaneous manufactured articles",
"9. Barang-barang transaksi tidak dirinci" = "Commodities and transaction not classified")
exim$nama_item_vertical_variabel <- as.factor(exim$nama_item_vertical_variabel)unique(exim$nama_turunan_tahun) #To check unique value in `nama_turunan_tahun`## [1] Tahun
## Levels: Tahun
Noted nama_tunuran_tahun column only contain single information “Tahun”, thus we will delete it from DF.
exim <- exim[-c(4)] #To delete "nama_turunan_tahun" column, since it contain unnecessary information.#To change columns name to be more understandable / intuitive
names(exim)[1] <- "Category"
names(exim)[2] <- "Classification_SITC"
names(exim)[3] <- "Years"
names(exim)[4] <- "Value"
names(exim)[5] <- "Volume"#To re-check data type, column name and "classification_SITC" information.
glimpse(exim)## Rows: 200
## Columns: 5
## $ Category <fct> Export, Export, Export, Export, Export, Export, Ex~
## $ Classification_SITC <fct> "Food and lives animal", "Beverages and tobacco", ~
## $ Years <fct> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 20~
## $ Value <dbl> 14565.1, 1243.6, 14034.4, 25501.8, 19709.5, 12659.~
## $ Volume <dbl> 12861.5, 324.1, 48568.0, 436554.2, 28775.7, 16692.~
Category : Categorize by Export / Import.
Classification_SITC : information of Export / Import classified based on SITC.
Years : Categorize by Years.
Value : Export / Import value in $ million.
Volume : Export / Import volume in kilotonne.
The Standard international trade classification, abbreviated as SITC, is a product classification of the United Nations (UN) used for external trade statistics (export and Import values and volumes of goods), allowing for international comparisons of commodities and manufactured goods.
The groupings of SITC reflect:
-the production materials
-the processing stage
-market practices and uses of the products
-the importance of the goods in world trade
-technological changes
exim_val <- exim %>%
group_by(Years ,Category) %>%
summarise(total_value = sum(Value)) %>%
mutate(label = glue("Value = {comma(round(total_value,0))}"))
exim_val_plot <- ggplot(exim_val, aes(fill = Category, x = Years, y = total_value, text = label))+
geom_bar(position = "dodge", stat = "identity")+
scale_fill_manual(values=c("lightseagreen", "navyblue"))+
scale_y_continuous(labels = scales::comma)+
ggtitle("Indonesia Export & Import (Value)")+
labs(x = "Years",
y = "Value in $ Million", fill = "")+
theme_minimal()+
theme(axis.text = element_text(colour = "royalblue"),
plot.title = element_text(hjust = 0.5))
ggplotly(exim_val_plot, tooltip = "label")exim_vol <- exim %>%
group_by(Years ,Category) %>%
summarise(total_vol = sum(Volume)) %>%
mutate(label = glue("Value = {comma(round(total_vol,0))}"))
exim_vol_plot <- ggplot(data = exim_vol, aes(fill = Category, x = Years, y = total_vol, text = label))+
geom_bar(position = "dodge", stat = "identity")+
scale_fill_manual(values=c("burlywood1", "darkorange"))+
scale_y_continuous(labels = scales::comma)+
ggtitle("Indonesia Export & Import (Volume)")+
labs(x = "Years",
y = "Volume in Killo Tonne", fill = "")+
theme_minimal()+
theme(axis.text = element_text(colour = "royalblue"),
plot.title = element_text(hjust = 0.5))
ggplotly(exim_vol_plot, tooltip = "label") Interpretations:
Value tend to decrease from its peak in 2011 to 2016, this is due to government regulation in mining industry related to mineral ore export policy. In 2017, mining export policy is unveiled, whereas under certain condition export of mineral ore is re-allowed. SourceValue also tend to decrease from its peak in 2012 to 2016, main reason due to decrease of production from import country of origin such as US and China. Sourceexim_scat_ex <- exim %>%
filter(Category == "Export") %>%
group_by(Category, Years) %>%
summarise(val = sum(Value), vol = sum(Volume)) %>%
mutate(label = glue("Year = {Years}
Value = {comma(round(val,2))}
Volume = {comma(round(vol,2))}"))
exim_scat_imp_plot <- ggplot(data = exim_scat_ex, aes(x = val, y = vol, colour = Years, text = label)) +
geom_point(size = 6)+
scale_y_continuous(labels = scales::comma)+
scale_x_continuous(labels = scales::comma)+
ggtitle("Export Value & Volume Correlation")+
labs(x = "Value in $ Million",
y = "Volume in Kilotonne ")+
theme_minimal()+
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5))
ggplotly(exim_scat_imp_plot, tooltip = "label")exim_scat_im <- exim %>%
filter(Category == "Import") %>%
group_by(Category, Years) %>%
summarise(val = sum(Value), vol = sum(Volume)) %>%
mutate(label = glue("Year = {Years}
Value = {comma(round(val,2))}
Volume = {comma(round(vol,2))}"))
exim_scat_im_plot <- ggplot(data = exim_scat_im, aes(x = val, y = vol, color = Years, text = label)) +
geom_point(size = 6)+
scale_y_continuous(labels = scales::comma)+
scale_x_continuous(labels = scales::comma)+
ggtitle("Import Value & Volume Correlation")+
labs(x = "Value in $ Million",
y = "Volume in Kilotonne ")+
theme_minimal()+
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5))
ggplotly(exim_scat_im_plot, tooltip = "label")Interpretations:
Export chart, year “2013” have the highest Volume but not in Value, this has contra condition with the year “2011”.Import chart, where in the year “2018”, “2012” and “2013” Value amount are close, but not in Volume.Due to there is a large range gap between Export and Import Volume, instead of using facet_wrap, we decide to split the chart.
exim_stack <- exim %>%
mutate(label = glue("Category = {Classification_SITC}
Value = {comma(round(Value,0))}"))
exim_stack_plot <- ggplot(data = exim_stack, aes(x = Years, y = Value, fill = Classification_SITC, text = label)) +
geom_bar(position = "stack", stat = "identity")+
scale_y_continuous(labels = scales::comma)+
ggtitle("Export - Import by SITC (Value)")+
labs(x = "Year",
y = "Value in $ Million", fill = "")+
theme_minimal()+
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5))+
facet_wrap(~ Category)
ggplotly(exim_stack_plot, tooltip = "label")exim_stack_ex <- exim %>%
filter(Category == "Export") %>%
group_by(Category, Years, Classification_SITC) %>%
summarise(val_ex = sum(Volume)) %>%
mutate(label = glue("Volume = {comma(round(val_ex,0))}"))
exim_stack_ex_plot <- ggplot(data = exim_stack_ex, aes(x = Years, y = val_ex, fill = Classification_SITC, text = label)) +
geom_bar(position = "stack", stat = "identity")+
scale_y_continuous(labels = scales::comma)+
ggtitle("Export by SITC (Volume)")+
labs(x = "Year",
y = "Volume in Killotonne", fill = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggplotly(exim_stack_ex_plot, tooltip = "label")exim_stack_imp <- exim %>%
filter(Category == "Import") %>%
group_by(Category, Years, Classification_SITC) %>%
summarise(val_imp = sum(Volume)) %>%
mutate(label = glue("Volume = {comma(round(val_imp,0))}"))
exim_stack_imp_plot <- ggplot(data = exim_stack_imp, aes(x = Years, y = val_imp, fill = Classification_SITC, text = label)) +
geom_bar(position = "stack", stat = "identity")+
scale_y_continuous(labels = scales::comma)+
ggtitle("Import by SITC (Volume)")+
labs(x = "Year",
y = "Volume in Killotonne", fill = "")+
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggplotly(exim_stack_imp_plot, tooltip = "label")Interpretations:
Value and Volume. As seen from chart above, if “Minerals fuels, lubricants and related materials” class decrease, it affecting the whole “Bar” in each Years.Value but low in Volume, as machinery part and equipment is considered as an expensive goods.