Clustering Countries Based on Social and Economic Indicators
Import Library
library(rworldmap)
library(RColorBrewer)
library(tidyverse)
library(factoextra)
library(ggradar)
library(scales)
library(kableExtra)
Dataset
# Read the raw indicator table from the working directory and show it as a
# paged table in the rendered report.
dataset <- read.csv(file = "Country-data.csv")
dataset %>%
  rmarkdown::paged_table()
Tujuan dari RmD kali ini adalah menclusterkan data data tersebut kedalam beberapa kategori dan membuat visualisasi dari persebaran dari tiap kategori tersebut.
Child mort adalah kematian anak dibawah 5 tahun per 1000 kelahiran
Export adalah total ekspor barang dan jasa ke luar negeri selama setahun dalam dollar
Health adalah nilai kesehatan dari tiap negara
Import adalah total impor barang dan jasa dari luar negeri selama setahun dalam dollar
Income adalah pendapatan dari tiap negara selama setahun
Inflation adalah tingkat inflasi tahunan dari tiap negara
Life Expectancy adalah angka harapan hidup atau rata rata umur orang orang di suatu negara ketika dia meninggal
Total Fertility adalah angka kelahiran per 1000 wanita
GDPP adalah pendapatan per kapita, yaitu total pendapatan selama setahun, dibagi dengan jumlah penduduk di negara tersebut
Data Wrangling
# Promote the country names to row names (dropping the `country` column), so
# the remaining columns can be passed to scale(), which centres and scales
# each column.
dataset <- dataset %>%
  column_to_rownames(var = "country")
data_scale <- scale(dataset)
Clustering
# Elbow plot of the within-cluster sum of squares ("wss") for k-means on the
# scaled data, used to choose the number of clusters.
fviz_nbclust(x = data_scale, FUNcluster = kmeans, method = "wss")

# Select the "Rounding" sampler before seeding.
# NOTE(review): presumably done so the seed reproduces results obtained on
# R < 3.6 -- confirm this is still wanted.
RNGkind(sample.kind = "Rounding")
## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler
## used
set.seed(123)
country_scale <- kmeans(data_scale, centers = 3)
fviz_cluster(object = country_scale, data = data_scale)

# k-means cluster ids (1, 2, 3) are arbitrary and can change with the seed or
# the RNG, so do not map ids to labels directly. Rank the cluster centres by
# income instead: lowest -> "Developing", middle -> "Middle", highest -> "Rich".
income_rank <- rank(country_scale$centers[, "income"], ties.method = "first")
cluster_labels <- c("Developing", "Middle", "Rich")[income_rank]
dataset$group <- unname(cluster_labels[country_scale$cluster])
Data Analysis
Karakteristik tiap Cluster
# Radar-chart input: one row per cluster holding the mean of every indicator,
# with each indicator column then rescaled across the clusters (scales::rescale
# maps to [0, 1] by default) so all radar axes share the same range.
# Uses across() in place of the deprecated funs() / superseded mutate_at() and
# summarise_all().
dat_radar <- dataset %>%
  group_by(group) %>%
  summarise(across(everything(), mean)) %>%
  mutate(
    group = as.character(group),
    across(-group, rescale)
  )
# Draw the per-cluster indicator profile as a radar chart.
dat_radar %>%
  ggradar(
    grid.label.size = 3,
    axis.label.size = 3,
    group.point.size = 4,
    group.line.width = 1,
    legend.text.size = 10
  )
Create Score
# Build a single composite score per country. Indicators where a lower value
# is treated as better (child mortality, inflation, total fertility) are
# negated first, exports and imports are collapsed into net exports, and the
# resulting columns are summed.
# NOTE(review): the indicators are summed on their raw scales, so the score is
# driven almost entirely by `income` and `gdpp` -- confirm this is intended.
ranking <- dataset %>%
  mutate(
    child_mort = child_mort * -1,
    inflation = inflation * -1,
    total_fer = total_fer * -1,
    netexport = exports - imports,
    score = child_mort + health + income + inflation + life_expec +
      total_fer + gdpp + netexport
  )
Ranking
# Six highest-scoring countries, rendered as a hover-highlighted table.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ranking %>%
  select(score) %>%
  arrange(desc(score)) %>%
  head() %>%
  kbl() %>%
  kable_paper("hover", full_width = FALSE)
| score | |
|---|---|
| Luxembourg | 196814.0 |
| Qatar | 195401.8 |
| Norway | 150190.6 |
| Switzerland | 130198.1 |
| Singapore | 118808.8 |
| Brunei | 115990.3 |
# Six lowest-scoring countries, rendered as a hover-highlighted table.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ranking %>%
  select(score) %>%
  arrange(score) %>%
  head() %>%
  kbl() %>%
  kable_paper("hover", full_width = FALSE)
| score | |
|---|---|
| Congo, Dem. Rep. | 856.57 |
| Burundi | 921.86 |
| Liberia | 926.31 |
| Niger | 1066.02 |
| Central African Republic | 1214.56 |
| Mozambique | 1267.81 |
Grouping
Developing Country
# Countries labelled "Developing": keep only the label column (country names
# stay in the row names) and render the list as a dark-themed table.
Developing_Country <- subset(dataset, group == "Developing", select = group)
kable_material_dark(kbl(Developing_Country))
| group | |
|---|---|
| Afghanistan | Developing |
| Angola | Developing |
| Benin | Developing |
| Botswana | Developing |
| Burkina Faso | Developing |
| Burundi | Developing |
| Cameroon | Developing |
| Central African Republic | Developing |
| Chad | Developing |
| Comoros | Developing |
| Congo, Dem. Rep. | Developing |
| Congo, Rep. | Developing |
| Cote d’Ivoire | Developing |
| Equatorial Guinea | Developing |
| Eritrea | Developing |
| Gabon | Developing |
| Gambia | Developing |
| Ghana | Developing |
| Guinea | Developing |
| Guinea-Bissau | Developing |
| Haiti | Developing |
| Iraq | Developing |
| Kenya | Developing |
| Kiribati | Developing |
| Lao | Developing |
| Lesotho | Developing |
| Liberia | Developing |
| Madagascar | Developing |
| Malawi | Developing |
| Mali | Developing |
| Mauritania | Developing |
| Mozambique | Developing |
| Namibia | Developing |
| Niger | Developing |
| Nigeria | Developing |
| Pakistan | Developing |
| Rwanda | Developing |
| Senegal | Developing |
| Sierra Leone | Developing |
| South Africa | Developing |
| Sudan | Developing |
| Tanzania | Developing |
| Timor-Leste | Developing |
| Togo | Developing |
| Uganda | Developing |
| Yemen | Developing |
| Zambia | Developing |
Middle Country
# Countries labelled "Middle": keep only the label column (country names stay
# in the row names) and render the list as a dark-themed table.
Middle_Country <- subset(dataset, group == "Middle", select = group)
kable_material_dark(kbl(Middle_Country))
| group | |
|---|---|
| Albania | Middle |
| Algeria | Middle |
| Antigua and Barbuda | Middle |
| Argentina | Middle |
| Armenia | Middle |
| Azerbaijan | Middle |
| Bahamas | Middle |
| Bangladesh | Middle |
| Barbados | Middle |
| Belarus | Middle |
| Belize | Middle |
| Bhutan | Middle |
| Bolivia | Middle |
| Bosnia and Herzegovina | Middle |
| Brazil | Middle |
| Bulgaria | Middle |
| Cambodia | Middle |
| Cape Verde | Middle |
| Chile | Middle |
| China | Middle |
| Colombia | Middle |
| Costa Rica | Middle |
| Croatia | Middle |
| Dominican Republic | Middle |
| Ecuador | Middle |
| Egypt | Middle |
| El Salvador | Middle |
| Estonia | Middle |
| Fiji | Middle |
| Georgia | Middle |
| Grenada | Middle |
| Guatemala | Middle |
| Guyana | Middle |
| Hungary | Middle |
| India | Middle |
| Indonesia | Middle |
| Iran | Middle |
| Jamaica | Middle |
| Jordan | Middle |
| Kazakhstan | Middle |
| Kyrgyz Republic | Middle |
| Latvia | Middle |
| Lebanon | Middle |
| Libya | Middle |
| Lithuania | Middle |
| Macedonia, FYR | Middle |
| Malaysia | Middle |
| Maldives | Middle |
| Mauritius | Middle |
| Micronesia, Fed. Sts. | Middle |
| Moldova | Middle |
| Mongolia | Middle |
| Montenegro | Middle |
| Morocco | Middle |
| Myanmar | Middle |
| Nepal | Middle |
| Oman | Middle |
| Panama | Middle |
| Paraguay | Middle |
| Peru | Middle |
| Philippines | Middle |
| Poland | Middle |
| Romania | Middle |
| Russia | Middle |
| Samoa | Middle |
| Saudi Arabia | Middle |
| Serbia | Middle |
| Seychelles | Middle |
| Solomon Islands | Middle |
| Sri Lanka | Middle |
| St. Vincent and the Grenadines | Middle |
| Suriname | Middle |
| Tajikistan | Middle |
| Thailand | Middle |
| Tonga | Middle |
| Tunisia | Middle |
| Turkey | Middle |
| Turkmenistan | Middle |
| Ukraine | Middle |
| Uruguay | Middle |
| Uzbekistan | Middle |
| Vanuatu | Middle |
| Venezuela | Middle |
| Vietnam | Middle |
Rich Country
# Countries labelled "Rich": keep only the label column (country names stay in
# the row names) and render the list as a dark-themed table.
Rich_Country <- subset(dataset, group == "Rich", select = group)
kable_material_dark(kbl(Rich_Country))
| group | |
|---|---|
| Australia | Rich |
| Austria | Rich |
| Bahrain | Rich |
| Belgium | Rich |
| Brunei | Rich |
| Canada | Rich |
| Cyprus | Rich |
| Czech Republic | Rich |
| Denmark | Rich |
| Finland | Rich |
| France | Rich |
| Germany | Rich |
| Greece | Rich |
| Iceland | Rich |
| Ireland | Rich |
| Israel | Rich |
| Italy | Rich |
| Japan | Rich |
| Kuwait | Rich |
| Luxembourg | Rich |
| Malta | Rich |
| Netherlands | Rich |
| New Zealand | Rich |
| Norway | Rich |
| Portugal | Rich |
| Qatar | Rich |
| Singapore | Rich |
| Slovak Republic | Rich |
| Slovenia | Rich |
| South Korea | Rich |
| Spain | Rich |
| Sweden | Rich |
| Switzerland | Rich |
| United Arab Emirates | Rich |
| United Kingdom | Rich |
| United States | Rich |
Map Data
# Two-column lookup (country name, cluster label as a factor) that is joined
# onto the rworldmap country polygons by name.
data_final <- cbind(country = rownames(dataset), status = dataset$group) %>%
  as.data.frame() %>%
  mutate(status = as.factor(status))

worldmap <- joinCountryData2Map(
  data_final,
  joinCode = "ADMIN",
  nameJoinColumn = "country"
)
## 166 codes from your data successfully matched countries in the map
## 1 codes from your data failed to match with a country code in the map
## 77 codes from the map weren't represented in your data

# Colour each country by its cluster label.
colourPalette <- brewer.pal(8, 'PiYG')
mapCountryData(
  worldmap,
  nameColumnToPlot = "status",
  colourPalette = colourPalette
)
## using catMethod='categorical' for non numeric data in mapCountryData
## Warning in rwmGetColours(colourPalette, numColours): 8 colours specified and 3
## required, using interpolation to calculate colours