Clustering-Project

Importing the required libraries

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(factoextra)

## Loading required package: ggplot2

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(cluster)
library(clValid)
library(ggplot2)
library(ggpubr)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

World Indicators - Data Prep and Cleaning

# Reading of data 
world_indicator <- read.csv("/cloud/project/world_indicator/World Indicators.csv")

# Labeling the different rows with the country names
rownames(world_indicator) <- world_indicator$Country

# Removing dollar sign, percent symbols and commas
world_indicator$GDP = as.numeric(gsub("[\\$,]", "", world_indicator$GDP))
world_indicator$Health.Exp.Capita = as.numeric(gsub("[\\$,]", "", world_indicator$Health.Exp.Capita))
world_indicator$Business.Tax.Rate = as.numeric(gsub("[\\%,]", "", world_indicator$Business.Tax.Rate))

# Dropping the non numerical columns
newdf <- world_indicator[, 1:18]
df_actual <- world_indicator

# Dropping the columns that have more than 70 NA values
newdf <- newdf[ , colSums(is.na(newdf)) < 70]

# Removing the non-significant columns
newdf <- newdf[, -c(2,3,7,9,12,13)]

# Omitting the rows with NA values
newdf <- na.omit(newdf)

# Scaling of data
newdf <- as.data.frame(scale(newdf))

Performing K-Means Clustering

# This clustering is obtained by considering all the attributes including birth rate, GDP, health expenditure, infant mortality rate, life expectancy (males and females) and population (different age groups)

# Elbow method: Determining optimal number of clusters
fviz_nbclust(newdf, kmeans, method = "wss")

fviz_nbclust(newdf, kmeans, method = "silhouette")

# All the methods above report k = 2 as the optimal number of clusters

# Using k-means clustering with k=2
km_world_indi <- kmeans(newdf, 2, nstart = 20)
km_world_indi$cluster

##                        Algeria                         Angola 
##                              1                              2 
##                          Benin                       Botswana 
##                              2                              2 
##                   Burkina Faso                        Burundi 
##                              2                              2 
##                       Cameroon       Central African Republic 
##                              2                              2 
##                           Chad                        Comoros 
##                              2                              2 
##               Congo, Dem. Rep.                    Congo, Rep. 
##                              2                              2 
##                  Cote d'Ivoire                       Djibouti 
##                              2                              2 
##               Egypt, Arab Rep.              Equatorial Guinea 
##                              1                              2 
##                        Eritrea                       Ethiopia 
##                              2                              2 
##                          Gabon                    Gambia, The 
##                              2                              2 
##                          Ghana                         Guinea 
##                              2                              2 
##                  Guinea-Bissau                          Kenya 
##                              2                              2 
##                        Lesotho                        Liberia 
##                              2                              2 
##                          Libya                     Madagascar 
##                              1                              2 
##                         Malawi                           Mali 
##                              2                              2 
##                     Mauritania                      Mauritius 
##                              2                              1 
##                        Morocco                     Mozambique 
##                              1                              2 
##                        Namibia                          Niger 
##                              2                              2 
##                        Nigeria                         Rwanda 
##                              2                              2 
##          Sao Tome and Principe                        Senegal 
##                              2                              2 
##                     Seychelles                   Sierra Leone 
##                              1                              2 
##                   South Africa                    South Sudan 
##                              2                              2 
##                          Sudan                      Swaziland 
##                              2                              2 
##                       Tanzania                           Togo 
##                              2                              2 
##                        Tunisia                         Uganda 
##                              1                              2 
##                         Zambia                    Afghanistan 
##                              2                              2 
##                        Armenia                     Azerbaijan 
##                              1                              1 
##                     Bangladesh                         Bhutan 
##                              2                              2 
##              Brunei Darussalam                       Cambodia 
##                              1                              2 
##                          China                        Georgia 
##                              1                              1 
##                          India                      Indonesia 
##                              2                              1 
##                          Japan                     Kazakhstan 
##                              1                              1 
##                    Korea, Rep.                Kyrgyz Republic 
##                              1                              2 
##                        Lao PDR                       Malaysia 
##                              2                              1 
##                       Maldives                       Mongolia 
##                              1                              1 
##                          Nepal                       Pakistan 
##                              2                              2 
##                    Philippines                      Singapore 
##                              2                              1 
##                      Sri Lanka                     Tajikistan 
##                              1                              2 
##                       Thailand                    Timor-Leste 
##                              1                              2 
##                   Turkmenistan                     Uzbekistan 
##                              2                              2 
##                        Vietnam                        Albania 
##                              1                              1 
##                        Austria                        Belarus 
##                              1                              1 
##                        Belgium         Bosnia and Herzegovina 
##                              1                              1 
##                       Bulgaria                        Croatia 
##                              1                              1 
##                         Cyprus                 Czech Republic 
##                              1                              1 
##                        Denmark                        Estonia 
##                              1                              1 
##                        Finland                         France 
##                              1                              1 
##                        Germany                         Greece 
##                              1                              1 
##                        Hungary                        Iceland 
##                              1                              1 
##                        Ireland                          Italy 
##                              1                              1 
##                         Latvia                      Lithuania 
##                              1                              1 
##                     Luxembourg                 Macedonia, FYR 
##                              1                              1 
##                          Malta                        Moldova 
##                              1                              1 
##                     Montenegro                    Netherlands 
##                              1                              1 
##                         Norway                         Poland 
##                              1                              1 
##                       Portugal                        Romania 
##                              1                              1 
##             Russian Federation                         Serbia 
##                              1                              1 
##                Slovak Republic                       Slovenia 
##                              1                              1 
##                          Spain                         Sweden 
##                              1                              1 
##                    Switzerland                         Turkey 
##                              1                              1 
##                        Ukraine                 United Kingdom 
##                              1                              1 
##                        Bahrain             Iran, Islamic Rep. 
##                              1                              1 
##                           Iraq                         Israel 
##                              2                              1 
##                         Jordan                         Kuwait 
##                              1                              1 
##                        Lebanon                           Oman 
##                              1                              1 
##                          Qatar                   Saudi Arabia 
##                              1                              1 
##           United Arab Emirates                    Yemen, Rep. 
##                              1                              2 
##                      Australia                           Fiji 
##                              1                              1 
##                       Kiribati          Micronesia, Fed. Sts. 
##                              2                              2 
##                    New Zealand               Papua New Guinea 
##                              1                              2 
##                          Samoa                Solomon Islands 
##                              2                              2 
##                          Tonga                        Vanuatu 
##                              2                              2 
##            Antigua and Barbuda                      Argentina 
##                              1                              1 
##                   Bahamas, The                       Barbados 
##                              1                              1 
##                         Belize                        Bolivia 
##                              1                              2 
##                         Brazil                         Canada 
##                              1                              1 
##                          Chile                       Colombia 
##                              1                              1 
##                     Costa Rica                           Cuba 
##                              1                              1 
##             Dominican Republic                        Ecuador 
##                              1                              1 
##                    El Salvador                        Grenada 
##                              1                              1 
##                      Guatemala                         Guyana 
##                              2                              2 
##                          Haiti                       Honduras 
##                              2                              1 
##                        Jamaica                         Mexico 
##                              1                              1 
##                      Nicaragua                         Panama 
##                              1                              1 
##                       Paraguay                           Peru 
##                              1                              1 
##                      St. Lucia St. Vincent and the Grenadines 
##                              1                              1 
##                       Suriname            Trinidad and Tobago 
##                              1                              1 
##                  United States                        Uruguay 
##                              1                              1 
##                  Venezuela, RB 
##                              1

summary(km_world_indi)

##              Length Class  Mode   
## cluster      177    -none- numeric
## centers       20    -none- numeric
## totss          1    -none- numeric
## withinss       2    -none- numeric
## tot.withinss   1    -none- numeric
## betweenss      1    -none- numeric
## size           2    -none- numeric
## iter           1    -none- numeric
## ifault         1    -none- numeric

# K-means clustering of World Indicator data
clusplot(newdf, km_world_indi$cluster, main = '2D representation of the Cluster solution',
         color = TRUE, shade = TRUE, labels = 1, lines = 1)

Performing Hierarchical Clustering

# This clustering is obtained by considering all the attributes including birth rate, GDP, health expenditure, infant mortality rate, life expectancy (males and females) and population (different age groups)

hc_world_indi <- hclust(dist (newdf), method = "average")
summary(hc_world_indi)

##             Length Class  Mode     
## merge       352    -none- numeric  
## height      176    -none- numeric  
## order       177    -none- numeric  
## labels      177    -none- character
## method        1    -none- character
## call          3    -none- call     
## dist.method   1    -none- character

plot(hc_world_indi,main = "Clustered Dendogram of World Indicator Data")

# Using k = 3
hcm_world_indi <- cutree(hc_world_indi, k = 3)
plot(hcm_world_indi, main = "Solution by Hierarchical Clustering of World Indicator")

Performing Internal Validation Metrics to evaluate the performance of clustering

1.) Connectivity - For a particular clustering partition C = {C1,…,CK} of the N observations into K disjoint clusters, the connectivity is defined as summation of x(i) (i ranging from 1 to N, total number of observations) multiplied with summation of nn(i,j) (for i ranging from 1 to N and j ranging from 1 to L, parameter giving the number of nearest neighbors to use. Based on the different internal validation measures, the connectivity has a value between zero and ∞ and should be minimized.

2.) Silhouette Width - The Silhouette Width is the average of each observation’s Silhouette value. For observation i, it is defined as the ratio of (bi − ai) and maximum value of (bi, ai), where ai is the average distance between i and all other observations in the same cluster, and bi is the average distance between i and the observations in the nearest neighboring cluster.Silhouette Width lies in the interval [−1, 1] and should be maximized.

3.) Dunn Index - The Dunn Index is the ratio of the smallest distance between observations not in the same cluster to the largest intra-cluster distance.Dunn Index has a value between zero and ∞ and should be maximized for determining better cluster quality.

# Since our data does not have labels, we cannot perform external validation

int_validate <- clValid(newdf, nClust = 2:6, clMethods = c("kmeans", "hierarchical"),
                    validation = "internal")
summary(int_validate)

## 
## Clustering Methods:
##  kmeans hierarchical 
## 
## Cluster sizes:
##  2 3 4 5 6 
## 
## Validation Measures:
##                                  2       3       4       5       6
##                                                                   
## kmeans       Connectivity  16.7060 19.6349 34.6282 52.4016 55.7194
##              Dunn           0.0485  0.0836  0.1376  0.0810  0.1585
##              Silhouette     0.3883  0.3943  0.3580  0.2800  0.2791
## hierarchical Connectivity   2.9290  5.8579 21.4710 24.6250 31.8575
##              Dunn           0.7570  0.3923  0.1419  0.1419  0.1615
##              Silhouette     0.6815  0.3546  0.3607  0.3351  0.3211
## 
## Optimal Scores:
## 
##              Score  Method       Clusters
## Connectivity 2.9290 hierarchical 2       
## Dunn         0.7570 hierarchical 2       
## Silhouette   0.6815 hierarchical 2

print(paste0("Thus it is evident from these calculated validation measures that hierarchical clustering has a better cluster quality as compared to k-means"))

## [1] "Thus it is evident from these calculated validation measures that hierarchical clustering has a better cluster quality as compared to k-means"

Comparing clustering solutions and grouping countries based on clustering methods

# Comparing clustering solutions

# It has been depicted in the previous question based on the calculated validation measures that hierarchical clustering has a better cluster quality as compared to k-means"))

# Grouping the list of the countries clustered via k-means
listofcluster_kmeans = order(km_world_indi$cluster)
grouped_countries_kmeans <- data.frame(kmeans_cluster = km_world_indi$cluster[listofcluster_kmeans])
# View(grouped_countries_kmeans)

# Grouping the list of the countries clustered via hierarchical
listofcluster_hier = order(hcm_world_indi)
grouped_countries_hier <- data.frame(hierarchical_cluster = hcm_world_indi[listofcluster_hier])
# View(grouped_countries_hier)

Data Plotting

# This clustering is obtained considering 2 attributes - GDP and infant mortality rate
# Scatter Plot 1: Infant Mortality Rate v/s GDP (using k-means clustering)
newdf_scatter1 <- world_indicator[, c(5, 9)]
newdf_scatter1 <- na.omit(newdf_scatter1)
newdf_scatter1_scaled <- as.data.frame(scale(newdf_scatter1))

fviz_nbclust(newdf_scatter1_scaled, kmeans, method = "silhouette")

km_scatter1 <- kmeans(newdf_scatter1_scaled, 2, nstart = 20)
km_scatter1$cluster

##                        Algeria                         Angola 
##                              1                              2 
##                          Benin                       Botswana 
##                              2                              2 
##                   Burkina Faso                        Burundi 
##                              2                              2 
##                       Cameroon       Central African Republic 
##                              2                              2 
##                           Chad                        Comoros 
##                              2                              2 
##               Congo, Dem. Rep.                    Congo, Rep. 
##                              2                              2 
##                  Cote d'Ivoire                       Djibouti 
##                              2                              2 
##               Egypt, Arab Rep.              Equatorial Guinea 
##                              1                              2 
##                        Eritrea                       Ethiopia 
##                              2                              2 
##                          Gabon                    Gambia, The 
##                              2                              2 
##                          Ghana                         Guinea 
##                              2                              2 
##                  Guinea-Bissau                          Kenya 
##                              2                              2 
##                        Lesotho                        Liberia 
##                              2                              2 
##                          Libya                     Madagascar 
##                              1                              2 
##                         Malawi                           Mali 
##                              2                              2 
##                     Mauritania                      Mauritius 
##                              2                              1 
##                        Morocco                     Mozambique 
##                              1                              2 
##                        Namibia                          Niger 
##                              2                              2 
##                        Nigeria                         Rwanda 
##                              2                              2 
##          Sao Tome and Principe                        Senegal 
##                              2                              2 
##                     Seychelles                   Sierra Leone 
##                              1                              2 
##                   South Africa                    South Sudan 
##                              1                              2 
##                          Sudan                      Swaziland 
##                              2                              2 
##                       Tanzania                           Togo 
##                              2                              2 
##                        Tunisia                         Uganda 
##                              1                              2 
##                         Zambia                       Zimbabwe 
##                              2                              2 
##                    Afghanistan                        Armenia 
##                              2                              1 
##                     Azerbaijan                     Bangladesh 
##                              1                              2 
##                         Bhutan              Brunei Darussalam 
##                              1                              1 
##                       Cambodia                          China 
##                              2                              1 
##                        Georgia                          India 
##                              1                              2 
##                      Indonesia                          Japan 
##                              1                              1 
##                     Kazakhstan                    Korea, Rep. 
##                              1                              1 
##                Kyrgyz Republic                        Lao PDR 
##                              1                              2 
##                       Malaysia                       Maldives 
##                              1                              1 
##                       Mongolia                          Nepal 
##                              1                              2 
##                       Pakistan                    Philippines 
##                              2                              1 
##                      Singapore                      Sri Lanka 
##                              1                              1 
##                     Tajikistan                       Thailand 
##                              2                              1 
##                    Timor-Leste                   Turkmenistan 
##                              2                              2 
##                     Uzbekistan                        Vietnam 
##                              2                              1 
##                        Albania                        Austria 
##                              1                              1 
##                        Belarus                        Belgium 
##                              1                              1 
##         Bosnia and Herzegovina                       Bulgaria 
##                              1                              1 
##                        Croatia                         Cyprus 
##                              1                              1 
##                 Czech Republic                        Denmark 
##                              1                              1 
##                        Estonia                        Finland 
##                              1                              1 
##                         France                        Germany 
##                              1                              1 
##                         Greece                        Hungary 
##                              1                              1 
##                        Iceland                        Ireland 
##                              1                              1 
##                          Italy                         Latvia 
##                              1                              1 
##                      Lithuania                     Luxembourg 
##                              1                              1 
##                 Macedonia, FYR                          Malta 
##                              1                              1 
##                        Moldova                         Monaco 
##                              1                              1 
##                     Montenegro                    Netherlands 
##                              1                              1 
##                         Norway                         Poland 
##                              1                              1 
##                       Portugal                        Romania 
##                              1                              1 
##             Russian Federation                         Serbia 
##                              1                              1 
##                Slovak Republic                       Slovenia 
##                              1                              1 
##                          Spain                         Sweden 
##                              1                              1 
##                    Switzerland                         Turkey 
##                              1                              1 
##                        Ukraine                 United Kingdom 
##                              1                              1 
##                        Bahrain             Iran, Islamic Rep. 
##                              1                              1 
##                           Iraq                         Israel 
##                              1                              1 
##                         Jordan                         Kuwait 
##                              1                              1 
##                        Lebanon                           Oman 
##                              1                              1 
##                          Qatar                   Saudi Arabia 
##                              1                              1 
##           United Arab Emirates                    Yemen, Rep. 
##                              1                              2 
##                      Australia                           Fiji 
##                              1                              1 
##                       Kiribati               Marshall Islands 
##                              2                              1 
##          Micronesia, Fed. Sts.                    New Zealand 
##                              1                              1 
##               Papua New Guinea                          Samoa 
##                              2                              1 
##                Solomon Islands                          Tonga 
##                              1                              1 
##                        Vanuatu            Antigua and Barbuda 
##                              1                              1 
##                      Argentina                   Bahamas, The 
##                              1                              1 
##                       Barbados                         Belize 
##                              1                              1 
##                        Bolivia                         Brazil 
##                              1                              1 
##                         Canada                          Chile 
##                              1                              1 
##                       Colombia                     Costa Rica 
##                              1                              1 
##                           Cuba                       Dominica 
##                              1                              1 
##             Dominican Republic                        Ecuador 
##                              1                              1 
##                    El Salvador                        Grenada 
##                              1                              1 
##                      Guatemala                         Guyana 
##                              1                              1 
##                          Haiti                       Honduras 
##                              2                              1 
##                        Jamaica                         Mexico 
##                              1                              1 
##                      Nicaragua                         Panama 
##                              1                              1 
##                       Paraguay                           Peru 
##                              1                              1 
##            St. Kitts and Nevis                      St. Lucia 
##                              1                              1 
## St. Vincent and the Grenadines                       Suriname 
##                              1                              1 
##            Trinidad and Tobago                  United States 
##                              1                              1 
##                        Uruguay                  Venezuela, RB 
##                              1                              1

summary(km_scatter1)

##              Length Class  Mode   
## cluster      182    -none- numeric
## centers        4    -none- numeric
## totss          1    -none- numeric
## withinss       2    -none- numeric
## tot.withinss   1    -none- numeric
## betweenss      1    -none- numeric
## size           2    -none- numeric
## iter           1    -none- numeric
## ifault         1    -none- numeric

plot(newdf_scatter1$Infant.Mortality.Rate, newdf_scatter1$GDP, 
     main = "Infant Mortality Rate v/s GDP",
     xlab = "Infant Mortality Rate",
     ylab = "GDP", 
     col = km_scatter1$cluster, pch = 19, frame = FALSE)

# This clustering is obtained considering 2 attributes - Urban population and energy usage
# Scatter Plot 2: Urban Population v/s Energy Usage (using k-means clustering)
newdf_scatter2 <- world_indicator[,c(4,18)]
newdf_scatter2 <- na.omit(newdf_scatter2)
newdf_scatter2_scaled <- as.data.frame(scale(newdf_scatter2))

fviz_nbclust(newdf_scatter2_scaled, kmeans, method = "silhouette")

km_scatter2 <- kmeans(newdf_scatter2_scaled, 10, nstart = 20)
km_scatter2$cluster

##                Algeria                 Angola                  Benin 
##                     10                      6                      6 
##               Botswana               Cameroon       Congo, Dem. Rep. 
##                      1                      1                      6 
##            Congo, Rep.          Cote d'Ivoire       Egypt, Arab Rep. 
##                     10                      1                      6 
##                Eritrea               Ethiopia                  Gabon 
##                      7                      7                      4 
##                  Ghana                  Kenya                  Libya 
##                      1                      7                     10 
##                Morocco             Mozambique                Namibia 
##                      1                      6                      6 
##                Nigeria                Senegal           South Africa 
##                      6                      6                     10 
##                  Sudan               Tanzania                   Togo 
##                      6                      7                      6 
##                Tunisia                 Zambia               Zimbabwe 
##                     10                      6                      6 
##                Armenia             Azerbaijan             Bangladesh 
##                     10                      1                      6 
##      Brunei Darussalam               Cambodia                  China 
##                     10                      7                      3 
##                Georgia   Hong Kong SAR, China                  India 
##                      1                      9                      8 
##              Indonesia                  Japan             Kazakhstan 
##                      1                      2                      1 
##       Korea, Dem. Rep.            Korea, Rep.        Kyrgyz Republic 
##                      1                      5                      6 
##               Malaysia               Mongolia                Myanmar 
##                     10                     10                      6 
##                  Nepal               Pakistan            Philippines 
##                      7                      6                      6 
##              Singapore              Sri Lanka             Tajikistan 
##                      9                      7                      7 
##               Thailand           Turkmenistan             Uzbekistan 
##                      6                      1                      6 
##                Vietnam                Albania                Austria 
##                      6                      1                     10 
##                Belarus                Belgium Bosnia and Herzegovina 
##                     10                      9                      6 
##               Bulgaria                Croatia                 Cyprus 
##                     10                      1                     10 
##         Czech Republic                Denmark                Estonia 
##                     10                      4                     10 
##                Finland                 France                Germany 
##                      4                      5                      5 
##                 Greece                Hungary                Iceland 
##                     10                     10                      9 
##                Ireland                  Italy                 Latvia 
##                      1                     10                     10 
##              Lithuania             Luxembourg         Macedonia, FYR 
##                     10                      4                      1 
##                  Malta                Moldova             Montenegro 
##                      9                      6                     10 
##            Netherlands                 Norway                 Poland 
##                      4                      4                      1 
##               Portugal                Romania     Russian Federation 
##                      1                      1                      2 
##                 Serbia        Slovak Republic               Slovenia 
##                      1                      1                      1 
##                  Spain                 Sweden            Switzerland 
##                      5                      4                     10 
##                 Turkey                Ukraine         United Kingdom 
##                     10                     10                      5 
##                Bahrain     Iran, Islamic Rep.                   Iraq 
##                      4                      5                     10 
##                 Israel                 Jordan                 Kuwait 
##                      9                      4                      9 
##                Lebanon                   Oman                  Qatar 
##                      4                     10                      9 
##           Saudi Arabia   Syrian Arab Republic   United Arab Emirates 
##                      5                      1                      4 
##            Yemen, Rep.              Australia            New Zealand 
##                      6                      4                      4 
##              Argentina                Bolivia                 Brazil 
##                      4                     10                      5 
##                 Canada                  Chile               Colombia 
##                      5                      4                     10 
##             Costa Rica                   Cuba     Dominican Republic 
##                     10                     10                     10 
##                Ecuador            El Salvador              Guatemala 
##                     10                     10                      1 
##                  Haiti               Honduras                Jamaica 
##                      1                      1                      1 
##                 Mexico              Nicaragua                 Panama 
##                      5                      1                     10 
##               Paraguay                   Peru    Trinidad and Tobago 
##                      1                     10                      7 
##          United States                Uruguay          Venezuela, RB 
##                      3                      9                      4

summary(km_scatter2)

##              Length Class  Mode   
## cluster      135    -none- numeric
## centers       20    -none- numeric
## totss          1    -none- numeric
## withinss      10    -none- numeric
## tot.withinss   1    -none- numeric
## betweenss      1    -none- numeric
## size          10    -none- numeric
## iter           1    -none- numeric
## ifault         1    -none- numeric

plot_ly(newdf_scatter2,
        x = ~Population.Urban,
        y = ~Energy.Usage,
        color = ~km_scatter2$cluster) %>% 
  layout(title = "Urban Population v/s Energy Usage",
         xaxis = list(title = "Urban Pouplation"),
         yaxis = list(title = "Energy Usage"))

## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter

## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

# This clustering is obtained considering 2 attributes - Population 15-64 years and internet usage
# Scatter Plot 3: Population 15-64 years v/s Internet Usage (using hierarchical clustering)
newdf_scatter3 <- world_indicator[, c(10, 16)]
newdf_scatter3 <- na.omit(newdf_scatter3)
newdf_scatter3_scaled <- as.data.frame(scale(newdf_scatter3))

hc_scatter3 <- hclust(dist (newdf_scatter3_scaled), method = "average")
summary(hc_scatter3)

##             Length Class  Mode     
## merge       374    -none- numeric  
## height      187    -none- numeric  
## order       188    -none- numeric  
## labels      188    -none- character
## method        1    -none- character
## call          3    -none- call     
## dist.method   1    -none- character

plot(hc_scatter3)

#Using k=4
hcm_scatter3 <- cutree(hc_scatter3, k = 4)
plot(hcm_scatter3)

newdf_scatter3$Class_hier <- hcm_scatter3

ggplot(newdf_scatter3, aes(x = Population.15.64, y = Internet.Usage, color = as.factor(Class_hier))) +
  geom_point(stat = "identity") +
  ggtitle("Population 15-64 years v/s Internet Usage") +
  xlab("Pouplation 15-64 years") +
  ylab("Internet Usage") +
  labs(color = "Cluster")+
  scale_colour_brewer(palette = "Set1") +
  theme(plot.title = element_text(size = 13, face = "bold.italic", color = "black", hjust = 0.5),
        axis.title.x = element_text(size = 7, face = "bold"),
        axis.title.y = element_text(size = 7, face = "bold"),
        axis.text.x = element_text(size = 5, angle = 0),
        axis.text.y = element_text(size = 5, angle = 90),
        legend.position = "right",
        legend.title = element_text(size = 7, face = "bold"),
        legend.text = element_text(size = 7, face = "bold"),
        strip.text = element_text(size = 7))