# Set the CRAN entry of the session's "repos" option to the US mirror.
# The mirror is addressed over HTTPS rather than plain HTTP.
r <- getOption("repos")
r["CRAN"] <- "https://cran.us.r-project.org"
options(repos = r)

Load the packages

library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("cluster")

Import the Data

# Read the two Excel workbooks: `africa` from sheet "africa" of africa.xlsx,
# `cpi` from sheet "cpi" of cpiafrica.xlsx.
africa <- readxl::read_excel(path = "africa.xlsx", sheet = "africa")
cpi <- readxl::read_excel(path = "cpiafrica.xlsx", sheet = "cpi")
# Preview the first rows of the growth table.
head(x = africa)
## # A tibble: 6 x 33
##      ID `Country Name` `Country Code` `1990` `1991` `1992`  `1993` `1994` `1995`
##   <dbl> <chr>          <chr>           <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
## 1     1 Angola         AGO            -3.45   0.991 -5.84  -24.0     1.34  15.0 
## 2     2 Burundi        BDI             3.50   5.00   1.01   -6.24   -3.83  -7.92
## 3     3 Benin          BEN             8.98   4.23   2.96    5.84    2.02   6.05
## 4     4 Burkina Faso   BFA            -0.603  9.07   0.233   3.46    1.32   5.72
## 5     5 Botswana       BWA             6.77   7.46   2.92    1.92    3.63   7.03
## 6     6 Central Afric… CAF            -2.15  -0.553 -6.42    0.335   4.90   7.20
## # … with 24 more variables: `1996` <dbl>, `1997` <dbl>, `1998` <dbl>,
## #   `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>,
## #   `2004` <dbl>, `2005` <dbl>, `2006` <dbl>, `2007` <dbl>, `2008` <dbl>,
## #   `2009` <dbl>, `2010` <dbl>, `2011` <dbl>, `2012` <dbl>, `2013` <dbl>,
## #   `2014` <dbl>, `2015` <dbl>, `2016` <dbl>, `2017` <dbl>, `2018` <dbl>,
## #   `2019` <dbl>
# Preview the first rows of the CPI table.
head(x = cpi)
## # A tibble: 6 x 6
##   Country      `2015` `2016` `2017` `2018` `2019`
##   <chr>         <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 Algeria       173.   184.   194.   202.    206.
## 2 Angola        106.   138.   179.   214.    251.
## 3 Benin         100.    99.4  101.   102.    101.
## 4 Botswana       90.0   92.5   95.6   98.7   101.
## 5 Burkina Faso  102.   102.   104.   106.    102.
## 6 Burundi        93.8   99.0  115.   112.    111.

LAST 10 YEARS

# Keep the country name (column 2) plus columns 24:33, which in the 33-column
# sheet previewed above are the years 2010-2019.
africa2 <- dplyr::select(africa, c(2, 24:33))
# Drop repeated country names before they become row names: data.frame()
# rejects duplicated row.names, and the full-period preparation further down
# applies the same de-duplication.
afr3 <- as.data.frame(africa2[!duplicated(africa2$`Country Name`), ])
# Move the country names out of the data and into the row names.
afr3 <- data.frame(afr3[, -1], row.names = afr3[, 1])
# Centre and scale every year column so no single year dominates distances.
afr33 <- scale(afr3)

Prepare the data for clustering

# Africa GDP growth, full period: country name (column 2) plus columns 4:33.
africa1 <- dplyr::select(africa, c(2, 4:33))
repeated_growth_country <- duplicated(africa1$`Country Name`)
afr <- as.data.frame(africa1[!repeated_growth_country, ])
# Country names become row names; the remaining columns are the yearly values.
afr1 <- data.frame(afr[, -1], row.names = afr[[1]])

# CPI: same treatment, keyed on the `Country` column.
repeated_cpi_country <- duplicated(cpi$Country)
cpi1 <- as.data.frame(cpi[!repeated_cpi_country, ])
cpi2 <- data.frame(cpi1[, -1], row.names = cpi1[[1]])

# Column-wise centred and scaled copies.
# NOTE(review): afr11 is not referenced again in the visible code; the
# clustering steps below all use afr33 -- confirm which one is intended.
afr11 <- scale(afr1)
cpi22 <- scale(cpi2)

# Inspect the prepared growth table.
head(afr1)
##                        X1990      X1991      X1992       X1993     X1994
## Angola            -3.4500987  0.9913593 -5.8382807 -23.9834174  1.339363
## Burundi            3.4998221  4.9968364  1.0099984  -6.2400000 -3.830000
## Benin              8.9761344  4.2257994  2.9577108   5.8361721  2.020400
## Burkina Faso      -0.6029285  9.0699845  0.2327108   3.4613849  1.315007
## Botswana           6.7728219  7.4587091  2.9170703   1.9161071  3.627916
## Central African_R -2.1475281 -0.5525430 -6.4240782   0.3352757  4.899949
##                       X1995     X1996     X1997     X1998     X1999      X2000
## Angola            15.000000 13.544370  7.274277 4.6911465  2.181490  3.0546242
## Burundi           -7.920000 -8.000000 -1.590000 4.7500000 -1.010000 -0.8568641
## Benin              6.045199  4.324284  5.734688 3.9610121  5.341449  5.8577142
## Burkina Faso       5.716374 11.014744  6.316835 7.3077196  7.404179  1.8884739
## Botswana           7.030410  5.829800  8.325891 0.4436635  9.667241  1.9876959
## Central African_R  7.200046 -4.000093  5.299936 4.7001424  3.599975 -2.4894324
##                       X2001     X2002     X2003     X2004      X2005     X2006
## Angola            4.2059986 13.665687  2.989850 10.952862 15.0289153 11.547683
## Burundi           2.0558071  4.446519 -1.223728  4.833658  0.9000000  5.413807
## Benin             5.3331357  4.643031  3.443577  4.429685  1.7131646  3.943739
## Burkina Faso      6.6134057  4.352964  7.802494  4.478452  8.6618732  6.253165
## Botswana          0.2505739  6.069531  4.625895  2.705822  4.5566457  8.363871
## Central African_R 4.4647390  3.616542 -5.397485  5.994884  0.9082105  4.771085
##                       X2007     X2008      X2009    X2010    X2011    X2012
## Angola            14.010018 11.166138  0.8587126 4.859198 3.471981 8.542147
## Burundi            3.451952  4.861713  3.8127469 5.124163 4.032602 4.446708
## Benin              5.986349  4.896577  2.3192921 2.114065 2.963753 4.811223
## Burkina Faso       4.111379  5.799992  2.9619509 8.446282 6.622563 6.452672
## Botswana           8.276764  6.245437 -7.6523102 8.563632 6.048316 4.456167
## Central African_R  4.607535  2.054131  8.5872604 4.630818 4.194615 5.053761
##                        X2013      X2014      X2015     X2016      X2017
## Angola              4.954590 4.82262555  0.9435756 -2.580097 -0.1472074
## Burundi             4.924195 4.24065164 -3.9000031 -0.600020  0.5000100
## Benin               7.191434 6.35767910  1.7781511  3.339673  5.6715555
## Burkina Faso        5.792606 4.32684561  3.9125572  5.958692  6.1571046
## Botswana           11.343424 4.14928989 -1.6979656  4.303737  2.9040063
## Central African_R -36.391977 0.08107052  4.3371210  4.750317  4.5272782
##                       X2018      X2019
## Angola            -2.003577 -0.8693938
## Burundi            1.609933  1.8424767
## Benin              6.697259  6.8656873
## Burkina Faso       6.819617  5.6885990
## Botswana           4.478823  2.9661359
## Central African_R  3.789444  2.9702756
# Inspect the prepared CPI table.
head(x = cpi2)
##                  X2015     X2016     X2017     X2018    X2019
## Algeria      172.65333 183.69917 193.97000 202.25250 206.2000
## Angola       105.54000 137.93583 179.10083 214.25583 250.8533
## Benin        100.21879  99.42300 101.18220 102.04104 101.1067
## Botswana      89.96896  92.50155  95.56176  98.65607 101.3917
## Burkina Faso 101.65333 102.10167 103.61583 105.64250 102.2267
## Burundi       93.76674  98.97800 114.86648 111.63333 110.8667

Estimate the number of clusters

# Elbow plots (total within-cluster sum of squares) for k-means on each
# scaled matrix. The same dashed vertical marker at k = 4 is added to both.
k_marker <- geom_vline(xintercept = 4, linetype = 2)

factoextra::fviz_nbclust(x = afr33, FUNcluster = kmeans, method = "wss") +
  k_marker

factoextra::fviz_nbclust(x = cpi22, FUNcluster = kmeans, method = "wss") +
  k_marker

Preparing the Clustering and the Visualization

# k-means with 4 centres and 25 random starts on each scaled matrix.
# The seed is set once, so the CPI fit continues the random-number stream
# left by the growth fit; the order of the two calls therefore matters.
set.seed(123)
n_centres <- 4
afr.res <- kmeans(x = afr33, centers = n_centres, nstart = 25)
cpi.res <- kmeans(x = cpi22, centers = n_centres, nstart = 25)

CPI Visualization

# Cluster plot of the 4-centre k-means solution for CPI: one colour per
# cluster, Euclidean ellipses, segments from each centroid to its members,
# and non-overlapping labels.
cpi_cluster_colours <- c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07")
fviz_cluster(
  object = cpi.res,
  data = cpi22,
  palette = cpi_cluster_colours,
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme_minimal(),
  main = "Africa Countries Classification based on Annual CPI"
)
## Too few points to calculate an ellipse

GROWTH Visualization

# Cluster plot of the 4-centre k-means solution for GDP growth, drawn with
# the same options as the CPI plot.
growth_cluster_colours <- c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07")
fviz_cluster(
  object = afr.res,
  data = afr33,
  palette = growth_cluster_colours,
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme_minimal(),
  main = "Africa Countries Classification based on Annual GDP GROWTH"
)
## Too few points to calculate an ellipse

K-Medoids Classification

# Average-silhouette plots for PAM (k-medoids) on each scaled matrix,
# rendered with the classic theme.
growth_silhouette <- factoextra::fviz_nbclust(
  x = afr33, FUNcluster = pam, method = "silhouette"
)
growth_silhouette + theme_classic()

cpi_silhouette <- factoextra::fviz_nbclust(
  x = cpi22, FUNcluster = pam, method = "silhouette"
)
cpi_silhouette + theme_classic()

K-Medoids Visualization

# PAM (k-medoids) with two clusters on each scaled matrix.
af.res <- pam(x = afr33, k = 2)
cp.res <- pam(x = cpi22, k = 2)

# GROWTH: cluster plot of the two-medoid solution with t-distribution
# ellipses and non-overlapping labels.
fviz_cluster(
  object = af.res,
  palette = c("#00AFBB", "#FC4E07"),
  ellipse.type = "t",
  repel = TRUE,
  ggtheme = theme_classic(),
  main = "Africa Countries Classification based on Annual GDP GROWTH"
)

# CPI: cluster plot of the two-medoid solution with t-distribution ellipses
# and non-overlapping labels. The theme is given explicitly through `ggtheme`
# so this plot uses the same classic theme as the GROWTH plot above.
fviz_cluster(
  cp.res,
  palette = c("#00AFBB", "#FC4E07"),
  ellipse.type = "t",
  repel = TRUE,
  ggtheme = theme_classic(),
  main = "Africa Countries Classification based on Annual CPI"
)
## Too few points to calculate an ellipse

HIERARCHICAL CLUSTERING

# Growth: Euclidean distances between countries on the scaled matrix, then
# agglomerative clustering with the "ward.D2" method.
afr.dist <- dist(x = afr33, method = "euclidean")
afr.hc <- hclust(d = afr.dist, method = "ward.D2")

# CPI: same two steps.
cpi.dist <- dist(x = cpi22, method = "euclidean")
cpi.hc <- hclust(d = cpi.dist, method = "ward.D2")

# Plain dendrograms (no cut), with small labels.
fviz_dend(
  x = afr.hc,
  cex = 0.5,
  main = "Africa Countries Classification based on Annual GDP GROWTH"
)

fviz_dend(
  x = cpi.hc,
  cex = 0.5,
  main = "Africa Countries Classification based on Annual CPI"
)

#GROWTH
# Cut the growth tree into 12 groups. Only four base colours are defined, so
# they are recycled explicitly to one colour per group; fviz_dend() then
# receives a vector of the right length and does not warn about recycling.
growth_groups <- 12
growth_group_colours <- rep_len(
  c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  growth_groups
)
fviz_dend(afr.hc, k = growth_groups, # Cut in 12 groups
cex = 0.5, # label size
k_colors = growth_group_colours,
color_labels_by_k = TRUE, # color labels by groups
rect = TRUE, # Add rectangle around groups
main ="Africa Countries Classification based on Annual GDP GROWTH")

#CPI
# Cut the CPI tree into 12 groups. Only four base colours are defined, so
# they are recycled explicitly to one colour per group; fviz_dend() then
# receives a vector of the right length and does not warn about recycling.
cpi_groups <- 12
cpi_group_colours <- rep_len(
  c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  cpi_groups
)
fviz_dend(cpi.hc, k = cpi_groups, # Cut in 12 groups
cex = 0.5, # label size
k_colors = cpi_group_colours,
color_labels_by_k = TRUE, # color labels by groups
rect = TRUE, # Add rectangle around groups
main ="Africa Countries Classification based on Annual CPI")

LAST 10 YEARS

# Compute distances and hierarchical clustering ("ward.D2") on the scaled
# last-decade growth table and on the scaled CPI table.
dd <- dist(scale(afr3), method = "euclidean")
ahc <- hclust(dd, method = "ward.D2")
dd1 <- dist(scale(cpi2), method = "euclidean")
cphc <- hclust(dd1, method = "ward.D2")
# Attach igraph with library(): it stops with an error when the package is
# missing, whereas require() only returns FALSE and lets the script continue.
# NOTE(review): igraph is presumably needed by the phylogenic dendrograms
# drawn next -- confirm.
library("igraph")
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Phylogenic-layout dendrograms, each cut into 4 groups and coloured with the
# "jco" palette; labels are repelled to avoid overlap.
fviz_dend(
  x = ahc,
  k = 4,
  k_colors = "jco",
  type = "phylogenic",
  repel = TRUE,
  main = "Africa Countries Classification based on Annual GDP GROWTH"
)

fviz_dend(
  x = cphc,
  k = 4,
  k_colors = "jco",
  main = "Africa Countries Classification based on Annual CPI",
  type = "phylogenic",
  repel = TRUE
)

# Nine country groups, each a row subset of `africa`.
# Rows are matched on africa's own `Country Name` column with %in%, so the
# filter does not depend on another data frame being row-aligned with africa.
# NOTE(review): names must match the sheet's spelling exactly (for example
# "Congo,Rep." has no space after the comma) -- confirm against the data.
x1 <- subset(africa, `Country Name` %in% c(
  "Chad", "Angola", "Gabon", "Burundi", "Namibia", "Nigeria", "Congo,Rep."
))

x2 <- subset(africa, `Country Name` %in% c(
  "Sudan", "Lesotho", "South Africa", "Eswatini", "Tunisia"
))

x3 <- subset(africa, `Country Name` %in% c(
  "Guinea-Bissau", "Cabo Verde", "Madagascar", "Egypt"
))

x4 <- subset(africa, `Country Name` %in% c(
  "Mauritania", "Malawi", "Morocco", "Cameroon", "Mauritius"
))

x5 <- subset(africa, `Country Name` %in% c(
  "Cote d'Ivoire", "Guinea", "Mali", "Senegal", "Benin"
))

x6 <- subset(africa, `Country Name` %in% c("Ethiopia", "Ghana"))

x7 <- subset(africa, `Country Name` %in% c(
  "Rwanda", "Niger", "Uganda", "Tanzania", "Burkina Faso", "Togo", "Kenya"
))

x8 <- subset(africa, `Country Name` %in% c(
  "Congo_DR", "Mozambique", "Zambia", "Botswana"
))

x9 <- subset(africa, `Country Name` %in% c("Zimbabwe", "Sierra Leone"))
         
# Per-group summaries over columns 24:33 of each group table, followed by a
# 3 x 3 grid of line plots of each group's yearly mean.

# Columns 24:33 of every group table.
x11 <- dplyr::select(x1, 24:33)
x22 <- dplyr::select(x2, 24:33)
x33 <- dplyr::select(x3, 24:33)
x44 <- dplyr::select(x4, 24:33)
x55 <- dplyr::select(x5, 24:33)
x66 <- dplyr::select(x6, 24:33)
x77 <- dplyr::select(x7, 24:33)
x88 <- dplyr::select(x8, 24:33)
x99 <- dplyr::select(x9, 24:33)

# Row means: one value per row (country) of each group.
x111 <- rowMeans(x11)
x222 <- rowMeans(x22)
x333 <- rowMeans(x33)
x444 <- rowMeans(x44)
x555 <- rowMeans(x55)
x666 <- rowMeans(x66)
x777 <- rowMeans(x77)
x888 <- rowMeans(x88)
x999 <- rowMeans(x99)

# Column means: one value per year column of each group.
y111 <- colMeans(x11)
y222 <- colMeans(x22)
y333 <- colMeans(x33)
y444 <- colMeans(x44)
y555 <- colMeans(x55)
y666 <- colMeans(x66)
y777 <- colMeans(x77)
y888 <- colMeans(x88)
y999 <- colMeans(x99)

# One-column data frames; the column is named after the source vector and the
# row names are the year column names.
y1 <- as.data.frame(y111)
y2 <- as.data.frame(y222)
y3 <- as.data.frame(y333)
y4 <- as.data.frame(y444)
y5 <- as.data.frame(y555)
y6 <- as.data.frame(y666)
y7 <- as.data.frame(y777)
y8 <- as.data.frame(y888)
y9 <- as.data.frame(y999)

# Prepend a Year column taken from the row names. It is converted to integer
# so plot() always draws a numeric x axis; left as character (or as a factor
# on R versions where strings become factors) the plot would rely on implicit
# coercion or turn into a box plot.
add_year_column <- function(year_means) {
  cbind(Year = as.integer(rownames(year_means)), year_means)
}
df1 <- add_year_column(y1)
df2 <- add_year_column(y2)
df3 <- add_year_column(y3)
df4 <- add_year_column(y4)
df5 <- add_year_column(y5)
df6 <- add_year_column(y6)
df7 <- add_year_column(y7)
df8 <- add_year_column(y8)
df9 <- add_year_column(y9)

# Line plot of one group's yearly mean (second column of `group_df`).
# `colour` defaults to the device's current drawing colour.
plot_group_growth <- function(group_df, group_no, colour = par("col")) {
  plot(group_df$Year, group_df[[2]], type = "l", xlab = "Year",
       col = colour, ylab = "GDP Growth", main = paste("GROUP", group_no))
}

# Draw the nine panels, then restore the previous layout so later plots are
# not squeezed into the 3 x 3 grid.
old_par <- par(mfrow = c(3, 3))
plot_group_growth(df1, 1, "red")
plot_group_growth(df2, 2, "blue")
plot_group_growth(df3, 3, "orange")
plot_group_growth(df4, 4, "dark blue")
plot_group_growth(df5, 5, "pink")
plot_group_growth(df6, 6, "brown")
plot_group_growth(df7, 7)
plot_group_growth(df8, 8, "green")
plot_group_growth(df9, 9, "darkmagenta")
par(old_par)

# Wrap each group's per-row means in a one-column data frame (column named
# after the source vector), then prepend the row names as a first column.
# NOTE(review): that first column is called `Year`, but it holds the row
# names of the per-row means rather than calendar years; name kept as-is.
z1 <- as.data.frame(x111)
z2 <- as.data.frame(x222)
z3 <- as.data.frame(x333)
z4 <- as.data.frame(x444)
z5 <- as.data.frame(x555)
z6 <- as.data.frame(x666)
z7 <- as.data.frame(x777)
z8 <- as.data.frame(x888)
z9 <- as.data.frame(x999)

with_row_labels <- function(means_df) {
  cbind(Year = rownames(means_df), means_df)
}
xf1 <- with_row_labels(z1)
xf2 <- with_row_labels(z2)
xf3 <- with_row_labels(z3)
xf4 <- with_row_labels(z4)
xf5 <- with_row_labels(z5)
xf6 <- with_row_labels(z6)
xf7 <- with_row_labels(z7)
xf8 <- with_row_labels(z8)
xf9 <- with_row_labels(z9)
# Mean of the per-row means for each group. Every value is printed with an
# explicit print() so all nine appear even when the script is run through
# source(), where bare top-level expressions are not auto-printed.
print(mean(xf1$x111))
## [1] 3.102627
print(mean(xf2$x222))
## [1] 2.159123
print(mean(xf3$x333))
## [1] 3.468698
print(mean(xf4$x444))
## [1] 3.929748
print(mean(xf5$x555))
## [1] 5.341074
print(mean(xf6$x666))
## [1] 8.283248
print(mean(xf7$x777))
## [1] 6.081943
print(mean(xf8$x888))
## [1] 5.359902
print(mean(xf9$x999))
## [1] 5.480693

Median

# Equatorial Guinea on its own: take its row from `africa` (matched on
# africa's own `Country Name` column), keep columns 24:33, and transpose so
# each year column becomes a row.
x10 <- subset(africa, `Country Name` == "Equatorial Guinea")
x100 <- dplyr::select(x10, c(24:33))
x100 <- as.data.frame(x100)
x101 <- t(x100)
x101 <- as.data.frame(x101)
# Year comes from the row names and is converted to integer so plot() always
# draws a numeric x axis instead of relying on implicit coercion.
xf12 <- cbind(Year = as.integer(rownames(x101)), x101)
plot(xf12$Year, xf12$V1, xlab = "Year", type = "l",
     col = "chocolate4", ylab = "GDP Growth",
     main = "GDP Growth of Equatorial Guinea")

# Mean over the selected years; print() keeps the value visible under source().
print(mean(xf12$V1))
## [1] -3.333212