# Set the default CRAN mirror in the `repos` option.
# HTTPS is used instead of plain HTTP so that anything downloaded from the
# mirror is transferred over an encrypted, integrity-checked connection.
r <- getOption("repos")
r["CRAN"] <- "https://cran.us.r-project.org"
options(repos = r)
Call the packages
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("cluster")
Import the Data
# Read the "africa" sheet of africa.xlsx and the "cpi" sheet of
# cpiafrica.xlsx, then preview the first rows of the africa table.
africa <- readxl::read_excel(path = "africa.xlsx", sheet = "africa")
cpi <- readxl::read_excel(path = "cpiafrica.xlsx", sheet = "cpi")
head(africa)
## # A tibble: 6 x 33
## ID `Country Name` `Country Code` `1990` `1991` `1992` `1993` `1994` `1995`
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Angola AGO -3.45 0.991 -5.84 -24.0 1.34 15.0
## 2 2 Burundi BDI 3.50 5.00 1.01 -6.24 -3.83 -7.92
## 3 3 Benin BEN 8.98 4.23 2.96 5.84 2.02 6.05
## 4 4 Burkina Faso BFA -0.603 9.07 0.233 3.46 1.32 5.72
## 5 5 Botswana BWA 6.77 7.46 2.92 1.92 3.63 7.03
## 6 6 Central Afric… CAF -2.15 -0.553 -6.42 0.335 4.90 7.20
## # … with 24 more variables: `1996` <dbl>, `1997` <dbl>, `1998` <dbl>,
## # `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>,
## # `2004` <dbl>, `2005` <dbl>, `2006` <dbl>, `2007` <dbl>, `2008` <dbl>,
## # `2009` <dbl>, `2010` <dbl>, `2011` <dbl>, `2012` <dbl>, `2013` <dbl>,
## # `2014` <dbl>, `2015` <dbl>, `2016` <dbl>, `2017` <dbl>, `2018` <dbl>,
## # `2019` <dbl>
# Preview the first six rows of the CPI table.
head(cpi, n = 6L)
## # A tibble: 6 x 6
## Country `2015` `2016` `2017` `2018` `2019`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Algeria 173. 184. 194. 202. 206.
## 2 Angola 106. 138. 179. 214. 251.
## 3 Benin 100. 99.4 101. 102. 101.
## 4 Botswana 90.0 92.5 95.6 98.7 101.
## 5 Burkina Faso 102. 102. 104. 106. 102.
## 6 Burundi 93.8 99.0 115. 112. 111.
LAST 10 YEARS
# Keep column 2 (the country name) together with columns 24 to 33 of `africa`.
africa2 <- dplyr::select(africa, c(2, 24:33))
# A de-duplication step on `Country Name` was drafted here but is disabled.
afr3 <- as.data.frame(africa2)
# Turn the first column into row names and drop it from the data columns.
afr3 <- data.frame(afr3[, -1], row.names = afr3[, 1])
# Centre and scale every column before clustering.
afr33 <- scale(afr3)
Work on the data to make it perfect for the clustering
# Africa GDP growth: country name (column 2) plus columns 4 to 33, keeping
# only the first row found for each country name.
africa1 <- dplyr::select(africa, c(2, 4:33))
afr <- as.data.frame(africa1[!duplicated(africa1$`Country Name`), ])
# Country names become row names; the name column itself is dropped.
afr1 <- data.frame(afr[, -1], row.names = afr[, 1])

# CPI: same treatment, de-duplicating on the `Country` column.
cpi1 <- as.data.frame(cpi[!duplicated(cpi$Country), ])
cpi2 <- data.frame(cpi1[, -1], row.names = cpi1[, 1])

# Centre and scale both tables, then preview the unscaled growth table.
afr11 <- scale(afr1)
cpi22 <- scale(cpi2)
head(afr1)
## X1990 X1991 X1992 X1993 X1994
## Angola -3.4500987 0.9913593 -5.8382807 -23.9834174 1.339363
## Burundi 3.4998221 4.9968364 1.0099984 -6.2400000 -3.830000
## Benin 8.9761344 4.2257994 2.9577108 5.8361721 2.020400
## Burkina Faso -0.6029285 9.0699845 0.2327108 3.4613849 1.315007
## Botswana 6.7728219 7.4587091 2.9170703 1.9161071 3.627916
## Central African_R -2.1475281 -0.5525430 -6.4240782 0.3352757 4.899949
## X1995 X1996 X1997 X1998 X1999 X2000
## Angola 15.000000 13.544370 7.274277 4.6911465 2.181490 3.0546242
## Burundi -7.920000 -8.000000 -1.590000 4.7500000 -1.010000 -0.8568641
## Benin 6.045199 4.324284 5.734688 3.9610121 5.341449 5.8577142
## Burkina Faso 5.716374 11.014744 6.316835 7.3077196 7.404179 1.8884739
## Botswana 7.030410 5.829800 8.325891 0.4436635 9.667241 1.9876959
## Central African_R 7.200046 -4.000093 5.299936 4.7001424 3.599975 -2.4894324
## X2001 X2002 X2003 X2004 X2005 X2006
## Angola 4.2059986 13.665687 2.989850 10.952862 15.0289153 11.547683
## Burundi 2.0558071 4.446519 -1.223728 4.833658 0.9000000 5.413807
## Benin 5.3331357 4.643031 3.443577 4.429685 1.7131646 3.943739
## Burkina Faso 6.6134057 4.352964 7.802494 4.478452 8.6618732 6.253165
## Botswana 0.2505739 6.069531 4.625895 2.705822 4.5566457 8.363871
## Central African_R 4.4647390 3.616542 -5.397485 5.994884 0.9082105 4.771085
## X2007 X2008 X2009 X2010 X2011 X2012
## Angola 14.010018 11.166138 0.8587126 4.859198 3.471981 8.542147
## Burundi 3.451952 4.861713 3.8127469 5.124163 4.032602 4.446708
## Benin 5.986349 4.896577 2.3192921 2.114065 2.963753 4.811223
## Burkina Faso 4.111379 5.799992 2.9619509 8.446282 6.622563 6.452672
## Botswana 8.276764 6.245437 -7.6523102 8.563632 6.048316 4.456167
## Central African_R 4.607535 2.054131 8.5872604 4.630818 4.194615 5.053761
## X2013 X2014 X2015 X2016 X2017
## Angola 4.954590 4.82262555 0.9435756 -2.580097 -0.1472074
## Burundi 4.924195 4.24065164 -3.9000031 -0.600020 0.5000100
## Benin 7.191434 6.35767910 1.7781511 3.339673 5.6715555
## Burkina Faso 5.792606 4.32684561 3.9125572 5.958692 6.1571046
## Botswana 11.343424 4.14928989 -1.6979656 4.303737 2.9040063
## Central African_R -36.391977 0.08107052 4.3371210 4.750317 4.5272782
## X2018 X2019
## Angola -2.003577 -0.8693938
## Burundi 1.609933 1.8424767
## Benin 6.697259 6.8656873
## Burkina Faso 6.819617 5.6885990
## Botswana 4.478823 2.9661359
## Central African_R 3.789444 2.9702756
# Preview the first six rows of the CPI table indexed by country.
head(cpi2, n = 6L)
## X2015 X2016 X2017 X2018 X2019
## Algeria 172.65333 183.69917 193.97000 202.25250 206.2000
## Angola 105.54000 137.93583 179.10083 214.25583 250.8533
## Benin 100.21879 99.42300 101.18220 102.04104 101.1067
## Botswana 89.96896 92.50155 95.56176 98.65607 101.3917
## Burkina Faso 101.65333 102.10167 103.61583 105.64250 102.2267
## Burundi 93.76674 98.97800 114.86648 111.63333 110.8667
Estimate the optimal number of clusters
# Elbow (within-cluster sum of squares) plots for the scaled growth and CPI
# tables, with a dashed reference line at k = 4.
# kmeans() starts from randomly chosen centres, so the seed is fixed before
# each plot; otherwise the curve used to justify k = 4 can change between runs.
# geom_vline() is namespace-qualified so the call does not depend on ggplot2
# having been attached as a side effect of loading factoextra.
set.seed(123)
factoextra::fviz_nbclust(afr33, kmeans, method = "wss") +
  ggplot2::geom_vline(xintercept = 4, linetype = 2)
set.seed(123)
factoextra::fviz_nbclust(cpi22, kmeans, method = "wss") +
  ggplot2::geom_vline(xintercept = 4, linetype = 2)
Preparing the Clustering and the Visualization
# Fit k-means with four centres to each scaled table.
# The seed is fixed first because the 25 restarts begin from random centres.
set.seed(123)
afr.res <- kmeans(x = afr33, centers = 4, nstart = 25)
cpi.res <- kmeans(x = cpi22, centers = 4, nstart = 25)
CPI Visualization
# Cluster plot of the four-cluster k-means solution for the scaled CPI table.
factoextra::fviz_cluster(
  cpi.res,
  data = cpi22,
  main = "Africa Countries Classification based on Annual CPI",
  palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = ggplot2::theme_minimal()
)
## Too few points to calculate an ellipse
GROWTH Visualization
# Cluster plot of the four-cluster k-means solution for the scaled GDP-growth
# table.
factoextra::fviz_cluster(
  afr.res,
  data = afr33,
  main = "Africa Countries Classification based on Annual GDP GROWTH",
  palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = ggplot2::theme_minimal()
)
## Too few points to calculate an ellipse
K-Medoids Classification
# Average-silhouette plots for choosing the number of PAM (k-medoids)
# clusters, first for the scaled growth table and then for the scaled CPI
# table.
factoextra::fviz_nbclust(afr33, cluster::pam, method = "silhouette") +
  ggplot2::theme_classic()
factoextra::fviz_nbclust(cpi22, cluster::pam, method = "silhouette") +
  ggplot2::theme_classic()
K-Medoids Visualization
# Partition each scaled table around two medoids.
af.res <- cluster::pam(x = afr33, k = 2)
cp.res <- cluster::pam(x = cpi22, k = 2)
# GROWTH: cluster plot of the two-medoid PAM solution.
factoextra::fviz_cluster(
  af.res,
  main = "Africa Countries Classification based on Annual GDP GROWTH",
  palette = c("#00AFBB", "#FC4E07"),
  ellipse.type = "t",
  repel = TRUE,
  ggtheme = ggplot2::theme_classic()
)
# CPI: cluster plot of the two-medoid PAM solution.
# The call previously carried a stray positional `t` (base R's transpose
# function) where the theme argument belongs; it is replaced with
# `ggtheme = theme_classic()` so this plot is styled like the GDP-growth
# PAM plot.
fviz_cluster(cp.res,
             palette = c("#00AFBB", "#FC4E07"),
             ellipse.type = "t",
             repel = TRUE,
             ggtheme = theme_classic(),
             main = "Africa Countries Classification based on Annual CPI"
)
## Too few points to calculate an ellipse
HIERARCHICAL CLUSTERING
# Euclidean dissimilarities between countries for each scaled table.
afr.dist <- dist(x = afr33, method = "euclidean")
cpi.dist <- dist(x = cpi22, method = "euclidean")

# Agglomerative clustering with the Ward (ward.D2) criterion.
afr.hc <- hclust(afr.dist, method = "ward.D2")
cpi.hc <- hclust(cpi.dist, method = "ward.D2")

# Plain dendrograms, without cutting the trees into groups.
fviz_dend(
  afr.hc,
  main = "Africa Countries Classification based on Annual GDP GROWTH",
  cex = 0.5
)
fviz_dend(
  cpi.hc,
  main = "Africa Countries Classification based on Annual CPI",
  cex = 0.5
)
# GROWTH: dendrogram cut into 12 groups.
# Only four base colours are supplied, so they are recycled explicitly to
# length 12 with rep_len(); this keeps the same colour sequence that implicit
# recycling produced, without the "color vector was recycled" warning.
fviz_dend(afr.hc, k = 12,           # Cut in twelve groups
          cex = 0.5,                # label size
          k_colors = rep_len(c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"), 12),
          color_labels_by_k = TRUE, # color labels by groups
          rect = TRUE,              # Add rectangle around groups
          main = "Africa Countries Classification based on Annual GDP GROWTH")
## Warning in get_col(col, k): Length of color vector was shorter than the number
## of clusters - color vector was recycled
# CPI: dendrogram cut into 12 groups.
# Only four base colours are supplied, so they are recycled explicitly to
# length 12 with rep_len(); this keeps the same colour sequence that implicit
# recycling produced, without the "color vector was recycled" warning.
fviz_dend(cpi.hc, k = 12,           # Cut in twelve groups
          cex = 0.5,                # label size
          k_colors = rep_len(c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"), 12),
          color_labels_by_k = TRUE, # color labels by groups
          rect = TRUE,              # Add rectangle around groups
          main = "Africa Countries Classification based on Annual CPI")
## Warning in get_col(col, k): Length of color vector was shorter than the number
## of clusters - color vector was recycled
LAST 10 YEARS
# Compute Euclidean distances on the scaled tables and cluster them
# hierarchically with the Ward (ward.D2) criterion.
dd <- dist(scale(afr3), method = "euclidean")
ahc <- hclust(dd, method = "ward.D2")
dd1 <- dist(scale(cpi2), method = "euclidean")
cphc <- hclust(dd1, method = "ward.D2")
# Attach igraph with library() rather than require(): require() only returns
# FALSE when the package is missing, so the failure would surface later and
# less clearly; library() stops here with an explicit error.
library(igraph)
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Phylogenic-layout dendrograms cut into four groups, coloured with the "jco"
# palette: GDP growth first, then CPI.
fviz_dend(
  ahc,
  main = "Africa Countries Classification based on Annual GDP GROWTH",
  k = 4,
  k_colors = "jco",
  type = "phylogenic",
  repel = TRUE
)
fviz_dend(
  cphc,
  main = "Africa Countries Classification based on Annual CPI",
  k = 4,
  k_colors = "jco",
  type = "phylogenic",
  repel = TRUE
)
# Split `africa` into nine country groups (x1 to x9).
# Rows are matched on `africa`'s own `Country Name` column with %in%. The
# earlier form filtered `africa` with a logical vector built from `africa1`,
# which is only correct while both objects have identical row order and row
# count, and spelled every membership test as a chain of `==` and `|`.
# NOTE(review): "Congo,Rep." has no space after the comma - confirm that it
# matches the spelling used in the workbook, otherwise the row is dropped.
x1 <- subset(africa, `Country Name` %in% c(
  "Chad", "Angola", "Gabon", "Burundi", "Namibia", "Nigeria", "Congo,Rep."
))
x2 <- subset(africa, `Country Name` %in% c(
  "Sudan", "Lesotho", "South Africa", "Eswatini", "Tunisia"
))
x3 <- subset(africa, `Country Name` %in% c(
  "Guinea-Bissau", "Cabo Verde", "Madagascar", "Egypt"
))
x4 <- subset(africa, `Country Name` %in% c(
  "Mauritania", "Malawi", "Morocco", "Cameroon", "Mauritius"
))
x5 <- subset(africa, `Country Name` %in% c(
  "Cote d'Ivoire", "Guinea", "Mali", "Senegal", "Benin"
))
x6 <- subset(africa, `Country Name` %in% c("Ethiopia", "Ghana"))
x7 <- subset(africa, `Country Name` %in% c(
  "Rwanda", "Niger", "Uganda", "Tanzania", "Burkina Faso", "Togo", "Kenya"
))
x8 <- subset(africa, `Country Name` %in% c(
  "Congo_DR", "Mozambique", "Zambia", "Botswana"
))
x9 <- subset(africa, `Country Name` %in% c("Zimbabwe", "Sierra Leone"))
# Restrict every group to columns 24 to 33 of its table.
x11 <- dplyr::select(x1, 24:33)
x22 <- dplyr::select(x2, 24:33)
x33 <- dplyr::select(x3, 24:33)
x44 <- dplyr::select(x4, 24:33)
x55 <- dplyr::select(x5, 24:33)
x66 <- dplyr::select(x6, 24:33)
x77 <- dplyr::select(x7, 24:33)
x88 <- dplyr::select(x8, 24:33)
x99 <- dplyr::select(x9, 24:33)

# Row means: one average per row (country) across the selected columns.
x111 <- rowMeans(x11)
x222 <- rowMeans(x22)
x333 <- rowMeans(x33)
x444 <- rowMeans(x44)
x555 <- rowMeans(x55)
x666 <- rowMeans(x66)
x777 <- rowMeans(x77)
x888 <- rowMeans(x88)
x999 <- rowMeans(x99)

# Column means: one average per selected column across the group's rows.
y111 <- colMeans(x11)
y222 <- colMeans(x22)
y333 <- colMeans(x33)
y444 <- colMeans(x44)
y555 <- colMeans(x55)
y666 <- colMeans(x66)
y777 <- colMeans(x77)
y888 <- colMeans(x88)
y999 <- colMeans(x99)
# Wrap each vector of column means in a one-column data frame. The column is
# named after the variable passed in (y111, y222, ...), so each call keeps
# its original argument.
y1 <- as.data.frame(y111)
y2 <- as.data.frame(y222)
y3 <- as.data.frame(y333)
y4 <- as.data.frame(y444)
y5 <- as.data.frame(y555)
y6 <- as.data.frame(y666)
y7 <- as.data.frame(y777)
y8 <- as.data.frame(y888)
y9 <- as.data.frame(y999)

# Expose the row names as a leading `Year` column.
df1 <- cbind(Year = rownames(y1), y1)
df2 <- cbind(Year = rownames(y2), y2)
df3 <- cbind(Year = rownames(y3), y3)
df4 <- cbind(Year = rownames(y4), y4)
df5 <- cbind(Year = rownames(y5), y5)
df6 <- cbind(Year = rownames(y6), y6)
df7 <- cbind(Year = rownames(y7), y7)
df8 <- cbind(Year = rownames(y8), y8)
df9 <- cbind(Year = rownames(y9), y9)
# Draw the nine group-average series in a 3 x 3 grid, one panel per group.
# Two fixes compared with calling plot() nine times by hand:
#  * `Year` comes from row names, so it is text (or a factor, depending on how
#    cbind() built the frame). It is converted to numeric explicitly so the
#    x axis is a real numeric axis and type = "l" always draws a line.
#  * par(mfrow) is restored afterwards so later plots are not squeezed into a
#    leftover 3 x 3 layout.
group_series <- list(df1, df2, df3, df4, df5, df6, df7, df8, df9)
# Group 7 was drawn without a `col` argument, so it keeps the device default.
group_colours <- c("red", "blue", "orange", "dark blue", "pink", "brown",
                   par("col"), "green", "darkmagenta")
old_par <- par(mfrow = c(3, 3))
for (i in seq_along(group_series)) {
  series <- group_series[[i]]
  plot(as.numeric(as.character(series$Year)),
       series[[2]], # second column holds the group's column means
       type = "l", xlab = "Year",
       col = group_colours[i], ylab = "GDP Growth",
       main = paste("GROUP", i))
}
par(old_par)
# Wrap each vector of row means in a one-column data frame. The column is
# named after the variable passed in (x111, x222, ...), so each call keeps
# its original argument.
z1 <- as.data.frame(x111)
z2 <- as.data.frame(x222)
z3 <- as.data.frame(x333)
z4 <- as.data.frame(x444)
z5 <- as.data.frame(x555)
z6 <- as.data.frame(x666)
z7 <- as.data.frame(x777)
z8 <- as.data.frame(x888)
z9 <- as.data.frame(x999)

# Expose the row names as a leading column (labelled `Year`, as before).
xf1 <- cbind(Year = rownames(z1), z1)
xf2 <- cbind(Year = rownames(z2), z2)
xf3 <- cbind(Year = rownames(z3), z3)
xf4 <- cbind(Year = rownames(z4), z4)
xf5 <- cbind(Year = rownames(z5), z5)
xf6 <- cbind(Year = rownames(z6), z6)
xf7 <- cbind(Year = rownames(z7), z7)
xf8 <- cbind(Year = rownames(z8), z8)
xf9 <- cbind(Year = rownames(z9), z9)
# Overall mean of the per-row means for each of the nine groups.
# Every value is printed explicitly: only the first one was wrapped in
# print(), so when the script is run with source() the other eight results
# were computed and silently discarded.
print(mean(xf1$x111))
## [1] 3.102627
print(mean(xf2$x222))
## [1] 2.159123
print(mean(xf3$x333))
## [1] 3.468698
print(mean(xf4$x444))
## [1] 3.929748
print(mean(xf5$x555))
## [1] 5.341074
print(mean(xf6$x666))
## [1] 8.283248
print(mean(xf7$x777))
## [1] 6.081943
print(mean(xf8$x888))
## [1] 5.359902
print(mean(xf9$x999))
## [1] 5.480693
Median
# Equatorial Guinea on its own: columns 24 to 33, transposed so that each
# selected column becomes a row, then plotted and averaged.
# The row is matched on `africa`'s own `Country Name` column instead of a
# logical vector built from `africa1`, which is only valid while both objects
# share the same row order.
x10 <- subset(africa, `Country Name` == "Equatorial Guinea")
x100 <- as.data.frame(dplyr::select(x10, 24:33))
x101 <- as.data.frame(t(x100))
xf12 <- cbind(Year = rownames(x101), x101)
# `Year` is built from row names (text), so convert it to numeric explicitly
# for a proper numeric x axis.
plot(as.numeric(as.character(xf12$Year)), xf12$V1,
     xlab = "Year", type = "l",
     col = "chocolate4", ylab = "GDP Growth",
     main = "GDP Growth of Equatorial Guinea")
# NOTE(review): the section heading says "Median" but mean() is computed here;
# confirm which statistic is intended.
mean(xf12$V1)
## [1] -3.333212