some libraries needed

library(magrittr)
library(polycor)
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
## 
##     polyserial
library(matrixcalc)
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
## 
##     isFALSE

read data

# Load the prepared HDI / Democracy Index table from the course repository.
# The former `rm(list = ls())` call was removed: wiping the caller's global
# environment is a side effect a script should not have, and nothing below
# depends on an empty workspace (every object used is assigned explicitly).
dataPreparada <- "https://github.com/Estadistica-AnalisisPolitico/Sesion6/raw/main/idhdemo.csv"
idhdemo <- read.csv(dataPreparada)
names(idhdemo)
##  [1] "country"          "hdiRanking"       "hdi"              "hdiLife"         
##  [5] "hdiSchoolExpec"   "hdiMeanEduc"      "hdiGni"           "ideRanking"      
##  [9] "ideRegime"        "ide"              "ideElectoral"     "ideFunctioning"  
## [13] "ideParticipation" "ideCulture"       "ideLiberties"
# Compact overview of column types and first values.
str(object = idhdemo)
## 'data.frame':    165 obs. of  15 variables:
##  $ country         : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ hdiRanking      : int  182 74 93 150 48 76 10 22 89 34 ...
##  $ hdi             : num  0.462 0.789 0.745 0.591 0.849 0.786 0.946 0.926 0.76 0.888 ...
##  $ hdiLife         : num  62.9 76.8 77.1 61.9 76.1 ...
##  $ hdiSchoolExpec  : num  10.7 14.5 15.5 12.2 19 ...
##  $ hdiMeanEduc     : num  2.51 10.12 6.99 5.84 11.14 ...
##  $ hdiGni          : num  1335 15293 10978 5328 22048 ...
##  $ ideRanking      : int  167 66 110 107 54 84 15 19 130 139 ...
##  $ ideRegime       : chr  "Authoritarian regime" "Flawed democracy" "Authoritarian regime" "Hybrid regime" ...
##  $ ide             : num  0.26 6.28 3.66 4.18 6.62 5.42 8.66 8.28 2.8 2.52 ...
##  $ ideElectoral    : num  0 7 3.08 4.5 9.17 7.92 10 9.58 0.5 0.42 ...
##  $ ideFunctioning  : num  0.07 6.07 2.5 3.21 5 4.64 8.57 7.5 2.5 2.71 ...
##  $ ideParticipation: num  0 5 3.89 5.56 7.22 6.11 7.22 8.89 3.33 3.33 ...
##  $ ideCulture      : num  1.25 6.25 5 5 3.75 3.13 7.5 6.88 5 4.38 ...
##  $ ideLiberties    : num  0 7.06 3.82 2.65 7.94 5.29 10 8.53 2.65 1.76 ...

data for factor analysis

# Columns excluded from the analysis data: the country label, the rankings,
# the regime label and the two composite indices. Everything else is kept.
dontselect <- c(
  "country", "hdiRanking", "hdi",
  "ideRanking", "ideRegime", "ide"
)
select <- setdiff(x = names(idhdemo), y = dontselect)
theData <- idhdemo[, select]
head(x = theData, n = 10)
##    hdiLife hdiSchoolExpec hdiMeanEduc    hdiGni ideElectoral ideFunctioning
## 1   62.879       10.70538    2.514790  1335.206         0.00           0.07
## 2   76.833       14.48747   10.121144 15293.327         7.00           6.07
## 3   77.129       15.48788    6.987444 10978.406         3.08           2.50
## 4   61.929       12.16760    5.844292  5327.788         4.50           3.21
## 5   76.064       18.97951   11.144080 22047.971         9.17           5.00
## 6   73.372       14.40561   11.330300 15388.300         7.92           4.64
## 7   83.579       21.08002   12.726820 49257.135        10.00           8.57
## 8   82.412       16.36746   12.305714 56529.663         9.58           7.50
## 9   73.488       12.71063   10.556130 15018.054         0.50           2.50
## 10  79.246       16.29968   11.046590 48731.446         0.42           2.71
##    ideParticipation ideCulture ideLiberties
## 1              0.00       1.25         0.00
## 2              5.00       6.25         7.06
## 3              3.89       5.00         3.82
## 4              5.56       5.00         2.65
## 5              7.22       3.75         7.94
## 6              6.11       3.13         5.29
## 7              7.22       7.50        10.00
## 8              8.89       6.88         8.53
## 9              3.33       5.00         2.65
## 10             3.33       4.38         1.76

correlations

# Correlation matrix of the selected indicators; only the `correlations`
# element of the hetcor() result is kept.
corMatrix <- polycor::hetcor(theData)[["correlations"]]

preliminary checks

# Kaiser-Meyer-Olkin sampling adequacy (overall and per item).
KMO(r = corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = corMatrix)
## Overall MSA =  0.9
## MSA for each item = 
##          hdiLife   hdiSchoolExpec      hdiMeanEduc           hdiGni 
##             0.90             0.92             0.91             0.88 
##     ideElectoral   ideFunctioning ideParticipation       ideCulture 
##             0.84             0.93             0.96             0.87 
##     ideLiberties 
##             0.88
# Bartlett test on the correlation matrix: TRUE here would mean the p-value
# is above 0.05.
cortest.bartlett(R = corMatrix, n = nrow(theData))$p.value > 0.05
## [1] FALSE
# Check whether the correlation matrix is singular.
matrixcalc::is.singular.matrix(x = corMatrix)
## [1] FALSE
# Parallel analysis to suggest the number of factors (factor solution only,
# no scree plot). `T`/`F` were replaced by `TRUE`/`FALSE`, which cannot be
# reassigned.
# NOTE(review): psych documents `correct` as a numeric continuity correction
# (default 0.5); confirm that passing a logical here is intended.
fa.parallel(theData, fa = "fa", correct = TRUE, plot = FALSE)
## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

run factor analysis

# Two-factor exploratory factor analysis (minres extraction, varimax
# rotation) on the selected indicators.
# NOTE(review): varimax is an orthogonal rotation, so the two sets of factor
# scores are close to uncorrelated by construction. Since the scores are
# regressed on each other further down, an oblique rotation such as oblimin
# may be more appropriate; confirm with the analyst.
resfa <- fa(
  r = theData,
  nfactors = 2,
  cor = "mixed",
  rotate = "varimax", # oblimin?
  fm = "minres"
)

see results

# Factor loadings and the variance explained by each factor.
print(x = resfa$loadings)
## 
## Loadings:
##                  MR1   MR2  
## hdiLife          0.307 0.836
## hdiSchoolExpec   0.368 0.811
## hdiMeanEduc      0.316 0.782
## hdiGni           0.283 0.813
## ideElectoral     0.906 0.249
## ideFunctioning   0.801 0.470
## ideParticipation 0.768 0.331
## ideCulture       0.498 0.382
## ideLiberties     0.901 0.334
## 
##                  MR1   MR2
## SS loadings    3.522 3.280
## Proportion Var 0.391 0.364
## Cumulative Var 0.391 0.756
# Path diagram of the factor solution.
fa.diagram(fa.results = resfa, main = "Resultados del EFA")

testing results

# Which variables contributed the most to the factors?
sort(x = resfa$communality, decreasing = FALSE)
##       ideCulture ideParticipation      hdiMeanEduc           hdiGni 
##        0.3941682        0.6993302        0.7119301        0.7405067 
##          hdiLife   hdiSchoolExpec   ideFunctioning     ideElectoral 
##        0.7933223        0.7936103        0.8629055        0.8835971 
##     ideLiberties 
##        0.9226321
# Which variables contribute to the construction of more than one factor?
sort(x = resfa$complexity, decreasing = FALSE)
##     ideElectoral           hdiGni          hdiLife     ideLiberties 
##         1.150198         1.238563         1.265339         1.270410 
##      hdiMeanEduc ideParticipation   hdiSchoolExpec   ideFunctioning 
##         1.317113         1.358907         1.395523         1.614353 
##       ideCulture 
##         1.874466
# Is the Tucker-Lewis index above 0.9?
print(resfa$TLI)
## [1] 0.9263416

PREPARE FOR REGRESSION: see scores

# Factor scores, one row per observation and one column per factor.
print(resfa$scores)
##                MR1         MR2
##   [1,] -1.68034008 -0.83207114
##   [2,]  0.42190255  0.17017645
##   [3,] -0.84997358  0.38447261
##   [4,] -0.33126116 -0.83527332
##   [5,]  0.61373479  0.55689007
##   [6,]  0.14046830  0.08167183
##   [7,]  1.02570884  1.51779769
##   [8,]  0.88103717  1.08945988
##   [9,] -1.29937276  0.44149238
##  [10,] -1.83242173  1.64245537
##  [11,]  0.31844737 -0.45412168
##  [12,] -1.82081368  0.79127982
##  [13,]  0.64419446  1.39022124
##  [14,]  0.16694597 -1.25237953
##  [15,]  0.32315566 -0.51282672
##  [16,]  0.00637766 -0.23230765
##  [17,]  0.07023144  0.02710679
##  [18,]  1.35156515 -0.85226593
##  [19,]  0.79590933 -0.19926374
##  [20,]  0.72857529 -0.06394465
##  [21,] -0.44112718 -1.43580407
##  [22,] -1.11960332 -1.02757486
##  [23,] -0.98973591 -0.28683067
##  [24,] -1.05515920 -0.43828325
##  [25,]  1.02809170  1.03254119
##  [26,]  1.36601176 -0.83504903
##  [27,] -0.92660701 -1.63788522
##  [28,] -0.85058018 -1.70714696
##  [29,]  1.10074591  0.49090905
##  [30,] -1.75324072  0.98181493
##  [31,]  0.89149677 -0.25215019
##  [32,] -0.68611998 -0.52775631
##  [33,]  1.39058539  0.04560051
##  [34,]  0.40424125  0.66961046
##  [35,] -1.34796274  0.76993161
##  [36,]  0.65938234  0.82403644
##  [37,]  0.92959429  0.67342523
##  [38,] -1.26521201 -0.91609497
##  [39,]  1.09624142  1.42337757
##  [40,] -0.84752864 -1.10397042
##  [41,]  0.83269487 -0.26618059
##  [42,]  0.31945248  0.03380673
##  [43,] -1.23959580  0.24370604
##  [44,]  0.08754890 -0.57562448
##  [45,] -1.41858602 -0.32833524
##  [46,] -1.43844169 -0.81564407
##  [47,]  0.95635314  0.72603920
##  [48,] -0.90652875 -0.54467311
##  [49,] -0.77178090 -0.86909663
##  [50,]  0.12193568 -0.20526226
##  [51,]  1.17845964  1.31639064
##  [52,]  0.85060877  0.95067281
##  [53,] -1.20082421 -0.11501435
##  [54,]  0.08388692 -1.31568124
##  [55,] -0.07550962  0.37877055
##  [56,]  1.04614268  1.22898977
##  [57,]  0.81037133 -1.16018427
##  [58,]  0.88180311  0.97570438
##  [59,]  0.32464021 -0.89745926
##  [60,] -0.91240495 -1.23329895
##  [61,] -0.69869116 -1.29860155
##  [62,]  0.66934453 -0.31331835
##  [63,] -0.72921268 -0.80520382
##  [64,]  0.42679596 -0.98709643
##  [65,] -0.53110948  1.83620281
##  [66,]  0.43564552  0.48096168
##  [67,]  1.19015878  1.44089595
##  [68,]  1.01117908 -0.81774982
##  [69,]  0.69240046 -0.36715512
##  [70,] -1.64011323  0.74063141
##  [71,] -1.00642518 -0.33713784
##  [72,]  0.91245412  1.74043238
##  [73,]  0.42565343  1.01075176
##  [74,]  0.56281540  0.99178963
##  [75,] -0.04724191 -1.35390947
##  [76,]  1.15298623 -0.64154921
##  [77,]  0.98435915  0.98932362
##  [78,] -0.93624247  0.22755893
##  [79,] -1.17866700  0.70225223
##  [80,]  0.07211009 -0.78474871
##  [81,] -1.14485678  1.26317399
##  [82,] -0.43718196 -0.11729826
##  [83,] -1.50791210 -0.26465704
##  [84,]  0.80853693  0.55650340
##  [85,] -0.67699074 -0.06929200
##  [86,]  0.96845703 -1.74864012
##  [87,]  0.61739872 -1.53294001
##  [88,] -1.60821323  0.39833519
##  [89,]  0.86802246  0.54127424
##  [90,]  1.10844177  1.16492380
##  [91,]  0.34811360 -1.37260980
##  [92,]  0.69213366 -1.28562507
##  [93,]  0.68999413  0.09447743
##  [94,] -0.53538920 -1.66944809
##  [95,]  0.73363037  0.96262662
##  [96,]  0.09007308 -1.27923062
##  [97,]  1.19187463 -0.04203392
##  [98,]  0.15252997  0.10314696
##  [99,]  0.55645050 -0.11646111
## [100,]  0.56582788 -0.20474328
## [101,]  0.64460702  0.39036249
## [102,] -0.19982650 -0.03549452
## [103,] -0.36582852 -1.24591778
## [104,] -1.88196390 -0.18598228
## [105,]  1.16225441 -1.32316529
## [106,]  0.22088559 -0.64168296
## [107,]  1.05161036  1.35430511
## [108,]  1.31958694  1.27220288
## [109,] -1.28723728  0.14980294
## [110,] -0.43770633 -1.59021551
## [111,]  0.18526936 -1.39531504
## [112,]  0.67802441 -0.19858766
## [113,]  1.16625850  1.57561068
## [114,] -1.15508917  0.81660202
## [115,] -0.18769735 -1.12736741
## [116,] -0.93037337  0.12674599
## [117,]  0.90761603  0.05775454
## [118,]  0.92365922 -1.19448337
## [119,]  0.84738911 -0.46055488
## [120,]  0.50880090 -0.09662182
## [121,]  0.90776879 -0.59654267
## [122,]  0.58393308  0.62081318
## [123,]  0.90296128  0.64806451
## [124,] -1.35888218  1.86397715
## [125,] -0.85789755 -0.39255596
## [126,]  0.68451204  0.19873132
## [127,] -1.60987764  0.90784741
## [128,] -0.67172313 -0.55253321
## [129,] -1.86400962  1.56576204
## [130,]  0.62997911 -1.35300459
## [131,]  0.63364308  0.10998868
## [132,]  0.23472276 -1.60108407
## [133,] -0.34169237  2.05198437
## [134,]  0.65819755  0.49790260
## [135,]  0.76665827  1.03062327
## [136,]  1.04324030 -0.52313894
## [137,]  0.93406100  1.07075094
## [138,]  0.82806821  0.99269003
## [139,]  0.22963133  0.12519213
## [140,] -1.41184347 -0.88452233
## [141,]  1.13805315 -0.87425890
## [142,]  1.07859340  1.47228748
## [143,]  0.97865206  1.49507107
## [144,] -1.69232803 -0.53922351
## [145,] -1.63969879  0.18148776
## [146,]  0.29593755 -1.14584429
## [147,]  0.29927451  0.39067918
## [148,]  1.09772120 -0.95649747
## [149,] -0.82941307 -0.61605772
## [150,]  0.84567513  0.07177427
## [151,]  0.02478126 -0.01607318
## [152,] -1.10663821  1.42920505
## [153,] -1.95052165  0.46723473
## [154,] -0.11580869 -0.86878372
## [155,] -0.16884568 -0.15748957
## [156,] -1.83594691  2.23546357
## [157,]  0.92596913  1.12762895
## [158,]  0.74853985  1.11710041
## [159,]  1.48400307  0.26012748
## [160,] -1.70970304  0.39503784
## [161,] -1.35736632  0.23791400
## [162,] -1.20266956  0.38847886
## [163,] -1.22944766 -1.15385709
## [164,]  0.54816162 -1.25718564
## [165,] -0.75561568 -0.67787241
# Store the factor scores next to the original data. In the loadings printed
# above, the first factor loads highest on the ide* items and the second on
# the hdi* items, hence the column names.
idhdemo[["ide_efa"]] <- resfa$scores[, 1]
idhdemo[["idh_efa"]] <- resfa$scores[, 2]

NORMALIZING

# Rescale each column of factor scores to the 0-10 range.
efa_scores_norm <- BBmisc::normalize(
  x = resfa$scores,
  method = "range",
  range = c(0, 10),
  margin = 2 # by column
)

idhdemo[["ide_efa_norm"]] <- efa_scores_norm[, 1]
idhdemo[["idh_efa_norm"]] <- efa_scores_norm[, 2]

REGRESSING

# Regression of the rescaled democracy factor score on the rescaled
# human-development factor score.
# NOTE(review): the factor scores come from a varimax (orthogonal) rotation,
# which presumably explains the near-zero R-squared reported below; confirm
# whether an oblique rotation was intended.
summary(
  lm(formula = ide_efa_norm ~ idh_efa_norm, data = idhdemo)
)
## 
## Call:
## lm(formula = ide_efa_norm ~ idh_efa_norm, data = idhdemo)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.7673 -2.4238  0.9237  2.3597  4.2718 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.34932    0.46289  11.556   <2e-16 ***
## idh_efa_norm  0.07515    0.09288   0.809     0.42    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.817 on 163 degrees of freedom
## Multiple R-squared:  0.004001,   Adjusted R-squared:  -0.00211 
## F-statistic: 0.6547 on 1 and 163 DF,  p-value: 0.4196
# Same regression using the published composite indices, for comparison.
summary(
  lm(formula = ide ~ hdi, data = idhdemo)
)
## 
## Call:
## lm(formula = ide ~ hdi, data = idhdemo)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6195 -1.0091  0.4006  1.2527  3.3699 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.8224     0.6613  -2.756  0.00652 ** 
## hdi           9.7393     0.8923  10.915  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.817 on 163 degrees of freedom
## Multiple R-squared:  0.4223, Adjusted R-squared:  0.4187 
## F-statistic: 119.1 on 1 and 163 DF,  p-value: < 2.2e-16

CLUSTERING data

# The clustering step starts from the same indicator columns used for the
# factor analysis.
dataClus <- theData
head(x = dataClus)
##   hdiLife hdiSchoolExpec hdiMeanEduc    hdiGni ideElectoral ideFunctioning
## 1  62.879       10.70538    2.514790  1335.206         0.00           0.07
## 2  76.833       14.48747   10.121144 15293.327         7.00           6.07
## 3  77.129       15.48788    6.987444 10978.406         3.08           2.50
## 4  61.929       12.16760    5.844292  5327.788         4.50           3.21
## 5  76.064       18.97951   11.144080 22047.971         9.17           5.00
## 6  73.372       14.40561   11.330300 15388.300         7.92           4.64
##   ideParticipation ideCulture ideLiberties
## 1             0.00       1.25         0.00
## 2             5.00       6.25         7.06
## 3             3.89       5.00         3.82
## 4             5.56       5.00         2.65
## 5             7.22       3.75         7.94
## 6             6.11       3.13         5.29
# Label each row with its country name.
row.names(dataClus) <- idhdemo[["country"]]

distances

library(cluster)
# Pairwise Gower dissimilarities between rows.
g.dist <- daisy(x = dataClus, metric = "gower")

suggestions

library(factoextra)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Gap-statistic suggestion for the number of clusters (up to 10).
# The gap statistic relies on simulated reference data, so the RNG is seeded
# to make the suggested number reproducible. `F` was replaced by `FALSE`.
# NOTE(review): for method = "gap_stat", fviz_nbclust() appears to run the
# clustering function on `dataClus` itself rather than on `diss`; confirm
# that the Gower distances are actually used for this suggestion.
set.seed(123)
fviz_nbclust(
  x = dataClus,
  FUNcluster = pam,
  diss = g.dist,
  method = "gap_stat",
  k.max = 10,
  verbose = FALSE
)

# Number of clusters used by the clustering steps that follow.
K <- 3

pam

# Partitioning around medoids on the Gower distances with K clusters.
# `F` was replaced by `FALSE`, which cannot be reassigned.
set.seed(123)
res.pam <- pam(x = g.dist, k = K, cluster.only = FALSE)
# New column holding the PAM cluster assigned to each row.
dataClus$pam <- res.pam$cluster
head(dataClus)
##             hdiLife hdiSchoolExpec hdiMeanEduc    hdiGni ideElectoral
## Afghanistan  62.879       10.70538    2.514790  1335.206         0.00
## Albania      76.833       14.48747   10.121144 15293.327         7.00
## Algeria      77.129       15.48788    6.987444 10978.406         3.08
## Angola       61.929       12.16760    5.844292  5327.788         4.50
## Argentina    76.064       18.97951   11.144080 22047.971         9.17
## Armenia      73.372       14.40561   11.330300 15388.300         7.92
##             ideFunctioning ideParticipation ideCulture ideLiberties pam
## Afghanistan           0.07             0.00       1.25         0.00   1
## Albania               6.07             5.00       6.25         7.06   2
## Algeria               2.50             3.89       5.00         3.82   1
## Angola                3.21             5.56       5.00         2.65   1
## Argentina             5.00             7.22       3.75         7.94   2
## Armenia               4.64             6.11       3.13         5.29   2

agnes

# Agglomerative hierarchical clustering on the Gower distances, cut at K
# clusters.
res.agnes <- hcut(
  x = g.dist,
  k = K,
  hc_func = "agnes",
  hc_method = "ward.D"
)

dataClus[["agnes"]] <- res.agnes$cluster

diana

# Divisive hierarchical clustering on the Gower distances, cut at K clusters.
res.diana <- hcut(x = g.dist, k = K, hc_func = "diana")
dataClus[["diana"]] <- res.diana$cluster

Evaluate

# Silhouette plots for the three clustering solutions (summary table
# suppressed). `F` was replaced by `FALSE`, which cannot be reassigned.
fviz_silhouette(res.pam, print.summary = FALSE)

fviz_silhouette(res.agnes, print.summary = FALSE)

fviz_silhouette(res.diana, print.summary = FALSE)

# set as working directory!!!
#knitr::spin('simpleCode_lastsessions.R', precious=TRUE)