Solución

# Load the Excel workbook with the country-level indicators.
library(openxlsx)
datafile <- "mundo.xlsx"
datat <- read.xlsx(xlsxFile = datafile)
# Preview the first rows to check that the import looks right.
head(datat)

##   codigopais                pais     accFT  ruralElec mujFueraEscuela
## 1        AFG         Afghanistan  17.71918  70.129379              NA
## 2        ALB             Albania  65.75154 100.000000         5.64351
## 3        DZA             Algeria  99.99000  99.248154              NA
## 4        AND             Andorra 100.00000 100.000000              NA
## 5        AGO              Angola  45.58080   4.386689              NA
## 6        ATG Antigua and Barbuda  99.99000  99.999336              NA
##    corruCont mujerFuerzaL polStabNoTerror
## 1 -1.6357235     17.63448      -2.5791519
## 2 -0.5250779     39.86474      -0.1914829
## 3 -0.5248498     19.20092      -1.2593676
## 4  1.3080720           NA       1.2782722
## 5 -1.3257300     45.23737      -0.2261823
## 6  1.3080720           NA       0.9049994

# Inspect the structure of the table: column names, types and sample values.
str(datat)

## 'data.frame':    201 obs. of  8 variables:
##  $ codigopais     : chr  "AFG" "ALB" "DZA" "AND" ...
##  $ pais           : chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
##  $ accFT          : num  17.7 65.8 100 100 45.6 ...
##  $ ruralElec      : num  70.13 100 99.25 100 4.39 ...
##  $ mujFueraEscuela: num  NA 5.64 NA NA NA ...
##  $ corruCont      : num  -1.636 -0.525 -0.525 1.308 -1.326 ...
##  $ mujerFuerzaL   : num  17.6 39.9 19.2 NA 45.2 ...
##  $ polStabNoTerror: num  -2.579 -0.191 -1.259 1.278 -0.226 ...

Previos: Si tuviera que prescindir de alguna de estas variables, ¿cuál sería la candidata más evidente, pues complicaría el análisis estadístico?

# Per-column summary statistics, including the count of missing values (NA's).
summary(datat)

##   codigopais            pais               accFT          ruralElec     
##  Length:201         Length:201         Min.   :  2.00   Min.   :  0.70  
##  Class :character   Class :character   1st Qu.: 23.57   1st Qu.: 44.54  
##  Mode  :character   Mode  :character   Median : 80.55   Median : 97.57  
##                                        Mean   : 63.61   Mean   : 73.62  
##                                        3rd Qu.: 99.99   3rd Qu.:100.00  
##                                        Max.   :100.00   Max.   :100.00  
##                                        NA's   :13       NA's   :4       
##  mujFueraEscuela     corruCont        mujerFuerzaL   polStabNoTerror   
##  Min.   : 0.2612   Min.   :-1.7392   Min.   :12.48   Min.   :-3.13097  
##  1st Qu.: 3.0057   1st Qu.:-0.7600   1st Qu.:38.28   1st Qu.:-0.72194  
##  Median : 8.2741   Median :-0.2947   Median :44.16   Median : 0.02709  
##  Mean   :12.9939   Mean   :-0.0357   Mean   :40.99   Mean   :-0.05160  
##  3rd Qu.:15.5726   3rd Qu.: 0.6338   3rd Qu.:47.42   3rd Qu.: 0.79192  
##  Max.   :65.3875   Max.   : 2.3590   Max.   :54.54   Max.   : 1.64520  
##  NA's   :125       NA's   :2         NA's   :21      NA's   :1

¿En cuántos grupos de países se podría organizar esta data usando la data original entregada?

# Standardize the indicator columns (the first two columns, country code and
# country name, are dropped) and let NbClust propose the number of clusters
# using the "complete" agglomeration method.
library(NbClust)
test <- scale(datat[, -(1:2)])
nb <- NbClust(data = test, method = "complete")

## Warning in pf(beale, pp, df2): NaNs produced

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 6 proposed 2 as the best number of clusters 
## * 1 proposed 3 as the best number of clusters 
## * 1 proposed 4 as the best number of clusters 
## * 6 proposed 5 as the best number of clusters 
## * 1 proposed 6 as the best number of clusters 
## * 1 proposed 7 as the best number of clusters 
## * 3 proposed 9 as the best number of clusters 
## * 1 proposed 12 as the best number of clusters 
## * 1 proposed 14 as the best number of clusters 
## * 2 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

¿En cuántos grupos de países se podría organizar esta data si cada valor perdido fuera reemplazado por la media de esa columna?

# Replace every missing value with the mean of its own column (computed on the
# observed values only), standardize the imputed table, and ask NbClust again
# for the number of clusters.
datasc <- as.data.frame(
  lapply(
    datat[, -c(1, 2)],
    function(x) {
      replace(x, is.na(x), mean(x, na.rm = TRUE))
    }
  )
)
test2 <- scale(datasc)
library(NbClust)
nb <- NbClust(test2, method = "complete")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 5 proposed 2 as the best number of clusters 
## * 8 proposed 3 as the best number of clusters 
## * 3 proposed 4 as the best number of clusters 
## * 3 proposed 5 as the best number of clusters 
## * 2 proposed 8 as the best number of clusters 
## * 2 proposed 12 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************

Si a toda la data del archivo le llamamos MUNDO, ¿qué código replicaría este gráfico?

# Two-dimensional map of the countries obtained with cmdscale(), with the
# labels colored by the sign of polStabNoTerror.
library(openxlsx)
datafile <- "mundo.xlsx"
MUNDO <- read.xlsx(datafile)

# Use the country names as row names, then keep only the indicator columns.
row.names(MUNDO) <- MUNDO$pais
MUNDO <- MUNDO[, -c(1, 2)]

# Standardize, compute pairwise distances and project them onto two dimensions.
MUNDO_s <- scale(MUNDO)
MUNDO_d <- dist(MUNDO_s)
MUNDO_r <- cmdscale(MUNDO_d, eig = TRUE, k = 2)
x <- MUNDO_r$points[, 1]
y <- MUNDO_r$points[, 2]

# Labels are the country names; "si" marks a positive polStabNoTerror value.
columnForLabels <- row.names(MUNDO)
colorForLabels <- as.factor(ifelse(MUNDO$polStabNoTerror > 0, "si", "no"))
paleta <- c("red", "green")

# Draw an empty canvas first (type = "n"), then add the labels and the legend.
plot(x, y, main = "Mi Grafico", type = "n")
text(x, y, labels = columnForLabels, cex = 0.5, col = paleta[colorForLabels])
legend(
  "bottomright",
  legend = levels(colorForLabels),
  fill = paleta,
  title = "terror positivo?",
  cex = 0.5
)

REALICEMOS ANÁLISIS DE COMPONENTES PRINCIPALES

IMPUTANDO

# Reload the data and keep only the indicator columns (the first two columns,
# country code and country name, are dropped).
library(openxlsx)
datafile <- "mundo.xlsx"
MUNDO <- read.xlsx(datafile)
subTable <- MUNDO[, -c(1, 2)]

# Mean imputation: for every column, fill the missing values with the column
# mean (not the median), rounded to zero decimal places.
# NOTE(review): rounding to zero decimals is coarse for indicators measured on
# a narrow scale; consider dropping round() if the precision matters.
for (i in seq_len(ncol(subTable))) {
  MEDIA <- mean(subTable[, i], na.rm = TRUE) # mean of the observed values
  subTable[is.na(subTable[, i]), i] <- round(MEDIA, 0) # fill the NAs
}

¿DATA SUFICIENTE?

library(psych)
# Correlation matrix of the indicator columns held in subTable.
matrizCor <- cor(subTable)
# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per variable).
KMO(matrizCor)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = matrizCor)
## Overall MSA =  0.7
## MSA for each item = 
##           accFT       ruralElec mujFueraEscuela       corruCont 
##            0.68            0.67            0.91            0.73 
##    mujerFuerzaL polStabNoTerror 
##            0.59            0.69

¿TENEMOS MATRIZ IDENTIDAD?

# Bartlett's test on the correlation matrix (is it an identity matrix?), using
# the number of rows of subTable as the sample size.
cortest.bartlett(matrizCor, n=nrow(subTable))

## $chisq
## [1] 534.425
## 
## $p.value
## [1] 2.911896e-104
## 
## $df
## [1] 15

¿HABRÁ DOS LATENTES? ¿CUÁNTA VARIANZA EXPLICAN?

# Principal components analysis extracting two components with varimax
# rotation, computed from the correlation matrix.
# NOTE(review): the input is a correlation matrix rather than raw data, so
# component scores presumably cannot be produced despite scores = TRUE;
# confirm against the psych documentation.
resultadoPr <- principal(
  matrizCor,
  nfactors = 2,
  rotate = "varimax",
  scores = TRUE
)
# Print with three decimals; loadings below the 0.4 cut-off are left blank.
print(resultadoPr, digits = 3, cut = 0.4)

## Principal Components Analysis
## Call: principal(r = matrizCor, nfactors = 2, rotate = "varimax", scores = T)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                    RC1    RC2    h2    u2  com
## accFT            0.918        0.843 0.157 1.00
## ruralElec        0.911        0.841 0.159 1.03
## mujFueraEscuela -0.507        0.270 0.730 1.11
## corruCont        0.664  0.567 0.762 0.238 1.95
## mujerFuerzaL            0.831 0.763 0.237 1.21
## polStabNoTerror  0.524  0.703 0.768 0.232 1.85
## 
##                         RC1   RC2
## SS loadings           2.717 1.531
## Proportion Var        0.453 0.255
## Cumulative Var        0.453 0.708
## Proportion Explained  0.640 0.360
## Cumulative Proportion 0.640 1.000
## 
## Mean item complexity =  1.4
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.098 
## 
## Fit based upon off diagonal values = 0.944

# Reload the data and turn two numeric indicators into ordered factors with
# three levels each.
library(openxlsx)
datafile <- "mundo.xlsx"
datat <- read.xlsx(datafile)

# breaks = 3 makes cut() split the observed range into three intervals of
# equal width; the labels are assigned from the lowest to the highest interval.
etiquetas1 <- c("bajo", "medio", "alto")
datat$mujerFuerzaLO <- cut(datat$mujerFuerzaL,
                           breaks = 3,
                           labels = etiquetas1,
                           ordered_result = TRUE)

etiquetas2 <- c("malo", "regular", "bueno")
datat$polStabNoTerrorO <- cut(datat$polStabNoTerror,
                              breaks = 3,
                              labels = etiquetas2,
                              ordered_result = TRUE)

# Check the new columns and the number of countries per level.
summary(datat)

##   codigopais            pais               accFT          ruralElec     
##  Length:201         Length:201         Min.   :  2.00   Min.   :  0.70  
##  Class :character   Class :character   1st Qu.: 23.57   1st Qu.: 44.54  
##  Mode  :character   Mode  :character   Median : 80.55   Median : 97.57  
##                                        Mean   : 63.61   Mean   : 73.62  
##                                        3rd Qu.: 99.99   3rd Qu.:100.00  
##                                        Max.   :100.00   Max.   :100.00  
##                                        NA's   :13       NA's   :4       
##  mujFueraEscuela     corruCont        mujerFuerzaL   polStabNoTerror   
##  Min.   : 0.2612   Min.   :-1.7392   Min.   :12.48   Min.   :-3.13097  
##  1st Qu.: 3.0057   1st Qu.:-0.7600   1st Qu.:38.28   1st Qu.:-0.72194  
##  Median : 8.2741   Median :-0.2947   Median :44.16   Median : 0.02709  
##  Mean   :12.9939   Mean   :-0.0357   Mean   :40.99   Mean   :-0.05160  
##  3rd Qu.:15.5726   3rd Qu.: 0.6338   3rd Qu.:47.42   3rd Qu.: 0.79192  
##  Max.   :65.3875   Max.   : 2.3590   Max.   :54.54   Max.   : 1.64520  
##  NA's   :125       NA's   :2         NA's   :21      NA's   :1         
##  mujerFuerzaLO polStabNoTerrorO
##  bajo : 20     malo   :18      
##  medio: 44     regular:84      
##  alto :116     bueno  :98      
##  NA's : 21     NA's   : 1      
##                                
##                                
##

# Cross-tabulate the two ordinal variables.
tablaTE <- table(datat$mujerFuerzaLO, datat$polStabNoTerrorO)

# Pearson's chi-squared test on the table, with a simulated p-value.
chisq.test(tablaTE, simulate.p.value = TRUE)

## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  tablaTE
## X-squared = 29.765, df = NA, p-value = 0.0004998

# Correspondence analysis of the contingency table, followed by its map.
library(ca)
tablaCA_te <- ca(tablaTE)
# Go through the plot() generic, which dispatches to the method for the
# object returned by ca().
plot(tablaCA_te, col = c("red", "blue"))