Untitled

YOHANA AZUCENA PAZ ARGUETA

22 de octubre de 2019

library(readxl)
datos <- read_excel("C:/Users/74/Desktop/turistas.xlsx")
##Quitar la columna de Nacionalidad
datos2<-datos
head(datos2)
## # A tibble: 6 x 4
##   `numero de noches` `numero de visitas anter~ `gasto noche por pers~  edad
##                <dbl>                     <dbl>                  <dbl> <dbl>
## 1                 14                         0                   76.6  42  
## 2                  7                         0                   35.7  33  
## 3                  7                         1                   46.2  35  
## 4                 14                         2                   37.6  39  
## 5                  7                         0                   85.9  31  
## 6                  7                         0                   41.8  24.5

cambiando nombre a las variables

names(datos2)
## [1] "numero de noches"             "numero de visitas anteriores"
## [3] "gasto noche por persona"      "edad"
names (datos2) = c("X1", "X2", "X3", "X4")
names (datos2)
## [1] "X1" "X2" "X3" "X4"

Matriz de correlacion

library(Hmisc)
library(stargazer)
Mat_R<-rcorr(as.matrix(datos2))
stargazer(Mat_R$r,type = "text")
## 
## ==============================
##      X1     X2     X3     X4  
## ------------------------------
## X1   1    0.702  -0.508 0.509 
## X2 0.702    1    -0.170 0.887 
## X3 -0.508 -0.170   1    -0.016
## X4 0.509  0.887  -0.016   1   
## ------------------------------

Graficos

library(PerformanceAnalytics)
chart.Correlation(as.matrix(datos2),histogram = TRUE,pch=19,)

library(corrplot)
library(grDevices)
corrplot(Mat_R$r,p.mat = Mat_R$r,type="lower",order="hclust",tl.col="black",tl.srt = 45,pch.col = "red",insig = "p-value", sig.level = -1,col = terrain.colors(100))

Prueba de Barlet

library(psych)
options(scipen = 999)
cortest.bartlett(datos2)
## $chisq
## [1] 32.77112
## 
## $p.value
## [1] 0.0000116044
## 
## $df
## [1] 6

KMO

library(rela)
KMO<-paf(as.matrix(datos2))$KMO
print(KMO)
## [1] 0.57187

Normalizacion de los datos

datos_normalq<- scale(datos2)
datos_normalq
##             X1       X2       X3        X4
##  [1,]  0.30629 -0.28016  0.37060  0.598430
##  [2,] -0.54004 -0.28016 -0.88500 -0.156599
##  [3,] -0.54004 -0.20234 -0.56383  0.011186
##  [4,]  0.30629 -0.12452 -0.82916  0.346754
##  [5,] -0.54004 -0.28016  0.65406 -0.324383
##  [6,] -0.54004 -0.28016 -0.69963 -0.869682
##  [7,]  2.48257  3.61098 -0.54076  3.199086
##  [8,] -0.54004 -0.28016  0.32446  0.262862
##  [9,]  0.30629 -0.28016 -1.40108 -0.743843
## [10,] -0.54004 -0.28016  0.25853 -0.576059
## [11,]  2.11986 -0.20234 -1.39315 -1.037466
## [12,] -0.54004 -0.28016  0.75294 -0.072707
## [13,] -0.66094 -0.28016  1.63958 -0.408275
## [14,] -0.54004 -0.28016  1.81177 -0.408275
## [15,] -0.54004 -0.28016  0.50066  0.178970
## attr(,"scaled:center")
##     X1     X2     X3     X4 
## 11.467  3.600 64.562 34.867 
## attr(,"scaled:scale")
##     X1     X2     X3     X4 
##  8.271 12.850 32.561 11.920

Analisis de componente principal

ACP4<-prcomp(datos_normalq)
ACP4
## Standard deviations (1, .., p=4):
## [1] 1.58460 1.05928 0.53878 0.27688
## 
## Rotation (n x k) = (4 x 4):
##         PC1      PC2       PC3      PC4
## X1 -0.54267 -0.29142 -0.742188 -0.26408
## X2 -0.59515  0.23974  0.069214  0.76389
## X3  0.25507  0.82437 -0.505126 -0.01423
## X4 -0.53502  0.42192  0.434987 -0.58866

ademir

modeloPC<-princomp(as.matrix(datos_normalq))
summary(modeloPC)
## Importance of components:
##                         Comp.1  Comp.2   Comp.3   Comp.4
## Standard deviation     1.53087 1.02337 0.520514 0.267489
## Proportion of Variance 0.62774 0.28052 0.072572 0.019165
## Cumulative Proportion  0.62774 0.90826 0.980835 1.000000

ademir usando PRCOMP

modeloPC<-prcomp(as.matrix(datos_normalq))
summary(modeloPC)
## Importance of components:
##                          PC1   PC2    PC3    PC4
## Standard deviation     1.585 1.059 0.5388 0.2769
## Proportion of Variance 0.628 0.281 0.0726 0.0192
## Cumulative Proportion  0.628 0.908 0.9808 1.0000

matriz de cargas

library(Hmisc)
library(readr)
library(stargazer)
Mat_R<-rcorr(as.matrix(datos2))
descomposicion<-eigen(Mat_R$r)
stargazer(descomposicion$values,type = "text")
## 
## =======================
## 2.511 1.122 0.290 0.077
## -----------------------
stargazer(descomposicion$vectors,type = "text")
## 
## ===========================
## -0.543 0.291  0.742  0.264 
## -0.595 -0.240 -0.069 -0.764
## 0.255  -0.824 0.505  0.014 
## -0.535 -0.422 -0.435 0.589 
## ---------------------------

solucion con 3 factores

library(psych)
modelo_3<-principal(r = datos2,nfactors = 3,covar = FALSE,rotate = "none")
modelo_3
## Principal Components Analysis
## Call: principal(r = datos2, nfactors = 3, rotate = "none", covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   PC3   h2       u2 com
## X1  0.86 -0.31  0.40 0.99 0.005346 1.7
## X2  0.94  0.25 -0.04 0.96 0.044734 1.1
## X3 -0.40  0.87  0.27 1.00 0.000016 1.6
## X4  0.85  0.45 -0.23 0.97 0.026565 1.7
## 
##                        PC1  PC2  PC3
## SS loadings           2.51 1.12 0.29
## Proportion Var        0.63 0.28 0.07
## Cumulative Var        0.63 0.91 0.98
## Proportion Explained  0.64 0.29 0.07
## Cumulative Proportion 0.64 0.93 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.02 
##  with the empirical chi square  0.05  with prob <  NA 
## 
## Fit based upon off diagonal values = 1

solucion con 4 factores

library(psych)
modelo_4<-principal(r = datos2,nfactors = 4,covar = FALSE,rotate = "none")
modelo_4
## Principal Components Analysis
## Call: principal(r = datos2, nfactors = 4, rotate = "none", covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   PC3   PC4 h2                  u2 com
## X1  0.86 -0.31  0.40  0.07  1 0.00000000000000011 1.7
## X2  0.94  0.25 -0.04 -0.21  1 0.00000000000000044 1.3
## X3 -0.40  0.87  0.27  0.00  1 0.00000000000000067 1.6
## X4  0.85  0.45 -0.23  0.16  1 0.00000000000000178 1.8
## 
##                        PC1  PC2  PC3  PC4
## SS loadings           2.51 1.12 0.29 0.08
## Proportion Var        0.63 0.28 0.07 0.02
## Cumulative Var        0.63 0.91 0.98 1.00
## Proportion Explained  0.63 0.28 0.07 0.02
## Cumulative Proportion 0.63 0.91 0.98 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 4 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0 
##  with the empirical chi square  0  with prob <  NA 
## 
## Fit based upon off diagonal values = 1

solucion Rotada

library(psych)
modelo_5<-principal(r = datos2,nfactors = 4,covar = FALSE,rotate = "varimax")
modelo_5$loadings
## 
## Loadings:
##    RC1    RC2    RC3    RC4   
## X1  0.364 -0.320  0.875       
## X2  0.865         0.406  0.285
## X3         0.975 -0.224       
## X4  0.975         0.191 -0.110
## 
##                  RC1   RC2   RC3   RC4
## SS loadings    1.831 1.059 1.016 0.094
## Proportion Var 0.458 0.265 0.254 0.024
## Cumulative Var 0.458 0.722 0.976 1.000

en la solucion Quedan dentro del factor 1: numero de visitas anteriores, edad.

Método CRITIC

Normalización de datos y cálculos

FUNCIONES PARA NORMALIZAR LOS DATOS

norm_directa <- function(x){
return((x-min(x)) / (max(x)-min(x)))
}
norm_inverza <- function(x){
return((max(x)-x) / (max(x)-min(x)))
}

NORMALIZACION DE LOS DATOS

library(dplyr)
datos2 %>% dplyr::select(`X2`,X4) %>% dplyr::transmute(X4=norm_directa(X4), `X2`=norm_inverza(`X2`)) ->data_factor_1
print(data_factor_1)
## # A tibble: 15 x 2
##        X4    X2
##     <dbl> <dbl>
##  1 0.386   1   
##  2 0.208   1   
##  3 0.248   0.98
##  4 0.327   0.96
##  5 0.168   1   
##  6 0.0396  1   
##  7 1       0   
##  8 0.307   1   
##  9 0.0693  1   
## 10 0.109   1   
## 11 0       0.98
## 12 0.228   1   
## 13 0.149   1   
## 14 0.149   1   
## 15 0.287   1

Cálculo de las desviaciones estándar de cada variable

data_factor_1 %>% dplyr::summarise(S=sd(`X2`),Y=sd(X4))-> sd_vector
print(sd_vector)
## # A tibble: 1 x 2
##       S     Y
##   <dbl> <dbl>
## 1 0.257 0.236

Cálculo de la matriz de correlación

cor(data_factor_1)->mat_R_F1
print(mat_R_F1)
##         X4      X2
## X4  1.0000 -0.8873
## X2 -0.8873  1.0000

Cálculo de los ponderadores brutos

1-mat_R_F1->sum_data
colSums(sum_data)->sum_vector
sd_vector*sum_vector->vj
print(vj)
##         S       Y
## 1 0.48502 0.44548

Cálculo de los ponderadores netos

vj/sum(vj)->wj
print(wj)
##         S       Y
## 1 0.52125 0.47875

Ponderadores

print(round(wj*100,2))
##       S     Y
## 1 52.12 47.88

METODO ENTROPIA

NORMALIZACION DE LOS DATOS

datos2 %>% dplyr::select(X2,X4)->data_norm
apply(data_norm,2,prop.table)->data_norm
print(data_norm)
##             X2       X4
##  [1,] 0.000000 0.080306
##  [2,] 0.000000 0.063098
##  [3,] 0.018519 0.066922
##  [4,] 0.037037 0.074570
##  [5,] 0.000000 0.059273
##  [6,] 0.000000 0.046845
##  [7,] 0.925926 0.139579
##  [8,] 0.000000 0.072658
##  [9,] 0.000000 0.049713
## [10,] 0.000000 0.053537
## [11,] 0.018519 0.043021
## [12,] 0.000000 0.065010
## [13,] 0.000000 0.057361
## [14,] 0.000000 0.057361
## [15,] 0.000000 0.070746

FORMULA DE ENTROPIA

entropy<-function(x){
  return(x*log(x))
}
apply(data_norm,2,entropy)->data_norm_2
print(data_norm_2)
##             X2       X4
##  [1,]      NaN -0.20252
##  [2,]      NaN -0.17434
##  [3,] -0.07387 -0.18097
##  [4,] -0.12207 -0.19358
##  [5,]      NaN -0.16748
##  [6,]      NaN -0.14339
##  [7,] -0.07126 -0.27485
##  [8,]      NaN -0.19051
##  [9,]      NaN -0.14921
## [10,]      NaN -0.15672
## [11,] -0.07387 -0.13535
## [12,]      NaN -0.17769
## [13,]      NaN -0.16396
## [14,]      NaN -0.16396
## [15,]      NaN -0.18738

NUMERO DE VARIABLES EN EL FACTOR

ncol(data_norm)->m
m
## [1] 2

CONSTANTE DE ENTROPIA

-1/log(m)->K
print(K)
## [1] -1.4427

CALCULO DE LAS ENTROPIAS

K*colSums(data_norm_2)->Ej
print(Ej)
##     X2     X4 
##    NaN 3.8403

CALCULO DE LAS ESPECIFICIDADES

1-Ej->vj
print(vj)
##      X2      X4 
##     NaN -2.8403

CALCULO DE LOS PONDERADORES

prop.table(vj)->wj #es igual a usar vj/sum(vj)
print(wj)
##  X2  X4 
## NaN NaN