YOHANA AZUCENA PAZ ARGUETA
October 22, 2019
library(readxl)
datos <- read_excel("C:/Users/74/Desktop/turistas.xlsx")
## Remove the Nationality column
datos2<-datos
head(datos2)
## # A tibble: 6 x 4
## `numero de noches` `numero de visitas anter~ `gasto noche por pers~ edad
## <dbl> <dbl> <dbl> <dbl>
## 1 14 0 76.6 42
## 2 7 0 35.7 33
## 3 7 1 46.2 35
## 4 14 2 37.6 39
## 5 7 0 85.9 31
## 6 7 0 41.8 24.5
names(datos2)
## [1] "numero de noches" "numero de visitas anteriores"
## [3] "gasto noche por persona" "edad"
names(datos2) <- c("X1", "X2", "X3", "X4")
names(datos2)
## [1] "X1" "X2" "X3" "X4"
library(Hmisc)
library(stargazer)
Mat_R<-rcorr(as.matrix(datos2))
stargazer(Mat_R$r, type = "text")
##
## ==============================
## X1 X2 X3 X4
## ------------------------------
## X1 1 0.702 -0.508 0.509
## X2 0.702 1 -0.170 0.887
## X3 -0.508 -0.170 1 -0.016
## X4 0.509 0.887 -0.016 1
## ------------------------------
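rcorr() also returns the matrix of pairwise p-values in Mat_R$P (the diagonal is NA), which is useful for judging which of these correlations are statistically distinguishable from zero with only 15 observations; this is also the matrix one would normally pass to corrplot's p.mat argument. A minimal look, using the same object:
# Pairwise p-values computed by Hmisc::rcorr (diagonal is NA by construction)
round(Mat_R$P, 4)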
library(PerformanceAnalytics)
chart.Correlation(as.matrix(datos2), histogram = TRUE, pch = 19)
library(corrplot)
library(grDevices)
# With sig.level = -1 every cell is treated as "insignificant", so insig = "p-value"
# writes the values of p.mat (here the correlations themselves) on the plot.
corrplot(Mat_R$r, p.mat = Mat_R$r, type = "lower", order = "hclust",
         tl.col = "black", tl.srt = 45, pch.col = "red",
         insig = "p-value", sig.level = -1, col = terrain.colors(100))
library(psych)
options(scipen = 999)
cortest.bartlett(datos2)
## $chisq
## [1] 32.77112
##
## $p.value
## [1] 0.0000116044
##
## $df
## [1] 6
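With p = 4 variables, Bartlett's test has p(p - 1)/2 = 6 degrees of freedom, matching the output, and the chi-square of 32.77 is well beyond the 5% critical value, so the hypothesis that the correlation matrix is the identity is rejected and a component model is worth fitting. A quick check of that arithmetic:
p_vars <- ncol(datos2)            # 4 variables
df <- p_vars * (p_vars - 1) / 2   # degrees of freedom: 6
qchisq(0.95, df)                  # 5% critical value, about 12.59
# observed chisq = 32.77 with p-value ~ 0.0000116, so reject independence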
library(rela)
KMO <- paf(as.matrix(datos2))$KMO
print(KMO)
## [1] 0.57187
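If the rela package is hard to install (it has at times been archived on CRAN), the KMO() function in psych, already loaded above, computes the same Kaiser-Meyer-Olkin measure from the data or the correlation matrix. A sketch:
# psych::KMO accepts raw data or a correlation matrix and returns the
# overall MSA plus one sampling-adequacy value per variable.
kmo_alt <- psych::KMO(Mat_R$r)
kmo_alt$MSA    # overall KMO index, comparable to the 0.57 reported above
kmo_alt$MSAi   # per-variable measures of sampling adequacy
A value around 0.57 sits at the low end of the usual KMO guidelines, so the component solution should be read with some caution.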
datos_normalq<- scale(datos2)
datos_normalq
## X1 X2 X3 X4
## [1,] 0.30629 -0.28016 0.37060 0.598430
## [2,] -0.54004 -0.28016 -0.88500 -0.156599
## [3,] -0.54004 -0.20234 -0.56383 0.011186
## [4,] 0.30629 -0.12452 -0.82916 0.346754
## [5,] -0.54004 -0.28016 0.65406 -0.324383
## [6,] -0.54004 -0.28016 -0.69963 -0.869682
## [7,] 2.48257 3.61098 -0.54076 3.199086
## [8,] -0.54004 -0.28016 0.32446 0.262862
## [9,] 0.30629 -0.28016 -1.40108 -0.743843
## [10,] -0.54004 -0.28016 0.25853 -0.576059
## [11,] 2.11986 -0.20234 -1.39315 -1.037466
## [12,] -0.54004 -0.28016 0.75294 -0.072707
## [13,] -0.66094 -0.28016 1.63958 -0.408275
## [14,] -0.54004 -0.28016 1.81177 -0.408275
## [15,] -0.54004 -0.28016 0.50066 0.178970
## attr(,"scaled:center")
## X1 X2 X3 X4
## 11.467 3.600 64.562 34.867
## attr(,"scaled:scale")
## X1 X2 X3 X4
## 8.271 12.850 32.561 11.920
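scale() standardizes each column to mean 0 and sample standard deviation 1, which is exactly what the scaled:center and scaled:scale attributes record. A one-line check against a manual calculation:
# Standardization is (x - mean(x)) / sd(x), applied column by column
manual_X1 <- (datos2$X1 - mean(datos2$X1)) / sd(datos2$X1)
all.equal(as.numeric(datos_normalq[, "X1"]), manual_X1)   # TRUE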
ACP4<-prcomp(datos_normalq)
ACP4
## Standard deviations (1, .., p=4):
## [1] 1.58460 1.05928 0.53878 0.27688
##
## Rotation (n x k) = (4 x 4):
## PC1 PC2 PC3 PC4
## X1 -0.54267 -0.29142 -0.742188 -0.26408
## X2 -0.59515 0.23974 0.069214 0.76389
## X3 0.25507 0.82437 -0.505126 -0.01423
## X4 -0.53502 0.42192 0.434987 -0.58866
modeloPC<-princomp(as.matrix(datos_normalq))
summary(modeloPC)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 1.53087 1.02337 0.520514 0.267489
## Proportion of Variance 0.62774 0.28052 0.072572 0.019165
## Cumulative Proportion 0.62774 0.90826 0.980835 1.000000
modeloPC<-prcomp(as.matrix(datos_normalq))
summary(modeloPC)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.585 1.059 0.5388 0.2769
## Proportion of Variance 0.628 0.281 0.0726 0.0192
## Cumulative Proportion 0.628 0.908 0.9808 1.0000
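The two summaries differ only because princomp() divides by n when estimating variances while prcomp() divides by n - 1; the standard deviations therefore differ by a factor of sqrt((n - 1)/n), and the proportions of explained variance are identical. The relationship can be verified directly:
n <- nrow(datos_normalq)            # 15 observations
ACP4$sdev * sqrt((n - 1) / n)       # reproduces princomp's standard deviations
# e.g. 1.5846 * sqrt(14/15) = 1.5309, matching Comp.1 above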
library(Hmisc)
library(readr)
library(stargazer)
Mat_R<-rcorr(as.matrix(datos2))
descomposicion<-eigen(Mat_R$r)
stargazer(descomposicion$values, type = "text")
##
## =======================
## 2.511 1.122 0.290 0.077
## -----------------------
stargazer(descomposicion$vectors, type = "text")
##
## ===========================
## -0.543 0.291 0.742 0.264
## -0.595 -0.240 -0.069 -0.764
## 0.255 -0.824 0.505 0.014
## -0.535 -0.422 -0.435 0.589
## ---------------------------
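Since the variables were standardized, these eigenvalues are exactly the component variances from prcomp() (the squared standard deviations), and the eigenvectors match the rotation matrix up to the sign of each column. A quick consistency check:
# Eigenvalues of R equal the squared sdev from prcomp on the scaled data
round(ACP4$sdev^2, 3)             # 2.511 1.122 0.290 0.077
round(descomposicion$values, 3)   # same values

# Eigenvectors agree with the PCA rotation up to column sign flips
round(abs(descomposicion$vectors) - abs(ACP4$rotation), 3)   # all zeros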
library(psych)
modelo_3<-principal(r = datos2,nfactors = 3,covar = FALSE,rotate = "none")
modelo_3
## Principal Components Analysis
## Call: principal(r = datos2, nfactors = 3, rotate = "none", covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## X1 0.86 -0.31 0.40 0.99 0.005346 1.7
## X2 0.94 0.25 -0.04 0.96 0.044734 1.1
## X3 -0.40 0.87 0.27 1.00 0.000016 1.6
## X4 0.85 0.45 -0.23 0.97 0.026565 1.7
##
## PC1 PC2 PC3
## SS loadings 2.51 1.12 0.29
## Proportion Var 0.63 0.28 0.07
## Cumulative Var 0.63 0.91 0.98
## Proportion Explained 0.64 0.29 0.07
## Cumulative Proportion 0.64 0.93 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.02
## with the empirical chi square 0.05 with prob < NA
##
## Fit based upon off diagonal values = 1
library(psych)
modelo_4<-principal(r = datos2,nfactors = 4,covar = FALSE,rotate = "none")
modelo_4
## Principal Components Analysis
## Call: principal(r = datos2, nfactors = 4, rotate = "none", covar = FALSE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 h2 u2 com
## X1 0.86 -0.31 0.40 0.07 1 0.00000000000000011 1.7
## X2 0.94 0.25 -0.04 -0.21 1 0.00000000000000044 1.3
## X3 -0.40 0.87 0.27 0.00 1 0.00000000000000067 1.6
## X4 0.85 0.45 -0.23 0.16 1 0.00000000000000178 1.8
##
## PC1 PC2 PC3 PC4
## SS loadings 2.51 1.12 0.29 0.08
## Proportion Var 0.63 0.28 0.07 0.02
## Cumulative Var 0.63 0.91 0.98 1.00
## Proportion Explained 0.63 0.28 0.07 0.02
## Cumulative Proportion 0.63 0.91 0.98 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 4 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
## with the empirical chi square 0 with prob < NA
##
## Fit based upon off diagonal values = 1
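With as many components as variables the solution is exact: every communality h2 equals 1 (h2 is the row sum of squared loadings) and the loading matrix reproduces the correlation matrix. Both facts can be checked from the fitted object:
L <- unclass(modelo_4$loadings)   # plain 4 x 4 matrix of loadings
rowSums(L^2)                      # communalities: all equal to 1
round(L %*% t(L) - Mat_R$r, 6)    # L %*% t(L) reconstructs R exactly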
library(psych)
modelo_5<-principal(r = datos2,nfactors = 4,covar = FALSE,rotate = "varimax")
modelo_5$loadings
##
## Loadings:
## RC1 RC2 RC3 RC4
## X1 0.364 -0.320 0.875
## X2 0.865 0.406 0.285
## X3 0.975 -0.224
## X4 0.975 0.191 -0.110
##
## RC1 RC2 RC3 RC4
## SS loadings 1.831 1.059 1.016 0.094
## Proportion Var 0.458 0.265 0.254 0.024
## Cumulative Var 0.458 0.722 0.976 1.000
In this solution, factor 1 (RC1) groups together numero de visitas anteriores (X2) and edad (X4).
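The same grouping can be read straight from the rotated solution by printing the loadings with a cutoff, so that only each variable's dominant factor remains visible:
# Hide loadings below 0.5: X2 and X4 load on RC1, X3 on RC2, X1 on RC3,
# and RC4 picks up almost nothing (SS loading 0.094).
print(modelo_5$loadings, cutoff = 0.5)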
# Direct (benefit) normalization: larger values map toward 1
norm_directa <- function(x){
  return((x - min(x)) / (max(x) - min(x)))
}
# Inverse (cost) normalization: larger values map toward 0
norm_inverza <- function(x){
  return((max(x) - x) / (max(x) - min(x)))
}
library(dplyr)
datos2 %>%
  dplyr::select(X2, X4) %>%
  dplyr::transmute(X4 = norm_directa(X4), X2 = norm_inverza(X2)) -> data_factor_1
print(data_factor_1)
## # A tibble: 15 x 2
## X4 X2
## <dbl> <dbl>
## 1 0.386 1
## 2 0.208 1
## 3 0.248 0.98
## 4 0.327 0.96
## 5 0.168 1
## 6 0.0396 1
## 7 1 0
## 8 0.307 1
## 9 0.0693 1
## 10 0.109 1
## 11 0 0.98
## 12 0.228 1
## 13 0.149 1
## 14 0.149 1
## 15 0.287 1
data_factor_1 %>% dplyr::summarise(S = sd(X2), Y = sd(X4)) -> sd_vector
print(sd_vector)
## # A tibble: 1 x 2
## S Y
## <dbl> <dbl>
## 1 0.257 0.236
cor(data_factor_1)->mat_R_F1
print(mat_R_F1)
## X4 X2
## X4 1.0000 -0.8873
## X2 -0.8873 1.0000
1 - mat_R_F1 -> sum_data         # "conflict" between criteria: 1 - r
colSums(sum_data) -> sum_vector
sd_vector * sum_vector -> vj     # contrast (sd) times conflict
print(vj)
## S Y
## 1 0.48502 0.44548
vj / sum(vj) -> wj               # rescale so the weights sum to 1
print(wj)
## S Y
## 1 0.52125 0.47875
print(round(wj * 100, 2))
## S Y
## 1 52.12 47.88
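The steps above (standard deviation of each normalized criterion times the sum of its conflict 1 - r with the other criteria, rescaled to sum to one) amount to the CRITIC weighting scheme. A small function wrapping them, assuming the input columns are already normalized in the same direction of preference:
# CRITIC-style weights: contrast (sd) times conflict (sum of 1 - r),
# rescaled so the weights add up to 1.
critic_weights <- function(norm_data) {
  s  <- apply(norm_data, 2, sd)       # contrast of each criterion
  cj <- colSums(1 - cor(norm_data))   # conflict with the remaining criteria
  vj <- s * cj
  vj / sum(vj)
}

round(critic_weights(data_factor_1) * 100, 2)   # X4 ~ 47.88, X2 ~ 52.12, as above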
datos2 %>% dplyr::select(X2, X4) -> data_norm
apply(data_norm, 2, prop.table) -> data_norm   # column-wise proportions p_ij
print(data_norm)
## X2 X4
## [1,] 0.000000 0.080306
## [2,] 0.000000 0.063098
## [3,] 0.018519 0.066922
## [4,] 0.037037 0.074570
## [5,] 0.000000 0.059273
## [6,] 0.000000 0.046845
## [7,] 0.925926 0.139579
## [8,] 0.000000 0.072658
## [9,] 0.000000 0.049713
## [10,] 0.000000 0.053537
## [11,] 0.018519 0.043021
## [12,] 0.000000 0.065010
## [13,] 0.000000 0.057361
## [14,] 0.000000 0.057361
## [15,] 0.000000 0.070746
entropy <- function(x){
  return(x * log(x))   # note: x = 0 gives NaN, since 0 * log(0) is undefined
}
apply(data_norm, 2, entropy) -> data_norm_2
print(data_norm_2)
## X2 X4
## [1,] NaN -0.20252
## [2,] NaN -0.17434
## [3,] -0.07387 -0.18097
## [4,] -0.12207 -0.19358
## [5,] NaN -0.16748
## [6,] NaN -0.14339
## [7,] -0.07126 -0.27485
## [8,] NaN -0.19051
## [9,] NaN -0.14921
## [10,] NaN -0.15672
## [11,] -0.07387 -0.13535
## [12,] NaN -0.17769
## [13,] NaN -0.16396
## [14,] NaN -0.16396
## [15,] NaN -0.18738
ncol(data_norm) -> m
m
## [1] 2
-1 / log(m) -> K
print(K)
## [1] -1.4427
K * colSums(data_norm_2) -> Ej
print(Ej)
## X2 X4
## NaN 3.8403
1 - Ej -> vj
print(vj)
## X2 X4
## NaN -2.8403
prop.table(vj) -> wj   # same as vj / sum(vj)
print(wj)
## X2 X4
## NaN NaN
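The NaN values come from the zero proportions in X2: 0 * log(0) is NaN in floating point, while the entropy convention is to count it as 0. In addition, the standard entropy-weight formulation divides by the log of the number of alternatives (here the 15 rows), not the number of criteria, which keeps each Ej in [0, 1]. A hedged sketch applying both fixes to the objects above:
# Convention: a zero proportion contributes 0 to the entropy sum
entropy_safe <- function(x) ifelse(x == 0, 0, x * log(x))

n_alt <- nrow(data_norm)                                    # 15 alternatives
Ej    <- -colSums(apply(data_norm, 2, entropy_safe)) / log(n_alt)
vj    <- 1 - Ej                                             # degree of diversification
wj    <- vj / sum(vj)                                       # entropy weights
round(wj * 100, 2)   # X2 takes roughly 98% of the weight because it is so concentrated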