library(haven)
comaut <- read_sav("https://www.uv.es/mlejarza/datamine/comaut.sav")
View(comaut)
names(comaut)
## [1] "Cautonoma" "proppobla"
## [3] "propSuperf" "ratioPobsup"
## [5] "densidad" "Ventasaotras"
## [7] "Comprasdeotras" "autoabas"
## [9] "maxventa" "maxcompra"
## [11] "ratioVentasCompras" "proporcionvtasT"
## [13] "proporcioncomprasT" "ratiomaximaventaTotal"
## [15] "ratiomaxcompraTotal" "Gastoporhogar"
## [17] "Gastoporpersona" "Gastoporunidadconsumo"
## [19] "indicegastopersona" "indicegastoporhoga"
## [21] "indicegastounidadconsumo" "pibpercap"
## [23] "templeoAgri" "tempIndus"
## [25] "tempConstr" "tempServ"
## [27] "tbuscandoempleo"
data<-comaut
Procedemos a quitar las varibles que no queremos También añadimos a las filas el nombre de la comunidad autonóma
data <- (comaut[,c(-1,-5,-6,-7,-9, -10, -14, -15, -16, -17, -18)])
row.names(data)<-comaut$Cautonoma
## Warning: Setting row names on a tibble is deprecated.
Comprobamos que estan las variables con las que queremos trabajar
names(data)
## [1] "proppobla" "propSuperf"
## [3] "ratioPobsup" "autoabas"
## [5] "ratioVentasCompras" "proporcionvtasT"
## [7] "proporcioncomprasT" "indicegastopersona"
## [9] "indicegastoporhoga" "indicegastounidadconsumo"
## [11] "pibpercap" "templeoAgri"
## [13] "tempIndus" "tempConstr"
## [15] "tempServ" "tbuscandoempleo"
data
## # A tibble: 17 × 16
## proppobla propSuperf ratioP…¹ autoa…² ratio…³ propo…⁴ propo…⁵ indic…⁶ indic…⁷
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.179 0.173 1.03 26401. 0.556 0.112 0.113 89.9 99.2
## 2 0.0281 0.0943 0.298 8463. 0.476 0.0550 0.0646 100. 98.7
## 3 0.0218 0.021 1.04 5573. 0.540 0.0249 0.0257 100. 98.5
## 4 0.0244 0.0099 2.46 3707. 0.0809 0.00287 0.0198 105. 105.
## 5 0.0458 0.0147 3.12 6409. 0.283 0.0159 0.0314 87.1 95.3
## 6 0.0124 0.0105 1.18 2393. 0.457 0.0156 0.0192 90.9 94.5
## 7 0.051 0.186 0.274 12813. 0.698 0.0982 0.0787 94.1 92.2
## 8 0.0432 0.157 0.275 6592. 0.740 0.0673 0.0509 84.1 90.2
## 9 0.163 0.0635 2.57 48856. 0.577 0.178 0.172 113. 114.
## 10 0.106 0.046 2.31 20607. 0.451 0.0818 0.101 101. 101.
## 11 0.0227 0.0823 0.276 3892. 0.406 0.0155 0.0213 80.6 86.2
## 12 0.0574 0.0584 0.983 15424. 0.635 0.0619 0.0545 94.8 99.8
## 13 0.142 0.0159 8.91 12013. 0.692 0.110 0.0893 114. 120.
## 14 0.0318 0.0224 1.42 4861. 0.709 0.0374 0.0295 88.3 99.2
## 15 0.0139 0.0205 0.678 4119. 0.743 0.0339 0.0255 109. 111.
## 16 0.0469 0.0143 3.28 16712. 0.466 0.0711 0.0853 112. 109.
## 17 0.0067 0.01 0.67 1837. 0.705 0.0181 0.0143 87.7 86.6
## # … with 7 more variables: indicegastounidadconsumo <dbl>, pibpercap <dbl>,
## # templeoAgri <dbl>, tempIndus <dbl>, tempConstr <dbl>, tempServ <dbl>,
## # tbuscandoempleo <dbl>, and abbreviated variable names ¹ratioPobsup,
## # ²autoabas, ³ratioVentasCompras, ⁴proporcionvtasT, ⁵proporcioncomprasT,
## # ⁶indicegastopersona, ⁷indicegastoporhoga
Pasamos a ver la matriz de correlación y el gráfico.
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.2.2
## corrplot 0.92 loaded
matriz_correlaciones <- cor(data, use = "pairwise.complete.obs")
matriz_correlaciones
## proppobla propSuperf ratioPobsup autoabas
## proppobla 1.0000000 0.32877511 0.471492812 0.828041864
## propSuperf 0.3287751 1.00000000 -0.404260746 0.278640612
## ratioPobsup 0.4714928 -0.40426075 1.000000000 0.181662407
## autoabas 0.8280419 0.27864061 0.181662407 1.000000000
## ratioVentasCompras 0.1153052 0.28962282 -0.075678305 0.080373621
## proporcionvtasT 0.8602717 0.44741947 0.311642629 0.895774548
## proporcioncomprasT 0.8763673 0.37282083 0.306156670 0.949471701
## indicegastopersona 0.3388830 -0.34747989 0.570742876 0.426532386
## indicegastoporhoga 0.5075668 -0.34063883 0.711988019 0.459046865
## indicegastounidadconsumo 0.4077782 -0.35181772 0.633777591 0.446815316
## pibpercap 0.1099287 -0.42379896 0.573723549 0.180175588
## templeoAgri -0.1298541 0.42214313 -0.536843672 -0.203083034
## tempIndus -0.2833297 -0.03462689 -0.382041628 0.029191957
## tempConstr -0.1956773 -0.06770467 -0.004306534 -0.247757222
## tempServ 0.3544701 -0.31814475 0.768044033 0.154412159
## tbuscandoempleo 0.1805242 0.29472546 -0.114740656 0.009297234
## ratioVentasCompras proporcionvtasT proporcioncomprasT
## proppobla 0.11530521 0.86027168 0.87636734
## propSuperf 0.28962282 0.44741947 0.37282083
## ratioPobsup -0.07567830 0.31164263 0.30615667
## autoabas 0.08037362 0.89577455 0.94947170
## ratioVentasCompras 1.00000000 0.36935527 0.15184148
## proporcionvtasT 0.36935527 1.00000000 0.96237911
## proporcioncomprasT 0.15184148 0.96237911 1.00000000
## indicegastopersona -0.03656886 0.44371342 0.48055286
## indicegastoporhoga 0.02902381 0.49948500 0.50895551
## indicegastounidadconsumo -0.02891074 0.47020401 0.50178361
## pibpercap 0.10967237 0.28109172 0.26350968
## templeoAgri 0.30980391 -0.16859877 -0.24363845
## tempIndus 0.54312032 0.08245129 0.03469018
## tempConstr -0.52092477 -0.30272440 -0.27573058
## tempServ -0.55679728 0.11810496 0.19276948
## tbuscandoempleo -0.19202712 -0.07234084 -0.02578355
## indicegastopersona indicegastoporhoga
## proppobla 0.33888300 0.50756675
## propSuperf -0.34747989 -0.34063883
## ratioPobsup 0.57074288 0.71198802
## autoabas 0.42653239 0.45904686
## ratioVentasCompras -0.03656886 0.02902381
## proporcionvtasT 0.44371342 0.49948500
## proporcioncomprasT 0.48055286 0.50895551
## indicegastopersona 1.00000000 0.91177333
## indicegastoporhoga 0.91177333 1.00000000
## indicegastounidadconsumo 0.98901798 0.95979309
## pibpercap 0.86724232 0.76099841
## templeoAgri -0.76181877 -0.59980893
## tempIndus 0.23414877 0.01343191
## tempConstr -0.06702298 -0.07837518
## tempServ 0.46191786 0.51619425
## tbuscandoempleo -0.60476865 -0.39738196
## indicegastounidadconsumo pibpercap templeoAgri
## proppobla 0.40777817 0.1099287 -0.12985409
## propSuperf -0.35181772 -0.4237990 0.42214313
## ratioPobsup 0.63377759 0.5737235 -0.53684367
## autoabas 0.44681532 0.1801756 -0.20308303
## ratioVentasCompras -0.02891074 0.1096724 0.30980391
## proporcionvtasT 0.47020401 0.2810917 -0.16859877
## proporcioncomprasT 0.50178361 0.2635097 -0.24363845
## indicegastopersona 0.98901798 0.8672423 -0.76181877
## indicegastoporhoga 0.95979309 0.7609984 -0.59980893
## indicegastounidadconsumo 1.00000000 0.8501106 -0.71965247
## pibpercap 0.85011058 1.0000000 -0.67376367
## templeoAgri -0.71965247 -0.6737637 1.00000000
## tempIndus 0.15344888 0.3838987 -0.07374602
## tempConstr -0.06340038 -0.0684760 -0.15900351
## tempServ 0.49534236 0.3096957 -0.66397265
## tbuscandoempleo -0.54377296 -0.7728311 0.42239864
## tempIndus tempConstr tempServ tbuscandoempleo
## proppobla -0.28332973 -0.195677263 0.35447014 0.180524162
## propSuperf -0.03462689 -0.067704673 -0.31814475 0.294725462
## ratioPobsup -0.38204163 -0.004306534 0.76804403 -0.114740656
## autoabas 0.02919196 -0.247757222 0.15441216 0.009297234
## ratioVentasCompras 0.54312032 -0.520924770 -0.55679728 -0.192027125
## proporcionvtasT 0.08245129 -0.302724399 0.11810496 -0.072340839
## proporcioncomprasT 0.03469018 -0.275730578 0.19276948 -0.025783553
## indicegastopersona 0.23414877 -0.067022983 0.46191786 -0.604768649
## indicegastoporhoga 0.01343191 -0.078375179 0.51619425 -0.397381957
## indicegastounidadconsumo 0.15344888 -0.063400384 0.49534236 -0.543772959
## pibpercap 0.38389868 -0.068476003 0.30969573 -0.772831090
## templeoAgri -0.07374602 -0.159003513 -0.66397265 0.422398643
## tempIndus 1.00000000 -0.376993821 -0.66821175 -0.622145476
## tempConstr -0.37699382 1.000000000 0.29838978 -0.196906474
## tempServ -0.66821175 0.298389784 1.00000000 0.083210939
## tbuscandoempleo -0.62214548 -0.196906474 0.08321094 1.000000000
Gráfico de las correlaciones
corrplot(cor(data), order = "hclust", tl.col="black", tl.cex=1)
names(data)
## [1] "proppobla" "propSuperf"
## [3] "ratioPobsup" "autoabas"
## [5] "ratioVentasCompras" "proporcionvtasT"
## [7] "proporcioncomprasT" "indicegastopersona"
## [9] "indicegastoporhoga" "indicegastounidadconsumo"
## [11] "pibpercap" "templeoAgri"
## [13] "tempIndus" "tempConstr"
## [15] "tempServ" "tbuscandoempleo"
data <- (data[,c(-5, -6, -10,-11)])
names(data)
## [1] "proppobla" "propSuperf" "ratioPobsup"
## [4] "autoabas" "proporcioncomprasT" "indicegastopersona"
## [7] "indicegastoporhoga" "templeoAgri" "tempIndus"
## [10] "tempConstr" "tempServ" "tbuscandoempleo"
matriz_correlaciones <- cor(data, use = "pairwise.complete.obs")
matriz_correlaciones
## proppobla propSuperf ratioPobsup autoabas
## proppobla 1.0000000 0.32877511 0.471492812 0.828041864
## propSuperf 0.3287751 1.00000000 -0.404260746 0.278640612
## ratioPobsup 0.4714928 -0.40426075 1.000000000 0.181662407
## autoabas 0.8280419 0.27864061 0.181662407 1.000000000
## proporcioncomprasT 0.8763673 0.37282083 0.306156670 0.949471701
## indicegastopersona 0.3388830 -0.34747989 0.570742876 0.426532386
## indicegastoporhoga 0.5075668 -0.34063883 0.711988019 0.459046865
## templeoAgri -0.1298541 0.42214313 -0.536843672 -0.203083034
## tempIndus -0.2833297 -0.03462689 -0.382041628 0.029191957
## tempConstr -0.1956773 -0.06770467 -0.004306534 -0.247757222
## tempServ 0.3544701 -0.31814475 0.768044033 0.154412159
## tbuscandoempleo 0.1805242 0.29472546 -0.114740656 0.009297234
## proporcioncomprasT indicegastopersona indicegastoporhoga
## proppobla 0.87636734 0.33888300 0.50756675
## propSuperf 0.37282083 -0.34747989 -0.34063883
## ratioPobsup 0.30615667 0.57074288 0.71198802
## autoabas 0.94947170 0.42653239 0.45904686
## proporcioncomprasT 1.00000000 0.48055286 0.50895551
## indicegastopersona 0.48055286 1.00000000 0.91177333
## indicegastoporhoga 0.50895551 0.91177333 1.00000000
## templeoAgri -0.24363845 -0.76181877 -0.59980893
## tempIndus 0.03469018 0.23414877 0.01343191
## tempConstr -0.27573058 -0.06702298 -0.07837518
## tempServ 0.19276948 0.46191786 0.51619425
## tbuscandoempleo -0.02578355 -0.60476865 -0.39738196
## templeoAgri tempIndus tempConstr tempServ
## proppobla -0.12985409 -0.28332973 -0.195677263 0.35447014
## propSuperf 0.42214313 -0.03462689 -0.067704673 -0.31814475
## ratioPobsup -0.53684367 -0.38204163 -0.004306534 0.76804403
## autoabas -0.20308303 0.02919196 -0.247757222 0.15441216
## proporcioncomprasT -0.24363845 0.03469018 -0.275730578 0.19276948
## indicegastopersona -0.76181877 0.23414877 -0.067022983 0.46191786
## indicegastoporhoga -0.59980893 0.01343191 -0.078375179 0.51619425
## templeoAgri 1.00000000 -0.07374602 -0.159003513 -0.66397265
## tempIndus -0.07374602 1.00000000 -0.376993821 -0.66821175
## tempConstr -0.15900351 -0.37699382 1.000000000 0.29838978
## tempServ -0.66397265 -0.66821175 0.298389784 1.00000000
## tbuscandoempleo 0.42239864 -0.62214548 -0.196906474 0.08321094
## tbuscandoempleo
## proppobla 0.180524162
## propSuperf 0.294725462
## ratioPobsup -0.114740656
## autoabas 0.009297234
## proporcioncomprasT -0.025783553
## indicegastopersona -0.604768649
## indicegastoporhoga -0.397381957
## templeoAgri 0.422398643
## tempIndus -0.622145476
## tempConstr -0.196906474
## tempServ 0.083210939
## tbuscandoempleo 1.000000000
det(matriz_correlaciones)
## [1] 1.170451e-11
Determinar el numero de factores
library(parallel); library(nFactors)
## Warning: package 'nFactors' was built under R version 4.2.2
## Loading required package: lattice
##
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
##
## parallel
ev <- eigen(cor(data)) # Obtención de los autovalores
ap <- parallel(subject=nrow(data),var=ncol(data),rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS,xlab = "Número de Componentes",ylab = "Autovalores",
main = "Solución por autovalores para determinar
el número de factores")
abline(h=1, lty=3, col=4)
procedimiento prcomp
mod1<-prcomp(data,scale. = TRUE) # scale=TRUE PARA A.FACTORIAL SOBRE CORRELACIONES
summary(mod1)[1] # desvtipica, prop.de varianza, y proporc acumulada de var explicada
## $sdev
## [1] 2.16508909 1.65802537 1.51330883 1.01032009 0.68953115 0.60790303
## [7] 0.47604604 0.31997064 0.23702294 0.13527912 0.06314175 0.00482212
library(psy)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
vpropios <- (mod1$sdev)^2 #valores propios (cuadrado de desvi tipica)
plot(mod1,type="lines",main = "Scree Plot")
abline(h=1, lty=3, col=4)
fviz_screeplot(mod1, addlabels = TRUE) #grafico de los 10 componentes principales con mayor varianza
mod2 <- prcomp(data, scale. = TRUE, rank. = 4)
summary(mod2)
## Importance of first k=4 (out of 12) components:
## PC1 PC2 PC3 PC4
## Standard deviation 2.1651 1.6580 1.5133 1.01032
## Proportion of Variance 0.3906 0.2291 0.1908 0.08506
## Cumulative Proportion 0.3906 0.6197 0.8106 0.89563
matriz_correlaciones2<- cor(data, mod2$x)
matriz_correlaciones2
## PC1 PC2 PC3 PC4
## proppobla -0.67844964 0.6682333 -0.14521434 0.03024572
## propSuperf 0.23710485 0.7622721 0.02107029 0.39627029
## ratioPobsup -0.78090971 -0.2161576 -0.34635500 -0.25851763
## autoabas -0.63457253 0.6593559 0.18740211 0.12573727
## proporcioncomprasT -0.69211081 0.6670003 0.18499826 0.12455550
## indicegastopersona -0.87115358 -0.2620808 0.34081420 0.02550106
## indicegastoporhoga -0.90313793 -0.1417298 0.13569439 -0.09347242
## templeoAgri 0.72599913 0.4551806 -0.05691845 -0.11536147
## tempIndus 0.11546466 -0.1025685 0.97252519 -0.03689022
## tempConstr 0.04724864 -0.4060604 -0.40335914 0.78758300
## tempServ -0.68768304 -0.2561813 -0.63490685 0.02508686
## tbuscandoempleo 0.29910632 0.5080283 -0.65562887 -0.34593867
fviz_contrib(mod1, choice = "var", axes = 1, top = 10)
fviz_contrib(mod1, choice = "var", axes = 2, top = 10)
fviz_contrib(mod1, choice = "var", axes = 3, top = 10)
fviz_contrib(mod1, choice = "var", axes = 4, top = 10)
podemos obtener las puntuaciones TIPIFICADAS tipificando las columnas de mod2$x
puntuaciones<-as.array(scale(mod2$x),dimnames=c(F1,F2,F3,F4))
sd(puntuaciones[,1])#lo comprobamos
## [1] 1
biplot(x = mod2, scale = 0, cex = 0.6, col = c("blue4", "brown3"))
ROTACIÓN VARIMAX
solucion rotada varimax
sol.rotada<-varimax(mod2$rotation)# aplicamos la función varimax a la solución obtenida
sol.rotada
## $loadings
##
## Loadings:
## PC1 PC2 PC3 PC4
## proppobla 0.485 -0.174
## propSuperf 0.309 0.430 0.124 0.287
## ratioPobsup -0.278 -0.376 -0.215
## autoabas 0.516
## proporcioncomprasT 0.532
## indicegastopersona -0.475
## indicegastoporhoga -0.405 0.116 -0.123
## templeoAgri 0.422 -0.131
## tempIndus -0.192 0.608 -0.120
## tempConstr -0.148 0.845
## tempServ -0.200 -0.502
## tbuscandoempleo 0.409 -0.387 -0.312
##
## PC1 PC2 PC3 PC4
## SS loadings 1.000 1.000 1.000 1.000
## Proportion Var 0.083 0.083 0.083 0.083
## Cumulative Var 0.083 0.167 0.250 0.333
##
## $rotmat
## [,1] [,2] [,3] [,4]
## [1,] 0.80193980 -0.4813327 0.34165632 0.09209959
## [2,] 0.50577368 0.8349332 0.04625931 -0.21198978
## [3,] -0.31742052 0.1001700 0.92930605 -0.16000143
## [4,] -0.01815948 0.2473285 0.13236803 0.95967575
sol.rotada$loadings # de nuevo son los coeficientes de T ( CP no tipificadas)
##
## Loadings:
## PC1 PC2 PC3 PC4
## proppobla 0.485 -0.174
## propSuperf 0.309 0.430 0.124 0.287
## ratioPobsup -0.278 -0.376 -0.215
## autoabas 0.516
## proporcioncomprasT 0.532
## indicegastopersona -0.475
## indicegastoporhoga -0.405 0.116 -0.123
## templeoAgri 0.422 -0.131
## tempIndus -0.192 0.608 -0.120
## tempConstr -0.148 0.845
## tempServ -0.200 -0.502
## tbuscandoempleo 0.409 -0.387 -0.312
##
## PC1 PC2 PC3 PC4
## SS loadings 1.000 1.000 1.000 1.000
## Proportion Var 0.083 0.083 0.083 0.083
## Cumulative Var 0.083 0.167 0.250 0.333
sol.rotada$rotmat # matriz de rotación de la solución
## [,1] [,2] [,3] [,4]
## [1,] 0.80193980 -0.4813327 0.34165632 0.09209959
## [2,] 0.50577368 0.8349332 0.04625931 -0.21198978
## [3,] -0.31742052 0.1001700 0.92930605 -0.16000143
## [4,] -0.01815948 0.2473285 0.13236803 0.95967575
para obtener las puntuaciones rotadas tendremos que multiplicar matricialmente la matriz de puntuaciones (sin rotar)por la matriza de rotación que antes hay que definir como matriz
mat.rotacion<-as.array(sol.rotada$rotmat)
punt.rotadas<-as.array(puntuaciones%*%mat.rotacion,dimnames=C(FR1,FR2,FR3,FR4))
punt.rotadas
## [,1] [,2] [,3] [,4]
## [1,] 1.29311678 2.01113651 -0.52387924 0.1382764
## [2,] -0.12688179 0.02437965 1.15350042 0.2184912
## [3,] -0.10877475 -0.81407429 -0.25970503 -0.8698561
## [4,] -0.89983394 -0.83646350 -1.26836513 2.5707869
## [5,] 0.68628581 -0.74875045 -2.01947746 -1.0971457
## [6,] -0.13598655 -0.94759194 0.11670472 1.6618826
## [7,] 0.56371253 0.79879441 0.81397642 1.1079817
## [8,] 1.32703766 0.27054828 0.15687706 0.5575134
## [9,] -1.09866540 2.31890528 0.26369528 -0.2017761
## [10,] -0.22803274 0.66719894 -0.07846606 -0.5958250
## [11,] 1.73744030 -0.49489401 -0.37094454 -0.4359671
## [12,] 0.03683842 0.09470207 0.46981428 0.2916004
## [13,] -1.76365018 0.22532298 -1.80407499 -0.7229765
## [14,] 0.86967997 -0.62156430 0.04927999 -0.8188107
## [15,] -0.99175772 -0.79755167 1.53486360 -0.8655108
## [16,] -1.32826604 -0.02206066 0.47503043 -0.5396868
## [17,] 0.16773764 -1.12803730 1.29117024 -0.3989778
para obtener la matriz de estructura e intentar explicar la solución rotada
matriz.correlaciones.rotada<-as.array(cor(data,punt.rotadas),dimnames=c(FR1,FR2,FR3,FR4))
matriz.correlaciones.rotada
## [,1] [,2] [,3] [,4]
## proppobla -0.16055620 0.877424643 -0.331829599 -0.15188298
## propSuperf 0.56179678 0.622439513 0.188304822 0.23716308
## ratioPobsup -0.62093465 0.096767131 -0.632891304 -0.21877408
## autoabas -0.23717269 0.905829139 0.004493146 -0.10753819
## proporcioncomprasT -0.27866412 0.939373635 -0.017201892 -0.11520746
## indicegastopersona -0.93981078 0.240941278 0.010337424 -0.05473246
## indicegastoporhoga -0.83732020 0.306849068 -0.201390247 -0.16454789
## templeoAgri 0.83258793 -0.003635495 0.200933700 -0.13123197
## tempIndus -0.26731030 -0.052921000 0.933594938 -0.15863034
## tempConstr -0.05375172 -0.207388315 -0.273234560 0.91079457
## tempServ -0.47997323 0.059716229 -0.833504111 0.11663373
## tbuscandoempleo 0.71120473 0.128965220 -0.529378492 -0.30723664
Análisis Factorial por máx. Verosimilitud
Conjunto de datos
data2 <- (comaut[,c(2, 3, 4, 8, 12, 13, 16, 18, 19, 22, 23, 26, 27)])
names(data2)
## [1] "proppobla" "propSuperf" "ratioPobsup"
## [4] "autoabas" "proporcionvtasT" "proporcioncomprasT"
## [7] "Gastoporhogar" "Gastoporunidadconsumo" "indicegastopersona"
## [10] "pibpercap" "templeoAgri" "tempServ"
## [13] "tbuscandoempleo"
row.names(data2)<-comaut$Cautonoma
## Warning: Setting row names on a tibble is deprecated.
corrplot(cor(data2), order = "hclust", tl.col="black", tl.cex=1)
mod3<-prcomp(data2,scale. = TRUE) # scale=TRUE PARA A.FACTORIAL SOBRE CORRELACIONES
summary(mod3)[1] # desvtipica, prop.de varianza, y proporc acumulada de var explicada
## $sdev
## [1] 2.55804207 1.83096146 1.23733727 0.73314996 0.66765521 0.52935559
## [7] 0.37767826 0.28141533 0.20689250 0.17268763 0.10698462 0.05320285
## [13] 0.02775725
ev <- eigen(cor(data2)) # Obtención de los autovalores
ap <- parallel(subject=nrow(data2),var=ncol(data2),rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS,xlab = "Número de Componentes",ylab = "Autovalores",
main = "Solución por autovalores para determinar
el número de factores")
abline(h=1, lty=3, col=4)
fa <- factanal(~proppobla + autoabas + proporcionvtasT + proporcioncomprasT + ratioPobsup + tempServ + pibpercap +
Gastoporhogar + indicegastopersona + Gastoporunidadconsumo + propSuperf + templeoAgri + tbuscandoempleo,
factors = 3, rotation = "none", scores = "regression", data = data2)
fa #vemos los (componentes principales) resultados
##
## Call:
## factanal(x = ~proppobla + autoabas + proporcionvtasT + proporcioncomprasT + ratioPobsup + tempServ + pibpercap + Gastoporhogar + indicegastopersona + Gastoporunidadconsumo + propSuperf + templeoAgri + tbuscandoempleo, factors = 3, data = data2, scores = "regression", rotation = "none")
##
## Uniquenesses:
## proppobla autoabas proporcionvtasT
## 0.102 0.097 0.068
## proporcioncomprasT ratioPobsup tempServ
## 0.005 0.434 0.706
## pibpercap Gastoporhogar indicegastopersona
## 0.204 0.005 0.005
## Gastoporunidadconsumo propSuperf templeoAgri
## 0.005 0.459 0.373
## tbuscandoempleo
## 0.418
##
## Loadings:
## Factor1 Factor2 Factor3
## proppobla 0.568 0.684 0.327
## autoabas 0.606 0.732
## proporcionvtasT 0.633 0.729
## proporcioncomprasT 0.663 0.744
## ratioPobsup 0.634 -0.141 0.380
## tempServ 0.477 -0.159 0.201
## pibpercap 0.787 -0.357 -0.219
## Gastoporhogar 0.957 -0.159 0.231
## indicegastopersona 0.959 -0.216 -0.173
## Gastoporunidadconsumo 0.978 -0.199
## propSuperf -0.221 0.697
## templeoAgri -0.666 0.282 0.322
## tbuscandoempleo -0.460 0.390 0.467
##
## Factor1 Factor2 Factor3
## SS loadings 6.276 3.090 0.755
## Proportion Var 0.483 0.238 0.058
## Cumulative Var 0.483 0.720 0.779
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 68.32 on 42 degrees of freedom.
## The p-value is 0.00629
print(fa, digits = 2, cutoff = .3, sort = TRUE)
##
## Call:
## factanal(x = ~proppobla + autoabas + proporcionvtasT + proporcioncomprasT + ratioPobsup + tempServ + pibpercap + Gastoporhogar + indicegastopersona + Gastoporunidadconsumo + propSuperf + templeoAgri + tbuscandoempleo, factors = 3, data = data2, scores = "regression", rotation = "none")
##
## Uniquenesses:
## proppobla autoabas proporcionvtasT
## 0.10 0.10 0.07
## proporcioncomprasT ratioPobsup tempServ
## 0.00 0.43 0.71
## pibpercap Gastoporhogar indicegastopersona
## 0.20 0.00 0.00
## Gastoporunidadconsumo propSuperf templeoAgri
## 0.00 0.46 0.37
## tbuscandoempleo
## 0.42
##
## Loadings:
## Factor1 Factor2 Factor3
## ratioPobsup 0.63 0.38
## pibpercap 0.79 -0.36
## Gastoporhogar 0.96
## indicegastopersona 0.96
## Gastoporunidadconsumo 0.98
## templeoAgri -0.67 0.32
## proppobla 0.57 0.68 0.33
## autoabas 0.61 0.73
## proporcionvtasT 0.63 0.73
## proporcioncomprasT 0.66 0.74
## propSuperf 0.70
## tempServ 0.48
## tbuscandoempleo -0.46 0.39 0.47
##
## Factor1 Factor2 Factor3
## SS loadings 6.28 3.09 0.76
## Proportion Var 0.48 0.24 0.06
## Cumulative Var 0.48 0.72 0.78
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 68.32 on 42 degrees of freedom.
## The p-value is 0.00629
load <- fa$loadings[,1:2] #cargas de las variables en los dos primeros fractores
plot(load,type="n") # set up plot
text(load,labels=names(data),cex=.7) # añadimos nombres de las variables
print("puntuaciones factoriales Max.Veros.")
## [1] "puntuaciones factoriales Max.Veros."
fa$scores
## Factor1 Factor2 Factor3
## Andalucía -0.08256729 1.82762151 1.25710457
## Aragón 0.12069006 -0.01607969 -1.14837291
## Principado De Asturias -0.11492990 -0.90555704 -0.74260415
## Illes Balears 0.41486115 -1.56913565 -0.33691505
## Canarias -0.80315366 -0.11334418 1.10768945
## Cantabria -0.73549537 -0.57561052 0.01537339
## Castilla y León -0.36938027 0.90780803 -1.39424315
## Castilla-La Mancha -1.05397625 0.71057983 0.24635621
## Cataluña 1.90758164 1.81137374 -0.37290145
## Comunidad Valenciana 0.44847020 0.85600314 -0.54668534
## Extremadura -1.54449788 0.22677094 0.22628243
## Galicia -0.16023429 0.06150513 0.41413452
## Comunidad de Madrid 1.80064341 -0.57891521 1.50583465
## Región de Murcia -0.59775047 -0.31100723 1.69346160
## Comunidad Foral De Navarra 0.81578117 -1.70351227 0.31254557
## País Vasco 1.16412804 -0.30489335 -0.98834048
## La Rioja -1.21017029 -0.32360715 -1.24871985
library(psych)
## Warning: package 'psych' was built under R version 4.2.2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:psy':
##
## wkappa
par(mfrow = c(1,1))
fa.diagram(fa$scores)
estas puntuaciones pueden ser guardadas en la base de datos como nuevas variables
ML<- fa(r = data2, nfactors = 3, rotate = "varimax", fm = "mle")
ml.none <- factanal(data2, factors = 3, rotation = "none", scores = "regression")
ml.none
##
## Call:
## factanal(x = data2, factors = 3, scores = "regression", rotation = "none")
##
## Uniquenesses:
## proppobla propSuperf ratioPobsup
## 0.102 0.459 0.434
## autoabas proporcionvtasT proporcioncomprasT
## 0.097 0.068 0.005
## Gastoporhogar Gastoporunidadconsumo indicegastopersona
## 0.005 0.005 0.005
## pibpercap templeoAgri tempServ
## 0.204 0.373 0.706
## tbuscandoempleo
## 0.418
##
## Loadings:
## Factor1 Factor2 Factor3
## proppobla 0.568 0.684 0.327
## propSuperf -0.221 0.697
## ratioPobsup 0.634 -0.141 0.380
## autoabas 0.606 0.732
## proporcionvtasT 0.633 0.729
## proporcioncomprasT 0.663 0.744
## Gastoporhogar 0.957 -0.159 0.231
## Gastoporunidadconsumo 0.978 -0.199
## indicegastopersona 0.959 -0.216 -0.173
## pibpercap 0.787 -0.357 -0.219
## templeoAgri -0.666 0.282 0.322
## tempServ 0.477 -0.159 0.201
## tbuscandoempleo -0.460 0.390 0.467
##
## Factor1 Factor2 Factor3
## SS loadings 6.276 3.090 0.755
## Proportion Var 0.483 0.238 0.058
## Cumulative Var 0.483 0.720 0.779
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 68.32 on 42 degrees of freedom.
## The p-value is 0.00629
ml.none$scores
## Factor1 Factor2 Factor3
## Andalucía -0.08256729 1.82762151 1.25710457
## Aragón 0.12069006 -0.01607969 -1.14837291
## Principado De Asturias -0.11492990 -0.90555704 -0.74260415
## Illes Balears 0.41486115 -1.56913565 -0.33691505
## Canarias -0.80315366 -0.11334418 1.10768945
## Cantabria -0.73549537 -0.57561052 0.01537339
## Castilla y León -0.36938027 0.90780803 -1.39424315
## Castilla-La Mancha -1.05397625 0.71057983 0.24635621
## Cataluña 1.90758164 1.81137374 -0.37290145
## Comunidad Valenciana 0.44847020 0.85600314 -0.54668534
## Extremadura -1.54449788 0.22677094 0.22628243
## Galicia -0.16023429 0.06150513 0.41413452
## Comunidad de Madrid 1.80064341 -0.57891521 1.50583465
## Región de Murcia -0.59775047 -0.31100723 1.69346160
## Comunidad Foral De Navarra 0.81578117 -1.70351227 0.31254557
## País Vasco 1.16412804 -0.30489335 -0.98834048
## La Rioja -1.21017029 -0.32360715 -1.24871985
ml.varimax <- factanal(data2, factors = 3, rotation = "varimax", scores = "regression")
ml.varimax
##
## Call:
## factanal(x = data2, factors = 3, scores = "regression", rotation = "varimax")
##
## Uniquenesses:
## proppobla propSuperf ratioPobsup
## 0.102 0.459 0.434
## autoabas proporcionvtasT proporcioncomprasT
## 0.097 0.068 0.005
## Gastoporhogar Gastoporunidadconsumo indicegastopersona
## 0.005 0.005 0.005
## pibpercap templeoAgri tempServ
## 0.204 0.373 0.706
## tbuscandoempleo
## 0.418
##
## Loadings:
## Factor1 Factor2 Factor3
## proppobla 0.867 -0.112 0.366
## propSuperf 0.508 -0.330 -0.417
## ratioPobsup 0.172 0.186 0.709
## autoabas 0.932 0.138 0.121
## proporcionvtasT 0.942 0.120 0.177
## proporcioncomprasT 0.971 0.176 0.146
## Gastoporhogar 0.313 0.500 0.805
## Gastoporunidadconsumo 0.292 0.713 0.636
## indicegastopersona 0.270 0.800 0.532
## pibpercap 0.774 0.439
## templeoAgri -0.742 -0.266
## tempServ 0.218 0.489
## tbuscandoempleo 0.116 -0.751
##
## Factor1 Factor2 Factor3
## SS loadings 4.023 3.378 2.720
## Proportion Var 0.309 0.260 0.209
## Cumulative Var 0.309 0.569 0.779
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 68.32 on 42 degrees of freedom.
## The p-value is 0.00629
ml.varimax$scores
## Factor1 Factor2 Factor3
## Andalucía 1.54582412 -1.5692436 0.27428473
## Aragón 0.06257658 0.8755010 -0.75044458
## Principado De Asturias -0.83824540 0.7621868 -0.31799271
## Illes Balears -1.17555748 1.0506706 0.51182439
## Canarias -0.50106850 -1.2311222 0.34372520
## Cantabria -0.85683688 -0.2694880 -0.25636700
## Castilla y León 0.64493040 0.4092130 -1.52350933
## Castilla-La Mancha 0.11778560 -1.0858903 -0.69530558
## Cataluña 2.50772922 0.8161854 0.32269744
## Comunidad Valenciana 0.97525118 0.3565992 -0.39300618
## Extremadura -0.54096755 -1.2092129 -0.85630643
## Galicia -0.02926307 -0.4089656 0.18126263
## Comunidad de Madrid 0.32473883 0.2980530 2.37712150
## Región de Murcia -0.58660718 -1.4366107 0.95597334
## Comunidad Foral De Navarra -1.11330950 0.9017983 1.26981865
## País Vasco 0.30364682 1.5242327 0.09738732
## La Rioja -0.84062719 0.2160933 -1.54116340
par(mfrow = c(1,2))
plot(ml.none$loadings[,1],
ml.none$loadings[,2],
xlab = "Factor 1",
ylab = "Factor 2",
ylim = c(-1,1),
xlim = c(-1,1),
main = "No rotation")
text(ml.none$loadings[,1]-0.08,
ml.none$loadings[,2]+0.08,
colnames(data2),
col="blue")
abline(h = 0, v = 0)
plot(ml.varimax$loadings[,1],
ml.varimax$loadings[,2],
xlab = "Factor 1",
ylab = "Factor 2",
ylim = c(-1,1),
xlim = c(-1,1),
main = "Varimax rotation")
text(ml.varimax$loadings[,1]-0.08,
ml.varimax$loadings[,2]+0.08,
colnames(data2),
col="blue")
abline(h = 0, v = 0)
analisis factorial por el método de Ejes Principales Principal Axis Factor Analysis
creamos un nuevo conjunto de datos con la variables a utilizar
data2 <- (comaut[,c(2, 3, 4, 8, 12, 13, 16, 18, 19, 22, 23, 26, 27)]) # mismo conjunto de datos que ML
names(data2) # ver las variables añadidas
## [1] "proppobla" "propSuperf" "ratioPobsup"
## [4] "autoabas" "proporcionvtasT" "proporcioncomprasT"
## [7] "Gastoporhogar" "Gastoporunidadconsumo" "indicegastopersona"
## [10] "pibpercap" "templeoAgri" "tempServ"
## [13] "tbuscandoempleo"
row.names(data2)<-comaut$Cautonoma # nombrar filas
## Warning: Setting row names on a tibble is deprecated.
extraemos los factores por el método de ejes princpales
mod4<-prcomp(data2,scale. = TRUE) # scale=TRUE PARA A.FACTORIAL SOBRE CORRELACIONES
summary(mod4)[1] # desvtipica, prop.de varianza, y proporc acumulada de var explicada
## $sdev
## [1] 2.55804207 1.83096146 1.23733727 0.73314996 0.66765521 0.52935559
## [7] 0.37767826 0.28141533 0.20689250 0.17268763 0.10698462 0.05320285
## [13] 0.02775725
install.packages("nFactors")
## Warning: package 'nFactors' is in use and will not be installed
library(nFactors)
ev <- eigen(cor(data2)) # Obtención de los autovalores
ap <- parallel(subject=nrow(data2),var=ncol(data2),rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS,xlab = "Número de Componentes",ylab = "Autovalores",
main = "Solución por autovalores para determinar
el número de factores")
abline(h=1, lty=3, col=4)
library(psych) cargamos el paquete necesario para ello la función fa de esta librería permite diverso métodos de análisis factorial por defecto es el metodo minres, con el argumento fm=‘pa’ aplicaremos ejes principales (rotate, por defecto es oblimin, si no queremos ninguna o queremos variamax hay que especificarlo)
library(psych)
ep<-fa(r = data2,nfactors=3,rotate = 'none', fm = 'pa', max.iter = 50)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
ep
## Factor Analysis using method = pa
## Call: fa(r = data2, nfactors = 3, rotate = "none", max.iter = 50, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 PA3 h2 u2 com
## proppobla 0.62 0.69 0.23 0.92 0.084 2.2
## propSuperf -0.21 0.70 -0.16 0.56 0.436 1.3
## ratioPobsup 0.70 -0.19 0.46 0.75 0.252 1.9
## autoabas 0.62 0.66 -0.10 0.83 0.168 2.0
## proporcionvtasT 0.67 0.71 -0.17 0.98 0.022 2.1
## proporcioncomprasT 0.70 0.70 -0.10 0.99 0.012 2.0
## Gastoporhogar 0.91 -0.14 0.03 0.86 0.144 1.0
## Gastoporunidadconsumo 0.95 -0.23 -0.11 0.96 0.035 1.1
## indicegastopersona 0.93 -0.27 -0.18 0.97 0.033 1.2
## pibpercap 0.78 -0.45 -0.30 0.91 0.089 1.9
## templeoAgri -0.69 0.38 -0.11 0.63 0.373 1.6
## tempServ 0.57 -0.20 0.70 0.85 0.147 2.1
## tbuscandoempleo -0.42 0.46 0.60 0.75 0.247 2.7
##
## PA1 PA2 PA3
## SS loadings 6.42 3.19 1.34
## Proportion Var 0.49 0.25 0.10
## Cumulative Var 0.49 0.74 0.84
## Proportion Explained 0.59 0.29 0.12
## Cumulative Proportion 0.59 0.88 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 78 and the objective function was 27.84 with Chi Square of 301.6
## The degrees of freedom for the model are 42 and the objective function was 9.98
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic number of observations is 17 with the empirical chi square 3.46 with prob < 1
## The total number of observations was 17 with Likelihood Chi Square = 88.11 with prob < 4.1e-05
##
## Tucker Lewis Index of factoring reliability = 0.49
## RMSEA index = 0.247 and the 90 % confidence intervals are 0.185 0.339
## BIC = -30.88
## Fit based upon off diagonal values = 1
ep$scores
## PA1 PA2 PA3
## Andalucía -0.04816621 2.32260765 0.68939050
## Aragón 0.33272628 1.11344816 -1.14361581
## Principado De Asturias 0.05930880 -0.09737213 0.22411416
## Illes Balears 0.49939948 -1.91582529 1.02309084
## Canarias -0.51231245 0.30097246 2.27104783
## Cantabria -0.86425896 -0.09353955 0.16240154
## Castilla y León -0.20328468 0.79738925 -0.91912771
## Castilla-La Mancha -1.06898883 0.59319806 -0.03490738
## Cataluña 2.12214606 1.81338475 -0.70100040
## Comunidad Valenciana 0.47595380 0.47843217 0.25445306
## Extremadura -1.79058494 -1.17621866 0.42448064
## Galicia -0.88725827 -0.79239870 -0.17242308
## Comunidad de Madrid 2.07887945 -0.59067808 1.20201229
## Región de Murcia -0.49005520 0.99502553 -0.20670283
## Comunidad Foral De Navarra 0.24425747 -1.74480023 -1.38041205
## País Vasco 0.75688615 -1.46883530 -0.65519267
## La Rioja -0.70464796 -0.53479011 -1.03760891
ep2<-fa(r = data2,nfactors=3,rotate = 'varimax', fm = 'pa', max.iter = 50)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
ep2
## Factor Analysis using method = pa
## Call: fa(r = data2, nfactors = 3, rotate = "varimax", max.iter = 50,
## fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA2 PA1 PA3 h2 u2 com
## proppobla 0.90 -0.06 0.33 0.92 0.084 1.3
## propSuperf 0.49 -0.39 -0.42 0.56 0.436 2.9
## ratioPobsup 0.18 0.29 0.79 0.75 0.252 1.4
## autoabas 0.90 0.14 0.07 0.83 0.168 1.1
## proporcionvtasT 0.97 0.19 0.03 0.98 0.022 1.1
## proporcioncomprasT 0.97 0.18 0.10 0.99 0.012 1.1
## Gastoporhogar 0.37 0.66 0.53 0.86 0.144 2.6
## Gastoporunidadconsumo 0.33 0.80 0.47 0.96 0.035 2.0
## indicegastopersona 0.30 0.85 0.41 0.97 0.033 1.7
## pibpercap 0.07 0.91 0.28 0.91 0.089 1.2
## templeoAgri -0.05 -0.57 -0.55 0.63 0.373 2.0
## tempServ 0.08 0.07 0.92 0.85 0.147 1.0
## tbuscandoempleo 0.11 -0.85 0.16 0.75 0.247 1.1
##
## PA2 PA1 PA3
## SS loadings 4.13 3.99 2.84
## Proportion Var 0.32 0.31 0.22
## Cumulative Var 0.32 0.62 0.84
## Proportion Explained 0.38 0.36 0.26
## Cumulative Proportion 0.38 0.74 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 3 factors are sufficient.
##
## The degrees of freedom for the null model are 78 and the objective function was 27.84 with Chi Square of 301.6
## The degrees of freedom for the model are 42 and the objective function was 9.98
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic number of observations is 17 with the empirical chi square 3.46 with prob < 1
## The total number of observations was 17 with Likelihood Chi Square = 88.11 with prob < 4.1e-05
##
## Tucker Lewis Index of factoring reliability = 0.49
## RMSEA index = 0.247 and the 90 % confidence intervals are 0.185 0.339
## BIC = -30.88
## Fit based upon off diagonal values = 1
ep2$scores
## PA2 PA1 PA3
## Andalucía 1.85695010 -1.556005362 -0.05166419
## Aragón 1.21000772 0.333552313 -1.04065118
## Principado De Asturias -0.06866104 -0.041963879 0.23822409
## Illes Balears -1.42017893 0.674583345 1.57940823
## Canarias -0.22154826 -1.785756150 1.50754703
## Cantabria -0.56013077 -0.617770792 -0.29441505
## Castilla y León 0.63612471 0.004939246 -1.05701040
## Castilla-La Mancha -0.07959823 -0.973635025 -0.73589303
## Cataluña 2.72596406 0.920164176 0.07541590
## Comunidad Valenciana 0.63585367 -0.064410200 0.33424117
## Extremadura -1.98972144 -0.852726897 -0.28936826
## Galicia -1.12818444 -0.101310119 -0.40222975
## Comunidad de Madrid 0.52775839 0.969848529 2.21276723
## Región de Murcia 0.58470857 -0.688597977 -0.67594724
## Comunidad Foral De Navarra -1.20793534 1.800460667 -0.55566228
## País Vasco -0.76216897 1.588901372 0.23264158
## La Rioja -0.73923978 0.389726755 -1.07740386
Diagrama de arbol
fa.diagram(ep)
fa.diagram(ep2)
par(mfrow = c(1,2))
plot(ep$loadings[,1],
ep$loadings[,2],
xlab = "Factor 1",
ylab = "Factor 2",
ylim = c(-1,1),
xlim = c(-1,1),
main = "No rotation")
text(ep$loadings[,1]-0.08,
ep$loadings[,2]+0.08,
colnames(data2),
col="blue")
abline(h = 0, v = 0)
plot(ep2$loadings[,1],
ep2$loadings[,2],
xlab = "Factor 1",
ylab = "Factor 2",
ylim = c(-1,1),
xlim = c(-1,1),
main = "Varimax rotation")
text(ep2$loadings[,1]-0.08,
ep2$loadings[,2]+0.08,
colnames(data2),
col="blue")
abline(h = 0, v = 0)
COMPARACION CP, ML Y EP
ML vs EP
EP vs CP
fa.diagram(ep2)
fa.diagram(matriz.correlaciones.rotada)
ep2$scores
## PA2 PA1 PA3
## Andalucía 1.85695010 -1.556005362 -0.05166419
## Aragón 1.21000772 0.333552313 -1.04065118
## Principado De Asturias -0.06866104 -0.041963879 0.23822409
## Illes Balears -1.42017893 0.674583345 1.57940823
## Canarias -0.22154826 -1.785756150 1.50754703
## Cantabria -0.56013077 -0.617770792 -0.29441505
## Castilla y León 0.63612471 0.004939246 -1.05701040
## Castilla-La Mancha -0.07959823 -0.973635025 -0.73589303
## Cataluña 2.72596406 0.920164176 0.07541590
## Comunidad Valenciana 0.63585367 -0.064410200 0.33424117
## Extremadura -1.98972144 -0.852726897 -0.28936826
## Galicia -1.12818444 -0.101310119 -0.40222975
## Comunidad de Madrid 0.52775839 0.969848529 2.21276723
## Región de Murcia 0.58470857 -0.688597977 -0.67594724
## Comunidad Foral De Navarra -1.20793534 1.800460667 -0.55566228
## País Vasco -0.76216897 1.588901372 0.23264158
## La Rioja -0.73923978 0.389726755 -1.07740386
punt.rotadas
## [,1] [,2] [,3] [,4]
## [1,] 1.29311678 2.01113651 -0.52387924 0.1382764
## [2,] -0.12688179 0.02437965 1.15350042 0.2184912
## [3,] -0.10877475 -0.81407429 -0.25970503 -0.8698561
## [4,] -0.89983394 -0.83646350 -1.26836513 2.5707869
## [5,] 0.68628581 -0.74875045 -2.01947746 -1.0971457
## [6,] -0.13598655 -0.94759194 0.11670472 1.6618826
## [7,] 0.56371253 0.79879441 0.81397642 1.1079817
## [8,] 1.32703766 0.27054828 0.15687706 0.5575134
## [9,] -1.09866540 2.31890528 0.26369528 -0.2017761
## [10,] -0.22803274 0.66719894 -0.07846606 -0.5958250
## [11,] 1.73744030 -0.49489401 -0.37094454 -0.4359671
## [12,] 0.03683842 0.09470207 0.46981428 0.2916004
## [13,] -1.76365018 0.22532298 -1.80407499 -0.7229765
## [14,] 0.86967997 -0.62156430 0.04927999 -0.8188107
## [15,] -0.99175772 -0.79755167 1.53486360 -0.8655108
## [16,] -1.32826604 -0.02206066 0.47503043 -0.5396868
## [17,] 0.16773764 -1.12803730 1.29117024 -0.3989778
Análisis cluster
df <- scale(ep2$scores)
d <- dist(df, method = "euclidean")
res.hc <- hclust(d, method = "ward.D" )
fviz_dend(res.hc, cex = 0.5, k = 5, rect = TRUE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <]8;;https://github.com/kassambara/factoextra/issueshttps://github.com/kassambara/factoextra/issues]8;;>.
ward_factor <- cutree(res.hc,5)
añadimos resultadoa a la base de datos
data4<-cbind(ward_factor,comaut)
analisis ANOVA
summary(aov(proppobla~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 0.00938 0.009380 3.692 0.0739 .
## Residuals 15 0.03811 0.002541
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(propSuperf~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 0.01179 0.011793 3.826 0.0693 .
## Residuals 15 0.04624 0.003082
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(ratioPobsup~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 0.00 0.000 0 0.998
## Residuals 15 69.93 4.662
summary(aov(autoabas~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 2.000e+08 200027215 1.477 0.243
## Residuals 15 2.031e+09 135416915
summary(aov(proporcionvtasT~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 0.003425 0.003425 1.649 0.219
## Residuals 15 0.031164 0.002078
summary(aov(proporcioncomprasT~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 0.003514 0.003514 1.986 0.179
## Residuals 15 0.026539 0.001769
summary(aov(Gastoporhogar~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 1716254 1716254 0.234 0.636
## Residuals 15 109986503 7332434
summary(aov(templeoAgri~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 5.99 5.992 0.398 0.538
## Residuals 15 225.87 15.058
summary(aov(tempServ~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 32.1 32.08 0.901 0.358
## Residuals 15 534.3 35.62
summary(aov(tbuscandoempleo~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 21.34 21.335 8.261 0.0116 *
## Residuals 15 38.74 2.583
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(pibpercap~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 89083644 89083644 4.26 0.0568 .
## Residuals 15 313686050 20912403
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(Gastoporunidadconsumo~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 1555409 1555409 0.579 0.458
## Residuals 15 40260869 2684058
summary(aov(indicegastopersona~ward_factor, data = data4))
## Df Sum Sq Mean Sq F value Pr(>F)
## ward_factor 1 97.2 97.21 0.863 0.368
## Residuals 15 1689.2 112.61
ward_factor1<-as.factor(data4$ward_factor)
RatiopobSupWard<-TukeyHSD(aov(data4$ratioPobsup~ward_factor1)); RatiopobSupWard
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data4$ratioPobsup ~ ward_factor1)
##
## $ward_factor1
## diff lwr upr p adj
## 2-1 -0.90487592 -5.1116323 3.3018805 0.9558668
## 3-1 3.61372162 -1.6330255 8.8604687 0.2451875
## 4-1 -1.29716414 -5.8409804 3.2466521 0.8876976
## 5-1 -0.09569192 -5.3424390 5.1510552 0.9999970
## 3-2 4.51859754 0.3118411 8.7253540 0.0334004
## 4-2 -0.39228822 -3.6808610 2.8962846 0.9949448
## 5-2 0.80918400 -3.3975724 5.0159404 0.9702041
## 4-3 -4.91088576 -9.4547020 -0.3670695 0.0322072
## 5-3 -3.70941355 -8.9561606 1.5373335 0.2252484
## 5-4 1.20147221 -3.3423440 5.7452885 0.9118976
tempServ <- TukeyHSD(aov(data4$tempServ~ward_factor1)); tempServ
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data4$tempServ ~ ward_factor1)
##
## $ward_factor1
## diff lwr upr p adj
## 2-1 -5.5857143 -15.537609 4.366180 0.4221862
## 3-1 7.6500000 -4.762193 20.062193 0.3377914
## 4-1 -7.5500000 -18.299275 3.199275 0.2301732
## 5-1 -6.5500000 -18.962193 5.862193 0.4787707
## 3-2 13.2357143 3.283820 23.187609 0.0082615
## 4-2 -1.9642857 -9.744040 5.815468 0.9241834
## 5-2 -0.9642857 -10.916180 8.987609 0.9977325
## 4-3 -15.2000000 -25.949275 -4.450725 0.0052567
## 5-3 -14.2000000 -26.612193 -1.787807 0.0227803
## 5-4 1.0000000 -9.749275 11.749275 0.9980644
tbuscandoempleo <- TukeyHSD(aov(data4$tbuscandoempleo~ward_factor1)); tbuscandoempleo
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data4$tbuscandoempleo ~ ward_factor1)
##
## $ward_factor1
## diff lwr upr p adj
## 2-1 -2.9928571 -6.796167 0.81045283 0.1528648
## 3-1 -4.7000000 -9.443561 0.04356095 0.0525468
## 4-1 -3.6000000 -7.708044 0.50804429 0.0967086
## 5-1 -5.1500000 -9.893561 -0.40643905 0.0313561
## 3-2 -1.7071429 -5.510453 2.09616711 0.6212725
## 4-2 -0.6071429 -3.580327 2.36604131 0.9631840
## 5-2 -2.1571429 -5.960453 1.64616711 0.4126049
## 4-3 1.1000000 -3.008044 5.20804429 0.9082631
## 5-3 -0.4500000 -5.193561 4.29356095 0.9979115
## 5-4 -1.5500000 -5.658044 2.55804429 0.7501312
pibpercap <- TukeyHSD(aov(data4$pibpercap~ward_factor1)); pibpercap
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data4$pibpercap ~ ward_factor1)
##
## $ward_factor1
## diff lwr upr p adj
## 2-1 4208.3571 -4807.9910 13224.705 0.5883125
## 3-1 11362.0000 116.6381 22607.362 0.0472671
## 4-1 3234.0000 -6504.7691 12972.769 0.8236133
## 5-1 12311.5000 1066.1381 23556.862 0.0298295
## 3-2 7153.6429 -1862.7053 16169.991 0.1479728
## 4-2 -974.3571 -8022.7607 6074.046 0.9911513
## 5-2 8103.1429 -913.2053 17119.491 0.0859797
## 4-3 -8128.0000 -17866.7691 1610.769 0.1200561
## 5-3 949.5000 -10295.8619 12194.862 0.9986740
## 5-4 9077.5000 -661.2691 18816.269 0.0720286
Gastoporunidadconsumo <- TukeyHSD(aov(data4$Gastoporunidadconsumo~ward_factor1)); Gastoporunidadconsumo
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data4$Gastoporunidadconsumo ~ ward_factor1)
##
## $ward_factor1
## diff lwr upr p adj
## 2-1 1000.501 -1902.8770 3903.8798 0.8042178
## 3-1 3147.715 -473.4341 6768.8641 0.1003464
## 4-1 -373.465 -3509.4721 2762.5421 0.9949769
## 5-1 2947.595 -673.5541 6568.7441 0.1333735
## 3-2 2147.214 -756.1648 5050.5920 0.1929032
## 4-2 -1373.966 -3643.6417 895.7089 0.3536853
## 5-2 1947.094 -956.2848 4850.4720 0.2663381
## 4-3 -3521.180 -6657.1871 -385.1729 0.0255877
## 5-3 -200.120 -3821.2691 3421.0291 0.9997504
## 5-4 3321.060 185.0529 6457.0671 0.0362752
indicegastopersona <- TukeyHSD(aov(data4$indicegastopersona~ward_factor1)); indicegastopersona
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data4$indicegastopersona ~ ward_factor1)
##
## $ward_factor1
## diff lwr upr p adj
## 2-1 8.808571 -10.5834085 28.2005514 0.6113520
## 3-1 21.200000 -2.9860483 45.3860483 0.0966029
## 4-1 0.032500 -20.9132323 20.9782323 1.0000000
## 5-1 21.565000 -2.6210483 45.7510483 0.0892501
## 3-2 12.391429 -7.0005514 31.7834085 0.3064903
## 4-2 -8.776071 -23.9354795 6.3833366 0.3939078
## 5-2 12.756429 -6.6355514 32.1484085 0.2821013
## 4-3 -21.167500 -42.1132323 -0.2217677 0.0472118
## 5-3 0.365000 -23.8210483 24.5510483 0.9999986
## 5-4 21.532500 0.5867677 42.4782323 0.0429485