poblacion <- read_excel("C:/Users/MIRZ/Downloads/PoblacionUSA.xlsm")
View(poblacion)
Año 2000 #########33
poblacion_2000 <- poblacion %>%
select(State, contains("AB:Qr-1-2000"), contains("Jul-1-2000"), contains("2000 - AB"))
poblacion_2000 <- scale(poblacion_2000[,-1])
View(poblacion_2000)
psych::KMO(poblacion_2000)
## Error in solve.default(r) :
## sistema es computacionalmente singular: número de condición recíproco = 3.40445e-18
## matrix is not invertible, image not found
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = poblacion_2000)
## Overall MSA = 0.5
## MSA for each item =
## Census Resident Total Population - AB:Qr-1-2000
## 0.5
## Resident Total Population Estimate - Jul-1-2000
## 0.5
## Net Domestic Migration - Jul-1-2000
## 0.5
## Federal/Civilian Movement from Abroad - Jul-1-2000
## 0.5
## Net International Migration - Jul-1-2000
## 0.5
## Period Births - Jul-1-2000
## 0.5
## Period Deaths - Jul-1-2000
## 0.5
## Resident Under 65 Population Estimate - Jul-1-2000
## 0.5
## Resident 65 Plus Population Estimate - Jul-1-2000
## 0.5
## Residual - Jul-1-2000
## 0.5
Nota: #El índice KMO para todas las variables es igual a 0.5, lo cual es aceptable para realizar un Análisis de Componentes Principales (PCA).
pca <- princomp(poblacion_2000)
summary(pca)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.6907367 1.1607536 0.8280683 0.6125685 0.33421800
## Proportion of Variance 0.7384865 0.1374296 0.0699411 0.0382745 0.01139357
## Cumulative Proportion 0.7384865 0.8759161 0.9458572 0.9841317 0.99552529
## Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## Standard deviation 0.204077430 0.0391252568 2.629360e-02 0 0
## Proportion of Variance 0.004248055 0.0001561401 7.051803e-05 0 0
## Cumulative Proportion 0.999773342 0.9999294820 1.000000e+00 1 1
Nota: #Las primeras dos componentes principales explican la mayor parte de la varianza (>10%).
fviz_eig(pca, choice="variance")
> Nota: #Solo los componentes 1 y 2 tienen varianza mayor al
10%.
fviz_eig(pca, choice="eigenvalue")
> Nota: #Confirmamos que solo dos componentes tienen
eigenvalores mayores a 1.
fviz_pca_ind(pca, col.ind="cos2", gradient.cols=c("red", "yellow", "green"), repel=FALSE)
> Nota: #las observaciones en color rojo no son tan bien
representadas, pero son la minoria (solo son 2).
fviz_pca_var(pca, col.ind="contrib", gradient.cols=c("red", "yellow", "green"), repel=FALSE)
> Nota: #Las variables están representadas principalmente en
el primer cuadrante, con diferentes niveles de contribución.
fviz_pca_biplot(pca, col.var="red", col.ind="black")
> Nota: #Las observaciones se encuentran agrupadas
mayormente en el cuadrante 2
det(cor(poblacion_2000))
## [1] -8.80948e-41
pca2 <- psych::principal(poblacion_2000, nfactors=2, residuals = FALSE, rotate = "varimax",
scores=TRUE, oblique.scores = FALSE, method = "regression",
use = "pairwise", cor = "cor", weight = NULL)
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## In factor.stats, I could not find the RMSEA upper bound . Sorry about that
## Warning in psych::principal(poblacion_2000, nfactors = 2, residuals = FALSE, :
## The matrix is not positive semi-definite, scores found from Structure loadings
pca2
## Principal Components Analysis
## Call: psych::principal(r = poblacion_2000, nfactors = 2, residuals = FALSE,
## rotate = "varimax", scores = TRUE, oblique.scores = FALSE,
## method = "regression", use = "pairwise", cor = "cor", weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## Census Resident Total Population - AB:Qr-1-2000 1.00 -0.02 0.99 0.0059 1.0
## Resident Total Population Estimate - Jul-1-2000 1.00 -0.02 0.99 0.0058 1.0
## Net Domestic Migration - Jul-1-2000 -0.26 0.77 0.66 0.3421 1.2
## Federal/Civilian Movement from Abroad - Jul-1-2000 0.74 0.42 0.73 0.2692 1.6
## Net International Migration - Jul-1-2000 0.94 0.04 0.89 0.1128 1.0
## Period Births - Jul-1-2000 0.99 0.05 0.99 0.0142 1.0
## Period Deaths - Jul-1-2000 0.97 -0.08 0.94 0.0563 1.0
## Resident Under 65 Population Estimate - Jul-1-2000 1.00 -0.01 0.99 0.0061 1.0
## Resident 65 Plus Population Estimate - Jul-1-2000 0.97 -0.07 0.94 0.0623 1.0
## Residual - Jul-1-2000 0.20 0.77 0.63 0.3663 1.1
##
## RC1 RC2
## SS loadings 7.38 1.38
## Proportion Var 0.74 0.14
## Cumulative Var 0.74 0.88
## Proportion Explained 0.84 0.16
## Cumulative Proportion 0.84 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.06
## with the empirical chi square 15.64 with prob < 0.94
##
## Fit based upon off diagonal values = 0.99
Año 2001
poblacion_2001 <- poblacion %>%
select(State, contains("Jul-1-2001"))
poblacion_2001 <- scale(poblacion_2001[,-1])
View(poblacion_2001)
psych::KMO(poblacion_2001)
## Error in solve.default(r) :
## sistema es computacionalmente singular: número de condición recíproco = 4.27828e-18
## matrix is not invertible, image not found
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = poblacion_2001)
## Overall MSA = 0.5
## MSA for each item =
## Resident Total Population Estimate - Jul-1-2001
## 0.5
## Net Domestic Migration - Jul-1-2001
## 0.5
## Federal/Civilian Movement from Abroad - Jul-1-2001
## 0.5
## Net International Migration - Jul-1-2001
## 0.5
## Period Births - Jul-1-2001
## 0.5
## Period Deaths - Jul-1-2001
## 0.5
## Resident Under 65 Population Estimate - Jul-1-2001
## 0.5
## Resident 65 Plus Population Estimate - Jul-1-2001
## 0.5
## Residual - Jul-1-2001
## 0.5
pca <- princomp(poblacion_2001)
summary(pca)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.5056381 1.2841609 0.66974585 0.55374490 0.31095905
## Proportion of Variance 0.7115319 0.1868945 0.05083674 0.03475179 0.01095883
## Cumulative Proportion 0.7115319 0.8984264 0.94926311 0.98401490 0.99497372
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.205937451 0.0345666035 2.728378e-02 0
## Proportion of Variance 0.004806493 0.0001354163 8.436586e-05 0
## Cumulative Proportion 0.999780218 0.9999156341 1.000000e+00 1
Nota: #Las primeras dos componentes principales explican la mayor parte de la varianza (>11%)
fviz_eig(pca, choice="variance")
fviz_eig(pca, choice="eigenvalue")
fviz_pca_ind(pca, col.ind="cos2", gradient.cols=c("red", "yellow", "green"), repel=FALSE)
fviz_pca_var(pca, col.ind="contrib", gradient.cols=c("red", "yellow", "green"), repel=FALSE)
fviz_pca_biplot(pca, col.var="red", col.ind="black")
det(cor(poblacion_2001))
## [1] -4.747386e-25
pca2 <- psych::principal(poblacion_2001, nfactors=2, residuals = FALSE, rotate = "varimax",
scores=TRUE, oblique.scores = FALSE, method = "regression",
use = "pairwise", cor = "cor", weight = NULL)
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in psych::principal(poblacion_2001, nfactors = 2, residuals = FALSE, :
## The matrix is not positive semi-definite, scores found from Structure loadings
pca2
## Principal Components Analysis
## Call: psych::principal(r = poblacion_2001, nfactors = 2, residuals = FALSE,
## rotate = "varimax", scores = TRUE, oblique.scores = FALSE,
## method = "regression", use = "pairwise", cor = "cor", weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## Resident Total Population Estimate - Jul-1-2001 1.00 0.03 0.99 0.0068 1.0
## Net Domestic Migration - Jul-1-2001 -0.22 0.87 0.81 0.1884 1.1
## Federal/Civilian Movement from Abroad - Jul-1-2001 -0.70 -0.47 0.72 0.2839 1.8
## Net International Migration - Jul-1-2001 0.94 0.08 0.89 0.1088 1.0
## Period Births - Jul-1-2001 0.99 0.06 0.98 0.0178 1.0
## Period Deaths - Jul-1-2001 0.97 0.00 0.93 0.0651 1.0
## Resident Under 65 Population Estimate - Jul-1-2001 1.00 0.03 0.99 0.0068 1.0
## Resident 65 Plus Population Estimate - Jul-1-2001 0.96 0.04 0.93 0.0676 1.0
## Residual - Jul-1-2001 0.27 0.87 0.83 0.1691 1.2
##
## RC1 RC2
## SS loadings 6.32 1.76
## Proportion Var 0.70 0.20
## Cumulative Var 0.70 0.90
## Proportion Explained 0.78 0.22
## Cumulative Proportion 0.78 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.05
## with the empirical chi square 7.81 with prob < 0.99
##
## Fit based upon off diagonal values = 1
# Pesos de los componentes
pca2$weights
##
## Loadings:
## RC1 RC2
## Resident Total Population Estimate - Jul-1-2001 0.996
## Net Domestic Migration - Jul-1-2001 -0.217 0.874
## Federal/Civilian Movement from Abroad - Jul-1-2001 -0.702 -0.473
## Net International Migration - Jul-1-2001 0.941
## Period Births - Jul-1-2001 0.989
## Period Deaths - Jul-1-2001 0.967
## Resident Under 65 Population Estimate - Jul-1-2001 0.996
## Resident 65 Plus Population Estimate - Jul-1-2001 0.965
## Residual - Jul-1-2001 0.268 0.871
##
## RC1 RC2
## SS loadings 6.324 1.761
## Proportion Var 0.703 0.196
## Cumulative Var 0.703 0.898
pca2$weights[1:2,]
## RC1 RC2
## Resident Total Population Estimate - Jul-1-2001 0.9960902 0.03160348
## Net Domestic Migration - Jul-1-2001 -0.2169152 0.87440525
pca2$scores
## RC1 RC2
## [1,] -1.4291841341 -0.895589927
## [2,] -4.5218019005 -0.225123863
## [3,] -0.2824457258 2.003493806
## [4,] -2.9196880400 -0.419243592
## [5,] 30.3149688256 3.109167975
## [6,] -1.1377761088 1.646116160
## [7,] -2.4271045796 -1.296376113
## [8,] -4.6451810435 -0.505020445
## [9,] -4.6828920563 -0.539294950
## [10,] 12.2059011851 7.805516259
## [11,] 3.3391111202 3.886120105
## [12,] -3.5103714637 -0.187796367
## [13,] -4.1291024130 0.007248103
## [14,] 7.1172472823 -0.163081667
## [15,] -0.2891830258 -0.885518029
## [16,] -2.8669196336 -1.076939998
## [17,] -2.5735488746 -0.433211616
## [18,] -1.5666950230 -0.482587488
## [19,] -1.1198693483 -0.798888935
## [20,] -4.2999265633 -0.435013511
## [21,] -0.1783808397 0.020322413
## [22,] -0.0005745319 -1.842910534
## [23,] 2.8756393843 -1.569909740
## [24,] -1.2857362018 -0.219614930
## [25,] -2.5535394289 -0.388290829
## [26,] -0.3704120562 -0.465809778
## [27,] -4.5271735883 -0.432530571
## [28,] -3.7187359157 -0.679554291
## [29,] -3.1357051016 2.047016827
## [30,] -4.4153947011 -0.252894045
## [31,] 2.5732276573 -1.030518910
## [32,] -3.6091967232 -0.620880183
## [33,] 13.4790034088 -3.842743194
## [34,] 3.2249490627 1.629815253
## [35,] -4.6399425997 -0.507231660
## [36,] 4.1329412793 -2.211222124
## [37,] -1.8633577761 -0.438310335
## [38,] -2.5094843875 -0.389091106
## [39,] 4.9957903006 -2.633449571
## [40,] -4.4430947999 -0.606538265
## [41,] -1.4946138247 -0.159111905
## [42,] -4.6090754460 -0.444260502
## [43,] -0.3884583164 -0.260214889
## [44,] 15.6117874789 4.358583262
## [45,] -3.2776626554 -0.499760309
## [46,] -4.8833313888 -0.550632272
## [47,] 3.6316197358 3.006730211
## [48,] 0.6355026177 1.086291021
## [49,] -3.8038639562 -0.733952684
## [50,] -1.1452304803 -0.976110868
## [51,] -4.8830346852 -0.507191398