1. Cargar librerías necesarias
library(pacman)
p_load(haven, dplyr, factoextra, FactoMineR, readr, rgl, fpc, psych, readxl)
3. Análisis PCA para data_pca
3.1 Preparación de los datos
data_pca_clean <- data_pca %>% select(-y)
data_pca_scaled <- scale(data_pca_clean)
3.2 Diagnóstico para realizar PCA
psych::KMO(data_pca_scaled)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = data_pca_scaled)
## Overall MSA = 0.34
## MSA for each item =
## x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15
## 0.36 0.27 0.24 0.46 0.53 0.55 0.45 0.27 0.42 0.26 0.43 0.46 0.28 0.62 0.33
det(cor(data_pca_scaled))
## [1] 0.004667778
3.3 Realizar PCA
pca_data_pca <- princomp(data_pca_scaled)
summary(pca_data_pca)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.6220588 1.4501268 1.3332930 1.2434264 1.15529908
## Proportion of Variance 0.1762864 0.1408957 0.1191069 0.1035919 0.08942821
## Cumulative Proportion 0.1762864 0.3171821 0.4362890 0.5398809 0.62930907
## Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## Standard deviation 1.05569426 0.90471763 0.88908929 0.8622762 0.80999883
## Proportion of Variance 0.07467272 0.05484181 0.05296347 0.0498171 0.04395967
## Cumulative Proportion 0.70398179 0.75882360 0.81178707 0.8616042 0.90556384
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## Standard deviation 0.7012045 0.59518243 0.53958339 0.4662240 0.234552581
## Proportion of Variance 0.0329439 0.02373482 0.01950755 0.0145638 0.003686091
## Cumulative Proportion 0.9385077 0.96224255 0.98175010 0.9963139 1.000000000
fviz_eig(pca_data_pca, choice = "variance")

fviz_eig(pca_data_pca, choice = "eigenvalue")

3.4 Representaciones gráficas
fviz_pca_ind(pca_data_pca, col.ind = "cos2", gradient.cols = c("red", "yellow", "green"), repel = FALSE)

fviz_pca_var(pca_data_pca, col.var = "contrib", gradient.cols = c("red", "yellow", "green"), repel = FALSE)

fviz_pca_biplot(pca_data_pca, col.var = "red", col.ind = "black")

3.5 Rotación Varimax
pca_data_pca_rot <- psych::principal(data_pca_scaled, nfactors = 2, rotate = "varimax", scores = TRUE)
pca_data_pca_rot
## Principal Components Analysis
## Call: psych::principal(r = data_pca_scaled, nfactors = 2, rotate = "varimax",
## scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 h2 u2 com
## x1 0.04 0.52 0.269 0.73 1.0
## x2 -0.73 0.00 0.533 0.47 1.0
## x3 -0.51 0.08 0.265 0.73 1.0
## x4 0.09 -0.45 0.206 0.79 1.1
## x5 -0.14 -0.70 0.503 0.50 1.1
## x6 0.22 0.03 0.048 0.95 1.0
## x7 -0.01 0.53 0.278 0.72 1.0
## x8 0.71 -0.11 0.519 0.48 1.0
## x9 0.09 0.42 0.181 0.82 1.1
## x10 -0.64 -0.08 0.419 0.58 1.0
## x11 -0.02 0.20 0.042 0.96 1.0
## x12 -0.02 0.51 0.258 0.74 1.0
## x13 0.12 0.12 0.030 0.97 2.0
## x14 0.05 0.66 0.436 0.56 1.0
## x15 0.87 0.10 0.768 0.23 1.0
##
## RC1 RC2
## SS loadings 2.57 2.18
## Proportion Var 0.17 0.15
## Cumulative Var 0.17 0.32
## Proportion Explained 0.54 0.46
## Cumulative Proportion 0.54 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.14
## with the empirical chi square 770.91 with prob < 1.5e-115
##
## Fit based upon off diagonal values = 0.51
4. Análisis PCA para PoblacionUSA (años 2000 y
2001)
4.1 Filtrar datos
# Seleccionar las columnas del año 2000 y 2001 (excepto el nombre del estado)
vars_2000 <- grep("2000", names(poblacion), value = TRUE)
vars_2001 <- grep("2001", names(poblacion), value = TRUE)
poblacion_2000 <- poblacion[, vars_2000]
poblacion_2001 <- poblacion[, vars_2001]
# Escalamos los datos
poblacion_2000_scaled <- scale(poblacion_2000)
poblacion_2001_scaled <- scale(poblacion_2001)
## 4.2 Diagnóstico PCA (año 2000)
psych::KMO(poblacion_2000_scaled)
## Error in solve.default(r) :
## sistema es computacionalmente singular: número de condición recíproco = 3.40445e-18
## matrix is not invertible, image not found
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = poblacion_2000_scaled)
## Overall MSA = 0.5
## MSA for each item =
## census resident total population - ab:qr-1-2000
## 0.5
## resident total population estimate - jul-1-2000
## 0.5
## net domestic migration - jul-1-2000
## 0.5
## federal/civilian movement from abroad - jul-1-2000
## 0.5
## net international migration - jul-1-2000
## 0.5
## period births - jul-1-2000
## 0.5
## period deaths - jul-1-2000
## 0.5
## resident under 65 population estimate - jul-1-2000
## 0.5
## resident 65 plus population estimate - jul-1-2000
## 0.5
## residual - jul-1-2000
## 0.5
det(cor(poblacion_2000_scaled))
## [1] -8.80948e-41
4.3 Realizar PCA año 2000
pca_poblacion_2000 <- princomp(poblacion_2000_scaled)
summary(pca_poblacion_2000)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.6907367 1.1607536 0.8280683 0.6125685 0.33421800
## Proportion of Variance 0.7384865 0.1374296 0.0699411 0.0382745 0.01139357
## Cumulative Proportion 0.7384865 0.8759161 0.9458572 0.9841317 0.99552529
## Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## Standard deviation 0.204077430 0.0391252568 2.629360e-02 0 0
## Proportion of Variance 0.004248055 0.0001561401 7.051803e-05 0 0
## Cumulative Proportion 0.999773342 0.9999294820 1.000000e+00 1 1
fviz_eig(pca_poblacion_2000, choice = "variance")

fviz_eig(pca_poblacion_2000, choice = "eigenvalue")

fviz_pca_ind(pca_poblacion_2000, col.ind = "cos2", gradient.cols = c("red", "yellow", "green"))

fviz_pca_var(pca_poblacion_2000, col.var = "contrib", gradient.cols = c("red", "yellow", "green"))

fviz_pca_biplot(pca_poblacion_2000, col.var = "blue", col.ind = "black")

4.4 Diagnóstico PCA (año 2001)
psych::KMO(poblacion_2001_scaled)
## Error in solve.default(r) :
## sistema es computacionalmente singular: número de condición recíproco = 4.27828e-18
## matrix is not invertible, image not found
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = poblacion_2001_scaled)
## Overall MSA = 0.5
## MSA for each item =
## resident total population estimate - jul-1-2001
## 0.5
## net domestic migration - jul-1-2001
## 0.5
## federal/civilian movement from abroad - jul-1-2001
## 0.5
## net international migration - jul-1-2001
## 0.5
## period births - jul-1-2001
## 0.5
## period deaths - jul-1-2001
## 0.5
## resident under 65 population estimate - jul-1-2001
## 0.5
## resident 65 plus population estimate - jul-1-2001
## 0.5
## residual - jul-1-2001
## 0.5
det(cor(poblacion_2001_scaled))
## [1] -4.747386e-25
4.5 Realizar PCA año 2001
pca_poblacion_2001 <- princomp(poblacion_2001_scaled)
summary(pca_poblacion_2001)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.5056381 1.2841609 0.66974585 0.55374490 0.31095905
## Proportion of Variance 0.7115319 0.1868945 0.05083674 0.03475179 0.01095883
## Cumulative Proportion 0.7115319 0.8984264 0.94926311 0.98401490 0.99497372
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.205937451 0.0345666035 2.728378e-02 0
## Proportion of Variance 0.004806493 0.0001354163 8.436586e-05 0
## Cumulative Proportion 0.999780218 0.9999156341 1.000000e+00 1
fviz_eig(pca_poblacion_2001, choice = "variance")

fviz_eig(pca_poblacion_2001, choice = "eigenvalue")

fviz_pca_ind(pca_poblacion_2001, col.ind = "cos2", gradient.cols = c("red", "yellow", "green"))

fviz_pca_var(pca_poblacion_2001, col.var = "contrib", gradient.cols = c("red", "yellow", "green"))

fviz_pca_biplot(pca_poblacion_2001, col.var = "blue", col.ind = "black")

5. Análisis PCA para Covid
5.1 Preparación de los datos
covid_clean <- covid %>% select(where(is.numeric))
covid_scaled <- scale(covid_clean)
5.2 Diagnóstico
psych::KMO(covid_scaled)
## Error in solve.default(r) :
## sistema es computacionalmente singular: número de condición recíproco = 2.50339e-19
## matrix is not invertible, image not found
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = covid_scaled)
## Overall MSA = 0.5
## MSA for each item =
## census resident total population - ab:qr-1-2000
## 0.5
## resident total population estimate - jul-1-2000
## 0.5
## resident total population estimate - jul-1-2001
## 0.5
## net domestic migration - jul-1-2000
## 0.5
## net domestic migration - jul-1-2001
## 0.5
## federal/civilian movement from abroad - jul-1-2000
## 0.5
## federal/civilian movement from abroad - jul-1-2001
## 0.5
## net international migration - jul-1-2000
## 0.5
## net international migration - jul-1-2001
## 0.5
## period births - jul-1-2000
## 0.5
## period births - jul-1-2001
## 0.5
## period deaths - jul-1-2000
## 0.5
## period deaths - jul-1-2001
## 0.5
## resident under 65 population estimate - jul-1-2000
## 0.5
## resident under 65 population estimate - jul-1-2001
## 0.5
## resident 65 plus population estimate - jul-1-2000
## 0.5
## resident 65 plus population estimate - jul-1-2001
## 0.5
## residual - jul-1-2000
## 0.5
## residual - jul-1-2001
## 0.5
det(cor(covid_scaled))
## [1] -8.802968e-97
5.3 PCA
pca_covid <- princomp(covid_scaled)
summary(pca_covid)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 3.6751242 1.6920441 1.02094924 0.82169640 0.56482426
## Proportion of Variance 0.7250878 0.1536986 0.05595706 0.03624677 0.01712668
## Cumulative Proportion 0.7250878 0.8787864 0.93474349 0.97099026 0.98811694
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.378643593 0.25348098 0.0959218615 0.0501359223
## Proportion of Variance 0.007696757 0.00344935 0.0004939486 0.0001349412
## Cumulative Proportion 0.995813698 0.99926305 0.9997569965 0.9998919377
## Comp.10 Comp.11 Comp.12 Comp.13
## Standard deviation 0.036357133 2.279070e-02 1.157980e-02 4.658309e-03
## Proportion of Variance 0.000070962 2.788445e-05 7.198617e-06 1.164939e-06
## Cumulative Proportion 0.999962900 9.999908e-01 9.999980e-01 9.999991e-01
## Comp.14 Comp.15 Comp.16 Comp.17 Comp.18
## Standard deviation 3.633014e-03 1.636124e-03 1.794793e-08 0 0
## Proportion of Variance 7.085667e-07 1.437074e-07 1.729319e-17 0 0
## Cumulative Proportion 9.999999e-01 1.000000e+00 1.000000e+00 1 1
## Comp.19
## Standard deviation 0
## Proportion of Variance 0
## Cumulative Proportion 1
fviz_eig(pca_covid, choice = "variance")

fviz_pca_ind(pca_covid, col.ind = "cos2", gradient.cols = c("red", "yellow", "green"))

fviz_pca_var(pca_covid, col.var = "contrib", gradient.cols = c("red", "yellow", "green"))

fviz_pca_biplot(pca_covid, col.var = "blue", col.ind = "black")
