Análisis Factorial Exploratorio

Autor/a

Daniel Perdomo

Fecha de publicación

19 de mayo de 2025

Descripción de la base de datos

Título: Calidad del Vino

Se crearon dos conjuntos de datos utilizando muestras de vino tinto y blanco. Los dos conjuntos de datos están relacionados con las variantes tinto y blanco del vino portugués “Vinho Verde”. Se utilizará vino blanco para el análisis.

Número de observaciones vino blanco: 4898.
Número de variables: 11 + variable de calidad (ordinal). Valores de perdidos: Ninguno.

Variables de entrada (basadas en pruebas fisicoquímicas):
1 - Acidez fija
2 - Acidez volátil
3 - Ácido cítrico
4 - Azúcar residual
5 - Cloruros
6 - Dióxido de azufre libre
7 - Dióxido de azufre total
8 - Densidad
9 - pH
10 - Sulfatos
11 - Alcohol
12 - Calidad (puntuación entre 0 y 10)

Aplicación R

# Importar librerias
library(tidyverse)
library(stats)
library(ppcor)
library(ggplot2)
library(GGally) 
library(corrplot)
library(tseries)
library(lattice)
library(pacman)
library(MASS)
library(ca)
library(FactoMineR)
library(vegan)
library(gplots)
library(vcd)
library(factoextra)
library(DandEFA)
library(polycor)
library(psych)
library(ade4)
library(GPArotation)

library(reticulate)
# py_install("factor-analyzer")
# py_module_available("factor_analyzer")  # Debe devolver TRUE

options(scipen=999)      # Eliminar la notación científica
options(digits = 4)      # Número de decimales

Exploración inicial R

ruta <- file.choose()
wine_white <- read.csv(ruta, header=TRUE, sep=";")

# estructura
str(wine_white)
'data.frame':   4898 obs. of  12 variables:
 $ fixed.acidity       : num  7 6.3 8.1 7.2 7.2 8.1 6.2 7 6.3 8.1 ...
 $ volatile.acidity    : num  0.27 0.3 0.28 0.23 0.23 0.28 0.32 0.27 0.3 0.22 ...
 $ citric.acid         : num  0.36 0.34 0.4 0.32 0.32 0.4 0.16 0.36 0.34 0.43 ...
 $ residual.sugar      : num  20.7 1.6 6.9 8.5 8.5 6.9 7 20.7 1.6 1.5 ...
 $ chlorides           : num  0.045 0.049 0.05 0.058 0.058 0.05 0.045 0.045 0.049 0.044 ...
 $ free.sulfur.dioxide : num  45 14 30 47 47 30 30 45 14 28 ...
 $ total.sulfur.dioxide: num  170 132 97 186 186 97 136 170 132 129 ...
 $ density             : num  1.001 0.994 0.995 0.996 0.996 ...
 $ pH                  : num  3 3.3 3.26 3.19 3.19 3.26 3.18 3 3.3 3.22 ...
 $ sulphates           : num  0.45 0.49 0.44 0.4 0.4 0.44 0.47 0.45 0.49 0.45 ...
 $ alcohol             : num  8.8 9.5 10.1 9.9 9.9 10.1 9.6 8.8 9.5 11 ...
 $ quality             : int  6 6 6 6 6 6 6 6 6 6 ...
head(wine_white)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
1           7.0             0.27        0.36           20.7     0.045
2           6.3             0.30        0.34            1.6     0.049
3           8.1             0.28        0.40            6.9     0.050
4           7.2             0.23        0.32            8.5     0.058
5           7.2             0.23        0.32            8.5     0.058
6           8.1             0.28        0.40            6.9     0.050
  free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
1                  45                  170  1.0010 3.00      0.45     8.8
2                  14                  132  0.9940 3.30      0.49     9.5
3                  30                   97  0.9951 3.26      0.44    10.1
4                  47                  186  0.9956 3.19      0.40     9.9
5                  47                  186  0.9956 3.19      0.40     9.9
6                  30                   97  0.9951 3.26      0.44    10.1
  quality
1       6
2       6
3       6
4       6
5       6
6       6
tail(wine_white)
     fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
4893           6.5             0.23        0.38            1.3     0.032
4894           6.2             0.21        0.29            1.6     0.039
4895           6.6             0.32        0.36            8.0     0.047
4896           6.5             0.24        0.19            1.2     0.041
4897           5.5             0.29        0.30            1.1     0.022
4898           6.0             0.21        0.38            0.8     0.020
     free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
4893                  29                  112  0.9930 3.29      0.54     9.7
4894                  24                   92  0.9911 3.27      0.50    11.2
4895                  57                  168  0.9949 3.15      0.46     9.6
4896                  30                  111  0.9925 2.99      0.46     9.4
4897                  20                  110  0.9887 3.34      0.38    12.8
4898                  22                   98  0.9894 3.26      0.32    11.8
     quality
4893       5
4894       6
4895       5
4896       6
4897       7
4898       6
# Se omite la variable "quality"
wine_white$quality <- NULL

head(wine_white)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
1           7.0             0.27        0.36           20.7     0.045
2           6.3             0.30        0.34            1.6     0.049
3           8.1             0.28        0.40            6.9     0.050
4           7.2             0.23        0.32            8.5     0.058
5           7.2             0.23        0.32            8.5     0.058
6           8.1             0.28        0.40            6.9     0.050
  free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
1                  45                  170  1.0010 3.00      0.45     8.8
2                  14                  132  0.9940 3.30      0.49     9.5
3                  30                   97  0.9951 3.26      0.44    10.1
4                  47                  186  0.9956 3.19      0.40     9.9
5                  47                  186  0.9956 3.19      0.40     9.9
6                  30                   97  0.9951 3.26      0.44    10.1
tail(wine_white)
     fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
4893           6.5             0.23        0.38            1.3     0.032
4894           6.2             0.21        0.29            1.6     0.039
4895           6.6             0.32        0.36            8.0     0.047
4896           6.5             0.24        0.19            1.2     0.041
4897           5.5             0.29        0.30            1.1     0.022
4898           6.0             0.21        0.38            0.8     0.020
     free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
4893                  29                  112  0.9930 3.29      0.54     9.7
4894                  24                   92  0.9911 3.27      0.50    11.2
4895                  57                  168  0.9949 3.15      0.46     9.6
4896                  30                  111  0.9925 2.99      0.46     9.4
4897                  20                  110  0.9887 3.34      0.38    12.8
4898                  22                   98  0.9894 3.26      0.32    11.8
# resumen
summary(wine_white)
 fixed.acidity   volatile.acidity  citric.acid    residual.sugar 
 Min.   : 3.80   Min.   :0.080    Min.   :0.000   Min.   : 0.60  
 1st Qu.: 6.30   1st Qu.:0.210    1st Qu.:0.270   1st Qu.: 1.70  
 Median : 6.80   Median :0.260    Median :0.320   Median : 5.20  
 Mean   : 6.86   Mean   :0.278    Mean   :0.334   Mean   : 6.39  
 3rd Qu.: 7.30   3rd Qu.:0.320    3rd Qu.:0.390   3rd Qu.: 9.90  
 Max.   :14.20   Max.   :1.100    Max.   :1.660   Max.   :65.80  
   chlorides      free.sulfur.dioxide total.sulfur.dioxide    density     
 Min.   :0.0090   Min.   :  2.0       Min.   :  9          Min.   :0.987  
 1st Qu.:0.0360   1st Qu.: 23.0       1st Qu.:108          1st Qu.:0.992  
 Median :0.0430   Median : 34.0       Median :134          Median :0.994  
 Mean   :0.0458   Mean   : 35.3       Mean   :138          Mean   :0.994  
 3rd Qu.:0.0500   3rd Qu.: 46.0       3rd Qu.:167          3rd Qu.:0.996  
 Max.   :0.3460   Max.   :289.0       Max.   :440          Max.   :1.039  
       pH         sulphates       alcohol    
 Min.   :2.72   Min.   :0.22   Min.   : 8.0  
 1st Qu.:3.09   1st Qu.:0.41   1st Qu.: 9.5  
 Median :3.18   Median :0.47   Median :10.4  
 Mean   :3.19   Mean   :0.49   Mean   :10.5  
 3rd Qu.:3.28   3rd Qu.:0.55   3rd Qu.:11.4  
 Max.   :3.82   Max.   :1.08   Max.   :14.2  
describe(wine_white)
                     vars    n   mean    sd median trimmed   mad  min    max
fixed.acidity           1 4898   6.85  0.84   6.80    6.82  0.74 3.80  14.20
volatile.acidity        2 4898   0.28  0.10   0.26    0.27  0.09 0.08   1.10
citric.acid             3 4898   0.33  0.12   0.32    0.33  0.09 0.00   1.66
residual.sugar          4 4898   6.39  5.07   5.20    5.80  5.34 0.60  65.80
chlorides               5 4898   0.05  0.02   0.04    0.04  0.01 0.01   0.35
free.sulfur.dioxide     6 4898  35.31 17.01  34.00   34.36 16.31 2.00 289.00
total.sulfur.dioxide    7 4898 138.36 42.50 134.00  136.96 43.00 9.00 440.00
density                 8 4898   0.99  0.00   0.99    0.99  0.00 0.99   1.04
pH                      9 4898   3.19  0.15   3.18    3.18  0.15 2.72   3.82
sulphates              10 4898   0.49  0.11   0.47    0.48  0.10 0.22   1.08
alcohol                11 4898  10.51  1.23  10.40   10.43  1.48 8.00  14.20
                      range skew kurtosis   se
fixed.acidity         10.40 0.65     2.17 0.01
volatile.acidity       1.02 1.58     5.08 0.00
citric.acid            1.66 1.28     6.16 0.00
residual.sugar        65.20 1.08     3.46 0.07
chlorides              0.34 5.02    37.51 0.00
free.sulfur.dioxide  287.00 1.41    11.45 0.24
total.sulfur.dioxide 431.00 0.39     0.57 0.61
density                0.05 0.98     9.78 0.00
pH                     1.10 0.46     0.53 0.00
sulphates              0.86 0.98     1.59 0.00
alcohol                6.20 0.49    -0.70 0.02
#correlaciones
cor_ww <- cor(wine_white)
cor_ww
                     fixed.acidity volatile.acidity citric.acid residual.sugar
fixed.acidity              1.00000         -0.02270     0.28918        0.08902
volatile.acidity          -0.02270          1.00000    -0.14947        0.06429
citric.acid                0.28918         -0.14947     1.00000        0.09421
residual.sugar             0.08902          0.06429     0.09421        1.00000
chlorides                  0.02309          0.07051     0.11436        0.08868
free.sulfur.dioxide       -0.04940         -0.09701     0.09408        0.29910
total.sulfur.dioxide       0.09107          0.08926     0.12113        0.40144
density                    0.26533          0.02711     0.14950        0.83897
pH                        -0.42586         -0.03192    -0.16375       -0.19413
sulphates                 -0.01714         -0.03573     0.06233       -0.02666
alcohol                   -0.12088          0.06772    -0.07573       -0.45063
                     chlorides free.sulfur.dioxide total.sulfur.dioxide
fixed.acidity          0.02309          -0.0493959             0.091070
volatile.acidity       0.07051          -0.0970119             0.089261
citric.acid            0.11436           0.0940772             0.121131
residual.sugar         0.08868           0.2990984             0.401439
chlorides              1.00000           0.1013924             0.198910
free.sulfur.dioxide    0.10139           1.0000000             0.615501
total.sulfur.dioxide   0.19891           0.6155010             1.000000
density                0.25721           0.2942104             0.529881
pH                    -0.09044          -0.0006178             0.002321
sulphates              0.01676           0.0592172             0.134562
alcohol               -0.36019          -0.2501039            -0.448892
                      density         pH sulphates  alcohol
fixed.acidity         0.26533 -0.4258583  -0.01714 -0.12088
volatile.acidity      0.02711 -0.0319154  -0.03573  0.06772
citric.acid           0.14950 -0.1637482   0.06233 -0.07573
residual.sugar        0.83897 -0.1941335  -0.02666 -0.45063
chlorides             0.25721 -0.0904395   0.01676 -0.36019
free.sulfur.dioxide   0.29421 -0.0006178   0.05922 -0.25010
total.sulfur.dioxide  0.52988  0.0023210   0.13456 -0.44889
density               1.00000 -0.0935915   0.07449 -0.78014
pH                   -0.09359  1.0000000   0.15595  0.12143
sulphates             0.07449  0.1559515   1.00000 -0.01743
alcohol              -0.78014  0.1214321  -0.01743  1.00000
# mapa de calor correlacones
corrplot(
  cor_ww,
  method = "color", 
  type = "upper",           
  tl.cex = 0.8,  
  tl.col = "black",
  tl.srt = 45,              
  addCoef.col = "black",  
  number.cex = 0.7,       
  mar = c(1, 1, 2, 1),      
  title = "Mapa de calor - Matriz de correlaciones"
)

# Histogramas
hist(wine_white$fixed.acidity, col="skyblue4", main="Histograma de 'fixed acidity'", xlab="fixed acidity", ylab="Fracuencia")

hist(wine_white$volatile.acidity, col="skyblue4",  main="Histograma de 'volatile acidity'", xlab="volatile acidity", ylab="Fracuencia")

hist(wine_white$citric.acid, col="skyblue4",  main="Histograma de 'citric acid'", xlab="citric acid", ylab="Fracuencia")

hist(wine_white$residual.sugar, col="skyblue4", main="Histograma de 'residual sugar'", xlab="residual sugar", ylab="Fracuencia")

hist(wine_white$chlorides, col="skyblue4",  main="Histograma de 'chlorides'", xlab="chlorides", ylab="Fracuencia")

hist(wine_white$free.sulfur.dioxide, col="skyblue4",  main="Histograma de 'free sulfur dioxide'", xlab="free sulfur dioxide", ylab="Fracuencia")

hist(wine_white$total.sulfur.dioxide, col="skyblue4",  main="Histograma de 'total sulfur dioxide'", xlab="total sulfur dioxide", ylab="Fracuencia")

hist(wine_white$density, col="skyblue4",  main="Histograma de 'density'", xlab="density", ylab="Fracuencia")

hist(wine_white$pH, col="skyblue4",  main="Histograma de 'pH'", xlab="pH", ylab="Fracuencia")

hist(wine_white$sulphates, col="skyblue4",  main="Histograma de 'sulphates'", xlab="sulphates", ylab="Fracuencia")

hist(wine_white$alcohol, col="skyblue4",  main="Histograma de 'alcohol'", xlab="alcohol", ylab="Fracuencia")

Datos atípicos

Detección de datos atípicos

mahalanobis_dist <- mahalanobis(wine_white, colMeans(wine_white), cov(wine_white))

# Valor crítico para identificar outliers (usando chi-cuadrado)
gl <- ncol(wine_white)  # grados de libertad (número de variables)
alpha <- 0.05    # Nivel de significancia
valor_critico <- qchisq(1 - alpha, df = gl)

valor_critico #ver valor crítico
[1] 19.68
atipicos <- which(mahalanobis_dist > valor_critico) # Identificar outliers

head(mahalanobis_dist) #ver distancias
[1] 9.264 6.682 8.294 3.224 3.224 8.294
length(atipicos) 
[1] 395

Limpieza de datos atípicos

# nuevo data frame sin los datos atipicos
wine_w <- wine_white[-atipicos, ]

# comprobar:
nrow(wine_white)    
[1] 4898
nrow(wine_w) 
[1] 4503
nrow(wine_white) - nrow(wine_w) # filas eliminadas = 395
[1] 395

Estandarizar los datos

wine_scale <- scale(wine_w)

head(wine_scale)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
1        0.1967         -0.00192      0.3026         2.9009    0.1460
2       -0.6903          0.35094      0.1093        -0.9757    0.4777
3        1.5906          0.11570      0.6891         0.1000    0.5606
4        0.4502         -0.47240     -0.0840         0.4248    1.2241
5        0.4502         -0.47240     -0.0840         0.4248    1.2241
6        1.5906          0.11570      0.6891         0.1000    0.5606
  free.sulfur.dioxide total.sulfur.dioxide  density        pH sulphates alcohol
1              0.6520               0.7906 2.429378 -1.304412  -0.33888 -1.4223
2             -1.3633              -0.1403 0.009714  0.770269   0.04105 -0.8475
3             -0.3231              -0.9977 0.389947  0.493645  -0.43386 -0.3549
4              0.7820               1.1825 0.562780  0.009553  -0.81379 -0.5191
5              0.7820               1.1825 0.562780  0.009553  -0.81379 -0.5191
6             -0.3231              -0.9977 0.389947  0.493645  -0.43386 -0.3549
tail(wine_scale)
     fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
4893       -0.4368          -0.4724      0.4958        -1.0366   -0.9321
4894       -0.8170          -0.7076     -0.3739        -0.9757   -0.3516
4895       -0.3101           0.5862      0.3026         0.3233    0.3118
4896       -0.4368          -0.3548     -1.3403        -1.0569   -0.1858
4897       -1.7040           0.2333     -0.2773        -1.0772   -1.7615
4898       -1.0704          -0.7076      0.4958        -1.1380   -1.9273
     free.sulfur.dioxide total.sulfur.dioxide density      pH sulphates alcohol
4893             -0.3881              -0.6303 -0.3429  0.7011    0.5160 -0.6833
4894             -0.7132              -1.1202 -0.9789  0.5628    0.1360  0.5483
4895              1.4321               0.7416  0.3208 -0.2671   -0.2439 -0.7654
4896             -0.3231              -0.6547 -0.4950 -1.3736   -0.2439 -0.9297
4897             -0.9732              -0.6792 -1.8258  1.0469   -1.0038  1.8620
4898             -0.8432              -0.9732 -1.5769  0.4936   -1.5737  1.0409
describe(wine_scale)
                     vars    n mean sd median trimmed  mad   min  max range
fixed.acidity           1 4503    0  1  -0.06   -0.03 0.94 -3.10 4.00  7.10
volatile.acidity        2 4503    0  1  -0.12   -0.08 0.87 -2.24 4.23  6.47
citric.acid             3 4503    0  1  -0.18   -0.06 0.72 -3.18 3.97  7.15
residual.sugar          4 4503    0  1  -0.22   -0.11 1.11 -1.18 3.47  4.65
chlorides               5 4503    0  1  -0.02   -0.06 0.86 -2.59 6.86  9.45
free.sulfur.dioxide     6 4503    0  1  -0.06   -0.04 1.06 -2.14 3.97  6.11
total.sulfur.dioxide    7 4503    0  1  -0.09   -0.03 1.05 -2.91 3.29  6.20
density                 8 4503    0  1  -0.09   -0.03 1.11 -2.37 2.91  5.29
pH                      9 4503    0  1  -0.06   -0.03 0.92 -2.76 4.02  6.78
sulphates              10 4503    0  1  -0.15   -0.07 0.99 -2.52 3.94  6.46
alcohol                11 4503    0  1  -0.11   -0.06 1.10 -1.75 3.01  4.76
                     skew kurtosis   se
fixed.acidity        0.35     0.35 0.01
volatile.acidity     0.79     0.93 0.01
citric.acid          0.69     1.77 0.01
residual.sugar       0.71    -0.53 0.01
chlorides            1.41     6.07 0.01
free.sulfur.dioxide  0.37    -0.22 0.01
total.sulfur.dioxide 0.28    -0.29 0.01
density              0.26    -0.80 0.01
pH                   0.36     0.21 0.01
sulphates            0.67     0.42 0.01
alcohol              0.44    -0.75 0.01

Prueba de supuestos

Normalidad

# Ho: Los datos siguen una distribución normal
# Ha: Los datos no siguen una distribución normal
jarque.bera.test(wine_w$fixed.acidity)

    Jarque Bera Test

data:  wine_w$fixed.acidity
X-squared = 114, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$volatile.acidity)

    Jarque Bera Test

data:  wine_w$volatile.acidity
X-squared = 636, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$citric.acid)

    Jarque Bera Test

data:  wine_w$citric.acid
X-squared = 945, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$residual.sugar)

    Jarque Bera Test

data:  wine_w$residual.sugar
X-squared = 434, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$chlorides)

    Jarque Bera Test

data:  wine_w$chlorides
X-squared = 8412, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$free.sulfur.dioxide)

    Jarque Bera Test

data:  wine_w$free.sulfur.dioxide
X-squared = 114, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$total.sulfur.dioxide)

    Jarque Bera Test

data:  wine_w$total.sulfur.dioxide
X-squared = 77, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$density)

    Jarque Bera Test

data:  wine_w$density
X-squared = 169, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$pH)

    Jarque Bera Test

data:  wine_w$pH
X-squared = 103, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$sulphates)

    Jarque Bera Test

data:  wine_w$sulphates
X-squared = 372, df = 2, p-value <0.0000000000000002
jarque.bera.test(wine_w$alcohol)

    Jarque Bera Test

data:  wine_w$alcohol
X-squared = 253, df = 2, p-value <0.0000000000000002

Prueba de Bartlett

# Ho: La matriz de correlaciones es la matriz identidad 
# Ha: La matriz de correlaciones no es la matriz identidad
bartlett.test(wine_w)

    Bartlett test of homogeneity of variances

data:  wine_w
Bartlett's K-squared = 356558, df = 10, p-value <0.0000000000000002

Prueba KMO

KMO(wine_w)
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = wine_w)
Overall MSA =  0.37
MSA for each item = 
       fixed.acidity     volatile.acidity          citric.acid 
                0.13                 0.29                 0.72 
      residual.sugar            chlorides  free.sulfur.dioxide 
                0.32                 0.82                 0.61 
total.sulfur.dioxide              density                   pH 
                0.68                 0.40                 0.12 
           sulphates              alcohol 
                0.19                 0.36 

Análisis Factorial Exploratorio (EFA)

# modelo de ejes principales
mAF <- fa(wine_scale, nfactors = 5, rotate = "none", fm="paf")

Comunalidades

mAF$communalities
       fixed.acidity     volatile.acidity          citric.acid 
             0.68190              0.27186              0.22673 
      residual.sugar            chlorides  free.sulfur.dioxide 
             0.99500              0.33062              0.46616 
total.sulfur.dioxide              density                   pH 
             0.99500              0.97629              0.31232 
           sulphates              alcohol 
             0.07167              0.81653 
# ordenadas de manera decreciente
sort(mAF$communalities,decreasing = T)
      residual.sugar total.sulfur.dioxide              density 
             0.99500              0.99500              0.97629 
             alcohol        fixed.acidity  free.sulfur.dioxide 
             0.81653              0.68190              0.46616 
           chlorides                   pH     volatile.acidity 
             0.33062              0.31232              0.27186 
         citric.acid            sulphates 
             0.22673              0.07167 

Unicidades

mAF$uniquenesses
       fixed.acidity     volatile.acidity          citric.acid 
            0.318134             0.728104             0.773289 
      residual.sugar            chlorides  free.sulfur.dioxide 
           -0.005115             0.669384             0.533832 
total.sulfur.dioxide              density                   pH 
           -0.006996             0.023716             0.687666 
           sulphates              alcohol 
            0.928363             0.183483 
# ordenadas de manera decreciente
sort(mAF$uniquenesses,decreasing = T)
           sulphates          citric.acid     volatile.acidity 
            0.928363             0.773289             0.728104 
                  pH            chlorides  free.sulfur.dioxide 
            0.687666             0.669384             0.533832 
       fixed.acidity              alcohol              density 
            0.318134             0.183483             0.023716 
      residual.sugar total.sulfur.dioxide 
           -0.005115            -0.006996 

Determinar número de factores

eigen_var_vect <- eigen(cor(wine_scale))

# Un criterio para determinar el número de factores es con Kaiser
# Autovalores mayores a 1 al tratarse de la matriz de correlaciones
eigen_var_vect$values
 [1] 3.46163 1.57425 1.23190 1.05864 0.95322 0.75338 0.72039 0.57133 0.38898
[10] 0.27420 0.01208
# Gráfico de sedimentación
plot(eigen_var_vect$values,type="b",pch=20,col="dodgerblue4", main="Gráfico de sedimentación",  xlab="Factores", ylab="Eigenvalores")
abline(h=1,lty=3,col="firebrick3")

# Gráficos de sedimentación FA y PC

# 1
scree(cor(wine_scale), main="Gráfico de sedimentación FA y PC")

# 2
fa.parallel(cor(wine_scale),main="Gráfico de sedimentación FA y PC", ylab="Eigenvalores")

Parallel analysis suggests that the number of factors =  5  and the number of components =  2 

Cargas factoriales

mAF$loadings

Loadings:
                     MR1    MR2    MR3    MR4    MR5   
fixed.acidity         0.217 -0.732  0.296              
volatile.acidity                           0.241 -0.454
citric.acid           0.155 -0.280  0.249         0.247
residual.sugar        0.805        -0.309  0.489  0.138
chlorides             0.459        -0.106 -0.278 -0.175
free.sulfur.dioxide   0.491  0.257  0.334  0.129  0.175
total.sulfur.dioxide  0.754  0.350  0.531        -0.174
density               0.954        -0.226              
pH                   -0.155  0.494        -0.150  0.130
sulphates                    0.144        -0.153  0.106
alcohol              -0.788         0.220  0.379       

                 MR1   MR2   MR3   MR4   MR5
SS loadings    3.303 1.086 0.765 0.592 0.419
Proportion Var 0.300 0.099 0.070 0.054 0.038
Cumulative Var 0.300 0.399 0.469 0.522 0.561
contribucion <- mAF$loadings*mAF$loadings
contribucion

Loadings:
                     MR1   MR2   MR3   MR4   MR5  
fixed.acidity              0.535                  
volatile.acidity                             0.206
citric.acid                                       
residual.sugar       0.648             0.239      
chlorides            0.211                        
free.sulfur.dioxide  0.241       0.112            
total.sulfur.dioxide 0.569 0.123 0.282            
density              0.911                        
pH                         0.244                  
sulphates                                         
alcohol              0.621             0.144      

                 MR1   MR2   MR3   MR4   MR5
SS loadings    2.065 0.372 0.118 0.088 0.050
Proportion Var 0.188 0.034 0.011 0.008 0.005
Cumulative Var 0.188 0.222 0.232 0.240 0.245
contribucion <- as.matrix(contribucion)
corrplot(contribucion, is.corr = FALSE, col = colorRampPalette(c("white", "firebrick3"))(200))

# Biplots AF sin rotación 
fa_no_rota <- fa(wine_scale, nfactors = 4, rotate = "none", fm = "paf")

# F1 y F2
biplot.psych(fa_no_rota,choose = c(1, 2), main = "Biplot: Factor 1 y Factor 2 (Sin rotación)", col = c("steelblue", "black"))

# F1 y F3
biplot.psych(fa_no_rota,choose = c(1, 3), main = "Biplot: Factor 1 y Factor 3 (Sin rotación)", col = c("firebrick", "black"))

# F1 y F4
biplot.psych(fa_no_rota,choose = c(1, 4), main = "Biplot: Factor 1 y Factor 4 (Sin rotación)", col = c("lightgreen", "black"))

# F2 y F3
biplot.psych(fa_no_rota,choose = c(2, 3), main = "Biplot: Factor 2 y Factor 3 (Sin rotación)", col = c("gold2", "black"))

# F2 y F4
biplot.psych(fa_no_rota,choose = c(2, 4), main = "Biplot: Factor 2 y Factor 4 (Sin rotación)", col = c("#808080", "black"))

# F3 y F4
biplot.psych(fa_no_rota,choose = c(3, 4), main = "Biplot: Factor 3 y Factor 4 (Sin rotación)", col = c("purple4", "black"))

# Círculos de correlación
cargas <- mAF$loadings
# F1 y F2
s.corcircle(cargas[, c(1, 2)],grid=T, sub="Factor 1 (x) y Factor 2 (y)", clabel = 1.2)

# F1 y F3
s.corcircle(cargas[, c(1, 3)],grid=T, sub="Factor 1 (x) y Factor 3 (y)", clabel = 1.2)

# F1 y F4
s.corcircle(cargas[, c(1, 4)],grid=T, sub="Factor 1 (x) y Factor 4 (y)", clabel = 1.2)

# F2 y F3
s.corcircle(cargas[, c(2, 3)],grid=T, sub="Factor 2 (x) y Factor 3 (y)", clabel = 1.2)

# F2 y F4
s.corcircle(cargas[, c(2, 4)],grid=T, sub="Factor 2 (x) y Factor 4 (y)", clabel = 1.2)

# F3 y F4
s.corcircle(cargas[, c(3, 4)],grid=T, sub="Factor 3 (x) y Factor 4 (y)", clabel = 1.2)

Rotación de los factores

Rotación ortogonal (quartimax)

# Biplots AF rotación ortogonal "biquartimax"
fa_quartimax <- fa(wine_scale, nfactors = 4, rotate = "quartimax", fm = "paf")

# F1 y F2
biplot.psych(fa_quartimax,choose = c(1, 2), main = "Biplot: Factor 1 y Factor 2 (Rotación: quartimax)", col = c("steelblue", "black"))

# F1 y F3
biplot.psych(fa_quartimax,choose = c(1, 3), main = "Biplot: Factor 1 y Factor 3 (Rotación: quartimax)", col = c("firebrick", "black"))

# F1 y F4
biplot.psych(fa_quartimax,choose = c(1, 4), main = "Biplot: Factor 1 y Factor 4 (Rotación: quartimax)", col = c("lightgreen", "black"))

# F2 y F3
biplot.psych(fa_quartimax,choose = c(2, 3), main = "Biplot: Factor 2 y Factor 3 (Rotación: quartimax)", col = c("gold2", "black"))

# F2 y F4
biplot.psych(fa_quartimax,choose = c(2, 4), main = "Biplot: Factor 2 y Factor 4 (Rotación: quartimax)", col = c("#808080", "black"))

# F3 y F4
biplot.psych(fa_quartimax,choose = c(3, 4), main = "Biplot: Factor 3 y Factor 4 (Rotación: quartimax)", col = c("purple4", "black"))

Rotación oblicua (oblimin)

fa_oblimin <- fa(wine_scale, nfactors = 4, rotate = "oblimin", fm = "paf")

# F1 y F2
biplot.psych(fa_oblimin,choose = c(1, 2), main = "Biplot: Factor 1 y Factor 2 (Rotación: oblimin)", col = c("steelblue", "black"))

# F1 y F3
biplot.psych(fa_oblimin,choose = c(1, 3), main = "Biplot: Factor 1 y Factor 3 (Rotación: oblimin)", col = c("firebrick", "black"))

# F1 y F4
biplot.psych(fa_oblimin,choose = c(1, 4), main = "Biplot: Factor 1 y Factor 4 (Rotación: oblimin)", col = c("lightgreen", "black"))

# F2 y F3
biplot.psych(fa_oblimin,choose = c(2, 3), main = "Biplot: Factor 2 y Factor 3 (Rotación: oblimin)", col = c("gold2", "black"))

# F2 y F4
biplot.psych(fa_oblimin,choose = c(2, 4), main = "Biplot: Factor 2 y Factor 4 (Rotación: oblimin)", col = c("#808080", "black"))

# F3 y F4
biplot.psych(fa_oblimin,choose = c(3, 4), main = "Biplot: Factor 3 y Factor 4 (Rotación: oblimin)", col = c("purple4", "black"))

# Correlación de los factores
fa_oblimin$r.scores
       [,1]    [,2]   [,3]    [,4]
[1,] 1.0000 0.53911 0.3767 0.16141
[2,] 0.5391 1.00000 0.4427 0.04545
[3,] 0.3767 0.44270 1.0000 0.18534
[4,] 0.1614 0.04545 0.1853 1.00000

Gráficos de factores

# Factores sin rotar
fa.diagram(fa_no_rota, main = "Factores sin rotación")

# Factores rotación ortogonal quartimax
fa.diagram(fa_quartimax, main = "Factores rotación ortogonal quartimax")

# Factores rotación oblicua oblimin
fa.diagram(fa_oblimin, main = "Factores rotación oblicua oblimin")

Aplicación Python

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
import factor_analyzer
from factor_analyzer import FactorAnalyzer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from scipy.spatial.distance import pdist, squareform
import pingouin as pg 
from scipy.spatial.distance import mahalanobis
from scipy.stats import chi2
from scipy.stats import jarque_bera
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
from factor_analyzer.factor_analyzer import calculate_kmo
import seaborn as sns

Exploración inicial

wine_white=pd.read_csv(r"C:\Users\MINEDUCYT\Documents\Seminario\wine+quality\winequality-white.csv", sep=";")

pd.set_option('display.max_rows', None)  # Mostrar todas las filas
pd.set_option('display.max_columns', None)  # Mostrar todas las columnas

# información de las variables
wine_white.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4898 entries, 0 to 4897
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         4898 non-null   float64
 1   volatile acidity      4898 non-null   float64
 2   citric acid           4898 non-null   float64
 3   residual sugar        4898 non-null   float64
 4   chlorides             4898 non-null   float64
 5   free sulfur dioxide   4898 non-null   float64
 6   total sulfur dioxide  4898 non-null   float64
 7   density               4898 non-null   float64
 8   pH                    4898 non-null   float64
 9   sulphates             4898 non-null   float64
 10  alcohol               4898 non-null   float64
 11  quality               4898 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 459.3 KB
# Datos descriptivos
wine_white.describe()
       fixed acidity  volatile acidity  citric acid  residual sugar  \
count    4898.000000       4898.000000  4898.000000     4898.000000   
mean        6.854788          0.278241     0.334192        6.391415   
std         0.843868          0.100795     0.121020        5.072058   
min         3.800000          0.080000     0.000000        0.600000   
25%         6.300000          0.210000     0.270000        1.700000   
50%         6.800000          0.260000     0.320000        5.200000   
75%         7.300000          0.320000     0.390000        9.900000   
max        14.200000          1.100000     1.660000       65.800000   

         chlorides  free sulfur dioxide  total sulfur dioxide      density  \
count  4898.000000          4898.000000           4898.000000  4898.000000   
mean      0.045772            35.308085            138.360657     0.994027   
std       0.021848            17.007137             42.498065     0.002991   
min       0.009000             2.000000              9.000000     0.987110   
25%       0.036000            23.000000            108.000000     0.991723   
50%       0.043000            34.000000            134.000000     0.993740   
75%       0.050000            46.000000            167.000000     0.996100   
max       0.346000           289.000000            440.000000     1.038980   

                pH    sulphates      alcohol      quality  
count  4898.000000  4898.000000  4898.000000  4898.000000  
mean      3.188267     0.489847    10.514267     5.877909  
std       0.151001     0.114126     1.230621     0.885639  
min       2.720000     0.220000     8.000000     3.000000  
25%       3.090000     0.410000     9.500000     5.000000  
50%       3.180000     0.470000    10.400000     6.000000  
75%       3.280000     0.550000    11.400000     6.000000  
max       3.820000     1.080000    14.200000     9.000000  
wine_white.head()
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.0              0.27         0.36            20.7      0.045   
1            6.3              0.30         0.34             1.6      0.049   
2            8.1              0.28         0.40             6.9      0.050   
3            7.2              0.23         0.32             8.5      0.058   
4            7.2              0.23         0.32             8.5      0.058   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 45.0                 170.0   1.0010  3.00       0.45   
1                 14.0                 132.0   0.9940  3.30       0.49   
2                 30.0                  97.0   0.9951  3.26       0.44   
3                 47.0                 186.0   0.9956  3.19       0.40   
4                 47.0                 186.0   0.9956  3.19       0.40   

   alcohol  quality  
0      8.8        6  
1      9.5        6  
2     10.1        6  
3      9.9        6  
4      9.9        6  
wine_white.tail()
      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
4893            6.2              0.21         0.29             1.6      0.039   
4894            6.6              0.32         0.36             8.0      0.047   
4895            6.5              0.24         0.19             1.2      0.041   
4896            5.5              0.29         0.30             1.1      0.022   
4897            6.0              0.21         0.38             0.8      0.020   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
4893                 24.0                  92.0  0.99114  3.27       0.50   
4894                 57.0                 168.0  0.99490  3.15       0.46   
4895                 30.0                 111.0  0.99254  2.99       0.46   
4896                 20.0                 110.0  0.98869  3.34       0.38   
4897                 22.0                  98.0  0.98941  3.26       0.32   

      alcohol  quality  
4893     11.2        6  
4894      9.6        5  
4895      9.4        6  
4896     12.8        7  
4897     11.8        6  
# Elimar variable "quality"
wine_white=wine_white.drop("quality", axis=1) # axis=0 filas, axis=1 columnas

wine_white.head()
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.0              0.27         0.36            20.7      0.045   
1            6.3              0.30         0.34             1.6      0.049   
2            8.1              0.28         0.40             6.9      0.050   
3            7.2              0.23         0.32             8.5      0.058   
4            7.2              0.23         0.32             8.5      0.058   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 45.0                 170.0   1.0010  3.00       0.45   
1                 14.0                 132.0   0.9940  3.30       0.49   
2                 30.0                  97.0   0.9951  3.26       0.44   
3                 47.0                 186.0   0.9956  3.19       0.40   
4                 47.0                 186.0   0.9956  3.19       0.40   

   alcohol  
0      8.8  
1      9.5  
2     10.1  
3      9.9  
4      9.9  
wine_white.tail()
      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
4893            6.2              0.21         0.29             1.6      0.039   
4894            6.6              0.32         0.36             8.0      0.047   
4895            6.5              0.24         0.19             1.2      0.041   
4896            5.5              0.29         0.30             1.1      0.022   
4897            6.0              0.21         0.38             0.8      0.020   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
4893                 24.0                  92.0  0.99114  3.27       0.50   
4894                 57.0                 168.0  0.99490  3.15       0.46   
4895                 30.0                 111.0  0.99254  2.99       0.46   
4896                 20.0                 110.0  0.98869  3.34       0.38   
4897                 22.0                  98.0  0.98941  3.26       0.32   

      alcohol  
4893     11.2  
4894      9.6  
4895      9.4  
4896     12.8  
4897     11.8  
# Correlaciones
plt.clf()  # Limpia la figura actual antes de graficar
plt.figure(figsize=(10, 8))
sns.heatmap(wine_white.corr(), annot=True, cmap="coolwarm", center=0, linewidths=0.5)
plt.title("Mapa de calor correlaciones")
plt.show()

# Histogramas de las variables
for columna in wine_white.columns:
    plt.clf()
    sns.histplot(wine_white[columna], kde=True, bins=15) # kde graficar curva
    plt.title(f'Histograma de {columna}')
    plt.ylabel("Frecuencia")
    plt.show()

Datos atípicos

Detección de datos atípicos

xi_xbar = wine_white - wine_white.mean()
inv_cov = np.linalg.inv(wine_white.cov())
parte1 = np.dot(xi_xbar, inv_cov)
parte2 = np.dot(parte1, xi_xbar.T)

mahalanobis_dist= parte2.diagonal()
print(mahalanobis_dist)
[9.26390697 6.68246735 8.2939681  ... 9.55520151 7.89464608 7.23836837]
# Valor crítico para identificar outliers (usando chi-cuadrado)
gl = wine_white.shape[1] # grados de libertad (número de variables)
alpha = 0.05       # Nivel de significancia
valor_critico = chi2.ppf(1 - alpha, df=gl)

print("Valor crítico: ",valor_critico)
Valor crítico:  19.67513757268249
# Identificar outliers
atipicos = np.where(mahalanobis_dist > valor_critico)[0]

# Mostrar los outliers
print("Número de datos atípicos: \n", len(atipicos))
Número de datos atípicos: 
 395

Limpieza de datos atípicos

# Eliminar outliers
wine_w = wine_white.drop(index=atipicos)

print(f"Base original: {wine_white.shape[0]} filas")
Base original: 4898 filas
print(f"Base limpia: {wine_w.shape[0]} filas (se eliminaron {len(atipicos)} observaciones atípicas")
Base limpia: 4503 filas (se eliminaron 395 observaciones atípicas

Estandarizar los datos

# escalador
scaler = StandardScaler()

# escalar la base
wine_scale = scaler.fit_transform(wine_w)

# Dataframe de la base escalada
wine_scale = pd.DataFrame(wine_scale, columns=wine_white.columns)

# Base escalada/estandarizada
print(wine_scale.describe())
       fixed acidity  volatile acidity   citric acid  residual sugar  \
count   4.503000e+03      4.503000e+03  4.503000e+03    4.503000e+03   
mean    1.767284e-16     -3.818595e-16 -4.670678e-16   -1.767284e-16   
std     1.000111e+00      1.000111e+00  1.000111e+00    1.000111e+00   
min    -3.098146e+00     -2.236944e+00 -3.176817e+00   -1.178772e+00   
25%    -6.903295e-01     -7.077169e-01 -5.672597e-01   -9.351902e-01   
50%    -5.669373e-02     -1.195529e-01 -1.806586e-01   -2.247437e-01   
75%     5.769421e-01      5.862440e-01  4.958935e-01    7.089860e-01   
max     3.998575e+00      4.232861e+00  3.975304e+00    3.469578e+00   

          chlorides  free sulfur dioxide  total sulfur dioxide       density  \
count  4.503000e+03         4.503000e+03          4.503000e+03  4.503000e+03   
mean   1.136111e-16         2.288001e-16         -2.145987e-16  6.311728e-15   
std    1.000111e+00         1.000111e+00          1.000111e+00  1.000111e+00   
min   -2.591053e+00        -2.143602e+00         -2.908771e+00 -2.372190e+00   
25%   -6.834314e-01        -7.132585e-01         -7.283190e-01 -7.888627e-01   
50%   -1.991078e-02        -6.310253e-02         -9.133299e-02 -9.399628e-02   
75%    5.606698e-01         6.520691e-01          6.926498e-01  7.356950e-01   
max    6.864116e+00         3.967865e+00          3.289593e+00  2.913634e+00   

                 pH     sulphates       alcohol  
count  4.503000e+03  4.503000e+03  4.503000e+03  
mean  -1.375957e-15 -4.891589e-16 -1.375957e-15  
std    1.000111e+00  1.000111e+00  1.000111e+00  
min   -2.756994e+00 -2.523759e+00 -1.750924e+00  
25%   -6.820833e-01 -7.188921e-01 -8.476427e-01  
50%   -5.961010e-02 -1.489342e-01 -1.085945e-01  
75%    6.320268e-01  5.160168e-01  7.125703e-01  
max    4.021048e+00  3.935764e+00  3.011832e+00  

Prueba de supuestos

Normalidad

print("Ha: Los datos siguen una distribución normal")
Ha: Los datos siguen una distribución normal
print("Ha: Los datos no siguen una distribución normal")
Ha: Los datos no siguen una distribución normal
for columna in wine_w.columns:
    jb_stat, p_value = jarque_bera(wine_w[columna])
    print(f"\nVariable: {columna}")
    print(f"Estadístico JB: {jb_stat:.4f}")
    print(f"Valor p: {p_value:.4f}")

Variable: fixed acidity
Estadístico JB: 113.9378
Valor p: 0.0000

Variable: volatile acidity
Estadístico JB: 635.5124
Valor p: 0.0000

Variable: citric acid
Estadístico JB: 944.7620
Valor p: 0.0000

Variable: residual sugar
Estadístico JB: 434.1879
Valor p: 0.0000

Variable: chlorides
Estadístico JB: 8411.9283
Valor p: 0.0000

Variable: free sulfur dioxide
Estadístico JB: 113.9814
Valor p: 0.0000

Variable: total sulfur dioxide
Estadístico JB: 77.0026
Valor p: 0.0000

Variable: density
Estadístico JB: 169.2974
Valor p: 0.0000

Variable: pH
Estadístico JB: 102.9806
Valor p: 0.0000

Variable: sulphates
Estadístico JB: 371.7906
Valor p: 0.0000

Variable: alcohol
Estadístico JB: 253.3940
Valor p: 0.0000

Prueba de Bartlett

print("Ha: La matriz de correlaciones es la matriz identidad")
Ha: La matriz de correlaciones es la matriz identidad
print("Ha: La matriz de correlaciones no es la matriz identidad")
Ha: La matriz de correlaciones no es la matriz identidad
p_value = calculate_bartlett_sphericity(wine_w)
print(f"p-valor: {p_value}")
p-valor: (np.float64(26590.217856052033), np.float64(0.0))

Prueba KMO

kmo_all, kmo_model = calculate_kmo(wine_w)
print(kmo_all)
[0.12953995 0.28670479 0.72465744 0.31699293 0.82259387 0.60730888
 0.68125771 0.40296197 0.12415828 0.18852687 0.36168296]
print(kmo_model)
0.36653747127663994

Análisis Factorial Exploratorio (EFA)

# Crear el modelo
fa_sin_rotacion = FactorAnalyzer(n_factors=4, method='principal', rotation=None)

# Aplicar a la base
fa_sin_rotacion.fit(wine_scale)
FactorAnalyzer(method='principal', n_factors=4, rotation=None,
               rotation_kwargs={})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Varianza explicada
varianza = fa_sin_rotacion.get_factor_variance()
print("Varianza por factor:", varianza[1]*100,"%")
Varianza por factor: [31.46936965 14.31140846 11.19905582  9.62399505] %

Comunalidades

comunalidades = fa_sin_rotacion.get_communalities()
print("Comunalidades:", comunalidades)
Comunalidades: [0.6414543  0.56938736 0.60338116 0.65712336 0.57522096 0.76498
 0.75300714 0.90217532 0.6529997  0.41204653 0.79464536]

Unicidades

unicidades = 1-fa_sin_rotacion.get_communalities()
print("unicidades:", unicidades)
unicidades: [0.3585457  0.43061264 0.39661884 0.34287664 0.42477904 0.23502
 0.24699286 0.09782468 0.3470003  0.58795347 0.20535464]

Determinar número de factores

# 1. Eigenvalores
autovalores, _ = fa_sin_rotacion.get_eigenvalues()
print("Autovalores:", autovalores)
Autovalores: [3.46163066 1.57425493 1.23189614 1.05863946 0.95322006 0.75337666
 0.72039041 0.57133279 0.38897996 0.27420026 0.01207867]
# 2. Gráfico de sedimentación
plt.clf() 
plt.plot(range(1, len(autovalores)+1), autovalores, 'o-')
[<matplotlib.lines.Line2D object at 0x000001A211B3B4D0>]
plt.axhline(y=1, color='r', linestyle='--')
<matplotlib.lines.Line2D object at 0x000001A2189F1590>
plt.xlabel("Número de Factores")
Text(0.5, 0, 'Número de Factores')
plt.ylabel("Autovalores")
Text(0, 0.5, 'Autovalores')
plt.title("Gráfico de sedimentación")
Text(0.5, 1.0, 'Gráfico de sedimentación')
plt.show()

Cargas factoriales

cargas = fa_sin_rotacion.loadings_
print(cargas)
[[ 0.24254434 -0.75475996  0.1099159  -0.0297059 ]
 [-0.00883745  0.10944178 -0.61543426  0.42257831]
 [ 0.20178308 -0.4783024   0.56144353  0.13664821]
 [ 0.77565978 -0.02454286 -0.20811914  0.10751435]
 [ 0.55710835  0.09994367 -0.17144426 -0.47483615]
 [ 0.56614519  0.24582749  0.29703696  0.5438727 ]
 [ 0.73866287  0.24000483  0.12472533  0.36636808]
 [ 0.93358465 -0.00943635 -0.08982329 -0.14979238]
 [-0.19354225  0.73505749  0.23531004 -0.14092825]
 [ 0.11065673  0.30292328  0.52614027 -0.17667913]
 [-0.81398007 -0.05528124  0.09528781  0.34633225]]
cargas2 = pd.DataFrame(cargas, index=wine_w.columns)
cargas2
                             0         1         2         3
fixed acidity         0.242544 -0.754760  0.109916 -0.029706
volatile acidity     -0.008837  0.109442 -0.615434  0.422578
citric acid           0.201783 -0.478302  0.561444  0.136648
residual sugar        0.775660 -0.024543 -0.208119  0.107514
chlorides             0.557108  0.099944 -0.171444 -0.474836
free sulfur dioxide   0.566145  0.245827  0.297037  0.543873
total sulfur dioxide  0.738663  0.240005  0.124725  0.366368
density               0.933585 -0.009436 -0.089823 -0.149792
pH                   -0.193542  0.735057  0.235310 -0.140928
sulphates             0.110657  0.302923  0.526140 -0.176679
alcohol              -0.813980 -0.055281  0.095288  0.346332
# Contribución
contribucion = cargas2*cargas2
print(contribucion)
                             0         1         2         3
fixed acidity         0.058828  0.569663  0.012082  0.000882
volatile acidity      0.000078  0.011978  0.378759  0.178572
citric acid           0.040716  0.228773  0.315219  0.018673
residual sugar        0.601648  0.000602  0.043314  0.011559
chlorides             0.310370  0.009989  0.029393  0.225469
free sulfur dioxide   0.320520  0.060431  0.088231  0.295798
total sulfur dioxide  0.545623  0.057602  0.015556  0.134226
density               0.871580  0.000089  0.008068  0.022438
pH                    0.037459  0.540310  0.055371  0.019861
sulphates             0.012245  0.091763  0.276824  0.031216
alcohol               0.662564  0.003056  0.009080  0.119946
contribucion=contribucion.values

plt.clf() 
plt.figure(figsize=(10, 8))
<Figure size 1000x800 with 0 Axes>
sns.heatmap(contribucion, annot=True, cmap="coolwarm", center=0, linewidths=0.5)
<Axes: >
plt.ylabel(wine_w.columns)
Text(95.72222222222221, 0.5, "Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',\n       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',\n       'pH', 'sulphates', 'alcohol'],\n      dtype='object')")
plt.title("Mapa de calor: contribución a los factores")
Text(0.5, 1.0, 'Mapa de calor: contribución a los factores')
plt.show()

# Gráficos de cargas y scores 

#####################################################################
def biplot(fa, X, choose=(0, 1), title="Biplot", point_color=None, vector_color='black'):
    plt.clf() 
    # Obtener cargas factoriales y puntuaciones
    loadings = fa.loadings_[:, choose]
    scores = fa.transform(X)[:, choose]
    
    # Escalar las puntuaciones para mejor visualización
    scores = (scores - scores.mean(axis=0)) / scores.std(axis=0)
    
    # Crear figura
    fig, ax = plt.subplots(figsize=(10, 8))
    
    # Graficar observaciones (puntos)
    ax.scatter(scores[:, 0], scores[:, 1], alpha=0.7, color=point_color)
    
    # Graficar variables (vectores)
    for i, (x, y) in enumerate(loadings):
        ax.arrow(0, 0, x, y, color=vector_color, head_width=0.05,          length_includes_head=True)
        ax.text(x*1.15, y*1.15, X.columns[i], color=vector_color, ha='center', va='center')
    
    # Líneas de referencia
    ax.axhline(0, color='gray', linestyle='--', linewidth=0.5)
    ax.axvline(0, color='gray', linestyle='--', linewidth=0.5)
    
    # Etiquetas y título
    ax.set_xlabel(f'Factor {choose[0]+1}')
    ax.set_ylabel(f'Factor {choose[1]+1}')
    ax.set_title(title)
    
    plt.grid(True, linestyle='--', alpha=0.3)
    plt.tight_layout()
    plt.show()

# Paleta de colores 
paleta_colores = ['#FFFF00', '#00FFFF', '#FF00FF', '#00FF00', '#FF0000', '#0892D0']

# Asignación de colores a cada combinación de factores
combinaciones_factores = [((0, 1), "Factor 1 y Factor 2"), ((0, 2), "Factor 1 y Factor 3"), ((0, 3), "Factor 1 y Factor 4"), ((1, 2), "Factor 2 y Factor 3"), ((1, 3), "Factor 2 y Factor 4"), ((2, 3), "Factor 3 y Factor 4")]
#######################################################################

# Generar todos los biplots con colores distintos
for (i, j), title_suffix in combinaciones_factores:
    color_idx = i + j - 1  # Asignación de color 
    biplot(fa_sin_rotacion, wine_scale, 
           choose=(i, j), 
           title=f"Biplot: {title_suffix} (Sin rotación)",
           point_color=paleta_colores[color_idx % len(paleta_colores)],
           vector_color='black')

Rotación de los factores

Rotación ortogonal (varimax)

# Crear el modelo
fa_varimax = FactorAnalyzer(n_factors=4, method='principal', rotation="varimax")

# Aplicar a la base
fa_varimax.fit(wine_scale)
FactorAnalyzer(method='principal', n_factors=4, rotation='varimax',
               rotation_kwargs={})
# Gráficos de cargas y scores 

# Generar todos los biplots con colores distintos
for (i, j), title_suffix in combinaciones_factores:
    color_idx = i + j - 1  # Asignación de color 
    biplot(fa_varimax, wine_scale, 
           choose=(i, j), 
           title=f"Biplot: {title_suffix} (Rotación=Varimax)",
           point_color=paleta_colores[color_idx % len(paleta_colores)],
           vector_color='black')

Rotación oblicua (promax)

# Crear el modelo
fa_promax = FactorAnalyzer(n_factors=4, method='principal', rotation="promax")

# Aplicar a la base
fa_promax.fit(wine_scale)
FactorAnalyzer(method='principal', n_factors=4, rotation_kwargs={})
# Gráficos de cargas y scores 

# Generar todos los biplots con colores distintos
for (i, j), title_suffix in combinaciones_factores:
    color_idx = i + j - 1  # Asignación de color 
    biplot(fa_promax, wine_scale, 
           choose=(i, j), 
           title=f"Biplot: {title_suffix} (Rotación=Promax)",
           point_color=paleta_colores[color_idx % len(paleta_colores)],
           vector_color='black')

           
# Correlaciones en los factores

# Puntuaciones factoriales
fa_scores = fa_promax.transform(wine_scale)

# Convertir a DataFrame
fa_scores_df = pd.DataFrame(fa_scores, columns=[f'Factor{i+1}' for i in range(fa_scores.shape[1])])

# Calcular matriz de correlaciones
corr_fa_promax = fa_scores_df.corr()
print(corr_fa_promax)
          Factor1   Factor2   Factor3   Factor4
Factor1  1.000000  0.138909 -0.085543  0.418329
Factor2  0.138909  1.000000 -0.021976  0.134548
Factor3 -0.085543 -0.021976  1.000000  0.100826
Factor4  0.418329  0.134548  0.100826  1.000000

Gráficos de factores

### Factores sin rotación
loadings = fa_sin_rotacion.loadings_
loadings_no_rota = pd.DataFrame(loadings, 
                           columns=[f'Factor{i+1}' for i in range(loadings.shape[1])])

loadings_altos = loadings_no_rota.where(np.abs(loadings_no_rota) >= 0.50)
loadings_altos.index = [f'{wine_scale.columns[i]}' for i in range(wine_scale.shape[1])]

# Graficar
plt.clf()
plt.figure(figsize=(10, 6))
<Figure size 1000x600 with 0 Axes>
sns.heatmap(loadings_altos, annot=True, cmap='coolwarm', center=0)
<Axes: >
plt.title("Cargas factoriales - Sin rotación")
Text(0.5, 1.0, 'Cargas factoriales - Sin rotación')
plt.xlabel("Factores")
Text(0.5, 36.72222222222221, 'Factores')
plt.ylabel("Variables")
Text(95.72222222222221, 0.5, 'Variables')
plt.show()

### Factores rotación ortogonal varimax
loadings = fa_varimax.loadings_
loadings_varimax = pd.DataFrame(loadings, 
                           columns=[f'Factor{i+1}' for i in range(loadings.shape[1])])

loadings_altos = loadings_varimax.where(np.abs(loadings_varimax) >= 0.50)
loadings_altos.index = [f'{wine_scale.columns[i]}' for i in range(wine_scale.shape[1])]


# Graficar
plt.clf()
plt.figure(figsize=(10, 6))
<Figure size 1000x600 with 0 Axes>
sns.heatmap(loadings_altos, annot=True, cmap='coolwarm', center=0)
<Axes: >
plt.title("Cargas factoriales - Rotación Varimax")
Text(0.5, 1.0, 'Cargas factoriales - Rotación Varimax')
plt.xlabel("Factores")
Text(0.5, 36.72222222222221, 'Factores')
plt.ylabel("Variables")
Text(95.72222222222221, 0.5, 'Variables')
plt.show()

### Factores rotación oblliua promax
loadings = fa_promax.loadings_
loadings_promax = pd.DataFrame(loadings, 
                           columns=[f'Factor{i+1}' for i in range(loadings.shape[1])])

loadings_altos = loadings_promax.where(np.abs(loadings_promax) >= 0.50)

loadings_altos.index = [f'{wine_scale.columns[i]}' for i in range(wine_scale.shape[1])]

# Graficar
plt.clf()
plt.figure(figsize=(10, 6))
<Figure size 1000x600 with 0 Axes>
sns.heatmap(loadings_altos, annot=True, cmap='coolwarm', center=0)
<Axes: >
plt.title("Cargas factoriales - Rotación Varimax")
Text(0.5, 1.0, 'Cargas factoriales - Rotación Varimax')
plt.xlabel("Factores")
Text(0.5, 36.72222222222221, 'Factores')
plt.ylabel("Variables")
Text(95.72222222222221, 0.5, 'Variables')
plt.show()

### Correlaciones entre factores (rotación oblicua)
plt.clf()
plt.figure(figsize=(10, 8))
<Figure size 1000x800 with 0 Axes>
sns.heatmap(corr_fa_promax, annot=True, cmap="coolwarm", center=0, linewidths=0.5)
<Axes: >
plt.title("Mapa de calor correlaciones de los factores - Rotación oblicua promax")
Text(0.5, 1.0, 'Mapa de calor correlaciones de los factores - Rotación oblicua promax')
plt.show()