Análisis Factorial

Author

Merary y Mauricio

Análsis Factorial de 79 Empresas Estadounidenses

Importación y Exploración del Dataset

library(readxl)

mmt <- read.csv('C:\\Users\\merar\\Downloads\\companies79.csv')
#View(mmt)
head(mmt)
  X                            V1    V2   V3    V4     V5     V6   V7
1 1                  BellAtlantic 19788 9084 10636 1092.9 2576.8 79.4
2 2            ContinentalTelecom  5074 2557  1892  239.9  578.3 21.9
3 3         AmericanElectricPower 13621 4848  4572  485.0  898.9 23.4
4 4              BrooklynUnionGas  1117 1038   478   59.7   91.7  3.8
5 5  CentralIllinoisPublicService  1633  701   679   74.3  135.9  2.8
6 6 ClevelandElectricIlluminating  5651 1254  2002  310.7  407.9  6.2
             V8
1 Communication
2 Communication
3        Energy
4        Energy
5        Energy
6        Energy
mmt <- mmt[,-c(1,9)]
#View(mmt)

cambiamos la variable V1 y la colocamos como nombre de las filas.

rownames(mmt) <- mmt$V1
mmt$V1 <- NULL
print(mmt)
                                 V2    V3    V4     V5     V6    V7
BellAtlantic                  19788  9084 10636 1092.9 2576.8  79.4
ContinentalTelecom             5074  2557  1892  239.9  578.3  21.9
AmericanElectricPower         13621  4848  4572  485.0  898.9  23.4
BrooklynUnionGas               1117  1038   478   59.7   91.7   3.8
CentralIllinoisPublicService   1633   701   679   74.3  135.9   2.8
ClevelandElectricIlluminating  5651  1254  2002  310.7  407.9   6.2
ColumbiaGasSystem              5835  4053  1601   93.8  173.8  10.8
FloridaProgress                3494  1653  1442  160.9  320.3   6.4
IdahoPower                     1654   451   779   84.8  130.4   1.6
KansasPower&Light              1679  1354   687   93.8  154.6   4.6
MesaPetroleum                  1257   355   181  167.5  304.0   0.6
MontanaPower                   1743   597   717  121.6  172.4   3.5
PeoplesEnergy                  1440  1617   639   81.7  126.4   3.5
PhillipsPetroleum             14045 15636  2754  418.0 1462.0  27.3
PublicServiceCoofNewMexico     3010   749  1120  146.3  209.2   3.4
SanDiegoGas&Electric           3086  1739  1507  202.7  335.2   4.9
ValeroEnergy                   1995  2662   341   34.7  100.7   2.3
AmericanSavingsBankFSB         3614   367    90   14.1   24.6   1.1
BankSouth                      2788   271   304   23.5   28.9   2.1
H&RBlock                        327   542   959   54.1   72.5   2.8
CaliforniaFirstBank            5401   550   376   25.6   37.5   4.1
Cigna                         44736 16197  4653  732.5  651.9  48.5
Dreyfus                         401   176  1084   55.6   57.0   0.7
FirstAmerican                  4789   453   367   40.2   51.4   3.0
FirstEmpireState               2548   264   181   22.2   26.2   2.1
FirstTennesseeNational         5249   527   346   37.8   56.2   4.1
MarineCorp                     3720   356   211   26.6   34.8   2.4
MellonBank                    33406  3222  1413  201.7  246.7  15.8
NationalCity                  12505  1302   702  108.4  131.4   9.0
NorstarBancorp                 8998   882   988   93.0  119.0   7.4
Norwest                       21419  2516   930  107.6  164.7  15.6
SoutheastBanking              11052  1097   606   64.9   97.6   7.0
SovranFinancial                9672  1037   829   92.6  118.2   8.2
UnitedFinancialGroup           4989   518    53    3.1    0.3   0.8
AppleComputer                  1022  1754  1370   72.0  119.5   4.8
DigitalEquipment               6914  7029  7957  400.6  754.7  87.3
Eg&G                            430  1155  1045   55.7   70.8  22.5
GeneralElectric               26432 28285 33172 2336.0 3562.0 304.0
Hewlett-Packard                5769  6571  9462  482.0  792.0  83.0
IBM                           52634 50056 95697 6555.0 9874.0 400.2
NCR                            3940  4317  3940  315.2  566.3  62.0
Telex                           478   672   866   67.1  101.6   5.4
ArmstrongWorldIndustries       1093  1679  1070  100.9  164.5  20.8
CBIIndustries                  1128  1516   430   47.0   26.7  13.2
Fruehauf                       1804  2564   483   70.5  164.9  26.6
Halliburton                    4662  4781  2988   28.7  371.5  66.2
LTV                            6307  8199   598  771.5  524.3  57.5
Owens-CorningFiberglas         2366  3305  1117  131.2  256.5  25.2
PPGIndustries                  4084  4346  3023  302.7  521.7  37.5
Textron                       10348  5721  1915  223.6  322.5  49.5
Turner                          752  2149   101   11.1   15.2   2.6
UnitedTechnologies            10528 14992  5377  312.7  710.7 184.8
CommunityPsychiatricCenters     278   205   853   44.8   50.5   3.8
HospitalCorpofAmerica          6259  4152  3090  283.7  524.5  62.0
AHRobins                        707   706   275   61.4   77.8   6.1
SharedMedicalSystems            252   312   883   41.7   60.6   3.3
AirProducts                    2687  1870  1890  145.7  352.2  18.2
AlliedSignal                  13271  9115  8190  279.0   83.0 143.8
BallyManufactoring             1529  1295   444   25.6  137.0  19.4
CrownCork&Seal                  866  1487   944   71.7  115.4  12.6
Ex-Cell-0                       799  1140   683   57.6   89.2  15.4
LizClaiborne                    223   557  1040   60.6   63.7   1.9
WarnerCommunications           2286  2235  2306  195.3  219.0   8.0
Dayton-Hudson                  4418  8793  4459  283.6  456.5 128.0
DillardDepartmentStores         862  1601  1093   66.9  106.8  16.0
GiantFood                       623  2247   797   57.0   93.8  18.6
GreatA&PTea                    1608  6615   829   56.1  134.0  65.0
Kroger                         4178 17124  2091  180.8  390.4 164.6
MayDepartmentStores            3442  5080  2673  235.4  361.5  77.3
Stop&ShopCos                   1112  3689   542   30.3   96.9  43.5
SupermarketsGeneral            1104  5123   910   63.7  133.3  48.5
WickesCos                      2957  2806   457   40.6   93.5  50.0
FWWoolworth                    2535  5958  1921  177.0  288.0 118.1
AMR                            6425  6131  2448  345.8  682.5  49.5
IUInternational                 999  1878   393  173.5  108.1  23.3
PanAm                          2448  3484  1036   48.8  257.1  25.4
RepublicAirlines               1286  1734   361   69.2  145.7  14.3
TWA                            2769  3725   663  208.4   12.4  29.1
WesternAirLines                 952  1307   309   35.4   92.8  10.3
library(psych)
library(polycor)
Warning: package 'polycor' was built under R version 4.4.2

Adjuntando el paquete: 'polycor'
The following object is masked from 'package:psych':

    polyserial
library(ggcorrplot)
Warning: package 'ggcorrplot' was built under R version 4.4.2
Cargando paquete requerido: ggplot2

Adjuntando el paquete: 'ggplot2'
The following objects are masked from 'package:psych':

    %+%, alpha
mat_cor2 <- hetcor(mmt)$correlations #matriz de correlación policorica
ggcorrplot(mat_cor2)

mat_cor2
          V2        V3        V4        V5        V6        V7
V2 1.0000000 0.7464649 0.6822122 0.7197708 0.7055557 0.5943581
V3 0.7464649 1.0000000 0.8788920 0.8918790 0.8928037 0.9240429
V4 0.6822122 0.8788920 1.0000000 0.9875575 0.9780512 0.8182161
V5 0.7197708 0.8918790 0.9875575 1.0000000 0.9874559 0.8026105
V6 0.7055557 0.8928037 0.9780512 0.9874559 1.0000000 0.7994815
V7 0.5943581 0.9240429 0.8182161 0.8026105 0.7994815 1.0000000
pairs(mmt, col="darkblue", pch=19, main="dataset sin transformasiones")

mmts <- scale(mmt)
pairs(mmts, col="darkblue", pch=19, main="dataset Estandarizado")

Presencia de Datos Atípicos

boxplot(mmt, main= "Boxplot del Dataset Completo")

boxplot(mmts, main= "Boxplot del Dataset Completo Estandarizado")

Estructura de los datos

summary(mmt)
       V2              V3                V4                V5         
 Min.   :  223   Min.   :  176.0   Min.   :   53.0   Min.   :   3.10  
 1st Qu.: 1122   1st Qu.:  815.5   1st Qu.:  512.5   1st Qu.:  54.85  
 Median : 2788   Median : 1754.0   Median :  944.0   Median :  92.60  
 Mean   : 5941   Mean   : 4178.3   Mean   : 3269.8   Mean   : 268.29  
 3rd Qu.: 5802   3rd Qu.: 4563.5   3rd Qu.: 1961.5   3rd Qu.: 216.00  
 Max.   :52634   Max.   :50056.0   Max.   :95697.0   Max.   :6555.00  
       V6                V7        
 Min.   :   0.30   Min.   :  0.60  
 1st Qu.:  90.45   1st Qu.:  3.95  
 Median : 135.90   Median : 15.40  
 Mean   : 433.46   Mean   : 37.60  
 3rd Qu.: 356.85   3rd Qu.: 48.50  
 Max.   :9874.00   Max.   :400.20  

Supuestos que Debe Cumplir la Base de Datos

cortest.bartlett(mat_cor2)->p_esf
Warning in cortest.bartlett(mat_cor2): n not specified, 100 used
p_esf$p.value
[1] 7.954456e-242
KMO(mat_cor2)
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = mat_cor2)
Overall MSA =  0.82
MSA for each item = 
  V2   V3   V4   V5   V6   V7 
0.87 0.77 0.82 0.81 0.90 0.75 

como vemos se cumplen ambos, podemos decir que la matriz de correlacion no es ni se asemeja a la matriz identidad.

por otro lado el KMO es mayor que 0.7

Cantidad de Factores

eigen(mat_cor2)
eigen() decomposition
$values
[1] 5.161186913 0.445445928 0.318761470 0.050297303 0.016407477 0.007900909

$vectors
           [,1]        [,2]       [,3]        [,4]        [,5]        [,6]
[1,] -0.3480112  0.90838099  0.1327578 -0.18436659 -0.03864172 -0.02496722
[2,] -0.4226916 -0.05342805  0.3800100  0.76012153  0.29394475 -0.09940518
[3,] -0.4253798 -0.17231780 -0.3556313 -0.34209885  0.52589853 -0.51892587
[4,] -0.4285763 -0.08482558 -0.3683515 -0.01665633  0.15971969  0.80477097
[5,] -0.4266679 -0.10844566 -0.3691013  0.19743625 -0.75258829 -0.25414186
[6,] -0.3918531 -0.35119818  0.6630050 -0.48159536 -0.20888878  0.08925688

Por Sedimentacion

#diagrama de codos

scree(mat_cor2)

Evaluación del Modelo por Cantidad de Factores

x1 <- factanal(mmt, factors = 1, rotation = "varimax", scores = "regression")
x1

Call:
factanal(x = mmt, factors = 1, scores = "regression", rotation = "varimax")

Uniquenesses:
   V2    V3    V4    V5    V6    V7 
0.486 0.196 0.021 0.005 0.021 0.341 

Loadings:
   Factor1
V2 0.717  
V3 0.897  
V4 0.989  
V5 0.998  
V6 0.990  
V7 0.812  

               Factor1
SS loadings      4.930
Proportion Var   0.822

Test of the hypothesis that 1 factor is sufficient.
The chi square statistic is 109.55 on 9 degrees of freedom.
The p-value is 1.82e-19 
x2 <- factanal(mmt, factors = 2, rotation = "varimax", scores = "regression")
x2

Call:
factanal(x = mmt, factors = 2, scores = "regression", rotation = "varimax")

Uniquenesses:
   V2    V3    V4    V5    V6    V7 
0.434 0.005 0.021 0.005 0.020 0.141 

Loadings:
   Factor1 Factor2
V2 0.493   0.569  
V3 0.540   0.839  
V4 0.855   0.498  
V5 0.856   0.512  
V6 0.840   0.523  
V7 0.461   0.804  

               Factor1 Factor2
SS loadings      2.917   2.457
Proportion Var   0.486   0.410
Cumulative Var   0.486   0.896

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 34.75 on 4 degrees of freedom.
The p-value is 5.23e-07 
x3 <- factanal(mmt, factors = 3, rotation = "varimax", scores = "regression")
x3

Call:
factanal(x = mmt, factors = 3, scores = "regression", rotation = "varimax")

Uniquenesses:
   V2    V3    V4    V5    V6    V7 
0.349 0.005 0.012 0.005 0.020 0.031 

Loadings:
   Factor1 Factor2 Factor3
V2 0.399   0.291   0.638  
V3 0.454   0.674   0.579  
V4 0.810   0.451   0.358  
V5 0.797   0.412   0.437  
V6 0.779   0.413   0.450  
V7 0.412   0.846   0.289  

               Factor1 Factor2 Factor3
SS loadings      2.432   1.798   1.347
Proportion Var   0.405   0.300   0.224
Cumulative Var   0.405   0.705   0.930

The degrees of freedom for the model is 0 and the fit was 0.0339 

Matriz de Carga para 3 Factores

L.est.3.var <- x3$loadings
L.est.3.var

Loadings:
   Factor1 Factor2 Factor3
V2 0.399   0.291   0.638  
V3 0.454   0.674   0.579  
V4 0.810   0.451   0.358  
V5 0.797   0.412   0.437  
V6 0.779   0.413   0.450  
V7 0.412   0.846   0.289  

               Factor1 Factor2 Factor3
SS loadings      2.432   1.798   1.347
Proportion Var   0.405   0.300   0.224
Cumulative Var   0.405   0.705   0.930

Unicidades para 3 Factores

psi.est.3 <- diag(x3$uniquenesses)
psi.est.3
          [,1]  [,2]       [,3]  [,4]       [,5]       [,6]
[1,] 0.3492818 0.000 0.00000000 0.000 0.00000000 0.00000000
[2,] 0.0000000 0.005 0.00000000 0.000 0.00000000 0.00000000
[3,] 0.0000000 0.000 0.01227396 0.000 0.00000000 0.00000000
[4,] 0.0000000 0.000 0.00000000 0.005 0.00000000 0.00000000
[5,] 0.0000000 0.000 0.00000000 0.000 0.02003941 0.00000000
[6,] 0.0000000 0.000 0.00000000 0.000 0.00000000 0.03128346

Representación Gráfica para 3 Factores

plot(x3$scores[,1], x3$scores[,2], xlab="Primer Factor", ylab="Segundo Factor",
     main="Puntuaciones con MLE", pch=19, col="darkblue")
text(x3$scores[,c(1,2)], labels = rownames(mmt), pos=4, col = "darkblue")

plot(x3$scores[,1], x3$scores[,3], xlab="Primer Factor", ylab="Tercer Factor",
     main="Puntuaciones con MLE", pch=19, col="darkblue")
text(x3$scores[,c(1,3)], labels = rownames(mmt), pos=4, col = "darkblue")

plot(x3$scores[,2], x3$scores[,3], xlab="Segundo Factor", ylab="Tercer Factor",
     main="Puntuaciones con MLE", pch=19, col="darkblue")
text(x3$scores[,c(2,3)], labels = rownames(mmt), pos=4, col = "darkblue")

Análisis Final: Dos Factores

clasificacion por minimos cuadrados

#install.packages("ade4")
library(psych)
library(ade4)
Warning: package 'ade4' was built under R version 4.4.2
facto <- fa(mat_cor2, nfactors=2, rotate = "varimax")
Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
The estimated weights for the factor scores are probably incorrect.  Try a
different factor score estimation method.
Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
ultra-Heywood case was detected.  Examine the results carefully
fa.diagram(facto)

Vemos las diferencias al hacer la clasificacion por maxima verosimilitud

facto2 <- fa(mat_cor2, nfactors=2, rotate = "varimax", fm="ml")
fa.diagram(facto2, main = "Clasificación por Máxima Verosimilitud")

Cargas y Varianzas de los Dos Factores Elegidos

x2n <- factanal(mmt, factors = 2, rotation = "none", scores = "regression",fm="ml")
x2n

Call:
factanal(x = mmt, factors = 2, scores = "regression", rotation = "none",     fm = "ml")

Uniquenesses:
   V2    V3    V4    V5    V6    V7 
0.434 0.005 0.021 0.005 0.020 0.141 

Loadings:
   Factor1 Factor2
V2  0.746         
V3  0.961   0.268 
V4  0.970  -0.195 
V5  0.980  -0.186 
V6  0.976  -0.167 
V7  0.879   0.295 

               Factor1 Factor2
SS loadings      5.105   0.269
Proportion Var   0.851   0.045
Cumulative Var   0.851   0.896

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 34.75 on 4 degrees of freedom.
The p-value is 5.23e-07 
x2 <- factanal(mmt, factors = 2, rotation = "varimax", scores = "regression",fm="ml")
x2

Call:
factanal(x = mmt, factors = 2, scores = "regression", rotation = "varimax",     fm = "ml")

Uniquenesses:
   V2    V3    V4    V5    V6    V7 
0.434 0.005 0.021 0.005 0.020 0.141 

Loadings:
   Factor1 Factor2
V2 0.493   0.569  
V3 0.540   0.839  
V4 0.855   0.498  
V5 0.856   0.512  
V6 0.840   0.523  
V7 0.461   0.804  

               Factor1 Factor2
SS loadings      2.917   2.457
Proportion Var   0.486   0.410
Cumulative Var   0.486   0.896

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 34.75 on 4 degrees of freedom.
The p-value is 5.23e-07 

Unicidades para Dos Factores

psi.est.2 <- diag(x2$uniquenesses)
psi.est.2
          [,1]  [,2]       [,3]  [,4]       [,5]      [,6]
[1,] 0.4335223 0.000 0.00000000 0.000 0.00000000 0.0000000
[2,] 0.0000000 0.005 0.00000000 0.000 0.00000000 0.0000000
[3,] 0.0000000 0.000 0.02066002 0.000 0.00000000 0.0000000
[4,] 0.0000000 0.000 0.00000000 0.005 0.00000000 0.0000000
[5,] 0.0000000 0.000 0.00000000 0.000 0.02036556 0.0000000
[6,] 0.0000000 0.000 0.00000000 0.000 0.00000000 0.1410475

Representación Gráfica de la Clasificación por Empresas

plot(x2$scores[,1], x2$scores[,2], xlab="Primer Factor", ylab="Segundo Factor",
     main="Puntuaciones con MLE", pch=19, col="darkblue")+
  abline(h=0, v=0)
integer(0)
text(x2$scores[,c(1,2)], labels = rownames(mmt), pos=4, col = "darkblue")

Scores

x2$scores
                                   Factor1      Factor2
BellAtlantic                   1.260897421  0.049483567
ContinentalTelecom             0.206726535 -0.400696611
AmericanElectricPower          0.399918322 -0.143799093
BrooklynUnionGas               0.006265463 -0.537764892
CentralIllinoisPublicService   0.097247134 -0.649535509
ClevelandElectricIlluminating  0.490527104 -0.801359778
ColumbiaGasSystem             -0.350886893  0.182532128
FloridaProgress                0.154022367 -0.526651296
IdahoPower                     0.154197448 -0.726693198
KansasPower&Light              0.033161350 -0.502304333
MesaPetroleum                  0.326818438 -0.850031901
MontanaPower                   0.198094722 -0.729094317
PeoplesEnergy                 -0.033253079 -0.419055447
PhillipsPetroleum             -1.259719220  2.654402419
PublicServiceCoofNewMexico     0.234285537 -0.726383858
SanDiegoGas&Electric           0.214571533 -0.552121310
ValeroEnergy                  -0.280547859 -0.094710523
AmericanSavingsBankFSB         0.008928734 -0.648020867
BankSouth                      0.045100401 -0.686420797
H&RBlock                       0.082059779 -0.667387391
CaliforniaFirstBank            0.008075516 -0.614716723
Cigna                         -1.003736451  2.611646166
Dreyfus                        0.141029260 -0.765194599
FirstAmerican                  0.050173092 -0.657965762
FirstEmpireState               0.040100545 -0.684528688
FirstTennesseeNational         0.034786971 -0.635202144
MarineCorp                     0.035520238 -0.665640189
MellonBank                    -0.052446751 -0.115012825
NationalCity                   0.052815736 -0.512304983
NorstarBancorp                 0.098892138 -0.613169290
Norwest                       -0.125350221 -0.192730588
SoutheastBanking               0.005175655 -0.517996000
SovranFinancial                0.069432901 -0.568443522
UnitedFinancialGroup          -0.038617687 -0.592996477
AppleComputer                 -0.051640466 -0.385337060
DigitalEquipment              -0.046545989  0.522265039
Eg&G                          -0.019421927 -0.491806358
GeneralElectric                1.056082116  3.423087286
Hewlett-Packard                0.207261572  0.284016431
IBM                            7.690451136  2.855672373
NCR                            0.088652436 -0.018648880
Telex                          0.085773115 -0.646634016
ArmstrongWorldIndustries      -0.002342138 -0.417680828
CBIIndustries                 -0.109510587 -0.382114671
Fruehauf                      -0.205371708 -0.142090814
Halliburton                   -0.517009423  0.440306967
LTV                            0.133083337  0.580864166
Owens-CorningFiberglas        -0.180022802 -0.038417659
PPGIndustries                  0.044605819 -0.001548012
Textron                       -0.378520338  0.498634512
Turner                        -0.267514281 -0.187176965
UnitedTechnologies            -1.541302129  2.817191018
CommunityPsychiatricCenters    0.109316222 -0.738541933
HospitalCorpofAmerica          0.028802476 -0.005649359
AHRobins                       0.048894402 -0.617117362
SharedMedicalSystems           0.091721845 -0.710427589
AirProducts                    0.109123455 -0.456353426
AlliedSignal                  -0.750228549  1.337464841
BallyManufactoring            -0.088172791 -0.425989397
CrownCork&Seal                -0.029701189 -0.437212557
Ex-Cell-0                     -0.015134684 -0.500786668
LizClaiborne                   0.091021760 -0.671402436
WarnerCommunications           0.120441041 -0.412850313
Dayton-Hudson                 -0.695512332  1.239936663
DillardDepartmentStores       -0.054843681 -0.400949977
GiantFood                     -0.181445660 -0.215230128
GreatA&PTea                   -0.864941765  0.951995998
Kroger                        -2.227011825  3.575365042
MayDepartmentStores           -0.248696539  0.325615750
Stop&ShopCos                  -0.465733282  0.213400575
SupermarketsGeneral           -0.613639606  0.541572073
WickesCos                     -0.323736178 -0.013198511
FWWoolworth                   -0.540686056  0.676698220
AMR                           -0.142999986  0.416335325
IUInternational                0.050253315 -0.417298526
PanAm                         -0.341614700  0.092658305
RepublicAirlines              -0.081361850 -0.362692957
TWA                           -0.192909368  0.038345852
WesternAirLines               -0.082178401 -0.434401404