Análsis Factorial de 79 Empresas Estadounidenses
Importación y Exploración del Dataset
library (readxl)
mmt <- read.csv ('C: \\ Users \\ merar \\ Downloads \\ companies79.csv' )
#View(mmt)
head (mmt)
X V1 V2 V3 V4 V5 V6 V7
1 1 BellAtlantic 19788 9084 10636 1092.9 2576.8 79.4
2 2 ContinentalTelecom 5074 2557 1892 239.9 578.3 21.9
3 3 AmericanElectricPower 13621 4848 4572 485.0 898.9 23.4
4 4 BrooklynUnionGas 1117 1038 478 59.7 91.7 3.8
5 5 CentralIllinoisPublicService 1633 701 679 74.3 135.9 2.8
6 6 ClevelandElectricIlluminating 5651 1254 2002 310.7 407.9 6.2
V8
1 Communication
2 Communication
3 Energy
4 Energy
5 Energy
6 Energy
mmt <- mmt[,- c (1 ,9 )]
#View(mmt)
cambiamos la variable V1 y la colocamos como nombre de las filas.
rownames (mmt) <- mmt$ V1
mmt$ V1 <- NULL
print (mmt)
V2 V3 V4 V5 V6 V7
BellAtlantic 19788 9084 10636 1092.9 2576.8 79.4
ContinentalTelecom 5074 2557 1892 239.9 578.3 21.9
AmericanElectricPower 13621 4848 4572 485.0 898.9 23.4
BrooklynUnionGas 1117 1038 478 59.7 91.7 3.8
CentralIllinoisPublicService 1633 701 679 74.3 135.9 2.8
ClevelandElectricIlluminating 5651 1254 2002 310.7 407.9 6.2
ColumbiaGasSystem 5835 4053 1601 93.8 173.8 10.8
FloridaProgress 3494 1653 1442 160.9 320.3 6.4
IdahoPower 1654 451 779 84.8 130.4 1.6
KansasPower&Light 1679 1354 687 93.8 154.6 4.6
MesaPetroleum 1257 355 181 167.5 304.0 0.6
MontanaPower 1743 597 717 121.6 172.4 3.5
PeoplesEnergy 1440 1617 639 81.7 126.4 3.5
PhillipsPetroleum 14045 15636 2754 418.0 1462.0 27.3
PublicServiceCoofNewMexico 3010 749 1120 146.3 209.2 3.4
SanDiegoGas&Electric 3086 1739 1507 202.7 335.2 4.9
ValeroEnergy 1995 2662 341 34.7 100.7 2.3
AmericanSavingsBankFSB 3614 367 90 14.1 24.6 1.1
BankSouth 2788 271 304 23.5 28.9 2.1
H&RBlock 327 542 959 54.1 72.5 2.8
CaliforniaFirstBank 5401 550 376 25.6 37.5 4.1
Cigna 44736 16197 4653 732.5 651.9 48.5
Dreyfus 401 176 1084 55.6 57.0 0.7
FirstAmerican 4789 453 367 40.2 51.4 3.0
FirstEmpireState 2548 264 181 22.2 26.2 2.1
FirstTennesseeNational 5249 527 346 37.8 56.2 4.1
MarineCorp 3720 356 211 26.6 34.8 2.4
MellonBank 33406 3222 1413 201.7 246.7 15.8
NationalCity 12505 1302 702 108.4 131.4 9.0
NorstarBancorp 8998 882 988 93.0 119.0 7.4
Norwest 21419 2516 930 107.6 164.7 15.6
SoutheastBanking 11052 1097 606 64.9 97.6 7.0
SovranFinancial 9672 1037 829 92.6 118.2 8.2
UnitedFinancialGroup 4989 518 53 3.1 0.3 0.8
AppleComputer 1022 1754 1370 72.0 119.5 4.8
DigitalEquipment 6914 7029 7957 400.6 754.7 87.3
Eg&G 430 1155 1045 55.7 70.8 22.5
GeneralElectric 26432 28285 33172 2336.0 3562.0 304.0
Hewlett-Packard 5769 6571 9462 482.0 792.0 83.0
IBM 52634 50056 95697 6555.0 9874.0 400.2
NCR 3940 4317 3940 315.2 566.3 62.0
Telex 478 672 866 67.1 101.6 5.4
ArmstrongWorldIndustries 1093 1679 1070 100.9 164.5 20.8
CBIIndustries 1128 1516 430 47.0 26.7 13.2
Fruehauf 1804 2564 483 70.5 164.9 26.6
Halliburton 4662 4781 2988 28.7 371.5 66.2
LTV 6307 8199 598 771.5 524.3 57.5
Owens-CorningFiberglas 2366 3305 1117 131.2 256.5 25.2
PPGIndustries 4084 4346 3023 302.7 521.7 37.5
Textron 10348 5721 1915 223.6 322.5 49.5
Turner 752 2149 101 11.1 15.2 2.6
UnitedTechnologies 10528 14992 5377 312.7 710.7 184.8
CommunityPsychiatricCenters 278 205 853 44.8 50.5 3.8
HospitalCorpofAmerica 6259 4152 3090 283.7 524.5 62.0
AHRobins 707 706 275 61.4 77.8 6.1
SharedMedicalSystems 252 312 883 41.7 60.6 3.3
AirProducts 2687 1870 1890 145.7 352.2 18.2
AlliedSignal 13271 9115 8190 279.0 83.0 143.8
BallyManufactoring 1529 1295 444 25.6 137.0 19.4
CrownCork&Seal 866 1487 944 71.7 115.4 12.6
Ex-Cell-0 799 1140 683 57.6 89.2 15.4
LizClaiborne 223 557 1040 60.6 63.7 1.9
WarnerCommunications 2286 2235 2306 195.3 219.0 8.0
Dayton-Hudson 4418 8793 4459 283.6 456.5 128.0
DillardDepartmentStores 862 1601 1093 66.9 106.8 16.0
GiantFood 623 2247 797 57.0 93.8 18.6
GreatA&PTea 1608 6615 829 56.1 134.0 65.0
Kroger 4178 17124 2091 180.8 390.4 164.6
MayDepartmentStores 3442 5080 2673 235.4 361.5 77.3
Stop&ShopCos 1112 3689 542 30.3 96.9 43.5
SupermarketsGeneral 1104 5123 910 63.7 133.3 48.5
WickesCos 2957 2806 457 40.6 93.5 50.0
FWWoolworth 2535 5958 1921 177.0 288.0 118.1
AMR 6425 6131 2448 345.8 682.5 49.5
IUInternational 999 1878 393 173.5 108.1 23.3
PanAm 2448 3484 1036 48.8 257.1 25.4
RepublicAirlines 1286 1734 361 69.2 145.7 14.3
TWA 2769 3725 663 208.4 12.4 29.1
WesternAirLines 952 1307 309 35.4 92.8 10.3
library (psych)
library (polycor)
Warning: package 'polycor' was built under R version 4.4.2
Adjuntando el paquete: 'polycor'
The following object is masked from 'package:psych':
polyserial
Warning: package 'ggcorrplot' was built under R version 4.4.2
Cargando paquete requerido: ggplot2
Adjuntando el paquete: 'ggplot2'
The following objects are masked from 'package:psych':
%+%, alpha
mat_cor2 <- hetcor (mmt)$ correlations #matriz de correlación policorica
ggcorrplot (mat_cor2)
V2 V3 V4 V5 V6 V7
V2 1.0000000 0.7464649 0.6822122 0.7197708 0.7055557 0.5943581
V3 0.7464649 1.0000000 0.8788920 0.8918790 0.8928037 0.9240429
V4 0.6822122 0.8788920 1.0000000 0.9875575 0.9780512 0.8182161
V5 0.7197708 0.8918790 0.9875575 1.0000000 0.9874559 0.8026105
V6 0.7055557 0.8928037 0.9780512 0.9874559 1.0000000 0.7994815
V7 0.5943581 0.9240429 0.8182161 0.8026105 0.7994815 1.0000000
pairs (mmt, col= "darkblue" , pch= 19 , main= "dataset sin transformasiones" )
mmts <- scale (mmt)
pairs (mmts, col= "darkblue" , pch= 19 , main= "dataset Estandarizado" )
Presencia de Datos Atípicos
boxplot (mmt, main= "Boxplot del Dataset Completo" )
boxplot (mmts, main= "Boxplot del Dataset Completo Estandarizado" )
Estructura de los datos
V2 V3 V4 V5
Min. : 223 Min. : 176.0 Min. : 53.0 Min. : 3.10
1st Qu.: 1122 1st Qu.: 815.5 1st Qu.: 512.5 1st Qu.: 54.85
Median : 2788 Median : 1754.0 Median : 944.0 Median : 92.60
Mean : 5941 Mean : 4178.3 Mean : 3269.8 Mean : 268.29
3rd Qu.: 5802 3rd Qu.: 4563.5 3rd Qu.: 1961.5 3rd Qu.: 216.00
Max. :52634 Max. :50056.0 Max. :95697.0 Max. :6555.00
V6 V7
Min. : 0.30 Min. : 0.60
1st Qu.: 90.45 1st Qu.: 3.95
Median : 135.90 Median : 15.40
Mean : 433.46 Mean : 37.60
3rd Qu.: 356.85 3rd Qu.: 48.50
Max. :9874.00 Max. :400.20
Supuestos que Debe Cumplir la Base de Datos
cortest.bartlett (mat_cor2)-> p_esf
Warning in cortest.bartlett(mat_cor2): n not specified, 100 used
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = mat_cor2)
Overall MSA = 0.82
MSA for each item =
V2 V3 V4 V5 V6 V7
0.87 0.77 0.82 0.81 0.90 0.75
como vemos se cumplen ambos, podemos decir que la matriz de correlacion no es ni se asemeja a la matriz identidad.
por otro lado el KMO es mayor que 0.7
Cantidad de Factores
eigen() decomposition
$values
[1] 5.161186913 0.445445928 0.318761470 0.050297303 0.016407477 0.007900909
$vectors
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] -0.3480112 0.90838099 0.1327578 -0.18436659 -0.03864172 -0.02496722
[2,] -0.4226916 -0.05342805 0.3800100 0.76012153 0.29394475 -0.09940518
[3,] -0.4253798 -0.17231780 -0.3556313 -0.34209885 0.52589853 -0.51892587
[4,] -0.4285763 -0.08482558 -0.3683515 -0.01665633 0.15971969 0.80477097
[5,] -0.4266679 -0.10844566 -0.3691013 0.19743625 -0.75258829 -0.25414186
[6,] -0.3918531 -0.35119818 0.6630050 -0.48159536 -0.20888878 0.08925688
Por Sedimentacion
#diagrama de codos
scree (mat_cor2)
Evaluación del Modelo por Cantidad de Factores
x1 <- factanal (mmt, factors = 1 , rotation = "varimax" , scores = "regression" )
x1
Call:
factanal(x = mmt, factors = 1, scores = "regression", rotation = "varimax")
Uniquenesses:
V2 V3 V4 V5 V6 V7
0.486 0.196 0.021 0.005 0.021 0.341
Loadings:
Factor1
V2 0.717
V3 0.897
V4 0.989
V5 0.998
V6 0.990
V7 0.812
Factor1
SS loadings 4.930
Proportion Var 0.822
Test of the hypothesis that 1 factor is sufficient.
The chi square statistic is 109.55 on 9 degrees of freedom.
The p-value is 1.82e-19
x2 <- factanal (mmt, factors = 2 , rotation = "varimax" , scores = "regression" )
x2
Call:
factanal(x = mmt, factors = 2, scores = "regression", rotation = "varimax")
Uniquenesses:
V2 V3 V4 V5 V6 V7
0.434 0.005 0.021 0.005 0.020 0.141
Loadings:
Factor1 Factor2
V2 0.493 0.569
V3 0.540 0.839
V4 0.855 0.498
V5 0.856 0.512
V6 0.840 0.523
V7 0.461 0.804
Factor1 Factor2
SS loadings 2.917 2.457
Proportion Var 0.486 0.410
Cumulative Var 0.486 0.896
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 34.75 on 4 degrees of freedom.
The p-value is 5.23e-07
x3 <- factanal (mmt, factors = 3 , rotation = "varimax" , scores = "regression" )
x3
Call:
factanal(x = mmt, factors = 3, scores = "regression", rotation = "varimax")
Uniquenesses:
V2 V3 V4 V5 V6 V7
0.349 0.005 0.012 0.005 0.020 0.031
Loadings:
Factor1 Factor2 Factor3
V2 0.399 0.291 0.638
V3 0.454 0.674 0.579
V4 0.810 0.451 0.358
V5 0.797 0.412 0.437
V6 0.779 0.413 0.450
V7 0.412 0.846 0.289
Factor1 Factor2 Factor3
SS loadings 2.432 1.798 1.347
Proportion Var 0.405 0.300 0.224
Cumulative Var 0.405 0.705 0.930
The degrees of freedom for the model is 0 and the fit was 0.0339
Matriz de Carga para 3 Factores
L.est.3. var <- x3$ loadings
L.est.3. var
Loadings:
Factor1 Factor2 Factor3
V2 0.399 0.291 0.638
V3 0.454 0.674 0.579
V4 0.810 0.451 0.358
V5 0.797 0.412 0.437
V6 0.779 0.413 0.450
V7 0.412 0.846 0.289
Factor1 Factor2 Factor3
SS loadings 2.432 1.798 1.347
Proportion Var 0.405 0.300 0.224
Cumulative Var 0.405 0.705 0.930
Unicidades para 3 Factores
psi.est.3 <- diag (x3$ uniquenesses)
psi.est.3
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0.3492818 0.000 0.00000000 0.000 0.00000000 0.00000000
[2,] 0.0000000 0.005 0.00000000 0.000 0.00000000 0.00000000
[3,] 0.0000000 0.000 0.01227396 0.000 0.00000000 0.00000000
[4,] 0.0000000 0.000 0.00000000 0.005 0.00000000 0.00000000
[5,] 0.0000000 0.000 0.00000000 0.000 0.02003941 0.00000000
[6,] 0.0000000 0.000 0.00000000 0.000 0.00000000 0.03128346
Representación Gráfica para 3 Factores
plot (x3$ scores[,1 ], x3$ scores[,2 ], xlab= "Primer Factor" , ylab= "Segundo Factor" ,
main= "Puntuaciones con MLE" , pch= 19 , col= "darkblue" )
text (x3$ scores[,c (1 ,2 )], labels = rownames (mmt), pos= 4 , col = "darkblue" )
plot (x3$ scores[,1 ], x3$ scores[,3 ], xlab= "Primer Factor" , ylab= "Tercer Factor" ,
main= "Puntuaciones con MLE" , pch= 19 , col= "darkblue" )
text (x3$ scores[,c (1 ,3 )], labels = rownames (mmt), pos= 4 , col = "darkblue" )
plot (x3$ scores[,2 ], x3$ scores[,3 ], xlab= "Segundo Factor" , ylab= "Tercer Factor" ,
main= "Puntuaciones con MLE" , pch= 19 , col= "darkblue" )
text (x3$ scores[,c (2 ,3 )], labels = rownames (mmt), pos= 4 , col = "darkblue" )
Análisis Final: Dos Factores
clasificacion por minimos cuadrados
#install.packages("ade4")
library (psych)
library (ade4)
Warning: package 'ade4' was built under R version 4.4.2
facto <- fa (mat_cor2, nfactors= 2 , rotate = "varimax" )
Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
The estimated weights for the factor scores are probably incorrect. Try a
different factor score estimation method.
Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
ultra-Heywood case was detected. Examine the results carefully
Vemos las diferencias al hacer la clasificacion por maxima verosimilitud
facto2 <- fa (mat_cor2, nfactors= 2 , rotate = "varimax" , fm= "ml" )
fa.diagram (facto2, main = "Clasificación por Máxima Verosimilitud" )
Cargas y Varianzas de los Dos Factores Elegidos
x2n <- factanal (mmt, factors = 2 , rotation = "none" , scores = "regression" ,fm= "ml" )
x2n
Call:
factanal(x = mmt, factors = 2, scores = "regression", rotation = "none", fm = "ml")
Uniquenesses:
V2 V3 V4 V5 V6 V7
0.434 0.005 0.021 0.005 0.020 0.141
Loadings:
Factor1 Factor2
V2 0.746
V3 0.961 0.268
V4 0.970 -0.195
V5 0.980 -0.186
V6 0.976 -0.167
V7 0.879 0.295
Factor1 Factor2
SS loadings 5.105 0.269
Proportion Var 0.851 0.045
Cumulative Var 0.851 0.896
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 34.75 on 4 degrees of freedom.
The p-value is 5.23e-07
x2 <- factanal (mmt, factors = 2 , rotation = "varimax" , scores = "regression" ,fm= "ml" )
x2
Call:
factanal(x = mmt, factors = 2, scores = "regression", rotation = "varimax", fm = "ml")
Uniquenesses:
V2 V3 V4 V5 V6 V7
0.434 0.005 0.021 0.005 0.020 0.141
Loadings:
Factor1 Factor2
V2 0.493 0.569
V3 0.540 0.839
V4 0.855 0.498
V5 0.856 0.512
V6 0.840 0.523
V7 0.461 0.804
Factor1 Factor2
SS loadings 2.917 2.457
Proportion Var 0.486 0.410
Cumulative Var 0.486 0.896
Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 34.75 on 4 degrees of freedom.
The p-value is 5.23e-07
Unicidades para Dos Factores
psi.est.2 <- diag (x2$ uniquenesses)
psi.est.2
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0.4335223 0.000 0.00000000 0.000 0.00000000 0.0000000
[2,] 0.0000000 0.005 0.00000000 0.000 0.00000000 0.0000000
[3,] 0.0000000 0.000 0.02066002 0.000 0.00000000 0.0000000
[4,] 0.0000000 0.000 0.00000000 0.005 0.00000000 0.0000000
[5,] 0.0000000 0.000 0.00000000 0.000 0.02036556 0.0000000
[6,] 0.0000000 0.000 0.00000000 0.000 0.00000000 0.1410475
Representación Gráfica de la Clasificación por Empresas
plot (x2$ scores[,1 ], x2$ scores[,2 ], xlab= "Primer Factor" , ylab= "Segundo Factor" ,
main= "Puntuaciones con MLE" , pch= 19 , col= "darkblue" )+
abline (h= 0 , v= 0 )
text (x2$ scores[,c (1 ,2 )], labels = rownames (mmt), pos= 4 , col = "darkblue" )
Scores
Factor1 Factor2
BellAtlantic 1.260897421 0.049483567
ContinentalTelecom 0.206726535 -0.400696611
AmericanElectricPower 0.399918322 -0.143799093
BrooklynUnionGas 0.006265463 -0.537764892
CentralIllinoisPublicService 0.097247134 -0.649535509
ClevelandElectricIlluminating 0.490527104 -0.801359778
ColumbiaGasSystem -0.350886893 0.182532128
FloridaProgress 0.154022367 -0.526651296
IdahoPower 0.154197448 -0.726693198
KansasPower&Light 0.033161350 -0.502304333
MesaPetroleum 0.326818438 -0.850031901
MontanaPower 0.198094722 -0.729094317
PeoplesEnergy -0.033253079 -0.419055447
PhillipsPetroleum -1.259719220 2.654402419
PublicServiceCoofNewMexico 0.234285537 -0.726383858
SanDiegoGas&Electric 0.214571533 -0.552121310
ValeroEnergy -0.280547859 -0.094710523
AmericanSavingsBankFSB 0.008928734 -0.648020867
BankSouth 0.045100401 -0.686420797
H&RBlock 0.082059779 -0.667387391
CaliforniaFirstBank 0.008075516 -0.614716723
Cigna -1.003736451 2.611646166
Dreyfus 0.141029260 -0.765194599
FirstAmerican 0.050173092 -0.657965762
FirstEmpireState 0.040100545 -0.684528688
FirstTennesseeNational 0.034786971 -0.635202144
MarineCorp 0.035520238 -0.665640189
MellonBank -0.052446751 -0.115012825
NationalCity 0.052815736 -0.512304983
NorstarBancorp 0.098892138 -0.613169290
Norwest -0.125350221 -0.192730588
SoutheastBanking 0.005175655 -0.517996000
SovranFinancial 0.069432901 -0.568443522
UnitedFinancialGroup -0.038617687 -0.592996477
AppleComputer -0.051640466 -0.385337060
DigitalEquipment -0.046545989 0.522265039
Eg&G -0.019421927 -0.491806358
GeneralElectric 1.056082116 3.423087286
Hewlett-Packard 0.207261572 0.284016431
IBM 7.690451136 2.855672373
NCR 0.088652436 -0.018648880
Telex 0.085773115 -0.646634016
ArmstrongWorldIndustries -0.002342138 -0.417680828
CBIIndustries -0.109510587 -0.382114671
Fruehauf -0.205371708 -0.142090814
Halliburton -0.517009423 0.440306967
LTV 0.133083337 0.580864166
Owens-CorningFiberglas -0.180022802 -0.038417659
PPGIndustries 0.044605819 -0.001548012
Textron -0.378520338 0.498634512
Turner -0.267514281 -0.187176965
UnitedTechnologies -1.541302129 2.817191018
CommunityPsychiatricCenters 0.109316222 -0.738541933
HospitalCorpofAmerica 0.028802476 -0.005649359
AHRobins 0.048894402 -0.617117362
SharedMedicalSystems 0.091721845 -0.710427589
AirProducts 0.109123455 -0.456353426
AlliedSignal -0.750228549 1.337464841
BallyManufactoring -0.088172791 -0.425989397
CrownCork&Seal -0.029701189 -0.437212557
Ex-Cell-0 -0.015134684 -0.500786668
LizClaiborne 0.091021760 -0.671402436
WarnerCommunications 0.120441041 -0.412850313
Dayton-Hudson -0.695512332 1.239936663
DillardDepartmentStores -0.054843681 -0.400949977
GiantFood -0.181445660 -0.215230128
GreatA&PTea -0.864941765 0.951995998
Kroger -2.227011825 3.575365042
MayDepartmentStores -0.248696539 0.325615750
Stop&ShopCos -0.465733282 0.213400575
SupermarketsGeneral -0.613639606 0.541572073
WickesCos -0.323736178 -0.013198511
FWWoolworth -0.540686056 0.676698220
AMR -0.142999986 0.416335325
IUInternational 0.050253315 -0.417298526
PanAm -0.341614700 0.092658305
RepublicAirlines -0.081361850 -0.362692957
TWA -0.192909368 0.038345852
WesternAirLines -0.082178401 -0.434401404