#!pip install factor_analyzer
# Notebook title: some factor analysis in Python.
import factor_analyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
import pandas as pd

# Load the arrests table from a CSV in the working directory and preview it.
# The recorded output below shows an unnamed first column holding state names.
data = pd.read_csv('usarrests.csv')
print(data.head(10))
# Recorded output:
#    Unnamed: 0  Murder  Assault  UrbanPop  Rape
# 0     Alabama    13.2      236        58  21.2
# 1      Alaska    10.0      263        48  44.5
# 2     Arizona     8.1      294        80  31.0
# 3    Arkansas     8.8      190        50  19.5
# 4  California     9.0      276        91  40.6
# 5    Colorado     7.9      204        78  38.7
# 6 Connecticut     3.3      110        77  11.1
# 7    Delaware     5.9      238        72  15.8
# 8     Florida    15.4      335        80  31.9
# 9     Georgia    17.4      211        60  25.8
from factor_analyzer import FactorAnalyzer

# Keep only the four numeric variables; the state-name column is left out.
df = pd.DataFrame(data, columns=['Murder', 'Assault', 'UrbanPop', 'Rape'])
print(df.head(10))
# Recorded output:
#    Murder  Assault  UrbanPop  Rape
# 0    13.2      236        58  21.2
# 1    10.0      263        48  44.5
# 2     8.1      294        80  31.0
# 3     8.8      190        50  19.5
# 4     9.0      276        91  40.6
# 5     7.9      204        78  38.7
# 6     3.3      110        77  11.1
# 7     5.9      238        72  15.8
# 8    15.4      335        80  31.9
# 9    17.4      211        60  25.8

# Pairwise correlation matrix of the four variables.
print(df.corr())
# Recorded output:
#             Murder   Assault  UrbanPop      Rape
# Murder    1.000000  0.801873  0.069573  0.563579
# Assault   0.801873  1.000000  0.258872  0.665241
# UrbanPop  0.069573  0.258872  1.000000  0.411341
# Rape      0.563579  0.665241  0.411341  1.000000
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the selected columns; it is applied further down.
scaler = StandardScaler()
scaler.fit(df)
# Out: StandardScaler()

from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity

# Bartlett sphericity statistic and p-value, computed on the unscaled frame.
chi_square_value, p_value = calculate_bartlett_sphericity(df)
chi_square_value, p_value
# Out: (88.28814686595526, 6.868423073358666e-17)

from factor_analyzer.factor_analyzer import calculate_kmo

# KMO values; only the overall model value is displayed.
kmo_all, kmo_model = calculate_kmo(df)
kmo_model
# Out: 0.6538150062740201

# Apply the fitted scaler; the result is a NumPy array (see the echo below).
scaled_data = scaler.transform(df)
scaled_data
# Out:
# array([[ 1.25517927, 0.79078716, -0.52619514, -0.00345116],
#        [ 0.51301858, 1.11805959, -1.22406668, 2.50942392],
#        [ 0.07236067, 1.49381682, 1.00912225, 1.05346626],
#        [ 0.23470832, 0.23321191, -1.08449238, -0.18679398],
#        [ 0.28109336, 1.2756352 , 1.77678094, 2.08881393],
#        [ 0.02597562, 0.40290872, 0.86954794, 1.88390137],
#        [-1.04088037, -0.73648418, 0.79976079, -1.09272319],
#        [-0.43787481, 0.81502956, 0.45082502, -0.58583422],
#        [ 1.76541475, 1.99078607, 1.00912225, 1.1505301 ],
#        [ 2.22926518, 0.48775713, -0.38662083, 0.49265293],
#        [-0.57702994, -1.51224105, 1.21848371, -0.11129987],
#        [-1.20322802, -0.61527217, -0.80534376, -0.75839217],
#        [ 0.60578867, 0.94836277, 1.21848371, 0.29852525],
#        [-0.13637203, -0.70012057, -0.03768506, -0.0250209 ],
#        [-1.29599811, -1.39102904, -0.5959823 , -1.07115345],
#        [-0.41468229, -0.67587817, 0.03210209, -0.34856705],
#        [ 0.44344101, -0.74860538, -0.94491807, -0.53190987],
#        [ 1.76541475, 0.94836277, 0.03210209, 0.10439756],
#        [-1.31919063, -1.06375661, -1.01470522, -1.44862395],
#        [ 0.81452136, 1.56654403, 0.10188925, 0.70835037],
#        [-0.78576263, -0.26375734, 1.35805802, -0.53190987],
#        [ 1.00006153, 1.02108998, 0.59039932, 1.49564599],
#        [-1.1800355 , -1.19708982, 0.03210209, -0.68289807],
#        [ 1.9277624 , 1.06957478, -1.5032153 , -0.44563089],
#        [ 0.28109336, 0.0877575 , 0.31125071, 0.75148985],
#        [-0.41468229, -0.74860538, -0.87513091, -0.521125 ],
#        [-0.80895515, -0.83345379, -0.24704653, -0.51034012],
#        [ 1.02325405, 0.98472638, 1.0789094 , 2.671197 ],
#        [-1.31919063, -1.37890783, -0.66576945, -1.26528114],
#        [-0.08998698, -0.14254532, 1.63720664, -0.26228808],
#        [ 0.83771388, 1.38472601, 0.31125071, 1.17209984],
#        [ 0.76813632, 1.00896878, 1.42784517, 0.52500755],
#        [ 1.20879423, 2.01502847, -1.43342815, -0.55347961],
#        [-1.62069341, -1.52436225, -1.5032153 , -1.50254831],
#        [-0.11317951, -0.61527217, 0.66018648, 0.01811858],
#        [-0.27552716, -0.23951493, 0.1716764 , -0.13286962],
#        [-0.66980002, -0.14254532, 0.10188925, 0.87012344],
#        [-0.34510472, -0.78496898, 0.45082502, -0.68289807],
#        [-1.01768785, 0.03927269, 1.49763233, -1.39469959],
#        [ 1.53348953, 1.3119988 , -1.22406668, 0.13675217],
#        [-0.92491776, -1.027393 , -1.43342815, -0.90938037],
#        [ 1.25517927, 0.20896951, -0.45640799, 0.61128652],
#        [ 1.13921666, 0.36654512, 1.00912225, 0.46029832],
#        [-1.06407289, -0.61527217, 1.00912225, 0.17989166],
#        [-1.29599811, -1.48799864, -2.34066115, -1.08193832],
#        [ 0.16513075, -0.17890893, -0.17725937, -0.05737552],
#        [-0.87853272, -0.31224214, 0.52061217, 0.53579242],
#        [-0.48425985, -1.08799901, -1.85215107, -1.28685088],
#        [-1.20322802, -1.42739264, 0.03210209, -1.1250778 ],
#        [-0.22914211, -0.11830292, -0.38662083, -0.60740397]])
# Unrotated factor model with as many factors as variables (4),
# fitted on the standardized data.
fa = FactorAnalyzer()
fa.set_params(n_factors=4, rotation=None)
fa.fit(scaled_data)
# Out: FactorAnalyzer(n_factors=4, rotation=None, rotation_kwargs={})

# get_eigenvalues() returns a pair of arrays (see the echo below).
fa.get_eigenvalues()
# Out:
# (array([2.48024158, 0.98976515, 0.35656318, 0.17343009]),
#  array([ 2.28299696e+00, 5.37887219e-01, 6.17387175e-02, -2.20915632e-07]))

# Check Eigenvalues
ev, v = fa.get_eigenvalues()
ev
# Out: array([2.48024158, 0.98976515, 0.35656318, 0.17343009])

# Same eigenvalues in ascending order.
sorted(ev)
# Out:
# [0.17343008772983554,
#  0.35656318058083014,
#  0.9897651525398421,
#  2.480241579149494]

# One communality per input variable.
fa.get_communalities()
# Out: array([0.85762759, 0.88586043, 0.41517638, 0.72395827])
import matplotlib.pyplot as plt

# Scree plot: eigenvalues in `ev` against factor number 1..number of variables.
factor_numbers = range(1, df.shape[1] + 1)
plt.scatter(factor_numbers, ev)
plt.plot(factor_numbers, ev)
plt.title('Screeplot')
plt.xlabel('Factor')
plt.ylabel('Eigenvalores')
plt.grid()
plt.show()

import pandas as pd

# Loadings table: one row per variable, one column per factor. The labels
# F1..Fk follow the loadings matrix width rather than a hard-coded count.
factor_labels = [f'F{i}' for i in range(1, fa.loadings_.shape[1] + 1)]
factores = pd.DataFrame(fa.loadings_, columns=factor_labels, index=df.columns)
print(factores)
# Recorded output:
#                 F1        F2        F3   F4
# Murder    0.843704 -0.374741 -0.073213  0.0
# Assault   0.919370 -0.103672  0.172831  0.0
# UrbanPop  0.331679  0.548848  0.062696  0.0
# Rape      0.784798  0.292359 -0.150257  0.0
# Correlation matrix stored on the fitted model.
fa.corr_
# Out:
# array([[1.        , 0.80187331, 0.06957262, 0.56357883],
#        [0.80187331, 1.        , 0.2588717 , 0.66524123],
#        [0.06957262, 0.2588717 , 1.        , 0.41134124],
#        [0.56357883, 0.66524123, 0.41134124, 1.        ]])

fa.n_factors
# Out: 4

# Factor variance summary as a labelled table (one column per factor).
print(pd.DataFrame(fa.get_factor_variance(), index=['Variance', 'Proportional Var', 'Cumulative Var']))
# Recorded output:
#                          0         1         2         3
# Variance          2.282997  0.537887  0.061739  0.000000
# Proportional Var  0.570749  0.134472  0.015435  0.000000
# Cumulative Var    0.570749  0.705221  0.720656  0.720656

fa.get_factor_variance()
# Out:
# (array([2.28299689, 0.53788712, 0.06173866, 0.        ]),
#  array([0.57074922, 0.13447178, 0.01543467, 0.        ]),
#  array([0.57074922, 0.705221  , 0.72065567, 0.72065567]))

fa.loadings_
# Out:
# array([[ 0.84370394, -0.37474146, -0.07321271, 0.        ],
#        [ 0.91937036, -0.10367227,  0.17283121, 0.        ],
#        [ 0.33167926,  0.54884825,  0.06269644, 0.        ],
#        [ 0.78479779,  0.29235872, -0.15025674, 0.        ]])

ev, v = fa.get_eigenvalues()
ev
# Out: array([2.48024158, 0.98976515, 0.35656318, 0.17343009])

# Manual check: the squared loadings of the first row (Murder) sum to the
# first communality reported by the model (0.8576...).
(0.84370394)**2 + (-0.37474146)**2 + (-0.07321271)**2
# Out: 0.8576276011199994
# Second model with varimax rotation; n_factors is left at the library
# default (the echoed loadings below have three columns).
fa_varimax = FactorAnalyzer(rotation='varimax')
fa_varimax.fit(scaled_data)
# Out: FactorAnalyzer(rotation='varimax', rotation_kwargs={})

fa_varimax.loadings_
# Out:
# array([[ 0.91516486, 0.02762848,  0.13905951],
#        [ 0.88036791, 0.32020407, -0.09100618],
#        [ 0.05909459, 0.64142576, -0.0160376 ],
#        [ 0.56292624, 0.59517677,  0.22986286]])

fa_varimax.get_communalities()
# Out: array([0.85762759, 0.88586043, 0.41517638, 0.72395827])

# NOTE(review): this table is built from the unrotated `fa`, not `fa_varimax`;
# the recorded values are the same for both models, so the output is unaffected.
print(pd.DataFrame(fa.get_communalities(), index=df.columns, columns=['Communalities']))
# Recorded output:
#           Communalities
# Murder         0.857628
# Assault        0.885860
# UrbanPop       0.415176
# Rape           0.723958

# Get variance of each factor
fa.get_factor_variance()
# Out:
# (array([2.28299689, 0.53788712, 0.06173866, 0.        ]),
#  array([0.57074922, 0.13447178, 0.01543467, 0.        ]),
#  array([0.57074922, 0.705221  , 0.72065567, 0.72065567]))
import prince

# Two-component PCA on the unscaled frame; centering and scaling are
# requested through the rescale_with_mean / rescale_with_std flags.
# NOTE(review): these keyword names match the prince release the notebook was
# run with — confirm against the installed version.
pca = prince.PCA(
    n_components=2,
    n_iter=3,
    rescale_with_mean=True,
    rescale_with_std=True,
    copy=True,
    check_input=True,
    engine='auto',
    random_state=42
)
pca = pca.fit(df)

# Row coordinates on the two components (one row per observation).
pca.transform(df)
# Out:
# |    | 0 | 1 |
# |---|---|---|
# | 0 | 0.985566 | 1.133392 |
# | 1 | 1.950138 | 1.073213 |
# | 2 | 1.763164 | -0.745957 |
# | 3 | -0.141420 | 1.119797 |
# | 4 | 2.523980 | -1.542934 |
# | 5 | 1.514563 | -0.987555 |
# | 6 | -1.358647 | -1.088928 |
# | 7 | 0.047709 | -0.325359 |
# | 8 | 3.013042 | 0.039229 |
# | 9 | 1.639283 | 1.278942 |
# | 10 | -0.912657 | -1.570460 |
# | 11 | -1.639800 | 0.210973 |
# | 12 | 1.378911 | -0.681841 |
# | 13 | -0.505461 | -0.151563 |
# | 14 | -2.253646 | -0.104054 |
# | 15 | -0.796881 | -0.270165 |
# | 16 | -0.750859 | 0.958440 |
# | 17 | 1.564818 | 0.871055 |
# | 18 | -2.396829 | 0.376392 |
# | 19 | 1.763369 | 0.427655 |
# | 20 | -0.486166 | -1.474496 |
# | 21 | 2.108441 | -0.155397 |
# | 22 | -1.692682 | -0.632261 |
# | 23 | 0.996494 | 2.393796 |
# | 24 | 0.696787 | -0.263355 |
# | 25 | -1.185452 | 0.536874 |
# | 26 | -1.265637 | -0.193954 |
# | 27 | 2.874395 | -0.775600 |
# | 28 | -2.383915 | -0.018082 |
# | 29 | 0.181566 | -1.449506 |
# | 30 | 1.980024 | 0.142849 |
# | 31 | 1.682577 | -0.823184 |
# | 32 | 1.123379 | 2.228003 |
# | 33 | -2.992226 | 0.599119 |
# | 34 | -0.225965 | -0.742238 |
# | 35 | -0.311783 | -0.287854 |
# | 36 | 0.059122 | -0.541411 |
# | 37 | -0.888416 | -0.571100 |
# | 38 | -0.863772 | -1.491978 |
# | 39 | 1.320724 | 1.933405 |
# | 40 | -1.987775 | 0.823343 |
# | 41 | 0.999742 | 0.860251 |
# | 42 | 1.355138 | -0.412481 |
# | 43 | -0.550565 | -1.471505 |
# | 44 | -2.801412 | 1.402288 |
# | 45 | -0.096335 | 0.199735 |
# | 46 | -0.216903 | -0.970124 |
# | 47 | -2.108585 | 1.424847 |
# | 48 | -2.079714 | -0.611269 |
# | 49 | -0.629427 | 0.321013 |
# Scatter of the observations on the first two PCA components.
# Labels are disabled; the optional colouring by state name stays commented out.
ax = pca.plot_row_coordinates(
    df,
    ax=None,
    figsize=(6, 6),
    x_component=0,
    y_component=1,
    labels=None,
    #color_labels=data['Unnamed: 0'],
    ellipse_outline=False,
    ellipse_fill=True,
    show_points=True
)
# Recorded warning: No artists with labels found to put in legend. Note that
# artists whose label start with an underscore are ignored when legend() is
# called with no argument.
ax.get_figure()
plt.show()