import os
import numpy as np
import pandas as pd
import seaborn as sb
from math import pi
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA
from scipy.spatial.distance import pdist
from sklearn_extra.cluster import KMedoids
from sklearn.preprocessing import StandardScaler
def centroide(num_cluster, datos, clusters):
    """Return the centroid of one cluster as a single-row DataFrame.

    Args:
        num_cluster: Label of the cluster whose centroid is wanted.
        datos: Observations, one row per observation (DataFrame or 2-D
            array-like).
        clusters: Cluster label of every row of ``datos``, in row order.

    Returns:
        A one-row ``pandas.DataFrame`` with the column-wise mean of the rows
        whose label equals ``num_cluster``.
    """
    datos = pd.DataFrame(datos)
    # Compare positionally so lists, arrays and Series (whatever their index)
    # can all be used as the label vector.
    ind = np.asarray(clusters) == num_cluster
    return pd.DataFrame(datos.loc[ind].mean()).T
def recodificar(col, nuevo_codigo):
    """Return a recoded copy of ``col``.

    Args:
        col: Values to recode (anything ``pandas.Series`` accepts).
        nuevo_codigo: Mapping ``{old_value: new_value}``.

    Returns:
        A new ``pandas.Series``; ``col`` itself is never modified.
    """
    col_cod = pd.Series(col, copy=True)
    if not nuevo_codigo:
        return col_cod
    # Apply the whole mapping in one call so that replacements do not chain:
    # with {1: 2, 2: 3} a 1 must become 2, not 3.
    return col_cod.replace(dict(nuevo_codigo))
def bar_plot(centros, labels, scale=False, cluster=None, var=None):
    """Draw one horizontal bar chart per cluster centre on the current figure.

    Each selected cluster gets its own panel (``plt.subplot`` in a single row);
    only the first panel shows the variable names on the y axis.

    Args:
        centros: 2-D array-like of cluster centres, one row per cluster and one
            column per variable.
        labels: Variable names, one per column of ``centros``.
        scale: If true, divide every column by its maximum before plotting.
        cluster: Optional iterable of cluster indices to draw; all clusters
            are drawn when ``None``.
        var: Optional collection of variable names to keep; all variables are
            drawn when ``None``.
    """
    from math import ceil, floor
    from seaborn import color_palette

    # Float copy: the caller's array is left untouched and the in-place
    # division below also works when the centres are integers.
    centros = np.array(centros, dtype=float)
    # An array supports the boolean masking used for ``var`` even when the
    # caller passes a plain list of names.
    labels = np.asarray(labels)

    if scale:
        for col in range(centros.shape[1]):
            maximo_col = centros[:, col].max()
            if maximo_col != 0:  # a zero maximum would divide by zero
                centros[:, col] /= maximo_col

    colores = color_palette()
    # Lower x limit is computed from all variables, before any filtering.
    minimo = min(floor(centros.min()), 0)

    if var is not None:
        seleccion = np.array([x in var for x in labels], dtype=bool)
        centros = centros[:, seleccion]
        # Keep the colour each variable would have had without filtering.
        colores = [colores[pos % len(colores)] for pos in np.flatnonzero(seleccion)]
        labels = labels[seleccion]

    maximo = ceil(centros.max())

    def inside_plot(valores, titulo):
        plt.barh(range(len(valores)), valores, 1 / 1.5, color=colores)
        plt.xlim(minimo, maximo)
        plt.title(titulo)

    indices = range(centros.shape[0]) if cluster is None else list(cluster)
    for pos, i in enumerate(indices, start=1):
        plt.subplot(1, len(indices), pos)
        inside_plot(centros[i].tolist(), 'Cluster ' + str(i))
        if pos == 1:
            plt.yticks(range(len(labels)), labels)
        else:
            plt.yticks([])
def bar_plot_detail(centros, columns_names=None, columns_to_plot=None, figsize=(10, 7), dpi=150):
    """Draw one seaborn bar plot per variable, comparing the cluster centres.

    The panels are laid out two per row on figure number 1.

    Args:
        centros: 2-D array-like of cluster centres, one row per cluster and one
            column per variable.
        columns_names: Names of the columns of ``centros``. When omitted or
            empty, the column positions ("0", "1", ...) are used as names.
        columns_to_plot: Optional subset of variables to draw, given either as
            names (``str``) or as positions into ``columns_names``. All
            variables are drawn when omitted or empty.
        figsize: Figure size passed to ``plt.figure``.
        dpi: Figure resolution passed to ``plt.figure``.
    """
    from math import ceil
    import seaborn as sb

    centros = np.asarray(centros)
    columns_names = [] if columns_names is None else list(columns_names)
    columns_to_plot = [] if columns_to_plot is None else list(columns_to_plot)
    if not columns_names:
        # An empty column list cannot label a non-empty table; fall back to
        # positional names so the default call works.
        columns_names = [str(i) for i in range(centros.shape[1])]

    labels = ["Cluster " + str(i) for i in range(centros.shape[0])]
    centros = pd.DataFrame(centros, columns=columns_names, index=labels)

    if not columns_to_plot:
        columns = columns_names
    elif isinstance(columns_to_plot[0], str):
        columns = columns_to_plot
    else:
        columns = [columns_names[i] for i in columns_to_plot]

    rows, cols = ceil(len(columns) / 2), 2
    plt.figure(1, figsize=figsize, dpi=dpi)
    plt.subplots_adjust(hspace=1, wspace=0.5)
    for pos, column in enumerate(columns):
        # divmod gives the (row, col) cell of a row-major, two-column grid.
        ax = plt.subplot2grid((rows, cols), divmod(pos, cols), colspan=1, rowspan=1)
        sb.barplot(y=labels, x=column, data=centros, ax=ax)
def radar_plot(centros, labels):
    """Draw a radar (polar) chart with one polygon per cluster centre.

    Every variable is rescaled to 0-100 across the clusters; a variable whose
    value is the same in every cluster is drawn at 50.

    Args:
        centros: 2-D array-like of cluster centres, one row per cluster and one
            column per variable.
        labels: Variable names, one per column of ``centros``.
    """
    from math import pi

    centros = np.asarray(centros, dtype=float)
    escalados = []
    for columna in centros.T:
        minimo, maximo = columna.min(), columna.max()
        if maximo != minimo:
            escalados.append((columna - minimo) / (maximo - minimo) * 100)
        else:
            # Constant variable: use the midpoint directly. Computing it as
            # n / n * 50 would give NaN when the constant is zero.
            escalados.append(np.full(columna.shape, 50.0))
    # Shape is now (variables, clusters).
    centros = np.array(escalados)

    angulos = [n / float(len(labels)) * 2 * pi for n in range(len(labels))]
    angulos += angulos[:1]  # repeat the first angle to close the polygon

    ax = plt.subplot(111, polar=True)
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)
    plt.xticks(angulos[:-1], labels)
    ax.set_rlabel_position(0)
    plt.yticks([10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
               ["10%", "20%", "30%", "40%", "50%", "60%", "70%", "80%", "90%", "100%"],
               color="grey", size=8)
    plt.ylim(-10, 100)

    for i in range(centros.shape[1]):
        valores = centros[:, i].tolist()
        valores += valores[:1]  # close the polygon
        ax.plot(angulos, valores, linewidth=1, linestyle='solid',
                label='Cluster ' + str(i))
        ax.fill(angulos, valores, alpha=0.3)
    plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
#1
##a
# Read the country indicators table (first CSV column is the row index) and
# fit K-means with 3 clusters.
# NOTE(review): the path is machine-specific; adjust it before running elsewhere.
# NOTE(review): the table is clustered exactly as read - no scaling is applied
# here even though StandardScaler is imported; confirm this is intended.
datos = pd.read_csv(
    "C:/Users/Rodrigo/Desktop/TEC/Concentracion/datos/country_indicators.csv",
    index_col=0,
)
kmedias = KMeans(max_iter=400, n_init=100, n_clusters=3)
kmedias.fit(datos)
## KMeans(max_iter=400, n_clusters=3, n_init=100)
## In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
## On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# K-means cluster label of every row, and the fitted cluster centres.
grupos_media = kmedias.predict(datos)
centros_media = np.array(kmedias.cluster_centers_)
##b
# K-medoids with 3 clusters, using the "cityblock" metric.
kmedoids = KMedoids(
    n_clusters=3,
    metric="cityblock",
    max_iter=400,
)
kmedoids.fit(datos)
## KMedoids(max_iter=400, metric='cityblock', n_clusters=3)
## In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
## On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Show the K-medoids cluster label of every row.
print("Grupos\n", kmedoids.labels_)
## Grupos
## [2 2 1 2 1 1 2 0 0 1 1 0 2 1 1 0 2 2 2 2 2 1 1 0 1 2 2 2 2 0 2 2 2 1 2 1 2
## 2 2 1 2 1 0 1 0 1 2 2 2 1 2 1 2 0 0 1 2 2 0 2 0 1 2 2 2 2 2 1 0 2 2 1 1 0
## 0 0 2 0 2 1 2 2 0 2 2 1 1 2 2 1 1 0 1 2 2 1 1 2 1 2 1 2 2 2 1 2 2 2 2 2 0
## 0 2 2 0 0 2 1 2 1 2 1 1 0 1 1 2 2 0 2 1 1 2 0 1 1 2 1 1 0 2 1 2 1 0 0 2 2
## 1 2 2 2 2 1 2 2 2 0 0 0 1 2 2 1 2 2 2]
# Cluster centres of the K-medoids fit as a plain array.
centros_medo = np.array(kmedoids.cluster_centers_)
# Bare expression: the table is only displayed when run interactively.
pd.DataFrame(centros_medo)
##c
## 0 1 2 3 4 5 6 7 8
## 0 4.5 76.4 10.70 74.7 41100.0 1.880 80.0 1.86 44400.0
## 1 10.3 35.8 7.03 60.2 16300.0 0.238 79.8 1.61 8860.0
## 2 74.7 29.5 5.22 45.9 3060.0 16.600 62.2 4.27 1310.0
# Start from an empty figure: bar_plot() adds its own panels with
# plt.subplot(), and a full-size axes pre-made by plt.subplots() would overlap
# them (the cause of the "Auto-removal of overlapping axes" deprecation warning).
fig = plt.figure(figsize=(15, 8), dpi=200)
bar_plot(centros_media, datos.columns, scale=True)
plt.show()

# Start from an empty figure: radar_plot() creates its own polar axes with
# plt.subplot(), and a full-size axes pre-made by plt.subplots() would overlap
# it (the cause of the "Auto-removal of overlapping axes" deprecation warning).
fig = plt.figure(figsize=(15, 8), dpi=200)
radar_plot(centros_media, datos.columns)
plt.show()

# Empty figure on purpose: bar_plot() adds its own panels with plt.subplot(),
# so a pre-made full-size axes from plt.subplots() would only overlap them.
fig = plt.figure(figsize=(15, 8), dpi=200)
bar_plot(centros_medo, datos.columns, scale=True)
plt.show()
## Traceback (most recent call last):
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\backends\backend_qt.py", line 468, in _draw_idle
## self.draw()
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\backends\backend_agg.py", line 400, in draw
## self.figure.draw(self.renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 95, in draw_wrapper
## result = draw(artist, renderer, *args, **kwargs)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 72, in draw_wrapper
## return draw(artist, renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\figure.py", line 3140, in draw
## mimage._draw_list_compositing_images(
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\image.py", line 131, in _draw_list_compositing_images
## a.draw(renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 72, in draw_wrapper
## return draw(artist, renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axes\_base.py", line 3028, in draw
## self._update_title_position(renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axes\_base.py", line 2961, in _update_title_position
## if (ax.xaxis.get_ticks_position() in ['top', 'unknown']
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axis.py", line 2451, in get_ticks_position
## self._get_ticks_position()]
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axis.py", line 2155, in _get_ticks_position
## major = self.majorTicks[0]
## IndexError: list index out of range

# Empty figure on purpose: radar_plot() creates its own polar axes with
# plt.subplot(), so a pre-made full-size axes from plt.subplots() would only
# overlap it.
fig = plt.figure(figsize=(15, 8), dpi=200)
radar_plot(centros_medo, datos.columns)
plt.show()
### The results above show a notable difference between the two clusterings. Looking closely at the variables, and placing them in our own context, K-means makes a more fitting separation than K-medoids, since it separates "inflation", "total_fer" and "child_mort" into one cluster, "life_expec" and "health" into another, and finally "gdpp", "exports", "imports" and "income". This gives three clearly defined categories: social development, social distribution and economy.
##d
# Project the rows onto their first two principal components and colour each
# point by its K-means cluster.
pca = PCA(n_components=2)
componentes = pca.fit_transform(datos)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 8), dpi=200)
ax.scatter(
    componentes[:, 0],
    componentes[:, 1],
    c=kmedias.predict(datos),
)
ax.set_title('3 Cluster K-Medias')
ax.set_ylabel('componente 2')
ax.set_xlabel('componente 1')
plt.show()
##e
## Traceback (most recent call last):
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\backends\backend_qt.py", line 468, in _draw_idle
## self.draw()
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\backends\backend_agg.py", line 400, in draw
## self.figure.draw(self.renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 95, in draw_wrapper
## result = draw(artist, renderer, *args, **kwargs)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 72, in draw_wrapper
## return draw(artist, renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\figure.py", line 3140, in draw
## mimage._draw_list_compositing_images(
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\image.py", line 131, in _draw_list_compositing_images
## a.draw(renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 72, in draw_wrapper
## return draw(artist, renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axes\_base.py", line 3028, in draw
## self._update_title_position(renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axes\_base.py", line 2961, in _update_title_position
## if (ax.xaxis.get_ticks_position() in ['top', 'unknown']
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axis.py", line 2451, in get_ticks_position
## self._get_ticks_position()]
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axis.py", line 2156, in _get_ticks_position
## minor = self.minorTicks[0]
## IndexError: list index out of range

# Candidate numbers of clusters for the elbow plot.
Nc = range(2, 40)
# n_init is given explicitly (10 was the default in effect when the recorded
# run was made): this silences the FutureWarning printed for every model and
# keeps the result independent of the upcoming change of default to 'auto'.
kmediasList = [KMeans(n_clusters=i, n_init=10) for i in Nc]
# Within-cluster sum of squares (inertia) of each fitted model.
varianza = [modelo.fit(datos).inertia_ for modelo in kmediasList]
#Gráfico
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning:
##
## The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
##
## C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning:
##
## KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
# Elbow plot: inertia against the number of clusters.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 8), dpi=200)
ax.plot(Nc, varianza, 'o-')
ax.set_title('Codo de Jambu')
ax.set_ylabel('Inercia Intraclases')
ax.set_xlabel('Número de clústeres')
plt.show()
### After about 10 clusters the inertia starts to level off.

#2
##a
# Exercise 2a: cluster the numeric columns of the VirtualPatient table as-is
# (no scaling is applied in this part). The first CSV column becomes the index.
data = pd.read_csv(
    "C:/Users/Rodrigo/Desktop/TEC/Concentracion/datos/VirtualPatient.csv",
    sep=',',
    decimal=".",
    index_col=0,
)
# Keep only float64/int64 columns; everything else is left out of the model.
numericos = data.select_dtypes(include=['float64', 'int64'])
# K-means with 3 clusters; n_init is set explicitly (150 restarts).
kmedias = KMeans(n_clusters=3, n_init=150, max_iter=1500)
kmedias.fit(numericos)
## KMeans(max_iter=1500, n_clusters=3, n_init=150)
## In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
## On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Cluster assigned to each row by the fitted K-means model, and a copy of its
# centroids as a plain ndarray.
grupos_media = kmedias.predict(numericos)
centros_media = np.array(kmedias.cluster_centers_)
# K-medoids on the same data with the same k, using the Canberra distance.
kmedoids = KMedoids(
    n_clusters=3,
    metric="canberra",
    max_iter=1500,
)
kmedoids.fit(numericos)
## KMedoids(max_iter=1500, metric='canberra', n_clusters=3)
## In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
## On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Print the cluster label K-medoids assigned to each row of `numericos`
# (the `##` lines below are the output recorded when the document was knitted).
print("Grupos\n",kmedoids.labels_)
## Grupos
## [2 0 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2
## 2 2 2 2 0 2 2 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 0 2 0 0 0 0
## 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 0 0 1 1 1 1]
# Copy the fitted medoids into an ndarray for the plots that follow.
centros_medo = np.array(kmedoids.cluster_centers_)
# Bare expression: the table is only displayed when run interactively or
# knitted; in a plain script this line has no visible effect.
pd.DataFrame(centros_medo)
## 0 1 2 3 4 5 6 7 8
## 0 75.0 0.0 1.0 7.67 4.79 26.978782 3.0 9.0 7.0
## 1 82.0 0.0 1.0 14.00 10.00 26.446281 4.0 10.0 7.0
## 2 76.0 0.0 1.0 8.25 4.60 26.977595 7.3 8.2 2.0
# Horizontal bar chart of the K-means centroids, one panel per cluster.
# Start from an EMPTY figure: bar_plot() lays out its own panels with
# plt.subplot(1, n_clusters, i + 1). Pre-creating an Axes with plt.subplots()
# left a blank full-figure Axes underneath them, which is what triggered the
# "Auto-removal of overlapping axes is deprecated" MatplotlibDeprecationWarning
# and would remain visible once Matplotlib stops removing it automatically.
fig = plt.figure(figsize=(15, 8), dpi=200)
bar_plot(centros_media, numericos.columns, scale=True)
plt.show()

# Radar chart of the K-means centroids.
# Start from an EMPTY figure instead of plt.subplots(): the call below emitted
# the "Auto-removal of overlapping axes is deprecated" warning, i.e.
# radar_plot() adds its own Axes on top of any pre-existing one.
# NOTE(review): radar_plot() is defined outside this section; confirm it
# creates its axes itself (e.g. via plt.subplot(..., polar=True)).
fig = plt.figure(figsize=(15, 8), dpi=200)
radar_plot(centros_media, numericos.columns)
plt.show()

# Horizontal bar chart of the K-medoids centers (unscaled), one panel per cluster.
# An empty figure is created on purpose: bar_plot() builds its own panels with
# plt.subplot(1, n_clusters, i + 1), so an Axes pre-created by plt.subplots()
# would only sit, blank, underneath them (overlapping axes).
fig = plt.figure(figsize=(15, 8), dpi=200)
bar_plot(centros_medo, numericos.columns)
plt.show()

# Radar chart of the K-medoids centers.
# Use an empty figure rather than plt.subplots(): radar_plot() adds its own
# Axes, so a pre-created one would overlap it.
# NOTE(review): inferred from the overlapping-axes warning radar_plot() raises
# when an Axes already exists; confirm against its definition.
fig = plt.figure(figsize=(15, 8), dpi=200)
radar_plot(centros_medo, numericos.columns)
## <string>:4: RuntimeWarning:
##
## invalid value encountered in divide
plt.show()

##b
# Exercise 2b: same table, now adding the categorical `vision` column as
# one-hot indicator columns next to the numeric ones.
data = pd.read_csv(
    "C:/Users/Rodrigo/Desktop/TEC/Concentracion/datos/VirtualPatient.csv"
)
numerics = data.select_dtypes(include=['float64', 'int64'])
# One indicator column per `vision` category, named "vision_<category>".
vision_onehot = pd.get_dummies(data["vision"], prefix="vision")
# Side-by-side (column-wise) concatenation, aligned on the row index.
datos = pd.concat(
    [numerics, vision_onehot],
    axis=1,
)
print(datos.dtypes)
## age int64
## hospitalization_three_years int64
## exhaustion_score int64
## gait_get_up float64
## gait_speed_4m float64
## bmi_score float64
## anxiety_perception float64
## life_quality float64
## social_visits int64
## vision_Sees moderately uint8
## vision_Sees poorly uint8
## vision_Sees well uint8
## dtype: object
# Standardize every column, then wrap the result back into a DataFrame that
# keeps the original column names and row index.
datos_s = pd.DataFrame(
    StandardScaler().fit_transform(datos),
    index=datos.index,
    columns=datos.columns,
)
# K-means with 3 clusters on the standardized data (150 restarts).
kmedias = KMeans(n_clusters=3, n_init=150, max_iter=1500)
kmedias.fit(datos_s)
## KMeans(max_iter=1500, n_clusters=3, n_init=150)
## In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
## On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Cluster assigned to each standardized row by K-means, and a copy of its
# centroids as a plain ndarray.
grupos_media = kmedias.predict(datos_s)
centros_media = np.array(kmedias.cluster_centers_)
# K-medoids on the standardized data with the same k and the Canberra distance.
kmedoids = KMedoids(
    n_clusters=3,
    metric="canberra",
    max_iter=1500,
)
kmedoids.fit(datos_s)
## KMedoids(max_iter=1500, metric='canberra', n_clusters=3)
## In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
## On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Print the cluster label K-medoids assigned to each row of `datos_s`
# (the `##` lines below are the output recorded when the document was knitted).
print("Grupos\n",kmedoids.labels_)
## Grupos
## [0 0 0 1 1 1 1 1 1 0 0 1 0 0 0 0 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1
## 1 1 1 1 0 0 0 0 0 0 0 2 2 2 2 1 1 1 1 2 1 1 1 0 0 0 0 1 1 0 0 2 2 2 2 2 2
## 2 2 2 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 1 0 0 2 0 0 0 0 0 0 2 2 0 0 1 1 2 1 2
## 1 1 1 0 2 1]
# Copy the fitted medoids into an ndarray for the plots that follow.
centros_medo = np.array(kmedoids.cluster_centers_)
# Bare expression: the table is only displayed when run interactively or
# knitted; in a plain script this line has no visible effect.
pd.DataFrame(centros_medo)
## 0 1 2 ... 9 10 11
## 0 0.367728 0.485440 -0.560898 ... -0.600387 -0.270914 0.707107
## 1 -0.209777 -0.722995 -0.560898 ... -0.600387 -0.270914 0.707107
## 2 -0.787282 -0.722995 -0.560898 ... -0.600387 -0.270914 0.707107
##
## [3 rows x 12 columns]
# Horizontal bar chart of the K-means centroids (standardized data), one panel
# per cluster.
# Start from an EMPTY figure: bar_plot() lays out its own panels with
# plt.subplot(1, n_clusters, i + 1). Pre-creating an Axes with plt.subplots()
# left a blank full-figure Axes underneath them, which is what triggered the
# "Auto-removal of overlapping axes is deprecated" MatplotlibDeprecationWarning
# and would remain visible once Matplotlib stops removing it automatically.
fig = plt.figure(figsize=(15, 8), dpi=200)
bar_plot(centros_media, datos_s.columns, scale=True)
plt.show()

# Radar chart of the K-means centroids (standardized data).
# Start from an EMPTY figure instead of plt.subplots(): the call below emitted
# the "Auto-removal of overlapping axes is deprecated" warning, i.e.
# radar_plot() adds its own Axes on top of any pre-existing one.
# NOTE(review): radar_plot() is defined outside this section; confirm it
# creates its axes itself (e.g. via plt.subplot(..., polar=True)).
fig = plt.figure(figsize=(15, 8), dpi=200)
radar_plot(centros_media, datos_s.columns)
plt.show()

# Horizontal bar chart of the K-medoids centers (standardized data, unscaled
# bars), one panel per cluster.
# An empty figure is created on purpose: bar_plot() builds its own panels with
# plt.subplot(1, n_clusters, i + 1), so an Axes pre-created by plt.subplots()
# would only sit, blank, underneath them (overlapping axes).
fig = plt.figure(figsize=(15, 8), dpi=200)
bar_plot(centros_medo, datos_s.columns)
plt.show()
## Traceback (most recent call last):
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\backends\backend_qt.py", line 468, in _draw_idle
## self.draw()
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\backends\backend_agg.py", line 400, in draw
## self.figure.draw(self.renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 95, in draw_wrapper
## result = draw(artist, renderer, *args, **kwargs)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 72, in draw_wrapper
## return draw(artist, renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\figure.py", line 3140, in draw
## mimage._draw_list_compositing_images(
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\image.py", line 131, in _draw_list_compositing_images
## a.draw(renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\artist.py", line 72, in draw_wrapper
## return draw(artist, renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axes\_base.py", line 3028, in draw
## self._update_title_position(renderer)
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axes\_base.py", line 2961, in _update_title_position
## if (ax.xaxis.get_ticks_position() in ['top', 'unknown']
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axis.py", line 2451, in get_ticks_position
## self._get_ticks_position()]
## File "C:\Users\Rodrigo\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\matplotlib\axis.py", line 2155, in _get_ticks_position
## major = self.majorTicks[0]
## IndexError: list index out of range

# Radar chart of the K-medoids centers (standardized data).
# Use an empty figure rather than plt.subplots(): radar_plot() adds its own
# Axes, so a pre-created one would overlap it.
# NOTE(review): inferred from the overlapping-axes warning radar_plot() raises
# when an Axes already exists; confirm against its definition.
fig = plt.figure(figsize=(15, 8), dpi=200)
radar_plot(centros_medo, datos_s.columns)
plt.show()
### Los resultados son mejores agregando la variable Vision, ya que muestran una separación de clústeres más clara; sin embargo, se presentan valores negativos (producto de la estandarización) que en la gráfica de barras hacen un poco difícil su comprensión, pero ya en la gráfica de radar se pueden comprender perfectamente los clústeres. A mi parecer, el mejor ejercicio para esta base de datos fue KMedias, ya que tiene una separación más racional al ponerla en contexto.
