Required libraries

library(tidyverse)
library(reticulate)

List the available conda environments

# Show every conda environment reticulate can find, with its Python binary
reticulate::conda_list()
##              name
## 1       anaconda3
## 2           py3.8
## 3     r-miniconda
## 4 r-mlflow-1.10.0
## 5    r-reticulate
##                                                                                python
## 1               C:\\Users\\hutajunj\\AppData\\Local\\Continuum\\anaconda3\\python.exe
## 2  C:\\Users\\hutajunj\\AppData\\Local\\Continuum\\anaconda3\\envs\\py3.8\\python.exe
## 3                        C:\\Users\\hutajunj\\AppData\\Local\\r-miniconda\\python.exe
## 4 C:\\Users\\hutajunj\\AppData\\Local\\r-miniconda\\envs\\r-mlflow-1.10.0\\python.exe
## 5    C:\\Users\\hutajunj\\AppData\\Local\\r-miniconda\\envs\\r-reticulate\\python.exe

Activate the py3.8 environment

# Bind reticulate to the "py3.8" conda environment; required = TRUE makes
# this an error instead of a silent fallback if the environment is missing
reticulate::use_condaenv("py3.8", required = TRUE)

Double check that reticulate is actually using the new conda env

# Print the Python interpreter, libpython and numpy that reticulate resolved
reticulate::py_config()
## python:         C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8/python.exe
## libpython:      C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8/python38.dll
## pythonhome:     C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8
## version:        3.8.3 (default, Jul  2 2020, 17:30:36) [MSC v.1916 64 bit (AMD64)]
## Architecture:   64bit
## numpy:          C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8/Lib/site-packages/numpy
## numpy_version:  1.18.5
## 
## NOTE: Python version was forced by use_python function

Python test

# Smoke test: a trivial expression to confirm the Python engine evaluates code
1 + 1
## 2

Test numpy and pandas

import numpy as np
import pandas as pd

np.arange(1, 10)
## array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# Make a sequence in a data frame using dict format.
# np.linspace with an explicit sample count is used instead of
# np.arange(1, 20, .01): with a fractional step the number of elements arange
# returns depends on floating-point rounding, whereas linspace always yields
# exactly `num` values. This gives 1900 points from 1.00 up to (but not
# including) 20.00, i.e. a spacing of 0.01.
df = pd.DataFrame(
    data={"sequence": np.linspace(1, 20, num=1900, endpoint=False)}
)

# Use assign (mutate) equivalent to calculate the np.sin() of the series
df = df.assign(value=np.sin(df["sequence"]))

df
##       sequence     value
## 0         1.00  0.841471
## 1         1.01  0.846832
## 2         1.02  0.852108
## 3         1.03  0.857299
## 4         1.04  0.862404
## ...        ...       ...
## 1895     19.95  0.891409
## 1896     19.96  0.895896
## 1897     19.97  0.900294
## 1898     19.98  0.904602
## 1899     19.99  0.908819
## 
## [1900 rows x 2 columns]

Fix plotting when knitting

import os

# Tell Qt where its "platforms" plugins live. Presumably needed so the
# Qt-based matplotlib backend can start while the document is knitted.
# NOTE: the path is specific to this machine's Anaconda install; adjust it
# for other setups.
os.environ.update({
    'QT_QPA_PLATFORM_PLUGIN_PATH': 'C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/Library/plugins/platforms',
})

Test matplotlib

import matplotlib.pyplot as plt

# Line chart of the "value" column against "sequence" from the data frame
# built in the numpy/pandas test; plt.show() renders the figure.
df.plot.line(x="sequence", y="value", title="Matplotlib")
plt.show()

Test scikit-learn

from sklearn.ensemble import RandomForestClassifier

# Toy training set: 2 samples with 3 features each
X = [[ 1,  2,  3],
     [11, 12, 13]]

# One class label per sample
y = [0, 1]

# random_state=0 fixes the forest's randomness so the run is repeatable
clf = RandomForestClassifier(random_state=0)

clf.fit(X, y)
## RandomForestClassifier(random_state=0)
# Predict the classes of the same samples the model was trained on
clf.predict(X)
## array([0, 1])

Run affinity propagation

from sklearn.cluster import AffinityPropagation
from sklearn.datasets import make_blobs

# #############################################################################
# Generate sample data: 300 points scattered around three 2-D centers.
# random_state=0 keeps the generated blobs reproducible.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5,
                            random_state=0)

# Compute Affinity Propagation.
# random_state is passed explicitly: since scikit-learn 0.23, omitting it
# raises a FutureWarning, and from 0.25 the default becomes None, so results
# would differ on every call. The value 0 keeps the behavior of versions
# before 0.23 and makes the clustering reproducible.
af = AffinityPropagation(preference=-50, random_state=0).fit(X)
cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_

# Number of clusters found = number of cluster-center indices returned
n_clusters_ = len(cluster_centers_indices)

# #############################################################################
# Plot result: one color per cluster, each member joined to its cluster center
import matplotlib.pyplot as plt
from itertools import cycle

# Start from a clean figure 1
plt.close('all')
plt.figure(1)
plt.clf()

colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k in range(n_clusters_):
    col = next(colors)
    class_members = labels == k
    cluster_center = X[cluster_centers_indices[k]]
    center_x, center_y = cluster_center
    # Members of cluster k as dots
    plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
    # The cluster center as a large circle with a black edge
    plt.plot(center_x, center_y, 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14)
    # A line from the cluster center to every member
    for point in X[class_members]:
        plt.plot([center_x, point[0]], [center_y, point[1]], col)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()