Required libraries
library(tidyverse)
library(reticulate)
List conda environments
# Show the conda environments available on this machine (name and python path).
conda_list()
## name
## 1 anaconda3
## 2 py3.8
## 3 r-miniconda
## 4 r-mlflow-1.10.0
## 5 r-reticulate
## python
## 1 C:\\Users\\hutajunj\\AppData\\Local\\Continuum\\anaconda3\\python.exe
## 2 C:\\Users\\hutajunj\\AppData\\Local\\Continuum\\anaconda3\\envs\\py3.8\\python.exe
## 3 C:\\Users\\hutajunj\\AppData\\Local\\r-miniconda\\python.exe
## 4 C:\\Users\\hutajunj\\AppData\\Local\\r-miniconda\\envs\\r-mlflow-1.10.0\\python.exe
## 5 C:\\Users\\hutajunj\\AppData\\Local\\r-miniconda\\envs\\r-reticulate\\python.exe
Activate py3.8 environment
# Select the "py3.8" conda environment for this session.
# NOTE(review): required = TRUE presumably makes the call fail instead of
# silently falling back to another Python — confirm against reticulate docs.
use_condaenv("py3.8", required = TRUE)
Double check that reticulate is actually using the new conda env
# Print the Python configuration currently in use so the interpreter path
# can be compared with the environment selected above.
py_config()
## python: C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8/python.exe
## libpython: C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8/python38.dll
## pythonhome: C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8
## version: 3.8.3 (default, Jul 2 2020, 17:30:36) [MSC v.1916 64 bit (AMD64)]
## Architecture: 64bit
## numpy: C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/envs/py3.8/Lib/site-packages/numpy
## numpy_version: 1.18.5
##
## NOTE: Python version was forced by use_python function
Test numpy and pandas
import numpy as np
import pandas as pd
np.arange(1, 10)
## array([1, 2, 3, 4, 5, 6, 7, 8, 9])
# Build a one-column frame from a dict holding the values 1.00 .. 19.99 in
# steps of 0.01, then add a second column with the sine of each entry
# (assign plays the role of dplyr's mutate).
df = (
    pd.DataFrame({"sequence": np.arange(1, 20, .01)})
    .assign(value=lambda frame: np.sin(frame["sequence"]))
)
df
## sequence value
## 0 1.00 0.841471
## 1 1.01 0.846832
## 2 1.02 0.852108
## 3 1.03 0.857299
## 4 1.04 0.862404
## ... ... ...
## 1895 19.95 0.891409
## 1896 19.96 0.895896
## 1897 19.97 0.900294
## 1898 19.98 0.904602
## 1899 19.99 0.908819
##
## [1900 rows x 2 columns]
Fix plotting when knitting
import os
# Point QT_QPA_PLATFORM_PLUGIN_PATH at the "platforms" plugin folder of the
# Anaconda installation. The path is specific to this machine and user, so
# adjust it to your own Anaconda location.
# NOTE(review): presumably needed so a Qt-based matplotlib backend can start
# while the document is being knitted — confirm.
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'C:/Users/hutajunj/AppData/Local/Continuum/anaconda3/Library/plugins/platforms'
Test matplotlib
import matplotlib.pyplot as plt
# Draw the "value" column against "sequence" via the DataFrame's plot method,
# then display the resulting figure.
line_plot_options = {"x": "sequence", "y": "value", "title": "Matplotlib"}
df.plot(**line_plot_options)
plt.show()

Test scikit-learn
from sklearn.ensemble import RandomForestClassifier
# Toy training set: two samples with three features each, plus one class
# label per sample.
X = [
    [1, 2, 3],
    [11, 12, 13],
]
y = [0, 1]
# Create the classifier with random_state fixed at 0 and train it.
clf = RandomForestClassifier(random_state=0)
clf.fit(X, y)
## RandomForestClassifier(random_state=0)
# Predict the classes of the same rows that were used for training.
clf.predict(X)
## array([0, 1])
Run affinity propagation
from itertools import cycle

import matplotlib.pyplot as plt
from sklearn.cluster import AffinityPropagation
from sklearn.datasets import make_blobs

# #############################################################################
# Generate sample data: 300 points drawn around three centers
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5,
                            random_state=0)

# #############################################################################
# Compute Affinity Propagation
# random_state=0 is passed explicitly: scikit-learn 0.23 emits a FutureWarning
# when it is left unset, and that warning names 0 as the value that keeps the
# behavior of versions <0.23.
af = AffinityPropagation(preference=-50, random_state=0).fit(X)
cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_
n_clusters_ = len(cluster_centers_indices)

# #############################################################################
# Plot result
plt.close('all')
plt.figure(1)
plt.clf()

# cycle() repeats the color codes indefinitely, so one copy of the sequence
# is enough.
colors = cycle('bgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    # Boolean mask selecting the samples assigned to cluster k.
    class_members = labels == k
    cluster_center = X[cluster_centers_indices[k]]
    # Cluster members as dots, the exemplar as a large outlined marker.
    plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14)
    # Connect every member to its cluster's exemplar.
    for x in X[class_members]:
        plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

Next
# Announce the topic of the follow-up post.
next_post_teaser = "Next blog will be on how to setup TensorFlow 2 in R"
print(next_post_teaser)
## Next blog will be on how to setup TensorFlow 2 in R