Python can be called from R in a variety of ways, including R Markdown, sourcing Python scripts, importing Python modules, and using Python interactively within an R session.
The reticulate package provides an R interface to Python modules, classes, and functions.
Functions and other data within Python modules and classes can be accessed via the
$ operator (analogous to the way you would
interact with an R list, environment, or reference class).
library(reticulate)
# import() loads a Python module; its members are then reached with `$`
os <- import("os")
pd <- import("pandas")
# call Python's os.getcwd() from R
os$getcwd()
## [1] "/Users/chenzhenghui/Dropbox/NTU_System_P/Data_Science_R_Python"
# import_builtins() returns a handle to Python's built-in functions
builtins <- import_builtins()
# the R numeric vector is handed to Python's max(); the result comes back as an R value
builtins$max(c(1, 2, 3, 4, 5))
## [1] 5
When calling into Python, R data types are automatically converted to their equivalent Python types.
When values are returned from Python to R they are converted back to R types.
| R | Python | Examples |
|---|---|---|
| Single-element vector | Scalar | 1, 1L, TRUE, "foo" |
| Multi-element vector | List | c(1.0, 2.0, 3.0), c(1L, 2L, 3L) |
| List of multiple types | Tuple | list(1L, TRUE, "foo") |
| Named list | Dict | list(a = 1L, b = 2.0), dict(x = x_data) |
| Matrix/Array | NumPy ndarray | matrix(c(1,2,3,4), nrow = 2, ncol = 2) |
| Data Frame | Pandas DataFrame | data.frame(x = c(1,2,3), y = c("a", "b", "c")) |
| Function | Python function | function(x) x + 1 |
| Raw | Python bytearray | as.raw(c(1:10)) |
| NULL, TRUE, FALSE | None, True, False | NULL, TRUE, FALSE |
# Import NumPy with automatic Python-to-R conversion switched off, so that
# results stay native Python objects until they are converted explicitly.
np <- import("numpy", convert = FALSE)
# Build a NumPy array from an R integer vector.
a <- np$array(c(1:4))
class(a)
## [1] "numpy.ndarray" "python.builtin.object"
# Keep the cumulative sum under its own name instead of `sum`, so the
# variable does not reuse the name of base R's sum().
running_total <- a$cumsum()
# Convert to R explicitly at the end.
py_to_r(running_total)
## [1] 1 3 6 10
## source.py ##
import pandas as pd
import matplotlib.pyplot as plt
def read_file(file):
    """Load a CSV file into a pandas DataFrame.

    The first column is used as the index and pandas is asked to parse
    dates (``index_col=0, parse_dates=True``).

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(file, index_col=0, parse_dates=True)


def add(x, y):
    """Return ``x + y``."""
    return x + y


PI = 3.14
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Run source.py in Python; what it defines (read_file, PI, ...) is then usable directly from R
source_python('source.py')
# Call the Python function read_file() and wrap the returned data frame as a tibble
df <- as_tibble(read_file("air_quality_no2.csv"))
class(df)
## [1] "tbl_df" "tbl" "data.frame"
# print the tibble
df
## # A tibble: 1,035 × 3
## station_antwerp station_paris station_london
## <dbl> <dbl> <dbl>
## 1 NaN NaN 23
## 2 50.5 25 19
## 3 45 27.7 19
## 4 NaN 50.4 16
## 5 NaN 61.9 NaN
## 6 NaN 72.4 26
## 7 NaN 77.7 32
## 8 NaN 67.9 32
## 9 NaN 56 28
## 10 NaN 34.5 21
## # … with 1,025 more rows
# PI was defined in source.py and is available in R after source_python()
PI
## [1] 3.14
# Run a line of Python code given as a string
py_run_string("py_x = 100")
# Import source.py as a Python module. The handle is called `source_module`
# rather than `source` so it does not reuse the name of base R's source().
source_module <- import("source")
source_module$PI
## [1] 3.14
# `py` exposes objects that live in the Python main module
py$PI
## [1] 3.14
By default when Python objects are returned to R they are converted to their equivalent R types.
However, if you’d rather make conversion from Python to R
explicit and deal in native Python objects by default you can pass
convert = FALSE to the import
function.
# Load NumPy with convert = FALSE so results remain Python objects
np <- import("numpy", convert = FALSE)
# Build the array, then take its running total on the Python side
arr <- np$array(1:4)
totals <- arr$cumsum()
# Only now hand the result back to R
py_to_r(totals)
## [1] 1 3 6 10
# build the values 1..8 in NumPy and reshape them into a 2 x 2 x 2 array
x <- np$arange(1, 9)$reshape(2L, 2L, 2L)
class(x)
## [1] "numpy.ndarray" "python.builtin.object"
# x is still a Python object (np was imported with convert = FALSE), so it prints in NumPy's layout
x
## [[[1. 2.]
## [3. 4.]]
##
## [[5. 6.]
## [7. 8.]]]
# convert explicitly; the result is an R array
y <- py_to_r(x)
class(y)
## [1] "array"
# R prints the array one slice of the last index at a time, so the same
# elements appear in a different arrangement than in the NumPy printout
y
## , , 1
##
## [,1] [,2]
## [1,] 1 3
## [2,] 5 7
##
## , , 2
##
## [,1] [,2]
## [1,] 2 4
## [2,] 6 8
The RPy project appeared and focused on providing simple and robust access to R from within Python, with the initial Unix-only releases quickly followed by Microsoft and MacOS compatible versions.
RPy2 is an evolution of RPy-1.x. Naturally, RPy2 is inspired by RPy, but also by Alexander Belopolsky’s contributions that were waiting to be included into RPy.
import rpy2
print(rpy2.__version__)
from rpy2.robjects.packages import importr
# Load R's "base" package as a Python object
base = importr('base')
# Load R's "utils" package (its install_packages is used below)
utils = importr('utils')
# Load R's "tidyverse" package
tidyverse = importr('tidyverse')
# R package names
packnames = ('ggplot2', 'hexbin')
# R vector of strings
from rpy2.robjects.vectors import StrVector
import rpy2.robjects.packages as rpackages
# Selectively install only the packages that are not installed yet.
names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if names_to_install:
    # This call must be indented under the `if`; without the indentation
    # Python rejects the snippet with an IndentationError.
    utils.install_packages(StrVector(names_to_install))
import rpy2.robjects as robjects
# Look up the R object `pi`; the lookup returns an R vector
pi = robjects.r['pi']
# take the first element of that vector
pi[0]
# Evaluate a block of R code given as a string: it defines f and then calls f(3)
robjects.r('''
# create a function `f`
f <- function(r, verbose=FALSE) {
if (verbose) {
cat("I am calling f().\n")
}
2 * pi * r
}
# call the function `f` with argument value 3
f(3)
''')
# f was created by the R code above; fetch it from R's global environment
r_f = robjects.globalenv['f']
# r_repr() gives the object's representation as R code (per the rpy2 documentation)
print(r_f.r_repr())
# the same function can also be fetched through robjects.r
r_f = robjects.r['f']
# call the R function from Python
res = r_f(3)
# the R object `pi` is a vector: check its length, then read its first element
len(robjects.r['pi'])
robjects.r['pi'][0]
# build R character, integer and numeric vectors from Python lists and show their R representation
res = robjects.StrVector(['abc', 'def'])
print(res.r_repr())
res = robjects.IntVector([1, 2, 3])
print(res.r_repr())
res = robjects.FloatVector([1.1, 2.2, 3.3])
print(res.r_repr())
# fetch R's sum() and call it on an R integer vector; the result is again a vector, hence [0]
rsum = robjects.r['sum']
rsum(robjects.IntVector([1,2,3]))[0]
import rpy2.robjects as robjects
# shorthand for the embedded R instance
r = robjects.r
# R integer vector built from Python's range(10)
x = robjects.IntVector(range(10))
# call R's rnorm(10) through attribute access on r
y = r.rnorm(10)
# bare expressions: display x and y when run interactively
x
y
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr
# load the R packages that provide lm/anova (stats) and gl/summary (base)
stats = importr('stats')
base = importr('base')
# two groups of ten measurements each: control (ctl) and treatment (trt)
ctl = FloatVector([4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14])
trt = FloatVector([4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69])
# R's gl(): a factor with 2 levels, each repeated 10 times, total length 20
group = base.gl(2, 10, 20, labels = ['Ctl','Trt'])
# NOTE(review): per the rpy2 docs, `+` on rpy2 vectors concatenates (Python
# sequence semantics) rather than adding element-wise, which yields the 20
# values that pair with `group` — confirm against the installed rpy2 version
weight = ctl + trt
# publish both objects in R's global environment so the formulas below can refer to them by name
robjects.globalenv['weight'] = weight
robjects.globalenv['group'] = group
# fit the linear model and print its ANOVA table
lm_D9 = stats.lm('weight ~ group')
print(stats.anova(lm_D9))
# omitting the intercept
lm_D90 = stats.lm('weight ~ group - 1')
print(base.summary(lm_D90))
## preprocess.R ##
# Example data: income (Ingresos), bonuses (Bonos) and debt (Deuda),
# one row per person.
dataR <- data.frame(
  Ingresos = c(23, 45, 24, 23, 54),
  Bonos = c(23, 45, 12, 67, 54),
  Deuda = c(23, 4, 1, 6, 3),
  row.names = c("Nathy", "Tomas", "Joe", "Emily", "Javi")
)
# Mean of the Ingresos column.
promedio_ingresos <- mean(dataR$Ingresos)
# Ingresos values strictly above the mean, in ascending order.
Max_Ing <- sort(dataR$Ingresos[dataR$Ingresos > promedio_ingresos])
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
# shorthand for the embedded R instance
r = robjects.r
# run the R script through R's source()
output = r.source("preprocess.R")
# NOTE(review): R's source() returns a list(value, visible) for the last
# evaluated expression, so this is presumably the script's final value — confirm
output[0]
# presumably the `visible` flag of that list — confirm
output[1]