library(Rcpp)
library(microbenchmark)
## Warning: package 'microbenchmark' was built under R version 4.0.3
x <- rnorm(50000)
# Define the function sum_loop
sum_loop <- function(x) {
result <- 0
for(i in x){
result <- result + i
}
result
}
# Check for equality
all.equal(sum_loop(x), sum(x))
## [1] TRUE
# Compare the performance
microbenchmark(sum_loop = sum_loop(x), R_sum = sum(x))
## Unit: microseconds
## expr min lq mean median uq max neval
## sum_loop 1077.300 1126.651 1802.74105 1225.302 1529.3005 12858.501 100
## R_sum 44.701 46.102 64.12512 53.651 68.7515 147.901 100
# Evaluate 2 + 2 in C++
x <- evalCpp("2+2")
# Evaluate 2 + 2 in R
y <- 2 + 2
# Storage modes of x and y
storage.mode(x)
## [1] "integer"
storage.mode(y)
## [1] "double"
# Change the C++ expression so that it returns a double
z <- evalCpp("(double)2 + 2")
# Evaluate 17 / 2 in C++
evalCpp("17/2")
## [1] 8
# Cast 17 to a double and divide by 2
evalCpp("(double)17 / 2")
## [1] 8.5
# Cast 56.3 to an int
evalCpp("(int)56.3")
## [1] 56
In between the C++ code and the R function there are many layers of engineering For c++ function we must declare the type of each argument and the objects it returns.
# Save temporary C++ file
cppFunction(" int fun(){
int x = 37;
return x;
}")
fun()
## [1] 37
cppFunction("
double add( double x, double y){
double res = x + y;
return res;
}
")
addr <- function(x, y){
res <- x + y
res
}
microbenchmark(cpp = add(20,25), R_sum = addr(20 , 25))
## Unit: nanoseconds
## expr min lq mean median uq max neval
## cpp 1101 1201 11574.95 1301 1600.5 976900 100
## R_sum 300 401 14061.02 401 501.0 1352202 100
# Define the function euclidean_distance()
cppFunction('
double euclidean_distance(double x, double y) {
return sqrt(x*x + y*y) ;
}
')
# Calculate the euclidean distance
euclidean_distance(1.5, 2.5)
## [1] 2.915476
cppFunction('
// adds x and y, but only if they are positive
int add_positive_numbers(int x, int y) {
// if x is negative, stop
if(x < 0) stop("x is negative") ;
// if y is negative, stop
if(y < 0) stop("y is negative") ;
return x + y ;
}
')
# Call the function with positive numbers
add_positive_numbers(2, 3)
## [1] 5
code.cpp
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
int timeTwo( int x ){
return 2*x
}
library(Rcpp)
sourceCpp("code.cpp")
timesTwo( 21 )
In this case only universal()
function is exported
#include <Rcpp.h>
using namespace Rcpp;
int twice( int x ){
return 2*x
}
// {{Rcpp::export}}
int universal(){
return twice(21)
}
#include <Rcpp.h>
using namespace Rcpp;
int twice( int x ){
return 2*x
}
// {{Rcpp::export}}
int universal(){
return twice(21)
}
/*** R
# This is R code
12 + 30
universal()
*/
// Include the Rcpp.h header
#include <Rcpp.h>
// Use the Rcpp namespace
using namespace Rcpp;
// [[Rcpp::export]]
int the_answer() {
// Return 42
return 42;
}
/*** R
# Call the_answer() to check you get the right result
the_answer()
*/
#include <Rcpp.h>
using namespace Rcpp;
// Make square() accept and return a double
double square(double x) {
// Return x times x
return x*x ;
}
// [[Rcpp::export]]
double dist(double x, double y) {
// Change this to use square()
return sqrt(x + y);
}
/*** R
# Call dist() to the point (3, 4)
dist(3, 4)
# Close the Rcpp R comment block
*/
#include <Rcpp.h>
using namespace Rcpp ;
// [[Rcpp::export]]
double absolute(double x) {
// Test for x greater than zero
if(x > 0) {
// Return x
return x ;
// Otherwise
} else {
// Return negative x
return -x;
}
}
/*** R
absolute(pi)
absolute(-3)
*/
Babilonian method: \[ x_{n+1} = \frac{1}{2}( x_n + \frac{S}{x_{n}} ) \]
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
double sqrt_approx(double value, int n) {
// Initialize x to be one
double x = 1;
// Specify the for loop
for(int i = 0; i<n; i++) {
x = (x + value / x) / 2.0;
}
return x;
}
/*** R
sqrt_approx(2, 10)
*/
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
List sqrt_approx(double value, int n, double threshold) {
double x = 1.0;
double previous = x;
bool is_good_enough = false;
int i;
for(i = 0; i < n; i++) {
previous = x;
x = (x + value / x) / 2.0;
is_good_enough = fabs(previous - x) < threshold;
// If the solution is good enough, then "break"
if(is_good_enough) break;
}
return List::create(_["i"] = i , _["x"] = x);
}
/*** R
sqrt_approx(2, 1000, 0.1)
*/
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
double first_plus_last(NumericVector x) {
// The size of x
int n = x.size();
// The first element of x
double first = x[0];
// The last element of x
double last = x[n - 1];
return first + last;
}
/*** R
x <- c(6, 28, 496, 8128)
first_plus_last(x)
# Does the function give the same answer as R?
all.equal(first_plus_last(x), x[1] + x[4])
*/
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
double sum_cpp(NumericVector x) {
// The size of x
int n = x.size();
// Initialize the result
double result = 0;
// Complete the loop specification
for(int i = 0; i<n; i++) {
// Add the next value
result = result + x[i];
}
return result;
}
/*** R
set.seed(42)
x <- rnorm(1e6)
sum_cpp(x)
# Does the function give the same answer as R's sum() function?
all.equal(sum_cpp(x), sum(x))
*/
#include <Rcpp.h>
using namespace Rcpp;
// Set the return type to IntegerVector
// [[Rcpp::export]]
IntegerVector seq_cpp(int lo, int hi) {
int n = hi - lo + 1;
// Create a new integer vector, sequence, of size n
IntegerVector sequence(n);
for(int i = 0; i < n; i++) {
// Set the ith element of sequence to lo plus i
sequence[i] = lo + i;
}
return sequence;
}
/*** R
lo <- -2
hi <- 5
seq_cpp(lo, hi)
# Does it give the same answer as R's seq() function?
all.equal(seq_cpp(lo, hi), seq(lo, hi))
*/
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
List create_vectors() {
// Create an unnamed character vector
CharacterVector polygons = CharacterVector::create("triangle", "square", "pentagon");
// Create a named integer vector
IntegerVector mersenne_primes = IntegerVector::create(_["first"] = 3, _["second"] = 7, _["third"] = 31);
// Create a named list
List both = List::create(_["polygons"] = polygons, _["mersenne_primes"] = mersenne_primes);
return both;
}
/*** R
create_vectors()
*/
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
double weighted_mean_cpp(NumericVector x, NumericVector w) {
double total_w = 0;
double total_xw = 0;
int n = x.size();
for(int i = 0; i < n; i++) {
// If the ith element of x or w is NA then return NA
if(NumericVector::is_na(x[i]) || NumericVector::is_na(w[i])) {
return NumericVector::get_na();
}
total_w += w[i];
total_xw += x[i] * w[i];
}
return total_xw / total_w;
}
/*** R
x <- c(0, 1, 3, 6, 2, 7, 13, NA, 12, 21, 11)
w <- 1 / seq_along(x)
weighted_mean_cpp(x, w)
*/
*/
More efficient than Rcpp. Handles memory without making copies.
The standard template library (stl) is a C++ library containing flexible algorithms and data structures. For example, the double vector from the stl is like a “native C++” equivalent of Rcpp’s NumericVector. the following code creates a standard double vector named x with ten elements.
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
NumericVector positive_rnorm(int n, double mean, double sd) {
// Specify out as a numeric vector of size n
NumericVector out(n);
// This loops over the elements of out
for(int i = 0; i < n; i++) {
// This loop keeps trying to generate a value
do {
// Call R's rnorm()
out[i] = R::rnorm(mean, sd);
// While the number is negative, keep trying
} while(out[i] <= 0);
}
return out;
}
/*** R
positive_rnorm(10, 2, 2)
*/
#include <Rcpp.h>
using namespace Rcpp;
// From previous exercise; do not modify
// [[Rcpp::export]]
int choose_component(NumericVector weights, double total_weight) {
double x = R::runif(0, total_weight);
int j = 0;
while(x >= weights[j]) {
x -= weights[j];
j++;
}
return j;
}
// [[Rcpp::export]]
NumericVector rmix(int n, NumericVector weights, NumericVector means, NumericVector sds) {
// Check that weights and means have the same size
int d = weights.size();
if(means.size() != d) {
stop("means size != weights size");
}
// Do the same for the weights and std devs
if(sds.size() != d) {
stop("sds size != weights size");
}
// Calculate the total weight
double total_weight = sum(weights);
// Create the output vector
NumericVector res(n);
// Fill the vector
for(int i = 0; i < n; i++) {
// Choose a component
int j = choose_component(weights, total_weight);
// Simulate from the chosen component
res[i] = R::rnorm(means[j], sds[j]);
}
return res;
}
/*** R
weights <- c(0.3, 0.7)
means <- c(2, 4)
sds <- c(2, 4)
rmix(10, weights, means, sds)
*/
rollmean1 <- function(x, window = 3) {
n <- length(x)
res <- rep(NA, n)
for(i in seq(window, n)) {
res[i] <- mean(x[seq(i - window + 1, i)])
}
res
}
rollmean2 <- function(x, window = 3){
n <- length(x)
res <- rep(NA, n)
total <- sum(head(x, window))
res[window] <- total / window
for(i in seq(window + 1, n)) {
total <- total + x[i] - x[i - window]
res[i] <- total / window
}
res
}
# Complete the definition of rollmean3()
rollmean3 <- function(x, window = 3) {
# Add the first window elements of x
initial_total <- sum(head(x, window))
# The elements to add at each iteration
lasts <- tail(x, - window)
# The elements to remove
firsts <- head(x, - window)
# Take the initial total and add the
# cumulative sum of lasts minus firsts
other_totals <- initial_total + cumsum(lasts - firsts)
# Build the output vector
c(
rep(NA, window - 1), # leading NA
initial_total / window, # initial mean
other_totals / window # other means
)
}
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
NumericVector rollmean4(NumericVector x, int window) {
int n = x.size();
// Set res as a NumericVector of NAs with length n
NumericVector res(n, NumericVector::get_na());
// Sum the first window worth of values of x
double total = 0.0;
for(int i = 0; i < window; i++) {
total += x[i];
}
// Treat the first case seperately
res[window - 1] = total / window;
// Iteratively update the total and recalculate the mean
for(int i = window; i < n; i++) {
// Remove the (i - window)th case, and add the ith case
total += - x[i - window] + x[i];
// Calculate the mean at the ith position
res[i] = total / window;
}
return res;
}
/*** R
# Compare rollmean2, rollmean3 and rollmean4
set.seed(42)
x <- rnorm(10000)
microbenchmark(
rollmean2(x, 4),
rollmean3(x, 4),
rollmean4(x, 4),
times = 5
)
*/
Comments in functions