R: Coldbir vs PostgreSQL vs MonetDB

library(hflights)
library(MonetDB.R)
## Loading required package: DBI
## Loading required package: digest

# Connect to databases

## PostgreSQL
library(RPostgreSQL)
psql <- dbConnect(PostgreSQL(), user = "postgres", password = "postgres")
# dbWriteTable(psql, 'hflights', hflights)

# MonetDB
mdb <- dbConnect(dbDriver("MonetDB"), "monetdb://localhost/voc")
# dbWriteTable(mdb, 'hflights', hflights)

# Coldbir
library(Coldbir)
## Loading required package: data.table
## Loading required package: RJSONIO
## Loading required package: stringr
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:data.table':
## 
##     hour, mday, month, quarter, wday, week, yday, year
## 
## Loading required package: tools
## Loading required package: gtools
a <- cdb(tempfile())
a[] <- hflights

Tests

Mean on only one column


system.time({
    x <- dbGetQuery(psql, "SELECT AVG(\"FlightNum\") FROM hflights")[[1]]
})
##    user  system elapsed 
##   0.010   0.000   0.153

system.time({
    x <- dbGetQuery(mdb, "SELECT AVG(FlightNum) FROM hflights")[[1]]
})
##    user  system elapsed 
##   0.005   0.001   0.016

system.time({
    x <- mean(a["FlightNum"][[1]])
})
##    user  system elapsed 
##   0.008   0.001   0.009

Group by


### PostgreSQL
system.time({
    x <- dbGetQuery(psql, "SELECT \"FlightNum\", \"DayOfWeek\", COUNT(*) FROM hflights GROUP BY \"FlightNum\", \"DayOfWeek\"")
})
##    user  system elapsed 
##   0.014   0.002   0.887

### MonetDB
system.time({
    x <- dbGetQuery(mdb, "SELECT FlightNum, DayOfWeek, COUNT(*) FROM hflights GROUP BY FlightNum, DayOfWeek")
})
##    user  system elapsed 
##   0.026   0.003   0.124

### Coldbir
system.time({
    x <- a[c("FlightNum", "DayOfWeek")][, .N, by = list(FlightNum, DayOfWeek)]
})
##    user  system elapsed 
##   0.044   0.005   0.049

Where clause


### PostgreSQL
system.time({
    x <- dbGetQuery(psql, "SELECT \"FlightNum\", \"DayOfWeek\", COUNT(*) FROM hflights WHERE \"Distance\" >= 500 AND \"DayofMonth\" > 10 GROUP BY \"FlightNum\", \"DayOfWeek\"")
})
##    user  system elapsed 
##   0.014   0.002   0.294

### MonetDB
system.time({
    x <- dbGetQuery(mdb, "SELECT FlightNum, DayOfWeek, COUNT(*) FROM hflights WHERE Distance >= 500 AND DayofMonth > 10 GROUP BY FlightNum, DayOfWeek")
})
##    user  system elapsed 
##   0.019   0.001   0.069

### Coldbir
system.time({
    x <- a[c("FlightNum", "DayOfWeek", "DayofMonth", "Distance")][Distance >= 
        500 & DayofMonth > 10, .N, by = list(FlightNum, DayOfWeek)]
})
##    user  system elapsed 
##    0.06    0.01    0.07