R: Coldbir vs PostgreSQL vs MonetDB
library(hflights)
library(MonetDB.R)
## Loading required package: DBI
## Loading required package: digest
# Connect to databases
## PostgreSQL
library(RPostgreSQL)
psql <- dbConnect(PostgreSQL(), user = "postgres", password = "postgres")
# dbWriteTable(psql, 'hflights', hflights)
# MonetDB
mdb <- dbConnect(dbDriver("MonetDB"), "monetdb://localhost/voc")
# dbWriteTable(mdb, 'hflights', hflights)
# Coldbir
library(Coldbir)
## Loading required package: data.table
## Loading required package: RJSONIO
## Loading required package: stringr
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:data.table':
##
## hour, mday, month, quarter, wday, week, yday, year
##
## Loading required package: tools
## Loading required package: gtools
a <- cdb(tempfile())
a[] <- hflights
Tests
Mean on only one column
system.time({
x <- dbGetQuery(psql, "SELECT AVG(\"FlightNum\") FROM hflights")[[1]]
})
## user system elapsed
## 0.010 0.000 0.153
system.time({
x <- dbGetQuery(mdb, "SELECT AVG(FlightNum) FROM hflights")[[1]]
})
## user system elapsed
## 0.005 0.001 0.016
system.time({
x <- mean(a["FlightNum"][[1]])
})
## user system elapsed
## 0.008 0.001 0.009
Group by
### PostgreSQL
system.time({
x <- dbGetQuery(psql, "SELECT \"FlightNum\", \"DayOfWeek\", COUNT(*) FROM hflights GROUP BY \"FlightNum\", \"DayOfWeek\"")
})
## user system elapsed
## 0.014 0.002 0.887
### MonetDB
system.time({
x <- dbGetQuery(mdb, "SELECT FlightNum, DayOfWeek, COUNT(*) FROM hflights GROUP BY FlightNum, DayOfWeek")
})
## user system elapsed
## 0.026 0.003 0.124
### Coldbir
system.time({
x <- a[c("FlightNum", "DayOfWeek")][, .N, by = list(FlightNum, DayOfWeek)]
})
## user system elapsed
## 0.044 0.005 0.049
Where clause
### PostgreSQL
system.time({
x <- dbGetQuery(psql, "SELECT \"FlightNum\", \"DayOfWeek\", COUNT(*) FROM hflights WHERE \"Distance\" >= 500 AND \"DayofMonth\" > 10 GROUP BY \"FlightNum\", \"DayOfWeek\"")
})
## user system elapsed
## 0.014 0.002 0.294
### MonetDB
system.time({
x <- dbGetQuery(mdb, "SELECT FlightNum, DayOfWeek, COUNT(*) FROM hflights WHERE Distance >= 500 AND DayofMonth > 10 GROUP BY FlightNum, DayOfWeek")
})
## user system elapsed
## 0.019 0.001 0.069
### Coldbir
system.time({
x <- a[c("FlightNum", "DayOfWeek", "DayofMonth", "Distance")][Distance >=
500 & DayofMonth > 10, .N, by = list(FlightNum, DayOfWeek)]
})
## user system elapsed
## 0.06 0.01 0.07