Dr. Markus Schmidberger
November 4th, 2013 Munich, Germany
Email: markus@mongosoup.de
Twitter: @cloudHPC
{'first':'martin', 'last':'fowler',
'zip_id': {'city':'munich', 'zip':80883},
'card_id':2334}
{'first':'pramod', 'last':'sadalage',
'zip_id':'Munich, Lindwurmstr. 97',
'card_id':1333}
{'first':'jez', 'surname':'humble',
'zip_id': {'zip':94104, 'city':'Munich'},
'card_id':[2334, 6534]}
data(iris)
head(iris, n=3)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
class(iris)
[1] "data.frame"
running SQL statements on R data frames
library(sqldf)
sqldf("select * from iris limit 2")
Sepal_Length Sepal_Width Petal_Length Petal_Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
sqldf("select count(*) from iris")
count(*)
1 150
on CRAN there are two packages to connect R with MongoDB
library(RMongo)
Sys.setenv(NOAWT=1) # for MAC OS X
mongo <- mongoDbConnect("cc_JwQcDLJSYQJb", "dbs001.mongosoup.de", 27017)
dbAuthenticate(mongo, username="JwQcDLJSYQJb", password="RSXPkUkXXXXX")
dbShowCollections(mongo)
[1] "zips" "ccp" "system.users" "system.indexes"
[5] "test" "test_data"
dbGetQuery(mongo, "zips","{'state':'AL'}", skip=0, limit=3)
X_id state loc pop city
1 35004 AL [ -86.51557 , 33.584132] 6055 ACMAR
2 35005 AL [ -86.959727 , 33.588437] 10616 ADAMSVILLE
3 35006 AL [ -87.167455 , 33.434277] 3205 ADGER
dbInsertDocument(mongo, "test_data", '{"foo": "bar", "size": 5 }')
[1] "ok"
dbDisconnect(mongo)
library(rmongodb)
mongo <- mongo.create(host="dbs001.mongosoup.de", db="cc_JwQcDLJSYQJb", username="JwQcDLJSYQJb", password="RSXPkUkXXXXX")
mongo
[1] 0
attr(,"mongo")
<pointer: 0x105d2f940>
attr(,"class")
[1] "mongo"
attr(,"host")
[1] "dbs001.mongosoup.de"
attr(,"name")
[1] ""
attr(,"username")
[1] "JwQcDLJSYQJb"
attr(,"password")
[1] "RSXPkUkxRdOX"
attr(,"db")
[1] "cc_JwQcDLJSYQJb"
attr(,"timeout")
[1] 0
mongo.get.database.collections(mongo, "cc_JwQcDLJSYQJb")
[1] "cc_JwQcDLJSYQJb.zips" "cc_JwQcDLJSYQJb.ccp"
[3] "cc_JwQcDLJSYQJb.test" "cc_JwQcDLJSYQJb.test_data"
mongo <- mongo.disconnect(mongo)
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.append(buf, "state", "AL")
[1] TRUE
query <- mongo.bson.from.buffer(buf)
query
state : 2 AL
res <- mongo.find.one(mongo, "cc_JwQcDLJSYQJb.zips", query)
res
city : 2 ACMAR
loc : 4
0 : 1 -86.515570
1 : 1 33.584132
pop : 16 6055
state : 2 AL
_id : 2 35004
out <- mongo.bson.to.list(res)
out
$city
[1] "ACMAR"
$loc
[1] -86.52 33.58
$pop
[1] 6055
$state
[1] "AL"
$`_id`
[1] "35004"
res <- mongo.find.one(mongo, "cc_JwQcDLJSYQJb.zips", mongo.bson.from.JSON('{"state":"AL"}'))
res
city : 2 ACMAR
loc : 4
0 : 1 -86.515570
1 : 1 33.584132
pop : 16 6055
state : 2 AL
_id : 2 35004
cursor <- mongo.find(mongo, "cc_JwQcDLJSYQJb.zips", query)
res <- NULL
while (mongo.cursor.next(cursor)){
value <- mongo.cursor.value(cursor)
Rvalue <- mongo.bson.to.list(value)
res <- rbind(res, Rvalue)
}
err <- mongo.cursor.destroy(cursor)
head(res, n=4)
city loc pop state _id
Rvalue "ACMAR" Numeric,2 6055 "AL" "35004"
Rvalue "ADAMSVILLE" Numeric,2 10616 "AL" "35005"
Rvalue "ADGER" Numeric,2 3205 "AL" "35006"
Rvalue "KEYSTONE" Numeric,2 14218 "AL" "35007"
res <- mongo.find.batch(mongo, "cc_JwQcDLJSYQJb.zips", mongo.bson.from.JSON('{"state":"AL"}'))
head(res, n=4)
city loc pop state _id
val "ACMAR" Numeric,2 6055 "AL" "35004"
val "ADAMSVILLE" Numeric,2 10616 "AL" "35005"
val "ADGER" Numeric,2 3205 "AL" "35006"
val "KEYSTONE" Numeric,2 14218 "AL" "35007"
b <- mongo.bson.from.list(
list(name="Fred", age=29, city="Boston"))
b
name : 2 Fred
age : 1 29.000000
city : 2 Boston
mongo.bson.to.list(b)
$name
[1] "Fred"
$age
[1] 29
$city
[1] "Boston"
?mongo.bson
?mongo.bson.buffer.append
?mongo.bson.buffer.start.array
?mongo.bson.buffer.start.object
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.append(buf, "aggregate", "zips")
mongo.bson.buffer.start.array(buf, "pipeline")
mongo.bson.buffer.start.object(buf, "$group")
mongo.bson.buffer.append(buf, "_id", "$state")
mongo.bson.buffer.start.object(buf, "totalPop")
mongo.bson.buffer.append(buf, "$sum", "$pop")
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.start.object(buf, "$match")
mongo.bson.buffer.start.object(buf, "totalPop")
mongo.bson.buffer.append(buf, "$gte", "10000")
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
query <- mongo.bson.from.buffer(buf)
buf <- mongo.bson.buffer.create()
query <- mongo.bson.from.buffer(buf)
buf <- mongo.bson.buffer.create()
err <- mongo.bson.buffer.append(buf, "user", 1)
err <- mongo.bson.buffer.append(buf, "type", 1)
field <- mongo.bson.from.buffer(buf)
out <- mongo.find(mongo, "cc_JwQcDLJSYQJb.ccp", query, fields=field, limit=1000)
res <- NULL
while (mongo.cursor.next(out)){
value <- mongo.cursor.value(out)
Rvalue <- mongo.bson.to.list(value)
res <- rbind(res, Rvalue)
}
res <- mongo.find.batch(mongo, "cc_JwQcDLJSYQJb.ccp", mongo.bson.empty(), fields=mongo.bson.from.JSON('{"user":1, "type":1}'), limit=1000)
boxplot( as.integer(table(unlist(res[,2])) ), cex=4, horizontal=TRUE, main="Number of actions per user")
mongo.get.keys(mongo, "cc_JwQcDLJSYQJb.ccp") # implemented
mongo.apply(mongo, "test.people", 1, keys="age", mean)
mongo.summary(mongo, "test.people") # implemented
mongo.aggregate( ... )
mongo.table( ... )
What functionality is required in R DB packages?
What were your biggest issues with R DB packages?
Email: markus@mongosoup.de
Twitter: @cloudHPC