Load RevoScaleR

library(RevoScaleR)

Set the compute context to local.

# Select the "local" compute context for the RevoScaleR calls that follow.
rxSetComputeContext(computeContext = "local")

Define a connection string.

# ODBC connection string, assembled from its individual key=value settings.
# The joined result is the same single string as before.
connectionString <- paste(
  "Driver={SQL Server}",
  "Server=DESKTOP-F87VOKM",
  "Database=AdventureWorks2016CTP3",
  "Trusted_Connection=true",
  sep = ";"
)

Create a query

# T-SQL text selecting the two columns to import from Sales.SalesOrderHeader.
squery <- "SELECT SalesOrderID, Subtotal FROM Sales.SalesOrderHeader"

Create a datasource

# Build a SQL Server data source object from the query and connection string,
# then print its structure to inspect the slots.
sDataSet <- RxSqlServerData(
  sqlQuery = squery,
  connectionString = connectionString
)

str(sDataSet)
## Formal class 'RxSqlServerData' [package "RevoScaleR"] with 23 slots
##   ..@ inSqlServer          : logi(0) 
##   ..@ computeSqlQueryOnly  : logi(0) 
##   ..@ table                : NULL
##   ..@ sqlQuery             : chr "SELECT SalesOrderID, Subtotal FROM Sales.SalesOrderHeader"
##   ..@ useFastRead          : logi TRUE
##   ..@ trimSpace            : logi TRUE
##   ..@ server               : NULL
##   ..@ dbmsName             : NULL
##   ..@ databaseName         : NULL
##   ..@ dsn                  : NULL
##   ..@ user                 : NULL
##   ..@ password             : NULL
##   ..@ connectionString     : chr "Driver={SQL Server};Server=DESKTOP-F87VOKM;Database=AdventureWorks2016CTP3;Trusted_Connection=true"
##   ..@ rowBuffering         : logi TRUE
##   ..@ writeFactorsAsIndexes: logi FALSE
##   ..@ isolationLevel       : NULL
##   ..@ id                   :<externalptr> 
##   ..@ colClasses           : NULL
##   ..@ colInfo              : NULL
##   ..@ returnDataFrame      : logi TRUE
##   ..@ stringsAsFactors     : logi FALSE
##   ..@ rowsOrBlocksPerRead  : int 50000
##   ..@ compatibilityRequest :Classes 'CompatibilityRequest', 'R6' <CompatibilityRequest>
##   Public:
##     assertServerCapability: function (capability, notSupported, notKnown) 
##     clone: function (deep = FALSE) 
##     deferredAssertServerCapability: function (capability, notSupported, notKnown) 
##     getRequestedCapabilities: function () 
##     initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown) 
##     merge: function (request) 
##     requestCapability: function (capability) 
##     runDeferredAssertions: function (server) 
##     serialize: function (file) 
##   Private:
##     deferredRequests: list
##     notKnown: function (server, capability, warningMessage) 
##     notSupported: function (server, capability, errorMessage) 
##     requestedCapabilities: 
##     runCallback: function (type, server, capability, userHandler) 
##     server: ServerDefinition, AbstractServerDefinition

Designate a destination file.

# Path of the .xdf file used as the import destination.
sDataFile <- "D:/Dropbox/RProjects/test.xdf"

str(object = sDataFile)
##  chr "D:/Dropbox/RProjects/test.xdf"

Import the data as an xdf file.

# Import the query results into the file at sDataFile, overwriting any
# existing file, and keep the returned data source object in t_xdf.
# Uses `<-` for assignment, consistent with the rest of the script.
t_xdf <- rxImport(sDataSet, outFile = sDataFile, overwrite = TRUE)
## Rows Read: 31465, Total Rows Processed: 31465, Total Chunk Time: 0.040 seconds
str(t_xdf)
## Formal class 'RxXdfData' [package "RevoScaleR"] with 19 slots
##   ..@ fileSystem            :List of 1
##   .. ..$ fileSystemType: chr "native"
##   .. ..- attr(*, "class")= chr [1:2] "RxNativeFileSystem" "RxFileSystem"
##   ..@ createCompositeSet    : NULL
##   ..@ createPartitionSet    : NULL
##   ..@ blocksPerCompositeFile: int 3
##   ..@ readByBlock           : logi TRUE
##   ..@ xdfUuid               : chr "51C846DCCA69435FAA39A1A737AECCC9"
##   ..@ cache                 : logi FALSE
##   ..@ dfName                : chr "df-1141F876E0434011B5344116137580B2"
##   ..@ dfType                : chr "xdf"
##   ..@ dfSource              : chr "D:/Dropbox/RProjects/test.xdf"
##   ..@ file                  : chr "D:/Dropbox/RProjects/test.xdf"
##   ..@ colNames              : chr ""
##   ..@ id                    :<externalptr> 
##   ..@ colClasses            : NULL
##   ..@ colInfo               : NULL
##   ..@ returnDataFrame       : logi TRUE
##   ..@ stringsAsFactors      : logi FALSE
##   ..@ rowsOrBlocksPerRead   : int 1
##   ..@ compatibilityRequest  :Classes 'CompatibilityRequest', 'R6' <CompatibilityRequest>
##   Public:
##     assertServerCapability: function (capability, notSupported, notKnown) 
##     clone: function (deep = FALSE) 
##     deferredAssertServerCapability: function (capability, notSupported, notKnown) 
##     getRequestedCapabilities: function () 
##     initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown) 
##     merge: function (request) 
##     requestCapability: function (capability) 
##     runDeferredAssertions: function (server) 
##     serialize: function (file) 
##   Private:
##     deferredRequests: list
##     notKnown: function (server, capability, warningMessage) 
##     notSupported: function (server, capability, errorMessage) 
##     requestedCapabilities: 
##     runCallback: function (type, server, capability, userHandler) 
##     server: ServerDefinition, AbstractServerDefinition

Now import the same data as a data frame by omitting the output file.

# Import the same data source without an outFile; the result printed below
# is a data.frame held in t_df.
# Uses `<-` for assignment, consistent with the rest of the script.
t_df <- rxImport(sDataSet)
## Rows Read: 31465, Total Rows Processed: 31465, Total Chunk Time: 0.029 seconds
str(t_df)
## 'data.frame':    31465 obs. of  2 variables:
##  $ SalesOrderID: int  43659 43660 43661 43662 43663 43664 43665 43666 43667 43668 ...
##  $ Subtotal    : num  20566 1294 32726 28833 419 ...

Verify the compute context.

# Print the currently active compute context.
rxGetComputeContext()
## RxLocalSeq Compute Context