Load RevoScaleR
library(RevoScaleR)
Set the compute context to local.
# Select the "local" compute context for the RevoScaleR calls that follow.
rxSetComputeContext(computeContext = "local")
Define a connection string.
# Connection string naming the driver, server, and database.
# Trusted_Connection=true is set, so no user name or password is embedded.
connectionString <- "Driver={SQL Server};Server=DESKTOP-F87VOKM;Database=AdventureWorks2016CTP3;Trusted_Connection=true"
Create a query
# Query text: the order ID and subtotal columns of Sales.SalesOrderHeader.
squery <- "SELECT SalesOrderID, Subtotal FROM Sales.SalesOrderHeader"
Create a data source.
# Build an RxSqlServerData data source from the query and connection string.
sDataSet <- RxSqlServerData(
  sqlQuery = squery,
  connectionString = connectionString
)
# Display the slots of the resulting data source object.
str(sDataSet)
## Formal class 'RxSqlServerData' [package "RevoScaleR"] with 23 slots
## ..@ inSqlServer : logi(0)
## ..@ computeSqlQueryOnly : logi(0)
## ..@ table : NULL
## ..@ sqlQuery : chr "SELECT SalesOrderID, Subtotal FROM Sales.SalesOrderHeader"
## ..@ useFastRead : logi TRUE
## ..@ trimSpace : logi TRUE
## ..@ server : NULL
## ..@ dbmsName : NULL
## ..@ databaseName : NULL
## ..@ dsn : NULL
## ..@ user : NULL
## ..@ password : NULL
## ..@ connectionString : chr "Driver={SQL Server};Server=DESKTOP-F87VOKM;Database=AdventureWorks2016CTP3;Trusted_Connection=true"
## ..@ rowBuffering : logi TRUE
## ..@ writeFactorsAsIndexes: logi FALSE
## ..@ isolationLevel : NULL
## ..@ id :<externalptr>
## ..@ colClasses : NULL
## ..@ colInfo : NULL
## ..@ returnDataFrame : logi TRUE
## ..@ stringsAsFactors : logi FALSE
## ..@ rowsOrBlocksPerRead : int 50000
## ..@ compatibilityRequest :Classes 'CompatibilityRequest', 'R6' <CompatibilityRequest>
## Public:
## assertServerCapability: function (capability, notSupported, notKnown)
## clone: function (deep = FALSE)
## deferredAssertServerCapability: function (capability, notSupported, notKnown)
## getRequestedCapabilities: function ()
## initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown)
## merge: function (request)
## requestCapability: function (capability)
## runDeferredAssertions: function (server)
## serialize: function (file)
## Private:
## deferredRequests: list
## notKnown: function (server, capability, warningMessage)
## notSupported: function (server, capability, errorMessage)
## requestedCapabilities:
## runCallback: function (type, server, capability, userHandler)
## server: ServerDefinition, AbstractServerDefinition
Designate a destination file.
# Path of the XDF file; passed as outFile to rxImport in the next step.
sDataFile <- "D:/Dropbox/RProjects/test.xdf"
# Display the value to confirm it is a single character string.
str(sDataFile)
## chr "D:/Dropbox/RProjects/test.xdf"
Import the data as an xdf file.
# Read the SQL Server data source and write it to the file at sDataFile,
# replacing any existing file there (overwrite = TRUE).
t_xdf <- rxImport(sDataSet, outFile = sDataFile, overwrite = TRUE)
## Rows Read: 31465, Total Rows Processed: 31465, Total Chunk Time: 0.040 seconds
# With outFile supplied, the result is an RxXdfData object, not a data frame.
str(t_xdf)
## Formal class 'RxXdfData' [package "RevoScaleR"] with 19 slots
## ..@ fileSystem :List of 1
## .. ..$ fileSystemType: chr "native"
## .. ..- attr(*, "class")= chr [1:2] "RxNativeFileSystem" "RxFileSystem"
## ..@ createCompositeSet : NULL
## ..@ createPartitionSet : NULL
## ..@ blocksPerCompositeFile: int 3
## ..@ readByBlock : logi TRUE
## ..@ xdfUuid : chr "51C846DCCA69435FAA39A1A737AECCC9"
## ..@ cache : logi FALSE
## ..@ dfName : chr "df-1141F876E0434011B5344116137580B2"
## ..@ dfType : chr "xdf"
## ..@ dfSource : chr "D:/Dropbox/RProjects/test.xdf"
## ..@ file : chr "D:/Dropbox/RProjects/test.xdf"
## ..@ colNames : chr ""
## ..@ id :<externalptr>
## ..@ colClasses : NULL
## ..@ colInfo : NULL
## ..@ returnDataFrame : logi TRUE
## ..@ stringsAsFactors : logi FALSE
## ..@ rowsOrBlocksPerRead : int 1
## ..@ compatibilityRequest :Classes 'CompatibilityRequest', 'R6' <CompatibilityRequest>
## Public:
## assertServerCapability: function (capability, notSupported, notKnown)
## clone: function (deep = FALSE)
## deferredAssertServerCapability: function (capability, notSupported, notKnown)
## getRequestedCapabilities: function ()
## initialize: function (server, notSupported = capabilityNotSupported, notKnown = serverNotKnown)
## merge: function (request)
## requestCapability: function (capability)
## runDeferredAssertions: function (server)
## serialize: function (file)
## Private:
## deferredRequests: list
## notKnown: function (server, capability, warningMessage)
## notSupported: function (server, capability, errorMessage)
## requestedCapabilities:
## runCallback: function (type, server, capability, userHandler)
## server: ServerDefinition, AbstractServerDefinition
Now get it as a data frame.
# With no outFile, rxImport returns the imported rows as a data frame.
t_df <- rxImport(sDataSet)
## Rows Read: 31465, Total Rows Processed: 31465, Total Chunk Time: 0.029 seconds
# Inspect the column names and types of the imported data.
str(t_df)
## 'data.frame': 31465 obs. of 2 variables:
## $ SalesOrderID: int 43659 43660 43661 43662 43663 43664 43665 43666 43667 43668 ...
## $ Subtotal : num 20566 1294 32726 28833 419 ...
Verify the compute context.
# Print the active compute context to confirm it is still the local one.
rxGetComputeContext()
## RxLocalSeq Compute Context