# RevoScaleR - example script for the "Data Import" video
#
# The title line above must be a comment; left as bare text it is a parse
# error that prevents the script from being sourced.

# Directory that holds the sample data sets.
unitTestDataDir <- rxGetOption("unitTestDataDir")

# Input and output file names/locations (CSV source).
inputFile <- file.path(unitTestDataDir, "AirlineSampleDate.csv")
outputFile <- file.path(unitTestDataDir, "AirlineSampleDate1.xdf")
rxImport(inData = inputFile, outFile = outputFile, overwrite = TRUE)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.015 seconds

# Input and output file names/locations (SAS source).
# Note: inputFile/outputFile are reassigned here, so later statements that use
# inputFile read the SAS file.
inputFile <- file.path(unitTestDataDir, "AirlineSampleDate.sas7bdat")
outputFile <- file.path(unitTestDataDir, "AirlineSampleDate_SAS.xdf")
rxImport(inData = inputFile, outFile = outputFile, overwrite = TRUE)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.020 seconds
# Import without building a separate output path: the .xdf file name is
# passed directly as outFile.
# Note: inputFile still holds the SAS path assigned earlier in the script.
Air1 <- rxImport(
  inData = inputFile,
  outFile = "Airline.xdf",
  overwrite = TRUE
)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.019 seconds

# The result is a reference to the .xdf file, not an in-memory data frame.
class(Air1)
## [1] "RxXdfData"
## attr(,"package")
## [1] "RevoScaleR"

head(Air1)
## ArrDelay CRSDepTime UniqueCarrier Origin Dest Date
## 1 -3 11 WN STL TUL 1996-08-18
## 2 -16 15 XE VPS IAH 2006-06-03
## 3 -9 12 UA DEN ORD 1989-02-16
## 4 -16 7 NW LAX MEM 2006-10-13
## 5 5 6 US ORD CLT 2004-11-05
## 6 20 7 CO SNA DEN 1990-01-18

# Pass only a bare name (no extension) as outFile.
Air2 <- rxImport(
  inData = inputFile,
  outFile = "Airline1",
  overwrite = TRUE
)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.019 seconds

class(Air2)
## [1] "RxXdfData"
## attr(,"package")
## [1] "RevoScaleR"
# Leave outFile unset (NULL): the imported data comes back as a data frame.
Air3 <- rxImport(
  inData = inputFile,
  overwrite = TRUE
)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.018 seconds

class(Air3)
## [1] "data.frame"

head(Air3)
## ArrDelay CRSDepTime UniqueCarrier Origin Dest Date
## 1 -3 11 WN STL TUL 1996-08-18
## 2 -16 15 XE VPS IAH 2006-06-03
## 3 -9 12 UA DEN ORD 1989-02-16
## 4 -16 7 NW LAX MEM 2006-10-13
## 5 5 6 US ORD CLT 2004-11-05
## 6 20 7 CO SNA DEN 1990-01-18
# Import while converting a variable's type and name.

# Variable metadata before conversion (ArrDelay is stored as character).
rxGetVarInfo(Air1)
## Var 1: ArrDelay, Type: character
## Var 2: CRSDepTime, Type: numeric, Low/High: (0.0000, 23.0000)
## Var 3: UniqueCarrier, Type: character
## Var 4: Origin, Type: character
## Var 5: Dest, Type: character
## Var 6: Date, Type: character

# Read ArrDelay as integer and rename it to "arrdelay".
colInfo <- list(ArrDelay = list(type = "integer", newName = "arrdelay"))

# Keep only rows whose scheduled departure hour is greater than 10.
Air4 <- rxImport(
  inData = inputFile,
  outFile = "Air.xdf",
  overwrite = TRUE,
  colInfo = colInfo,
  rowSelection = CRSDepTime > 10
)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.019 seconds

# Read the first 10 rows of the new .xdf file back into a data frame.
rxReadXdf(Air4, numRows = 10)
##
## Rows Processed: 10
## Time to read data file: less than .001 secs.
## Time to convert to data frame: less than .001 secs.
## arrdelay CRSDepTime UniqueCarrier Origin Dest Date
## 1 -3 11 WN STL TUL 1996-08-18
## 2 -16 15 XE VPS IAH 2006-06-03
## 3 -9 12 UA DEN ORD 1989-02-16
## 4 6 13 WN RDU MCO 2004-06-08
## 5 2 20 MQ LAX SAN 2006-08-13
## 6 10 15 DL DCA JFK 2000-10-25
## 7 4 21 WN LAS ABQ 1997-04-06
## 8 -3 18 NW DTW GRB 2003-07-26
## 9 11 16 AA SJU MIA 1992-10-29
## 10 13 13 US PHX PHL 2000-05-28

# Variable metadata after conversion: renamed, integer-typed, and filtered.
rxGetVarInfo(Air4)
## Var 1: arrdelay, Type: integer, Low/High: (-36, 239)
## Var 2: CRSDepTime, Type: numeric, Low/High: (11.0000, 23.0000)
## Var 3: UniqueCarrier, Type: character
## Var 4: Origin, Type: character
## Var 5: Dest, Type: character
## Var 6: Date, Type: character