# RevoScaleR - Data Import: example script accompanying the video tutorial


# Look up the directory that holds the data files to import.

unitTestDataDir <- rxGetOption("unitTestDataDir")

# Build the source (.csv) and destination (.xdf) paths inside that directory.

inputFile <- file.path(unitTestDataDir, "AirlineSampleDate.csv")
outputFile <- file.path(unitTestDataDir, "AirlineSampleDate1.xdf")

# Import the CSV file into the .xdf file, replacing any existing output.
rxImport(inputFile, outFile = outputFile, overwrite = TRUE)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.015 seconds

# Point the source and destination paths at the SAS (.sas7bdat) version of
# the data set and its own .xdf output file.

inputFile <- file.path(unitTestDataDir, "AirlineSampleDate.sas7bdat")
outputFile <- file.path(unitTestDataDir, "AirlineSampleDate_SAS.xdf")

# Import the SAS file into the .xdf file, replacing any existing output.
rxImport(inputFile, outFile = outputFile, overwrite = TRUE)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.020 seconds

# Import without a pre-built output path: pass a literal ".xdf" file name as
# outFile and keep the object that rxImport returns.

Air1 <- rxImport(inputFile, outFile = "Airline.xdf", overwrite = TRUE)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.019 seconds
# The returned object is an RxXdfData data source (see the output below).
class(Air1)
## [1] "RxXdfData"
## attr(,"package")
## [1] "RevoScaleR"
# Preview the first rows of the imported data.
head(Air1)
##   ArrDelay CRSDepTime UniqueCarrier Origin Dest       Date
## 1       -3         11            WN    STL  TUL 1996-08-18
## 2      -16         15            XE    VPS  IAH 2006-06-03
## 3       -9         12            UA    DEN  ORD 1989-02-16
## 4      -16          7            NW    LAX  MEM 2006-10-13
## 5        5          6            US    ORD  CLT 2004-11-05
## 6       20          7            CO    SNA  DEN 1990-01-18

# Give outFile a plain name with no file extension.

Air2 <- rxImport(inputFile, outFile = "Airline1", overwrite = TRUE)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.019 seconds
# The result is still an RxXdfData data source (see the output below).
class(Air2)
## [1] "RxXdfData"
## attr(,"package")
## [1] "RevoScaleR"

# Leave outFile unset (NULL): the imported data comes back as a data frame.
Air3 <- rxImport(inputFile, overwrite = TRUE)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.018 seconds
class(Air3)
## [1] "data.frame"
# Preview the first rows of the in-memory data frame.
head(Air3)
##   ArrDelay CRSDepTime UniqueCarrier Origin Dest       Date
## 1       -3         11            WN    STL  TUL 1996-08-18
## 2      -16         15            XE    VPS  IAH 2006-06-03
## 3       -9         12            UA    DEN  ORD 1989-02-16
## 4      -16          7            NW    LAX  MEM 2006-10-13
## 5        5          6            US    ORD  CLT 2004-11-05
## 6       20          7            CO    SNA  DEN 1990-01-18


# Import the data while changing a variable's type and name.

# Inspect the variables of the earlier import; in the recorded output below,
# ArrDelay was read as a character column.
rxGetVarInfo(Air1)
## Var 1: ArrDelay, Type: character
## Var 2: CRSDepTime, Type: numeric, Low/High: (0.0000, 23.0000)
## Var 3: UniqueCarrier, Type: character
## Var 4: Origin, Type: character
## Var 5: Dest, Type: character
## Var 6: Date, Type: character
# Column specification: read ArrDelay as an integer and rename it "arrdelay".
colInfo <- list(ArrDelay = list(type = "integer", newName = "arrdelay"))

# Apply the column specification during import and keep only the rows whose
# CRSDepTime is greater than 10.
Air4 <- rxImport(inData = inputFile, outFile = "Air.xdf", overwrite = TRUE,
    colInfo = colInfo, rowSelection = CRSDepTime > 10)
## Rows Read: 500, Total Rows Processed: 500, Total Chunk Time: 0.019 seconds

# Read the first 10 rows of the filtered import into a data frame and show
# them; the renamed "arrdelay" column appears in the recorded output below.
rxReadXdf(Air4, numRows = 10)
## 
## Rows Processed: 10
## Time to read data file: less than .001 secs.
## Time to convert to data frame: less than .001 secs.
##    arrdelay CRSDepTime UniqueCarrier Origin Dest       Date
## 1        -3         11            WN    STL  TUL 1996-08-18
## 2       -16         15            XE    VPS  IAH 2006-06-03
## 3        -9         12            UA    DEN  ORD 1989-02-16
## 4         6         13            WN    RDU  MCO 2004-06-08
## 5         2         20            MQ    LAX  SAN 2006-08-13
## 6        10         15            DL    DCA  JFK 2000-10-25
## 7         4         21            WN    LAS  ABQ 1997-04-06
## 8        -3         18            NW    DTW  GRB 2003-07-26
## 9        11         16            AA    SJU  MIA 1992-10-29
## 10       13         13            US    PHX  PHL 2000-05-28
# Confirm the result: arrdelay is now an integer and CRSDepTime starts at 11,
# as the recorded output below shows.
rxGetVarInfo(Air4)
## Var 1: arrdelay, Type: integer, Low/High: (-36, 239)
## Var 2: CRSDepTime, Type: numeric, Low/High: (11.0000, 23.0000)
## Var 3: UniqueCarrier, Type: character
## Var 4: Origin, Type: character
## Var 5: Dest, Type: character
## Var 6: Date, Type: character