매우 빠른 읽기 쓰기 패키지
- 매우 빠른 읽기 쓰기 패키지가 R로 제공
- 심지어 처음부터 데이터의 일부만 읽어오기도 지원
- 전체를 읽어와서 일부만 추출하는게 아니라 처음부터 일부만 불러옴
- 멀티쓰레드를 지원해서 쓰레드 개수에 따라 속도도 증가 가능
# Install the development branch of fst from GitHub and attach it.
# devtools must be available first; uncomment the next line if it is not:
# install.packages("devtools")
devtools::install_github(
  repo = "fstPackage/fst",
  ref = "develop"
)
# Console output recorded from a run where the package was already current:
## Skipping install of 'fst' from a github remote, the SHA1 (b96aa4b1) has not changed since last install.
## Use `force = TRUE` to force installation
library(fst)
Basic usage
# Build a 10-million-row data frame with one logical, one integer, one
# double and one factor column, so several column types get written.
nr_of_rows <- 1e7
df <- data.frame(
  # Mostly TRUE, some FALSE, and a small share of missing values
  Logical = sample(
    x = c(TRUE, FALSE, NA),
    size = nr_of_rows,
    replace = TRUE,
    prob = c(0.85, 0.1, 0.05)
  ),
  Integer = sample(x = 1L:100L, size = nr_of_rows, replace = TRUE),
  # 20 distinct values with two decimals, resampled to full length
  Real = sample(
    x = sample(1:10000, 20) / 100,
    size = nr_of_rows,
    replace = TRUE
  ),
  # Factor levels are the labels of the built-in UScitiesD dataset
  Factor = as.factor(
    sample(x = labels(UScitiesD), size = nr_of_rows, replace = TRUE)
  )
)

# Write the data frame to an fst file on disk
write.fst(df, "dataset.fst")

# Read the file back, replacing the in-memory data frame
df <- read.fst("dataset.fst")
## Loading required namespace: data.table
Random access
# The fst file format provides full random access to stored datasets, so a
# selection of columns and a range of rows can be requested in a single
# read call:
df_subset <- read.fst(
  "dataset.fst",
  columns = c("Logical", "Factor"),
  from = 2000,
  to = 5000
)