They are typically much faster (~10x) than their base
equivalents.
- https://r4ds.had.co.nz/data-import.html#getting-started
Data source: https://archive.ics.uci.edu/ml/machine-learning-databases/credit-screening/crx.data
b,30.83,0,u,g,w,v,1.25,t,t,01,f,g,00202,0,+
a,58.67,4.46,u,g,q,h,3.04,t,t,06,f,g,00043,560,+
a,24.50,0.5,u,g,q,h,1.5,t,f,0,f,g,00280,824,+
b,27.83,1.54,u,g,w,v,3.75,t,t,05,t,g,00100,3,+
b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,00120,0,+
# Location of the UCI credit-screening data set (comma-separated, no header).
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/credit-screening/crx.data"

# The file has no header row, so supply the 16 column names A1..A16
# explicitly; fields containing "?" are read as missing values (NA).
data_raw <- read_csv(
  file = url,
  col_names = paste0("A", seq_len(16)),
  na = "?"
)

# Preview the first rows to check the guessed column types.
head(data_raw)
## # A tibble: 6 x 16
## A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <lgl> <lgl> <chr> <lgl> <chr>
## 1 b 30.8 0 u g w v 1.25 TRUE TRUE 01 FALSE g
## 2 a 58.7 4.46 u g q h 3.04 TRUE TRUE 06 FALSE g
## 3 a 24.5 0.5 u g q h 1.5 TRUE FALSE 0 FALSE g
## 4 b 27.8 1.54 u g w v 3.75 TRUE TRUE 05 TRUE g
## 5 b 20.2 5.62 u g w v 1.71 TRUE FALSE 0 FALSE s
## 6 b 32.1 4 u g m v 2.5 TRUE FALSE 0 TRUE g
## # ... with 3 more variables: A14 <chr>, A15 <dbl>, A16 <chr>
# Keep only the rows in which at least one column is NA, then preview them.
# NOTE(review): this retains the *incomplete* rows; if complete cases were
# intended the condition would need to be negated — confirm.
data <- filter(data_raw, if_any(everything(), is.na))
head(data)
## # A tibble: 6 x 16
## A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <lgl> <lgl> <chr> <lgl> <chr>
## 1 b 34.8 4 u g d bb 12.5 TRUE FALSE 0 TRUE g
## 2 a NA 3.5 u g d v 3 TRUE FALSE 0 TRUE g
## 3 b NA 0.375 u g d v 0.875 TRUE FALSE 0 TRUE s
## 4 b NA 5 y p aa v 8.5 TRUE FALSE 0 FALSE g
## 5 b NA 0.5 u g c bb 0.835 TRUE FALSE 0 TRUE s
## 6 b 24.8 2.75 u g c v 2.25 TRUE TRUE 06 FALSE g
## # ... with 3 more variables: A14 <chr>, A15 <dbl>, A16 <chr>
# Inline CSV: the string is parsed as literal data rather than as a file
# path. The leading empty line is skipped and `a,b,c` becomes the header,
# leaving two data rows.
read_csv("
a,b,c
1,2,3
4,5,6
")
## # A tibble: 2 x 3
## a b c
## <dbl> <dbl> <dbl>
## 1 1 2 3
## 2 4 5 6
# Same inline data, but separated by semicolons: `delim` tells read_delim()
# which character splits the fields.
read_delim("
a;b;c
1;2;3
4;5;6
", delim = ";")
## # A tibble: 2 x 3
## a b c
## <dbl> <dbl> <dbl>
## 1 1 2 3
## 2 4 5 6
# No header line in the data: `col_names` supplies the names x, y, z, so the
# first line is read as a data row instead of as column names.
read_csv("1,2,3\n4,5,6", col_names = c("x", "y", "z"))
## # A tibble: 2 x 3
## x y z
## <dbl> <dbl> <dbl>
## 1 1 2 3
## 2 4 5 6
# Problem case: the header declares two columns but every row has three
# fields. As the printed result shows, the surplus field is folded into the
# last column (b reads as 23 and 56) rather than becoming a column of its own.
read_csv("a,b\n 1,2,3\n 4,5,6")
## # A tibble: 2 x 2
## a b
## <dbl> <dbl>
## 1 1 23
## 2 4 56
# Problem case: ragged rows against a three-column header. The short row is
# padded with NA in `c`; the long row's extra field is merged into the last
# column (c reads as 34).
read_csv("a,b,c\n 1,2\n 1,2,3,4")
## # A tibble: 2 x 3
## a b c
## <dbl> <dbl> <dbl>
## 1 1 2 NA
## 2 1 2 34
# Problem case: the only data row holds an opening quote that is never
# closed and has a single field. The result has a = 1 and b = NA.
read_csv("a,b\n \"1")
## # A tibble: 1 x 2
## a b
## <dbl> <lgl>
## 1 1 NA
# Problem case: the second data row repeats the text `a,b`, so neither column
# is purely numeric and both are parsed as character.
read_csv("a,b\n 1,2\n a,b")
## # A tibble: 2 x 2
## a b
## <chr> <chr>
## 1 1 2
## 2 a b
# Problem case: the data is semicolon-separated but read_csv() splits on
# commas, so each line becomes one field in a single column named `a;b`.
# read_delim(..., delim = ";") parses this layout as intended.
read_csv("a;b\n 1;3")
## # A tibble: 1 x 1
## `a;b`
## <chr>
## 1 1;3