Evan Klein / D590 / Fall 2025
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
heights <- read_csv('data/heights.csv', col_names = TRUE)
## Rows: 1192 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): sex, race
## dbl (4): earn, height, ed, age
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(parse_logical(c("TRUE", "FALSE", "NA")))
## logi [1:3] TRUE FALSE NA
str(parse_integer(c("1", "2", "3")))
## int [1:3] 1 2 3
str(parse_date(c("2010-01-01", "1979-10-14")))
## Date[1:2], format: "2010-01-01" "1979-10-14"
parse_integer(c("1", "231", ".", "456"), na = ".")
## [1] 1 231 NA 456
x <- parse_integer(c("123", "345", "abc", "123.45"))
## Warning: 2 parsing failures.
## row col expected actual
## 3 -- no trailing characters abc
## 4 -- no trailing characters 123.45
problems(x)
## # A tibble: 2 × 4
## row col expected actual
## <int> <int> <chr> <chr>
## 1 3 NA no trailing characters abc
## 2 4 NA no trailing characters 123.45
parse_double("1.23")
## [1] 1.23
parse_double("1,23", locale = locale(decimal_mark = ","))
## [1] 1.23
parse_number("20%")
## [1] 20
charToRaw("Evan Klein")
## [1] 45 76 61 6e 20 4b 6c 65 69 6e
x2 <- "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd"
parse_character(x2, locale = locale(encoding = "Shift-JIS"))
## [1] "こんにちは"
guess_encoding(charToRaw(x2))
## # A tibble: 1 × 2
## encoding confidence
## <chr> <dbl>
## 1 KOI8-R 0.42
vegetables <- c("carrot", "broccoli")
parse_factor(c("broccoli", "carrot"), levels = vegetables)
## [1] broccoli carrot
## Levels: carrot broccoli
parse_datetime("2010-10-01T2010")
## [1] "2010-10-01 20:10:00 UTC"
library(hms)
##
## Attaching package: 'hms'
## The following object is masked from 'package:lubridate':
##
## hms
parse_time("02:30 pm")
## 14:30:00
parse_date("01/02/15", "%m/%d/%y")
## [1] "2015-01-02"
guess_parser("2010-10-01")
## [1] "date"
challenge <- read_csv(readr_example("challenge.csv"))
## Rows: 2000 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): x
## date (1): y
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
problems(challenge)
## # A tibble: 0 × 5
## # ℹ 5 variables: row <int>, col <int>, expected <chr>, actual <chr>, file <chr>
tail(challenge)
## # A tibble: 6 × 2
## x y
## <dbl> <date>
## 1 0.805 2019-11-21
## 2 0.164 2018-03-29
## 3 0.472 2014-08-04
## 4 0.718 2015-08-16
## 5 0.270 2020-02-04
## 6 0.608 2019-01-06
challenge <- read_csv(
readr_example("challenge.csv"),
col_types = cols(
x = col_double(),
y = col_logical()
)
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
challenge <- read_csv(
readr_example("challenge.csv"),
col_types = cols(
x = col_double(),
y = col_date()
)
)
tail(challenge)
## # A tibble: 6 × 2
## x y
## <dbl> <date>
## 1 0.805 2019-11-21
## 2 0.164 2018-03-29
## 3 0.472 2014-08-04
## 4 0.718 2015-08-16
## 5 0.270 2020-02-04
## 6 0.608 2019-01-06
challenge2 <- read_csv(readr_example("challenge.csv"),
col_types = cols(.default = col_character())
)
write_csv(challenge, "challenge.csv")
write_rds(challenge, "challenge.rds")
read_rds("challenge.rds")
## # A tibble: 2,000 × 2
## x y
## <dbl> <date>
## 1 404 NA
## 2 4172 NA
## 3 3004 NA
## 4 787 NA
## 5 37 NA
## 6 2332 NA
## 7 2489 NA
## 8 1449 NA
## 9 3665 NA
## 10 3863 NA
## # ℹ 1,990 more rows
library(feather)
write_feather(challenge, "challenge.feather")
read_feather("challenge.feather")
## # A tibble: 2,000 × 2
## x y
## <dbl> <date>
## 1 404 NA
## 2 4172 NA
## 3 3004 NA
## 4 787 NA
## 5 37 NA
## 6 2332 NA
## 7 2489 NA
## 8 1449 NA
## 9 3665 NA
## 10 3863 NA
## # ℹ 1,990 more rows