library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
#Sections: Introduction, Compared to Base R Exercises: 1
read_delim("a|b|c\n1|2|3\n4|5|6", delim = "|")
#Sections: Introduction, Compared to Base R Exercises: 5
read_csv("a,b\n1,2,3\n4,5,6")
## Warning: 2 parsing failures.
## row col expected actual file
## 1 -- 2 columns 3 columns literal data
## 2 -- 2 columns 3 columns literal data
read_csv("a,b,c\n1,2\n1,2,3,4")
## Warning: 2 parsing failures.
## row col expected actual file
## 1 -- 3 columns 2 columns literal data
## 2 -- 3 columns 4 columns literal data
read_csv("a,b\n\"1")
## Warning: 2 parsing failures.
## row col expected actual file
## 1 a closing quote at end of file literal data
## 1 -- 2 columns 1 columns literal data
read_csv("a,b\n1,2\na,b")
read_csv("a;b\n1;3")
#Sections: Parsing a vector, Numbers, Strings, Factors, Dates, Date-Times and Times
#Parsing a vector
str(parse_logical(c("TRUE", "FALSE", "NA")))
## logi [1:3] TRUE FALSE NA
str(parse_integer(c("1", "2", "3")))
## int [1:3] 1 2 3
str(parse_date(c("2010-01-01", "1979-10-14")))
## Date[1:2], format: "2010-01-01" "1979-10-14"
parse_integer(c("1", "231", ".", "456"), na = ".")
## [1] 1 231 NA 456
x <- parse_integer(c("123", "345", "abc", "123.45"))
## Warning: 2 parsing failures.
## row col expected actual
## 3 -- an integer abc
## 4 -- no trailing characters .45
x
## [1] 123 345 NA NA
## attr(,"problems")
## # A tibble: 2 x 4
## row col expected actual
## <int> <int> <chr> <chr>
## 1 3 NA an integer abc
## 2 4 NA no trailing characters .45
problems(x)
#Numbers
parse_double("1.23")
## [1] 1.23
parse_double("1,23", locale = locale(decimal_mark = ","))
## [1] 1.23
parse_number("$100")
## [1] 100
parse_number("20%")
## [1] 20
parse_number("It cost $123.45")
## [1] 123.45
# Used in America
parse_number("$123,456,789")
## [1] 123456789
# Used in many parts of Europe
parse_number("123.456.789", locale = locale(grouping_mark = "."))
## [1] 123456789
# Used in Switzerland
parse_number("123'456'789", locale = locale(grouping_mark = "'"))
## [1] 123456789
#Strings
charToRaw("Hadley")
## [1] 48 61 64 6c 65 79
x1 <- "El Ni\xf1o was particularly bad this year"
x2 <- "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd"
x1
## [1] "El Niño was particularly bad this year"
x2
## [1] "‚±‚ñ‚É‚¿‚Í"
parse_character(x1, locale = locale(encoding = "Latin1"))
## [1] "El Niño was particularly bad this year"
parse_character(x2, locale = locale(encoding = "Shift-JIS"))
## [1] "<U+3053><U+3093><U+306B><U+3061><U+306F>"
guess_encoding(charToRaw(x1))
guess_encoding(charToRaw(x2))
#Factors
fruit <- c("apple", "banana")
parse_factor(c("apple", "banana", "bananana"), levels = fruit)
## Warning: 1 parsing failure.
## row col expected actual
## 3 -- value in level set bananana
## [1] apple banana <NA>
## attr(,"problems")
## # A tibble: 1 x 4
## row col expected actual
## <int> <int> <chr> <chr>
## 1 3 NA value in level set bananana
## Levels: apple banana
#Dates, date-times, and times
parse_datetime("2010-10-01T2010")
## [1] "2010-10-01 20:10:00 UTC"
parse_datetime("20101010")
## [1] "2010-10-10 UTC"
parse_date("2010-10-01")
## [1] "2010-10-01"
library(hms)
parse_time("01:10 am")
## 01:10:00
parse_time("20:10:01")
## 20:10:01
parse_date("01/02/15", "%m/%d/%y")
## [1] "2015-01-02"
parse_date("01/02/15", "%d/%m/%y")
## [1] "2015-02-01"
parse_date("01/02/15", "%y/%m/%d")
## [1] "2001-02-15"
parse_date("1 janvier 2015", "%d %B %Y", locale = locale("fr"))
## [1] "2015-01-01"
#Sections: Parsing a vector, Numbers, Strings, Factors, Dates, Date-Times and Times; Exercises: 2
locale(decimal_mark = ',')
## <locale>
## Numbers: 123.456,78
## Formats: %AD / %AT
## Timezone: UTC
## Encoding: UTF-8
## <date_names>
## Days: Sunday (Sun), Monday (Mon), Tuesday (Tue), Wednesday (Wed), Thursday
## (Thu), Friday (Fri), Saturday (Sat)
## Months: January (Jan), February (Feb), March (Mar), April (Apr), May (May),
## June (Jun), July (Jul), August (Aug), September (Sep), October
## (Oct), November (Nov), December (Dec)
## AM/PM: AM/PM
#Sections: Parsing a vector, Numbers, Strings, Factors, Dates, Date-Times and Times; Exercises: 7
d1 <- "January 1, 2010"
parse_date(d1, "%B %d, %Y")
## [1] "2010-01-01"
d2 <- "2015-Mar-07"
parse_date(d2, "%Y-%b-%d")
## [1] "2015-03-07"
d3 <- "06-Jun-2017"
parse_date(d3, "%d-%b-%Y")
## [1] "2017-06-06"
d4 <- c("August 19 (2015)", "July 1 (2015)")
parse_date(d4, "%B %d (%Y)")
## [1] "2015-08-19" "2015-07-01"
d5 <- "12/30/14" # Dec 30, 2014
parse_date(d5, "%m/%d/%y")
## [1] "2014-12-30"
t1 <- "1705"
parse_time(t1, "%H%M")
## 17:05:00
t2 <- "11:15:10.12 PM"
parse_time(t2, "%I:%M:%OS %p")
## 23:15:10.12