Week 2 Coding Practice

Evan Klein / D590 / Fall 2025

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Read the heights data from the project's data/ folder. The first row of the
# file supplies the column names, which is read_csv()'s default behaviour, so
# the argument does not need to be spelled out.
heights <- read_csv("data/heights.csv")

## Rows: 1192 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): sex, race
## dbl (4): earn, height, ed, age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Convert character vectors to typed vectors with the parse_*() helpers and
# inspect the resulting type with str().
c("TRUE", "FALSE", "NA") |>
  parse_logical() |>
  str()

##  logi [1:3] TRUE FALSE NA

c("1", "2", "3") |>
  parse_integer() |>
  str()

##  int [1:3] 1 2 3

c("2010-01-01", "1979-10-14") |>
  parse_date() |>
  str()

##  Date[1:2], format: "2010-01-01" "1979-10-14"

# Treat "." as the missing-value marker while parsing integers.
c("1", "231", ".", "456") |>
  parse_integer(na = ".")

## [1]   1 231  NA 456

# Values that are not valid integers raise a parsing warning; the failures
# are kept with the result and can be listed with problems().
x <- c("123", "345", "abc", "123.45") |>
  parse_integer()

## Warning: 2 parsing failures.
## row col               expected actual
##   3  -- no trailing characters abc   
##   4  -- no trailing characters 123.45

x |>
  problems()

## # A tibble: 2 × 4
##     row   col expected               actual
##   <int> <int> <chr>                  <chr> 
## 1     3    NA no trailing characters abc   
## 2     4    NA no trailing characters 123.45

# Plain double with "." as the decimal mark.
"1.23" |>
  parse_double()

## [1] 1.23

# Same value written with a comma as the decimal mark, handled via the locale.
"1,23" |>
  parse_double(locale = locale(decimal_mark = ","))

## [1] 1.23

# parse_number() pulls the number out of the surrounding text.
"20%" |>
  parse_number()

## [1] 20

# Show the raw bytes behind a string.
"Evan Klein" |>
  charToRaw()

##  [1] 45 76 61 6e 20 4b 6c 65 69 6e

# A byte string that decodes to readable text once readr is told the
# encoding is Shift-JIS.
x2 <- "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd"
x2 |>
  parse_character(locale = locale(encoding = "Shift-JIS"))

## [1] "こんにちは"

# Ask readr to guess the encoding from the raw bytes; for this short input
# the only candidate reported is KOI8-R with confidence 0.42.
x2 |>
  charToRaw() |>
  guess_encoding()

## # A tibble: 1 × 2
##   encoding confidence
##   <chr>         <dbl>
## 1 KOI8-R         0.42

# Parse strings into a factor whose levels (and their order) come from
# `vegetables`, not from the order of the input values.
vegetables <- c("carrot", "broccoli")
c("broccoli", "carrot") |>
  parse_factor(levels = vegetables)

## [1] broccoli carrot  
## Levels: carrot broccoli

# Date-time string with a compact "T2010" time part; the result is printed
# in UTC.
"2010-10-01T2010" |>
  parse_datetime()

## [1] "2010-10-01 20:10:00 UTC"

# NOTE(review): attaching hms masks lubridate::hms (see the message below) --
# confirm the attach is actually needed for the parse_time() call that follows.
library(hms)

## 
## Attaching package: 'hms'

## The following object is masked from 'package:lubridate':
## 
##     hms

# 12-hour clock time with an am/pm suffix.
"02:30 pm" |>
  parse_time()

## 14:30:00

# Date with an explicit month/day/two-digit-year format string.
parse_date("01/02/15", format = "%m/%d/%y")

## [1] "2015-01-02"

# Ask readr which parser it would choose for this string.
"2010-10-01" |>
  guess_parser()

## [1] "date"

# Read readr's bundled challenge.csv example and let readr guess the column
# types.
challenge <- readr_example("challenge.csv") |>
  read_csv()

## Rows: 2000 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (1): x
## date (1): y
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# No parsing problems are recorded for the guessed specification.
challenge |>
  problems()

## # A tibble: 0 × 5
## # ℹ 5 variables: row <int>, col <int>, expected <chr>, actual <chr>, file <chr>

# Look at the last rows of the data.
challenge |>
  tail()

## # A tibble: 6 × 2
##       x y         
##   <dbl> <date>    
## 1 0.805 2019-11-21
## 2 0.164 2018-03-29
## 3 0.472 2014-08-04
## 4 0.718 2015-08-16
## 5 0.270 2020-02-04
## 6 0.608 2019-01-06

# Re-read the file with an explicit column specification that forces y to be
# logical; readr reports parsing issues for this specification (see the
# warning below).
challenge <- readr_example("challenge.csv") |>
  read_csv(col_types = cols(x = col_double(), y = col_logical()))

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

# Read the file again, this time declaring y as a date column, and check the
# last rows.
challenge <- readr_example("challenge.csv") |>
  read_csv(col_types = cols(x = col_double(), y = col_date()))
challenge |>
  tail()

## # A tibble: 6 × 2
##       x y         
##   <dbl> <date>    
## 1 0.805 2019-11-21
## 2 0.164 2018-03-29
## 3 0.472 2014-08-04
## 4 0.718 2015-08-16
## 5 0.270 2020-02-04
## 6 0.608 2019-01-06

# Read every column as character so no type guessing takes place.
challenge2 <- readr_example("challenge.csv") |>
  read_csv(col_types = cols(.default = col_character()))

# Save the parsed data as CSV in the working directory.
challenge |>
  write_csv("challenge.csv")

# Save the same data as RDS and read it back; the printed result shows y
# still typed as a date column.
challenge |>
  write_rds("challenge.rds")
read_rds(file = "challenge.rds")

## # A tibble: 2,000 × 2
##        x y     
##    <dbl> <date>
##  1   404 NA    
##  2  4172 NA    
##  3  3004 NA    
##  4   787 NA    
##  5    37 NA    
##  6  2332 NA    
##  7  2489 NA    
##  8  1449 NA    
##  9  3665 NA    
## 10  3863 NA    
## # ℹ 1,990 more rows

# Round-trip the data through a feather file; the printed result shows the
# same column types as before.
library(feather)
challenge |>
  write_feather("challenge.feather")
read_feather(path = "challenge.feather")

## # A tibble: 2,000 × 2
##        x y     
##    <dbl> <date>
##  1   404 NA    
##  2  4172 NA    
##  3  3004 NA    
##  4   787 NA    
##  5    37 NA    
##  6  2332 NA    
##  7  2489 NA    
##  8  1449 NA    
##  9  3665 NA    
## 10  3863 NA    
## # ℹ 1,990 more rows

Week 2 Coding Practice - Part 2

2025-09-07