Import data
## function (..., list = character(), package = NULL, lib.loc = NULL,
## verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE)
## {
## fileExt <- function(x) {
## db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
## ans <- sub(".*\\.", "", x)
## ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2",
## x[db])
## ans
## }
## my_read_table <- function(...) {
## lcc <- Sys.getlocale("LC_COLLATE")
## on.exit(Sys.setlocale("LC_COLLATE", lcc))
## Sys.setlocale("LC_COLLATE", "C")
## read.table(...)
## }
## stopifnot(is.character(list))
## names <- c(as.character(substitute(list(...))[-1L]), list)
## if (!is.null(package)) {
## if (!is.character(package))
## stop("'package' must be a character vector or NULL")
## }
## paths <- find.package(package, lib.loc, verbose = verbose)
## if (is.null(lib.loc))
## paths <- c(path.package(package, TRUE), if (!length(package)) getwd(),
## paths)
## paths <- unique(normalizePath(paths[file.exists(paths)]))
## paths <- paths[dir.exists(file.path(paths, "data"))]
## dataExts <- tools:::.make_file_exts("data")
## if (length(names) == 0L) {
## db <- matrix(character(), nrow = 0L, ncol = 4L)
## for (path in paths) {
## entries <- NULL
## packageName <- if (file_test("-f", file.path(path,
## "DESCRIPTION")))
## basename(path)
## else "."
## if (file_test("-f", INDEX <- file.path(path, "Meta",
## "data.rds"))) {
## entries <- readRDS(INDEX)
## }
## else {
## dataDir <- file.path(path, "data")
## entries <- tools::list_files_with_type(dataDir,
## "data")
## if (length(entries)) {
## entries <- unique(tools::file_path_sans_ext(basename(entries)))
## entries <- cbind(entries, "")
## }
## }
## if (NROW(entries)) {
## if (is.matrix(entries) && ncol(entries) == 2L)
## db <- rbind(db, cbind(packageName, dirname(path),
## entries))
## else warning(gettextf("data index for package %s is invalid and will be ignored",
## sQuote(packageName)), domain = NA, call. = FALSE)
## }
## }
## colnames(db) <- c("Package", "LibPath", "Item", "Title")
## footer <- if (missing(package))
## paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")),
## "\n", "to list the data sets in all *available* packages.")
## else NULL
## y <- list(title = "Data sets", header = NULL, results = db,
## footer = footer)
## class(y) <- "packageIQR"
## return(y)
## }
## paths <- file.path(paths, "data")
## for (name in names) {
## found <- FALSE
## for (p in paths) {
## tmp_env <- if (overwrite)
## envir
## else new.env()
## if (file_test("-f", file.path(p, "Rdata.rds"))) {
## rds <- readRDS(file.path(p, "Rdata.rds"))
## if (name %in% names(rds)) {
## found <- TRUE
## if (verbose)
## message(sprintf("name=%s:\t found in Rdata.rds",
## name), domain = NA)
## objs <- rds[[name]]
## lazyLoad(file.path(p, "Rdata"), envir = tmp_env,
## filter = function(x) x %in% objs)
## break
## }
## else if (verbose)
## message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n",
## name, paste(names(rds), collapse = ",")),
## domain = NA)
## }
## files <- list.files(p, full.names = TRUE)
## files <- files[grep(name, files, fixed = TRUE)]
## if (length(files) > 1L) {
## o <- match(fileExt(files), dataExts, nomatch = 100L)
## paths0 <- dirname(files)
## paths0 <- factor(paths0, levels = unique(paths0))
## files <- files[order(paths0, o)]
## }
## if (length(files)) {
## for (file in files) {
## if (verbose)
## message("name=", name, ":\t file= ...", .Platform$file.sep,
## basename(file), "::\t", appendLF = FALSE,
## domain = NA)
## ext <- fileExt(file)
## if (basename(file) != paste0(name, ".", ext))
## found <- FALSE
## else {
## found <- TRUE
## switch(ext, R = , r = {
## library("utils")
## sys.source(file, chdir = TRUE, envir = tmp_env)
## }, RData = , rdata = , rda = load(file, envir = tmp_env),
## TXT = , txt = , tab = , tab.gz = , tab.bz2 = ,
## tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name,
## my_read_table(file, header = TRUE, as.is = FALSE),
## envir = tmp_env), CSV = , csv = , csv.gz = ,
## csv.bz2 = , csv.xz = assign(name, my_read_table(file,
## header = TRUE, sep = ";", as.is = FALSE),
## envir = tmp_env), found <- FALSE)
## }
## if (found)
## break
## }
## if (verbose)
## message(if (!found)
## "*NOT* ", "found", domain = NA)
## }
## if (found)
## break
## }
## if (!found) {
## warning(gettextf("data set %s not found", sQuote(name)),
## domain = NA)
## }
## else if (!overwrite) {
## for (o in ls(envir = tmp_env, all.names = TRUE)) {
## if (exists(o, envir = envir, inherits = FALSE))
## warning(gettextf("an object named %s already exists and will not be overwritten",
## sQuote(o)))
## else assign(o, get(o, envir = tmp_env, inherits = FALSE),
## envir = envir)
## }
## rm(tmp_env)
## }
## }
## invisible(names)
## }
## <bytecode: 0x7fc568ec4508>
## <environment: namespace:utils>
Apply the following dplyr verbs to your data
Filter rows
## # A tibble: 9,768 × 23
## ...1 animal_id animal_name animal_type primary_color secondary_color sex
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 A693708 *charlien dog white <NA> Female
## 2 10 A734321 sophie dog cream <NA> Spayed
## 3 16 A729063 *gizmo dog brown white Neuter…
## 4 17 A619091 <NA> dog brown white Male
## 5 18 A697044 cinnamon dog brown black Spayed
## 6 23 A718171 *hanley dog white gray Neuter…
## 7 24 A694732 twin 1 dog white tan Male
## 8 25 A694734 wolfie dog tricolor <NA> Male
## 9 26 A694739 miracle dog brown white Male
## 10 38 A664040 *cupcake dog gray white Spayed
## # ℹ 9,758 more rows
## # ℹ 16 more variables: dob <chr>, intake_date <chr>, intake_condition <chr>,
## # intake_type <chr>, intake_subtype <chr>, reason_for_intake <chr>,
## # outcome_date <chr>, crossing <chr>, jurisdiction <chr>, outcome_type <chr>,
## # outcome_subtype <chr>, latitude <dbl>, longitude <dbl>,
## # outcome_is_dead <lgl>, was_outcome_alive <lgl>, geopoint <chr>
Arrange rows
## # A tibble: 29,787 × 23
## ...1 animal_id animal_name animal_type primary_color secondary_color sex
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 6 A730385 *brandon rabbit black white Neuter…
## 2 19 A706435 lorelei cat brown tabby <NA> Spayed
## 3 20 A727457 aphrodite guinea pig black calico Female
## 4 21 A727456 venus guinea pig white calico Female
## 5 22 A721324 calvin cat gray white Neuter…
## 6 23 A718171 *hanley dog white gray Neuter…
## 7 33 A675624 <NA> bird white gray Unknown
## 8 38 A664040 *cupcake dog gray white Spayed
## 9 44 A677069 *lito dog brown black Neuter…
## 10 54 A708530 *max dog black tan Neuter…
## # ℹ 29,777 more rows
## # ℹ 16 more variables: dob <chr>, intake_date <chr>, intake_condition <chr>,
## # intake_type <chr>, intake_subtype <chr>, reason_for_intake <chr>,
## # outcome_date <chr>, crossing <chr>, jurisdiction <chr>, outcome_type <chr>,
## # outcome_subtype <chr>, latitude <dbl>, longitude <dbl>,
## # outcome_is_dead <lgl>, was_outcome_alive <lgl>, geopoint <chr>
Select columns
## # A tibble: 29,787 × 2
## intake_date outcome_date
## <chr> <chr>
## 1 2/20/23 2/26/23
## 2 10/3/23 10/3/23
## 3 1/1/20 1/1/20
## 4 2/2/20 2/2/20
## 5 12/18/18 1/13/19
## 6 10/18/24 11/15/24
## 7 7/25/20 7/25/20
## 8 6/12/19 6/12/19
## 9 9/21/17 9/23/17
## 10 12/15/24 12/21/24
## # ℹ 29,777 more rows
Add columns
## # A tibble: 29,787 × 3
## intake_date outcome_date animal_type
## <chr> <chr> <chr>
## 1 2/20/23 2/26/23 dog
## 2 10/3/23 10/3/23 reptile
## 3 1/1/20 1/1/20 bird
## 4 2/2/20 2/2/20 bird
## 5 12/18/18 1/13/19 cat
## 6 10/18/24 11/15/24 rabbit
## 7 7/25/20 7/25/20 bird
## 8 6/12/19 6/12/19 other
## 9 9/21/17 9/23/17 cat
## 10 12/15/24 12/21/24 dog
## # ℹ 29,777 more rows
Summarize by groups
## # A tibble: 29,787 × 23
## # Groups: outcome_type [19]
## ...1 animal_id animal_name animal_type primary_color secondary_color sex
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 A693708 *charlien dog white <NA> Female
## 2 2 A708149 <NA> reptile brown green Unknown
## 3 3 A638068 <NA> bird green red Unknown
## 4 4 A639310 <NA> bird white gray Unknown
## 5 5 A618968 *morgan cat black white Female
## 6 6 A730385 *brandon rabbit black white Neuter…
## 7 7 A646202 <NA> bird black <NA> Unknown
## 8 8 A628138 <NA> other gray black Unknown
## 9 9 A597464 <NA> cat black <NA> Unknown
## 10 10 A734321 sophie dog cream <NA> Spayed
## # ℹ 29,777 more rows
## # ℹ 16 more variables: dob <chr>, intake_date <chr>, intake_condition <chr>,
## # intake_type <chr>, intake_subtype <chr>, reason_for_intake <chr>,
## # outcome_date <chr>, crossing <chr>, jurisdiction <chr>, outcome_type <chr>,
## # outcome_subtype <chr>, latitude <dbl>, longitude <dbl>,
## # outcome_is_dead <lgl>, was_outcome_alive <lgl>, geopoint <chr>