Import data

## function (..., list = character(), package = NULL, lib.loc = NULL, 
##     verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE) 
## {
##     fileExt <- function(x) {
##         db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
##         ans <- sub(".*\\.", "", x)
##         ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2", 
##             x[db])
##         ans
##     }
##     my_read_table <- function(...) {
##         lcc <- Sys.getlocale("LC_COLLATE")
##         on.exit(Sys.setlocale("LC_COLLATE", lcc))
##         Sys.setlocale("LC_COLLATE", "C")
##         read.table(...)
##     }
##     stopifnot(is.character(list))
##     names <- c(as.character(substitute(list(...))[-1L]), list)
##     if (!is.null(package)) {
##         if (!is.character(package)) 
##             stop("'package' must be a character vector or NULL")
##     }
##     paths <- find.package(package, lib.loc, verbose = verbose)
##     if (is.null(lib.loc)) 
##         paths <- c(path.package(package, TRUE), if (!length(package)) getwd(), 
##             paths)
##     paths <- unique(normalizePath(paths[file.exists(paths)]))
##     paths <- paths[dir.exists(file.path(paths, "data"))]
##     dataExts <- tools:::.make_file_exts("data")
##     if (length(names) == 0L) {
##         db <- matrix(character(), nrow = 0L, ncol = 4L)
##         for (path in paths) {
##             entries <- NULL
##             packageName <- if (file_test("-f", file.path(path, 
##                 "DESCRIPTION"))) 
##                 basename(path)
##             else "."
##             if (file_test("-f", INDEX <- file.path(path, "Meta", 
##                 "data.rds"))) {
##                 entries <- readRDS(INDEX)
##             }
##             else {
##                 dataDir <- file.path(path, "data")
##                 entries <- tools::list_files_with_type(dataDir, 
##                   "data")
##                 if (length(entries)) {
##                   entries <- unique(tools::file_path_sans_ext(basename(entries)))
##                   entries <- cbind(entries, "")
##                 }
##             }
##             if (NROW(entries)) {
##                 if (is.matrix(entries) && ncol(entries) == 2L) 
##                   db <- rbind(db, cbind(packageName, dirname(path), 
##                     entries))
##                 else warning(gettextf("data index for package %s is invalid and will be ignored", 
##                   sQuote(packageName)), domain = NA, call. = FALSE)
##             }
##         }
##         colnames(db) <- c("Package", "LibPath", "Item", "Title")
##         footer <- if (missing(package)) 
##             paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")), 
##                 "\n", "to list the data sets in all *available* packages.")
##         else NULL
##         y <- list(title = "Data sets", header = NULL, results = db, 
##             footer = footer)
##         class(y) <- "packageIQR"
##         return(y)
##     }
##     paths <- file.path(paths, "data")
##     for (name in names) {
##         found <- FALSE
##         for (p in paths) {
##             tmp_env <- if (overwrite) 
##                 envir
##             else new.env()
##             if (file_test("-f", file.path(p, "Rdata.rds"))) {
##                 rds <- readRDS(file.path(p, "Rdata.rds"))
##                 if (name %in% names(rds)) {
##                   found <- TRUE
##                   if (verbose) 
##                     message(sprintf("name=%s:\t found in Rdata.rds", 
##                       name), domain = NA)
##                   objs <- rds[[name]]
##                   lazyLoad(file.path(p, "Rdata"), envir = tmp_env, 
##                     filter = function(x) x %in% objs)
##                   break
##                 }
##                 else if (verbose) 
##                   message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n", 
##                     name, paste(names(rds), collapse = ",")), 
##                     domain = NA)
##             }
##             files <- list.files(p, full.names = TRUE)
##             files <- files[grep(name, files, fixed = TRUE)]
##             if (length(files) > 1L) {
##                 o <- match(fileExt(files), dataExts, nomatch = 100L)
##                 paths0 <- dirname(files)
##                 paths0 <- factor(paths0, levels = unique(paths0))
##                 files <- files[order(paths0, o)]
##             }
##             if (length(files)) {
##                 for (file in files) {
##                   if (verbose) 
##                     message("name=", name, ":\t file= ...", .Platform$file.sep, 
##                       basename(file), "::\t", appendLF = FALSE, 
##                       domain = NA)
##                   ext <- fileExt(file)
##                   if (basename(file) != paste0(name, ".", ext)) 
##                     found <- FALSE
##                   else {
##                     found <- TRUE
##                     switch(ext, R = , r = {
##                       library("utils")
##                       sys.source(file, chdir = TRUE, envir = tmp_env)
##                     }, RData = , rdata = , rda = load(file, envir = tmp_env), 
##                       TXT = , txt = , tab = , tab.gz = , tab.bz2 = , 
##                       tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name, 
##                         my_read_table(file, header = TRUE, as.is = FALSE), 
##                         envir = tmp_env), CSV = , csv = , csv.gz = , 
##                       csv.bz2 = , csv.xz = assign(name, my_read_table(file, 
##                         header = TRUE, sep = ";", as.is = FALSE), 
##                         envir = tmp_env), found <- FALSE)
##                   }
##                   if (found) 
##                     break
##                 }
##                 if (verbose) 
##                   message(if (!found) 
##                     "*NOT* ", "found", domain = NA)
##             }
##             if (found) 
##                 break
##         }
##         if (!found) {
##             warning(gettextf("data set %s not found", sQuote(name)), 
##                 domain = NA)
##         }
##         else if (!overwrite) {
##             for (o in ls(envir = tmp_env, all.names = TRUE)) {
##                 if (exists(o, envir = envir, inherits = FALSE)) 
##                   warning(gettextf("an object named %s already exists and will not be overwritten", 
##                     sQuote(o)))
##                 else assign(o, get(o, envir = tmp_env, inherits = FALSE), 
##                   envir = envir)
##             }
##             rm(tmp_env)
##         }
##     }
##     invisible(names)
## }
## <bytecode: 0x7fc568ec4508>
## <environment: namespace:utils>

Apply the following dplyr verbs to your data

Filter rows

## # A tibble: 9,768 × 23
##     ...1 animal_id animal_name animal_type primary_color secondary_color sex    
##    <dbl> <chr>     <chr>       <chr>       <chr>         <chr>           <chr>  
##  1     1 A693708   *charlien   dog         white         <NA>            Female 
##  2    10 A734321   sophie      dog         cream         <NA>            Spayed 
##  3    16 A729063   *gizmo      dog         brown         white           Neuter…
##  4    17 A619091   <NA>        dog         brown         white           Male   
##  5    18 A697044   cinnamon    dog         brown         black           Spayed 
##  6    23 A718171   *hanley     dog         white         gray            Neuter…
##  7    24 A694732   twin 1      dog         white         tan             Male   
##  8    25 A694734   wolfie      dog         tricolor      <NA>            Male   
##  9    26 A694739   miracle     dog         brown         white           Male   
## 10    38 A664040   *cupcake    dog         gray          white           Spayed 
## # ℹ 9,758 more rows
## # ℹ 16 more variables: dob <chr>, intake_date <chr>, intake_condition <chr>,
## #   intake_type <chr>, intake_subtype <chr>, reason_for_intake <chr>,
## #   outcome_date <chr>, crossing <chr>, jurisdiction <chr>, outcome_type <chr>,
## #   outcome_subtype <chr>, latitude <dbl>, longitude <dbl>,
## #   outcome_is_dead <lgl>, was_outcome_alive <lgl>, geopoint <chr>

Arrange rows

## # A tibble: 29,787 × 23
##     ...1 animal_id animal_name animal_type primary_color secondary_color sex    
##    <dbl> <chr>     <chr>       <chr>       <chr>         <chr>           <chr>  
##  1     6 A730385   *brandon    rabbit      black         white           Neuter…
##  2    19 A706435   lorelei     cat         brown  tabby  <NA>            Spayed 
##  3    20 A727457   aphrodite   guinea pig  black         calico          Female 
##  4    21 A727456   venus       guinea pig  white         calico          Female 
##  5    22 A721324   calvin      cat         gray          white           Neuter…
##  6    23 A718171   *hanley     dog         white         gray            Neuter…
##  7    33 A675624   <NA>        bird        white         gray            Unknown
##  8    38 A664040   *cupcake    dog         gray          white           Spayed 
##  9    44 A677069   *lito       dog         brown         black           Neuter…
## 10    54 A708530   *max        dog         black         tan             Neuter…
## # ℹ 29,777 more rows
## # ℹ 16 more variables: dob <chr>, intake_date <chr>, intake_condition <chr>,
## #   intake_type <chr>, intake_subtype <chr>, reason_for_intake <chr>,
## #   outcome_date <chr>, crossing <chr>, jurisdiction <chr>, outcome_type <chr>,
## #   outcome_subtype <chr>, latitude <dbl>, longitude <dbl>,
## #   outcome_is_dead <lgl>, was_outcome_alive <lgl>, geopoint <chr>

Select columns

## # A tibble: 29,787 × 2
##    intake_date outcome_date
##    <chr>       <chr>       
##  1 2/20/23     2/26/23     
##  2 10/3/23     10/3/23     
##  3 1/1/20      1/1/20      
##  4 2/2/20      2/2/20      
##  5 12/18/18    1/13/19     
##  6 10/18/24    11/15/24    
##  7 7/25/20     7/25/20     
##  8 6/12/19     6/12/19     
##  9 9/21/17     9/23/17     
## 10 12/15/24    12/21/24    
## # ℹ 29,777 more rows

Add columns

## # A tibble: 29,787 × 3
##    intake_date outcome_date animal_type
##    <chr>       <chr>        <chr>      
##  1 2/20/23     2/26/23      dog        
##  2 10/3/23     10/3/23      reptile    
##  3 1/1/20      1/1/20       bird       
##  4 2/2/20      2/2/20       bird       
##  5 12/18/18    1/13/19      cat        
##  6 10/18/24    11/15/24     rabbit     
##  7 7/25/20     7/25/20      bird       
##  8 6/12/19     6/12/19      other      
##  9 9/21/17     9/23/17      cat        
## 10 12/15/24    12/21/24     dog        
## # ℹ 29,777 more rows

Summarize by groups

## # A tibble: 29,787 × 23
## # Groups:   outcome_type [19]
##     ...1 animal_id animal_name animal_type primary_color secondary_color sex    
##    <dbl> <chr>     <chr>       <chr>       <chr>         <chr>           <chr>  
##  1     1 A693708   *charlien   dog         white         <NA>            Female 
##  2     2 A708149   <NA>        reptile     brown         green           Unknown
##  3     3 A638068   <NA>        bird        green         red             Unknown
##  4     4 A639310   <NA>        bird        white         gray            Unknown
##  5     5 A618968   *morgan     cat         black         white           Female 
##  6     6 A730385   *brandon    rabbit      black         white           Neuter…
##  7     7 A646202   <NA>        bird        black         <NA>            Unknown
##  8     8 A628138   <NA>        other       gray          black           Unknown
##  9     9 A597464   <NA>        cat         black         <NA>            Unknown
## 10    10 A734321   sophie      dog         cream         <NA>            Spayed 
## # ℹ 29,777 more rows
## # ℹ 16 more variables: dob <chr>, intake_date <chr>, intake_condition <chr>,
## #   intake_type <chr>, intake_subtype <chr>, reason_for_intake <chr>,
## #   outcome_date <chr>, crossing <chr>, jurisdiction <chr>, outcome_type <chr>,
## #   outcome_subtype <chr>, latitude <dbl>, longitude <dbl>,
## #   outcome_is_dead <lgl>, was_outcome_alive <lgl>, geopoint <chr>

Module 6: Apply 5

Lilli Warnock

Import data

Apply the following dplyr verbs to your data

Filter rows

Arrange rows

Select columns

Add columns

Summarize by groups