Import data

# Excel file: read the workbook and convert its column names to snake_case
# with janitor::clean_names().
dog <- read_excel("../01_module4/data/myData.xlsx") %>%
    janitor::clean_names()
# Print the imported tibble. This line used to be `data`, but no object of that
# name is created here, so R printed the source of utils::data() instead of the
# data set. Re-knit the document to refresh the printed output that follows.
dog

## function (..., list = character(), package = NULL, lib.loc = NULL, 
##     verbose = getOption("verbose"), envir = .GlobalEnv, overwrite = TRUE) 
## {
##     fileExt <- function(x) {
##         db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
##         ans <- sub(".*\\.", "", x)
##         ans[db] <- sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2", 
##             x[db])
##         ans
##     }
##     my_read_table <- function(...) {
##         lcc <- Sys.getlocale("LC_COLLATE")
##         on.exit(Sys.setlocale("LC_COLLATE", lcc))
##         Sys.setlocale("LC_COLLATE", "C")
##         read.table(...)
##     }
##     stopifnot(is.character(list))
##     names <- c(as.character(substitute(list(...))[-1L]), list)
##     if (!is.null(package)) {
##         if (!is.character(package)) 
##             stop("'package' must be a character vector or NULL")
##     }
##     paths <- find.package(package, lib.loc, verbose = verbose)
##     if (is.null(lib.loc)) 
##         paths <- c(path.package(package, TRUE), if (!length(package)) getwd(), 
##             paths)
##     paths <- unique(normalizePath(paths[file.exists(paths)]))
##     paths <- paths[dir.exists(file.path(paths, "data"))]
##     dataExts <- tools:::.make_file_exts("data")
##     if (length(names) == 0L) {
##         db <- matrix(character(), nrow = 0L, ncol = 4L)
##         for (path in paths) {
##             entries <- NULL
##             packageName <- if (file_test("-f", file.path(path, 
##                 "DESCRIPTION"))) 
##                 basename(path)
##             else "."
##             if (file_test("-f", INDEX <- file.path(path, "Meta", 
##                 "data.rds"))) {
##                 entries <- readRDS(INDEX)
##             }
##             else {
##                 dataDir <- file.path(path, "data")
##                 entries <- tools::list_files_with_type(dataDir, 
##                   "data")
##                 if (length(entries)) {
##                   entries <- unique(tools::file_path_sans_ext(basename(entries)))
##                   entries <- cbind(entries, "")
##                 }
##             }
##             if (NROW(entries)) {
##                 if (is.matrix(entries) && ncol(entries) == 2L) 
##                   db <- rbind(db, cbind(packageName, dirname(path), 
##                     entries))
##                 else warning(gettextf("data index for package %s is invalid and will be ignored", 
##                   sQuote(packageName)), domain = NA, call. = FALSE)
##             }
##         }
##         colnames(db) <- c("Package", "LibPath", "Item", "Title")
##         footer <- if (missing(package)) 
##             paste0("Use ", sQuote(paste("data(package =", ".packages(all.available = TRUE))")), 
##                 "\n", "to list the data sets in all *available* packages.")
##         else NULL
##         y <- list(title = "Data sets", header = NULL, results = db, 
##             footer = footer)
##         class(y) <- "packageIQR"
##         return(y)
##     }
##     paths <- file.path(paths, "data")
##     for (name in names) {
##         found <- FALSE
##         for (p in paths) {
##             tmp_env <- if (overwrite) 
##                 envir
##             else new.env()
##             if (file_test("-f", file.path(p, "Rdata.rds"))) {
##                 rds <- readRDS(file.path(p, "Rdata.rds"))
##                 if (name %in% names(rds)) {
##                   found <- TRUE
##                   if (verbose) 
##                     message(sprintf("name=%s:\t found in Rdata.rds", 
##                       name), domain = NA)
##                   objs <- rds[[name]]
##                   lazyLoad(file.path(p, "Rdata"), envir = tmp_env, 
##                     filter = function(x) x %in% objs)
##                   break
##                 }
##                 else if (verbose) 
##                   message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n", 
##                     name, paste(names(rds), collapse = ",")), 
##                     domain = NA)
##             }
##             files <- list.files(p, full.names = TRUE)
##             files <- files[grep(name, files, fixed = TRUE)]
##             if (length(files) > 1L) {
##                 o <- match(fileExt(files), dataExts, nomatch = 100L)
##                 paths0 <- dirname(files)
##                 paths0 <- factor(paths0, levels = unique(paths0))
##                 files <- files[order(paths0, o)]
##             }
##             if (length(files)) {
##                 for (file in files) {
##                   if (verbose) 
##                     message("name=", name, ":\t file= ...", .Platform$file.sep, 
##                       basename(file), "::\t", appendLF = FALSE, 
##                       domain = NA)
##                   ext <- fileExt(file)
##                   if (basename(file) != paste0(name, ".", ext)) 
##                     found <- FALSE
##                   else {
##                     found <- TRUE
##                     switch(ext, R = , r = {
##                       library("utils")
##                       sys.source(file, chdir = TRUE, envir = tmp_env)
##                     }, RData = , rdata = , rda = load(file, envir = tmp_env), 
##                       TXT = , txt = , tab = , tab.gz = , tab.bz2 = , 
##                       tab.xz = , txt.gz = , txt.bz2 = , txt.xz = assign(name, 
##                         my_read_table(file, header = TRUE, as.is = FALSE), 
##                         envir = tmp_env), CSV = , csv = , csv.gz = , 
##                       csv.bz2 = , csv.xz = assign(name, my_read_table(file, 
##                         header = TRUE, sep = ";", as.is = FALSE), 
##                         envir = tmp_env), found <- FALSE)
##                   }
##                   if (found) 
##                     break
##                 }
##                 if (verbose) 
##                   message(if (!found) 
##                     "*NOT* ", "found", domain = NA)
##             }
##             if (found) 
##                 break
##         }
##         if (!found) {
##             warning(gettextf("data set %s not found", sQuote(name)), 
##                 domain = NA)
##         }
##         else if (!overwrite) {
##             for (o in ls(envir = tmp_env, all.names = TRUE)) {
##                 if (exists(o, envir = envir, inherits = FALSE)) 
##                   warning(gettextf("an object named %s already exists and will not be overwritten", 
##                     sQuote(o)))
##                 else assign(o, get(o, envir = tmp_env, inherits = FALSE), 
##                   envir = envir)
##             }
##             rm(tmp_env)
##         }
##     }
##     invisible(names)
## }
## <bytecode: 0x1307acb00>
## <environment: namespace:utils>

Apply the following dplyr verbs to your data

Filter rows

# Keep only the breeds whose `length` is "Short", showing just breed and length
dog %>%
    filter(length == "Short") %>%
    select(breed, length)

## # A tibble: 86 × 2
##    breed                         length
##    <chr>                         <chr> 
##  1 Retrievers (Labrador)         Short 
##  2 French Bulldogs               Short 
##  3 Bulldogs                      Short 
##  4 Beagles                       Short 
##  5 Rottweilers                   Short 
##  6 Pointers (German Shorthaired) Short 
##  7 Dachshunds                    Short 
##  8 Pembroke Welsh Corgis         Short 
##  9 Boxers                        Short 
## 10 Great Danes                   Short 
## # ℹ 76 more rows

Arrange rows

# Sort rows from highest to lowest affectionate_with_family
dog %>%
    arrange(desc(affectionate_with_family))

## # A tibble: 195 × 18
##    column1 breed                   affectionate_with_fa…¹ good_with_young_chil…²
##      <dbl> <chr>                                    <dbl>                  <dbl>
##  1       1 Retrievers (Labrador)                        5                      5
##  2       2 French Bulldogs                              5                      5
##  3       3 German Shepherd Dogs                         5                      5
##  4       4 Retrievers (Golden)                          5                      5
##  5       6 Poodles                                      5                      5
##  6       8 Rottweilers                                  5                      3
##  7       9 Pointers (German Short…                      5                      5
##  8      10 Dachshunds                                   5                      3
##  9      11 Pembroke Welsh Corgis                        5                      3
## 10      13 Yorkshire Terriers                           5                      5
## # ℹ 185 more rows
## # ℹ abbreviated names: ¹affectionate_with_family, ²good_with_young_children
## # ℹ 14 more variables: good_with_other_dogs <dbl>, shedding_level <dbl>,
## #   coat_grooming_frequency <dbl>, drooling_level <dbl>, type <chr>,
## #   length <chr>, openness_to_strangers <dbl>, playfulness_level <dbl>,
## #   watchdog_protective_nature <dbl>, adaptability_level <dbl>,
## #   trainability_level <dbl>, energy_level <dbl>, barking_level <dbl>, …

Select columns

# Keep the contiguous run of columns from breed through good_with_other_dogs
dog %>%
    select(breed:good_with_other_dogs)

## # A tibble: 195 × 4
##    breed      affectionate_with_fa…¹ good_with_young_chil…² good_with_other_dogs
##    <chr>                       <dbl>                  <dbl>                <dbl>
##  1 Retriever…                      5                      5                    5
##  2 French Bu…                      5                      5                    4
##  3 German Sh…                      5                      5                    3
##  4 Retriever…                      5                      5                    5
##  5 Bulldogs                        4                      3                    3
##  6 Poodles                         5                      5                    3
##  7 Beagles                         3                      5                    5
##  8 Rottweile…                      5                      3                    3
##  9 Pointers …                      5                      5                    4
## 10 Dachshunds                      5                      3                    4
## # ℹ 185 more rows
## # ℹ abbreviated names: ¹affectionate_with_family, ²good_with_young_children

Add columns

dog %>%
    # New column: family affection minus the good_with_other_dogs score
    mutate(fun = affectionate_with_family - good_with_other_dogs) %>%
    # Show the columns from affectionate_with_family through
    # good_with_other_dogs, followed by the new `fun` column
    select(affectionate_with_family:good_with_other_dogs, fun)

## # A tibble: 195 × 4
##    affectionate_with_family good_with_young_children good_with_other_dogs   fun
##                       <dbl>                    <dbl>                <dbl> <dbl>
##  1                        5                        5                    5     0
##  2                        5                        5                    4     1
##  3                        5                        5                    3     2
##  4                        5                        5                    5     0
##  5                        4                        3                    3     1
##  6                        5                        5                    3     2
##  7                        3                        5                    5    -2
##  8                        5                        3                    3     2
##  9                        5                        5                    4     1
## 10                        5                        3                    4     1
## # ℹ 185 more rows

Summarize by groups

# Average energy level per breed, listed from lowest to highest
arrange(
    summarise(
        # One group per breed
        group_by(dog, breed),
        # Mean energy level within each breed
        energy = mean(energy_level)
    ),
    # Ascending order of the computed mean
    energy
)

## # A tibble: 195 × 2
##    breed                           energy
##    <chr>                            <dbl>
##  1 Plott Hounds                         0
##  2 Basset Hounds                        2
##  3 Neapolitan Mastiffs                  2
##  4 Affenpinschers                       3
##  5 Airedale Terriers                    3
##  6 American Hairless Terriers           3
##  7 American Staffordshire Terriers      3
##  8 Anatolian Shepherd Dogs              3
##  9 Azawakhs                             3
## 10 Barbets                              3
## # ℹ 185 more rows

Module 6: Apply 5

Nick Sobalo

Import data

Apply the following dplyr verbs to your data

Filter rows

Arrange rows

Select columns

Add columns

Summarize by groups