Import data
# Read the Excel workbook, then let janitor normalise the column names so
# they are safe to use unquoted in the dplyr verbs below.
dog <- janitor::clean_names(
  read_excel("../01_module4/data/myData.xlsx")
)
# Print the imported tibble to check that the import worked.
# The object created above is `dog`; a bare `data` refers to the
# utils::data() function and would print that function's source instead
# of the dataset.
dog
Apply the following dplyr verbs to your data
Filter rows
# Keep only the rows whose coat length is "Short", then show just the
# breed and length columns.
dog %>%
  filter(length == "Short") %>%
  select(breed, length)
## # A tibble: 86 × 2
## breed length
## <chr> <chr>
## 1 Retrievers (Labrador) Short
## 2 French Bulldogs Short
## 3 Bulldogs Short
## 4 Beagles Short
## 5 Rottweilers Short
## 6 Pointers (German Shorthaired) Short
## 7 Dachshunds Short
## 8 Pembroke Welsh Corgis Short
## 9 Boxers Short
## 10 Great Danes Short
## # ℹ 76 more rows
Arrange rows
# Sort rows from the highest to the lowest affectionate_with_family score.
dog %>%
  arrange(desc(affectionate_with_family))
## # A tibble: 195 × 18
## column1 breed affectionate_with_fa…¹ good_with_young_chil…²
## <dbl> <chr> <dbl> <dbl>
## 1 1 Retrievers (Labrador) 5 5
## 2 2 French Bulldogs 5 5
## 3 3 German Shepherd Dogs 5 5
## 4 4 Retrievers (Golden) 5 5
## 5 6 Poodles 5 5
## 6 8 Rottweilers 5 3
## 7 9 Pointers (German Short… 5 5
## 8 10 Dachshunds 5 3
## 9 11 Pembroke Welsh Corgis 5 3
## 10 13 Yorkshire Terriers 5 5
## # ℹ 185 more rows
## # ℹ abbreviated names: ¹affectionate_with_family, ²good_with_young_children
## # ℹ 14 more variables: good_with_other_dogs <dbl>, shedding_level <dbl>,
## # coat_grooming_frequency <dbl>, drooling_level <dbl>, type <chr>,
## # length <chr>, openness_to_strangers <dbl>, playfulness_level <dbl>,
## # watchdog_protective_nature <dbl>, adaptability_level <dbl>,
## # trainability_level <dbl>, energy_level <dbl>, barking_level <dbl>, …
Select columns
# Keep the contiguous run of columns from breed through
# good_with_other_dogs.
dog %>%
  select(breed:good_with_other_dogs)
## # A tibble: 195 × 4
## breed affectionate_with_fa…¹ good_with_young_chil…² good_with_other_dogs
## <chr> <dbl> <dbl> <dbl>
## 1 Retriever… 5 5 5
## 2 French Bu… 5 5 4
## 3 German Sh… 5 5 3
## 4 Retriever… 5 5 5
## 5 Bulldogs 4 3 3
## 6 Poodles 5 5 3
## 7 Beagles 3 5 5
## 8 Rottweile… 5 3 3
## 9 Pointers … 5 5 4
## 10 Dachshunds 5 3 4
## # ℹ 185 more rows
## # ℹ abbreviated names: ¹affectionate_with_family, ²good_with_young_children
Add columns
# Add `fun`: the affectionate_with_family score minus the
# good_with_other_dogs score.
dog %>%
  mutate(fun = affectionate_with_family - good_with_other_dogs) %>%
  # Show the columns from affectionate_with_family through
  # good_with_other_dogs, plus the new `fun` column.
  select(affectionate_with_family:good_with_other_dogs, fun)
## # A tibble: 195 × 4
## affectionate_with_family good_with_young_children good_with_other_dogs fun
## <dbl> <dbl> <dbl> <dbl>
## 1 5 5 5 0
## 2 5 5 4 1
## 3 5 5 3 2
## 4 5 5 5 0
## 5 4 3 3 1
## 6 5 5 3 2
## 7 3 5 5 -2
## 8 5 3 3 2
## 9 5 5 4 1
## 10 5 3 4 1
## # ℹ 185 more rows
Summarize by groups
# Average energy level per breed, listed from lowest to highest.
# NOTE(review): the printed result shows one breed with a mean of 0; this
# may be a placeholder value in the source data -- confirm before relying
# on the ranking.
group_by(dog, breed) %>%
  summarise(energy = mean(energy_level)) %>%
  arrange(energy)
## # A tibble: 195 × 2
## breed energy
## <chr> <dbl>
## 1 Plott Hounds 0
## 2 Basset Hounds 2
## 3 Neapolitan Mastiffs 2
## 4 Affenpinschers 3
## 5 Airedale Terriers 3
## 6 American Hairless Terriers 3
## 7 American Staffordshire Terriers 3
## 8 Anatolian Shepherd Dogs 3
## 9 Azawakhs 3
## 10 Barbets 3
## # ℹ 185 more rows