# Load the workbook into `scooby`. The path is relative, so it is resolved
# against the current working directory.
# NOTE(review): assumes read_excel() (readxl) and the %>% pipe are attached
# earlier in the document - confirm.
scooby <- read_excel("../00_data/MyData.xlsx")
# Print a per-type summary of every column. The data is piped into skim(),
# and the summary that follows labels its input "Piped data".
scooby %>% skimr::skim()
| Name | Piped data |
| Number of rows | 603 |
| Number of columns | 75 |
| _______________________ | |
| Column type frequency: | |
| character | 58 |
| logical | 9 |
| numeric | 7 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| series_name | 0 | 1 | 4 | 42 | 0 | 29 | 0 |
| network | 0 | 1 | 3 | 20 | 0 | 11 | 0 |
| season | 0 | 1 | 1 | 9 | 0 | 7 | 0 |
| title | 0 | 1 | 4 | 76 | 0 | 602 | 0 |
| format | 0 | 1 | 5 | 21 | 0 | 5 | 0 |
| monster_name | 0 | 1 | 4 | 207 | 0 | 482 | 0 |
| monster_gender | 0 | 1 | 4 | 102 | 0 | 44 | 0 |
| monster_type | 0 | 1 | 4 | 166 | 0 | 133 | 0 |
| monster_subtype | 0 | 1 | 3 | 187 | 0 | 270 | 0 |
| monster_species | 0 | 1 | 3 | 134 | 0 | 195 | 0 |
| monster_real | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| caught_fred | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| caught_daphnie | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| caught_velma | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| caught_shaggy | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| caught_scooby | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| captured_fred | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| captured_daphnie | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| captured_velma | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| captured_shaggy | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| captured_scooby | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| unmask_fred | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| unmask_daphnie | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| unmask_velma | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| unmask_shaggy | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| unmask_scooby | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| snack_fred | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| snack_daphnie | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| snack_velma | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| snack_shaggy | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| snack_scooby | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| trap_work_first | 0 | 1 | 2 | 5 | 0 | 4 | 0 |
| setting_terrain | 0 | 1 | 3 | 8 | 0 | 15 | 0 |
| setting_country_state | 0 | 1 | 4 | 16 | 0 | 79 | 0 |
| non_suspect | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| arrested | 0 | 1 | 4 | 5 | 0 | 3 | 0 |
| culprit_name | 0 | 1 | 2 | 169 | 0 | 434 | 0 |
| culprit_gender | 0 | 1 | 4 | 64 | 0 | 24 | 0 |
| motive | 0 | 1 | 4 | 16 | 0 | 28 | 0 |
| if_it_wasnt_for | 0 | 1 | 3 | 116 | 0 | 108 | 0 |
| and_that | 0 | 1 | 3 | 80 | 0 | 65 | 0 |
| number_of_snacks | 0 | 1 | 1 | 17 | 0 | 19 | 0 |
| split_up | 0 | 1 | 1 | 4 | 0 | 4 | 0 |
| another_mystery | 0 | 1 | 1 | 4 | 0 | 5 | 0 |
| set_a_trap | 0 | 1 | 1 | 4 | 0 | 4 | 0 |
| jeepers | 0 | 1 | 1 | 4 | 0 | 11 | 0 |
| jinkies | 0 | 1 | 1 | 4 | 0 | 14 | 0 |
| my_glasses | 0 | 1 | 1 | 4 | 0 | 4 | 0 |
| just_about_wrapped_up | 0 | 1 | 1 | 4 | 0 | 3 | 0 |
| zoinks | 0 | 1 | 1 | 4 | 0 | 18 | 0 |
| groovy | 0 | 1 | 1 | 4 | 0 | 5 | 0 |
| scooby_doo_where_are_you | 0 | 1 | 1 | 4 | 0 | 6 | 0 |
| rooby_rooby_roo | 0 | 1 | 1 | 4 | 0 | 9 | 0 |
| fred_va | 0 | 1 | 4 | 18 | 0 | 6 | 0 |
| daphnie_va | 0 | 1 | 4 | 26 | 0 | 10 | 0 |
| velma_va | 0 | 1 | 4 | 19 | 0 | 13 | 0 |
| shaggy_va | 0 | 1 | 4 | 15 | 0 | 8 | 0 |
| scooby_va | 0 | 1 | 4 | 12 | 0 | 6 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| unmask_other | 0 | 1 | 0.06 | FAL: 568, TRU: 35 |
| caught_other | 0 | 1 | 0.14 | FAL: 519, TRU: 84 |
| caught_not | 0 | 1 | 0.05 | FAL: 572, TRU: 31 |
| door_gag | 0 | 1 | 0.10 | FAL: 544, TRU: 59 |
| batman | 0 | 1 | 0.01 | FAL: 599, TRU: 4 |
| scooby_dum | 0 | 1 | 0.03 | FAL: 586, TRU: 17 |
| scrappy_doo | 0 | 1 | 0.27 | FAL: 438, TRU: 165 |
| hex_girls | 0 | 1 | 0.01 | FAL: 597, TRU: 6 |
| blue_falcon | 0 | 1 | 0.05 | FAL: 570, TRU: 33 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| index | 0 | 1.00 | 302.00 | 174.22 | 1.0 | 151.5 | 302.0 | 452.50 | 603.0 | ▇▇▇▇▇ |
| imdb | 15 | 0.98 | 7.28 | 0.73 | 4.2 | 6.9 | 7.3 | 7.70 | 9.6 | ▁▁▇▆▁ |
| engagement | 15 | 0.98 | 580.33 | 4807.92 | 7.0 | 27.0 | 54.5 | 128.25 | 100951.0 | ▇▁▁▁▁ |
| run_time | 0 | 1.00 | 23.52 | 17.21 | 4.0 | 12.0 | 22.0 | 23.00 | 94.0 | ▇▃▁▁▁ |
| monster_amount | 0 | 1.00 | 1.75 | 2.29 | 0.0 | 1.0 | 1.0 | 2.00 | 19.0 | ▇▁▁▁▁ |
| suspects_amount | 0 | 1.00 | 2.85 | 2.62 | 0.0 | 1.0 | 3.0 | 4.00 | 20.0 | ▇▂▁▁▁ |
| culprit_amount | 0 | 1.00 | 1.04 | 1.07 | 0.0 | 0.0 | 1.0 | 1.00 | 11.0 | ▇▁▁▁▁ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date_aired | 0 | 1 | 1969-09-13 | 2021-02-25 | 1988-09-10 | 448 |
# Count the numeric columns of `scooby`: keep only the columns for which
# is.numeric() is TRUE, then take the width of what remains.
ncol_num <- ncol(select(scooby, where(is.numeric)))
ncol_num
## [1] 7
#' Count the numeric columns of a data frame
#'
#' @param .scooby A data frame (or tibble) whose columns are inspected.
#' @return The number of columns of `.scooby` for which `is.numeric()` is TRUE.
count_ncol_numeric <- function(.scooby) {
  # Work on the argument, not the global `scooby`; otherwise every call
  # reports the full-data count no matter what the caller passes in.
  .scooby %>%
    # select a type of variable
    select(where(is.numeric)) %>%
    # count columns
    ncol()
}
scooby %>% count_ncol_numeric()
## [1] 7
# Check on a subset: first 10 rows, with the first 13 columns dropped.
# The parentheses matter: `-1:13` parses as `(-1):13`, i.e. -1, 0, 1, ..., 13,
# which mixes negative and positive indices instead of dropping columns.
scooby %>% .[1:10, -(1:13)] %>% count_ncol_numeric()
## (output not re-generated after the fix; the 7 printed previously was the
##  full-data count, because the function ignored its argument)
#' Count the columns of a given type in a data frame
#'
#' @param .scooby A data frame (or tibble) whose columns are inspected.
#' @param type_scooby A single string naming the column type to count:
#'   "numeric" (the default), "character" or "logical".
#' @return The number of columns of `.scooby` that are of the requested type.
#'   An unsupported `type_scooby` raises an error that lists the valid choices.
count_ncol_type <- function(.scooby, type_scooby = "numeric") {
  # Map each supported type name to the predicate that detects it, so adding
  # a type is a one-line change rather than another copied branch.
  type_tests <- list(
    numeric = is.numeric,
    character = is.character,
    logical = is.logical
  )

  # Validate up front: an unknown type used to fall through both branches
  # and fail later with an unrelated "object 'ncol_type' not found" error.
  if (!is.character(type_scooby) || length(type_scooby) != 1L ||
      !type_scooby %in% names(type_tests)) {
    stop(
      "`type_scooby` must be one of: ",
      paste0("\"", names(type_tests), "\"", collapse = ", "),
      call. = FALSE
    )
  }

  is_type <- type_tests[[type_scooby]]

  .scooby %>%
    # select a type of variables
    select(where(is_type)) %>%
    # count columns
    ncol()
}
# Default type: numeric columns in the full data.
count_ncol_type(scooby)
## [1] 7
# Character columns in the full data.
count_ncol_type(scooby, type_scooby = "character")
## [1] 58
# Character columns within the first 10 rows and first 5 columns.
count_ncol_type(scooby[1:10, 1:5], type_scooby = "character")
## [1] 4
# Count the rows whose `network` is exactly "Warner Home Video":
# keep the matching rows, then take the height of what remains.
nrow_num <- nrow(filter(scooby, network == "Warner Home Video"))
nrow_num
## [1] 39
#' Count the rows that belong to one network
#'
#' @param .scooby A data frame (or tibble) with a `network` column.
#' @param network_name The value that `network` is compared against with `==`.
#' @return The number of rows of `.scooby` whose `network` equals
#'   `network_name`.
count_num_episodes_by_network <- function(.scooby, network_name) {
  # Keep only the rows for the requested network ...
  matching_rows <- filter(.scooby, network == network_name)
  # ... and report how many there are.
  nrow(matching_rows)
}
# Check on a subset: first 500 rows, `network` column only.
count_num_episodes_by_network(
  scooby[1:500, "network"],
  network_name = "Warner Home Video"
)
## [1] 29
Create your own.
Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".
# Count the rows whose `monster_type` is exactly "Undead":
# keep the matching rows, then take the height of what remains.
nrow_num <- nrow(filter(scooby, monster_type == "Undead"))
nrow_num
## [1] 37
#' Count the rows that have a given monster type
#'
#' @param .scooby A data frame (or tibble) with a `monster_type` column.
#' @param monster_type_name The value that `monster_type` is compared against
#'   with `==`.
#' @return The number of rows of `.scooby` whose `monster_type` equals
#'   `monster_type_name`.
count_num_monster_type <- function(.scooby, monster_type_name) {
  # Keep only the rows with the requested monster type ...
  matching_rows <- filter(.scooby, monster_type == monster_type_name)
  # ... and report how many there are.
  nrow(matching_rows)
}
# Check on a subset: first 10 rows, `monster_type` column only.
# The argument must be a monster type value ("Undead", as in the example
# above), not the column's name: matching on the literal string
# "monster_type" is what produced the count of 0 shown previously.
scooby %>% .[1:10, "monster_type"] %>%
  count_num_monster_type(monster_type_name = "Undead")
## (output not re-generated after the fix)