Import your data

# Load the example datasets, naming the source package explicitly so the
# lookup does not depend on which packages are attached or in what order
# (R >= 4.5 also ships a `penguins` dataset in `datasets`, with different
# column names from the palmerpenguins version summarised below).
data(flights, package = "nycflights13")
data(penguins, package = "palmerpenguins")

# Preview both datasets. skim() prints a per-column summary grouped by
# variable type (missing counts, completeness, and type-specific statistics).
skim(flights)
Data summary
Name flights
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Summarise the penguins data column by column.
skim(penguins)
Data summary
Name penguins
Number of rows 344
Number of columns 8
_______________________
Column type frequency:
factor 3
numeric 5
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
species 0 1.00 FALSE 3 Ade: 152, Gen: 124, Chi: 68
island 0 1.00 FALSE 3 Bis: 168, Dre: 124, Tor: 52
sex 11 0.97 FALSE 2 mal: 168, fem: 165

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
bill_length_mm 2 0.99 43.92 5.46 32.1 39.23 44.45 48.5 59.6 ▃▇▇▆▁
bill_depth_mm 2 0.99 17.15 1.97 13.1 15.60 17.30 18.7 21.5 ▅▅▇▇▂
flipper_length_mm 2 0.99 200.92 14.06 172.0 190.00 197.00 213.0 231.0 ▂▇▃▅▂
body_mass_g 2 0.99 4201.75 801.95 2700.0 3550.00 4050.00 4750.0 6300.0 ▃▇▆▃▂
year 0 1.00 2008.03 0.82 2007.0 2007.00 2008.00 2009.0 2009.0 ▇▁▇▁▇

Create data frame functions

Example 1: Count numeric columns

Code snippet

# Keep only the numeric columns of `flights`, then count them.
ncol_num <- ncol(select(flights, where(is.numeric)))

ncol_num
## [1] 14

Function

#' Count the numeric columns of a data frame
#'
#' @param .data A data frame (or tibble).
#' @return The number of columns for which `is.numeric()` is `TRUE`.
count_num_cols <- function(.data) {
  numeric_only <- select(.data, where(is.numeric))
  ncol(numeric_only)
}

# Test: the function should reproduce the snippet's result for `flights`
count_num_cols(flights)
## [1] 14

Function with argument for type

#' Count the columns of a data frame that have a given type
#'
#' @param .data A data frame (or tibble).
#' @param type Which kind of column to count: one of `"numeric"` (the
#'   default), `"character"`, `"factor"`, or `"logical"`. Matched with
#'   `match.arg()`, so a unique abbreviation is also accepted.
#' @return A single integer: the number of columns of the requested type.
#'   An unsupported `type` raises an error.
count_cols_by_type <- function(.data, type = "numeric") {
  # Validate up front so an unknown type fails loudly instead of falling
  # through every branch and silently returning NULL.
  type <- match.arg(type, c("numeric", "character", "factor", "logical"))

  # Map the type label onto its predicate; `where()` accepts any predicate
  # function, so one pipeline serves every supported type.
  is_type <- switch(type,
    numeric = is.numeric,
    character = is.character,
    factor = is.factor,
    logical = is.logical
  )

  .data %>%
    select(where(is_type)) %>%
    ncol()
}

# Test: the default type counts numeric columns; passing
# type = "character" counts character columns instead
count_cols_by_type(flights)
## [1] 14
count_cols_by_type(flights, type = "character")
## [1] 4

Example 2: Count rows by carrier

Code snippet

# Count the rows of `flights` whose carrier code is "UA".
nrow_num <- nrow(filter(flights, carrier == "UA"))

nrow_num
## [1] 58665

Function

#' Count the rows of a data frame that belong to one carrier
#'
#' @param .data A data frame with a `carrier` column.
#' @param carrier_name A single carrier code to match, e.g. `"UA"`.
#' @return A single integer: the number of rows whose `carrier` equals
#'   `carrier_name`. Errors if `carrier_name` is not of length one.
count_rows_by_carrier <- function(.data, carrier_name) {
  # `==` would recycle a longer vector against the column and silently
  # miscount, so insist on exactly one value.
  if (length(carrier_name) != 1L) {
    stop("`carrier_name` must be a single value.", call. = FALSE)
  }

  .data %>%
    # `.env$` pins the comparison to the function argument even if the data
    # frame happens to contain a column that is itself named `carrier_name`.
    filter(carrier == .env$carrier_name) %>%
    nrow()
}

# Test: "UA" should reproduce the snippet's count; "AA" checks that the
# carrier argument really drives the filter
count_rows_by_carrier(flights, "UA")
## [1] 58665
count_rows_by_carrier(flights, "AA")
## [1] 32729

Example 3: Your own function – Count penguins by species

Code snippet

# Count the rows of `penguins` recorded as species "Adelie".
nrow(filter(penguins, species == "Adelie"))
## [1] 152

Function

#' Count the rows of a data frame that belong to one species
#'
#' @param .data A data frame with a `species` column.
#' @param species_name A single species label to match, e.g. `"Adelie"`.
#' @return A single integer: the number of rows whose `species` equals
#'   `species_name`. Errors if `species_name` is not of length one.
count_penguins_by_species <- function(.data, species_name) {
  # `==` would recycle a longer vector against the column and silently
  # miscount, so insist on exactly one value.
  if (length(species_name) != 1L) {
    stop("`species_name` must be a single value.", call. = FALSE)
  }

  .data %>%
    # `.env$` pins the comparison to the function argument even if the data
    # frame happens to contain a column that is itself named `species_name`.
    filter(species == .env$species_name) %>%
    nrow()
}

# Test: "Adelie" should reproduce the snippet's count; "Gentoo" checks that
# the species argument really drives the filter
count_penguins_by_species(penguins, "Adelie")
## [1] 152
count_penguins_by_species(penguins, "Gentoo")
## [1] 124