Import your data

# Load the `flights` dataset into the workspace
# NOTE(review): presumably provided by nycflights13, attached outside this view - confirm
data(flights)

# Print a skimr summary of `flights`
flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Read the Excel workbook at this relative path into `Coaster`
Coaster <- read_xlsx("../00_data/MyData.xlsx")

# Print a skimr summary of `Coaster`
Coaster %>% skimr::skim()
Data summary
Name Piped data
Number of rows 8351
Number of columns 23
_______________________
Column type frequency:
character 16
numeric 6
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
acc_state 0 1.00 2 2 0 40 0
acc_city 118 0.99 4 20 0 674 0
fix_port 0 1.00 1 1 0 3 0
source 0 1.00 12 57 0 30 0
bus_type 0 1.00 4 29 0 17 0
industry_sector 0 1.00 7 14 0 4 0
device_category 0 1.00 7 23 0 21 0
device_type 0 1.00 4 26 0 91 0
tradename_or_generic 0 1.00 4 32 0 407 0
manufacturer 3310 0.60 2 40 0 253 0
gender 728 0.91 1 1 0 4 0
acc_desc 3 1.00 4 1258 0 8023 0
injury_desc 10 1.00 4 367 0 3985 0
report 8273 0.01 77 86 0 77 0
category 0 1.00 5 54 0 49 0
notes 8290 0.01 9 678 0 41 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
acc_id 0 1.00 1.005e+06 3126.04 920315 1002160 1005414 1007676 1009907 ▁▁▁▁▇
num_injured 2 1.00 1.050e+00 0.71 0 1 1 1 30 ▇▁▁▁▁
age_youngest 684 0.92 2.460e+01 18.28 0 10 18 38 92 ▇▃▃▁▁
mechanical 7977 0.04 1.000e+00 0.00 1 1 1 1 1 ▁▁▇▁▁
op_error 8192 0.02 1.000e+00 0.00 1 1 1 1 1 ▁▁▇▁▁
employee 8306 0.01 1.000e+00 0.00 1 1 1 1 1 ▁▁▇▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
acc_date 0 1 2010-06-12 2017-07-26 2014-06-28 1845

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of `flights`, then count how many remain
ncol_num <- ncol(select(flights, where(is.numeric)))

ncol_num
## [1] 14

Turn them into a function

#' Count the numeric columns of a data frame
#'
#' @param x A data frame (or tibble) whose columns are inspected.
#' @return The number of columns of `x` for which `is.numeric()` is TRUE.
Number_of_columns <- function(x) {
    # Work on the argument `x` rather than the global `flights`, so the
    # function gives the right answer for whatever data frame is passed in.
    x %>%

        # Select a type of variables
        select(where(is.numeric)) %>%

        # Count columns
        ncol()
}
# Apply the function to `flights` and store the returned count
numerical_columns <- Number_of_columns(flights)

# Print the stored count
numerical_columns
## [1] 14

Adding arguments for details of operation

Example 2: count rows

code snippets

# Keep only the rows whose carrier is "UA", then count them
nrow_num <- nrow(filter(flights, carrier == "UA"))

nrow_num
## [1] 58665

Turn them into a function

#' Count the rows of a data frame that belong to one carrier
#'
#' @param x A data frame with a `carrier` column. Defaults to the global
#'   `flights` so that existing calls of the form `nrows()` keep working.
#' @param carrier_code Carrier value to match; defaults to "UA".
#' @return The number of rows of `x` whose `carrier` equals `carrier_code`.
nrows <- function(x = flights, carrier_code = "UA") {
    # Filter the data frame that was passed in, not a hard-coded global.
    x %>%

        # filter rows that meet a condition; `.env$` forces the function
        # argument to be used even if `x` has a column of the same name
        filter(carrier == .env$carrier_code) %>%

        # Count rows
        nrow()
}
# Call nrows() with no arguments and store the row count it returns
Number_of_UA_flights <- nrows()

Example 3: count rows

code snippets

# Count the rows of Coaster after keeping only its numeric columns.
# select() changes columns only, so this equals the row count of Coaster.
nrows_num_ex <- Coaster %>%
    select(where(is.numeric)) %>%
    nrow()

# Print the value just computed (not the earlier `nrow_num` from Example 2)
nrows_num_ex
## [1] 8351
# Keep only the rows whose acc_state is "NH", then count them
nrow_num_ex2 <- nrow(filter(Coaster, acc_state == "NH"))

nrow_num_ex2
## [1] 211

Turn them into a function

#' Count the rows of a data frame recorded for New Hampshire
#'
#' @param x A data frame with an `acc_state` column.
#' @return The number of rows of `x` whose `acc_state` equals "NH".
Number_Accidents_in_NH <- function(x) {
    # Work on the argument `x` rather than the global `Coaster`, so the
    # result reflects the data frame that was actually passed in.
    x %>%
        filter(acc_state == "NH") %>%
        nrow()
}
# Apply the function to `Coaster` and store the returned count
Accidents_in_NH <- Number_Accidents_in_NH(Coaster)

# Print the stored count
Accidents_in_NH
## [1] 211