Import your data

# Load the `flights` dataset into the session
# NOTE(review): presumably provided by the nycflights13 package; confirm it is
# attached in a setup chunk that is not shown here
data(flights)
# Print a per-column summary of `flights`, grouped by variable type
flights %>% skimr::skim()
Data summary
Name Piped data
Number of rows 336776
Number of columns 19
_______________________
Column type frequency:
character 4
numeric 14
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
carrier 0 1.00 2 2 0 16 0
tailnum 2512 0.99 5 6 0 4043 0
origin 0 1.00 3 3 0 3 0
dest 0 1.00 3 3 0 105 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2013.00 0.00 2013 2013 2013 2013 2013 ▁▁▇▁▁
month 0 1.00 6.55 3.41 1 4 7 10 12 ▇▆▆▆▇
day 0 1.00 15.71 8.77 1 8 16 23 31 ▇▇▇▇▆
dep_time 8255 0.98 1349.11 488.28 1 907 1401 1744 2400 ▁▇▆▇▃
sched_dep_time 0 1.00 1344.25 467.34 106 906 1359 1729 2359 ▁▇▇▇▃
dep_delay 8255 0.98 12.64 40.21 -43 -5 -2 11 1301 ▇▁▁▁▁
arr_time 8713 0.97 1502.05 533.26 1 1104 1535 1940 2400 ▁▃▇▇▇
sched_arr_time 0 1.00 1536.38 497.46 1 1124 1556 1945 2359 ▁▃▇▇▇
arr_delay 9430 0.97 6.90 44.63 -86 -17 -5 14 1272 ▇▁▁▁▁
flight 0 1.00 1971.92 1632.47 1 553 1496 3465 8500 ▇▃▃▁▁
air_time 9430 0.97 150.69 93.69 20 82 129 192 695 ▇▂▂▁▁
distance 0 1.00 1039.91 733.23 17 502 872 1389 4983 ▇▃▂▁▁
hour 0 1.00 13.18 4.66 1 9 13 17 23 ▁▇▇▇▅
minute 0 1.00 26.23 19.30 0 8 29 44 59 ▇▃▆▃▅

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time_hour 0 1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00 6936
# Read the workbook into `data`, then display it
data <- read_excel(path = "../00_data/MyData-Charts.xlsx")
data
## # A tibble: 1,222 × 11
##     year months    state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
##    <dbl> <chr>     <chr>   <dbl> <chr>     <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1  2015 January-… Alab…    7000 7000       1800      26 2800    250     4      
##  2  2015 January-… Ariz…   35000 35000      4600      13 3400    2100    6      
##  3  2015 January-… Arka…   13000 14000      1500      11 1200    90      1      
##  4  2015 January-… Cali… 1440000 1690000  255000      15 250000  124000  7      
##  5  2015 January-… Colo…    3500 12500      1500      12 200     140     1      
##  6  2015 January-… Conn…    3900 3900        870      22 290     NA      NA     
##  7  2015 January-… Flor…  305000 315000    42000      13 54000   25000   8      
##  8  2015 January-… Geor…  104000 105000    14500      14 47000   9500    9      
##  9  2015 January-… Hawa…   10500 10500       380       4 3400    760     7      
## 10  2015 January-… Idaho   81000 88000      3700       4 2600    8000    9      
## # … with 1,212 more rows, 1 more variable: `Growth of colonies` <dbl>, and
## #   abbreviated variable names ¹​colony_n, ²​colony_max, ³​colony_lost,
## #   ⁴​colony_lost_pct, ⁵​colony_added, ⁶​colony_reno, ⁷​colony_reno_pct
# Read the second workbook (`Datasimpler.xlsx`) into `data_small`
data_small <- read_excel(path = "../00_data/Datasimpler.xlsx")

# Number of rows of `data` recorded for each year
count(data, year)
## # A tibble: 7 × 2
##    year     n
##   <dbl> <int>
## 1  2015   188
## 2  2016   188
## 3  2017   188
## 4  2018   188
## 5  2019   188
## 6  2020   188
## 7  2021    94
# Number of rows of `data` recorded for each state
count(data, state)
## # A tibble: 47 × 2
##    state           n
##    <chr>       <int>
##  1 Alabama        26
##  2 Arizona        26
##  3 Arkansas       26
##  4 California     26
##  5 Colorado       26
##  6 Connecticut    26
##  7 Florida        26
##  8 Georgia        26
##  9 Hawaii         26
## 10 Idaho          26
## # … with 37 more rows
# Number of rows of `data` recorded for each reporting period
count(data, months)
## # A tibble: 4 × 2
##   months               n
##   <chr>            <int>
## 1 April-June         329
## 2 January-March      329
## 3 July-September     282
## 4 October-December   282

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of `flights` and count them in one expression
ncol_num <- ncol(select(flights, where(is.numeric)))

ncol_num
## [1] 14

Turn them into a function

#' Count the numeric columns of a data frame
#'
#' @param .data A data frame (or tibble) whose columns are inspected.
#' @return The number of columns of `.data` for which `is.numeric()` is `TRUE`.
count_ncol_numeric <- function(.data) {
    # Work on the data frame that was passed in, not on the global `flights`,
    # so the result reflects the caller's data
    ncol_num <- .data %>%

        # Select a type of variables
        select(where(is.numeric)) %>%

        # Count columns
        ncol()

    # Return the new variable
    return(ncol_num)
}
flights %>% count_ncol_numeric()
## [1] 14
flights %>% .[1:10, -1:-13] %>% count_ncol_numeric()
## [1] 4

Adding arguments for details of operation

#' Count the columns of a given type in a data frame
#'
#' @param .data A data frame (or tibble) whose columns are inspected.
#' @param type_data Which column type to count: `"numeric"` (the default) or
#'   `"character"`.
#' @return The number of columns of `.data` that are of the requested type.
#'   Any other `type_data` value raises an error.
count_ncol_type <- function(.data, type_data = "numeric") {

    # Map the requested type to its predicate. An unsupported value stops with
    # an error instead of falling through and returning an unrelated variable.
    is_type <- switch(type_data,
        numeric = is.numeric,
        character = is.character,
        stop(
            "`type_data` must be \"numeric\" or \"character\", not \"",
            type_data, "\".",
            call. = FALSE
        )
    )

    ncol_type <- .data %>%

        # Select a type of variables
        select(where(is_type)) %>%

        # Count columns
        ncol()

    # Return the new variable
    return(ncol_type)
}

flights %>% count_ncol_type()
## [1] 14
flights %>% count_ncol_type(type_data = "character")
## [1] 4
flights %>% .[1:10, 1:5] %>% count_ncol_type(type_data = "character")
## [1] 0

Example 2: count rows

code snippets

# Keep the rows whose carrier is "DL" and count them in one expression
nrow_num <- nrow(filter(flights, carrier == "DL"))

nrow_num
## [1] 48110

Turn them into a function

#' Count the rows that belong to one carrier
#'
#' @param .data A data frame (or tibble) with a `carrier` column.
#' @param carrier_name Value to match against the `carrier` column.
#' @return The number of rows of `.data` whose `carrier` equals `carrier_name`.
count_num_flights_by_carrier <- function(.data, carrier_name) {

    # Keep only the rows for the requested carrier
    carrier_rows <- filter(.data, carrier == carrier_name)

    # The size of that subset is the answer
    return(nrow(carrier_rows))
}

# Run the function on only the first ten rows of the `carrier` column
count_num_flights_by_carrier(flights[1:10, "carrier"], carrier_name = "AA")
## [1] 2

Example 3: count rows

code snippets

# Keep the rows whose state is "New York" and count them in one expression
nrow_num <- nrow(filter(data, state == "New York"))

nrow_num
## [1] 26

Turn them into a function

#' Count the rows that belong to one state
#'
#' @param .data A data frame (or tibble) with a `state` column.
#' @param state_name Value to match against the `state` column. Defaults to
#'   `"New York"`, so calls that omit it keep counting New York rows.
#' @return The number of rows of `.data` whose `state` equals `state_name`.
count_num_states_by_New_York <- function(.data, state_name = "New York") {

    # Use the data frame and state that were passed in, rather than the global
    # `data` and a hard-coded state, so the arguments actually take effect
    nrow_num <- .data %>%

        # filter rows that meet a condition
        filter(state == state_name) %>%

        # Count rows
        nrow()

    return(nrow_num)
}

# Rows 1-10 and columns 1-5 (which include `state`); a data frame takes only a
# row index and a column index, so the stray third subscript was dropped
data %>% .[1:10, 1:5] %>%
    count_num_states_by_New_York(state_name = "New York")
## [1] 0