Module 12: Apply it to your data 11

Import your data

# Load the `flights` dataset into the session
# (presumably from nycflights13, which must already be attached; confirm)
data(flights)

# Summarise every column: type, missingness and distribution
flights %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	336776
Number of columns	19
_______________________
Column type frequency:
character	4
numeric	14
POSIXct	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
carrier	0	1.00	2	2	16
tailnum	2512	0.99	5	6	4043
origin	0	1.00	3	3	3
dest	0	1.00	3	3	105

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
year	0	1.00	2013.00	0.00	2013	2013	2013	2013	2013	▁▁▇▁▁
month	0	1.00	6.55	3.41	1	4	7	10	12	▇▆▆▆▇
day	0	1.00	15.71	8.77	1	8	16	23	31	▇▇▇▇▆
dep_time	8255	0.98	1349.11	488.28	1	907	1401	1744	2400	▁▇▆▇▃
sched_dep_time	0	1.00	1344.25	467.34	106	906	1359	1729	2359	▁▇▇▇▃
dep_delay	8255	0.98	12.64	40.21	-43	-5	-2	11	1301	▇▁▁▁▁
arr_time	8713	0.97	1502.05	533.26	1	1104	1535	1940	2400	▁▃▇▇▇
sched_arr_time	0	1.00	1536.38	497.46	1	1124	1556	1945	2359	▁▃▇▇▇
arr_delay	9430	0.97	6.90	44.63	-86	-17	-5	14	1272	▇▁▁▁▁
flight	0	1.00	1971.92	1632.47	1	553	1496	3465	8500	▇▃▃▁▁
air_time	9430	0.97	150.69	93.69	20	82	129	192	695	▇▂▂▁▁
distance	0	1.00	1039.91	733.23	17	502	872	1389	4983	▇▃▂▁▁
hour	0	1.00	13.18	4.66	1	9	13	17	23	▁▇▇▇▅
minute	0	1.00	26.23	19.30	0	8	29	44	59	▇▃▆▃▅

Variable type: POSIXct

skim_variable	n_missing	complete_rate	min	max	median	n_unique
time_hour	0	1	2013-01-01 05:00:00	2013-12-31 23:00:00	2013-07-03 10:00:00	6936

# Download the World Cup matches file (TidyTuesday, 2022-11-29) as a tibble
Data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-29/wcmatches.csv"
)

## Rows: 900 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (11): country, city, stage, home_team, away_team, outcome, win_conditio...
## dbl   (3): year, home_score, away_score
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Print the tibble to preview its first rows and column types
Data

## # A tibble: 900 × 15
##     year country city    stage home_team away_team home_score away_score outcome
##    <dbl> <chr>   <chr>   <chr> <chr>     <chr>          <dbl>      <dbl> <chr>  
##  1  1930 Uruguay Montev… Grou… France    Mexico             4          1 H      
##  2  1930 Uruguay Montev… Grou… Belgium   United S…          0          3 A      
##  3  1930 Uruguay Montev… Grou… Brazil    Yugoslav…          1          2 A      
##  4  1930 Uruguay Montev… Grou… Peru      Romania            1          3 A      
##  5  1930 Uruguay Montev… Grou… Argentina France             1          0 H      
##  6  1930 Uruguay Montev… Grou… Chile     Mexico             3          0 H      
##  7  1930 Uruguay Montev… Grou… Bolivia   Yugoslav…          0          4 A      
##  8  1930 Uruguay Montev… Grou… Paraguay  United S…          0          3 A      
##  9  1930 Uruguay Montev… Grou… Uruguay   Peru               1          0 H      
## 10  1930 Uruguay Montev… Grou… Argentina Mexico             6          3 H      
## # ℹ 890 more rows
## # ℹ 6 more variables: win_conditions <chr>, winning_team <chr>,
## #   losing_team <chr>, date <date>, month <chr>, dayofweek <chr>

Create Data frame functions

Example 1: count columns

code snippets

# How many columns of `flights` are numeric?
ncol_num <- flights %>%
  select(where(is.numeric)) %>% # keep the numeric columns only
  ncol()                        # number of columns that are left

ncol_num

## [1] 14

Turn them into a function

# Count the numeric columns of a data frame.
#
# .data: a data frame or tibble.
# Returns the number of columns for which is.numeric() is TRUE.
count_numeric_var <- function(.data) {
  # Drop every non-numeric column first ...
  numeric_only <- select(.data, where(is.numeric))

  # ... so the remaining column count is the answer
  ncol(numeric_only)
}

# Whole data set: count all numeric columns
flights %>% count_numeric_var()

## [1] 14

# Drop the first 13 columns by position, then count the numeric ones that remain
flights %>% .[, -1:-13] %>% count_numeric_var()

## [1] 4

Adding arguments for details of operation

# Count the columns of a data frame that are of a given type.
#
# .data: a data frame or tibble.
# type:  which columns to count, "numeric" (the default) or "character".
# Returns the number of matching columns.
# Any other `type` raises an error from match.arg() listing the valid choices.
count_type_of_var <- function(.data, type = "numeric") {

  # Validate up front: an unsupported `type` used to match neither branch, so
  # the result was never assigned and the call failed with the unhelpful
  # "object 'ncol_num' not found".
  type <- match.arg(type, choices = c("numeric", "character"))

  # Pick the predicate that identifies the requested column type
  is_type <- switch(type,
    numeric = is.numeric,
    character = is.character
  )

  .data %>%

    # Select a type of variables
    select(where(is_type)) %>%

    # Count columns
    ncol()
}

# Whole data set: count the character columns
flights %>% count_type_of_var(type = "character")

## [1] 4

# Drop the first 13 columns by position, then count the character ones that remain
flights %>% .[, -1:-13] %>% count_type_of_var(type = "character")

## [1] 1

Example 2: count rows

code snippets

# How many flights were operated by carrier "UA"?
nrow_num <- flights %>%
  filter(carrier == "UA") %>% # keep the rows for this carrier only
  nrow()                      # number of rows that are left

nrow_num

## [1] 58665

Turn them into a function

# Count the rows (flights) that belong to one carrier.
#
# .data:        a data frame with a `carrier` column.
# carrier_name: the carrier code to match, e.g. "UA".
# Returns the number of rows whose `carrier` equals `carrier_name`.
count_n_flights_by_carrier <- function(.data, carrier_name) {

  .data %>%

    # `.env$` forces `carrier_name` to be the function argument even if the
    # data happens to contain a column with the same name; a bare name would
    # silently resolve to the column first.
    filter(carrier == .env$carrier_name) %>%

    # Count rows
    nrow()
}

# Whole data set: count the flights operated by "UA"
flights %>% count_n_flights_by_carrier(carrier_name = "UA")

## [1] 58665

# Same count restricted to the first 10 rows
flights %>% .[1:10, ] %>% count_n_flights_by_carrier(carrier_name = "UA")

## [1] 3

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

# How many matches have "Brazil" in the `country` column?
nrow_numb <- Data %>%
  filter(country == "Brazil") %>% # keep the rows for this country only
  nrow()                          # number of rows that are left

nrow_numb

## [1] 86

Turn them into a function

# Count the matches whose `country` column equals a given value.
#
# .Data:   a data frame with a `country` column (e.g. the World Cup data).
# country: the country name to match, e.g. "Brazil".
# Returns the number of matching rows.
#
# NOTE(review): in the data preview `country` looks like the host country
# (1930 / Uruguay / Montevideo), so this counts matches hosted there rather
# than matches played by that team; confirm this is the intended question.
count_n_games_by_brazil <- function(.Data, country) {

  .Data %>%

    # The argument has the same name as the column, so a bare
    # `country == country` compares the column with itself and keeps every
    # row. The pronouns state which side is the column (`.data$`) and which
    # is the function argument (`.env$`).
    filter(.data$country == .env$country) %>%

    # Count rows
    nrow()
}

Data %>% count_n_games_by_brazil(country = "Brazil")

## [1] 86

Module 12: Apply it to your data 11

Chapter 19 Functions

Sondre Asheim

Import your data

Create Data frame functions

Example 1: count columns

code snippets

Turn them into a function

Adding arguments for details of operation

Example 2: count rows

code snippets

Turn them into a function

Example 3: count rows

code snippets

Turn them into a function