Import your data

# Read the Excel workbook into `data` (the path is relative to the working directory)
data <- read_excel("../00_data/Apply_1.xlsx")

# Print a per-column summary (types, missing values, basic statistics)
data %>% skimr::skim()
Data summary
Name Piped data
Number of rows 1155
Number of columns 13
_______________________
Column type frequency:
character 8
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
movie_name 0 1 2 43 0 830 0
director 0 1 3 31 0 510 0
actor_1_name 0 1 6 22 0 567 0
actor_2_name 0 1 7 27 0 647 0
character_1_gender 0 1 3 5 0 2 0
character_2_gender 0 1 3 5 0 2 0
actor_1_birthdate 0 1 10 10 0 562 0
actor_2_birthdate 0 1 10 10 0 640 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
release_year 0 1 2000.80 16.37 1935 1997 2004 2012 2022 ▁▁▁▆▇
age_difference 0 1 10.42 8.51 0 4 8 15 52 ▇▃▂▁▁
couple_number 0 1 1.40 0.75 1 1 1 2 7 ▇▁▁▁▁
actor_1_age 0 1 40.64 10.42 18 33 39 47 81 ▂▇▅▂▁
actor_2_age 0 1 30.21 7.50 17 25 29 34 68 ▇▇▂▁▁

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of `data`, then count how many remain
ncol_num <- ncol(select(data, where(is.numeric)))

# Show the result
ncol_num
## [1] 5

Turn them into a function

# Count the numeric columns of a data frame.
#
# Arguments:
#   .data  A data frame or tibble.
#
# Returns the number of columns for which `is.numeric()` is TRUE.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_only <- select(.data, where(is.numeric))

    # The column count of what is left is the answer
    ncol(numeric_only)

}

# Apply the function to the full data set
data %>% count_ncol_numeric()
## [1] 5
# Preview rows 1-10 across columns 1-13
data %>% .[1:10, 1:13]
## # A tibble: 10 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  2 Venus                 2006 Roger M…             50             1 Peter O'Too…
##  3 The Quiet Am…         2002 Phillip…             49             1 Michael Cai…
##  4 The Big Lebo…         1998 Joel Co…             45             1 David Huddl…
##  5 Beginners             2010 Mike Mi…             43             1 Christopher…
##  6 Poison Ivy            1992 Katt Sh…             42             1 Tom Skerritt
##  7 Whatever Wor…         2009 Woody A…             40             1 Larry David 
##  8 Entrapment            1999 Jon Ami…             39             1 Sean Connery
##  9 Husbands and…         1992 Woody A…             38             1 Woody Allen 
## 10 Magnolia              1999 Paul Th…             38             1 Jason Robar…
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>

Adding arguments for details of operation

# Count the columns of a given type in a data frame.
#
# Arguments:
#   .data      A data frame or tibble.
#   type_data  Which kind of column to count: "numeric" (the default) or
#              "character".
#
# Returns the number of columns of the requested type.
# Stops with an explicit error when `type_data` is not one of the supported
# values, instead of failing later on an undefined result variable.
count_ncol_type <- function(.data, type_data = "numeric") {

    # Validate up front so `switch()` below only ever sees a single string
    if (!is.character(type_data) || length(type_data) != 1 || is.na(type_data)) {
        stop("`type_data` must be a single string.", call. = FALSE)
    }

    # Map the requested type onto the predicate that recognises it
    is_wanted_type <- switch(
        type_data,
        numeric   = is.numeric,
        character = is.character,
        stop(
            "`type_data` must be \"numeric\" or \"character\", not \"",
            type_data, "\".",
            call. = FALSE
        )
    )

    .data %>%

        # Select a type of variables
        select(where(is_wanted_type)) %>%

        # Count columns
        ncol()

}

# Default argument: counts numeric columns
data %>% count_ncol_type()
## [1] 5
# Same result with the type spelled out
data %>% count_ncol_type(type_data = "numeric")
## [1] 5
# Count character columns instead
data %>% count_ncol_type(type_data = "character")
## [1] 8
# Restrict to the first 10 rows and first 6 columns before counting
data %>% .[1:10, 1:6] %>% count_ncol_type(type_data = "character")
## [1] 3

Example 2: count rows

code snippets

# Count the rows of `data` where `actor_1_age` equals 43.
# `actor_1_age` is a numeric column, so compare against the number 43 rather
# than the string "43", which only matched through implicit coercion.
nrow_num <- data %>%

    # Filter rows that meet a condition
    filter(actor_1_age == 43) %>%

    # Count rows
    nrow()

nrow_num
## [1] 27

Turn them into a function

# Count the rows of a data frame whose `actor_1_age` column equals a given age.
#
# Arguments:
#   .data        A data frame or tibble with an `actor_1_age` column.
#   actor_1_age  The single age to match, e.g. 43. A string such as "43" is
#                still accepted and matched through R's usual coercion.
#
# Returns the number of matching rows.
count_type_actor <- function(.data, actor_1_age) {

    # A longer vector would be silently recycled against the column
    stopifnot(length(actor_1_age) == 1)

    # Work on the data passed in, not on a global object
    .data %>%

        # The argument shares its name with the column, so qualify both sides:
        # `.data$` is the column, `.env$` is the value supplied by the caller.
        filter(.data$actor_1_age == .env$actor_1_age) %>%

        # Count rows
        nrow()

}

# Count the rows of the full data set where the first actor's age is 43
data %>%
    count_type_actor(actor_1_age = 43)
## [1] 27

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2 Filter rows with filter()

Turn them into a function