Module 12: Apply it to your data 11

Import your data

# Read the Excel workbook into `data`
data <- read_excel(path = "../00_data/Apply_1.xlsx")

# Print a per-column overview (types, completeness, summary statistics)
data %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	1155
Number of columns	13
_______________________
Column type frequency:
character	8
numeric	5
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
movie_name	1	2	43	830
director	1	3	31	510
actor_1_name	1	6	22	567
actor_2_name	1	7	27	647
character_1_gender	1	3	5	2
character_2_gender	1	3	5	2
actor_1_birthdate	1	10	10	562
actor_2_birthdate	1	10	10	640

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
release_year	1	2000.80	16.37	1935	1997	2004	2012	2022	▁▁▁▆▇
age_difference	1	10.42	8.51	0	4	8	15	52	▇▃▂▁▁
couple_number	1	1.40	0.75	1	1	1	2	7	▇▁▁▁▁
actor_1_age	1	40.64	10.42	18	33	39	47	81	▂▇▅▂▁
actor_2_age	1	30.21	7.50	17	25	29	34	68	▇▇▂▁▁

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of `data`, then count how many remain
ncol_num <- ncol(select(data, where(is.numeric)))

ncol_num

## [1] 5

Turn them into a function

#' Count the numeric columns in a data frame
#'
#' @param .data A data frame or tibble.
#' @return The number of columns of `.data` that satisfy `is.numeric()`.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_cols <- select(.data, where(is.numeric))

    # The width of what is left is the count
    ncol(numeric_cols)
}

count_ncol_numeric(data)

## [1] 5

data[1:10, 1:13]

## # A tibble: 10 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  2 Venus                 2006 Roger M…             50             1 Peter O'Too…
##  3 The Quiet Am…         2002 Phillip…             49             1 Michael Cai…
##  4 The Big Lebo…         1998 Joel Co…             45             1 David Huddl…
##  5 Beginners             2010 Mike Mi…             43             1 Christopher…
##  6 Poison Ivy            1992 Katt Sh…             42             1 Tom Skerritt
##  7 Whatever Wor…         2009 Woody A…             40             1 Larry David 
##  8 Entrapment            1999 Jon Ami…             39             1 Sean Connery
##  9 Husbands and…         1992 Woody A…             38             1 Woody Allen 
## 10 Magnolia              1999 Paul Th…             38             1 Jason Robar…
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>

Adding arguments for details of operation

#' Count the columns of a given type in a data frame
#'
#' @param .data A data frame or tibble.
#' @param type_data Which kind of column to count: `"numeric"` (default) or
#'   `"character"`.
#' @return The number of columns of `.data` whose type matches `type_data`.
#'   Stops with an error if `type_data` is not one of the supported labels.
count_ncol_type <- function(.data, type_data = "numeric") {

    # One predicate per supported label; add an entry here to support more types
    type_checks <- list(
        numeric   = is.numeric,
        character = is.character
    )

    # Reject unsupported labels up front so the caller sees what went wrong
    is_valid <- is.character(type_data) &&
        length(type_data) == 1 &&
        type_data %in% names(type_checks)

    if (!is_valid) {
        stop(
            "`type_data` must be one of: ",
            paste0('"', names(type_checks), '"', collapse = ", "),
            call. = FALSE
        )
    }

    keep_col <- type_checks[[type_data]]

    .data %>%

        # Select the columns of the requested type
        select(where(keep_col)) %>%

        # Count columns
        ncol()
}

count_ncol_type(data)

## [1] 5

count_ncol_type(data, type_data = "numeric")

## [1] 5

count_ncol_type(data, type_data = "character")

## [1] 8

count_ncol_type(data[1:10, 1:6], type_data = "character")

## [1] 3

Example 2: count rows

code snippets

nrow_num <- data %>%

    # Keep rows where the first actor is 43; `actor_1_age` is numeric, so
    # compare with a number rather than relying on coercion to character
    filter(actor_1_age == 43) %>%

    # Count rows
    nrow()

nrow_num

## [1] 27

Turn them into a function

#' Count the rows where the first actor has a given age
#'
#' @param .data A data frame or tibble with an `actor_1_age` column.
#' @param actor_1_age Age to match against the `actor_1_age` column.
#' @return The number of rows of `.data` whose `actor_1_age` equals the
#'   requested age.
count_type_actor <- function(.data, actor_1_age) {

    .data %>%

        # The argument shares its name with the column, so be explicit:
        # `.data$` refers to the column, `.env$` to the function argument
        filter(.data$actor_1_age == .env$actor_1_age) %>%

        # Count rows
        nrow()
}

# Same count as the standalone snippet, now through the function
data %>%
    count_type_actor(actor_1_age = 43)

## [1] 27

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

Module 12: Apply it to your data 11

Chapter 19 Functions

Daniel Lee

Import your data

Create Data frame functions

Example 1: count columns

code snippets

Turn them into a function

Adding arguments for details of operation

Example 2: count rows

code snippets

Turn them into a function

Example 3: count rows

code snippets

Turn them into a function