Module 12: Apply it to your data 11

Import your data

# Read the CSV file into a data frame named `data`
data <- read.csv("../00_data/myData.csv")

# Pipe `data` into skimr::skim() to print a per-column summary
data %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	691
Number of columns	22
_______________________
Column type frequency:
character	8
numeric	14
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
sort_name	0	1.00	1	34	391
clean_name	0	1.00	1	34	386
album	0	1.00	1	69	685
genre	164	0.76	5	35	16
type	0	1.00	4	13	5
spotify_url	36	0.95	22	36	655
artist_gender	5	0.99	4	11	3
album_id	0	1.00	6	22	691

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
X	0	1.00	346.00	199.62	1	173.50	346.0	518.50	691	▇▇▇▇▇
rank_2003	191	0.72	250.50	144.49	1	125.75	250.5	375.25	500	▇▇▇▇▇
rank_2012	191	0.72	250.50	144.48	1	125.75	250.5	375.25	500	▇▇▇▇▇
rank_2020	191	0.72	250.50	144.48	1	125.75	250.5	375.25	500	▇▇▇▇▇
differential	0	1.00	-12.32	199.04	-501	-137.50	-8.0	106.00	484	▂▅▇▃▂
release_year	0	1.00	1982.87	14.55	1955	1971.00	1979.0	1994.00	2019	▂▇▃▃▂
weeks_on_billboard	119	0.83	64.27	75.14	1	20.75	44.5	81.00	741	▇▁▁▁▁
peak_billboard_position	0	1.00	61.19	77.16	1	2.00	17.0	111.50	201	▇▁▁▁▂
spotify_popularity	37	0.95	55.81	14.95	10	46.00	57.0	68.00	91	▁▃▇▇▂
artist_member_count	5	0.99	2.75	2.02	1	1.00	2.0	4.00	12	▇▅▁▁▁
artist_birth_year_sum	5	0.99	5363.21	3947.13	1910	1948.00	3896.0	7845.00	23368	▇▅▁▁▁
debut_album_release_year	5	0.99	1976.87	14.96	1934	1966.25	1973.0	1989.00	2019	▁▇▇▅▂
ave_age_at_top_500	5	0.99	29.61	9.35	17	24.04	27.0	31.00	88	▇▂▁▁▁
years_between	5	0.99	5.93	8.42	0	1.00	3.0	7.00	54	▇▁▁▁▁

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns of flights, then count them
ncol_num <- ncol(
    select(flights, where(is.numeric))
)

ncol_num

## [1] 14

Turn them into a function

# Count how many columns of a data frame are numeric.
#
# .data: a data frame.
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {

    # Subset to the numeric columns only
    numeric_cols <- select(.data, where(is.numeric))

    # The width of that subset is the answer
    ncol(numeric_cols)
}

# All of flights
count_ncol_numeric(flights)

## [1] 14

# First 10 rows, with columns 1 through 13 dropped
count_ncol_numeric(flights[1:10, -(1:13)])

## [1] 4

Adding arguments for details of operation

# Count the columns of a data frame that are of a given type.
#
# .data:     a data frame.
# type_data: which columns to count, either "numeric" (the default) or
#            "character".
#
# Returns the number of matching columns. Any other `type_data` stops with an
# explicit error instead of failing later with "object 'ncol_type' not found".
count_ncol_type <- function(.data, type_data = "numeric") {

    # Validate up front: only these two types have a predicate below
    supported_types <- c("numeric", "character")
    if (!is.character(type_data) || length(type_data) != 1L ||
        !type_data %in% supported_types) {
        stop(
            "`type_data` must be one of: ",
            paste0('"', supported_types, '"', collapse = ", "),
            call. = FALSE
        )
    }

    # Choose the predicate that where() applies to each column
    is_type <- switch(
        type_data,
        numeric = is.numeric,
        character = is.character
    )

    .data %>%

        # Select a type of variables
        select(where(is_type)) %>%

        # Count columns
        ncol()
}

# Default type ("numeric")
count_ncol_type(flights)

## [1] 14

# Character columns
count_ncol_type(flights, type_data = "character")

## [1] 4

# Character columns among the first 5 columns of the first 10 rows
count_ncol_type(flights[1:10, 1:5], type_data = "character")

## [1] 0

Example 2: count rows

code snippets

# Keep the flights whose carrier code is "DL", then count the rows
nrow_num <- nrow(
    filter(flights, carrier == "DL")
)

nrow_num

## [1] 48110

Turn them into a function

# Count the rows of a data frame whose `carrier` column equals a given code.
#
# .data:        a data frame with a `carrier` column.
# carrier_name: the carrier code to match, e.g. "UA".
# Returns the number of matching rows.
count_num_flights_by_carrier <- function(.data, carrier_name) {

    # Rows whose carrier equals the requested code
    matching_flights <- filter(.data, carrier == carrier_name)

    # The height of that subset is the number of flights
    nrow(matching_flights)
}

count_num_flights_by_carrier(flights[1:10, "carrier"], carrier_name = "UA")

## [1] 3

Import your data

# Read myData.csv into `data`
data <- read.csv(file = "../00_data/myData.csv")

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

# Keep the rows whose type is "Studio", then count them
nrow_num <- nrow(
    filter(data, type == "Studio")
)

nrow_num

## [1] 608

Turn them into a function

# Count the rows of a data frame whose `type` column equals a given value.
#
# data: a data frame with a `type` column.
# type: the single value to match against that column, e.g. "Studio".
# Returns the number of matching rows.
#
# Example: data %>% count_num_studio_by_type(type = "Studio")
count_num_studio_by_type <- function(data, type) {

    nrow_num <- data %>%

        # Inside filter() a bare `type` always means the column, so the
        # argument of the same name is read through the `.env` pronoun;
        # `.data$type` makes the column reference explicit.
        filter(.data$type == .env$type) %>%

        # Count rows
        nrow()

    nrow_num
}

Module 12: Apply it to your data 11

Chapter 19 Functions

Katelynn Goy

Import your data

Create Data frame functions

Example 1: count columns

code snippets

Turn them into a function

Adding arguments for details of operation

Example 2: count rows

code snippets

Turn them into a function

Import your data

Example 3: count rows

code snippets

Turn them into a function