Module 12: Apply it to your data 11

Import your data

# Read the workbook by its full path instead of calling setwd(): changing the
# working directory is a hidden side effect that affects every relative path
# used later in the session.
data <- read_excel(
    path.expand(
        file.path("~/Desktop/PSU_DAT3000_IntroToDA/05_module8/data", "My_Data.xlsx")
    )
)

# Print the imported table to check the result
data

## # A tibble: 1,302 × 9
##    Language   Endonym `World Region` Country `Global Speakers` `Language Family`
##    <chr>      <chr>   <chr>          <chr>               <dbl> <chr>            
##  1 Abakuá     Abakuá  Caribbean      "Cuba"                 NA <NA>             
##  2 Abaza      Абаза   Western Asia   "Turke…             49800 Abkhaz-Adyge     
##  3 Abruzzese… Abruzz… Southern Euro… "Italy"                NA Indo-European    
##  4 Abruzzese… Abruzz… Southern Euro… "Italy"                NA Indo-European    
##  5 Acehnese   Bahsa … Southeastern … "Indon…           3500000 Austronesian     
##  6 Acehnese   Bahsa … Southeastern … "Indon…           3500000 Austronesian     
##  7 Adjoukrou  <NA>    Western Africa "Ivory…            140000 Atlantic-Congo   
##  8 Adyghe     <NA>    Western Asia   "Turke…            117500 Abkhaz-Adyge     
##  9 Afenmai    Afenmai Western Africa "Niger…            270000 Atlantic-Congo   
## 10 African-A… Black … Northern Amer… "Unite…          45109521 Indo-European    
## # ℹ 1,292 more rows
## # ℹ 3 more variables: Location <chr>, Size <chr>, Status <chr>

# Summarise every column with skimr; the result is printed below
data %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	1302
Number of columns	9
_______________________
Column type frequency:
character	8
numeric	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
Language	0	1.00	2	29	749
Endonym	168	0.87	2	27	635
World Region	0	1.00	9	25	22
Country	0	1.00	4	59	354
Language Family	8	0.99	4	23	49
Location	0	1.00	4	41	259
Size	0	1.00	5	8	5
Status	0	1.00	8	11	5

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Global Speakers	176	0.86	30428972	105984908	100	346647.5	2408000	15870835	1116596640	▇▁▁▁▁

Create Data frame functions

Example 1: count columns

code snippets

# Keep only the numeric columns, then count how many are left
ncol_num <- ncol(select(data, where(is.numeric)))

# Show the count
ncol_num

## [1] 1

Turn them into a function

# Count the numeric columns of a data frame.
#
# .data: a data frame or tibble.
#
# Returns the number of columns kept by select(where(is.numeric)).
count_ncol_numeric <- function(.data) {

    # Keep only the columns that is.numeric() accepts
    numeric_cols <- select(.data, where(is.numeric))

    # The number of remaining columns is the result
    return(ncol(numeric_cols))

}

count_ncol_numeric(data)

## [1] 1

# First ten rows, with the first three columns dropped
count_ncol_numeric(data[1:10, -(1:3)])

## [1] 1

Adding arguments for details of operation

# Count the columns of one type in a data frame.
#
# .data:     a data frame or tibble.
# type_data: which column type to count, "numeric" (the default) or
#            "character". Unambiguous abbreviations are accepted by
#            match.arg().
#
# Returns the number of matching columns. Any other type_data stops with an
# error that lists the valid choices, instead of failing later on a result
# variable that was never assigned.
count_ncol_type <- function(.data, type_data = "numeric") {

    # Validate the requested type before doing any work
    type_data <- match.arg(type_data, choices = c("numeric", "character"))

    # Pick the predicate that recognises columns of the requested type
    is_type <- switch(type_data,
        numeric   = is.numeric,
        character = is.character
    )

    .data %>%

        # Select a type of variables
        select(where(is_type)) %>%

        # Count columns
        ncol()

}

count_ncol_type(data)

## [1] 1

count_ncol_type(data, type_data = "character")

## [1] 8

# First ten rows, columns 1 to 9
count_ncol_type(data[1:10, 1:9], type_data = "character")

## [1] 8

Example 2: count rows

code snippets

# Keep the rows whose Status is "Historical", then count them
nrow_num <- nrow(filter(data, Status == "Historical"))

# Show the count
nrow_num

## [1] 84

Turn them into a function

# Count the rows of a data frame that have a given Status.
#
# .data:       a data frame or tibble with a Status column.
# status_name: the Status value to count.
#
# Returns the number of rows where Status equals status_name.
count_num_data_by_status <- function(.data, status_name) {

    .data %>%

        # .env$ makes status_name refer to the function argument, even if
        # the data has a column that is also called status_name
        filter(Status == .env$status_name) %>%

        # Count rows
        nrow()
}

# Status column of the first ten rows only
count_num_data_by_status(data[1:10, "Status"], status_name = "Historical")

## [1] 1

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. Refer to Chapter 5.2, "Filter rows with filter()".

# Rows with Status "Historical" only
historical_rows <- filter(data, Status == "Historical")
nrow_num <- nrow(historical_rows)

# Show the count
nrow_num

## [1] 84

Turn them into a function

# Count how many rows of a data frame carry a given Status value.
#
# .data:       a data frame or tibble that has a Status column.
# status_name: the Status value to look for.
#
# Returns the number of rows where Status equals status_name.
count_num_data_by_status <- function(.data, status_name) {

    # Compare against the argument explicitly: without .env$, a column
    # called status_name in the data would be used instead of the argument
    matching_rows <- .data %>%
        filter(Status == .env$status_name)

    # Count rows
    nrow(matching_rows)
}

# Status column of the first ten rows only
count_num_data_by_status(data[1:10, "Status"], status_name = "Historical")

## [1] 1

Module 12: Apply it to your data 11

Chapter 19 Functions

Alex Lenfest

Import your data

Create Data frame functions

Example 1: count columns

code snippets

Turn them into a function

Adding arguments for details of operation

Example 2: count rows

code snippets

Turn them into a function

Example 3: count rows

code snippets

Turn them into a function