Import your data

# Read the workbook through an explicit path instead of calling setwd(), so
# the import does not depend on (or change) the session's working directory.
data_path <- file.path(
    "~/Desktop/PSU_DAT3000_IntroToDA/05_module8/data",
    "My_Data.xlsx"
)
data <- readxl::read_excel(data_path)

# Print the imported tibble
data
## # A tibble: 1,302 × 9
##    Language   Endonym `World Region` Country `Global Speakers` `Language Family`
##    <chr>      <chr>   <chr>          <chr>               <dbl> <chr>            
##  1 Abakuá     Abakuá  Caribbean      "Cuba"                 NA <NA>             
##  2 Abaza      Абаза   Western Asia   "Turke…             49800 Abkhaz-Adyge     
##  3 Abruzzese… Abruzz… Southern Euro… "Italy"                NA Indo-European    
##  4 Abruzzese… Abruzz… Southern Euro… "Italy"                NA Indo-European    
##  5 Acehnese   Bahsa … Southeastern … "Indon…           3500000 Austronesian     
##  6 Acehnese   Bahsa … Southeastern … "Indon…           3500000 Austronesian     
##  7 Adjoukrou  <NA>    Western Africa "Ivory…            140000 Atlantic-Congo   
##  8 Adyghe     <NA>    Western Asia   "Turke…            117500 Abkhaz-Adyge     
##  9 Afenmai    Afenmai Western Africa "Niger…            270000 Atlantic-Congo   
## 10 African-A… Black … Northern Amer… "Unite…          45109521 Indo-European    
## # ℹ 1,292 more rows
## # ℹ 3 more variables: Location <chr>, Size <chr>, Status <chr>
# Print a skimr summary of every column, grouped by variable type
data %>% skimr::skim()
Data summary
Name Piped data
Number of rows 1302
Number of columns 9
_______________________
Column type frequency:
character 8
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Language 0 1.00 2 29 0 749 0
Endonym 168 0.87 2 27 0 635 0
World Region 0 1.00 9 25 0 22 0
Country 0 1.00 4 59 0 354 0
Language Family 8 0.99 4 23 0 49 0
Location 0 1.00 4 41 0 259 0
Size 0 1.00 5 8 0 5 0
Status 0 1.00 8 11 0 5 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Global Speakers 176 0.86 30428972 105984908 100 346647.5 2408000 15870835 1116596640 ▇▁▁▁▁

Create data frame functions

Example 1: count columns

code snippets

# How many columns of `data` hold numeric values?
ncol_num <- ncol(
    # where() keeps the columns for which is.numeric() returns TRUE
    select(data, where(is.numeric))
)

# Print the count
ncol_num
## [1] 1

Turn them into a function

# Count the numeric columns of a data frame.
#
# Arguments:
#   .data  A data frame (or tibble) whose columns are inspected.
#
# Returns the number of columns for which is.numeric() is TRUE.
count_ncol_numeric <- function(.data) {

    # Drop every column that is not numeric
    numeric_cols <- select(.data, where(is.numeric))

    # The width of what is left is the answer
    ncol(numeric_cols)
}

# Numeric-column count for the full data set
count_ncol_numeric(data)
## [1] 1
# Same count for the first ten rows with the first three columns dropped
count_ncol_numeric(data[1:10, -1:-3])
## [1] 1

Add arguments to control the details of the operation

# Count the columns of a data frame that have a given type.
#
# Arguments:
#   .data      A data frame (or tibble) whose columns are inspected.
#   type_data  Which kind of column to count: "numeric" (the default) or
#              "character".
#
# Returns the number of columns of the requested type. Stops with an error
# when type_data is not a single supported type name.
count_ncol_type <- function(.data, type_data = "numeric") {

    # Validate up front: with an unsupported value neither branch below would
    # run, and the function would fail with an unrelated "object not found"
    # error instead of telling the caller what went wrong.
    supported_types <- c("numeric", "character")
    if (!is.character(type_data) || length(type_data) != 1L ||
        !type_data %in% supported_types) {
        stop(
            "`type_data` must be one of: ",
            paste0("\"", supported_types, "\"", collapse = ", "),
            call. = FALSE
        )
    }

    # Keep only the columns of the requested type
    if (type_data == "numeric") {
        cols_of_type <- .data %>% select(where(is.numeric))
    } else {
        cols_of_type <- .data %>% select(where(is.character))
    }

    # Count columns
    ncol(cols_of_type)
}

# Default: count the numeric columns
count_ncol_type(data)
## [1] 1
# Count the character columns instead
count_ncol_type(data, type_data = "character")
## [1] 8
# Character columns of the first ten rows across columns 1 to 9
count_ncol_type(data[1:10, 1:9], type_data = "character")
## [1] 8

Example 2: count rows

code snippets

# Number of rows whose Status is "Historical"
nrow_num <- nrow(
    # filter() keeps only the rows where the condition is TRUE
    filter(data, Status == "Historical")
)

# Print the count
nrow_num
## [1] 84

Turn them into a function

# Count the rows of a data frame whose Status column equals a given value.
#
# Arguments:
#   .data        A data frame (or tibble) with a Status column.
#   status_name  The status value to match, e.g. "Historical".
#
# Returns the number of matching rows.
count_num_data_by_status <- function(.data, status_name) {

    matching_rows <- .data %>%

        # .env$ makes filter() read status_name from the function argument,
        # so a column that happens to be called status_name cannot shadow it
        filter(Status == .env$status_name)

    # Count rows
    nrow(matching_rows)
}

# Historical rows among the first ten rows of the Status column
count_num_data_by_status(data[1:10, "Status"], status_name = "Historical")
## [1] 1

Example 3: count rows

Create your own.

code snippets

Use the filter() function to select rows that meet a condition. For details, see Chapter 5.2, "Filter rows with filter()".

# Keep the rows with Status equal to "Historical" and count them
nrow_num <- nrow(filter(data, Status == "Historical"))

# Show the result
nrow_num
## [1] 84

Turn them into a function

# Count the rows of a data frame whose Status column equals a given value.
#
# Arguments:
#   .data        A data frame (or tibble) with a Status column.
#   status_name  The status value to match, e.g. "Historical".
#
# Returns the number of matching rows.
count_num_data_by_status <- function(.data, status_name) {

    matching_rows <- .data %>%

        # .env$ makes filter() read status_name from the function argument,
        # so a column that happens to be called status_name cannot shadow it
        filter(Status == .env$status_name)

    # Count rows
    nrow(matching_rows)
}

# Historical rows among the first ten rows of the Status column
count_num_data_by_status(data[1:10, "Status"], status_name = "Historical")
## [1] 1