# Load packages

# Core
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(nycflights13)
## Warning: package 'nycflights13' was built under R version 4.3.3

Functions

When should you write a function?

# Fix the random seed so the draws below are reproducible
set.seed(1234)
# Create a tibble with four columns of 10 random normal draws each
df <- tibble(
    a = rnorm(n = 10),
    b = rnorm(n = 10),
    c = rnorm(n = 10),
    d = rnorm(n = 10)
)
# Rescale each column to the range [0, 1]: (value - min) / (max - min).
# Every statement must name its own column on both sides of the assignment;
# this copy-and-paste pattern is easy to get wrong, which is the motivation
# for wrapping the computation in a function.

df$a <- (df$a - min(df$a, na.rm = TRUE)) /
    (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) /
    (max(df$b, na.rm = TRUE) - min(df$b, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) /
    (max(df$c, na.rm = TRUE) - min(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) /
    (max(df$d, na.rm = TRUE) - min(df$d, na.rm = TRUE))
# Rescale a numeric vector linearly onto the interval [0, 1].
#
# Args:
#   x: A numeric vector. Missing values are ignored when finding the range
#      and remain missing in the result.
#
# Returns:
#   A numeric vector the same length as `x`, in which the smallest
#   non-missing value maps to 0 and the largest to 1. If all non-missing
#   values are equal, the range is zero and the result is NaN.
rescale <- function(x) {

    # body: use only the argument `x`, never the global `df`
    rng <- range(x, na.rm = TRUE)
    rescaled <- (x - rng[1]) / (rng[2] - rng[1])

    # return values
    return(rescaled)
}
# Run rescale() over the four columns in one step, then print the result
df <- df %>%
    mutate(across(all_of(c("a", "b", "c", "d")), rescale))

df
## # A tibble: 10 × 4
##        a       b       c      d
##    <dbl>   <dbl>   <dbl>  <dbl>
##  1 0.332 -0.477   0.134   1.10 
##  2 0.765 -0.998  -0.491  -0.476
##  3 1     -0.776  -0.441  -0.709
##  4 0      0.0645  0.460  -0.501
##  5 0.809  0.959  -0.694  -1.63 
##  6 0.831 -0.110  -1.45   -1.17 
##  7 0.516 -0.511   0.575  -2.18 
##  8 0.524 -0.911  -1.02   -1.34 
##  9 0.519 -0.837  -0.0151 -0.294
## 10 0.424  2.42   -0.936  -0.466

Functions are for humans and computers

Conditional execution

# Report the sign of a single value using R's three condition levels.
#
# Args:
#   x: A single, non-missing value that can be compared with 0.
#
# Behaviour:
#   x > 0  -> emits a message, then prints x
#   x == 0 -> emits a warning, then prints x
#   x < 0  -> stops with an error
#
# Returns:
#   `x`, invisibly (the value of print(x)), when x is positive or zero.
detect_sign <- function(x) {

    # `if` needs exactly one non-missing TRUE/FALSE, so reject anything else
    # up front with a clear message instead of failing inside the comparison
    if (length(x) != 1L || is.na(x)) {
        stop("`x` must be a single non-missing value", call. = FALSE)
    }

    if (x > 0) {
        message("Value is positive")
        print(x)
    } else if (x == 0) {
        warning("Value is not posistive, but can be accepted")
        print(x)
    } else {
        # stop() aborts the function, so no code after it would ever run
        stop("Value is negative, the function must stop")
    }
}

3 %>% detect_sign()
## Value is positive
## [1] 3
0 %>% detect_sign()
## Warning in detect_sign(.): Value is not posistive, but can be accepted
## [1] 0
# Left commented out: a negative value makes detect_sign() stop with an error
# -1 %>% detect_sign()

Function arguments

# Open the help page for mean() to inspect its arguments
help("mean")
## starting httpd help server ... done
# Example vector containing one large value (100) and one missing value
x <- c(seq_len(10), 100, NA)
x
##  [1]   1   2   3   4   5   6   7   8   9  10 100  NA
# With the defaults, a single NA makes the result NA
mean(x)
## [1] NA
# Drop missing values before averaging
mean(x, na.rm = TRUE)
## [1] 14.09091
# Also trim a fraction of 0.1 of the observations from each end
mean(x, trim = 0.1, na.rm = TRUE)
## [1] 6
# Wrapper around mean() that removes missing values by default.
#
# Args:
#   x:     Object passed to mean() as its first argument.
#   na.rm: Whether to drop NA values before averaging; defaults to TRUE.
#   ...:   Further arguments forwarded unchanged to mean() (e.g. `trim`).
#
# Returns:
#   The value computed by mean().
mean_remove_na <- function(x, na.rm = TRUE, ...) {
    mean(x, na.rm = na.rm, ...)
}

# Default: missing values are removed
mean_remove_na(x)
## [1] 14.09091
# The default can still be overridden
mean_remove_na(x, na.rm = FALSE)
## [1] NA
# Extra arguments such as `trim` are passed through `...` to mean()
mean_remove_na(x, trim = 0.1)
## [1] 6

Two types of functions

Return values