# Load packages
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(nycflights13)
# Core
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ lubridate::intersect() masks base::intersect()
## ✖ dplyr::lag() masks stats::lag()
## ✖ lubridate::setdiff() masks base::setdiff()
## ✖ lubridate::union() masks base::union()
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
##
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(broom)
Functions
When should you write a function?
# Fix the random seed so the simulated data are reproducible
set.seed(1234)
# Build a tibble with four columns of ten standard-normal draws each
df <- tibble::tibble(
  a = rnorm(n = 10),
  b = rnorm(n = 10),
  c = rnorm(n = 10),
  d = rnorm(n = 10)
)
# Min-max rescale every column by hand: subtract the column minimum, then
# divide by the column's span (max - min). NAs are ignored in both steps.
df$a <- (df$a - min(df$a, na.rm = TRUE)) / diff(range(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) / diff(range(df$b, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) / diff(range(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) / diff(range(df$d, na.rm = TRUE))
# Print the rescaled tibble
df
## # A tibble: 10 × 4
## a b c d
## <dbl> <dbl> <dbl> <dbl>
## 1 0.332 0.153 0.782 1
## 2 0.765 0 0.473 0.519
## 3 1 0.0651 0.498 0.448
## 4 0 0.311 0.943 0.511
## 5 0.809 0.573 0.373 0.168
## 6 0.831 0.260 0 0.308
## 7 0.516 0.143 1 0
## 8 0.524 0.0255 0.210 0.256
## 9 0.519 0.0472 0.708 0.575
## 10 0.424 1 0.253 0.522
# Multiply a value by itself.
#
# Args:
#   var: a numeric value or vector; vectors are squared element-wise.
#
# Returns:
#   The product var * var.
square <- function(var) {
  var * var
}
# Rescale a numeric vector linearly so its finite values span [0, 1].
#
# Args:
#   x: a numeric vector. NA values are ignored when the range is computed
#      and remain NA in the result.
#
# Returns:
#   A vector the same length as x in which the smallest finite value maps
#   to 0 and the largest finite value maps to 1. Infinite values are left
#   out of the range calculation, so -Inf stays -Inf and Inf stays Inf
#   instead of turning every element into 0 or NaN. If all finite values
#   are equal the span is zero and the finite elements come back as NaN.
rescale <- function(x) {
  # range() returns c(min, max) in one call; finite = TRUE drops both NA
  # and +/-Inf before the extremes are taken.
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Apply the rescale() helper to columns a through d in a single assignment
df[c("a", "b", "c", "d")] <- lapply(df[c("a", "b", "c", "d")], rescale)
Functions are for humans and computers
Conditional execution
# Report the sign of a single value through R's condition system.
#
# Args:
#   x: a single, non-missing value that can be compared with 0.
#
# Behaviour:
#   x > 0  : emits the message "value is positive" and prints x.
#   x == 0 : raises a warning and still prints x.
#   x < 0  : stops with an error.
#
# Returns:
#   The value returned by print(x) for positive and zero input; never
#   returns for negative input.
#
# Raises:
#   An error when x is negative, or when x is not exactly one non-missing
#   value.
detect_sign <- function(x) {
  # `if` needs exactly one TRUE/FALSE. Fail early with a clear message
  # instead of the cryptic error a vector or NA condition would trigger.
  if (length(x) != 1L || is.na(x)) {
    stop("x must be a single non-missing value")
  }
  if (x > 0) {
    message("value is positive")
    print(x)
  } else if (x == 0) {
    warning("value is not positive, but can be accepted")
    print(x)
  } else {
    # stop() never returns, so nothing placed after it would ever run.
    stop("value is negative, the function must stop")
  }
}
# Positive input: a message is emitted and the value is printed
3 %>% detect_sign()
## value is positive
## [1] 3
# Zero: a warning is raised, yet the value is still printed
0 %>% detect_sign()
## Warning in detect_sign(.): value is not positive, but can be accepted
## [1] 0
# Negative input stops with an error, so the call is left commented out
# -1 %>% detect_sign()
Function arguments
# Sample vector with one extreme value (100) and one missing value
x <- c(seq_len(10), 100, NA)
# With the defaults the missing value propagates into the result
mean(x)
## [1] NA
# Drop the missing value before averaging
mean(x, na.rm = TRUE)
## [1] 14.09091
# Additionally trim 10% of the observations from each end
mean(x, na.rm = TRUE, trim = 0.1)
## [1] 6
# Arithmetic mean that removes missing values unless told otherwise.
#
# Args:
#   x:     the values to average; passed to mean() as its first argument.
#   na.rm: whether to drop NA values before averaging (default TRUE).
#   ...:   further arguments forwarded unchanged to mean(), e.g. trim.
#
# Returns:
#   The result of mean(x, na.rm = na.rm, ...).
mean_remove_na <- function(x, na.rm = TRUE, ...) {
  mean(x, na.rm = na.rm, ...)
}
# Default behaviour: missing values are removed before averaging
mean_remove_na(x)
## [1] 14.09091
# The caller can still switch NA removal off
mean_remove_na(x, na.rm = FALSE)
## [1] NA
# Extra arguments such as trim are forwarded to mean() through ...
mean_remove_na(x, trim = 0.1)
## [1] 6