search()

## [1] ".GlobalEnv"        "package:stats"     "package:graphics" 
## [4] "package:grDevices" "package:utils"     "package:datasets" 
## [7] "package:methods"   "Autoloads"         "package:base"

sum(1:10)

## [1] 55

sum <- 5 + 10

sum

## [1] 15

What is a free variable?

# f takes two formal arguments, x and y. The symbol z is neither an argument
# nor assigned in the body, so R looks it up through the function's enclosing
# environment when f is called.
f <- function(x, y) {
  x_squared <- x^2
  y_over_z <- y / z
  x_squared + y_over_z
}

The function has two arguments: x and y.
z is a free variable: it is neither a formal argument nor a local variable defined inside the function body.

What is an environment?

An environment is a collection of (symbol, value) pairs, i.e., x is a symbol and 3.14 might be its value.
Every environment has a parent environment, and it is possible for an environment to have multiple “children”.
The empty environment is the only environment without a parent.
closure or function closure: function together with an environment

How do we associate a value to a free variable?

# Constructor function: returns another function that raises its argument to
# the power n. The returned closure keeps the environment of this call, which
# is where it finds the value of n.
make.power <- function(n){
  # Arguments are lazily evaluated promises; force n here so the exponent is
  # fixed when the closure is created rather than when it is first called.
  force(n)
  pow <- function(x){
    x^n
  }
  pow
}

cube <- make.power(3)
cube(2)

## [1] 8

cube(10)

## [1] 1000

square <- make.power(2)
square(11)

## [1] 121

cube

## function(x){
##     x^n
##   }
## <environment: 0x000001ffa3e0b388>

ls(environment(cube))

## [1] "n"   "pow"

get("n", environment(cube))

## [1] 3

Loop Functions

R has some functions which implement looping in a compact form

lapply(): Loop over a list and evaluate a function on each element
sapply(): Same as lapply() but try to simplify the result
apply(): Apply a function over the margins of an array
tapply(): Apply a function over subsets of a vector
mapply(): Multivariate version of lapply()

An auxiliary function split().

`lapply()`

The lapply function does the following series of operations:

It loops over a list, iterating over each element in that list.
It applies a function to each element of the list.
It returns a list.

The 3 arguments of lapply():

* a list X;
* a function FUN;
* other arguments passed on to FUN via its ... argument.

lapply

## function (X, FUN, ...) 
## {
##     FUN <- match.fun(FUN)
##     if (!is.vector(X) || is.object(X)) 
##         X <- as.list(X)
##     .Internal(lapply(X, FUN))
## }
## <bytecode: 0x000001ffa3ae25a0>
## <environment: namespace:base>

set.seed(45)
rnorm(10)

##  [1]  0.3407997 -0.7033403 -0.3795377 -0.7460474 -0.8981073 -0.3347941
##  [7] -0.5013782 -0.1745357  1.8090374 -0.2301050

set.seed(45)
x <- list(a = 1:5, b = rnorm(10))
x

## $a
## [1] 1 2 3 4 5
## 
## $b
##  [1]  0.3407997 -0.7033403 -0.3795377 -0.7460474 -0.8981073 -0.3347941
##  [7] -0.5013782 -0.1745357  1.8090374 -0.2301050

lapply(x, mean)

## $a
## [1] 3
## 
## $b
## [1] -0.1818009

lapply(x, sum)

## $a
## [1] 15
## 
## $b
## [1] -1.818009

lapply(x, range)

## $a
## [1] 1 5
## 
## $b
## [1] -0.8981073  1.8090374

runif(10)

##  [1] 0.12915002 0.31987266 0.58550179 0.66214577 0.89106978 0.06920426
##  [7] 0.94623103 0.74850150 0.65599262 0.21644297

runif(10, 50, 100)

##  [1] 69.62164 66.28225 74.27902 92.38420 72.01298 75.40058 99.99590 65.20489
##  [9] 52.46084 66.70704

set.seed(100)
y <- 1:4
lapply(y, runif)

## [[1]]
## [1] 0.3077661
## 
## [[2]]
## [1] 0.2576725 0.5523224
## 
## [[3]]
## [1] 0.05638315 0.46854928 0.48377074
## 
## [[4]]
## [1] 0.8124026 0.3703205 0.5465586 0.1702621

set.seed(100)
lapply(y, runif, min = 0, max = 10)

## [[1]]
## [1] 3.077661
## 
## [[2]]
## [1] 2.576725 5.523224
## 
## [[3]]
## [1] 0.5638315 4.6854928 4.8377074
## 
## [[4]]
## [1] 8.124026 3.703205 5.465586 1.702621

m <- list(a = matrix(1:4, 2, 2), 
          b = matrix(1:6, 3, 2))
m

## $a
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
## 
## $b
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6

# extract the first column of each matrix
lapply(m, function(mat){mat[ ,1]} )

## $a
## [1] 1 2
## 
## $b
## [1] 1 2 3

# extract the second column of each matrix, kept as a one-column matrix
lapply(m, function(mat){matrix(mat[ ,2])} )

## $a
##      [,1]
## [1,]    3
## [2,]    4
## 
## $b
##      [,1]
## [1,]    4
## [2,]    5
## [3,]    6

`sapply()`

# extract the second column of each matrix, kept as a one-column matrix
sapply(m, function(mat){matrix(mat[ ,2])} )

## $a
##      [,1]
## [1,]    3
## [2,]    4
## 
## $b
##      [,1]
## [1,]    4
## [2,]    5
## [3,]    6

set.seed(100)
lapply(y, runif, min = 0, max = 10)

## [[1]]
## [1] 3.077661
## 
## [[2]]
## [1] 2.576725 5.523224
## 
## [[3]]
## [1] 0.5638315 4.6854928 4.8377074
## 
## [[4]]
## [1] 8.124026 3.703205 5.465586 1.702621

set.seed(100)
z <- rep(4, 5)
sapply(z, runif, min = 0, max = 10)

##           [,1]     [,2]     [,3]     [,4]     [,5]
## [1,] 3.0776611 4.685493 5.465586 2.803538 2.046122
## [2,] 2.5767250 4.837707 1.702621 3.984879 3.575249
## [3,] 5.5232243 8.124026 6.249965 7.625511 3.594751
## [4,] 0.5638315 3.703205 8.821655 6.690217 6.902905

## [1] 4 4 4 4 4

sapply(z, range)

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    4    4    4    4    4
## [2,]    4    4    4    4    4

y <- list(a = 1:4,
          b = rnorm(10),
          c = rnorm(20, 1),
          d = rnorm(100, 5, 0.5))
y

## $a
## [1] 1 2 3 4
## 
## $b
##  [1]  0.08988614  0.09627446 -0.20163395  0.73984050  0.12337950 -0.02931671
##  [7] -0.38885425  0.51085626 -0.91381419  2.31029682
## 
## $c
##  [1]  0.5619100  1.7640606  1.2619613  1.7734046  0.1856209  0.5615494
##  [7]  0.2797784  1.2309445 -0.1577295  1.2470760  0.9088864  2.7573756
## [13]  0.8620704  0.8888065  0.3099857  0.7782058  1.1829077  1.4173233
## [19]  2.0654023  1.9702020
## 
## $d
##   [1] 4.949185 5.701602 4.111612 5.311434 4.738858 5.661115 4.818280 5.659533
##   [9] 5.021890 4.060672 4.776469 4.130701 5.089432 5.948733 3.864037 5.490232
##  [17] 4.300587 5.912436 5.690649 4.580574 4.869002 4.965578 4.810558 6.290979
##  [25] 5.064917 4.643488 5.318997 5.100846 4.965042 4.953755 5.224452 4.467822
##  [33] 4.418790 5.824261 3.968952 5.006375 4.456236 5.135270 5.504226 3.962798
##  [41] 5.448411 4.975002 4.327325 4.034394 5.354791 4.921047 5.108184 5.408681
##  [49] 5.863588 4.948115 4.721439 5.714151 4.553521 4.421214 4.734852 6.222841
##  [57] 4.583752 5.206760 4.410658 4.412983 4.833538 5.681557 4.765426 5.421438
##  [65] 4.271003 4.799847 4.611791 4.815352 5.620051 4.946283 5.086297 5.127301
##  [73] 4.692733 4.285392 4.834512 5.064193 5.509060 4.872213 4.848729 5.807595
##  [81] 4.613143 5.212001 4.708027 5.207518 4.227369 4.740625 4.860104 5.503729
##  [89] 4.765215 5.148949 4.791103 4.574810 5.344523 4.769902 5.674092 5.221536
##  [97] 4.924537 5.227774 4.979923 5.228061

lapply(y, range)

## $a
## [1] 1 4
## 
## $b
## [1] -0.9138142  2.3102968
## 
## $c
## [1] -0.1577295  2.7573756
## 
## $d
## [1] 3.864037 6.290979

M <- sapply(y, range)
M

##      a          b          c        d
## [1,] 1 -0.9138142 -0.1577295 3.864037
## [2,] 4  2.3102968  2.7573756 6.290979

rownames(M) <- c("lower", "upper")
M

##       a          b          c        d
## lower 1 -0.9138142 -0.1577295 3.864037
## upper 4  2.3102968  2.7573756 6.290979

`split()`

The split() function takes a vector or other object and splits it into groups determined by a factor or a list of factors.

str(split)

## function (x, f, drop = FALSE, ...)

set.seed(12345)
x <- c(rnorm(10), runif(10), rnorm(10,1))
x

##  [1]  0.5855288  0.7094660 -0.1093033 -0.4534972  0.6058875 -1.8179560
##  [7]  0.6300986 -0.2761841 -0.2841597 -0.9193220  0.4537281  0.3267524
## [13]  0.9654153  0.7074819  0.6445426  0.3898285  0.6985436  0.5440579
## [19]  0.2264672  0.4845578  1.8168998  0.1136425  0.6684224  2.1207127
## [25]  1.2987237  1.7796219  2.4557851  0.3556716 -0.5531374 -0.5977095

# "generate levels" in a factor variable
f <- gl(3, 10)
f

##  [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3
## Levels: 1 2 3

split(x, f)

## $`1`
##  [1]  0.5855288  0.7094660 -0.1093033 -0.4534972  0.6058875 -1.8179560
##  [7]  0.6300986 -0.2761841 -0.2841597 -0.9193220
## 
## $`2`
##  [1] 0.4537281 0.3267524 0.9654153 0.7074819 0.6445426 0.3898285 0.6985436
##  [8] 0.5440579 0.2264672 0.4845578
## 
## $`3`
##  [1]  1.8168998  0.1136425  0.6684224  2.1207127  1.2987237  1.7796219
##  [7]  2.4557851  0.3556716 -0.5531374 -0.5977095

lapply(split(x, f), mean)

## $`1`
## [1] -0.1329441
## 
## $`2`
## [1] 0.5441375
## 
## $`3`
## [1] 0.9458633

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Mean of x within each level of f, shown as a one-column data frame whose
# single column is renamed to "mean".
split(x, f) %>%
  sapply(mean) %>%
  data.frame() %>%
  rename(mean = 1)

Who is the winning presidential candidate per province?

library(readr)
prex2022 <- read_csv("president2022ResultByProvince.csv",
                     show_col_types = FALSE)
prex2022

prex2022[50, ]

names(which.max(prex2022[50, 3:12]))

## [1] "ROBREDO"

provinces <- split(prex2022, prex2022$PROVINCE)

provinces[["CAMARINES NORTE"]]

# For every province, take its first row, look at columns 3 to 12 (presumably
# the per-candidate vote counts -- confirm against the CSV) and keep the name
# of the column holding the largest value. The per-province names are then
# flattened into a named vector and converted to a data frame.
winners <- provinces %>%
  lapply(function(prov) {
    candidate_votes <- prov[1, 3:12]
    names(which.max(candidate_votes))
  }) %>%
  unlist() %>%
  as.data.frame()
winners

Scoping Rules of R and Loop Functions