Introduction

The interactive calculator

setwd(getwd())  # Function with an argument, the latter being a function inself

2 + 2  # Hit SHFT+CTRL+ENTER to execute the code chunck
## [1] 4
d <- 4 - 2
d  # Return the value of the object
## [1] 2
str(d)
##  num 2
typeof(d)
## [1] "double"
x <- 5
y <- 2
z1 <- x * y  # Multiplication
z2 <- x / y  # Division
z3 <- x^y  # Power
z1; z2; z3  # Semi-colon denotes new line of code
## [1] 10
## [1] 2.5
## [1] 25
2*3^4  # Power before multiplication
## [1] 162
(2*3)^4  # Force multiplication first
## [1] 1296
exp(1)
## [1] 2.718282
typeof(exp(1))
## [1] "double"
5 == 5.0  # Is an integer equal to a floating point value?
## [1] TRUE
(1 / sqrt(2 * pi)) * exp(-((1^2) / 2))
## [1] 0.2419707
1 / sqrt(2 * pi) * exp(-2^2/2)
## [1] 0.05399097
dnorm(x = c (1, 2))
## [1] 0.24197072 0.05399097

The help system

help(sin)
## starting httpd help server ... done
?sin  # Base and opened libraries
??sin  # Base and installed libraries
example(sin)
## 
## sin> x <- seq(-3, 7, by = 1/8)
## 
## sin> tx <- cbind(x, cos(pi*x), cospi(x), sin(pi*x), sinpi(x),
## sin+                tan(pi*x), tanpi(x), deparse.level=2)
## Warning in tanpi(x): NaNs produced
## 
## sin> op <- options(digits = 4, width = 90) # for nice formatting
## 
## sin> head(tx)
##           x cos(pi * x... cospi(x) sin(pi * x... sinpi(x) tan(pi * x... tanpi(x)
## [1,] -3.000    -1.000e+00  -1.0000    -3.674e-16   0.0000     3.674e-16   0.0000
## [2,] -2.875    -9.239e-01  -0.9239    -3.827e-01  -0.3827     4.142e-01   0.4142
## [3,] -2.750    -7.071e-01  -0.7071    -7.071e-01  -0.7071     1.000e+00   1.0000
## [4,] -2.625    -3.827e-01  -0.3827    -9.239e-01  -0.9239     2.414e+00   2.4142
## [5,] -2.500     3.062e-16   0.0000    -1.000e+00  -1.0000    -3.266e+15      NaN
## [6,] -2.375     3.827e-01   0.3827    -9.239e-01  -0.9239    -2.414e+00  -2.4142
## 
## sin> tx[ (x %% 1) %in% c(0, 0.5) ,]
##          x cos(pi * x... cospi(x) sin(pi * x... sinpi(x) tan(pi * x... tanpi(x)
##  [1,] -3.0    -1.000e+00       -1    -3.674e-16        0     3.674e-16        0
##  [2,] -2.5     3.062e-16        0    -1.000e+00       -1    -3.266e+15      NaN
##  [3,] -2.0     1.000e+00        1     2.449e-16        0     2.449e-16        0
##  [4,] -1.5    -1.837e-16        0     1.000e+00        1    -5.444e+15      NaN
##  [5,] -1.0    -1.000e+00       -1    -1.225e-16        0     1.225e-16        0
##  [6,] -0.5     6.123e-17        0    -1.000e+00       -1    -1.633e+16      NaN
##  [7,]  0.0     1.000e+00        1     0.000e+00        0     0.000e+00        0
##  [8,]  0.5     6.123e-17        0     1.000e+00        1     1.633e+16      NaN
##  [9,]  1.0    -1.000e+00       -1     1.225e-16        0    -1.225e-16        0
## [10,]  1.5    -1.837e-16        0    -1.000e+00       -1     5.444e+15      NaN
## [11,]  2.0     1.000e+00        1    -2.449e-16        0    -2.449e-16        0
## [12,]  2.5     3.062e-16        0     1.000e+00        1     3.266e+15      NaN
## [13,]  3.0    -1.000e+00       -1     3.674e-16        0    -3.674e-16        0
## [14,]  3.5    -4.286e-16        0    -1.000e+00       -1     2.333e+15      NaN
## [15,]  4.0     1.000e+00        1    -4.898e-16        0    -4.899e-16        0
## [16,]  4.5     5.511e-16        0     1.000e+00        1     1.815e+15      NaN
## [17,]  5.0    -1.000e+00       -1     6.123e-16        0    -6.123e-16        0
## [18,]  5.5    -2.450e-15        0    -1.000e+00       -1     4.082e+14      NaN
## [19,]  6.0     1.000e+00        1    -7.348e-16        0    -7.348e-16        0
## [20,]  6.5    -9.804e-16        0     1.000e+00        1    -1.020e+15      NaN
## [21,]  7.0    -1.000e+00       -1     8.572e-16        0    -8.573e-16        0
## 
## sin> options(op)

Useful mathematical functions

Vectors

x_1 <- c(1, 3, 5, 7, 9, 11)
x_2 <- c(6.5, 4.3, 9.1, -8.5, 0, 3.6)
x_3 <- c("E coli","P aeruginosa","K pneumoniae","S aureus")
x_4 <- c(a = 4, b = 5.5, c = 8.8)
str(x_1)
##  num [1:6] 1 3 5 7 9 11
str(x_2)
##  num [1:6] 6.5 4.3 9.1 -8.5 0 3.6
str(x_3)
##  chr [1:4] "E coli" "P aeruginosa" "K pneumoniae" "S aureus"
str(x_4)
##  Named num [1:3] 4 5.5 8.8
##  - attr(*, "names")= chr [1:3] "a" "b" "c"
mode(x_1)
## [1] "numeric"
mode(x_2)
## [1] "numeric"
mode(x_3)
## [1] "character"
mode(x_4)
## [1] "numeric"
length(x_1)
## [1] 6
length(x_2)
## [1] 6
length(x_3)
## [1] 4
length(x_4)
## [1] 3
names(x_4)
## [1] "a" "b" "c"
log(x_1)
## [1] 0.000000 1.098612 1.609438 1.945910 2.197225 2.397895
v <- c(1, 2, 3)
w <- c(10, 20, 30, 40, 50, 60, 70, 80, 90)
v + w
## [1] 11 22 33 41 52 63 71 82 93
1:5
## [1] 1 2 3 4 5
seq(from = 1, to = 5, by = 1)
## [1] 1 2 3 4 5
seq(from = 5, to = 1, by = -1)
## [1] 5 4 3 2 1
rep(1:3, 5)
##  [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
rep(1:3, each = 5)
##  [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
rep(c(1, 10), c(2, 10))  # Repeat 1 twice and 10 10 times
##  [1]  1  1 10 10 10 10 10 10 10 10 10 10
seq(from = 1, to = 10, length.out = 19)
##  [1]  1.0  1.5  2.0  2.5  3.0  3.5  4.0  4.5  5.0  5.5  6.0  6.5  7.0  7.5  8.0
## [16]  8.5  9.0  9.5 10.0
c(c(1, 2), c(3, 4))
## [1] 1 2 3 4
vec_1 <- seq(10, 100, 10)  # One to 100 in steps of 10
vec_1
##  [1]  10  20  30  40  50  60  70  80  90 100
vec_1[3]
## [1] 30
vec_1[3:5]
## [1] 30 40 50
vec_1[c(3, 5)]
## [1] 30 50
vec_1[c(3, 5)] <- c(3000, 5000)
vec_1
##  [1]   10   20 3000   40 5000   60   70   80   90  100

Matrices

A = matrix(1:9, ncol = 3, nrow = 3)
A
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
B = matrix(1:9, ncol = 3, nrow = 3, byrow = T)
B
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
matrix(rep(c(1, 2), c(4, 4)), nrow = 2, ncol = 4)
##      [,1] [,2] [,3] [,4]
## [1,]    1    1    2    2
## [2,]    1    1    2    2
matrix(rep(c(1, 2), 4), nrow = 2, ncol = 4)
##      [,1] [,2] [,3] [,4]
## [1,]    1    1    1    1
## [2,]    2    2    2    2
matrix(rep(c(1, 2), 4), nrow = 2, ncol = 4, byrow = T)
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    1    2
## [2,]    1    2    1    2
cbind(c(1, 2, 3, 4, 5), c(10, 20, 30, 40, 50))
##      [,1] [,2]
## [1,]    1   10
## [2,]    2   20
## [3,]    3   30
## [4,]    4   40
## [5,]    5   50
rbind(c(1, 2, 3, 4, 5), c(10, 20, 30, 40, 50))
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]   10   20   30   40   50
t(rbind(c(1, 2, 3, 4, 5), c(10, 20, 30, 40, 50)))
##      [,1] [,2]
## [1,]    1   10
## [2,]    2   20
## [3,]    3   30
## [4,]    4   40
## [5,]    5   50
diag(c(1, 2, 3, 4))
##      [,1] [,2] [,3] [,4]
## [1,]    1    0    0    0
## [2,]    0    2    0    0
## [3,]    0    0    3    0
## [4,]    0    0    0    4
A = matrix(rnorm(30, mean = 0, sd = 1), nrow = 5, ncol = 6)
A
##            [,1]        [,2]        [,3]       [,4]       [,5]       [,6]
## [1,]  0.7056694  1.06489942 -0.67551957  1.5949925 -1.0949065 -0.2072596
## [2,]  0.4462632 -1.82792537 -0.08876104 -1.1629110  1.7513593 -1.5536505
## [3,]  0.6011499 -1.41786309  0.51038071  1.2123378 -1.0041103 -0.2121914
## [4,]  0.8094439  0.67024410 -0.74204407 -0.6771484  0.1464640 -1.4745251
## [5,] -1.5840674  0.04430783  1.42344776  1.5545427  0.6344505  1.8295643
A[3:5, 1:3]
##            [,1]        [,2]       [,3]
## [1,]  0.6011499 -1.41786309  0.5103807
## [2,]  0.8094439  0.67024410 -0.7420441
## [3,] -1.5840674  0.04430783  1.4234478
A[3:5, c(1, 3)]
##            [,1]       [,2]
## [1,]  0.6011499  0.5103807
## [2,]  0.8094439 -0.7420441
## [3,] -1.5840674  1.4234478
A[1, ]
## [1]  0.7056694  1.0648994 -0.6755196  1.5949925 -1.0949065 -0.2072596
A[ , 2]
## [1]  1.06489942 -1.82792537 -1.41786309  0.67024410  0.04430783
which(A > 0.05)
##  [1]  1  2  3  4  6  9 13 15 16 18 20 22 24 25 30

Arrays

tensor_1 <- array(1:24, dim = c(3, 4, 2))
tensor_1
## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24

Factors

cat_1 = rep(c(1:3), 6)
cat_1
##  [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
cat_1 <- factor(cat_1,
                levels = c(1, 2, 3),
                labels = c("Group A", "Group B", "Group C"))
cat_1
##  [1] Group A Group B Group C Group A Group B Group C Group A Group B Group C
## [10] Group A Group B Group C Group A Group B Group C Group A Group B Group C
## Levels: Group A Group B Group C
str(cat_1)
##  Factor w/ 3 levels "Group A","Group B",..: 1 2 3 1 2 3 1 2 3 1 ...

Lists

list_1 <- list("R", "Language", c(3, 4))
list_1
## [[1]]
## [1] "R"
## 
## [[2]]
## [1] "Language"
## 
## [[3]]
## [1] 3 4
list_1[1]
## [[1]]
## [1] "R"
typeof(list_1)
## [1] "list"
str(list_1)
## List of 3
##  $ : chr "R"
##  $ : chr "Language"
##  $ : num [1:2] 3 4

Data frames

df <- read.csv("BP.csv")
head(df)  # Dipslay first 6 rows
##   ID Age Group InitialBP FinalBP
## 1  1  61     I       140     145
## 2  2  58     I       120     116
## 3  3  55     I       180     168
## 4  4  57    II       134     120
## 5  5  43    II       134     120
## 6  6  40    II       200     192
mean(df$Age)
## [1] 49.39423

Logical operations

identical(c(3, 4, 5), c(3, 5, 4))
## [1] FALSE
3 < 4
## [1] TRUE
3 <= 4
## [1] TRUE
3 != 4
## [1] TRUE
vec_1 <= 50
##  [1]  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
vec_1[vec_1 >= 50]
## [1] 3000 5000   60   70   80   90  100
a <- c(1, 2, 3, 5)
b <- c(1, 1, 5, 4)
(a < b) | (a > 3)  # Comparing pairs from each respective vector
## [1] FALSE FALSE  TRUE  TRUE
vec_1[vec_1 < 30 | vec_1 > 60]
## [1]   10   20 3000 5000   70   80   90  100

Flow control

for (i in 1:10){
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
for (i in 1:3){
  for (j in 4:7){
    print(c(i, j))
  }
}
## [1] 1 4
## [1] 1 5
## [1] 1 6
## [1] 1 7
## [1] 2 4
## [1] 2 5
## [1] 2 6
## [1] 2 7
## [1] 3 4
## [1] 3 5
## [1] 3 6
## [1] 3 7
k <- 1  # Instantiate a counter
while (k <= 5){
  print(k)
  k = k + 1
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
k <- 1
status <- FALSE

while (!status){
  status <- k >= 5
  print(k)
  k = k + 1
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
k <- 20
if (k == 20){
  print("k is 20")
}
## [1] "k is 20"
k <- 19
if (k == 20){
  print("k is 20")
} else{
  print("k is not 20")
}
## [1] "k is not 20"
if(k == 20) "k is 20" else "k is not 20"
## [1] "k is not 20"
k <- 20
if (k < 20){
  print("k is less than 20")
} else if (k == 20){
  print("k is 20")
} else print("k is more than 20")
## [1] "k is 20"

Functions

simple_function <- function(x){
  cat("The square of", x, "is", x^2)
}
simple_function(5)
## The square of 5 is 25
formals(simple_function)
## $x
body(simple_function)
## {
##     cat("The square of", x, "is", x^2)
## }
environment(simple_function)
## <environment: R_GlobalEnv>
default_value_function <- function(x, y = 3){
  x + y
}
default_value_function(5)
## [1] 8
default_value_function(5, 5)  # Overwriting the default value
## [1] 10
my_function <- function(x_parameter){
  y_variable <- x_parameter^2  # Local variable
  print(x_parameter)  # Formal parameter
  print(y_variable)
  z_free <- 1000
  print(z_free)  # Unbound variable (error if it doen not exist in global scope)
}
x_parameter <- 42
y_variable <- "forty-two"
z_free <- 10

my_function(5)
## [1] 5
## [1] 25
## [1] 1000
x_parameter
## [1] 42
y_variable
## [1] "forty-two"
z_free
## [1] 10
my_function <- function(x_parameter){
  y_variable <- x_parameter^2  # Local variable
  print(x_parameter)  # Formal parameter
  print(y_variable)
  z_free <<- 1000  # Overwrite global variable
  print(z_free)  # Unbound variable (error if it doen not exist in global scope)
}
x_parameter <- 42
y_variable <- "forty-two"
z_free <- 10

my_function(5)
## [1] 5
## [1] 25
## [1] 1000
x_parameter
## [1] 42
y_variable
## [1] "forty-two"
z_free
## [1] 1000

The apply functions

lapply(c(1, 2, 3, 4), exp)
## [[1]]
## [1] 2.718282
## 
## [[2]]
## [1] 7.389056
## 
## [[3]]
## [1] 20.08554
## 
## [[4]]
## [1] 54.59815
sapply(list("Use", "R", "for", "all", "your", "analysis"), nchar)  # The nchar function returns the number of characters
## [1] 3 1 3 3 4 8
sapply(c("Use", "R", "for", "all", "your", "analysis"), nchar)
##      Use        R      for      all     your analysis 
##        3        1        3        3        4        8
mapply(function(x, y){x^y}, c(10, 10), c(2, 3))
## [1]  100 1000
mapply(function(x, y){x^y}, c(10), c(2, 3))
## [1]  100 1000
x_chars <- c("A", "B", "C")
y_chars <- c("1", "10", "100")
mapply(paste, x_chars, y_chars, sep = "/")
##       A       B       C 
##   "A/1"  "B/10" "C/100"
tensor_2 <- array(data = seq_len(12), dim = c(3, 4))
tensor_2
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
apply(tensor_2, 1, sum)
## [1] 22 26 30
apply(tensor_2, 2, sum)
## [1]  6 15 24 33
tensor_3 <- array(data = seq_len(24), dim = c(3, 4, 2))
tensor_3
## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24

Speed up execution using vectorized code

x <- runif(n = 1e6, min = 0, max = 2 * pi)
system.time({
  y <- numeric(length(x))
  for (k in seq_along(x)) {
    y[k] <- sin(x[k])
  }
})
##    user  system elapsed 
##    0.12    0.00    0.13
system.time(sin(x))
##    user  system elapsed 
##    0.03    0.00    0.08

Distributions

Binomial distribution

  • Probability of \(x=3\) for \(n=20\) trials and probability of success \(p=\frac{1}{6}\)
dbinom(x = 3, size = 20, prob = 1/6)
## [1] 0.2378866
  • Probability of a vector of outcomes and plotting the result
binom_probs <- dbinom(x = 0:20, size = 20, prob = 1/6)
library(plotly)
binom_prob_plot <- plot_ly(x = seq(from = 0, to = 20, by = 1),
                           y = binom_probs,
                           name = "Probability",
                           type = "scatter",
                           mode = "lines+markers") %>% 
  layout(title = "Binomial distribution",
         xaxis = list(title = "Successful outcomes"),
         yaxis = list(title = "Probability"))
binom_prob_plot
  • Using the probability distribution function
sum(dbinom(x = 0:3, size = 20, prob = 1/6))
## [1] 0.5665456
pbinom(q = 3, size = 20, prob = 1/6, lower.tail = T)
## [1] 0.5665456

Normal distribution

  • Cumulative distribution for the normal distribution
pnorm(q = 70, mean = 75, sd = 5, lower.tail = TRUE)  # lower.tail = TRUE is the default
## [1] 0.1586553
1 - pnorm(q = 85, mean = 75, sd = 5)
## [1] 0.02275013
pnorm(q = 85, mean =75, sd = 5, lower.tail = FALSE)
## [1] 0.02275013
  • Finding quartiles or percentiles
qnorm(p = 0.25, mean = 75, sd = 5, lower.tail = T)
## [1] 71.62755
  • Probability density function
x_vals_seq <- seq(from = 55, to = 95, by = 0.25)
norm_probs <- dnorm(x_vals_seq, mean = 75, sd = 5)

norm_probs_plot <- plot_ly(x = x_vals_seq,
                           y = norm_probs,
                           name = "PDF",
                           type = "scatter",
                           mode = "lines") %>% 
  layout(title = "PDF for a normal distribution",
         xaxis = list(title = "Values"),
         yaxis = list(title = "Probability"))
norm_probs_plot
  • Draw random data point values from a normal distribution
norm_vals <- rnorm(1000, mean = 75, sd = 5)
hist(norm_vals,
     main = "Normally distributed random variables",
     xlab = "Values",
     ylab = "Count",
     col = "deepskyblue",
     las = 1)