# basics
# Arithmetic operators; each `##` line records the printed result.

# An addition
5 + 5 # 10
## [1] 10
# A subtraction
5 - 5 # 0
## [1] 0
# A multiplication
3 * 5 # 15
## [1] 15
# A division
(5 + 5) / 2 # 5
## [1] 5
# Exponentiation
2^5 # 32
## [1] 32
# Modulo (remainder after division)
28 %% 6 # 4
## [1] 4

# Assign the value 42 to a variable, then print it
x <- 42
x
## [1] 42
# `y = 42` also assigns at top level, but `<-` is the conventional
# assignment operator in R, so it is used here as well
y <- 42
y
## [1] 42

# c() -- combine
# Place the vector elements, separated by commas, between the parentheses.
numeric_vector <- c(1, 10, 49)
character_vector <- c("a", "b", "c")
boolean_vector <- c(TRUE, FALSE, TRUE)

# names() -- give a name to each element of a vector

# Poker winnings from Monday to Friday
poker_vector <- c(140, -50, 20, -120, 240)

# Label each winning with its weekday
names(poker_vector) <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday"
)


# Adding two vectors in R takes the element-wise sum
A_vector <- c(1, 2, 3)
B_vector <- c(4, 5, 6)
total_vector <- A_vector + B_vector
total_vector
## [1] 5 7 9

# Vector selection
# ... by position, listing every index
poker_midweek <- poker_vector[c(2, 3, 4)]
poker_midweek
##   Tuesday Wednesday  Thursday
##       -50        20      -120
# ... by position, using a range
poker_midweek <- poker_vector[2:4]
poker_midweek
##   Tuesday Wednesday  Thursday
##       -50        20      -120
# ... by element name
poker_start <- poker_vector[c("Monday", "Tuesday", "Wednesday")]
poker_start
##    Monday   Tuesday Wednesday
##       140       -50        20

# Selection by comparison

# The (logical) comparison operators known to R are:
# <  less than
# >  greater than
# <= less than or equal to
# >= greater than or equal to
# == equal to each other
# != not equal to each other

# Comparison operators work element by element on vectors
c(4, 5, 6) > 5
## [1] FALSE FALSE  TRUE

# Which days did you make money on poker?
selection_vector <- poker_vector > 0
selection_vector
##    Monday   Tuesday Wednesday  Thursday    Friday
##      TRUE     FALSE      TRUE     FALSE      TRUE
# A logical vector used as an index keeps only the TRUE positions
poker_winning_days <- poker_vector[selection_vector]
poker_winning_days
##    Monday Wednesday    Friday
##       140        20       240

# A matrix is a collection of elements of the same data type
# (numeric, character, or logical) arranged into a fixed number
# of rows and columns.
# byrow = TRUE fills the matrix row by row.
matrix(1:9, nrow = 3, byrow = TRUE)
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9

# rownames(my_matrix) <- row_names_vector
# colnames(my_matrix) <- col_names_vector
# rowSums(matrix) = totals for each row of a matrix
# cbind() = merges matrices and/or vectors together by column
# rbind()

# Selecting
# my_matrix[,1] selects all elements of the first column.
# my_matrix[1,] selects all elements of the first row.

# Element-wise arithmetic with matrices
# 2 * my_matrix multiplies each element of my_matrix by two.
2 * matrix(1:9, nrow = 3, byrow = TRUE)
##      [,1] [,2] [,3]
## [1,]    2    4    6
## [2,]    8   10   12
## [3,]   14   16   18
# e.g. my_matrix1 * my_matrix2
# creates a matrix where each element
# is the product of the corresponding elements in my_matrix1 and my_matrix2

# NB: standard matrix multiplication =>  %*%

# factor = statistical data type used to store categorical variables
# NB: a categorical variable can belong to only a limited number of categories

sex_vector <- c("Male", "Female", "Female", "Male", "Male")
# factor() encodes the character vector; its distinct values become the levels
factor_sex_vector <- factor(sex_vector)
# 'factor levels' = "Male" and "Female" (printed in sorted order below)
factor_sex_vector
## [1] Male   Female Female Male   Male
## Levels: Female Male

# two types of categorical variables:
# nominal categorical variable (without an implied order)
# ordinal categorical variable (with an implied order)

# Animals: a nominal variable, so no ordering is requested
animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
factor_animals_vector <- factor(animals_vector)
factor_animals_vector
## [1] Elephant Giraffe  Donkey   Horse
## Levels: Donkey Elephant Giraffe Horse

# Temperature: an ordinal variable, so the level order is given explicitly
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
# `ordered` is written out in full rather than relying on partial
# argument matching; `levels` fixes the order Low < Medium < High
factor_temperature_vector <- factor(
  temperature_vector,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
factor_temperature_vector
## [1] High   Low    High   Low    Medium
## Levels: Low < Medium < High

# change the names of the factor levels (for clarity)

survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
factor_survey_vector
## [1] M F F M M
## Levels: F M
# inspect the current levels before renaming them
levels(factor_survey_vector)
## [1] "F" "M"
# note the order: the levels are "F" then "M", so the new names must be
# supplied in that same order ("Female" first, "Male" second)
levels(factor_survey_vector) <- c("Female", "Male")
factor_survey_vector
## [1] Male   Female Female Male   Male
## Levels: Female Male

# quick overview of the contents of a variable
# on a character vector, summary() reports length, class, and mode
summary(survey_vector)
##    Length     Class      Mode
##         5 character character
# on a factor, summary() counts the observations per level
summary(factor_survey_vector)
## Female   Male
##      2      3

# data frame
# variables = columns
# observations = rows
# holds data sets that contain different data types (instead of only one)

# to develop a clear understanding of a data set's structure and main
# elements, show only a small part of it: head() prints the first rows
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# str() lists the number of observations and variables, followed by each
# column's name, type, and first few values
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

# every column of a data frame has the same length,
# so the vectors you pass should also have the same length

# Definition of vectors
name <- c(
  "Mercury", "Venus", "Earth", "Mars",
  "Jupiter", "Saturn", "Uranus", "Neptune"
)
# The type of planet (Terrestrial or Gas Giant).
type <- c(
  "Terrestrial planet", "Terrestrial planet",
  "Terrestrial planet", "Terrestrial planet",
  "Gas giant", "Gas giant", "Gas giant", "Gas giant"
)
# The planet's diameter relative to the diameter of the Earth.
diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
# The planet's rotation across the sun relative to that of the Earth.
rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
# If the planet has rings or not (TRUE or FALSE).
rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)

# Create a data frame from the vectors.
# stringsAsFactors = TRUE is spelled out because the default became FALSE
# in R 4.0.0; the str() output recorded below shows `name` and `type`
# stored as factors, so the conversion is requested explicitly.
planets_df <- data.frame(
  name, type, diameter, rotation, rings,
  stringsAsFactors = TRUE
)

# 8 observations and 5 variables?
str(planets_df)
## 'data.frame':    8 obs. of  5 variables:
##  $ name    : Factor w/ 8 levels "Earth","Jupiter",..: 4 8 1 3 2 6 7 5
##  $ type    : Factor w/ 2 levels "Gas giant","Terrestrial planet": 2 2 2 2 1 1 1 1
##  $ diameter: num  0.382 0.949 1 0.532 11.209 ...
##  $ rotation: num  58.64 -243.02 1 1.03 0.41 ...
##  $ rings   : logi  FALSE FALSE FALSE FALSE TRUE TRUE ...

# Diameter of Mercury: the cell at row 1, column 3
planets_df[1, 3]
## [1] 0.382

# All data for Mars: the entire fourth row
planets_df[4, ]
##   name               type diameter rotation rings
## 4 Mars Terrestrial planet    0.532     1.03 FALSE

# First three elements of the type column
planets_df[1:3, 2] # need to know / lookup the col number
## [1] Terrestrial planet Terrestrial planet Terrestrial planet
## Levels: Gas giant Terrestrial planet
planets_df[1:3, "type"]
## [1] Terrestrial planet Terrestrial planet Terrestrial planet
## Levels: Gas giant Terrestrial planet

# First 5 values of the diameter column
planets_df$diameter[1:5]
## [1]  0.382  0.949  1.000  0.532 11.209

# All elements of the variable diameter: by column number,
# by column name, and with $
planets_df[, 3]
## [1]  0.382  0.949  1.000  0.532 11.209  9.449  4.007  3.883
planets_df[, "diameter"]
## [1]  0.382  0.949  1.000  0.532 11.209  9.449  4.007  3.883
planets_df$diameter
## [1]  0.382  0.949  1.000  0.532 11.209  9.449  4.007  3.883
rings_vector <- planets_df$rings
rings_vector
## [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
# Use rings_vector as a logical row index to select
# all columns for the four planets with rings
planets_df[rings_vector, ]
##      name      type diameter rotation rings
## 5 Jupiter Gas giant   11.209     0.41  TRUE
## 6  Saturn Gas giant    9.449     0.43  TRUE
## 7  Uranus Gas giant    4.007    -0.72  TRUE
## 8 Neptune Gas giant    3.883     0.67  TRUE
# subset() gives the same rows, naming the column directly
subset(planets_df, subset = rings)
##      name      type diameter rotation rings
## 5 Jupiter Gas giant   11.209     0.41  TRUE
## 6  Saturn Gas giant    9.449     0.43  TRUE
## 7  Uranus Gas giant    4.007    -0.72  TRUE
## 8 Neptune Gas giant    3.883     0.67  TRUE

# Planets with diameter < 1
planets_df[planets_df$diameter < 1, ]
##      name               type diameter rotation rings
## 1 Mercury Terrestrial planet    0.382    58.64 FALSE
## 2   Venus Terrestrial planet    0.949  -243.02 FALSE
## 4    Mars Terrestrial planet    0.532     1.03 FALSE
subset(planets_df, diameter < 1)
##      name               type diameter rotation rings
## 1 Mercury Terrestrial planet    0.382    58.64 FALSE
## 2   Venus Terrestrial planet    0.949  -243.02 FALSE
## 4    Mars Terrestrial planet    0.532     1.03 FALSE

# sorting

a <- c(100, 10, 1000)
order(a) # indices that put a into ascending order (not the rank of each element)
## [1] 2 1 3
# indexing a with those indices yields the sorted vector
a[order(a)]
## [1]   10  100 1000
# sort() returns the sorted values directly
sort(a)
## [1]   10  100 1000

# Row indices that put the diameter column into ascending order
positions <- order(planets_df$diameter)
# Reorder the rows of the data frame with those indices
planets_df[positions, ]
##      name               type diameter rotation rings
## 1 Mercury Terrestrial planet    0.382    58.64 FALSE
## 4    Mars Terrestrial planet    0.532     1.03 FALSE
## 2   Venus Terrestrial planet    0.949  -243.02 FALSE
## 3   Earth Terrestrial planet    1.000     1.00 FALSE
## 8 Neptune          Gas giant    3.883     0.67  TRUE
## 7  Uranus          Gas giant    4.007    -0.72  TRUE
## 6  Saturn          Gas giant    9.449     0.43  TRUE
## 5 Jupiter          Gas giant   11.209     0.41  TRUE

# Vectors (one dimensional array): 
# can hold numeric, character or logical values. 
# The elements in a vector all have the same data type.

# Matrices (two dimensional array): 
# can hold numeric, character or logical values. 
# The elements in a matrix all have the same data type.

# Data frames (two-dimensional objects): 
# can hold numeric, character or logical values. 
# Within a column all elements have the same data type, 
# but different columns can be of different data type.

# list = variety of objects
# list()
# arguments to the list function are the list components
# components can be matrices, vectors, other lists, etc

# Three objects of different kinds to store in one list
my_vector <- 1:10
my_matrix <- matrix(1:9, ncol = 3)
my_df <- mtcars[1:10, ]
# Name the components in the list() call so you do not have to remember
# what each component stands for.
# Names can also be set afterwards:
# names(my_list) <- c("name1", "name2")
my_list <- list(
  vec = my_vector,
  mat = my_matrix,
  df = my_df
)
my_list
## $vec
##  [1]  1  2  3  4  5  6  7  8  9 10
##
## $mat
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
##
## $df
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360        14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D         24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280          19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4

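# to "grab" a component of a list
# (shining_list is an example list that is not defined in this file)
# shining_list[[1]]          # first component, by position
# shining_list[["reviews"]]  # component named "reviews"
# shining_list$reviews       # same component, using $
# shining_list[[2]][1]       # first element of the second component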

# add to a list
# ext_list <- c(my_list, my_name = my_val)