# basics ----
# The section title is kept as a comment: a bare `basics` symbol would be
# evaluated when the script is sourced and abort it with
# "object 'basics' not found".
# An addition
5 + 5 # 10
## [1] 10
# A subtraction
5 - 5 # 0
## [1] 0
# A multiplication
3 * 5 # 15
## [1] 15
# A division
(5 + 5) / 2 # 5
## [1] 5
# Exponentiation
2^5 # 32
## [1] 32
# Modulo (remainder of the division)
28 %% 6 # 4
## [1] 4
# Assign the value 42 to a variable, then print it
x <- 42
x
## [1] 42
# `=` would also assign at top level, but `<-` is the conventional operator
y <- 42
y
## [1] 42
# c() ("combine") builds a vector:
# place the vector elements, separated by commas, between the parentheses
numeric_vector <- c(1, 10, 49)
character_vector <- c("a", "b", "c")
boolean_vector <- c(TRUE, FALSE, TRUE)
# names() gives a name to each element of a vector
# Poker winnings from Monday to Friday
poker_vector <- c(140, -50, 20, -120, 240)
# Label every winning with its weekday
names(poker_vector) <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday"
)
# Adding two vectors in R takes the element-wise sum
A_vector <- c(1, 2, 3)
B_vector <- c(4, 5, 6)
total_vector <- A_vector + B_vector
total_vector
## [1] 5 7 9
# Vector selection
# ... by an explicit vector of positions
poker_midweek <- poker_vector[c(2, 3, 4)]
poker_midweek
## Tuesday Wednesday Thursday
## -50 20 -120
# ... by a range of positions
poker_midweek <- poker_vector[2:4]
poker_midweek
## Tuesday Wednesday Thursday
## -50 20 -120
# ... by element names
poker_start <- poker_vector[c("Monday", "Tuesday", "Wednesday")]
poker_start
## Monday Tuesday Wednesday
## 140 -50 20
# Selection by comparison
# The (logical) comparison operators known to R are:
# <  less than
# >  greater than
# <= less than or equal to
# >= greater than or equal to
# == equal to each other
# != not equal to each other
# Comparison operators work element-wise on vectors
c(4, 5, 6) > 5
## [1] FALSE FALSE TRUE
# Which days did you make money on poker?
selection_vector <- poker_vector > 0
selection_vector
## Monday Tuesday Wednesday Thursday Friday
## TRUE FALSE TRUE FALSE TRUE
# A logical vector used as an index keeps only the TRUE positions
poker_winning_days <- poker_vector[selection_vector]
poker_winning_days
## Monday Wednesday Friday
## 140 20 240
# A matrix is a collection of elements of the same data type
# (numeric, character, or logical) arranged into a fixed number of
# rows and columns
matrix(1:9, nrow = 3, byrow = TRUE)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
# Naming the dimensions:
# rownames(my_matrix) <- row_names_vector
# colnames(my_matrix) <- col_names_vector
# rowSums(matrix) = totals for each row of a matrix
# cbind() = merges matrices and/or vectors together by column
# rbind() = merges matrices and/or vectors together by row
# Selecting:
# my_matrix[,1] selects all elements of the first column.
# my_matrix[1,] selects all elements of the first row.
# Arithmetic with matrices is element-wise:
# 2 * my_matrix multiplies each element of my_matrix by two.
2 * matrix(1:9, nrow = 3, byrow = TRUE)
## [,1] [,2] [,3]
## [1,] 2 4 6
## [2,] 8 10 12
## [3,] 14 16 18
# e.g. my_matrix1 * my_matrix2 creates a matrix where each element is the
# product of the corresponding elements in my_matrix1 and my_matrix2
# NB: standard matrix multiplication is done with %*%
# factor = statistical data type used to store categorical variables
# NB: a categorical variable can belong to a limited number of categories
sex_vector <- c("Male", "Female", "Female", "Male", "Male")
factor_sex_vector <- factor(sex_vector)
# the 'factor levels' here are "Female" and "Male"
factor_sex_vector
## [1] Male Female Female Male Male
## Levels: Female Male
# two types of categorical variables:
# nominal categorical variable (without an implied order)
# ordinal categorical variable (with an implied order)
# Animals: nominal, so a plain factor is enough
animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
factor_animals_vector <- factor(animals_vector)
factor_animals_vector
## [1] Elephant Giraffe Donkey Horse
## Levels: Donkey Elephant Giraffe Horse
# Temperature: ordinal, so build an ordered factor
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
# `ordered` is spelled out in full instead of relying on partial argument
# matching of `order`; `levels` fixes the ordering Low < Medium < High
factor_temperature_vector <- factor(
  temperature_vector,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
factor_temperature_vector
## [1] High Low High Low Medium
## Levels: Low < Medium < High
# change the names of the levels (for clarity)
survey_vector <- c("M", "F", "F", "M", "M")
factor_survey_vector <- factor(survey_vector)
factor_survey_vector
## [1] M F F M M
## Levels: F M
levels(factor_survey_vector)
## [1] "F" "M"
# note the order of the existing levels ("F" then "M"): the new names must be
# given in that same order
levels(factor_survey_vector) <- c("Female", "Male")
factor_survey_vector
## [1] Male Female Female Male Male
## Levels: Female Male
# summary() gives a quick overview of the contents of a variable
summary(survey_vector)
## Length Class Mode
## 5 character character
summary(factor_survey_vector)
## Female Male
## 2 3
# Data frame:
# - variables are the columns
# - observations are the rows
# - can hold data sets that contain different data types (instead of only one)
# First step with any data set: develop a clear understanding of its
# structure and main elements
# head() shows only a small part (the first rows) of the entire data set
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# str() prints the structure: number of observations and variables, plus the
# type and first values of every column
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Every column of a data frame has the same length, so the vectors passed to
# data.frame() should also have the same length
# Definition of vectors
name <- c(
  "Mercury", "Venus", "Earth", "Mars",
  "Jupiter", "Saturn", "Uranus", "Neptune"
)
# The type of planet (Terrestrial or Gas Giant).
type <- c(
  "Terrestrial planet", "Terrestrial planet", "Terrestrial planet",
  "Terrestrial planet", "Gas giant", "Gas giant", "Gas giant", "Gas giant"
)
# The planet's diameter relative to the diameter of the Earth.
diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
# The planet's rotation relative to that of the Earth.
rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
# If the planet has rings or not (TRUE or FALSE).
rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
# Create a data frame from the vectors.
# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0 the default
# is FALSE, which would leave `name` and `type` as character columns and no
# longer match the factor output recorded below.
planets_df <- data.frame(
  name, type, diameter, rotation, rings,
  stringsAsFactors = TRUE
)
# 8 observations and 5 variables?
str(planets_df)
## 'data.frame': 8 obs. of 5 variables:
## $ name : Factor w/ 8 levels "Earth","Jupiter",..: 4 8 1 3 2 6 7 5
## $ type : Factor w/ 2 levels "Gas giant","Terrestrial planet": 2 2 2 2 1 1 1 1
## $ diameter: num 0.382 0.949 1 0.532 11.209 ...
## $ rotation: num 58.64 -243.02 1 1.03 0.41 ...
## $ rings : logi FALSE FALSE FALSE FALSE TRUE TRUE ...
# Diameter of Mercury: row 1, column 3
planets_df[1, 3]
## [1] 0.382
# All data for Mars: the entire fourth row
planets_df[4, ]
## name type diameter rotation rings
## 4 Mars Terrestrial planet 0.532 1.03 FALSE
# First three elements of the type column
planets_df[1:3, 2] # by position: need to know / look up the column number
## [1] Terrestrial planet Terrestrial planet Terrestrial planet
## Levels: Gas giant Terrestrial planet
planets_df[1:3, "type"] # by column name
## [1] Terrestrial planet Terrestrial planet Terrestrial planet
## Levels: Gas giant Terrestrial planet
# First 5 values of the diameter column
planets_df$diameter[1:5]
## [1] 0.382 0.949 1.000 0.532 11.209
# All elements of the variable diameter, selected three different ways
planets_df[, 3]
## [1] 0.382 0.949 1.000 0.532 11.209 9.449 4.007 3.883
planets_df[, "diameter"]
## [1] 0.382 0.949 1.000 0.532 11.209 9.449 4.007 3.883
planets_df$diameter
## [1] 0.382 0.949 1.000 0.532 11.209 9.449 4.007 3.883
rings_vector <- planets_df$rings
rings_vector
## [1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
# Use the logical rings_vector as a row index to select the data for the four
# planets with rings; the empty column index keeps all columns
planets_df[rings_vector, ]
## name type diameter rotation rings
## 5 Jupiter Gas giant 11.209 0.41 TRUE
## 6 Saturn Gas giant 9.449 0.43 TRUE
## 7 Uranus Gas giant 4.007 -0.72 TRUE
## 8 Neptune Gas giant 3.883 0.67 TRUE
# Same selection with subset(): the condition is evaluated on the columns
subset(planets_df, rings)
## name type diameter rotation rings
## 5 Jupiter Gas giant 11.209 0.41 TRUE
## 6 Saturn Gas giant 9.449 0.43 TRUE
## 7 Uranus Gas giant 4.007 -0.72 TRUE
## 8 Neptune Gas giant 3.883 0.67 TRUE
# Planets with diameter < 1, by logical row index ...
planets_df[planets_df$diameter < 1, ]
## name type diameter rotation rings
## 1 Mercury Terrestrial planet 0.382 58.64 FALSE
## 2 Venus Terrestrial planet 0.949 -243.02 FALSE
## 4 Mars Terrestrial planet 0.532 1.03 FALSE
# ... and with subset()
subset(planets_df, subset = diameter < 1)
## name type diameter rotation rings
## 1 Mercury Terrestrial planet 0.382 58.64 FALSE
## 2 Venus Terrestrial planet 0.949 -243.02 FALSE
## 4 Mars Terrestrial planet 0.532 1.03 FALSE
# Sorting
a <- c(100, 10, 1000)
order(a) # indices that put the elements of a in increasing order
## [1] 2 1 3
# Indexing with those positions yields the sorted vector ...
a[order(a)]
## [1] 10 100 1000
# ... which is what sort() returns directly
sort(a)
## [1] 10 100 1000
# Sort the rows of a data frame by one of its columns: compute the row order
# from that column, then use it as the row index
positions <- order(planets_df$diameter)
planets_df[positions, ]
## name type diameter rotation rings
## 1 Mercury Terrestrial planet 0.382 58.64 FALSE
## 4 Mars Terrestrial planet 0.532 1.03 FALSE
## 2 Venus Terrestrial planet 0.949 -243.02 FALSE
## 3 Earth Terrestrial planet 1.000 1.00 FALSE
## 8 Neptune Gas giant 3.883 0.67 TRUE
## 7 Uranus Gas giant 4.007 -0.72 TRUE
## 6 Saturn Gas giant 9.449 0.43 TRUE
## 5 Jupiter Gas giant 11.209 0.41 TRUE
# Vectors (one dimensional array):
# can hold numeric, character or logical values.
# The elements in a vector all have the same data type.
# Matrices (two dimensional array):
# can hold numeric, character or logical values.
# The elements in a matrix all have the same data type.
# Data frames (two-dimensional objects):
# can hold numeric, character or logical values.
# Within a column all elements have the same data type,
# but different columns can be of different data type.
# list = a container for a variety of objects
# list()
# The arguments to the list function are the list components;
# components can be matrices, vectors, other lists, etc.
my_vector <- seq_len(10)
my_matrix <- matrix(1:9, nrow = 3)
my_df <- mtcars[seq_len(10), ]
# Name the components directly in the list() call, so that later on there is
# no doubt about what each component of the list stands for
my_list <- list(
  vec = my_vector,
  mat = my_matrix,
  df = my_df
)
# Names can also be set after creation:
# names(my_list) <- c("name1", "name2")
my_list
## $vec
## [1] 1 2 3 4 5 6 7 8 9 10
##
## $mat
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
##
## $df
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
# To "grab" a component of a list (examples use a list called shining_list):
# shining_list[[1]]           by position
# shining_list[["reviews"]]   by name
# shining_list$reviews        by name, with $
# shining_list[[2]][1]        first element of the second component
# To add a component to a list:
# ext_list <- c(my_list, my_name = my_val)