Topics Covered (Some Examples from DataCamp)

Working Directory

#### The working directory is where you have/save files for an R session

# Show which directory R currently treats as the working directory
getwd()

# Point R at a different working directory (replace the placeholder path)
setwd(dir = "INSERT_HERE_some_file_path_to_the_folder_of_your_choice")

# List every file found in the current working directory
list.files(path = ".")

##### Your working environment is where R saves the objects (variables, vectors, matrices, data frames, etc.) of your session

# Remove a single object from the working environment
myname <- "TADDE"
myname
rm("myname")
# 'myname' no longer exists: printing it now would stop the script with
# "object 'myname' not found", so confirm the removal with exists() instead
exists("myname")

# You can remove multiple objects at the same time
myname <- "TADDE"
mybeard <- "growing"

rm(list = c("myname", "mybeard"))

# Remove every object from your working environment.
# ls() returns the names of all objects in the working environment.
rm(list = ls())

Basics:

  • Arithmetic with R
# Addition
5 + 5
## [1] 10
# Subtraction
5 - 5
## [1] 0
# Multiplication
3 * 5
## [1] 15
# Division (the parentheses make the sum evaluate before the division)
(5 + 5) / 2
## [1] 5
# Exponentiation: 2 raised to the power of 5
2^5
## [1] 32
# Modulo: the remainder left after dividing 28 by 6
28 %% 6
## [1] 4
  • Variable assignment
# Store the value 42 in 'x'
x <- 42
# Store the fruit counts in 'my_apples' and 'my_oranges'
my_apples <- 5
my_oranges <- 6

# Sum the two variables; the result is printed because it is not assigned
my_apples + my_oranges
## [1] 11
# Keep the same total in 'my_fruit', then display it
my_fruit <- my_apples + my_oranges
my_fruit
## [1] 11
  • Data Types in R
# Integer: the 'L' suffix tells R to store the value as an integer
my_integer <- 42L

# Numeric: without the 'L' suffix R stores the value as a double ("numeric")
my_numeric <- 42

# The quotation marks indicate that the variable is of type character
my_character <- "forty-two"

# Logical: either TRUE or FALSE
my_logical <- FALSE

# Check which type these variables have:
class(my_integer)
## [1] "integer"
class(my_numeric)
## [1] "numeric"
class(my_character)
## [1] "character"
class(my_logical)
## [1] "logical"

Vectors

  • Creating a vector
# c() combines its arguments into a single vector; every element of a
# vector shares one type (numeric, character or logical below)
numeric_vector <- c(1, 10, 49)
character_vector <- c("a", "b", "c")
boolean_vector <- c(TRUE, FALSE, TRUE)

# Typing a variable's name on its own prints its contents
numeric_vector
## [1]  1 10 49
character_vector
## [1] "a" "b" "c"
boolean_vector
## [1]  TRUE FALSE  TRUE
  • Naming a vector
# Attach the element names while building the vector, in a single step
some_vector <- c(Name = "Johnny", Profession = "Poker Player")
some_vector
##           Name     Profession 
##       "Johnny" "Poker Player"
  • Vector Selection
# Weekday labels, Monday through Friday
days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")

# Poker and roulette winnings, one value per weekday
poker_vector <- c(140, -50, 20, -120, 240)
roulette_vector <- c(-24, -50, 100, -350, 10)

# Label both vectors with the weekday names in one chained assignment
names(poker_vector) <- names(roulette_vector) <- days_vector

# The same element can be selected by its name or by its position
poker_wednesday_1 <- poker_vector["Wednesday"]
poker_wednesday_2 <- poker_vector[3]

poker_wednesday_1
## Wednesday 
##        20
poker_wednesday_2
## Wednesday 
##        20

Matrices

  • Creating a matrix
# Build a 3-row matrix that contains the numbers 1 up to 9.
# byrow = TRUE fills the matrix one row at a time. TRUE is spelled out rather
# than abbreviated to T, because T is an ordinary variable that can be
# reassigned.
mymatrix <- matrix(1:9, nrow = 3, byrow = TRUE)
mymatrix
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9
  • Naming a matrix
# Label both dimensions at once through dimnames():
# the first list element holds the row names, the second the column names
dimnames(mymatrix) <- list(
  c("firRow", "secondRow", "thirdRow"),
  c("firstCol", "secondCol", "thirdCol")
)

mymatrix
##           firstCol secondCol thirdCol
## firRow           1         2        3
## secondRow        4         5        6
## thirdRow         7         8        9
  • Adding a column
# cbind() returns a new matrix. It is stored under its own name so that
# 'mymatrix' keeps the 3 x 3 shape used by the examples that follow.
mymatrix_with_column <- cbind(mymatrix, 11:13) # appends a column holding 11, 12, 13
  • Adding a row
# rbind() returns a new matrix. It is stored under its own name so that
# 'mymatrix' keeps the 3 x 3 shape used by the examples that follow.
mymatrix_with_row <- rbind(mymatrix, 13:15) # appends a row holding 13, 14, 15
  • Selecting Matrix Elements
# Matrix elements are selected with mymatrix[rows, columns];
# leaving one side of the comma empty selects everything along that dimension.

# Just the first row
firstRow <- mymatrix[1, ]

# Mean of column 2 only
my_mean <- mean(mymatrix[, 2])
my_mean
## [1] 5
# Only columns 1 and 3
columns1and3 <- mymatrix[, c(1, 3)]
columns1and3
##           firstCol thirdCol
## firRow           1        3
## secondRow        4        6
## thirdRow         7        9
# Only the 2nd row of columns 1 and 3
row2columns1and3 <- mymatrix[2, c(1, 3)]
row2columns1and3
## firstCol thirdCol 
##        4        6
# The single element in row 2 and column 3
row2column3 <- mymatrix[2, 3]
row2column3
## [1] 6
# The elements in rows 1 and 3 and columns 1, 2 and 3
# (the variable name is kept as-is even though all three columns are selected)
rows1.3columns2.3 <- mymatrix[c(1, 3), 1:3]
rows1.3columns2.3
##          firstCol secondCol thirdCol
## firRow          1         2        3
## thirdRow        7         8        9
  • Some Arithmetic
# Arithmetic operators and math functions work on every element of the matrix

# Square each element
mymatrix^2
##           firstCol secondCol thirdCol
## firRow           1         4        9
## secondRow       16        25       36
## thirdRow        49        64       81
# Multiply each element by 5
mymatrix * 5
##           firstCol secondCol thirdCol
## firRow           5        10       15
## secondRow       20        25       30
## thirdRow        35        40       45
# Square root of each element
sqrt(mymatrix)
##           firstCol secondCol thirdCol
## firRow    1.000000  1.414214 1.732051
## secondRow 2.000000  2.236068 2.449490
## thirdRow  2.645751  2.828427 3.000000
# Several element-wise operations combined in one expression; it reads the
# variables 'x', 'my_apples' and 'my_oranges' assigned earlier in the session
mymatrix * 5 + sqrt(mymatrix) / (x * my_apples - my_oranges)
##            firstCol secondCol thirdCol
## firRow     5.004902  10.00693 15.00849
## secondRow 20.009804  25.01096 30.01201
## thirdRow  35.012969  40.01386 45.01471

Factors

# Raw character data with one entry per observation
gender_vector <- c("Male", "Female", "Female", "Male", "Male")

# factor() turns the character vector into a categorical variable;
# the distinct values become its levels
factor_gender_vector <- factor(x = gender_vector)

factor_gender_vector
## [1] Male   Female Female Male   Male  
## Levels: Female Male

The factor function is useful for creating categorical variables, such as colors or genders. Each level represents one class/case/type/instance of the categorical variable. The levels are unordered unless an order is specified.

temperature_vector <- c("High", "Low", "High", "Low", "Medium")

# Specify the order of the levels, from lowest to highest.
# The argument is written out in full as 'ordered' (not the abbreviation
# 'order') so the call does not depend on partial argument matching.
factor_temperature_vector <- factor(
  temperature_vector,
  levels = c("Low", "Medium", "High"),
  ordered = TRUE
)
factor_temperature_vector
## [1] High   Low    High   Low    Medium
## Levels: Low < Medium < High
# Count how many observations fall into each level of the gender factor
summary(object = factor_gender_vector)
## Female   Male 
##      2      3

Data Frames

A data frame is like a matrix whose variables (columns) can each have a different type (integer, numeric, logical, factor, character). A matrix, by contrast, can hold only one data type across all of its elements.

# There are many ways to build a data frame; this is the most basic one:
# create one vector per column, then combine them with data.frame()
planets <- c(
  "Mercury", "Venus", "Earth", "Mars",
  "Jupiter", "Saturn", "Uranus", "Neptune"
)
# The first four planets are terrestrial, the last four are gas giants
type <- rep(c("Terrestrial planet", "Gas giant"), each = 4)
diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
# Only the last four planets have rings
rings <- rep(c(FALSE, TRUE), each = 4)

# Each vector becomes one column, named after the vector
planets_df <- data.frame(planets, type, diameter, rotation, rings)

planets_df
##   planets               type diameter rotation rings
## 1 Mercury Terrestrial planet    0.382    58.64 FALSE
## 2   Venus Terrestrial planet    0.949  -243.02 FALSE
## 3   Earth Terrestrial planet    1.000     1.00 FALSE
## 4    Mars Terrestrial planet    0.532     1.03 FALSE
## 5 Jupiter          Gas giant   11.209     0.41  TRUE
## 6  Saturn          Gas giant    9.449     0.43  TRUE
## 7  Uranus          Gas giant    4.007    -0.72  TRUE
## 8 Neptune          Gas giant    3.883     0.67  TRUE
# Every column for the first three planets
closest_planets_df <- head(planets_df, 3)

# Every column for the last three planets
furthest_planets_df <- tail(planets_df, 3)

# Display both selections:
closest_planets_df
##   planets               type diameter rotation rings
## 1 Mercury Terrestrial planet    0.382    58.64 FALSE
## 2   Venus Terrestrial planet    0.949  -243.02 FALSE
## 3   Earth Terrestrial planet    1.000     1.00 FALSE
furthest_planets_df
##   planets      type diameter rotation rings
## 6  Saturn Gas giant    9.449     0.43  TRUE
## 7  Uranus Gas giant    4.007    -0.72  TRUE
## 8 Neptune Gas giant    3.883     0.67  TRUE
# Select only the diameter for the last six rows (rows 3 to 8)
furthest_planets_diameter <- planets_df[3:8, "diameter"]
# Print the diameters that were just selected; selecting a single column
# this way gives a plain numeric vector rather than a data frame
furthest_planets_diameter
## [1]  1.000  0.532 11.209  9.449  4.007  3.883
# Pull out the "rings" variable/column as a vector
rings_vector <- planets_df[["rings"]]
rings_vector
## [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
# Planets that are smaller than planet Earth (diameter below 1).
# Inside subset() the column can be referred to by its bare name.
small_planets_df <- subset(planets_df, subset = diameter < 1)
small_planets_df
##   planets               type diameter rotation rings
## 1 Mercury Terrestrial planet    0.382    58.64 FALSE
## 2   Venus Terrestrial planet    0.949  -243.02 FALSE
## 4    Mars Terrestrial planet    0.532     1.03 FALSE
# Example of sorting.
# Note: this reuses the name 'x', replacing the value assigned to it earlier.
x <- c(15, 18, 293, 67, 85, 197, 65, 32)
# order() returns the positions of the elements from smallest to largest
order(x)
## [1] 1 2 8 7 4 5 6 3
# Indexing the vector with those positions gives the sorted values
x[order(x)]
## [1]  15  18  32  65  67  85 197 293
# The same idea sorts the data frame by the "diameter" column.
# Ordering the negated diameters puts the largest diameter first.
positions <- order(-planets_df$diameter)
positions
## [1] 5 6 7 8 3 2 4 1
# Use the positions as row indices to reorder the whole data frame
largest_first_df <- planets_df[positions, ]
largest_first_df
##   planets               type diameter rotation rings
## 5 Jupiter          Gas giant   11.209     0.41  TRUE
## 6  Saturn          Gas giant    9.449     0.43  TRUE
## 7  Uranus          Gas giant    4.007    -0.72  TRUE
## 8 Neptune          Gas giant    3.883     0.67  TRUE
## 3   Earth Terrestrial planet    1.000     1.00 FALSE
## 2   Venus Terrestrial planet    0.949  -243.02 FALSE
## 4    Mars Terrestrial planet    0.532     1.03 FALSE
## 1 Mercury Terrestrial planet    0.382    58.64 FALSE

Lists

# A list can hold elements of different kinds; build three of them first.
# Integer vector running from 1 up to 10
my_vector <- seq_len(10)
# 3 x 3 matrix holding 1 up to 9, filled column by column
my_matrix <- matrix(1:9, nrow = 3, ncol = 3)
# First 10 rows of the built-in data frame 'mtcars'
my_df <- head(mtcars, 10)

# Combine the three objects into 'my_list', naming each element
my_list <- list(vec = my_vector, mat = my_matrix, df = my_df)
my_list
## $vec
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $mat
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
## 
## $df
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360        14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D         24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280          19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Build the list 'shining_list' from a title, a cast vector and a reviews table
actors <- c(
  "Jack Nicholson",
  "Shelley Duvall",
  "Danny Lloyd",
  "Scatman Crothers",
  "Barry Nelson"
)
# One row per review: its score, its source and the comment text
reviews <- data.frame(
  scores = c(4.5, 4.0, 5.0),
  sources = c("IMDb1", "IMDb2", "IMDb3"),
  comments = c(
    "Best Horror Film I Have Ever Seen",
    "truly brilliant and scary film from Stanley Kubrick",
    "A masterpiece of psychological horror"
  )
)
shining_list <- list(
  moviename = "The Shining",
  actors = actors,
  reviews = reviews
)
shining_list
## $moviename
## [1] "The Shining"
## 
## $actors
## [1] "Jack Nicholson"   "Shelley Duvall"   "Danny Lloyd"     
## [4] "Scatman Crothers" "Barry Nelson"    
## 
## $reviews
##   scores sources                                            comments
## 1    4.5   IMDb1                   Best Horror Film I Have Ever Seen
## 2    4.0   IMDb2 truly brilliant and scary film from Stanley Kubrick
## 3    5.0   IMDb3               A masterpiece of psychological horror
# 'last_actor': the fifth entry of the list's "actors" element
last_actor <- shining_list[["actors"]][5]
last_actor
## [1] "Barry Nelson"
# 'second_review': the second row of the list's "reviews" data frame
second_review <- shining_list$reviews[2, ]
second_review
##   scores sources                                            comments
## 2      4   IMDb2 truly brilliant and scary film from Stanley Kubrick
# The year is missing: append it as a new named element, keeping the
# original 'shining_list' untouched
shining_list_full <- c(shining_list, list(year = 1980))
shining_list_full
## $moviename
## [1] "The Shining"
## 
## $actors
## [1] "Jack Nicholson"   "Shelley Duvall"   "Danny Lloyd"     
## [4] "Scatman Crothers" "Barry Nelson"    
## 
## $reviews
##   scores sources                                            comments
## 1    4.5   IMDb1                   Best Horror Film I Have Ever Seen
## 2    4.0   IMDb2 truly brilliant and scary film from Stanley Kubrick
## 3    5.0   IMDb3               A masterpiece of psychological horror
## 
## $year
## [1] 1980
# Compact overview of the structure of 'shining_list_full'
# NOTE(review): the output below shows the string columns as factors, which
# looks like it was captured with R < 4.0; since R 4.0 data.frame() keeps
# strings as character by default, so these columns would be reported as chr.
str(shining_list_full)
## List of 4
##  $ moviename: chr "The Shining"
##  $ actors   : chr [1:5] "Jack Nicholson" "Shelley Duvall" "Danny Lloyd" "Scatman Crothers" ...
##  $ reviews  :'data.frame':   3 obs. of  3 variables:
##   ..$ scores  : num [1:3] 4.5 4 5
##   ..$ sources : Factor w/ 3 levels "IMDb1","IMDb2",..: 1 2 3
##   ..$ comments: Factor w/ 3 levels "A masterpiece of psychological horror",..: 2 3 1
##  $ year     : num 1980