1 Basics

1.1 Access workspace

Get working directory

# use getwd() to find out where you are
getwd()
## [1] "/Users/jcasey/Documents/Teaching/Marine Community Ecology/Spring 2023/Coding"

1.2 Install and use packages

# install the tidyverse package by running install.packages()
# installation is only needed once per machine, so skip it if the package is already there
# you typically don't need to set repos when installing a package
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", repos = "https://cloud.r-project.org")
}
# load the tidyverse package in your current R session
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

1.3 Common operators

  • “?” help
  • “<-” assign value
  • “==” values on left match values on right
# get help
?install.packages

# assign value
one <- 1
one
## [1] 1
# check matching values (output will be TRUE or FALSE)
5==5
## [1] TRUE
5==6
## [1] FALSE
one==5
## [1] FALSE

1.4 Use R as calculator

# R is a basic calculator
4*4
## [1] 16
23*sin(19)/sqrt(12) + log(58)
## [1] 5.055557

1.5 Name objects

Assign values to objects

  • you can name objects however you want, but misspelling object names is one of the most common errors
  • develop a standardized system for naming objects!
  • for example, separate object names by “.” or “_” and don’t use capitals (e.g. “fish.data”)
# assign obj1 the value of 4*4
obj1 <- 4*4
obj1
## [1] 16
# assign obj2 a more complex value
obj2 <- 23/sqrt(58)*21^2
obj2
## [1] 1331.841
# assign obj3 a non-numerical value
obj3 <- "fish"
obj3
## [1] "fish"

Combine objects

  • you cannot combine a numerical and a non-numerical object with arithmetic (e.g. obj1 + obj3 returns an error)
# combine numerical objects
obj4 <- obj1 + obj2
obj4
## [1] 1347.841
# combine two non-numerical objects with a function(), like paste()
obj5 <- "goby"
obj6 <- paste(obj3, obj5)
obj6
## [1] "fish goby"

2 Vectors

2.1 Work with functions

Familiarize with function format

  • functions are usually denoted by parentheses
  • functions allow you to perform basic operations like means, standard deviations, lengths, etc
# c() is the concatenate function - use it to combine numbers or words
conc.vals <- c(1,2,3,4,5)
conc.vals
## [1] 1 2 3 4 5
# find the mean of conc.vals
mean(conc.vals)
## [1] 3

2.2 Create vector

Create vector with a sequence

# use seq() to create sequences
seq.obj <- seq(from = 1, to = 10, by = 1)
seq.obj
##  [1]  1  2  3  4  5  6  7  8  9 10
# alternative way to create the same sequence: the colon operator
# (avoid seq(1:10) - when seq() is handed a single number n it returns 1:n, not n)
seq.obj.1 <- 1:10
seq.obj.1
##  [1]  1  2  3  4  5  6  7  8  9 10

Create vector with repetitions

# use rep() to create repetitions
rep.obj <- rep(1, times = 20)
rep.obj
##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# use "each" to repeat every element of a sequence (here each value of 1:10 is repeated twice)
rep.obj.each <- rep(1:10, each = 2)
rep.obj.each
##  [1]  1  1  2  2  3  3  4  4  5  5  6  6  7  7  8  8  9  9 10 10
# you can also repeat characters
rep.obj.ch <- rep("fish", times = 20)
rep.obj.ch
##  [1] "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish"
## [11] "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish"

Create vector of repeat sequences

# create repetitions of sequences
rep.obj.seq <- rep(seq(0, 5, by = 1), 4)
rep.obj.seq
##  [1] 0 1 2 3 4 5 0 1 2 3 4 5 0 1 2 3 4 5 0 1 2 3 4 5

2.3 Combine vectors

Combine numerical vectors

# add two numerical vectors
reps.comb <- rep.obj + rep.obj.each

# multiply a numerical vector
reps.mult5 <- rep.obj.seq*5
reps.mult5
##  [1]  0  5 10 15 20 25  0  5 10 15 20 25  0  5 10 15 20 25  0  5 10 15 20 25
# take the square root of a numerical vector
reps.sqrt <- sqrt(rep.obj.seq)
reps.sqrt
##  [1] 0.000000 1.000000 1.414214 1.732051 2.000000 2.236068 0.000000 1.000000
##  [9] 1.414214 1.732051 2.000000 2.236068 0.000000 1.000000 1.414214 1.732051
## [17] 2.000000 2.236068 0.000000 1.000000 1.414214 1.732051 2.000000 2.236068

Combine vectors of different classes

# combining a numerical and a non-numerical vector with c() silently converts the numbers to characters
reps.comb.ch <- c(rep.obj, rep.obj.ch)
reps.comb.ch
##  [1] "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"   
## [11] "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"    "1"   
## [21] "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish"
## [31] "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish" "fish"
# paste() also converts the numbers to characters, joining each pair of elements into one string
reps.comb.ch.paste <- paste(rep.obj, rep.obj.ch)
head(reps.comb.ch.paste)
## [1] "1 fish" "1 fish" "1 fish" "1 fish" "1 fish" "1 fish"

2.4 Work with elements

Work with distinct elements

# call one of your vectors
seq.obj
##  [1]  1  2  3  4  5  6  7  8  9 10
# select only the third element
element3 <- seq.obj[3]
element3
## [1] 3

Select multiple elements

# use c() or x:y to select multiple elements
three.elements <- seq.obj[c(3,4,5)]
three.elements
## [1] 3 4 5
# get the last 8 elements
last.eight <- seq.obj[3:10]
last.eight
## [1]  3  4  5  6  7  8  9 10
# alternative way to get the last 8 elements: drop the first two with a negative index
last.eight.2 <- seq.obj[-(1:2)]
last.eight.2
## [1]  3  4  5  6  7  8  9 10

2.5 Apply logic

Extract values based on a condition with logical operators

  • “>=” values equal or greater
  • “<=” values equal or smaller
  • “<” values smaller and “>” values greater
  • “==” values equal
  • “!=” values not equal
# create a new vector with random values
random.vals <- rpois(30, lambda = 5)
random.vals
##  [1] 2 7 5 5 8 6 3 3 6 9 4 7 5 4 8 5 2 4 6 6 7 2 5 3 7 6 4 3 9 5
# select values that are greater than 4
selected.vals <- random.vals[random.vals > 4]
selected.vals
##  [1] 7 5 5 8 6 6 9 7 5 8 5 6 6 7 5 7 6 9 5
# select values that are not 6
values.not.6 <- random.vals[random.vals != 6]
values.not.6
##  [1] 2 7 5 5 8 3 3 9 4 7 5 4 8 5 2 4 7 2 5 3 7 4 3 9 5

Use Boolean expression (logical statement) that is either true or false

  • three symbols: “&” = AND, “|” = OR, “!” = NOT
  • can be applied separately and in combination
  • NOT (“!”) has precedence over AND (“&”), which has precedence over OR (“|”)
# call random vector
random.vals
##  [1] 2 7 5 5 8 6 3 3 6 9 4 7 5 4 8 5 2 4 6 6 7 2 5 3 7 6 4 3 9 5
# get values between 4 and 8
boolean.and <- random.vals[random.vals >= 4 & random.vals <= 8]
boolean.and
##  [1] 7 5 5 8 6 6 4 7 5 4 8 5 4 6 6 7 5 7 6 4 5
# get values that are 3 or 5
boolean.or <- random.vals[random.vals == 3 | random.vals == 5]
boolean.or
##  [1] 5 5 3 3 5 5 5 3 3 5

Assign a new value to replace a value

# replace the first value of the vector with 1000
random.vals[1] <- 1000
random.vals
##  [1] 1000    7    5    5    8    6    3    3    6    9    4    7    5    4    8
## [16]    5    2    4    6    6    7    2    5    3    7    6    4    3    9    5
# replace values >5 with 1000
random.vals[random.vals > 5] <- 1000

2.6 Non-numerical vector

# create a vector of words
# rep() on a character vector already returns characters, so no paste() wrapper is needed
fish.comm <- rep(c("whitefish", "bluefish", "yellowfish"), times = 5)
fish.comm
##  [1] "whitefish"  "bluefish"   "yellowfish" "whitefish"  "bluefish"  
##  [6] "yellowfish" "whitefish"  "bluefish"   "yellowfish" "whitefish" 
## [11] "bluefish"   "yellowfish" "whitefish"  "bluefish"   "yellowfish"
# remove whitefish
white.extinct <- fish.comm[fish.comm != "whitefish"]
white.extinct
##  [1] "bluefish"   "yellowfish" "bluefish"   "yellowfish" "bluefish"  
##  [6] "yellowfish" "bluefish"   "yellowfish" "bluefish"   "yellowfish"
# replace yellowfish with greenfish
white.extinct[white.extinct == "yellowfish"] <- "greenfish"
white.extinct
##  [1] "bluefish"  "greenfish" "bluefish"  "greenfish" "bluefish"  "greenfish"
##  [7] "bluefish"  "greenfish" "bluefish"  "greenfish"

2.7 Order values

# create a vector of values
# rpois() creates a random sample of integer values following a Poisson distribution
more.vals <- rpois(20, 5)
more.vals
##  [1]  4  3  7  7  5  5  4  7  2 10  9  7  5  3  3  2  6  8  8  5
# use sort() to sort values in increasing order
more.vals.sorted <- sort(more.vals)
more.vals.sorted
##  [1]  2  2  3  3  3  4  4  5  5  5  5  6  7  7  7  7  8  8  9 10
# sort the values in decreasing order using "decreasing = TRUE" or the rev() function
more.vals.sorted.dec <- rev(sort(more.vals))
more.vals.sorted.dec
##  [1] 10  9  8  8  7  7  7  7  6  5  5  5  5  4  4  3  3  3  2  2

2.8 Missing data

# create vector with Poisson distribution
fish.numbers <- rpois(20, 5)
fish.numbers
##  [1]  6  4 12  3  3  3  7  1  6  5  5  6  7  7  4  7  1  9  5  4
# replace some values with NAs
fish.numbers[c(5,15)] <- NA
fish.numbers
##  [1]  6  4 12  3 NA  3  7  1  6  5  5  6  7  7 NA  7  1  9  5  4
# create a vector of an area
area <- rep(100, 20)

# calculate fish density
fish.density <- fish.numbers/area
fish.density
##  [1] 0.06 0.04 0.12 0.03   NA 0.03 0.07 0.01 0.06 0.05 0.05 0.06 0.07 0.07   NA
## [16] 0.07 0.01 0.09 0.05 0.04
# calculate mean fish density
# use na.rm = TRUE to exclude NAs
mean.density.nona <- mean(fish.density, na.rm = TRUE)
mean.density.nona
## [1] 0.05444444

3 Data

3.1 Check and convert data classes

Data classes in R

  1. numeric: all numbers
  2. integer: whole numbers without decimals
  3. logical: TRUE or FALSE (and NA)
  4. character: character strings (letters, symbols, numbers); categorical strings can also be stored as factors
# create vector of numbers
# the colon operator already builds the sequence, so wrapping it in seq() is unnecessary
numbers <- 1:20

# find data class using class() or str()
class(numbers)
## [1] "integer"
# check the data class with is.*() functions; convert between classes with as.*() functions
is.numeric(numbers)
## [1] TRUE
is.character(numbers)
## [1] FALSE
# turn data into characters
numbers.char <- as.character(numbers)
numbers.char
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20"
#check that data class is character
is.character(numbers.char)
## [1] TRUE

3.2 Work with matrices

Understand data structures

# data structure: scalar
scalar <- 23
scalar
## [1] 23
# data structure: vector
many.scalars <- rep(1:23, 2) 
many.scalars
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23  1  2
## [26]  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
# data structure: matrix
the.matrix <- matrix(1:24, nrow = 4, byrow = FALSE)
the.matrix
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    5    9   13   17   21
## [2,]    2    6   10   14   18   22
## [3,]    3    7   11   15   19   23
## [4,]    4    8   12   16   20   24

Assign names to columns and rows

# create a numerical matrix of random counts (this overwrites the earlier the.matrix)
the.matrix <- matrix(rpois(24, 5), nrow = 6, byrow = FALSE)
the.matrix
##      [,1] [,2] [,3] [,4]
## [1,]    9    9    2    3
## [2,]    3    8    6    4
## [3,]    2    6    5    2
## [4,]    5    9    7    7
## [5,]    7    4    9    4
## [6,]    5    7    6    3
# assign column names
colnames(the.matrix) <- c("whitefish", "bluefish", "greenfish", "yellowfish")

# assign row names
rownames(the.matrix) <- c("site1", "site2", "site3", "site4", "site5", "site6")
the.matrix
##       whitefish bluefish greenfish yellowfish
## site1         9        9         2          3
## site2         3        8         6          4
## site3         2        6         5          2
## site4         5        9         7          7
## site5         7        4         9          4
## site6         5        7         6          3

3.3 Data frames

Combine data of different classes

# convert matrix to data frame
fish.df <- as.data.frame(the.matrix)
fish.df
##       whitefish bluefish greenfish yellowfish
## site1         9        9         2          3
## site2         3        8         6          4
## site3         2        6         5          2
## site4         5        9         7          7
## site5         7        4         9          4
## site6         5        7         6          3
# the dollar sign ($) operator indicates a column in a data frame
fish.df$whitefish
## [1] 9 3 2 5 7 5
# select rows and columns using square brackets: [row, column]
# show the first row
fish.df[1,] 
##       whitefish bluefish greenfish yellowfish
## site1         9        9         2          3
# make a vector with 6 site locations
locations <- c("Australia", "Indonesia", "Philippines", "Fiji", "Solomons", "Papua New Guinea")
locations
## [1] "Australia"        "Indonesia"        "Philippines"      "Fiji"            
## [5] "Solomons"         "Papua New Guinea"
# use $ to add the location column, thus adding a character vector to a data frame
fish.df$location <- locations
fish.df
##       whitefish bluefish greenfish yellowfish         location
## site1         9        9         2          3        Australia
## site2         3        8         6          4        Indonesia
## site3         2        6         5          2      Philippines
## site4         5        9         7          7             Fiji
## site5         7        4         9          4         Solomons
## site6         5        7         6          3 Papua New Guinea

3.4 Tibbles

Tibbles offer more information

# check structure of data frame
str(fish.df)
## 'data.frame':    6 obs. of  5 variables:
##  $ whitefish : int  9 3 2 5 7 5
##  $ bluefish  : int  9 8 6 9 4 7
##  $ greenfish : int  2 6 5 7 9 6
##  $ yellowfish: int  3 4 2 7 4 3
##  $ location  : chr  "Australia" "Indonesia" "Philippines" "Fiji" ...
# convert data frame to tibble
fish.tibble <- as_tibble(fish.df)

# view information provided by tibble
fish.tibble
## # A tibble: 6 × 5
##   whitefish bluefish greenfish yellowfish location        
##       <int>    <int>     <int>      <int> <chr>           
## 1         9        9         2          3 Australia       
## 2         3        8         6          4 Indonesia       
## 3         2        6         5          2 Philippines     
## 4         5        9         7          7 Fiji            
## 5         7        4         9          4 Solomons        
## 6         5        7         6          3 Papua New Guinea

3.5 Load and save data

Work with csv files

# write.csv() can only write into a folder that already exists,
# so create the "data" folder in your working directory first (no effect if it is already there)
dir.create("data", showWarnings = FALSE)

# use the write.csv() function to save a data frame or tibble in your working directory
# row.names = FALSE specifies to not include a column of row numbers in the csv
write.csv(fish.tibble, file = "data/fishtibble.csv", row.names = FALSE)

# load that csv file back into R
# note: read.csv() returns a plain data frame, not a tibble
loaded.fish.tibble <- read.csv(file = "data/fishtibble.csv")
loaded.fish.tibble
##   whitefish bluefish greenfish yellowfish         location
## 1         9        9         2          3        Australia
## 2         3        8         6          4        Indonesia
## 3         2        6         5          2      Philippines
## 4         5        9         7          7             Fiji
## 5         7        4         9          4         Solomons
## 6         5        7         6          3 Papua New Guinea