Sources
Previous Contributors
Type directly into the console (best when you don't want to save the code) or type into the script - then run (CTRL + ENTER)
A script is a plain text file with R commands in it. This will be where you save the code that you are writing - the file will end in the extension .R
+ Addition
- Subtraction
* Multiplication
/ Division
^ Exponentiation
%% Modulus (finds remainder)
%/% Integer division (leaves off remainder)
2+2
## [1] 4
7-2
## [1] 5
4^3
## [1] 64
9/2
## [1] 4.5
9%/%2
## [1] 4
9%%2
## [1] 1
2+5*2/3
## [1] 5.333333
2+(5*(2/3))
## [1] 5.333333
< Less than
<= Less than or equal to
> Greater than
>= Greater than or equal to
== Exactly equal to
!= Not equal to
! NOT
| OR
& AND
2<3
## [1] TRUE
6>=4
## [1] TRUE
(2+2)==5
## [1] FALSE
9<=15 & 10>11
## [1] FALSE
9<=15 | 10>11
## [1] TRUE
x <- 1
x
## [1] 1
x*2
## [1] 2
x + 4
## [1] 5
y <- x + 4
y
## [1] 5
x <- x + 4
x
## [1] 5
x <- 6
y <- 38
y-x
## [1] 32
x*y
## [1] 228
y%%x
## [1] 2
z <- y + x
z
## [1] 44
x <- 10
y <- 3
x==y
## [1] FALSE
x>y
## [1] TRUE
z <- x<=y
z
## [1] FALSE
Use c() to combine values into a vector:
x <- c(1,2,3,4,5)
x
## [1] 1 2 3 4 5
y <- c(4,2)
y
## [1] 4 2
z <- c()
z
## NULL
x <- c(1,2,3,4,5)
x + 1
## [1] 2 3 4 5 6
x*2
## [1] 2 4 6 8 10
y <- c(1,4,3,2,5)
x==y
## [1] TRUE FALSE TRUE FALSE TRUE
x>y
## [1] FALSE FALSE FALSE TRUE FALSE
A function is a stored object that performs a task given some inputs (called arguments). R has many functions already available, but you can also write your own functions.
Functions are used in the format: name_of_function(inputs)
The output of a function can be saved to an object:
output <- name_of_function(inputs)
Use sum() to take the sum of all elements in an object:
x <- c(45,3,9,99,0)
sum(x)
## [1] 156
z <- sum(x)
z
## [1] 156
Use prod() to multiply all elements in an object:
prod(45,3,9,99,0)
## [1] 0
Use mean() to take the mean of all elements in an object:
mean(45,3,9,99,0)
## [1] 45
help(mean)
Use ? before a function name to view the help file:
?mean # Same as help(mean)
Use ?? to search for functions; e.g. search for any function whose help files contain the word "sequence":
??sequence
What is the median of 34, 16, 105, and 27?
Remember: functions are often named intuitively.
What does the function range() do, and what is the sample example in the help file?
Is mean(4, 5) different than mean(c(4, 5))?
x <- 1
x <- c(5,6,7,8,9,10)
x
## [1] 5 6 7 8 9 10
x <- 5:10
x
## [1] 5 6 7 8 9 10
x <- seq(from = 5, to = 10, by = 1)
x
## [1] 5 6 7 8 9 10
is.vector(x)
## [1] TRUE
y <- c()
is.vector(y)
## [1] FALSE
z <- vector(length = 5, mode = "numeric")
z
## [1] 0 0 0 0 0
x <- c(1,2,3)
y <- c(4,5)
z <- c(x,y)
z
## [1] 1 2 3 4 5
z <- c(monday = 1, tuesday = 2, wednesday = 3, thursday = 4, friday = 5)
z
## monday tuesday wednesday thursday friday
## 1 2 3 4 5
names(z)
## [1] "monday" "tuesday" "wednesday" "thursday" "friday"
z <- c(1,2,3,4,5)
names(z)
## NULL
# Names need to be a `character` vector
names(z) <- c("monday", "tuesday", "wednesday", "thursday", "friday")
names(z)
## [1] "monday" "tuesday" "wednesday" "thursday" "friday"
z
## monday tuesday wednesday thursday friday
## 1 2 3 4 5
z <- unname(z)
z
## [1] 1 2 3 4 5
Use [ ] to extract elements of a vector by position:
height <- c(76, 72, 74, 74, 78)
height
## [1] 76 72 74 74 78
height[1] # extract the 1st element in the vector
## [1] 76
height[5] # extract the 5th element
## [1] 78
height[6] # There is no 6th element
## [1] NA
x <- height[1]
x
## [1] 76
y <- height[5]
y
## [1] 78
height[c(1,2,3)]
## [1] 76 72 74
height[1:3]
## [1] 76 72 74
Exclude elements by position with the - operator.
height <- c(76, 72, 74, 74, 78)
height[-1]
## [1] 72 74 74 78
height[c(-1,-5)]
## [1] 72 74 74
height_new <- height[-1]
temp <- c(monday = 28.1, tuesday = 28.5, wednesday = 29.0, thursday = 30.1, friday = 30.2)
temp
## monday tuesday wednesday thursday friday
## 28.1 28.5 29.0 30.1 30.2
temp["wednesday"] # Always use "" when subsetting by name
## wednesday
## 29
temp[3]
## wednesday
## 29
# Try subsetting with only wednesday (without "").
# What happens?
y <- 5:50
y
## [1] 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
## [24] 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
# extract all elements less than or equal to 10
y[y <= 10]
## [1] 5 6 7 8 9 10
# extract all elements less than 10 and not equal to 5
y[y < 10 & y != 5]
## [1] 6 7 8 9
As a general rule, single brackets [] in R have a preserving effect when subsetting data-related objects.
Double brackets [[]], on the other hand, have a simplifying effect: they return the simplest possible data structure that can represent the output.
z <- c(monday = 1, tuesday = 2, wednesday = 3, thursday = 4, friday = 5)
z[1] # returns name of the element and the element itself
## monday
## 1
z[[1]] # returns only the element
## [1] 1
z[1:3]
## monday tuesday wednesday
## 1 2 3
z[[1:3]] # Can't subset multiple elements
## Error in z[[1:3]]: attempt to select more than one element in vectorIndex
What are the 9th and 12th positions of the vector seq(1, 27, 0.5)?
Create a vector that includes all even numbers between 4 and 34 and name it a.
Extract all elements of a that are greater than or equal to 17.
x <- c(1,2,3,4,5)
y <- c("a", "b", "c","d","e")
length(x) # "length" shows the number of elements in an object
## [1] 5
length(y)
## [1] 5
mode(x)
## [1] "numeric"
mode(y)
## [1] "character"
x <- c(1,2,3,4,5)
is.double(x)
## [1] TRUE
y <- c(1.25, 3.755, 9.001)
is.double(y)
## [1] TRUE
z1 <- c(1L,2L,3L,4L,5L)
is.integer(z1)
## [1] TRUE
z2 <- as.integer(y)
typeof(1:3) # `:` creates an integer vector
## [1] "integer"
x <- c("Bilgecan is the best TA")
x
## [1] "Bilgecan is the best TA"
length(x)
## [1] 1
y <- c("Bilgecan", "is", "the", "best", "TA")
y
## [1] "Bilgecan" "is" "the" "best" "TA"
y[1]
## [1] "Bilgecan"
y[c(2,4)]
## [1] "is" "best"
x <- c(1,2,3,4,5)
y <- x<3
y
## [1] TRUE TRUE FALSE FALSE FALSE
z <- c(TRUE, FALSE, T, F)
z
## [1] TRUE FALSE TRUE FALSE
x
## [1] 1 2 3 4 5
x <- as.character(x)
x
## [1] "1" "2" "3" "4" "5"
z <- as.character(z)
z
## [1] "TRUE" "FALSE" "TRUE" "FALSE"
x <- c("1","-2", "3.25", "A")
x <- as.numeric(x)
## Warning: NAs introduced by coercion
x
## [1] 1.00 -2.00 3.25 NA
y <- c(T,F,F,T,T)
y <- as.numeric(y)
y
## [1] 1 0 0 1 1
x <- c(0, 0, 5, 79, 3500)
x <- as.logical(x)
x
## [1] FALSE FALSE TRUE TRUE TRUE
y <- c("TRUE", "F", "1", "0", "35000")
y <- as.logical(y)
y
## [1] TRUE FALSE NA NA NA
All elements of an atomic vector must be the same type, so when you attempt to combine different types they will be coerced to the most flexible type. Types from least to most flexible are: logical, integer, double, and character.
attributes(temp)
## $names
## [1] "monday" "tuesday" "wednesday" "thursday" "friday"
attributes(x)
## NULL
class(x)
## [1] "logical"
mode(x)
## [1] "logical"
typeof(x)
## [1] "logical"
x <- c(1,2,3,4,5)
x <- factor(x)
x
## [1] 1 2 3 4 5
## Levels: 1 2 3 4 5
z <- c("a", "b", "c", "d")
z <- as.factor(z)
z
## [1] a b c d
## Levels: a b c d
z <- factor(z, levels = c("c","b","a","d"), ordered = T)
z
## [1] a b c d
## Levels: c < b < a < d
# You can't assign a value that is outside of a factor's levels to that factor
z[6] <- "e"
## Warning in `[<-.factor`(`*tmp*`, 6, value = "e"): invalid factor level, NA
## generated
f <- c(z,x) # You can't combine factors
typeof(f)
## [1] "integer"
attributes(z)
## $levels
## [1] "c" "b" "a" "d"
##
## $class
## [1] "ordered" "factor"
z[1:3] # preserving
## [1] a b c
## Levels: c < b < a < d
z[1:3, drop = T] # simplifying
## [1] a b c
## Levels: c < b < a
m <- matrix(nrow = 2, ncol = 2)
m
## [,1] [,2]
## [1,] NA NA
## [2,] NA NA
m <- matrix(c(1,2,3,4), nrow = 2, ncol = 2)
m
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
m <- matrix(c(1,2,3,4), nrow = 2, ncol = 2, byrow = T)
m
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
is.matrix(m)
## [1] TRUE
v1 <- c(1,2,3)
v2 <- c(4,5,6)
m <- cbind(v1, v2)
m
## v1 v2
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
m <- rbind(v1, v2)
m
## [,1] [,2] [,3]
## v1 1 2 3
## v2 4 5 6
is.matrix(m)
## [1] TRUE
names(m)
## NULL
colnames(m)
## NULL
rownames(m)
## [1] "v1" "v2"
colnames(m) <- c("a", "b", "c")
m
## a b c
## v1 1 2 3
## v2 4 5 6
m <- matrix(c(1,2,3,4), nrow = 2, ncol = 2)
m + 1
## [,1] [,2]
## [1,] 2 4
## [2,] 3 5
m*2
## [,1] [,2]
## [1,] 2 6
## [2,] 4 8
m2 <- matrix(c(5,6,7,8), nrow = 2, ncol = 2)
m + m2
## [,1] [,2]
## [1,] 6 10
## [2,] 8 12
m*m2 # this is not matrix multiplication
## [,1] [,2]
## [1,] 5 21
## [2,] 12 32
m %*% m2
## [,1] [,2]
## [1,] 23 31
## [2,] 34 46
january <- matrix(1:31, nrow = 5, ncol = 7, byrow = T)
## Warning in matrix(1:31, nrow = 5, ncol = 7, byrow = T): data length [31] is
## not a sub-multiple or multiple of the number of rows [5]
january
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 2 3 4 5 6 7
## [2,] 8 9 10 11 12 13 14
## [3,] 15 16 17 18 19 20 21
## [4,] 22 23 24 25 26 27 28
## [5,] 29 30 31 1 2 3 4
january[1,2] # first row, second column
## [1] 2
january[5,4] # fifth row, fourth column
## [1] 1
january[1,] # entire first row
## [1] 1 2 3 4 5 6 7
january[,3] # entire third column
## [1] 3 10 17 24 31
colnames(january) <- c("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
rownames(january) <- c("week 1", "week 2", "week 3", "week 4", "week 5")
january
## monday tuesday wednesday thursday friday saturday sunday
## week 1 1 2 3 4 5 6 7
## week 2 8 9 10 11 12 13 14
## week 3 15 16 17 18 19 20 21
## week 4 22 23 24 25 26 27 28
## week 5 29 30 31 1 2 3 4
january[,"monday"] # Remember to use "" when subsetting by name!
## week 1 week 2 week 3 week 4 week 5
## 1 8 15 22 29
january[,"sunday"]
## week 1 week 2 week 3 week 4 week 5
## 7 14 21 28 4
january["week 1",]
## monday tuesday wednesday thursday friday saturday sunday
## 1 2 3 4 5 6 7
january["week 3", "thursday"]
## [1] 18
january<15 # turns whole matrix to logical
## monday tuesday wednesday thursday friday saturday sunday
## week 1 TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## week 2 TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## week 3 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## week 4 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## week 5 FALSE FALSE FALSE TRUE TRUE TRUE TRUE
january[january<15]
## [1] 1 8 2 9 3 10 4 11 1 5 12 2 6 13 3 7 14 4
index <- january[,"tuesday"]<15
index
## week 1 week 2 week 3 week 4 week 5
## TRUE TRUE FALSE FALSE FALSE
january[index, "tuesday"]
## week 1 week 2
## 2 9
index <- january[,2]<15
january[index, 2]
## week 1 week 2
## 2 9
[,] has a simplifying effect.
# The following returns only the elements stored in the matrix
january[1,2]
## [1] 2
january[1,]
## monday tuesday wednesday thursday friday saturday sunday
## 1 2 3 4 5 6 7
january[,1]
## week 1 week 2 week 3 week 4 week 5
## 1 8 15 22 29
Use drop=F to preserve the exact structure.
january[1,2, drop = F]
## tuesday
## week 1 2
january <- matrix(1:31, nrow = 5, ncol = 7, byrow = T)
## Warning in matrix(1:31, nrow = 5, ncol = 7, byrow = T): data length [31] is
## not a sub-multiple or multiple of the number of rows [5]
colnames(january) <- c("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
rownames(january) <- c("week 1", "week 2", "week 3", "week 4", "week 5")
x <- array(c(1,2,3), dim = 3)
x
## [1] 1 2 3
is.vector(x)
## [1] FALSE
is.array(x)
## [1] TRUE
m <- array(c(1,2,3,4), dim = c(2,2))
m
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
is.array(m)
## [1] TRUE
is.matrix(m)
## [1] TRUE
a <- array(c(1,2,3,4,5,6,7,8), dim = c(2,2,2))
a
## , , 1
##
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
##
## , , 2
##
## [,1] [,2]
## [1,] 5 7
## [2,] 6 8
a[,,1]
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
a[1,2,]
## [1] 3 7
a[1,2,2]
## [1] 7
Most data you are going to be working with in R is going to be in a "table" form but will also include variables of different types. So matrices and vectors do not meet all our needs.
Data frames can be thought of as a collection of vectors with different types.
There are many pre-loaded data sets in R. We'll be working with iris, famously used by R.A. Fisher in 1936.
iris
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
is.data.frame(iris)
## [1] TRUE
class(iris)
## [1] "data.frame"
head() - see first 6 rows
tail() - see last 6 rows
dim() - dimensions (# rows, # columns)
nrow() - number of rows
ncol() - number of columns
str() - structure of any object
rownames() - row names
colnames() - column names
trees and mtcars are other available data sets.
How many rows do the trees and mtcars data frames have?
How many columns? What are the column names?
Using the str() function, how many species does the iris data set have?
What classes are each of the columns for the two data sets?
iris[1,3]
## [1] 1.4
head(iris[,4])
## [1] 0.2 0.2 0.2 0.2 0.2 0.4
tail(iris[,"Species"])
## [1] virginica virginica virginica virginica virginica virginica
## Levels: setosa versicolor virginica
head(iris$Petal.Width)
## [1] 0.2 0.2 0.2 0.2 0.2 0.4
head(iris[,4])
## [1] 0.2 0.2 0.2 0.2 0.2 0.4
iris[3:5,4] # simplifying
## [1] 0.2 0.2 0.2
iris[3:5,4, drop = F] # preserving
## Petal.Width
## 3 0.2
## 4 0.2
## 5 0.2
iris[1,] # preserving
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
iris[,1] # simplify
## [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8 5.7 5.4
## [18] 5.1 5.7 5.1 5.4 5.1 4.6 5.1 4.8 5.0 5.0 5.2 5.2 4.7 4.8 5.4 5.2 5.5
## [35] 4.9 5.0 5.5 4.9 4.4 5.1 5.0 4.5 4.4 5.0 5.1 4.8 5.1 4.6 5.3 5.0 7.0
## [52] 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 5.0 5.9 6.0 6.1 5.6 6.7 5.6 5.8
## [69] 6.2 5.6 5.9 6.1 6.3 6.1 6.4 6.6 6.8 6.7 6.0 5.7 5.5 5.5 5.8 6.0 5.4
## [86] 6.0 6.7 6.3 5.6 5.5 5.5 6.1 5.8 5.0 5.6 5.7 5.7 6.2 5.1 5.7 6.3 5.8
## [103] 7.1 6.3 6.5 7.6 4.9 7.3 6.7 7.2 6.5 6.4 6.8 5.7 5.8 6.4 6.5 7.7 7.7
## [120] 6.0 6.9 5.6 7.7 6.3 6.7 7.2 6.2 6.1 6.4 7.2 7.4 7.9 6.4 6.3 6.1 7.7
## [137] 6.3 6.4 6.0 6.9 6.7 6.9 5.8 6.8 6.7 6.7 6.3 6.5 6.2 5.9
What is the 9th entry of the Sepal.Width column of the iris data frame?
Subset the 17th row of the iris data frame.
Create an object with the 1st, 4th and 7th rows of the iris data frame.
Use the seq() function to subset all odd rows in the iris data frame.
Remove the sepal width column from iris data frame, and assign this to an object.
What do you get when you use the length() function on a data frame? How is that different from a matrix?
Extract the rows of iris that have a petal length larger than 3.
d <- data.frame(x = c(5.6, 2.45, 7.09), y = c("a","b","c"), z = c(1,2,3))
d
## x y z
## 1 5.60 a 1
## 2 2.45 b 2
## 3 7.09 c 3
str(d)
## 'data.frame': 3 obs. of 3 variables:
## $ x: num 5.6 2.45 7.09
## $ y: Factor w/ 3 levels "a","b","c": 1 2 3
## $ z: num 1 2 3
d <- data.frame(x = c(5.6, 2.45, 7.09), y = c("a","b","c"), z = c(1,2,3),
stringsAsFactors = F)
str(d)
## 'data.frame': 3 obs. of 3 variables:
## $ x: num 5.6 2.45 7.09
## $ y: chr "a" "b" "c"
## $ z: num 1 2 3
d$t <- as.factor(c(10,20,30))
d
## x y z t
## 1 5.60 a 1 10
## 2 2.45 b 2 20
## 3 7.09 c 3 30
str(d)
## 'data.frame': 3 obs. of 4 variables:
## $ x: num 5.6 2.45 7.09
## $ y: chr "a" "b" "c"
## $ z: num 1 2 3
## $ t: Factor w/ 3 levels "10","20","30": 1 2 3
d1 <- data.frame(x = c(5.6, 2.45, 7.09), y = c("a","b","c"),
stringsAsFactors = F)
d2 <- data.frame(z = c(1,2,3), t = as.factor(c(10,20,30)))
d3 <- cbind(d1,d2)
str(d3)
## 'data.frame': 3 obs. of 4 variables:
## $ x: num 5.6 2.45 7.09
## $ y: chr "a" "b" "c"
## $ z: num 1 2 3
## $ t: Factor w/ 3 levels "10","20","30": 1 2 3
# When binding two data frames by row, column names must match!
d4 <- rbind(d1,d2)
## Error in match.names(clabs, names(xi)): names do not match previous names
d2 <- data.frame(x = c(1,2,3), y = as.factor(c(10,20,30)))
d4 <- rbind(d1,d2)
# Remember coercion
str(d4)
## 'data.frame': 6 obs. of 2 variables:
## $ x: num 5.6 2.45 7.09 1 2 3
## $ y: chr "a" "b" "c" "10" ...
Create a two column data frame with c(1,2,3) and c(4,5,6,7). What was the result?
Create a data frame with c("a","b","c") and c(1,2,3) using data.frame() directly. Create another one using cbind() first and then data.frame() (data.frame(cbind())). What is the difference?
R is not a spreadsheet program, so it's not great for direct data entry. It's best to start with spreadsheets for data entry and storage, and to import spreadsheets into R for data visualization and analysis.
.csv (comma separated values) files are often the preferred format to import into R.
Before we do that, we will need to look at the concept of your working directory.
getwd()
## [1] "/Users/bilgecan/Documents/Bilgcan.github.io/RBootcamp"
setwd("/Users/bilgecan/Desktop/")
blue.hill <- read.csv("BlueHill.csv")
head(blue.hill)
# Reading data without changing working directory
blue.hill <- read.csv("/Users/bilgecan/Desktop/BlueHill.csv")
Average Height # BAD - this won't work
Average.Height # OK - this will work but isn't best practice (See style guide)
average.height # BETTER - this will work, but will be slow to type repeatedly
avg.height # GOOD!
Import the data frame BlueHill.csv and read it into R.
What is the mean daily temperature across all years and stations (MNTM)?
Subset the fourth column and assign it to an object. What is its class and mode?
Subset the first row and assign it to an object. What is its class? Does it have a single mode?
Considering the third and fourth questions, how is subsetting rows vs columns different for data frames compared to matrices?
# Write the file
write.csv(iris, file = "iris.csv", row.names = FALSE)
# Check in your working directory for the new file
list.files()
Lists can include elements of any type and class simultaneously.
A data frame is a list of vectors.
Unless specified, data in lists are not stored in a table format.
d <- data.frame(x = c(5.6, 2.45, 7.09), y = c("a","b","c"), z = as.factor(c(1,2,3)))
l <- list(x = c(5.6, 2.45, 7.09), y = c("a","b","c"), z = as.factor(c(1,2,3)))
l
## $x
## [1] 5.60 2.45 7.09
##
## $y
## [1] "a" "b" "c"
##
## $z
## [1] 1 2 3
## Levels: 1 2 3
typeof(l)
## [1] "list"
typeof(d)
## [1] "list"
# Notice the similarity in the syntax with adding a column in a data frame
l$d <- d
l[2:4]
## $y
## [1] "a" "b" "c"
##
## $z
## [1] 1 2 3
## Levels: 1 2 3
##
## $d
## x y z
## 1 5.60 a 1
## 2 2.45 b 2
## 3 7.09 c 3
# Listception
l$l <- l
l[4:5]
## $d
## x y z
## 1 5.60 a 1
## 2 2.45 b 2
## 3 7.09 c 3
##
## $l
## $l$x
## [1] 5.60 2.45 7.09
##
## $l$y
## [1] "a" "b" "c"
##
## $l$z
## [1] 1 2 3
## Levels: 1 2 3
##
## $l$d
## x y z
## 1 5.60 a 1
## 2 2.45 b 2
## 3 7.09 c 3
# Single brackets return the elements at the given indices as a list
l[1]
## $x
## [1] 5.60 2.45 7.09
typeof(l[1])
## [1] "list"
str(l[1])
## List of 1
## $ x: num [1:3] 5.6 2.45 7.09
# Double brackets return the element in its original type and class
l[[1]]
## [1] 5.60 2.45 7.09
typeof(l[[1]])
## [1] "double"
str(l[[1]])
## num [1:3] 5.6 2.45 7.09
v <- l[[1]]
v
## [1] 5.60 2.45 7.09
names(l)
## [1] "x" "y" "z" "d" "l"
l <- unname(l)
names(l)
## NULL
l[1:3]
## [[1]]
## [1] 5.60 2.45 7.09
##
## [[2]]
## [1] "a" "b" "c"
##
## [[3]]
## [1] 1 2 3
## Levels: 1 2 3
names(l)[1] <- "x"
names(l)[2] <- "y"
names(l[1])
## [1] "x"
names(l[[1]])
## NULL
names(l[[1]])[1] <- "a"
l[1]
## $x
## a <NA> <NA>
## 5.60 2.45 7.09
Use $ to call elements from a list as well. This is the same as using [[]].
l$x
## a <NA> <NA>
## 5.60 2.45 7.09
l$y
## [1] "a" "b" "c"
Create the list in the previous example:
d <- data.frame(x = c(5.6, 2.45, 7.09),
y = c("a","b","c"),
z = as.factor(c(1,2,3)))
l <- list(x = c(5.6, 2.45, 7.09),
y = c("a","b","c"),
z = as.factor(c(1,2,3)))
l2 <- list(a = c(5:10), b = c(500:600))
l$d <- d
l$l2 <- l2
Extract the first element of z in the list.
Extract the entire second column of d in the list.
Extract the 50th element of b in the l2 within l.
Multiply x in l with 2 by extracting it with [] and [[]]. Why do you think one works and the other does not?
Subset vectors and factors using []
Subset matrices, arrays, and data frames using [,]
Subset lists with [] if you want your output to be a list, or if you want to subset multiple elements from a list.
Subset lists with [[]] if you want to subset and use the actual data within the elements of a list.
If the object is a vector, its class is its type (mode).
If the object is a matrix, array, data frame or list, its class is its data structure name.
x <- c(1,2,3)
class(x)
## [1] "numeric"
y <- as.factor(x)
class(factor)
## [1] "function"
m <- matrix(c(1,2,3,4), nrow = 2, ncol = 2)
class(m)
## [1] "matrix"