I learnt how to manage my files, packages, and saved work within an R session.
I also learnt about the following:
• Working Directory
• Packages
• Saving Work
# Example: build a numeric, a character and a factor vector, then combine
# the numeric and factor vectors into a two-column data frame.
num_vec <- c(2.5, 7.3, 4.1)
char_vec <- c("apple", "banana", "cherry")
fac_vec <- as.factor(char_vec)
df <- data.frame(
  Value = num_vec,
  Fruit = fac_vec
)
# Compact display of the structure of df
str(object = df)
## 'data.frame': 3 obs. of 2 variables:
## $ Value: num 2.5 7.3 4.1
## $ Fruit: Factor w/ 3 levels "apple","banana",..: 1 2 3
# Column-wise summary of df
summary(object = df)
## Value Fruit
## Min. :2.500 apple :1
## 1st Qu.:3.300 banana:1
## Median :4.100 cherry:1
## Mean :4.633
## 3rd Qu.:5.700
## Max. :7.300
I learnt how to use R for basic calculations, how to store results as objects, and how to work with vectors, which are an essential data structure in R. Topics covered:
• Logarithms and Exponentials
• E-Notation
• Creating a Vector
• Vector Manipulation
• Subsetting and Element Extraction
Exercise 2.1
# a. Using R, verify that (6a + 42)/(3^(4.2−3.62)) = 29.50556
# when a = 2.3.
foo <- (6 * 2.3 + 42) / 3^(4.2 - 3.62)
foo
## [1] 29.50556
# The printed value is rounded to 7 significant digits, so an exact
# comparison (foo == 29.50556) is FALSE. Compare within a tolerance instead.
isTRUE(all.equal(foo, 29.50556, tolerance = 1e-6))
## [1] TRUE
# b. Which of the following squares negative 4 and adds 2 to the
# result?
# i. (-4)^2+2
# ii. -4^2+2
# iii. (-4)^(2+2)
# iv. -4^(2+2)
# Each option is stored under a descriptive name; in particular, a global
# variable called `c` is avoided because it would mask base::c().
opt_i <- (-4)^2 + 2
opt_i
## [1] 18
# Unary minus has lower precedence than ^, so this is -(4^2) + 2.
opt_ii <- -4^2 + 2
opt_ii
## [1] -14
opt_iii <- (-4)^(2 + 2)
opt_iii
## [1] 256
opt_iv <- -4^(2 + 2)
opt_iv
## [1] -256
# Option i, (-4)^2+2, is the answer: it squares -4 and then adds 2.
# c. Using R, how would you calculate the square root of half of the average of the numbers 25.2, 15, 16.44, 15.3, and 18.6?
# Step 1: the average of the five numbers.
foo <- mean(c(25.2, 15, 16.44, 15.3, 18.6))
foo
## [1] 18.108
# Step 2: half of the average (divide by 2; dividing by 0.5 would double it).
bar <- foo / 2
bar
## [1] 9.054
# Step 3: the square root of that half.
qux <- sqrt(bar)
qux
## [1] 3.008987
# d. Find the natural logarithm (base e) of 0.3.
mylog <- log(0.3)
mylog
## [1] -1.203973
# e. Compute the exponential transform of your answer to (d).
exp(mylog)
## [1] 0.3
# f. Identify R’s representation of −0.00000000423546322 when printing this number to the console.
# R switches to e-notation for a value this small.
x <- -0.00000000423546322
x
## [1] -4.235463e-09
Exercise 2.2
# a. Create an object that stores the value 3^2 × 4^(1/8)
foo <- (3^2) * (4^(1 / 8))
foo
## [1] 10.70286
# b. Overwrite your object in (a) by itself divided by 2.33. Print the result to the
# console.
foo <- foo / 2.33
foo
## [1] 4.593504
# c. Create a new object with the value −8.2 × 10^−13.
qux <- -8.2 * 10^(-13)
qux
## [1] -8.2e-13
# d. Print directly to the console the result of multiplying (b) by (c).
qux * foo
## [1] -3.766673e-12
Exercise 2.3
# a. Create and store a sequence of values from 5 to −11 that progresses in steps of 0.3.
# The step is negative because the sequence is decreasing; the last value is
# -10.9 because seq() never steps past `to`.
myvec <- seq(from = 5, to = -11, by = -0.3)
myvec
## [1] 5.0 4.7 4.4 4.1 3.8 3.5 3.2 2.9 2.6 2.3 2.0 1.7
## [13] 1.4 1.1 0.8 0.5 0.2 -0.1 -0.4 -0.7 -1.0 -1.3 -1.6 -1.9
## [25] -2.2 -2.5 -2.8 -3.1 -3.4 -3.7 -4.0 -4.3 -4.6 -4.9 -5.2 -5.5
## [37] -5.8 -6.1 -6.4 -6.7 -7.0 -7.3 -7.6 -7.9 -8.2 -8.5 -8.8 -9.1
## [49] -9.4 -9.7 -10.0 -10.3 -10.6 -10.9
# b. Overwrite the object from (a) using the same sequence with the order reversed.
# rev() returns a new vector, so the result must be assigned back to myvec
# for the object to actually be overwritten.
myvec <- rev(myvec)
myvec
## [1] -10.9 -10.6 -10.3 -10.0 -9.7 -9.4 -9.1 -8.8 -8.5 -8.2 -7.9 -7.6
## [13] -7.3 -7.0 -6.7 -6.4 -6.1 -5.8 -5.5 -5.2 -4.9 -4.6 -4.3 -4.0
## [25] -3.7 -3.4 -3.1 -2.8 -2.5 -2.2 -1.9 -1.6 -1.3 -1.0 -0.7 -0.4
## [37] -0.1 0.2 0.5 0.8 1.1 1.4 1.7 2.0 2.3 2.6 2.9 3.2
## [49] 3.5 3.8 4.1 4.4 4.7 5.0
# c. Repeat the vector c(-1,3,-5,7,-9) twice, with each element repeated 10 times, and store the result. Display the result sorted from largest to smallest.
# `each` repeats every element, `times` repeats the whole expanded vector.
# rep() has no `decreasing` argument (it would be silently ignored), so the
# sorting is done separately with sort().
oyin <- rep(c(-1, 3, -5, 7, -9), times = 2, each = 10)
sort(oyin, decreasing = TRUE)
## [1] 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 3 3 3 3 3
## [26] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [51] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5
## [76] -5 -5 -5 -5 -5 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
# d. Create and store a vector that contains, in any configuration, the following:
# i. A sequence of integers from 6 to 12 (inclusive)
# ii. A threefold repetition of the value 5.3
# iii. The number −3
# iv. A sequence of nine values starting at 102 and ending at the number that is the total length of the vector created in (c)
# length(oyin) is 100, so part iv runs from 102 down to 100 in nine steps.
# `length.out` is spelled out in full rather than relying on partial matching.
zaza <- c(
  6:12,
  rep(5.3, times = 3),
  -3,
  seq(from = 102, to = length(oyin), length.out = 9)
)
zaza
## [1] 6.00 7.00 8.00 9.00 10.00 11.00 12.00 5.30 5.30 5.30
## [11] -3.00 102.00 101.75 101.50 101.25 101.00 100.75 100.50 100.25 100.00
# e. Confirm that the length of the vector created in (d) is 20.
length(zaza) == 20
## [1] TRUE
Exercise 2.4
# a. Create and store a vector that contains the following, in this
# order:
# – A sequence of length 5 from 3 to 6 (inclusive)
# – A twofold repetition of the vector c(2,-5.1,-33)
# – The value 7/42 + 2
a <- c(
  seq(from = 3, to = 6, length.out = 5),
  rep(c(2, -5.1, -33), times = 2),
  7 / 42 + 2
)
a
## [1] 3.000000 3.750000 4.500000 5.250000 6.000000 2.000000
## [7] -5.100000 -33.000000 2.000000 -5.100000 -33.000000 2.166667
# b. Extract the first and last elements of your vector from (a), storing
# them as a new object.
b <- a[c(1, length(a))]
b
## [1] 3.000000 2.166667
# c. Store as a third object the values returned by omitting the first
# and last values of your vector from (a).
c_vec <- a[-c(1, length(a))]
c_vec
## [1] 3.75 4.50 5.25 6.00 2.00 -5.10 -33.00 2.00 -5.10 -33.00
# d. Use only (b) and (c) to reconstruct (a).
reconstructed <- c(b[1], c_vec, b[2])
reconstructed
## [1] 3.000000 3.750000 4.500000 5.250000 6.000000 2.000000
## [7] -5.100000 -33.000000 2.000000 -5.100000 -33.000000 2.166667
# e. Overwrite (a) with the same values sorted from smallest to
# largest.
# The sorted result is assigned back to `a` so that (a) really is overwritten.
a <- sort(a, decreasing = FALSE)
a
## [1] -33.000000 -33.000000 -5.100000 -5.100000 2.000000 2.000000
## [7] 2.166667 3.000000 3.750000 4.500000 5.250000 6.000000
# f. Use the colon operator as an index vector to reverse the order of (e), and
# confirm this is identical to using sort on (e) with decreasing=TRUE.
# The result of (e) is the overwritten, sorted `a`.
rev1 <- a[length(a):1]
rev2 <- sort(a, decreasing = TRUE)
identical(rev1, rev2)
## [1] TRUE
# g. Create a vector from (c) that repeats the third element of (c) three times, the sixth element four times, and the last element once.
# A vector-valued `times` gives one repeat count per selected element.
g <- rep(c_vec[c(3, 6, length(c_vec))], times = c(3, 4, 1))
g
## [1] 5.25 5.25 5.25 -5.10 -5.10 -5.10 -5.10 -33.00
# h. Create a new vector as a copy of (e) by assigning (e) as is to a newly named object. Using this new copy of (e), overwrite the first, the fifth to the
# seventh (inclusive), and the last element with the values 99 to 95 (inclusive), respectively.
h <- a
h[c(1, 5:7, length(h))] <- 99:95
h
## [1] 99.00 -33.00 -5.10 -5.10 98.00 97.00 96.00 3.00 3.75 4.50
## [11] 5.25 95.00
Exercise 2.5
# a. Convert the vector c(2,0.5,1,2,0.5,1,2,0.5,1) to a vector of only 1s, using a vector of length 3.
# The length-3 divisor is recycled three times across the length-9 vector.
lap <- c(2, 0.5, 1, 2, 0.5, 1, 2, 0.5, 1) / c(2, 0.5, 1)
lap
## [1] 1 1 1 1 1 1 1 1 1
# b. The conversion from a temperature measurement in degrees Fahrenheit F to Celsius C is performed using the following equation:
# C = 5/9 (F − 32). Use vector-oriented behavior in R to convert the temperatures 45, 77, 20, 19, 101, 120, and 212 in degrees Fahrenheit to degrees Celsius.
# The vectors are named temp_f / temp_c rather than F / C: assigning to `F`
# would overwrite R's built-in shorthand for FALSE for the rest of the session.
temp_f <- c(45, 77, 20, 19, 101, 120, 212)
temp_f
## [1] 45 77 20 19 101 120 212
temp_c <- 5 / 9 * (temp_f - 32)
temp_c
## [1] 7.222222 25.000000 -6.666667 -7.222222 38.333333 48.888889 100.000000
I learnt how to work with data structures that have more than one dimension. This builds on the concept of vectors, showing how they can be stored together to form matrices (two-dimensional structures with rows and columns) and arrays (structures with more than two dimensions).
• Matrix Creation
• Row and Column Bindings
• Matrix Dimensions
Exercise 3.1
## a. Construct and store a 4 × 2 matrix that’s filled row-wise with the values 4.3, 3.1, 8.2, 8.2, 3.2, 0.9, 1.6, and 6.5, in that order.
mat <- matrix(c(4.3, 3.1, 8.2, 8.2, 3.2, 0.9, 1.6, 6.5), nrow = 4, ncol = 2, byrow = TRUE)
mat
## [,1] [,2]
## [1,] 4.3 3.1
## [2,] 8.2 8.2
## [3,] 3.2 0.9
## [4,] 1.6 6.5
## b. Confirm the dimensions of the matrix from (a) are 3 × 2 if you remove any one row.
dim(mat[-1, ])
## [1] 3 2
## c. Overwrite the second column of the matrix from (a) with that same column sorted from smallest to largest.
mat[, 2] <- sort(mat[, 2])
mat
## [,1] [,2]
## [1,] 4.3 0.9
## [2,] 8.2 3.1
## [3,] 3.2 6.5
## [4,] 1.6 8.2
## d. What does R return if you delete the fourth row and the first column from (c)? Use matrix to ensure the result is a single-column matrix, rather than a vector.
# Dropping down to one column makes R return a plain vector, so it is wrapped
# in matrix() to restore the single-column shape.
result_d <- matrix(mat[-4, -1], ncol = 1)
result_d
## [,1]
## [1,] 0.9
## [2,] 3.1
## [3,] 6.5
## e. Store the bottom four elements of (c) as a new 2 × 2 matrix.
# The bottom four elements are rows 3 and 4. tail(mat, 4) would return all
# four rows (8 values) and trigger a length-mismatch warning in matrix().
mat_e <- mat[3:4, ]
mat_e
## [,1] [,2]
## [1,] 3.2 6.5
## [2,] 1.6 8.2
## f. Overwrite, in this order, the elements of (c) at positions (4,2), (1,2), (4,1), and (1,1) with −1/2 of the two values on the diagonal of (e).
# Indexing rows c(4, 1) and columns 2:1 visits the cells in exactly the order
# (4,2), (1,2), (4,1), (1,1); the two replacement values are recycled.
mat[c(4, 1), 2:1] <- -0.5 * diag(mat_e)
mat
## [,1] [,2]
## [1,] -4.1 -4.1
## [2,] 8.2 3.1
## [3,] 3.2 6.5
## [4,] -1.6 -1.6
Exercise 3.2
# a. Calculate the following: $$ \frac{2}{7} \left( \begin{bmatrix} 1 & 2 \\ 2 & 4 \\ 7 & 6 \end{bmatrix} - \begin{bmatrix} 10 & 20 \\ 30 & 40 \\ 50 & 60 \end{bmatrix} \right) $$
# Both 3 x 2 matrices are assembled column by column.
M1 <- cbind(c(1, 2, 7), c(2, 4, 6))
M1
## [,1] [,2]
## [1,] 1 2
## [2,] 2 4
## [3,] 7 6
M2 <- cbind(c(10, 30, 50), c(20, 40, 60))
M2
## [,1] [,2]
## [1,] 10 20
## [2,] 30 40
## [3,] 50 60
# Element-wise difference of the two matrices
you <- M1 - M2
you
## [,1] [,2]
## [1,] -9 -18
## [2,] -28 -36
## [3,] -43 -54
# Scalar multiplier
she <- 2 / 7
she
## [1] 0.2857143
# Scale every element of the difference by 2/7
them <- she * you
them
## [,1] [,2]
## [1,] -2.571429 -5.142857
## [2,] -8.000000 -10.285714
## [3,] -12.285714 -15.428571
# b. Store these two matrices: $$ A = \begin{bmatrix} 1 \\ 2 \\ 7 \end{bmatrix} \quad B = \begin{bmatrix} 3 \\ 4 \\ 8 \end{bmatrix} $$
A <- matrix(c(1, 2, 7), ncol = 1)
B <- matrix(c(3, 4, 8), ncol = 1)
# Which of the following multiplications are possible? For those that are, compute the result.
# i. A · B
# ii. A^T · B
# iii. B^T · (A · A^T)
# iv. (A · A^T) · B^T
# v. [(B · B^T) + (A · A^T) − 100 I_3]^{-1}
#
# Matrix multiplication uses %*%; the %% operator is element-wise modulo.
#
# i. Not possible: A and B are both 3 x 1, so the number of columns of A (1)
# does not match the number of rows of B (3). Running the line below stops
# with "non-conformable arguments", so it is left commented out.
# A %*% B
# ii. Possible: (1 x 3) times (3 x 1) gives a 1 x 1 matrix.
t(A) %*% B
## [,1]
## [1,] 67
# iii. Possible: A %*% t(A) is 3 x 3 and t(B) is 1 x 3, so the product is a
# 1 x 3 matrix.
t(B) %*% (A %*% t(A))
## [,1] [,2] [,3]
## [1,] 67 134 469
# iv. Not possible: (A %*% t(A)) is 3 x 3 but t(B) is 1 x 3, so the inner
# dimensions (3 and 1) do not match. Left commented out for the same reason
# as part i.
# (A %*% t(A)) %*% t(B)
# v. Possible: every term is 3 x 3, and solve() returns the inverse.
res <- solve(B %*% t(B) + A %*% t(A) - 100 * diag(3))
res
## [,1] [,2] [,3]
## [1,] -0.007923676 0.003123274 0.007843334
## [2,] 0.003123274 -0.005350239 0.011483806
## [3,] 0.007843334 0.011483806 0.017584735
# Sanity check: inverting the result recovers the matrix that was inverted.
res_inv <- solve(res)
res_inv
## [,1] [,2] [,3]
## [1,] -90 14 31
## [2,] 14 -80 46
## [3,] 31 46 13
# c. With A redefined as the 4 x 4 diagonal matrix with diagonal entries
# 2, 3, 5 and -1, check that (inverse of A) times A, minus the 4 x 4 identity
# matrix, is a matrix of zeros.
A <- diag(x = c(2, 3, 5, -1))
check <- (solve(A) %*% A) - diag(nrow = 4)
check
## [,1] [,2] [,3] [,4]
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
## [4,] 0 0 0 0
# check is a 4 × 4 matrix of zeros.
I learnt about logical values, which are based on the simple premise of being either TRUE or FALSE.
• Creation and Relational Operators
• Multiple Comparisons with Logical Operators
• Logicals as Numbers
• Logical Subsetting
I learnt character strings, which are R’s way of representing text.
• Creating Strings
• Concatenation
I also learnt about factors, which are R’s primary data structure for handling categorical data—variables that fall into a finite number of distinct categories.
• Identifying Categories
• Levels
• Defining and Ordering Levels
Exercise 4.1
# a. Store the following vector of 15 values as an object in your workspace: c(6,9,7,3,6,7,9,6,3,6,6,7,1,9,1).
fiv <- c(6, 9, 7, 3, 6, 7, 9, 6, 3, 6, 6, 7, 1, 9, 1)
fiv
## [1] 6 9 7 3 6 7 9 6 3 6 6 7 1 9 1
# Identify the following elements:
# i. Those equal to 6
six <- fiv[fiv == 6]
six
## [1] 6 6 6 6 6
# ii. Those greater than or equal to 6
six_grt <- fiv[fiv >= 6]
six_grt
## [1] 6 9 7 6 7 9 6 6 6 7 9
# iii. Those less than 6 + 2
sixt <- fiv[fiv < 6 + 2]
sixt
## [1] 6 7 3 6 7 6 3 6 6 7 1 1
# iv. Those not equal to 6
six_not <- fiv[fiv != 6]
six_not
## [1] 9 7 3 7 9 3 7 1 9 1
# b. Create a new vector from the one used in (a) by deleting its first three elements. With this new vector, fill a 2 × 2 × 3 array. Examine the array for the following entries:
# The vector from (a) is `fiv`; the remaining 12 values fill the array
# exactly (2 * 2 * 3 = 12), column by column within each layer.
new_vec <- fiv[-(1:3)]
new_vec
## [1] 3 6 7 9 6 3 6 6 7 1 9 1
arr <- array(new_vec, dim = c(2, 2, 3))
arr
## , , 1
##
## [,1] [,2]
## [1,] 3 7
## [2,] 6 9
##
## , , 2
##
## [,1] [,2]
## [1,] 6 6
## [2,] 3 6
##
## , , 3
##
## [,1] [,2]
## [1,] 7 9
## [2,] 1 1
# i. Those less than or equal to 6 divided by 2, plus 4
# The threshold is 6 / 2 + 4 = 7.
arr[arr <= 6 / 2 + 4]
## [1] 3 6 7 6 3 6 6 7 1 1
# ii. Those less than or equal to 6 divided by 2, plus 4, after increasing every element in the array by 2
arr2 <- arr + 2
arr2
## , , 1
##
## [,1] [,2]
## [1,] 5 9
## [2,] 8 11
##
## , , 2
##
## [,1] [,2]
## [1,] 8 8
## [2,] 5 8
##
## , , 3
##
## [,1] [,2]
## [1,] 9 11
## [2,] 3 3
arr2[arr2 <= 6 / 2 + 4]
## [1] 5 5 3 3
Exercise 4.2
# a. Store the vector c(7,1,7,10,5,9,10,3,10,8) as foo.
foo <- c(7, 1, 7, 10, 5, 9, 10, 3, 10, 8)
foo
## [1] 7 1 7 10 5 9 10 3 10 8
# i. Identify the elements greater than 5 OR equal to 2.
# The logical mask is kept so that it can be reused in part (c).
cond_a <- foo > 5 | foo == 2
less <- foo[cond_a]
less
## [1] 7 7 10 9 10 10 8
# b. Store the vector c(8,8,4,4,5,1,5,6,6,8) as bar.
bar <- c(8, 8, 4, 4, 5, 1, 5, 6, 6, 8)
bar
## [1] 8 8 4 4 5 1 5 6 6 8
# i. Identify the elements less than or equal to 6 AND not equal to 4.
# AND is the & operator; with | every element would satisfy the condition.
cond_b <- bar <= 6 & bar != 4
not_equ <- bar[cond_b]
not_equ
## [1] 5 1 5 6 6
# c. Identify the elements that satisfy (a) in foo AND satisfy (b) in bar.
# The two logical masks (not the extracted values) are combined, so the
# comparison is position by position across foo and bar.
foo[cond_a & cond_b]
## [1] 9 10 10
# d. Store a third vector called baz that is equal to the element-wise sum of foo and bar. Determine the following:
baz <- foo + bar
baz
## [1] 15 9 11 14 10 10 15 9 16 16
# i. The elements of baz greater than or equal to 14 but not equal to 15
baz_i <- baz[baz >= 14 & baz != 15]
baz_i
## [1] 14 16 16
# ii. The elements of the vector obtained via an element-wise division of baz by foo that are greater than 4 OR less than or equal to 2
baf <- baz / foo
baf
## [1] 2.142857 9.000000 1.571429 1.400000 2.000000 1.111111 1.500000 3.000000
## [9] 1.600000 2.000000
baf2 <- baf[baf > 4 | baf <= 2]
baf2
## [1] 9.000000 1.571429 1.400000 2.000000 1.111111 1.500000 1.600000 2.000000
I learnt that lists and data frames let me store multiple types of data simultaneously. This overcomes the limitation of vectors, matrices, and arrays, which can only hold a single data type. The chapter is divided into two parts, covering lists as flexible containers and data frames as the standard structure for statistical data sets.
• A list is an object that can group any mix of R structures. For example, a single list can contain a numeric matrix, a logical vector, a character string, and even another list.
• Naming and Nesting
• A data frame is described as R’s most natural representation of a data set, similar in structure to a spreadsheet. In a data frame: ◦ Each column represents a variable. ◦ Each row represents a record or observation.
• Like lists, data frames can store different data types. However, they have a key restriction: all member vectors (the columns) must be of equal length.
• Construction and Modification
• Logical Subsets.
Exercise 5.2
# a. Create and store this data frame as dframe in your R workspace:
# person sex funny
# Stan M High
# Francine F Med
# Steve M Low
# Roger M High
# Hayley F Med
# Klaus M Med
# The variables person, sex, and funny should be identical in nature to the variables in the mydata object studied throughout Section 5.2. That is, person should be a character vector, sex should be a factor with levels F and M, and funny should be a factor with levels Low, Med, and High.
# Names, kept as plain character strings
person <- c("Stan", "Francine", "Steve", "Roger", "Hayley", "Klaus")
person
## [1] "Stan" "Francine" "Steve" "Roger" "Hayley" "Klaus"
# Sex as a factor with the level order F, M
sex <- factor(
  x = c("M", "F", "M", "M", "F", "M"),
  levels = c("F", "M")
)
sex
## [1] M F M M F M
## Levels: F M
# Funniness as a factor with the level order Low, Med, High
funny <- factor(
  x = c("High", "Med", "Low", "High", "Med", "Med"),
  levels = c("Low", "Med", "High")
)
funny
## [1] High Med Low High Med Med
## Levels: Low Med High
# One column per variable; column names are taken from the object names
dframe <- data.frame(person, sex, funny)
dframe
## person sex funny
## 1 Stan M High
## 2 Francine F Med
## 3 Steve M Low
## 4 Roger M High
## 5 Hayley F Med
## 6 Klaus M Med
# b. Stan and Francine are 41 years old, Steve is 15, Hayley is 21, and Klaus is 60. Roger is extremely old—1,600 years. Append these data as a new numeric column variable in dframe called age.
# Ages are listed in the same row order as person.
age <- c(41, 41, 15, 1600, 21, 60)
age
## [1] 41 41 15 1600 21 60
dframe[["age"]] <- age
dframe
## person sex funny age
## 1 Stan M High 41
## 2 Francine F Med 41
## 3 Steve M Low 15
## 4 Roger M High 1600
## 5 Hayley F Med 21
## 6 Klaus M Med 60
# c. Use your knowledge of reordering the column variables based on column index positions to overwrite dframe, bringing it in line with mydata. That is, the first column should be person, the second column age, the third column sex, and the fourth column funny.
# Current positions are 1 = person, 2 = sex, 3 = funny, 4 = age.
new_order <- c(1, 4, 2, 3)
dframe <- dframe[, new_order]
dframe
## person age sex funny
## 1 Stan 41 M High
## 2 Francine 41 F Med
## 3 Steve 15 M Low
## 4 Roger 1600 M High
## 5 Hayley 21 F Med
## 6 Klaus 60 M Med
# d. Turn your attention to mydata as it was left after you included the age.mon variable in Section 5.2.2. Create a new version of mydata called mydata2 by deleting the age.mon column.
# NOTE(review): no `mydata` object is created anywhere in the visible part of
# this file, so dframe is used in its place here. dframe has only four
# columns, so the negative index -5 removes nothing and mydata2 comes out as
# a copy of dframe (see the printed output). TODO: define mydata as in
# Section 5.2.2 and use mydata[, -5] instead.
mydata2 <- dframe[, -5]
mydata2
## person age sex funny
## 1 Stan 41 M High
## 2 Francine 41 F Med
## 3 Steve 15 M Low
## 4 Roger 1600 M High
## 5 Hayley 21 F Med
## 6 Klaus 60 M Med
# e. Now, combine mydata2 with dframe, naming the resulting object mydataframe.
# The rows of dframe are stacked underneath the rows of mydata2.
# NOTE(review): because mydata2 is a copy of dframe, every record appears twice.
mydataframe <- rbind(mydata2, dframe)
mydataframe
## person age sex funny
## 1 Stan 41 M High
## 2 Francine 41 F Med
## 3 Steve 15 M Low
## 4 Roger 1600 M High
## 5 Hayley 21 F Med
## 6 Klaus 60 M Med
## 7 Stan 41 M High
## 8 Francine 41 F Med
## 9 Steve 15 M Low
## 10 Roger 1600 M High
## 11 Hayley 21 F Med
## 12 Klaus 60 M Med
# f. Write a single line of code that will extract from mydataframe just the names and ages of any records where the individual is female and has a level of funniness equal to Med OR High.
mydataframe[mydataframe$sex == "F" & mydataframe$funny %in% c("Med", "High"), c("person", "age")]
## person age
## 2 Francine 41
## 5 Hayley 21
## 8 Francine 41
## 11 Hayley 21
# g. Use your knowledge of handling character strings in R to extract all records from mydataframe that correspond to people whose names start with S. Hint: Recall substr from Section 4.2.4
# substr() with start = 1 and stop = 1 returns the first character of each name.
mydataframe[substr(mydataframe$person, start = 1, stop = 1) == "S", ]
## person age sex funny
## 1 Stan 41 M High
## 3 Steve 15 M Low
## 7 Stan 41 M High
## 9 Steve 15 M Low
I learnt how R represents missing data and distinguishes between different types of objects.
• Inf for infinity and NaN (Not a Number) for impossible numeric calculations.
• NA (Not Available) to denote missing values in any data type.
• NULL to represent an “empty” entity, which is distinct from a missing NA value.
• Every object has a class, which is a key attribute describing its structure (e.g., “numeric”, “matrix”).
• R provides functions to check an object’s class, such as is-dot functions like is.matrix().
• I learnt about coercion, which is the process of converting an object from one type to another. This can be done explicitly using as-dot functions like as.character().
Exercise 6.1
# a. Store the following vector: foo <- c(13563,-14156,-14319,16981,12921,11979,9568,8833,-12968,8133)
foo <- c(
  13563, -14156, -14319, 16981, 12921,
  11979, 9568, 8833, -12968, 8133
)
foo
## [1] 13563 -14156 -14319 16981 12921 11979 9568 8833 -12968 8133
# i. Output all elements of foo that, when raised to a power of 75 are NOT infinite.
# None of the powers is NA or NaN, so "not infinite" and "finite" coincide.
foo[!is.infinite(foo^75)]
## [1] 11979 9568 8833 8133
# ii. Return the elements of foo, excluding those that result in negative infinity when raised to a power of 75.
foo[!(foo^75 == -Inf)]
## [1] 13563 16981 12921 11979 9568 8833 8133
# b. Store the following 3 × 4 matrix as the object bar:
#
# [  77875.40  27551.45  23764.30 -36478.88 ]
# [ -35466.25 -73333.85  36599.69 -70585.69 ]
# [ -39803.81  55976.34  76694.82  47032.00 ]
#
# The values are entered in the order they are read (row by row), so the
# matrix is filled with byrow = TRUE.
bar <- matrix(
  c(
    77875.40, 27551.45, 23764.30, -36478.88,
    -35466.25, -73333.85, 36599.69, -70585.69,
    -39803.81, 55976.34, 76694.82, 47032.00
  ),
  nrow = 3, ncol = 4, byrow = TRUE
)
bar
## [,1] [,2] [,3] [,4]
## [1,] 77875.40 27551.45 23764.30 -36478.88
## [2,] -35466.25 -73333.85 36599.69 -70585.69
## [3,] -39803.81 55976.34 76694.82 47032.00
# i. Identify the coordinate-specific indexes of the entries of bar that are NaN when you raise bar to a power of 65 and divide by infinity.
# Entries whose 65th power overflows to Inf or -Inf give Inf / Inf = NaN;
# finite powers divided by Inf give 0.
bar1 <- which(is.nan(bar^65 / Inf), arr.ind = TRUE)
bar1
## row col
## [1,] 1 1
## [2,] 2 2
## [3,] 3 2
## [4,] 3 3
## [5,] 2 4
# ii. Return the values in bar that are NOT NaN when bar is raised to a power of 67 and infinity is added to the result. Confirm this is identical to identifying those values in bar that, when raised to a power of 67, are not equal to negative infinity
# Only -Inf + Inf is NaN, so the two selections must agree.
not_nan <- bar[!is.nan(bar^67 + Inf)]
not_nan
## [1] 77875.40 -35466.25 -39803.81 27551.45 55976.34 23764.30 36599.69
## [8] 76694.82 -36478.88 47032.00
check <- bar[bar^67 != -Inf]
identical(not_nan, check)
## [1] TRUE
# iii. Identify those values in bar that are either negative infinity OR finite when you raise bar to a power of 67.
# Equality with -Inf already implies the value is infinite, so no separate
# is.infinite() test is needed.
bar2 <- bar[bar^67 == -Inf | is.finite(bar^67)]
bar2
## [1] -35466.25 -39803.81 27551.45 -73333.85 23764.30 36599.69 -36478.88
## [8] -70585.69
Exercise 6.3
# a. Identify the class of the following objects. For each object, also state whether the class is explicitly or implicitly defined.
# i. foo <- array(data=1:36,dim=c(3,3,4))
foo <- array(data = 1:36, dim = c(3, 3, 4))
class(foo)
## [1] "array"
# ii. bar <- as.vector(foo)
bar <- as.vector(foo)
class(bar)
## [1] "integer"
# iii. baz <- as.character(bar)
baz <- as.character(bar)
class(baz)
## [1] "character"
# iv. qux <- as.factor(baz)
qux <- as.factor(baz)
class(qux)
## [1] "factor"
# v. quux <- bar + c(-0.1, 0.1)
# The length-2 vector is recycled along the 36 elements of bar.
quux <- bar + c(-0.1, 0.1)
class(quux)
## [1] "numeric"
# b. For each object defined in (a), find the sum of the result of calling is.numeric and is.integer on it separately. For example, is.numeric(foo)+is.integer(foo) would compute the sum for (i). Turn the collection of five results into a factor with levels 0, 1, and 2, identified by the results themselves. Compare this factor vector with the result of coercing it to a numeric vector.
# Adding two logicals counts how many of the two tests are TRUE (0, 1 or 2).
results <- c(
  is.integer(foo) + is.numeric(foo),
  is.integer(bar) + is.numeric(bar),
  is.integer(baz) + is.numeric(baz),
  is.integer(qux) + is.numeric(qux),
  is.integer(quux) + is.numeric(quux)
)
results
## [1] 2 2 0 0 1
results_fac <- factor(x = results, levels = 0:2)
results_fac
## [1] 2 2 0 0 1
## Levels: 0 1 2
# Coercing the factor to numeric returns the positions of the levels
# (1, 2, 3), not the original values (0, 1, 2).
as.numeric(results_fac)
## [1] 3 3 1 1 2
# c. Turn the matrix with rows (2, 5, 8, 11), (3, 6, 9, 12) and (4, 7, 10, 13) into the following: "2" "5" "8" "11" "3" "6" "9" "12" "4" "7" "10" "13"
# Create the matrix (filled column by column, which yields the rows above)
m <- matrix(2:13, nrow = 3)
m
## [,1] [,2] [,3] [,4]
## [1,] 2 5 8 11
## [2,] 3 6 9 12
## [3,] 4 7 10 13
# Turn it into the desired character vector.
# R flattens a matrix column by column, which would give "2" "3" "4" ....
# The target reads the matrix row by row, so it is transposed first.
v <- as.character(t(m))
v
## [1] "2" "5" "8" "11" "3" "6" "9" "12" "4" "7" "10" "13"
# d. Store the following matrix, given row by row:
# (34, 0, 1), (23, 1, 2), (33, 1, 1), (42, 0, 1), (41, 0, 2)
# NOTE(review): the original task line listed only the first four rows; the
# fifth row (41, 0, 2) is taken from the code as written. Confirm against the
# exercise statement.
M <- rbind(
  c(34, 0, 1),
  c(23, 1, 2),
  c(33, 1, 1),
  c(42, 0, 1),
  c(41, 0, 2)
)
# i. Coerce the matrix to a data frame
# Columns receive the default names V1, V2, V3.
df <- as.data.frame(M)
df
## V1 V2 V3
## 1 34 0 1
## 2 23 1 2
## 3 33 1 1
## 4 42 0 1
## 5 41 0 2
# ii. Coerce the second column to be logical-valued
# 0 becomes FALSE and 1 becomes TRUE.
df$V2 <- as.logical(df$V2)
df$V2
## [1] FALSE TRUE TRUE FALSE FALSE
# iii. Coerce the third column to be factor-valued
df$V3 <- as.factor(df$V3)
df$V3
## [1] 1 2 1 1 2
## Levels: 1 2
df
## V1 V2 V3
## 1 34 FALSE 1
## 2 23 TRUE 2
## 3 33 TRUE 1
## 4 42 FALSE 1
## 5 41 FALSE 2
I learnt about fundamental tools for creating data visualizations. It covers the standard plot function, extensive customization options through graphical parameters, and an introduction to the powerful ggplot2 package.
• type: This parameter specifies how the coordinates are displayed. For example, “p” plots individual points (the default), “l” connects points with lines, and “b” shows both points and lines.
• lwd: This is an abbreviation for line width and is used to control the thickness of plotted lines.
• pch: Standing for point character, this parameter selects the symbol used for individual points.
• cex: An abbreviation for character expansion, cex controls the size of the plotted points.
• lty: Standing for line type, this parameter specifies the style of line to use, such as solid, dotted, or dashed.
• Other parameters like main, xlab, ylab, and col are used for adding a title, axis labels, and color to the plot.
I also learnt how to add new elements, such as points, lines, and text, to a plot that has already been created, without clearing the graphics device.
I learnt about the ggplot2 package, a popular and powerful alternative to R’s basic graphics. It covers:
• The qplot function for creating quick plots.
• The use of geometric modifiers, known as geoms (e.g., geom_point()), to add and customize layers on a plot.
Exercise 7.1
# b. Input the data: one weight (kg), height (cm) and sex per person
weight <- c(55, 85, 75, 42, 93, 63, 58, 75, 89, 67)
height <- c(161, 185, 174, 154, 188, 178, 170, 167, 181, 178)
sex <- c(
  "female", "male", "male", "female", "male",
  "male", "female", "male", "male", "female"
)
# Collect the three variables in a data frame
df <- data.frame(weight, height, sex)
df
## weight height sex
## 1 55 161 female
## 2 85 185 male
## 3 75 174 male
## 4 42 154 female
## 5 93 188 male
## 6 63 178 male
## 7 58 170 female
## 8 75 167 male
## 9 89 181 male
## 10 67 178 female
# Pick a colour and a point character per observation, depending on sex:
# males are blue solid circles (pch 16), females red triangles (pch 17).
cols <- ifelse(df$sex == "male", "blue", "red")
pch_vals <- ifelse(df$sex == "male", 16, 17)
# Scatterplot of height against weight with axis labels and a title
plot(
  x = df$weight, y = df$height,
  col = cols, pch = pch_vals,
  xlab = "Weight (kg)", ylab = "Height (cm)",
  main = "Height vs Weight by Sex"
)
# Legend matching the colours and symbols used above
legend(
  "topleft",
  legend = c("Male", "Female"),
  col = c("blue", "red"),
  pch = c(16, 17)
)
Exercise 7.2
# In Exercise 7.1 (b), you used base R graphics to plot some weight and height data, distinguishing males and females using different points or colors. Repeat this task using ggplot2.
# Load the ggplot2 package
library(ggplot2)
# Step 1: Enter the data
# Weights in kg
weight <- c(55, 85, 75, 42, 93, 63, 58, 75, 89, 67)
# Heights in cm
height <- c(161, 185, 174, 154, 188, 178, 170, 167, 181, 178)
# Sex labels
sex <- c(
  "female", "male", "male", "female", "male",
  "male", "female", "male", "male", "female"
)
# Step 2: Combine into a data frame
df <- data.frame(weight, height, sex)
# Step 3: Create the plot
# weight and height are mapped to the axes; sex is mapped to both the colour
# and the shape of the points, which are drawn at size 3.
ggplot(data = df, mapping = aes(x = weight, y = height, color = sex, shape = sex)) +
  geom_point(size = 3) +
  labs(
    title = "Height vs Weight by Sex",
    x = "Weight (kg)",
    y = "Height (cm)"
  )