# R data structures ----
# Vectors ----
# Three parallel vectors describing the same three medical patients:
# position i in each vector belongs to patient i.
subject_name <- c("John Doe", "Jane Doe", "Steve Graves")
temperature <- c(98.1, 98.6, 101.4)
flu_status <- c(FALSE, FALSE, TRUE)

# pull out the second patient's body temperature by position
temperature[2]
## [1] 98.6
# examples of accessing items in a vector
# Each expression below returns a subset of `temperature`; nothing is
# assigned, so `temperature` itself is left unchanged.
# include items in the range 2 to 3
# (2:3 expands to the positions 2 and 3)
temperature[2:3]
## [1]  98.6 101.4
# exclude item 2 using the minus sign
# (a negative index drops that position and keeps the rest)
temperature[-2]
## [1]  98.1 101.4
# use a vector to indicate whether to include item
# (TRUE keeps the element at that position, FALSE leaves it out)
temperature[c(TRUE, TRUE, FALSE)]
## [1] 98.1 98.6
# Factors ----
# gender: no levels are supplied, so factor() takes them from the data
gender <- factor(c("MALE", "FEMALE", "MALE"))
gender
## [1] MALE   FEMALE MALE
## Levels: FEMALE MALE

# blood type: all four levels are listed explicitly, so "B" is a valid
# level even though none of the three values uses it
blood <- factor(
  c("O", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
blood
## [1] O  AB A
## Levels: A B AB O

# symptom severity: an ordered factor, ranked in the order given to `levels`
symptoms <- factor(
  c("SEVERE", "MILD", "MODERATE"),
  levels = c("MILD", "MODERATE", "SEVERE"),
  ordered = TRUE
)
symptoms
## [1] SEVERE   MILD     MODERATE
## Levels: MILD < MODERATE < SEVERE

# because the factor is ordered, it can be compared against a level:
# which symptoms rank above "MODERATE"?
symptoms > "MODERATE"
## [1]  TRUE FALSE FALSE
# Lists ----
# Show everything recorded for the first patient, one object at a time:
# each value lives in a separate vector or factor, so six lookups are needed.
subject_name[1]
## [1] "John Doe"
temperature[1]
## [1] 98.1
flu_status[1]
## [1] FALSE
gender[1]
## [1] MALE
## Levels: FEMALE MALE
blood[1]
## [1] O
## Levels: A B AB O
symptoms[1]
## [1] SEVERE
## Levels: MILD < MODERATE < SEVERE

# Bundle the same six values into one named list for the first patient.
# Components keep their own types (character, numeric, logical, factor).
subject1 <- list(
  fullname = subject_name[1],
  temperature = temperature[1],
  flu_status = flu_status[1],
  gender = gender[1],
  blood = blood[1],
  symptoms = symptoms[1]
)

# print the whole list: one $name entry per component
subject1
## $fullname
## [1] "John Doe"
##
## $temperature
## [1] 98.1
##
## $flu_status
## [1] FALSE
##
## $gender
## [1] MALE
## Levels: FEMALE MALE
##
## $blood
## [1] O
## Levels: A B AB O
##
## $symptoms
## [1] SEVERE
## Levels: MILD < MODERATE < SEVERE
# methods for accessing a list
# Four ways to read components of the `subject1` list; none of them
# modifies the list.
# get a single list value by position (returns a sub-list)
# (single brackets keep the list wrapper, so the output still shows the
# $temperature label)
subject1[2]
## $temperature
## [1] 98.1
# get a single list value by position (returns a numeric vector)
# (double brackets extract the component itself, without the list wrapper)
subject1[[2]]
## [1] 98.1
# get a single list value by name
# (gives the same bare value as subject1[[2]] above)
subject1$temperature
## [1] 98.1
# get several list items by specifying a vector of names
# (single brackets again, so the result is a list with those two components)
subject1[c("temperature", "flu_status")]
## $temperature
## [1] 98.1
##
## $flu_status
## [1] FALSE
# access a list like a vector
# get values 2 and 3
# (a position range inside single brackets works as it does for a vector;
# the result is a sub-list that keeps the component names)
subject1[2:3]
## $temperature
## [1] 98.1
##
## $flu_status
## [1] FALSE
# Data frames ----
# create a data frame from medical patient data
# Combine the six patient vectors and factors into one data frame, one
# column per object; each column is named after the variable passed in.
# stringsAsFactors = FALSE keeps subject_name as character data.
pt_data <- data.frame(
  subject_name, temperature, flu_status, gender, blood, symptoms,
  stringsAsFactors = FALSE
)

# display the data frame
pt_data
# accessing a data frame
# Read-only lookups on `pt_data`; each expression returns a subset and
# leaves the data frame unchanged.
# get a single column
# ($ extracts the column itself, so the result prints as a plain vector)
pt_data$subject_name
## [1] "John Doe"     "Jane Doe"     "Steve Graves"
# get several columns by specifying a vector of names
# (single brackets with names keep the result as a data frame)
pt_data[c("temperature", "flu_status")]
# this is the same as above, extracting temperature and flu_status
# (selected by position here: they are columns 2 and 3)
pt_data[2:3]
# accessing by row and column
# (the form is [row, column]; this is row 1, column 2)
pt_data[1, 2]
## [1] 98.1
# accessing several rows and several columns using vectors
# (rows 1 and 3, columns 2 and 4)
pt_data[c(1, 3), c(2, 4)]
# leave a row or column blank to extract all rows or columns
# column 1, all rows
# (a single column selected this way prints as a plain vector)
pt_data[, 1]
## [1] "John Doe"     "Jane Doe"     "Steve Graves"
# row 1, all columns
pt_data[1, ]
# all rows and all columns
pt_data[ , ]
# the following are equivalent
# (first form: keep rows 1 and 3 and pick the two columns by name)
pt_data[c(1, 3), c("temperature", "gender")]
# (second form: negative indexes drop row 2 and columns 1, 3, 5 and 6,
# leaving columns 2 and 4 of the six)
pt_data[-2, c(-1, -3, -5, -6)]
# creating a Celsius temperature column
# (subtract 32, then multiply by 5/9: the Fahrenheit-to-Celsius formula, so
# temperature is presumably recorded in degrees Fahrenheit; assigning to
# pt_data$temp_c adds the result as a new column)
pt_data$temp_c <- (pt_data$temperature - 32) * (5 / 9)
# comparing before and after
pt_data[c("temperature", "temp_c")]
# Matrices ----
# 2x2 matrix from four values, fixing the number of rows;
# the printed result shows the values filled in column by column
m <- matrix(c(1, 2, 3, 4), nrow = 2)
m
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4

# the same 2x2 matrix, this time fixing the number of columns instead
m <- matrix(c(1, 2, 3, 4), ncol = 2)
m
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4

# six values with two rows give a 2x3 matrix
m <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2)
m
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

# the same six values with two columns give a 3x2 matrix
m <- matrix(c(1, 2, 3, 4, 5, 6), ncol = 2)
m
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6

# single values from the 3x2 matrix, addressed as [row, column]
m[1, 1]
## [1] 1
m[3, 2]
## [1] 6

# a whole row: leave the column index blank
m[1, ]
## [1] 1 4

# a whole column: leave the row index blank
m[, 1]
## [1] 1 2 3