# R data structures ----

# Vectors ----

# three parallel vectors hold the data for three medical patients: the
# same position in each vector refers to the same patient
subject_name <- c(
  "John Doe",
  "Jane Doe",
  "Steve Graves"
)
temperature <- c(
  98.1,
  98.6,
  101.4
)
flu_status <- c(
  FALSE,
  FALSE,
  TRUE
)
# square brackets with one position return that single element;
# positions count from 1, so 2 is the second body temperature
temperature[2]
## [1] 98.6

# Examples of accessing items in a vector

# three ways to select several elements of the temperature vector at once
# include items in the range 2 to 3 (the colon operator expands 2:3 into
# the positions 2 and 3)
temperature[2:3]
## [1]  98.6 101.4
# exclude item 2 using the minus sign; every other item is returned
temperature[-2]
## [1]  98.1 101.4
# use a logical vector to indicate whether to include each item: TRUE
# keeps the item in that position, FALSE leaves it out
temperature[c(TRUE, TRUE, FALSE)]
## [1] 98.1 98.6

# Factors ----

# gender: no levels are supplied, so they are taken from the data and
# stored in sorted order (FEMALE before MALE, as the output shows)
gender <- factor(x = c("MALE", "FEMALE", "MALE"))
gender
## [1] MALE   FEMALE MALE  
## Levels: FEMALE MALE
# blood type: spell out every possible level so that "B" is a valid
# level even though none of the three patients has it
blood <- factor(
  x = c("O", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
blood
## [1] O  AB A 
## Levels: A B AB O
# symptom severity: ordered() creates the same object as
# factor(..., ordered = TRUE); levels are listed from lowest to highest
symptoms <- ordered(
  x = c("SEVERE", "MILD", "MODERATE"),
  levels = c("MILD", "MODERATE", "SEVERE")
)
symptoms
## [1] SEVERE   MILD     MODERATE
## Levels: MILD < MODERATE < SEVERE
# because the levels are ordered, comparison operators work: this tests
# each patient for symptoms greater than moderate
symptoms > "MODERATE"
## [1]  TRUE FALSE FALSE

# Lists ----

# display information for a patient: position 1 of every vector and
# factor holds the values for the first patient
subject_name[1]
## [1] "John Doe"
temperature[1]
## [1] 98.1
flu_status[1]
## [1] FALSE
# selecting one value from a factor still carries the factor's full set
# of levels, which is why a Levels line is printed for a single value
gender[1]
## [1] MALE
## Levels: FEMALE MALE
blood[1]
## [1] O
## Levels: A B AB O
symptoms[1]
## [1] SEVERE
## Levels: MILD < MODERATE < SEVERE
# create list for a patient: one named component per measurement, mixing
# character, numeric, logical and factor values in a single object
# (note the first component is named fullname, not subject_name)
subject1 <- list(fullname = subject_name[1], 
                 temperature = temperature[1],
                 flu_status = flu_status[1],
                 gender = gender[1],
                 blood = blood[1],
                 symptoms = symptoms[1])
# display the patient; each component is printed under its $name
subject1
## $fullname
## [1] "John Doe"
## 
## $temperature
## [1] 98.1
## 
## $flu_status
## [1] FALSE
## 
## $gender
## [1] MALE
## Levels: FEMALE MALE
## 
## $blood
## [1] O
## Levels: A B AB O
## 
## $symptoms
## [1] SEVERE
## Levels: MILD < MODERATE < SEVERE

# Methods for accessing a list

# get a single list value by position (returns a sub-list): single
# brackets keep the list wrapper, so the output still shows $temperature
subject1[2]
## $temperature
## [1] 98.1
# get a single list value by position (returns a numeric vector): double
# brackets extract the component itself, without the list wrapper
subject1[[2]]
## [1] 98.1
# get a single list value by name; like double brackets, $ returns the
# component itself
subject1$temperature
## [1] 98.1
# get several list items by specifying a vector of names; the result is
# a sub-list holding the named components
subject1[c("temperature", "flu_status")]
## $temperature
## [1] 98.1
## 
## $flu_status
## [1] FALSE

# Access a list like a vector

# get values 2 and 3: single brackets accept a range of positions just
# as they do for a vector, and return a sub-list of those components
subject1[2:3]
## $temperature
## [1] 98.1
## 
## $flu_status
## [1] FALSE

# Data frames ----

# Create a data frame from medical patient data

# combine the patient vectors and factors into one data frame; each
# input becomes a column named after the variable passed in, in the
# order given (subject_name is column 1, symptoms is column 6).
# stringsAsFactors = FALSE asks data.frame() not to convert character
# vectors to factors, so subject_name remains character
pt_data <- data.frame(subject_name, temperature, flu_status, gender,
                      blood, symptoms, stringsAsFactors = FALSE)
# display the data frame
pt_data

# Accessing a data frame

# get a single column; $ returns the column itself, here a character
# vector, rather than a one-column data frame
pt_data$subject_name
## [1] "John Doe"     "Jane Doe"     "Steve Graves"
# get several columns by specifying a vector of names; the result is a
# data frame containing just those columns
pt_data[c("temperature", "flu_status")]
# this is the same as above, extracting temperature and flu_status,
# which are columns 2 and 3 of pt_data
pt_data[2:3]
# accessing by row and column, in the form [row, column]: row 1 of
# column 2 (temperature)
pt_data[1, 2]
## [1] 98.1
# accessing several rows and several columns using vectors: rows 1 and 3
# of columns 2 and 4 (temperature and gender)
pt_data[c(1, 3), c(2, 4)]

# Leave a row or column blank to extract all rows or columns

# column 1, all rows; a single column selected this way is returned as a
# plain vector, as the output below shows
pt_data[, 1]
## [1] "John Doe"     "Jane Doe"     "Steve Graves"
# row 1, all columns
pt_data[1, ]
# all rows and all columns
pt_data[ , ]
# the following are equivalent: the first keeps rows 1 and 3 and names
# the columns it wants; the second drops row 2 and drops columns 1, 3, 5
# and 6, which leaves columns 2 (temperature) and 4 (gender)
pt_data[c(1, 3), c("temperature", "gender")]
pt_data[-2, c(-1, -3, -5, -6)]
# creating a Celsius temperature column: assigning to a $name that is
# not yet in pt_data adds it as a new column; the conversion formula
# treats the existing temperature column as degrees Fahrenheit
pt_data$temp_c <- (pt_data$temperature - 32) * (5 / 9)
# comparing before and after
pt_data[c("temperature", "temp_c")]

# Matrices ----

# a 2x2 matrix: matrix() fills column by column, so with two rows the
# values 1, 2 go down the first column and 3, 4 down the second
m <- matrix(data = c(1, 2, 3, 4), nrow = 2)
m
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
# asking for two columns instead gives the same 2x2 matrix, because
# four values spread over two columns also means two rows
m <- matrix(data = c(1, 2, 3, 4), ncol = 2)
m
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
# six values in two rows: a 2x3 matrix
m <- matrix(data = c(1, 2, 3, 4, 5, 6), nrow = 2)
m
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
# the same six values in two columns: a 3x2 matrix
m <- matrix(data = c(1, 2, 3, 4, 5, 6), ncol = 2)
m
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
# single values are addressed as m[row, column]
m[1, 1]
## [1] 1
m[3, 2]
## [1] 6
# leave the column position blank to extract a whole row
m[1, ]
## [1] 1 4
# leave the row position blank to extract a whole column
m[, 1]
## [1] 1 2 3