R data structures

#Vectors ----
#Create vectors of data for three medical patients
#The three vectors are parallel: element i of each describes the same patient
#subject_name is a character vector
subject_name <- c("John Doe", "Jane Doe", "Steve Graves")
#temperature is a numeric vector
temperature <- c(98.1, 98.6, 101.4)
#flu_status is a logical vector
flu_status <- c(FALSE, FALSE, TRUE)
#Access the second element of the body temperature vector (indexing starts at 1)
temperature[2]
[1] 98.6
#Let's try creating other vectors of data with tax advisors
#Each advisor is its own quoted string so the vector has three elements,
#one per advisor, parallel to returns_completed
tax_advisor <- c("Zia", "Maria", "Coleen")
#number of returns completed by each advisor, in the same order
returns_completed <- c(12, 25, 34)
#access the first number of returns completed
returns_completed[1]
[1] 12
#Examples of accessing items in a vector
#include items in the range 2 to 3 (2:3 expands to the positions 2 and 3)
temperature[2:3]
[1]  98.6 101.4
#exclude item 2 using the minus sign (a negative index drops that position)
temperature[-2]
[1]  98.1 101.4
#use a logical vector to indicate whether to include each item
#(TRUE keeps the item at that position, FALSE drops it)
temperature[c(TRUE, TRUE, FALSE)]
[1] 98.1 98.6
#examples of accessing items in a vector from the tax advisor example
#include items in the range 2 to 3
returns_completed[2:3]
[1] 25 34
#exclude item 3 using the minus sign
returns_completed[-3]
[1] 12 25
#use a logical vector to indicate whether to include each item
returns_completed[c(TRUE, TRUE, FALSE)]
[1] 12 25

Factors

#add gender factor
#no levels are given, so they are taken from the data; the output below
#lists them as FEMALE, MALE
gender <- factor(c("MALE", "FEMALE", "MALE"))
gender
[1] MALE   FEMALE MALE  
Levels: FEMALE MALE
#Add blood type factor
#levels are listed explicitly so that "B" is a valid level even though
#no patient in the data has it
blood <- factor (c("O", "AB", "A"),
                 levels = c("A", "B", "AB", "O"))
blood
[1] O  AB A 
Levels: A B AB O
#add ordered factor
#ordered = TRUE makes the order of the levels meaningful:
#MILD < MODERATE < SEVERE
symptoms <- factor(c("SEVERE", "MILD", "MODERATE"),
                   levels =c("MILD", "MODERATE", "SEVERE"),
                   ordered = TRUE)
symptoms
[1] SEVERE   MILD     MODERATE
Levels: MILD < MODERATE < SEVERE
#Check for symptoms greater than moderate
#the comparison follows the level order, so only SEVERE yields TRUE
symptoms > "MODERATE"
[1]  TRUE FALSE FALSE

Lists

#display information for a patient
#element 1 of each vector/factor belongs to the first patient
#name (character)
subject_name[1]
[1] "John Doe"
#body temperature (numeric)
temperature[1]
[1] 98.1
#flu status (logical)
flu_status[1]
[1] FALSE
#gender (factor; the levels are printed along with the value)
gender[1]
[1] MALE
Levels: FEMALE MALE
#blood type (factor)
blood[1]
[1] O
Levels: A B AB O
#symptom severity (ordered factor)
symptoms[1]
[1] SEVERE
Levels: MILD < MODERATE < SEVERE
#create list for a patient
#a list can hold values of different types; each component is named so it
#can be retrieved by that name later (the flu component must be spelled
#flu_status to match the name used in the lookup further down)
subject1 <- list(fullname = subject_name[1],
                 temperature = temperature[1],
                 flu_status = flu_status[1],
                 gender = gender[1],
                 blood = blood[1],
                 symptoms = symptoms[1])
#display the patient
subject1
$fullname
[1] "John Doe"

$temperature
[1] 98.1

$flu_status
[1] FALSE

$gender
[1] MALE
Levels: FEMALE MALE

$blood
[1] O
Levels: A B AB O

$symptoms
[1] SEVERE
Levels: MILD < MODERATE < SEVERE

Methods for accessing a list

#get a single list value by position (returns a sub-list)
subject1[2]
$temperature
[1] 98.1
#get a single list value by position (returns a numeric vector)
subject1[[2]]
[1] 98.1
#get several list items by specifying a vector of names
#(a name that does not exist in the list would come back as $<NA> NULL)
subject1[c("temperature", "flu_status")]
$temperature
[1] 98.1

$flu_status
[1] FALSE

Data frames


#create a data frame from medical patient data
#each vector/factor becomes one column, in the order given here;
#stringsAsFactors = FALSE keeps character vectors such as subject_name
#as character instead of converting them to factors
pt_data <- data.frame(subject_name, temperature, flu_status, gender,
                      blood, symptoms, stringsAsFactors = FALSE)
#display the data frame
pt_data
NA
#accessing a data frame
#get a single column (returned as a vector)
pt_data$subject_name
[1] "John Doe"     "Jane Doe"     "Steve Graves"
#get several columns by specifying a vector of names
pt_data[c("temperature", "flu_status")]
#this is the same as above, extracting temperature and flu_status,
#which are columns 2 and 3 of the data frame
pt_data[2:3]
#accessing by row and column: row 1, column 2 (temperature)
pt_data[1,2]
[1] 98.1
#accessing several rows and several columns using vectors
#(rows 1 and 3; columns 2 and 4, i.e. temperature and gender)
pt_data[c(1,3), c(2,4)]

Leave a row or column blank to extract all rows or columns

#column 1, all rows (a single column comes back as a vector)
pt_data[,1]
[1] "John Doe"     "Jane Doe"     "Steve Graves"
#row 1, all columns
pt_data[1, ]
#all rows and all columns
pt_data[ , ]
NA
#The following are equivalent
#select rows 1 and 3 and the temperature and gender columns by name
pt_data[c(1,3), c("temperature", "gender")]
NA
#negative indices exclude: dropping row 2 and columns 1, 3, 5 and 6 of the
#six-column data frame leaves rows 1 and 3 and columns 2 (temperature)
#and 4 (gender)
pt_data[-2, c(-1, -3, -5, -6)]
NA
#creating a celsius temperature column
#(temperature is assumed to be in Fahrenheit: C = (F - 32) * 5/9)
pt_data$temp_c <- (pt_data$temperature - 32) * (5/9)
#comparing before and after
pt_data[c("temperature", "temp_c")]

Matrices

#create a 2x2 matrix
#the values are filled in column by column, as the output below shows
m <- matrix(c(1,2,3,4), nrow = 2)
m
     [,1] [,2]
[1,]    1    3
[2,]    2    4
#equivalent to the above: with four values, asking for 2 columns
#also gives 2 rows
m <-matrix(c(1,2,3,4), ncol =2)
m
     [,1] [,2]
[1,]    1    3
[2,]    2    4
#create a 2x3 matrix (six values, 2 rows)
m <- matrix(c(1,2,3,4,5,6), nrow=2)
m
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
#create a 3x2 matrix (six values, 2 columns)
m <- matrix(c(1,2,3,4,5,6), ncol =2)
m
     [,1] [,2]
[1,]    1    4
[2,]    2    5
[3,]    3    6
#extract values from matrices using [row, column]
m[1,1]
[1] 1
m[3,2]
[1] 6
#extract rows: leave the column index blank (the result is a plain vector)
m[1,]
[1] 1 4
#extract columns: leave the row index blank
m[,1]
[1] 1 2 3
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKUiBkYXRhIHN0cnVjdHVyZXMKYGBge3J9CiNWZWN0b3JzIF9fXwojQ3JlYXRlIHZlY3RvcnMgb2YgZGF0YSBmb3IgdGhyZWUgbWVkaWNhbCBwYXRpZW50cwpzdWJqZWN0X25hbWUgPC0gYygiSm9obiBEb2UiLCAiSmFuZSBEb2UiLCAiU3RldmUgR3JhdmVzIikKdGVtcGVyYXR1cmUgPC0gYyg5OC4xLCA5OC42LCAxMDEuNCkKZmx1X3N0YXR1cyA8LSBjKEZBTFNFLCBGQUxTRSwgVFJVRSkKCmBgYApgYGB7cn0KI0FjY2VzcyB0aGUgc2Vjb25kIGVsZW1lbnQgaW4gYm9keSB0ZW1wZXJhdHVyZSB2ZWN0b3IKdGVtcGVyYXR1cmVbMl0KYGBgCgpgYGB7cn0KI0xldCdzIHRyeSBjcmVhdGluZyBvdGhlciB2ZWN0b3JzIG9mIGRhdGEgd2l0aCB0YXggYWR2aXNvcnMKdGF4X2FkdnNpb3IgPC0gYygiWmlhLCBNYXJpYSwgQ29sZWVuIikKcmV0dXJuc19jb21wbGV0ZWQgPC0gYygxMiwgMjUsIDM0KQoKYGBgCmBgYHtyfQojYWNjZXNzIHRoZSBmaXJzdCBudW1iZXIgb2YgcmV0dXJucyBjb21wbGV0ZWQKcmV0dXJuc19jb21wbGV0ZWRbMV0KYGBgCmBgYHtyfQojRXhhbXBsZXMgb2YgYWNjZXNzaW5nIGl0ZW1zIGluIHZlY3RvcgojaW5jbHVkZSBpdGVtcyBpbiB0aGUgcmFuZ2UgMiB0byAzCnRlbXBlcmF0dXJlWzI6M10KYGBgCmBgYHtyfQojZXhjbHVkZSBpdGVtIDIgdXNpbmcgdGhlIG1pbnVzIHNpZ24KdGVtcGVyYXR1cmVbLTJdCmBgYAoKYGBge3J9CiN1c2UgYSB2ZWN0b3IgdG8gaW5kaWNhdGUgd2hldGhlciB0byBpbmNsdWRlIGl0ZW0KdGVtcGVyYXR1cmVbYyhUUlVFLCBUUlVFLCBGQUxTRSldCmBgYApgYGB7cn0KI2V4YW1wbGVzIG9mIGFjY2VzaW5nIGl0ZW1zIGluIHZlY3RvciBmcm9tIHRheCBhZHZpc29yIGV4YW1wbGUKcmV0dXJuc19jb21wbGV0ZWRbMjozXQpgYGAKYGBge3J9CiNleGNsdWRlIGl0ZW0gMiB1c2luZyB0aGUgbWludXMgc2lnbgpyZXR1cm5zX2NvbXBsZXRlZFstM10KYGBgCmBgYHtyfQojdXNlIGEgdmVjdG9yIHRvIGluZGljYXRlIHdoZXRoZXIgdG8gaW5jbHVkZSB0aGUgaXRlbQpyZXR1cm5zX2NvbXBsZXRlZFtjKFRSVUUsIFRSVUUsIEZBTFNFKV0KYGBgCgpGYWN0b3JzLSAKYGBge3J9CiNhZGQgZ2VuZGVyIGZhY3RvcgpnZW5kZXIgPC0gZmFjdG9yKGMoIk1BTEUiLCAiRkVNQUxFIiwgIk1BTEUiKSkKZ2VuZGVyCmBgYApgYGB7cn0KI0FkZCBibG9vZCB0eXBlIGZhY3RvcgpibG9vZCA8LSBmYWN0b3IgKGMoIk8iLCAiQUIiLCAiQSIpLAogICAgICAgICAgICAgICAgIGxldmVscyA9IGMoIkEiLCAiQiIsICJBQiIsICJPIikpCmJsb29kCmBgYApgYGB7cn0KI2FkZCBvcmRlciBmYWN0b3IKc3ltcHRvbXMgPC0gZmFjdG9yKGMoIlNFVkVSRSIsICJNSUxEIiwgIk1PREVSQVRFIiksCiAgICAgICAgICAgICAgICAgICBsZXZlbHMgPWMoIk1JTEQiLCAiTU9ERVJBVEUiLCAiU0VWRVJFIiksCiAgICAgICAgICAgICAgICAgICBvcmRlcmVkID0g
VFJVRSkKc3ltcHRvbXMKYGBgCmBgYHtyfQojQ2hlY2sgZm9yIHN5bXB0b21zIGdyZWF0ZXIgdGhhbiBtb2RlcmF0ZQpzeW1wdG9tcyA+ICJNT0RFUkFURSIKYGBgCgpMaXN0cyAtIApgYGB7cn0KI2Rpc3BsYXkgaW5mcm9tYXRpb24gZm9yIGEgcGF0aWVudApzdWJqZWN0X25hbWVbMV0KYGBgCmBgYHtyfQp0ZW1wZXJhdHVyZVsxXQpgYGAKYGBge3J9CmZsdV9zdGF0dXNbMV0KYGBgCmBgYHtyfQpnZW5kZXJbMV0KYGBgCmBgYHtyfQpibG9vZFsxXQpgYGAKYGBge3J9CnN5bXB0b21zWzFdCgpgYGAKYGBge3J9CiNjcmVhdGUgbGlzdCBmb3IgYSBwYXRpZW50CnN1YmplY3QxIDwtIGxpc3QoZnVsbG5hbWUgPSBzdWJqZWN0X25hbWVbMV0sCiAgICAgICAgICAgICAgICB0ZW1wZXJhdHVyZSA9IHRlbXBlcmF0dXJlWzFdLAogICAgICAgICAgICAgICAgZmx1X3NhdHVzID0gZmx1X3N0YXR1c1sxXSwKICAgICAgICAgICAgICAgIGdlbmRlciA9IGdlbmRlclsxXSwKICAgICAgICAgICAgICAgIGJsb29kID0gYmxvb2RbMV0sCiAgICAgICAgICAgICAgICBzeW1wdG9tcyA9IHN5bXB0b21zWzFdKQpgYGAKYGBge3J9CiNkaXNwbGF5IHRoZSBwYXRpZW50CnN1YmplY3QxCgpgYGAKTWV0aG9kcyBmb3IgYWNjZXNzaW5nIGEgbGlzdApgYGB7cn0KI2dldCBhIHNpbmdsZSBsaXN0IHZhbHVlIGJ5IHBvc2l0aW9uIChyZXR1bnMgYSBzdWItbGlzdCkKc3ViamVjdDFbMl0KCmBgYAoKYGBge3J9CiNnZXQgYSBzaW5nbGUgbGlzdCBieSB2YWx1ZSBieSBwb3N0aW9uIChyZXR1cm5zIGEgbnVtZXJpYyB2ZWN0b3IpCnN1YmplY3QxW1syXV0KCmBgYApgYGB7cn0KI2dldCBzZXZlcmFsIGxpc3QgaXRlbXMgYnkgc3BlY2lmeWluZyBhIHZlY3RvciBvZiBuYW1lcwpzdWJqZWN0MVtjKCJ0ZW1wZXJhdHVyZSIsICJmbHVfc3RhdHVzIildCgpgYGAKRGF0YSBmcmFtZXMKYGBge3J9CgojY3JlYXRlIGEgZGF0YSBmcmFtZSBmcm9tIG1lZGljYWwgcGF0aWVudCBkYXRhCnB0X2RhdGEgPC0gZGF0YS5mcmFtZShzdWJqZWN0X25hbWUsIHRlbXBlcmF0dXJlLCBmbHVfc3RhdHVzLCBnZW5kZXIsCiAgICAgICAgICAgICAgICAgICAgICBibG9vZCwgc3ltcHRvbXMsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKYGBgCmBgYHtyfQojZGlzcGxheSB0aGUgZGF0YSBmcmFtZQpwdF9kYXRhCgpgYGAKCmBgYHtyfQojYWNjZXNzaW5nIGEgZGF0YSBmcmFtZQojZ2V0IGEgc2luZ2xlIGNvbHVtCnB0X2RhdGEkc3ViamVjdF9uYW1lCmBgYApgYGB7cn0KI2dldCBzZXZlcmFsIGNvbHVtbnMgYnkgc3BlY2lmaXlpbmcgYSB2ZWN0b3Igb2YgbmFtZXMKcHRfZGF0YVtjKCJ0ZW1wZXJhdHVyZSIsICJmbHVfc3RhdHVzIildCmBgYAoKYGBge3J9CiN0aGlzIGlzIHRoZSBzYW1lIGFzIGFib3ZlLCBleHRyYWN0aW5nIHRlbXBlcmF0dWUgYW5kIGZsdV9zdGF0dWVzCnB0X2RhdGFbMjozXQpgYGAKYGBge3J9CiNhY2Nlc3NpbmcgYnkgcm93IGFuZCBjb2x1bW4KcHRfZGF0YVsxLDJdCmBgYApgYGB7cn0K
I2FjY2Vzc2luZyBzZXZlcmFsIHJvd3MgYW5kIHNldmVyYWwgY29sdW1ucyB1c2luZyB2ZWN0b3JzCnB0X2RhdGFbYygxLDMpLCBjKDIsNCldCmBgYAoKTGVhdmUgYSByb3cgb3IgY29sdW1uIGJsYW5rIHRvIGV4dHJhY3QgYWxsIHJvd3Mgb3IgY29sdW1ucwpgYGB7cn0KI2NvbHVtbnMgMSwgYWxsIHJvd3MKcHRfZGF0YVssMV0KCmBgYApgYGB7cn0KI3JvdyAxLCBhbGwgY29sdW1ucwpwdF9kYXRhWzEsIF0KYGBgCgpgYGB7cn0KI2FsbCByb3dzIGFuZCBhbGwgY29sdW1ucwpwdF9kYXRhWyAsIF0KCmBgYAoKYGBge3J9CiNUaGUgZm9sbG93aW5nIGFyZSBlcXVpdmFsZW50CnB0X2RhdGFbYygxLDMpLCBjKCJ0ZW1wZXJhdHVyZSIsICJnZW5kZXIiKV0KCmBgYAoKYGBge3J9CnB0X2RhdGFbLTIsIGMoLTEsIC0zLCAtNSwgLTYpXQoKYGBgCmBgYHtyfQojY3JlYXRpbmcgYSBjZWxzaXVzIHRlbXBlcmF0dXJlIGNvbHVtbgpwdF9kYXRhJHRlbXBfYyA8LSAocHRfZGF0YSR0ZW1wZXJhdHVyZSAtIDMyKSAqICg1LzkpCmBgYAoKYGBge3J9CiNjb21wYXJpbmcgYmVmb3JlIGFuZCBhZnRlcgpwdF9kYXRhW2MoInRlbXBlcmF0dXJlIiwgInRlbXBfYyIpXQoKYGBgCk1hdHJpeGVzCgpgYGB7cn0KI2NyZWF0ZSBhIDJYMiBtYXRyaXgKbSA8LSBtYXRyaXgoYygxLDIsMyw0KSwgbnJvdyA9IDIpCm0KCmBgYAoKYGBge3J9CiNlcXVpdmFsZW50IHRvIHRoZSBhYm92ZQptIDwtbWF0cml4KGMoMSwyLDMsNCksIG5jb2wgPTIpCm0KYGBgCmBgYHtyfQojY3JlYXRlIGEgMngzIG1hdHJpeAptIDwtIG1hdHJpeChjKDEsMiwzLDQsNSw2KSwgbnJvdz0yKQptCmBgYApgYGB7cn0KI2NyZWF0ZSBhIDN4MiBtYXRyaXgKbSA8LSBtYXRyaXgoYygxLDIsMyw0LDUsNiksIG5jb2wgPTIpCm0KYGBgCgpgYGB7cn0KI2V4dHJhY3QgdmFsdWVzIGZyb20gbWF0cml4ZXMKbVsxLDFdCgpgYGAKYGBge3J9Cm1bMywyXQpgYGAKYGBge3J9CiNleHRyYWN0IHJvd3MKbVsxLF0KYGBgCmBgYHtyfQojZXh0cmFjdCBjb2x1bW5zCm1bLDFdCgpgYGAKCg==