Vectors

# Double (numeric) vector built with c().
num_vec <- c(3.4, 1.5, 13.3, 5.5)

print(num_vec)
## [1]  3.4  1.5 13.3  5.5
# Integer vector: the L suffix makes each literal an integer, not a double.
int_vec <- c(1L, 3L, 15L, 5L, 60L)

print(int_vec)
## [1]  1  3 15  5 60
# Character vector; a whitespace-only string ("  ") is still a valid element.
char_vec <- c("Use R!", "Data Science", "  ", ":)", "=^.^=")

print(char_vec)
## [1] "Use R!"       "Data Science" "  "           ":)"          
## [5] "=^.^="
# Logical vector. TRUE/FALSE are spelled out in full: the shorthands T and F
# are ordinary variables that can be reassigned, so they are not safe to use.
logical_vec <- c(TRUE, TRUE, TRUE, FALSE, FALSE)

print(logical_vec)
## [1]  TRUE  TRUE  TRUE FALSE FALSE

Test the Vectors

# is.numeric() is TRUE for double vectors (and, below, for integer ones too).
is.numeric(num_vec)
## [1] TRUE
# typeof() reports the underlying storage type as a string.
typeof(char_vec)
## [1] "character"
is.logical(logical_vec)
## [1] TRUE
# Integers count as numeric ...
is.numeric(int_vec)
## [1] TRUE
# ... but they are not stored as doubles.
is.double(int_vec)
## [1] FALSE

Coerce the Vectors

# as.double() converts the integer vector to double storage.
typeof(as.double(int_vec))
## [1] "double"
# Coercing logicals to numeric maps TRUE to 1 and FALSE to 0.
as.numeric(logical_vec)
## [1] 1 1 1 0 0

Indexing

print(num_vec)
## [1]  3.4  1.5 13.3  5.5
# A positive index selects the element at that position (counting from 1).
print(num_vec[2])
## [1] 1.5
print(num_vec)
## [1]  3.4  1.5 13.3  5.5
# A negative index drops the element at that position and returns the rest.
print(num_vec[-2])
## [1]  3.4 13.3  5.5

Factors

# Factor with an explicit set of allowed levels; levels that do not occur in
# the data (e.g. "Physics") are still declared as valid values.
stem_majors <- factor(c("Biology", "Chemistry", "Biology","Computer Science"),
  levels = c("Biology", "Chemistry", "Physics", "Engineering", "Mathematics", "Computer Science"))

# "Computer Information Systems" is not one of the declared levels, so this
# assignment stores NA and raises the warning shown in the output below.
stem_majors[5] <- "Computer Information Systems"
## Warning in `[<-.factor`(`*tmp*`, 5, value = "Computer Information
## Systems"): invalid factor level, NA generated

Lists

# A list can hold vectors of different types. Each element here carries one
# attribute for the same two students.
student_list <- list(
  Name      = c("James", "Tressie"),
  Major     = c("Accounting", "Sociology"),
  CumHours  = c(63, 15),
  CumGPA    = c(2.8, 4.0),
  First_Gen = c(TRUE, TRUE)
)

print(student_list)
## $Name
## [1] "James"   "Tressie"
## 
## $Major
## [1] "Accounting" "Sociology" 
## 
## $CumHours
## [1] 63 15
## 
## $CumGPA
## [1] 2.8 4.0
## 
## $First_Gen
## [1] TRUE TRUE
# str() prints a compact summary: one line per element with its type and length.
str(student_list)
## List of 5
##  $ Name     : chr [1:2] "James" "Tressie"
##  $ Major    : chr [1:2] "Accounting" "Sociology"
##  $ CumHours : num [1:2] 63 15
##  $ CumGPA   : num [1:2] 2.8 4
##  $ First_Gen: logi [1:2] TRUE TRUE

Testing

# Confirm that the object is a list.
is.list(student_list)
## [1] TRUE

Indexing

# $ extracts the named element itself (a character vector here).
student_list$Major
## [1] "Accounting" "Sociology"
# Single brackets keep the list wrapper: the result is a one-element list.
student_list["Major"]
## $Major
## [1] "Accounting" "Sociology"
# Double brackets extract the element at position 2 (Major).
student_list[[2]]
## [1] "Accounting" "Sociology"
# Chained double brackets: the second value inside the second element.
student_list[[2]][[2]]
## [1] "Sociology"

Modifying

# Assigning to a name that does not exist yet appends a new element.
student_list$Ethnicity <- c("Caucasian", "African-American")
print(student_list)
## $Name
## [1] "James"   "Tressie"
## 
## $Major
## [1] "Accounting" "Sociology" 
## 
## $CumHours
## [1] 63 15
## 
## $CumGPA
## [1] 2.8 4.0
## 
## $First_Gen
## [1] TRUE TRUE
## 
## $Ethnicity
## [1] "Caucasian"        "African-American"
# Appending by position adds the element without a name ...
student_list[[7]] <- c("Parks Hall", "Bowie Hall")

names(student_list)
## [1] "Name"      "Major"     "CumHours"  "CumGPA"    "First_Gen" "Ethnicity"
## [7] ""
# ... so the empty name is filled in afterwards by assigning into names().
names(student_list)[7] <- "Residential_Hall"
names(student_list)
## [1] "Name"             "Major"            "CumHours"        
## [4] "CumGPA"           "First_Gen"        "Ethnicity"       
## [7] "Residential_Hall"

Dataframes

# Ten simulated admits. ACT scores are random draws with no seed set here, so
# the printed values differ from run to run.
admits <- data.frame(
  ADMIT_ID = c(paste0("000", 1:9), "0010"),
  ACT = round(runif(n = 10, min = 17, max = 34)),
  HIGH_SCHOOL = c(
    "Blue Springs South",
    "Warrensburg High School",
    "Odessa R-VII SR High School",
    "Blue Springs South",
    "Lees Summit North High School",
    "Lees Summit West High School",
    rep("Warrensburg High School", times = 3),
    "Platt County R-III High School"
  ),
  stringsAsFactors = FALSE
)

print(admits)
##    ADMIT_ID ACT                    HIGH_SCHOOL
## 1      0001  27             Blue Springs South
## 2      0002  27        Warrensburg High School
## 3      0003  32    Odessa R-VII SR High School
## 4      0004  23             Blue Springs South
## 5      0005  21  Lees Summit North High School
## 6      0006  27   Lees Summit West High School
## 7      0007  29        Warrensburg High School
## 8      0008  17        Warrensburg High School
## 9      0009  34        Warrensburg High School
## 10     0010  20 Platt County R-III High School
# str() shows the dimensions plus each column's type and first few values.
str(admits)
## 'data.frame':    10 obs. of  3 variables:
##  $ ADMIT_ID   : chr  "0001" "0002" "0003" "0004" ...
##  $ ACT        : num  27 27 32 23 21 27 29 17 34 20
##  $ HIGH_SCHOOL: chr  "Blue Springs South" "Warrensburg High School" "Odessa R-VII SR High School" "Blue Springs South" ...

Testing

is.data.frame(admits)
## [1] TRUE
# The underlying storage type of a data frame is a list ...
typeof(admits)
## [1] "list"
# ... while its class is what identifies it as a data.frame.
class(admits)
## [1] "data.frame"

Indexing

# Select a column by name.
admits$ACT
##  [1] 27 27 32 23 21 27 29 17 34 20
# Empty row index + column 2: all rows of the second column (same values).
admits[,2]
##  [1] 27 27 32 23 21 27 29 17 34 20
# Row 2, column 2: a single value.
admits[2,2]
## [1] 27
# Equivalent: take the column first, then its second element.
admits$ACT[2]
## [1] 27

Modifying

library(truncnorm)

# Assigning to a new column name adds the column. Values are drawn with
# rtruncnorm() using bounds a = 0 and b = 4, then rounded to two decimals.
admits$HS_GPA <- round(rtruncnorm(n = 10, a = 0, b = 4, mean = 3.337253, sd = 0.5453862), 2)

print(admits)
##    ADMIT_ID ACT                    HIGH_SCHOOL HS_GPA
## 1      0001  27             Blue Springs South   3.40
## 2      0002  27        Warrensburg High School   2.91
## 3      0003  32    Odessa R-VII SR High School   3.15
## 4      0004  23             Blue Springs South   3.32
## 5      0005  21  Lees Summit North High School   2.48
## 6      0006  27   Lees Summit West High School   3.83
## 7      0007  29        Warrensburg High School   2.78
## 8      0008  17        Warrensburg High School   3.17
## 9      0009  34        Warrensburg High School   2.62
## 10     0010  20 Platt County R-III High School   3.56
# Rename the third column (HIGH_SCHOOL) by position.
names(admits)[3] <- "HIGH_SCHOOL_DESC"

names(admits)
## [1] "ADMIT_ID"         "ACT"              "HIGH_SCHOOL_DESC"
## [4] "HS_GPA"
# Ten rows of simulated financial-aid fields:
# APPLIED_FAFSA is a 0/1 draw with probability 0.7 of a 1; EFC is a whole
# number drawn uniformly between 0 and 100000.
financial_aid_info <- data.frame(
  APPLIED_FAFSA = rbinom(n = 10, size = 1, prob = .7),
  EFC = round(runif(n = 10, min = 0, max = 100000))
)

print(financial_aid_info)
##    APPLIED_FAFSA   EFC
## 1              0 42758
## 2              1 90349
## 3              1  5305
## 4              1 59208
## 5              1 72541
## 6              1 54669
## 7              1 14643
## 8              0  6767
## 9              0 27217
## 10             1 68725
# cbind() places the two data frames side by side; rows are paired purely by
# position, so both inputs must list the admits in the same order.
admits_dt <- cbind(admits, financial_aid_info)

print(admits_dt)
##    ADMIT_ID ACT               HIGH_SCHOOL_DESC HS_GPA APPLIED_FAFSA   EFC
## 1      0001  27             Blue Springs South   3.40             0 42758
## 2      0002  27        Warrensburg High School   2.91             1 90349
## 3      0003  32    Odessa R-VII SR High School   3.15             1  5305
## 4      0004  23             Blue Springs South   3.32             1 59208
## 5      0005  21  Lees Summit North High School   2.48             1 72541
## 6      0006  27   Lees Summit West High School   3.83             1 54669
## 7      0007  29        Warrensburg High School   2.78             1 14643
## 8      0008  17        Warrensburg High School   3.17             0  6767
## 9      0009  34        Warrensburg High School   2.62             0 27217
## 10     0010  20 Platt County R-III High School   3.56             1 68725
# Five additional admits with the same columns as admits_dt.
# Every column must supply exactly 5 values: data.frame() silently recycles
# shorter columns when the lengths divide evenly, so a 10-value column here
# would produce 10 rows with each admit listed twice.
# stringsAsFactors = FALSE matches how admits was built, so the text columns
# stay character on every R version.
# The printed values below are illustrative: the draws are random and no seed
# is set.
new_admits <- data.frame(
  ADMIT_ID = paste0("00", 11:15),
  ACT = round(runif(5, 17, 34)),
  HIGH_SCHOOL_DESC = c(
    "Blue Springs South", "Oak Grove R-VI High School",
    "Lees Summit West High School", "Liberty High School",
    "Warrensburg High School"
  ),
  HS_GPA = round(rtruncnorm(n = 5, a = 0, b = 4, mean = 3.337253, sd = 0.5453862), 2),
  APPLIED_FAFSA = rbinom(5, 1, .7),
  EFC = round(runif(5, 0, 100000)),
  stringsAsFactors = FALSE
)

print(new_admits)
##   ADMIT_ID ACT             HIGH_SCHOOL_DESC HS_GPA APPLIED_FAFSA   EFC
## 1     0011  23           Blue Springs South   3.29             0 90454
## 2     0012  32   Oak Grove R-VI High School   3.27             1 77901
## 3     0013  21 Lees Summit West High School   3.34             1 49721
## 4     0014  26          Liberty High School   3.33             0 33175
## 5     0015  23      Warrensburg High School   3.74             1 65546
# rbind() stacks the new rows under the existing ones; both data frames must
# have the same column names.
admits_dataframe <- rbind(admits_dt, new_admits)

print(admits_dataframe)
##    ADMIT_ID ACT               HIGH_SCHOOL_DESC HS_GPA APPLIED_FAFSA   EFC
## 1      0001  27             Blue Springs South   3.40             0 42758
## 2      0002  27        Warrensburg High School   2.91             1 90349
## 3      0003  32    Odessa R-VII SR High School   3.15             1  5305
## 4      0004  23             Blue Springs South   3.32             1 59208
## 5      0005  21  Lees Summit North High School   2.48             1 72541
## 6      0006  27   Lees Summit West High School   3.83             1 54669
## 7      0007  29        Warrensburg High School   2.78             1 14643
## 8      0008  17        Warrensburg High School   3.17             0  6767
## 9      0009  34        Warrensburg High School   2.62             0 27217
## 10     0010  20 Platt County R-III High School   3.56             1 68725
## 11     0011  23             Blue Springs South   3.29             0 90454
## 12     0012  32     Oak Grove R-VI High School   3.27             1 77901
## 13     0013  21   Lees Summit West High School   3.34             1 49721
## 14     0014  26            Liberty High School   3.33             0 33175
## 15     0015  23        Warrensburg High School   3.74             1 65546