## Creating Vectors ##
# Tutorial transcript: an expression on a line of its own autoprints its
# value, and the output recorded for it is kept in the `## ` comment below it.

# c() combines its arguments into one atomic vector of a single type.
x <- c(0.5, 0.6) # numeric
x
## [1] 0.5 0.6
x <- c(TRUE, FALSE) # logical
x
## [1] TRUE FALSE
# T and F are ordinary variables preset to TRUE and FALSE. They can be
# reassigned, so prefer the full names outside of demonstrations like this.
x <- c(T, F) # logical
x
## [1] TRUE FALSE
x <- c("a", "b", "c") # character
x
## [1] "a" "b" "c"
x <- 9:29 # integer; the `:` operator already returns a vector
x
## [1] 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
x <- c(1+0i, 2+4i) # complex
x
## [1] 1+0i 2+4i

# vector() creates a vector of the given mode and length, filled with zeros
# for mode "numeric".
x <- vector("numeric", length = 10)
x
## [1] 0 0 0 0 0 0 0 0 0 0

# Vectors cannot grow in place: "inserting" builds a new vector from pieces.
x <- c(88, 233, 45, 67)
x <- c(x[1:3], 168, x[4]) # insert 168 before 67
x
## [1] 88 233 45 168 67

# Length of a vector
x <- c(1, 2, 4)
length(x)
## [1] 3
x <- c() # c() with no arguments returns NULL
x
## NULL
length(x)
## [1] 0
1:length(x) # pitfall: counts down from 1 to 0 when x is empty
## [1] 1 0
seq_along(x) # safe alternative: an empty sequence for an empty input
## integer(0)

# Implicit coercion: mixed types are converted to the most general type.
y <- c(1.7, "a") # character, 1.7 is converted into "1.7"
y
## [1] "1.7" "a"
y <- c(TRUE, 2) # numeric, TRUE is converted into number (1)
y
## [1] 1 2
y <- c("a", TRUE) # character, TRUE is converted to "TRUE"
y
## [1] "a" "TRUE"

# Regular sequences with seq()
x <- seq(from = 12, to = 30, by = 3)
x
## [1] 12 15 18 21 24 27 30
# The argument is spelled out as length.out rather than relying on partial
# matching of `length`.
x <- seq(from = 1.1, to = 2, length.out = 10)
x
## [1] 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0

# Repetition with rep()
x <- rep(8, 4)
x
## [1] 8 8 8 8
rep(c(5, 12, 13), 3) # repeat the whole vector 3 times
## [1] 5 12 13 5 12 13 5 12 13
rep(c(5, 12, 13), each = 2) # repeat every element twice
## [1] 5 5 12 12 13 13
rep(1:3, 2)
## [1] 1 2 3 1 2 3

# Indexing with positive positions
y <- c(1.2, 3.9, 0.4, 0.12)
y[c(1, 3)] # extract elements 1 and 3 of y
## [1] 1.2 0.4
y[2:3]
## [1] 3.9 0.4
v <- 3:4
y[v] # the index may itself be a variable
## [1] 0.40 0.12
x <- c(4, 2, 17, 5)
x
## [1] 4 2 17 5
y <- x[c(1, 1, 3)] # a position may be repeated
y
## [1] 4 4 17

# Indexing with negative positions drops elements
z <- c(5, 12, 13)
z[-1] # exclude element 1
## [1] 12 13
z # indexing returns a new vector; z itself is unchanged
## [1] 5 12 13
z[-1:-2] # exclude elements 1 through 2
## [1] 13
z
## [1] 5 12 13
z[1:(length(z) - 1)] # all but the last element
## [1] 5 12
z[-length(z)] # same result using a negative index
## [1] 5 12

# any() / all() reduce a logical vector to a single TRUE or FALSE
x <- 1:10
any(x > 8)
## [1] TRUE
any(x > 88)
## [1] FALSE
all(x > 88)
## [1] FALSE
all(x > 0)
## [1] TRUE
# Marks of four students, built with the combine function c()
student.marks <- c(10, 20, 30, 40)
student.marks # autoprint the contents of the variable
## [1] 10 20 30 40

# Vectorized operations, flavor I: single vector in, scalar out
mean(student.marks)
## [1] 25

# Vectorized operations, flavor II: single vector in, single vector out
student.marks <- student.marks + 5 # arithmetic: 5 is added to every element
student.marks
## [1] 15 25 35 45
student.marks >= 30 # comparison: one TRUE/FALSE per element
## [1] FALSE FALSE TRUE TRUE

# Vectorized operations, flavor III: multiple vectors in, single vector out
student.physics.marks <- c(20, 40, 30, 50)
student.chemistry.marks <- c(30, 20, 50, 20)
# Element-wise addition of the two equally long vectors
student.total.marks <- student.physics.marks + student.chemistry.marks
student.total.marks
## [1] 50 60 80 70
# One vector per attribute; position i in every vector describes student i.
student.names <- c("Raj", "Rahul", "Priya", "Poonam")
student.names
## [1] "Raj" "Rahul" "Priya" "Poonam"
student.weights <- c(60.5, 72.5, 45.2, 47.5)
student.weights
## [1] 60.5 72.5 45.2 47.5
student.genders <- factor(c("Male", "Male", "Female", "Female"))
student.genders
## [1] Male Male Female Female
## Levels: Female Male
student.physics.marks <- c(70L, 75L, 80L, 85L) # the L suffix makes integers
student.physics.marks
## [1] 70 75 80 85
student.chemistry.marks <- c(60L, 70L, 85L, 70L)
student.chemistry.marks
## [1] 60 70 85 70

# A list can hold elements of different types. Here: everything known about
# the first student, as an unnamed list.
student1 <- list(
  student.names[1],
  student.weights[1],
  student.genders[1],
  student.physics.marks[1],
  student.chemistry.marks[1]
)
str(student1)
## List of 5
## $ : chr "Raj"
## $ : num 60.5
## $ : Factor w/ 2 levels "Female","Male": 2
## $ : int 70
## $ : int 60
student1
## [[1]]
## [1] "Raj"
##
## [[2]]
## [1] 60.5
##
## [[3]]
## [1] Male
## Levels: Female Male
##
## [[4]]
## [1] 70
##
## [[5]]
## [1] 60

# The same list with a name attached to every element
student1 <- list(
  name = student.names[1],
  weight = student.weights[1],
  gender = student.genders[1],
  physics = student.physics.marks[1],
  chemistry = student.chemistry.marks[1]
)
str(student1)
## List of 5
## $ name : chr "Raj"
## $ weight : num 60.5
## $ gender : Factor w/ 2 levels "Female","Male": 2
## $ physics : int 70
## $ chemistry: int 60

# A list element may itself be a vector: both marks under a single name
student1 <- list(
  name = student.names[1],
  weight = student.weights[1],
  gender = student.genders[1],
  marks = c(student.physics.marks[1], student.chemistry.marks[1])
)
str(student1)
## List of 4
## $ name : chr "Raj"
## $ weight: num 60.5
## $ gender: Factor w/ 2 levels "Female","Male": 2
## $ marks : int [1:2] 70 60

# Subsetting: extract element(s) from an unnamed list
student1 <- list(
  student.names[1],
  student.weights[1],
  student.genders[1],
  student.physics.marks[1],
  student.chemistry.marks[1]
)
student1[1] # single brackets [] keep the container: the result is a list
## [[1]]
## [1] "Raj"
typeof(student1[1])
## [1] "list"
student1[[1]] # double brackets [[]] return the element in its own type
## [1] "Raj"
typeof(student1[[1]])
## [1] "character"
student1[1:3] # several elements at once, selected by position
## [[1]]
## [1] "Raj"
##
## [[2]]
## [1] 60.5
##
## [[3]]
## [1] Male
## Levels: Female Male

# Subsetting: extract element(s) from a named list
student1 <- list(
  name = student.names[1],
  weight = student.weights[1],
  gender = student.genders[1],
  physics = student.physics.marks[1],
  chemistry = student.chemistry.marks[1]
)
student1[["name"]] # one element, selected by its name
## [1] "Raj"
student1$gender # $ selects one element by a literal name
## [1] Male
## Levels: Female Male
student1[c("physics", "chemistry")] # several elements, selected by name
## $physics
## [1] 70
##
## $chemistry
## [1] 60
length(student1) # number of elements in the list
## [1] 5
# Matrices: two-dimensional structures whose elements all share one type.
student.physics.marks <- c(70L, 75L, 80L, 85L)
student.chemistry.marks <- c(60L, 70L, 85L, 70L)

# rbind() stacks the vectors as rows; the row names come from the variable
# names.
student.marks <- rbind(student.physics.marks, student.chemistry.marks)
student.marks
## [,1] [,2] [,3] [,4]
## student.physics.marks 70 75 80 85
## student.chemistry.marks 60 70 85 70

# cbind() places the vectors side by side as columns.
student.marks <- cbind(student.physics.marks, student.chemistry.marks)
student.marks
## student.physics.marks student.chemistry.marks
## [1,] 70 60
## [2,] 75 70
## [3,] 80 85
## [4,] 85 70
rownames(student.marks) <- c("Raj", "Rahul", "Priya", "Poonam")
student.marks
## student.physics.marks student.chemistry.marks
## Raj 70 60
## Rahul 75 70
## Priya 80 85
## Poonam 85 70
str(student.marks)
## int [1:4, 1:2] 70 75 80 85 60 70 85 70
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:4] "Raj" "Rahul" "Priya" "Poonam"
## ..$ : chr [1:2] "student.physics.marks" "student.chemistry.marks"

# matrix() fills column by column unless byrow = TRUE is given.
student.marks <- matrix(
  c(70L, 75L, 80L, 85L, 60L, 70L, 85L, 70L),
  ncol = 2, nrow = 4
)
student.marks
## [,1] [,2]
## [1,] 70 60
## [2,] 75 70
## [3,] 80 85
## [4,] 85 70
student.marks <- matrix(
  c(70L, 75L, 80L, 85L, 60L, 70L, 85L, 70L),
  ncol = 4, nrow = 2, byrow = TRUE
)
student.marks
## [,1] [,2] [,3] [,4]
## [1,] 70 75 80 85
## [2,] 60 70 85 70

## Creating a matrix for operations
student.physics.marks <- c(70L, 75L, 80L, 85L)
student.chemistry.marks <- c(60L, 70L, 85L, 70L)
student.marks <- cbind(student.physics.marks, student.chemistry.marks)
rownames(student.marks) <- c("Raj", "Rahul", "Priya", "Poonam")
student.marks
## student.physics.marks student.chemistry.marks
## Raj 70 60
## Rahul 75 70
## Priya 80 85
## Poonam 85 70

# Subsetting: extract element(s) from a matrix with [row, column].
# An index left empty selects everything along that dimension.
student.marks[, ] # both indices empty: the whole matrix
## student.physics.marks student.chemistry.marks
## Raj 70 60
## Rahul 75 70
## Priya 80 85
## Poonam 85 70
student.marks[2, 2] # a single cell
## [1] 70
student.marks[2, ] # one row, returned as a named vector
## student.physics.marks student.chemistry.marks
## 75 70
student.marks[, 2] # one column, returned as a named vector
## Raj Rahul Priya Poonam
## 60 70 85 70
student.marks[1:3, ] # a range of rows
## student.physics.marks student.chemistry.marks
## Raj 70 60
## Rahul 75 70
## Priya 80 85
student.marks[c(1, 3), ] # selected rows by position
## student.physics.marks student.chemistry.marks
## Raj 70 60
## Priya 80 85
# Logical mask: rows flagged TRUE are kept. TRUE/FALSE are spelled out because
# the shorthands T and F are plain variables that can be reassigned.
student.marks[c(TRUE, FALSE, FALSE, TRUE), ]
## student.physics.marks student.chemistry.marks
## Raj 70 60
## Poonam 85 70

# Summaries along rows and columns
student.marks
## student.physics.marks student.chemistry.marks
## Raj 70 60
## Rahul 75 70
## Priya 80 85
## Poonam 85 70
rowSums(student.marks) # row-wise sum
## Raj Rahul Priya Poonam
## 130 145 165 155
colSums(student.marks) # column-wise sum
## student.physics.marks student.chemistry.marks
## 310 285
colMeans(student.marks) # column-wise mean
## student.physics.marks student.chemistry.marks
## 77.50 71.25
# Data frames: a table built from equally long vectors, one per column.
student.names <- c("Raj", "Rahul", "Priya", "Poonam")
student.weights <- c(60.5, 72.5, 45.2, 47.5)
student.genders <- factor(c("Male", "Male", "Female", "Female"))
student.physics.marks <- c(70L, 75L, 80L, 85L)
student.chemistry.marks <- c(60L, 70L, 85L, 70L)

# stringsAsFactors = TRUE converts character vectors to factors. It was the
# default before R 4.0.0; it is spelled out here so that the factor output
# recorded below is produced on every R version.
students <- data.frame(
  student.names, student.weights, student.genders,
  student.physics.marks, student.chemistry.marks,
  stringsAsFactors = TRUE
)
typeof(students) # a data frame is stored as a list of columns
## [1] "list"
students
## student.names student.weights student.genders student.physics.marks
## 1 Raj 60.5 Male 70
## 2 Rahul 72.5 Male 75
## 3 Priya 45.2 Female 80
## 4 Poonam 47.5 Female 85
## student.chemistry.marks
## 1 60
## 2 70
## 3 85
## 4 70
str(students)
## 'data.frame': 4 obs. of 5 variables:
## $ student.names : Factor w/ 4 levels "Poonam","Priya",..: 4 3 2 1
## $ student.weights : num 60.5 72.5 45.2 47.5
## $ student.genders : Factor w/ 2 levels "Female","Male": 2 2 1 1
## $ student.physics.marks : int 70 75 80 85
## $ student.chemistry.marks: int 60 70 85 70

# Use stringsAsFactors = FALSE to avoid conversion of character vectors to
# factors. Vectors that already are factors (student.genders) stay factors.
students <- data.frame(
  student.names, student.weights, student.genders,
  student.physics.marks, student.chemistry.marks,
  stringsAsFactors = FALSE
)
str(students)
## 'data.frame': 4 obs. of 5 variables:
## $ student.names : chr "Raj" "Rahul" "Priya" "Poonam"
## $ student.weights : num 60.5 72.5 45.2 47.5
## $ student.genders : Factor w/ 2 levels "Female","Male": 2 2 1 1
## $ student.physics.marks : int 70 75 80 85
## $ student.chemistry.marks: int 60 70 85 70

# Column access works as for lists.
students[1] # single brackets [] keep the container: a one-column data frame
## student.names
## 1 Raj
## 2 Rahul
## 3 Priya
## 4 Poonam
typeof(students[1])
## [1] "list"
students[[1]] # double brackets [[]] return the column in its own type
## [1] "Raj" "Rahul" "Priya" "Poonam"
typeof(students[[1]])
## [1] "character"
students[["student.names"]] # [[ ]] also accepts the column name
## [1] "Raj" "Rahul" "Priya" "Poonam"
typeof(students[["student.names"]])
## [1] "character"
students$student.names # $ returns the column in its own type
## [1] "Raj" "Rahul" "Priya" "Poonam"
typeof(students$student.names)
## [1] "character"
students[1:3] # several columns by position
## student.names student.weights student.genders
## 1 Raj 60.5 Male
## 2 Rahul 72.5 Male
## 3 Priya 45.2 Female
## 4 Poonam 47.5 Female
students[c("student.physics.marks", "student.chemistry.marks")] # by name
## student.physics.marks student.chemistry.marks
## 1 70 60
## 2 75 70
## 3 80 85
## 4 85 70

# Matrix-style access with [row, column]
students
## student.names student.weights student.genders student.physics.marks
## 1 Raj 60.5 Male 70
## 2 Rahul 72.5 Male 75
## 3 Priya 45.2 Female 80
## 4 Poonam 47.5 Female 85
## student.chemistry.marks
## 1 60
## 2 70
## 3 85
## 4 70
students[1, 2] # row number, column number
## [1] 60.5
students[1:3, 1:2]
## student.names student.weights
## 1 Raj 60.5
## 2 Rahul 72.5
## 3 Priya 45.2
students[c(1, 2), c(1, 3)]
## student.names student.genders
## 1 Raj Male
## 2 Rahul Male
students[, 1] # a single column is returned as a plain vector
## [1] "Raj" "Rahul" "Priya" "Poonam"
students[1, ] # a single row is still a data frame
## student.names student.weights student.genders student.physics.marks
## 1 Raj 60.5 Male 70
## student.chemistry.marks
## 1 60
# Logical mask over the rows; TRUE/FALSE are spelled out because the
# shorthands T and F are plain variables that can be reassigned.
students[c(TRUE, FALSE, TRUE, FALSE), ]
## student.names student.weights student.genders student.physics.marks
## 1 Raj 60.5 Male 70
## 3 Priya 45.2 Female 80
## student.chemistry.marks
## 1 60
## 3 85
# Filter rows on a condition. The condition reads the data frame's own
# columns, so it stays correct even if the stand-alone vectors the data frame
# was built from are later changed or removed.
students[students$student.genders == "Male", ]
## student.names student.weights student.genders student.physics.marks
## 1 Raj 60.5 Male 70
## 2 Rahul 72.5 Male 75
## student.chemistry.marks
## 1 60
## 2 70
students[students$student.physics.marks >= 75, ]
## student.names student.weights student.genders student.physics.marks
## 2 Rahul 72.5 Male 75
## 3 Priya 45.2 Female 80
## 4 Poonam 47.5 Female 85
## student.chemistry.marks
## 2 70
## 3 85
## 4 70
# Create data frame
# A dataset is roughly a table: a list of equally long vectors (columns).
# The outputs recorded below show Name as a factor, which was the default
# conversion for strings before R 4.0.0. stringsAsFactors = TRUE is spelled
# out on the frames that carry Name so the recorded outputs hold on every R
# version.
id <- c(1, 2, 3)
name <- c("John", "Kirk", "AJ")
age <- c(21, 27, 18)
employees <- data.frame(ID = id, Name = name, Age = age,
                        stringsAsFactors = TRUE)
employees
## ID Name Age
## 1 1 John 21
## 2 2 Kirk 27
## 3 3 AJ 18
city <- c("New York", "Chicago", "London")
address <- data.frame(ID = id, City = city)
address
## ID City
## 1 1 New York
## 2 2 Chicago
## 3 3 London
more.id <- c(11, 12, 13)
more.name <- c("Kira", "Jen", "Liz")
more.age <- c(25, 27, 21)
more.employees <- data.frame(ID = more.id, Name = more.name, Age = more.age,
                             stringsAsFactors = TRUE)
more.employees
## ID Name Age
## 1 11 Kira 25
## 2 12 Jen 27
## 3 13 Liz 21
# ----------------------------------
# Inspect data frames
# check the first few rows (at most 6 by default)
head(employees)
## ID Name Age
## 1 1 John 21
## 2 2 Kirk 27
## 3 3 AJ 18
# check the last few rows (at most 6 by default)
tail(employees)
## ID Name Age
## 1 1 John 21
## 2 2 Kirk 27
## 3 3 AJ 18
# ----------------------------------
# Accessing elements of data frame
# data frames are addressed by row and column in matrix notation
# get a value from a cell (a particular row and a particular column)
employees[1, 2] # first row, second column
## [1] John
## Levels: AJ John Kirk
employees[1, "Name"] # first row, column by name
## [1] John
## Levels: AJ John Kirk
employees[1, ]$Name # first row, then the column by name
## [1] John
## Levels: AJ John Kirk
# get one row
employees[1, ]
## ID Name Age
## 1 1 John 21
# get one column
employees[, 2]
## [1] John Kirk AJ
## Levels: AJ John Kirk
employees[, "Name"]
## [1] John Kirk AJ
## Levels: AJ John Kirk
employees$Name
## [1] John Kirk AJ
## Levels: AJ John Kirk
# get multiple rows/columns (subset)
employees[1:2, ] # returns 2 rows
## ID Name Age
## 1 1 John 21
## 2 2 Kirk 27
employees[, 1:2] # returns 2 columns
## ID Name
## 1 1 John
## 2 2 Kirk
## 3 3 AJ
employees[, c(1, 2)] # returns 2 columns
## ID Name
## 1 1 John
## 2 2 Kirk
## 3 3 AJ
employees[, c("ID", "Name")] # returns 2 columns
## ID Name
## 1 1 John
## 2 2 Kirk
## 3 3 AJ
# get rows that pass a test
employees[employees$Age > 20, ]
## ID Name Age
## 1 1 John 21
## 2 2 Kirk 27
# ----------------------------------
# Data Frame properties
# number of rows
nrow(employees)
## [1] 3
# number of columns
ncol(employees)
## [1] 3
# summary stats
summary(employees)
## ID Name Age
## Min. :1.0 AJ :1 Min. :18.0
## 1st Qu.:1.5 John:1 1st Qu.:19.5
## Median :2.0 Kirk:1 Median :21.0
## Mean :2.0 Mean :22.0
## 3rd Qu.:2.5 3rd Qu.:24.0
## Max. :3.0 Max. :27.0
# structure
str(employees)
## 'data.frame': 3 obs. of 3 variables:
## $ ID : num 1 2 3
## $ Name: Factor w/ 3 levels "AJ","John","Kirk": 2 3 1
## $ Age : num 21 27 18
# ----------------------------------
# Manipulate data frame
# set value
employees[3, "Age"] <- 29
# order: order() returns the row positions that sort Age ascending
employees[order(employees$Age), ]
## ID Name Age
## 1 1 John 21
## 2 2 Kirk 27
## 3 3 AJ 29
# reverse order (TRUE is spelled out; the shorthand T can be reassigned)
employees[order(employees$Age, decreasing = TRUE), ]
## ID Name Age
## 3 3 AJ 29
## 2 2 Kirk 27
## 1 1 John 21
# merging data frames: rows are matched on the shared ID column
merge(employees, address, by = "ID")
## ID Name Age City
## 1 1 John 21 New York
## 2 2 Kirk 27 Chicago
## 3 3 AJ 29 London
# add rows
all.employees <- rbind(employees, more.employees)
all.employees
## ID Name Age
## 1 1 John 21
## 2 2 Kirk 27
## 3 3 AJ 29
## 4 11 Kira 25
## 5 12 Jen 27
## 6 13 Liz 21
# add columns
cbind(employees, city) # the city vector becomes a new column named "city"
## ID Name Age city
## 1 1 John 21 New York
## 2 2 Kirk 27 Chicago
## 3 3 AJ 29 London
# grouping
# aggregate is similar to GROUP BY in SQL. Here: the number of employees per
# age, obtained by applying length() to the Name values of every age group.
aggregate(all.employees[, 2], list(Age = all.employees$Age), FUN = length)
## Age x
## 1 21 2
## 2 25 1
## 3 27 2
## 4 29 1
# A column of a data frame is a vector, so all vector operations can be
# applied to it, e.g. math/stats functions. (A row extracted with [i, ] is
# a one-row data frame, not a vector.)
mean(all.employees$Age)
## [1] 25
# ----------------------------------
# Test for data frame
is.data.frame(employees)
## [1] TRUE
# Arrays: the same idea as a matrix, extended to more than two dimensions.
# Marks of class 1: one column per subject, one row per student.
class1.student.physics.marks <- c(70L, 75L, 80L, 85L)
class1.student.chemistry.marks <- c(60L, 70L, 85L, 70L)
class1.student.marks <- cbind(
  class1.student.physics.marks,
  class1.student.chemistry.marks
)
class1.student.marks
## class1.student.physics.marks class1.student.chemistry.marks
## [1,] 70 60
## [2,] 75 70
## [3,] 80 85
## [4,] 85 70

# Marks of class 2, laid out the same way
class2.student.physics.marks <- c(71L, 76L, 81L, 86L)
class2.student.chemistry.marks <- c(61L, 71L, 86L, 71L)
class2.student.marks <- cbind(
  class2.student.physics.marks,
  class2.student.chemistry.marks
)
class2.student.marks
## class2.student.physics.marks class2.student.chemistry.marks
## [1,] 71 61
## [2,] 76 71
## [3,] 81 86
## [4,] 86 71

# Stack both 4 x 2 matrices into a 4 x 2 x 2 array: the third index picks
# the class ("sheet"). The column names of the matrices are not carried over.
student.marks <- array(
  c(class1.student.marks, class2.student.marks),
  dim = c(4, 2, 2)
)
student.marks
## , , 1
##
## [,1] [,2]
## [1,] 70 60
## [2,] 75 70
## [3,] 80 85
## [4,] 85 70
##
## , , 2
##
## [,1] [,2]
## [1,] 71 61
## [2,] 76 71
## [3,] 81 86
## [4,] 86 71
student.marks[2, 2, 2] # row number, column number, sheet number
## [1] 71
student.marks[1:3, 2, ] # rows 1-3 of column 2, across both sheets
## [,1] [,2]
## [1,] 60 61
## [2,] 70 71
## [3,] 85 86