This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:
# Variables can be assigned with the leftward (<-), rightward (->) or
# equals (=) operator.
# Values are shown with print() or cat(); cat() joins several items into one
# continuous line of output.

# Assignment using the equals operator.
var.1 = c(0, 1, 2, 3)
# Assignment using the leftward operator.
var.2 <- c("learn", "R")
# Assignment using the rightward operator.
c(TRUE, 1) -> var.3

print(var.1)
## [1] 0 1 2 3
cat("var.1 is ", var.1, "\n")
## var.1 is 0 1 2 3
cat("var.2 is ", var.2, "\n")
## var.2 is learn R
# TRUE was stored as 1 because it was combined with a number.
cat("var.3 is ", var.3, "\n")
## var.3 is 1 1
# A variable has no fixed type: its class follows the value it currently holds.

# Character string.
var_x <- "Hello"
cat("The class of var_x is ", class(var_x), "\n")
## The class of var_x is character

# Number with a decimal part.
var_x <- 34.5
cat(" Now the class of var_x is ", class(var_x), "\n")
## Now the class of var_x is numeric

# The L suffix requests an integer.
var_x <- 27L
cat(" Next the class of var_x becomes ", class(var_x), "\n")
## Next the class of var_x becomes integer
# Arithmetic operators work element by element on two vectors of equal length.
v <- c(2, 5.5, 6)
t <- c(8, 3, 4)

# Addition.
print(v + t)
## [1] 10.0 8.5 10.0

# Subtraction.
print(v - t)
## [1] -6.0 2.5 2.0

# Multiplication.
print(v * t)
## [1] 16.0 16.5 24.0

# Division.
print(v / t)
## [1] 0.250000 1.833333 1.500000

# Remainder of the first vector divided by the second.
print(v %% t)
## [1] 2.0 2.5 2.0

# Integer quotient of the first vector divided by the second.
print(v %/% t)
## [1] 0 1 1

# The first vector raised to the power of the second.
print(v^t)
## [1] 256.000 166.375 1296.000
# A variable can be overwritten as often as needed, even with a value of a
# different type.
x <- 5
print(x)
## [1] 5

# The new value may be computed from the old one.
x <- x + 1
print(x)
## [1] 6

# Evaluating `X` here is left out: it would cause an error if X is not defined.
y <- 6
x <- x + y
print(x)
## [1] 12

# Now x holds text instead of a number.
x <- "some text"
print(x)
## [1] "some text"

# `x <- x + 1` is left commented out because it would cause an error while x
# holds text.
x <- 3.6
print(x)
## [1] 3.6
# Arithmetic operators applied to two single numbers.
x <- 5
y <- 16

# Sum.
x + y
## [1] 21

# Difference.
x - y
## [1] -11

# Product.
x * y
## [1] 80

# Quotient.
y / x
## [1] 3.2

# Integer quotient.
y %/% x
## [1] 3

# Remainder.
y %% x
## [1] 1

# Power.
y^x
## [1] 1048576
# `X` and `x` are assigned separately; both hold 9.
X <- 9
x <- 9
# %% gives the remainder from division: (9 - 2) %% 2 leaves 1.
y <- (x - 2) %% 2
y
## [1] 1
# A single number is applied to every element of the vector.
v <- c(2, 5.5, 6)

# Add 2 to each element.
print(v + 2)
## [1] 4.0 7.5 8.0

# Subtract 2 from each element.
print(v - 2)
## [1] 0.0 3.5 4.0

# Double each element.
print(v * 2)
## [1] 4 11 12

# Halve each element.
print(v / 2)
## [1] 1.00 2.75 3.00
# Square root of a number.
x <- 25
sqrt(x)
## [1] 5

# Evaluate y = b + a1 * x1 + a2 * x2 from named values.
b <- 12
a1 <- 3.5
a2 <- 7.8
x1 <- 1
x2 <- 5
y <- b + a1 * x1 + a2 * x2
y
## [1] 54.5
# Adding vectors of different lengths: the shorter one is repeated to match.
v1 <- c(3, 8, 4, 5, 0, 11)
v2 <- c(4, 11)
# For the addition v2 is treated as c(4, 11, 4, 11, 4, 11).
v3 <- v1 + v2
v3
## [1] 7 19 8 16 4 22
# The colon operator builds the whole numbers from 1 to 5.
s <- 1:5
s
## [1] 1 2 3 4 5

# Create vector with elements from 5 to 9 incrementing by 0.4.
print(seq(from = 5, to = 9, by = 0.4))
## [1] 5.0 5.4 5.8 6.2 6.6 7.0 7.4 7.8 8.2 8.6 9.0

# Mixing text, a number and a logical value: every element becomes text.
s <- c("apple", "red", 5, TRUE)
s
## [1] "apple" "red" "5" "TRUE"
class(s)
## [1] "character"

# A vector made only of numbers stays numeric.
t <- c(5, 3, 5, 6)
t
## [1] 5 3 5 6
class(t)
## [1] "numeric"
# Accessing vector elements using position.
t <- c("Sun", "Mon", "Tue", "Wed", "Thurs", "Fri", "Sat")
u <- t[c(2, 3, 6)]
print(u)
## [1] "Mon" "Tue" "Fri"

# Exercise: what will `v <- t[c(-3)]` result in?

# Accessing vector elements using logical indexing: TRUE keeps the element at
# that position, FALSE drops it.
v <- t[c(TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE)]
print(v)
## [1] "Sun" "Fri"

# Accessing vector elements using negative indexing: the listed positions are
# left out.
x <- t[c(-2, -5)]
print(x)
## [1] "Sun" "Tue" "Wed" "Fri" "Sat"

# Accessing vector elements using 0/1 indexing.
# Numeric 0/1 values are positions, NOT a TRUE/FALSE mask: each 0 selects
# nothing and the 1 selects the first element, so the result is "Sun" even
# though the 1 sits in the seventh place.
y <- t[c(0, 0, 0, 0, 0, 0, 1)]
print(y)
## [1] "Sun"

# To use a 0/1 vector as a mask, convert it to logical first; now the seventh
# element is the one selected.
print(t[as.logical(c(0, 0, 0, 0, 0, 0, 1))])
## [1] "Sat"
# Build a list whose three elements are a vector, a matrix and another list.
list_data <- list(
  c("Jan", "Feb", "Mar"),
  matrix(c(3, 9, 5, 1, -2, 8), nrow = 2),
  list("green", 12.3)
)

# Naming parts of a list: assign one name per element.
names(list_data) <- c("1st Quarter", "A_Matrix", "A Inner list")

# Show the list.
print(list_data)
## $`1st Quarter`
## [1] "Jan" "Feb" "Mar"
##
## $A_Matrix
## [,1] [,2] [,3]
## [1,] 3 5 -2
## [2,] 9 1 8
##
## $`A Inner list`
## $`A Inner list`[[1]]
## [1] "green"
##
## $`A Inner list`[[2]]
## [1] 12.3
# In the printout the names "1st Quarter" and "A Inner list" appear inside
# backticks, while A_Matrix is shown plain.
# Start again from a named list holding a vector, a matrix and a list.
list_data <- list(
  c("Jan", "Feb", "Mar"),
  matrix(c(3, 9, 5, 1, -2, 8), nrow = 2),
  list("green", 12.3)
)
names(list_data) <- c("1st Quarter", "A_Matrix", "A Inner list")

# Add element at the end of the list; the new element has no name.
list_data[4] <- "New element"
print(list_data[4])
## [[1]]
## [1] "New element"

# Remove the last element by assigning NULL to it.
list_data[4] <- NULL

# Print the 4th element: it no longer exists, so a NULL placeholder is shown.
print(list_data[4])
## $<NA>
## NULL

# Update the 3rd element; it keeps its name.
list_data[3] <- "updated element"
print(list_data[3])
## $`A Inner list`
## [1] "updated element"
# Two lists, each holding a single integer sequence.
list1 <- list(1:5)
print(list1)
## [[1]]
## [1] 1 2 3 4 5

list2 <- list(10:14)
print(list2)
## [[1]]
## [1] 10 11 12 13 14

# unlist() flattens each list into a plain vector.
v1 <- unlist(list1)
v2 <- unlist(list2)

print(v1)
## [1] 1 2 3 4 5
print(v2)
## [1] 10 11 12 13 14

# The vectors can now be added element by element.
result <- v1 + v2
print(result)
## [1] 11 13 15 17 19
# A list can mix a vector, a number and even a function (here `sin` itself).
list1 <- list(c(2, 5, 3), 21.3, sin)

# Print the list.
print(list1)
## [[1]]
## [1] 2 5 3
##
## [[2]]
## [1] 21.3
##
## [[3]]
## function (x) .Primitive("sin")

# Double brackets extract the element itself.
list1[[1]]
## [1] 2 5 3

# Here the third element is the value of sin(30), not the function.
list2 <- list(c(2, 5, 3), 21.3, sin(30))
print(list2)
## [[1]]
## [1] 2 5 3
##
## [[2]]
## [1] 21.3
##
## [[3]]
## [1] -0.9880316

# Second value inside the first element.
list2[[1]][[2]]
## [1] 5

# Lists can be nested inside another list.
list3 <- list(list1, list2)
list3
## [[1]]
## [[1]][[1]]
## [1] 2 5 3
##
## [[1]][[2]]
## [1] 21.3
##
## [[1]][[3]]
## function (x) .Primitive("sin")
##
##
## [[2]]
## [[2]][[1]]
## [1] 2 5 3
##
## [[2]][[2]]
## [1] 21.3
##
## [[2]][[3]]
## [1] -0.9880316

# First element of the first inner list.
list3[[1]][[1]]
## [1] 2 5 3

# str() gives a compact overview of the nested structure.
str(list3)
## List of 2
## $ :List of 3
## ..$ : num [1:3] 2 5 3
## ..$ : num 21.3
## ..$ :function (x)
## $ :List of 3
## ..$ : num [1:3] 2 5 3
## ..$ : num 21.3
## ..$ : num -0.988
class(list3)
## [1] "list"
# Create a matrix, filling it row by row.
M <- matrix(
  c("a", "a", "b", "c", "b", "a"),
  nrow = 2,
  ncol = 3,
  byrow = TRUE
)
print(M)
## [,1] [,2] [,3]
## [1,] "a" "a" "b"
## [2,] "c" "b" "a"

# Create an array: the two values are repeated to fill 3 x 3 x 2 cells.
a <- array(c("green", "yellow"), dim = c(3, 3, 2))
print(a)
## , , 1
##
## [,1] [,2] [,3]
## [1,] "green" "yellow" "green"
## [2,] "yellow" "green" "yellow"
## [3,] "green" "yellow" "green"
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] "yellow" "green" "yellow"
## [2,] "green" "yellow" "green"
## [3,] "yellow" "green" "yellow"

# 20 values fit a 4 x 5 array exactly; they are filled column by column.
b <- array(1:20, dim = c(4, 5))
print(b)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 5 9 13 17
## [2,] 2 6 10 14 18
## [3,] 3 7 11 15 19
## [4,] 4 8 12 16 20

# A 2 x 5 array has room for 10 values only: just 1 to 10 are used.
# (The variable is called `c`; calling c(...) as a function still works.)
c <- array(1:20, dim = c(2, 5))
print(c)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 4 6 8 10

# A 7 x 5 array needs 35 values: after 20 the values start again from 1.
c <- array(1:20, dim = c(7, 5))
print(c)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 8 15 2 9
## [2,] 2 9 16 3 10
## [3,] 3 10 17 4 11
## [4,] 4 11 18 5 12
## [5,] 5 12 19 6 13
## [6,] 6 13 20 7 14
## [7,] 7 14 1 8 15

# What is d? 10 is added to every cell of b.
d <- b + 10
d
## [,1] [,2] [,3] [,4] [,5]
## [1,] 11 15 19 23 27
## [2,] 12 16 20 24 28
## [3,] 13 17 21 25 29
## [4,] 14 18 22 26 30

# What is e? A 5 x 2 array holding 1 to 10.
e <- array(1:10, dim = c(5, 2))
# What is f? The matrix product of b (4 x 5) and e (5 x 2), a 4 x 2 result.
f <- b %*% e
f
## [,1] [,2]
## [1,] 175 400
## [2,] 190 440
## [3,] 205 480
## [4,] 220 520
# Make some data: two numeric vectors of length three.
a <- c(1, 2, 3)
b <- c(2, 4, 6)

# cbind() places the vectors side by side as the columns of a matrix; the
# column names come from the variable names.
c <- cbind(a, b)
c
## a b
## [1,] 1 2
## [2,] 2 4
## [3,] 3 6
str(c)
## num [1:3, 1:2] 1 2 3 2 4 6
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "a" "b"

x <- c(2, 2, 2)

# `*` multiplies element by element, so the order does not matter.
a * b
## [1] 2 8 18
b * a
## [1] 2 8 18

# This works (matrix multiplication): a length-3 vector times a 3 x 2 matrix.
x %*% c
## a b
## [1,] 12 24
# Arrays - any number of dimensions.

# Create an array of 3 rows, 3 columns and 2 layers.
a <- array(c("green", "yellow"), dim = c(3, 3, 2))
print(a)
## , , 1
##
## [,1] [,2] [,3]
## [1,] "green" "yellow" "green"
## [2,] "yellow" "green" "yellow"
## [3,] "green" "yellow" "green"
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] "yellow" "green" "yellow"
## [2,] "green" "yellow" "green"
## [3,] "yellow" "green" "yellow"

# Create two vectors of different lengths.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Labels for the rows, the columns and the layers.
column.names <- c("COL1", "COL2", "COL3")
row.names <- c("ROW1", "ROW2", "ROW3")
matrix.names <- c("Matrix1", "Matrix2")

# Take these vectors as input to the array. Together they supply 9 values, so
# the second layer repeats the first.
result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(row.names, column.names, matrix.names)
)
print(result)
## , , Matrix1
##
## COL1 COL2 COL3
## ROW1 5 10 13
## ROW2 9 11 14
## ROW3 3 12 15
##
## , , Matrix2
##
## COL1 COL2 COL3
## ROW1 5 10 13
## ROW2 9 11 14
## ROW3 3 12 15
# Values from 3 to 14, arranged sequentially by row.
M <- matrix(3:14, nrow = 4, byrow = TRUE)
print(M)
## [,1] [,2] [,3]
## [1,] 3 4 5
## [2,] 6 7 8
## [3,] 9 10 11
## [4,] 12 13 14

# The same values, arranged sequentially by column.
N <- matrix(3:14, nrow = 4, byrow = FALSE)
print(N)
## [,1] [,2] [,3]
## [1,] 3 7 11
## [2,] 4 8 12
## [3,] 5 9 13
## [4,] 6 10 14

# Define the column and row names.
rownames <- c("row1", "row2", "row3", "row4")
colnames <- c("col1", "col2", "col3")

# Create the matrix with both sets of names attached.
P <- matrix(
  3:14,
  nrow = 4,
  byrow = TRUE,
  dimnames = list(rownames, colnames)
)
print(P)
## col1 col2 col3
## row1 3 4 5
## row2 6 7 8
## row3 9 10 11
## row4 12 13 14

# Exercise: what will `rownames(M) = rownames` do?

# Matrix elements are accessed as P[row, column].
P
## col1 col2 col3
## row1 3 4 5
## row2 6 7 8
## row3 9 10 11
## row4 12 13 14

# Access the element at 3rd column and 1st row.
print(P[1, 3])
## [1] 5

# Access the element at 2nd column and 4th row.
print(P[4, 2])
## [1] 13

# Access only the 2nd row (column position left empty).
print(P[2, ])
## col1 col2 col3
## 6 7 8

# Access only the 3rd column (row position left empty).
print(P[, 3])
## row1 row2 row3 row4
## 5 8 11 14

# What will P[, "col3"] do? It picks the same column by its name.
P[, "col3"]
## row1 row2 row3 row4
## 5 8 11 14
# Syntax: apply(array, margin, function)
# array - the array to work on; margin - 1 to go by rows, 2 to go by columns,
# c(1, 2) for both; function - what to compute for each slice.

# Create two vectors of different lengths.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Take these vectors as input to the array.
new.array <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)
## , , 1
##
## [,1] [,2] [,3]
## [1,] 5 10 13
## [2,] 9 11 14
## [3,] 3 12 15
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 5 10 13
## [2,] 9 11 14
## [3,] 3 12 15

# Use apply to calculate the sum of the rows across all the matrices.
result <- apply(new.array, 1, sum)
print(result)
## [1] 56 68 60

# Use apply to calculate the sum of the columns across all the matrices.
result <- apply(new.array, 2, sum)
print(result)
## [1] 34 66 84
# Data frames are created with the data.frame() function: one named argument
# per column, all of the same length.
BMI <- data.frame(
  gender = c("Male", "Male", "Female"),
  height = c(152, 171.5, 165),
  weight = c(81, 93, 78),
  Age = c(42, 38, 26)
)
print(BMI)
## gender height weight Age
## 1 Male 152.0 81 42
## 2 Male 171.5 93 38
## 3 Female 165.0 78 26
# Create the employee data frame; names are kept as plain text.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  stringsAsFactors = FALSE
)

# Print the data frame.
print(emp.data)
## emp_id emp_name salary start_date
## 1 1 Rick 623.30 2012-01-01
## 2 2 Dan 515.20 2013-09-23
## 3 3 Michelle 611.00 2014-11-15
## 4 4 Ryan 729.00 2014-05-11
## 5 5 Gary 843.25 2015-03-27

# Structure of the data frame: one line per column with its type.
str(emp.data)
## 'data.frame': 5 obs. of 4 variables:
## $ emp_id : int 1 2 3 4 5
## $ emp_name : chr "Rick" "Dan" "Michelle" "Ryan" ...
## $ salary : num 623 515 611 729 843
## $ start_date: Date, format: "2012-01-01" "2013-09-23" ...

# Print the summary - get statistical summaries of every column.
print(summary(emp.data))
## emp_id emp_name salary start_date
## Min. :1 Length:5 Min. :515.2 Min. :2012-01-01
## 1st Qu.:2 Class :character 1st Qu.:611.0 1st Qu.:2013-09-23
## Median :3 Mode :character Median :623.3 Median :2014-05-11
## Mean :3 Mean :664.4 Mean :2014-01-14
## 3rd Qu.:4 3rd Qu.:729.0 3rd Qu.:2014-11-15
## Max. :5 Max. :843.2 Max. :2015-03-27

# Extract specific columns by name with `$`. Notice the naming convention:
# the new column names are built from the expressions that were passed in.
result <- data.frame(emp.data$emp_name, emp.data$salary)
print(result)
## emp.data.emp_name emp.data.salary
## 1 Rick 623.30
## 2 Dan 515.20
## 3 Michelle 611.00
## 4 Ryan 729.00
## 5 Gary 843.25
str(result)
## 'data.frame': 5 obs. of 2 variables:
## $ emp.data.emp_name: chr "Rick" "Dan" "Michelle" "Ryan" ...
## $ emp.data.salary : num 623 515 611 729 843

# Extract the first two rows (all columns) - row numbers instead of names.
result <- emp.data[1:2, ]
print(result)
## emp_id emp_name salary start_date
## 1 1 Rick 623.3 2012-01-01
## 2 2 Dan 515.2 2013-09-23

# Add a column to a data frame: assign a vector to a new column name.
# Add the "dept" column.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
v <- emp.data
print(v)
## emp_id emp_name salary start_date dept
## 1 1 Rick 623.30 2012-01-01 IT
## 2 2 Dan 515.20 2013-09-23 Operations
## 3 3 Michelle 611.00 2014-11-15 IT
## 4 4 Ryan 729.00 2014-05-11 HR
## 5 5 Gary 843.25 2015-03-27 Finance
# Create the first data frame.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  dept = c("IT", "Operations", "IT", "HR", "Finance"),
  stringsAsFactors = FALSE
)

# Create the second data frame with the same columns. The department labels
# must be spelled exactly as in the first frame, otherwise the combined data
# would contain two different labels for the same department.
emp.newdata <- data.frame(
  emp_id = 6:8,
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  dept = c("IT", "Operations", "Finance"),
  stringsAsFactors = FALSE
)

# Bind the two data frames - rbind() appends the rows of the second frame
# below those of the first.
emp.finaldata <- rbind(emp.data, emp.newdata)
print(emp.finaldata)
## emp_id emp_name salary start_date dept
## 1 1 Rick 623.30 2012-01-01 IT
## 2 2 Dan 515.20 2013-09-23 Operations
## 3 3 Michelle 611.00 2014-11-15 IT
## 4 4 Ryan 729.00 2014-05-11 HR
## 5 5 Gary 843.25 2015-03-27 Finance
## 6 6 Rasmi 578.00 2013-05-21 IT
## 7 7 Pranab 722.50 2013-07-30 Operations
## 8 8 Tusar 632.80 2014-06-17 Finance
# Factors store categorical data as a fixed set of levels.

# Create a vector as input.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
print(data)
## [1] "East" "West" "East" "North" "North" "East" "West" "West" "West"
## [10] "East" "North"
# A plain character vector is not a factor.
print(is.factor(data))
## [1] FALSE

# Apply the factor function; the levels are listed in alphabetical order.
factor_data <- factor(data)
print(factor_data)
## [1] East West East North North East West West West East North
## Levels: East North West
print(is.factor(factor_data))
## [1] TRUE

# The factor as created, before its levels are reordered.
print(factor_data)
## [1] East West East North North East West West West East North
## Levels: East North West

# Apply the factor function with required order of the level; the values stay
# the same, only the order of the levels changes.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)
## [1] East West East North North East West West West East North
## Levels: East West North
# Factors inside a data frame.
# Since R 4.0 data.frame() keeps text columns as character by default, so a
# text column meant to be categorical is converted explicitly with factor().

# Create the vectors for data frame.
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# Create the data frame. The factor column is named explicitly: passing
# factor(gender) without a name would label the column `factor.gender.`, and
# input_data$gender would then be NULL.
input_data <- data.frame(height, weight, gender = factor(gender))
print(input_data)
## height weight gender
## 1 132 48 male
## 2 151 49 male
## 3 162 66 female
## 4 139 53 female
## 5 166 67 male
## 6 147 52 female
## 7 122 40 male
str(input_data)
## 'data.frame': 7 obs. of 3 variables:
## $ height: num 132 151 162 139 166 147 122
## $ weight: num 48 49 66 53 67 52 40
## $ gender: Factor w/ 2 levels "female","male": 2 2 1 1 2 1 2

# Test if the gender column is a factor.
print(is.factor(input_data$gender))
## [1] TRUE

# Print the gender column to see the levels.
print(input_data$gender)
## [1] male male female female male female male
## Levels: female male