Last updated: 19:57:02 IST, 24 July, 2023

This page is intended to be an introduction to R Commands for beginners.

Working Directory

# Get working directory
getwd()
## [1] "d:/expt-r"
# Set working directory
setwd("d:/expt-r")

Help

# get help
#help()

# get help for a function named 'summary'
#help(summary)

# get help with a ? mark before the command
#?summary

Source and Sink

# Execute a set of R commands from a file 'filename.R'
# source("filename.R")

# Route the output to a file
# sink("outputfile")
# sink() # restores output to console

Objects

# List of objects in memory
ls()
## character(0)
objects()
## character(0)
# Remove objects in memory
#rm(<specific object>)

# Remove all objects in memory
#rm(list=ls())

Assignment

# Atomic Datatypes
# character, integer, double, logical, complex
# Assignment of a value to variable is through an expression
# Some examples
x <- 5
x <- 1+5i
x <- "abc"
x <- TRUE
x <- FALSE
5 -> x  

Operations

# Arithmetic operations
a <- 5
b <- a + 5
b
## [1] 10
# Logical Operations
a <- TRUE
b <- TRUE
c <- FALSE
a 
## [1] TRUE
b 
## [1] TRUE
c 
## [1] FALSE
d <- a & b # AND
d 
## [1] TRUE
d <- a | c  # OR
d 
## [1] TRUE
d <- a & c
d 
## [1] FALSE
a 
## [1] TRUE
!a   # NOT operator 
## [1] FALSE
# Comparison operators (<, >, <=, >=, ==) also return logical values
a <- 10
b <- 5
a > b       # Greater than
## [1] TRUE
a < b        # Less than
## [1] FALSE

typeof()

# Get to know the type of an object x
typeof(x)
## [1] "double"

Vectors

# Vectors can hold elements of a particular datatype
x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
y <- c(x, 5, x)
z <- c("a", "b")

# create a vector with integer values
z <- c(5L, 6L)

# Coercion Example
x <- c(1.7, "a")  #Coercion to character type
typeof(x)
## [1] "character"
x  
## [1] "1.7" "a"
# Vector arithmetic is performed element by element
p<- c(2,2)
q<- c(1,3)
# Operands should have the same length; shorter ones (such as the
# constants 2 and 1 here) are recycled to match the longest.
v <- 2*p + q + 1   # v is c(6, 8)
# Size of a vector
length(a) 
## [1] 1
# Length of an object can be adjusted. 
a <- c(1,2,3,4)
a  
## [1] 1 2 3 4
length(a) <- 3
a 
## [1] 1 2 3
length(a) <- 5
a 
## [1]  1  2  3 NA NA

Generating a sequence of values

# Use seq() function for generating sequences

x <- seq(from=1, to=10, by=0.1) 
x
##  [1]  1.0  1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2.0  2.1  2.2  2.3  2.4
## [16]  2.5  2.6  2.7  2.8  2.9  3.0  3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9
## [31]  4.0  4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5.0  5.1  5.2  5.3  5.4
## [46]  5.5  5.6  5.7  5.8  5.9  6.0  6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9
## [61]  7.0  7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  8.0  8.1  8.2  8.3  8.4
## [76]  8.5  8.6  8.7  8.8  8.9  9.0  9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9
## [91] 10.0
s <- seq(length=51, from=-5, by=.2)
# Print the values of s. Notice that the output spreads over several lines,
# each line starting with the index of its first element.
s 
##  [1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4 -2.2
## [16] -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2  0.0  0.2  0.4  0.6  0.8
## [31]  1.0  1.2  1.4  1.6  1.8  2.0  2.2  2.4  2.6  2.8  3.0  3.2  3.4  3.6  3.8
## [46]  4.0  4.2  4.4  4.6  4.8  5.0
# Repeating elements
p <- c(1, 2)

# Repeat the whole vector five times ('times' may also be given positionally)
x <- rep(p, times = 5)
x
##  [1] 1 2 1 2 1 2 1 2 1 2
y <- rep(p, 5)
y
##  [1] 1 2 1 2 1 2 1 2 1 2
# Repeat each element five times before moving on to the next element
z <- rep(p, each = 5)
z
##  [1] 1 1 1 1 1 2 2 2 2 2

Infinity, NA and NaN

x <- 0/1
x  
## [1] 0
# Infinity represented as 'Inf'
x <-1/0
x # 
## [1] Inf
# Not a Number (NaN)
x <-0/0
x 
## [1] NaN
# NA is 'Not Available'
a <- c(5,NA,3)
a
## [1]  5 NA  3
b <- a
b
## [1]  5 NA  3
b <- a*3
b  
## [1] 15 NA  9
#is.na(xx) is TRUE both for NA and NaN values. 
#is.nan(xx) is only TRUE for NaNs.
x <- c(5,NA,NaN,7) 
x 
## [1]   5  NA NaN   7
is.na(x) 
## [1] FALSE  TRUE  TRUE FALSE
is.nan(x) 
## [1] FALSE FALSE  TRUE FALSE
# Integers 1..10 and 13..20 with two missing values in between
x <- c(1:10, NA, NA, 13:20)
x
##  [1]  1  2  3  4  5  6  7  8  9 10 NA NA 13 14 15 16 17 18 19 20
x[!is.na(x)]
##  [1]  1  2  3  4  5  6  7  8  9 10 13 14 15 16 17 18 19 20

Accessing elements in a vector

a <- c(5,10,15,20,25)
a[1]   
## [1] 5
a[2]   
## [1] 10
# Accessing elements in a vector
x[1]
## [1] 1
#all elements except the first one
x[-1]
##  [1]  2  3  4  5  6  7  8  9 10 NA NA 13 14 15 16 17 18 19 20
# First five elements
x[1:5]
## [1] 1 2 3 4 5
# Exclude first five elements
x[-(1:5)]
##  [1]  6  7  8  9 10 NA NA 13 14 15 16 17 18 19 20
# Last six elements
x[(length(x)-5):length(x)]
## [1] 15 16 17 18 19 20

Lists

# Lists can hold elements with different datatypes
mylist <- list(name="Fred", no.children=3, child.ages=c(4,7,9))
mylist[1]
## $name
## [1] "Fred"
mylist[2]
## $no.children
## [1] 3
mylist[3]
## $child.ages
## [1] 4 7 9
mylist$child.ages[1]
## [1] 4
mylist$child.ages[3]
## [1] 9
mylist[[1]]   # is same as mylist$name
## [1] "Fred"
mylist[[2]]   # is same as mylist$no.children
## [1] 3
mylist[[3]]   # is same as mylist$child.ages
## [1] 4 7 9
mylist[[3]][1] # is same as mylist$child.ages[1]
## [1] 4
mylist[[3]][2] # is same as mylist$child.ages[2]
## [1] 7

Conversion of data type

z<- 0:9
z 
##  [1] 0 1 2 3 4 5 6 7 8 9
digits <- as.character(z)
digits  
##  [1] "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
z <-as.integer(z)
z 
##  [1] 0 1 2 3 4 5 6 7 8 9
#An “empty” object still has a type.
e <- numeric()
e 
## numeric(0)
e <- character()
e 
## character(0)

Factors

# Non-Ordered Factors
a <- c("Male","Female","Male","Male")
factor_a <- factor(a)
typeof(a) 
## [1] "character"
typeof(factor_a) 
## [1] "integer"
print(factor_a) 
## [1] Male   Female Male   Male  
## Levels: Female Male
# Ordered Factors
b <- c("L","M","H","L")
b
## [1] "L" "M" "H" "L"
factor_b <- factor(b,order=TRUE,levels=c("L","M","H"))
print(factor_b)
## [1] L M H L
## Levels: L < M < H

Matrices

# Create a 2x3 Matrix without any elements
m <- matrix(nrow=2,ncol=3)
m
##      [,1] [,2] [,3]
## [1,]   NA   NA   NA
## [2,]   NA   NA   NA
dim(m)
## [1] 2 3
attributes(m)
## $dim
## [1] 2 3
# Matrices are constructed column-wise
m <-matrix (1:6,nrow=2,ncol=3)
m
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
# Convert a vector to a matrix by adjusting the dimensions
m <- 1:10
dim(m) <- c(2,5)
m
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10
nrow(m)
## [1] 2
ncol(m)
## [1] 5
# 3 dimensional arrays
m <- 1:24
dim(m) <- c(4,3,2)
m
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   13   17   21
## [2,]   14   18   22
## [3,]   15   19   23
## [4,]   16   20   24
# Use cbind() and rbind() functions to combine R objects by columns or by rows, respectively
# Use them to create matrices
x <- 1:3
y <- 10:12
z <- 20:22
m <- cbind(x,y)
m
##      x  y
## [1,] 1 10
## [2,] 2 11
## [3,] 3 12
m <- rbind(x,y)
m
##   [,1] [,2] [,3]
## x    1    2    3
## y   10   11   12

Vectors, Lists, Matrices and Arrays

#Vector 
v1 <- c(1:10); v1; dim(v1); attributes(v1); class(v1); typeof(v1)
##  [1]  1  2  3  4  5  6  7  8  9 10
## NULL
## NULL
## [1] "integer"
## [1] "integer"
#Lists
list1 <- list(1:10); list1; dim(list1); attributes(list1); class(list1); typeof(list1)
## [[1]]
##  [1]  1  2  3  4  5  6  7  8  9 10
## NULL
## NULL
## [1] "list"
## [1] "list"
list2 <- list("abc", 5.6, TRUE, 10L,c(5,6,7)); list2; dim(list2); attributes(list2); class(list2); typeof(list2)
## [[1]]
## [1] "abc"
## 
## [[2]]
## [1] 5.6
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] 10
## 
## [[5]]
## [1] 5 6 7
## NULL
## NULL
## [1] "list"
## [1] "list"
list3 <- list(name="abc", age=5.6, is_student=TRUE, weight=10L, marks=c(5,6,7)); list3; dim(list3); attributes(list3);class(list3);typeof(list3)
## $name
## [1] "abc"
## 
## $age
## [1] 5.6
## 
## $is_student
## [1] TRUE
## 
## $weight
## [1] 10
## 
## $marks
## [1] 5 6 7
## NULL
## $names
## [1] "name"       "age"        "is_student" "weight"     "marks"
## [1] "list"
## [1] "list"
#Arrays
#Z <- array(data_vector, dim_vector)
a1 <- array(1:24,c(2,3,4))  ; a1; dim(a1); attributes(a1); class(a1); typeof(a1)
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    7    9   11
## [2,]    8   10   12
## 
## , , 3
## 
##      [,1] [,2] [,3]
## [1,]   13   15   17
## [2,]   14   16   18
## 
## , , 4
## 
##      [,1] [,2] [,3]
## [1,]   19   21   23
## [2,]   20   22   24
## [1] 2 3 4
## $dim
## [1] 2 3 4
## [1] "array"
## [1] "integer"
a2 <- array(1:6,c(2,3))  ; a2; dim(a2); attributes(a2); class(a2); typeof(a2)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## [1] 2 3
## $dim
## [1] 2 3
## [1] "matrix" "array"
## [1] "integer"
a3 <- array(1:3,c(1,3))  ; a3; dim(a3); attributes(a3); class(a3); typeof(a3)
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [1] 1 3
## $dim
## [1] 1 3
## [1] "matrix" "array"
## [1] "integer"
a4 <- array(1:2,c(2,1)); a4; dim(a4); attributes(a4); class(a4); typeof(a4)
##      [,1]
## [1,]    1
## [2,]    2
## [1] 2 1
## $dim
## [1] 2 1
## [1] "matrix" "array"
## [1] "integer"
#Matrix
m1 <- matrix(1:6,nrow=2,ncol=3); m1; dim(m1); attributes(m1); class(m1); typeof(m1)
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## [1] 2 3
## $dim
## [1] 2 3
## [1] "matrix" "array"
## [1] "integer"

DATAFRAME

# Creating a dataframe using data.frame()
name <- c("Ram","Kumar","Leela")
gender <- c("M","M","F")
height <- c(153.3,173.2,170.5)
age <- c(25,27,26)

df <- data.frame(name,gender,height,age)
df
##    name gender height age
## 1   Ram      M  153.3  25
## 2 Kumar      M  173.2  27
## 3 Leela      F  170.5  26
# Get structure of the dataframe
str(df)
## 'data.frame':    3 obs. of  4 variables:
##  $ name  : chr  "Ram" "Kumar" "Leela"
##  $ gender: chr  "M" "M" "F"
##  $ height: num  153 173 170
##  $ age   : num  25 27 26
# Get row names
rownames(df)
## [1] "1" "2" "3"
# Get col names
colnames(df)
## [1] "name"   "gender" "height" "age"
# Another way to get row names
row.names(df)
## [1] "1" "2" "3"
# Get a summary of the dataframe
summary(df)
##      name              gender              height           age      
##  Length:3           Length:3           Min.   :153.3   Min.   :25.0  
##  Class :character   Class :character   1st Qu.:161.9   1st Qu.:25.5  
##  Mode  :character   Mode  :character   Median :170.5   Median :26.0  
##                                        Mean   :165.7   Mean   :26.0  
##                                        3rd Qu.:171.8   3rd Qu.:26.5  
##                                        Max.   :173.2   Max.   :27.0
# Create a subset of the dataframe under some conditions
subset(df,gender=="F")
##    name gender height age
## 3 Leela      F  170.5  26
#Extracting a column of the dataframe
df$name
## [1] "Ram"   "Kumar" "Leela"
mean(df$height)
## [1] 165.6667
# Accessing data from a dataframe
df$name
## [1] "Ram"   "Kumar" "Leela"
df[1]
##    name
## 1   Ram
## 2 Kumar
## 3 Leela
df[1:3]
##    name gender height
## 1   Ram      M  153.3
## 2 Kumar      M  173.2
## 3 Leela      F  170.5
#Add another column to the dataframe
df$city <- c("Delhi","Bombay","Delhi")
df
##    name gender height age   city
## 1   Ram      M  153.3  25  Delhi
## 2 Kumar      M  173.2  27 Bombay
## 3 Leela      F  170.5  26  Delhi
# Combining two df with same set of columns
dfnew <- rbind(df,df)
dfnew <- rbind(dfnew,dfnew)
dfnew <- rbind(dfnew,dfnew)
dfnew <- rbind(dfnew,dfnew)

# Get the first few rows of a dataframe
head(dfnew)
##    name gender height age   city
## 1   Ram      M  153.3  25  Delhi
## 2 Kumar      M  173.2  27 Bombay
## 3 Leela      F  170.5  26  Delhi
## 4   Ram      M  153.3  25  Delhi
## 5 Kumar      M  173.2  27 Bombay
## 6 Leela      F  170.5  26  Delhi
# Get the last few rows of a dataframe
tail(dfnew)
##     name gender height age   city
## 43   Ram      M  153.3  25  Delhi
## 44 Kumar      M  173.2  27 Bombay
## 45 Leela      F  170.5  26  Delhi
## 46   Ram      M  153.3  25  Delhi
## 47 Kumar      M  173.2  27 Bombay
## 48 Leela      F  170.5  26  Delhi
#Another example implementation

Col_A <- runif(10,min=10,max=20)
Col_B <- rnorm(10,mean=25,sd=1)
df <- data.frame(Col_A,Col_B)
df
##       Col_A    Col_B
## 1  10.07390 25.89788
## 2  13.76707 26.69443
## 3  11.69064 24.97166
## 4  10.48133 24.17288
## 5  18.23005 24.54403
## 6  12.62370 26.02923
## 7  15.23046 24.52895
## 8  13.25574 23.62686
## 9  19.62945 25.26378
## 10 11.16252 26.05964
Col_C <- rnorm(10,mean=15,sd=2)
Col_D <- rnorm(10,mean=100,sd=2)

df <- cbind(df, Col_C)
df
##       Col_A    Col_B    Col_C
## 1  10.07390 25.89788 14.26405
## 2  13.76707 26.69443 15.42672
## 3  11.69064 24.97166 12.47615
## 4  10.48133 24.17288 14.93380
## 5  18.23005 24.54403 15.82702
## 6  12.62370 26.02923 16.69077
## 7  15.23046 24.52895 16.31722
## 8  13.25574 23.62686 15.81878
## 9  19.62945 25.26378 15.01939
## 10 11.16252 26.05964 16.27902
df$Col_D <- Col_D
df
##       Col_A    Col_B    Col_C     Col_D
## 1  10.07390 25.89788 14.26405 100.98332
## 2  13.76707 26.69443 15.42672  95.75710
## 3  11.69064 24.97166 12.47615 101.54778
## 4  10.48133 24.17288 14.93380 100.58335
## 5  18.23005 24.54403 15.82702 100.91446
## 6  12.62370 26.02923 16.69077  99.45412
## 7  15.23046 24.52895 16.31722  97.73053
## 8  13.25574 23.62686 15.81878 102.67945
## 9  19.62945 25.26378 15.01939  97.71984
## 10 11.16252 26.05964 16.27902 102.57861

Controlling the execution Flow

# If Else Check
a <- 5
b <- 6
if (a>b) { 
  print("a is bigger") 
} else {
  print("a is not bigger")
}
## [1] "a is not bigger"
# Three-way comparison: chain a second test onto the 'else' branch
a <- 5
b <- 5
if (a > b) {
  print("a is bigger")
} else if (a < b) {
  print("a is smaller")
} else {
  # Reached only when neither a > b nor a < b holds
  print("a is equal to b")
}
## [1] "a is equal to b"
# Switch construct: pick a value by its position
number <- 3
value <- switch(number,"one","two","three")
print(value) 
## [1] "three"

Loops

#for (var in expr_1) {expr_2}
#var is the variable name
#expr_1 is a vector with elements
#expr_2 is the expression(s) to be executed

for (i in c(5,7,9,11,13)) 
{
  print(i)
}
## [1] 5
## [1] 7
## [1] 9
## [1] 11
## [1] 13
for (i in 1:5)
{
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
for (i in seq(5))
{
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# Yet another way to loop - the 'while' loop; 
i <- 1
while (i < 10)
{
  print(i)
  i <- i+1
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9

Usage of break and next in loops

# There are situations when under some conditions
# the loop should be terminated. Use 'break' statement for this.

# There are situations where a particular iteration of the 
# loop is to be skipped but the next set of iterations
# should continue. Use 'next' statement for this. 

# Examples of usage of break and next
# 'for' assigns i from the sequence at the start of every iteration, so the
# loop needs no manual initialisation or increment of i.
for (i in 1:10) {
  # if (i == 5) break   # would terminate the loop as soon as i reaches 5
  if (i == 5) next      # skip only the iteration where i is 5
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# Yet another loop method: Use 'repeat'
# 'repeat' has no exit condition of its own, so the body must call 'break'
i <- 1
repeat {
  if (i > 10) {
    break
  }
  print(i)
  i <- i + 1
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

Constants

#There are constants stored and readily available. Examples:

LETTERS
##  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"
letters
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
# Access letters 5 to 10
for (i in letters[5:10])
{
  print(i)
}
## [1] "e"
## [1] "f"
## [1] "g"
## [1] "h"
## [1] "i"
## [1] "j"
# Use seq_along function to create a sequence of indices for letters
for (i in seq_along(letters))
{
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26

Calling and Writing your own functions

# Calling a function
x <- 1:100
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   25.75   50.50   50.50   75.25  100.00
seq(5)
## [1] 1 2 3 4 5
seq(from=10,to=15)
## [1] 10 11 12 13 14 15
# Defining our own functions


# myfunction1: takes no arguments and prints a fixed message.
# print() hands its argument back invisibly, so the message string is also
# the function's (invisible) result.
myfunction1 <- function() {
  message_text <- "Executing myfunction1"
  print(message_text)
}
# Calling the function
myfunction1()
## [1] "Executing myfunction1"
# A function with arguments x and y.
# Prints a banner naming the function, then the value of 2*x + y.
myfunction2 <- function(x, y) {
  print("Executing myfunction2")
  print(2 * x + y)
}
# Calling with positional arguments
myfunction2(3, 2)
## [1] "Executing myfunction2"
## [1] 8
# Calling with named arguments
myfunction2(x = 3, y = 2)
## [1] "Executing myfunction2"
## [1] 8

Read/Write Data from/to a csv file

# create a csv file using Excel and save it in working directory.
readdata <- read.csv('csvfile.csv')
typeof(readdata)
## [1] "list"
print(readdata)
##    X  Y   Z
## 1  3  9  48
## 2  3 15  78
## 3  4 14  74
## 4  2 13  67
## 5  0 20 100
## 6  2 14  72
## 7  3 11  58
## 8  4 18  94
## 9  0  8  40
## 10 3 19  98
## 11 3 14  73
## 12 1 15  76
## 13 0 10  50
## 14 0 11  55
## 15 1 15  76
## 16 4 16  84
## 17 1 13  66
## 18 0 11  55
## 19 4  7  39
## 20 1 12  61
print(paste(typeof(readdata$X),typeof(readdata$Y),typeof(readdata$Z)))
## [1] "integer integer integer"
#write data to a file. 
df <- mtcars
write.csv(df,"testdump.csv")