Introduction_to_R

Last updated: 19:57:02 IST, 24 July, 2023

This page is intended to be an introduction to R Commands for beginners.

Working Directory

# Get working directory
getwd()

## [1] "d:/expt-r"

# Set working directory
setwd("d:/expt-r")

Help

# get help
#help()

# get help for a function named 'summary'
#help(summary)

# get help with a ? mark before the command
#?summary

Source and Sink

# Execute a set of R commands from a file 'filename.R'
# source("filename.R")

# Route the output to a file
# sink("outputfile")
# sink() # restores output to console

Objects

# List of objects in memory
ls()

## character(0)

objects()

## character(0)

# Remove objects in memory
#rm(<specific object>)

# Remove all objects in memory
#rm(list=ls())

Assignment

# Atomic Datatypes
# character, integer, double, logical, complex
# Assignment of a value to variable is through an expression
# Some examples
x <- 5
x <- 1+5i
x <- "abc"
x <- TRUE
x <- FALSE
5 -> x

Print variables

x <- c(5,6,7,8)
# Type the variable name to print its value
x

## [1] 5 6 7 8

# Use print function to print value of a variable
print(x)

## [1] 5 6 7 8

# Use paste() for printing multiple values in one print call
print(paste(x[1],x[2],x[3],x[4]))

## [1] "5 6 7 8"

Operations

# Arithmetic operations
a <- 5
b <- a + 5
b

## [1] 10

# Logical Operations
# Three logical values used as operands in the examples below.
# (Naming a variable `c` does not break later calls to c(): when a name is
# called as a function, R looks for a function object of that name.)
a <- TRUE
b <- TRUE
c <- FALSE
# Print each operand so that every output line has a visible statement
a

## [1] TRUE

b

## [1] TRUE

c

## [1] FALSE

d <- a & b # AND
d

## [1] TRUE

d <- a | c  # OR
d

## [1] TRUE

d <- a & c  # AND with a FALSE operand
d

## [1] FALSE

a    # value of a before negation

## [1] TRUE

!a   # NOT operator

## [1] FALSE

# Similarly <,>,<=, >=, ==
a <- 10
b <- 5
a > b       # Greater than

## [1] TRUE

a < b        # Less than

## [1] FALSE

typeof()

# Get to know the type of an object x
typeof(x)

## [1] "double"

Vectors

# Vectors can hold elements of a particular datatype
x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
y <- c(x, 5, x)
z <- c("a", "b")

# create a vector with integer values
z <- c(5L, 6L)

# Coercion Example
# Mixing a number and a string in c() coerces every element to character
x <- c(1.7, "a")
typeof(x)

## [1] "character"

# Print the vector: the number 1.7 is now the string "1.7"
x

## [1] "1.7" "a"

# Vector arithmetic
p<- c(2,2)
q<- c(1,3)
v <- 2*p + q + 1   # vectors should be of same length
# Size of a vector
length(a)

## [1] 1

# Length of an object can be adjusted. 
a <- c(1,2,3,4)
a

## [1] 1 2 3 4

length(a) <- 3
a

## [1] 1 2 3

length(a) <- 5
a

## [1]  1  2  3 NA NA

Generating a sequence of values

# Use seq() function for generating sequences

x <- seq(from=1, to=10, by=0.1) 
x

##  [1]  1.0  1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2.0  2.1  2.2  2.3  2.4
## [16]  2.5  2.6  2.7  2.8  2.9  3.0  3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9
## [31]  4.0  4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5.0  5.1  5.2  5.3  5.4
## [46]  5.5  5.6  5.7  5.8  5.9  6.0  6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9
## [61]  7.0  7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  8.0  8.1  8.2  8.3  8.4
## [76]  8.5  8.6  8.7  8.8  8.9  9.0  9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9
## [91] 10.0

s <- seq(length=51, from=-5, by=.2)
# Print values of s. Notice that the output is spread
# over several lines, each line starting with the index of its first element
s

##  [1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4 -2.2
## [16] -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2  0.0  0.2  0.4  0.6  0.8
## [31]  1.0  1.2  1.4  1.6  1.8  2.0  2.2  2.4  2.6  2.8  3.0  3.2  3.4  3.6  3.8
## [46]  4.0  4.2  4.4  4.6  4.8  5.0

# Repeating elements
p <- c(1,2)

x <- rep(p, times=5)  #  x <- rep(p,5)
x

##  [1] 1 2 1 2 1 2 1 2 1 2

y = rep(p,5)
y

##  [1] 1 2 1 2 1 2 1 2 1 2

z = rep(p, each=5)
z

##  [1] 1 1 1 1 1 2 2 2 2 2

Infinity, NA and NaN

x <- 0/1
x

## [1] 0

# Infinity represented as 'Inf'
x <-1/0
x #

## [1] Inf

# Not a Number (NaN)
x <-0/0
x

## [1] NaN

# NA is 'Not Available'
a <- c(5,NA,3)
a

## [1]  5 NA  3

b <- a
b

## [1]  5 NA  3

b <- a*3
b

## [1] 15 NA  9

#is.na(xx) is TRUE both for NA and NaN values. 
#is.nan(xx) is only TRUE for NaNs.
x <- c(5,NA,NaN,7) 
x

## [1]   5  NA NaN   7

is.na(x)

## [1] FALSE  TRUE  TRUE FALSE

is.nan(x)

## [1] FALSE FALSE  TRUE FALSE

# Build a vector of 1..20 with positions 11 and 12 missing.
# 1:10 is already the sequence; seq(1:10) only gives the same values because
# seq() applied to a vector returns that vector's indices.
x <- c(1:10, NA, NA, seq(13, 20))
x

##  [1]  1  2  3  4  5  6  7  8  9 10 NA NA 13 14 15 16 17 18 19 20

# Keep only the elements that are not missing
x[!is.na(x)]

##  [1]  1  2  3  4  5  6  7  8  9 10 13 14 15 16 17 18 19 20

Accessing elements in a vector

a <- c(5,10,15,20,25)
a[1]

## [1] 5

a[2]

## [1] 10

# Accessing elements in a vector
x[1]

## [1] 1

#all elements except the first one
x[-1]

##  [1]  2  3  4  5  6  7  8  9 10 NA NA 13 14 15 16 17 18 19 20

# First five elements
x[1:5]

## [1] 1 2 3 4 5

# Exclude first five elements
x[-(1:5)]

##  [1]  6  7  8  9 10 NA NA 13 14 15 16 17 18 19 20

# Last six elements (indices length(x)-5 through length(x))
x[(length(x)-5):length(x)]

## [1] 15 16 17 18 19 20

Lists

# Lists can hold elements with different datatypes
mylist <- list(name="Fred", no.children=3, child.ages=c(4,7,9))
mylist[1]

## $name
## [1] "Fred"

mylist[2]

## $no.children
## [1] 3

mylist[3]

## $child.ages
## [1] 4 7 9

mylist$child.ages[1]

## [1] 4

mylist$child.ages[3]

## [1] 9

mylist[[1]]   # is same as mylist$name

## [1] "Fred"

mylist[[2]]   # is same as mylist$no.children

## [1] 3

mylist[[3]]   # is same as mylist$child.ages

## [1] 4 7 9

mylist[[3]][1] # is same as mylist$child.ages[1]

## [1] 4

mylist[[3]][2] # is same as mylist$child.ages[2]

## [1] 7

Conversion of data type

z<- 0:9
z

##  [1] 0 1 2 3 4 5 6 7 8 9

digits <- as.character(z)
digits

##  [1] "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"

z <-as.integer(z)
z

##  [1] 0 1 2 3 4 5 6 7 8 9

#An “empty” object still has a type.
e <- numeric()
e

## numeric(0)

e <- character()
e

## character(0)

Factors

# Non-Ordered Factors
a <- c("Male","Female","Male","Male")
factor_a <- factor(a)
typeof(a)

## [1] "character"

typeof(factor_a)

## [1] "integer"

print(factor_a)

## [1] Male   Female Male   Male  
## Levels: Female Male

# Ordered Factors
# 'levels' fixes the ordering L < M < H. The argument name 'ordered' is
# written in full instead of relying on partial matching of 'order'.
b <- c("L", "M", "H", "L")
b

## [1] "L" "M" "H" "L"

factor_b <- factor(b, ordered = TRUE, levels = c("L", "M", "H"))
print(factor_b)

## [1] L M H L
## Levels: L < M < H

Matrices

# Create a 2x3 Matrix without any elements
m <- matrix(nrow=2,ncol=3)
m

##      [,1] [,2] [,3]
## [1,]   NA   NA   NA
## [2,]   NA   NA   NA

dim(m)

## [1] 2 3

attributes(m)

## $dim
## [1] 2 3

# Matrices are constructed column-wise
m <-matrix (1:6,nrow=2,ncol=3)
m

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

# Convert a vector to a matrix by adjusting the dimensions
m <- 1:10
dim(m) <- c(2,5)
m

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10

nrow(m)

## [1] 2

ncol(m)

## [1] 5

# 3 dimensional arrays
m <- 1:24
dim(m) <- c(4,3,2)
m

## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   13   17   21
## [2,]   14   18   22
## [3,]   15   19   23
## [4,]   16   20   24

# Use cbind() and rbind() functions to combine R objects by columns or rows, respectively
# Use them to create matrices
x <- 1:3
y <- 10:12
z <- 20:22
m <- cbind(x,y)
m

##      x  y
## [1,] 1 10
## [2,] 2 11
## [3,] 3 12

m <- rbind(x,y)
m

##   [,1] [,2] [,3]
## x    1    2    3
## y   10   11   12

Vectors, Lists, Matrices and Arrays

#Vector 
v1 <- c(1:10); v1; dim(v1); attributes(v1); class(v1); typeof(v1)

##  [1]  1  2  3  4  5  6  7  8  9 10

## NULL

## NULL

## [1] "integer"

## [1] "integer"

#Lists
list1 <- list(1:10); list1; dim(list1); attributes(list1); class(list1); typeof(list1)

## [[1]]
##  [1]  1  2  3  4  5  6  7  8  9 10

## NULL

## NULL

## [1] "list"

## [1] "list"

list2 <- list("abc", 5.6, TRUE, 10L,c(5,6,7)); list2; dim(list2); attributes(list2); class(list2); typeof(list2)

## [[1]]
## [1] "abc"
## 
## [[2]]
## [1] 5.6
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] 10
## 
## [[5]]
## [1] 5 6 7

## NULL

## NULL

## [1] "list"

## [1] "list"

list3 <- list(name="abc", age=5.6, is_student=TRUE, weight=10L, marks=c(5,6,7)); list3; dim(list3); attributes(list3);class(list3);typeof(list3)

## $name
## [1] "abc"
## 
## $age
## [1] 5.6
## 
## $is_student
## [1] TRUE
## 
## $weight
## [1] 10
## 
## $marks
## [1] 5 6 7

## NULL

## $names
## [1] "name"       "age"        "is_student" "weight"     "marks"

## [1] "list"

## [1] "list"

#Arrays
#Z <- array(data_vector, dim_vector)
a1 <- array(1:24,c(2,3,4))  ; a1; dim(a1); attributes(a1); class(a1); typeof(a1)

## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]    7    9   11
## [2,]    8   10   12
## 
## , , 3
## 
##      [,1] [,2] [,3]
## [1,]   13   15   17
## [2,]   14   16   18
## 
## , , 4
## 
##      [,1] [,2] [,3]
## [1,]   19   21   23
## [2,]   20   22   24

## [1] 2 3 4

## $dim
## [1] 2 3 4

## [1] "array"

## [1] "integer"

a2 <- array(1:6,c(2,3))  ; a2; dim(a2); attributes(a2); class(a2); typeof(a2)

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

## [1] 2 3

## $dim
## [1] 2 3

## [1] "matrix" "array"

## [1] "integer"

a3 <- array(1:3,c(1,3))  ; a3; dim(a3); attributes(a3); class(a3); typeof(a3)

##      [,1] [,2] [,3]
## [1,]    1    2    3

## [1] 1 3

## $dim
## [1] 1 3

## [1] "matrix" "array"

## [1] "integer"

a4 <- array(1:2,c(2,1)); a4; dim(a4); attributes(a4); class(a4); typeof(a4)

##      [,1]
## [1,]    1
## [2,]    2

## [1] 2 1

## $dim
## [1] 2 1

## [1] "matrix" "array"

## [1] "integer"

#Matrix
m1 <- matrix(1:6,nrow=2,ncol=3); m1; dim(m1); attributes(m1); class(m1); typeof(m1)

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

## [1] 2 3

## $dim
## [1] 2 3

## [1] "matrix" "array"

## [1] "integer"

DATAFRAME

# Creating a dataframe using data.frame()
name <- c("Ram","Kumar","Leela")
gender <- c("M","M","F")
height <- c(153.3,173.2,170.5)
age <- c(25,27,26)

df <- data.frame(name,gender,height,age)
df

##    name gender height age
## 1   Ram      M  153.3  25
## 2 Kumar      M  173.2  27
## 3 Leela      F  170.5  26

# Get structure of the dataframe
str(df)

## 'data.frame':    3 obs. of  4 variables:
##  $ name  : chr  "Ram" "Kumar" "Leela"
##  $ gender: chr  "M" "M" "F"
##  $ height: num  153 173 170
##  $ age   : num  25 27 26

# Get row names
rownames(df)

## [1] "1" "2" "3"

# Get col names
colnames(df)

## [1] "name"   "gender" "height" "age"

# Another way to get row names
row.names(df)

## [1] "1" "2" "3"

# Get a summary of the dataframe
summary(df)

##      name              gender              height           age      
##  Length:3           Length:3           Min.   :153.3   Min.   :25.0  
##  Class :character   Class :character   1st Qu.:161.9   1st Qu.:25.5  
##  Mode  :character   Mode  :character   Median :170.5   Median :26.0  
##                                        Mean   :165.7   Mean   :26.0  
##                                        3rd Qu.:171.8   3rd Qu.:26.5  
##                                        Max.   :173.2   Max.   :27.0

# Create a subset of the dataframe under some conditions
subset(df,gender=="F")

##    name gender height age
## 3 Leela      F  170.5  26

#Extracting a column of the dataframe
df$name

## [1] "Ram"   "Kumar" "Leela"

mean(df$height)

## [1] 165.6667

# Accessing data from a dataframe
df$name

## [1] "Ram"   "Kumar" "Leela"

df[1]

##    name
## 1   Ram
## 2 Kumar
## 3 Leela

df[1:3]

##    name gender height
## 1   Ram      M  153.3
## 2 Kumar      M  173.2
## 3 Leela      F  170.5

#Add another column to the dataframe
df$city <- c("Delhi","Bombay","Delhi")
df

##    name gender height age   city
## 1   Ram      M  153.3  25  Delhi
## 2 Kumar      M  173.2  27 Bombay
## 3 Leela      F  170.5  26  Delhi

# Combining two df with same set of columns
dfnew <- rbind(df,df)
dfnew <- rbind(dfnew,dfnew)
dfnew <- rbind(dfnew,dfnew)
dfnew <- rbind(dfnew,dfnew)

# Get the first few rows of a dataframe
head(dfnew)

##    name gender height age   city
## 1   Ram      M  153.3  25  Delhi
## 2 Kumar      M  173.2  27 Bombay
## 3 Leela      F  170.5  26  Delhi
## 4   Ram      M  153.3  25  Delhi
## 5 Kumar      M  173.2  27 Bombay
## 6 Leela      F  170.5  26  Delhi

# Get the last few rows of a dataframe
tail(dfnew)

##     name gender height age   city
## 43   Ram      M  153.3  25  Delhi
## 44 Kumar      M  173.2  27 Bombay
## 45 Leela      F  170.5  26  Delhi
## 46   Ram      M  153.3  25  Delhi
## 47 Kumar      M  173.2  27 Bombay
## 48 Leela      F  170.5  26  Delhi

#Another example implementation

Col_A <- runif(10,min=10,max=20)
Col_B <- rnorm(10,mean=25,sd=1)
df <- data.frame(Col_A,Col_B)
df

##       Col_A    Col_B
## 1  10.07390 25.89788
## 2  13.76707 26.69443
## 3  11.69064 24.97166
## 4  10.48133 24.17288
## 5  18.23005 24.54403
## 6  12.62370 26.02923
## 7  15.23046 24.52895
## 8  13.25574 23.62686
## 9  19.62945 25.26378
## 10 11.16252 26.05964

Col_C <- rnorm(10,mean=15,sd=2)
Col_D <- rnorm(10,mean=100,sd=2)

df <- cbind(df, Col_C)
df

##       Col_A    Col_B    Col_C
## 1  10.07390 25.89788 14.26405
## 2  13.76707 26.69443 15.42672
## 3  11.69064 24.97166 12.47615
## 4  10.48133 24.17288 14.93380
## 5  18.23005 24.54403 15.82702
## 6  12.62370 26.02923 16.69077
## 7  15.23046 24.52895 16.31722
## 8  13.25574 23.62686 15.81878
## 9  19.62945 25.26378 15.01939
## 10 11.16252 26.05964 16.27902

df$Col_D <- Col_D
df

##       Col_A    Col_B    Col_C     Col_D
## 1  10.07390 25.89788 14.26405 100.98332
## 2  13.76707 26.69443 15.42672  95.75710
## 3  11.69064 24.97166 12.47615 101.54778
## 4  10.48133 24.17288 14.93380 100.58335
## 5  18.23005 24.54403 15.82702 100.91446
## 6  12.62370 26.02923 16.69077  99.45412
## 7  15.23046 24.52895 16.31722  97.73053
## 8  13.25574 23.62686 15.81878 102.67945
## 9  19.62945 25.26378 15.01939  97.71984
## 10 11.16252 26.05964 16.27902 102.57861

Controlling the execution Flow

# If Else Check
a <- 5
b <- 6
if (a>b) { 
  print("a is bigger") 
} else {
  print("a is not bigger")
}

## [1] "a is not bigger"

a <- 5
b <- 5
if (a>b) 
{ 
  print("a is bigger") 
} else 
if(a<b)
{
  print("a is smaller")
} else
{
  print("a is equal to b")
}

## [1] "a is equal to b"

## Switch construct to check
number <- 3
value <- switch(number,"one","two","three")
print(value)

## [1] "three"

Loops

#for (var in expr_1) {expr_2}
#var is the variable name
#expr_1 is a vector with elements
#expr_2 is the expression(s) to be executed

for (i in c(5,7,9,11,13)) 
{
  print(i)
}

## [1] 5
## [1] 7
## [1] 9
## [1] 11
## [1] 13

for (i in 1:5)
{
  print(i)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5

for (i in seq(5))
{
  print(i)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5

# Yet another way to loop - the 'while' loop; 
i <- 1
while (i < 10)
{
  print(i)
  i <- i+1
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9

Usage of break and next in loops

# There are situations when under some conditions
# the loop should be terminated. Use 'break' statement for this.

# There are situations where a particular iteration of the 
# loop is to be skipped but the next set of iterations
# should continue. Use 'next' statement for this. 

# Examples of usage of break and next
# The for loop assigns i afresh on every iteration, so the loop variable
# needs neither manual initialisation nor a manual increment.
for (i in 1:10) {
  # if (i == 5) break   # would terminate the loop when i reaches 5
  if (i == 5) next      # skip only the iteration where i is 5
  print(i)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

# Yet another loop method: 'repeat' runs its body until an explicit break
i <- 1
repeat {
  # Exit condition is checked first, so values 1 to 10 are printed
  if (i > 10) break
  print(i)
  i <- i + 1
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10

Constants

#There are constants stored and readily available. Examples:

LETTERS

##  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"

letters

##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"

# Access letters 5 to 10
for (i in letters[5:10])
{
  print(i)
}

## [1] "e"
## [1] "f"
## [1] "g"
## [1] "h"
## [1] "i"
## [1] "j"

# Use the seq_along function to generate the sequence of indices (1 to 26) of the letters vector
for (i in seq_along(letters))
{
  print(i)
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26

Calling and Writing your own functions

# Calling a function
x <- 1:100
summary(x)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   25.75   50.50   50.50   75.25  100.00

seq(5)

## [1] 1 2 3 4 5

seq(from=10,to=15)

## [1] 10 11 12 13 14 15

# Defining our own functions


# myfunction1: takes no arguments and prints a fixed status message.
# Nothing is returned explicitly; the last call is print(), which returns
# its argument invisibly.
myfunction1 <- function() {
  status <- "Executing myfunction1"
  print(status)
}
# Calling the function
myfunction1()

## [1] "Executing myfunction1"

# A function with arguments x and y
# Prints a status message naming this function, then the value of 2*x + y.
myfunction2 <- function(x, y) {
  print("Executing myfunction2")
  print(2 * x + y)
}
#Calling with arguments
myfunction2(3,2)

## [1] "Executing myfunction2"
## [1] 8

#Calling with name of the argument
myfunction2(x=3,y=2)

## [1] "Executing myfunction2"
## [1] 8

Read/Write Data from/to a csv file

# create a csv file using Excel and save it in working directory.
readdata <- read.csv('csvfile.csv')
typeof(readdata)

## [1] "list"

print(readdata)

##    X  Y   Z
## 1  3  9  48
## 2  3 15  78
## 3  4 14  74
## 4  2 13  67
## 5  0 20 100
## 6  2 14  72
## 7  3 11  58
## 8  4 18  94
## 9  0  8  40
## 10 3 19  98
## 11 3 14  73
## 12 1 15  76
## 13 0 10  50
## 14 0 11  55
## 15 1 15  76
## 16 4 16  84
## 17 1 13  66
## 18 0 11  55
## 19 4  7  39
## 20 1 12  61

print(paste(typeof(readdata$X),typeof(readdata$Y),typeof(readdata$Z)))

## [1] "integer integer integer"

#write data to a file. 
df <- mtcars
write.csv(df,"testdump.csv")

Introduction_to_R_Commands

Selvaraj Vadivelu

Working Directory

Help

Source and Sink

Objects

Assignment

Print variables

Operations

typeof()

Vectors

Generating a sequence of values

Infinity, NA and NaN

Accessing elements in a vector

Lists

Conversion of data type

Factors

Matrices

Vectors, Lists, Matrices and Arrays

DATAFRAME

Controlling the execution Flow

Loops

Usage of break and next in loops

Constants

Calling and Writing your own functions

Read/Write Data from/to a csv file