Last updated: 19:57:02 IST, 24 July, 2023
This page is intended to be an introduction to R Commands for beginners.
# --- Session basics: working directory, help, sourcing files, workspace ---
# Get the current working directory
getwd()
## [1] "d:/expt-r"
# Set the working directory.
# NOTE(review): this path is machine-specific; replace it with a directory
# that exists on your system before running this line.
setwd("d:/expt-r")
# Get general help
#help()
# Get help for a function named 'summary'
#help(summary)
# Get help by putting a ? mark before the command name
#?summary
# Execute a set of R commands stored in a file, e.g. 'filename.R'
# source("filename.R")
# Route the output to a file
# sink("outputfile")
# sink() # restores output to console
# List the objects in memory; character(0) means nothing is defined yet
ls()
## character(0)
# objects() reports the same listing as ls()
objects()
## character(0)
# Remove objects in memory
#rm(<specific object>)
# Remove all objects in memory
#rm(list=ls())
# Atomic data types:
# character, integer, double, logical, complex
# A value is assigned to a variable with the <- operator.
# Each assignment below overwrites x, so only the last one survives.
x <- 5
x <- 1+5i
x <- "abc"
x <- TRUE
x <- FALSE
# -> assigns in the other direction: value on the left, name on the right
5 -> x
x <- c(5,6,7,8)
# Type the variable name to print its value
x
## [1] 5 6 7 8
# Use the print function to print the value of a variable
print(x)
## [1] 5 6 7 8
# Use paste() to join several values into one string for a single print call
print(paste(x[1],x[2],x[3],x[4]))
## [1] "5 6 7 8"
# Arithmetic operations
a <- 5
b <- a + 5
b
## [1] 10
# Logical operations
a <- TRUE
b <- TRUE
# NOTE(review): this variable shares its name with the c() function used
# throughout this file. Calls such as c(1, 2) further down still work, but a
# different variable name would be clearer.
c <- FALSE
a
## [1] TRUE
b
## [1] TRUE
c
## [1] FALSE
d <- a & b # AND
d
## [1] TRUE
d <- a | c # OR
d
## [1] TRUE
d <- a & c # AND with one FALSE operand
d
## [1] FALSE
a
## [1] TRUE
!a # NOT operator
## [1] FALSE
# Comparison operators work similarly: <, >, <=, >=, ==
a <- 10
b <- 5
a > b # Greater than
## [1] TRUE
a < b # Less than
## [1] FALSE
# Get to know the type of an object x
# (x still holds c(5,6,7,8) from the assignment examples earlier in the file)
typeof(x)
## [1] "double"
# A vector holds elements of one single datatype
x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
# c() also concatenates existing vectors: y has 5 + 1 + 5 = 11 elements
y <- c(x, 5, x)
z <- c("a", "b")
# Create a vector with integer values (the L suffix marks an integer literal)
z <- c(5L, 6L)
# Coercion example: mixing a number and a string turns every element
# into a character string
x <- c(1.7, "a") #Coercion to character type
typeof(x)
## [1] "character"
x
## [1] "1.7" "a"
# Vector arithmetic is element-wise; here v becomes c(6, 8)
p<- c(2,2)
q<- c(1,3)
v <- 2*p + q + 1 # p and q have the same length; R recycles shorter operands (e.g. the scalar 1)
# Size of a vector.
# a still holds the single value 10 assigned in the comparison examples
# earlier in the file, so its length is 1.
length(a)
## [1] 1
# Length of an object can be adjusted.
a <- c(1,2,3,4)
a
## [1] 1 2 3 4
# Shrinking the length drops the trailing elements
length(a) <- 3
a
## [1] 1 2 3
# Growing the length pads the new positions with NA
length(a) <- 5
a
## [1] 1 2 3 NA NA
# Use seq() to generate sequences: from 1 to 10 in steps of 0.1
x <- seq(from = 1, to = 10, by = 0.1)
x
## [1] 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4
## [16] 2.5 2.6 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9
## [31] 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4
## [46] 5.5 5.6 5.7 5.8 5.9 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9
## [61] 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8.0 8.1 8.2 8.3 8.4
## [76] 8.5 8.6 8.7 8.8 8.9 9.0 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8 9.9
## [91] 10.0
# A sequence can also be described by its start, step and number of
# elements. The argument is spelled out in full as length.out rather than
# relying on partial matching of 'length'.
s <- seq(from = -5, by = 0.2, length.out = 51)
# Print the values of s. Notice that the output spreads over several lines,
# each line starting with the index of its first element.
s
## [1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4 -2.2
## [16] -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8
## [31] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8
## [46] 4.0 4.2 4.4 4.6 4.8 5.0
# Repeating elements
p <- c(1, 2)
# times = 5 repeats the whole vector five times
x <- rep(p, times = 5)
x
## [1] 1 2 1 2 1 2 1 2 1 2
# The second positional argument of rep() is 'times', so this is the same call
y <- rep(p, 5)
y
## [1] 1 2 1 2 1 2 1 2 1 2
# each = 5 repeats every element five times before moving to the next one
z <- rep(p, each = 5)
z
## [1] 1 1 1 1 1 2 2 2 2 2
# Special numeric values
# Zero divided by a non-zero number is an ordinary 0
x <- 0 / 1
x
## [1] 0
# Infinity is represented as 'Inf'
x <- 1 / 0
x
## [1] Inf
# Not a Number (NaN)
x <- 0 / 0
x
## [1] NaN
# NA is 'Not Available' (a missing value)
a <- c(5, NA, 3)
a
## [1] 5 NA 3
b <- a
b
## [1] 5 NA 3
# Arithmetic on NA gives NA; the other elements are computed as usual
b <- a * 3
b
## [1] 15 NA 9
# is.na(x) is TRUE for both NA and NaN values.
# is.nan(x) is TRUE only for NaN values.
x <- c(5, NA, NaN, 7)
x
## [1] 5 NA NaN 7
is.na(x)
## [1] FALSE TRUE TRUE FALSE
is.nan(x)
## [1] FALSE FALSE TRUE FALSE
# Build 1..20 with positions 11 and 12 missing. The ranges are written
# directly with ':'; wrapping an existing vector in seq() only returns its
# index positions, which is easy to misread.
x <- c(1:10, NA, NA, 13:20)
x
## [1] 1 2 3 4 5 6 7 8 9 10 NA NA 13 14 15 16 17 18 19 20
# Keep only the non-missing elements by negating the is.na() mask
x[!is.na(x)]
## [1] 1 2 3 4 5 6 7 8 9 10 13 14 15 16 17 18 19 20
# Elements are selected with square brackets; positions start at 1
a <- c(5,10,15,20,25)
a[1]
## [1] 5
a[2]
## [1] 10
# Accessing elements in a vector.
# x is the 20-element vector (with NA at positions 11 and 12) built just above.
x[1]
## [1] 1
# All elements except the first one (a negative index excludes a position)
x[-1]
## [1] 2 3 4 5 6 7 8 9 10 NA NA 13 14 15 16 17 18 19 20
# First five elements
x[1:5]
## [1] 1 2 3 4 5
# Exclude first five elements
x[-(1:5)]
## [1] 6 7 8 9 10 NA NA 13 14 15 16 17 18 19 20
# Last six elements: positions length(x)-5 through length(x)
x[(length(x)-5):length(x)]
## [1] 15 16 17 18 19 20
# Lists can hold elements with different datatypes
mylist <- list(name="Fred", no.children=3, child.ages=c(4,7,9))
# Single brackets return a sub-list (the element name is kept in the output)
mylist[1]
## $name
## [1] "Fred"
mylist[2]
## $no.children
## [1] 3
mylist[3]
## $child.ages
## [1] 4 7 9
# $name extracts the element itself, which can then be indexed further
mylist$child.ages[1]
## [1] 4
mylist$child.ages[3]
## [1] 9
# Double brackets also extract the element itself, by position
mylist[[1]] # is same as mylist$name
## [1] "Fred"
mylist[[2]] # is same as mylist$no.children
## [1] 3
mylist[[3]] # is same as mylist$child.ages
## [1] 4 7 9
mylist[[3]][1] # is same as mylist$child.ages[1]
## [1] 4
mylist[[3]][2] # is same as mylist$child.ages[2]
## [1] 7
# Converting between types
z<- 0:9
z
## [1] 0 1 2 3 4 5 6 7 8 9
# as.character() turns each number into a string
digits <- as.character(z)
digits
## [1] "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
# as.integer() converts to integer.
# NOTE(review): z was created with 0:9 and is not modified in between, so
# this call appears to leave it unchanged; converting `digits` back to
# integers may have been the intended demonstration - confirm.
z <-as.integer(z)
z
## [1] 0 1 2 3 4 5 6 7 8 9
# An "empty" object still has a type.
e <- numeric()
e
## numeric(0)
e <- character()
e
## character(0)
# Non-ordered factors: a categorical variable whose levels have no ranking.
# The values are stored as integer codes that point into the set of levels.
a <- c("Male", "Female", "Male", "Male")
factor_a <- factor(a)
typeof(a)
## [1] "character"
typeof(factor_a)
## [1] "integer"
print(factor_a)
## [1] Male Female Male Male
## Levels: Female Male
# Ordered factors: 'levels' gives the ranking from lowest to highest and
# 'ordered = TRUE' (argument name written in full) marks the factor as ordered.
b <- c("L", "M", "H", "L")
b
## [1] "L" "M" "H" "L"
factor_b <- factor(b, levels = c("L", "M", "H"), ordered = TRUE)
print(factor_b)
## [1] L M H L
## Levels: L < M < H
# Create a 2x3 matrix without supplying data; every cell is filled with NA
m <- matrix(nrow=2,ncol=3)
m
## [,1] [,2] [,3]
## [1,] NA NA NA
## [2,] NA NA NA
# dim() returns the number of rows and columns
dim(m)
## [1] 2 3
# The dimensions are stored as the 'dim' attribute of the object
attributes(m)
## $dim
## [1] 2 3
# Matrices are constructed column-wise: the first column is filled first
m <-matrix (1:6,nrow=2,ncol=3)
m
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
# Convert a vector to a matrix by adjusting the dimensions
m <- 1:10
dim(m) <- c(2,5)
m
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 4 6 8 10
# Number of rows and number of columns
nrow(m)
## [1] 2
ncol(m)
## [1] 5
# 3 dimensional arrays: 24 values arranged as 4 rows x 3 columns x 2 layers
m <- 1:24
dim(m) <- c(4,3,2)
m
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 13 17 21
## [2,] 14 18 22
## [3,] 15 19 23
## [4,] 16 20 24
# Use cbind() to combine R objects as columns and rbind() to combine them
# as rows. Both can be used to create matrices.
x <- 1:3
y <- 10:12
# z is defined here but is not used in the two examples below
z <- 20:22
# x and y become the columns of a 3x2 matrix
m <- cbind(x,y)
m
## x y
## [1,] 1 10
## [2,] 2 11
## [3,] 3 12
# x and y become the rows of a 2x3 matrix
m <- rbind(x,y)
m
## [,1] [,2] [,3]
## x 1 2 3
## y 10 11 12
# Each line below creates an object and then shows, in this order:
# the object itself, dim(), attributes(), class() and typeof().
# Statements are separated by ';' so they can share one line.
#Vector
# A plain vector has no dim and no attributes (both NULL)
v1 <- c(1:10); v1; dim(v1); attributes(v1); class(v1); typeof(v1)
## [1] 1 2 3 4 5 6 7 8 9 10
## NULL
## NULL
## [1] "integer"
## [1] "integer"
#Lists
# A list with a single element, which is itself a vector of ten integers
list1 <- list(1:10); list1; dim(list1); attributes(list1); class(list1); typeof(list1)
## [[1]]
## [1] 1 2 3 4 5 6 7 8 9 10
## NULL
## NULL
## [1] "list"
## [1] "list"
# An unnamed list mixing character, double, logical, integer and a vector
list2 <- list("abc", 5.6, TRUE, 10L,c(5,6,7)); list2; dim(list2); attributes(list2); class(list2); typeof(list2)
## [[1]]
## [1] "abc"
##
## [[2]]
## [1] 5.6
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] 10
##
## [[5]]
## [1] 5 6 7
## NULL
## NULL
## [1] "list"
## [1] "list"
# A named list: the element names show up as the 'names' attribute
list3 <- list(name="abc", age=5.6, is_student=TRUE, weight=10L, marks=c(5,6,7)); list3; dim(list3); attributes(list3);class(list3);typeof(list3)
## $name
## [1] "abc"
##
## $age
## [1] 5.6
##
## $is_student
## [1] TRUE
##
## $weight
## [1] 10
##
## $marks
## [1] 5 6 7
## NULL
## $names
## [1] "name" "age" "is_student" "weight" "marks"
## [1] "list"
## [1] "list"
#Arrays
# General form: Z <- array(data_vector, dim_vector)
# Each line below creates an object and then shows, in this order:
# the object itself, dim(), attributes(), class() and typeof().
# Three dimensions (2 x 3 x 4): class() reports "array"
a1 <- array(1:24,c(2,3,4)) ; a1; dim(a1); attributes(a1); class(a1); typeof(a1)
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 7 9 11
## [2,] 8 10 12
##
## , , 3
##
## [,1] [,2] [,3]
## [1,] 13 15 17
## [2,] 14 16 18
##
## , , 4
##
## [,1] [,2] [,3]
## [1,] 19 21 23
## [2,] 20 22 24
## [1] 2 3 4
## $dim
## [1] 2 3 4
## [1] "array"
## [1] "integer"
# Two dimensions (2 x 3): class() reports both "matrix" and "array"
a2 <- array(1:6,c(2,3)) ; a2; dim(a2); attributes(a2); class(a2); typeof(a2)
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
## [1] 2 3
## $dim
## [1] 2 3
## [1] "matrix" "array"
## [1] "integer"
# A single row (1 x 3)
a3 <- array(1:3,c(1,3)) ; a3; dim(a3); attributes(a3); class(a3); typeof(a3)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [1] 1 3
## $dim
## [1] 1 3
## [1] "matrix" "array"
## [1] "integer"
# A single column (2 x 1)
a4 <- array(1:2,c(2,1)); a4; dim(a4); attributes(a4); class(a4); typeof(a4)
## [,1]
## [1,] 1
## [2,] 2
## [1] 2 1
## $dim
## [1] 2 1
## [1] "matrix" "array"
## [1] "integer"
#Matrix
# matrix() with the same data and shape gives the same results as a2 above
m1 <- matrix(1:6,nrow=2,ncol=3); m1; dim(m1); attributes(m1); class(m1); typeof(m1)
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
## [1] 2 3
## $dim
## [1] 2 3
## [1] "matrix" "array"
## [1] "integer"
# Creating a dataframe using data.frame().
# Each vector becomes one column; all vectors here have three elements.
name <- c("Ram","Kumar","Leela")
gender <- c("M","M","F")
height <- c(153.3,173.2,170.5)
age <- c(25,27,26)
df <- data.frame(name,gender,height,age)
df
## name gender height age
## 1 Ram M 153.3 25
## 2 Kumar M 173.2 27
## 3 Leela F 170.5 26
# Get structure of the dataframe: one line per column with its type
str(df)
## 'data.frame': 3 obs. of 4 variables:
## $ name : chr "Ram" "Kumar" "Leela"
## $ gender: chr "M" "M" "F"
## $ height: num 153 173 170
## $ age : num 25 27 26
# Get row names
rownames(df)
## [1] "1" "2" "3"
# Get column names
colnames(df)
## [1] "name" "gender" "height" "age"
# Another way to get row names
row.names(df)
## [1] "1" "2" "3"
# Get a summary of the dataframe (per-column statistics)
summary(df)
## name gender height age
## Length:3 Length:3 Min. :153.3 Min. :25.0
## Class :character Class :character 1st Qu.:161.9 1st Qu.:25.5
## Mode :character Mode :character Median :170.5 Median :26.0
## Mean :165.7 Mean :26.0
## 3rd Qu.:171.8 3rd Qu.:26.5
## Max. :173.2 Max. :27.0
# Create a subset of the dataframe: keep only the rows where gender is "F"
subset(df,gender=="F")
## name gender height age
## 3 Leela F 170.5 26
# Extracting a column of the dataframe with $ gives a plain vector
df$name
## [1] "Ram" "Kumar" "Leela"
# The extracted column can be passed straight to a function
mean(df$height)
## [1] 165.6667
# Accessing data from a dataframe
df$name
## [1] "Ram" "Kumar" "Leela"
# Single brackets with a column position return a dataframe, not a vector
df[1]
## name
## 1 Ram
## 2 Kumar
## 3 Leela
# Columns 1 to 3
df[1:3]
## name gender height
## 1 Ram M 153.3
## 2 Kumar M 173.2
## 3 Leela F 170.5
# Add another column to the dataframe by assigning to a new column name
df$city <- c("Delhi","Bombay","Delhi")
df
## name gender height age city
## 1 Ram M 153.3 25 Delhi
## 2 Kumar M 173.2 27 Bombay
## 3 Leela F 170.5 26 Delhi
# Combining two dataframes with the same set of columns stacks their rows.
# Each call doubles the row count: 3 -> 6 -> 12 -> 24 -> 48 rows.
dfnew <- rbind(df,df)
dfnew <- rbind(dfnew,dfnew)
dfnew <- rbind(dfnew,dfnew)
dfnew <- rbind(dfnew,dfnew)
# Get the first few rows of a dataframe
head(dfnew)
## name gender height age city
## 1 Ram M 153.3 25 Delhi
## 2 Kumar M 173.2 27 Bombay
## 3 Leela F 170.5 26 Delhi
## 4 Ram M 153.3 25 Delhi
## 5 Kumar M 173.2 27 Bombay
## 6 Leela F 170.5 26 Delhi
# Get the last few rows of a dataframe
tail(dfnew)
## name gender height age city
## 43 Ram M 153.3 25 Delhi
## 44 Kumar M 173.2 27 Bombay
## 45 Leela F 170.5 26 Delhi
## 46 Ram M 153.3 25 Delhi
## 47 Kumar M 173.2 27 Bombay
## 48 Leela F 170.5 26 Delhi
# Another example: a dataframe built from randomly generated columns.
# NOTE(review): no random seed is set in this file, so the numbers printed
# below are one sample run and will differ each time the code is executed.
# Col_A: 10 uniform random values between 10 and 20
Col_A <- runif(10,min=10,max=20)
# Col_B: 10 normal random values with mean 25 and standard deviation 1
Col_B <- rnorm(10,mean=25,sd=1)
df <- data.frame(Col_A,Col_B)
df
## Col_A Col_B
## 1 10.07390 25.89788
## 2 13.76707 26.69443
## 3 11.69064 24.97166
## 4 10.48133 24.17288
## 5 18.23005 24.54403
## 6 12.62370 26.02923
## 7 15.23046 24.52895
## 8 13.25574 23.62686
## 9 19.62945 25.26378
## 10 11.16252 26.05964
Col_C <- rnorm(10,mean=15,sd=2)
Col_D <- rnorm(10,mean=100,sd=2)
# First way to add a column: cbind() the dataframe with the new vector
df <- cbind(df, Col_C)
df
## Col_A Col_B Col_C
## 1 10.07390 25.89788 14.26405
## 2 13.76707 26.69443 15.42672
## 3 11.69064 24.97166 12.47615
## 4 10.48133 24.17288 14.93380
## 5 18.23005 24.54403 15.82702
## 6 12.62370 26.02923 16.69077
## 7 15.23046 24.52895 16.31722
## 8 13.25574 23.62686 15.81878
## 9 19.62945 25.26378 15.01939
## 10 11.16252 26.05964 16.27902
# Second way to add a column: assign to a new column name with $
df$Col_D <- Col_D
df
## Col_A Col_B Col_C Col_D
## 1 10.07390 25.89788 14.26405 100.98332
## 2 13.76707 26.69443 15.42672 95.75710
## 3 11.69064 24.97166 12.47615 101.54778
## 4 10.48133 24.17288 14.93380 100.58335
## 5 18.23005 24.54403 15.82702 100.91446
## 6 12.62370 26.02923 16.69077 99.45412
## 7 15.23046 24.52895 16.31722 97.73053
## 8 13.25574 23.62686 15.81878 102.67945
## 9 19.62945 25.26378 15.01939 97.71984
## 10 11.16252 26.05964 16.27902 102.57861
# Two-way branch: compare a and b and report which case applies
a <- 5
b <- 6
if (a <= b) {
  print("a is not bigger")
} else {
  print("a is bigger")
}
## [1] "a is not bigger"
# Three-way branch: 'else if' chains a second test onto the first
a <- 5
b <- 5
if (a > b) {
  print("a is bigger")
} else if (a < b) {
  print("a is smaller")
} else {
  print("a is equal to b")
}
## [1] "a is equal to b"
# switch() with a numeric selector returns the alternative at that position
number <- 3
value <- switch(number, "one", "two", "three")
print(value)
## [1] "three"
# General form: for (var in expr_1) { expr_2 }
#   var    - the loop variable
#   expr_1 - a vector whose elements are visited one after another
#   expr_2 - the expression(s) executed once per element
# Looping over an explicit vector of values
for (i in c(5, 7, 9, 11, 13)) {
  print(i)
}
## [1] 5
## [1] 7
## [1] 9
## [1] 11
## [1] 13
# Looping over a range written with ':'
for (i in 1:5) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# Looping over a range produced by seq()
for (i in seq(5)) {
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# A 'while' loop repeats its body for as long as the condition holds;
# the counter has to be initialised and advanced by hand.
i <- 1
while (i < 10) {
  print(i)
  i <- i + 1
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
# Sometimes a loop must stop early when a condition is met:
# use the 'break' statement for this.
# Sometimes a single iteration must be skipped while the remaining
# iterations still run: use the 'next' statement for this.
# Example of usage of break and next.
# A for loop assigns the loop variable itself on every iteration, so i needs
# no initialisation before the loop and no manual increment inside it.
for (i in 1:10) {
  # if (i == 5) break   # would leave the loop at 5, printing only 1 to 4
  if (i == 5) next      # skips the print for 5 and continues with 6
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# 'repeat' has no condition of its own: the body runs again and again
# until a 'break' inside it ends the loop.
i <- 1
repeat {
  if (i > 10) {
    break
  }
  print(i)
  i <- i + 1
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
# Some constants are built in and readily available. Examples:
# LETTERS holds the 26 upper-case letters
LETTERS
## [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"
# letters holds the 26 lower-case letters
letters
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
# Access letters 5 to 10; here the loop variable takes the letters themselves
for (i in letters[5:10])
{
print(i)
}
## [1] "e"
## [1] "f"
## [1] "g"
## [1] "h"
## [1] "i"
## [1] "j"
# seq_along() gives the index positions of a vector (1 to 26 for letters),
# so this loop prints numbers, not the letters themselves
for (i in seq_along(letters))
{
print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
# Calling a function: write its name followed by the arguments in brackets
x <- 1:100
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 25.75 50.50 50.50 75.25 100.00
# Arguments can be passed by position ...
seq(5)
## [1] 1 2 3 4 5
# ... or by name
seq(from=10,to=15)
## [1] 10 11 12 13 14 15
# Defining our own functions
# myfunction1 takes no arguments and prints a fixed message.
# Its last expression is a print() call, so the printed message is also
# handed back invisibly as the function's value.
myfunction1 <- function() {
  message_text <- "Executing myfunction1"
  print(message_text)
}
# Calling the function
myfunction1()
## [1] "Executing myfunction1"
# A function with arguments x and y.
# myfunction2 prints a banner naming this function and then prints the
# value of 2*x + y.
#   x, y - numeric values used in the calculation
# The last expression is a print() call, so the computed value is also
# handed back invisibly.
myfunction2 <- function(x, y) {
  print("Executing myfunction2")
  print(2 * x + y)
}
# Calling with positional arguments
myfunction2(3, 2)
## [1] "Executing myfunction2"
## [1] 8
# Calling with the names of the arguments
myfunction2(x = 3, y = 2)
## [1] "Executing myfunction2"
## [1] 8
# Reading data from a file.
# Create a csv file (for example with Excel) named 'csvfile.csv' and save it
# in the working directory before running this line.
readdata <- read.csv('csvfile.csv')
# typeof() reports "list" for the object returned by read.csv()
typeof(readdata)
## [1] "list"
print(readdata)
## X Y Z
## 1 3 9 48
## 2 3 15 78
## 3 4 14 74
## 4 2 13 67
## 5 0 20 100
## 6 2 14 72
## 7 3 11 58
## 8 4 18 94
## 9 0 8 40
## 10 3 19 98
## 11 3 14 73
## 12 1 15 76
## 13 0 10 50
## 14 0 11 55
## 15 1 15 76
## 16 4 16 84
## 17 1 13 66
## 18 0 11 55
## 19 4 7 39
## 20 1 12 61
# Show the storage type of each of the columns X, Y and Z in one line
print(paste(typeof(readdata$X),typeof(readdata$Y),typeof(readdata$Z)))
## [1] "integer integer integer"
# Write data to a file: mtcars is copied into df and saved as
# 'testdump.csv' (a relative path, so it lands in the working directory)
df <- mtcars
write.csv(df,"testdump.csv")