Looks at how you can control the flow and order of execution in your code.
An IF statement runs a block of code ONLY when a certain condition is true.
The condition is enclosed in parentheses after the IF. The condition must yield a single logical value (TRUE or FALSE). If the condition is not satisfied, R skips the code inside the braces and either does nothing or continues with any code after the closing brace.
# Define the two scalars compared by the `if` example, then echo `a`.
a <- 3
mynumber <- 4
a
[1] 3
mynumber
[1] 4
# Test the condition: square `a` only when it does not exceed `mynumber`.
if (a <= mynumber) {
  a <- a^2
}
a
[1] 9
# Numeric vector used by the compound-condition example.
myvec <- c(
  2.73, 5.4, 2.15, 5.29, 1.36,
  2.16, 1.41, 6.97, 7.99, 9.52
)
myvec
[1] 2.73 5.40 2.15 5.29 1.36 2.16 1.41 6.97 7.99 9.52
# 5 x 2 matrix, filled column by column.
mymat <- matrix(c(2, 0, 1, 2, 3, 0, 3, 0, 1, 1), nrow = 5, ncol = 2)
mymat
[,1] [,2]
[1,] 2 0
[2,] 0 3
[3,] 1 0
[4,] 2 1
[5,] 3 1
# Test the if statement.
# The condition is one logical value built from two scalar tests joined by `||`:
#   1. does any element of `myvec - 1` exceed 9?
#   2. is element [2, 1] of `myvec`, reshaped into a 2 x 5 matrix, at most 6?
# The comparison `> 9` must sit inside any(). Written as `any(myvec-1)>9`, the
# doubles are coerced to logical (R warns: "coercing argument of type 'double'
# to logical") and the resulting TRUE/FALSE is then compared with 9.
if (any((myvec - 1) > 9) || matrix(myvec, 2, 5)[2, 1] <= 6) {
  cat("contiion satisfied -- \n")
  # Copy myvec and blank out positions 1, 3, 5, 7 and 9.
  new.myvec <- myvec
  new.myvec[seq(1, 9, 2)] <- NA
  # Bundle the modified vector with mymat shifted by 0.5.
  mylist <- list(aa = new.myvec, bb = mymat + 0.5)
  cat("--a list with", length(mylist), "members now exists.")
}
contiion satisfied --
--a list with 2 members now exists.
mylist
$aa
[1] NA 5.40 NA 5.29 NA 2.16 NA 6.97 NA 9.52
$bb
[,1] [,2]
[1,] 2.5 0.5
[2,] 0.5 3.5
[3,] 1.5 0.5
[4,] 2.5 1.5
[5,] 3.5 1.5
If you want something to execute when the defined condition is FALSE, you can add an [else] declaration.
# Reset the two scalars before the if/else example, then echo `a`.
a <- 3
mynumber <- 4
a
[1] 3
mynumber
[1] 4
# Test if/else: report the outcome of the comparison, then update `a`
# differently in each branch.
if (a <= mynumber) {
  # Report first: the assignment below changes `a`.
  cat("Condition was", a <= mynumber)
  a <- a^2
} else {
  cat("Condition was", a <= mynumber)
  a <- a - 3.5
}
Condition was TRUE
# see what a is now
a
[1] 9
Since [if] statements can only check a single logical value, you need [ifelse] to perform vector-oriented checks in relatively simple cases.
# A scalar numerator and a vector of denominators that includes zero.
x <- 5
y <- -5:5
x
[1] 5
y
[1] -5 -4 -3 -2 -1 0 1 2 3 4 5
# Dividing x by y would give Inf where y is zero; locate that element first.
y == 0
[1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
# Using ifelse: element-wise choice between NA (where y is zero) and x / y.
result <- ifelse(y == 0, NA, x / y)
result
[1] -1.000000 -1.250000 -1.666667 -2.500000 -5.000000 NA 5.000000 2.500000 1.666667 1.250000 1.000000
You can nest multiple IF statements inside other IF statements.
# Set up the variables for the nested if/else example.
a <- 6
mynumber <- 4

# Outer test decides how `a` is updated; each branch then runs its own
# inner test to decide how `b` is built.
if (a <= mynumber) {
  cat("First Condition was TRUE")
  a <- a^2
  if (mynumber > 3) {
    cat("Second Condition was true")
    b <- seq(1, a, length = mynumber)
  } else {
    cat("Second Condition was FALSE")
    b <- a * mynumber
  }
} else {
  cat("First Condition was False\n")
  a <- a - 3.5
  if (mynumber >= 4) {
    cat("Second condition was TRUE")
    b <- a^(3 - mynumber)
  } else {
    cat("Second Condition was False")
    b <- rep(a + mynumber, times = 3)
  }
}
First Condition was False
Second condition was TRUE
# see what a is now
a
[1] 2.5
b
[1] 0.4
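The [switch] function is similar to a case statement in other languages: it picks a result based on the value of a single expression.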
# Dispatch on a character value; the unnamed trailing NA is the fallback
# returned when no name matches.
mystring <- "Lisa"
foo <- switch(mystring,
  Homer = 12,
  Marge = 34,
  Bart = 56,
  Lisa = 78,
  Maggie = 90,
  NA
)
foo
[1] 78
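For integers, [switch] uses the number to select the alternative by position. Note that in this form the trailing NA is simply the fifth alternative, not a default; a number outside the available positions returns NULL invisibly.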
# Dispatch on a number: the value selects an alternative by position.
mynum <- 3
foo <- switch(mynum, 12, 34, 56, 78, NA)
foo
[1] 56
A [for] loop repeats code as it works its way through a vector. A [while] loop simply repeats code until a specific condition evaluates to FALSE.
General form: for(loopindex in loopvector){ do any code here }
loopindex is a placeholder that represents an element in the loopvector. It starts off from the first element in the vector and moves to the next element with each loop repetition.
# Walk through 5, 6, 7 and report the current element on each pass.
for (myitem in 5:7) {
  cat("--braced area begins -- \n")
  cat("the current item is", myitem, "\n")
  cat("--braced area ends--\n\n")
}
--braced area begins --
the current item is 5
--braced area ends--
--braced area begins --
the current item is 6
--braced area ends--
--braced area begins --
the current item is 7
--braced area ends--
# A loop can modify objects that live outside it: `counter` is created
# before the loop and incremented once per pass.
counter <- 0
for (myitem in 5:7) {
  counter <- counter + 1
  cat("--braced area begins -- \n")
  cat("the current item in run", counter, "is ", myitem, "\n")
  cat("--braced area ends--\n\n")
}
--braced area begins --
the current item in run 1 is 5
--braced area ends--
--braced area begins --
the current item in run 2 is 6
--braced area ends--
--braced area begins --
the current item in run 3 is 7
--braced area ends--
Looping via Index or Value
# Loop by value: `i` takes each element of myvec in turn.
myvec <- c(0.4, 1.1, 0.34, 0.55)
for (i in myvec) {
  print(2 * i)
}
[1] 0.8
[1] 2.2
[1] 0.68
[1] 1.1
# Loop by index: `i` is a position in myvec and the value is looked up
# with myvec[i].
myvec <- c(0.4, 1.1, 0.34, 0.55)
# seq_along() is used instead of 1:length(): for an empty vector 1:0 would
# yield c(1, 0) and run the body twice, while seq_along() yields no passes.
for (i in seq_along(myvec)) {
  print(2 * myvec[i])
}
[1] 0.8
[1] 2.2
[1] 0.68
[1] 1.1
Nesting for loops: You can also nest [for] loops just like [if] statements. When a loop is nested, the inner loop is executed in full before the outer loop's loopindex is incremented, at which point the inner loop is executed all over again.
# Values iterated by the outer loop.
loopvec1 <- 5:7
loopvec1
[1] 5 6 7
# Values iterated by the inner loop (descending).
loopvec2 <- 9:6
loopvec2
[1] 9 8 7 6
# Preallocate the result: one row per loopvec1 element, one column per
# loopvec2 element, all NA to start with.
foo <- matrix(NA, nrow = length(loopvec1), ncol = length(loopvec2))
foo
[,1] [,2] [,3] [,4]
[1,] NA NA NA NA
[2,] NA NA NA NA
[3,] NA NA NA NA
# The following nested loop fills foo with the result of multiplying each
# integer in loopvec1 by each integer in loopvec2, printing both operands and
# the product on every pass.
# seq_along() replaces 1:length() so that an empty vector gives zero passes
# instead of iterating over c(1, 0).
for (i in seq_along(loopvec1)) {
  for (j in seq_along(loopvec2)) {
    foo[i, j] <- loopvec1[i] * loopvec2[j]
    print(loopvec1[i])
    print(loopvec2[j])
    print(foo[i, j])
  }
}
[1] 5
[1] 9
[1] 45
[1] 5
[1] 8
[1] 40
[1] 5
[1] 7
[1] 35
[1] 5
[1] 6
[1] 30
[1] 6
[1] 9
[1] 54
[1] 6
[1] 8
[1] 48
[1] 6
[1] 7
[1] 42
[1] 6
[1] 6
[1] 36
[1] 7
[1] 9
[1] 63
[1] 7
[1] 8
[1] 56
[1] 7
[1] 7
[1] 49
[1] 7
[1] 6
[1] 42
loopvec1
[1] 5 6 7
loopvec2
[1] 9 8 7 6
# Reset foo to an all-NA matrix of the same shape before the next example.
foo <- matrix(NA, nrow = length(loopvec1), ncol = length(loopvec2))
foo
[,1] [,2] [,3] [,4]
[1,] NA NA NA NA
[2,] NA NA NA NA
[3,] NA NA NA NA
# The inner loop's range depends on the outer index: for row i only columns
# 1..i are filled, so foo ends up with its lower-left triangle populated.
# seq_along()/seq_len() replace 1:length()/1:i so a zero-length range gives
# no passes rather than counting down from 1 to 0.
for (i in seq_along(loopvec1)) {
  for (j in seq_len(i)) {
    foo[i, j] <- loopvec1[i] + loopvec2[j]
    cat("i=", i, "j=", j, loopvec1[i], loopvec2[j], foo[i, j], "\n")
  }
}
i= 1 j= 1 5 9 14
i= 2 j= 1 6 9 15
i= 2 j= 2 6 8 14
i= 3 j= 1 7 9 16
i= 3 j= 2 7 8 15
i= 3 j= 3 7 7 14
foo
[,1] [,2] [,3] [,4]
[1,] 14 NA NA NA
[2,] 15 14 NA NA
[3,] 16 15 14 NA
Loops are computationally costly in R. You should always try to do this in a vector-oriented fashion first.
Unlike [for loops] where you need to know the exact number of times to do the loop, [while loops] can execute while a condition is true.
The general form: while(loopcondition) { do any code in here }
# A simple example: keep incrementing myval while it is below 10, reporting
# the new value and the re-evaluated loop condition after each increment.
myval <- 5
while (myval < 10) {
  myval <- myval + 1
  cat("\n'myval' is now", myval, "\n")
  cat("'mycondition' is now", myval < 10, "\n")
}
'myval' is now 6
'mycondition' is now TRUE
'myval' is now 7
'mycondition' is now TRUE
'myval' is now 8
'mycondition' is now TRUE
'myval' is now 9
'mycondition' is now TRUE
'myval' is now 10
'mycondition' is now FALSE
The [apply] function is one of the most basic forms of implicit looping. It takes a function and applies it to each margin of an array.
# sum() on its own collapses the whole matrix into a single grand total.
foo <- matrix(1:12, 4, 3)
foo
sum(foo)
# To get row totals with an explicit loop: preallocate one slot per row,
# then fill each slot with the sum of that row.
row.totals <- rep(NA, times = nrow(foo))
# seq_len(nrow(foo)) instead of 1:nrow(foo): a zero-row matrix then gives
# no passes rather than iterating over c(1, 0).
for (i in seq_len(nrow(foo))) {
  row.totals[i] <- sum(foo[i, ])
}
row.totals
# Same row totals, this time using apply(): MARGIN = 1 means "by row".
row.totals2 <- apply(X = foo, MARGIN = 1, FUN = sum)
row.totals2
[1] 15 18 21 24
# To sum the columns instead, change the margin to 2.
# The name row.totals2 is reused here, so from this point it holds the
# column totals.
row.totals2 <- apply(foo, 2, sum)
row.totals2
tapply is a similar function. It performs operations on subsets of the object of interest, where those subsets are defined in terms of one or more factor vectors.
# Download the diamonds data set and give its five columns readable names.
# NOTE(review): requires network access; confirm the URL still resolves.
dia.url <- "https://www.amstat.org/publications/jse/v9n2/4cdata.txt"
diamonds <- read.table(dia.url)
names(diamonds) <- c("Carat", "Color", "Clarity", "Cert", "Price")
# Peek at the first five rows.
diamonds[1:5, ]
To add up the total value of the diamonds in the full data set, separated according to Color, you can use tapply like this:
tapply(X = diamonds$Price, INDEX = diamonds$Color, FUN = sum)
D E F G H I
113598 242349 392485 287702 302866 207001
lapply is similar to apply but operates member by member on lists.
# A list with members of mixed type and shape, used to demonstrate
# lapply()/sapply().
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
baz <- list(
  aa = c(3.4, 1),
  bb = matrix(1:4, 2, 2),
  cc = matrix(c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE), 3, 2),
  dd = "string here",
  ee = matrix(c("red", "blue", "yellow"))
)
baz
$aa
[1] 3.4 1.0
$bb
[,1] [,2]
[1,] 1 3
[2,] 2 4
$cc
[,1] [,2]
[1,] TRUE TRUE
[2,] TRUE FALSE
[3,] FALSE FALSE
$dd
[1] "string here"
$ee
[,1]
[1,] "red"
[2,] "blue"
[3,] "yellow"
lapply(X = baz, FUN = is.matrix)
$aa
[1] FALSE
$bb
[1] TRUE
$cc
[1] TRUE
$dd
[1] FALSE
$ee
[1] TRUE
The value returned by lapply is always a list. To get the same result in a simplified array form (here, a named vector), use sapply.
sapply(X = baz, FUN = is.matrix)
aa bb cc dd ee
FALSE TRUE TRUE FALSE TRUE
You can pass additional arguments for the applied function through the apply call.
# added sorting
foo
[,1] [,2] [,3]
[1,] 1 5 9
[2,] 2 6 10
[3,] 3 7 11
[4,] 4 8 12
apply(X = foo, MARGIN = 1, FUN = sort, decreasing = TRUE)
[,1] [,2] [,3] [,4]
[1,] 9 10 11 12
[2,] 5 6 7 8
[3,] 1 2 3 4
To preemptively break out of a loop you can declare [break].
# Numerator, denominators (one of them zero) and a preallocated all-NA
# result vector for the break example.
foo <- 5
bar <- c(2, 3, 1.1, 4, 0, 4.1, 3)
loop1.result <- rep(NA, times = length(bar))
loop1.result
[1] NA NA NA NA NA NA NA
# Divide foo by each element of bar in turn, storing the quotients.
# The first non-finite quotient (e.g. division by zero) abandons the whole
# loop via break, so every later slot of loop1.result keeps its initial NA.
# seq_along() replaces 1:length() so an empty `bar` gives zero passes.
for (i in seq_along(bar)) {
  temp <- foo / bar[i]
  if (!is.finite(temp)) {
    break
  }
  loop1.result[i] <- temp
}
loop1.result
[1] 2.500000 1.666667 4.545455 1.250000 NA NA NA
Using break is a drastic solution, mostly for troubleshooting. To skip only the current iteration and carry on with the rest of the loop, you can use [next].
# Fresh all-NA result vector for the next example, one slot per element of bar.
loop2.result <- rep(NA, times = length(bar))
loop2.result
[1] NA NA NA NA NA NA NA
# Same division as before, but a zero denominator only skips the current
# pass: that slot stays NA and the loop carries on with the next element.
# `next` is a keyword, not a function, so it is written without parentheses;
# seq_along() replaces 1:length() so an empty `bar` gives zero passes.
for (i in seq_along(bar)) {
  if (bar[i] == 0) {
    next
  }
  loop2.result[i] <- foo / bar[i]
}
loop2.result
[1] 2.500000 1.666667 4.545455 1.250000 NA 1.219512 1.666667
Another way to do an operation over and over again is to use [repeat]. A repeat loop has no condition of its own, so it must be stopped with [break]. The general format:
repeat{ do any code here. }
# Generate Fibonacci numbers with repeat. fib.a and fib.b hold the two most
# recent terms; each pass computes the next term and shifts the pair forward.
fib.a <- 1
fib.b <- 1
repeat {
  temp <- fib.a + fib.b
  fib.a <- fib.b
  fib.b <- temp
  # cat()'s default separator is a single space, as in sep = " ".
  cat(fib.b, ", ")
  # repeat has no exit condition of its own: leave once a term exceeds 150.
  if (fib.b > 150) {
    cat("breaknow..\n")
    break
  }
}
2 , 3 , 5 , 8 , 13 , 21 , 34 , 55 , 89 , 144 , 233 , breaknow..