Chapter 11 Writing functions

11.1 The [function] command

Functions allow your code to be reused without having to copy and paste. They also allow other users to use your functions to carry out the same computations on their own data or objects.

Learn how to define and use arguments, how to return output from a function and how to specialize your functions in other ways.

11.1.1 Function Creation

Functions are created with the [function()] command. The standard format: functionname <- function(arg1, arg2, arg3,…) { do any code in here when the function is called }

# Fibonacci sequence printer.
# Writes the terms to the console, each followed by a comma, and stops right
# after printing the first term that is greater than 150. Takes no arguments
# and is called only for its printed output.
myfib <- function() {
  previous <- 1
  current <- 1
  cat(previous, ", ", current, ", ", sep = " ")
  finished <- FALSE
  while (!finished) {
    following <- previous + current
    previous <- current
    current <- following
    cat(current, ", ", sep = " ")
    # the term is printed before the test, so the first value above 150
    # still appears in the output
    if (current > 150) {
      cat("break now...")
      finished <- TRUE
    }
  }
}
ls()
 [1] "a"                    "b"                    "bar"                  "baz"                 
 [5] "char.mat"             "char.vec"             "counter"              "ctrl"                
 [9] "df.world_ports"       "dia.url"              "diamonds"             "fac.vec"             
[13] "fib.a"                "fib.b"                "flow.jok"             "flow.vat"            
[17] "foo"                  "full_form"            "Full_form"            "geocodes.world_ports"
[21] "html.world_ports"     "i"                    "ice.river"            "in_train"            
[25] "int_plot"             "j"                    "logic.mat"            "logic.vec"           
[29] "loop1.result"         "loop2.result"         "loopvec1"             "loopvec2"            
[33] "mnr_impute"           "mnr_tune"             "mydatafile"           "myfib"               
[37] "myfile"               "myitem"               "mylist"               "mylookup"            
[41] "mymat"                "mynum"                "mynumber"             "mystring"            
[45] "myval"                "myvec"                "new.myvec"            "newobect"            
[49] "num.mat1"             "num.mat2"             "num.vec1"             "num.vec2"            
[53] "opt.arg"              "ordfac.vec"           "prec"                 "predic"              
[57] "ptype"                "quux"                 "qux"                  "result"              
[61] "row.totals"           "row.totals2"          "scat"                 "scat_orig"           
[65] "small_form"           "small_tr_dat"         "somelist"             "temp"                
[69] "test_data"            "test_data3"           "train_data"           "wls"                 
[73] "x"                    "y"                   
myfib()
1 ,  1 , 2 , 3 , 5 , 8 , 13 , 21 , 34 , 55 , 89 , 144 , 233 , break now...

Let’s add a threshold of when to stop the fibonacci sequence

# Fibonacci sequence printer with a stopping threshold.
# Writes the terms to the console, each followed by a comma, and stops right
# after printing the first term that is greater than 'threshold'.
#   threshold - number the sequence has to exceed before printing stops
# Called only for its printed output.
myfib <- function(threshold) {
  previous <- 1
  current <- 1
  cat(previous, ", ", current, ", ", sep = " ")
  finished <- FALSE
  while (!finished) {
    following <- previous + current
    previous <- current
    current <- following
    cat(current, ", ", sep = " ")
    # the term is printed before the test, so the first value above the
    # threshold still appears in the output
    if (current > threshold) {
      cat("break now...")
      finished <- TRUE
    }
  }
}
# check the myfib is now part of the workspace
ls()
 [1] "a"                    "b"                    "bar"                  "baz"                 
 [5] "char.mat"             "char.vec"             "counter"              "ctrl"                
 [9] "df.world_ports"       "dia.url"              "diamonds"             "fac.vec"             
[13] "fib.a"                "fib.b"                "flow.jok"             "flow.vat"            
[17] "foo"                  "full_form"            "Full_form"            "geocodes.world_ports"
[21] "html.world_ports"     "i"                    "ice.river"            "in_train"            
[25] "int_plot"             "j"                    "logic.mat"            "logic.vec"           
[29] "loop1.result"         "loop2.result"         "loopvec1"             "loopvec2"            
[33] "mnr_impute"           "mnr_tune"             "mydatafile"           "myfib"               
[37] "myfile"               "myitem"               "mylist"               "mylookup"            
[41] "mymat"                "mynum"                "mynumber"             "mystring"            
[45] "myval"                "myvec"                "new.myvec"            "newobect"            
[49] "num.mat1"             "num.mat2"             "num.vec1"             "num.vec2"            
[53] "opt.arg"              "ordfac.vec"           "prec"                 "predic"              
[57] "ptype"                "quux"                 "qux"                  "result"              
[61] "row.totals"           "row.totals2"          "scat"                 "scat_orig"           
[65] "small_form"           "small_tr_dat"         "somelist"             "temp"                
[69] "test_data"            "test_data3"           "train_data"           "wls"                 
[73] "x"                    "y"                   

Now run myfib with an argument inside:

myfib(2250)
1 ,  1 , 2 , 3 , 5 , 8 , 13 , 21 , 34 , 55 , 89 , 144 , 233 , 377 , 610 , 987 , 1597 , 2584 , break now...

You will note that the sequence now stops once a term exceeds the threshold value; the first term greater than the threshold (2584 here) is still printed before the loop breaks.

Now use [return] to return the value of a function

# myfib3: build the Fibonacci sequence and hand it back to the caller.
#   threshold - number the sequence has to exceed before the loop stops
# Returns a numeric vector of terms; its last element is the first term
# greater than 'threshold'. Also prints "break now..." when the loop ends.
myfib3 <- function(threshold) {
  terms <- c(1, 1)
  n_terms <- length(terms)
  repeat {
    # append the sum of the last two terms
    terms[n_terms + 1] <- terms[n_terms - 1] + terms[n_terms]
    n_terms <- n_terms + 1
    # stop as soon as the newest term is past the threshold
    if (terms[n_terms] > threshold) {
      cat("break now...")
      break
    }
  }
  terms
}
# now call the function
myfib3(500)
break now... [1]   1   1   2   3   5   8  13  21  34  55  89 144 233 377 610

You can assign the returned object to a variable, such as foo, and foo is now just another R object in the global environment that you can manipulate.

11.1.2 Returning Results

If there is no return statement inside a function, it will end when the last line in the body code has been run. At this point, it will return the value of the last expression evaluated, which is typically the most recently assigned or created object in the function.

If nothing is created, the function returns NULL.

# dummy1: creates four local objects and has no return() call.
# The value of the final assignment (the integers 4 to 8) becomes the
# function's result; because it comes from an assignment it is returned
# invisibly.
dummy1 <- function() {
  a_number <- 2.5
  first_string <- "string me along"
  second_string <- "strem 'em up"
  int_run <- 4:8
}
# dummy2: creates the same four local objects as dummy1, then explicitly
# hands back the last one (the integers 4 to 8) as a visible result.
dummy2 <- function() {
  a_number <- 2.5
  first_string <- "string me along"
  second_string <- "strem 'em up"
  int_run <- 4:8
  int_run
}
# assign dummy1 output to foo
foo<-dummy1()
# assign dummy2 output to bar
bar<-dummy2()
# print out foo and bar
foo
[1] 4 5 6 7 8
bar
[1] 4 5 6 7 8

Notice that both bar and foo have the same contents…which is dd, which happens to be also the LAST object created by dummy1.

11.2 Arguments

Learn how R evaluates arguments. It takes a hammer and pounds the weakest one. You see how to write functions that have default argument values. You learn how to make functions handle missing argument values, and how to pass extra arguments into an internal function call with the ellipsis.

11.2.1 Lazy Evaluation

Lazy evaluation means that arguments are only evaluated when needed. This is a sample function that searches for matrix objects in a supplied list. If no matrices are in the supplied list, or if no appropriate matrices are present, the function should return a string.

# multiples1: post-multiply every suitable matrix found in a list by 'mat'.
#
# Arguments:
#   x    - a list whose members may or may not be matrices
#   mat  - the post-multiplying matrix
#   str1 - value returned when 'x' contains no matrices at all
#   str2 - value returned when 'x' contains matrices, but none whose number
#          of columns equals nrow(mat)
#
# Returns a list with one product (member %*% mat) per suitable matrix, in
# the order the matrices appear in 'x'; otherwise str1 or str2 as described
# above. str1 and str2 have no defaults and are only evaluated when they are
# actually needed (lazy evaluation).
multiples1 <- function(x, mat, str1, str2) {
  # flag the members of x that are matrices; vapply always yields a logical
  # vector, even when x is empty
  matrix.flags <- vapply(x, FUN = is.matrix, FUN.VALUE = logical(1))
  if (!any(matrix.flags)) {
    return(str1)
  }

  result <- list()
  for (i in which(matrix.flags)) {
    temp <- x[[i]]
    if (ncol(temp) == nrow(mat)) {
      result[[length(result) + 1]] <- temp %*% mat
    }
  }

  # decide only after every matrix has been inspected; testing inside the
  # loop would return after the first matrix and ignore all the others
  if (length(result) == 0) {
    return(str2)
  }
  result
}

Now let us test the function

# set up three test lists:
#   foo - mixes character strings with 2x2, 2x4 and 4x2 matrices
#   bar - contains no matrices at all
#   baz - the same members as bar plus a single 2x4 matrix
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
foo <- list(
  matrix(1:4, 2, 2),
  "not a matrix",
  "definitely not a matrix",
  matrix(1:8, 2, 4),
  matrix(1:8, 4, 2)
)
bar <- list(1:4, "not a matrix", c(FALSE, TRUE, TRUE, TRUE), "??")
baz <- list(1:4, "not a matrix", c(FALSE, TRUE, TRUE, TRUE), "??", matrix(1:8, 2, 4))
foo
[[1]]
     [,1] [,2]
[1,]    1    3
[2,]    2    4

[[2]]
[1] "not a matrix"

[[3]]
[1] "definitely not a matrix"

[[4]]
     [,1] [,2] [,3] [,4]
[1,]    1    3    5    7
[2,]    2    4    6    8

[[5]]
     [,1] [,2]
[1,]    1    5
[2,]    2    6
[3,]    3    7
[4,]    4    8
bar
[[1]]
[1] 1 2 3 4

[[2]]
[1] "not a matrix"

[[3]]
[1] FALSE  TRUE  TRUE  TRUE

[[4]]
[1] "??"
baz
[[1]]
[1] 1 2 3 4

[[2]]
[1] "not a matrix"

[[3]]
[1] FALSE  TRUE  TRUE  TRUE

[[4]]
[1] "??"

[[5]]
     [,1] [,2] [,3] [,4]
[1,]    1    3    5    7
[2,]    2    4    6    8
# now try out the function multiples1
multiples1(x=foo,mat=diag(2),str1="no matrices in 'x'",str2="matrices in 'x' but none of appropriate dimensions given 'mat'")
[[1]]
     [,1] [,2]
[1,]    1    3
[2,]    2    4

Note: I am still confused. Why is only one matrix returned for foo? As to the author’s point about lazy evaluation: notice that even if we do not specify str1 and str2, there is no error message, because they are not needed when x=foo.

multiples1(x=foo,mat=diag(2))
[[1]]
     [,1] [,2]
[1,]    1    3
[2,]    2    4

However, if we set x=bar, there is no matrix in the list, so R now needs str1; since it was not supplied, there will be an error message:

multiples1(x=bar,mat=diag(2))
Error in multiples1(x = bar, mat = diag(2)) : 
  argument "str1" is missing, with no default

11.2.2 Setting Defaults

Defaults are default values in arguments so even if you forget to specify them, a value will be assigned. Here is a re-write of the code to take in default values:

# multiples2: post-multiply every suitable matrix found in a list by 'mat',
# with a default value for every argument.
#
# Arguments:
#   x    - a list whose members may or may not be matrices; defaults to the
#          object named baz, which must exist when the default is used
#   mat  - the post-multiplying matrix (default: 2x2 identity)
#   str1 - value returned when 'x' contains no matrices at all
#   str2 - value returned when 'x' contains matrices, but none whose number
#          of columns equals nrow(mat)
#
# Returns a list with one product (member %*% mat) per suitable matrix, in
# the order the matrices appear in 'x'; otherwise str1 or str2 as described
# above.
multiples2 <- function(x = baz,
                       mat = diag(2),
                       str1 = "no matrices in 'x'",
                       str2 = "matrices in 'x' but none of appropriate dimensions given 'mat'") {
  # flag the members of x that are matrices; vapply always yields a logical
  # vector, even when x is empty
  matrix.flags <- vapply(x, FUN = is.matrix, FUN.VALUE = logical(1))
  if (!any(matrix.flags)) {
    return(str1)
  }

  result <- list()
  for (i in which(matrix.flags)) {
    temp <- x[[i]]
    if (ncol(temp) == nrow(mat)) {
      result[[length(result) + 1]] <- temp %*% mat
    }
  }

  # decide only after every matrix has been inspected; testing inside the
  # loop would return after the first matrix and ignore all the others
  if (length(result) == 0) {
    return(str2)
  }
  result
}
# now test it
multiples2()
[1] "matrices in 'x' but none of appropriate dimensions given 'mat'"
multiples2(x=foo)
[[1]]
     [,1] [,2]
[1,]    1    3
[2,]    2    4
multiples2(str2="fck off")
[1] "fck off"

11.2.3 Checking for Missing Arguments

The [missing] function checks the arguments of a function to see if all required arguments have been supplied. It takes an argument tag and returns a single logical value of TRUE if the specified argument isn’t found.

# multiples3: post-multiply every suitable matrix found in a list by 'mat',
# using missing() to cope with str1/str2 not being supplied.
#
# Arguments:
#   x    - a list whose members may or may not be matrices
#   mat  - the post-multiplying matrix
#   str1 - value returned when 'x' contains no matrices at all; if it was
#          not supplied, a fixed fallback message is returned instead
#   str2 - value returned when 'x' contains matrices, but none whose number
#          of columns equals nrow(mat); if it was not supplied, a fixed
#          fallback message is returned instead
#
# Returns a list with one product (member %*% mat) per suitable matrix, in
# the order the matrices appear in 'x'; otherwise one of the strings above.
multiples3 <- function(x, mat, str1, str2) {
  # flag the members of x that are matrices; vapply always yields a logical
  # vector, even when x is empty
  matrix.flags <- vapply(x, FUN = is.matrix, FUN.VALUE = logical(1))
  if (!any(matrix.flags)) {
    # missing() tells us whether the caller supplied str1
    if (missing(str1)) {
      return("str1 was missing so we used this instead")
    }
    return(str1)
  }

  result <- list()
  for (i in which(matrix.flags)) {
    temp <- x[[i]]
    if (ncol(temp) == nrow(mat)) {
      result[[length(result) + 1]] <- temp %*% mat
    }
  }

  # decide only after every matrix has been inspected; testing inside the
  # loop would return after the first matrix and ignore all the others
  if (length(result) == 0) {
    if (missing(str2)) {
      return("str2 was missing so we used this")
    }
    return(str2)
  }
  result
}
    
# now test it
multiples3(x=bar,diag(2))
[1] "str1 was missing so we used this instead"
multiples3(x=foo,diag(2))
[[1]]
     [,1] [,2]
[1,]    1    3
[2,]    2    4
multiples3(x=baz,diag(2))
[1] "str2 was missing so we used this"

11.2.4 Dealing with Ellipsis

Ellipsis allows you to pass in extra arguments without having to first define them in the argument list.

# myfibplot: build the Fibonacci sequence and either plot it or return it.
#   threshold - number the sequence has to exceed before the loop stops
#   plotit    - when TRUE (the default) the terms are plotted against their
#               position; otherwise the numeric vector of terms is returned
#   ...       - extra arguments handed on unchanged to plot()
myfibplot <- function(threshold, plotit = TRUE, ...) {
  fibseq <- c(1, 1)
  repeat {
    last <- length(fibseq)
    # append the sum of the last two terms
    fibseq[last + 1] <- fibseq[last - 1] + fibseq[last]
    # the newest term is kept even though it is past the threshold
    if (fibseq[last + 1] > threshold) {
      break
    }
  }
  if (plotit) {
    # the argument expressions are left exactly as written because plot()
    # derives its default axis labels from them
    plot(1:length(fibseq), fibseq, ...)
  } else {
    fibseq
  }
}
# now call the function
myfibplot(1500)

Now here is where the ellipsis comes in. We can pass additional parameters to the plot command:

myfibplot(1500,type="b",pch=4,lty=2,main="Terms of Fibonacci Sequence",ylab="Fibonacci number", xlab="Term(n)")

You might want to unpack the arguments passed via the ellipsis inside the function, using x <- list(...).

11.3 Specialized Functions

11.3.1 Helper Function

"Helper function" is a general term used to describe functions written and used specifically to facilitate the computations carried out by another function. They can be defined internally or externally.

A sample of externally defined helper function here:

# multiples_helper_ext: externally defined helper that does the matrix work.
#   x            - a list of candidate objects
#   matrix.flags - logical vector marking which members of x are matrices
#   mat          - the post-multiplying matrix
# Returns a two-element list: first the list of products (member %*% mat)
# for every flagged member whose number of columns equals nrow(mat), then
# the count of such members.
multiples_helper_ext <- function(x, matrix.flags, mat) {
  products <- list()
  n_found <- 0
  for (pos in which(matrix.flags)) {
    candidate <- x[[pos]]
    # only conformable matrices can be multiplied by mat
    if (ncol(candidate) == nrow(mat)) {
      n_found <- n_found + 1
      products[[n_found]] <- candidate %*% mat
    }
  }
  list(products, n_found)
}
# multiples4: post-multiply the suitable matrices in a list by 'mat',
# delegating the matrix work to the external helper multiples_helper_ext.
#   x    - a list whose members may or may not be matrices
#   mat  - the post-multiplying matrix
#   str1 - returned when x holds no matrices
#   str2 - returned when the helper reports no suitable matrices
# Returns the helper's list of products, or str1 / str2 as described.
multiples4 <- function(x, mat, str1 = "no valid matrix", str2 = "no valid member") {
  is_mat <- sapply(x, FUN = is.matrix)
  if (!any(is_mat)) {
    return(str1)
  }

  # the helper returns list(products, count)
  outcome <- multiples_helper_ext(x, is_mat, mat)
  if (outcome[[2]] == 0) str2 else outcome[[1]]
}
# now we call the function
multiples4(x=foo,mat=diag(2))
[[1]]
     [,1] [,2]
[1,]    1    3
[2,]    2    4

[[2]]
     [,1] [,2]
[1,]    1    5
[2,]    2    6
[3,]    3    7
[4,]    4    8

Internally defined helper functions are defined within the lexical environment of the function that calls them.

# multiples5: post-multiply the suitable matrices in a list by 'mat', with
# the matrix work done by a helper defined inside this function.
#   x    - a list whose members may or may not be matrices
#   mat  - the post-multiplying matrix
#   str1 - returned when x holds no matrices
#   str2 - returned when no matrix in x has ncol equal to nrow(mat)
# Returns the list of products (member %*% mat), or str1 / str2 as described.
multiples5 <- function(x, mat, str1 = "no valid matrix", str2 = "no valid member") {
  is_mat <- sapply(x, FUN = is.matrix)
  if (!any(is_mat)) {
    return(str1)
  }

  # internal helper: exists only while multiples5 is running; returns
  # list(products, count) for the flagged members that fit the multiplier
  collect_products <- function(members, flags, multiplier) {
    products <- list()
    n_found <- 0
    for (pos in which(flags)) {
      candidate <- members[[pos]]
      if (ncol(candidate) == nrow(multiplier)) {
        n_found <- n_found + 1
        products[[n_found]] <- candidate %*% multiplier
      }
    }
    list(products, n_found)
  }

  outcome <- collect_products(x, is_mat, mat)
  if (outcome[[2]] == 0) str2 else outcome[[1]]
}
# now we call the function
multiples5(x=foo,mat=diag(2))
[[1]]
     [,1] [,2]
[1,]    1    3
[2,]    2    4

[[2]]
     [,1] [,2]
[1,]    1    5
[2,]    2    6
[3,]    3    7
[4,]    4    8

11.3.2 Disposable Functions

Disposable (anonymous) functions allow a simple one line task that you define for use in a single instance without explicitly creating a new object in your global environment.

foo <-matrix(c(2,3,3,4,2,4,7,3,3,6,7,2),3,4)
foo
     [,1] [,2] [,3] [,4]
[1,]    2    4    7    6
[2,]    3    2    3    7
[3,]    3    4    3    2
# disposable (anonymous) function handed straight to apply: within every
# column of foo each value is repeated twice and the result is sorted
apply(foo, MARGIN = 2, FUN = function(column) sort(rep(column, times = 2)))
     [,1] [,2] [,3] [,4]
[1,]    2    2    3    2
[2,]    2    2    3    2
[3,]    3    4    3    6
[4,]    3    4    3    6
[5,]    3    4    7    7
[6,]    3    4    7    7

11.3.3 Recursive Functions

Recursive functions call themselves.

# myfibrec: n-th Fibonacci number, computed by the function calling itself.
#   n - position in the sequence; a single positive whole number
# Returns 1 for n equal to 1 or 2, otherwise the sum of the two preceding
# terms. Stops with an error for any other kind of input.
myfibrec <- function(n) {
  # without this guard a value below 1, a fractional value or a non-finite
  # value never reaches a base case, and the recursion only ends when R
  # runs out of stack
  if (!is.numeric(n) || length(n) != 1 || !is.finite(n) ||
      n < 1 || n != floor(n)) {
    stop("'n' must be a single positive whole number", call. = FALSE)
  }
  # base cases: the first two terms are both 1
  if (n <= 2) {
    return(1)
  }
  myfibrec(n - 1) + myfibrec(n - 2)
}
# now call it
myfibrec(15)
[1] 610
