1.\(apply(object,margin,function)\)

  1. 4*4 matrix
# 4 x 4 integer matrix holding 1..16, filled column by column.
M <- matrix(1:16, nrow = 4, ncol = 4)
M
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
  1. sum with rows
apply(M, 1, sum)
## [1] 28 32 36 40
  1. internal vectorization: since we take **margin=2**, the function receives each column as y, so \(y*y\) is evaluated element-wise on the whole column at once, i.e. \(\vec{y} \odot \vec{y}\) (an element-wise product, not a dot product)
apply(M, 2, function(y)y*y)
##      [,1] [,2] [,3] [,4]
## [1,]    1   25   81  169
## [2,]    4   36  100  196
## [3,]    9   49  121  225
## [4,]   16   64  144  256
  1. margin = c(1,2) uses both margin 1 and margin 2, which means the function is applied to every single element of the matrix
apply(M, c(1,2), function(t)t^2) 
##      [,1] [,2] [,3] [,4]
## [1,]    1   25   81  169
## [2,]    4   36  100  196
## [3,]    9   49  121  225
## [4,]   16   64  144  256

2.\(lapply()\)

\(l+apply\): here \(l\) stands for list. It takes in a list (or a vector), and the object it returns is always a list of the same length:

x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE))
x
## $a
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $beta
## [1]  0.04978707  0.13533528  0.36787944  1.00000000  2.71828183  7.38905610
## [7] 20.08553692
## 
## $logic
## [1]  TRUE FALSE FALSE
lapply(x, FUN = mean) # mean of each list element (TRUE/FALSE are averaged as 1/0)
## $a
## [1] 5.5
## 
## $beta
## [1] 4.535125
## 
## $logic
## [1] 0.3333333
lapply(x, quantile, probs = 1:3/4)
## $a
##  25%  50%  75% 
## 3.25 5.50 7.75 
## 
## $beta
##       25%       50%       75% 
## 0.2516074 1.0000000 5.0536690 
## 
## $logic
## 25% 50% 75% 
## 0.0 0.0 0.5

3.\(sapply()\)

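used most often because it takes in a list and, when the individual results allow it, simplifies the return value to a vector or a matrix (otherwise it returns a list)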

sapply(x, quantile)
##          a        beta logic
## 0%    1.00  0.04978707   0.0
## 25%   3.25  0.25160736   0.0
## 50%   5.50  1.00000000   0.0
## 75%   7.75  5.05366896   0.5
## 100% 10.00 20.08553692   1.0

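this contains the same values as the following, except that unlist() flattens them into a single named vector instead of arranging them in a matrix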

unlist(lapply(x,quantile))
##        a.0%       a.25%       a.50%       a.75%      a.100%     beta.0% 
##  1.00000000  3.25000000  5.50000000  7.75000000 10.00000000  0.04978707 
##    beta.25%    beta.50%    beta.75%   beta.100%    logic.0%   logic.25% 
##  0.25160736  1.00000000  5.05366896 20.08553692  0.00000000  0.00000000 
##   logic.50%   logic.75%  logic.100% 
##  0.00000000  0.50000000  1.00000000

4.\(vapply()\)

has a pre-specified type of return value, so it can be safer (and sometimes faster) to use than \(sapply()\)

i39 <- sapply(3:9, seq) # list of vectors seq(3) seq(4) seq(5) seq(6)...
i39
## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] 1 2 3 4
## 
## [[3]]
## [1] 1 2 3 4 5
## 
## [[4]]
## [1] 1 2 3 4 5 6
## 
## [[5]]
## [1] 1 2 3 4 5 6 7
## 
## [[6]]
## [1] 1 2 3 4 5 6 7 8
## 
## [[7]]
## [1] 1 2 3 4 5 6 7 8 9
sapply(i39, fivenum)# Tukey's five-number summary of each vector: minimum, lower hinge, median, upper hinge, maximum
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,]  1.0  1.0    1  1.0  1.0  1.0    1
## [2,]  1.5  1.5    2  2.0  2.5  2.5    3
## [3,]  2.0  2.5    3  3.5  4.0  4.5    5
## [4,]  2.5  3.5    4  5.0  5.5  6.5    7
## [5,]  3.0  4.0    5  6.0  7.0  8.0    9
vapply(i39, fivenum, FUN.VALUE = numeric(5))  # every result must be a numeric vector of length 5
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,]  1.0  1.0    1  1.0  1.0  1.0    1
## [2,]  1.5  1.5    2  2.0  2.5  2.5    3
## [3,]  2.0  2.5    3  3.5  4.0  4.5    5
## [4,]  2.5  3.5    4  5.0  5.5  6.5    7
## [5,]  3.0  4.0    5  6.0  7.0  8.0    9
vapply(i39, fivenum,c(Min. = 0, "1st Qu." = 0, Median = 0, "3rd Qu." = 0, Max. = 0))  #vapply can specify the return type,here 0 means numeric type
##         [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## Min.     1.0  1.0    1  1.0  1.0  1.0    1
## 1st Qu.  1.5  1.5    2  2.0  2.5  2.5    3
## Median   2.0  2.5    3  3.5  4.0  4.5    5
## 3rd Qu.  2.5  3.5    4  5.0  5.5  6.5    7
## Max.     3.0  4.0    5  6.0  7.0  8.0    9

\(fivenum()\) is useful for a boxplot or descriptive statistics

5. a better example of vapply

structure returns the given object with further attributes set.

e<-structure(1:6, dim = 2:3)#a matrix
e
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
v <- structure(10*(5:8), names = LETTERS[1:4])
v
##  A  B  C  D 
## 50 60 70 80

the function recycles x to length 3 (rep with length.out = 3) and then calculates the outer product of that vector with y, which gives a matrix with 3 rows and one column per element of y

# Recycle `x` to exactly three elements and return its outer product with `y`.
# For a plain vector `y` the result is a matrix with 3 rows and length(y)
# columns whose [i, j] entry is (recycled x)[i] * y[j].
f2 <- function(x, y) {
  x_recycled <- rep(x, length.out = 3)
  outer(x_recycled, y)
}

\(1*(3:1)\)
\(2*(3:1)\)
\(3*(3:1)\)

f2(1:3,3:1)
##      [,1] [,2] [,3]
## [1,]    3    2    1
## [2,]    6    4    2
## [3,]    9    6    3

The example below is a little tricky.
Remember that f2() takes x, recycles it to length three, and then takes the outer product with y. Here sapply() passes the elements of v one at a time: the first is (A, 50), which is recycled to (50, 50, 50) and then combined in an outer product with y, which is (2, 4, 6, 8, 10), giving a 3 x 5 matrix.
Then it takes (B, 60), does the same thing, and so on.

a2 <- sapply(v, f2, y = 2*(1:5))# returns a 15 x 4 matrix: each 3 x 5 result of f2 is flattened column-wise into one column per element of v
a2
##         A   B   C   D
##  [1,] 100 120 140 160
##  [2,] 100 120 140 160
##  [3,] 100 120 140 160
##  [4,] 200 240 280 320
##  [5,] 200 240 280 320
##  [6,] 200 240 280 320
##  [7,] 300 360 420 480
##  [8,] 300 360 420 480
##  [9,] 300 360 420 480
## [10,] 400 480 560 640
## [11,] 400 480 560 640
## [12,] 400 480 560 640
## [13,] 500 600 700 800
## [14,] 500 600 700 800
## [15,] 500 600 700 800
a3 <- sapply(v, f2, y = 2*(1:5),simplify = "array")#simplify = "array", return an array(a cube)
a3
## , , A
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  100  200  300  400  500
## [2,]  100  200  300  400  500
## [3,]  100  200  300  400  500
## 
## , , B
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  120  240  360  480  600
## [2,]  120  240  360  480  600
## [3,]  120  240  360  480  600
## 
## , , C
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  140  280  420  560  700
## [2,]  140  280  420  560  700
## [3,]  140  280  420  560  700
## 
## , , D
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  160  320  480  640  800
## [2,]  160  320  480  640  800
## [3,]  160  320  480  640  800
# FUN.VALUE is a template: every call of f2 on an element of v must return a
# value with the same type and dimensions (here a 3 x 5 matrix); the four
# results are then stacked into a 3 x 5 x 4 array.
a4 <- vapply(v, f2, y = 2*(1:5),FUN.VALUE = outer(1:3, 1:5))
outer(1:3, 1:5)# the template passed as FUN.VALUE; vapply needs it to know the type and shape of each result, its actual values do not matter
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]    2    4    6    8   10
## [3,]    3    6    9   12   15
a4
## , , A
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  100  200  300  400  500
## [2,]  100  200  300  400  500
## [3,]  100  200  300  400  500
## 
## , , B
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  120  240  360  480  600
## [2,]  120  240  360  480  600
## [3,]  120  240  360  480  600
## 
## , , C
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  140  280  420  560  700
## [2,]  140  280  420  560  700
## [3,]  140  280  420  560  700
## 
## , , D
## 
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  160  320  480  640  800
## [2,]  160  320  480  640  800
## [3,]  160  320  480  640  800

6.\(tapply()\)

\(tapply()\) applies a function to the values of a vector split by the levels of a factor; it is mostly used for calculating per-group summaries.

x <- 1:20
y <- factor(rep(letters[1:5], each = 4))

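here the repeat is a little different
if \(rep(1:2,length.out=3)\), the whole vector is recycled until the result has 3 elements, so the outcome is \(1,2,1\) (with \(times=3\) instead, it would be \(1,2,1,2,1,2\))
if \(rep(1:2,each=3)\), it repeats each element first, so the outcome is \(1,1,1,2,2,2\)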

this looks much like a pivot table in Excel (note that cbind() shows the factor y as its integer codes 1-5 rather than its labels a-e)

cbind(x,y)
##        x y
##  [1,]  1 1
##  [2,]  2 1
##  [3,]  3 1
##  [4,]  4 1
##  [5,]  5 2
##  [6,]  6 2
##  [7,]  7 2
##  [8,]  8 2
##  [9,]  9 3
## [10,] 10 3
## [11,] 11 3
## [12,] 12 3
## [13,] 13 4
## [14,] 14 4
## [15,] 15 4
## [16,] 16 4
## [17,] 17 5
## [18,] 18 5
## [19,] 19 5
## [20,] 20 5
tapply(x, y, sum)
##  a  b  c  d  e 
## 10 26 42 58 74

5.aggregate()

the logic is split-apply-combine

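rows with NA in any of the grouping variables are omitted; NA in the data columns themselves is passed on to the function (which is why the 2/99 group below has a mean of NA)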

testDF <- data.frame(v1 =c(1,3,5,7,8,3,5,NA,4,5,7,9),v2=c(11,33,55,77,88,33,55,NA,44,55,77,99))
by1 <- c("red", "blue", 1, 2, NA, "big", 1, 2, "red", 1, NA, 12)
by2 <- c("wet", "dry", 99, 95, NA, "damp", 95, 99, "red", 99, NA, NA)
cbind(testDF,by1,by2)
##    v1 v2  by1  by2
## 1   1 11  red  wet
## 2   3 33 blue  dry
## 3   5 55    1   99
## 4   7 77    2   95
## 5   8 88 <NA> <NA>
## 6   3 33  big damp
## 7   5 55    1   95
## 8  NA NA    2   99
## 9   4 44  red  red
## 10  5 55    1   99
## 11  7 77 <NA> <NA>
## 12  9 99   12 <NA>
aggregate(x = testDF, by = list(by1, by2), FUN = "mean")
##   Group.1 Group.2 v1 v2
## 1       1      95  5 55
## 2       2      95  7 77
## 3       1      99  5 55
## 4       2      99 NA NA
## 5     big    damp  3 33
## 6    blue     dry  3 33
## 7     red     red  4 44
## 8     red     wet  1 11

by is a list of grouping elements, each as long as the variables in the data frame x; the groups are formed from the combinations of values across these grouping elements.

if you want to keep NAs as a group

fby1 <- factor(by1, exclude = "")
fby2 <- factor(by2, exclude = "")
aggregate(x = testDF, by = list(fby1, fby2), FUN = "mean")
##    Group.1 Group.2  v1   v2
## 1        1      95 5.0 55.0
## 2        2      95 7.0 77.0
## 3        1      99 5.0 55.0
## 4        2      99  NA   NA
## 5      big    damp 3.0 33.0
## 6     blue     dry 3.0 33.0
## 7      red     red 4.0 44.0
## 8      red     wet 1.0 11.0
## 9       12    <NA> 9.0 99.0
## 10    <NA>    <NA> 7.5 82.5

another example, which again works much like a pivot table in Excel

head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
aggregate(. ~ Species, data = iris, mean)
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa        5.006       3.428        1.462       0.246
## 2 versicolor        5.936       2.770        4.260       1.326
## 3  virginica        6.588       2.974        5.552       2.026

\(mapply()\) (a multivariate version of \(sapply()\))

mapply(rep, times = 1:4, x = 4:1)
## [[1]]
## [1] 4
## 
## [[2]]
## [1] 3 3
## 
## [[3]]
## [1] 2 2 2
## 
## [[4]]
## [1] 1 1 1 1
mapply(rep, times = 1:4, MoreArgs = list(x = 42))
## [[1]]
## [1] 42
## 
## [[2]]
## [1] 42 42
## 
## [[3]]
## [1] 42 42 42
## 
## [[4]]
## [1] 42 42 42 42
# mapply walks x and y in parallel: seq_len(1) + 10, seq_len(2) + 0, seq_len(3) - 10.
# The results have different lengths, so they cannot be simplified and a list is
# returned; its names (a, b, c) are taken from the first vectorized argument, x.
mapply(function(x, y) seq_len(x) + y,x=c(a=1,b=2,c=3), y=c(A = 10, B = 0, C = -10))# the names of y (A, B, C) do not appear in the result
## $a
## [1] 11
## 
## $b
## [1] 1 2
## 
## $c
## [1] -9 -8 -7

7. \(Vectorize\)

The most powerful of these: it makes a function vectorizable. Vectorize creates a function wrapper that vectorizes the action of its argument FUN.

vrep <- Vectorize(rep.int)
vrep(times = 1:4, x = 4:1) 
## [[1]]
## [1] 4
## 
## [[2]]
## [1] 3 3
## 
## [[3]]
## [1] 2 2 2
## 
## [[4]]
## [1] 1 1 1 1
# 4
# 3 3
# 2 2 2
# 1 1 1 1


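# Surface of a weighted sum of squares for the curve Vm*conc/(K+conc) on the treated Puromycin data:
# SS is vectorized over Vm and K, evaluated on a grid of (Vm, K) values with outer(), and drawn as a contour plot.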
# Weighted residual sum of squares for the curve Vm * conc / (K + conc).
#
# Vm, K : parameters of the curve.
# resp  : observed responses.
# conc  : concentrations at which the responses were observed.
# Returns sum((resp - fitted)^2 / fitted), a single number.
SS <- function(Vm, K, resp, conc) {
  fitted <- Vm * conc / (K + conc)
  residual <- resp - fitted
  sum(residual^2 / fitted)
}
# Vectorize SS over Vm and K only, so that outer() can hand it whole vectors of
# (Vm, K) pairs while resp and conc are passed unchanged to every call.
vSS <- Vectorize(SS, c("Vm", "K"))
# Keep only the rows of the Puromycin data whose state is "treated".
Treated <- subset(Puromycin, state == "treated")

# Grid of candidate parameter values: 50 values of Vm and 40 values of K.
Vm <- seq(140, 310, length.out = 50)
K <- seq(0, 0.15, length.out = 40)
# 50 x 40 matrix of SS values, one for each (Vm, K) combination on the grid.
SSvals <- outer(Vm, K, vSS, Treated$rate, Treated$conc)
# Contour plot of that surface with contour lines at 1, 4, 9, ..., 100.
contour(Vm, K, SSvals, levels = (1:10)^2, xlab = "Vm", ylab = "K")

8. Bonus of system function and polynomial equations

Sys.Date()
## [1] "2016-03-28"
Sys.info()
##        sysname        release        version       nodename        machine 
##      "Windows"      "8.1 x64"   "build 9600"    "SUNNYBLUE"       "x86-64" 
##          login           user effective_user 
##       "Steven"       "Steven"       "Steven"
sessionInfo()
## R version 3.2.4 (2016-03-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 8.1 x64 (build 9600)
## 
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936 
## [2] LC_CTYPE=Chinese (Simplified)_China.936   
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C                              
## [5] LC_TIME=Chinese (Simplified)_China.936    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] magrittr_1.5    tools_3.2.4     htmltools_0.3   yaml_2.1.13    
##  [5] stringi_1.0-1   rmarkdown_0.9.5 knitr_1.12.3    stringr_1.0.0  
##  [9] digest_0.6.9    evaluate_0.8.3
s1<-Sys.time()
vrep <- Vectorize(rep.int)#vectorize the function
vrep(1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4
vrep(times = 1:4, x = 4:1) 
## [[1]]
## [1] 4
## 
## [[2]]
## [1] 3 3
## 
## [[3]]
## [1] 2 2 2
## 
## [[4]]
## [1] 1 1 1 1
s2<-Sys.time()
s2-s1
## Time difference of 0.003001928 secs
Sys.getenv()
## ALLUSERSPROFILE       C:\ProgramData
## APPDATA               C:\Users\Steven\AppData\Roaming
## asl.log               Destination=file
## CLASSPATH             .;C:\Program
##                       Files\Java\jdk1.8.0_66\lib\dt.jar;C:\Program
##                       Files\Java\jdk1.8.0_66\lib\tools.jar;
## CommonProgramFiles    C:\Program Files\Common Files
## CommonProgramFiles(x86)
##                       C:\Program Files (x86)\Common Files
## CommonProgramW6432    C:\Program Files\Common Files
## COMPUTERNAME          SUNNYBLUE
## ComSpec               C:\windows\system32\cmd.exe
## configsetroot         C:\windows\ConfigSetRoot
## DISPLAY               :0
## easyplussdk           "C:\Program Files (x86)\Common
##                       Files\lenovo\easyplussdk\bin"
## FP_NO_HOST_CHECK      NO
## GFORTRAN_STDERR_UNIT
##                       -1
## GFORTRAN_STDOUT_UNIT
##                       -1
## GIT_ASKPASS           rpostback-askpass
## HOME                  C:/Users/Steven/Documents
## HOMEDRIVE             C:
## HOMEPATH              \Users\Steven
## JAVA_HOME             C:\Program Files\Java\jdk1.8.0_66
## LOCALAPPDATA          C:\Users\Steven\AppData\Local
## LOGONSERVER           \\MicrosoftAccount
## MOZ_PLUGIN_PATH       C:\Program Files (x86)\Foxit Software\Foxit
##                       Reader\plugins\
## NUMBER_OF_PROCESSORS
##                       4
## OS                    Windows_NT
## PATH                  C:\Program
##                       Files\R\R-3.2.4\bin\x64;C:\ProgramData\Oracle\Java\javapath;C:\windows\system32;C:\windows;C:\windows\System32\Wbem;D:\SAS92\SharedFiles\Formats;JAVA_HOME%\bin;C:\Program
##                       Files\Java\jdk1.8.0_66\jre\bin;C:\Program
##                       Files (x86)\MySQL\MySQL Fabric 1.5 & MySQL
##                       Utilities 1.5\;C:\Program Files
##                       (x86)\MySQL\MySQL Fabric 1.5 & MySQL
##                       Utilities 1.5\Doctrine extensions for
##                       PHP\;d:\mysql\bin;
## PATHEXT               .COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC
## PROCESSOR_ARCHITECTURE
##                       AMD64
## PROCESSOR_IDENTIFIER
##                       Intel64 Family 6 Model 61 Stepping 4,
##                       GenuineIntel
## PROCESSOR_LEVEL       6
## PROCESSOR_REVISION    3d04
## ProgramData           C:\ProgramData
## ProgramFiles          C:\Program Files
## ProgramFiles(x86)     C:\Program Files (x86)
## ProgramW6432          C:\Program Files
## PSModulePath          C:\windows\system32\WindowsPowerShell\v1.0\Modules\
## PUBLIC                C:\Users\Public
## R_ARCH                /x64
## R_DOC_DIR             C:/PROGRA~1/R/R-32~1.4/doc
## R_HOME                C:/PROGRA~1/R/R-32~1.4
## R_LIBS                C:/Users/Steven/Documents/R/win-library/3.2;C:/Program
##                       Files/R/R-3.2.4/library
## R_LIBS_USER           C:/Users/Steven/Documents/R/win-library/3.2
## R_USER                C:/Users/Steven/Documents
## READYAPPS             C:\ProgramData\Lenovo\ReadyApps
## RMARKDOWN_MATHJAX_PATH
##                       C:/Program Files
##                       (x86)/RStudio/resources/mathjax-23
## RS_LOCAL_PEER         \\.\pipe\14614-rsession
## RS_RPOSTBACK_PATH     C:/Program Files (x86)/RStudio/bin/rpostback
## RS_SHARED_SECRET      63341846741
## RSTUDIO               1
## RSTUDIO_MSYS_SSH      C:/Program Files
##                       (x86)/RStudio/bin/msys-ssh-1000-18
## RSTUDIO_PANDOC        C:/Program Files (x86)/RStudio/bin/pandoc
## RSTUDIO_SESSION_PORT
##                       14614
## RSTUDIO_USER_IDENTITY
##                       Steven
## SESSIONNAME           Console
## SSH_ASKPASS           rpostback-askpass
## SystemDrive           C:
## SystemRoot            C:\windows
## TEMP                  C:\Users\Steven\AppData\Local\Temp
## TKPATHIA32            D:\SAS92\SharedFiles\ICU
## TMP                   C:\Users\Steven\AppData\Local\Temp
## TVT                   C:\Program Files (x86)\Lenovo
## USERDOMAIN            SUNNYBLUE
## USERDOMAIN_ROAMINGPROFILE
##                       SUNNYBLUE
## USERNAME              Steven
## USERPROFILE           C:\Users\Steven
## VS110COMNTOOLS        C:\Program Files (x86)\Microsoft Visual
##                       Studio 11.0\Common7\Tools\
## VS120COMNTOOLS        D:\visual studio\Common7\Tools\
## VS140COMNTOOLS        C:\Program Files (x86)\Microsoft Visual
##                       Studio 14.0\Common7\Tools\
## windir                C:\windows
Sys.sleep(time = 1)
system.time(for(i in 1:100) mad(runif(1000)))
##    user  system elapsed 
##    0.01    0.00    0.01
ptm <- proc.time()
for (i in 1:50) mad(stats::runif(500))
proc.time() - ptm
##    user  system elapsed 
##       0       0       0
#equation<-c(1,-arma.4a$coef[1:3])
#roots <- polyroot(equation) #1.542932+0.928342i -1.870974-0.000000i  1.542932-0.928342i
#module <- Mod(roots)#1.800683 1.870974 1.800683
#cycle <- 2*pi/acos(1.542932/1.800683)

#equation<-c(1,-arma.4a$coef[1:3])
#roots <- polyroot(equation) #1.542932+0.928342i -1.870974-0.000000i  1.542932-0.928342i
#module <- Mod(roots)#1.800683 1.870974 1.800683
#cycle <- 2*pi/acos(1.542932/1.800683)#