# Source
# http://adv-r.had.co.nz/Profiling.html
#identify bottlenecks within a function
#Lineprof
# For each sequence of calls, lineprof calculates:
#
# time: the time in seconds
#
# alloc: the memory allocated, in megabytes
# released: the memory released, in megabytes. Unless torture = TRUE this release is somewhat random: memory will only be released if a garbage collection is triggered by an allocation.
# dups: the number of calls to the internal duplicate function which is called by C code to duplicate R vectors.
#install.packages("lineprof")
#install.packages("stringi")
#devtools::install_github("hadley/lineprof")
library(lineprof)
library(shiny)
## Warning: package 'shiny' was built under R version 3.2.3
# Top-level demo function for the profiler: pauses for 0.1 via pause(), then
# calls g() and finally h(). The value of the h() call is what f() returns.
# pause() is not defined in this block; presumably it is the one provided by
# lineprof, which is attached above.
f <- function() {
  pause(0.1)
  g()
  h()
}
# Middle level of the demo call tree: pauses for 0.1 via pause(), then
# delegates to h() and returns whatever h() returns.
g <- function() {
  pause(0.1)
  h()
}
# Leaf of the demo call tree: does nothing except a single pause(0.1) call,
# whose value it returns.
h <- function() {
  pause(0.1)
}
# Profile a single call to f() with lineprof and keep the result in `l`.
l <- lineprof(f())
# Auto-print the profile; the recorded output below shows its time, alloc,
# release, dups, ref and src columns.
l
## Reducing depth to 2 (from 4)
## time alloc release dups ref src
## 1 0.045 0.001 0 2 c("pause", ".Call") pause/.Call
## 2 0.048 0.001 0 0 <text>#27 g/pause
## 3 0.040 0.001 0 0 <text>#28 g/h
## 4 0.040 0.000 0 0 <text>#31 h/pause
#shine(l)
#?lineprof
#Comparing functions using microbenchmark
library(microbenchmark)
library(data.table)
library(readr)
# Show the current working directory.
getwd()
## [1] "C:/Users/dell/Desktop/Teaching"
# List the CSV files on the Desktop. `pattern` is a regular expression, so the
# dot is escaped and the match is anchored to the end of the file name; the
# earlier unanchored ".csv" also matched "BigDiamonds.csv.zip".
# (The listing below was recorded with that earlier pattern, hence entry [3].)
dir("C:/Users/dell/Desktop/", pattern = "\\.csv$")
## [1] "Analytics decisionstats.com Audience Overview 20110617-20120717.csv"
## [2] "BigDiamonds.csv"
## [3] "BigDiamonds.csv.zip"
## [4] "Boston.csv"
## [5] "ccFraud.csv"
## [6] "test.csv"
# Time fread() against read_csv() reading the same CSV file, three runs of
# each expression.
microbenchmark(
  fread("C:/Users/dell/Desktop/BigDiamonds.csv"),
  read_csv("C:/Users/dell/Desktop/BigDiamonds.csv"),
  times = 3
)
##
## Read 0.0% of 598024 rows
## Read 28.4% of 598024 rows
## Read 53.5% of 598024 rows
## Read 80.3% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:07
## Warning: 597311 problems parsing 'C:/Users/dell/Desktop/BigDiamonds.csv'.
## See problems(...) for more details.
## Warning: 597311 problems parsing 'C:/Users/dell/Desktop/BigDiamonds.csv'.
## See problems(...) for more details.
##
## Read 21.7% of 598024 rows
## Read 50.2% of 598024 rows
## Read 83.6% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:05
##
## Read 48.5% of 598024 rows
## Read 76.9% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:04
## Warning: 597311 problems parsing 'C:/Users/dell/Desktop/BigDiamonds.csv'.
## See problems(...) for more details.
## Unit: seconds
## expr min lq
## fread("C:/Users/dell/Desktop/BigDiamonds.csv") 3.980537 4.359082
## read_csv("C:/Users/dell/Desktop/BigDiamonds.csv") 3.257605 3.285306
## mean median uq max neval cld
## 5.055004 4.737628 5.592237 6.446847 3 a
## 3.770749 3.313006 4.027321 4.741635 3 a
#Compiler ~5% speedup
# Byte-compile fread with compiler::cmpfun() and time the compiled copy
# against the original on the same CSV file, three runs of each expression.
fread_c <- compiler::cmpfun(fread)
microbenchmark(
  fread("C:/Users/dell/Desktop/BigDiamonds.csv"),
  fread_c("C:/Users/dell/Desktop/BigDiamonds.csv"),
  times = 3
)
##
## Read 53.5% of 598024 rows
## Read 81.9% of 598024 rows
## Read 93.6% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:05
##
## Read 36.8% of 598024 rows
## Read 68.6% of 598024 rows
## Read 98.7% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:05
##
## Read 21.7% of 598024 rows
## Read 35.1% of 598024 rows
## Read 58.5% of 598024 rows
## Read 90.3% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:06
##
## Read 43.5% of 598024 rows
## Read 73.6% of 598024 rows
## Read 98.7% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:05
##
## Read 13.4% of 598024 rows
## Read 43.5% of 598024 rows
## Read 75.2% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:05
##
## Read 50.2% of 598024 rows
## Read 73.6% of 598024 rows
## Read 87.0% of 598024 rows
## Read 598024 rows and 13 (of 13) columns from 0.049 GB file in 00:00:06
## Unit: seconds
## expr min lq
## fread("C:/Users/dell/Desktop/BigDiamonds.csv") 4.188466 4.577904
## fread_c("C:/Users/dell/Desktop/BigDiamonds.csv") 4.320403 4.494925
## mean median uq max neval cld
## 4.875539 4.967343 5.219075 5.470808 3 a
## 4.724924 4.669447 4.927185 5.184923 3 a
#Parallel Processing
library(parallel)
# Number of CPU cores reported by detectCores(); `cores` is later passed to
# makePSOCKcluster() to size the worker pool.
cores <- detectCores()
# detectCores() is documented to return NA when the count cannot be
# determined; fall back to a single worker so cluster creation still works.
if (is.na(cores)) {
  cores <- 1L
}
cores
## [1] 4
detectCores()
## [1] 4
#
# Record the R version, platform and package versions used for this run.
sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: i386-w64-mingw32/i386 (32-bit)
## Running under: Windows 7 (build 7601) Service Pack 1
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] readr_0.1.1 data.table_1.9.4 microbenchmark_1.4-2
## [4] shiny_0.12.2 lineprof_0.1.9001
##
## loaded via a namespace (and not attached):
## [1] codetools_0.2-14 digest_0.6.8 htmltools_0.3 R6_2.1.1
## [5] splines_3.2.2 scales_0.3.0 grid_3.2.2 stringr_1.0.0
## [9] knitr_1.10.5 survival_2.38-3 munsell_0.4.2 compiler_3.2.2
## [13] proto_0.3-10 httpuv_1.3.3 lattice_0.20-33 mime_0.4
## [17] chron_2.3-47 TH.data_1.0-6 ggplot2_1.0.1 MASS_7.3-43
## [21] plyr_1.8.3 multcomp_1.4-0 zoo_1.7-12 stringi_1.0-1
## [25] magrittr_1.5 reshape2_1.4.1 rmarkdown_0.7 evaluate_0.7
## [29] gtable_0.1.2 sandwich_2.3-3 colorspace_1.2-6 tools_3.2.2
## [33] mvtnorm_1.0-2 xtable_1.8-0 Rcpp_0.12.2
# Function factory: pause(i) returns a one-argument function that ignores its
# argument and sleeps for `i` seconds via Sys.sleep(), so it can be handed
# directly to lapply()-style mappers.
#
# `i` is forced up front so the delay is fixed when the closure is created,
# not lazily on the first call (by which time the caller's variable may have
# changed).
#
# NOTE(review): this reuses the name `pause`, which f(), g() and h() call as
# pause(0.1). Once this definition runs, those calls resolve to this factory
# and only build a closure instead of sleeping; consider a distinct name if
# they are re-run afterwards.
pause <- function(i) {
  force(i)
  function(x) Sys.sleep(i)
}
# Serial baseline: ten 0.25-second sleeps executed one after another.
system.time(
  lapply(seq_len(10), pause(0.25))
)
## user system elapsed
## 0.00 0.00 2.51
# Start `cores` PSOCK workers and time ten 1-second sleeps distributed across
# them with parLapply().
cluster <- makePSOCKcluster(cores)
system.time(parLapply(cluster, 1:10, function(i) Sys.sleep(1)))
## user system elapsed
## 0.00 0.00 3.01
# Shut the workers down once the timing is done so the background R processes
# do not linger for the rest of the session.
stopCluster(cluster)
# Parallel computing carries communication overhead. If the subproblems are very small,
# then parallelisation might hurt
# rather than help.
#
# An embarrassingly parallel problem is one that’s made
# up of many simple problems that can be solved independently.