Codage Efficient

• Pour ce faire, il faut prendre l’habitude d’écrire des fonctions et de les tester sous différentes formes.

# Print the R version details using version
version

##                _                           
## platform       x86_64-apple-darwin15.6.0   
## arch           x86_64                      
## os             darwin15.6.0                
## system         x86_64, darwin15.6.0        
## status                                     
## major          3                           
## minor          4.3                         
## year           2017                        
## month          11                          
## day            30                          
## svn rev        73796                       
## language       R                           
## version.string R version 3.4.3 (2017-11-30)
## nickname       Kite-Eating Tree

# system.time() permet de connaître la durée d'exécution
# colon() wraps the `:` operator so that building the sequence 1..n can be
# timed as a function call. Returns 1:n.
colon <- function(n) {
  1:n
}

system.time(res<-colon(1e8))

##    user  system elapsed 
##   0.241   0.234   0.970

# microbenchmark() permet de comparer le temps d'exécution des fonctions entre elles

# compare l'obsolescence de sa machine par rapport aux autres
#res=benchmark_std(runs = 3)
#plot(res)  


# Query the hardware through the benchmarkme package. The calls are
# namespace-qualified because library(benchmarkme) is only attached further
# down in this script.

# Assign the variable ram to the amount of RAM on this machine
ram <- benchmarkme::get_ram()
ram

## 4.29 GB

# Assign the variable cpu to the cpu specs
cpu <- benchmarkme::get_cpu()
cpu

## $vendor_id
## [1] "GenuineIntel"
## 
## $model_name
## [1] "Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz"
## 
## $no_of_cores
## [1] 4

# Load the package
library(benchmarkme)

# Run the io benchmark
#res <- benchmark_io(runs = 1, size = 5)

# Plot the results
# plot(res)

n <- 30000
# Slow code: deliberately grows the result one element at a time, which copies
# the vector on every append. Kept only as the baseline for the timing
# comparison with pre_allocate().
#
# n: number of standard-normal draws to generate (a non-negative count).
# Returns a numeric vector of length n.
growing <- function(n) {
  x <- numeric(0)
  # seq_len() instead of 1:n, so that n = 0 performs no iteration
  # (1:0 is c(1, 0) and would produce two values).
  for (i in seq_len(n)) {
    x <- c(x, rnorm(1))
  }
  x
}

# Fast code: the result vector is allocated once at its final length and then
# filled in place.
#
# n: number of standard-normal draws to generate (a non-negative count).
# Returns a numeric vector of length n.
pre_allocate <- function(n) {
  x <- numeric(n) # Pre-allocate
  # seq_len() instead of 1:n, so that n = 0 performs no iteration
  # (1:0 is c(1, 0) and would write x[1], returning a length-1 vector).
  for (i in seq_len(n)) {
    x[i] <- rnorm(1)
  }
  x
}


# Time the growing-vector version and keep its result in res_grow.
# Never write code this way: it is far too slow.
system.time({
  res_grow <- growing(30000)
})

##    user  system elapsed 
##   2.278   1.631   3.986

# Time the pre-allocated version and keep its result in res_allocate
n <- 30000
system.time({
  res_allocate <- pre_allocate(n)
})

##    user  system elapsed 
##   0.071   0.020   0.092

# il faut préférer l'utilisation des data frames aux matrices pour travailler sur les colonnes, et l'inverse pour travailler sur les lignes

Profiler le code (usage package profvis)

# Load the movies data set from the ggplot2movies package
data(movies, package = "ggplot2movies") 



# Profile the following code with the profvis function: everything inside the
# braces is passed to profvis() as a single expression
profvis({
  # Keep only the rows where Comedy == 1; this overwrites movies
  movies <- movies[movies$Comedy == 1, ]

  # Scatter plot of rating against year
  plot(movies$year, movies$rating)

  # Loess regression of rating on year
  model <- loess(rating ~ year, data = movies)
  # Row order by year, used below so the fitted values are drawn in
  # increasing year
  j <- order(movies$year)
  
  # Add the fitted line to the existing plot, in red
  lines(movies$year[j], model$fitted[j], col = "red")
})

profvis({


# Data frame solution: d() simulates six die rolls and returns them as a data
# frame with two columns (d1, d2) of three rolls each.
d <- function() {
  first_rolls <- sample(1:6, 3, replace = TRUE)
  second_rolls <- sample(1:6, 3, replace = TRUE)
  data.frame(d1 = first_rolls, d2 = second_rolls)
}

# Matrix solution: m() simulates six die rolls and returns them as a
# two-column matrix.
m <- function() {
  rolls <- sample(1:6, 6, replace = TRUE)
  matrix(rolls, ncol = 2)
}

# Benchmark the data frame solution d() against the matrix solution m()
microbenchmark(data.frame_solution = d(), matrix_solution = m())
})

l’utilisation du && au lieu du &

# Example input for move() / improved_move(). TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
is_double <- c(TRUE, FALSE, TRUE)
# Baseline solution: move() combines the three flags with the element-wise
# `&`, so all three elements are always evaluated.
#
# is_double: logical vector; only its first three elements are read.
# Returns 11 (invisibly) when all three flags are TRUE, NULL otherwise.
move <- function(is_double) {
  three_doubles <- is_double[1] & is_double[2] & is_double[3]
  if (three_doubles) {
    current <- 11 # Go To Jail
  }
}

# Improved solution: improved_move() uses the short-circuiting `&&`, so once
# one flag is FALSE the remaining flags are not checked.
#
# is_double: logical vector; only its first three elements are read.
# Returns 11 (invisibly) when all three flags are TRUE, NULL otherwise.
improved_move <- function(is_double) {
  three_doubles <- is_double[1] && is_double[2] && is_double[3]
  if (three_doubles) {
    current <- 11 # Go To Jail
  }
}

## Benchmark both solutions on the same input, 1e5 evaluations each
microbenchmark(
  improved_move(is_double),
  move(is_double),
  times = 1e5
)

Rendre le code efficient par le parallel computing

Le parallel computing permet de gagner du temps. Cependant, nous ne pouvons pas faire du parallel computing s’il existe une quelconque dépendance entre les procédures.

# Number of CPU cores detected on this machine. The parallel functions are
# namespace-qualified because the script never attaches the parallel package.
parallel::detectCores()


# Create a cluster via makeCluster (2 workers)
cl <- parallel::makeCluster(2)

# Column-wise medians computed in parallel. d is a function in this script,
# so it is called and its result converted to a matrix: parApply() needs an
# object with dimensions, not a function. tryCatch(finally = ) stops the
# cluster even if the computation fails.
tryCatch(
  parallel::parApply(cl, as.matrix(d()), 2, median),
  finally = parallel::stopCluster(cl)
)

Quand il faut définir spécifiquement la fonction

# play() simulates one game: a die roll is added to a running total, the total
# is reset to 0 whenever it becomes even, and the game ends once the total
# reaches 10 or more.
# Returns the number of rolls the game took.
play <- function() {
  score <- 0
  rolls <- 0
  repeat {
    if (score >= 10) {
      break
    }
    score <- score + sample(1:6, 1)

    # If even. Reset to 0
    if (score %% 2 == 0) {
      score <- 0
    }
    rolls <- rolls + 1
  }
  rolls
}
# Create a cluster via makeCluster (2 cores). The parallel functions are
# namespace-qualified because the script never attaches the parallel package.
cl <- parallel::makeCluster(2)

# play() is exported to the cluster before the workers call it, then 100 games
# are played in parallel. tryCatch(finally = ) stops the cluster even if the
# export or the parallel run fails.
res <- tryCatch(
  {
    parallel::clusterExport(cl, "play")
    parallel::parSapply(cl, 1:100, function(i) play())
  },
  finally = parallel::stopCluster(cl)
)

# Set the number of games to play
no_of_games <- 1e5

## Time serial version. vapply() pins the result to exactly one numeric value
## per game, whereas the return type of sapply() depends on its input.
system.time(
  serial <- vapply(seq_len(no_of_games), function(i) play(), numeric(1))
)

##    user  system elapsed 
##   7.230   0.342   7.609

## Set up cluster (4 workers). The parallel functions are namespace-qualified
## because the script never attaches the parallel package.
cl <- parallel::makeCluster(4)

## Time parallel version. play() is exported to the workers first;
## tryCatch(finally = ) stops the cluster even if the export or the run fails.
tryCatch(
  {
    parallel::clusterExport(cl, "play")
    system.time(
      par <- parallel::parSapply(cl, 1:no_of_games, function(i) play())
    )
  },
  finally = parallel::stopCluster(cl)
)

##    user  system elapsed 
##   0.075   0.010   3.926

Big Data

Codage Efficient

Profiler le code (usage package profvis)

l’utilisation du && au lieu du &

Rendre le code efficient par le parallel computing

Quand il faut définir spécifiquement la fonction

Parallel Computing

Sparklyr