Step 1

Le code que j’utiliserai pour ce DM est la version d’Alan GUIVARCH du code développé en groupe. Mon code personel se trouve en bas de la copie et utilise des data frames, je n’ai cependant pas réussi à en tirer des résultats cohérents avec le reste de la promotion.

Scenario 1

scenario1 = function(lambda, d) {
  N = 10000 # number of jobs
  K = 10 # number of servers
  t = 0 # simulation time
  arrival = cumsum(rexp(n=N, rate=lambda)) # jobs arrival times
  queue = matrix(data = NA, nrow = N, ncol = d)# size-Nxd matrix indicating where the i-th copy of the n-th job is dispatched
  for (i in 1:N) {
    queue[i,] = sample(x = (1:K),
                      size = d,
                      replace = FALSE)
  }
  mu = 1 # service rate
  xm = 1 # scale parameter
  alpha = 1.5; # shape parameter
  
  # Comment one of the two following options
  # exponential case
  service = replicate(N, rexp(d,mu)); # jobs service times (size N*d)
  # pareto case
  #service = replicate(N, xm/(runif(d)^(1/alpha))); # jobs service times (size N*d)
  if (d == 1) {
    remaining = t(t(service)) # size-Nxd matrix indicating the       remaining time of the i-th copy of the n-th job
  } else {
    remaining = t(service) # size-Nxd matrix indicating the       remaining time of the i-th copy of the n-th job
  }
  completion = array(data=NA, dim = N) # size-N vector indicating the jobs' completion time
  CurJob = rep(NA, K) # size-K vector indicating the current job in processing at each server, i.e. CurJob[i] takes value value in {1,...,N}
  CurrReplica = rep(NA, K); # size-K vector indicating the current replica in processing at each server, i.e. CurrReplica[i] takes value {1,...,d}
  
  if(N==0){
      break;
  }else{
      NextJob = 1;
  }
  
  tmp = array(data=0, dim=K)
  jobDone = array(data=FALSE, dim = N);
  
  while (TRUE){
      # time to next arrival of a job
      dtA = NA
      if (length(arrival[arrival > t]) > 0){
          dtA = head(arrival[arrival > t]) - t
      }
  
      # time to next job completion
      dtC = NA
      for (i in 1:K){
          if (!is.na(CurJob[i])){
              dtC = min(dtC, remaining[CurJob[i], CurrReplica[i]], na.rm = T)
          }
      }
  
      if (is.na(dtA) && is.na(dtC)){
          break;
      }
  
      # Update time value
      dt = min(dtA, dtC, na.rm = T)
      t = t + dt
      
      for(i in 1:K){
          if(!is.na(CurJob[i])){
              remaining[CurJob[i], CurrReplica[i]] = remaining[CurJob[i], CurrReplica[i]] - dt
          }
      }
      
      # If a job has arrived, update the remaining time
      if (NextJob <= N && (abs(arrival[NextJob]- t)) < 1e-4){
          NextJob = NextJob + 1 # Next
      }
      
      for (i in 1:K){
          if (!is.na(CurJob[i])){
              if (remaining[CurJob[i], CurrReplica[i]] <= 0){ # job is complete
                  completion[CurJob[i]] = t
                  remaining[CurJob[i], CurrReplica[i]] = 0
                  tmp[i] = CurJob[i]
                  CurJob[i] = NA
                  CurrReplica[i] = NA
              }
          }
      }
      
      for (k in 1:K) {
          if (is.na(CurJob[k]) && tmp[k] < NextJob - 1){  # server is inactive, searching for a new job
              found = FALSE
              for (i in ((tmp[k]+1):(NextJob - 1))){  # searching from the ID of its last job
                  if (!jobDone[i]){    # found a replica that should run on this server
                      for (j in 1:d){
                          if(queue[i,j] == k){
                              CurJob[k] = i
                              CurrReplica[k] = j
                              jobDone[i] = TRUE   # other servers are not supposed to run this job
                              found = TRUE
                              break;
                          }
                      }
                  }
                  if(found){
                      break;
                  }
              }
          }
      }
  }
  t = completion-arrival
  return(mean(t))
}

Scenario 2

scenario2 = function(lambda, d) {
  N = 10000 # number of jobs
  K = 10 # number of servers
  t = 0 # simulation time
  arrival = cumsum(rexp(n=N, rate=lambda)) # jobs arrival times
  queue = matrix(data = NA, nrow = N, ncol = d)# size-Nxd matrix indicating where the i-th copy of the n-th job is dispatched
  for (i in 1:N) {
    queue[i,] = sample(x = (1:K),
                      size = d,
                      replace = FALSE)
  }
  mu = 1 # service rate
  xm = 1 # scale parameter
  alpha = 1.5; # shape parameter
  
  # Comment one of the two following options
  # exponential case
  service = replicate(N, rexp(d,mu)); # jobs service times (size N*d)
  # pareto case
  #service = replicate(N, xm/(runif(d)^(1/alpha))); # jobs service times (size N*d)
  
  if (d == 1) {
    remaining = t(t(service)) # size-Nxd matrix indicating the       remaining time of the i-th copy of the n-th job
  } else {
    remaining = t(service) # size-Nxd matrix indicating the       remaining time of the i-th copy of the n-th job
  }
  completion = array(data=NA, dim = N) # size-N vector indicating the jobs' completion time
  CurJob = rep(NA, K) # size-K vector indicating the current job in processing at each server, i.e. CurJob[i] takes value value in {1,...,N}
  CurrReplica = rep(NA, K); # size-K vector indicating the current replica in processing at each server, i.e. CurrReplica[i] takes value {1,...,d}
  
  if(N==0){
      break;
  }else{
      NextJob = 1;
  }
  
  tmp = array(data=0, dim=K)
  jobDone = array(data=FALSE, dim = N);
  
  while (TRUE){
      # time to next arrival of a job
      dtA = NA
      if (length(arrival[arrival > t]) > 0){
          dtA = head(arrival[arrival > t]) - t
      }
  
      # time to next job completion
      dtC = NA
      for (i in 1:K){
          if (!is.na(CurJob[i])){
              dtC = min(dtC, remaining[CurJob[i], CurrReplica[i]], na.rm = T)
          }
      }
  
      if (is.na(dtA) && is.na(dtC)){
          break;
      }
  
      # Update time value
      dt = min(dtA, dtC, na.rm = T)
      t = t + dt
      
      for(i in 1:K){
          if(!is.na(CurJob[i])){
              remaining[CurJob[i], CurrReplica[i]] = remaining[CurJob[i], CurrReplica[i]] - dt
          }
      }
      
      # If a job has arrived, update the remaining time
      if (NextJob <= N &&  (abs(arrival[NextJob]- t)) < 1e-4){
          NextJob = NextJob + 1 # Next
      }
      
      for (i in 1:K){
          if (!is.na(CurJob[i])){
              if (remaining[CurJob[i], CurrReplica[i]] <= 0){ # job is complete
                  completion[CurJob[i]] = t
                  remaining[CurJob[i], CurrReplica[i]] = 0
                  jobDone[CurJob[i]] = TRUE   # other servers are not supposed to run this job
                    tmp[i] = CurJob[i]
                  for (j in 1:K) {
                    if (!is.na(CurJob[j]) && CurJob[j] == tmp[i]) {
                      CurJob[j] = NA
                      CurrReplica[j] = NA
                      tmp[j] = tmp[i]
                    }
                 }
              }
          }
      }
      
      for (k in 1:K) {
          if (is.na(CurJob[k]) && tmp[k] < NextJob - 1){  # server is inactive, searching for a new job
              found = FALSE
              for (i in ((tmp[k]+1):(NextJob - 1))){  # searching from the ID of its last job
                  if (!jobDone[i]){    # found a replica that should run on this server
                      for (j in 1:d){
                          if(queue[i,j] == k){
                              CurJob[k] = i
                              CurrReplica[k] = j
                              found = TRUE
                              break;
                          }
                      }
                  }
                  if(found){
                      break;
                  }
              }
          }
      }
  
  }
  t = completion-arrival
  return(mean(t))
}

Step 2

Question 1

Il y a K serveurs, les requêtes sont donc en moyenne K fois plus rapidement, on peut considerer cette formule comme formule du temps de réponse. \[ E(R) = \frac{1}{\mu - \frac{\lambda}{K}} \] ### Question 2 #### Scenario 1

lambdas = seq(0.1, 9.5, 0.5);
res1 = c();

for (i in 0:length(lambdas)) {
  res1[i] = scenario1(lambdas[i], 4);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res1,
        main="Sénario 1",
        xlab="lambda",
        ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");

On peut constater que les valeur obtenues sont très proches de celles attendues. N = 10000 semble donc être suffisant pour observer les similitudes entre la théorie et les observations.

Scenario 2

lambdas = seq(0.1, 9.5, 0.5);
res2 = c();

for (i in 0:length(lambdas)) {
  res2[i] = scenario2(lambdas[i], 4);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res2,
        main="Sénario 2",
        xlab="lambda",
        ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");

On peut constater que les valeur obtenues sont très différentes de celles attendues.

Question 3

lambdas = seq(0.1, 9.5, 0.5);
res1 = c();

for (i in 0:length(lambdas)) {
  res1[i] = scenario1(lambdas[i], 1);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
lambdas = seq(0.1, 9.5, 0.5);
res2 = c();

for (i in 0:length(lambdas)) {
  res2[i] = scenario2(lambdas[i], 1);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res1,
        main="Sénario 1",
        xlab="lambda",
        ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");

plot(x = lambdas, y = res2,
        main="Sénario 2",
        xlab="lambda",
        ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");

A paramètres égaux et pour D = 1, le scénaio 1 et le scénario 2 sont équivalents.

lambdas = seq(0.1, 9.5, 1);
res1 = c();

for (i in 0:length(lambdas)) {
  res1[i] = scenario1(lambdas[i], 2);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
lambdas = seq(0.1, 9.5, 1);
res2 = c();

for (i in 0:length(lambdas)) {
  res2[i] = scenario2(lambdas[i], 2);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res1,
        main="Sénario 1",
        xlab="lambda",
        ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");

plot(x = lambdas, y = res2,
        main="Sénario 2",
        xlab="lambda",
        ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");

On peut aussi constater que plus le nombre de duplications augmente, le temps moyen par tache pour le scénario 1 augmente, le scénario 2 prends lui aussi plus de temps mais cet effet est moins marqué. Je ne vois pas de cas ou le scénario A à l’avantage sur le 2.

Step 1 bis

Scenario 1

Cette version du code ne permet pas de trouver quelque différence que ce soit quand on fait varier lambda, ce qui bien entendu est faux.

scenar1 = function(K, N, D, MU, LAMBDA) {
  t = 0
  
  jobs <- data.frame(job = 1:N,
                     arrival = cumsum(rexp(n = N, rate = LAMBDA)),
                     completion = NA)
  
  tasks <- data.frame(matrix(ncol = 3, nrow = 0))
  colnames(tasks) <- c("job", "dup", "remaining")
  
  queues <- data.frame(matrix(ncol = 3, nrow = 0))
  colnames(queues) <- c("job", "dup", "queue")
  
  while (T) {
    dtA = (jobs %>% filter (arrival > t) %>% summarise(min(arrival)))[[1]] - t
    dtC = (tasks  %>% summarise(min(remaining)))[[1]]
    dt = min(dtA, dtC)
    t = t + dt
    
    if (is.infinite(t)) {
      break
      
    }
    
    new <- jobs %>% filter(arrival == t)
    tasks <- tasks %>% mutate(remaining = remaining - dt)
    
    newqueue <- expand.grid(job = new$job, dup = 1:D)
    newqueue$queue <-
      as.vector(replicate((new %>% count())[[1]], sample(D, x = 1:K)))
    queues <- rbind(queues, newqueue)
    
    finished <- tasks %>% filter(remaining == 0)
    tasks <- tasks %>% filter(remaining != 0)
    queues <- queues %>% filter(!(job %in% finished$job))
    tasks <- tasks %>% filter(!(job %in% finished$job))
    jobs <-
      rbind(
        jobs %>% filter(job %in% finished$job) %>% mutate(completion = t),
        jobs %>% filter(!(job %in% finished$job))
      )
    lazyqueues <-
      queues %>% filter(!(job %in% tasks$job && dup %in% tasks$dup))
    queue_min = jobs %>% merge(lazyqueues, by = "job") %>% group_by(queue) %>% summarize(min = min(arrival))
    newtasks <-
      lazyqueues %>% merge(queue_min, by = "queue") %>% merge(jobs %>% select (job, arrival), by =
                                                                "job") %>% filter(min == arrival) %>% select(job, dup)
    queues <- queues %>% filter(!(job %in% newtasks$job) || (job %in% newtasks$job && !(dup %in% newtasks$dup)))
    newtasks$remaining <-
      rexp(n = (newtasks %>% count())[[1]], rate = MU)
    tasks <- rbind(tasks, newtasks)
  }
  
  jobs$service_time = jobs$completion - jobs$arrival
  return(jobs)
  
}

Scenario 2

Cette version du code permet bien de voir l’augmentation exponentielle du temps quand lambda augmente mais les valeurs ne sont pas cohérentes avec la théorie.

scenar2 = function(K, N, D, MU, LAMBDA) {
  t = 0
  
  jobs <- data.frame(job = 1:N,
                     arrival = cumsum(rexp(n = N, rate = LAMBDA)),
                     completion = NA)
  
  tasks <- data.frame(matrix(ncol = 3, nrow = 0))
  colnames(tasks) <- c("job", "dup", "remaining")
  
  queues <- data.frame(matrix(ncol = 3, nrow = 0))
  colnames(queues) <- c("job", "dup", "queue")
  
  while (T) {
    dtA = (jobs %>% filter (arrival > t) %>% summarise(min(arrival)))[[1]] - t
    dtC = (tasks  %>% summarise(min(remaining)))[[1]]
    dt = min(dtA, dtC)
    t = t + dt
    
    if (is.infinite(t)) {
      break
      
    }
    
    new <- jobs %>% filter(arrival == t)
    tasks <- tasks %>% mutate(remaining = remaining - dt)
    
    newqueue <- expand.grid(job = new$job, dup = 1:D)
    newqueue$queue <-
      as.vector(replicate((new %>% count())[[1]], sample(D, x = 1:K)))
    queues <- rbind(queues, newqueue)
    
    finished <- tasks %>% filter(remaining == 0)
    tasks <- tasks %>% filter(remaining != 0)
    queues <- queues %>% filter(!(job %in% finished$job))
    tasks <- tasks %>% filter(!(job %in% finished$job))
    jobs <-
      rbind(
        jobs %>% filter(job %in% finished$job) %>% mutate(completion = t),
        jobs %>% filter(!(job %in% finished$job))
      )
    lazyqueues <-
      queues %>% filter(!(job %in% tasks$job && dup %in% tasks$dup))
    queue_min = jobs %>% merge(lazyqueues, by = "job") %>% group_by(queue) %>% summarize(min = min(arrival))
    newtasks <-
      lazyqueues %>% merge(queue_min, by = "queue") %>% merge(jobs %>% select (job, arrival), by =
                                                                "job") %>% filter(min == arrival) %>% select(job, dup)
    newtasks$remaining <-
      rexp(n = (newtasks %>% count())[[1]], rate = MU)
    tasks <- rbind(tasks, newtasks)
  }
  
  jobs$service_time = jobs$completion - jobs$arrival
  return(jobs)
  
}