Le code que j’utiliserai pour ce DM est la version d’Alan GUIVARCH du code développé en groupe. Mon code personel se trouve en bas de la copie et utilise des data frames, je n’ai cependant pas réussi à en tirer des résultats cohérents avec le reste de la promotion.
scenario1 = function(lambda, d) {
N = 10000 # number of jobs
K = 10 # number of servers
t = 0 # simulation time
arrival = cumsum(rexp(n=N, rate=lambda)) # jobs arrival times
queue = matrix(data = NA, nrow = N, ncol = d)# size-Nxd matrix indicating where the i-th copy of the n-th job is dispatched
for (i in 1:N) {
queue[i,] = sample(x = (1:K),
size = d,
replace = FALSE)
}
mu = 1 # service rate
xm = 1 # scale parameter
alpha = 1.5; # shape parameter
# Comment one of the two following options
# exponential case
service = replicate(N, rexp(d,mu)); # jobs service times (size N*d)
# pareto case
#service = replicate(N, xm/(runif(d)^(1/alpha))); # jobs service times (size N*d)
if (d == 1) {
remaining = t(t(service)) # size-Nxd matrix indicating the remaining time of the i-th copy of the n-th job
} else {
remaining = t(service) # size-Nxd matrix indicating the remaining time of the i-th copy of the n-th job
}
completion = array(data=NA, dim = N) # size-N vector indicating the jobs' completion time
CurJob = rep(NA, K) # size-K vector indicating the current job in processing at each server, i.e. CurJob[i] takes value value in {1,...,N}
CurrReplica = rep(NA, K); # size-K vector indicating the current replica in processing at each server, i.e. CurrReplica[i] takes value {1,...,d}
if(N==0){
break;
}else{
NextJob = 1;
}
tmp = array(data=0, dim=K)
jobDone = array(data=FALSE, dim = N);
while (TRUE){
# time to next arrival of a job
dtA = NA
if (length(arrival[arrival > t]) > 0){
dtA = head(arrival[arrival > t]) - t
}
# time to next job completion
dtC = NA
for (i in 1:K){
if (!is.na(CurJob[i])){
dtC = min(dtC, remaining[CurJob[i], CurrReplica[i]], na.rm = T)
}
}
if (is.na(dtA) && is.na(dtC)){
break;
}
# Update time value
dt = min(dtA, dtC, na.rm = T)
t = t + dt
for(i in 1:K){
if(!is.na(CurJob[i])){
remaining[CurJob[i], CurrReplica[i]] = remaining[CurJob[i], CurrReplica[i]] - dt
}
}
# If a job has arrived, update the remaining time
if (NextJob <= N && (abs(arrival[NextJob]- t)) < 1e-4){
NextJob = NextJob + 1 # Next
}
for (i in 1:K){
if (!is.na(CurJob[i])){
if (remaining[CurJob[i], CurrReplica[i]] <= 0){ # job is complete
completion[CurJob[i]] = t
remaining[CurJob[i], CurrReplica[i]] = 0
tmp[i] = CurJob[i]
CurJob[i] = NA
CurrReplica[i] = NA
}
}
}
for (k in 1:K) {
if (is.na(CurJob[k]) && tmp[k] < NextJob - 1){ # server is inactive, searching for a new job
found = FALSE
for (i in ((tmp[k]+1):(NextJob - 1))){ # searching from the ID of its last job
if (!jobDone[i]){ # found a replica that should run on this server
for (j in 1:d){
if(queue[i,j] == k){
CurJob[k] = i
CurrReplica[k] = j
jobDone[i] = TRUE # other servers are not supposed to run this job
found = TRUE
break;
}
}
}
if(found){
break;
}
}
}
}
}
t = completion-arrival
return(mean(t))
}
scenario2 = function(lambda, d) {
N = 10000 # number of jobs
K = 10 # number of servers
t = 0 # simulation time
arrival = cumsum(rexp(n=N, rate=lambda)) # jobs arrival times
queue = matrix(data = NA, nrow = N, ncol = d)# size-Nxd matrix indicating where the i-th copy of the n-th job is dispatched
for (i in 1:N) {
queue[i,] = sample(x = (1:K),
size = d,
replace = FALSE)
}
mu = 1 # service rate
xm = 1 # scale parameter
alpha = 1.5; # shape parameter
# Comment one of the two following options
# exponential case
service = replicate(N, rexp(d,mu)); # jobs service times (size N*d)
# pareto case
#service = replicate(N, xm/(runif(d)^(1/alpha))); # jobs service times (size N*d)
if (d == 1) {
remaining = t(t(service)) # size-Nxd matrix indicating the remaining time of the i-th copy of the n-th job
} else {
remaining = t(service) # size-Nxd matrix indicating the remaining time of the i-th copy of the n-th job
}
completion = array(data=NA, dim = N) # size-N vector indicating the jobs' completion time
CurJob = rep(NA, K) # size-K vector indicating the current job in processing at each server, i.e. CurJob[i] takes value value in {1,...,N}
CurrReplica = rep(NA, K); # size-K vector indicating the current replica in processing at each server, i.e. CurrReplica[i] takes value {1,...,d}
if(N==0){
break;
}else{
NextJob = 1;
}
tmp = array(data=0, dim=K)
jobDone = array(data=FALSE, dim = N);
while (TRUE){
# time to next arrival of a job
dtA = NA
if (length(arrival[arrival > t]) > 0){
dtA = head(arrival[arrival > t]) - t
}
# time to next job completion
dtC = NA
for (i in 1:K){
if (!is.na(CurJob[i])){
dtC = min(dtC, remaining[CurJob[i], CurrReplica[i]], na.rm = T)
}
}
if (is.na(dtA) && is.na(dtC)){
break;
}
# Update time value
dt = min(dtA, dtC, na.rm = T)
t = t + dt
for(i in 1:K){
if(!is.na(CurJob[i])){
remaining[CurJob[i], CurrReplica[i]] = remaining[CurJob[i], CurrReplica[i]] - dt
}
}
# If a job has arrived, update the remaining time
if (NextJob <= N && (abs(arrival[NextJob]- t)) < 1e-4){
NextJob = NextJob + 1 # Next
}
for (i in 1:K){
if (!is.na(CurJob[i])){
if (remaining[CurJob[i], CurrReplica[i]] <= 0){ # job is complete
completion[CurJob[i]] = t
remaining[CurJob[i], CurrReplica[i]] = 0
jobDone[CurJob[i]] = TRUE # other servers are not supposed to run this job
tmp[i] = CurJob[i]
for (j in 1:K) {
if (!is.na(CurJob[j]) && CurJob[j] == tmp[i]) {
CurJob[j] = NA
CurrReplica[j] = NA
tmp[j] = tmp[i]
}
}
}
}
}
for (k in 1:K) {
if (is.na(CurJob[k]) && tmp[k] < NextJob - 1){ # server is inactive, searching for a new job
found = FALSE
for (i in ((tmp[k]+1):(NextJob - 1))){ # searching from the ID of its last job
if (!jobDone[i]){ # found a replica that should run on this server
for (j in 1:d){
if(queue[i,j] == k){
CurJob[k] = i
CurrReplica[k] = j
found = TRUE
break;
}
}
}
if(found){
break;
}
}
}
}
}
t = completion-arrival
return(mean(t))
}
Il y a K serveurs, les requêtes sont donc en moyenne K fois plus rapidement, on peut considerer cette formule comme formule du temps de réponse. \[ E(R) = \frac{1}{\mu - \frac{\lambda}{K}} \] ### Question 2 #### Scenario 1
lambdas = seq(0.1, 9.5, 0.5);
res1 = c();
for (i in 0:length(lambdas)) {
res1[i] = scenario1(lambdas[i], 4);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res1,
main="Sénario 1",
xlab="lambda",
ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");
On peut constater que les valeur obtenues sont très proches de celles attendues. N = 10000 semble donc être suffisant pour observer les similitudes entre la théorie et les observations.
lambdas = seq(0.1, 9.5, 0.5);
res2 = c();
for (i in 0:length(lambdas)) {
res2[i] = scenario2(lambdas[i], 4);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res2,
main="Sénario 2",
xlab="lambda",
ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");
On peut constater que les valeur obtenues sont très différentes de celles attendues.
lambdas = seq(0.1, 9.5, 0.5);
res1 = c();
for (i in 0:length(lambdas)) {
res1[i] = scenario1(lambdas[i], 1);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
lambdas = seq(0.1, 9.5, 0.5);
res2 = c();
for (i in 0:length(lambdas)) {
res2[i] = scenario2(lambdas[i], 1);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res1,
main="Sénario 1",
xlab="lambda",
ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");
plot(x = lambdas, y = res2,
main="Sénario 2",
xlab="lambda",
ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");
A paramètres égaux et pour D = 1, le scénaio 1 et le scénario 2 sont équivalents.
lambdas = seq(0.1, 9.5, 1);
res1 = c();
for (i in 0:length(lambdas)) {
res1[i] = scenario1(lambdas[i], 2);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
lambdas = seq(0.1, 9.5, 1);
res2 = c();
for (i in 0:length(lambdas)) {
res2[i] = scenario2(lambdas[i], 2);
}
## Warning in rexp(n = N, rate = lambda): production de NAs
plot(x = lambdas, y = res1,
main="Sénario 1",
xlab="lambda",
ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");
plot(x = lambdas, y = res2,
main="Sénario 2",
xlab="lambda",
ylab="Temps de réponse moyen",ylim = c(0,22));
lines(x = lambdas, y = 1/(1- lambdas/10), col = "red", type = "l");
On peut aussi constater que plus le nombre de duplications augmente, le temps moyen par tache pour le scénario 1 augmente, le scénario 2 prends lui aussi plus de temps mais cet effet est moins marqué. Je ne vois pas de cas ou le scénario A à l’avantage sur le 2.
Cette version du code ne permet pas de trouver quelque différence que ce soit quand on fait varier lambda, ce qui bien entendu est faux.
scenar1 = function(K, N, D, MU, LAMBDA) {
t = 0
jobs <- data.frame(job = 1:N,
arrival = cumsum(rexp(n = N, rate = LAMBDA)),
completion = NA)
tasks <- data.frame(matrix(ncol = 3, nrow = 0))
colnames(tasks) <- c("job", "dup", "remaining")
queues <- data.frame(matrix(ncol = 3, nrow = 0))
colnames(queues) <- c("job", "dup", "queue")
while (T) {
dtA = (jobs %>% filter (arrival > t) %>% summarise(min(arrival)))[[1]] - t
dtC = (tasks %>% summarise(min(remaining)))[[1]]
dt = min(dtA, dtC)
t = t + dt
if (is.infinite(t)) {
break
}
new <- jobs %>% filter(arrival == t)
tasks <- tasks %>% mutate(remaining = remaining - dt)
newqueue <- expand.grid(job = new$job, dup = 1:D)
newqueue$queue <-
as.vector(replicate((new %>% count())[[1]], sample(D, x = 1:K)))
queues <- rbind(queues, newqueue)
finished <- tasks %>% filter(remaining == 0)
tasks <- tasks %>% filter(remaining != 0)
queues <- queues %>% filter(!(job %in% finished$job))
tasks <- tasks %>% filter(!(job %in% finished$job))
jobs <-
rbind(
jobs %>% filter(job %in% finished$job) %>% mutate(completion = t),
jobs %>% filter(!(job %in% finished$job))
)
lazyqueues <-
queues %>% filter(!(job %in% tasks$job && dup %in% tasks$dup))
queue_min = jobs %>% merge(lazyqueues, by = "job") %>% group_by(queue) %>% summarize(min = min(arrival))
newtasks <-
lazyqueues %>% merge(queue_min, by = "queue") %>% merge(jobs %>% select (job, arrival), by =
"job") %>% filter(min == arrival) %>% select(job, dup)
queues <- queues %>% filter(!(job %in% newtasks$job) || (job %in% newtasks$job && !(dup %in% newtasks$dup)))
newtasks$remaining <-
rexp(n = (newtasks %>% count())[[1]], rate = MU)
tasks <- rbind(tasks, newtasks)
}
jobs$service_time = jobs$completion - jobs$arrival
return(jobs)
}
Cette version du code permet bien de voir l’augmentation exponentielle du temps quand lambda augmente mais les valeurs ne sont pas cohérentes avec la théorie.
scenar2 = function(K, N, D, MU, LAMBDA) {
t = 0
jobs <- data.frame(job = 1:N,
arrival = cumsum(rexp(n = N, rate = LAMBDA)),
completion = NA)
tasks <- data.frame(matrix(ncol = 3, nrow = 0))
colnames(tasks) <- c("job", "dup", "remaining")
queues <- data.frame(matrix(ncol = 3, nrow = 0))
colnames(queues) <- c("job", "dup", "queue")
while (T) {
dtA = (jobs %>% filter (arrival > t) %>% summarise(min(arrival)))[[1]] - t
dtC = (tasks %>% summarise(min(remaining)))[[1]]
dt = min(dtA, dtC)
t = t + dt
if (is.infinite(t)) {
break
}
new <- jobs %>% filter(arrival == t)
tasks <- tasks %>% mutate(remaining = remaining - dt)
newqueue <- expand.grid(job = new$job, dup = 1:D)
newqueue$queue <-
as.vector(replicate((new %>% count())[[1]], sample(D, x = 1:K)))
queues <- rbind(queues, newqueue)
finished <- tasks %>% filter(remaining == 0)
tasks <- tasks %>% filter(remaining != 0)
queues <- queues %>% filter(!(job %in% finished$job))
tasks <- tasks %>% filter(!(job %in% finished$job))
jobs <-
rbind(
jobs %>% filter(job %in% finished$job) %>% mutate(completion = t),
jobs %>% filter(!(job %in% finished$job))
)
lazyqueues <-
queues %>% filter(!(job %in% tasks$job && dup %in% tasks$dup))
queue_min = jobs %>% merge(lazyqueues, by = "job") %>% group_by(queue) %>% summarize(min = min(arrival))
newtasks <-
lazyqueues %>% merge(queue_min, by = "queue") %>% merge(jobs %>% select (job, arrival), by =
"job") %>% filter(min == arrival) %>% select(job, dup)
newtasks$remaining <-
rexp(n = (newtasks %>% count())[[1]], rate = MU)
tasks <- rbind(tasks, newtasks)
}
jobs$service_time = jobs$completion - jobs$arrival
return(jobs)
}