We consider now that a schedule contains two information: the order in which segments are sent and their destination. The evaluation will first selects all the leaves. Among the leaves, only the ones corresponding to segments that must be sent first will be considered. Among the remaining, the transfers that start the soonest will be selected. Only the transfers for the segment with the lowest index will be kept and only the transfers from the machine with the lowest index will be finally kept.
In order to find the leaves, it may be easier to keep the same structure for the destination and to add another structure that gives the rank of the segment transfer.
evaluate <- function(solution, alpha, beta, gamma) {
n <- nrow(solution$destination)
m <- ncol(solution$destination)
# Arrays to keep the next time a processor is ready for reduction, for
# sending and for receiving
reducing <- rep(0, n)
sending <- rep(0, n)
receiving <- rep(0, n)
ready <- matrix(0, nrow = n, ncol = m)
for (k in 1:(m * (n - 1))) {
# Select all the ready processors
leaf <- sapply(1:m, function(j) return(!sapply(1:n, function(i) return(i %in%
solution$destination[, j]))))
if (!any(leaf))
return(Inf)
# Select the segments which it is the turn
tmp <- which(leaf)[which(solution$rank[leaf] == 1)]
machines <- (tmp - 1)%%n + 1
segments <- (tmp - 1)%/%n + 1
if (length(machines) == 0)
return(Inf)
# Keep the transfers that can start the soonest
soonest <- Inf
for (i in 1:length(machines)) {
current <- max(ready[machines[i], segments[i]], sending[machines[i]],
receiving[solution$destination[machines[i], segments[i]]] -
alpha)
if (current < soonest) {
selection <- i
soonest <- current
} else if (abs(current - soonest) < 1e-15) {
selection <- c(selection, i)
}
}
machines <- machines[selection]
segments <- segments[selection]
# Select the lowest segment index and then the lowest machine index
j <- min(segments)
from <- min(machines[segments == j])
to <- solution$destination[from, j]
starting <- max(ready[from, j], sending[from], receiving[to] - alpha)
# Performs the transfer and reduction
sending[from] <- starting + beta
receiving[to] <- starting + alpha + beta
reducing[to] <- max(reducing[to], receiving[to]) + gamma
ready[to, j] <- reducing[to]
solution$destination[from, j] <- from
solution$rank[from, ] <- solution$rank[from, ] - 1
}
return(max(ready[1, ]))
}
Let's try the same tests:
n <- 3
m <- 2
solution1 <- list(destination = matrix(c(1, 1, 1, 3, 1, 1), nrow = n, ncol = m,
byrow = TRUE), rank = matrix(c(1, 2, 1, 2, 1, 2), nrow = n, ncol = m, byrow = TRUE))
solution2 <- list(destination = matrix(c(1, 1, 1, 1, 1, 1), nrow = n, ncol = m,
byrow = TRUE), rank = matrix(c(1, 2, 1, 2, 1, 2), nrow = n, ncol = m, byrow = TRUE))
c(evaluate(solution1, 0.1, 1, 0.3), 2 * 0.1 + 3 * 1 + 2 * 0.3)
## [1] 3.8 3.8
c(evaluate(solution1, 0.1, 1, 1.3), 2 * 0.1 + 3 * 1 + 2 * 1.3)
## [1] 5.8 5.8
c(evaluate(solution2, 1.1, 1, 0.3), 1.1 + 4 * max(1, 0.3) + min(1, 0.3))
## [1] 5.4 5.4
c(evaluate(solution2, 1.1, 1, 1.3), 1.1 + 4 * max(1, 1.3) + min(1, 1.3))
## [1] 7.3 7.3
Seems OK.
The permutation code also needs to be adapted (mostly the second part). Also, the permutation should be checked on the segments.
find_permutation <- function(sol1, sol2, on_segment = FALSE, permutation = NULL) {
n <- nrow(sol1$destination)
if (on_segment)
n <- ncol(sol1$destination)
if (length(permutation) == n)
return(test_permutation(sol1, sol2, permutation, on_segment))
permutation <- c(permutation, NA)
for (i in 1:n) {
if (i %in% permutation)
next
permutation[length(permutation)] <- i
if (find_permutation(sol1, sol2, on_segment, permutation))
return(TRUE)
}
return(FALSE)
}
test_permutation <- function(sol1, sol2, permutation, on_segment = FALSE) {
n <- nrow(sol1$destination)
m <- ncol(sol1$destination)
for (i in 2:n) for (j in 1:m) if (!on_segment) {
if (permutation[sol1$destination[i, j]] != sol2$destination[permutation[i],
j] || sol1$rank[i, j] != sol2$rank[permutation[i], j])
return(FALSE)
} else {
if (sol1$destination[i, j] != sol2$destination[i, permutation[j]] ||
sol1$rank[i, j] != sol2$rank[i, permutation[j]])
return(FALSE)
}
return(TRUE)
}
Let's test this:
solution1_permut <- list(destination = matrix(c(1, 1, 1, 1, 1, 2), nrow = n,
ncol = m, byrow = TRUE), rank = matrix(c(1, 2, 1, 2, 1, 2), nrow = n, ncol = m,
byrow = TRUE))
test_permutation(solution1, solution1_permut, c(1, 3, 2))
## [1] TRUE
test_permutation(solution1, solution1_permut, c(1, 2, 3))
## [1] FALSE
test_permutation(solution1, solution2, c(1, 3, 2))
## [1] FALSE
Now, let's try each possible combination:
merge_solutions <- function(solutions1, solutions2) {
solutions1$nb <- solutions1$nb + solutions2$nb
if (is.infinite(solutions2$mks))
return(solutions1)
solutions1$valid <- solutions1$valid + solutions2$valid
if (abs(solutions2$mks - solutions1$mks) < 1e-15) {
solutions1$min <- solutions1$min + solutions2$min
for (sol2 in solutions2$sols) {
present <- FALSE
for (sol1 in solutions1$sols) if (find_permutation(sol1, sol2) ||
find_permutation(sol1, sol2, TRUE))
present <- TRUE
if (!present)
solutions1$sols <- c(solutions1$sols, list(sol2))
}
} else if (solutions2$mks < solutions1$mks) {
solutions1$mks <- solutions2$mks
solutions1$min <- solutions2$min
solutions1$sols <- solutions2$sols
}
return(solutions1)
}
find_min <- function(n, m, alpha, beta, gamma, solution = NULL, i = 2, j = 1) {
# In case of first call to the function (when the solution is null)
if (is.null(solution)) {
solution <- list(destination = NULL, rank = matrix(0, nrow = n, ncol = m))
return(find_min(n, m, alpha, beta, gamma, solution, 2, 1))
}
# In case anything has to be done on the first row
if (i == 1)
return(find_min(n, m, alpha, beta, gamma, solution, 2, j))
# In case the orders are being generated
if (is.null(solution$destination)) {
# In case the orders has just finished being generated
if (j == m + 1) {
solution$destination <- matrix(1, nrow = n, ncol = m)
return(find_min(n, m, alpha, beta, gamma, solution, 2, 1))
}
# Select a new rank for a given segment
result <- list(mks = Inf, nb = 0, valid = 0, min = 0, sols = list())
for (k in 1:m) {
if (k %in% solution$rank[i, ])
next
solution$rank[i, j] <- k
solutions <- find_min(n, m, alpha, beta, gamma, solution, i%%n +
1, j + (i == n))
result <- merge_solutions(result, solutions)
}
return(result)
} else {
# In case a complete solution has just been built
if (j == m + 1) {
makespan <- evaluate(solution, alpha, beta, gamma)
return(list(mks = makespan, nb = 1, valid = 1 * is.finite(makespan),
min = 1, sols = list(solution)))
}
# Select a new destination for a given segment
result <- list(mks = Inf, nb = 0, valid = 0, min = 0, sols = list())
for (k in 1:n) {
if (k == i)
next
solution$destination[i, j] = k
# Optimization: test partial solution with only a few first segments if (i
# == n) { partial_solution <-
# list(destination=matrix(solution$destination[1:(n*j)], nrow = n, ncol =
# j), rank=matrix(solution$rank[1:(n*j)], nrow = n, ncol = j)) for (ii in
# 2:n) partial_solution$rank[ii,] <- rank(partial_solution$rank[ii,])
# partial_mks <- evaluate(partial_solution, alpha, beta, gamma) if
# (partial_mks > result$mks || is.infinite(partial_mks)) next }
solutions <- find_min(n, m, alpha, beta, gamma, solution, i%%n +
1, j + (i == n))
result <- merge_solutions(result, solutions)
}
return(result)
}
}
Optimizing by cutting part of the search (checking invalid partial solutions) seems to have marginal effects. Also, the number of minimum values found is not the same, which suggests some bug in the optimization.
plot_schedule <- function(solution, alpha, beta, gamma) {
n <- nrow(solution$destination)
m <- ncol(solution$destination)
makespan <- evaluate(solution, alpha, beta, gamma)
plot(c(0, makespan), c(0, -n), type = "n")
# Arrays to keep the next time a processor is ready for reduction, for
# sending and for receiving
reducing <- rep(0, n)
sending <- rep(0, n)
receiving <- rep(0, n)
ready <- matrix(0, nrow = n, ncol = m)
for (k in 1:(m * (n - 1))) {
# Select all the ready processors
leaf <- sapply(1:m, function(j) return(!sapply(1:n, function(i) return(i %in%
solution$destination[, j]))))
if (!any(leaf))
return(Inf)
# Select the segments which it is the turn
tmp <- which(leaf)[which(solution$rank[leaf] == 1)]
machines <- (tmp - 1)%%n + 1
segments <- (tmp - 1)%/%n + 1
if (length(machines) == 0)
return(Inf)
# Keep the transfers that can start the soonest
soonest <- Inf
for (i in 1:length(machines)) {
current <- max(ready[machines[i], segments[i]], sending[machines[i]],
receiving[solution$destination[machines[i], segments[i]]] -
alpha)
if (current < soonest) {
selection <- i
soonest <- current
} else if (abs(current - soonest) < 1e-15) {
selection <- c(selection, i)
}
}
machines <- machines[selection]
segments <- segments[selection]
# Select the lowest segment index and then the lowest machine index
j <- min(segments)
from <- min(machines[segments == j])
to <- solution$destination[from, j]
starting <- max(ready[from, j], sending[from], receiving[to] - alpha)
# Performs the transfer and reduction
sending[from] <- starting + beta
receiving[to] <- starting + alpha + beta
reducing[to] <- max(reducing[to], receiving[to]) + gamma
ready[to, j] <- reducing[to]
solution$destination[from, j] <- from
solution$rank[from, ] <- solution$rank[from, ] - 1
# Plot transfer
rect(sending[from] - beta, -(from - 1 + 2/3), sending[from], -(from -
1 + 3/3), col = colors()[j + from * n])
text(sending[from] - beta/2, -(from - 1 + 5/6), j)
rect(receiving[to] - beta, -(to - 1 + 1/3), receiving[to], -(to - 1 +
2/3), col = colors()[j + from * n])
text(receiving[to] - beta/2, -(to - 1 + 3/6), j)
rect(reducing[to] - gamma, -(to - 1 + 0/3), reducing[to], -(to - 1 +
1/3), col = colors()[j + from * n])
text(reducing[to] - gamma/2, -(to - 1 + 1/6), j)
}
}
plot_schedule(solution1, 0.1, 1, 0.3)
sols <- find_min(3, 3, 0.1, 1, 0.3)
plot_schedule(sols$sols[[1]], 0.1, 1, 0.3)
Now, let's see how many solutions are optimal (if there are many of them, an optimal polynomial algorithm may exist).
n <- 7
m <- 8
makespans <- matrix(nrow = n, ncol = m)
proportions <- matrix(nrow = n, ncol = m)
for (i in 3:n) for (j in 1:m) {
if (i == 3 && j > 4 || i == 4 && j > 3 || i == 5 && j > 2 || i > 5 && j >
1)
next
solutions <- find_min(i, j, 1, 1, 1)
makespans[i, j] <- solutions$mks
proportions[i, j] <- solutions$min/solutions$valid
}
proportions
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] NA NA NA NA NA NA NA NA
## [2,] NA NA NA NA NA NA NA NA
## [3,] 0.33333 0.235294 0.10791 0.02648 NA NA NA NA
## [4,] 0.06250 0.104821 0.01035 NA NA NA NA NA
## [5,] 0.10400 0.002056 NA NA NA NA NA NA
## [6,] 0.01543 NA NA NA NA NA NA NA
## [7,] 0.03748 NA NA NA NA NA NA NA
makespans
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] NA NA NA NA NA NA NA NA
## [2,] NA NA NA NA NA NA NA NA
## [3,] 4 6 7 8 NA NA NA NA
## [4,] 5 7 8 NA NA NA NA NA
## [5,] 6 7 NA NA NA NA NA NA
## [6,] 6 NA NA NA NA NA NA NA
## [7,] 7 NA NA NA NA NA NA NA
As there are many optimal solutions, a random algorithm (random destination and order) could be quite effective. Also, a greedy one or a dynamic one may well be close to the optimal.
As there are too many optimal solutions, we will focus on the most frequent ones, expecting they will have specific and interesting structures.
From the 8 combinations for 0/1 for alpha/beta/gamma, some solutions could be optimal in several situations: which situations are the closest (ie, how many optimal solutions from one situation are also optimal for the other)? Then, we will select the solutions that are optimal the most frequently.
For this, a first tentative is with 3 machines and 4 segments (2 hours). This could later be tested with 4 machines and 3 segments (estimated as 1 day of computation).
solutions3_4_0_0_1 <- find_min(3, 4, 0, 0, 1)
solutions3_4_0_1_0 <- find_min(3, 4, 0, 1, 0)
solutions3_4_0_1_1 <- find_min(3, 4, 0, 1, 1)
solutions3_4_1_0_0 <- find_min(3, 4, 1, 0, 0)
solutions3_4_1_0_1 <- find_min(3, 4, 1, 0, 1)
solutions3_4_1_1_0 <- find_min(3, 4, 1, 1, 0)
solutions3_4_1_1_1 <- find_min(3, 4, 1, 1, 1)
Let's start by comparing the similarities between each set of optimal solutions:
common_solutions <- function(solutions1, solutions2) {
common <- 0
for (sol1 in solutions1) for (sol2 in solutions2) if (find_permutation(sol1,
sol2) || find_permutation(sol1, sol2, TRUE)) {
common <- common + 1
break
}
return(c(common, length(solutions1), length(solutions2)))
}
# Full similarity
common_solutions(solutions3_4_0_1_1$sols, solutions3_4_1_1_0$sols)
## [1] 2 2 2
# All solutions from the second set appear in the first set
common_solutions(solutions3_4_0_0_1$sols, solutions3_4_0_1_0$sols)
## [1] 29 365 29
common_solutions(solutions3_4_0_0_1$sols, solutions3_4_0_1_1$sols)
## [1] 2 365 2
common_solutions(solutions3_4_0_0_1$sols, solutions3_4_1_0_1$sols)
## [1] 45 365 45
common_solutions(solutions3_4_0_0_1$sols, solutions3_4_1_1_0$sols)
## [1] 2 365 2
common_solutions(solutions3_4_0_1_0$sols, solutions3_4_0_1_1$sols)
## [1] 2 29 2
common_solutions(solutions3_4_0_1_0$sols, solutions3_4_1_1_0$sols)
## [1] 2 29 2
common_solutions(solutions3_4_1_0_1$sols, solutions3_4_0_1_1$sols)
## [1] 2 45 2
common_solutions(solutions3_4_1_0_1$sols, solutions3_4_1_1_0$sols)
## [1] 2 45 2
common_solutions(solutions3_4_1_1_1$sols, solutions3_4_0_1_1$sols)
## [1] 2 42 2
common_solutions(solutions3_4_1_1_1$sols, solutions3_4_1_1_0$sols)
## [1] 2 42 2
# Some similarities
common_solutions(solutions3_4_0_0_1$sols, solutions3_4_1_1_1$sols)
## [1] 11 365 42
common_solutions(solutions3_4_0_1_0$sols, solutions3_4_1_0_1$sols)
## [1] 16 29 45
common_solutions(solutions3_4_0_1_0$sols, solutions3_4_1_1_1$sols)
## [1] 4 29 42
common_solutions(solutions3_4_1_0_1$sols, solutions3_4_1_1_1$sols)
## [1] 11 45 42
# No similarity
common_solutions(solutions3_4_0_0_1$sols, solutions3_4_1_0_0$sols)
## [1] 0 365 24
common_solutions(solutions3_4_0_1_0$sols, solutions3_4_1_0_0$sols)
## [1] 0 29 24
common_solutions(solutions3_4_0_1_1$sols, solutions3_4_1_0_0$sols)
## [1] 0 2 24
common_solutions(solutions3_4_1_0_0$sols, solutions3_4_1_0_1$sols)
## [1] 0 24 45
common_solutions(solutions3_4_1_0_0$sols, solutions3_4_1_1_0$sols)
## [1] 0 24 2
common_solutions(solutions3_4_1_0_0$sols, solutions3_4_1_1_1$sols)
## [1] 0 24 42
Solutions optimal for (1, 1, 0) and for (0, 1, 1) are identical and those solutions are optimal for (0, 0, 1), (0, 1, 0), (1, 0, 1) and (1, 1, 1), but they are not for (1, 0, 0).
The symmetry between computation cost and latency could probably be proved. It would be nice if the problem could be reduced to only two parameters. Additionally, the first two situations, which have the same optimal values, have only two optimal solutions, which is simpler to analyse.
Some questions to validate the restrictions:
solutions3_4_0_10_1 <- find_min(3, 4, 0, 10, 1)
solutions3_4_0_1_10 <- find_min(3, 4, 0, 1, 10)
solutions3_4_10_1_0 <- find_min(3, 4, 10, 1, 0)
solutions3_4_1_10_0 <- find_min(3, 4, 1, 10, 0)
common_solutions(solutions3_4_0_1_1$sols, solutions3_4_0_1_10$sols)
## [1] 2 2 55
common_solutions(solutions3_4_0_1_1$sols, solutions3_4_0_10_1$sols)
## [1] 2 2 2
common_solutions(solutions3_4_1_1_0$sols, solutions3_4_10_1_0$sols)
## [1] 0 2 24
common_solutions(solutions3_4_1_1_0$sols, solutions3_4_1_10_0$sols)
## [1] 2 2 2
The 2 optimal solutions remains the same except when the latency has a greater cost than the bandwidth. Now, let's determine this limit more precisely:
evaluate(solutions3_4_10_1_0$sols[[1]], 4, 1, 0)
## [1] 12
evaluate(solutions3_4_1_1_0$sols[[1]], 4, 1, 0)
## [1] 12
evaluate(solutions3_4_10_1_0$sols[[1]], 4.1, 1, 0)
## [1] 12.1
evaluate(solutions3_4_1_1_0$sols[[1]], 4.1, 1, 0)
## [1] 12.2
This limit may be related to the number of segments and the communication cost.
evaluate(solutions3_4_10_1_0$sols[[1]], 3.1, 1, 1)
## [1] 12.1
evaluate(solutions3_4_1_1_0$sols[[1]], 3.1, 1, 1)
## [1] 12.2
evaluate(solutions3_4_10_1_0$sols[[1]], 3, 1, 1.1)
## [1] 12.8
evaluate(solutions3_4_1_1_0$sols[[1]], 3, 1, 1.1)
## [1] 12.4
A part of the cost can be transferred to the computation up to some threshold.
evaluate(solutions3_4_10_1_0$sols[[1]], 10, 1, 2.7)
## [1] 32.6
evaluate(solutions3_4_1_1_0$sols[[1]], 10, 1, 2.7)
## [1] 32.8
evaluate(solutions3_4_10_1_0$sols[[1]], 10, 1, 2.8)
## [1] 33.4
evaluate(solutions3_4_1_1_0$sols[[1]], 10, 1, 2.8)
## [1] 33.2
If the latency is significantly larger the previous limit changes with an unclear pattern.
Let's study the specific optimal solutions:
plot_schedule(solutions3_4_1_1_0$sols[[1]], 1, 1, 0)
plot_schedule(solutions3_4_1_1_0$sols[[2]], 1, 1, 0)
plot_schedule(solutions3_4_10_1_0$sols[[1]], 1, 1, 0)
The first two are optimal most of the time except when the latency is large enough (it is then the last one which is optimal).
The first two send the first segment to the root and then pipeline the other segments by exploiting bidirectional links (two pipelines). Interestingly the schedules are optimal for 1 segment, 2 segments and also 3 segments. This suggests that optimal schedules could be built greedily. In the last schedule, all segments are sent direclty to the root.
Future investigations:
((n-1)*m)^(n-1)
sapply(1:m, function(j) return(sapply(1:n, function(i) return(suppressWarnings(as.integer(((i -
1) * j)^(i - 1)))))))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 1 1 1 1 1 1
## [2,] 1 2 3 4 5 6 7
## [3,] 4 16 36 64 100 144 196
## [4,] 27 216 729 1728 3375 5832 9261
## [5,] 256 4096 20736 65536 160000 331776 614656
## [6,] 3125 100000 759375 3200000 9765625 24300000 52521875
## [7,] 46656 2985984 34012224 191102976 729000000 NA NA
## [,8]
## [1,] 1
## [2,] 8
## [3,] 256
## [4,] 13824
## [5,] 1048576
## [6,] 102400000
## [7,] NA
This last idea would allow to study 4 machines with any number of segments, 5 machines with 5 segments, and 6 machines with 2 or 3 segments.
Algorithm ideas: