The objective is twofold: use more distributions and merge the existing figures into two. The additional distributions to consider are the Poisson, uniform, Weibull, geometric and triangular ones. The merging can be done with a color map of the variation between all figures (for both the overlapping and the non-commutative cases).
Let's start with the Poisson:
rpois_cost <- function(n, mean, sigma) {
    # Degenerate case: deterministic costs
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # A Poisson with rate lambda has variance lambda; shift it to reach the target mean
    lambda <- sigma^2
    m <- mean - lambda
    return(rpois(n, lambda) + m)
}
mean(rpois_cost(10000, 10, 1))
## [1] 9.99
sqrt(var(rpois_cost(10000, 10, 1)))
## [1] 0.9942
rpois_cost(10, 10, 1)
## [1] 9 9 10 9 10 9 11 10 9 9
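Since rpois only produces non-negative integers, the generated costs are bounded below by the shift m = mean - sigma^2, which becomes negative as soon as sigma exceeds sqrt(mean); with a unitary mean, this means a cov greater than 1. A quick illustration (values chosen for illustration only, not used afterwards):
min(rpois_cost(1000, 1, 2))  # shift is 1 - 4 = -3, so negative costs are almost certain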
Hence, let's be careful not to use a cov greater than 1 with this distribution. Now the uniform one:
runif_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # A uniform on [min, max] has variance (max - min)^2 / 12
    min <- mean - sigma * sqrt(12)/2
    max <- mean + sigma * sqrt(12)/2
    return(runif(n, min, max))
}
mean(runif_cost(10000, 10, 1))
## [1] 9.986
sqrt(var(runif_cost(10000, 10, 1)))
## [1] 1.001
2/sqrt(12)
## [1] 0.5774
Again, the cov must be lower than 2/sqrt(12). The Weibull case is harder because recovering the shape parameter from the cov involves the gamma function, which calls for a numerical inversion.
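For reference, here is a possible generator based on such an inversion (a sketch only, not used in the following): the shape k is found numerically with uniroot since the cov of a Weibull satisfies cov^2 = gamma(1 + 2/k)/gamma(1 + 1/k)^2 - 1, and the arbitrary bracket only covers moderate cov values.
rweibull_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # The cov of a Weibull depends only on the shape k
    cov2 <- (sigma/mean)^2
    f <- function(k) gamma(1 + 2/k)/gamma(1 + 1/k)^2 - 1 - cov2
    # Arbitrary bracket: fails for extreme cov values
    k <- uniroot(f, c(0.1, 1000))$root
    scale <- mean/gamma(1 + 1/k)
    return(rweibull(n, shape = k, scale = scale))
}
Let's switch to the geometric one: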
rgeom_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # Solve (1 - prob) / prob^2 = sigma^2 for the success probability
    prob <- (sqrt(4 * sigma^2 + 1) - 1)/(2 * sigma^2)
    # Note: E[rgeom(n, prob)] = (1 - prob)/prob, so this shift leaves the
    # realized mean one unit below the target (visible in the output below)
    m <- mean - 1/prob
    return(rgeom(n, prob) + m)
}
mean(rgeom_cost(10000, 10, 1))
## [1] 9.001
sqrt(var(rgeom_cost(10000, 10, 1)))
## [1] 1.013
rgeom_cost(10, 1, 0.1)
## [1] -0.009902 -0.009902 -0.009902 -0.009902 -0.009902 -0.009902 -0.009902
## [8] -0.009902 -0.009902 -0.009902
Unfortunately, with a unitary mean, most values are negative (a consequence of the 1/prob shift noted above). Hence, we discard this distribution. Let's try the triangular one:
library(triangle)
rtriangle_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # A symmetric triangular on [a, b] has variance (b - a)^2 / 24
    a <- mean - sqrt(6) * sigma
    b <- mean + sqrt(6) * sigma
    return(rtriangle(n, a, b))
}
mean(rtriangle_cost(10000, 10, 1))
## [1] 9.995
sqrt(var(rtriangle_cost(10000, 10, 1)))
## [1] 0.9995
1/sqrt(6)
## [1] 0.4082
As in the uniform case, the cov must be lower than 1/sqrt(6).
To summarize, the Poisson, uniform and triangular distributions are usable with a cov lower than 1, 2/sqrt(12) ≈ 0.58 and 1/sqrt(6) ≈ 0.41, respectively. We discard the Weibull and geometric distributions for simplicity.
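As a sanity check, a small helper (a minimal sketch relying on the generators defined above) can verify that each retained generator reproduces the requested mean and standard deviation:
check_cost <- function(rcost, mu = 10, sigma = 1, n = 10000) {
    x <- rcost(n, mu, sigma)
    c(mean = mean(x), sd = sqrt(var(x)))
}
sapply(list(pois = rpois_cost, unif = runif_cost, triangle = rtriangle_cost), check_cost)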
Let's prepare some data:
n <- 64
iterations <- 1000
nb_point <- 31 # odd because of symmetry
V_over_gamma <- get_all_makespans_over(iterations, n, rgamma_cost, nb_point)
V_over_bern <- get_all_makespans_over(iterations, n, rbern_cost, nb_point)
V_over_bino <- get_all_makespans_over(iterations, n, rbino_cost, nb_point)
V_over_exp <- get_all_makespans_over(iterations, n, rexp_cost, nb_point)
V_over_pois <- get_all_makespans_over(iterations, n, rpois_cost, nb_point,
    cov_max = 1)
V_over_unif <- get_all_makespans_over(iterations, n, runif_cost, nb_point,
    cov_max = 2/sqrt(12))
V_over_triangle <- get_all_makespans_over(iterations, n, rtriangle_cost, nb_point,
    cov_max = 1/sqrt(6))
V_over_beta0_01 <- get_all_makespans_over(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 0.01), nb_point, cov_max = 9.99)
V_over_beta1 <- get_all_makespans_over(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 1), nb_point, cov_max = 0.999)
V_over_beta100 <- get_all_makespans_over(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 100), nb_point, cov_max = 0.0999)
Let's try a heat map by merging data:
V_over_gamma_m <- V_over_gamma[, 1:3]
colnames(V_over_gamma_m)[3] <- "gamma"
V_over_m <- V_over_gamma_m
V_over_bern_m <- V_over_bern[, 1:3]
colnames(V_over_bern_m)[3] <- "bern"
V_over_m <- merge(V_over_m, V_over_bern_m, by = c("cd", "cov"), all = TRUE)
V_over_bino_m <- V_over_bino[, 1:3]
colnames(V_over_bino_m)[3] <- "bino"
V_over_m <- merge(V_over_m, V_over_bino_m, by = c("cd", "cov"), all = TRUE)
V_over_exp_m <- V_over_exp[, 1:3]
colnames(V_over_exp_m)[3] <- "exp"
V_over_m <- merge(V_over_m, V_over_exp_m, by = c("cd", "cov"), all = TRUE)
V_over_pois_m <- V_over_pois[, 1:3]
colnames(V_over_pois_m)[3] <- "pois"
V_over_m <- merge(V_over_m, V_over_pois_m, by = c("cd", "cov"), all = TRUE)
V_over_unif_m <- V_over_unif[, 1:3]
colnames(V_over_unif_m)[3] <- "unif"
V_over_m <- merge(V_over_m, V_over_unif_m, by = c("cd", "cov"), all = TRUE)
V_over_triangle_m <- V_over_triangle[, 1:3]
colnames(V_over_triangle_m)[3] <- "triangle"
V_over_m <- merge(V_over_m, V_over_triangle_m, by = c("cd", "cov"), all = TRUE)
V_over_beta0_01_m <- V_over_beta0_01[, 1:3]
colnames(V_over_beta0_01_m)[3] <- "beta0_01"
V_over_m <- merge(V_over_m, V_over_beta0_01_m, by = c("cd", "cov"), all = TRUE)
V_over_beta1_m <- V_over_beta1[, 1:3]
colnames(V_over_beta1_m)[3] <- "beta1"
V_over_m <- merge(V_over_m, V_over_beta1_m, by = c("cd", "cov"), all = TRUE)
V_over_beta100_m <- V_over_beta100[, 1:3]
colnames(V_over_beta100_m)[3] <- "beta100"
V_over_m <- merge(V_over_m, V_over_beta100_m, by = c("cd", "cov"), all = TRUE)
var_over <- NULL
for (i in 1:nrow(V_over_m)) {
    perfs <- as.numeric(V_over_m[i, -c(1:2)])
    perfs <- perfs[!is.na(perfs)]
    # Worst-case relative difference: spread of the ratios across distributions
    # divided by the largest deviation from 1
    range_perfs <- max(perfs) - min(perfs)
    var_over <- c(var_over, range_perfs/max(abs(perfs - 1)))
    # Alternative metrics that were considered:
    # range_perfs <- max(perfs)/min(perfs)
    # var_over <- c(var_over, (range_perfs - 1)/max(c(perfs, 1/perfs) - 1))
    # var_over <- c(var_over, sqrt(var(perfs)))
    # var_over <- c(var_over, range_perfs)
    # var_over <- c(var_over, range_perfs/abs(mean(perfs) - 1))
}
V_over_m <- cbind(V_over_m, perf = var_over)
Let's plot it:
library(ggplot2)
library(plyr) # For rounding values
library(scales) # For darker lines and comma formatter
library(directlabels)
## Loading required package: grid
## Loading required package: quadprog
contour_break <- c(0.5, 1, 1.5, 2)
p <- ggplot(V_over_m, aes(cov, cd, fill = perf, z = perf))
p <- p + geom_tile()
p <- p + scale_x_log10(breaks = unique(10^round_any(log10(V_over_m$cov), 1)),
labels = comma)
p <- p + scale_y_log10(breaks = c(0.1, 0.2, 0.5, 1, 2, 5, 10), labels = percent)
p <- p + stat_contour(aes(color = ..level..), color = alpha("black", 1/2), breaks = contour_break)
p <- p + labs(x = "Coefficient of variation", y = "Proportion of computation costs")
p <- p + scale_fill_continuous("Difference ratio", low = "white", high = "red",
breaks = seq(0, 2, 0.5))
p <- p + theme_bw()
p <- p + theme(legend.position = "bottom")
p <- p + annotation_logticks()
p <- p + scale_color_continuous(low = "#000000", high = "#000000")
direct.label(p)
## Loading required package: proto
The ratio could be computed in two ways that produce extremely close figures. The easiest method to explain may be the less relevant one, but this has only a marginal impact since the ratios are all close to one. Combining a maximum difference with a relative measure purposely yields a worst-case value: other metrics such as the standard deviation would have hidden the differences between the distributions, which are low (the results are quite consistent).
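For instance, with three hypothetical performance ratios (illustrative values, not taken from the results), both variants give the same figure:
perfs <- c(0.95, 0.96, 0.97)  # hypothetical ratios for a single (cd, cov) combination
(max(perfs) - min(perfs))/max(abs(perfs - 1))           # retained metric: 0.4
(max(perfs)/min(perfs) - 1)/max(c(perfs, 1/perfs) - 1)  # alternative metric: 0.4 as well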
There are mostly three areas of divergence: cov around 0.1 with 30% of computation costs; cov around 1 with perfect overlapping; and cov greater than 5. The first two areas are related to the proximity between both methods: a slight quantitative performance difference is amplified by the relative metric that is used (the transitions between both methods nevertheless occur for the same set of parameters). The last area is essentially due to the exponential distribution, for which the improvement of the dynamic method is stronger than with the other distributions.
However, as the value is always lower than one, all distributions lead to a consistent conclusion about the best method.
Now, for the non-commutative case:
n <- 64
iterations <- 1000
nb_point <- 31 # odd because of symmetry
V_non_comm_gamma <- get_all_makespans_non_comm(iterations, n, rgamma_cost, nb_point)
V_non_comm_bern <- get_all_makespans_non_comm(iterations, n, rbern_cost, nb_point)
V_non_comm_bino <- get_all_makespans_non_comm(iterations, n, rbino_cost, nb_point)
V_non_comm_exp <- get_all_makespans_non_comm(iterations, n, rexp_cost, nb_point)
V_non_comm_pois <- get_all_makespans_non_comm(iterations, n, rpois_cost, nb_point,
    cov_max = 1)
V_non_comm_unif <- get_all_makespans_non_comm(iterations, n, runif_cost, nb_point,
    cov_max = 2/sqrt(12))
V_non_comm_triangle <- get_all_makespans_non_comm(iterations, n, rtriangle_cost,
    nb_point, cov_max = 1/sqrt(6))
V_non_comm_beta0_01 <- get_all_makespans_non_comm(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 0.01), nb_point, cov_max = 9.99)
V_non_comm_beta1 <- get_all_makespans_non_comm(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 1), nb_point, cov_max = 0.999)
V_non_comm_beta100 <- get_all_makespans_non_comm(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 100), nb_point, cov_max = 0.0999)
Let's try a heat map by merging data:
V_non_comm_gamma_m <- V_non_comm_gamma[, 1:3]
colnames(V_non_comm_gamma_m)[3] <- "gamma"
V_non_comm_m <- V_non_comm_gamma_m
V_non_comm_bern_m <- V_non_comm_bern[, 1:3]
colnames(V_non_comm_bern_m)[3] <- "bern"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_bern_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_bino_m <- V_non_comm_bino[, 1:3]
colnames(V_non_comm_bino_m)[3] <- "bino"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_bino_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_exp_m <- V_non_comm_exp[, 1:3]
colnames(V_non_comm_exp_m)[3] <- "exp"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_exp_m, by = c("cd", "cov"), all = TRUE)
V_non_comm_pois_m <- V_non_comm_pois[, 1:3]
colnames(V_non_comm_pois_m)[3] <- "pois"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_pois_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_unif_m <- V_non_comm_unif[, 1:3]
colnames(V_non_comm_unif_m)[3] <- "unif"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_unif_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_triangle_m <- V_non_comm_triangle[, 1:3]
colnames(V_non_comm_triangle_m)[3] <- "triangle"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_triangle_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_beta0_01_m <- V_non_comm_beta0_01[, 1:3]
colnames(V_non_comm_beta0_01_m)[3] <- "beta0_01"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta0_01_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_beta1_m <- V_non_comm_beta1[, 1:3]
colnames(V_non_comm_beta1_m)[3] <- "beta1"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta1_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_beta100_m <- V_non_comm_beta100[, 1:3]
colnames(V_non_comm_beta100_m)[3] <- "beta100"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta100_m, by = c("cd", "cov"),
all = TRUE)
var_non_comm <- NULL
for (i in 1:nrow(V_non_comm_m)) {
    # Method selected by each distribution for this (cd, cov) combination
    perfs <- sapply(V_non_comm_m[i, -(1:2)], toString)
    perfs <- perfs[perfs != "NA"]
    # Proportion of distributions that disagree with the majority choice
    var_non_comm <- c(var_non_comm, 1 - max(as.vector(table(perfs)))/sum(as.vector(table(perfs))))
}
V_non_comm_m <- cbind(V_non_comm_m, perf = var_non_comm)
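For instance, if seven distributions out of ten select one method and the three remaining ones select the other (hypothetical labels and counts), the disagreement proportion is 0.3:
perfs <- c(rep("method A", 7), rep("method B", 3))
1 - max(as.vector(table(perfs)))/sum(as.vector(table(perfs)))  # 0.3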
Let's plot it:
library(ggplot2)
library(plyr) # For rounding values
library(scales) # For darker lines and comma formatter
library(directlabels)
contour_break <- seq(0.1, 1, 0.2)
p <- ggplot(V_non_comm_m, aes(cov, cd, fill = perf, z = perf))
p <- p + geom_tile()
p <- p + scale_x_log10(breaks = unique(10^round_any(log10(V_non_comm_m$cov),
1)), labels = comma)
p <- p + scale_y_log10(breaks = c(0.1, 0.2, 0.5, 1, 2, 5, 10), labels = percent)
p <- p + stat_contour(aes(color = ..level..), color = alpha("black", 1/2), breaks = contour_break)
p <- p + labs(x = "Coefficient of variation", y = "Proportion of computation costs")
p <- p + scale_fill_continuous("Difference proportion", low = "white", high = "red",
breaks = contour_break)
p <- p + theme_bw()
p <- p + theme(legend.position = "bottom")
p <- p + annotation_logticks()
p <- p + scale_color_continuous(low = "#000000", high = "#000000")
direct.label(p)
As expected, the differences occur at the frontiers between the two methods.
Relaunching the experiment with 10 distinct distributions (only 4 of which go up to cov = 10) provides similar results, both quantitatively and qualitatively.