The objective is twofold: use more distributions and merge the existing figures into two. The additional distributions to consider are the Poisson, uniform, Weibull, geometric and triangular ones. The merging can be done with a color map of the variation between all figures (for both the overlapping and the non-commutative cases).
Let's start with the Poisson:
rpois_cost <- function(n, mean, sigma) {
    # Degenerate case: deterministic costs
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # A Poisson with rate lambda has variance lambda; shift it to reach the target mean
    lambda <- sigma^2
    m <- mean - lambda
    return(rpois(n, lambda) + m)
}
mean(rpois_cost(10000, 10, 1))
## [1] 9.99
sqrt(var(rpois_cost(10000, 10, 1)))
## [1] 0.9942
rpois_cost(10, 10, 1)
## [1] 9 9 10 9 10 9 11 10 9 9
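Since rpois only produces non-negative integers, the generated costs are bounded below by the shift m = mean - sigma^2, which becomes negative as soon as sigma exceeds sqrt(mean); with a unitary mean, this means a cov greater than 1. A quick illustration (values chosen for illustration only, not used afterwards):
min(rpois_cost(1000, 1, 2))  # shift is 1 - 4 = -3, so negative costs are almost certain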
Hence, let's be careful not to use a cov greater than 1 with this distribution. Now the uniform one:
runif_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # A uniform on [min, max] has variance (max - min)^2 / 12
    min <- mean - sigma * sqrt(12)/2
    max <- mean + sigma * sqrt(12)/2
    return(runif(n, min, max))
}
mean(runif_cost(10000, 10, 1))
## [1] 9.986
sqrt(var(runif_cost(10000, 10, 1)))
## [1] 1.001
2/sqrt(12)
## [1] 0.5774
Again, the cov must be lower than 2/sqrt(12). The Weibull case is harder because recovering the shape parameter from the cov involves the gamma function, which calls for a numerical inversion.
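For reference, here is a possible generator based on such an inversion (a sketch only, not used in the following): the shape k is found numerically with uniroot since the cov of a Weibull satisfies cov^2 = gamma(1 + 2/k)/gamma(1 + 1/k)^2 - 1, and the arbitrary bracket only covers moderate cov values.
rweibull_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # The cov of a Weibull depends only on the shape k
    cov2 <- (sigma/mean)^2
    f <- function(k) gamma(1 + 2/k)/gamma(1 + 1/k)^2 - 1 - cov2
    # Arbitrary bracket: fails for extreme cov values
    k <- uniroot(f, c(0.1, 1000))$root
    scale <- mean/gamma(1 + 1/k)
    return(rweibull(n, shape = k, scale = scale))
}
Let's switch to the geometric one: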
rgeom_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # Solve (1 - prob) / prob^2 = sigma^2 for the success probability
    prob <- (sqrt(4 * sigma^2 + 1) - 1)/(2 * sigma^2)
    # Note: E[rgeom(n, prob)] = (1 - prob)/prob, so this shift leaves the
    # realized mean one unit below the target (visible in the output below)
    m <- mean - 1/prob
    return(rgeom(n, prob) + m)
}
mean(rgeom_cost(10000, 10, 1))
## [1] 9.001
sqrt(var(rgeom_cost(10000, 10, 1)))
## [1] 1.013
rgeom_cost(10, 1, 0.1)
## [1] -0.009902 -0.009902 -0.009902 -0.009902 -0.009902 -0.009902 -0.009902
## [8] -0.009902 -0.009902 -0.009902
Unfortunately, with a unitary mean, most values are negative (a consequence of the 1/prob shift noted above). Hence, we discard this distribution. Let's try the triangular one:
library(triangle)
rtriangle_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0)
        return(rep(mean, n))
    # A symmetric triangular on [a, b] has variance (b - a)^2 / 24
    a <- mean - sqrt(6) * sigma
    b <- mean + sqrt(6) * sigma
    return(rtriangle(n, a, b))
}
mean(rtriangle_cost(10000, 10, 1))
## [1] 9.995
sqrt(var(rtriangle_cost(10000, 10, 1)))
## [1] 0.9995
1/sqrt(6)
## [1] 0.4082
As in the uniform case, the cov must be lower than 1/sqrt(6).
To summarize, the Poisson, uniform and triangular distributions are usable with a cov lower than 1, 2/sqrt(12) ≈ 0.58 and 1/sqrt(6) ≈ 0.41, respectively. We discard the Weibull and geometric distributions for simplicity.
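As a sanity check, a small helper (a minimal sketch relying on the generators defined above) can verify that each retained generator reproduces the requested mean and standard deviation:
check_cost <- function(rcost, mu = 10, sigma = 1, n = 10000) {
    x <- rcost(n, mu, sigma)
    c(mean = mean(x), sd = sqrt(var(x)))
}
sapply(list(pois = rpois_cost, unif = runif_cost, triangle = rtriangle_cost), check_cost)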
Let's prepare some data:
n <- 64
iterations <- 1000
nb_point <- 31 # odd because of symmetry
V_over_gamma <- get_all_makespans_over(iterations, n, rgamma_cost, nb_point)
V_over_bern <- get_all_makespans_over(iterations, n, rbern_cost, nb_point)
V_over_bino <- get_all_makespans_over(iterations, n, rbino_cost, nb_point)
V_over_exp <- get_all_makespans_over(iterations, n, rexp_cost, nb_point)
V_over_pois <- get_all_makespans_over(iterations, n, rpois_cost, nb_point,
    cov_max = 1)
V_over_unif <- get_all_makespans_over(iterations, n, runif_cost, nb_point,
    cov_max = 2/sqrt(12))
V_over_triangle <- get_all_makespans_over(iterations, n, rtriangle_cost, nb_point,
    cov_max = 1/sqrt(6))
V_over_beta0_01 <- get_all_makespans_over(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 0.01), nb_point, cov_max = 9.99)
V_over_beta1 <- get_all_makespans_over(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 1), nb_point, cov_max = 0.999)
V_over_beta100 <- get_all_makespans_over(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 100), nb_point, cov_max = 0.0999)
Let's try a heat map by merging data:
V_over_gamma_m <- V_over_gamma[, 1:3]
colnames(V_over_gamma_m)[3] <- "gamma"
V_over_m <- V_over_gamma_m
V_over_bern_m <- V_over_bern[, 1:3]
colnames(V_over_bern_m)[3] <- "bern"
V_over_m <- merge(V_over_m, V_over_bern_m, by = c("cd", "cov"), all = TRUE)
V_over_bino_m <- V_over_bino[, 1:3]
colnames(V_over_bino_m)[3] <- "bino"
V_over_m <- merge(V_over_m, V_over_bino_m, by = c("cd", "cov"), all = TRUE)
V_over_exp_m <- V_over_exp[, 1:3]
colnames(V_over_exp_m)[3] <- "exp"
V_over_m <- merge(V_over_m, V_over_exp_m, by = c("cd", "cov"), all = TRUE)
V_over_pois_m <- V_over_pois[, 1:3]
colnames(V_over_pois_m)[3] <- "pois"
V_over_m <- merge(V_over_m, V_over_pois_m, by = c("cd", "cov"), all = TRUE)
V_over_unif_m <- V_over_unif[, 1:3]
colnames(V_over_unif_m)[3] <- "unif"
V_over_m <- merge(V_over_m, V_over_unif_m, by = c("cd", "cov"), all = TRUE)
V_over_triangle_m <- V_over_triangle[, 1:3]
colnames(V_over_triangle_m)[3] <- "triangle"
V_over_m <- merge(V_over_m, V_over_triangle_m, by = c("cd", "cov"), all = TRUE)
V_over_beta0_01_m <- V_over_beta0_01[, 1:3]
colnames(V_over_beta0_01_m)[3] <- "beta0_01"
V_over_m <- merge(V_over_m, V_over_beta0_01_m, by = c("cd", "cov"), all = TRUE)
V_over_beta1_m <- V_over_beta1[, 1:3]
colnames(V_over_beta1_m)[3] <- "beta1"
V_over_m <- merge(V_over_m, V_over_beta1_m, by = c("cd", "cov"), all = TRUE)
V_over_beta100_m <- V_over_beta100[, 1:3]
colnames(V_over_beta100_m)[3] <- "beta100"
V_over_m <- merge(V_over_m, V_over_beta100_m, by = c("cd", "cov"), all = TRUE)
var_over <- NULL
for (i in 1:nrow(V_over_m)) {
    perfs <- as.numeric(V_over_m[i, -c(1:2)])
    perfs <- perfs[!is.na(perfs)]
    # Worst-case relative difference: spread of the ratios across distributions
    # divided by the largest deviation from 1
    range_perfs <- max(perfs) - min(perfs)
    var_over <- c(var_over, range_perfs/max(abs(perfs - 1)))
    # Alternative metrics that were considered:
    # range_perfs <- max(perfs)/min(perfs)
    # var_over <- c(var_over, (range_perfs - 1)/max(c(perfs, 1/perfs) - 1))
    # var_over <- c(var_over, sqrt(var(perfs)))
    # var_over <- c(var_over, range_perfs)
    # var_over <- c(var_over, range_perfs/abs(mean(perfs) - 1))
}
V_over_m <- cbind(V_over_m, perf = var_over)
Let's plot it:
library(ggplot2)
library(plyr) # For rounding values
library(scales) # For darker lines and comma formatter
library(directlabels)
## Loading required package: grid
## Loading required package: quadprog
contour_break <- c(0.5, 1, 1.5, 2)
p <- ggplot(V_over_m, aes(cov, cd, fill = perf, z = perf))
p <- p + geom_tile()
p <- p + scale_x_log10(breaks = unique(10^round_any(log10(V_over_m$cov), 1)),
labels = comma)
p <- p + scale_y_log10(breaks = c(0.1, 0.2, 0.5, 1, 2, 5, 10), labels = percent)
p <- p + stat_contour(aes(color = ..level..), color = alpha("black", 1/2), breaks = contour_break)
p <- p + labs(x = "Coefficient of variation", y = "Proportion of computation costs")
p <- p + scale_fill_continuous("Difference ratio", low = "white", high = "red",
breaks = seq(0, 2, 0.5))
p <- p + theme_bw()
p <- p + theme(legend.position = "bottom")
p <- p + annotation_logticks()
p <- p + scale_color_continuous(low = "#000000", high = "#000000")
direct.label(p)
## Loading required package: proto
The ratio could be computed in two ways that produce extremely close figures. The easiest method to explain may be the less relevant one, but this has only a marginal impact since the ratios are all close to one. Combining a maximum difference with a relative measure purposely yields a worst-case value: other metrics such as the standard deviation would have hidden the differences between the distributions, which are low (the results are quite consistent).
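For instance, with three hypothetical performance ratios (illustrative values, not taken from the results), both variants give the same figure:
perfs <- c(0.95, 0.96, 0.97)  # hypothetical ratios for a single (cd, cov) combination
(max(perfs) - min(perfs))/max(abs(perfs - 1))           # retained metric: 0.4
(max(perfs)/min(perfs) - 1)/max(c(perfs, 1/perfs) - 1)  # alternative metric: 0.4 as well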
There are mostly three areas of divergence: cov around 0.1 with 30% of computation costs; cov around 1 with perfect overlapping; and cov greater than 5. The first two areas are related to the proximity between both methods: a slight quantitative performance difference is amplified by the relative metric that is used (the transitions between both methods nevertheless occur for the same set of parameters). The last area is essentially due to the exponential distribution, for which the improvement of the dynamic method is stronger than with the other distributions.
However, as the value is always lower than one, all distributions lead to a consistent conclusion about the best method.
Now, for the non-commutative case:
n <- 64
iterations <- 1000
nb_point <- 31 # odd because of symmetry
V_non_comm_gamma <- get_all_makespans_non_comm(iterations, n, rgamma_cost, nb_point)
V_non_comm_bern <- get_all_makespans_non_comm(iterations, n, rbern_cost, nb_point)
V_non_comm_bino <- get_all_makespans_non_comm(iterations, n, rbino_cost, nb_point)
V_non_comm_exp <- get_all_makespans_non_comm(iterations, n, rexp_cost, nb_point)
V_non_comm_pois <- get_all_makespans_non_comm(iterations, n, rpois_cost, nb_point,
    cov_max = 1)
V_non_comm_unif <- get_all_makespans_non_comm(iterations, n, runif_cost, nb_point,
    cov_max = 2/sqrt(12))
V_non_comm_triangle <- get_all_makespans_non_comm(iterations, n, rtriangle_cost,
    nb_point, cov_max = 1/sqrt(6))
V_non_comm_beta0_01 <- get_all_makespans_non_comm(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 0.01), nb_point, cov_max = 9.99)
V_non_comm_beta1 <- get_all_makespans_non_comm(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 1), nb_point, cov_max = 0.999)
V_non_comm_beta100 <- get_all_makespans_non_comm(iterations, n, function(n, mean, sigma)
    rbeta_cost(n, mean, sigma, 100), nb_point, cov_max = 0.0999)
Let's try a heat map by merging data:
V_non_comm_gamma_m <- V_non_comm_gamma[, 1:3]
colnames(V_non_comm_gamma_m)[3] <- "gamma"
V_non_comm_m <- V_non_comm_gamma_m
V_non_comm_bern_m <- V_non_comm_bern[, 1:3]
colnames(V_non_comm_bern_m)[3] <- "bern"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_bern_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_bino_m <- V_non_comm_bino[, 1:3]
colnames(V_non_comm_bino_m)[3] <- "bino"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_bino_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_exp_m <- V_non_comm_exp[, 1:3]
colnames(V_non_comm_exp_m)[3] <- "exp"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_exp_m, by = c("cd", "cov"), all = TRUE)
V_non_comm_pois_m <- V_non_comm_pois[, 1:3]
colnames(V_non_comm_pois_m)[3] <- "pois"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_pois_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_unif_m <- V_non_comm_unif[, 1:3]
colnames(V_non_comm_unif_m)[3] <- "unif"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_unif_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_triangle_m <- V_non_comm_triangle[, 1:3]
colnames(V_non_comm_triangle_m)[3] <- "triangle"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_triangle_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_beta0_01_m <- V_non_comm_beta0_01[, 1:3]
colnames(V_non_comm_beta0_01_m)[3] <- "beta0_01"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta0_01_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_beta1_m <- V_non_comm_beta1[, 1:3]
colnames(V_non_comm_beta1_m)[3] <- "beta1"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta1_m, by = c("cd", "cov"),
all = TRUE)
V_non_comm_beta100_m <- V_non_comm_beta100[, 1:3]
colnames(V_non_comm_beta100_m)[3] <- "beta100"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta100_m, by = c("cd", "cov"),
all = TRUE)
var_non_comm <- NULL
for (i in 1:nrow(V_non_comm_m)) {
    # Method selected by each distribution for this (cd, cov) combination
    perfs <- sapply(V_non_comm_m[i, -(1:2)], toString)
    perfs <- perfs[perfs != "NA"]
    # Proportion of distributions that disagree with the majority choice
    var_non_comm <- c(var_non_comm, 1 - max(as.vector(table(perfs)))/sum(as.vector(table(perfs))))
}
V_non_comm_m <- cbind(V_non_comm_m, perf = var_non_comm)
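For instance, if seven distributions out of ten select one method and the three remaining ones select the other (hypothetical labels and counts), the disagreement proportion is 0.3:
perfs <- c(rep("method A", 7), rep("method B", 3))
1 - max(as.vector(table(perfs)))/sum(as.vector(table(perfs)))  # 0.3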
Let's plot it:
library(ggplot2)
library(plyr) # For rounding values
library(scales) # For darker lines and comma formatter
library(directlabels)
contour_break <- seq(0.1, 1, 0.2)
p <- ggplot(V_non_comm_m, aes(cov, cd, fill = perf, z = perf))
p <- p + geom_tile()
p <- p + scale_x_log10(breaks = unique(10^round_any(log10(V_non_comm_m$cov),
1)), labels = comma)
p <- p + scale_y_log10(breaks = c(0.1, 0.2, 0.5, 1, 2, 5, 10), labels = percent)
p <- p + stat_contour(aes(color = ..level..), color = alpha("black", 1/2), breaks = contour_break)
p <- p + labs(x = "Coefficient of variation", y = "Proportion of computation costs")
p <- p + scale_fill_continuous("Difference proportion", low = "white", high = "red",
breaks = contour_break)
p <- p + theme_bw()
p <- p + theme(legend.position = "bottom")
p <- p + annotation_logticks()
p <- p + scale_color_continuous(low = "#000000", high = "#000000")
direct.label(p)
As expected, the differences occur at the frontiers between the two methods.
Relaunching the experiment with 10 distinct distributions (only 4 of which go up to cov = 10) provides similar results, both quantitatively and qualitatively.