We will study the heterogeneous properties of small matrices generated with the range-based and CVB methods. To have a stronger confidence, we will re-implement those methods.
range_based <- function(n, m, Rtask, Rmach, a = 0, b = 0) {
cost <- matrix(nrow = n, ncol = m)
for (i in 1:n) {
v <- runif(1, min = 1, max = Rtask)
for (j in 1:m) cost[i, j] <- v * runif(1, min = 1, max = Rmach)
}
for (i in 1:as.integer(a * n)) cost[i, 1:as.integer(b * m)] <- cost[i, order(cost[i,
1:as.integer(b * m)])]
cost
}
CVB <- function(n, m, Vtask, Vmach, mutask, a = 0, b = 0) {
cost <- matrix(nrow = n, ncol = m)
alphatask <- 1/Vtask^2
alphamach <- 1/Vmach^2
betatask <- mutask/alphatask
for (i in 1:n) {
q <- rgamma(1, shape = alphatask, scale = betatask)
betamach <- q/alphamach
for (j in 1:m) cost[i, j] <- rgamma(1, shape = alphamach, scale = betamach)
}
for (i in 1:as.integer(a * n)) cost[i, 1:as.integer(b * m)] <- cost[i, order(cost[i,
1:as.integer(b * m)])]
cost
}
Let's test them with some known outcomes (n=m=1000, Rtask=Rmach=1e6, Vtask=Vmach=0.1, a=b=1):
rb <- range_based(n = 1000, m = 1000, Rtask = 1e+06, Rmach = 1e+06, a = 1, b = 1)
CV_mean_row(rb)
## [1] 0.5931
CV_mean_col(rb)
## [1] 0.577
cv <- CVB(n = 1000, m = 1000, Vtask = 0.1, Vmach = 0.1, mutask = 10, a = 1,
b = 1)
CV_mean_row(cv)
## [1] 0.1013
CV_mean_col(cv)
## [1] 0.09998
OK, this works and this is symmetric. Let's test with no reordering:
rb <- range_based(n = 1000, m = 1000, Rtask = 1e+06, Rmach = 1e+06, a = 0, b = 0)
CV_mean_row(rb)
## [1] 0.5788
CV_mean_col(rb)
## [1] 0.02065
cv <- CVB(n = 1000, m = 1000, Vtask = 0.1, Vmach = 0.1, mutask = 10, a = 0,
b = 0)
CV_mean_row(cv)
## [1] 0.09896
CV_mean_col(cv)
## [1] 0.003181
As expected, the machine heterogeneity is zero.
Let's generate the heterogeneity figure of previous studies with only MC for the range-based:
literature_range <- read.table("literature_range.txt", na.strings = "", col.names = c("task_range",
"mach_range", "task_hetero", "mach_hetero", "row_consis", "col_consis"))
result <- NULL
for (i in 1:nrow(literature_range)) {
suppressWarnings(param <- sapply(literature_range[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
mat <- range_based(1000, 1000, param["task_range"], param["mach_range"],
param["row_consis"], param["col_consis"])
result <- rbind(result, data.frame(CV_mean_row = CV_mean_row(mat), CV_mean_col = CV_mean_col(mat),
mean_CV_col = mean_CV_col(mat), mean_CV_row = mean_CV_row(mat)))
}
literature_range <- cbind(literature_range, result)
Let's plot these values:
library(ggplot2)
p <- ggplot(data = literature_range, aes(x = CV_mean_row, y = CV_mean_col, shape = interaction(task_hetero,
mach_hetero), color = interaction(task_hetero, mach_hetero), fill = interaction(task_hetero,
mach_hetero)))
p <- p + geom_point(size = 4, alpha = 1/2)
p <- p + scale_x_continuous(name = expression(V ~ mu[task]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
p <- p + scale_y_continuous(name = expression(V ~ mu[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p
Now with the CVB method :
literature_CV <- read.table("literature_CV.txt", na.strings = "", col.names = c("task_range",
"mach_range", "task_hetero", "mach_hetero", "row_consis", "col_consis",
"mean"))
The following computation should take around 20 minutes.
result <- NULL
for (i in 1:nrow(literature_CV)) {
suppressWarnings(param <- sapply(literature_CV[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
if (is.na(param["mean"]))
param["mean"] <- 1
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
mat <- CVB(1000, 1000, param["task_range"], param["mach_range"], param["mean"],
param["row_consis"], param["col_consis"])
result <- rbind(result, data.frame(CV_mean_row = CV_mean_row(mat), CV_mean_col = CV_mean_col(mat),
mean_CV_col = mean_CV_col(mat), mean_CV_row = mean_CV_row(mat)))
}
literature_CV <- cbind(literature_CV, result)
Let's plot these values:
library(ggplot2)
p <- ggplot(data = literature_CV, aes(x = CV_mean_row, y = CV_mean_col, shape = interaction(task_hetero,
mach_hetero), color = interaction(task_hetero, mach_hetero), fill = interaction(task_hetero,
mach_hetero)))
p <- p + geom_point(size = 4, alpha = 1/2)
p <- p + scale_x_continuous(name = expression(V ~ mu[task]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
p <- p + scale_y_continuous(name = expression(V ~ mu[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "med.med", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "medmed", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
22, 4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p
## Warning: Removed 7 rows containing missing values (geom_point).
This is consistent with what we have generated analytically. The new implementations, however, are far slower than the previous. Let's swith to these:
range_based <- function(n, m, Rtask, Rmach, a = 0, b = 0) {
generate_matrix_siegel_range(n, m, Rtask, Rmach, a, b)
}
CVB <- function(n, m, Vtask, Vmach, mutask, a = 0, b = 0) {
generate_matrix_siegel_CV(n, m, mutask, mutask, Vtask, Vmach, a, b)
}
Let's generate a density plot by running some MC iterations for each settings with various size:
get_properties <- function(literature, n, m, MC) {
result <- NULL
for (i in 1:nrow(literature)) {
suppressWarnings(param <- sapply(literature[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
for (k in 1:MC) {
mat <- range_based(n, m, param["task_range"], param["mach_range"],
param["row_consis"], param["col_consis"])
result <- rbind(result, data.frame(literature[i, ], Vtask = CV_mean_row(mat),
Vmach = CV_mean_col(mat), measure = "CVmean"))
result <- rbind(result, data.frame(literature[i, ], Vtask = mean_CV_col(mat),
Vmach = mean_CV_row(mat), measure = "meanCV"))
}
}
result
}
M <- 10
settings_small <- get_properties(literature_range, 16, 4, M)
Let's plot the density:
library(ggplot2)
p <- ggplot(data = settings_small, aes(x = Vtask, y = Vmach))
p <- p + geom_point(aes(shape = interaction(task_hetero, mach_hetero), color = interaction(task_hetero,
mach_hetero), fill = interaction(task_hetero, mach_hetero)), alpha = 1/2)
p <- p + geom_density2d()
p <- p + scale_x_continuous(name = expression(V ~ mu[task]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
p <- p + scale_y_continuous(name = expression(V ~ mu[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p <- p + facet_grid(. ~ measure)
p
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 2 rows containing missing values (geom_point).
Let's start again with a medium size and a large size:
settings_medium <- get_properties(literature_range, 64, 16, M)
settings_large <- get_properties(literature_range, 256, 64, M)
settings <- rbind(cbind(settings_small, size = "16/4"), cbind(settings_medium,
size = "64/16"), cbind(settings_large, size = "256/64"))
And plot the result with facets:
library(ggplot2)
p <- ggplot(data = settings, aes(x = Vtask, y = Vmach))
p <- p + geom_point(aes(shape = interaction(task_hetero, mach_hetero), color = interaction(task_hetero,
mach_hetero), fill = interaction(task_hetero, mach_hetero)), alpha = 1/2)
p <- p + geom_density2d()
p <- p + scale_x_continuous(name = expression(V ~ mu[task]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
p <- p + scale_y_continuous(name = expression(V ~ mu[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p <- p + facet_grid(measure ~ size)
p
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 2 rows containing missing values (geom_point).
get_properties_CVB <- function(literature, n, m, MC) {
result <- NULL
for (i in 1:nrow(literature)) {
suppressWarnings(param <- sapply(literature[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
if (is.na(param["mean"]))
param["mean"] <- 1
for (k in 1:MC) {
mat <- CVB(n, m, param["task_range"], param["mach_range"], param["mean"],
param["row_consis"], param["col_consis"])
result <- rbind(result, data.frame(literature[i, ], Vtask = CV_mean_row(mat),
Vmach = CV_mean_col(mat), measure = "CVmean"))
result <- rbind(result, data.frame(literature[i, ], Vtask = mean_CV_col(mat),
Vmach = mean_CV_row(mat), measure = "meanCV"))
}
}
result
}
settings_CVB_small <- get_properties_CVB(literature_CV, 16, 4, M)
settings_CVB_medium <- get_properties_CVB(literature_CV, 64, 16, M)
settings_CVB_large <- get_properties_CVB(literature_CV, 256, 64, M)
settings_CVB <- rbind(cbind(settings_CVB_small, size = "16/4"), cbind(settings_CVB_medium,
size = "64/16"), cbind(settings_CVB_large, size = "256/64"))
Let's put everyting on the same plot:
settings_complete <- rbind(cbind(settings, method = "Range-based"), cbind(settings_CVB[,
colnames(settings)], method = "CVB"))
settings_complete$task_hetero <- factor(settings_complete$task_hetero, levels = levels(literature_CV$task_hetero))
settings_complete$mach_hetero <- factor(settings_complete$mach_hetero, levels = levels(literature_CV$mach_hetero))
levels(settings_complete$measure) <- c(expression(V ~ mu[task]/V ~ mu[mach]),
expression(mu ~ V[task]/mu ~ V[mach]))
library(ggplot2)
p <- ggplot(data = settings_complete, aes(x = Vtask, y = Vmach))
p <- p + geom_point(aes(shape = interaction(task_hetero, mach_hetero), color = interaction(task_hetero,
mach_hetero), fill = interaction(task_hetero, mach_hetero)), alpha = 1/10)
p <- p + scale_x_log10(name = "", limits = c(0.001, 10))
p <- p + scale_y_log10(name = "", limits = c(0.001, 10))
breaks <- c("high.high", "low.high", "high.low", "low.low", "med.med", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "medmed", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
22, 4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p <- p + guides(colour = guide_legend(override.aes = list(size = 4, alpha = 1)))
p <- p + facet_grid(method + measure ~ size, labeller = label_parsed)
p <- p + annotation_logticks()
p