The objective is to extend the previous experiments and to select the best curve for the europar submission.
Let’s start by extending the Cholesky instances to larger m/k ratios.
# Generate one Cholesky GML instance per parameter value from 2 to 15,
# running the generator from inside instances/gml.
cd 19_revisit_instances
mkdir -p instances/gml
cd instances/gml
i=2
while [ "$i" -le 15 ]
do
  ../../generate_cholesky.py "$i"
  i=$((i + 1))
done
cd ../..
Let’s generate the schedules, lower bounds, …
# Make sure the output directories exist (no warning if they already do).
sapply(c("cholesky", "params"), function(dir) {
  dir.create(path = file.path(prefix_path, "instances", dir),
             showWarnings = FALSE)
})
k_s <- c(1, 2, 4, 8)
mk_s <- c(1, 2, 3, 5, 10, 20)
# Root directory of the instance files; independent of the loop index.
prefix <- file.path(prefix_path, "instances")
for (i in 2:15) {
  # Read the GML graph and re-export it as DOT, named after its vertex count.
  name_gml <- paste0("gml/cholesky_", i, ".gml")
  g <- read_instance_gml(file.path(prefix, name_gml))
  size <- gorder(g)
  name_dot <- paste0("cholesky/cholesky_", size, ".dot")
  write_graph_str(g, file.path(prefix, name_dot), "dot")
  # One parameter file and one HEFT schedule per (m, k) pair, with m = k * mk.
  for (k in k_s) {
    for (mk in mk_s) {
      m <- k * mk
      schedule <- HEFT(m, k, g)
      costs <- cbind(get.vertex.attribute(g, "CPU"),
                     get.vertex.attribute(g, "GPU"))
      LB <- LB_europar(costs, m, k)
      params <- data.frame(CP = CP(g), LB_europar = LB, HEFT = schedule$mks,
                           rank = i, size = size, m = m, k = k)
      name_params <- paste0(paste("params/cholesky", size, m, k, sep = "_"),
                            ".params")
      write.table(params, file.path(prefix, name_params),
                  row.names = FALSE, sep = ",")
      name_HEFT <- str_replace(name_params, "\\.params", ".HEFT")
      capture.output(schedule, file = file.path(prefix, name_HEFT))
    }
  }
}
Let’s run the online algorithms:
# Generating results
# Run ./src/test on every Cholesky DOT graph for each (m, k) pair and collect
# the rows in $RESULT.
PARAM=cholesky
RESULT=result_$PARAM.csv
# Start from a clean input directory holding only the Cholesky graphs.
rm -rf src/input/$PARAM
mkdir -p src/input/$PARAM
cp instances/cholesky/cholesky_*.dot src/input/$PARAM
# Truncate (or create) the result file.
> $RESULT
for k in 1 2 4 8
do
for mk in 1 2 3 5 10 20
do
m=$((k * mk))
# Presumably writes output_$m-$k.dat, which is read right after -- confirm.
./src/test -m $m -k $k -i src/input/$PARAM
# Rewrite "_<digits>.dot" as "_<digits>_<m>_<k>" so that each row name matches
# the base name of its instances/params/*.params file.
sed "s/_\([0-9]*\).dot/_\1_$m\_$k/" -i output_$m-$k.dat
# Keep only the data rows (lines mentioning cholesky).
grep cholesky output_$m-$k.dat >> $RESULT
done
done
# Insert the first line of the last output file as the first line of $RESULT.
sed -i "1i\\$(head -n 1 output_$m-$k.dat)" $RESULT
# Mixing results with lower bounds
# Turn the table into CSV: tabs become commas, spaces are removed.
sed -i "s/\t/,/g" $RESULT
sed -i "s/ //g" $RESULT
# For every data row, use its first field to locate the params file and emit
# that file's last line; $header keeps the first line of the last file read.
for file in $(awk -F',' 'NR>1 { print "instances/params/" $1 ".params" }' $RESULT)
do
header=$(head -n 1 $file)
tail -n 1 $file
done > $RESULT.tmp1
# Join results and params line by line (header with header, row with row).
# NOTE(review): paste separates the two halves with a TAB, not a comma --
# confirm the downstream read.csv copes with it.
paste $RESULT <(echo $header; cat $RESULT.tmp1) > $RESULT.tmp2
rm $RESULT.tmp1
mv $RESULT.tmp2 $RESULT
Let’s plot the result:
# Load the merged Cholesky results.
data <- read.csv(paste(prefix_path, "result_cholesky.csv", sep = "/"))
# Tabulate the difference between the MxQE and EFiT columns.
table(data$MxQE - data$EFiT)
##
## 0
## 336
# Long format: one row per (instance, algorithm), with readable labels.
cholesky_long <- data %>%
  gather(algo, mks, QALS, ERLS, EFiT, Quik, Rati) %>%
  mutate(algo = factor(algo,
                       levels = c("EFiT", "QALS", "ERLS", "Rati", "Quik"),
                       labels = c("EFT/Mix", "QALS", "ERLS",
                                  "Ratio", "Quick")))
# mks / HEFT against graph size, one panel per (m / k, k) combination.
ggplot(cholesky_long,
       aes(x = size, y = mks / HEFT, col = algo, shape = algo)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(name = "Algorithm", palette = "Set1") +
  scale_shape(name = "Algorithm") +
  facet_grid(m / k ~ k, scales = "free_y")
For a final figure, let’s take k=2 and m=10 or m=20.
# Final Cholesky figure, restricted to k = 2 and m = 10.
# "MxQE" is kept as a factor level (with drop = FALSE in the scales) even
# though it is not gathered here, so the legend lists "Mix" as well.
cholesky_final <- data %>%
  filter(k == 2, m == 10) %>%
  gather(algo, mks, QALS, ERLS, EFiT, Quik, Rati) %>%
  mutate(algo = factor(algo,
                       levels = c("EFiT", "QALS", "ERLS",
                                  "Rati", "Quik", "MxQE"),
                       labels = c("EFT (Mix)", "QALS", "ERLS",
                                  "Ratio", "Quick", "Mix")))
g_cholesky <- ggplot(cholesky_final,
                     aes(x = size, y = mks / HEFT,
                         col = algo, shape = algo)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(name = "Algorithm", palette = "Set1", drop = FALSE) +
  scale_shape(name = "Algorithm", drop = FALSE) +
  scale_x_log10(name = "Number of tasks (Cholesky)") +
  scale_y_continuous(name = "Ratio to HEFT") +
  annotation_logticks(sides = "b") +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(nrow = 1)) +
  coord_cartesian(ylim = c(1, 2.5))
print(g_cholesky)
It shows that:
We run the same set of simulations as before, but with a larger number of tasks and with higher CPU costs relative to GPU costs.
# Download the STG archives for the listed sizes and store every graph in
# instances/stg with the size appended to its name (<name>_<size>.stg).
mkdir -p instances/stg
for size in 100
do
  # -c resumes a partial download instead of starting over.
  wget -c http://www.kasahara.elec.waseda.ac.jp/schedule/ver2/rnc$size.tgz
  tar -xzf "rnc$size.tgz"
  # The archive is expected to unpack into a directory named after the size.
  if [ -d "$size" ]; then
    # Iterate with a glob rather than parsing `ls`, and quote every expansion,
    # so file names are never word-split or re-globbed.
    for file in "$size"/*
    do
      # An unmatched glob stays literal; skip it when the directory is empty.
      [ -e "$file" ] || continue
      mv "$file" "instances/stg/$(basename "$file" .stg)_$size.stg"
    done
    rm -r "$size"
  fi
done
Let’s finalize the generation of the instances with settings for CPU costs similar to our previous Europar article (mean = 15 and CV = 1).
# Make sure the output directories exist (no warning if they already do).
sapply(c("kasahara", "params"), function(dir) {
  dir.create(path = paste(prefix_path, "instances", dir, sep = "/"),
             showWarnings = FALSE)
})
# Settings forwarded to read_instance_stg().
tau <- 15
CV_CPU <- 1
k_s <- c(1, 2, 4, 8)
mk_s <- c(1, 2, 3, 5, 10, 20)
files <- list.files(paste(prefix_path, "instances/stg", sep = "/"),
                    full.names = TRUE)
for (filename in files) {
  # Graph size from the "_<size>" suffix. Only the base name is inspected so
  # that digits following an underscore in a directory name are not picked up.
  size <- as.integer(str_match(basename(filename), "_([0-9]+)")[2])
  instance <- read_instance_stg(filename, tau = tau, CV_CPU = CV_CPU)
  g <- instance$g
  # The extension pattern is escaped and anchored: a bare ".stg" is a regex
  # where "." matches any character.
  name_dot <- str_replace(filename, "/stg/", "/kasahara/") %>%
    str_replace("\\.stg$", ".dot")
  write_graph_str(g, name_dot, "dot")
  # The cost matrix depends only on the graph, so build it once per instance.
  costs <- cbind(get.vertex.attribute(g, "CPU"),
                 get.vertex.attribute(g, "GPU"))
  # One parameter file and one HEFT schedule per (m, k) pair, with m = k * mk.
  for (k in k_s) {
    for (mk in mk_s) {
      m <- k * mk
      schedule <- HEFT(m, k, g)
      LB <- LB_europar(costs, m, k)
      params <- data.frame(CP = CP(g), LB_europar = LB, HEFT = schedule$mks,
                           n = size, tau = tau, CV_CPU = CV_CPU, m = m, k = k,
                           instance$params[c("Precedenceconstraintsgenerator",
                                             "Taskprocessingtimegenerator")])
      name_params <- str_replace(name_dot, "/kasahara/", "/params/") %>%
        str_replace("\\.dot", paste("_", m, "_", k, ".params", sep = ""))
      write.table(params, name_params, row.names = FALSE, sep = ",")
      name_HEFT <- str_replace(name_params, "\\.params", ".HEFT")
      capture.output(schedule, file = name_HEFT)
    }
  }
}
Let’s run the algorithms:
# Generating results
# Run ./src/test on every Kasahara DOT graph for each (m, k) pair and collect
# the rows in $RESULT.
PARAM=kasahara
RESULT=result_$PARAM.csv
# Truncate (or create) the result file.
> $RESULT
for k in 1 2 4 8
do
for mk in 1 2 3 5 10 20
do
m=$((k * mk))
# The input directory is rebuilt from scratch for every (m, k) pair.
rm -rf src/input/$PARAM
mkdir -p src/input/$PARAM
cp instances/$PARAM/*.dot src/input/$PARAM
# Presumably writes output_$m-$k.dat, which is read right after -- confirm.
./src/test -m $m -k $k -i src/input/$PARAM
# Rewrite "_<digits>.dot" as "_<digits>_<m>_<k>" so that each row name matches
# the base name of its instances/params/*.params file.
sed "s/_\([0-9]*\).dot/_\1_$m\_$k/" -i output_$m-$k.dat
# Append everything but the first (header) line.
tail -n +2 output_$m-$k.dat >> $RESULT
done
done
# Insert the first line of the last output file as the first line of $RESULT.
sed -i "1i\\$(head -n 1 output_$m-$k.dat)" $RESULT
# Mixing results with lower bounds
# Turn the table into CSV: tabs become commas, spaces are removed.
sed -i "s/\t/,/g" $RESULT
sed -i "s/ //g" $RESULT
# For every data row, use its first field to locate the params file and emit
# that file's last line.
for file in $(awk -F',' 'NR>1 { print "instances/params/" $1 ".params" }' $RESULT)
do
tail -n 1 $file
done > $RESULT.tmp1
# $file still names the last params file of the loop; its first line is used
# as the header for the params columns.
# NOTE(review): paste separates the two halves with a TAB, not a comma --
# confirm the downstream read.csv copes with it.
paste $RESULT <(echo $(head -n 1 $file); cat $RESULT.tmp1) > $RESULT.tmp2
rm $RESULT.tmp1
mv $RESULT.tmp2 $RESULT
Now the results:
# Load the merged Kasahara results.
data <- read.csv(paste(prefix_path, "result_kasahara.csv", sep = "/"))
# Tabulate the difference between the MxQE and EFiT columns.
table(data$MxQE - data$EFiT)
##
## 0
## 4320
# Long format: one row per (instance, algorithm), with readable labels.
kasahara_long <- data %>%
  gather(algo, mks, QALS, ERLS, EFiT, Quik, Rati) %>%
  mutate(algo = factor(algo,
                       levels = c("EFiT", "QALS", "ERLS", "Rati", "Quik"),
                       labels = c("EFT (Mix)", "QALS", "ERLS",
                                  "Ratio", "Quick")))
# Box plots of mks / HEFT per precedence-constraint generator,
# one panel per (m / k, k) combination.
ggplot(kasahara_long,
       aes(x = Precedenceconstraintsgenerator, y = mks / HEFT, col = algo)) +
  geom_boxplot() +
  scale_color_brewer(name = "Algorithm", palette = "Set1") +
  facet_grid(m / k ~ k) +
  coord_cartesian(ylim = c(1, 2.5))
We can make the following observations:
For a final figure, let’s take k=2 and m=20 for graphs of size 300.
# Download the STG archives for the listed sizes and store every graph in
# instances/stg with the size appended to its name (<name>_<size>.stg).
mkdir -p instances/stg
for size in 300
do
  # -c resumes a partial download instead of saving a second numbered copy
  # when the chunk is re-run.
  wget -c http://www.kasahara.elec.waseda.ac.jp/schedule/ver2/rnc$size.tgz
  tar -xzf "rnc$size.tgz"
  # The archive is expected to unpack into a directory named after the size.
  if [ -d "$size" ]; then
    # Iterate with a glob rather than parsing `ls`, and quote every expansion,
    # so file names are never word-split or re-globbed.
    for file in "$size"/*
    do
      # An unmatched glob stays literal; skip it when the directory is empty.
      [ -e "$file" ] || continue
      mv "$file" "instances/stg/$(basename "$file" .stg)_$size.stg"
    done
    rm -r "$size"
  fi
done
Finalization for instances of size 300:
# Single platform configuration used for the size-300 STG graphs; m and k are
# passed to HEFT() and LB_europar(), tau and CV_CPU to read_instance_stg().
m <- 20
k <- 2
tau <- 15
CV_CPU <- 1
size <- 300
# Anchor the pattern on the "_300.stg" suffix: a bare "300" would also select
# any file whose name merely contains those digits.
files <- list.files(paste(prefix_path, "instances/stg", sep = "/"),
                    full.names = TRUE, pattern = "_300\\.stg$")
for (filename in files) {
  instance <- read_instance_stg(filename, tau = tau, CV_CPU = CV_CPU)
  g <- instance$g
  # The extension pattern is escaped and anchored: a bare ".stg" is a regex
  # where "." matches any character.
  name_dot <- str_replace(filename, "/stg/", "/kasahara/") %>%
    str_replace("\\.stg$", ".dot")
  write_graph_str(g, name_dot, "dot")
  schedule <- HEFT(m, k, g)
  LB <- LB_europar(cbind(get.vertex.attribute(g, "CPU"),
                         get.vertex.attribute(g, "GPU")), m, k)
  params <- data.frame(CP = CP(g), LB_europar = LB, HEFT = schedule$mks,
                       n = size, tau = tau, CV_CPU = CV_CPU, m = m, k = k,
                       instance$params[c("Precedenceconstraintsgenerator",
                                         "Taskprocessingtimegenerator")])
  # Unlike the multi-platform run, the file name carries no "_<m>_<k>" suffix.
  name_params <- str_replace(name_dot, "/kasahara/", "/params/") %>%
    str_replace("\\.dot", ".params")
  write.table(params, name_params, row.names = FALSE, sep = ",")
  name_HEFT <- str_replace(name_params, "\\.params", ".HEFT")
  capture.output(schedule, file = name_HEFT)
}
The algorithms:
# Generating results
# Run ./src/test once (m=20, k=2) on the size-300 Kasahara graphs.
PARAM=kasahara
RESULT=result_$PARAM\_300.csv
# Truncate (or create) the result file.
> $RESULT
k=2
m=20
# Rebuild the input directory with only the *_300.dot graphs.
rm -rf src/input/$PARAM
mkdir -p src/input/$PARAM
cp instances/$PARAM/*_300.dot src/input/$PARAM
# Presumably writes output_$m-$k.dat, which is read right after -- confirm.
./src/test -m $m -k $k -i src/input/$PARAM
# Drop the ".dot" extension so that each row name matches the base name of its
# instances/params/*.params file.
sed "s/_\([0-9]*\).dot/_\1/" -i output_$m-$k.dat
# Append the data rows, then put the header line back on top.
tail -n +2 output_$m-$k.dat >> $RESULT
sed -i "1i\\$(head -n 1 output_$m-$k.dat)" $RESULT
# Mixing results with lower bounds
# Turn the table into CSV: tabs become commas, spaces are removed.
sed -i "s/\t/,/g" $RESULT
sed -i "s/ //g" $RESULT
# For every data row, use its first field to locate the params file and emit
# that file's last line.
for file in $(awk -F',' 'NR>1 { print "instances/params/" $1 ".params" }' $RESULT)
do
tail -n 1 $file
done > $RESULT.tmp1
# $file still names the last params file of the loop; its first line is used
# as the header for the params columns.
# NOTE(review): paste separates the two halves with a TAB, not a comma --
# confirm the downstream read.csv copes with it.
paste $RESULT <(echo $(head -n 1 $file); cat $RESULT.tmp1) > $RESULT.tmp2
rm $RESULT.tmp1
mv $RESULT.tmp2 $RESULT
Now the results:
# Load the merged results for the size-300 Kasahara graphs.
data <- read.csv(paste(prefix_path, "result_kasahara_300.csv", sep = "/"))
# Tabulate the difference between the MxQE and EFiT columns.
table(data$MxQE - data$EFiT)
##
## 0
## 180
# Long format: one row per (instance, algorithm), with readable labels.
kasahara_300_long <- data %>%
  gather(algo, mks, QALS, ERLS, EFiT, Quik, Rati) %>%
  mutate(algo = factor(algo,
                       levels = c("EFiT", "QALS", "ERLS", "Rati", "Quik"),
                       labels = c("EFT (Mix)", "QALS", "ERLS",
                                  "Ratio", "Quick")))
# Final STG figure: box plots of mks / HEFT per precedence-constraint generator.
g_kasahara <- ggplot(kasahara_300_long,
                     aes(x = Precedenceconstraintsgenerator,
                         y = mks / HEFT, col = algo)) +
  geom_boxplot(outlier.size = 1) +
  scale_color_brewer(name = "Algorithm", palette = "Set1") +
  scale_x_discrete(name = "STG instances") +
  scale_y_continuous(name = "Ratio to HEFT") +
  theme(legend.position = "bottom") +
  coord_cartesian(ylim = c(1, 2.5))
print(g_kasahara)
Observations:
# Fixed seed so that the instances drawn below are reproducible.
set.seed(1)
NB <- 300
n <- 300
m <- 20
k <- 2
CV <- 0.1
# Make sure the output directories exist (no warning if they already do).
sapply(c("chain_indep", "params"), function(dir) {
  dir.create(path = file.path(prefix_path, "instances", dir),
             showWarnings = FALSE)
})
for (i in seq_len(NB)) {
  # Draw the exponent uniformly in [-0.25, 1.25], i.e. mean_cost = 10^exponent.
  mean_cost <- 10^(runif(1, min = 1, max = 7) / 4 - 0.5)
  # The instance name embeds both the drawn cost and the instance index.
  filename <- file.path(prefix_path, "instances/chain_indep",
                        paste0(paste("chain_indep", mean_cost, i, sep = "_"),
                               ".dot"))
  g <- chain_indep(m, k, n, CV, tau = mean_cost)
  write_graph_str(g, filename, "dot")
  schedule <- HEFT(m, k, g)
  costs <- cbind(get.vertex.attribute(g, "CPU"),
                 get.vertex.attribute(g, "GPU"))
  LB <- LB_europar(costs, m, k)
  params <- data.frame(CP = CP(g), LB_europar = LB, HEFT = schedule$mks,
                       num = i, mean_cost = mean_cost)
  name_params <- str_replace(filename, "/chain_indep/", "/params/") %>%
    str_replace("\\.dot", ".params")
  write.table(params, name_params, row.names = FALSE, sep = ",")
  name_HEFT <- str_replace(name_params, "\\.params", ".HEFT")
  capture.output(schedule, file = name_HEFT)
}
Let’s launch the algorithms:
# Generating results
# Run ./src/test once (m=20, k=2) on the chain_indep graphs.
PARAM=chain_indep
RESULT=result_$PARAM.csv
# Truncate (or create) the result file.
> $RESULT
k=2
m=20
# Rebuild the input directory from the generated DOT graphs.
rm -rf src/input/$PARAM
mkdir -p src/input/$PARAM
cp instances/$PARAM/*.dot src/input/$PARAM
# Presumably writes output_$m-$k.dat, which is read right after -- confirm.
./src/test -m $m -k $k -i src/input/$PARAM
# Drop the ".dot" extension so that each row name matches the base name of its
# instances/params/*.params file.
sed "s/_\([0-9]*\).dot/_\1/" -i output_$m-$k.dat
# Append the data rows, then put the header line back on top.
tail -n +2 output_$m-$k.dat >> $RESULT
sed -i "1i\\$(head -n 1 output_$m-$k.dat)" $RESULT
# Mixing results with lower bounds
# Turn the table into CSV: tabs become commas, spaces are removed.
sed -i "s/\t/,/g" $RESULT
sed -i "s/ //g" $RESULT
# For every data row, use its first field to locate the params file and emit
# that file's last line.
for file in $(awk -F',' 'NR>1 { print "instances/params/" $1 ".params" }' $RESULT)
do
tail -n 1 $file
done > $RESULT.tmp1
# $file still names the last params file of the loop; its first line is used
# as the header for the params columns.
# NOTE(review): paste separates the two halves with a TAB, not a comma --
# confirm the downstream read.csv copes with it.
paste $RESULT <(echo $(head -n 1 $file); cat $RESULT.tmp1) > $RESULT.tmp2
rm $RESULT.tmp1
mv $RESULT.tmp2 $RESULT
Finally:
# Load the merged chain_indep results.
data <- read.csv(paste(prefix_path, "result_chain_indep.csv", sep = "/"))
# Tabulate the difference between the MxQE and EFiT columns.
table(data$MxQE - data$EFiT)
##
## 0
## 300
# Long format: one row per (instance, algorithm), with readable labels.
chain_indep_long <- data %>%
  gather(algo, mks, QALS, ERLS, EFiT, Quik, Rati) %>%
  mutate(algo = factor(algo,
                       levels = c("EFiT", "QALS", "ERLS", "Rati", "Quik"),
                       labels = c("EFT (Mix)", "QALS", "ERLS",
                                  "Ratio", "Quick")))
# mks / HEFT against mean_cost, both axes on a log scale.
g_chain_indep <- ggplot(chain_indep_long,
                        aes(x = mean_cost, y = mks / HEFT,
                            col = algo, shape = algo)) +
  geom_point(size = 0.5) +
  geom_line() +
  scale_color_brewer(name = "Algorithm", palette = "Set1") +
  scale_shape_discrete(name = "Algorithm") +
  scale_x_log10(name = "Expected cost ratio CPU/GPU") +
  scale_y_log10(name = "Ratio to HEFT") +
  annotation_logticks(sides = "bl") +
  theme(legend.position = "bottom") +
  coord_cartesian(ylim = c(1, 10))
print(g_chain_indep)
Observations:
The fourth plot concerns the effect of the MixEFT parameter. Let’s start with the largest Cholesky instance, the first STG instance and a random one with a medium tau.
# Generating results
# Sweep the -r option of ./src/test (theta from 0 to 2 in steps of 0.2) over
# the Cholesky, size-300 Kasahara and chain_indep graphs, with m=20 and k=2.
PARAM=parameter
RESULT=result_$PARAM.csv
rm -rf src/input/$PARAM
mkdir -p src/input/$PARAM
cp instances/cholesky/cholesky_*.dot src/input/$PARAM
cp instances/kasahara/rand*_300.dot src/input/$PARAM
cp instances/chain_indep/chain_indep_*.dot src/input/$PARAM
m=20
k=2
# Truncate (or create) the result file.
> $RESULT
# LANG=en_US is set for seq; presumably to get "." as the decimal separator.
for theta in $(LANG=en_US seq 0 0.2 2)
do
./src/test -m $m -k $k -i src/input/$PARAM -r $theta
# Append theta as an extra tab-terminated field to every line ending in a tab
# (this also hits the header line, which is fixed up below).
sed "s/\\t\$/\\t$theta\\t/" -i output_$m-$k.dat
tail -n +2 output_$m-$k.dat >> $RESULT
done
# Make row names match the params files: Cholesky names get the "_<m>_<k>"
# suffix, any other name just loses its ".dot" extension.
sed "s/\(cholesky_[0-9]*\).dot/\1_$m\_$k/" -i $RESULT
sed "s/\.dot//" -i $RESULT
# Put the header line on top, then replace the first run of digits/dots on
# that line with the column name "theta".
# NOTE(review): this assumes no other header field contains a digit or a dot.
sed -i "1i\\$(head -n 1 output_$m-$k.dat)" $RESULT
sed -i "1s/[0-9.]\+/theta/" $RESULT
# Mixing results with lower bounds
# Turn the table into CSV: tabs become commas, spaces are removed.
sed -i "s/\t/,/g" $RESULT
sed -i "s/ //g" $RESULT
# For every data row, use its first field to locate the params file and emit
# that file's last line.
for file in $(awk -F',' 'NR>1 { print "instances/params/" $1 ".params" }' $RESULT)
do
tail -n 1 $file
done > $RESULT.tmp1
# Join data rows (without the header) with their params values.
paste <(tail -n +2 $RESULT) $RESULT.tmp1 > $RESULT.tmp2
rm $RESULT.tmp1
# One output file per instance family: the header is the result header followed
# by the params header of one file of that family, then the family's rows.
file_chol=$(ls instances/params/chol*.params | head -n 1)
echo $(head -n 1 $RESULT) $(head -n 1 $file_chol) > $RESULT\_chol
grep chol $RESULT.tmp2 >> $RESULT\_chol
file_rand=$(ls instances/params/rand*_300.params | head -n 1)
echo $(head -n 1 $RESULT) $(head -n 1 $file_rand) > $RESULT\_rand
grep rand $RESULT.tmp2 >> $RESULT\_rand
file_chain=$(ls instances/params/chain*.params | head -n 1)
echo $(head -n 1 $RESULT) $(head -n 1 $file_chain) > $RESULT\_chain
grep chain $RESULT.tmp2 >> $RESULT\_chain
rm $RESULT.tmp2
Let’s plot the expected value of the parameter for which Mix switches from QALS to EFT:
# Load the three per-family result files of the parameter sweep.
data_chol <- read.csv(paste(prefix_path, "result_parameter.csv_chol", sep = "/"))
data_rand <- read.csv(paste(prefix_path, "result_parameter.csv_rand", sep = "/"))
data_chain <- read.csv(paste(prefix_path, "result_parameter.csv_chain", sep = "/"))
# Stack them into one table; columns missing from a family are filled with NA.
data <- bind_rows(data_chol, data_rand, data_chain)
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
# Where MxQE sits between EFiT (prox = 0) and QALS (prox = 1) for each theta,
# grouped by instance family.
data %>%
  mutate(prox = (MxQE - EFiT) / (QALS - EFiT)) %>%
  # The family is the leading run of characters before the first digit or
  # underscore of the instance name. str_extract() returns a plain character
  # vector, whereas str_match() would store a one-column matrix in the table.
  mutate(type = str_extract(Treename, "[^_0-9]*")) %>%
  ggplot(aes(x = factor(theta), y = prox, col = type)) +
  geom_boxplot()
For a figure that is consistent with the previous ones:
# Long format restricted to the three algorithms of interest. All six levels
# are declared (with drop = FALSE in the colour scale) so that the legend also
# lists the algorithms that are not gathered here.
parameter_long <- data %>%
  gather(algo, mks, QALS, MxQE, EFiT) %>%
  mutate(algo = factor(algo,
                       levels = c("EFiT", "QALS", "ERLS",
                                  "Rati", "Quik", "MxQE"),
                       labels = c("EFT (Mix)", "QALS", "ERLS",
                                  "Ratio", "Quick", "Mix")))
# Box plots of mks / HEFT for each value of theta, log-scaled y axis.
g_parameter <- ggplot(parameter_long,
                      aes(x = factor(theta), y = mks / HEFT, col = algo)) +
  geom_boxplot(outlier.size = 1) +
  scale_color_brewer(name = "Algorithm", palette = "Set1", drop = FALSE) +
  scale_y_log10(name = "Ratio to HEFT") +
  scale_x_discrete(name = "MixEFT parameter") +
  theme(legend.position = "bottom") +
  annotation_logticks(sides = "l") +
  coord_cartesian(ylim = c(1, 10))
print(g_parameter)
We see that MixEFT behaves like QALS until the parameter gets close to 1, at which point it switches abruptly to following the EFT trend.
Built with previous figures:
library(tikzDevice)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(grid)
# Draw several ggplot objects in a grid with a single shared legend.
#
# The legend is extracted from the first plot. Every plot is then drawn without
# its own legend and without its y-axis title, and the shared legend is placed
# below or to the right of the grid.
#
# Arguments:
#   ...       ggplot objects to arrange.
#   ncol      number of grid columns (default: one column per plot).
#   nrow      number of grid rows (default: 1).
#   position  where to put the shared legend: "bottom" (default) or "right".
#   ylabel    passed to arrangeGrob() as `left`; omitted when NULL.
#
# Returns the combined gtable invisibly, after drawing it on a new page.
grid_arrange_shared_legend <- function(..., ncol = length(list(...)), nrow = 1,
                                       position = c("bottom", "right"),
                                       ylabel = NULL) {
  plots <- list(...)
  position <- match.arg(position)
  # Locate the legend ("guide-box") among the grobs of the first plot.
  g <- ggplotGrob(plots[[1]] + theme(legend.position = position))$grobs
  grob_names <- vapply(g, function(x) x$name, character(1))
  legend <- g[[which(grob_names == "guide-box")]]
  # Spell out the gtable fields in full rather than relying on `$` partial
  # matching ("height" -> "heights", "width" -> "widths").
  lheight <- sum(legend$heights)
  lwidth <- sum(legend$widths)
  gl <- lapply(plots, function(x) {
    x + theme(legend.position = "none") +
      theme(axis.title.y = element_blank())
  })
  # c() drops a NULL ylabel, so `left` is only passed when a label is given.
  gl <- c(gl, ncol = ncol, nrow = nrow, left = ylabel)
  combined <- switch(position,
    "bottom" = arrangeGrob(do.call(arrangeGrob, gl),
                           legend,
                           ncol = 1,
                           heights = unit.c(unit(1, "npc") - lheight, lheight)),
    "right" = arrangeGrob(do.call(arrangeGrob, gl),
                          legend,
                          ncol = 2,
                          widths = unit.c(unit(1, "npc") - lwidth, lwidth)))
  grid.newpage()
  grid.draw(combined)
  # return gtable invisibly
  invisible(combined)
}
# Export the combined 2 x 2 figure to XP.tex through the tikz device
# (width 5, height 5.5), then close the device.
tikz(paste(prefix_path, "XP.tex", sep = "/"), width = 5, height = 5.5)
grid_arrange_shared_legend(g_cholesky, g_kasahara, g_chain_indep, g_parameter,
nrow = 2, ncol = 2, ylabel = "Ratio to HEFT")
dev.off()
## png
## 2
# Draw the same combined figure on the current graphics device.
grid_arrange_shared_legend(g_cholesky, g_kasahara, g_chain_indep, g_parameter,
nrow = 2, ncol = 2, ylabel = "Ratio to HEFT")
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gridExtra_2.3 tikzDevice_0.10-1 bindrcpp_0.2 igraph_1.0.1
## [5] ggplot2_2.2.1 dplyr_0.7.4 tidyr_0.2.0 purrr_0.2.0
## [9] stringr_1.2.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.10 knitr_1.15.1 bindr_0.1
## [4] magrittr_1.5 munsell_0.4 colorspace_1.2-2
## [7] R6_2.1.0 rlang_0.1.6 filehash_2.4-1
## [10] plyr_1.8.1 tools_3.4.3 gtable_0.1.2
## [13] pacman_0.4.6 htmltools_0.3.6 lazyeval_0.2.0
## [16] yaml_2.1.13 rprojroot_1.2 digest_0.6.3
## [19] assertthat_0.1 tibble_1.3.1 RColorBrewer_1.0-5
## [22] reshape2_1.2.2 glue_1.2.0 evaluate_0.10
## [25] rmarkdown_1.8 labeling_0.1 stringi_0.5-5
## [28] compiler_3.4.3 scales_0.4.1 backports_1.0.5
## [31] pkgconfig_2.0.1