Improving heuristics

Let’s extend the LPT_balance approach. An idea is to simply consider LPT for each iteration of greedy_balance. We get the same approximation ratio and we expect better results. We try two additional heuristic ideas.

n <- 300
m <- 20
k <- 4
cases <- data.frame(rbind(cv1 = c(0.2, 1, 0.2, 1), cv2 = c(0.2, 0.2, 1, 1)))
ratios_specific <- map_df(cases, function(case) {
  replicate(100, {
    costs_CPU <- 15 * rgamma_cost(n, mean = 1, sd = case[1])
    costs_GPU <- rgamma_cost(n, mean = 1, sd = case[2])
    costs <- matrix(c(costs_CPU, costs_GPU), ncol = 2, byrow = FALSE)
    options <- c("largest", "abssuff", "relsuff")
    RR <- map(options, ~ schedule_LPT_balance(costs, m, k, .))
    RR <- c(RR, list(schedule_LPT_balance_greedy(costs, m, k)))
    RR <- c(RR, list(schedule_LPT_balance_variant1(costs, m, k)))
    RR <- c(RR, list(schedule_LPT_balance_variant2(costs, m, k)))
    names(RR) <- c(options, "greedy", "variant1", "variant2")
    c(list(case = paste(map(case, ~ ifelse(. == 0.2, "low", "high")),
                        collapse = "/"),
           LB = LB_simple(costs, m, k)), map_dbl(RR, ~.$mks))
  }) %>% t() %>% data.frame() %>% map(unlist) %>% data.frame()
})

## Warning in rbind_all(x, .id): Unequal factor levels: coercing to character

ratios_specific %>%
  gather(key = "algo", value = "ratio", -case, -LB) %>%
  ggplot(aes(x = case, y = ratio / LB, color = algo)) +
  geom_boxplot(aes(group = interaction(algo, case)), outlier.size = 1) +
  coord_cartesian(ylim = c(1, 1.1)) +
  scale_color_brewer("Algorithm", palette = "Set1") +
  scale_x_discrete("Coefficient of variation (CPU/GPU)") +
  scale_y_continuous("Makespan / LB")

The guaranteed extension shows good results (not the best, but not far). It is easier to explain than the current LPT_balance and will thus replace it. One of the new heuristic idea show very good result and could be kept for future work.

Let’s redo Euro-Par figure with schedule_LPT_balance_greedy and small-caps in the algorithm names.

cases <- data.frame(rbind(cv1 = c(0.2, 1, 0.2, 1), cv2 = c(0.2, 0.2, 1, 1)))
ratios <- map_df(cases, function(case) {
  replicate(100, {
    costs_CPU <- 15 * rgamma_cost(n, mean = 1, sd = case[1])
    costs_GPU <- rgamma_cost(n, mean = 1, sd = case[2])
    costs <- matrix(c(costs_CPU, costs_GPU), ncol = 2, byrow = FALSE)
    RR <- list(schedule_LPT_balance_greedy(costs, m, k),
               schedule_greedy_balance_proved(costs, m, k),
               schedule_DADA(costs, m, k),
               schedule_DualHP(costs, m, k),
               schedule_CLB2C(costs, m, k),
               schedule_HeteroPrio(costs, m, k))
    names(RR) <- c("LPT_bal", "greedy_bal", "DADA", "DualHP", "CLB2C",
                   "HeteroPrio")
    c(list(case = paste(map(case, ~ ifelse(. == 0.2, "low", "high")),
                        collapse = "/"),
           LB = LB_simple(costs, m, k)), map_dbl(RR, ~.$mks))
  }) %>% t() %>% data.frame() %>% map(unlist) %>% data.frame()
})

## Warning in rbind_all(x, .id): Unequal factor levels: coercing to character

balanceMakespan <- "BᴀʟᴀɴᴄᴇᴅMᴀᴋᴇsᴘᴀɴ"
balanceEstimate <- "BᴀʟᴀɴᴄᴇᴅEsᴛɪᴍᴀᴛᴇ"
names(ratios)[3:ncol(ratios)] <- c(balanceMakespan, balanceEstimate, "DADA",
                                   "DᴜᴀʟHP", "CLB2C", "HᴇᴛᴇʀᴏPʀɪᴏ")
ratios[,c("case", "LB", balanceMakespan, balanceEstimate, "DᴜᴀʟHP", "DADA",
          "CLB2C", "HᴇᴛᴇʀᴏPʀɪᴏ")] %>%
  gather(key = "algo", value = "ratio", -case, -LB) %>%
  ggplot(aes(x = case, y = ratio / LB, color = algo)) +
  geom_boxplot(aes(group = interaction(algo, case)), outlier.size = 1) +
  coord_cartesian(ylim = c(1, 1.2)) +
  scale_color_brewer("Algorithm", palette = "Set1") +
  scale_x_discrete("Coefficient of variation (CPU/GPU)") +
  scale_y_continuous("Makespan / LB")

Now, we update the statistics and add additional ones:

map_dbl(ratios[,3:8], ~quantile(. / ratios$LB, probs = 0.025))

## BᴀʟᴀɴᴄᴇᴅMᴀᴋᴇsᴘᴀɴ BᴀʟᴀɴᴄᴇᴅEsᴛɪᴍᴀᴛᴇ             DADA           DᴜᴀʟHP 
##         1.003241         1.003286         1.008697         1.003286 
##            CLB2C       HᴇᴛᴇʀᴏPʀɪᴏ 
##         1.036501         1.123559

map_dbl(ratios[,3:8], ~quantile(. / ratios$LB, probs = 0.975))

## BᴀʟᴀɴᴄᴇᴅMᴀᴋᴇsᴘᴀɴ BᴀʟᴀɴᴄᴇᴅEsᴛɪᴍᴀᴛᴇ             DADA           DᴜᴀʟHP 
##         1.072248         1.146454         1.133311         1.142928 
##            CLB2C       HᴇᴛᴇʀᴏPʀɪᴏ 
##         1.321175         1.397867

map_dbl(3:8, ~ max(ratios[,.] / apply(ratios[,setdiff(3:8,.)], 1, min)))

## [1] 1.004626 1.158899 1.168605 1.146375 1.503140 1.478522

map_dbl(3:8, ~ length(which(ratios[,.] < apply(ratios[,setdiff(3:8,.)], 1, min) + 1e-5))) / nrow(ratios)

## [1] 0.9575 0.3900 0.0425 0.3925 0.0000 0.0000

## R version 3.3.2 (2016-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.2 LTS
## 
## locale:
##  [1] LC_CTYPE=fr_FR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=fr_FR.UTF-8    
##  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=fr_FR.UTF-8   
##  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_2.1.0 dplyr_0.4.3   tidyr_0.2.0   purrr_0.2.0   stringr_1.0.0
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.7        knitr_1.14         magrittr_1.5      
##  [4] munsell_0.4        colorspace_1.2-2   R6_2.1.2          
##  [7] plyr_1.8.1         tools_3.3.2        parallel_3.3.2    
## [10] grid_3.3.2         gtable_0.1.2       pacman_0.4.1      
## [13] DBI_0.3.1          htmltools_0.3.5    yaml_2.1.13       
## [16] lazyeval_0.1.10    assertthat_0.1     digest_0.6.9      
## [19] tibble_1.2         RColorBrewer_1.0-5 reshape2_1.2.2    
## [22] formatR_1.4        evaluate_0.10      rmarkdown_1.1     
## [25] labeling_0.1       stringi_0.5-5      scales_0.4.0

Improving heuristics

L.-C. Canon

February 15, 2017