Relation between the TMA and correlations in cost matrices (February 10, 2016)

We will generate plots showing the strength of the relation between the TMA and correlations.

# Load the benchmark results of both cost-matrix generation methods.
# For every directory and every parameter value (CV or Vmax), the files
# "bench_results_<value>.csv" and "results_corr_<value>.csv" are bound
# column-wise (assumes both files list the same instances in the same
# order -- TODO confirm), then tagged with the value and the directory.
result_dirs <- c("perf_vs_corr_10000/noise", "perf_vs_corr_10000/combinaison")
param_values <- c(0.01, 0.1, 0.3, 1)

# Build the pieces in lists and bind them once, instead of growing a data
# frame with rbind() at every iteration.
results_total <- do.call(rbind, lapply(result_dirs, function(result_dir) {
  per_value <- lapply(param_values, function(value) {
    bench <- read.csv(
      file = file.path(result_dir, paste0("bench_results_", value, ".csv"))
    )
    corr <- read.csv(
      file = file.path(result_dir, paste0("results_corr_", value, ".csv"))
    )
    cbind(bench, CV_or_Vmax = value, corr)
  })
  # Explicit factor with levels in directory order: the later relabelling of
  # `method` relies on that order and on the column being a factor, which
  # cbind() only produced implicitly while stringsAsFactors defaulted to TRUE.
  cbind(
    do.call(rbind, per_value),
    method = factor(result_dir, levels = result_dirs)
  )
}))

# Keep only the runs generated with a parameter value of 0.1 or 0.3 and
# order them so that the largest value comes first.
TMAs <- results_total %>%
  tbl_df() %>%
  filter(CV_or_Vmax %in% c(0.1, 0.3)) %>%
  arrange(desc(CV_or_Vmax))

library(scales)

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard

# Probit axis transformation: values are mapped through the standard normal
# quantile function (qnorm) and mapped back with its CDF (pnorm).
probit_trans <- trans_new("probit", transform = qnorm, inverse = pnorm)

# Replace the directory names by readable facet labels. The mapping is spelled
# out level by level so that it does not depend on `method` already being a
# factor, nor on the order in which its levels happen to be stored.
TMAs$method <- factor(
  TMAs$method,
  levels = c("perf_vs_corr_10000/noise", "perf_vs_corr_10000/combinaison"),
  labels = c("Correlation noise-based", "Combination-based")
)

# TMA against the correlation (x axis labelled r_task = r_mach, probit scale),
# one panel per generation method; colour and shape both encode CV_or_Vmax.
corr_axis_breaks <- c(0.01, 0.1, 0.5, 0.9, 0.99)
cv_legend_title <- "Coefficient\nof variation"

ggplot(
  TMAs,
  aes(
    x = rho, y = TMA2,
    color = factor(CV_or_Vmax), shape = factor(CV_or_Vmax)
  )
) +
  # Nearly transparent points so that dense areas remain readable.
  geom_point(alpha = 0.05) +
  geom_smooth(color = "black") +
  facet_grid(method ~ .) +
  scale_x_continuous(
    name = expression(r[task] == r[mach]),
    breaks = corr_axis_breaks,
    trans = probit_trans
  ) +
  scale_y_continuous(name = "TMA") +
  scale_color_hue(name = cv_legend_title) +
  scale_shape_discrete(name = cv_legend_title) +
  theme(legend.position = "left") +
  # Draw the legend keys fully opaque despite the transparent points.
  guides(colour = guide_legend(override.aes = list(alpha = 1)))

## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

This indicates that correlation and TMA are closely related. Let’s see how they relate when the task correlation and the machine correlation differ.

# Load, for each generation method, the results stored in
# "<method>/corr<cv>/bench_results_corr.csv" and tag them with the method.
cv <- 0.3
corr_methods <- c("corr_noise_based", "comb_based")
# Read every file into a list and bind once, instead of growing a data frame
# with rbind() inside a loop.
global_results <- do.call(rbind, lapply(corr_methods, function(corr_method) {
  filename <- file.path(
    corr_method, paste0("corr", cv), "bench_results_corr.csv"
  )
  # Explicit factor with levels in the order of `corr_methods`: the later
  # relabelling of `method` relies on that order and on the column being a
  # factor, which cbind() only produced implicitly while stringsAsFactors
  # defaulted to TRUE.
  cbind(
    read.csv(file = filename),
    method = factor(corr_method, levels = corr_methods)
  )
}))
TMAs_diff <- global_results

# Replace the method identifiers by readable facet labels. The mapping is
# explicit (levels -> labels) so that it works whether `method` is stored as
# a character vector or as a factor, whatever the order of its levels.
TMAs_diff$method <- factor(
  TMAs_diff$method,
  levels = corr_methods,
  labels = c("Correlation noise-based", "Combination-based")
)
# Break values (every 0.01 between 0 and 1) shared by the fill scale and the
# contour lines of the next plot.
breaks_fill <- seq(0, 1, 0.01)

# Mean TMA for every (method, rhorow, rhocol) combination.
mean_tma <- TMAs_diff %>%
  tbl_df() %>%
  group_by(method, rhorow, rhocol) %>%
  summarise(TMA2 = mean(TMA2))

# Heat map of the mean TMA with black contour lines, one panel per method;
# both axes use the probit scale.
probit_breaks <- c(0.01, 0.1, 0.5, 0.9, 0.99)

ggplot(mean_tma, aes(x = rhorow, y = rhocol, fill = TMA2, z = TMA2)) +
  geom_tile() +
  facet_grid(method ~ .) +
  scale_fill_gradientn(
    name = "TMA",
    colours = rainbow(100),
    breaks = breaks_fill
  ) +
  scale_x_continuous(
    name = expression(r[task]),
    breaks = probit_breaks,
    trans = probit_trans
  ) +
  scale_y_continuous(
    name = expression(r[mach]),
    breaks = probit_breaks,
    trans = probit_trans
  ) +
  stat_contour(aes(color = ..level..), breaks = breaks_fill) +
  # Both ends of the colour scale are black, so every contour line is black
  # and no colour legend is drawn.
  scale_color_continuous(low = "black", high = "black", guide = "none") +
  geom_abline(size = 1.2)

The higher the correlation, the higher the TMA, and vice versa.

Let’s plot the performance of EFT, HLPT and BalSuff relative to the TMA:

# Read "mean_norm_results_with_tma_<method>.csv" for both generation methods,
# tag each row with its method, then reshape every column whose name contains
# "cmax" into a pair of columns: `algo` (column name) and `cmax` (value).
TMAs_perf <- map_df(c("corr", "comb"), function(method) {
  paste0("mean_norm_results_with_tma_", method, ".csv") %>%
    read.csv() %>%
    mutate(method = method)
}) %>%
  gather(algo, cmax, contains("cmax"))

# Map each method to its label explicitly. A bare factor() sorts its levels
# alphabetically ("comb" before "corr"), so assigning the labels by position
# afterwards would swap the two names in the figure.
TMAs_perf$method <- factor(
  TMAs_perf$method,
  levels = c("corr", "comb"),
  labels = c("Correlation noise-based", "Combination-based")
)

# gather() returns `algo` as a character vector by default, on which
# `levels<-` does not rename anything: convert it to a factor first, keeping
# the levels in order of appearance (i.e. the column order of the CSV files).
# Assumes the cmax columns are ordered EFT, HLPT, BalSuff -- TODO confirm.
TMAs_perf$algo <- factor(TMAs_perf$algo, levels = unique(TMAs_perf$algo))
levels(TMAs_perf$algo) <- c("EFT", "HLPT", "BalSuff")

ggplot(TMAs_perf, aes(x = TMA2, y = cmax)) +
  geom_point(alpha = 0.01) +
  geom_smooth() +
  # NOTE(review): `breaks = 30` looks like it sets a single contour level at
  # density 30; if 30 contour levels were intended, `bins = 30` is probably
  # what is wanted -- confirm against the installed ggplot2 version.
  geom_density2d(breaks = 30) +
  scale_x_continuous("TMA") +
  scale_y_continuous("Relative difference to reference") +
  facet_grid(method ~ algo) +
  # Zoom on [0, 0.4] (slightly extended) without dropping the points outside,
  # so the smoother and the density are still computed on all the data.
  coord_cartesian(ylim = extendrange(c(0, 0.4)))

## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.