LPT with simplified unrelated instances: Siegel and corr (April 16, 2014)

Objectives

The main objective is to run the LPT algorithm with unrelated instances. First, an instance must be simplified as two vectors of costs.

Simplification

simplify_matrix <- function(Z) {
    M1 <- sapply(1:nrow(Z), function(i) return(mean(Z[i, ])))
    M2 <- sapply(1:ncol(Z), function(j) return(mean(Z[, j])))
    s <- sqrt(mean(c(Z)))
    return(list(row = M1/s, col = M2/s))
}

create_matrix <- function(row, col) {
    P <- matrix(nrow = length(row), ncol = length(col))
    for (i in 1:length(row)) for (j in 1:length(col)) P[i, j] <- row[i] * col[j]
    return(P)
}

Z <- create_matrix(c(3, 6, 1), c(1, 2))
S <- simplify_matrix(Z)
N <- create_matrix(S$row, S$col)
sum(c(Z - N))
## [1] 0

LPT

LPT <- function(task_cost, processor_speed) {
    T <- sort(task_cost, decreasing = TRUE)
    P <- rep(0, length(processor_speed))
    for (i in 1:length(T)) {
        idx <- 1
        for (j in 2:length(P)) if (P[j] + processor_speed[j] * T[i] < P[idx] + 
            processor_speed[idx] * T[i]) {
            idx <- j
        }
        P[idx] <- P[idx] + processor_speed[idx] * T[i]
    }
    max(P)
}

LPT(c(3, 6, 1), c(1, 2))
## [1] 7

Basic matrix generation with correlation

generate_matrix_corr <- function(n, m, rdist, mu, sigma, rhoR, rhoC) {
    # Given a basis X, generate a vector that has the correlation rho with any
    # vector that is generated in a similar way
    combine <- function(X, Y, rho, mean) {
        return(sqrt(rho) * X + sqrt(1 - rho) * Y + (1 - sqrt(rho) - sqrt(1 - 
            rho)) * mean)
    }

    # Set the correlation between each pair of columns
    C <- rdist(n, mu, sigma)
    Z <- sapply(1:m, function(x) return(combine(C, rdist(length(C), mu, sigma), 
        rhoC, mu)))
    # TODO remove those comments after proving this in the research report
    # print(c(mean_CV_row(Z), sigma / mu * sqrt(1 - rhoC)))
    # print(c(mean_CV_col(Z), sigma / mu)) print(c(CV_mean_row(Z), sigma / mu *
    # sqrt(rhoC))) print(c(CV_mean_col(Z), 0)) Set the correlation between each
    # pair of row
    R <- rdist(m, mu, sqrt(1 - rhoC) * sigma)
    for (i in 1:nrow(Z)) Z[i, ] <- combine(R, Z[i, ], rhoR, mu)
    # print(c(mean_CV_row(Z), sigma / mu * sqrt(1 - rhoC)))
    # print(c(mean_CV_col(Z), sigma / mu * sqrt(1 - rhoR)))
    # print(c(CV_mean_row(Z), sigma / mu * sqrt(rhoC) * sqrt(1 -rhoR)))
    # print(c(CV_mean_col(Z), sigma / mu * sqrt(1 - rhoC) * sqrt(rhoR))) Set the
    # final standard deviation to sd
    Z <- (Z - mu)/sqrt(1 - rhoR * rhoC) + mu
    return(Z)
}

generate_matrix_siegel_corr <- function(n, m, rdist, mu, sigma, rhoC) {
    muR <- 1
    muC <- mu
    sigmaC <- sqrt(rhoC) * sigma
    sigmaR <- sigma * sqrt((1 - rhoC)/(rhoC * sigma^2 + mu^2))

    C <- rdist(n, muC, sigmaC)
    Z <- sapply(1:m, function(j) return(C))
    for (i in 1:nrow(Z)) Z[i, ] <- Z[i, ] * rdist(m, muR, sigmaR)

    return(Z)
}

generate_consistent_matrix_siegel_corr <- function(n, m, rdist, mu, sigma, rhoC) {
    Z <- generate_matrix_siegel_corr(n, m, rdist, mu, sigma, rhoC)
    make_consistent(Z)
}

Several choices have been made. Instead of specifying one mean and two standard deviations for Siegel, only the final cost mean and standard deviation are specified: it is more intuitive and it leaves one degree of freedom for specifying the correlation between any pair of columns.

Also, for simplicity, only one distribution is provided to both methods whereas it would have been possible to provide two distributions. Independently of this fact, it should be noted that the final cost distribution in both case is distinct: for the new distribution, it is a sum of three random variables that follow the input distribution; the Siegel's distribution, it is the product of two random variables that follow the input distribution.

Let's test those methods:

set.seed(0)

rdist <- function(n, mean, sd) {
    a <- mean - sqrt(12)/2 * sd  #lower
    b <- mean + sqrt(12)/2 * sd  #upper
    return(runif(n, a, b))
}

n <- 1000
m <- 800
mu <- 10
sigma <- 2
rhoR <- 0
rhoC <- 0.5

Z <- generate_matrix_corr(n, m, rdist, mu, sigma, rhoR, rhoC)

c(mu, mean(Z))
## [1] 10 10
c(sigma, sqrt(var(c(Z))))
## [1] 2.000 1.998
SDr <- sapply(1:nrow(Z), function(i) return(sqrt(var(Z[i, ]))))
c(sigma, summary(SDr))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.34    1.40    1.41    1.41    1.43    1.48
SDc <- sapply(1:ncol(Z), function(i) return(sqrt(var(Z[, i]))))
c(sigma, summary(SDc))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.89    1.97    2.00    2.00    2.02    2.11
c(rhoR, cor.test(Z[1, ], Z[2, ])$estimate)
##               cor 
##  0.00000 -0.00466
c(rhoC, cor.test(Z[, 1], Z[, 2])$estimate)
##           cor 
## 0.5000 0.4596
plot.ecdf(Z)

Z <- generate_matrix_siegel_corr(n, m, rdist, mu, sigma, rhoC)

c(mu, mean(Z))
## [1] 10.00 10.02
c(sigma, sqrt(var(c(Z))))
## [1] 2.000 2.034
SDr <- sapply(1:nrow(Z), function(i) return(sqrt(var(Z[i, ]))))
c(mu * sigma * sqrt((1 - rhoC)/(rhoC * sigma^2 + mu^2)), summary(SDr))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.40    1.03    1.22    1.40    1.40    1.59    1.79
SDc <- sapply(1:ncol(Z), function(i) return(sqrt(var(Z[, i]))))
c(sigma, summary(SDc))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.91    2.01    2.03    2.03    2.06    2.13
c(0, cor.test(Z[1, ], Z[2, ])$estimate)
##                 cor 
##  0.000000 -0.006135
c(rhoC, cor.test(Z[, 1], Z[, 2])$estimate)
##           cor 
## 0.5000 0.5223
plot.ecdf(Z, add = TRUE, col = 2)

plot of chunk LPT_unrelated_ecdf

The difference between both ECDF is marginal. This test also suggests the standard deviation of each row is incorrect and could depend on the correlation.

A last observation is that it is possible to generate a continuum of matrices with varying coefficient correlations by using the same seed:

set.seed(0)
Z0 <- generate_matrix_corr(n, m, rdist, mu, sigma, rhoR, rhoC)
set.seed(0)
Z1 <- generate_matrix_corr(n, m, rdist, mu, sigma, rhoR + 0.1, rhoC + 0.1)
summary(c((Z1 - Z0)/pmax(Z0, Z1)))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.11900 -0.03380  0.00173  0.00124  0.03540  0.13700

The difference between any cost of both matrix is below 20% half the time.

Analysis fix for standard deviations

For the test, we will use the same parameter as above except for the row correlation.

rhoR <- 0.5
combine <- function(X, Y, rho, mean) {
    return(sqrt(rho) * X + sqrt(1 - rho) * Y + (1 - sqrt(rho) - sqrt(1 - rho)) * 
        mean)
}

Standard deviation of each row

After the generation of the columns, the standard deviation of any row is: sqrt(1 - rhoC) * sigma

C <- rdist(n, mu, sigma)
Z <- sapply(1:m, function(x) return(combine(C, rdist(length(C), mu, sigma), 
    rhoC, mu)))
SDr <- sapply(1:nrow(Z), function(i) return(sqrt(var(Z[i, ]))))
c(sqrt(1 - rhoC) * sigma, summary(SDr))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.414   1.350   1.400   1.410   1.410   1.430   1.490

After the second operation, the standard deviation is the same.

R <- rdist(m, mu, sqrt(1 - rhoC) * sigma)
for (i in 1:nrow(Z)) Z[i, ] <- combine(R, Z[i, ], rhoR, mu)
SDr <- sapply(1:nrow(Z), function(i) return(sqrt(var(Z[i, ]))))
c(sqrt(1 - rhoC) * sigma, summary(SDr))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.414   1.320   1.390   1.410   1.410   1.430   1.490

After the final scaling, it becomes: sqrt( (1 - rhoC) / (1 - rhoR * rhoC) ) * sigma

Z <- (Z - mu)/sqrt(1 - rhoR * rhoC) + mu
SDr <- sapply(1:nrow(Z), function(i) return(sqrt(var(Z[i, ]))))
c(sqrt((1 - rhoC)/(1 - rhoR * rhoC)) * sigma, summary(SDr))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.633   1.520   1.610   1.630   1.630   1.650   1.720

Standard deviation of each column

After the generation of the columns, the standard deviation of any column is: sigma

C <- rdist(n, mu, sigma)
Z <- sapply(1:m, function(x) return(combine(C, rdist(length(C), mu, sigma), 
    rhoC, mu)))
SDc <- sapply(1:ncol(Z), function(i) return(sqrt(var(Z[, i]))))
c(sigma, summary(SDc))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.90    1.98    2.00    2.00    2.02    2.11

After the second operation, the standard deviation is sqrt(1 - rhoR) * sigma

R <- rdist(m, mu, sqrt(1 - rhoC) * sigma)
for (i in 1:nrow(Z)) Z[i, ] <- combine(R, Z[i, ], rhoR, mu)
SDc <- sapply(1:ncol(Z), function(i) return(sqrt(var(Z[, i]))))
c(sqrt(1 - rhoR) * sigma, summary(SDc))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.414   1.340   1.400   1.420   1.420   1.430   1.500

After the final scaling, it becomes: sqrt( (1 - rhoR) / (1 - rhoR * rhoC) ) * sigma

Z <- (Z - mu)/sqrt(1 - rhoR * rhoC) + mu
SDc <- sapply(1:ncol(Z), function(i) return(sqrt(var(Z[, i]))))
c(sqrt((1 - rhoR)/(1 - rhoR * rhoC)) * sigma, summary(SDc))
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.633   1.550   1.620   1.630   1.630   1.650   1.730

Validation

for (rhoR in c(0, 0.1, 0.5, 0.9)) for (rhoC in c(0, 0.1, 0.5, 0.9)) {
    Z <- generate_matrix_corr(n, m, rdist, mu, sigma, rhoR, rhoC)
    SDr <- sapply(1:nrow(Z), function(i) return(sqrt(var(Z[i, ]))))
    print(c(sqrt((1 - rhoC)/(1 - rhoR * rhoC)) * sigma, summary(SDr)))
    SDc <- sapply(1:ncol(Z), function(i) return(sqrt(var(Z[, i]))))
    print(c(sqrt((1 - rhoR)/(1 - rhoR * rhoC)) * sigma, summary(SDc)))
}
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.90    1.98    2.00    2.00    2.02    2.11 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.91    1.98    2.00    2.00    2.02    2.08 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.897   1.800   1.880   1.900   1.900   1.920   1.990 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.91    1.98    2.00    2.00    2.02    2.09 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.414   1.330   1.400   1.410   1.410   1.430   1.480 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.87    1.97    1.99    1.99    2.01    2.08 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6325  0.6040  0.6260  0.6330  0.6320  0.6390  0.6660 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.93    1.98    2.00    2.00    2.01    2.06 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.87    1.98    2.00    2.00    2.03    2.10 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.897   1.810   1.880   1.900   1.900   1.910   1.970 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.907   1.780   1.880   1.900   1.900   1.930   2.000 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.907   1.820   1.890   1.910   1.910   1.930   2.000 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.451   1.360   1.440   1.450   1.450   1.470   1.530 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.947   1.820   1.920   1.940   1.940   1.970   2.080 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.663   0.619   0.655   0.662   0.662   0.670   0.701 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.989   1.930   1.990   2.000   2.000   2.010   2.070 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.89    1.99    2.02    2.02    2.04    2.13 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.414   1.350   1.400   1.410   1.410   1.430   1.480 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.947   1.820   1.910   1.940   1.940   1.960   2.060 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.451   1.370   1.430   1.450   1.450   1.460   1.520 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.633   1.560   1.620   1.650   1.650   1.670   1.760 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.633   1.530   1.590   1.610   1.610   1.630   1.710 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8528  0.7970  0.8360  0.8460  0.8460  0.8560  0.9080 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.907   1.860   1.920   1.930   1.930   1.940   2.000 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    1.92    1.97    1.98    1.98    1.99    2.05 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6325  0.6020  0.6260  0.6320  0.6320  0.6380  0.6550 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.989   1.910   1.970   1.990   1.990   2.000   2.050 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.663   0.634   0.655   0.662   0.662   0.670   0.691 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.907   1.810   1.880   1.890   1.890   1.900   1.950 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8528  0.8050  0.8410  0.8510  0.8510  0.8610  0.9010 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.451   1.410   1.460   1.470   1.470   1.480   1.520 
##            Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.451   1.380   1.410   1.420   1.420   1.430   1.480

Conclusion

We can now generate cost matrices with two methods having each different properties in terms of final mean, variance, row variance, column variance, row correlation and column generation. We have four input parameters : mean, variance and both correlations. Therefore, the row and column variance are given by the chosen method.

The differences between both methods are:

  1. the row correlation cannot be specified with Siegel's method
  2. the row and column variances are distinct
  3. the cost distribution is slightly distinct

Note that in our adaptation of Siegel's method, muR is set to 1 because it simplifies the adaptation. Changing this value would lead to different values for the row variance. However, controlling this property is not expected to be useful.

A final point is that any generated matrix that has negative value should be rejected. Thus, the set of admissible parameters is restricted.