Data Generation for CCPE (April 29, 2014)

The previous simulations were actually biased.

rdistd <- function(n, mean = 1, sigma = cov) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    shape <- mean^2/sigma^2
    rate <- mean/sigma^2
    return(rgamma(n, shape, rate))
}
iterations <- 10000
V_cov <- vector(length = iterations)
V_perf <- vector(length = iterations)
# Biased method
for (i in 1:iterations) {
    set.seed(i)
    V_cov[i] <- exp(runif(1, log(0.01), log(10)))
    set.seed(i)
    V_perf[i] <- rdistd(1, 1, V_cov[i])
}
plot(V_cov, V_perf, log = "x", cex = 0.1, ylim = c(0, 5))

plot of chunk bias


# Unbiased method
for (i in 1:iterations) {
    set.seed(i)
    V_cov[i] <- exp(runif(1, log(0.01), log(10)))
    V_perf[i] <- rdistd(1, 1, V_cov[i])
}
plot(V_cov, V_perf, log = "x", cex = 0.1, ylim = c(0, 5))

plot of chunk bias

Average study with ribbons

source("function_fix.R")
dir.create("CCPE-data", showWarnings = FALSE)

Let's avoid the bias while being conservative:

n <- 64
iterations <- 1e+06

V_cov <- vector(length = iterations)
V_perf <- vector(length = 4 * iterations)
for (i in 1:iterations) {
    rdistd <- function(n, mean = 1, sigma = cov) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }
    rdistc <- function(n, mean = 0, sigma = 0) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }

    set.seed(i)
    cov <- exp(runif(1, log(0.01), log(10)))
    bino <- static_order_MC(tree_build(n, 1, 0), rdistd, rdistc, 1)
    set.seed(i)
    cov <- exp(runif(1, log(0.01), log(10)))
    fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, 1)
    set.seed(i)
    cov <- exp(runif(1, log(0.01), log(10)))
    dyn <- dyn_MC(n, rdistd, rdistc, 1)
    set.seed(i)
    cov <- exp(runif(1, log(0.01), log(10)))
    dyn_com <- dyn_com_MC(n, rdistd, rdistc, 1)

    V_cov[i] <- cov
    V_perf[1:4 + (i - 1) * 4] <- c(bino, fibo, dyn, dyn_com)
}
V_cov <- rep(V_cov, each = 4)
V_method <- rep(factor(c("bino", "fibo", "dyn", "dyn_com")), times = iterations)
V_gamma <- data.frame(cov = V_cov, method = V_method, perf = V_perf)
write.csv(V_gamma, "CCPE-data/cost-cons.csv", row.names = FALSE)

Let's try to be less conservative:

n <- 64
iterations <- 1e+06

V_cov <- vector(length = iterations)
V_perf <- vector(length = 4 * iterations)
for (i in 1:iterations) {
    rdistd <- function(n, mean = 1, sigma = cov) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }
    rdistc <- function(n, mean = 0, sigma = 0) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }

    set.seed(i)
    cov <- exp(runif(1, log(0.01), log(10)))
    bino <- static_order_MC(tree_build(n, 1, 0), rdistd, rdistc, 1)
    fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, 1)
    dyn <- dyn_MC(n, rdistd, rdistc, 1)
    dyn_com <- dyn_com_MC(n, rdistd, rdistc, 1)

    V_cov[i] <- cov
    V_perf[1:4 + (i - 1) * 4] <- c(bino, fibo, dyn, dyn_com)
}
V_cov <- rep(V_cov, each = 4)
V_method <- rep(factor(c("bino", "fibo", "dyn", "dyn_com")), times = iterations)
V_gamma <- data.frame(cov = V_cov, method = V_method, perf = V_perf)
write.csv(V_gamma, "CCPE-data/cost-agg.csv", row.names = FALSE)

Considering non-negligible computations

Let's save the result of the previous analysis in the same way:

n <- 64
iterations <- 1000
nb_point <- 30 + 1  # odd because of symmetry
cd <- exp(seq(log(0.1), log(10), length.out = nb_point))
cov <- exp(seq(log(0.01), log(10), length.out = nb_point))

V_over <- data.frame()
for (i in 1:length(cd)) for (j in 1:length(cov)) {
    rdistd <- function(n, mean = 1, sigma = cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }
    rdistc <- function(n, mean = cd[i], sigma = cd[i] * cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }

    set.seed(i + j * length(cov))
    fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, iterations)
    set.seed(i + j * length(cov))
    dyn <- dyn_MC(n, rdistd, rdistc, iterations)

    V_over <- rbind(V_over, data.frame(cd = cd[i], cov = cov[j], perf = mean(fibo)/mean(dyn), 
        perf_old = median(fibo/dyn)))
}
write.csv(V_over, "CCPE-data/over-cons.csv", row.names = FALSE)
n <- 64
iterations <- 1000
nb_point <- 30 + 1  # odd because of symmetry
cd <- exp(seq(log(0.1), log(10), length.out = nb_point))
cov <- exp(seq(log(0.01), log(10), length.out = nb_point))

V_over <- data.frame()
for (i in 1:length(cd)) for (j in 1:length(cov)) {
    rdistd <- function(n, mean = 1, sigma = cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }
    rdistc <- function(n, mean = cd[i], sigma = cd[i] * cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }

    set.seed(i + j * length(cov))
    fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, iterations)
    dyn <- dyn_MC(n, rdistd, rdistc, iterations)

    V_over <- rbind(V_over, data.frame(cd = cd[i], cov = cov[j], perf = mean(fibo)/mean(dyn), 
        perf_old = median(fibo/dyn)))
}
write.csv(V_over, "CCPE-data/over-agg.csv", row.names = FALSE)

Non-commutative operations

n <- 64
iterations <- 1000
nb_point <- 30 + 1  # odd because of symmetry
cd <- exp(seq(log(0.1), log(10), length.out = nb_point))
cov <- exp(seq(log(0.01), log(10), length.out = nb_point))

V_non_comm <- data.frame()
for (i in 1:length(cd)) for (j in 1:length(cov)) {
    rdistd <- function(n, mean = 1, sigma = cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }
    rdistc <- function(n, mean = cd[i], sigma = cd[i] * cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }

    set.seed(i + j * length(cov))
    bino <- static_order_MC(tree_build(n, 1, 0), rdistd, rdistc, iterations)
    set.seed(i + j * length(cov))
    fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, iterations)
    set.seed(i + j * length(cov))
    dyn_com <- dyn_com_MC(n, rdistd, rdistc, iterations)

    # Comparing performance item-wise makes sense as the random costs are the
    # same across all methods
    b1 <- length(which(bino < fibo & bino < dyn_com))
    b2 <- length(which(fibo < bino & fibo < dyn_com))
    b3 <- length(which(dyn_com < bino & dyn_com < fibo))
    best_old <- which.max(c(b1, b2, b3))
    best_old <- factor(c("bino", "fibo", "dyn_com")[best_old])
    # Ultimately, the mean could be sufficient
    best <- which.min(c(mean(bino), mean(fibo), mean(dyn_com)))
    best <- factor(c("bino", "fibo", "dyn_com")[best])

    V_non_comm <- rbind(V_non_comm, data.frame(cd = cd[i], cov = cov[j], best = best, 
        best_old = best_old))
}
write.csv(V_non_comm, "CCPE-data/nc-cons.csv", row.names = FALSE)
n <- 64
iterations <- 1000
nb_point <- 30 + 1  # odd because of symmetry
cd <- exp(seq(log(0.1), log(10), length.out = nb_point))
cov <- exp(seq(log(0.01), log(10), length.out = nb_point))

V_non_comm <- data.frame()
for (i in 1:length(cd)) for (j in 1:length(cov)) {
    rdistd <- function(n, mean = 1, sigma = cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }
    rdistc <- function(n, mean = cd[i], sigma = cd[i] * cov[j]) {
        if (mean == 0 || sigma == 0) 
            return(rep(mean, n))
        shape <- mean^2/sigma^2
        rate <- mean/sigma^2
        return(rgamma(n, shape, rate))
    }

    set.seed(i + j * length(cov))
    bino <- static_order_MC(tree_build(n, 1, 0), rdistd, rdistc, iterations)
    fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, iterations)
    dyn_com <- dyn_com_MC(n, rdistd, rdistc, iterations)

    # Comparing performance item-wise makes sense as the random costs are the
    # same across all methods
    b1 <- length(which(bino < fibo & bino < dyn_com))
    b2 <- length(which(fibo < bino & fibo < dyn_com))
    b3 <- length(which(dyn_com < bino & dyn_com < fibo))
    best_old <- which.max(c(b1, b2, b3))
    best_old <- factor(c("bino", "fibo", "dyn_com")[best_old])
    # Ultimately, the mean could be sufficient
    best <- which.min(c(mean(bino), mean(fibo), mean(dyn_com)))
    best <- factor(c("bino", "fibo", "dyn_com")[best])

    V_non_comm <- rbind(V_non_comm, data.frame(cd = cd[i], cov = cov[j], best = best, 
        best_old = best_old))
}
write.csv(V_non_comm, "CCPE-data/nc-agg.csv", row.names = FALSE)

Extension to other distributions

rbern_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    cov <- sigma/mean
    p <- 1/(cov^2 + 1)
    M <- mean/p
    return(rbinom(n, 1, p) * M)
}

rbino_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    cov <- sigma/mean
    size <- 100
    p <- 1/(1 + cov^2 * size)
    M <- mean/(size * p)
    return(rbinom(n, size, p) * M)
}

rexp_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    cov <- sigma/mean
    lambda <- 1/(mean * cov)
    m <- mean * (1 - cov)
    return(rexp(n, lambda) + m)
}

rbeta_cost <- function(n, mean, sigma, alpha) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    cov <- sigma/mean
    M <- (cov^2 + 1) * mean/(1 - alpha * cov^2)
    beta <- (M/mean - 1) * alpha
    return(rbeta(n, alpha, beta) * M)
}

rgamma_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    shape <- mean^2/sigma^2
    rate <- mean/sigma^2
    return(rgamma(n, shape, rate))
}

rpois_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    lambda <- sigma^2
    m <- mean - lambda
    return(rpois(n, lambda) + m)
}

runif_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    min <- mean - sigma * sqrt(12)/2
    max <- mean + sigma * sqrt(12)/2
    return(runif(n, min, max))
}

library(triangle)
rtriangle_cost <- function(n, mean, sigma) {
    if (mean == 0 || sigma == 0) 
        return(rep(mean, n))
    a <- mean - sqrt(6) * sigma
    b <- mean + sqrt(6) * sigma
    return(rtriangle(n, a, b))
}
get_all_makespans_over <- function(iterations, n, rdist, nb_pointx, nb_pointy = nb_pointx, 
    cov_min = 0.01, cov_max = 10) {
    cd <- exp(seq(log(0.1), log(10), length.out = nb_pointy))
    cov <- exp(seq(log(0.01), log(10), length.out = nb_pointx))
    cov[nb_pointx] <- 10
    V_over <- data.frame()
    for (i in 1:length(cd)) for (j in 1:length(cov)) {
        if (cov[j] < cov_min || cov[j] > cov_max) 
            next
        rdistd <- function(n) return(rdist(n, 1, cov[j]))
        rdistc <- function(n) return(rdist(n, cd[i], cd[i] * cov[j]))
        set.seed(i + j * length(cov))
        fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, iterations)
        set.seed(i + j * length(cov))
        dyn <- dyn_MC(n, rdistd, rdistc, iterations)
        V_over <- rbind(V_over, data.frame(cd = cd[i], cov = cov[j], perf = mean(fibo)/mean(dyn), 
            perf_old = median(fibo/dyn)))
    }
    return(V_over)
}

n <- 64
iterations <- 1000
nb_point <- 31  # odd because of symmetry
V_over_gamma <- get_all_makespans_over(iterations, n, rgamma_cost, nb_point)
V_over_bern <- get_all_makespans_over(iterations, n, rbern_cost, nb_point)
V_over_bino <- get_all_makespans_over(iterations, n, rbino_cost, nb_point)
V_over_exp <- get_all_makespans_over(iterations, n, rexp_cost, nb_point)
V_over_pois <- get_all_makespans_over(iterations, n, function(n, mean, sigma) return(rpois_cost(n, 
    mean, sigma)), nb_point, cov_max = 1)
V_over_unif <- get_all_makespans_over(iterations, n, function(n, mean, sigma) return(runif_cost(n, 
    mean, sigma)), nb_point, cov_max = 2/sqrt(12))
V_over_triangle <- get_all_makespans_over(iterations, n, function(n, mean, sigma) return(rtriangle_cost(n, 
    mean, sigma)), nb_point, cov_max = 1/sqrt(6))
V_over_beta0_01 <- get_all_makespans_over(iterations, n, function(n, mean, sigma) return(rbeta_cost(n, 
    mean, sigma, 0.01)), nb_point, cov_max = 9.99)
V_over_beta1 <- get_all_makespans_over(iterations, n, function(n, mean, sigma) return(rbeta_cost(n, 
    mean, sigma, 1)), nb_point, cov_max = 0.999)
V_over_beta100 <- get_all_makespans_over(iterations, n, function(n, mean, sigma) return(rbeta_cost(n, 
    mean, sigma, 100)), nb_point, cov_max = 0.0999)

V_over_gamma_m <- V_over_gamma[, 1:3]
colnames(V_over_gamma_m)[3] <- "gamma"
V_over_m <- V_over_gamma_m
V_over_bern_m <- V_over_bern[, 1:3]
colnames(V_over_bern_m)[3] <- "bern"
V_over_m <- merge(V_over_m, V_over_bern_m, by = c("cd", "cov"), all = TRUE)
V_over_bino_m <- V_over_bino[, 1:3]
colnames(V_over_bino_m)[3] <- "bino"
V_over_m <- merge(V_over_m, V_over_bino_m, by = c("cd", "cov"), all = TRUE)
V_over_exp_m <- V_over_exp[, 1:3]
colnames(V_over_exp_m)[3] <- "exp"
V_over_m <- merge(V_over_m, V_over_exp_m, by = c("cd", "cov"), all = TRUE)
V_over_pois_m <- V_over_pois[, 1:3]
colnames(V_over_pois_m)[3] <- "pois"
V_over_m <- merge(V_over_m, V_over_pois_m, by = c("cd", "cov"), all = TRUE)
V_over_unif_m <- V_over_unif[, 1:3]
colnames(V_over_unif_m)[3] <- "unif"
V_over_m <- merge(V_over_m, V_over_unif_m, by = c("cd", "cov"), all = TRUE)
V_over_triangle_m <- V_over_triangle[, 1:3]
colnames(V_over_triangle_m)[3] <- "triangle"
V_over_m <- merge(V_over_m, V_over_triangle_m, by = c("cd", "cov"), all = TRUE)
V_over_beta0_01_m <- V_over_beta0_01[, 1:3]
colnames(V_over_beta0_01_m)[3] <- "beta0_01"
V_over_m <- merge(V_over_m, V_over_beta0_01_m, by = c("cd", "cov"), all = TRUE)
V_over_beta1_m <- V_over_beta1[, 1:3]
colnames(V_over_beta1_m)[3] <- "beta1"
V_over_m <- merge(V_over_m, V_over_beta1_m, by = c("cd", "cov"), all = TRUE)
V_over_beta100_m <- V_over_beta100[, 1:3]
colnames(V_over_beta100_m)[3] <- "beta100"
V_over_m <- merge(V_over_m, V_over_beta100_m, by = c("cd", "cov"), all = TRUE)

write.csv(V_over_m, "CCPE-data/merge-over.csv", row.names = FALSE)
get_all_makespans_non_comm <- function(iterations, n, rdist, nb_pointx, nb_pointy = nb_pointx, 
    cov_min = 0.01, cov_max = 10) {
    cd <- exp(seq(log(0.1), log(10), length.out = nb_pointy))
    cov <- exp(seq(log(0.01), log(10), length.out = nb_pointx))
    cov[nb_pointx] <- 10

    V_non_comm <- data.frame()
    for (i in 1:length(cd)) for (j in 1:length(cov)) {
        if (cov[j] < cov_min || cov[j] > cov_max) 
            next
        rdistd <- function(n) return(rdist(n, 1, cov[j]))
        rdistc <- function(n) return(rdist(n, cd[i], cd[i] * cov[j]))

        set.seed(i + j * length(cov))
        bino <- static_order_MC(tree_build(n, 1, 0), rdistd, rdistc, iterations)
        set.seed(i + j * length(cov))
        fibo <- static_order_MC(tree_build(n, 1, 1), rdistd, rdistc, iterations)
        set.seed(i + j * length(cov))
        dyn_com <- dyn_com_MC(n, rdistd, rdistc, iterations)

        # Comparing performance item-wise makes sense as the random costs are the
        # same across all methods
        b1 <- length(which(bino < fibo & bino < dyn_com))
        b2 <- length(which(fibo < bino & fibo < dyn_com))
        b3 <- length(which(dyn_com < bino & dyn_com < fibo))
        best_old <- which.max(c(b1, b2, b3))
        best_old <- factor(c("bino", "fibo", "dyn_com")[best_old])
        # Ultimately, the mean could be sufficient
        best <- which.min(c(mean(bino), mean(fibo), mean(dyn_com)))
        best <- factor(c("bino", "fibo", "dyn_com")[best])

        V_non_comm <- rbind(V_non_comm, data.frame(cd = cd[i], cov = cov[j], 
            best = best, best_old = best_old))
    }
    return(V_non_comm)
}

n <- 64
iterations <- 1000
nb_point <- 31  # odd because of symmetry
V_non_comm_gamma <- get_all_makespans_non_comm(iterations, n, rgamma_cost, nb_point)
V_non_comm_bern <- get_all_makespans_non_comm(iterations, n, rbern_cost, nb_point)
V_non_comm_bino <- get_all_makespans_non_comm(iterations, n, rbino_cost, nb_point)
V_non_comm_exp <- get_all_makespans_non_comm(iterations, n, rexp_cost, nb_point)
V_non_comm_pois <- get_all_makespans_non_comm(iterations, n, function(n, mean, 
    sigma) return(rpois_cost(n, mean, sigma)), nb_point, cov_max = 1)
V_non_comm_unif <- get_all_makespans_non_comm(iterations, n, function(n, mean, 
    sigma) return(runif_cost(n, mean, sigma)), nb_point, cov_max = 2/sqrt(12))
V_non_comm_triangle <- get_all_makespans_non_comm(iterations, n, function(n, 
    mean, sigma) return(rtriangle_cost(n, mean, sigma)), nb_point, cov_max = 1/sqrt(6))
V_non_comm_beta0_01 <- get_all_makespans_non_comm(iterations, n, function(n, 
    mean, sigma) return(rbeta_cost(n, mean, sigma, 0.01)), nb_point, cov_max = 9.99)
V_non_comm_beta1 <- get_all_makespans_non_comm(iterations, n, function(n, mean, 
    sigma) return(rbeta_cost(n, mean, sigma, 1)), nb_point, cov_max = 0.999)
V_non_comm_beta100 <- get_all_makespans_non_comm(iterations, n, function(n, 
    mean, sigma) return(rbeta_cost(n, mean, sigma, 100)), nb_point, cov_max = 0.0999)

V_non_comm_gamma_m <- V_non_comm_gamma[, 1:3]
colnames(V_non_comm_gamma_m)[3] <- "gamma"
V_non_comm_m <- V_non_comm_gamma_m
V_non_comm_bern_m <- V_non_comm_bern[, 1:3]
colnames(V_non_comm_bern_m)[3] <- "bern"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_bern_m, by = c("cd", "cov"), 
    all = TRUE)
V_non_comm_bino_m <- V_non_comm_bino[, 1:3]
colnames(V_non_comm_bino_m)[3] <- "bino"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_bino_m, by = c("cd", "cov"), 
    all = TRUE)
V_non_comm_exp_m <- V_non_comm_exp[, 1:3]
colnames(V_non_comm_exp_m)[3] <- "exp"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_exp_m, by = c("cd", "cov"), all = TRUE)
V_non_comm_pois_m <- V_non_comm_pois[, 1:3]
colnames(V_non_comm_pois_m)[3] <- "pois"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_pois_m, by = c("cd", "cov"), 
    all = TRUE)
V_non_comm_unif_m <- V_non_comm_unif[, 1:3]
colnames(V_non_comm_unif_m)[3] <- "unif"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_unif_m, by = c("cd", "cov"), 
    all = TRUE)
V_non_comm_triangle_m <- V_non_comm_triangle[, 1:3]
colnames(V_non_comm_triangle_m)[3] <- "triangle"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_triangle_m, by = c("cd", "cov"), 
    all = TRUE)
V_non_comm_beta0_01_m <- V_non_comm_beta0_01[, 1:3]
colnames(V_non_comm_beta0_01_m)[3] <- "beta0_01"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta0_01_m, by = c("cd", "cov"), 
    all = TRUE)
V_non_comm_beta1_m <- V_non_comm_beta1[, 1:3]
colnames(V_non_comm_beta1_m)[3] <- "beta1"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta1_m, by = c("cd", "cov"), 
    all = TRUE)
V_non_comm_beta100_m <- V_non_comm_beta100[, 1:3]
colnames(V_non_comm_beta100_m)[3] <- "beta100"
V_non_comm_m <- merge(V_non_comm_m, V_non_comm_beta100_m, by = c("cd", "cov"), 
    all = TRUE)

write.csv(V_over_m, "CCPE-data/merge-nc.csv", row.names = FALSE)