Edit (2015-1-4): fix siegel implementation
Let's import the data and compute the new heterogeneity measures. We have closed formulas for the consistent and inconsistent cases, and we will rely on estimation for the other cases.
literature_range <- read.table("literature_range.txt", na.strings = "", col.names = c("task_range",
"mach_range", "task_hetero", "mach_hetero", "row_consis", "col_consis"))
literature_CV <- read.table("literature_CV.txt", na.strings = "", col.names = c("task_range",
"mach_range", "task_hetero", "mach_hetero", "row_consis", "col_consis",
"mean"))
Let's compute the values for the range-based method.
result <- NULL
for (i in 1:nrow(literature_range)) {
suppressWarnings(param <- sapply(literature_range[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
CV_X <- sqrt(12)/6 * (param["task_range"] - 1)/(param["task_range"] + 1)
CV_Y <- sqrt(12)/6 * (param["mach_range"] - 1)/(param["mach_range"] + 1)
CV_mean_row_v <- CV_X
mean_CV_row_v <- CV_Y
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
CV_mean_col_v <- param["row_consis"] * sqrt(param["col_consis"]) * CV_Y
if (param["row_consis"] == 0) {
mean_CV_col_v <- sqrt(CV_X^2 * CV_Y^2 + CV_X^2 + CV_Y^2)
} else if (param["row_consis"] == 1) {
mean_CV_col_v <- (1 - param["col_consis"]) * sqrt(CV_X^2 * CV_Y^2 +
CV_X^2 + CV_Y^2) + param["col_consis"] * CV_X
} else {
mat <- generate_matrix_siegel_range(1000, 1000, param["task_range"],
param["mach_range"], param["row_consis"], param["col_consis"])
mean_CV_col_v <- mean_CV_col(mat)
}
result <- rbind(result, data.frame(CV_mean_row = CV_mean_row_v, CV_mean_col = CV_mean_col_v,
mean_CV_col = mean_CV_col_v, mean_CV_row = mean_CV_row_v))
}
literature_range <- cbind(literature_range, result)
Let's perform the same computation for the CV-based method.
result <- NULL
for (i in 1:nrow(literature_CV)) {
suppressWarnings(param <- sapply(literature_CV[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
CV_X <- param["task_range"]
CV_Y <- param["mach_range"]
CV_mean_row_v <- CV_X
mean_CV_row_v <- CV_Y
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
CV_mean_col_v <- param["row_consis"] * sqrt(param["col_consis"]) * CV_Y
if (param["row_consis"] == 0) {
mean_CV_col_v <- sqrt(CV_X^2 * CV_Y^2 + CV_X^2 + CV_Y^2)
} else if (param["row_consis"] == 1) {
mean_CV_col_v <- (1 - param["col_consis"]) * sqrt(CV_X^2 * CV_Y^2 +
CV_X^2 + CV_Y^2) + param["col_consis"] * CV_X
} else {
mat <- generate_matrix_siegel_CV(1000, 1000, param["mean"], param["mean"],
param["task_range"], param["mach_range"], param["row_consis"], param["col_consis"])
mean_CV_col_v <- mean_CV_col(mat)
}
result <- rbind(result, data.frame(CV_mean_row = CV_mean_row_v, CV_mean_col = CV_mean_col_v,
mean_CV_col = mean_CV_col_v, mean_CV_row = mean_CV_row_v))
}
literature_CV <- cbind(literature_CV, result)
Let's plot both summaries with both methods.
With the CV of the mean heterogeneity measure:
library(ggplot2)
p <- ggplot(data = literature_range, aes(x = CV_mean_row, y = CV_mean_col, shape = interaction(task_hetero,
mach_hetero), color = interaction(task_hetero, mach_hetero), fill = interaction(task_hetero,
mach_hetero)))
p <- p + geom_point(size = 4, alpha = 1/2)
p <- p + scale_x_continuous(name = expression(V ~ mu[task]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
p <- p + scale_y_continuous(name = expression(V ~ mu[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p
With the mean of the CV heterogeneity measure:
library(ggplot2)
p <- ggplot(data = literature_range, aes(x = mean_CV_col, y = mean_CV_row, shape = interaction(task_hetero,
mach_hetero), color = interaction(task_hetero, mach_hetero), fill = interaction(task_hetero,
mach_hetero)))
p <- p + geom_point(size = 4, alpha = 1/2)
p <- p + scale_x_continuous(name = expression(mu ~ V[task]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
p <- p + scale_y_continuous(name = expression(mu ~ V[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p
With the CV of the mean heterogeneity measure:
library(ggplot2)
p <- ggplot(data = literature_CV, aes(x = CV_mean_row, y = CV_mean_col, shape = interaction(task_hetero,
mach_hetero), color = interaction(task_hetero, mach_hetero), fill = interaction(task_hetero,
mach_hetero)))
p <- p + geom_point(size = 4, alpha = 1/2)
p <- p + scale_x_continuous(name = expression(V ~ mu[task]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
p <- p + scale_y_continuous(name = expression(V ~ mu[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "med.med", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "medmed", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
22, 4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p
## Warning: Removed 2 rows containing missing values (geom_point).
With the mean of the CV heterogeneity measure:
library(ggplot2)
p <- ggplot(data = literature_CV, aes(x = mean_CV_col, y = mean_CV_row, shape = interaction(task_hetero,
mach_hetero), color = interaction(task_hetero, mach_hetero), fill = interaction(task_hetero,
mach_hetero)))
p <- p + geom_point(size = 4, alpha = 1/2)
p <- p + scale_x_continuous(name = expression(mu ~ V[task]), breaks = seq(0,
2, 0.4), limits = extendrange(c(0.05, 2)))
p <- p + scale_y_continuous(name = expression(mu ~ V[mach]), breaks = seq(0,
2, 0.2), limits = extendrange(c(0.05, 1.05)))
breaks <- c("high.high", "low.high", "high.low", "low.low", "med.med", "NA.NA")
labels <- c("hihi", "lohi", "hilo", "lolo", "medmed", "NA")
p <- p + scale_color_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + scale_shape_manual(name = "Heterogeneity", values = c(21, 24, 25, 23,
22, 4), breaks = breaks, labels = labels)
p <- p + scale_fill_discrete(name = "Heterogeneity", breaks = breaks, labels = labels)
p <- p + theme(legend.position = "bottom")
p
## Warning: Removed 1 rows containing missing values (geom_point).
Let's test each case empirically:
result <- NULL
for (i in 1:nrow(literature_range)) {
suppressWarnings(param <- sapply(literature_range[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
mat <- generate_matrix_siegel_range(1000, 1000, param["task_range"], param["mach_range"],
param["row_consis"], param["col_consis"])
CV_mean_row_v <- CV_mean_row(mat)
CV_mean_col_v <- CV_mean_col(mat)
mean_CV_col_v <- mean_CV_col(mat)
mean_CV_row_v <- mean_CV_row(mat)
result <- c(result, max(abs(CV_mean_row_v - literature_range[i, "CV_mean_row"]),
abs(CV_mean_col_v - literature_range[i, "CV_mean_col"]), abs(mean_CV_col_v -
literature_range[i, "mean_CV_col"]), abs(mean_CV_row_v - literature_range[i,
"mean_CV_row"])))
}
summary(result)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00231 0.01180 0.01710 0.01610 0.02100 0.02560
Not bad.
result <- NULL
for (i in 1:nrow(literature_CV)) {
suppressWarnings(param <- sapply(literature_CV[i, ], function(x) if (is.factor(x[[1]]))
as.numeric(levels(x[[1]]))[x[[1]]] else x))
if (is.na(param["row_consis"]) && is.na(param["col_consis"])) {
param["row_consis"] <- 0
param["col_consis"] <- 0
}
if (is.na(param["mean"]))
param["mean"] <- 1
mat <- generate_matrix_siegel_CV(1000, 1000, param["mean"], param["mean"],
param["task_range"], param["mach_range"], param["row_consis"], param["col_consis"])
CV_mean_row_v <- CV_mean_row(mat)
CV_mean_col_v <- CV_mean_col(mat)
mean_CV_col_v <- mean_CV_col(mat)
mean_CV_row_v <- mean_CV_row(mat)
result <- c(result, max(abs(CV_mean_row_v - literature_CV[i, "CV_mean_row"]),
abs(CV_mean_col_v - literature_CV[i, "CV_mean_col"]), abs(mean_CV_col_v -
literature_CV[i, "mean_CV_col"]), abs(mean_CV_row_v - literature_CV[i,
"mean_CV_row"])))
}
summary(result)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00082 0.00606 0.01370 0.01760 0.02370 0.08950
It is good.