Let’s show how the values are distributed in a vector that is uniformly drawn among all possible vectors.

# Simulation parameters, forwarded to the Python generator as n = N, s = n and
# k = k. NOTE(review): presumably N is the total to split, n the length of
# each vector and k the number of vectors drawn -- confirm against the Python
# function Genererkalphabeta.
N <- 100
n <- 10
k <- 1e5L
# One row per (alpha, beta) constraint pair to compare.
min_max <- data.frame(min = c(0, 0, 5), max = c(100, 15, 15))

# Tabulate the generated values for every constraint pair.
# The per-pair tables are collected in a list and bound once instead of
# growing `data` with rbind() inside a loop, and seq_len() yields an empty
# sequence when min_max has no rows (1:nrow() would iterate over c(1, 0)).
# Resulting columns: Var1 (value, as a factor), Freq (count), min, max.
data <- do.call(rbind, lapply(seq_len(nrow(min_max)), function(i) {
  # Each pair gets its own `graine` (presumably the RNG seed -- confirm).
  values <- python.call(py.foo = "Genererkalphabeta", n = N, s = n,
                        alpha = min_max$min[i], beta = min_max$max[i], k = k,
                        graine = i)
  cbind(as.data.frame(table(unlist(values))),
        min = min_max$min[i], max = min_max$max[i])
}))

# Tag every row with its (min, max) constraint pair and rescale the counts so
# that, within each constraint, the most frequent value has frequency 1.
data <- data %>%
  mutate(constraint = factor(interaction(min, max))) %>%
  group_by(constraint) %>%
  mutate(Freq_norm = Freq / max(Freq)) %>%
  # Drop the grouping once the per-constraint maximum has been used: later
  # code overwrites `constraint` with `$<-`, which would otherwise leave the
  # stored group metadata out of sync with the column.
  ungroup()

# Build one LaTeX legend label per constraint level. Each level name carries
# the two bounds as runs of digits (min first, max second); they are pulled
# out and formatted on two lines as "$\alpha=<min>$" / "$\beta=<max>$".
labels <- vapply(
  str_extract_all(levels(data$constraint), "[0-9]+"),
  function(bounds) {
    paste0("$\\alpha=", bounds[1], "$\n$\\beta=", bounds[2], "$")
  },
  character(1)
)

# Relabel the factor levels in place (same level order, new display text).
data$constraint <- factor(data$constraint, labels = labels)

# Overlay the normalised value frequencies of all constraint pairs.
#
# Var1 comes from as.data.frame(table(...)) and is therefore a factor:
# as.numeric() on a factor returns the level codes (1, 2, ...), not the values
# the levels spell out, which shifted every bar one unit to the right. Going
# through as.character() recovers the actual values.
#
# geom_col() is the bar geom that uses the data as-is, so it replaces
# geom_histogram(stat = "identity") and no longer triggers the
# "Ignoring unknown parameters: binwidth, bins, pad" warnings.
g <- ggplot(data, aes(x = as.numeric(as.character(Var1)), y = Freq_norm,
                      fill = constraint)) +
  # One semi-transparent layer per constraint, added in label order so the
  # drawing order (first label at the bottom) is the same as before.
  lapply(labels, function(lab) {
    geom_col(data = subset(data, data$constraint == lab),
             alpha = 0.5, color = "black")
  }) +
  scale_fill_hue(name = "Constraints", l = c(100, 80, 10)) +
  scale_x_continuous(name = "Value") +
  scale_y_continuous(name = "Normalized frequency") +
  coord_cartesian(xlim = c(0, 30)) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 1, override.aes = list(alpha = 0.25)))
print(g)

# Render the histogram a second time into a TikZ device so it is written to
# <prefix_path>/hist.tex; dev.off() closes the device and finalises the file.
# `prefix_path` is expected to be defined earlier in the document.
library(tikzDevice)
tikz(file.path(prefix_path, "hist.tex"), width = 3.5, height = 2.5)
print(g)
dev.off()
## png 
##   2

This plot shows that the distributions have different shapes depending on the constraints.

Let’s focus on the variance now for all combinations of min and max.

# Simulation parameters, forwarded to the Python generator as n = N, s = n and
# k = k (fewer draws per setting than for the histogram above).
N <- 100
n <- 10
k <- 1e4L
# Every combination of a lower bound 0 .. N / n - 1 with ten upper bounds
# spaced evenly on a log10 scale between 11.01 and 100.
min_max <- expand.grid(min = 0:(N / n - 1),
                       max = 10^seq(from = log10(11.01), to = log10(100),
                                    length.out = 10))

# For each (min, max) pair, average the within-vector variance over all
# generated vectors. The one-row results are collected in a list and bound
# once instead of growing `data` with rbind() inside a loop; seq_len() keeps
# the iteration empty when min_max has no rows, and vapply() guarantees a
# numeric vector (one variance per generated vector) whatever the input size.
data <- do.call(rbind, lapply(seq_len(nrow(min_max)), function(i) {
  # The generator receives the floored upper bound, while the unrounded,
  # log-spaced value is kept in the result for plotting.
  values <- python.call(py.foo = "Genererkalphabeta", n = N, s = n,
                        alpha = min_max$min[i], beta = floor(min_max$max[i]),
                        k = k, graine = i)
  data.frame(var = mean(vapply(values, var, numeric(1))),
             min = min_max$min[i], max = min_max$max[i])
}))

Let’s use a heatmap to show the standard deviation:

# Heatmap of the standard deviation (square root of the averaged variance)
# over the (min, max) grid, with contour lines drawn every 2.5 units on top of
# the tiles and log tick marks on the left (max) axis.
g <- ggplot(data = data,
            mapping = aes(x = min, y = max,
                          fill = sqrt(var), z = sqrt(var))) +
  # Layers, bottom to top: tiles, contours, axis tick annotation.
  geom_tile() +
  stat_contour(color = "black", breaks = seq(0, 10, 2.5)) +
  annotation_logticks(sides = "l") +
  # Scales: colour for the standard deviation, linear x, log10 y.
  scale_fill_distiller(name = "Std dev", palette = "YlGnBu",
                       guide = "colourbar") +
  scale_x_continuous(name = "min ($\\alpha$)") +
  scale_y_log10(name = "max ($\\beta$)")
print(g)

# Render the heatmap a second time into a TikZ device so it is written to
# <prefix_path>/heat.tex; dev.off() closes the device and finalises the file.
# `prefix_path` is expected to be defined earlier in the document.
library(tikzDevice)
tikz(file.path(prefix_path, "heat.tex"), width = 3.5, height = 2.5)
print(g)
dev.off()
## png 
##   2

Constraining the values limits the standard deviation. The maximum variance is achieved when there is no constraint and is close to what one can obtain with an exponential distribution with mean \(N/n\).

## R version 3.4.3 (2017-11-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.4 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=fr_FR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=fr_FR.UTF-8    
##  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=fr_FR.UTF-8   
##  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] tikzDevice_0.10-1 bindrcpp_0.2      rPython_0.0-6     RJSONIO_1.3-0    
## [5] ggplot2_2.2.1     dplyr_0.7.4       tidyr_0.2.0       purrr_0.2.4      
## [9] stringr_1.2.0    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.10       knitr_1.15.1       bindr_0.1         
##  [4] magrittr_1.5       munsell_0.4        colorspace_1.2-2  
##  [7] R6_2.1.0           rlang_0.1.6        filehash_2.4-1    
## [10] plyr_1.8.1         tools_3.4.3        grid_3.4.3        
## [13] gtable_0.1.2       pacman_0.4.6       png_0.1-7         
## [16] htmltools_0.3.6    lazyeval_0.2.0     yaml_2.1.13       
## [19] rprojroot_1.2      digest_0.6.3       assertthat_0.1    
## [22] tibble_1.3.1       RColorBrewer_1.0-5 glue_1.2.0        
## [25] evaluate_0.10      rmarkdown_1.8      labeling_0.1      
## [28] stringi_0.5-5      compiler_3.4.3     scales_0.4.1      
## [31] backports_1.0.5    pkgconfig_2.0.1