## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

We aim to confirm the conjecture on the convergence of the correlation between two sorted random vectors.

Let’s generate the data for several distributions:

# Monte Carlo study: for each distribution, draw two independent samples of
# the same random size n, sort both, and record their Pearson correlation.

# Number of replications per distribution.
MC <- 1e3

# Sampler names (looked up as functions) and the matching display labels,
# kept in the same order so that each label is tied to its sampler.
dist_names <- c("runif", "rnorm", "rlogis", "rgamma", "rexp", "rlnorm")
dist_labels <- c("Uniform", "Normal", "Logistic",
                 "Gamma", "Exponential", "Log normal")

# One data frame per distribution, collected in a preallocated list and bound
# once at the end instead of growing `correlations` with rbind() in the loop.
per_dist <- vector("list", length(dist_names))
for (k in seq_along(dist_names)) {
  dist <- dist_names[k]
  if (dist == "rgamma") {
    # rgamma() has no default shape, so it needs a wrapper.
    dist_funct <- function(n) {
      rgamma(n, shape = 1)
    }
  } else {
    dist_funct <- match.fun(dist)
  }

  cor_meas <- matrix(NA_real_, nrow = MC, ncol = 2,
                     dimnames = list(NULL, c("n", "cor")))
  for (i in seq_len(MC)) {
    # Vector size drawn log-uniformly between 10 and 1e5.
    n <- as.integer(10^runif(1, min = 1, max = 5))
    cor_meas[i, "n"] <- n
    cor_meas[i, "cor"] <- cor(sort(dist_funct(n)), sort(dist_funct(n)))
  }
  per_dist[[k]] <- data.frame(cor_meas, dist = dist, stringsAsFactors = FALSE)
}
correlations <- do.call(rbind, per_dist)

# Build the factor explicitly. Assigning to levels() only relabels when `dist`
# is already a factor, which is no longer the case since data.frame() stopped
# converting strings to factors by default (R >= 4.0.0).
correlations$dist <- factor(correlations$dist,
                            levels = dist_names,
                            labels = dist_labels)
# Plot 1 - correlation against vector size, one panel per distribution,
# with both axes on a log10 scale.
ggplot(data = correlations, mapping = aes(x = n, y = 1 - cor)) +
  # Raw Monte Carlo results, semi-transparent to show point density.
  geom_point(alpha = 0.3) +
  # Smoothed trend; the smoothing method is left to ggplot2's default choice.
  stat_smooth() +
  scale_x_log10(name = "Vector size") +
  scale_y_log10(name = "1 - correlation") +
  facet_wrap(facets = ~ dist, nrow = 1)
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

This supports the proposed conjecture.