3. Robust and High-Dimensional Correlation

Scope

This vignette covers the estimators used when ordinary wide-data correlation is not enough. The package provides two broad extensions:

The relevant functions are:

Robust correlation

Outliers can distort ordinary correlation severely, especially in moderate sample sizes. The robust estimators in matrixCorr are designed to limit that effect, but they do so in different ways.

library(matrixCorr)

set.seed(20)
Y <- data.frame(
  x1 = rnorm(60),
  x2 = rnorm(60),
  x3 = rnorm(60),
  x4 = rnorm(60)
)

idx <- sample.int(nrow(Y), 5)
Y$x1[idx] <- Y$x1[idx] + 8
Y$x2[idx] <- Y$x2[idx] - 6

R_pear <- pearson_corr(Y)
R_bicor <- bicor(Y)
R_pb <- pbcor(Y)
R_win <- wincor(Y)
R_skip <- skipped_corr(Y)

summary(R_pear)
#> Pearson correlation summary
#>   output      : matrix
#>   dimensions  : 4 x 4
#>   retained_pairs: 6
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.8552 to 0.1514
#> 
#>  item1 item2 estimate
#>  x1    x2    -0.8552 
#>  x3    x4     0.1514 
#>  x1    x3     0.1058 
#>  x1    x4    -0.0400 
#>  x2    x3    -0.0327 
#>  x2    x4     0.0050
summary(R_bicor)
#> Biweight mid-correlation summary
#>   output      : matrix
#>   dimensions  : 4 x 4
#>   retained_pairs: 6
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.2738 to 0.1699
#> 
#>  item1 item2 estimate
#>  x1    x2    -0.2738 
#>  x3    x4     0.1699 
#>  x1    x4    -0.1324 
#>  x2    x3     0.1279 
#>  x2    x4     0.1062 
#>  x1    x3    -0.0407

In practical terms:

That last property is useful, but it also makes skipped_corr() the most computationally expensive estimator in this group.

summary(R_skip)
#> Skipped correlation summary
#>   output      : matrix
#>   dimensions  : 4 x 4
#>   retained_pairs: 6
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.2896 to 0.2986
#>   skipped_n   : 2 to 5
#>   skipped_prop: 0.0333 to 0.0833
#> 
#>  item1 item2 estimate n  p_value
#>  x3    x4     0.2986  60 0.0200 
#>  x1    x2    -0.2896  60 0.0244 
#>  x2    x3     0.1829  60 0.1626 
#>  x1    x4    -0.1651  60 0.2083 
#>  x2    x4     0.0834  60 0.5281 
#>  x1    x3    -0.0544  60 0.6811

Inference for robust estimators

Inference is not uniform across the robust estimators, so it is worth stating the current scope explicitly.

fit_bicor_ci <- bicor(Y, ci = TRUE)
summary(fit_bicor_ci)
#> Biweight mid-correlation summary
#>   output      : matrix
#>   dimensions  : 4 x 4
#>   retained_pairs: 6
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.2738 to 0.1699
#>   inference   : large_sample_bicor
#>   ci          : 95%
#>   ci_method   : fisher_z_bicor
#>   ci_width    : 0.472 to 0.507
#>   cross_zero  : 5 pair(s)
#> 
#>  item1 item2 estimate n  95% CI             p_value
#>  x1    x2    -0.2738  60 [-0.4934, -0.0213] 0.0343 
#>  x3    x4     0.1699  60 [-0.0878, 0.4063]  0.1942 
#>  x1    x4    -0.1324  60 [-0.3738, 0.1257]  0.3132 
#>  x2    x3     0.1279  60 [-0.1302, 0.3699]  0.3299 
#>  x2    x4     0.1062  60 [-0.1518, 0.3507]  0.4192 
#>  x1    x3    -0.0407  60 [-0.2916, 0.2155]  0.7576
estimate(fit_bicor_ci)
#>            x1         x2         x3         x4
#> x1  1.0000000 -0.2737753 -0.0406870 -0.1324173
#> x2 -0.2737753  1.0000000  0.1279485  0.1062372
#> x3 -0.0406870  0.1279485  1.0000000  0.1699477
#> x4 -0.1324173  0.1062372  0.1699477  1.0000000
confint(fit_bicor_ci)
#>   item1 item2         lwr         upr
#> 1    x1    x2 -0.49339972 -0.02133372
#> 2    x1    x3 -0.29159906  0.21546375
#> 3    x2    x3 -0.13020648  0.36985685
#> 4    x1    x4 -0.37377417  0.12573516
#> 5    x2    x4 -0.15178200  0.35070130
#> 6    x3    x4 -0.08776438  0.40633735
ci(fit_bicor_ci)
#> $est
#>            x1         x2         x3         x4
#> x1  1.0000000 -0.2737753 -0.0406870 -0.1324173
#> x2 -0.2737753  1.0000000  0.1279485  0.1062372
#> x3 -0.0406870  0.1279485  1.0000000  0.1699477
#> x4 -0.1324173  0.1062372  0.1699477  1.0000000
#> 
#> $lwr.ci
#>            x1         x2          x3          x4
#> x1  1.0000000 -0.4933997 -0.29159906 -0.37377417
#> x2 -0.4933997  1.0000000 -0.13020648 -0.15178200
#> x3 -0.2915991 -0.1302065  1.00000000 -0.08776438
#> x4 -0.3737742 -0.1517820 -0.08776438  1.00000000
#> 
#> $upr.ci
#>             x1          x2        x3        x4
#> x1  1.00000000 -0.02133372 0.2154638 0.1257352
#> x2 -0.02133372  1.00000000 0.3698569 0.3507013
#> x3  0.21546375  0.36985685 1.0000000 0.4063373
#> x4  0.12573516  0.35070130 0.4063373 1.0000000
#> 
#> $conf.level
#> [1] 0.95
#> 
#> $ci.method
#> [1] "fisher_z_bicor"
tidy(fit_bicor_ci)
#>   item1 item2   estimate         lwr         upr statistic df   p_value
#> 1    x1    x2 -0.2737753 -0.49339972 -0.02133372 2.1678359 58 0.0342880
#> 2    x1    x3 -0.0406870 -0.29159906  0.21546375 0.3101197 58 0.7575815
#> 3    x2    x3  0.1279485 -0.13020648  0.36985685 0.9825024 58 0.3299332
#> 4    x1    x4 -0.1324173 -0.37377417  0.12573516 1.0174191 58 0.3131795
#> 5    x2    x4  0.1062372 -0.15178200  0.35070130 0.8136833 58 0.4191544
#> 6    x3    x4  0.1699477 -0.08776438  0.40633735 1.3133888 58 0.1942241

fit_pb_inf <- pbcor(Y, ci = TRUE, p_value = TRUE, n_boot = 200, seed = 1)
summary(fit_pb_inf)
#> Percentage bend correlation summary
#>   output      : matrix
#>   dimensions  : 4 x 4
#>   retained_pairs: 6
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.4218 to 0.1735
#>   inference   : percentage_bend_t_test
#>   ci          : 95%
#>   ci_method   : percentile_bootstrap
#>   ci_width    : 0.461 to 0.554
#>   cross_zero  : 5 pair(s)
#> 
#>  item1 item2 estimate n  95% CI             p_value
#>  x1    x2    -0.4218  60 [-0.6878, -0.1340] 0.0008 
#>  x3    x4     0.1735  60 [-0.1188, 0.3812]  0.1849 
#>  x2    x3     0.0967  60 [-0.1530, 0.3448]  0.4625 
#>  x2    x4     0.0919  60 [-0.1571, 0.3043]  0.4848 
#>  x1    x4    -0.0720  60 [-0.3303, 0.1495]  0.5845 
#>  x1    x3     0.0105  60 [-0.2447, 0.2603]  0.9365

fit_win_inf <- wincor(Y, ci = TRUE, p_value = TRUE, n_boot = 200, seed = 1)
summary(fit_win_inf)
#> Winsorized correlation summary
#>   output      : matrix
#>   dimensions  : 4 x 4
#>   retained_pairs: 6
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.3578 to 0.1682
#>   inference   : winsorized_t_test
#>   ci          : 95%
#>   ci_method   : percentile_bootstrap
#>   ci_width    : 0.486 to 0.601
#>   cross_zero  : 5 pair(s)
#> 
#>  item1 item2 estimate n  95% CI             p_value
#>  x1    x2    -0.3578  60 [-0.6771, -0.0757] 0.0062 
#>  x3    x4     0.1682  60 [-0.1069, 0.4246]  0.2026 
#>  x2    x4     0.1032  60 [-0.1522, 0.3343]  0.4350 
#>  x2    x3     0.0849  60 [-0.1747, 0.3673]  0.5209 
#>  x1    x3     0.0095  60 [-0.2634, 0.2731]  0.9427 
#>  x1    x4    -0.0048  60 [-0.3033, 0.2185]  0.9711

fit_skip_inf <- skipped_corr(Y, ci = TRUE, p_value = TRUE, n_boot = 200, seed = 1)
summary(fit_skip_inf)
#> Skipped correlation summary
#>   output      : matrix
#>   dimensions  : 4 x 4
#>   retained_pairs: 6
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.2896 to 0.2986
#>   skipped_n   : 2 to 5
#>   skipped_prop: 0.0333 to 0.0833
#>   inference   : bootstrap_b2
#>   ci          : 95%
#>   ci_width    : 0.473 to 0.718
#>   cross_zero  : 6 pair(s)
#> 
#>  item1 item2 estimate n  95% CI            p_value
#>  x3    x4     0.2986  60 [-0.1183, 0.5996] 0.2900 
#>  x1    x2    -0.2896  60 [-0.4994, 0.0898] 0.0900 
#>  x2    x3     0.1829  60 [-0.1003, 0.4052] 0.2400 
#>  x1    x4    -0.1651  60 [-0.3979, 0.2096] 0.3600 
#>  x2    x4     0.0834  60 [-0.1242, 0.3487] 0.4000 
#>  x1    x3    -0.0544  60 [-0.3124, 0.2238] 0.6300

For pbcor(), wincor(), and skipped_corr(), the inferential layer is more computationally demanding than the estimate-only path because it is built from bootstrap resampling. The skipped-correlation route remains the most expensive of the three because each bootstrap sample also repeats the pairwise outlier screening step. The vignette uses n_boot = 200 only to keep runtime modest; substantive analyses usually need more resamples.

Partial correlation

Partial correlation addresses a different target. Instead of summarising raw association, it estimates conditional association after accounting for the remaining variables.

set.seed(21)
n <- 300
x2 <- rnorm(n)
x1 <- 0.8 * x2 + rnorm(n, sd = 0.4)
x3 <- 0.8 * x2 + rnorm(n, sd = 0.4)
x4 <- 0.7 * x3 + rnorm(n, sd = 0.5)
x5 <- rnorm(n)
x6 <- rnorm(n)
X <- data.frame(x1, x2, x3, x4, x5, x6)

fit_pcor_sample <- pcorr(X, method = "sample")
fit_pcor_oas <- pcorr(X, method = "oas")
R_raw <- pearson_corr(X)

round(c(
  raw_x1_x3 = R_raw["x1", "x3"],
  partial_x1_x3 = estimate(fit_pcor_sample)["x1", "x3"]
), 2)
#>     raw_x1_x3 partial_x1_x3 
#>          0.77         -0.07

print(fit_pcor_sample, digits = 2)
#>        x1     x2     x3      x4     x5      x6
#> x1  1.000  0.645 -0.070  0.0705  0.071 -0.0759
#> x2  0.645  1.000  0.627  0.0390 -0.028  0.0646
#> x3 -0.070  0.627  1.000  0.4145  0.003  0.0169
#> x4  0.070  0.039  0.415  1.0000 -0.046  0.0094
#> x5  0.071 -0.028  0.003 -0.0460  1.000  0.0522
#> x6 -0.076  0.065  0.017  0.0094  0.052  1.0000
#> attr(,"class")
#> [1] "corr_matrix"         "partial_corr_matrix" "corr_result"        
#> [4] "matrix"             
#> attr(,"method")
#> [1] "partial_correlation_sample"
#> attr(,"description")
#> [1] "Partial correlation matrix"
#> attr(,"package")
#> [1] "matrixCorr"
#> attr(,"output")
#> [1] "matrix"
#> attr(,"threshold")
#> [1] 0
#> attr(,"diag")
#> [1] TRUE
#> attr(,"diagnostics")
#> attr(,"diagnostics")$n_complete
#>     x1  x2  x3  x4  x5  x6
#> x1 300 300 300 300 300 300
#> x2 300 300 300 300 300 300
#> x3 300 300 300 300 300 300
#> x4 300 300 300 300 300 300
#> x5 300 300 300 300 300 300
#> x6 300 300 300 300 300 300
#> 
#> attr(,"diagnostics")$n_conditioning
#>    x1 x2 x3 x4 x5 x6
#> x1  0  4  4  4  4  4
#> x2  4  0  4  4  4  4
#> x3  4  4  0  4  4  4
#> x4  4  4  4  0  4  4
#> x5  4  4  4  4  0  4
#> x6  4  4  4  4  4  0
#> 
#> attr(,"corr_result")
#> [1] TRUE
#> attr(,"corr_output_class")
#> [1] "corr_matrix"
#> attr(,"corr_estimator_class")
#> [1] "partial_corr_matrix"
#> attr(,"corr_dim")
#> [1] 6 6
#> attr(,"corr_dimnames")
#> attr(,"corr_dimnames")[[1]]
#> [1] "x1" "x2" "x3" "x4" "x5" "x6"
#> 
#> attr(,"corr_dimnames")[[2]]
#> [1] "x1" "x2" "x3" "x4" "x5" "x6"
#> 
#> attr(,"corr_symmetric")
#> [1] TRUE
#> attr(,"partial_method")
#> [1] "sample"
#> attr(,"lambda")
#> [1] NA
#> attr(,"rho")
#> [1] NA
#> attr(,"jitter")
#> [1] 0
summary(fit_pcor_oas)
#> Partial correlation summary
#>   output      : matrix
#>   dimensions  : 6 x 6
#>   retained_pairs: 15
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.0676 to 0.6074
#> 
#>  item1 item2 estimate n   p_value
#>  x1    x2     0.6074  300 0.0000 
#>  x2    x3     0.5882  300 0.0000 
#>  x3    x4     0.3966  300 0.0000 
#>  x2    x4     0.0728  300 0.2089 
#>  x1    x4     0.0704  300 0.2243 
#>  ...   ...   ...      ... ...    
#>  x3    x6     0.0225  300 0.6981 
#>  x2    x5    -0.0224  300 0.6995 
#>  x4    x6     0.0100  300 0.8628 
#>  x1    x3    -0.0078  300 0.8937 
#>  x3    x5    -0.0010  300 0.9869 
#> ... 5 more rows not shown (omitted)
#> Use as.data.frame()/tidy()/as.matrix() to inspect the full result.

Here x1 and x3 are strongly associated in the raw correlation matrix because both depend on x2. Partial correlation is useful because it can separate that indirect association from the remaining direct structure.

The choice of method is important:

When the classical sample estimator is appropriate, partial correlation also supports p-values and Fisher-z confidence intervals.

Like the other correlation wrappers, pcorr() returns an estimate matrix by default. Use estimate(), tidy(), ci(), and confint() for public access. If you need the covariance or precision matrix, set return_details = TRUE.

fit_pcor_inf <- pcorr(
  X,
  method = "sample",
  return_p_value = TRUE,
  ci = TRUE
)

summary(fit_pcor_inf)
#> Partial correlation summary
#>   output      : matrix
#>   dimensions  : 6 x 6
#>   retained_pairs: 15
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.0759 to 0.6445
#>   inference   : partial_correlation_t_test
#>   ci          : 95%
#>   ci_method   : fisher_z_partial
#>   ci_width    : 0.134 to 0.228
#>   cross_zero  : 12 pair(s)
#> 
#>  item1 item2 estimate n   95% CI            p_value
#>  x1    x2     0.6445  300 [0.5726, 0.7066]  0.0000 
#>  x2    x3     0.6271  300 [0.5526, 0.6917]  0.0000 
#>  x3    x4     0.4145  300 [0.3155, 0.5047]  0.0000 
#>  x1    x6    -0.0759  300 [-0.1883, 0.0385] 0.1929 
#>  x1    x5     0.0714  300 [-0.0430, 0.1839] 0.2207 
#>  ...   ...   ...      ... ...               ...    
#>  x2    x4     0.0390  300 [-0.0754, 0.1523] 0.5043 
#>  x2    x5    -0.0280  300 [-0.1416, 0.0863] 0.6314 
#>  x3    x6     0.0169  300 [-0.0973, 0.1307] 0.7716 
#>  x4    x6     0.0094  300 [-0.1048, 0.1232] 0.8726 
#>  x3    x5     0.0030  300 [-0.1110, 0.1170] 0.9585 
#> ... 5 more rows not shown (omitted)
#> Use as.data.frame()/tidy()/as.matrix() to inspect the full result.
tidy(fit_pcor_inf)
#>    item1 item2     estimate         lwr        upr  df      p_value
#> 1     x1    x2  0.644531930  0.57260194 0.70661472 294 3.819610e-36
#> 2     x1    x3 -0.070464451 -0.18299890 0.04389267 294 2.267821e-01
#> 3     x2    x3  0.627119740  0.55262485 0.69167343 294 9.272162e-34
#> 4     x1    x4  0.070492784 -0.04386426 0.18302642 294 2.265949e-01
#> 5     x2    x4  0.038963199 -0.07537614 0.15229125 294 5.042833e-01
#> 6     x3    x4  0.414549817  0.31545382 0.50470185 294 1.011392e-13
#> 7     x1    x5  0.071395062 -0.04295912 0.18390274 294 2.206908e-01
#> 8     x2    x5 -0.028001159 -0.14155381 0.08627879 294 6.313654e-01
#> 9     x3    x5  0.003039294 -0.11100369 0.11700328 294 9.584738e-01
#> 10    x4    x5 -0.045972993 -0.15914343 0.06838997 294 4.306854e-01
#> 11    x1    x6 -0.075884967 -0.18826080 0.03845222 294 1.929409e-01
#> 12    x2    x6  0.064634269 -0.04973675 0.17733210 294 2.676614e-01
#> 13    x3    x6  0.016940339 -0.09725201 0.13069246 294 7.716324e-01
#> 14    x4    x6  0.009357220 -0.10475906 0.12323029 294 8.726369e-01
#> 15    x5    x6  0.052198632 -0.06217589 0.16521995 294 3.708568e-01
ci(fit_pcor_inf)
#> $est
#>             x1          x2           x3          x4           x5          x6
#> x1  1.00000000  0.64453193 -0.070464451  0.07049278  0.071395062 -0.07588497
#> x2  0.64453193  1.00000000  0.627119740  0.03896320 -0.028001159  0.06463427
#> x3 -0.07046445  0.62711974  1.000000000  0.41454982  0.003039294  0.01694034
#> x4  0.07049278  0.03896320  0.414549817  1.00000000 -0.045972993  0.00935722
#> x5  0.07139506 -0.02800116  0.003039294 -0.04597299  1.000000000  0.05219863
#> x6 -0.07588497  0.06463427  0.016940339  0.00935722  0.052198632  1.00000000
#> 
#> $lwr.ci
#>             x1          x2          x3          x4          x5          x6
#> x1  1.00000000  0.57260194 -0.18299890 -0.04386426 -0.04295912 -0.18826080
#> x2  0.57260194  1.00000000  0.55262485 -0.07537614 -0.14155381 -0.04973675
#> x3 -0.18299890  0.55262485  1.00000000  0.31545382 -0.11100369 -0.09725201
#> x4 -0.04386426 -0.07537614  0.31545382  1.00000000 -0.15914343 -0.10475906
#> x5 -0.04295912 -0.14155381 -0.11100369 -0.15914343  1.00000000 -0.06217589
#> x6 -0.18826080 -0.04973675 -0.09725201 -0.10475906 -0.06217589  1.00000000
#> 
#> $upr.ci
#>            x1         x2         x3         x4         x5         x6
#> x1 1.00000000 0.70661472 0.04389267 0.18302642 0.18390274 0.03845222
#> x2 0.70661472 1.00000000 0.69167343 0.15229125 0.08627879 0.17733210
#> x3 0.04389267 0.69167343 1.00000000 0.50470185 0.11700328 0.13069246
#> x4 0.18302642 0.15229125 0.50470185 1.00000000 0.06838997 0.12323029
#> x5 0.18390274 0.08627879 0.11700328 0.06838997 1.00000000 0.16521995
#> x6 0.03845222 0.17733210 0.13069246 0.12323029 0.16521995 1.00000000
#> 
#> $conf.level
#> [1] 0.95
#> 
#> $ci.method
#> [1] "fisher_z_partial"
confint(fit_pcor_inf)
#>    item1 item2         lwr        upr
#> 1     x1    x2  0.57260194 0.70661472
#> 2     x1    x3 -0.18299890 0.04389267
#> 3     x2    x3  0.55262485 0.69167343
#> 4     x1    x4 -0.04386426 0.18302642
#> 5     x2    x4 -0.07537614 0.15229125
#> 6     x3    x4  0.31545382 0.50470185
#> 7     x1    x5 -0.04295912 0.18390274
#> 8     x2    x5 -0.14155381 0.08627879
#> 9     x3    x5 -0.11100369 0.11700328
#> 10    x4    x5 -0.15914343 0.06838997
#> 11    x1    x6 -0.18826080 0.03845222
#> 12    x2    x6 -0.04973675 0.17733210
#> 13    x3    x6 -0.09725201 0.13069246
#> 14    x4    x6 -0.10475906 0.12323029
#> 15    x5    x6 -0.06217589 0.16521995

That inference layer is available only for the ordinary sample estimator in the low-dimensional setting. The regularised estimators are primarily estimation tools rather than direct inferential procedures in the current interface.

High-dimensional shrinkage correlation

When the number of variables is large relative to the sample size, direct sample correlation can become unstable. shrinkage_corr() provides a shrinkage correlation route designed for exactly this setting.

set.seed(22)
n <- 40
p_block <- 30

make_block <- function(f) {
  sapply(seq_len(p_block), function(j) 0.8 * f + rnorm(n, sd = 0.8))
}

f1 <- rnorm(n)
f2 <- rnorm(n)
f3 <- rnorm(n)

Xd <- cbind(make_block(f1), make_block(f2), make_block(f3))
p <- ncol(Xd)
colnames(Xd) <- paste0("G", seq_len(p))

R_raw <- stats::cor(Xd)
fit_shr <- shrinkage_corr(Xd)
block <- rep(1:3, each = p_block)
within <- outer(block, block, "==") & upper.tri(R_raw)
between <- outer(block, block, "!=") & upper.tri(R_raw)

round(c(
  raw_within = mean(abs(R_raw[within])),
  raw_between = mean(abs(R_raw[between])),
  shrink_within = mean(abs(fit_shr[within])),
  shrink_between = mean(abs(fit_shr[between]))
), 3)
#>     raw_within    raw_between  shrink_within shrink_between 
#>          0.509          0.119          0.404          0.095

print(fit_shr, digits = 2, max_rows = 6, max_vars = 6)
#> Schafer-Strimmer shrinkage correlation matrix
#>   method      : schafer_shrinkage
#>   dimensions  : 90 x 90
#> 
#>        G1   G2   G3 ...   G88   G89  G90
#> G1   1.00 0.47 0.42 ... -0.16 -0.12 0.01
#> G2   0.47 1.00 0.42 ...  0.12  0.04 0.14
#> G3   0.42 0.42 1.00 ...  0.02  0.02 0.18
#> ...   ...  ...  ... ...   ...   ...  ...
#> G88 -0.16 0.12 0.02 ...  1.00  0.39 0.52
#> G89 -0.12 0.04 0.02 ...  0.39  1.00 0.52
#> G90  0.01 0.14 0.18 ...  0.52  0.52 1.00
#> ... 84 more rows not shown (omitted)
#> ... 84 more variables not shown (omitted)
#> Use summary() for a richer digest.
#> Use as.data.frame()/tidy()/as.matrix() to inspect the full result.
summary(fit_shr)
#> Correlation summary
#>   output      : matrix
#>   dimensions  : 90 x 90
#>   retained_pairs: 4,005
#>   threshold   : 0.0000
#>   diag        : included
#>   estimate    : -0.4097 to 0.6154
#> 
#>  item1 item2 estimate
#>  G65   G78    0.6154 
#>  G53   G60    0.6048 
#>  G14   G18    0.5970 
#>  G31   G60    0.5930 
#>  G53   G57    0.5915 
#>  ...   ...   ...     
#>  G37   G65    0.0003 
#>  G22   G55   -0.0001 
#>  G9    G87   -0.0001 
#>  G12   G82    0.0000 
#>  G51   G75    0.0000 
#> ... 3,995 more rows not shown (omitted)
#> Use as.data.frame()/tidy()/as.matrix() to inspect the full result.

The shrinkage estimator is not intended to reproduce the raw sample matrix. Its purpose is to provide a better-conditioned and more stable estimate in settings where the sample matrix is noisy or singular. In this block-factor simulation, the raw sample matrix retains the main within-block pattern but also carries substantial between-block noise; shrinkage pulls that background noise down without erasing the dominant structure.

Choosing among these methods

The choice is usually governed by the main source of difficulty in the data.

These are not interchangeable estimators. They solve different problems, even though they share the same matrix-oriented calling style.