diagFDR: DIA-NN diagnostics from report.parquet

This vignette demonstrates how to run diagFDR on DIA-NN exports and interpret the key diagnostics in terms of scope, calibration, and stability.

The typical workflow is:

  1. Export DIA-NN results with decoys and a permissive q-value ceiling.
  2. Read report.parquet.
  3. Construct one or more universes (global precursor list, run×precursor, etc.).
  4. Run diagnostics and inspect tables/plots.
  5. (Optional) write tables/plots and a human-readable report to disk.

Runnable toy example (no DIA-NN files required)

We start with a small simulated dataset that exercises the diagFDR functions. Any workflow producing outputs that can be mapped to the columns id, is_decoy, q, pep, run, and score can be handled similarly.

library(diagFDR)

# Fix the RNG seed so the simulated table is reproducible.
set.seed(1)

n <- 3000

# Draw the random columns first: decoy flags, then q-values. The order of
# these two calls determines how the seeded random stream is consumed.
decoy_flag <- sample(c(FALSE, TRUE), n, replace = TRUE, prob = c(0.97, 0.03))
q_value <- pmin(1, runif(n)^3)  # cubing uniforms skews toward small q-values

# One row per simulated precursor; pep, run and score are left missing.
# NOTE(review): simulated q-values span (0, 1) while q_max_export below is
# 0.5 -- confirm this mismatch is intended for the toy example.
toy_global <- data.frame(
  id = paste0("P", seq_len(n)),
  is_decoy = decoy_flag,
  q = q_value,
  pep = NA_real_,
  run = NA_character_,
  score = NA_real_
)

# Wrap the toy table as a diagFDR universe with its descriptive metadata.
x_global <- as_dfdr_tbl(
  toy_global,
  unit = "precursor",
  scope = "global",
  q_source = "toy",
  q_max_export = 0.5
)

# Thresholds at which the diagnostics are evaluated.
alpha_grid <- c(1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 1e-1, 2e-1)

# Run every diagnostic on the single `global` universe.
diag <- dfdr_run_all(
  xs = list(global = x_global),
  alpha_main = 0.01,
  alphas = alpha_grid,
  low_conf = c(0.2, 0.5)
)

Headline stability at 1%

# Print the headline table from the diagnostics result. With a single
# universe (`global`) and alpha_main = 0.01 it has one row.
diag$tables$headline
#> # A tibble: 1 × 24
#>   alpha T_alpha D_alpha FDR_hat CV_hat FDR_minus1 FDR_plus1 FDR_minusK FDR_plusK
#>   <dbl>   <int>   <int>   <dbl>  <dbl>      <dbl>     <dbl>      <dbl>     <dbl>
#> 1  0.01     602      13  0.0216  0.277     0.0199    0.0233    0.00498    0.0382
#> # ℹ 15 more variables: k2sqrtD <int>, FDR_minus2sqrtD <dbl>,
#> #   FDR_plus2sqrtD <dbl>, list <chr>, D_alpha_win <int>, effect_abs <dbl>,
#> #   IPE <dbl>, flag_Dalpha <chr>, flag_CV <chr>, flag_Dwin <chr>,
#> #   flag_IPE <chr>, flag_FDR <chr>, flag_equalchance <chr>, status <chr>,
#> #   interpretation <chr>

Tail support and stability versus threshold

# Display the `dalpha` plot stored in the diagnostics result.
# NOTE(review): presumably D_alpha (see the headline table) across the alpha
# grid -- confirm against the package documentation.
diag$plots$dalpha

# Display the `cv` plot.
# NOTE(review): presumably CV_hat across the alpha grid -- confirm.
diag$plots$cv

Local boundary support

# Display the `dwin` plot stored in the diagnostics result.
# NOTE(review): presumably related to the D_alpha_win headline column --
# confirm.
diag$plots$dwin

Threshold elasticity (list sensitivity to changing alpha)

# Display the `elasticity` plot stored in the diagnostics result.
diag$plots$elasticity

Equal-chance plausibility by q-band

# Print the pooled equal-chance table. The low_lo / low_hi columns echo the
# low_conf = c(0.2, 0.5) band passed to dfdr_run_all().
diag$tables$equal_chance_pooled
#> # A tibble: 1 × 12
#>   qmax_export low_lo low_hi N_test N_D_test pi_D_hat effect_abs ci95_lo ci95_hi
#>         <dbl>  <dbl>  <dbl>  <int>    <int>    <dbl>      <dbl>   <dbl>   <dbl>
#> 1         0.5    0.2    0.5    596       26   0.0436      0.456  0.0299  0.0632
#> # ℹ 3 more variables: p_value_binom <dbl>, pass_minN <lgl>, list <chr>
# Display the equal-chance plot for the `global` universe.
# NOTE(review): the `__global` suffix presumably matches the list name given
# in `xs` -- confirm.
diag$plots$equal_chance__global
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
#> ℹ The deprecated feature was likely used in the diagFDR package.
#>   Please report the issue to the authors.
#> This warning is displayed once per session.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

Real DIA-NN parquet workflow

The following code shows how to run the pipeline on a real DIA-NN report.parquet.

# Reading the parquet export requires the arrow package.
rep <- read_diann_parquet("path/to/report.parquet")

# Settings shared by several calls below.
q_ceiling <- 0.5                   # value passed as q_max_export everywhere
report_dir <- "diagFDR_diann_out"  # folder receiving tables, plots, report

# (A) Global precursor list built from Global.Q.Value.
# Recommended for experiment-wide (pooled) lists.
x_global_gq <- diann_global_precursor(
  rep,
  q_col = "Global.Q.Value",
  q_max_export = q_ceiling,
  unit = "precursor",
  scope = "global",
  q_source = "Global.Q.Value"
)

# (B) Run x precursor universe built from the run-wise Q.Value.
# Recommended for per-run decisions / QC.
x_runx <- diann_runxprecursor(
  rep,
  q_col = "Q.Value",
  q_max_export = q_ceiling,
  id_mode = "runxid",
  unit = "runxprecursor",
  scope = "runwise",
  q_source = "Q.Value"
)

# (C) Scope-misuse comparator (anti-pattern): per precursor, the minimum
# run-wise q over runs. Useful for demonstrating/diagnosing scope mismatch.
x_minrun <- diann_global_minrunq(
  rep,
  q_col = "Q.Value",
  q_max_export = q_ceiling,
  unit = "precursor",
  scope = "aggregated",
  q_source = "min_run(Q.Value)"
)

# Run all diagnostics on the three universes at a 1% main threshold.
universes <- list(global = x_global_gq, runx = x_runx, minrun = x_minrun)
diag <- dfdr_run_all(
  xs = universes,
  alpha_main = 0.01,
  compute_pseudo_pvalues = TRUE  # also adds the p-value diagnostics
)

# Compare accepted lists across scopes (Jaccard overlap across alpha).
scope_tbl <- dfdr_scope_disagreement(
  x1 = x_global_gq,
  x2 = x_minrun,
  alphas = c(1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2),
  label1 = "Global.Q.Value",
  label2 = "min_run(Q.Value)"
)

# Write outputs to disk (tables + plots; optionally PPTX).
dfdr_write_report(
  diag,
  out_dir = report_dir,
  formats = c("csv", "png", "manifest", "readme", "summary")
)

# Render a single HTML report (requires rmarkdown in Suggests).
dfdr_render_report(diag, out_dir = report_dir)

Interpretation notes