## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center"
)

library(pairwiseLLM)
library(dplyr)
library(readr)
library(tidyr)
library(stringr)
library(knitr)

# Shared table helpers ----
#
# Every summary table in this script is built from `summary_tbl` with the same
# three display columns and the same column alignment. The helpers below hold
# that logic in one place so the tables cannot drift apart.

# Column alignment shared by all summary tables: three left-aligned label
# columns followed by three right-aligned numeric columns.
bias_table_align <- c("l", "l", "l", "r", "r", "r")

# Add the three display columns used by every summary table.
#
# `tbl` must contain the columns `prop_consistent`, `prop_pos1`, and
# `p_sample1_overall`. The two proportions are rounded to 3 decimals; the
# p-value is rendered as a fixed-notation string with 3 decimals.
add_bias_display_columns <- function(tbl) {
  tbl |>
    mutate(
      Prop_Consistent = round(prop_consistent, 3),
      Prop_SAMPLE_1 = round(prop_pos1, 3),
      Binomial_Test_p = formatC(p_sample1_overall, format = "f", digits = 3)
    )
}

# Build the table for a single prompt template.
#
# Keeps the rows of `tbl` whose `template_id` equals `template`, orders them by
# backend, model, and thinking, and returns the kable with one row per
# backend/model/thinking combination. No caption is passed, so `kable()` uses
# its own default caption handling.
template_bias_table <- function(tbl, template) {
  tbl |>
    # `.env$` makes sure the function argument is used even if `tbl` happens
    # to have a column called `template`.
    filter(template_id == .env$template) |>
    arrange(backend, model, thinking) |>
    add_bias_display_columns() |>
    select(
      Backend = backend,
      Model = model,
      Thinking = thinking,
      Prop_Consistent,
      Prop_SAMPLE_1,
      Binomial_Test_p
    ) |>
    kable(align = bias_table_align)
}

# Build the table for a single backend.
#
# Keeps the rows of `tbl` whose `backend` equals `backend_id`, orders them by
# template, model, and thinking, and returns the kable. `backend_label` is the
# display name placed at the start of the caption.
backend_bias_table <- function(tbl, backend_id, backend_label) {
  tbl |>
    filter(backend == .env$backend_id) |>
    arrange(template_id, model, thinking) |>
    add_bias_display_columns() |>
    select(
      Template = template_id,
      Model = model,
      Thinking = thinking,
      Prop_Consistent,
      Prop_SAMPLE_1,
      Binomial_Test_p
    ) |>
    kable(
      caption = paste0(
        backend_label,
        ": Positional-bias summary by template, model, and thinking configuration."
      ),
      align = bias_table_align
    )
}

## -----------------------------------------------------------------------------
td <- trait_description("overall_quality")
td

## -----------------------------------------------------------------------------
data("example_writing_samples", package = "pairwiseLLM")

# Inspect the structure
glimpse(example_writing_samples)

# Print the 20 samples (full text)
example_writing_samples |>
  kable(
    caption = "20 example writing samples included with pairwiseLLM."
  )

## -----------------------------------------------------------------------------
template_ids <- paste0("test", 1:5)
template_ids

## -----------------------------------------------------------------------------
cat(substr(get_prompt_template("test1"), 1, 500), "...\n")

## ----eval = FALSE-------------------------------------------------------------
# # Retrieve another template
# tmpl_test3 <- get_prompt_template("test3")
#
# # Use it to build a concrete prompt for a single comparison
# pairs <- example_writing_samples |>
#   make_pairs() |>
#   head(1)
#
# prompt_text <- build_prompt(
#   template = tmpl_test3,
#   trait_name = td$name,
#   trait_desc = td$description,
#   text1 = pairs$text1[1],
#   text2 = pairs$text2[1]
# )
#
# cat(prompt_text)

## -----------------------------------------------------------------------------
pairs_all <- example_writing_samples |>
  make_pairs()

pairs_forward <- pairs_all |>
  alternate_pair_order()

pairs_reverse <- sample_reverse_pairs(
  pairs_forward,
  reverse_pct = 1.0,
  seed = 2002
)

pairs_forward[1:3, c("ID1", "ID2")]
pairs_reverse[1:3, c("ID1", "ID2")]

## -----------------------------------------------------------------------------
summary_path <- system.file(
  "extdata",
  "template_test_summary_all.csv",
  package = "pairwiseLLM"
)

# system.file() returns "" when the file is not present in the installed
# package, so an empty path means the bundled results are missing.
if (!nzchar(summary_path)) {
  stop("Data file not found in installed package.", call. = FALSE)
}

summary_tbl <- readr::read_csv(summary_path, show_col_types = FALSE)
head(summary_tbl)

## -----------------------------------------------------------------------------
cat(get_prompt_template("test1"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test1")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test2"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test2")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test3"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test3")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test4"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test4")

## -----------------------------------------------------------------------------
cat(get_prompt_template("test5"))

## -----------------------------------------------------------------------------
template_bias_table(summary_tbl, "test5")

## -----------------------------------------------------------------------------
backend_bias_table(summary_tbl, "anthropic", "Anthropic")

## -----------------------------------------------------------------------------
backend_bias_table(summary_tbl, "gemini", "Gemini")

## -----------------------------------------------------------------------------
backend_bias_table(summary_tbl, "openai", "OpenAI")

## -----------------------------------------------------------------------------
backend_bias_table(summary_tbl, "together", "TogetherAI")