\name{convert_to_dictionary}
\alias{convert_to_dictionary}
\title{Convert Translation Data to a Sumerian Dictionary}
\description{
Converts a data frame of Sumerian translations into a structured dictionary
format, adding cuneiform representations and phonetic readings for each sign.
}
\usage{
convert_to_dictionary(df, mapping = NULL)
}
\arguments{
\item{df}{A data frame with columns \code{sign_name}, \code{type}, and
\code{meaning}, typically produced by \code{\link{read_translated_text}}.}

\item{mapping}{A data frame containing sign-to-reading mappings with columns
\code{name}, \code{cuneiform} and \code{syllables}. If \code{NULL} (default), the package's
built-in mapping file \code{etcsl_mapping.txt} is used.}
}
\value{
A data frame with the following columns:
\describe{
\item{sign_name}{The normalized Sumerian text (e.g., \code{"A"}, \code{"AN"}, \code{"A2.TAB"})}
\item{row_type}{Type of entry: \code{"cunei."} (cuneiform character), \code{"reading"} (phonetic readings), or \code{"trans."} (translation)}
\item{count}{Number of occurrences for translations; \code{NA} for cuneiform and reading entries}
\item{type}{Grammatical type (e.g., \code{"S"}, \code{"V"}, \code{"A"}) for translations; empty string for other row types}
\item{meaning}{The cuneiform character(s), phonetic reading(s), or translated meaning depending on \code{row_type}}
}

The data frame is sorted by \code{sign_name}, \code{row_type}, and
descending \code{count}.
}
\details{
\subsection{Processing Steps}{
\enumerate{
\item Aggregates translations and counts occurrences of each unique combination in \code{df}
\item Looks up phonetic readings and cuneiform signs for each sign component
\item Combines cuneiform, reading, and translation rows into a single data frame
\item Sorts the result by sign name, row type, and descending count
}
}

\subsection{Reading Format}{
Phonetic readings are formatted as follows:
\itemize{
\item Multiple possible readings are enclosed in braces: \code{\{a, dur5, duru5\}}
\item For compound signs, readings of individual components are joined with hyphens
\item If a sign has more than three possible readings in a compound, only the first three are shown, followed by \code{...}
\item Unknown readings are marked with \code{?}
}
}
}
\seealso{
\code{\link{read_translated_text}} for reading translation files,
\code{\link{make_dictionary}} for creating a complete dictionary with
cuneiform representations and readings in a single step.
}
\examples{
# Locate the example text bundled with the package and load its translations
txt_file <- system.file("extdata", "text_with_translations.txt", package = "sumer")
trans    <- read_translated_text(txt_file)

# Inspect the first rows of the raw translations
head(trans)

# Apply a custom unification: drop the word "the", then collapse whitespace
no_article    <- gsub("\\\\bthe\\\\b", "", trans$meaning, ignore.case = TRUE)
trans$meaning <- trimws(gsub("\\\\s+", " ", no_article))

# Inspect the translations again after the unification
head(trans)

# Build the dictionary from the cleaned translations
dict <- convert_to_dictionary(trans)

# Inspect the first rows of the dictionary
head(dict)

# Show all entries belonging to one specific sign
is_en <- dict$sign_name == "EN"
dict[is_en, ]

# Pass the mapping explicitly (here the bundled mapping file is loaded by hand)
map_file <- system.file("extdata", "etcsl_mapping.txt", package = "sumer")
sign_map <- read.csv2(map_file, sep=";", na.strings="")
trans    <- read_translated_text(txt_file, mapping = sign_map)
dict     <- convert_to_dictionary(trans, mapping = sign_map)
head(dict)
}
