% \iffalse % vim: set expandtab: % vim: set shiftwidth=2: % vim: set tabstop=2: % \fi % \iffalse meta-comment % % Copyright (C) 2026 by Lukas Heindl % --------------------------------------------------------------------------- % This work may be distributed and/or modified under the % conditions of the LaTeX Project Public License, either version 1.3c % of this license or (at your option) any later version. % The latest version of this license is in % http://www.latex-project.org/lppl.txt % and version 1.3c or later is part of all distributions of LaTeX % version 2008/05/04 or later. % % This work has the LPPL maintenance status `maintained'. % % The Current Maintainer of this work is Lukas Heindl. % % This work consists of all files listed in manifest.txt. % % \fi % % \iffalse %<*driver> \ProvidesFile{hexdumptikz-parser-hd.dtx} % %\NeedsTeXFormat{LaTeX2e}[2022-06-01] % %<*driver> \begin{document} \DocInput{\jobname.dtx} \PrintChanges \PrintIndex \end{document} % % \changes{v0.0.0}{2026-05-14}{First draft} % % % \fi % % \iffalse %<*package> %<@@=hexdumptikz_parser_hd> % \fi % % \maketitle % % \begin{abstract} % Parse files with classical hexdump format such as the one produced by \texttt{od}, \texttt{hd} or \emph{Wireshark}. % \end{abstract} % % Identify the package and give the over all version information. 
%    \begin{macrocode}
\ProvidesExplPackage {hexdumptikz-parser-hd} {2026-06-20} {1.0.1} {Printing and annotating hexdumps with TikZ}
%    \end{macrocode}
%
% Load dependencies
%    \begin{macrocode}
\RequirePackage { hexdumptikz-common }
%    \end{macrocode}
%
% \subsection{Public}
% \begin{fn}{\hexdumptikz_parser_hd:nN}
% Public entry point for parsing a hexdump file.
% The file is read line by line.
% Every non-empty line is split into its offset and a sequence of bytes, which are then passed to the callback together with the line index and the \enquote{finished} flag.
% The callback may set the flag to \texttt{true} in order to stop the parsing early.
%   \begin{sideeffects}
%     \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_ior} \\
%     \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_line\_tl} \\
%     \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_line\_int} \\
%     \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_offset\_tl} \\
%     \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_bytes\_seq} \\
%     \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_finished\_bool} \\
%   \end{sideeffects}
%   \begin{args}
%     1 & \ain & filename/-path to parse \\
%     2 & \ain & callback function (called with: line index (int), offset (tl), bytes (seq), finished flag (bool)) \\
%   \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \hexdumptikz_parser_hd:nN #1 #2
  {
%    \end{macrocode}
% Initialization.
% The \enquote{finished} flag has to start out as \texttt{false}: the loop below stops as soon as the flag is \texttt{true} after a callback invocation, so a callback which never touches the flag must see all lines.
%    \begin{macrocode}
    \bool_set_false:N \l_hexdumptikz_parser_finished_bool
    \int_zero:N \l_hexdumptikz_parser_line_int
%    \end{macrocode}
%
% Open the file for reading
%    \begin{macrocode}
    \ior_open:Nn \l_hexdumptikz_parser_ior { #1 }
%    \end{macrocode}
%
% Iterate over the lines of the input file
%    \begin{macrocode}
    \ior_map_variable:NNn \l_hexdumptikz_parser_ior \l_hexdumptikz_parser_line_tl
      {
%    \end{macrocode}
%
% Clean up the line which was just read.
% Reading with tokenization turns a blank input line into a single \texttt{\textbackslash par} token, so these tokens are dropped before the surrounding spaces are trimmed.
% The trimming is done in place and without expanding the content of the line.
%    \begin{macrocode}
        \tl_remove_all:Nn \l_hexdumptikz_parser_line_tl { \par }
        \tl_trim_spaces:N \l_hexdumptikz_parser_line_tl
%    \end{macrocode}
%
% Silently ignore empty lines.
% The test has to look at the \emph{content} of the variable (testing the variable token itself would never report an empty line).
%    \begin{macrocode}
        \tl_if_empty:NF \l_hexdumptikz_parser_line_tl
          {
%    \end{macrocode}
%
% Do the actual parsing
%    \begin{macrocode}
            \@@_normalize_line:NNN
              \l_hexdumptikz_parser_line_tl
              \l_hexdumptikz_parser_offset_tl
              \l_hexdumptikz_parser_bytes_seq
%    \end{macrocode}
%
% Pass the parsed data to the callback
%    \begin{macrocode}
            #2
              \l_hexdumptikz_parser_line_int
              \l_hexdumptikz_parser_offset_tl
              \l_hexdumptikz_parser_bytes_seq
              \l_hexdumptikz_parser_finished_bool
%    \end{macrocode}
%
% Enable the callback to stop the parsing.
%    \begin{macrocode}
            \bool_if:NT \l_hexdumptikz_parser_finished_bool
              { \ior_map_break: }
          }
%    \end{macrocode}
%
% Count all lines including the empty ones.
%    \begin{macrocode}
        \int_incr:N \l_hexdumptikz_parser_line_int
      }
%    \end{macrocode}
%
% Close the input file again (this is also reached if the loop was left early).
%    \begin{macrocode}
    \ior_close:N \l_hexdumptikz_parser_ior
  }
%    \end{macrocode}
% \end{fn}
%
% \subsection{Helpers}
% \begin{fn}{\@@_normalize_line:NNN}
% Normalize a single parsed line (tl) from the slightly different input formats supported by this parser to a unified sequence.
% Note that the line variable passed as first argument is modified in place: the leading offset is removed from it.
%   \begin{sideeffects}
%     \sclobber & \sdir & \texttt{l\_tmpa\_seq} \\
%   \end{sideeffects}
%   \begin{args}
%     1 & \ain & parsed line (tl), the leading offset gets stripped from it \\
%     2 & \aout & offset (tl) \\
%     3 & \aout & bytes (seq) \\
%   \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \@@_normalize_line:NNN #1 #2 #3
  {
%    \end{macrocode}
% Extract the offset / address from the start of the line.
% A line without a valid offset raises the critical error \texttt{no-valid-offset}.
%    \begin{macrocode}
    \regex_extract_once:NVNF \c_hexdumptikz_parser_addr_regex #1 \l_tmpa_seq
      { \msg_critical:nnV { hexdumptikz-parser } { no-valid-offset } #1 }
%    \end{macrocode}
% The regex matched without the \texttt{0x} (in order to make the prefix optional) $\to$ add it here for normalization purposes.
% Item 1 of the extracted sequence is the complete match, item 2 is the first captured group.
%    \begin{macrocode}
    \tl_set:Ne #2 { 0x \seq_item:Nn \l_tmpa_seq { 2 } }
%    \end{macrocode}
%
% Remove the leading offset / address so the hex-digits forming the \enquote{values} are at the start of the string
%    \begin{macrocode}
    \regex_replace_once:NnN \c_hexdumptikz_parser_addr_regex { } #1
%    \end{macrocode}
%
% Convert the series of hex-digits to a sequence of bytes (the offset is only passed along for error messages)
%    \begin{macrocode}
    \@@_hexcompact_to_seq:NNN #1 #3 #2
  }
%    \end{macrocode}
% \end{fn}
%
% \begin{fn}{\@@_hexcompact_to_seq:NNN}
% Convert/Sanitize a series of hex digits to a sequence of bytes
%   \begin{sideeffects}
%     \sclobber & \sdir & \texttt{l\_tmpa\_str} \\
%   \end{sideeffects}
%   \begin{args}
%     1 & \ain & 
series of hex-digits (tl) \\
%     2 & \aout & sequence of parsed bytes (seq) \\
%     3 & \ain & offset (only used to generate nicer error messages which indicate the location of the error in the file) \\
%     - & \ain & \texttt{l\_hexdumptikz\_parser\_strict\_byte\_num\_bool} \\
%     - & \ain & \texttt{l\_hexdumptikz\_common\_bytes\_per\_row\_int} \\
%     - & \ain & \texttt{l\_hexdumptikz\_parser\_leading\_base\_bool} \\
%     - & \ain/\aout & \texttt{l\_hexdumptikz\_parser\_last\_line\_seen\_bool} \\
%     - & \ain & \texttt{l\_hexdumptikz\_parser\_strict\_hex\_bool} \\
%   \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \@@_hexcompact_to_seq:NNN #1 #2 #3
  {
%    \end{macrocode}
% First some optional checks and transformations:
%
% In strict mode no further line may follow once a line which was not fully populated has been seen.
%    \begin{macrocode}
    \bool_lazy_and:nnT
      { \l_hexdumptikz_parser_strict_byte_num_bool }
      { \l_hexdumptikz_parser_last_line_seen_bool }
      {
        \msg_critical:nneV { hexdumptikz-parser } { weird-byte-count }
          { \int_use:N \l_hexdumptikz_common_bytes_per_row_int } #3
      }
%    \end{macrocode}
% If requested, strip one leading \texttt{0x} (the marker for the hexadecimal base); its absence is a critical error.
%    \begin{macrocode}
    \bool_if:NT \l_hexdumptikz_parser_leading_base_bool
      {
        \regex_replace_once:NnNF
          \c_hexdumptikz_common_leading_hex_base_regex { } #1
          { \msg_critical:nnV { hexdumptikz-parser } { leading-base-missing } #1 }
      }
%    \end{macrocode}
% Clean up and do some initialization: start with an empty result and a string copy of the input to work on.
%    \begin{macrocode}
    \seq_clear:N #2
    \str_set:Ne \l_tmpa_str { #1 }
%    \end{macrocode}
%
% Consume the working string two characters at a time until nothing is left.
%    \begin{macrocode}
    \bool_until_do:nn { \str_if_empty_p:N \l_tmpa_str }
      {
%    \end{macrocode}
%
% Guard against more bytes than configured per row (a limit of zero or less disables this guard).
%
% In strict mode this is a critical error, otherwise the remainder of the line is silently dropped.
% Silently dropping the remainder is needed for rudimentary support of the \emph{canonical} format which also shows the ASCII representation at the end of each line.
% The support is not full though, as this fails if the last row/line is not fully populated (since this parser is very liberal in terms of where and how many spaces can be present).
%    \begin{macrocode}
        \bool_lazy_and:nnT
          { \int_compare_p:n { \l_hexdumptikz_common_bytes_per_row_int > 0 } }
          {
            \int_compare_p:n
              { \seq_count:N #2 >= \l_hexdumptikz_common_bytes_per_row_int }
          }
          {
            \bool_if:NT \l_hexdumptikz_parser_strict_byte_num_bool
              {
                \msg_critical:nnVeV { hexdumptikz-parser } { too-many-bytes }
                  #3
                  { \int_use:N \l_hexdumptikz_common_bytes_per_row_int }
                  \l_tmpa_str
              }
%    \end{macrocode}
% Emptying the working string ends the parsing of the current line gracefully.
%    \begin{macrocode}
            \str_clear:N \l_tmpa_str
          }
        \str_if_empty:NF \l_tmpa_str
          {
%    \end{macrocode}
%
% Optionally check for valid hex characters. In principle there is no issue if non-hex characters are present as the content is not interpreted.
%    \begin{macrocode}
            \bool_if:NT \l_hexdumptikz_parser_strict_hex_bool
              {
                \regex_if_match:NVF
                  \c_hexdumptikz_parser_leading_hex_byte_regex \l_tmpa_str
                  { \msg_critical:nnV { hexdumptikz-parser } { invalid-hex-digits } \l_tmpa_str }
              }
%    \end{macrocode}
%
% Core parsing step: the first two non-space characters form the next byte, everything from the third non-space character on remains to be parsed.
%    \begin{macrocode}
            \seq_put_right:Ne #2
              { \str_range_ignore_spaces:Vnn \l_tmpa_str { 1 } { 2 } }
            \str_set:Ne \l_tmpa_str
              { \str_range_ignore_spaces:Vnn \l_tmpa_str { 3 } { -1 } }
          }
      }
%    \end{macrocode}
% Remember if this line was not fully populated: normally only the last line of the input is allowed to be short.
%    \begin{macrocode}
    \int_compare:nT
      { \seq_count:N #2 < \l_hexdumptikz_common_bytes_per_row_int }
      { \bool_set_true:N \l_hexdumptikz_parser_last_line_seen_bool }
  }
%    \end{macrocode}
% \end{fn}
%
% \begin{fn}{\hexdumptikz_parser_dbg:NNNN}
% Dummy callback which just prints its arguments and can be used for debugging.
%   \begin{sideeffects}
%   \end{sideeffects}
%   \begin{args}
%     1 & \ain & line-index (int) \\
%     2 & \ain & parsed offset/address (tl) \\
%     3 & \ain & parsed bytes (seq) \\
%     4 & \aout & variable (bool) to signal whether printing / the selection has finished; this callback always sets it to \texttt{false} \\
%   \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \hexdumptikz_parser_dbg:NNNN #1 #2 #3 #4
  {
%    \end{macrocode}
% Print the arguments to the terminal.
% Spaces are ignored in the code, so every space that should show up in the output has to be written as \texttt{\textasciitilde}.
%    \begin{macrocode}
    \iow_term:e { line~idx:~\int_use:N #1 }
    \iow_term:e { offset:~~~\tl_use:N #2 }
    \iow_term:e { bytes:~~~~\seq_use:Nn #3 {~|~} }
    \iow_term:e { }
%    \end{macrocode}
% The debug output is never \enquote{finished} early: state this explicitly instead of relying on the initial value of the flag.
%    \begin{macrocode}
    \bool_set_false:N #4
  }
%    \end{macrocode}
% \end{fn}
%
% \begin{macro}{\hexdumptikzParserDbg}
% Short macro which can aid in debugging: parses the given file and prints every parsed line to the terminal.
%    \begin{macrocode}
\NewDocumentCommand { \hexdumptikzParserDbg } { m }
  {
    \hexdumptikz_parser_hd:nN { #1 } \hexdumptikz_parser_dbg:NNNN
  }
%    \end{macrocode}
% \end{macro}
% \iffalse
%</package>
% \fi
%
% \Finale