% \iffalse meta-comment
%
%% File: l3tl-analysis.dtx Copyright (C) 2011-2012 The LaTeX3 Project
%%
%% It may be distributed and/or modified under the conditions of the
%% LaTeX Project Public License (LPPL), either version 1.3c of this
%% license or (at your option) any later version.  The latest version
%% of this license is in the file
%%
%%    http://www.latex-project.org/lppl.txt
%%
%% This file is part of the "l3experimental bundle" (The Work in LPPL)
%% and all files in that bundle must be distributed together.
%%
%% The released version of this bundle is available from CTAN.
%%
%% -----------------------------------------------------------------------
%%
%% The development version of the bundle can be found at
%%
%%    http://www.latex-project.org/svnroot/experimental/trunk/
%%
%% for those people who are interested.
%%
%%%%%%%%%%%
%% NOTE: %%
%%%%%%%%%%%
%%
%%   Snapshots taken from the repository represent work in progress and may
%%   not work or may contain conflicting material!  We therefore ask
%%   people _not_ to put them into distributions, archives, etc. without
%%   prior consultation with the LaTeX3 Project.
%%
%% -----------------------------------------------------------------------
%
%<*driver|package>
\RequirePackage{expl3}
\GetIdInfo$Id: l3tl-analysis.dtx 4745 2014-05-06 10:41:27Z joseph $
  {L3 Experimental token lists analysis}
%</driver|package>
%<*driver>
\documentclass[full]{l3doc}
\usepackage{amsmath}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
%
% \title{^^A
%   The \textsf{l3tl-analysis} package: analysing token lists^^A
%   \thanks{This file describes v\ExplFileVersion,
%     last revised \ExplFileDate.}^^A
% }
%
% \author{^^A
%  The \LaTeX3 Project\thanks
%    {^^A
%      E-mail:
%        \href{mailto:latex-team@latex-project.org}
%          {latex-team@latex-project.org}^^A
%    }^^A
% }
%
% \date{Released \ExplFileDate}
%
% \maketitle
%
% \begin{documentation}
%
% \section{\pkg{l3tl-analysis} documentation}
%
% This module mostly provides internal functions for use in the
% \pkg{l3regex} module. However, it provides as a side-effect a user
% debugging function, very similar to the \cs{ShowTokens} macro from the
% \pkg{ted} package.
%
% \begin{function}{\tl_show_analysis:N, \tl_show_analysis:n}
%   \begin{syntax}
%     \cs{tl_show_analysis:N} \meta{tl~var}
%     \cs{tl_show_analysis:n} \Arg{token list}
%   \end{syntax}
%   Displays to the terminal the detailed decomposition of the
%   \meta{token list} into tokens, showing the category code of each
%   character token, the meaning of control sequences and active
%   characters, and the value of registers.
% \end{function}
%
% \subsection{Internal functions}
%
% \begin{variable}{\s__tl}
%   The format used to store token lists internally uses the scan mark
%   \cs{s__tl} as a delimiter.
% \end{variable}
%
% \begin{function}{\__tl_analysis_map_inline:nn}
%   \begin{syntax}
%     \cs{__tl_analysis_map_inline:nn} \Arg{token list} \Arg{inline function}
%   \end{syntax}
%   Applies the \meta{inline function} to each individual \meta{token}
%   in the \meta{token list}. The \meta{inline function} receives three
%   arguments:
%   \begin{itemize}
%     \item \meta{tokens}, which both \texttt{o}-expand and
%       \texttt{x}-expand to the \meta{token}. The detailed form of
%       \meta{tokens} may change in later releases.
%     \item \meta{catcode}, a capital hexadecimal digit which denotes
%       the category code of the \meta{token} (0: control sequence, 1:
%       begin-group, 2: end-group, 3: math shift, 4: alignment tab, 6:
%       parameter, 7: superscript, 8: subscript, A: space, B: letter,
%       C: other, D: active).
%     \item \meta{char code}, a decimal representation of the character
%       code of the token, $-1$ if it is a control sequence (with
%       \meta{catcode} $0$).
%   \end{itemize}
% \end{function}
%
% For optimizations in \pkg{l3regex} (when matching control sequences),
% it may be useful to provide a \cs{__tl_analysis_from_str_map_inline:nn}
% function, perhaps named \cs{__str_analysis_map_inline:nn}.
%
% \subsection{Internal format}
%
% The task of the \pkg{l3tl-analysis} module is to convert token lists
% to an internal format which allows us to extract all the relevant
% information about individual tokens (category code, character code),
% as well as reconstruct the token list quickly. This internal format is
% used in \pkg{l3regex} where we need to support arbitrary tokens, and
% it is used in conversion functions in \pkg{l3str-convert}, where we wish to
% support clusters of characters instead of single tokens.
%
% We thus need a way to encode any \meta{token} (even begin-group and
% end-group character tokens) in a way amenable to manipulating tokens
% individually. The best we can do is to find \meta{tokens} which both
% \texttt{o}-expand and \texttt{x}-expand to the given
% \meta{token}. Collecting more information about the category code and
% character code is also useful for regular expressions, since most
% regexes are catcode-agnostic. The internal format thus takes the form
% of a succession of items of the form
% \begin{quote}
%   \meta{tokens} \cs{s__tl} \meta{catcode} \meta{char code} \cs{s__tl}
% \end{quote}
% The \meta{tokens} \texttt{o}- \emph{and} \texttt{x}-expand to the
% original token in the token list or to the cluster of tokens
% corresponding to one Unicode character in the given encoding (for
% \pkg{l3str-convert}). The \meta{catcode} is given as a single hexadecimal
% digit, $0$ for control sequences. The \meta{char code} is given as a
% decimal number, $-1$ for control sequences.
%
% Using delimited arguments lets us build the \meta{tokens}
% progressively when doing an encoding conversion in \pkg{l3str-convert}. On the
% other hand, the delimiter \cs{s__tl} may not appear unbraced in
% \meta{tokens}. This is not a problem because we are careful to wrap
% control sequences in braces (as an argument to \cs{exp_not:n}) when
% converting from a general token list to the internal format.
%
% The current rule for converting a \meta{token} to a balanced set of
% \meta{tokens} which both \texttt{o}-expands and \texttt{x}-expands to
% it is the following.
% \begin{itemize}
%   \item A control sequence |\cs| becomes |\exp_not:n { \cs }|
%     \cs{s__tl} $0$ $-1$ \cs{s__tl}.
%   \item A begin-group character |{| becomes \cs{exp_after:wN} |{|
%     \cs{if_false:} |}| \cs{fi:} \cs{s__tl} $1$ \meta{char code}
%     \cs{s__tl}.
%   \item An end-group character |}| becomes \cs{if_false:} |{| \cs{fi:}
%     |}| \cs{s__tl} $2$ \meta{char code} \cs{s__tl}.
%   \item A character with any other category code becomes
%     \cs{exp_not:n} \Arg{character} \cs{s__tl} \meta{hex catcode}
%     \meta{char code} \cs{s__tl}.
% \end{itemize}
%
% ^^A todo: ask LuaTeX list for an \ifx\undefined <active char>
% ^^A which does not add the <active char> in memory.
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3tl-analysis} implementation}
%
%    \begin{macrocode}
%<*initex|package>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=tl_analysis>
%    \end{macrocode}
%
%    \begin{macrocode}
\ProvidesExplPackage
  {\ExplFileName}{\ExplFileDate}{\ExplFileVersion}{\ExplFileDescription}
\RequirePackage{l3str}
%    \end{macrocode}
%
% \subsection{Variables and helper functions}
%
% \begin{variable}{\s__tl}
%   The scan mark \cs{s__tl} is used as a delimiter in the internal
%   format: within each item it separates the \meta{tokens} from the
%   \meta{catcode} and \meta{char code}, and it also ends the item.
%   This is more practical than using a quark, because we would
%   then need to control expansion much more carefully: compare
%   \cs{__int_value:w} |`#1| \cs{s__tl} with \cs{__int_value:w} |`#1|
%   \cs{exp_stop_f:} \cs{exp_not:N} \cs{q_mark} to extract a character
%   code followed by the delimiter in an \texttt{x}-expansion.
%    \begin{macrocode}
\__scan_new:N \s__tl
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_internal_tl}
%   This token list variable is used to hand the argument of
%   \cs{tl_show_analysis:n} to \cs{tl_show_analysis:N}.
%   ^^A NOTE(review): neither of these two functions is defined in the
%   ^^A part of the file reviewed here; confirm this description against
%   ^^A their definitions.
%    \begin{macrocode}
\tl_new:N \l_@@_internal_tl
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_token}
% \begin{variable}{\l_@@_char_token}
%   The tokens in the token list are probed with the \TeX{} primitive
%   \tn{futurelet}. We use \cs{l_@@_token} in that
%   construction. In some cases, we convert the following token to a
%   string before probing it: then the token variable used is
%   \cs{l_@@_char_token}, which receives the first character of that
%   string. Both variables are initially set equal to the character
%   |?|; this is only a placeholder, since the first pass always
%   assigns a new meaning to each variable before testing it.
%    \begin{macrocode}
\cs_new_eq:NN \l_@@_token ?
\cs_new_eq:NN \l_@@_char_token ?
%    \end{macrocode}
% \end{variable}
% \end{variable}
%
% \begin{variable}{\l_@@_normal_int}
%   The number of normal (\texttt{N}-type argument) tokens since the
%   last special token. Each time the first pass stores a special
%   token, this number is saved as the natural length (in scaled
%   points) of the \tn{skip} register for that special token, and the
%   counter is reset to zero.
%    \begin{macrocode}
\int_new:N \l_@@_normal_int
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_index_int}
%   During the first pass, this is the index in the array being built,
%   namely the number of the \tn{toks} and \tn{skip} registers which
%   describe the next special token.
%   During the second pass, it is equal to the maximum index in the
%   array from the first pass, which is the index of the end-group
%   character closing the token list: the second pass stops when it
%   reaches that index.
%    \begin{macrocode}
\int_new:N \l_@@_index_int
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_nesting_int}
%   Nesting depth of explicit begin-group and end-group characters
%   during the first pass. This lets us detect the end of the token list
%   without a reserved end-marker: the depth starts at~$0$ and becomes
%   $-1$ exactly when the end-group character placed after the token
%   list is read.
%    \begin{macrocode}
\int_new:N \l_@@_nesting_int
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_type_int}
%   When encountering special characters, we record their \enquote{type}
%   in this integer. It is set by \cs{@@_a_type:w} ($0$ for a space
%   token, $1$ for a begin-group token, $-1$ for an end-group token,
%   $2$ for anything else), and it is doubled by \cs{@@_a_store:} when
%   the character code of the special token is~$32$.
%    \begin{macrocode}
\int_new:N \l_@@_type_int
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_result_tl}
%   The result of the conversion is stored in this token list, with a
%   succession of items of the form
%   \begin{quote}
%     \meta{tokens} \cs{s__tl} \meta{catcode} \meta{char code} \cs{s__tl}
%   \end{quote}
%   The variable is set globally by \cs{@@_b:n}, so that the result
%   remains available after the group in which \cs{@@:n} does all of
%   its work has ended.
%    \begin{macrocode}
\tl_new:N \g_@@_result_tl
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[int, EXP]{\@@_extract_charcode:}
% \begin{macro}[aux, EXP]{\@@_extract_charcode_aux:w}
%   Extracting the character code from the meaning of
%   \cs{l_@@_token}. This has no error checking, and should
%   only be assumed to work for begin-group and end-group character
%   tokens. It produces a number in the form |`|\meta{char}.
%   For such tokens the meaning consists of two words, each followed
%   by a space (\enquote{\texttt{begin-group character}} or
%   \enquote{\texttt{end-group character}}), and then the character
%   itself. The auxiliary removes the two space-delimited words and
%   leaves a left quote in front of the character which remains in the
%   input stream. The result is thus only meaningful where \TeX{}
%   expects a number, as in the \tn{lccode} assignment of
%   \cs{@@_a_group:nw}.
%    \begin{macrocode}
\cs_new_nopar:Npn \@@_extract_charcode:
  {
    \exp_after:wN \@@_extract_charcode_aux:w
      \token_to_meaning:N \l_@@_token
  }
\cs_new:Npn \@@_extract_charcode_aux:w #1 ~ #2 ~ { ` }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[int, EXP]{\@@_cs_space_count:NN}
% \begin{macro}[aux, EXP]{\@@_cs_space_count:w}
% \begin{macro}[aux, EXP]{\@@_cs_space_count_end:w}
%   Counts the number of spaces in the string representation of its
%   second argument, as well as the number of characters following the
%   last space in that representation, and feeds the two numbers as
%   semicolon-delimited arguments to the first argument. When this
%   function is used, the escape character is printable and non-space.
%
%   The number of spaces is accumulated as a sum inside an integer
%   expression which starts at zero. Each call to
%   \cs{@@_cs_space_count:w} grabs the characters up to the next space
%   as |#1|, skips both copies of |#1| with \cs{if_false:} \ldots{}
%   \cs{fi:}, adds one to the sum and loops. Once the string contains
%   no further space, the argument extends up to the space in the
%   trailer |; ~ !|, hence it contains the \cs{fi:} and the
%   \cs{@@_cs_space_count_end:w} which were placed after the string.
%   The \cs{fi:} in the first copy of |#1| then closes the
%   \cs{if_false:} early, and \cs{@@_cs_space_count_end:w} is called:
%   it finds the characters following the last space at the start of
%   the second copy of |#1| (this is why |#1| appears twice in
%   \cs{@@_cs_space_count:w}), and discards everything else up to the
%   final~|!|. It leaves a semicolon, which ends the integer
%   expression, followed by the number of characters after the last
%   space and a second semicolon.
%    \begin{macrocode}
\cs_new:Npn \@@_cs_space_count:NN #1 #2
  {
    \exp_after:wN #1
    \__int_value:w \__int_eval:w \c_zero
      \exp_after:wN \@@_cs_space_count:w
        \token_to_str:N #2
        \fi: \@@_cs_space_count_end:w ; ~ !
  }
\cs_new:Npn \@@_cs_space_count:w #1 ~
  {
    \if_false: #1 #1 \fi:
    + \c_one
    \@@_cs_space_count:w
  }
\cs_new:Npn \@@_cs_space_count_end:w ; #1 \fi: #2 !
  { \exp_after:wN ; \__int_value:w \str_count_ignore_spaces:n {#1} ; }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Plan of attack}
%
% Our goal is to produce a token list of the form roughly
% \begin{quote}
%   \meta{token 1} \cs{s__tl} \meta{catcode 1} \meta{char code 1} \cs{s__tl} \\
%   \meta{token 2} \cs{s__tl} \meta{catcode 2} \meta{char code 2} \cs{s__tl} \\
%   \ldots{}
%   \meta{token N} \cs{s__tl} \meta{catcode N} \meta{char code N} \cs{s__tl}
% \end{quote}
% Most but not all tokens can be grabbed as an undelimited
% (\texttt{N}-type) argument by \TeX{}. The plan is to have a two pass
% system. In the first pass, locate special tokens, and store them in
% various \tn{toks} registers. In the second pass, which is done within
% an \texttt{x}-expanding assignment, normal tokens are taken in as
% \texttt{N}-type arguments, and special tokens are retrieved from the
% \tn{toks} registers, and removed from the input stream by some means.
% The whole process takes linear time, because we avoid building the
% result one item at a time.
%
% To ease the difficult first pass, we first do some setup with
% \cs{@@_setup:n}. Active characters set equal to non-active
% characters cause trouble, so we disable all active characters which
% appear in the token list by setting them equal to \texttt{undefined}
% locally. At the start of the first pass we then set the escape
% character to be printable (backslash, but this later
% oscillates between slash and backslash): this makes it possible to
% distinguish characters from control sequences.
%
% A token has two characteristics: its \tn{meaning}, and what it looks
% like for \TeX{} when it is in scanning mode (\emph{e.g.}, when
% capturing parameters for a macro). For our purposes, we distinguish
% the following meanings:
% \begin{itemize}
%   \item begin-group token (category code $1$), either space (character
%     code $32$), or non-space;
%   \item end-group token (category code $2$), either space (character
%     code $32$), or non-space;
%   \item space token (category code $10$, character code $32$);
%   \item anything else (then the token is always an \texttt{N}-type
%     argument).
% \end{itemize}
% The token itself can \enquote{look like} one of the following
% \begin{itemize}
%   \item a non-active character, in which case its meaning is
%     automatically that associated to its character code and category
%     code, we call it \enquote{true} character;
%   \item an active character (we eliminate those in the setup step);
%   \item a control sequence.
% \end{itemize}
% The only tokens which are not valid \texttt{N}-type arguments are true
% begin-group characters, true end-group characters, and true spaces.
% We will detect those characters by scanning ahead with \tn{futurelet},
% then distinguishing true characters from control sequences set equal
% to them using the \tn{string} representation.
%
% The second pass is a simple exercise in expandable loops.
%
% \begin{macro}[int]{\@@:n}
%   Converts the token list |#1| to the internal format, and stores
%   the result in \cs{g_@@_result_tl}. The three steps (setup, first
%   pass, second pass) each receive the whole token list.
%   Everything is done within a group, and all definitions will be
%   local; only the result is assigned globally, by \cs{@@_b:n}.
%   We use \cs{group_align_safe_begin/end:} to avoid problems in
%   case \cs{@@:n} is used within an alignment and its argument
%   contains alignment tab tokens.
%    \begin{macrocode}
\cs_new_protected:Npn \@@:n #1
  {
    \group_begin:
      \group_align_safe_begin:
        \@@_setup:n {#1}
        \@@_a:n {#1}
        \@@_b:n {#1}
      \group_align_safe_end:
    \group_end:
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Setup}
%
% \begin{macro}[int]{\@@_setup:n}
% \begin{macro}[aux]{\@@_disable_loop:N}
%   Active characters can cause problems later on in the processing,
%   so the first step is to disable them, by setting them to
%   \texttt{undefined}. Since Unicode contains too many characters
%   to loop over all of them, we instead loop over the input token
%   list as a string: any active character in the token list
%   must appear in its string representation. The string is shortened
%   a little by making the escape character unprintable. The active
%   space must be disabled separately (the loop skips over it otherwise),
%   and we end the loop by feeding an odd non-\texttt{N}-type
%   argument to the looping macro.
%
%   To disable the active character with a given character code,
%   \cs{@@_disable_loop:N} sets the \tn{lccode} of the character with
%   code~$0$ to that code, then applies \cs{tl_to_lowercase:n} to an
%   active character with code~$0$: this produces the wanted active
%   character, which is set equal to \cs{tex_undefined:D}. The braced
%   space |{ ~ }| which follows the string takes care of the active
%   space. In the final braced group |{ ? ~ \__prg_break: }|, the space
%   ends the number assigned to the \tn{lccode}, so that
%   \cs{__prg_break:} is performed right after that assignment: the
%   remainder of the loop body is dropped and processing resumes after
%   \cs{__prg_break_point:}. All assignments made here are local; they
%   are undone at the end of the group opened by \cs{@@:n}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_setup:n #1
  {
    \int_set_eq:NN \tex_escapechar:D \c_minus_one
    \exp_after:wN \@@_disable_loop:N
      \tl_to_str:n {#1} { ~ } { ? ~ \__prg_break: }
    \__prg_break_point:
  }
\group_begin:
  \char_set_catcode_active:N \^^@
  \cs_new_protected:Npn \@@_disable_loop:N #1
    {
      \tex_lccode:D \c_zero `#1 ~
      \tl_to_lowercase:n { \tex_let:D ^^@ } \tex_undefined:D
      \@@_disable_loop:N
    }
\group_end:
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{First pass}
%
% The goal of this pass is to detect special (non-\texttt{N}-type) tokens,
% and count how many \texttt{N}-type tokens lie between special tokens.
% Also, we wish to store some representation of each special token
% in a \tn{toks} register.
%
% After the setup step, we have $11$ types of tokens:
% \begin{itemize}
% \item[1.] a true non-space begin-group character;
% \item[2.] a true space begin-group character;
% \item[3.] a true non-space end-group character;
% \item[4.] a true space end-group character;
% \item[5.] a true space blank space character;
% \item[6.] an undefined active character;
% \item[7.] any other true character;
% \item[8.] a control sequence equal to a begin-group token (category code $1$);
% \item[9.] a control sequence equal to an end-group token (category code $2$);
% \item[10.] a control sequence equal to a space token
%   (character code $32$, category code $10$);
% \item[11.] any other control sequence.
% \end{itemize}
% Our first tool is \tn{futurelet}. This cannot distinguish
% case $8$ from $1$ or $2$, nor case $9$ from $3$ or $4$,
% nor case $10$ from case $5$. Those cases will be distinguished
% by applying the \tn{string} primitive to the following token,
% after possibly changing the escape character to ensure that
% a control sequence's string representation cannot be mistaken
% for the true character.
%
% In cases $6$, $7$, and $11$, the following token is a valid
% \texttt{N}-type argument, so we grab it and distinguish the case
% of a character from a control sequence: in the latter case,
% \cs{str_tail:n} \Arg{token} is non-empty, because the
% escape character is printable.
%
% \begin{macro}[int]{\@@_a:n}
%   We read tokens one by one using \tn{futurelet}.
%   While performing the loop, we keep track of the number of
%   true begin-group characters minus the number of
%   true end-group characters in \cs{l_@@_nesting_int}.
%   This reaches $-1$ when we read the closing brace.
%
%   Before the loop starts, the escape character is made printable
%   (character code $92$, a backslash) and the three counters are
%   reset. The token list |#1| is followed by an explicit end-group
%   character, which \TeX{} sees as balanced thanks to the begin-group
%   character hidden between \cs{if_false:} and \cs{fi:}; this closing
%   brace is the last special token recorded. Since \cs{@@_a_store:}
%   increments \cs{l_@@_index_int} after recording each special token,
%   we decrement it once the loop is over, so that it holds the index
%   of that closing brace.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_a:n #1
  {
    \int_set:Nn \tex_escapechar:D { 92 }
    \int_zero:N \l_@@_normal_int
    \int_zero:N \l_@@_index_int
    \int_zero:N \l_@@_nesting_int
    \if_false: { \fi: \@@_a_loop:w #1 }
    \int_decr:N \l_@@_index_int
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[int]{\@@_a_loop:w}
%   Peek at the next token without removing it from the input stream:
%   \tn{futurelet} sets \cs{l_@@_token} equal to that token, then
%   \cs{@@_a_type:w} checks its type. This function is locally
%   redefined by \cs{@@_a_store:} to stop the loop once the closing
%   brace has been read.
%    \begin{macrocode}
\cs_new_protected_nopar:Npn \@@_a_loop:w
  { \tex_futurelet:D \l_@@_token \@@_a_type:w }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[int]{\@@_a_type:w}
%   At this point, \cs{l_@@_token} holds the meaning
%   of the following token. We store in \cs{l_@@_type_int}
%   the meaning of the token ahead:
%   \begin{itemize}
%   \item $0$ space token;
%   \item $1$ begin-group token;
%   \item $-1$ end-group token;
%   \item $2$ other.
%   \end{itemize}
%   The values $0$, $1$, $-1$ correspond to how much a true such
%   character changes the nesting level ($2$ is used only here,
%   and is irrelevant later). Then call the auxiliary for each case:
%   in the \cs{if_case:w} construction the values $0$, $1$ and $2$
%   select \cs{@@_a_space:w}, \cs{@@_a_bgroup:w} and
%   \cs{@@_a_safe:N} respectively, while $-1$ falls through to the
%   \cs{else:} branch, \cs{@@_a_egroup:w}.
%   Note that nesting conditionals here is safe because we only skip
%   over \cs{l_@@_token} if it matches with one of the
%   character tokens (hence is not a primitive conditional).
%    \begin{macrocode}
\cs_new_protected_nopar:Npn \@@_a_type:w
  {
    \l_@@_type_int =
      \if_meaning:w \l_@@_token \c_space_token
        \c_zero
      \else:
        \if_catcode:w \exp_not:N \l_@@_token \c_group_begin_token
          \c_one
        \else:
          \if_catcode:w \exp_not:N \l_@@_token \c_group_end_token
            \c_minus_one
          \else:
            \c_two
          \fi:
        \fi:
      \fi:
    \if_case:w \l_@@_type_int
         \exp_after:wN \@@_a_space:w
    \or: \exp_after:wN \@@_a_bgroup:w
    \or: \exp_after:wN \@@_a_safe:N
    \else: \exp_after:wN \@@_a_egroup:w
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[int]{\@@_a_space:w}
% \begin{macro}[aux]{\@@_a_space_test:w}
%   In this branch, the following token's meaning is a blank space.
%   Apply \tn{string} to that token: if it is a control sequence
%   the result starts with the escape character; otherwise it is
%   a true blank space, whose string representation is also a blank space.
%   We test for that in \cs{@@_a_space_test:w},
%   after grabbing as \cs{l_@@_char_token} the first character
%   of the string representation: \cs{@@_a_space:w} expands
%   \cs{token_to_str:N} before the assignment to
%   \cs{l_@@_char_token} is performed, and \tn{afterassignment}
%   makes \cs{@@_a_space_test:w} run right after that assignment.
%   Also, since \cs{@@_a_store:} expects the special token to be
%   stored in the relevant \tn{toks} register, we do that. The extra
%   \cs{exp_not:n} is unnecessary of course, but it makes the treatment
%   of all tokens more homogeneous.
%   If we discover that the next token was actually a control sequence
%   instead of a true space, then we step the counter of normal tokens.
%   We now have in front of us the whole string representation of
%   the control sequence, including potential spaces; those will appear
%   to be true spaces later in this pass. Hence, all other branches of
%   the code in this first pass need to consider the string representation,
%   so that the second pass does not need to test the meaning of tokens,
%   only strings.
%    \begin{macrocode}
\cs_new_protected_nopar:Npn \@@_a_space:w
  {
    \tex_afterassignment:D \@@_a_space_test:w
    \exp_after:wN \cs_set_eq:NN
    \exp_after:wN \l_@@_char_token
    \token_to_str:N
  }
\cs_new_protected_nopar:Npn \@@_a_space_test:w
  {
    \if_meaning:w \l_@@_char_token \c_space_token
      \tex_toks:D \l_@@_index_int { \exp_not:n { ~ } }
      \@@_a_store:
    \else:
      \int_incr:N \l_@@_normal_int
    \fi:
    \@@_a_loop:w
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[int]{\@@_a_bgroup:w, \@@_a_egroup:w}
% \begin{macro}[aux]{\@@_a_group:nw}
% \begin{macro}[aux]{\@@_a_group_test:w}
%   The token might be either a true character token with
%   catcode $1$ or $2$, or it could be a control sequence.
%   The only tricky case is if the character code happens
%   to be equal to the escape character: then we change
%   the escape character from backslash to solidus or back,
%   so that the string representation of the true character
%   and of a control sequence set equal to it start differently.
%   Then probe what the first character of that string
%   representation is: this is the place where we need
%   \cs{l_@@_char_token} to be a separate control
%   sequence from \cs{l_@@_token}, to compare them.
%
%   The functions \cs{@@_a_bgroup:w} and \cs{@@_a_egroup:w} only
%   differ in the argument they give to \cs{@@_a_group:nw}: tokens
%   which expand to a begin-group character, or to an end-group
%   character, with character code~$0$. After the \tn{lccode} of the
%   character with code~$0$ has been set to the character code
%   extracted from the meaning of \cs{l_@@_token}, applying
%   \cs{tl_to_lowercase:n} changes that character to the one with the
%   wanted character code as the tokens are stored in the \tn{toks}
%   register. The escape character alternates between the character
%   codes $92$ (backslash) and $47$ (solidus), whose sum is $139$.
%   If the token turns out to be a true character it is recorded with
%   \cs{@@_a_store:}, otherwise we step the counter of normal tokens;
%   in the latter case the \tn{toks} register keeps its contents but
%   the index is not advanced.
%    \begin{macrocode}
\group_begin:
  \char_set_catcode_group_begin:N \^^@
  \char_set_catcode_group_end:N \^^E
  \cs_new_protected_nopar:Npn \@@_a_bgroup:w
    { \@@_a_group:nw { \exp_after:wN ^^@ \if_false: ^^E \fi: } }
  \char_set_catcode_group_begin:N \^^B
  \char_set_catcode_group_end:N \^^@
  \cs_new_protected_nopar:Npn \@@_a_egroup:w
    { \@@_a_group:nw { \if_false: ^^B \fi: ^^@ } }
\group_end:
\cs_new_protected:Npn \@@_a_group:nw #1
  {
    \tex_lccode:D \c_zero = \@@_extract_charcode: \scan_stop:
    \tl_to_lowercase:n { \tex_toks:D \l_@@_index_int {#1} }
    \if_int_compare:w \tex_lccode:D \c_zero = \tex_escapechar:D
      \int_set:Nn \tex_escapechar:D { 139 - \tex_escapechar:D }
    \fi:
    \tex_afterassignment:D \@@_a_group_test:w
    \exp_after:wN \cs_set_eq:NN
    \exp_after:wN \l_@@_char_token
    \token_to_str:N
  }
\cs_new_protected_nopar:Npn \@@_a_group_test:w
  {
    \if_charcode:w \l_@@_token \l_@@_char_token
      \@@_a_store:
    \else:
      \int_incr:N \l_@@_normal_int
    \fi:
    \@@_a_loop:w
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}[int]{\@@_a_store:}
%   This function is called each time we meet a special token;
%   at this point, the \tn{toks} register \cs{l_@@_index_int}
%   holds a token list which expands to the given special token.
%   Also, the value of \cs{l_@@_type_int} indicates which case
%   we are in:
%   \begin{itemize}
%   \item $-1$ end-group character;
%   \item $0$ space character;
%   \item $1$ begin-group character.
%   \end{itemize}
%   This value is first added to the nesting depth
%   \cs{l_@@_nesting_int}.
%   We need to distinguish further the case of a space character
%   (code $32$) from other character codes, because those will
%   behave differently in the second pass. Namely, after testing
%   the \tn{lccode} of $0$ (which holds the present character code)
%   we change the cases above to
%   \begin{itemize}
%   \item $-2$ space end-group character;
%   \item $-1$ non-space end-group character;
%   \item $0$ space blank space character;
%   \item $1$ non-space begin-group character;
%   \item $2$ space begin-group character.
%   \end{itemize}
%   This has the property that non-space characters correspond to odd
%   values of \cs{l_@@_type_int}.
%   The number of normal tokens, and the type of special token,
%   are packed into a \tn{skip} register: the former as its natural
%   length and the latter as its stretch component, both expressed in
%   scaled points. The index is then incremented and the number of
%   normal tokens reset to zero.
%   Finally, we check whether we reached the last closing brace, in which
%   case we stop by disabling the looping function (locally).
%    \begin{macrocode}
\cs_new_protected_nopar:Npn \@@_a_store:
  {
    \tex_advance:D \l_@@_nesting_int \l_@@_type_int
    \if_int_compare:w \tex_lccode:D \c_zero = \c_thirty_two
      \tex_multiply:D \l_@@_type_int \c_two
    \fi:
    \tex_skip:D \l_@@_index_int
      = \l_@@_normal_int sp plus \l_@@_type_int sp \scan_stop:
    \int_incr:N \l_@@_index_int
    \int_zero:N \l_@@_normal_int
    \if_int_compare:w \l_@@_nesting_int = \c_minus_one
      \cs_set_eq:NN \@@_a_loop:w \scan_stop:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[int]{\@@_a_safe:N}
% \begin{macro}[aux]{\@@_a_cs:ww}
%   This should be the simplest case: since the upcoming token is safe,
%   we can simply grab it in the second pass. However, other branches of
%   the code must pass their tokens through \tn{string}, hence we do it
%   here as well, with some optimizations. If the token is a single
%   character (including space), the \cs{if_charcode:w} test yields
%   true, and we simply count one \enquote{normal} token. On the other
%   hand, if the token is a control sequence, we should replace it by
%   its string representation for compatibility with other code
%   branches. Instead of slowly looping through the characters with
%   the main code, we use the knowledge of how the second pass works:
%   if the control sequence name contains no space, count that token
%   as a number of normal tokens equal to its string length. If the
%   control sequence contains spaces, they should be registered as
%   special characters by increasing \cs{l_@@_index_int}
%   (no need to carefully count characters between each space), and
%   all characters after the last space should be counted in the
%   following sequence of \enquote{normal} tokens.
%
%   The auxiliary \cs{@@_a_cs:ww} receives from
%   \cs{@@_cs_space_count:NN} the number |#1| of spaces in the string
%   representation of the control sequence, and the number |#2| of
%   characters after the last space. When there are spaces, the
%   \tn{skip} register at the current index records the number of
%   normal tokens met so far plus one, the extra one standing for the
%   control sequence itself; the index then advances by |#1|. The
%   \tn{toks} registers, and the \tn{skip} registers at the
%   intermediate indices, are not set: in the second pass
%   \cs{@@_b_cs_test:ww} skips directly to the index after the last
%   space.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_a_safe:N #1
  {
    \if_charcode:w
        \scan_stop:
        \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
        \scan_stop:
      \int_incr:N \l_@@_normal_int
    \else:
      \@@_cs_space_count:NN \@@_a_cs:ww #1
    \fi:
    \@@_a_loop:w
  }
\cs_new_protected:Npn \@@_a_cs:ww #1; #2;
  {
    \if_int_compare:w #1 > \c_zero
      \tex_skip:D \l_@@_index_int
        = \__int_eval:w \l_@@_normal_int + \c_one sp \scan_stop:
      \tex_advance:D \l_@@_index_int #1 \exp_stop_f:
      \l_@@_normal_int #2 \exp_stop_f:
    \else:
      \tex_advance:D \l_@@_normal_int #2 \exp_stop_f:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Second pass}
%
% The second pass is an exercise in expandable loops.
% All the necessary information is stored in \tn{skip}
% and \tn{toks} registers.
%
% \begin{macro}[int]{\@@_b:n}
% \begin{macro}[int, EXP]{\@@_b_loop:w}
%   Start the loop with the index $0$. No need for an end-marker:
%   the loop will stop by itself when the last index is read.
%   We will repeatedly oscillate between reading long stretches
%   of normal tokens, and reading special tokens.
%
%   The whole loop takes place inside a global \texttt{x}-expanding
%   assignment to \cs{g_@@_result_tl}. The token list |#1| is placed
%   after the initial index, so that the second pass reads the
%   original tokens again, and \cs{__prg_break_point:} marks the place
%   where the loop ends. The function \cs{@@_b_loop:w} receives an
%   index, and calls \cs{@@_b_normals:ww} with the value of the
%   \tn{skip} register at that index followed by the index itself;
%   used as an integer, the \tn{skip} register yields its natural
%   length in scaled points, namely the number of normal tokens to be
%   read before the special token at that index.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_b:n #1
  {
    \tl_gset:Nx \g_@@_result_tl
      {
        \@@_b_loop:w 0; #1
        \__prg_break_point:
      }
  }
\cs_new:Npn \@@_b_loop:w #1;
  {
    \exp_after:wN \@@_b_normals:ww
      \__int_value:w \tex_skip:D #1 ; #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[int, EXP]{\@@_b_normals:ww}
% \begin{macro}[aux, EXP]{\@@_b_normal:wwN}
%   The first argument is the number of normal tokens which remain
%   to be read, and the second argument is the index in the array
%   produced in the first step.
%   When no normal token remains, \cs{@@_b_special:w} takes over: it
%   is delimited in such a way that it removes the \cs{fi:} and the
%   call to \cs{@@_b_normal:wwN} which follow it. Otherwise,
%   \cs{@@_b_normal:wwN} grabs the next token as its third argument.
%   A character's string representation is always one character long,
%   while a control sequence is always longer (we have set the escape
%   character to a printable value). In both cases, we leave
%   \cs{exp_not:n} \Arg{token} \cs{s__tl} in the input stream
%   (after \texttt{x}-expansion). Here, \cs{exp_not:n} is used
%   rather than \cs{exp_not:N} because |#3| could be \cs{s__tl},
%   hence must be hidden behind braces in the result.
%   The token, the number of remaining normal tokens and the index are
%   then passed on to \cs{@@_b_char:Nww} or to \cs{@@_b_cs:Nww}.
%    \begin{macrocode}
\cs_new:Npn \@@_b_normals:ww #1;
  {
    \if_int_compare:w #1 = \c_zero
      \@@_b_special:w
    \fi:
    \@@_b_normal:wwN #1;
  }
\cs_new:Npn \@@_b_normal:wwN #1; #2; #3
  {
    \exp_not:n { \exp_not:n { #3 } } \s__tl
    \if_charcode:w
        \scan_stop:
        \exp_after:wN \use_none:n \token_to_str:N #3 \prg_do_nothing:
        \scan_stop:
      \exp_after:wN \@@_b_char:Nww
    \else:
      \exp_after:wN \@@_b_cs:Nww
    \fi:
    #3 #1; #2;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[int, EXP]{\@@_b_char:Nww}
%   If the normal token we grab is a character, leave
%   \meta{catcode} \meta{charcode} followed by \cs{s__tl}
%   in the input stream, and call \cs{@@_b_normals:ww}
%   with its first argument decremented.
%
%   The \meta{catcode} is found by a chain of tests. A character whose
%   meaning is undefined gets the digit D (active): it is one of the
%   active characters disabled in the setup step. A character which
%   matches none of the tests gets the digit $6$ (parameter).
%   The definition is made within a group where the letters A, B and C
%   have category code \enquote{other}, and within
%   \cs{tl_to_uppercase:n} with the \tn{uccode} of |?| set to the
%   character code of D, so that the |?| in the first test becomes a D
%   which also has category code \enquote{other}.
%   The decrement is done by starting an integer expression with
%   $-1+{}$, which is completed by the number of remaining normal
%   tokens and the semicolon left after the token by
%   \cs{@@_b_normal:wwN}.
%    \begin{macrocode}
\group_begin:
  \char_set_catcode_other:N A
  \char_set_catcode_other:N B
  \char_set_catcode_other:N C
  \char_set_uccode:nn { `? } { `D }
  \tl_to_uppercase:n
    {
      \cs_new:Npn \@@_b_char:Nww #1
        {
          \if_meaning:w #1 \tex_undefined:D          ? \else:
          \if_catcode:w #1 \c_catcode_other_token    C \else:
          \if_catcode:w #1 \c_catcode_letter_token   B \else:
          \if_catcode:w #1 \c_math_toggle_token      3 \else:
          \if_catcode:w #1 \c_alignment_token        4 \else:
          \if_catcode:w #1 \c_math_superscript_token 7 \else:
          \if_catcode:w #1 \c_math_subscript_token   8 \else:
          \if_catcode:w #1 \c_space_token            A \else:
            6
          \fi: \fi: \fi: \fi: \fi: \fi: \fi: \fi:
          \__int_value:w `#1 \s__tl
          \exp_after:wN \@@_b_normals:ww
            \int_use:N \__int_eval:w \c_minus_one +
        }
    }
\group_end:
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[int, EXP]{\@@_b_cs:Nww}
% \begin{macro}[aux, EXP]{\@@_b_cs_test:ww}
%   If the token we grab is a control sequence, leave
%   |0 -1| (as category code and character code) in the input stream,
%   followed by \cs{s__tl},
%   and call \cs{@@_b_normals:ww} with updated arguments.
%
%   The auxiliary \cs{@@_b_cs_test:ww} receives four
%   semicolon-delimited arguments: the number |#1| of spaces in the
%   string representation of the control sequence and the number |#2|
%   of characters after the last space (both provided by
%   \cs{@@_cs_space_count:NN}), then the number |#3| of normal tokens
%   which remained to be read and the current index |#4|. This mirrors
%   what \cs{@@_a_cs:ww} did in the first pass. If there is no space,
%   the index is unchanged and the number of normal tokens is reduced
%   by the length |#2| of the string. Otherwise, the index advances by
%   |#1|, and the new number of normal tokens is the value of the
%   \tn{skip} register at that new index, minus |#2|.
%    \begin{macrocode}
\cs_new:Npn \@@_b_cs:Nww #1
  {
    0 -1 \s__tl
    \@@_cs_space_count:NN \@@_b_cs_test:ww #1
  }
\cs_new:Npn \@@_b_cs_test:ww #1 ; #2 ; #3 ; #4 ;
  {
    \exp_after:wN \@@_b_normals:ww
    \int_use:N \__int_eval:w
    \if_int_compare:w #1 = \c_zero
      #3
    \else:
      \tex_skip:D \__int_eval:w #4 + #1 \__int_eval_end:
    \fi:
    - #2
    \exp_after:wN ;
    \int_use:N \__int_eval:w #4 + #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[int, EXP]{\@@_b_special:w}
% \begin{macro}[aux, EXP]{\@@_b_special_char:wN}
% \begin{macro}[aux, EXP]{\@@_b_special_space:w}
%   Here, |#1| is the current index in the array built in the first pass.
%   Check now whether we reached the end (we shouldn't keep the trailing
%   end-group character that marked the end of the token list in the
%   first pass).
%   Unpack the \tn{toks} register: when \texttt{x}-expanding again,
%   we will get the special token.
%   Then leave the category code in the input stream, followed by
%   the character code, and call \cs{@@_b_loop:w} with the next index.
%    \begin{macrocode}
\group_begin:
  % Within this group the letter A has category code 12, so the A
  % stored in the definition below is an "other" character.
  \char_set_catcode_other:N A
  \cs_new:Npn \@@_b_special:w
      \fi: \@@_b_normal:wwN 0 ; #1 ;
    {
      % The parameter text removed a \fi: from the input stream;
      % put it back to close the conditional it belonged to.
      \fi:
      % Leave through \__prg_break: once #1 equals \l_@@_index_int.
      \if_int_compare:w #1 = \l_@@_index_int
        \exp_after:wN \__prg_break:
      \fi:
      % Contents of \toks register #1, then the \s__tl delimiter.
      \tex_the:D \tex_toks:D #1 \s__tl
      % Category code digit chosen from the stretch component of
      % \skip register #1: 0 gives A, 1 or 2 give 1, any other value
      % gives 2.
      \if_case:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
             A
      \or:   1
      \or:   1
      \else: 2
      \fi:
      % Odd stretch: continue with \@@_b_special_char:wN; even stretch:
      % continue with \@@_b_special_space:w.  In both cases the next
      % index, 1 + #1, is evaluated first.
      \if_int_odd:w \etex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
        \exp_after:wN \@@_b_special_char:wN \int_use:N
      \else:
        \exp_after:wN \@@_b_special_space:w \int_use:N
      \fi:
      % The \exp_after:wN before the semicolon expands \token_to_str:N,
      % which acts on whatever token follows this macro in the input
      % stream, before the integer expression is terminated.
      \__int_eval:w \c_one + #1 \exp_after:wN ;
      \token_to_str:N
    }
\group_end:
\cs_new:Npn \@@_b_special_char:wN #1 ; #2
  {
    % #1 (delimited by the semicolon) is the index passed on to
    % \@@_b_loop:w; #2 is a single token whose character code is left
    % in the input stream, followed by the \s__tl delimiter.
    \__int_value:w `#2 \s__tl
    \@@_b_loop:w #1 ;
  }
\cs_new:Npn \@@_b_special_space:w #1 ; ~
  {
    % The parameter text ends with an explicit space token, which is
    % removed from the input.  The character code 32 is left in the
    % input stream, followed by \s__tl, and the loop continues with
    % index #1.
    32 \s__tl
    \@@_b_loop:w #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Mapping through the analysis}
%
% \begin{macro}[int]{\@@_map_inline:nn}
% \begin{macro}[aux]{\@@_map_inline_aux:Nn}
%   First obtain the analysis of the token list into
%   \cs{g_@@_result_tl}. To allow nested mappings, increase the
%   nesting depth \cs{g__prg_map_int} (shared between all modules), then
%   define the looping macro, which has a name specific to that nesting
%   depth. That looping macro grabs the \meta{tokens}, \meta{catcode} and
%   \meta{char code}; it checks for the end of the loop with
%   \cs{use_none:n} |##2|, normally empty, but which becomes
%   \cs{tl_map_break:} at the end; it then performs the user's code
%   |#2|, and loops by calling itself. When the loop ends, remember to
%   decrease the nesting depth.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_map_inline:nn #1
  {
    % Analyse the token list #1.
    \@@:n {#1}
    % One more level of nested mapping.
    \int_gincr:N \g__prg_map_int
    % Build the name of the looping macro for this nesting level and
    % pass it as a single token to \@@_map_inline_aux:Nn, which also
    % grabs the inline code following #1 in the input stream.
    \exp_args:Nc \@@_map_inline_aux:Nn
      { @@_map_inline_ \int_use:N \g__prg_map_int :wNw }
  }
\cs_new_protected:Npn \@@_map_inline_aux:Nn #1#2
  {
    % #1 is the looping macro to define, #2 the inline code.
    % The loop grabs one item of the form
    %   <tokens> \s__tl <one token> <more tokens> \s__tl
    % as ##1, ##2, ##3; these are available to the inline code #2.
    \cs_gset_protected:Npn #1 ##1 \s__tl ##2 ##3 \s__tl
      {
        % Discards ##2 when it is a single token; for the end marker
        % below, only the question mark is discarded and \tl_map_break:
        % runs.
        \use_none:n ##2
        #2
        #1
      }
    % Expand \g_@@_result_tl once so that the loop sees its contents,
    % followed by the end marker.
    \exp_after:wN #1
      \g_@@_result_tl
      \s__tl { ? \tl_map_break: } \s__tl
    % Restore the nesting level when the mapping ends.
    \__prg_break_point:Nn \tl_map_break: { \int_gdecr:N \g__prg_map_int }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Showing the results}
%
% \begin{macro}{\tl_show_analysis:N, \tl_show_analysis:n}
% \begin{macro}[int]{\@@_show:N}
%   Add to \cs{@@:n} a third pass to display tokens to the terminal.
%    \begin{macrocode}
\cs_new_protected:Npn \tl_show_analysis:N #1
  {
    % Expand the variable #1 once and analyse its contents ...
    \exp_after:wN \@@:n \exp_after:wN {#1}
    % ... then display the result under the name of #1.
    \@@_show:N #1
  }
\cs_new_protected:Npn \tl_show_analysis:n #1
  {
    % Analyse the token list #1.
    \@@:n {#1}
    % \@@_show:N takes a variable, so store #1 in a scratch variable
    % and show that.
    \tl_set:Nn \l_@@_internal_tl {#1}
    \@@_show:N \l_@@_internal_tl
  }
\cs_new_protected:Npn \@@_show:N #1
  {
    % The x-type expansion below takes place inside a group, which is
    % closed by the \group_end: placed at the start of the expanded
    % code, i.e. before \__msg_show_variable:Nnn runs.
    \group_begin:
    \use:x
      {
        \group_end:
        % Neither the function nor the variable #1 may be expanded here.
        \exp_not:n { \__msg_show_variable:Nnn #1 }
          { tl-analysis }
          {
            % Loop over the items of \g_@@_result_tl; the trailing item
            % makes the loop jump to \__prg_break_point:.
            \exp_after:wN \@@_show_loop:wNw \g_@@_result_tl
              \s__tl { ? \__prg_break: } \s__tl
            \__prg_break_point:
          }
      }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[aux, rEXP]{\@@_show_loop:wNw}
%   Here, |#1| \texttt{o}- and \texttt{x}-expands to the token;
%   |#2| is the category code (one uppercase hexadecimal digit),
%   $0$ for control sequences;
%   |#3| is the character code, which we ignore.
%   In the cases of control sequences and active characters,
%   the meaning may overflow one line, and we want to truncate
%   it. Those cases are thus separated out.
%    \begin{macrocode}
\cs_new:Npn \@@_show_loop:wNw #1 \s__tl #2 #3 \s__tl
  {
    % Discards #2 when it is a single token; for the end marker
    % (a question mark followed by \__prg_break:) only the question
    % mark is discarded and \__prg_break: ends the loop.
    \use_none:n #2
    % Start of each displayed line.
    \exp_not:n { \\ > \ \  }
    % #2 is read as a hexadecimal digit: 0 selects \@@_show_cs:n,
    % 13 selects \@@_show_active:n, anything else \@@_show_normal:n.
    % The \exp_after:wN chains close the conditionals before the
    % selected function grabs {#1}.
    \if_int_compare:w "#2 = \c_zero
      \exp_after:wN \@@_show_cs:n
    \else:
      \if_int_compare:w "#2 = \c_thirteen
        \exp_after:wN \exp_after:wN
        \exp_after:wN \@@_show_active:n
      \else:
        \exp_after:wN \exp_after:wN
        \exp_after:wN \@@_show_normal:n
      \fi:
    \fi:
    {#1}
    % Next item; #3 is not used.
    \@@_show_loop:wNw
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[aux, rEXP]{\@@_show_normal:n}
%   Non-active characters are a simple matter of printing
%   the character, and its meaning. Our test suite checks that
%   begin-group and end-group characters do not mess up
%   \TeX{}'s alignment status.
%    \begin{macrocode}
\cs_new:Npn \@@_show_normal:n #1
  {
    % In both lines #1 is expanded once before \token_to_str:N or
    % \token_to_meaning:N is applied to the result.  The output is
    % the token as a string, a space, then its meaning in parentheses.
    \exp_after:wN \token_to_str:N #1 ~
    ( \exp_after:wN \token_to_meaning:N #1 )
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_show_value:N}
%   This expands to the value of |#1| if it has any.
%    \begin{macrocode}
\cs_new:Npn \@@_show_value:N #1
  {
    % Expands to an equal sign followed by the value of #1 when #1 is a
    % chardef, mathchardef, or a dim, int, skip or toks register, and
    % to nothing otherwise (in particular for expandable tokens).
    \token_if_expandable:NF #1
      {
        % As soon as one test is true, \__prg_break: jumps to
        % \__prg_break_point:, so that the final \use:n is reached.
        \token_if_chardef:NTF       #1 \__prg_break: { }
        \token_if_mathchardef:NTF   #1 \__prg_break: { }
        \token_if_dim_register:NTF  #1 \__prg_break: { }
        \token_if_int_register:NTF  #1 \__prg_break: { }
        \token_if_skip_register:NTF #1 \__prg_break: { }
        \token_if_toks_register:NTF #1 \__prg_break: { }
        % No test was true: remove the break point, \use:n and its
        % argument, leaving nothing.
        \use_none:nnn
        \__prg_break_point:
        % This function is f-expanded by \@@_show_long:nn, and f-type
        % expansion stops at the unexpandable equal sign.  The
        % \exp_after:wN makes \tex_the:D #1 produce the value before
        % that happens, so that the length test and truncation in
        % \@@_show_long_aux:nnnn operate on the value itself rather
        % than on the unexpanded tokens.
        \use:n { \exp_after:wN = \tex_the:D #1 }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[aux, rEXP]{\@@_show_cs:n}
% \begin{macro}[aux, rEXP]{\@@_show_active:n}
% \begin{macro}[aux, rEXP]{\@@_show_long:nn}
% \begin{macro}[aux, rEXP]{\@@_show_long_aux:nnnn}
%   Control sequences and active characters are printed in the same way,
%   making sure not to exceed the line length \cs{l_iow_line_count_int}. In case
%   of an overflow, we replace the last characters by
%   \cs{c_@@_show_etc_str}.
%    \begin{macrocode}
\cs_new:Npn \@@_show_cs:n #1
  {
    % Expand #1 once, then show it with the label for control sequences.
    \exp_after:wN \@@_show_long:nn \exp_after:wN {#1}
      { control~sequence= }
  }
\cs_new:Npn \@@_show_active:n #1
  {
    % Expand #1 once, then show it with the label for active characters.
    \exp_after:wN \@@_show_long:nn \exp_after:wN {#1}
      { active~character= }
  }
\cs_new:Npn \@@_show_long:nn #1
  {
    % Only #1 (the token) is grabbed here; the second argument (the
    % label) stays in the input stream and becomes the fourth argument
    % of the auxiliary.  The first two arguments are expanded once,
    % the third one is f-expanded.
    \@@_show_long_aux:oofn
      { \token_to_str:N #1 }
      { \token_to_meaning:N #1 }
      { \@@_show_value:N #1 }
  }
\cs_new:Npn \@@_show_long_aux:nnnn #1#2#3#4
  {
    % As called from \@@_show_long:nn: #1 is the token as a string,
    % #2 its meaning, #3 the result of \@@_show_value:N, #4 the label.
    % The text to display is  #1 ~ ( #4 #2 #3 ).
    \int_compare:nNnTF
      { \str_count:n { #1 ~ ( #4 #2 #3 ) } }
      > { \l_iow_line_count_int - \c_three }
      {
        % Too long: keep the characters from position 1 up to the
        % limit minus the length of \c_@@_show_etc_str, then append
        % \c_@@_show_etc_str.
        \str_range:nnn { #1 ~ ( #4 #2 #3 ) } \c_one
          {
            \l_iow_line_count_int - \c_three
            - \str_count:N \c_@@_show_etc_str
          }
        \c_@@_show_etc_str
      }
      % Short enough: display the text unchanged.
      { #1 ~ ( #4 #2 #3 ) }
  }
% The variant used by \@@_show_long:nn.
\cs_generate_variant:Nn \@@_show_long_aux:nnnn { oof }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Messages}
%
% \begin{variable}{\c_@@_show_etc_str}
%   When a control sequence (or active character)
%   and its meaning are too long to fit in one line
%   of the terminal, the end is replaced by this token list.
%    \begin{macrocode}
\tl_const:Nx \c_@@_show_etc_str % (
  % The x-type expansion applies \token_to_str:N to \ETC, so the
  % constant holds the characters of that control sequence name
  % followed by a dot and a closing parenthesis.  The parenthesis
  % in the comment on the first line balances the one below.
  { \token_to_str:N \ETC.) }
%    \end{macrocode}
% \end{variable}
%
%    \begin{macrocode}
\__msg_kernel_new:nnn { kernel } { show-tl-analysis }
  {
    % #1 is the token list variable being shown.  Its name is printed
    % unless it is \l_@@_internal_tl, the scratch variable in which
    % \tl_show_analysis:n stores its argument.
    The~token~list~
    \str_if_eq:nnF {#1} { \l_@@_internal_tl }
      { \token_to_str:N #1 ~ }
    \tl_if_empty:NTF #1
      { is~empty }
      { contains~the~tokens: }
  }
%    \end{macrocode}
%
%    \begin{macrocode}
%</initex|package>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex