% \iffalse % %% File l3sort.dtx (C) Copyright 2012,2014 The LaTeX3 Project %% %% It may be distributed and/or modified under the conditions of the %% LaTeX Project Public License (LPPL), either version 1.3c of this %% license or (at your option) any later version. The latest version %% of this license is in the file %% %% http://www.latex-project.org/lppl.txt %% %% This file is part of the "l3experimental bundle" (The Work in LPPL) %% and all files in that bundle must be distributed together. %% %% The released version of this bundle is available from CTAN. %% %% ----------------------------------------------------------------------- %% %% The development version of the bundle can be found at %% %% http://www.latex-project.org/svnroot/experimental/trunk/ %% %% for those people who are interested. %% %%%%%%%%%%% %% NOTE: %% %%%%%%%%%%% %% %% Snapshots taken from the repository represent work in progress and may %% not work or may contain conflicting material! We therefore ask %% people _not_ to put them into distributions, archives, etc. without %% prior consultation with the LaTeX Project Team. %% %% ----------------------------------------------------------------------- %% % %<*driver|package> % The version of expl3 required is tested as early as possible, as % some really old versions do not define \ProvidesExplPackage. \RequirePackage{expl3}[2014/11/25] %\@ifpackagelater{expl3}{2014/11/25} % {} % {% % \PackageError{l3sort}{Support package l3kernel too old} % {% % Please install an up to date version of l3kernel\MessageBreak % using your TeX package manager or from CTAN.\MessageBreak % \MessageBreak % Loading l3sort will abort!% % }% % \endinput % } \GetIdInfo$Id: l3sort.dtx 5471 2014-11-25 20:11:29Z joseph $ {L3 Experimental sorting functions} % %<*driver> \documentclass[full]{l3doc} \usepackage{amsmath} \begin{document} \DocInput{\jobname.dtx} \end{document} % % \fi % % \title{^^A % The \pkg{l3sort} package\\ Sorting lists^^A % \thanks{This file describes v\ExplFileVersion, % last revised \ExplFileDate.}^^A % } % % \author{^^A % The \LaTeX3 Project\thanks % {^^A % E-mail: % \href{mailto:latex-team@latex-project.org} % {latex-team@latex-project.org}^^A % }^^A % } % % \date{Released \ExplFileDate} % % \maketitle % % \begin{documentation} % % \section{\pkg{l3sort} documentation} % % \LaTeX3 comes with a facility to sort list variables (sequences, % token lists, or comma-lists) according to some user-defined % comparison. For instance, % \begin{verbatim} % \clist_set:Nn \l_foo_clist { 3 , 01 , -2 , 5 , +1 } % \clist_sort:Nn \l_foo_clist % { % \int_compare:nNnTF { #1 } > { #2 } % { \sort_reversed: } % { \sort_ordered: } % } % \end{verbatim} % will result in \cs{l_foo_clist} holding the values % |{ -2 , 01 , +1 , 3 , 5 }| sorted in non-decreasing order. % % The code defining the comparison should perform % \cs{sort_reversed:} if the two items given as |#1| % and |#2| are not in the correct order, and otherwise it % should call \cs{sort_ordered:} to indicate that % the order of this pair of items should not be changed. % % For instance, a \meta{comparison code} consisting only % of \cs{sort_ordered:} with no test will yield a trivial % sort: the final order is identical to the original order. % Conversely, using a \meta{comparison code} consisting only % of \cs{sort_reversed:} will reverse the list (in a fairly % inefficient way). % % \begin{texnote} % Internally, the code from \pkg{l3sort} stores items in \tn{toks}. % Thus, the \meta{comparison code} should not alter the % contents of any \tn{toks}, nor assume that they hold a % given value. % \end{texnote} % % \begin{function}{\seq_sort:Nn, \seq_gsort:Nn} % \begin{syntax} % \cs{seq_sort:Nn} \meta{sequence} \Arg{comparison code} % \end{syntax} % Sorts the items in the \meta{sequence} according to the % \meta{comparison code}, and assigns the result to % \meta{sequence}. % \end{function} % % \begin{function}{\tl_sort:Nn, \tl_gsort:Nn} % \begin{syntax} % \cs{tl_sort:Nn} \meta{tl var} \Arg{comparison code} % \end{syntax} % Sorts the items in the \meta{tl var} according to the % \meta{comparison code}, and assigns the result to % \meta{tl var}. % \end{function} % % \begin{function}{\clist_sort:Nn, \clist_gsort:Nn} % \begin{syntax} % \cs{clist_sort:Nn} \meta{clist var} \Arg{comparison code} % \end{syntax} % Sorts the items in the \meta{clist var} according to the % \meta{comparison code}, and assigns the result to % \meta{clist var}. % \end{function} % % \begin{function}[EXP]{\tl_sort:nN} % \begin{syntax} % \cs{tl_sort:nN} \Arg{token list} \meta{conditional} % \end{syntax} % Sorts the items in the \meta{token list}, using the % \meta{conditional} to compare items, and leaves the result in the % input stream. The \meta{conditional} should have signature |:nnTF|, % and return \texttt{true} if the two items being compared should be % left in the same order, and \texttt{false} if the items should be % swapped. % \begin{texnote} % The result is returned within \cs{exp_not:n}, which means that the % token list will not expand further when appearing in an % \texttt{x}-type argument expansion. % \end{texnote} % \end{function} % % \end{documentation} % % \begin{implementation} % % \section{\pkg{l3sort} implementation} % % \begin{macrocode} %<*initex|package> % \end{macrocode} % % \begin{macrocode} %<@@=sort> % \end{macrocode} % % \begin{macrocode} %<*package> \ProvidesExplPackage {\ExplFileName}{\ExplFileDate}{\ExplFileVersion}{\ExplFileDescription} % % \end{macrocode} % % \subsection{Variables} % % \begin{variable}{\c_@@_max_length_int} % The maximum length of a sequence which will not overflow % the available registers depends on which engine is in use. % For $2^{N}$ registers, it is $3\cdot 2^{N-2}$: for that number % of items, at the last step the block size will be $2^{N-1}$, % and the two blocks to merge will be of sizes $2^{N-1}$ and % $2^{N-2}$ respectively. When merging, one of the blocks must % be copied to temporary registers; here, the smallest block, % of size $2^{N-2}$, will fill up exactly the $2^{N-2}$ free % registers, totalling $2^{N-1}+2^{N-2}+2^{N-2}=2^{N}$ registers. % \begin{macrocode} \int_const:Nn \c_@@_max_length_int { \luatex_if_engine:TF { 49152 } { 24576 } } % \end{macrocode} % \end{variable} % % \begin{variable}{\l_@@_length_int} % Length of the sequence which is being sorted. % \begin{macrocode} \int_new:N \l_@@_length_int % \end{macrocode} % \end{variable} % % \begin{variable}{\l_@@_block_int} % Merge sort is done in several passes. In each pass, blocks of size % \cs{l_@@_block_int} are merged in pairs. The block size starts % at $1$, and, for a length in the range $[2^k+1,2^{k+1}]$, reaches % $2^{k}$ in the last pass. % \begin{macrocode} \int_new:N \l_@@_block_int % \end{macrocode} % \end{variable} % % \begin{variable}{\l_@@_begin_int} % \begin{variable}{\l_@@_end_int} % When merging two blocks, \cs{l_@@_begin_int} marks the lowest % index in the two blocks, and \cs{l_@@_end_int} marks the % highest index, plus $1$. % \begin{macrocode} \int_new:N \l_@@_begin_int \int_new:N \l_@@_end_int % \end{macrocode} % \end{variable} % \end{variable} % % \begin{variable}{\l_@@_A_int} % \begin{variable}{\l_@@_B_int} % \begin{variable}{\l_@@_C_int} % When merging two blocks (whose end-points are \texttt{beg} % and \texttt{end}), $A$ starts from the high end of the low % block, and decreases until reaching \texttt{beg}. The index % $B$ starts from the top of the range and marks the register % in which a sorted item should be put. Finally, $C$ points % to the copy of the high block in the interval of registers % starting at \cs{l_@@_length_int}, upwards. $C$ starts % from the upper limit of that range. % \begin{macrocode} \int_new:N \l_@@_A_int \int_new:N \l_@@_B_int \int_new:N \l_@@_C_int % \end{macrocode} % \end{variable} % \end{variable} % \end{variable} % % \subsection{Protected user commands} % % \begin{macro}[int]{\@@_main:NNNnNn} % Sorting happens in three steps. First store items % in \tn{toks} registers ranging from $0$ to the length % of the list, while checking that the list is not too % long. If we reach the maximum length, all further % items are entirely ignored after raising an error. % Secondly, sort the array of \tn{toks} registers, % using the user-defined sorting function, |#5|. % Finally, unpack the \tn{toks} registers (now sorted) % into a variable of the right type, by \texttt{x}-expanding % the code in |#3|, specific to each type of list. % \begin{macrocode} \cs_new_protected:Npn \@@_main:NNNnNn #1#2#3#4#5#6 { \group_begin: \l_@@_length_int \c_zero #2 #5 { \if_int_compare:w \l_@@_length_int = \c_@@_max_length_int \@@_too_long_error:NNw #3 #5 \fi: \tex_toks:D \l_@@_length_int {##1} \tex_advance:D \l_@@_length_int \c_one } \cs_set:Npn \sort_compare:nn ##1 ##2 { #6 } \l_@@_block_int \c_one \@@_level: \use:x { \group_end: #1 \exp_not:N #5 {#4} } } % \end{macrocode} % \end{macro} % % \begin{macro}{\seq_sort:Nn, \seq_gsort:Nn} % The first argument to \cs{@@_main:NNNnNn} is the final % assignment function used, either \cs{tl_set:Nn} or % \cs{tl_gset:Nn} to control local versus global results. % The second argument is what mapping function is used when storing % items to \tn{toks} registers, and the third breaks away from the % loop. The fourth is used to build back the correct kind of list % from the contents of the \tn{toks} registers, including the leading % \cs{s__seq}. Fifth and sixth % arguments are the variable to sort, and the sorting method % as inline code. % \begin{macrocode} \cs_new_protected_nopar:Npn \seq_sort:Nn { \@@_main:NNNnNn \tl_set:Nn \seq_map_inline:Nn \seq_map_break: { \s__seq \@@_toks:NNw \exp_not:N \__seq_item:n 0 ; } } \cs_new_protected_nopar:Npn \seq_gsort:Nn { \@@_main:NNNnNn \tl_gset:Nn \seq_map_inline:Nn \seq_map_break: { \s__seq \@@_toks:NNw \exp_not:N \__seq_item:n 0 ; } } % \end{macrocode} % \end{macro} % % \begin{macro}{\tl_sort:Nn, \tl_gsort:Nn} % Again, use \cs{tl_set:Nn} or \cs{tl_gset:Nn} to control % the scope of the assignment. Mapping through the token % list is done with \cs{tl_map_inline:Nn}, and producing % the token list is very similar to sequences, removing % \cs{seq_item:Nn}. % \begin{macrocode} \cs_new_protected_nopar:Npn \tl_sort:Nn { \@@_main:NNNnNn \tl_set:Nn \tl_map_inline:Nn \tl_map_break: { \@@_toks:NNw \prg_do_nothing: \prg_do_nothing: 0 ; } } \cs_new_protected_nopar:Npn \tl_gsort:Nn { \@@_main:NNNnNn \tl_gset:Nn \tl_map_inline:Nn \tl_map_break: { \@@_toks:NNw \prg_do_nothing: \prg_do_nothing: 0 ; } } % \end{macrocode} % \end{macro} % % \begin{macro}{\clist_sort:Nn, \clist_gsort:Nn} % \begin{macro}[aux]{\@@_clist:NNn} % The case of empty comma-lists is a little bit special as usual, % and filtered out: there is nothing to sort in that case. % Otherwise, the input is done with \cs{clist_map_inline:Nn}, % and the output requires some more elaborate processing than % for sequences and token lists. The first comma must be removed. % An item must be wrapped in an extra set of braces if it contains % either the space or the comma characters. This is taken care of % by \cs{clist_wrap_item:n}, but \cs{@@_toks:NNw} would simply % feed \cs{tex_the:D} \cs{tex_toks:D} \meta{number} as an % argument to that function; hence we need to expand this argument % once to unpack the register. % \begin{macrocode} \cs_new_protected_nopar:Npn \clist_sort:Nn { \@@_clist:NNn \tl_set:Nn } \cs_new_protected_nopar:Npn \clist_gsort:Nn { \@@_clist:NNn \tl_gset:Nn } \cs_new_protected:Npn \@@_clist:NNn #1#2#3 { \clist_if_empty:NF #2 { \@@_main:NNNnNn #1 \clist_map_inline:Nn \clist_map_break: { \exp_last_unbraced:Nf \use_none:n { \@@_toks:NNw \exp_args:No \__clist_wrap_item:n 0 ; } } #2 {#3} } } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}{\@@_toks:NNw} % Unpack the various \tn{toks} registers, from $0$ to the length % of the list. The functions |#1| and |#2| allow us to treat the % three data structures in a unified way: % \begin{itemize} % \item for sequences, they are \cs{exp_not:N} \cs{__seq_item:n}, % expanding to the \cs{__seq_item:n} separator, as expected; % \item for token lists, they expand to nothing; % \item for comma lists, they expand to \cs{exp_args:No} % \cs{clist_wrap_item:n}, taking care of unpacking the register % before letting the undocumented internal \pkg{clist} function % \cs{clist_wrap_item:n} do the work of putting a comma and possibly % braces. % \end{itemize} % \begin{macrocode} \cs_new:Npn \@@_toks:NNw #1#2#3 ; { \if_int_compare:w #3 < \l_@@_length_int #1 #2 { \tex_the:D \tex_toks:D #3 } \exp_after:wN \@@_toks:NNw \exp_after:wN #1 \exp_after:wN #2 \int_use:N \__int_eval:w #3 + \c_one \exp_after:wN ; \fi: } % \end{macrocode} % \end{macro} % % \subsection{Sorting itself} % % \begin{macro}[int]{\@@_level:} % This function is called once blocks of size \cs{l_@@_block_int} % (initially $1$) are each sorted. If the whole list fits in one % block, then we are done (this also takes care of the case of an % empty list or a list with one item). Otherwise, go through pairs % of blocks starting from $0$, then double the block size, and repeat. % \begin{macrocode} \cs_new_protected_nopar:Npn \@@_level: { \if_int_compare:w \l_@@_block_int < \l_@@_length_int \l_@@_end_int \c_zero \@@_merge_blocks: \tex_multiply:D \l_@@_block_int \c_two \exp_after:wN \@@_level: \fi: } % \end{macrocode} % \end{macro} % % \begin{macro}[int]{\@@_merge_blocks:} % This function is called to merge a pair of blocks, starting at % the last value of \cs{l_@@_end_int} (end-point of the previous % pair of blocks). If shifting by one block to the right we reach % the end of the list, then this pass has ended: this end of the % list is sorted already. Store the result of that shift in $A$, % which will index the first block starting from the top end. % Then locate the end-point (maximum) of the upper block: shift % \texttt{end} upwards by one more block, checking that we don't % go beyond the length of the list. Copy this upper block of \tn{toks} % registers in registers above \texttt{length}, indexed by $C$: % this is covered by \cs{sort_copy_block:}. Once this is done we % are ready to do the actual merger using \cs{@@_merge_blocks_aux:}, % after shifting $A$, $B$ and $C$ so that they point to the largest % index in their respective ranges rather than pointing just beyond % those ranges. Of course, once that pair of blocks is merged, % move on to the next pair. % \begin{macrocode} \cs_new_protected_nopar:Npn \@@_merge_blocks: { \l_@@_begin_int \l_@@_end_int \tex_advance:D \l_@@_end_int \l_@@_block_int \if_int_compare:w \__int_eval:w \l_@@_end_int < \l_@@_length_int \l_@@_A_int \l_@@_end_int \tex_advance:D \l_@@_end_int \l_@@_block_int \if_int_compare:w \l_@@_end_int > \l_@@_length_int \l_@@_end_int \l_@@_length_int \fi: \l_@@_B_int \l_@@_A_int \l_@@_C_int \l_@@_length_int \sort_copy_block: \tex_advance:D \l_@@_A_int \c_minus_one \tex_advance:D \l_@@_B_int \c_minus_one \tex_advance:D \l_@@_C_int \c_minus_one \@@_merge_blocks_aux: \exp_after:wN \@@_merge_blocks: \fi: } % \end{macrocode} % \end{macro} % % \begin{macro}[int]{\sort_copy_block:} % We wish to store a copy of the \enquote{upper} block of % \tn{toks} registers, ranging between the initial value of % \cs{l_@@_B_int} (included) and \cs{l_@@_end_int} % (excluded) into a new range starting at the initial value % of \cs{l_@@_C_int}, namely \cs{l_@@_length_int}. % \begin{macrocode} \cs_new_protected_nopar:Npn \sort_copy_block: { \tex_toks:D \l_@@_C_int \tex_toks:D \l_@@_B_int \tex_advance:D \l_@@_C_int \c_one \tex_advance:D \l_@@_B_int \c_one \if_int_compare:w \l_@@_B_int = \l_@@_end_int \use_i:nn \fi: \sort_copy_block: } % \end{macrocode} % \end{macro} % % \begin{macro}[aux]{\@@_merge_blocks_aux:} % At this stage, the first block starts at \cs{l_@@_begin_int}, % and ends at \cs{l_@@_A_int}, and the second block starts at % \cs{l_@@_length_int} and ends at \cs{l_@@_C_int}. The result % of the merger is stored at positions indexed by \cs{l_@@_B_int}, % which starts at $\cs{l_@@_end_int}-1$ and decreases down to % \cs{l_@@_begin_int}, covering the full range of the two blocks. % In other words, we are building the merger starting with the % largest values. % The comparison function is defined to return either % \texttt{reversed} or \texttt{ordered}. Of course, this % means the arguments need to be given in the order they % appear originally in the list. % \begin{macrocode} \cs_new_protected_nopar:Npn \@@_merge_blocks_aux: { \exp_after:wN \sort_compare:nn \exp_after:wN { \tex_the:D \tex_toks:D \exp_after:wN \l_@@_A_int \exp_after:wN } \exp_after:wN { \tex_the:D \tex_toks:D \l_@@_C_int } } % \end{macrocode} % \end{macro} % % \begin{macro}[aux]{\sort_ordered:} % If the comparison function returns \texttt{ordered}, % then the second argument fed to \cs{sort_compare:nn} % should remain to the right of the other one. Since % we build the merger starting from the right, we copy % that \tn{toks} register into the allotted range, then % shift the pointers $B$ and $C$, and go on to do one % more step in the merger, unless the second block has % been exhausted: then the remainder of the first block % is already in the correct register and we are done % with merging those two blocks. % \begin{macrocode} \cs_new_protected_nopar:Npn \sort_ordered: { \tex_toks:D \l_@@_B_int \tex_toks:D \l_@@_C_int \tex_advance:D \l_@@_B_int \c_minus_one \tex_advance:D \l_@@_C_int \c_minus_one \if_int_compare:w \l_@@_C_int < \l_@@_length_int \use_i:nn \fi: \@@_merge_blocks_aux: } % \end{macrocode} % \end{macro} % % \begin{macro}[aux]{\sort_reversed:} % If the comparison function returns \texttt{reversed}, % then the next item to add to the merger is the first % argument, contents of the \tn{toks} register $A$. % Then shift the pointers $A$ and $B$ to the left, and % go for one more step for the merger, unless the left % block was exhausted ($A$ goes below the threshold). % In that case, all remaining \tn{toks} registers in % the second block, indexed by $C$, should be copied % to the merger (see \cs{@@_merge_blocks_end:}). % \begin{macrocode} \cs_new_protected_nopar:Npn \sort_reversed: { \tex_toks:D \l_@@_B_int \tex_toks:D \l_@@_A_int \tex_advance:D \l_@@_B_int \c_minus_one \tex_advance:D \l_@@_A_int \c_minus_one \if_int_compare:w \l_@@_A_int < \l_@@_begin_int \@@_merge_blocks_end: \use_i:nn \fi: \@@_merge_blocks_aux: } % \end{macrocode} % \end{macro} % % \begin{macro}[aux]{\@@_merge_blocks_end:} % This function's task is to copy the \tn{toks} registers % in the block indexed by $C$ to the merger indexed by $B$. % The end can equally be detected by checking when $B$ reaches % the threshold \texttt{begin}, or when $C$ reaches % \texttt{length}. % \begin{macrocode} \cs_new_protected_nopar:Npn \@@_merge_blocks_end: { \tex_toks:D \l_@@_B_int \tex_toks:D \l_@@_C_int \tex_advance:D \l_@@_B_int \c_minus_one \tex_advance:D \l_@@_C_int \c_minus_one \if_int_compare:w \l_@@_B_int < \l_@@_begin_int \use_i:nn \fi: \@@_merge_blocks_end: } % \end{macrocode} % \end{macro} % % \subsection{Expandable sorting} % % Sorting expandably is very different from sorting and assigning to a % variable. Since tokens cannot be stored, they must remain in the % input stream, and be read through at every step. It is thus % necessarily much slower (at best $O(n^2\ln n)$) than non-expandable % sorting functions ($O(n\ln n)$). % % A prototypical version of expandable quicksort is as follows. If the % argument has no item, return nothing, otherwise partition, using the % first item as a pivot (argument |#4| of |\__sort:nnNnn|). The % arguments of |\__sort:nnNnn| are 1.~items less than |#4|, 2.~items % greater or equal to |#4|, 3.~comparison, 4.~pivot, 5.~next item to % test. If |#5| is the tail of the list, call \cs{tl_sort:nN} on |#1| % and on |#2|, placing |#4| in between; |\use:ff| expands the parts to % make \cs{tl_sort:nN} \texttt{f}-expandable. Otherwise, compare |#4| % and |#5| using |#3|. If they are ordered, place |#5| amongst the % \enquote{greater} items, otherwise amongst the \enquote{lesser} items, % and continue partitioning. % \begin{verbatim} % \cs_new:Npn \tl_sort:nN #1#2 % { % \tl_if_blank:nF {#1} % { % \__sort:nnNnn { } { } #2 % #1 \q_recursion_tail \q_recursion_stop % } % } % \cs_new:Npn \__sort:nnNnn #1#2#3#4#5 % { % \quark_if_recursion_tail_stop_do:nn {#5} % { \use:ff { \tl_sort:nN {#1} #3 {#4} } { \tl_sort:nN {#2} #3 } } % #3 {#4} {#5} % { \__sort:nnNnn {#1} { #2 {#5} } #3 {#4} } % { \__sort:nnNnn { #1 {#5} } {#2} #3 {#4} } % } % \cs_generate_variant:Nn \use:nn { ff } % \end{verbatim} % There are quite a few optimizations available here: the code below is % less legible, but more than twice as fast. % % In the simple version of the code, |\__sort:nnNnn| is called % $O(n\ln n)$ times on average (the number of comparisons required by % the quicksort algorithm). Hence most of our focus will be on % optimizing that function. % % The first speed up is to avoid testing for the end of the list at % every call to |\__sort:nnNnn|. For this, the list is prepared by % changing each \meta{item} of the original token list into % \meta{command} \Arg{item}, just like sequences are stored. We arrange % things such that the \meta{command} is the \meta{conditional} provided % by the user: the loop over the \meta{prepared tokens} then looks like % \begin{quote} % \ttfamily % \cs{cs_new:Npn}~|\__sort_loop:wNn|~\ldots{}~|#6#7|\\ % ~~|{|\\ % ~~~~|#6|~\Arg{pivot}~|{#7}|~\meta{loop big}~\meta{loop small}\\ % ~~~~~~\meta{extra arguments}\\ % ~~|}|\\ % |\__sort_loop:wNn|~\ldots{}~\meta{prepared tokens}\\ % ~~\meta{end-loop}~|{}|~|\q_stop| % \end{quote} % In this example, which matches the structure of % \cs{__sort_quick_split_i:NnnnnNn} and a few other functions below, the % |\__sort_loop:wNn| auxiliary normally receives the user's % \meta{conditional} as~|#6| and an \meta{item} as~|#7|. This is % compared to the \meta{pivot} (the argument~|#5|, not shown here), and % the \meta{conditional} leaves the \meta{loop big} or \meta{loop small} % auxiliary, which both have the same form as |\__sort_loop:wNn|, % receiving the next pair \meta{conditional} \Arg{item} as |#6| % and~|#7|. At the end, |#6| is the \meta{end-loop} function, which % terminates the loop. % % The second speed up is to minimize the duplicated tokens between the % \texttt{true} and \texttt{false} branches of the conditional. For % this, we introduce two versions of |\__sort:nnNnn|, which receive % the new item as~|#1| and place it either into the list~|#2| of items % less than the pivot~|#4| or into the list~|#3| of items greater or % equal to the pivot. % \begin{verbatim} % \cs_new:Npn \__sort_i:nnnnNn #1#2#3#4#5#6 % { % #5 {#4} {#6} \__sort_ii:nnnnNn \__sort_i:nnnnNn % {#6} { #2 {#1} } {#3} {#4} % } % \cs_new:Npn \__sort_ii:nnnnNn #1#2#3#4#5#6 % { % #5 {#4} {#6} \__sort_ii:nnnnNn \__sort_i:nnnnNn % {#6} {#2} { #3 {#1} } {#4} % } % \end{verbatim} % Note that the two functions have the form of |\__sort_loop:wNn| above, % receiving as~|#5| the conditional or a function to end the loop. In % fact, the lists~|#2| and~|#3| must be made of pairs \meta{conditional} % \Arg{item}, so we have to replace~|{#6}| above by |{|~|#5|~|{#6}|~|}|, % and |{#1}|~by~|#1|. The actual functions have one more argument, so % all argument numbers are shifted compared to this code. % % The third speed up is to avoid |\use:ff| using a continuation-passing % style: \cs{__sort_quick_split:NnNn} expects a list followed by % \cs{q_mark} \Arg{code}, and expands to \meta{code} \meta{sorted list}. % Sorting the two parts of the list around the pivot is done with % \begin{quote} % \ttfamily % \cs{__sort_quick_split:NnNn} |#2| \ldots{} \cs{q_mark}\\ % ~~|{|\\ % ~~~~\cs{__sort_quick_split:NnNn} |#1| \ldots{} \cs{q_mark} \Arg{code}\\ % ~~~~\Arg{pivot}\\ % ~~|}| % \end{quote} % Items which are larger than the \meta{pivot} are sorted, then placed % after code that sorts the smaller items, and after the (braced) % \meta{pivot}. % % The fourth speed up is avoid the recursive call to \cs{tl_sort:nN} % with an empty first argument. For this, we introduce functions % similar to the |\__sort_i:nnnnNn| of the last example, but aware of % whether the list of \meta{conditional} \Arg{item} read so far that are % less than the pivot, and the list of those greater or equal, are empty % or not: see \cs{__sort_quick_split:NnNn} and functions defined below. % Knowing whether the lists are empty or not is useless if we do not use % distinct ending codes as appropriate. The splitting auxiliaries % communicate to the \meta{end-loop} function (that is initially placed % after the ``prepared'' list) by placing a specific ending function, % ignored when looping, but useful at the end. In fact, the % \meta{end-loop} function does nothing but place the appropriate ending % function in front of all its arguments. The ending functions take % care of sorting non-empty sublists, placing the pivot in between, and % the continuation before. % % The final change in fact slows down the code a little, but is required % to avoid memory issues: schematically, when \TeX{} encounters % \begin{verbatim} % \use:n { \use:n { \use:n { ... } ... } ... } % \end{verbatim} % the argument of the first \cs{use:n} is not completely read by the % second \cs{use:n}, hence must remain in memory; then the argument of % the second \cs{use:n} is not completely read when grabbing the % argument of the third \cs{use:n}, hence must remain in memory, and so % on. The memory consumption grows quadratically with the number of % nested \cs{use:n}. In practice, this means that we must read % everything until a trailing \cs{q_stop} once in a while, otherwise % sorting lists of more than a few thousand items would exhaust a % typical \TeX{}'s memory. % % \begin{macro}[EXP]{\tl_sort:nN} % \begin{macro}[aux, EXP] % { % \__sort_quick_prepare:Nnnn, % \__sort_quick_prepare_end:NNNnw, % \__sort_quick_cleanup:w % } % The code within the \cs{exp_not:f} sorts the list, leaving in most % cases a leading \cs{exp_not:f}, which stops the expansion, letting % the result be return within \cs{exp_not:n}. We filter out the case % of a list with no item, which would otherwise cause problems. Then % prepare the token list~|#1| by inserting the conditional~|#2| before % each item. The \texttt{prepare} auxiliary receives the conditional % as~|#1|, the prepared token list so far as~|#2|, the next prepared % item as~|#3|, and the item after that as~|#4|. The loop ends % when~|#4| contains \cs{__prg_break_point:}, then the % \texttt{prepare_end} auxiliary finds the prepared token list % as~|#4|. The scene is then set up for \cs{__sort_quick_split:NnNn}, % which will sort the prepared list and perform the post action placed % after \cs{q_mark}, namely removing the trailing \cs{s__stop} and % \cs{q_stop} and leaving \cs{exp_stop_f:} to stop % \texttt{f}-expansion. % \begin{macrocode} \cs_new:Npn \tl_sort:nN #1#2 { \exp_not:f { \tl_if_blank:nF {#1} { \__sort_quick_prepare:Nnnn #2 { } { } #1 { \__prg_break_point: \__sort_quick_prepare_end:NNNnw } \q_stop } } } \cs_new:Npn \__sort_quick_prepare:Nnnn #1#2#3#4 { \__prg_break: #4 \__prg_break_point: \__sort_quick_prepare:Nnnn #1 { #2 #3 } { #1 {#4} } } \cs_new:Npn \__sort_quick_prepare_end:NNNnw #1#2#3#4#5 \q_stop { \__sort_quick_split:NnNn #4 \__sort_quick_end:nnTFNn { } \q_mark { \__sort_quick_cleanup:w \exp_stop_f: } \s__stop \q_stop } \cs_new:Npn \__sort_quick_cleanup:w #1 \s__stop \q_stop {#1} % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[EXP, aux] % { % \__sort_quick_split:NnNn, % \__sort_quick_only_i:NnnnnNn, % \__sort_quick_only_ii:NnnnnNn, % \__sort_quick_split_i:NnnnnNn, % \__sort_quick_split_ii:NnnnnNn % } % The \texttt{only_i}, \texttt{only_ii}, \texttt{split_i} and % \texttt{split_ii} auxiliaries receive a useless first argument, the % new item~|#2| (that they append to either one of the next two % arguments), the list~|#3| of items less than the pivot, bigger % items~|#4|, the pivot~|#5|, a \meta{function}~|#6|, and an % item~|#7|. The \meta{function} is the user's \meta{conditional} % except at the end of the list where it is % \cs{__sort_quick_end:nnTFNn}. The comparison is applied to the % \meta{pivot} and the \meta{item}, and calls the \texttt{only_i} or % \texttt{split_i} auxiliaries if the \meta{item} is smaller, and the % \texttt{only_ii} or \texttt{split_ii} auxiliaries otherwise. In % both cases, the next auxiliary goes to work right away, with no % intermediate expansion that would slow down operations. Note that % the argument~|#2| left for the next call has the form % \meta{conditional} \Arg{item}, so that the lists~|#3| and~|#4| keep % the right form to be fed to the next sorting function. % The \texttt{split} auxiliary differs from these in that it is % missing three of the arguments, which would be empty, and its first % argument is always the user's \meta{conditional} rather than an % ending function. % \begin{macrocode} \cs_new:Npn \__sort_quick_split:NnNn #1#2#3#4 { #3 {#2} {#4} \__sort_quick_only_ii:NnnnnNn \__sort_quick_only_i:NnnnnNn \__sort_quick_single_end:nnnwnw { #3 {#4} } { } { } {#2} } \cs_new:Npn \__sort_quick_only_i:NnnnnNn #1#2#3#4#5#6#7 { #6 {#5} {#7} \__sort_quick_split_ii:NnnnnNn \__sort_quick_only_i:NnnnnNn \__sort_quick_only_i_end:nnnwnw { #6 {#7} } { #3 #2 } { } {#5} } \cs_new:Npn \__sort_quick_only_ii:NnnnnNn #1#2#3#4#5#6#7 { #6 {#5} {#7} \__sort_quick_only_ii:NnnnnNn \__sort_quick_split_i:NnnnnNn \__sort_quick_only_ii_end:nnnwnw { #6 {#7} } { } { #4 #2 } {#5} } \cs_new:Npn \__sort_quick_split_i:NnnnnNn #1#2#3#4#5#6#7 { #6 {#5} {#7} \__sort_quick_split_ii:NnnnnNn \__sort_quick_split_i:NnnnnNn \__sort_quick_split_end:nnnwnw { #6 {#7} } { #3 #2 } {#4} {#5} } \cs_new:Npn \__sort_quick_split_ii:NnnnnNn #1#2#3#4#5#6#7 { #6 {#5} {#7} \__sort_quick_split_ii:NnnnnNn \__sort_quick_split_i:NnnnnNn \__sort_quick_split_end:nnnwnw { #6 {#7} } {#3} { #4 #2 } {#5} } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP, aux] % { % \__sort_quick_end:nnTFNn, % \__sort_quick_single_end:nnnwnw, % \__sort_quick_only_i_end:nnnwnw, % \__sort_quick_only_ii_end:nnnwnw, % \__sort_quick_split_end:nnnwnw, % } % The \cs{__sort_quick_end:nnTFNn} appears instead of the user's % conditional, and receives as its arguments the pivot~|#1|, a fake % item~|#2|, a \texttt{true} and a \texttt{false} branches |#3| % and~|#4|, followed by an ending function~|#5| (one of the four % auxiliaries here) and another copy~|#6| of the fake item. All those % are discarded except the function~|#5|. This function receives % lists~|#1| and~|#2| of items less than or greater than the % pivot~|#3|, then a continuation code~|#5| just after \cs{q_mark}. % To avoid a memory problem described earlier, all of the ending % functions read~|#6| until \cs{q_stop} and place~|#6| back into the % input stream. When the lists |#1| and~|#2| are empty, the % \texttt{single} auxiliary simply places the continuation~|#5| before % the pivot~|{#3}|. When |#2|~is empty, |#1|~is sorted and placed % before the pivot~|{#3}|, taking care to feed the continuation~|#5| % as a continuation for the function sorting~|#1|. When |#1|~is % empty, |#2|~is sorted, and the continuation argument is used to % place the continuation~|#5| and the pivot~|{#3}| before the sorted % result. Finally, when both lists are non-empty, items larger than % the pivot are sorted, then items less than the pivot, and the % continuations are done in such a way to place the pivot in between. % \begin{macrocode} \cs_new:Npn \__sort_quick_end:nnTFNn #1#2#3#4#5#6 {#5} \cs_new:Npn \__sort_quick_single_end:nnnwnw #1#2#3#4 \q_mark #5#6 \q_stop { #5 {#3} #6 \q_stop } \cs_new:Npn \__sort_quick_only_i_end:nnnwnw #1#2#3#4 \q_mark #5#6 \q_stop { \__sort_quick_split:NnNn #1 \__sort_quick_end:nnTFNn { } \q_mark {#5} {#3} #6 \q_stop } \cs_new:Npn \__sort_quick_only_ii_end:nnnwnw #1#2#3#4 \q_mark #5#6 \q_stop { \__sort_quick_split:NnNn #2 \__sort_quick_end:nnTFNn { } \q_mark { #5 {#3} } #6 \q_stop } \cs_new:Npn \__sort_quick_split_end:nnnwnw #1#2#3#4 \q_mark #5#6 \q_stop { \__sort_quick_split:NnNn #2 \__sort_quick_end:nnTFNn { } \q_mark { \__sort_quick_split:NnNn #1 \__sort_quick_end:nnTFNn { } \q_mark {#5} {#3} } #6 \q_stop } % \end{macrocode} % \end{macro} % % \subsection{Messages} % % \begin{macro}{\@@_too_long_error:NNw} % When there are too many items in a sequence, this is an error, and % we clean up properly the mapping over items in the list: break using % the type-specific breaking function |#1|. % \begin{macrocode} \cs_new_protected:Npn \@@_too_long_error:NNw #1#2 \fi: { \fi: \__msg_kernel_error:nnx { sort } { too-large } { \token_to_str:N #2 } #1 } \__msg_kernel_new:nnnn { sort } { too-large } { The~list~#1~is~too~long~to~be~sorted~by~TeX. } { TeX~has~\int_eval:n { \c_max_register_int + 1 }~registers~available:~ this~only~allows~to~sorts~with~up~to~\int_use:N \c_@@_max_length_int \ items.~All~extra~items~will~be~ignored. } % \end{macrocode} % \end{macro} % % \begin{macrocode} % % \end{macrocode} % % \end{implementation} % % \PrintIndex