// -*- ASCII:EDT -*-
// --------------------------------------------------
// WHAT THIS MACRO DOES AND HOW IT COULD BE USED !
// --------------------------------------------------
//
// a string of the form
//     "emphasis;role;bold;1;end"
// will be divided into its tokens, when ";" is specified as separator:
//     emphasis
//     role
//     bold
//     1
//     end
//
// tokens that are embraced by "[]" are preserved from parsing,
// thus allowing "sublists" to be extracted, which can be "tokenized" in
// a second run for a different purpose.
//     "emphasis;role;[bold;italic;underline];1;end"
// will give:
//     emphasis
//     role
//     [bold;italic;underline]
//     1
//     end
//
//
//// The following lines show, how the tokens can be retrieved.
//// By using local ("named") variables, the mechanism resembles practically
//// to classical 1 Dim Arrays: tk0,tk1, ... tkn
//Assign("tkStr","emphasis;role;?[bold;italic;underline];0");
//Assign("tkSep",";");
//Assign("tkSubOpen","[");
//Assign("tkSubClose","]");
// Assign('tkMatchStr','ro');
//// Display the whole thing
//LetRegNum(7,0);
//SetTracking(0);
//Ins("tokens found: %$('tkCnt');!");NewLine;
//Repeat("%$('tkCnt');",!*>
//    Ins("tk%!7: %$('tk%!7');");NewLine;>
//    LetRegNum(7,%!7+1);>
//    *);
//Ins("That's it!");NewLine;
//SetTracking(1);
//Refresh;


// --------------------------------------------------
// CHECKING IF DEFAULT VALUES SHOULD BE USED !
// --------------------------------------------------

// if the variable was not initialized (created by "Assign") outside
// of this macro, assign the default value ";" to it

// the TokenSeparator
// default value ";"
IfStr("%$('tkSep');","","=",>
    "Assign('tkSep',';'); >
",">
");

// the TokenList Result Prefix
// default value "tk"
IfStr("%$('tkResultPrefix');","","=",>
    "Assign('tkResultPrefix','tk'); >
",">
");

// Treat TokenSeparator as Regular Expression? Default is NO!
IfStr("%$('tkSepRegEx');","1","=",">
    Assign('tkSearchFlags','1011;'); >
",">
    Assign('tkSearchFlags','1010;'); >
");

// the Sublist Open String
// (sublist will not be parsed)
// default "["
IfStr("%$('tkSubOpen');","","=",>
   "Assign(""tkSubOpen"",""["");>
",">
");

// the Sublist Close String
// default "]"
IfStr("%$('tkSubClose');","","=",>
   "Assign(""tkSubClose"",""]"");>
",">
");


// --------------------------------------------------
// GLOBAL INITIALIZING
// --------------------------------------------------

// is there anything at all to tokenize?
IfStr("%$('tkStr');","","=",`>
    Assign("%$('tkResultPrefix');Cnt","0"); >
`,`>
//  else, there will always be at least 1 token >
//  (store this in the counter variable tkCnt)
    Assign("%$('tkResultPrefix');Cnt","1"); >
`);


// the tokens are stored in tk0, tk1, tk2, ... consecutively.
// these are first cleared
LetRegNum(7,0);
Repeat(152,!*>
    Release('tk%!7');>
    LetRegNum(7,%!7+1);>
*);

// where does the first separator occur (if it occurs at all)?
FindInString("%$('tkStr');", "%$('tkSep');", 0,1, "%$('tkSearchFlags');", 0);


// --------------------------------------------------
// THE MAIN PART OF THE MACRO
// --------------------------------------------------

// no separator found when reg0>reg1.
// so we can assign the original string to tk0 and finish the macro
IfNum("%!0","%!1",">","Assign(`%$('tkResultPrefix');0`,`%$('tkStr');`);Release('tkResultPrefix');Exit;>
",">
");

// initialize reg7 as tokencounter
LetRegNum(7,0);
// initialize the resumeindex
LetRegNum(6,0);

// no "tkMatchStr" specified
IfStr("%$('tkMatchStr');","","=",!">
    Assign('tkMatchPos','0'); >
//  with the consequence, that the matching algorithm is skipped >
",">
//  ELSE >
//  in this case always start with an empty "tkMatchPos" >
    Release('tkMatchPos'); >
");



Loop(!*>
//  reg8 is generally used for holding "temporary" extracted strings >
//  not only in the next line >
    ExtractByIndex(8, "%$('tkStr');", %!6, %!0-1);>
//    Loop(!|>
//  [...]-Strings are extracted separately by >
//  checking first the occurrence of a "[" and >
    FindInString("%!8", "%$('tkSubOpen');", 2,3, 1010, 0);>
    IfNum("%!2","%!3","<=",!">
//      in this case, jump behind the next "]" >
        FindInString(""%$('tkStr');"", ""%$('tkSubClose');"", 2,3, 1010, '%!6+1');>
        ExtractByIndex(8, ""%$('tkStr');"", %!6, %!3);>
        LetRegNum(6, %!3+2);>
    ",">
//      ELSE >
        LetRegNum(6, %!1+1);>
    ");>
//  assign the ***extracted result*** (reg8) to token "tk" number (reg7) >
    Assign("%$('tkResultPrefix');%!7","%!8");>
//  -------------------------------------------------- >
//  *** BEGIN matching algorithm *** >
//  already found? then skip >
    IfStr("%$('%$(`tkResultPrefix`);MatchPos');","","=",!">
//      examine, if the actual token begins with the string sequence of "tkMatchStr" >
//      still using reg8, searching options (regexp, ..., Resume index at 0) >
        FindInString(""%!8"", ""<%$('tkMatchStr');"", 0,1,1001,0);>
//      found something? >
        IfNum(""%!0"",""%!1"",""<="",!"">
//          store the actual token counter in tkMatchPos >
            Assign('%$(`tkResultPrefix`);MatchPos','%!7');>
        "","">
//      ELSE >
            Relax; >
        "");>
    "); >
//  END *** matching algorithm *** >
//  -------------------------------------------------- >
//  go on with the "tokenizing job" >
    FindInString("%$('tkStr');", "%$('tkSep');", 0,1, "%$('tkSearchFlags');", %!6);>
    LetRegNum(7,%!7+1);>
    IfNum("%!0","%!1",">","Stop;","");>
//  restrict the tokens to 350, in the case that something might go wrong >
//  (infinite loop)
    IfNum("%!7",350,">","Stop;","");>
  *);

// get the last one (until end of String)
GetLength(5,"%$('tkStr');")
ExtractByIndex(8, "%$('tkStr');", %!6, %!5);
Assign("%$('tkResultPrefix');%!7","%!8");

// save the number of found tokens in variable tkCnt
LetRegNum(7,%!7+1);
Assign("%$('tkResultPrefix');Cnt","%!7");
// reset all used registers
LetReg(5,"");LetReg(6,""); LetReg(7,""); LetReg(8,"");
// and the local variables that do not contain the results
// this is important for the default tokenizer-inputs to work correctly
Release('tkStr'); Release('tkSep');
Release('tkSepRegEx');
Release('tkResultPrefix');
Release('tkSubOpen');Release('tkSubClose');
Release('tkMatchStr');

// END
End;

//----------------------------------------------------------------------
// author: georges.schmitz@heitec.de
//----------------------------------------------------------------------
// version: 1.0
// revised by: robert schlicht (w.m.l@gmx.net)
//----------------------------------------------------------------------
// version: 1.1 (27.06.2001)
// + better documentation
// + added "tkMatchStr" for searching the first token matching at "tkMatchPos"
//----------------------------------------------------------------------
// version 1.11 (08.10.2001)
// + removed a misplaced semi-colon
//----------------------------------------------------------------------
// version 1.12 (02.04.2002)
// + added 'tkSepRegEx' for treating tkSep as Regular Expression
// + added 'tkResultPrefix': the result List, Cnt and MatchPos are prefixed by default with "tk"
// - %!0 is no more the default value for tkStr, if the latter one was not set
//----------------------------------------------------------------------

// BUGS:
// + bug with "tkMatchStr", the last one in a list is not detected