// -*- ASCII:EDT -*-
// --------------------------------------------------
// WHAT THIS MACRO DOES AND HOW IT CAN BE USED
// --------------------------------------------------
//
// A string of the form
//   "emphasis;role;bold;1;end"
// is split into its tokens when ";" is specified as the separator:
//   emphasis
//   role
//   bold
//   1
//   end
//
// Tokens enclosed in "[]" are protected from parsing. This allows
// "sublists" to be extracted, which can be tokenized in a second run
// for a different purpose:
//   "emphasis;role;[bold;italic;underline];1;end"
// gives:
//   emphasis
//   role
//   [bold;italic;underline]
//   1
//   end
//
// INPUT (local variables, created with "Assign" before running this macro):
//   tkStr           the string to tokenize (no default)
//   tkSep           the token separator                      (default ";")
//   tkSepRegEx      "1": search flags 1011 are used for the separator
//                   (separator treated as a regular expression),
//                   anything else: flags 1010                (default: not set)
//   tkResultPrefix  name prefix of all result variables      (default "tk")
//   tkSubOpen       string that opens a protected sublist    (default "[")
//   tkSubClose      string that closes a protected sublist   (default "]")
//   tkMatchStr      optional; the index of the first token that begins
//                   with this string is stored in <prefix>MatchPos
//
// OUTPUT (local variables; <prefix> is the value of tkResultPrefix):
//   <prefix>0, <prefix>1, ...  the tokens
//   <prefix>Cnt                the number of tokens (0 for an empty tkStr)
//   <prefix>MatchPos           "0" if tkMatchStr was not given; otherwise the
//                              index of the first matching token, or not set
//                              if no token matches
//
// All input variables are released before the macro ends, so that the
// defaults apply again on the next run.
// Registers 0-3 are used as scratch for FindInString and are left modified.
// Registers 5-8 are used and reset to "" before the macro ends.
//
//// The following lines show how the tokens can be retrieved.
//// By using local ("named") variables, the mechanism practically resembles
//// classical one-dimensional arrays: tk0,tk1, ... tkn
//Assign("tkStr","emphasis;role;?[bold;italic;underline];0");
//Assign("tkSep",";");
//Assign("tkSubOpen","[");
//Assign("tkSubClose","]");
// Assign('tkMatchStr','ro');
//// (run this macro here, e.g. with Exe; the file name depends on where
////  this macro is installed)
//// Display the whole thing
//LetRegNum(7,0);
//SetTracking(0);
//Ins("tokens found: %$('tkCnt');!");NewLine;
//Repeat("%$('tkCnt');",!*>
//  Ins("tk%!7: %$('tk%!7');");NewLine;>
//  LetRegNum(7,%!7+1);>
//  *);
//Ins("That's it!");NewLine;
//SetTracking(1);
//Refresh;

// --------------------------------------------------
// CHECKING IF DEFAULT VALUES SHOULD BE USED
// --------------------------------------------------
// If a variable was not initialized (created by "Assign") outside
// of this macro, its default value is assigned here.

// the TokenSeparator
// default value ";"
IfStr("%$('tkSep');","","=","Assign('tkSep',';');","");

// the TokenList Result Prefix
// default value "tk"
IfStr("%$('tkResultPrefix');","","=","Assign('tkResultPrefix','tk');","");

// Treat TokenSeparator as Regular Expression? Default is NO!
// NOTE(review): the flag strings carry a trailing ";" ('1011;' / '1010;').
// They are kept exactly as they were; confirm that FindInString is meant
// to receive them in this form.
IfStr("%$('tkSepRegEx');","1","=",>
  "Assign('tkSearchFlags','1011;');",>
  "Assign('tkSearchFlags','1010;');");

// the Sublist Open String
// (a sublist will not be parsed)
// default "["
IfStr("%$('tkSubOpen');","","=","Assign('tkSubOpen','[');","");

// the Sublist Close String
// default "]"
IfStr("%$('tkSubClose');","","=","Assign('tkSubClose',']');","");

// --------------------------------------------------
// GLOBAL INITIALIZING
// --------------------------------------------------

// Is there anything at all to tokenize?
// An empty tkStr gives a count of 0, otherwise there is always at least
// 1 token. The count is overwritten later if separators are found.
IfStr("%$('tkStr');","","=",>
  `Assign("%$('tkResultPrefix');Cnt","0");`,>
  `Assign("%$('tkResultPrefix');Cnt","1");`);

// The tokens are stored in <prefix>0, <prefix>1, <prefix>2, ... consecutively.
// These are cleared first. The result prefix is honoured here, and 352 slots
// are cleared because the main loop (limited to 350) can fill the indices
// 0 to 351.
LetRegNum(7,0);
Repeat(352,!*>
  Release("%$('tkResultPrefix');%!7");>
  LetRegNum(7,%!7+1);>
  *);

// Initialize <prefix>MatchPos BEFORE the single-token exit below, so that a
// value left over from an earlier run can never survive.
//   no tkMatchStr given: store "0", which makes the matching code skip itself
//   tkMatchStr given:    start with an empty (released) MatchPos
IfStr("%$('tkMatchStr');","","=",>
  `Assign("%$('tkResultPrefix');MatchPos","0");`,>
  `Release("%$('tkResultPrefix');MatchPos");`);

// Where does the first separator occur (if it occurs at all)?
FindInString("%$('tkStr');", "%$('tkSep');", 0,1, "%$('tkSearchFlags');", 0);

// --------------------------------------------------
// THE MAIN PART OF THE MACRO
// --------------------------------------------------

// No separator was found when reg0>reg1.
// In this case the original string is the only token: store it in <prefix>0,
// test it against tkMatchStr, do the same clean-up as at the regular end of
// the macro (registers and all input variables) and leave.
IfNum("%!0","%!1",">",!*>
  Assign("%$('tkResultPrefix');0","%$('tkStr');");>
  IfStr("%$('%$(`tkResultPrefix`);MatchPos');","","=",!">
    FindInString(""%$('tkStr');"", ""<%$('tkMatchStr');"", 0,1,1001,0);>
    IfNum(""%!0"",""%!1"",""<="",!"">
      Assign('%$(`tkResultPrefix`);MatchPos','0');>
      "","">
      Relax;>
      "");>
    ");>
  LetReg(5,"");LetReg(6,"");LetReg(7,"");LetReg(8,"");>
  Release('tkStr');Release('tkSep');Release('tkSepRegEx');>
  Release('tkSearchFlags');>
  Release('tkSubOpen');Release('tkSubClose');>
  Release('tkMatchStr');>
  Release('tkResultPrefix');>
  Exit;>
  *,"");

// initialize reg7 as the token counter
LetRegNum(7,0);
// initialize reg6 as the resume index (start of the current token in tkStr)
LetRegNum(6,0);

// On entry of every pass, reg0/reg1 hold the position of the separator
// that ends the current token.
// NOTE(review): when the LAST token is a sublist, reg6 ends up behind the end
// of tkStr and the code after the loop presumably stores one additional empty
// token. This behaviour is unchanged here; confirm whether it is wanted.
Loop(!*>
  // reg8 generally holds the temporary extracted string, >
  // not only in the next line >
  ExtractByIndex(8, "%$('tkStr');", %!6, %!0-1);>
  // Sublists are extracted separately by >
  // checking first the occurrence of the sublist open string >
  FindInString("%!8", "%$('tkSubOpen');", 2,3, 1010, 0);>
  IfNum("%!2","%!3","<=",!">
    // in this case, jump behind the next sublist close string >
    FindInString(""%$('tkStr');"", ""%$('tkSubClose');"", 2,3, 1010, '%!6+1');>
    ExtractByIndex(8, ""%$('tkStr');"", %!6, %!3);>
    LetRegNum(6, %!3+2);>
    ",">
    // ELSE: resume directly behind the separator >
    LetRegNum(6, %!1+1);>
    ");>
  // assign the extracted result (reg8) to the token with number reg7 >
  Assign("%$('tkResultPrefix');%!7","%!8");>
  // -------------------------------------------------- >
  // BEGIN matching algorithm >
  // already found (or no tkMatchStr given)? then skip >
  IfStr("%$('%$(`tkResultPrefix`);MatchPos');","","=",!">
    // examine if the current token begins with tkMatchStr >
    // still using reg8, search flags 1001, resume index 0 >
    FindInString(""%!8"", ""<%$('tkMatchStr');"", 0,1,1001,0);>
    // found something? >
    IfNum(""%!0"",""%!1"",""<="",!"">
      // store the current token counter in MatchPos >
      Assign('%$(`tkResultPrefix`);MatchPos','%!7');>
      "","">
      // ELSE >
      Relax; >
      "");>
    "); >
  // END matching algorithm >
  // -------------------------------------------------- >
  // go on with the tokenizing job: look for the next separator >
  FindInString("%$('tkStr');", "%$('tkSep');", 0,1, "%$('tkSearchFlags');", %!6);>
  LetRegNum(7,%!7+1);>
  IfNum("%!0","%!1",">","Stop;","");>
  // restrict the tokens to 350, in case something goes wrong >
  // (infinite loop) >
  IfNum("%!7",350,">","Stop;","");>
*);

// get the last token (from the resume index until the end of the string)
GetLength(5,"%$('tkStr');");
ExtractByIndex(8, "%$('tkStr');", %!6, %!5);
Assign("%$('tkResultPrefix');%!7","%!8");

// The last token has to go through the matching algorithm as well;
// otherwise a match in the last token of the list is never detected.
IfStr("%$('%$(`tkResultPrefix`);MatchPos');","","=",!">
  FindInString(""%!8"", ""<%$('tkMatchStr');"", 0,1,1001,0);>
  IfNum(""%!0"",""%!1"",""<="",!"">
    Assign('%$(`tkResultPrefix`);MatchPos','%!7');>
    "","">
    Relax;>
    "");>
  ");

// save the number of found tokens in the variable <prefix>Cnt
LetRegNum(7,%!7+1);
Assign("%$('tkResultPrefix');Cnt","%!7");

// reset all used registers
LetReg(5,"");LetReg(6,"");
LetReg(7,""); LetReg(8,"");
// and the local variables that do not contain the results;
// this is important for the default tokenizer inputs to work correctly
Release('tkStr');
Release('tkSep');
Release('tkSepRegEx');
Release('tkSearchFlags');
Release('tkResultPrefix');
Release('tkSubOpen');Release('tkSubClose');
Release('tkMatchStr');
// END
End;

//----------------------------------------------------------------------
// author: georges.schmitz@heitec.de
//----------------------------------------------------------------------
// version: 1.0
// revised by: robert schlicht (w.m.l@gmx.net)
//----------------------------------------------------------------------
// version: 1.1 (27.06.2001)
// + better documentation
// + added "tkMatchStr" for searching the first token matching at "tkMatchPos"
//----------------------------------------------------------------------
// version 1.11 (08.10.2001)
// + removed a misplaced semi-colon
//----------------------------------------------------------------------
// version 1.12 (02.04.2002)
// + added 'tkSepRegEx' for treating tkSep as Regular Expression
// + added 'tkResultPrefix': the result List, Cnt and MatchPos are prefixed by default with "tk"
// - %!0 is no longer the default value for tkStr if the latter was not set
//----------------------------------------------------------------------
// review fixes (not yet versioned)
// + "tkMatchStr" is now also tested against the last token of the list
// + "tkMatchStr" is now also tested when tkStr consists of a single token
// + old results and MatchPos are cleared using 'tkResultPrefix'
//   instead of the fixed prefix "tk"
// + the single-token exit releases all input variables
// + added the missing semi-colon after GetLength
//----------------------------------------------------------------------
// BUGS:
// + (fixed, see review fixes) bug with "tkMatchStr", the last one in a
//   list was not detected