//========================================================================== // STRINGTOKENIZER2.H - part of // OMNeT++/OMNEST // Discrete System Simulation in C++ // // Author: Tamas Borbely // //========================================================================== /*--------------------------------------------------------------* Copyright (C) 2006-2017 OpenSim Ltd. This file is distributed WITHOUT ANY WARRANTY. See the file `license' for details on this and other legal matters. *--------------------------------------------------------------*/ #ifndef __OMNETPP_COMMON_STRINGTOKENIZER2_H #define __OMNETPP_COMMON_STRINGTOKENIZER2_H #include #include #include "commondefs.h" #include "exception.h" namespace omnetpp { namespace common { /** * Exception thrown when a syntax error found while tokenizing a string. */ class COMMON_API StringTokenizerException : public opp_runtime_error { public: /** * Constructor with error message generated in a printf-like manner. */ StringTokenizerException(const char *msg, ...); /** * Destructor with throw clause required by gcc. */ virtual ~StringTokenizerException() throw() {} }; /** * Similar to StringTokenizer, but supports nested parenthesis, quoted * strings and backslash quoted characters in tokens * (tokens do not end with unbalanced parentheses and can contain strings constants). * * Example1: * *
 * const char *str = "This 'string contains' {a lot} (of {[{irritating}]}) 'parentheses: {,[ and ('";
 * StringTokenizer tokenizer(str, " ", "()[]{}", "'");
 * while (tokenizer.hasMoreTokens())
 *     EV << tokenizer.nextToken() << "\n";
 * 
* * Output: *
 * This
 * 'string contains'
 * {a lot}
 * (of {[{irritating}]})
 * 'parentheses: {,[ and ('
 * 
* * Example2: * *
 * const char *str = "\\\\ \\  \\{ \\} \\( \\) \\[ \\] \\' \\n";
 * StringTokenizer tokenizer(str, " ", "()[]{}", "'");
 * while (tokenizer.hasMoreTokens())
 *     EV << tokenizer.nextToken() << "|";
 * 
* * Output: *
 * \| |{|}|(|)|[|]|'|\n|
 * 
* * * @ingroup SimSupport */ class COMMON_API StringTokenizer2 { private: char *str; // copy of the input string, modified during the process char *end; // points to the ending '\0' of str char *dst; // points to the next output character str char *src; // points to the next input character str int tokenStart; // index of the first character of the current token in the original string int tokenEnd; // index of the first character after the current token in the original string std::string delimiterChars; std::string parenChars; std::string quoteChars; void skipDelimiters(); char *readToken(); bool isQuote(char ch) { return quoteChars.find(ch) != std::string::npos; } bool isParen(char ch) { return parenChars.find(ch) != std::string::npos; } bool isDelimiter(char ch) { return delimiterChars.find(ch) != std::string::npos; } public: /** * Constructor. * All input parameters (str, delimiters, parens, and quotes) are copied in the constructor, * so they can safely be deleted after the call. * The delimiters default to all whitespace characters (space, tab, CR, LF, FF). * The parens must be an alternating sequence of open/close characters and * it is default to "()". * The quotes default to the double quote character. */ StringTokenizer2(const char *str, const char *delimiters=nullptr, const char *parens=nullptr, const char *quotes=nullptr); /** * Destructor. */ ~StringTokenizer2(); /** * Change delimiters. This allows for switching delimiters during * tokenization. */ void setDelimiter(const char *s); /** * Change parenthesis characters. * It must be an alternating sequence of open/close characters. */ void setParentheses(const char *parens); /** * Change quote characters. */ void setQuotes(const char *quotes); /** * Returns true if there're more tokens (i.e. the next nextToken() * call won't return nullptr). */ bool hasMoreTokens(); /** * Returns the next token and moves forward the iteration. * The returned pointer will point to the beginning * of the next token in the original input string, 'end' will be set * to the end of the token (exclusive). * If there're no more tokens, nullptr will be returned and 'end' * will not be set. */ const char *nextToken(); /** * Returns the character index of the current token in the original input string. */ int getTokenStart() { return tokenStart; } /** * Returns the length of the current token in the original input string. * It can differ from strlen(nextToken()), due to \ quoting of characters. */ int getTokenLength() { return tokenEnd - tokenStart; } }; inline void StringTokenizer2::skipDelimiters() { while (src < end && (*src=='\0' || isDelimiter(*src))) src++; } } // namespace common } // namespace omnetpp #endif