4.0.3dev_9d4017/doxygen/tokenizer_8hh_source.html

 /*!

  *

  * Copyright (C) 2015 Technical University of Liberec.  All rights reserved.

  *

  * This program is free software; you can redistribute it and/or modify it under

  * the terms of the GNU General Public License version 3 as published by the

  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)

  *

  * This program is distributed in the hope that it will be useful, but WITHOUT

  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

  *

  *

  * @file    tokenizer.hh

  * @brief

  */


 #ifndef TOKENIZER_HH_

 #define TOKENIZER_HH_


 #include <boost/tokenizer.hpp>

 #include <istream>

 #include "system/exceptions.hh"


 class FilePath;


 /**

  * @brief Simple class for parsing text files.

  *

  * Boost library provides nice tokenizer. The string is viewed as a container of tokens and

  * you can iterate over them. This class simplifies the usage of the boost's tokenizer and further simplifies

  * reading of the text files. The actual tokenizer uses backslash '\\' as the escape character and double quotes '"' as the quotation

  * character. The separator of tokens can be set in the constructor, default value is space ' ' or tabulator '\\t'.

  *

  * !! The token separator used, @p escaped_list_separator, does not provide a possibility to merge several consecutive

  * separator characters into one separator. Consequently, empty tokens appear when more than one space

  * separates tokens. To overcome this, we drop every empty token.

  *

  * Provides:

  * - method to read @p next_line, automatically skipping empty lines

  * - iterating over tokens on current line

  * - number of lines that the tokenizer has read -- method line_num

  *

  * Example of usage:

  * @code

  * Tokenizer(in_stream);

  * @endcode

  *

  * TODO:

  * - method to reopen - skip to the beginning

  *

  *

  */

 class Tokenizer {
 public:
     /// Separator functor type used to split a line into tokens.
     typedef boost::escaped_list_separator<char> Separator;
     /// Boost tokenizer instantiated for the Separator type above.
     typedef boost::tokenizer<Separator> BT;

     // Error-info tags (file name, line number, position on line) and the
     // exceptions whose messages are composed from them.
     TYPEDEF_ERR_INFO( EI_File, std::string);
     TYPEDEF_ERR_INFO( EI_Line, unsigned int);
     TYPEDEF_ERR_INFO( EI_Pos, unsigned int);
     DECLARE_EXCEPTION( ExcMissingToken, << "Missing token, file: " << EI_File::qval << ", line: " << EI_Line::qval << ", position: " << EI_Pos::qval << ".\n" );
     DECLARE_EXCEPTION( ExcCannotRead, << "Can not read from stream, file: " << EI_File::qval << ", line: " << EI_Line::qval << ".\n" );

     /**
      * Snapshot of where the Tokenizer stands in the file.
      *
      * The three values are not cross-checked by any mechanism: a caller that
      * builds a Position by hand is responsible for making @p file_position_
      * agree with @p line_counter_ and @p line_position_. From outside the
      * Tokenizer the values can be set only at construction. A Position handed
      * out by the Tokenizer must have @p file_position_ set according to the
      * Tokenizer's own position.
      */
     struct Position {
         std::streampos file_position_;     ///< Global offset in the file.
         unsigned int line_counter_;        ///< Line number in the file.
         unsigned int line_position_;       ///< Position within the line.

         /// Builds a position with all three values equal to zero.
         Position()
         : file_position_(0),
           line_counter_(0),
           line_position_(0)
         {}

         /// Builds a position from an explicit file offset, line number and position in line.
         Position(std::streampos file_pos, unsigned int line, unsigned int line_pos)
         : file_position_(file_pos),
           line_counter_(line),
           line_position_(line_pos)
         {}
     };

     /**
      * Opens the file given by path @p fp and builds the tokenizer over its input stream.
      *
      * Reading starts at the actual position of the stream. Unless another
      * @p separator is passed, tokens are separated by space ' ' or tabulator '\\t'.
      */
     Tokenizer(const  FilePath &fp, Separator separator = Separator("\\"," \t","\"") );

     /**
      * Builds the tokenizer over an already existing input stream @p in.
      *
      * Reading starts at the actual position of the stream. Unless another
      * @p separator is passed, tokens are separated by space ' ' or tabulator '\\t'.
      *
      * A std::istream can be passed neither by value nor by const reference,
      * so a temporary stream can not be used, e.g. this does not work:
      * @code
      * Tokenizer( ifstream("my_file") );
      * @endcode
      */
     Tokenizer( std::istream &in, Separator separator = Separator("\\"," \t","\"") );

     /**
      * Sets the comment @p pattern: a whole line is skipped when the beginning
      * of the trimmed line matches it.
      *
      * TODO: Allow end line comments.
      */
     void set_comment_pattern( const std::string &pattern);

     /**
      * Skips forward to the line matching @p pattern and sets the tokenizer to
      * the beginning of that line.
      *
      * If @p end_search_pattern is provided, the search stops after looking for
      * @p pattern on the line where @p end_search_pattern was detected; the
      * next line is not read.
      *
      * @return true if @p pattern has been found before the end of file.
      *
      * TODO: similar method that use regular expressions (e.g. from boost)
      * TODO: add option to find the pattern in the whole file (wrap over the end of file)
      */
     bool skip_to(const std::string &pattern, const std::string &end_search_pattern="");

     /**
      * Drops the tokens remaining on the current line and reads a new line.
      *
      * Lines without any tokens are skipped, but they are still counted in the
      * number reported by @p line_num.
      *
      * @param assert_for_remaining_tokens  When true (default), unprocessed
      *        tokens on the current line produce a warning for the user, since
      *        they may indicate an error in the data format. Pass false to
      *        ignore them silently.
      * @return false if the end of file was reached, true otherwise.
      */
     bool next_line(bool assert_for_remaining_tokens=true);

     /**
      * Dereferences the token iterator.
      *
      * @return reference to the string holding the current token.
      */
     const std::string & operator *() const;

     /**
      * Advances to the next non-empty token on the line.
      *
      * Nothing happens when the iterator already stands at the end of the line.
      * Otherwise one step is always made, followed by further steps over empty
      * tokens (produced by consecutive separators). Every step, including the
      * ones over empty tokens, increments the position on the line.
      */
     inline BT::iterator & operator ++() {
         if (eol()) return tok_;

         do {
             ++position_.line_position_;
             ++tok_;
         } while ( !eol() && tok_->empty() );

         return tok_;
     }

     /**
      * Returns true when the token iterator equals the end of the current
      * line's tokenizer, i.e. no token is left on the line.
      */
     inline bool eol() const {
         return line_tokenizer_.end() == tok_;
     }

     /**
      * Returns the eof state of the underlying input stream.
      */
     inline bool eof() const {
         return in_->eof();
     }

     /**
      * Returns the position on the current line.
      */
     inline unsigned int pos() const {
         return position_.line_position_;
     }

     /**
      * Returns the line counter of the tokenizer, i.e. the number of lines read.
      * Documented to be '1' after the first call of @p next_line.
      */
     inline unsigned int line_num() const {
         return position_.line_counter_;
     }

     /**
      * Returns the stored file name.
      */
     inline const std::string &f_name() const {
         return f_name_;
     }

     /**
      * Returns the full description of the current position.
      */
     std::string position_msg() const;

     /**
      * Gives read-only access to the current input line.
      */
     inline const std::string &line() const {
         return line_;
     }

     /**
      * Returns the actual position in the file.
      */
     const Tokenizer::Position get_position();

     /**
      * Moves the tokenizer to a new position in the file.
      *
      * Warning! The @p file_position_ of @p pos must correspond to its
      * @p line_counter_ and @p line_position_; the method can not check
      * that the values were entered correctly.
      */
     void set_position(const Tokenizer::Position pos);

     /**
      * Closes the file if it was opened by the tokenizer itself.
      *
      * NOTE(review): a destructor is declared and the class holds the raw
      * pointer own_stream_, yet no copy constructor or copy assignment is
      * declared here - confirm that Tokenizer objects are never copied.
      */
     ~Tokenizer();

 protected:
     /// Resets the tokenizer for the actual line.
     void set_tokenizer();

     /// Name of the file, kept for better error messages.
     std::string f_name_;

     /**
      * Stream owned by the tokenizer; used when the tokenizer is constructed
      * from a FilePath object. Destroyed automatically.
      */
     std::ifstream *own_stream_;

     /// Stream the tokenizer reads from.
     std::istream *in_;

     /// Text of the current line.
     std::string line_;

     /// Comment pattern, if any was set.
     std::string comment_pattern_;

     /// Current position of the tokenizer: file offset, line counter and position on the line.
     Position position_;

     /// Iterator over the tokens of the current line.
     BT::iterator tok_;

     /// Separator function used by the line tokenizer.
     Separator separator_;

     /// Tokenizer of the current line (container-like object).
     BT line_tokenizer_;
 };


 #endif /* TOKENIZER_HH_ */

FilePath
Dedicated class for storing path to input and output files.
Definition: file_path.hh:54

exceptions.hh

DECLARE_EXCEPTION
#define DECLARE_EXCEPTION(ExcName, Format)
Macro for simple definition of exceptions.
Definition: exceptions.hh:158

TYPEDEF_ERR_INFO
#define TYPEDEF_ERR_INFO(EI_Type, Type)
Macro to simplify declaration of error_info types.
Definition: exceptions.hh:194

operator*
UnitSI operator*(const UnitSI &a, const UnitSI &b)
Product of two units.
Definition: unit_si.cc:235