Flow123d  jenkins-Flow123d-windows-release-multijob-285
tokenizer.cc
Go to the documentation of this file.
1 /*
2  * tokenizer.cc
3  *
4  * Created on: Nov 9, 2012
5  * Author: jb
6  */
7 
8 #include <string>
9 #include <fstream>
10 #include <boost/algorithm/string/predicate.hpp>
11 #include <boost/algorithm/string/trim.hpp>
12 
13 #include "system/global_defs.h"
14 #include "system/system.hh"
15 #include "system/tokenizer.hh"
16 #include "system/file_path.hh"
17 
18 using namespace std;
19 
20 Tokenizer::Tokenizer(const FilePath &fp)
21 : f_name_(fp),
22  own_stream_(NULL),
23  comment_pattern_(""),
24  position_(0, 0, 0),
25  separator_("\\"," \t","\""),
26  line_tokenizer_(line_, separator_)
27 {
28  in_ = own_stream_ = new ifstream;
29  own_stream_->open( string(fp).c_str() );
30  // check correct openning
31  INPUT_CHECK(! own_stream_->fail(), "Can not open input file '%s'.\n", f_name_.c_str() );
32 
33 }
34 
35 
36 
37 Tokenizer::Tokenizer( std::istream &in)
38 : f_name_("__anonymous_stream__"),
39  own_stream_(NULL),
40  in_( &in ),
41  comment_pattern_(""),
42  position_(0, 0, 0),
43  separator_("\\"," \t","\""),
44  line_tokenizer_(line_, separator_)
45 {}
46 
47 
48 void Tokenizer::set_comment_pattern( const std::string &pattern) {
49  comment_pattern_=pattern;
50 }
51 
52 
53 bool Tokenizer::skip_to(const std::string& pattern, const std::string &end_search_pattern)
54 {
55  ASSERT( in_->good(), "Tokenizer stream (for file: %s) is not ready for i/o operations. Perhaps missing check about correct open.\n", f_name_.c_str());
56  bool end_search= (end_search_pattern.size() > 0);
57 
58  while (! eof()) {
59  if (line_.find(pattern)!=string::npos ) {
60  set_tokenizer();
61  return true;
62  }
63  if ( end_search && line_.find(end_search_pattern)!=string::npos ) return false;
64  next_line(false);
65  }
66  return false;
67 }
68 
69 
70 
71 bool Tokenizer::next_line(bool assert_for_remaining_tokens) {
72  // input assert about remaining tokens
73  if (assert_for_remaining_tokens && (! eol() )) {
74  xprintf(Warn, "Remaining tokens, file '%s', line '%d', after token #%d.\n", f_name_.c_str(), line_num(), position_.line_position_);
75  }
76 
77  if (eof()) return false; // we are sure that at least one getline will occur
78 
79  line_="";
80  // skip empty lines
81  while ( ! eof() && line_ == "") {
82  std::getline( *in_, line_);
83  position_.line_counter_++;
84  // check failure bits
85  if (in_->bad()) xprintf(Err, "Can not read from stream, file: '%s', line: '%d'\n", f_name_.c_str(), line_num());
86  boost::trim( line_ );
87  // if pattern is set and beginning of line match it
88  if (comment_pattern_.size() && 0==line_.compare(0, comment_pattern_.size(), comment_pattern_) ) line_="";
89  }
90  if (! in_->fail() ) { // allow only eof state after any getline
91  set_tokenizer();
92  return true;
93  } else {
94  DBGMSG("Line: '%s'\n", line_.c_str());
95  }
96 
97  return false;
98 }
99 
100 
101 
102 const std::string & Tokenizer::operator *() const
103 {
104  if ( eol() ) xprintf(UsrErr, "Missing token, file: '%s', line: '%d', position: '%d'.\n", f_name_.c_str(), line_num(), position_.line_position_);
105  return *tok_;
106 }
107 
108 
109 
110 void Tokenizer::set_tokenizer()
111 {
112  line_tokenizer_.assign(line_);
113  tok_ = line_tokenizer_.begin();
114  position_.line_position_ = 0;
115  // skip leading separators
116  while (! eol() && (*tok_).size()==0 ) {position_.line_position_++; ++tok_;}
117 
118 }
119 
120 
121 
122 string Tokenizer::position_msg() const {
123  stringstream ss;
124  ss << "token: " << pos() << ", line: " << line_num() << ", in file '" << f_name() << "'";
125  return ss.str();
126 }
127 
128 
129 const Tokenizer::Position Tokenizer::get_position()
130 {
131  position_.file_position_ = in_->tellg();
132  return position_;
133 }
134 
135 
136 void Tokenizer::set_position(const Tokenizer::Position pos)
137 {
138  in_->clear();
139  in_->seekg(pos.file_position_);
140  position_ = pos;
141 }
142 
143 
144 Tokenizer::~Tokenizer() {
145  if (own_stream_ != NULL) delete own_stream_; // this also close the input file
146 }
147 
#define DBGMSG(...)
Definition: global_defs.h:196
UnitSI operator*(const UnitSI &a, const UnitSI &b)
Product of two units.
Definition: unit_si.cc:172
Global macros to enhance readability and debugging, general constants.
#define ASSERT(...)
Definition: global_defs.h:121
Definition: system.hh:72
#define xprintf(...)
Definition: system.hh:100
#define INPUT_CHECK(i,...)
Debugging macros.
Definition: global_defs.h:61
Dedicated class for storing path to input and output files.
Definition: file_path.hh:32
Definition: system.hh:72
Definition: system.hh:72