Flow123d
tokenizer.cc
Go to the documentation of this file.
1 /*
2  * tokenizer.cc
3  *
4  * Created on: Nov 9, 2012
5  * Author: jb
6  */
7 
8 #include <string>
9 #include <fstream>
10 #include <boost/algorithm/string/predicate.hpp>
11 #include <boost/algorithm/string/trim.hpp>
12 
13 #include "system/global_defs.h"
14 #include "system/system.hh"
15 #include "system/tokenizer.hh"
16 #include "system/file_path.hh"
17 
18 using namespace std;
19 
/**
 * @brief Construct a tokenizer that owns its input stream.
 *
 * Opens the file given by @p fp for reading; the stream is allocated here
 * and released in the destructor (own_stream_ != NULL marks ownership).
 * Tokens are split on spaces/tabs, with backslash escaping and
 * double-quote quoting. No comment pattern is set initially.
 *
 * Aborts via INPUT_CHECK if the file cannot be opened.
 */
Tokenizer::Tokenizer(const FilePath &fp)
: f_name_(fp),
  own_stream_(NULL),
  comment_pattern_(""),
  line_counter_(0), position_(0),
  separator_("\\"," \t","\""),       // escape chars, separators, quote chars
  line_tokenizer_(line_, separator_)
{
    // allocate the owned stream; in_ is the generic reading handle used elsewhere
    in_ = own_stream_ = new ifstream;
    own_stream_->open( string(fp).c_str() );
    // check correct opening
    INPUT_CHECK(! own_stream_->fail(), "Can not open input file '%s'.\n", f_name_.c_str() );
    //own_stream_->exceptions ( ifstream::failbit | ifstream::badbit );

}
35 
36 
37 
/**
 * @brief Construct a tokenizer over an externally owned stream.
 *
 * The caller keeps ownership of @p in (own_stream_ stays NULL, so the
 * destructor will not delete it). A placeholder name is used in error
 * and position messages since no file name is available.
 */
Tokenizer::Tokenizer( std::istream &in)
: f_name_("__anonymous_stream__"),
  own_stream_(NULL),
  in_( &in ),
  comment_pattern_(""),
  line_counter_(0), position_(0),
  separator_("\\"," \t","\""),       // escape chars, separators, quote chars
  line_tokenizer_(line_, separator_)
{}
47 
48 
49 void Tokenizer::set_comment_pattern( const std::string &pattern) {
50  comment_pattern_=pattern;
51 }
52 
53 
54 bool Tokenizer::skip_to(const std::string& pattern, const std::string &end_search_pattern)
55 {
56  ASSERT( in_->good(), "Tokenizer stream (for file: %s) is not ready for i/o operations. Perhaps missing check about correct open.\n", f_name_.c_str());
57  bool end_search= (end_search_pattern.size() > 0);
58 
59  while (! eof()) {
60  if (line_.find(pattern)!=string::npos ) {
61  set_tokenizer();
62  return true;
63  }
64  if ( end_search && line_.find(end_search_pattern)!=string::npos ) return false;
65  next_line(false);
66  }
67  return false;
68 }
69 
70 
71 
/**
 * @brief Read the next non-empty, non-comment line and set up its tokenizer.
 *
 * Lines are trimmed of surrounding whitespace; empty lines and lines
 * starting with the comment pattern (if set) are skipped. Returns true when
 * a line was read successfully, false at end of stream. When
 * @p assert_for_remaining_tokens is true, a warning is reported if unread
 * tokens remain on the current line.
 *
 * Raises an error (xprintf Err) if the stream enters the bad state.
 */
bool Tokenizer::next_line(bool assert_for_remaining_tokens) {
    // input assert about remaining tokens
    if (assert_for_remaining_tokens && (! eol() )) {
        //DBGMSG("Line: '%s'\n", line_.c_str());
        xprintf(Warn, "Remaining tokens, file '%s', line '%d', after token #%d.\n", f_name_.c_str(), line_num(), position_);
    }

    if (eof()) return false; // we are sure that at least one getline will occur

    line_="";
    // skip empty lines
    while ( ! eof() && line_ == "") {
        std::getline( *in_, line_);
        line_counter_++;
        // check failure bits
        if (in_->bad()) xprintf(Err, "Can not read from stream, file: '%s', line: '%d'\n", f_name_.c_str(), line_num());
        boost::trim( line_ );
        // if pattern is set and beginning of line match it, discard as a comment
        if (comment_pattern_.size() && 0==line_.compare(0, comment_pattern_.size(), comment_pattern_) ) line_="";
    }
    if (! in_->fail() ) { // allow only eof state after any getline
        set_tokenizer();
        return true;
    } else {
        // failbit set: the last getline produced nothing usable
        DBGMSG("Line: '%s'\n", line_.c_str());
    }

    return false;
}
101 
102 
103 
104 const std::string & Tokenizer::operator *() const
105 {
106  if ( eol() ) xprintf(UsrErr, "Missing token, file: '%s', line: '%d', position: '%d'.\n", f_name_.c_str(), line_num(), position_);
107  return *tok_;
108 }
109 
110 
111 
112 void Tokenizer::set_tokenizer()
113 {
114  line_tokenizer_.assign(line_);
115  tok_ = line_tokenizer_.begin();
116  position_ = 0;
117  // skip leading separators
118  while (! eol() && (*tok_).size()==0 ) {position_++; ++tok_;}
119 
120 }
121 
122 
123 
124 string Tokenizer::position_msg() const {
125  stringstream ss;
126  ss << "token: " << pos() << ", line: " << line_num() << ", in file '" << f_name() << "'";
127  return ss.str();
128 }
129 
130 
131 Tokenizer::~Tokenizer() {
132  if (own_stream_ != NULL) delete own_stream_; // this also close the input file
133 }
134