Flow123d  JS_before_hm-2198-g122e1f2e2
tokenizer.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file tokenizer.cc
15  * @brief
16  */
17 
18 #include <string>
19 #include <fstream>
20 #include <boost/algorithm/string/predicate.hpp>
21 #include <boost/algorithm/string/trim.hpp>
22 
23 #include "system/global_defs.h"
24 #include "system/system.hh"
25 #include "system/tokenizer.hh"
26 #include "system/file_path.hh"
27 
28 using namespace std;
29 
30 Tokenizer::Tokenizer(const FilePath &fp, Separator separator)
31 : f_name_(fp),
32  own_stream_(nullptr),
33  in_(nullptr),
34  comment_pattern_(""),
35  position_(0, 0, 0),
36  separator_(separator),
37  line_tokenizer_(line_, separator_)
38 {
39  own_stream_ = new ifstream;
40  fp.open_stream(*own_stream_);
41  in_ = own_stream_;
42 }
43 
44 
45 
/**
 * Construct a tokenizer reading from an externally owned stream @p in.
 *
 * The caller keeps ownership of the stream: own_stream_ stays nullptr, so
 * the destructor will not delete it. Since there is no real file name, the
 * reported name is the placeholder "__anonymous_stream__". Tokens on each
 * line are split by @p separator.
 */
Tokenizer::Tokenizer( std::istream &in, Separator separator)
: f_name_("__anonymous_stream__"),
  own_stream_(nullptr),
  in_( &in ),
  comment_pattern_(""),
  position_(0, 0, 0),
  separator_(separator),
  line_tokenizer_(line_, separator_)
{}
55 
56 
57 void Tokenizer::set_comment_pattern( const std::string &pattern) {
58  comment_pattern_=pattern;
59 }
60 
61 
62 bool Tokenizer::skip_to(const std::string& pattern, const std::string &end_search_pattern)
63 {
64  //OLD_ASSERT( in_->good(), "Tokenizer stream (for file: %s) is not ready for i/o operations. Perhaps missing check about correct open.\n", f_name_.c_str());
65  bool end_search= (end_search_pattern.size() > 0);
66 
67  while (! eof()) {
68  if (line_.find(pattern)!=string::npos ) {
69  set_tokenizer();
70  return true;
71  }
72  if ( end_search && line_.find(end_search_pattern)!=string::npos ) return false;
73  next_line(false);
74  }
75  return false;
76 }
77 
78 
79 
80 bool Tokenizer::next_line(bool assert_for_remaining_tokens) {
81  // input assert about remaining tokens
82  if (assert_for_remaining_tokens && (! eol() )) {
83  WarningOut().fmt( "Remaining token '{}', file '{}', line {} after token #{}\n",
84  *tok_, f_name_, line_num(), position_.line_position_);
85  }
86 
87  if (eof()) return false; // we are sure that at least one getline will occur
88 
89  line_="";
90  // skip empty lines
91  while ( ! eof() && line_ == "") {
92  std::getline( *in_, line_);
93  position_.line_counter_++;
94  // check failure bits
95  if (in_->bad()) THROW( ExcCannotRead() << EI_File(f_name_) << EI_Line(line_num()) );
96  boost::trim( line_ );
97  // if pattern is set and beginning of line match it
98  if (comment_pattern_.size() && 0==line_.compare(0, comment_pattern_.size(), comment_pattern_) ) line_="";
99  }
100  if (! in_->fail() ) { // allow only eof state after any getline
101  set_tokenizer();
102  return true;
103  }
104 
105  return false;
106 }
107 
108 
109 
/**
 * Dereference: return the current token of the current line.
 *
 * @throws ExcMissingToken when the tokenizer stands past the last token
 *         (end of line), reporting file, line and token position.
 */
const std::string & Tokenizer::operator *() const
{
    if ( eol() ) THROW( ExcMissingToken() << EI_File(f_name_) << EI_Line(line_num()) << EI_Pos(position_.line_position_) );
    return *tok_;
}
115 
116 
117 
118 void Tokenizer::set_tokenizer()
119 {
120  line_tokenizer_.assign(line_);
121  tok_ = line_tokenizer_.begin();
122  position_.line_position_ = 0;
123  // skip leading separators
124  while (! eol() && (*tok_).size()==0 ) {position_.line_position_++; ++tok_;}
125 
126 }
127 
128 
129 
130 string Tokenizer::position_msg() const {
131  stringstream ss;
132  ss << "token: " << pos() << ", line: " << line_num() << ", in file '" << f_name() << "'";
133  return ss.str();
134 }
135 
136 
/**
 * Snapshot the current position, refreshing the stored stream offset via
 * tellg() so that set_position() can later seek back to this exact spot.
 */
const Tokenizer::Position Tokenizer::get_position()
{
    position_.file_position_ = in_->tellg();
    return position_;
}
142 
143 
144 void Tokenizer::set_position(const Tokenizer::Position pos)
145 {
146  in_->clear();
147  in_->seekg(pos.file_position_);
148  position_ = pos;
149  line_="";
150  set_tokenizer();
151 }
152 
153 
154 Tokenizer::~Tokenizer() {
155  if (own_stream_ != NULL) delete own_stream_; // this also close the input file
156 }
157 
file_path.hh
FilePath
Dedicated class for storing path to input and output files.
Definition: file_path.hh:54
THROW
#define THROW(whole_exception_expr)
Wrapper for throw. Saves the throwing point.
Definition: exceptions.hh:53
operator*
UnitSI operator*(const UnitSI &a, const UnitSI &b)
Product of two units.
Definition: unit_si.cc:235
system.hh
FilePath::open_stream
void open_stream(Stream &stream) const
Definition: file_path.cc:211
global_defs.h
Global macros to enhance readability and debugging, general constants.
WarningOut
#define WarningOut()
Macro defining 'warning' record of log.
Definition: logger.hh:278
std
Definition: doxy_dummy_defs.hh:5
tokenizer.hh