Flow123d  release_2.2.0-34-g18a8075
reader_internal_csv.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file reader_internal_csv.cc
15  * @brief
16  */
17 
18 
20 #include "input/input_type.hh"
21 #include "input/csv_tokenizer.hh"
22 
23 #include "system/asserts.hh" // for Assert, ASSERT
24 #include "system/file_path.hh" // for FilePath, File...
25 #include "system/logger.hh" // for operator<<
26 
27 namespace Input {
28 
29 using namespace std;
30 
31 /*******************************************************************
32  * implementation of ReaderInternalCsvInclude
33  */
34 
36 {}
37 
39 {
40  if ( p.is_record_type() ) { // sub-type must be Record or Abstract type
41  // load path to CSV file
42  std::string included_file;
43  if ( p.down("file") ) {
44  included_file = get_included_file(p);
45  p.up();
46  } else {
47  this->generate_input_error(p, array, "Missing key 'file' defines including input file.", false);
48  }
49 
50  // number of head lines to skip
51  unsigned int n_head_lines = 0;
52  if ( p.down("n_head_lines") ) {
53  try {
54  n_head_lines = p.get_int_value();
55  }
56  catch (ExcInputError & e) {
57  complete_input_error(e, p, ValueTypes::int_type);
58  e << EI_InputType("number of lines to skip");
59  throw;
60  }
61  p.up();
62  }
63 
64  // separator characters
65  stringstream separator;
66  if ( p.down("separator") ) {
67  try {
68  separator << p.get_string_value() << " \t";
69  }
70  catch (ExcInputError & e) {
71  complete_input_error(e, p, ValueTypes::str_type);
72  e << EI_InputType("invalid separator of included CSV file");
73  throw;
74  }
75  p.up();
76  } else {
77  separator << ", \t";
78  }
79 
80  // open CSV file, get number of lines, skip head lines
81  FilePath fp((included_file), FilePath::input_file);
82  CSVTokenizer tok( fp, separator.str() );
83 
84  const Type::Abstract * abstract_type = dynamic_cast<const Type::Abstract *>(&array->get_sub_type());
85  std::string record_name = p.get_record_tag();
86  if ((abstract_type != NULL) && (record_name.size() <= 12)) {
87  // missing record name for Abstract
88  this->generate_input_error(p, array,
89  "Missing record descendant in definition of tag in CSV include of Abstract type. Tag must be in format '!include_csv:Descendant_Name'!",
90  false);
91  }
92  const Type::TypeBase &sub_type = ( abstract_type != NULL ) ? // sub-type of array
93  (abstract_type->get_descendant(record_name.substr(12))) : (array->get_sub_type());
94 
95  StorageBase *item_storage; // storage of sub-type record of included array
96  csv_columns_map_.clear();
97  if ( p.down("format") ) {
98  try {
99  csv_subtree_depth_ = p.path_.size();
100  item_storage = make_storage(p, &sub_type);
101  } catch (ExcMultipleDefinitionCsvColumn &e) {
102  e << EI_File(tok.f_name());
103  throw;
104  }
105  p.up();
106  } else {
107  this->generate_input_error(p, array, "Missing key 'format' defines mapping column of CSV file to input subtree.", false);
108  }
109 
110  // get value of maximal column index in map
112  it = csv_columns_map_.end(); --it;
113  unsigned int max_column_index = it->first;
114 
115  unsigned int n_lines = tok.get_n_lines() - n_head_lines;
116  tok.skip_header(n_head_lines);
117  StorageArray *storage_array = new StorageArray(n_lines);
118  std::set<unsigned int> unused_columns;
119  for( unsigned int arr_item=0; arr_item < n_lines; ++arr_item) {
120  unsigned int i_col;
121  tok.next_line();
122  for (i_col=0; !tok.eol(); ++i_col, ++tok) {
123  it = csv_columns_map_.find(i_col);
124  if (it != csv_columns_map_.end()) {
125  switch (it->second.data_type) {
126  case IncludeDataTypes::type_int: {
127  int val;
128  try {
129  val = tok.get_int_val();
130  } catch (ExcWrongCsvFormat &e) {
131  e << EI_Specification("Wrong integer value");
132  e << EI_ErrorAddress(p.as_string());
133  throw;
134  }
135 
136  const Type::Integer *int_type = static_cast<const Type::Integer *>(it->second.type);
137  if ( !int_type->match(val) ) {
138  THROW( ExcWrongCsvFormat() << EI_Specification("Integer value out of bounds")
139  << EI_TokenizerMsg(tok.position_msg()) << EI_ErrorAddress(p.as_string()) );
140  }
141  set_storage_from_csv( i_col, item_storage, new StorageInt(val) );
142  break;
143  }
144  case IncludeDataTypes::type_double: {
145  double val;
146  try {
147  val = tok.get_double_val();
148  } catch (ExcWrongCsvFormat &e) {
149  e << EI_ErrorAddress(p.as_string());
150  throw;
151  }
152 
153  const Type::Double *double_type = static_cast<const Type::Double *>(it->second.type);
154  if ( !double_type->match(val) ) {
155  THROW( ExcWrongCsvFormat() << EI_Specification("Double value out of bounds")
156  << EI_TokenizerMsg(tok.position_msg()) << EI_ErrorAddress(p.as_string()) );
157  }
158  set_storage_from_csv( i_col, item_storage, new StorageDouble(val) );
159  break;
160  }
161  case IncludeDataTypes::type_bool: {
162  int val;
163  try {
164  val = tok.get_int_val();
165  } catch (ExcWrongCsvFormat &e) {
166  e << EI_Specification("Wrong boolean value");
167  e << EI_ErrorAddress(p.as_string());
168  throw;
169  }
170  set_storage_from_csv( i_col, item_storage, new StorageBool(val) );
171  break;
172  }
173  case IncludeDataTypes::type_string: {
174  try {
175  set_storage_from_csv( i_col, item_storage, new StorageString(tok.get_string_val()) );
176  } catch (ExcWrongCsvFormat &e) {
177  e << EI_Specification("Wrong string value");
178  e << EI_ErrorAddress(p.as_string());
179  throw;
180  }
181  break;
182  }
183  case IncludeDataTypes::type_sel: {
184  const Type::Selection *selection = static_cast<const Type::Selection *>(it->second.type);
185  try {
186  std::string item_name = tok.get_string_val();
187  int val = selection->name_to_int( item_name );
188  set_storage_from_csv( i_col, item_storage, new StorageInt(val) );
189  } catch (ExcWrongCsvFormat &e) {
190  e << EI_Specification("Wrong selection value");
191  e << EI_ErrorAddress(p.as_string());
192  throw;
193  } catch (Type::Selection::ExcSelectionKeyNotFound &exc) {
194  THROW( ExcWrongCsvFormat() << EI_Specification("Wrong selection value")
195  << EI_TokenizerMsg(tok.position_msg()) << EI_ErrorAddress(p.as_string()) );
196  }
197  break;
198  }
199  }
200  } else {
201  // add index of unused column
202  unused_columns.insert(i_col);
203  }
204  }
205  if ( max_column_index > (i_col-1) ) {
206  this->generate_input_error(p, array, "Count of columns in CSV file is less than expected index, defined on input.", false);
207  }
208  storage_array->new_item(arr_item, item_storage->deep_copy() );
209  }
210 
211  if (unused_columns.size()) { // print warning with indexes of unused columns
212  stringstream ss;
213  for (std::set<unsigned int>::iterator it=unused_columns.begin(); it!=unused_columns.end(); ++it)
214  ss << (*it) << " ";
215  WarningOut().fmt("Unused columns: {}\nin imported CSV input file: {}\n", ss.str(), tok.f_name());
216  }
217  return storage_array;
218 
219  } else {
220  this->generate_input_error(p, array, "Invalid definition of CSV include.", false);
221  }
222  return NULL;
223 }
224 
226 {
227  int arr_size;
228  if ( (arr_size = p.get_array_size()) != -1 ) {
229  return this->make_array_storage(p, array, arr_size);
230  } else {
231  this->generate_input_error(p, array, "Invalid type in CSV-included part of IST. Expected is Array!\n", false);
232  }
233 
234  return NULL;
235 }
236 
238 {
239  int pos;
240  if ( check_and_read_position_index(p, pos) ) {
241  IncludeCsvData include_data;
242  include_data.data_type = IncludeDataTypes::type_sel;
243  include_data.storage_indexes = create_indexes_vector(p);
244  include_data.type = selection;
245  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
246  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
247  } else {
248  csv_columns_map_[pos] = include_data;
249  }
250 
251  return new StorageInt( 0 );
252  } else {
253  string item_name = read_string_value(p, selection);
254  try {
255  int value = selection->name_to_int( item_name );
256  return new StorageInt( value );
257  } catch (Type::Selection::ExcSelectionKeyNotFound &exc) {
258  this->generate_input_error(p, selection, "Wrong value '" + item_name + "' of the Selection.", false);
259  }
260  }
261  return NULL;
262 }
263 
265 {
266  int pos;
267  if ( check_and_read_position_index(p, pos) ) {
268  IncludeCsvData include_data;
269  include_data.data_type = IncludeDataTypes::type_bool;
270  include_data.storage_indexes = create_indexes_vector(p);
271  include_data.type = bool_type;
272  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
273  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
274  } else {
275  csv_columns_map_[pos] = include_data;
276  }
277 
278  return new StorageInt( 0 );
279  } else {
280  return new StorageInt( read_bool_value(p, bool_type) );
281  }
282 }
283 
285 {
286  int pos;
287  if ( check_and_read_position_index(p, pos) ) {
288  IncludeCsvData include_data;
289  include_data.data_type = IncludeDataTypes::type_int;
290  include_data.storage_indexes = create_indexes_vector(p);
291  include_data.type = int_type;
292  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
293  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
294  } else {
295  csv_columns_map_[pos] = include_data;
296  }
297 
298  return new StorageInt( 0 );
299  } else {
300  return new StorageInt( read_int_value(p, int_type) );
301  }
302 }
303 
305 {
306  int pos;
307  if ( check_and_read_position_index(p, pos) ) {
308  IncludeCsvData include_data;
309  include_data.data_type = IncludeDataTypes::type_double;
310  include_data.storage_indexes = create_indexes_vector(p);
311  include_data.type = double_type;
312  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
313  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
314  } else {
315  csv_columns_map_[pos] = include_data;
316  }
317 
318  return new StorageDouble( 0.0 );
319  } else {
320  return new StorageDouble( read_double_value(p, double_type) );
321  }
322 }
323 
325 {
326  int pos;
327  if ( check_and_read_position_index(p, pos) ) {
328  IncludeCsvData include_data;
329  include_data.data_type = IncludeDataTypes::type_string;
330  include_data.storage_indexes = create_indexes_vector(p);
331  include_data.type = string_type;
332  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
333  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
334  } else {
335  csv_columns_map_[pos] = include_data;
336  }
337 
338  return new StorageString("");
339  } else {
340  return new StorageString( read_string_value(p, string_type) );
341  }
342 }
343 
345 {
346  vector<unsigned int> csv_storage_indexes( p.path_.size()-csv_subtree_depth_ );
347  for (unsigned int i_source=csv_subtree_depth_, i_target=0; i_source<p.path_.size(); ++i_source, ++i_target ) {
348  ASSERT_GE(p.path_[i_source].first, 0).error();
349  csv_storage_indexes[i_target] = p.path_[i_source].first;
350  }
351  return csv_storage_indexes;
352 }
353 
354 void ReaderInternalCsvInclude::set_storage_from_csv(unsigned int column_index, StorageBase * item_storage, StorageBase * new_storage)
355 {
356  map<unsigned int, IncludeCsvData>::iterator it = csv_columns_map_.find(column_index);
357  ASSERT(it!=csv_columns_map_.end()).error();
358 
359  unsigned int i;
360  StorageBase *loop_storage = item_storage;
361  for (i=0; i<it->second.storage_indexes.size()-1; ++i) loop_storage = loop_storage->get_item( it->second.storage_indexes[i] );
362  loop_storage->set_item( it->second.storage_indexes[i], new_storage );
363 }
364 
366 {
367  string value;
368  try {
369  value = p.get_string_value();
370  } catch (ExcInputError &) {
371  // value is not string, return false
372  return false;
373  }
374 
375  // value must start with '$', follows nonnegative number
376  if ( value.size() && (value.substr(0,1) == "$") ) {
377  try {
378  pos = std::stoi( value.substr(1) );
379  return (pos >= 0);
380  } catch (std::invalid_argument &) {
381  return false;
382  }
383  } else {
384  return false;
385  }
386 }
387 
388 
389 } // namespace Input
Base of classes for declaring structure of the input data.
Definition: type_base.hh:99
Base class for nodes of a data storage tree.
Definition: storage.hh:68
vector< unsigned int > create_indexes_vector(PathBase &p)
Create vector which contains actual indexes of subtree imported in CSV file.
virtual StorageBase * get_item(const unsigned int index) const
Definition: storage.cc:66
virtual bool down(unsigned int index)=0
Dive one level down into path hierarchy.
#define ASSERT_GE(a, b)
Definition of comparative assert macro (Greater or Equal)
Definition: asserts.hh:319
Class for declaration of the input of type Bool.
Definition: type_base.hh:458
Base abstract class used by ReaderToStorage class to iterate over the input tree. ...
Definition: path_base.hh:41
int name_to_int(const string &key) const
Converts given value name key to the value.
std::string as_string() const
Returns string address of current position.
Definition: path_base.cc:48
Definitions of ASSERTS.
Data of one column of including CSV file.
#define ASSERT(expr)
Allow use of shorter versions of macro names if these names are not used with an external library...
Definition: asserts.hh:346
virtual int get_array_size() const =0
Get size of array (sequence type), if object is not array return -1.
Class for declaration of the integral input data.
Definition: type_base.hh:489
StorageBase * make_sub_storage(PathBase &p, const Type::Array *array) override
Create storage of Type::Array type.
Class for declaration of inputs sequences.
Definition: type_base.hh:345
static constexpr bool value
Definition: json.hpp:87
virtual std::string get_record_tag() const =0
Gets value of the record tag, which determines its type.
Class for declaration of the input data that are floating point numbers.
Definition: type_base.hh:540
bool match(std::int64_t value) const
Check valid value of Integer.
Definition: type_base.cc:449
virtual StorageBase * deep_copy() const =0
std::vector< std::pair< int, std::string > > path_
One level of the path_ is either index (nonnegative int) in array or string key in a json object...
Definition: path_base.hh:166
virtual bool is_record_type() const =0
Check if type of head node is record.
bool match(double value) const
Returns true if the given floating point value conforms to the Type::Double bounds.
Definition: type_base.cc:491
StorageBase * read_storage(PathBase &p, const Type::Array *array)
void set_storage_from_csv(unsigned int column_index, StorageBase *item_storage, StorageBase *new_storage)
Set storage of simple input type with value given from CSV file.
Class for declaration of polymorphic Record.
virtual void set_item(unsigned int index, StorageBase *item)
Definition: storage.cc:60
void new_item(unsigned int index, StorageBase *item)
Definition: storage.cc:107
virtual std::string get_string_value() const =0
Get string value of head node or throw exception.
Dedicated class for storing path to input and output files.
Definition: file_path.hh:54
virtual std::int64_t get_int_value() const =0
Get integer value of head node or throw exception.
const TypeBase & get_sub_type() const
Getter for the type of array items.
Definition: type_base.hh:403
#define WarningOut()
Macro defining 'warning' record of log.
Definition: logger.hh:236
bool check_and_read_position_index(PathBase &p, int &pos)
Checks if value on head represents column position in CSV (starts with '$'). If yes, stores position into pos.
virtual void up()=0
Return one level up in the hierarchy.
Class for declaration of the input data that are in string format.
Definition: type_base.hh:588
#define THROW(whole_exception_expr)
Wrapper for throw. Saves the throwing point.
Definition: exceptions.hh:53
Template for classes storing finite set of named values.