Flow123d  master-f44eb46
reader_internal_csv.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file reader_internal_csv.cc
15  * @brief
16  */
17 
18 
20 #include "input/input_type.hh"
21 #include "input/csv_tokenizer.hh"
22 
23 #include "system/asserts.hh" // for Assert, ASSERT
24 #include "system/file_path.hh" // for FilePath, File...
25 #include "system/logger.hh" // for operator<<
26 
27 namespace Input {
28 
29 using namespace std;
30 
31 /*******************************************************************
32  * implementation of ReaderInternalCsvInclude
33  */
34 
36 {}
37 
39 {
40  if ( p.is_record_type() ) { // sub-type must be Record or Abstract type
41  // load path to CSV file
42  std::string included_file;
43  if ( p.down("file") ) {
44  included_file = get_included_file(p);
45  p.up();
46  } else {
47  this->generate_input_error(p, array, "Missing key 'file' defines including input file.", false);
48  }
49 
50  // number of head lines to skip
51  unsigned int n_head_lines = 0;
52  if ( p.down("n_head_lines") ) {
53  try {
54  n_head_lines = p.get_int_value();
55  }
56  catch (ExcInputError & e) {
57  complete_input_error(e, p, ValueTypes::int_type);
58  e << EI_InputType("number of lines to skip");
59  throw;
60  }
61  p.up();
62  }
63 
64  // separator characters
65  stringstream separator;
66  if ( p.down("separator") ) {
67  try {
68  separator << p.get_string_value() << " \t";
69  }
70  catch (ExcInputError & e) {
71  complete_input_error(e, p, ValueTypes::str_type);
72  e << EI_InputType("invalid separator of included CSV file");
73  throw;
74  }
75  p.up();
76  } else {
77  separator << ", \t";
78  }
79 
80  // open CSV file, get number of lines, skip head lines
81  FilePath fp((included_file), FilePath::input_file);
82  CSVTokenizer tok( fp, separator.str() );
83 
84  const Type::Abstract * abstract_type = dynamic_cast<const Type::Abstract *>(&array->get_sub_type());
85  std::string record_name = p.get_record_tag();
86  if ((abstract_type != NULL) && (record_name.size() <= 12)) {
87  // missing record name for Abstract
88  this->generate_input_error(p, array,
89  "Missing record descendant in definition of tag in CSV include of Abstract type. Tag must be in format '!include_csv:Descendant_Name'!",
90  false);
91  }
92  const Type::TypeBase &sub_type = ( abstract_type != NULL ) ? // sub-type of array
93  (abstract_type->get_descendant(record_name.substr(12))) : (array->get_sub_type());
94 
95  StorageBase *item_storage = nullptr; // storage of sub-type record of included array
96  csv_columns_map_.clear();
97  if ( p.down("format") ) {
98  try {
99  csv_subtree_depth_ = p.path_.size();
100  item_storage = make_storage(p, &sub_type);
101  } catch (ExcMultipleDefinitionCsvColumn &e) {
102  e << EI_File(tok.f_name());
103  throw;
104  }
105  p.up();
106  } else {
107  this->generate_input_error(p, array, "Missing key 'format' defines mapping column of CSV file to input subtree.", false);
108  }
109 
110  // get value of maximal column index in map
112  it = csv_columns_map_.end(); --it;
113  unsigned int max_column_index = it->first;
114 
115  unsigned int n_lines = tok.get_n_lines() - n_head_lines;
116  tok.skip_header(n_head_lines);
117  StorageArray *storage_array = new StorageArray(n_lines);
118  std::set<unsigned int> unused_columns;
119  for( unsigned int arr_item=0; arr_item < n_lines; ++arr_item) {
120  unsigned int i_col;
121  tok.next_line();
122  for (i_col=0; !tok.eol(); ++i_col, ++tok) {
123  it = csv_columns_map_.find(i_col);
124  if (it != csv_columns_map_.end()) {
125  switch (it->second.data_type) {
126  case IncludeDataTypes::type_int: {
127  int val;
128  try {
129  val = tok.get_int_val();
130  } catch (ExcWrongCsvFormat &e) {
131  e << EI_Specification("Wrong integer value");
132  e << EI_ErrorAddress(p.as_string());
133  throw;
134  }
135 
136  const Type::Integer *int_type = static_cast<const Type::Integer *>(it->second.type);
137  if ( !int_type->match(val) ) {
138  THROW( ExcWrongCsvFormat() << EI_Specification("Integer value out of bounds")
139  << EI_TokenizerMsg(tok.position_msg()) << EI_ErrorAddress(p.as_string()) );
140  }
141  set_storage_from_csv( i_col, item_storage, new StorageInt(val) );
142  break;
143  }
144  case IncludeDataTypes::type_double: {
145  double val;
146  try {
147  val = tok.get_double_val();
148  } catch (ExcWrongCsvFormat &e) {
149  e << EI_ErrorAddress(p.as_string());
150  throw;
151  }
152 
153  const Type::Double *double_type = static_cast<const Type::Double *>(it->second.type);
154  if ( !double_type->match(val) ) {
155  THROW( ExcWrongCsvFormat() << EI_Specification("Double value out of bounds")
156  << EI_TokenizerMsg(tok.position_msg()) << EI_ErrorAddress(p.as_string()) );
157  }
158  set_storage_from_csv( i_col, item_storage, new StorageDouble(val) );
159  break;
160  }
161  case IncludeDataTypes::type_bool: {
162  int val;
163  try {
164  val = tok.get_int_val();
165  } catch (ExcWrongCsvFormat &e) {
166  e << EI_Specification("Wrong boolean value");
167  e << EI_ErrorAddress(p.as_string());
168  throw;
169  }
170  set_storage_from_csv( i_col, item_storage, new StorageBool(val) );
171  break;
172  }
173  case IncludeDataTypes::type_string: {
174  try {
175  set_storage_from_csv( i_col, item_storage, new StorageString(tok.get_string_val()) );
176  } catch (ExcWrongCsvFormat &e) {
177  e << EI_Specification("Wrong string value");
178  e << EI_ErrorAddress(p.as_string());
179  throw;
180  }
181  break;
182  }
183  case IncludeDataTypes::type_sel: {
184  const Type::Selection *selection = static_cast<const Type::Selection *>(it->second.type);
185  try {
186  std::string item_name = tok.get_string_val();
187  int val = selection->name_to_int( item_name );
188  set_storage_from_csv( i_col, item_storage, new StorageInt(val) );
189  } catch (ExcWrongCsvFormat &e) {
190  e << EI_Specification("Wrong selection value");
191  e << EI_ErrorAddress(p.as_string());
192  throw;
193  } catch (Type::Selection::ExcSelectionKeyNotFound &exc) {
194  THROW( ExcWrongCsvFormat() << EI_Specification("Wrong selection value")
195  << EI_TokenizerMsg(tok.position_msg()) << EI_ErrorAddress(p.as_string()) );
196  }
197  break;
198  }
199  }
200  } else {
201  // add index of unused column
202  unused_columns.insert(i_col);
203  }
204  }
205  if ( max_column_index > (i_col-1) ) {
206  this->generate_input_error(p, array, "Count of columns in CSV file is less than expected index, defined on input.", false);
207  }
208  ASSERT_PTR(item_storage);
209  storage_array->new_item(arr_item, item_storage->deep_copy() );
210  }
211 
212  if (unused_columns.size()) { // print warning with indexes of unused columns
213  stringstream ss;
214  for (std::set<unsigned int>::iterator it=unused_columns.begin(); it!=unused_columns.end(); ++it)
215  ss << (*it) << " ";
216  WarningOut().fmt("Unused columns: {}\nin imported CSV input file: {}\n", ss.str(), tok.f_name());
217  }
218  return storage_array;
219 
220  } else {
221  this->generate_input_error(p, array, "Invalid definition of CSV include.", false);
222  }
223  return NULL;
224 }
225 
227 {
228  int arr_size;
229  if ( (arr_size = p.get_array_size()) != -1 ) {
230  return this->make_array_storage(p, array, arr_size);
231  } else {
232  this->generate_input_error(p, array, "Invalid type in CSV-included part of IST. Expected is Array!\n", false);
233  }
234 
235  return NULL;
236 }
237 
239 {
240  int pos;
241  if ( check_and_read_position_index(p, pos) ) {
242  IncludeCsvData include_data;
243  include_data.data_type = IncludeDataTypes::type_sel;
244  include_data.storage_indexes = create_indexes_vector(p);
245  include_data.type = selection;
246  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
247  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
248  } else {
249  csv_columns_map_[pos] = include_data;
250  }
251 
252  return new StorageInt( 0 );
253  } else {
254  string item_name = read_string_value(p, selection);
255  try {
256  int value = selection->name_to_int( item_name );
257  return new StorageInt( value );
258  } catch (Type::Selection::ExcSelectionKeyNotFound &exc) {
259  this->generate_input_error(p, selection, "Wrong value '" + item_name + "' of the Selection.", false);
260  }
261  }
262  return NULL;
263 }
264 
266 {
267  int pos;
268  if ( check_and_read_position_index(p, pos) ) {
269  IncludeCsvData include_data;
270  include_data.data_type = IncludeDataTypes::type_bool;
271  include_data.storage_indexes = create_indexes_vector(p);
272  include_data.type = bool_type;
273  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
274  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
275  } else {
276  csv_columns_map_[pos] = include_data;
277  }
278 
279  return new StorageInt( 0 );
280  } else {
281  return new StorageInt( read_bool_value(p, bool_type) );
282  }
283 }
284 
286 {
287  int pos;
288  if ( check_and_read_position_index(p, pos) ) {
289  IncludeCsvData include_data;
290  include_data.data_type = IncludeDataTypes::type_int;
291  include_data.storage_indexes = create_indexes_vector(p);
292  include_data.type = int_type;
293  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
294  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
295  } else {
296  csv_columns_map_[pos] = include_data;
297  }
298 
299  return new StorageInt( 0 );
300  } else {
301  return new StorageInt( read_int_value(p, int_type) );
302  }
303 }
304 
306 {
307  int pos;
308  if ( check_and_read_position_index(p, pos) ) {
309  IncludeCsvData include_data;
310  include_data.data_type = IncludeDataTypes::type_double;
311  include_data.storage_indexes = create_indexes_vector(p);
312  include_data.type = double_type;
313  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
314  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
315  } else {
316  csv_columns_map_[pos] = include_data;
317  }
318 
319  return new StorageDouble( 0.0 );
320  } else {
321  return new StorageDouble( read_double_value(p, double_type) );
322  }
323 }
324 
326 {
327  int pos;
328  if ( check_and_read_position_index(p, pos) ) {
329  IncludeCsvData include_data;
330  include_data.data_type = IncludeDataTypes::type_string;
331  include_data.storage_indexes = create_indexes_vector(p);
332  include_data.type = string_type;
333  if (csv_columns_map_.find(pos)!=csv_columns_map_.end()) {
334  THROW( ExcMultipleDefinitionCsvColumn() << EI_ColumnIndex(pos) << EI_ErrorAddress(p.as_string()) );
335  } else {
336  csv_columns_map_[pos] = include_data;
337  }
338 
339  return new StorageString("");
340  } else {
341  return new StorageString( read_string_value(p, string_type) );
342  }
343 }
344 
346 {
347  vector<unsigned int> csv_storage_indexes( p.path_.size()-csv_subtree_depth_ );
348  for (unsigned int i_source=csv_subtree_depth_, i_target=0; i_source<p.path_.size(); ++i_source, ++i_target ) {
349  ASSERT_GE(p.path_[i_source].first, 0).error();
350  csv_storage_indexes[i_target] = p.path_[i_source].first;
351  }
352  return csv_storage_indexes;
353 }
354 
355 void ReaderInternalCsvInclude::set_storage_from_csv(unsigned int column_index, StorageBase * item_storage, StorageBase * new_storage)
356 {
357  map<unsigned int, IncludeCsvData>::iterator it = csv_columns_map_.find(column_index);
358  ASSERT(it!=csv_columns_map_.end()).error();
359 
360  unsigned int i;
361  StorageBase *loop_storage = item_storage;
362  for (i=0; i<it->second.storage_indexes.size()-1; ++i) loop_storage = loop_storage->get_item( it->second.storage_indexes[i] );
363  loop_storage->set_item( it->second.storage_indexes[i], new_storage );
364 }
365 
367 {
368  string value;
369  try {
370  value = p.get_string_value();
371  } catch (ExcInputError &) {
372  // value is not string, return false
373  return false;
374  }
375 
376  // value must start with '$', follows nonnegative number
377  if ( value.size() && (value.substr(0,1) == "$") ) {
378  try {
379  pos = std::stoi( value.substr(1) );
380  return (pos >= 0);
381  } catch (std::invalid_argument &) {
382  return false;
383  }
384  } else {
385  return false;
386  }
387 }
388 
389 
390 } // namespace Input
Definitions of ASSERTS.
#define ASSERT(expr)
Definition: asserts.hh:351
#define ASSERT_GE(a, b)
Definition of comparative assert macro (Greater or Equal) only for debug mode.
Definition: asserts.hh:325
#define ASSERT_PTR(ptr)
Definition of assert macro checking non-null pointer (PTR) only for debug mode.
Definition: asserts.hh:341
Dedicated class for storing path to input and output files.
Definition: file_path.hh:54
@ input_file
Definition: file_path.hh:68
Base abstract class used by ReaderToStorage class to iterate over the input tree.
Definition: path_base.hh:41
std::vector< std::pair< int, std::string > > path_
One level of the path_ is either index (nonnegative int) in array or string key in a json object.
Definition: path_base.hh:166
virtual bool is_record_type() const =0
Check if type of head node is record.
virtual std::string get_record_tag() const =0
Gets value of the record tag, which determines its type.
virtual std::string get_string_value() const =0
Get string value of head node or throw exception.
virtual std::int64_t get_int_value() const =0
Get integer value of head node or throw exception.
virtual void up()=0
Return one level up in the hierarchy.
virtual int get_array_size() const =0
Get size of array (sequence type), if object is not array return -1.
virtual bool down(unsigned int index)=0
Dive one level down into path hierarchy.
std::string as_string() const
Returns string address of current position.
Definition: path_base.cc:48
void set_storage_from_csv(unsigned int column_index, StorageBase *item_storage, StorageBase *new_storage)
Set storage of simple input type with value given from CSV file.
StorageBase * read_storage(PathBase &p, const Type::Array *array)
vector< unsigned int > create_indexes_vector(PathBase &p)
Create vector which contains actual indexes of subtree imported in CSV file.
StorageBase * make_sub_storage(PathBase &p, const Type::Array *array) override
Create storage of Type::Array type.
bool check_and_read_position_index(PathBase &p, int &pos)
Checks if value on head represents column position in CSV (starts with '$'). If yes,...
void new_item(unsigned int index, StorageBase *item)
Definition: storage.cc:107
Base class for nodes of a data storage tree.
Definition: storage.hh:68
virtual StorageBase * get_item(const unsigned int index) const
Definition: storage.cc:66
virtual void set_item(unsigned int index, StorageBase *item)
Definition: storage.cc:60
virtual StorageBase * deep_copy() const =0
Class for declaration of polymorphic Record.
const Record & get_descendant(const string &name) const
Returns reference to the inherited Record with given name.
Class for declaration of inputs sequences.
Definition: type_base.hh:339
Class for declaration of the input of type Bool.
Definition: type_base.hh:452
Class for declaration of the input data that are floating point numbers.
Definition: type_base.hh:534
bool match(double value) const
Returns true if the given floating point value conforms to the Type::Double bounds.
Definition: type_base.cc:489
Class for declaration of the integral input data.
Definition: type_base.hh:483
Template for classes storing finite set of named values.
int name_to_int(const string &key) const
Converts given value name key to the value.
Class for declaration of the input data that are in string format.
Definition: type_base.hh:582
Base of classes for declaring structure of the input data.
Definition: type_base.hh:92
#define THROW(whole_exception_expr)
Wrapper for throw. Saves the throwing point.
Definition: exceptions.hh:53
static constexpr bool value
Definition: json.hpp:87
#define WarningOut()
Macro defining 'warning' record of log.
Definition: logger.hh:278
Array< double > array
Definition: armor.hh:890
Abstract linear system class.
Definition: balance.hh:40
Data of one column of including CSV file.