Flow123d  DF_asm_optimization-d5b8fcb
schur.cc
Go to the documentation of this file.
1 /*!
2  *
3  * Copyright (C) 2015 Technical University of Liberec. All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU General Public License version 3 as published by the
7  * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html)
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12  *
13  *
14  * @file schur.cc
15  * @ingroup la
16  * @brief Assembly explicit Schur complement for the given linear system.
17  * Provides method for resolution of the full original vector of unknowns.
18  *
19  * Aim: Explicit schur should be faster then implicit, i.e.
20  *
21  * @todo
22  * - vyresit navaznost na lin sys - solve a export seq vektoru, redukce ... ?
23  * - inv_a - predava se pri konstrukci, ale neumoznuje jeji reuse - aktualizaci assemblace
24  * resp. nutno si na ni drzet ukazatel venku ... reseni ?
25  * - ? remove old_4_new - just for LSView
26  * - automatic preallocation
27  * - eliminated block given by IS
28  * - in place Schur
29  * - ? nemodifikovat puvodni system, leda skrze jeho metody
30  */
31 
32 #include <petscvec.h>
33 #include <algorithm>
34 #include <limits>
35 #include <petscmat.h>
36 #include <armadillo>
37 #include <petscis.h>
38 
39 #include "system/sys_profiler.hh"
40 #include "la/distribution.hh"
42 #include "system/system.hh"
43 #include "la/linsys.hh"
44 #include "la/schur.hh"
45 
46 /**
47  * Create Schur complement system.
48  * @param[in] orig : original system
49  * @param[in] inv_a : inversion of the A block
50  * @param[in] ia : index set of the A block,
51  * default continuous given by inv_a:
52  * proc 1 2 3
53  *
54  * Orig: ****** ****** ****
55  * IA : *** ** ***
56  *
57  *
58  */
59 
61 : LinSys_PETSC(ds), IsA(ia), IsB(ib), state(created)
62 {
63  // check index set
64  ASSERT_PTR(IsA).error("Index set IsA is not defined.\n");
65 
66  // initialize variables
67  Compl = NULL;
68  A = NULL;
69  IA = NULL;
70  B = NULL;
71  Bt = NULL;
72  C = NULL;
73  xA = NULL;
74  IAB = NULL;
75  IsB = NULL;
76  RHS1 = NULL;
77  RHS2 = NULL;
78  Sol1 = NULL;
79  Sol2 = NULL;
80  rhs1sc = NULL;
81  rhs2sc = NULL;
82  sol1sc = NULL;
83  sol2sc = NULL;
84  ds_ = NULL;
85 
86  // create A block index set
87  ISGetLocalSize(IsA, &loc_size_A);
88 
89  // create B block index set
90  if ( IsB == nullptr ) {
91 
92  // compute dual indexset
93 
94  vector<bool> a_used(ds->lsize(), false);
95 
96  const PetscInt *loc_indices;
97  ISGetIndices(IsA, &loc_indices);
98  for(int i=0; i < loc_size_A; i++)
99  a_used[ loc_indices[i] - ds->begin()] = true;
100  ISRestoreIndices(IsA, &loc_indices);
101 
102  loc_size_B = ds->lsize() - loc_size_A;
103  PetscInt *b_indices;
104  PetscMalloc(sizeof(PetscInt)*loc_size_B, &b_indices);
105  for(uint i=0, j=0; i < ds->lsize(); i++)
106  if (! a_used[i]) b_indices[j++] = i + ds->begin();
107  ISCreateGeneral(PETSC_COMM_WORLD, loc_size_B, b_indices, PETSC_COPY_VALUES, &IsB);
108  PetscFree(b_indices);
109  } else {
110  ISGetLocalSize(IsB, &loc_size_B);
111  }
112 }
113 
114 
116 : LinSys_PETSC(other),
117  loc_size_A(other.loc_size_A), loc_size_B(other.loc_size_B), state(other.state),
118  Compl(other.Compl), ds_(other.ds_)
119 {
120  MatCopy(other.A, A, DIFFERENT_NONZERO_PATTERN);
121  MatCopy(other.IA, IA, DIFFERENT_NONZERO_PATTERN);
122  MatCopy(other.IAB, IAB, DIFFERENT_NONZERO_PATTERN);
123  ISCopy(other.IsA, IsA);
124  ISCopy(other.IsB, IsB);
125  VecCopy(other.RHS1, RHS1);
126  VecCopy(other.RHS2, RHS2);
127  VecCopy(other.Sol1, Sol1);
128  VecCopy(other.Sol2, Sol2);
129 
130  B = NULL;
131  Bt = NULL;
132  xA = NULL;
133  C = NULL;
134 }
135 
136 
137 
139 {
141 
142  ASSERT_PTR(Compl).error();
143  Compl->set_from_input( in_rec );
144 }
145 
146 /**
147  * COMPUTE A SCHUR COMPLEMENT OF A PETSC MATRIX
148  *
149  * given symmetric original matrix Orig has form
150  * A B x_1 RHS_1
151  * B' C * x_2 = RHS_2
152  * where the first block is given by index set IsA, and the second block by IsB
153  * user has to provide inverse IA of the A-block
154  * we suppose that original matrix have non-zero pattern for the schur complement
155  *
156  * we return: Shur - schur complement, ShurRHS - RHS of the complemented system:
157  * (B' * IA * B - C) * x_2 = (B' * IA * RHS_1 - RHS_2)
158  * IAB - a matrix to compute eliminated part of the solution:
159  * x_1 = IA * RHS_1 - IAB * x_2
160  *
161  * Actually as B' is stored separetly, the routine can be used also for nonsymetric original
162  * system
163  *
164  */
165 
167 {
168  START_TIMER("form schur complement");
169 
170 
171  PetscErrorCode ierr = 0;
172  MatReuse mat_reuse; // reuse structures after first computation of schur
173  MatStructure mat_subset_pattern;
174  PetscScalar *sol_array;
175 
176  mat_reuse=MAT_REUSE_MATRIX;
177  mat_subset_pattern=SUBSET_NONZERO_PATTERN;
178  if (state==created) {
179  mat_reuse=MAT_INITIAL_MATRIX; // indicate first construction
180  mat_subset_pattern=DIFFERENT_NONZERO_PATTERN;
181 
182  // create complement system
183  // TODO: introduce LS as true object, clarify its internal states
184  // create RHS sub vecs RHS1, RHS2
185  VecCreateMPI(PETSC_COMM_WORLD, loc_size_A, PETSC_DETERMINE, &(RHS1));
186  VecCreateMPI(PETSC_COMM_WORLD, loc_size_B, PETSC_DETERMINE, &(RHS2));
187  VecScatterCreate(rhs_, IsA, RHS1, PETSC_NULL, &rhs1sc);
188  VecScatterCreate(rhs_, IsB, RHS2, PETSC_NULL, &rhs2sc);
189 
190  // create Solution sub vecs Sol1, Sol2, Compl->solution
191  VecCreateMPI(PETSC_COMM_WORLD, loc_size_A, PETSC_DETERMINE, &(Sol1));
192  VecCreateMPI(PETSC_COMM_WORLD, loc_size_B, PETSC_DETERMINE, &(Sol2));
193  VecScatterCreate(solution_, IsA, Sol1, PETSC_NULL, &sol1sc);
194  VecScatterCreate(solution_, IsB, Sol2, PETSC_NULL, &sol2sc);
195 
196  VecGetArray( Sol2, &sol_array );
197  Compl->set_solution( sol_array );
198  VecRestoreArray( Sol2, &sol_array );
199 
200 
201  }
202  DebugOut() << print_var(mat_reuse) << print_var(matrix_changed_) << print_var(state);
203 
204  // compose Schur complement
205  // Petsc need some fill estimate for results of multiplication in form nnz(A*B)/(nnz(A)+nnz(B))
206  // for the first Schur compl: IA*B is bounded by ( d*(d+1) )/( d*d+2*d ) <= 5/6 for d<=4
207  // B'*IA*B bounded by ( (d+1)*(d+1) )/ ( d*(d+1) + d ) ~ 1
208  // for the second Schur : IA*B have fill ratio ~ 1.
209  // B'*IA*B ... ( N/2 *(2*N-1) )/( 2 + 2*N ) <= 1.4
210  // nevertheless Petsc does not allows fill ratio below 1. so we use 1.1 for the first
211  // and 1.5 for the second multiplication
212 
213  // TODO:
214  // In order to let PETSC allocate structure of the complement we can not perform MatGetSubMatrix
215  // and MatAXPY on the same complement matrix, since one operation change the matrix structure for the other.
216  //
217  // Probably no way to make this optimal using high level methods. We should have our own
218  // format for schur complement matrix, store local systems and perform elimination localy.
219  // Or even better assembly the complement directly. (not compatible with raw P0 method)
220 
221  if (matrix_changed_) {
223 
224  // compute IAB=IA*B, loc_size_B removed
225  ierr+=MatGetSubMatrix(matrix_, IsA, IsB, mat_reuse, &B);
226  ierr+=MatMatMult(IA, B, mat_reuse, 1.0 ,&(IAB)); // 6/7 - fill estimate
227  // compute xA=Bt* IAB = Bt * IA * B, locSizeA removed
228  ierr+=MatGetSubMatrix(matrix_, IsB, IsA, mat_reuse, &(Bt));
229  ierr+=MatMatMult(Bt, IAB, mat_reuse, 1.9 ,&(xA)); // 1.1 - fill estimate (PETSC report values over 1.8)
230 
231  // get C block, loc_size_B removed
232  ierr+=MatGetSubMatrix( matrix_, IsB, IsB, mat_reuse, &C);
233 
234  if (state==created) MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, const_cast<Mat *>( Compl->get_matrix() ) );
235  MatZeroEntries( *( Compl->get_matrix()) );
236 
237  // compute complement = (-1)cA+xA = Bt*IA*B - C
238  if ( is_negative_definite() ) {
239  ierr+=MatAXPY(*( Compl->get_matrix() ), 1, C, SUBSET_NONZERO_PATTERN);
240  ierr+=MatAXPY(*( Compl->get_matrix() ), -1, xA, mat_subset_pattern);
241  } else {
242  ierr+=MatAXPY(*( Compl->get_matrix() ), -1, C, SUBSET_NONZERO_PATTERN);
243  ierr+=MatAXPY(*( Compl->get_matrix() ), 1, xA, mat_subset_pattern);
244  }
246 
247  ASSERT( ierr == 0 ).error("PETSC Error during calculation of Schur complement.\n");
248 
249  }
250 
251  form_rhs();
252 
253  matrix_changed_ = false;
254 
255  state=formed;
256 }
257 
259 {
260  START_TIMER("form rhs");
261  if (rhs_changed_ || matrix_changed_) {
262  VecScatterBegin(rhs1sc, rhs_, RHS1, INSERT_VALUES, SCATTER_FORWARD);
263  VecScatterEnd( rhs1sc, rhs_, RHS1, INSERT_VALUES, SCATTER_FORWARD);
264  VecScatterBegin(rhs2sc, rhs_, RHS2, INSERT_VALUES, SCATTER_FORWARD);
265  VecScatterEnd( rhs2sc, rhs_, RHS2, INSERT_VALUES, SCATTER_FORWARD);
266 
267  MatMultTranspose(IAB, RHS1, *( Compl->get_rhs() ));
268  VecAXPY(*( Compl->get_rhs() ), -1, RHS2);
269  if ( is_negative_definite() ) {
270  VecScale(*( Compl->get_rhs() ), -1.0);
271  }
273  rhs_changed_ = false;
274  }
275 
276  state=formed;
277 }
278 
279 
280 
281 void SchurComplement::set_tolerances(double r_tol, double a_tol, double d_tol, unsigned int max_it)
282 {
283  LinSys_PETSC::set_tolerances(r_tol, a_tol, d_tol, max_it);
284  if (Compl !=nullptr) Compl->set_tolerances(r_tol, a_tol, d_tol, max_it);
285 }
286 
288 {
289  ASSERT_PTR(ls).error();
290  Compl = ls;
291 }
292 
293 
295 {
296  if (ds_ != NULL) delete ds_;
297  ds_ = new Distribution(loc_size_B, PETSC_COMM_WORLD);
298  return ds_;
299 }
300 
302 {
303  START_TIMER("create inversion matrix");
304  PetscInt ncols, pos_start, pos_start_IA;
305 
306  MatReuse mat_reuse=MAT_REUSE_MATRIX;
307  if (state==created) mat_reuse=MAT_INITIAL_MATRIX; // indicate first construction
308 
309  MatGetSubMatrix(matrix_, IsA, IsA, mat_reuse, &A);
310  MatDuplicate(A, MAT_DO_NOT_COPY_VALUES, &IA);
311  //MatGetSubMatrix(matrix_, IsA, IsA, mat_reuse, &IA);
312 
313  MatGetOwnershipRange(A,&pos_start,PETSC_NULL);
314  MatGetOwnershipRange(IA,&pos_start_IA,PETSC_NULL);
315 
316  std::vector<PetscInt> submat_rows;
317  const PetscInt *cols;
318  const PetscScalar *vals;
319 
320  std::vector<unsigned int> processed_rows(loc_size_A,0);
321 
322  unsigned int mat_block=1; //actual processed block of matrix
323  for(unsigned int loc_row=0; loc_row < processed_rows.size(); loc_row++) {
324  if (processed_rows[loc_row] != 0) continue;
325 
326  PetscInt min=std::numeric_limits<int>::max(), max=-1, size_submat;
327  PetscInt b_vals = 0; // count of values stored in B-block of Orig system
328  submat_rows.clear();
329  MatGetRow(A, loc_row + pos_start, &ncols, &cols, PETSC_NULL);
330  for (PetscInt i=0; i<ncols; i++) {
331  if (cols[i] < pos_start || cols[i] >= pos_start+loc_size_A) {
332  b_vals++;
333  } else {
334  if (cols[i] < min) {
335  min=cols[i];
336  }
337  if (cols[i] > max) {
338  max=cols[i];
339  }
340  }
341  }
342  size_submat = max - min + 1;
343  ASSERT(ncols-b_vals == size_submat).error("Submatrix cannot contains empty values.\n");
344 
345  MatRestoreRow(A, loc_row + pos_start, &ncols, &cols, PETSC_NULL);
346  arma::mat submat2(size_submat, size_submat);
347  submat2.zeros();
348  for (PetscInt i=0; i<size_submat; i++) {
349  processed_rows[ loc_row + i ] = mat_block;
350  submat_rows.push_back( i + loc_row + pos_start_IA );
351  MatGetRow(A, i + loc_row + pos_start, &ncols, &cols, &vals);
352  for (PetscInt j=0; j<ncols; j++) {
353  if (cols[j] >= pos_start && cols[j] < pos_start+loc_size_A) {
354  submat2( i, cols[j] - loc_row - pos_start ) = vals[j];
355  }
356  }
357  MatRestoreRow(A, i + loc_row + pos_start, &ncols, &cols, &vals);
358  }
359  // get inversion matrix
360  arma::mat invmat = submat2.i();
361  // stored to inversion IA matrix
362  const PetscInt* rows = &submat_rows[0];
363  MatSetValues(IA, submat_rows.size(), rows, submat_rows.size(), rows, invmat.memptr(), INSERT_VALUES);
364 
365  mat_block++;
366  }
367 
368  MatAssemblyBegin(IA, MAT_FINAL_ASSEMBLY);
369  MatAssemblyEnd(IA, MAT_FINAL_ASSEMBLY);
370 
371 
372  /*
373  MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, locIA, PETSC_DECIDE, PETSC_DECIDE, reuse, &IA);
374  */
375 }
376 
377 
379 {
380  if (Compl != NULL) {
381  return Compl->get_solution_precision();
382  }
383  return std::numeric_limits<double>::infinity();
384 }
385 
386 
388  START_TIMER("SchurComplement::solve");
389  this->form_schur();
390 
391  VecScatterBegin(sol1sc, solution_, Sol1, INSERT_VALUES, SCATTER_FORWARD);
392  VecScatterEnd( sol1sc, solution_, Sol1, INSERT_VALUES, SCATTER_FORWARD);
393  VecScatterBegin(sol2sc, solution_, Sol2, INSERT_VALUES, SCATTER_FORWARD);
394  VecScatterEnd( sol2sc, solution_, Sol2, INSERT_VALUES, SCATTER_FORWARD);
395 
396  //output schur complement in matlab file
397 // string output_file = FilePath("schur.m", FilePath::output_file);
398 // PetscViewer viewer;
399 // PetscViewerASCIIOpen(PETSC_COMM_WORLD, output_file.c_str(), &viewer);
400 // PetscViewerSetFormat(viewer, PETSC_VIEWER_ASCII_MATLAB);
401 // MatView( *const_cast<Mat*>(Compl->get_matrix()), viewer);
402 // VecView( *const_cast<Vec*>(Compl->get_rhs()), viewer);
403 
404  LinSys::SolveInfo si = Compl->solve();
405 // VecView(Compl->get_solution(), viewer);
406 
407  // TODO: Resolve step is not necessary inside of nonlinear solver. Can optimize.
408  this->resolve();
409  return si;
410 }
411 
412 
413 /**
414  * COMPUTE ELIMINATED PART OF THE ORIG. SYS. & RESTORE RHS and SOLUTION VECTORS
415  * x_1 = IA * RHS_1 - IAB * x_2
416  */
417 
419 {
420  this->form_schur();
421 
422  START_TIMER("SchurComplemet::resolve without form schur");
423 
424  chkerr(MatMult(IAB,Compl->get_solution(),Sol1));
425  chkerr(VecScale(Sol1,-1));
426  chkerr(MatMultAdd(IA,RHS1,Sol1,Sol1));
427  VecScatterBegin(sol1sc, Sol1, solution_, INSERT_VALUES, SCATTER_REVERSE);
428  VecScatterEnd( sol1sc, Sol1, solution_, INSERT_VALUES, SCATTER_REVERSE);
429  VecScatterBegin(sol2sc, Sol2, solution_, INSERT_VALUES, SCATTER_REVERSE);
430  VecScatterEnd( sol2sc, Sol2, solution_, INSERT_VALUES, SCATTER_REVERSE);
431 }
432 
433 
435 {
436  //DebugOut() << print_var(LinSys_PETSC::compute_residual());
437  resolve();
439 }
440 
441 
442 
443 /**
444  * SCHUR COMPLEMENT destructor
445  */
447 
448  if ( A != NULL ) chkerr(MatDestroy(&A));
449  if ( B != NULL ) chkerr(MatDestroy(&B));
450  if ( Bt != NULL ) chkerr(MatDestroy(&Bt));
451  if ( C != NULL ) chkerr(MatDestroy(&C));
452  if ( xA != NULL ) chkerr(MatDestroy(&xA));
453  if ( IA != NULL ) chkerr(MatDestroy(&IA));
454  if ( IAB != NULL ) chkerr(MatDestroy(&IAB));
455  if ( IsA != NULL ) chkerr(ISDestroy(&IsA));
456  if ( IsB != NULL ) chkerr(ISDestroy(&IsB));
457  if ( RHS1 != NULL ) chkerr(VecDestroy(&RHS1));
458  if ( RHS2 != NULL ) chkerr(VecDestroy(&RHS2));
459  if ( Sol1 != NULL ) chkerr(VecDestroy(&Sol1));
460  if ( Sol2 != NULL ) chkerr(VecDestroy(&Sol2));
461  if ( rhs1sc != NULL ) chkerr(VecScatterDestroy(&rhs1sc));
462  if ( rhs2sc != NULL ) chkerr(VecScatterDestroy(&rhs2sc));
463  if ( sol1sc != NULL ) chkerr(VecScatterDestroy(&sol1sc));
464  if ( sol2sc != NULL ) chkerr(VecScatterDestroy(&sol2sc));
465  if ( IA != NULL ) chkerr(MatDestroy(&IA));
466 
467  if (Compl != NULL) delete Compl;
468  if (ds_ != NULL) delete ds_;
469 
470 }
#define ASSERT(expr)
Definition: asserts.hh:351
#define ASSERT_PTR(ptr)
Definition of assert macro checking non-null pointer (PTR) only for debug mode.
Definition: asserts.hh:341
unsigned int begin(int proc) const
get starting local index
unsigned int lsize(int proc) const
get local size
Accessor to the data with type Type::Record.
Definition: accessors.hh:291
double compute_residual() override
Mat matrix_
Petsc matrix of the problem.
LinSys::SolveInfo solve() override
void set_from_input(const Input::Record in_rec) override
Vec rhs_
PETSc vector constructed with vx array.
const Mat * get_matrix() override
Definition: linsys_PETSC.hh:69
double get_solution_precision() override
const Vec * get_rhs() override
Definition: linsys_PETSC.hh:74
void set_tolerances(double r_tol, double a_tol, double d_tol, unsigned int max_it) override
Definition: linsys_PETSC.cc:93
const Vec & get_solution()
Definition: linsys.hh:282
void set_solution(Vec sol_vec)
Definition: linsys.hh:290
void set_rhs_changed()
Definition: linsys.hh:218
friend class SchurComplement
Definition: linsys.hh:91
void set_matrix_changed()
Definition: linsys.hh:212
bool is_negative_definite()
Definition: linsys.hh:600
Vec solution_
PETSc vector constructed with vb array.
Definition: linsys.hh:686
bool matrix_changed_
true if the matrix was changed since the last solve
Definition: linsys.hh:683
bool rhs_changed_
true if the right hand side was changed since the last solve
Definition: linsys.hh:684
SchurState state
Definition: schur.hh:161
Distribution * ds_
Definition: schur.hh:166
VecScatter rhs2sc
Definition: schur.hh:158
void resolve()
Definition: schur.cc:418
Distribution * make_complement_distribution()
get distribution of complement object if complement is defined
Definition: schur.cc:294
void set_from_input(const Input::Record in_rec) override
Definition: schur.cc:138
void set_tolerances(double r_tol, double a_tol, double d_tol, unsigned int max_it) override
Definition: schur.cc:281
VecScatter sol2sc
Definition: schur.hh:159
double compute_residual() override
Definition: schur.cc:434
void set_complement(LinSys_PETSC *ls)
Set complement LinSys object.
Definition: schur.cc:287
void form_schur()
Definition: schur.cc:166
void form_rhs()
Definition: schur.cc:258
LinSys::SolveInfo solve() override
Definition: schur.cc:387
int loc_size_A
Definition: schur.hh:137
~SchurComplement()
Definition: schur.cc:446
double get_solution_precision() override
get precision of solving
Definition: schur.cc:378
VecScatter rhs1sc
Definition: schur.hh:158
VecScatter sol1sc
Definition: schur.hh:159
void create_inversion_matrix()
create IA matrix
Definition: schur.cc:301
int loc_size_B
Definition: schur.hh:137
LinSys_PETSC * Compl
Definition: schur.hh:164
Support classes for parallel programing.
Wrappers for linear systems based on MPIAIJ and MATIS format.
#define print_var(var)
Definition: logger.hh:292
#define DebugOut()
Macro defining 'debug' record of log.
Definition: logger.hh:284
unsigned int uint
ArmaMat< double, N, M > mat
Definition: armor.hh:936
Assembly explicit Schur complement for the given linear system. Provides method for resolution of the...
@ formed
Definition: schur.hh:60
@ created
Definition: schur.hh:59
#define START_TIMER(tag)
Starts a timer with specified tag.
void chkerr(unsigned int ierr)
Replacement of new/delete operator in the spirit of xmalloc.
Definition: system.hh:142