doc/oofemrefman/superlusolver_8C_source.html

 /*
  *
  *                 #####    #####   ######  ######  ###   ###
  *               ##   ##  ##   ##  ##      ##      ## ### ##
  *              ##   ##  ##   ##  ####    ####    ##  #  ##
  *             ##   ##  ##   ##  ##      ##      ##     ##
  *            ##   ##  ##   ##  ##      ##      ##     ##
  *            #####    #####   ##      ######  ##     ##
  *
  *
  *             OOFEM : Object Oriented Finite Element Code
  *
  *               Copyright (C) 1993 - 2013   Borek Patzak
  *
  *
  *
  *       Czech Technical University, Faculty of Civil Engineering,
  *   Department of Structural Mechanics, 166 29 Prague, Czech Republic
  *
  *  This library is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU Lesser General Public
  *  License as published by the Free Software Foundation; either
  *  version 2.1 of the License, or (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  *  Lesser General Public License for more details.
  *
  *  You should have received a copy of the GNU Lesser General Public
  *  License along with this library; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  */

 #include "sparsemtrx.h"
 #include "floatarray.h"
 #include "compcol.h"
 #include "linsystsolvertype.h"
 #include "classfactory.h"

 #include "SUPERLU_MT/include/slu_mt_ddefs.h"
 #include "superlusolver.h"
 #include <stdlib.h>
 #include <math.h>
 #include "verbose.h"
 //#include "globals.h"

 #ifdef TIME_REPORT
  #include "timer.h"
 #endif

 namespace oofem {
 REGISTER_SparseLinSolver(SuperLUSolver, ST_SuperLU_MT)


 SuperLUSolver :: SuperLUSolver(Domain *d, EngngModel *m) : SparseLinearSystemNM(d, m)
 { }


 SuperLUSolver :: ~SuperLUSolver() { }


 IRResultType
 SuperLUSolver :: initializeFrom(InputRecord *ir)
 {
     return IRRT_OK;
 }

 NM_Status
 SuperLUSolver :: solve(SparseMtrx &Lhs, FloatArray &b, FloatArray &x)
 {
     //1. Step: Transform SparseMtrx *A to SuperMatrix
     //2. Step: Transfrom FloatArray *b to SuperVector
     //3. Step: Transfrom FLoatArray *x to SuperVector
     //3. Step: call superLU Solve with tranformed input parameters
     //4. Step: Transfrom result back to FloatArray *x
     //5. Step: return SUCCESS!

     CompCol *CC = dynamic_cast< CompCol * >( & Lhs );
     if ( CC ) {
         SuperMatrix A, L, U;
         SuperMatrix B, X;
         int_t nprocs;
         fact_t fact;
         trans_t trans;
         yes_no_t refact, usepr;
         equed_t equed;
         double *a;
         int_t *asub, *xa;
         int_t *perm_c;
         int_t *perm_r;
         void *work;
         superlumt_options_t superlumt_options;
         int_t info, lwork, nrhs, /*ldx,*/ panel_size, relax;
         int_t m, n, nnz, permc_spec;
         double *rhsb, *rhsx /*, *xact*/;
         double *R, *C;
         double *ferr, *berr;
         double /*u,*/ drop_tol, rpg, rcond;
         superlu_memusage_t superlu_memusage;
         void parse_command_line();

         /* Default parameters to control factorization. */
         nprocs = omp_get_max_threads(); //omp_get_num_threads() does not work as we are not in parallel region!!;
         printf("Use number of LU threads: %u\n", nprocs);
         fact  = EQUILIBRATE;
         trans = NOTRANS;
         equed = NOEQUIL;
         refact = NO;
         panel_size = sp_ienv(1);
         relax = sp_ienv(2);
         //u     = 1.0;
         usepr = NO;
         drop_tol = 0.0;
         lwork = 0;
         nrhs  = 1;

         m =  CC->giveNumberOfRows();
         n = CC->giveNumberOfColumns();
         nnz = CC->giveNumberOfNonzeros();
         a =  CC->giveValues().givePointer();
         asub =  CC->giveRowIndex().givePointer();
         xa = CC->giveColPtr().givePointer();

         /* Command line options to modify default behavior. */
         //parse_command_line(argc, argv, &nprocs, &lwork, &panel_size, &relax,
         //         &u, &fact, &trans, &refact, &equed);

         if ( lwork > 0 ) {
             work = SUPERLU_MALLOC(lwork);
             printf("Use work space of size LWORK = " IFMT " bytes\n", lwork);
             if ( !work ) {
                 SUPERLU_ABORT("cannot allocate work[]");
             }
         }

         //printf("Use work space of size LWORK = " IFMT " bytes\n", lwork);

 #if ( PRNTlevel == 1 )
         cpp_defs();
         printf( "int_t %d bytes\n", sizeof( int_t ) );
 #endif

 #ifdef TIME_REPORT
         Timer timer;
         timer.startTimer();
 #endif


         dCreate_CompCol_Matrix(& A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);


         if ( !( rhsb = doubleMalloc(m * nrhs) ) ) {
             SUPERLU_ABORT("Malloc fails for rhsb[].");
         }
         if ( !( rhsx = doubleMalloc(m * nrhs) ) ) {
             SUPERLU_ABORT("Malloc fails for rhsx[].");
         }
         dCreate_Dense_Matrix(& B, m, nrhs, b.givePointer(), m, SLU_DN, SLU_D, SLU_GE);
         dCreate_Dense_Matrix(& X, m, nrhs, rhsx, m, SLU_DN, SLU_D, SLU_GE);
         //dPrint_Dense_Matrix(&B);
         //dPrint_Dense_Matrix(&X);

         if ( !( perm_r = intMalloc(m) ) ) {
             SUPERLU_ABORT("Malloc fails for perm_r[].");
         }
         if ( !( perm_c = intMalloc(n) ) ) {
             SUPERLU_ABORT("Malloc fails for perm_c[].");
         }
         if ( !( R = ( double * ) SUPERLU_MALLOC( A.nrow * sizeof( double ) ) ) ) {
             SUPERLU_ABORT("SUPERLU_MALLOC fails for R[].");
         }
         if ( !( C = ( double * ) SUPERLU_MALLOC( A.ncol * sizeof( double ) ) ) ) {
             SUPERLU_ABORT("SUPERLU_MALLOC fails for C[].");
         }
         if ( !( ferr = ( double * ) SUPERLU_MALLOC( nrhs * sizeof( double ) ) ) ) {
             SUPERLU_ABORT("SUPERLU_MALLOC fails for ferr[].");
         }
         if ( !( berr = ( double * ) SUPERLU_MALLOC( nrhs * sizeof( double ) ) ) ) {
             SUPERLU_ABORT("SUPERLU_MALLOC fails for berr[].");
         }

         /*
          * Get column permutation vector perm_c[], according to permc_spec:
          *   permc_spec = 0: natural ordering
          *   permc_spec = 1: minimum degree ordering on structure of A'*A
          *   permc_spec = 2: minimum degree ordering on structure of A'+A
          *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
          */

         permc_spec = 2;
         get_perm_c(permc_spec, & A, perm_c);

         superlumt_options.SymmetricMode = YES;
         superlumt_options.diag_pivot_thresh = 0.0;

         superlumt_options.nprocs = nprocs;
         superlumt_options.fact = fact;
         superlumt_options.trans = trans;
         superlumt_options.refact = refact;
         superlumt_options.panel_size = panel_size;
         superlumt_options.relax = relax;
         superlumt_options.usepr = usepr;
         superlumt_options.drop_tol = drop_tol;
         superlumt_options.PrintStat = NO;
         superlumt_options.perm_c = perm_c;
         superlumt_options.perm_r = perm_r;
         //superlumt_options.work = work;
         superlumt_options.lwork = lwork;
         if ( !( superlumt_options.etree = intMalloc(n) ) ) {
             SUPERLU_ABORT("Malloc fails for etree[].");
         }
         if ( !( superlumt_options.colcnt_h = intMalloc(n) ) ) {
             SUPERLU_ABORT("Malloc fails for colcnt_h[].");
         }
         if ( !( superlumt_options.part_super_h = intMalloc(n) ) ) {
             SUPERLU_ABORT("Malloc fails for colcnt_h[].");
         }

         printf("sym_mode %d\tdiag_pivot_thresh %.4e\n",
                superlumt_options.SymmetricMode,
                superlumt_options.diag_pivot_thresh);

         /*
          * Solve the system and compute the condition number
          * and error bounds using pdgssvx.
          */
         pdgssvx(nprocs, & superlumt_options, & A, perm_c, perm_r, & equed, R, C, & L, & U, & B, & X, & rpg, & rcond, ferr, berr, & superlu_memusage, & info);

         //dPrint_Dense_Matrix(&B);
         //dPrint_Dense_Matrix(&X);

 #if 0
         printf("psgssvx(): info " IFMT "\n", info);

         if ( info == 0 || info == n + 1 ) {
             SCPformat *Lstore;
             NCPformat *Ustore;

             printf("Recip. pivot growth = %e\n", rpg);
             printf("Recip. condition number = %e\n", rcond);
             printf("%8s%16s%16s\n", "rhs", "FERR", "BERR");
             for ( int i = 0; i < nrhs; ++i ) {
                 printf(IFMT "%16e%16e\n", i + 1, ferr [ i ], berr [ i ]);
             }

             Lstore = ( SCPformat * ) L.Store;
             Ustore = ( NCPformat * ) U.Store;
             printf("No of nonzeros in factor L = " IFMT "\n", Lstore->nnz);
             printf("No of nonzeros in factor U = " IFMT "\n", Ustore->nnz);
             printf("No of nonzeros in L+U = " IFMT "\n", Lstore->nnz + Ustore->nnz - n);
             printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions " IFMT "\n",
                    superlu_memusage.for_lu / 1e6, superlu_memusage.total_needed / 1e6,
                    superlu_memusage.expansions);

             fflush(stdout);
         } else if ( info > 0 && lwork == -1 ) {
             printf("** Estimated memory: " IFMT " bytes\n", info - n);
         }
 #else
         if ( info > 0 && lwork == -1 ) {
             printf("** Estimated memory: " IFMT " bytes\n", info - n);
         }
 #endif

         //dPrint_Dense_Matrix(&B);
         // copy B into x (assuming only one rhs)
         x.resize( b.giveSize() );
         this->convertRhs(& X, x);
         SUPERLU_FREE(rhsb);
         SUPERLU_FREE(rhsx);
         //SUPERLU_FREE (xact);
         SUPERLU_FREE(perm_r);
         SUPERLU_FREE(perm_c);
         SUPERLU_FREE(R);
         SUPERLU_FREE(C);
         SUPERLU_FREE(ferr);
         SUPERLU_FREE(berr);
         Destroy_SuperMatrix_Store(& A);
         Destroy_SuperMatrix_Store(& B);
         Destroy_SuperMatrix_Store(& X);
         //    SUPERLU_FREE (superlumt_options.etree);
         //    SUPERLU_FREE (superlumt_options.colcnt_h);
         if ( lwork == 0 ) {
             Destroy_SuperNode_SCP(& L);
             Destroy_CompCol_NCP(& U);
         } else if ( lwork > 0 ) {
             SUPERLU_FREE(work);
         }

 #ifdef TIME_REPORT
         timer.stopTimer();
         OOFEM_LOG_INFO( "SuperLU_MT info: user time consumed by solution: %.2fs\n", timer.getUtime() );
         OOFEM_LOG_INFO( "SuperLU_MT info: real time consumed by solution: %.2fs\n", timer.getWtime() );
 #endif
     } else {
         OOFEM_ERROR("Incompatible sparse storage encountered");
     }


     //dPrint_Dense_Matrix(&B);
     /*
      * Lstore = (SCPformat *) L.Store;
      * Ustore = (NCPformat *) U.Store;
      * printf("No of nonzeros in factor L = " IFMT "\n", Lstore->nnz);
      * printf("No of nonzeros in factor U = " IFMT "\n", Ustore->nnz);
      *
      * fflush(stdout);
      * dPrint_CompCol_Matrix(&L);
      * dPrint_CompCol_Matrix(&U);
      */
     //solved = 1;
     return NM_Success;
 }

 void
 SuperLUSolver :: convertRhs(SuperMatrix *X, FloatArray &x)
 {
     x.zero();

     if ( ( X->Stype == SLU_DN ) && ( X->Dtype == SLU_D ) ) {
         // process only first column                                                    //int r;

         DNformat *data = ( ( DNformat * ) ( X->Store ) );

 #pragma omp parallel
         {
 #pragma omp parallel for
             for ( int r = 0; r <= data->lda - 1; r++ ) {
                 //r = data->nzval[i];
                 x.at(r + 1) = ( ( double * ) data->nzval ) [ r ];
             }
         }
     }   else {
         OOFEM_ERROR("convertRhs: unsupported matrix storage type or data type");
     }
 }

 //dPrint - following code use to print SuperLU matrix
 int_t SuperLUSolver :: dPrint_CompCol_Matrix(SuperMatrix *A)
 {
     NCformat *Astore;
     register int_t i;
     double *dp;

     printf("\nCompCol matrix: ");
     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype, A->Dtype, A->Mtype);
     Astore = ( NCformat * ) A->Store;
     dp = ( double * ) Astore->nzval;
     printf("nrow " IFMT ", ncol " IFMT ", nnz " IFMT "\n", A->nrow, A->ncol, Astore->nnz);
     printf("\nnzval: ");
     for ( i = 0; i < Astore->nnz; ++i ) {
         printf("%f  ", dp [ i ]);
     }
     printf("\nrowind: ");
     for ( i = 0; i < Astore->nnz; ++i ) {
         printf(IFMT, Astore->rowind [ i ]);
     }
     printf("\ncolptr: ");
     for ( i = 0; i <= A->ncol; ++i ) {
         printf(IFMT, Astore->colptr [ i ]);
     }
     printf("\nend CompCol matrix.\n");

     return 0;
 }

 int_t SuperLUSolver :: dPrint_Dense_Matrix(SuperMatrix *A)
 {
     DNformat *Astore;
     register int_t i;
     double *dp;

     printf("\nDense matrix: ");
     printf("Stype %d , Dtype %d , Mtype %d\n", A->Stype, A->Dtype, A->Mtype);
     Astore = ( DNformat * ) A->Store;
     dp = ( double * ) Astore->nzval;
     printf("nrow " IFMT ", ncol " IFMT ", lda " IFMT "\n",
            A->nrow, A->ncol, Astore->lda);
     printf("\nnzval: ");
     for ( i = 0; i < A->nrow; ++i ) {
         printf("%f  ", dp [ i ]);
     }
     printf("\nend Dense matrix.\n");

     return 0;
 }

 int_t
 dCheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg,
                    int_t *colend, int_t *perm)
 {
     register int_t i, j, nzd, nd = 0;

     for ( j = 0; j < n; ++j ) {
         nzd = 0;
         for ( i = colbeg [ j ]; i < colend [ j ]; ++i ) {
             if ( perm [ rowind [ i ] ] == j ) {
                 nzd = 1;
                 ++nd;
                 break;
             }
         }
         if ( nzd == 0 ) {
             printf("Zero diagonal at column " IFMT "\n", j);
         }
     }

     printf(".. dCheckZeroDiagonal() -- # diagonals " IFMT "\n", nd);

     return 0;
 }
 } // end namespace oofem
oofem::CompCol::giveRowIndex
IntArray & giveRowIndex()
Definition: compcol.h:132

oofem::CompCol::giveColPtr
IntArray & giveColPtr()
Definition: compcol.h:133

NM_Success
#define NM_Success
Numerical method exited with success.
Definition: nmstatus.h:47

oofem::Domain
Class and object Domain.
Definition: domain.h:115

oofem::SparseMtrx
Base class for all matrices stored in sparse format.
Definition: sparsemtrx.h:60

oofem::FloatArray::at
double & at(int i)
Coefficient access function.
Definition: floatarray.h:131

oofem::SparseLinearSystemNM
This base class is an abstraction for all numerical methods solving sparse linear system of equations...
Definition: sparselinsystemnm.h:58

oofem::IntArray::givePointer
const int * givePointer() const
Breaks encapsulation.
Definition: intarray.h:336

oofem::SuperLUSolver::dPrint_Dense_Matrix
int_t dPrint_Dense_Matrix(SuperMatrix *A)
Definition: superlusolver.C:373

oofem::NM_Status
unsigned long NM_Status
Mask defining NumMetod Status; which can be asked after finishing computation by Numerical Method...
Definition: nmstatus.h:44

oofem::CompCol::giveValues
FloatArray & giveValues()
Definition: compcol.h:131

sparsemtrx.h

timer.h

oofem::SuperLUSolver::solve
virtual NM_Status solve(SparseMtrx &A, FloatArray &b, FloatArray &x)
Solves the given linear system iteratively by method described by SuperLU SolverType.
Definition: superlusolver.C:70

oofem::SparseMtrx::giveNumberOfRows
int giveNumberOfRows() const
Returns number of rows of receiver.
Definition: sparsemtrx.h:114

OOFEM_LOG_INFO
#define OOFEM_LOG_INFO(...)
Definition: logger.h:127

oofem::SuperLUSolver::initializeFrom
virtual IRResultType initializeFrom(InputRecord *ir)
Definition: superlusolver.C:64

OOFEM_ERROR
#define OOFEM_ERROR(...)
Definition: error.h:61

classfactory.h

oofem::IRRT_OK
Definition: irresulttype.h:47

verbose.h
Initializes the variable VERBOSE, in order to get a few intermediate messages on screen: beginning an...

oofem::FloatArray
Class representing vector of real numbers.
Definition: floatarray.h:82

oofem::SuperLUSolver
Class implementig interface to SuperLU_MT solver.
Definition: superlusolver.h:57

oofem::IRResultType
IRResultType
Type defining the return values of InputRecord reading operations.
Definition: irresulttype.h:47

floatarray.h

oofem::InputRecord
Class representing the general Input Record.
Definition: inputrecord.h:101

oofem::FloatArray::zero
void zero()
Zeroes all coefficients of receiver.
Definition: floatarray.C:658

oofem::REGISTER_SparseLinSolver
REGISTER_SparseLinSolver(IMLSolver, ST_IML)
Definition: imlsolver.C:56

oofem::Timer
Class implementing single timer, providing wall clock and user time capabilities. ...
Definition: timer.h:46

oofem::FloatArray::givePointer
const double * givePointer() const
Gives the pointer to the raw data, breaking encapsulation.
Definition: floatarray.h:469

oofem::SuperLUSolver::~SuperLUSolver
virtual ~SuperLUSolver()
Destructor.
Definition: superlusolver.C:60

compcol.h

oofem::SparseMtrx::giveNumberOfColumns
int giveNumberOfColumns() const
Returns number of columns of receiver.
Definition: sparsemtrx.h:116

oofem::dCheckZeroDiagonal
int_t dCheckZeroDiagonal(int_t n, int_t *rowind, int_t *colbeg, int_t *colend, int_t *perm)
Definition: superlusolver.C:395

oofem::EngngModel
Abstract base class representing the "problem" under consideration.
Definition: engngm.h:181

oofem::Timer::getWtime
double getWtime()
Returns total elapsed wall clock time in seconds.
Definition: timer.C:111

oofem::FloatArray::giveSize
int giveSize() const
Returns the size of receiver.
Definition: floatarray.h:218

oofem
the oofem namespace is to define a context or scope in which all oofem names are defined.

oofem::SuperLUSolver::convertRhs
void convertRhs(SuperMatrix *A, FloatArray &x)
Definition: superlusolver.C:322

oofem::CompCol::giveNumberOfNonzeros
const int giveNumberOfNonzeros()
Definition: compcol.h:136

oofem::Timer::startTimer
void startTimer()
Definition: timer.C:69

oofem::Timer::getUtime
double getUtime()
Returns total user time elapsed in seconds.
Definition: timer.C:105

superlusolver.h

oofem::Timer::stopTimer
void stopTimer()
Definition: timer.C:77

oofem::ST_SuperLU_MT
Definition: linsystsolvertype.h:55

linsystsolvertype.h

oofem::SuperLUSolver::dPrint_CompCol_Matrix
int_t dPrint_CompCol_Matrix(SuperMatrix *A)
Definition: superlusolver.C:345

oofem::CompCol
Definition: compcol.h:84

oofem::FloatArray::resize
void resize(int s)
Resizes receiver towards requested size.
Definition: floatarray.C:631