/* 
  Copyright (c) 1997-2003 Gavin E. Crooks <gec@compbio.berkeley.edu> 
		     Univ. of California, Berkeley

  Permission is hereby granted, free of charge, to any person obtaining a 
  copy of this software and associated documentation files (the "Software"),
  to deal in the Software without restriction, including without limitation
  the rights to use, copy, modify, merge, publish, distribute, sublicense,
  and/or sell copies of the Software, and to permit persons to whom the
  Software is furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included
  in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
  THE SOFTWARE.

  (This is the MIT Open Source License, 
  http://www.opensource.org/licenses/mit-license.html)

*/

/* *******************************************************************
   Computational Biology Toolkit - Utilities

   $Id: cbt_util.c,v 1.1 2003/08/20 22:03:42 gec Exp $
******************************************************************** */


//! @file
//! Computational Biology Toolkit - Utilities.
//!



#include <stdio.h>
#include <stdarg.h>
#include <time.h>
#include <stdlib.h>
#include <math.h>

#include "cbt_util.h"




//! Bits per nat = log_2(e) = 1.4426950...
//! Multiplying a quantity measured in nats by this factor gives bits.
const double cbt_bits_per_nat = 1.44269504088896340735992468100;


//! Standard maximum line length for output files
const int cbt_line_length = 78;

//! Maximum line length for input files. If we unexpectedly run
//! past this length while parsing a line, then we assume that
//! something has gone very, very wrong.
const int cbt_line_length_max = 1000;



//! Current library-wide error handling mode. Consulted by cbt_error()
//! and replaced through cbt_set_on_error(); starts as CBT_WARN_ON_ERROR.
static int cbt_on_error = CBT_WARN_ON_ERROR;

//! Select the library-wide error handling mode used by cbt_error().
//! @param e   The new mode, e.g. CBT_WARN_ON_ERROR, CBT_CONTINUE_ON_ERROR
//!            or CBT_EXIT_ON_ERROR. The value is stored without validation.
//! @return    The mode that was in effect before this call
int
cbt_set_on_error(const int e) {
  const int previous = cbt_on_error;

  cbt_on_error = e;
  return previous;
}


//! Report a library error according to the current error handling mode.
//! In CBT_CONTINUE_ON_ERROR mode nothing is printed. In every other mode
//! a one-line diagnostic is written to stderr, and in CBT_EXIT_ON_ERROR
//! mode the program then terminates with exit status 2.
//! @param err_code  Error code; handed back unchanged to the caller
//! @param file      Source file name shown in the diagnostic
//! @param line      Source line number shown in the diagnostic
//! @param func      Function name shown in the diagnostic
//! @param reason    Description of what went wrong
//! @return          err_code, whenever the function returns at all
int
cbt_error(const int err_code, 
          const char file[], 
          const int line, 
          const char func[], 
          const char reason[] ) {
  // The code parameter is deliberately not called `errno`: that
  // identifier belongs to the <errno.h> macro and must not be redefined.
  if( cbt_on_error == CBT_CONTINUE_ON_ERROR) 
    return err_code;

  fprintf(stderr, "CBT Error: %s: %d: %s: %s\n",
          file, line, func, reason);

  if( cbt_on_error == CBT_EXIT_ON_ERROR) 
    exit(2);

  return err_code;
}




//! Return the index of the minimum element of an array of ints.
//! If several elements are equally minimal, then the index of
//! the first such element is returned. For an empty array
//! (len == 0) the result is 0 and vec is never read.
//! @param len     The length of the array
//! @param vec     An array of integers 
//! @return        Index of the first minimal element, or 0 if len is 0
//! @author   GEC 2003

// Defined without `inline`: a bare C99 `inline` on this definition would
// not guarantee that a linkable external definition is emitted.
size_t 
cbt_argmin(const size_t len, const int vec[len]){
  size_t i;
  size_t argmin = 0;

  // Starting at 1 and comparing against vec[argmin] means that arrays of
  // length 0 or 1 are handled without touching any element.
  for(i = 1; i<len; i++) {
    if(vec[i]<vec[argmin]) {
      argmin = i;
    }
  }
  return argmin;  
}


//! Return the index of the minimum element of an array of doubles.
//! If several elements are equally minimal, then the index of
//! the first such element is returned. For an empty array
//! (len == 0) the result is 0 and vec is never read.
//! @param len     The length of the array
//! @param vec     An array of doubles 
//! @return        Index of the first minimal element, or 0 if len is 0
//! @author   GEC 2003

// Defined without `inline`: a bare C99 `inline` on this definition would
// not guarantee that a linkable external definition is emitted.
size_t 
cbt_argmin_dbl(const size_t len, const double vec[len]){
  size_t i;
  size_t argmin = 0;

  // Starting at 1 and comparing against vec[argmin] means that arrays of
  // length 0 or 1 are handled without touching any element.
  for(i = 1; i<len; i++) {
    if(vec[i]<vec[argmin]) {
      argmin = i;
    }
  }
  return argmin;  
}


//! Return the index of the maximum element of an array of ints.
//! If several elements are equally maximal, then the index of
//! the first such element is returned. For an empty array
//! (len == 0) the result is 0 and vec is never read.
//! @param len     The length of the array
//! @param vec     An array of integers 
//! @return        Index of the first maximal element, or 0 if len is 0
//! @author   GEC 2003

// Defined without `inline`: a bare C99 `inline` on this definition would
// not guarantee that a linkable external definition is emitted.
size_t 
cbt_argmax(const size_t len, const int vec[len]){
  size_t i;
  size_t argmax = 0;

  // Starting at 1 and comparing against vec[argmax] means that arrays of
  // length 0 or 1 are handled without touching any element.
  for(i = 1; i<len; i++) {
    if(vec[i]>vec[argmax]) {
      argmax = i;
    }
  }
  return argmax;
}

//! Return the index of the maximum element of an array of doubles.
//! If several elements are equally maximal, then the index of
//! the first such element is returned. For an empty array
//! (len == 0) the result is 0 and vec is never read.
//! @param len     The length of the array
//! @param vec     An array of doubles 
//! @return        Index of the first maximal element, or 0 if len is 0
//! @author   GEC 2003

// Defined without `inline`: a bare C99 `inline` on this definition would
// not guarantee that a linkable external definition is emitted.
size_t 
cbt_argmax_dbl(const size_t len, const double vec[len]){
  size_t i;
  size_t argmax = 0;

  // Starting at 1 and comparing against vec[argmax] means that arrays of
  // length 0 or 1 are handled without touching any element.
  for(i = 1; i<len; i++) {
    if(vec[i]>vec[argmax]) {
      argmax = i;
    }
  }
  return argmax;
}


//! Write a formatted message, followed by a newline, to stderr and
//! then terminate the program with exit status 2. Does not return.
//! @param comment  printf-style format string; the remaining arguments
//!                 supply the values for its conversions.
//! @author   GEC 1997

void 
cbt_die(const char *comment, ...) {
  va_list args;

  va_start(args, comment);
  vfprintf(stderr, comment, args);
  va_end(args);

  fprintf(stderr, "\n");
  exit(2);
}


//! Produce a clock-derived number by adding the value of clock() to the
//! value of time(NULL). The result is determined entirely by those two
//! readings.
//! @return the sum of the two clock readings, as an unsigned long
//! @author   Adapted from Dan Pop <danpop@cernapo.cern.ch> comp.lang.c.moderated 

unsigned long int  
cbt_ran_clock(void) {
  const unsigned long int cpu_ticks = (unsigned long int) clock();
  const unsigned long int wall_secs = (unsigned long int) time(NULL);

  return cpu_ticks + wall_secs;
}


//! Divide each element of vec by the total sum of vec, in place.
//! No check is made for a zero total: the elements are then simply
//! the result of dividing by zero.
//! @param len  Length of the vec 
//! @param vec  An array of doubles; overwritten with the scaled values
//! @return     The prenormalized sum of vec (0.0 when len is 0)
//! @author     GEC 2003
double 
cbt_normalize(const size_t len, double vec[len]) {
  size_t k;   // same type as len, so the comparison is never mixed-sign
  double total = 0.0;
  
  for(k=0; k<len; k++) total += vec[k];
  for(k=0; k<len; k++) vec[k] /= total;
  return total;	
}


//! Calculate the arithmetic mean of the array vec.
//! The elements are summed in order and the sum is divided by len;
//! no check is made for len == 0 (the result is then 0.0/0.0).
//! @param len  Length of the vec 
//! @param vec  An array of doubles
//! @return     The arithmetic mean
//! @author     GEC 2003
//! @warning    This is not a sophisticated implementation.
double 
cbt_mean(const size_t len, const double vec[len]) {
  size_t k;   // same type as len, so the comparison is never mixed-sign
  double total = 0.0;

  for(k=0; k<len; k++) total += vec[k];
  return total/((double) len);
}


//! Calculate the variance of an array of doubles: the sum of squared
//! deviations from cbt_mean(), divided by (len - 1).
//! Arrays with fewer than two elements give 0.0.
//! @param len   Length of the array data
//! @param data  An array of doubles
//! @return      The variance
//! @author      GEC 2003
//! @warning     This is not a sophisticated implementation.
double 
cbt_variance(const size_t len,
             const double data[]) {
  double centre;
  double sum_sq = 0.0;
  size_t k;

  // Fewer than two samples: report zero variance
  if(len < 2) return 0.0;

  centre = cbt_mean(len, data);

  for(k = 0; k < len; k++) {
    const double dev = data[k] - centre;
    sum_sq += dev*dev;
  }

  return sum_sq / ( (double) len - 1.0);
}


//! Calculate the standard deviation of an array of doubles, i.e. the
//! square root of cbt_variance() for the same data.
//! @param len   Length of the array data
//! @param data  An array of doubles
//! @return      The standard deviation
//! @author      GEC 2003
//! @warning     This is not a sophisticated implementation.
double 
cbt_stddev(const size_t len,
           const double data[]) {
  const double variance = cbt_variance(len, data);

  return sqrt(variance);
}


//! Calculate the entropy of the probability distribution, prob:
//! the sum of -p*log(p) over the entries, converted from nats to bits.
//! Entries that are not strictly positive contribute nothing.
//! @param len  Length of the array prob
//! @param prob  A probability distribution; an array of doubles that sums to 1.
//! @return     The entropy in bits.
//! @author     GEC 2003
//! @warning    This is not a sophisticated implementation.
double 
cbt_entropy( const size_t len,  
             const double prob[]) {
  size_t i;
  double ent = 0.0;   // running total, in nats

  for(i=0; i<len; i++) {
    // -p*log(p) tends to 0 as p tends to 0, so a zero entry adds nothing.
    // log() is finite for every positive double, however small, so only
    // non-positive entries have to be kept away from it.
    if(prob[i] > 0.0) { 
      ent -= prob[i] * log(prob[i]);
    }
  }
  return cbt_bits_per_nat * ent;
}


//! Allocate a 3 dimensional array of integers on the heap. 
//! (An array of arrays of arrays of ints) All elements are
//! initialized to zero. The result is indexed as mat[i][j][k] and the
//! elements themselves occupy one contiguous allocation.
//! At least one slot is allocated at every level, so mat[0] and
//! mat[0][0] are valid pointers even when a dimension is zero.
//! Release the array with cbt_mat3d_free().
//! @param x    Size of 1st dimension
//! @param y    Size of 2nd dimension
//! @param z    Size of 3rd dimension
//! @return     A pointer to the array, or NULL if a dimension is
//!             negative, the total size is not representable, or
//!             memory could not be obtained
//! @author     Ed Green 2003
//! @author     GEC (Cosmetic changes) 2003

int *** 
cbt_mat3d_alloc( int x, int y, int z ) {
  const size_t size_max = (size_t) -1;
  size_t nx, ny, nz, n_rows, n_elements, i;
  int *elements, **p2e, ***p2p2e;

  if( x<0 || y<0 || z<0) return NULL;
  nx = (size_t) x;
  ny = (size_t) y;
  nz = (size_t) z;

  /* Count rows and elements in size_t, refusing products that would wrap */
  if ( ny != 0 && nx > size_max / ny ) return NULL;
  n_rows = nx * ny;
  if ( nz != 0 && n_rows > size_max / nz ) return NULL;
  n_elements = n_rows * nz;

  /* Allocate the pointers to the pointers to the elements, the pointers
     to the elements, and the elements. calloc zero-fills all three. */
  p2p2e    = calloc( nx         ? nx         : 1, sizeof *p2p2e );
  p2e      = calloc( n_rows     ? n_rows     : 1, sizeof *p2e );
  elements = calloc( n_elements ? n_elements : 1, sizeof *elements );

  /* On any failure release whatever was obtained; free(NULL) is a no-op */
  if ( p2p2e == NULL || p2e == NULL || elements == NULL ) {
    free( elements );
    free( p2e );
    free( p2p2e );
    return NULL;
  }

  /* Keep the head of each level linked even if the loops below are empty */
  p2e[ 0 ] = elements;
  p2p2e[ 0 ] = p2e;

  /* Point the p2e at the rows */
  for ( i = 0; i < n_rows; i++ ) {
    p2e[ i ] = &elements[ nz * i ];
  }

  /* Now Load the p2p2e pointers */
  for ( i = 0; i < nx; i++ ) {
    p2p2e[ i ] = &p2e[ ny * i ];
  }
  
  return p2p2e;
}


//! Free a 3d array allocated by cbt_mat3d_alloc.
//! Releases the element storage, the row pointers and the top-level
//! pointers, all of which are reached through mat2free[0].
//! A NULL argument is accepted and ignored, like free(NULL).
//! @param mat2free  The array to release, or NULL
//! @return     Always NULL, which allows `m = cbt_mat3d_free(m);`
//! @author     Ed Green 2003
//! @author     GEC (Cosmetic changes) 2003

int ***
cbt_mat3d_free( int *** mat2free ) {
  if( mat2free == NULL ) return NULL;

  if( mat2free[ 0 ] != NULL ) {
    free( mat2free[ 0 ][ 0 ] );   /* the elements */
    free( mat2free[ 0 ] );        /* the pointers to the elements */
  }
  free( mat2free );               /* the pointers to those pointers */
  return NULL;
}
