view lib/HR2_v1.04.cpp @ 2:23970530a518 draft

master branch Updating with tag :CI_COMMIT_TAG - - Fxx
author fgiacomoni
date Tue, 17 Jan 2023 10:31:32 +0000
parents 86296c048e46
children
line wrap: on
line source

/*

 HR2.C
 V1.04
 (Changes: meb)

 A program to calculate elemental compositions for a given mass.
 See the file README for details.

--------------------------------------------------------------------
 Copyright (c) 2001...2005 Joerg Hau <joerg.hau(at)dplanet.ch>.

 mail: joerg.hau@dplanet.ch
 www:  http://www.mysunrise.ch/users/joerg.hau/

 *changed version by Tobias Kind (TK), 2006 , Fiehnlab,
 *added extended valencies, added implementation of
  seven golden rules of molecular formula filtering
 

 This program is free software; you can redistribute it and/or
 modify it under the terms of version 2 of the GNU General Public
 License as published by the Free Software Foundation. See the
 file LICENSE for details.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
--------------------------------------------------------------------

 Creation:	somewhere in 1992 by JHa.
 Revision:  2001-04-18, GPL'd, first public release (JHa)
            2001-04-21, improved help text (JHa)
            2002-06-27, added sodium (JHa)
            2002-10-09, added 15N (JHa)
            2005-02-25, added -v option; license now GPL *v2* (JHa)
            2005-02-27, optimised code in calc loop (JHa)
            2005-02-28, verified and updated atomic masses (JHa)
            2005-06-17, added GPL text when "-h" is used (JHa)
			2006-01-01, extended version for BMC Bioinformatics publication - HR2 (TK)
			2006-03-03, added element ratio checks, extended valencies, only even electrons - HR2 (TK)
			2006-09-09,	1000x-10000x speedup hand optimized hehe. - HR2 (TK)
						-->special version for CHNSOP-F-Cl-Br-Si 
			2009-05-28, David Enot introduced the concept of 'Adducts' (nadd) for MZedDB and corrected 
						some inaccuracies in April 2008. Manfred Beckmann has now corrected the use of 
						'nadd' and 'charge' by calculating 'nadd' from 'charge' using abscharge = abs(charge)
						(did it manually because I couldn't find 'abs') to correct 'measured_mass' and limits, 
						but keeping nadd = 0 for rdb calculation of neutral MW (MB)
			2014-08-29, Marion Landi corrected the mass of atoms.

 This is ANSI C and should compile with any C compiler; use
 something along the lines of "gcc -Wall -O3 -o hr hr.c".
 "g++ -O2 -o myhr HR2.cpp"  on a Mac OS X G5 proc
 Optimize for speed, you may gain factor 3!
 NOW compiled under Visual C++ Express (faster than GCC) in C++ mode for boolean type.


 ---------------------------------------------------------------------
 Example arguments:
 1) -m 1 -t 100000 -C 1-100 -H 1-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10
 2) -m 500 -t 1 -C 50-100 -H 10-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10
 3) hr2-all-res -c "Hexaflumuron_459.982882Da_3ppm" -m 459.982882 -t 1.37995 -C 0-39 -H 0-98 -N 0-34 -O 0-30 -P 0-12 -S 0-12 -F 0-12 -L 0-14 -B 0-7 -I 0-0 
	945 formulas found in    253 seconds. (now 4 seconds, before eternal)
 4) hr2 -m 459.982882 -t 1.37995 -C 10-39 -H 28-98 -N 4-34 -O 0-30 -P 1-12 -S 1-12 -F 0-12 -L 1-14 -B 2-6 -I 0-0
	1 formula in 0 seconds (former eternal time)
 */

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <time.h>

/* Uncomment this for windows
#include <windows.h>
#include <process.h> 
*/
#include <math.h>


#define VERSION "20050617"	/* String ! */
#define TRUE 	1
#define FALSE 	0
#define MAXLEN  181          /* max. length of input string */

#define _CRT_SECURE_NO_DEPRECATE 1

typedef struct 	{
		const char *sym;	/* symbol */
		const char *symi;	/* non iso symbol */
		const double mass;	/* accurate mass */
		const float val;	/* to calculate unsaturations */
		const int key;		/* used for decoding cmd line */
		int min,		/* atom count min */
		    max,		/* atom count max */
		    cnt,		/* atom count actual */
		    save;		/* atom count old  - for loop exiting*/
		} Element;


/* --- atomic masses as published by IUPAC, 2002-10-02 ---------- */

Element el[]=
/* array of the elements used here:
   Symbol, exact mass, dbe, keycode, number, default-min, default-max
   dle: isoptopes are given by iC for 13C, iK for 41K etc....
*/
{
{ "C", "C",  12.000000000,   +2.0, 'C', 0, 100, 0,0 },
{ "iC", "C", 13.0033548378, +2.0, '1', 0, 0, 0 ,0 },
{ "H",  "H",   1.0078250321,  -1.0, 'H', 0, 200, 0 ,0},
{ "iH", "H",   2.0141017780,  -1.0, 'D', 0, 0, 0 ,0},
{ "N",  "N",  14.0030740052,  +1.0, 'N', 0, 40, 0,0 },          //org +1 = valence = 3: now +3 for valence = 5
{ "iN",  "N",  15.0001088984,   +1.0, 'M', 0, 0, 0,0 },
{ "O",  "O",  15.9949146221,   0.0, 'O', 0, 70, 0 ,0},
{ "F", "F",  18.99840322,    -1.0, 'F', 0, 0, 0 ,0},
{ "Na", "Na", 22.98976928,    -1.0, 'A', 0, 0, 0 ,0},
{ "Si", "Si", 27.9769265327,  +2.0, 'I', 0, 0, 0 ,0},
{ "P", "P",  30.97376163,    +3.0, 'P', 0, 10, 0 ,0},            //org +1 valence = 3: now +3 for valence = 5
{ "S", "S",  31.97207069,    +4.0, 'S', 0, 0, 0 ,0},            //org 0 = valence = 2; now +4 for valence = 6
{ "Cl", "Cl", 34.96885268,    -1.0, 'L', 0, 0, 0 ,0},
{ "iCl", "Cl", 36.96590259,    -1.0, 'E', 0, 0, 0 ,0},  // added dle
{ "Br", "Br", 78.9183371,     -1.0, 'B', 0, 0, 0 ,0},
{ "iBr", "Br", 80.9162906, -1.0, 'G', 0, 0, 0 ,0},         // added dle
{ "K", "K", 38.96370668,    -1.0, 'K', 0, 0, 0 ,0},             // added dle
{ "iK", "K", 40.96182576,    -1.0, 'J', 0, 0, 0 ,0},            // added dle
};

const double electron = 0.0005486;


/* --- global variables --- */

double  charge,		/* charge on the molecule */
        tol,		/* mass tolerance in mmu */
        abscharge,  /* abs(charge): to calculate 'measured_mass' and limits - meb */
        nadd;       /* nadd now calculated as abs(charge) - meb */
char    comment[MAXLEN]="";	/* some text ;-) */
int     single;		/* flag to indicate if we calculate only once and exit */
int     nr_el;		/* number of elements in array (above) */
bool	verbose;
bool	applygr;

/* --- some variables needed for reading the cmd line --- */

char   *optarg;		/* global: pointer to argument of current option */
static int optind = 1;	/* global: index of which argument is next. Is used
                as a global variable for collection of further
                arguments (= not options) via argv pointers. */


/* --- function prototypes ------------------- */

int     input(char *text, double *zahl);
int     readfile(char *whatfile);
double  calc_mass(void);
float   calc_rdb(void);
long     do_calculations(double mass, double tolerance);
int     clean (char *buf);
int     getopt(int argc, char *argv[], char *optionS);
/* you have to compile with C++ or define yourself this bool type (C99 compiler definition) */
bool calc_element_ratios(bool element_probability);

/* --- threading ------------------- */
/* mass and RDB calculation could be in several other threads
however the loop is very fast, context switching takes longer and
slows down the whole process. Single-Thread multiprocessor (cluster) implementation
seems superior here. */
/* Uncomment for windows
HANDLE hThread2,hThread3; 
unsigned threadID2,threadID3;
*/


/* --- main --- */

int main (int argc, char **argv)
{
double mz;	/* mass */
char buf[MAXLEN];
int i, tmp;

static char *id =
"hr version %s. Copyright (C) by Joerg Hau 2001...2005 & Tobias Kind 2006 :-).\n";

static char *disclaimer =
"\nThis program is free software; you can redistribute it and/or modify it under\n"
"the terms of version 2 of the GNU General Public License as published by the\n"
"Free Software Foundation.\n\n"
"This program is distributed in the hope that it will be useful, but WITHOUT ANY\n"
"WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A\n"
"PARTICULAR PURPOSE. See the GNU General Public License for details.\n";

static char *msg =
"Calculates possible elemental compositions for a given mass.\n\n"
"usage: hr [options] file\n\nValid command line options are:\n"
"-h      This Help screen.\n"
"-v      Display version information.\n"
"-t tol  Set tolerance to 'tol' mmu (default 5).\n"
"-m mz   Set mass to 'mz'.\n"
"-s		 Print only the results (dle)"
"-c      Number of charges i.e -c 1 is equivalent to -p (dle).\n"
"-p      Positive ions; electron mass is removed from the formula.\n"
"-n      Negative ions; electron mass is added to the formula.\n"
"-g      Does not apply 4-7 golden rules (dle).\n"
"-a      Maximum num. of adducts.  (dle)\n"
"-X a-b  For element X, use atom range a to b. List of valid atoms:\n\n"
"           X    key   mass (6 decimals shown)\n"
"        -------------------------------------\n";

/* initialise variables */

nadd = 0.0; /* added dle: number of adducts*/
single = FALSE;			/* run continuously */
charge = 0.0;	       	 	/* default charge is neutral */
tol = 1.0;			/* default tolerance in mmu */
nr_el = sizeof(el)/sizeof(el[0]);	/* calculate array size */
verbose = TRUE;  /* added dle: should everything printed out?*/
applygr = TRUE;   /* added dle: should rules 4-7 applied?*/


/* decode and read the command line */

while ((tmp = getopt(argc, argv, "hvpnsgt:m:c:a:C:H:N:M:O:D:1:S:F:L:B:P:I:A:K:E:G:J:")) != EOF)
	switch (tmp)
		{
		case 'h':     	  		/* help me */
			printf (id, VERSION);
            printf (msg);
			for (i=0; i < nr_el; i++)
				printf ("        %4s     -%c %15.6lf\n",
				el[i].sym, el[i].key, el[i].mass);
			printf(disclaimer, "\n");
			return 0;
		case 'v':     	  		/* version */
			printf (id, VERSION);
			return 0;
		case 'p':    			/* positive charge */
			charge = +1.0;
			continue;
		case 's':    			/* simple output dle*/
			verbose = FALSE;
			continue;
		case 'n':			/* negative carge */
			charge = -1.0;
			continue;
		case 'g':			/* apply 7GR dle */
			applygr = false;
			continue;
		case 't':       		/* tolerance */
			strcpy(buf, optarg);
			sscanf(buf, "%lf", &tol);
			continue;
		case 'a':       		/* num of adducts dle */
			strcpy(buf, optarg);
			sscanf(buf, "%lf", &nadd);
			continue; 
		case 'm':			/* single mass */
			strcpy(buf, optarg);
		        sscanf(buf, "%lf", &mz);
		        single = TRUE;
	        	continue;
		case 'c':			/* comment for single mass */
			strcpy(buf, optarg);
			sscanf(buf, "%lf", &charge);
			continue;
		        continue;
		case 'C':      		/* C12 */
 		case 'H':      		/* 1H */
		case 'N':      		/* 14N */
		case 'M':      		/* 15N */
		case 'O':      		/* 16O */
 		case 'D':      		/* 2H */
 		case '1':      		/* 13C */
		case 'A':      		/* Na ('N' is taken for Nitrogen!) */
		case 'S':      		/* 32S */
		case 'F':      		/* 19F */
		case 'L':      		/* 35Cl ('C' is taken!) */
		case 'E':      		/* 37Cl (chloridE!) */
		case 'B':      		/* 79Br */
		case 'K':      		/* 39K */
		case 'J':      		/* 41K */
		case 'G':      		/* 81Br */
		case 'P':      		/* 31P */
		case 'I':      		/* 28Si ('S' is taken!) */
			i = 0;
			/* compare keys until found */
			while ((i < nr_el) && (el[i].key != tmp))
				i++;
			strcpy(buf, optarg);
			sscanf(buf, "%d-%d", &el[i].min, &el[i].max);	/* copy over */
			if (el[i].min > el[i].max)			/* swap them */
				{
				tmp = el[i].min;
				el[i].min = el[i].max;
				el[i].max = tmp;
				}

			/* printf ("\n %c = %c ... %s (%d-%d)", \
			tmp, el[i].key, el[i].sym, el[i].min, el[i].max); */
			continue;
		case '~':    	  	/* invalid arg */
		default:
			printf ("'%s -h' for help.\n", argv[0]);
			return 1;
		}

if (argv[optind] != NULL)	 /* remaining parameter on cmd line? */
	/* must be a file -- treat it line by line */
	return (readfile (argv[optind]));

if (single == TRUE)  	   	 	/* only one calculation requested? */
	do_calculations(mz, tol);       /* do it, then exit ... */
else
	{				/* otherwise run a loop */
	while (input(comment, &mz))
		{
		tmp = do_calculations(mz, tol);
		printf("\n");
		}
	}

return 0;
}


/***************************************************************************
* INPUT:	reads a dataset in "dialog mode".			   *
* Input: 	Pointer to comment text, pointer to mass.		   *
* Returns:	Result of sscanf(), 0 if prog is to be finished. 	   *
* Note:		This is a fairly primitive way to do it, but it works ;-)  *
****************************************************************************/
int input(char *txt, double *zahl)
{
char buf[MAXLEN];				/* input line */

*zahl = 0.0;				    /* reset */

printf("Comment             : ");	/* display prompt */
fgets(buf, MAXLEN-1, stdin);	/* read line */
buf[MAXLEN] = 0x0;				/* terminate string */
clean (buf);				    /* remove linefeed */
strcpy(txt, buf);			    /* copy text over */

printf("Mass (ENTER to quit): ");	/* display prompt */
fgets(buf, MAXLEN-1, stdin);       	/* read line */
buf[MAXLEN] = 0x0;			    /* terminate string */
if (!clean (buf))			    /* only a CR ? --> quit */
	return 0;
sscanf(buf,"%lf", zahl);		/* scan string */

return 1;
}


/***************************************************************************
* READFILE:	reads dataset from file.				   *
* Input: 	Pointer to comment text, pointer to mass.		   *
* Returns:	0 if OK, 1 if error.					   *
****************************************************************************/
int readfile(char *whatfile)
{
double mz;		/* measured mass */
char buf[MAXLEN];		/* input line */
FILE *infile;

infile = fopen(whatfile, "r");
if (NULL == infile)
	{
	fprintf (stderr, "Error: Cannot open %s.", whatfile);
	return 1;
	}

while (fgets(buf, MAXLEN-1, infile))
	{
	buf[MAXLEN] = 0x0;			/* terminate string */
	if (*buf == ';')		/* comment line */
		continue;
	if (!clean (buf))		/* only a CR ? --> quit */
		return 0;
	sscanf(buf,"%s %lf", comment, &mz);	/* scan string */
	do_calculations(mz, tol);
	mz = 0.0;				/* reset */
	}
return 0;
}


/************************************************************************
* CALC_MASS:	Calculates mass of an ion from its composition.	  	*
* Input: 	nothing (uses global variables) 	      		*
* Returns. 	mass of the ion.	  				*
* Note:		Takes care of charge and electron mass!   		*
* 		(Positive charge means removal of electrons).	 	*
*************************************************************************/
double calc_mass(void)
{
int i;
double sum = 0.0;

for (i=0; i < nr_el; i++)
	sum += el[i].mass * el[i].cnt;

return (sum - (charge * electron));
}


/************************************************************************
* CALC_RDB:	Calculates rings & double bond equivalents.    		*
* Input: 	nothing (uses global variables)			   	*
* Returns. 	RDB.				       			*
*************************************************************************/
float calc_rdb(void)
{
int i;
float sum = 2.0;

for (i=0; i < nr_el; i++)
	sum += el[i].val * el[i].cnt;

return (sum/2.0);
}
/************************************************************************
* Calculates element ratios , CH2 (more than 8 electrons needed is not handled)  		
* Calculations element probabilities if element_probability = true 
* Input: 	nothing (uses global variables)			   	
* Returns. true/false.				       			
*************************************************************************/
bool calc_element_ratios(bool element_probability)
{
bool CHNOPS_ok;	
float HC_ratio;
float NC_ratio;
float OC_ratio;
float PC_ratio;
float SC_ratio;

/* added the number of isotopes to the calculation - dle*/
float C_count = (float)el[0].cnt+(float)el[1].cnt; // C_count=12C+13C
float H_count = (float)el[2].cnt+(float)el[3].cnt;
float N_count = (float)el[4].cnt+(float)el[5].cnt;
/* modif end here */
float O_count = (float)el[6].cnt;
float P_count = (float)el[10].cnt;
float S_count = (float)el[11].cnt;


		/* ELEMENT RATIOS allowed
			MIN		MAX (99.99%)
		H/C	0.07	6.00
		N/C	0.00	4.00
		O/C	0.00	3.00
		P/C	0.00	2.00
		S/C	0.00	6.00
		*/	

	// set CHNOPS_ok = true and assume all ratios are ok
	CHNOPS_ok = true;
	/*element_probability = false;	 */
	
	
	if (C_count && H_count >0)					// C and H  must have one count anyway (remove for non-organics//
	{	
		HC_ratio = H_count/C_count;
		if (element_probability)
		{
			if ((HC_ratio <  0.2) || (HC_ratio >  3.0)) // this is the H/C probability check ;
			CHNOPS_ok = false;
		}
		else if (HC_ratio >  6.0) // this is the normal H/C ratio check - type cast from int to float is important
			CHNOPS_ok = false;
	}

	if (N_count >0)	// if positive number of nitrogens then thes N/C ratio else just calc normal
	{
		NC_ratio = N_count/C_count;
		if (element_probability)
		{
			if (NC_ratio >  2.0) // this is the N/C probability check ;
			CHNOPS_ok = false;
		}
		else if (NC_ratio >  4.0)
			CHNOPS_ok = false;
	}	
	
	if (O_count >0)	// if positive number of O then thes O/C ratio else just calc normal
	{	
		OC_ratio = O_count/C_count;
		if (element_probability)
		{
			if (OC_ratio >  1.2) // this is the O/C  probability check ;
			CHNOPS_ok = false;		
		}
		else if (OC_ratio >  3.0)
				CHNOPS_ok = false;
	}	

	if (P_count >0)	// if positive number of P then thes P/C ratio else just calc normal
	{	
		PC_ratio = 	P_count/C_count;
		if (element_probability)
		{
			if (PC_ratio >  0.32) // this is the P/C  probability check ;
			CHNOPS_ok = false;	
		
		}
		else if (PC_ratio >  6.0)
			CHNOPS_ok = false;
	}	

	if (S_count >0)	// if positive number of S then thes S/C ratio else just calc normal
	{	
		SC_ratio = 	S_count/C_count;
		if (element_probability)
		{
			if (SC_ratio >  0.65) // this is the S/C  probability check ;
			CHNOPS_ok = false;	
		}
		else if (SC_ratio >  2.0)
			CHNOPS_ok = false;
	}	

//-----------------------------------------------------------------------------	
		
	// check for multiple element ratios together with probability check 
	//if N<10, O<20, P<4, S<3 then true
	if (element_probability && (N_count > 10) && (O_count > 20) && (P_count > 4) && (S_count > 1))
		CHNOPS_ok = false;	
	
	// NOP check for multiple element ratios together with probability check
	// NOP all > 3 and (N<11, O <22, P<6 then true)
	if (element_probability && (N_count > 3) && (O_count > 3) && (P_count > 3))
		{
		if (element_probability && (N_count > 11) && (O_count > 22) && (P_count > 6))
			CHNOPS_ok = false;	
		}
	
	// OPS check for multiple element ratios together with probability check
	// O<14, P<3, S<3 then true
	if (element_probability && (O_count > 14) && (P_count > 3) && (S_count > 3))
		CHNOPS_ok = false;	

	// PSN check for multiple element ratios together with probability check
	// P<3, S<3, N<4 then true
	if (element_probability && (P_count > 3) && (S_count > 3) && (N_count >4))
		CHNOPS_ok = false;	

	
	// NOS check for multiple element ratios together with probability check
	// NOS all > 6 and (N<19 O<14 S<8 then true)
	if (element_probability && (N_count >6) && (O_count >6) && (S_count >6))
	{
		if (element_probability && (N_count >19) && (O_count >14) && (S_count >8))
			CHNOPS_ok = false;	
	}	

	// function return value;
	if (CHNOPS_ok == true)
		return true;
	else 
		return false;
}

/************************************************************************
* DO_CALCULATIONS: Does the actual calculation loop.			*
* Input: 	   measured mass (in amu), tolerance (in mmu)	    	*
* Returns. 	   number of hits.	       				*
*************************************************************************/
long do_calculations (double measured_mass, double tolerance)
{
time_t start, finish;
double elapsed_time;

double mass;			/* calc'd mass */
double limit_lo, limit_hi;	/* mass limits */
float rdb, lewis, rdbori;			/* Rings & double bonds */
long i,j;			
long long hit;		/* counts the hits, with long declaration, overflow after 25h with all formulas < 2000 Da
							    long = FFFFFFFFh = 4,294,967,295d*/
int niso;
long long counter;
bool elementcheck;
bool set_break; 


time( &start );		// start time

/* calculate limits */
/* correct m/z value 'measured_mass' for z>1 using 'abscharge'; 
keep charge = 0 for neutral mass searches (z=0);
additionally, keep the idea of 'adducts' for 'rdb', but use charge state instead -meb */
if (charge<0) {
	abscharge = -charge;
	nadd = abscharge;
} else if (charge>0) {
	abscharge = charge;
	nadd = abscharge;
} else if (charge==0) {
	abscharge = 1;			/* for limits */
	nadd = 0;				/* for rdb */
}

limit_lo = (measured_mass * abscharge) - (tolerance / 1000.0);
limit_hi = (measured_mass * abscharge) + (tolerance / 1000.0);

if(verbose) /* extended output as in original code - dle */
{
    if (strlen(comment))	/* print only if there is some text to print */
	   printf ("Text      \t%s\n", comment);

	printf("\n");		/* linefeed */
	printf ("Composition =  \t");
	for (i=0; i < nr_el; i++)
		if (el[i].max > 0)
			printf("%s:%d-%d ", el[i].sym, el[i].min, el[i].max);
	printf ("\n");
	
	printf ("Mass Tolerance [mmu]:  \t%.1f\n",tolerance);
	printf ("Measured Mass:  \t%.6lf\n", measured_mass);
	printf ("Charge:  \t%+.0lf\n", charge);  /* dle */
	printf ("Num. of Adducts/Losses:  \t%.0lf\n", nadd);  /* dle */
	printf ("Apply 7GR:  \t%d\n\n", applygr);  /* dle */
}

hit = 0;			/* Reset counter */
counter = 0;
set_break = false;	/* set breaker for element counts to false */

/* Now let's run the big big loop ... I'd like to do that
   recursively but did not yet figure out how ;-) 
   TK Adds: the loop is just fine.
*/

/* now comes the "COOL trick" for calculating all formulae:
sorting the high mass elements to the outer loops, the small weights (H)
to the inner loops;

This will reduce the computational time by factor ~10-60-1000
OLD HR: Cangrelor at 1ppm  4465 formulas found in   5866 seconds.
NEW HR2: Cangrelor at 1ppm 4465 formulas found in     96 seconds.
NEW2 HR2: Cangrelor at 1ppm 4465 formulas found in     60 seconds.
NEW3 HR2: Cangrelor at 1ppm 4465 formulas found in     59 seconds.
HR2 Fast: Cangrelor at 1ppm 4465 formulas found in     41 seconds by evaluating 2,003,436,894 formulae.
hr2 -c "Cangrelor" -m  774.948 -t 0.77 -C 1-64 -H 1-112 -N 0-30 -O 0-80 -P 0-12 -S 0-9 -F 0-10 -L 0-10

Another additional trick is to end the 2nd.. 3rd.. 4th.. xth innermost loop
to prevent loops which are just higher and higher in mass.
*/

 /*dle: process new elements */
el[17].cnt = el[17].min - 1;  el[17].save = el[17].cnt; 
while (el[17].cnt++ < el[17].max) /*"iK"*/{ 
	 
el[16].cnt = el[16].min - 1;  el[16].save = el[16].cnt; 
while (el[16].cnt++ < el[16].max) /*"K"*/{ 
	 
el[15].cnt = el[15].min - 1;  el[15].save = el[15].cnt; 
while (el[15].cnt++ < el[15].max) /* "iBr"*/ { 

el[14].cnt = el[14].min - 1;  el[14].save = el[14].cnt; 
while (el[14].cnt++ < el[14].max) /* "Br"*/ { 

el[13].cnt = el[13].min - 1;  el[13].save = el[13].cnt; 
while (el[13].cnt++ < el[13].max) /*"iCl"*/ { 

el[12].cnt = el[12].min - 1;  el[12].save = el[12].cnt; 
while (el[12].cnt++ < el[12].max) /*"Cl"*/ { 

el[11].cnt = el[11].min - 1;  el[11].save = el[11].cnt; 
while (el[11].cnt++ < el[11].max) /*"S"*/ { 
	 
el[10].cnt = el[10].min - 1;  el[10].save = el[10].cnt; 
while (el[10].cnt++ < el[10].max) /*"P"*/ { 
	 
el[9].cnt = el[9].min - 1;  el[9].save = el[9].cnt; 
while (el[9].cnt++ < el[9].max) /*"Si"*/ { 

el[8].cnt = el[8].min - 1;  el[8].save = el[8].cnt; 
while (el[8].cnt++ < el[8].max) /*"Na"*/{ 

el[7].cnt = el[7].min - 1;  el[7].save = el[7].cnt; 
while (el[7].cnt++ < el[7].max) /*"F"*/ { 
 
el[6].cnt = el[6].min - 1;  el[6].save = el[6].cnt; 
while (el[6].cnt++ < el[6].max) /*"O"*/ { 
	 
el[5].cnt = el[5].min - 1;  el[5].save = el[5].cnt; 
while (el[5].cnt++ < el[5].max) /*"15N"*/{ 

el[4].cnt = el[4].min - 1;  el[4].save = el[4].cnt; 
while (el[4].cnt++ < el[4].max) /*"N"*/{ 
	 
el[1].cnt = el[1].min - 1; el[1].save = el[1].cnt; 
while (el[1].cnt++ < el[1].max) /*"13C"*/ { 

el[0].cnt = el[0].min - 1; el[0].save = el[0].cnt; 
while (el[0].cnt++ < el[0].max) /* "C"*/ { 

el[3].cnt = el[3].min - 1; 	el[3].save = el[3].cnt; 
while (el[3].cnt++ < el[3].max) /*"D"*/{ 

el[2].cnt = el[2].min - 1; el[2].save = el[2].cnt; 
while (el[2].cnt++ < el[2].max) /*"H"*/{ 

	mass = calc_mass();
	counter++;

	//just for debug purposes
	//if (mass > limit_hi)  
	//printf("mass: %f\tC: %d  H: %d  N: %d O: %d P: %d S: %d Cl: %d Br: %d\n",mass,el[0].cnt,el[2].cnt,el[4].cnt,el[6].cnt,el[10].cnt,el[11].cnt,el[12].cnt,el[13].cnt);
    	
	/* if we exceed the upper limit, we can stop the calculation
       for this particular element (JHa 20050227). <-- comment TK that will only bust the innermost while loop, which is "H"*/

	// break H loop 
	if (mass > limit_hi)  break;

    //************************************************************************************************************/	
	//Calculus loop with print out
	//************************************************************************************************************/	

	if ((mass >= limit_lo) && (mass <= limit_hi)) /* within limits? */
	{	
		// element check will be performed always, if variable bool element_probability is true also probabilities will be calculated
		// not an elegant implementation, but fast.
			
		 elementcheck = calc_element_ratios(applygr);  /* pass applygr boolean by dle */
		   
		 if (elementcheck)
		{ 
		rdbori = calc_rdb();	/* get RDB */

		rdb = rdbori + 0.5*nadd; /* dle: if nadd addcuts */
		lewis = (float)(fmod(rdb, 1)); /*calc reminder*/
		if ((rdb >= 0) && (lewis != 0.5) && (lewis !=-0.5))/* less than -0.5 RDB does not make sense */
			{													/* NO(!) CH3F10NS2 exists , RDB =  -4.0   M= 282.9547*/
			hit ++;
			for (i = 0; i < nr_el; i++)			/* print composition */
			    if (el[i].cnt > 0)				/* but only if useful */
				  printf("%s%d.", el[i].sym, el[i].cnt);	// print formula
				  
			printf("\t");
	/* dle: print out a more explicit molecular formula for further processing and
	 variable niso counts number of isotope elements in the solution */
			niso=0;
			for (i = 0; i < nr_el; i++)
			{			/* print composition */
			    if (el[i].cnt > 0)
			    {				/* but only if useful */
				  printf("%s%d", el[i].symi, el[i].cnt);	// print formula
				  if (el[i].symi != el[i].sym)
				    niso=niso+el[i].cnt;
			    }
				
			}
    /* dle: end of molecular print out */
    
    /* dle: additionnaly print nadd and niso */
		    printf("\t%.2f\t%.7lf\t%.0lf\t%d\t%+.2lf\n", rdb, mass, nadd,niso, 1000.0 * ((measured_mass * abscharge) - mass));
				  
			}	/* end of 'rdb' loop */

		}	// end of elementcheck loop
	
	}	/* end of 'limit' loop */
	//************************************************************************************************************/
		

	/*
	TK: if the current mass is larger than the limit the loop can be exited.
	Each element must point to the element which is in use and before.
	This is a static implementation which can be enhanced with a pointer chain to the lower element.
	Actually now its only allowed for CHNSOP-Fl-Cl-Br-Si !!! Brute-force <> elegance :-)
	*/
		} /*"H"*/
		
		if ((mass >= limit_lo) && (el[2].save == el[2].cnt-1)) break;
		} /*"D"*/
		
		if ((mass >= limit_lo) && (el[3].save == el[3].cnt-1)) break;  /* dle addons */
		} /* "C"*/
		
		if ((mass >= limit_lo) && (el[0].save == el[0].cnt-1)) break;
		} /*"13C"*/

		if ((mass >= limit_lo) && (el[1].save == el[1].cnt-1)) break;  /* dle addons */
		} /*"N"*/
		
        if ((mass >= limit_lo) && (el[4].save == el[4].cnt-1)) break;
		} /*"15N"*/

        if ((mass >= limit_lo) && (el[5].save == el[5].cnt-1)) break;  /* dle addons */
		} /*"O"*/
		
	    if ((mass >= limit_lo) && (el[6].save == el[6].cnt-1)) break;
		} /*"F"*/
		
	    if ((mass >= limit_lo) && (el[7].save == el[7].cnt-1)) break;
		} /*"Na"*/
		
	    if ((mass >= limit_lo) && (el[8].save == el[8].cnt-1)) break;
		}  /*"Si"*/
		
		if ((mass >= limit_lo) && (el[9].save == el[9].cnt-1)) break;
		} /*"P"*/
		
		if ((mass >= limit_lo) && (el[10].save == el[10].cnt-1)) break;
		} /*"S"*/
		
		if ((mass >= limit_lo) && (el[11].save == el[11].cnt-1)) break; 
		} /*"Cl"*/
		
		if ((mass >= limit_lo) && (el[12].save == el[12].cnt-1)) break;
		} /*"iCl"*/
				
		if ((mass >= limit_lo) && (el[13].save == el[13].cnt-1)) break;  /* dle addons */
		} /*"Br"*/

		if ((mass >= limit_lo) && (el[14].save == el[14].cnt-1)) break;
		} /*"iBr"*/

		if ((mass >= limit_lo) && (el[15].save == el[15].cnt-1)) break;	 /* dle addons */
		} /*"K"*/

		if ((mass >= limit_lo) && (el[16].save == el[16].cnt-1)) break;	 /* dle addons */
		} /*"iK"*/

/* close that giant loop thing started above */


	

time(&finish);		// stop timer
elapsed_time = difftime(finish , start);	// calulate time difference

if(verbose){
	if (!hit)
		printf("No matching combination found in %6.0f seconds.\n", elapsed_time );
	else{
		printf("\n%llu formulas found in %6.0f seconds by evaluating %llu formulae.\n",hit,elapsed_time,counter);
		printf("RDBs are overloaded to maximum valence values (N=3,P=5,S=6).\n");
	}
}
return hit;
}


/************************************************************************
* CLEAN:	"cleans" a buffer obtained by fgets() 			*
* Input: 	Pointer to text buffer					*
* Returns:	strlen of buffer.   					*
*************************************************************************/
int clean (char *buf)
{
int i;

for(i = 0;  i < strlen(buf);  i++)		/* search for CR/LF */
	{
	if(buf[i] == '\n' || buf[i] == '\r')
		{
		buf[i] = 0;			/* stop at CR or LF */
		break;
		}
	}
return (strlen(buf));
}



/***************************************************************************
* GETOPT: Command line parser, system V style.
*
*  This routine is widely (and wildly) adapted from code that was
*  made available by Borland International Inc.
*
*  Standard option syntax is:
*
*    option ::= SW [optLetter]* [argLetter space* argument]
*
*  where
*    - SW is '-'
*    - there is no space before any optLetter or argLetter.
*    - opt/arg letters are alphabetic, not punctuation characters.
*    - optLetters, if present, must be matched in optionS.
*    - argLetters, if present, are found in optionS followed by ':'.
*    - argument is any white-space delimited string.  Note that it
*      can include the SW character.
*    - upper and lower case letters are distinct.
*
*  There may be multiple option clusters on a command line, each
*  beginning with a SW, but all must appear before any non-option
*  arguments (arguments not introduced by SW).  Opt/arg letters may
*  be repeated: it is up to the caller to decide if that is an error.
*
*  The character SW appearing alone as the last argument is an error.
*  The lead-in sequence SWSW ("--") causes itself and all the rest
*  of the line to be ignored (allowing non-options which begin
*  with the switch char).
*
*  The string *optionS allows valid opt/arg letters to be recognized.
*  argLetters are followed with ':'.  Getopt () returns the value of
*  the option character found, or EOF if no more options are in the
*  command line. If option is an argLetter then the global optarg is
*  set to point to the argument string (having skipped any white-space).
*
*  The global optind is initially 1 and is always left as the index
*  of the next argument of argv[] which getopt has not taken.  Note
*  that if "--" or "//" are used then optind is stepped to the next
*  argument before getopt() returns EOF.
*
*  If an error occurs, that is an SW char precedes an unknown letter,
*  then getopt() will return a '~' character and normally prints an
*  error message via perror().  If the global variable opterr is set
*  to false (zero) before calling getopt() then the error message is
*  not printed.
*
*  For example, if
*
*    *optionS == "A:F:PuU:wXZ:"
*
*  then 'P', 'u', 'w', and 'X' are option letters and 'A', 'F',
*  'U', 'Z' are followed by arguments. A valid command line may be:
*
*    aCommand  -uPFPi -X -A L someFile
*
*  where:
*    - 'u' and 'P' will be returned as isolated option letters.
*    - 'F' will return with "Pi" as its argument string.
*    - 'X' is an isolated option.
*    - 'A' will return with "L" as its argument.
*    - "someFile" is not an option, and terminates getOpt.  The
*      caller may collect remaining arguments using argv pointers.
***************************************************************************/
int getopt(int argc, char *argv[], char *optionS)
{
static char *letP	= NULL;		/* remember next option char's location */
static char SW		= '-';		/* switch character */

int opterr = 1;				/* allow error message	*/
unsigned char ch;
char *optP;

if (argc > optind)
	{
	if (letP == NULL)
		{
		if ((letP = argv[optind]) == NULL || *(letP++) != SW)
			goto gopEOF;

		if (*letP == SW)
			{
			optind++;
			goto gopEOF;
			}
		}
	if (0 == (ch = *(letP++)))
		{
		optind++;
		goto gopEOF;
		}
	if (':' == ch  ||  (optP = strchr(optionS, ch)) == NULL)
		goto gopError;
	if (':' == *(++optP))
		{
		optind++;
		if (0 == *letP)
			{
			if (argc <= optind)
				goto  gopError;
			letP = argv[optind++];
			}
		optarg = letP;
		letP = NULL;
	}
	else
	{
	if (0 == *letP)
		{
		optind++;
		letP = NULL;
		}
	optarg = NULL;
	}
	return ch;
}

gopEOF:
	optarg = letP = NULL;
	return EOF;

gopError:
	optarg = NULL;
	errno  = EINVAL;
	if (opterr)
		perror ("Command line option");
	return ('~');
}

/*
List of elements sorted according to mass
_______________________
INo#	El	Mass
2		H	1.007825032
3		D	2.014101778
0		C	12
1		13C	13.00335484
4		N	14.00307401
5		15N	15.0001089
6		O	15.99491462
7		F	18.9984032
8		Na	22.98976967
9		Si	27.97692653
10		P	30.97376151
11		S	31.97207069
12		Cl	34.96885271
13		Br	78.9183376
------------------------
*/