Mercurial > repos > fgiacomoni > hr2
view lib/HR2_v1.04.cpp @ 3:78afd7f439f3 draft default tip
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
author | fgiacomoni |
---|---|
date | Wed, 15 Feb 2023 15:57:49 +0000 |
parents | 86296c048e46 |
children |
line wrap: on
line source
/* HR2.C V1.04 (Changes: meb) A program to calculate elemental compositions for a given mass. See the file README for details. -------------------------------------------------------------------- Copyright (c) 2001...2005 Joerg Hau <joerg.hau(at)dplanet.ch>. mail: joerg.hau@dplanet.ch www: http://www.mysunrise.ch/users/joerg.hau/ *changed version by Tobias Kind (TK), 2006 , Fiehnlab, *added extended valencies, added implementation of seven golden rules of molecular formula filtering This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation. See the file LICENSE for details. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -------------------------------------------------------------------- Creation: somewhere in 1992 by JHa. Revision: 2001-04-18, GPL'd, first public release (JHa) 2001-04-21, improved help text (JHa) 2002-06-27, added sodium (JHa) 2002-10-09, added 15N (JHa) 2005-02-25, added -v option; license now GPL *v2* (JHa) 2005-02-27, optimised code in calc loop (JHa) 2005-02-28, verified and updated atomic masses (JHa) 2005-06-17, added GPL text when "-h" is used (JHa) 2006-01-01, extended version for BMC Bioinformatics publication - HR2 (TK) 2006-03-03, added element ratio checks, extended valencies, only even electrons - HR2 (TK) 2006-09-09, 1000x-10000x speedup hand optimized hehe. - HR2 (TK) -->special version for CHNSOP-F-Cl-Br-Si 2009-05-28, David Enot introduced the concept of 'Adducts' (nadd) for MZedDB and corrected some inaccuracies in April 2008. Manfred Beckmann has now corrected the use of 'nadd' and 'charge' by calculating 'nadd' from 'charge' using abscharge = abs(charge) (did it manually because I couldn't find 'abs') to correct 'measured_mass' and limits, but keeping nadd = 0 for rdb calculation of neutral MW (MB) 2014-08-29, Marion Landi corrected the mass of atoms. This is ANSI C and should compile with any C compiler; use something along the lines of "gcc -Wall -O3 -o hr hr.c". "g++ -O2 -o myhr HR2.cpp" on a Mac OS X G5 proc Optimize for speed, you may gain factor 3! NOW compiled under Visual C++ Express (faster than GCC) in C++ mode for boolean type. --------------------------------------------------------------------- Example arguments: 1) -m 1 -t 100000 -C 1-100 -H 1-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10 2) -m 500 -t 1 -C 50-100 -H 10-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10 3) hr2-all-res -c "Hexaflumuron_459.982882Da_3ppm" -m 459.982882 -t 1.37995 -C 0-39 -H 0-98 -N 0-34 -O 0-30 -P 0-12 -S 0-12 -F 0-12 -L 0-14 -B 0-7 -I 0-0 945 formulas found in 253 seconds. (now 4 seconds, before eternal) 4) hr2 -m 459.982882 -t 1.37995 -C 10-39 -H 28-98 -N 4-34 -O 0-30 -P 1-12 -S 1-12 -F 0-12 -L 1-14 -B 2-6 -I 0-0 1 formula in 0 seconds (former eternal time) */ #include <stdio.h> #include <string.h> #include <errno.h> #include <time.h> /* Uncomment this for windows #include <windows.h> #include <process.h> */ #include <math.h> #define VERSION "20050617" /* String ! */ #define TRUE 1 #define FALSE 0 #define MAXLEN 181 /* max. length of input string */ #define _CRT_SECURE_NO_DEPRECATE 1 typedef struct { const char *sym; /* symbol */ const char *symi; /* non iso symbol */ const double mass; /* accurate mass */ const float val; /* to calculate unsaturations */ const int key; /* used for decoding cmd line */ int min, /* atom count min */ max, /* atom count max */ cnt, /* atom count actual */ save; /* atom count old - for loop exiting*/ } Element; /* --- atomic masses as published by IUPAC, 2002-10-02 ---------- */ Element el[]= /* array of the elements used here: Symbol, exact mass, dbe, keycode, number, default-min, default-max dle: isoptopes are given by iC for 13C, iK for 41K etc.... */ { { "C", "C", 12.000000000, +2.0, 'C', 0, 100, 0,0 }, { "iC", "C", 13.0033548378, +2.0, '1', 0, 0, 0 ,0 }, { "H", "H", 1.0078250321, -1.0, 'H', 0, 200, 0 ,0}, { "iH", "H", 2.0141017780, -1.0, 'D', 0, 0, 0 ,0}, { "N", "N", 14.0030740052, +1.0, 'N', 0, 40, 0,0 }, //org +1 = valence = 3: now +3 for valence = 5 { "iN", "N", 15.0001088984, +1.0, 'M', 0, 0, 0,0 }, { "O", "O", 15.9949146221, 0.0, 'O', 0, 70, 0 ,0}, { "F", "F", 18.99840322, -1.0, 'F', 0, 0, 0 ,0}, { "Na", "Na", 22.98976928, -1.0, 'A', 0, 0, 0 ,0}, { "Si", "Si", 27.9769265327, +2.0, 'I', 0, 0, 0 ,0}, { "P", "P", 30.97376163, +3.0, 'P', 0, 10, 0 ,0}, //org +1 valence = 3: now +3 for valence = 5 { "S", "S", 31.97207069, +4.0, 'S', 0, 0, 0 ,0}, //org 0 = valence = 2; now +4 for valence = 6 { "Cl", "Cl", 34.96885268, -1.0, 'L', 0, 0, 0 ,0}, { "iCl", "Cl", 36.96590259, -1.0, 'E', 0, 0, 0 ,0}, // added dle { "Br", "Br", 78.9183371, -1.0, 'B', 0, 0, 0 ,0}, { "iBr", "Br", 80.9162906, -1.0, 'G', 0, 0, 0 ,0}, // added dle { "K", "K", 38.96370668, -1.0, 'K', 0, 0, 0 ,0}, // added dle { "iK", "K", 40.96182576, -1.0, 'J', 0, 0, 0 ,0}, // added dle }; const double electron = 0.0005486; /* --- global variables --- */ double charge, /* charge on the molecule */ tol, /* mass tolerance in mmu */ abscharge, /* abs(charge): to calculate 'measured_mass' and limits - meb */ nadd; /* nadd now calculated as abs(charge) - meb */ char comment[MAXLEN]=""; /* some text ;-) */ int single; /* flag to indicate if we calculate only once and exit */ int nr_el; /* number of elements in array (above) */ bool verbose; bool applygr; /* --- some variables needed for reading the cmd line --- */ char *optarg; /* global: pointer to argument of current option */ static int optind = 1; /* global: index of which argument is next. Is used as a global variable for collection of further arguments (= not options) via argv pointers. */ /* --- function prototypes ------------------- */ int input(char *text, double *zahl); int readfile(char *whatfile); double calc_mass(void); float calc_rdb(void); long do_calculations(double mass, double tolerance); int clean (char *buf); int getopt(int argc, char *argv[], char *optionS); /* you have to compile with C++ or define yourself this bool type (C99 compiler definition) */ bool calc_element_ratios(bool element_probability); /* --- threading ------------------- */ /* mass and RDB calculation could be in several other threads however the loop is very fast, context switching takes longer and slows down the whole process. Single-Thread multiprocessor (cluster) implementation seems superior here. */ /* Uncomment for windows HANDLE hThread2,hThread3; unsigned threadID2,threadID3; */ /* --- main --- */ int main (int argc, char **argv) { double mz; /* mass */ char buf[MAXLEN]; int i, tmp; static char *id = "hr version %s. Copyright (C) by Joerg Hau 2001...2005 & Tobias Kind 2006 :-).\n"; static char *disclaimer = "\nThis program is free software; you can redistribute it and/or modify it under\n" "the terms of version 2 of the GNU General Public License as published by the\n" "Free Software Foundation.\n\n" "This program is distributed in the hope that it will be useful, but WITHOUT ANY\n" "WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A\n" "PARTICULAR PURPOSE. See the GNU General Public License for details.\n"; static char *msg = "Calculates possible elemental compositions for a given mass.\n\n" "usage: hr [options] file\n\nValid command line options are:\n" "-h This Help screen.\n" "-v Display version information.\n" "-t tol Set tolerance to 'tol' mmu (default 5).\n" "-m mz Set mass to 'mz'.\n" "-s Print only the results (dle)" "-c Number of charges i.e -c 1 is equivalent to -p (dle).\n" "-p Positive ions; electron mass is removed from the formula.\n" "-n Negative ions; electron mass is added to the formula.\n" "-g Does not apply 4-7 golden rules (dle).\n" "-a Maximum num. of adducts. (dle)\n" "-X a-b For element X, use atom range a to b. List of valid atoms:\n\n" " X key mass (6 decimals shown)\n" " -------------------------------------\n"; /* initialise variables */ nadd = 0.0; /* added dle: number of adducts*/ single = FALSE; /* run continuously */ charge = 0.0; /* default charge is neutral */ tol = 1.0; /* default tolerance in mmu */ nr_el = sizeof(el)/sizeof(el[0]); /* calculate array size */ verbose = TRUE; /* added dle: should everything printed out?*/ applygr = TRUE; /* added dle: should rules 4-7 applied?*/ /* decode and read the command line */ while ((tmp = getopt(argc, argv, "hvpnsgt:m:c:a:C:H:N:M:O:D:1:S:F:L:B:P:I:A:K:E:G:J:")) != EOF) switch (tmp) { case 'h': /* help me */ printf (id, VERSION); printf (msg); for (i=0; i < nr_el; i++) printf (" %4s -%c %15.6lf\n", el[i].sym, el[i].key, el[i].mass); printf(disclaimer, "\n"); return 0; case 'v': /* version */ printf (id, VERSION); return 0; case 'p': /* positive charge */ charge = +1.0; continue; case 's': /* simple output dle*/ verbose = FALSE; continue; case 'n': /* negative carge */ charge = -1.0; continue; case 'g': /* apply 7GR dle */ applygr = false; continue; case 't': /* tolerance */ strcpy(buf, optarg); sscanf(buf, "%lf", &tol); continue; case 'a': /* num of adducts dle */ strcpy(buf, optarg); sscanf(buf, "%lf", &nadd); continue; case 'm': /* single mass */ strcpy(buf, optarg); sscanf(buf, "%lf", &mz); single = TRUE; continue; case 'c': /* comment for single mass */ strcpy(buf, optarg); sscanf(buf, "%lf", &charge); continue; continue; case 'C': /* C12 */ case 'H': /* 1H */ case 'N': /* 14N */ case 'M': /* 15N */ case 'O': /* 16O */ case 'D': /* 2H */ case '1': /* 13C */ case 'A': /* Na ('N' is taken for Nitrogen!) */ case 'S': /* 32S */ case 'F': /* 19F */ case 'L': /* 35Cl ('C' is taken!) */ case 'E': /* 37Cl (chloridE!) */ case 'B': /* 79Br */ case 'K': /* 39K */ case 'J': /* 41K */ case 'G': /* 81Br */ case 'P': /* 31P */ case 'I': /* 28Si ('S' is taken!) */ i = 0; /* compare keys until found */ while ((i < nr_el) && (el[i].key != tmp)) i++; strcpy(buf, optarg); sscanf(buf, "%d-%d", &el[i].min, &el[i].max); /* copy over */ if (el[i].min > el[i].max) /* swap them */ { tmp = el[i].min; el[i].min = el[i].max; el[i].max = tmp; } /* printf ("\n %c = %c ... %s (%d-%d)", \ tmp, el[i].key, el[i].sym, el[i].min, el[i].max); */ continue; case '~': /* invalid arg */ default: printf ("'%s -h' for help.\n", argv[0]); return 1; } if (argv[optind] != NULL) /* remaining parameter on cmd line? */ /* must be a file -- treat it line by line */ return (readfile (argv[optind])); if (single == TRUE) /* only one calculation requested? */ do_calculations(mz, tol); /* do it, then exit ... */ else { /* otherwise run a loop */ while (input(comment, &mz)) { tmp = do_calculations(mz, tol); printf("\n"); } } return 0; } /*************************************************************************** * INPUT: reads a dataset in "dialog mode". * * Input: Pointer to comment text, pointer to mass. * * Returns: Result of sscanf(), 0 if prog is to be finished. * * Note: This is a fairly primitive way to do it, but it works ;-) * ****************************************************************************/ int input(char *txt, double *zahl) { char buf[MAXLEN]; /* input line */ *zahl = 0.0; /* reset */ printf("Comment : "); /* display prompt */ fgets(buf, MAXLEN-1, stdin); /* read line */ buf[MAXLEN] = 0x0; /* terminate string */ clean (buf); /* remove linefeed */ strcpy(txt, buf); /* copy text over */ printf("Mass (ENTER to quit): "); /* display prompt */ fgets(buf, MAXLEN-1, stdin); /* read line */ buf[MAXLEN] = 0x0; /* terminate string */ if (!clean (buf)) /* only a CR ? --> quit */ return 0; sscanf(buf,"%lf", zahl); /* scan string */ return 1; } /*************************************************************************** * READFILE: reads dataset from file. * * Input: Pointer to comment text, pointer to mass. * * Returns: 0 if OK, 1 if error. * ****************************************************************************/ int readfile(char *whatfile) { double mz; /* measured mass */ char buf[MAXLEN]; /* input line */ FILE *infile; infile = fopen(whatfile, "r"); if (NULL == infile) { fprintf (stderr, "Error: Cannot open %s.", whatfile); return 1; } while (fgets(buf, MAXLEN-1, infile)) { buf[MAXLEN] = 0x0; /* terminate string */ if (*buf == ';') /* comment line */ continue; if (!clean (buf)) /* only a CR ? --> quit */ return 0; sscanf(buf,"%s %lf", comment, &mz); /* scan string */ do_calculations(mz, tol); mz = 0.0; /* reset */ } return 0; } /************************************************************************ * CALC_MASS: Calculates mass of an ion from its composition. * * Input: nothing (uses global variables) * * Returns. mass of the ion. * * Note: Takes care of charge and electron mass! * * (Positive charge means removal of electrons). * *************************************************************************/ double calc_mass(void) { int i; double sum = 0.0; for (i=0; i < nr_el; i++) sum += el[i].mass * el[i].cnt; return (sum - (charge * electron)); } /************************************************************************ * CALC_RDB: Calculates rings & double bond equivalents. * * Input: nothing (uses global variables) * * Returns. RDB. * *************************************************************************/ float calc_rdb(void) { int i; float sum = 2.0; for (i=0; i < nr_el; i++) sum += el[i].val * el[i].cnt; return (sum/2.0); } /************************************************************************ * Calculates element ratios , CH2 (more than 8 electrons needed is not handled) * Calculations element probabilities if element_probability = true * Input: nothing (uses global variables) * Returns. true/false. *************************************************************************/ bool calc_element_ratios(bool element_probability) { bool CHNOPS_ok; float HC_ratio; float NC_ratio; float OC_ratio; float PC_ratio; float SC_ratio; /* added the number of isotopes to the calculation - dle*/ float C_count = (float)el[0].cnt+(float)el[1].cnt; // C_count=12C+13C float H_count = (float)el[2].cnt+(float)el[3].cnt; float N_count = (float)el[4].cnt+(float)el[5].cnt; /* modif end here */ float O_count = (float)el[6].cnt; float P_count = (float)el[10].cnt; float S_count = (float)el[11].cnt; /* ELEMENT RATIOS allowed MIN MAX (99.99%) H/C 0.07 6.00 N/C 0.00 4.00 O/C 0.00 3.00 P/C 0.00 2.00 S/C 0.00 6.00 */ // set CHNOPS_ok = true and assume all ratios are ok CHNOPS_ok = true; /*element_probability = false; */ if (C_count && H_count >0) // C and H must have one count anyway (remove for non-organics// { HC_ratio = H_count/C_count; if (element_probability) { if ((HC_ratio < 0.2) || (HC_ratio > 3.0)) // this is the H/C probability check ; CHNOPS_ok = false; } else if (HC_ratio > 6.0) // this is the normal H/C ratio check - type cast from int to float is important CHNOPS_ok = false; } if (N_count >0) // if positive number of nitrogens then thes N/C ratio else just calc normal { NC_ratio = N_count/C_count; if (element_probability) { if (NC_ratio > 2.0) // this is the N/C probability check ; CHNOPS_ok = false; } else if (NC_ratio > 4.0) CHNOPS_ok = false; } if (O_count >0) // if positive number of O then thes O/C ratio else just calc normal { OC_ratio = O_count/C_count; if (element_probability) { if (OC_ratio > 1.2) // this is the O/C probability check ; CHNOPS_ok = false; } else if (OC_ratio > 3.0) CHNOPS_ok = false; } if (P_count >0) // if positive number of P then thes P/C ratio else just calc normal { PC_ratio = P_count/C_count; if (element_probability) { if (PC_ratio > 0.32) // this is the P/C probability check ; CHNOPS_ok = false; } else if (PC_ratio > 6.0) CHNOPS_ok = false; } if (S_count >0) // if positive number of S then thes S/C ratio else just calc normal { SC_ratio = S_count/C_count; if (element_probability) { if (SC_ratio > 0.65) // this is the S/C probability check ; CHNOPS_ok = false; } else if (SC_ratio > 2.0) CHNOPS_ok = false; } //----------------------------------------------------------------------------- // check for multiple element ratios together with probability check //if N<10, O<20, P<4, S<3 then true if (element_probability && (N_count > 10) && (O_count > 20) && (P_count > 4) && (S_count > 1)) CHNOPS_ok = false; // NOP check for multiple element ratios together with probability check // NOP all > 3 and (N<11, O <22, P<6 then true) if (element_probability && (N_count > 3) && (O_count > 3) && (P_count > 3)) { if (element_probability && (N_count > 11) && (O_count > 22) && (P_count > 6)) CHNOPS_ok = false; } // OPS check for multiple element ratios together with probability check // O<14, P<3, S<3 then true if (element_probability && (O_count > 14) && (P_count > 3) && (S_count > 3)) CHNOPS_ok = false; // PSN check for multiple element ratios together with probability check // P<3, S<3, N<4 then true if (element_probability && (P_count > 3) && (S_count > 3) && (N_count >4)) CHNOPS_ok = false; // NOS check for multiple element ratios together with probability check // NOS all > 6 and (N<19 O<14 S<8 then true) if (element_probability && (N_count >6) && (O_count >6) && (S_count >6)) { if (element_probability && (N_count >19) && (O_count >14) && (S_count >8)) CHNOPS_ok = false; } // function return value; if (CHNOPS_ok == true) return true; else return false; } /************************************************************************ * DO_CALCULATIONS: Does the actual calculation loop. * * Input: measured mass (in amu), tolerance (in mmu) * * Returns. number of hits. * *************************************************************************/ long do_calculations (double measured_mass, double tolerance) { time_t start, finish; double elapsed_time; double mass; /* calc'd mass */ double limit_lo, limit_hi; /* mass limits */ float rdb, lewis, rdbori; /* Rings & double bonds */ long i,j; long long hit; /* counts the hits, with long declaration, overflow after 25h with all formulas < 2000 Da long = FFFFFFFFh = 4,294,967,295d*/ int niso; long long counter; bool elementcheck; bool set_break; time( &start ); // start time /* calculate limits */ /* correct m/z value 'measured_mass' for z>1 using 'abscharge'; keep charge = 0 for neutral mass searches (z=0); additionally, keep the idea of 'adducts' for 'rdb', but use charge state instead -meb */ if (charge<0) { abscharge = -charge; nadd = abscharge; } else if (charge>0) { abscharge = charge; nadd = abscharge; } else if (charge==0) { abscharge = 1; /* for limits */ nadd = 0; /* for rdb */ } limit_lo = (measured_mass * abscharge) - (tolerance / 1000.0); limit_hi = (measured_mass * abscharge) + (tolerance / 1000.0); if(verbose) /* extended output as in original code - dle */ { if (strlen(comment)) /* print only if there is some text to print */ printf ("Text \t%s\n", comment); printf("\n"); /* linefeed */ printf ("Composition = \t"); for (i=0; i < nr_el; i++) if (el[i].max > 0) printf("%s:%d-%d ", el[i].sym, el[i].min, el[i].max); printf ("\n"); printf ("Mass Tolerance [mmu]: \t%.1f\n",tolerance); printf ("Measured Mass: \t%.6lf\n", measured_mass); printf ("Charge: \t%+.0lf\n", charge); /* dle */ printf ("Num. of Adducts/Losses: \t%.0lf\n", nadd); /* dle */ printf ("Apply 7GR: \t%d\n\n", applygr); /* dle */ } hit = 0; /* Reset counter */ counter = 0; set_break = false; /* set breaker for element counts to false */ /* Now let's run the big big loop ... I'd like to do that recursively but did not yet figure out how ;-) TK Adds: the loop is just fine. */ /* now comes the "COOL trick" for calculating all formulae: sorting the high mass elements to the outer loops, the small weights (H) to the inner loops; This will reduce the computational time by factor ~10-60-1000 OLD HR: Cangrelor at 1ppm 4465 formulas found in 5866 seconds. NEW HR2: Cangrelor at 1ppm 4465 formulas found in 96 seconds. NEW2 HR2: Cangrelor at 1ppm 4465 formulas found in 60 seconds. NEW3 HR2: Cangrelor at 1ppm 4465 formulas found in 59 seconds. HR2 Fast: Cangrelor at 1ppm 4465 formulas found in 41 seconds by evaluating 2,003,436,894 formulae. hr2 -c "Cangrelor" -m 774.948 -t 0.77 -C 1-64 -H 1-112 -N 0-30 -O 0-80 -P 0-12 -S 0-9 -F 0-10 -L 0-10 Another additional trick is to end the 2nd.. 3rd.. 4th.. xth innermost loop to prevent loops which are just higher and higher in mass. */ /*dle: process new elements */ el[17].cnt = el[17].min - 1; el[17].save = el[17].cnt; while (el[17].cnt++ < el[17].max) /*"iK"*/{ el[16].cnt = el[16].min - 1; el[16].save = el[16].cnt; while (el[16].cnt++ < el[16].max) /*"K"*/{ el[15].cnt = el[15].min - 1; el[15].save = el[15].cnt; while (el[15].cnt++ < el[15].max) /* "iBr"*/ { el[14].cnt = el[14].min - 1; el[14].save = el[14].cnt; while (el[14].cnt++ < el[14].max) /* "Br"*/ { el[13].cnt = el[13].min - 1; el[13].save = el[13].cnt; while (el[13].cnt++ < el[13].max) /*"iCl"*/ { el[12].cnt = el[12].min - 1; el[12].save = el[12].cnt; while (el[12].cnt++ < el[12].max) /*"Cl"*/ { el[11].cnt = el[11].min - 1; el[11].save = el[11].cnt; while (el[11].cnt++ < el[11].max) /*"S"*/ { el[10].cnt = el[10].min - 1; el[10].save = el[10].cnt; while (el[10].cnt++ < el[10].max) /*"P"*/ { el[9].cnt = el[9].min - 1; el[9].save = el[9].cnt; while (el[9].cnt++ < el[9].max) /*"Si"*/ { el[8].cnt = el[8].min - 1; el[8].save = el[8].cnt; while (el[8].cnt++ < el[8].max) /*"Na"*/{ el[7].cnt = el[7].min - 1; el[7].save = el[7].cnt; while (el[7].cnt++ < el[7].max) /*"F"*/ { el[6].cnt = el[6].min - 1; el[6].save = el[6].cnt; while (el[6].cnt++ < el[6].max) /*"O"*/ { el[5].cnt = el[5].min - 1; el[5].save = el[5].cnt; while (el[5].cnt++ < el[5].max) /*"15N"*/{ el[4].cnt = el[4].min - 1; el[4].save = el[4].cnt; while (el[4].cnt++ < el[4].max) /*"N"*/{ el[1].cnt = el[1].min - 1; el[1].save = el[1].cnt; while (el[1].cnt++ < el[1].max) /*"13C"*/ { el[0].cnt = el[0].min - 1; el[0].save = el[0].cnt; while (el[0].cnt++ < el[0].max) /* "C"*/ { el[3].cnt = el[3].min - 1; el[3].save = el[3].cnt; while (el[3].cnt++ < el[3].max) /*"D"*/{ el[2].cnt = el[2].min - 1; el[2].save = el[2].cnt; while (el[2].cnt++ < el[2].max) /*"H"*/{ mass = calc_mass(); counter++; //just for debug purposes //if (mass > limit_hi) //printf("mass: %f\tC: %d H: %d N: %d O: %d P: %d S: %d Cl: %d Br: %d\n",mass,el[0].cnt,el[2].cnt,el[4].cnt,el[6].cnt,el[10].cnt,el[11].cnt,el[12].cnt,el[13].cnt); /* if we exceed the upper limit, we can stop the calculation for this particular element (JHa 20050227). <-- comment TK that will only bust the innermost while loop, which is "H"*/ // break H loop if (mass > limit_hi) break; //************************************************************************************************************/ //Calculus loop with print out //************************************************************************************************************/ if ((mass >= limit_lo) && (mass <= limit_hi)) /* within limits? */ { // element check will be performed always, if variable bool element_probability is true also probabilities will be calculated // not an elegant implementation, but fast. elementcheck = calc_element_ratios(applygr); /* pass applygr boolean by dle */ if (elementcheck) { rdbori = calc_rdb(); /* get RDB */ rdb = rdbori + 0.5*nadd; /* dle: if nadd addcuts */ lewis = (float)(fmod(rdb, 1)); /*calc reminder*/ if ((rdb >= 0) && (lewis != 0.5) && (lewis !=-0.5))/* less than -0.5 RDB does not make sense */ { /* NO(!) CH3F10NS2 exists , RDB = -4.0 M= 282.9547*/ hit ++; for (i = 0; i < nr_el; i++) /* print composition */ if (el[i].cnt > 0) /* but only if useful */ printf("%s%d.", el[i].sym, el[i].cnt); // print formula printf("\t"); /* dle: print out a more explicit molecular formula for further processing and variable niso counts number of isotope elements in the solution */ niso=0; for (i = 0; i < nr_el; i++) { /* print composition */ if (el[i].cnt > 0) { /* but only if useful */ printf("%s%d", el[i].symi, el[i].cnt); // print formula if (el[i].symi != el[i].sym) niso=niso+el[i].cnt; } } /* dle: end of molecular print out */ /* dle: additionnaly print nadd and niso */ printf("\t%.2f\t%.7lf\t%.0lf\t%d\t%+.2lf\n", rdb, mass, nadd,niso, 1000.0 * ((measured_mass * abscharge) - mass)); } /* end of 'rdb' loop */ } // end of elementcheck loop } /* end of 'limit' loop */ //************************************************************************************************************/ /* TK: if the current mass is larger than the limit the loop can be exited. Each element must point to the element which is in use and before. This is a static implementation which can be enhanced with a pointer chain to the lower element. Actually now its only allowed for CHNSOP-Fl-Cl-Br-Si !!! Brute-force <> elegance :-) */ } /*"H"*/ if ((mass >= limit_lo) && (el[2].save == el[2].cnt-1)) break; } /*"D"*/ if ((mass >= limit_lo) && (el[3].save == el[3].cnt-1)) break; /* dle addons */ } /* "C"*/ if ((mass >= limit_lo) && (el[0].save == el[0].cnt-1)) break; } /*"13C"*/ if ((mass >= limit_lo) && (el[1].save == el[1].cnt-1)) break; /* dle addons */ } /*"N"*/ if ((mass >= limit_lo) && (el[4].save == el[4].cnt-1)) break; } /*"15N"*/ if ((mass >= limit_lo) && (el[5].save == el[5].cnt-1)) break; /* dle addons */ } /*"O"*/ if ((mass >= limit_lo) && (el[6].save == el[6].cnt-1)) break; } /*"F"*/ if ((mass >= limit_lo) && (el[7].save == el[7].cnt-1)) break; } /*"Na"*/ if ((mass >= limit_lo) && (el[8].save == el[8].cnt-1)) break; } /*"Si"*/ if ((mass >= limit_lo) && (el[9].save == el[9].cnt-1)) break; } /*"P"*/ if ((mass >= limit_lo) && (el[10].save == el[10].cnt-1)) break; } /*"S"*/ if ((mass >= limit_lo) && (el[11].save == el[11].cnt-1)) break; } /*"Cl"*/ if ((mass >= limit_lo) && (el[12].save == el[12].cnt-1)) break; } /*"iCl"*/ if ((mass >= limit_lo) && (el[13].save == el[13].cnt-1)) break; /* dle addons */ } /*"Br"*/ if ((mass >= limit_lo) && (el[14].save == el[14].cnt-1)) break; } /*"iBr"*/ if ((mass >= limit_lo) && (el[15].save == el[15].cnt-1)) break; /* dle addons */ } /*"K"*/ if ((mass >= limit_lo) && (el[16].save == el[16].cnt-1)) break; /* dle addons */ } /*"iK"*/ /* close that giant loop thing started above */ time(&finish); // stop timer elapsed_time = difftime(finish , start); // calulate time difference if(verbose){ if (!hit) printf("No matching combination found in %6.0f seconds.\n", elapsed_time ); else{ printf("\n%llu formulas found in %6.0f seconds by evaluating %llu formulae.\n",hit,elapsed_time,counter); printf("RDBs are overloaded to maximum valence values (N=3,P=5,S=6).\n"); } } return hit; } /************************************************************************ * CLEAN: "cleans" a buffer obtained by fgets() * * Input: Pointer to text buffer * * Returns: strlen of buffer. * *************************************************************************/ int clean (char *buf) { int i; for(i = 0; i < strlen(buf); i++) /* search for CR/LF */ { if(buf[i] == '\n' || buf[i] == '\r') { buf[i] = 0; /* stop at CR or LF */ break; } } return (strlen(buf)); } /*************************************************************************** * GETOPT: Command line parser, system V style. * * This routine is widely (and wildly) adapted from code that was * made available by Borland International Inc. * * Standard option syntax is: * * option ::= SW [optLetter]* [argLetter space* argument] * * where * - SW is '-' * - there is no space before any optLetter or argLetter. * - opt/arg letters are alphabetic, not punctuation characters. * - optLetters, if present, must be matched in optionS. * - argLetters, if present, are found in optionS followed by ':'. * - argument is any white-space delimited string. Note that it * can include the SW character. * - upper and lower case letters are distinct. * * There may be multiple option clusters on a command line, each * beginning with a SW, but all must appear before any non-option * arguments (arguments not introduced by SW). Opt/arg letters may * be repeated: it is up to the caller to decide if that is an error. * * The character SW appearing alone as the last argument is an error. * The lead-in sequence SWSW ("--") causes itself and all the rest * of the line to be ignored (allowing non-options which begin * with the switch char). * * The string *optionS allows valid opt/arg letters to be recognized. * argLetters are followed with ':'. Getopt () returns the value of * the option character found, or EOF if no more options are in the * command line. If option is an argLetter then the global optarg is * set to point to the argument string (having skipped any white-space). * * The global optind is initially 1 and is always left as the index * of the next argument of argv[] which getopt has not taken. Note * that if "--" or "//" are used then optind is stepped to the next * argument before getopt() returns EOF. * * If an error occurs, that is an SW char precedes an unknown letter, * then getopt() will return a '~' character and normally prints an * error message via perror(). If the global variable opterr is set * to false (zero) before calling getopt() then the error message is * not printed. * * For example, if * * *optionS == "A:F:PuU:wXZ:" * * then 'P', 'u', 'w', and 'X' are option letters and 'A', 'F', * 'U', 'Z' are followed by arguments. A valid command line may be: * * aCommand -uPFPi -X -A L someFile * * where: * - 'u' and 'P' will be returned as isolated option letters. * - 'F' will return with "Pi" as its argument string. * - 'X' is an isolated option. * - 'A' will return with "L" as its argument. * - "someFile" is not an option, and terminates getOpt. The * caller may collect remaining arguments using argv pointers. ***************************************************************************/ int getopt(int argc, char *argv[], char *optionS) { static char *letP = NULL; /* remember next option char's location */ static char SW = '-'; /* switch character */ int opterr = 1; /* allow error message */ unsigned char ch; char *optP; if (argc > optind) { if (letP == NULL) { if ((letP = argv[optind]) == NULL || *(letP++) != SW) goto gopEOF; if (*letP == SW) { optind++; goto gopEOF; } } if (0 == (ch = *(letP++))) { optind++; goto gopEOF; } if (':' == ch || (optP = strchr(optionS, ch)) == NULL) goto gopError; if (':' == *(++optP)) { optind++; if (0 == *letP) { if (argc <= optind) goto gopError; letP = argv[optind++]; } optarg = letP; letP = NULL; } else { if (0 == *letP) { optind++; letP = NULL; } optarg = NULL; } return ch; } gopEOF: optarg = letP = NULL; return EOF; gopError: optarg = NULL; errno = EINVAL; if (opterr) perror ("Command line option"); return ('~'); } /* List of elements sorted according to mass _______________________ INo# El Mass 2 H 1.007825032 3 D 2.014101778 0 C 12 1 13C 13.00335484 4 N 14.00307401 5 15N 15.0001089 6 O 15.99491462 7 F 18.9984032 8 Na 22.98976967 9 Si 27.97692653 10 P 30.97376151 11 S 31.97207069 12 Cl 34.96885271 13 Br 78.9183376 ------------------------ */