Previous changeset 0:ce5a8082bbb8 (2014-08-14) Next changeset 2:dfe9332138cf (2014-08-14) |
Commit message:
Deleted selected files |
removed:
bwa-0.7.9a/COPYING bwa-0.7.9a/ChangeLog bwa-0.7.9a/NEWS.md bwa-0.7.9a/QSufSort.c bwa-0.7.9a/QSufSort.h bwa-0.7.9a/README.md bwa-0.7.9a/bamlite.c bwa-0.7.9a/bamlite.h bwa-0.7.9a/bntseq.c bwa-0.7.9a/bntseq.h bwa-0.7.9a/bwa-helper.js bwa-0.7.9a/bwa.1 bwa-0.7.9a/bwa.c bwa-0.7.9a/bwa.h bwa-0.7.9a/bwape.c bwa-0.7.9a/bwase.c bwa-0.7.9a/bwase.h bwa-0.7.9a/bwaseqio.c bwa-0.7.9a/bwt.c bwa-0.7.9a/bwt.h bwa-0.7.9a/bwt_gen.c bwa-0.7.9a/bwt_lite.c bwa-0.7.9a/bwt_lite.h bwa-0.7.9a/bwtaln.c bwa-0.7.9a/bwtaln.h bwa-0.7.9a/bwtgap.c bwa-0.7.9a/bwtgap.h bwa-0.7.9a/bwtindex.c bwa-0.7.9a/bwtsw2.h bwa-0.7.9a/bwtsw2_aux.c bwa-0.7.9a/bwtsw2_chain.c bwa-0.7.9a/bwtsw2_core.c bwa-0.7.9a/bwtsw2_main.c bwa-0.7.9a/bwtsw2_pair.c bwa-0.7.9a/example.c bwa-0.7.9a/fastmap.c bwa-0.7.9a/is.c bwa-0.7.9a/kbtree.h bwa-0.7.9a/khash.h bwa-0.7.9a/kopen.c bwa-0.7.9a/kseq.h bwa-0.7.9a/ksort.h bwa-0.7.9a/kstring.c bwa-0.7.9a/kstring.h bwa-0.7.9a/ksw.c bwa-0.7.9a/ksw.h bwa-0.7.9a/kthread.c bwa-0.7.9a/kvec.h bwa-0.7.9a/main.c bwa-0.7.9a/malloc_wrap.c bwa-0.7.9a/malloc_wrap.h bwa-0.7.9a/pemerge.c bwa-0.7.9a/qualfa2fq.pl bwa-0.7.9a/utils.c bwa-0.7.9a/utils.h bwa-0.7.9a/xa2multi.pl |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/COPYING --- a/bwa-0.7.9a/COPYING Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,674 +0,0 @@\n- GNU GENERAL PUBLIC LICENSE\n- Version 3, 29 June 2007\n-\n- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n- Everyone is permitted to copy and distribute verbatim copies\n- of this license document, but changing it is not allowed.\n-\n- Preamble\n-\n- The GNU General Public License is a free, copyleft license for\n-software and other kinds of works.\n-\n- The licenses for most software and other practical works are designed\n-to take away your freedom to share and change the works. By contrast,\n-the GNU General Public License is intended to guarantee your freedom to\n-share and change all versions of a program--to make sure it remains free\n-software for all its users. We, the Free Software Foundation, use the\n-GNU General Public License for most of our software; it applies also to\n-any other work released this way by its authors. You can apply it to\n-your programs, too.\n-\n- When we speak of free software, we are referring to freedom, not\n-price. Our General Public Licenses are designed to make sure that you\n-have the freedom to distribute copies of free software (and charge for\n-them if you wish), that you receive source code or can get it if you\n-want it, that you can change the software or use pieces of it in new\n-free programs, and that you know you can do these things.\n-\n- To protect your rights, we need to prevent others from denying you\n-these rights or asking you to surrender the rights. Therefore, you have\n-certain responsibilities if you distribute copies of the software, or if\n-you modify it: responsibilities to respect the freedom of others.\n-\n- For example, if you distribute copies of such a program, whether\n-gratis or for a fee, you must pass on to the recipients the same\n-freedoms that you received. You must make sure that they, too, receive\n-or can get the source code. And you must show them these terms so they\n-know their rights.\n-\n- Developers that use the GNU GPL protect your rights with two steps:\n-(1) assert copyright on the software, and (2) offer you this License\n-giving you legal permission to copy, distribute and/or modify it.\n-\n- For the developers\' and authors\' protection, the GPL clearly explains\n-that there is no warranty for this free software. For both users\' and\n-authors\' sake, the GPL requires that modified versions be marked as\n-changed, so that their problems will not be attributed erroneously to\n-authors of previous versions.\n-\n- Some devices are designed to deny users access to install or run\n-modified versions of the software inside them, although the manufacturer\n-can do so. This is fundamentally incompatible with the aim of\n-protecting users\' freedom to change the software. The systematic\n-pattern of such abuse occurs in the area of products for individuals to\n-use, which is precisely where it is most unacceptable. Therefore, we\n-have designed this version of the GPL to prohibit the practice for those\n-products. If such problems arise substantially in other domains, we\n-stand ready to extend this provision to those domains in future versions\n-of the GPL, as needed to protect the freedom of users.\n-\n- Finally, every program is threatened constantly by software patents.\n-States should not allow patents to restrict development and use of\n-software on general-purpose computers, but in those that do, we wish to\n-avoid the special danger that patents applied to a free program could\n-make it effectively proprietary. To prevent this, the GPL assures that\n-patents cannot be used to render the program non-free.\n-\n- The precise terms and conditions for copying, distribution and\n-modification follow.\n-\n- TERMS AND CONDITIONS\n-\n- 0. Definitions.\n-\n- "This License" refers to version 3 of the GNU General Public License.\n-\n- "Copyright" also means copyright-like laws that apply to other kinds of\n-works, such as semiconductor masks.\n-\n- "The Program" refers to a'..b'THE PROGRAM\n-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\n-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n-\n- 16. Limitation of Liability.\n-\n- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\n-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\n-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\n-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\n-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\n-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\n-SUCH DAMAGES.\n-\n- 17. Interpretation of Sections 15 and 16.\n-\n- If the disclaimer of warranty and limitation of liability provided\n-above cannot be given local legal effect according to their terms,\n-reviewing courts shall apply local law that most closely approximates\n-an absolute waiver of all civil liability in connection with the\n-Program, unless a warranty or assumption of liability accompanies a\n-copy of the Program in return for a fee.\n-\n- END OF TERMS AND CONDITIONS\n-\n- How to Apply These Terms to Your New Programs\n-\n- If you develop a new program, and you want it to be of the greatest\n-possible use to the public, the best way to achieve this is to make it\n-free software which everyone can redistribute and change under these terms.\n-\n- To do so, attach the following notices to the program. It is safest\n-to attach them to the start of each source file to most effectively\n-state the exclusion of warranty; and each file should have at least\n-the "copyright" line and a pointer to where the full notice is found.\n-\n- <one line to give the program\'s name and a brief idea of what it does.>\n- Copyright (C) <year> <name of author>\n-\n- This program is free software: you can redistribute it and/or modify\n- it under the terms of the GNU General Public License as published by\n- the Free Software Foundation, either version 3 of the License, or\n- (at your option) any later version.\n-\n- This program is distributed in the hope that it will be useful,\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n- GNU General Public License for more details.\n-\n- You should have received a copy of the GNU General Public License\n- along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\n-Also add information on how to contact you by electronic and paper mail.\n-\n- If the program does terminal interaction, make it output a short\n-notice like this when it starts in an interactive mode:\n-\n- <program> Copyright (C) <year> <name of author>\n- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n- This is free software, and you are welcome to redistribute it\n- under certain conditions; type `show c\' for details.\n-\n-The hypothetical commands `show w\' and `show c\' should show the appropriate\n-parts of the General Public License. Of course, your program\'s commands\n-might be different; for a GUI interface, you would use an "about box".\n-\n- You should also get your employer (if you work as a programmer) or school,\n-if any, to sign a "copyright disclaimer" for the program, if necessary.\n-For more information on this, and how to apply and follow the GNU GPL, see\n-<http://www.gnu.org/licenses/>.\n-\n- The GNU General Public License does not permit incorporating your program\n-into proprietary programs. If your program is a subroutine library, you\n-may consider it more useful to permit linking proprietary applications with\n-the library. If this is what you want to do, use the GNU Lesser General\n-Public License instead of this License. But first, please read\n-<http://www.gnu.org/philosophy/why-not-lgpl.html>.\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/ChangeLog --- a/bwa-0.7.9a/ChangeLog Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,3864 +0,0 @@\n-------------------------------------------------------------------------\n-r1605 | lh3 | 2010-12-29 20:20:20 -0500 (Wed, 29 Dec 2010) | 3 lines\n-Changed paths:\n- M /branches/prog/bwa/bwtsw2_aux.c\n- M /branches/prog/bwa/main.c\n-\n- * bwa-0.5.9rc1-2 (r1605)\n- * fixed a typo/bug in bwasw\n-\n-------------------------------------------------------------------------\n-r1587 | lh3 | 2010-12-21 18:48:30 -0500 (Tue, 21 Dec 2010) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/bwa.1\n-\n-a typo in the manual\n-\n-------------------------------------------------------------------------\n-r1586 | lh3 | 2010-12-21 18:47:48 -0500 (Tue, 21 Dec 2010) | 3 lines\n-Changed paths:\n- M /branches/prog/bwa/bwape.c\n- M /branches/prog/bwa/bwase.c\n- M /branches/prog/bwa/bwtaln.c\n- M /branches/prog/bwa/bwtsw2_main.c\n- M /branches/prog/bwa/main.c\n- M /branches/prog/bwa/utils.c\n- M /branches/prog/bwa/utils.h\n-\n- * bwa-0.5.9rc1-1 (r1586)\n- * a few patches by John\n-\n-------------------------------------------------------------------------\n-r1562 | lh3 | 2010-12-10 01:02:06 -0500 (Fri, 10 Dec 2010) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/bwa.1\n- M /branches/prog/bwa/bwape.c\n- M /branches/prog/bwa/bwase.c\n-\n-documentation on specifying @RG\n-\n-------------------------------------------------------------------------\n-r1561 | lh3 | 2010-12-10 00:45:40 -0500 (Fri, 10 Dec 2010) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/ChangeLog\n- M /branches/prog/bwa/NEWS\n- M /branches/prog/bwa/bwa.1\n- M /branches/prog/bwa/main.c\n-\n-Release bwa-0.5.9rc1 (r1561)\n-\n-------------------------------------------------------------------------\n-r1560 | lh3 | 2010-12-10 00:29:08 -0500 (Fri, 10 Dec 2010) | 3 lines\n-Changed paths:\n- M /branches/prog/bwa/bwaseqio.c\n- M /branches/prog/bwa/main.c\n-\n- * fixed a small memory leak caused by the BAM reader\n- * fixed a memory violation, also in the BAM reader\n-\n-------------------------------------------------------------------------\n-r1559 | lh3 | 2010-12-10 00:10:48 -0500 (Fri, 10 Dec 2010) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/ChangeLog\n- M /branches/prog/bwa/Makefile\n-\n-change Makefile gcc options\n-\n-------------------------------------------------------------------------\n-r1558 | lh3 | 2010-12-10 00:09:22 -0500 (Fri, 10 Dec 2010) | 4 lines\n-Changed paths:\n- M /branches/prog/bwa/bwtsw2_aux.c\n- M /branches/prog/bwa/bwtsw2_core.c\n- M /branches/prog/bwa/main.c\n-\n- * bwa-0.5.8-6 (r1557)\n- * added a little more comments to BWA-SW\n- * randomly choosing a mapping if there are more than one\n-\n-------------------------------------------------------------------------\n-r1557 | lh3 | 2010-12-09 21:58:00 -0500 (Thu, 09 Dec 2010) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/Makefile\n- M /branches/prog/bwa/bwtsw2_aux.c\n-\n-sometimes unmapped reads may not be printed...\n-\n-------------------------------------------------------------------------\n-r1556 | lh3 | 2010-12-09 21:50:26 -0500 (Thu, 09 Dec 2010) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/Makefile\n- M /branches/prog/bwa/bwtsw2_aux.c\n-\n-print unmapped reads\n-\n-------------------------------------------------------------------------\n-r1555 | lh3 | 2010-12-09 21:17:20 -0500 (Thu, 09 Dec 2010) | 3 lines\n-Changed paths:\n- M /branches/prog/bwa/ChangeLog\n- M /branches/prog/bwa/bwa.1\n- M /branches/prog/bwa/bwtaln.c\n- M /branches/prog/bwa/main.c\n-\n- * bwa-0.5.8-5 (r1555)\n- * BAM input documentation\n-\n-------------------------------------------------------------------------\n-r1544 | lh3 | 2010-11-23 11:01:41 -0500 (Tue, 23 Nov 2010) | 3 lines\n-Changed paths:\n- M /branches/prog/bwa/bwape.c\n- M /branches/prog/bwa/bwase.c\n- M /branches/prog/bwa/main.c\n-\n- * bwa-0.5.8-4 (r1544)\n- * supporting adding RG tags and RG lines\n-\n-------------------------------------------------------------------------\n-r1543 | lh3 | 2010-11-23 00:16:40 -0500 (Tue, 23 Nov 2010) | 3 lines\n-Changed paths:\n- M /branches/prog/b'..b'\n-\n- * load .sa and .fmv files\n- * exact alignment now works\n-\n-------------------------------------------------------------------------\n-r303 | lh3 | 2008-05-27 06:33:38 -0400 (Tue, 27 May 2008) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/bntseq.c\n- M /branches/prog/bwa/bwt.c\n- M /branches/prog/bwa/bwtio.c\n- M /branches/prog/bwa/utils.c\n- M /branches/prog/bwa/utils.h\n-\n-add xassert and fix a bug\n-\n-------------------------------------------------------------------------\n-r302 | lh3 | 2008-05-27 06:23:20 -0400 (Tue, 27 May 2008) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/Makefile\n- M /branches/prog/bwa/bntseq.c\n- M /branches/prog/bwa/bwt.c\n- M /branches/prog/bwa/bwtio.c\n- A /branches/prog/bwa/utils.c\n- A /branches/prog/bwa/utils.h\n-\n-improve error message and error handling\n-\n-------------------------------------------------------------------------\n-r301 | lh3 | 2008-05-27 05:37:51 -0400 (Tue, 27 May 2008) | 4 lines\n-Changed paths:\n- M /branches/prog/bwa/Makefile\n- M /branches/prog/bwa/bwt.c\n- M /branches/prog/bwa/bwt.h\n- M /branches/prog/bwa/bwt2fmv.c\n- A /branches/prog/bwa/bwtio.c\n- M /branches/prog/bwa/main.c\n- M /branches/prog/bwa/main.h\n-\n- * move I/O codes to bwtio.c\n- * SA can be dumped and interestingly, it is identical to BWTSW\n- * now, .fmv is still different from BWTSW\n-\n-------------------------------------------------------------------------\n-r299 | lh3 | 2008-05-26 18:07:44 -0400 (Mon, 26 May 2008) | 2 lines\n-Changed paths:\n- M /branches/prog/bwa/Makefile\n- M /branches/prog/bwa/bwt.c\n- M /branches/prog/bwa/bwt.h\n- M /branches/prog/bwa/bwt2fmv.c\n-\n-generate/retrieve SA and Occ\n-\n-------------------------------------------------------------------------\n-r298 | lh3 | 2008-05-26 13:16:49 -0400 (Mon, 26 May 2008) | 3 lines\n-Changed paths:\n- M /branches/prog/bwa/bntseq.h\n- M /branches/prog/bwa/bwt.c\n- M /branches/prog/bwa/bwt.h\n- M /branches/prog/bwa/bwt2fmv.c\n-\n- * retrieve occ value at any position\n- * move bwt_cal_occ() to bwt.c\n-\n-------------------------------------------------------------------------\n-r297 | lh3 | 2008-05-25 17:43:58 -0400 (Sun, 25 May 2008) | 6 lines\n-Changed paths:\n- M /branches/prog/bwa/Makefile\n- A /branches/prog/bwa/bwt.c\n- A /branches/prog/bwa/bwt.h\n- A /branches/prog/bwa/bwt2fmv.c\n- M /branches/prog/bwa/main.c\n- M /branches/prog/bwa/main.h\n- M /branches/prog/bwa/pac2bwt.c\n-\n- * add bwt2fmv. It works to some extend. However, I do not understand\n- the purpose of some weird codes in BWT-SW. As a consequence, bwt2fmv\n- could generate a file almost identical, but not exactly identical, to\n- the .fmv file from BWT-SW.\n-\n-\n-------------------------------------------------------------------------\n-r296 | lh3 | 2008-05-24 18:35:02 -0400 (Sat, 24 May 2008) | 5 lines\n-Changed paths:\n- M /branches/prog/bwa/Makefile\n- M /branches/prog/bwa/bntseq.c\n- M /branches/prog/bwa/bntseq.h\n- M /branches/prog/bwa/main.c\n- M /branches/prog/bwa/main.h\n- A /branches/prog/bwa/pac2bwt.c\n-\n-Burrows-Wheeler Transform now works. At least on one example, the\n-current code generates the same BWT as BWT-SW. Kind of magical, I would\n-say. :)\n-\n-\n-------------------------------------------------------------------------\n-r295 | lh3 | 2008-05-24 11:25:31 -0400 (Sat, 24 May 2008) | 3 lines\n-Changed paths:\n- A /branches/prog/bwa/Makefile\n- M /branches/prog/bwa/bntseq.c\n- A /branches/prog/bwa/main.c\n- A /branches/prog/bwa/main.h\n-\n- * add Makefile and main.*\n- * improve interface to fa2bns, a bit\n-\n-------------------------------------------------------------------------\n-r293 | lh3 | 2008-05-24 10:57:03 -0400 (Sat, 24 May 2008) | 3 lines\n-Changed paths:\n- A /branches/prog/bwa\n- A /branches/prog/bwa/bntseq.c\n- A /branches/prog/bwa/bntseq.h\n- A /branches/prog/bwa/seq.c\n- A /branches/prog/bwa/seq.h\n-\n- * Burrow-Wheeler Alignment\n- * initial codes\n-\n-------------------------------------------------------------------------\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/NEWS.md --- a/bwa-0.7.9a/NEWS.md Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,1055 +0,0 @@\n-Release 0.7.9 (19 May, 2014)\n-----------------------------\n-\n-This release brings several major changes to BWA-MEM. Notably, BWA-MEM now\n-formally supports PacBio read-to-reference alignment and experimentally supports\n-PacBio read-to-read alignment. BWA-MEM also runs faster at a minor cost of\n-accuracy. The speedup is more significant when GRCh38 is in use. More\n-specifically:\n-\n- * Support PacBio subread-to-reference alignment. Although older BWA-MEM works\n- with PacBio data in principle, the resultant alignments are frequently\n- fragmented. In this release, we fine tuned existing methods and introduced\n- new heuristics to improve PacBio alignment. These changes are not used by\n- default. Users need to add option "-x pacbio" to enable the feature.\n-\n- * Support PacBio subread-to-subread alignment (EXPERIMENTAL). This feature is\n- enabled with option "-x pbread". In this mode, the output only gives the\n- overlapping region between a pair of reads without detailed alignment.\n-\n- * Output alternative hits in the XA tag if there are not so many of them. This\n- is a BWA-backtrack feature.\n-\n- * Support mapping to ALT contigs in GRCh38 (EXPERIMENTAL). We provide a script\n- to postprocess hits in the XA tag to adjust the mapping quality and generate\n- new primary alignments to all overlapping ALT contigs. We would *NOT*\n- recommend this feature for production uses.\n-\n- * Improved alignments to many short reference sequences. Older BWA-MEM may\n- generate an alignment bridging two or more adjacent reference sequences.\n- Such alignments are split at a later step as postprocessing. This approach\n- is complex and does not always work. This release forbids these alignments\n- from the very beginning. BWA-MEM should not produce an alignment bridging\n- two or more reference sequences any more.\n-\n- * Reduced the maximum seed occurrence from 10000 to 500. Reduced the maximum\n- rounds of Smith-Waterman mate rescue from 100 to 50. Added a heuristic to\n- lower the mapping quality if a read contains seeds with excessive\n- occurrences. These changes make BWA-MEM faster at a minor cost of accuracy\n- in highly repetitive regions.\n-\n- * Added an option "-Y" to use soft clipping for supplementary alignments.\n-\n- * Bugfix: incomplete alignment extension in corner cases.\n-\n- * Bugfix: integer overflow when aligning long query sequences.\n-\n- * Bugfix: chain score is not computed correctly (almost no practical effect)\n-\n- * General code cleanup\n-\n- * Added FAQs to README\n-\n-Changes in BWA-backtrack:\n-\n- * Bugfix: a segmentation fault when an alignment stands out of the end of the\n- last chromosome.\n-\n-(0.7.9: 19 May 2014, r783)\n-\n-\n-\n-Release 0.7.8 (31 March, 2014)\n-------------------------------\n-\n-Changes in BWA-MEM:\n-\n- * Bugfix: off-diagonal X-dropoff (option -d) not working as intended.\n- Short-read alignment is not affected.\n-\n- * Bugfix: unnecessarily large bandwidth used during global alignment,\n- which reduces the mapping speed by -5% for short reads. Results are not\n- affected.\n-\n- * Bugfix: when the matching score is not one, paired-end mapping quality is\n- inaccurate.\n-\n- * When the matching score (option -A) is changed, scale all score-related\n- options accordingly unless overridden by users.\n-\n- * Allow to specify different gap open (or extension) penalties for deletions\n- and insertions separately.\n-\n- * Allow to specify the insert size distribution.\n-\n- * Better and more detailed debugging information.\n-\n-With the default setting, 0.7.8 and 0.7.7 gave identical output on one million\n-100bp read pairs.\n-\n-(0.7.8: 31 March 2014, r455)\n-\n-\n-\n-Release 0.7.7 (25 Feburary, 2014)\n----------------------------------\n-\n-This release fixes incorrect MD tags in the BWA-MEM output.\n-\n-A note about short-read mapping to GRCh38. The new human reference genome\n-GRCh38 contains 60Mbp program generated alpha repeat arrays, some of which are\n-hard masked as they cannot be localized. The'..b" any N.\n-\n- * Automatically choose the maximum allowed number of differences. This\n- is important when reads of different lengths are mixed together.\n-\n- * Print mate coordinate if only one end is unmapped.\n-\n- * Generate MD tag. This tag encodes the mismatching positions and the\n- reference bases at these positions. Deletions from the reference will\n- also be printed.\n-\n- * Optionally dump multiple hits from samse, in another concise format\n- rather than SAM.\n-\n- * Optionally disable iterative search. This is VERY SLOOOOW, though.\n-\n- * Fixed a bug in generate SAM.\n-\n-(0.4.3: 22 January 2009, r787)\n-\n-\n-\n-Beta Release 0.4.2 (9 January, 2009)\n-------------------------------------\n-\n-Aaron Quinlan found a bug in the indexer: the bwa indexer segfaults if\n-there are no comment texts in the FASTA header. This is a critical\n-bug. Nothing else was changed.\n-\n-(0.4.2: 9 January 2009, r769)\n-\n-\n-\n-Beta Release 0.4.1 (7 January, 2009)\n-------------------------------------\n-\n-I am sorry for the quick updates these days. I like to set a milestone\n-for BWA and this release seems to be. For paired end reads, BWA also\n-does Smith-Waterman alignment for an unmapped read whose mate can be\n-mapped confidently. With this strategy BWA achieves similar accuracy to\n-maq. Benchmark is also updated accordingly.\n-\n-(0.4.1: 7 January 2009, r760)\n-\n-\n-\n-Beta Release 0.4.0 (6 January, 2009)\n-------------------------------------\n-\n-In comparison to the release two days ago, this release is mainly tuned\n-for performance with some tricks I learnt from Bowtie. However, as the\n-indexing format has also been changed, I have to increase the version\n-number to 0.4.0 to emphasize that *DATABASE MUST BE RE-INDEXED* with\n-'bwa index'.\n-\n- * Improved the speed by about 20%.\n-\n- * Added multi-threading to 'bwa aln'.\n-\n-(0.4.0: 6 January 2009, r756)\n-\n-\n-\n-Beta Release 0.3.0 (4 January, 2009)\n-------------------------------------\n-\n- * Added paired-end support by separating SA calculation and alignment\n- output.\n-\n- * Added SAM output.\n-\n- * Added evaluation to the documentation.\n-\n-(0.3.0: 4 January 2009, r741)\n-\n-\n-\n-Beta Release 0.2.0 (15 Augusst, 2008)\n--------------------------------------\n-\n- * Take the subsequence at the 5'-end as seed. Seeding strategy greatly\n- improves the speed for long reads, at the cost of missing a few true\n- hits that contain many differences in the seed. Seeding also increase\n- the memory by 800MB.\n-\n- * Fixed a bug which may miss some gapped alignments. Fixing the bug\n- also slows the speed a little.\n-\n-(0.2.0: 15 August 2008, r428)\n-\n-\n-\n-Beta Release 0.1.6 (08 Augusst, 2008)\n--------------------------------------\n-\n- * Give accurate CIGAR string.\n-\n- * Add a simple interface to SW/NW alignment\n-\n-(0.1.6: 08 August 2008, r414)\n-\n-\n-\n-Beta Release 0.1.5 (27 July, 2008)\n-----------------------------------\n-\n- * Improve the speed. This version is expected to give the same results.\n-\n-(0.1.5: 27 July 2008, r400)\n-\n-\n-\n-Beta Release 0.1.4 (22 July, 2008)\n-----------------------------------\n-\n- * Fixed a bug which may cause missing gapped alignments.\n-\n- * More clearly define what alignments can be found by BWA (See\n- manual). Now BWA runs a little slower because it will visit more\n- potential gapped alignments.\n-\n- * A bit code clean up.\n-\n-(0.1.4: 22 July 2008, r387)\n-\n-\n-\n-Beta Release 0.1.3 (21 July, 2008)\n-----------------------------------\n-\n-Improve the speed with some tricks on retrieving occurences. The results\n-should be exactly the same as that of 0.1.2.\n-\n-(0.1.3: 21 July 2008, r382)\n-\n-\n-\n-Beta Release 0.1.2 (17 July, 2008)\n-----------------------------------\n-\n-Support gapped alignment. Codes for ungapped alignment has been removed.\n-\n-(0.1.2: 17 July 2008, r371)\n-\n-\n-\n-Beta Release 0.1.1 (03 June, 2008)\n------------------------------------\n-\n-This is the first release of BWA, Burrows-Wheeler Alignment tool. Please\n-read man page for more information about this software.\n-\n-(0.1.1: 03 June 2008, r349)\n" |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/QSufSort.c --- a/bwa-0.7.9a/QSufSort.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,402 +0,0 @@\n-/* QSufSort.c\n-\n- Original source from qsufsort.c\n-\n- Copyright 1999, N. Jesper Larsson, all rights reserved.\n-\n- This file contains an implementation of the algorithm presented in "Faster\n- Suffix Sorting" by N. Jesper Larsson (jesper@cs.lth.se) and Kunihiko\n- Sadakane (sada@is.s.u-tokyo.ac.jp).\n-\n- This software may be used freely for any purpose. However, when distributed,\n- the original source must be clearly stated, and, when the source code is\n- distributed, the copyright notice must be retained and any alterations in\n- the code must be clearly marked. No warranty is given regarding the quality\n- of this software.\n-\n- Modified by Wong Chi-Kwong, 2004\n-\n- Changes summary:\t- Used long variable and function names\n-\t\t\t\t\t- Removed global variables\n-\t\t\t\t\t- Replace pointer references with array references\n-\t\t\t\t\t- Used insertion sort in place of selection sort and increased insertion sort threshold\n-\t\t\t\t\t- Reconstructing suffix array from inverse becomes an option\n-\t\t\t\t\t- Add handling where end-of-text symbol is not necessary < all characters\n-\t\t\t\t\t- Removed codes for supporting alphabet size > number of characters\n- \n- No warrenty is given regarding the quality of the modifications.\n-\n-*/\n-\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <limits.h>\n-#include "QSufSort.h"\n-\n-#define min(value1, value2)\t\t\t\t\t\t( ((value1) < (value2)) ? (value1) : (value2) )\n-#define med3(a, b, c)\t\t\t\t\t\t\t( a<b ? (b<c ? b : a<c ? c : a) : (b>c ? b : a>c ? c : a))\n-#define swap(a, b, t);\t\t\t\t\t\t\tt = a; a = b; b = t;\n-\n-// Static functions\n-static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, \n-\t\t\t\t\t\t\t const qsint_t highestPos, const qsint_t numSortedChar);\n-static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, \n-\t\t\t\t\t\t\t const qsint_t highestPos, const qsint_t numSortedChar);\n-static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, \n-\t\t\t\t\t\t\t\t\tconst qsint_t highestPos, const qsint_t numSortedChar);\n-static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize);\n-static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, \n-\t\t\t\t\t\t\t const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated);\n-\n-/* Makes suffix array p of x. x becomes inverse of p. p and x are both of size\n- n+1. Contents of x[0...n-1] are integers in the range l...k-1. Original\n- contents of x[n] is disregarded, the n-th symbol being regarded as\n- end-of-string smaller than all other symbols.*/\n-void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, \n-\t\t\t\t\t\tconst qsint_t smallestInputSymbol, const int skipTransform)\n-{\n-\tqsint_t i, j;\n-\tqsint_t s, negatedSortedGroupLength;\n-\tqsint_t numSymbolAggregated;\n-\tqsint_t numSortedPos = 1;\n-\tqsint_t newAlphabetSize;\n- \n-\tif (!skipTransform) {\n-\t\t/* bucketing possible*/\n-\t\tnewAlphabetSize = QSufSortTransform(V, I, numChar, largestInputSymbol, smallestInputSymbol, \n-\t\t\t\t\t\t\t\t\t\t\tnumChar, &numSymbolAggregated);\n-\t\tQSufSortBucketSort(V, I, numChar, newAlphabetSize);\n-\t\tI[0] = -1;\n-\t\tV[numChar] = 0;\n-\t\tnumSortedPos = numSymbolAggregated;\n-\t}\n-\n-\twhile ((qsint_t)(I[0]) >= -(qsint_t)numChar) {\n-\t\ti = 0;\n-\t\tnegatedSortedGroupLength = 0;\n-\t\tdo {\n-\t\t\ts = I[i];\n-\t\t\tif (s < 0) {\n-\t\t\t\ti -= s;\t\t\t\t\t\t/* skip over sorted group.*/\n-\t\t\t\tnegatedSortedGroupLength += s;\n-\t\t\t} else {\n-\t\t\t\tif (negatedSortedGroupLength) {\n-\t\t\t\t\tI[i+negatedSortedGroupLength] = negatedSortedGroupLength;\t/* combine preceding sorted groups */\n-\t\t\t\t\tnegatedSortedGroupLength = 0;\n-\t\t\t\t}\n-\t\t\t\tj = V[s] + 1;\n-\t\t\t\tQSufSortSortSplit(V, I, i, j - 1, numSortedPos);\n-\t\t\t\ti = j;\n-\t\t\t}\n-\t\t} while (i <= numChar);\n-\t\tif (negatedSortedGroupLength) {\n-\t\t\t'..b'nt_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize)\n-{\n-\tqsint_t i, c;\n-\tqsint_t d;\n-\tqsint_t groupNum;\n-\tqsint_t currentIndex;\n-\n-\t// mark linked list empty\n-\tfor (i=0; i<alphabetSize; i++)\n-\t\tI[i] = -1;\n-\n-\t// insert to linked list\n-\tfor (i=0; i<=numChar; i++) {\n-\t\tc = V[i];\n-\t\tV[i] = (qsint_t)(I[c]);\n-\t\tI[c] = i;\n-\t}\n-\n-\tcurrentIndex = numChar;\n-\tfor (i=alphabetSize; i>0; i--) {\n-\t\tc = I[i-1];\n-\t\td = (qsint_t)(V[c]);\n-\t\tgroupNum = currentIndex;\n-\t\tV[c] = groupNum;\n-\t\tif (d >= 0) {\n-\t\t\tI[currentIndex] = c;\n-\t\t\twhile (d >= 0) {\n-\t\t\t\tc = d;\n-\t\t\t\td = V[c];\n-\t\t\t\tV[c] = groupNum;\n-\t\t\t\tcurrentIndex--;\n-\t\t\t\tI[currentIndex] = c;\n-\t\t\t}\n-\t\t} else {\n-\t\t\t// sorted group\n-\t\t\tI[currentIndex] = -1;\n-\t\t}\n-\t\tcurrentIndex--;\n-\t}\n-}\n-\n-/* Transforms the alphabet of x by attempting to aggregate several symbols into\n- one, while preserving the suffix order of x. The alphabet may also be\n- compacted, so that x on output comprises all integers of the new alphabet\n- with no skipped numbers.\n-\n- Input: x is an array of size n+1 whose first n elements are positive\n- integers in the range l...k-1. p is array of size n+1, used for temporary\n- storage. q controls aggregation and compaction by defining the maximum intue\n- for any symbol during transformation: q must be at least k-l; if q<=n,\n- compaction is guaranteed; if k-l>n, compaction is never done; if q is\n- INT_MAX, the maximum number of symbols are aggregated into one.\n- \n- Output: Returns an integer j in the range 1...q representing the size of the\n- new alphabet. If j<=n+1, the alphabet is compacted. The global variable r is\n- set to the number of old symbols grouped into one. Only x[n] is 0.*/\n-static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, \n-\t\t\t\t\t\t\t const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated)\n-{\n-\tqsint_t c, i, j;\n-\tqsint_t a;\t// numSymbolAggregated\n-\tqsint_t mask;\n-\tqsint_t minSymbolInChunk = 0, maxSymbolInChunk = 0;\n-\tqsint_t newAlphabetSize;\n-\tqsint_t maxNumInputSymbol, maxNumBit, maxSymbol;\n-\n-\tmaxNumInputSymbol = largestInputSymbol - smallestInputSymbol + 1;\n-\n-\tfor (maxNumBit = 0, i = maxNumInputSymbol; i; i >>= 1) ++maxNumBit;\n-\tmaxSymbol = QSINT_MAX >> maxNumBit;\n-\n-\tc = maxNumInputSymbol;\n-\tfor (a = 0; a < numChar && maxSymbolInChunk <= maxSymbol && c <= maxNewAlphabetSize; a++) {\n-\t\tminSymbolInChunk = (minSymbolInChunk << maxNumBit) | (V[a] - smallestInputSymbol + 1);\n-\t\tmaxSymbolInChunk = c;\n-\t\tc = (maxSymbolInChunk << maxNumBit) | maxNumInputSymbol;\n-\t}\n-\n-\tmask = (1 << (a-1) * maxNumBit) - 1;\t/* mask masks off top old symbol from chunk.*/\n-\tV[numChar] = smallestInputSymbol - 1;\t/* emulate zero terminator.*/\n-\n-\t/* bucketing possible, compact alphabet.*/\n-\tfor (i=0; i<=maxSymbolInChunk; i++)\n-\t\tI[i] = 0;\t/* zero transformation table.*/\n-\tc = minSymbolInChunk;\n-\tfor (i=a; i<=numChar; i++) {\n-\t\tI[c] = 1;\t\t\t/* mark used chunk symbol.*/\n-\t\tc = ((c & mask) << maxNumBit) | (V[i] - smallestInputSymbol + 1);\t/* shift in next old symbol in chunk.*/\n-\t}\n-\tfor (i=1; i<a; i++) {\t/* handle last r-1 positions.*/\n-\t\tI[c] = 1;\t\t\t/* mark used chunk symbol.*/\n-\t\tc = (c & mask) << maxNumBit;\t/* shift in next old symbol in chunk.*/\n-\t}\n-\tnewAlphabetSize = 1;\n-\tfor (i=0; i<=maxSymbolInChunk; i++) {\n-\t\tif (I[i]) {\n-\t\t\tI[i] = newAlphabetSize;\n-\t\t\tnewAlphabetSize++;\n-\t\t}\n-\t}\n-\tc = minSymbolInChunk;\n-\tfor (i=0, j=a; j<=numChar; i++, j++) {\n-\t\tV[i] = I[c];\t\t\t\t\t\t/* transform to new alphabet.*/\n-\t\tc = ((c & mask) << maxNumBit) | (V[j] - smallestInputSymbol + 1);\t/* shift in next old symbol in chunk.*/\n-\t}\n-\tfor (; i<numChar; i++) {\t/* handle last a-1 positions.*/\n-\t\tV[i] = I[c];\t\t\t/* transform to new alphabet.*/\n-\t\tc = (c & mask) << maxNumBit;\t/* shift right-end zero in chunk.*/\n-\t}\n-\n-\tV[numChar] = 0;\t\t/* end-of-string symbol is zero.*/\n-\n- *numSymbolAggregated = a;\n-\treturn newAlphabetSize;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/QSufSort.h --- a/bwa-0.7.9a/QSufSort.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,45 +0,0 @@ -/* QSufSort.h - - Header file for QSufSort.c - - This file contains an implementation of the algorithm presented in "Faster - Suffix Sorting" by N. Jesper Larsson (jesper@cs.lth.se) and Kunihiko - Sadakane (sada@is.s.u-tokyo.ac.jp). - - This software may be used freely for any purpose. However, when distributed, - the original source must be clearly stated, and, when the source code is - distributed, the copyright notice must be retained and any alterations in - the code must be clearly marked. No warranty is given regarding the quality - of this software. - - Modified by Wong Chi-Kwong, 2004 - - Changes summary: - Used long variable and function names - - Removed global variables - - Replace pointer references with array references - - Used insertion sort in place of selection sort and increased insertion sort threshold - - Reconstructing suffix array from inverse becomes an option - - Add handling where end-of-text symbol is not necessary < all characters - - Removed codes for supporting alphabet size > number of characters - - No warrenty is given regarding the quality of the modifications. - -*/ - -#ifndef __QSUFSORT_H__ -#define __QSUFSORT_H__ - -#include <stdint.h> - -#define KEY(V, I, p, h) ( V[ I[p] + h ] ) -#define INSERT_SORT_NUM_ITEM 16 - -typedef int64_t qsint_t; -#define QSINT_MAX INT64_MAX - -void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, - const qsint_t smallestInputSymbol, const int skipTransform); -void QSufSortGenerateSaFromInverse(const qsint_t *V, qsint_t* __restrict I, const qsint_t numChar); - - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/README.md --- a/bwa-0.7.9a/README.md Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,182 +0,0 @@\n-##Getting started\n-\n-\tgit clone https://github.com/lh3/bwa.git\n-\tcd bwa; make\n-\t./bwa index ref.fa\n-\t./bwa mem ref.fa read-se.fq.gz | gzip -3 > aln-se.sam.gz\n-\t./bwa mem ref.fa read1.fq read2.fq | gzip -3 > aln-pe.sam.gz\n-\n-##Introduction\n-\n-BWA is a software package for mapping DNA sequences against a large reference\n-genome, such as the human genome. It consists of three algorithms:\n-BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is designed for Illumina\n-sequence reads up to 100bp, while the rest two for longer sequences ranged from\n-70bp to a few megabases. BWA-MEM and BWA-SW share similar features such as the\n-support of long reads and chimeric alignment, but BWA-MEM, which is the latest,\n-is generally recommended as it is faster and more accurate. BWA-MEM also has\n-better performance than BWA-backtrack for 70-100bp Illumina reads.\n-\n-For all the algorithms, BWA first needs to construct the FM-index for the\n-reference genome (the **index** command). Alignment algorithms are invoked with\n-different sub-commands: **aln/samse/sampe** for BWA-backtrack,\n-**bwasw** for BWA-SW and **mem** for the BWA-MEM algorithm.\n-\n-##Availability\n-\n-BWA is released under [GPLv3][1]. The latest source code is [freely\n-available at github][2]. Released packages can [be downloaded][3] at\n-SourceForge. After you acquire the source code, simply use `make` to compile\n-and copy the single executable `bwa` to the destination you want. The only\n-dependency required to build BWA is [zlib][14].\n-\n-##Seeking helps\n-\n-The detailed usage is described in the man page available together with the\n-source code. You can use `man ./bwa.1` to view the man page in a terminal. The\n-[HTML version][4] of the man page can be found at the [BWA website][5]. If you\n-have questions about BWA, you may [sign up the mailing list][6] and then send\n-the questions to [bio-bwa-help@sourceforge.net][7]. You may also ask questions\n-in forums such as [BioStar][8] and [SEQanswers][9].\n-\n-##Citing BWA\n-\n-* Li H. and Durbin R. (2009) Fast and accurate short read alignment with\n- Burrows-Wheeler transform. *Bioinformatics*, **25**, 1754-1760. [PMID:\n- [19451168][10]]. (if you use the BWA-backtrack algorithm)\n-\n-* Li H. and Durbin R. (2010) Fast and accurate long-read alignment with\n- Burrows-Wheeler transform. *Bioinformatics*, **26**, 589-595. [PMID:\n- [20080505][11]]. (if you use the BWA-SW algorithm)\n-\n-* Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs\n- with BWA-MEM. [arXiv:1303.3997v2][12] [q-bio.GN]. (if you use the BWA-MEM\n- algorithm or the **fastmap** command, or want to cite the whole BWA package)\n-\n-Please note that the last reference is a preprint hosted at [arXiv.org][13]. I\n-do not have plan to submit it to a peer-reviewed journal in the near future.\n-\n-##Frequently asked questions (FAQs)\n-\n-1. [What types of data does BWA work with?](#type)\n-2. [Why does a read appear multiple times in the output SAM?](#multihit)\n-3. [Does BWA work on reference sequences longer than 4GB in total?](#4gb)\n-4. [Why can one read in a pair has high mapping quality but the other has zero?](#pe0)\n-5. [How can a BWA-backtrack alignment stands out of the end of a chromosome?](#endref)\n-6. [How to map sequences to GRCh38 with ALT contigs?](#h38)\n-\n-####<a name="type"></a>1. What types of data does BWA work with?\n-\n-BWA works with a variety types of DNA sequence data, though the optimal\n-algorithm and setting may vary. The following list gives the recommended\n-settings:\n-\n-* Illumina/454/IonTorrent single-end reads longer than ~70bp or assembly\n- contigs up to a few megabases mapped to a close related reference genome:\n-\n-\t\tbwa mem ref.fa reads.fq > aln.sam\n-\n-* Illumina single-end reads no longer than ~70bp:\n-\n-\t\tbwa aln ref.fa reads.fq > reads.sai; bwa samse ref.fa reads.sai reads.fq > aln-se.sam\n-\n-* Illumina/454/IonTorrent paired-end reads longer than ~70bp:\n-\n-\t\tbwa mem ref.fa read1.fq read2.fq > aln-pe.sam\n-\n-* Illumina paired-end reads'..b'ncing error rate as high as ~15%.\n-\n-####<a name="multihit"></a>2. Why does a read appear multiple times in the output SAM?\n-\n-BWA-SW and BWA-MEM perform local alignments. If there is a translocation, a gene\n-fusion or a long deletion, a read bridging the break point may have two hits,\n-occupying two lines in the SAM output. With the default setting of BWA-MEM, one\n-and only one line is primary and is soft clipped; other lines are tagged with\n-0x800 SAM flag (supplementary alignment) and are hard clipped.\n-\n-####<a name="4gb"></a>3. Does BWA work on reference sequences longer than 4GB in total?\n-\n-Yes. Since 0.6.x, all BWA algorithms work with a genome with total length over\n-4GB. However, individual chromosome should not be longer than 2GB.\n-\n-####<a name="pe0"></a>4. Why can one read in a pair has high mapping quality but the other has zero?\n-\n-This is correct. Mapping quality is assigned for individual read, not for a read\n-pair. It is possible that one read can be mapped unambiguously, but its mate\n-falls in a tandem repeat and thus its accurate position cannot be determined.\n-\n-####<a name="endref"></a>5. How can a BWA-backtrack alignment stands out of the end of a chromosome?\n-\n-Internally BWA concatenates all reference sequences into one long sequence. A\n-read may be mapped to the junction of two adjacent reference sequences. In this\n-case, BWA-backtrack will flag the read as unmapped (0x4), but you will see\n-position, CIGAR and all the tags. A similar issue may occur to BWA-SW alignment\n-as well. BWA-MEM does not have this problem.\n-\n-####<a name="h38"></a>6. How to map sequences to GRCh38 with ALT contigs?\n-\n-BWA-backtrack and BWA-MEM partially support mapping to a reference containing\n-ALT contigs that represent alternative alleles highly divergent from the\n-reference genome.\n-\n-\t# download the K8 executable required by bwa-helper.js\n-\twget http://sourceforge.net/projects/lh3/files/k8/k8-0.2.1.tar.bz2/download\n-\ttar -jxf k8-0.2.1.tar.bz2\n-\n-\t# download the ALT-to-GRCh38 alignment in the SAM format\n-\twget http://sourceforge.net/projects/bio-bwa/files/hs38.alt.sam.gz/download\n-\n-\t# download the GRCh38 sequences with ALT contigs\n-\twget ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_full_analysis_set.fna.gz\n-\n-\t# index and mapping\n-\tbwa index -p hs38a GCA_000001405.15_GRCh38_full_analysis_set.fna.gz\n-\tbwa mem -h50 hs38a reads.fq | ./k8-linux bwa-helper.js genalt hs38.alt.sam.gz > out.sam\n-\n-Here, option `-h50` asks bwa-mem to output multiple hits in the XA tag if the\n-read has 50 or fewer hits. For each SAM line containing the XA tag,\n-`bwa-helper.js genalt` decodes the alignments in the XA tag, groups hits lifted\n-to the same chromosomal region, adjusts mapping quality and outputs all the\n-hits overlapping the reported hit. A read may be mapped to both the primary\n-assembly and one or more ALT contigs all with high mapping quality.\n-\n-Note that this procedure assumes reads are single-end and may miss hits to\n-highly repetitive regions as these hits will not be reported with option\n-`-h50`. `bwa-helper.js` is a prototype implementation not recommended for\n-production uses.\n-\n-\n-\n-[1]: http://en.wikipedia.org/wiki/GNU_General_Public_License\n-[2]: https://github.com/lh3/bwa\n-[3]: http://sourceforge.net/projects/bio-bwa/files/\n-[4]: http://bio-bwa.sourceforge.net/bwa.shtml\n-[5]: http://bio-bwa.sourceforge.net/\n-[6]: https://lists.sourceforge.net/lists/listinfo/bio-bwa-help\n-[7]: mailto:bio-bwa-help@sourceforge.net\n-[8]: http://biostars.org\n-[9]: http://seqanswers.com/\n-[10]: http://www.ncbi.nlm.nih.gov/pubmed/19451168\n-[11]: http://www.ncbi.nlm.nih.gov/pubmed/20080505\n-[12]: http://arxiv.org/abs/1303.3997\n-[13]: http://arxiv.org/\n-[14]: http://zlib.net/\n-[15]: https://github.com/lh3/bwa/tree/mem\n-[16]: ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bamlite.c --- a/bwa-0.7.9a/bamlite.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,210 +0,0 @@ -#include <stdlib.h> -#include <ctype.h> -#include <string.h> -#include <stdio.h> -#include <errno.h> -#include "bamlite.h" - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -/********************* - * from bam_endian.c * - *********************/ - -static inline int bam_is_big_endian() -{ - long one= 1; - return !(*((char *)(&one))); -} -static inline uint16_t bam_swap_endian_2(uint16_t v) -{ - return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); -} -static inline void *bam_swap_endian_2p(void *x) -{ - *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); - return x; -} -static inline uint32_t bam_swap_endian_4(uint32_t v) -{ - v = ((v & 0x0000FFFFU) << 16) | (v >> 16); - return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); -} -static inline void *bam_swap_endian_4p(void *x) -{ - *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); - return x; -} -static inline uint64_t bam_swap_endian_8(uint64_t v) -{ - v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); - v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); - return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); -} -static inline void *bam_swap_endian_8p(void *x) -{ - *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); - return x; -} - -/************** - * from bam.c * - **************/ - -int bam_is_be; - -bam_header_t *bam_header_init() -{ - bam_is_be = bam_is_big_endian(); - return (bam_header_t*)calloc(1, sizeof(bam_header_t)); -} - -void bam_header_destroy(bam_header_t *header) -{ - int32_t i; - if (header == 0) return; - if (header->target_name) { - for (i = 0; i < header->n_targets; ++i) - if (header->target_name[i]) free(header->target_name[i]); - if (header->target_len) free(header->target_len); - free(header->target_name); - } - if (header->text) free(header->text); - free(header); -} - -bam_header_t *bam_header_read(bamFile fp) -{ - bam_header_t *header; - char buf[4]; - int magic_len; - int32_t i = 1, name_len; - // read "BAM1" - magic_len = bam_read(fp, buf, 4); - if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) { - fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n"); - return NULL; - } - header = bam_header_init(); - // read plain text and the number of reference sequences - if (bam_read(fp, &header->l_text, 4) != 4) goto fail; - if (bam_is_be) bam_swap_endian_4p(&header->l_text); - header->text = (char*)calloc(header->l_text + 1, 1); - if (bam_read(fp, header->text, header->l_text) != header->l_text) goto fail; - if (bam_read(fp, &header->n_targets, 4) != 4) goto fail; - if (bam_is_be) bam_swap_endian_4p(&header->n_targets); - // read reference sequence names and lengths - header->target_name = (char**)calloc(header->n_targets, sizeof(char*)); - header->target_len = (uint32_t*)calloc(header->n_targets, 4); - for (i = 0; i != header->n_targets; ++i) { - if (bam_read(fp, &name_len, 4) != 4) goto fail; - if (bam_is_be) bam_swap_endian_4p(&name_len); - header->target_name[i] = (char*)calloc(name_len, 1); - if (bam_read(fp, header->target_name[i], name_len) != name_len) { - goto fail; - } - if (bam_read(fp, &header->target_len[i], 4) != 4) goto fail; - if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]); - } - return header; - fail: - bam_header_destroy(header); - return NULL; -} - -static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data) -{ - uint8_t *s; - uint32_t i, *cigar = (uint32_t*)(data + c->l_qname); - s = data + c->n_cigar*4 + c->l_qname + c->l_qseq + (c->l_qseq + 1)/2; - for (i = 0; i < c->n_cigar; ++i) bam_swap_endian_4p(&cigar[i]); - while (s < data + data_len) { - uint8_t type; - s += 2; // skip key - type = toupper(*s); ++s; // skip type - if (type == 'C' || type == 'A') ++s; - else if (type == 'S') { bam_swap_endian_2p(s); s += 2; } - else if (type == 'I' || type == 'F') { bam_swap_endian_4p(s); s += 4; } - else if (type == 'D') { bam_swap_endian_8p(s); s += 8; } - else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; } - } -} - -int bam_read1(bamFile fp, bam1_t *b) -{ - bam1_core_t *c = &b->core; - int32_t block_len, ret, i; - uint32_t x[8]; - - if ((ret = bam_read(fp, &block_len, 4)) != 4) { - if (ret == 0) return -1; // normal end-of-file - else return -2; // truncated - } - if (bam_read(fp, x, sizeof(bam1_core_t)) != sizeof(bam1_core_t)) return -3; - if (bam_is_be) { - bam_swap_endian_4p(&block_len); - for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i); - } - c->tid = x[0]; c->pos = x[1]; - c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff; - c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff; - c->l_qseq = x[4]; - c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7]; - b->data_len = block_len - sizeof(bam1_core_t); - if (b->m_data < b->data_len) { - b->m_data = b->data_len; - kroundup32(b->m_data); - b->data = (uint8_t*)realloc(b->data, b->m_data); - } - if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4; - b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2; - if (bam_is_be) swap_endian_data(c, b->data_len, b->data); - return 4 + block_len; -} - - -#ifdef USE_VERBOSE_ZLIB_WRAPPERS -// Versions of gzopen, gzread and gzclose that print up error messages - -gzFile bamlite_gzopen(const char *fn, const char *mode) { - gzFile fp; - if (strcmp(fn, "-") == 0) { - fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); - if (!fp) { - fprintf(stderr, "Couldn't open %s : %s", - (strstr(mode, "r"))? "stdin" : "stdout", - strerror(errno)); - } - return fp; - } - if ((fp = gzopen(fn, mode)) == 0) { - fprintf(stderr, "Couldn't open %s : %s\n", fn, - errno ? strerror(errno) : "Out of memory"); - } - return fp; -} - -int bamlite_gzread(gzFile file, void *ptr, unsigned int len) { - int ret = gzread(file, ptr, len); - - if (ret < 0) { - int errnum = 0; - const char *msg = gzerror(file, &errnum); - fprintf(stderr, "gzread error: %s\n", - Z_ERRNO == errnum ? strerror(errno) : msg); - } - return ret; -} - -int bamlite_gzclose(gzFile file) { - int ret = gzclose(file); - if (Z_OK != ret) { - fprintf(stderr, "gzclose error: %s\n", - Z_ERRNO == ret ? strerror(errno) : zError(ret)); - } - - return ret; -} -#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bamlite.h --- a/bwa-0.7.9a/bamlite.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,114 +0,0 @@ -#ifndef BAMLITE_H_ -#define BAMLITE_H_ - -#include <stdint.h> -#include <zlib.h> - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -#define USE_VERBOSE_ZLIB_WRAPPERS - -typedef gzFile bamFile; -#ifdef USE_VERBOSE_ZLIB_WRAPPERS -/* These print error messages on failure */ -# define bam_open(fn, mode) bamlite_gzopen(fn, mode) -# define bam_dopen(fd, mode) gzdopen(fd, mode) -# define bam_close(fp) bamlite_gzclose(fp) -# define bam_read(fp, buf, size) bamlite_gzread(fp, buf, size) -#else -# define bam_open(fn, mode) gzopen(fn, mode) -# define bam_dopen(fd, mode) gzdopen(fd, mode) -# define bam_close(fp) gzclose(fp) -# define bam_read(fp, buf, size) gzread(fp, buf, size) -#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ - -typedef struct { - int32_t n_targets; - char **target_name; - uint32_t *target_len; - size_t l_text, n_text; - char *text; -} bam_header_t; - -#define BAM_FPAIRED 1 -#define BAM_FPROPER_PAIR 2 -#define BAM_FUNMAP 4 -#define BAM_FMUNMAP 8 -#define BAM_FREVERSE 16 -#define BAM_FMREVERSE 32 -#define BAM_FREAD1 64 -#define BAM_FREAD2 128 -#define BAM_FSECONDARY 256 -#define BAM_FQCFAIL 512 -#define BAM_FDUP 1024 - -#define BAM_CIGAR_SHIFT 4 -#define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1) - -#define BAM_CMATCH 0 -#define BAM_CINS 1 -#define BAM_CDEL 2 -#define BAM_CREF_SKIP 3 -#define BAM_CSOFT_CLIP 4 -#define BAM_CHARD_CLIP 5 -#define BAM_CPAD 6 - -typedef struct { - int32_t tid; - int32_t pos; - uint32_t bin:16, qual:8, l_qname:8; - uint32_t flag:16, n_cigar:16; - int32_t l_qseq; - int32_t mtid; - int32_t mpos; - int32_t isize; -} bam1_core_t; - -typedef struct { - bam1_core_t core; - int l_aux, data_len, m_data; - uint8_t *data; -} bam1_t; - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -#define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0) -#define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0) -#define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname)) -#define bam1_qname(b) ((char*)((b)->data)) -#define bam1_seq(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname) -#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1)) -#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf) -#define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2) - -#define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t))) -#define bam_destroy1(b) do { \ - if (b) { free((b)->data); free(b); } \ - } while (0) - -extern int bam_is_be; - -#ifdef __cplusplus -extern "C" { -#endif - - bam_header_t *bam_header_init(void); - void bam_header_destroy(bam_header_t *header); - bam_header_t *bam_header_read(bamFile fp); - int bam_read1(bamFile fp, bam1_t *b); - -#ifdef USE_VERBOSE_ZLIB_WRAPPERS - gzFile bamlite_gzopen(const char *fn, const char *mode); - int bamlite_gzread(gzFile file, void *ptr, unsigned int len); - int bamlite_gzclose(gzFile file); -#endif /* USE_VERBOSE_ZLIB_WRAPPERS */ - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bntseq.c --- a/bwa-0.7.9a/bntseq.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,412 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <zlib.h>\n-#include <unistd.h>\n-#include <errno.h>\n-#include "bntseq.h"\n-#include "utils.h"\n-\n-#include "kseq.h"\n-KSEQ_DECLARE(gzFile)\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-unsigned char nst_nt4_table[256] = {\n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*\'-\'*/, 4, 4,\n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4\n-};\n-\n-void bns_dump(const bntseq_t *bns, const char *prefix)\n-{\n-\tchar str[1024];\n-\tFILE *fp;\n-\tint i;\n-\t{ // dump .ann\n-\t\tstrcpy(str, prefix); strcat(str, ".ann");\n-\t\tfp = xopen(str, "w");\n-\t\terr_fprintf(fp, "%lld %d %u\\n", (long long)bns->l_pac, bns->n_seqs, bns->seed);\n-\t\tfor (i = 0; i != bns->n_seqs; ++i) {\n-\t\t\tbntann1_t *p = bns->anns + i;\n-\t\t\terr_fprintf(fp, "%d %s", p->gi, p->name);\n-\t\t\tif (p->anno[0]) err_fprintf(fp, " %s\\n", p->anno);\n-\t\t\telse err_fprintf(fp, "\\n");\n-\t\t\terr_fprintf(fp, "%lld %d %d\\n", (long long)p->offset, p->len, p->n_ambs);\n-\t\t}\n-\t\terr_fflush(fp);\n-\t\terr_fclose(fp);\n-\t}\n-\t{ // dump .amb\n-\t\tstrcpy(str, prefix); strcat(str, ".amb");\n-\t\tfp = xopen(str, "w");\n-\t\terr_fprintf(fp, "%lld %d %u\\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes);\n-\t\tfor (i = 0; i != bns->n_holes; ++i) {\n-\t\t\tbntamb1_t *p = bns->ambs + i;\n-\t\t\terr_fprintf(fp, "%lld %d %c\\n", (long long)p->offset, p->len, p->amb);\n-\t\t}\n-\t\terr_fflush(fp);\n-\t\terr_fclose(fp);\n-\t}\n-}\n-\n-bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename)\n-{\n-\tchar str[1024];\n-\tFILE *fp;\n-\tconst char *fname;\n-\tbntseq_t *bns;\n-\tlong long xx;\n-\tint i;\n-\tint scanres;\n-\tbns = (bntseq_t*)calloc(1, sizeof(bntseq_t));\n-\t{ // read .ann\n-\t\tfp = xopen(fname = ann_filename, "r");\n-\t\tscanres = fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed);\n-\t\tif (scanres != 3) goto badread;\n-\t\tbns->l_pac = xx;\n-\t\tbns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t));\n-\t\tfor (i = 0; i < bns->n_seqs; ++i) {\n-\t\t\tbntann1_t *p = bns->anns + i;\n-\t\t\tchar *q = '..b'-\t\t// close .pac file\n-\t\terr_fflush(fp);\n-\t\terr_fclose(fp);\n-\t}\n-\tbns_dump(bns, prefix);\n-\tbns_destroy(bns);\n-\tkseq_destroy(seq);\n-\tfree(pac);\n-\treturn ret;\n-}\n-\n-int bwa_fa2pac(int argc, char *argv[])\n-{\n-\tint c, for_only = 0;\n-\tgzFile fp;\n-\twhile ((c = getopt(argc, argv, "f")) >= 0) {\n-\t\tswitch (c) {\n-\t\t\tcase \'f\': for_only = 1; break;\n-\t\t}\n-\t}\n-\tif (argc == optind) {\n-\t\tfprintf(stderr, "Usage: bwa fa2pac [-f] <in.fasta> [<out.prefix>]\\n");\n-\t\treturn 1;\n-\t}\n-\tfp = xzopen(argv[optind], "r");\n-\tbns_fasta2bntseq(fp, (optind+1 < argc)? argv[optind+1] : argv[optind], for_only);\n-\terr_gzclose(fp);\n-\treturn 0;\n-}\n-\n-int bns_pos2rid(const bntseq_t *bns, int64_t pos_f)\n-{\n-\tint left, mid, right;\n-\tif (pos_f >= bns->l_pac) return -1;\n-\tleft = 0; mid = 0; right = bns->n_seqs;\n-\twhile (left < right) { // binary search\n-\t\tmid = (left + right) >> 1;\n-\t\tif (pos_f >= bns->anns[mid].offset) {\n-\t\t\tif (mid == bns->n_seqs - 1) break;\n-\t\t\tif (pos_f < bns->anns[mid+1].offset) break; // bracketed\n-\t\t\tleft = mid + 1;\n-\t\t} else right = mid;\n-\t}\n-\treturn mid;\n-}\n-\n-int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re)\n-{\n-\tint is_rev, rid_b, rid_e;\n-\tif (rb < bns->l_pac && re > bns->l_pac) return -2;\n-\trid_b = bns_pos2rid(bns, bns_depos(bns, rb, &is_rev));\n-\trid_e = bns_pos2rid(bns, bns_depos(bns, re, &is_rev) - 1);\n-\treturn rid_b == rid_e? rid_b : -1;\n-}\n-\n-int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id)\n-{\n-\tint left, mid, right, nn;\n-\tif (ref_id) *ref_id = bns_pos2rid(bns, pos_f);\n-\tleft = 0; right = bns->n_holes; nn = 0;\n-\twhile (left < right) {\n-\t\tmid = (left + right) >> 1;\n-\t\tif (pos_f >= bns->ambs[mid].offset + bns->ambs[mid].len) left = mid + 1;\n-\t\telse if (pos_f + len <= bns->ambs[mid].offset) right = mid;\n-\t\telse { // overlap\n-\t\t\tif (pos_f >= bns->ambs[mid].offset) {\n-\t\t\t\tnn += bns->ambs[mid].offset + bns->ambs[mid].len < pos_f + len?\n-\t\t\t\t\tbns->ambs[mid].offset + bns->ambs[mid].len - pos_f : len;\n-\t\t\t} else {\n-\t\t\t\tnn += bns->ambs[mid].offset + bns->ambs[mid].len < pos_f + len?\n-\t\t\t\t\tbns->ambs[mid].len : len - (bns->ambs[mid].offset - pos_f);\n-\t\t\t}\n-\t\t\tbreak;\n-\t\t}\n-\t}\n-\treturn nn;\n-}\n-\n-uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len)\n-{\n-\tuint8_t *seq = 0;\n-\tif (end < beg) end ^= beg, beg ^= end, end ^= beg; // if end is smaller, swap\n-\tif (end > l_pac<<1) end = l_pac<<1;\n-\tif (beg < 0) beg = 0;\n-\tif (beg >= l_pac || end <= l_pac) {\n-\t\tint64_t k, l = 0;\n-\t\t*len = end - beg;\n-\t\tseq = malloc(end - beg);\n-\t\tif (beg >= l_pac) { // reverse strand\n-\t\t\tint64_t beg_f = (l_pac<<1) - 1 - end;\n-\t\t\tint64_t end_f = (l_pac<<1) - 1 - beg;\n-\t\t\tfor (k = end_f; k > beg_f; --k)\n-\t\t\t\tseq[l++] = 3 - _get_pac(pac, k);\n-\t\t} else { // forward strand\n-\t\t\tfor (k = beg; k < end; ++k)\n-\t\t\t\tseq[l++] = _get_pac(pac, k);\n-\t\t}\n-\t} else *len = 0; // if bridging the forward-reverse boundary, return nothing\n-\treturn seq;\n-}\n-\n-uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid)\n-{\n-\tint64_t far_beg, far_end, len;\n-\tint is_rev;\n-\tuint8_t *seq;\n-\n-\tif (*end < *beg) *end ^= *beg, *beg ^= *end, *end ^= *beg; // if end is smaller, swap\n-\tassert(*beg <= mid && mid < *end);\n-\t*rid = bns_pos2rid(bns, bns_depos(bns, mid, &is_rev));\n-\tfar_beg = bns->anns[*rid].offset;\n-\tfar_end = far_beg + bns->anns[*rid].len;\n-\tif (is_rev) { // flip to the reverse strand\n-\t\tint64_t tmp = far_beg;\n-\t\tfar_beg = (bns->l_pac<<1) - far_end;\n-\t\tfar_end = (bns->l_pac<<1) - tmp;\n-\t}\n-\t*beg = *beg > far_beg? *beg : far_beg;\n-\t*end = *end < far_end? *end : far_end;\n-\tseq = bns_get_seq(bns->l_pac, pac, *beg, *end, &len);\n-\tif (seq == 0 || *end - *beg != len) {\n-\t\tfprintf(stderr, "[E::%s] begin=%ld, mid=%ld, end=%ld, len=%ld, seq=%p, rid=%d, far_beg=%ld, far_end=%ld\\n",\n-\t\t\t\t__func__, (long)*beg, (long)mid, (long)*end, (long)len, seq, *rid, (long)far_beg, (long)far_end);\n-\t}\n-\tassert(seq && *end - *beg == len); // assertion failure should never happen\n-\treturn seq;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bntseq.h --- a/bwa-0.7.9a/bntseq.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,91 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Genome Research Ltd (GRL). - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* Contact: Heng Li <lh3@sanger.ac.uk> */ - -#ifndef BWT_BNTSEQ_H -#define BWT_BNTSEQ_H - -#include <assert.h> -#include <stdint.h> -#include <stdio.h> -#include <zlib.h> - -#ifndef BWA_UBYTE -#define BWA_UBYTE -typedef uint8_t ubyte_t; -#endif - -typedef struct { - int64_t offset; - int32_t len; - int32_t n_ambs; - uint32_t gi; - char *name, *anno; -} bntann1_t; - -typedef struct { - int64_t offset; - int32_t len; - char amb; -} bntamb1_t; - -typedef struct { - int64_t l_pac; - int32_t n_seqs; - uint32_t seed; - bntann1_t *anns; // n_seqs elements - int32_t n_holes; - bntamb1_t *ambs; // n_holes elements - FILE *fp_pac; -} bntseq_t; - -extern unsigned char nst_nt4_table[256]; - -#ifdef __cplusplus -extern "C" { -#endif - - void bns_dump(const bntseq_t *bns, const char *prefix); - bntseq_t *bns_restore(const char *prefix); - bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename); - void bns_destroy(bntseq_t *bns); - int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only); - int bns_pos2rid(const bntseq_t *bns, int64_t pos_f); - int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id); - uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len); - uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid); - int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re); - -#ifdef __cplusplus -} -#endif - -static inline int64_t bns_depos(const bntseq_t *bns, int64_t pos, int *is_rev) -{ - return (*is_rev = (pos >= bns->l_pac))? (bns->l_pac<<1) - 1 - pos : pos; -} - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwa-helper.js --- a/bwa-0.7.9a/bwa-helper.js Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,706 +0,0 @@\n-/*****************************************************************\n- * The K8 Javascript interpreter is required to run this script. *\n- * *\n- * Source code: https://github.com/attractivechaos/k8 *\n- * Binary: http://sourceforge.net/projects/lh3/files/k8/ *\n- * *\n- * Data file used for generating GRCh38 ALT alignments: *\n- * *\n- * http://sourceforge.net/projects/bio-bwa/files/ *\n- *****************************************************************/\n-\n-/******************\n- *** From k8.js ***\n- ******************/\n-\n-var getopt = function(args, ostr) {\n-\tvar oli; // option letter list index\n-\tif (typeof(getopt.place) == \'undefined\')\n-\t\tgetopt.ind = 0, getopt.arg = null, getopt.place = -1;\n-\tif (getopt.place == -1) { // update scanning pointer\n-\t\tif (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != \'-\') {\n-\t\t\tgetopt.place = -1;\n-\t\t\treturn null;\n-\t\t}\n-\t\tif (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == \'-\') { // found "--"\n-\t\t\t++getopt.ind;\n-\t\t\tgetopt.place = -1;\n-\t\t\treturn null;\n-\t\t}\n-\t}\n-\tvar optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity\n-\tif (optopt == \':\' || (oli = ostr.indexOf(optopt)) < 0) {\n-\t\tif (optopt == \'-\') return null; // if the user didn\'t specify \'-\' as an option, assume it means null.\n-\t\tif (getopt.place < 0) ++getopt.ind;\n-\t\treturn \'?\';\n-\t}\n-\tif (oli+1 >= ostr.length || ostr.charAt(++oli) != \':\') { // don\'t need argument\n-\t\tgetopt.arg = null;\n-\t\tif (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;\n-\t} else { // need an argument\n-\t\tif (getopt.place >= 0 && getopt.place < args[getopt.ind].length)\n-\t\t\tgetopt.arg = args[getopt.ind].substr(getopt.place);\n-\t\telse if (args.length <= ++getopt.ind) { // no arg\n-\t\t\tgetopt.place = -1;\n-\t\t\tif (ostr.length > 0 && ostr.charAt(0) == \':\') return \':\';\n-\t\t\treturn \'?\';\n-\t\t} else getopt.arg = args[getopt.ind]; // white space\n-\t\tgetopt.place = -1;\n-\t\t++getopt.ind;\n-\t}\n-\treturn optopt;\n-}\n-\n-function obj2str(o)\n-{\n-\tif (typeof(o) != \'object\') {\n-\t\treturn o.toString();\n-\t} else if (o == null) {\n-\t\treturn "null";\n-\t} else if (Array.isArray(o)) {\n-\t\tvar s = "[";\n-\t\tfor (var i = 0; i < o.length; ++i) {\n-\t\t\tif (i) s += \',\';\n-\t\t\ts += obj2str(o[i]);\n-\t\t}\n-\t\treturn s + "]";\n-\t} else {\n-\t\tvar i = 0, s = "{";\n-\t\tfor (var key in o) {\n-\t\t\tif (i++) s += \',\';\n-\t\t\ts += key + ":";\n-\t\t\ts += obj2str(o[key]);\n-\t\t}\n-\t\treturn s + "}";\n-\t}\n-}\n-\n-Bytes.prototype.reverse = function()\n-{\n-\tfor (var i = 0; i < this.length>>1; ++i) {\n-\t\tvar tmp = this[i];\n-\t\tthis[i] = this[this.length - i - 1];\n-\t\tthis[this.length - i - 1] = tmp;\n-\t}\n-}\n-\n-Bytes.prototype.revcomp = function()\n-{\n-\tif (Bytes.rctab == null) {\n-\t\tvar s1 = \'WSATUGCYRKMBDHVNwsatugcyrkmbdhvn\';\n-\t\tvar s2 = \'WSTAACGRYMKVHDBNwstaacgrymkvhdbn\';\n-\t\tBytes.rctab = [];\n-\t\tfor (var i = 0; i < 256; ++i) Bytes.rctab[i] = 0;\n-\t\tfor (var i = 0; i < s1.length; ++i)\n-\t\t\tBytes.rctab[s1.charCodeAt(i)] = s2.charCodeAt(i);\n-\t}\n-\tfor (var i = 0; i < this.length>>1; ++i) {\n-\t\tvar tmp = this[this.length - i - 1];\n-\t\tthis[this.length - i - 1] = Bytes.rctab[this[i]];\n-\t\tthis[i] = Bytes.rctab[tmp];\n-\t}\n-\tif (this.length>>1)\n-\t\tthis[this.length>>1] = Bytes.rctab[this[this.length>>1]];\n-}\n-\n-/************************\n- *** command markovlp ***\n- ************************/\n-\n-function bwa_markOvlp(args)\n-{\n-\tvar c, min_aln_ratio = .9, min_ext = 50;\n-\twhile ((c = getopt(args, "r:e:")) != null) {\n-\t\tif (c == \'r\') min_aln_ratio = parseFloat(getopt.arg);\n-\t\telse if (c == \'e\') min_ext = parseInt(getopt.arg);\n-\t}\n-\n-\tvar file = args.length == getopt.ind? new File() : new File(args[getopt.ind]);\n-\tvar buf = new Bytes();\n-\tvar dir4 = [\'>>\', \'><\', \'<>\', \'<<\'];\n-\n-\twhile (file.readline(buf) >= 0) '..b'g = g;\n-\t\t\tend = end > hits[i].pend? end : hits[i].pend;\n-\t\t}\n-\t\tvar reported_g = null, reported_i = null;\n-\t\tfor (var i = 0; i < hits.length; ++i)\n-\t\t\tif (hits[i].i == 0)\n-\t\t\t\treported_g = hits[i].g, reported_i = i;\n-\t\tvar n_group0 = 0; // #hits overlapping the reported hit\n-\t\tfor (var i = 0; i < hits.length; ++i)\n-\t\t\tif (hits[i].g == reported_g)\n-\t\t\t\t++n_group0;\n-\t\tif (n_group0 == 1) { // then keep the reported alignment and mapQ\n-\t\t\tif (opt.verbose < 4) print(line);\n-\t\t\tcontinue;\n-\t\t}\n-\n-\t\t// re-estimate mapQ\n-\t\tvar group_max = [];\n-\t\tfor (var i = 0; i < hits.length; ++i) {\n-\t\t\tvar g = hits[i].g;\n-\t\t\tif (group_max[g] == null || group_max[g][0] < hits[i].score)\n-\t\t\t\tgroup_max[g] = [hits[i].score, g];\n-\t\t}\n-\t\tif (group_max.length > 1)\n-\t\t\tgroup_max.sort(function(x,y) {return y[0]-x[0]});\n-\t\tvar mapQ;\n-\t\tif (group_max[0][1] == reported_g) { // the best hit is the hit reported in SAM\n-\t\t\tmapQ = group_max.length == 1? 60 : 6 * (group_max[0][0] - group_max[1][0]);\n-\t\t} else mapQ = 0;\n-\t\tmapQ = mapQ < 60? mapQ : 60;\n-\t\tvar ori_mapQ = parseInt(t[4]);\n-\t\tmapQ = mapQ > ori_mapQ? mapQ : ori_mapQ;\n-\n-\t\t// generate lifted_str\n-\t\tfor (var i = 0; i < hits.length; ++i) {\n-\t\t\tif (hits[i].lifted && hits[i].lifted.length) {\n-\t\t\t\tvar lifted = hits[i].lifted;\n-\t\t\t\tvar u = \'\';\n-\t\t\t\tfor (var j = 0; j < lifted.length; ++j)\n-\t\t\t\t\tu += lifted[j][0] + "," + lifted[j][2] + "," + lifted[j][3] + "," + (lifted[j][1]?\'-\':\'+\') + ";";\n-\t\t\t\thits[i].lifted_str = u;\n-\t\t\t}\n-\t\t}\n-\n-\t\t// generate reversed quality and reverse-complemented sequence if necessary\n-\t\tvar rs = null, rq = null; // reversed quality and reverse complement sequence\n-\t\tvar need_rev = false;\n-\t\tfor (var i = 0; i < hits.length; ++i) {\n-\t\t\tif (hits[i].g != reported_g || i == reported_i) continue;\n-\t\t\tif (hits[i].rev != hits[reported_i].rev)\n-\t\t\t\tneed_rev = true;\n-\t\t}\n-\t\tif (need_rev) { // reverse and reverse complement\n-\t\t\taux.set(t[9], 0); aux.revcomp(); rs = aux.toString();\n-\t\t\taux.set(t[10],0); aux.reverse(); rq = aux.toString();\n-\t\t}\n-\n-\t\t// print\n-\t\tt[4] = mapQ;\n-\t\tt.push("om:i:"+ori_mapQ);\n-\t\tif (hits[reported_i].lifted_str) t.push("lt:Z:" + hits[reported_i].lifted_str);\n-\t\tprint(t.join("\\t"));\n-\t\tvar cnt = 0;\n-\t\tfor (var i = 0; i < hits.length; ++i) {\n-\t\t\tif (opt.verbose >= 5) print(obj2str(hits[i]));\n-\t\t\tif (hits[i].g != reported_g || i == reported_i) continue;\n-\t\t\tvar s = [t[0], flag&0xf10, hits[i].ctg, hits[i].start+1, mapQ, hits[i].cigar, \'*\', 0, 0];\n-\t\t\t// update name\n-\t\t\tif (flag&0x40) s[0] += "/1";\n-\t\t\tif (flag&0x80) s[0] += "/2";\n-\t\t\ts[0] += "_" + (++cnt);\n-\t\t\tif (hits[i].rev == hits[reported_i].rev) s.push(t[9], t[10]);\n-\t\t\telse s.push(rs, rq);\n-\t\t\ts.push("NM:i:" + hits[i].NM);\n-\t\t\tif (hits[i].lifted_str) s.push("lt:Z:" + hits[i].lifted_str);\n-\t\t\tprint(s.join("\\t"));\n-\t\t}\n-\t}\n-\tfile.close();\n-\n-\taux.destroy();\n-\tbuf.destroy();\n-}\n-\n-/*********************\n- *** Main function ***\n- *********************/\n-\n-function main(args)\n-{\n-\tif (args.length == 0) {\n-\t\tprint("\\nUsage: k8 bwa-helper.js <command> [arguments]\\n");\n-\t\tprint("Commands: genalt generate ALT alignments");\n-\t\tprint(" sam2pas convert SAM to pairwise alignment summary format (PAS)");\n-\t\tprint(" pas2reg extract covered regions");\n-\t\tprint(" reg2cut regions to extract for the 2nd round bwa-mem");\n-\t\tprint(" markovlp identify bi-directional overlaps");\n-\t\tprint(" gff2sam convert GFF3 alignment to SAM");\n-\t\tprint(" shortname shorten sequence name after subseq (PacBio read names only)");\n-\t\tprint("");\n-\t\texit(1);\n-\t}\n-\n-\tvar cmd = args.shift();\n-\tif (cmd == \'sam2pas\') bwa_sam2pas(args);\n-\telse if (cmd == \'gff2sam\') bwa_gff2sam(args);\n-\telse if (cmd == \'markovlp\') bwa_markOvlp(args);\n-\telse if (cmd == \'pas2reg\') bwa_pas2reg(args);\n-\telse if (cmd == \'reg2cut\') bwa_reg2cut(args);\n-\telse if (cmd == \'genalt\') bwa_genalt(args);\n-\telse if (cmd == \'shortname\') bwa_shortname(args);\n-\telse warn("Unrecognized command");\n-}\n-\n-main(arguments);\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwa.1 --- a/bwa-0.7.9a/bwa.1 Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,782 +0,0 @@\n-.TH bwa 1 "19 May 2014" "bwa-0.7.9-r783" "Bioinformatics tools"\n-.SH NAME\n-.PP\n-bwa - Burrows-Wheeler Alignment Tool\n-.SH SYNOPSIS\n-.PP\n-bwa index ref.fa\n-.PP\n-bwa mem ref.fa reads.fq > aln-se.sam\n-.PP\n-bwa mem ref.fa read1.fq read2.fq > aln-pe.sam\n-.PP\n-bwa aln ref.fa short_read.fq > aln_sa.sai\n-.PP\n-bwa samse ref.fa aln_sa.sai short_read.fq > aln-se.sam\n-.PP\n-bwa sampe ref.fa aln_sa1.sai aln_sa2.sai read1.fq read2.fq > aln-pe.sam\n-.PP\n-bwa bwasw ref.fa long_read.fq > aln.sam\n-\n-.SH DESCRIPTION\n-.PP\n-BWA is a software package for mapping low-divergent sequences against a large\n-reference genome, such as the human genome. It consists of three algorithms:\n-BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is designed for Illumina\n-sequence reads up to 100bp, while the rest two for longer sequences ranged from\n-70bp to 1Mbp. BWA-MEM and BWA-SW share similar features such as long-read\n-support and split alignment, but BWA-MEM, which is the latest, is generally\n-recommended for high-quality queries as it is faster and more accurate.\n-BWA-MEM also has better performance than BWA-backtrack for 70-100bp Illumina\n-reads.\n-\n-For all the algorithms, BWA first needs to construct the FM-index for\n-the reference genome (the\n-.B index\n-command). Alignment algorithms are invoked with different sub-commands:\n-.BR aln / samse / sampe\n-for BWA-backtrack,\n-.B bwasw\n-for BWA-SW and\n-.B mem\n-for the BWA-MEM algorithm.\n-\n-.SH COMMANDS AND OPTIONS\n-.TP\n-.B index\n-.B bwa index\n-.RB [ -p\n-.IR prefix ]\n-.RB [ -a\n-.IR algoType ]\n-.I db.fa\n-\n-Index database sequences in the FASTA format.\n-\n-.B OPTIONS:\n-.RS\n-.TP 10\n-.BI -p \\ STR\n-Prefix of the output database [same as db filename]\n-.TP\n-.BI -a \\ STR\n-Algorithm for constructing BWT index. BWA implements two algorithms for BWT\n-construction:\n-.B is\n-and\n-.BR bwtsw .\n-The first algorithm is a little faster for small database but requires large\n-RAM and does not work for databases with total length longer than 2GB. The\n-second algorithm is adapted from the BWT-SW source code. It in theory works\n-with database with trillions of bases. When this option is not specified, the\n-appropriate algorithm will be chosen automatically.\n-.RE\n-\n-.TP\n-.B mem\n-.B bwa mem\n-.RB [ -aCHMpP ]\n-.RB [ -t\n-.IR nThreads ]\n-.RB [ -k\n-.IR minSeedLen ]\n-.RB [ -w\n-.IR bandWidth ]\n-.RB [ -d\n-.IR zDropoff ]\n-.RB [ -r\n-.IR seedSplitRatio ]\n-.RB [ -c\n-.IR maxOcc ]\n-.RB [ -A\n-.IR matchScore ]\n-.RB [ -B\n-.IR mmPenalty ]\n-.RB [ -O\n-.IR gapOpenPen ]\n-.RB [ -E\n-.IR gapExtPen ]\n-.RB [ -L\n-.IR clipPen ]\n-.RB [ -U\n-.IR unpairPen ]\n-.RB [ -R\n-.IR RGline ]\n-.RB [ -v\n-.IR verboseLevel ]\n-.I db.prefix\n-.I reads.fq\n-.RI [ mates.fq ]\n-\n-Align 70bp-1Mbp query sequences with the BWA-MEM algorithm. Briefly, the\n-algorithm works by seeding alignments with maximal exact matches (MEMs) and\n-then extending seeds with the affine-gap Smith-Waterman algorithm (SW).\n-\n-If\n-.I mates.fq\n-file is absent and option\n-.B -p\n-is not set, this command regards input reads are single-end. If\n-.I mates.fq\n-is present, this command assumes the\n-.IR i -th\n-read in\n-.I reads.fq\n-and the\n-.IR i -th\n-read in\n-.I mates.fq\n-constitute a read pair. If\n-.B -p\n-is used, the command assumes the\n-.RI 2 i -th\n-and the\n-.RI (2 i +1)-th\n-read in\n-.I reads.fq\n-constitute a read pair (such input file is said to be interleaved). In this case,\n-.I mates.fq\n-is ignored. In the paired-end mode, the\n-.B mem\n-command will infer the read orientation and the insert size distribution from a\n-batch of reads.\n-\n-The BWA-MEM algorithm performs local alignment. It may produce multiple primary\n-alignments for different part of a query sequence. This is a crucial feature\n-for long sequences. However, some tools such as Picard\'s markDuplicates does\n-not work with split alignments. One may consider to use option\n-.B -M\n-to flag shorter split hits as secondary.\n-\n-.RS\n-.TP 10\n-.B ALGORITHM OPTIONS:\n-.TP\n-.BI -t \\ INT\n-Number of threads [1]\n-.TP\n-.BI -k \\ INT\n-Minimum seed length. Match'..b'is feature makes it possible to integrate the forward and reverse complemented\n-genome in one FM-index, which speeds up both BWA-short and BWA-SW. As a tradeoff,\n-BWA uses more memory because it has to keep all positions and ranks in 64-bit\n-integers, twice larger than 32-bit integers used in the previous versions.\n-\n-The latest BWA-SW also works for paired-end reads longer than 100bp. In\n-comparison to BWA-short, BWA-SW tends to be more accurate for highly unique\n-reads and more robust to relative long INDELs and structural variants.\n-Nonetheless, BWA-short usually has higher power to distinguish the optimal hit\n-from many suboptimal hits. The choice of the mapping algorithm may depend on\n-the application.\n-\n-.SH SEE ALSO\n-BWA website <http://bio-bwa.sourceforge.net>, Samtools website\n-<http://samtools.sourceforge.net>\n-\n-.SH AUTHOR\n-Heng Li at the Sanger Institute wrote the key source codes and\n-integrated the following codes for BWT construction: bwtsw\n-<http://i.cs.hku.hk/~ckwong3/bwtsw/>, implemented by Chi-Kwong Wong at\n-the University of Hong Kong and IS\n-<http://yuta.256.googlepages.com/sais> originally proposed by Nong Ge\n-<http://www.cs.sysu.edu.cn/nong/> at the Sun Yat-Sen University and\n-implemented by Yuta Mori.\n-\n-.SH LICENSE AND CITATION\n-.PP\n-The full BWA package is distributed under GPLv3 as it uses source codes\n-from BWT-SW which is covered by GPL. Sorting, hash table, BWT and IS\n-libraries are distributed under the MIT license.\n-.PP\n-If you use the BWA-backtrack algorithm, please cite the following\n-paper:\n-.PP\n-Li H. and Durbin R. (2009) Fast and accurate short read alignment with\n-Burrows-Wheeler transform. Bioinformatics, 25, 1754-1760. [PMID: 19451168]\n-.PP\n-If you use the BWA-SW algorithm, please cite:\n-.PP\n-Li H. and Durbin R. (2010) Fast and accurate long-read alignment with\n-Burrows-Wheeler transform. Bioinformatics, 26, 589-595. [PMID: 20080505]\n-.PP\n-If you use BWA-MEM or the fastmap component of BWA, please cite:\n-.PP\n-Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs with\n-BWA-MEM. arXiv:1303.3997v1 [q-bio.GN].\n-.PP\n-It is likely that the BWA-MEM manuscript will not appear in a peer-reviewed\n-journal.\n-\n-.SH HISTORY\n-BWA is largely influenced by BWT-SW. It uses source codes from BWT-SW\n-and mimics its binary file formats; BWA-SW resembles BWT-SW in several\n-ways. The initial idea about BWT-based alignment also came from the\n-group who developed BWT-SW. At the same time, BWA is different enough\n-from BWT-SW. The short-read alignment algorithm bears no similarity to\n-Smith-Waterman algorithm any more. While BWA-SW learns from BWT-SW, it\n-introduces heuristics that can hardly be applied to the original\n-algorithm. In all, BWA does not guarantee to find all local hits as what\n-BWT-SW is designed to do, but it is much faster than BWT-SW on both\n-short and long query sequences.\n-\n-I started to write the first piece of codes on 24 May 2008 and got the\n-initial stable version on 02 June 2008. During this period, I was\n-acquainted that Professor Tak-Wah Lam, the first author of BWT-SW paper,\n-was collaborating with Beijing Genomics Institute on SOAP2, the successor\n-to SOAP (Short Oligonucleotide Analysis Package). SOAP2 has come out in\n-November 2008. According to the SourceForge download page, the third\n-BWT-based short read aligner, bowtie, was first released in August\n-2008. At the time of writing this manual, at least three more BWT-based\n-short-read aligners are being implemented.\n-\n-The BWA-SW algorithm is a new component of BWA. It was conceived in\n-November 2008 and implemented ten months later.\n-\n-The BWA-MEM algorithm is based on an algorithm finding super-maximal exact\n-matches (SMEMs), which was first published with the fermi assembler paper\n-in 2012. I first implemented the basic SMEM algorithm in the\n-.B fastmap\n-command for an experiment and then extended the basic algorithm and added the\n-extension part in Feburary 2013 to make BWA-MEM a fully featured mapper.\n-\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwa.c --- a/bwa-0.7.9a/bwa.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,321 +0,0 @@\n-#include <string.h>\n-#include <stdio.h>\n-#include <zlib.h>\n-#include <assert.h>\n-#include "bntseq.h"\n-#include "bwa.h"\n-#include "ksw.h"\n-#include "utils.h"\n-#include "kstring.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-int bwa_verbose = 3;\n-char bwa_rg_id[256];\n-\n-/************************\n- * Batch FASTA/Q reader *\n- ************************/\n-\n-#include "kseq.h"\n-KSEQ_DECLARE(gzFile)\n-\n-static inline void trim_readno(kstring_t *s)\n-{\n-\tif (s->l > 2 && s->s[s->l-2] == \'/\' && isdigit(s->s[s->l-1]))\n-\t\ts->l -= 2, s->s[s->l] = 0;\n-}\n-\n-static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)\n-{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice\n-\ts->name = strdup(ks->name.s);\n-\ts->comment = ks->comment.l? strdup(ks->comment.s) : 0;\n-\ts->seq = strdup(ks->seq.s);\n-\ts->qual = ks->qual.l? strdup(ks->qual.s) : 0;\n-\ts->l_seq = strlen(s->seq);\n-}\n-\n-bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)\n-{\n-\tkseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;\n-\tint size = 0, m, n;\n-\tbseq1_t *seqs;\n-\tm = n = 0; seqs = 0;\n-\twhile (kseq_read(ks) >= 0) {\n-\t\tif (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads\n-\t\t\tfprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\\n", __func__);\n-\t\t\tbreak;\n-\t\t}\n-\t\tif (n >= m) {\n-\t\t\tm = m? m<<1 : 256;\n-\t\t\tseqs = realloc(seqs, m * sizeof(bseq1_t));\n-\t\t}\n-\t\ttrim_readno(&ks->name);\n-\t\tkseq2bseq1(ks, &seqs[n]);\n-\t\tsize += seqs[n++].l_seq;\n-\t\tif (ks2) {\n-\t\t\ttrim_readno(&ks2->name);\n-\t\t\tkseq2bseq1(ks2, &seqs[n]);\n-\t\t\tsize += seqs[n++].l_seq;\n-\t\t}\n-\t\tif (size >= chunk_size && (n&1) == 0) break;\n-\t}\n-\tif (size == 0) { // test if the 2nd file is finished\n-\t\tif (ks2 && kseq_read(ks2) >= 0)\n-\t\t\tfprintf(stderr, "[W::%s] the 1st file has fewer sequences.\\n", __func__);\n-\t}\n-\t*n_ = n;\n-\treturn seqs;\n-}\n-\n-/*****************\n- * CIGAR related *\n- *****************/\n-\n-void bwa_fill_scmat(int a, int b, int8_t mat[25])\n-{\n-\tint i, j, k;\n-\tfor (i = k = 0; i < 4; ++i) {\n-\t\tfor (j = 0; j < 4; ++j)\n-\t\t\tmat[k++] = i == j? a : -b;\n-\t\tmat[k++] = -1; // ambiguous base\n-\t}\n-\tfor (j = 0; j < 5; ++j) mat[k++] = -1;\n-}\n-\n-// Generate CIGAR when the alignment end points are known\n-uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM)\n-{\n-\tuint32_t *cigar = 0;\n-\tuint8_t tmp, *rseq;\n-\tint i;\n-\tint64_t rlen;\n-\tkstring_t str;\n-\tconst char *int2base;\n-\n-\tif (n_cigar) *n_cigar = 0;\n-\tif (NM) *NM = -1;\n-\tif (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand\n-\trseq = bns_get_seq(l_pac, pac, rb, re, &rlen);\n-\tif (re - rb != rlen) goto ret_gen_cigar; // possible if out of range\n-\tif (rb >= l_pac) { // then reverse both query and rseq; this is to ensure indels to be placed at the leftmost position\n-\t\tfor (i = 0; i < l_query>>1; ++i)\n-\t\t\ttmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp;\n-\t\tfor (i = 0; i < rlen>>1; ++i)\n-\t\t\ttmp = rseq[i], rseq[i] = rseq[rlen - 1 - i], rseq[rlen - 1 - i] = tmp;\n-\t}\n-\tif (l_query == re - rb && w_ == 0) { // no gap; no need to do DP\n-\t\t// UPDATE: we come to this block now... FIXME: due to an issue in mem_reg2aln(), we never come to this block. This does not affect accuracy, but it hurts performance.\n-\t\tif (n_cigar) {\n-\t\t\tcigar = malloc(4);\n-\t\t\tcigar[0] = l_query<<4 | 0;\n-\t\t\t*n_cigar = 1;\n-\t\t}\n-\t\tfor (i = 0, *score = 0; i < l_query; ++i)\n-\t\t\t*score += mat[rseq[i]*5 + query[i]];\n-\t} else {\n-\t\tint w, max_gap, max_ins, max_del, min_w;\n-\t\t// set the band-width\n-\t\tmax_ins = (int)((double)(((l_query+1)>>1) * mat[0] - o_ins) / e_ins + 1.);\n-\t\tmax_del = (int)((double)(((l_query+1)>>1) * mat[0] - o_del) / e_del + 1.);\n-\t\tmax_gap = max_ins > max_del? max_ins : max_del;\n-\t\tmax_gap = max_gap > 1? max_gap : 1;\n-\t\tw = '..b' < l_query>>1; ++i)\n-\t\t\ttmp = query[i], query[i] = query[l_query - 1 - i], query[l_query - 1 - i] = tmp;\n-\n-ret_gen_cigar:\n-\tfree(rseq);\n-\treturn cigar;\n-}\n-\n-uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM)\n-{\n-\treturn bwa_gen_cigar2(mat, q, r, q, r, w_, l_pac, pac, l_query, query, rb, re, score, n_cigar, NM);\n-}\n-\n-/*********************\n- * Full index reader *\n- *********************/\n-\n-char *bwa_idx_infer_prefix(const char *hint)\n-{\n-\tchar *prefix;\n-\tint l_hint;\n-\tFILE *fp;\n-\tl_hint = strlen(hint);\n-\tprefix = malloc(l_hint + 3 + 4 + 1);\n-\tstrcpy(prefix, hint);\n-\tstrcpy(prefix + l_hint, ".64.bwt");\n-\tif ((fp = fopen(prefix, "rb")) != 0) {\n-\t\tfclose(fp);\n-\t\tprefix[l_hint + 3] = 0;\n-\t\treturn prefix;\n-\t} else {\n-\t\tstrcpy(prefix + l_hint, ".bwt");\n-\t\tif ((fp = fopen(prefix, "rb")) == 0) {\n-\t\t\tfree(prefix);\n-\t\t\treturn 0;\n-\t\t} else {\n-\t\t\tfclose(fp);\n-\t\t\tprefix[l_hint] = 0;\n-\t\t\treturn prefix;\n-\t\t}\n-\t}\n-}\n-\n-bwt_t *bwa_idx_load_bwt(const char *hint)\n-{\n-\tchar *tmp, *prefix;\n-\tbwt_t *bwt;\n-\tprefix = bwa_idx_infer_prefix(hint);\n-\tif (prefix == 0) {\n-\t\tif (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\\n", __func__);\n-\t\treturn 0;\n-\t}\n-\ttmp = calloc(strlen(prefix) + 5, 1);\n-\tstrcat(strcpy(tmp, prefix), ".bwt"); // FM-index\n-\tbwt = bwt_restore_bwt(tmp);\n-\tstrcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA)\n-\tbwt_restore_sa(tmp, bwt);\n-\tfree(tmp); free(prefix);\n-\treturn bwt;\n-}\n-\n-bwaidx_t *bwa_idx_load(const char *hint, int which)\n-{\n-\tbwaidx_t *idx;\n-\tchar *prefix;\n-\tprefix = bwa_idx_infer_prefix(hint);\n-\tif (prefix == 0) {\n-\t\tif (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\\n", __func__);\n-\t\treturn 0;\n-\t}\n-\tidx = calloc(1, sizeof(bwaidx_t));\n-\tif (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint);\n-\tif (which & BWA_IDX_BNS) {\n-\t\tidx->bns = bns_restore(prefix);\n-\t\tif (which & BWA_IDX_PAC) {\n-\t\t\tidx->pac = calloc(idx->bns->l_pac/4+1, 1);\n-\t\t\terr_fread_noeof(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence\n-\t\t\terr_fclose(idx->bns->fp_pac);\n-\t\t\tidx->bns->fp_pac = 0;\n-\t\t}\n-\t}\n-\tfree(prefix);\n-\treturn idx;\n-}\n-\n-void bwa_idx_destroy(bwaidx_t *idx)\n-{\n-\tif (idx == 0) return;\n-\tif (idx->bwt) bwt_destroy(idx->bwt);\n-\tif (idx->bns) bns_destroy(idx->bns);\n-\tif (idx->pac) free(idx->pac);\n-\tfree(idx);\n-}\n-\n-/***********************\n- * SAM header routines *\n- ***********************/\n-\n-void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line)\n-{\n-\tint i;\n-\textern char *bwa_pg;\n-\tfor (i = 0; i < bns->n_seqs; ++i)\n-\t\terr_printf("@SQ\\tSN:%s\\tLN:%d\\n", bns->anns[i].name, bns->anns[i].len);\n-\tif (rg_line) err_printf("%s\\n", rg_line);\n-\terr_printf("%s\\n", bwa_pg);\n-}\n-\n-static char *bwa_escape(char *s)\n-{\n-\tchar *p, *q;\n-\tfor (p = q = s; *p; ++p) {\n-\t\tif (*p == \'\\\\\') {\n-\t\t\t++p;\n-\t\t\tif (*p == \'t\') *q++ = \'\\t\';\n-\t\t\telse if (*p == \'n\') *q++ = \'\\n\';\n-\t\t\telse if (*p == \'r\') *q++ = \'\\r\';\n-\t\t\telse if (*p == \'\\\\\') *q++ = \'\\\\\';\n-\t\t} else *q++ = *p;\n-\t}\n-\t*q = \'\\0\';\n-\treturn s;\n-}\n-\n-char *bwa_set_rg(const char *s)\n-{\n-\tchar *p, *q, *r, *rg_line = 0;\n-\tmemset(bwa_rg_id, 0, 256);\n-\tif (strstr(s, "@RG") != s) {\n-\t\tif (bwa_verbose >= 1) fprintf(stderr, "[E::%s] the read group line is not started with @RG\\n", __func__);\n-\t\tgoto err_set_rg;\n-\t}\n-\trg_line = strdup(s);\n-\tbwa_escape(rg_line);\n-\tif ((p = strstr(rg_line, "\\tID:")) == 0) {\n-\t\tif (bwa_verbose >= 1) fprintf(stderr, "[E::%s] no ID at the read group line\\n", __func__);\n-\t\tgoto err_set_rg;\n-\t}\n-\tp += 4;\n-\tfor (q = p; *q && *q != \'\\t\' && *q != \'\\n\'; ++q);\n-\tif (q - p + 1 > 256) {\n-\t\tif (bwa_verbose >= 1) fprintf(stderr, "[E::%s] @RG:ID is longer than 255 characters\\n", __func__);\n-\t\tgoto err_set_rg;\n-\t}\n-\tfor (q = p, r = bwa_rg_id; *q && *q != \'\\t\' && *q != \'\\n\'; ++q)\n-\t\t*r++ = *q;\n-\treturn rg_line;\n-\n-err_set_rg:\n-\tfree(rg_line);\n-\treturn 0;\n-}\n-\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwa.h --- a/bwa-0.7.9a/bwa.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,50 +0,0 @@ -#ifndef BWA_H_ -#define BWA_H_ - -#include <stdint.h> -#include "bntseq.h" -#include "bwt.h" - -#define BWA_IDX_BWT 0x1 -#define BWA_IDX_BNS 0x2 -#define BWA_IDX_PAC 0x4 -#define BWA_IDX_ALL 0x7 - -typedef struct { - bwt_t *bwt; // FM-index - bntseq_t *bns; // information on the reference sequences - uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base -} bwaidx_t; - -typedef struct { - int l_seq; - char *name, *comment, *seq, *qual, *sam; -} bseq1_t; - -extern int bwa_verbose; -extern char bwa_rg_id[256]; - -#ifdef __cplusplus -extern "C" { -#endif - - bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_); - - void bwa_fill_scmat(int a, int b, int8_t mat[25]); - uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); - uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM); - - char *bwa_idx_infer_prefix(const char *hint); - bwt_t *bwa_idx_load_bwt(const char *hint); - - bwaidx_t *bwa_idx_load(const char *hint, int which); - void bwa_idx_destroy(bwaidx_t *idx); - - void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line); - char *bwa_set_rg(const char *s); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwape.c --- a/bwa-0.7.9a/bwape.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,783 +0,0 @@\n-#include <unistd.h>\n-#include <math.h>\n-#include <stdlib.h>\n-#include <time.h>\n-#include <stdio.h>\n-#include <string.h>\n-#include "bwtaln.h"\n-#include "kvec.h"\n-#include "bntseq.h"\n-#include "utils.h"\n-#include "bwase.h"\n-#include "bwa.h"\n-#include "ksw.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-typedef struct {\n-\tint n;\n-\tbwtint_t *a;\n-} poslist_t;\n-\n-typedef struct {\n-\tdouble avg, std, ap_prior;\n-\tbwtint_t low, high, high_bayesian;\n-} isize_info_t;\n-\n-#define b128_eq(a, b) ((a).x == (b).x && (a).y == (b).y)\n-#define b128_hash(a) ((uint32_t)(a).x)\n-\n-#include "khash.h"\n-KHASH_INIT(b128, pair64_t, poslist_t, 1, b128_hash, b128_eq)\n-\n-typedef struct {\n-\tpair64_v arr;\n-\tpair64_v pos[2];\n-\tkvec_t(bwt_aln1_t) aln[2];\n-} pe_data_t;\n-\n-#define MIN_HASH_WIDTH 1000\n-\n-extern int g_log_n[256]; // in bwase.c\n-static kh_b128_t *g_hash;\n-\n-void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi);\n-void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);\n-int bwa_approx_mapQ(const bwa_seq_t *p, int mm);\n-void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2);\n-bntseq_t *bwa_open_nt(const char *prefix);\n-void bwa_print_sam_SQ(const bntseq_t *bns);\n-\n-pe_opt_t *bwa_init_pe_opt()\n-{\n-\tpe_opt_t *po;\n-\tpo = (pe_opt_t*)calloc(1, sizeof(pe_opt_t));\n-\tpo->max_isize = 500;\n-\tpo->force_isize = 0;\n-\tpo->max_occ = 100000;\n-\tpo->n_multi = 3;\n-\tpo->N_multi = 10;\n-\tpo->type = BWA_PET_STD;\n-\tpo->is_sw = 1;\n-\tpo->ap_prior = 1e-5;\n-\treturn po;\n-}\n-/*\n-static double ierfc(double x) // inverse erfc(); iphi(x) = M_SQRT2 *ierfc(2 * x);\n-{\n-\tconst double a = 0.140012;\n-\tdouble b, c;\n-\tb = log(x * (2 - x));\n-\tc = 2./M_PI/a + b / 2.;\n-\treturn sqrt(sqrt(c * c - b / a) - c);\n-}\n-*/\n-\n-// for normal distribution, this is about 3std\n-#define OUTLIER_BOUND 2.0\n-\n-static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double ap_prior, int64_t L)\n-{\n-\tuint64_t x, *isizes, n_ap = 0;\n-\tint n, i, tot, p25, p75, p50, max_len = 1, tmp;\n-\tdouble skewness = 0.0, kurtosis = 0.0, y;\n-\n-\tii->avg = ii->std = -1.0;\n-\tii->low = ii->high = ii->high_bayesian = 0;\n-\tisizes = (uint64_t*)calloc(n_seqs, 8);\n-\tfor (i = 0, tot = 0; i != n_seqs; ++i) {\n-\t\tbwa_seq_t *p[2];\n-\t\tp[0] = seqs[0] + i; p[1] = seqs[1] + i;\n-\t\tif (p[0]->mapQ >= 20 && p[1]->mapQ >= 20) {\n-\t\t\tx = (p[0]->pos < p[1]->pos)? p[1]->pos + p[1]->len - p[0]->pos : p[0]->pos + p[0]->len - p[1]->pos;\n-\t\t\tif (x < 100000) isizes[tot++] = x;\n-\t\t}\n-\t\tif (p[0]->len > max_len) max_len = p[0]->len;\n-\t\tif (p[1]->len > max_len) max_len = p[1]->len;\n-\t}\n-\tif (tot < 20) {\n-\t\tfprintf(stderr, "[infer_isize] fail to infer insert size: too few good pairs\\n");\n-\t\tfree(isizes);\n-\t\treturn -1;\n-\t}\n-\tks_introsort_64(tot, isizes);\n-\tp25 = isizes[(int)(tot*0.25 + 0.5)];\n-\tp50 = isizes[(int)(tot*0.50 + 0.5)];\n-\tp75 = isizes[(int)(tot*0.75 + 0.5)];\n-\ttmp = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);\n-\tii->low = tmp > max_len? tmp : max_len; // ii->low is unsigned\n-\tii->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);\n-\tif (ii->low > ii->high) {\n-\t\tfprintf(stderr, "[infer_isize] fail to infer insert size: upper bound is smaller than read length\\n");\n-\t\tfree(isizes);\n-\t\treturn -1;\n-\t}\n-\tfor (i = 0, x = n = 0; i < tot; ++i)\n-\t\tif (isizes[i] >= ii->low && isizes[i] <= ii->high)\n-\t\t\t++n, x += isizes[i];\n-\tii->avg = (double)x / n;\n-\tfor (i = 0; i < tot; ++i) {\n-\t\tif (isizes[i] >= ii->low && isizes[i] <= ii->high) {\n-\t\t\tdouble tmp = (isizes[i] - ii->avg) * (isizes[i] - ii->avg);\n-\t\t\tii->std += tmp;\n-\t\t\tskewness += tmp * (isizes[i] - ii->avg);\n-\t\t\tkurtosis += tmp * tmp;\n-\t\t}\n-\t}\n-\tkurtosis = kurtosis/n / (ii->std / n * ii->std / n) - 3;\n-\tii->std = sqrt(ii->std / n); // it would be better as n-1, but n is usually very large\n-\tskewness = skewness / n / (ii->std * ii->std * ii->std);\n-\tfor (y = 1.0; y < 10.0; y += 0.01)\n-\t\tif (.5 * erfc(y / M_SQRT2) < ap_prior / L * (y * ii->std + ii->avg)) '..b'ntf(stderr, "[bwa_sai2sam_pe_core] time elapses: %.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();\n-\n-\t\tfprintf(stderr, "[bwa_sai2sam_pe_core] refine gapped alignments... ");\n-\t\tfor (j = 0; j < 2; ++j)\n-\t\t\tbwa_refine_gapped(bns, n_seqs, seqs[j], pacseq);\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();\n-\t\tif (pac == 0) free(pacseq);\n-\n-\t\tfprintf(stderr, "[bwa_sai2sam_pe_core] print alignments... ");\n-\t\tfor (i = 0; i < n_seqs; ++i) {\n-\t\t\tbwa_seq_t *p[2];\n-\t\t\tp[0] = seqs[0] + i; p[1] = seqs[1] + i;\n-\t\t\tif (p[0]->bc[0] || p[1]->bc[0]) {\n-\t\t\t\tstrcat(p[0]->bc, p[1]->bc);\n-\t\t\t\tstrcpy(p[1]->bc, p[0]->bc);\n-\t\t\t}\n-\t\t\tbwa_print_sam1(bns, p[0], p[1], opt.mode, opt.max_top2);\n-\t\t\tbwa_print_sam1(bns, p[1], p[0], opt.mode, opt.max_top2);\n-\t\t\tif (strcmp(p[0]->name, p[1]->name) != 0) err_fatal(__func__, "paired reads have different names: \\"%s\\", \\"%s\\"\\n", p[0]->name, p[1]->name);\n-\t\t}\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();\n-\n-\t\tfor (j = 0; j < 2; ++j)\n-\t\t\tbwa_free_read_seq(n_seqs, seqs[j]);\n-\t\tfprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\\n", tot_seqs);\n-\t\tlast_ii = ii;\n-\t}\n-\n-\t// destroy\n-\tbns_destroy(bns);\n-\tfor (i = 0; i < 2; ++i) {\n-\t\tbwa_seq_close(ks[i]);\n-\t\terr_fclose(fp_sa[i]);\n-\t}\n-\tfor (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter)\n-\t\tif (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a);\n-\tkh_destroy(b128, g_hash);\n-\tif (pac) {\n-\t\tfree(pac); bwt_destroy(bwt);\n-\t}\n-}\n-\n-int bwa_sai2sam_pe(int argc, char *argv[])\n-{\n-\tint c;\n-\tpe_opt_t *popt;\n-\tchar *prefix, *rg_line = 0;\n-\n-\tpopt = bwa_init_pe_opt();\n-\twhile ((c = getopt(argc, argv, "a:o:sPn:N:c:f:Ar:")) >= 0) {\n-\t\tswitch (c) {\n-\t\tcase \'r\':\n-\t\t\tif ((rg_line = bwa_set_rg(optarg)) == 0) return 1;\n-\t\t\tbreak;\n-\t\tcase \'a\': popt->max_isize = atoi(optarg); break;\n-\t\tcase \'o\': popt->max_occ = atoi(optarg); break;\n-\t\tcase \'s\': popt->is_sw = 0; break;\n-\t\tcase \'P\': popt->is_preload = 1; break;\n-\t\tcase \'n\': popt->n_multi = atoi(optarg); break;\n-\t\tcase \'N\': popt->N_multi = atoi(optarg); break;\n-\t\tcase \'c\': popt->ap_prior = atof(optarg); break;\n-\t\tcase \'f\': xreopen(optarg, "w", stdout); break;\n-\t\tcase \'A\': popt->force_isize = 1; break;\n-\t\tdefault: return 1;\n-\t\t}\n-\t}\n-\n-\tif (optind + 5 > argc) {\n-\t\tfprintf(stderr, "\\n");\n-\t\tfprintf(stderr, "Usage: bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>\\n\\n");\n-\t\tfprintf(stderr, "Options: -a INT maximum insert size [%d]\\n", popt->max_isize);\n-\t\tfprintf(stderr, " -o INT maximum occurrences for one end [%d]\\n", popt->max_occ);\n-\t\tfprintf(stderr, " -n INT maximum hits to output for paired reads [%d]\\n", popt->n_multi);\n-\t\tfprintf(stderr, " -N INT maximum hits to output for discordant pairs [%d]\\n", popt->N_multi);\n-\t\tfprintf(stderr, " -c FLOAT prior of chimeric rate (lower bound) [%.1le]\\n", popt->ap_prior);\n- fprintf(stderr, " -f FILE sam file to output results to [stdout]\\n");\n-\t\tfprintf(stderr, " -r STR read group header line such as `@RG\\\\tID:foo\\\\tSM:bar\' [null]\\n");\n-\t\tfprintf(stderr, " -P preload index into memory (for base-space reads only)\\n");\n-\t\tfprintf(stderr, " -s disable Smith-Waterman for the unmapped mate\\n");\n-\t\tfprintf(stderr, " -A disable insert size estimate (force -s)\\n\\n");\n-\t\tfprintf(stderr, "Notes: 1. For SOLiD reads, <in1.fq> corresponds R3 reads and <in2.fq> to F3.\\n");\n-\t\tfprintf(stderr, " 2. For reads shorter than 30bp, applying a smaller -o is recommended to\\n");\n-\t\tfprintf(stderr, " to get a sensible speed at the cost of pairing accuracy.\\n");\n-\t\tfprintf(stderr, "\\n");\n-\t\treturn 1;\n-\t}\n-\tif ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {\n-\t\tfprintf(stderr, "[%s] fail to locate the index\\n", __func__);\n-\t\treturn 1;\n-\t}\n-\tbwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt, rg_line);\n-\tfree(prefix); free(popt);\n-\treturn 0;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwase.c --- a/bwa-0.7.9a/bwase.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,602 +0,0 @@\n-#include <unistd.h>\n-#include <string.h>\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <math.h>\n-#include <time.h>\n-#include <assert.h>\n-#include "bwase.h"\n-#include "bwtaln.h"\n-#include "bntseq.h"\n-#include "utils.h"\n-#include "kstring.h"\n-#include "bwa.h"\n-#include "ksw.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-int g_log_n[256];\n-\n-void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi)\n-{\n-\tint i, cnt, best;\n-\tif (n_aln == 0) {\n-\t\ts->type = BWA_TYPE_NO_MATCH;\n-\t\ts->c1 = s->c2 = 0;\n-\t\treturn;\n-\t}\n-\n-\tif (set_main) {\n-\t\tbest = aln[0].score;\n-\t\tfor (i = cnt = 0; i < n_aln; ++i) {\n-\t\t\tconst bwt_aln1_t *p = aln + i;\n-\t\t\tif (p->score > best) break;\n-\t\t\tif (drand48() * (p->l - p->k + 1 + cnt) > (double)cnt) {\n-\t\t\t\ts->n_mm = p->n_mm; s->n_gapo = p->n_gapo; s->n_gape = p->n_gape;\n-\t\t\t\ts->ref_shift = (int)p->n_del - (int)p->n_ins;\n-\t\t\t\ts->score = p->score;\n-\t\t\t\ts->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48());\n-\t\t\t}\n-\t\t\tcnt += p->l - p->k + 1;\n-\t\t}\n-\t\ts->c1 = cnt;\n-\t\tfor (; i < n_aln; ++i) cnt += aln[i].l - aln[i].k + 1;\n-\t\ts->c2 = cnt - s->c1;\n-\t\ts->type = s->c1 > 1? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;\n-\t}\n-\n-\tif (n_multi) {\n-\t\tint k, rest, n_occ, z = 0;\n-\t\tfor (k = n_occ = 0; k < n_aln; ++k) {\n-\t\t\tconst bwt_aln1_t *q = aln + k;\n-\t\t\tn_occ += q->l - q->k + 1;\n-\t\t}\n-\t\tif (s->multi) free(s->multi);\n-\t\tif (n_occ > n_multi + 1) { // if there are too many hits, generate none of them\n-\t\t\ts->multi = 0; s->n_multi = 0;\n-\t\t\treturn;\n-\t\t}\n-\t\t/* The following code is more flexible than what is required\n-\t\t * here. In principle, due to the requirement above, we can\n-\t\t * simply output all hits, but the following samples "rest"\n-\t\t * number of random hits. */\n-\t\trest = n_occ > n_multi + 1? n_multi + 1 : n_occ; // find one additional for ->sa\n-\t\ts->multi = calloc(rest, sizeof(bwt_multi1_t));\n-\t\tfor (k = 0; k < n_aln; ++k) {\n-\t\t\tconst bwt_aln1_t *q = aln + k;\n-\t\t\tif (q->l - q->k + 1 <= rest) {\n-\t\t\t\tbwtint_t l;\n-\t\t\t\tfor (l = q->k; l <= q->l; ++l) {\n-\t\t\t\t\ts->multi[z].pos = l;\n-\t\t\t\t\ts->multi[z].gap = q->n_gapo + q->n_gape;\n-\t\t\t\t\ts->multi[z].ref_shift = (int)q->n_del - (int)q->n_ins;\n-\t\t\t\t\ts->multi[z++].mm = q->n_mm;\n-\t\t\t\t}\n-\t\t\t\trest -= q->l - q->k + 1;\n-\t\t\t} else { // Random sampling (http://code.activestate.com/recipes/272884/). In fact, we never come here. \n-\t\t\t\tint j, i;\n-\t\t\t\tfor (j = rest, i = q->l - q->k + 1; j > 0; --j) {\n-\t\t\t\t\tdouble p = 1.0, x = drand48();\n-\t\t\t\t\twhile (x < p) p -= p * j / (i--);\n-\t\t\t\t\ts->multi[z].pos = q->l - i;\n-\t\t\t\t\ts->multi[z].gap = q->n_gapo + q->n_gape;\n-\t\t\t\t\ts->multi[z].ref_shift = (int)q->n_del - (int)q->n_ins;\n-\t\t\t\t\ts->multi[z++].mm = q->n_mm;\n-\t\t\t\t}\n-\t\t\t\trest = 0;\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t}\n-\t\ts->n_multi = z;\n-\t}\n-}\n-\n-void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s)\n-{\n-\tbwa_aln2seq_core(n_aln, aln, s, 1, 0);\n-}\n-\n-int bwa_approx_mapQ(const bwa_seq_t *p, int mm)\n-{\n-\tint n;\n-\tif (p->c1 == 0) return 23;\n-\tif (p->c1 > 1) return 0;\n-\tif (p->n_mm == mm) return 25;\n-\tif (p->c2 == 0) return 37;\n-\tn = (p->c2 >= 255)? 255 : p->c2;\n-\treturn (23 < g_log_n[n])? 0 : 23 - g_log_n[n];\n-}\n-\n-bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int ref_len, int *strand)\n-{\n-\tbwtint_t pos_f;\n-\tint is_rev;\n-\tpos_f = bwt_sa(bwt, sapos); // position on the forward-reverse coordinate\n-\tif (pos_f < bns->l_pac && bns->l_pac < pos_f + ref_len) return (bwtint_t)-1;\n-\tpos_f = bns_depos(bns, pos_f, &is_rev); // position on the forward strand; this may be the first base or the last base\n-\t*strand = !is_rev;\n-\tif (is_rev) pos_f = pos_f + 1 < ref_len? 0 : pos_f - ref_len + 1; // position of the first base\n-\treturn pos_f; // FIXME: it is possible that pos_f < bns->anns[ref_id].offset\n-}\n-\n-/**\n- * Derive the actual position in the read from the given suffix array\n- * coordinates. Note that the position will be approximate based on\n- * whether indels appear in the read and whether calculations are\n- * performed fro'..b'-\t\t}\n-\t\terr_putchar(\'\\n\');\n-\t} else { // this read has no match\n-\t\t//ubyte_t *s = p->strand? p->rseq : p->seq;\n-\t\tint flag = p->extra_flag | SAM_FSU;\n-\t\tif (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;\n-\t\terr_printf("%s\\t%d\\t*\\t0\\t0\\t*\\t*\\t0\\t0\\t", p->name, flag);\n-\t\t//Why did this work differently to the version above??\n-\t\t//for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);\n-\t\tbwa_print_seq(stdout, p);\n-\t\terr_putchar(\'\\t\');\n-\t\tif (p->qual) {\n-\t\t\tif (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality\n-\t\t\terr_printf("%s", p->qual);\n-\t\t} else err_printf("*");\n-\t\tif (bwa_rg_id[0]) err_printf("\\tRG:Z:%s", bwa_rg_id);\n-\t\tif (p->bc[0]) err_printf("\\tBC:Z:%s", p->bc);\n-\t\tif (p->clip_len < p->full_len) err_printf("\\tXC:i:%d", p->clip_len);\n-\t\terr_putchar(\'\\n\');\n-\t}\n-}\n-\n-void bwase_initialize() \n-{\n-\tint i;\n-\tfor (i = 1; i != 256; ++i) g_log_n[i] = (int)(4.343 * log(i) + 0.5);\n-}\n-\n-void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ, const char *rg_line)\n-{\n-\textern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);\n-\tint i, n_seqs, tot_seqs = 0, m_aln;\n-\tbwt_aln1_t *aln = 0;\n-\tbwa_seq_t *seqs;\n-\tbwa_seqio_t *ks;\n-\tclock_t t;\n-\tbntseq_t *bns;\n-\tFILE *fp_sa;\n-\tgap_opt_t opt;\n-\tchar magic[4];\n-\n-\t// initialization\n-\tbwase_initialize();\n-\tbns = bns_restore(prefix);\n-\tsrand48(bns->seed);\n-\tfp_sa = xopen(fn_sa, "r");\n-\n-\tm_aln = 0;\n-\terr_fread_noeof(magic, 1, 4, fp_sa);\n-\tif (strncmp(magic, SAI_MAGIC, 4) != 0) {\n-\t\tfprintf(stderr, "[E::%s] Unmatched SAI magic. Please re-run `aln\' with the same version of bwa.\\n", __func__);\n-\t\texit(1);\n-\t}\n-\terr_fread_noeof(&opt, sizeof(gap_opt_t), 1, fp_sa);\n-\tbwa_print_sam_hdr(bns, rg_line);\n-\t// set ks\n-\tks = bwa_open_reads(opt.mode, fn_fa);\n-\t// core loop\n-\twhile ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode, opt.trim_qual)) != 0) {\n-\t\ttot_seqs += n_seqs;\n-\t\tt = clock();\n-\n-\t\t// read alignment\n-\t\tfor (i = 0; i < n_seqs; ++i) {\n-\t\t\tbwa_seq_t *p = seqs + i;\n-\t\t\tint n_aln;\n-\t\t\terr_fread_noeof(&n_aln, 4, 1, fp_sa);\n-\t\t\tif (n_aln > m_aln) {\n-\t\t\t\tm_aln = n_aln;\n-\t\t\t\taln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln);\n-\t\t\t}\n-\t\t\terr_fread_noeof(aln, sizeof(bwt_aln1_t), n_aln, fp_sa);\n-\t\t\tbwa_aln2seq_core(n_aln, aln, p, 1, n_occ);\n-\t\t}\n-\n-\t\tfprintf(stderr, "[bwa_aln_core] convert to sequence coordinate... ");\n-\t\tbwa_cal_pac_pos(bns, prefix, n_seqs, seqs, opt.max_diff, opt.fnr); // forward bwt will be destroyed here\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();\n-\n-\t\tfprintf(stderr, "[bwa_aln_core] refine gapped alignments... ");\n-\t\tbwa_refine_gapped(bns, n_seqs, seqs, 0);\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();\n-\n-\t\tfprintf(stderr, "[bwa_aln_core] print alignments... ");\n-\t\tfor (i = 0; i < n_seqs; ++i)\n-\t\t\tbwa_print_sam1(bns, seqs + i, 0, opt.mode, opt.max_top2);\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC);\n-\n-\t\tbwa_free_read_seq(n_seqs, seqs);\n-\t\tfprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\\n", tot_seqs);\n-\t}\n-\n-\t// destroy\n-\tbwa_seq_close(ks);\n-\tbns_destroy(bns);\n-\terr_fclose(fp_sa);\n-\tfree(aln);\n-}\n-\n-int bwa_sai2sam_se(int argc, char *argv[])\n-{\n-\tint c, n_occ = 3;\n-\tchar *prefix, *rg_line = 0;\n-\twhile ((c = getopt(argc, argv, "hn:f:r:")) >= 0) {\n-\t\tswitch (c) {\n-\t\tcase \'h\': break;\n-\t\tcase \'r\':\n-\t\t\tif ((rg_line = bwa_set_rg(optarg)) == 0) return 1;\n-\t\t\tbreak;\n-\t\tcase \'n\': n_occ = atoi(optarg); break;\n-\t\tcase \'f\': xreopen(optarg, "w", stdout); break;\n-\t\tdefault: return 1;\n-\t\t}\n-\t}\n-\n-\tif (optind + 3 > argc) {\n-\t\tfprintf(stderr, "Usage: bwa samse [-n max_occ] [-f out.sam] [-r RG_line] <prefix> <in.sai> <in.fq>\\n");\n-\t\treturn 1;\n-\t}\n-\tif ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {\n-\t\tfprintf(stderr, "[%s] fail to locate the index\\n", __func__);\n-\t\treturn 1;\n-\t}\n-\tbwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ, rg_line);\n-\tfree(prefix);\n-\treturn 0;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwase.h --- a/bwa-0.7.9a/bwase.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -#ifndef BWASE_H -#define BWASE_H - -#include "bntseq.h" -#include "bwt.h" -#include "bwtaln.h" - -#ifdef __cplusplus -extern "C" { -#endif - - // Initialize mapping tables in the bwa single-end mapper. - void bwase_initialize(); - // Calculate the approximate position of the sequence from the specified bwt with loaded suffix array. - void bwa_cal_pac_pos_core(const bntseq_t *bns, const bwt_t* bwt, bwa_seq_t* seq, const int max_mm, const float fnr); - // Refine the approximate position of the sequence to an actual placement for the sequence. - void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq); - // Backfill certain alignment properties mainly centering around number of matches. - void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s); - // Calculate the end position of a read given a certain sequence. - int64_t pos_end(const bwa_seq_t *p); - // - bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand); - -#ifdef __cplusplus -} -#endif - -#endif // BWASE_H |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwaseqio.c --- a/bwa-0.7.9a/bwaseqio.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,235 +0,0 @@ -#include <zlib.h> -#include <ctype.h> -#include "bwtaln.h" -#include "utils.h" -#include "bamlite.h" - -#include "kseq.h" -KSEQ_DECLARE(gzFile) - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -extern unsigned char nst_nt4_table[256]; -static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; - -struct __bwa_seqio_t { - // for BAM input - int is_bam, which; // 1st bit: read1, 2nd bit: read2, 3rd: SE - bamFile fp; - // for fastq input - kseq_t *ks; -}; - -bwa_seqio_t *bwa_bam_open(const char *fn, int which) -{ - bwa_seqio_t *bs; - bam_header_t *h; - bs = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t)); - bs->is_bam = 1; - bs->which = which; - bs->fp = bam_open(fn, "r"); - if (0 == bs->fp) err_fatal_simple("Couldn't open bam file"); - h = bam_header_read(bs->fp); - bam_header_destroy(h); - return bs; -} - -bwa_seqio_t *bwa_seq_open(const char *fn) -{ - gzFile fp; - bwa_seqio_t *bs; - bs = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t)); - fp = xzopen(fn, "r"); - bs->ks = kseq_init(fp); - return bs; -} - -void bwa_seq_close(bwa_seqio_t *bs) -{ - if (bs == 0) return; - if (bs->is_bam) { - if (0 != bam_close(bs->fp)) err_fatal_simple("Error closing bam file"); - } else { - err_gzclose(bs->ks->f->f); - kseq_destroy(bs->ks); - } - free(bs); -} - -void seq_reverse(int len, ubyte_t *seq, int is_comp) -{ - int i; - if (is_comp) { - for (i = 0; i < len>>1; ++i) { - char tmp = seq[len-1-i]; - if (tmp < 4) tmp = 3 - tmp; - seq[len-1-i] = (seq[i] >= 4)? seq[i] : 3 - seq[i]; - seq[i] = tmp; - } - if (len&1) seq[i] = (seq[i] >= 4)? seq[i] : 3 - seq[i]; - } else { - for (i = 0; i < len>>1; ++i) { - char tmp = seq[len-1-i]; - seq[len-1-i] = seq[i]; seq[i] = tmp; - } - } -} - -int bwa_trim_read(int trim_qual, bwa_seq_t *p) -{ - int s = 0, l, max = 0, max_l = p->len; - if (trim_qual < 1 || p->qual == 0) return 0; - for (l = p->len - 1; l >= BWA_MIN_RDLEN; --l) { - s += trim_qual - (p->qual[l] - 33); - if (s < 0) break; - if (s > max) max = s, max_l = l; - } - p->clip_len = p->len = max_l; - return p->full_len - p->len; -} - -static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual) -{ - bwa_seq_t *seqs, *p; - int n_seqs, l, i; - long n_trimmed = 0, n_tot = 0; - bam1_t *b; - int res; - - b = bam_init1(); - n_seqs = 0; - seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); - while ((res = bam_read1(bs->fp, b)) >= 0) { - uint8_t *s, *q; - int go = 0; - if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1; - if ((bs->which & 2) && (b->core.flag & BAM_FREAD2)) go = 1; - if ((bs->which & 4) && !(b->core.flag& BAM_FREAD1) && !(b->core.flag& BAM_FREAD2))go = 1; - if (go == 0) continue; - l = b->core.l_qseq; - p = &seqs[n_seqs++]; - p->tid = -1; // no assigned to a thread - p->qual = 0; - p->full_len = p->clip_len = p->len = l; - n_tot += p->full_len; - s = bam1_seq(b); q = bam1_qual(b); - p->seq = (ubyte_t*)calloc(p->len + 1, 1); - p->qual = (ubyte_t*)calloc(p->len + 1, 1); - for (i = 0; i != p->full_len; ++i) { - p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)]; - p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126; - } - if (bam1_strand(b)) { // then reverse - seq_reverse(p->len, p->seq, 1); - seq_reverse(p->len, p->qual, 0); - } - if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p); - p->rseq = (ubyte_t*)calloc(p->full_len, 1); - memcpy(p->rseq, p->seq, p->len); - seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped() - seq_reverse(p->len, p->rseq, is_comp); - p->name = strdup((const char*)bam1_qname(b)); - if (n_seqs == n_needed) break; - } - if (res < 0 && res != -1) err_fatal_simple("Error reading bam file"); - *n = n_seqs; - if (n_seqs && trim_qual >= 1) - fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot); - if (n_seqs == 0) { - free(seqs); - bam_destroy1(b); - return 0; - } - bam_destroy1(b); - return seqs; -} - -#define BARCODE_LOW_QUAL 13 - -bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int trim_qual) -{ - bwa_seq_t *seqs, *p; - kseq_t *seq = bs->ks; - int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24; - long n_trimmed = 0, n_tot = 0; - - if (l_bc > BWA_MAX_BCLEN) { - fprintf(stderr, "[%s] the maximum barcode length is %d.\n", __func__, BWA_MAX_BCLEN); - return 0; - } - if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input - n_seqs = 0; - seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t)); - while ((l = kseq_read(seq)) >= 0) { - if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) { - // skip reads that are marked to be filtered by Casava - char *s = index(seq->comment.s, ':'); - if (s && *(++s) == 'Y') { - continue; - } - } - if (is_64 && seq->qual.l) - for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31; - if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length - p = &seqs[n_seqs++]; - if (l_bc) { // then trim barcode - for (i = 0; i < l_bc; ++i) - p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower(seq->seq.s[i]) : toupper(seq->seq.s[i]); - p->bc[i] = 0; - for (; i < seq->seq.l; ++i) - seq->seq.s[i - l_bc] = seq->seq.s[i]; - seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0; - if (seq->qual.l) { - for (i = l_bc; i < seq->qual.l; ++i) - seq->qual.s[i - l_bc] = seq->qual.s[i]; - seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0; - } - l = seq->seq.l; - } else p->bc[0] = 0; - p->tid = -1; // no assigned to a thread - p->qual = 0; - p->full_len = p->clip_len = p->len = l; - n_tot += p->full_len; - p->seq = (ubyte_t*)calloc(p->full_len, 1); - for (i = 0; i != p->full_len; ++i) - p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]]; - if (seq->qual.l) { // copy quality - p->qual = (ubyte_t*)strdup((char*)seq->qual.s); - if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p); - } - p->rseq = (ubyte_t*)calloc(p->full_len, 1); - memcpy(p->rseq, p->seq, p->len); - seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped() - seq_reverse(p->len, p->rseq, is_comp); - p->name = strdup((const char*)seq->name.s); - { // trim /[12]$ - int t = strlen(p->name); - if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0'; - } - if (n_seqs == n_needed) break; - } - *n = n_seqs; - if (n_seqs && trim_qual >= 1) - fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot); - if (n_seqs == 0) { - free(seqs); - return 0; - } - return seqs; -} - -void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs) -{ - int i, j; - for (i = 0; i != n_seqs; ++i) { - bwa_seq_t *p = seqs + i; - for (j = 0; j < p->n_multi; ++j) - if (p->multi[j].cigar) free(p->multi[j].cigar); - free(p->name); - free(p->seq); free(p->rseq); free(p->qual); free(p->aln); free(p->md); free(p->multi); - free(p->cigar); - } - free(seqs); -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwt.c --- a/bwa-0.7.9a/bwt.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,437 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-#include <stdlib.h>\n-#include <stdio.h>\n-#include <string.h>\n-#include <assert.h>\n-#include <stdint.h>\n-#include "utils.h"\n-#include "bwt.h"\n-#include "kvec.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-void bwt_gen_cnt_table(bwt_t *bwt)\n-{\n-\tint i, j;\n-\tfor (i = 0; i != 256; ++i) {\n-\t\tuint32_t x = 0;\n-\t\tfor (j = 0; j != 4; ++j)\n-\t\t\tx |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);\n-\t\tbwt->cnt_table[i] = x;\n-\t}\n-}\n-\n-static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inverse CSA\n-{\n-\tbwtint_t x = k - (k > bwt->primary);\n-\tx = bwt_B0(bwt, x);\n-\tx = bwt->L2[x] + bwt_occ(bwt, k, x);\n-\treturn k == bwt->primary? 0 : x;\n-}\n-\n-// bwt->bwt and bwt->occ must be precalculated\n-void bwt_cal_sa(bwt_t *bwt, int intv)\n-{\n-\tbwtint_t isa, sa, i; // S(isa) = sa\n-\tint intv_round = intv;\n-\n-\tkv_roundup32(intv_round);\n-\txassert(intv_round == intv, "SA sample interval is not a power of 2.");\n-\txassert(bwt->bwt, "bwt_t::bwt is not initialized.");\n-\n-\tif (bwt->sa) free(bwt->sa);\n-\tbwt->sa_intv = intv;\n-\tbwt->n_sa = (bwt->seq_len + intv) / intv;\n-\tbwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));\n-\t// calculate SA value\n-\tisa = 0; sa = bwt->seq_len;\n-\tfor (i = 0; i < bwt->seq_len; ++i) {\n-\t\tif (isa % intv == 0) bwt->sa[isa/intv] = sa;\n-\t\t--sa;\n-\t\tisa = bwt_invPsi(bwt, isa);\n-\t}\n-\tif (isa % intv == 0) bwt->sa[isa/intv] = sa;\n-\tbwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len\n-}\n-\n-bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k)\n-{\n-\tbwtint_t sa = 0, mask = bwt->sa_intv - 1;\n-\twhile (k & mask) {\n-\t\t++sa;\n-\t\tk = bwt_invPsi(bwt, k);\n-\t}\n-\t/* without setting bwt->sa[0] = -1, the following line should be\n-\t changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */\n-\treturn sa + bwt->sa[k/bwt->sa_intv];\n-}\n-\n-static inline int __occ_aux(uint64_t y, int c)\n-{\n-\t// reduce nucleotide counting to bits counting\n-\ty = ((c&2)? y : ~y) >> 1 & ((c&1)? y : ~y) & 0x5555555555555555ull;\n-\t// count the number of 1s in y\n-\ty = (y & 0x3333333333333333ull) + (y >> 2 & 0x3333333333333333ull);\n-\treturn ((y + (y >> 4)) & 0xf0f0f0f0f0f0f0full) * 0x101010101010101ull >> 56;\n-}\n-\n-bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)\n-{\n-\tbwtint_t n;\n-\tuint32_t *p, *end;\n-\n-\tif (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c];\n-\tif (k == (bwtint_t)(-1)) return 0;\n-\tk -= (k >= bwt->primary); // because $ is not in bwt\n-\n-\t// retrieve Occ at k/OCC_INTERVAL\n-\tn = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];\n-\tp += sizeof(bwtint_t); // jump to the start of the first BWT cell\n-\n-\t// calculate Occ up to the last k/32\n-\tend = p + (((k>>5) - ((k&~OCC_INTV_MASK)>>5))<<1);\n-\tfor (; p < end; p += 2) n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);\n-\n-\t'..b'interval if we reach the end\n-\tbwt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first\n-\tret = curr->a[0].info; // this will be the returned value\n-\tswap = curr; curr = prev; prev = swap;\n-\n-\tfor (i = x - 1; i >= -1; --i) { // backward search for MEMs\n-\t\tc = i < 0? -1 : q[i] < 4? q[i] : -1; // c==-1 if i<0 or q[i] is an ambiguous base\n-\t\tfor (j = 0, curr->n = 0; j < prev->n; ++j) {\n-\t\t\tbwtintv_t *p = &prev->a[j];\n-\t\t\tbwt_extend(bwt, p, ok, 1);\n-\t\t\tif (c < 0 || ok[c].x[2] < min_intv) { // keep the hit if reaching the beginning or an ambiguous base or the intv is small enough\n-\t\t\t\tif (curr->n == 0) { // test curr->n>0 to make sure there are no longer matches\n-\t\t\t\t\tif (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches\n-\t\t\t\t\t\tik = *p; ik.info |= (uint64_t)(i + 1)<<32;\n-\t\t\t\t\t\tkv_push(bwtintv_t, *mem, ik);\n-\t\t\t\t\t}\n-\t\t\t\t} // otherwise the match is contained in another longer match\n-\t\t\t} else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n-1].x[2]) {\n-\t\t\t\tok[c].info = p->info;\n-\t\t\t\tkv_push(bwtintv_t, *curr, ok[c]);\n-\t\t\t}\n-\t\t}\n-\t\tif (curr->n == 0) break;\n-\t\tswap = curr; curr = prev; prev = swap;\n-\t}\n-\tbwt_reverse_intvs(mem); // s.t. sorted by the start coordinate\n-\n-\tif (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a);\n-\tif (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);\n-\treturn ret;\n-}\n-\n-/*************************\n- * Read/write BWT and SA *\n- *************************/\n-\n-void bwt_dump_bwt(const char *fn, const bwt_t *bwt)\n-{\n-\tFILE *fp;\n-\tfp = xopen(fn, "wb");\n-\terr_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);\n-\terr_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);\n-\terr_fwrite(bwt->bwt, 4, bwt->bwt_size, fp);\n-\terr_fflush(fp);\n-\terr_fclose(fp);\n-}\n-\n-void bwt_dump_sa(const char *fn, const bwt_t *bwt)\n-{\n-\tFILE *fp;\n-\tfp = xopen(fn, "wb");\n-\terr_fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);\n-\terr_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);\n-\terr_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);\n-\terr_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);\n-\terr_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);\n-\terr_fflush(fp);\n-\terr_fclose(fp);\n-}\n-\n-static bwtint_t fread_fix(FILE *fp, bwtint_t size, void *a)\n-{ // Mac/Darwin has a bug when reading data longer than 2GB. This function fixes this issue by reading data in small chunks\n-\tconst int bufsize = 0x1000000; // 16M block\n-\tbwtint_t offset = 0;\n-\twhile (size) {\n-\t\tint x = bufsize < size? bufsize : size;\n-\t\tif ((x = err_fread_noeof(a + offset, 1, x, fp)) == 0) break;\n-\t\tsize -= x; offset += x;\n-\t}\n-\treturn offset;\n-}\n-\n-void bwt_restore_sa(const char *fn, bwt_t *bwt)\n-{\n-\tchar skipped[256];\n-\tFILE *fp;\n-\tbwtint_t primary;\n-\n-\tfp = xopen(fn, "rb");\n-\terr_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);\n-\txassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same.");\n-\terr_fread_noeof(skipped, sizeof(bwtint_t), 4, fp); // skip\n-\terr_fread_noeof(&bwt->sa_intv, sizeof(bwtint_t), 1, fp);\n-\terr_fread_noeof(&primary, sizeof(bwtint_t), 1, fp);\n-\txassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");\n-\n-\tbwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;\n-\tbwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));\n-\tbwt->sa[0] = -1;\n-\n-\tfread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1);\n-\terr_fclose(fp);\n-}\n-\n-bwt_t *bwt_restore_bwt(const char *fn)\n-{\n-\tbwt_t *bwt;\n-\tFILE *fp;\n-\n-\tbwt = (bwt_t*)calloc(1, sizeof(bwt_t));\n-\tfp = xopen(fn, "rb");\n-\terr_fseek(fp, 0, SEEK_END);\n-\tbwt->bwt_size = (err_ftell(fp) - sizeof(bwtint_t) * 5) >> 2;\n-\tbwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);\n-\terr_fseek(fp, 0, SEEK_SET);\n-\terr_fread_noeof(&bwt->primary, sizeof(bwtint_t), 1, fp);\n-\terr_fread_noeof(bwt->L2+1, sizeof(bwtint_t), 4, fp);\n-\tfread_fix(fp, bwt->bwt_size<<2, bwt->bwt);\n-\tbwt->seq_len = bwt->L2[4];\n-\terr_fclose(fp);\n-\tbwt_gen_cnt_table(bwt);\n-\n-\treturn bwt;\n-}\n-\n-void bwt_destroy(bwt_t *bwt)\n-{\n-\tif (bwt == 0) return;\n-\tfree(bwt->sa); free(bwt->bwt);\n-\tfree(bwt);\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwt.h --- a/bwa-0.7.9a/bwt.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,128 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Genome Research Ltd (GRL). - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* Contact: Heng Li <lh3@sanger.ac.uk> */ - -#ifndef BWA_BWT_H -#define BWA_BWT_H - -#include <stdint.h> -#include <stddef.h> - -// requirement: (OCC_INTERVAL%16 == 0); please DO NOT change this line because some part of the code assume OCC_INTERVAL=0x80 -#define OCC_INTV_SHIFT 7 -#define OCC_INTERVAL (1LL<<OCC_INTV_SHIFT) -#define OCC_INTV_MASK (OCC_INTERVAL - 1) - -#ifndef BWA_UBYTE -#define BWA_UBYTE -typedef unsigned char ubyte_t; -#endif - -typedef uint64_t bwtint_t; - -typedef struct { - bwtint_t primary; // S^{-1}(0), or the primary index of BWT - bwtint_t L2[5]; // C(), cumulative count - bwtint_t seq_len; // sequence length - bwtint_t bwt_size; // size of bwt, about seq_len/4 - uint32_t *bwt; // BWT - // occurance array, separated to two parts - uint32_t cnt_table[256]; - // suffix array - int sa_intv; - bwtint_t n_sa; - bwtint_t *sa; -} bwt_t; - -typedef struct { - bwtint_t x[3], info; -} bwtintv_t; - -typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v; - -/* For general OCC_INTERVAL, the following is correct: -#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16]) -#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) -*/ - -// The following two lines are ONLY correct when OCC_INTERVAL==0x80 -#define bwt_bwt(b, k) ((b)->bwt[((k)>>7<<4) + sizeof(bwtint_t) + (((k)&0x7f)>>4)]) -#define bwt_occ_intv(b, k) ((b)->bwt + ((k)>>7<<4)) - -/* retrieve a character from the $-removed BWT string. Note that - * bwt_t::bwt is not exactly the BWT string and therefore this macro is - * called bwt_B0 instead of bwt_B */ -#define bwt_B0(b, k) (bwt_bwt(b, k)>>((~(k)&0xf)<<1)&3) - -#define bwt_set_intv(bwt, c, ik) ((ik).x[0] = (bwt)->L2[(int)(c)]+1, (ik).x[2] = (bwt)->L2[(int)(c)+1]-(bwt)->L2[(int)(c)], (ik).x[1] = (bwt)->L2[3-(c)]+1, (ik).info = 0) - -#ifdef __cplusplus -extern "C" { -#endif - - void bwt_dump_bwt(const char *fn, const bwt_t *bwt); - void bwt_dump_sa(const char *fn, const bwt_t *bwt); - - bwt_t *bwt_restore_bwt(const char *fn); - void bwt_restore_sa(const char *fn, bwt_t *bwt); - - void bwt_destroy(bwt_t *bwt); - - void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW - void bwt_cal_sa(bwt_t *bwt, int intv); - - void bwt_bwtupdate_core(bwt_t *bwt); - - bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c); - void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]); - bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k); - - // more efficient version of bwt_occ/bwt_occ4 for retrieving two close Occ values - void bwt_gen_cnt_table(bwt_t *bwt); - void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol); - void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4]); - - int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end); - int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0); - - /** - * Extend bi-SA-interval _ik_ - */ - void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back); - - /** - * Given a query _q_, collect potential SMEMs covering position _x_ and store them in _mem_. - * Return the end of the longest exact match starting from _x_. - */ - int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]); - - // SMEM iterator interface - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwt_gen.c --- a/bwa-0.7.9a/bwt_gen.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1627 +0,0 @@\n-/*\n-\n- BWTConstruct.c\t\tBWT-Index Construction\n-\n- This module constructs BWT and auxiliary data structures.\n-\n- Copyright (C) 2004, Wong Chi Kwong.\n-\n- This program is free software; you can redistribute it and/or\n- modify it under the terms of the GNU General Public License\n- as published by the Free Software Foundation; either version 2\n- of the License, or (at your option) any later version.\n-\n- This program is distributed in the hope that it will be useful,\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n- GNU General Public License for more details.\n-\n- You should have received a copy of the GNU General Public License\n- along with this program; if not, write to the Free Software\n- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\n-\n-*/\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <assert.h>\n-#include <stdint.h>\n-#include <errno.h>\n-#include "QSufSort.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-typedef uint64_t bgint_t;\n-typedef int64_t sbgint_t;\n-\n-#define ALPHABET_SIZE\t\t\t\t4\n-#define BIT_PER_CHAR\t\t\t\t2\n-#define CHAR_PER_WORD\t\t\t\t16\n-#define CHAR_PER_BYTE\t\t\t\t4\n-\n-#define BITS_IN_WORD 32\n-#define BITS_IN_BYTE 8\n-#define BYTES_IN_WORD 4\n-\n-#define ALL_ONE_MASK 0xFFFFFFFF\n-#define DNA_OCC_CNT_TABLE_SIZE_IN_WORD\t65536\n-\n-#define BITS_PER_OCC_VALUE\t\t\t16\n-#define OCC_VALUE_PER_WORD\t\t\t2\n-#define OCC_INTERVAL\t\t\t\t256\n-#define OCC_INTERVAL_MAJOR\t\t\t65536\n-\n-#define TRUE 1\n-#define FALSE 0\n-\n-#define BWTINC_INSERT_SORT_NUM_ITEM 7\n-\n-#define MIN_AVAILABLE_WORD 0x10000\n-\n-#define average(value1, value2)\t\t\t\t\t( ((value1) & (value2)) + ((value1) ^ (value2)) / 2 )\n-#define min(value1, value2)\t\t\t\t\t\t( ((value1) < (value2)) ? (value1) : (value2) )\n-#define max(value1, value2)\t\t\t\t\t\t( ((value1) > (value2)) ? (value1) : (value2) )\n-#define med3(a, b, c)\t\t\t\t\t\t\t( a<b ? (b<c ? b : a<c ? c : a) : (b>c ? b : a>c ? c : a))\n-#define swap(a, b, t);\t\t\t\t\t\t\tt = a; a = b; b = t;\n-#define truncateLeft(value, offset)\t\t\t\t( (value) << (offset) >> (offset) )\n-#define truncateRight(value, offset)\t\t\t( (value) >> (offset) << (offset) )\n-#define DNA_OCC_SUM_EXCEPTION(sum)\t\t\t((sum & 0xfefefeff) == 0)\n-\n-typedef struct BWT {\n-\tbgint_t textLength;\t\t\t\t\t// length of the text\n-\tbgint_t inverseSa0;\t\t\t\t\t// SA-1[0]\n-\tbgint_t *cumulativeFreq;\t\t\t// cumulative frequency\n-\tunsigned int *bwtCode;\t\t\t\t// BWT code\n-\tunsigned int *occValue;\t\t\t\t// Occurrence values stored explicitly\n-\tbgint_t *occValueMajor;\t\t\t\t// Occurrence values stored explicitly\n-\tunsigned int *decodeTable;\t\t\t// For decoding BWT by table lookup\n-\tbgint_t bwtSizeInWord;\t\t\t\t// Temporary variable to hold the memory allocated\n-\tbgint_t occSizeInWord;\t\t\t\t// Temporary variable to hold the memory allocated\n-\tbgint_t occMajorSizeInWord;\t\t\t// Temporary variable to hold the memory allocated\n-} BWT;\n-\n-typedef struct BWTInc {\n-\tBWT *bwt;\n-\tunsigned int numberOfIterationDone;\n-\tbgint_t *cumulativeCountInCurrentBuild;\n-\tbgint_t availableWord;\n-\tbgint_t buildSize;\n-\tbgint_t initialMaxBuildSize;\n-\tbgint_t incMaxBuildSize;\n-\tunsigned int firstCharInLastIteration;\n-\tunsigned int *workingMemory;\n-\tunsigned int *packedText;\n-\tunsigned char *textBuffer;\n-\tunsigned int *packedShift;\n-} BWTInc;\n-\n-static bgint_t TextLengthFromBytePacked(bgint_t bytePackedLength, unsigned int bitPerChar,\n-\t\t\t\t\t\t\t\t\t\t\t unsigned int lastByteLength)\n-{\n-\treturn (bytePackedLength - 1) * (BITS_IN_BYTE / bitPerChar) + lastByteLength;\n-}\n-\n-static void initializeVAL(unsigned int *startAddr, const bgint_t length, const unsigned int initValue)\n-{\n-\tbgint_t i;\n-\tfor (i=0; i<length; i++) startAddr[i] = initValue;\n-}\n-\n-static void initializeVAL_bg(bgint_t *startAddr, const bgint_t length, const bgint_t initValue)\n-{\n-\tbgint_t i;\n-\tfor (i=0; i<length; i++) startAddr[i] = initValue;\n-}\n-\n-static void GenerateDNAOccCountTable(unsigned int *dnaDecodeTabl'..b',\n-\t\t\t\tferror(packedFile)? strerror(errno) : "Unexpected end of file");\n-\t\texit(1);\n-\t}\n-\tif (fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR) != 0) {\n-\t\tfprintf(stderr, "BWTIncConstructFromPacked() : Can\'t seek on %s : %s\\n",\n-\t\t\t\tinputFileName, strerror(errno));\n-\t\texit(1);\n-\t}\n-\n-\tConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);\n-\tBWTIncConstruct(bwtInc, textToLoad);\n-\n-\tprocessedTextLength = textToLoad;\n-\n-\twhile (processedTextLength < totalTextLength) {\n-\t\ttextToLoad = bwtInc->buildSize / CHAR_PER_WORD * CHAR_PER_WORD;\n-\t\tif (textToLoad > totalTextLength - processedTextLength) {\n-\t\t\ttextToLoad = totalTextLength - processedTextLength;\n-\t\t}\n-\t\ttextSizeInByte = textToLoad / CHAR_PER_BYTE;\n-\t\tif (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {\n-\t\t\tfprintf(stderr, "BWTIncConstructFromPacked() : Can\'t seek on %s : %s\\n",\n-\t\t\t\t\tinputFileName, strerror(errno));\n-\t\t\texit(1);\n-\t\t}\n-\t\tif (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile) != textSizeInByte) {\n-\t\t\tfprintf(stderr,\n-\t\t\t\t"BWTIncConstructFromPacked() : Can\'t read from %s : %s\\n",\n-\t\t\t\tinputFileName,\n-\t\t\t\tferror(packedFile)? strerror(errno) : "Unexpected end of file");\n-\t\t\texit(1);\n-\t\t}\n-\t\tif (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {\n-\t\t\tfprintf(stderr, "BWTIncConstructFromPacked() : Can\'t seek on %s : %s\\n",\n-\t\t\t\t\tinputFileName, strerror(errno));\n-\t\t\texit(1);\n-\t\t}\n-\t\tConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);\n-\t\tBWTIncConstruct(bwtInc, textToLoad);\n-\t\tprocessedTextLength += textToLoad;\n-\t\tif (bwtInc->numberOfIterationDone % 10 == 0) {\n-\t\t\tfprintf(stderr, "[BWTIncConstructFromPacked] %lu iterations done. %lu characters processed.\\n",\n-\t\t\t\t\t(long)bwtInc->numberOfIterationDone, (long)processedTextLength);\n-\t\t}\n-\t}\n-\treturn bwtInc;\n-}\n-\n-void BWTFree(BWT *bwt)\n-{\n-\tif (bwt == 0) return;\n-\tfree(bwt->cumulativeFreq);\n-\tfree(bwt->bwtCode);\n-\tfree(bwt->occValue);\n-\tfree(bwt->occValueMajor);\n-\tfree(bwt->decodeTable);\n-\tfree(bwt);\n-}\n-\n-void BWTIncFree(BWTInc *bwtInc)\n-{\n-\tif (bwtInc == 0) return;\n-\tfree(bwtInc->bwt);\n-\tfree(bwtInc->workingMemory);\n-\tfree(bwtInc);\n-}\n-\n-static bgint_t BWTFileSizeInWord(const bgint_t numChar)\n-{\n-\t// The $ in BWT at the position of inverseSa0 is not encoded\n-\treturn (numChar + CHAR_PER_WORD - 1) / CHAR_PER_WORD;\n-}\n-\n-void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *occValueFileName)\n-{\n-\tFILE *bwtFile;\n-/*\tFILE *occValueFile; */\n-\tbgint_t bwtLength;\n-\n-\tbwtFile = (FILE*)fopen(bwtFileName, "wb");\n-\tif (bwtFile == NULL) {\n-\t\tfprintf(stderr,\n-\t\t\t\t"BWTSaveBwtCodeAndOcc(): Cannot open %s for writing: %s\\n",\n-\t\t\t\tbwtFileName, strerror(errno));\n-\t\texit(1);\n-\t}\n-\n-\tbwtLength = BWTFileSizeInWord(bwt->textLength);\n-\n-\tif (fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile) != 1\n-\t\t|| fwrite(bwt->cumulativeFreq + 1,\n-\t\t\t\t sizeof(bgint_t), ALPHABET_SIZE, bwtFile) != ALPHABET_SIZE\n-\t\t|| fwrite(bwt->bwtCode,\n-\t\t\t\t sizeof(unsigned int), bwtLength, bwtFile) != bwtLength) {\n-\t\tfprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error writing to %s : %s\\n",\n-\t\t\t\tbwtFileName, strerror(errno));\n-\t\texit(1);\n-\t}\n-\tif (fclose(bwtFile) != 0) {\n-\t\tfprintf(stderr, "BWTSaveBwtCodeAndOcc(): Error on closing %s : %s\\n",\n-\t\t\t\tbwtFileName, strerror(errno));\n-\t\texit(1);\n-\t}\n-}\n-\n-void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)\n-{\n-\tBWTInc *bwtInc;\n-\tbwtInc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000);\n-\tprintf("[bwt_gen] Finished constructing BWT in %u iterations.\\n", bwtInc->numberOfIterationDone);\n-\tBWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);\n-\tBWTIncFree(bwtInc);\n-}\n-\n-int bwt_bwtgen_main(int argc, char *argv[])\n-{\n-\tif (argc < 3) {\n-\t\tfprintf(stderr, "Usage: bwtgen <in.pac> <out.bwt>\\n");\n-\t\treturn 1;\n-\t}\n-\tbwt_bwtgen(argv[1], argv[2]);\n-\treturn 0;\n-}\n-\n-#ifdef MAIN_BWT_GEN\n-\n-int main(int argc, char *argv[])\n-{\n-\treturn bwt_bwtgen_main(argc, argv);\n-}\n-\n-#endif\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwt_lite.c --- a/bwa-0.7.9a/bwt_lite.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,98 +0,0 @@ -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include "bwt_lite.h" - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -int is_sa(const uint8_t *T, int *SA, int n); -int is_bwt(uint8_t *T, int n); - -bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq) -{ - bwtl_t *b; - int i; - b = (bwtl_t*)calloc(1, sizeof(bwtl_t)); - b->seq_len = len; - - { // calculate b->bwt - uint8_t *s; - b->sa = (uint32_t*)calloc(len + 1, 4); - is_sa(seq, (int*)b->sa, len); - s = (uint8_t*)calloc(len + 1, 1); - for (i = 0; i <= len; ++i) { - if (b->sa[i] == 0) b->primary = i; - else s[i] = seq[b->sa[i] - 1]; - } - for (i = b->primary; i < len; ++i) s[i] = s[i + 1]; - b->bwt_size = (len + 15) / 16; - b->bwt = (uint32_t*)calloc(b->bwt_size, 4); - for (i = 0; i < len; ++i) - b->bwt[i>>4] |= s[i] << ((15 - (i&15)) << 1); - free(s); - } - { // calculate b->occ - uint32_t c[4]; - b->n_occ = (len + 15) / 16 * 4; - b->occ = (uint32_t*)calloc(b->n_occ, 4); - memset(c, 0, 16); - for (i = 0; i < len; ++i) { - if (i % 16 == 0) - memcpy(b->occ + (i/16) * 4, c, 16); - ++c[bwtl_B0(b, i)]; - } - memcpy(b->L2+1, c, 16); - for (i = 2; i < 5; ++i) b->L2[i] += b->L2[i-1]; - } - { // generate cnt_table - for (i = 0; i != 256; ++i) { - u_int32_t j, x = 0; - for (j = 0; j != 4; ++j) - x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3); - b->cnt_table[i] = x; - } - } - return b; -} -uint32_t bwtl_occ(const bwtl_t *bwt, uint32_t k, uint8_t c) -{ - uint32_t n, b; - if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c]; - if (k == (uint32_t)(-1)) return 0; - if (k >= bwt->primary) --k; // because $ is not in bwt - n = bwt->occ[k/16<<2|c]; - b = bwt->bwt[k/16] & ~((1U<<((15-(k&15))<<1)) - 1); - n += (bwt->cnt_table[b&0xff] + bwt->cnt_table[b>>8&0xff] - + bwt->cnt_table[b>>16&0xff] + bwt->cnt_table[b>>24]) >> (c<<3) & 0xff; - if (c == 0) n -= 15 - (k&15); // corrected for the masked bits - return n; -} -void bwtl_occ4(const bwtl_t *bwt, uint32_t k, uint32_t cnt[4]) -{ - uint32_t x, b; - if (k == (uint32_t)(-1)) { - memset(cnt, 0, 16); - return; - } - if (k >= bwt->primary) --k; // because $ is not in bwt - memcpy(cnt, bwt->occ + (k>>4<<2), 16); - b = bwt->bwt[k>>4] & ~((1U<<((~k&15)<<1)) - 1); - x = bwt->cnt_table[b&0xff] + bwt->cnt_table[b>>8&0xff] - + bwt->cnt_table[b>>16&0xff] + bwt->cnt_table[b>>24]; - x -= 15 - (k&15); - cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24; -} -void bwtl_2occ4(const bwtl_t *bwt, uint32_t k, uint32_t l, uint32_t cntk[4], uint32_t cntl[4]) -{ - bwtl_occ4(bwt, k, cntk); - bwtl_occ4(bwt, l, cntl); -} -void bwtl_destroy(bwtl_t *bwt) -{ - if (bwt) { - free(bwt->occ); free(bwt->bwt); free(bwt->sa); - free(bwt); - } -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwt_lite.h --- a/bwa-0.7.9a/bwt_lite.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,29 +0,0 @@ -#ifndef BWT_LITE_H_ -#define BWT_LITE_H_ - -#include <stdint.h> - -typedef struct { - uint32_t seq_len, bwt_size, n_occ; - uint32_t primary; - uint32_t *bwt, *occ, *sa, L2[5]; - uint32_t cnt_table[256]; -} bwtl_t; - -#define bwtl_B0(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3) - -#ifdef __cplusplus -extern "C" { -#endif - - bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq); - uint32_t bwtl_occ(const bwtl_t *bwt, uint32_t k, uint8_t c); - void bwtl_occ4(const bwtl_t *bwt, uint32_t k, uint32_t cnt[4]); - void bwtl_2occ4(const bwtl_t *bwt, uint32_t k, uint32_t l, uint32_t cntk[4], uint32_t cntl[4]); - void bwtl_destroy(bwtl_t *bwt); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtaln.c --- a/bwa-0.7.9a/bwtaln.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,320 +0,0 @@\n-#include <stdio.h>\n-#include <unistd.h>\n-#include <math.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <time.h>\n-#include <stdint.h>\n-#ifdef HAVE_CONFIG_H\n-#include "config.h"\n-#endif\n-#include "bwtaln.h"\n-#include "bwtgap.h"\n-#include "utils.h"\n-#include "bwa.h"\n-\n-#ifdef HAVE_PTHREAD\n-#include <pthread.h>\n-#endif\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-gap_opt_t *gap_init_opt()\n-{\n-\tgap_opt_t *o;\n-\to = (gap_opt_t*)calloc(1, sizeof(gap_opt_t));\n-\t/* IMPORTANT: s_mm*10 should be about the average base error\n-\t rate. Voilating this requirement will break pairing! */\n-\to->s_mm = 3; o->s_gapo = 11; o->s_gape = 4;\n-\to->max_diff = -1; o->max_gapo = 1; o->max_gape = 6;\n-\to->indel_end_skip = 5; o->max_del_occ = 10; o->max_entries = 2000000;\n-\to->mode = BWA_MODE_GAPE | BWA_MODE_COMPREAD;\n-\to->seed_len = 32; o->max_seed_diff = 2;\n-\to->fnr = 0.04;\n-\to->n_threads = 1;\n-\to->max_top2 = 30;\n-\to->trim_qual = 0;\n-\treturn o;\n-}\n-\n-int bwa_cal_maxdiff(int l, double err, double thres)\n-{\n-\tdouble elambda = exp(-l * err);\n-\tdouble sum, y = 1.0;\n-\tint k, x = 1;\n-\tfor (k = 1, sum = elambda; k < 1000; ++k) {\n-\t\ty *= l * err;\n-\t\tx *= k;\n-\t\tsum += elambda * y / x;\n-\t\tif (1.0 - sum < thres) return k;\n-\t}\n-\treturn 2;\n-}\n-\n-// width must be filled as zero\n-int bwt_cal_width(const bwt_t *bwt, int len, const ubyte_t *str, bwt_width_t *width)\n-{\n-\tbwtint_t k, l, ok, ol;\n-\tint i, bid;\n-\tbid = 0;\n-\tk = 0; l = bwt->seq_len;\n-\tfor (i = 0; i < len; ++i) {\n-\t\tubyte_t c = str[i];\n-\t\tif (c < 4) {\n-\t\t\tbwt_2occ(bwt, k - 1, l, c, &ok, &ol);\n-\t\t\tk = bwt->L2[c] + ok + 1;\n-\t\t\tl = bwt->L2[c] + ol;\n-\t\t}\n-\t\tif (k > l || c > 3) { // then restart\n-\t\t\tk = 0;\n-\t\t\tl = bwt->seq_len;\n-\t\t\t++bid;\n-\t\t}\n-\t\twidth[i].w = l - k + 1;\n-\t\twidth[i].bid = bid;\n-\t}\n-\twidth[len].w = 0;\n-\twidth[len].bid = ++bid;\n-\treturn bid;\n-}\n-\n-void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)\n-{\n-\tint i, j, max_l = 0, max_len;\n-\tgap_stack_t *stack;\n-\tbwt_width_t *w, *seed_w;\n-\tgap_opt_t local_opt = *opt;\n-\n-\t// initiate priority stack\n-\tfor (i = max_len = 0; i != n_seqs; ++i)\n-\t\tif (seqs[i].len > max_len) max_len = seqs[i].len;\n-\tif (opt->fnr > 0.0) local_opt.max_diff = bwa_cal_maxdiff(max_len, BWA_AVG_ERR, opt->fnr);\n-\tif (local_opt.max_diff < local_opt.max_gapo) local_opt.max_gapo = local_opt.max_diff;\n-\tstack = gap_init_stack(local_opt.max_diff, local_opt.max_gapo, local_opt.max_gape, &local_opt);\n-\n-\tseed_w = (bwt_width_t*)calloc(opt->seed_len+1, sizeof(bwt_width_t));\n-\tw = 0;\n-\tfor (i = 0; i != n_seqs; ++i) {\n-\t\tbwa_seq_t *p = seqs + i;\n-#ifdef HAVE_PTHREAD\n-\t\tif (i % opt->n_threads != tid) continue;\n-#endif\n-\t\tp->sa = 0; p->type = BWA_TYPE_NO_MATCH; p->c1 = p->c2 = 0; p->n_aln = 0; p->aln = 0;\n-\t\tif (max_l < p->len) {\n-\t\t\tmax_l = p->len;\n-\t\t\tw = (bwt_width_t*)realloc(w, (max_l + 1) * sizeof(bwt_width_t));\n-\t\t\tmemset(w, 0, (max_l + 1) * sizeof(bwt_width_t));\n-\t\t}\n-\t\tbwt_cal_width(bwt, p->len, p->seq, w);\n-\t\tif (opt->fnr > 0.0) local_opt.max_diff = bwa_cal_maxdiff(p->len, BWA_AVG_ERR, opt->fnr);\n-\t\tlocal_opt.seed_len = opt->seed_len < p->len? opt->seed_len : 0x7fffffff;\n-\t\tif (p->len > opt->seed_len)\n-\t\t\tbwt_cal_width(bwt, opt->seed_len, p->seq + (p->len - opt->seed_len), seed_w);\n-\t\t// core function\n-\t\tfor (j = 0; j < p->len; ++j) // we need to complement\n-\t\t\tp->seq[j] = p->seq[j] > 3? 4 : 3 - p->seq[j];\n-\t\tp->aln = bwt_match_gap(bwt, p->len, p->seq, w, p->len <= opt->seed_len? 0 : seed_w, &local_opt, &p->n_aln, stack);\n-\t\t//fprintf(stderr, "mm=%lld,ins=%lld,del=%lld,gapo=%lld\\n", p->aln->n_mm, p->aln->n_ins, p->aln->n_del, p->aln->n_gapo);\n-\t\t// clean up the unused data in the record\n-\t\tfree(p->name); free(p->seq); free(p->rseq); free(p->qual);\n-\t\tp->name = 0; p->seq = p->rseq = p->qual = 0;\n-\t}\n-\tfree(seed_w); free(w);\n-\tgap_destroy_stack(stack);\n-}\n-\n-#ifdef HAVE_PTHREAD\n-typedef struct {\n-\tint tid;\n-\tbwt_t *bwt;\n-\tint n_seqs;\n-\tbwa_seq_t *seqs;\n-\tconst gap_opt_t *opt;\n-} thread_aux_t;\n-\n-s'..b'pt->max_del_occ = atoi(optarg); break;\n-\t\tcase \'i\': opt->indel_end_skip = atoi(optarg); break;\n-\t\tcase \'l\': opt->seed_len = atoi(optarg); break;\n-\t\tcase \'k\': opt->max_seed_diff = atoi(optarg); break;\n-\t\tcase \'m\': opt->max_entries = atoi(optarg); break;\n-\t\tcase \'t\': opt->n_threads = atoi(optarg); break;\n-\t\tcase \'L\': opt->mode |= BWA_MODE_LOGGAP; break;\n-\t\tcase \'R\': opt->max_top2 = atoi(optarg); break;\n-\t\tcase \'q\': opt->trim_qual = atoi(optarg); break;\n-\t\tcase \'N\': opt->mode |= BWA_MODE_NONSTOP; opt->max_top2 = 0x7fffffff; break;\n-\t\tcase \'f\': xreopen(optarg, "wb", stdout); break;\n-\t\tcase \'b\': opt->mode |= BWA_MODE_BAM; break;\n-\t\tcase \'0\': opt->mode |= BWA_MODE_BAM_SE; break;\n-\t\tcase \'1\': opt->mode |= BWA_MODE_BAM_READ1; break;\n-\t\tcase \'2\': opt->mode |= BWA_MODE_BAM_READ2; break;\n-\t\tcase \'I\': opt->mode |= BWA_MODE_IL13; break;\n-\t\tcase \'Y\': opt->mode |= BWA_MODE_CFY; break;\n-\t\tcase \'B\': opt->mode |= atoi(optarg) << 24; break;\n-\t\tdefault: return 1;\n-\t\t}\n-\t}\n-\tif (opte > 0) {\n-\t\topt->max_gape = opte;\n-\t\topt->mode &= ~BWA_MODE_GAPE;\n-\t}\n-\n-\tif (optind + 2 > argc) {\n-\t\tfprintf(stderr, "\\n");\n-\t\tfprintf(stderr, "Usage: bwa aln [options] <prefix> <in.fq>\\n\\n");\n-\t\tfprintf(stderr, "Options: -n NUM max #diff (int) or missing prob under %.2f err rate (float) [%.2f]\\n",\n-\t\t\t\tBWA_AVG_ERR, opt->fnr);\n-\t\tfprintf(stderr, " -o INT maximum number or fraction of gap opens [%d]\\n", opt->max_gapo);\n-\t\tfprintf(stderr, " -e INT maximum number of gap extensions, -1 for disabling long gaps [-1]\\n");\n-\t\tfprintf(stderr, " -i INT do not put an indel within INT bp towards the ends [%d]\\n", opt->indel_end_skip);\n-\t\tfprintf(stderr, " -d INT maximum occurrences for extending a long deletion [%d]\\n", opt->max_del_occ);\n-\t\tfprintf(stderr, " -l INT seed length [%d]\\n", opt->seed_len);\n-\t\tfprintf(stderr, " -k INT maximum differences in the seed [%d]\\n", opt->max_seed_diff);\n-\t\tfprintf(stderr, " -m INT maximum entries in the queue [%d]\\n", opt->max_entries);\n-\t\tfprintf(stderr, " -t INT number of threads [%d]\\n", opt->n_threads);\n-\t\tfprintf(stderr, " -M INT mismatch penalty [%d]\\n", opt->s_mm);\n-\t\tfprintf(stderr, " -O INT gap open penalty [%d]\\n", opt->s_gapo);\n-\t\tfprintf(stderr, " -E INT gap extension penalty [%d]\\n", opt->s_gape);\n-\t\tfprintf(stderr, " -R INT stop searching when there are >INT equally best hits [%d]\\n", opt->max_top2);\n-\t\tfprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\\n", BWA_MIN_RDLEN, opt->trim_qual);\n- fprintf(stderr, " -f FILE file to write output to instead of stdout\\n");\n-\t\tfprintf(stderr, " -B INT length of barcode\\n");\n-\t\tfprintf(stderr, " -L log-scaled gap penalty for long deletions\\n");\n-\t\tfprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\\n");\n-\t\tfprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\\n");\n-\t\tfprintf(stderr, " -b the input read file is in the BAM format\\n");\n-\t\tfprintf(stderr, " -0 use single-end reads only (effective with -b)\\n");\n-\t\tfprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\\n");\n-\t\tfprintf(stderr, " -2 use the 2nd read in a pair (effective with -b)\\n");\n-\t\tfprintf(stderr, " -Y filter Casava-filtered sequences\\n");\n-\t\tfprintf(stderr, "\\n");\n-\t\treturn 1;\n-\t}\n-\tif (opt->fnr > 0.0) {\n-\t\tint i, k;\n-\t\tfor (i = 17, k = 0; i <= 250; ++i) {\n-\t\t\tint l = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr);\n-\t\t\tif (l != k) fprintf(stderr, "[bwa_aln] %dbp reads: max_diff = %d\\n", i, l);\n-\t\t\tk = l;\n-\t\t}\n-\t}\n-\tif ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) {\n-\t\tfprintf(stderr, "[%s] fail to locate the index\\n", __func__);\n-\t\tfree(opt);\n-\t\treturn 1;\n-\t}\n-\tbwa_aln_core(prefix, argv[optind+1], opt);\n-\tfree(opt); free(prefix);\n-\treturn 0;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtaln.h --- a/bwa-0.7.9a/bwtaln.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,153 +0,0 @@ -#ifndef BWTALN_H -#define BWTALN_H - -#include <stdint.h> -#include "bwt.h" - -#define BWA_TYPE_NO_MATCH 0 -#define BWA_TYPE_UNIQUE 1 -#define BWA_TYPE_REPEAT 2 -#define BWA_TYPE_MATESW 3 - -#define SAM_FPD 1 // paired -#define SAM_FPP 2 // properly paired -#define SAM_FSU 4 // self-unmapped -#define SAM_FMU 8 // mate-unmapped -#define SAM_FSR 16 // self on the reverse strand -#define SAM_FMR 32 // mate on the reverse strand -#define SAM_FR1 64 // this is read one -#define SAM_FR2 128 // this is read two -#define SAM_FSC 256 // secondary alignment - -#define BWA_AVG_ERR 0.02 -#define BWA_MIN_RDLEN 35 // for read trimming - -#define BWA_MAX_BCLEN 63 // maximum barcode length; 127 is the maximum - -#ifndef bns_pac -#define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3) -#endif - -#define FROM_M 0 -#define FROM_I 1 -#define FROM_D 2 -#define FROM_S 3 - -#define SAI_MAGIC "SAI\1" - -typedef struct { - bwtint_t w; - int bid; -} bwt_width_t; - -typedef struct { - uint64_t n_mm:8, n_gapo:8, n_gape:8, score:20, n_ins:10, n_del:10; - bwtint_t k, l; -} bwt_aln1_t; - -typedef uint16_t bwa_cigar_t; -/* rgoya: If changing order of bytes, beware of operations like: - * s->cigar[0] += s->full_len - s->len; - */ -#define CIGAR_OP_SHIFT 14 -#define CIGAR_LN_MASK 0x3fff - -#define __cigar_op(__cigar) ((__cigar)>>CIGAR_OP_SHIFT) -#define __cigar_len(__cigar) ((__cigar)&CIGAR_LN_MASK) -#define __cigar_create(__op, __len) ((__op)<<CIGAR_OP_SHIFT | (__len)) - -typedef struct { - uint32_t n_cigar:15, gap:8, mm:8, strand:1; - int ref_shift; - bwtint_t pos; - bwa_cigar_t *cigar; -} bwt_multi1_t; - -typedef struct { - char *name; - ubyte_t *seq, *rseq, *qual; - uint32_t len:20, strand:1, type:2, dummy:1, extra_flag:8; - uint32_t n_mm:8, n_gapo:8, n_gape:8, mapQ:8; - int score; - int clip_len; - // alignments in SA coordinates - int n_aln; - bwt_aln1_t *aln; - // multiple hits - int n_multi; - bwt_multi1_t *multi; - // alignment information - bwtint_t sa, pos; - uint64_t c1:28, c2:28, seQ:8; // number of top1 and top2 hits; single-end mapQ - int ref_shift; - int n_cigar; - bwa_cigar_t *cigar; - // for multi-threading only - int tid; - // barcode - char bc[BWA_MAX_BCLEN+1]; // null terminated; up to BWA_MAX_BCLEN bases - // NM and MD tags - uint32_t full_len:20, nm:12; - char *md; -} bwa_seq_t; - -#define BWA_MODE_GAPE 0x01 -#define BWA_MODE_COMPREAD 0x02 -#define BWA_MODE_LOGGAP 0x04 -#define BWA_MODE_CFY 0x08 -#define BWA_MODE_NONSTOP 0x10 -#define BWA_MODE_BAM 0x20 -#define BWA_MODE_BAM_SE 0x40 -#define BWA_MODE_BAM_READ1 0x80 -#define BWA_MODE_BAM_READ2 0x100 -#define BWA_MODE_IL13 0x200 - -typedef struct { - int s_mm, s_gapo, s_gape; - int mode; // bit 24-31 are the barcode length - int indel_end_skip, max_del_occ, max_entries; - float fnr; - int max_diff, max_gapo, max_gape; - int max_seed_diff, seed_len; - int n_threads; - int max_top2; - int trim_qual; -} gap_opt_t; - -#define BWA_PET_STD 1 - -typedef struct { - int max_isize, force_isize; - int max_occ; - int n_multi, N_multi; - int type, is_sw, is_preload; - double ap_prior; -} pe_opt_t; - -struct __bwa_seqio_t; -typedef struct __bwa_seqio_t bwa_seqio_t; - -#ifdef __cplusplus -extern "C" { -#endif - - gap_opt_t *gap_init_opt(); - void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt); - - bwa_seqio_t *bwa_seq_open(const char *fn); - bwa_seqio_t *bwa_bam_open(const char *fn, int which); - void bwa_seq_close(bwa_seqio_t *bs); - void seq_reverse(int len, ubyte_t *seq, int is_comp); - bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int mode, int trim_qual); - void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs); - - int bwa_cal_maxdiff(int l, double err, double thres); - void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt); - - void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtgap.c --- a/bwa-0.7.9a/bwtgap.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,264 +0,0 @@\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include "bwtgap.h"\n-#include "bwtaln.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-#define STATE_M 0\n-#define STATE_I 1\n-#define STATE_D 2\n-\n-#define aln_score(m,o,e,p) ((m)*(p)->s_mm + (o)*(p)->s_gapo + (e)*(p)->s_gape)\n-\n-gap_stack_t *gap_init_stack2(int max_score)\n-{\n-\tgap_stack_t *stack;\n-\tstack = (gap_stack_t*)calloc(1, sizeof(gap_stack_t));\n-\tstack->n_stacks = max_score;\n-\tstack->stacks = (gap_stack1_t*)calloc(stack->n_stacks, sizeof(gap_stack1_t));\n-\treturn stack;\n-}\n-\n-gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt)\n-{\n-\treturn gap_init_stack2(aln_score(max_mm+1, max_gapo+1, max_gape+1, opt));\n-}\n-\n-void gap_destroy_stack(gap_stack_t *stack)\n-{\n-\tint i;\n-\tfor (i = 0; i != stack->n_stacks; ++i) free(stack->stacks[i].stack);\n-\tfree(stack->stacks);\n-\tfree(stack);\n-}\n-\n-static void gap_reset_stack(gap_stack_t *stack)\n-{\n-\tint i;\n-\tfor (i = 0; i != stack->n_stacks; ++i)\n-\t\tstack->stacks[i].n_entries = 0;\n-\tstack->best = stack->n_stacks;\n-\tstack->n_entries = 0;\n-}\n-\n-static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, int n_mm, int n_gapo, int n_gape, int n_ins, int n_del,\n-\t\t\t\t\t\t\tint state, int is_diff, const gap_opt_t *opt)\n-{\n-\tint score;\n-\tgap_entry_t *p;\n-\tgap_stack1_t *q;\n-\tscore = aln_score(n_mm, n_gapo, n_gape, opt);\n-\tq = stack->stacks + score;\n-\tif (q->n_entries == q->m_entries) {\n-\t\tq->m_entries = q->m_entries? q->m_entries<<1 : 4;\n-\t\tq->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries);\n-\t}\n-\tp = q->stack + q->n_entries;\n-\tp->info = (u_int32_t)score<<21 | i; p->k = k; p->l = l;\n-\tp->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape;\n-\tp->n_ins = n_ins; p->n_del = n_del;\n-\tp->state = state; \n-\tp->last_diff_pos = is_diff? i : 0;\n-\t++(q->n_entries);\n-\t++(stack->n_entries);\n-\tif (stack->best > score) stack->best = score;\n-}\n-\n-static inline void gap_pop(gap_stack_t *stack, gap_entry_t *e)\n-{\n-\tgap_stack1_t *q;\n-\tq = stack->stacks + stack->best;\n-\t*e = q->stack[q->n_entries - 1];\n-\t--(q->n_entries);\n-\t--(stack->n_entries);\n-\tif (q->n_entries == 0 && stack->n_entries) { // reset best\n-\t\tint i;\n-\t\tfor (i = stack->best + 1; i < stack->n_stacks; ++i)\n-\t\t\tif (stack->stacks[i].n_entries != 0) break;\n-\t\tstack->best = i;\n-\t} else if (stack->n_entries == 0) stack->best = stack->n_stacks;\n-}\n-\n-static inline void gap_shadow(int x, int len, bwtint_t max, int last_diff_pos, bwt_width_t *w)\n-{\n-\tint i, j;\n-\tfor (i = j = 0; i < last_diff_pos; ++i) {\n-\t\tif (w[i].w > x) w[i].w -= x;\n-\t\telse if (w[i].w == x) {\n-\t\t\tw[i].bid = 1;\n-\t\t\tw[i].w = max - (++j);\n-\t\t} // else should not happen\n-\t}\n-}\n-\n-static inline int int_log2(uint32_t v)\n-{\n-\tint c = 0;\n-\tif (v & 0xffff0000u) { v >>= 16; c |= 16; }\n-\tif (v & 0xff00) { v >>= 8; c |= 8; }\n-\tif (v & 0xf0) { v >>= 4; c |= 4; }\n-\tif (v & 0xc) { v >>= 2; c |= 2; }\n-\tif (v & 0x2) c |= 1;\n-\treturn c;\n-}\n-\n-bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_width_t *width,\n-\t\t\t\t\t\t bwt_width_t *seed_width, const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack)\n-{ // $seq is the reverse complement of the input read\n-\tint best_score = aln_score(opt->max_diff+1, opt->max_gapo+1, opt->max_gape+1, opt);\n-\tint best_diff = opt->max_diff + 1, max_diff = opt->max_diff;\n-\tint best_cnt = 0;\n-\tint max_entries = 0, j, _j, n_aln, m_aln;\n-\tbwt_aln1_t *aln;\n-\n-\tm_aln = 4; n_aln = 0;\n-\taln = (bwt_aln1_t*)calloc(m_aln, sizeof(bwt_aln1_t));\n-\n-\t// check whether there are too many N\n-\tfor (j = _j = 0; j < len; ++j)\n-\t\tif (seq[j] > 3) ++_j;\n-\tif (_j > max_diff) {\n-\t\t*_n_aln = n_aln;\n-\t\treturn aln;\n-\t}\n-\n-\t//for (j = 0; j != len; ++j) printf("#0 %d: [%d,%u]\\t[%d,%u]\\n", j, w[0][j].bid, w[0][j].w, w[1][j].bid, w[1][j].w);\n-\tgap_reset_stack(stack); // reset stack\n-\tgap_push(stack, len, 0, bwt->seq_len, 0, 0, 0, 0, 0, 0, 0, opt);\n-\n-\twhile (stack->n_entries) {\n-\t\tgap_entry_t e;\n-\t\tint i, m, m_see'..b'l);\n-\t\t\tif (n_aln == 0) {\n-\t\t\t\tbest_score = score;\n-\t\t\t\tbest_diff = e.n_mm + e.n_gapo;\n-\t\t\t\tif (opt->mode & BWA_MODE_GAPE) best_diff += e.n_gape;\n-\t\t\t\tif (!(opt->mode & BWA_MODE_NONSTOP))\n-\t\t\t\t\tmax_diff = (best_diff + 1 > opt->max_diff)? opt->max_diff : best_diff + 1; // top2 behaviour\n-\t\t\t}\n-\t\t\tif (score == best_score) best_cnt += l - k + 1;\n-\t\t\telse if (best_cnt > opt->max_top2) break; // top2b behaviour\n-\t\t\tif (e.n_gapo) { // check whether the hit has been found. this may happen when a gap occurs in a tandem repeat\n-\t\t\t\tfor (j = 0; j != n_aln; ++j)\n-\t\t\t\t\tif (aln[j].k == k && aln[j].l == l) break;\n-\t\t\t\tif (j < n_aln) do_add = 0;\n-\t\t\t}\n-\t\t\tif (do_add) { // append\n-\t\t\t\tbwt_aln1_t *p;\n-\t\t\t\tgap_shadow(l - k + 1, len, bwt->seq_len, e.last_diff_pos, width);\n-\t\t\t\tif (n_aln == m_aln) {\n-\t\t\t\t\tm_aln <<= 1;\n-\t\t\t\t\taln = (bwt_aln1_t*)realloc(aln, m_aln * sizeof(bwt_aln1_t));\n-\t\t\t\t\tmemset(aln + m_aln/2, 0, m_aln/2*sizeof(bwt_aln1_t));\n-\t\t\t\t}\n-\t\t\t\tp = aln + n_aln;\n-\t\t\t\tp->n_mm = e.n_mm; p->n_gapo = e.n_gapo; p->n_gape = e.n_gape;\n-\t\t\t\tp->n_ins = e.n_ins; p->n_del = e.n_del;\n-\t\t\t\tp->k = k; p->l = l;\n-\t\t\t\tp->score = score;\n-\t\t\t\t//fprintf(stderr, "*** n_mm=%d,n_gapo=%d,n_gape=%d,n_ins=%d,n_del=%d\\n", e.n_mm, e.n_gapo, e.n_gape, e.n_ins, e.n_del);\n-\t\t\t\t++n_aln;\n-\t\t\t}\n-\t\t\tcontinue;\n-\t\t}\n-\n-\t\t--i;\n-\t\tbwt_2occ4(bwt, k - 1, l, cnt_k, cnt_l); // retrieve Occ values\n-\t\tocc = l - k + 1;\n-\t\t// test whether diff is allowed\n-\t\tallow_diff = allow_M = 1;\n-\t\tif (i > 0) {\n-\t\t\tint ii = i - (len - opt->seed_len);\n-\t\t\tif (width[i-1].bid > m-1) allow_diff = 0;\n-\t\t\telse if (width[i-1].bid == m-1 && width[i].bid == m-1 && width[i-1].w == width[i].w) allow_M = 0;\n-\t\t\tif (seed_width && ii > 0) {\n-\t\t\t\tif (seed_width[ii-1].bid > m_seed-1) allow_diff = 0;\n-\t\t\t\telse if (seed_width[ii-1].bid == m_seed-1 && seed_width[ii].bid == m_seed-1\n-\t\t\t\t\t\t && seed_width[ii-1].w == seed_width[ii].w) allow_M = 0;\n-\t\t\t}\n-\t\t}\n-\t\t// indels\n-\t\ttmp = (opt->mode & BWA_MODE_LOGGAP)? int_log2(e.n_gape + e.n_gapo)/2+1 : e.n_gapo + e.n_gape;\n-\t\tif (allow_diff && i >= opt->indel_end_skip + tmp && len - i >= opt->indel_end_skip + tmp) {\n-\t\t\tif (e.state == STATE_M) { // gap open\n-\t\t\t\tif (e.n_gapo < opt->max_gapo) { // gap open is allowed\n-\t\t\t\t\t// insertion\n-\t\t\t\t\tgap_push(stack, i, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, e.n_ins + 1, e.n_del, STATE_I, 1, opt);\n-\t\t\t\t\t// deletion\n-\t\t\t\t\tfor (j = 0; j != 4; ++j) {\n-\t\t\t\t\t\tk = bwt->L2[j] + cnt_k[j] + 1;\n-\t\t\t\t\t\tl = bwt->L2[j] + cnt_l[j];\n-\t\t\t\t\t\tif (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, e.n_ins, e.n_del + 1, STATE_D, 1, opt);\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} else if (e.state == STATE_I) { // extention of an insertion\n-\t\t\t\tif (e.n_gape < opt->max_gape) // gap extention is allowed\n-\t\t\t\t\tgap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, e.n_ins + 1, e.n_del, STATE_I, 1, opt);\n-\t\t\t} else if (e.state == STATE_D) { // extention of a deletion\n-\t\t\t\tif (e.n_gape < opt->max_gape) { // gap extention is allowed\n-\t\t\t\t\tif (e.n_gape + e.n_gapo < max_diff || occ < opt->max_del_occ) {\n-\t\t\t\t\t\tfor (j = 0; j != 4; ++j) {\n-\t\t\t\t\t\t\tk = bwt->L2[j] + cnt_k[j] + 1;\n-\t\t\t\t\t\t\tl = bwt->L2[j] + cnt_l[j];\n-\t\t\t\t\t\t\tif (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, e.n_ins, e.n_del + 1, STATE_D, 1, opt);\n-\t\t\t\t\t\t}\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\t\t// mismatches\n-\t\tif (allow_diff && allow_M) { // mismatch is allowed\n-\t\t\tfor (j = 1; j <= 4; ++j) {\n-\t\t\t\tint c = (seq[i] + j) & 3;\n-\t\t\t\tint is_mm = (j != 4 || seq[i] > 3);\n-\t\t\t\tk = bwt->L2[c] + cnt_k[c] + 1;\n-\t\t\t\tl = bwt->L2[c] + cnt_l[c];\n-\t\t\t\tif (k <= l) gap_push(stack, i, k, l, e.n_mm + is_mm, e.n_gapo, e.n_gape, e.n_ins, e.n_del, STATE_M, is_mm, opt);\n-\t\t\t}\n-\t\t} else if (seq[i] < 4) { // try exact match only\n-\t\t\tint c = seq[i] & 3;\n-\t\t\tk = bwt->L2[c] + cnt_k[c] + 1;\n-\t\t\tl = bwt->L2[c] + cnt_l[c];\n-\t\t\tif (k <= l) gap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape, e.n_ins, e.n_del, STATE_M, 0, opt);\n-\t\t}\n-\t}\n-\n-\t*_n_aln = n_aln;\n-\t//fprintf(stderr, "max_entries = %d\\n", max_entries);\n-\treturn aln;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtgap.h --- a/bwa-0.7.9a/bwtgap.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,40 +0,0 @@ -#ifndef BWTGAP_H_ -#define BWTGAP_H_ - -#include "bwt.h" -#include "bwtaln.h" - -typedef struct { // recursion stack - u_int32_t info; // score<<21 | i - u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6; - u_int32_t n_ins:16, n_del:16; - int last_diff_pos; - bwtint_t k, l; // (k,l) is the SA region of [i,n-1] -} gap_entry_t; - -typedef struct { - int n_entries, m_entries; - gap_entry_t *stack; -} gap_stack1_t; - -typedef struct { - int n_stacks, best, n_entries; - gap_stack1_t *stacks; -} gap_stack_t; - -#ifdef __cplusplus -extern "C" { -#endif - - gap_stack_t *gap_init_stack2(int max_score); - gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt); - void gap_destroy_stack(gap_stack_t *stack); - bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_width_t *w, - bwt_width_t *seed_w, const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack); - void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtindex.c --- a/bwa-0.7.9a/bwtindex.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,287 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <unistd.h>\n-#include <time.h>\n-#include <zlib.h>\n-#include "bntseq.h"\n-#include "bwt.h"\n-#include "utils.h"\n-\n-#ifdef _DIVBWT\n-#include "divsufsort.h"\n-#endif\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-\n-int is_bwt(ubyte_t *T, int n);\n-\n-int64_t bwa_seq_len(const char *fn_pac)\n-{\n-\tFILE *fp;\n-\tint64_t pac_len;\n-\tubyte_t c;\n-\tfp = xopen(fn_pac, "rb");\n-\terr_fseek(fp, -1, SEEK_END);\n-\tpac_len = err_ftell(fp);\n-\terr_fread_noeof(&c, 1, 1, fp);\n-\terr_fclose(fp);\n-\treturn (pac_len - 1) * 4 + (int)c;\n-}\n-\n-bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)\n-{\n-\tbwt_t *bwt;\n-\tubyte_t *buf, *buf2;\n-\tint i, pac_size;\n-\tFILE *fp;\n-\n-\t// initialization\n-\tbwt = (bwt_t*)calloc(1, sizeof(bwt_t));\n-\tbwt->seq_len = bwa_seq_len(fn_pac);\n-\tbwt->bwt_size = (bwt->seq_len + 15) >> 4;\n-\tfp = xopen(fn_pac, "rb");\n-\n-\t// prepare sequence\n-\tpac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);\n-\tbuf2 = (ubyte_t*)calloc(pac_size, 1);\n-\terr_fread_noeof(buf2, 1, pac_size, fp);\n-\terr_fclose(fp);\n-\tmemset(bwt->L2, 0, 5 * 4);\n-\tbuf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);\n-\tfor (i = 0; i < bwt->seq_len; ++i) {\n-\t\tbuf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3;\n-\t\t++bwt->L2[1+buf[i]];\n-\t}\n-\tfor (i = 2; i <= 4; ++i) bwt->L2[i] += bwt->L2[i-1];\n-\tfree(buf2);\n-\n-\t// Burrows-Wheeler Transform\n-\tif (use_is) {\n-\t\tbwt->primary = is_bwt(buf, bwt->seq_len);\n-\t} else {\n-#ifdef _DIVBWT\n-\t\tbwt->primary = divbwt(buf, buf, 0, bwt->seq_len);\n-#else\n-\t\terr_fatal_simple("libdivsufsort is not compiled in.");\n-#endif\n-\t}\n-\tbwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4);\n-\tfor (i = 0; i < bwt->seq_len; ++i)\n-\t\tbwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1);\n-\tfree(buf);\n-\treturn bwt;\n-}\n-\n-int bwa_pac2bwt(int argc, char *argv[]) // the "pac2bwt" command; IMPORTANT: bwt generated at this step CANNOT be used with BWA. bwtupdate is required!\n-{\n-\tbwt_t *bwt;\n-\tint c, use_is = 1;\n-\twhile ((c = getopt(argc, argv, "d")) >= 0) {\n-\t\tswitch (c) {\n-\t\tcase \'d\': use_is = 0; break;\n-\t\tdefault: return 1;\n-\t\t}\n-\t}\n-\tif (optind + 2 > argc) {\n-\t\tfprintf(stderr, "Usage: bwa pac2bwt [-d] <in.pac> <out.bwt>\\n");\n-\t\treturn 1;\n-\t}\n-\tbwt = bwt_pac2bwt(argv[optind], use_is);\n-\tbwt_dump_bwt(argv[optind+1], bwt);\n-\tbwt_destroy(bwt);\n-\treturn 0;\n-}\n-\n-#define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3)\n-\n-void bwt_bwtupdate_core(bwt_t *bwt)\n-{\n-\tbwtint_t i, k, c[4], n_occ;\n-\tuint32_t *buf;\n-\n-\tn_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;\n-\tbwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size\n-\tbuf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt\n-\tc[0] = c[1] = c[2] = c[3] = 0;\n-\tfor (i = k = 0; i < bwt->seq_len; ++i) {\n-\t\tif (i % OCC_INTERVA'..b':")) >= 0) {\n-\t\tswitch (c) {\n-\t\tcase \'i\': sa_intv = atoi(optarg); break;\n-\t\tdefault: return 1;\n-\t\t}\n-\t}\n-\tif (optind + 2 > argc) {\n-\t\tfprintf(stderr, "Usage: bwa bwt2sa [-i %d] <in.bwt> <out.sa>\\n", sa_intv);\n-\t\treturn 1;\n-\t}\n-\tbwt = bwt_restore_bwt(argv[optind]);\n-\tbwt_cal_sa(bwt, sa_intv);\n-\tbwt_dump_sa(argv[optind+1], bwt);\n-\tbwt_destroy(bwt);\n-\treturn 0;\n-}\n-\n-int bwa_index(int argc, char *argv[]) // the "index" command\n-{\n-\textern void bwa_pac_rev_core(const char *fn, const char *fn_rev);\n-\n-\tchar *prefix = 0, *str, *str2, *str3;\n-\tint c, algo_type = 0, is_64 = 0;\n-\tclock_t t;\n-\tint64_t l_pac;\n-\n-\twhile ((c = getopt(argc, argv, "6a:p:")) >= 0) {\n-\t\tswitch (c) {\n-\t\tcase \'a\': // if -a is not set, algo_type will be determined later\n-\t\t\tif (strcmp(optarg, "div") == 0) algo_type = 1;\n-\t\t\telse if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;\n-\t\t\telse if (strcmp(optarg, "is") == 0) algo_type = 3;\n-\t\t\telse err_fatal(__func__, "unknown algorithm: \'%s\'.", optarg);\n-\t\t\tbreak;\n-\t\tcase \'p\': prefix = strdup(optarg); break;\n-\t\tcase \'6\': is_64 = 1; break;\n-\t\tdefault: return 1;\n-\t\t}\n-\t}\n-\n-\tif (optind + 1 > argc) {\n-\t\tfprintf(stderr, "\\n");\n-\t\tfprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\\n\\n");\n-\t\tfprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\\n");\n-\t\tfprintf(stderr, " -p STR prefix of the index [same as fasta name]\\n");\n-\t\tfprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \\n");\n-\t\tfprintf(stderr, "\\n");\n-\t\tfprintf(stderr,\t"Warning: `-a bwtsw\' does not work for short genomes, while `-a is\' and\\n");\n-\t\tfprintf(stderr, " `-a div\' do not work not for long genomes. Please choose `-a\'\\n");\n-\t\tfprintf(stderr, " according to the length of the genome.\\n\\n");\n-\t\treturn 1;\n-\t}\n-\tif (prefix == 0) {\n-\t\tprefix = malloc(strlen(argv[optind]) + 4);\n-\t\tstrcpy(prefix, argv[optind]);\n-\t\tif (is_64) strcat(prefix, ".64");\n-\t}\n-\tstr = (char*)calloc(strlen(prefix) + 10, 1);\n-\tstr2 = (char*)calloc(strlen(prefix) + 10, 1);\n-\tstr3 = (char*)calloc(strlen(prefix) + 10, 1);\n-\n-\t{ // nucleotide indexing\n-\t\tgzFile fp = xzopen(argv[optind], "r");\n-\t\tt = clock();\n-\t\tfprintf(stderr, "[bwa_index] Pack FASTA... ");\n-\t\tl_pac = bns_fasta2bntseq(fp, prefix, 0);\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC);\n-\t\terr_gzclose(fp);\n-\t}\n-\tif (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT\n-\t{\n-\t\tstrcpy(str, prefix); strcat(str, ".pac");\n-\t\tstrcpy(str2, prefix); strcat(str2, ".bwt");\n-\t\tt = clock();\n-\t\tfprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\\n");\n-\t\tif (algo_type == 2) bwt_bwtgen(str, str2);\n-\t\telse if (algo_type == 1 || algo_type == 3) {\n-\t\t\tbwt_t *bwt;\n-\t\t\tbwt = bwt_pac2bwt(str, algo_type == 3);\n-\t\t\tbwt_dump_bwt(str2, bwt);\n-\t\t\tbwt_destroy(bwt);\n-\t\t}\n-\t\tfprintf(stderr, "[bwa_index] %.2f seconds elapse.\\n", (float)(clock() - t) / CLOCKS_PER_SEC);\n-\t}\n-\t{\n-\t\tbwt_t *bwt;\n-\t\tstrcpy(str, prefix); strcat(str, ".bwt");\n-\t\tt = clock();\n-\t\tfprintf(stderr, "[bwa_index] Update BWT... ");\n-\t\tbwt = bwt_restore_bwt(str);\n-\t\tbwt_bwtupdate_core(bwt);\n-\t\tbwt_dump_bwt(str, bwt);\n-\t\tbwt_destroy(bwt);\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC);\n-\t}\n-\t{\n-\t\tgzFile fp = xzopen(argv[optind], "r");\n-\t\tt = clock();\n-\t\tfprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");\n-\t\tl_pac = bns_fasta2bntseq(fp, prefix, 1);\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC);\n-\t\terr_gzclose(fp);\n-\t}\n-\t{\n-\t\tbwt_t *bwt;\n-\t\tstrcpy(str, prefix); strcat(str, ".bwt");\n-\t\tstrcpy(str3, prefix); strcat(str3, ".sa");\n-\t\tt = clock();\n-\t\tfprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");\n-\t\tbwt = bwt_restore_bwt(str);\n-\t\tbwt_cal_sa(bwt, 32);\n-\t\tbwt_dump_sa(str3, bwt);\n-\t\tbwt_destroy(bwt);\n-\t\tfprintf(stderr, "%.2f sec\\n", (float)(clock() - t) / CLOCKS_PER_SEC);\n-\t}\n-\tfree(str3); free(str2); free(str); free(prefix);\n-\treturn 0;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtsw2.h --- a/bwa-0.7.9a/bwtsw2.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,69 +0,0 @@ -#ifndef LH3_BWTSW2_H -#define LH3_BWTSW2_H - -#include <stdint.h> -#include "bntseq.h" -#include "bwt_lite.h" -#include "bwt.h" - -#define BSW2_FLAG_MATESW 0x100 -#define BSW2_FLAG_TANDEM 0x200 -#define BSW2_FLAG_MOVED 0x400 -#define BSW2_FLAG_RESCUED 0x800 - -typedef struct { - int skip_sw:8, cpy_cmt:8, hard_clip:16; - int a, b, q, r, t, qr, bw, max_ins, max_chain_gap; - int z, is, t_seeds, multi_2nd; - float mask_level, coef; - int n_threads, chunk_size; -} bsw2opt_t; - -typedef struct { - bwtint_t k, l; - uint32_t flag:18, n_seeds:13, is_rev:1; - int len, G, G2; - int beg, end; -} bsw2hit_t; - -typedef struct { - int flag, nn, n_cigar, chr, pos, qual, mchr, mpos, pqual, isize, nm; - uint32_t *cigar; -} bsw2aux_t; - -typedef struct { - int n, max; - bsw2hit_t *hits; - bsw2aux_t *aux; -} bwtsw2_t; - -typedef struct { - void *stack; - int max_l; - uint8_t *aln_mem; -} bsw2global_t; - -typedef struct { - int l, tid; - char *name, *seq, *qual, *sam, *comment; -} bsw2seq1_t; - -#ifdef __cplusplus -extern "C" { -#endif - - bsw2opt_t *bsw2_init_opt(); - bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool); - void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn, const char *fn2); - void bsw2_destroy(bwtsw2_t *b); - - bsw2global_t *bsw2_global_init(); - void bsw2_global_destroy(bsw2global_t *_pool); - - void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, bsw2seq1_t *seq, bwtsw2_t **hit); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtsw2_aux.c --- a/bwa-0.7.9a/bwtsw2_aux.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,776 +0,0 @@\n-#include <stdlib.h>\n-#include <stdio.h>\n-#include <math.h>\n-#ifdef HAVE_CONFIG_H\n-#include "config.h"\n-#endif\n-#ifdef HAVE_PTHREAD\n-#include <pthread.h>\n-#endif\n-#include "bntseq.h"\n-#include "bwt_lite.h"\n-#include "utils.h"\n-#include "bwtsw2.h"\n-#include "kstring.h"\n-#include "bwa.h"\n-#include "ksw.h"\n-\n-#include "kseq.h"\n-KSEQ_DECLARE(gzFile)\n-\n-#include "ksort.h"\n-#define __left_lt(a, b) ((a).end > (b).end)\n-KSORT_INIT(hit, bsw2hit_t, __left_lt)\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-\n-extern unsigned char nst_nt4_table[256];\n-\n-unsigned char nt_comp_table[256] = {\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'T\',\'V\',\'G\', \'H\',\'N\',\'N\',\'C\', \'D\',\'N\',\'N\',\'M\', \'N\',\'K\',\'N\',\'N\',\n-\t\'N\',\'N\',\'Y\',\'S\', \'A\',\'N\',\'B\',\'W\', \'X\',\'R\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'n\',\'t\',\'v\',\'g\', \'h\',\'n\',\'n\',\'c\', \'d\',\'n\',\'n\',\'m\', \'n\',\'k\',\'n\',\'n\',\n-\t\'n\',\'n\',\'y\',\'s\', \'a\',\'n\',\'b\',\'w\', \'x\',\'r\',\'n\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\',\n-\t\'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\', \'N\',\'N\',\'N\',\'N\'\n-};\n-\n-extern int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS);\n-extern int bsw2_resolve_query_overlaps(bwtsw2_t *b, float mask_level);\n-\n-bsw2opt_t *bsw2_init_opt()\n-{\n-\tbsw2opt_t *o = (bsw2opt_t*)calloc(1, sizeof(bsw2opt_t));\n-\to->a = 1; o->b = 3; o->q = 5; o->r = 2; o->t = 30;\n-\to->bw = 50;\n-\to->max_ins = 20000;\n-\to->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0; o->skip_sw = 0;\n-\to->mask_level = 0.50f; o->coef = 5.5f;\n-\to->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000;\n-\to->max_chain_gap = 10000;\n-\to->cpy_cmt = 0;\n-\treturn o;\n-}\n-\n-void bsw2_destroy(bwtsw2_t *b)\n-{\n-\tint i;\n-\tif (b == 0) return;\n-\tif (b->aux)\n-\t\tfor (i = 0; i < b->n; ++i) free(b->aux[i].cigar);\n-\tfree(b->aux); free(b->hits);\n-\tfree(b);\n-}\n-\n-bwtsw2_t *bsw2_dup_no_cigar(const bwtsw2_t *b)\n-{\n-\tbwtsw2_t *p;\n-\tp = calloc(1, sizeof(bwtsw2_t));\n-\tp->max = p->n = b->n;\n-\tif (b->n) {\n-\t\tkroundup32(p->max);\n-\t\tp->hits = calloc(p->max, sizeof(bsw2hit_t));\n-\t\tmemcpy(p->hits, b->hits, p->n * sizeof(bsw2hit_t));\n-\t}\n-\treturn p;\n-}\n-\n-#define __gen_ap(par, opt) do {\t\t\t\t\t\t\t\t\t\\\n-\t\tint i;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfor (i = 0; i < 25; ++i) (par).matrix[i] = -(opt)->b;\t\\\n-\t\tfor (i = 0; i < 4; ++i) (par).matrix[i*5+i] = (opt)->a; \\\n-\t\t(par).gap_open = (opt)->q; (par).gap_ext = (opt)->r;\t\\\n-\t\t(par).gap_end = (opt)->r;\t\t\t\t\t\t\t\t\\\n-\t\t(par).row = 5; (par).band_width = opt->bw;\t\t\t\t\\\n-\t} while (0)\n-\n-void bsw2_extend_left(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *_query, int lq, uint8_t *pac, bwtint_t l_pac, uint8_t *_mem)\n-{\n-\tint i;\n-\tbwtint_t k;\n-\tuint8_t *target = 0, *query;\n-\tint8_t mat[25];\n-\n-\tbwa_fill_scmat(opt->a, opt->b, mat);\n-\tquery = calloc(lq, 1);\n-\t// sort according to the descending order of query end\n-\tks_introsort(hit, b->n, b->hits);\n-\ttarget = calloc(((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq, 1);\n-\t// reverse _query\n-\tfor (i = 0; i < lq; ++i) query[lq - i - 1] = _query[i];\n-\t// core loop\n-\tfor (i = 0; i < b->n; ++i) {\n-\t\tbsw2hit_t *p = b->hits + i;\n-\t\tint lt = ((p->beg + 1) / 2 * opt->a + opt->r) / opt->r + lq;\n-\t\tint score, j, qle, tle;\n-\t\tp->n_seeds = 1;\n-\t\tif (p->l || p->k == 0) continue;\n-\t\tfor (j = score = 0; j < i; ++j) {\n-\t\t\tbsw2hit_t *q = b->hits + j;\n-\t\t\tif (q->beg <= p->beg && q->k <= p->k '..b' tid, is_pe;\n-\tbsw2seq_t *_seq;\n-\tconst bsw2opt_t *_opt;\n-\tconst bntseq_t *bns;\n-\tuint8_t *pac;\n-\tconst bwt_t *target;\n-} thread_aux_t;\n-\n-/* another interface to bsw2_aln_core() to facilitate pthread_create() */\n-static void *worker(void *data)\n-{\n-\tthread_aux_t *p = (thread_aux_t*)data;\n-\tbsw2_aln_core(p->_seq, p->_opt, p->bns, p->pac, p->target, p->is_pe);\n-\treturn 0;\n-}\n-#endif\n-\n-/* process sequences stored in _seq, generate SAM lines for these\n- * sequences and reset _seq afterwards. */\n-static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *bns, uint8_t *pac, const bwt_t *target, int is_pe)\n-{\n-\tint i;\n-\tis_pe = is_pe? 1 : 0;\n-\n-#ifdef HAVE_PTHREAD\n-\tif (opt->n_threads <= 1) {\n-\t\tbsw2_aln_core(_seq, opt, bns, pac, target, is_pe);\n-\t} else {\n-\t\tpthread_t *tid;\n-\t\tpthread_attr_t attr;\n-\t\tthread_aux_t *data;\n-\t\tint j;\n-\t\tpthread_attr_init(&attr);\n-\t\tpthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);\n-\t\tdata = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));\n-\t\ttid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));\n-\t\tfor (j = 0; j < opt->n_threads; ++j) {\n-\t\t\tthread_aux_t *p = data + j;\n-\t\t\tp->tid = j; p->_opt = opt; p->bns = bns; p->is_pe = is_pe;\n-\t\t\tp->pac = pac; p->target = target;\n-\t\t\tp->_seq = calloc(1, sizeof(bsw2seq_t));\n-\t\t\tp->_seq->max = (_seq->n + opt->n_threads - 1) / opt->n_threads + 1;\n-\t\t\tp->_seq->n = 0;\n-\t\t\tp->_seq->seq = calloc(p->_seq->max, sizeof(bsw2seq1_t));\n-\t\t}\n-\t\tfor (i = 0; i < _seq->n; ++i) { // assign sequences to each thread\n-\t\t\tbsw2seq_t *p = data[(i>>is_pe)%opt->n_threads]._seq;\n-\t\t\tp->seq[p->n++] = _seq->seq[i];\n-\t\t}\n-\t\tfor (j = 0; j < opt->n_threads; ++j) pthread_create(&tid[j], &attr, worker, &data[j]);\n-\t\tfor (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);\n-\t\tfor (j = 0; j < opt->n_threads; ++j) data[j]._seq->n = 0;\n-\t\tfor (i = 0; i < _seq->n; ++i) { // copy the result from each thread back\n-\t\t\tbsw2seq_t *p = data[(i>>is_pe)%opt->n_threads]._seq;\n-\t\t\t_seq->seq[i] = p->seq[p->n++];\n-\t\t}\n-\t\tfor (j = 0; j < opt->n_threads; ++j) {\n-\t\t\tthread_aux_t *p = data + j;\n-\t\t\tfree(p->_seq->seq);\n-\t\t\tfree(p->_seq);\n-\t\t}\n-\t\tfree(data); free(tid);\n-\t}\n-#else\n-\tbsw2_aln_core(_seq, opt, bns, pac, target, is_pe);\n-#endif\n-\n-\t// print and reset\n-\tfor (i = 0; i < _seq->n; ++i) {\n-\t\tbsw2seq1_t *p = _seq->seq + i;\n-\t\tif (p->sam) err_printf("%s", p->sam);\n-\t\tfree(p->name); free(p->seq); free(p->qual); free(p->sam);\n-\t\tp->tid = -1; p->l = 0;\n-\t\tp->name = p->seq = p->qual = p->sam = 0;\n-\t}\n-\terr_fflush(stdout);\n-\t_seq->n = 0;\n-}\n-\n-void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn, const char *fn2)\n-{\n-\tgzFile fp, fp2;\n-\tkseq_t *ks, *ks2;\n-\tint l, is_pe = 0, i, n;\n-\tuint8_t *pac;\n-\tbsw2seq_t *_seq;\n-\tbseq1_t *bseq;\n-\n-\tpac = calloc(bns->l_pac/4+1, 1);\n-\tfor (l = 0; l < bns->n_seqs; ++l)\n-\t\terr_printf("@SQ\\tSN:%s\\tLN:%d\\n", bns->anns[l].name, bns->anns[l].len);\n-\terr_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac);\n-\tfp = xzopen(fn, "r");\n-\tks = kseq_init(fp);\n-\t_seq = calloc(1, sizeof(bsw2seq_t));\n-\tif (fn2) {\n-\t\tfp2 = xzopen(fn2, "r");\n-\t\tks2 = kseq_init(fp2);\n-\t\tis_pe = 1;\n-\t} else fp2 = 0, ks2 = 0, is_pe = 0;\n-\twhile ((bseq = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {\n-\t\tint size = 0;\n-\t\tif (n > _seq->max) {\n-\t\t\t_seq->max = n;\n-\t\t\tkroundup32(_seq->max);\n-\t\t\t_seq->seq = realloc(_seq->seq, _seq->max * sizeof(bsw2seq1_t));\n-\t\t}\n-\t\t_seq->n = n;\n-\t\tfor (i = 0; i < n; ++i) {\n-\t\t\tbseq1_t *b = &bseq[i];\n-\t\t\tbsw2seq1_t *p = &_seq->seq[i];\n-\t\t\tp->tid = -1; p->l = b->l_seq;\n-\t\t\tp->name = b->name; p->seq = b->seq; p->qual = b->qual; p->comment = b->comment; p->sam = 0;\n-\t\t\tsize += p->l;\n-\t\t}\n-\t\tfprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp) ...\\n", n, size);\n-\t\tfree(bseq);\n-\t\tprocess_seqs(_seq, opt, bns, pac, target, is_pe);\n-\t}\n-\t// free\n-\tfree(pac);\n-\tfree(_seq->seq); free(_seq);\n-\tkseq_destroy(ks);\n-\terr_gzclose(fp);\n-\tif (fn2) {\n-\t\tkseq_destroy(ks2);\n-\t\terr_gzclose(fp2);\n-\t}\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtsw2_chain.c --- a/bwa-0.7.9a/bwtsw2_chain.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,112 +0,0 @@ -#include <stdio.h> -#include "bwtsw2.h" - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -typedef struct { - uint32_t tbeg, tend; - int qbeg, qend; - uint32_t flag:1, idx:31; - int chain; // also reuse as a counter -} hsaip_t; - -#define _hsaip_lt(a, b) ((a).qbeg < (b).qbeg) - -#include "ksort.h" -KSORT_INIT(hsaip, hsaip_t, _hsaip_lt) - -static int chaining(const bsw2opt_t *opt, int shift, int n, hsaip_t *z, hsaip_t *chain) -{ - int j, k, m = 0; - ks_introsort(hsaip, n, z); - for (j = 0; j < n; ++j) { - hsaip_t *p = z + j; - for (k = m - 1; k >= 0; --k) { - hsaip_t *q = chain + k; - int x = p->qbeg - q->qbeg; // always positive - int y = p->tbeg - q->tbeg; - if (y > 0 && x < opt->max_chain_gap && y < opt->max_chain_gap && x - y <= opt->bw && y - x <= opt->bw) { // chained - if (p->qend > q->qend) q->qend = p->qend; - if (p->tend > q->tend) q->tend = p->tend; - ++q->chain; - p->chain = shift + k; - break; - } else if (q->chain > opt->t_seeds * 2) k = 0; // if the chain is strong enough, do not check the previous chains - } - if (k < 0) { // not added to any previous chains - chain[m] = *p; - chain[m].chain = 1; - chain[m].idx = p->chain = shift + m; - ++m; - } - } - return m; -} - -void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2]) -{ - hsaip_t *z[2], *chain[2]; - int i, j, k, n[2], m[2], thres = opt->t_seeds * 2; - char *flag; - // initialization - n[0] = b[0]->n; n[1] = b[1]->n; - z[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); - z[1] = z[0] + n[0]; - chain[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); - for (k = j = 0; k < 2; ++k) { - for (i = 0; i < b[k]->n; ++i) { - bsw2hit_t *p = b[k]->hits + i; - hsaip_t *q = z[k] + i; - q->flag = k; q->idx = i; - q->tbeg = p->k; q->tend = p->k + p->len; - q->chain = -1; - q->qbeg = p->beg; q->qend = p->end; - } - } - // chaining - m[0] = chaining(opt, 0, n[0], z[0], chain[0]); - chain[1] = chain[0] + m[0]; - m[1] = chaining(opt, m[0], n[1], z[1], chain[1]); - // change query coordinate on the reverse strand - for (k = 0; k < m[1]; ++k) { - hsaip_t *p = chain[1] + k; - int tmp = p->qbeg; - p->qbeg = len - p->qend; p->qend = len - tmp; - } - //for (k = 0; k < m[0]; ++k) printf("%d, [%d,%d), [%d,%d)\n", chain[0][k].chain, chain[0][k].tbeg, chain[0][k].tend, chain[0][k].qbeg, chain[0][k].qend); - // filtering - flag = calloc(m[0] + m[1], 1); - ks_introsort(hsaip, m[0] + m[1], chain[0]); - for (k = 1; k < m[0] + m[1]; ++k) { - hsaip_t *p = chain[0] + k; - for (j = 0; j < k; ++j) { - hsaip_t *q = chain[0] + j; - if (flag[q->idx]) continue; - if (q->qend >= p->qend && q->chain > p->chain * thres && p->chain < thres) { - flag[p->idx] = 1; - break; - } - } - } - for (k = 0; k < n[0] + n[1]; ++k) { - hsaip_t *p = z[0] + k; - if (flag[p->chain]) - b[p->flag]->hits[p->idx].G = 0; - } - free(flag); - // squeeze out filtered elements in b[2] - for (k = 0; k < 2; ++k) { - for (j = i = 0; j < n[k]; ++j) { - bsw2hit_t *p = b[k]->hits + j; - if (p->G) { - if (i != j) b[k]->hits[i++] = *p; - else ++i; - } - } - b[k]->n = i; - } - // free - free(z[0]); free(chain[0]); -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtsw2_core.c --- a/bwa-0.7.9a/bwtsw2_core.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,619 +0,0 @@\n-#include <stdlib.h>\n-#include <string.h>\n-#include <stdio.h>\n-#include <sys/resource.h>\n-#include <assert.h>\n-#include "bwt_lite.h"\n-#include "bwtsw2.h"\n-#include "bwt.h"\n-#include "kvec.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-typedef struct {\n-\tbwtint_t k, l;\n-} qintv_t;\n-\n-#define qintv_eq(a, b) ((a).k == (b).k && (a).l == (b).l)\n-#define qintv_hash(a) ((a).k>>7^(a).l<<17)\n-\n-#include "khash.h"\n-KHASH_INIT(qintv, qintv_t, uint64_t, 1, qintv_hash, qintv_eq)\n-KHASH_MAP_INIT_INT64(64, uint64_t)\n-\n-#define MINUS_INF -0x3fffffff\n-#define MASK_LEVEL 0.90f\n-\n-struct __mempool_t;\n-static void mp_destroy(struct __mempool_t*);\n-typedef struct {\n-\tbwtint_t qk, ql;\n-\tint I, D, G;\n-\tuint32_t pj:2, qlen:30;\n-\tint tlen;\n-\tint ppos, upos;\n-\tint cpos[4];\n-} bsw2cell_t;\n-\n-#include "ksort.h"\n-KSORT_INIT_GENERIC(int)\n-#define __hitG_lt(a, b) (((a).G + ((int)(a).n_seeds<<2)) > (b).G + ((int)(b).n_seeds<<2))\n-KSORT_INIT(hitG, bsw2hit_t, __hitG_lt)\n-\n-static const bsw2cell_t g_default_cell = { 0, 0, MINUS_INF, MINUS_INF, MINUS_INF, 0, 0, 0, -1, -1, {-1, -1, -1, -1} };\n-\n-typedef struct {\n-\tint n, max;\n-\tuint32_t tk, tl; // this is fine\n-\tbsw2cell_t *array;\n-} bsw2entry_t, *bsw2entry_p;\n-\n-/* --- BEGIN: Stack operations --- */\n-typedef struct {\n-\tint n_pending;\n-\tkvec_t(bsw2entry_p) stack0, pending;\n-\tstruct __mempool_t *pool;\n-} bsw2stack_t;\n-\n-#define stack_isempty(s) (kv_size(s->stack0) == 0 && s->n_pending == 0)\n-static void stack_destroy(bsw2stack_t *s) { mp_destroy(s->pool); kv_destroy(s->stack0); kv_destroy(s->pending); free(s); }\n-inline static void stack_push0(bsw2stack_t *s, bsw2entry_p e) { kv_push(bsw2entry_p, s->stack0, e); }\n-inline static bsw2entry_p stack_pop(bsw2stack_t *s)\n-{\n-\tassert(!(kv_size(s->stack0) == 0 && s->n_pending != 0));\n-\treturn kv_pop(s->stack0);\n-}\n-/* --- END: Stack operations --- */\n-\n-/* --- BEGIN: memory pool --- */\n-typedef struct __mempool_t {\n-\tint cnt; // if cnt!=0, then there must be memory leak\n-\tkvec_t(bsw2entry_p) pool;\n-} mempool_t;\n-inline static bsw2entry_p mp_alloc(mempool_t *mp)\n-{\n-\t++mp->cnt;\n-\tif (kv_size(mp->pool) == 0) return (bsw2entry_t*)calloc(1, sizeof(bsw2entry_t));\n-\telse return kv_pop(mp->pool);\n-}\n-inline static void mp_free(mempool_t *mp, bsw2entry_p e)\n-{\n-\t--mp->cnt; e->n = 0;\n-\tkv_push(bsw2entry_p, mp->pool, e);\n-}\n-static void mp_destroy(struct __mempool_t *mp)\n-{\n-\tint i;\n-\tfor (i = 0; i != kv_size(mp->pool); ++i) {\n-\t\tfree(kv_A(mp->pool, i)->array);\n-\t\tfree(kv_A(mp->pool, i));\n-\t}\n-\tkv_destroy(mp->pool);\n-\tfree(mp);\n-}\n-/* --- END: memory pool --- */\n-\n-/* --- BEGIN: utilities --- */\n-static khash_t(64) *bsw2_connectivity(const bwtl_t *b)\n-{\n-\tkhash_t(64) *h;\n-\tuint32_t k, l, cntk[4], cntl[4]; // this is fine\n-\tuint64_t x;\n-\tkhiter_t iter;\n-\tint j, ret;\n-\tkvec_t(uint64_t) stack;\n-\n-\tkv_init(stack);\n-\th = kh_init(64);\n-\tkh_resize(64, h, b->seq_len * 4);\n-\tx = b->seq_len;\n-\tkv_push(uint64_t, stack, x);\n-\twhile (kv_size(stack)) {\n-\t\tx = kv_pop(stack);\n-\t\tk = x>>32; l = (uint32_t)x;\n-\t\tbwtl_2occ4(b, k-1, l, cntk, cntl);\n-\t\tfor (j = 0; j != 4; ++j) {\n-\t\t\tk = b->L2[j] + cntk[j] + 1;\n-\t\t\tl = b->L2[j] + cntl[j];\n-\t\t\tif (k > l) continue;\n-\t\t\tx = (uint64_t)k << 32 | l;\n-\t\t\titer = kh_put(64, h, x, &ret);\n-\t\t\tif (ret) { // if not present\n-\t\t\t\tkh_value(h, iter) = 1;\n-\t\t\t\tkv_push(uint64_t, stack, x);\n-\t\t\t} else ++kh_value(h, iter);\n-\t\t}\n-\t}\n-\tkv_destroy(stack);\n-\t//fprintf(stderr, "[bsw2_connectivity] %u nodes in the DAG\\n", kh_size(h));\n-\treturn h;\n-}\n-// pick up top T matches at a node\n-static void cut_tail(bsw2entry_t *u, int T, bsw2entry_t *aux)\n-{\n-\tint i, *a, n, x;\n-\tif (u->n <= T) return;\n-\tif (aux->max < u->n) {\n-\t\taux->max = u->n;\n-\t\taux->array = (bsw2cell_t*)realloc(aux->array, aux->max * sizeof(bsw2cell_t));\n-\t}\n-\ta = (int*)aux->array;\n-\tfor (i = n = 0; i != u->n; ++i)\n-\t\tif (u->array[i].ql && u->array[i].G > 0)\n-\t\t\ta[n++] = -u->array[i].G;\n-\tif (n <= T) return;\n-\tx = -ks_ksmall(int, n, a, T);\n-\tn = 0;\n-\tfor (i = 0; i < u->n; ++i'..b') continue; // deleted node\n-\t\t\t\tc[0] = x = push_array_p(u);\n-\t\t\t\tx->G = MINUS_INF;\n-\t\t\t\tp->upos = x->upos = -1;\n-\t\t\t\tif (p->ppos >= 0) { // parent has been visited\n-\t\t\t\t\tc[1] = (v->array[p->ppos].upos >= 0)? u->array + v->array[p->ppos].upos : 0;\n-\t\t\t\t\tc[3] = v->array + p->ppos; c[2] = p;\n-\t\t\t\t\tif (fill_cell(opt, curr_score_mat[p->pj], c) > 0) { // then update topology at p and x\n-\t\t\t\t\t\tx->ppos = v->array[p->ppos].upos; // the parent pos in u\n-\t\t\t\t\t\tp->upos = u->n++; // the current pos in u\n-\t\t\t\t\t\tif (x->ppos >= 0) u->array[x->ppos].cpos[p->pj] = p->upos; // the child pos of its parent in u\n-\t\t\t\t\t\tis_added = 1;\n-\t\t\t\t\t}\n-\t\t\t\t} else {\n-\t\t\t\t\tx->D = p->D > p->G - opt->q? p->D - opt->r : p->G - opt->qr;\n-\t\t\t\t\tif (x->D > 0) {\n-\t\t\t\t\t\tx->G = x->D;\n-\t\t\t\t\t\tx->I = MINUS_INF; x->ppos = -1;\n-\t\t\t\t\t\tp->upos = u->n++;\n-\t\t\t\t\t\tis_added = 1;\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t\tif (is_added) { // x has been added to u->array. fill the remaining variables\n-\t\t\t\t\tx->cpos[0] = x->cpos[1] = x->cpos[2] = x->cpos[3] = -1;\n-\t\t\t\t\tx->pj = p->pj; x->qk = p->qk; x->ql = p->ql; x->qlen = p->qlen; x->tlen = p->tlen + 1;\n-\t\t\t\t\tif (x->G > -heap[0]) {\n-\t\t\t\t\t\theap[0] = -x->G;\n-\t\t\t\t\t\tks_heapadjust(int, 0, heap_size, heap);\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t\tif ((x->G > opt->qr && x->G >= -heap[0]) || i < old_n) { // good node in u, or in v\n-\t\t\t\t\tif (p->cpos[0] == -1 || p->cpos[1] == -1 || p->cpos[2] == -1 || p->cpos[3] == -1) {\n-\t\t\t\t\t\tbwt_2occ4(query, p->qk - 1, p->ql, qcntk, qcntl);\n-\t\t\t\t\t\tfor (qj = 0; qj != 4; ++qj) { // descend to the prefix trie\n-\t\t\t\t\t\t\tif (p->cpos[qj] != -1) continue; // this node will be visited later\n-\t\t\t\t\t\t\tk = query->L2[qj] + qcntk[qj] + 1;\n-\t\t\t\t\t\t\tl = query->L2[qj] + qcntl[qj];\n-\t\t\t\t\t\t\tif (k > l) { p->cpos[qj] = -2; continue; }\n-\t\t\t\t\t\t\tx = push_array_p(v);\n-\t\t\t\t\t\t\tp = v->array + i; // p may not point to the correct position after realloc\n-\t\t\t\t\t\t\tx->G = x->I = x->D = MINUS_INF;\n-\t\t\t\t\t\t\tx->qk = k; x->ql = l; x->pj = qj; x->qlen = p->qlen + 1; x->ppos = i; x->tlen = p->tlen;\n-\t\t\t\t\t\t\tx->cpos[0] = x->cpos[1] = x->cpos[2] = x->cpos[3] = -1;\n-\t\t\t\t\t\t\tp->cpos[qj] = v->n++;\n-\t\t\t\t\t\t} // ~for(qj)\n-\t\t\t\t\t} // ~if(p->cpos[])\n-\t\t\t\t} // ~if\n-\t\t\t} // ~for(i)\n-\t\t\tif (u->n) save_hits(target, opt->t, b->hits, u);\n-\t\t\t{ // push u to the stack (or to the pending array)\n-\t\t\t\tuint32_t cnt, pos;\n-\t\t\t\tcnt = (uint32_t)kh_value(chash, iter);\n-\t\t\t\tpos = kh_value(chash, iter)>>32;\n-\t\t\t\tif (pos) { // something in the pending array, then merge\n-\t\t\t\t\tbsw2entry_t *w = kv_A(stack->pending, pos-1);\n-\t\t\t\t\tif (u->n) {\n-\t\t\t\t\t\tif (w->n < u->n) { // swap\n-\t\t\t\t\t\t\tw = u; u = kv_A(stack->pending, pos-1); kv_A(stack->pending, pos-1) = w;\n-\t\t\t\t\t\t}\n-\t\t\t\t\t\tmerge_entry(opt, w, u, b);\n-\t\t\t\t\t}\n-\t\t\t\t\tif (cnt == 0) { // move from pending to stack0\n-\t\t\t\t\t\tremove_duplicate(w, rhash);\n-\t\t\t\t\t\tsave_narrow_hits(target, w, b1, opt->t, opt->is);\n-\t\t\t\t\t\tcut_tail(w, opt->z, u);\n-\t\t\t\t\t\tstack_push0(stack, w);\n-\t\t\t\t\t\tkv_A(stack->pending, pos-1) = 0;\n-\t\t\t\t\t\t--stack->n_pending;\n-\t\t\t\t\t}\n-\t\t\t\t\tmp_free(stack->pool, u);\n-\t\t\t\t} else if (cnt) { // the first time\n-\t\t\t\t\tif (u->n) { // push to the pending queue\n-\t\t\t\t\t\t++stack->n_pending;\n-\t\t\t\t\t\tkv_push(bsw2entry_p, stack->pending, u);\n-\t\t\t\t\t\tkh_value(chash, iter) = (uint64_t)kv_size(stack->pending)<<32 | cnt;\n-\t\t\t\t\t} else mp_free(stack->pool, u);\n-\t\t\t\t} else { // cnt == 0, then push to the stack\n-\t\t\t\t\tbsw2entry_t *w = mp_alloc(stack->pool);\n-\t\t\t\t\tsave_narrow_hits(target, u, b1, opt->t, opt->is);\n-\t\t\t\t\tcut_tail(u, opt->z, w);\n-\t\t\t\t\tmp_free(stack->pool, w);\n-\t\t\t\t\tstack_push0(stack, u);\n-\t\t\t\t}\n-\t\t\t}\n-\t\t} // ~for(tj)\n-\t\tmp_free(stack->pool, v);\n-\t} // while(top)\n-\tgetrusage(0, &curr);\n-\tfor (i = 0; i < 2; ++i)\n-\t\tfor (j = 0; j < b_ret[i]->n; ++j)\n-\t\t\tb_ret[i]->hits[j].n_seeds = 0;\n-\tbsw2_resolve_duphits(bns, query, b, opt->is);\n-\tbsw2_resolve_duphits(bns, query, b1, opt->is);\n-\t//fprintf(stderr, "stats: %.3lf sec; %d elems\\n", time_elapse(&curr, &last), n_tot);\n-\t// free\n-\tfree(heap);\n-\tkh_destroy(qintv, rhash);\n-\tkh_destroy(64, chash);\n-\tstack->pending.n = stack->stack0.n = 0;\n-\treturn b_ret;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtsw2_main.c --- a/bwa-0.7.9a/bwtsw2_main.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,89 +0,0 @@ -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <math.h> -#include "bwt.h" -#include "bwtsw2.h" -#include "utils.h" -#include "bwa.h" - -int bwa_bwtsw2(int argc, char *argv[]) -{ - bsw2opt_t *opt; - bwaidx_t *idx; - int c; - - opt = bsw2_init_opt(); - srand48(11); - while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:SG:C")) >= 0) { - switch (c) { - case 'q': opt->q = atoi(optarg); break; - case 'r': opt->r = atoi(optarg); break; - case 'a': opt->a = atoi(optarg); break; - case 'b': opt->b = atoi(optarg); break; - case 'w': opt->bw = atoi(optarg); break; - case 'T': opt->t = atoi(optarg); break; - case 't': opt->n_threads = atoi(optarg); break; - case 'z': opt->z = atoi(optarg); break; - case 's': opt->is = atoi(optarg); break; - case 'm': opt->mask_level = atof(optarg); break; - case 'c': opt->coef = atof(optarg); break; - case 'N': opt->t_seeds = atoi(optarg); break; - case 'M': opt->multi_2nd = 1; break; - case 'H': opt->hard_clip = 1; break; - case 'f': xreopen(optarg, "w", stdout); break; - case 'I': opt->max_ins = atoi(optarg); break; - case 'S': opt->skip_sw = 1; break; - case 'C': opt->cpy_cmt = 1; break; - case 'G': opt->max_chain_gap = atoi(optarg); break; - default: return 1; - } - } - opt->qr = opt->q + opt->r; - - if (optind + 2 > argc) { - fprintf(stderr, "\n"); - fprintf(stderr, "Usage: bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n"); - fprintf(stderr, "Options: -a INT score for a match [%d]\n", opt->a); - fprintf(stderr, " -b INT mismatch penalty [%d]\n", opt->b); - fprintf(stderr, " -q INT gap open penalty [%d]\n", opt->q); - fprintf(stderr, " -r INT gap extension penalty [%d]\n", opt->r); - fprintf(stderr, " -w INT band width [%d]\n", opt->bw); - fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level); - fprintf(stderr, "\n"); - fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); - fprintf(stderr, " -f FILE file to output results to instead of stdout\n"); - fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n"); - fprintf(stderr, " -C copy FASTA/Q comment to SAM output\n"); - fprintf(stderr, " -M mark multi-part alignments as secondary\n"); - fprintf(stderr, " -S skip Smith-Waterman read pairing\n"); - fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins); - fprintf(stderr, "\n"); - fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t); - fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef); - fprintf(stderr, " -z INT Z-best [%d]\n", opt->z); - fprintf(stderr, " -s INT maximum seeding interval size [%d]\n", opt->is); - fprintf(stderr, " -N INT # seeds to trigger rev aln; 2*INT is also the chaining threshold [%d]\n", opt->t_seeds); - fprintf(stderr, " -G INT maximum gap size during chaining [%d]\n", opt->max_chain_gap); - fprintf(stderr, "\n"); - fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n"); - fprintf(stderr, " BACs, the default setting usually works well. For the current PacBio\n"); - fprintf(stderr, " reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n"); - fprintf(stderr, " increase '-z' for better sensitivity.\n"); - fprintf(stderr, "\n"); - - return 1; - } - - // adjust opt for opt->a - opt->t *= opt->a; - opt->coef *= opt->a; - - if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1; - bsw2_aln(opt, idx->bns, idx->bwt, argv[optind+1], optind+2 < argc? argv[optind+2] : 0); - bwa_idx_destroy(idx); - free(opt); - - return 0; -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/bwtsw2_pair.c --- a/bwa-0.7.9a/bwtsw2_pair.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,268 +0,0 @@\n-#include <math.h>\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include "utils.h"\n-#include "bwt.h"\n-#include "bntseq.h"\n-#include "bwtsw2.h"\n-#include "kstring.h"\n-#include "ksw.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-#define MIN_RATIO 0.8\n-#define OUTLIER_BOUND 2.0\n-#define MAX_STDDEV 4.0\n-#define EXT_STDDEV 4.0\n-\n-typedef struct {\n-\tint low, high, failed;\n-\tdouble avg, std;\n-} bsw2pestat_t;\n-\n-bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)\n-{\n-\tint i, k, x, p25, p50, p75, tmp, max_len = 0;\n-\tuint64_t *isize;\n-\tbsw2pestat_t r;\n-\n-\tmemset(&r, 0, sizeof(bsw2pestat_t));\n-\tisize = calloc(n, 8);\n-\tfor (i = k = 0; i < n; i += 2) {\n-\t\tbsw2hit_t *t[2];\n-\t\tint l;\n-\t\tif (buf[i] == 0 || buf[i]->n != 1 || buf[i+1]->n != 1) continue; // more than 1 hits\n-\t\tt[0] = &buf[i]->hits[0]; t[1] = &buf[i+1]->hits[0];\n-\t\tif (t[0]->G2 > 0.8 * t[0]->G) continue; // the best hit is not good enough\n-\t\tif (t[1]->G2 > 0.8 * t[1]->G) continue; // the best hit is not good enough\n-\t\tl = t[0]->k > t[1]->k? t[0]->k - t[1]->k + t[1]->len : t[1]->k - t[0]->k + t[0]->len;\n-\t\tif (l >= max_ins) continue; // skip pairs with excessively large insert\n-\t\tmax_len = max_len > t[0]->end - t[0]->beg? max_len : t[0]->end - t[0]->beg;\n-\t\tmax_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;\n-\t\tisize[k++] = l;\n-\t}\n-\tks_introsort_64(k, isize);\n-\tp25 = isize[(int)(.25 * k + .499)];\n-\tp50 = isize[(int)(.50 * k + .499)];\n-\tp75 = isize[(int)(.75 * k + .499)];\n-\tksprintf(msg, "[%s] infer the insert size distribution from %d high-quality pairs.\\n", __func__, k);\n-\tif (k < 8) {\n-\t\tksprintf(msg, "[%s] fail to infer the insert size distribution: too few good pairs.\\n", __func__);\n-\t\tfree(isize);\n-\t\tr.failed = 1;\n-\t\treturn r;\n-\t}\n-\ttmp = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);\n-\tr.low = tmp > max_len? tmp : max_len;\n-\tif (r.low < 1) r.low = 1;\n-\tr.high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);\n-\tif (r.low > r.high) {\n-\t\tksprintf(msg, "[%s] fail to infer the insert size distribution: upper bound is smaller than max read length.\\n", __func__);\n-\t\tfree(isize);\n-\t\tr.failed = 1;\n-\t\treturn r;\n-\t}\n-\tksprintf(msg, "[%s] (25, 50, 75) percentile: (%d, %d, %d)\\n", __func__, p25, p50, p75);\n-\tksprintf(msg, "[%s] low and high boundaries for computing mean and std.dev: (%d, %d)\\n", __func__, r.low, r.high);\n-\tfor (i = x = 0, r.avg = 0; i < k; ++i)\n-\t\tif (isize[i] >= r.low && isize[i] <= r.high)\n-\t\t\tr.avg += isize[i], ++x;\n-\tr.avg /= x;\n-\tfor (i = 0, r.std = 0; i < k; ++i)\n-\t\tif (isize[i] >= r.low && isize[i] <= r.high)\n-\t\t\tr.std += (isize[i] - r.avg) * (isize[i] - r.avg);\n-\tr.std = sqrt(r.std / x);\n-\tksprintf(msg, "[%s] mean and std.dev: (%.2f, %.2f)\\n", __func__, r.avg, r.std);\n-\ttmp = (int)(p25 - 3. * (p75 - p25) + .499);\n-\tr.low = tmp > max_len? tmp : max_len;\n-\tif (r.low < 1) r.low = 1;\n-\tr.high = (int)(p75 + 3. * (p75 - p25) + .499);\n-\tif (r.low > r.avg - MAX_STDDEV * r.std) r.low = (int)(r.avg - MAX_STDDEV * r.std + .499);\n-\tr.low = tmp > max_len? tmp : max_len;\n-\tif (r.high < r.avg - MAX_STDDEV * r.std) r.high = (int)(r.avg + MAX_STDDEV * r.std + .499);\n-\tksprintf(msg, "[%s] low and high boundaries for proper pairs: (%d, %d)\\n", __func__, r.low, r.high);\n-\tfree(isize);\n-\treturn r;\n-}\n-\n-typedef struct {\n-\tint n_cigar, beg, end, len;\n-\tint64_t pos;\n-\tuint32_t *cigar;\n-} pairaux_t;\n-\n-extern unsigned char nst_nt4_table[256];\n-\n-void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const bsw2pestat_t *st, const bsw2hit_t *h, int l_mseq, const char *mseq, bsw2hit_t *a, int8_t g_mat[25])\n-{\n-\textern void seq_reverse(int len, ubyte_t *seq, int is_comp);\n-\tint64_t k, beg, end;\n-\tuint8_t *seq, *ref;\n-\tint i;\n-\t// compute the region start and end\n-\ta->n_seeds = 1; a->flag |= BSW2_FLAG_MATESW; // before calling this routine, *a has been cleared with memset(0); the flag is set with 1<<6/7\n-\tif (h->is_rev == 0) {\n-\t\tbeg = (int64_t)(h-'..b' < hits[i+j]->n; ++k) {\n-\t\t\t\tbsw2hit_t *p = &hits[i+j]->hits[k];\n-\t\t\t\tp->flag |= 1<<(6+j);\n-\t\t\t}\n-\t\t}\n-\t\tif (pes.failed) continue;\n-\t\tif (hits[i] == 0 || hits[i+1] == 0) continue; // one end has excessive N\n-\t\tif (hits[i]->n != 1 && hits[i+1]->n != 1) continue; // no end has exactly one hit\n-\t\tif (hits[i]->n > 1 || hits[i+1]->n > 1) continue; // one read has more than one hit\n-\t\tif (!opt->skip_sw) {\n-\t\t\tif (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);\n-\t\t\tif (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);\n-\t\t} // else a[0].G == a[1].G == a[0].G2 == a[1].G2 == 0\n-\t\t// the following enumerate all possibilities. It is tedious but necessary...\n-\t\tif (hits[i]->n + hits[i+1]->n == 1) { // one end mapped; the other not;\n-\t\t\tbwtsw2_t *p[2];\n-\t\t\tint which;\n-\t\t\tif (hits[i]->n == 1) p[0] = hits[i], p[1] = hits[i+1], which = 1;\n-\t\t\telse p[0] = hits[i+1], p[1] = hits[i], which = 0;\n-\t\t\tif (a[which].G == 0) continue;\n-\t\t\ta[which].flag |= BSW2_FLAG_RESCUED;\n-\t\t\tif (p[1]->max == 0) {\n-\t\t\t\tp[1]->max = 1;\n-\t\t\t\tp[1]->hits = malloc(sizeof(bsw2hit_t));\n-\t\t\t}\n-\t\t\tp[1]->hits[0] = a[which];\n-\t\t\tp[1]->n = 1;\n-\t\t\tp[0]->hits[0].flag |= 2;\n-\t\t\tp[1]->hits[0].flag |= 2;\n-\t\t\t++n_rescued;\n-\t\t} else { // then both ends mapped\n-\t\t\tint is_fixed = 0;\n-\t\t\t//fprintf(stderr, "%d; %lld,%lld; %d,%d\\n", a[0].is_rev, hits[i]->hits[0].k, a[0].k, hits[i]->hits[0].end, a[0].end);\n-\t\t\tfor (j = 0; j < 2; ++j) { // fix wrong mappings and wrong suboptimal alignment score\n-\t\t\t\tbsw2hit_t *p = &hits[i+j]->hits[0];\n-\t\t\t\tif (p->G < a[j].G) { // the orginal mapping is suboptimal\n-\t\t\t\t\ta[j].G2 = a[j].G2 > p->G? a[j].G2 : p->G; // FIXME: reset BSW2_FLAG_TANDEM?\n-\t\t\t\t\t*p = a[j];\n-\t\t\t\t\t++n_fixed;\n-\t\t\t\t\tis_fixed = 1;\n-\t\t\t\t} else if (p->k != a[j].k && p->G2 < a[j].G) {\n-\t\t\t\t\tp->G2 = a[j].G;\n-\t\t\t\t} else if (p->k == a[j].k && p->G2 < a[j].G2) {\n-\t\t\t\t\tp->G2 = a[j].G2;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tif (hits[i]->hits[0].k == a[0].k && hits[i+1]->hits[0].k == a[1].k) { // properly paired and no ends need to be moved\n-\t\t\t\tfor (j = 0; j < 2; ++j)\n-\t\t\t\t\thits[i+j]->hits[0].flag |= 2 | (a[j].flag & BSW2_FLAG_TANDEM);\n-\t\t\t} else if (hits[i]->hits[0].k == a[0].k || hits[i+1]->hits[0].k == a[1].k) { // a tandem match\n-\t\t\t\tfor (j = 0; j < 2; ++j) {\n-\t\t\t\t\thits[i+j]->hits[0].flag |= 2;\n-\t\t\t\t\tif (hits[i+j]->hits[0].k != a[j].k)\n-\t\t\t\t\t\thits[i+j]->hits[0].flag |= BSW2_FLAG_TANDEM;\n-\t\t\t\t}\n-\t\t\t} else if (!is_fixed && (a[0].G || a[1].G)) { // it is possible to move one end\n-\t\t\t\tif (a[0].G && a[1].G) { // now we have two "proper pairs"\n-\t\t\t\t\tint G[2];\n-\t\t\t\t\tdouble diff;\n-\t\t\t\t\tG[0] = hits[i]->hits[0].G + a[1].G;\n-\t\t\t\t\tG[1] = hits[i+1]->hits[0].G + a[0].G;\n-\t\t\t\t\tdiff = fabs(G[0] - G[1]) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.);\n-\t\t\t\t\tif (diff > 0.05) a[G[0] > G[1]? 0 : 1].G = 0;\n-\t\t\t\t}\n-\t\t\t\tif (a[0].G == 0 || a[1].G == 0) { // one proper pair only\n-\t\t\t\t\tbsw2hit_t *p[2]; // p[0] points the unchanged hit; p[1] to the hit to be moved\n-\t\t\t\t\tint which, isize;\n-\t\t\t\t\tdouble dev, diff;\n-\t\t\t\t\tif (a[0].G) p[0] = &hits[i+1]->hits[0], p[1] = &hits[i]->hits[0], which = 0;\n-\t\t\t\t\telse p[0] = &hits[i]->hits[0], p[1] = &hits[i+1]->hits[0], which = 1;\n-\t\t\t\t\tisize = p[0]->is_rev? p[0]->k + p[0]->len - a[which].k : a[which].k + a[which].len - p[0]->k;\n-\t\t\t\t\tdev = fabs(isize - pes.avg) / pes.std;\n-\t\t\t\t\tdiff = (double)(p[1]->G - a[which].G) / (opt->a + opt->b) / (p[1]->end - p[1]->beg) * 100.0;\n-\t\t\t\t\tif (diff < dev * 2.) { // then move (heuristic)\n-\t\t\t\t\t\ta[which].G2 = a[which].G;\n-\t\t\t\t\t\tp[1][0] = a[which];\n-\t\t\t\t\t\tp[1]->flag |= BSW2_FLAG_MOVED | 2;\n-\t\t\t\t\t\tp[0]->flag |= 2;\n-\t\t\t\t\t\t++n_moved;\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} else if (is_fixed) {\n-\t\t\t\thits[i+0]->hits[0].flag |= 2;\n-\t\t\t\thits[i+1]->hits[0].flag |= 2;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tksprintf(&msg, "[%s] #fixed=%d, #rescued=%d, #moved=%d\\n", __func__, n_fixed, n_rescued, n_moved);\n-\tfputs(msg.s, stderr);\n-\tfree(msg.s);\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/example.c --- a/bwa-0.7.9a/example.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,64 +0,0 @@ -#include <stdio.h> -#include <zlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include "bwamem.h" -#include "kseq.h" // for the FASTA/Q parser -KSEQ_DECLARE(gzFile) - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -int main(int argc, char *argv[]) -{ - bwaidx_t *idx; - gzFile fp; - kseq_t *ks; - mem_opt_t *opt; - - if (argc < 3) { - fprintf(stderr, "Usage: bwamem-lite <idx.base> <reads.fq>\n"); - return 1; - } - - idx = bwa_idx_load(argv[1], BWA_IDX_ALL); // load the BWA index - if (NULL == idx) { - fprintf(stderr, "Index load failed.\n"); - exit(EXIT_FAILURE); - } - fp = strcmp(argv[2], "-")? gzopen(argv[2], "r") : gzdopen(fileno(stdin), "r"); - if (NULL == fp) { - fprintf(stderr, "Couldn't open %s : %s\n", - strcmp(argv[2], "-") ? argv[2] : "stdin", - errno ? strerror(errno) : "Out of memory"); - exit(EXIT_FAILURE); - } - ks = kseq_init(fp); // initialize the FASTA/Q parser - opt = mem_opt_init(); // initialize the BWA-MEM parameters to the default values - - while (kseq_read(ks) >= 0) { // read one sequence - mem_alnreg_v ar; - int i, k; - ar = mem_align1(opt, idx->bwt, idx->bns, idx->pac, ks->seq.l, ks->seq.s); // get all the hits - for (i = 0; i < ar.n; ++i) { // traverse each hit - mem_aln_t a; - if (ar.a[i].secondary >= 0) continue; // skip secondary alignments - a = mem_reg2aln(opt, idx->bns, idx->pac, ks->seq.l, ks->seq.s, &ar.a[i]); // get forward-strand position and CIGAR - // print alignment - err_printf("%s\t%c\t%s\t%ld\t%d\t", ks->name.s, "+-"[a.is_rev], idx->bns->anns[a.rid].name, (long)a.pos, a.mapq); - for (k = 0; k < a.n_cigar; ++k) // print CIGAR - err_printf("%d%c", a.cigar[k]>>4, "MIDSH"[a.cigar[k]&0xf]); - err_printf("\t%d\n", a.NM); // print edit distance - free(a.cigar); // don't forget to deallocate CIGAR - } - free(ar.a); // and deallocate the hit list - } - - free(opt); - kseq_destroy(ks); - err_gzclose(fp); - bwa_idx_destroy(idx); - return 0; -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/fastmap.c --- a/bwa-0.7.9a/fastmap.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,328 +0,0 @@\n-#include <zlib.h>\n-#include <stdio.h>\n-#include <unistd.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <ctype.h>\n-#include <math.h>\n-#include "bwa.h"\n-#include "bwamem.h"\n-#include "kvec.h"\n-#include "utils.h"\n-#include "kseq.h"\n-#include "utils.h"\n-KSEQ_DECLARE(gzFile)\n-\n-extern unsigned char nst_nt4_table[256];\n-\n-void *kopen(const char *fn, int *_fd);\n-int kclose(void *a);\n-\n-static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)\n-{\n-\tif (opt0->a) { // matching score is changed\n-\t\tif (!opt0->b) opt->b *= opt->a;\n-\t\tif (!opt0->T) opt->T *= opt->a;\n-\t\tif (!opt0->o_del) opt->o_del *= opt->a;\n-\t\tif (!opt0->e_del) opt->e_del *= opt->a;\n-\t\tif (!opt0->o_ins) opt->o_ins *= opt->a;\n-\t\tif (!opt0->e_ins) opt->e_ins *= opt->a;\n-\t\tif (!opt0->zdrop) opt->zdrop *= opt->a;\n-\t\tif (!opt0->pen_clip5) opt->pen_clip5 *= opt->a;\n-\t\tif (!opt0->pen_clip3) opt->pen_clip3 *= opt->a;\n-\t\tif (!opt0->pen_unpaired) opt->pen_unpaired *= opt->a;\n-\t}\n-}\n-\n-int main_mem(int argc, char *argv[])\n-{\n-\tmem_opt_t *opt, opt0;\n-\tint fd, fd2, i, c, n, copy_comment = 0;\n-\tgzFile fp, fp2 = 0;\n-\tkseq_t *ks, *ks2 = 0;\n-\tbseq1_t *seqs;\n-\tbwaidx_t *idx;\n-\tchar *p, *rg_line = 0;\n-\tconst char *mode = 0;\n-\tvoid *ko = 0, *ko2 = 0;\n-\tint64_t n_processed = 0;\n-\tmem_pestat_t pes[4], *pes0 = 0;\n-\n-\tmemset(pes, 0, 4 * sizeof(mem_pestat_t));\n-\tfor (i = 0; i < 4; ++i) pes[i].failed = 1;\n-\n-\topt = mem_opt_init();\n-\tmemset(&opt0, 0, sizeof(mem_opt_t));\n-\twhile ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {\n-\t\tif (c == \'k\') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;\n-\t\telse if (c == \'x\') mode = optarg;\n-\t\telse if (c == \'w\') opt->w = atoi(optarg), opt0.w = 1;\n-\t\telse if (c == \'A\') opt->a = atoi(optarg), opt0.a = 1;\n-\t\telse if (c == \'B\') opt->b = atoi(optarg), opt0.b = 1;\n-\t\telse if (c == \'T\') opt->T = atoi(optarg), opt0.T = 1;\n-\t\telse if (c == \'U\') opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1;\n-\t\telse if (c == \'t\') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1;\n-\t\telse if (c == \'P\') opt->flag |= MEM_F_NOPAIRING;\n-\t\telse if (c == \'a\') opt->flag |= MEM_F_ALL;\n-\t\telse if (c == \'p\') opt->flag |= MEM_F_PE;\n-\t\telse if (c == \'M\') opt->flag |= MEM_F_NO_MULTI;\n-\t\telse if (c == \'S\') opt->flag |= MEM_F_NO_RESCUE;\n-\t\telse if (c == \'e\') opt->flag |= MEM_F_SELF_OVLP;\n-\t\telse if (c == \'F\') opt->flag |= MEM_F_ALN_REG;\n-\t\telse if (c == \'Y\') opt->flag |= MEM_F_SOFTCLIP;\n-\t\telse if (c == \'c\') opt->max_occ = atoi(optarg), opt0.max_occ = 1;\n-\t\telse if (c == \'d\') opt->zdrop = atoi(optarg), opt0.zdrop = 1;\n-\t\telse if (c == \'v\') bwa_verbose = atoi(optarg);\n-\t\telse if (c == \'r\') opt->split_factor = atof(optarg), opt0.split_factor = 1.;\n-\t\telse if (c == \'D\') opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.;\n-\t\telse if (c == \'m\') opt->max_matesw = atoi(optarg), opt0.max_matesw = 1;\n-\t\telse if (c == \'h\') opt->max_hits = atoi(optarg), opt0.max_hits = 1;\n-\t\telse if (c == \'s\') opt->split_width = atoi(optarg), opt0.split_width = 1;\n-\t\telse if (c == \'G\') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1;\n-\t\telse if (c == \'N\') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;\n-\t\telse if (c == \'W\') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;\n-\t\telse if (c == \'C\') copy_comment = 1;\n-\t\telse if (c == \'Q\') {\n-\t\t\topt0.mapQ_coef_len = 1;\n-\t\t\topt->mapQ_coef_len = atoi(optarg);\n-\t\t\topt->mapQ_coef_fac = opt->mapQ_coef_len > 0? log(opt->mapQ_coef_len) : 0;\n-\t\t} else if (c == \'O\') {\n-\t\t\topt0.o_del = opt0.o_ins = 1;\n-\t\t\topt->o_del = opt->o_ins = strtol(optarg, &p, 10);\n-\t\t\tif (*p != 0 && ispunct(*p) && isdigit(p[1]))\n-\t\t\t\topt->o_ins = strtol(p+1, &p, 10);\n-\t\t} else if (c == \'E\') {\n-\t\t\topt0.e_del = opt0.e_ins = 1;\n-\t\t\topt->e_del = opt->e_ins = strtol(optarg, &p, 10);\n-\t\t\tif (*p != 0 && ispunct(*p) && isdigit(p[1]))\n-\t\t\t\topt->e_ins = strtol(p+1, &p, 10);\n-\t\t} else if (c == \'L\') {\n-\t\t\topt0.pen_clip5 = opt0.pen_clip3 = 1;\n-\t\t\topt->pen_clip5 ='..b't->a, opt->b, opt->mat);\n-\n-\tif ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak\n-\n-\tko = kopen(argv[optind + 1], &fd);\n-\tif (ko == 0) {\n-\t\tif (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s\'.\\n", __func__, argv[optind + 1]);\n-\t\treturn 1;\n-\t}\n-\tfp = gzdopen(fd, "r");\n-\tks = kseq_init(fp);\n-\tif (optind + 2 < argc) {\n-\t\tif (opt->flag&MEM_F_PE) {\n-\t\t\tif (bwa_verbose >= 2)\n-\t\t\t\tfprintf(stderr, "[W::%s] when \'-p\' is in use, the second query file will be ignored.\\n", __func__);\n-\t\t} else {\n-\t\t\tko2 = kopen(argv[optind + 2], &fd2);\n-\t\t\tif (ko2 == 0) {\n-\t\t\t\tif (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s\'.\\n", __func__, argv[optind + 2]);\n-\t\t\t\treturn 1;\n-\t\t\t}\n-\t\t\tfp2 = gzdopen(fd2, "r");\n-\t\t\tks2 = kseq_init(fp2);\n-\t\t\topt->flag |= MEM_F_PE;\n-\t\t}\n-\t}\n-\tif (!(opt->flag & MEM_F_ALN_REG))\n-\t\tbwa_print_sam_hdr(idx->bns, rg_line);\n-\twhile ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {\n-\t\tint64_t size = 0;\n-\t\tif ((opt->flag & MEM_F_PE) && (n&1) == 1) {\n-\t\t\tif (bwa_verbose >= 2)\n-\t\t\t\tfprintf(stderr, "[W::%s] odd number of reads in the PE mode; last read dropped\\n", __func__);\n-\t\t\tn = n>>1<<1;\n-\t\t}\n-\t\tif (!copy_comment)\n-\t\t\tfor (i = 0; i < n; ++i) {\n-\t\t\t\tfree(seqs[i].comment); seqs[i].comment = 0;\n-\t\t\t}\n-\t\tfor (i = 0; i < n; ++i) size += seqs[i].l_seq;\n-\t\tif (bwa_verbose >= 3)\n-\t\t\tfprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\\n", __func__, n, (long)size);\n-\t\tmem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n_processed, n, seqs, pes0);\n-\t\tn_processed += n;\n-\t\tfor (i = 0; i < n; ++i) {\n-\t\t\tif (seqs[i].sam) err_fputs(seqs[i].sam, stdout);\n-\t\t\tfree(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);\n-\t\t}\n-\t\tfree(seqs);\n-\t}\n-\n-\tfree(opt);\n-\tbwa_idx_destroy(idx);\n-\tkseq_destroy(ks);\n-\terr_gzclose(fp); kclose(ko);\n-\tif (ks2) {\n-\t\tkseq_destroy(ks2);\n-\t\terr_gzclose(fp2); kclose(ko2);\n-\t}\n-\treturn 0;\n-}\n-\n-int main_fastmap(int argc, char *argv[])\n-{\n-\tint c, i, min_iwidth = 20, min_len = 17, print_seq = 0;\n-\tkseq_t *seq;\n-\tbwtint_t k;\n-\tgzFile fp;\n-\tsmem_i *itr;\n-\tconst bwtintv_v *a;\n-\tbwaidx_t *idx;\n-\n-\twhile ((c = getopt(argc, argv, "w:l:p")) >= 0) {\n-\t\tswitch (c) {\n-\t\t\tcase \'p\': print_seq = 1; break;\n-\t\t\tcase \'w\': min_iwidth = atoi(optarg); break;\n-\t\t\tcase \'l\': min_len = atoi(optarg); break;\n-\t\t default: return 1;\n-\t\t}\n-\t}\n-\tif (optind + 1 >= argc) {\n-\t\tfprintf(stderr, "Usage: bwa fastmap [-p] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\\n", min_len, min_iwidth);\n-\t\treturn 1;\n-\t}\n-\n-\tfp = xzopen(argv[optind + 1], "r");\n-\tseq = kseq_init(fp);\n-\tif ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;\n-\titr = smem_itr_init(idx->bwt);\n-\twhile (kseq_read(seq) >= 0) {\n-\t\terr_printf("SQ\\t%s\\t%ld", seq->name.s, seq->seq.l);\n-\t\tif (print_seq) {\n-\t\t\terr_putchar(\'\\t\');\n-\t\t\terr_puts(seq->seq.s);\n-\t\t} else err_putchar(\'\\n\');\n-\t\tfor (i = 0; i < seq->seq.l; ++i)\n-\t\t\tseq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];\n-\t\tsmem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);\n-\t\twhile ((a = smem_next(itr)) != 0) {\n-\t\t\tfor (i = 0; i < a->n; ++i) {\n-\t\t\t\tbwtintv_t *p = &a->a[i];\n-\t\t\t\tif ((uint32_t)p->info - (p->info>>32) < min_len) continue;\n-\t\t\t\terr_printf("EM\\t%d\\t%d\\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);\n-\t\t\t\tif (p->x[2] <= min_iwidth) {\n-\t\t\t\t\tfor (k = 0; k < p->x[2]; ++k) {\n-\t\t\t\t\t\tbwtint_t pos;\n-\t\t\t\t\t\tint len, is_rev, ref_id;\n-\t\t\t\t\t\tlen = (uint32_t)p->info - (p->info>>32);\n-\t\t\t\t\t\tpos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);\n-\t\t\t\t\t\tif (is_rev) pos -= len - 1;\n-\t\t\t\t\t\tbns_cnt_ambi(idx->bns, pos, len, &ref_id);\n-\t\t\t\t\t\terr_printf("\\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);\n-\t\t\t\t\t}\n-\t\t\t\t} else err_puts("\\t*");\n-\t\t\t\terr_putchar(\'\\n\');\n-\t\t\t}\n-\t\t}\n-\t\terr_puts("//");\n-\t}\n-\n-\tsmem_itr_destroy(itr);\n-\tbwa_idx_destroy(idx);\n-\tkseq_destroy(seq);\n-\terr_gzclose(fp);\n-\treturn 0;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/is.c --- a/bwa-0.7.9a/is.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,223 +0,0 @@ -/* - * sais.c for sais-lite - * Copyright (c) 2008 Yuta Mori All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <stdlib.h> - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -typedef unsigned char ubyte_t; -#define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i]) - -/* find the start or end of each bucket */ -static void getCounts(const unsigned char *T, int *C, int n, int k, int cs) -{ - int i; - for (i = 0; i < k; ++i) C[i] = 0; - for (i = 0; i < n; ++i) ++C[chr(i)]; -} -static void getBuckets(const int *C, int *B, int k, int end) -{ - int i, sum = 0; - if (end) { - for (i = 0; i < k; ++i) { - sum += C[i]; - B[i] = sum; - } - } else { - for (i = 0; i < k; ++i) { - sum += C[i]; - B[i] = sum - C[i]; - } - } -} - -/* compute SA */ -static void induceSA(const unsigned char *T, int *SA, int *C, int *B, int n, int k, int cs) -{ - int *b, i, j; - int c0, c1; - /* compute SAl */ - if (C == B) getCounts(T, C, n, k, cs); - getBuckets(C, B, k, 0); /* find starts of buckets */ - j = n - 1; - b = SA + B[c1 = chr(j)]; - *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j; - for (i = 0; i < n; ++i) { - j = SA[i], SA[i] = ~j; - if (0 < j) { - --j; - if ((c0 = chr(j)) != c1) { - B[c1] = b - SA; - b = SA + B[c1 = c0]; - } - *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j; - } - } - /* compute SAs */ - if (C == B) getCounts(T, C, n, k, cs); - getBuckets(C, B, k, 1); /* find ends of buckets */ - for (i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) { - if (0 < (j = SA[i])) { - --j; - if ((c0 = chr(j)) != c1) { - B[c1] = b - SA; - b = SA + B[c1 = c0]; - } - *--b = ((j == 0) || (chr(j - 1) > c1)) ? ~j : j; - } else SA[i] = ~j; - } -} - -/* - * find the suffix array SA of T[0..n-1] in {0..k-1}^n use a working - * space (excluding T and SA) of at most 2n+O(1) for a constant alphabet - */ -static int sais_main(const unsigned char *T, int *SA, int fs, int n, int k, int cs) -{ - int *C, *B, *RA; - int i, j, c, m, p, q, plen, qlen, name; - int c0, c1; - int diff; - - /* stage 1: reduce the problem by at least 1/2 sort all the - * S-substrings */ - if (k <= fs) { - C = SA + n; - B = (k <= (fs - k)) ? C + k : C; - } else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2; - getCounts(T, C, n, k, cs); - getBuckets(C, B, k, 1); /* find ends of buckets */ - for (i = 0; i < n; ++i) SA[i] = 0; - for (i = n - 2, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) { - if ((c0 = chr(i)) < (c1 + c)) c = 1; - else if (c != 0) SA[--B[c1]] = i + 1, c = 0; - } - induceSA(T, SA, C, B, n, k, cs); - if (fs < k) free(C); - /* compact all the sorted substrings into the first m items of SA - * 2*m must be not larger than n (proveable) */ - for (i = 0, m = 0; i < n; ++i) { - p = SA[i]; - if ((0 < p) && (chr(p - 1) > (c0 = chr(p)))) { - for (j = p + 1; (j < n) && (c0 == (c1 = chr(j))); ++j); - if ((j < n) && (c0 < c1)) SA[m++] = p; - } - } - for (i = m; i < n; ++i) SA[i] = 0; /* init the name array buffer */ - /* store the length of all substrings */ - for (i = n - 2, j = n, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) { - if ((c0 = chr(i)) < (c1 + c)) c = 1; - else if (c != 0) { - SA[m + ((i + 1) >> 1)] = j - i - 1; - j = i + 1; - c = 0; - } - } - /* find the lexicographic names of all substrings */ - for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i) { - p = SA[i], plen = SA[m + (p >> 1)], diff = 1; - if (plen == qlen) { - for (j = 0; (j < plen) && (chr(p + j) == chr(q + j)); j++); - if (j == plen) diff = 0; - } - if (diff != 0) ++name, q = p, qlen = plen; - SA[m + (p >> 1)] = name; - } - - /* stage 2: solve the reduced problem recurse if names are not yet - * unique */ - if (name < m) { - RA = SA + n + fs - m; - for (i = n - 1, j = m - 1; m <= i; --i) { - if (SA[i] != 0) RA[j--] = SA[i] - 1; - } - if (sais_main((unsigned char *) RA, SA, fs + n - m * 2, m, name, sizeof(int)) != 0) return -2; - for (i = n - 2, j = m - 1, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) { - if ((c0 = chr(i)) < (c1 + c)) c = 1; - else if (c != 0) RA[j--] = i + 1, c = 0; /* get p1 */ - } - for (i = 0; i < m; ++i) SA[i] = RA[SA[i]]; /* get index */ - } - /* stage 3: induce the result for the original problem */ - if (k <= fs) { - C = SA + n; - B = (k <= (fs - k)) ? C + k : C; - } else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2; - /* put all left-most S characters into their buckets */ - getCounts(T, C, n, k, cs); - getBuckets(C, B, k, 1); /* find ends of buckets */ - for (i = m; i < n; ++i) SA[i] = 0; /* init SA[m..n-1] */ - for (i = m - 1; 0 <= i; --i) { - j = SA[i], SA[i] = 0; - SA[--B[chr(j)]] = j; - } - induceSA(T, SA, C, B, n, k, cs); - if (fs < k) free(C); - return 0; -} - -/** - * Constructs the suffix array of a given string. - * @param T[0..n-1] The input string. - * @param SA[0..n] The output array of suffixes. - * @param n The length of the given string. - * @return 0 if no error occurred - */ -int is_sa(const ubyte_t *T, int *SA, int n) -{ - if ((T == NULL) || (SA == NULL) || (n < 0)) return -1; - SA[0] = n; - if (n <= 1) { - if (n == 1) SA[1] = 0; - return 0; - } - return sais_main(T, SA+1, 0, n, 256, 1); -} - -/** - * Constructs the burrows-wheeler transformed string of a given string. - * @param T[0..n-1] The input string. - * @param n The length of the given string. - * @return The primary index if no error occurred, -1 or -2 otherwise. - */ -int is_bwt(ubyte_t *T, int n) -{ - int *SA, i, primary = 0; - SA = (int*)calloc(n+1, sizeof(int)); - - if (is_sa(T, SA, n)) return -1; - - for (i = 0; i <= n; ++i) { - if (SA[i] == 0) primary = i; - else SA[i] = T[SA[i] - 1]; - } - for (i = 0; i < primary; ++i) T[i] = SA[i]; - for (; i < n; ++i) T[i] = SA[i + 1]; - free(SA); - return primary; -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/kbtree.h --- a/bwa-0.7.9a/kbtree.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,388 +0,0 @@\n-/*-\n- * Copyright 1997-1999, 2001, John-Mark Gurney.\n- * 2008-2009, Attractive Chaos <attractor@live.co.uk>\n- *\n- * Redistribution and use in source and binary forms, with or without\n- * modification, are permitted provided that the following conditions\n- * are met:\n- *\n- * 1. Redistributions of source code must retain the above copyright\n- * notice, this list of conditions and the following disclaimer.\n- * 2. Redistributions in binary form must reproduce the above copyright\n- * notice, this list of conditions and the following disclaimer in the\n- * documentation and/or other materials provided with the distribution.\n- *\n- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS\'\' AND\n- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\n- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\n- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\n- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n- * SUCH DAMAGE.\n- */\n-\n-#ifndef __AC_KBTREE_H\n-#define __AC_KBTREE_H\n-\n-#include <stdlib.h>\n-#include <string.h>\n-#include <stdint.h>\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-typedef struct {\n-\tint32_t is_internal:1, n:31;\n-} kbnode_t;\n-\n-#define\t__KB_KEY(type, x)\t((type*)((char*)x + 4))\n-#define __KB_PTR(btr, x)\t((kbnode_t**)((char*)x + btr->off_ptr))\n-\n-#define __KB_TREE_T(name)\t\t\t\t\t\t\\\n-\ttypedef struct {\t\t\t\t\t\t\t\\\n-\t\tkbnode_t *root;\t\t\t\t\t\t\t\\\n-\t\tint\toff_key, off_ptr, ilen, elen;\t\t\\\n-\t\tint\tn, t;\t\t\t\t\t\t\t\t\\\n-\t\tint\tn_keys, n_nodes;\t\t\t\t\t\\\n-\t} kbtree_##name##_t;\n-\n-#define __KB_INIT(name, key_t)\t\t\t\t\t\t\t\t\t\t\t\\\n-\tkbtree_##name##_t *kb_init_##name(int size)\t\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkbtree_##name##_t *b;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tb = (kbtree_##name##_t*)calloc(1, sizeof(kbtree_##name##_t));\t\\\n-\t\tb->t = ((size - 4 - sizeof(void*)) / (sizeof(void*) + sizeof(key_t)) + 1) >> 1; \\\n-\t\tif (b->t < 2) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfree(b); return 0;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tb->n = 2 * b->t - 1;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tb->off_ptr = 4 + b->n * sizeof(key_t);\t\t\t\t\t\t\t\\\n-\t\tb->ilen = (4 + sizeof(void*) + b->n * (sizeof(void*) + sizeof(key_t)) + 3) >> 2 << 2; \\\n-\t\tb->elen = (b->off_ptr + 3) >> 2 << 2;\t\t\t\t\t\t\t\\\n-\t\tb->root = (kbnode_t*)calloc(1, b->ilen);\t\t\t\t\t\t\\\n-\t\t++b->n_nodes;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn b;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define __kb_destroy(b) do {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint i, max = 8;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkbnode_t *x, **top, **stack = 0;\t\t\t\t\t\t\t\t\\\n-\t\tif (b) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\ttop = stack = (kbnode_t**)calloc(max, sizeof(kbnode_t*));\t\\\n-\t\t\t*top++ = (b)->root;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\twhile (top != stack) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tx = *--top;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (x == 0 || x->is_internal == 0) { free(x); continue; } \\\n-\t\t\t\tfor (i = 0; i <= x->n; ++i)\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (__KB_PTR(b, x)[i]) {\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tif (top - stack == max) {\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\tmax <<= 1;\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\tstack = (kbnode_t**)realloc(stack, max * sizeof(kbnode_t*)); \\\n-\t\t\t\t\t\t\ttop = stack + (max>>1);\t\t\t\t\t\t\\\n-\t\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t*top++ = __KB_PTR(b, x)[i];\t\t\t\t\t\t\\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tfree(x);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfree(b); free(stack);\t\t\t\t\t\t\t\t\t\t\t\\\n-\t} while (0)\n-\n-#define __kb_get_first(key_t, b, ret) do {\t\\\n-\t\tkbnode_t *__x = (b)->root;\t\t\t\\\n-\t\twhile (__KB_PTR(b, __x)[0] != 0)\t\\\n-\t\t\t__x = __KB_PTR(b, __x)[0];\t\t\\\n-\t\t(ret) = __KB_KEY(key_t, __x)[0];\t\\\n-\t} while (0)\n-\n-#define __KB_GET_AUX0(name, key_t, __cmp)\t\t\t\t\t\t\t\t\\\n-\tstatic inline int __kb_get_aux_##name(const kbnode_t * __restrict x, const key_t * __restrict k, int *r) \\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t'..b'\t__KB_KEY(key_t, y)[y->n++] = __KB_KEY(key_t, x)[i - 1];\t\\\n-\t\t\t\tmemmove(__KB_KEY(key_t, y) + y->n, __KB_KEY(key_t, xp), xp->n * sizeof(key_t));\t\\\n-\t\t\t\tif (y->is_internal) memmove(__KB_PTR(b, y) + y->n, __KB_PTR(b, xp), (xp->n + 1) * sizeof(void*)); \\\n-\t\t\t\ty->n += xp->n;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tmemmove(__KB_KEY(key_t, x) + i - 1, __KB_KEY(key_t, x) + i, (x->n - i) * sizeof(key_t)); \\\n-\t\t\t\tmemmove(__KB_PTR(b, x) + i, __KB_PTR(b, x) + i + 1, (x->n - i) * sizeof(void*)); \\\n-\t\t\t\t--x->n;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tfree(xp);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\txp = y;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else if (i < x->n && (y = __KB_PTR(b, x)[i + 1])->n == b->t - 1) { \\\n-\t\t\t\t__KB_KEY(key_t, xp)[xp->n++] = __KB_KEY(key_t, x)[i];\t\\\n-\t\t\t\tmemmove(__KB_KEY(key_t, xp) + xp->n, __KB_KEY(key_t, y), y->n * sizeof(key_t));\t\\\n-\t\t\t\tif (xp->is_internal) memmove(__KB_PTR(b, xp) + xp->n, __KB_PTR(b, y), (y->n + 1) * sizeof(void*)); \\\n-\t\t\t\txp->n += y->n;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tmemmove(__KB_KEY(key_t, x) + i, __KB_KEY(key_t, x) + i + 1, (x->n - i - 1) * sizeof(key_t)); \\\n-\t\t\t\tmemmove(__KB_PTR(b, x) + i + 1, __KB_PTR(b, x) + i + 2, (x->n - i - 1) * sizeof(void*)); \\\n-\t\t\t\t--x->n;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tfree(y);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn __kb_delp_aux_##name(b, xp, k, s);\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic key_t kb_delp_##name(kbtree_##name##_t *b, const key_t * __restrict k) \\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkbnode_t *x;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkey_t ret;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tret = __kb_delp_aux_##name(b, b->root, k, 0);\t\t\t\t\t\\\n-\t\t--b->n_keys;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (b->root->n == 0 && b->root->is_internal) {\t\t\t\t\t\\\n-\t\t\t--b->n_nodes;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tx = b->root;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tb->root = __KB_PTR(b, x)[0];\t\t\t\t\t\t\t\t\\\n-\t\t\tfree(x);\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn ret;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline key_t kb_del_##name(kbtree_##name##_t *b, const key_t k) \\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn kb_delp_##name(b, &k);\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-typedef struct {\n-\tkbnode_t *x;\n-\tint i;\n-} __kbstack_t;\n-\n-#define __kb_traverse(key_t, b, __func) do {\t\t\t\t\t\t\t\\\n-\t\tint __kmax = 8;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t__kbstack_t *__kstack, *__kp;\t\t\t\t\t\t\t\t\t\\\n-\t\t__kp = __kstack = (__kbstack_t*)calloc(__kmax, sizeof(__kbstack_t)); \\\n-\t\t__kp->x = (b)->root; __kp->i = 0;\t\t\t\t\t\t\t\t\\\n-\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\twhile (__kp->x && __kp->i <= __kp->x->n) {\t\t\t\t\t\\\n-\t\t\t\tif (__kp - __kstack == __kmax - 1) {\t\t\t\t\t\\\n-\t\t\t\t\t__kmax <<= 1;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t__kstack = (__kbstack_t*)realloc(__kstack, __kmax * sizeof(__kbstack_t)); \\\n-\t\t\t\t\t__kp = __kstack + (__kmax>>1) - 1;\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t(__kp+1)->i = 0; (__kp+1)->x = __kp->x->is_internal? __KB_PTR(b, __kp->x)[__kp->i] : 0; \\\n-\t\t\t\t++__kp;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t--__kp;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (__kp >= __kstack) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (__kp->x && __kp->i < __kp->x->n) __func(&__KB_KEY(key_t, __kp->x)[__kp->i]); \\\n-\t\t\t\t++__kp->i;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else break;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfree(__kstack);\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t} while (0)\n-\n-#define KBTREE_INIT(name, key_t, __cmp)\t\t\t\\\n-\t__KB_TREE_T(name)\t\t\t\t\t\t\t\\\n-\t__KB_INIT(name, key_t)\t\t\t\t\t\t\\\n-\t__KB_GET_AUX1(name, key_t, __cmp)\t\t\t\\\n-\t__KB_GET(name, key_t)\t\t\t\t\t\t\\\n-\t__KB_INTERVAL(name, key_t)\t\t\t\t\t\\\n-\t__KB_PUT(name, key_t, __cmp)\t\t\t\t\\\n-\t__KB_DEL(name, key_t)\n-\n-#define KB_DEFAULT_SIZE 512\n-\n-#define kbtree_t(name) kbtree_##name##_t\n-#define kb_init(name, s) kb_init_##name(s)\n-#define kb_destroy(name, b) __kb_destroy(b)\n-#define kb_get(name, b, k) kb_get_##name(b, k)\n-#define kb_put(name, b, k) kb_put_##name(b, k)\n-#define kb_del(name, b, k) kb_del_##name(b, k)\n-#define kb_interval(name, b, k, l, u) kb_interval_##name(b, k, l, u)\n-#define kb_getp(name, b, k) kb_getp_##name(b, k)\n-#define kb_putp(name, b, k) kb_putp_##name(b, k)\n-#define kb_delp(name, b, k) kb_delp_##name(b, k)\n-#define kb_intervalp(name, b, k, l, u) kb_intervalp_##name(b, k, l, u)\n-\n-#define kb_size(b) ((b)->n_keys)\n-\n-#define kb_generic_cmp(a, b) (((b) < (a)) - ((a) < (b)))\n-#define kb_str_cmp(a, b) strcmp(a, b)\n-\n-#endif\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/khash.h --- a/bwa-0.7.9a/khash.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,614 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/*\n- An example:\n-\n-#include "khash.h"\n-KHASH_MAP_INIT_INT(32, char)\n-int main() {\n-\tint ret, is_missing;\n-\tkhiter_t k;\n-\tkhash_t(32) *h = kh_init(32);\n-\tk = kh_put(32, h, 5, &ret);\n-\tkh_value(h, k) = 10;\n-\tk = kh_get(32, h, 10);\n-\tis_missing = (k == kh_end(h));\n-\tk = kh_get(32, h, 5);\n-\tkh_del(32, h, k);\n-\tfor (k = kh_begin(h); k != kh_end(h); ++k)\n-\t\tif (kh_exist(h, k)) kh_value(h, k) = 1;\n-\tkh_destroy(32, h);\n-\treturn 0;\n-}\n-*/\n-\n-/*\n- 2011-12-29 (0.2.7):\n-\n- * Minor code clean up; no actual effect.\n-\n- 2011-09-16 (0.2.6):\n-\n-\t* The capacity is a power of 2. This seems to dramatically improve the\n-\t speed for simple keys. Thank Zilong Tan for the suggestion. Reference:\n-\n-\t - http://code.google.com/p/ulib/\n-\t - http://nothings.org/computer/judy/\n-\n-\t* Allow to optionally use linear probing which usually has better\n-\t performance for random input. Double hashing is still the default as it\n-\t is more robust to certain non-random input.\n-\n-\t* Added Wang\'s integer hash function (not used by default). This hash\n-\t function is more robust to certain non-random input.\n-\n- 2011-02-14 (0.2.5):\n-\n- * Allow to declare global functions.\n-\n- 2009-09-26 (0.2.4):\n-\n- * Improve portability\n-\n- 2008-09-19 (0.2.3):\n-\n-\t* Corrected the example\n-\t* Improved interfaces\n-\n- 2008-09-11 (0.2.2):\n-\n-\t* Improved speed a little in kh_put()\n-\n- 2008-09-10 (0.2.1):\n-\n-\t* Added kh_clear()\n-\t* Fixed a compiling error\n-\n- 2008-09-02 (0.2.0):\n-\n-\t* Changed to token concatenation which increases flexibility.\n-\n- 2008-08-31 (0.1.2):\n-\n-\t* Fixed a bug in kh_get(), which has not been tested previously.\n-\n- 2008-08-31 (0.1.1):\n-\n-\t* Added destructor\n-*/\n-\n-\n-#ifndef __AC_KHASH_H\n-#define __AC_KHASH_H\n-\n-/*!\n- @header\n-\n- Generic hash table library.\n- */\n-\n-#define AC_VERSION_KHASH_H "0.2.6"\n-\n-#include <stdlib.h>\n-#include <string.h>\n-#include <limits.h>\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-/* compipler specific configuration */\n-\n-#if UINT_MAX == 0xffffffffu\n-typedef unsigned int khint32_t;\n-#elif ULONG_MAX == 0xffffffffu\n-typedef unsigned long khint32_t;\n-#endif\n-\n-#if ULONG_MAX == ULLONG_MAX\n-typedef unsigned long khint64_t;\n-#else\n-typedef unsigned long long khint64_t;\n-#endif\n-\n-#ifdef _MSC_VER\n-#define kh_inline __inline\n-#else\n-#define kh_inline inline\n-#endif\n-\n-typedef khint32_t khint_t;\n-typedef khint_t khiter_t;\n-\n-#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)\n-#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)\n-#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)\n-#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))\n-#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))\n-#define __ac_set_isboth_false(flag, i) (flag'..b'r to the bucket [khint_t]\n- @return Value [type of values]\n- @discussion For hash sets, calling this results in segfault.\n- */\n-#define kh_val(h, x) ((h)->vals[x])\n-\n-/*! @function\n- @abstract Alias of kh_val()\n- */\n-#define kh_value(h, x) ((h)->vals[x])\n-\n-/*! @function\n- @abstract Get the start iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return The start iterator [khint_t]\n- */\n-#define kh_begin(h) (khint_t)(0)\n-\n-/*! @function\n- @abstract Get the end iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return The end iterator [khint_t]\n- */\n-#define kh_end(h) ((h)->n_buckets)\n-\n-/*! @function\n- @abstract Get the number of elements in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return Number of elements in the hash table [khint_t]\n- */\n-#define kh_size(h) ((h)->size)\n-\n-/*! @function\n- @abstract Get the number of buckets in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return Number of buckets in the hash table [khint_t]\n- */\n-#define kh_n_buckets(h) ((h)->n_buckets)\n-\n-/*! @function\n- @abstract Iterate over the entries in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param kvar Variable to which key will be assigned\n- @param vvar Variable to which value will be assigned\n- @param code Block of code to execute\n- */\n-#define kh_foreach(h, kvar, vvar, code) { khint_t __i;\t\t\\\n-\tfor (__i = kh_begin(h); __i != kh_end(h); ++__i) {\t\t\\\n-\t\tif (!kh_exist(h,__i)) continue;\t\t\t\t\t\t\\\n-\t\t(kvar) = kh_key(h,__i);\t\t\t\t\t\t\t\t\\\n-\t\t(vvar) = kh_val(h,__i);\t\t\t\t\t\t\t\t\\\n-\t\tcode;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t} }\n-\n-/*! @function\n- @abstract Iterate over the values in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param vvar Variable to which value will be assigned\n- @param code Block of code to execute\n- */\n-#define kh_foreach_value(h, vvar, code) { khint_t __i;\t\t\\\n-\tfor (__i = kh_begin(h); __i != kh_end(h); ++__i) {\t\t\\\n-\t\tif (!kh_exist(h,__i)) continue;\t\t\t\t\t\t\\\n-\t\t(vvar) = kh_val(h,__i);\t\t\t\t\t\t\t\t\\\n-\t\tcode;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t} }\n-\n-/* More conenient interfaces */\n-\n-/*! @function\n- @abstract Instantiate a hash set containing integer keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_INT(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing integer keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_INT(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing 64-bit integer keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_INT64(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing 64-bit integer keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_INT64(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)\n-\n-typedef const char *kh_cstr_t;\n-/*! @function\n- @abstract Instantiate a hash map containing const char* keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_STR(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing const char* keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_STR(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)\n-\n-#endif /* __AC_KHASH_H */\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/kopen.c --- a/bwa-0.7.9a/kopen.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,374 +0,0 @@\n-#include <stdio.h>\n-#include <fcntl.h>\n-#include <errno.h>\n-#include <ctype.h>\n-#include <unistd.h>\n-#include <string.h>\n-#include <stdlib.h>\n-#include <signal.h>\n-#include <sys/wait.h>\n-#include <sys/types.h>\n-#ifndef _WIN32\n-#include <netdb.h>\n-#include <arpa/inet.h>\n-#include <sys/socket.h>\n-#endif\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-#ifdef _WIN32\n-#define _KO_NO_NET\n-#endif\n-\n-#ifndef _KO_NO_NET\n-static int socket_wait(int fd, int is_read)\n-{\n-\tfd_set fds, *fdr = 0, *fdw = 0;\n-\tstruct timeval tv;\n-\tint ret;\n-\ttv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out\n-\tFD_ZERO(&fds);\n-\tFD_SET(fd, &fds);\n-\tif (is_read) fdr = &fds;\n-\telse fdw = &fds;\n-\tret = select(fd+1, fdr, fdw, 0, &tv);\n-\tif (ret == -1) perror("select");\n-\treturn ret;\n-}\n-\n-static int socket_connect(const char *host, const char *port)\n-{\n-#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)\n-\n-\tint on = 1, fd;\n-\tstruct linger lng = { 0, 0 };\n-\tstruct addrinfo hints, *res = 0;\n-\tmemset(&hints, 0, sizeof(struct addrinfo));\n-\thints.ai_family = AF_UNSPEC;\n-\thints.ai_socktype = SOCK_STREAM;\n-\tif (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");\n-\tif ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");\n-\tif (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");\n-\tif (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("setsockopt");\n-\tif (connect(fd, res->ai_addr, res->ai_addrlen) != 0) __err_connect("connect");\n-\tfreeaddrinfo(res);\n-\treturn fd;\n-#undef __err_connect\n-}\n-\n-static int write_bytes(int fd, const char *buf, size_t len)\n-{\n-\tssize_t bytes;\n-\tdo {\n-\t\tbytes = write(fd, buf, len);\n-\t\tif (bytes >= 0) {\n-\t\t\tlen -= bytes;\n-\t\t} else if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {\n-\t\t\treturn -1;\n-\t\t}\n-\t} while (len > 0);\n-\n-\treturn 0;\n-}\n-\n-static int http_open(const char *fn)\n-{\n-\tchar *p, *proxy, *q, *http_host, *host, *port, *path, *buf;\n-\tint fd, ret, l;\n-\tssize_t bytes = 0, bufsz = 0x10000;\n-\n-\t/* parse URL; adapted from khttp_parse_url() in knetfile.c */\n-\tif (strstr(fn, "http://") != fn) return 0;\n-\t// set ->http_host\n-\tfor (p = (char*)fn + 7; *p && *p != \'/\'; ++p);\n-\tl = p - fn - 7;\n-\thttp_host = calloc(l + 1, 1);\n-\tstrncpy(http_host, fn + 7, l);\n-\thttp_host[l] = 0;\n-\tfor (q = http_host; *q && *q != \':\'; ++q);\n-\tif (*q == \':\') *q++ = 0;\n-\t// get http_proxy\n-\tproxy = getenv("http_proxy");\n-\t// set host, port and path\n-\tif (proxy == 0) {\n-\t\thost = strdup(http_host); // when there is no proxy, server name is identical to http_host name.\n-\t\tport = strdup(*q? q : "80");\n-\t\tpath = strdup(*p? p : "/");\n-\t} else {\n-\t\thost = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);\n-\t\tfor (q = host; *q && *q != \':\'; ++q);\n-\t\tif (*q == \':\') *q++ = 0; \n-\t\tport = strdup(*q? q : "80");\n-\t\tpath = strdup(fn);\n-\t}\n-\n-\t/* connect; adapted from khttp_connect() in knetfile.c */\n-\tl = 0;\n-\tfd = socket_connect(host, port);\n-\tbuf = calloc(bufsz, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.\n-\tl += snprintf(buf + l, bufsz, "GET %s HTTP/1.0\\r\\nHost: %s\\r\\n\\r\\n",\n-\t\t\t\t path, http_host);\n-\tif (write_bytes(fd, buf, l) != 0) {\n-\t\tclose(fd);\n-\t\tfd = -1;\n-\t\tgoto out;\n-\t}\n-\tl = 0;\n- retry:\n-\twhile (l < bufsz && (bytes = read(fd, buf + l, 1)) > 0) { // read HTTP header; FIXME: bad efficiency\n-\t\tif (buf[l] == \'\\n\' && l >= 3)\n-\t\t\tif (strncmp(buf + l - 3, "\\r\\n\\r\\n", 4) == 0) break;\n-\t\t++l;\n-\t}\n-\tif (bytes < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) goto retry;\n-\n-\tbuf[l] = 0;\n-\tif (bytes < 0 || l < 14) { // prematured header\n-\t\tclose(fd);\n-\t\tfd = -1;\n-\t\tgoto out;\n-\t}\n-\tret = strtol(buf + 8, &p, 0); // HTTP return code\n-\tif (ret != 200) {\n-\t\tclose(fd);\n-\t\tfd = -1;\n-\t}\n- out:\n-\tfree(buf); free(http_host); free(host); free(port); free(path);\n-\treturn fd;\n-}\n-\n-typedef struct {\n-\tint max_r'..b'[5]);\n-\tmemcpy(pasv_ip, v, 4 * sizeof(int));\n-\tpasv_port = (v[4]<<8&0xff00) + v[5];\n-\tkftp_send_cmd(&aux, retr, 0);\n-\tsprintf(host2, "%d.%d.%d.%d", pasv_ip[0], pasv_ip[1], pasv_ip[2], pasv_ip[3]);\n-\tsprintf(port2, "%d", pasv_port);\n-\tfd = socket_connect(host2, port2);\n-\tif (fd == -1) goto ftp_open_end;\n-\tret = kftp_get_response(&aux);\n-\tif (ret != 150) {\n-\t\tclose(fd);\n-\t\tfd = -1;\n-\t}\n-\tclose(aux.ctrl_fd);\n-\n-ftp_open_end:\n-\tfree(host); free(port); free(retr); free(aux.response);\n-\treturn fd;\n-}\n-#endif /* !defined(_KO_NO_NET) */\n-\n-static char **cmd2argv(const char *cmd)\n-{\n-\tint i, beg, end, argc;\n-\tchar **argv, *str;\n-\tend = strlen(cmd);\n-\tfor (i = end - 1; i >= 0; --i)\n-\t\tif (!isspace(cmd[i])) break;\n-\tend = i + 1;\n-\tfor (beg = 0; beg < end; ++beg)\n-\t\tif (!isspace(cmd[beg])) break;\n-\tif (beg == end) return 0;\n-\tfor (i = beg + 1, argc = 0; i < end; ++i)\n-\t\tif (isspace(cmd[i]) && !isspace(cmd[i-1]))\n-\t\t\t++argc;\n-\targv = (char**)calloc(argc + 2, sizeof(void*));\n-\targv[0] = str = (char*)calloc(end - beg + 1, 1);\n-\tstrncpy(argv[0], cmd + beg, end - beg);\n-\tfor (i = argc = 1; i < end - beg; ++i)\n-\t\tif (isspace(str[i])) str[i] = 0;\n-\t\telse if (str[i] && str[i-1] == 0) argv[argc++] = &str[i];\n-\treturn argv;\n-}\n-\n-#define KO_STDIN 1\n-#define KO_FILE 2\n-#define KO_PIPE 3\n-#define KO_HTTP 4\n-#define KO_FTP 5\n-\n-typedef struct {\n-\tint type, fd;\n-\tpid_t pid;\n-} koaux_t;\n-\n-void *kopen(const char *fn, int *_fd)\n-{\n-\tkoaux_t *aux = 0;\n-\t*_fd = -1;\n-\tif (strstr(fn, "http://") == fn) {\n-\t\taux = calloc(1, sizeof(koaux_t));\n-\t\taux->type = KO_HTTP;\n-\t\taux->fd = http_open(fn);\n-\t} else if (strstr(fn, "ftp://") == fn) {\n-\t\taux = calloc(1, sizeof(koaux_t));\n-\t\taux->type = KO_FTP;\n-\t\taux->fd = ftp_open(fn);\n-\t} else if (strcmp(fn, "-") == 0) {\n-\t\taux = calloc(1, sizeof(koaux_t));\n-\t\taux->type = KO_STDIN;\n-\t\taux->fd = STDIN_FILENO;\n-\t} else {\n-\t\tconst char *p, *q;\n-\t\tfor (p = fn; *p; ++p)\n-\t\t\tif (!isspace(*p)) break;\n-\t\tif (*p == \'<\') { // pipe open\n-\t\t\tint need_shell, pfd[2];\n-\t\t\tpid_t pid;\n-\t\t\t// a simple check to see if we need to invoke a shell; not always working\n-\t\t\tfor (q = p + 1; *q; ++q)\n-\t\t\t\tif (ispunct(*q) && *q != \'.\' && *q != \'_\' && *q != \'-\' && *q != \':\')\n-\t\t\t\t\tbreak;\n-\t\t\tneed_shell = (*q != 0);\n-\t\t\tif (pipe(pfd) != 0) return 0;\n-\t\t\tpid = vfork();\n-\t\t\tif (pid == -1) { /* vfork() error */\n-\t\t\t\tclose(pfd[0]); close(pfd[1]);\n-\t\t\t\treturn 0;\n-\t\t\t}\n-\t\t\tif (pid == 0) { /* the child process */\n-\t\t\t\tchar **argv; /* FIXME: I do not know if this will lead to a memory leak */\n-\t\t\t\tclose(pfd[0]);\n-\t\t\t\tdup2(pfd[1], STDOUT_FILENO);\n-\t\t\t\tclose(pfd[1]);\n-\t\t\t\tif (!need_shell) {\n-\t\t\t\t\targv = cmd2argv(p + 1);\n-\t\t\t\t\texecvp(argv[0], argv);\n-\t\t\t\t\tfree(argv[0]); free(argv);\n-\t\t\t\t} else execl("/bin/sh", "sh", "-c", p + 1, NULL);\n-\t\t\t\texit(1);\n-\t\t\t} else { /* parent process */\n-\t\t\t\tclose(pfd[1]);\n-\t\t\t\taux = calloc(1, sizeof(koaux_t));\n-\t\t\t\taux->type = KO_PIPE;\n-\t\t\t\taux->fd = pfd[0];\n-\t\t\t\taux->pid = pid;\n-\t\t\t}\n-\t\t} else {\n-#ifdef _WIN32\n-\t\t\t*_fd = open(fn, O_RDONLY | O_BINARY);\n-#else\n-\t\t\t*_fd = open(fn, O_RDONLY);\n-#endif\n-\t\t\tif (*_fd >= 0) {\n-\t\t\t\taux = calloc(1, sizeof(koaux_t));\n-\t\t\t\taux->type = KO_FILE;\n-\t\t\t\taux->fd = *_fd;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tif (aux) *_fd = aux->fd;\n-\treturn aux;\n-}\n-\n-int kclose(void *a)\n-{\n-\tkoaux_t *aux = (koaux_t*)a;\n-\tif (aux->type == KO_PIPE) {\n-\t\tint status;\n-\t\tpid_t pid;\n-\t\tpid = waitpid(aux->pid, &status, WNOHANG);\n-\t\tif (pid != aux->pid) kill(aux->pid, 15);\n-\t}\n-\tfree(aux);\n-\treturn 0;\n-}\n-\n-#ifdef _KO_MAIN\n-#define BUF_SIZE 0x10000\n-int main(int argc, char *argv[])\n-{\n-\tvoid *x;\n-\tint l, fd;\n-\tunsigned char buf[BUF_SIZE];\n-\tFILE *fp;\n-\tif (argc == 1) {\n-\t\tfprintf(stderr, "Usage: kopen <file>\\n");\n-\t\treturn 1;\n-\t}\n-\tx = kopen(argv[1], &fd);\n-\tfp = fdopen(fd, "r");\n-\tif (fp == 0) {\n-\t\tfprintf(stderr, "ERROR: fail to open the input\\n");\n-\t\treturn 1;\n-\t}\n-\tdo {\n-\t\tif ((l = fread(buf, 1, BUF_SIZE, fp)) != 0)\n-\t\t\tfwrite(buf, 1, l, stdout);\n-\t} while (l == BUF_SIZE);\n-\tfclose(fp);\n-\tkclose(x);\n-\treturn 0;\n-}\n-#endif\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/kseq.h --- a/bwa-0.7.9a/kseq.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,239 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Last Modified: 05MAR2012 */\n-\n-#ifndef AC_KSEQ_H\n-#define AC_KSEQ_H\n-\n-#include <ctype.h>\n-#include <string.h>\n-#include <stdlib.h>\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-#define KS_SEP_SPACE 0 // isspace(): \\t, \\n, \\v, \\f, \\r\n-#define KS_SEP_TAB 1 // isspace() && !\' \'\n-#define KS_SEP_LINE 2 // line separator: "\\n" (Unix) or "\\r\\n" (Windows)\n-#define KS_SEP_MAX 2\n-\n-#define __KS_TYPE(type_t)\t\t\t\t\t\t\\\n-\ttypedef struct __kstream_t {\t\t\t\t\\\n-\t\tunsigned char *buf;\t\t\t\t\t\t\\\n-\t\tint begin, end, is_eof;\t\t\t\t\t\\\n-\t\ttype_t f;\t\t\t\t\t\t\t\t\\\n-\t} kstream_t;\n-\n-#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)\n-#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)\n-\n-#define __KS_BASIC(type_t, __bufsize)\t\t\t\t\t\t\t\t\\\n-\tstatic inline kstream_t *ks_init(type_t f)\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \\\n-\t\tks->f = f;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tks->buf = (unsigned char*)malloc(__bufsize);\t\t\t\t\t\t\\\n-\t\treturn ks;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline void ks_destroy(kstream_t *ks)\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (ks) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfree(ks->buf);\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfree(ks);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define __KS_GETC(__read, __bufsize)\t\t\t\t\t\t\\\n-\tstatic inline int ks_getc(kstream_t *ks)\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (ks->is_eof && ks->begin >= ks->end) return -1;\t\\\n-\t\tif (ks->begin >= ks->end) {\t\t\t\t\t\t\t\\\n-\t\t\tks->begin = 0;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tks->end = __read(ks->f, ks->buf, __bufsize);\t\\\n-\t\t\tif (ks->end < __bufsize) ks->is_eof = 1;\t\t\\\n-\t\t\tif (ks->end == 0) return -1;\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn (int)ks->buf[ks->begin++];\t\t\t\t\t\\\n-\t}\n-\n-#ifndef KSTRING_T\n-#define KSTRING_T kstring_t\n-typedef struct __kstring_t {\n-\tsize_t l, m;\n-\tchar *s;\n-} kstring_t;\n-#endif\n-\n-#ifndef kroundup32\n-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n-#endif\n-\n-#define __KS_GETUNTIL(__read, __bufsize)\t\t\t\t\t\t\t\t\\\n-\tstatic int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (dret) *dret = 0;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tstr->l = append? str->l : 0;\t\t\t\t\t\t\t\t\t\\\n-\t\tif (ks->begin >= ks->end && ks->is_eof) return -1;\t\t\t\t\\\n-\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tint i;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (ks->begin >= ks->end) {\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (!ks->is_eof) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tks->begin = 0;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tks->end = __read(ks->f, ks->buf, __bufsize);\t\t\\\n-\t\t\t\t\tif (ks->end < __bufsize) ks->is_eof = 1;\t\t\t\\\n-\t\t\t\t\tif (ks->end == 0) break;\t\t\t\t\t\t\t\\\n-\t\t\t\t} else break;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (delimiter == KS_SEP_LINE) { \\\n-\t\t\t\tfor (i = ks->begin; i < ks->end; ++i) \\\n-\t\t\t\t\tif (ks->buf[i] == \'\\n\') break; \\\n-\t\t\t} else if (delimiter > KS_SEP_MAX) {\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = ks->begin; i '..b"str->l; \\\n-\t\tstr->s[str->l] = '\\0';\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn str->l;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t} \\\n-\tstatic inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \\\n-\t{ return ks_getuntil2(ks, delimiter, str, dret, 0); }\n-\n-#define KSTREAM_INIT(type_t, __read, __bufsize) \\\n-\t__KS_TYPE(type_t)\t\t\t\t\t\t\t\\\n-\t__KS_BASIC(type_t, __bufsize)\t\t\t\t\\\n-\t__KS_GETC(__read, __bufsize)\t\t\t\t\\\n-\t__KS_GETUNTIL(__read, __bufsize)\n-\n-#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)\n-\n-#define __KSEQ_BASIC(SCOPE, type_t)\t\t\t\t\t\t\t\t\t\t\\\n-\tSCOPE kseq_t *kseq_init(type_t fd)\t\t\t\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));\t\t\t\t\\\n-\t\ts->f = ks_init(fd);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn s;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tSCOPE void kseq_destroy(kseq_t *ks)\t\t\t\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (!ks) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfree(ks->name.s); free(ks->comment.s); free(ks->seq.s);\tfree(ks->qual.s); \\\n-\t\tks_destroy(ks->f);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfree(ks);\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-/* Return value:\n- >=0 length of the sequence (normal)\n- -1 end-of-file\n- -2 truncated quality string\n- */\n-#define __KSEQ_READ(SCOPE) \\\n-\tSCOPE int kseq_read(kseq_t *seq) \\\n-\t{ \\\n-\t\tint c; \\\n-\t\tkstream_t *ks = seq->f; \\\n-\t\tif (seq->last_char == 0) { /* then jump to the next header line */ \\\n-\t\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \\\n-\t\t\tif (c == -1) return -1; /* end of file */ \\\n-\t\t\tseq->last_char = c; \\\n-\t\t} /* else: the first header char has been read in the previous call */ \\\n-\t\tseq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \\\n-\t\tif (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \\\n-\t\tif (c != '\\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \\\n-\t\tif (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \\\n-\t\t\tseq->seq.m = 256; \\\n-\t\t\tseq->seq.s = (char*)malloc(seq->seq.m); \\\n-\t\t} \\\n-\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \\\n-\t\t\tif (c == '\\n') continue; /* skip empty lines */ \\\n-\t\t\tseq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \\\n-\t\t\tks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \\\n-\t\t} \\\n-\t\tif (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */\t\\\n-\t\tif (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \\\n-\t\t\tseq->seq.m = seq->seq.l + 2; \\\n-\t\t\tkroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \\\n-\t\t\tseq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \\\n-\t\t} \\\n-\t\tseq->seq.s[seq->seq.l] = 0;\t/* null terminated string */ \\\n-\t\tif (c != '+') return seq->seq.l; /* FASTA */ \\\n-\t\tif (seq->qual.m < seq->seq.m) {\t/* allocate memory for qual in case insufficient */ \\\n-\t\t\tseq->qual.m = seq->seq.m; \\\n-\t\t\tseq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \\\n-\t\t} \\\n-\t\twhile ((c = ks_getc(ks)) != -1 && c != '\\n'); /* skip the rest of '+' line */ \\\n-\t\tif (c == -1) return -2; /* error: no quality string */ \\\n-\t\twhile (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \\\n-\t\tseq->last_char = 0;\t/* we have not come to the next header line */ \\\n-\t\tif (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \\\n-\t\treturn seq->seq.l; \\\n-\t}\n-\n-#define __KSEQ_TYPE(type_t)\t\t\t\t\t\t\\\n-\ttypedef struct {\t\t\t\t\t\t\t\\\n-\t\tkstring_t name, comment, seq, qual;\t\t\\\n-\t\tint last_char;\t\t\t\t\t\t\t\\\n-\t\tkstream_t *f;\t\t\t\t\t\t\t\\\n-\t} kseq_t;\n-\n-#define KSEQ_INIT2(SCOPE, type_t, __read)\t\t\\\n-\tKSTREAM_INIT(type_t, __read, 16384)\t\t\t\\\n-\t__KSEQ_TYPE(type_t)\t\t\t\t\t\t\t\\\n-\t__KSEQ_BASIC(SCOPE, type_t)\t\t\t\t\t\\\n-\t__KSEQ_READ(SCOPE)\n-\n-#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)\n-\n-#define KSEQ_DECLARE(type_t) \\\n-\t__KS_TYPE(type_t) \\\n-\t__KSEQ_TYPE(type_t) \\\n-\textern kseq_t *kseq_init(type_t fd); \\\n-\tvoid kseq_destroy(kseq_t *ks); \\\n-\tint kseq_read(kseq_t *seq);\n-\n-#endif\n" |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/ksort.h --- a/bwa-0.7.9a/ksort.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,273 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/*\n- 2008-11-16 (0.1.4):\n-\n- * Fixed a bug in introsort() that happens in rare cases.\n-\n- 2008-11-05 (0.1.3):\n-\n- * Fixed a bug in introsort() for complex comparisons.\n-\n-\t* Fixed a bug in mergesort(). The previous version is not stable.\n-\n- 2008-09-15 (0.1.2):\n-\n-\t* Accelerated introsort. On my Mac (not on another Linux machine),\n-\t my implementation is as fast as std::sort on random input.\n-\n-\t* Added combsort and in introsort, switch to combsort if the\n-\t recursion is too deep.\n-\n- 2008-09-13 (0.1.1):\n-\n-\t* Added k-small algorithm\n-\n- 2008-09-05 (0.1.0):\n-\n-\t* Initial version\n-\n-*/\n-\n-#ifndef AC_KSORT_H\n-#define AC_KSORT_H\n-\n-#include <stdlib.h>\n-#include <string.h>\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-typedef struct {\n-\tvoid *left, *right;\n-\tint depth;\n-} ks_isort_stack_t;\n-\n-#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }\n-\n-#define KSORT_INIT(name, type_t, __sort_lt)\t\t\t\t\t\t\t\t\\\n-\tvoid ks_mergesort_##name(size_t n, type_t array[], type_t temp[])\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *a2[2], *a, *b;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint curr, shift;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ta2[0] = array;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ta2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);\t\\\n-\t\tfor (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {\t\t\t\\\n-\t\t\ta = a2[curr]; b = a2[1-curr];\t\t\t\t\t\t\t\t\\\n-\t\t\tif (shift == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\ttype_t *p = b, *i, *eb = a + n;\t\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = a; i < eb; i += 2) {\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (i == eb - 1) *p++ = *i;\t\t\t\t\t\t\t\\\n-\t\t\t\t\telse {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tif (__sort_lt(*(i+1), *i)) {\t\t\t\t\t\\\n-\t\t\t\t\t\t\t*p++ = *(i+1); *p++ = *i;\t\t\t\t\t\\\n-\t\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t*p++ = *i; *p++ = *(i+1);\t\t\t\t\t\\\n-\t\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tsize_t i, step = 1ul<<shift;\t\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = 0; i < n; i += step<<1) {\t\t\t\t\t\t\\\n-\t\t\t\t\ttype_t *p, *j, *k, *ea, *eb;\t\t\t\t\t\t\\\n-\t\t\t\t\tif (n < i + step) {\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tea = a + n; eb = a;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tea = a + i + step;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\teb = a + (n < i + (step<<1)? n : i + (step<<1)); \\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tj = a + i; k = a + i + step; p = b + i;\t\t\t\t\\\n-\t\t\t\t\twhile (j < ea && k < eb) {\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tif (__sort_lt(*k, *j)) *p++ = *k++;\t\t\t\t\\\n-\t\t\t\t\t\telse *p++ = *j++;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\twhile (j < ea) *p++ = *j++;\t\t\t\t\t\t\t\\\n-\t\t\t\t\twhile (k < eb) *p++ = *k++;\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tcurr = 1 - curr;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (curr == 1) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\ttype_t *p = a2[0], *i = a2[1], *eb = array + n;\t\t\t\t\\\n-\t\t\tfor (; p < eb; ++i) *p++ = *i;\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (temp == 0) free(a2[1]);\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tvoid ks_heapadjust_##name(size_t i, size_t n, type_t l['..b'\t\t\t\t\t\t\t\t\\\n-\tvoid ks_introsort_##name(size_t n, type_t a[])\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint d;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tks_isort_stack_t *top, *stack;\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t rp, swap_tmp;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *s, *t, *i, *j, *k;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (n < 1) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\telse if (n == 2) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \\\n-\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfor (d = 2; 1ul<<d < n; ++d);\t\t\t\t\t\t\t\t\t\\\n-\t\tstack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \\\n-\t\ttop = stack; s = a; t = a + (n-1); d <<= 1;\t\t\t\t\t\t\\\n-\t\twhile (1) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (s < t) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (--d == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tks_combsort_##name(t - s + 1, s);\t\t\t\t\t\\\n-\t\t\t\t\tt = s;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tcontinue;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\ti = s; j = t; k = i + ((j-i)>>1) + 1;\t\t\t\t\t\\\n-\t\t\t\tif (__sort_lt(*k, *i)) {\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (__sort_lt(*k, *j)) k = j;\t\t\t\t\t\t\\\n-\t\t\t\t} else k = __sort_lt(*j, *i)? i : j;\t\t\t\t\t\\\n-\t\t\t\trp = *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }\t\\\n-\t\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tdo ++i; while (__sort_lt(*i, rp));\t\t\t\t\t\\\n-\t\t\t\t\tdo --j; while (i <= j && __sort_lt(rp, *j));\t\t\\\n-\t\t\t\t\tif (j <= i) break;\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tswap_tmp = *i; *i = *j; *j = swap_tmp;\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tswap_tmp = *i; *i = *t; *t = swap_tmp;\t\t\t\t\t\\\n-\t\t\t\tif (i-s > t-i) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \\\n-\t\t\t\t\ts = t-i > 16? i+1 : t;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \\\n-\t\t\t\t\tt = i-s > 16? i-1 : s;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (top == stack) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tfree(stack);\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t__ks_insertsort_##name(a, a+n);\t\t\t\t\t\t\\\n-\t\t\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t/* This function is adapted from: http://ndevilla.free.fr/median/ */ \\\n-\t/* 0 <= kk < n */\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\ttype_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *low, *high, *k, *ll, *hh, *mid;\t\t\t\t\t\t\t\\\n-\t\tlow = arr; high = arr + n - 1; k = arr + kk;\t\t\t\t\t\\\n-\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (high <= low) return *k;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (high == low + 1) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n-\t\t\t\treturn *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tmid = low + (high - low) / 2;\t\t\t\t\t\t\t\t\\\n-\t\t\tif (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \\\n-\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n-\t\t\tif (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);\t\\\n-\t\t\tKSORT_SWAP(type_t, *mid, *(low+1));\t\t\t\t\t\t\t\\\n-\t\t\tll = low + 1; hh = high;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tdo ++ll; while (__sort_lt(*ll, *low));\t\t\t\t\t\\\n-\t\t\t\tdo --hh; while (__sort_lt(*low, *hh));\t\t\t\t\t\\\n-\t\t\t\tif (hh < ll) break;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tKSORT_SWAP(type_t, *ll, *hh);\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tKSORT_SWAP(type_t, *low, *hh);\t\t\t\t\t\t\t\t\\\n-\t\t\tif (hh <= k) low = ll;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (hh >= k) high = hh - 1;\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)\n-#define ks_introsort(name, n, a) ks_introsort_##name(n, a)\n-#define ks_combsort(name, n, a) ks_combsort_##name(n, a)\n-#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)\n-#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)\n-#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)\n-#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)\n-\n-#define ks_lt_generic(a, b) ((a) < (b))\n-#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)\n-\n-typedef const char *ksstr_t;\n-\n-#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)\n-#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)\n-\n-#endif\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/kstring.c --- a/bwa-0.7.9a/kstring.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,39 +0,0 @@ -#include <stdarg.h> -#include <stdio.h> -#include "kstring.h" - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -int ksprintf(kstring_t *s, const char *fmt, ...) -{ - va_list ap; - int l; - va_start(ap, fmt); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); - va_end(ap); - if (l + 1 > s->m - s->l) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - va_start(ap, fmt); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); - } - va_end(ap); - s->l += l; - return l; -} - -#ifdef KSTRING_MAIN -#include <stdio.h> -int main() -{ - kstring_t *s; - s = (kstring_t*)calloc(1, sizeof(kstring_t)); - ksprintf(s, "abcdefg: %d", 100); - printf("%s\n", s->s); - free(s); - return 0; -} -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/kstring.h --- a/bwa-0.7.9a/kstring.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,115 +0,0 @@ -#ifndef KSTRING_H -#define KSTRING_H - -#include <stdlib.h> -#include <string.h> - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -#ifndef KSTRING_T -#define KSTRING_T kstring_t -typedef struct __kstring_t { - size_t l, m; - char *s; -} kstring_t; -#endif - -static inline void ks_resize(kstring_t *s, size_t size) -{ - if (s->m < size) { - s->m = size; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } -} - -static inline int kputsn(const char *p, int l, kstring_t *s) -{ - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - memcpy(s->s + s->l, p, l); - s->l += l; - s->s[s->l] = 0; - return l; -} - -static inline int kputs(const char *p, kstring_t *s) -{ - return kputsn(p, strlen(p), s); -} - -static inline int kputc(int c, kstring_t *s) -{ - if (s->l + 1 >= s->m) { - s->m = s->l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - s->s[s->l++] = c; - s->s[s->l] = 0; - return c; -} - -static inline int kputw(int c, kstring_t *s) -{ - char buf[16]; - int l, x; - if (c == 0) return kputc('0', s); - for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; - if (c < 0) buf[l++] = '-'; - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; - s->s[s->l] = 0; - return 0; -} - -static inline int kputuw(unsigned c, kstring_t *s) -{ - char buf[16]; - int l, i; - unsigned x; - if (c == 0) return kputc('0', s); - for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; - s->s[s->l] = 0; - return 0; -} - -static inline int kputl(long c, kstring_t *s) -{ - char buf[32]; - long l, x; - if (c == 0) return kputc('0', s); - for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; - if (c < 0) buf[l++] = '-'; - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; - s->s[s->l] = 0; - return 0; -} - -int ksprintf(kstring_t *s, const char *fmt, ...); - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/ksw.c --- a/bwa-0.7.9a/ksw.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,713 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2011 by Attractive Chaos <attractor@live.co.uk>\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-#include <stdlib.h>\n-#include <stdint.h>\n-#include <assert.h>\n-#include <emmintrin.h>\n-#include "ksw.h"\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-#ifdef __GNUC__\n-#define LIKELY(x) __builtin_expect((x),1)\n-#define UNLIKELY(x) __builtin_expect((x),0)\n-#else\n-#define LIKELY(x) (x)\n-#define UNLIKELY(x) (x)\n-#endif\n-\n-const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 };\n-\n-struct _kswq_t {\n-\tint qlen, slen;\n-\tuint8_t shift, mdiff, max, size;\n-\t__m128i *qp, *H0, *H1, *E, *Hmax;\n-};\n-\n-/**\n- * Initialize the query data structure\n- *\n- * @param size Number of bytes used to store a score; valid valures are 1 or 2\n- * @param qlen Length of the query sequence\n- * @param query Query sequence\n- * @param m Size of the alphabet\n- * @param mat Scoring matrix in a one-dimension array\n- *\n- * @return Query data structure\n- */\n-kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t *mat)\n-{\n-\tkswq_t *q;\n-\tint slen, a, tmp, p;\n-\n-\tsize = size > 1? 2 : 1;\n-\tp = 8 * (3 - size); // # values per __m128i\n-\tslen = (qlen + p - 1) / p; // segmented length\n-\tq = (kswq_t*)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory\n-\tq->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory\n-\tq->H0 = q->qp + slen * m;\n-\tq->H1 = q->H0 + slen;\n-\tq->E = q->H1 + slen;\n-\tq->Hmax = q->E + slen;\n-\tq->slen = slen; q->qlen = qlen; q->size = size;\n-\t// compute shift\n-\ttmp = m * m;\n-\tfor (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score\n-\t\tif (mat[a] < (int8_t)q->shift) q->shift = mat[a];\n-\t\tif (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];\n-\t}\n-\tq->max = q->mdiff;\n-\tq->shift = 256 - q->shift; // NB: q->shift is uint8_t\n-\tq->mdiff += q->shift; // this is the difference between the min and max scores\n-\t// An example: p=8, qlen=19, slen=3 and segmentation:\n-\t// {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}\n-\tif (size == 1) {\n-\t\tint8_t *t = (int8_t*)q->qp;\n-\t\tfor (a = 0; a < m; ++a) {\n-\t\t\tint i, k, nlen = slen * p;\n-\t\t\tconst int8_t *ma = mat + a * m;\n-\t\t\tfor (i = 0; i < slen; ++i)\n-\t\t\t\tfor (k = i; k < nlen; k += slen) // p iterations\n-\t\t\t\t\t*t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;\n-\t\t}\n-\t} else {\n-\t\tint16_t *t = (int16_t*)q->qp;\n-\t\tfor (a = 0; a < m; ++a) {\n-\t\t\tint i, k, nlen = slen * p;\n-\t\t\tconst int8_t *ma = mat + a * m;\n-\t\t\tfor (i = 0; i < slen; ++i)\n-\t\t\t\tfor (k = i; k < nlen; k += slen) // p iterations\n-\t\t\t\t\t*t++ = (k >= qlen? 0 : ma[query[k]]);\n-\t\t}\n-\t}\n-\treturn q;\n-}\n-\n-kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins, int _e_ins, int xtra) // the first gap costs -(_o+_e)\n-{\n-\tint slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;\n-\tuint64_t *b;\n-\t_'..b't uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar_, uint32_t **cigar_)\n-{\n-\treturn ksw_global2(qlen, query, tlen, target, m, mat, gapo, gape, gapo, gape, w, n_cigar_, cigar_);\n-}\n-\n-/*******************************************\n- * Main function (not compiled by default) *\n- *******************************************/\n-\n-#ifdef _KSW_MAIN\n-\n-#include <unistd.h>\n-#include <stdio.h>\n-#include <zlib.h>\n-#include "kseq.h"\n-KSEQ_INIT(gzFile, err_gzread)\n-\n-unsigned char seq_nt4_table[256] = {\n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, \n-\t4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4\n-};\n-\n-int main(int argc, char *argv[])\n-{\n-\tint c, sa = 1, sb = 3, i, j, k, forward_only = 0, max_rseq = 0;\n-\tint8_t mat[25];\n-\tint gapo = 5, gape = 2, minsc = 0, xtra = KSW_XSTART;\n-\tuint8_t *rseq = 0;\n-\tgzFile fpt, fpq;\n-\tkseq_t *kst, *ksq;\n-\n-\t// parse command line\n-\twhile ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0) {\n-\t\tswitch (c) {\n-\t\t\tcase \'a\': sa = atoi(optarg); break;\n-\t\t\tcase \'b\': sb = atoi(optarg); break;\n-\t\t\tcase \'q\': gapo = atoi(optarg); break;\n-\t\t\tcase \'r\': gape = atoi(optarg); break;\n-\t\t\tcase \'t\': minsc = atoi(optarg); break;\n-\t\t\tcase \'f\': forward_only = 1; break;\n-\t\t\tcase \'1\': xtra |= KSW_XBYTE; break;\n-\t\t}\n-\t}\n-\tif (optind + 2 > argc) {\n-\t\tfprintf(stderr, "Usage: ksw [-1] [-f] [-a%d] [-b%d] [-q%d] [-r%d] [-t%d] <target.fa> <query.fa>\\n", sa, sb, gapo, gape, minsc);\n-\t\treturn 1;\n-\t}\n-\tif (minsc > 0xffff) minsc = 0xffff;\n-\txtra |= KSW_XSUBO | minsc;\n-\t// initialize scoring matrix\n-\tfor (i = k = 0; i < 4; ++i) {\n-\t\tfor (j = 0; j < 4; ++j)\n-\t\t\tmat[k++] = i == j? sa : -sb;\n-\t\tmat[k++] = 0; // ambiguous base\n-\t}\n-\tfor (j = 0; j < 5; ++j) mat[k++] = 0;\n-\t// open file\n-\tfpt = xzopen(argv[optind], "r"); kst = kseq_init(fpt);\n-\tfpq = xzopen(argv[optind+1], "r"); ksq = kseq_init(fpq);\n-\t// all-pair alignment\n-\twhile (kseq_read(ksq) > 0) {\n-\t\tkswq_t *q[2] = {0, 0};\n-\t\tkswr_t r;\n-\t\tfor (i = 0; i < (int)ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]];\n-\t\tif (!forward_only) { // reverse\n-\t\t\tif ((int)ksq->seq.m > max_rseq) {\n-\t\t\t\tmax_rseq = ksq->seq.m;\n-\t\t\t\trseq = (uint8_t*)realloc(rseq, max_rseq);\n-\t\t\t}\n-\t\t\tfor (i = 0, j = ksq->seq.l - 1; i < (int)ksq->seq.l; ++i, --j)\n-\t\t\t\trseq[j] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i];\n-\t\t}\n-\t\tgzrewind(fpt); kseq_rewind(kst);\n-\t\twhile (kseq_read(kst) > 0) {\n-\t\t\tfor (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];\n-\t\t\tr = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);\n-\t\t\tif (r.score >= minsc)\n-\t\t\t\terr_printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%d\\t%d\\t%d\\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2);\n-\t\t\tif (rseq) {\n-\t\t\t\tr = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);\n-\t\t\t\tif (r.score >= minsc)\n-\t\t\t\t\terr_printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%d\\t%d\\t%d\\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);\n-\t\t\t}\n-\t\t}\n-\t\tfree(q[0]); free(q[1]);\n-\t}\n-\tfree(rseq);\n-\tkseq_destroy(kst); err_gzclose(fpt);\n-\tkseq_destroy(ksq); err_gzclose(fpq);\n-\treturn 0;\n-}\n-#endif\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/ksw.h --- a/bwa-0.7.9a/ksw.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,114 +0,0 @@ -#ifndef __AC_KSW_H -#define __AC_KSW_H - -#include <stdint.h> - -#define KSW_XBYTE 0x10000 -#define KSW_XSTOP 0x20000 -#define KSW_XSUBO 0x40000 -#define KSW_XSTART 0x80000 - -struct _kswq_t; -typedef struct _kswq_t kswq_t; - -typedef struct { - int score; // best score - int te, qe; // target end and query end - int score2, te2; // second best score and ending position on the target - int tb, qb; // target start and query start -} kswr_t; - -#ifdef __cplusplus -extern "C" { -#endif - - /** - * Aligning two sequences - * - * @param qlen length of the query sequence (typically <tlen) - * @param query query sequence with 0 <= query[i] < m - * @param tlen length of the target sequence - * @param target target sequence - * @param m number of residue types - * @param mat m*m scoring matrix in one-dimension array - * @param gapo gap open penalty; a gap of length l cost "-(gapo+l*gape)" - * @param gape gap extension penalty - * @param xtra extra information (see below) - * @param qry query profile (see below) - * - * @return alignment information in a struct; unset values to -1 - * - * When xtra==0, ksw_align() uses a signed two-byte integer to store a - * score and only finds the best score and the end positions. The 2nd best - * score or the start positions are not attempted. The default behavior can - * be tuned by setting KSW_X* flags: - * - * KSW_XBYTE: use an unsigned byte to store a score. If overflow occurs, - * kswr_t::score will be set to 255 - * - * KSW_XSUBO: track the 2nd best score and the ending position on the - * target if the 2nd best is higher than (xtra&0xffff) - * - * KSW_XSTOP: stop if the maximum score is above (xtra&0xffff) - * - * KSW_XSTART: find the start positions - * - * When *qry==NULL, ksw_align() will compute and allocate the query profile - * and when the function returns, *qry will point to the profile, which can - * be deallocated simply by free(). If one query is aligned against multiple - * target sequences, *qry should be set to NULL during the first call and - * freed after the last call. Note that qry can equal 0. In this case, the - * query profile will be deallocated in ksw_align(). - */ - kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry); - kswr_t ksw_align2(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int xtra, kswq_t **qry); - - /** - * Banded global alignment - * - * @param qlen query length - * @param query query sequence with 0 <= query[i] < m - * @param tlen target length - * @param target target sequence with 0 <= target[i] < m - * @param m number of residue types - * @param mat m*m scoring mattrix in one-dimension array - * @param gapo gap open penalty; a gap of length l cost "-(gapo+l*gape)" - * @param gape gap extension penalty - * @param w band width - * @param n_cigar (out) number of CIGAR elements - * @param cigar (out) BAM-encoded CIGAR; caller need to deallocate with free() - * - * @return score of the alignment - */ - int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar, uint32_t **cigar); - int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int *n_cigar, uint32_t **cigar); - - /** - * Extend alignment - * - * The routine aligns $query and $target, assuming their upstream sequences, - * which are not provided, have been aligned with score $h0. In return, - * region [0,*qle) on the query and [0,*tle) on the target sequences are - * aligned together. If *gscore>=0, *gscore keeps the best score such that - * the entire query sequence is aligned; *gtle keeps the position on the - * target where *gscore is achieved. Returning *gscore and *gtle helps the - * caller to decide whether an end-to-end hit or a partial hit is preferred. - * - * The first 9 parameters are identical to those in ksw_global() - * - * @param h0 alignment score of upstream sequences - * @param _qle (out) length of the query in the alignment - * @param _tle (out) length of the target in the alignment - * @param _gtle (out) length of the target if query is fully aligned - * @param _gscore (out) score of the best end-to-end alignment; negative if not found - * - * @return best semi-local alignment score - */ - int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); - int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/kthread.c --- a/bwa-0.7.9a/kthread.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,53 +0,0 @@ -#include <pthread.h> -#include <stdlib.h> - -struct kt_for_t; - -typedef struct { - struct kt_for_t *t; - int i; -} ktf_worker_t; - -typedef struct kt_for_t { - int n_threads, n; - ktf_worker_t *w; - void (*func)(void*,int,int); - void *data; -} kt_for_t; - -static inline int steal_work(kt_for_t *t) -{ - int i, k, min = 0x7fffffff, min_i = -1; - for (i = 0; i < t->n_threads; ++i) - if (min > t->w[i].i) min = t->w[i].i, min_i = i; - k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads); - return k >= t->n? -1 : k; -} - -static void *ktf_worker(void *data) -{ - ktf_worker_t *w = (ktf_worker_t*)data; - int i; - for (;;) { - i = __sync_fetch_and_add(&w->i, w->t->n_threads); - if (i >= w->t->n) break; - w->t->func(w->t->data, i, w - w->t->w); - } - while ((i = steal_work(w->t)) >= 0) - w->t->func(w->t->data, i, w - w->t->w); - pthread_exit(0); -} - -void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n) -{ - int i; - kt_for_t t; - pthread_t *tid; - t.func = func, t.data = data, t.n_threads = n_threads, t.n = n; - t.w = (ktf_worker_t*)alloca(n_threads * sizeof(ktf_worker_t)); - tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t)); - for (i = 0; i < n_threads; ++i) - t.w[i].t = &t, t.w[i].i = i; - for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]); - for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/kvec.h --- a/bwa-0.7.9a/kvec.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,94 +0,0 @@ -/* The MIT License - - Copyright (c) 2008, by Attractive Chaos <attractor@live.co.uk> - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* - An example: - -#include "kvec.h" -int main() { - kvec_t(int) array; - kv_init(array); - kv_push(int, array, 10); // append - kv_a(int, array, 20) = 5; // dynamic - kv_A(array, 20) = 4; // static - kv_destroy(array); - return 0; -} -*/ - -/* - 2008-09-22 (0.1.0): - - * The initial version. - -*/ - -#ifndef AC_KVEC_H -#define AC_KVEC_H - -#include <stdlib.h> - -#ifdef USE_MALLOC_WRAPPERS -# include "malloc_wrap.h" -#endif - -#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) - -#define kvec_t(type) struct { size_t n, m; type *a; } -#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) -#define kv_destroy(v) free((v).a) -#define kv_A(v, i) ((v).a[(i)]) -#define kv_pop(v) ((v).a[--(v).n]) -#define kv_size(v) ((v).n) -#define kv_max(v) ((v).m) - -#define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m)) - -#define kv_copy(type, v1, v0) do { \ - if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \ - (v1).n = (v0).n; \ - memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ - } while (0) \ - -#define kv_push(type, v, x) do { \ - if ((v).n == (v).m) { \ - (v).m = (v).m? (v).m<<1 : 2; \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ - } \ - (v).a[(v).n++] = (x); \ - } while (0) - -#define kv_pushp(type, v) ((((v).n == (v).m)? \ - ((v).m = ((v).m? (v).m<<1 : 2), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ - : 0), &(v).a[(v).n++]) - -#define kv_a(type, v, i) (((v).m <= (size_t)(i)? \ - ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \ - (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ - : (v).n <= (size_t)(i)? (v).n = (i) + 1 \ - : 0), (v).a[(i)]) - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/main.c --- a/bwa-0.7.9a/main.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,100 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include "kstring.h" -#include "utils.h" - -#ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.9a-r786" -#endif - -int bwa_fa2pac(int argc, char *argv[]); -int bwa_pac2bwt(int argc, char *argv[]); -int bwa_bwtupdate(int argc, char *argv[]); -int bwa_bwt2sa(int argc, char *argv[]); -int bwa_index(int argc, char *argv[]); -int bwt_bwtgen_main(int argc, char *argv[]); - -int bwa_aln(int argc, char *argv[]); -int bwa_sai2sam_se(int argc, char *argv[]); -int bwa_sai2sam_pe(int argc, char *argv[]); - -int bwa_bwtsw2(int argc, char *argv[]); - -int main_fastmap(int argc, char *argv[]); -int main_mem(int argc, char *argv[]); - -int main_pemerge(int argc, char *argv[]); - -char *bwa_pg; - -static int usage() -{ - fprintf(stderr, "\n"); - fprintf(stderr, "Program: bwa (alignment via Burrows-Wheeler transformation)\n"); - fprintf(stderr, "Version: %s\n", PACKAGE_VERSION); - fprintf(stderr, "Contact: Heng Li <lh3@sanger.ac.uk>\n\n"); - fprintf(stderr, "Usage: bwa <command> [options]\n\n"); - fprintf(stderr, "Command: index index sequences in the FASTA format\n"); - fprintf(stderr, " mem BWA-MEM algorithm\n"); - fprintf(stderr, " fastmap identify super-maximal exact matches\n"); - fprintf(stderr, " pemerge merge overlapping paired ends (EXPERIMENTAL)\n"); - fprintf(stderr, " aln gapped/ungapped alignment\n"); - fprintf(stderr, " samse generate alignment (single ended)\n"); - fprintf(stderr, " sampe generate alignment (paired ended)\n"); - fprintf(stderr, " bwasw BWA-SW for long queries\n"); - fprintf(stderr, "\n"); - fprintf(stderr, " fa2pac convert FASTA to PAC format\n"); - fprintf(stderr, " pac2bwt generate BWT from PAC\n"); - fprintf(stderr, " pac2bwtgen alternative algorithm for generating BWT\n"); - fprintf(stderr, " bwtupdate update .bwt to the new format\n"); - fprintf(stderr, " bwt2sa generate SA from BWT and Occ\n"); - fprintf(stderr, "\n"); - fprintf(stderr, -"Note: To use BWA, you need to first index the genome with `bwa index'.\n" -" There are three alignment algorithms in BWA: `mem', `bwasw', and\n" -" `aln/samse/sampe'. If you are not sure which to use, try `bwa mem'\n" -" first. Please `man ./bwa.1' for the manual.\n\n"); - return 1; -} - -int main(int argc, char *argv[]) -{ - int i, ret; - double t_real; - kstring_t pg = {0,0,0}; - t_real = realtime(); - ksprintf(&pg, "@PG\tID:bwa\tPN:bwa\tVN:%s\tCL:%s", PACKAGE_VERSION, argv[0]); - for (i = 1; i < argc; ++i) ksprintf(&pg, " %s", argv[i]); - bwa_pg = pg.s; - if (argc < 2) return usage(); - if (strcmp(argv[1], "fa2pac") == 0) ret = bwa_fa2pac(argc-1, argv+1); - else if (strcmp(argv[1], "pac2bwt") == 0) ret = bwa_pac2bwt(argc-1, argv+1); - else if (strcmp(argv[1], "pac2bwtgen") == 0) ret = bwt_bwtgen_main(argc-1, argv+1); - else if (strcmp(argv[1], "bwtupdate") == 0) ret = bwa_bwtupdate(argc-1, argv+1); - else if (strcmp(argv[1], "bwt2sa") == 0) ret = bwa_bwt2sa(argc-1, argv+1); - else if (strcmp(argv[1], "index") == 0) ret = bwa_index(argc-1, argv+1); - else if (strcmp(argv[1], "aln") == 0) ret = bwa_aln(argc-1, argv+1); - else if (strcmp(argv[1], "samse") == 0) ret = bwa_sai2sam_se(argc-1, argv+1); - else if (strcmp(argv[1], "sampe") == 0) ret = bwa_sai2sam_pe(argc-1, argv+1); - else if (strcmp(argv[1], "bwtsw2") == 0) ret = bwa_bwtsw2(argc-1, argv+1); - else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); - else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1); - else if (strcmp(argv[1], "fastmap") == 0) ret = main_fastmap(argc-1, argv+1); - else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1); - else if (strcmp(argv[1], "pemerge") == 0) ret = main_pemerge(argc-1, argv+1); - else { - fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]); - return 1; - } - err_fflush(stdout); - err_fclose(stdout); - if (ret == 0) { - fprintf(stderr, "[%s] Version: %s\n", __func__, PACKAGE_VERSION); - fprintf(stderr, "[%s] CMD:", __func__); - for (i = 0; i < argc; ++i) - fprintf(stderr, " %s", argv[i]); - fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime()); - } - free(bwa_pg); - return ret; -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/malloc_wrap.c --- a/bwa-0.7.9a/malloc_wrap.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,57 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <errno.h> -#ifdef USE_MALLOC_WRAPPERS -/* Don't wrap ourselves */ -# undef USE_MALLOC_WRAPPERS -#endif -#include "malloc_wrap.h" - -void *wrap_calloc(size_t nmemb, size_t size, - const char *file, unsigned int line, const char *func) { - void *p = calloc(nmemb, size); - if (NULL == p) { - fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", - func, nmemb * size, file, line, strerror(errno)); - exit(EXIT_FAILURE); - } - return p; -} - -void *wrap_malloc(size_t size, - const char *file, unsigned int line, const char *func) { - void *p = malloc(size); - if (NULL == p) { - fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", - func, size, file, line, strerror(errno)); - exit(EXIT_FAILURE); - } - return p; -} - -void *wrap_realloc(void *ptr, size_t size, - const char *file, unsigned int line, const char *func) { - void *p = realloc(ptr, size); - if (NULL == p) { - fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", - func, size, file, line, strerror(errno)); - exit(EXIT_FAILURE); - } - return p; -} - -char *wrap_strdup(const char *s, - const char *file, unsigned int line, const char *func) { - char *p = strdup(s); - if (NULL == p) { - fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", - func, strlen(s), file, line, strerror(errno)); - exit(EXIT_FAILURE); - } - return p; -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/malloc_wrap.h --- a/bwa-0.7.9a/malloc_wrap.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,47 +0,0 @@ -#ifndef MALLOC_WRAP_H -#define MALLOC_WRAP_H - -#include <stdlib.h> /* Avoid breaking the usual definitions */ -#include <string.h> - -#ifdef __cplusplus -extern "C" { -#endif - - void *wrap_calloc(size_t nmemb, size_t size, - const char *file, unsigned int line, const char *func); - void *wrap_malloc(size_t size, - const char *file, unsigned int line, const char *func); - void *wrap_realloc(void *ptr, size_t size, - const char *file, unsigned int line, const char *func); - char *wrap_strdup(const char *s, - const char *file, unsigned int line, const char *func); - -#ifdef __cplusplus -} -#endif - -#ifdef USE_MALLOC_WRAPPERS -# ifdef calloc -# undef calloc -# endif -# define calloc(n, s) wrap_calloc( (n), (s), __FILE__, __LINE__, __func__) - -# ifdef malloc -# undef malloc -# endif -# define malloc(s) wrap_malloc( (s), __FILE__, __LINE__, __func__) - -# ifdef realloc -# undef realloc -# endif -# define realloc(p, s) wrap_realloc((p), (s), __FILE__, __LINE__, __func__) - -# ifdef strdup -# undef strdup -# endif -# define strdup(s) wrap_strdup( (s), __FILE__, __LINE__, __func__) - -#endif /* USE_MALLOC_WRAPPERS */ - -#endif /* MALLOC_WRAP_H */ |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/pemerge.c --- a/bwa-0.7.9a/pemerge.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,291 +0,0 @@\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <unistd.h>\n-#include <string.h>\n-#include <zlib.h>\n-#include <pthread.h>\n-#include <errno.h>\n-#include "ksw.h"\n-#include "kseq.h"\n-#include "kstring.h"\n-#include "bwa.h"\n-#include "utils.h"\n-KSEQ_DECLARE(gzFile)\n-\n-#ifdef USE_MALLOC_WRAPPERS\n-# include "malloc_wrap.h"\n-#endif\n-\n-#define MAX_SCORE_RATIO 0.9f\n-#define MAX_ERR 8\n-\n-static const char *err_msg[MAX_ERR+1] = {\n-\t"successful merges",\n-\t"low-scoring pairs",\n-\t"pairs where the best SW alignment is not an overlap (long left end)",\n-\t"pairs where the best SW alignment is not an overlap (long right end)",\n-\t"pairs with large 2nd best SW score",\n-\t"pairs with gapped overlap",\n-\t"pairs where the end-to-end alignment is inconsistent with SW",\n-\t"pairs potentially with tandem overlaps",\n-\t"pairs with high sum of errors"\n-};\n-\n-typedef struct {\n-\tint a, b, q, r, w;\n-\tint q_def, q_thres;\n-\tint T;\n-\tint chunk_size;\n-\tint n_threads;\n-\tint flag; // bit 1: print merged; 2: print unmerged\n-\tint8_t mat[25];\n-} pem_opt_t;\n-\n-pem_opt_t *pem_opt_init()\n-{\n-\tpem_opt_t *opt;\n-\topt = calloc(1, sizeof(pem_opt_t));\n-\topt->a = 5; opt->b = 4; opt->q = 2, opt->r = 17; opt->w = 20;\n-\topt->T = opt->a * 10;\n-\topt->q_def = 20;\n-\topt->q_thres = 70;\n-\topt->chunk_size = 10000000;\n-\topt->n_threads = 1;\n-\topt->flag = 3;\n-\tbwa_fill_scmat(opt->a, opt->b, opt->mat);\n-\treturn opt;\n-}\n-\n-int bwa_pemerge(const pem_opt_t *opt, bseq1_t x[2])\n-{\n-\tuint8_t *s[2], *q[2], *seq, *qual;\n-\tint i, xtra, l, l_seq, sum_q, ret = 0;\n-\tkswr_t r;\n-\n-\ts[0] = malloc(x[0].l_seq); q[0] = malloc(x[0].l_seq);\n-\ts[1] = malloc(x[1].l_seq); q[1] = malloc(x[1].l_seq);\n-\tfor (i = 0; i < x[0].l_seq; ++i) {\n-\t\tint c = x[0].seq[i];\n-\t\ts[0][i] = c < 0 || c > 127? 4 : c <= 4? c : nst_nt4_table[c];\n-\t\tq[0][i] = x[0].qual? x[0].qual[i] - 33 : opt->q_def;\n-\t}\n-\tfor (i = 0; i < x[1].l_seq; ++i) {\n-\t\tint c = x[1].seq[x[1].l_seq - 1 - i];\n-\t\tc = c < 0 || c > 127? 4 : c < 4? c : nst_nt4_table[c];\n-\t\ts[1][i] = c < 4? 3 - c : 4;\n-\t\tq[1][i] = x[1].qual? x[1].qual[x[1].l_seq - 1 - i] - 33 : opt->q_def;\n-\t}\n-\n-\txtra = KSW_XSTART | KSW_XSUBO;\n-\tr = ksw_align(x[1].l_seq, s[1], x[0].l_seq, s[0], 5, opt->mat, opt->q, opt->r, xtra, 0);\n-\t++r.qe; ++r.te; // change to the half-close-half-open coordinates\n-\n-\tif (r.score < opt->T) { ret = -1; goto pem_ret; } // poor alignment\n-\tif (r.tb < r.qb) { ret = -2; goto pem_ret; } // no enough space for the left end\n-\tif (x[0].l_seq - r.te > x[1].l_seq - r.qe) { ret = -3; goto pem_ret; } // no enough space for the right end\n-\tif ((double)r.score2 / r.score >= MAX_SCORE_RATIO) { ret = -4; goto pem_ret; } // the second best score is too large\n-\tif (r.qe - r.qb != r.te - r.tb) { ret = -5; goto pem_ret; } // we do not allow gaps\n-\n-\t{ // test tandem match; O(n^2)\n-\t\tint max_m, max_m2, min_l, max_l, max_l2;\n-\t\tmax_m = max_m2 = 0; max_l = max_l2 = 0;\n-\t\tmin_l = x[0].l_seq < x[1].l_seq? x[0].l_seq : x[1].l_seq;\n-\t\tfor (l = 1; l < min_l; ++l) {\n-\t\t\tint m = 0, o = x[0].l_seq - l;\n-\t\t\tuint8_t *s0o = &s[0][o], *s1 = s[1];\n-\t\t\tfor (i = 0; i < l; ++i) // TODO: in principle, this can be done with SSE2. It is the bottleneck!\n-\t\t\t\tm += opt->mat[(s1[i]<<2) + s1[i] + s0o[i]]; // equivalent to s[1][i]*5 + s[0][o+i]\n-\t\t\tif (m > max_m) max_m2 = max_m, max_m = m, max_l2 = max_l, max_l = l;\n-\t\t\telse if (m > max_m2) max_m2 = m, max_l2 = l;\n-\t\t}\n-\t\tif (max_m < opt->T || max_l != x[0].l_seq - (r.tb - r.qb)) { ret = -6; goto pem_ret; }\n-\t\tif (max_l2 < max_l && max_m2 >= opt->T && (double)(max_m2 + (max_l - max_l2) * opt->a) / max_m >= MAX_SCORE_RATIO) {\n-\t\t\tret = -7; goto pem_ret;\n-\t\t}\n-\t\tif (max_l2 > max_l && (double)max_m2 / max_m >= MAX_SCORE_RATIO) { ret = -7; goto pem_ret; }\n-\t}\n-\n-\tl = x[0].l_seq - (r.tb - r.qb); // length to merge\n-\tl_seq = x[0].l_seq + x[1].l_seq - l;\n-\tseq = malloc(l_seq + 1);\n-\tqual = malloc(l_seq + 1);\n-\tmemcpy(seq, s[0], x[0].l_seq); memcpy(seq + x[0].l_seq, &s[1][l], x[1].l_seq - l);\n-\tmemcpy(qual, q[0], x[0].l_seq); memcpy(qual + x[0].l_seq, &q'..b'rr_putchar(s->qual? \'@\' : \'>\');\n-\terr_fputs(s->name, stdout);\n-\tif (rn == 1 || rn == 2) {\n-\t\terr_putchar(\'/\'); err_putchar(\'0\' + rn); err_putchar(\'\\n\');\n-\t} else err_puts(" merged");\n-\terr_puts(s->seq);\n-\tif (s->qual) {\n-\t\terr_puts("+"); err_puts(s->qual);\n-\t}\n-}\n-\n-typedef struct {\n-\tint n, start;\n-\tbseq1_t *seqs;\n-\tint64_t cnt[MAX_ERR+1];\n-\tconst pem_opt_t *opt;\n-} worker_t;\n-\n-void *worker(void *data)\n-{\n-\tworker_t *w = (worker_t*)data;\n-\tint i;\n-\tfor (i = w->start; i < w->n>>1; i += w->opt->n_threads)\n-\t\t++w->cnt[-bwa_pemerge(w->opt, &w->seqs[i<<1])];\n-\treturn 0;\n-}\n-\n-static void process_seqs(const pem_opt_t *opt, int n_, bseq1_t *seqs, int64_t cnt[MAX_ERR+1])\n-{\n-\tint i, j, n = n_>>1<<1;\n-\tworker_t *w;\n-\n-\tw = calloc(opt->n_threads, sizeof(worker_t));\n-\tfor (i = 0; i < opt->n_threads; ++i) {\n-\t\tworker_t *p = &w[i];\n-\t\tp->start = i; p->n = n;\n-\t\tp->opt = opt;\n-\t\tp->seqs = seqs;\n-\t}\n-\tif (opt->n_threads == 1) {\n-\t\tworker(w);\n-\t} else {\n-\t\tpthread_t *tid;\n-\t\ttid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));\n-\t\tfor (i = 0; i < opt->n_threads; ++i) pthread_create(&tid[i], 0, worker, &w[i]);\n-\t\tfor (i = 0; i < opt->n_threads; ++i) pthread_join(tid[i], 0);\n-\t\tfree(tid);\n-\t}\n-\tfor (i = 0; i < opt->n_threads; ++i) {\n-\t\tworker_t *p = &w[i];\n-\t\tfor (j = 0; j <= MAX_ERR; ++j) cnt[j] += p->cnt[j];\n-\t}\n-\tfree(w);\n-\tfor (i = 0; i < n>>1; ++i) {\n-\t\tif (seqs[i<<1|1].l_seq != 0) {\n-\t\t\tif (opt->flag&2) {\n-\t\t\t\tprint_bseq(&seqs[i<<1|0], 1);\n-\t\t\t\tprint_bseq(&seqs[i<<1|1], 2);\n-\t\t\t}\n-\t\t} else if (opt->flag&1)\n-\t\t\tprint_bseq(&seqs[i<<1|0], 0);\n-\t}\n-\tfor (i = 0; i < n; ++i) {\n-\t\tbseq1_t *s = &seqs[i];\n-\t\tfree(s->name); free(s->seq); free(s->qual); free(s->comment);\n-\t}\n-}\n-\n-int main_pemerge(int argc, char *argv[])\n-{\n-\tint c, flag = 0, i, n, min_ovlp = 10;\n-\tint64_t cnt[MAX_ERR+1];\n-\tbseq1_t *bseq;\n-\tgzFile fp, fp2 = 0;\n-\tkseq_t *ks, *ks2 = 0;\n-\tpem_opt_t *opt;\n-\n-\topt = pem_opt_init();\n-\twhile ((c = getopt(argc, argv, "muQ:t:T:")) >= 0) {\n-\t\tif (c == \'m\') flag |= 1;\n-\t\telse if (c == \'u\') flag |= 2;\n-\t\telse if (c == \'Q\') opt->q_thres = atoi(optarg);\n-\t\telse if (c == \'t\') opt->n_threads = atoi(optarg);\n-\t\telse if (c == \'T\') min_ovlp = atoi(optarg);\n-\t\telse return 1;\n-\t}\n-\tif (flag == 0) flag = 3;\n-\topt->flag = flag;\n-\topt->T = opt->a * min_ovlp;\n-\n-\tif (optind == argc) {\n-\t\tfprintf(stderr, "\\n");\n-\t\tfprintf(stderr, "Usage: bwa pemerge [-mu] <read1.fq> [read2.fq]\\n\\n");\n-\t\tfprintf(stderr, "Options: -m output merged reads only\\n");\n-\t\tfprintf(stderr, " -u output unmerged reads only\\n");\n-\t\tfprintf(stderr, " -t INT number of threads [%d]\\n", opt->n_threads);\n-\t\tfprintf(stderr, " -T INT minimum end overlap [%d]\\n", min_ovlp);\n-\t\tfprintf(stderr, " -Q INT max sum of errors [%d]\\n", opt->q_thres);\n-\t\tfprintf(stderr, "\\n");\n-\t\tfree(opt);\n-\t\treturn 1;\n-\t}\n-\n-\tfp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");\n-\tif (NULL == fp) {\n-\t\tfprintf(stderr, "Couldn\'t open %s : %s\\n",\n-\t\t\t\tstrcmp(argv[optind], "-") ? argv[optind] : "stdin",\n-\t\t\t\terrno ? strerror(errno) : "Out of memory");\n-\t\texit(EXIT_FAILURE);\n-\t}\n-\tks = kseq_init(fp);\n-\tif (optind + 1 < argc) {\n-\t\tfp2 = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "r") : gzdopen(fileno(stdin), "r");\n-\t\tif (NULL == fp) {\n-\t\t\tfprintf(stderr, "Couldn\'t open %s : %s\\n",\n-\t\t\t\t\tstrcmp(argv[optind+1], "-") ? argv[optind+1] : "stdin",\n-\t\t\t\t\terrno ? strerror(errno) : "Out of memory");\n-\t\t\texit(EXIT_FAILURE);\n-\t\t}\n-\t\tks2 = kseq_init(fp2);\n-\t}\n-\n-\tmemset(cnt, 0, 8 * (MAX_ERR+1));\n-\twhile ((bseq = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {\n-\t\tprocess_seqs(opt, n, bseq, cnt);\n-\t\tfree(bseq);\n-\t}\n-\n-\tfprintf(stderr, "%12ld %s\\n", (long)cnt[0], err_msg[0]);\n-\tfor (i = 1; i <= MAX_ERR; ++i)\n-\t\tfprintf(stderr, "%12ld %s\\n", (long)cnt[i], err_msg[i]);\n-\tkseq_destroy(ks);\n-\terr_gzclose(fp);\n-\tif (ks2) {\n-\t\tkseq_destroy(ks2);\n-\t\terr_gzclose(fp2);\n-\t}\n-\tfree(opt);\n-\n-\terr_fflush(stdout);\n-\n-\treturn 0;\n-}\n' |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/qualfa2fq.pl --- a/bwa-0.7.9a/qualfa2fq.pl Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,27 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use warnings; - -die("Usage: qualfa2fq.pl <in.fasta> <in.qual>\n") if (@ARGV != 2); - -my ($fhs, $fhq, $q); -open($fhs, ($ARGV[0] =~ /\.gz$/)? "gzip -dc $ARGV[0] |" : $ARGV[0]) || die; -open($fhq, ($ARGV[1] =~ /\.gz$/)? "gzip -dc $ARGV[1] |" : $ARGV[1]) || die; - -$/ = ">"; <$fhs>; <$fhq>; $/ = "\n"; -while (<$fhs>) { - $q = <$fhq>; - print "\@$_"; - $/ = ">"; - $_ = <$fhs>; $q = <$fhq>; - chomp; chomp($q); - $q =~ s/\s*(\d+)\s*/chr($1+33)/eg; - print $_, "+\n"; - for (my $i = 0; $i < length($q); $i += 60) { - print substr($q, $i, 60), "\n"; - } - $/ = "\n"; -} - -close($fhs); close($fhq); |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/utils.c --- a/bwa-0.7.9a/utils.c Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,284 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Genome Research Ltd (GRL). - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* Contact: Heng Li <lh3@sanger.ac.uk> */ -#define FSYNC_ON_FLUSH - -#include <stdio.h> -#include <stdarg.h> -#include <stdlib.h> -#include <string.h> -#include <zlib.h> -#include <errno.h> -#ifdef FSYNC_ON_FLUSH -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#endif -#include <sys/resource.h> -#include <sys/time.h> -#include "utils.h" - -#include "ksort.h" -#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y)) -KSORT_INIT(128, pair64_t, pair64_lt) -KSORT_INIT(64, uint64_t, ks_lt_generic) - -#include "kseq.h" -KSEQ_INIT2(, gzFile, err_gzread) - -/******************** - * System utilities * - ********************/ - -FILE *err_xopen_core(const char *func, const char *fn, const char *mode) -{ - FILE *fp = 0; - if (strcmp(fn, "-") == 0) - return (strstr(mode, "r"))? stdin : stdout; - if ((fp = fopen(fn, mode)) == 0) { - err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno)); - } - return fp; -} - -FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp) -{ - if (freopen(fn, mode, fp) == 0) { - err_fatal(func, "fail to open file '%s' : %s", fn, strerror(errno)); - } - return fp; -} - -gzFile err_xzopen_core(const char *func, const char *fn, const char *mode) -{ - gzFile fp; - if (strcmp(fn, "-") == 0) { - fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode); - /* According to zlib.h, this is the only reason gzdopen can fail */ - if (!fp) err_fatal(func, "Out of memory"); - return fp; - } - if ((fp = gzopen(fn, mode)) == 0) { - err_fatal(func, "fail to open file '%s' : %s", fn, errno ? strerror(errno) : "Out of memory"); - } - return fp; -} - -void err_fatal(const char *header, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - fprintf(stderr, "[%s] ", header); - vfprintf(stderr, fmt, args); - fprintf(stderr, "\n"); - va_end(args); - exit(EXIT_FAILURE); -} - -void err_fatal_core(const char *header, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - fprintf(stderr, "[%s] ", header); - vfprintf(stderr, fmt, args); - fprintf(stderr, " Abort!\n"); - va_end(args); - abort(); -} - -void _err_fatal_simple(const char *func, const char *msg) -{ - fprintf(stderr, "[%s] %s\n", func, msg); - exit(EXIT_FAILURE); -} - -void _err_fatal_simple_core(const char *func, const char *msg) -{ - fprintf(stderr, "[%s] %s Abort!\n", func, msg); - abort(); -} - -size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) -{ - size_t ret = fwrite(ptr, size, nmemb, stream); - if (ret != nmemb) - _err_fatal_simple("fwrite", strerror(errno)); - return ret; -} - -size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream) -{ - size_t ret = fread(ptr, size, nmemb, stream); - if (ret != nmemb) - { - _err_fatal_simple("fread", ferror(stream) ? strerror(errno) : "Unexpected end of file"); - } - return ret; -} - -int err_gzread(gzFile file, void *ptr, unsigned int len) -{ - int ret = gzread(file, ptr, len); - - if (ret < 0) - { - int errnum = 0; - const char *msg = gzerror(file, &errnum); - _err_fatal_simple("gzread", Z_ERRNO == errnum ? strerror(errno) : msg); - } - - return ret; -} - -int err_fseek(FILE *stream, long offset, int whence) -{ - int ret = fseek(stream, offset, whence); - if (0 != ret) - { - _err_fatal_simple("fseek", strerror(errno)); - } - return ret; -} - -long err_ftell(FILE *stream) -{ - long ret = ftell(stream); - if (-1 == ret) - { - _err_fatal_simple("ftell", strerror(errno)); - } - return ret; -} - -int err_printf(const char *format, ...) -{ - va_list arg; - int done; - va_start(arg, format); - done = vfprintf(stdout, format, arg); - int saveErrno = errno; - va_end(arg); - if (done < 0) _err_fatal_simple("vfprintf(stdout)", strerror(saveErrno)); - return done; -} - -int err_fprintf(FILE *stream, const char *format, ...) -{ - va_list arg; - int done; - va_start(arg, format); - done = vfprintf(stream, format, arg); - int saveErrno = errno; - va_end(arg); - if (done < 0) _err_fatal_simple("vfprintf", strerror(saveErrno)); - return done; -} - -int err_fputc(int c, FILE *stream) -{ - int ret = putc(c, stream); - if (EOF == ret) - { - _err_fatal_simple("fputc", strerror(errno)); - } - - return ret; -} - -int err_fputs(const char *s, FILE *stream) -{ - int ret = fputs(s, stream); - if (EOF == ret) - { - _err_fatal_simple("fputs", strerror(errno)); - } - - return ret; -} - -int err_fflush(FILE *stream) -{ - int ret = fflush(stream); - if (ret != 0) _err_fatal_simple("fflush", strerror(errno)); - -#ifdef FSYNC_ON_FLUSH - /* Calling fflush() ensures that all the data has made it to the - kernel buffers, but this may not be sufficient for remote filesystems - (e.g. NFS, lustre) as an error may still occur while the kernel - is copying the buffered data to the file server. To be sure of - catching these errors, we need to call fsync() on the file - descriptor, but only if it is a regular file. */ - { - struct stat sbuf; - if (0 != fstat(fileno(stream), &sbuf)) - _err_fatal_simple("fstat", strerror(errno)); - - if (S_ISREG(sbuf.st_mode)) - { - if (0 != fsync(fileno(stream))) - _err_fatal_simple("fsync", strerror(errno)); - } - } -#endif - return ret; -} - -int err_fclose(FILE *stream) -{ - int ret = fclose(stream); - if (ret != 0) _err_fatal_simple("fclose", strerror(errno)); - return ret; -} - -int err_gzclose(gzFile file) -{ - int ret = gzclose(file); - if (Z_OK != ret) - { - _err_fatal_simple("gzclose", Z_ERRNO == ret ? strerror(errno) : zError(ret)); - } - - return ret; -} - -/********* - * Timer * - *********/ - -double cputime() -{ - struct rusage r; - getrusage(RUSAGE_SELF, &r); - return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec); -} - -double realtime() -{ - struct timeval tp; - struct timezone tzp; - gettimeofday(&tp, &tzp); - return tp.tv_sec + tp.tv_usec * 1e-6; -} |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/utils.h --- a/bwa-0.7.9a/utils.h Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,111 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Genome Research Ltd (GRL). - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* Contact: Heng Li <lh3@sanger.ac.uk> */ - -#ifndef LH3_UTILS_H -#define LH3_UTILS_H - -#include <stdint.h> -#include <stdio.h> -#include <zlib.h> - -#ifdef __GNUC__ -// Tell GCC to validate printf format string and args -#define ATTRIBUTE(list) __attribute__ (list) -#else -#define ATTRIBUTE(list) -#endif - -#define err_fatal_simple(msg) _err_fatal_simple(__func__, msg) -#define err_fatal_simple_core(msg) _err_fatal_simple_core(__func__, msg) - -#define xopen(fn, mode) err_xopen_core(__func__, fn, mode) -#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp) -#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode) - -#define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg) - -typedef struct { - uint64_t x, y; -} pair64_t; - -typedef struct { size_t n, m; uint64_t *a; } uint64_v; -typedef struct { size_t n, m; pair64_t *a; } pair64_v; - -#ifdef __cplusplus -extern "C" { -#endif - - void err_fatal(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn)); - void err_fatal_core(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn)); - void _err_fatal_simple(const char *func, const char *msg) ATTRIBUTE((noreturn)); - void _err_fatal_simple_core(const char *func, const char *msg) ATTRIBUTE((noreturn)); - FILE *err_xopen_core(const char *func, const char *fn, const char *mode); - FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp); - gzFile err_xzopen_core(const char *func, const char *fn, const char *mode); - size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); - size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream); - - int err_gzread(gzFile file, void *ptr, unsigned int len); - int err_fseek(FILE *stream, long offset, int whence); -#define err_rewind(FP) err_fseek((FP), 0, SEEK_SET) - long err_ftell(FILE *stream); - int err_fprintf(FILE *stream, const char *format, ...) - ATTRIBUTE((format(printf, 2, 3))); - int err_printf(const char *format, ...) - ATTRIBUTE((format(printf, 1, 2))); - int err_fputc(int c, FILE *stream); -#define err_putchar(C) err_fputc((C), stdout) - int err_fputs(const char *s, FILE *stream); -#define err_puts(S) err_fputs((S), stdout) - int err_fflush(FILE *stream); - int err_fclose(FILE *stream); - int err_gzclose(gzFile file); - - double cputime(); - double realtime(); - - void ks_introsort_64 (size_t n, uint64_t *a); - void ks_introsort_128(size_t n, pair64_t *a); - -#ifdef __cplusplus -} -#endif - -static inline uint64_t hash_64(uint64_t key) -{ - key += ~(key << 32); - key ^= (key >> 22); - key += ~(key << 13); - key ^= (key >> 8); - key += (key << 3); - key ^= (key >> 15); - key += ~(key << 27); - key ^= (key >> 31); - return key; -} - -#endif |
b |
diff -r ce5a8082bbb8 -r abdbc8fe98dd bwa-0.7.9a/xa2multi.pl --- a/bwa-0.7.9a/xa2multi.pl Thu Aug 14 02:16:48 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,25 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use warnings; - -while (<>) { - if (/\tXA:Z:(\S+)/) { - my $l = $1; - print; - my @t = split("\t"); - while ($l =~ /([^,;]+),([-+]\d+),([^,]+),(\d+);/g) { - my $mchr = ($t[6] eq $1)? '=' : $t[6]; # FIXME: TLEN/ISIZE is not calculated! - my $seq = $t[9]; - my $phred = $t[10]; - # if alternative alignment has other orientation than primary, - # then print the reverse (complement) of sequence and phred string - if ((($t[1]&0x10)>0) xor ($2<0)) { - $seq = reverse $seq; - $seq =~ tr/ACGTacgt/TGCAtgca/; - $phred = reverse $phred; - } - print(join("\t", $t[0], 0x100|($t[1]&0x6e9)|($2<0?0x10:0), $1, abs($2), 0, $3, @t[6..7], 0, $seq, $phred, "NM:i:$4"), "\n"); - } - } else { print; } -} |