changeset 0:dd1186b11b3b draft

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:14 -0400
parents
children a9636dc1e99a
files bwa-0.6.2/COPYING bwa-0.6.2/ChangeLog bwa-0.6.2/Makefile bwa-0.6.2/NEWS bwa-0.6.2/QSufSort.c bwa-0.6.2/QSufSort.h bwa-0.6.2/README bwa-0.6.2/bamlite.c bwa-0.6.2/bamlite.h bwa-0.6.2/bntseq.c bwa-0.6.2/bntseq.h bwa-0.6.2/bwa.1 bwa-0.6.2/bwa.c bwa-0.6.2/bwa.h bwa-0.6.2/bwape.c bwa-0.6.2/bwase.c bwa-0.6.2/bwase.h bwa-0.6.2/bwaseqio.c bwa-0.6.2/bwt.c bwa-0.6.2/bwt.h bwa-0.6.2/bwt_gen.c bwa-0.6.2/bwt_lite.c bwa-0.6.2/bwt_lite.h bwa-0.6.2/bwtaln.c bwa-0.6.2/bwtaln.h bwa-0.6.2/bwtgap.c bwa-0.6.2/bwtgap.h bwa-0.6.2/bwtindex.c bwa-0.6.2/bwtio.c bwa-0.6.2/bwtmisc.c bwa-0.6.2/bwtsw2.h bwa-0.6.2/bwtsw2_aux.c bwa-0.6.2/bwtsw2_chain.c bwa-0.6.2/bwtsw2_core.c bwa-0.6.2/bwtsw2_main.c bwa-0.6.2/bwtsw2_pair.c bwa-0.6.2/cs2nt.c bwa-0.6.2/fastmap.c bwa-0.6.2/is.c bwa-0.6.2/khash.h bwa-0.6.2/kseq.h bwa-0.6.2/ksort.h bwa-0.6.2/kstring.c bwa-0.6.2/kstring.h bwa-0.6.2/ksw.c bwa-0.6.2/ksw.h bwa-0.6.2/kvec.h bwa-0.6.2/main.c bwa-0.6.2/main.h bwa-0.6.2/qualfa2fq.pl bwa-0.6.2/simple_dp.c bwa-0.6.2/solid2fastq.pl bwa-0.6.2/stdaln.c bwa-0.6.2/stdaln.h bwa-0.6.2/utils.c bwa-0.6.2/utils.h bwa-0.6.2/xa2multi.pl
diffstat 57 files changed, 18573 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/COPYING	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/ChangeLog	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,3864 @@
+------------------------------------------------------------------------
+r1605 | lh3 | 2010-12-29 20:20:20 -0500 (Wed, 29 Dec 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.9rc1-2 (r1605)
+ * fixed a typo/bug in bwasw
+
+------------------------------------------------------------------------
+r1587 | lh3 | 2010-12-21 18:48:30 -0500 (Tue, 21 Dec 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+a typo in the manual
+
+------------------------------------------------------------------------
+r1586 | lh3 | 2010-12-21 18:47:48 -0500 (Tue, 21 Dec 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/utils.c
+   M /branches/prog/bwa/utils.h
+
+ * bwa-0.5.9rc1-1 (r1586)
+ * a few patches by John
+
+------------------------------------------------------------------------
+r1562 | lh3 | 2010-12-10 01:02:06 -0500 (Fri, 10 Dec 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+
+documentation on specifying @RG
+
+------------------------------------------------------------------------
+r1561 | lh3 | 2010-12-10 00:45:40 -0500 (Fri, 10 Dec 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.9rc1 (r1561)
+
+------------------------------------------------------------------------
+r1560 | lh3 | 2010-12-10 00:29:08 -0500 (Fri, 10 Dec 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/main.c
+
+ * fixed a small memory leak caused by the BAM reader
+ * fixed a memory violation, also in the BAM reader
+
+------------------------------------------------------------------------
+r1559 | lh3 | 2010-12-10 00:10:48 -0500 (Fri, 10 Dec 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/Makefile
+
+change Makefile gcc options
+
+------------------------------------------------------------------------
+r1558 | lh3 | 2010-12-10 00:09:22 -0500 (Fri, 10 Dec 2010) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.8-6 (r1557)
+ * added a little more comments to BWA-SW
+ * randomly choosing a mapping if there are more than one
+
+------------------------------------------------------------------------
+r1557 | lh3 | 2010-12-09 21:58:00 -0500 (Thu, 09 Dec 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2_aux.c
+
+sometimes unmapped reads may not be printed...
+
+------------------------------------------------------------------------
+r1556 | lh3 | 2010-12-09 21:50:26 -0500 (Thu, 09 Dec 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2_aux.c
+
+print unmapped reads
+
+------------------------------------------------------------------------
+r1555 | lh3 | 2010-12-09 21:17:20 -0500 (Thu, 09 Dec 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.8-5 (r1555)
+ * BAM input documentation
+
+------------------------------------------------------------------------
+r1544 | lh3 | 2010-11-23 11:01:41 -0500 (Tue, 23 Nov 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.8-4 (r1544)
+ * supporting adding RG tags and RG lines
+
+------------------------------------------------------------------------
+r1543 | lh3 | 2010-11-23 00:16:40 -0500 (Tue, 23 Nov 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.8-3 (r1543)
+ * fixed a memory leak
+
+------------------------------------------------------------------------
+r1542 | lh3 | 2010-11-22 23:50:56 -0500 (Mon, 22 Nov 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.8-2 (r1542)
+ * fixed a long existing bug in random placement of reads
+
+------------------------------------------------------------------------
+r1541 | lh3 | 2010-11-22 23:27:29 -0500 (Mon, 22 Nov 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   A /branches/prog/bwa/bamlite.c
+   A /branches/prog/bwa/bamlite.h
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+preliminary BAM input support
+
+------------------------------------------------------------------------
+r1537 | lh3 | 2010-10-16 23:46:20 -0400 (Sat, 16 Oct 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwa.1
+
+change version number and ChangeLog
+
+------------------------------------------------------------------------
+r1536 | lh3 | 2010-10-16 23:35:10 -0400 (Sat, 16 Oct 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+
+ * fixed a bug in the scoring matrix
+ * release bwa-0.5.8c (r1536)
+
+------------------------------------------------------------------------
+r1451 | lh3 | 2010-06-15 09:43:52 -0400 (Tue, 15 Jun 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+version change
+
+------------------------------------------------------------------------
+r1450 | lh3 | 2010-06-15 09:42:21 -0400 (Tue, 15 Jun 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+
+ * bwa-0.5.8b (r1450)
+ * fixed a bug in scoring matrix
+
+------------------------------------------------------------------------
+r1445 | lh3 | 2010-06-11 08:58:33 -0400 (Fri, 11 Jun 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+
+fixed a serious bug
+
+------------------------------------------------------------------------
+r1442 | lh3 | 2010-06-08 10:22:14 -0400 (Tue, 08 Jun 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.8 (r1442)
+
+------------------------------------------------------------------------
+r1440 | lh3 | 2010-05-19 13:43:50 -0400 (Wed, 19 May 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-r1440
+ * sorry, forgot to remove a debugging line
+
+------------------------------------------------------------------------
+r1439 | lh3 | 2010-05-19 13:43:08 -0400 (Wed, 19 May 2010) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-r1439
+ * fixed a bug in bwasw caused by a recent modification
+ * throwing insane insert size when estimating isize
+
+------------------------------------------------------------------------
+r1425 | lh3 | 2010-04-29 15:15:23 -0400 (Thu, 29 Apr 2010) | 10 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.7-7 (r1425)
+ * fixed a minor bug in bwasw command-line parsing
+ * When band-width is not large enough, bwasw may find two highly
+   overlapping but not completely overlapping alignments. The old
+   version will filter out one of them, which leads to false
+   negatives. The current version outputs both. This solution is obviously not
+   ideal. The ideal one would be to increase the band-width and redo the
+   alignment.
+
+
+------------------------------------------------------------------------
+r1399 | lh3 | 2010-04-16 09:20:49 -0400 (Fri, 16 Apr 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.7-6 (r1399)
+ * fixed a typo/bug (by Vaughn Iverson)
+
+------------------------------------------------------------------------
+r1329 | lh3 | 2010-03-19 23:32:46 -0400 (Fri, 19 Mar 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+small correction
+
+------------------------------------------------------------------------
+r1328 | lh3 | 2010-03-19 23:28:44 -0400 (Fri, 19 Mar 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.7-4 (r1328)
+ * automatically adjust ap_prior based on alignment
+
+------------------------------------------------------------------------
+r1327 | lh3 | 2010-03-19 23:02:40 -0400 (Fri, 19 Mar 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.5.7-3 (r1327)
+ * evaluate hits obtained from SW alignment in a more proper way.
+
+------------------------------------------------------------------------
+r1320 | lh3 | 2010-03-17 15:13:22 -0400 (Wed, 17 Mar 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+
+fixed a potential out-of-boundary error. Need more testing.
+
+------------------------------------------------------------------------
+r1319 | lh3 | 2010-03-14 22:44:46 -0400 (Sun, 14 Mar 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+
+insert size is `weird' if the 3rd quartile is larger than 100,000bp
+
+------------------------------------------------------------------------
+r1318 | lh3 | 2010-03-14 22:37:35 -0400 (Sun, 14 Mar 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.7-2 (r1318)
+ * in sampe, allow to disable insert size estimate
+
+------------------------------------------------------------------------
+r1317 | lh3 | 2010-03-14 22:14:14 -0400 (Sun, 14 Mar 2010) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/solid2fastq.pl
+
+ * bwa-0.5.7-1 (r1317)
+ * fixed a potential bug in solid2fastq.pl
+ * fixed a bug in calculating mapping quality (by Rodrigo Goya)
+ * fixed a very rare bug (if it ever occurs) about pairing
+
+------------------------------------------------------------------------
+r1310 | lh3 | 2010-03-01 10:35:45 -0500 (Mon, 01 Mar 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.7
+
+------------------------------------------------------------------------
+r1309 | lh3 | 2010-02-26 21:42:22 -0500 (Fri, 26 Feb 2010) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.6-2 (r1309)
+ * fixed an unfixed bug (by Carol Scott)
+ * fixed some tiny formatting
+
+------------------------------------------------------------------------
+r1305 | lh3 | 2010-02-25 13:47:58 -0500 (Thu, 25 Feb 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.6-1 (r1304)
+ * optionally write output to a file (by Tim Fennel)
+
+------------------------------------------------------------------------
+r1303 | lh3 | 2010-02-10 23:43:48 -0500 (Wed, 10 Feb 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.6
+
+------------------------------------------------------------------------
+r1302 | lh3 | 2010-02-10 11:11:49 -0500 (Wed, 10 Feb 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-10 (r1302)
+ * improve max insert size estimate (method suggested by Gerton Lunter)
+
+------------------------------------------------------------------------
+r1301 | lh3 | 2010-02-09 16:15:28 -0500 (Tue, 09 Feb 2010) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-9 (r1301)
+ * improve mapping quality calculation for anomalous pairs
+ * fixed a bug in multiple hits
+ * SOLiD multiple hits should work now
+
+------------------------------------------------------------------------
+r1300 | lh3 | 2010-02-09 12:50:02 -0500 (Tue, 09 Feb 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-8 (r1300)
+ * output kurtosis
+
+------------------------------------------------------------------------
+r1299 | lh3 | 2010-02-09 12:33:34 -0500 (Tue, 09 Feb 2010) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-7 (r1299)
+ * calculate skewness in sampe
+ * increase min_len in SW to 20
+ * perform more SW to fix discordant pairs
+
+------------------------------------------------------------------------
+r1298 | lh3 | 2010-02-08 12:40:31 -0500 (Mon, 08 Feb 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/cs2nt.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.5.5-6 (r1297)
+ * prepare to replace all 16-bit CIGAR (patches by Rodrigo Goya)
+
+------------------------------------------------------------------------
+r1297 | lh3 | 2010-02-05 22:26:11 -0500 (Fri, 05 Feb 2010) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/solid2fastq.pl
+
+the old fix seems not working!
+
+------------------------------------------------------------------------
+r1296 | lh3 | 2010-02-05 21:51:03 -0500 (Fri, 05 Feb 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-5 (r1296)
+ * fixed a minor issue that the lower bound of insert size is not correctly set.
+
+------------------------------------------------------------------------
+r1295 | lh3 | 2010-02-05 21:01:10 -0500 (Fri, 05 Feb 2010) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-4 (r1295)
+ * fixed a memory leak
+ * change the behaviour of -n (samse and sampe)
+ * change the default of -n
+
+------------------------------------------------------------------------
+r1294 | lh3 | 2010-02-05 17:24:06 -0500 (Fri, 05 Feb 2010) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-3 (r1294)
+ * improved multi-hit report
+
+------------------------------------------------------------------------
+r1293 | lh3 | 2010-02-05 12:57:38 -0500 (Fri, 05 Feb 2010) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/cs2nt.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/solid2fastq.pl
+
+ * bwa-0.5.5-2 (r1293)
+ * bugfix: truncated quality string
+ * bugfix: quality -1 in solid->fastq conversion
+ * bugfix: color reads on the reverse strand are not complemented
+
+------------------------------------------------------------------------
+r1279 | lh3 | 2009-11-23 22:42:34 -0500 (Mon, 23 Nov 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bntseq.h
+   M /branches/prog/bwa/bwase.c
+   A /branches/prog/bwa/bwase.h
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.5-1 (r1279)
+ * incorporate changes from Matt Hanna for Java bindings.
+
+------------------------------------------------------------------------
+r1275 | lh3 | 2009-11-10 22:13:10 -0500 (Tue, 10 Nov 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+
+update ChangeLog
+
+------------------------------------------------------------------------
+r1273 | lh3 | 2009-11-10 22:08:16 -0500 (Tue, 10 Nov 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+   A /branches/prog/bwa/qualfa2fq.pl
+
+Release bwa-0.5.5 (r1273)
+
+------------------------------------------------------------------------
+r1272 | lh3 | 2009-11-10 22:02:50 -0500 (Tue, 10 Nov 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.4-3 (r1272)
+ * fixed another typo which may lead to incorrect single-end mapping quality
+
+------------------------------------------------------------------------
+r1271 | lh3 | 2009-11-10 21:59:47 -0500 (Tue, 10 Nov 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.4-2 (r1271)
+ * fixed a serious typo/bug which does not hurt if we allow one gap open
+   and work with <200bp reads, but causes segfault for long reads.
+
+------------------------------------------------------------------------
+r1270 | lh3 | 2009-11-09 23:12:42 -0500 (Mon, 09 Nov 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/cs2nt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.4-1 (r1270)
+ * fixed a bug in color alignment
+
+------------------------------------------------------------------------
+r1245 | lh3 | 2009-10-09 07:42:52 -0400 (Fri, 09 Oct 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.4
+
+------------------------------------------------------------------------
+r1244 | lh3 | 2009-10-09 05:53:52 -0400 (Fri, 09 Oct 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+
+ * bwa-0.5.3-4 (r1244)
+ * output the clipped length in XC:i: tag
+ * skip mate alignment when stdaln is buggy
+ * fixed a bug in NM:i: tag
+
+------------------------------------------------------------------------
+r1243 | lh3 | 2009-10-07 08:15:04 -0400 (Wed, 07 Oct 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.3-3 (r1243)
+ * sampe: fixed a bug when a read sequence is identical to its reverse complement.
+
+------------------------------------------------------------------------
+r1242 | lh3 | 2009-10-07 07:49:13 -0400 (Wed, 07 Oct 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.3-2 (r1242)
+ * sampe: optionally preload the full index into memory
+ * aln: change the default seed length to 32bp
+
+------------------------------------------------------------------------
+r1238 | lh3 | 2009-09-26 18:38:15 -0400 (Sat, 26 Sep 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/khash.h
+
+Improve portability of khash.h
+
+------------------------------------------------------------------------
+r1228 | lh3 | 2009-09-15 09:20:22 -0400 (Tue, 15 Sep 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/main.c
+
+fixed a typo
+
+------------------------------------------------------------------------
+r1227 | lh3 | 2009-09-15 09:19:35 -0400 (Tue, 15 Sep 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.3-1 (r1226)
+ * in dBWT-SW, optionally use hard clipping instead of soft clipping
+
+------------------------------------------------------------------------
+r1225 | lh3 | 2009-09-15 08:32:30 -0400 (Tue, 15 Sep 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.3 (r1225)
+
+------------------------------------------------------------------------
+r1223 | lh3 | 2009-09-13 07:30:41 -0400 (Sun, 13 Sep 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.2
+
+------------------------------------------------------------------------
+r1222 | lh3 | 2009-09-11 09:11:39 -0400 (Fri, 11 Sep 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.1-5 (r1222)
+ * fixed a typo. No real change
+
+------------------------------------------------------------------------
+r1221 | lh3 | 2009-09-11 09:09:44 -0400 (Fri, 11 Sep 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.1-4 (r1221)
+ * trim reads before alignment
+
+------------------------------------------------------------------------
+r1216 | lh3 | 2009-09-08 17:50:15 -0400 (Tue, 08 Sep 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.1-3 (r1216)
+ * fixed a bug about NM tags for gapped alignment
+ * print SAM header
+
+------------------------------------------------------------------------
+r1215 | lh3 | 2009-09-08 17:14:42 -0400 (Tue, 08 Sep 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.1-2 (r1215)
+ * fixed a bug when read lengths vary (by John Marshall)
+
+------------------------------------------------------------------------
+r1213 | lh3 | 2009-09-06 18:58:15 -0400 (Sun, 06 Sep 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.1-1 (r1213)
+ * change default -T to 30
+
+------------------------------------------------------------------------
+r1209 | lh3 | 2009-09-02 06:06:02 -0400 (Wed, 02 Sep 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.1
+
+------------------------------------------------------------------------
+r1208 | lh3 | 2009-09-02 05:56:33 -0400 (Wed, 02 Sep 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+
+ * ChangeLog
+
+------------------------------------------------------------------------
+r1206 | lh3 | 2009-08-30 18:27:30 -0400 (Sun, 30 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.0-6 (r1206)
+ * fixed two bugs caused by previous modification
+
+------------------------------------------------------------------------
+r1205 | lh3 | 2009-08-30 17:28:36 -0400 (Sun, 30 Aug 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.0-4 (r1205)
+ * reduce false coordinates and CIGAR when a query bridges two reference
+   sequences, although some very rare cases may fail bwa.
+
+------------------------------------------------------------------------
+r1204 | lh3 | 2009-08-30 06:06:16 -0400 (Sun, 30 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.0-3 (r1204)
+ * choose one repetitive hit to extend
+
+------------------------------------------------------------------------
+r1203 | lh3 | 2009-08-29 18:11:51 -0400 (Sat, 29 Aug 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.0-2 (r1203)
+ * dBWT-SW: change a parameter in calculating mapping quality
+ * fixed a bug in samse
+
+------------------------------------------------------------------------
+r1202 | lh3 | 2009-08-28 19:48:41 -0400 (Fri, 28 Aug 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.5.0-1 (r1202)
+ * change default band width to 50
+ * improve mapping quality a bit
+
+------------------------------------------------------------------------
+r1200 | lh3 | 2009-08-20 06:21:24 -0400 (Thu, 20 Aug 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.5.0 (r1200)
+
+------------------------------------------------------------------------
+r1199 | lh3 | 2009-08-20 04:49:15 -0400 (Thu, 20 Aug 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwa.1
+
+Updated ChangeLog and the manual
+
+------------------------------------------------------------------------
+r1198 | lh3 | 2009-08-19 11:09:15 -0400 (Wed, 19 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-36 (r1198)
+ * simplify duphits removal. The accuracy is changed a tiny bit, sometimes better, sometimes worse.
+
+------------------------------------------------------------------------
+r1197 | lh3 | 2009-08-19 08:15:05 -0400 (Wed, 19 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2_aux.c
+   A /branches/prog/bwa/bwtsw2_chain.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-35 (r1197)
+ * further heuristic acceleration for long queries
+
+------------------------------------------------------------------------
+r1196 | lh3 | 2009-08-18 06:54:03 -0400 (Tue, 18 Aug 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-34 (r1196)
+ * updated the manual page
+ * output base quality if the input is fastq
+
+------------------------------------------------------------------------
+r1195 | lh3 | 2009-08-18 06:23:00 -0400 (Tue, 18 Aug 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/simple_dp.c
+
+ * bwa-0.4.9-33 (r1191)
+ * fixed a bug in sampe/samse when gaps occur to the 5'-end in SW alignment
+ * in dbwtsw adjust -T and -c according to -a
+
+------------------------------------------------------------------------
+r1192 | lh3 | 2009-08-13 05:37:28 -0400 (Thu, 13 Aug 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update manual
+
+------------------------------------------------------------------------
+r1191 | lh3 | 2009-08-12 19:40:51 -0400 (Wed, 12 Aug 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwtsw2_main.c
+
+update documentation
+
+------------------------------------------------------------------------
+r1190 | lh3 | 2009-08-12 08:56:10 -0400 (Wed, 12 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-32 (r1190)
+ * only help messages are changed
+
+------------------------------------------------------------------------
+r1189 | lh3 | 2009-08-11 09:28:55 -0400 (Tue, 11 Aug 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-31 (r1189)
+ * in bwape/bwase, print CIGAR "*" if the read is unmapped
+ * improved the calculation of mapping quality
+
+------------------------------------------------------------------------
+r1181 | lh3 | 2009-08-03 12:09:41 -0400 (Mon, 03 Aug 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+
+fflush()
+
+------------------------------------------------------------------------
+r1180 | lh3 | 2009-08-03 12:08:46 -0400 (Mon, 03 Aug 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-30 (r1180)
+ * fixed a memory problem
+ * multi-threading sometimes does not work...
+
+------------------------------------------------------------------------
+r1179 | lh3 | 2009-08-03 11:04:39 -0400 (Mon, 03 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-29 (r1179)
+ * preliminary multi-threading support in dbwtsw
+
+------------------------------------------------------------------------
+r1178 | lh3 | 2009-08-03 09:14:54 -0400 (Mon, 03 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-28 (r1178)
+ * fixed a bug in printing repetitive hits
+
+------------------------------------------------------------------------
+r1177 | lh3 | 2009-08-03 05:03:42 -0400 (Mon, 03 Aug 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-27 (r1177)
+ * bwtsw2: fixed a hidden memory leak
+
+------------------------------------------------------------------------
+r1176 | lh3 | 2009-07-31 10:58:24 -0400 (Fri, 31 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-26
+ * change the way mapping quality is calculated
+
+------------------------------------------------------------------------
+r1175 | lh3 | 2009-07-31 09:15:54 -0400 (Fri, 31 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-25
+ * code clean up
+ * automatically adjust ->t and ->is_rev based on input
+
+------------------------------------------------------------------------
+r1174 | lh3 | 2009-07-30 08:50:25 -0400 (Thu, 30 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-24
+ * fixed a bug in printing the hits
+
+------------------------------------------------------------------------
+r1173 | lh3 | 2009-07-29 18:32:43 -0400 (Wed, 29 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-23
+ * allow to skip reverse alignment
+ * increase opt->t to 37
+
+------------------------------------------------------------------------
+r1172 | lh3 | 2009-07-29 17:22:39 -0400 (Wed, 29 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-22
+ * report if the hit is found in both directions
+
+------------------------------------------------------------------------
+r1171 | lh3 | 2009-07-29 17:12:02 -0400 (Wed, 29 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-21
+ * dbwtsw: map to both forward and reverse BWT to reduce false alignment
+
+------------------------------------------------------------------------
+r1170 | lh3 | 2009-07-29 15:25:14 -0400 (Wed, 29 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+save hits before cut_tail()
+
+------------------------------------------------------------------------
+r1169 | lh3 | 2009-07-29 08:06:01 -0400 (Wed, 29 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.4.9-19
+ * use a global memory pool to reduce the CPU time spent on malloc/free().
+
+------------------------------------------------------------------------
+r1168 | lh3 | 2009-07-29 06:13:29 -0400 (Wed, 29 Jul 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-18
+ * reduce unnecessary extension to the 5'-end
+ * allow to use different interval size for the 2 rounds
+ * change default parameters
+
+------------------------------------------------------------------------
+r1167 | lh3 | 2009-07-28 19:06:17 -0400 (Tue, 28 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-17
+ * dbwtsw: fixed THE memory leak.
+
+------------------------------------------------------------------------
+r1166 | lh3 | 2009-07-28 16:31:41 -0400 (Tue, 28 Jul 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+
+ * bwa-0.4.9-16
+ * fixed a memory leak
+ * a small memory leak still occurs to bwtsw2_core(). I will work on that later.
+ * changed the default parameters
+
+------------------------------------------------------------------------
+r1165 | lh3 | 2009-07-28 10:15:40 -0400 (Tue, 28 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+
+ * bwa-0.4.9-15
+ * generate CIGAR right before output. This saves unnecessary computation.
+ * this version may be buggy as I have not tested it.
+
+------------------------------------------------------------------------
+r1164 | lh3 | 2009-07-28 09:04:14 -0400 (Tue, 28 Jul 2009) | 11 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.4.9-14
+
+ * deplete unique hits in dbwtsw and postprocess them with standard sw
+
+ * in principle, this strategy should be faster and more accurate, but I
+   have not tested this point. I may switch back to the old method if
+   this does not work.
+
+ * the code looks quite nasty now. it needs clean up...
+
+
+------------------------------------------------------------------------
+r1163 | lh3 | 2009-07-27 17:41:10 -0400 (Mon, 27 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+
+change a default parameter
+
+------------------------------------------------------------------------
+r1162 | lh3 | 2009-07-27 17:04:35 -0400 (Mon, 27 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-13
+ * dbwtsw: switch between small and large Z-best
+
+------------------------------------------------------------------------
+r1161 | lh3 | 2009-07-27 12:17:41 -0400 (Mon, 27 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-12
+ * changed the default -z to 100
+ * heuristically speed up alignments for polyA reads
+
+------------------------------------------------------------------------
+r1160 | lh3 | 2009-07-27 07:50:57 -0400 (Mon, 27 Jul 2009) | 6 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-11
+
+ * dbwtsw potentially generates fewer false alignments, although in
+   practice, the modification brings no improvement.
+
+
+------------------------------------------------------------------------
+r1159 | lh3 | 2009-07-27 04:37:02 -0400 (Mon, 27 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-10
+ * disabled debugging code
+ * add "BAM_FMU" if both ends are unmapped
+
+------------------------------------------------------------------------
+r1158 | lh3 | 2009-07-24 09:36:52 -0400 (Fri, 24 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/main.c
+
+nothing, really
+
+------------------------------------------------------------------------
+r1157 | lh3 | 2009-07-24 09:05:44 -0400 (Fri, 24 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-9
+ * bwtsw2: generate SAM output
+
+------------------------------------------------------------------------
+r1156 | lh3 | 2009-07-24 05:42:47 -0400 (Fri, 24 Jul 2009) | 6 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-8
+
+ * fixed a weird deadloop which only happens to icc -O3. Thanks John
+   Marshall for the fix.
+
+
+------------------------------------------------------------------------
+r1155 | lh3 | 2009-07-24 05:28:40 -0400 (Fri, 24 Jul 2009) | 8 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-7
+
+ * fixed a typo in bwtsw2 alignment. Now score from the standard SW
+   seems to agree with score from bwtsw2, except that in reporting
+   alignments, bwtsw2 may report non-optimal segments. This is expected,
+   though. I will improve in future.
+
+
+------------------------------------------------------------------------
+r1154 | lh3 | 2009-07-23 17:40:20 -0400 (Thu, 23 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * aln_left_core() seems to work properly
+ * aln_local_core() has a bug... AN EVER EXISTING BUG!!!!!!!!!!!
+
+------------------------------------------------------------------------
+r1153 | lh3 | 2009-07-23 17:06:09 -0400 (Thu, 23 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/stdaln.c
+
+removed debugging code...
+
+------------------------------------------------------------------------
+r1152 | lh3 | 2009-07-23 17:01:00 -0400 (Thu, 23 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/stdaln.c
+
+ * radical changes failed...
+ * fixed a bug
+
+------------------------------------------------------------------------
+r1151 | lh3 | 2009-07-23 14:46:35 -0400 (Thu, 23 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/stdaln.c
+
+temporary changes. Will apply some radical changes to this file...
+
+------------------------------------------------------------------------
+r1150 | lh3 | 2009-07-23 10:09:56 -0400 (Thu, 23 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/stdaln.c
+
+fixed a long-existing bug in Smith-Waterman alignment
+
+------------------------------------------------------------------------
+r1149 | lh3 | 2009-07-23 08:50:52 -0400 (Thu, 23 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/simple_dp.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.4.9-6
+ * unexplained inconsistency still occurs, but the results largely look reasonable.
+
+------------------------------------------------------------------------
+r1148 | lh3 | 2009-07-23 08:07:29 -0400 (Thu, 23 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/stdaln.c
+
+half DP
+
+------------------------------------------------------------------------
+r1147 | lh3 | 2009-07-22 08:03:06 -0400 (Wed, 22 Jul 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+
+a bit of code clean up
+
+------------------------------------------------------------------------
+r1145 | lh3 | 2009-07-21 15:52:05 -0400 (Tue, 21 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-5
+ * fixed a bug in determining sub-optimal hits
+ * removed some debugging code
+
+------------------------------------------------------------------------
+r1144 | lh3 | 2009-07-21 10:17:29 -0400 (Tue, 21 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-4
+ * better cmd interface
+ * faster speed
+
+------------------------------------------------------------------------
+r1143 | lh3 | 2009-07-20 16:38:18 -0400 (Mon, 20 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+bwtsw2 (dBWT-SW) is working apparently...
+
+
+------------------------------------------------------------------------
+r1139 | lh3 | 2009-07-15 05:52:18 -0400 (Wed, 15 Jul 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.9-2
+ * bwtsw2: change cut_tail() such that it is faster but more likely to
+   miss true hits
+
+------------------------------------------------------------------------
+r1138 | lh3 | 2009-07-15 05:18:42 -0400 (Wed, 15 Jul 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   A /branches/prog/bwa/bwt_lite.c
+   A /branches/prog/bwa/bwt_lite.h
+   A /branches/prog/bwa/bwtsw2.h
+   A /branches/prog/bwa/bwtsw2_aux.c
+   A /branches/prog/bwa/bwtsw2_core.c
+   A /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * bwa-0.4.9-1
+ * added back bwtsw2
+
+------------------------------------------------------------------------
+r1075 | lh3 | 2009-05-19 05:14:50 -0400 (Tue, 19 May 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.9
+
+------------------------------------------------------------------------
+r1073 | lh3 | 2009-05-18 17:13:19 -0400 (Mon, 18 May 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.8
+
+------------------------------------------------------------------------
+r1069 | lh3 | 2009-05-14 09:54:54 -0400 (Thu, 14 May 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.7-2
+ * change the default of "aln -R" to 30
+
+------------------------------------------------------------------------
+r1068 | lh3 | 2009-05-14 09:27:55 -0400 (Thu, 14 May 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.7-1
+ * search for suboptimal hits if the top hit is not so repetitive
+
+------------------------------------------------------------------------
+r1066 | lh3 | 2009-05-12 15:31:31 -0400 (Tue, 12 May 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.7
+
+------------------------------------------------------------------------
+r1065 | lh3 | 2009-05-12 15:20:40 -0400 (Tue, 12 May 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.6-9
+ * fixed compiling errors on some Linux machines
+
+------------------------------------------------------------------------
+r1064 | lh3 | 2009-05-12 07:30:46 -0400 (Tue, 12 May 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.6-8
+ * avoid compilation error on some systems.
+
+------------------------------------------------------------------------
+r1035 | lh3 | 2009-05-09 05:41:33 -0400 (Sat, 09 May 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.6-7
+ * fixed an integer overflow caused by previous modifications
+ * made insert size estimation more robust
+
+------------------------------------------------------------------------
+r1008 | lh3 | 2009-04-29 05:41:58 -0400 (Wed, 29 Apr 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.6-5
+ * fixed an integer overflow problem which may cause seg fault in very rare cases
+ * made XN tags more accurate
+
+------------------------------------------------------------------------
+r1005 | lh3 | 2009-04-27 07:37:23 -0400 (Mon, 27 Apr 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/simple_dp.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.4.6-4
+ * heuristic rules to detect suboptimal alignment
+ * stdsw: support double-strand and protein alignment
+
+------------------------------------------------------------------------
+r1003 | lh3 | 2009-04-26 12:48:19 -0400 (Sun, 26 Apr 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/simple_dp.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.4.6-2
+ * improve the functionality of stdsw
+ * allow to add a threshold on SW alignment. Hope this does not incur new bugs...
+
+------------------------------------------------------------------------
+r1002 | lh3 | 2009-04-22 03:56:15 -0400 (Wed, 22 Apr 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.6-1
+ * output SM and AM tag
+
+------------------------------------------------------------------------
+r914 | lh3 | 2009-03-09 17:53:50 -0400 (Mon, 09 Mar 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.6
+
+------------------------------------------------------------------------
+r913 | lh3 | 2009-03-09 17:23:24 -0400 (Mon, 09 Mar 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   A /branches/prog/bwa/solid2fastq.pl
+
+ * added notes to bwa
+ * added a script to convert SOLiD reads
+ * updated documentation
+
+------------------------------------------------------------------------
+r912 | lh3 | 2009-03-09 16:57:05 -0400 (Mon, 09 Mar 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/kstring.c
+   M /branches/prog/bwa/main.c
+
+fixed a bug in kstring
+
+------------------------------------------------------------------------
+r881 | lh3 | 2009-03-02 15:36:06 -0500 (Mon, 02 Mar 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtmisc.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.5-7
+ * fixed a bug in pac2cspac
+
+------------------------------------------------------------------------
+r880 | lh3 | 2009-03-01 16:34:08 -0500 (Sun, 01 Mar 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+
+disable debugging
+
+------------------------------------------------------------------------
+r879 | lh3 | 2009-03-01 16:28:04 -0500 (Sun, 01 Mar 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/cs2nt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.5-6
+ * fixed problems with coordinates for color gapped alignment
+
+------------------------------------------------------------------------
+r878 | lh3 | 2009-03-01 13:43:09 -0500 (Sun, 01 Mar 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/cs2nt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.5-5
+ * added support for gapped color alignment
+
+------------------------------------------------------------------------
+r877 | lh3 | 2009-03-01 10:27:52 -0500 (Sun, 01 Mar 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/cs2nt.c
+   M /branches/prog/bwa/main.c
+
+ * convert cs read to nt read (for ungapped alignment only)
+
+------------------------------------------------------------------------
+r860 | lh3 | 2009-02-27 08:58:39 -0500 (Fri, 27 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwase.c
+   A /branches/prog/bwa/cs2nt.c
+
+prepare to implement cs->nt conversion (have not yet...)
+
+------------------------------------------------------------------------
+r859 | lh3 | 2009-02-27 07:00:03 -0500 (Fri, 27 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bntseq.h
+   M /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/bwtmisc.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * bwa-0.4.5-3
+ * generate color index from nucleotide fasta reference
+
+------------------------------------------------------------------------
+r857 | lh3 | 2009-02-26 10:22:58 -0500 (Thu, 26 Feb 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.5-2
+ * improved mapping quality a bit if one end falls in a tandem repeat
+   but the mate is unique.
+
+------------------------------------------------------------------------
+r856 | lh3 | 2009-02-26 10:02:29 -0500 (Thu, 26 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.5-1
+ * make bwa work for SOLiD reads
+
+------------------------------------------------------------------------
+r828 | lh3 | 2009-02-18 17:36:41 -0500 (Wed, 18 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.5
+
+------------------------------------------------------------------------
+r827 | lh3 | 2009-02-18 16:48:48 -0500 (Wed, 18 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.4.4-6
+ * fixed a bug in SW alignment when no residue matches
+
+------------------------------------------------------------------------
+r824 | lh3 | 2009-02-17 05:33:07 -0500 (Tue, 17 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.4-5
+ * fixed that boundary bug
+
+------------------------------------------------------------------------
+r823 | lh3 | 2009-02-17 04:54:18 -0500 (Tue, 17 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwape.c
+
+just change some logging information
+
+------------------------------------------------------------------------
+r822 | lh3 | 2009-02-17 04:20:39 -0500 (Tue, 17 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update manual
+
+------------------------------------------------------------------------
+r821 | lh3 | 2009-02-17 04:11:14 -0500 (Tue, 17 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.4-4
+ * fixed a bug on boundary check in pair_sw
+
+------------------------------------------------------------------------
+r820 | lh3 | 2009-02-16 17:43:37 -0500 (Mon, 16 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.4-3
+ * allow to change mismatch penalty
+
+------------------------------------------------------------------------
+r819 | lh3 | 2009-02-16 17:40:28 -0500 (Mon, 16 Feb 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.4-2
+ * remove timer
+ * allow to change default gapo and gape penalty at the command line
+
+------------------------------------------------------------------------
+r818 | lh3 | 2009-02-16 09:30:51 -0500 (Mon, 16 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update benchmark
+
+------------------------------------------------------------------------
+r817 | lh3 | 2009-02-16 08:44:40 -0500 (Mon, 16 Feb 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/kvec.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.4-1
+ * automatically detect insert size
+ * use insert size in pairing. This may potentially improve accuracy (untested!)
+
+------------------------------------------------------------------------
+r814 | lh3 | 2009-02-15 11:10:23 -0500 (Sun, 15 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.4
+
+------------------------------------------------------------------------
+r813 | lh3 | 2009-02-15 10:22:50 -0500 (Sun, 15 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.3-5
+ * impose boundary check in refine_gapped
+
+------------------------------------------------------------------------
+r811 | lh3 | 2009-02-14 09:46:13 -0500 (Sat, 14 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.3-4
+ * change MD tag to match the latest SAM specification
+
+------------------------------------------------------------------------
+r810 | lh3 | 2009-02-13 04:46:04 -0500 (Fri, 13 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+
+update ChangeLog
+
+------------------------------------------------------------------------
+r799 | lh3 | 2009-02-05 12:01:17 -0500 (Thu, 05 Feb 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+change MD tag to meet the latest SAM specification
+
+------------------------------------------------------------------------
+r796 | lh3 | 2009-02-05 08:35:13 -0500 (Thu, 05 Feb 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.3-2
+ * fixed a bug on counting 'N'
+
+------------------------------------------------------------------------
+r795 | lh3 | 2009-02-05 07:41:27 -0500 (Thu, 05 Feb 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.3-1
+ * fixed potential boundary problems
+ * update benchmark result
+
+------------------------------------------------------------------------
+r791 | lh3 | 2009-01-25 05:20:47 -0500 (Sun, 25 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update some numbers
+
+------------------------------------------------------------------------
+r790 | lh3 | 2009-01-24 15:13:03 -0500 (Sat, 24 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update benchmark
+
+------------------------------------------------------------------------
+r789 | lh3 | 2009-01-22 10:18:44 -0500 (Thu, 22 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtindex.c
+
+a warning message for index
+
+------------------------------------------------------------------------
+r788 | lh3 | 2009-01-22 09:54:06 -0500 (Thu, 22 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/main.c
+
+forgot to change release number
+
+------------------------------------------------------------------------
+r786 | lh3 | 2009-01-22 06:27:39 -0500 (Thu, 22 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+
+Release bwa-0.4.3
+
+------------------------------------------------------------------------
+r785 | lh3 | 2009-01-22 06:27:16 -0500 (Thu, 22 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+
+Release bwa-0.4.3
+
+------------------------------------------------------------------------
+r784 | lh3 | 2009-01-22 06:19:59 -0500 (Thu, 22 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-10
+ * update documentation
+ * fixed a bug on generating MD tags for SW alignment
+
+------------------------------------------------------------------------
+r782 | lh3 | 2009-01-19 12:08:38 -0500 (Mon, 19 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-9
+ * fixed a bug in samse -n...
+
+------------------------------------------------------------------------
+r781 | lh3 | 2009-01-19 11:26:37 -0500 (Mon, 19 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-8
+ * given -N, the previous version would stop if the top hit is a repeat. Now changed.
+
+------------------------------------------------------------------------
+r780 | lh3 | 2009-01-19 11:20:18 -0500 (Mon, 19 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-7
+ * use a bit-wise flag to replace some member variables in the option struct
+ * allow to switch off the iterative strategy
+
+------------------------------------------------------------------------
+r779 | lh3 | 2009-01-19 10:45:57 -0500 (Mon, 19 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-6
+ * allow to dump multiple hits from samse, in another format, though
+
+------------------------------------------------------------------------
+r778 | lh3 | 2009-01-19 06:24:29 -0500 (Mon, 19 Jan 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/kseq.h
+   A /branches/prog/bwa/kstring.c
+   A /branches/prog/bwa/kstring.h
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/simple_dp.c
+
+ * bwa-0.4.2-5
+ * update kseq.h to the latest version
+ * generate MD tag
+ * print mate coordinate if only one end is unmapped
+
+------------------------------------------------------------------------
+r775 | lh3 | 2009-01-18 05:40:35 -0500 (Sun, 18 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-4
+ * fixed a bug for SAM format
+
+------------------------------------------------------------------------
+r774 | lh3 | 2009-01-17 13:48:52 -0500 (Sat, 17 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-3
+ * change default fnr to 0.04
+ * print max_diff for valid fnr
+
+------------------------------------------------------------------------
+r773 | lh3 | 2009-01-17 05:54:37 -0500 (Sat, 17 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-2
+ * automatically choose max_diff
+
+------------------------------------------------------------------------
+r772 | lh3 | 2009-01-16 18:16:14 -0500 (Fri, 16 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.2-1
+ * take N as a mismatch
+
+------------------------------------------------------------------------
+r768 | lh3 | 2009-01-09 11:57:23 -0500 (Fri, 09 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.2
+
+------------------------------------------------------------------------
+r759 | lh3 | 2009-01-07 09:55:43 -0500 (Wed, 07 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.1
+
+------------------------------------------------------------------------
+r758 | lh3 | 2009-01-07 05:36:06 -0500 (Wed, 07 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.0-2
+ * make mate_sw fully working
+
+------------------------------------------------------------------------
+r757 | lh3 | 2009-01-06 18:04:29 -0500 (Tue, 06 Jan 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.4.0-1
+ * do SW alignment for unmapped mate. It is working.
+ * I still need to do some extra work for SW alignment, but it is too late
+   and I am getting tired... I will do tomorrow.
+
+------------------------------------------------------------------------
+r755 | lh3 | 2009-01-06 10:23:29 -0500 (Tue, 06 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.4.0
+
+------------------------------------------------------------------------
+r754 | lh3 | 2009-01-06 07:45:02 -0500 (Tue, 06 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtgap.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-12
+ * better lock
+
+------------------------------------------------------------------------
+r753 | lh3 | 2009-01-06 06:17:21 -0500 (Tue, 06 Jan 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-11
+ * fixed a small memory leak in bwa_seq_close()
+ * fixed "uninitialized memory" from bwt_aln1_t
+ * multithreading for "aln" command
+
+------------------------------------------------------------------------
+r752 | lh3 | 2009-01-05 17:34:13 -0500 (Mon, 05 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   D /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwt_gen/bwt_gen.c
+   A /branches/prog/bwa/bwtmisc.c (from /branches/prog/bwa/pac2bwt.c:748)
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   D /branches/prog/bwa/pac2bwt.c
+
+ * bwa-0.3.0-10
+ * a little bit code clean up
+
+------------------------------------------------------------------------
+r751 | lh3 | 2009-01-05 17:19:04 -0500 (Mon, 05 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-9
+ * use 64-bit integer to speed up Occ calculate, although just a little bit
+
+------------------------------------------------------------------------
+r750 | lh3 | 2009-01-05 16:44:26 -0500 (Mon, 05 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-8
+ * a little bit code cleanup
+
+------------------------------------------------------------------------
+r749 | lh3 | 2009-01-05 16:37:28 -0500 (Mon, 05 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-7
+ * accelerate Occ calculation
+
+------------------------------------------------------------------------
+r748 | lh3 | 2009-01-05 16:12:28 -0500 (Mon, 05 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   M /branches/prog/bwa/pac2bwt.c
+
+ * bwa-0.3.0-6
+ * put occ table along with bwt to save another cache miss
+ * this version is already faster than the previous and I can still improve it...
+
+------------------------------------------------------------------------
+r747 | lh3 | 2009-01-05 10:16:18 -0500 (Mon, 05 Jan 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-5
+ * remove occ_major to save a cache miss; however, OCC_INTERVAL has to be
+   increased to keep the same memory. As a result, the speed is a little
+   slower in fact.
+
+------------------------------------------------------------------------
+r746 | lh3 | 2009-01-05 09:50:53 -0500 (Mon, 05 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-4
+ * added back optimization codes (it is a pain...)
+
+------------------------------------------------------------------------
+r745 | lh3 | 2009-01-05 08:23:00 -0500 (Mon, 05 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-3
+ * faster bit operations
+
+------------------------------------------------------------------------
+r744 | lh3 | 2009-01-05 05:58:46 -0500 (Mon, 05 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-2
+ * removed optimization codes again...
+ * use a new method to count the bits
+
+------------------------------------------------------------------------
+r743 | lh3 | 2009-01-04 17:18:38 -0500 (Sun, 04 Jan 2009) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.3.0-1
+ * added back the optimization codes
+ * added a new option to aln: max_entries, although this is disabled by default
+ * updated benchmark
+
+------------------------------------------------------------------------
+r742 | lh3 | 2009-01-04 07:56:12 -0500 (Sun, 04 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+add URL
+
+------------------------------------------------------------------------
+r740 | lh3 | 2009-01-04 07:39:43 -0500 (Sun, 04 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.3.0
+
+------------------------------------------------------------------------
+r739 | lh3 | 2009-01-04 06:55:06 -0500 (Sun, 04 Jan 2009) | 2 lines
+Changed paths:
+   A /branches/prog/bwa/COPYING
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bntseq.h
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/utils.c
+   M /branches/prog/bwa/utils.h
+
+added licensing information
+
+------------------------------------------------------------------------
+r738 | lh3 | 2009-01-04 06:18:25 -0500 (Sun, 04 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-31
+ * better mapping quality
+ * update benchmark
+
+------------------------------------------------------------------------
+r737 | lh3 | 2009-01-03 16:00:58 -0500 (Sat, 03 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwa.1
+
+update documentation
+
+------------------------------------------------------------------------
+r736 | lh3 | 2009-01-02 10:26:38 -0500 (Fri, 02 Jan 2009) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update documentation
+
+------------------------------------------------------------------------
+r735 | lh3 | 2009-01-02 07:10:20 -0500 (Fri, 02 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-30
+ * reduce memory a little bit
+ * update documentation
+
+------------------------------------------------------------------------
+r734 | lh3 | 2009-01-01 13:45:45 -0500 (Thu, 01 Jan 2009) | 8 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-29
+ * sampe: removed -O option; changed default -o to 100000
+ * sampe: fixed a bug in calculating paired mapping quality
+ * aln: added an option to search for suboptimal hits even if the best is a repeat.
+   This option will make sampe MUCH SLOWER.
+ * sampe: set isize as zero if mapped to two different chr
+ * update manual (unfinished)
+
+------------------------------------------------------------------------
+r733 | lh3 | 2009-01-01 11:01:20 -0500 (Thu, 01 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-28
+ * fixed a bug in calculating paired mapping quality
+
+------------------------------------------------------------------------
+r732 | lh3 | 2009-01-01 09:27:46 -0500 (Thu, 01 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   A /branches/prog/bwa/khash.h (from /branches/prog/sclib/khash/khash.h:675)
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-27
+ * accelerate sampe by storing visited large intervals
+
+------------------------------------------------------------------------
+r731 | lh3 | 2009-01-01 06:51:21 -0500 (Thu, 01 Jan 2009) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-26
+ * remove the optimization codes
+
+------------------------------------------------------------------------
+r730 | lh3 | 2009-01-01 06:48:59 -0500 (Thu, 01 Jan 2009) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-25
+ * accelerate OCC calculation by ~7%. However, it seems not worth doing
+   this by complicating the codes. I will change back later.
+
+------------------------------------------------------------------------
+r729 | lh3 | 2008-12-31 16:43:56 -0500 (Wed, 31 Dec 2008) | 6 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-24
+ * change command "sai2sam_pe" to "sampe"
+ * print usage for sampe command
+ * in sampe: change default max_occ to 1000
+ * fixed a few compiling warnings in bntseq.c
+
+------------------------------------------------------------------------
+r728 | lh3 | 2008-12-27 07:14:59 -0500 (Sat, 27 Dec 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-22
+ * mating information can be printed to SAM
+
+------------------------------------------------------------------------
+r727 | lh3 | 2008-12-26 18:10:59 -0500 (Fri, 26 Dec 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-21
+ * implement pairing (still UNFINISHED)
+ * output all reads even if full of N
+
+------------------------------------------------------------------------
+r726 | lh3 | 2008-12-26 13:31:27 -0500 (Fri, 26 Dec 2008) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   A /branches/prog/bwa/bwape.c
+   M /branches/prog/bwa/bwase.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * bwa-0.2.0-20
+ * remove "-t" from aln cmd
+ * code clean up: move some functions in bwt2fmv.c to other source files
+ * added sai2sam_pe cmd: *UNFINISHED*
+
+------------------------------------------------------------------------
+r725 | lh3 | 2008-12-26 07:04:11 -0500 (Fri, 26 Dec 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   A /branches/prog/bwa/bwase.c
+   A /branches/prog/bwa/bwaseqio.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/kseq.h
+   A /branches/prog/bwa/ksort.h (from /branches/prog/sclib/ksort/ksort.h:712)
+   A /branches/prog/bwa/kvec.h (from /branches/prog/sclib/kvec/kvec.h:537)
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-19
+ * considerable code cleanup; no actual changes
+
+------------------------------------------------------------------------
+r724 | lh3 | 2008-12-25 11:32:11 -0500 (Thu, 25 Dec 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-18
+ * generate SAM output
+
+------------------------------------------------------------------------
+r723 | lh3 | 2008-12-25 10:48:31 -0500 (Thu, 25 Dec 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * bwa-0.2.0-17
+ * remove bwtsw2 related codes
+ * separate searching for SA interval from generating alignments
+
+------------------------------------------------------------------------
+r722 | lh3 | 2008-12-25 08:57:13 -0500 (Thu, 25 Dec 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt2fmv.c
+   D /branches/prog/bwa/bwt_lite.c
+   D /branches/prog/bwa/bwt_lite.h
+   M /branches/prog/bwa/bwtgap.c
+   D /branches/prog/bwa/bwtsw2.h
+   D /branches/prog/bwa/bwtsw2_aux.c
+   D /branches/prog/bwa/bwtsw2_core.c
+   D /branches/prog/bwa/bwtsw2_main.c
+   D /branches/prog/bwa/khash.h
+   D /branches/prog/bwa/ksort.h
+   D /branches/prog/bwa/kvec.h
+   M /branches/prog/bwa/main.c
+
+ * added interface to "aln -t"
+ * remove bwtsw2 related codes
+
+------------------------------------------------------------------------
+r666 | lh3 | 2008-11-18 18:34:29 -0500 (Tue, 18 Nov 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-16
+ * allow to set max mismatches based on read length, but I do not know
+   whether this really works
+
+------------------------------------------------------------------------
+r665 | lh3 | 2008-11-18 08:34:03 -0500 (Tue, 18 Nov 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-15
+ * fixed a bug in sequence parser.
+
+------------------------------------------------------------------------
+r612 | lh3 | 2008-10-28 06:50:53 -0400 (Tue, 28 Oct 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/utils.c
+
+ * bwa-0.2.0-14
+ * fixed a bug caused by the change of the FASTA/Q parser
+
+------------------------------------------------------------------------
+r611 | lh3 | 2008-10-28 06:24:56 -0400 (Tue, 28 Oct 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bntseq.h
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   A /branches/prog/bwa/kseq.h
+   D /branches/prog/bwa/seq.c
+   D /branches/prog/bwa/seq.h
+   M /branches/prog/bwa/simple_dp.c
+   M /branches/prog/bwa/utils.c
+   M /branches/prog/bwa/utils.h
+
+replace seq.* with kseq.h
+
+------------------------------------------------------------------------
+r610 | lh3 | 2008-10-27 13:00:04 -0400 (Mon, 27 Oct 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-13
+ * make bwtsw2 output sub-optimal hits. not completed
+
+------------------------------------------------------------------------
+r609 | lh3 | 2008-10-24 16:52:00 -0400 (Fri, 24 Oct 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/kvec.h
+
+little...
+
+------------------------------------------------------------------------
+r532 | lh3 | 2008-09-19 05:28:45 -0400 (Fri, 19 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/khash.h
+
+improve interface of khash
+
+------------------------------------------------------------------------
+r531 | lh3 | 2008-09-18 06:52:59 -0400 (Thu, 18 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+improve minor things, which make bwtsw2 slower, but should miss fewer true hits
+
+------------------------------------------------------------------------
+r530 | lh3 | 2008-09-17 18:19:26 -0400 (Wed, 17 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+ * fixed a bug in calculating ->D
+ * enforce band-width checking
+
+------------------------------------------------------------------------
+r529 | lh3 | 2008-09-17 18:06:49 -0400 (Wed, 17 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+delete a line of code that is never visited
+
+------------------------------------------------------------------------
+r528 | lh3 | 2008-09-17 17:58:51 -0400 (Wed, 17 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+a bit code clean up
+
+------------------------------------------------------------------------
+r527 | lh3 | 2008-09-17 10:55:45 -0400 (Wed, 17 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-12
+ * max-depth can be set, although it does not help the speed at all
+
+------------------------------------------------------------------------
+r526 | lh3 | 2008-09-16 17:59:36 -0400 (Tue, 16 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+cut_tail after remove duplicate
+
+------------------------------------------------------------------------
+r525 | lh3 | 2008-09-16 17:56:11 -0400 (Tue, 16 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/khash.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-11
+ * improved cut_tail()
+
+------------------------------------------------------------------------
+r524 | lh3 | 2008-09-15 16:53:22 -0400 (Mon, 15 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-10
+ * fixed a bug in cut_tail()
+
+------------------------------------------------------------------------
+r518 | lh3 | 2008-09-15 04:35:59 -0400 (Mon, 15 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+a bit code clean up
+
+------------------------------------------------------------------------
+r517 | lh3 | 2008-09-14 18:18:11 -0400 (Sun, 14 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+improve speed (<1%)
+
+------------------------------------------------------------------------
+r516 | lh3 | 2008-09-14 18:08:55 -0400 (Sun, 14 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+ * fixed two potential bugs, although I have not seen their effects
+ * improve speed a bit (<2%)
+
+------------------------------------------------------------------------
+r515 | lh3 | 2008-09-14 17:26:49 -0400 (Sun, 14 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+
+nothing, really
+
+------------------------------------------------------------------------
+r514 | lh3 | 2008-09-14 17:10:13 -0400 (Sun, 14 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+disable X-drop, which has to be reimplemented in the current algorithm
+
+------------------------------------------------------------------------
+r513 | lh3 | 2008-09-14 16:49:42 -0400 (Sun, 14 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt_lite.c
+   M /branches/prog/bwa/bwt_lite.h
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+
+ * temporarily disable cut_tail()
+ * calculate SA in bwt_lite.c
+ * fixed a bug in reversing the sequence
+
+------------------------------------------------------------------------
+r512 | lh3 | 2008-09-13 17:35:40 -0400 (Sat, 13 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   A /branches/prog/bwa/ksort.h
+
+n-best method
+
+------------------------------------------------------------------------
+r507 | lh3 | 2008-09-13 09:06:54 -0400 (Sat, 13 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2_core.c
+
+give correct result again
+
+------------------------------------------------------------------------
+r506 | lh3 | 2008-09-13 08:12:07 -0400 (Sat, 13 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+I think I know the reason. It needs more work...
+
+------------------------------------------------------------------------
+r505 | lh3 | 2008-09-13 06:20:43 -0400 (Sat, 13 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2_core.c
+
+fixed another bug, but still have more
+
+------------------------------------------------------------------------
+r504 | lh3 | 2008-09-12 18:13:37 -0400 (Fri, 12 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+fixed another bug
+
+------------------------------------------------------------------------
+r503 | lh3 | 2008-09-12 17:15:56 -0400 (Fri, 12 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/khash.h
+
+ * do not segfault, but the result is WRONG!
+ * prepare to remove bsw2_connectivity_check()
+
+------------------------------------------------------------------------
+r502 | lh3 | 2008-09-12 15:52:41 -0400 (Fri, 12 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/kvec.h
+
+more revisions
+
+------------------------------------------------------------------------
+r501 | lh3 | 2008-09-11 18:06:15 -0400 (Thu, 11 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+further simplify codes with kvec.h
+
+------------------------------------------------------------------------
+r500 | lh3 | 2008-09-11 17:42:15 -0400 (Thu, 11 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+part of revisions... have not finished
+
+------------------------------------------------------------------------
+r499 | lh3 | 2008-09-11 17:24:15 -0400 (Thu, 11 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/khash.h
+   A /branches/prog/bwa/kvec.h
+
+prepare for abrupt change
+
+------------------------------------------------------------------------
+r496 | lh3 | 2008-09-11 10:34:38 -0400 (Thu, 11 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+fixed a bug; now "bwtsw2 -d" is useless
+
+------------------------------------------------------------------------
+r495 | lh3 | 2008-09-11 09:22:03 -0400 (Thu, 11 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/simple_dp.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+improve speed a little bit
+
+------------------------------------------------------------------------
+r494 | lh3 | 2008-09-11 08:28:08 -0400 (Thu, 11 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+remove debug codes
+
+------------------------------------------------------------------------
+r493 | lh3 | 2008-09-11 07:49:53 -0400 (Thu, 11 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+ * improve the speed a little bit (<5%)
+ * prepare to remove BSW_DEBUG
+
+------------------------------------------------------------------------
+r492 | lh3 | 2008-09-11 06:15:56 -0400 (Thu, 11 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-9
+ * support reverse strand
+ * fixed a bug that causes missing hits
+
+------------------------------------------------------------------------
+r491 | lh3 | 2008-09-11 05:46:16 -0400 (Thu, 11 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-8
+ * better progress report
+
+------------------------------------------------------------------------
+r490 | lh3 | 2008-09-10 17:04:49 -0400 (Wed, 10 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-7
+ * avoid some missing hits
+ * add maximum depth
+
+------------------------------------------------------------------------
+r489 | lh3 | 2008-09-10 11:51:13 -0400 (Wed, 10 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-6
+ * bwtsw2 works although on the forward strand only for now
+ * better progress information
+
+------------------------------------------------------------------------
+r488 | lh3 | 2008-09-10 10:21:53 -0400 (Wed, 10 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+ * implement memory pool
+ * avoid some rehashing
+
+------------------------------------------------------------------------
+r487 | lh3 | 2008-09-10 09:23:38 -0400 (Wed, 10 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_main.c
+
+ * fixed a memory leak
+ * prepare to implement mempool
+
+------------------------------------------------------------------------
+r486 | lh3 | 2008-09-10 09:10:09 -0400 (Wed, 10 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/khash.h
+
+ * add X-dropoff
+ * remove duplicated results
+ * switch to simple stack
+
+------------------------------------------------------------------------
+r485 | lh3 | 2008-09-10 06:31:20 -0400 (Wed, 10 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+
+ * check whether t-node has been visited
+ * prepare to remove two-level stack
+
+------------------------------------------------------------------------
+r484 | lh3 | 2008-09-10 05:00:57 -0400 (Wed, 10 Sep 2008) | 2 lines
+Changed paths:
+   A /branches/prog/bwa/khash.h
+
+khash library
+
+------------------------------------------------------------------------
+r483 | lh3 | 2008-09-10 04:22:53 -0400 (Wed, 10 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+add inline
+
+------------------------------------------------------------------------
+r482 | lh3 | 2008-09-09 16:34:57 -0400 (Tue, 09 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+
+improve speed
+
+------------------------------------------------------------------------
+r481 | lh3 | 2008-09-09 13:13:00 -0400 (Tue, 09 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2_core.c
+
+Use a 128bit hash table to keep all (tk,tl,qk,ql). This is slow. Just
+keep a copy in case I may need this in future.
+
+
+------------------------------------------------------------------------
+r480 | lh3 | 2008-09-09 12:53:32 -0400 (Tue, 09 Sep 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_core.c
+
+ * no principal modification
+
+------------------------------------------------------------------------
+r479 | lh3 | 2008-09-09 11:01:45 -0400 (Tue, 09 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2_core.c
+
+ * fixed a bug which may cause duplicated matching
+ * accelerate the speed a bit, although using hash in avoiding duplications
+   slows the speed down in the end
+
+------------------------------------------------------------------------
+r474 | lh3 | 2008-09-03 17:22:57 -0400 (Wed, 03 Sep 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtsw2.h
+   M /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-5
+ * indel seems to work on toy example
+ * add band
+
+------------------------------------------------------------------------
+r469 | lh3 | 2008-09-01 09:18:45 -0400 (Mon, 01 Sep 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt_lite.c
+   M /branches/prog/bwa/bwt_lite.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtsw2.h
+   A /branches/prog/bwa/bwtsw2_aux.c
+   M /branches/prog/bwa/bwtsw2_core.c
+   M /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/is.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   M /branches/prog/bwa/simple_dp.c
+
+ * bwa-0.2.0-4
+ * updated bwtsw2, which seems to work properly on toy examples
+
+------------------------------------------------------------------------
+r447 | lh3 | 2008-08-27 10:05:09 -0400 (Wed, 27 Aug 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-3
+ * tune for longer gaps, but it does not really work with kilo-bp gaps...
+
+------------------------------------------------------------------------
+r446 | lh3 | 2008-08-26 13:30:41 -0400 (Tue, 26 Aug 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-2
+ * changed the way to extend long deletions. Now use max_del_occ.
+
+------------------------------------------------------------------------
+r445 | lh3 | 2008-08-26 13:05:58 -0400 (Tue, 26 Aug 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt_lite.c
+   M /branches/prog/bwa/bwt_lite.h
+
+updated from bwtsw2_lite
+
+------------------------------------------------------------------------
+r436 | lh3 | 2008-08-23 12:28:44 -0400 (Sat, 23 Aug 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.h
+   A /branches/prog/bwa/bwt_lite.c
+   A /branches/prog/bwa/bwt_lite.h
+   A /branches/prog/bwa/bwtsw2.h
+   A /branches/prog/bwa/bwtsw2_core.c
+   A /branches/prog/bwa/bwtsw2_main.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.2.0-1
+ * add bwt_lite: a light-weighted version of bwt (NOT TESTED!)
+ * add core codes for bwtsw2: NOT TESTED!!!
+
+------------------------------------------------------------------------
+r427 | lh3 | 2008-08-15 05:38:12 -0400 (Fri, 15 Aug 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.2.0
+
+------------------------------------------------------------------------
+r426 | lh3 | 2008-08-14 11:26:19 -0400 (Thu, 14 Aug 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.6-7
+ * change default seed length to 31
+ * add incomplete support to color sequences (not tested yet!)
+
+------------------------------------------------------------------------
+r425 | lh3 | 2008-08-14 06:23:11 -0400 (Thu, 14 Aug 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.6-6
+ * change default seed length to 33bp
+
+------------------------------------------------------------------------
+r424 | lh3 | 2008-08-14 05:55:33 -0400 (Thu, 14 Aug 2008) | 6 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.6-5
+ * fixed a bug that may miss true alignments. this bug exists in most
+   early versions.
+ * fixed a bug that yields wrong coordinates for reads mapped on the forward
+   strands with gaps.
+
+------------------------------------------------------------------------
+r423 | lh3 | 2008-08-14 04:07:28 -0400 (Thu, 14 Aug 2008) | 2 lines
+Changed paths:
+   D /branches/prog/bwa/Makefile.div
+
+useless
+
+------------------------------------------------------------------------
+r422 | lh3 | 2008-08-13 19:21:14 -0400 (Wed, 13 Aug 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.6-4
+ * fixed one bug
+ * there is another one...
+
+------------------------------------------------------------------------
+r421 | lh3 | 2008-08-13 18:23:33 -0400 (Wed, 13 Aug 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtgap.h
+   M /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.6-3
+ * almost there, but not quite right
+
+------------------------------------------------------------------------
+r419 | lh3 | 2008-08-13 17:27:02 -0400 (Wed, 13 Aug 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtgap.h
+   M /branches/prog/bwa/main.c
+
+ * improve the seeding method
+ * prepare to load two BWTs into memory. A BIG change!
+
+------------------------------------------------------------------------
+r418 | lh3 | 2008-08-13 10:56:54 -0400 (Wed, 13 Aug 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtgap.h
+   M /branches/prog/bwa/main.c
+
+ * added seeding
+ * unfinished yet
+
+------------------------------------------------------------------------
+r413 | lh3 | 2008-08-08 11:48:35 -0400 (Fri, 08 Aug 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.1.6
+
+------------------------------------------------------------------------
+r410 | lh3 | 2008-08-06 15:48:22 -0400 (Wed, 06 Aug 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/simple_dp.c
+
+sw: output alignment score
+
+------------------------------------------------------------------------
+r407 | lh3 | 2008-08-04 10:01:20 -0400 (Mon, 04 Aug 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   A /branches/prog/bwa/simple_dp.c
+   M /branches/prog/bwa/stdaln.c
+   M /branches/prog/bwa/stdaln.h
+
+ * bwa-0.1.5-3
+ * added a simple interface to SW/NW alignment
+ * stdaln-0.9.8 (see header for more details)
+
+------------------------------------------------------------------------
+r406 | lh3 | 2008-08-01 19:21:59 -0400 (Fri, 01 Aug 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+   A /branches/prog/bwa/stdaln.c
+   A /branches/prog/bwa/stdaln.h
+
+ * bwa-0.1.5-2
+ * give accurate gap positions
+
+------------------------------------------------------------------------
+r405 | lh3 | 2008-08-01 19:06:19 -0400 (Fri, 01 Aug 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+
+unfinished, but I am tired...
+
+------------------------------------------------------------------------
+r401 | lh3 | 2008-07-30 05:59:24 -0400 (Wed, 30 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.5-1
+ * fixed a potential bug which may produce an alignment in N regions,
+   although extremely rare.
+
+------------------------------------------------------------------------
+r399 | lh3 | 2008-07-27 11:41:52 -0400 (Sun, 27 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.1.5
+
+------------------------------------------------------------------------
+r398 | lh3 | 2008-07-25 12:14:47 -0400 (Fri, 25 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update documentation
+
+------------------------------------------------------------------------
+r397 | lh3 | 2008-07-25 09:58:56 -0400 (Fri, 25 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * 
+
+------------------------------------------------------------------------
+r396 | lh3 | 2008-07-25 06:42:01 -0400 (Fri, 25 Jul 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.4-4
+ * add timer for debugging
+
+------------------------------------------------------------------------
+r395 | lh3 | 2008-07-24 05:46:21 -0400 (Thu, 24 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.4-3
+ * fixed a bug in the previous code
+ * this version gives identical result to bwa-0.1.4, just 10% faster
+
+------------------------------------------------------------------------
+r394 | lh3 | 2008-07-24 05:18:53 -0400 (Thu, 24 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtgap.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.4-2
+ * further improve the speed
+ * The result is slightly different from bwa-0.1.4 now. I need to check...
+
+------------------------------------------------------------------------
+r393 | lh3 | 2008-07-23 12:04:16 -0400 (Wed, 23 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+
+comments only
+
+------------------------------------------------------------------------
+r392 | lh3 | 2008-07-23 10:34:03 -0400 (Wed, 23 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+further improve the speed in Occ functions
+
+------------------------------------------------------------------------
+r386 | lh3 | 2008-07-22 10:03:54 -0400 (Tue, 22 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.1.4
+
+------------------------------------------------------------------------
+r385 | lh3 | 2008-07-22 09:44:50 -0400 (Tue, 22 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwa.1
+
+update documentation and ChangeLog
+
+------------------------------------------------------------------------
+r384 | lh3 | 2008-07-22 08:50:03 -0400 (Tue, 22 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.3-2
+ * fixed the bug in the last modification
+ * now the alignment should be more clearly defined
+
+------------------------------------------------------------------------
+r383 | lh3 | 2008-07-21 18:32:21 -0400 (Mon, 21 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.3-1
+ * this is a buggy version!
+ * i will fix the bug tomorrow. It is late...
+
+------------------------------------------------------------------------
+r381 | lh3 | 2008-07-21 06:45:32 -0400 (Mon, 21 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.1.3
+
+------------------------------------------------------------------------
+r380 | lh3 | 2008-07-21 06:07:43 -0400 (Mon, 21 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.2-3
+ * improve the speed for gcc on Intel Mac OS X, but not really on icc on Linux
+ * aln: more command-line options
+
+------------------------------------------------------------------------
+r373 | lh3 | 2008-07-17 09:09:46 -0400 (Thu, 17 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.2-2
+ * further improve the speed
+ * this version gives exactly the same result as bwa-0.1.2
+
+------------------------------------------------------------------------
+r372 | lh3 | 2008-07-17 07:51:08 -0400 (Thu, 17 Jul 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.2-1
+ * speed up by about 5%
+
+------------------------------------------------------------------------
+r370 | lh3 | 2008-07-17 05:12:00 -0400 (Thu, 17 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.1.2
+
+------------------------------------------------------------------------
+r368 | lh3 | 2008-07-16 08:51:25 -0400 (Wed, 16 Jul 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   D /branches/prog/bwa/bwt1away.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtgap.h
+   D /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-9
+ * some code cleanup
+ * remove 1away and top2
+
+------------------------------------------------------------------------
+r367 | lh3 | 2008-07-16 08:24:34 -0400 (Wed, 16 Jul 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/is.c
+
+Yuta Mori's implementation of IS algorithm.
+
+------------------------------------------------------------------------
+r365 | lh3 | 2008-07-16 06:58:04 -0400 (Wed, 16 Jul 2008) | 6 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwtgap.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-8
+ * improve gapped alignment
+ * this version will miss more gapped alignments, but the speed is much faster
+ * prepare to remove top2 and 1away algorithms
+ * prepare to add SAIS algorithm for bwt construction
+
+------------------------------------------------------------------------
+r358 | lh3 | 2008-06-09 06:03:04 -0400 (Mon, 09 Jun 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-7
+ * change END_SKIP from 3 to 5, but still gaps may be wrongly added
+ * change default '-g' from 5 to 3
+
+------------------------------------------------------------------------
+r357 | lh3 | 2008-06-09 05:18:36 -0400 (Mon, 09 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-6
+ * fix a bug in nested stack
+
+------------------------------------------------------------------------
+r356 | lh3 | 2008-06-08 18:43:13 -0400 (Sun, 08 Jun 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   A /branches/prog/bwa/bwtgap.h
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-5
+ * replace heap with nested stacks
+ * there are still obvious bugs...
+
+------------------------------------------------------------------------
+r355 | lh3 | 2008-06-08 17:13:44 -0400 (Sun, 08 Jun 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * bwa-0.1.1-4
+ * add interface to affine gap alignment
+ * there are obvious bugs and I will fix them later
+
+------------------------------------------------------------------------
+r354 | lh3 | 2008-06-08 15:39:05 -0400 (Sun, 08 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-3
+ * affine gap seems to work, at least partially
+
+------------------------------------------------------------------------
+r353 | lh3 | 2008-06-08 09:27:18 -0400 (Sun, 08 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   A /branches/prog/bwa/bwtgap.c
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-2
+ * initial gapped alignment. does not work at the moment
+
+------------------------------------------------------------------------
+r352 | lh3 | 2008-06-06 04:37:34 -0400 (Fri, 06 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.1-1
+ * ungap: remove a useless variable in top2_entry_t
+
+------------------------------------------------------------------------
+r348 | lh3 | 2008-06-03 09:04:12 -0400 (Tue, 03 Jun 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/ChangeLog
+   A /branches/prog/bwa/NEWS
+   M /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/main.c
+
+Release bwa-0.1.1
+
+------------------------------------------------------------------------
+r347 | lh3 | 2008-06-03 05:45:08 -0400 (Tue, 03 Jun 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwa.1
+
+update documentation
+
+------------------------------------------------------------------------
+r346 | lh3 | 2008-06-02 18:59:50 -0400 (Mon, 02 Jun 2008) | 5 lines
+Changed paths:
+   A /branches/prog/bwa/ChangeLog
+   A /branches/prog/bwa/bwa.1
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-11
+ * improve approximating mapping qualities
+ * add documentation
+ * add ChangeLog
+
+------------------------------------------------------------------------
+r345 | lh3 | 2008-06-02 16:04:39 -0400 (Mon, 02 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-10
+ * output a random position for repetitive reads
+
+------------------------------------------------------------------------
+r344 | lh3 | 2008-06-02 15:03:54 -0400 (Mon, 02 Jun 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/pac2bwt.c
+
+ * bwa-0.1.0-9
+ * fix memory leaks
+ * fix a potential bug in converting to the real coordinate
+
+------------------------------------------------------------------------
+r343 | lh3 | 2008-06-02 13:44:51 -0400 (Mon, 02 Jun 2008) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile.div
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-8
+ * fix a bug about strand
+ * update Makefile.div
+ * change top2b as the default method
+
+------------------------------------------------------------------------
+r342 | lh3 | 2008-06-02 11:23:26 -0400 (Mon, 02 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt1away.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-7
+ * use bwt_2occ() and bwt_2occ4() in other functions
+
+------------------------------------------------------------------------
+r341 | lh3 | 2008-06-02 09:31:39 -0400 (Mon, 02 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-6
+ * fix a bug for missing hits
+
+------------------------------------------------------------------------
+r340 | lh3 | 2008-06-02 09:10:18 -0400 (Mon, 02 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-5
+ * accelerate comparisons in heap, a bit
+
+------------------------------------------------------------------------
+r339 | lh3 | 2008-06-02 08:41:31 -0400 (Mon, 02 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-4
+ * avoid marginal repeated calculation in occ
+
+------------------------------------------------------------------------
+r338 | lh3 | 2008-06-02 06:46:51 -0400 (Mon, 02 Jun 2008) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-3
+ * fix a bug caused by previous change
+ * fix a bug in heap
+ * order the heap by more criteria
+
+------------------------------------------------------------------------
+r337 | lh3 | 2008-06-01 19:11:15 -0400 (Sun, 01 Jun 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+
+ * bwa-0.1.0-2
+ * also sort sa range in heapsort, in attempt to improve cache performance.
+   Unfortunately, it does not work well at all.
+
+------------------------------------------------------------------------
+r336 | lh3 | 2008-06-01 17:45:23 -0400 (Sun, 01 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/Makefile.div
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/main.c
+
+ * 0.1.0-1
+ * fix a bug in calculating the real coordinate
+
+------------------------------------------------------------------------
+r335 | lh3 | 2008-06-01 16:03:09 -0400 (Sun, 01 Jun 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+
+nothing, really
+
+------------------------------------------------------------------------
+r334 | lh3 | 2008-06-01 15:59:13 -0400 (Sun, 01 Jun 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   A /branches/prog/bwa/Makefile.div
+   M /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/pac2bwt.c
+
+use IS algorithm by default
+
+------------------------------------------------------------------------
+r333 | lh3 | 2008-06-01 15:05:15 -0400 (Sun, 01 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/is.c
+   M /branches/prog/bwa/pac2bwt.c
+
+ * a bit code clean up in is.c
+ * add IS algorithm for constructing BWT, albeit slower
+
+------------------------------------------------------------------------
+r332 | lh3 | 2008-06-01 13:23:08 -0400 (Sun, 01 Jun 2008) | 2 lines
+Changed paths:
+   A /branches/prog/bwa/is.c
+
+IS linear-time algorithm for constructing SA/BWT
+
+------------------------------------------------------------------------
+r331 | lh3 | 2008-06-01 10:35:26 -0400 (Sun, 01 Jun 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bntseq.c
+   A /branches/prog/bwa/bwtindex.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * fix a bug in generating .pac
+ * index in one go
+
+------------------------------------------------------------------------
+r330 | lh3 | 2008-06-01 09:17:05 -0400 (Sun, 01 Jun 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bntseq.h
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwttop2.c
+
+real coordinates can be output
+
+------------------------------------------------------------------------
+r329 | lh3 | 2008-05-31 19:21:02 -0400 (Sat, 31 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwttop2.c
+
+add top2e which is similar to 1away
+
+------------------------------------------------------------------------
+r328 | lh3 | 2008-05-31 18:46:12 -0400 (Sat, 31 May 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwttop2.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * unified cmd-line interface for ungapped alignment
+ * add two alternatives to top2 algorithm
+
+------------------------------------------------------------------------
+r327 | lh3 | 2008-05-31 18:14:46 -0400 (Sat, 31 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+add cmd-line interface to alntop2
+
+------------------------------------------------------------------------
+r326 | lh3 | 2008-05-31 17:59:31 -0400 (Sat, 31 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt1away.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   A /branches/prog/bwa/bwttop2.c
+
+top2 algorithm seems to work. I need to change interface, though
+
+------------------------------------------------------------------------
+r325 | lh3 | 2008-05-31 15:11:49 -0400 (Sat, 31 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt1away.c
+
+change the variable in the structure
+
+------------------------------------------------------------------------
+r324 | lh3 | 2008-05-31 14:52:13 -0400 (Sat, 31 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt1away.c
+
+set a slightly better bound on the maximum allowed mismatches
+
+------------------------------------------------------------------------
+r323 | lh3 | 2008-05-30 18:40:21 -0400 (Fri, 30 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+
+ * output time statistics
+
+------------------------------------------------------------------------
+r322 | lh3 | 2008-05-30 17:58:25 -0400 (Fri, 30 May 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   A /branches/prog/bwa/bwt1away.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+
+ * presumably better way to make use of prefix. But for the moment I do
+   not know whether it is correct or not.
+ * a bit code clean up: separate alignment part
+
+------------------------------------------------------------------------
+r321 | lh3 | 2008-05-30 13:57:43 -0400 (Fri, 30 May 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt_gen/Makefile
+   M /branches/prog/bwa/bwt_gen/bwt_gen.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   M /branches/prog/bwa/pac2bwt.c
+
+ * a bit code clean up
+ * put bwt_gen in bwa
+
+------------------------------------------------------------------------
+r320 | lh3 | 2008-05-30 11:40:11 -0400 (Fri, 30 May 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtio.c
+
+ * improve cmd-line interface
+ * fix a bug in loading .sa
+ * change default sa interval to 32
+
+------------------------------------------------------------------------
+r319 | lh3 | 2008-05-30 10:31:37 -0400 (Fri, 30 May 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwtaln.c
+
+ * fix memory leak (I know that. Just a bit lazy)
+ * change to another method to do 1-away alignment
+
+------------------------------------------------------------------------
+r318 | lh3 | 2008-05-30 09:21:49 -0400 (Fri, 30 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+best unique match is partially finished
+
+------------------------------------------------------------------------
+r317 | lh3 | 2008-05-30 06:33:28 -0400 (Fri, 30 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+remove "ungapped" command and related codes
+
+------------------------------------------------------------------------
+r316 | lh3 | 2008-05-30 06:05:20 -0400 (Fri, 30 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+
+change variable name thick to width
+
+------------------------------------------------------------------------
+r315 | lh3 | 2008-05-29 19:06:13 -0400 (Thu, 29 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   M /branches/prog/bwa/pac2bwt.c
+
+revised algorithm for ungapped alignment. the old one can still be used.
+
+------------------------------------------------------------------------
+r314 | lh3 | 2008-05-29 16:36:11 -0400 (Thu, 29 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwt_gen/bwt_gen.c
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/pac2bwt.c
+
+ * make commands more independent, but ungapped does not work at the moment
+
+------------------------------------------------------------------------
+r313 | lh3 | 2008-05-29 15:56:14 -0400 (Thu, 29 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt_gen/bwt_gen.c
+
+little...
+
+------------------------------------------------------------------------
+r312 | lh3 | 2008-05-29 15:54:01 -0400 (Thu, 29 May 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt_gen/bwt_gen.c
+   M /branches/prog/bwa/bwt_gen/bwt_gen.h
+
+ * add CopyRight information from the original codes
+ * do not dump .fmv files
+
+------------------------------------------------------------------------
+r311 | lh3 | 2008-05-29 15:44:36 -0400 (Thu, 29 May 2008) | 2 lines
+Changed paths:
+   A /branches/prog/bwa/bwt_gen
+   A /branches/prog/bwa/bwt_gen/Makefile
+   A /branches/prog/bwa/bwt_gen/QSufSort.c
+   A /branches/prog/bwa/bwt_gen/QSufSort.h
+   A /branches/prog/bwa/bwt_gen/bwt_gen.c
+   A /branches/prog/bwa/bwt_gen/bwt_gen.h
+
+codes from BWT-SW, for building BWT from packed file
+
+------------------------------------------------------------------------
+r310 | lh3 | 2008-05-28 17:03:35 -0400 (Wed, 28 May 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * change OCC_INTERVAL to 0x40, which makes bwa twice as fast.
+ * write Occ file as ".occ" as it is using a different interval from
+   .fmv, the BWT-SW correspondence of .occ
+
+------------------------------------------------------------------------
+r309 | lh3 | 2008-05-28 11:39:37 -0400 (Wed, 28 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt2fmv.c
+
+fix a bug
+
+------------------------------------------------------------------------
+r308 | lh3 | 2008-05-28 09:56:16 -0400 (Wed, 28 May 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt2fmv.c
+
+add heuristics to improve the speed, but I have not tested whether the
+results are correct or not.
+
+
+------------------------------------------------------------------------
+r307 | lh3 | 2008-05-28 06:31:34 -0400 (Wed, 28 May 2008) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/bwtaln.c
+   M /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * make ungapped alignment basically works...
+ * but it is very slow in comparison to others...
+ * also I need to improve the interface...
+ * a lot of things to keep me busy today...
+
+------------------------------------------------------------------------
+r306 | lh3 | 2008-05-27 18:41:27 -0400 (Tue, 27 May 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwtaln.c
+
+ * remove recursion
+ * fixed a bug in bwt_occ()
+
+------------------------------------------------------------------------
+r305 | lh3 | 2008-05-27 16:59:44 -0400 (Tue, 27 May 2008) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwtaln.c
+
+ * bwa now tells whether a sequence can be mapped with maximum allowed
+   mismatches. ONLY ungapped.
+ * this is a recursive version. I will remove recursion later.
+
+
+------------------------------------------------------------------------
+r304 | lh3 | 2008-05-27 09:12:17 -0400 (Tue, 27 May 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+   A /branches/prog/bwa/bwtaln.c
+   A /branches/prog/bwa/bwtaln.h
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   M /branches/prog/bwa/utils.c
+
+ * load .sa and .fmv files
+ * exact alignment now works
+
+------------------------------------------------------------------------
+r303 | lh3 | 2008-05-27 06:33:38 -0400 (Tue, 27 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/utils.c
+   M /branches/prog/bwa/utils.h
+
+add xassert and fix a bug
+
+------------------------------------------------------------------------
+r302 | lh3 | 2008-05-27 06:23:20 -0400 (Tue, 27 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwtio.c
+   A /branches/prog/bwa/utils.c
+   A /branches/prog/bwa/utils.h
+
+improve error message and error handling
+
+------------------------------------------------------------------------
+r301 | lh3 | 2008-05-27 05:37:51 -0400 (Tue, 27 May 2008) | 4 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+   A /branches/prog/bwa/bwtio.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+
+ * move I/O codes to bwtio.c
+ * SA can be dumped and interestingly, it is identical to BWTSW
+ * now, .fmv is still different from BWTSW
+
+------------------------------------------------------------------------
+r299 | lh3 | 2008-05-26 18:07:44 -0400 (Mon, 26 May 2008) | 2 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+
+generate/retrieve SA and Occ
+
+------------------------------------------------------------------------
+r298 | lh3 | 2008-05-26 13:16:49 -0400 (Mon, 26 May 2008) | 3 lines
+Changed paths:
+   M /branches/prog/bwa/bntseq.h
+   M /branches/prog/bwa/bwt.c
+   M /branches/prog/bwa/bwt.h
+   M /branches/prog/bwa/bwt2fmv.c
+
+ * retrieve occ value at any position
+ * move bwt_cal_occ() to bwt.c
+
+------------------------------------------------------------------------
+r297 | lh3 | 2008-05-25 17:43:58 -0400 (Sun, 25 May 2008) | 6 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   A /branches/prog/bwa/bwt.c
+   A /branches/prog/bwa/bwt.h
+   A /branches/prog/bwa/bwt2fmv.c
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   M /branches/prog/bwa/pac2bwt.c
+
+ * add bwt2fmv. It works to some extent. However, I do not understand
+   the purpose of some weird codes in BWT-SW. As a consequence, bwt2fmv
+   could generate a file almost identical, but not exactly identical, to
+   the .fmv file from BWT-SW.
+
+
+------------------------------------------------------------------------
+r296 | lh3 | 2008-05-24 18:35:02 -0400 (Sat, 24 May 2008) | 5 lines
+Changed paths:
+   M /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bntseq.c
+   M /branches/prog/bwa/bntseq.h
+   M /branches/prog/bwa/main.c
+   M /branches/prog/bwa/main.h
+   A /branches/prog/bwa/pac2bwt.c
+
+Burrows-Wheeler Transform now works. At least on one example, the
+current code generates the same BWT as BWT-SW. Kind of magical, I would
+say. :)
+
+
+------------------------------------------------------------------------
+r295 | lh3 | 2008-05-24 11:25:31 -0400 (Sat, 24 May 2008) | 3 lines
+Changed paths:
+   A /branches/prog/bwa/Makefile
+   M /branches/prog/bwa/bntseq.c
+   A /branches/prog/bwa/main.c
+   A /branches/prog/bwa/main.h
+
+ * add Makefile and main.*
+ * improve interface to fa2bns, a bit
+
+------------------------------------------------------------------------
+r293 | lh3 | 2008-05-24 10:57:03 -0400 (Sat, 24 May 2008) | 3 lines
+Changed paths:
+   A /branches/prog/bwa
+   A /branches/prog/bwa/bntseq.c
+   A /branches/prog/bwa/bntseq.h
+   A /branches/prog/bwa/seq.c
+   A /branches/prog/bwa/seq.h
+
+ * Burrows-Wheeler Alignment
+ * initial codes
+
+------------------------------------------------------------------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/Makefile	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,61 @@
+# Builds the static library libbwa.a and links the bwa executable against it.
+
+CC=			gcc
+CXX=		g++
+CFLAGS=		-g -Wall -O2
+CXXFLAGS=	$(CFLAGS)
+AR=			ar
+DFLAGS=		-DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
+# Objects archived into libbwa.a.
+LOBJS=		bwa.o bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o stdaln.o \
+			bwaseqio.o bwase.o kstring.o
+# Objects linked directly into the bwa executable, alongside main.o.
+AOBJS=		QSufSort.o bwt_gen.o \
+			is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \
+			bwape.o cs2nt.o \
+			bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
+			bwtsw2_chain.o fastmap.o bwtsw2_pair.o
+PROG=		bwa
+INCLUDES=	
+LIBS=		-lm -lz -lpthread
+SUBDIRS=	.
+
+# all and clean name actions, not files; declaring them phony keeps a stray
+# file called "all" or "clean" from making the target look up to date.
+.PHONY:all clean
+
+.SUFFIXES:.c .o .cc
+
+# Suffix rules: compile one C or C++ source file into its object file.
+.c.o:
+		$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
+.cc.o:
+		$(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
+
+all:$(PROG)
+
+# Link the executable; libbwa.a is picked up through -L. -lbwa.
+bwa:libbwa.a $(AOBJS) main.o
+		$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
+
+libbwa.a:$(LOBJS)
+		$(AR) -csru $@ $(LOBJS)
+
+# Explicit header dependencies for some of the objects.
+bwa.o:bwa.h
+
+QSufSort.o:QSufSort.h
+
+bwt.o:bwt.h
+bwtio.o:bwt.h
+bwtaln.o:bwt.h bwtaln.h kseq.h
+bntseq.o:bntseq.h
+bwtgap.o:bwtgap.h bwtaln.h bwt.h
+
+bwtsw2_core.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
+bwtsw2_aux.o:bwtsw2.h bwt.h bwt_lite.h stdaln.h
+bwtsw2_main.o:bwtsw2.h
+
+# Remove objects, archives, the executable, gprof output and editor backups.
+clean:
+		rm -f gmon.out *.o a.out $(PROG) *~ *.a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/NEWS	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,658 @@
+Release 0.6.2 (19 June, 2012)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This is largely a bug-fix release. Notable changes in BWA-short and BWA-SW:
+
+ * Bugfix: BWA-SW may give bad alignments due to incorrect band width.
+
+ * Bugfix: A segmentation fault due to an out-of-boundary error. The fix is a
+   temporary solution. The real cause has not been identified.
+
+ * Attempt to read index from prefix.64.bwt, such that the 32-bit and 64-bit
+   index can coexist.
+
+ * Added options '-I' and '-S' to control BWA-SW pairing.
+
+(0.6.2: 19 June 2012, r126)
+
+
+
+Release 0.6.1 (28 November, 2011)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes to BWA-short:
+
+ * Bugfix: duplicated alternative hits in the XA tag.
+
+ * Bugfix: when trimming enabled, bwa-aln trims 1bp less.
+
+ * Disabled the color-space alignment. 0.6.x is not working with SOLiD reads at
+   present.
+
+Notable changes to BWA-SW:
+
+ * Bugfix: segfault due to excessive ambiguous bases.
+
+ * Bugfix: incorrect mate position in the SE mode.
+
+ * Bugfix: rare segfault in the PE mode
+
+ * When macro _NO_SSE2 is in use, fall back to the standard Smith-Waterman
+   instead of SSE2-SW.
+
+ * Optionally mark split hits with lower alignment scores as secondary.
+
+Changes to fastmap:
+
+ * Bugfix: infinite loop caused by ambiguous bases.
+
+ * Optionally output the query sequence.
+
+(0.6.1: 28 November 2011, r104)
+
+
+
+Release 0.5.10 and 0.6.0 (12 November, 2011)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 0.6.0 release comes with two major changes. Firstly, the index data
+structure has been changed to support genomes longer than 4GB. The forward and
+reverse backward genome is now integrated in one index. This change speeds up
+BWA-short by about 20% and BWA-SW by 90% with the mapping accuracy largely
+unchanged. A tradeoff is BWA requires more memory, but this is the price almost
+all mappers that index the genome have to pay.
+
+Secondly, BWA-SW in 0.6.0 now works with paired-end data. It is more accurate
+for highly unique reads and more robust to long indels and structural
+variations. However, BWA-short still has edges for reads with many suboptimal
+hits. It is not yet known which algorithm is the best for variant calling.
+
+0.5.10 is a bugfix release only and is likely to be the last release in the 0.5
+branch unless I find critical bugs in future.
+
+Other notable changes:
+
+ * Added the `fastmap' command that finds super-maximal exact matches. It does
+   not give the final alignment, but runs much faster. It can be a building
+   block for other alignment algorithms. [0.6.0 only]
+
+ * Output the timing information before BWA exits. This also tells users that
+   the task has been finished instead of being killed or aborted. [0.6.0 only]
+
+ * Sped up multi-threading when using many (>20) CPU cores.
+
+ * Check I/O error.
+
+ * Increased the maximum barcode length to 63bp.
+
+ * Automatically choose the indexing algorithm.
+
+ * Bugfix: very rare segfault due to an uninitialized variable. The bug also
+   affects the placement of suboptimal alignments. The effect is very minor.
+
+This release involves quite a lot of tricky changes. Although it has been
+tested on a few data sets, subtle bugs may be still hidden. It is *NOT*
+recommended to use this release in a production pipeline. In future, however,
+BWA-SW may be better when reads continue to go longer. I would encourage users
+to try the 0.6 release. I would also like to hear the users' experience. Thank
+you.
+
+(0.6.0: 12 November 2011, r85)
+
+
+
+Beta Release 0.5.9 (24 January, 2011)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes:
+
+ * Feature: barcode support via the `-B' option.
+
+ * Feature: Illumina 1.3+ read format support via the `-I' option.
+
+ * Bugfix: RG tags are not attached to unmapped reads.
+
+ * Bugfix: very rare bwasw mismappings
+
+ * Recommend options for PacBio reads in bwasw help message.
+
+
+Also, since January 13, the BWA master repository has been moved to github:
+
+  https://github.com/lh3/bwa
+
+The revision number has been reset. All recent changes will be first
+committed to this repository.
+
+(0.5.9: 24 January 2011, r16)
+
+
+
+Beta Release Candidate 0.5.9rc1 (10 December, 2010)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes in bwasw:
+
+ * Output unmapped reads.
+
+ * For a repetitive read, choose a random hit instead of a fixed
+   one. This is not well tested.
+
+Notable changes in bwa-short:
+
+ * Fixed a bug in the SW scoring system, which may lead to unexpected
+   gaps towards the end of a read.
+
+ * Fixed a bug which invalidates the randomness of repetitive reads.
+
+ * Fixed a rare memory leak.
+
+ * Allowed to specify the read group at the command line.
+
+ * Take name-grouped BAM files as input.
+
+Changes to this release are usually safe in that they do not interfere
+with the key functionality. However, the release has only been tested on
+small samples instead of on large-scale real data. If anything weird
+happens, please report the bugs to the bio-bwa-help mailing list.
+
+(0.5.9rc1: 10 December 2010, r1561)
+
+
+
+Beta Release 0.5.8 (8 June, 2010)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes in bwasw:
+
+ * Fixed an issue of missing alignments. This should happen rarely and
+   only when the contig/read alignment is multi-part. Very rarely, bwasw
+   may still miss a segment in a multi-part alignment. This is difficult
+   to fix, although possible.
+
+Notable changes in bwa-short:
+
+ * Discard the SW alignment when the best single-end alignment is much
+   better. Such a SW alignment may be caused by structural variations, and
+   forcing it to be aligned leads to false alignment. This fix has not
+   been tested thoroughly. It would be great to receive more user
+   feedback on this issue.
+
+ * Fixed a typo/bug in sampe which leads to unnecessarily large memory
+   usage in some cases.
+
+ * Further reduced the chance of reporting `weird pairing'.
+
+(0.5.8: 8 June 2010, r1442)
+
+
+
+Beta Release 0.5.7 (1 March, 2010)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This release only has an effect on paired-end data with fat insert-size
+distribution. Users are still recommended to update as the new release
+improves the robustness to poor data.
+
+ * The fix for `weird pairing' was not working in version 0.5.6, pointed
+   out by Carol Scott. It should work now.
+
+ * Optionally output to a normal file rather than to stdout (by Tim
+   Fennel).
+
+(0.5.7: 1 March 2010, r1310)
+
+
+
+Beta Release 0.5.6 (10 February, 2010)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes in bwa-short:
+
+ * Report multiple hits in the SAM format at a new tag XA encoded as:
+   (chr,pos,CIGAR,NM;)*. By default, if a paired or single-end read has
+   4 or fewer hits, they will all be reported; if a read in an anomalous
+   pair has 11 or fewer hits, all of them will be reported.
+
+ * Perform Smith-Waterman alignment also for anomalous read pairs when
+   both ends have quality higher than 17. This reduces false positives
+   for some SV discovery algorithms.
+
+ * Do not report "weird pairing" when the insert size distribution is
+   too fat or has a mean close to zero.
+
+ * If a read is bridging two adjacent chromosomes, flag it as unmapped.
+
+ * Fixed a small but long existing memory leak in paired-end mapping.
+
+ * Multiple bug fixes in SOLiD mapping: a) quality "-1" can be correctly
+   parsed by solid2fastq.pl; b) truncated quality string is resolved; c)
+   SOLiD read mapped to the reverse strand is complemented.
+
+ * Bwa now calculates skewness and kurtosis of the insert size
+   distribution.
+
+ * Deploy a Bayesian method to estimate the maximum distance for a read
+   pair considered to be paired properly. The method is proposed by
+   Gerton Lunter, but bwa only implements a simplified version.
+
+ * Export more functions for Java bindings, by Matt Hanna (See:
+   http://www.broadinstitute.org/gsa/wiki/index.php/Sting_BWA/C_bindings)
+
+ * Abstract bwa CIGAR for further extension, by Rodrigo Goya.
+
+(0.5.6: 10 February 2010, r1303)
+
+
+
+Beta Release 0.5.5 (10 November, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This is a bug fix release:
+
+ * Fixed a serious bug/typo in aln which does not occur given short
+   reads, but will lead to segfault for >500bp reads. Of course, the aln
+   command is not recommended for reads longer than 200bp, but this is a
+   bug anyway.
+
+ * Fixed a minor bug/typo which leads to incorrect single-end mapping
+   quality when one end is moved to meet the mate-pair requirement.
+
+ * Fixed a bug in samse for mapping in the color space. This bug is
+   caused by quality filtration added since 0.5.1.
+
+(0.5.5: 10 November 2009, r1273)
+
+
+
+Beta Release 0.5.4 (9 October, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since this version, the default seed length used in the "aln" command is
+changed to 32.
+
+Notable changes in bwa-short:
+
+ * Added a new tag "XC:i" which gives the length of clipped reads.
+
+ * In sampe, skip alignments in case of a bug in the Smith-Waterman
+   alignment module.
+
+ * In sampe, fixed a bug in pairing when the read sequence is identical
+   to its reverse complement.
+
+ * In sampe, optionally preload the entire FM-index into memory to
+   reduce disk operations.
+
+Notable changes in dBWT-SW/BWA-SW:
+
+ * Changed name dBWT-SW to BWA-SW.
+
+ * Optionally use "hard clipping" in the SAM output.
+
+(0.5.4: 9 October 2009, r1245)
+
+
+
+Beta Release 0.5.3 (15 September, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Fixed a critical bug in bwa-short: reads mapped to the reverse strand
+are not complemented.
+
+(0.5.3: 15 September 2009, r1225)
+
+
+
+Beta Release 0.5.2 (13 September, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes in bwa-short:
+
+ * Optionally trim reads before alignment. See the manual page on `aln
+   -q' for detailed description.
+
+ * Fixed a bug in calculating the NM tag for a gapped alignment.
+
+ * Fixed a bug given a mixture of reads with some longer than the seed
+   length and some shorter.
+
+ * Print SAM header.
+
+Notable changes in dBWT-SW:
+
+ * Changed the default value of -T to 30. As a result, the accuracy is a
+   little higher for short reads at the cost of speed.
+
+(0.5.2: 13 September 2009, r1223)
+
+
+
+Beta Release 0.5.1 (2 September, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes in the short read alignment component:
+
+ * Fixed a bug in samse: do not write mate coordinates.
+
+Notable changes in dBWT-SW:
+
+ * Randomly choose one alignment if the read is repetitive.
+
+ * Fixed a flaw when a read is mapped across two adjacent reference
+   sequences. However, wrong alignment reports may still occur rarely in
+   this case.
+
+ * Changed the default band width to 50. The speed is slower due to this
+   change.
+
+ * Improved the mapping quality a little given long query sequences.
+
+(0.5.1: 2 September 2009, r1209)
+
+
+
+Beta Release 0.5.0 (20 August, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This release implements a novel algorithm, dBWT-SW, specifically
+designed for long reads. It is 10-50 times faster than SSAHA2, depending
+on the characteristics of the input data, and achieves comparable
+alignment accuracy while allowing chimera detection. In comparison to
+BLAT, dBWT-SW is several times faster and much more accurate especially
+when the error rate is high. Please read the manual page for more
+information.
+
+The dBWT-SW algorithm is kind of developed for future sequencing
+technologies which produce much longer reads with a little higher error
+rate. It is still at its early development stage. Some features are
+missing and it may be buggy although I have evaluated on several
+simulated and real data sets. But following the "release early"
+paradigm, I would like the users to try it first.
+
+Other notable changes in BWA are:
+
+ * Fixed a rare bug in the Smith-Waterman alignment module.
+
+ * Fixed a rare bug about the wrong alignment coordinate when a read is
+   poorly aligned.
+
+ * Fixed a bug in generating the "mate-unmap" SAM tag when both ends in
+   a pair are unmapped.
+
+(0.5.0: 20 August 2009, r1200)
+
+
+
+Beta Release 0.4.9 (19 May, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Interestingly, the integer overflow bug claimed to be fixed in 0.4.7 was
+not in fact fixed. Now I have fixed the bug. Sorry for this, and thanks to Quan
+Long for pointing out the bug (again).
+
+(0.4.9: 19 May 2009, r1075)
+
+
+
+Beta Release 0.4.8 (18 May, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One change to "aln -R". Now by default, if there are no more than `-R'
+equally best hits, bwa will search for suboptimal hits. This change
+affects the ability in finding SNPs in segmental duplications.
+
+I have not tested this option thoroughly, but this simple change is less
+likely to cause new bugs. Hope I am right.
+
+(0.4.8: 18 May 2009, r1073)
+
+
+
+Beta Release 0.4.7 (12 May, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes:
+
+ * Output SM (single-end mapping quality) and AM (smaller mapping
+   quality among the two ends) tag from sam output.
+
+ * Improved the functionality of stdsw.
+
+ * Made the XN tag more accurate.
+
+ * Fixed a very rare segfault caused by integer overflow.
+
+ * Improve the insert size estimation.
+
+ * Fixed compiling errors for some Linux systems.
+
+(0.4.7: 12 May 2009, r1066)
+
+
+
+Beta Release 0.4.6 (9 March, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This release improves the SOLiD support. First, a script for converting
+SOLiD raw data is provided. This script is adapted from solid2fastq.pl
+in the MAQ package. Second, a nucleotide reference file can be directly
+used with `bwa index'. Third, SOLiD paired-end support is
+completed. Fourth, color-space reads will be converted to nucleotides
+when SAM output is generated. Color errors are corrected in this
+process. Please note that like MAQ, BWA cannot make use of the primer
+base and the first color.
+
+In addition, the calculation of mapping quality is also improved a
+little bit, although end-users may barely observe the difference.
+
+(0.4.6: 9 March 2009, r915)
+
+
+
+Beta Release 0.4.5 (18 February, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Not much happened, but I think it would be good to let the users use the
+latest version.
+
+Notable changes (Thank Bob Handsaker for catching the two bugs):
+
+ * Improved boundary check. Previous version may still give incorrect
+   alignment coordinates in rare cases.
+
+ * Fixed a bug in SW alignment when no residue matches. This only
+   affects the `sampe' command.
+
+ * Robustly estimate insert size without setting the maximum on the
+   command line. Since this release `sampe -a' only has an effect if
+   there are not enough good pairs to infer the insert size
+   distribution.
+
+ * Reduced false PE alignments a little bit by using the inferred insert
+   size distribution. This fix may be more important for long insert
+   size libraries.
+
+(0.4.5: 18 February 2009, r829)
+
+
+
+Beta Release 0.4.4 (15 February, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This is mainly a bug fix release. Notable changes are:
+
+ * Imposed boundary check for extracting subsequence from the
+   genome. Previously this causes memory problem in rare cases.
+
+ * Fixed a bug in failing to find whether an alignment overlaps with
+   N on the genome.
+
+ * Changed MD tag to meet the latest SAM specification.
+
+(0.4.4: 15 February 2009, r815)
+
+
+
+Beta Release 0.4.3 (22 January, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes:
+
+ * Treat an ambiguous base N as a mismatch. Previous versions will not
+   map reads containing any N.
+
+ * Automatically choose the maximum allowed number of differences. This
+   is important when reads of different lengths are mixed together.
+
+ * Print mate coordinate if only one end is unmapped.
+
+ * Generate MD tag. This tag encodes the mismatching positions and the
+   reference bases at these positions. Deletions from the reference will
+   also be printed.
+
+ * Optionally dump multiple hits from samse, in another concise format
+   rather than SAM.
+
+ * Optionally disable iterative search. This is VERY SLOOOOW, though.
+
+ * Fixed a bug in generating SAM.
+
+(0.4.3: 22 January 2009, r787)
+
+
+
+Beta Release 0.4.2 (9 January, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Aaron Quinlan found a bug in the indexer: the bwa indexer segfaults if
+there are no comment texts in the FASTA header. This is a critical
+bug. Nothing else was changed.
+
+(0.4.2: 9 January 2009, r769)
+
+
+
+Beta Release 0.4.1 (7 January, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+I am sorry for the quick updates these days. I like to set a milestone
+for BWA and this release seems to be. For paired end reads, BWA also
+does Smith-Waterman alignment for an unmapped read whose mate can be
+mapped confidently. With this strategy BWA achieves similar accuracy to
+maq. Benchmark is also updated accordingly.
+
+(0.4.1: 7 January 2009, r760)
+
+
+
+Beta Release 0.4.0 (6 January, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In comparison to the release two days ago, this release is mainly tuned
+for performance with some tricks I learnt from Bowtie. However, as the
+indexing format has also been changed, I have to increase the version
+number to 0.4.0 to emphasize that *DATABASE MUST BE RE-INDEXED* with
+`bwa index'.
+
+ * Improved the speed by about 20%.
+
+ * Added multi-threading to `bwa aln'.
+
+(0.4.0: 6 January 2009, r756)
+
+
+
+Beta Release 0.3.0 (4 January, 2009)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ * Added paired-end support by separating SA calculation and alignment
+   output.
+
+ * Added SAM output.
+
+ * Added evaluation to the documentation.
+
+(0.3.0: 4 January 2009, r741)
+
+
+
+Beta Release 0.2.0 (15 August, 2008)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ * Take the subsequence at the 5'-end as seed. Seeding strategy greatly
+   improves the speed for long reads, at the cost of missing a few true
+   hits that contain many differences in the seed. Seeding also increases
+   the memory by 800MB.
+
+ * Fixed a bug which may miss some gapped alignments. Fixing the bug
+   also slows the speed a little.
+
+(0.2.0: 15 August 2008, r428)
+
+
+
+Beta Release 0.1.6 (08 August, 2008)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ * Give accurate CIGAR string.
+
+ * Add a simple interface to SW/NW alignment
+
+(0.1.6: 08 August 2008, r414)
+
+
+
+Beta Release 0.1.5 (27 July, 2008)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ * Improve the speed. This version is expected to give the same results.
+
+(0.1.5: 27 July 2008, r400)
+
+
+
+Beta Release 0.1.4 (22 July, 2008)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ * Fixed a bug which may cause missing gapped alignments.
+
+ * More clearly define what alignments can be found by BWA (See
+   manual). Now BWA runs a little slower because it will visit more
+   potential gapped alignments.
+
+ * A bit of code cleanup.
+
+(0.1.4: 22 July 2008, r387)
+
+
+
+Beta Release 0.1.3 (21 July, 2008)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Improve the speed with some tricks on retrieving occurrences. The results
+should be exactly the same as that of 0.1.2.
+
+(0.1.3: 21 July 2008, r382)
+
+
+
+Beta Release 0.1.2 (17 July, 2008)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Support gapped alignment. Code for ungapped alignment has been removed.
+
+(0.1.2: 17 July 2008, r371)
+
+
+
+Beta Release 0.1.1 (03 June, 2008)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This is the first release of BWA, Burrows-Wheeler Alignment tool. Please
+read man page for more information about this software.
+
+(0.1.1: 03 June 2008, r349)
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/QSufSort.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,405 @@
+/* QSufSort.c
+
+   Original source from qsufsort.c
+
+   Copyright 1999, N. Jesper Larsson, all rights reserved.
+
+   This file contains an implementation of the algorithm presented in "Faster
+   Suffix Sorting" by N. Jesper Larsson (jesper@cs.lth.se) and Kunihiko
+   Sadakane (sada@is.s.u-tokyo.ac.jp).
+
+   This software may be used freely for any purpose. However, when distributed,
+   the original source must be clearly stated, and, when the source code is
+   distributed, the copyright notice must be retained and any alterations in
+   the code must be clearly marked. No warranty is given regarding the quality
+   of this software.
+
+   Modified by Wong Chi-Kwong, 2004
+
+   Changes summary:	- Used long variable and function names
+					- Removed global variables
+					- Replace pointer references with array references
+					- Used insertion sort in place of selection sort and increased insertion sort threshold
+					- Reconstructing suffix array from inverse becomes an option
+					- Add handling where end-of-text symbol is not necessary < all characters
+					- Removed codes for supporting alphabet size > number of characters
+  
+  No warranty is given regarding the quality of the modifications.
+
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "QSufSort.h"
+
+#define min(value1, value2)						( ((value1) < (value2)) ? (value1) : (value2) )
+#define med3(a, b, c)							( a<b ? (b<c ? b : a<c ? c : a) : (b>c ? b : a>c ? c : a))
+#define swap(a, b, t);							t = a; a = b; b = t;
+
+// Static functions
+static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, 
+							  const qsint_t highestPos, const qsint_t numSortedChar);
+static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, 
+							   const qsint_t highestPos, const qsint_t numSortedChar);
+static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, 
+									const qsint_t highestPos, const qsint_t numSortedChar);
+static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize);
+static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, 
+							 const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated);
+
+/* Suffix-sorts the text stored in V.
+
+   V and I are both arrays of numChar+1 elements. Unless skipTransform is
+   set, V[0...numChar-1] must hold symbols in the range
+   smallestInputSymbol...largestInputSymbol; the original content of
+   V[numChar] is disregarded, that position being regarded as an
+   end-of-string symbol smaller than all other symbols. The input is then
+   packed by QSufSortTransform and pre-sorted by QSufSortBucketSort. When
+   skipTransform is non-zero both steps are skipped: V and I are used as
+   passed in and refinement starts at a sorted depth of 1.
+
+   The refinement loop runs until I[0] < -numChar, i.e. until all numChar+1
+   positions have been merged into one sorted group. On return V is the
+   inverse of the suffix array; I no longer lists the suffixes (its head
+   holds a negated group length), so call QSufSortGenerateSaFromInverse to
+   rebuild the suffix array itself.*/
+void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, 
+						const qsint_t smallestInputSymbol, const int skipTransform)
+{
+	qsint_t i, j;
+	qsint_t s, negatedSortedGroupLength;
+	qsint_t numSymbolAggregated;
+	qsint_t numSortedPos = 1;	/* number of leading symbols the current order already accounts for */
+	qsint_t newAlphabetSize;
+
+	if (!skipTransform) {
+		/* bucketing possible*/
+		newAlphabetSize = QSufSortTransform(V, I, numChar, largestInputSymbol, smallestInputSymbol, 
+											numChar, &numSymbolAggregated);
+		QSufSortBucketSort(V, I, numChar, newAlphabetSize);
+		I[0] = -1;
+		V[numChar] = 0;
+		numSortedPos = numSymbolAggregated;
+	}
+
+	while ((qsint_t)(I[0]) >= -(qsint_t)numChar) {
+		i = 0;
+		negatedSortedGroupLength = 0;
+		do {
+			s = I[i];
+			if (s < 0) {
+				i -= s;						/* skip over sorted group.*/
+				negatedSortedGroupLength += s;
+			} else {
+				if (negatedSortedGroupLength) {
+					I[i+negatedSortedGroupLength] = negatedSortedGroupLength;	/* combine preceding sorted groups */
+					negatedSortedGroupLength = 0;
+				}
+				j = V[s] + 1;				/* V[s] is the last index of the unsorted group starting at i */
+				QSufSortSortSplit(V, I, i, j - 1, numSortedPos);
+				i = j;
+			}
+		} while (i <= numChar);
+		if (negatedSortedGroupLength) {
+			/* array ends with a sorted group.*/
+			I[i+negatedSortedGroupLength] = negatedSortedGroupLength;	/* combine sorted groups at end of I.*/
+		}
+		numSortedPos *= 2;	/* double sorted-depth.*/
+	}
+}
+
+/* Fills I from the inverse array V: for every position p in 0...numChar the
+   slot I[V[p]] receives p + 1. */
+void QSufSortGenerateSaFromInverse(const qsint_t* V, qsint_t* __restrict I, const qsint_t numChar)
+{
+	qsint_t textPos = 0;
+	while (textPos <= numChar) {
+		const qsint_t rank = V[textPos];
+		++textPos;
+		I[rank] = textPos;	/* stored value is the position plus one */
+	}
+}
+
+/* Sorting routine called for each unsorted group. Sorts the suffix numbers
+   stored in I[lowestPos...highestPos] by the key V[I[p] + numSortedChar]
+   and updates the group numbers in V. The algorithm is a ternary-split
+   quicksort taken from Bentley & McIlroy, "Engineering a Sort Function",
+   Software -- Practice and Experience 23(11), 1249-1265 (November 1993). This
+   function is based on Program 7.
+
+   Ranges of at most INSERT_SORT_NUM_ITEM items are handed to
+   QSufSortInsertSortSplit. Otherwise the range is split into keys smaller
+   than, equal to and larger than the pivot; the smaller and larger parts are
+   sorted recursively, and the equal part becomes one group whose number is
+   its last index (a single element is marked sorted with I[] = -1).*/
+static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, 
+							  const qsint_t highestPos, const qsint_t numSortedChar) {
+
+	qsint_t a, b, c, d;
+	qsint_t l, m;
+	qsint_t f, v, s, t;
+	qsint_t tmp;
+	qsint_t numItem;
+
+	numItem = highestPos - lowestPos + 1;
+
+	if (numItem <= INSERT_SORT_NUM_ITEM) {
+		QSufSortInsertSortSplit(V, I, lowestPos, highestPos, numSortedChar);
+		return;
+	}
+
+	v = QSufSortChoosePivot(V, I, lowestPos, highestPos, numSortedChar);
+
+	/* a and d bound the runs of pivot-equal keys parked at either end;
+	   b and c are the scanning cursors moving towards each other. */
+	a = b = lowestPos;
+	c = d = highestPos;
+
+	while (1) {
+		/* advance b over keys <= pivot, parking equal keys at the left end */
+		while (c >= b && (f = KEY(V, I, b, numSortedChar)) <= v) {
+			if (f == v) {
+				swap(I[a], I[b], tmp);
+				a++;
+			}
+			b++;
+		}
+		/* retreat c over keys >= pivot, parking equal keys at the right end */
+		while (c >= b && (f = KEY(V, I, c, numSortedChar)) >= v) {
+			if (f == v) {
+				swap(I[c], I[d], tmp);
+				d--;
+			}
+			c--;
+		}
+		if (b > c)
+			break;
+		/* I[b] is larger and I[c] smaller than the pivot: exchange them */
+		swap(I[b], I[c], tmp);
+		b++;
+		c--;
+	}
+
+	/* move the equal keys parked on the left next to the smaller part */
+	s = a - lowestPos;
+	t = b - a;
+	s = min(s, t);
+	for (l = lowestPos, m = b - s; m < b; l++, m++) {
+		swap(I[l], I[m], tmp);
+	}
+
+	/* move the equal keys parked on the right next to the larger part */
+	s = d - c;
+	t = highestPos - d;
+	s = min(s, t);
+	for (l = b, m = highestPos - s + 1; m <= highestPos; l++, m++) {
+		swap(I[l], I[m], tmp);
+	}
+
+	s = b - a;	/* number of keys smaller than the pivot */
+	t = d - c;	/* number of keys larger than the pivot */
+	if (s > 0)
+		QSufSortSortSplit(V, I, lowestPos, lowestPos + s - 1, numSortedChar);
+
+	// Update group number for equal portion
+	a = lowestPos + s;
+	b = highestPos - t;
+	if (a == b) {
+		// Sorted group
+		V[I[a]] = a;
+		I[a] = -1;
+	} else {
+		// Unsorted group
+		for (c=a; c<=b; c++)
+			V[I[c]] = b;
+	}
+
+	if (t > 0)
+		QSufSortSortSplit(V, I, highestPos - t + 1, highestPos, numSortedChar);
+
+}
+
+/* Pivot selection after Bentley & McIlroy: the median of three medians, each
+   taken over three keys sampled near the start, the middle and the end of
+   I[lowestPos...highestPos]. Samples within a triple are numItem/8 apart. */
+static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, 
+							   const qsint_t highestPos, const qsint_t numSortedChar) {
+
+	const qsint_t count = highestPos - lowestPos + 1;
+	const qsint_t step = count / 8;
+	qsint_t firstSample[3];		/* index of the first sample of each triple */
+	qsint_t median[3];
+	int g;
+
+	firstSample[0] = lowestPos;
+	firstSample[1] = lowestPos + count / 2 - step;
+	firstSample[2] = highestPos - 2*step;
+
+	for (g = 0; g < 3; g++) {
+		const qsint_t k0 = KEY(V, I, firstSample[g], numSortedChar);
+		const qsint_t k1 = KEY(V, I, firstSample[g] + step, numSortedChar);
+		const qsint_t k2 = KEY(V, I, firstSample[g] + 2*step, numSortedChar);
+		median[g] = med3(k0, k1, k2);
+	}
+
+	return med3(median[0], median[1], median[2]);
+}
+
+/* Quadratic sorting method to use for small subarrays.
+
+   Sorts the suffix numbers in I[lowestPos...highestPos] by the key
+   V[I[p] + numSortedChar] and writes the resulting groups back to V and I.
+   The range must hold at most INSERT_SORT_NUM_ITEM items because keys and
+   positions are copied into fixed-size local arrays. */
+static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, 
+									const qsint_t highestPos, const qsint_t numSortedChar)
+{
+	qsint_t i, j;
+	qsint_t tmpKey, tmpPos;
+	qsint_t numItem;
+	qsint_t key[INSERT_SORT_NUM_ITEM], pos[INSERT_SORT_NUM_ITEM];
+	qsint_t negativeSortedLength;
+	qsint_t groupNum;
+
+	numItem = highestPos - lowestPos + 1;
+
+	/* snapshot positions and their sort keys */
+	for (i=0; i<numItem; i++) {
+		pos[i] = I[lowestPos + i];
+		key[i] = V[pos[i] + numSortedChar];
+	}
+
+	/* insertion sort on key[], carrying pos[] along */
+	for (i=1; i<numItem; i++) {
+		tmpKey = key[i];
+		tmpPos = pos[i];
+		for (j=i; j>0 && key[j-1] > tmpKey; j--) {
+			key[j] = key[j-1];
+			pos[j] = pos[j-1];
+		}
+		key[j] = tmpKey;
+		pos[j] = tmpPos;
+	}
+
+	/* negativeSortedLength < 0: the element being written is the only one with
+	   its key, and the value is minus the length of the run of such singleton
+	   elements reaching upwards from it; 0: the element belongs to a group of
+	   equal keys. */
+	negativeSortedLength = -1;
+
+	/* walk from the top down so that each group is numbered by its last index */
+	i = numItem - 1;
+	groupNum = highestPos;
+	while (i > 0) {
+		I[i+lowestPos] = pos[i];
+		V[I[i+lowestPos]] = groupNum;
+		if (key[i-1] == key[i]) {
+			negativeSortedLength = 0;
+		} else {
+			/* element i-1 starts a different group */
+			if (negativeSortedLength < 0)
+				I[i+lowestPos] = negativeSortedLength;	/* mark element i as sorted */
+			groupNum = i + lowestPos - 1;
+			negativeSortedLength--;
+		}
+		i--;
+	}
+
+	/* the lowest element has no predecessor to compare with */
+	I[lowestPos] = pos[0];
+	V[I[lowestPos]] = groupNum;
+	if (negativeSortedLength < 0)
+		I[lowestPos] = negativeSortedLength;
+}
+
+/* Bucketsort for first iteration.
+
+   Input: V[0...numChar-1] holds integers in the range 1...alphabetSize-1, all
+   of which appear at least once. V[numChar] is 0. (This is the corresponding
+   output of transform.) alphabetSize must be at most numChar+1. I is an array
+   of size numChar+1 whose contents are disregarded.
+
+   Output: V and I are in the state left by the initial sorting stage of the
+   refined suffix sorting algorithm: every position's V entry is the last
+   index of its group in I, and a group of one element is marked with -1.*/
+      
+static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize)
+{
+	qsint_t i, c;
+	qsint_t d;
+	qsint_t groupNum;
+	qsint_t currentIndex;
+
+	// mark linked list empty
+	for (i=0; i<alphabetSize; i++)
+		I[i] = -1;
+
+	// insert to linked list
+	// I[c] is the head (most recent position) of symbol c's list and V[i]
+	// links position i to the previous position with the same symbol
+	for (i=0; i<=numChar; i++) {
+		c = V[i];
+		V[i] = (qsint_t)(I[c]);
+		I[c] = i;
+	}
+
+	// unroll the lists from the largest symbol down, filling I from the top
+	currentIndex = numChar;
+	for (i=alphabetSize; i>0; i--) {
+		c = I[i-1];					// head of this symbol's list
+		d = (qsint_t)(V[c]);		// next position in the list, -1 if none
+		groupNum = currentIndex;	// a group is numbered by its last index in I
+		V[c] = groupNum;
+		if (d >= 0) {
+			I[currentIndex] = c;
+			while (d >= 0) {
+				c = d;
+				d = V[c];
+				V[c] = groupNum;
+				currentIndex--;
+				I[currentIndex] = c;
+			}
+		} else {
+			// sorted group
+			I[currentIndex] = -1;
+		}
+		currentIndex--;
+	}
+}
+
+/* Transforms the alphabet of V by attempting to aggregate several symbols into
+   one, while preserving the suffix order of V. The alphabet is also
+   compacted, so that V on output comprises all integers of the new alphabet
+   with no skipped numbers.
+
+   Input: V is an array of size numChar+1 whose first numChar elements are
+   integers in the range smallestInputSymbol...largestInputSymbol. I is an
+   array of size numChar+1, used for temporary storage (the compaction table).
+   maxNewAlphabetSize controls aggregation by defining the maximum value for
+   any symbol during transformation; since the table lives in I it should not
+   exceed numChar.
+   
+   Output: Returns the size of the new alphabet. *numSymbolAggregated is set
+   to the number of old symbols grouped into one. Only V[numChar] is 0.*/
+static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, 
+							 const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated)
+{
+	qsint_t c, i, j;
+	qsint_t a;	// numSymbolAggregated
+	qsint_t mask;
+	qsint_t minSymbolInChunk = 0, maxSymbolInChunk = 0;
+	qsint_t newAlphabetSize;
+	qsint_t maxNumInputSymbol, maxNumBit, maxSymbol;
+
+	maxNumInputSymbol = largestInputSymbol - smallestInputSymbol + 1;
+
+	/* number of bits needed to store one shifted input symbol */
+	for (maxNumBit = 0, i = maxNumInputSymbol; i; i >>= 1) ++maxNumBit;
+	maxSymbol = QSINT_MAX >> maxNumBit;
+
+	/* find how many symbols fit into one chunk and build the first chunk */
+	c = maxNumInputSymbol;
+	for (a = 0; a < numChar && maxSymbolInChunk <= maxSymbol && c <= maxNewAlphabetSize; a++) {
+		minSymbolInChunk = (minSymbolInChunk << maxNumBit) | (V[a] - smallestInputSymbol + 1);
+		maxSymbolInChunk = c;
+		c = (maxSymbolInChunk << maxNumBit) | maxNumInputSymbol;
+	}
+
+	/* The shift is done in qsint_t: a plain int constant would overflow as
+	   soon as (a-1)*maxNumBit reaches the width of int. */
+	mask = ((qsint_t)1 << (a-1) * maxNumBit) - 1;	/* mask masks off top old symbol from chunk.*/
+	V[numChar] = smallestInputSymbol - 1;	/* emulate zero terminator.*/
+
+	/* bucketing possible, compact alphabet.*/
+	for (i=0; i<=maxSymbolInChunk; i++)
+		I[i] = 0;	/* zero transformation table.*/
+	c = minSymbolInChunk;
+	for (i=a; i<=numChar; i++) {
+		I[c] = 1;			/* mark used chunk symbol.*/
+		c = ((c & mask) << maxNumBit) | (V[i] - smallestInputSymbol + 1);	/* shift in next old symbol in chunk.*/
+	}
+	for (i=1; i<a; i++) {	/* handle last a-1 positions.*/
+		I[c] = 1;			/* mark used chunk symbol.*/
+		c = (c & mask) << maxNumBit;	/* shift in next old symbol in chunk.*/
+	}
+	/* number the used chunk symbols consecutively, starting at 1 */
+	newAlphabetSize = 1;
+	for (i=0; i<=maxSymbolInChunk; i++) {
+		if (I[i]) {
+			I[i] = newAlphabetSize;
+			newAlphabetSize++;
+		}
+	}
+	c = minSymbolInChunk;
+	for (i=0, j=a; j<=numChar; i++, j++) {
+		V[i] = I[c];						/* transform to new alphabet.*/
+		c = ((c & mask) << maxNumBit) | (V[j] - smallestInputSymbol + 1);	/* shift in next old symbol in chunk.*/
+	}
+	for (; i<numChar; i++) {	/* handle last a-1 positions.*/
+		V[i] = I[c];			/* transform to new alphabet.*/
+		c = (c & mask) << maxNumBit;	/* shift right-end zero in chunk.*/
+	}
+
+	V[numChar] = 0;		/* end-of-string symbol is zero.*/
+
+    *numSymbolAggregated = a;
+	return newAlphabetSize;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/QSufSort.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,45 @@
+/* QSufSort.h
+
+   Header file for QSufSort.c
+
+   This file contains an implementation of the algorithm presented in "Faster
+   Suffix Sorting" by N. Jesper Larsson (jesper@cs.lth.se) and Kunihiko
+   Sadakane (sada@is.s.u-tokyo.ac.jp).
+
+   This software may be used freely for any purpose. However, when distributed,
+   the original source must be clearly stated, and, when the source code is
+   distributed, the copyright notice must be retained and any alterations in
+   the code must be clearly marked. No warranty is given regarding the quality
+   of this software.
+
+   Modified by Wong Chi-Kwong, 2004
+
+   Changes summary:	- Used long variable and function names
+					- Removed global variables
+					- Replace pointer references with array references
+					- Used insertion sort in place of selection sort and increased insertion sort threshold
+					- Reconstructing suffix array from inverse becomes an option
+					- Add handling where end-of-text symbol is not necessary < all characters
+					- Removed codes for supporting alphabet size > number of characters
+  
+  No warranty is given regarding the quality of the modifications.
+
+*/
+
+#ifndef __QSUFSORT_H__
+#define __QSUFSORT_H__
+
+#include <stdint.h>
+
+/* Sort key of the suffix stored at I[p]: the V entry h positions further on. */
+#define KEY(V, I, p, h)					( V[ I[p] + h ] )
+/* Ranges of at most this many items are sorted by insertion sort. */
+#define INSERT_SORT_NUM_ITEM	16
+
+/* Integer type used for text positions, symbols and group numbers. */
+typedef int64_t qsint_t;
+#define QSINT_MAX INT64_MAX
+
+/* Suffix-sorts the numChar symbols in V; V and I each need numChar+1 elements. */
+void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, 
+						const qsint_t smallestInputSymbol, const int skipTransform);
+/* Sets I[V[i]] = i + 1 for every i in 0...numChar. */
+void QSufSortGenerateSaFromInverse(const qsint_t *V, qsint_t* __restrict I, const qsint_t numChar);
+
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/README	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,36 @@
+Released packages can be downloaded from SourceForge.net:
+
+  http://sourceforge.net/projects/bio-bwa/files/
+
+Introduction and FAQ are available at:
+
+  http://bio-bwa.sourceforge.net
+
+Manual page at:
+
+  http://bio-bwa.sourceforge.net/bwa.shtml
+
+Mailing list:
+
+  bio-bwa-help@lists.sourceforge.net
+
+To sign up:
+
+  http://sourceforge.net/mail/?group_id=276243
+
+Publications (Open Access):
+
+  http://www.ncbi.nlm.nih.gov/pubmed/20080505
+  http://www.ncbi.nlm.nih.gov/pubmed/19451168
+
+Incomplete list of citations (via HubMed.org):
+
+  http://www.hubmed.org/references.cgi?uids=20080505
+  http://www.hubmed.org/references.cgi?uids=19451168
+
+Related projects:
+
+  http://pbwa.sourceforge.net/
+  http://www.many-core.group.cam.ac.uk/projects/lam.shtml
+  http://biodoop-seal.sourceforge.net/
+  http://gitorious.org/bwa-cuda
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bamlite.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,155 @@
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include "bamlite.h"
+
+/*********************
+ * from bam_endian.c *
+ *********************/
+
+/* Reports the host byte order: 1 when the lowest-addressed byte of a long
+   holding 1 is zero (big-endian), 0 otherwise. */
+static inline int bam_is_big_endian()
+{
+	const long probe = 1;
+	const char *first_byte = (const char *)&probe;
+	return first_byte[0] == 0;
+}
+/* Returns v with its two bytes exchanged. */
+static inline uint16_t bam_swap_endian_2(uint16_t v)
+{
+	const unsigned low_byte = v & 0xFFu;
+	const unsigned high_byte = (v >> 8) & 0xFFu;
+	return (uint16_t)((low_byte << 8) | high_byte);
+}
+/* Byte-swaps the 16-bit value x points to, in place, and returns x. */
+static inline void *bam_swap_endian_2p(void *x)
+{
+	uint16_t *word = (uint16_t*)x;
+	*word = bam_swap_endian_2(*word);
+	return x;
+}
+/* Returns v with the order of its four bytes reversed. */
+static inline uint32_t bam_swap_endian_4(uint32_t v)
+{
+	return ((v & 0x000000FFU) << 24)
+		 | ((v & 0x0000FF00U) << 8)
+		 | ((v & 0x00FF0000U) >> 8)
+		 | ((v & 0xFF000000U) >> 24);
+}
+/* Byte-swaps the 32-bit value x points to, in place, and returns x. */
+static inline void *bam_swap_endian_4p(void *x)
+{
+	uint32_t *word = (uint32_t*)x;
+	*word = bam_swap_endian_4(*word);
+	return x;
+}
+/* Returns v with the order of its eight bytes reversed. */
+static inline uint64_t bam_swap_endian_8(uint64_t v)
+{
+	uint64_t reversed = 0;
+	int k;
+	/* peel bytes off the low end of v and push them onto the low end of the result */
+	for (k = 0; k < 8; ++k) {
+		reversed = (reversed << 8) | (v & 0xFFU);
+		v >>= 8;
+	}
+	return reversed;
+}
+/* Byte-swaps the 64-bit value x points to, in place, and returns x. */
+static inline void *bam_swap_endian_8p(void *x)
+{
+	uint64_t *word = (uint64_t*)x;
+	*word = bam_swap_endian_8(*word);
+	return x;
+}
+
+/**************
+ * from bam.c *
+ **************/
+
+int bam_is_be;
+
+/* Records the host byte order in the global bam_is_be and returns a
+   zero-filled header, or NULL when the allocation fails. */
+bam_header_t *bam_header_init()
+{
+	bam_header_t *hdr;
+	bam_is_be = bam_is_big_endian();
+	hdr = (bam_header_t*)calloc(1, sizeof(bam_header_t));
+	return hdr;
+}
+
+/* Releases a header and everything it owns. A NULL header is accepted.
+   target_len is only released together with target_name. */
+void bam_header_destroy(bam_header_t *header)
+{
+	if (!header) return;
+	if (header->target_name != 0) {
+		int32_t k = 0;
+		while (k < header->n_targets) {
+			free(header->target_name[k]);
+			++k;
+		}
+		free(header->target_name);
+		free(header->target_len);
+	}
+	free(header->text);
+	free(header);
+}
+
+/* Reads the BAM header from fp: the magic string, the plain-text header, and
+   the name and length of every reference sequence.
+
+   Returns a newly allocated header to be released with bam_header_destroy(),
+   or 0 after printing a message to stderr when the magic string is wrong,
+   the input ends early, a length field is negative or memory runs out. */
+bam_header_t *bam_header_read(bamFile fp)
+{
+	bam_header_t *header;
+	char buf[4];
+	int32_t i, name_len, l_text;
+	// read "BAM1"
+	if (bam_read(fp, buf, 4) != 4 || strncmp(buf, "BAM\001", 4) != 0) {
+		fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n");
+		return 0;
+	}
+	header = bam_header_init();
+	if (header == 0) return 0;
+	// read plain text and the number of reference sequences
+	// l_text is a 4-byte field on disk but a size_t in bam_header_t, so it is
+	// read into a 32-bit temporary instead of directly into the struct
+	if (bam_read(fp, &l_text, 4) != 4) goto fail;
+	if (bam_is_be) bam_swap_endian_4p(&l_text);
+	if (l_text < 0) goto fail;
+	header->l_text = l_text;
+	header->text = (char*)calloc((size_t)l_text + 1, 1);
+	if (header->text == 0) goto fail;
+	if (bam_read(fp, header->text, l_text) != l_text) goto fail;
+	if (bam_read(fp, &header->n_targets, 4) != 4) goto fail;
+	if (bam_is_be) bam_swap_endian_4p(&header->n_targets);
+	if (header->n_targets < 0) goto fail;
+	// read reference sequence names and lengths
+	header->target_name = (char**)calloc(header->n_targets, sizeof(char*));
+	header->target_len = (uint32_t*)calloc(header->n_targets, 4);
+	if (header->n_targets > 0 && (header->target_name == 0 || header->target_len == 0)) {
+		// bam_header_destroy() only releases target_len together with target_name
+		if (header->target_name == 0) { free(header->target_len); header->target_len = 0; }
+		goto fail;
+	}
+	for (i = 0; i != header->n_targets; ++i) {
+		if (bam_read(fp, &name_len, 4) != 4) goto fail;
+		if (bam_is_be) bam_swap_endian_4p(&name_len);
+		if (name_len < 0) goto fail;
+		// one spare zero byte keeps the name terminated whatever the file holds
+		header->target_name[i] = (char*)calloc((size_t)name_len + 1, 1);
+		if (header->target_name[i] == 0) goto fail;
+		if (bam_read(fp, header->target_name[i], name_len) != name_len) goto fail;
+		if (bam_read(fp, &header->target_len[i], 4) != 4) goto fail;
+		if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]);
+	}
+	return header;
+fail:
+	fprintf(stderr, "[bam_header_read] truncated or corrupt BAM header.\n");
+	bam_header_destroy(header);
+	return 0;
+}
+
+/* Byte-swaps, in place, the multi-byte fields of a record's variable-length
+   data: the n_cigar 32-bit CIGAR words that follow the query name, and the
+   values of the auxiliary fields that follow the sequence and qualities.
+   NOTE(review): the 'Z'/'H' scan and the fixed-size skips trust the data to
+   be well formed; nothing stops them at data + data_len. */
+static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data)
+{
+	uint8_t *s;
+	uint32_t i, *cigar = (uint32_t*)(data + c->l_qname);
+	// s starts at the first auxiliary field (same offset as bam1_aux())
+	s = data + c->n_cigar*4 + c->l_qname + c->l_qseq + (c->l_qseq + 1)/2;
+	for (i = 0; i < c->n_cigar; ++i) bam_swap_endian_4p(&cigar[i]);
+	while (s < data + data_len) {
+		uint8_t type;
+		s += 2; // skip key
+		type = toupper(*s); ++s; // skip type
+		if (type == 'C' || type == 'A') ++s; // one-byte value, nothing to swap
+		else if (type == 'S') { bam_swap_endian_2p(s); s += 2; }
+		else if (type == 'I' || type == 'F') { bam_swap_endian_4p(s); s += 4; }
+		else if (type == 'D') { bam_swap_endian_8p(s); s += 8; }
+		else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; } // NUL-terminated string
+	}
+}
+
+/* Reads one alignment record from fp into b, growing b->data as needed.
+
+   Returns the number of bytes consumed (4 + block length) on success, -1 at
+   a normal end of file, -2 when the length prefix is truncated, -3 when the
+   fixed-size core is truncated, and -4 when the block length is smaller than
+   the core, the data buffer cannot be enlarged or the variable-length data
+   is truncated. */
+int bam_read1(bamFile fp, bam1_t *b)
+{
+	bam1_core_t *c = &b->core;
+	int32_t block_len, ret, i;
+	uint32_t x[8];
+
+	if ((ret = bam_read(fp, &block_len, 4)) != 4) {
+		if (ret == 0) return -1; // normal end-of-file
+		else return -2; // truncated
+	}
+	if (bam_read(fp, x, sizeof(bam1_core_t)) != sizeof(bam1_core_t)) return -3;
+	if (bam_is_be) {
+		bam_swap_endian_4p(&block_len);
+		for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);
+	}
+	// a block shorter than its own core would yield a negative data length
+	if (block_len < (int32_t)sizeof(bam1_core_t)) return -4;
+	c->tid = x[0]; c->pos = x[1];
+	c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff;
+	c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff;
+	c->l_qseq = x[4];
+	c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7];
+	b->data_len = block_len - sizeof(bam1_core_t);
+	if (b->m_data < b->data_len) {
+		int m_data = b->data_len;
+		uint8_t *data;
+		kroundup32(m_data);
+		// keep the old buffer reachable if realloc() fails
+		data = (uint8_t*)realloc(b->data, m_data);
+		if (data == 0) return -4;
+		b->data = data;
+		b->m_data = m_data;
+	}
+	if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4;
+	b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2;
+	if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
+	return 4 + block_len;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bamlite.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,94 @@
+#ifndef BAMLITE_H_
+#define BAMLITE_H_
+
+#include <stdint.h>
+#include <zlib.h>
+
+/* BAM files are read through zlib's gzFile interface. */
+typedef gzFile bamFile;
+#define bam_open(fn, mode) gzopen(fn, mode)
+#define bam_dopen(fd, mode) gzdopen(fd, mode)
+#define bam_close(fp) gzclose(fp)
+#define bam_read(fp, buf, size) gzread(fp, buf, size)
+
+/* Header of a BAM file as filled in by bam_header_read(). */
+typedef struct {
+	int32_t n_targets;		/* number of reference sequences */
+	char **target_name;		/* n_targets reference names */
+	uint32_t *target_len;	/* n_targets reference lengths */
+	size_t l_text, n_text;	/* l_text: length of text; n_text is not set by the code visible here */
+	char *text;				/* plain-text header, l_text bytes plus a terminating zero */
+} bam_header_t;
+
+/* Bits of bam1_core_t.flag. NOTE(review): the names follow the FLAG bits of
+   the SAM specification; see that document for their meaning. */
+#define BAM_FPAIRED        1
+#define BAM_FPROPER_PAIR   2
+#define BAM_FUNMAP         4
+#define BAM_FMUNMAP        8
+#define BAM_FREVERSE      16
+#define BAM_FMREVERSE     32
+#define BAM_FREAD1        64
+#define BAM_FREAD2       128
+#define BAM_FSECONDARY   256
+#define BAM_FQCFAIL      512
+#define BAM_FDUP        1024
+
+/* A CIGAR word keeps a value in its low BAM_CIGAR_SHIFT bits (selected by
+   BAM_CIGAR_MASK). NOTE(review): presumably the operation code, with the
+   length in the remaining bits -- confirm against the BAM specification. */
+#define BAM_CIGAR_SHIFT 4
+#define BAM_CIGAR_MASK  ((1 << BAM_CIGAR_SHIFT) - 1)
+
+/* CIGAR operation codes. */
+#define BAM_CMATCH      0
+#define BAM_CINS        1
+#define BAM_CDEL        2
+#define BAM_CREF_SKIP   3
+#define BAM_CSOFT_CLIP  4
+#define BAM_CHARD_CLIP  5
+#define BAM_CPAD        6
+
+/* Fixed-size part of an alignment record. bam_read1() fills it from eight
+   consecutive 32-bit words, in the order the fields are declared here. */
+typedef struct {
+	int32_t tid;
+	int32_t pos;
+	uint32_t bin:16, qual:8, l_qname:8;	/* l_qname: bytes taken by the query name in data */
+	uint32_t flag:16, n_cigar:16;		/* n_cigar: number of 32-bit CIGAR words in data */
+	int32_t l_qseq;						/* number of bases in the query sequence */
+	int32_t mtid;
+	int32_t mpos;
+	int32_t isize;
+} bam1_core_t;
+
+/* One alignment record. data holds, back to back: the query name (l_qname
+   bytes), the CIGAR (n_cigar*4 bytes), the 4-bit packed sequence
+   ((l_qseq+1)/2 bytes), the qualities (l_qseq bytes) and the auxiliary
+   fields (l_aux bytes). */
+typedef struct {
+	bam1_core_t core;
+	int l_aux, data_len, m_data;	/* data_len: bytes in use; m_data: bytes allocated */
+	uint8_t *data;
+} bam1_t;
+
+/* Rounds a 32-bit value up to the next power of two, modifying x in place. */
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+/* Accessors for the pieces of a bam1_t record. */
+#define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0)
+#define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
+#define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
+#define bam1_qname(b) ((char*)((b)->data))
+#define bam1_seq(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname)
+#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
+/* 4-bit code of base i in packed sequence s; even i sits in the high nibble */
+#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
+#define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2)
+
+/* Allocates a zero-filled record / releases a record and its data buffer. */
+#define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t)))
+#define bam_destroy1(b) do {					\
+		if (b) { free((b)->data); free(b); }	\
+	} while (0)
+
+/* Non-zero on a big-endian host; set by bam_header_init(). */
+extern int bam_is_be;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	bam_header_t *bam_header_init(void);
+	void bam_header_destroy(bam_header_t *header);
+	bam_header_t *bam_header_read(bamFile fp);
+	int bam_read1(bamFile fp, bam1_t *b);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bntseq.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,323 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <unistd.h>
+#include "bntseq.h"
+#include "main.h"
+#include "utils.h"
+
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+/* Maps a byte to a nucleotide code: 'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2,
+   'T'/'t' -> 3, '-' -> 5 and every other byte -> 4. */
+unsigned char nst_nt4_table[256] = {
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 5 /*'-'*/, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+
+/* Writes the annotations of bns to "<prefix>.ann" (one header line, then two
+   lines per sequence) and its ambiguous-base runs to "<prefix>.amb" (one
+   header line, then one line per run). The file names are built in a buffer
+   sized from prefix, so the prefix may have any length. */
+void bns_dump(const bntseq_t *bns, const char *prefix)
+{
+	char *str;
+	FILE *fp;
+	int i;
+	// room for the prefix, a 4-character extension and the terminating NUL
+	str = (char*)malloc(strlen(prefix) + 5);
+	xassert(str != 0, "fail to allocate memory for the file name.");
+	{ // dump .ann
+		strcpy(str, prefix); strcat(str, ".ann");
+		fp = xopen(str, "w");
+		fprintf(fp, "%lld %d %u\n", (long long)bns->l_pac, bns->n_seqs, bns->seed);
+		for (i = 0; i != bns->n_seqs; ++i) {
+			bntann1_t *p = bns->anns + i;
+			fprintf(fp, "%u %s", p->gi, p->name); // gi is unsigned and is read back with %u
+			if (p->anno[0]) fprintf(fp, " %s\n", p->anno);
+			else fprintf(fp, "\n");
+			fprintf(fp, "%lld %d %d\n", (long long)p->offset, p->len, p->n_ambs);
+		}
+		fclose(fp);
+	}
+	{ // dump .amb
+		strcpy(str, prefix); strcat(str, ".amb");
+		fp = xopen(str, "w");
+		fprintf(fp, "%lld %d %d\n", (long long)bns->l_pac, bns->n_seqs, bns->n_holes); // n_holes is signed
+		for (i = 0; i != bns->n_holes; ++i) {
+			bntamb1_t *p = bns->ambs + i;
+			fprintf(fp, "%lld %d %c\n", (long long)p->offset, p->len, p->amb);
+		}
+		fclose(fp);
+	}
+	free(str);
+}
+
+/* Rebuilds a bntseq_t from the files written by bns_dump() and opens the .pac
+   file for reading (kept in fp_pac). Sequence names, annotations and the
+   ambiguity token are limited to what fits in a 1024-byte buffer; longer
+   input is cut rather than written past the buffer. A file whose fields
+   cannot be parsed, or .ann/.amb headers that disagree, end the program
+   through xassert(). Release the result with bns_destroy(). */
+bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename)
+{
+	char str[1024];
+	FILE *fp;
+	bntseq_t *bns;
+	long long xx;
+	int i;
+	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
+	{ // read .ann
+		fp = xopen(ann_filename, "r");
+		xassert(fscanf(fp, "%lld%d%u", &xx, &bns->n_seqs, &bns->seed) == 3, "malformed .ann file.");
+		bns->l_pac = xx;
+		bns->anns = (bntann1_t*)calloc(bns->n_seqs, sizeof(bntann1_t));
+		for (i = 0; i < bns->n_seqs; ++i) {
+			bntann1_t *p = bns->anns + i;
+			char *q = str;
+			int c;
+			// read gi and sequence name
+			xassert(fscanf(fp, "%u%1023s", &p->gi, str) == 2, "malformed .ann file.");
+			p->name = strdup(str);
+			// read fasta comments; whatever does not fit in str is dropped
+			while ((c = fgetc(fp)) != '\n' && c != EOF)
+				if (q - str < (long)sizeof(str) - 1) *q++ = c;
+			*q = 0;
+			if (q - str > 1) p->anno = strdup(str + 1); // skip leading space
+			else p->anno = strdup("");
+			// read the rest
+			xassert(fscanf(fp, "%lld%d%d", &xx, &p->len, &p->n_ambs) == 3, "malformed .ann file.");
+			p->offset = xx;
+		}
+		fclose(fp);
+	}
+	{ // read .amb
+		int64_t l_pac;
+		int32_t n_seqs;
+		fp = xopen(amb_filename, "r");
+		xassert(fscanf(fp, "%lld%d%d", &xx, &n_seqs, &bns->n_holes) == 3, "malformed .amb file.");
+		l_pac = xx;
+		xassert(l_pac == bns->l_pac && n_seqs == bns->n_seqs, "inconsistent .ann and .amb files.");
+		bns->ambs = (bntamb1_t*)calloc(bns->n_holes, sizeof(bntamb1_t));
+		for (i = 0; i < bns->n_holes; ++i) {
+			bntamb1_t *p = bns->ambs + i;
+			xassert(fscanf(fp, "%lld%d%1023s", &xx, &p->len, str) == 3, "malformed .amb file.");
+			p->offset = xx;
+			p->amb = str[0];
+		}
+		fclose(fp);
+	}
+	{ // open .pac
+		bns->fp_pac = xopen(pac_filename, "rb");
+	}
+	return bns;
+}
+
+/* Restores a bntseq_t from "<prefix>.ann", "<prefix>.amb" and "<prefix>.pac".
+   The file names are built in buffers sized from prefix, so the prefix may
+   have any length. */
+bntseq_t *bns_restore(const char *prefix)
+{  
+	bntseq_t *bns;
+	const size_t size = strlen(prefix) + 5; // prefix + 4-character extension + NUL
+	char *ann_filename = (char*)malloc(size);
+	char *amb_filename = (char*)malloc(size);
+	char *pac_filename = (char*)malloc(size);
+	xassert(ann_filename != 0 && amb_filename != 0 && pac_filename != 0, "fail to allocate memory for the file names.");
+	strcat(strcpy(ann_filename, prefix), ".ann");
+	strcat(strcpy(amb_filename, prefix), ".amb");
+	strcat(strcpy(pac_filename, prefix), ".pac");
+	bns = bns_restore_core(ann_filename, amb_filename, pac_filename);
+	free(ann_filename); free(amb_filename); free(pac_filename);
+	return bns;
+}
+
+/* Closes the .pac stream if one is open and frees bns together with its
+   annotation and ambiguity arrays. A NULL bns is accepted. */
+void bns_destroy(bntseq_t *bns)
+{
+	int k;
+	if (!bns) return;
+	if (bns->fp_pac != 0) fclose(bns->fp_pac);
+	free(bns->ambs);
+	k = 0;
+	while (k < bns->n_seqs) {
+		bntann1_t *ann = bns->anns + k;
+		free(ann->name);
+		free(ann->anno);
+		++k;
+	}
+	free(bns->anns);
+	free(bns);
+}
+
+/* The packed sequence stores four 2-bit bases per byte; base l lives in byte
+   l>>2, with the first base of a byte in the two highest bits. _set_pac ORs
+   code c into place, so the target bits must be zero beforehand. */
+#define _set_pac(pac, l, c) ((pac)[(l)>>2] |= (c)<<((~(l)&3)<<1))
+#define _get_pac(pac, l) ((pac)[(l)>>2]>>((~(l)&3)<<1)&3)
+
+/* Appends one sequence to bns and to the packed buffer pac.
+
+   Records the sequence's name, comment ("(null)" when it has none), offset
+   and length in a new annotation, registers every run of identical
+   ambiguous characters as one hole, and stores each base as a 2-bit code;
+   ambiguous bases are replaced by random codes from lrand48(). The arrays
+   are doubled when full: *m_seqs, *m_holes and *m_pac hold their capacities
+   and *q points at the hole being extended. Returns pac, which may have been
+   moved by realloc(). */
+static uint8_t *add1(const kseq_t *seq, bntseq_t *bns, uint8_t *pac, int64_t *m_pac, int *m_seqs, int *m_holes, bntamb1_t **q)
+{
+	bntann1_t *p;
+	int i, lasts;
+	if (bns->n_seqs == *m_seqs) {
+		*m_seqs <<= 1;
+		bns->anns = (bntann1_t*)realloc(bns->anns, *m_seqs * sizeof(bntann1_t));
+	}
+	p = bns->anns + bns->n_seqs;
+	p->name = strdup((char*)seq->name.s);
+	// Test the length, not the pointer: the comment buffer is reused between
+	// records, so a non-NULL pointer may still hold an earlier record's text.
+	p->anno = seq->comment.l > 0? strdup((char*)seq->comment.s) : strdup("(null)");
+	p->gi = 0; p->len = seq->seq.l;
+	p->offset = (bns->n_seqs == 0)? 0 : (p-1)->offset + (p-1)->len;
+	p->n_ambs = 0;
+	for (i = lasts = 0; i < seq->seq.l; ++i) {
+		// index through unsigned char so bytes above 127 cannot become negative
+		int c = nst_nt4_table[(unsigned char)seq->seq.s[i]];
+		if (c >= 4) { // N
+			if (lasts == seq->seq.s[i]) { // contiguous N
+				++(*q)->len;
+			} else {
+				if (bns->n_holes == *m_holes) {
+					(*m_holes) <<= 1;
+					bns->ambs = (bntamb1_t*)realloc(bns->ambs, (*m_holes) * sizeof(bntamb1_t));
+				}
+				*q = bns->ambs + bns->n_holes;
+				(*q)->len = 1;
+				(*q)->offset = p->offset + i;
+				(*q)->amb = seq->seq.s[i];
+				++p->n_ambs;
+				++bns->n_holes;
+			}
+		}
+		lasts = seq->seq.s[i];
+		{ // fill buffer
+			if (c >= 4) c = lrand48()&3;
+			if (bns->l_pac == *m_pac) { // double the pac size
+				*m_pac <<= 1;
+				pac = realloc(pac, *m_pac/4);
+				memset(pac + bns->l_pac/4, 0, (*m_pac - bns->l_pac)/4);
+			}
+			_set_pac(pac, bns->l_pac, c);
+			++bns->l_pac;
+		}
+	}
+	++bns->n_seqs;
+	return pac;
+}
+
+/* Converts the FASTA/FASTQ stream fp_fa into "<prefix>.pac" (2-bit packed
+   bases) plus the .ann and .amb files written by bns_dump().
+
+   Unless for_only is non-zero, the reverse complement of the whole input is
+   appended after the forward sequence. Returns the number of bases packed.
+   The .pac name is built in a buffer sized from prefix, so the prefix may
+   have any length. */
+int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
+{
+	kseq_t *seq;
+	char *name;
+	bntseq_t *bns;
+	uint8_t *pac = 0;
+	int32_t m_seqs, m_holes;
+	int64_t ret = -1, m_pac, l;
+	bntamb1_t *q;
+	FILE *fp;
+
+	// initialization
+	seq = kseq_init(fp_fa);
+	bns = (bntseq_t*)calloc(1, sizeof(bntseq_t));
+	bns->seed = 11; // fixed seed for random generator
+	srand48(bns->seed);
+	m_seqs = m_holes = 8; m_pac = 0x10000;
+	bns->anns = (bntann1_t*)calloc(m_seqs, sizeof(bntann1_t));
+	bns->ambs = (bntamb1_t*)calloc(m_holes, sizeof(bntamb1_t));
+	pac = calloc(m_pac/4, 1);
+	q = bns->ambs;
+	// room for the prefix, ".pac" and the terminating NUL
+	name = (char*)malloc(strlen(prefix) + 5);
+	xassert(name != 0, "fail to allocate memory for the file name.");
+	strcpy(name, prefix); strcat(name, ".pac");
+	fp = xopen(name, "wb");
+	free(name);
+	// read sequences
+	while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
+	if (!for_only) { // add the reverse complemented sequence
+		m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
+		pac = realloc(pac, m_pac/4);
+		memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
+		for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
+			_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
+	}
+	ret = bns->l_pac;
+	{ // finalize .pac file
+		ubyte_t ct;
+		fwrite(pac, 1, (bns->l_pac>>2) + ((bns->l_pac&3) == 0? 0 : 1), fp);
+		// the following codes make the pac file size always (l_pac/4+1+1)
+		if (bns->l_pac % 4 == 0) {
+			ct = 0;
+			fwrite(&ct, 1, 1, fp);
+		}
+		ct = bns->l_pac % 4;
+		fwrite(&ct, 1, 1, fp);
+		// close .pac file
+		fclose(fp);
+	}
+	bns_dump(bns, prefix);
+	bns_destroy(bns);
+	kseq_destroy(seq);
+	free(pac);
+	return ret;
+}
+
+/* Entry point of "bwa fa2pac": packs <in.fasta> into files named after
+   <out.prefix>, or after the input file when no prefix is given. With -f
+   only the forward strand is packed. Returns 1 after printing the usage
+   line when no input file is given, 0 otherwise. */
+int bwa_fa2pac(int argc, char *argv[])
+{
+	int opt, for_only = 0;
+	const char *out_prefix;
+	gzFile fp;
+	while ((opt = getopt(argc, argv, "f")) >= 0) {
+		if (opt == 'f') for_only = 1;
+	}
+	if (optind == argc) {
+		fprintf(stderr, "Usage: bwa fa2pac [-f] <in.fasta> [<out.prefix>]\n");
+		return 1;
+	}
+	out_prefix = (optind + 1 < argc) ? argv[optind + 1] : argv[optind];
+	fp = xzopen(argv[optind], "r");
+	bns_fasta2bntseq(fp, out_prefix, for_only);
+	gzclose(fp);
+	return 0;
+}
+
+/* Counts ambiguous bases in the region [pos_f, pos_f + len) of the packed
+   sequence and, when ref_id is not NULL, stores in *ref_id the index of the
+   sequence found by a binary search for pos_f over the annotation offsets.
+
+   The return value is the size of the overlap between the region and the
+   first overlapping hole the binary search lands on (0 when there is none);
+   the search stops there, so further holes inside the same region are not
+   added. */
+int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id)
+{
+	int left, mid, right, nn;
+	if (ref_id) {
+		// binary search for the sequence with offset <= pos_f < next offset
+		left = 0; mid = 0; right = bns->n_seqs;
+		while (left < right) {
+			mid = (left + right) >> 1;
+			if (pos_f >= bns->anns[mid].offset) {
+				if (mid == bns->n_seqs - 1) break;
+				if (pos_f < bns->anns[mid+1].offset) break; // bracketed
+				left = mid + 1;
+			} else right = mid;
+		}
+		*ref_id = mid;
+	}
+	// binary search for a hole that overlaps the region
+	left = 0; right = bns->n_holes; nn = 0;
+	while (left < right) {
+		mid = (left + right) >> 1;
+		if (pos_f >= bns->ambs[mid].offset + bns->ambs[mid].len) left = mid + 1; // hole ends before the region
+		else if (pos_f + len <= bns->ambs[mid].offset) right = mid; // hole starts after the region
+		else { // overlap
+			if (pos_f >= bns->ambs[mid].offset) {
+				// region starts inside the hole
+				nn += bns->ambs[mid].offset + bns->ambs[mid].len < pos_f + len?
+					bns->ambs[mid].offset + bns->ambs[mid].len - pos_f : len;
+			} else {
+				// hole starts inside the region
+				nn += bns->ambs[mid].offset + bns->ambs[mid].len < pos_f + len?
+					bns->ambs[mid].len : len - (bns->ambs[mid].offset - pos_f);
+			}
+			break;
+		}
+	}
+	return nn;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bntseq.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,85 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#ifndef BWT_BNTSEQ_H
+#define BWT_BNTSEQ_H
+
+#include <stdint.h>
+#include <zlib.h>
+
+#ifndef BWA_UBYTE
+#define BWA_UBYTE
+typedef uint8_t ubyte_t;
+#endif
+
+// Annotation of one reference sequence.
+typedef struct {
+	int64_t offset; // start of the sequence; bns_cnt_ambi() compares query positions against it
+	int32_t len;    // sequence length; bwa_sa2aln() flags a hit ending beyond it as unmapped
+	int32_t n_ambs; // presumably the number of ambiguous bases in this sequence -- TODO confirm
+	uint32_t gi;    // NOTE(review): meaning not visible here; presumably a GenBank GI number -- confirm
+	char *name, *anno; // presumably the name and free-text annotation from the FASTA header -- confirm
+} bntann1_t;
+
+// One run ("hole") of ambiguous bases; bns_cnt_ambi() treats it as covering [offset, offset+len).
+typedef struct {
+	int64_t offset; // first position of the run
+	int32_t len;    // number of positions in the run
+	char amb;       // presumably the ambiguous character seen in the input -- TODO confirm
+} bntamb1_t;
+
+// A set of reference sequences: per-sequence annotations, ambiguous-base holes and the packed-sequence stream.
+typedef struct {
+	int64_t l_pac;   // bns_depos() maps positions >= l_pac back with 2*l_pac-1-pos and reports them as reverse
+	int32_t n_seqs;  // number of entries in anns
+	uint32_t seed;   // NOTE(review): purpose not visible here; presumably a random seed -- confirm
+	bntann1_t *anns; // n_seqs elements
+	int32_t n_holes; // number of entries in ambs
+	bntamb1_t *ambs; // n_holes elements
+	FILE *fp_pac;    // stream the packed sequence is read from; NOTE(review): FILE needs <stdio.h>, which this header does not include itself
+} bntseq_t;
+
+extern unsigned char nst_nt4_table[256];
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	void bns_dump(const bntseq_t *bns, const char *prefix);
+	bntseq_t *bns_restore(const char *prefix);
+	bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename);
+	void bns_destroy(bntseq_t *bns);
+	int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only);
+	int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+ * Fold a position back into the range below bns->l_pac.
+ *
+ * Positions below l_pac are returned unchanged with *is_rev set to 0.
+ * Positions at or above l_pac are mirrored to 2*l_pac - 1 - pos with
+ * *is_rev set to 1.
+ */
+static inline int64_t bns_depos(const bntseq_t *bns, int64_t pos, int *is_rev)
+{
+	if (pos < bns->l_pac) {
+		*is_rev = 0;
+		return pos;
+	}
+	*is_rev = 1;
+	return (bns->l_pac<<1) - 1 - pos;
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwa.1	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,571 @@
+.TH bwa 1 "19 June 2012" "bwa-0.6.2" "Bioinformatics tools"
+.SH NAME
+.PP
+bwa - Burrows-Wheeler Alignment Tool
+.SH SYNOPSIS
+.PP
+bwa index -a bwtsw database.fasta
+.PP
+bwa aln database.fasta short_read.fastq > aln_sa.sai
+.PP
+bwa samse database.fasta aln_sa.sai short_read.fastq > aln.sam
+.PP
+bwa sampe database.fasta aln_sa1.sai aln_sa2.sai read1.fq read2.fq > aln.sam
+.PP
+bwa bwasw database.fasta long_read.fastq > aln.sam
+
+.SH DESCRIPTION
+.PP
+BWA is a fast lightweight tool that aligns relatively short sequences
+(queries) to a sequence database (target), such as the human reference
+genome. It implements two different algorithms, both based on
+Burrows-Wheeler Transform (BWT). The first algorithm is designed for
+short queries up to ~150bp with low error rate (<3%). It does gapped
+global alignment w.r.t. queries, supports paired-end reads, and is one
+of the fastest short read alignment algorithms to date while also
+visiting suboptimal hits. The second algorithm, BWA-SW, is designed for
+reads longer than 100bp with more errors. It performs a heuristic Smith-Waterman-like
+alignment to find high-scoring local hits and split hits. On
+low-error short queries, BWA-SW is a little slower and less accurate than the
+first algorithm, but on long queries, it is better.
+.PP
+For both algorithms, the database file in the FASTA format must be
+first indexed with the
+.B `index'
+command, which typically takes a few hours for a 3GB genome. The first algorithm is
+implemented via the
+.B `aln'
+command, which finds the suffix array (SA) coordinates of good hits of
+each individual read, and the
+.B `samse/sampe'
+command, which converts SA coordinates to chromosomal coordinates and
+pairs reads (for `sampe'). The second algorithm is invoked by the
+.B `bwasw'
+command. It works for single-end reads only.
+
+.SH COMMANDS AND OPTIONS
+.TP
+.B index
+bwa index [-p prefix] [-a algoType] <in.db.fasta>
+
+Index database sequences in the FASTA format.
+
+.B OPTIONS:
+.RS
+.TP 10
+.B -c
+Build color-space index. The input FASTA should be in nucleotide space. (Disabled since 0.6.x)
+.TP
+.BI -p \ STR
+Prefix of the output database [same as db filename]
+.TP
+.BI -a \ STR
+Algorithm for constructing BWT index. Available options are:
+.RS
+.TP
+.B is
+IS linear-time algorithm for constructing suffix array. It requires
+5.37N memory where N is the size of the database. IS is moderately fast,
+but does not work with databases larger than 2GB. IS is the default
+algorithm due to its simplicity. The current codes for IS algorithm are
+reimplemented by Yuta Mori.
+.TP
+.B bwtsw
+Algorithm implemented in BWT-SW. This method works with the whole human
+genome.
+.RE
+.RE
+
+.TP
+.B aln
+bwa aln [-n maxDiff] [-o maxGapO] [-e maxGapE] [-d nDelTail] [-i
+nIndelEnd] [-k maxSeedDiff] [-l seedLen] [-t nThrds] [-cRN] [-M misMsc]
+[-O gapOsc] [-E gapEsc] [-q trimQual] <in.db.fasta> <in.query.fq> >
+<out.sai>
+
+Find the SA coordinates of the input reads. Maximum
+.I maxSeedDiff
+differences are allowed in the first
+.I seedLen
+subsequence and maximum
+.I maxDiff
+differences are allowed in the whole sequence.
+
+.B OPTIONS:
+.RS
+.TP 10
+.BI -n \ NUM
+Maximum edit distance if the value is INT, or the fraction of missing
+alignments given 2% uniform base error rate if FLOAT. In the latter
+case, the maximum edit distance is automatically chosen for different
+read lengths. [0.04]
+.TP
+.BI -o \ INT
+Maximum number of gap opens [1]
+.TP
+.BI -e \ INT
+Maximum number of gap extensions, -1 for k-difference mode (disallowing
+long gaps) [-1]
+.TP
+.BI -d \ INT
+Disallow a long deletion within INT bp towards the 3'-end [16]
+.TP
+.BI -i \ INT
+Disallow an indel within INT bp towards the ends [5]
+.TP
+.BI -l \ INT
+Take the first INT subsequence as seed. If INT is larger than the query
+sequence, seeding will be disabled. For long reads, this option is
+typically ranged from 25 to 35 for `-k 2'. [inf]
+.TP
+.BI -k \ INT
+Maximum edit distance in the seed [2]
+.TP
+.BI -t \ INT
+Number of threads (multi-threading mode) [1]
+.TP
+.BI -M \ INT
+Mismatch penalty. BWA will not search for suboptimal hits with a score
+lower than (bestScore-misMsc). [3]
+.TP
+.BI -O \ INT
+Gap open penalty [11]
+.TP
+.BI -E \ INT
+Gap extension penalty [4]
+.TP
+.BI -R \ INT
+Proceed with suboptimal alignments if there are no more than INT equally
+best hits. This option only affects paired-end mapping. Increasing this
+threshold helps to improve the pairing accuracy at the cost of speed,
+especially for short reads (~32bp).
+.TP
+.B -c
+Reverse query but not complement it, which is required for alignment in
+the color space. (Disabled since 0.6.x)
+.TP
+.B -N
+Disable iterative search. All hits with no more than
+.I maxDiff
+differences will be found. This mode is much slower than the default.
+.TP
+.BI -q \ INT
+Parameter for read trimming. BWA trims a read down to
+argmax_x{\\sum_{i=x+1}^l(INT-q_i)} if q_l<INT where l is the original
+read length. [0]
+.TP
+.B -I
+The input is in the Illumina 1.3+ read format (quality equals ASCII-64).
+.TP
+.BI -B \ INT
+Length of barcode starting from the 5'-end. When
+.I INT
+is positive, the barcode of each read will be trimmed before mapping and will
+be written at the
+.B BC
+SAM tag. For paired-end reads, the barcode from both ends are concatenated. [0]
+.TP
+.B -b
+Specify that the input read sequence file is in the BAM format. For paired-end
+data, two ends in a pair must be grouped together and options
+.B -1
+or
+.B -2
+are usually applied to specify which end should be mapped. Typical
+command lines for mapping paired-end data in the BAM format are:
+
+    bwa aln ref.fa -b1 reads.bam > 1.sai
+    bwa aln ref.fa -b2 reads.bam > 2.sai
+    bwa sampe ref.fa 1.sai 2.sai reads.bam reads.bam > aln.sam
+.TP
+.B -0
+When
+.B -b
+is specified, only use single-end reads in mapping.
+.TP
+.B -1
+When
+.B -b
+is specified, only use the first read in a read pair in mapping (skip
+single-end reads and the second reads).
+.TP
+.B -2
+When
+.B -b
+is specified, only use the second read in a read pair in mapping.
+.B
+.RE
+
+.TP
+.B samse
+bwa samse [-n maxOcc] <in.db.fasta> <in.sai> <in.fq> > <out.sam>
+
+Generate alignments in the SAM format given single-end reads. Repetitive
+hits will be randomly chosen.
+
+.B OPTIONS:
+.RS
+.TP 10
+.BI -n \ INT
+Maximum number of alignments to output in the XA tag for reads paired
+properly. If a read has more than INT hits, the XA tag will not be
+written. [3]
+.TP
+.BI -r \ STR
+Specify the read group in a format like `@RG\\tID:foo\\tSM:bar'. [null]
+.RE
+
+.TP
+.B sampe
+bwa sampe [-a maxInsSize] [-o maxOcc] [-n maxHitPaired] [-N maxHitDis]
+[-P] <in.db.fasta> <in1.sai> <in2.sai> <in1.fq> <in2.fq> > <out.sam>
+
+Generate alignments in the SAM format given paired-end reads. Repetitive
+read pairs will be placed randomly.
+
+.B OPTIONS:
+.RS
+.TP 8
+.BI -a \ INT
+Maximum insert size for a read pair to be considered being mapped
+properly. Since 0.4.5, this option is only used when there are not
+enough good alignments to infer the distribution of insert sizes. [500]
+.TP
+.BI -o \ INT
+Maximum occurrences of a read for pairing. A read with more occurrences
+will be treated as a single-end read. Reducing this parameter helps
+faster pairing. [100000]
+.TP
+.B -P
+Load the entire FM-index into memory to reduce disk operations
+(base-space reads only). With this option, at least 1.25N bytes of
+memory are required, where N is the length of the genome.
+.TP
+.BI -n \ INT
+Maximum number of alignments to output in the XA tag for reads paired
+properly. If a read has more than INT hits, the XA tag will not be
+written. [3]
+.TP
+.BI -N \ INT
+Maximum number of alignments to output in the XA tag for discordant
+read pairs (excluding singletons). If a read has more than INT hits, the
+XA tag will not be written. [10]
+.TP
+.BI -r \ STR
+Specify the read group in a format like `@RG\\tID:foo\\tSM:bar'. [null]
+.RE
+
+.TP
+.B bwasw
+bwa bwasw [-a matchScore] [-b mmPen] [-q gapOpenPen] [-r gapExtPen] [-t
+nThreads] [-w bandWidth] [-T thres] [-s hspIntv] [-z zBest] [-N
+nHspRev] [-c thresCoef] <in.db.fasta> <in.fq> [mate.fq]
+
+Align query sequences in the
+.I in.fq
+file. When
+.I mate.fq
+is present, perform paired-end alignment. The paired-end mode only works
+for reads from Illumina short-insert libraries. In the paired-end mode, BWA-SW
+may still output split alignments but they are all marked as not properly
+paired; the mate positions will not be written if the mate has multiple
+local hits.
+
+.B OPTIONS:
+.RS
+.TP 10
+.BI -a \ INT
+Score of a match [1]
+.TP
+.BI -b \ INT
+Mismatch penalty [3]
+.TP
+.BI -q \ INT
+Gap open penalty [5]
+.TP
+.BI -r \ INT
+Gap extension penalty. The penalty for a contiguous gap of size k is
+q+k*r. [2]
+.TP
+.BI -t \ INT
+Number of threads in the multi-threading mode [1]
+.TP
+.BI -w \ INT
+Band width in the banded alignment [33]
+.TP
+.BI -T \ INT
+Minimum score threshold divided by a [37]
+.TP
+.BI -c \ FLOAT
+Coefficient for threshold adjustment according to query length. Given an
+l-long query, the threshold for a hit to be retained is
+a*max{T,c*log(l)}. [5.5]
+.TP
+.BI -z \ INT
+Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]
+.TP
+.BI -s \ INT
+Maximum SA interval size for initiating a seed. Higher -s increases
+accuracy at the cost of speed. [3]
+.TP
+.BI -N \ INT
+Minimum number of seeds supporting the resultant alignment to skip
+reverse alignment. [5]
+.RE
+
+.SH SAM ALIGNMENT FORMAT
+.PP
+The output of the
+.B `aln'
+command is binary and designed for BWA use only. BWA outputs the final
+alignment in the SAM (Sequence Alignment/Map) format. Each line consists
+of:
+
+.TS
+center box;
+cb | cb | cb
+n | l | l .
+Col	Field	Description
+_
+1	QNAME	Query (pair) NAME
+2	FLAG	bitwise FLAG
+3	RNAME	Reference sequence NAME
+4	POS	1-based leftmost POSition/coordinate of clipped sequence
+5	MAPQ	MAPping Quality (Phred-scaled)
+6	CIGAR	extended CIGAR string
+7	MRNM	Mate Reference sequence NaMe (`=' if same as RNAME)
+8	MPOS	1-based Mate POSition
+9	ISIZE	Inferred insert SIZE
+10	SEQ	query SEQuence on the same strand as the reference
+11	QUAL	query QUALity (ASCII-33 gives the Phred base quality)
+12	OPT	variable OPTional fields in the format TAG:VTYPE:VALUE
+.TE
+
+.PP
+Each bit in the FLAG field is defined as:
+
+.TS
+center box;
+cb | cb | cb
+c | l | l .
+Chr	Flag	Description
+_
+p	0x0001	the read is paired in sequencing
+P	0x0002	the read is mapped in a proper pair
+u	0x0004	the query sequence itself is unmapped
+U	0x0008	the mate is unmapped
+r	0x0010	strand of the query (1 for reverse)
+R	0x0020	strand of the mate
+1	0x0040	the read is the first read in a pair
+2	0x0080	the read is the second read in a pair
+s	0x0100	the alignment is not primary
+f	0x0200	QC failure
+d	0x0400	optical or PCR duplicate
+.TE
+
+.PP
+Please check <http://samtools.sourceforge.net> for the format
+specification and the tools for post-processing the alignment.
+
+BWA generates the following optional fields. Tags starting with `X' are
+specific to BWA.
+
+.TS
+center box;
+cb | cb
+cB | l .
+Tag	Meaning
+_
+NM	Edit distance
+MD	Mismatching positions/bases
+AS	Alignment score
+BC	Barcode sequence
+_
+X0	Number of best hits
+X1	Number of suboptimal hits found by BWA
+XN	Number of ambiguous bases in the reference
+XM	Number of mismatches in the alignment
+XO	Number of gap opens
+XG	Number of gap extensions
+XT	Type: Unique/Repeat/N/Mate-sw
+XA	Alternative hits; format: (chr,pos,CIGAR,NM;)*
+_
+XS	Suboptimal alignment score
+XF	Support from forward/reverse alignment
+XE	Number of supporting seeds
+.TE
+
+.PP
+Note that XO and XG are generated by BWT search while the CIGAR string
+by Smith-Waterman alignment. These two tags may be inconsistent with the
+CIGAR string. This is not a bug.
+
+.SH NOTES ON SHORT-READ ALIGNMENT
+.SS Alignment Accuracy
+.PP
+When seeding is disabled, BWA guarantees to find an alignment
+containing maximum
+.I maxDiff
+differences including
+.I maxGapO
+gap opens which do not occur within
+.I nIndelEnd
+bp towards either end of the query. Longer gaps may be found if
+.I maxGapE
+is positive, but it is not guaranteed to find all hits. When seeding is
+enabled, BWA further requires that the first
+.I seedLen
+subsequence contains no more than
+.I maxSeedDiff
+differences.
+.PP
+When gapped alignment is disabled, BWA is expected to generate the same
+alignment as Eland version 1, the Illumina alignment program. However, as BWA
+changes `N' in the database sequence to random nucleotides, hits to these
+random sequences will also be counted. As a consequence, BWA may mark a
+unique hit as a repeat, if the random sequences happen to be identical
+to the sequences which should be unique in the database.
+.PP
+By default, if the best hit is not highly repetitive (controlled by -R), BWA
+also finds all hits containing one more mismatch; otherwise, BWA finds all
+equally best hits only. Base quality is NOT considered in evaluating
+hits. In the paired-end mode, BWA pairs all hits it found. It further
+performs Smith-Waterman alignment for unmapped reads to rescue reads with a
+high error rate, and for high-quality anomalous pairs to fix potential alignment
+errors.
+
+.SS Estimating Insert Size Distribution
+.PP
+BWA estimates the insert size distribution per 256*1024 read pairs. It
+first collects pairs of reads with both ends mapped with a single-end
+quality 20 or higher and then calculates median (Q2), lower and higher
+quartile (Q1 and Q3). It estimates the mean and the variance of the
+insert size distribution from pairs whose insert sizes are within
+interval [Q1-2(Q3-Q1), Q3+2(Q3-Q1)]. The maximum distance x for a pair
+considered to be properly paired (SAM flag 0x2) is calculated by solving
+equation Phi((x-mu)/sigma)=x/L*p0, where mu is the mean, sigma is the
+standard deviation of the insert size distribution, L is the length of the
+genome, p0 is prior of anomalous pair and Phi() is the standard
+cumulative distribution function. For mapping Illumina short-insert
+reads to the human genome, x is about 6-7 sigma away from the
+mean. Quartiles, mean, variance and x will be printed to the standard
+error output.
+
+.SS Memory Requirement
+.PP
+With bwtsw algorithm, 5GB memory is required for indexing the complete
+human genome sequences. For short reads, the
+.B aln
+command uses ~3.2GB memory and the
+.B sampe
+command uses ~5.4GB.
+
+.SS Speed
+.PP
+Indexing the human genome sequences takes 3 hours with bwtsw
+algorithm. Indexing smaller genomes with the IS algorithm is
+faster, but requires more memory.
+.PP
+The speed of alignment is largely determined by the error rate of the query
+sequences (r). Firstly, BWA runs much faster for near perfect hits than
+for hits with many differences, and it stops searching for a hit with
+l+2 differences if a l-difference hit is found. This means BWA will be
+very slow if r is high because in this case BWA has to visit hits with
+many differences and looking for these hits is expensive. Secondly, the
+alignment algorithm behind makes the speed sensitive to [k log(N)/m],
+where k is the maximum allowed differences, N the size of database and m
+the length of a query. In practice, we choose k w.r.t. r and therefore r
+is the leading factor. I would not recommend using BWA on data with
+r>0.02.
+.PP
+Pairing is slower for shorter reads. This is mainly because shorter
+reads have more spurious hits and converting SA coordinates to
+chromosomal coordinates is very costly.
+
+.SH NOTES ON LONG-READ ALIGNMENT
+.PP
+Command
+.B bwasw
+is designed for long-read alignment. BWA-SW essentially aligns the trie
+of the reference genome against the directed acyclic word graph (DAWG) of a
+read to find seeds not highly repetitive in the genome, and then performs a
+standard Smith-Waterman algorithm to extend the seeds. A key heuristic, called
+the Z-best heuristic, is that at each vertex in the DAWG, BWA-SW only keeps the
+top Z reference suffix intervals that match the vertex. BWA-SW is more accurate
+if the resultant alignment is supported by more seeds, and therefore BWA-SW
+usually performs better on long queries or queries with low divergence to the
+reference genome.
+
+BWA-SW is perhaps a better choice than BWA-short for 100bp single-end HiSeq reads
+mainly because it gives better gapped alignment. For paired-end reads, it is not
+yet known whether BWA-short or BWA-SW yields overall better results.
+
+.SH CHANGES IN BWA-0.6
+.PP
+Since version 0.6, BWA has been able to work with a reference genome longer than 4GB.
+This feature makes it possible to integrate the forward and reverse complemented
+genome in one FM-index, which speeds up both BWA-short and BWA-SW. As a tradeoff,
+BWA uses more memory because it has to keep all positions and ranks in 64-bit
+integers, twice larger than 32-bit integers used in the previous versions.
+
+The latest BWA-SW also works for paired-end reads longer than 100bp. In
+comparison to BWA-short, BWA-SW tends to be more accurate for highly unique
+reads and more robust to relatively long INDELs and structural variants.
+Nonetheless, BWA-short usually has higher power to distinguish the optimal hit
+from many suboptimal hits. The choice of the mapping algorithm may depend on
+the application.
+
+.SH SEE ALSO
+BWA website <http://bio-bwa.sourceforge.net>, Samtools website
+<http://samtools.sourceforge.net>
+
+.SH AUTHOR
+Heng Li at the Sanger Institute wrote the key source codes and
+integrated the following codes for BWT construction: bwtsw
+<http://i.cs.hku.hk/~ckwong3/bwtsw/>, implemented by Chi-Kwong Wong at
+the University of Hong Kong and IS
+<http://yuta.256.googlepages.com/sais> originally proposed by Nong Ge
+<http://www.cs.sysu.edu.cn/nong/> at the Sun Yat-Sen University and
+implemented by Yuta Mori.
+
+.SH LICENSE AND CITATION
+.PP
+The full BWA package is distributed under GPLv3 as it uses source codes
+from BWT-SW which is covered by GPL. Sorting, hash table, BWT and IS
+libraries are distributed under the MIT license.
+.PP
+If you use the short-read alignment component, please cite the following
+paper:
+.PP
+Li H. and Durbin R. (2009) Fast and accurate short read alignment with
+Burrows-Wheeler transform. Bioinformatics, 25, 1754-1760. [PMID: 19451168]
+.PP
+If you use the long-read component (BWA-SW), please cite:
+.PP
+Li H. and Durbin R. (2010) Fast and accurate long-read alignment with
+Burrows-Wheeler transform. Bioinformatics, 26, 589-595. [PMID: 20080505]
+
+.SH HISTORY
+BWA is largely influenced by BWT-SW. It uses source codes from BWT-SW
+and mimics its binary file formats; BWA-SW resembles BWT-SW in several
+ways. The initial idea about BWT-based alignment also came from the
+group who developed BWT-SW. At the same time, BWA is different enough
+from BWT-SW. The short-read alignment algorithm bears no similarity to
+Smith-Waterman algorithm any more. While BWA-SW learns from BWT-SW, it
+introduces heuristics that can hardly be applied to the original
+algorithm. In all, BWA does not guarantee to find all local hits as what
+BWT-SW is designed to do, but it is much faster than BWT-SW on both
+short and long query sequences.
+
+I started to write the first piece of codes on 24 May 2008 and got the
+initial stable version on 02 June 2008. During this period, I learned
+that Professor Tak-Wah Lam, the first author of the BWT-SW paper,
+was collaborating with Beijing Genomics Institute on SOAP2, the successor
+to SOAP (Short Oligonucleotide Analysis Package). SOAP2 came out in
+November 2008. According to the SourceForge download page, the third
+BWT-based short read aligner, bowtie, was first released in August
+2008. At the time of writing this manual, at least three more BWT-based
+short-read aligners are being implemented.
+
+The BWA-SW algorithm is a new component of BWA. It was conceived in
+November 2008 and implemented ten months later.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwa.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,272 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include "bwa.h"
+#include "bwt.h"
+#include "bwtgap.h"
+#include "bntseq.h"
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+extern unsigned char nst_nt4_table[256];
+extern void seq_reverse(int len, uint8_t *seq, int is_comp);
+
+bwa_opt_t bwa_def_opt = { 11, 4, -1, 1, 6, 32, 2, 0.04 };
+
+// In-memory index bundle built by bwa_idx_load() and released by bwa_idx_destroy().
+struct bwa_idx_t {
+	bwt_t *bwt;    // returned by bwt_restore_bwt("<prefix>.bwt"), then passed to bwt_restore_sa("<prefix>.sa", ...)
+	bntseq_t *bns; // returned by bns_restore(prefix)
+	uint8_t *pac;  // l_pac/4+1 bytes read from bns->fp_pac; compute_NM() extracts 2 bits per base from it
+};
+
+// Reusable per-caller working storage created by bwa_buf_init().
+struct bwa_buf_t {
+	int max_buf;        // current capacity of buf in bytes; grown on demand by bwa_sai() and bwa_sa2aln()
+	bwa_pestat_t pes;   // NOTE(review): not referenced by any code visible in this file
+	gap_stack_t *stack; // from gap_init_stack2(max_score); handed to bwt_match_gap()
+	gap_opt_t *opt;     // from gap_init_opt(), then overwritten field by field from the caller's bwa_opt_t
+	int *diff_tab;      // BWA_MAX_QUERY_LEN entries; diff_tab[len] = bwa_cal_maxdiff(len, BWA_AVG_ERR, fnr), entry 0 is 0
+	uint8_t *buf;       // scratch memory, realloc()'ed when a request exceeds max_buf
+	int *logn;          // 256 entries; logn[i] = (int)(4.343 * log(i) + 0.499), entry 0 is 0
+};
+
+/*
+ * Load an index from the files sharing the given prefix.
+ *
+ * The annotations come from bns_restore(prefix), the BWT from
+ * "<prefix>.bwt" and the suffix array from "<prefix>.sa". The packed
+ * sequence (l_pac/4+1 bytes) is then read through bns->fp_pac, after
+ * which that stream is closed and the field reset to 0.
+ *
+ * Returns a newly allocated bwa_idx_t; release it with bwa_idx_destroy().
+ * NOTE(review): allocation results and the fread() return value are not
+ * checked here.
+ */
+bwa_idx_t *bwa_idx_load(const char *prefix)
+{
+	int prefix_len = strlen(prefix);
+	bwa_idx_t *idx = calloc(1, sizeof(bwa_idx_t));
+	char *path = malloc(prefix_len + 10); // room for the prefix plus the longest suffix used below
+	char *suffix = path + prefix_len;     // where ".bwt" / ".sa" get written
+	size_t pac_bytes;
+
+	strcpy(path, prefix);
+	idx->bns = bns_restore(path);
+
+	strcpy(suffix, ".bwt");
+	idx->bwt = bwt_restore_bwt(path);
+
+	strcpy(suffix, ".sa");
+	bwt_restore_sa(path, idx->bwt);
+	free(path);
+
+	pac_bytes = idx->bns->l_pac/4+1;
+	idx->pac = calloc(pac_bytes, 1);
+	fread(idx->pac, 1, pac_bytes, idx->bns->fp_pac);
+	fclose(idx->bns->fp_pac);
+	idx->bns->fp_pac = 0; // the stream is gone; make sure nobody reuses it
+	return idx;
+}
+
+/*
+ * Release an index obtained from bwa_idx_load(): the annotations, the
+ * BWT, the packed sequence and finally the container itself.
+ * Passing a null pointer is a no-op, mirroring free().
+ */
+void bwa_idx_destroy(bwa_idx_t *p)
+{
+	if (p == 0) return;
+	bns_destroy(p->bns);
+	bwt_destroy(p->bwt);
+	free(p->pac);
+	free(p);
+}
+
+/*
+ * Allocate the working storage used by bwa_sai(), bwa_sa2aln() and bwa_se().
+ *
+ * opt       - alignment options; copied field by field into a gap_opt_t
+ *             obtained from gap_init_opt()
+ * max_score - forwarded to gap_init_stack2()
+ *
+ * Also precomputes diff_tab (the value of bwa_cal_maxdiff() for every read
+ * length below BWA_MAX_QUERY_LEN) and logn (a scaled logarithm table with
+ * 256 entries). Release the result with bwa_buf_destroy().
+ */
+bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score)
+{
+	extern gap_opt_t *gap_init_opt(void);
+	extern int bwa_cal_maxdiff(int l, double err, double thres);
+	int i;
+	bwa_buf_t *p;
+	// calloc, not malloc: bwa_sai() and bwa_sa2aln() compare against max_buf and
+	// realloc() buf before anything has written them, so both must start at zero
+	p = calloc(1, sizeof(bwa_buf_t));
+	p->stack = gap_init_stack2(max_score);
+	p->opt = gap_init_opt();
+	p->opt->s_gapo = opt->s_gapo;
+	p->opt->s_gape = opt->s_gape;
+	p->opt->max_diff = opt->max_diff;
+	p->opt->max_gapo = opt->max_gapo;
+	p->opt->max_gape = opt->max_gape;
+	p->opt->seed_len = opt->seed_len;
+	p->opt->max_seed_diff = opt->max_seed_diff;
+	p->opt->fnr = opt->fnr;
+	p->diff_tab = calloc(BWA_MAX_QUERY_LEN, sizeof(int));
+	for (i = 1; i < BWA_MAX_QUERY_LEN; ++i) // entry 0 stays 0
+		p->diff_tab[i] = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr);
+	p->logn = calloc(256, sizeof(int));
+	for (i = 1; i != 256; ++i) // entry 0 stays 0; log(0) is never evaluated
+		p->logn[i] = (int)(4.343 * log(i) + 0.499);
+	return p;
+}
+
+/*
+ * Release everything owned by a buffer created with bwa_buf_init(),
+ * including the scratch memory grown by bwa_sai()/bwa_sa2aln().
+ * Passing a null pointer is a no-op.
+ */
+void bwa_buf_destroy(bwa_buf_t *p)
+{
+	if (p == 0) return;
+	gap_destroy_stack(p->stack);
+	free(p->diff_tab); free(p->logn); free(p->opt);
+	free(p->buf); // was leaked before; safe when never allocated because bwa_buf_init() zeroes it
+	free(p);
+}
+
+/*
+ * Search the index for SA intervals matching `seq'.
+ *
+ * The read is converted through nst_nt4_table, reversed, and its width
+ * arrays are computed with bwt_cal_width() (for the whole read and, when
+ * the read is longer than the seed length, for the seed). The codes are
+ * then complemented and handed to bwt_match_gap().
+ *
+ * When the configured fnr is positive, the maximum number of differences
+ * is chosen from the read length instead of the fixed max_diff option.
+ *
+ * Returns the hits as a bwa_sai_t; sai.sai is whatever bwt_match_gap()
+ * returned and sai.n the count it reported.
+ */
+bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq)
+{
+	extern int bwt_cal_width(const bwt_t *bwt, int len, const ubyte_t *str, bwt_width_t *width);
+	extern int bwa_cal_maxdiff(int l, double err, double thres);
+	int i, seq_len, buf_len;
+	bwt_width_t *w, *seed_w;
+	uint8_t *s;
+	gap_opt_t opt2 = *buf->opt;
+	bwa_sai_t sai;
+
+	seq_len = strlen(seq);
+	// estimate the buffer length
+	buf_len = (buf->opt->seed_len + seq_len + 1) * sizeof(bwt_width_t) + seq_len;
+	if (buf_len > buf->max_buf) {
+		buf->max_buf = buf_len;
+		kroundup32(buf->max_buf);
+		buf->buf = realloc(buf->buf, buf->max_buf);
+	}
+	memset(buf->buf, 0, buf_len);
+	// layout of the scratch buffer: seed widths | read widths (seq_len+1) | encoded read
+	seed_w = (bwt_width_t*)buf->buf;
+	w = seed_w + buf->opt->seed_len;
+	s = (uint8_t*)(w + seq_len + 1);
+	if (opt2.fnr > 0.) {
+		// diff_tab only has BWA_MAX_QUERY_LEN entries; compute longer reads directly
+		// with the same formula instead of reading past the end of the table
+		opt2.max_diff = seq_len < BWA_MAX_QUERY_LEN? buf->diff_tab[seq_len]
+			: bwa_cal_maxdiff(seq_len, BWA_AVG_ERR, opt2.fnr);
+	}
+	// copy the sequence; index through uint8_t because plain char may be signed
+	// and a byte >= 0x80 would otherwise become a negative table index
+	for (i = 0; i < seq_len; ++i)
+		s[i] = nst_nt4_table[(uint8_t)seq[i]];
+	seq_reverse(seq_len, s, 0);
+	// mapping
+	bwt_cal_width(idx->bwt, seq_len, s, w);
+	if (opt2.seed_len >= seq_len) opt2.seed_len = 0x7fffffff;
+	if (seq_len > buf->opt->seed_len)
+		bwt_cal_width(idx->bwt, buf->opt->seed_len, s + (seq_len - buf->opt->seed_len), seed_w);
+	for (i = 0; i < seq_len; ++i) // complement; codes above 3 all become 4
+		s[i] = s[i] > 3? 4 : 3 - s[i];
+	sai.sai = (bwa_sai1_t*)bwt_match_gap(idx->bwt, seq_len, s, w, seq_len <= buf->opt->seed_len? 0 : seed_w, &opt2, &sai.n, buf->stack);
+	return sai;
+}
+
+/*
+ * Walk a BAM-encoded CIGAR (length<<4 | op) and count mismatches and gaps.
+ *
+ * pac/l_pac - 2-bit packed reference and its length in bases
+ * seq       - read as numeric base codes
+ * pos       - reference position where the alignment starts
+ * n_mm      - out: number of op-0 columns whose bases differ
+ * n_gaps    - out: total length of op-1 and op-2 operations
+ *
+ * Columns that would fall at or beyond l_pac are not compared.
+ */
+static void compute_NM(const uint8_t *pac, uint64_t l_pac, uint8_t *seq, int64_t pos, int n_cigar, uint32_t *cigar, int *n_mm, int *n_gaps)
+{
+	uint64_t ref_pos = pos, j;
+	int i, qry_pos = 0;
+
+	*n_mm = 0;
+	*n_gaps = 0;
+	for (i = 0; i < n_cigar; ++i) {
+		const int op = cigar[i] & 0xf;
+		const int op_len = cigar[i] >> 4;
+
+		switch (op) {
+		case 0: // match/mismatch: consumes both reference and read
+			for (j = 0; j < op_len && ref_pos + j < l_pac; ++j) {
+				const uint64_t r = ref_pos + j;
+				const int ref_base = pac[r>>2] >> ((~r&3)<<1) & 3;
+				// ref_base is always 0..3, so a read code above 3 is counted
+				// by the inequality alone
+				if (ref_base != seq[qry_pos + j]) ++(*n_mm);
+			}
+			ref_pos += op_len;
+			qry_pos += op_len;
+			break;
+		case 1: // consumes the read only; counted as gap
+			*n_gaps += op_len;
+			qry_pos += op_len;
+			break;
+		case 2: // consumes the reference only; counted as gap
+			*n_gaps += op_len;
+			ref_pos += op_len;
+			break;
+		case 4: // consumes the read only; not counted
+			qry_pos += op_len;
+			break;
+		default: // other operations advance neither coordinate
+			break;
+		}
+	}
+}
+
+/*
+ * Turn one suffix-array position into an alignment record.
+ *
+ * idx    - loaded index
+ * buf    - working buffer; its scratch area is grown to hold the read on
+ *          both strands (2 * strlen(seq) bytes)
+ * seq    - the read as text
+ * sa     - suffix-array position of the hit
+ * n_gaps - number of gaps reported for the hit; non-zero triggers CIGAR
+ *          refinement through bwa_refine_gapped_core()
+ * aln    - out: cleared, then filled with flag, CIGAR, reference id and
+ *          offset, and the counts of ambiguous bases, mismatches and gaps
+ *
+ * aln->cigar is allocated with malloc(); bwa_one_destroy() frees it.
+ * A hit that extends past the end of its reference sequence gets flag 4.
+ */
+void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln)
+{
+	extern bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand);
+	extern bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const uint8_t *seq, bwtint_t *_pos, int ext, int *n_cigar, int is_end_correct);
+	int strand, seq_len, i, n_gap, n_mm;
+	uint64_t pos3, pac_pos;
+	uint8_t *s[2];
+
+	memset(aln, 0, sizeof(bwa_aln_t));
+	seq_len = strlen(seq);
+	if (seq_len<<1 > buf->max_buf) {
+		buf->max_buf = seq_len<<1;
+		kroundup32(buf->max_buf);
+		buf->buf = realloc(buf->buf, buf->max_buf);
+	}
+	s[0] = buf->buf;        // read as given
+	s[1] = s[0] + seq_len;  // second copy, passed through seq_reverse(..., 1) below
+	// index through uint8_t: plain char may be signed, and a byte >= 0x80
+	// would otherwise become a negative table index
+	for (i = 0; i < seq_len; ++i)
+		s[0][i] = s[1][i] = nst_nt4_table[(uint8_t)seq[i]];
+	seq_reverse(seq_len, s[1], 1);
+	pac_pos = bwa_sa2pos(idx->bns, idx->bwt, sa, seq_len, &strand);
+	if (strand) aln->flag |= 16;
+	if (n_gaps) { // only for gapped alignment
+		int n_cigar;
+		bwa_cigar_t *cigar16;
+		cigar16 = bwa_refine_gapped_core(idx->bns->l_pac, idx->pac, seq_len, s[strand], &pac_pos, strand? n_gaps : -n_gaps, &n_cigar, 1);
+		aln->n_cigar = n_cigar;
+		aln->cigar = malloc(n_cigar * 4);
+		for (i = 0, pos3 = pac_pos; i < n_cigar; ++i) {
+			int op = cigar16[i]>>14;
+			int len = cigar16[i]&0x3fff;
+			if (op == 3) op = 4; // the 16-bit CIGAR is different from the 32-bit CIGAR
+			aln->cigar[i] = len<<4 | op;
+			if (op == 0 || op == 2) pos3 += len; // only these operations advance on the reference
+		}
+		free(cigar16);
+	} else { // ungapped
+		aln->n_cigar = 1;
+		aln->cigar = malloc(4);
+		aln->cigar[0] = seq_len<<4 | 0;
+		pos3 = pac_pos + seq_len;
+	}
+	// pos3 is one past the last reference base covered by the alignment
+	aln->n_n = bns_cnt_ambi(idx->bns, pac_pos, pos3 - pac_pos, &aln->ref_id);
+	aln->offset = pac_pos - idx->bns->anns[aln->ref_id].offset;
+	if (pos3 - idx->bns->anns[aln->ref_id].offset > idx->bns->anns[aln->ref_id].len) // read mapped beyond the end of a sequence
+		aln->flag |= 4; // read unmapped
+	compute_NM(idx->pac, idx->bns->l_pac, s[strand], pac_pos, aln->n_cigar, aln->cigar, &n_mm, &n_gap);
+	aln->n_mm = n_mm;
+	aln->n_gap = n_gap;
+}
+
+/************************
+ * Single-end alignment *
+ ************************/
+
+/*
+ * Align one read in single-end mode.
+ *
+ * Runs bwa_sai(), counts the hits sharing the score of the first interval
+ * (c1) and the remaining ones (c2), picks one of the best hits at random
+ * and derives a single-end mapping quality from those counts.
+ *
+ * gen_cigar - when non-zero, the chosen hit is expanded into one->one by
+ *             bwa_sa2aln(); otherwise one->one.ref_id is left at -1
+ *
+ * Returns a newly allocated bwa_one_t (free with bwa_one_destroy()). When
+ * no hit is found, sai.n is 0, one->one.ref_id is -1 and all other fields
+ * are zero.
+ */
+bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar)
+{
+	extern int bwa_cal_maxdiff(int l, double err, double thres);
+	bwa_one_t *one;
+	int best, cnt, i, seq_len;
+
+	seq_len = strlen(seq);
+	one = calloc(1, sizeof(bwa_one_t));
+	// mark "no alignment record" up front so that the no-hit return below
+	// does not leave ref_id at 0, which would look like the first reference
+	one->one.ref_id = -1;
+	one->sai = bwa_sai(idx, buf, seq);
+	if (one->sai.n == 0) return one;
+	// count number of hits; randomly select one alignment
+	best = one->sai.sai[0].score;
+	for (i = cnt = 0; i < one->sai.n; ++i) {
+		bwa_sai1_t *p = &one->sai.sai[i];
+		if (p->score > best) break;
+		// drand48() is evaluated first so the random stream is unchanged; the
+		// `which == 0' fallback only matters if it returns exactly 0.0 for the
+		// first interval, which would otherwise leave `which' null
+		if (drand48() * (p->l - p->k + 1 + cnt) > (double)cnt || one->which == 0) {
+			one->which = p;
+			one->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48());
+		}
+		cnt += p->l - p->k + 1;
+	}
+	one->c1 = cnt;
+	for (; i < one->sai.n; ++i)
+		cnt += one->sai.sai[i].l - one->sai.sai[i].k + 1;
+	one->c2 = cnt - one->c1;
+	// estimate single-end mapping quality
+	one->mapQs = -1;
+	if (one->c1 == 0) one->mapQs = 23; // FIXME: is it possible?
+	else if (one->c1 > 1) one->mapQs = 0;
+	else {
+		int diff = one->which->n_mm + one->which->n_gapo + one->which->n_gape;
+		// diff_tab only has BWA_MAX_QUERY_LEN entries; longer reads are computed
+		// directly with the same formula instead of reading past the table
+		int max_diff = seq_len < BWA_MAX_QUERY_LEN? buf->diff_tab[seq_len]
+			: bwa_cal_maxdiff(seq_len, BWA_AVG_ERR, buf->opt->fnr);
+		if (diff >= max_diff) one->mapQs = 25;
+		else if (one->c2 == 0) one->mapQs = 37;
+	}
+	if (one->mapQs < 0) {
+		cnt = (one->c2 >= 255)? 255 : one->c2; // logn has 256 entries
+		one->mapQs = 23 < buf->logn[cnt]? 0 : 23 - buf->logn[cnt];
+	}
+	one->mapQ = one->mapQs;
+	// compute CIGAR on request
+	if (gen_cigar) bwa_sa2aln(idx, buf, seq, one->sa, one->which->n_gapo + one->which->n_gape, &one->one);
+	return one;
+}
+
+/*
+ * Release a result returned by bwa_se(): the interval array, the CIGAR
+ * of the alignment record and the container itself.
+ * Passing a null pointer is a no-op, mirroring free().
+ */
+void bwa_one_destroy(bwa_one_t *one)
+{
+	if (one == 0) return;
+	free(one->sai.sai);
+	free(one->one.cigar);
+	free(one);
+}
+
+/************************
+ * Paired-end alignment *
+ ************************/
+
+// Placeholder: not implemented. The body is empty, so all arguments are ignored and nothing is computed.
+void bwa_pestat(bwa_buf_t *buf, int n, bwa_one_t **o[2])
+{
+}
+
+// Placeholder: not implemented. The body is empty, so all arguments are ignored and o[] is left untouched.
+void bwa_pe(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq[2], bwa_one_t *o[2])
+{
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwa.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,107 @@
+#ifndef BWA_H_
+#define BWA_H_
+
+#include <stdint.h>
+
+#define BWA_DEF_MAX_SCORE 2048
+#define BWA_MAX_QUERY_LEN 1024
+
+// BWA index
+struct bwa_idx_t;
+typedef struct bwa_idx_t bwa_idx_t;
+
+// Buffer for BWA alignment
+struct bwa_buf_t;
+typedef struct bwa_buf_t bwa_buf_t;
+
+// BWA alignment options.
+// The comment next to bwa_def_opt lists the defaults in field order:
+// { 11, 4, -1, 1, 6, 32, 2, 0.04 }.
+typedef struct {
+	int s_gapo, s_gape;               // gap open and extension penalties; the mismatch penalty is fixed at 3
+	int max_diff, max_gapo, max_gape; // max differences (-1 to use fnr for length-adjusted max diff), gap opens and gap extensions
+	int seed_len, max_seed_diff;      // seed length and max differences allowed in the seed
+	float fnr;                        // parameter for automatic length-adjusted max differences
+} bwa_opt_t;
+
+// default BWA alignment options
+extern bwa_opt_t bwa_def_opt; // = { 11, 4, -1, 1, 6, 32, 2, 0.04 }
+
+// an interval hit in the SA coordinate; basic unit in .sai files
+typedef struct {
+	uint32_t n_mm:16, n_gapo:8, n_gape:8; // presumably mismatches, gap opens and gap extensions (bwa_se() sums the three as the number of differences)
+	int score; // bwa_se() treats the first interval's score as the best and stops at the first larger one
+	uint64_t k, l; // [k,l] is the SA interval; each interval has l-k+1 hits
+} bwa_sai1_t;
+
+// all interval hits in the SA coordinate; returned by value from bwa_sai()
+typedef struct {
+	int n; // number of interval hits
+	bwa_sai1_t *sai; // array of n intervals; bwa_one_destroy() releases it with free()
+} bwa_sai_t;
+
+// an alignment in the base-pair coordinate (filled in by bwa_sa2aln())
+typedef struct {
+	uint32_t n_n:8, n_gap:12, n_mm:12; // number of ambiguous bases, gaps and mismatches in the alignment
+	int32_t ref_id;                    // reference sequence index (the first seq is indexed by 0); bwa_se() sets -1 before requesting an alignment
+	uint32_t offset;                   // coordinate on the reference; zero-based
+	uint32_t n_cigar:16, flag:16;      // number of CIGAR operations; SAM flag
+	uint32_t *cigar;                   // CIGAR in the BAM 28+4 encoding; having n_cigar operations
+} bwa_aln_t;
+
+// result of bwa_se() for one read; release with bwa_one_destroy()
+typedef struct {
+	int mapQs, mapQ, c1, c2; // single-end mapping quality; mapping quality (bwa_se() copies mapQs); number of hits with the best score; number of remaining hits
+	uint64_t sa;             // SA coordinate picked at random inside the selected interval
+	bwa_sai1_t *which;       // the selected interval; points into sai.sai, not separately owned
+	bwa_sai_t sai;           // all SA intervals found for the read
+	bwa_aln_t one;           // base-pair alignment of the selected hit (filled only when a CIGAR is requested)
+} bwa_one_t;
+
+// paired-end insert size statistics
+// NOTE(review): the fields mirror isize_info_t in bwape.c; nothing in this header uses the type yet - confirm intent
+typedef struct {
+	double avg, std, ap_prior;         // presumably mean and standard deviation of the insert size, and the prior of anomalous pairs
+	uint64_t low, high, high_bayesian; // presumably lower/upper bounds used for the estimate and the inferred maximum insert size
+} bwa_pestat_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	// load a BWA index
+	bwa_idx_t *bwa_idx_load(const char *prefix);
+	void bwa_idx_destroy(bwa_idx_t *p);
+
+	// allocate a BWA alignment buffer; if unsure, set opt to &bwa_def_opt and max_score to BWA_DEF_MAX_SCORE
+	bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score);
+	void bwa_buf_destroy(bwa_buf_t *p);
+
+	/**
+	 * Find all the SA intervals
+	 *
+	 * @param idx    BWA index; multiple threads can share the same index
+	 * @param buf    BWA alignment buffer; each thread should have its own buffer
+	 * @param seq    NULL terminated C string, consisting of A/C/G/T/N only
+	 *
+	 * @return       SA intervals seq is matched to
+	 */
+	bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq);
+
+	/**
+	 * Construct an alignment in the base-pair coordinate
+	 *
+	 * @param idx     BWA index
+	 * @param buf     BWA alignment buffer
+	 * @param seq     NULL terminated C string
+	 * @param sa      Suffix array value
+	 * @param n_gaps  Number of gaps (typically equal to bwa_sai1_t::n_gapo + bwa_sai1_t::n_gape)
+	 * @param aln     Output: the alignment is written here (the function itself returns nothing)
+	 */
+	void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln);
+
+	/**
+	 * Single-end alignment of one read
+	 *
+	 * @param idx        BWA index
+	 * @param buf        BWA alignment buffer
+	 * @param seq        NULL terminated C string
+	 * @param gen_cigar  If non-zero, also construct the base-pair alignment of the selected hit
+	 *
+	 * @return           A heap-allocated result; release it with bwa_one_destroy()
+	 */
+	bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar);
+
+	// free a result of bwa_se(), including its SA interval array and CIGAR
+	void bwa_one_destroy(bwa_one_t *one);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwape.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,824 @@
+#include <unistd.h>
+#include <math.h>
+#include <stdlib.h>
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
+#include "bwtaln.h"
+#include "kvec.h"
+#include "bntseq.h"
+#include "utils.h"
+#include "stdaln.h"
+#include "bwase.h"
+
+// Cached position list for one SA interval (the value type of the b128 hash table).
+typedef struct {
+	int n;       // number of entries in a (set to l - k + 1 of the interval)
+	bwtint_t *a; // each entry is stored as position<<1 | strand
+} poslist_t;
+
+// Insert size statistics estimated by infer_isize() for one batch of read pairs.
+typedef struct {
+	double avg, std, ap_prior;         // mean and standard deviation (-1.0 when inference failed); prior of anomalous pairs
+	bwtint_t low, high, high_bayesian; // bounds used when estimating avg/std; maximum insert size accepted by pairing() (all 0 when inference failed)
+} isize_info_t;
+
+// A pair of 64-bit words. As a hit, x is the reference position and y packs
+// (alignment index)<<2 | strand<<1 | read end; as a hash key, x and y hold the
+// two ends of an SA interval.
+typedef struct {
+	uint64_t x, y;
+} b128_t;
+
+// ordering used for sorting: compares x only, so entries with equal x are in no defined relative order
+#define b128_lt(a, b) ((a).x < (b).x)
+// equality used by the hash table: both words must match
+#define b128_eq(a, b) ((a).x == (b).x && (a).y == (b).y)
+// hash function: the low 32 bits of x
+#define b128_hash(a) ((uint32_t)(a).x)
+
+// instantiate a hash table named b128, keyed by b128_t with poslist_t values
+#include "khash.h"
+KHASH_INIT(b128, b128_t, poslist_t, 1, b128_hash, b128_eq)
+
+// instantiate sorting routines for b128_t (ordered by b128_lt) and for uint64_t
+#include "ksort.h"
+KSORT_INIT(b128, b128_t, b128_lt)
+KSORT_INIT_GENERIC(uint64_t)
+
+// Scratch data reused across read pairs by bwa_cal_pac_pos_pe() and pairing().
+typedef struct {
+	kvec_t(b128_t) arr;        // every hit of both ends for the current pair; pairing() sorts it by position
+	kvec_t(b128_t) pos[2];     // NOTE(review): only ever destroyed in the code visible here - possibly unused
+	kvec_t(bwt_aln1_t) aln[2]; // SA-interval alignments of each end for the current pair
+} pe_data_t;
+
+#define MIN_HASH_WIDTH 1000
+
+extern int g_log_n[256]; // in bwase.c
+static kh_b128_t *g_hash;
+
+void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi);
+void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);
+int bwa_approx_mapQ(const bwa_seq_t *p, int mm);
+void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2);
+bntseq_t *bwa_open_nt(const char *prefix);
+void bwa_print_sam_SQ(const bntseq_t *bns);
+void bwa_print_sam_PG();
+
+/*
+ * Allocate a zero-initialized pe_opt_t and fill in the default paired-end
+ * settings. The caller owns the returned structure and releases it with free().
+ */
+pe_opt_t *bwa_init_pe_opt()
+{
+	pe_opt_t *defaults = (pe_opt_t*)calloc(1, sizeof(pe_opt_t));
+
+	// pairing mode and mate rescue
+	defaults->type = BWA_PET_STD;
+	defaults->is_sw = 1;
+	// insert size handling
+	defaults->max_isize = 500;
+	defaults->force_isize = 0;
+	defaults->ap_prior = 1e-5;
+	// limits on repetitive hits and on the number of hits reported
+	defaults->max_occ = 100000;
+	defaults->n_multi = 3;
+	defaults->N_multi = 10;
+	return defaults;
+}
+
+/*
+ * Mix a 64-bit key through a fixed sequence of shift/add/xor rounds.
+ * pairing() uses the low 32 bits of the result to break ties between
+ * equally scored pairs.
+ */
+static inline uint64_t hash_64(uint64_t key)
+{
+	uint64_t h = key;
+
+	h = h + ~(h << 32);
+	h = h ^ (h >> 22);
+	h = h + ~(h << 13);
+	h = h ^ (h >> 8);
+	h = h + (h << 3);
+	h = h ^ (h >> 15);
+	h = h + ~(h << 27);
+	h = h ^ (h >> 31);
+	return h;
+}
+/*
+static double ierfc(double x) // inverse erfc(); iphi(x) = M_SQRT2 *ierfc(2 * x);
+{
+	const double a = 0.140012;
+	double b, c;
+	b = log(x * (2 - x));
+	c = 2./M_PI/a + b / 2.;
+	return sqrt(sqrt(c * c - b / a) - c);
+}
+*/
+
+// for normal distribution, this is about 3std
+#define OUTLIER_BOUND 2.0
+
+/*
+ * Estimate the insert size distribution of one batch of read pairs.
+ *
+ * Only pairs whose two ends both have mapQ >= 20 and whose outer distance is
+ * below 100000 are used. Values outside [low, high], derived from the 25th and
+ * 75th percentiles, are excluded from the mean and standard deviation.
+ *
+ * n_seqs    number of pairs in the batch
+ * seqs      seqs[0][i] and seqs[1][i] are the two ends of pair i
+ * ii        output statistics; avg and std are -1.0 and the bounds 0 on failure
+ * ap_prior  lower bound for the prior of anomalous pairs
+ * L         length used to scale ap_prior when deriving the maximum insert size
+ *
+ * Returns 0 on success and -1 if the distribution cannot be inferred.
+ */
+static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double ap_prior, int64_t L)
+{
+	uint64_t x, *isizes, n_ap = 0;
+	int n, i, tot, p25, p75, p50, max_len = 1, tmp;
+	double skewness = 0.0, kurtosis = 0.0, y = 1.0;
+
+	ii->avg = ii->std = -1.0;
+	ii->low = ii->high = ii->high_bayesian = 0;
+	ii->ap_prior = ap_prior; // keep the field defined on the failure paths as well
+	isizes = (uint64_t*)calloc(n_seqs, 8);
+	if (isizes == 0 && n_seqs > 0) {
+		fprintf(stderr, "[infer_isize] fail to infer insert size: out of memory\n");
+		return -1;
+	}
+	for (i = 0, tot = 0; i != n_seqs; ++i) {
+		bwa_seq_t *p[2];
+		p[0] = seqs[0] + i; p[1] = seqs[1] + i;
+		if (p[0]->mapQ >= 20 && p[1]->mapQ >= 20) {
+			// outer distance: from the start of the leftmost end to the end of the rightmost one
+			x = (p[0]->pos < p[1]->pos)? p[1]->pos + p[1]->len - p[0]->pos : p[0]->pos + p[0]->len - p[1]->pos;
+			if (x < 100000) isizes[tot++] = x;
+		}
+		if (p[0]->len > max_len) max_len = p[0]->len;
+		if (p[1]->len > max_len) max_len = p[1]->len;
+	}
+	if (tot < 20) {
+		fprintf(stderr, "[infer_isize] fail to infer insert size: too few good pairs\n");
+		free(isizes);
+		return -1;
+	}
+	ks_introsort(uint64_t, tot, isizes);
+	p25 = isizes[(int)(tot*0.25 + 0.5)];
+	p50 = isizes[(int)(tot*0.50 + 0.5)];
+	p75 = isizes[(int)(tot*0.75 + 0.5)];
+	tmp  = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);
+	ii->low = tmp > max_len? tmp : max_len; // ii->low is unsigned
+	ii->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);
+	for (i = 0, x = n = 0; i < tot; ++i)
+		if (isizes[i] >= ii->low && isizes[i] <= ii->high)
+			++n, x += isizes[i];
+	if (n > 0) { // with n == 0 every statistic below would be NaN
+		ii->avg = (double)x / n;
+		// std still holds the -1.0 "unset" marker; clear it before using it as
+		// the accumulator, otherwise the sum of squares is off by one
+		ii->std = 0.0;
+		for (i = 0; i < tot; ++i) {
+			if (isizes[i] >= ii->low && isizes[i] <= ii->high) {
+				double d2 = (isizes[i] - ii->avg) * (isizes[i] - ii->avg);
+				ii->std += d2;
+				skewness += d2 * (isizes[i] - ii->avg);
+				kurtosis += d2 * d2;
+			}
+		}
+		kurtosis = kurtosis/n / (ii->std / n * ii->std / n) - 3;
+		ii->std = sqrt(ii->std / n); // it would be better as n-1, but n is usually very large
+		skewness = skewness / n / (ii->std * ii->std * ii->std);
+		// smallest number of sigmas at which a proper pair becomes less likely than an anomalous one
+		for (y = 1.0; y < 10.0; y += 0.01)
+			if (.5 * erfc(y / M_SQRT2) < ap_prior / L * (y * ii->std + ii->avg)) break;
+		ii->high_bayesian = (bwtint_t)(y * ii->std + ii->avg + .499);
+		for (i = 0; i < tot; ++i)
+			if (isizes[i] > ii->high_bayesian) ++n_ap;
+		ii->ap_prior = .01 * (n_ap + .01) / tot;
+		if (ii->ap_prior < ap_prior) ii->ap_prior = ap_prior;
+	}
+	free(isizes);
+	fprintf(stderr, "[infer_isize] (25, 50, 75) percentile: (%d, %d, %d)\n", p25, p50, p75);
+	if (n == 0 || isnan(ii->std) || p75 > 100000) {
+		ii->low = ii->high = ii->high_bayesian = 0; ii->avg = ii->std = -1.0;
+		fprintf(stderr, "[infer_isize] fail to infer insert size: weird pairing\n");
+		return -1;
+	}
+	// y and ii->high_bayesian computed above are still valid; no need to derive them again
+	fprintf(stderr, "[infer_isize] low and high boundaries: %ld and %ld for estimating avg and std\n", (long)ii->low, (long)ii->high);
+	fprintf(stderr, "[infer_isize] inferred external isize from %d pairs: %.3lf +/- %.3lf\n", n, ii->avg, ii->std);
+	fprintf(stderr, "[infer_isize] skewness: %.3lf; kurtosis: %.3lf; ap_prior: %.2e\n", skewness, kurtosis, ii->ap_prior);
+	fprintf(stderr, "[infer_isize] inferred maximum insert size: %ld (%.2lf sigma)\n", (long)ii->high_bayesian, y);
+	return 0;
+}
+
+/*
+ * Choose the best-scoring proper pair among all hits of the two ends and
+ * update both reads accordingly.
+ *
+ * p      the two ends of the pair; position, strand, counts and mapping
+ *        qualities are rewritten when a pair is found
+ * d      d->arr holds every hit of both ends (sorted here by position) and
+ *        d->aln[] the alignments those hits refer to
+ * opt    paired-end options (pairing type, max_isize)
+ * s_mm   mismatch penalty, used to recognize a poor sub-optimal pair
+ * ii     insert size estimate; when ii->high is 0, opt->max_isize bounds the
+ *        insert size instead
+ *
+ * Returns the number of ends that were moved to a new position while having
+ * a positive mapping quality.
+ */
+static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm, const isize_info_t *ii)
+{
+	int i, j, o_n, subo_n, cnt_chg = 0, low_bound = ii->low, max_len;
+	uint64_t o_score, subo_score;
+	b128_t last_pos[2][2], o_pos[2];
+	max_len = p[0]->full_len;
+	if (max_len < p[1]->full_len) max_len = p[1]->full_len;
+	// NOTE(review): low_bound is computed but not read again in this function; the macro below tests against max_len
+	if (low_bound < max_len) low_bound = max_len;
+
+	// Try to pair hit u (leftmost) with hit v. l is the resulting insert size.
+	// The pair score is 10 * (sum of the two alignment scores) plus, when ii
+	// is set, a Phred-scaled insert size penalty; it is kept in the high 32
+	// bits of s, with a hash of the two positions in the low 32 bits to break
+	// ties. Smaller is better: o_score/o_pos track the optimum, subo_score the
+	// runner-up, o_n/subo_n the numbers of optimal and sub-optimal pairs.
+	// here v>=u. When ii is set, we check insert size with ii; otherwise with opt->max_isize
+#define __pairing_aux(u,v) do { \
+		bwtint_t l = (v).x + p[(v).y&1]->len - ((u).x); \
+		if ((u).x != (uint64_t)-1 && (v).x > (u).x && l >= max_len \
+			&& ((ii->high && l <= ii->high_bayesian) || (ii->high == 0 && l <= opt->max_isize))) \
+		{ \
+			uint64_t s = d->aln[(v).y&1].a[(v).y>>2].score + d->aln[(u).y&1].a[(u).y>>2].score; \
+			s *= 10; \
+			if (ii->high) s += (int)(-4.343 * log(.5 * erfc(M_SQRT1_2 * fabs(l - ii->avg) / ii->std)) + .499); \
+			s = s<<32 | (uint32_t)hash_64((u).x<<32 | (v).x); \
+			if (s>>32 == o_score>>32) ++o_n; \
+			else if (s>>32 < o_score>>32) { subo_n += o_n; o_n = 1; } \
+			else ++subo_n; \
+			if (s < o_score) subo_score = o_score, o_score = s, o_pos[(u).y&1] = (u), o_pos[(v).y&1] = (v); \
+			else if (s < subo_score) subo_score = s; \
+		} \
+	} while (0)
+
+	// Flag read q as properly paired and, if the chosen hit w differs from its
+	// current position or strand, move q to w and copy the edit counts and
+	// score of the corresponding alignment. cnt_chg counts moved reads whose
+	// mapQ is positive at this point.
+#define __pairing_aux2(q, w) do { \
+		const bwt_aln1_t *r = d->aln[(w).y&1].a + ((w).y>>2); \
+		(q)->extra_flag |= SAM_FPP; \
+		if ((q)->pos != (w).x || (q)->strand != ((w).y>>1&1)) { \
+			(q)->n_mm = r->n_mm; (q)->n_gapo = r->n_gapo; (q)->n_gape = r->n_gape; (q)->strand = (w).y>>1&1; \
+			(q)->score = r->score; \
+			(q)->pos = (w).x; \
+			if ((q)->mapQ > 0) ++cnt_chg; \
+		} \
+	} while (0)
+
+	o_score = subo_score = (uint64_t)-1; // (uint64_t)-1 means "no pair found yet"
+	o_n = subo_n = 0;
+	ks_introsort(b128, d->arr.n, d->arr.a);
+	// last_pos[end][1] and last_pos[end][0] remember the two most recently pushed hits of each end
+	for (j = 0; j < 2; ++j) last_pos[j][0].x = last_pos[j][0].y = last_pos[j][1].x = last_pos[j][1].y = (uint64_t)-1;
+	if (opt->type == BWA_PET_STD) {
+		for (i = 0; i < d->arr.n; ++i) {
+			b128_t x = d->arr.a[i];
+			int strand = x.y>>1&1;
+			if (strand == 1) { // reverse strand, then check
+				int y = 1 - (x.y&1);
+				__pairing_aux(last_pos[y][1], x);
+				__pairing_aux(last_pos[y][0], x);
+			} else { // forward strand, then push
+				last_pos[x.y&1][0] = last_pos[x.y&1][1];
+				last_pos[x.y&1][1] = x;
+			}
+		}
+	} else if (opt->type == BWA_PET_SOLID) {
+		for (i = 0; i < d->arr.n; ++i) {
+			b128_t x = d->arr.a[i];
+			int strand = x.y>>1&1;
+			if ((strand^x.y)&1) { // strand differs from the read end bit: check against the pushed hits of the mate
+				int y = 1 - (x.y&1);
+				__pairing_aux(last_pos[y][1], x);
+				__pairing_aux(last_pos[y][0], x);
+			} else { // otherwise push this hit
+				last_pos[x.y&1][0] = last_pos[x.y&1][1];
+				last_pos[x.y&1][1] = x;
+			}
+		}
+	} else {
+		fprintf(stderr, "[paring] not implemented yet!\n");
+		exit(1);
+	}
+	// set pairing
+	//fprintf(stderr, "[%ld, %d, %d, %d]\n", d->arr.n, (int)(o_score>>32), (int)(subo_score>>32), o_n);
+	if (o_score != (uint64_t)-1) {
+		int mapQ_p = 0; // this is the maximum mapping quality when one end is moved
+		//fprintf(stderr, "%d, %d\n", o_n, subo_n);
+		if (o_n == 1) {
+			if (subo_score == (uint64_t)-1) mapQ_p = 29; // no sub-optimal pair
+			else if ((subo_score>>32) - (o_score>>32) > s_mm * 10) mapQ_p = 23; // poor sub-optimal pair
+			else {
+				int n = subo_n > 255? 255 : subo_n; // g_log_n has 256 entries
+				mapQ_p = ((subo_score>>32) - (o_score>>32)) / 2 - g_log_n[n];
+				if (mapQ_p < 0) mapQ_p = 0;
+			}
+		}
+		if ((p[0]->pos == o_pos[0].x && p[0]->strand == (o_pos[0].y>>1&1)) && (p[1]->pos == o_pos[1].x && p[1]->strand == (o_pos[1].y>>1&1))) { // both ends not moved
+			if (p[0]->mapQ > 0 && p[1]->mapQ > 0) {
+				int mapQ = p[0]->mapQ + p[1]->mapQ;
+				if (mapQ > 60) mapQ = 60;
+				p[0]->mapQ = p[1]->mapQ = mapQ;
+			} else {
+				// an end with mapQ 0 borrows from its mate, capped at mapQ_p + 7
+				if (p[0]->mapQ == 0) p[0]->mapQ = (mapQ_p + 7 < p[1]->mapQ)? mapQ_p + 7 : p[1]->mapQ;
+				if (p[1]->mapQ == 0) p[1]->mapQ = (mapQ_p + 7 < p[0]->mapQ)? mapQ_p + 7 : p[0]->mapQ;
+			}
+		} else if (p[0]->pos == o_pos[0].x && p[0]->strand == (o_pos[0].y>>1&1)) { // [1] moved
+			p[1]->seQ = 0; p[1]->mapQ = p[0]->mapQ;
+			if (p[1]->mapQ > mapQ_p) p[1]->mapQ = mapQ_p;
+		} else if (p[1]->pos == o_pos[1].x && p[1]->strand == (o_pos[1].y>>1&1)) { // [0] moved
+			p[0]->seQ = 0; p[0]->mapQ = p[1]->mapQ;
+			if (p[0]->mapQ > mapQ_p) p[0]->mapQ = mapQ_p;
+		} else { // both ends moved
+			p[0]->seQ = p[1]->seQ = 0;
+			mapQ_p -= 20;
+			if (mapQ_p < 0) mapQ_p = 0;
+			p[0]->mapQ = p[1]->mapQ = mapQ_p;
+		}
+		__pairing_aux2(p[0], o_pos[0]);
+		__pairing_aux2(p[1], o_pos[1]);
+	}
+	return cnt_chg;
+}
+
+// Per-read backup of the alignments loaded from a .sai file; bwa_cal_pac_pos_pe()
+// keeps one per read and end so the paired-end pass can restore them.
+typedef struct {
+	kvec_t(bwt_aln1_t) aln;
+} aln_buf_t;
+
+/*
+ * Convert the SA-interval alignments of one batch of read pairs into
+ * reference coordinates and pair the two ends.
+ *
+ * The function works in three passes: (1) read the alignments of every read
+ * from the two .sai streams and derive a single-end position and mapping
+ * quality; (2) infer the insert size distribution of the batch; (3) for pairs
+ * with both ends mapped, enumerate all hits and let pairing() pick the best
+ * pair, then collect the alternative hits of each end.
+ *
+ * bns      reference sequence annotations
+ * prefix   index prefix; used to load the BWT and SA when _bwt is 0
+ * _bwt     preloaded BWT with SA, or 0 to load (and free) it here
+ * n_seqs   number of pairs in the batch
+ * seqs     seqs[0][i] and seqs[1][i] are the two ends of pair i
+ * fp_sa    the two .sai streams, positioned at the record of the first read
+ * ii       output: insert size estimate of this batch
+ * opt      paired-end options
+ * gopt     gapped alignment options read from the .sai header
+ * last_ii  estimate of the previous batch, used when this batch yields none
+ *
+ * Returns the number of alignments moved by pairing. Exits with status 1 if
+ * a .sai stream is truncated or the index prefix does not fit the file name
+ * buffer.
+ */
+int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bwt, int n_seqs, bwa_seq_t *seqs[2], FILE *fp_sa[2], isize_info_t *ii,
+					   const pe_opt_t *opt, const gap_opt_t *gopt, const isize_info_t *last_ii)
+{
+	int i, j, cnt_chg = 0;
+	char str[1024];
+	bwt_t *bwt;
+	pe_data_t *d;
+	aln_buf_t *buf[2];
+
+	d = (pe_data_t*)calloc(1, sizeof(pe_data_t));
+	buf[0] = (aln_buf_t*)calloc(n_seqs, sizeof(aln_buf_t));
+	buf[1] = (aln_buf_t*)calloc(n_seqs, sizeof(aln_buf_t));
+
+	if (_bwt == 0) { // load forward SA
+		// ".bwt" is the longer of the two suffixes appended below; refuse
+		// prefixes that would overflow str[] instead of writing past it
+		if (strlen(prefix) + sizeof(".bwt") > sizeof(str)) {
+			fprintf(stderr, "[%s] index prefix is too long\n", __func__);
+			exit(1);
+		}
+		strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
+		strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
+	} else bwt = _bwt;
+
+	// SE
+	for (i = 0; i != n_seqs; ++i) {
+		bwa_seq_t *p[2];
+		for (j = 0; j < 2; ++j) {
+			int n_aln;
+			p[j] = seqs[j] + i;
+			p[j]->n_multi = 0;
+			p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2);
+			// a short read here used to leave n_aln indeterminate and the
+			// alignment array partly filled; stop instead of using garbage
+			if (fread(&n_aln, 4, 1, fp_sa[j]) != 1 || n_aln < 0) {
+				fprintf(stderr, "[%s] the .sai file of read %d is truncated or corrupted\n", __func__, j + 1);
+				exit(1);
+			}
+			if (n_aln > kv_max(d->aln[j]))
+				kv_resize(bwt_aln1_t, d->aln[j], n_aln);
+			d->aln[j].n = n_aln;
+			if (n_aln > 0 && fread(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]) != (size_t)n_aln) {
+				fprintf(stderr, "[%s] the .sai file of read %d is truncated or corrupted\n", __func__, j + 1);
+				exit(1);
+			}
+			kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j]
+			// generate SE alignment and mapping quality
+			bwa_aln2seq(n_aln, d->aln[j].a, p[j]);
+			if (p[j]->type == BWA_TYPE_UNIQUE || p[j]->type == BWA_TYPE_REPEAT) {
+				int strand;
+				int max_diff = gopt->fnr > 0.0? bwa_cal_maxdiff(p[j]->len, BWA_AVG_ERR, gopt->fnr) : gopt->max_diff;
+				p[j]->seQ = p[j]->mapQ = bwa_approx_mapQ(p[j], max_diff);
+				p[j]->pos = bwa_sa2pos(bns, bwt, p[j]->sa, p[j]->len, &strand);
+				p[j]->strand = strand;
+			}
+		}
+	}
+
+	// infer isize
+	infer_isize(n_seqs, seqs, ii, opt->ap_prior, bwt->seq_len/2);
+	// fall back to the previous batch when this one gives no estimate
+	if (ii->avg < 0.0 && last_ii->avg > 0.0) *ii = *last_ii;
+	if (opt->force_isize) {
+		fprintf(stderr, "[%s] discard insert size estimate as user's request.\n", __func__);
+		ii->low = ii->high = 0; ii->avg = ii->std = -1.0;
+	}
+
+	// PE
+	for (i = 0; i != n_seqs; ++i) {
+		bwa_seq_t *p[2];
+		for (j = 0; j < 2; ++j) {
+			p[j] = seqs[j] + i;
+			kv_copy(bwt_aln1_t, d->aln[j], buf[j][i].aln); // restore the alignments saved in the SE pass
+		}
+		if ((p[0]->type == BWA_TYPE_UNIQUE || p[0]->type == BWA_TYPE_REPEAT)
+			&& (p[1]->type == BWA_TYPE_UNIQUE || p[1]->type == BWA_TYPE_REPEAT))
+		{ // only when both ends mapped
+			b128_t x;
+			int j, k;
+			long long n_occ[2];
+			for (j = 0; j < 2; ++j) {
+				n_occ[j] = 0;
+				for (k = 0; k < d->aln[j].n; ++k)
+					n_occ[j] += d->aln[j].a[k].l - d->aln[j].a[k].k + 1;
+			}
+			// too repetitive: skip pairing (this also skips the multi-hit collection below)
+			if (n_occ[0] > opt->max_occ || n_occ[1] > opt->max_occ) continue;
+			d->arr.n = 0;
+			for (j = 0; j < 2; ++j) {
+				for (k = 0; k < d->aln[j].n; ++k) {
+					bwt_aln1_t *r = d->aln[j].a + k;
+					bwtint_t l;
+					// the leading "0 &&" disables the hash-table cache, so the else branch always runs
+					if (0 && r->l - r->k + 1 >= MIN_HASH_WIDTH) { // then check hash table
+						b128_t key;
+						int ret;
+						key.x = r->k; key.y = r->l;
+						khint_t iter = kh_put(b128, g_hash, key, &ret);
+						if (ret) { // not in the hash table; ret must equal 1 as we never remove elements
+							poslist_t *z = &kh_val(g_hash, iter);
+							z->n = r->l - r->k + 1;
+							z->a = (bwtint_t*)malloc(sizeof(bwtint_t) * z->n);
+							for (l = r->k; l <= r->l; ++l) {
+								int strand;
+								z->a[l - r->k] = bwa_sa2pos(bns, bwt, l, p[j]->len, &strand)<<1;
+								z->a[l - r->k] |= strand;
+							}
+						}
+						for (l = 0; l < kh_val(g_hash, iter).n; ++l) {
+							x.x = kh_val(g_hash, iter).a[l]>>1;
+							x.y = k<<2 | (kh_val(g_hash, iter).a[l]&1)<<1 | j;
+							kv_push(b128_t, d->arr, x);
+						}
+					} else { // then calculate on the fly
+						for (l = r->k; l <= r->l; ++l) {
+							int strand;
+							x.x = bwa_sa2pos(bns, bwt, l, p[j]->len, &strand);
+							x.y = k<<2 | strand<<1 | j; // alignment index, strand and read end
+							kv_push(b128_t, d->arr, x);
+						}
+					}
+				}
+			}
+			cnt_chg += pairing(p, d, opt, gopt->s_mm, ii);
+		}
+
+		if (opt->N_multi || opt->n_multi) {
+			for (j = 0; j < 2; ++j) {
+				if (p[j]->type != BWA_TYPE_NO_MATCH) {
+					int k, n_multi;
+					if (!(p[j]->extra_flag&SAM_FPP) && p[1-j]->type != BWA_TYPE_NO_MATCH) {
+						bwa_aln2seq_core(d->aln[j].n, d->aln[j].a, p[j], 0, p[j]->c1+p[j]->c2-1 > opt->N_multi? opt->n_multi : opt->N_multi);
+					} else bwa_aln2seq_core(d->aln[j].n, d->aln[j].a, p[j], 0, opt->n_multi);
+					// convert the alternative hits to reference coordinates and drop those equal to the primary position
+					for (k = 0, n_multi = 0; k < p[j]->n_multi; ++k) {
+						int strand;
+						bwt_multi1_t *q = p[j]->multi + k;
+						q->pos = bwa_sa2pos(bns, bwt, q->pos, p[j]->len, &strand);
+						q->strand = strand;
+						if (q->pos != p[j]->pos)
+							p[j]->multi[n_multi++] = *q;
+					}
+					p[j]->n_multi = n_multi;
+				}
+			}
+		}
+	}
+
+	// free
+	for (i = 0; i < n_seqs; ++i) {
+		kv_destroy(buf[0][i].aln);
+		kv_destroy(buf[1][i].aln);
+	}
+	free(buf[0]); free(buf[1]);
+	if (_bwt == 0) bwt_destroy(bwt);
+	kv_destroy(d->arr);
+	kv_destroy(d->pos[0]); kv_destroy(d->pos[1]);
+	kv_destroy(d->aln[0]); kv_destroy(d->aln[1]);
+	free(d);
+	return cnt_chg;
+}
+
+#define SW_MIN_MATCH_LEN 20
+#define SW_MIN_MAPQ 17
+
+// cnt = n_mm<<16 | n_gapo<<8 | n_gape
+/*
+ * Locally align a read against a window of the packed reference.
+ *
+ * l_pac    length of the packed reference
+ * pacseq   2-bit packed reference sequence
+ * len      read length
+ * seq      read bases; values >= 4 are counted as ambiguous
+ * beg      in: start of the reference window; out: advanced to the start of
+ *          the alignment when one is returned
+ * reglen   length of the reference window
+ * n_cigar  out: number of CIGAR operations (set to 0 when the alignment is
+ *          rejected after being computed; left untouched by the two early
+ *          returns at the top)
+ * _cnt     out: n_mm<<16 | n_gapo<<8 | n_gape, written only on success
+ *
+ * Returns a heap-allocated CIGAR owned by the caller, or 0 when the window
+ * or read is unsuitable or the alignment is not good enough.
+ */
+// cnt = n_mm<<16 | n_gapo<<8 | n_gape
+bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen,
+					  int *n_cigar, uint32_t *_cnt)
+{
+	bwa_cigar_t *cigar = 0;
+	ubyte_t *ref_seq;
+	bwtint_t k, x, y, l;
+	int path_len, ret, subo;
+	AlnParam ap = aln_param_bwa;
+	path_t *path, *p;
+
+	// check whether there are too many N's
+	// (also rejects windows shorter than SW_MIN_MATCH_LEN or with fewer than len bases left before l_pac)
+	if (reglen < SW_MIN_MATCH_LEN || (int64_t)l_pac - *beg < len) return 0;
+	for (k = 0, x = 0; k < len; ++k)
+		if (seq[k] >= 4) ++x;
+	if ((float)x/len >= 0.25 || len - x < SW_MIN_MATCH_LEN) return 0;
+
+	// get reference subsequence
+	ref_seq = (ubyte_t*)calloc(reglen, 1);
+	// unpack one 2-bit base per iteration, stopping at the window end or at l_pac
+	for (k = *beg, l = 0; l < reglen && k < l_pac; ++k)
+		ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
+	path = (path_t*)calloc(l+len, sizeof(path_t));
+
+	// do alignment
+	ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, &subo);
+	if (ret < 0 || subo == ret) { // no hit or tandem hits
+		free(path); free(cigar); free(ref_seq); *n_cigar = 0;
+		return 0;
+	}
+	cigar = bwa_aln_path2cigar(path, path_len, n_cigar);
+
+	// check whether the alignment is good enough
+	// x accumulates the aligned length on the reference, y the aligned length on the read
+	for (k = 0, x = y = 0; k < *n_cigar; ++k) {
+		bwa_cigar_t c = cigar[k];
+		if (__cigar_op(c) == FROM_M) x += __cigar_len(c), y += __cigar_len(c);
+		else if (__cigar_op(c) == FROM_D) x += __cigar_len(c);
+		else y += __cigar_len(c);
+	}
+	if (x < SW_MIN_MATCH_LEN || y < SW_MIN_MATCH_LEN) { // not good enough
+		free(path); free(cigar); free(ref_seq);
+		*n_cigar = 0;
+		return 0;
+	}
+
+	{ // update cigar and coordinate;
+		int start, end;
+		p = path + path_len - 1; // the last path element is where the alignment starts
+		*beg += (p->i? p->i : 1) - 1;
+		start = (p->j? p->j : 1) - 1;
+		end = path->j;
+		// room for up to two extra operations: clipping before and after the alignment
+		cigar = (bwa_cigar_t*)realloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2));
+		if (start) { // unaligned read prefix: prepend an operation of type 3
+			memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar));
+			cigar[0] = __cigar_create(3, start);
+			++(*n_cigar);
+		}
+		if (end < len) { // unaligned read suffix: append an operation of type 3
+			/*cigar[*n_cigar] = 3<<14 | (len - end);*/
+			cigar[*n_cigar] = __cigar_create(3, (len - end));
+			++(*n_cigar);
+		}
+	}
+
+	{ // set *cnt
+		int n_mm, n_gapo, n_gape;
+		n_mm = n_gapo = n_gape = 0;
+		p = path + path_len - 1;
+		// x and y walk the reference window and the read from the start of the alignment;
+		// the type-3 operations added above match none of the branches and are skipped
+		x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0;
+		for (k = 0; k < *n_cigar; ++k) {
+			bwa_cigar_t c = cigar[k];
+			if (__cigar_op(c) == FROM_M) {
+				for (l = 0; l < (__cigar_len(c)); ++l)
+					if (ref_seq[x+l] < 4 && seq[y+l] < 4 && ref_seq[x+l] != seq[y+l]) ++n_mm;
+				x += __cigar_len(c), y += __cigar_len(c);
+			} else if (__cigar_op(c) == FROM_D) {
+				x += __cigar_len(c), ++n_gapo, n_gape += (__cigar_len(c)) - 1;
+			} else if (__cigar_op(c) == FROM_I) {
+				y += __cigar_len(c), ++n_gapo, n_gape += (__cigar_len(c)) - 1;
+			}
+		}
+		*_cnt = (uint32_t)n_mm<<16 | n_gapo<<8 | n_gape;
+	}
+	
+	free(ref_seq); free(path);
+	return cigar;
+}
+
+/*
+ * Rescue unpaired reads by aligning each mate with Smith-Waterman inside the
+ * window predicted from the other end and the insert size estimate.
+ *
+ * bns      reference sequence annotations (also provides the .pac stream)
+ * _pacseq  preloaded packed reference, or 0 to load it here
+ * n_seqs   number of pairs in the batch
+ * seqs     seqs[0][i] and seqs[1][i] are the two ends of pair i
+ * popt     paired-end options; nothing is aligned when is_sw is 0
+ * ii       insert size estimate; nothing is aligned when ii->avg is negative
+ *
+ * Returns the packed reference. When _pacseq is 0 it was allocated here and
+ * the caller is responsible for freeing it.
+ */
+ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, bwa_seq_t *seqs[2], const pe_opt_t *popt, const isize_info_t *ii)
+{
+	ubyte_t *pacseq;
+	int i;
+	uint64_t n_tot[2], n_mapped[2];
+
+	// load reference sequence
+	if (_pacseq == 0) {
+		pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
+		rewind(bns->fp_pac);
+		// NOTE(review): the result of fread() is not checked; a short .pac file leaves zero-filled bases
+		fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
+	} else pacseq = (ubyte_t*)_pacseq;
+	if (!popt->is_sw || ii->avg < 0.0) return pacseq;
+
+	// perform mate alignment
+	n_tot[0] = n_tot[1] = n_mapped[0] = n_mapped[1] = 0;
+	for (i = 0; i != n_seqs; ++i) {
+		bwa_seq_t *p[2];
+		p[0] = seqs[0] + i; p[1] = seqs[1] + i;
+		if ((p[0]->mapQ >= SW_MIN_MAPQ || p[1]->mapQ >= SW_MIN_MAPQ) && (p[0]->extra_flag&SAM_FPP) == 0) { // unpaired and one read has high mapQ
+			int k, n_cigar[2], is_singleton, mapQ = 0, mq_adjust[2];
+			int64_t beg[2], end[2];
+			bwa_cigar_t *cigar[2];
+			uint32_t cnt[2];
+
+			/* In the following, _pref points to the reference read
+			 * which must be aligned; _pmate points to its mate which is
+			 * considered to be modified. */
+
+			// window to the right of _pref, clipped to the end of _pref and of the reference
+#define __set_rght_coor(_a, _b, _pref, _pmate) do { \
+				(_a) = (int64_t)_pref->pos + ii->avg - 3 * ii->std - _pmate->len * 1.5; \
+				(_b) = (_a) + 6 * ii->std + 2 * _pmate->len; \
+				if ((_a) < (int64_t)_pref->pos + _pref->len) (_a) = _pref->pos + _pref->len; \
+				if ((_b) > bns->l_pac) (_b) = bns->l_pac; \
+			} while (0)
+
+			// window to the left of _pref, clipped to 0 and to the start of _pref
+#define __set_left_coor(_a, _b, _pref, _pmate) do { \
+				(_a) = (int64_t)_pref->pos + _pref->len - ii->avg - 3 * ii->std - _pmate->len * 0.5; \
+				(_b) = (_a) + 6 * ii->std + 2 * _pmate->len; \
+				if ((_a) < 0) (_a) = 0; \
+				if ((_b) > _pref->pos) (_b) = _pref->pos; \
+			} while (0)
+
+			// record the rescued alignment on _pmate and flag both ends as properly paired
+#define __set_fixed(_pref, _pmate, _beg, _cnt) do { \
+				_pmate->type = BWA_TYPE_MATESW; \
+				_pmate->pos = _beg; \
+				_pmate->seQ = _pref->seQ; \
+				_pmate->strand = (popt->type == BWA_PET_STD)? 1 - _pref->strand : _pref->strand; \
+				_pmate->n_mm = _cnt>>16; _pmate->n_gapo = _cnt>>8&0xff; _pmate->n_gape = _cnt&0xff; \
+				_pmate->extra_flag |= SAM_FPP; \
+				_pref->extra_flag |= SAM_FPP; \
+			} while (0)
+
+			mq_adjust[0] = mq_adjust[1] = 255; // not effective
+			is_singleton = (p[0]->type == BWA_TYPE_NO_MATCH || p[1]->type == BWA_TYPE_NO_MATCH)? 1 : 0;
+
+			++n_tot[is_singleton];
+			cigar[0] = cigar[1] = 0;
+			n_cigar[0] = n_cigar[1] = 0;
+			if (popt->type != BWA_PET_STD && popt->type != BWA_PET_SOLID) continue; // other types of pairing is not considered
+			for (k = 0; k < 2; ++k) { // p[1-k] is the reference read and p[k] is the read considered to be modified
+				ubyte_t *seq;
+				if (p[1-k]->type == BWA_TYPE_NO_MATCH) continue; // if p[1-k] is unmapped, skip
+				if (popt->type == BWA_PET_STD) {
+					if (p[1-k]->strand == 0) { // then the mate is on the reverse strand and has larger coordinate
+						__set_rght_coor(beg[k], end[k], p[1-k], p[k]);
+						seq = p[k]->rseq;
+					} else { // then the mate is on forward stand and has smaller coordinate
+						__set_left_coor(beg[k], end[k], p[1-k], p[k]);
+						seq = p[k]->seq;
+						seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed; this will reversed back shortly
+					}
+				} else { // BWA_PET_SOLID
+					if (p[1-k]->strand == 0) { // R3-F3 pairing
+						if (k == 0) __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3
+						else __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3
+						seq = p[k]->rseq;
+						seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed
+					} else { // F3-R3 pairing
+						if (k == 0) __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3
+						else __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3
+						seq = p[k]->seq;
+					}
+				}
+				// perform SW alignment
+				cigar[k] = bwa_sw_core(bns->l_pac, pacseq, p[k]->len, seq, &beg[k], end[k] - beg[k], &n_cigar[k], &cnt[k]);
+				if (cigar[k] && p[k]->type != BWA_TYPE_NO_MATCH) { // re-evaluate cigar[k]
+					int s_old, clip = 0, s_new;
+					if (__cigar_op(cigar[k][0]) == 3) clip += __cigar_len(cigar[k][0]);
+					if (__cigar_op(cigar[k][n_cigar[k]-1]) == 3) clip += __cigar_len(cigar[k][n_cigar[k]-1]);
+					// compare the existing alignment with the rescued one on a common scale
+					s_old = (int)((p[k]->n_mm * 9 + p[k]->n_gapo * 13 + p[k]->n_gape * 2) / 3. * 8. + .499);
+					s_new = (int)(((cnt[k]>>16) * 9 + (cnt[k]>>8&0xff) * 13 + (cnt[k]&0xff) * 2 + clip * 3) / 3. * 8. + .499);
+					s_old += -4.343 * log(ii->ap_prior / bns->l_pac);
+					// The .499 rounding term belongs outside log(), as in the insert size
+					// penalty of __pairing_aux; inside it the penalty came out as about 2
+					// instead of about 12.
+					s_new += (int)(-4.343 * log(.5 * erfc(M_SQRT1_2 * 1.5)) + .499); // assume the mapped isize is 1.5\sigma
+					if (s_old < s_new) { // reject SW alignment
+						mq_adjust[k] = s_new - s_old;
+						free(cigar[k]); cigar[k] = 0; n_cigar[k] = 0;
+					} else mq_adjust[k] = s_old - s_new;
+				}
+				// now revserse sequence back such that p[*]->seq looks untouched
+				if (popt->type == BWA_PET_STD) {
+					if (p[1-k]->strand == 1) seq_reverse(p[k]->len, seq, 0);
+				} else {
+					if (p[1-k]->strand == 0) seq_reverse(p[k]->len, seq, 0);
+				}
+			}
+			k = -1; // no read to be changed
+			if (cigar[0] && cigar[1]) {
+				k = p[0]->mapQ < p[1]->mapQ? 0 : 1; // p[k] to be fixed
+				mapQ = abs(p[1]->mapQ - p[0]->mapQ);
+			} else if (cigar[0]) k = 0, mapQ = p[1]->mapQ;
+			else if (cigar[1]) k = 1, mapQ = p[0]->mapQ;
+			if (k >= 0 && p[k]->pos != beg[k]) {
+				++n_mapped[is_singleton];
+				{ // recalculate mapping quality
+					int tmp = (int)p[1-k]->mapQ - p[k]->mapQ/2 - 8;
+					if (tmp <= 0) tmp = 1;
+					if (mapQ > tmp) mapQ = tmp;
+					p[k]->mapQ = p[1-k]->mapQ = mapQ;
+					p[k]->seQ = p[1-k]->seQ = p[1-k]->seQ < mapQ? p[1-k]->seQ : mapQ;
+					if (p[k]->mapQ > mq_adjust[k]) p[k]->mapQ = mq_adjust[k];
+					if (p[k]->seQ > mq_adjust[k]) p[k]->seQ = mq_adjust[k];
+				}
+				// update CIGAR
+				free(p[k]->cigar); p[k]->cigar = cigar[k]; cigar[k] = 0;
+				p[k]->n_cigar = n_cigar[k];
+				// update the rest of information
+				__set_fixed(p[1-k], p[k], beg[k], cnt[k]);
+			}
+			free(cigar[0]); free(cigar[1]);
+		}
+	}
+	fprintf(stderr, "[bwa_paired_sw] %lld out of %lld Q%d singletons are mated.\n",
+			(long long)n_mapped[1], (long long)n_tot[1], SW_MIN_MAPQ);
+	fprintf(stderr, "[bwa_paired_sw] %lld out of %lld Q%d discordant pairs are fixed.\n",
+			(long long)n_mapped[0], (long long)n_tot[0], SW_MIN_MAPQ);
+	return pacseq;
+}
+
+/*
+ * Driver of paired-end SAM generation: reads the two .sai files and the two
+ * read files batch by batch, pairs the alignments, rescues unpaired mates,
+ * refines gapped alignments and prints both ends of every pair.
+ *
+ * prefix  index prefix
+ * fn_sa   the two .sai file names
+ * fn_fa   the two read file names
+ * popt    paired-end options; type is switched to BWA_PET_SOLID when the .sai
+ *         header lacks BWA_MODE_COMPREAD
+ */
+void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt)
+{
+	extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
+	int i, j, n_seqs, tot_seqs = 0;
+	bwa_seq_t *seqs[2];
+	bwa_seqio_t *ks[2];
+	clock_t t;
+	bntseq_t *bns, *ntbns = 0;
+	FILE *fp_sa[2];
+	gap_opt_t opt, opt0;
+	khint_t iter;
+	isize_info_t last_ii; // this is for the last batch of reads
+	char str[1024];
+	bwt_t *bwt;
+	uint8_t *pac;
+
+	// initialization
+	bwase_initialize(); // initialize g_log_n[] in bwase.c
+	pac = 0; bwt = 0;
+	// NOTE(review): g_log_n is filled again here right after bwase_initialize(); looks redundant - confirm
+	for (i = 1; i != 256; ++i) g_log_n[i] = (int)(4.343 * log(i) + 0.5);
+	bns = bns_restore(prefix);
+	srand48(bns->seed);
+	fp_sa[0] = xopen(fn_sa[0], "r");
+	fp_sa[1] = xopen(fn_sa[1], "r");
+	g_hash = kh_init(b128);
+	last_ii.avg = -1.0; // marks "no estimate from a previous batch"
+
+	// each .sai file starts with the gap_opt_t it was produced with
+	// NOTE(review): the results of these fread() calls are not checked
+	fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
+	ks[0] = bwa_open_reads(opt.mode, fn_fa[0]);
+	opt0 = opt; // keep the options of the first file; opt is reused for the second
+	fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten!
+	ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
+	if (!(opt.mode & BWA_MODE_COMPREAD)) {
+		popt->type = BWA_PET_SOLID;
+		ntbns = bwa_open_nt(prefix);
+	} else { // for Illumina alignment only
+		if (popt->is_preload) {
+			// NOTE(review): prefix is copied into str[1024] without a length check
+			strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
+			strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
+			pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
+			rewind(bns->fp_pac);
+			fread(pac, 1, bns->l_pac/4+1, bns->fp_pac);
+		}
+	}
+
+	// core loop
+	bwa_print_sam_SQ(bns);
+	bwa_print_sam_PG();
+	while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, opt0.mode, opt0.trim_qual)) != 0) {
+		int cnt_chg;
+		isize_info_t ii;
+		ubyte_t *pacseq;
+
+		// NOTE(review): n_seqs is overwritten by this second call and seqs[1] is not
+		// checked for NULL; the two files are assumed to hold the same number of reads
+		seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, opt.mode, opt.trim_qual);
+		tot_seqs += n_seqs;
+		t = clock();
+
+		fprintf(stderr, "[bwa_sai2sam_pe_core] convert to sequence coordinate... \n");
+		cnt_chg = bwa_cal_pac_pos_pe(bns, prefix, bwt, n_seqs, seqs, fp_sa, &ii, popt, &opt, &last_ii);
+		fprintf(stderr, "[bwa_sai2sam_pe_core] time elapses: %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+		fprintf(stderr, "[bwa_sai2sam_pe_core] changing coordinates of %d alignments.\n", cnt_chg);
+
+		fprintf(stderr, "[bwa_sai2sam_pe_core] align unmapped mate...\n");
+		pacseq = bwa_paired_sw(bns, pac, n_seqs, seqs, popt, &ii);
+		fprintf(stderr, "[bwa_sai2sam_pe_core] time elapses: %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+
+		fprintf(stderr, "[bwa_sai2sam_pe_core] refine gapped alignments... ");
+		for (j = 0; j < 2; ++j)
+			bwa_refine_gapped(bns, n_seqs, seqs[j], pacseq, ntbns);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+		if (pac == 0) free(pacseq); // without preloading, bwa_paired_sw() allocated the reference for this batch
+
+		fprintf(stderr, "[bwa_sai2sam_pe_core] print alignments... ");
+		for (i = 0; i < n_seqs; ++i) {
+			bwa_seq_t *p[2];
+			p[0] = seqs[0] + i; p[1] = seqs[1] + i;
+			// if either end carries a barcode, both ends get the concatenation of the two
+			// NOTE(review): the capacity of bc is not visible here; strcat may overflow - confirm
+			if (p[0]->bc[0] || p[1]->bc[0]) {
+				strcat(p[0]->bc, p[1]->bc);
+				strcpy(p[1]->bc, p[0]->bc);
+			}
+			bwa_print_sam1(bns, p[0], p[1], opt.mode, opt.max_top2);
+			bwa_print_sam1(bns, p[1], p[0], opt.mode, opt.max_top2);
+		}
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+
+		for (j = 0; j < 2; ++j)
+			bwa_free_read_seq(n_seqs, seqs[j]);
+		fprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\n", tot_seqs);
+		last_ii = ii; // the next batch falls back to this estimate if it cannot infer its own
+	}
+
+	// destroy
+	bns_destroy(bns);
+	if (ntbns) bns_destroy(ntbns);
+	for (i = 0; i < 2; ++i) {
+		bwa_seq_close(ks[i]);
+		fclose(fp_sa[i]);
+	}
+	for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter)
+		if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a);
+	kh_destroy(b128, g_hash);
+	if (pac) { // pac and bwt are only loaded together, in the preload branch above
+		free(pac); bwt_destroy(bwt);
+	}
+}
+
+/*
+ * Entry point of "bwa sampe": parse the command line, locate the index and
+ * run bwa_sai2sam_pe_core().
+ *
+ * argc, argv  command line of the sub-command; the options are followed by
+ *             <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>
+ *
+ * Returns 0 on success and 1 on a usage error, a malformed @RG line or when
+ * the index cannot be located. The option structure is released on every path.
+ */
+int bwa_sai2sam_pe(int argc, char *argv[])
+{
+	extern char *bwa_rg_line, *bwa_rg_id;
+	extern int bwa_set_rg(const char *s);
+	extern char *bwa_infer_prefix(const char *hint);
+	int c;
+	pe_opt_t *popt;
+	char *prefix;
+
+	popt = bwa_init_pe_opt();
+	while ((c = getopt(argc, argv, "a:o:sPn:N:c:f:Ar:")) >= 0) {
+		switch (c) {
+		case 'r':
+			if (bwa_set_rg(optarg) < 0) {
+				fprintf(stderr, "[%s] malformated @RG line\n", __func__);
+				free(popt);
+				return 1;
+			}
+			break;
+		case 'a': popt->max_isize = atoi(optarg); break;
+		case 'o': popt->max_occ = atoi(optarg); break;
+		case 's': popt->is_sw = 0; break;
+		case 'P': popt->is_preload = 1; break;
+		case 'n': popt->n_multi = atoi(optarg); break;
+		case 'N': popt->N_multi = atoi(optarg); break;
+		case 'c': popt->ap_prior = atof(optarg); break;
+		case 'f': xreopen(optarg, "w", stdout); break;
+		case 'A': popt->force_isize = 1; break;
+		default: free(popt); return 1; // unknown option
+		}
+	}
+
+	if (optind + 5 > argc) { // the five positional arguments are mandatory
+		fprintf(stderr, "\n");
+		fprintf(stderr, "Usage:   bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>\n\n");
+		fprintf(stderr, "Options: -a INT   maximum insert size [%d]\n", popt->max_isize);
+		fprintf(stderr, "         -o INT   maximum occurrences for one end [%d]\n", popt->max_occ);
+		fprintf(stderr, "         -n INT   maximum hits to output for paired reads [%d]\n", popt->n_multi);
+		fprintf(stderr, "         -N INT   maximum hits to output for discordant pairs [%d]\n", popt->N_multi);
+		fprintf(stderr, "         -c FLOAT prior of chimeric rate (lower bound) [%.1le]\n", popt->ap_prior);
+		fprintf(stderr, "         -f FILE  sam file to output results to [stdout]\n");
+		fprintf(stderr, "         -r STR   read group header line such as `@RG\\tID:foo\\tSM:bar' [null]\n");
+		fprintf(stderr, "         -P       preload index into memory (for base-space reads only)\n");
+		fprintf(stderr, "         -s       disable Smith-Waterman for the unmapped mate\n");
+		fprintf(stderr, "         -A       disable insert size estimate (force -s)\n\n");
+		fprintf(stderr, "Notes: 1. For SOLiD reads, <in1.fq> corresponds R3 reads and <in2.fq> to F3.\n");
+		fprintf(stderr, "       2. For reads shorter than 30bp, applying a smaller -o is recommended to\n");
+		fprintf(stderr, "          to get a sensible speed at the cost of pairing accuracy.\n");
+		fprintf(stderr, "\n");
+		free(popt);
+		return 1;
+	}
+	if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
+		fprintf(stderr, "[%s] fail to locate the index\n", __func__);
+		free(bwa_rg_line); free(bwa_rg_id);
+		free(popt);
+		return 1; // a missing index is a failure, like the other error paths above
+	}
+	bwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt);
+	free(bwa_rg_line); free(bwa_rg_id); free(prefix);
+	free(popt);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwase.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,683 @@
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include "stdaln.h"
+#include "bwase.h"
+#include "bwtaln.h"
+#include "bntseq.h"
+#include "utils.h"
+#include "kstring.h"
+
+int g_log_n[256];
+char *bwa_rg_line, *bwa_rg_id;
+
+void bwa_print_sam_PG();
+
+/* Summarise the n_aln SA intervals in aln[] into the read record s.
+ *
+ * With n_aln == 0 the read is marked BWA_TYPE_NO_MATCH. If set_main is
+ * non-zero, one SA coordinate is drawn at random from the leading run of
+ * intervals whose score does not exceed aln[0].score, and c1/c2 receive
+ * the number of occurrences inside/outside that run. If n_multi is
+ * non-zero, every occurrence is also recorded in s->multi, unless there
+ * are more than n_multi + 1 of them, in which case none is kept. */
+void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi)
+{
+	int i, cnt, best;
+	if (n_aln == 0) {
+		s->type = BWA_TYPE_NO_MATCH;
+		s->c1 = s->c2 = 0;
+		return;
+	}
+
+	if (set_main) {
+		best = aln[0].score;
+		for (i = cnt = 0; i < n_aln; ++i) {
+			const bwt_aln1_t *p = aln + i;
+			if (p->score > best) break; // end of the run of best-scoring intervals
+			// take this interval with probability (its size) / (occurrences seen so far, itself included)
+			if (drand48() * (p->l - p->k + 1 + cnt) > (double)cnt) {
+				s->n_mm = p->n_mm; s->n_gapo = p->n_gapo; s->n_gape = p->n_gape;
+				s->score = p->score;
+				// uniform pick inside the interval [k, l]
+				s->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48());
+			}
+			cnt += p->l - p->k + 1;
+		}
+		s->c1 = cnt; // occurrences within the best-scoring run
+		for (; i < n_aln; ++i) cnt += aln[i].l - aln[i].k + 1;
+		s->c2 = cnt - s->c1; // occurrences in the remaining intervals
+		s->type = s->c1 > 1? BWA_TYPE_REPEAT : BWA_TYPE_UNIQUE;
+	}
+
+	if (n_multi) {
+		int k, rest, n_occ, z = 0;
+		// total number of occurrences over all intervals
+		for (k = n_occ = 0; k < n_aln; ++k) {
+			const bwt_aln1_t *q = aln + k;
+			n_occ += q->l - q->k + 1;
+		}
+		if (s->multi) free(s->multi);
+		if (n_occ > n_multi + 1) { // if there are too many hits, generate none of them
+			s->multi = 0; s->n_multi = 0;
+			return;
+		}
+		/* The following code is more flexible than what is required
+		 * here. In principle, due to the requirement above, we can
+		 * simply output all hits, but the following samples "rest"
+		 * number of random hits. */
+		rest = n_occ > n_multi + 1? n_multi + 1 : n_occ; // find one additional for ->sa
+		s->multi = calloc(rest, sizeof(bwt_multi1_t));
+		for (k = 0; k < n_aln; ++k) {
+			const bwt_aln1_t *q = aln + k;
+			if (q->l - q->k + 1 <= rest) {
+				// the whole interval fits: record every SA coordinate in it
+				bwtint_t l;
+				for (l = q->k; l <= q->l; ++l) {
+					s->multi[z].pos = l;
+					s->multi[z].gap = q->n_gapo + q->n_gape;
+					s->multi[z++].mm = q->n_mm;
+				}
+				rest -= q->l - q->k + 1;
+			} else { // Random sampling (http://code.activestate.com/recipes/272884/). In fact, we never come here.
+				int j, i, k; // NB: these shadow the outer i and k; harmless because of the break below
+				for (j = rest, i = q->l - q->k + 1, k = 0; j > 0; --j) {
+					double p = 1.0, x = drand48();
+					while (x < p) p -= p * j / (i--);
+					s->multi[z].pos = q->l - i;
+					s->multi[z].gap = q->n_gapo + q->n_gape;
+					s->multi[z++].mm = q->n_mm;
+				}
+				rest = 0;
+				break;
+			}
+		}
+		s->n_multi = z;
+	}
+}
+
+/* Convenience wrapper around bwa_aln2seq_core(): selects the main hit
+ * (set_main = 1) and records no alternative hits (n_multi = 0). */
+void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s)
+{
+	bwa_aln2seq_core(n_aln, aln, s, 1, 0);
+}
+
+/* Approximate single-end mapping quality of p from its hit counts c1 and
+ * c2 and its mismatch count n_mm; mm is the mismatch threshold compared
+ * against n_mm. Relies on the g_log_n[] table having been filled in. */
+int bwa_approx_mapQ(const bwa_seq_t *p, int mm)
+{
+	int capped;
+	if (p->c1 == 0) return 23;
+	else if (p->c1 > 1) return 0; // more than one hit counted in c1
+	else if (p->n_mm == mm) return 25;
+	else if (p->c2 == 0) return 37; // nothing counted in c2 either
+	// clamp the c2 count to the last index of the 256-entry table
+	if (p->c2 >= 255) capped = 255;
+	else capped = p->c2;
+	if (g_log_n[capped] > 23) return 0;
+	return 23 - g_log_n[capped];
+}
+
+/* Convert the suffix-array coordinate sapos into a position through
+ * bwt_sa() and bns_depos(). *strand receives the negation of the reverse
+ * flag reported by bns_depos(); len is the read length. */
+bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand)
+{
+	int is_rev;
+	bwtint_t pos_f = bns_depos(bns, bwt_sa(bwt, sapos), &is_rev);
+	*strand = !is_rev;
+	/* NB: For gapped alignment, pacpos may not be correct, which will be fixed
+	 * in bwa_refine_gapped_core(). This line also determines the way "x" is
+	 * calculated in bwa_refine_gapped_core() when (ext < 0 && is_end == 0). */
+	if (is_rev) { // mapped to the forward strand
+		if (pos_f + 1 < len) pos_f = 0; // would fall before coordinate 0: clamp
+		else pos_f = pos_f - len + 1;
+	}
+	return pos_f; // FIXME: it is possible that pos_f < bns->anns[ref_id].offset
+}
+
+/**
+ * Derive the approximate reference position of a read from its suffix
+ * array coordinate (seq->sa), and fill in its strand and mapping quality.
+ * The position is approximate: it depends on whether indels appear in the
+ * read and on whether calculations are performed from the start or the
+ * end of the read. Reads whose type is neither BWA_TYPE_UNIQUE nor
+ * BWA_TYPE_REPEAT are left untouched.
+ *
+ * max_mm is used as the mismatch threshold for the mapping quality unless
+ * fnr > 0, in which case the threshold comes from bwa_cal_maxdiff().
+ */
+void bwa_cal_pac_pos_core(const bntseq_t *bns, const bwt_t *bwt, bwa_seq_t *seq, const int max_mm, const float fnr)
+{
+	int max_diff, strand;
+	if (seq->type != BWA_TYPE_UNIQUE && seq->type != BWA_TYPE_REPEAT) return;
+	max_diff = fnr > 0.0? bwa_cal_maxdiff(seq->len, BWA_AVG_ERR, fnr) : max_mm;
+	seq->pos = bwa_sa2pos(bns, bwt, seq->sa, seq->len, &strand);
+	seq->strand = strand;
+	// computed once: the result does not depend on pos or strand, so a second call would be redundant
+	seq->seQ = seq->mapQ = bwa_approx_mapQ(seq, max_diff);
+}
+
+/* Load the BWT and suffix array stored under prefix (".bwt" and ".sa"),
+ * convert the SA coordinates of all n_seqs reads, and of their
+ * alternative hits, into positions, then release the index again.
+ * Alternative hits that land on the same position as the main hit are
+ * dropped from p->multi. */
+void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr)
+{
+	int i, j, strand, n_multi;
+	char *str;
+	bwt_t *bwt;
+	// size the file-name buffer from the prefix instead of a fixed 1024 bytes,
+	// so that a long index path cannot overflow it
+	str = (char*)calloc(strlen(prefix) + 10, 1);
+	// load forward SA
+	strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
+	strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
+	free(str);
+	for (i = 0; i != n_seqs; ++i) {
+		bwa_seq_t *p = &seqs[i];
+		bwa_cal_pac_pos_core(bns, bwt, p, max_mm, fnr);
+		// convert alternative hits in place, compacting away duplicates of the main hit
+		for (j = n_multi = 0; j < p->n_multi; ++j) {
+			bwt_multi1_t *q = p->multi + j;
+			q->pos = bwa_sa2pos(bns, bwt, q->pos, p->len, &strand);
+			q->strand = strand;
+			if (q->pos != p->pos)
+				p->multi[n_multi++] = *q;
+		}
+		p->n_multi = n_multi;
+	}
+	bwt_destroy(bwt);
+}
+
+/* Re-align a gapped read against a window of the packed reference and
+ * return the resulting CIGAR (its length goes to *n_cigar); *_pos is
+ * updated to the refined start coordinate.
+ *
+ * len/seq describe the read, l_pac/pacseq the packed reference. The
+ * reference window has len + |ext| bases; it starts at *_pos when ext > 0
+ * and otherwise ends at a coordinate derived from *_pos (see below).
+ *
+ * is_end_correct == 1 if (*pos+len) gives the correct coordinate on
+ * forward strand. This happens when p->pos is calculated by
+ * bwa_cal_pac_pos(). is_end_correct==0 if (*pos) gives the correct
+ * coordinate. This happens only for color-converted alignment. */
+bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, bwtint_t *_pos,
+									int ext, int *n_cigar, int is_end_correct)
+{
+	bwa_cigar_t *cigar = 0;
+	ubyte_t *ref_seq;
+	int l = 0, path_len, ref_len;
+	AlnParam ap = aln_param_bwa;
+	path_t *path;
+	int64_t k, __pos = *_pos;
+
+	ref_len = len + abs(ext);
+	if (ext > 0) {
+		// window [__pos, __pos + ref_len), truncated at the end of the reference
+		ref_seq = (ubyte_t*)calloc(ref_len, 1);
+		for (k = __pos; k < __pos + ref_len && k < l_pac; ++k)
+			ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3; // unpack one 2-bit base (four per byte)
+	} else {
+		// window [x - ref_len, x), truncated at coordinate 0 and at the end of the reference
+		int64_t x = __pos + (is_end_correct? len : ref_len);
+		ref_seq = (ubyte_t*)calloc(ref_len, 1);
+		for (l = 0, k = x - ref_len > 0? x - ref_len : 0; k < x && k < l_pac; ++k)
+			ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
+	}
+	path = (path_t*)calloc(l+len, sizeof(path_t));
+
+	// global alignment of the read against the l extracted reference bases
+	aln_global_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len);
+	cigar = bwa_aln_path2cigar(path, path_len, n_cigar);
+	
+	if (ext < 0 && is_end_correct) { // fix coordinate for reads mapped to the forward strand
+		// net shift: inserted bases minus deleted bases
+		for (l = k = 0; k < *n_cigar; ++k) {
+			if (__cigar_op(cigar[k]) == FROM_D) l -= __cigar_len(cigar[k]);
+			else if (__cigar_op(cigar[k]) == FROM_I) l += __cigar_len(cigar[k]);
+		}
+		__pos += l;
+	}
+
+	if (__cigar_op(cigar[0]) == FROM_D) { // deletion at the 5'-end
+		// skip the deleted reference bases and drop the operation
+		__pos += __cigar_len(cigar[0]);
+		for (k = 0; k < *n_cigar - 1; ++k) cigar[k] = cigar[k+1];
+		--(*n_cigar);
+	}
+	if (__cigar_op(cigar[*n_cigar-1]) == FROM_D) --(*n_cigar); // deletion at the 3'-end
+
+	// change "I" at either end of the read to S. just in case. This should rarely happen...
+	if (__cigar_op(cigar[*n_cigar-1]) == FROM_I) cigar[*n_cigar-1] = __cigar_create(3, (__cigar_len(cigar[*n_cigar-1])));
+	if (__cigar_op(cigar[0]) == FROM_I) cigar[0] = __cigar_create(3, (__cigar_len(cigar[0])));
+
+	*_pos = (bwtint_t)__pos;
+	free(ref_seq); free(path);
+	return cigar; // ownership of the CIGAR array passes to the caller
+}
+
+/* Build the MD string of one alignment in the scratch buffer str and
+ * return a strdup()ed copy of it. The number of edits (mismatches plus
+ * inserted and deleted bases) is stored in *_nm.
+ *
+ * cigar == NULL denotes an ungapped alignment of len bases starting at
+ * reference position pos; seq is the read in the orientation of the
+ * alignment and pacseq/l_pac the 2-bit packed reference. */
+char *bwa_cal_md1(int n_cigar, bwa_cigar_t *cigar, int len, bwtint_t pos, ubyte_t *seq,
+				  bwtint_t l_pac, ubyte_t *pacseq, kstring_t *str, int *_nm)
+{
+	bwtint_t x, y; // x: reference coordinate, y: offset in the read
+	int z, u, c, nm = 0; // u: length of the current run of matching bases
+	str->l = 0; // reset
+	x = pos; y = 0;
+	if (cigar) {
+		int k, l;
+		for (k = u = 0; k < n_cigar; ++k) {
+			l = __cigar_len(cigar[k]);
+			if (__cigar_op(cigar[k]) == FROM_M) {
+				for (z = 0; z < l && x+z < l_pac; ++z) {
+					c = pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3; // reference base at x+z
+					if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) {
+						// mismatch: emit the run length, then the reference base
+						ksprintf(str, "%d", u);
+						kputc("ACGTN"[c], str);
+						++nm;
+						u = 0;
+					} else ++u;
+				}
+				x += l; y += l;
+			} else if (__cigar_op(cigar[k]) == FROM_I || __cigar_op(cigar[k]) == FROM_S) {
+				// insertions and soft clips consume read bases only; only insertions count as edits
+				y += l;
+				if (__cigar_op(cigar[k]) == FROM_I) nm += l;
+			} else if (__cigar_op(cigar[k]) == FROM_D) {
+				// deletion: run length, '^', then the deleted reference bases
+				ksprintf(str, "%d", u);
+				kputc('^', str);
+				for (z = 0; z < l && x+z < l_pac; ++z)
+					kputc("ACGT"[pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3], str);
+				u = 0;
+				x += l; nm += l;
+			}
+		}
+	} else { // no gaps
+		for (z = u = 0; z < (bwtint_t)len && x+z < l_pac; ++z) {
+			c = pacseq[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3;
+			if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) {
+				ksprintf(str, "%d", u);
+				kputc("ACGTN"[c], str);
+				++nm;
+				u = 0;
+			} else ++u;
+		}
+	}
+	ksprintf(str, "%d", u); // trailing run of matches (possibly 0)
+	*_nm = nm;
+	return strdup(str->s);
+}
+
+/* Restore the full read length after trimming. When s->len differs from
+ * s->full_len, the trimmed bases are added as an S operation (op code 3):
+ * at the end of the CIGAR when s->strand == 0, at its start otherwise.
+ * An existing S operation at that end is extended; a missing CIGAR is
+ * created as <len>M plus the S operation. Finally s->len is set to
+ * s->full_len. */
+void bwa_correct_trimmed(bwa_seq_t *s)
+{
+	if (s->len == s->full_len) return; // nothing was trimmed
+	if (s->strand == 0) { // forward
+		if (s->cigar && __cigar_op(s->cigar[s->n_cigar-1]) == FROM_S) { // the last is S
+			// adds the trimmed length directly to the packed CIGAR value
+			s->cigar[s->n_cigar-1] += s->full_len - s->len;
+		} else {
+			if (s->cigar == 0) {
+				s->n_cigar = 2;
+				s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t));
+				s->cigar[0] = __cigar_create(0, s->len);
+			} else {
+				++s->n_cigar;
+				s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t));
+			}
+			s->cigar[s->n_cigar-1] = __cigar_create(3, (s->full_len - s->len));
+		}
+	} else { // reverse
+		if (s->cigar && __cigar_op(s->cigar[0]) == FROM_S) { // the first is S
+			s->cigar[0] += s->full_len - s->len;
+		} else {
+			if (s->cigar == 0) {
+				s->n_cigar = 2;
+				s->cigar = calloc(s->n_cigar, sizeof(bwa_cigar_t));
+				s->cigar[1] = __cigar_create(0, s->len);
+			} else {
+				++s->n_cigar;
+				s->cigar = realloc(s->cigar, s->n_cigar * sizeof(bwa_cigar_t));
+				// shift the existing operations up by one to free slot 0
+				memmove(s->cigar + 1, s->cigar, (s->n_cigar-1) * sizeof(bwa_cigar_t));
+			}
+			s->cigar[0] = __cigar_create(3, (s->full_len - s->len));
+		}
+	}
+	s->len = s->full_len;
+}
+
+/* Refine the gapped alignments of n_seqs reads and generate their MD/NM
+ * values.
+ *
+ * _pacseq is the packed reference; when it is NULL the reference is read
+ * from bns->fp_pac into a temporary buffer. ntbns, when non-NULL, signals
+ * color-space data: its packed sequence is loaded and used for the MD
+ * computation, and the trimming correction is skipped.
+ * Side effect: every s->seq is reversed in place. */
+void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, bntseq_t *ntbns)
+{
+	ubyte_t *pacseq, *ntpac = 0;
+	int i, j;
+	kstring_t *str;
+
+	if (ntbns) { // in color space
+		ntpac = (ubyte_t*)calloc(ntbns->l_pac/4+1, 1);
+		rewind(ntbns->fp_pac);
+		fread(ntpac, 1, ntbns->l_pac/4 + 1, ntbns->fp_pac); // NOTE(review): return value is not checked
+	}
+
+	if (!_pacseq) {
+		// no reference supplied by the caller: load it (four bases per byte)
+		pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
+		rewind(bns->fp_pac);
+		fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); // NOTE(review): return value is not checked
+	} else pacseq = _pacseq;
+	for (i = 0; i != n_seqs; ++i) {
+		bwa_seq_t *s = seqs + i;
+		seq_reverse(s->len, s->seq, 0); // IMPORTANT: s->seq is reversed here!!!
+		// alternative hits: only those with gaps need a CIGAR
+		for (j = 0; j < s->n_multi; ++j) {
+			bwt_multi1_t *q = s->multi + j;
+			int n_cigar;
+			if (q->gap == 0) continue;
+			q->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, &q->pos,
+										  (q->strand? 1 : -1) * q->gap, &n_cigar, 1);
+			q->n_cigar = n_cigar;
+		}
+		// main hit: skipped when unmapped, mate-rescued, or without gap opens
+		if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
+		s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos,
+									  (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 1);
+	}
+#if 0
+	if (ntbns) { // in color space
+		for (i = 0; i < n_seqs; ++i) {
+			bwa_seq_t *s = seqs + i;
+			bwa_cs2nt_core(s, bns->l_pac, ntpac);
+			for (j = 0; j < s->n_multi; ++j) {
+				bwt_multi1_t *q = s->multi + j;
+				int n_cigar;
+				if (q->gap == 0) continue;
+				free(q->cigar);
+				q->cigar = bwa_refine_gapped_core(bns->l_pac, ntpac, s->len, q->strand? s->rseq : s->seq, &q->pos,
+											  (q->strand? 1 : -1) * q->gap, &n_cigar, 0);
+				q->n_cigar = n_cigar;
+			}
+			if (s->type != BWA_TYPE_NO_MATCH && s->cigar) { // update cigar again
+				free(s->cigar);
+				s->cigar = bwa_refine_gapped_core(bns->l_pac, ntpac, s->len, s->strand? s->rseq : s->seq, &s->pos,
+											  (s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 0);
+			}
+		}
+	}
+#endif
+	// generate MD tag
+	str = (kstring_t*)calloc(1, sizeof(kstring_t)); // scratch buffer shared by all reads
+	for (i = 0; i != n_seqs; ++i) {
+		bwa_seq_t *s = seqs + i;
+		if (s->type != BWA_TYPE_NO_MATCH) {
+			int nm;
+			s->md = bwa_cal_md1(s->n_cigar, s->cigar, s->len, s->pos, s->strand? s->rseq : s->seq,
+								bns->l_pac, ntbns? ntpac : pacseq, str, &nm);
+			s->nm = nm;
+		}
+	}
+	free(str->s); free(str);
+
+	// correct for trimmed reads
+	if (!ntbns) // trimming is only enabled for Illumina reads
+		for (i = 0; i < n_seqs; ++i) bwa_correct_trimmed(seqs + i);
+
+	if (!_pacseq) free(pacseq); // only free the reference if it was loaded here
+	free(ntpac);
+}
+
+/* Return the coordinate just past the alignment of p: p->pos plus the
+ * lengths of all CIGAR operations with op code 0 or 2, or p->pos + p->len
+ * when the read carries no CIGAR. */
+int64_t pos_end(const bwa_seq_t *p)
+{
+	int64_t end = p->pos;
+	int idx;
+	if (!p->cigar) return p->pos + p->len;
+	for (idx = 0; idx != p->n_cigar; ++idx) {
+		int op = __cigar_op(p->cigar[idx]);
+		if (op != 0 && op != 2) continue; // only op codes 0 and 2 advance the coordinate
+		end += __cigar_len(p->cigar[idx]);
+	}
+	return end;
+}
+
+/* Same computation as pos_end(), for an alternative hit: len is the read
+ * length used when the hit carries no CIGAR. */
+int64_t pos_end_multi(const bwt_multi1_t *p, int len) // analogy to pos_end()
+{
+	int64_t end = p->pos;
+	int idx;
+	if (!p->cigar) return p->pos + len;
+	for (idx = 0; idx != p->n_cigar; ++idx) {
+		int op = __cigar_op(p->cigar[idx]);
+		if (op != 0 && op != 2) continue; // only op codes 0 and 2 advance the coordinate
+		end += __cigar_len(p->cigar[idx]);
+	}
+	return end;
+}
+
+/* Coordinate used for the insert-size computation: pos_end(p) when
+ * p->strand is set, p->pos otherwise, and -1 for an unmapped read. */
+static int64_t pos_5(const bwa_seq_t *p)
+{
+	if (p->type == BWA_TYPE_NO_MATCH) return -1;
+	if (p->strand) return pos_end(p);
+	return p->pos;
+}
+
+/* Print one SAM record for read p to stdout (via err_printf/putchar).
+ *
+ * mate is the paired read or NULL for single-end data. A read without a
+ * match whose mate is mapped is printed at the mate's coordinate with
+ * SAM_FSU set; a read without any usable coordinate is printed with "*"
+ * fields. mode selects the NM/CM tag name and max_top2 limits when X1 is
+ * printed.
+ * Side effects: p->pos/p->strand are overwritten for an unmapped read
+ * with a mapped mate, and p->qual is reversed in place when p->strand is
+ * set. */
+void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2)
+{
+	int j;
+	if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) {
+		int seqid, nn, am = 0, flag = p->extra_flag;
+		char XT;
+
+		if (p->type == BWA_TYPE_NO_MATCH) {
+			// unmapped read with a mapped mate: borrow the mate's coordinate
+			p->pos = mate->pos;
+			p->strand = mate->strand;
+			flag |= SAM_FSU;
+			j = 1;
+		} else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment
+
+		// get seqid
+		nn = bns_cnt_ambi(bns, p->pos, j, &seqid);
+		if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len)
+			flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences
+
+		// update flag and print it
+		if (p->strand) flag |= SAM_FSR;
+		if (mate) {
+			if (mate->type != BWA_TYPE_NO_MATCH) {
+				if (mate->strand) flag |= SAM_FMR;
+			} else flag |= SAM_FMU;
+		}
+		err_printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name);
+		// position is printed 1-based and relative to the start of its reference sequence
+		err_printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ);
+
+		// print CIGAR
+		if (p->cigar) {
+			for (j = 0; j != p->n_cigar; ++j)
+				err_printf("%d%c", __cigar_len(p->cigar[j]), "MIDS"[__cigar_op(p->cigar[j])]);
+		} else if (p->type == BWA_TYPE_NO_MATCH) err_printf("*");
+		else err_printf("%dM", p->len);
+
+		// print mate coordinate
+		if (mate && mate->type != BWA_TYPE_NO_MATCH) {
+			int m_seqid, m_is_N;
+			long long isize;
+			am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality
+			// redundant calculation here, but should not matter too much
+			m_is_N = bns_cnt_ambi(bns, mate->pos, mate->len, &m_seqid); // only m_seqid is used afterwards
+			err_printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name);
+			isize = (seqid == m_seqid)? pos_5(mate) - pos_5(p) : 0;
+			if (p->type == BWA_TYPE_NO_MATCH) isize = 0;
+			err_printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize);
+		} else if (mate) err_printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1));
+		else err_printf("\t*\t0\t0\t");
+
+		// print sequence and quality
+		if (p->strand == 0)
+			for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]);
+		else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]); // reverse complement
+		putchar('\t');
+		if (p->qual) {
+			if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
+			err_printf("%s", p->qual);
+		} else err_printf("*");
+
+		if (bwa_rg_id) err_printf("\tRG:Z:%s", bwa_rg_id);
+		if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc);
+		if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len);
+		if (p->type != BWA_TYPE_NO_MATCH) {
+			int i;
+			// calculate XT tag
+			XT = "NURM"[p->type];
+			if (nn > 10) XT = 'N'; // more than 10 counted by bns_cnt_ambi()
+			// print tags
+			err_printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm);
+			if (nn) err_printf("\tXN:i:%d", nn);
+			if (mate) err_printf("\tSM:i:%d\tAM:i:%d", p->seQ, am);
+			if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment
+				err_printf("\tX0:i:%d", p->c1);
+				if (p->c1 <= max_top2) err_printf("\tX1:i:%d", p->c2);
+			}
+			err_printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo, p->n_gapo+p->n_gape);
+			if (p->md) err_printf("\tMD:Z:%s", p->md);
+			// print multiple hits
+			if (p->n_multi) {
+				err_printf("\tXA:Z:");
+				// one "name,(+|-)pos,CIGAR,edits;" entry per alternative hit
+				for (i = 0; i < p->n_multi; ++i) {
+					bwt_multi1_t *q = p->multi + i;
+					int k;
+					j = pos_end_multi(q, p->len) - q->pos;
+					nn = bns_cnt_ambi(bns, q->pos, j, &seqid);
+					err_printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+',
+						   (int)(q->pos - bns->anns[seqid].offset + 1));
+					if (q->cigar) {
+						for (k = 0; k < q->n_cigar; ++k)
+							err_printf("%d%c", __cigar_len(q->cigar[k]), "MIDS"[__cigar_op(q->cigar[k])]);
+					} else err_printf("%dM", p->len);
+					err_printf(",%d;", q->gap + q->mm);
+				}
+			}
+		}
+		putchar('\n');
+	} else { // this read has no match
+		ubyte_t *s = p->strand? p->rseq : p->seq;
+		int flag = p->extra_flag | SAM_FSU;
+		if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU;
+		err_printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag);
+		for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]);
+		putchar('\t');
+		if (p->qual) {
+			if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality
+			err_printf("%s", p->qual);
+		} else err_printf("*");
+		if (bwa_rg_id) err_printf("\tRG:Z:%s", bwa_rg_id);
+		if (p->bc[0]) err_printf("\tBC:Z:%s", p->bc);
+		if (p->clip_len < p->full_len) err_printf("\tXC:i:%d", p->clip_len);
+		putchar('\n');
+	}
+}
+
+/* Restore the sequence description stored under "<prefix>.nt" via
+ * bns_restore() and return it. */
+bntseq_t *bwa_open_nt(const char *prefix)
+{
+	bntseq_t *nt;
+	// room for the prefix, the ".nt" suffix and the terminator
+	char *fn = (char*)calloc(strlen(prefix) + 10, 1);
+	strcpy(fn, prefix);
+	strcat(fn, ".nt");
+	nt = bns_restore(fn);
+	free(fn);
+	return nt;
+}
+
+/* Print one @SQ header line per reference sequence in bns, followed by
+ * the read-group line when one has been set. */
+void bwa_print_sam_SQ(const bntseq_t *bns)
+{
+	int idx = 0;
+	while (idx < bns->n_seqs) {
+		err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[idx].name, bns->anns[idx].len);
+		++idx;
+	}
+	if (bwa_rg_line != 0) err_printf("%s\n", bwa_rg_line);
+}
+
+/* Fill g_log_n[1..255] with (int)(4.343 * log(n) + 0.5); entry 0 is not
+ * written here. (4.343 is approximately 10/ln(10).) */
+void bwase_initialize() 
+{
+	int n = 1;
+	while (n != 256) {
+		g_log_n[n] = (int)(4.343 * log(n) + 0.5);
+		++n;
+	}
+}
+
+/* Expand the escape sequences \t, \n, \r and \\ in s, in place, and
+ * return s. A backslash followed by any other character is dropped
+ * together with that character. A backslash that is the last character
+ * of the string is dropped as well; the scan stops there instead of
+ * stepping over the terminating NUL. */
+char *bwa_escape(char *s)
+{
+	char *p, *q; // p: read cursor, q: write cursor (q <= p at all times)
+	for (p = q = s; *p; ++p) {
+		if (*p == '\\') {
+			++p;
+			if (*p == '\0') break; // trailing backslash: do not let ++p run past the end
+			if (*p == 't') *q++ = '\t';
+			else if (*p == 'n') *q++ = '\n';
+			else if (*p == 'r') *q++ = '\r';
+			else if (*p == '\\') *q++ = '\\';
+		} else *q++ = *p;
+	}
+	*q = '\0';
+	return s;
+}
+
+/* Install s as the read-group header line. s must start with "@RG";
+ * escape sequences in it are expanded with bwa_escape(). The text after
+ * the first "\tID:" up to the next tab, newline or end of string is
+ * copied into bwa_rg_id. Returns 0 on success and -1 when s does not
+ * start with "@RG" or has no ID field (in the latter case bwa_rg_line
+ * has already been replaced and bwa_rg_id is NULL). */
+int bwa_set_rg(const char *s)
+{
+	char *id_beg, *id_end;
+	long i, n;
+	if (strncmp(s, "@RG", 3) != 0) return -1;
+	// discard any previously installed read group
+	free(bwa_rg_line);
+	free(bwa_rg_id);
+	bwa_rg_id = 0;
+	bwa_rg_line = strdup(s);
+	bwa_escape(bwa_rg_line);
+	id_beg = strstr(bwa_rg_line, "\tID:");
+	if (id_beg == 0) return -1;
+	id_beg += 4; // skip over "\tID:"
+	id_end = id_beg;
+	while (*id_end && *id_end != '\t' && *id_end != '\n') ++id_end;
+	n = id_end - id_beg;
+	bwa_rg_id = calloc(n + 1, 1); // zero-filled, so the copy below stays terminated
+	for (i = 0; i < n; ++i) bwa_rg_id[i] = id_beg[i];
+	return 0;
+}
+
+/* Convert single-end alignments to SAM.
+ *
+ * prefix is the index prefix, fn_sa the .sai file and fn_fa the read file.
+ * n_occ is passed to bwa_aln2seq_core() as the limit on alternative hits.
+ * Reads are processed in batches of up to 0x40000; for each batch the
+ * alignments are read from fn_sa, converted to coordinates, refined and
+ * printed. Progress and timing go to stderr. */
+void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ)
+{
+	extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
+	int i, n_seqs, tot_seqs = 0, m_aln;
+	bwt_aln1_t *aln = 0;
+	bwa_seq_t *seqs;
+	bwa_seqio_t *ks;
+	clock_t t;
+	bntseq_t *bns, *ntbns = 0;
+	FILE *fp_sa;
+	gap_opt_t opt;
+
+	// initialization
+	bwase_initialize();
+	bns = bns_restore(prefix);
+	srand48(bns->seed); // seed the generator used by bwa_aln2seq_core()
+	fp_sa = xopen(fn_sa, "r");
+
+	m_aln = 0;
+	// the .sai file starts with the options the alignment was run with
+	fread(&opt, sizeof(gap_opt_t), 1, fp_sa); // NOTE(review): return value is not checked
+	if (!(opt.mode & BWA_MODE_COMPREAD)) // in color space; initialize ntpac
+		ntbns = bwa_open_nt(prefix);
+	bwa_print_sam_SQ(bns);
+	//bwa_print_sam_PG();
+	// set ks
+	ks = bwa_open_reads(opt.mode, fn_fa);
+	// core loop
+	while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt.mode, opt.trim_qual)) != 0) {
+		tot_seqs += n_seqs;
+		t = clock();
+
+		// read alignment
+		for (i = 0; i < n_seqs; ++i) {
+			bwa_seq_t *p = seqs + i;
+			int n_aln;
+			fread(&n_aln, 4, 1, fp_sa); // NOTE(review): unchecked; a truncated file leaves n_aln undefined
+			if (n_aln > m_aln) {
+				// grow the shared alignment buffer to the largest count seen so far
+				m_aln = n_aln;
+				aln = (bwt_aln1_t*)realloc(aln, sizeof(bwt_aln1_t) * m_aln);
+			}
+			fread(aln, sizeof(bwt_aln1_t), n_aln, fp_sa);
+			bwa_aln2seq_core(n_aln, aln, p, 1, n_occ);
+		}
+
+		fprintf(stderr, "[bwa_aln_core] convert to sequence coordinate... ");
+		bwa_cal_pac_pos(bns, prefix, n_seqs, seqs, opt.max_diff, opt.fnr); // forward bwt will be destroyed here
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+
+		fprintf(stderr, "[bwa_aln_core] refine gapped alignments... ");
+		bwa_refine_gapped(bns, n_seqs, seqs, 0, ntbns);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+
+		fprintf(stderr, "[bwa_aln_core] print alignments... ");
+		for (i = 0; i < n_seqs; ++i)
+			bwa_print_sam1(bns, seqs + i, 0, opt.mode, opt.max_top2);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+
+		bwa_free_read_seq(n_seqs, seqs);
+		fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
+	}
+
+	// destroy
+	bwa_seq_close(ks);
+	if (ntbns) bns_destroy(ntbns);
+	bns_destroy(bns);
+	fclose(fp_sa);
+	free(aln);
+}
+
+/* Entry point of "bwa samse": parse the command line, locate the index
+ * and run bwa_sai2sam_se_core().
+ *
+ * Options: -n max_occ (default 3), -f output file (reopens stdout),
+ * -r read-group line, -h (accepted, ignored). Returns 1 on a usage or
+ * option error and 0 otherwise, including when the index cannot be
+ * located. */
+int bwa_sai2sam_se(int argc, char *argv[])
+{
+	extern char *bwa_infer_prefix(const char *hint);
+	int c, n_occ = 3;
+	char *prefix;
+	while ((c = getopt(argc, argv, "hn:f:r:")) >= 0) {
+		switch (c) {
+		case 'h': break;
+		case 'r':
+			if (bwa_set_rg(optarg) < 0) {
+				fprintf(stderr, "[%s] malformated @RG line\n", __func__);
+				return 1;
+			}
+			break;
+		case 'n': n_occ = atoi(optarg); break;
+		case 'f': xreopen(optarg, "w", stdout); break;
+		default: return 1;
+		}
+	}
+
+	// three positional arguments are required: <prefix> <in.sai> <in.fq>
+	if (optind + 3 > argc) {
+		fprintf(stderr, "Usage: bwa samse [-n max_occ] [-f out.sam] [-r RG_line] <prefix> <in.sai> <in.fq>\n");
+		return 1;
+	}
+	if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
+		fprintf(stderr, "[%s] fail to locate the index\n", __func__);
+		free(bwa_rg_line); free(bwa_rg_id);
+		return 0;
+	}
+	bwa_sai2sam_se_core(prefix, argv[optind+1], argv[optind+2], n_occ);
+	// release the inferred prefix too, as the paired-end entry point does
+	free(bwa_rg_line); free(bwa_rg_id); free(prefix);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwase.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,29 @@
+#ifndef BWASE_H
+#define BWASE_H
+
+#include "bntseq.h"
+#include "bwt.h"
+#include "bwtaln.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	// Initialize mapping tables in the bwa single-end mapper.
+	void bwase_initialize();
+	// Calculate the approximate position of the sequence from the specified bwt with loaded suffix array.
+	void bwa_cal_pac_pos_core(const bntseq_t *bns, const bwt_t* bwt, bwa_seq_t* seq, const int max_mm, const float fnr);
+	// Refine the approximate position of the sequence to an actual placement for the sequence.
+	void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, bntseq_t *ntbns);
+	// Backfill certain alignment properties mainly centering around number of matches.
+	void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);
+	// Calculate the end position of a read given a certain sequence.
+	int64_t pos_end(const bwa_seq_t *p);
+	// Convert a suffix array coordinate into a position and report the strand through *strand.
+	bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // BWASE_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwaseqio.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,227 @@
+#include <zlib.h>
+#include <ctype.h>
+#include "bwtaln.h"
+#include "utils.h"
+#include "bamlite.h"
+
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+extern unsigned char nst_nt4_table[256];
+static char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
+
+/* Read source handle: either a BAM file (is_bam != 0, fp valid) or a
+ * FASTA/FASTQ stream wrapped in a kseq reader (ks valid). */
+struct __bwa_seqio_t {
+	// for BAM input
+	int is_bam, which; // 1st bit: read1, 2nd bit: read2, 3rd: SE
+	bamFile fp;
+	// for fastq input
+	kseq_t *ks;
+};
+
+/* Open the BAM file fn as a read source. which selects the records to
+ * return later (bit mask, see struct __bwa_seqio_t). The BAM header is
+ * read and discarded immediately. */
+bwa_seqio_t *bwa_bam_open(const char *fn, int which)
+{
+	bam_header_t *hdr;
+	bwa_seqio_t *io = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t));
+	io->which = which;
+	io->is_bam = 1;
+	io->fp = bam_open(fn, "r");
+	// consume the header so the stream is positioned at the first record
+	hdr = bam_header_read(io->fp);
+	bam_header_destroy(hdr);
+	return io;
+}
+
+/* Open fn through xzopen() and wrap it in a kseq reader; is_bam stays 0
+ * in the returned handle. */
+bwa_seqio_t *bwa_seq_open(const char *fn)
+{
+	bwa_seqio_t *io = (bwa_seqio_t*)calloc(1, sizeof(bwa_seqio_t));
+	gzFile gz = xzopen(fn, "r");
+	io->ks = kseq_init(gz);
+	return io;
+}
+
+/* Close the underlying file of bs (BAM or gz stream) and free the handle.
+ * A NULL handle is ignored. */
+void bwa_seq_close(bwa_seqio_t *bs)
+{
+	if (!bs) return;
+	if (!bs->is_bam) {
+		// close the gz stream first, then release the kseq reader wrapping it
+		gzclose(bs->ks->f->f);
+		kseq_destroy(bs->ks);
+	} else bam_close(bs->fp);
+	free(bs);
+}
+
+/* Reverse seq[0..len-1] in place. When is_comp is non-zero every code
+ * below 4 is additionally complemented (c -> 3 - c); codes >= 4 are kept
+ * as they are. */
+void seq_reverse(int len, ubyte_t *seq, int is_comp)
+{
+	int i;
+	if (is_comp) {
+		for (i = 0; i < len>>1; ++i) {
+			// the temporary has the element type (not char), so that both halves
+			// of the swap apply the same "< 4" test to the same value
+			ubyte_t tmp = seq[len-1-i];
+			if (tmp < 4) tmp = 3 - tmp;
+			seq[len-1-i] = (seq[i] >= 4)? seq[i] : 3 - seq[i];
+			seq[i] = tmp;
+		}
+		// odd length: the middle element is only complemented
+		if (len&1) seq[i] = (seq[i] >= 4)? seq[i] : 3 - seq[i];
+	} else {
+		for (i = 0; i < len>>1; ++i) {
+			ubyte_t tmp = seq[len-1-i];
+			seq[len-1-i] = seq[i]; seq[i] = tmp;
+		}
+	}
+}
+
+/* Quality-trim the 3'-end of read p. Scanning from the last base down to
+ * index BWA_MIN_RDLEN, the running sum of (trim_qual - (qual - 33)) is
+ * tracked; the read is cut at the index where that sum peaks, and the
+ * scan stops as soon as the sum turns negative. p->len and p->clip_len
+ * are updated; the number of removed bases is returned. Nothing happens
+ * (and 0 is returned) when trim_qual < 1 or the read has no qualities. */
+int bwa_trim_read(int trim_qual, bwa_seq_t *p)
+{
+	int acc = 0, best = 0, cut = p->len, pos;
+	if (p->qual == 0 || trim_qual < 1) return 0;
+	pos = p->len - 1;
+	while (pos >= BWA_MIN_RDLEN) {
+		acc += trim_qual - (p->qual[pos] - 33);
+		if (acc < 0) break;
+		if (acc > best) {
+			best = acc;
+			cut = pos;
+		}
+		--pos;
+	}
+	p->len = cut;
+	p->clip_len = p->len;
+	return p->full_len - p->len;
+}
+
+/* Read up to n_needed reads from the BAM source bs.
+ *
+ * Records are selected by bs->which (bit 1: read1, bit 2: read2, bit 4:
+ * records flagged as neither). Bases are converted through
+ * bam_nt16_nt4_table, qualities to ASCII (offset 33, capped at 126), and
+ * records flagged as reverse strand are turned back before use. The
+ * number of reads is stored in *n; NULL is returned when none was read.
+ * is_comp is passed on to seq_reverse() when building rseq; trim_qual >= 1
+ * enables quality trimming. */
+static bwa_seq_t *bwa_read_bam(bwa_seqio_t *bs, int n_needed, int *n, int is_comp, int trim_qual)
+{
+	bwa_seq_t *seqs, *p;
+	int n_seqs, l, i;
+	long n_trimmed = 0, n_tot = 0;
+	bam1_t *b;
+
+	b = bam_init1();
+	n_seqs = 0;
+	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
+	while (bam_read1(bs->fp, b) >= 0) {
+		uint8_t *s, *q;
+		int go = 0;
+		// keep the record only if it matches one of the requested categories
+		if ((bs->which & 1) && (b->core.flag & BAM_FREAD1)) go = 1;
+		if ((bs->which & 2) && (b->core.flag & BAM_FREAD2)) go = 1;
+		if ((bs->which & 4) && !(b->core.flag& BAM_FREAD1) && !(b->core.flag& BAM_FREAD2))go = 1;
+		if (go == 0) continue;
+		l = b->core.l_qseq;
+		p = &seqs[n_seqs++];
+		p->tid = -1; // no assigned to a thread
+		p->qual = 0;
+		p->full_len = p->clip_len = p->len = l;
+		n_tot += p->full_len;
+		s = bam1_seq(b); q = bam1_qual(b);
+		p->seq = (ubyte_t*)calloc(p->len + 1, 1);
+		p->qual = (ubyte_t*)calloc(p->len + 1, 1);
+		for (i = 0; i != p->full_len; ++i) {
+			p->seq[i] = bam_nt16_nt4_table[(int)bam1_seqi(s, i)];
+			p->qual[i] = q[i] + 33 < 126? q[i] + 33 : 126; // cap the printable quality at 126
+		}
+		if (bam1_strand(b)) { // then reverse
+			seq_reverse(p->len, p->seq, 1);
+			seq_reverse(p->len, p->qual, 0);
+		}
+		if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
+		// rseq starts as a copy of the (possibly trimmed) read
+		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
+		memcpy(p->rseq, p->seq, p->len);
+		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
+		seq_reverse(p->len, p->rseq, is_comp);
+		p->name = strdup((const char*)bam1_qname(b));
+		if (n_seqs == n_needed) break;
+	}
+	*n = n_seqs;
+	if (n_seqs && trim_qual >= 1)
+		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
+	if (n_seqs == 0) {
+		// nothing read: signal the end of input with NULL
+		free(seqs);
+		bam_destroy1(b);
+		return 0;
+	}
+	bam_destroy1(b);
+	return seqs;
+}
+
+#define BARCODE_LOW_QUAL 13
+
+/* Read up to n_needed reads from bs and return them as a newly allocated
+ * array; the number of reads is stored in *n and NULL is returned when no
+ * read is available (or the barcode length is out of range).
+ *
+ * mode carries the flags BWA_MODE_COMPREAD, BWA_MODE_IL13 and
+ * BWA_MODE_CFY; its top 8 bits hold the barcode length, which is cut from
+ * the start of every read and stored in p->bc. trim_qual >= 1 enables
+ * quality trimming. BAM sources are delegated to bwa_read_bam(). */
+bwa_seq_t *bwa_read_seq(bwa_seqio_t *bs, int n_needed, int *n, int mode, int trim_qual)
+{
+	bwa_seq_t *seqs, *p;
+	kseq_t *seq = bs->ks;
+	int n_seqs, l, i, is_comp = mode&BWA_MODE_COMPREAD, is_64 = mode&BWA_MODE_IL13, l_bc = mode>>24;
+	long n_trimmed = 0, n_tot = 0;
+
+	if (l_bc > BWA_MAX_BCLEN) {
+		fprintf(stderr, "[%s] the maximum barcode length is %d.\n", __func__, BWA_MAX_BCLEN);
+		return 0;
+	}
+	if (bs->is_bam) return bwa_read_bam(bs, n_needed, n, is_comp, trim_qual); // l_bc has no effect for BAM input
+	n_seqs = 0;
+	seqs = (bwa_seq_t*)calloc(n_needed, sizeof(bwa_seq_t));
+	while ((l = kseq_read(seq)) >= 0) {
+		if ((mode & BWA_MODE_CFY) && (seq->comment.l != 0)) {
+			// skip reads that are marked to be filtered by Casava
+			// (strchr() is the standard C equivalent of the legacy index())
+			char *s = strchr(seq->comment.s, ':');
+			if (s && *(++s) == 'Y') {
+				continue;
+			}
+		}
+		if (is_64 && seq->qual.l)
+			for (i = 0; i < seq->qual.l; ++i) seq->qual.s[i] -= 31; // shift offset-64 qualities down to offset 33
+		if (seq->seq.l <= l_bc) continue; // sequence length equals or smaller than the barcode length
+		p = &seqs[n_seqs++];
+		if (l_bc) { // then trim barcode
+			// low-quality barcode bases are stored in lower case
+			for (i = 0; i < l_bc; ++i)
+				p->bc[i] = (seq->qual.l && seq->qual.s[i]-33 < BARCODE_LOW_QUAL)? tolower(seq->seq.s[i]) : toupper(seq->seq.s[i]);
+			p->bc[i] = 0;
+			// shift the remaining bases (and qualities) to the front
+			for (; i < seq->seq.l; ++i)
+				seq->seq.s[i - l_bc] = seq->seq.s[i];
+			seq->seq.l -= l_bc; seq->seq.s[seq->seq.l] = 0;
+			if (seq->qual.l) {
+				for (i = l_bc; i < seq->qual.l; ++i)
+					seq->qual.s[i - l_bc] = seq->qual.s[i];
+				seq->qual.l -= l_bc; seq->qual.s[seq->qual.l] = 0;
+			}
+			l = seq->seq.l;
+		} else p->bc[0] = 0;
+		p->tid = -1; // no assigned to a thread
+		p->qual = 0;
+		p->full_len = p->clip_len = p->len = l;
+		n_tot += p->full_len;
+		p->seq = (ubyte_t*)calloc(p->len, 1);
+		for (i = 0; i != p->full_len; ++i)
+			p->seq[i] = nst_nt4_table[(int)seq->seq.s[i]];
+		if (seq->qual.l) { // copy quality
+			p->qual = (ubyte_t*)strdup((char*)seq->qual.s);
+			if (trim_qual >= 1) n_trimmed += bwa_trim_read(trim_qual, p);
+		}
+		// rseq starts as a copy of the (possibly trimmed) read
+		p->rseq = (ubyte_t*)calloc(p->full_len, 1);
+		memcpy(p->rseq, p->seq, p->len);
+		seq_reverse(p->len, p->seq, 0); // *IMPORTANT*: will be reversed back in bwa_refine_gapped()
+		seq_reverse(p->len, p->rseq, is_comp);
+		p->name = strdup((const char*)seq->name.s);
+		{ // trim /[12]$
+			int t = strlen(p->name);
+			if (t > 2 && p->name[t-2] == '/' && (p->name[t-1] == '1' || p->name[t-1] == '2')) p->name[t-2] = '\0';
+		}
+		if (n_seqs == n_needed) break;
+	}
+	*n = n_seqs;
+	if (n_seqs && trim_qual >= 1)
+		fprintf(stderr, "[bwa_read_seq] %.1f%% bases are trimmed.\n", 100.0f * n_trimmed/n_tot);
+	if (n_seqs == 0) {
+		free(seqs);
+		return 0;
+	}
+	return seqs;
+}
+
+/* Release an array of n_seqs reads together with every buffer the reads
+ * own (names, sequences, qualities, alignments, MD strings, CIGARs and
+ * alternative hits). */
+void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs)
+{
+	int r;
+	for (r = 0; r != n_seqs; ++r) {
+		bwa_seq_t *rd = &seqs[r];
+		int h = 0;
+		// CIGARs of the alternative hits first; free() accepts NULL
+		while (h < rd->n_multi) {
+			free(rd->multi[h].cigar);
+			++h;
+		}
+		free(rd->name);
+		free(rd->seq);
+		free(rd->rseq);
+		free(rd->qual);
+		free(rd->aln);
+		free(rd->md);
+		free(rd->multi);
+		free(rd->cigar);
+	}
+	free(seqs);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwt.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,339 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdint.h>
+#include "utils.h"
+#include "bwt.h"
+#include "kvec.h"
+
+/*
+ * Fill bwt->cnt_table[]: entry b describes byte b read as four 2-bit
+ * symbols. Bits 8*j .. 8*j+7 of the entry hold how many of those four
+ * symbols equal j (j = 0..3).
+ */
+void bwt_gen_cnt_table(bwt_t *bwt)
+{
+	int byte;
+	for (byte = 0; byte < 256; ++byte) {
+		uint32_t packed = 0;
+		int shift;
+		// every 2-bit cell adds one to the 8-bit counter selected by its value
+		for (shift = 0; shift < 8; shift += 2)
+			packed += (uint32_t)1 << (((byte >> shift) & 3) << 3);
+		bwt->cnt_table[byte] = packed;
+	}
+}
+
+/*
+ * Build the sampled suffix array of bwt, storing one value every intv rows.
+ *
+ * bwt->bwt and bwt->occ must be precalculated. intv must be a positive power
+ * of 2; anything else is reported through xassert(). A previously allocated
+ * bwt->sa is released, and sa_intv, n_sa and sa are overwritten. The process
+ * aborts if the new array cannot be allocated.
+ */
+void bwt_cal_sa(bwt_t *bwt, int intv)
+{
+	bwtint_t isa, sa, i; // S(isa) = sa
+	int intv_round = intv;
+
+	kv_roundup32(intv_round);
+	// intv is used as a divisor below, so reject non-positive values explicitly
+	xassert(intv > 0 && intv_round == intv, "SA sample interval is not a power of 2.");
+	xassert(bwt->bwt, "bwt_t::bwt is not initialized.");
+
+	if (bwt->sa) free(bwt->sa);
+	bwt->sa_intv = intv;
+	bwt->n_sa = (bwt->seq_len + intv) / intv;
+	bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
+	if (bwt->sa == 0) {
+		fprintf(stderr, "[%s] Fail to allocate %.3fMB memory. Abort!\n", __func__, bwt->n_sa * sizeof(bwtint_t) / 1024.0/1024.0);
+		abort();
+	}
+	// calculate SA value: follow inverse Psi from row 0, counting sa down from
+	// seq_len, and record sa whenever the current row is a multiple of intv
+	isa = 0; sa = bwt->seq_len;
+	for (i = 0; i < bwt->seq_len; ++i) {
+		if (isa % intv == 0) bwt->sa[isa/intv] = sa;
+		--sa;
+		isa = bwt_invPsi(bwt, isa);
+	}
+	if (isa % intv == 0) bwt->sa[isa/intv] = sa;
+	bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len
+}
+
+/*
+ * Return the suffix array value of row k.
+ *
+ * k is moved with bwt_invPsi() until its low bits under (sa_intv - 1) are
+ * all zero, i.e. until it hits a sampled row. The number of moves is then
+ * added to the stored sample.
+ */
+bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k)
+{
+	const bwtint_t mask = bwt->sa_intv - 1;
+	bwtint_t steps;
+	for (steps = 0; (k & mask) != 0; ++steps)
+		k = bwt_invPsi(bwt, k);
+	/* without setting bwt->sa[0] = -1, the following line should be
+	   changed to (steps + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */
+	return steps + bwt->sa[k/bwt->sa_intv];
+}
+
+/*
+ * Count how many of the 32 two-bit cells packed in y hold the value c.
+ * Only the low two bits of c are examined.
+ */
+static inline int __occ_aux(uint64_t y, int c)
+{
+	// pick y or ~y per bit of c so a matching cell shows 1 in both of its bits
+	const uint64_t hi_ok = (c & 2) ? y : ~y;
+	const uint64_t lo_ok = (c & 1) ? y : ~y;
+	// one flag bit per cell, in the cell's low position
+	uint64_t hits = (hi_ok >> 1) & lo_ok & 0x5555555555555555ull;
+	// population count: fold flags into nibbles, then bytes, then sum the bytes
+	hits = (hits & 0x3333333333333333ull) + ((hits >> 2) & 0x3333333333333333ull);
+	hits = (hits + (hits >> 4)) & 0x0f0f0f0f0f0f0f0full;
+	return (hits * 0x0101010101010101ull) >> 56;
+}
+
+/*
+ * Count the occurrences of symbol c in the BWT up to and including row k.
+ *
+ * k == bwt->seq_len returns the total number of c (L2[c+1] - L2[c]) and
+ * k == (bwtint_t)-1 returns 0. Otherwise the counter stored at the start of
+ * the bucket returned by bwt_occ_intv() is topped up by scanning the packed
+ * BWT words that follow it.
+ */
+inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
+{
+	bwtint_t n, l, j;
+	uint32_t *p;
+
+	if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c];
+	if (k == (bwtint_t)(-1)) return 0;
+	if (k >= bwt->primary) --k; // because $ is not in bwt
+
+	// retrieve Occ at k/OCC_INTERVAL
+	n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
+	// p is a uint32_t*, so this advances by sizeof(bwtint_t) 32-bit words
+	p += sizeof(bwtint_t); // jump to the start of the first BWT cell
+
+	// calculate Occ up to the last k/32
+	j = k >> 5 << 5;
+	for (l = k/OCC_INTERVAL*OCC_INTERVAL; l < j; l += 32, p += 2)
+		n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
+
+	// calculate Occ
+	// the mask clears the (~k&31) cells that lie after row k in the last 64-bit chunk
+	n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
+	// cleared cells read as symbol 0, so they were counted when c == 0
+	if (c == 0) n -= ~k&31; // corrected for the masked bits
+
+	return n;
+}
+
+// an analogy to bwt_occ() but more efficient, requiring k <= l
+/*
+ * Store the occurrence count of symbol c at row k in *ok and at row l in *ol.
+ *
+ * When both rows (after the $ adjustment) fall in the same OCC_INTERVAL
+ * bucket and neither is (bwtint_t)-1, the bucket is scanned once and the
+ * running count is shared. Otherwise this simply calls bwt_occ() twice.
+ */
+inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol)
+{
+	bwtint_t _k, _l;
+	// adjusted copies, used only to decide whether k and l share a bucket
+	_k = (k >= bwt->primary)? k-1 : k;
+	_l = (l >= bwt->primary)? l-1 : l;
+	if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
+		*ok = bwt_occ(bwt, k, c);
+		*ol = bwt_occ(bwt, l, c);
+	} else {
+		bwtint_t m, n, i, j;
+		uint32_t *p;
+		if (k >= bwt->primary) --k;
+		if (l >= bwt->primary) --l;
+		n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
+		// p is a uint32_t*, so this advances by sizeof(bwtint_t) 32-bit words
+		p += sizeof(bwtint_t);
+		// calculate *ok
+		j = k >> 5 << 5;
+		for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2)
+			n += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
+		// m keeps the count of the complete 32-cell chunks before k; the scan for l resumes from it
+		m = n;
+		n += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~k&31)<<1)) - 1), c);
+		if (c == 0) n -= ~k&31; // corrected for the masked bits
+		*ok = n;
+		// calculate *ol
+		j = l >> 5 << 5;
+		for (; i < j; i += 32, p += 2)
+			m += __occ_aux((uint64_t)p[0]<<32 | p[1], c);
+		m += __occ_aux(((uint64_t)p[0]<<32 | p[1]) & ~((1ull<<((~l&31)<<1)) - 1), c);
+		if (c == 0) m -= ~l&31; // corrected for the masked bits
+		*ol = m;
+	}
+}
+
+/* Add up the cnt_table entries of the four bytes of the 32-bit word b. The
+ * result holds, one count per byte, how many of the 16 two-bit cells of b
+ * equal 0, 1, 2 and 3 (layout as produced by bwt_gen_cnt_table()). */
+#define __occ_aux4(bwt, b)											\
+	((bwt)->cnt_table[(b)&0xff] + (bwt)->cnt_table[(b)>>8&0xff]		\
+	 + (bwt)->cnt_table[(b)>>16&0xff] + (bwt)->cnt_table[(b)>>24])
+
+/*
+ * Compute the occurrence counts of all four symbols up to and including
+ * row k and store them in cnt[0..3]. k == (bwtint_t)-1 yields all zeros.
+ */
+inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
+{
+	bwtint_t l, j, x;
+	uint32_t *p;
+	if (k == (bwtint_t)(-1)) {
+		memset(cnt, 0, 4 * sizeof(bwtint_t));
+		return;
+	}
+	if (k >= bwt->primary) --k; // because $ is not in bwt
+	p = bwt_occ_intv(bwt, k);
+	// start from the four counters stored at the head of the bucket
+	memcpy(cnt, p, 4 * sizeof(bwtint_t));
+	// p is a uint32_t*, so this advances by sizeof(bwtint_t) 32-bit words
+	p += sizeof(bwtint_t);
+	j = k >> 4 << 4;
+	// x accumulates the four per-symbol counts, one per byte
+	for (l = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; l < j; l += 16, ++p)
+		x += __occ_aux4(bwt, *p);
+	// last word: clear the (~k&15) cells after row k; they read as symbol 0, hence the subtraction
+	x += __occ_aux4(bwt, *p & ~((1U<<((~k&15)<<1)) - 1)) - (~k&15);
+	cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24;
+}
+
+// an analogy to bwt_occ4() but more efficient, requiring k <= l
+/*
+ * Fill cntk[0..3] and cntl[0..3] with the occurrence counts of all four
+ * symbols at rows k and l. Falls back to two bwt_occ4() calls unless both
+ * rows share one OCC_INTERVAL bucket and neither is (bwtint_t)-1.
+ */
+inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4])
+{
+	bwtint_t _k, _l;
+	// adjusted copies, used only to decide whether k and l share a bucket
+	_k = (k >= bwt->primary)? k-1 : k;
+	_l = (l >= bwt->primary)? l-1 : l;
+	if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
+		bwt_occ4(bwt, k, cntk);
+		bwt_occ4(bwt, l, cntl);
+	} else {
+		bwtint_t i, j, x, y;
+		uint32_t *p;
+		if (k >= bwt->primary) --k; // because $ is not in bwt
+		if (l >= bwt->primary) --l;
+		p = bwt_occ_intv(bwt, k);
+		memcpy(cntk, p, 4 * sizeof(bwtint_t));
+		// p is a uint32_t*, so this advances by sizeof(bwtint_t) 32-bit words
+		p += sizeof(bwtint_t);
+		// prepare cntk[]
+		j = k >> 4 << 4;
+		for (i = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; i < j; i += 16, ++p)
+			x += __occ_aux4(bwt, *p);
+		// y keeps the counts of the complete words before k; the scan for l resumes from it
+		y = x;
+		x += __occ_aux4(bwt, *p & ~((1U<<((~k&15)<<1)) - 1)) - (~k&15);
+		// calculate cntl[] and finalize cntk[]
+		j = l >> 4 << 4;
+		for (; i < j; i += 16, ++p) y += __occ_aux4(bwt, *p);
+		y += __occ_aux4(bwt, *p & ~((1U<<((~l&15)<<1)) - 1)) - (~l&15);
+		// cntk still holds only the bucket counters here, so cntl starts from the same base
+		memcpy(cntl, cntk, 4 * sizeof(bwtint_t));
+		cntk[0] += x&0xff; cntk[1] += x>>8&0xff; cntk[2] += x>>16&0xff; cntk[3] += x>>24;
+		cntl[0] += y&0xff; cntl[1] += y>>8&0xff; cntl[2] += y>>16&0xff; cntl[3] += y>>24;
+	}
+}
+
+/*
+ * Backward-search str[0..len-1], starting from the interval [0, seq_len].
+ *
+ * On success returns the interval size l - k + 1 (converted to int) and
+ * stores the interval through sa_begin / sa_end when they are non-NULL.
+ * Returns 0, leaving the outputs untouched, if a symbol is greater than 3
+ * or the interval becomes empty.
+ */
+int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end)
+{
+	bwtint_t lo = 0, hi = bwt->seq_len;
+	bwtint_t occ_lo, occ_hi;
+	int pos;
+	for (pos = len - 1; pos >= 0; --pos) {
+		const ubyte_t sym = str[pos];
+		if (sym > 3) return 0; // no match
+		bwt_2occ(bwt, lo - 1, hi, sym, &occ_lo, &occ_hi);
+		lo = bwt->L2[sym] + occ_lo + 1;
+		hi = bwt->L2[sym] + occ_hi;
+		if (lo > hi) return 0; // no match
+	}
+	if (sa_begin) *sa_begin = lo;
+	if (sa_end)   *sa_end = hi;
+	return hi - lo + 1;
+}
+
+/*
+ * Backward-search str[0..len-1], starting from the caller's interval
+ * [*k0, *l0] instead of the whole index.
+ *
+ * On success the narrowed interval is written back to *k0 / *l0 and its
+ * size l - k + 1 (converted to int) is returned. Returns 0 and leaves
+ * *k0 / *l0 unchanged if a symbol is greater than 3 or the interval
+ * becomes empty.
+ */
+int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0)
+{
+	bwtint_t lo = *k0, hi = *l0;
+	bwtint_t occ_lo, occ_hi;
+	int pos = len - 1;
+	while (pos >= 0) {
+		const ubyte_t sym = str[pos];
+		if (sym > 3) return 0; // there is an N here. no match
+		bwt_2occ(bwt, lo - 1, hi, sym, &occ_lo, &occ_hi);
+		lo = bwt->L2[sym] + occ_lo + 1;
+		hi = bwt->L2[sym] + occ_hi;
+		if (lo > hi) return 0; // no match
+		--pos;
+	}
+	*k0 = lo;
+	*l0 = hi;
+	return hi - lo + 1;
+}
+
+/*********************
+ * Bidirectional BWT *
+ *********************/
+
+/*
+ * Extend the bi-interval ik by each of the four symbols, writing the four
+ * resulting bi-intervals to ok[0..3].
+ *
+ * x[!is_back] is the coordinate updated from the occurrence counts, x[2]
+ * is the interval size, and x[is_back] is derived by accumulating the sizes
+ * from symbol 3 down to symbol 0.
+ */
+void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back)
+{
+	const int upd = !is_back; // coordinate recomputed from Occ
+	const bwtint_t start = ik->x[upd];
+	const bwtint_t size = ik->x[2];
+	bwtint_t occ_lo[4], occ_hi[4];
+	int sym;
+
+	bwt_2occ4(bwt, start - 1, start - 1 + size, occ_lo, occ_hi);
+	for (sym = 0; sym != 4; ++sym) {
+		ok[sym].x[upd] = bwt->L2[sym] + 1 + occ_lo[sym];
+		ok[sym].x[2] = occ_hi[sym] - occ_lo[sym];
+	}
+	// add one when the input interval contains the primary row
+	ok[3].x[is_back] = ik->x[is_back] + (start <= bwt->primary && start + size - 1 >= bwt->primary);
+	for (sym = 2; sym >= 0; --sym)
+		ok[sym].x[is_back] = ok[sym + 1].x[is_back] + ok[sym + 1].x[2];
+}
+
+/* Reverse the order of the p->n intervals stored in p->a, in place. */
+static void bwt_reverse_intvs(bwtintv_v *p)
+{
+	bwtintv_t *head, *tail;
+	if (p->n < 2) return; // nothing to swap
+	head = p->a;
+	tail = p->a + (p->n - 1);
+	while (head < tail) {
+		bwtintv_t tmp = *tail;
+		*tail = *head;
+		*head = tmp;
+		++head;
+		--tail;
+	}
+}
+
+/*
+ * Collect potential SMEMs of query q[0..len-1] that cover position x and
+ * store them in mem, sorted by start coordinate.
+ *
+ * For every stored interval, info holds the match start in its upper 32 bits
+ * and the match end in its lower 32 bits. tmpvec may be NULL, or may supply
+ * up to two scratch vectors that are reused instead of local ones.
+ *
+ * Returns the end of the longest exact match starting from x, or x + 1
+ * (with mem left empty) when q[x] is an ambiguous base.
+ */
+int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem, bwtintv_v *tmpvec[2])
+{
+	int i, j, c, ret;
+	bwtintv_t ik, ok[4];
+	bwtintv_v a[2], *prev, *curr, *swap;
+
+	mem->n = 0;
+	if (q[x] > 3) return x + 1;
+	kv_init(a[0]); kv_init(a[1]);
+	// tolerate a NULL tmpvec as well as NULL entries
+	prev = tmpvec && tmpvec[0]? tmpvec[0] : &a[0];
+	curr = tmpvec && tmpvec[1]? tmpvec[1] : &a[1];
+	bwt_set_intv(bwt, q[x], ik);
+	ik.info = x + 1;
+
+	for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
+		if (q[i] < 4) {
+			c = 3 - q[i];
+			bwt_extend(bwt, &ik, ok, 0);
+			if (ok[c].x[2] != ik.x[2]) // change of the interval size
+				kv_push(bwtintv_t, *curr, ik);
+			if (ok[c].x[2] == 0) break; // cannot be extended
+			ik = ok[c]; ik.info = i + 1;
+		} else { // an ambiguous base
+			kv_push(bwtintv_t, *curr, ik);
+			break; // cannot be extended; in this case, i<len always stands
+		}
+	}
+	if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
+	bwt_reverse_intvs(curr); // s.t. smaller intervals visited first
+	ret = curr->a[0].info; // this will be the returned value
+	swap = curr; curr = prev; prev = swap;
+
+	for (i = x - 1; i >= -1; --i) { // backward search for MEMs
+		// c < 0 means "cannot extend": either the start of the query was reached
+		// (q[-1] must never be read) or q[i] is an ambiguous base
+		c = (i < 0 || q[i] > 3)? -1 : q[i];
+		for (j = 0, curr->n = 0; j < prev->n; ++j) {
+			bwtintv_t *p = &prev->a[j];
+			if (c >= 0) bwt_extend(bwt, p, ok, 1);
+			if (c < 0 || ok[c].x[2] == 0) { // keep the hit if it cannot be extended further
+				if (curr->n == 0) { // curr->n to make sure there is no longer matches
+					if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches
+						ik = *p; ik.info |= (uint64_t)(i + 1)<<32;
+						kv_push(bwtintv_t, *mem, ik);
+					}
+				} // otherwise the match is contained in another longer match
+			} else if (curr->n == 0 || ok[c].x[2] != curr->a[curr->n-1].x[2]) {
+				ok[c].info = p->info;
+				kv_push(bwtintv_t, *curr, ok[c]);
+			}
+		}
+		if (curr->n == 0) break;
+		swap = curr; curr = prev; prev = swap;
+	}
+	bwt_reverse_intvs(mem); // s.t. sorted by the start coordinate
+
+	// only the locally owned scratch vectors are released
+	if (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a);
+	if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
+	return ret;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwt.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,130 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#ifndef BWA_BWT_H
+#define BWA_BWT_H
+
+#include <stdint.h>
+
+// requirement: (OCC_INTERVAL%16 == 0); please DO NOT change this line
+// number of BWT symbols covered by one bucket of stored occurrence counters
+#define OCC_INTERVAL 0x80
+
+#ifndef BWA_UBYTE
+#define BWA_UBYTE
+// unsigned byte; guarded by BWA_UBYTE so that it is defined only once
+typedef unsigned char ubyte_t;
+#endif
+
+// integer type of BWT rows, counts and suffix array values
+typedef uint64_t bwtint_t;
+
+// BWT index: packed BWT with embedded occurrence counters, plus a sampled suffix array
+typedef struct {
+	bwtint_t primary; // S^{-1}(0), or the primary index of BWT
+	bwtint_t L2[5]; // C(), cumulative count
+	bwtint_t seq_len; // sequence length
+	bwtint_t bwt_size; // size of bwt, about seq_len/4
+	uint32_t *bwt; // BWT
+	// occurrence array, separated to two parts
+	uint32_t cnt_table[256]; // per-byte symbol counts, filled by bwt_gen_cnt_table()
+	// suffix array
+	int sa_intv; // sampling interval, set by bwt_cal_sa()
+	bwtint_t n_sa; // number of entries in sa
+	bwtint_t *sa; // sampled suffix array values
+} bwt_t;
+
+// Bi-interval: x[0] and x[1] are the two interval starts used by bwt_extend(),
+// x[2] is the interval size. bwt_smem1() stores match coordinates in info.
+typedef struct {
+	bwtint_t x[3], info;
+} bwtintv_t;
+
+// growable array of bi-intervals: n used entries, m allocated entries, a the storage
+typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v;
+
+/* For general OCC_INTERVAL, the following is correct:
+#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16])
+#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4))
+*/
+
+// The following two lines are ONLY correct when OCC_INTERVAL==0x80
+// bwt_bwt: the 32-bit word holding symbol k; each bucket of 128 symbols spans
+// 16 words, and sizeof(bwtint_t) words of counters precede its BWT words
+#define bwt_bwt(b, k) ((b)->bwt[((k)>>7<<4) + sizeof(bwtint_t) + (((k)&0x7f)>>4)])
+// bwt_occ_intv: pointer to the start of the bucket that holds symbol k
+#define bwt_occ_intv(b, k) ((b)->bwt + ((k)>>7<<4))
+
+/* retrieve a character from the $-removed BWT string. Note that
+ * bwt_t::bwt is not exactly the BWT string and therefore this macro is
+ * called bwt_B0 instead of bwt_B */
+#define bwt_B0(b, k) (bwt_bwt(b, k)>>((~(k)&0xf)<<1)&3)
+
+// inverse Psi function
+// NOTE: the argument k is evaluated several times; pass an expression without side effects
+#define bwt_invPsi(bwt, k)												\
+	(((k) == (bwt)->primary)? 0 :										\
+	 ((k) < (bwt)->primary)?											\
+	 (bwt)->L2[bwt_B0(bwt, k)] + bwt_occ(bwt, k, bwt_B0(bwt, k))		\
+	 : (bwt)->L2[bwt_B0(bwt, (k)-1)] + bwt_occ(bwt, k, bwt_B0(bwt, (k)-1)))
+
+// initialise bi-interval ik for the single symbol c: x[0] from L2[c], x[1] from L2[3-c], x[2] = count of c, info = 0
+#define bwt_set_intv(bwt, c, ik) ((ik).x[0] = (bwt)->L2[(int)(c)]+1, (ik).x[2] = (bwt)->L2[(int)(c)+1]-(bwt)->L2[(int)(c)], (ik).x[1] = (bwt)->L2[3-(c)]+1, (ik).info = 0)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	// write / read the BWT and the sampled suffix array to / from file fn
+	void bwt_dump_bwt(const char *fn, const bwt_t *bwt);
+	void bwt_dump_sa(const char *fn, const bwt_t *bwt);
+
+	bwt_t *bwt_restore_bwt(const char *fn);
+	void bwt_restore_sa(const char *fn, bwt_t *bwt);
+
+	void bwt_destroy(bwt_t *bwt);
+
+	void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
+	// build the sampled suffix array; intv must be a power of 2
+	void bwt_cal_sa(bwt_t *bwt, int intv);
+
+	void bwt_bwtupdate_core(bwt_t *bwt);
+
+	/* The occurrence functions below are declared WITHOUT `inline` on purpose.
+	 * With C99 inline semantics, a function whose file-scope declarations all
+	 * carry `inline` (and none carries `extern`) provides no external
+	 * definition, so calls from other translation units fail to link. A plain
+	 * prototype here makes the definition in bwt.c an external one. */
+	bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c);
+	void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]);
+	bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k);
+
+	// more efficient version of bwt_occ/bwt_occ4 for retrieving two close Occ values
+	void bwt_gen_cnt_table(bwt_t *bwt);
+	void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol);
+	void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4]);
+
+	// exact matching by backward search; both return the interval size, 0 for no match
+	int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end);
+	int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0);
+
+	/**
+	 * Extend bi-SA-interval _ik_
+	 */
+	void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back);
+
+	/**
+	 * Given a query _q_, collect potential SMEMs covering position _x_ and store them in _mem_.
+	 * Return the end of the longest exact match starting from _x_.
+	 */
+	int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwt_gen.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,1566 @@
+/*
+
+   BWTConstruct.c		BWT-Index Construction
+
+   This module constructs BWT and auxiliary data structures.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdint.h>
+#include "QSufSort.h"
+
+// 64-bit unsigned / signed integers used for text lengths and positions
+typedef uint64_t bgint_t;
+typedef int64_t sbgint_t;
+
+// DNA text packing: 4 symbols, 2 bits each, 16 per 32-bit word, 4 per byte
+#define ALPHABET_SIZE				4
+#define BIT_PER_CHAR				2
+#define CHAR_PER_WORD				16
+#define CHAR_PER_BYTE				4
+
+// a "word" in this file is a 32-bit unsigned int
+#define BITS_IN_WORD 32
+#define BITS_IN_BYTE 8
+#define BYTES_IN_WORD 4
+
+#define ALL_ONE_MASK 0xFFFFFFFF
+// the occurrence count table has one entry per 16-bit value (8 symbols)
+#define DNA_OCC_CNT_TABLE_SIZE_IN_WORD	65536
+
+// NOTE: OCC_INTERVAL here is this file's own value (256)
+#define BITS_PER_OCC_VALUE			16
+#define OCC_VALUE_PER_WORD			2
+#define OCC_INTERVAL				256
+#define OCC_INTERVAL_MAJOR			65536
+
+#define TRUE    1
+#define FALSE   0
+
+#define BWTINC_INSERT_SORT_NUM_ITEM 7
+
+// lower bound applied to BWTInc::availableWord in BWTIncCreate()
+#define MIN_AVAILABLE_WORD 0x10000
+
+// NOTE: these macros evaluate their arguments more than once; med3, swap and
+// DNA_OCC_SUM_EXCEPTION do not parenthesise them, so pass simple operands only
+#define average(value1, value2)					( ((value1) & (value2)) + ((value1) ^ (value2)) / 2 )
+#define min(value1, value2)						( ((value1) < (value2)) ? (value1) : (value2) )
+#define max(value1, value2)						( ((value1) > (value2)) ? (value1) : (value2) )
+#define med3(a, b, c)							( a<b ? (b<c ? b : a<c ? c : a) : (b>c ? b : a>c ? c : a))
+// the ';' right after the parameter list is part of the replacement text
+#define swap(a, b, t);							t = a; a = b; b = t;
+#define truncateLeft(value, offset)				( (value) << (offset) >> (offset) )
+#define truncateRight(value, offset)			( (value) >> (offset) << (offset) )
+// true when no bit of sum is set other than bits 8, 16 and 24
+#define DNA_OCC_SUM_EXCEPTION(sum)			((sum & 0xfefefeff) == 0)
+
+// BWT under construction, together with its occurrence tables
+typedef struct BWT {
+	bgint_t textLength;					// length of the text
+	bgint_t inverseSa0;					// SA-1[0]
+	bgint_t *cumulativeFreq;			// cumulative frequency
+	unsigned int *bwtCode;				// BWT code
+	unsigned int *occValue;				// Occurrence values stored explicitly
+	bgint_t *occValueMajor;				// Occurrence values stored explicitly
+	unsigned int *decodeTable;			// For decoding BWT by table lookup
+	bgint_t bwtSizeInWord;				// Temporary variable to hold the memory allocated
+	bgint_t occSizeInWord;				// Temporary variable to hold the memory allocated
+	bgint_t occMajorSizeInWord;			// Temporary variable to hold the memory allocated
+} BWT;
+
+// State of an incremental BWT construction; allocated by BWTIncCreate()
+typedef struct BWTInc {
+	BWT *bwt;								// the BWT being built
+	unsigned int numberOfIterationDone;
+	bgint_t *cumulativeCountInCurrentBuild;	// ALPHABET_SIZE + 1 entries
+	bgint_t availableWord;					// size of workingMemory, in 32-bit words
+	bgint_t buildSize;						// set by BWTIncSetBuildSizeAndTextAddr()
+	bgint_t initialMaxBuildSize;			// cap for the initial build; 0 means no cap
+	bgint_t incMaxBuildSize;				// cap for later builds; 0 means no cap
+	unsigned int firstCharInLastIteration;
+	unsigned int *workingMemory;			// single buffer of availableWord words
+	unsigned int *packedText;				// points into workingMemory
+	unsigned char *textBuffer;				// points into workingMemory
+	unsigned int *packedShift;				// CHAR_PER_WORD shift amounts, filled by BWTIncCreate()
+} BWTInc;
+
+/*
+ * Number of characters in a byte-packed text: every byte but the last is
+ * full (BITS_IN_BYTE / bitPerChar characters) and the last byte holds
+ * lastByteLength characters.
+ */
+static bgint_t TextLengthFromBytePacked(bgint_t bytePackedLength, unsigned int bitPerChar,
+											 unsigned int lastByteLength)
+{
+	const unsigned int charsPerFullByte = BITS_IN_BYTE / bitPerChar;
+	const bgint_t fullBytes = bytePackedLength - 1;
+	return fullBytes * charsPerFullByte + lastByteLength;
+}
+
+/* Set the first `length` unsigned ints at startAddr to initValue. */
+static void initializeVAL(unsigned int *startAddr, const bgint_t length, const unsigned int initValue)
+{
+	unsigned int *cursor = startAddr;
+	unsigned int *const stop = startAddr + length;
+	while (cursor != stop)
+		*cursor++ = initValue;
+}
+
+/* Set the first `length` bgint_t values at startAddr to initValue. */
+static void initializeVAL_bg(bgint_t *startAddr, const bgint_t length, const bgint_t initValue)
+{
+	bgint_t *cursor = startAddr;
+	bgint_t *const stop = startAddr + length;
+	while (cursor != stop)
+		*cursor++ = initValue;
+}
+
+/*
+ * Fill dnaDecodeTable[0 .. DNA_OCC_CNT_TABLE_SIZE_IN_WORD-1]. Entry v
+ * describes the 16-bit value v read as eight 2-bit symbols: byte t of the
+ * entry is the number of those symbols equal to t.
+ */
+static void GenerateDNAOccCountTable(unsigned int *dnaDecodeTable)
+{
+	unsigned int value;
+
+	for (value = 0; value < DNA_OCC_CNT_TABLE_SIZE_IN_WORD; ++value) {
+		unsigned int counts = 0;
+		unsigned int rest = value;
+		unsigned int cell;
+		for (cell = 0; cell < 8; ++cell) {
+			// add one to the byte-wide counter selected by the low symbol
+			counts += 1 << ((rest & 0x00000003) * 8);
+			rest >>= 2;
+		}
+		dnaDecodeTable[value] = counts;
+	}
+}
+// for BWTIncCreate()
+/*
+ * Number of entries needed for the major occurrence table of a text of
+ * numChar characters: ALPHABET_SIZE entries per OCC_INTERVAL_MAJOR block.
+ */
+static bgint_t BWTOccValueMajorSizeInWord(const bgint_t numChar)
+{
+	const unsigned intervalsPerMajor = OCC_INTERVAL_MAJOR / OCC_INTERVAL;
+	// Value at both end for bi-directional encoding
+	const bgint_t occValues = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
+	const bgint_t majorBlocks = (occValues + intervalsPerMajor - 1) / intervalsPerMajor;
+	return majorBlocks * ALPHABET_SIZE;
+}
+// for BWTIncCreate()
+/*
+ * Number of 32-bit words needed for the minor occurrence table of a text of
+ * numChar characters: OCC_VALUE_PER_WORD values share a word, for each of
+ * the ALPHABET_SIZE symbols.
+ */
+static bgint_t BWTOccValueMinorSizeInWord(const bgint_t numChar)
+{
+	// Value at both end for bi-directional encoding
+	const bgint_t occValues = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
+	const bgint_t wordsPerSymbol = (occValues + OCC_VALUE_PER_WORD - 1) / OCC_VALUE_PER_WORD;
+	return wordsPerSymbol * ALPHABET_SIZE;
+}
+// for BWTIncCreate()
+/*
+ * Number of 32-bit words needed to hold the packed BWT of numChar
+ * characters, after rounding numChar up to a multiple of OCC_INTERVAL.
+ */
+static bgint_t BWTResidentSizeInWord(const bgint_t numChar) {
+
+	// The $ in BWT at the position of inverseSa0 is not encoded
+	const bgint_t intervals = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL;
+	const bgint_t paddedChars = intervals * OCC_INTERVAL;
+
+	return (paddedChars + CHAR_PER_WORD - 1) / CHAR_PER_WORD;
+
+}
+
+/*
+ * Choose bwtInc->buildSize for the next iteration and point packedText and
+ * textBuffer into workingMemory accordingly.
+ *
+ * The size is the largest that fits in availableWord, optionally capped by
+ * initialMaxBuildSize (first build, textLength == 0) or incMaxBuildSize
+ * (later builds), then rounded down to a multiple of CHAR_PER_WORD. Exits
+ * with an error message if the memory cannot hold CHAR_PER_WORD characters.
+ */
+static void BWTIncSetBuildSizeAndTextAddr(BWTInc *bwtInc)
+{
+	const size_t wordsPerBg = sizeof(bgint_t) / 4; // 32-bit words per bgint_t
+	const int initialBuild = (bwtInc->bwt->textLength == 0);
+	bgint_t limit, cap;
+
+	if (initialBuild) {
+		// Minus 2 because n+1 entries of seq and rank needed for n char
+		limit = (bwtInc->availableWord - (2 + OCC_INTERVAL / CHAR_PER_WORD) * wordsPerBg)
+							/ (2 * CHAR_PER_WORD + 1) * CHAR_PER_WORD / wordsPerBg;
+		cap = bwtInc->initialMaxBuildSize;
+	} else {
+		// Minus 3 because n+1 entries of sorted rank, seq and rank needed for n char
+		// Minus numberOfIterationDone because bwt slightly shift to left in each iteration
+		limit = (bwtInc->availableWord - bwtInc->bwt->bwtSizeInWord - bwtInc->bwt->occSizeInWord
+							 - (3 + bwtInc->numberOfIterationDone * OCC_INTERVAL / BIT_PER_CHAR) * wordsPerBg)
+							 / 3 / wordsPerBg;
+		if (limit < CHAR_PER_WORD) {
+			fprintf(stderr, "BWTIncSetBuildSizeAndTextAddr(): Not enough space allocated to continue construction!\n");
+			exit(1);
+		}
+		cap = bwtInc->incMaxBuildSize;
+	}
+
+	// a cap of 0 means "no cap"
+	bwtInc->buildSize = (cap > 0 && cap < limit) ? cap : limit;
+
+	if (bwtInc->buildSize < CHAR_PER_WORD) {
+		if (initialBuild) {
+			fprintf(stderr, "BWTIncSetBuildSizeAndTextAddr(): Not enough space allocated to continue construction!\n");
+			exit(1);
+		}
+		// later builds are raised to the minimum instead of failing
+		bwtInc->buildSize = CHAR_PER_WORD;
+	}
+
+	bwtInc->buildSize = bwtInc->buildSize / CHAR_PER_WORD * CHAR_PER_WORD;
+
+	bwtInc->packedText = bwtInc->workingMemory + 2 * (bwtInc->buildSize + 1) * wordsPerBg;
+	bwtInc->textBuffer = (unsigned char*)(bwtInc->workingMemory + (bwtInc->buildSize + 1) * wordsPerBg);
+}
+
+// for ceilLog2()
+/*
+ * Number of leading zero bits of a 32-bit value (32 for input 0). The
+ * highest non-zero byte is located first, then an 8-bit table is consulted.
+ */
+unsigned int leadingZero(const unsigned int input)
+{
+	const static unsigned int leadingZero8bit[256] = {8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+											 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+											 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+											 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+											 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+											 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+											 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+											 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+	if (input & 0xFF000000)
+		return leadingZero8bit[input >> 24];
+	if (input & 0x00FF0000)
+		return 8 + leadingZero8bit[input >> 16];
+	if (input & 0x0000FF00)
+		return 16 + leadingZero8bit[input >> 8];
+	return 24 + leadingZero8bit[input];
+
+}
+// for BitPerBytePackedChar()
+/* Ceiling of log2(input); 0 for input 0 or 1. */
+static unsigned int ceilLog2(const unsigned int input)
+{
+	return (input <= 1) ? 0 : BITS_IN_WORD - leadingZero(input - 1);
+}
+// for ConvertBytePackedToWordPacked()
+/*
+ * Bits per character for byte-packed text: at least ceilLog2(alphabetSize),
+ * widened to the largest width that still fits the same number of
+ * characters into a byte.
+ *
+ * ceilLog2(alphabetSize) must lie in 1..BITS_IN_BYTE; outside that range
+ * the divisions below divide by zero.
+ */
+static unsigned int BitPerBytePackedChar(const unsigned int alphabetSize)
+{
+	const unsigned int minBits = ceilLog2(alphabetSize);
+	// Return the largest number of bit that does not affect packing efficiency
+	const unsigned int widestBits = BITS_IN_BYTE / (BITS_IN_BYTE / minBits);
+	return (widestBits > minBits) ? widestBits : minBits;
+}
+// for ConvertBytePackedToWordPacked()
+/* Bits per character for word-packed text: exactly ceilLog2(alphabetSize). */
+static unsigned int BitPerWordPackedChar(const unsigned int alphabetSize)
+{
+	const unsigned int bits = ceilLog2(alphabetSize);
+	return bits;
+}
+
+/*
+ * Repack textLength characters from byte-packed `input` into 32-bit words in
+ * `output`. The first character of each word goes into its most significant
+ * bits.
+ *
+ * All words but the last are produced by the main loop; the last word
+ * (complete or partial) is assembled separately. An empty text is a no-op:
+ * without the guard the unsigned expression (textLength - ... - 1) would
+ * wrap around and the tail loop would run far past buffer[].
+ */
+static void ConvertBytePackedToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int alphabetSize,
+										  const bgint_t textLength)
+{
+	bgint_t i;
+	unsigned int j, k, c;
+	unsigned int bitPerBytePackedChar;
+	unsigned int bitPerWordPackedChar;
+	unsigned int charPerWord;
+	unsigned int charPerByte;
+	unsigned int bytePerIteration;
+	bgint_t byteProcessed = 0;
+	bgint_t wordProcessed = 0;
+	unsigned int mask, shift;
+	
+	unsigned int buffer[BITS_IN_WORD];
+
+	if (textLength == 0) return; // nothing to convert
+
+	bitPerBytePackedChar = BitPerBytePackedChar(alphabetSize);
+	bitPerWordPackedChar = BitPerWordPackedChar(alphabetSize);
+	charPerByte = BITS_IN_BYTE / bitPerBytePackedChar;
+	charPerWord = BITS_IN_WORD / bitPerWordPackedChar;
+
+	bytePerIteration = charPerWord / charPerByte;
+	// mask selects the top bitPerWordPackedChar bits of a word
+	mask = truncateRight(ALL_ONE_MASK, BITS_IN_WORD - bitPerWordPackedChar);
+	// shift aligns the significant bits of a byte's first character with mask
+	shift = BITS_IN_WORD - BITS_IN_BYTE + bitPerBytePackedChar - bitPerWordPackedChar;
+
+	// every word except the last one
+	while ((wordProcessed + 1) * charPerWord < textLength) {
+
+		// unpack the bytes of this word, one character per buffer[] slot
+		k = 0;
+		for (i=0; i<bytePerIteration; i++) {
+			c = (unsigned int)input[byteProcessed] << shift;
+			for (j=0; j<charPerByte; j++) {
+				buffer[k] = c & mask;
+				c <<= bitPerBytePackedChar;
+				k++;
+			}
+			byteProcessed++;
+		}
+
+		// repack: character i lands i * bitPerWordPackedChar bits below the top
+		c = 0;
+		for (i=0; i<charPerWord; i++) {
+			c |= buffer[i] >> bitPerWordPackedChar * i;
+		}
+		output[wordProcessed] = c;
+		wordProcessed++;
+
+	}
+
+	// last word: unpack just enough bytes to cover the remaining characters
+	k = 0;
+	for (i=0; i < (textLength - wordProcessed * charPerWord - 1) / charPerByte + 1; i++) {
+		c = (unsigned int)input[byteProcessed] << shift;
+		for (j=0; j<charPerByte; j++) {
+			buffer[k] = c & mask;
+			c <<= bitPerBytePackedChar;
+			k++;
+		}
+		byteProcessed++;
+	}
+
+	// only the remaining characters are packed; unused low bits stay zero
+	c = 0;
+	for (i=0; i<textLength - wordProcessed * charPerWord; i++) {
+		c |= buffer[i] >> bitPerWordPackedChar * i;
+	}
+	output[wordProcessed] = c;
+}
+
+/*
+ * Allocate an empty BWT object sized for a text of textLength characters.
+ *
+ * cumulativeFreq and occValueMajor are allocated and zeroed. bwtCode and
+ * occValue are left NULL with zero sizes. If decodeTable is NULL a fresh
+ * table is allocated and filled; otherwise the caller's table is stored.
+ * Exits with an error message if any allocation fails.
+ */
+BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable)
+{
+	BWT *bwt;
+
+	bwt = (BWT*)calloc(1, sizeof(BWT));
+	if (bwt == NULL) {
+		fprintf(stderr, "BWTCreate(): Fail to allocate memory!\n");
+		exit(1);
+	}
+
+	bwt->textLength = 0;
+
+	bwt->cumulativeFreq = (bgint_t*)calloc((ALPHABET_SIZE + 1), sizeof(bgint_t));
+	if (bwt->cumulativeFreq == NULL) {
+		fprintf(stderr, "BWTCreate(): Fail to allocate memory!\n");
+		exit(1);
+	}
+	initializeVAL_bg(bwt->cumulativeFreq, ALPHABET_SIZE + 1, 0);
+
+	bwt->bwtSizeInWord = 0;
+
+	// Generate decode tables
+	if (decodeTable == NULL) {
+		bwt->decodeTable = (unsigned*)calloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int));
+		if (bwt->decodeTable == NULL) {
+			fprintf(stderr, "BWTCreate(): Fail to allocate memory!\n");
+			exit(1);
+		}
+		GenerateDNAOccCountTable(bwt->decodeTable);
+	} else {
+		bwt->decodeTable = decodeTable;
+	}
+
+	bwt->occMajorSizeInWord = BWTOccValueMajorSizeInWord(textLength);
+	bwt->occValueMajor = (bgint_t*)calloc(bwt->occMajorSizeInWord, sizeof(bgint_t));
+	if (bwt->occValueMajor == NULL) {
+		fprintf(stderr, "BWTCreate(): Fail to allocate memory!\n");
+		exit(1);
+	}
+
+	bwt->occSizeInWord = 0;
+	bwt->occValue = NULL;
+
+	return bwt;
+}
+
+/*
+ * Allocate an incremental-construction context for a text of textLength
+ * characters.
+ *
+ * Both build-size caps are clamped to textLength. The working memory is
+ * sized from the resident BWT, the minor occurrence table, a per-iteration
+ * buffer and the temporary arrays, with MIN_AVAILABLE_WORD as a lower bound.
+ * Exits with an error message if incMaxBuildSize is zero after clamping (it
+ * is the divisor of the iteration estimate) or if an allocation fails.
+ */
+BWTInc *BWTIncCreate(const bgint_t textLength, unsigned int initialMaxBuildSize, unsigned int incMaxBuildSize)
+{
+	BWTInc *bwtInc;
+	unsigned int i, n_iter;
+
+	if (textLength < incMaxBuildSize) incMaxBuildSize = textLength;
+	if (textLength < initialMaxBuildSize) initialMaxBuildSize = textLength;
+	if (incMaxBuildSize == 0) {
+		// fail cleanly instead of dividing by zero below
+		fprintf(stderr, "BWTIncCreate(): incMaxBuildSize is zero (empty text?)!\n");
+		exit(1);
+	}
+
+	bwtInc = (BWTInc*)calloc(1, sizeof(BWTInc));
+	if (bwtInc == NULL) {
+		fprintf(stderr, "BWTIncCreate(): Fail to allocate memory!\n");
+		exit(1);
+	}
+	bwtInc->numberOfIterationDone = 0;
+	bwtInc->bwt = BWTCreate(textLength, NULL);
+	bwtInc->initialMaxBuildSize = initialMaxBuildSize;
+	bwtInc->incMaxBuildSize = incMaxBuildSize;
+	bwtInc->cumulativeCountInCurrentBuild = (bgint_t*)calloc((ALPHABET_SIZE + 1), sizeof(bgint_t));
+	if (bwtInc->cumulativeCountInCurrentBuild == NULL) {
+		fprintf(stderr, "BWTIncCreate(): Fail to allocate memory!\n");
+		exit(1);
+	}
+	initializeVAL_bg(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
+
+	// Build frequently accessed data
+	bwtInc->packedShift = (unsigned*)calloc(CHAR_PER_WORD, sizeof(unsigned int));
+	if (bwtInc->packedShift == NULL) {
+		fprintf(stderr, "BWTIncCreate(): Fail to allocate memory!\n");
+		exit(1);
+	}
+	// packedShift[i] is the right shift that brings character i of a word to the low bits
+	for (i=0; i<CHAR_PER_WORD; i++)
+		bwtInc->packedShift[i] = BITS_IN_WORD - (i+1) * BIT_PER_CHAR;
+
+	n_iter = (textLength - initialMaxBuildSize) / incMaxBuildSize + 1;
+	bwtInc->availableWord = BWTResidentSizeInWord(textLength) + BWTOccValueMinorSizeInWord(textLength) // minimal memory requirement
+		+ OCC_INTERVAL / BIT_PER_CHAR * n_iter * 2 * (sizeof(bgint_t) / 4) // buffer at the end of occ array 
+		+ incMaxBuildSize/5 * 3 * (sizeof(bgint_t) / 4); // space for the 3 temporary arrays in each iteration
+	if (bwtInc->availableWord < MIN_AVAILABLE_WORD) bwtInc->availableWord = MIN_AVAILABLE_WORD; // lh3: otherwise segfault when availableWord is too small
+	fprintf(stderr, "[%s] textLength=%ld, availableWord=%ld\n", __func__, (long)textLength, (long)bwtInc->availableWord);
+	bwtInc->workingMemory = (unsigned*)calloc(bwtInc->availableWord, BYTES_IN_WORD);
+	if (bwtInc->workingMemory == NULL) {
+		// this is by far the largest allocation of the construction
+		fprintf(stderr, "BWTIncCreate(): Fail to allocate working memory!\n");
+		exit(1);
+	}
+
+	return bwtInc;
+}
+// for BWTIncConstruct()
+/*
+ * Unpack numChar 2-bit characters from packedText into rank[0..numChar-1]
+ * (one character per entry) and tally them into cumulativeCount.
+ *
+ * Each character c increments cumulativeCount[c+1]; the final three
+ * additions turn entries 1..4 into running totals. The counts are added to
+ * whatever cumulativeCount already holds, so presumably the caller zeroes it
+ * first (TODO confirm). numChar must be at least 1: (numChar - 1) is
+ * unsigned and wraps around for 0.
+ */
+static void BWTIncPutPackedTextToRank(const unsigned int *packedText, bgint_t* __restrict rank,
+									  bgint_t* __restrict cumulativeCount, const bgint_t numChar)
+{
+	bgint_t i;
+	unsigned int j;
+	unsigned int c, t;
+	unsigned int packedMask;
+	bgint_t rankIndex;
+	bgint_t lastWord;
+	unsigned int numCharInLastWord;
+
+	lastWord = (numChar - 1) / CHAR_PER_WORD;
+	numCharInLastWord = numChar - lastWord * CHAR_PER_WORD;
+
+	// mask for one character in the low bits
+	packedMask = ALL_ONE_MASK >> (BITS_IN_WORD - BIT_PER_CHAR);
+	// rank[] is filled from its last entry backwards
+	rankIndex = numChar - 1;
+
+	// drop the unused low bits of the last word, then peel characters off the low end
+	t = packedText[lastWord] >> (BITS_IN_WORD - numCharInLastWord * BIT_PER_CHAR);
+	for (i=0; i<numCharInLastWord; i++) {
+		c = t & packedMask;
+		cumulativeCount[c+1]++;
+		rank[rankIndex] = c;
+		rankIndex--;
+		t >>= BIT_PER_CHAR;
+	}
+
+	for (i=lastWord; i--;) {	// loop from lastWord - 1 to 0
+		t = packedText[i];
+		for (j=0; j<CHAR_PER_WORD; j++) {
+			c = t & packedMask;
+			cumulativeCount[c+1]++;
+			rank[rankIndex] = c;
+			rankIndex--;
+			t >>= BIT_PER_CHAR;
+		}
+	}
+
+	// Convert occurrence to cumulativeCount
+	cumulativeCount[2] += cumulativeCount[1];
+	cumulativeCount[3] += cumulativeCount[2];
+	cumulativeCount[4] += cumulativeCount[3];
+}
+
+
+/*
+ * Count the four symbols among the first `index` characters of the packed
+ * text `dna` and store the counts in occCount[0..3].
+ *
+ * dnaDecodeTable maps a 16-bit value to four byte-wide counts (see
+ * GenerateDNAOccCountTable()). The text is processed in blocks of 256
+ * characters, then whole words, then the leading characters of one last
+ * word.
+ */
+static void ForwardDNAAllOccCountNoLimit(const unsigned int*  dna, const bgint_t index,
+										 bgint_t* __restrict occCount, const unsigned int*  dnaDecodeTable)
+{
+	// truncateRightMask[n] keeps the n leading characters (top 2*n bits) of a word
+	static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
+											   0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
+											   0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
+											   0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
+
+	bgint_t iteration, i;
+	unsigned int wordToCount, charToCount;
+	unsigned int j, c, sum;
+
+	occCount[0] = 0;
+	occCount[1] = 0;
+	occCount[2] = 0;
+	occCount[3] = 0;
+
+	// split index into 256-character blocks, whole 16-character words, and leftover characters
+	iteration = index / 256;
+	wordToCount = (index - iteration * 256) / 16;
+	charToCount = index - iteration * 256 - wordToCount * 16;
+
+	for (i=0; i<iteration; i++) {
+
+		sum = 0;
+		for (j=0; j<16; j++) {
+			sum += dnaDecodeTable[*dna >> 16];
+			sum += dnaDecodeTable[*dna & 0x0000FFFF];
+			dna++;
+		}
+		if (!DNA_OCC_SUM_EXCEPTION(sum)) {
+			occCount[0] += sum & 0x000000FF;	sum >>= 8;
+			occCount[1] += sum & 0x000000FF;	sum >>= 8;
+			occCount[2] += sum & 0x000000FF;	sum >>= 8;
+			occCount[3] += sum;
+		} else {
+			// a byte-wide counter that reaches 256 carries into the next byte
+			// (or out of the word for symbol 3)
+			// only some or all of the 3 bits are on
+			// in reality, only one of the four cases are possible
+			if (sum == 0x00000100) {
+				occCount[0] += 256;
+			} else if (sum == 0x00010000) {
+				occCount[1] += 256;
+			} else if (sum == 0x01000000) {
+				occCount[2] += 256;
+			} else if (sum == 0x00000000) {
+				occCount[3] += 256;
+			} else {
+				fprintf(stderr, "ForwardDNAAllOccCountNoLimit(): DNA occ sum exception!\n");
+				exit(1);
+			}
+		}
+
+	}
+
+	sum = 0;
+	for (j=0; j<wordToCount; j++) {
+		sum += dnaDecodeTable[*dna >> 16];
+		sum += dnaDecodeTable[*dna & 0x0000FFFF];
+		dna++;
+	}
+
+	if (charToCount > 0) {
+		c = *dna & truncateRightMask[charToCount];	// increase count of 'a' by 16 - c;
+		sum += dnaDecodeTable[c >> 16];
+		sum += dnaDecodeTable[c & 0xFFFF];
+		sum += charToCount - 16;	// decrease count of 'a' by 16 - positionToProcess
+	}
+
+	occCount[0] += sum & 0x000000FF;	sum >>= 8;
+	occCount[1] += sum & 0x000000FF;	sum >>= 8;
+	occCount[2] += sum & 0x000000FF;	sum >>= 8;
+	occCount[3] += sum;
+}
+
+/*
+ * Fill the packed BWT `bwt` from the rank array of a sorted text.
+ *
+ * For every i in 1..numChar the character code belonging to relativeRank[i-1]
+ * is recovered by counting how many of cumulativeCount[1..3] that rank
+ * exceeds, and is OR-ed into the BWT slot selected by relativeRank[i].
+ * relativeRank[0] is treated as the position of the '$' terminator, which is
+ * not stored: slots above it are shifted down by one.  Code 0 is never
+ * written, so the destination words must already be zero on entry.
+ * packedShift[n] is the left shift that places a code at character offset n
+ * within a word.
+ */
+static void BWTIncBuildPackedBwt(const bgint_t *relativeRank, unsigned int* __restrict bwt, const bgint_t numChar,
+								 const bgint_t *cumulativeCount, const unsigned int *packedShift) {
+
+	const bgint_t inverseSa0 = relativeRank[0];
+	bgint_t rankOfPrev = inverseSa0;
+	bgint_t rankOfCur;
+	bgint_t slot, wordIndex;
+	bgint_t pos;
+	unsigned int code;
+
+	for (pos=1; pos<=numChar; pos++, rankOfPrev = rankOfCur) {
+		rankOfCur = relativeRank[pos];
+
+		// strict '>' because $ occupies one rank as well
+		code = 0;
+		code += (rankOfPrev > cumulativeCount[1]);
+		code += (rankOfPrev > cumulativeCount[2]);
+		code += (rankOfPrev > cumulativeCount[3]);
+		if (code == 0) {
+			continue;	// code 0 is represented by the pre-cleared bits
+		}
+
+		// skip over the unencoded $ at inverseSa0
+		slot = rankOfCur - (rankOfCur > inverseSa0);
+		wordIndex = slot / CHAR_PER_WORD;
+		bwt[wordIndex] |= code << packedShift[slot - wordIndex * CHAR_PER_WORD];
+	}
+}
+
+/*
+ * Return the stored occurrence count of `character` at explicit sample
+ * number occIndexExplicit.
+ *
+ * The result is the major count covering that sample plus a 16-bit minor
+ * count.  Each word of bwt->occValue holds two minor counts: the upper half
+ * serves samples whose number is a multiple of OCC_VALUE_PER_WORD, the lower
+ * half serves the others.
+ */
+static inline bgint_t BWTOccValueExplicit(const BWT *bwt, const bgint_t occIndexExplicit,
+											   const unsigned int character)
+{
+	const bgint_t occIndexMajor = occIndexExplicit * OCC_INTERVAL / OCC_INTERVAL_MAJOR;
+	const bgint_t majorCount = bwt->occValueMajor[occIndexMajor * ALPHABET_SIZE + character];
+	const unsigned int packedMinor = bwt->occValue[occIndexExplicit / OCC_VALUE_PER_WORD * ALPHABET_SIZE + character];
+	const unsigned int minorCount = (occIndexExplicit % OCC_VALUE_PER_WORD == 0)
+										? (packedMinor >> 16)
+										: (packedMinor & 0x0000FFFF);
+
+	return majorCount + minorCount;
+}
+
+
+/*
+ * Count how often `character` (a 2-bit code, 0..3) occurs among the first
+ * `index` characters of the packed text starting at `dna`.
+ *
+ * dnaDecodeTable is looked up with each 16-bit half word and its entries are
+ * summed into an accumulator holding one 8-bit counter per code, so the
+ * result is only meaningful while every count stays below 256.
+ */
+static unsigned int ForwardDNAOccCount(const unsigned int*  dna, const unsigned int index, const unsigned int character,
+									   const unsigned int*  dnaDecodeTable)
+{
+	// truncateRightMask[n] keeps the n leading (most significant) characters of a word
+	static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
+											   0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
+											   0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
+											   0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
+
+	const unsigned int fullWords = index / 16;
+	const unsigned int tailChars = index % 16;
+	const unsigned int *word = dna;
+	const unsigned int *const stop = dna + fullWords;
+	unsigned int packed = 0;
+	unsigned int masked;
+
+	// whole words
+	for (; word != stop; word++) {
+		packed += dnaDecodeTable[*word >> 16];
+		packed += dnaDecodeTable[*word & 0x0000FFFF];
+	}
+
+	// leading tailChars characters of the next word
+	if (tailChars > 0) {
+		masked = *word & truncateRightMask[tailChars];
+		packed += dnaDecodeTable[masked >> 16];
+		packed += dnaDecodeTable[masked & 0xFFFF];
+		// the 16 - tailChars masked-out positions were counted as code 0; take them back out
+		packed += tailChars - 16;
+	}
+
+	return (packed >> (character * 8)) & 0x000000FF;
+
+}
+
+/*
+ * Count how often `character` (a 2-bit code, 0..3) occurs among the `index`
+ * characters that immediately precede the word `dna` points at.
+ *
+ * Whole words dna[-1], dna[-2], ... are counted completely; when `index` is
+ * not a multiple of 16, the trailing (least significant) characters of the
+ * next earlier word supply the rest.  As with the forward variant, the
+ * per-code counters are 8 bits wide.
+ */
+static unsigned int BackwardDNAOccCount(const unsigned int*  dna, const unsigned int index, const unsigned int character,
+										const unsigned int*  dnaDecodeTable)
+{
+	// truncateLeftMask[n] keeps the n trailing (least significant) characters of a word
+	static const unsigned int truncateLeftMask[16] =  { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
+											   0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
+											   0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
+											   0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
+
+	const unsigned int fullWords = index / 16;
+	const unsigned int tailChars = index % 16;
+	const unsigned int *const firstFull = dna - fullWords;	// earliest completely counted word
+	unsigned int packed = 0;
+	unsigned int masked, w;
+
+	// trailing tailChars characters of the word just before the full ones
+	if (tailChars > 0) {
+		masked = firstFull[-1] & truncateLeftMask[tailChars];
+		packed += dnaDecodeTable[masked >> 16];
+		packed += dnaDecodeTable[masked & 0xFFFF];
+		// the 16 - tailChars masked-out positions were counted as code 0; take them back out
+		packed += tailChars - 16;
+	}
+
+	// whole words
+	for (w=0; w<fullWords; w++) {
+		packed += dnaDecodeTable[firstFull[w] >> 16];
+		packed += dnaDecodeTable[firstFull[w] & 0x0000FFFF];
+	}
+
+	return (packed >> (character * 8)) & 0x000000FF;
+
+}
+
+/*
+ * Return the number of occurrences of `character` in the BWT before position
+ * `index`.
+ *
+ * The count stored at the nearest explicit sample is taken as a base and
+ * corrected by scanning the packed BWT between the sample and `index`:
+ * forwards (added) when the sample lies below `index`, backwards
+ * (subtracted) when it lies above.
+ */
+bgint_t BWTOccValue(const BWT *bwt, bgint_t index, const unsigned int character)
+{
+	bgint_t sampleNumber, samplePos, sampleCount;
+
+	// $ is supposed to sit at inverseSa0 but is not encoded,
+	// so positions beyond it are shifted down by one
+	if (index > bwt->inverseSa0) {
+		index--;
+	}
+
+	// Bidirectional encoding: pick the sample closest to index
+	sampleNumber = (index + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;
+	samplePos = sampleNumber * OCC_INTERVAL;
+	sampleCount = BWTOccValueExplicit(bwt, sampleNumber, character);
+
+	if (samplePos < index) {
+		return sampleCount + ForwardDNAOccCount(bwt->bwtCode + samplePos / CHAR_PER_WORD, index - samplePos, character, bwt->decodeTable);
+	}
+	if (samplePos > index) {
+		return sampleCount - BackwardDNAOccCount(bwt->bwtCode + samplePos / CHAR_PER_WORD, samplePos - index, character, bwt->decodeTable);
+	}
+	return sampleCount;
+}
+
+/*
+ * Compute, for every character of the new packed text, its rank with respect
+ * to the existing BWT, and bucket the results by character code.
+ *
+ * The text is walked from its last character to its first.  Starting from
+ * bwt->inverseSa0, each step maps the current rank through
+ * cumulativeFreq[c] + BWTOccValue(bwt, rank, c) + 1.  The rank goes to
+ * absoluteRank[] and the text position to seq[], at a slot inside the bucket
+ * of code c delimited by cumulativeCount[c] and cumulativeCount[c+1]: ranks
+ * above inverseSa0 fill the bucket from its end, the others from its start.
+ * The slot left free at the start-side cursor of bucket
+ * firstCharInLastIteration finally receives (inverseSa0, numChar), standing
+ * for the suffix made of all previously processed text; its index is
+ * returned.
+ *
+ * Only numChar / CHAR_PER_WORD whole words of packedText are read.
+ */
+static bgint_t BWTIncGetAbsoluteRank(BWT *bwt, bgint_t* __restrict absoluteRank, bgint_t* __restrict seq,
+										  const unsigned int *packedText, const bgint_t numChar,
+										  const bgint_t* cumulativeCount, const unsigned int firstCharInLastIteration)
+{
+	bgint_t fillFromStart[ALPHABET_SIZE];
+	bgint_t fillFromEnd[ALPHABET_SIZE];
+	const unsigned int charMask = ALL_ONE_MASK >> (BITS_IN_WORD - BIT_PER_CHAR);
+	bgint_t rank = bwt->inverseSa0;
+	bgint_t textPos = numChar - 1;
+	bgint_t wordIndex, slot;
+	unsigned int word, code, k;
+
+	for (k=0; k<ALPHABET_SIZE; k++) {
+		fillFromStart[k] = cumulativeCount[k];
+		fillFromEnd[k] = cumulativeCount[k+1] - 1;
+	}
+
+	for (wordIndex = numChar / CHAR_PER_WORD; wordIndex--;) {	// from the last whole word down to word 0
+		word = packedText[wordIndex];
+		for (k=0; k<CHAR_PER_WORD; k++, word >>= BIT_PER_CHAR, textPos--) {
+			code = word & charMask;
+			rank = bwt->cumulativeFreq[code] + BWTOccValue(bwt, rank, code) + 1;
+			// Counting sort on the first character of the suffix; the two fill
+			// directions keep the right entry free for inverseSa0
+			slot = (rank > bwt->inverseSa0) ? fillFromEnd[code]-- : fillFromStart[code]++;
+			seq[slot] = textPos;
+			absoluteRank[slot] = rank;
+		}
+	}
+
+	// entry representing the substring of all preceding characters
+	slot = fillFromStart[firstCharInLastIteration];
+	absoluteRank[slot] = bwt->inverseSa0;
+	seq[slot] = numChar;
+
+	return slot;
+}
+
+#define EQUAL_KEY_THRESHOLD	4	// Partition for equal key if data array size / the number of data with equal value with pivot < EQUAL_KEY_THRESHOLD
+
+// Exchange items a and b in both of the parallel arrays.
+static inline void BWTIncSwapKeyAndSeq(bgint_t *key, bgint_t *seq, const int64_t a, const int64_t b)
+{
+	const bgint_t heldSeq = seq[a];
+	const bgint_t heldKey = key[a];
+
+	seq[a] = seq[b];
+	key[a] = key[b];
+	seq[b] = heldSeq;
+	key[b] = heldKey;
+}
+
+/*
+ * Sort key[0..numItem-1] into ascending order, applying every move to seq[]
+ * as well so that the two arrays stay paired.
+ *
+ * Non-recursive quicksort with a median-of-three pivot.  Ranges shorter than
+ * BWTINC_INSERT_SORT_NUM_ITEM are finished by insertion sort.  After each
+ * partition the larger side is pushed on an explicit stack of 32 entries and
+ * the smaller side is processed first.  When many keys equal the pivot,
+ * those keys are gathered next to the pivot and left out of the lower
+ * partition.
+ */
+static void BWTIncSortKey(bgint_t* __restrict key, bgint_t* __restrict seq, const bgint_t numItem)
+{
+	int64_t lo, hi, pivot, scan;
+	int64_t left, right;
+	int64_t pendingLo[32], pendingHi[32];
+	int pending = 0;
+	int64_t i, j;
+	bgint_t heldSeq, heldKey;
+	int64_t pivotTies;
+
+	if (numItem < 2) return;
+
+	lo = 0;
+	hi = numItem - 1;
+
+	for (;;) {
+
+		for (;;) {
+
+			// Small range: insertion sort and leave the inner loop
+			if (hi - lo < BWTINC_INSERT_SORT_NUM_ITEM) {
+				for (i=lo+1; i<=hi; i++) {
+					heldSeq = seq[i];
+					heldKey = key[i];
+					j = i;
+					while (j > lo && key[j-1] > heldKey) {
+						seq[j] = seq[j-1];
+						key[j] = key[j-1];
+						j--;
+					}
+					if (j != i) {
+						seq[j] = heldSeq;
+						key[j] = heldKey;
+					}
+				}
+				break;
+			}
+
+			// Order the lowest, middle and highest items; the middle one becomes the pivot
+			pivot = average(lo, hi);
+			if (key[lo] > key[pivot]) {
+				BWTIncSwapKeyAndSeq(key, seq, lo, pivot);
+			}
+			if (key[lo] > key[hi]) {
+				BWTIncSwapKeyAndSeq(key, seq, lo, hi);
+			}
+			if (key[pivot] > key[hi]) {
+				BWTIncSwapKeyAndSeq(key, seq, pivot, hi);
+			}
+
+			// Partition, counting the keys found equal to the pivot on the way
+			pivotTies = 0;
+			left = lo + 1;
+			right = hi - 1;
+
+			for (;;) {
+				while (left <= right && key[left] <= key[pivot]) {
+					pivotTies += (key[left] == key[pivot]);
+					left++;
+				}
+				while (left < right && key[pivot] < key[right]) {
+					right--;
+				}
+				if (left >= right) {
+					break;
+				}
+				pivotTies += (key[pivot] == key[right]);
+				BWTIncSwapKeyAndSeq(key, seq, left, right);
+				if (right == pivot) {
+					// the pivot item itself has just been moved
+					pivot = left;
+				}
+				left++;
+				right--;
+			}
+
+			// left becomes the last slot of the low side, right the first slot of the high side
+			right = left;
+			left--;
+
+			// park the pivot at the end of the low side
+			BWTIncSwapKeyAndSeq(key, seq, pivot, left);
+
+			if (hi - lo + BWTINC_INSERT_SORT_NUM_ITEM <= EQUAL_KEY_THRESHOLD * pivotTies) {
+
+				// Many keys equal the pivot: collect them next to it and
+				// shrink the low side so that they are not sorted again
+				scan = lo;
+
+				for (;;) {
+					while (scan < left && key[scan] < key[left]) {
+						scan++;
+					}
+					while (scan < left && key[left] == key[left - 1]) {
+						left--;
+					}
+					if (scan >= left) {
+						break;
+					}
+					BWTIncSwapKeyAndSeq(key, seq, scan, left - 1);
+					scan++;
+					left--;
+				}
+
+			}
+
+			if (left - lo > hi - right) {
+				// low side is larger: defer it, continue with the high side
+				pendingLo[pending] = lo;
+				pendingHi[pending] = left - 1;
+				pending++;
+				lo = right;
+			} else {
+				// high side is larger (or equal): defer it, continue with the low side
+				pendingLo[pending] = right;
+				pendingHi[pending] = hi;
+				pending++;
+				if (left <= lo) {
+					// every key of the low side equals the pivot
+					break;
+				}
+				hi = left - 1;
+			}
+		}
+
+		// Resume with a deferred range, if any
+		if (pending == 0) {
+			return;
+		}
+		pending--;
+		lo = pendingLo[pending];
+		hi = pendingHi[pending];
+	}
+}
+
+
+/*
+ * Turn the absolute ranks in sortedRank[0..numItem] (already ordered within
+ * each character bucket) into group numbers in relativeRank[], and adjust
+ * sortedRank[] and seq[] for the following steps.
+ *
+ * Items are visited from index numItem down to 0:
+ *   - every sortedRank entry greater than oldInverseSa0 is decremented,
+ *     because '$' is not encoded in the BWT the ranks will be merged into;
+ *   - relativeRank[seq[i]] receives the index of the highest item of the
+ *     group containing i.  Neighbouring items share a group when their
+ *     sortedRank values are equal, except that a group never spans a
+ *     cumulativeCount[] boundary and the item whose rank equals
+ *     oldInverseSa0 always forms a group of its own;
+ *   - when a group turns out to hold a single item, seq[] at that index is
+ *     overwritten with a negative value: -1, or one less than the value at
+ *     the next higher index if that one is negative too, so the lowest
+ *     element of a run of singleton groups carries the negated run length.
+ *     NOTE(review): presumably the sorted-group marker QSufSortSuffixSort
+ *     expects from a pre-grouped input - confirm against QSufSort.c.
+ *
+ * oldInverseSa0 is passed by value; the local copy is bumped once the
+ * matching item has been seen so that the special case fires only once.
+ */
+static void BWTIncBuildRelativeRank(bgint_t* __restrict sortedRank, bgint_t* __restrict seq,
+									bgint_t* __restrict relativeRank, const bgint_t numItem,
+									bgint_t oldInverseSa0, const bgint_t *cumulativeCount)
+{
+	bgint_t i, c;
+	bgint_t s, r;
+	bgint_t lastRank, lastIndex;
+	bgint_t freq;
+
+	// The topmost item opens the first group
+	lastIndex = numItem;
+	lastRank = sortedRank[numItem];
+	if (lastRank > oldInverseSa0) {
+		sortedRank[numItem]--;	// to prepare for merging; $ is not encoded in bwt
+	}
+	s = seq[numItem];
+	relativeRank[s] = numItem;
+	if (lastRank == oldInverseSa0) {
+		oldInverseSa0++;	// so that this segment of code is not run again
+		lastRank++;			// so that oldInverseSa0 become a sorted group with 1 item
+	}
+
+	// freq is the lowest index belonging to the bucket of the current character c
+	c = ALPHABET_SIZE - 1;
+	freq = cumulativeCount[c];
+
+	for (i=numItem; i--;) {	// from numItem - 1 to 0
+		r = sortedRank[i];
+		if (r > oldInverseSa0)
+			sortedRank[i]--;	// to prepare for merging; $ is not encoded in bwt
+		s = seq[i];
+		if (i < freq) {
+			// item i belongs to the bucket of the previous character
+			if (lastIndex >= freq)
+				lastRank++;	// to trigger the group across alphabet boundary to be split
+			c--;
+			freq = cumulativeCount[c];
+		}
+		if (r == lastRank) {
+			// same group as the item above
+			relativeRank[s] = lastIndex;
+		} else {
+			// item i opens a new group; first close the one above it
+			if (i == lastIndex - 1) {
+				// the closed group had exactly one item: mark it as sorted
+				if (lastIndex < numItem && (sbgint_t)seq[lastIndex + 1] < 0) {
+					seq[lastIndex] = seq[lastIndex + 1] - 1;
+				} else {
+					seq[lastIndex] = (bgint_t)-1;
+				}
+			}
+			lastIndex = i;
+			lastRank = r;
+			relativeRank[s] = i;
+			if (r == oldInverseSa0) {
+				oldInverseSa0++;	// so that this segment of code is not run again
+				lastRank++;			// so that oldInverseSa0 become a sorted group with 1 item
+			}
+		}
+	}
+
+}
+
+/*
+ * Produce the BWT characters of the newly inserted text, one code per array
+ * element (not packed).
+ *
+ * For every i in 1..numChar the code belonging to relativeRank[i-1] is found
+ * by counting how many of cumulativeCount[1..3] that rank has reached, and is
+ * stored at insertBwt[relativeRank[i]].
+ */
+static void BWTIncBuildBwt(unsigned int* insertBwt, const bgint_t *relativeRank, const bgint_t numChar,
+						   const bgint_t *cumulativeCount)
+{
+	const bgint_t *cursor = relativeRank;
+	const bgint_t *const last = relativeRank + numChar;
+	bgint_t rankOfPrev = *cursor;
+	bgint_t rankOfCur;
+	unsigned int code;
+
+	while (cursor != last) {
+		rankOfCur = *++cursor;
+
+		code = 0;
+		code += (rankOfPrev >= cumulativeCount[1]);
+		code += (rankOfPrev >= cumulativeCount[2]);
+		code += (rankOfPrev >= cumulativeCount[3]);
+
+		insertBwt[rankOfCur] = code;
+		rankOfPrev = rankOfCur;
+	}
+}
+
+/*
+ * Merge the characters of insertBwt into the packed oldBwt, writing the
+ * packed result to mergedBwt.
+ *
+ *   sortedRank   - sortedRank[k] (k = 0..numInsertBwt) is the number of old
+ *                  BWT characters that precede inserted character k; entries
+ *                  must be in non-decreasing order.  The value 0 is a
+ *                  special marker: that entry is skipped (it stands for the
+ *                  new inverseSa0, which is not encoded).
+ *   oldBwt       - existing BWT, CHAR_PER_WORD codes per word, first
+ *                  character in the most significant bits.
+ *   insertBwt    - characters to insert, one code per element.
+ *   mergedBwt    - destination, same packing as oldBwt.  It may start inside
+ *                  the region occupied by oldBwt as long as it starts at a
+ *                  lower address (see the note on mergedBwt[0] below).
+ *   numOldBwt    - number of characters in oldBwt.
+ *   numInsertBwt - index of the last entry of sortedRank / insertBwt.
+ *
+ * Old characters are copied word-wise with shifts, which may deposit extra
+ * bits past the copied run; they are cleared after each run.  The shifted
+ * copies read oldBwt[oWord+1], i.e. possibly one word beyond the last word
+ * that is actually needed.
+ * NOTE(review): that read relies on the BWT buffer having at least one word
+ * of slack after the text - confirm with the allocation code.
+ */
+static void BWTIncMergeBwt(const bgint_t *sortedRank, const unsigned int* oldBwt, const unsigned int *insertBwt,
+						   unsigned int* __restrict mergedBwt, const bgint_t numOldBwt, const bgint_t numInsertBwt)
+{
+	bgint_t leftShift, rightShift;
+	bgint_t o;
+	bgint_t oIndex, iIndex, mIndex;	// character cursors in the old, inserted and merged sequences
+	bgint_t mWord, mChar, oWord, oChar;	// word / in-word character positions of mIndex and oIndex
+	bgint_t numInsert;
+
+	oIndex = 0;
+	iIndex = 0;
+	mIndex = 0;
+
+	mWord = 0;
+	mChar = 0;
+
+	mergedBwt[0] = 0;	// this can be cleared as merged Bwt slightly shift to the left in each iteration
+
+	while (oIndex < numOldBwt) {
+
+		// copy from insertBwt: everything that goes in front of old character oIndex
+		while (iIndex <= numInsertBwt && sortedRank[iIndex] <= oIndex) {
+			if (sortedRank[iIndex] != 0) {	// special value to indicate that this is for new inverseSa0
+				mergedBwt[mWord] |= insertBwt[iIndex] << (BITS_IN_WORD - (mChar + 1) * BIT_PER_CHAR);
+				mIndex++;
+				mChar++;
+				if (mChar == CHAR_PER_WORD) {
+					mChar = 0;
+					mWord++;
+					mergedBwt[mWord] = 0;	// no need to worry about crossing mergedBwt boundary
+				}
+			}
+			iIndex++;
+		}
+
+		// Copy from oldBwt to mergedBwt: the run [oIndex, o) up to the next insertion point
+		if (iIndex <= numInsertBwt) {
+			o = sortedRank[iIndex];
+		} else {
+			o = numOldBwt;
+		}
+		numInsert = o - oIndex;
+
+		oWord = oIndex / CHAR_PER_WORD;
+		oChar = oIndex - oWord * CHAR_PER_WORD;
+		if (oChar > mChar) {
+			// source is further into its word than the destination: shift left
+			leftShift = (oChar - mChar) * BIT_PER_CHAR;
+			rightShift = (CHAR_PER_WORD + mChar - oChar) * BIT_PER_CHAR;
+			mergedBwt[mWord] = mergedBwt[mWord]
+								| (oldBwt[oWord] << (oChar * BIT_PER_CHAR) >> (mChar * BIT_PER_CHAR))
+								| (oldBwt[oWord+1] >> rightShift);
+			oIndex += min(numInsert, CHAR_PER_WORD - mChar);
+			while (o > oIndex) {
+				oWord++;
+				mWord++;
+				mergedBwt[mWord] = (oldBwt[oWord] << leftShift) | (oldBwt[oWord+1] >> rightShift);
+				oIndex += CHAR_PER_WORD;
+			}
+		} else if (oChar < mChar) {
+			// destination is further into its word than the source: shift right
+			rightShift = (mChar - oChar) * BIT_PER_CHAR;
+			leftShift = (CHAR_PER_WORD + oChar - mChar) * BIT_PER_CHAR;
+			mergedBwt[mWord] = mergedBwt[mWord]
+								| (oldBwt[oWord] << (oChar * BIT_PER_CHAR) >> (mChar * BIT_PER_CHAR));
+			oIndex += min(numInsert, CHAR_PER_WORD - mChar);
+			while (o > oIndex) {
+				oWord++;
+				mWord++;
+				mergedBwt[mWord] = (oldBwt[oWord-1] << leftShift) | (oldBwt[oWord] >> rightShift);
+				oIndex += CHAR_PER_WORD;
+			}
+		} else { // oChar == mChar
+			mergedBwt[mWord] = mergedBwt[mWord] | truncateLeft(oldBwt[oWord], mChar * BIT_PER_CHAR);
+			oIndex += min(numInsert, CHAR_PER_WORD - mChar);
+			while (o > oIndex) {
+				oWord++;
+				mWord++;
+				mergedBwt[mWord] = oldBwt[oWord];
+				oIndex += CHAR_PER_WORD;
+			}
+		}
+		// the word-wise loops may overshoot; settle on the exact run end
+		oIndex = o;
+		mIndex += numInsert;
+
+		// Clear the trailing garbage in mergedBwt
+		mWord = mIndex / CHAR_PER_WORD;
+		mChar = mIndex - mWord * CHAR_PER_WORD;
+		if (mChar == 0) {
+			mergedBwt[mWord] = 0;
+		} else {
+			mergedBwt[mWord] = truncateRight(mergedBwt[mWord], (BITS_IN_WORD - mChar * BIT_PER_CHAR));
+		}
+
+	}
+
+	// copy from insertBwt: whatever sorts after the last old character
+	while (iIndex <= numInsertBwt) {
+		if (sortedRank[iIndex] != 0) {
+			mergedBwt[mWord] |= insertBwt[iIndex] << (BITS_IN_WORD - (mChar + 1) * BIT_PER_CHAR);
+			mIndex++;
+			mChar++;
+			if (mChar == CHAR_PER_WORD) {
+				mChar = 0;
+				mWord++;
+				mergedBwt[mWord] = 0;	// no need to worry about crossing mergedBwt boundary
+			}
+		}
+		iIndex++;
+	}
+}
+
+/*
+ * Zero every bit of bwt->bwtCode that lies beyond the last character of the
+ * text: the unused low-order part of the word containing the end of the
+ * text, and all following words up to BWTResidentSizeInWord(textLength).
+ */
+void BWTClearTrailingBwtCode(BWT *bwt)
+{
+	const bgint_t residentWords = BWTResidentSizeInWord(bwt->textLength);
+	const bgint_t endWord = bwt->textLength / CHAR_PER_WORD;
+	const bgint_t usedBits = (bwt->textLength - endWord * CHAR_PER_WORD) * BIT_PER_CHAR;
+	bgint_t w;
+
+	if (usedBits > 0) {
+		// partially used word: keep the leading usedBits bits only
+		bwt->bwtCode[endWord] = truncateRight(bwt->bwtCode[endWord], BITS_IN_WORD - usedBits);
+	} else if (endWord < residentWords) {
+		// the text ends exactly on a word boundary; this word holds no text
+		bwt->bwtCode[endWord] = 0;
+	}
+
+	w = endWord + 1;
+	while (w < residentWords) {
+		bwt->bwtCode[w] = 0;
+		w++;
+	}
+}
+
+
+// Add the per-code occurrence counts of `wordCount` packed BWT words, starting
+// at bwt[bwtIndex], to count[0..3].  Returns the index of the first word that
+// was not consumed.
+static bgint_t BWTAddOccCountOfWords(const unsigned int *bwt, bgint_t bwtIndex, const unsigned int wordCount,
+									 const unsigned int *decodeTable, bgint_t *count)
+{
+	bgint_t sum = 0;
+	unsigned int c, j;
+
+	for (j=0; j<wordCount; j++) {
+		c = bwt[bwtIndex];
+		sum += decodeTable[c >> 16];
+		sum += decodeTable[c & 0x0000FFFF];
+		bwtIndex++;
+	}
+	if (!DNA_OCC_SUM_EXCEPTION(sum)) {
+		// one 8-bit counter per code, code 0 in the lowest byte
+		count[0] += (sum & 0x000000FF);	sum >>= 8;
+		count[1] += (sum & 0x000000FF);	sum >>= 8;
+		count[2] += (sum & 0x000000FF);	sum >>= 8;
+		count[3] += sum;
+	} else {
+		// a counter overflowed: the whole interval consists of a single code
+		if (sum == 0x00000100) {
+			count[0] += 256;
+		} else if (sum == 0x00010000) {
+			count[1] += 256;
+		} else if (sum == 0x01000000) {
+			count[2] += 256;
+		} else {
+			count[3] += 256;
+		}
+	}
+	return bwtIndex;
+}
+
+// Store one group of packed minor occurrence values: for each code, `before`
+// goes to the upper 16 bits and `after` to the lower 16 bits.
+static void BWTStoreOccValuePair(unsigned int *occValue, const bgint_t occIndex,
+								 const bgint_t *before, const bgint_t *after)
+{
+	unsigned int k;
+
+	for (k=0; k<4; k++) {
+		occValue[occIndex * 4 + k] = (before[k] << 16) | after[k];
+	}
+}
+
+/*
+ * Build the occurrence-count tables for a packed BWT.
+ *
+ *   bwt           - packed BWT code words.
+ *   occValue      - output, minor counts.  Group g (4 words, one per code)
+ *                   holds two consecutive sample points: the count at the
+ *                   first in the upper 16 bits and the count at the second,
+ *                   OCC_INTERVAL characters later, in the lower 16 bits.
+ *                   Minor counts restart from zero at every major sample.
+ *   occValueMajor - output, full-width cumulative counts, 4 per major
+ *                   sample; entry 0 is all zero.
+ *   textLength    - number of characters in the BWT.
+ *   decodeTable   - 16-bit half word -> packed per-code counts.
+ *
+ * Whole intervals of OCC_INTERVAL / CHAR_PER_WORD words are always read, so
+ * the words following the end of the text are included in the last samples;
+ * callers in this file clear them first with BWTClearTrailingBwtCode().
+ */
+void BWTGenerateOccValueFromBwt(const unsigned int*  bwt, unsigned int* __restrict occValue,
+								bgint_t* __restrict occValueMajor,
+								const bgint_t textLength, const unsigned int*  decodeTable)
+{
+	bgint_t numberOfOccValueMajor, numberOfOccValue;
+	unsigned int wordBetweenOccValue;
+	bgint_t numberOfOccIntervalPerMajor;
+	unsigned int k;
+	bgint_t i;
+	bgint_t occMajorIndex;
+	bgint_t occIndex, bwtIndex;
+	bgint_t runningCount[ALPHABET_SIZE];	// counts since the last major sample
+	bgint_t countBefore[ALPHABET_SIZE];		// snapshot of runningCount one interval earlier
+
+	wordBetweenOccValue = OCC_INTERVAL / CHAR_PER_WORD;
+
+	// Calculate occValue
+	numberOfOccValue = (textLength + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;				// Value at both end for bi-directional encoding
+	numberOfOccIntervalPerMajor = OCC_INTERVAL_MAJOR / OCC_INTERVAL;
+	numberOfOccValueMajor = (numberOfOccValue + numberOfOccIntervalPerMajor - 1) / numberOfOccIntervalPerMajor;
+
+	for (k=0; k<4; k++) {
+		runningCount[k] = 0;
+		occValueMajor[k] = 0;
+	}
+
+	occIndex = 0;
+	bwtIndex = 0;
+
+	// Complete major intervals
+	for (occMajorIndex=1; occMajorIndex<numberOfOccValueMajor; occMajorIndex++) {
+
+		for (i=0; i<numberOfOccIntervalPerMajor/2; i++) {
+			// first interval of the pair: store the counts on both of its ends
+			for (k=0; k<4; k++) {
+				countBefore[k] = runningCount[k];
+			}
+			bwtIndex = BWTAddOccCountOfWords(bwt, bwtIndex, wordBetweenOccValue, decodeTable, runningCount);
+			BWTStoreOccValuePair(occValue, occIndex, countBefore, runningCount);
+			occIndex++;
+
+			// second interval of the pair: only advances the running counts
+			bwtIndex = BWTAddOccCountOfWords(bwt, bwtIndex, wordBetweenOccValue, decodeTable, runningCount);
+		}
+
+		for (k=0; k<4; k++) {
+			occValueMajor[occMajorIndex * 4 + k] = occValueMajor[(occMajorIndex - 1) * 4 + k] + runningCount[k];
+			runningCount[k] = 0;
+		}
+
+	}
+
+	// Remaining complete pairs of intervals in the last, partial major interval
+	while (occIndex < (numberOfOccValue-1)/2) {
+		for (k=0; k<4; k++) {
+			countBefore[k] = runningCount[k];
+		}
+		bwtIndex = BWTAddOccCountOfWords(bwt, bwtIndex, wordBetweenOccValue, decodeTable, runningCount);
+		BWTStoreOccValuePair(occValue, occIndex, countBefore, runningCount);
+		occIndex++;
+
+		bwtIndex = BWTAddOccCountOfWords(bwt, bwtIndex, wordBetweenOccValue, decodeTable, runningCount);
+	}
+
+	// Final group: its second sample is only computed when one more interval is due;
+	// otherwise both halves carry the same counts
+	for (k=0; k<4; k++) {
+		countBefore[k] = runningCount[k];
+	}
+	if (occIndex * 2 < numberOfOccValue - 1) {
+		bwtIndex = BWTAddOccCountOfWords(bwt, bwtIndex, wordBetweenOccValue, decodeTable, runningCount);
+	}
+	BWTStoreOccValuePair(occValue, occIndex, countBefore, runningCount);
+
+}
+
+/*
+ * Run one build step: extend the BWT held in bwtInc by the numChar
+ * characters currently stored in bwtInc->packedText.
+ *
+ * Initial build (bwt->textLength == 0): the packed text is expanded into a
+ * rank array, suffix-sorted with QSufSortSuffixSort() and written straight
+ * into the packed BWT at the end of the working memory.
+ *
+ * Incremental build: the rank of every new suffix relative to the existing
+ * BWT is computed, the ranks are sorted per first character, converted to
+ * groups, refined by QSufSortSuffixSort(), and the resulting characters are
+ * merged with the old BWT into a new buffer.
+ *
+ * In both cases the occurrence tables, inverseSa0, the cumulative
+ * frequencies and the bookkeeping fields of bwtInc are then updated, and
+ * BWTIncSetBuildSizeAndTextAddr() prepares the next step.
+ *
+ * All arrays live inside bwtInc->workingMemory and overlap on purpose; the
+ * inline comments state which array may overwrite which.  If the merged BWT
+ * of an incremental step would start inside the insertBwt array, a message
+ * is printed to stderr and the process exits with status 1; this condition
+ * used to be guarded only by an assert(), which is compiled out when NDEBUG
+ * is defined.
+ */
+static void BWTIncConstruct(BWTInc *bwtInc, const bgint_t numChar)
+{
+	unsigned int i;
+	bgint_t mergedBwtSizeInWord, mergedOccSizeInWord;
+	unsigned int firstCharInThisIteration;
+
+	bgint_t *relativeRank, *seq, *sortedRank;
+	unsigned int *insertBwt, *mergedBwt;
+	bgint_t newInverseSa0RelativeRank, oldInverseSa0RelativeRank, newInverseSa0;
+
+	mergedBwtSizeInWord = BWTResidentSizeInWord(bwtInc->bwt->textLength + numChar);
+	mergedOccSizeInWord = BWTOccValueMinorSizeInWord(bwtInc->bwt->textLength + numChar);
+
+	initializeVAL_bg(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
+
+	if (bwtInc->bwt->textLength == 0) {		// Initial build
+
+		// Set address
+		seq = (bgint_t*)bwtInc->workingMemory;
+		relativeRank = seq + bwtInc->buildSize + 1;
+		// mergedBwt and packedText may share memory
+		mergedBwt = insertBwt = bwtInc->workingMemory + bwtInc->availableWord - mergedBwtSizeInWord;	// build in place
+
+		assert((void*)(relativeRank + bwtInc->buildSize + 1) <= (void*)bwtInc->packedText);
+		assert((void*)(relativeRank + bwtInc->buildSize + 1) <= (void*)mergedBwt);
+
+		// ->packedText is not used any more and may be overwritten by mergedBwt
+		BWTIncPutPackedTextToRank(bwtInc->packedText, relativeRank, bwtInc->cumulativeCountInCurrentBuild, numChar);
+
+		firstCharInThisIteration = relativeRank[0];
+		relativeRank[numChar] = 0;
+
+		// Sort suffix
+		QSufSortSuffixSort((qsint_t*)relativeRank, (qsint_t*)seq, (qsint_t)numChar, (qsint_t)ALPHABET_SIZE - 1, 0, FALSE);
+		newInverseSa0 = relativeRank[0];
+
+		// Clear BWT area
+		initializeVAL(insertBwt, mergedBwtSizeInWord, 0);
+
+		// Build BWT
+		BWTIncBuildPackedBwt(relativeRank, insertBwt, numChar, bwtInc->cumulativeCountInCurrentBuild, bwtInc->packedShift);
+
+		// so that the cumulativeCount is not deducted
+		bwtInc->firstCharInLastIteration = ALPHABET_SIZE;
+
+	} else {		// Incremental build
+		// Set address
+		sortedRank = (bgint_t*)bwtInc->workingMemory;
+		seq = sortedRank + bwtInc->buildSize + 1;
+		insertBwt = (unsigned*)seq; // insertBwt and seq share memory
+		// relativeRank and ->packedText may share memory
+		relativeRank = seq + bwtInc->buildSize + 1;
+
+		assert((void*)relativeRank <= (void*)bwtInc->packedText);
+
+		// Store the first character of this iteration
+		firstCharInThisIteration = bwtInc->packedText[0] >> (BITS_IN_WORD - BIT_PER_CHAR);
+
+		// Count occurrence of input text
+		ForwardDNAAllOccCountNoLimit(bwtInc->packedText, numChar, bwtInc->cumulativeCountInCurrentBuild + 1, bwtInc->bwt->decodeTable);
+		// Add the first character of the previous iteration to represent the inverseSa0 of the previous iteration
+		bwtInc->cumulativeCountInCurrentBuild[bwtInc->firstCharInLastIteration + 1]++;
+		bwtInc->cumulativeCountInCurrentBuild[2] += bwtInc->cumulativeCountInCurrentBuild[1];
+		bwtInc->cumulativeCountInCurrentBuild[3] += bwtInc->cumulativeCountInCurrentBuild[2];
+		bwtInc->cumulativeCountInCurrentBuild[4] += bwtInc->cumulativeCountInCurrentBuild[3];
+
+		// Get rank of new suffix among processed suffix
+		// The seq array is built into ALPHABET_SIZE + 2 groups; ALPHABET_SIZE groups + 1 group divided into 2 by inverseSa0 + inverseSa0 as 1 group
+		// ->packedText is not used any more and will be overwritten by relativeRank
+		oldInverseSa0RelativeRank = BWTIncGetAbsoluteRank(bwtInc->bwt, sortedRank, seq, bwtInc->packedText,
+														  numChar, bwtInc->cumulativeCountInCurrentBuild, bwtInc->firstCharInLastIteration);
+
+		// Sort rank by ALPHABET_SIZE + 2 groups (or ALPHABET_SIZE + 1 groups when inverseSa0 sit on the border of a group)
+		for (i=0; i<ALPHABET_SIZE; i++) {
+			if (bwtInc->cumulativeCountInCurrentBuild[i] > oldInverseSa0RelativeRank ||
+				bwtInc->cumulativeCountInCurrentBuild[i+1] <= oldInverseSa0RelativeRank) {
+				// bucket i does not contain the inverseSa0 entry: sort it as a whole
+				BWTIncSortKey(sortedRank + bwtInc->cumulativeCountInCurrentBuild[i], seq + bwtInc->cumulativeCountInCurrentBuild[i], bwtInc->cumulativeCountInCurrentBuild[i+1] - bwtInc->cumulativeCountInCurrentBuild[i]);
+			} else {
+				// bucket i contains the inverseSa0 entry: sort the parts below and above it separately
+				if (bwtInc->cumulativeCountInCurrentBuild[i] < oldInverseSa0RelativeRank) {
+					BWTIncSortKey(sortedRank + bwtInc->cumulativeCountInCurrentBuild[i], seq + bwtInc->cumulativeCountInCurrentBuild[i], oldInverseSa0RelativeRank - bwtInc->cumulativeCountInCurrentBuild[i]);
+				}
+				if (bwtInc->cumulativeCountInCurrentBuild[i+1] > oldInverseSa0RelativeRank + 1) {
+					BWTIncSortKey(sortedRank + oldInverseSa0RelativeRank + 1, seq + oldInverseSa0RelativeRank + 1, bwtInc->cumulativeCountInCurrentBuild[i+1] - oldInverseSa0RelativeRank - 1);
+				}
+			}
+		}
+
+		// build relative rank; sortedRank is updated for merging to cater for the fact that $ is not encoded in bwt
+		// the cumulative freq information is used to make sure that inverseSa0 and suffix beginning with different characters are kept in different unsorted groups)
+		BWTIncBuildRelativeRank(sortedRank, seq, relativeRank, numChar, bwtInc->bwt->inverseSa0, bwtInc->cumulativeCountInCurrentBuild);
+		assert(relativeRank[numChar] == oldInverseSa0RelativeRank);
+
+		// Sort suffix
+		QSufSortSuffixSort((qsint_t*)relativeRank, (qsint_t*)seq, (qsint_t)numChar, (qsint_t)numChar, 1, TRUE);
+
+		newInverseSa0RelativeRank = relativeRank[0];
+		newInverseSa0 = sortedRank[newInverseSa0RelativeRank] + newInverseSa0RelativeRank;
+
+		sortedRank[newInverseSa0RelativeRank] = 0;	// a special value so that this is skipped in the merged bwt
+
+		// Build BWT; seq is overwritten by insertBwt
+		BWTIncBuildBwt(insertBwt, relativeRank, numChar, bwtInc->cumulativeCountInCurrentBuild);
+
+		// Merge BWT; relativeRank may be overwritten by mergedBwt
+		mergedBwt = bwtInc->workingMemory + bwtInc->availableWord - mergedBwtSizeInWord
+				    - bwtInc->numberOfIterationDone * OCC_INTERVAL / BIT_PER_CHAR * (sizeof(bgint_t) / 4); // minus numberOfIteration * occInterval to create a buffer for merging
+		// The merge reads insertBwt[0..numChar] while writing mergedBwt; if the two
+		// overlap the input would be destroyed, so fail loudly even in NDEBUG builds
+		if (mergedBwt < insertBwt + numChar) {
+			fprintf(stderr, "BWTIncConstruct(): Not enough working memory to merge this iteration!\n");
+			exit(1);
+		}
+		BWTIncMergeBwt(sortedRank, bwtInc->bwt->bwtCode, insertBwt, mergedBwt, bwtInc->bwt->textLength, numChar);
+	}
+
+	// Build auxiliary structure and update info and pointers in BWT
+	bwtInc->bwt->textLength += numChar;
+	bwtInc->bwt->bwtCode = mergedBwt;
+	bwtInc->bwt->bwtSizeInWord = mergedBwtSizeInWord;
+	bwtInc->bwt->occSizeInWord = mergedOccSizeInWord;
+	assert(mergedBwt >= bwtInc->workingMemory + mergedOccSizeInWord);
+
+	// the minor occurrence table sits immediately in front of the BWT code
+	bwtInc->bwt->occValue = mergedBwt - mergedOccSizeInWord;
+
+	BWTClearTrailingBwtCode(bwtInc->bwt);
+	BWTGenerateOccValueFromBwt(bwtInc->bwt->bwtCode, bwtInc->bwt->occValue, bwtInc->bwt->occValueMajor,
+							   bwtInc->bwt->textLength, bwtInc->bwt->decodeTable);
+
+	bwtInc->bwt->inverseSa0 = newInverseSa0;
+
+	// cumulativeCountInCurrentBuild includes one extra entry for the previous
+	// iteration's first character (incremental build only); take it out again
+	bwtInc->bwt->cumulativeFreq[1] += bwtInc->cumulativeCountInCurrentBuild[1] - (bwtInc->firstCharInLastIteration <= 0);
+	bwtInc->bwt->cumulativeFreq[2] += bwtInc->cumulativeCountInCurrentBuild[2] - (bwtInc->firstCharInLastIteration <= 1);
+	bwtInc->bwt->cumulativeFreq[3] += bwtInc->cumulativeCountInCurrentBuild[3] - (bwtInc->firstCharInLastIteration <= 2);
+	bwtInc->bwt->cumulativeFreq[4] += bwtInc->cumulativeCountInCurrentBuild[4] - (bwtInc->firstCharInLastIteration <= 3);
+
+	bwtInc->firstCharInLastIteration = firstCharInThisIteration;
+
+	// Set build size and text address for the next build
+	BWTIncSetBuildSizeAndTextAddr(bwtInc);
+	bwtInc->numberOfIterationDone++;
+
+}
+
+/*
+ * Build a BWT incrementally from a byte-packed sequence file.
+ *
+ * inputFileName       path of the packed file; its final byte is read as
+ *                     lastByteLength and passed to TextLengthFromBytePacked()
+ *                     together with the offset of that byte
+ * initialMaxBuildSize forwarded to BWTIncCreate()
+ * incMaxBuildSize     forwarded to BWTIncCreate()
+ *
+ * The text is consumed from the end of the file towards its beginning: the
+ * first chunk is sized so that what remains is a multiple of CHAR_PER_WORD,
+ * and every later chunk is at most bwtInc->buildSize characters.
+ *
+ * Returns the BWTInc produced by BWTIncCreate() after all text has been fed
+ * to BWTIncConstruct().  Any I/O failure is reported on stderr and terminates
+ * the process with exit(1), matching the existing open-failure handling.
+ */
+BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxBuildSize, bgint_t incMaxBuildSize)
+{
+
+	FILE *packedFile;
+	long lastByteOffset;
+	bgint_t packedFileLen;
+	bgint_t totalTextLength;
+	bgint_t textToLoad, textSizeInByte;
+	bgint_t processedTextLength;
+	unsigned char lastByteLength;
+
+	BWTInc *bwtInc;
+
+	packedFile = (FILE*)fopen(inputFileName, "rb");
+
+	if (packedFile == NULL) {
+		fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open inputFileName!\n");
+		exit(1);
+	}
+
+	// Position on the last byte; this fails for an empty file, which would
+	// otherwise leave lastByteLength uninitialised.
+	if (fseek(packedFile, -1, SEEK_END) != 0 || (lastByteOffset = ftell(packedFile)) < 0) {
+		fprintf(stderr, "BWTIncConstructFromPacked() : Cannot seek in inputFileName!\n");
+		exit(1);
+	}
+	packedFileLen = lastByteOffset;
+	if (fread(&lastByteLength, sizeof(unsigned char), 1, packedFile) != 1) {
+		fprintf(stderr, "BWTIncConstructFromPacked() : Cannot read inputFileName!\n");
+		exit(1);
+	}
+	totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
+
+	bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize);
+
+	BWTIncSetBuildSizeAndTextAddr(bwtInc);
+
+	if (bwtInc->buildSize > totalTextLength) {
+		textToLoad = totalTextLength;
+	} else {
+		// leave a whole number of words for the later iterations
+		textToLoad = totalTextLength - ((totalTextLength - bwtInc->buildSize + CHAR_PER_WORD - 1) / CHAR_PER_WORD * CHAR_PER_WORD);
+	}
+	textSizeInByte = textToLoad / CHAR_PER_BYTE;	// excluded the odd byte
+
+	// Step back over the length byte and the byte before it, then over the
+	// first chunk, read it (plus the odd byte) and return to the chunk start.
+	if (fseek(packedFile, -2, SEEK_CUR) != 0
+		|| fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {
+		fprintf(stderr, "BWTIncConstructFromPacked() : Cannot seek in inputFileName!\n");
+		exit(1);
+	}
+	if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile) != textSizeInByte + 1) {
+		fprintf(stderr, "BWTIncConstructFromPacked() : Cannot read inputFileName!\n");
+		exit(1);
+	}
+	if (fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR) != 0) {
+		fprintf(stderr, "BWTIncConstructFromPacked() : Cannot seek in inputFileName!\n");
+		exit(1);
+	}
+
+	ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
+	BWTIncConstruct(bwtInc, textToLoad);
+
+	processedTextLength = textToLoad;
+
+	while (processedTextLength < totalTextLength) {
+		textToLoad = bwtInc->buildSize / CHAR_PER_WORD * CHAR_PER_WORD;
+		if (textToLoad > totalTextLength - processedTextLength) {
+			textToLoad = totalTextLength - processedTextLength;
+		}
+		textSizeInByte = textToLoad / CHAR_PER_BYTE;
+		if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {
+			fprintf(stderr, "BWTIncConstructFromPacked() : Cannot seek in inputFileName!\n");
+			exit(1);
+		}
+		if (fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile) != textSizeInByte) {
+			fprintf(stderr, "BWTIncConstructFromPacked() : Cannot read inputFileName!\n");
+			exit(1);
+		}
+		if (fseek(packedFile, -((long)textSizeInByte), SEEK_CUR) != 0) {
+			fprintf(stderr, "BWTIncConstructFromPacked() : Cannot seek in inputFileName!\n");
+			exit(1);
+		}
+		ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
+		BWTIncConstruct(bwtInc, textToLoad);
+		processedTextLength += textToLoad;
+		if (bwtInc->numberOfIterationDone % 10 == 0) {
+			// %lu expects unsigned long, so cast to exactly that type
+			fprintf(stderr, "[BWTIncConstructFromPacked] %lu iterations done. %lu characters processed.\n",
+					(unsigned long)bwtInc->numberOfIterationDone, (unsigned long)processedTextLength);
+		}
+	}
+	// the whole text has been consumed; release the input handle
+	fclose(packedFile);
+	return bwtInc;
+}
+
+/* Release a BWT together with the five arrays it points to.
+ * A null pointer is accepted and ignored. */
+void BWTFree(BWT *bwt)
+{
+	if (bwt != 0) {
+		free(bwt->cumulativeFreq);
+		free(bwt->bwtCode);
+		free(bwt->occValue);
+		free(bwt->occValueMajor);
+		free(bwt->decodeTable);
+		free(bwt);
+	}
+}
+
+/* Release an incremental-construction context: the BWT struct, the working
+ * memory and the BWTInc itself.  A null pointer is accepted and ignored.
+ *
+ * NOTE(review): unlike BWTFree(), the arrays hanging off bwtInc->bwt
+ * (cumulativeFreq, occValueMajor, decodeTable, ...) are not freed here.
+ * BWTIncConstruct() points bwtCode and occValue into workingMemory, so those
+ * two are covered by the workingMemory free; confirm against BWTIncCreate()
+ * whether the others are separate allocations that leak. */
+void BWTIncFree(BWTInc *bwtInc)
+{
+	if (bwtInc == 0) return;
+	free(bwtInc->bwt);
+	free(bwtInc->workingMemory);
+	free(bwtInc);
+}
+
+/* Number of words needed to hold numChar packed characters, rounded up. */
+static bgint_t BWTFileSizeInWord(const bgint_t numChar)
+{
+	// The $ in BWT at the position of inverseSa0 is not encoded,
+	// so only the numChar real characters are counted.
+	const bgint_t roundedUp = numChar + CHAR_PER_WORD - 1;
+	return roundedUp / CHAR_PER_WORD;
+}
+
+/*
+ * Write a BWT to bwtFileName.
+ *
+ * The file receives, in order: inverseSa0, the ALPHABET_SIZE cumulative
+ * frequencies starting at cumulativeFreq[1], and the packed BWT code
+ * (BWTFileSizeInWord(textLength) words of sizeof(unsigned int) bytes).
+ *
+ * occValueFileName is accepted for interface compatibility but is not used;
+ * no occurrence file is written.
+ *
+ * Any failure to open, write or close the file is reported on stderr and
+ * terminates the process with exit(1), so that a truncated index is never
+ * left behind unnoticed.
+ */
+void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *occValueFileName)
+{
+	FILE *bwtFile;
+	bgint_t bwtLength;
+
+	(void)occValueFileName; // intentionally unused, see above
+
+	bwtFile = (FILE*)fopen(bwtFileName, "wb");
+	if (bwtFile == NULL) {
+		fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot open BWT code file!\n");
+		exit(1);
+	}
+
+	bwtLength = BWTFileSizeInWord(bwt->textLength);
+	if (fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile) != 1
+		|| fwrite(bwt->cumulativeFreq + 1, sizeof(bgint_t), ALPHABET_SIZE, bwtFile) != (size_t)ALPHABET_SIZE
+		|| fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile) != bwtLength) {
+		fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot write BWT code file!\n");
+		exit(1);
+	}
+	// buffered data is flushed by fclose, so write errors can surface here too
+	if (fclose(bwtFile) != 0) {
+		fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot close BWT code file!\n");
+		exit(1);
+	}
+}
+
+/* Construct the BWT of the packed sequence file fn_pac and store it in fn_bwt.
+ * Both the initial and the incremental build size are 10,000,000. */
+void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
+{
+	BWTInc *inc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000);
+
+	printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", inc->numberOfIterationDone);
+	BWTSaveBwtCodeAndOcc(inc->bwt, fn_bwt, 0); // no occurrence file name is supplied
+	BWTIncFree(inc);
+}
+
+/* Command-line entry point: argv[1] is the input .pac file, argv[2] the
+ * output .bwt file.  Returns 0 after building the BWT, or 1 after printing
+ * the usage line when fewer than two file arguments are given. */
+int bwt_bwtgen_main(int argc, char *argv[])
+{
+	if (argc >= 3) {
+		bwt_bwtgen(argv[1], argv[2]);
+		return 0;
+	}
+	fprintf(stderr, "Usage: bwtgen <in.pac> <out.bwt>\n");
+	return 1;
+}
+
+#ifdef MAIN_BWT_GEN
+
+/* Stand-alone program entry, compiled only when MAIN_BWT_GEN is defined;
+ * it simply forwards to bwt_bwtgen_main(). */
+int main(int argc, char *argv[])
+{
+	return bwt_bwtgen_main(argc, argv);
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwt_lite.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,94 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "bwt_lite.h"
+
+int is_sa(const uint8_t *T, uint32_t *SA, int n);
+int is_bwt(uint8_t *T, int n);
+
+/*
+ * Build a light-weight BWT index for a short sequence.
+ *
+ * len  number of symbols in seq
+ * seq  the symbols; each is packed into 2 bits and used as an index into a
+ *      4-entry counter array, so values are expected to be 0..3
+ *
+ * Returns a newly allocated bwtl_t holding the suffix array (len + 1 entries),
+ * the 2-bit packed BWT (16 symbols per 32-bit word, first symbol in the most
+ * significant bits), occurrence checkpoints every 16 symbols, the cumulative
+ * counts L2 and the per-byte counting table.  Release it with bwtl_destroy().
+ */
+bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq)
+{
+	bwtl_t *b;
+	int i;
+	b = (bwtl_t*)calloc(1, sizeof(bwtl_t));
+	b->seq_len = len;
+
+	{ // calculate b->bwt
+		uint8_t *s;
+		b->sa = (uint32_t*)calloc(len + 1, 4);
+		is_sa(seq, b->sa, len);
+		s = (uint8_t*)calloc(len + 1, 1);
+		for (i = 0; i <= len; ++i) {
+			if (b->sa[i] == 0) b->primary = i;
+			else s[i] = seq[b->sa[i] - 1];
+		}
+		// drop the slot of the sentinel: it is not stored in the packed BWT
+		for (i = b->primary; i < len; ++i) s[i] = s[i + 1];
+		b->bwt_size = (len + 15) / 16;
+		b->bwt = (uint32_t*)calloc(b->bwt_size, 4);
+		// Shift as uint32_t: a symbol >= 2 moved to bit 30 would overflow the
+		// signed int that uint8_t promotes to, which is undefined behaviour.
+		for (i = 0; i < len; ++i)
+			b->bwt[i>>4] |= (uint32_t)s[i] << ((15 - (i&15)) << 1);
+		free(s);
+	}
+	{ // calculate b->occ
+		uint32_t c[4];
+		b->n_occ = (len + 15) / 16 * 4;
+		b->occ = (uint32_t*)calloc(b->n_occ, 4);
+		memset(c, 0, 16);
+		for (i = 0; i < len; ++i) {
+			// checkpoint: counts of each symbol strictly before position i
+			if (i % 16 == 0)
+				memcpy(b->occ + (i/16) * 4, c, 16);
+			++c[bwtl_B0(b, i)];
+		}
+		// turn the per-symbol totals into cumulative counts L2[1..4]
+		memcpy(b->L2+1, c, 16);
+		for (i = 2; i < 5; ++i) b->L2[i] += b->L2[i-1];
+	}
+	{ // generate cnt_table
+		// For every byte value, byte j of the entry holds how many of the four
+		// 2-bit symbols in that byte equal j.
+		for (i = 0; i != 256; ++i) {
+			uint32_t j, x = 0; // uint32_t, the standard spelling used elsewhere in this file
+			for (j = 0; j != 4; ++j)
+				x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);
+			b->cnt_table[i] = x;
+		}
+	}
+	return b;
+}
+/*
+ * Count the occurrences of symbol c in the BWT up to and including position k.
+ *
+ * k == (uint32_t)-1 yields 0; k == seq_len yields the total number of c,
+ * taken from L2.  Positions at or after bwt->primary are shifted down by one
+ * because the sentinel is not stored in the packed BWT.
+ *
+ * Defined without `inline`: the function is called from other translation
+ * units, and under C99/C11 semantics a plain `inline` definition does not
+ * emit the external symbol those callers link against.
+ */
+uint32_t bwtl_occ(const bwtl_t *bwt, uint32_t k, uint8_t c)
+{
+	uint32_t n, b;
+	if (k == bwt->seq_len) return bwt->L2[c+1] - bwt->L2[c];
+	if (k == (uint32_t)(-1)) return 0;
+	if (k >= bwt->primary) --k; // because $ is not in bwt
+	n = bwt->occ[k/16<<2|c]; // checkpoint count at the start of this word
+	b = bwt->bwt[k/16] & ~((1U<<((15-(k&15))<<1)) - 1); // keep symbols 0..k&15, zero the rest
+	n += (bwt->cnt_table[b&0xff] + bwt->cnt_table[b>>8&0xff]
+		  + bwt->cnt_table[b>>16&0xff] + bwt->cnt_table[b>>24]) >> (c<<3) & 0xff;
+	if (c == 0) n -= 15 - (k&15); // corrected for the masked bits
+	return n;
+}
+/*
+ * Count the occurrences of all four symbols in the BWT up to and including
+ * position k, storing them in cnt[0..3].  k == (uint32_t)-1 yields all zeros.
+ * Positions at or after bwt->primary are shifted down by one because the
+ * sentinel is not stored in the packed BWT.
+ *
+ * Defined without `inline`: the function is called from other translation
+ * units, and under C99/C11 semantics a plain `inline` definition does not
+ * emit the external symbol those callers link against.
+ */
+void bwtl_occ4(const bwtl_t *bwt, uint32_t k, uint32_t cnt[4])
+{
+	uint32_t x, b;
+	if (k == (uint32_t)(-1)) {
+		memset(cnt, 0, 16);
+		return;
+	}
+	if (k >= bwt->primary) --k; // because $ is not in bwt
+	memcpy(cnt, bwt->occ + (k>>4<<2), 16); // checkpoint counts at the start of this word
+	b = bwt->bwt[k>>4] & ~((1U<<((~k&15)<<1)) - 1); // keep symbols 0..k&15, zero the rest
+	// x packs the four per-symbol counts of this word into its four bytes
+	x = bwt->cnt_table[b&0xff] + bwt->cnt_table[b>>8&0xff]
+		+ bwt->cnt_table[b>>16&0xff] + bwt->cnt_table[b>>24];
+	x -= 15 - (k&15); // the zeroed-out symbols were counted as symbol 0
+	cnt[0] += x&0xff; cnt[1] += x>>8&0xff; cnt[2] += x>>16&0xff; cnt[3] += x>>24;
+}
+/*
+ * Convenience wrapper: run bwtl_occ4() for position k into cntk and for
+ * position l into cntl.
+ *
+ * Defined without `inline`: the function is called from other translation
+ * units, and under C99/C11 semantics a plain `inline` definition does not
+ * emit the external symbol those callers link against.
+ */
+void bwtl_2occ4(const bwtl_t *bwt, uint32_t k, uint32_t l, uint32_t cntk[4], uint32_t cntl[4])
+{
+	bwtl_occ4(bwt, k, cntk);
+	bwtl_occ4(bwt, l, cntl);
+}
+/* Free a bwtl_t and its occ, bwt and sa arrays; a null pointer is ignored. */
+void bwtl_destroy(bwtl_t *bwt)
+{
+	if (!bwt) return;
+	free(bwt->occ);
+	free(bwt->bwt);
+	free(bwt->sa);
+	free(bwt);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwt_lite.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,29 @@
+#ifndef BWT_LITE_H_
+#define BWT_LITE_H_
+
+#include <stdint.h>
+
+/* Light-weight BWT index of a short sequence, as filled in by bwtl_seq2bwtl(). */
+typedef struct {
+	// seq_len: number of symbols; bwt_size: number of 32-bit words in bwt;
+	// n_occ: number of 32-bit counters in occ
+	uint32_t seq_len, bwt_size, n_occ;
+	// suffix-array row whose entry is 0; that row is left out of the packed bwt
+	uint32_t primary;
+	// bwt: 2-bit packed symbols, 16 per word; occ: four counters per 16 symbols;
+	// sa: suffix array with seq_len + 1 entries; L2: cumulative symbol counts
+	uint32_t *bwt, *occ, *sa, L2[5];
+	// per byte value: four 8-bit counts of how many 2-bit symbols equal 0..3
+	uint32_t cnt_table[256];
+} bwtl_t;
+
+/* Extract the 2-bit symbol at position k of the packed bwt of b. */
+#define bwtl_B0(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/* Build the index for seq[0..len-1]; release it with bwtl_destroy(). */
+	bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq);
+	/* The three query functions are declared as ordinary external functions.
+	 * Declaring them `inline` here without providing the body in this header
+	 * breaks the C99/C11 rule that an inline function with external linkage
+	 * must be defined in every translation unit that declares it. */
+	uint32_t bwtl_occ(const bwtl_t *bwt, uint32_t k, uint8_t c);
+	void bwtl_occ4(const bwtl_t *bwt, uint32_t k, uint32_t cnt[4]);
+	void bwtl_2occ4(const bwtl_t *bwt, uint32_t k, uint32_t l, uint32_t cntk[4], uint32_t cntl[4]);
+	void bwtl_destroy(bwtl_t *bwt);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtaln.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,356 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <stdint.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "bwtaln.h"
+#include "bwtgap.h"
+#include "utils.h"
+
+#ifdef HAVE_PTHREAD
+#include <pthread.h>
+#endif
+
+/* Allocate a gap_opt_t and fill in the default alignment options.
+ * The caller owns the returned object and releases it with free(). */
+gap_opt_t *gap_init_opt()
+{
+	gap_opt_t *opt = (gap_opt_t*)calloc(1, sizeof(gap_opt_t));
+
+	/* IMPORTANT: s_mm*10 should be about the average base error
+	   rate. Violating this requirement will break pairing! */
+	opt->s_mm = 3;
+	opt->s_gapo = 11;
+	opt->s_gape = 4;
+
+	/* limits on differences and gaps; max_diff = -1 because fnr is used by default */
+	opt->max_diff = -1;
+	opt->max_gapo = 1;
+	opt->max_gape = 6;
+	opt->indel_end_skip = 5;
+	opt->max_del_occ = 10;
+	opt->max_entries = 2000000;
+
+	opt->mode = BWA_MODE_GAPE | BWA_MODE_COMPREAD;
+
+	/* seeding */
+	opt->seed_len = 32;
+	opt->max_seed_diff = 2;
+
+	opt->fnr = 0.04;
+	opt->n_threads = 1;
+	opt->max_top2 = 30;
+	opt->trim_qual = 0;
+	return opt;
+}
+
+/*
+ * Smallest number of differences k (1 <= k < 1000) such that, for a Poisson
+ * distribution with mean l * err, the probability of seeing more than k
+ * events is below thres.
+ *
+ * l      read length
+ * err    per-base error rate
+ * thres  tolerated probability of exceeding the returned value
+ *
+ * Returns k, or 2 if no k below 1000 satisfies the threshold.
+ */
+int bwa_cal_maxdiff(int l, double err, double thres)
+{
+	double elambda = exp(-l * err);
+	double sum, y = 1.0;
+	// k! is accumulated as a double: as an int it overflows from 13! onwards,
+	// which is undefined behaviour and corrupts the result for long reads.
+	// Up to 12! the value is identical to the former integer computation.
+	double x = 1.0;
+	int k;
+	for (k = 1, sum = elambda; k < 1000; ++k) {
+		y *= l * err;	// (l*err)^k
+		x *= k;		// k!
+		sum += elambda * y / x;
+		if (1.0 - sum < thres) return k;
+	}
+	return 2;
+}
+
+// Walk str[0..len-1] through the BWT, narrowing the suffix-array interval
+// [k,l] one symbol at a time.  Whenever the interval becomes empty, or the
+// symbol is not 0..3, the interval is reset to the full range and the
+// restart counter bid is incremented.
+//
+// For every i, width[i].w receives the interval size after symbol i and
+// width[i].bid the number of restarts so far; width[len] gets w = 0 and one
+// extra restart.  Returns that final bid.
+//
+// width must be filled as zero
+int bwt_cal_width(const bwt_t *bwt, int len, const ubyte_t *str, bwt_width_t *width)
+{
+	bwtint_t k, l, ok, ol;
+	int i, bid;
+	bid = 0;
+	k = 0; l = bwt->seq_len;
+	for (i = 0; i < len; ++i) {
+		ubyte_t c = str[i];
+		if (c < 4) {
+			// k - 1 wraps around when k == 0; presumably bwt_2occ treats
+			// that as "before the first row" — TODO confirm in bwt.c
+			bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
+			k = bwt->L2[c] + ok + 1;
+			l = bwt->L2[c] + ol;
+		}
+		if (k > l || c > 3) { // then restart
+			k = 0;
+			l = bwt->seq_len;
+			++bid;
+		}
+		width[i].w = l - k + 1;
+		width[i].bid = bid;
+	}
+	width[len].w = 0;
+	width[len].bid = ++bid;
+	return bid;
+}
+
+/*
+ * Compute the suffix-array alignments of a batch of reads.
+ *
+ * tid     index of the calling worker; with HAVE_PTHREAD only reads whose
+ *         index i satisfies i % opt->n_threads == tid are processed
+ * bwt     the index to search
+ * n_seqs  number of entries in seqs
+ * seqs    the reads; for each processed read aln/n_aln are filled in and
+ *         name, seq, rseq and qual are freed and set to null
+ * opt     alignment options; a private copy is adjusted per read
+ */
+void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)
+{
+	int i, j, max_l = 0, max_len;
+	gap_stack_t *stack;
+	bwt_width_t *w, *seed_w;
+	gap_opt_t local_opt = *opt;
+
+	// initiate priority stack
+	// The stack is sized from the longest read in the batch, because max_diff
+	// derived from fnr grows with read length.
+	for (i = max_len = 0; i != n_seqs; ++i)
+		if (seqs[i].len > max_len) max_len = seqs[i].len;
+	if (opt->fnr > 0.0) local_opt.max_diff = bwa_cal_maxdiff(max_len, BWA_AVG_ERR, opt->fnr);
+	if (local_opt.max_diff < local_opt.max_gapo) local_opt.max_gapo = local_opt.max_diff;
+	stack = gap_init_stack(local_opt.max_diff, local_opt.max_gapo, local_opt.max_gape, &local_opt);
+
+	seed_w = (bwt_width_t*)calloc(opt->seed_len+1, sizeof(bwt_width_t));
+	w = 0;
+	for (i = 0; i != n_seqs; ++i) {
+		bwa_seq_t *p = seqs + i;
+#ifdef HAVE_PTHREAD
+		if (i % opt->n_threads != tid) continue;
+#endif
+		p->sa = 0; p->type = BWA_TYPE_NO_MATCH; p->c1 = p->c2 = 0; p->n_aln = 0; p->aln = 0;
+		// grow the width buffer to the longest read seen so far and zero it,
+		// as bwt_cal_width() requires
+		if (max_l < p->len) {
+			max_l = p->len;
+			w = (bwt_width_t*)realloc(w, (max_l + 1) * sizeof(bwt_width_t));
+			memset(w, 0, (max_l + 1) * sizeof(bwt_width_t));
+		}
+		bwt_cal_width(bwt, p->len, p->seq, w);
+		if (opt->fnr > 0.0) local_opt.max_diff = bwa_cal_maxdiff(p->len, BWA_AVG_ERR, opt->fnr);
+		// reads no longer than the seed get an effectively unlimited seed length
+		local_opt.seed_len = opt->seed_len < p->len? opt->seed_len : 0x7fffffff;
+		// seed widths are computed over the last seed_len symbols of the read
+		if (p->len > opt->seed_len)
+			bwt_cal_width(bwt, opt->seed_len, p->seq + (p->len - opt->seed_len), seed_w);
+		// core function
+		for (j = 0; j < p->len; ++j) // we need to complement
+			p->seq[j] = p->seq[j] > 3? 4 : 3 - p->seq[j];
+		p->aln = bwt_match_gap(bwt, p->len, p->seq, w, p->len <= opt->seed_len? 0 : seed_w, &local_opt, &p->n_aln, stack);
+		// clean up the unused data in the record
+		free(p->name); free(p->seq); free(p->rseq); free(p->qual);
+		p->name = 0; p->seq = p->rseq = p->qual = 0;
+	}
+	free(seed_w); free(w);
+	gap_destroy_stack(stack);
+}
+
+#ifdef HAVE_PTHREAD
+/* Argument bundle handed to each worker thread; the fields mirror the
+ * parameters of bwa_cal_sa_reg_gap(). */
+typedef struct {
+	int tid;	// worker index
+	bwt_t *bwt;
+	int n_seqs;
+	bwa_seq_t *seqs;
+	const gap_opt_t *opt;
+} thread_aux_t;
+
+/* Thread start routine: unpacks a thread_aux_t and runs bwa_cal_sa_reg_gap()
+ * with its fields.  Always returns a null pointer. */
+static void *worker(void *data)
+{
+	thread_aux_t *d = (thread_aux_t*)data;
+	bwa_cal_sa_reg_gap(d->tid, d->bwt, d->n_seqs, d->seqs, d->opt);
+	return 0;
+}
+#endif
+
+/* Open the read file fn_fa.  Without BWA_MODE_BAM in mode it is opened with
+ * bwa_seq_open(); otherwise with bwa_bam_open() and a selection mask built
+ * from the BWA_MODE_BAM_* bits (all reads when none of them is set). */
+bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa)
+{
+	int which;
+
+	if (!(mode & BWA_MODE_BAM))
+		return bwa_seq_open(fn_fa);
+
+	// open BAM
+	which = ((mode & BWA_MODE_BAM_SE)? 4 : 0)
+		  | ((mode & BWA_MODE_BAM_READ1)? 1 : 0)
+		  | ((mode & BWA_MODE_BAM_READ2)? 2 : 0);
+	if (which == 0) which = 7; // then read all reads
+	return bwa_bam_open(fn_fa, which);
+}
+
+/*
+ * Align every read of fn_fa against the index "<prefix>.bwt" and write the
+ * result to stdout in binary form: first the gap_opt_t itself, then for each
+ * read its 4-byte n_aln followed by n_aln bwt_aln1_t records.
+ *
+ * Reads are processed in batches of up to 0x40000.  With HAVE_PTHREAD and
+ * opt->n_threads > 1 each batch is shared among n_threads worker threads.
+ * Progress and timing are reported on stderr.
+ */
+void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
+{
+	int i, n_seqs, tot_seqs = 0;
+	bwa_seq_t *seqs;
+	bwa_seqio_t *ks;
+	clock_t t;
+	bwt_t *bwt;
+
+	// initialization
+	ks = bwa_open_reads(opt->mode, fn_fa);
+
+	{ // load BWT
+		char *str = (char*)calloc(strlen(prefix) + 10, 1);
+		strcpy(str, prefix); strcat(str, ".bwt");  bwt = bwt_restore_bwt(str);
+		free(str);
+	}
+
+	// core loop
+	err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
+	while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
+		tot_seqs += n_seqs;
+		t = clock();
+
+		fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");
+
+#ifdef HAVE_PTHREAD
+		if (opt->n_threads <= 1) { // no multi-threading at all
+			bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
+		} else {
+			pthread_t *tid;
+			pthread_attr_t attr;
+			thread_aux_t *data;
+			int j;
+			pthread_attr_init(&attr);
+			pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+			data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
+			tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
+			for (j = 0; j < opt->n_threads; ++j) {
+				data[j].tid = j; data[j].bwt = bwt;
+				data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;
+				pthread_create(&tid[j], &attr, worker, data + j);
+			}
+			// the attribute object is initialised once per batch, so it must
+			// also be destroyed once per batch
+			pthread_attr_destroy(&attr);
+			for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
+			free(data); free(tid);
+		}
+#else
+		bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
+#endif
+
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+
+		t = clock();
+		fprintf(stderr, "[bwa_aln_core] write to the disk... ");
+		for (i = 0; i < n_seqs; ++i) {
+			bwa_seq_t *p = seqs + i;
+			err_fwrite(&p->n_aln, 4, 1, stdout);
+			if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
+		}
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+
+		bwa_free_read_seq(n_seqs, seqs);
+		fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
+	}
+
+	// destroy
+	bwt_destroy(bwt);
+	bwa_seq_close(ks);
+}
+
+/*
+ * Work out the index prefix that belongs to hint.
+ *
+ * If "<hint>.64.bwt" can be opened the result is "<hint>.64"; otherwise, if
+ * "<hint>.bwt" can be opened the result is hint itself.
+ *
+ * Returns a malloc'ed string the caller must free, or a null pointer when
+ * neither file can be opened or memory cannot be allocated.
+ */
+char *bwa_infer_prefix(const char *hint)
+{
+	char *prefix;
+	int l_hint;
+	FILE *fp;
+	l_hint = strlen(hint);
+	prefix = malloc(l_hint + 3 + 4 + 1); // room for ".64" + ".bwt" + NUL
+	if (prefix == 0) return 0; // out of memory: report it like a missing index
+	strcpy(prefix, hint);
+	strcpy(prefix + l_hint, ".64.bwt");
+	if ((fp = fopen(prefix, "rb")) != 0) {
+		fclose(fp);
+		prefix[l_hint + 3] = 0; // cut back to "<hint>.64"
+		return prefix;
+	} else {
+		strcpy(prefix + l_hint, ".bwt");
+		if ((fp = fopen(prefix, "rb")) == 0) {
+			free(prefix);
+			return 0;
+		} else {
+			fclose(fp);
+			prefix[l_hint] = 0; // cut back to "<hint>"
+			return prefix;
+		}
+	}
+}
+
+/*
+ * Entry point of "bwa aln": parse the command line, locate the index and run
+ * bwa_aln_core().
+ *
+ * Returns 0 on success and 1 on an unknown option, on missing arguments
+ * (after printing the usage text) or when the index cannot be located.
+ * The option structure is released on every return path.
+ */
+int bwa_aln(int argc, char *argv[])
+{
+	int c, opte = -1;
+	gap_opt_t *opt;
+	char *prefix;
+
+	opt = gap_init_opt();
+	while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IYB:")) >= 0) {
+		switch (c) {
+		case 'n':
+			// a value containing '.' is a probability, anything else a count
+			if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
+			else opt->max_diff = atoi(optarg), opt->fnr = -1.0;
+			break;
+		case 'o': opt->max_gapo = atoi(optarg); break;
+		case 'e': opte = atoi(optarg); break;
+		case 'M': opt->s_mm = atoi(optarg); break;
+		case 'O': opt->s_gapo = atoi(optarg); break;
+		case 'E': opt->s_gape = atoi(optarg); break;
+		case 'd': opt->max_del_occ = atoi(optarg); break;
+		case 'i': opt->indel_end_skip = atoi(optarg); break;
+		case 'l': opt->seed_len = atoi(optarg); break;
+		case 'k': opt->max_seed_diff = atoi(optarg); break;
+		case 'm': opt->max_entries = atoi(optarg); break;
+		case 't': opt->n_threads = atoi(optarg); break;
+		case 'L': opt->mode |= BWA_MODE_LOGGAP; break;
+		case 'R': opt->max_top2 = atoi(optarg); break;
+		case 'q': opt->trim_qual = atoi(optarg); break;
+		case 'c': opt->mode &= ~BWA_MODE_COMPREAD; break;
+		case 'N': opt->mode |= BWA_MODE_NONSTOP; opt->max_top2 = 0x7fffffff; break;
+		case 'f': xreopen(optarg, "wb", stdout); break;
+		case 'b': opt->mode |= BWA_MODE_BAM; break;
+		case '0': opt->mode |= BWA_MODE_BAM_SE; break;
+		case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
+		case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
+		case 'I': opt->mode |= BWA_MODE_IL13; break;
+		case 'Y': opt->mode |= BWA_MODE_CFY; break;
+		case 'B': opt->mode |= atoi(optarg) << 24; break; // barcode length lives in the top bits of mode
+		default: free(opt); return 1;
+		}
+	}
+	// an explicit positive -e replaces max_gape and clears BWA_MODE_GAPE
+	if (opte > 0) {
+		opt->max_gape = opte;
+		opt->mode &= ~BWA_MODE_GAPE;
+	}
+
+	if (optind + 2 > argc) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, "Usage:   bwa aln [options] <prefix> <in.fq>\n\n");
+		fprintf(stderr, "Options: -n NUM    max #diff (int) or missing prob under %.2f err rate (float) [%.2f]\n",
+				BWA_AVG_ERR, opt->fnr);
+		fprintf(stderr, "         -o INT    maximum number or fraction of gap opens [%d]\n", opt->max_gapo);
+		fprintf(stderr, "         -e INT    maximum number of gap extensions, -1 for disabling long gaps [-1]\n");
+		fprintf(stderr, "         -i INT    do not put an indel within INT bp towards the ends [%d]\n", opt->indel_end_skip);
+		fprintf(stderr, "         -d INT    maximum occurrences for extending a long deletion [%d]\n", opt->max_del_occ);
+		fprintf(stderr, "         -l INT    seed length [%d]\n", opt->seed_len);
+		fprintf(stderr, "         -k INT    maximum differences in the seed [%d]\n", opt->max_seed_diff);
+		fprintf(stderr, "         -m INT    maximum entries in the queue [%d]\n", opt->max_entries);
+		fprintf(stderr, "         -t INT    number of threads [%d]\n", opt->n_threads);
+		fprintf(stderr, "         -M INT    mismatch penalty [%d]\n", opt->s_mm);
+		fprintf(stderr, "         -O INT    gap open penalty [%d]\n", opt->s_gapo);
+		fprintf(stderr, "         -E INT    gap extension penalty [%d]\n", opt->s_gape);
+		fprintf(stderr, "         -R INT    stop searching when there are >INT equally best hits [%d]\n", opt->max_top2);
+		fprintf(stderr, "         -q INT    quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual);
+		fprintf(stderr, "         -f FILE   file to write output to instead of stdout\n");
+		fprintf(stderr, "         -B INT    length of barcode\n");
+//		fprintf(stderr, "         -c        input sequences are in the color space\n");
+		fprintf(stderr, "         -L        log-scaled gap penalty for long deletions\n");
+		fprintf(stderr, "         -N        non-iterative mode: search for all n-difference hits (slooow)\n");
+		fprintf(stderr, "         -I        the input is in the Illumina 1.3+ FASTQ-like format\n");
+		fprintf(stderr, "         -b        the input read file is in the BAM format\n");
+		fprintf(stderr, "         -0        use single-end reads only (effective with -b)\n");
+		fprintf(stderr, "         -1        use the 1st read in a pair (effective with -b)\n");
+		fprintf(stderr, "         -2        use the 2nd read in a pair (effective with -b)\n");
+		fprintf(stderr, "         -Y        filter Casava-filtered sequences\n");
+		fprintf(stderr, "\n");
+		free(opt);
+		return 1;
+	}
+	// show at which read lengths the derived max_diff changes
+	if (opt->fnr > 0.0) {
+		int i, k;
+		for (i = 17, k = 0; i <= 250; ++i) {
+			int l = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr);
+			if (l != k) fprintf(stderr, "[bwa_aln] %dbp reads: max_diff = %d\n", i, l);
+			k = l;
+		}
+	}
+	if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
+		fprintf(stderr, "[%s] fail to locate the index\n", __func__);
+		free(opt);
+		return 1; // a missing index is a failure and must not be reported as success
+	}
+	bwa_aln_core(prefix, argv[optind+1], opt);
+	free(opt); free(prefix);
+	return 0;
+}
+
+/* rgoya: Temporary clone of aln_path2cigar to accommodate bwa_cigar_t,
+__cigar_op and __cigar_len while keeping stdaln stand alone.
+
+Converts an alignment path into an array of *n_cigar bwa_cigar_t values.
+The 32-bit CIGAR returned by aln_path2cigar32() is rewritten in place: each
+32-bit element (operation in the low 4 bits, length above) is repacked with
+__cigar_create() and stored through a bwa_cigar_t pointer to the same buffer.
+The returned pointer is that buffer. */
+bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
+{
+	uint32_t *cigar32;
+	bwa_cigar_t *cigar;
+	int i;
+	cigar32 = aln_path2cigar32((path_t*) path, path_len, n_cigar);
+	cigar = (bwa_cigar_t*)cigar32;
+	// element i is read as 32 bits before its narrower slot i is written
+	for (i = 0; i < *n_cigar; ++i)
+                cigar[i] = __cigar_create( (cigar32[i]&0xf), (cigar32[i]>>4) );
+	return cigar;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtaln.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,153 @@
+#ifndef BWTALN_H
+#define BWTALN_H
+
+#include <stdint.h>
+#include "bwt.h"
+
+#define BWA_TYPE_NO_MATCH 0
+#define BWA_TYPE_UNIQUE 1
+#define BWA_TYPE_REPEAT 2
+#define BWA_TYPE_MATESW 3
+
+#define SAM_FPD   1 // paired
+#define SAM_FPP   2 // properly paired
+#define SAM_FSU   4 // self-unmapped
+#define SAM_FMU   8 // mate-unmapped
+#define SAM_FSR  16 // self on the reverse strand
+#define SAM_FMR  32 // mate on the reverse strand
+#define SAM_FR1  64 // this is read one
+#define SAM_FR2 128 // this is read two
+#define SAM_FSC 256 // secondary alignment
+
+#define BWA_AVG_ERR 0.02
+#define BWA_MIN_RDLEN 35 // for read trimming
+
+#define BWA_MAX_BCLEN 63 // maximum barcode length; 127 is the maximum
+
+#ifndef bns_pac
+#define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3)
+#endif
+
+/* Per-position record filled in by bwt_cal_width(). */
+typedef struct {
+	bwtint_t w;	// size of the suffix-array interval after this position
+	int bid;	// number of interval restarts up to this position
+} bwt_width_t;
+
+/* One hit reported by bwt_match_gap(), in suffix-array coordinates. */
+typedef struct {
+	uint32_t n_mm:16, n_gapo:8, n_gape:8;	// mismatches, gap opens, gap extensions
+	int score;	// alignment score of the hit
+	bwtint_t k, l;	// suffix-array interval [k,l] of the hit
+} bwt_aln1_t;
+
+typedef uint16_t bwa_cigar_t;
+/* rgoya: If changing order of bytes, beware of operations like:
+ *     s->cigar[0] += s->full_len - s->len;
+ */
+#define CIGAR_OP_SHIFT 14
+#define CIGAR_LN_MASK 0x3fff
+
+#define __cigar_op(__cigar) ((__cigar)>>CIGAR_OP_SHIFT)
+#define __cigar_len(__cigar) ((__cigar)&CIGAR_LN_MASK)
+#define __cigar_create(__op, __len) ((__op)<<CIGAR_OP_SHIFT | (__len))
+
+/* One entry of bwa_seq_t.multi (the "multiple hits" list).
+ * NOTE(review): field meanings are inferred from their names; the code that
+ * fills this structure is outside this file section — confirm there. */
+typedef struct {
+	uint32_t n_cigar:15, gap:8, mm:8, strand:1;
+	bwtint_t pos;
+	bwa_cigar_t *cigar;	// presumably n_cigar elements
+} bwt_multi1_t;
+
+/* One read together with its alignment results. */
+typedef struct {
+	char *name;
+	// name, seq, rseq and qual are freed and nulled by bwa_cal_sa_reg_gap()
+	// once the read has been aligned
+	ubyte_t *seq, *rseq, *qual;
+	uint32_t len:20, strand:1, type:2, dummy:1, extra_flag:8;	// type is one of BWA_TYPE_*
+	uint32_t n_mm:8, n_gapo:8, n_gape:8, mapQ:8;
+	int score;
+	int clip_len;
+	// alignments in SA coordinates
+	int n_aln;
+	bwt_aln1_t *aln;
+	// multiple hits
+	int n_multi;
+	bwt_multi1_t *multi;
+	// alignment information
+	bwtint_t sa, pos;
+	uint64_t c1:28, c2:28, seQ:8; // number of top1 and top2 hits; single-end mapQ
+	int n_cigar;
+	bwa_cigar_t *cigar;
+	// for multi-threading only
+	int tid;
+	// barcode
+	char bc[BWA_MAX_BCLEN+1]; // null terminated; up to BWA_MAX_BCLEN bases
+	// NM and MD tags
+	uint32_t full_len:20, nm:12;
+	char *md;
+} bwa_seq_t;
+
+#define BWA_MODE_GAPE       0x01
+#define BWA_MODE_COMPREAD   0x02
+#define BWA_MODE_LOGGAP     0x04
+#define BWA_MODE_CFY        0x08
+#define BWA_MODE_NONSTOP    0x10
+#define BWA_MODE_BAM        0x20
+#define BWA_MODE_BAM_SE     0x40
+#define BWA_MODE_BAM_READ1  0x80
+#define BWA_MODE_BAM_READ2  0x100
+#define BWA_MODE_IL13       0x200
+
+/* Options of "bwa aln"; defaults are set by gap_init_opt().  This structure
+ * is also written verbatim at the start of the aln output. */
+typedef struct {
+	int s_mm, s_gapo, s_gape;	// mismatch, gap open and gap extension penalties (-M, -O, -E)
+	int mode; // bit 24-31 are the barcode length
+	// -i: no indel within this many bp of the ends; -d: max occurrences for
+	// extending a long deletion; -m: max entries in the queue
+	int indel_end_skip, max_del_occ, max_entries;
+	float fnr;	// -n given as a float: missing probability; <= 0 when max_diff is used instead
+	int max_diff, max_gapo, max_gape;	// max differences (-n as int), gap opens (-o), gap extensions (-e)
+	int max_seed_diff, seed_len;	// max differences in the seed (-k), seed length (-l)
+	int n_threads;	// -t
+	int max_top2;	// -R: stop searching when there are more equally best hits than this
+	int trim_qual;	// -q: quality threshold for read trimming
+} gap_opt_t;
+
+#define BWA_PET_STD   1
+#define BWA_PET_SOLID 2
+
+/* Option set that includes `type`, presumably one of the BWA_PET_* values
+ * defined just above.
+ * NOTE(review): nothing in this file section reads or writes these fields;
+ * their meanings should be documented next to the code that uses them. */
+typedef struct {
+	int max_isize, force_isize;
+	int max_occ;
+	int n_multi, N_multi;
+	int type, is_sw, is_preload;
+	double ap_prior;
+} pe_opt_t;
+
+struct __bwa_seqio_t;
+typedef struct __bwa_seqio_t bwa_seqio_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	gap_opt_t *gap_init_opt();
+	void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt);
+
+	bwa_seqio_t *bwa_seq_open(const char *fn);
+	bwa_seqio_t *bwa_bam_open(const char *fn, int which);
+	void bwa_seq_close(bwa_seqio_t *bs);
+	void seq_reverse(int len, ubyte_t *seq, int is_comp);
+	bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int mode, int trim_qual);
+	void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs);
+
+	int bwa_cal_maxdiff(int l, double err, double thres);
+	void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt);
+
+	void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac);
+
+
+	/* rgoya: Temporary clone of aln_path2cigar to accommodate bwa_cigar_t,
+	__cigar_op and __cigar_len while keeping stdaln stand alone */
+#include "stdaln.h"
+
+	bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtgap.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,256 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "bwtgap.h"
+#include "bwtaln.h"
+
+#define STATE_M 0
+#define STATE_I 1
+#define STATE_D 2
+
+#define aln_score(m,o,e,p) ((m)*(p)->s_mm + (o)*(p)->s_gapo + (e)*(p)->s_gape)
+
+/* Allocate a score-bucketed stack with max_score empty buckets.
+ * Release it with gap_destroy_stack(). */
+gap_stack_t *gap_init_stack2(int max_score)
+{
+	gap_stack_t *s = (gap_stack_t*)calloc(1, sizeof(gap_stack_t));
+
+	s->n_stacks = max_score;
+	s->stacks = (gap_stack1_t*)calloc(s->n_stacks, sizeof(gap_stack1_t));
+	return s;
+}
+
+/* Allocate a stack with one bucket per score below the score of an alignment
+ * with max_mm+1 mismatches, max_gapo+1 gap opens and max_gape+1 gap
+ * extensions under the penalties in opt. */
+gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt)
+{
+	return gap_init_stack2(aln_score(max_mm+1, max_gapo+1, max_gape+1, opt));
+}
+
+/* Free every bucket's entry array, then the bucket array and the stack. */
+void gap_destroy_stack(gap_stack_t *stack)
+{
+	int idx;
+
+	for (idx = 0; idx != stack->n_stacks; ++idx) {
+		gap_stack1_t *bucket = &stack->stacks[idx];
+		free(bucket->stack);
+	}
+	free(stack->stacks);
+	free(stack);
+}
+
+/* Empty the stack without releasing memory: every bucket keeps its capacity,
+ * the entry counts drop to zero and best is set to n_stacks ("no entry"). */
+static void gap_reset_stack(gap_stack_t *stack)
+{
+	int idx;
+
+	stack->n_entries = 0;
+	stack->best = stack->n_stacks;
+	for (idx = 0; idx != stack->n_stacks; ++idx) {
+		gap_stack1_t *bucket = &stack->stacks[idx];
+		bucket->n_entries = 0;
+	}
+}
+
+/*
+ * Push a partial alignment onto the bucket that matches its score.
+ *
+ * i                     position in the read
+ * k, l                  suffix-array interval
+ * n_mm, n_gapo, n_gape  mismatches, gap opens and gap extensions so far
+ * state                 STATE_M, STATE_I or STATE_D
+ * is_diff               non-zero if this step introduced a difference; then
+ *                       i is recorded as last_diff_pos, otherwise 0
+ *
+ * NOTE(review): the bucket index is the score itself and is not checked
+ * against stack->n_stacks here; callers must keep scores below that bound.
+ */
+static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, int n_mm, int n_gapo, int n_gape,
+							int state, int is_diff, const gap_opt_t *opt)
+{
+	int score;
+	gap_entry_t *p;
+	gap_stack1_t *q;
+	score = aln_score(n_mm, n_gapo, n_gape, opt);
+	q = stack->stacks + score;
+	// grow the bucket geometrically, starting at 4 entries
+	if (q->n_entries == q->m_entries) {
+		q->m_entries = q->m_entries? q->m_entries<<1 : 4;
+		q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries);
+	}
+	p = q->stack + q->n_entries;
+	p->info = (u_int32_t)score<<21 | i; p->k = k; p->l = l;
+	p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape; p->state = state;
+	p->last_diff_pos = is_diff? i : 0;
+	++(q->n_entries);
+	++(stack->n_entries);
+	// remember the lowest non-empty bucket
+	if (stack->best > score) stack->best = score;
+}
+
+/* Pop the most recently pushed entry of the best (lowest-score) bucket into
+ * *e and update stack->best: n_stacks when the stack became empty, otherwise
+ * the next non-empty bucket if the current one ran dry. */
+static inline void gap_pop(gap_stack_t *stack, gap_entry_t *e)
+{
+	gap_stack1_t *bucket = &stack->stacks[stack->best];
+
+	*e = bucket->stack[bucket->n_entries - 1];
+	--(bucket->n_entries);
+	--(stack->n_entries);
+
+	if (stack->n_entries == 0) {
+		stack->best = stack->n_stacks;
+		return;
+	}
+	if (bucket->n_entries == 0) { // reset best
+		int next = stack->best + 1;
+		while (next < stack->n_stacks && stack->stacks[next].n_entries == 0)
+			++next;
+		stack->best = next;
+	}
+}
+
+/*
+ * Adjust the width table after a hit covering x suffix-array rows has been
+ * recorded.  For every position before last_diff_pos: a width larger than x
+ * is reduced by x; a width equal to x gets bid = 1 and w = max - j, where j
+ * counts such positions starting from 1.  Smaller widths are left untouched.
+ *
+ * len is not used.
+ */
+static inline void gap_shadow(int x, int len, bwtint_t max, int last_diff_pos, bwt_width_t *w)
+{
+	int i, j;
+	for (i = j = 0; i < last_diff_pos; ++i) {
+		if (w[i].w > x) w[i].w -= x;
+		else if (w[i].w == x) {
+			w[i].bid = 1;
+			w[i].w = max - (++j);
+		} // else should not happen
+	}
+}
+
+/* Index of the highest set bit of v, i.e. floor(log2(v)); returns 0 for v == 0. */
+static inline int int_log2(uint32_t v)
+{
+	// binary search over the bit position: test the upper half of what is
+	// left, and if anything is set there, move it down and record the shift
+	static const int step[5] = { 16, 8, 4, 2, 1 };
+	int log = 0, n;
+	for (n = 0; n < 5; ++n) {
+		if (v >> step[n]) {
+			v >>= step[n];
+			log += step[n];
+		}
+	}
+	return log;
+}
+
+/*
+ * Search the BWT for alignments of seq[0..len-1] that stay within the limits
+ * in opt, exploring partial alignments in order of increasing score.
+ *
+ * bwt         index to search
+ * len, seq    the read; symbols above 3 count towards the "too many N" check
+ * width       per-position table from bwt_cal_width(); modified by gap_shadow()
+ * seed_width  the same for the seed region, or null to disable seeding
+ * opt         limits and penalties
+ * _n_aln      receives the number of hits returned
+ * stack       work stack; reset here, so it can be reused across calls
+ *
+ * Returns a heap-allocated array of *_n_aln hits (never null; it may hold
+ * zero hits).  The array comes from calloc/realloc in this function.
+ */
+bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_width_t *width,
+						  bwt_width_t *seed_width, const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack)
+{
+	int best_score = aln_score(opt->max_diff+1, opt->max_gapo+1, opt->max_gape+1, opt);
+	int best_diff = opt->max_diff + 1, max_diff = opt->max_diff;
+	int best_cnt = 0;
+	int max_entries = 0, j, _j, n_aln, m_aln;
+	bwt_aln1_t *aln;
+
+	m_aln = 4; n_aln = 0;
+	aln = (bwt_aln1_t*)calloc(m_aln, sizeof(bwt_aln1_t));
+
+	// check whether there are too many N
+	for (j = _j = 0; j < len; ++j)
+		if (seq[j] > 3) ++_j;
+	if (_j > max_diff) {
+		*_n_aln = n_aln;
+		return aln;
+	}
+
+	//for (j = 0; j != len; ++j) printf("#0 %d: [%d,%u]\t[%d,%u]\n", j, w[0][j].bid, w[0][j].w, w[1][j].bid, w[1][j].w);
+	gap_reset_stack(stack); // reset stack
+	// seed the search: nothing consumed yet, full suffix-array interval
+	gap_push(stack, len, 0, bwt->seq_len, 0, 0, 0, 0, 0, opt);
+
+	while (stack->n_entries) {
+		gap_entry_t e;
+		int i, m, m_seed = 0, hit_found, allow_diff, allow_M, tmp;
+		bwtint_t k, l, cnt_k[4], cnt_l[4], occ;
+
+		if (max_entries < stack->n_entries) max_entries = stack->n_entries;
+		if (stack->n_entries > opt->max_entries) break;
+		gap_pop(stack, &e); // get the best entry
+		k = e.k; l = e.l; // SA interval
+		// NOTE(review): gap_push() stores the position as score<<21 | i, but
+		// only the low 16 bits are taken here, so a position above 65535
+		// would be truncated — confirm that reads are never that long.
+		i = e.info&0xffff; // length
+		if (!(opt->mode & BWA_MODE_NONSTOP) && e.info>>21 > best_score + opt->s_mm) break; // no need to proceed
+
+		// m: number of differences still allowed for this entry
+		m = max_diff - (e.n_mm + e.n_gapo);
+		if (opt->mode & BWA_MODE_GAPE) m -= e.n_gape;
+		if (m < 0) continue;
+		if (seed_width) { // apply seeding
+			m_seed = opt->max_seed_diff - (e.n_mm + e.n_gapo);
+			if (opt->mode & BWA_MODE_GAPE) m_seed -= e.n_gape;
+		}
+		//printf("#1\t[%d,%d,%d,%c]\t[%d,%d,%d]\t[%u,%u]\t[%u,%u]\t%d\n", stack->n_entries, a, i, "MID"[e.state], e.n_mm, e.n_gapo, e.n_gape, width[i-1].bid, width[i-1].w, k, l, e.last_diff_pos);
+		// prune: the remaining prefix needs more restarts than differences left
+		if (i > 0 && m < width[i-1].bid) continue;
+
+		// check whether a hit is found
+		hit_found = 0;
+		if (i == 0) hit_found = 1;
+		else if (m == 0 && (e.state == STATE_M || (opt->mode&BWA_MODE_GAPE) || e.n_gape == opt->max_gape)) { // no diff allowed
+			if (bwt_match_exact_alt(bwt, i, seq, &k, &l)) hit_found = 1;
+			else continue; // no hit, skip
+		}
+
+		if (hit_found) { // action for found hits
+			int score = aln_score(e.n_mm, e.n_gapo, e.n_gape, opt);
+			int do_add = 1;
+			//printf("#2 hits found: %d:(%u,%u)\n", e.n_mm+e.n_gapo, k, l);
+			// the first hit fixes the best score and tightens max_diff
+			if (n_aln == 0) {
+				best_score = score;
+				best_diff = e.n_mm + e.n_gapo;
+				if (opt->mode & BWA_MODE_GAPE) best_diff += e.n_gape;
+				if (!(opt->mode & BWA_MODE_NONSTOP))
+					max_diff = (best_diff + 1 > opt->max_diff)? opt->max_diff : best_diff + 1; // top2 behaviour
+			}
+			if (score == best_score) best_cnt += l - k + 1;
+			else if (best_cnt > opt->max_top2) break; // top2b behaviour
+			if (e.n_gapo) { // check whether the hit has been found. this may happen when a gap occurs in a tandem repeat
+				for (j = 0; j != n_aln; ++j)
+					if (aln[j].k == k && aln[j].l == l) break;
+				if (j < n_aln) do_add = 0;
+			}
+			if (do_add) { // append
+				bwt_aln1_t *p;
+				gap_shadow(l - k + 1, len, bwt->seq_len, e.last_diff_pos, width);
+				// double the result array and zero the newly added half
+				if (n_aln == m_aln) {
+					m_aln <<= 1;
+					aln = (bwt_aln1_t*)realloc(aln, m_aln * sizeof(bwt_aln1_t));
+					memset(aln + m_aln/2, 0, m_aln/2*sizeof(bwt_aln1_t));
+				}
+				p = aln + n_aln;
+				p->n_mm = e.n_mm; p->n_gapo = e.n_gapo; p->n_gape = e.n_gape;
+				p->k = k; p->l = l;
+				p->score = score;
+				++n_aln;
+			}
+			continue;
+		}
+
+		--i;
+		bwt_2occ4(bwt, k - 1, l, cnt_k, cnt_l); // retrieve Occ values
+		occ = l - k + 1;
+		// test whether diff is allowed
+		allow_diff = allow_M = 1;
+		if (i > 0) {
+			int ii = i - (len - opt->seed_len);
+			if (width[i-1].bid > m-1) allow_diff = 0;
+			else if (width[i-1].bid == m-1 && width[i].bid == m-1 && width[i-1].w == width[i].w) allow_M = 0;
+			if (seed_width && ii > 0) {
+				if (seed_width[ii-1].bid > m_seed-1) allow_diff = 0;
+				else if (seed_width[ii-1].bid == m_seed-1 && seed_width[ii].bid == m_seed-1
+						 && seed_width[ii-1].w == seed_width[ii].w) allow_M = 0;
+			}
+		}
+		// indels
+		// tmp widens the no-indel zone at both read ends by the gap size so far
+		tmp = (opt->mode & BWA_MODE_LOGGAP)? int_log2(e.n_gape + e.n_gapo)/2+1 : e.n_gapo + e.n_gape;
+		if (allow_diff && i >= opt->indel_end_skip + tmp && len - i >= opt->indel_end_skip + tmp) {
+			if (e.state == STATE_M) { // gap open
+				if (e.n_gapo < opt->max_gapo) { // gap open is allowed
+					// insertion
+					gap_push(stack, i, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_I, 1, opt);
+					// deletion
+					for (j = 0; j != 4; ++j) {
+						k = bwt->L2[j] + cnt_k[j] + 1;
+						l = bwt->L2[j] + cnt_l[j];
+						if (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo + 1, e.n_gape, STATE_D, 1, opt);
+					}
+				}
+			} else if (e.state == STATE_I) { // extention of an insertion
+				if (e.n_gape < opt->max_gape) // gap extention is allowed
+					gap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_I, 1, opt);
+			} else if (e.state == STATE_D) { // extention of a deletion
+				if (e.n_gape < opt->max_gape) { // gap extention is allowed
+					if (e.n_gape + e.n_gapo < max_diff || occ < opt->max_del_occ) {
+						for (j = 0; j != 4; ++j) {
+							k = bwt->L2[j] + cnt_k[j] + 1;
+							l = bwt->L2[j] + cnt_l[j];
+							if (k <= l) gap_push(stack, i + 1, k, l, e.n_mm, e.n_gapo, e.n_gape + 1, STATE_D, 1, opt);
+						}
+					}
+				}
+			}
+		}
+		// mismatches
+		if (allow_diff && allow_M) { // mismatch is allowed
+			// j = 1..3 tries the three other bases; j = 4 tries seq[i] itself,
+			// which still counts as a mismatch when seq[i] is above 3
+			for (j = 1; j <= 4; ++j) {
+				int c = (seq[i] + j) & 3;
+				int is_mm = (j != 4 || seq[i] > 3);
+				k = bwt->L2[c] + cnt_k[c] + 1;
+				l = bwt->L2[c] + cnt_l[c];
+				if (k <= l) gap_push(stack, i, k, l, e.n_mm + is_mm, e.n_gapo, e.n_gape, STATE_M, is_mm, opt);
+			}
+		} else if (seq[i] < 4) { // try exact match only
+			int c = seq[i] & 3;
+			k = bwt->L2[c] + cnt_k[c] + 1;
+			l = bwt->L2[c] + cnt_l[c];
+			if (k <= l) gap_push(stack, i, k, l, e.n_mm, e.n_gapo, e.n_gape, STATE_M, 0, opt);
+		}
+	}
+
+	*_n_aln = n_aln;
+	//fprintf(stderr, "max_entries = %d\n", max_entries);
+	return aln;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtgap.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,39 @@
+#ifndef BWTGAP_H_
+#define BWTGAP_H_
+
+#include "bwt.h"
+#include "bwtaln.h"
+
+/* One search state kept on the stack that bwt_match_gap() works through. */
+typedef struct { // recursion stack
+	u_int32_t info; // score<<21 | i
+	// counts of mismatches, gap opens and gap extensions (8 bits each), then the
+	// 2-bit alignment state (STATE_M / STATE_I / STATE_D in bwtgap.c);
+	// NOTE(review): n_seed_mm presumably is the mismatch budget left in the seed -- confirm
+	u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
+	bwtint_t k, l; // (k,l) is the SA region of [i,n-1]
+	int last_diff_pos; // NOTE(review): presumably the position of the latest difference -- confirm in bwtgap.c
+} gap_entry_t;
+
+/* An array of gap_entry_t plus two counters; gap_stack_t owns an array of these. */
+typedef struct {
+	int n_entries, m_entries; // presumably entries in use / entries allocated -- confirm in bwtgap.c
+	gap_entry_t *stack; // the entries themselves
+} gap_stack1_t;
+
+/* The stack object passed to bwt_match_gap(): a set of gap_stack1_t arrays. */
+typedef struct {
+	// n_stacks: number of elements in `stacks';
+	// NOTE(review): `best' and `n_entries' look like the index of the best non-empty
+	// sub-stack and the total entry count -- confirm against bwtgap.c
+	int n_stacks, best, n_entries;
+	gap_stack1_t *stacks;
+} gap_stack_t;
+
+/* Public interface of bwtgap.c; given C linkage when included from C++. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/* create / release the search stack handed to bwt_match_gap() */
+	gap_stack_t *gap_init_stack2(int max_score);
+	gap_stack_t *gap_init_stack(int max_mm, int max_gapo, int max_gape, const gap_opt_t *opt);
+	void gap_destroy_stack(gap_stack_t *stack);
+	/* gapped search of `seq' (length `len') against `bwt'; returns an array of
+	 * alignments and stores its length in *_n_aln */
+	bwt_aln1_t *bwt_match_gap(bwt_t *const bwt, int len, const ubyte_t *seq, bwt_width_t *w,
+							  bwt_width_t *seed_w, const gap_opt_t *opt, int *_n_aln, gap_stack_t *stack);
+	/* NOTE(review): presumably copies the n_aln alignments into `s' -- defined elsewhere, confirm */
+	void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtindex.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,158 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+#include <zlib.h>
+#include "bntseq.h"
+#include "bwt.h"
+#include "main.h"
+#include "utils.h"
+
+bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is);
+void bwa_pac_rev_core(const char *fn, const char *fn_rev);
+
+/* Entry point of `bwa index'.
+ *
+ * Options: -a div|bwtsw|is selects the BWT construction algorithm (when not
+ * given it is chosen from the packed length), -p sets the output prefix,
+ * -6 appends ".64" to the default prefix, -c selects colour-space indexing.
+ *
+ * Steps, in order: pack the FASTA, build <prefix>.bwt from <prefix>.pac,
+ * rewrite <prefix>.bwt through bwt_bwtupdate_core(), pack the FASTA again
+ * with the last argument of bns_fasta2bntseq() set to 1, and finally write
+ * <prefix>.sa with a sampling interval of 32.
+ *
+ * Returns 0 on success and 1 on a command-line error. `prefix' is released
+ * on every return path.
+ */
+int bwa_index(int argc, char *argv[])
+{
+	char *prefix = 0, *str, *str2, *str3;
+	int c, algo_type = 0, is_color = 0, is_64 = 0;
+	clock_t t;
+	int64_t l_pac;
+
+	while ((c = getopt(argc, argv, "6ca:p:")) >= 0) {
+		switch (c) {
+		case 'a': // if -a is not set, algo_type will be determined later
+			if (strcmp(optarg, "div") == 0) algo_type = 1;
+			else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
+			else if (strcmp(optarg, "is") == 0) algo_type = 3;
+			else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
+			break;
+		case 'p': free(prefix); prefix = strdup(optarg); break; // a repeated -p replaces the earlier value
+		case 'c': is_color = 1; break;
+		case '6': is_64 = 1; break;
+		default: free(prefix); return 1;
+		}
+	}
+
+	if (optind + 1 > argc) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, "Usage:   bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n");
+		fprintf(stderr, "Options: -a STR    BWT construction algorithm: bwtsw or is [auto]\n");
+		fprintf(stderr, "         -p STR    prefix of the index [same as fasta name]\n");
+		fprintf(stderr, "         -6        index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
+//		fprintf(stderr, "         -c        build color-space index\n");
+		fprintf(stderr, "\n");
+		fprintf(stderr,	"Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
+		fprintf(stderr, "         `-a div' do not work for long genomes. Please choose `-a'\n");
+		fprintf(stderr, "         according to the length of the genome.\n\n");
+		free(prefix);
+		return 1;
+	}
+	if (prefix == 0) {
+		// room for the FASTA name, an optional ".64" and the terminating NUL
+		prefix = malloc(strlen(argv[optind]) + 4);
+		strcpy(prefix, argv[optind]);
+		if (is_64) strcat(prefix, ".64");
+	}
+	// scratch buffers for <prefix> plus a short file-name suffix
+	str  = (char*)calloc(strlen(prefix) + 10, 1);
+	str2 = (char*)calloc(strlen(prefix) + 10, 1);
+	str3 = (char*)calloc(strlen(prefix) + 10, 1);
+
+	if (is_color == 0) { // nucleotide indexing
+		gzFile fp = xzopen(argv[optind], "r");
+		t = clock();
+		fprintf(stderr, "[bwa_index] Pack FASTA... ");
+		l_pac = bns_fasta2bntseq(fp, prefix, 0);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		gzclose(fp);
+	} else { // color indexing
+		gzFile fp = xzopen(argv[optind], "r");
+		strcat(strcpy(str, prefix), ".nt");
+		t = clock();
+		fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... ");
+		l_pac = bns_fasta2bntseq(fp, str, 0);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		gzclose(fp);
+		{
+			// reuse the pac2cspac sub-command: <prefix>.nt -> <prefix>
+			char *tmp_argv[3];
+			tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix;
+			t = clock();
+			fprintf(stderr, "[bwa_index] Convert nucleotide PAC to color PAC... ");
+			bwa_pac2cspac(3, tmp_argv);
+			fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		}
+	}
+	if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
+	{
+		strcpy(str, prefix); strcat(str, ".pac");
+		strcpy(str2, prefix); strcat(str2, ".bwt");
+		t = clock();
+		fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
+		if (algo_type == 2) bwt_bwtgen(str, str2);
+		else if (algo_type == 1 || algo_type == 3) {
+			bwt_t *bwt;
+			bwt = bwt_pac2bwt(str, algo_type == 3);
+			bwt_dump_bwt(str2, bwt);
+			bwt_destroy(bwt);
+		}
+		fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+	}
+	{
+		// reload the BWT, interleave the occurrence counts and overwrite the file
+		bwt_t *bwt;
+		strcpy(str, prefix); strcat(str, ".bwt");
+		t = clock();
+		fprintf(stderr, "[bwa_index] Update BWT... ");
+		bwt = bwt_restore_bwt(str);
+		bwt_bwtupdate_core(bwt);
+		bwt_dump_bwt(str, bwt);
+		bwt_destroy(bwt);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+	}
+	{
+		gzFile fp = xzopen(argv[optind], "r");
+		t = clock();
+		fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
+		l_pac = bns_fasta2bntseq(fp, prefix, 1);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		gzclose(fp);
+	}
+	{
+		bwt_t *bwt;
+		strcpy(str, prefix); strcat(str, ".bwt");
+		strcpy(str3, prefix); strcat(str3, ".sa");
+		t = clock();
+		fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
+		bwt = bwt_restore_bwt(str);
+		bwt_cal_sa(bwt, 32);
+		bwt_dump_sa(str3, bwt);
+		bwt_destroy(bwt);
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+	}
+	free(str3); free(str2); free(str); free(prefix);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtio.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,77 @@
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "bwt.h"
+#include "utils.h"
+
+/* Write `bwt' to file `fn': the primary index, L2[1..4], then bwt_size
+ * 32-bit words of BWT data. Every write and the final close are checked
+ * and a failure is reported through xassert(), so a full disk no longer
+ * leaves a silently truncated index behind. */
+void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
+{
+	FILE *fp;
+	size_t n_done; // item count returned by the most recent fwrite()
+	int ret;
+
+	fp = xopen(fn, "wb");
+	n_done = fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
+	xassert(n_done == 1, "fail to write the primary index of the BWT.");
+	n_done = fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
+	xassert(n_done == 4, "fail to write the L2 array of the BWT.");
+	n_done = fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
+	xassert(n_done == bwt->bwt_size, "fail to write the BWT data.");
+	ret = fclose(fp); // buffered write errors can surface only here
+	xassert(ret == 0, "fail to close the BWT file.");
+}
+
+/* Write the sampled suffix array of `bwt' to file `fn'. Layout, every
+ * field sizeof(bwtint_t) bytes wide: primary, L2[1..4], sa_intv, seq_len,
+ * then sa[1..n_sa-1] (sa[0] is not stored). Every write and the final
+ * close are checked and a failure is reported through xassert(). */
+void bwt_dump_sa(const char *fn, const bwt_t *bwt)
+{
+	FILE *fp;
+	size_t n_done; // item count returned by the most recent fwrite()
+	int ret;
+	// copy through a bwtint_t so that exactly sizeof(bwtint_t) well-defined
+	// bytes are written whatever the declared type of bwt->sa_intv is
+	bwtint_t sa_intv = bwt->sa_intv;
+
+	fp = xopen(fn, "wb");
+	n_done = fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
+	xassert(n_done == 1, "fail to write the primary index of the SA.");
+	n_done = fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
+	xassert(n_done == 4, "fail to write the L2 array of the SA.");
+	n_done = fwrite(&sa_intv, sizeof(bwtint_t), 1, fp);
+	xassert(n_done == 1, "fail to write the SA interval.");
+	n_done = fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp);
+	xassert(n_done == 1, "fail to write the sequence length of the SA.");
+	n_done = fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
+	xassert(n_done == bwt->n_sa - 1, "fail to write the SA data.");
+	ret = fclose(fp); // buffered write errors can surface only here
+	xassert(ret == 0, "fail to close the SA file.");
+}
+
+/* Load the sampled suffix array stored in file `fn' into `bwt'.
+ *
+ * The file's primary index and sequence length must equal the ones already
+ * held by `bwt'; the four L2 values in the file are skipped. On return
+ * bwt->sa_intv, bwt->n_sa and bwt->sa are set, with sa[0] = -1.
+ * Short reads, a zero sampling interval and allocation failure are
+ * reported through xassert() instead of being ignored. */
+void bwt_restore_sa(const char *fn, bwt_t *bwt)
+{
+	char skipped[256];
+	FILE *fp;
+	bwtint_t primary, sa_intv;
+	size_t n_got; // item count returned by the most recent fread()
+
+	fp = xopen(fn, "rb");
+	n_got = fread(&primary, sizeof(bwtint_t), 1, fp);
+	xassert(n_got == 1, "truncated SA file: primary is missing.");
+	xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same.");
+	n_got = fread(skipped, sizeof(bwtint_t), 4, fp); // skip
+	xassert(n_got == 4, "truncated SA file: L2 is missing.");
+	// read into a bwtint_t first: the field on disk is sizeof(bwtint_t) bytes
+	// wide whatever the declared type of bwt->sa_intv is
+	n_got = fread(&sa_intv, sizeof(bwtint_t), 1, fp);
+	xassert(n_got == 1, "truncated SA file: sa_intv is missing.");
+	xassert(sa_intv != 0, "invalid SA file: the sampling interval is zero.");
+	bwt->sa_intv = sa_intv;
+	n_got = fread(&primary, sizeof(bwtint_t), 1, fp);
+	xassert(n_got == 1, "truncated SA file: seq_len is missing.");
+	xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same.");
+
+	bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv;
+	bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t));
+	xassert(bwt->sa != 0, "fail to allocate memory for the SA.");
+	bwt->sa[0] = -1;
+
+	n_got = fread(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp);
+	xassert(n_got == bwt->n_sa - 1, "truncated SA file: SA data are incomplete.");
+	fclose(fp);
+}
+
+/* Read a BWT written by bwt_dump_bwt() from file `fn'.
+ *
+ * Returns a newly allocated bwt_t with primary, L2[1..4], bwt, bwt_size and
+ * seq_len (= L2[4]) filled in and bwt_gen_cnt_table() applied; the caller
+ * releases it with bwt_destroy(). bwt_size is derived from the file size:
+ * everything after the five bwtint_t header fields, counted in 32-bit words.
+ * A file shorter than the header, a short read or an allocation failure is
+ * reported through xassert(). */
+bwt_t *bwt_restore_bwt(const char *fn)
+{
+	bwt_t *bwt;
+	FILE *fp;
+	long fsize;
+	size_t n_got; // item count returned by the most recent fread()
+
+	bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
+	xassert(bwt != 0, "fail to allocate memory for the BWT.");
+	fp = xopen(fn, "rb");
+	fseek(fp, 0, SEEK_END);
+	fsize = ftell(fp);
+	// without this check the subtraction below would wrap around to a huge size
+	xassert(fsize >= (long)(sizeof(bwtint_t) * 5), "invalid BWT file: shorter than its header.");
+	bwt->bwt_size = (fsize - sizeof(bwtint_t) * 5) >> 2;
+	bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
+	xassert(bwt->bwt != 0 || bwt->bwt_size == 0, "fail to allocate memory for the BWT data.");
+	fseek(fp, 0, SEEK_SET);
+	n_got = fread(&bwt->primary, sizeof(bwtint_t), 1, fp);
+	xassert(n_got == 1, "fail to read the primary index of the BWT.");
+	n_got = fread(bwt->L2+1, sizeof(bwtint_t), 4, fp);
+	xassert(n_got == 4, "fail to read the L2 array of the BWT.");
+	n_got = fread(bwt->bwt, 4, bwt->bwt_size, fp);
+	xassert(n_got == bwt->bwt_size, "fail to read the BWT data.");
+	bwt->seq_len = bwt->L2[4];
+	fclose(fp);
+	bwt_gen_cnt_table(bwt);
+
+	return bwt;
+}
+
+/* Release a bwt_t together with its sa and bwt arrays; a null pointer is accepted. */
+void bwt_destroy(bwt_t *bwt)
+{
+	if (bwt != 0) {
+		free(bwt->sa);
+		free(bwt->bwt);
+		free(bwt);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtmisc.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,230 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "bntseq.h"
+#include "utils.h"
+#include "main.h"
+#include "bwt.h"
+
+#ifdef _DIVBWT
+#include "divsufsort.h"
+#endif
+
+int is_bwt(ubyte_t *T, int n);
+
+/* Compute the sequence length recorded in a .pac file: with `off' the offset
+ * of the file's final byte and `c' the value of that byte, the result is
+ * (off - 1) * 4 + c. */
+int64_t bwa_seq_len(const char *fn_pac)
+{
+	ubyte_t tail;      /* value of the final byte */
+	int64_t tail_off;  /* file offset of the final byte */
+	FILE *pac = xopen(fn_pac, "rb");
+
+	fseek(pac, -1, SEEK_END);
+	tail_off = ftell(pac);
+	fread(&tail, 1, 1, pac);
+	fclose(pac);
+	return 4 * (tail_off - 1) + (int)tail;
+}
+
+/* Build the BWT of the 2-bit packed sequence stored in file `fn_pac'.
+ *
+ * use_is != 0 selects is_bwt(); otherwise divbwt() is used, which needs the
+ * file to be compiled with _DIVBWT. Returns a newly allocated bwt_t holding
+ * seq_len, the cumulative counts L2[], the primary index and the BWT packed
+ * 16 symbols per 32-bit word; release it with bwt_destroy().
+ *
+ * Indices and sizes are bwtint_t so that they have the same width as
+ * bwt->seq_len. NOTE(review): is_bwt() still takes an `int' length, so the
+ * usable sequence length stays bounded by that interface. */
+bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
+{
+	bwt_t *bwt;
+	ubyte_t *buf, *buf2;
+	bwtint_t i, pac_size;
+	FILE *fp;
+
+	// initialization
+	bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
+	bwt->seq_len = bwa_seq_len(fn_pac);
+	bwt->bwt_size = (bwt->seq_len + 15) >> 4;
+	fp = xopen(fn_pac, "rb");
+
+	// prepare sequence
+	pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
+	buf2 = (ubyte_t*)calloc(pac_size, 1);
+	fread(buf2, 1, pac_size, fp);
+	fclose(fp);
+	memset(bwt->L2, 0, sizeof(bwt->L2)); // whole array, whatever sizeof(bwtint_t) is
+	buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
+	for (i = 0; i < bwt->seq_len; ++i) {
+		// unpack one 2-bit symbol; the first symbol of a byte is in its top two bits
+		buf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3;
+		++bwt->L2[1+buf[i]];
+	}
+	for (i = 2; i <= 4; ++i) bwt->L2[i] += bwt->L2[i-1]; // counts -> cumulative counts
+	free(buf2);
+
+	// Burrows-Wheeler Transform
+	if (use_is) {
+		bwt->primary = is_bwt(buf, bwt->seq_len);
+	} else {
+#ifdef _DIVBWT
+		bwt->primary = divbwt(buf, buf, 0, bwt->seq_len);
+#else
+		err_fatal_simple("libdivsufsort is not compiled in.");
+#endif
+	}
+	bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4);
+	for (i = 0; i < bwt->seq_len; ++i) // cast first: shifting a promoted int by 30 would reach the sign bit
+		bwt->bwt[i>>4] |= (u_int32_t)buf[i] << ((15 - (i&15)) << 1);
+	free(buf);
+	return bwt;
+}
+
+/* `bwa pac2bwt [-d] <in.pac> <out.bwt>': build a BWT from a .pac file and
+ * write it out. -d clears use_is, which is passed on to bwt_pac2bwt().
+ * Returns 0 on success and 1 on a command-line error. */
+int bwa_pac2bwt(int argc, char *argv[])
+{
+	int opt_ch, use_is = 1;
+	bwt_t *result;
+
+	while ((opt_ch = getopt(argc, argv, "d")) >= 0) {
+		if (opt_ch != 'd') return 1;
+		use_is = 0;
+	}
+	if (argc < optind + 2) {
+		fprintf(stderr, "Usage: bwa pac2bwt [-d] <in.pac> <out.bwt>\n");
+		return 1;
+	}
+	result = bwt_pac2bwt(argv[optind], use_is);
+	bwt_dump_bwt(argv[optind+1], result);
+	bwt_destroy(result);
+	return 0;
+}
+
+/* The k-th 2-bit symbol of a BWT that is still packed 16 symbols per 32-bit
+ * word, with no occurrence counts interleaved; symbol 0 of a word is in its
+ * top two bits. */
+#define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3)
+
+/* Rebuild bwt->bwt so that the running counts of the four symbols are
+ * interleaved with the BWT words: one group of four bwtint_t counters before
+ * every OCC_INTERVAL symbols, plus one final group after the last symbol.
+ * bwt->bwt_size is enlarged accordingly and the old array is freed. */
+void bwt_bwtupdate_core(bwt_t *bwt)
+{
+	bwtint_t i, k, c[4], n_occ;
+	uint32_t *buf;
+
+	// number of counter groups: one per started interval plus the final one
+	n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
+	// each group is 4 counters of sizeof(bwtint_t) bytes, i.e. sizeof(bwtint_t) 32-bit words
+	bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size
+	buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt
+	c[0] = c[1] = c[2] = c[3] = 0;
+	for (i = k = 0; i < bwt->seq_len; ++i) {
+		if (i % OCC_INTERVAL == 0) {
+			// counts of the symbols at positions [0, i)
+			memcpy(buf + k, c, sizeof(bwtint_t) * 4);
+			k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4)
+		}
+		if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 symbols of 2 bits each per 32-bit word
+		++c[bwt_B00(bwt, i)];
+	}
+	// the last element
+	memcpy(buf + k, c, sizeof(bwtint_t) * 4);
+	xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size");
+	// update bwt
+	free(bwt->bwt); bwt->bwt = buf;
+}
+
+/* `bwa bwtupdate <the.bwt>': load the BWT named by argv[1], run
+ * bwt_bwtupdate_core() on it and write it back to the same file.
+ * Returns 0 on success and 1 when the file argument is missing. */
+int bwa_bwtupdate(int argc, char *argv[])
+{
+	const char *fn_bwt;
+	bwt_t *loaded;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: bwa bwtupdate <the.bwt>\n");
+		return 1;
+	}
+	fn_bwt = argv[1];
+	loaded = bwt_restore_bwt(fn_bwt);
+	bwt_bwtupdate_core(loaded);
+	bwt_dump_bwt(fn_bwt, loaded);
+	bwt_destroy(loaded);
+	return 0;
+}
+
+/* Colour of a pair of adjacent bases, indexed by (1<<first)|(1<<second). */
+const int nst_color_space_table[] = { 4, 0, 0, 1, 0, 2, 3, 4, 0, 3, 2, 4, 1, 4, 4, 4};
+
+/* Convert the packed nucleotide sequence behind bns->fp_pac into a packed
+   colour sequence and return it in a newly allocated buffer of
+   l_pac/4 + 1 bytes. The first entry is the first base itself; every
+   later entry is the colour of a base and its predecessor. The stream
+   is rewound after reading.
+   This function is not memory efficient, but it keeps things simple.
+   Ideally the .amb files should change as well, because one 'N' in the
+   nucleotide sequence leads to two ambiguous colours; this is not done. */
+uint8_t *bwa_pac2cspac_core(const bntseq_t *bns)
+{
+	uint8_t *nt_pac, *cs_pac;
+	bwtint_t pos;
+	int prev, cur;
+
+	nt_pac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1);
+	cs_pac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1);
+	fread(nt_pac, 1, bns->l_pac/4+1, bns->fp_pac);
+	rewind(bns->fp_pac);
+
+	/* position 0 keeps the base as it is */
+	prev = nt_pac[0]>>6;
+	cs_pac[0] = prev<<6;
+
+	pos = 1;
+	while (pos < bns->l_pac) {
+		const int shift = (int)((~pos&3)*2); /* bit offset of this entry within its byte */
+		cur = nt_pac[pos>>2] >> shift & 3;
+		cs_pac[pos>>2] |= nst_color_space_table[(1<<prev)|(1<<cur)] << shift;
+		prev = cur;
+		++pos;
+	}
+	free(nt_pac);
+	return cs_pac;
+}
+
+/* `bwa pac2cspac <in.nt.prefix> <out.cs.prefix>': restore the sequence
+ * annotations from argv[1], convert the packed sequence with
+ * bwa_pac2cspac_core(), dump the annotations under argv[2] and write
+ * <argv[2]>.pac (l_pac/4 + 1 data bytes followed by one byte holding
+ * l_pac % 4). Returns 0 on success and 1 when arguments are missing.
+ * The file-name buffer is now released before returning. */
+int bwa_pac2cspac(int argc, char *argv[])
+{
+	bntseq_t *bns;
+	uint8_t *cspac, ct;
+	char *str;
+	FILE *fp;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: bwa pac2cspac <in.nt.prefix> <out.cs.prefix>\n");
+		return 1;
+	}
+	bns = bns_restore(argv[1]);
+	cspac = bwa_pac2cspac_core(bns);
+	bns_dump(bns, argv[2]);
+	// now write cspac
+	str = (char*)calloc(strlen(argv[2]) + 5, 1); // prefix + ".pac" + NUL
+	strcat(strcpy(str, argv[2]), ".pac");
+	fp = xopen(str, "wb");
+	free(str); // only needed to open the file
+	fwrite(cspac, 1, bns->l_pac/4 + 1, fp);
+	ct = bns->l_pac % 4;
+	fwrite(&ct, 1, 1, fp);
+	fclose(fp);
+	bns_destroy(bns);
+	free(cspac);
+	return 0;
+}
+
+/* `bwa bwt2sa [-i INT] <in.bwt> <out.sa>': compute the sampled suffix array
+ * of a BWT file and write it out. -i sets the sampling interval (default
+ * 32) and must be a positive integer; a zero or negative value is now
+ * rejected here instead of being handed to bwt_cal_sa().
+ * Returns 0 on success and 1 on a command-line error. */
+int bwa_bwt2sa(int argc, char *argv[])
+{
+	bwt_t *bwt;
+	int c, sa_intv = 32;
+	while ((c = getopt(argc, argv, "i:")) >= 0) {
+		switch (c) {
+		case 'i': sa_intv = atoi(optarg); break;
+		default: return 1;
+		}
+	}
+	if (sa_intv <= 0) { // atoi() also yields 0 for non-numeric input
+		fprintf(stderr, "[%s] the argument of -i must be a positive integer.\n", __func__);
+		return 1;
+	}
+	if (optind + 2 > argc) {
+		fprintf(stderr, "Usage: bwa bwt2sa [-i %d] <in.bwt> <out.sa>\n", sa_intv);
+		return 1;
+	}
+	bwt = bwt_restore_bwt(argv[optind]);
+	bwt_cal_sa(bwt, sa_intv);
+	bwt_dump_sa(argv[optind+1], bwt);
+	bwt_destroy(bwt);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtsw2.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,69 @@
+#ifndef LH3_BWTSW2_H
+#define LH3_BWTSW2_H
+
+#include <stdint.h>
+#include "bntseq.h"
+#include "bwt_lite.h"
+#include "bwt.h"
+
+#define BSW2_FLAG_MATESW  0x100
+#define BSW2_FLAG_TANDEM  0x200
+#define BSW2_FLAG_MOVED   0x400
+#define BSW2_FLAG_RESCUED 0x800
+
+/* Options of the BWA-SW aligner; bsw2_init_opt() supplies the defaults. */
+typedef struct {
+	int skip_sw:16, hard_clip:16; // two flags, both 0 by default
+	// a: score of a match; b: mismatch penalty; q, r: gap open / gap extension
+	// penalties; qr = q + r; bw: band width handed to the aligner;
+	// t: score threshold (also the floor of the sub-optimal score in the mapQ formula)
+	int a, b, q, r, t, qr, bw, max_ins;
+	int z, is, t_seeds, multi_2nd;
+	float mask_level, coef; // mask_level is passed to bsw2_resolve_query_overlaps()
+	int n_threads, chunk_size;
+} bsw2opt_t;
+
+/* One hit of a query against the reference. */
+typedef struct {
+	bwtint_t k, l; // when l == 0, k is used as the reference coordinate of the hit; hits with l != 0 are skipped by the extension and CIGAR code
+	// flag: 0x10 marks a reverse-strand hit, 0x10000 / 0x20000 record which search
+	// produced the hit, and the BSW2_FLAG_* bits may also be set;
+	// n_seeds is a saturating 13-bit counter
+	uint32_t flag:18, n_seeds:13, is_rev:1;
+	int len, G, G2; // len: length on the reference; G: score; G2: compared with opt->t as the sub-optimal score
+	int beg, end; // query interval [beg,end)
+} bsw2hit_t;
+
+/* Per-hit output data filled in by write_aux() and update_mate_aux(). */
+typedef struct {
+	// flag: output flag bits; nn: value returned by bns_cnt_ambi(); chr/pos:
+	// reference sequence index and offset within it (-1 for non-unique hits);
+	// qual/pqual: single-end and paired mapping quality; mchr/mpos: position of
+	// the mate; isize: insert size; nm: mismatches plus gap bases
+	int flag, nn, n_cigar, chr, pos, qual, mchr, mpos, pqual, isize, nm;
+	uint32_t *cigar; // n_cigar entries, each length<<4 | op
+} bsw2aux_t;
+
+/* A list of hits with optional per-hit output data; freed by bsw2_destroy(). */
+typedef struct {
+	int n, max; // hits in use / hits allocated
+	bsw2hit_t *hits;
+	bsw2aux_t *aux; // null until write_aux() allocates it; then n entries parallel to hits
+} bwtsw2_t;
+
+/* Working memory shared across calls; created by bsw2_global_init(). */
+typedef struct {
+	void *stack; // NOTE(review): opaque here -- presumably the search stack of bsw2_core(), confirm
+	int max_l;
+	uint8_t *aln_mem; // scratch buffer passed to aln_extend_core() by the extension routines
+} bsw2global_t;
+
+/* One input sequence together with its output text. */
+typedef struct {
+	int l, tid; // NOTE(review): presumably sequence length and owning thread id -- confirm at the point of use
+	char *name, *seq, *qual, *sam; // NOTE(review): `sam' presumably holds the generated SAM text -- confirm
+} bsw2seq1_t;
+
+/* Public BWA-SW interface; given C linkage when included from C++. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/* allocate an option set filled with the default values */
+	bsw2opt_t *bsw2_init_opt();
+	/* core search; its caller in bwtsw2_aux.c uses the result as two hit lists and then frees them and the array */
+	bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool);
+	void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn, const char *fn2);
+	/* free a hit list including every CIGAR; a null pointer is accepted */
+	void bsw2_destroy(bwtsw2_t *b);
+
+	bsw2global_t *bsw2_global_init();
+	void bsw2_global_destroy(bsw2global_t *_pool);
+
+	void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, bsw2seq1_t *seq, bwtsw2_t **hit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtsw2_aux.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,821 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#ifdef HAVE_PTHREAD
+#include <pthread.h>
+#endif
+#include "bntseq.h"
+#include "bwt_lite.h"
+#include "utils.h"
+#include "bwtsw2.h"
+#include "stdaln.h"
+#include "kstring.h"
+
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+#include "ksort.h"
+#define __left_lt(a, b) ((a).end > (b).end)
+KSORT_INIT(hit, bsw2hit_t, __left_lt)
+
+extern unsigned char nst_nt4_table[256];
+
+/* Complement of a nucleotide character, indexed by its character code.
+ * Letters map to their IUPAC complement with the case kept (A<->T, C<->G,
+ * R<->Y, K<->M, B<->V, D<->H; S, W and X map to themselves); every other
+ * code, including U/u, maps to N (n for lower-case letters). */
+unsigned char nt_comp_table[256] = {
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','T','V','G', 'H','N','N','C', 'D','N','N','M', 'N','K','N','N',
+	'N','N','Y','S', 'A','N','B','W', 'X','R','N','N', 'N','N','N','N',
+	'n','t','v','g', 'h','n','n','c', 'd','n','n','m', 'n','k','n','n',
+	'n','n','y','s', 'a','n','b','w', 'x','r','n','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N'
+};
+
+extern int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS);
+extern int bsw2_resolve_query_overlaps(bwtsw2_t *b, float mask_level);
+
+/* Allocate a zero-filled bsw2opt_t and set the default BWA-SW options.
+ * Fields not assigned below stay 0. The caller owns the result. */
+bsw2opt_t *bsw2_init_opt()
+{
+	bsw2opt_t *opt = (bsw2opt_t*)calloc(1, sizeof(bsw2opt_t));
+
+	/* scoring */
+	opt->a = 1;
+	opt->b = 3;
+	opt->q = 5;
+	opt->r = 2;
+	opt->qr = opt->q + opt->r;
+	opt->t = 30;
+	opt->bw = 50;
+	/* search and filtering */
+	opt->max_ins = 20000;
+	opt->z = 1;
+	opt->is = 3;
+	opt->t_seeds = 5;
+	opt->mask_level = 0.50f;
+	opt->coef = 5.5f;
+	/* output and execution */
+	opt->hard_clip = 0;
+	opt->skip_sw = 0;
+	opt->n_threads = 1;
+	opt->chunk_size = 10000000;
+	return opt;
+}
+
+/* Free a hit list: every CIGAR in aux[], then aux, hits and the list
+ * itself. A null pointer is accepted. */
+void bsw2_destroy(bwtsw2_t *b)
+{
+	if (b == 0) return;
+	if (b->aux) {
+		int idx = 0;
+		while (idx < b->n) {
+			free(b->aux[idx].cigar);
+			++idx;
+		}
+	}
+	free(b->aux);
+	free(b->hits);
+	free(b);
+}
+
+/* Return a newly allocated copy of `b' holding its hits only; aux (and so
+ * every CIGAR) is left null. The capacity of the copy is b->n rounded up
+ * by kroundup32(); an empty list is copied without allocating hits. */
+bwtsw2_t *bsw2_dup_no_cigar(const bwtsw2_t *b)
+{
+	bwtsw2_t *copy = calloc(1, sizeof(bwtsw2_t));
+
+	copy->n = b->n;
+	copy->max = copy->n;
+	if (b->n == 0) return copy;
+
+	kroundup32(copy->max);
+	copy->hits = calloc(copy->max, sizeof(bsw2hit_t));
+	memcpy(copy->hits, b->hits, copy->n * sizeof(bsw2hit_t));
+	return copy;
+}
+
+/* Fill the alignment parameters `par' from the options pointer `opt'.
+ * par.matrix must already point at 25 ints: it becomes a 5x5 matrix with
+ * (opt)->a on the first four diagonal cells and -(opt)->b everywhere else.
+ * Gap open is (opt)->q; gap extension and end-gap are (opt)->r.
+ * Both arguments are fully parenthesised and the loop counter has a
+ * macro-private name, so any expression (including one that uses a
+ * variable called `i') can be passed safely. */
+#define __gen_ap(par, opt) do {									\
+		int gen_ap_i_;											\
+		for (gen_ap_i_ = 0; gen_ap_i_ < 25; ++gen_ap_i_) (par).matrix[gen_ap_i_] = -(opt)->b;	\
+		for (gen_ap_i_ = 0; gen_ap_i_ < 4; ++gen_ap_i_) (par).matrix[gen_ap_i_*5+gen_ap_i_] = (opt)->a; \
+		(par).gap_open = (opt)->q; (par).gap_ext = (opt)->r;	\
+		(par).gap_end = (opt)->r;								\
+		(par).row = 5; (par).band_width = (opt)->bw;			\
+	} while (0)
+
+/* Try to extend every eligible hit in `b' towards the start of the query.
+ *
+ * _query/lq: the query and its length; pac: 2-bit packed reference;
+ * _mem: scratch memory handed to aln_extend_core(). l_pac is not used here.
+ * The hits are re-sorted in place (descending query end). A hit is extended
+ * only when l == 0, k != 0 and no earlier hit in the sorted order contains
+ * it; on success G, len, beg and k are updated. */
+void bsw2_extend_left(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *_query, int lq, uint8_t *pac, bwtint_t l_pac, uint8_t *_mem)
+{
+	int i, matrix[25];
+	bwtint_t k;
+	uint8_t *target = 0, *query;
+	AlnParam par;
+
+	par.matrix = matrix;
+	__gen_ap(par, opt);
+	query = calloc(lq, 1);
+	// sort according to the descending order of query end
+	ks_introsort(hit, b->n, b->hits);
+	// upper bound of `lt' below, reached when p->beg == lq
+	target = calloc(((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq, 1);
+	// reverse _query
+	for (i = 0; i < lq; ++i) query[lq - i - 1] = _query[i];
+	// core loop
+	for (i = 0; i < b->n; ++i) {
+		bsw2hit_t *p = b->hits + i;
+		int lt = ((p->beg + 1) / 2 * opt->a + opt->r) / opt->r + lq; // number of reference bases to fetch
+		int score, j;
+		path_t path;
+		p->n_seeds = 1;
+		if (p->l || p->k == 0) continue;
+		// count the earlier hits that contain p on both query and reference; each one gains a seed
+		for (j = score = 0; j < i; ++j) {
+			bsw2hit_t *q = b->hits + j;
+			if (q->beg <= p->beg && q->k <= p->k && q->k + q->len >= p->k + p->len) {
+				if (q->n_seeds < (1<<13) - 2) ++q->n_seeds; // n_seeds is a 13-bit field: saturate
+				++score;
+			}
+		}
+		if (score) continue; // contained in another hit: nothing to extend
+		if (lt > p->k) lt = p->k; // cannot go beyond the start of the reference
+		// fetch the reference bases left of the hit, nearest first
+		for (k = p->k - 1, j = 0; k > 0 && j < lt; --k) // FIXME: k=0 not considered!
+			target[j++] = pac[k>>2] >> (~k&3)*2 & 0x3;
+		lt = j;
+		// align the reversed query prefix [0,beg) against the reversed reference
+		score = aln_extend_core(target, lt, query + lq - p->beg, p->beg, &par, &path, 0, p->G, _mem);
+		if (score > p->G) { // extensible
+			p->G = score;
+			p->len += path.i;
+			p->beg -= path.j;
+			p->k -= path.i;
+		}
+	}
+	free(query); free(target);
+}
+
+/* Re-extend every hit with l == 0 from its start towards the end of the
+ * query: the reference from p->k onwards is aligned against query[beg..lq)
+ * and, when the score is at least the current G, the hit's G, len and end
+ * are replaced. `pac' is the 2-bit packed reference of l_pac bases and
+ * `_mem' is scratch memory for aln_extend_core(). */
+void bsw2_extend_rght(const bsw2opt_t *opt, bwtsw2_t *b, uint8_t *query, int lq, uint8_t *pac, bwtint_t l_pac, uint8_t *_mem)
+{
+	int matrix[25];
+	int hit_idx;
+	uint8_t *ref;
+	AlnParam par;
+
+	par.matrix = matrix;
+	__gen_ap(par, opt);
+	/* large enough for the longest window requested below (p->beg == 0) */
+	ref = calloc(((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq, 1);
+	for (hit_idx = 0; hit_idx < b->n; ++hit_idx) {
+		bsw2hit_t *p = b->hits + hit_idx;
+		int n_ref = ((lq - p->beg + 1) / 2 * opt->a + opt->r) / opt->r + lq;
+		int n_got = 0, sc;
+		bwtint_t pos;
+		path_t path;
+
+		if (p->l) continue;
+		/* copy the reference window, stopping at the end of the reference */
+		pos = p->k;
+		while (pos < p->k + n_ref && pos < l_pac) {
+			ref[n_got++] = pac[pos>>2] >> (~pos&3)*2 & 0x3;
+			++pos;
+		}
+		n_ref = n_got;
+		sc = aln_extend_core(ref, n_ref, query + p->beg, lq - p->beg, &par, &path, 0, 1, _mem);
+		if (sc < p->G) continue; /* not at least as good: keep the hit as it is */
+		p->G = sc;
+		p->len = path.i;
+		p->end = path.j + p->beg;
+	}
+	free(ref);
+}
+
+/* Generate the CIGAR of every hit with l == 0 and store it in
+ * b->aux[i].cigar / n_cigar; b->aux must already be allocated.
+ * lq: query length; seq[0] / seq[1]: the query used for hits without / with
+ * flag bit 0x10; pac: 2-bit packed reference. Query bases outside the
+ * aligned interval are written as soft clips (op 4). `name' is used by the
+ * disabled debugging block only. */
+static void gen_cigar(const bsw2opt_t *opt, int lq, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
+{
+	uint8_t *target;
+	int i, matrix[25];
+	AlnParam par;
+	path_t *path;
+
+	par.matrix = matrix;
+	__gen_ap(par, opt);
+	i = ((lq + 1) / 2 * opt->a + opt->r) / opt->r + lq; // maximum possible target length
+	target = calloc(i, 1);
+	path = calloc(i + lq, sizeof(path_t));
+	// generate CIGAR
+	for (i = 0; i < b->n; ++i) {
+		bsw2hit_t *p = b->hits + i;
+		bsw2aux_t *q = b->aux + i;
+		uint8_t *query;
+		bwtint_t k;
+		int score, path_len, beg, end;
+		if (p->l) continue;
+		// for reverse-strand hits (0x10) mirror the query interval
+		beg = (p->flag & 0x10)? lq - p->end : p->beg;
+		end = (p->flag & 0x10)? lq - p->beg : p->end;
+		query = seq[(p->flag & 0x10)? 1 : 0] + beg;
+		// unpack the reference interval [k, k+len)
+		for (k = p->k; k < p->k + p->len; ++k) // in principle, no out-of-boundary here
+			target[k - p->k] = pac[k>>2] >> (~k&3)*2 & 0x3;
+		score = aln_global_core(target, p->len, query, end - beg, &par, path, &path_len);
+		q->cigar = aln_path2cigar32(path, path_len, &q->n_cigar);
+#if 0
+		if (name && score != p->G) { // debugging only
+			int j, glen = 0;
+			for (j = 0; j < q->n_cigar; ++j)
+				if ((q->cigar[j]&0xf) == 1 || (q->cigar[j]&0xf) == 2)
+					glen += q->cigar[j]>>4;
+			fprintf(stderr, "[E::%s] %s - unequal score: %d != %d; (qlen, aqlen, arlen, glen, bw) = (%d, %d, %d, %d, %d)\n",
+					__func__, name, score, p->G, lq, end - beg, p->len, glen, opt->bw);
+		}
+#endif
+		if (beg != 0 || end < lq) { // write soft clipping
+			// room for at most one clip at each end
+			q->cigar = realloc(q->cigar, 4 * (q->n_cigar + 2));
+			if (beg != 0) {
+				memmove(q->cigar + 1, q->cigar, q->n_cigar * 4);
+				q->cigar[0] = beg<<4 | 4;
+				++q->n_cigar;
+			}
+			if (end < lq) {
+				q->cigar[q->n_cigar] = (lq - end)<<4 | 4;
+				++q->n_cigar;
+			}
+		}
+	}
+	free(target); free(path);
+}
+
+/* Debugging aid: print the number of hits in `b' and one line for every
+ * hit whose score G is positive. k and l are cast to unsigned long so that
+ * the arguments match the %lu conversions. */
+void bsw2_debug_hits(const bwtsw2_t *b)
+{
+	int i;
+	printf("# raw hits: %d\n", b->n);
+	for (i = 0; i < b->n; ++i) {
+		bsw2hit_t *p = b->hits + i;
+		if (p->G > 0)
+			printf("G=%d, len=%d, [%d,%d), k=%lu, l=%lu, #seeds=%d, is_rev=%d\n", p->G, p->len, p->beg, p->end, (unsigned long)p->k, (unsigned long)p->l, p->n_seeds, p->is_rev);
+	}
+}
+
+/* Append the hits of b[1] to b[0], then destroy b[1] and set it to null.
+ * When is_reverse is set, each appended hit has its query interval mirrored
+ * within a query of length `l' and gets flag bit 0x10. */
+static void merge_hits(bwtsw2_t *b[2], int l, int is_reverse)
+{
+	int src;
+
+	/* grow b[0] to exactly the combined size when it is too small */
+	if (b[0]->max < b[0]->n + b[1]->n) {
+		b[0]->max = b[0]->n + b[1]->n;
+		b[0]->hits = realloc(b[0]->hits, b[0]->max * sizeof(bsw2hit_t));
+	}
+	for (src = 0; src < b[1]->n; ++src) {
+		bsw2hit_t *dst = &b[0]->hits[b[0]->n + src];
+		int old_beg;
+
+		*dst = b[1]->hits[src];
+		if (!is_reverse) continue;
+		old_beg = dst->beg;
+		dst->beg = l - dst->end;
+		dst->end = l - old_beg;
+		dst->flag |= 0x10;
+	}
+	b[0]->n += b[1]->n;
+	bsw2_destroy(b[1]);
+	b[1] = 0;
+}
+/* Align one query of length `l' and return a single merged hit list owned
+ * by the caller. seq[0] is the forward sequence and seq[1] is the reverse
+ * complement; pac is the 2-bit packed reference; pool supplies scratch
+ * memory. */
+static bwtsw2_t *bsw2_aln1_core(const bsw2opt_t *opt, const bntseq_t *bns, uint8_t *pac, const bwt_t *target,
+								int l, uint8_t *seq[2], bsw2global_t *pool)
+{
+	extern void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2]);
+	bwtsw2_t *b[2], **bb[2], **_b, *p;
+	int k, j;
+	bwtl_t *query;
+	// only the forward sequence is indexed and searched
+	query = bwtl_seq2bwtl(l, seq[0]);
+	_b = bsw2_core(bns, opt, query, target, pool);
+	bwtl_destroy(query);
+	// bb[strand][list]: strand 0/1 = is_rev 0/1, list 0/1 = index of the source list in _b
+	for (k = 0; k < 2; ++k) {
+		bb[k] = calloc(2, sizeof(void*));
+		bb[k][0] = calloc(1, sizeof(bwtsw2_t));
+		bb[k][1] = calloc(1, sizeof(bwtsw2_t));
+	}
+	for (k = 0; k < 2; ++k) { // separate _b into bb[2] based on the strand
+		for (j = 0; j < _b[k]->n; ++j) {
+			bsw2hit_t *q;
+			p = bb[_b[k]->hits[j].is_rev][k];
+			if (p->n == p->max) { // grow: 8 entries first, then doubling
+				p->max = p->max? p->max<<1 : 8;
+				p->hits = realloc(p->hits, p->max * sizeof(bsw2hit_t));
+			}
+			q = &p->hits[p->n++];
+			*q = _b[k]->hits[j];
+			if (_b[k]->hits[j].is_rev) { // mirror the query interval
+				int x = q->beg;
+				q->beg = l - q->end;
+				q->end = l - x;
+			}
+		}
+	}
+	b[0] = bb[0][1]; b[1] = bb[1][1]; // bb[*][1] are "narrow SA hits"
+	bsw2_chain_filter(opt, l, b);
+	for (k = 0; k < 2; ++k) {
+		bsw2_extend_left(opt, bb[k][1], seq[k], l, pac, bns->l_pac, pool->aln_mem);
+		merge_hits(bb[k], l, 0); // bb[k][1] is merged to bb[k][0] here
+		bsw2_resolve_duphits(0, 0, bb[k][0], 0);
+		bsw2_extend_rght(opt, bb[k][0], seq[k], l, pac, bns->l_pac, pool->aln_mem);
+		b[k] = bb[k][0];
+		free(bb[k]);		
+	}
+	// the second merge mirrors the reverse-strand hits again and tags them with 0x10
+	merge_hits(b, l, 1); // again, b[1] is merged to b[0]
+	bsw2_resolve_query_overlaps(b[0], opt->mask_level);
+	bsw2_destroy(_b[0]); bsw2_destroy(_b[1]); free(_b);
+	return b[0];
+}
+
+/* Record in ->flag which of the two lists a hit came from: 0x10000 for
+ * b[0] and 0x20000 for b[1]. A hit of b[0] and the first hit of b[1] with
+ * the same beg, end, k, len and G both get 0x30000. */
+static void flag_fr(bwtsw2_t *b[2])
+{
+	bsw2hit_t *fwd, *rev;
+	bsw2hit_t *const fwd_end = b[0]->hits + b[0]->n;
+	bsw2hit_t *const rev_end = b[1]->hits + b[1]->n;
+
+	for (rev = b[1]->hits; rev != rev_end; ++rev)
+		rev->flag |= 0x20000;
+	for (fwd = b[0]->hits; fwd != fwd_end; ++fwd) {
+		fwd->flag |= 0x10000;
+		for (rev = b[1]->hits; rev != rev_end; ++rev) {
+			if (rev->beg != fwd->beg || rev->end != fwd->end) continue;
+			if (rev->k != fwd->k || rev->len != fwd->len || rev->G != fwd->G) continue;
+			/* found in both lists */
+			rev->flag |= 0x30000;
+			fwd->flag |= 0x30000;
+			break;
+		}
+	}
+}
+
+/* An array of bsw2seq1_t records with two counters. */
+typedef struct {
+	int n, max; // presumably records in use / records allocated -- confirm at the point of use
+	bsw2seq1_t *seq;
+} bsw2seq_t;
+
+/* Trim an alignment that runs past the end of its reference sequence.
+ *
+ * The CIGAR is walked to find the reference end `x' of the alignment. If x
+ * exceeds the length of the reference sequence that bns_cnt_ambi() reports
+ * for the hit, the CIGAR is split at the boundary into two candidates, each
+ * soft-clipping the other part of the query, and the candidate with more
+ * matched query bases is kept. `cigar' is overwritten in place, p->k and
+ * p->len are updated, and the new number of CIGAR operations is returned
+ * (n_cigar unchanged when no fix is needed). */
+static int fix_cigar(const bntseq_t *bns, bsw2hit_t *p, int n_cigar, uint32_t *cigar)
+{
+	// FIXME: this routine does not work if the query bridge three reference sequences
+	int32_t coor, refl, lq;
+	int x, y, i, seqid;
+	bns_cnt_ambi(bns, p->k, p->len, &seqid); // called for seqid only; the return value is ignored
+	coor = p->k - bns->anns[seqid].offset; // start of the hit within that sequence
+	refl = bns->anns[seqid].len;
+	x = coor; y = 0; // x: reference position, y: query position
+	// test if the alignment goes beyond the boundary
+	for (i = 0; i < n_cigar; ++i) {
+		int op = cigar[i]&0xf, ln = cigar[i]>>4;
+		if (op == 1 || op == 4 || op == 5) y += ln; // insertion or clipping: query only
+		else if (op == 2) x += ln; // deletion: reference only
+		else x += ln, y += ln;
+	}
+	lq = y; // length of the query sequence
+	if (x > refl) { // then fix it
+		// mq[]: matched query bases of each candidate; nlen[]: its reference length;
+		// nc: number of operations in the first candidate (0 until the split is made);
+		// kk: reference start of the second candidate
+		int j, nc, mq[2], nlen[2];
+		uint32_t *cn;
+		bwtint_t kk = 0;
+		nc = mq[0] = mq[1] = nlen[0] = nlen[1] = 0;
+		cn = calloc(n_cigar + 3, 4); // a split adds at most three operations
+		x = coor; y = 0;
+		for (i = j = 0; i < n_cigar; ++i) {
+			int op = cigar[i]&0xf, ln = cigar[i]>>4;
+			if (op == 4 || op == 5 || op == 1) { // ins or clipping
+				y += ln;
+				cn[j++] = cigar[i];
+			} else if (op == 2) { // del
+				if (x + ln >= refl && nc == 0) {
+					// the deletion spans the boundary: drop it and clip on both sides
+					cn[j++] = (uint32_t)(lq - y)<<4 | 4;
+					nc = j;
+					cn[j++] = (uint32_t)y<<4 | 4;
+					kk = p->k + (x + ln - refl);
+					nlen[0] = x - coor;
+					nlen[1] = p->len - nlen[0] - ln;
+				} else cn[j++] = cigar[i];
+				x += ln;
+			} else if (op == 0) { // match
+				if (x + ln >= refl && nc == 0) {
+					// FIXME: not consider a special case where a split right between M and I
+					cn[j++] = (uint32_t)(refl - x)<<4 | 0; // write M
+					cn[j++] = (uint32_t)(lq - y - (refl - x))<<4 | 4; // write S
+					nc = j;
+					mq[0] += refl - x;
+					cn[j++] = (uint32_t)(y + (refl - x))<<4 | 4;
+					if (x + ln - refl) cn[j++] = (uint32_t)(x + ln - refl)<<4 | 0;
+					mq[1] += x + ln - refl;
+					kk = bns->anns[seqid].offset + refl;
+					nlen[0] = refl - coor;
+					nlen[1] = p->len - nlen[0];
+				} else {
+					cn[j++] = cigar[i];
+					mq[nc?1:0] += ln; // before the split -> first candidate, after -> second
+				}
+				x += ln; y += ln;
+			}
+		}
+		if (mq[0] > mq[1]) { // then take the first alignment
+			n_cigar = nc;
+			memcpy(cigar, cn, 4 * nc);
+			p->len = nlen[0];
+		} else {
+			p->k = kk; p->len = nlen[1];
+			n_cigar = j - nc;
+			memcpy(cigar, cn + nc, 4 * (j - nc));
+		}
+		free(cn);
+	}
+	return n_cigar;
+}
+
+/* Count the differences of an alignment: mismatching bases inside match
+ * operations (op 0) plus all inserted (op 1) and deleted (op 2) bases.
+ * The reference is read from the 2-bit packed `pac' starting at p->k and
+ * the query from `seq'; soft clips (op 4) only advance the query. */
+static int compute_nm(bsw2hit_t *p, int n_cigar, const uint32_t *cigar, const uint8_t *pac, const uint8_t *seq)
+{
+	int op_idx, qpos = 0;
+	int mismatches = 0, gap_bases = 0;
+	bwtint_t rpos = p->k;
+
+	for (op_idx = 0; op_idx < n_cigar; ++op_idx) {
+		const int op = cigar[op_idx]&0xf;
+		const int len = cigar[op_idx]>>4;
+		int off;
+
+		switch (op) {
+		case 0: /* match or mismatch: compare base by base */
+			for (off = 0; off < len; ++off) {
+				int ref = pac[(rpos+off)>>2] >> (~(rpos+off)&3)*2 & 0x3;
+				if (seq[qpos + off] != ref) ++mismatches;
+			}
+			qpos += len; rpos += len;
+			break;
+		case 1: /* insertion */
+			qpos += len; gap_bases += len;
+			break;
+		case 2: /* deletion */
+			rpos += len; gap_bases += len;
+			break;
+		case 4: /* soft clip */
+			qpos += len;
+			break;
+		default: /* other operations do not move either position */
+			break;
+		}
+	}
+	return mismatches + gap_bases;
+}
+
+/* Allocate b->aux and fill it for every hit: CIGAR, NM, mapping quality and
+ * position for hits with l == 0, neutral values for the others.
+ * qlen: query length; seq[]: the two query strands; pac: 2-bit packed
+ * reference; `name' is only forwarded to gen_cigar(). */
+static void write_aux(const bsw2opt_t *opt, const bntseq_t *bns, int qlen, uint8_t *seq[2], const uint8_t *pac, bwtsw2_t *b, const char *name)
+{
+	int i;
+	// allocate for b->aux
+	if (b->n<<1 < b->max) { // less than half of hits[] is in use: shrink it
+		b->max = b->n;
+		kroundup32(b->max);
+		b->hits = realloc(b->hits, b->max * sizeof(bsw2hit_t));
+	}
+	b->aux = calloc(b->n, sizeof(bsw2aux_t));
+	// generate CIGAR
+	gen_cigar(opt, qlen, seq, pac, b, name);
+	// fix CIGAR, generate mapQ, and write chromosomal position
+	for (i = 0; i < b->n; ++i) {
+		bsw2hit_t *p = &b->hits[i];
+		bsw2aux_t *q = &b->aux[i];
+		q->flag = p->flag & 0xfe; // keep bits 1..7 of the hit flag only
+		q->isize = 0;
+		if (p->l == 0) { // unique hit
+			float c = 1.0;
+			int subo;
+			// fix out-of-boundary CIGAR
+			q->n_cigar = fix_cigar(bns, p, q->n_cigar, q->cigar);
+			// compute the NM tag
+			// NOTE(review): the strand is chosen by is_rev here but by flag&0x10 in gen_cigar() -- confirm the two always agree
+			q->nm = compute_nm(p, q->n_cigar, q->cigar, pac, seq[p->is_rev]);
+			// compute mapQ
+			subo = p->G2 > opt->t? p->G2 : opt->t; // sub-optimal score, never below the threshold
+			if (p->flag>>16 == 1 || p->flag>>16 == 2) c *= .5; // origin bits say the hit came from one search only
+			if (p->n_seeds < 2) c *= .2;
+			q->qual = (int)(c * (p->G - subo) * (250.0 / p->G + 0.03 / opt->a) + .499);
+			if (q->qual > 250) q->qual = 250;
+			if (q->qual < 0) q->qual = 0;
+			if (p->flag&1) q->qual = 0; // this is a random hit
+			q->pqual = q->qual; // set the paired qual as qual
+			// get the chromosomal position
+			q->nn = bns_cnt_ambi(bns, p->k, p->len, &q->chr);
+			q->pos = p->k - bns->anns[q->chr].offset;
+		} else q->qual = 0, q->n_cigar = 0, q->chr = q->pos = -1, q->nn = 0;
+	}
+}
+
+static void update_mate_aux(bwtsw2_t *b, const bwtsw2_t *m)
+{ /* Fill the mate-related aux fields of every hit in b (paired/mate flag bits,
+   * mchr, mpos, isize) from the mate's result m, then adjust b->aux[0].pqual
+   * when both ends have exactly one hit. Returns immediately if m is NULL. */
+	int i;
+	if (m == 0) return;
+	// update flag, mchr and mpos
+	for (i = 0; i < b->n; ++i) {
+		bsw2aux_t *q = &b->aux[i];
+		q->flag |= 1; // paired
+		if (m->n == 0) q->flag |= 8; // mate unmapped
+		if (m->n == 1) { // mate columns are only filled when the mate has exactly one hit
+			q->mchr = m->aux[0].chr;
+			q->mpos = m->aux[0].pos;
+			if (m->aux[0].flag&0x10) q->flag |= 0x20; // mate reverse strand
+			if (q->chr == q->mchr) { // set insert size
+				if (q->mpos + m->hits[0].len > q->pos)
+					q->isize = q->mpos + m->hits[0].len - q->pos; // mate's end is to the right of this hit: positive size
+				else q->isize = q->mpos - q->pos - b->hits[0].len; // NOTE(review): uses b->hits[0].len, not b->hits[i].len; confirm this is intended when b->n > 1
+			} else q->isize = 0;
+		} else q->mchr = q->mpos = -1;
+	}
+	// update mapping quality
+	if (b->n == 1 && m->n == 1) { // only adjusted when both ends have a single hit
+		bsw2hit_t *p = &b->hits[0];
+		if (p->flag & BSW2_FLAG_MATESW) { // this alignment is found by Smith-Waterman
+			if (!(p->flag & BSW2_FLAG_TANDEM) && b->aux[0].pqual < 20)
+				b->aux[0].pqual = 20; // raise to at least 20 unless flagged as tandem
+			if (b->aux[0].pqual >= m->aux[0].qual) b->aux[0].pqual = m->aux[0].qual; // never above the mate's qual
+		} else if ((p->flag & 2) && !(m->hits[0].flag & BSW2_FLAG_MATESW)) { // properly paired
+			if (!(p->flag & BSW2_FLAG_TANDEM)) { // pqual is bounded by [b->aux[0].qual,m->aux[0].qual]
+				b->aux[0].pqual += 20;
+				if (b->aux[0].pqual > m->aux[0].qual) b->aux[0].pqual = m->aux[0].qual;
+				if (b->aux[0].pqual < b->aux[0].qual) b->aux[0].pqual = b->aux[0].qual; // applied last, so the lower bound wins if the bounds cross
+			}
+		}
+	}
+}
+
+/* generate SAM lines for a sequence in ks with alignment stored in
+ * b. ks->name and ks->seq will be freed and set to NULL in the end.
+ *
+ * The text is accumulated in a kstring_t whose buffer is handed over to
+ * ks->sam. With no hit (b is NULL or b->n == 0) a single unmapped record
+ * (flag 4) is written; otherwise one record is written per hit. is_pe selects
+ * whether the mate columns are taken from b->aux or printed as placeholders.
+ * bmate is accepted for interface compatibility but is not used here. */
+static void print_hits(const bntseq_t *bns, const bsw2opt_t *opt, bsw2seq1_t *ks, bwtsw2_t *b, int is_pe, bwtsw2_t *bmate)
+{
+	int i, k;
+	kstring_t str;
+	memset(&str, 0, sizeof(kstring_t));
+	if (b == 0 || b->n == 0) { // no hits
+		ksprintf(&str, "%s\t4\t*\t0\t0\t*\t*\t0\t0\t", ks->name);
+		for (i = 0; i < ks->l; ++i) kputc(ks->seq[i], &str);
+		if (ks->qual) {
+			kputc('\t', &str);
+			for (i = 0; i < ks->l; ++i) kputc(ks->qual[i], &str);
+		} else kputs("\t*", &str);
+		kputc('\n', &str);
+	}
+	for (i = 0; b && i < b->n; ++i) {
+		bsw2hit_t *p = b->hits + i;
+		bsw2aux_t *q = b->aux + i;
+		int j, beg, end, type = 0;
+		// print mandatory fields before SEQ
+		ksprintf(&str, "%s\t%d", ks->name, q->flag | (opt->multi_2nd && i? 0x100 : 0));
+		ksprintf(&str, "\t%s\t%ld", q->chr>=0? bns->anns[q->chr].name : "*", (long)q->pos + 1);
+		if (p->l == 0) { // not a repetitive hit
+			ksprintf(&str, "\t%d\t", q->pqual);
+			for (k = 0; k < q->n_cigar; ++k)
+				ksprintf(&str, "%d%c", q->cigar[k]>>4, (opt->hard_clip? "MIDNHHP" : "MIDNSHP")[q->cigar[k]&0xf]);
+		} else ksprintf(&str, "\t0\t*");
+		if (!is_pe) kputs("\t*\t0\t0\t", &str);
+		else ksprintf(&str, "\t%s\t%d\t%d\t", q->mchr==q->chr? "=" : (q->mchr<0? "*" : bns->anns[q->mchr].name), q->mpos+1, q->isize);
+		// get the sequence begin and end
+		beg = 0; end = ks->l;
+		// Trim clipped ends only when a CIGAR exists: records without one
+		// (n_cigar == 0, e.g. repetitive hits) must not touch q->cigar[0] or
+		// q->cigar[-1].
+		if (opt->hard_clip && q->n_cigar > 0) {
+			if ((q->cigar[0]&0xf) == 4) beg += q->cigar[0]>>4;
+			if ((q->cigar[q->n_cigar-1]&0xf) == 4) end -= q->cigar[q->n_cigar-1]>>4;
+		}
+		for (j = beg; j < end; ++j) {
+			// reverse-strand hits are printed reverse-complemented
+			if (p->flag&0x10) kputc(nt_comp_table[(int)ks->seq[ks->l - 1 - j]], &str);
+			else kputc(ks->seq[j], &str);
+		}
+		// print base quality if present
+		if (ks->qual) {
+			kputc('\t', &str);
+			for (j = beg; j < end; ++j) {
+				if (p->flag&0x10) kputc(ks->qual[ks->l - 1 - j], &str);
+				else kputc(ks->qual[j], &str);
+			}
+		} else ksprintf(&str, "\t*");
+		// print optional tags
+		ksprintf(&str, "\tAS:i:%d\tXS:i:%d\tXF:i:%d\tXE:i:%d\tNM:i:%d", p->G, p->G2, p->flag>>16, p->n_seeds, q->nm);
+		if (q->nn) ksprintf(&str, "\tXN:i:%d", q->nn);
+		if (p->l) ksprintf(&str, "\tXI:i:%d", p->l - p->k + 1);
+		if (p->flag&BSW2_FLAG_MATESW) type |= 1;
+		if (p->flag&BSW2_FLAG_TANDEM) type |= 2;
+		if (type) ksprintf(&str, "\tXT:i:%d", type);
+		kputc('\n', &str);
+	}
+	ks->sam = str.s; // ownership of the buffer passes to the caller
+	free(ks->seq); ks->seq = 0;
+	free(ks->qual); ks->qual = 0;
+	free(ks->name); ks->name = 0;
+}
+
+/* Derive the per-read options dst from the global options src for a query of
+ * length qlen: the score threshold t is raised to at least log(qlen)*coef, and
+ * the band width is capped by what the query length allows. */
+static void update_opt(bsw2opt_t *dst, const bsw2opt_t *src, int qlen)
+{
+	double min_t;
+	int by_gap, by_score, bw;
+	*dst = *src;
+	min_t = log(qlen) * dst->coef;
+	if (dst->t < min_t) dst->t = (int)(min_t + .499);
+	// set band width: the query length sets a boundary on the maximum band width
+	by_gap = (qlen * dst->a - 2 * dst->q) / (2 * dst->r + dst->a);
+	by_score = (qlen * dst->a - dst->a - dst->t) / dst->r;
+	bw = by_gap > by_score? by_score : by_gap;
+	if (bw < 1) bw = 1; // the original author was unsure whether a zero band width is safe
+	dst->bw = src->bw < bw? src->bw : bw;
+}
+
+/* Core routine to align reads in _seq. It is separated from
+ * process_seqs() to realize multi-threading.
+ *
+ * Three passes are made over the batch: (1) align every read, keeping a
+ * CIGAR-less copy of its hits in buf[]; (2) optionally pair the hits
+ * (is_pe), then build the aux records with write_aux(); (3) fill the mate
+ * fields and format the SAM text into _seq->seq[x].sam. In paired-end mode
+ * reads 2i and 2i+1 are mates, hence the buf[x^1] lookups. */
+static void bsw2_aln_core(bsw2seq_t *_seq, const bsw2opt_t *_opt, const bntseq_t *bns, uint8_t *pac, const bwt_t *target, int is_pe)
+{
+	int x;
+	bsw2opt_t opt;
+	bsw2global_t *pool = bsw2_global_init();
+	bwtsw2_t **buf;
+	opt = *_opt; // keep opt defined even when the batch is empty (it is passed to bsw2_pair below)
+	buf = calloc(_seq->n, sizeof(void*));
+	for (x = 0; x < _seq->n; ++x) {
+		bsw2seq1_t *p = _seq->seq + x;
+		uint8_t *seq[2], *rseq[2];
+		int i, l, k;
+		bwtsw2_t *b[2];
+		l = p->l;
+		update_opt(&opt, _opt, p->l);
+		if (pool->max_l < l) { // then enlarge working space for aln_extend_core()
+			int tmp = ((l + 1) / 2 * opt.a + opt.r) / opt.r + l;
+			pool->max_l = l;
+			pool->aln_mem = realloc(pool->aln_mem, (tmp + 2) * 24);
+		}
+		// set seq[2] and rseq[2]: four arrays of length l carved out of one allocation
+		seq[0] = calloc(l * 4, 1);
+		seq[1] = seq[0] + l;
+		rseq[0] = seq[1] + l; rseq[1] = rseq[0] + l;
+		// convert sequences to 2-bit representation
+		for (i = k = 0; i < l; ++i) {
+			int c = nst_nt4_table[(int)p->seq[i]];
+			if (c >= 4) { c = (int)(drand48() * 4); ++k; } // FIXME: ambiguous bases are not properly handled
+			seq[0][i] = c;
+			seq[1][l-1-i] = 3 - c;
+			rseq[0][l-1-i] = 3 - c;
+			rseq[1][i] = c;
+		}
+		if (l - k < opt.t) { // too few unambiguous bases
+			buf[x] = calloc(1, sizeof(bwtsw2_t));
+			free(seq[0]); continue;
+		}
+		// alignment
+		b[0] = bsw2_aln1_core(&opt, bns, pac, target, l, seq, pool);
+		for (k = 0; k < b[0]->n; ++k)
+			if (b[0]->hits[k].n_seeds < opt.t_seeds) break;
+		if (k < b[0]->n) { // some hit has too few seeds: also align with the reversed query
+			b[1] = bsw2_aln1_core(&opt, bns, pac, target, l, rseq, pool);
+			for (i = 0; i < b[1]->n; ++i) {
+				bsw2hit_t *h = &b[1]->hits[i];
+				int old_beg = h->beg;
+				h->flag ^= 0x10, h->is_rev ^= 1; // flip the strand
+				h->beg = l - h->end;
+				h->end = l - old_beg;
+			}
+			flag_fr(b);
+			merge_hits(b, l, 0);
+			bsw2_resolve_duphits(0, 0, b[0], 0);
+			bsw2_resolve_query_overlaps(b[0], opt.mask_level);
+		} else b[1] = 0;
+		// generate CIGAR and print SAM
+		buf[x] = bsw2_dup_no_cigar(b[0]);
+		// free
+		free(seq[0]);
+		bsw2_destroy(b[0]);
+	}
+	if (is_pe) bsw2_pair(&opt, bns->l_pac, pac, _seq->n, _seq->seq, buf);
+	for (x = 0; x < _seq->n; ++x) {
+		bsw2seq1_t *p = _seq->seq + x;
+		uint8_t *seq[2];
+		int i;
+		seq[0] = malloc(p->l * 2); seq[1] = seq[0] + p->l;
+		for (i = 0; i < p->l; ++i) {
+			int c = nst_nt4_table[(int)p->seq[i]];
+			if (c >= 4) c = (int)(drand48() * 4);
+			seq[0][i] = c;
+			seq[1][p->l-1-i] = 3 - c;
+		}
+		update_opt(&opt, _opt, p->l);
+		write_aux(&opt, bns, p->l, seq, pac, buf[x], _seq->seq[x].name);
+		free(seq[0]);
+	}
+	for (x = 0; x < _seq->n; ++x) {
+		// Only look up the mate in paired-end mode: for a single-end batch with
+		// an odd number of reads, buf[x^1] would read one past the end of buf.
+		bwtsw2_t *mate = is_pe? buf[x^1] : 0;
+		if (is_pe) update_mate_aux(buf[x], mate);
+		print_hits(bns, &opt, &_seq->seq[x], buf[x], is_pe, mate);
+	}
+	for (x = 0; x < _seq->n; ++x) bsw2_destroy(buf[x]);
+	free(buf);
+	bsw2_global_destroy(pool);
+}
+
+#ifdef HAVE_PTHREAD
+/* Arguments handed to one worker thread; the fields mirror the parameters of
+ * bsw2_aln_core(). */
+typedef struct {
+	int tid;
+	int is_pe;
+	bsw2seq_t *_seq;
+	const bsw2opt_t *_opt;
+	const bntseq_t *bns;
+	uint8_t *pac;
+	const bwt_t *target;
+} thread_aux_t;
+
+/* pthread entry point: unpack the thread_aux_t and run bsw2_aln_core() on the
+ * thread's share of the reads. Always returns a null pointer. */
+static void *worker(void *data)
+{
+	thread_aux_t *job = (thread_aux_t*)data;
+	bsw2_aln_core(job->_seq, job->_opt, job->bns, job->pac, job->target, job->is_pe);
+	return 0;
+}
+#endif
+
+/* process sequences stored in _seq, generate SAM lines for these
+ * sequences and reset _seq afterwards.
+ *
+ * With HAVE_PTHREAD and opt->n_threads > 1 the reads are dealt round-robin
+ * to the threads (mates stay together because the index is shifted by
+ * is_pe), aligned in parallel, and copied back in the original order.
+ * Otherwise bsw2_aln_core() is called directly. The SAM text of every read
+ * is then printed to stdout and the per-read buffers are released. */
+static void process_seqs(bsw2seq_t *_seq, const bsw2opt_t *opt, const bntseq_t *bns, uint8_t *pac, const bwt_t *target, int is_pe)
+{
+	int i;
+	is_pe = is_pe? 1 : 0; // normalised to 0/1 because it is used as a shift count below
+
+#ifdef HAVE_PTHREAD
+	if (opt->n_threads <= 1) {
+		bsw2_aln_core(_seq, opt, bns, pac, target, is_pe);
+	} else {
+		pthread_t *tid;
+		pthread_attr_t attr;
+		thread_aux_t *data;
+		int j;
+		pthread_attr_init(&attr);
+		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+		data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
+		tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
+		for (j = 0; j < opt->n_threads; ++j) {
+			thread_aux_t *p = data + j;
+			p->tid = j; p->_opt = opt; p->bns = bns; p->is_pe = is_pe;
+			p->pac = pac; p->target = target;
+			p->_seq = calloc(1, sizeof(bsw2seq_t));
+			p->_seq->max = (_seq->n + opt->n_threads - 1) / opt->n_threads + 1;
+			p->_seq->n = 0;
+			p->_seq->seq = calloc(p->_seq->max, sizeof(bsw2seq1_t));
+		}
+		for (i = 0; i < _seq->n; ++i) { // assign sequences to each thread
+			bsw2seq_t *p = data[(i>>is_pe)%opt->n_threads]._seq;
+			p->seq[p->n++] = _seq->seq[i];
+		}
+		for (j = 0; j < opt->n_threads; ++j) pthread_create(&tid[j], &attr, worker, &data[j]);
+		for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
+		pthread_attr_destroy(&attr); // pairs with pthread_attr_init() above
+		for (j = 0; j < opt->n_threads; ++j) data[j]._seq->n = 0; // reuse n as a read cursor
+		for (i = 0; i < _seq->n; ++i) { // copy the result from each thread back
+			bsw2seq_t *p = data[(i>>is_pe)%opt->n_threads]._seq;
+			_seq->seq[i] = p->seq[p->n++];
+		}
+		for (j = 0; j < opt->n_threads; ++j) {
+			thread_aux_t *p = data + j;
+			free(p->_seq->seq);
+			free(p->_seq);
+		}
+		free(data); free(tid);
+	}
+#else
+	bsw2_aln_core(_seq, opt, bns, pac, target, is_pe);
+#endif
+
+	// print and reset
+	for (i = 0; i < _seq->n; ++i) {
+		bsw2seq1_t *p = _seq->seq + i;
+		if (p->sam) printf("%s", p->sam);
+		free(p->name); free(p->seq); free(p->qual); free(p->sam);
+		p->tid = -1; p->l = 0;
+		p->name = p->seq = p->qual = p->sam = 0;
+	}
+	fflush(stdout);
+	_seq->n = 0;
+}
+
+/* Copy one record read by kseq into *p. The name, sequence and (if present)
+ * quality strings are duplicated with strdup(), so p owns its own copies;
+ * tid is set to -1 and sam to NULL. */
+static void kseq_to_bsw2seq(const kseq_t *ks, bsw2seq1_t *p)
+{
+	p->tid = -1;
+	p->sam = 0;
+	p->l = ks->seq.l;
+	p->name = strdup(ks->name.s);
+	p->seq = strdup(ks->seq.s);
+	p->qual = 0;
+	if (ks->qual.l) p->qual = strdup(ks->qual.s); // quality is optional
+}
+
+/* Align all reads in fn (and their mates in fn2, if not NULL) against the
+ * reference described by bns/target and print SAM to stdout.
+ *
+ * The packed reference is loaded from bns->fp_pac, one @SQ header line is
+ * printed per reference sequence, and reads are collected into batches that
+ * are handed to process_seqs() once more than opt->chunk_size *
+ * opt->n_threads bases have been read. A trailing "/x" suffix is stripped
+ * from read names. If fn2 runs out of reads first, the remaining batches are
+ * processed in single-end mode. */
+void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, const char *fn, const char *fn2)
+{
+	gzFile fp, fp2;
+	kseq_t *ks, *ks2;
+	int l, size = 0, is_pe = 0;
+	uint8_t *pac;
+	bsw2seq_t *_seq;
+
+	pac = calloc(bns->l_pac/4+1, 1);
+	if (pac == 0) {
+		fprintf(stderr, "[bsw2_aln] insufficient memory!\n");
+		return;
+	}
+	for (l = 0; l < bns->n_seqs; ++l)
+		printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len);
+	// a short read leaves part of pac zeroed; report it instead of silently aligning against it
+	if (fread(pac, 1, bns->l_pac/4+1, bns->fp_pac) != (size_t)(bns->l_pac/4+1))
+		fprintf(stderr, "[bsw2_aln] warning: fewer bytes than expected were read from the packed reference\n");
+	fp = xzopen(fn, "r");
+	ks = kseq_init(fp);
+	_seq = calloc(1, sizeof(bsw2seq_t));
+	if (fn2) {
+		fp2 = xzopen(fn2, "r");
+		ks2 = kseq_init(fp2);
+		is_pe = 1;
+	} else fp2 = 0, ks2 = 0, is_pe = 0;
+	while (kseq_read(ks) >= 0) {
+		if (ks->name.l > 2 && ks->name.s[ks->name.l-2] == '/')
+			ks->name.l -= 2, ks->name.s[ks->name.l] = 0;
+		if (_seq->n == _seq->max) {
+			_seq->max = _seq->max? _seq->max<<1 : 1024;
+			_seq->seq = realloc(_seq->seq, _seq->max * sizeof(bsw2seq1_t));
+		}
+		kseq_to_bsw2seq(ks, &_seq->seq[_seq->n++]);
+		size += ks->seq.l;
+		if (ks2) {
+			if (kseq_read(ks2) >= 0) {
+				if (ks2->name.l > 2 && ks2->name.s[ks2->name.l-2] == '/')
+					ks2->name.l -= 2, ks2->name.s[ks2->name.l] = 0;
+				kseq_to_bsw2seq(ks2, &_seq->seq[_seq->n++]); // for PE, _seq->n here must be odd and we do not need to enlarge
+				size += ks2->seq.l; // count the mate's own length, not the first read's again
+			} else if (is_pe) { // report the switch once, not for every remaining read
+				fprintf(stderr, "[%s] The second query file has fewer reads. Switched to the single-end mode for the following batches.\n", __func__);
+				is_pe = 0;
+			}
+		}
+		if (size > opt->chunk_size * opt->n_threads) {
+			fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp)...\n", _seq->n, size);
+			process_seqs(_seq, opt, bns, pac, target, is_pe);
+			size = 0;
+		}
+	}
+	// flush the last, possibly partial, batch
+	fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp)...\n", _seq->n, size);
+	process_seqs(_seq, opt, bns, pac, target, is_pe);
+	// free
+	free(pac);
+	free(_seq->seq); free(_seq);
+	kseq_destroy(ks);
+	gzclose(fp);
+	if (fn2) {
+		kseq_destroy(ks2);
+		gzclose(fp2);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtsw2_chain.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,107 @@
+#include <stdio.h>
+#include "bwtsw2.h"
+
+typedef struct { // one seed hit, or one chain of seed hits, used by chaining()/bsw2_chain_filter()
+	uint32_t tbeg, tend; // target interval; bsw2_chain_filter() fills it with p->k and p->k + p->len
+	int qbeg, qend; // query interval, copied from the hit's beg/end; records are sorted by qbeg
+	uint32_t flag:1, idx:31; // flag: which of b[0]/b[1] the hit came from; idx: hit index (seed records) or chain id (chain records)
+	int chain; // seed records: id of the chain the seed was put in; chain records: number of seeds in the chain
+} hsaip_t;
+
+#define _hsaip_lt(a, b) ((a).qbeg < (b).qbeg)
+
+#include "ksort.h"
+KSORT_INIT(hsaip, hsaip_t, _hsaip_lt)
+
+/* Group the n seeds in z[] into chains. Seeds are sorted by query start; each
+ * seed joins the most recently created chain that starts before it on the
+ * target and whose diagonal differs by at most opt->bw, otherwise it opens a
+ * new chain. Chain ids are offset by shift. Chains are written to chain[] and
+ * their number is returned. */
+static int chaining(const bsw2opt_t *opt, int shift, int n, hsaip_t *z, hsaip_t *chain)
+{
+	int s, c, n_chains = 0;
+	ks_introsort(hsaip, n, z);
+	for (s = 0; s < n; ++s) {
+		hsaip_t *seed = &z[s], *host = 0;
+		// look for a compatible chain, newest first
+		for (c = n_chains - 1; c >= 0; --c) {
+			hsaip_t *cand = &chain[c];
+			int dq = seed->qbeg - cand->qbeg; // not negative, since z[] is sorted by qbeg
+			int dt = seed->tbeg - cand->tbeg;
+			if (dt <= 0) continue;
+			if (dq - dt > opt->bw || dt - dq > opt->bw) continue;
+			host = cand;
+			break;
+		}
+		if (host) { // extend the chain and count the seed
+			if (seed->qend > host->qend) host->qend = seed->qend;
+			if (seed->tend > host->tend) host->tend = seed->tend;
+			++host->chain;
+			seed->chain = shift + c;
+		} else { // start a new chain from this seed
+			hsaip_t *fresh = &chain[n_chains];
+			*fresh = *seed;
+			fresh->chain = 1;
+			fresh->idx = seed->chain = shift + n_chains;
+			++n_chains;
+		}
+	}
+	return n_chains;
+}
+
+void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2])
+{ /* Group the hits of b[0] and b[1] into chains, flag every chain whose query
+   * end is covered by an earlier-starting chain holding many more seeds, and
+   * drop all hits of flagged chains by zeroing their G and compacting
+   * b[k]->hits. len is used to flip the query coordinates of b[1] chains. */
+	hsaip_t *z[2], *chain[2];
+	int i, j, k, n[2], m[2];
+	char *flag; // flag[c] != 0 marks chain c as filtered out
+	// initialization
+	n[0] = b[0]->n; n[1] = b[1]->n;
+	z[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); // one record per hit, b[0]'s hits first
+	z[1] = z[0] + n[0];
+	chain[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); // room for one chain per hit
+	for (k = j = 0; k < 2; ++k) {
+		for (i = 0; i < b[k]->n; ++i) {
+			bsw2hit_t *p = b[k]->hits + i;
+			hsaip_t *q = z[k] + i;
+			q->flag = k; q->idx = i;
+			q->tbeg = p->k; q->tend = p->k + p->len;
+			q->chain = -1;
+			q->qbeg = p->beg; q->qend = p->end;
+		}
+	}
+	// chaining
+	m[0] = chaining(opt, 0,    n[0], z[0], chain[0]);
+	chain[1] = chain[0] + m[0]; // b[1]'s chains follow b[0]'s; their ids start at m[0]
+	m[1] = chaining(opt, m[0], n[1], z[1], chain[1]);	
+	// change query coordinate on the reverse strand
+	for (k = 0; k < m[1]; ++k) {
+		hsaip_t *p = chain[1] + k;
+		int tmp = p->qbeg;
+		p->qbeg = len - p->qend; p->qend = len - tmp;
+	}
+	// filtering
+	flag = calloc(m[0] + m[1], 1);
+	ks_introsort(hsaip, m[0] + m[1], chain[0]); // sort all chains together by qbeg
+	for (k = 1; k < m[0] + m[1]; ++k) {
+		hsaip_t *p = chain[0] + k;
+		for (j = 0; j < k; ++j) {
+			hsaip_t *q = chain[0] + j;
+			if (flag[q->idx]) continue; // an already filtered chain cannot filter others
+			if (q->qend >= p->qend && q->chain > p->chain * opt->t_seeds * 2) { // q reaches at least as far and has far more seeds
+				flag[p->idx] = 1;
+				break;
+			}
+		}
+	}
+	for (k = 0; k < n[0] + n[1]; ++k) {
+		hsaip_t *p = z[0] + k;
+		if (flag[p->chain]) // for seed records, chain holds the id assigned by chaining()
+			b[p->flag]->hits[p->idx].G = 0;
+	}
+	free(flag);
+	// squeeze out filtered elements in b[2]
+	for (k = 0; k < 2; ++k) {
+		for (j = i = 0; j < n[k]; ++j) {
+			bsw2hit_t *p = b[k]->hits + j;
+			if (p->G) {
+				if (i != j) b[k]->hits[i++] = *p;
+				else ++i;
+			}
+		}
+		b[k]->n = i;
+	}
+	// free
+	free(z[0]); free(chain[0]); // z[1] and chain[1] point into the same allocations
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtsw2_core.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,615 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/resource.h>
+#include <assert.h>
+#include "bwt_lite.h"
+#include "bwtsw2.h"
+#include "bwt.h"
+#include "kvec.h"
+
+typedef struct { // hash key used by remove_duplicate() to spot cells with the same query interval
+	bwtint_t k, l; // copied from a cell's qk and ql
+} qintv_t;
+
+#define qintv_eq(a, b) ((a).k == (b).k && (a).l == (b).l)
+#define qintv_hash(a) ((a).k>>7^(a).l<<17)
+
+#include "khash.h"
+KHASH_INIT(qintv, qintv_t, uint64_t, 1, qintv_hash, qintv_eq)
+KHASH_MAP_INIT_INT64(64, uint64_t)
+
+#define MINUS_INF -0x3fffffff
+#define MASK_LEVEL 0.90f
+
+struct __mempool_t;
+static void mp_destroy(struct __mempool_t*);
+typedef struct { // one dynamic-programming cell; cells live in bsw2entry_t.array and link to each other by index
+	bwtint_t qk, ql; // interval in the query index (passed to bwt_2occ4()); ql == 0 marks a deleted cell
+	int I, D, G; // scores filled in by fill_cell(); G receives the maximum of the candidates
+	uint32_t pj:2, qlen:30; // pj: query symbol (0..3) by which the cell was reached from its parent; qlen: parent's qlen + 1
+	int tlen; // incremented each time the cell is carried into a child entry in bsw2_core()
+	int ppos, upos; // ppos: index of the parent cell in the same array (-1: none); upos: index of the counterpart in the entry being built (-1: none)
+	int cpos[4]; // child index per query symbol; -1: not created yet, -2: empty interval, other negative values are written when a child is deleted
+} bsw2cell_t;
+
+#include "ksort.h"
+KSORT_INIT_GENERIC(int)
+#define __hitG_lt(a, b) (((a).G + ((int)(a).n_seeds<<2)) > (b).G + ((int)(b).n_seeds<<2))
+KSORT_INIT(hitG, bsw2hit_t, __hitG_lt)
+
+static const bsw2cell_t g_default_cell = { 0, 0, MINUS_INF, MINUS_INF, MINUS_INF, 0, 0, 0, -1, -1, {-1, -1, -1, -1} };
+
+typedef struct { // the set of cells attached to one interval of the target index
+	int n, max; // number of cells in use / allocated capacity of array
+	uint32_t tk, tl; // this is fine; interval in the target index, iterated as bwt->sa[tk..tl] by save_hits()
+	bsw2cell_t *array; // grown by push_array_p() and merge_entry()
+} bsw2entry_t, *bsw2entry_p;
+
+/* --- BEGIN: Stack operations --- */
+typedef struct { // work list used by bsw2_core()
+	int n_pending; // number of live (non-NULL) entries in pending
+	kvec_t(bsw2entry_p) stack0, pending; // stack0: entries ready to be popped; pending: entries waiting to be merged before they are pushed
+	struct __mempool_t *pool; // recycles bsw2entry_t objects; destroyed together with the stack
+} bsw2stack_t;
+
+#define stack_isempty(s) (kv_size(s->stack0) == 0 && s->n_pending == 0)
+static void stack_destroy(bsw2stack_t *s) { mp_destroy(s->pool); kv_destroy(s->stack0); kv_destroy(s->pending); free(s); }
+inline static void stack_push0(bsw2stack_t *s, bsw2entry_p e) { kv_push(bsw2entry_p, s->stack0, e); }
+inline static bsw2entry_p stack_pop(bsw2stack_t *s)
+{
+	assert(!(kv_size(s->stack0) == 0 && s->n_pending != 0));
+	return kv_pop(s->stack0);
+}
+/* --- END: Stack operations --- */
+
+/* --- BEGIN: memory pool --- */
+/* A free list of bsw2entry_t objects, so entries (and their cell arrays) are
+ * reused instead of being allocated again. */
+typedef struct __mempool_t {
+	int cnt; // number of entries currently handed out; non-zero at the end means a leak
+	kvec_t(bsw2entry_p) pool;
+} mempool_t;
+/* Hand out an entry: a recycled one if available, else a zeroed new one. */
+inline static bsw2entry_p mp_alloc(mempool_t *mp)
+{
+	bsw2entry_p e;
+	++mp->cnt;
+	if (kv_size(mp->pool) != 0) e = kv_pop(mp->pool);
+	else e = (bsw2entry_t*)calloc(1, sizeof(bsw2entry_t));
+	return e;
+}
+/* Return entry e to the pool. Its cell count is reset but its array is kept
+ * for reuse. */
+inline static void mp_free(mempool_t *mp, bsw2entry_p e)
+{
+	e->n = 0;
+	--mp->cnt;
+	kv_push(bsw2entry_p, mp->pool, e);
+}
+/* Free every pooled entry together with its cell array, then the pool. */
+static void mp_destroy(struct __mempool_t *mp)
+{
+	size_t i;
+	for (i = 0; i < kv_size(mp->pool); ++i) {
+		bsw2entry_p e = kv_A(mp->pool, i);
+		free(e->array);
+		free(e);
+	}
+	kv_destroy(mp->pool);
+	free(mp);
+}
+/* --- END: memory pool --- */
+
+/* --- BEGIN: utilities --- */
+static khash_t(64) *bsw2_connectivity(const bwtl_t *b)
+{ /* Visit every non-empty interval reachable from (0, b->seq_len) through
+   * bwtl_2occ4() and return a hash keyed by (k<<32 | l) whose value counts how
+   * many times that interval was reached. bsw2_core() decrements these counts
+   * and destroys the hash when it is done. */
+	khash_t(64) *h;
+	uint32_t k, l, cntk[4], cntl[4]; // this is fine
+	uint64_t x;
+	khiter_t iter;
+	int j, ret;
+	kvec_t(uint64_t) stack; // intervals still to be expanded, packed like the hash keys
+
+	kv_init(stack);
+	h = kh_init(64);
+	kh_resize(64, h, b->seq_len * 4);
+	x = b->seq_len; // start interval: k = 0 (high word), l = seq_len (low word)
+	kv_push(uint64_t, stack, x);
+	while (kv_size(stack)) {
+		x = kv_pop(stack);
+		k = x>>32; l = (uint32_t)x;
+		bwtl_2occ4(b, k-1, l, cntk, cntl);
+		for (j = 0; j != 4; ++j) { // one child interval per symbol
+			k = b->L2[j] + cntk[j] + 1;
+			l = b->L2[j] + cntl[j];
+			if (k > l) continue; // empty interval
+			x = (uint64_t)k << 32 | l;
+			iter = kh_put(64, h, x, &ret);
+			if (ret) { // if not present
+				kh_value(h, iter) = 1;
+				kv_push(uint64_t, stack, x); // expand each interval only once
+			} else ++kh_value(h, iter);
+		}
+	}
+	kv_destroy(stack);
+	//fprintf(stderr, "[bsw2_connectivity] %u nodes in the DAG\n", kh_size(h));
+	return h;
+}
+/* pick up top T matches at a node: when u holds more than T live cells with a
+ * positive G, a threshold score is selected with ks_ksmall() and cells scoring
+ * below it are deleted, as are cells equal to it once the tie counter reaches
+ * T. aux->array is borrowed as scratch space and may be reallocated. */
+static void cut_tail(bsw2entry_t *u, int T, bsw2entry_t *aux)
+{
+	int i, *a, n, x;
+	if (u->n <= T) return;
+	if (aux->max < u->n) {
+		aux->max = u->n;
+		aux->array = (bsw2cell_t*)realloc(aux->array, aux->max * sizeof(bsw2cell_t));
+	}
+	a = (int*)aux->array; // reuse the cell buffer as an int array of negated scores
+	for (i = n = 0; i != u->n; ++i)
+		if (u->array[i].ql && u->array[i].G > 0)
+			a[n++] = -u->array[i].G;
+	if (n <= T) return;
+	x = -ks_ksmall(int, n, a, T); // scores were negated, so the selected small value is a high score
+	n = 0; // from here on n counts the cells whose G equals x
+	for (i = 0; i < u->n; ++i) {
+		bsw2cell_t *p = u->array + i;
+		if (p->G == x) ++n;
+		if (p->G < x || (p->G == x && n >= T)) {
+			p->qk = p->ql = 0; p->G = 0; // mark the cell as deleted
+			if (p->ppos >= 0) u->array[p->ppos].cpos[p->pj] = -1; // reset the parent's link to this child
+		}
+	}
+}
+/* remove duplicated cells: among the live cells of u that share the same
+ * (qk, ql), only one is kept. hash is cleared first and used as scratch; each
+ * value packs the kept cell's index (high 32 bits) and its G (low 32 bits). */
+static inline void remove_duplicate(bsw2entry_t *u, khash_t(qintv) *hash)
+{
+	int i, ret, j;
+	khiter_t k;
+	qintv_t key;
+	kh_clear(qintv, hash);
+	for (i = 0; i != u->n; ++i) {
+		bsw2cell_t *p = u->array + i;
+		if (p->ql == 0) continue; // already deleted
+		key.k = p->qk; key.l = p->ql;
+		k = kh_put(qintv, hash, key, &ret);
+		j = -1; // index of the cell to delete, if any
+		if (ret == 0) { // the interval has been seen before
+			if ((uint32_t)kh_value(hash, k) >= p->G) j = i; // the stored cell scores at least as high: drop the current one
+			else {
+				j = kh_value(hash, k)>>32; // drop the stored cell and remember the current one
+				kh_value(hash, k) = (uint64_t)i<<32 | p->G;
+			}
+		} else kh_value(hash, k) = (uint64_t)i<<32 | p->G;
+		if (j >= 0) {
+			p = u->array + j;
+			p->qk = p->ql = 0; p->G = 0;
+			if (p->ppos >= 0) u->array[p->ppos].cpos[p->pj] = -3; // the parent records that this child was removed
+		}
+	}
+}
+/* merge two entries: append the cells of v to u. The parent/child indices of
+ * v's cells are relative to v->array, so every non-negative one is shifted by
+ * the number of cells u held before the merge. v's cells are modified in place
+ * but v->n is left unchanged. opt and b are not used. */
+static void merge_entry(const bsw2opt_t * __restrict opt, bsw2entry_t *u, bsw2entry_t *v, bwtsw2_t *b)
+{
+	const int shift = u->n;
+	int c, j;
+	if (shift + v->n >= u->max) {
+		u->max = shift + v->n;
+		u->array = (bsw2cell_t*)realloc(u->array, u->max * sizeof(bsw2cell_t));
+	}
+	for (c = 0; c != v->n; ++c) {
+		bsw2cell_t *cell = &v->array[c];
+		if (cell->ppos >= 0) cell->ppos += shift;
+		for (j = 0; j < 4; ++j)
+			if (cell->cpos[j] >= 0) cell->cpos[j] += shift;
+	}
+	memcpy(u->array + shift, v->array, v->n * sizeof(bsw2cell_t));
+	u->n = shift + v->n;
+}
+
+/* Return a pointer to the first unused cell of e, growing the array (doubling,
+ * starting at 256 cells) when it is full. e->n is NOT incremented here; the
+ * caller does that once it decides to keep the cell. */
+static inline bsw2cell_t *push_array_p(bsw2entry_t *e)
+{
+	if (e->n == e->max) {
+		if (e->max) e->max <<= 1;
+		else e->max = 256;
+		e->array = (bsw2cell_t*)realloc(e->array, sizeof(bsw2cell_t) * e->max);
+	}
+	return &e->array[e->n];
+}
+
+/* CPU time (user plus system), in seconds, consumed between the two rusage
+ * snapshots last and curr. */
+static inline double time_elapse(const struct rusage *curr, const struct rusage *last)
+{
+	long sec, usec;
+	sec = curr->ru_utime.tv_sec - last->ru_utime.tv_sec;
+	sec += curr->ru_stime.tv_sec - last->ru_stime.tv_sec;
+	usec = curr->ru_utime.tv_usec - last->ru_utime.tv_usec;
+	usec += curr->ru_stime.tv_usec - last->ru_stime.tv_usec;
+	return (double)sec + usec * 1e-6;
+}
+/* --- END: utilities --- */
+
+/* --- BEGIN: processing partial hits --- */
+static void save_hits(const bwtl_t *bwt, int thres, bsw2hit_t *hits, bsw2entry_t *u)
+{ /* Record the cells of u scoring at least thres into hits[], which holds two
+   * slots per target position: hits[2*beg] is the best hit starting at beg
+   * and hits[2*beg+1] the second best. */
+	int i;
+	uint32_t k; // this is fine
+	for (i = 0; i < u->n; ++i) {
+		bsw2cell_t *p = u->array + i;
+		if (p->G < thres) continue;
+		for (k = u->tk; k <= u->tl; ++k) { // every target position covered by this node
+			int beg, end;
+			bsw2hit_t *q = 0; // slot to overwrite, if the cell beats one
+			beg = bwt->sa[k]; end = beg + p->tlen;
+			if (p->G > hits[beg*2].G) {
+				hits[beg*2+1] = hits[beg*2]; // the old best becomes the second best
+				q = hits + beg * 2;
+			} else if (p->G > hits[beg*2+1].G) q = hits + beg * 2 + 1;
+			if (q) {
+				q->k = p->qk; q->l = p->ql; q->len = p->qlen; q->G = p->G;
+				q->beg = beg; q->end = end; q->G2 = q->k == q->l? 0 : q->G; // G2 is 0 only when the query interval holds a single position
+				q->flag = q->n_seeds = 0;
+			}
+		}
+	}
+}
+/* "narrow hits" are node-to-node hits that have a high score and
+ * are not so repetitive (|SA interval|<=IS).
+ *
+ * Every cell of u with G >= t and ql - qk + 1 <= IS is appended to b1->hits
+ * (grown by doubling) and then deleted from u. */
+static void save_narrow_hits(const bwtl_t *bwtl, bsw2entry_t *u, bwtsw2_t *b1, int t, int IS)
+{
+	int i;
+	for (i = 0; i < u->n; ++i) {
+		bsw2hit_t *q;
+		bsw2cell_t *p = u->array + i;
+		if (p->G >= t && p->ql - p->qk + 1 <= IS) { // good narrow hit
+			if (b1->max == b1->n) {
+				b1->max = b1->max? b1->max<<1 : 4;
+				b1->hits = realloc(b1->hits, b1->max * sizeof(bsw2hit_t));
+			}
+			q = &b1->hits[b1->n++];
+			q->k = p->qk; q->l = p->ql;
+			q->len = p->qlen;
+			q->G = p->G; q->G2 = 0;
+			q->beg = bwtl->sa[u->tk]; q->end = q->beg + p->tlen; // only the first target position (tk) of the node is used
+			q->flag = 0;
+			// delete p
+			p->qk = p->ql = 0; p->G = 0;
+			if (p->ppos >= 0) u->array[p->ppos].cpos[p->pj] = -3; // the parent records that this child was removed
+		}
+	}
+}
+/* after this, "narrow SA hits" will be expanded and the coordinates
+ * will be obtained and stored in b->hits[*].k.
+ *
+ * When both bns and bwt are given, b->hits is rebuilt: a hit whose interval
+ * holds at most IS positions is expanded into one hit per position, and any
+ * other hit with G > 0 is kept once with bit 1 set in its flag; k becomes a
+ * coordinate from bns_depos() and l becomes 0. In all cases empty hits are
+ * then squeezed out, the hits are sorted with the hitG ordering, and a hit
+ * that overlaps an earlier one by more than MASK_LEVEL on both query and
+ * target is removed. Returns the number of hits left. */
+int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS)
+{
+	int i, j, n, is_rev;
+	if (b->n == 0) return 0;
+	if (bwt && bns) { // convert to chromosomal coordinates if requested
+		int old_n = b->n;
+		bsw2hit_t *old_hits = b->hits;
+		for (i = n = 0; i < b->n; ++i) { // compute the memory to allocated
+			bsw2hit_t *p = old_hits + i;
+			if (p->l - p->k + 1 <= IS) n += p->l - p->k + 1;
+			else if (p->G > 0) ++n;
+		}
+		b->n = b->max = n;
+		b->hits = calloc(b->max, sizeof(bsw2hit_t)); // zero-filled, so slots left unused have G == 0 and are squeezed out below
+		for (i = j = 0; i < old_n; ++i) {
+			bsw2hit_t *p = old_hits + i;
+			if (p->l - p->k + 1 <= IS) { // the hit is no so repetitive
+				bwtint_t k;
+				if (p->G == 0 && p->k == 0 && p->l == 0 && p->len == 0) continue; // skip slots that were never filled
+				for (k = p->k; k <= p->l; ++k) { // one output hit per position in the interval
+					b->hits[j] = *p;
+					b->hits[j].k = bns_depos(bns, bwt_sa(bwt, k), &is_rev);
+					b->hits[j].l = 0;
+					b->hits[j].is_rev = is_rev;
+					if (is_rev) b->hits[j].k -= p->len - 1;
+					++j;
+				}
+			} else if (p->G > 0) { // too repetitive to expand: keep a single position
+				b->hits[j] = *p;
+				b->hits[j].k = bns_depos(bns, bwt_sa(bwt, p->k), &is_rev);
+				b->hits[j].l = 0;
+				b->hits[j].flag |= 1; // write_aux() gives such hits mapQ 0 ("random hit")
+				b->hits[j].is_rev = is_rev;
+				if (is_rev) b->hits[j].k -= p->len - 1;
+				++j;
+			}
+		}
+		free(old_hits);
+	}
+	for (i = j = 0; i < b->n; ++i) // squeeze out empty elements
+		if (b->hits[i].G) b->hits[j++] = b->hits[i];
+	b->n = j;
+	ks_introsort(hitG, b->n, b->hits); // order defined by __hitG_lt: larger G + 4*n_seeds first
+	for (i = 1; i < b->n; ++i) {
+		bsw2hit_t *p = b->hits + i;
+		for (j = 0; j < i; ++j) {
+			bsw2hit_t *q = b->hits + j;
+			int compatible = 1;
+			if (p->is_rev != q->is_rev) continue; // hits from opposite strands are not duplicates
+			if (p->l == 0 && q->l == 0) { // both hits carry coordinates rather than intervals
+				int qol = (p->end < q->end? p->end : q->end) - (p->beg > q->beg? p->beg : q->beg); // length of query overlap
+				if (qol < 0) qol = 0;
+				if ((float)qol / (p->end - p->beg) > MASK_LEVEL || (float)qol / (q->end - q->beg) > MASK_LEVEL) {
+					int64_t tol = (int64_t)(p->k + p->len < q->k + q->len? p->k + p->len : q->k + q->len)
+						- (int64_t)(p->k > q->k? p->k : q->k); // length of target overlap
+					if ((double)tol / p->len > MASK_LEVEL || (double)tol / q->len > MASK_LEVEL)
+						compatible = 0;
+				}
+			}
+			if (!compatible) { // p duplicates the earlier hit q: drop p
+				p->G = 0;
+				if (q->G2 < p->G2) q->G2 = p->G2; // q inherits the larger G2
+				break;
+			}
+		}
+	}
+	n = i;
+	for (i = j = 0; i < n; ++i) { // compact the array again
+		if (b->hits[i].G == 0) continue;
+		if (i != j) b->hits[j++] = b->hits[i];
+		else ++j;
+	}
+	b->n = j;
+	return b->n;
+}
+
+int bsw2_resolve_query_overlaps(bwtsw2_t *b, float mask_level)
+{ /* Sort the hits with the hitG ordering and remove every hit that is
+   * incompatible with a surviving earlier hit. A removed hit's score is
+   * recorded as the G2 of the hit(s) it conflicts with. One of the hits
+   * sharing the top G is moved to the front at random (drand48). Returns the
+   * number of hits left. */
+	int i, j, n;
+	if (b->n == 0) return 0;
+	ks_introsort(hitG, b->n, b->hits);
+	{ // choose a random one
+		int G0 = b->hits[0].G;
+		for (i = 1; i < b->n; ++i)
+			if (b->hits[i].G != G0) break;
+		j = (int)(i * drand48()); // i is the number of leading hits with G == G0
+		if (j) {
+			bsw2hit_t tmp;
+			tmp = b->hits[0]; b->hits[0] = b->hits[j]; b->hits[j] = tmp;
+		}
+	}
+	for (i = 1; i < b->n; ++i) {
+		bsw2hit_t *p = b->hits + i;
+		int all_compatible = 1;
+		if (p->G == 0) break; // stop at the first hit with G == 0; hits from here on are dropped (n = i below)
+		for (j = 0; j < i; ++j) {
+			bsw2hit_t *q = b->hits + j;
+			int64_t tol = 0;
+			int qol, compatible = 0;
+			float fol;
+			if (q->G == 0) continue; // q has already been removed
+			qol = (p->end < q->end? p->end : q->end) - (p->beg > q->beg? p->beg : q->beg); // length of query overlap
+			if (qol < 0) qol = 0;
+			if (p->l == 0 && q->l == 0) {
+				tol = (int64_t)(p->k + p->len < q->k + q->len? p->k + p->len : q->k + q->len)
+					- (p->k > q->k? p->k : q->k); // length of target overlap
+				if (tol < 0) tol = 0;
+			}
+			fol = (float)qol / (p->end - p->beg < q->end - q->beg? p->end - p->beg : q->end - q->beg); // overlap as a fraction of the shorter query span
+			if (fol < mask_level || (tol > 0 && qol < p->end - p->beg && qol < q->end - q->beg)) compatible = 1; // small overlap, or a partial overlap that also overlaps on the target
+			if (!compatible) {
+				if (q->G2 < p->G) q->G2 = p->G; // p's score becomes q's sub-optimal score
+				all_compatible = 0;
+			}
+		}
+		if (!all_compatible) p->G = 0;
+	}
+	n = i;
+	for (i = j = 0; i < n; ++i) { // compact the array
+		if (b->hits[i].G == 0) continue;
+		if (i != j) b->hits[j++] = b->hits[i];
+		else ++j;
+	}
+	b->n = j;
+	return j;
+}
+/* --- END: processing partial hits --- */
+
+/* --- BEGIN: global mem pool --- */
+/* Create the per-thread working state: a zeroed bsw2global_t whose stack
+ * member points to a fresh bsw2stack_t, which in turn owns an empty memory
+ * pool. Release it with bsw2_global_destroy(). */
+bsw2global_t *bsw2_global_init()
+{
+	bsw2global_t *g = calloc(1, sizeof(bsw2global_t));
+	bsw2stack_t *s = calloc(1, sizeof(bsw2stack_t));
+	s->pool = (mempool_t*)calloc(1, sizeof(mempool_t));
+	g->stack = (void*)s;
+	return g;
+}
+
+/* Free everything created by bsw2_global_init(), plus the alignment buffer
+ * aln_mem. */
+void bsw2_global_destroy(bsw2global_t *pool)
+{
+	bsw2stack_t *s = (bsw2stack_t*)pool->stack;
+	stack_destroy(s);
+	free(pool->aln_mem);
+	free(pool);
+}
+/* --- END: global mem pool --- */
+
+/* Fill the scores of cell c[0] from its neighbours: c[1] feeds the I score,
+ * c[2] the D score and c[3] (plus match_score) the diagonal score; a NULL
+ * neighbour contributes MINUS_INF. An I/D score either extends a gap (-r) or
+ * opens one (-qr). The maximum of the candidates is stored in c[0]->G and
+ * returned. */
+static inline int fill_cell(const bsw2opt_t *o, int match_score, bsw2cell_t *c[4])
+{
+	bsw2cell_t *cur = c[0], *ins = c[1], *del = c[2], *diag = c[3];
+	int best = MINUS_INF;
+	if (diag) best = diag->G + match_score;
+	if (ins) {
+		if (ins->I > ins->G - o->q) cur->I = ins->I - o->r;
+		else cur->I = ins->G - o->qr;
+		if (cur->I > best) best = cur->I;
+	} else cur->I = MINUS_INF;
+	if (del) {
+		if (del->D > del->G - o->q) cur->D = del->D - o->r;
+		else cur->D = del->G - o->qr;
+		if (cur->D > best) best = cur->D;
+	} else cur->D = MINUS_INF;
+	cur->G = best;
+	return best;
+}
+
+/* Seed the traversal: push onto the stack a single entry that covers the
+ * target interval [0, target->seq_len] and holds one root cell with score 0
+ * covering the query interval [0, query->seq_len]. */
+static void init_bwtsw2(const bwtl_t *target, const bwt_t *query, bsw2stack_t *s)
+{
+	bsw2entry_t *root = mp_alloc(s->pool);
+	bsw2cell_t *cell;
+
+	root->tk = 0;
+	root->tl = target->seq_len;
+	cell = push_array_p(root);
+	*cell = g_default_cell;
+	cell->qk = 0;
+	cell->ql = query->seq_len;
+	cell->G = 0;
+	root->n++;
+	stack_push0(s, root);
+}
+/* On return, ret[1] keeps not-so-repetitive hits (narrow SA hits); ret[0] keeps all hits (right?)
+ *
+ * The routine walks the intervals of the target index reachable from the root
+ * (see bsw2_connectivity()). For each popped entry v and each of the four
+ * symbols it builds a child entry u by extending every live cell of v,
+ * grows v itself along the query index, records hits with save_hits(), and
+ * then pushes u, parks it in the pending array, or merges it with a parked
+ * entry, depending on the connectivity counter of u's interval. The returned
+ * two-element array and both results are allocated here; the scratch hashes
+ * and heap are freed before returning. */
+bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool)
+{
+	bsw2stack_t *stack = (bsw2stack_t*)pool->stack;
+	bwtsw2_t *b, *b1, **b_ret;
+	int i, j, score_mat[16], *heap, heap_size, n_tot = 0;
+	struct rusage curr, last;
+	khash_t(qintv) *rhash;
+	khash_t(64) *chash;
+
+	// initialize connectivity hash (chash)
+	chash = bsw2_connectivity(target);
+	// calculate score matrix
+	for (i = 0; i != 4; ++i)
+		for (j = 0; j != 4; ++j)
+			score_mat[i<<2|j] = (i == j)? opt->a : -opt->b;
+	// initialize other variables
+	rhash = kh_init(qintv);
+	init_bwtsw2(target, query, stack);
+	heap_size = opt->z;
+	heap = calloc(heap_size, sizeof(int)); // holds negated scores; -heap[0] is the admission threshold used below
+	// initialize the return struct
+	b = (bwtsw2_t*)calloc(1, sizeof(bwtsw2_t));
+	b->n = b->max = target->seq_len * 2; // two slots per target position, as indexed by save_hits()
+	b->hits = calloc(b->max, sizeof(bsw2hit_t));
+	b1 = (bwtsw2_t*)calloc(1, sizeof(bwtsw2_t));
+	b_ret = calloc(2, sizeof(void*));
+	b_ret[0] = b; b_ret[1] = b1;
+	// initialize timer
+	getrusage(0, &last);
+	// the main loop: traversal of the DAG
+	while (!stack_isempty(stack)) {
+		int old_n, tj;
+		bsw2entry_t *v;
+		uint32_t tcntk[4], tcntl[4];
+		bwtint_t k, l;
+
+		v = stack_pop(stack); old_n = v->n; // old_n: cells present before v is grown along the query below
+		n_tot += v->n;
+
+		for (i = 0; i < v->n; ++i) { // test max depth and band width
+			bsw2cell_t *p = v->array + i;
+			if (p->ql == 0) continue;
+			if (p->tlen - (int)p->qlen > opt->bw || (int)p->qlen - p->tlen > opt->bw) {
+				p->qk = p->ql = 0; // outside the band: delete the cell
+				if (p->ppos >= 0) v->array[p->ppos].cpos[p->pj] = -5;
+			}
+		}
+
+		// get Occ for the DAG
+		bwtl_2occ4(target, v->tk - 1, v->tl, tcntk, tcntl);
+		for (tj = 0; tj != 4; ++tj) { // descend to the children
+			bwtint_t qcntk[4], qcntl[4];
+			int qj, *curr_score_mat = score_mat + tj * 4; // row of scores for target symbol tj
+			khiter_t iter;
+			bsw2entry_t *u;
+
+			k = target->L2[tj] + tcntk[tj] + 1;
+			l = target->L2[tj] + tcntl[tj];
+			if (k > l) continue; // empty child interval
+			// update counter
+			iter = kh_get(64, chash, (uint64_t)k<<32 | l);
+			--kh_value(chash, iter);
+			// initialization
+			u = mp_alloc(stack->pool);
+			u->tk = k; u->tl = l;
+			memset(heap, 0, sizeof(int) * opt->z);
+			// loop through all the nodes in v
+		    for (i = 0; i < v->n; ++i) {
+				bsw2cell_t *p = v->array + i, *x, *c[4]; // c[0]=>current, c[1]=>I, c[2]=>D, c[3]=>G
+				int is_added = 0;
+				if (p->ql == 0) continue; // deleted node
+				c[0] = x = push_array_p(u); // tentative slot in u; kept only if u->n is incremented below
+				x->G = MINUS_INF;
+				p->upos = x->upos = -1;
+				if (p->ppos >= 0) { // parent has been visited
+					c[1] = (v->array[p->ppos].upos >= 0)? u->array + v->array[p->ppos].upos : 0;
+					c[3] = v->array + p->ppos; c[2] = p;
+					if (fill_cell(opt, curr_score_mat[p->pj], c) > 0) { // then update topology at p and x
+						x->ppos = v->array[p->ppos].upos; // the parent pos in u
+						p->upos = u->n++; // the current pos in u
+						if (x->ppos >= 0) u->array[x->ppos].cpos[p->pj] = p->upos; // the child pos of its parent in u
+						is_added = 1;
+					}
+				} else { // cell without a parent: only the D score can be carried over
+					x->D = p->D > p->G - opt->q? p->D - opt->r : p->G - opt->qr;
+					if (x->D > 0) {
+						x->G = x->D;
+						x->I = MINUS_INF; x->ppos = -1;
+						p->upos = u->n++;
+						is_added = 1;
+					}
+				}
+				if (is_added) { // x has been added to u->array. fill the remaining variables
+					x->cpos[0] = x->cpos[1] = x->cpos[2] = x->cpos[3] = -1;
+					x->pj = p->pj; x->qk = p->qk; x->ql = p->ql; x->qlen = p->qlen; x->tlen = p->tlen + 1;
+					if (x->G > -heap[0]) {
+						heap[0] = -x->G;
+						ks_heapadjust(int, 0, heap_size, heap);
+					}
+				}
+				if ((x->G > opt->qr && x->G >= -heap[0]) || i < old_n) { // good node in u, or in v
+					if (p->cpos[0] == -1 || p->cpos[1] == -1 || p->cpos[2] == -1 || p->cpos[3] == -1) {
+						bwt_2occ4(query, p->qk - 1, p->ql, qcntk, qcntl);
+						for (qj = 0; qj != 4; ++qj) { // descend to the prefix trie
+							if (p->cpos[qj] != -1) continue; // this node will be visited later
+							k = query->L2[qj] + qcntk[qj] + 1;
+							l = query->L2[qj] + qcntl[qj];
+							if (k > l) { p->cpos[qj] = -2; continue; }
+							x = push_array_p(v);
+							p = v->array + i; // p may not point to the correct position after realloc
+							x->G = x->I = x->D = MINUS_INF;
+							x->qk = k; x->ql = l; x->pj = qj; x->qlen = p->qlen + 1; x->ppos = i; x->tlen = p->tlen;
+							x->cpos[0] = x->cpos[1] = x->cpos[2] = x->cpos[3] = -1;
+							p->cpos[qj] = v->n++; // appended to v, so it is reached later in this same loop over i
+						} // ~for(qj)
+					} // ~if(p->cpos[])
+				} // ~if
+			} // ~for(i)
+			if (u->n) save_hits(target, opt->t, b->hits, u);
+			{ // push u to the stack (or to the pending array)
+				uint32_t cnt, pos;
+				cnt = (uint32_t)kh_value(chash, iter); // low 32 bits: the interval's remaining count after the decrement above
+				pos = kh_value(chash, iter)>>32; // high 32 bits: 1-based slot in stack->pending, 0 if none
+				if (pos) { // something in the pending array, then merge
+					bsw2entry_t *w = kv_A(stack->pending, pos-1);
+					if (u->n) {
+						if (w->n < u->n) { // swap
+							w = u; u = kv_A(stack->pending, pos-1); kv_A(stack->pending, pos-1) = w;
+						}
+						merge_entry(opt, w, u, b);
+					}
+					if (cnt == 0) { // move from pending to stack0
+						remove_duplicate(w, rhash);
+						save_narrow_hits(target, w, b1, opt->t, opt->is);
+						cut_tail(w, opt->z, u); // u is only lent as scratch space here
+						stack_push0(stack, w);
+						kv_A(stack->pending, pos-1) = 0;
+						--stack->n_pending;
+					}
+					mp_free(stack->pool, u);
+				} else if (cnt) { // the first time
+					if (u->n) { // push to the pending queue
+						++stack->n_pending;
+						kv_push(bsw2entry_p, stack->pending, u);
+						kh_value(chash, iter) = (uint64_t)kv_size(stack->pending)<<32 | cnt;
+					} else mp_free(stack->pool, u);
+				} else { // cnt == 0, then push to the stack
+					bsw2entry_t *w = mp_alloc(stack->pool); // temporary entry lent to cut_tail() as scratch space
+					save_narrow_hits(target, u, b1, opt->t, opt->is);
+					cut_tail(u, opt->z, w);
+					mp_free(stack->pool, w);
+					stack_push0(stack, u);
+				}
+			}
+		} // ~for(tj)
+		mp_free(stack->pool, v);
+	} // while(top)
+	getrusage(0, &curr);
+	for (i = 0; i < 2; ++i)
+		for (j = 0; j < b_ret[i]->n; ++j)
+			b_ret[i]->hits[j].n_seeds = 0;
+	bsw2_resolve_duphits(bns, query, b, opt->is);
+	bsw2_resolve_duphits(bns, query, b1, opt->is);
+	//fprintf(stderr, "stats: %.3lf sec; %d elems\n", time_elapse(&curr, &last), n_tot);
+	// free
+	free(heap);
+	kh_destroy(qintv, rhash);
+	kh_destroy(64, chash);
+	stack->pending.n = stack->stack0.n = 0; // empty both vectors but keep their storage for the next call
+	return b_ret;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtsw2_main.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,95 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include "bwt.h"
+#include "bwtsw2.h"
+#include "utils.h"
+
+/*
+ * Entry point of the "bwa bwasw" sub-command.
+ *
+ * Parses the command-line options into a bsw2opt_t, loads the BWT, the
+ * suffix array and the sequence annotations of the index given as the
+ * first positional argument, and hands the query file(s) to bsw2_aln().
+ *
+ * Returns 0 on success and 1 when the usage text was printed, the index
+ * could not be located, or memory ran out.
+ */
+int bwa_bwtsw2(int argc, char *argv[])
+{
+	extern char *bwa_infer_prefix(const char *hint);
+	bsw2opt_t *opt;
+	bwt_t *target;
+	char *buf, *prefix;
+	bntseq_t *bns;
+	int c;
+
+	opt = bsw2_init_opt();
+	srand48(11);
+	while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:S")) >= 0) {
+		switch (c) {
+		case 'q': opt->q = atoi(optarg); break;
+		case 'r': opt->r = atoi(optarg); break;
+		case 'a': opt->a = atoi(optarg); break;
+		case 'b': opt->b = atoi(optarg); break;
+		case 'w': opt->bw = atoi(optarg); break;
+		case 'T': opt->t = atoi(optarg); break;
+		case 't': opt->n_threads = atoi(optarg); break;
+		case 'z': opt->z = atoi(optarg); break;
+		case 's': opt->is = atoi(optarg); break;
+		case 'm': opt->mask_level = atof(optarg); break;
+		case 'c': opt->coef = atof(optarg); break;
+		case 'N': opt->t_seeds = atoi(optarg); break;
+		case 'M': opt->multi_2nd = 1; break;
+		case 'H': opt->hard_clip = 1; break;
+		case 'f': xreopen(optarg, "w", stdout); break;
+		case 'I': opt->max_ins = atoi(optarg); break;
+		case 'S': opt->skip_sw = 1; break;
+		}
+	}
+	opt->qr = opt->q + opt->r;
+
+	if (optind + 2 > argc) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, "Usage:   bwa bwasw [options] <target.prefix> <query.fa> [query2.fa]\n\n");
+		fprintf(stderr, "Options: -a INT   score for a match [%d]\n", opt->a);
+		fprintf(stderr, "         -b INT   mismatch penalty [%d]\n", opt->b);
+		fprintf(stderr, "         -q INT   gap open penalty [%d]\n", opt->q);
+		fprintf(stderr, "         -r INT   gap extension penalty [%d]\n", opt->r);
+		fprintf(stderr, "         -w INT   band width [%d]\n", opt->bw);
+		fprintf(stderr, "         -m FLOAT mask level [%.2f]\n", opt->mask_level);
+		fprintf(stderr, "\n");
+		fprintf(stderr, "         -t INT   number of threads [%d]\n", opt->n_threads);
+		fprintf(stderr, "         -f FILE  file to output results to instead of stdout\n");
+		fprintf(stderr, "         -H       in SAM output, use hard clipping instead of soft clipping\n");
+		fprintf(stderr, "         -M       mark multi-part alignments as secondary\n");
+		fprintf(stderr, "         -S       skip Smith-Waterman read pairing\n");
+		fprintf(stderr, "         -I INT   ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
+		fprintf(stderr, "\n");
+		fprintf(stderr, "         -T INT   score threshold divided by a [%d]\n", opt->t);
+		fprintf(stderr, "         -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
+		fprintf(stderr, "         -z INT   Z-best [%d]\n", opt->z);
+		fprintf(stderr, "         -s INT   maximum seeding interval size [%d]\n", opt->is);
+		fprintf(stderr, "         -N INT   # seeds to trigger reverse alignment [%d]\n", opt->t_seeds);
+		fprintf(stderr, "\n");
+		fprintf(stderr, "Note: For long Illumina, 454 and Sanger reads, assembly contigs, fosmids and\n");
+		fprintf(stderr, "      BACs, the default setting usually works well. For the current PacBio\n");
+		fprintf(stderr, "      reads (end of 2010), '-b5 -q2 -r1 -z10' is recommended. One may also\n");
+		fprintf(stderr, "      increase '-z' for better sensitivity.\n");
+		fprintf(stderr, "\n");
+
+		free(opt);
+		return 1;
+	}
+
+	// adjust opt for opt->a
+	opt->t *= opt->a;
+	opt->coef *= opt->a;
+
+	if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
+		fprintf(stderr, "[%s] fail to locate the index\n", __func__);
+		free(opt);
+		return 1; // a missing index is an error; do not report success to the shell
+	}
+	// The file name buffer is sized from the prefix itself (longest suffix
+	// is ".bwt": 4 characters plus the terminating NUL) so that an
+	// arbitrarily long index path cannot overflow a fixed-size array.
+	buf = malloc(strlen(prefix) + 5);
+	if (buf == 0) {
+		fprintf(stderr, "[%s] insufficient memory\n", __func__);
+		free(prefix); free(opt);
+		return 1;
+	}
+	strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt"));
+	strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target);
+	free(buf);
+	bns = bns_restore(prefix);
+
+	bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0);
+
+	bns_destroy(bns);
+	bwt_destroy(target);
+	free(opt); free(prefix);
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/bwtsw2_pair.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,291 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "bwt.h"
+#include "bntseq.h"
+#include "bwtsw2.h"
+#include "kstring.h"
+#ifndef _NO_SSE2
+#include "ksw.h"
+#else
+#include "stdaln.h"
+#endif
+
+// Tuning constants for paired-end handling.
+#define MIN_RATIO     0.8 // NOTE(review): not referenced in the visible part of this file; bsw2_stat() spells 0.8 literally
+#define OUTLIER_BOUND 2.0 // multiple of the inter-quartile range used by bsw2_stat() to trim outliers before computing mean/std.dev
+#define MAX_STDDEV    4.0 // used by bsw2_stat() when adjusting the proper-pair boundaries
+#define EXT_STDDEV    4.0 // number of std.dev around the mean insert size searched for the mate in bsw2_pair1()
+
+// Insert-size statistics produced by bsw2_stat().
+typedef struct {
+	int low, high, failed; // low/high: insert-size boundaries; failed: non-zero when the distribution could not be inferred
+	double avg, std;       // mean and standard deviation of the insert size
+} bsw2pestat_t;
+
+/*
+ * Infer the insert-size distribution from the hits of n reads (n/2 pairs).
+ *
+ * buf[i] and buf[i+1] (i even) are the hit lists of the two ends of one
+ * pair; either may be NULL. Only pairs in which both ends have exactly one
+ * hit, whose sub-optimal score G2 is at most 0.8 of the best score G, and
+ * whose insert is shorter than max_ins contribute to the estimate.
+ *
+ * Progress messages are appended to msg. The returned structure has
+ * failed != 0 when fewer than 8 usable pairs were found, when no insert
+ * size fell inside the outlier-trimmed window, or when memory ran out; in
+ * that case the other fields must not be used.
+ */
+bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
+{
+	extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
+	int i, k, x, p25, p50, p75, tmp, max_len = 0;
+	uint64_t *isize;
+	bsw2pestat_t r;
+
+	memset(&r, 0, sizeof(bsw2pestat_t));
+	// always request at least one element so that a NULL result means failure
+	isize = calloc(n > 0? n : 1, 8);
+	if (isize == 0) {
+		ksprintf(msg, "[%s] fail to infer the insert size distribution.\n", __func__);
+		r.failed = 1;
+		return r;
+	}
+	for (i = k = 0; i + 1 < n; i += 2) { // i + 1 < n: never read past the array for an odd n
+		bsw2hit_t *t[2];
+		int l;
+		if (buf[i] == 0 || buf[i+1] == 0) continue; // one end has no hit list at all
+		if (buf[i]->n != 1 || buf[i+1]->n != 1) continue; // more than 1 hits
+		t[0] = &buf[i]->hits[0]; t[1] = &buf[i+1]->hits[0];
+		if (t[0]->G2 > 0.8 * t[0]->G) continue; // the best hit is not good enough
+		if (t[1]->G2 > 0.8 * t[1]->G) continue; // the best hit is not good enough
+		l = t[0]->k > t[1]->k? t[0]->k - t[1]->k + t[1]->len : t[1]->k - t[0]->k + t[0]->len;
+		if (l >= max_ins) continue; // skip pairs with excessively large insert
+		max_len = max_len > t[0]->end - t[0]->beg? max_len : t[0]->end - t[0]->beg;
+		max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;
+		isize[k++] = l;
+	}
+	ks_introsort_uint64_t(k, isize);
+	ksprintf(msg, "[%s] infer the insert size distribution from %d high-quality pairs.\n", __func__, k);
+	if (k < 8) {
+		ksprintf(msg, "[%s] fail to infer the insert size distribution.\n", __func__);
+		free(isize);
+		r.failed = 1;
+		return r;
+	}
+	// the percentiles are only read once we know the array holds enough values
+	p25 = isize[(int)(.25 * k + .499)];
+	p50 = isize[(int)(.50 * k + .499)];
+	p75 = isize[(int)(.75 * k + .499)];
+	tmp    = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);
+	r.low  = tmp > max_len? tmp : max_len;
+	if (r.low < 1) r.low = 1;
+	r.high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);
+	ksprintf(msg, "[%s] (25, 50, 75) percentile: (%d, %d, %d)\n", __func__, p25, p50, p75);
+	ksprintf(msg, "[%s] low and high boundaries for computing mean and std.dev: (%d, %d)\n", __func__, r.low, r.high);
+	for (i = x = 0, r.avg = 0; i < k; ++i)
+		if (isize[i] >= r.low && isize[i] <= r.high)
+			r.avg += isize[i], ++x;
+	if (x == 0) { // nothing inside the window: avoid 0/0 and the NaN statistics that would follow
+		ksprintf(msg, "[%s] fail to infer the insert size distribution.\n", __func__);
+		free(isize);
+		r.failed = 1;
+		return r;
+	}
+	r.avg /= x;
+	for (i = 0, r.std = 0; i < k; ++i)
+		if (isize[i] >= r.low && isize[i] <= r.high)
+			r.std += (isize[i] - r.avg) * (isize[i] - r.avg);
+	r.std = sqrt(r.std / x);
+	ksprintf(msg, "[%s] mean and std.dev: (%.2f, %.2f)\n", __func__, r.avg, r.std);
+	tmp  = (int)(p25 - 3. * (p75 - p25) + .499);
+	r.low  = tmp > max_len? tmp : max_len;
+	if (r.low < 1) r.low = 1;
+	r.high = (int)(p75 + 3. * (p75 - p25) + .499);
+	// NOTE(review): the next three statements are kept exactly as they were to
+	// preserve the reported boundaries, but they look unintended: the second
+	// one overwrites whatever the first computed (and the "< 1" clamp above),
+	// the third tests avg - 4*MAX_STDDEV while assigning avg + 4*MAX_STDDEV,
+	// and both scale by the constant 4 rather than by r.std. Confirm the
+	// intended formula before changing them.
+	if (r.low > r.avg - MAX_STDDEV * 4.) r.low = (int)(r.avg - MAX_STDDEV * 4. + .499);
+	r.low = tmp > max_len? tmp : max_len;
+	if (r.high < r.avg - MAX_STDDEV * 4.) r.high = (int)(r.avg + MAX_STDDEV * 4. + .499);
+	ksprintf(msg, "[%s] low and high boundaries for proper pairs: (%d, %d)\n", __func__, r.low, r.high);
+	free(isize);
+	return r;
+}
+
+// Auxiliary record describing one alignment (CIGAR, query interval, length, position).
+// NOTE(review): not referenced by any function visible in this part of the file.
+typedef struct {
+	int n_cigar, beg, end, len;
+	int64_t pos;
+	uint32_t *cigar;
+} pairaux_t;
+
+// ASCII -> nucleotide code lookup table, defined in another translation unit
+extern unsigned char nst_nt4_table[256];
+
+/*
+ * Try to place one read of a pair by local alignment near the hit of its mate.
+ *
+ *   opt      alignment options (opt->t: score threshold, opt->q / opt->r: gap open / extension)
+ *   l_pac    length of the packed reference; the search window is clamped to it
+ *   pac      2-bit packed reference
+ *   st       insert-size statistics; avg and std define the search window
+ *   h        the hit of the already-aligned mate
+ *   l_mseq   length of the read to be aligned
+ *   mseq     the read to be aligned, as ASCII (translated through nst_nt4_table)
+ *   a        output hit; the caller must have zeroed it (see the comment on the first statement)
+ *   g_mat    5x5 scoring matrix
+ *
+ * When no alignment is attempted or the score is below opt->t, a->G is left
+ * at 0, which callers use as the "nothing found" marker.
+ */
+void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const bsw2pestat_t *st, const bsw2hit_t *h, int l_mseq, const char *mseq, bsw2hit_t *a, int8_t g_mat[25])
+{
+	extern void seq_reverse(int len, ubyte_t *seq, int is_comp);
+	int64_t k, beg, end;
+	uint8_t *seq, *ref;
+	int i;
+	// compute the region start and end
+	a->n_seeds = 1; a->flag |= BSW2_FLAG_MATESW; // before calling this routine, *a has been cleared with memset(0); the flag is set with 1<<6/7
+	if (h->is_rev == 0) {
+		// mate hit is on the forward strand: search downstream of it, on the reverse strand
+		beg = (int64_t)(h->k + st->avg - EXT_STDDEV * st->std - l_mseq + .499);
+		if (beg < h->k) beg = h->k;
+		end = (int64_t)(h->k + st->avg + EXT_STDDEV * st->std + .499);
+		a->is_rev = 1; a->flag |= 16;
+	} else {
+		// mate hit is on the reverse strand: search upstream of its end, on the forward strand
+		beg = (int64_t)(h->k + h->end - h->beg - st->avg - EXT_STDDEV * st->std + .499);
+		end = (int64_t)(h->k + h->end - h->beg - st->avg + EXT_STDDEV * st->std + l_mseq + .499);
+		if (end > h->k + (h->end - h->beg)) end = h->k + (h->end - h->beg);
+		a->is_rev = 0;
+	}
+	if (beg < 1) beg = 1;
+	if (end > l_pac) end = l_pac;
+	if (end - beg < l_mseq) return; // window shorter than the read: give up, a->G stays 0
+	// generate the sequence
+	// one allocation holds the encoded read (first l_mseq bytes) followed by the reference window
+	seq = malloc(l_mseq + (end - beg));
+	ref = seq + l_mseq;
+	for (k = beg; k < end; ++k)
+		ref[k - beg] = pac[k>>2] >> ((~k&3)<<1) & 0x3; // four 2-bit bases per byte, first base in the most significant bits
+	if (h->is_rev == 0) {
+		for (i = 0; i < l_mseq; ++i) { // on the reverse strand
+			int c = nst_nt4_table[(int)mseq[i]];
+			seq[l_mseq - 1 - i] = c > 3? 4 : 3 - c; // reverse-complement; codes above 3 become 4
+		}
+	} else {
+		for (i = 0; i < l_mseq; ++i) // on the forward strand
+			seq[i] = nst_nt4_table[(int)mseq[i]];
+	}
+#ifndef _NO_SSE2
+	{
+		ksw_query_t *q;
+		ksw_aux_t aux[2];
+		// forward Smith-Waterman
+		aux[0].T = opt->t; aux[0].gapo = opt->q; aux[0].gape = opt->r; aux[1] = aux[0];
+		q = ksw_qinit(l_mseq * g_mat[0] < 250? 1 : 2, l_mseq, seq, 5, g_mat);
+		ksw_sse2(q, end - beg, ref, &aux[0]);
+		free(q);
+		if (aux[0].score < opt->t) {
+			free(seq);
+			return;
+		}
+		++aux[0].qe; ++aux[0].te; // turn the reported end coordinates into lengths / exclusive ends
+		// reverse Smith-Waterman
+		// align the reversed prefixes ending at the forward end point to locate the alignment start
+		seq_reverse(aux[0].qe, seq, 0);
+		seq_reverse(aux[0].te, ref, 0);
+		q = ksw_qinit(aux[0].qe * g_mat[0] < 250? 1 : 2, aux[0].qe, seq, 5, g_mat);
+		ksw_sse2(q, aux[0].te, ref, &aux[1]);
+		free(q);
+		++aux[1].qe; ++aux[1].te;
+		// write output
+		a->G = aux[0].score;
+		a->G2 = aux[0].score2 > aux[1].score2? aux[0].score2 : aux[1].score2;
+		if (a->G2 < opt->t) a->G2 = 0;
+		if (a->G2) a->flag |= BSW2_FLAG_TANDEM;
+		a->k = beg + (aux[0].te - aux[1].te);
+		a->len = aux[1].te;
+		a->beg = aux[0].qe - aux[1].qe;
+		a->end = aux[0].qe;
+	}
+#else
+	{
+		// fallback without SSE2: local alignment through stdaln
+		AlnParam ap;
+		path_t path[2];
+		int matrix[25];
+		for (i = 0; i < 25; ++i) matrix[i] = g_mat[i];
+		ap.gap_open = opt->q; ap.gap_ext = opt->r; ap.gap_end = opt->r;
+		ap.matrix = matrix; ap.row = 5; ap.band_width = 50;
+		a->G = aln_local_core(ref, end - beg, seq, l_mseq, &ap, path, 0, opt->t, &a->G2);
+		if (a->G < opt->t) a->G = 0;
+		if (a->G2 < opt->t) a->G2 = 0;
+		a->k = beg + path[0].i - 1;
+		a->len = path[1].i - path[0].i + 1;
+		a->beg = path[0].j - 1;
+		a->end = path[1].j;
+	}
+#endif
+	// the read was aligned as its reverse complement: map the query interval back to the original orientation
+	if (a->is_rev) i = a->beg, a->beg = l_mseq - a->end, a->end = l_mseq - i;
+	free(seq);
+}
+
+/*
+ * Pair up the hits of n reads (consecutive entries form a pair).
+ *
+ * First estimates the insert-size distribution with bsw2_stat(). Then, for
+ * every pair in which both ends have a hit list and no end has more than one
+ * hit, the mate of each single hit is re-aligned near it with bsw2_pair1()
+ * (unless opt->skip_sw is set) and the result is used to:
+ *   - rescue an unmapped end,
+ *   - replace a hit that scores lower than the mate-guided alignment ("fixed"),
+ *   - or move one end next to the other ("moved").
+ * Hits are updated in place and a summary is written to stderr.
+ *
+ * NOTE(review): flag bits 1<<6 and 1<<7 mark read 1 / read 2 (per the loop
+ * comment below); bit 2 is presumably the SAM "properly paired" bit and bit
+ * 16 the reverse-strand bit -- confirm against the SAM output code.
+ */
+void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, bsw2seq1_t *seq, bwtsw2_t **hits)
+{
+	extern int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS);
+	bsw2pestat_t pes;
+	int i, j, k, n_rescued = 0, n_moved = 0, n_fixed = 0;
+	int8_t g_mat[25];
+	kstring_t msg;
+	memset(&msg, 0, sizeof(kstring_t));
+	pes = bsw2_stat(n, hits, &msg, opt->max_ins);
+	// build the 5x5 scoring matrix: +a on the diagonal of the first four columns, -b elsewhere, 0 in the fifth column
+	for (i = k = 0; i < 5; ++i) {
+		for (j = 0; j < 4; ++j)
+			g_mat[k++] = i == j? opt->a : -opt->b;
+		g_mat[k++] = 0;
+	}
+	for (i = 0; i < n; i += 2) {
+		bsw2hit_t a[2]; // a[j]: mate-guided alignment of end j, found from the hit of the other end
+		memset(&a, 0, sizeof(bsw2hit_t) * 2);
+		a[0].flag = 1<<6; a[1].flag = 1<<7;
+		for (j = 0; j < 2; ++j) { // set the read1/2 flag
+			if (hits[i+j] == 0) continue;
+			for (k = 0; k < hits[i+j]->n; ++k) {
+				bsw2hit_t *p = &hits[i+j]->hits[k];
+				p->flag |= 1<<(6+j);
+			}
+		}
+		if (pes.failed) continue;
+		if (hits[i] == 0 || hits[i+1] == 0) continue; // one end has excessive N
+		if (hits[i]->n != 1 && hits[i+1]->n != 1) continue; // no end has exactly one hit
+		if (hits[i]->n > 1 || hits[i+1]->n > 1) continue; // one read has more than one hit
+		// from here on each end has 0 or 1 hit and at least one end has exactly 1
+		if (!opt->skip_sw) {
+			if (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);
+			if (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);
+		} // else a[0].G == a[1].G == a[0].G2 == a[1].G2 == 0
+		// the following enumerate all possibilities. It is tedious but necessary...
+		if (hits[i]->n + hits[i+1]->n == 1) { // one end mapped; the other not;
+			bwtsw2_t *p[2]; // p[0]: the mapped end; p[1]: the unmapped end to be rescued
+			int which;      // index into a[] of the unmapped end
+			if (hits[i]->n == 1) p[0] = hits[i], p[1] = hits[i+1], which = 1;
+			else p[0] = hits[i+1], p[1] = hits[i], which = 0;
+			if (a[which].G == 0) continue; // mate-guided alignment found nothing
+			a[which].flag |= BSW2_FLAG_RESCUED;
+			if (p[1]->max == 0) { // no room for a hit yet: allocate a single slot
+				p[1]->max = 1;
+				p[1]->hits = malloc(sizeof(bsw2hit_t));
+			}
+			p[1]->hits[0] = a[which];
+			p[1]->n = 1;
+			p[0]->hits[0].flag |= 2;
+			p[1]->hits[0].flag |= 2;
+			++n_rescued;
+		} else { // then both ends mapped
+			int is_fixed = 0;
+			//fprintf(stderr, "%d; %lld,%lld; %d,%d\n", a[0].is_rev, hits[i]->hits[0].k, a[0].k, hits[i]->hits[0].end, a[0].end);
+			for (j = 0; j < 2; ++j) { // fix wrong mappings and wrong suboptimal alignment score
+				bsw2hit_t *p = &hits[i+j]->hits[0];
+				if (p->G < a[j].G) { // the original mapping is suboptimal
+					a[j].G2 = a[j].G2 > p->G? a[j].G2 : p->G; // FIXME: reset BSW2_FLAG_TANDEM?
+					*p = a[j];
+					++n_fixed;
+					is_fixed = 1;
+				} else if (p->k != a[j].k && p->G2 < a[j].G) {
+					// a different location scores better than the recorded sub-optimal score
+					p->G2 = a[j].G;
+				} else if (p->k == a[j].k && p->G2 < a[j].G2) {
+					// same location: adopt the higher sub-optimal score
+					p->G2 = a[j].G2;
+				}
+			}
+			if (hits[i]->hits[0].k == a[0].k && hits[i+1]->hits[0].k == a[1].k) { // properly paired and no ends need to be moved
+				for (j = 0; j < 2; ++j)
+					hits[i+j]->hits[0].flag |= 2 | (a[j].flag & BSW2_FLAG_TANDEM);
+			} else if (hits[i]->hits[0].k == a[0].k || hits[i+1]->hits[0].k == a[1].k) { // a tandem match
+				for (j = 0; j < 2; ++j) {
+					hits[i+j]->hits[0].flag |= 2;
+					if (hits[i+j]->hits[0].k != a[j].k)
+						hits[i+j]->hits[0].flag |= BSW2_FLAG_TANDEM;
+				}
+			} else if (!is_fixed && (a[0].G || a[1].G)) { // it is possible to move one end
+				if (a[0].G && a[1].G) { // now we have two "proper pairs"
+					int G[2];
+					double diff;
+					G[0] = hits[i]->hits[0].G + a[1].G;
+					G[1] = hits[i+1]->hits[0].G + a[0].G;
+					// score difference of the two candidate pairs, normalised by (a+b) and the mean pair length
+					diff = fabs(G[0] - G[1]) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.);
+					if (diff > 0.05) a[G[0] > G[1]? 0 : 1].G = 0; // clearly different: drop the candidate belonging to the lower-scoring pair
+				}
+				if (a[0].G == 0 || a[1].G == 0) { // one proper pair only
+					bsw2hit_t *p[2]; // p[0] points the unchanged hit; p[1] to the hit to be moved
+					int which, isize;
+					double dev, diff;
+					if (a[0].G) p[0] = &hits[i+1]->hits[0], p[1] = &hits[i]->hits[0], which = 0;
+					else p[0] = &hits[i]->hits[0], p[1] = &hits[i+1]->hits[0], which = 1;
+					isize = p[0]->is_rev? p[0]->k + p[0]->len - a[which].k : a[which].k + a[which].len - p[0]->k;
+					dev = fabs(isize - pes.avg) / pes.std; // NOTE(review): pes.std == 0 is not guarded here
+					diff = (double)(p[1]->G - a[which].G) / (opt->a + opt->b) / (p[1]->end - p[1]->beg) * 100.0;
+					if (diff < dev * 2.) { // then move (heuristic)
+						a[which].G2 = a[which].G;
+						p[1][0] = a[which];
+						p[1]->flag |= BSW2_FLAG_MOVED | 2;
+						p[0]->flag |= 2;
+						++n_moved;
+					}
+				}
+			} else if (is_fixed) {
+				hits[i+0]->hits[0].flag |= 2;
+				hits[i+1]->hits[0].flag |= 2;
+			}
+		}
+	}
+	ksprintf(&msg, "[%s] #fixed=%d, #rescued=%d, #moved=%d\n", __func__, n_fixed, n_rescued, n_moved);
+	fputs(msg.s, stderr);
+	free(msg.s);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/cs2nt.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,191 @@
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "bwtaln.h"
+#include "stdaln.h"
+
+/*
+  Here is a delicate example. ref_nt=ATTAAC(RBRBG), read_cs=RBBOG. If we
+  decode as ATTGAC(RBGOG), there is one color change and one nt change;
+  if we decode as ATTAAC(RBRBG), there are two color changes.
+
+  In DP, if color quality is smaller than COLOR_MM, we will use COLOR_MM
+  as the penalty; otherwise, we will use color quality as the
+  penalty. This means we always prefer two consistent color changes over
+  a nt change, but if a color has high quality, we may prefer one nt
+  change.
+
+  In the above example, the penalties of the two types of decoding are
+  q(B)+25 and q(B)+q(O), respectively. If q(O)>25, we prefer the first;
+  otherwise the second. Note that no matter what we choose, the fourth
+  base will get a low nt quality.
+ */
+
+// Penalties used by cs2nt_DP().
+#define COLOR_MM 19 // lower bound on the penalty of a color mismatch (the color quality is used when it is higher)
+#define NUCL_MM  25 // penalty for a decoded nucleotide that differs from the reference
+
+// Color of a dinucleotide, indexed by the bit mask (1<<x | 1<<y) of the two
+// nucleotide codes x, y in 0..3. A single set bit (x == y) gives color 0;
+// indices that are not a valid one- or two-bit mask give 4.
+static const int nst_ntnt2cs_table[] = { 4, 0, 0, 1, 0, 2, 3, 4, 0, 3, 2, 4, 1, 4, 4, 4 };
+
+/*
+  Decode a color-space read into nucleotides by dynamic programming along
+  the reference. Nucleotide codes: {A,C,G,T,N} -> {0,1,2,3,4}.
+
+  size               number of colors in the read
+  nt_ref[0..size]    reference nucleotides, 0/1/2/3/4
+  cs_read[0..size-1] color read with quality: color<<6|qual; qual==63 for N
+  nt_read[0..size]   decoded nucleotides, 0/1/2/3 (output)
+  btarray[0..4*size] backtrack array (working space)
+ */
+void cs2nt_DP(int size, const uint8_t *nt_ref, const uint8_t *cs_read, uint8_t *nt_read, uint8_t *btarray)
+{
+	int score[2][4]; // score[prev] / score[cur]: best penalties of the previous / current column
+	int prev = 0, cur = 1;
+	int pos, nt, from, best_nt, best_score;
+
+	// first column: free choice when the reference is N, otherwise only the reference base is free
+	for (nt = 0; nt < 4; ++nt)
+		score[0][nt] = nt_ref[0] >= 4? 0 : NUCL_MM;
+	if (nt_ref[0] < 4) score[0][nt_ref[0]] = 0;
+
+	// fill the remaining columns
+	for (pos = 1; pos <= size; ++pos) {
+		const int qual = cs_read[pos-1] & 0x3f;
+		const int color = cs_read[pos-1] >> 6;
+		const int color_cost = qual < COLOR_MM? COLOR_MM : qual;
+		for (nt = 0; nt < 4; ++nt) {
+			// disagreeing with a known reference base costs the same whatever the predecessor is
+			const int ref_cost = (nt_ref[pos] < 4 && nt_ref[pos] != nt)? NUCL_MM : 0;
+			int best = 0x7fffffff, best_from = 0;
+			for (from = 0; from < 4; ++from) {
+				int s = score[prev][from];
+				if (qual != 63 && color != nst_ntnt2cs_table[1<<nt | 1<<from])
+					s += color_cost; // color mismatch
+				s += ref_cost; // nt mismatch
+				if (s < best) best = s, best_from = from;
+			}
+			score[cur][nt] = best;
+			btarray[pos<<2 | nt] = best_from;
+		}
+		prev = cur; cur = 1 - cur; // swap the two columns
+	}
+
+	// pick the cheapest final nucleotide (the lowest code wins ties) and trace back
+	best_score = 0x7fffffff; best_nt = 0;
+	for (nt = 0; nt < 4; ++nt)
+		if (score[prev][nt] < best_score) best_score = score[prev][nt], best_nt = nt;
+	nt_read[size] = best_nt;
+	for (pos = size; pos > 0; --pos)
+		nt_read[pos-1] = btarray[pos<<2 | nt_read[pos]];
+}
+/*
+  Derive per-nucleotide qualities for a decoded color-space read.
+
+  size               number of colors in the read
+  nt_read[0..size]   nucleotide read sequence: 0/1/2/3
+  cs_read[0..size-1] color read+qual sequence: base<<6|qual; qual==63 for N
+  tarray[0..size*2-1] temporary array; the first half receives the colors
+                     implied by nt_read, the second half the result
+
+  Each nucleotide k (1 <= k < size) sits between colors k-1 and k. Its
+  quality is the sum of the two color qualities plus 10 when both colors
+  agree with the read, the difference (agreeing minus disagreeing) when only
+  one agrees, and 0 otherwise; it is clamped to [0,60]. An entry is set to
+  0 when either neighbouring color is an N.
+
+  Returns a pointer to size-1 entries coded nt<<6|qual, one for each of
+  nt_read[1..size-1]. Nothing is written when size < 2.
+ */
+uint8_t *cs2nt_nt_qual(int size, const uint8_t *nt_read, const uint8_t *cs_read, uint8_t *tarray)
+{
+	int k, c1, c2;
+	uint8_t *t2array = tarray + size;
+	// get the color sequence of nt_read
+	c1 = nt_read[0];
+	for (k = 1; k <= size; ++k) {
+		c2 = nt_read[k]; // in principle, there is no 'N' in nt_read[]; just in case
+		tarray[k-1] = (c1 >= 4 || c2 >= 4)? 4 : nst_ntnt2cs_table[1<<c1 | 1<<c2];
+		c1 = c2;
+	}
+	// "k < size" rather than "k != size": the latter never terminates properly when size < 1
+	for (k = 1; k < size; ++k) {
+		int q = 0;
+		if (tarray[k-1] == cs_read[k-1]>>6 && tarray[k] == cs_read[k]>>6) {
+			q = (int)(cs_read[k-1]&0x3f) + (int)(cs_read[k]&0x3f) + 10;
+		} else if (tarray[k-1] == cs_read[k-1]>>6) {
+			q = (int)(cs_read[k-1]&0x3f) - (int)(cs_read[k]&0x3f);
+		} else if (tarray[k] == cs_read[k]>>6) {
+			q = (int)(cs_read[k]&0x3f) - (int)(cs_read[k-1]&0x3f);
+		} // else, q = 0
+		if (q < 0) q = 0;
+		if (q > 60) q = 60;
+		t2array[k] = nt_read[k]<<6 | q;
+		if ((cs_read[k-1]&0x3f) == 63 || (cs_read[k]&0x3f) == 63) t2array[k] = 0;
+	}
+	return t2array + 1; // size-1 entries: t2array[1..size-1]
+}
+
+// this function will be called when p->seq has been reversed by refine_gapped()
+//
+// Convert the color-space read p into nucleotide space, in place: the
+// reference bases and the read colors are laid out along the alignment,
+// decoded with cs2nt_DP(), given qualities by cs2nt_nt_qual(), and the
+// result is written back to p->seq, p->rseq and p->qual. The read becomes
+// one base shorter than the number of colors decoded. Reads of type
+// BWA_TYPE_NO_MATCH are left untouched.
+void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac)
+{
+	uint8_t *ta, *nt_read, *btarray, *tarray, *nt_ref, *cs_read, *new_nt_read;
+	int i, len;
+	uint8_t *seq;
+
+	// set temporary arrays
+	if (p->type == BWA_TYPE_NO_MATCH) return;
+	len = p->len + p->n_gapo + p->n_gape + 100; // leave enough space
+	// one allocation of 7*len bytes: nt_ref, cs_read and nt_read take len bytes each;
+	// btarray and tarray deliberately share the remaining 4*len bytes, because
+	// cs2nt_DP() is finished with btarray before cs2nt_nt_qual() uses tarray
+	ta = (uint8_t*)malloc(len * 7);
+	nt_ref = ta;
+	cs_read = nt_ref + len;
+	nt_read = cs_read + len;
+	btarray = nt_read + len;
+	tarray = nt_read + len;
+
+// Pack color _seq[_i] and its quality into _cs as color<<6|qual. The quality
+// is read from p->qual (ASCII, offset 33; mirrored index when p->strand is
+// set), capped at 60, and forced to 63 when the color code is above 3.
+// NOTE(review): the macro is never #undef'ed, so it stays defined for the rest of the file.
+#define __gen_csbase(_cs, _i, _seq) do {							\
+		int q = p->qual[p->strand? p->len - 1 - (_i) : (_i)] - 33;	\
+		if (q > 60) q = 60;											\
+		if (_seq[_i] > 3) q = 63;									\
+		(_cs) = _seq[_i]<<6 | q;									\
+	} while (0)
+
+	// generate len, nt_ref[] and cs_read
+	seq = p->strand? p->rseq : p->seq;
+	nt_ref[0] = p->pos? bns_pac(pac, p->pos-1) : 4; // base preceding the alignment, or 4 (N) at the very start
+	if (p->cigar == 0) { // no gap or clipping
+		len = p->len;
+		for (i = 0; i < p->len; ++i) {
+			__gen_csbase(cs_read[i], i, seq);
+			nt_ref[i+1] = bns_pac(pac, p->pos + i);
+		}
+	} else {
+		int k, z;      // k: CIGAR operation index; z: number of colors emitted so far
+		bwtint_t x, y; // x: reference position; y: read position
+		x = p->pos; y = 0;
+		for (k = z = 0; k < p->n_cigar; ++k) {
+			int l = __cigar_len(p->cigar[k]);
+			if (__cigar_op(p->cigar[k]) == FROM_M) {
+				// match/mismatch: consume both read and reference
+				for (i = 0; i < l; ++i, ++x, ++y) {
+					__gen_csbase(cs_read[z], y, seq);
+					nt_ref[z+1] = bns_pac(pac, x);
+					++z;
+				}
+			} else if (__cigar_op(p->cigar[k]) == FROM_I) {
+				// insertion: consume the read only; the reference base is unknown (4)
+				for (i = 0; i < l; ++i, ++y) {
+					__gen_csbase(cs_read[z], y, seq);
+					nt_ref[z+1] = 4;
+					++z;
+				}
+			} else if (__cigar_op(p->cigar[k]) == FROM_S) y += l; // clipped read bases are skipped
+			else x += l; // any other operation only advances the reference
+		}
+		len = z;
+	}
+
+	cs2nt_DP(len, nt_ref, cs_read, nt_read, btarray);
+	new_nt_read = cs2nt_nt_qual(len, nt_read, cs_read, tarray);
+
+	// update p
+	p->len = p->full_len = len - 1;
+	for (i = 0; i < p->len; ++i) {
+		if ((new_nt_read[i]&0x3f) == 63) {
+			p->qual[i] = 33; seq[i] = 4;
+		} else {
+			p->qual[i] = (new_nt_read[i]&0x3f) + 33;
+			seq[i] = new_nt_read[i]>>6;
+		}
+	}
+	p->qual[p->len] = seq[p->len] = 0;
+	// seq aliases one of p->seq / p->rseq; rebuild the other one from it
+	// (presumably seq_reverse(..., 1) reverse-complements and (..., 0) only reverses -- confirm)
+	if (p->strand) {
+		memcpy(p->seq, seq, p->len);
+		seq_reverse(p->len, p->seq, 1);
+		seq_reverse(p->len, p->qual, 0);
+	} else {
+		memcpy(p->rseq, seq, p->len);
+		seq_reverse(p->len, p->rseq, 1);
+	}
+	free(ta);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/fastmap.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,127 @@
+#include <zlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include "bntseq.h"
+#include "bwt.h"
+#include "kvec.h"
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+extern unsigned char nst_nt4_table[256];
+
+// Iterator state for walking over the matches of one query with smem_next().
+typedef struct {
+	const bwt_t *bwt;     // index searched; not owned by the iterator
+	const uint8_t *query; // query as nucleotide codes (values above 3 are skipped as ambiguous); not owned
+	int start, len;       // start: position at which the next search begins; len: query length
+	bwtintv_v *tmpvec[2], *matches; // two scratch vectors for bwt_smem1() and the matches of the last smem_next() call
+} smem_i;
+
+// Create an iterator over `bwt`: a zero-initialised smem_i owning three
+// empty interval vectors. No query is attached yet (see smem_set_query()).
+smem_i *smem_iter_init(const bwt_t *bwt)
+{
+	smem_i *it = calloc(1, sizeof(smem_i));
+	int j;
+	for (j = 0; j < 2; ++j)
+		it->tmpvec[j] = calloc(1, sizeof(bwtintv_v));
+	it->matches = calloc(1, sizeof(bwtintv_v));
+	it->bwt = bwt;
+	return it;
+}
+
+// Release an iterator created by smem_iter_init(): the payload of each
+// interval vector, the vector structures themselves (smem_iter_init()
+// allocates each of them separately), and finally the iterator. The bwt
+// and the query are not owned and are left alone. A NULL iterator is a no-op.
+void smem_iter_destroy(smem_i *iter)
+{
+	int j;
+	if (iter == 0) return;
+	for (j = 0; j < 2; ++j) {
+		if (iter->tmpvec[j]) free(iter->tmpvec[j]->a);
+		free(iter->tmpvec[j]);
+	}
+	if (iter->matches) free(iter->matches->a);
+	free(iter->matches);
+	free(iter);
+}
+
+// Attach a query of `len` bases to the iterator and rewind it to position 0.
+// The query is referenced, not copied.
+void smem_set_query(smem_i *iter, int len, const uint8_t *query)
+{
+	iter->len = len;
+	iter->query = query;
+	iter->start = 0; // the next smem_next() searches from the beginning
+}
+
+// Advance the iterator: empty the three vectors, skip ambiguous bases
+// (codes above 3) and run bwt_smem1() from the first unambiguous position.
+// Returns the new start position as reported by bwt_smem1(), or -1 when the
+// query is exhausted. The matches found are left in iter->matches.
+int smem_next(smem_i *iter)
+{
+	int pos;
+	iter->matches->n = 0;
+	iter->tmpvec[1]->n = 0;
+	iter->tmpvec[0]->n = 0;
+	if (iter->start < 0 || iter->start >= iter->len) return -1;
+	for (pos = iter->start; pos < iter->len && iter->query[pos] > 3; ++pos) {} // skip ambiguous bases
+	iter->start = pos;
+	if (pos == iter->len) return -1;
+	iter->start = bwt_smem1(iter->bwt, iter->len, iter->query, iter->start, iter->matches, iter->tmpvec);
+	return iter->start;
+}
+
+/*
+ * Entry point of the "bwa fastmap" sub-command.
+ *
+ * For every sequence of <in.fq> prints an "SQ" line (name and length,
+ * plus the sequence with -s), then one "EM" line per match of at least
+ * min_len bases (-l) giving its query interval and its number of
+ * occurrences; the reference positions are listed when there are at most
+ * min_iwidth occurrences (-w), otherwise "*" is printed. Each record is
+ * closed with a line holding only the two characters //.
+ *
+ * Returns 0 on success and 1 on a usage error or when the query file
+ * cannot be opened.
+ */
+int main_fastmap(int argc, char *argv[])
+{
+	int c, i, min_iwidth = 20, min_len = 17, print_seq = 0;
+	kseq_t *seq;
+	bwtint_t k;
+	gzFile fp;
+	bwt_t *bwt;
+	bntseq_t *bns;
+	smem_i *iter;
+
+	while ((c = getopt(argc, argv, "w:l:s")) >= 0) {
+		switch (c) {
+			case 's': print_seq = 1; break;
+			case 'w': min_iwidth = atoi(optarg); break;
+			case 'l': min_len = atoi(optarg); break;
+		}
+	}
+	if (optind + 1 >= argc) {
+		fprintf(stderr, "Usage: bwa fastmap [-s] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", min_len, min_iwidth);
+		return 1;
+	}
+
+	fp = gzopen(argv[optind + 1], "r");
+	if (fp == 0) { // reading from a NULL handle would yield garbage records instead of an error
+		fprintf(stderr, "[%s] fail to open file '%s'\n", __func__, argv[optind + 1]);
+		return 1;
+	}
+	seq = kseq_init(fp);
+	{ // load the packed sequences, BWT and SA
+		char *tmp = calloc(strlen(argv[optind]) + 5, 1); // prefix + ".bwt" + NUL
+		strcat(strcpy(tmp, argv[optind]), ".bwt");
+		bwt = bwt_restore_bwt(tmp);
+		strcat(strcpy(tmp, argv[optind]), ".sa");
+		bwt_restore_sa(tmp, bwt);
+		free(tmp);
+		bns = bns_restore(argv[optind]);
+	}
+	iter = smem_iter_init(bwt);
+	while (kseq_read(seq) >= 0) {
+		// the casts make the arguments match the printf conversions on every platform
+		printf("SQ\t%s\t%ld", seq->name.s, (long)seq->seq.l);
+		if (print_seq) {
+			putchar('\t');
+			puts(seq->seq.s);
+		} else putchar('\n');
+		// translate the sequence in place from ASCII to nucleotide codes
+		for (i = 0; i < seq->seq.l; ++i)
+			seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
+		smem_set_query(iter, seq->seq.l, (uint8_t*)seq->seq.s);
+		while (smem_next(iter) > 0) {
+			for (i = 0; i < iter->matches->n; ++i) {
+				bwtintv_t *p = &iter->matches->a[i];
+				// info packs the query interval: begin in the high 32 bits, end in the low 32 bits
+				if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
+				printf("EM\t%d\t%d\t%ld", (int)(p->info>>32), (int)(uint32_t)p->info, (long)p->x[2]);
+				if (p->x[2] <= min_iwidth) {
+					for (k = 0; k < p->x[2]; ++k) {
+						bwtint_t pos;
+						int len, is_rev, ref_id;
+						len  = (uint32_t)p->info - (p->info>>32);
+						pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev);
+						if (is_rev) pos -= len - 1;
+						bns_cnt_ambi(bns, pos, len, &ref_id);
+						printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
+					}
+				} else fputs("\t*", stdout);
+				putchar('\n');
+			}
+		}
+		puts("//");
+	}
+
+	smem_iter_destroy(iter);
+	bns_destroy(bns);
+	bwt_destroy(bwt);
+	kseq_destroy(seq);
+	gzclose(fp);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/is.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,218 @@
+/*
+ * sais.c for sais-lite
+ * Copyright (c) 2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+
+typedef unsigned char ubyte_t;
+/* Read symbol i of the text: as an int when the symbol width cs equals
+ * sizeof(int), as an unsigned char otherwise. Expects variables named T
+ * and cs to be in scope where the macro is used. */
+#define chr(i) (cs == sizeof(int) ? ((const int *)T)[i]:((const unsigned char *)T)[i])
+
+/* Count how often each of the k symbols occurs in T[0..n-1]: C[c] receives
+ * the number of occurrences of symbol c. cs is the symbol width used by chr(). */
+static void getCounts(const unsigned char *T, int *C, int n, int k, int cs)
+{
+	int sym, pos = 0;
+	for (sym = k - 1; sym >= 0; --sym) C[sym] = 0;
+	while (pos < n) {
+		C[chr(pos)] += 1;
+		++pos;
+	}
+}
+/* Turn the symbol counts C[0..k-1] into bucket boundaries B[0..k-1]:
+ * with end != 0, B[c] is the position one past the last slot of bucket c;
+ * with end == 0, B[c] is the first slot of bucket c.
+ * C and B may be the same array: every count is read before its slot is written. */
+static void getBuckets(const int *C, int *B, int k, int end)
+{
+	int sym = 0, running = 0;
+	while (sym < k) {
+		const int start = running; /* where bucket sym begins */
+		running += C[sym];         /* where bucket sym ends (exclusive) */
+		B[sym] = end ? running : start;
+		++sym;
+	}
+}
+
+/* compute SA */
+/*
+ * Induced-sorting step: starting from the entries already placed in SA,
+ * fill in the rest of SA[0..n-1] with two bucket scans.
+ *
+ *   T   the text (symbols read through chr(), width cs)
+ *   SA  suffix array under construction, updated in place
+ *   C   symbol counts; recomputed here only when it shares storage with B
+ *   B   bucket boundaries (working space)
+ *   n   text length;  k  alphabet size
+ *
+ * Entries are temporarily stored bit-complemented (~j, hence negative) as a
+ * marker: both scans only expand entries that are currently positive.
+ * NOTE(review): the statement order inside both loops matters because SA is
+ * read and written in the same pass.
+ */
+static void induceSA(const unsigned char *T, int *SA, int *C, int *B, int n, int k, int cs)
+{
+	int *b, i, j;
+	int  c0, c1;
+	/* compute SAl */
+	if (C == B) getCounts(T, C, n, k, cs); /* B's contents overwrote the counts: rebuild them */
+	getBuckets(C, B, k, 0);	/* find starts of buckets */
+	j = n - 1;
+	b = SA + B[c1 = chr(j)]; /* b: next free slot of the bucket of the current symbol c1 */
+	*b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j; /* seed with the last suffix */
+	for (i = 0; i < n; ++i) { /* left-to-right scan */
+		j = SA[i], SA[i] = ~j;
+		if (0 < j) {
+			--j;
+			if ((c0 = chr(j)) != c1) { /* bucket change: save the cursor, load the other one */
+				B[c1] = b - SA;
+				b = SA + B[c1 = c0];
+			}
+			*b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j;
+		}
+	}
+	/* compute SAs */
+	if (C == B) getCounts(T, C, n, k, cs);
+	getBuckets(C, B, k, 1);	/* find ends of buckets */
+	for (i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) { /* right-to-left scan, filling buckets from their ends */
+		if (0 < (j = SA[i])) {
+			--j;
+			if ((c0 = chr(j)) != c1) {
+				B[c1] = b - SA;
+				b = SA + B[c1 = c0];
+			}
+			*--b = ((j == 0) || (chr(j - 1) > c1)) ? ~j : j;
+		} else SA[i] = ~j; /* undo the complement applied to this entry */
+	}
+}
+
+/*
+ * find the suffix array SA of T[0..n-1] in {0..k-1}^n use a working
+ * space (excluding T and SA) of at most 2n+O(1) for a constant alphabet
+ *
+ *   T   the text; symbols are cs bytes wide (1, or sizeof(int) in the recursion)
+ *   SA  output array; SA[0..n-1] receives the suffix array
+ *   fs  number of spare ints available right after SA[n-1]; when k <= fs the
+ *       bucket arrays live there, otherwise they are malloc'ed
+ *   n   text length;  k  alphabet size
+ *
+ * Returns 0 on success and -2 when memory allocation fails (here or in the
+ * recursive call).
+ */
+static int sais_main(const unsigned char *T, int *SA, int fs, int n, int k, int cs)
+{
+	int *C, *B, *RA;
+	int  i, j, c, m, p, q, plen, qlen, name;
+	int  c0, c1;
+	int  diff;
+
+	/* stage 1: reduce the problem by at least 1/2 sort all the
+	 * S-substrings */
+	if (k <= fs) {
+		/* reuse the spare space behind SA; B gets its own k ints only when 2k fit */
+		C = SA + n;
+		B = (k <= (fs - k)) ? C + k : C;
+	} else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2;
+	getCounts(T, C, n, k, cs);
+	getBuckets(C, B, k, 1);	/* find ends of buckets */
+	for (i = 0; i < n; ++i) SA[i] = 0;
+	/* right-to-left scan; c is 1 while c0 < c1 + c has held since the last reset,
+	 * and a position is dropped into the end of its bucket when that stops holding */
+	for (i = n - 2, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
+		if ((c0 = chr(i)) < (c1 + c)) c = 1;
+		else if (c != 0) SA[--B[c1]] = i + 1, c = 0;
+	}
+	induceSA(T, SA, C, B, n, k, cs);
+	if (fs < k) free(C); /* same condition as the malloc above */
+	/* compact all the sorted substrings into the first m items of SA
+	 * 2*m must be not larger than n (proveable) */
+	for (i = 0, m = 0; i < n; ++i) {
+		p = SA[i];
+		if ((0 < p) && (chr(p - 1) > (c0 = chr(p)))) {
+			for (j = p + 1; (j < n) && (c0 == (c1 = chr(j))); ++j);
+			if ((j < n) && (c0 < c1)) SA[m++] = p;
+		}
+	}
+	for (i = m; i < n; ++i) SA[i] = 0;	/* init the name array buffer */
+	/* store the length of all substrings */
+	for (i = n - 2, j = n, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
+		if ((c0 = chr(i)) < (c1 + c)) c = 1;
+		else if (c != 0) {
+			SA[m + ((i + 1) >> 1)] = j - i - 1;
+			j = i + 1;
+			c = 0;
+		}
+	}
+	/* find the lexicographic names of all substrings */
+	for (i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
+		p = SA[i], plen = SA[m + (p >> 1)], diff = 1;
+		if (plen == qlen) {
+			/* same length as the previous substring: compare symbol by symbol */
+			for (j = 0; (j < plen) && (chr(p + j) == chr(q + j)); j++);
+			if (j == plen) diff = 0;
+		}
+		if (diff != 0) ++name, q = p, qlen = plen;
+		SA[m + (p >> 1)] = name; /* the length slot is reused to hold the name */
+	}
+
+	/* stage 2: solve the reduced problem recurse if names are not yet
+	 * unique */
+	if (name < m) {
+		RA = SA + n + fs - m; /* the reduced text sits at the very end of the available space */
+		for (i = n - 1, j = m - 1; m <= i; --i) {
+			if (SA[i] != 0) RA[j--] = SA[i] - 1;
+		}
+		if (sais_main((unsigned char *) RA, SA, fs + n - m * 2, m, name, sizeof(int)) != 0) return -2;
+		for (i = n - 2, j = m - 1, c = 0, c1 = chr(n - 1); 0 <= i; --i, c1 = c0) {
+			if ((c0 = chr(i)) < (c1 + c)) c = 1;
+			else if (c != 0) RA[j--] = i + 1, c = 0; /* get p1 */
+		}
+		for (i = 0; i < m; ++i) SA[i] = RA[SA[i]]; /* get index */
+	}
+	/* stage 3: induce the result for the original problem */
+	if (k <= fs) {
+		C = SA + n;
+		B = (k <= (fs - k)) ? C + k : C;
+	} else if ((C = B = (int *) malloc(k * sizeof(int))) == NULL) return -2;
+	/* put all left-most S characters into their buckets */
+	getCounts(T, C, n, k, cs);
+	getBuckets(C, B, k, 1);	/* find ends of buckets */
+	for (i = m; i < n; ++i) SA[i] = 0; /* init SA[m..n-1] */
+	for (i = m - 1; 0 <= i; --i) {
+		j = SA[i], SA[i] = 0;
+		SA[--B[chr(j)]] = j;
+	}
+	induceSA(T, SA, C, B, n, k, cs);
+	if (fs < k) free(C);
+	return 0;
+}
+
+/**
+ * Builds the suffix array of a byte string.
+ * SA[0] is always set to n; the suffixes themselves follow in SA[1..n].
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 on success, -1 on invalid arguments, otherwise the error
+ *         code of sais_main()
+ */
+int is_sa(const ubyte_t *T, int *SA, int n)
+{
+	if (T == NULL || SA == NULL || n < 0) return -1;
+	SA[0] = n;
+	switch (n) {
+	case 0: /* empty string: nothing more to store */
+		return 0;
+	case 1: /* a single suffix, starting at position 0 */
+		SA[1] = 0;
+		return 0;
+	default:
+		return sais_main(T, SA + 1, 0, n, 256, 1);
+	}
+}
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * The transform overwrites T in place. On error T is left untouched.
+ * @param T[0..n-1] The input string.
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise
+ *         (-1: invalid arguments; -2: out of memory).
+ */
+int is_bwt(ubyte_t *T, int n)
+{
+	int *SA, i, ret, primary = 0;
+	if (T == NULL || n < 0) return -1;
+	SA = (int*)calloc((size_t)n + 1, sizeof(int));
+	if (SA == NULL) return -2;
+	/* do not derive a BWT from a suffix array that failed to build */
+	if ((ret = is_sa(T, SA, n)) != 0) {
+		free(SA);
+		return ret;
+	}
+
+	/* replace every suffix start by the symbol preceding it; the suffix
+	 * starting at 0 has no predecessor and marks the primary index */
+	for (i = 0; i <= n; ++i) {
+		if (SA[i] == 0) primary = i;
+		else SA[i] = T[SA[i] - 1];
+	}
+	/* copy the n symbols back, leaving out the slot of the primary index */
+	for (i = 0; i < primary; ++i) T[i] = SA[i];
+	for (; i < n; ++i) T[i] = SA[i + 1];
+	free(SA);
+	return primary;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/khash.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,506 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2009 by attractor <attractor@live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  An example:
+
+#include "khash.h"
+KHASH_MAP_INIT_INT(32, char)
+int main() {
+	int ret, is_missing;
+	khiter_t k;
+	khash_t(32) *h = kh_init(32);
+	k = kh_put(32, h, 5, &ret);
+	if (!ret) kh_del(32, h, k);
+	kh_value(h, k) = 10;
+	k = kh_get(32, h, 10);
+	is_missing = (k == kh_end(h));
+	k = kh_get(32, h, 5);
+	kh_del(32, h, k);
+	for (k = kh_begin(h); k != kh_end(h); ++k)
+		if (kh_exist(h, k)) kh_value(h, k) = 1;
+	kh_destroy(32, h);
+	return 0;
+}
+*/
+
+/*
+  2009-09-26 (0.2.4):
+
+    * Improve portability
+
+  2008-09-19 (0.2.3):
+
+	* Corrected the example
+	* Improved interfaces
+
+  2008-09-11 (0.2.2):
+
+	* Improved speed a little in kh_put()
+
+  2008-09-10 (0.2.1):
+
+	* Added kh_clear()
+	* Fixed a compiling error
+
+  2008-09-02 (0.2.0):
+
+	* Changed to token concatenation which increases flexibility.
+
+  2008-08-31 (0.1.2):
+
+	* Fixed a bug in kh_get(), which has not been tested previously.
+
+  2008-08-31 (0.1.1):
+
+	* Added destructor
+*/
+
+
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
+
+/*!
+  @header
+
+  Generic hash table library.
+
+  @copyright Heng Li
+ */
+
+#define AC_VERSION_KHASH_H "0.2.4"
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+/* compiler specific configuration */
+
+#if UINT_MAX == 0xffffffffu
+typedef unsigned int khint32_t;
+#elif ULONG_MAX == 0xffffffffu
+typedef unsigned long khint32_t;
+#endif
+
+#if ULONG_MAX == ULLONG_MAX
+typedef unsigned long khint64_t;
+#else
+typedef unsigned long long khint64_t;
+#endif
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define __ac_HASH_PRIME_SIZE 32 /* number of entries in __ac_prime_list */
+static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = /* bucket counts that kh_resize_*() chooses from */
+{
+  0ul,          3ul,          11ul,         23ul,         53ul,
+  97ul,         193ul,        389ul,        769ul,        1543ul,
+  3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
+  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
+  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
+  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
+  3221225473ul, 4294967291ul
+};
+
+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) /* flags hold 2 bits per bucket, 16 buckets per word; the bit of value 2 means "never used" */
+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) /* the bit of value 1 means "deleted" */
+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) /* nonzero if bucket i is empty or deleted, i.e. holds no live key */
+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) /* clear the "deleted" bit */
+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) /* clear the "never used" bit */
+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) /* clear both bits: the bucket now holds a live key */
+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) /* set the "deleted" bit */
+
+static const double __ac_HASH_UPPER = 0.77; /* load factor: upper_bound is n_buckets times this, rounded */
+
+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	typedef struct { /* one hash table instance */ \
+		khint_t n_buckets, size, n_occupied, upper_bound; /* size: live keys; n_occupied: buckets that are not "never used" (live + deleted); upper_bound: n_occupied level at which kh_put rebuilds the table */ \
+		khint32_t *flags; /* 2 state bits per bucket */ \
+		khkey_t *keys;													\
+		khval_t *vals; /* only (re)allocated by kh_resize when kh_is_map is nonzero */ \
+	} kh_##name##_t;													\
+	static inline kh_##name##_t *kh_init_##name() { \
+		/* a zero-filled table: no buckets, no arrays; NULL if calloc fails */ \
+		kh_##name##_t *table = (kh_##name##_t*)calloc(1, sizeof(*table)); \
+		return table; \
+	} \
+	static inline void kh_destroy_##name(kh_##name##_t *h) \
+	{ \
+		/* frees the three arrays and the table itself; a NULL table is ignored */ \
+		if (h == 0) return; \
+		free(h->keys); \
+		free(h->flags); \
+		free(h->vals); \
+		free(h); \
+	} \
+	static inline void kh_clear_##name(kh_##name##_t *h) \
+	{ \
+		size_t n_flag_bytes; \
+		if (!h || !h->flags) return; /* nothing allocated yet */ \
+		/* 0xaa = 10101010b: every bucket "never used" and not deleted; keys/vals stay allocated */ \
+		n_flag_bytes = ((h->n_buckets>>4) + 1) * sizeof(khint32_t); \
+		memset(h->flags, 0xaa, n_flag_bytes); \
+		h->size = h->n_occupied = 0; \
+	} \
+	static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
+	{ /* returns the bucket index holding key, or h->n_buckets if key is absent */ \
+		if (h->n_buckets) {												\
+			khint_t inc, k, i, last;									\
+			k = __hash_func(key); i = k % h->n_buckets; /* i: first bucket to try */ \
+			inc = 1 + k % (h->n_buckets - 1); last = i; /* probe step derived from the hash, in 1..n_buckets-1 */ \
+			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { /* step over deleted buckets and other keys; a never-used bucket ends the search */ \
+				if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
+				else i += inc;											\
+				if (i == last) return h->n_buckets; /* came back to the start: absent */ \
+			}															\
+			return __ac_iseither(h->flags, i)? h->n_buckets : i;		\
+		} else return 0; /* no buckets: 0 equals n_buckets here */ \
+	}																	\
+	static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
+	{ /* rehash all live keys, in place, into a table whose size is taken from __ac_prime_list */ \
+		khint32_t *new_flags = 0;										\
+		khint_t j = 1; /* used as the "do the rehash" switch until it becomes the loop index */ \
+		{																\
+			khint_t t = __ac_HASH_PRIME_SIZE - 1;						\
+			while (__ac_prime_list[t] > new_n_buckets) --t;				\
+			new_n_buckets = __ac_prime_list[t+1]; /* smallest listed prime above the request; NOTE(review): t+1 indexes past the list when the request is >= the last prime - confirm callers never ask for that */ \
+			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	/* chosen size cannot hold the current keys under the load factor: leave the table alone */ \
+			else {														\
+				new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t));	/* NOTE(review): malloc/realloc results in this function are used unchecked */ \
+				memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); /* every new bucket starts "never used" */ \
+				if (h->n_buckets < new_n_buckets) { /* growing: enlarge the arrays before moving keys */ \
+					h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
+					if (kh_is_map)										\
+						h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+				}														\
+			}															\
+		}																\
+		if (j) {														\
+			for (j = 0; j != h->n_buckets; ++j) {						\
+				if (__ac_iseither(h->flags, j) == 0) { /* live key that has not been moved yet */ \
+					khkey_t key = h->keys[j];							\
+					khval_t val;										\
+					if (kh_is_map) val = h->vals[j];					\
+					__ac_set_isdel_true(h->flags, j); /* mark the old bucket as already handled */ \
+					while (1) { /* place key; if that evicts an unmoved key, place the evicted one next */ \
+						khint_t inc, k, i;								\
+						k = __hash_func(key);							\
+						i = k % new_n_buckets;							\
+						inc = 1 + k % (new_n_buckets - 1);				\
+						while (!__ac_isempty(new_flags, i)) { /* probe the NEW layout for a free bucket */ \
+							if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
+							else i += inc;								\
+						}												\
+						__ac_set_isempty_false(new_flags, i);			\
+						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* destination still holds an unmoved key: swap and carry on with that key */ \
+							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
+							__ac_set_isdel_true(h->flags, i);			\
+						} else {										\
+							h->keys[i] = key;							\
+							if (kh_is_map) h->vals[i] = val;			\
+							break;										\
+						}												\
+					}													\
+				}														\
+			}															\
+			if (h->n_buckets > new_n_buckets) { /* shrinking: trim the arrays only after every key has moved */ \
+				h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
+				if (kh_is_map)											\
+					h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+			}															\
+			free(h->flags);												\
+			h->flags = new_flags;										\
+			h->n_buckets = new_n_buckets;								\
+			h->n_occupied = h->size; /* deleted buckets do not survive the rehash */ \
+			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+		}																\
+	}																	\
+	static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
+	{ /* inserts key if absent and returns its bucket; *ret: 0 = already present, 1 = stored in a never-used bucket, 2 = stored in a deleted bucket */ \
+		khint_t x;														\
+		if (h->n_occupied >= h->upper_bound) { /* threshold reached: rebuild before inserting */ \
+			if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* fewer than half the buckets are live: request n_buckets - 1 */ \
+			else kh_resize_##name(h, h->n_buckets + 1); /* otherwise request n_buckets + 1 */ \
+		}																\
+		{																\
+			khint_t inc, k, i, site, last;								\
+			x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; /* x/site equal to n_buckets means "not decided yet" */ \
+			if (__ac_isempty(h->flags, i)) x = i; /* first bucket tried is free */ \
+			else {														\
+				inc = 1 + k % (h->n_buckets - 1); last = i;				\
+				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+					if (__ac_isdel(h->flags, i)) site = i; /* remember a deleted bucket seen on the probe path */ \
+					if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
+					else i += inc;										\
+					if (i == last) { x = site; break; } /* probe sequence came back to its start */ \
+				}														\
+				if (x == h->n_buckets) {								\
+					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; /* key absent: reuse the remembered deleted bucket */ \
+					else x = i; /* either the key itself or the never-used bucket that ended the probe */ \
+				}														\
+			}															\
+		}																\
+		if (__ac_isempty(h->flags, x)) { /* never-used bucket: it becomes occupied */ \
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size; ++h->n_occupied;									\
+			*ret = 1;													\
+		} else if (__ac_isdel(h->flags, x)) { /* deleted bucket reused: n_occupied is not incremented */ \
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size;													\
+			*ret = 2;													\
+		} else *ret = 0; /* key already present: nothing is modified */ \
+		return x;														\
+	}																	\
+	static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \
+	{ \
+		/* the end iterator and buckets without a live key are ignored */ \
+		if (x == h->n_buckets) return; \
+		if (__ac_iseither(h->flags, x)) return; \
+		__ac_set_isdel_true(h->flags, x); /* only flagged as deleted; n_occupied is left unchanged */ \
+		--h->size; \
+	}
+
+/* --- BEGIN OF HASH FUNCTIONS --- */
+
+/*! @function
+  @abstract     Integer hash function
+  @param  key   The integer [khint32_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int_hash_func(key) (khint32_t)(key)
+/*! @function
+  @abstract     Integer comparison function
+ */
+#define kh_int_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     64-bit integer hash function
+  @param  key   The integer [khint64_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
+/*! @function
+  @abstract     64-bit integer comparison function
+ */
+#define kh_int64_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     const char* hash function
+  @param  s     Pointer to a null terminated string
+  @return       The hash value
+ */
+static inline khint_t __ac_X31_hash_string(const char *s)
+{
+	/* seeded with the first character; each further character c updates
+	 * the hash as hash*31 + c, written as (hash << 5) - hash + c */
+	khint_t hash = *s;
+	if (hash == 0) return hash; /* empty string hashes to 0 */
+	while (*++s) hash = (hash << 5) - hash + *s;
+	return hash;
+}
+/*! @function
+  @abstract     Another interface to const char* hash function
+  @param  key   Pointer to a null terminated string [const char*]
+  @return       The hash value [khint_t]
+ */
+#define kh_str_hash_func(key) __ac_X31_hash_string(key)
+/*! @function
+  @abstract     Const char* comparison function
+ */
+#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
+
+/* --- END OF HASH FUNCTIONS --- */
+
+/* Other necessary macros... */
+
+/*!
+  @abstract Type of the hash table.
+  @param  name  Name of the hash table [symbol]
+ */
+#define khash_t(name) kh_##name##_t
+
+/*! @function
+  @abstract     Initiate a hash table.
+  @param  name  Name of the hash table [symbol]
+  @return       Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_init(name) kh_init_##name()
+
+/*! @function
+  @abstract     Destroy a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_destroy(name, h) kh_destroy_##name(h)
+
+/*! @function
+  @abstract     Reset a hash table without deallocating memory.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_clear(name, h) kh_clear_##name(h)
+
+/*! @function
+  @abstract     Resize a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  s     New size [khint_t]
+ */
+#define kh_resize(name, h, s) kh_resize_##name(h, s)
+
+/*! @function
+  @abstract     Insert a key to the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @param  r     Extra return code: 0 if the key is present in the hash table;
+                1 if the bucket is empty (never used); 2 if the element in
+				the bucket has been deleted [int*]
+  @return       Iterator to the inserted element [khint_t]
+ */
+#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
+
+/*! @function
+  @abstract     Retrieve a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
+ */
+#define kh_get(name, h, k) kh_get_##name(h, k)
+
+/*! @function
+  @abstract     Remove a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Iterator to the element to be deleted [khint_t]
+ */
+#define kh_del(name, h, k) kh_del_##name(h, k)
+
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Alias of kh_val()
+ */
+#define kh_value(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Get the start iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The end iterator [khint_t]
+ */
+#define kh_end(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->size)
+
+/*! @function
+  @abstract     Get the number of buckets in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of buckets in the hash table [khint_t]
+ */
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+/* More convenient interfaces */
+
+/*! @function
+  @abstract     Instantiate a hash set containing integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT(name)										\
+	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT(name, khval_t)								\
+	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash set containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT64(name)										\
+	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT64(name, khval_t)								\
+	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
+
+typedef const char *kh_cstr_t;
+/*! @function
+  @abstract     Instantiate a hash set containing const char* keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_STR(name)										\
+	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_STR(name, khval_t)								\
+	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
+#endif /* __AC_KHASH_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/kseq.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,208 @@
+/* The MIT License
+
+   Copyright (c) 2008, by Heng Li <lh3@sanger.ac.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#ifndef AC_KSEQ_H
+#define AC_KSEQ_H
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define __KS_TYPE(type_t)						\
+	typedef struct __kstream_t { /* buffered reader over a handle of type type_t */ \
+		char *buf; /* read buffer, allocated by ks_init() */ \
+		int begin, end, is_eof; /* buf[begin..end) is unread data; is_eof is set once a read returns fewer bytes than requested */ \
+		type_t f; /* handle that is passed to the __read function */ \
+	} kstream_t;
+
+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) /* true once is_eof is set and the buffer is drained */
+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) /* resets the buffer state only; the handle f is not touched */
+
+#define __KS_BASIC(type_t, __bufsize)								\
+	/* Allocate a stream (and its __bufsize-byte buffer) around handle f. */ \
+	/* Returns NULL if memory cannot be allocated. */				\
+	static inline kstream_t *ks_init(type_t f)						\
+	{																\
+		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));	\
+		if (ks == 0) return 0;										\
+		ks->f = f;													\
+		ks->buf = (char*)malloc(__bufsize);							\
+		if (ks->buf == 0) { /* do not hand out a stream without a buffer */ \
+			free(ks);												\
+			return 0;												\
+		}															\
+		return ks;													\
+	}																\
+	/* Free the stream and its buffer; the handle f is left alone. NULL is accepted. */ \
+	static inline void ks_destroy(kstream_t *ks)					\
+	{																\
+		if (ks) {													\
+			free(ks->buf);											\
+			free(ks);												\
+		}															\
+	}
+
+#define __KS_GETC(__read, __bufsize)						\
+	/* Return the next byte as a value in 0..255, or -1 when the input is */ \
+	/* exhausted or __read reports an error (negative result). */ \
+	static inline int ks_getc(kstream_t *ks)				\
+	{														\
+		if (ks->is_eof && ks->begin >= ks->end) return -1;	\
+		if (ks->begin >= ks->end) { /* buffer drained: refill */ \
+			ks->begin = 0;									\
+			ks->end = __read(ks->f, ks->buf, __bufsize);	\
+			if (ks->end < __bufsize) ks->is_eof = 1;		\
+			if (ks->end <= 0) { /* nothing read, or read error: never expose stale buffer bytes */ \
+				ks->end = 0;								\
+				return -1;									\
+			}												\
+		}													\
+		/* via unsigned char so bytes >= 0x80 are not negative and 0xFF is not mistaken for the -1 end marker */ \
+		return (int)(unsigned char)ks->buf[ks->begin++];	\
+	}
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m;
+	char *s;
+} kstring_t;
+#endif
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#define __KS_GETUNTIL(__read, __bufsize)								\
+	/* Read into str everything up to (not including) the next delimiter; */ \
+	/* delimiter 0 means "any isspace() character". str is overwritten and */ \
+	/* NUL-terminated. If dret is non-NULL, *dret receives the delimiter that */ \
+	/* ended the token, or 0 if the input ran out first. Returns the token */ \
+	/* length, or -1 if the stream was already exhausted on entry or an */ \
+	/* empty result string could not be allocated. */					\
+	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
+	{																	\
+		if (dret) *dret = 0;											\
+		str->l = 0;														\
+		if (ks->begin >= ks->end && ks->is_eof) return -1;				\
+		for (;;) {														\
+			int i;														\
+			if (ks->begin >= ks->end) {									\
+				if (!ks->is_eof) {										\
+					ks->begin = 0;										\
+					ks->end = __read(ks->f, ks->buf, __bufsize);		\
+					if (ks->end < __bufsize) ks->is_eof = 1;			\
+					if (ks->end <= 0) { ks->end = 0; break; } /* end of input or read error */ \
+				} else break;											\
+			}															\
+			if (delimiter) {											\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (ks->buf[i] == delimiter) break;					\
+			} else {													\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (isspace((unsigned char)ks->buf[i])) break; /* isspace() must not be given a negative char */ \
+			}															\
+			if (str->m - str->l < i - ks->begin + 1) {					\
+				str->m = str->l + (i - ks->begin) + 1;					\
+				kroundup32(str->m);										\
+				str->s = (char*)realloc(str->s, str->m);				\
+			}															\
+			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
+			str->l = str->l + (i - ks->begin);							\
+			ks->begin = i + 1;											\
+			if (i < ks->end) { /* stopped on a delimiter, not on the end of the buffer */ \
+				if (dret) *dret = ks->buf[i];							\
+				break;													\
+			}															\
+		}																\
+		if (str->s == 0) { /* the loop never stored anything: still return a valid empty string */ \
+			str->s = (char*)calloc(1, 1);								\
+			if (str->s == 0) return -1;									\
+			str->m = 1;													\
+		}																\
+		str->s[str->l] = '\0';											\
+		return str->l;													\
+	}
+
+#define KSTREAM_INIT(type_t, __read, __bufsize) /* expands to the kstream_t type plus ks_init, ks_destroy, ks_getc and ks_getuntil */ \
+	__KS_TYPE(type_t)							\
+	__KS_BASIC(type_t, __bufsize)				\
+	__KS_GETC(__read, __bufsize)				\
+	__KS_GETUNTIL(__read, __bufsize)
+
+#define __KSEQ_BASIC(type_t)											\
+	/* Allocate a sequence reader on top of handle fd; NULL if out of memory. */ \
+	static inline kseq_t *kseq_init(type_t fd)							\
+	{																	\
+		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
+		if (s == 0) return 0;											\
+		s->f = ks_init(fd);												\
+		if (s->f == 0) { /* no stream, no reader */					\
+			free(s);													\
+			return 0;													\
+		}																\
+		return s;														\
+	}																	\
+	/* Forget buffered data and parser state; the handle itself is not repositioned here. */ \
+	static inline void kseq_rewind(kseq_t *ks)							\
+	{																	\
+		ks->last_char = 0;												\
+		ks->f->is_eof = ks->f->begin = ks->f->end = 0;					\
+	}																	\
+	/* Free the reader, its four strings and its stream; NULL is accepted. */ \
+	static inline void kseq_destroy(kseq_t *ks)							\
+	{																	\
+		if (!ks) return;												\
+		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
+		ks_destroy(ks->f);												\
+		free(ks);														\
+	}
+
+/* Return value:
+   >=0  length of the sequence (normal)
+   -1   end-of-file
+   -2   truncated quality string
+   A record without any sequence characters returns 0 and leaves seq.s
+   pointing at an empty string.
+ */
+#define __KSEQ_READ														\
+	static int kseq_read(kseq_t *seq)									\
+	{																	\
+		int c;															\
+		kstream_t *ks = seq->f;											\
+		if (seq->last_char == 0) { /* then jump to the next header line */ \
+			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');	\
+			if (c == -1) return -1; /* end of file */					\
+			seq->last_char = c;											\
+		} /* the first header char has been read */						\
+		seq->comment.l = seq->seq.l = seq->qual.l = 0;					\
+		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;			\
+		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);			\
+		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+			if (isgraph(c)) { /* printable non-space character */		\
+				if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
+					seq->seq.m = seq->seq.l + 2;						\
+					kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
+					seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
+				}														\
+				seq->seq.s[seq->seq.l++] = (char)c;						\
+			}															\
+		}																\
+		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
+		if (seq->seq.l + 1 >= seq->seq.m) { /* no room for the terminator: happens when no sequence character was stored and seq.s is still NULL */ \
+			seq->seq.m = seq->seq.l + 2;								\
+			kroundup32(seq->seq.m);										\
+			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m);		\
+		}																\
+		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */		\
+		if (c != '+') return seq->seq.l; /* FASTA */					\
+		if (seq->qual.m < seq->seq.m) {	/* allocate enough memory */	\
+			seq->qual.m = seq->seq.m;									\
+			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);		\
+		}																\
+		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+		if (c == -1) return -2; /* we should not stop here */			\
+		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)		\
+			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c;	\
+		seq->qual.s[seq->qual.l] = 0; /* null terminated string */		\
+		seq->last_char = 0;	/* we have not come to the next header line */ \
+		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
+		return seq->seq.l;												\
+	}
+
+#define __KSEQ_TYPE(type_t)						\
+	typedef struct { /* one FASTA/FASTQ record plus the reader state */ \
+		kstring_t name, comment, seq, qual; /* buffers are reused from record to record by kseq_read() */ \
+		int last_char; /* header character ('>' or '@') already consumed for the next record, or 0 */ \
+		kstream_t *f; /* underlying buffered stream, created by kseq_init() */ \
+	} kseq_t;
+
+#define KSEQ_INIT(type_t, __read) /* instantiate the stream layer with a 4096-byte buffer, then kseq_t and its functions */ \
+	KSTREAM_INIT(type_t, __read, 4096)			\
+	__KSEQ_TYPE(type_t)							\
+	__KSEQ_BASIC(type_t)						\
+	__KSEQ_READ
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/ksort.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,269 @@
+/* The MIT License
+
+   Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  2008-11-16 (0.1.4):
+
+    * Fixed a bug in introsort() that happens in rare cases.
+
+  2008-11-05 (0.1.3):
+
+    * Fixed a bug in introsort() for complex comparisons.
+
+	* Fixed a bug in mergesort(). The previous version is not stable.
+
+  2008-09-15 (0.1.2):
+
+	* Accelerated introsort. On my Mac (not on another Linux machine),
+	  my implementation is as fast as std::sort on random input.
+
+	* Added combsort and in introsort, switch to combsort if the
+	  recursion is too deep.
+
+  2008-09-13 (0.1.1):
+
+	* Added k-small algorithm
+
+  2008-09-05 (0.1.0):
+
+	* Initial version
+
+*/
+
+#ifndef AC_KSORT_H
+#define AC_KSORT_H
+
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct { /* one pending sub-range on the explicit stack used by ks_introsort_*() */
+	void *left, *right; /* first and last element of the range (both inclusive) */
+	int depth; /* depth budget saved together with the range */
+} ks_isort_stack_t;
+
+#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; } /* swaps two lvalues; expands to a brace block, not an expression */
+
+#define KSORT_INIT(name, type_t, __sort_lt)								\
+	void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) /* iterative merge sort of array[0..n); temp is an n-element scratch buffer, or NULL to have one malloc'ed */ \
+	{																	\
+		type_t *a2[2], *a, *b;											\
+		int curr, shift;												\
+																		\
+		a2[0] = array;													\
+		a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); /* NOTE(review): malloc result is not checked */ \
+		for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { /* each pass merges runs of length 1<<shift from a into b */ \
+			a = a2[curr]; b = a2[1-curr];								\
+			if (shift == 0) { /* first pass: order adjacent pairs */ \
+				type_t *p = b, *i, *eb = a + n;							\
+				for (i = a; i < eb; i += 2) {							\
+					if (i == eb - 1) *p++ = *i; /* odd element at the end */ \
+					else {												\
+						if (__sort_lt(*(i+1), *i)) {					\
+							*p++ = *(i+1); *p++ = *i;					\
+						} else {										\
+							*p++ = *i; *p++ = *(i+1);					\
+						}												\
+					}													\
+				}														\
+			} else {													\
+				size_t i, step = 1ul<<shift;							\
+				for (i = 0; i < n; i += step<<1) {						\
+					type_t *p, *j, *k, *ea, *eb;						\
+					if (n < i + step) { /* only a (short) left run remains: the right run is empty */ \
+						ea = a + n; eb = a;								\
+					} else {											\
+						ea = a + i + step;								\
+						eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
+					}													\
+					j = a + i; k = a + i + step; p = b + i;				\
+					while (j < ea && k < eb) {							\
+						if (__sort_lt(*k, *j)) *p++ = *k++; /* right element only wins when strictly smaller */ \
+						else *p++ = *j++;								\
+					}													\
+					while (j < ea) *p++ = *j++;							\
+					while (k < eb) *p++ = *k++;							\
+				}														\
+			}															\
+			curr = 1 - curr; /* the output of this pass is the input of the next */ \
+		}																\
+		if (curr == 1) { /* result ended up in the scratch buffer: copy it back */ \
+			type_t *p = a2[0], *i = a2[1], *eb = array + n;				\
+			for (; p < eb; ++i) *p++ = *i;								\
+		}																\
+		if (temp == 0) free(a2[1]);										\
+	}																	\
+	void ks_heapadjust_##name(size_t i, size_t n, type_t l[]) /* sift l[i] down within l[0..n): larger children (under __sort_lt) move up until l[i]'s slot is found */ \
+	{																	\
+		size_t k = i;													\
+		type_t tmp = l[i]; /* element being sifted; stored once its final slot is known */ \
+		while ((k = (k << 1) + 1) < n) { /* k: left child of the current slot */ \
+			if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; /* pick the larger of the two children */ \
+			if (__sort_lt(l[k], tmp)) break; /* larger child is smaller than tmp: stop here */ \
+			l[i] = l[k]; i = k;											\
+		}																\
+		l[i] = tmp;														\
+	}																	\
+	void ks_heapmake_##name(size_t lsize, type_t l[]) \
+	{ \
+		/* sift down every node that has a child, from index lsize/2 - 1 back to 0 */ \
+		size_t node = lsize >> 1; \
+		while (node-- > 0) \
+			ks_heapadjust_##name(node, lsize, l); \
+	} \
+	/* Sort l[0..lsize) by repeatedly moving l[0] to the end of the shrinking */ \
+	/* prefix. This function does not build the heap itself: it expects l to */ \
+	/* already be in the order produced by ks_heapmake_##name. Arrays with */ \
+	/* fewer than two elements are returned untouched. */ \
+	void ks_heapsort_##name(size_t lsize, type_t l[]) \
+	{ \
+		size_t i; \
+		if (lsize < 2) return; /* lsize - 1 would wrap around for an empty array */ \
+		for (i = lsize - 1; i > 0; --i) { \
+			type_t tmp; \
+			tmp = *l; *l = l[i]; l[i] = tmp; /* current top goes to its final slot */ \
+			ks_heapadjust_##name(0, i, l); /* re-sift the new top within l[0..i) */ \
+		} \
+	} \
+	inline void __ks_insertsort_##name(type_t *s, type_t *t) \
+	{ \
+		/* insertion sort of the half-open range [s, t) */ \
+		type_t *cur, *pos, held; \
+		for (cur = s + 1; cur < t; ++cur) { \
+			pos = cur; \
+			/* walk the new element left while it is smaller than its neighbour */ \
+			while (pos > s && __sort_lt(*pos, *(pos-1))) { \
+				held = *pos; *pos = *(pos-1); *(pos-1) = held; \
+				--pos; \
+			} \
+		} \
+	} \
+	void ks_combsort_##name(size_t n, type_t a[]) /* comb sort of a[0..n), finished by an insertion sort when the gap did not end at 1 */ \
+	{																	\
+		const double shrink_factor = 1.2473309501039786540366528676643; \
+		int do_swap;													\
+		size_t gap = n;													\
+		type_t tmp, *i, *j;												\
+		do {															\
+			if (gap > 2) { /* shrink the gap once per pass until it is 2 or less */ \
+				gap = (size_t)(gap / shrink_factor);					\
+				if (gap == 9 || gap == 10) gap = 11; /* gaps of 9 and 10 are replaced by 11 */ \
+			}															\
+			do_swap = 0;												\
+			for (i = a; i < a + n - gap; ++i) { /* compare elements that are gap positions apart */ \
+				j = i + gap;											\
+				if (__sort_lt(*j, *i)) {								\
+					tmp = *i; *i = *j; *j = tmp;						\
+					do_swap = 1;										\
+				}														\
+			}															\
+		} while (do_swap || gap > 2); /* stop after a swap-free pass with a gap of 2 or less */ \
+		if (gap != 1) __ks_insertsort_##name(a, a + n);					\
+	}																	\
+	void ks_introsort_##name(size_t n, type_t a[]) /* sorts a[0..n): quicksort with an explicit stack, comb sort once the depth budget runs out, final insertion sort */ \
+	{																	\
+		int d;															\
+		ks_isort_stack_t *top, *stack;									\
+		type_t rp, swap_tmp;											\
+		type_t *s, *t, *i, *j, *k;										\
+																		\
+		if (n < 1) return;												\
+		else if (n == 2) {												\
+			if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
+			return;														\
+		}																\
+		for (d = 2; 1ul<<d < n; ++d); /* smallest d >= 2 with 2^d >= n */ \
+		stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); /* NOTE(review): malloc result is not checked */ \
+		top = stack; s = a; t = a + (n-1); d <<= 1; /* [s, t] is the current range; d becomes the depth budget */ \
+		while (1) {														\
+			if (s < t) {												\
+				if (--d == 0) { /* budget used up: finish this range with comb sort */ \
+					ks_combsort_##name(t - s + 1, s);					\
+					t = s;												\
+					continue;											\
+				}														\
+				i = s; j = t; k = i + ((j-i)>>1) + 1;					\
+				if (__sort_lt(*k, *i)) { /* choose the pivot among first, middle and last element */ \
+					if (__sort_lt(*k, *j)) k = j;						\
+				} else k = __sort_lt(*j, *i)? i : j;					\
+				rp = *k;												\
+				if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } /* keep the pivot at the end of the range while partitioning */ \
+				for (;;) {												\
+					do ++i; while (__sort_lt(*i, rp));					\
+					do --j; while (i <= j && __sort_lt(rp, *j));		\
+					if (j <= i) break;									\
+					swap_tmp = *i; *i = *j; *j = swap_tmp;				\
+				}														\
+				swap_tmp = *i; *i = *t; *t = swap_tmp; /* pivot into its final position i */ \
+				if (i-s > t-i) { /* left part is larger: push it, continue with the right part */ \
+					if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
+					s = t-i > 16? i+1 : t; /* parts spanning 16 or less are left for the final insertion sort */ \
+				} else {												\
+					if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
+					t = i-s > 16? i-1 : s;								\
+				}														\
+			} else {													\
+				if (top == stack) { /* nothing pending: clean up the small unsorted parts */ \
+					free(stack);										\
+					__ks_insertsort_##name(a, a+n);						\
+					return;												\
+				} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
+			}															\
+		}																\
+	}																	\
+	/* This function is adapted from: http://ndevilla.free.fr/median/ */ \
+	/* 0 <= kk < n */													\
+	type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) /* returns the element that belongs at index kk in sorted order; arr is reordered in the process */ \
+	{																	\
+		type_t *low, *high, *k, *ll, *hh, *mid;							\
+		low = arr; high = arr + n - 1; k = arr + kk; /* [low, high] is the window that still contains position k */ \
+		for (;;) {														\
+			if (high <= low) return *k; /* window of at most one element */ \
+			if (high == low + 1) { /* window of two: order them and finish */ \
+				if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
+				return *k;												\
+			}															\
+			mid = low + (high - low) / 2;								\
+			if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
+			if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
+			if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);	/* after these three steps *low is the median of low/mid/high and acts as the pivot */ \
+			KSORT_SWAP(type_t, *mid, *(low+1));							\
+			ll = low + 1; hh = high;									\
+			for (;;) { /* partition the window around the pivot *low */ \
+				do ++ll; while (__sort_lt(*ll, *low));					\
+				do --hh; while (__sort_lt(*low, *hh));					\
+				if (hh < ll) break;										\
+				KSORT_SWAP(type_t, *ll, *hh);							\
+			}															\
+			KSORT_SWAP(type_t, *low, *hh); /* pivot lands at hh */ \
+			if (hh <= k) low = ll; /* position k is at or right of the pivot */ \
+			if (hh >= k) high = hh - 1; /* position k is at or left of the pivot */ \
+		}																\
+	}
+
+#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
+#define ks_introsort(name, n, a) ks_introsort_##name(n, a)
+#define ks_combsort(name, n, a) ks_combsort_##name(n, a)
+#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
+#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
+#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
+#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
+
+#define ks_lt_generic(a, b) ((a) < (b))
+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
+
+typedef const char *ksstr_t;
+
+#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
+#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/kstring.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,35 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include "kstring.h"
+
+/*
+ * Append printf-style formatted text to the kstring `s', growing its buffer
+ * when the text does not fit.
+ *
+ * Returns the number of characters appended (excluding the terminating NUL).
+ * If formatting or memory allocation fails, a negative value is returned and
+ * `s' keeps its previous length and buffer.
+ */
+int ksprintf(kstring_t *s, const char *fmt, ...)
+{
+	va_list ap;
+	int l;
+	va_start(ap, fmt);
+	l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
+	va_end(ap);
+	if (l < 0) return l; /* encoding error: do not let a negative count corrupt s->l */
+	if ((size_t)l + 1 > s->m - s->l) { /* output was truncated: enlarge and format again */
+		size_t m = s->l + l + 2;
+		char *tmp;
+		kroundup32(m);
+		tmp = (char*)realloc(s->s, m);
+		if (tmp == 0) return -1; /* the old buffer is still valid and still owned by s */
+		s->s = tmp; s->m = m;
+		/* a va_list may be traversed only once, so start it again; each va_start gets exactly one va_end */
+		va_start(ap, fmt);
+		l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
+		va_end(ap);
+		if (l < 0) return l;
+	}
+	s->l += l;
+	return l;
+}
+
+#ifdef KSTRING_MAIN
+#include <stdio.h>
+/* Self-test driver (built only with -DKSTRING_MAIN): format one string into a kstring and print it. */
+int main()
+{
+	kstring_t *s;
+	s = (kstring_t*)calloc(1, sizeof(kstring_t));
+	if (s == 0) return 1;
+	ksprintf(s, "abcdefg: %d", 100);
+	printf("%s\n", s->s);
+	free(s->s); /* the character buffer is a separate allocation and must be released as well */
+	free(s);
+	return 0;
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/kstring.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,46 @@
+#ifndef KSTRING_H
+#define KSTRING_H
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef kroundup32
+/* Round x up, in place, to the nearest power of two by copying its highest set bit into all lower
+ * bits; the shifts stop at 16, so only values that fit in 32 bits are rounded correctly. */
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+/* Growable character buffer used by kputs(), kputc() and ksprintf(). */
+typedef struct __kstring_t {
+	size_t l, m; /* l: characters currently stored; m: bytes allocated for s */
+	char *s;     /* heap buffer, (re)allocated with realloc() by the append functions */
+} kstring_t;
+#endif
+
+/*
+ * Append the NUL-terminated string `p' to `s', enlarging the buffer if needed.
+ *
+ * Returns the number of characters appended, or -1 if memory cannot be
+ * allocated (in which case `s' is left unchanged).
+ */
+static inline int kputs(const char *p, kstring_t *s)
+{
+	size_t l = strlen(p); /* keep the full size_t length; an int would truncate very long strings */
+	if (s->l + l + 1 >= s->m) {
+		size_t m = s->l + l + 2;
+		char *tmp;
+		kroundup32(m);
+		tmp = (char*)realloc(s->s, m);
+		if (tmp == 0) return -1; /* keep the old buffer rather than overwriting s->s with NULL */
+		s->s = tmp; s->m = m;
+	}
+	memcpy(s->s + s->l, p, l + 1); /* the length is already known; +1 copies the terminating NUL */
+	s->l += l;
+	return (int)l;
+}
+
+/*
+ * Append the single character `c' to `s' and keep the buffer NUL-terminated.
+ *
+ * Returns `c', or -1 if memory cannot be allocated (in which case `s' is
+ * left unchanged).
+ */
+static inline int kputc(int c, kstring_t *s)
+{
+	if (s->l + 1 >= s->m) {
+		size_t m = s->l + 2;
+		char *tmp;
+		kroundup32(m);
+		tmp = (char*)realloc(s->s, m);
+		if (tmp == 0) return -1; /* keep the old buffer rather than overwriting s->s with NULL */
+		s->s = tmp; s->m = m;
+	}
+	s->s[s->l++] = c;
+	s->s[s->l] = 0;
+	return c;
+}
+
+int ksprintf(kstring_t *s, const char *fmt, ...);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/ksw.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,401 @@
+/* The MIT License
+
+   Copyright (c) 2011 by Attractive Chaos <attractor@live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#ifndef _NO_SSE2
+#include <stdlib.h>
+#include <stdint.h>
+#include <emmintrin.h>
+#include "ksw.h"
+
+#ifdef __GNUC__
+#define LIKELY(x) __builtin_expect((x),1)
+#define UNLIKELY(x) __builtin_expect((x),0)
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
+// Query profile built by ksw_qinit(); the header and all vectors it points to live in one malloc() block.
+struct _ksw_query_t {
+	int qlen, slen; // query length; number of __m128i vectors per row ("segmented length")
+	uint8_t shift, mdiff, max, size; // shift: 256 - minimum matrix score (mod 256), the bias added to 1-byte profile scores;
+	                                 // mdiff: max score + shift; max: maximum matrix score; size: bytes per score (1 or 2)
+	__m128i *qp, *H0, *H1, *E, *Hmax; // qp: slen*m profile vectors (16-byte aligned); H0, H1, E, Hmax: work rows of slen vectors each
+};
+
+/*
+ * Build the query profile consumed by ksw_sse2_16() (size == 1) or ksw_sse2_8() (size == 2).
+ *
+ * size   bytes per score; any value above 1 is treated as 2
+ * qlen   length of `query'
+ * query  query sequence, each value an index into the alphabet [0, m)
+ * m      alphabet size; `mat' holds m*m scores, row-major
+ *
+ * Returns a single heap block to be released with free(), or NULL if the
+ * allocation fails.
+ */
+ksw_query_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t *mat)
+{
+	ksw_query_t *q;
+	int slen, a, tmp, p;
+
+	size = size > 1? 2 : 1;
+	p = 8 * (3 - size); // # values per __m128i
+	slen = (qlen + p - 1) / p; // segmented length
+	q = malloc(sizeof(ksw_query_t) + 256 + 16 * slen * (m + 4)); // a single block of memory
+	if (q == 0) return 0; // out of memory: report it instead of dereferencing a null pointer below
+	q->qp = (__m128i*)(((size_t)q + sizeof(ksw_query_t) + 15) >> 4 << 4); // align memory
+	q->H0 = q->qp + slen * m;
+	q->H1 = q->H0 + slen;
+	q->E  = q->H1 + slen;
+	q->Hmax = q->E + slen;
+	q->slen = slen; q->qlen = qlen; q->size = size;
+	// compute shift
+	tmp = m * m;
+	for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
+		if (mat[a] < (int8_t)q->shift) q->shift = mat[a];
+		if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];
+	}
+	q->max = q->mdiff;
+	q->shift = 256 - q->shift; // NB: q->shift is uint8_t
+	q->mdiff += q->shift; // this is the difference between the min and max scores
+	// An example: p=8, qlen=19, slen=3 and segmentation:
+	//  {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
+	if (size == 1) {
+		// 1-byte profile: scores are biased by q->shift before being stored; padding cells score 0 (+ shift)
+		int8_t *t = (int8_t*)q->qp;
+		for (a = 0; a < m; ++a) {
+			int i, k, nlen = slen * p;
+			const int8_t *ma = mat + a * m;
+			for (i = 0; i < slen; ++i)
+				for (k = i; k < nlen; k += slen) // p iterations
+					*t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
+		}
+	} else {
+		// 2-byte profile: scores are stored without a bias
+		int16_t *t = (int16_t*)q->qp;
+		for (a = 0; a < m; ++a) {
+			int i, k, nlen = slen * p;
+			const int8_t *ma = mat + a * m;
+			for (i = 0; i < slen; ++i)
+				for (k = i; k < nlen; k += slen) // p iterations
+					*t++ = (k >= qlen? 0 : ma[query[k]]);
+		}
+	}
+	return q;
+}
+
+/*
+ * Local alignment score of the query profile `q' against target[0..tlen) using
+ * unsigned 1-byte scores, 16 per __m128i; ksw_sse2() selects this routine when
+ * q->size == 1, i.e. for profiles built with ksw_qinit(1, ...).
+ *
+ * Results are written to `a':
+ *   score   best score found
+ *   te      target position where the best score ends (-1 if no positive score)
+ *   qe      query position of the largest cell in the saved best row
+ *   score2  best score >= a->T that ends outside a window around te (0 if none)
+ *   te2     target end of score2; only assigned when such a hit exists
+ *
+ * Returns a->score, or 255 when a->score + q->shift reaches 255, i.e. the
+ * 1-byte scores have saturated and the scan over the target was cut short.
+ */
+int ksw_sse2_16(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) // the first gap costs -(_o+_e)
+{
+	int slen, i, m_b, n_b, te = -1, gmax = 0;
+	uint64_t *b; // candidate list: each entry packs (row maximum << 32 | target position)
+	__m128i zero, gapoe, gape, shift, *H0, *H1, *E, *Hmax;
+
+	// horizontal maximum of the 16 unsigned bytes in xx (xx is clobbered)
+#define __max_16(ret, xx) do { \
+		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \
+		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \
+		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \
+		(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 1)); \
+    	(ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \
+	} while (0)
+
+	// initialization
+	m_b = n_b = 0; b = 0;
+	zero = _mm_set1_epi32(0);
+	gapoe = _mm_set1_epi8(a->gapo + a->gape);
+	gape = _mm_set1_epi8(a->gape);
+	shift = _mm_set1_epi8(q->shift);
+	H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
+	slen = q->slen;
+	for (i = 0; i < slen; ++i) {
+		_mm_store_si128(E + i, zero);
+		_mm_store_si128(H0 + i, zero);
+		_mm_store_si128(Hmax + i, zero);
+	}
+	// the core loop
+	for (i = 0; i < tlen; ++i) {
+		int j, k, cmp, imax;
+		__m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
+		h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
+		h = _mm_slli_si128(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian
+		for (j = 0; LIKELY(j < slen); ++j) {
+			/* SW cells are computed in the following order:
+			 *   H(i,j)   = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
+			 *   E(i+1,j) = max{H(i,j)-q, E(i,j)-r}
+			 *   F(i,j+1) = max{H(i,j)-q, F(i,j)-r}
+			 */
+			// compute H'(i,j); note that at the beginning, h=H'(i-1,j-1)
+			h = _mm_adds_epu8(h, _mm_load_si128(S + j));
+			h = _mm_subs_epu8(h, shift); // h=H'(i-1,j-1)+S(i,j)
+			e = _mm_load_si128(E + j); // e=E'(i,j)
+			h = _mm_max_epu8(h, e);
+			h = _mm_max_epu8(h, f); // h=H'(i,j)
+			max = _mm_max_epu8(max, h); // set max
+			_mm_store_si128(H1 + j, h); // save to H'(i,j)
+			// now compute E'(i+1,j)
+			h = _mm_subs_epu8(h, gapoe); // h=H'(i,j)-gapo
+			e = _mm_subs_epu8(e, gape); // e=E'(i,j)-gape
+			e = _mm_max_epu8(e, h); // e=E'(i+1,j)
+			_mm_store_si128(E + j, e); // save to E'(i+1,j)
+			// now compute F'(i,j+1)
+			f = _mm_subs_epu8(f, gape);
+			f = _mm_max_epu8(f, h);
+			// get H'(i-1,j) and prepare for the next j
+			h = _mm_load_si128(H0 + j); // h=H'(i-1,j)
+		}
+		// NB: we do not need to set E(i,j) as we disallow adjacent insertion and then deletion
+		for (k = 0; LIKELY(k < 16); ++k) { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
+			f = _mm_slli_si128(f, 1);
+			for (j = 0; LIKELY(j < slen); ++j) {
+				h = _mm_load_si128(H1 + j);
+				h = _mm_max_epu8(h, f); // h=H'(i,j)
+				_mm_store_si128(H1 + j, h);
+				h = _mm_subs_epu8(h, gapoe);
+				f = _mm_subs_epu8(f, gape);
+				cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_subs_epu8(f, h), zero));
+				if (UNLIKELY(cmp == 0xffff)) goto end_loop16; // no lane has f > h: F cannot raise any further cell
+			}
+		}
+end_loop16:
+		//int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n");
+		__max_16(imax, max); // imax is the maximum number in max
+		if (imax >= a->T) { // write the b array; this condition adds branching unfortunately
+			if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append
+				if (n_b == m_b) {
+					m_b = m_b? m_b<<1 : 8;
+					b = realloc(b, 8 * m_b);
+				}
+				b[n_b++] = (uint64_t)imax<<32 | i;
+			} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
+		}
+		if (imax > gmax) {
+			gmax = imax; te = i; // te is the end position on the target
+			for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
+				_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
+			if (gmax + q->shift >= 255) break; // 1-byte scores are about to saturate: stop scanning
+		}
+		S = H1; H1 = H0; H0 = S; // swap H0 and H1
+	}
+	a->score = gmax; a->te = te;
+	{ // get a->qe, the end of query match; find the 2nd best score
+		int max = -1, low, high, qlen = slen * 16;
+		uint8_t *t = (uint8_t*)Hmax;
+		for (i = 0, a->qe = -1; i < qlen; ++i, ++t)
+			if ((int)*t > max) max = *t, a->qe = i / 16 + i % 16 * slen; // map (vector, lane) back to a query position
+		//printf("%d,%d\n", max, gmax);
+		i = (a->score + q->max - 1) / q->max; // ceil(score / max matrix score): half-width of the window around te
+		low = te - i; high = te + i;
+		for (i = 0, a->score2 = 0; i < n_b; ++i) {
+			int e = (int32_t)b[i];
+			if ((e < low || e > high) && b[i]>>32 > (uint32_t)a->score2)
+				a->score2 = b[i]>>32, a->te2 = e;
+		}
+	}
+	free(b);
+	return a->score + q->shift >= 255? 255 : a->score;
+}
+
+/*
+ * Local alignment score of the query profile `q' against target[0..tlen) using
+ * 2-byte scores, 8 per __m128i; ksw_sse2() selects this routine when
+ * q->size != 1, i.e. for profiles built with ksw_qinit(2, ...).
+ *
+ * Results are written to `a' exactly as in ksw_sse2_16(): score, te, qe,
+ * score2 and (only when a second hit exists) te2. Unlike the 1-byte version
+ * there is no saturation check; the return value is always a->score.
+ */
+int ksw_sse2_8(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) // the first gap costs -(_o+_e)
+{
+	int slen, i, m_b, n_b, te = -1, gmax = 0;
+	uint64_t *b; // candidate list: each entry packs (row maximum << 32 | target position)
+	__m128i zero, gapoe, gape, *H0, *H1, *E, *Hmax;
+
+	// horizontal maximum of the 8 signed 16-bit lanes in xx (xx is clobbered)
+#define __max_8(ret, xx) do { \
+		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
+		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
+		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
+    	(ret) = _mm_extract_epi16((xx), 0); \
+	} while (0)
+
+	// initialization
+	m_b = n_b = 0; b = 0;
+	zero = _mm_set1_epi32(0);
+	gapoe = _mm_set1_epi16(a->gapo + a->gape);
+	gape = _mm_set1_epi16(a->gape);
+	H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
+	slen = q->slen;
+	for (i = 0; i < slen; ++i) {
+		_mm_store_si128(E + i, zero);
+		_mm_store_si128(H0 + i, zero);
+		_mm_store_si128(Hmax + i, zero);
+	}
+	// the core loop
+	for (i = 0; i < tlen; ++i) {
+		int j, k, imax;
+		__m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
+		h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
+		h = _mm_slli_si128(h, 2); // shift by one 16-bit lane
+		for (j = 0; LIKELY(j < slen); ++j) {
+			h = _mm_adds_epi16(h, *S++); // h=H(i-1,j-1)+S(i,j); the 2-byte profile carries no bias
+			e = _mm_load_si128(E + j);
+			h = _mm_max_epi16(h, e);
+			h = _mm_max_epi16(h, f); // h=H(i,j)
+			max = _mm_max_epi16(max, h);
+			_mm_store_si128(H1 + j, h);
+			h = _mm_subs_epu16(h, gapoe); // unsigned saturating subtract clamps at zero
+			e = _mm_subs_epu16(e, gape);
+			e = _mm_max_epi16(e, h); // e=E(i+1,j)
+			_mm_store_si128(E + j, e);
+			f = _mm_subs_epu16(f, gape);
+			f = _mm_max_epi16(f, h); // f=F(i,j+1)
+			h = _mm_load_si128(H0 + j); // h=H(i-1,j) for the next j
+		}
+		// lazy-F pass; NOTE(review): the bound is 16 although a vector holds 8 lanes here -- the loop normally leaves through the goto
+		for (k = 0; LIKELY(k < 16); ++k) {
+			f = _mm_slli_si128(f, 2);
+			for (j = 0; LIKELY(j < slen); ++j) {
+				h = _mm_load_si128(H1 + j);
+				h = _mm_max_epi16(h, f);
+				_mm_store_si128(H1 + j, h);
+				h = _mm_subs_epu16(h, gapoe);
+				f = _mm_subs_epu16(f, gape);
+				if(UNLIKELY(!_mm_movemask_epi8(_mm_cmpgt_epi16(f, h)))) goto end_loop8; // no lane has f > h
+			}
+		}
+end_loop8:
+		__max_8(imax, max);
+		if (imax >= a->T) {
+			if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // not adjacent to the last recorded position: append
+				if (n_b == m_b) {
+					m_b = m_b? m_b<<1 : 8;
+					b = realloc(b, 8 * m_b);
+				}
+				b[n_b++] = (uint64_t)imax<<32 | i;
+			} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
+		}
+		if (imax > gmax) {
+			gmax = imax; te = i;
+			for (j = 0; LIKELY(j < slen); ++j) // keep the row that holds the best score
+				_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
+		}
+		S = H1; H1 = H0; H0 = S; // swap H0 and H1
+	}
+	a->score = gmax; a->te = te;
+	{ // get a->qe and the second best score
+		int max = -1, low, high, qlen = slen * 8;
+		uint16_t *t = (uint16_t*)Hmax;
+		for (i = 0, a->qe = -1; i < qlen; ++i, ++t)
+			if ((int)*t > max) max = *t, a->qe = i / 8 + i % 8 * slen; // map (vector, lane) back to a query position
+		i = (a->score + q->max - 1) / q->max; // ceil(score / max matrix score): half-width of the window around te
+		low = te - i; high = te + i;
+		for (i = 0, a->score2 = 0; i < n_b; ++i) {
+			int e = (int32_t)b[i];
+			if ((e < low || e > high) && b[i]>>32 > (uint32_t)a->score2)
+				a->score2 = b[i]>>32, a->te2 = e;
+		}
+	}
+	free(b);
+	return a->score;
+}
+
+/* Dispatch on the score width stored in the profile: 1-byte scores go to ksw_sse2_16(), anything else to ksw_sse2_8(). */
+int ksw_sse2(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a)
+{
+	return q->size == 1? ksw_sse2_16(q, tlen, target, a)
+					   : ksw_sse2_8(q, tlen, target, a);
+}
+
+/*******************************************
+ * Main function (not compiled by default) *
+ *******************************************/
+
+#ifdef _KSW_MAIN
+
+#include <unistd.h>
+#include <stdio.h>
+#include <zlib.h>
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+// ASCII -> nucleotide code: 'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' -> 3, every other byte -> 4
+unsigned char seq_nt4_table[256] = {
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+
+/*
+ * Stand-alone driver (built only with -D_KSW_MAIN): aligns every sequence of
+ * <query.fa> (and, unless -f is given, its reverse complement) against every
+ * sequence of <target.fa> and prints one tab-separated line per alignment:
+ * query name, target name, strand, score, target end, query end (both 1-based).
+ *
+ * Options: -a match score, -b mismatch penalty, -q gap open, -r gap extension,
+ * -t second-best threshold, -s bytes per score, -f forward strand only.
+ * Returns 0 on success and 1 on a usage or file-open error.
+ */
+int main(int argc, char *argv[])
+{
+	int c, sa = 1, sb = 3, i, j, k, forward_only = 0, size = 2;
+	int8_t mat[25];
+	ksw_aux_t a;
+	gzFile fpt, fpq;
+	kseq_t *kst, *ksq;
+	// parse command line
+	a.gapo = 5; a.gape = 2; a.T = 10;
+	while ((c = getopt(argc, argv, "a:b:q:r:ft:s:")) >= 0) {
+		switch (c) {
+			case 'a': sa = atoi(optarg); break;
+			case 'b': sb = atoi(optarg); break;
+			case 'q': a.gapo = atoi(optarg); break;
+			case 'r': a.gape = atoi(optarg); break;
+			case 't': a.T = atoi(optarg); break;
+			case 'f': forward_only = 1; break;
+			case 's': size = atoi(optarg); break;
+		}
+	}
+	if (optind + 2 > argc) {
+		fprintf(stderr, "Usage: ksw [-s%d] [-a%d] [-b%d] [-q%d] [-r%d] <target.fa> <query.fa>\n", size, sa, sb, a.gapo, a.gape);
+		return 1;
+	}
+	// initialize scoring matrix (5x5: A, C, G, T and the ambiguous base)
+	for (i = k = 0; i < 4; ++i) { // only the four real bases get match/mismatch rows
+		for (j = 0; j < 4; ++j)
+			mat[k++] = i == j? sa : -sb;
+		mat[k++] = 0; // ambiguous base
+	}
+	for (j = 0; j < 5; ++j) mat[k++] = 0; // ambiguous-base row; k ends at 25, exactly the size of mat
+	// open file
+	fpt = gzopen(argv[optind],   "r");
+	fpq = gzopen(argv[optind+1], "r");
+	if (fpt == 0 || fpq == 0) { // gzopen() returns NULL when the file cannot be opened
+		fprintf(stderr, "[ksw] fail to open '%s'\n", fpt == 0? argv[optind] : argv[optind+1]);
+		if (fpt) gzclose(fpt);
+		if (fpq) gzclose(fpq);
+		return 1;
+	}
+	kst = kseq_init(fpt);
+	ksq = kseq_init(fpq);
+	// all-pair alignment
+	while (kseq_read(ksq) > 0) {
+		ksw_query_t *q[2];
+		for (i = 0; i < ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]];
+		q[0] = ksw_qinit(size, ksq->seq.l, (uint8_t*)ksq->seq.s, 5, mat);
+		if (!forward_only) { // reverse
+			for (i = 0; i < ksq->seq.l/2; ++i) {
+				int t = ksq->seq.s[i];
+				ksq->seq.s[i] = ksq->seq.s[ksq->seq.l-1-i];
+				ksq->seq.s[ksq->seq.l-1-i] = t;
+			}
+			for (i = 0; i < ksq->seq.l; ++i) // complement: 0<->3, 1<->2; the ambiguous code 4 stays
+				ksq->seq.s[i] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i];
+			q[1] = ksw_qinit(size, ksq->seq.l, (uint8_t*)ksq->seq.s, 5, mat);
+		} else q[1] = 0;
+		gzrewind(fpt); kseq_rewind(kst); // re-read all targets for each query
+		while (kseq_read(kst) > 0) {
+			int s;
+			for (i = 0; i < kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
+			s = ksw_sse2(q[0], kst->seq.l, (uint8_t*)kst->seq.s, &a);
+			printf("%s\t%s\t+\t%d\t%d\t%d\n", ksq->name.s, kst->name.s, s, a.te+1, a.qe+1);
+			if (q[1]) {
+				s = ksw_sse2(q[1], kst->seq.l, (uint8_t*)kst->seq.s, &a);
+				printf("%s\t%s\t-\t%d\t%d\t%d\n", ksq->name.s, kst->name.s, s, a.te+1, a.qe+1);
+			}
+		}
+		free(q[0]); free(q[1]);
+	}
+	kseq_destroy(kst); gzclose(fpt);
+	kseq_destroy(ksq); gzclose(fpq);
+	return 0;
+}
+#endif // _KSW_MAIN
+#endif // _NO_SSE2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/ksw.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,54 @@
+#ifndef __AC_KSW_H
+#define __AC_KSW_H
+
+struct _ksw_query_t;
+typedef struct _ksw_query_t ksw_query_t;
+
+// Alignment parameters (filled in by the caller) and results (filled in by ksw_sse2*()).
+typedef struct {
+	// input
+	unsigned gapo, gape; // the first gap costs gapo+gape
+	unsigned T; // threshold: target positions whose best score is >= T are kept as second-best candidates
+	// output
+	// score: best local score; te/qe: target/query end of the best hit;
+	// score2: best candidate score ending outside a window around te (0 if none);
+	// te2: target end of score2 -- written only when such a candidate exists
+	int score, te, qe, score2, te2;
+} ksw_aux_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/**
+	 * Initialize the query data structure
+	 *
+	 * @param size   Number of bytes used to store a score; valid values are 1 or 2
+	 * @param qlen   Length of the query sequence
+	 * @param query  Query sequence
+	 * @param m      Size of the alphabet
+	 * @param mat    Scoring matrix in a one-dimension array
+	 *
+	 * @return       Query data structure
+	 */
+	ksw_query_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t *mat); // to free, simply call free()
+
+	/**
+	 * Compute the maximum local score for queries initialized with ksw_qinit(2, ...),
+	 * i.e. two-byte scores, 8 per SSE2 vector
+	 *
+	 * @param q       Query data structure returned by ksw_qinit(2, ...)
+	 * @param tlen    Length of the target sequence
+	 * @param target  Target sequence
+	 * @param a       Auxiliary data structure (see ksw_aux_t above)
+	 *
+	 * @return        The maximum local score
+	 */
+	int ksw_sse2_8(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a);
+
+	/**
+	 * Compute the maximum local score for queries initialized with ksw_qinit(1, ...),
+	 * i.e. one-byte scores, 16 per SSE2 vector; parameters as for ksw_sse2_8()
+	 *
+	 * @return        The maximum local score; if the returned value equals 255, the SW may not be finished
+	 */
+	int ksw_sse2_16(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a);
+
+	/** Unified interface for ksw_sse2_8() and ksw_sse2_16(); picks one according to the size given to ksw_qinit() */
+	int ksw_sse2(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/kvec.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,90 @@
+/* The MIT License
+
+   Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  An example:
+
+#include "kvec.h"
+int main() {
+	kvec_t(int) array;
+	kv_init(array);
+	kv_push(int, array, 10); // append
+	kv_a(int, array, 20) = 5; // dynamic
+	kv_A(array, 20) = 4; // static
+	kv_destroy(array);
+	return 0;
+}
+*/
+
+/*
+  2008-09-22 (0.1.0):
+
+	* The initial version.
+
+*/
+
+#ifndef AC_KVEC_H
+#define AC_KVEC_H
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Round x up, in place, to the nearest power of two (correct for values that fit in 32 bits). */
+#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+
+/* A vector is an anonymous struct: n = elements in use, m = elements allocated, a = heap array. */
+#define kvec_t(type) struct { size_t n, m; type *a; }
+/* Make v an empty vector that owns no storage. */
+#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
+/* Release the storage of v (v itself is not reset). */
+#define kv_destroy(v) free((v).a)
+/* Element i of v, without any bounds check or resizing. */
+#define kv_A(v, i) ((v).a[(i)])
+/* Remove and yield the last element; v must not be empty. */
+#define kv_pop(v) ((v).a[--(v).n])
+#define kv_size(v) ((v).n)
+#define kv_max(v) ((v).m)
+
+/* Set the capacity of v to s elements; n is left untouched and the realloc() result is not checked. */
+#define kv_resize(type, v, s)  ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m))
+
+/* Copy the n elements of v0 into v1, growing v1 first if its capacity is smaller than v0.n.
+ * Uses memcpy(), so <string.h> must be visible where the macro is expanded. */
+#define kv_copy(type, v1, v0) do {							\
+		if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n);	\
+		(v1).n = (v0).n;									\
+		memcpy((v1).a, (v0).a, sizeof(type) * (v0).n);		\
+	} while (0)
+
+/* Append x to v; when the vector is full its capacity is doubled first (2 for an empty vector).
+ * The realloc() result is not checked. */
+#define kv_push(type, v, x) do {									\
+		if ((v).n == (v).m) {										\
+			(v).m = (v).m? (v).m<<1 : 2;							\
+			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m);	\
+		}															\
+		(v).a[(v).n++] = (x);										\
+	} while (0)
+
+/* Append one uninitialised slot to v (doubling the capacity when full) and yield a pointer to it.
+ * The whole expansion is parenthesised so that it can be used as an initializer, a function
+ * argument or an operand; without the outer parentheses the top-level comma would be parsed
+ * by the surrounding context. */
+#define kv_pushp(type, v) ((((v).n == (v).m)?							\
+						   ((v).m = ((v).m? (v).m<<1 : 2),				\
+							(v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0)	\
+						   : 0), ((v).a + ((v).n++)))
+
+/* Element i of v as an lvalue, growing the vector on demand: if i is beyond the capacity the
+ * storage is enlarged to the power of two above i; in either case the size becomes at least
+ * i + 1 so that element i is counted as in use. The expansion is written as *(..., &a[i]) so
+ * that it stays a single parenthesised expression (usable on either side of an assignment)
+ * in both C and C++. The realloc() result is not checked. */
+#define kv_a(type, v, i) (*(((v).m <= (size_t)(i)?						\
+						  ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
+						   (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
+						  : (v).n <= (size_t)(i)? (v).n = (i) + 1		\
+						  : 0), &(v).a[(i)]))
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/main.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,76 @@
+#include <stdio.h>
+#include <string.h>
+#include "main.h"
+#include "utils.h"
+
+#ifndef PACKAGE_VERSION
+#define PACKAGE_VERSION "0.6.2-r126"
+#endif
+
+/* Write the top-level help text to stderr; always returns 1 so that callers can `return usage();'. */
+static int usage()
+{
+	fputs("\n"
+		  "Program: bwa (alignment via Burrows-Wheeler transformation)\n", stderr);
+	fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
+	fputs("Contact: Heng Li <lh3@sanger.ac.uk>\n\n"
+		  "Usage:   bwa <command> [options]\n\n"
+		  "Command: index         index sequences in the FASTA format\n"
+		  "         aln           gapped/ungapped alignment\n"
+		  "         samse         generate alignment (single ended)\n"
+		  "         sampe         generate alignment (paired ended)\n"
+		  "         bwasw         BWA-SW for long queries\n"
+		  "         fastmap       identify super-maximal exact matches\n"
+		  "\n"
+		  "         fa2pac        convert FASTA to PAC format\n"
+		  "         pac2bwt       generate BWT from PAC\n"
+		  "         pac2bwtgen    alternative algorithm for generating BWT\n"
+		  "         bwtupdate     update .bwt to the new format\n"
+		  "         bwt2sa        generate SA from BWT and Occ\n"
+		  "         pac2cspac     convert PAC to color-space PAC\n"
+		  "         stdsw         standard SW/NW alignment\n"
+		  "\n", stderr);
+	return 1;
+}
+
+/* Emit the SAM @PG header line that identifies bwa and its version on stdout. */
+void bwa_print_sam_PG()
+{
+	fprintf(stdout, "@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION);
+}
+
+/*
+ * bwa entry point: dispatch argv[1] to the matching sub-command, passing it the
+ * remaining arguments (argv[1] becomes the sub-command's argv[0]).
+ *
+ * Returns 1 when no command or an unknown command is given; otherwise the
+ * sub-command's own return value, so that failures reach the calling shell.
+ * The version / command line / timing summary is printed to stderr only when
+ * the sub-command succeeded.
+ */
+int main(int argc, char *argv[])
+{
+	int i, ret;
+	double t_real;
+	t_real = realtime();
+	if (argc < 2) return usage();
+	if (strcmp(argv[1], "fa2pac") == 0) ret = bwa_fa2pac(argc-1, argv+1);
+	else if (strcmp(argv[1], "pac2bwt") == 0) ret = bwa_pac2bwt(argc-1, argv+1);
+	else if (strcmp(argv[1], "pac2bwtgen") == 0) ret = bwt_bwtgen_main(argc-1, argv+1);
+	else if (strcmp(argv[1], "bwtupdate") == 0) ret = bwa_bwtupdate(argc-1, argv+1);
+	else if (strcmp(argv[1], "bwt2sa") == 0) ret = bwa_bwt2sa(argc-1, argv+1);
+	else if (strcmp(argv[1], "index") == 0) ret = bwa_index(argc-1, argv+1);
+	else if (strcmp(argv[1], "aln") == 0) ret = bwa_aln(argc-1, argv+1);
+	else if (strcmp(argv[1], "sw") == 0) ret = bwa_stdsw(argc-1, argv+1);
+	else if (strcmp(argv[1], "samse") == 0) ret = bwa_sai2sam_se(argc-1, argv+1);
+	else if (strcmp(argv[1], "sampe") == 0) ret = bwa_sai2sam_pe(argc-1, argv+1);
+	else if (strcmp(argv[1], "pac2cspac") == 0) ret = bwa_pac2cspac(argc-1, argv+1);
+	else if (strcmp(argv[1], "stdsw") == 0) ret = bwa_stdsw(argc-1, argv+1);
+	else if (strcmp(argv[1], "bwtsw2") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
+	else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
+	else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
+	else if (strcmp(argv[1], "fastmap") == 0) ret = main_fastmap(argc-1, argv+1);
+	else {
+		fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
+		return 1;
+	}
+	/* flush and close stdout explicitly so that write errors are detected before exiting */
+	err_fflush(stdout);
+	err_fclose(stdout);
+	if (ret == 0) {
+		fprintf(stderr, "[%s] Version: %s\n", __func__, PACKAGE_VERSION);
+		fprintf(stderr, "[%s] CMD:", __func__);
+		for (i = 0; i < argc; ++i)
+			fprintf(stderr, " %s", argv[i]);
+		fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - t_real, cputime());
+	}
+	return ret; /* propagate the sub-command status instead of always reporting success */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/main.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,30 @@
+#ifndef BWA_MAIN_H
+#define BWA_MAIN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	int bwa_fa2pac(int argc, char *argv[]);
+	int bwa_pac2cspac(int argc, char *argv[]);
+	int bwa_pac2bwt(int argc, char *argv[]);
+	int bwa_bwtupdate(int argc, char *argv[]);
+	int bwa_bwt2sa(int argc, char *argv[]);
+	int bwa_index(int argc, char *argv[]);
+	int bwa_aln(int argc, char *argv[]);
+	int bwt_bwtgen_main(int argc, char *argv[]);
+
+	int bwa_sai2sam_se(int argc, char *argv[]);
+	int bwa_sai2sam_pe(int argc, char *argv[]);
+
+	int bwa_stdsw(int argc, char *argv[]);
+
+	int bwa_bwtsw2(int argc, char *argv[]);
+
+	int main_fastmap(int argc, char *argv[]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/qualfa2fq.pl	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,27 @@
+#!/usr/bin/perl -w
+
+# Merge a FASTA file and the matching .qual file (whitespace-separated decimal
+# quality values) into FASTQ on standard output. Either input may be
+# gzip-compressed (file name ending in .gz). Qualities are written as
+# chr(value + 33) and wrapped at 60 characters per line.
+
+use strict;
+use warnings;
+
+die("Usage: qualfa2fq.pl <in.fasta> <in.qual>\n") if (@ARGV != 2);
+
+# Open a plain or gzip-compressed file for reading and return the handle.
+# Three-argument / list-form open keeps the file name away from the shell and
+# from open()'s mode parsing, so names with spaces or metacharacters are safe.
+sub open_input {
+  my ($fn) = @_;
+  my $fh;
+  if ($fn =~ /\.gz$/) {
+	open($fh, '-|', 'gzip', '-dc', $fn) || die("Fail to run gzip on '$fn': $!\n");
+  } else {
+	open($fh, '<', $fn) || die("Fail to open '$fn': $!\n");
+  }
+  return $fh;
+}
+
+my ($fhs, $fhq, $q);
+$fhs = open_input($ARGV[0]);
+$fhq = open_input($ARGV[1]);
+
+# Skip everything up to and including the first ">" of each file.
+$/ = ">"; <$fhs>; <$fhq>; $/ = "\n";
+while (<$fhs>) {
+  # $_ is the FASTA header line; read (and drop) the matching header of the qual file.
+  $q = <$fhq>;
+  print "\@$_";
+  # Read the record bodies up to the next ">"; chomp then strips that ">".
+  $/ = ">";
+  $_ = <$fhs>; $q = <$fhq>;
+  chomp; chomp($q);
+  $q =~ s/\s*(\d+)\s*/chr($1+33)/eg;
+  print $_, "+\n";
+  for (my $i = 0; $i < length($q); $i += 60) {
+	print substr($q, $i, 60), "\n";
+  }
+  $/ = "\n";
+}
+
+close($fhs); close($fhq);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/simple_dp.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,162 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <zlib.h>
+#include <stdint.h>
+#include "stdaln.h"
+#include "utils.h"
+
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+/* One sequence loaded by load_seqs(). */
+typedef struct {
+	int l;            /* sequence length, not counting the terminating NUL */
+	unsigned char *s; /* heap copy of the sequence, NUL-terminated */
+	char *n;          /* heap copy (strdup) of the sequence name */
+} seq1_t;
+
+/* Growable array of loaded sequences. */
+typedef struct {
+	int n_seqs, m_seqs; /* entries in use / entries allocated */
+	seq1_t *seqs;
+} seqs_t;
+
+/* Complement of each ASCII nucleotide code, case preserved: A<->T, C<->G, and the ambiguity
+ * codes B<->V, D<->H, K<->M, R<->Y, with S, W and X mapping to themselves. Every other byte,
+ * including 'N' and 'U', maps to 'N'. */
+unsigned char aln_rev_table[256] = {
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','T','V','G', 'H','N','N','C', 'D','N','N','M', 'N','K','N','N',
+	'N','N','Y','S', 'A','N','B','W', 'X','R','N','N', 'N','N','N','N',
+	'N','t','v','g', 'h','N','N','c', 'd','N','N','m', 'N','k','N','N',
+	'N','N','y','s', 'a','N','b','w', 'x','r','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N',
+	'N','N','N','N', 'N','N','N','N', 'N','N','N','N', 'N','N','N','N'
+};
+
+/* Command-line state set by bwa_stdsw(): g_is_global (-g), g_thres (-T, minimum score),
+ * g_strand (bit 0 = forward, bit 1 = reverse; -f / -r), g_aa (-p, protein mode). */
+static int g_is_global = 0, g_thres = 1, g_strand = 0, g_aa = 0;
+/* Alignment parameters in use; chosen in bwa_stdsw(), band_width is reset per pair in aln_1seq(). */
+static AlnParam g_aln_param;
+
+/* Reverse-complement seq[0..len) in place, translating every base through aln_rev_table. */
+static void revseq(int len, uint8_t *seq)
+{
+	int lo = 0, hi = len - 1;
+	const int half = len >> 1;
+	/* walk inwards from both ends, swapping the complemented bases */
+	while (lo < half) {
+		uint8_t from_right = aln_rev_table[seq[hi]];
+		seq[hi] = aln_rev_table[seq[lo]];
+		seq[lo] = from_right;
+		++lo; --hi;
+	}
+	/* an odd length leaves the middle base, which is complemented where it stands */
+	if (len&1) seq[lo] = aln_rev_table[seq[lo]];
+}
+
+/* Read every record of the (optionally gzip-compressed) sequence file `fn' into a newly
+ * allocated seqs_t, copying each sequence and its name; the count is reported on stderr. */
+static seqs_t *load_seqs(const char *fn)
+{
+	gzFile in = xzopen(fn, "r");
+	kseq_t *rec = kseq_init(in);
+	seqs_t *set = (seqs_t*)calloc(1, sizeof(seqs_t));
+
+	set->m_seqs = 256;
+	set->seqs = (seq1_t*)calloc(set->m_seqs, sizeof(seq1_t));
+	while (kseq_read(rec) >= 0) {
+		seq1_t *slot;
+		if (set->n_seqs == set->m_seqs) { /* array is full: double its capacity */
+			set->m_seqs <<= 1;
+			set->seqs = (seq1_t*)realloc(set->seqs, set->m_seqs * sizeof(seq1_t));
+		}
+		slot = &set->seqs[set->n_seqs];
+		set->n_seqs += 1;
+		slot->l = rec->seq.l;
+		slot->s = (unsigned char*)malloc(slot->l + 1);
+		memcpy(slot->s, rec->seq.s, slot->l);
+		slot->s[slot->l] = 0;
+		slot->n = strdup((const char*)rec->name.s);
+	}
+	kseq_destroy(rec);
+	gzclose(in);
+	fprintf(stderr, "[load_seqs] %d sequences are loaded.\n", set->n_seqs);
+	return set;
+}
+
+/*
+ * Align the query `s' (length `l', named `name', on strand `strand') against
+ * every sequence in `ss' and print each alignment whose score reaches g_thres
+ * (every alignment in global mode): one header line with coordinates, score,
+ * suboptimal score and CIGAR, followed by the three alignment rows.
+ */
+static void aln_1seq(const seqs_t *ss, const char *name, int l, const char *s, char strand)
+{
+	int i, j;
+	for (i = 0; i < ss->n_seqs; ++i) {
+		AlnAln *aa;
+		seq1_t *p = ss->seqs + i;
+		g_aln_param.band_width = l + p->l;
+		aa = aln_stdaln_aux(s, (const char*)p->s, &g_aln_param, g_is_global, g_thres, l, p->l);
+		if (aa->score >= g_thres || g_is_global) {
+			printf(">%s\t%d\t%d\t%s\t%c\t%d\t%d\t%d\t%d\t", p->n, aa->start1? aa->start1 : 1, aa->end1, name, strand,
+				   aa->start2? aa->start2 : 1, aa->end2, aa->score, aa->subo);
+			// NB: I put the short sequence as the first sequence in SW, an insertion to
+			// the reference becomes a deletion from the short sequence. Therefore, I use
+			// "MDI" here rather than "MID", and print ->out2 first rather than ->out1.
+			// The CIGAR loop needs its own index: reusing `i' would overwrite the
+			// position in ss->seqs and make the outer loop skip or repeat sequences.
+			for (j = 0; j != aa->n_cigar; ++j)
+				printf("%d%c", aa->cigar32[j]>>4, "MDI"[aa->cigar32[j]&0xf]);
+			printf("\n%s\n%s\n%s\n", aa->out2, aa->outm, aa->out1);
+		}
+		aln_free_AlnAln(aa);
+	}
+}
+
+/* Read the query file `fn' record by record and align each query to all sequences in `ss':
+ * as read when bit 0 of g_strand is set, and reverse-complemented when bit 1 is set. */
+static void aln_seqs(const seqs_t *ss, const char *fn)
+{
+	gzFile in = xzopen(fn, "r");
+	kseq_t *rec = kseq_init(in);
+	int len;
+
+	for (len = kseq_read(rec); len >= 0; len = kseq_read(rec)) {
+		if (g_strand&1)
+			aln_1seq(ss, (char*)rec->name.s, len, rec->seq.s, '+');
+		if (!(g_strand&2)) continue;
+		/* the record buffer is reverse-complemented in place; it is refilled by the next read */
+		revseq(len, (uint8_t*)rec->seq.s);
+		aln_1seq(ss, (char*)rec->name.s, len, rec->seq.s, '-');
+	}
+	kseq_destroy(rec);
+	gzclose(in);
+}
+
+/* `bwa stdsw' entry point: parse the options, load <seq1.long.fa> and align every record of
+ * <seq2.short.fa> against it. Returns 1 after printing the usage text, 0 otherwise. */
+int bwa_stdsw(int argc, char *argv[])
+{
+	seqs_t *targets;
+	int opt;
+
+	for (opt = getopt(argc, argv, "gT:frp"); opt >= 0; opt = getopt(argc, argv, "gT:frp")) {
+		if (opt == 'g') g_is_global = 1;
+		else if (opt == 'T') g_thres = atoi(optarg);
+		else if (opt == 'f') g_strand |= 1;
+		else if (opt == 'r') g_strand |= 2;
+		else if (opt == 'p') g_aa = 1;
+	}
+	if (g_strand == 0) g_strand = 3; /* neither -f nor -r given: align both strands */
+	if (g_aa) g_strand = 1; /* -p overrides -r, as the usage text states */
+	if (argc - optind < 2) {
+		fprintf(stderr,
+				"\nUsage:   bwa stdsw [options] <seq1.long.fa> <seq2.short.fa>\n\n"
+				"Options: -T INT    minimum score [%d]\n"
+				"         -p        protein alignment (suppressing -r)\n"
+				"         -f        forward strand only\n"
+				"         -r        reverse strand only\n"
+				"         -g        global alignment\n\n"
+				"Note: This program is specifically designed for alignment between multiple short\n"
+				"      sequences and ONE long sequence. It outputs the suboptimal score on the long\n"
+				"      sequence.\n\n",
+				g_thres);
+		return 1;
+	}
+	if (g_aa) g_aln_param = aln_param_aa2aa;
+	else g_aln_param = aln_param_blast;
+	g_aln_param.gap_end = 0;
+	targets = load_seqs(argv[optind]);
+	aln_seqs(targets, argv[optind+1]);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/solid2fastq.pl	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,111 @@
+#!/usr/bin/perl -w
+
+# Author: lh3
+# Note: Ideally, this script should be written in C. It is a bit slow at present.
+# Also note that this script is different from the one contained in MAQ.
+
+use strict;
+use warnings;
+use Getopt::Std;
+
+my %opts;
+my $version = '0.1.4';
+my $usage = qq{
+Usage: solid2fastq.pl <in.title> <out.prefix>
+
+Note: <in.title> is the string showed in the `# Title:' line of a
+      ".csfasta" read file. Then <in.title>F3.csfasta is read sequence
+      file and <in.title>F3_QV.qual is the quality file. If
+      <in.title>R3.csfasta is present, this script assumes reads are
+      paired; otherwise reads will be regarded as single-end.
+
+      The read name will be <out.prefix>:panel_x_y/[12] with `1' for R3
+      tag and `2' for F3. Usually you may want to use short <out.prefix>
+      to save diskspace. Long <out.prefix> also causes troubles to maq.
+
+};
+
+getopts('', \%opts); # no options are defined; this only rejects unknown switches
+die($usage) if (@ARGV != 2);
+my ($title, $pre) = @ARGV;
+my (@fhr, @fhw); # input handles (indexed like @fn_suff) and output handles
+my @fn_suff = ('F3.csfasta', 'F3_QV.qual', 'R3.csfasta', 'R3_QV.qual');
+my $is_paired = (-f "$title$fn_suff[2]" || -f "$title$fn_suff[2].gz")? 1 : 0; # paired iff the R3 .csfasta (plain or .gz) exists
+if ($is_paired) { # paired end
+  for (0 .. 3) {
+	my $fn = "$title$fn_suff[$_]";
+	$fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz"); # only the .gz exists: read it through a gzip pipe
+	open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
+  }
+  open($fhw[0], "|gzip >$pre.read2.fastq.gz") || die; # this is NOT a typo: F3 records go to the read2 file
+  open($fhw[1], "|gzip >$pre.read1.fastq.gz") || die;
+  open($fhw[2], "|gzip >$pre.single.fastq.gz") || die;
+  my (@df, @dr); # current (key, record) pair from F3 and from R3
+  @df = &read1(1); @dr = &read1(2);
+  while (@df && @dr) { # merge the two streams by key
+	if ($df[0] eq $dr[0]) { # mate pair
+	  print {$fhw[0]} $df[1]; print {$fhw[1]} $dr[1];
+	  @df = &read1(1); @dr = &read1(2);
+	} else {
+	  if ($df[0] le $dr[0]) { # string comparison of the keys; the smaller one has no mate
+		print {$fhw[2]} $df[1];
+		@df = &read1(1);
+	  } else {
+		print {$fhw[2]} $dr[1];
+		@dr = &read1(2);
+	  }
+	}
+  }
+  if (@df) { # R3 is exhausted: the remaining F3 records are singletons
+	print {$fhw[2]} $df[1];
+	while (@df = &read1(1, $fhr[0], $fhr[1])) { # NOTE(review): read1 uses only its first argument
+	  print {$fhw[2]} $df[1];
+	}
+  }
+  if (@dr) { # F3 is exhausted: the remaining R3 records are singletons
+	print {$fhw[2]} $dr[1];
+	while (@dr = &read1(2, $fhr[2], $fhr[3])) {
+	  print {$fhw[2]} $dr[1];
+	}
+  }
+  close($fhr[$_]) for (0 .. $#fhr);
+  close($fhw[$_]) for (0 .. $#fhw);
+} else { # single end
+  for (0 .. 1) {
+	my $fn = "$title$fn_suff[$_]";
+	$fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz");
+	open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
+  }
+  open($fhw[2], "|gzip >$pre.single.fastq.gz") || die;
+  my @df;
+  while (@df = &read1(1, $fhr[0], $fhr[1])) { # every F3 record goes to the single file
+	print {$fhw[2]} $df[1];
+  }
+  close($fhr[$_]) for (0 .. $#fhr);
+  close($fhw[2]);
+}
+
+# read1($i): next record of tag $i (1 = F3 via $fhr[0,1]; 2 = R3 via $fhr[2,3]); returns (key, FASTQ text) or ().
+sub read1 {
+  my $i = shift(@_);
+  my ($key, $seq);
+  my ($fhs, $fhq) = @fhr[2*$i-2, 2*$i-1]; # sequence handle and its matching quality handle
+  while (<$fhs>) {
+	my $t = <$fhq>; # the two files are read in lock-step, one line each
+	if (/^>(\d+)_(\d+)_(\d+)_[FR]3/) {
+	  $key = sprintf("%.4d_%.4d_%.4d", $1, $2, $3); # this line could be improved on 64-bit machines
+	  die(qq/** unmatched read name: '$_' != '$t'\n/) unless ($_ eq $t); # report both headers, not $_ twice
+	  my $name = "$pre:$1_$2_$3/$i"; # NOTE(review): F3 ($i == 1) is tagged /1 although the usage text says `2' for F3 -- confirm
+	  $_ = substr(<$fhs>, 2); # drop the first two characters of the color-space line
+	  tr/0123./ACGTN/;
+	  my $s = $_; # still ends with the newline of the input line
+	  $_ = <$fhq>;
+	  s/-1\b/0/g; # a quality of -1 is replaced by 0
+	  s/^(\d+)\s*//; # drop the first quality value
+	  s/(\d+)\s*/chr($1+33)/eg; # each remaining value becomes the character with code value+33
+	  $seq = qq/\@$name\n$s+\n$_\n/;
+	  last;
+	}
+  }
+  return defined($seq)? ($key, $seq) : ();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/stdaln.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,1072 @@
+/* The MIT License
+
+   Copyright (c) 2003-2006, 2008, 2009, by Heng Li <lh3lh3@gmail.com>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include "stdaln.h"
+
+/* char -> 17 (=16+1) nucleotides */
+unsigned char aln_nt16_table[256] = {
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,16 /*'-'*/,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+	15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+	15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+	15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
+};
+char *aln_nt16_rev_table = "XAGRCMSVTWKDYHBN-";
+
+/* char -> 6 (=4+1+1) nucleotide codes: A, G, C, T, N and '-' */
+unsigned char aln_nt4_table[256] = {
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 5 /*'-'*/, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+char *aln_nt4_rev_table = "AGCTN-";
+
+/* char -> 23 (=20+1+1+1) amino acid codes: 20 residues, '*', X and '-' */
+unsigned char aln_aa_table[256] = {
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,20,21, 21,22 /*'-'*/,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
+	14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
+	21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
+	14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21
+};
+char *aln_aa_rev_table = "ARNDCQEGHILKMFPSTWYV*X-";
+                       /* 01234567890123456789012 */
+
+/* Translation table. It is not used in stdaln.c, but it is kept here so that it need not be written again when it is needed. */
+unsigned char aln_trans_table_eu[66] = {
+	11,11, 2, 2,  1, 1,15,15, 16,16,16,16,  9,12, 9, 9,
+	 6, 6, 3, 3,  7, 7, 7, 7,  0, 0, 0, 0, 19,19,19,19,
+	 5, 5, 8, 8,  1, 1, 1, 1, 14,14,14,14, 10,10,10,10,
+	20,20,18,18, 20,17, 4, 4, 15,15,15,15, 10,10,13,13, 21, 22
+};
+char *aln_trans_table_eu_char = "KKNNRRSSTTTTIMIIEEDDGGGGAAAAVVVVQQHHRRRRPPPPLLLL**YY*WCCSSSSLLFFX";
+                              /* 01234567890123456789012345678901234567890123456789012345678901234 */
+int aln_sm_blosum62[] = {
+/*	 A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
+	 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,
+	-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,
+	-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,
+	-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,
+	 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,
+	-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,
+	-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,
+	 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,
+	-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,
+	-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,
+	-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,
+	-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,
+	-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,
+	-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,
+	-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,
+	 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,
+	 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,
+	-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,
+	-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,
+	 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,
+	-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,
+	 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1
+};
+
+int aln_sm_blosum45[] = {
+/*	 A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
+	 5,-2,-1,-2,-1,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-2,-2, 0,-5, 0,
+	-2, 7, 0,-1,-3, 1, 0,-2, 0,-3,-2, 3,-1,-2,-2,-1,-1,-2,-1,-2,-5,-1,
+	-1, 0, 6, 2,-2, 0, 0, 0, 1,-2,-3, 0,-2,-2,-2, 1, 0,-4,-2,-3,-5,-1,
+	-2,-1, 2, 7,-3, 0, 2,-1, 0,-4,-3, 0,-3,-4,-1, 0,-1,-4,-2,-3,-5,-1,
+	-1,-3,-2,-3,12,-3,-3,-3,-3,-3,-2,-3,-2,-2,-4,-1,-1,-5,-3,-1,-5,-2,
+	-1, 1, 0, 0,-3, 6, 2,-2, 1,-2,-2, 1, 0,-4,-1, 0,-1,-2,-1,-3,-5,-1,
+	-1, 0, 0, 2,-3, 2, 6,-2, 0,-3,-2, 1,-2,-3, 0, 0,-1,-3,-2,-3,-5,-1,
+	 0,-2, 0,-1,-3,-2,-2, 7,-2,-4,-3,-2,-2,-3,-2, 0,-2,-2,-3,-3,-5,-1,
+	-2, 0, 1, 0,-3, 1, 0,-2,10,-3,-2,-1, 0,-2,-2,-1,-2,-3, 2,-3,-5,-1,
+	-1,-3,-2,-4,-3,-2,-3,-4,-3, 5, 2,-3, 2, 0,-2,-2,-1,-2, 0, 3,-5,-1,
+	-1,-2,-3,-3,-2,-2,-2,-3,-2, 2, 5,-3, 2, 1,-3,-3,-1,-2, 0, 1,-5,-1,
+	-1, 3, 0, 0,-3, 1, 1,-2,-1,-3,-3, 5,-1,-3,-1,-1,-1,-2,-1,-2,-5,-1,
+	-1,-1,-2,-3,-2, 0,-2,-2, 0, 2, 2,-1, 6, 0,-2,-2,-1,-2, 0, 1,-5,-1,
+	-2,-2,-2,-4,-2,-4,-3,-3,-2, 0, 1,-3, 0, 8,-3,-2,-1, 1, 3, 0,-5,-1,
+	-1,-2,-2,-1,-4,-1, 0,-2,-2,-2,-3,-1,-2,-3, 9,-1,-1,-3,-3,-3,-5,-1,
+	 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-3,-1,-2,-2,-1, 4, 2,-4,-2,-1,-5, 0,
+	 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-1,-1, 2, 5,-3,-1, 0,-5, 0,
+	-2,-2,-4,-4,-5,-2,-3,-2,-3,-2,-2,-2,-2, 1,-3,-4,-3,15, 3,-3,-5,-2,
+	-2,-1,-2,-2,-3,-1,-2,-3, 2, 0, 0,-1, 0, 3,-3,-2,-1, 3, 8,-1,-5,-1,
+	 0,-2,-3,-3,-1,-3,-3,-3,-3, 3, 1,-2, 1, 0,-3,-1, 0,-3,-1, 5,-5,-1,
+	-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 1,-5,
+	 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 0,-2,-1,-1,-5,-1
+};
+
+int aln_sm_nt[] = {
+/*	 X  A  G  R  C  M  S  V  T  W  K  D  Y  H  B  N */
+	-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+	-2, 2,-1, 1,-2, 1,-2, 0,-2, 1,-2, 0,-2, 0,-2, 0,
+	-2,-1, 2, 1,-2,-2, 1, 0,-2,-2, 1, 0,-2,-2, 0, 0,
+	-2, 1, 1, 1,-2,-1,-1, 0,-2,-1,-1, 0,-2, 0, 0, 0,
+	-2,-2,-2,-2, 2, 1, 1, 0,-1,-2,-2,-2, 1, 0, 0, 0,
+	-2, 1,-2,-1, 1, 1,-1, 0,-2,-1,-2, 0,-1, 0, 0, 0,
+	-2,-2, 1,-1, 1,-1, 1, 0,-2,-2,-1, 0,-1, 0, 0, 0,
+	-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2,-2,-2,-1,-2,-2,-2, 2, 1, 1, 0, 1, 0, 0, 0,
+	-2, 1,-2,-1,-2,-1,-2, 0, 1, 1,-1, 0,-1, 0, 0, 0,
+	-2,-2, 1,-1,-2,-2,-1, 0, 1,-1, 1, 0,-1, 0, 0, 0,
+	-2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2,-2,-2, 1,-1,-1, 0, 1,-1,-1, 0, 1, 0, 0, 0,
+	-2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+int aln_sm_read[] = {
+/*	  X   A   G   R   C   M   S   V   T   W   K   D   Y   H   B   N  */
+	-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,
+	-17,  2,-17,  1,-17,  1,-17,  0,-17,  1,-17,  0,-17,  0,-17,  0,
+	-17,-17,  2,  1,-17,-17,  1,  0,-17,-17,  1,  0,-17,-17,  0,  0,
+	-17,  1,  1,  1,-17,-17,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,-17,-17,-17,  2,  1,  1,  0,-17,-17,-17,-17,  1,  0,  0,  0,
+	-17,  1,-17,-17,  1,  1,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,-17,  1,-17,  1,-17,  1,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,  0,  0,  0,  0,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,-17,-17,-17,-17,-17,-17,  2,  1,  1,  0,  1,  0,  0,  0,
+	-17,  1,-17,-17,-17,-17,-17,  0,  1,  1,-17,  0,-17,  0,  0,  0,
+	-17,-17,  1,-17,-17,-17,-17,  0,  1,-17,  1,  0,-17,  0,  0,  0,
+	-17,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,-17,-17,  1,-17,-17,  0,  1,-17,-17,  0,  1,  0,  0,  0,
+	-17,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+};
+
+int aln_sm_hs[] = {
+/*     A    G    C    T    N */
+	  91, -31,-114,-123, -44,
+	 -31, 100,-125,-114, -42,
+	-123,-125, 100, -31, -42,
+	-114,-114, -31,  91, -42,
+	 -44, -42, -42, -42, -43
+};
+
+int aln_sm_maq[] = {
+	11, -19, -19, -19, -13,
+	-19, 11, -19, -19, -13,
+	-19, -19, 11, -19, -13,
+	-19, -19, -19, 11, -13,
+	-13, -13, -13, -13, -13
+};
+
+int aln_sm_blast[] = {
+	1, -3, -3, -3, -2,
+	-3, 1, -3, -3, -2,
+	-3, -3, 1, -3, -2,
+	-3, -3, -3, 1, -2,
+	-2, -2, -2, -2, -2
+};
+
+/********************/
+/* START OF align.c */
+/********************/
+
+AlnParam aln_param_blast   = {  5,  2,  2, aln_sm_blast, 5, 50 };
+AlnParam aln_param_bwa     = { 26,  9,  5, aln_sm_maq, 5, 50 };
+AlnParam aln_param_nt2nt   = {  8,  2,  2, aln_sm_nt, 16, 75 };
+AlnParam aln_param_rd2rd   = {  1, 19, 19, aln_sm_read, 16, 75 };
+AlnParam aln_param_aa2aa   = { 10,  2,  2, aln_sm_blosum62, 22, 50 };
+
+/* Allocate an AlnAln with every owned pointer set to NULL, so that aln_free_AlnAln() is safe at any time. */
+AlnAln *aln_init_AlnAln()
+{
+	AlnAln *aa = (AlnAln*)malloc(sizeof(AlnAln));
+	aa->path = 0; aa->path_len = 0; aa->out1 = aa->out2 = aa->outm = 0;
+	aa->cigar32 = 0; aa->n_cigar = 0; /* was left uninitialized although aln_free_AlnAln() free()s it */
+	aa->subo = 0; /* aln_local_core() leaves *_subo untouched on its early exits */
+	return aa;
+}
+void aln_free_AlnAln(AlnAln *aa)
+{
+	if (aa == 0) return; /* like free(), accept NULL and do nothing */
+	free(aa->path); free(aa->cigar32); /* alignment path and packed CIGAR */
+	free(aa->out1); free(aa->out2); free(aa->outm); free(aa); /* printable rows, then the struct itself */
+}
+
+/***************************/
+/* START OF common_align.c */
+/***************************/
+
+#define LOCAL_OVERFLOW_THRESHOLD 32000
+#define LOCAL_OVERFLOW_REDUCE 16000
+#define NT_LOCAL_SCORE int
+#define NT_LOCAL_SHIFT 16
+#define NT_LOCAL_MASK 0xffff
+
+#define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF;
+
+#define set_M(MM, cur, p, sc)							\
+{														\
+	if ((p)->M >= (p)->I) {								\
+		if ((p)->M >= (p)->D) {							\
+			(MM) = (p)->M + (sc); (cur)->Mt = FROM_M;	\
+		} else {										\
+			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D;	\
+		}												\
+	} else {											\
+		if ((p)->I > (p)->D) {							\
+			(MM) = (p)->I + (sc); (cur)->Mt = FROM_I;	\
+		} else {										\
+			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D;	\
+		}												\
+	}													\
+}
+#define set_I(II, cur, p)								\
+{														\
+	if ((p)->M - gap_open > (p)->I) {					\
+		(cur)->It = FROM_M;								\
+		(II) = (p)->M - gap_open - gap_ext;				\
+	} else {											\
+		(cur)->It = FROM_I;								\
+		(II) = (p)->I - gap_ext;						\
+	}													\
+}
+#define set_end_I(II, cur, p)							\
+{														\
+	if (gap_end >= 0) {									\
+		if ((p)->M - gap_open > (p)->I) {				\
+			(cur)->It = FROM_M;							\
+			(II) = (p)->M - gap_open - gap_end;			\
+		} else {										\
+			(cur)->It = FROM_I;							\
+			(II) = (p)->I - gap_end;					\
+		}												\
+	} else set_I(II, cur, p);							\
+}
+#define set_D(DD, cur, p)								\
+{														\
+	if ((p)->M - gap_open > (p)->D) {					\
+		(cur)->Dt = FROM_M;								\
+		(DD) = (p)->M - gap_open - gap_ext;				\
+	} else {											\
+		(cur)->Dt = FROM_D;								\
+		(DD) = (p)->D - gap_ext;						\
+	}													\
+}
+#define set_end_D(DD, cur, p)							\
+{														\
+	if (gap_end >= 0) {									\
+		if ((p)->M - gap_open > (p)->D) {				\
+			(cur)->Dt = FROM_M;							\
+			(DD) = (p)->M - gap_open - gap_end;			\
+		} else {										\
+			(cur)->Dt = FROM_D;							\
+			(DD) = (p)->D - gap_end;					\
+		}												\
+	} else set_D(DD, cur, p);							\
+}
+
+typedef struct
+{
+	unsigned char Mt:3, It:2, Dt:2;
+} dpcell_t;
+
+typedef struct
+{
+	int M, I, D;
+} dpscore_t;
+
+/* Score profile: s_array[r][k] = score_matrix[r * row + seq[k]] for every code r < row and position k < len. */
+void aln_init_score_array(unsigned char *seq, int len, int row, int *score_matrix, int **s_array)
+{
+	int r, pos;
+	for (r = 0; r != row; ++r) {
+		const int *mat_row = score_matrix + r * row; /* row r of the row x row matrix */
+		int *profile = s_array[r];
+		for (pos = 0; pos != len; ++pos)
+			profile[pos] = mat_row[seq[pos]];
+	}
+}
+/* Banded global alignment of seq1 (len1) against seq2 (len2) with the gap costs and matrix of *ap.
+ * The traceback is written to path[] starting at cell (len1,len2) and its length to *path_len; the
+ * best score of that cell is returned. Empty input sets *path_len = 0 and returns 0 at once. */
+int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+					path_t *path, int *path_len)
+{
+	register int i, j;
+	dpcell_t **dpcell, *q;
+	dpscore_t *curr, *last, *s;
+	path_t *p;
+	int b1, b2, tmp_end;
+	int *mat, end, max;
+	unsigned char type, ctype;
+
+	int gap_open, gap_ext, gap_end, b;
+	int *score_matrix, N_MATRIX_ROW;
+
+	/* initialize some align-related parameters. just for compatibility */
+	gap_open = ap->gap_open;
+	gap_ext = ap->gap_ext;
+	gap_end = ap->gap_end;
+	b = ap->band_width;
+	score_matrix = ap->matrix;
+	N_MATRIX_ROW = ap->row;
+	
+	if (len1 == 0 || len2 == 0) {
+		*path_len = 0;
+		return 0;
+	}
+	/* calculate b1 and b2 */
+	if (len1 > len2) {
+		b1 = len1 - len2 + b;
+		b2 = b;
+	} else {
+		b1 = b;
+		b2 = len2 - len1 + b;
+	}
+	if (b1 > len1) b1 = len1;
+	if (b2 > len2) b2 = len2;
+	--seq1; --seq2; /* residues are addressed as seq1[1..len1] and seq2[1..len2] from here on */
+
+	/* allocate memory */
+	end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1);
+	dpcell = (dpcell_t**)malloc(sizeof(dpcell_t*) * (len2 + 1));
+	for (j = 0; j <= len2; ++j)
+		dpcell[j] = (dpcell_t*)malloc(sizeof(dpcell_t) * end);
+	for (j = b2 + 1; j <= len2; ++j)
+		dpcell[j] -= j - b2; /* shift the row pointer so that dpcell[j] + i can be indexed by column i */
+	curr = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));
+	last = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));
+	
+	/* set first row */
+	SET_INF(*curr); curr->M = 0;
+	for (i = 1, s = curr + 1; i < b1; ++i, ++s) {
+		SET_INF(*s);
+		set_end_D(s->D, dpcell[0] + i, s - 1);
+	}
+	s = curr; curr = last; last = s; /* swap the two score rows */
+
+	/* core dynamic programming, part 1 */
+	tmp_end = (b2 < len2)? b2 : len2 - 1;
+	for (j = 1; j <= tmp_end; ++j) {
+		q = dpcell[j]; s = curr; SET_INF(*s);
+		set_end_I(s->I, q, last);
+		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		++s; ++q;
+		for (i = 1; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_D(s->D, q, s - 1);
+		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
+			set_end_I(s->I, q, last + i);
+		} else s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+	}
+	/* last row for part 1, use set_end_D() instead of set_D() */
+	if (j == len2 && b2 != len2 - 1) {
+		q = dpcell[j]; s = curr; SET_INF(*s);
+		set_end_I(s->I, q, last);
+		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		++s; ++q;
+		for (i = 1; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
+			set_I(s->I, q, last + i);
+			set_end_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_end_D(s->D, q, s - 1);
+		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
+			set_end_I(s->I, q, last + i);
+		} else s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+		++j;
+	}
+
+	/* core dynamic programming, part 2 */
+	for (; j <= len2 - b2 + 1; ++j) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		end = j + b1 - 1;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_D(s->D, q, s - 1);
+		s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+	}
+
+	/* core dynamic programming, part 3 */
+	for (; j < len2; ++j) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
+		set_end_I(s->I, q, last + i);
+		set_D(s->D, q, s - 1);
+		s = curr; curr = last; last = s;
+	}
+	/* last row */
+	if (j == len2) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_end_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
+		set_end_I(s->I, q, last + i);
+		set_end_D(s->D, q, s - 1);
+		s = curr; curr = last; last = s;
+	}
+
+	/* backtrace */
+	i = len1; j = len2;
+	q = dpcell[j] + i;
+	s = last + len1;
+	max = s->M; type = q->Mt; ctype = FROM_M; /* pick the best of the M, I and D states of the final cell */
+	if (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }
+	if (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }
+
+	p = path;
+	p->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */
+	++p;
+	do {
+		switch (ctype) {
+			case FROM_M: --i; --j; break;
+			case FROM_I: --j; break;
+			case FROM_D: --i; break;
+		}
+		q = dpcell[j] + i;
+		ctype = type;
+		switch (type) {
+			case FROM_M: type = q->Mt; break;
+			case FROM_I: type = q->It; break;
+			case FROM_D: type = q->Dt; break;
+		}
+		p->ctype = ctype; p->i = i; p->j = j;
+		++p;
+	} while (i || j);
+	*path_len = p - path - 1; /* the final element written at (0,0) is not counted */
+
+	/* free memory */
+	for (j = b2 + 1; j <= len2; ++j)
+		dpcell[j] += j - b2; /* undo the band shift so the original malloc() pointer is freed */
+	for (j = 0; j <= len2; ++j)
+		free(dpcell[j]);
+	free(dpcell);
+	free(curr); free(last);
+	
+	return max;
+}
+/* Local alignment of seq1 against seq2: a forward pass finds the best-scoring end cell, a banded
+ * reverse pass finds the start cell, and if _thres > 0 aln_global_core() fills path[] between them
+ * (otherwise only the two end points are stored). Returns score_f, or -1 if a sequence is empty. */
+int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+				   path_t *path, int *path_len, int _thres, int *_subo)
+{
+	register NT_LOCAL_SCORE *s;
+	register int i;
+	int q, r, qr, tmp_len, qr_shift;
+	int **s_array, *score_array;
+	int e, f;
+	int is_overflow, of_base;
+	NT_LOCAL_SCORE *eh, curr_h, last_h, curr_last_h; /* eh[] packs h in the bits above NT_LOCAL_SHIFT and e in the NT_LOCAL_MASK bits */
+	int j, start_i, start_j, end_i, end_j;
+	path_t *p;
+	int score_f, score_r, score_g;
+	int start, end, max_score;
+	int thres, *suba, *ss;
+
+	int gap_open, gap_ext, b;
+	int *score_matrix, N_MATRIX_ROW;
+
+	/* initialize some align-related parameters. just for compatibility */
+	gap_open = ap->gap_open;
+	gap_ext = ap->gap_ext;
+	b = ap->band_width;
+	score_matrix = ap->matrix;
+	N_MATRIX_ROW = ap->row;
+	thres = _thres > 0? _thres : -_thres; /* |_thres|; the sign of _thres only selects the path mode further down */
+
+	if (len1 == 0 || len2 == 0) return -1;
+
+	/* allocate memory */
+	suba = (int*)malloc(sizeof(int) * (len2 + 1));
+	eh = (NT_LOCAL_SCORE*)malloc(sizeof(NT_LOCAL_SCORE) * (len1 + 1));
+	s_array = (int**)malloc(sizeof(int*) * N_MATRIX_ROW);
+	for (i = 0; i != N_MATRIX_ROW; ++i)
+		s_array[i] = (int*)malloc(sizeof(int) * len1);
+	/* initialization */
+	aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array);
+	q = gap_open;
+	r = gap_ext;
+	qr = q + r;
+	qr_shift = (qr+1) << NT_LOCAL_SHIFT;
+	tmp_len = len1 + 1;
+	start_i = start_j = end_i = end_j = 0;
+	for (i = 0, max_score = 0; i != N_MATRIX_ROW * N_MATRIX_ROW; ++i)
+		if (max_score < score_matrix[i]) max_score = score_matrix[i];
+	/* convert the coordinate */
+	--seq1; --seq2;
+	for (i = 0; i != N_MATRIX_ROW; ++i) --s_array[i];
+
+	/* forward dynamic programming */
+	for (i = 0, s = eh; i != tmp_len; ++i, ++s) *s = 0;
+	score_f = 0;
+	is_overflow = of_base = 0;
+	suba[0] = 0;
+	for (j = 1, ss = suba + 1; j <= len2; ++j, ++ss) {
+		int subo = 0;
+		last_h = f = 0;
+		score_array = s_array[seq2[j]];
+		if (is_overflow) { /* adjust eh[] array if overflow occurs. */
+			/* If LOCAL_OVERFLOW_REDUCE is too small, optimal alignment might be missed.
+			 * If it is too large, this block will be executed frequently and therefore
+			 * slow down the whole program.
+			 * Actually, smaller LOCAL_OVERFLOW_REDUCE might also help to reduce the
+			 * number of assignments because it sets some cells to zero when overflow
+			 * happens. */
+			int tmp, tmp2;
+			score_f -= LOCAL_OVERFLOW_REDUCE;
+			of_base += LOCAL_OVERFLOW_REDUCE;
+			is_overflow = 0;
+			for (i = 1, s = eh; i <= tmp_len; ++i, ++s) {
+				tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
+				if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
+				else tmp2 -= LOCAL_OVERFLOW_REDUCE;
+				if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
+				else tmp -= LOCAL_OVERFLOW_REDUCE;
+				*s = (tmp << NT_LOCAL_SHIFT) | tmp2;
+			}
+		}
+		for (i = 1, s = eh; i != tmp_len; ++i, ++s) {
+			/* prepare for calculate current h */
+			curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
+			if (curr_h < 0) curr_h = 0;
+			if (last_h > 0) { /* initialize f */
+				f = (f > last_h - q)? f - r : last_h - qr;
+				if (curr_h < f) curr_h = f;
+			}
+			if (*(s+1) >= qr_shift) { /* initialize e */
+				curr_last_h = *(s+1) >> NT_LOCAL_SHIFT;
+				e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
+				if (curr_h < e) curr_h = e;
+				*s = (last_h << NT_LOCAL_SHIFT) | e;
+			} else *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */
+			last_h = curr_h;
+			if (subo < curr_h) subo = curr_h;
+			if (score_f < curr_h) {
+				score_f = curr_h; end_i = i; end_j = j;
+				if (score_f > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
+			}
+		}
+		*s = last_h << NT_LOCAL_SHIFT;
+		*ss = subo + of_base; /* best score seen in row j, with the overflow offset added back */
+	}
+	score_f += of_base;
+
+	if (score_f < thres) { /* no matching residue at all, 090218 */
+		if (path_len) *path_len = 0;
+		goto end_func;
+	}
+	if (path == 0) goto end_func; /* skip path-filling */
+
+	/* reverse dynamic programming */
+	for (i = end_i, s = eh + end_i; i >= 0; --i, --s) *s = 0;
+	if (end_i == 0 || end_j == 0) goto end_func; /* no local match */
+	score_r = score_matrix[seq1[end_i] * N_MATRIX_ROW + seq2[end_j]];
+	is_overflow = of_base = 0;
+	start_i = end_i; start_j = end_j;
+	eh[end_i] = ((NT_LOCAL_SCORE)(qr + score_r)) << NT_LOCAL_SHIFT; /* in order to initialize f and e, 040408 */
+	start = end_i - 1;
+	end = end_i - 3;
+	if (end <= 0) end = 0;
+
+	/* second pass DP can be done in a band, speed will thus be enhanced */
+	for (j = end_j - 1; j != 0; --j) {
+		last_h = f = 0;
+		score_array = s_array[seq2[j]];
+		if (is_overflow) { /* adjust eh[] array if overflow occurs. */
+			int tmp, tmp2;
+			score_r -= LOCAL_OVERFLOW_REDUCE;
+			of_base += LOCAL_OVERFLOW_REDUCE;
+			is_overflow = 0;
+			for (i = start, s = eh + start + 1; i >= end; --i, --s) {
+				tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
+				if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
+				else tmp2 -= LOCAL_OVERFLOW_REDUCE;
+				if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
+				else tmp -= LOCAL_OVERFLOW_REDUCE;
+				*s = (tmp << NT_LOCAL_SHIFT) | tmp2;
+			}
+		}
+		for (i = start, s = eh + start + 1; i != end; --i, --s) {
+			/* prepare for calculate current h */
+			curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
+			if (curr_h < 0) curr_h = 0;
+			if (last_h > 0) { /* initialize f */
+				f = (f > last_h - q)? f - r : last_h - qr;
+				if (curr_h < f) curr_h = f;
+			}
+			curr_last_h = *(s-1) >> NT_LOCAL_SHIFT;
+			e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
+			if (e < 0) e = 0;
+			if (curr_h < e) curr_h = e;
+			*s = (last_h << NT_LOCAL_SHIFT) | e;
+			last_h = curr_h;
+			if (score_r < curr_h) {
+				score_r = curr_h; start_i = i; start_j = j;
+				if (score_r + of_base - qr == score_f) { /* reverse score equals the forward score: start cell found */
+					j = 1; break;
+				}
+				if (score_r > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
+			}
+		}
+		*s = last_h << NT_LOCAL_SHIFT;
+		/* recalculate start and end, the boundaries of the band */
+		if ((eh[start] >> NT_LOCAL_SHIFT) <= qr) --start;
+		if (start <= 0) start = 0;
+		end = start_i - (start_j - j) - (score_r + of_base + (start_j - j) * max_score) / r - 1;
+		if (end <= 0) end = 0;
+	}
+
+	if (_subo) { /* best row score of seq2 outside [start_j, end_j] widened by 33% of its length on each side */
+		int tmp2 = 0, tmp = (int)(start_j - .33 * (end_j - start_j) + .499);
+		for (j = 1; j <= tmp; ++j)
+			if (tmp2 < suba[j]) tmp2 = suba[j];
+		tmp = (int)(end_j + .33 * (end_j - start_j) + .499);
+		for (j = tmp; j <= len2; ++j)
+			if (tmp2 < suba[j]) tmp2 = suba[j];
+		*_subo = tmp2;
+	}
+
+	if (path_len == 0) { /* no length pointer given: store only the start and end cells */
+		path[0].i = start_i; path[0].j = start_j;
+		path[1].i = end_i; path[1].j = end_j;
+		goto end_func;
+	}
+
+	score_r += of_base;
+	score_r -= qr;
+
+#ifdef DEBUG
+	/* this seems not a bug */
+	if (score_f != score_r)
+		fprintf(stderr, "[aln_local_core] unknown flaw occurs: score_f(%d) != score_r(%d)\n", score_f, score_r);
+#endif
+
+	if (_thres > 0) { /* call global alignment to fill the path */
+		score_g = 0;
+		j = (end_i - start_i > end_j - start_j)? end_i - start_i : end_j - start_j;
+		++j; /* j is the maximum band_width */
+		for (i = ap->band_width;; i <<= 1) { /* retry with a doubled band until the global score matches */
+			AlnParam ap_real = *ap;
+			ap_real.gap_end = -1;
+			ap_real.band_width = i;
+			score_g = aln_global_core(seq1 + start_i, end_i - start_i + 1, seq2 + start_j,
+									  end_j - start_j + 1, &ap_real, path, path_len);
+			if (score_g == score_r || score_f == score_g) break;
+			if (i > j) break;
+		}
+		if (score_r > score_g && score_f > score_g) {
+			fprintf(stderr, "[aln_local_core] Potential bug: (%d,%d) > %d\n", score_f, score_r, score_g);
+			score_f = score_r = -1;
+		} else score_f = score_g;
+
+		/* convert coordinate */
+		for (p = path + *path_len - 1; p >= path; --p) {
+			p->i += start_i - 1;
+			p->j += start_j - 1;
+		}
+	} else { /* just store the start and end */
+		*path_len = 2;
+		path[1].i = start_i; path[1].j = start_j;
+		path->i = end_i; path->j = end_j;
+	}
+
+end_func:
+	/* free */
+	free(eh); free(suba);
+	for (i = 0; i != N_MATRIX_ROW; ++i) {
+		++s_array[i]; /* undo the 1-based shift applied after allocation */
+		free(s_array[i]);
+	}
+	free(s_array);
+	return score_f;
+}
+/* Align seq1 against seq2 and return the result in a newly allocated AlnAln.
+ * type selects aln_global_core(), aln_local_core() or aln_extend_core(); any other value returns NULL.
+ * A negative len1/len2 means "use strlen()". The printable rows out1/out2/outm are built only when
+ * thres > 0. The result is released with aln_free_AlnAln(). */
+AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
+					   int type, int thres, int len1, int len2)
+{
+	const unsigned char *code; /* character -> matrix index table, chosen from ap->row */
+	unsigned char *seq11, *seq22;
+	int score;
+	int i, j, l;
+	path_t *p;
+	char *out1, *out2, *outm;
+	AlnAln *aa;
+
+	if (len1 < 0) len1 = strlen(seq1);
+	if (len2 < 0) len2 = strlen(seq2);
+
+	aa = aln_init_AlnAln();
+	seq11 = (unsigned char*)malloc(sizeof(unsigned char) * len1);
+	seq22 = (unsigned char*)malloc(sizeof(unsigned char) * len2);
+	aa->path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 1));
+
+	if (ap->row < 10) code = aln_nt4_table; /* 4-nucleotide alignment */
+	else if (ap->row < 20) code = aln_nt16_table; /* 16-nucleotide alignment */
+	else code = aln_aa_table; /* amino acids */
+	/* Index through unsigned char: where plain char is signed, a byte >= 0x80 would
+	 * otherwise become a negative index and read in front of the 256-entry table. */
+	for (i = 0; i < len1; ++i)
+		seq11[i] = code[(unsigned char)seq1[i]];
+	for (j = 0; j < len2; ++j)
+		seq22[j] = code[(unsigned char)seq2[j]];
+
+	if (type == ALN_TYPE_GLOBAL) score = aln_global_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len);
+	else if (type == ALN_TYPE_LOCAL) score = aln_local_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len, thres, &aa->subo);
+	else if (type == ALN_TYPE_EXTEND)  score = aln_extend_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len, 1, 0);
+	else {
+		/* aa->path is released by aln_free_AlnAln(); freeing it here as well was a double free.
+		 * cigar32 has not been assigned yet, so clear it before aln_free_AlnAln() free()s it. */
+		aa->cigar32 = 0;
+		free(seq11);
+		free(seq22);
+		aln_free_AlnAln(aa);
+		return 0;
+	}
+	aa->score = score;
+
+	if (thres > 0) {
+		out1 = aa->out1 = (char*)malloc(sizeof(char) * (aa->path_len + 1));
+		out2 = aa->out2 = (char*)malloc(sizeof(char) * (aa->path_len + 1));
+		outm = aa->outm = (char*)malloc(sizeof(char) * (aa->path_len + 1));
+
+		/* walk the path from its last element back to path[0]; p->i and p->j are 1-based */
+		for (l = 0, p = aa->path + aa->path_len - 1; p >= aa->path; --p, ++l) {
+			switch (p->ctype) {
+			case FROM_M: out1[l] = seq1[p->i - 1]; out2[l] = seq2[p->j - 1];
+				outm[l] = (seq11[p->i - 1] == seq22[p->j - 1] && seq11[p->i - 1] != ap->row)? '|' : ' ';
+				break;
+			case FROM_I: out1[l] = '-'; out2[l] = seq2[p->j - 1]; outm[l] = ' '; break;
+			case FROM_D: out1[l] = seq1[p->i - 1]; out2[l] = '-'; outm[l] = ' '; break;
+			}
+		}
+		out1[l] = out2[l] = outm[l] = '\0';
+	}
+
+	free(seq11);
+	free(seq22);
+
+	if (aa->path_len > 0) {
+		p = aa->path + aa->path_len - 1; /* last path element holds the start coordinates */
+		aa->start1 = p->i? p->i : 1;
+		aa->end1 = aa->path->i;
+		aa->start2 = p->j? p->j : 1;
+		aa->end2 = aa->path->j;
+	} else /* empty path: path[-1] must not be read, so report zero coordinates */
+		aa->start1 = aa->end1 = aa->start2 = aa->end2 = 0;
+	aa->cigar32 = aln_path2cigar32(aa->path, aa->path_len, &aa->n_cigar);
+
+	return aa;
+}
+/* Convenience wrapper around aln_stdaln_aux() for NUL-terminated sequences:
+ * both lengths are passed as -1, which makes aln_stdaln_aux() measure the
+ * strings with strlen(). */
+AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int thres)
+{
+	const int len_from_strlen = -1;
+	AlnAln *result;
+
+	result = aln_stdaln_aux(seq1, seq2, ap, type, thres, len_from_strlen, len_from_strlen);
+	return result;
+}
+
+/* for backward compatibility */
+/*
+ * Build the legacy 16-bit CIGAR for a path.
+ *
+ * The 32-bit CIGAR from aln_path2cigar32() (length << 4 | op) is repacked in
+ * place as op << 14 | length, keeping only the low 14 bits of the length.
+ * The returned pointer aliases the buffer allocated by aln_path2cigar32();
+ * *n_cigar receives the number of operations.
+ */
+uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
+{
+	uint32_t *wide = aln_path2cigar32(path, path_len, n_cigar);
+	uint16_t *narrow = (uint16_t*)wide;
+	int k = 0;
+
+	/* element k of the narrow view lies at or before element k of the wide
+	 * view, so each wide value is read before its storage is overwritten */
+	while (k < *n_cigar) {
+		uint32_t packed = wide[k];
+		uint32_t op = packed & 0xf;
+		uint32_t run = (packed >> 4) & 0x3fff;
+		narrow[k] = op << 14 | run;
+		++k;
+	}
+	return narrow;
+}
+
+/* newly added functions (2009-07-21) */
+
+/*
+ * Banded "extension" alignment: dynamic programming over seq1 x seq2 seeded
+ * at the first cell, tracking the best-scoring cell (end_i, end_j).
+ *
+ *   seq1, len1  first sequence, already encoded as score-matrix row indices
+ *   seq2, len2  second sequence, encoded the same way (its entries index
+ *               s_array, which has ap->row rows)
+ *   ap          gap_open/gap_ext penalties, score matrix, number of rows and
+ *               the initial band width
+ *   path        optional output path; 0 means "score only"
+ *   path_len    optional output length; when it is 0 but path is not, only
+ *               the end coordinates are stored in path[0]
+ *   G0          initial score placed in the high 16 bits of eh[1]
+ *   _mem        optional caller-supplied workspace; when 0 the function
+ *               callocs (len1 + 2) * (ap->row + 1) 4-byte cells and frees
+ *               them before returning
+ *
+ * Returns -1 when either sequence is empty. Otherwise returns the best
+ * score, or, when a full path was requested, the score reported by
+ * aln_global_core() for the aligned prefixes.
+ */
+int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+					path_t *path, int *path_len, int G0, uint8_t *_mem)
+{
+	int q, r, qr, tmp_len;
+	int32_t **s_array, *score_array;
+	int is_overflow, of_base;
+	uint32_t *eh;
+	int i, j, end_i, end_j;
+	int score, start, end;
+	int *score_matrix, N_MATRIX_ROW;
+	uint8_t *mem, *_p;
+
+	/* initialize some align-related parameters. just for compatibility */
+	q = ap->gap_open;
+	r = ap->gap_ext;
+	qr = q + r; /* cost of opening a gap: gap_open + gap_ext */
+	score_matrix = ap->matrix;
+	N_MATRIX_ROW = ap->row;
+
+	/* nothing to align; note that *path_len is left untouched here */
+	if (len1 == 0 || len2 == 0) return -1;
+
+	/* allocate memory */
+	/* one workspace block: eh[] (len1 + 2 cells) followed by N_MATRIX_ROW
+	 * score rows of len1 cells each; s_array only holds pointers into it */
+	mem = _mem? _mem : calloc((len1 + 2) * (N_MATRIX_ROW + 1), 4);
+	_p = mem;
+	eh = (uint32_t*)_p, _p += 4 * (len1 + 2);
+	s_array = calloc(N_MATRIX_ROW, sizeof(void*));
+	for (i = 0; i != N_MATRIX_ROW; ++i)
+		s_array[i] = (int32_t*)_p, _p += 4 * len1;
+	/* initialization */
+	aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array);
+	tmp_len = len1 + 1; /* assigned but not read again in this function */
+	start = 1; end = 2;
+	end_i = end_j = 0;
+	score = 0;
+	is_overflow = of_base = 0;
+	/* convert the coordinate */
+	/* shift every base pointer back by one so the loops can use 1-based indices */
+	--seq1; --seq2;
+	for (i = 0; i != N_MATRIX_ROW; ++i) --s_array[i];
+
+	/* dynamic programming */
+	memset(eh, 0, 4 * (len1 + 2));
+	eh[1] = (uint32_t)G0<<16; /* seed: h = G0 in the high half, e = 0 in the low half */
+	for (j = 1; j <= len2; ++j) {
+		int _start, _end;
+		int h1 = 0, f = 0;
+		score_array = s_array[seq2[j]];
+		/* set start and end */
+		/* intersect the band carried over from the previous row with the
+		 * diagonal band [j - band_width, j + band_width], clipped to seq1 */
+		_start = j - ap->band_width;
+		if (_start < 1) _start = 1;
+		if (_start > start) start = _start;
+		_end = j + ap->band_width;
+		if (_end > len1 + 1) _end = len1 + 1;
+		if (_end < end) end = _end;
+		if (start == end) break;
+		/* adjust eh[] array if overflow occurs. */
+		/* h and e share one 32-bit cell as two 16-bit halves; once the best
+		 * score passes LOCAL_OVERFLOW_THRESHOLD every stored value is lowered
+		 * by LOCAL_OVERFLOW_REDUCE (clamped at 0) and the amount removed is
+		 * accumulated in of_base */
+		if (is_overflow) {
+			int tmp, tmp2;
+			score -= LOCAL_OVERFLOW_REDUCE;
+			of_base += LOCAL_OVERFLOW_REDUCE;
+			is_overflow = 0;
+			for (i = start; i <= end; ++i) {
+				uint32_t *s = &eh[i];
+				tmp = *s >> 16; tmp2 = *s & 0xffff;
+				if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
+				else tmp2 -= LOCAL_OVERFLOW_REDUCE;
+				if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
+				else tmp -= LOCAL_OVERFLOW_REDUCE;
+				*s = (tmp << 16) | tmp2;
+			}
+		}
+		_start = _end = 0; /* reused below: first and last column with h > 0 in this row */
+		/* the inner loop */
+		for (i = start; i < end; ++i) {
+			/* At the beginning of each cycle:
+			     eh[i] -> h[j-1,i-1]<<16 | e[j,i]
+				 f     -> f[j,i]
+				 h1    -> h[j,i-1]
+			*/
+			uint32_t *s = &eh[i];
+			int h = (int)(*s >> 16);
+			int e = *s & 0xffff; /* this is e[j,i] */
+			*s = (uint32_t)h1 << 16; /* eh[i] now stores h[j,i-1]<<16 */
+			h += h? score_array[i] : 0; /* this is left_core() specific */
+			/* calculate h[j,i]; don't need to test 0, as {e,f}>=0 */
+			h = h > e? h : e;
+			h = h > f? h : f; /* h now is h[j,i] */
+			h1 = h;
+			if (h > 0) {
+				if (_start == 0) _start = i;
+				_end = i;
+				if (score < h) {
+					score = h; end_i = i; end_j = j;
+					if (score > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
+				}
+			}
+			/* calculate e[j+1,i] and f[j,i+1] */
+			h -= qr;
+			h = h > 0? h : 0;
+			e -= r;
+			e = e > h? e : h;
+			f -= r;
+			f = f > h? f : h;
+			*s |= e;
+		}			
+		eh[end] = h1 << 16;
+		/* recalculate start and end, the boundaries of the band */
+		if (_end <= 0) break; /* no cell in this row has a positive score */
+		start = _start;
+		end = _end + 3;
+	}
+
+	/* add back what the overflow handling removed, then subtract 1
+	 * NOTE(review): the -1 presumably cancels the G0 = 1 seed passed by
+	 * aln_stdaln_aux() -- confirm for callers that use another G0 */
+	score += of_base - 1;
+	if (score <= 0) {
+		if (path_len) *path_len = 0;
+		goto end_left_func;
+	}
+
+	/* score-only mode */
+	if (path == 0) goto end_left_func;
+
+	/* end-coordinates-only mode */
+	if (path_len == 0) {
+		path[0].i = end_i; path[0].j = end_j;
+		goto end_left_func;
+	}
+
+	{ /* call global alignment to fill the path */
+		int score_g = 0;
+		j = (end_i - 1 > end_j - 1)? end_i - 1 : end_j - 1;
+		++j; /* j is the maximum band_width */
+		/* retry with a doubled band until the global score matches the
+		 * extension score or the band exceeds j; seq1 + 1 / seq2 + 1 undo
+		 * the earlier pointer shift */
+		for (i = ap->band_width;; i <<= 1) {
+			AlnParam ap_real = *ap;
+			ap_real.gap_end = -1;
+			ap_real.band_width = i;
+			score_g = aln_global_core(seq1 + 1, end_i, seq2 + 1, end_j, &ap_real, path, path_len);
+			if (score == score_g) break;
+			if (i > j) break;
+		}
+		if (score > score_g)
+			fprintf(stderr, "[aln_left_core] no suitable bandwidth: %d < %d\n", score_g, score);
+		score = score_g;
+	}
+
+end_left_func:
+	/* free */
+	/* s_array only stores pointers into mem; mem itself is released only
+	 * when it was allocated here rather than supplied by the caller */
+	free(s_array);
+	if (!_mem) free(mem);
+	return score;
+}
+
+/*
+ * Convert an alignment path into a run-length encoded 32-bit CIGAR.
+ *
+ * Each output element is (run length << 4) | ctype. The path is read from its
+ * last element to its first, so the CIGAR runs in the opposite order to the
+ * path array.
+ *
+ *   path      path elements; may be 0
+ *   path_len  number of elements in path
+ *   n_cigar   receives the number of CIGAR elements written
+ *
+ * Returns a malloc()ed array that the caller frees. Returns 0 with
+ * *n_cigar = 0 when the path is missing or empty, when path_len is negative,
+ * or when the allocation fails.
+ */
+uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar)
+{
+	int i, n;
+	uint32_t *cigar;
+	unsigned char last_type;
+
+	/* a negative length must be rejected too: it would otherwise fall
+	 * through and read path[path_len - 1] before the start of the array */
+	if (path_len <= 0 || path == 0) {
+		*n_cigar = 0;
+		return 0;
+	}
+
+	/* first pass: count the runs of identical ctype */
+	last_type = path->ctype;
+	for (i = n = 1; i < path_len; ++i) {
+		if (last_type != path[i].ctype) ++n;
+		last_type = path[i].ctype;
+	}
+	cigar = (uint32_t*)malloc(n * sizeof(uint32_t));
+	if (cigar == 0) {
+		*n_cigar = 0;
+		return 0;
+	}
+	*n_cigar = n;
+
+	/* second pass: walk the path backwards, extending the current run or
+	 * opening a new one with length 1 */
+	cigar[0] = 1u << 4 | path[path_len-1].ctype;
+	last_type = path[path_len-1].ctype;
+	for (i = path_len - 2, n = 0; i >= 0; --i) {
+		if (path[i].ctype == last_type) cigar[n] += 1u << 4;
+		else {
+			cigar[++n] = 1u << 4 | path[i].ctype;
+			last_type = path[i].ctype;
+		}
+	}
+
+	return cigar;
+}
+
+#ifdef STDALN_MAIN
+/* Print the CIGAR of an alignment as <length><op> pairs. The op letter is
+ * taken from the low 4 bits (FROM_M, FROM_I, FROM_D, FROM_S -> M, I, D, S);
+ * any other value prints '?' instead of indexing past the string. */
+static void print_cigar(const AlnAln *aa)
+{
+	int i;
+	for (i = 0; i != aa->n_cigar; ++i) {
+		unsigned op = aa->cigar32[i]&0xf;
+		/* the length is unsigned; cast it so that it matches %d */
+		printf("%d%c", (int)(aa->cigar32[i]>>4), op < 4? "MIDS"[op] : '?');
+	}
+}
+
+/* Small self-test driver (built only with -DSTDALN_MAIN): runs one local,
+ * one global and one extension alignment with the BLAST parameters and
+ * prints the coordinates, CIGARs and alignment strings. */
+int main()
+{
+	AlnAln *aln_local, *aln_global, *aln_left;
+
+	aln_local  = aln_stdaln("CGTGCGATGCactgCATACGGCTCGCCTAGATCA", "AAGGGATGCTCTGCATCgCTCGGCTAGCTGT", &aln_param_blast, ALN_TYPE_LOCAL, 1);
+	aln_global = aln_stdaln("CGTGCGATGCactgCATACGGCTCGCCTAGATCA", "AAGGGATGCTCTGCATCGgCTCGGCTAGCTGT", &aln_param_blast, ALN_TYPE_GLOBAL, 1);
+//	aln_left   = aln_stdaln(     "GATGCACTGCATACGGCTCGCCTAGATCA",     "GATGCTCTGCATCGgCTCGGCTAGCTGT", &aln_param_blast, 2, 1);
+	aln_left   = aln_stdaln("CACCTTCGACTCACGTCTCATTCTCGGAGTCGAGTGGACGGTCCCTCATACACGAACAGGTTC",
+							"CACCTTCGACTTTCACCTCTCATTCTCGGACTCGAGTGGACGGTCCCTCATCCAAGAACAGGGTCTGTGAAA", &aln_param_blast, ALN_TYPE_EXTEND, 1);
+
+	printf(">%d,%d\t%d,%d\n", aln_local->start1, aln_local->end1, aln_local->start2, aln_local->end2);
+	printf("%s\n%s\n%s\n", aln_local->out1, aln_local->outm, aln_local->out2);
+
+	printf(">%d,%d\t%d,%d\t", aln_global->start1, aln_global->end1, aln_global->start2, aln_global->end2);
+	print_cigar(aln_global);
+	printf("\n%s\n%s\n%s\n", aln_global->out1, aln_global->outm, aln_global->out2);
+
+	printf(">%d\t%d,%d\t%d,%d\t", aln_left->score, aln_left->start1, aln_left->end1, aln_left->start2, aln_left->end2);
+	print_cigar(aln_left);
+	printf("\n%s\n%s\n%s\n", aln_left->out1, aln_left->outm, aln_left->out2);
+
+	aln_free_AlnAln(aln_local);
+	aln_free_AlnAln(aln_global);
+	aln_free_AlnAln(aln_left);
+	return 0;
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/stdaln.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,162 @@
+/* The MIT License
+
+   Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3@gmail.com>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  2009-07-23, 0.10.0
+
+  - Use 32-bit to store CIGAR
+
+  - Report suboptimal alignments
+
+  - Implemented half-fixed-half-open DP
+
+  2009-04-26, 0.9.10
+
+  - Allow to set a threshold for local alignment
+
+  2009-02-18, 0.9.9
+
+  - Fixed a bug when no residue matches
+
+  2008-08-04, 0.9.8
+
+  - Fixed the wrong declaration of aln_stdaln_aux()
+
+  - Avoid 0 coordinate for global alignment
+
+  2008-08-01, 0.9.7
+
+  - Change gap_end penalty to 5 in aln_param_bwa
+
+  - Add function to convert path_t to the CIGAR format
+
+  2008-08-01, 0.9.6
+
+  - The first gap now costs (gap_open+gap_ext), instead of
+    gap_open. Scoring systems are modified accordingly.
+
+  - Gap end is now correctly handled. Previously it is not correct.
+
+  - Change license to MIT.
+
+ */
+
+#ifndef LH3_STDALN_H_
+#define LH3_STDALN_H_
+
+
+#define STDALN_VERSION 0.11.0
+
+#include <stdint.h>
+
+#define FROM_M 0
+#define FROM_I 1
+#define FROM_D 2
+#define FROM_S 3
+
+#define ALN_TYPE_LOCAL  0
+#define ALN_TYPE_GLOBAL 1
+#define ALN_TYPE_EXTEND 2
+
+/* A large negative sentinel, -(2^30 - 1). Note that this is NOT the smallest
+   representable integer (INT_MIN is lower when int is 32 bits wide). */
+#define MINOR_INF -1073741823
+
+/* Scoring parameters shared by the alignment cores. */
+typedef struct
+{
+	int gap_open; /* gap open penalty; the first gap base costs gap_open + gap_ext */
+	int gap_ext;  /* gap extension penalty */
+	int gap_end;  /* end-gap penalty; the local/extend cores pass -1 when calling the global core
+	                 (NOTE(review): presumably -1 means "same as internal gaps" -- confirm) */
+
+	int *matrix;    /* substitution score matrix */
+	int row;        /* number of rows of the matrix; also selects the residue encoding
+	                   in aln_stdaln_aux() (< 10: 4 bases, < 20: 16 bases, else amino acids) */
+	int band_width; /* (initial) band width of the banded dynamic programming */
+} AlnParam;
+
+/* One step of an alignment path. */
+typedef struct
+{
+	int i, j;            /* positions in the first (i) and second (j) sequence */
+	unsigned char ctype; /* step type: FROM_M, FROM_I, FROM_D (or FROM_S) */
+} path_t;
+
+/* Result of aln_stdaln() / aln_stdaln_aux(); release with aln_free_AlnAln(). */
+typedef struct
+{
+	path_t *path; /* for advanced users... :-) raw path; element 0 is the END of the alignment */
+	int path_len; /* for advanced users... :-) number of elements in path */
+	int start1, end1; /* start and end of the first sequence, coordinates are 1-based */
+	int start2, end2; /* start and end of the second sequence, coordinates are 1-based */
+	int score, subo; /* alignment score; subo is only filled in by the local alignment */
+
+	char *out1, *out2; /* print them, and then you will know: the two gapped sequences */
+	char *outm;        /* match line: '|' at matching columns, ' ' elsewhere */
+	                   /* out1/out2/outm are only built when the last int argument of aln_stdaln() is > 0 */
+
+	int n_cigar;       /* number of elements in cigar32 */
+	uint32_t *cigar32; /* each element is length<<4 | operation (FROM_*) */
+} AlnAln;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/* High-level entry points. type is one of ALN_TYPE_*. The parameter
+	 * called do_align here is named thres in the definition: it is passed
+	 * to the local core as a threshold, and the printable out1/out2/outm
+	 * strings are built only when it is > 0. A negative len1/len2 makes
+	 * aln_stdaln_aux() use strlen(); aln_stdaln() always does so. */
+	AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
+						   int type, int do_align, int len1, int len2);
+	AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int do_align);
+	void aln_free_AlnAln(AlnAln *aa);
+
+	/* Low-level cores. They take sequences that are already encoded as
+	 * score-matrix indices and return the alignment score. */
+	int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+						path_t *path, int *path_len);
+	int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+					   path_t *path, int *path_len, int _thres, int *_subo);
+	/* G0 is the initial score; _mem is an optional caller-supplied workspace (0 = allocate internally) */
+	int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+						path_t *path, int *path_len, int G0, uint8_t *_mem);
+	/* Path to CIGAR conversion. The 32-bit form stores length<<4 | op; the
+	 * 16-bit form (kept for backward compatibility) stores op<<14 | length. */
+	uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
+	uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar);
+
+#ifdef __cplusplus
+}
+#endif
+
+/********************
+ * global variables *
+ ********************/
+
+extern AlnParam aln_param_bwa;   /* = { 37,  9,  0, aln_sm_maq, 5, 50 }; */
+extern AlnParam aln_param_blast; /* = {  5,  2,  2, aln_sm_blast, 5, 50 }; */
+extern AlnParam aln_param_nt2nt; /* = { 10,  2,  2, aln_sm_nt, 16, 75 }; */
+extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
+extern AlnParam aln_param_rd2rd; /* = { 12,  2,  2, aln_sm_blosum62, 22, 50 }; */
+
+/* common nucleotide score matrix for 16 bases */
+extern int           aln_sm_nt[], aln_sm_bwa[];
+
+/* BLOSUM62 and BLOSUM45 */
+extern int           aln_sm_blosum62[], aln_sm_blosum45[];
+
+/* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */
+extern int           aln_sm_read[];
+
+/* human-mouse score matrix for 4 bases */
+extern int           aln_sm_hs[];
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/utils.c	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,164 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <errno.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include "utils.h"
+
+/*
+ * fopen() wrapper that never returns NULL.
+ *
+ * The file name "-" selects a standard stream instead of a file: stdin when
+ * mode contains an 'r', stdout otherwise. When fopen() fails, a message
+ * tagged with func is written to stderr and the process aborts.
+ */
+FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
+{
+	FILE *stream;
+
+	if (strcmp(fn, "-") == 0) {
+		if (strstr(mode, "r") != 0) return stdin;
+		return stdout;
+	}
+
+	stream = fopen(fn, mode);
+	if (stream == 0) {
+		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
+		abort();
+	}
+	return stream;
+}
+/*
+ * freopen() wrapper: re-associates fp with the file fn opened in mode.
+ * On failure it reports func, the file name and the perror() text on stderr
+ * and aborts; on success it returns fp.
+ */
+FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
+{
+	FILE *reopened = freopen(fn, mode, fp);
+
+	if (reopened != 0) return fp;
+
+	fprintf(stderr, "[%s] fail to open file '%s': ", func, fn);
+	perror(NULL);
+	fprintf(stderr, "Abort!\n");
+	abort();
+	return fp; /* not reached */
+}
+/*
+ * gzopen() wrapper that aborts on failure.
+ *
+ * The file name "-" attaches a gz stream to a standard stream via gzdopen():
+ * stdin when mode contains an 'r', stdout otherwise. The result of gzdopen()
+ * is returned as is. When gzopen() fails, a message tagged with func is
+ * written to stderr and the process aborts.
+ */
+gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
+{
+	gzFile gz;
+
+	if (strcmp(fn, "-") == 0) {
+		FILE *std_stream = (strstr(mode, "r"))? stdin : stdout;
+		return gzdopen(fileno(std_stream), mode);
+	}
+
+	gz = gzopen(fn, mode);
+	if (gz == 0) {
+		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
+		abort();
+	}
+	return gz;
+}
+/*
+ * Report a fatal error and abort.
+ * Writes "[header] " followed by the printf-style message built from fmt
+ * and its arguments, then " Abort!", to stderr. Never returns.
+ */
+void err_fatal(const char *header, const char *fmt, ...)
+{
+	va_list ap;
+
+	fprintf(stderr, "[%s] ", header);
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	fprintf(stderr, " Abort!\n");
+	abort();
+}
+
+/*
+ * Report a fatal error with a fixed message and abort.
+ * Writes "[func] msg Abort!" to stderr. Never returns.
+ */
+void err_fatal_simple_core(const char *func, const char *msg)
+{
+	FILE *out = stderr;
+
+	fprintf(out, "[%s] %s Abort!\n", func, msg);
+	abort();
+}
+
+/*
+ * fwrite() wrapper that aborts (via err_fatal_simple_core) on a short write.
+ * Returns the number of items written, exactly as fwrite() does.
+ */
+size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
+{
+    size_t ret = fwrite(ptr, size, nmemb, stream);
+    /* fwrite() returns 0 whenever size is 0, whatever nmemb is; that is a
+       successful empty write, not an error, so it must not be fatal */
+    if (ret != nmemb && size != 0)
+    {
+        err_fatal_simple_core("fwrite", strerror(errno));
+    }
+    return ret;
+}
+
+/*
+ * printf() replacement that aborts (via err_fatal_simple_core) when writing
+ * to stdout fails. Returns the value returned by vfprintf().
+ */
+int err_printf(const char *format, ...) 
+{
+    va_list ap;
+    int n_written;
+    int saved_errno;
+
+    va_start(ap, format);
+    n_written = vfprintf(stdout, format, ap);
+    /* capture errno immediately, before any later call can overwrite it */
+    saved_errno = errno;
+    va_end(ap);
+
+    if (n_written >= 0) return n_written;
+
+    err_fatal_simple_core("vfprintf(stdout)", strerror(saved_errno));
+    return n_written;
+}
+
+/*
+ * fprintf() replacement that aborts (via err_fatal_simple_core) when writing
+ * to stream fails. Returns the value returned by vfprintf().
+ */
+int err_fprintf(FILE *stream, const char *format, ...) 
+{
+    va_list ap;
+    int n_written;
+    int saved_errno;
+
+    va_start(ap, format);
+    n_written = vfprintf(stream, format, ap);
+    /* capture errno immediately, before any later call can overwrite it */
+    saved_errno = errno;
+    va_end(ap);
+
+    if (n_written >= 0) return n_written;
+
+    err_fatal_simple_core("vfprintf", strerror(saved_errno));
+    return n_written;
+}
+
+/*
+ * fflush() wrapper that aborts (via err_fatal_simple_core) when flushing
+ * fails. Returns the value returned by fflush().
+ */
+int err_fflush(FILE *stream) 
+{
+    int status = fflush(stream);
+
+    if (status == 0) return status;
+
+    err_fatal_simple_core("fflush", strerror(errno));
+    return status;
+}
+
+/*
+ * fclose() wrapper that aborts (via err_fatal_simple_core) when closing
+ * fails. Returns the value returned by fclose().
+ */
+int err_fclose(FILE *stream) 
+{
+    int status = fclose(stream);
+
+    if (status == 0) return status;
+
+    err_fatal_simple_core("fclose", strerror(errno));
+    return status;
+}
+
+/* CPU time used by this process so far, in seconds: user time plus system
+ * time as reported by getrusage(RUSAGE_SELF). */
+double cputime()
+{
+	struct rusage usage;
+	double whole_secs, micro_secs;
+
+	getrusage(RUSAGE_SELF, &usage);
+	whole_secs = usage.ru_utime.tv_sec + usage.ru_stime.tv_sec;
+	micro_secs = usage.ru_utime.tv_usec + usage.ru_stime.tv_usec;
+	return whole_secs + 1e-6 * micro_secs;
+}
+
+/* Wall-clock time in seconds (with microsecond resolution) as reported by
+ * gettimeofday(). */
+double realtime()
+{
+	struct timeval tp;
+	/* The timezone argument is obsolete and its result was never used;
+	 * POSIX leaves the behaviour unspecified when it is not NULL. */
+	gettimeofday(&tp, NULL);
+	return tp.tv_sec + tp.tv_usec * 1e-6;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/utils.h	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,73 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+#ifndef LH3_UTILS_H
+#define LH3_UTILS_H
+
+#include <stdio.h>
+#include <zlib.h>
+
+#ifdef __GNUC__
+// Tell GCC to validate printf format string and args
+#define ATTRIBUTE(list) __attribute__ (list)
+#else
+#define ATTRIBUTE(list)
+#endif
+
+
+
+#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
+#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
+#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
+#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
+#define xassert(cond, msg) if ((cond) == 0) err_fatal_simple_core(__func__, msg)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	void err_fatal(const char *header, const char *fmt, ...);
+	void err_fatal_simple_core(const char *func, const char *msg);
+	FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
+	FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
+	gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
+    size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
+	int err_fprintf(FILE *stream, const char *format, ...)
+        ATTRIBUTE((format(printf, 2, 3)));
+	int err_printf(const char *format, ...)
+        ATTRIBUTE((format(printf, 1, 2)));
+	int err_fflush(FILE *stream);
+	int err_fclose(FILE *stream);
+
+	double cputime();
+	double realtime();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bwa-0.6.2/xa2multi.pl	Fri Jul 18 07:55:14 2014 -0400
@@ -0,0 +1,25 @@
+#!/usr/bin/perl -w
+
+# Expand the alternative hits stored in the XA:Z: tag of SAM records into
+# additional SAM lines. Every input line is echoed unchanged; for a line that
+# carries an XA tag, one extra record (flagged 0x100) is printed after it for
+# each "chr,[+-]pos,CIGAR,NM;" entry of the tag.
+
+use strict;
+use warnings;
+
+while (my $line = <>) {
+	print $line;
+
+	# lines without an XA tag need no further work
+	my ($alt_hits) = $line =~ /\tXA:Z:(\S+)/;
+	next unless defined $alt_hits;
+
+	my @t = split("\t", $line);
+
+	# RNEXT of the primary record is relative to the primary's own RNAME:
+	# '=' there means "mate is on $t[2]". Resolve it to a real name first so
+	# that it can be re-expressed relative to each alternative hit.
+	my $mate_chr = ($t[6] eq '=')? $t[2] : $t[6];
+
+	while ($alt_hits =~ /([^,;]+),([-+]\d+),([^,]+),(\d+);/g) {
+		my ($chr, $pos, $cigar, $nm) = ($1, $2, $3, $4);
+
+		# '=' only when the mate really is on the alternative hit's chromosome
+		my $rnext = ($mate_chr eq $chr)? '=' : $mate_chr; # FIXME: TLEN/ISIZE is not calculated!
+		my $seq = $t[9];
+		my $phred = $t[10];
+		# if alternative alignment has other orientation than primary,
+		# then print the reverse (complement) of sequence and phred string
+		if ((($t[1]&0x10)>0) xor ($pos<0)) {
+			$seq = reverse $seq;
+			$seq =~ tr/ACGTacgt/TGCAtgca/;
+			$phred = reverse $phred;
+		}
+		# flag: mark as secondary (0x100), keep the bits selected by 0x6e9,
+		# and set the strand bit (0x10) from the sign of the position
+		print(join("\t", $t[0], 0x100|($t[1]&0x6e9)|($pos<0?0x10:0), $chr, abs($pos), 0, $cigar, $rnext, $t[7], 0, $seq, $phred, "NM:i:$nm"), "\n");
+	}
+}