Repository 'neat_genreads'
hg clone https://toolshed.g2.bx.psu.edu/repos/thondeboer/neat_genreads

Changeset 0:6e75a84e9338 (2018-05-15)
Next changeset 1:362e0b0f7024 (2018-05-15)
Commit message:
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
added:
BIOPYTHON_LICENSE
LICENSE
README.md
computeFraglen.xml
computeGC.xml
docs/PE_SE_reads.png
docs/flow_new.png
genMutModel.xml
genReads.py
genSeqErrorModel.xml
mergeJobs.py
models/MutModel_BRCA_US_ICGC.p
models/MutModel_CLLE-ES_ICGC.p
models/MutModel_NA12878.p.gz
models/MutModel_SKCM-US_ICGC.p
models/README.md
models/errorModel_pacbio_toy.p
models/errorModel_toy.p
models/fraglenModel_toy.p
models/gcBias_toy.p
models/genReadsTumorTutorial.zip
neat_genreads.xml
new/chrMT-PE-VCF-BAM-Targeted.bam
new/chrMT-PE-VCF-BAM-Targeted.vcf
new/chrMT-PE-VCF-BAM-Targeted_read1.fq
new/chrMT-PE-VCF-BAM-Targeted_read2.fq
new/chrMT-PE-VCF-BAM-vcf.bam
new/chrMT-PE-VCF-BAM-vcf.vcf
new/chrMT-PE-VCF-BAM-vcf_read1.fq
new/chrMT-PE-VCF-BAM-vcf_read2.fq
new/chrMT-PE-VCF-BAM.bam
new/chrMT-PE-VCF-BAM.bam.bai
new/chrMT-PE-VCF-BAM.vcf
new/chrMT-PE-VCF-BAM_read1.fq
new/chrMT-PE-VCF-BAM_read2.fq
new/chrMT-PE_read1.fq
new/chrMT-PE_read2.fq
new/chrMT.fa.fai
new/chrMT_read1.fq
py/OutputFileWriter.py
py/OutputFileWriter.pyc
py/SequenceContainer.py
py/SequenceContainer.pyc
py/biopython_modified_bgzf.py
py/biopython_modified_bgzf.pyc
py/cigar.py
py/cigar.pyc
py/inputChecking.py
py/inputChecking.pyc
py/probability.py
py/probability.pyc
py/refFunc.py
py/refFunc.pyc
py/vcfFunc.py
py/vcfFunc.pyc
test-data/chrMT-PE-100reads_read1_genSeqErrorModel.p
test-data/chrMT-PE-VCF-BAM-BOOLEANS-genMutModel.p
test-data/chrMT-PE-VCF-BAM-EXCLUDELIST-genMutModel.p
test-data/chrMT-PE-VCF-BAM-INCLUDELIST-genMutModel.p
test-data/chrMT-PE-VCF-BAM-Targeted.bam
test-data/chrMT-PE-VCF-BAM-Targeted.bam.bai
test-data/chrMT-PE-VCF-BAM-Targeted.vcf
test-data/chrMT-PE-VCF-BAM-Targeted_read1.fq
test-data/chrMT-PE-VCF-BAM-Targeted_read2.fq
test-data/chrMT-PE-VCF-BAM-computeGC.p
test-data/chrMT-PE-VCF-BAM-fraglen.p
test-data/chrMT-PE-VCF-BAM-genMutModel.p
test-data/chrMT-PE-VCF-BAM-gz.bam
test-data/chrMT-PE-VCF-BAM-gz.bam.bai
test-data/chrMT-PE-VCF-BAM-gz.vcf
test-data/chrMT-PE-VCF-BAM-gz_read1.fq.gz
test-data/chrMT-PE-VCF-BAM-gz_read2.fq.gz
test-data/chrMT-PE-VCF-BAM-panic.bam
test-data/chrMT-PE-VCF-BAM-panic.bam.bai
test-data/chrMT-PE-VCF-BAM-panic.vcf
test-data/chrMT-PE-VCF-BAM-panic_read1.fq
test-data/chrMT-PE-VCF-BAM-panic_read2.fq
test-data/chrMT-PE-VCF-BAM-vcf.bam
test-data/chrMT-PE-VCF-BAM-vcf.bam.bai
test-data/chrMT-PE-VCF-BAM-vcf.vcf
test-data/chrMT-PE-VCF-BAM-vcf_read1.fq
test-data/chrMT-PE-VCF-BAM-vcf_read2.fq
test-data/chrMT-PE-VCF-BAM.bam
test-data/chrMT-PE-VCF-BAM.bam.bai
test-data/chrMT-PE-VCF-BAM.genomecov
test-data/chrMT-PE-VCF-BAM.vcf
test-data/chrMT-PE-VCF-BAM_read1.fq
test-data/chrMT-PE-VCF-BAM_read2.fq
test-data/chrMT-PE_read1.fq
test-data/chrMT-PE_read1_genSeqErrorModel.p
test-data/chrMT-PE_read2.fq
test-data/chrMT-Targets.bed
test-data/chrMT.fa
test-data/chrMT.fa.fai
test-data/chrMT.fa.trinucCounts
test-data/chrMT_read1.fq
test-data/chrMT_read1_genSeqErrorModel.p
test-data/create_test-data
tool-data/all_fasta.loc.sample
tool_data_table_conf.xml.sample
utilities/README.md
utilities/computeFraglen.py
utilities/computeGC.py
utilities/deprecated/FindNucleotideContextOnReference.healthy.pl
utilities/deprecated/FindNucleotideContextOnReference.pl
utilities/deprecated/README.md
utilities/genMutModel.py
utilities/genSeqErrorModel.py
utilities/plotMutModel.py
utilities/validateBam.py
utilities/validateFQ.py
utilities/vcf_compare_OLD.py
b
diff -r 000000000000 -r 6e75a84e9338 BIOPYTHON_LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BIOPYTHON_LICENSE Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,19 @@
+                 Biopython License Agreement
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
b
diff -r 000000000000 -r 6e75a84e9338 LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,674 @@\n+                    GNU GENERAL PUBLIC LICENSE\n+                       Version 3, 29 June 2007\n+\n+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+                            Preamble\n+\n+  The GNU General Public License is a free, copyleft license for\n+software and other kinds of works.\n+\n+  The licenses for most software and other practical works are designed\n+to take away your freedom to share and change the works.  By contrast,\n+the GNU General Public License is intended to guarantee your freedom to\n+share and change all versions of a program--to make sure it remains free\n+software for all its users.  We, the Free Software Foundation, use the\n+GNU General Public License for most of our software; it applies also to\n+any other work released this way by its authors.  You can apply it to\n+your programs, too.\n+\n+  When we speak of free software, we are referring to freedom, not\n+price.  Our General Public Licenses are designed to make sure that you\n+have the freedom to distribute copies of free software (and charge for\n+them if you wish), that you receive source code or can get it if you\n+want it, that you can change the software or use pieces of it in new\n+free programs, and that you know you can do these things.\n+\n+  To protect your rights, we need to prevent others from denying you\n+these rights or asking you to surrender the rights.  Therefore, you have\n+certain responsibilities if you distribute copies of the software, or if\n+you modify it: responsibilities to respect the freedom of others.\n+\n+  For example, if you distribute copies of such a program, whether\n+gratis or for a fee, you must pass on to the recipients the same\n+freedoms that you received.  You must make sure that they, too, receive\n+or can get the source code.  
And you must show them these terms so they\n+know their rights.\n+\n+  Developers that use the GNU GPL protect your rights with two steps:\n+(1) assert copyright on the software, and (2) offer you this License\n+giving you legal permission to copy, distribute and/or modify it.\n+\n+  For the developers\' and authors\' protection, the GPL clearly explains\n+that there is no warranty for this free software.  For both users\' and\n+authors\' sake, the GPL requires that modified versions be marked as\n+changed, so that their problems will not be attributed erroneously to\n+authors of previous versions.\n+\n+  Some devices are designed to deny users access to install or run\n+modified versions of the software inside them, although the manufacturer\n+can do so.  This is fundamentally incompatible with the aim of\n+protecting users\' freedom to change the software.  The systematic\n+pattern of such abuse occurs in the area of products for individuals to\n+use, which is precisely where it is most unacceptable.  Therefore, we\n+have designed this version of the GPL to prohibit the practice for those\n+products.  If such problems arise substantially in other domains, we\n+stand ready to extend this provision to those domains in future versions\n+of the GPL, as needed to protect the freedom of users.\n+\n+  Finally, every program is threatened constantly by software patents.\n+States should not allow patents to restrict development and use of\n+software on general-purpose computers, but in those that do, we wish to\n+avoid the special danger that patents applied to a free program could\n+make it effectively proprietary.  To prevent this, the GPL assures that\n+patents cannot be used to render the program non-free.\n+\n+  The precise terms and conditions for copying, distribution and\n+modification follow.\n+\n+                       TERMS AND CONDITIONS\n+\n+  0. 
Definitions.\n+\n+  "This License" refers to version 3 of the GNU General Public License.\n+\n+  "Copyright" also means copyright-like laws that apply to other kinds of\n+works, such as semiconductor masks.\n+\n+  "The Program" refers to a'..b'CE OF THE PROGRAM\n+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\n+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n+\n+  16. Limitation of Liability.\n+\n+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\n+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\n+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\n+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\n+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\n+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\n+SUCH DAMAGES.\n+\n+  17. Interpretation of Sections 15 and 16.\n+\n+  If the disclaimer of warranty and limitation of liability provided\n+above cannot be given local legal effect according to their terms,\n+reviewing courts shall apply local law that most closely approximates\n+an absolute waiver of all civil liability in connection with the\n+Program, unless a warranty or assumption of liability accompanies a\n+copy of the Program in return for a fee.\n+\n+                     END OF TERMS AND CONDITIONS\n+\n+            How to Apply These Terms to Your New Programs\n+\n+  If you develop a new program, and you want it to be of the greatest\n+possible use to the public, the best way to achieve this is to make it\n+free software which everyone can redistribute and change under these terms.\n+\n+  To do so, attach the following notices to the program.  
It is safest\n+to attach them to the start of each source file to most effectively\n+state the exclusion of warranty; and each file should have at least\n+the "copyright" line and a pointer to where the full notice is found.\n+\n+    {one line to give the program\'s name and a brief idea of what it does.}\n+    Copyright (C) {year}  {name of author}\n+\n+    This program is free software: you can redistribute it and/or modify\n+    it under the terms of the GNU General Public License as published by\n+    the Free Software Foundation, either version 3 of the License, or\n+    (at your option) any later version.\n+\n+    This program is distributed in the hope that it will be useful,\n+    but WITHOUT ANY WARRANTY; without even the implied warranty of\n+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n+    GNU General Public License for more details.\n+\n+    You should have received a copy of the GNU General Public License\n+    along with this program.  If not, see <http://www.gnu.org/licenses/>.\n+\n+Also add information on how to contact you by electronic and paper mail.\n+\n+  If the program does terminal interaction, make it output a short\n+notice like this when it starts in an interactive mode:\n+\n+    {project}  Copyright (C) {year}  {fullname}\n+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n+    This is free software, and you are welcome to redistribute it\n+    under certain conditions; type `show c\' for details.\n+\n+The hypothetical commands `show w\' and `show c\' should show the appropriate\n+parts of the General Public License.  
Of course, your program\'s commands\n+might be different; for a GUI interface, you would use an "about box".\n+\n+  You should also get your employer (if you work as a programmer) or school,\n+if any, to sign a "copyright disclaimer" for the program, if necessary.\n+For more information on this, and how to apply and follow the GNU GPL, see\n+<http://www.gnu.org/licenses/>.\n+\n+  The GNU General Public License does not permit incorporating your program\n+into proprietary programs.  If your program is a subroutine library, you\n+may consider it more useful to permit linking proprietary applications with\n+the library.  If this is what you want to do, use the GNU Lesser General\n+Public License instead of this License.  But first, please read\n+<http://www.gnu.org/philosophy/why-not-lgpl.html>.\n'
b
diff -r 000000000000 -r 6e75a84e9338 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Tue May 15 02:39:53 2018 -0400
[
b"@@ -0,0 +1,294 @@\n+# neat-genreads\n+NEAT-genReads is a fine-grained read simulator. GenReads simulates real-looking data using models learned from specific datasets. There are several supporting utilities for generating models used for simulation.\n+\n+This is an in-progress v2.0 of the software. For a previous stable release please see: [genReads1](https://github.com/zstephens/genReads1)\n+\n+\n+Table of Contents\n+=================\n+\n+  * [neat-genreads](#neat-genreads)\n+  * [Table of Contents](#table-of-contents)\n+    * [Requirements](#requirements)\n+    * [Usage](#usage)\n+    * [Functionality](#functionality)\n+    * [Examples](#examples)\n+      * [Whole genome simulation](#whole-genome-simulation)\n+      * [Targeted region simulation](#targeted-region-simulation)\n+      * [Insert specific variants](#insert-specific-variants)\n+      * [Single end reads](#single-end-reads)\n+      * [Large single end reads](#large-single-end-reads)\n+      * [Parallelizing simulation](#parallelizing-simulation)\n+  * [Utilities](#utilities)\n+    * [computeGC.py](#computegcpy)\n+    * [computeFraglen.py](#computefraglenpy)\n+    * [genMutModel.py](#genmutmodelpy)\n+    * [genSeqErrorModel.py](#genseqerrormodelpy)\n+    * [plotMutModel.py](#plotmutmodelpy)\n+    * [vcf_compare_OLD.py](#vcf_compare_oldpy)\n+      * [Note on Sensitive Patient Data](#note-on-sensitive-patient-data)\n+\n+\n+\n+\n+## Requirements\n+\n+* Python 2.7\n+* Numpy 1.9.1+\n+\n+## Usage\n+Here's the simplest invocation of genReads using default parameters. This command produces a single ended fastq file with reads of length 101, ploidy 2, coverage 10X, using the default sequencing substitution, GC% bias, and mutation rate models.\n+\n+```\n+python genReads.py -r ref.fa -R 101 -o simulated_data\n+``` \n+\n+The most commonly added options are --pe, --bam, --vcf, and -c. 
\n+\n+\n+Option           |  Description\n+------           |:----------\n+-h, --help       |  Displays usage information\n+-r <str>         |  Reference sequence file in fasta format. A reference index (.fai) will be created if one is not found in the directory of the reference as [reference filename].fai. Required. The index can be created using samtools faidx.\n+-R <int>         |  Read length. Required. \n+-o <str>         |  Output prefix. Use this option to specify where and what to call output files. Required\n+-c <float>       |  Average coverage across the entire dataset. Default: 10\n+-e <str>         |  Sequencing error model pickle file\n+-E <float>       |  Average sequencing error rate. The sequencing error rate model is rescaled to make this the average value. \n+-p <int>         |  ploidy [2]\n+-t <str>         |  bed file containing targeted regions; default coverage for targeted regions is 98% of -c option; default coverage outside targeted regions is 2% of -c option\n+-to <float>      |  off-target coverage scalar [0.02]\n+-m <str>         |  mutation model pickle file\n+-M <float>       |  Average mutation rate. The mutation rate model is rescaled to make this the average value. Must be between 0 and 0.3. These random mutations are inserted in addition to the once specified in the -v option.\n+-s <str>         |  input sample model\n+-v <str>         |  Input VCF file. Variants from this VCF will be inserted into the simulated sequence with 100% certainty.\n+--pe <int> <int> |  Paired-end fragment length mean and standard deviation. To produce paired end data, one of --pe or --pe-model must be specified.\n+--pe-model <str> |  Empirical fragment length distribution. Can be generated using [computeFraglen.py](#computefraglenpy). To produce paired end data, one of --pe or --pe-model must be specified.\n+--gc-model <str> |  Empirical GC coverage bias distribution.  
Can be generated using [computeGC.py](#computegcpy)\n+--job <int> <int>|  Jobs IDs for generating reads in parallel\n+--nnr            |  save non-N ref regions (for parallel jobs)\n+--bam            |  Output golden BAM file\n+--vcf            |  Output golden VCF file\n+--rng <int>      "..b'\n+        -d                          \\\n+        -ibam normal.bam            \\\n+        -g reference.fa\n+```\n+\n+```\n+python computeGC.py                 \\\n+        -r reference.fa             \\\n+        -i genomecovfile            \\\n+        -w [sliding window length]  \\\n+        -o /path/to/model.p\n+```\n+\n+## computeFraglen.py\n+\n+Computes empirical fragment length distribution from sample data.\n+Takes SAM file via stdin:\n+\n+    ./samtools view toy.bam | python computeFraglen.py\n+\n+and creates fraglen.p model in working directory.\n+\n+## genMutModel.py\n+\n+Takes references genome and TSV file to generate mutation models:\n+\n+```\n+python genMutModel.py               \\\n+        -r hg19.fa                  \\\n+        -m inputVariants.tsv        \\\n+        -o /home/me/models.p\n+```\n+\n+Trinucleotides are identified in the reference genome and the variant file. 
Frequencies of each trinucleotide transition are calculated and output as a pickle (.p) file.\n+\n+## genSeqErrorModel.py\n+\n+Generates sequence error model for genReads.py -e option.\n+This script needs revision, to improve the quality-score model eventually, and to include code to learn sequencing errors from pileup data.\n+\n+```\n+python genSeqErrorModel.py                            \\\n+        -i input_read1.fq (.gz) / input_read1.sam     \\\n+        -o output.p                                   \\\n+        -i2 input_read2.fq (.gz) / input_read2.sam    \\\n+        -p input_alignment.pileup                     \\\n+        -q quality score offset [33]                  \\\n+        -Q maximum quality score [41]                 \\\n+        -n maximum number of reads to process [all]   \\\n+        -s number of simulation iterations [1000000]  \\\n+        --plot perform some optional plotting\n+```\n+## plotMutModel.py\n+\n+Performs plotting and comparison of mutation models generated from genMutModel.py.\n+\n+```\n+python plotMutModel.py                                        \\\n+        -i model1.p [model2.p] [model3.p]...                  \\\n+        -l legend_label1 [legend_label2] [legend_label3]...   
\\\n+        -o path/to/pdf_plot_prefix\n+```\n+\n+## vcf_compare_OLD.py\n+\n+Tool for comparing VCF files.\n+\n+```\n+python vcf_compare_OLD.py\n+        --version          show program\'s version number and exit      \\\n+        -h, --help         show this help message and exit             \\\n+        -r <ref.fa>        * Reference Fasta                           \\\n+        -g <golden.vcf>    * Golden VCF                                \\\n+        -w <workflow.vcf>  * Workflow VCF                              \\\n+        -o <prefix>        * Output Prefix                             \\\n+        -m <track.bed>     Mappability Track                           \\\n+        -M <int>           Maptrack Min Len                            \\\n+        -t <regions.bed>   Targetted Regions                           \\\n+        -T <int>           Min Region Len                              \\\n+        -c <int>           Coverage Filter Threshold [15]              \\\n+        -a <float>         Allele Freq Filter Threshold [0.3]          \\\n+        --vcf-out          Output Match/FN/FP variants [False]         \\\n+        --no-plot          No plotting [False]                         \\\n+        --incl-homs        Include homozygous ref calls [False]        \\\n+        --incl-fail        Include calls that failed filters [False]   \\\n+        --fast             No equivalent variant detection [False]\n+```\n+Mappability track examples: https://github.com/zstephens/neat-repeat/tree/master/example_mappabilityTracks\n+\n+### Note on Sensitive Patient Data\n+ICGC\'s "Access Controlled Data" documention can be found at http://docs.icgc.org/access-controlled-data. To have access to controlled germline data, a DACO must be\n+submitted. Open tier data can be obtained without a DACO, but germline alleles that do not match the reference genome are masked and replaced with the reference\n+allele. Controlled data includes unmasked germline alleles.\n+\n+\n+\n+\n'
b
diff -r 000000000000 -r 6e75a84e9338 computeFraglen.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/computeFraglen.xml Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,46 @@
+<tool id="computeFraglen"
+      name="computeFraglen"
+      version="1.0.0"
+   profile="16.04">
+  <!-- Galaxy wrapper around utilities/computeFraglen.py: the BAM is decoded to
+       SAM text by samtools and piped to the script, whose fraglen.p result is
+       collected from the job working directory. -->
+  <description>computes empirical fragment length distribution from sample data in BAM format. Creates model file for use in NEAT-genReads</description>
+  <requirements>
+    <requirement type="package" version="0.1.19">samtools</requirement>
+    <requirement type="package" version="1.9.1">numpy</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+    <![CDATA[
+    ## Plain "samtools view" with no extra flags, matching the documented usage:
+    ##   samtools view toy.bam | python computeFraglen.py
+    samtools view '$bam_file'
+ | python2 '$__tool_directory__/utilities/computeFraglen.py'
+    ]]>
+  </command>
+  <inputs>
+    <param name="bam_file"
+        type="data"
+    format="bam"
+    label="The BAM file to calculate the fragment length distribution from"
+    help="This will be used by SAMTOOLS to convert BAM to SAM as preparation by the computeFraglen tool" 
+ />
+
+  </inputs>
+  <outputs>
+ <!-- No metadata_source here: this tool has no "in_type" parameter to copy metadata from. -->
+ <data format="txt"
+   name="computeGC_modelfile"
+   from_work_dir="fraglen.p"
+   label="${os.path.splitext($bam_file.name)[0]}_fraglen.p">
+ </data>
+
+  </outputs>
+  <tests>
+  <test>
+ <param name="bam_file" value="chrMT-PE-VCF-BAM.bam"/>
+ <output name="computeGC_modelfile" file="chrMT-PE-VCF-BAM-fraglen.p" compare="diff"/>
+ </test>
+
+  </tests>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 computeGC.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/computeGC.xml Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,95 @@
+<tool id="computeGC"
+      name="computeGC"
+      version="1.0.0"
+   profile="16.04">
+  <!-- Galaxy wrapper around utilities/computeGC.py: bedtools genomecov writes the
+       per-base coverage of the BAM to out.genomecov, which the script combines
+       with the reference FASTA to produce the GC bias model file. -->
+  <description>computes GC% coverage bias distribution from sample in BAM format. Creates model file for use in NEAT-genReads</description>
+  <requirements>
+    <requirement type="package" version="2.25.0">bedtools</requirement>
+    <requirement type="package" version="1.9.1">numpy</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+    <![CDATA[
+    ## Resolve the reference FASTA once; both steps below use the same file.
+    ## For a built-in reference the "path" column of the all_fasta data table is reached through .fields
+    #if $in_type.input_type == "built-in":
+      #set $ref_path = $in_type.reference.fields.path
+    #else:
+      #set $ref_path = $in_type.reference
+    #end if
+    ## Step 1: coverage for each base of the BAM, written to out.genomecov
+    bedtools genomecov -d -ibam '$bam_file'
+   -g '$ref_path'
+ > out.genomecov
+    ## Step 2: build the model from the coverage file and the reference
+ && python2 '$__tool_directory__/utilities/computeGC.py'
+   -r '$ref_path'
+ -i out.genomecov
+ -o '$computeGC_modelfile'
+ -w $windowsize
+    ]]>
+  </command>
+  <inputs>
+    <param name="bam_file"
+        type="data"
+    format="bam"
+    label="The BAM file to be used to calculate the GC bias from [-ibam]"
+    help="This will be used by BEDTOOLS to calculate the coverage for each base as preparation by the computeGC tool" 
+ />
+
+ <conditional name="in_type">
+   <param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?">
+ <option value="built-in">Built-in</option>
+ <option value="history">History file</option>
+   </param>
+   <when value="built-in">
+ <param name="reference"
+    type="select"
+    label="Select a built-in reference sequence"
+    help="The reference sequence that will be used as the basis for the simulated reads">
+   <options from_data_table="all_fasta" />
+ </param>
+   </when>
+   <when value="history">
+ <param name="reference"
+    type="data"
+    format="fasta"
+    label="The reference sequence (FASTA format)"
+    help="The reference sequence that will be used as the basis for the simulated reads"
+ />
+   </when>
+ </conditional>
+
+ <param name="windowsize"
+  type="integer"
+  value="10"
+  size="4"
+  min="1"
+  label="Window size"
+  help="Sliding window length [-w]"
+ />
+
+  </inputs>
+  <outputs>
+ <data format="txt"
+   name="computeGC_modelfile"
+ label="${bam_file.name.replace('.bam','')}_computeGC.p"
+ metadata_source="in_type.reference">
+ </data>
+
+  </outputs>
+  <tests>
+  <test>
+ <param name="bam_file" value="chrMT-PE-VCF-BAM.bam"/>
+ <conditional name="in_type">
+ <param name="input_type" value="history"/>
+   <param name="reference" value="chrMT.fa" format="fasta"/>
+ </conditional>
+ <param name="windowsize" value="10"/>
+ <output name="computeGC_modelfile" file="chrMT-PE-VCF-BAM-computeGC.p" compare="diff"/>
+ </test>
+
+  </tests>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 docs/PE_SE_reads.png
b
Binary file docs/PE_SE_reads.png has changed
b
diff -r 000000000000 -r 6e75a84e9338 docs/flow_new.png
b
Binary file docs/flow_new.png has changed
b
diff -r 000000000000 -r 6e75a84e9338 genMutModel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genMutModel.xml Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,189 @@
+<tool id="genMutModel"
+      name="genMutModel"
+      version="1.0.0"
+   profile="16.04">
+  <!-- Galaxy wrapper around utilities/genMutModel.py: links the VCF and the
+       reference FASTA to fixed file names, then runs the script to write the
+       mutation model (and, on request, the reference trinucleotide counts). -->
+  <description>generates a mutation model based on provided mutations. Creates model file for use in NEAT-genReads</description>
+  <requirements>
+    <requirement type="package" version="1.9.1">numpy</requirement>
+    <requirement type="package">matplotlib</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+    <![CDATA[
+ ## Filenames and extensions are important, so make some links to make life easier downstream
+ ## Symbolic links for both inputs: a hard link fails when the source lives on another filesystem
+ ln -s '$mutation_file' mutation_file.vcf
+ #if $in_type.input_type == "built-in":
+ && ln -s '${in_type.reference.fields.path}' reference.fa
+ #else:
+ && ln -s '${in_type.reference}' reference.fa
+ #end if
+    && python2 '$__tool_directory__/utilities/genMutModel.py'
+ -r reference.fa
+ -m mutation_file.vcf
+ -o '$genMutModel_modelfile'
+ #if $misc.include_file:
+   -bi '$misc.include_file'
+ #end if
+ #if $misc.exclude_file:
+   -be '$misc.exclude_file'
+ #end if
+ ## Each boolean expands to its truevalue flag when checked and to an empty string otherwise
+ $misc.save_trinuc
+ $misc.no_whitelist
+ $misc.skip_common
+ ]]>
+  </command>
+  <inputs>
+
+    <param name="mutation_file"
+        type="data"
+    format="vcf"
+    label="The VCF file to use as the definition of errors in the sequence [-m]"
+    help="Trinucleotides are identified in the reference genome and the variant file. Frequencies of each trinucleotide transition are calculated and output as a pickle (.p) file" 
+ />
+
+ <conditional name="in_type">
+   <param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?">
+ <option value="built-in">Built-in</option>
+ <option value="history">History file</option>
+   </param>
+   <when value="built-in">
+ <param name="reference"
+    type="select"
+    label="Select a built-in reference sequence"
+    help="The reference sequence that will be used as the basis for the simulated reads">
+   <options from_data_table="all_fasta" />
+ </param>
+   </when>
+   <when value="history">
+ <param name="reference"
+    type="data"
+    format="fasta"
+    label="The reference sequence (FASTA format)"
+    help="The reference sequence that will be used as the basis for the simulated reads"
+ />
+   </when>
+ </conditional>
+
+ <section name="misc"
+  title="Miscellaneous settings"
+  expanded="false"
+  help="Miscellaneous settings. In/Exclude regions and other settings"
+ >
+ <param name="include_file"
+    optional="true"
+    type="data"
+    format="bed"
+    label="Only use mutations falling inside this region [-bi]"
+    help="" 
+ />
+
+ <param name="exclude_file"
+    optional="true"
+    type="data"
+    format="bed"
+    label="Exclude mutations falling inside this region [-be]"
+    help="BED file of regions whose mutations are left out of the model" 
+ />
+
+ <param name="save_trinuc"
+    type="boolean"
+    checked="false"
+    truevalue="--save-trinuc"
+    falsevalue=""
+    label="Save the trinucleotide counts for ref [--save-trinuc]"
+    help=""
+ />
+
+ <param name="no_whitelist"
+    type="boolean"
+    checked="false"
+    truevalue="--no-whitelist"
+    falsevalue=""
+    label="Allow any non-standard reference [--no-whitelist]"
+    help=""
+ />
+
+ <param name="skip_common"
+    type="boolean"
+    checked="false"
+    truevalue="--skip-common"
+    falsevalue=""
+    label="Do not save common SNPs and high mutation regions"
+    help=""
+ />
+ </section>
+  </inputs>
+  <outputs>
+ <data format="txt"
+   name="genMutModel_modelfile"
+   label="${os.path.splitext($mutation_file.name)[0]}_genMutModel.p"
+   metadata_source="in_type.reference">
+ </data>
+
+ <data format="simple"
+       name="trinuc_file"
+   from_work_dir="reference.fa.trinucCounts"
+       label="${os.path.splitext($mutation_file.name)[0]}_trinucCounts.tsv"
+       metadata_source="in_type.reference">
+   <filter>misc['save_trinuc']</filter>
+ </data>
+
+  </outputs>
+  <help>
+  </help>
+ <tests>
+
+ <test>
+ <conditional name="in_type">
+ <param name="input_type" value="history"/>
+   <param name="reference" value="chrMT.fa" format="fasta"/>
+ </conditional>
+ <param name="mutation_file" value="chrMT-PE-VCF-BAM.vcf"/>
+ <output name="genMutModel_modelfile" value="chrMT-PE-VCF-BAM-genMutModel.p" compare="diff"/>
+ </test>
+
+ <test>
+ <conditional name="in_type">
+ <param name="input_type" value="history"/>
+   <param name="reference" value="chrMT.fa" format="fasta"/>
+ </conditional>
+ <param name="mutation_file" value="chrMT-PE-VCF-BAM.vcf"/>
+ <section name="misc">
+ <param name="include_file" value="chrMT-Targets.bed"/>
+ </section>
+ <output name="genMutModel_modelfile" value="chrMT-PE-VCF-BAM-INCLUDELIST-genMutModel.p" compare="diff"/>
+ </test>
+
+ <test>
+ <conditional name="in_type">
+ <param name="input_type" value="history"/>
+   <param name="reference" value="chrMT.fa" format="fasta"/>
+ </conditional>
+ <param name="mutation_file" value="chrMT-PE-VCF-BAM.vcf"/>
+ <section name="misc">
+ <param name="exclude_file" value="chrMT-Targets.bed"/>
+ </section>
+ <output name="genMutModel_modelfile" value="chrMT-PE-VCF-BAM-EXCLUDELIST-genMutModel.p" compare="diff"/>
+ </test>
+
+ <test>
+ <conditional name="in_type">
+ <param name="input_type" value="history"/>
+   <param name="reference" value="chrMT.fa" format="fasta"/>
+ </conditional>
+ <param name="mutation_file" value="chrMT-PE-VCF-BAM.vcf"/>
+ <section name="misc">
+ <param name="save_trinuc" value="true"/>
+ <param name="no_whitelist" value="true"/>
+ <param name="skip_common" value="true"/>
+ </section>
+ <output name="genMutModel_modelfile" value="chrMT-PE-VCF-BAM-BOOLEANS-genMutModel.p" compare="diff"/>
+ <output name="trinuc_file" value="chrMT.fa.trinucCounts" compare="diff"/>
+ </test>
+
+ </tests>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 genReads.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genReads.py Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,794 @@\n+#!/usr/bin/env python\n+# encoding: utf-8\n+""" ////////////////////////////////////////////////////////////////////////////////\n+   ///                                                                          ///\n+  ///       genReads.py                                                        ///\n+ ///        VERSION 2.0: HARDER, BETTER, FASTER, STRONGER!                    ///\n+///////                                                                      //////\n+   ///      Variant and read simulator for benchmarking NGS workflows          ///\n+  ///                                                                         ///\n+ ///        Written by:     Zach Stephens                                    ///\n+///////     For:            DEPEND Research Group, UIUC                     ///////\n+   ///      Date:           May 29, 2015                                       ///\n+  ///       Contact:        zstephe2@illinois.edu                             ///\n+ ///                                                                         ///\n+/////////////////////////////////////////////////////////////////////////////// """\n+\n+import os\n+import sys\n+import copy\n+import random\n+import re\n+import time\n+import bisect\n+import cPickle as pickle\n+import numpy as np\n+#import matplotlib.pyplot as mpl\n+import argparse\n+\n+# absolute path to this script\n+SIM_PATH = \'/\'.join(os.path.realpath(__file__).split(\'/\')[:-1])\n+sys.path.append(SIM_PATH+\'/py/\')\n+\n+from inputChecking\t\timport requiredField, checkFileOpen, checkDir, isInRange\n+from refFunc\t\t\timport indexRef, readRef, getAllRefRegions, partitionRefRegions, ALLOWED_NUCL\n+from vcfFunc\t\t\timport parseVCF\n+from OutputFileWriter\timport OutputFileWriter\n+from probability\t\timport DiscreteDistribution, mean_ind_of_weighted_list\n+from SequenceContainer\timport SequenceContainer, ReadContainer, parseInputMutationModel\n+\n+# if coverage val for a given window/position 
is below this value, consider it effectively zero.\n+LOW_COV_THRESH = 50\n+\n+"""//////////////////////////////////////////////////\n+////////////    PARSE INPUT ARGUMENTS    ////////////\n+//////////////////////////////////////////////////"""\n+\n+\n+parser = argparse.ArgumentParser(description=\'NEAT-genReads V2.0\')\n+parser.add_argument(\'-r\', type=str,   required=True,  metavar=\'<str>\',                  help="* ref.fa")\n+parser.add_argument(\'-R\', type=int,   required=True,  metavar=\'<int>\',                  help="* read length")\n+parser.add_argument(\'-o\', type=str,   required=True,  metavar=\'<str>\',                  help="* output prefix")\n+parser.add_argument(\'-c\', type=float, required=False, metavar=\'<float>\', default=10.,   help="average coverage")\n+parser.add_argument(\'-e\', type=str,   required=False, metavar=\'<str>\',   default=None,  help="sequencing error model")\n+parser.add_argument(\'-E\', type=float, required=False, metavar=\'<float>\', default=-1,    help="rescale avg sequencing error rate to this")\n+parser.add_argument(\'-p\', type=int,   required=False, metavar=\'<int>\',   default=2,     help="ploidy")\n+parser.add_argument(\'-t\', type=str,   required=False, metavar=\'<str>\',   default=None,  help="bed file containing targeted regions")\n+parser.add_argument(\'-to\',type=float, required=False, metavar=\'<float>\', default=0.00,  help="off-target coverage scalar")\n+parser.add_argument(\'-m\', type=str,   required=False, metavar=\'<str>\',   default=None,  help="mutation model pickle file")\n+parser.add_argument(\'-M\', type=float, required=False, metavar=\'<float>\', default=-1,    help="rescale avg mutation rate to this")\n+parser.add_argument(\'-Mb\',type=str,   required=False, metavar=\'<str>\',   default=None,  help="bed file containing positional mut rates")\n+parser.add_argument(\'-N\', type=int,   required=False, metavar=\'<int>\',   default=-1,    help="below this qual, replace base-calls with 
\'N\'s")\n+#parser.add_argument(\'-s\', type=str,   required=False, metavar=\'<str>\',   default=None,  help="input sample model")\n+parser.add_argument(\'-v\', type='..b"+ mate for PE) are unmapped, put them at end of bam file\n+\t\t\t\t\t\tif all(isUnmapped):\n+\t\t\t\t\t\t\tif PAIRED_END:\n+\t\t\t\t\t\t\t\tunmapped_records.append((myReadName+'/1',myReadData[0],109))\n+\t\t\t\t\t\t\t\tunmapped_records.append((myReadName+'/2',myReadData[1],157))\n+\t\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\t\tunmapped_records.append((myReadName+'/1',myReadData[0],4))\n+\n+\t\t\t\t\t\t# write read data out to FASTQ and BAM files, bypass FASTQ if option specified\n+\t\t\t\t\t\tmyRefIndex = indices_by_refName[refIndex[RI][0]]\n+\t\t\t\t\t\tif len(myReadData) == 1:\n+\t\t\t\t\t\t\tif NO_FASTQ != True:\n+\t\t\t\t\t\t\t\tOFW.writeFASTQRecord(myReadName,myReadData[0][2],myReadData[0][3])\n+\t\t\t\t\t\t\tif SAVE_BAM:\n+\t\t\t\t\t\t\t\tif isUnmapped[0] == False:\n+\t\t\t\t\t\t\t\t\tOFW.writeBAMRecord(myRefIndex, myReadName+'/1', myReadData[0][0], myReadData[0][1], myReadData[0][2], myReadData[0][3], samFlag=0)\n+\t\t\t\t\t\telif len(myReadData) == 2:\n+\t\t\t\t\t\t\tif NO_FASTQ != True:\n+\t\t\t\t\t\t\t\tOFW.writeFASTQRecord(myReadName,myReadData[0][2],myReadData[0][3],read2=myReadData[1][2],qual2=myReadData[1][3])\n+\t\t\t\t\t\t\tif SAVE_BAM:\n+\t\t\t\t\t\t\t\tif isUnmapped[0] == False and isUnmapped[1] == False:\n+\t\t\t\t\t\t\t\t\tOFW.writeBAMRecord(myRefIndex, myReadName+'/1', myReadData[0][0], myReadData[0][1], myReadData[0][2], myReadData[0][3], samFlag=99,  matePos=myReadData[1][0])\n+\t\t\t\t\t\t\t\t\tOFW.writeBAMRecord(myRefIndex, myReadName+'/2', myReadData[1][0], myReadData[1][1], myReadData[1][2], myReadData[1][3], samFlag=147, matePos=myReadData[0][0])\n+\t\t\t\t\t\t\t\telif isUnmapped[0] == False and isUnmapped[1] == True:\n+\t\t\t\t\t\t\t\t\tOFW.writeBAMRecord(myRefIndex, myReadName+'/1', myReadData[0][0], myReadData[0][1], myReadData[0][2], myReadData[0][3], samFlag=105,  
matePos=myReadData[0][0])\n+\t\t\t\t\t\t\t\t\tOFW.writeBAMRecord(myRefIndex, myReadName+'/2', myReadData[0][0], myReadData[1][1], myReadData[1][2], myReadData[1][3], samFlag=149, matePos=myReadData[0][0], alnMapQual=0)\n+\t\t\t\t\t\t\t\telif isUnmapped[0] == True and isUnmapped[1] == False:\n+\t\t\t\t\t\t\t\t\tOFW.writeBAMRecord(myRefIndex, myReadName+'/1', myReadData[1][0], myReadData[0][1], myReadData[0][2], myReadData[0][3], samFlag=101,  matePos=myReadData[1][0], alnMapQual=0)\n+\t\t\t\t\t\t\t\t\tOFW.writeBAMRecord(myRefIndex, myReadName+'/2', myReadData[1][0], myReadData[1][1], myReadData[1][2], myReadData[1][3], samFlag=153, matePos=myReadData[1][0])\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tprint '\\nError: Unexpected number of reads generated...\\n'\n+\t\t\t\t\t\t\texit(1)\n+\t\t\t\t\t#print 'READS:',time.time()-ASDF2_TT\n+\n+\t\t\t\t\tif not isLastTime:\n+\t\t\t\t\t\tOFW.flushBuffers(bamMax=next_start)\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tOFW.flushBuffers(bamMax=end+1)\n+\n+\t\t\t\t# tally up all the variants that got successfully introduced\n+\t\t\t\tfor n in all_inserted_variants:\n+\t\t\t\t\tALL_VARIANTS_OUT[n] = True\n+\n+\t\t\t\t# prepare indices of next window\n+\t\t\t\tstart = next_start\n+\t\t\t\tend   = next_end\n+\t\t\t\tif isLastTime:\n+\t\t\t\t\tbreak\n+\t\t\t\tif end >= pf:\n+\t\t\t\t\tisLastTime = True\n+\n+\t\tif currentPercent != 100 and not havePrinted100:\n+\t\t\tprint '100%'\n+\t\telse:\n+\t\t\tprint ''\n+\t\tif ONLY_VCF:\n+\t\t\tprint 'VCF generation completed in',\n+\t\telse:\n+\t\t\tprint 'Read sampling completed in',\n+\t\tprint int(time.time()-tt),'(sec)'\n+\n+\t\t# write all output variants for this reference\n+\t\tif SAVE_VCF:\n+\t\t\tprint 'Writing output VCF...'\n+\t\t\tfor k in sorted(ALL_VARIANTS_OUT.keys()):\n+\t\t\t\tcurrentRef = refIndex[RI][0]\n+\t\t\t\tmyID       = '.'\n+\t\t\t\tmyQual     = '.'\n+\t\t\t\tmyFilt     = 'PASS'\n+\t\t\t\t# k[0] + 1 because we're going back to 1-based vcf coords\n+\t\t\t\tOFW.writeVCFRecord(currentRef, 
str(int(k[0])+1), myID, k[1], k[2], myQual, myFilt, k[4])\n+\n+\t\t#break\n+\n+\t# write unmapped reads to bam file\n+\tif SAVE_BAM and len(unmapped_records):\n+\t\tprint 'writing unmapped reads to bam file...'\n+\t\tfor umr in unmapped_records:\n+\t\t\tif PAIRED_END:\n+\t\t\t\tOFW.writeBAMRecord(-1, umr[0], 0, umr[1][1], umr[1][2], umr[1][3], samFlag=umr[2], matePos=0, alnMapQual=0)\n+\t\t\telse:\n+\t\t\t\tOFW.writeBAMRecord(-1, umr[0], 0, umr[1][1], umr[1][2], umr[1][3], samFlag=umr[2], alnMapQual=0)\n+\n+\t# close output files\n+\tOFW.closeFiles()\n+\tif CANCER:\n+\t\tOFW_CANCER.closeFiles()\n+\n+\n+if __name__ == '__main__':\n+\tmain()\n+\n+\n+\n"
b
diff -r 000000000000 -r 6e75a84e9338 genSeqErrorModel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genSeqErrorModel.xml Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,180 @@
+<tool id="genSeqErrorModel"
+      name="genSeqErrorModel"
+      version="1.0.0"
+   profile="16.04">
+  <description>generates sequence error model for genReads. Creates model file for use in NEAT-genReads</description>
+  <requirements>
+    <requirement type="package" version="1.9.1">numpy</requirement>
+    <requirement type="package">matplotlib</requirement>
+  </requirements>
<command detect_errors="exit_code">
  <![CDATA[
    ## Symlink the input reads into the job directory under fixed names;
    ## the extension is chosen from the datatype of read1.
    #if $lib_type_cond.read1.ext == "fastqsanger.gz":
      ln -s '$lib_type_cond.read1' read1.fq.gz
      #set $read1 = "read1.fq.gz"
      #if $lib_type_cond.lib_type == 'paired':
        && ln -s '$lib_type_cond.read2' read2.fq.gz
        #set $read2 = "read2.fq.gz"
      #end if
    #else
      ln -s '$lib_type_cond.read1' read1.fq
      #set $read1 = "read1.fq"
      #if $lib_type_cond.lib_type == 'paired':
        && ln -s '$lib_type_cond.read2' read2.fq
        #set $read2 = "read2.fq"
      #end if
    #end if
    && python2 $__tool_directory__/utilities/genSeqErrorModel.py
      -i $read1
    #if $lib_type_cond.lib_type == 'paired':
      -i2 $read2
    #end if
    #if $stats.simulations != '':
      -s ${stats.simulations}
    #end if
    #if $stats.score_offset != '':
      -q ${stats.score_offset}
    #end if
    #if $stats.max_qualscore != '':
      ## -Q is the maximum quality score; -s is already used for the simulation count
      -Q ${stats.max_qualscore}
    #end if
    #if $stats.process_cond.process == 'max':
      -n ${stats.process_cond.max_reads}
    #end if
      -o '$genSeqErrorModel_modelfile'
  ]]>
</command>
+  <inputs>
+
+ <conditional name="lib_type_cond">
+   <param name="lib_type"
+          type="select"
+  label="Single-end or paired-end sequencing library?"
+   >
+ <option value="paired">Paired-end</option>
+     <option value="single">Single-end</option>
+   </param>
+     <when value="paired">
+ <param name="read1"
+    type="data"
+    format="fastqsanger,fastqsanger.gz"
+    label="The first read"
+    help=""
+ />
+ <param name="read2"
+    type="data"
+    format="fastqsanger,fastqsanger.gz"
+    label="The second read"
+    help=""
+ />
+     </when>
+ <when value="single">
+ <param name="read1"
+    type="data"
+    format="fastqsanger,fastqsanger.gz"
+    label="The first read"
+    help=""
+ />
+     </when>
+ </conditional>
+
+ <section name="stats"
+  title="Miscellaneous settings"
+  expanded="false"
+  help="The settings for the error rates etc. for the model"
+ >
+   <param name="simulations"
+          type="integer"
+  value="1000000"
+  min="1000"
+  optional="true"
+  label="Number of simulation iterations [-s]"
+  help="Default is 1,000,000"
+   />
+   <conditional name="process_cond">
+   <param name="process" type="select" label="Maximum number of reads to process">
+   <option value="all">All</option>
+   <option value="max">Set a maximum number</option>
+   </param>
+   <when value="max">
+   <param name="max_reads"
+  optional="true"
+  type="integer"
+  value="1000000"
+  min="1"
+  label="Maximum number of reads to process"
+  help="Processing all reads is the default [-n]"
+   />
+   </when>
+   </conditional>
+   
+   
+   <param name="score_offset"
+  optional="true"
+  type="integer"
+  label="Quality score offset (Default = 33)"
+  help="There are different quality score offsets (depends on provider). When in doubt, leave at default (33) which is typical for modern Illumina sequencing runs [-q]"
+   />
+   <param name="max_qualscore"
+  type="integer"
+  min="1"
+  optional="true"
+  label="Maximum quality score (Default = 41)"
+  help="Setting the maximum quality score may have an effect on the accuracy of the error model [-Q]"
+   />
+
+ </section>
+
+  </inputs>
+  <outputs>
<!-- Model file written by genSeqErrorModel.py through its -o argument.
     No metadata_source: this tool has no "in_type" conditional, so the former
     value "in_type.reference" did not name any input of this tool. -->
<data format="txt"
      name="genSeqErrorModel_modelfile"
      label="${os.path.splitext($lib_type_cond.read1.name)[0]}_genSeqErrorModel.p">
</data>
+  </outputs>
+  <help>
+  </help>
<tests>

  <!-- Single-end library. -->
  <test>
    <conditional name="lib_type_cond">
      <param name="lib_type" value="single"/>
      <!-- ftype (not format) is the test attribute that sets the upload datatype -->
      <param name="read1" value="chrMT_read1.fq" ftype="fastqsanger"/>
    </conditional>
    <section name="stats">
      <param name="simulations" value="100000"/>
    </section>
    <!-- assertions must be child elements; an attribute on assert_stdout checks nothing -->
    <assert_stdout>
      <has_text text="saving model..."/>
    </assert_stdout>
  </test>

  <!-- Paired-end library. -->
  <test>
    <conditional name="lib_type_cond">
      <param name="lib_type" value="paired"/>
      <param name="read1" value="chrMT-PE_read1.fq" ftype="fastqsanger"/>
      <param name="read2" value="chrMT-PE_read2.fq" ftype="fastqsanger"/>
    </conditional>
    <section name="stats">
      <param name="simulations" value="100000"/>
    </section>
    <assert_stdout>
      <has_text text="saving model..."/>
    </assert_stdout>
  </test>

  <!-- Paired-end library with the number of processed reads capped (-n). -->
  <test>
    <conditional name="lib_type_cond">
      <param name="lib_type" value="paired"/>
      <param name="read1" value="chrMT-PE_read1.fq" ftype="fastqsanger"/>
      <param name="read2" value="chrMT-PE_read2.fq" ftype="fastqsanger"/>
    </conditional>
    <section name="stats">
      <param name="simulations" value="100000"/>
      <conditional name="process_cond">
        <param name="process" value="max"/>
        <param name="max_reads" value="100"/>
      </conditional>
    </section>
    <assert_stdout>
      <has_text text="saving model..."/>
    </assert_stdout>
  </test>

</tests>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 mergeJobs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mergeJobs.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+import os
+import argparse
+
def getListOfFiles(inDir,pattern):
    """Return the paths of the non-empty entries of `inDir` whose name contains `pattern`.

    inDir   -- directory to scan; a trailing '/' is optional.
    pattern -- substring that must occur in the entry name (plain `in` test,
               not a glob or a regular expression).

    Paths are built with os.path.join, so a directory given without a trailing
    separator no longer produces a mangled path such as 'dirfile'. Entries are
    returned in os.listdir order, i.e. unsorted.
    """
    candidates = (os.path.join(inDir, name) for name in os.listdir(inDir) if pattern in name)
    # zero-byte files are dropped: a size of 0 is falsy
    return [path for path in candidates if os.path.getsize(path)]
+
# counter used to give every header-stripped copy a distinct '_temp<N>' suffix
TEMP_IND = 0


def stripVCF_header(fn):
    """Write a copy of VCF file `fn` without its '#' lines and return the copy's path.

    The copy is named fn + '_temp' + str(TEMP_IND); TEMP_IND is incremented once
    for every file in which a header is found. After the first '#' line has been
    seen, every line that does not start with '#' is copied. If the file does not
    start with a '#' line it is treated as having no header: the (empty) copy is
    deleted and `fn` itself is returned.

    Both files are closed even when reading or writing fails, and the unused
    copy is removed with os.remove rather than a shell 'rm', so paths that
    contain spaces or shell metacharacters are handled correctly.
    """
    global TEMP_IND
    ftn = fn + '_temp' + str(TEMP_IND)
    hasHeader = False
    with open(fn, 'r') as f, open(ftn, 'w') as f_t:
        for line in f:
            if line.startswith('#'):
                if not hasHeader:
                    TEMP_IND += 1
                    hasHeader = True
            elif hasHeader:
                f_t.write(line)
            else:
                # first line is not a header line: nothing to strip
                break
    if hasHeader:
        return ftn
    os.remove(ftn)
    return fn
+
def catListOfFiles(l,outName,gzipped=False):
    """Concatenate the files in `l`, in sorted name order, into one output file.

    l       -- list of input paths.
    outName -- output path; '.gz' is appended when the inputs are compressed.
    gzipped -- treat the inputs as compressed; switched on automatically when
               any input name ends in '.gz' or '.gzip'.

    A mixture of compressed and uncompressed names prints an error plus the
    file list and exits with status 1. The inputs are copied byte-for-byte
    inside Python instead of through a shell 'cat', so names containing spaces
    or shell metacharacters work; the equivalent command line is still printed.
    """
    import shutil

    is_gz = [n.endswith('.gz') or n.endswith('.gzip') for n in l]
    if any(is_gz):
        gzipped = True
    target = outName
    if gzipped:
        if not all(is_gz):
            print('\nError: Found a mixture of compressed and decompressed files with the specified prefix. Abandoning ship...\n')
            for m in l:
                print(m)
            print('')
            raise SystemExit(1)
        target = outName+'.gz'
    ordered = sorted(l)
    print('cat '+' '.join(ordered)+' > '+target)
    with open(target,'wb') as merged:
        for n in ordered:
            with open(n,'rb') as part:
                shutil.copyfileobj(part,merged)
+
def catBams(l,outName,samtools_exe):
    """Merge the BAM files in `l` into `outName` with 'samtools cat'.

    l            -- list of BAM paths; they are concatenated in sorted name order.
    outName      -- path of the merged BAM.
    samtools_exe -- samtools command (may include leading arguments; it is
                    split the way a shell would split it).

    The header of the first (sorted) BAM is dumped to a temporary SAM file and
    passed to 'samtools cat -h'. Commands are run from argument lists rather
    than shell strings, so BAM paths with spaces or shell metacharacters work,
    and the temporary header is removed even if a step raises. Exit statuses
    of samtools are not checked.
    """
    import shlex
    import subprocess

    l_sort = sorted(l)
    tmp = outName+'.tempHeader.sam'
    samtools_cmd = shlex.split(samtools_exe)
    try:
        with open(tmp,'wb') as header_out:
            subprocess.call(samtools_cmd+['view','-H',l_sort[0]],stdout=header_out)
        # log the equivalent shell command
        print(samtools_exe+' cat -h '+tmp+' '+' '.join(l_sort)+' > '+outName)
        with open(outName,'wb') as bam_out:
            subprocess.call(samtools_cmd+['cat','-h',tmp]+l_sort,stdout=bam_out)
    finally:
        if os.path.exists(tmp):
            os.remove(tmp)
+
+
+#####################################
+# main() #
+#####################################
+
def main():
    """Merge per-job output files that share the given prefixes into single outputs.

    For every input prefix the directory of the FIRST prefix is searched for
    non-empty files whose names contain '<prefix>_read1.fq.job',
    '<prefix>_read2.fq.job', '<prefix>_golden.bam.job' and
    '<prefix>_golden.vcf.job'. Each group that is found is merged into
    '<output prefix>_read1.fq', '_read2.fq', '_golden.bam' and '_golden.vcf'.

    Only the first VCF (in sorted name order, the order in which the files are
    concatenated) keeps its header; every other VCF is replaced by a
    header-stripped '_temp<N>' copy. Those copies are left on disk.
    """
    parser = argparse.ArgumentParser(description='mergeJobs.py')
    parser.add_argument('-i', type=str, required=True, metavar='<str>', nargs='+', help="* input prefix: [prefix_1] [prefix_2] ...")
    parser.add_argument('-o', type=str, required=True, metavar='<str>',            help="* output prefix")
    parser.add_argument('-s', type=str, required=True, metavar='<str>',            help="* /path/to/samtools")

    args = parser.parse_args()
    (INP,OUP,SAMTOOLS) = (args.i,args.o,args.s)

    # directory part of the first prefix; a bare prefix means the current
    # directory, while a prefix directly under '/' keeps '/' as its directory
    if '/' in INP[0]:
        inDir = '/'.join(INP[0].split('/')[:-1])+'/'
    else:
        inDir = './'

    # reduce every prefix to its last path component (one trailing '/' ignored)
    INP_LIST = [(n[:-1] if n[-1] == '/' else n).split('/')[-1] for n in INP]

    listing_r1 = []
    listing_r2 = []
    listing_b  = []
    vcf_jobs   = []
    for n in INP_LIST:
        listing_r1 += getListOfFiles(inDir,n+'_read1.fq.job')
        listing_r2 += getListOfFiles(inDir,n+'_read2.fq.job')
        listing_b  += getListOfFiles(inDir,n+'_golden.bam.job')
        vcf_jobs   += getListOfFiles(inDir,n+'_golden.vcf.job')

    # remove headers from every vcf file except the first one to be written;
    # a stripped copy is named '<original>_temp<N>', so it still sorts after
    # the untouched first file when catListOfFiles re-sorts the list
    vcf_jobs  = sorted(vcf_jobs)
    listing_v = vcf_jobs[:1] + [stripVCF_header(v) for v in vcf_jobs[1:]]

    #
    # merge fq files
    #
    if len(listing_r1):
        catListOfFiles(listing_r1,OUP+'_read1.fq')
    if len(listing_r2):
        catListOfFiles(listing_r2,OUP+'_read2.fq')

    #
    # merge golden alignments, if present
    #
    if len(listing_b):
        catBams(listing_b,OUP+'_golden.bam',SAMTOOLS)

    #
    # merge golden vcfs, if present
    #
    if len(listing_v):
        catListOfFiles(listing_v,OUP+'_golden.vcf')


if __name__ == "__main__":
    main()
+
b
diff -r 000000000000 -r 6e75a84e9338 models/MutModel_BRCA_US_ICGC.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/MutModel_BRCA_US_ICGC.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,46220 @@\n+(dp0\n+S'HIGH_MUT_REGIONS'\n+p1\n+(lp2\n+(S'chr1'\n+p3\n+I1570000\n+I1575000\n+F0.001\n+tp4\n+a(g3\n+I1633000\n+I1637000\n+F0.001\n+tp5\n+a(g3\n+I2301000\n+I2306000\n+F0.001\n+tp6\n+a(g3\n+I2450000\n+I2455000\n+F0.001\n+tp7\n+a(g3\n+I7884000\n+I7889000\n+F0.0014\n+tp8\n+a(g3\n+I8419000\n+I8423000\n+F0.00175\n+tp9\n+a(g3\n+I10711000\n+I10716000\n+F0.001\n+tp10\n+a(g3\n+I12851000\n+I12858000\n+F0.001\n+tp11\n+a(g3\n+I12885000\n+I12890000\n+F0.001\n+tp12\n+a(g3\n+I12916000\n+I12923000\n+F0.001142857142857143\n+tp13\n+a(g3\n+I12937000\n+I12945000\n+F0.001375\n+tp14\n+a(g3\n+I14103000\n+I14111000\n+F0.00125\n+tp15\n+a(g3\n+I16252000\n+I16265000\n+F0.003384615384615385\n+tp16\n+a(g3\n+I16383000\n+I16388000\n+F0.001\n+tp17\n+a(g3\n+I16523000\n+I16527000\n+F0.001\n+tp18\n+a(g3\n+I16887000\n+I16894000\n+F0.001\n+tp19\n+a(g3\n+I16942000\n+I16959000\n+F0.0020714285714285713\n+tp20\n+a(g3\n+I17015000\n+I17019000\n+F0.001\n+tp21\n+a(g3\n+I17081000\n+I17088000\n+F0.003\n+tp22\n+a(g3\n+I17271000\n+I17277000\n+F0.001\n+tp23\n+a(g3\n+I19437000\n+I19441000\n+F0.001\n+tp24\n+a(g3\n+I22170000\n+I22176000\n+F0.001\n+tp25\n+a(g3\n+I23415000\n+I23421000\n+F0.002\n+tp26\n+a(g3\n+I23517000\n+I23522000\n+F0.001\n+tp27\n+a(g3\n+I26606000\n+I26611000\n+F0.0012\n+tp28\n+a(g3\n+I27055000\n+I27059000\n+F0.001\n+tp29\n+a(g3\n+I27097000\n+I27103000\n+F0.0016666666666666668\n+tp30\n+a(g3\n+I27104000\n+I27109000\n+F0.0016\n+tp31\n+a(g3\n+I29039000\n+I29043000\n+F0.001\n+tp32\n+a(g3\n+I33114000\n+I33119000\n+F0.001\n+tp33\n+a(g3\n+I33954000\n+I33962000\n+F0.001\n+tp34\n+a(g3\n+I37945000\n+I37951000\n+F0.001\n+tp35\n+a(g3\n+I39794000\n+I39801000\n+F0.0014285714285714286\n+tp36\n+a(g3\n+I39849000\n+I39855000\n+F0.001\n+tp37\n+a(g3\n+I39875000\n+I39881000\n+F0.0016666666666666668\n+tp38\n+a(g3\n+I40699000\n+I40707000\n+F0.001625\n+tp39\n+a(g3\n+I42044000\n+I42051000\n+F0.0015714285714285715\n+tp40\n+a(g3\n+I43294000\n+I43298000\n+F0.001\n+tp41\n+a(g3\n+I43904000\n+I43910000\n+F0.0
015\n+tp42\n+a(g3\n+I44131000\n+I44136000\n+F0.001\n+tp43\n+a(g3\n+I44434000\n+I44439000\n+F0.001\n+tp44\n+a(g3\n+I46084000\n+I46090000\n+F0.001\n+tp45\n+a(g3\n+I46749000\n+I46754000\n+F0.0014\n+tp46\n+a(g3\n+I47396000\n+I47400000\n+F0.001\n+tp47\n+a(g3\n+I55116000\n+I55121000\n+F0.001\n+tp48\n+a(g3\n+I64641000\n+I64645000\n+F0.001\n+tp49\n+a(g3\n+I68622000\n+I68626000\n+F0.00125\n+tp50\n+a(g3\n+I70383000\n+I70387000\n+F0.001\n+tp51\n+a(g3\n+I75034000\n+I75040000\n+F0.002\n+tp52\n+a(g3\n+I75053000\n+I75057000\n+F0.00125\n+tp53\n+a(g3\n+I79401000\n+I79405000\n+F0.001\n+tp54\n+a(g3\n+I82406000\n+I82411000\n+F0.001\n+tp55\n+a(g3\n+I86949000\n+I86954000\n+F0.001\n+tp56\n+a(g3\n+I89446000\n+I89451000\n+F0.001\n+tp57\n+a(g3\n+I90176000\n+I90182000\n+F0.0015\n+tp58\n+a(g3\n+I90397000\n+I90402000\n+F0.0012\n+tp59\n+a(g3\n+I91401000\n+I91408000\n+F0.001\n+tp60\n+a(g3\n+I92645000\n+I92651000\n+F0.001\n+tp61\n+a(g3\n+I92787000\n+I92791000\n+F0.00125\n+tp62\n+a(g3\n+I94666000\n+I94670000\n+F0.001\n+tp63\n+a(g3\n+I104110000\n+I104119000\n+F0.001\n+tp64\n+a(g3\n+I109463000\n+I109468000\n+F0.001\n+tp65\n+a(g3\n+I109791000\n+I109797000\n+F0.0013333333333333333\n+tp66\n+a(g3\n+I110084000\n+I110088000\n+F0.001\n+tp67\n+a(g3\n+I110463000\n+I110468000\n+F0.001\n+tp68\n+a(g3\n+I112995000\n+I113002000\n+F0.001\n+tp69\n+a(g3\n+I113055000\n+I113061000\n+F0.001\n+tp70\n+a(g3\n+I114521000\n+I114526000\n+F0.0016\n+tp71\n+a(g3\n+I114638000\n+I114642000\n+F0.001\n+tp72\n+a(g3\n+I115166000\n+I115170000\n+F0.001\n+tp73\n+a(g3\n+I116278000\n+I116282000\n+F0.001\n+tp74\n+a(g3\n+I120456000\n+I120460000\n+F0.00175\n+tp75\n+a(g3\n+I145073000\n+I145077000\n+F0.001\n+tp76\n+a(g3\n+I145455000\n+I145459000\n+F0.00125\n+tp77\n+a(g3\n+I145471000\n+I145476000\n+F0.001\n+tp78\n+a(g3\n+I145533000\n+I145541000\n+F0.001\n+tp79\n+a(g3\n+I147089000\n+I147094000\n+F0.0016\n+tp80\n+a(g3\n+I149781000\n+I149787000\n+F0.0013333333333333333\n+tp81\n+a(g3\n+I149855000\n+I149860000\n+F0.0022\n+tp82\n+a(g3\n+I150441000\n+I
150447000\n+F0.0011666666666666668\n+tp83\n+a(g3\n+I151257000\n+I151265000\n+F0.001625\n+tp84\n+a(g3\n+I151507000\n+I151511000\n+F0.001\n+tp85\n+a(g3\n+I151732000\n+I151737000\n+F0.0012\n+tp86\n+a(g3\n+I151771000\n+I151776000\n+F0.0014\n+tp87\n+a(g3\n+I151817000\n+I151822000\n+F0.0012\n+tp88\n+a(g3\n+I152056000\n+I152062000\n+F0.0011666666666666668\n+tp89\n+a(g3\n+I152077000\n+I152087000\n+F0.0022\n+tp90\n+a(g3\n+I152125000\n+I152132000\n+F0"..b"798329725\n+s(S'TGT'\n+p15791\n+S'TAT'\n+p15792\n+tp15793\n+F0.5110151830902054\n+s(S'TTT'\n+p15794\n+S'TCT'\n+p15795\n+tp15796\n+F0.3153153153153153\n+s(S'TCT'\n+p15797\n+S'TTT'\n+p15798\n+tp15799\n+F0.34469034289713085\n+s(S'GCT'\n+p15800\n+S'GAT'\n+p15801\n+tp15802\n+F0.2535480023852117\n+s(S'CCA'\n+p15803\n+S'CAA'\n+p15804\n+tp15805\n+F0.2673684210526316\n+s(S'CAC'\n+p15806\n+S'CGC'\n+p15807\n+tp15808\n+F0.09031460584552813\n+s(S'TCG'\n+p15809\n+S'TTG'\n+p15810\n+tp15811\n+F0.8512672762954612\n+s(S'ATT'\n+p15812\n+S'ACT'\n+p15813\n+tp15814\n+F0.5770365997638724\n+s(S'CTT'\n+p15815\n+S'CAT'\n+p15816\n+tp15817\n+F0.20787104877538204\n+s(S'ACT'\n+p15818\n+S'AAT'\n+p15819\n+tp15820\n+F0.28917458542947644\n+s(S'CGT'\n+p15821\n+S'CCT'\n+p15822\n+tp15823\n+F0.08529750479846449\n+s(S'AGC'\n+p15824\n+S'AAC'\n+p15825\n+tp15826\n+F0.559720815039964\n+s(S'TTG'\n+p15827\n+S'TGG'\n+p15828\n+tp15829\n+F0.20726837060702874\n+s(S'CCT'\n+p15830\n+S'CGT'\n+p15831\n+tp15832\n+F0.218935516888434\n+s(S'GTC'\n+p15833\n+S'GAC'\n+p15834\n+tp15835\n+F0.3083228247162673\n+s(S'CAG'\n+p15836\n+S'CGG'\n+p15837\n+tp15838\n+F0.5630856760374833\n+s(S'TCC'\n+p15839\n+S'TTC'\n+p15840\n+tp15841\n+F0.49455655868295273\n+s(S'AAT'\n+p15842\n+S'ACT'\n+p15843\n+tp15844\n+F0.19943604204050244\n+s(S'TTA'\n+p15845\n+S'TGA'\n+p15846\n+tp15847\n+F0.34518113465481887\n+s(S'CCC'\n+p15848\n+S'CAC'\n+p15849\n+tp15850\n+F0.3011666243976667\n+s(S'GTA'\n+p15851\n+S'GGA'\n+p15852\n+tp15853\n+F0.23280996210070384\n+s(S'TGG'\n+p15854\n+S'TAG'\n+p15855\n+tp15856\n+F0.490978451
3867409\n+s(S'TAG'\n+p15857\n+S'TTG'\n+p15858\n+tp15859\n+F0.2964881980426022\n+s(S'AGG'\n+p15860\n+S'AAG'\n+p15861\n+tp15862\n+F0.42983931240657697\n+s(S'TTC'\n+p15863\n+S'TGC'\n+p15864\n+tp15865\n+F0.20487062404870623\n+s(S'CGG'\n+p15866\n+S'CCG'\n+p15867\n+tp15868\n+F0.09372869802317656\n+s(S'GTG'\n+p15869\n+S'GAG'\n+p15870\n+tp15871\n+F0.04019174041297935\n+s(S'AAC'\n+p15872\n+S'ACC'\n+p15873\n+tp15874\n+F0.283960092095165\n+s(S'CTG'\n+p15875\n+S'CGG'\n+p15876\n+tp15877\n+F0.24988301357042583\n+s(S'TCA'\n+p15878\n+S'TTA'\n+p15879\n+tp15880\n+F0.5640428700266364\n+s(S'GGA'\n+p15881\n+S'GTA'\n+p15882\n+tp15883\n+F0.21586568357466465\n+s(S'ACG'\n+p15884\n+S'AAG'\n+p15885\n+tp15886\n+F0.07233978688278553\n+s(S'GGT'\n+p15887\n+S'GAT'\n+p15888\n+tp15889\n+F0.41646947468054896\n+s(S'TAC'\n+p15890\n+S'TTC'\n+p15891\n+tp15892\n+F0.11092715231788079\n+s(S'TTG'\n+p15893\n+S'TAG'\n+p15894\n+tp15895\n+F0.22044728434504793\n+s(S'AAT'\n+p15896\n+S'ATT'\n+p15897\n+tp15898\n+F0.1948218405537042\n+s(S'GCC'\n+p15899\n+S'GGC'\n+p15900\n+tp15901\n+F0.14519405538329946\n+s(S'CTC'\n+p15902\n+S'CGC'\n+p15903\n+tp15904\n+F0.20212305025996533\n+s(S'CGG'\n+p15905\n+S'CTG'\n+p15906\n+tp15907\n+F0.09597818677573279\n+s(S'TGT'\n+p15908\n+S'TCT'\n+p15909\n+tp15910\n+F0.19857100327478416\n+s(S'ACA'\n+p15911\n+S'AAA'\n+p15912\n+tp15913\n+F0.2707728065078443\n+s(S'CGC'\n+p15914\n+S'CAC'\n+p15915\n+tp15916\n+F0.8863668505161929\n+s(S'GGT'\n+p15917\n+S'GTT'\n+p15918\n+tp15919\n+F0.3599148130619972\n+s(S'TTC'\n+p15920\n+S'TAC'\n+p15921\n+tp15922\n+F0.2264840182648402\n+s(S'GGG'\n+p15923\n+S'GCG'\n+p15924\n+tp15925\n+F0.15543977174047885\n+s(S'GCA'\n+p15926\n+S'GAA'\n+p15927\n+tp15928\n+F0.27027963947307604\n+s(S'CCG'\n+p15929\n+S'CTG'\n+p15930\n+tp15931\n+F0.7898996886890349\n+s(S'CGA'\n+p15932\n+S'CAA'\n+p15933\n+tp15934\n+F0.8470372267759563\n+ssS'AVG_MUT_RATE'\n+p15935\n+F0.00025421070947095426\n+sS'INDEL_FREQ'\n+p15936\n+(dp15937\n+I1\n+F0.35184872011684204\n+sI2\n+F0.03313090937043584\n+sI3\n+
F0.02326081943270043\n+sI4\n+F0.0045353217003612865\n+sI6\n+F0.00026135752171573516\n+sI12\n+F0.0002306095779844722\n+sI-1\n+F0.3722653547544006\n+sI-37\n+F0.00026135752171573516\n+sI-35\n+F1.537397186563148e-05\n+sI-34\n+F6.149588746252592e-05\n+sI-22\n+F0.0004304712122376814\n+sI-19\n+F0.0006764547620877851\n+sI-18\n+F6.149588746252592e-05\n+sI-14\n+F0.0006149588746252592\n+sI-13\n+F6.149588746252592e-05\n+sI-12\n+F0.0005227150434314703\n+sI-11\n+F0.00027673149358136665\n+sI-10\n+F0.00041509724037204997\n+sI-9\n+F0.000768698593281574\n+sI-8\n+F0.0003074794373126296\n+sI-7\n+F0.00047659312783457586\n+sI-6\n+F0.000768698593281574\n+sI-5\n+F0.00026135752171573516\n+sI-4\n+F0.029840879391190702\n+sI-3\n+F0.09287416404027976\n+sI-2\n+F0.08577138903835803\n+ss.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 models/MutModel_CLLE-ES_ICGC.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/MutModel_CLLE-ES_ICGC.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,221812 @@\n+(dp0\n+S'HIGH_MUT_REGIONS'\n+p1\n+(lp2\n+(S'chr1'\n+p3\n+I1222000\n+I1227000\n+F0.0006\n+tp4\n+a(g3\n+I2388000\n+I2392000\n+F0.00075\n+tp5\n+a(g3\n+I4961000\n+I4966000\n+F0.0006\n+tp6\n+a(g3\n+I5041000\n+I5046000\n+F0.0006\n+tp7\n+a(g3\n+I5116000\n+I5122000\n+F0.0008333333333333334\n+tp8\n+a(g3\n+I5180000\n+I5185000\n+F0.0006\n+tp9\n+a(g3\n+I6032000\n+I6036000\n+F0.00075\n+tp10\n+a(g3\n+I6458000\n+I6463000\n+F0.0006\n+tp11\n+a(g3\n+I7393000\n+I7398000\n+F0.0006\n+tp12\n+a(g3\n+I7400000\n+I7405000\n+F0.0006\n+tp13\n+a(g3\n+I7720000\n+I7725000\n+F0.0006\n+tp14\n+a(g3\n+I8286000\n+I8291000\n+F0.0006\n+tp15\n+a(g3\n+I14275000\n+I14280000\n+F0.0006\n+tp16\n+a(g3\n+I14583000\n+I14589000\n+F0.0006666666666666666\n+tp17\n+a(g3\n+I14845000\n+I14850000\n+F0.0006\n+tp18\n+a(g3\n+I15291000\n+I15296000\n+F0.0006\n+tp19\n+a(g3\n+I16355000\n+I16360000\n+F0.0006\n+tp20\n+a(g3\n+I19086000\n+I19094000\n+F0.000625\n+tp21\n+a(g3\n+I19106000\n+I19111000\n+F0.0008\n+tp22\n+a(g3\n+I19875000\n+I19879000\n+F0.00075\n+tp23\n+a(g3\n+I20029000\n+I20034000\n+F0.0006\n+tp24\n+a(g3\n+I23594000\n+I23599000\n+F0.0006\n+tp25\n+a(g3\n+I25144000\n+I25148000\n+F0.00075\n+tp26\n+a(g3\n+I29660000\n+I29664000\n+F0.00075\n+tp27\n+a(g3\n+I30729000\n+I30733000\n+F0.00075\n+tp28\n+a(g3\n+I32637000\n+I32642000\n+F0.0006\n+tp29\n+a(g3\n+I34435000\n+I34440000\n+F0.0006\n+tp30\n+a(g3\n+I34487000\n+I34493000\n+F0.0006666666666666666\n+tp31\n+a(g3\n+I35661000\n+I35666000\n+F0.0006\n+tp32\n+a(g3\n+I36720000\n+I36725000\n+F0.0008\n+tp33\n+a(g3\n+I38982000\n+I38987000\n+F0.0006\n+tp34\n+a(g3\n+I40494000\n+I40499000\n+F0.0006\n+tp35\n+a(g3\n+I43592000\n+I43597000\n+F0.0006\n+tp36\n+a(g3\n+I45548000\n+I45553000\n+F0.0006\n+tp37\n+a(g3\n+I48151000\n+I48156000\n+F0.0006\n+tp38\n+a(g3\n+I49564000\n+I49570000\n+F0.0006666666666666666\n+tp39\n+a(g3\n+I49712000\n+I49717000\n+F0.0006\n+tp40\n+a(g3\n+I50108000\n+I50113000\n+F0.0006\n+tp41\n+a(g3\n+I50200000\n+I50205000\n+F0.0006\n+tp42\n+a(g3\n+I5026200
0\n+I50267000\n+F0.0006\n+tp43\n+a(g3\n+I51915000\n+I51921000\n+F0.0006666666666666666\n+tp44\n+a(g3\n+I51962000\n+I51968000\n+F0.0008333333333333334\n+tp45\n+a(g3\n+I52187000\n+I52194000\n+F0.001\n+tp46\n+a(g3\n+I52540000\n+I52544000\n+F0.00075\n+tp47\n+a(g3\n+I54165000\n+I54170000\n+F0.0006\n+tp48\n+a(g3\n+I54973000\n+I54981000\n+F0.000625\n+tp49\n+a(g3\n+I57307000\n+I57312000\n+F0.0008\n+tp50\n+a(g3\n+I57591000\n+I57596000\n+F0.001\n+tp51\n+a(g3\n+I57836000\n+I57841000\n+F0.0008\n+tp52\n+a(g3\n+I59673000\n+I59678000\n+F0.0006\n+tp53\n+a(g3\n+I60647000\n+I60652000\n+F0.0008\n+tp54\n+a(g3\n+I61114000\n+I61122000\n+F0.000875\n+tp55\n+a(g3\n+I61283000\n+I61287000\n+F0.00075\n+tp56\n+a(g3\n+I62796000\n+I62801000\n+F0.0006\n+tp57\n+a(g3\n+I63390000\n+I63395000\n+F0.0006\n+tp58\n+a(g3\n+I64102000\n+I64107000\n+F0.0006\n+tp59\n+a(g3\n+I64847000\n+I64851000\n+F0.00075\n+tp60\n+a(g3\n+I65995000\n+I66000000\n+F0.0006\n+tp61\n+a(g3\n+I67672000\n+I67677000\n+F0.0006\n+tp62\n+a(g3\n+I68656000\n+I68661000\n+F0.0006\n+tp63\n+a(g3\n+I68717000\n+I68723000\n+F0.0006666666666666666\n+tp64\n+a(g3\n+I69163000\n+I69169000\n+F0.0006666666666666666\n+tp65\n+a(g3\n+I69234000\n+I69240000\n+F0.0006666666666666666\n+tp66\n+a(g3\n+I69276000\n+I69282000\n+F0.0006666666666666666\n+tp67\n+a(g3\n+I69314000\n+I69319000\n+F0.0006\n+tp68\n+a(g3\n+I69324000\n+I69330000\n+F0.0006666666666666666\n+tp69\n+a(g3\n+I69534000\n+I69539000\n+F0.0006\n+tp70\n+a(g3\n+I69604000\n+I69612000\n+F0.000625\n+tp71\n+a(g3\n+I69709000\n+I69715000\n+F0.0006666666666666666\n+tp72\n+a(g3\n+I70002000\n+I70007000\n+F0.0006\n+tp73\n+a(g3\n+I70052000\n+I70057000\n+F0.0006\n+tp74\n+a(g3\n+I70175000\n+I70181000\n+F0.0006666666666666666\n+tp75\n+a(g3\n+I70274000\n+I70279000\n+F0.0008\n+tp76\n+a(g3\n+I71313000\n+I71318000\n+F0.0006\n+tp77\n+a(g3\n+I71746000\n+I71751000\n+F0.0006\n+tp78\n+a(g3\n+I71758000\n+I71764000\n+F0.0008333333333333334\n+tp79\n+a(g3\n+I72270000\n+I72275000\n+F0.0006\n+tp80\n+a(g3\n+I72382000\n+I72388000\n+F0.
0006666666666666666\n+tp81\n+a(g3\n+I72471000\n+I72479000\n+F0.000625\n+tp82\n+a(g3\n+I72636000\n+I72641000\n+F0.0006\n+tp83\n+a(g3\n+I72663000\n+I72681000\n+F0.0006348214285714286\n+tp84\n+a(g3\n+I72884000\n+I72889000\n+F0.0006\n+tp85\n+a(g3\n+I72949000\n+I72954000\n+F0.0006\n+tp86\n+a(g3\n+I73046000\n+I73055000\n+F0.0007083333333333334\n+tp87\n+a(g3\n+I73150000\n+I73156000\n+F0.0006666666666666666\n+tp88\n+a(g3\n+I73198000\n+I73"..b"200724262804\n+s(S'AAT'\n+p74866\n+S'ACT'\n+p74867\n+tp74868\n+F0.14837840187809756\n+s(S'TTA'\n+p74869\n+S'TGA'\n+p74870\n+tp74871\n+F0.31232179226069245\n+s(S'CCC'\n+p74872\n+S'CAC'\n+p74873\n+tp74874\n+F0.2595196658545075\n+s(S'GTA'\n+p74875\n+S'GGA'\n+p74876\n+tp74877\n+F0.17067745197168857\n+s(S'TGG'\n+p74878\n+S'TAG'\n+p74879\n+tp74880\n+F0.514733395696913\n+s(S'TAG'\n+p74881\n+S'TTG'\n+p74882\n+tp74883\n+F0.26628104118879026\n+s(S'AGG'\n+p74884\n+S'AAG'\n+p74885\n+tp74886\n+F0.5812510207414666\n+s(S'TTC'\n+p74887\n+S'TGC'\n+p74888\n+tp74889\n+F0.15242178235884338\n+s(S'CGG'\n+p74890\n+S'CCG'\n+p74891\n+tp74892\n+F0.03942414174972315\n+s(S'GTG'\n+p74893\n+S'GAG'\n+p74894\n+tp74895\n+F0.20761326482951892\n+s(S'AAC'\n+p74896\n+S'ACC'\n+p74897\n+tp74898\n+F0.2758257203092059\n+s(S'CTG'\n+p74899\n+S'CGG'\n+p74900\n+tp74901\n+F0.2206308336015449\n+s(S'TCA'\n+p74902\n+S'TTA'\n+p74903\n+tp74904\n+F0.493955974042499\n+s(S'GGA'\n+p74905\n+S'GTA'\n+p74906\n+tp74907\n+F0.2557325835676351\n+s(S'ACG'\n+p74908\n+S'AAG'\n+p74909\n+tp74910\n+F0.053070822574695684\n+s(S'GGT'\n+p74911\n+S'GAT'\n+p74912\n+tp74913\n+F0.45445033025962983\n+s(S'TAC'\n+p74914\n+S'TTC'\n+p74915\n+tp74916\n+F0.2424995398490705\n+s(S'TTG'\n+p74917\n+S'TAG'\n+p74918\n+tp74919\n+F0.306251232498521\n+s(S'AAT'\n+p74920\n+S'ATT'\n+p74921\n+tp74922\n+F0.3098860968611425\n+s(S'GCC'\n+p74923\n+S'GGC'\n+p74924\n+tp74925\n+F0.16905244489485685\n+s(S'CTC'\n+p74926\n+S'CGC'\n+p74927\n+tp74928\n+F0.16395131086142323\n+s(S'CGG'\n+p74929\n+S'CTG'\n+p74930\n+tp74931\n+F0.0634182355112587
6\n+s(S'TGT'\n+p74932\n+S'TCT'\n+p74933\n+tp74934\n+F0.15200941651252733\n+s(S'ACA'\n+p74935\n+S'AAA'\n+p74936\n+tp74937\n+F0.38468287791426425\n+s(S'CGC'\n+p74938\n+S'CAC'\n+p74939\n+tp74940\n+F0.8930200587389865\n+s(S'GGT'\n+p74941\n+S'GTT'\n+p74942\n+tp74943\n+F0.3893793734876725\n+s(S'TTC'\n+p74944\n+S'TAC'\n+p74945\n+tp74946\n+F0.2448234972179148\n+s(S'GGG'\n+p74947\n+S'GCG'\n+p74948\n+tp74949\n+F0.1459622752726201\n+s(S'GCA'\n+p74950\n+S'GAA'\n+p74951\n+tp74952\n+F0.38426645930533954\n+s(S'CCG'\n+p74953\n+S'CTG'\n+p74954\n+tp74955\n+F0.9041450777202072\n+s(S'CGA'\n+p74956\n+S'CAA'\n+p74957\n+tp74958\n+F0.8623376623376623\n+ssS'AVG_MUT_RATE'\n+p74959\n+F0.0003707863349512272\n+sS'INDEL_FREQ'\n+p74960\n+(dp74961\n+I1\n+F0.10803253685815956\n+sI2\n+F0.0658645427328701\n+sI3\n+F0.02126758176580239\n+sI4\n+F0.026775122860532095\n+sI5\n+F0.006722024515618818\n+sI6\n+F0.009998305371970845\n+sI7\n+F0.003078574252951475\n+sI8\n+F0.00522510308987177\n+sI9\n+F0.002428966841778228\n+sI10\n+F0.0027114048466361615\n+sI11\n+F0.0018358470315765676\n+sI12\n+F0.002203016437891881\n+sI13\n+F0.0017228718296333943\n+sI14\n+F0.0011015082189459405\n+sI15\n+F0.0005366322092300736\n+sI16\n+F0.0012709710218607008\n+sI17\n+F0.0005931198102016603\n+sI18\n+F0.0026549172456645747\n+sI19\n+F0.0002259504038863468\n+sI20\n+F0.002231260238377675\n+sI21\n+F0.0011579958199175273\n+sI22\n+F0.0003389256058295202\n+sI23\n+F0.0005083884087442802\n+sI24\n+F0.0003389256058295202\n+sI26\n+F2.824380048579335e-05\n+sI31\n+F2.824380048579335e-05\n+sI-2\n+F0.14497542789357726\n+sI-40\n+F0.0002824380048579335\n+sI-37\n+F0.00019770660340055342\n+sI-36\n+F2.824380048579335e-05\n+sI-35\n+F0.0003389256058295202\n+sI-34\n+F0.0002259504038863468\n+sI-33\n+F5.64876009715867e-05\n+sI-32\n+F0.0011579958199175273\n+sI-30\n+F0.00031068180534372686\n+sI-29\n+F0.0001129752019431734\n+sI-28\n+F0.0006213636106874537\n+sI-27\n+F0.0006213636106874537\n+sI-26\n+F0.0010167768174885604\n+sI-25\n+F0.0014121900242896675\n+sI-24\
n+F0.0004519008077726936\n+sI-23\n+F0.000649607411173247\n+sI-22\n+F0.0015816528272044274\n+sI-21\n+F0.001440433824775461\n+sI-20\n+F0.0013274586228322874\n+sI-19\n+F0.001129752019431734\n+sI-18\n+F0.0021465288369202943\n+sI-17\n+F0.0013274586228322874\n+sI-16\n+F0.0027678924476077483\n+sI-15\n+F0.003135061853923062\n+sI-14\n+F0.004293057673840589\n+sI-13\n+F0.004151838671411623\n+sI-12\n+F0.007964751736993724\n+sI-11\n+F0.004349545274812176\n+sI-10\n+F0.007145681522905717\n+sI-9\n+F0.004603739479184315\n+sI-8\n+F0.006750268316104611\n+sI-7\n+F0.008727334350110146\n+sI-6\n+F0.014150144043382468\n+sI-5\n+F0.026520928656159952\n+sI-4\n+F0.09876857029881934\n+sI-3\n+F0.07123086482517083\n+sI-1\n+F0.30514602044851136\n+ss.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 models/MutModel_NA12878.p.gz
b
Binary file models/MutModel_NA12878.p.gz has changed
b
diff -r 000000000000 -r 6e75a84e9338 models/MutModel_SKCM-US_ICGC.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/MutModel_SKCM-US_ICGC.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,113200 @@\n+(dp0\n+S'HIGH_MUT_REGIONS'\n+p1\n+(lp2\n+(S'chr1'\n+p3\n+I2936000\n+I2941000\n+F0.0054\n+tp4\n+a(g3\n+I3326000\n+I3331000\n+F0.0038\n+tp5\n+a(g3\n+I4770000\n+I4774000\n+F0.003\n+tp6\n+a(g3\n+I7721000\n+I7727000\n+F0.004\n+tp7\n+a(g3\n+I11005000\n+I11011000\n+F0.0036666666666666666\n+tp8\n+a(g3\n+I11559000\n+I11564000\n+F0.0042\n+tp9\n+a(g3\n+I12783000\n+I12787000\n+F0.00375\n+tp10\n+a(g3\n+I12833000\n+I12839000\n+F0.005666666666666667\n+tp11\n+a(g3\n+I12851000\n+I12858000\n+F0.007\n+tp12\n+a(g3\n+I12882000\n+I12890000\n+F0.0045\n+tp13\n+a(g3\n+I12905000\n+I12910000\n+F0.0094\n+tp14\n+a(g3\n+I12916000\n+I12923000\n+F0.005285714285714286\n+tp15\n+a(g3\n+I12939000\n+I12945000\n+F0.003833333333333333\n+tp16\n+a(g3\n+I13181000\n+I13185000\n+F0.01025\n+tp17\n+a(g3\n+I16253000\n+I16267000\n+F0.0035714285714285713\n+tp18\n+a(g3\n+I16971000\n+I16978000\n+F0.00842857142857143\n+tp19\n+a(g3\n+I17081000\n+I17089000\n+F0.005\n+tp20\n+a(g3\n+I18689000\n+I18694000\n+F0.0032\n+tp21\n+a(g3\n+I18805000\n+I18811000\n+F0.008166666666666666\n+tp22\n+a(g3\n+I22900000\n+I22905000\n+F0.0034\n+tp23\n+a(g3\n+I26669000\n+I26674000\n+F0.0038\n+tp24\n+a(g3\n+I26686000\n+I26693000\n+F0.003\n+tp25\n+a(g3\n+I32277000\n+I32282000\n+F0.0032\n+tp26\n+a(g3\n+I34187000\n+I34194000\n+F0.003142857142857143\n+tp27\n+a(g3\n+I34327000\n+I34332000\n+F0.003\n+tp28\n+a(g3\n+I34660000\n+I34665000\n+F0.003\n+tp29\n+a(g3\n+I35221000\n+I35229000\n+F0.00375\n+tp30\n+a(g3\n+I38225000\n+I38229000\n+F0.004\n+tp31\n+a(g3\n+I42043000\n+I42052000\n+F0.006333333333333333\n+tp32\n+a(g3\n+I43775000\n+I43781000\n+F0.003\n+tp33\n+a(g3\n+I44593000\n+I44598000\n+F0.0042\n+tp34\n+a(g3\n+I45290000\n+I45299000\n+F0.003\n+tp35\n+a(g3\n+I46974000\n+I46980000\n+F0.0036666666666666666\n+tp36\n+a(g3\n+I47274000\n+I47286000\n+F0.0031666666666666666\n+tp37\n+a(g3\n+I47396000\n+I47405000\n+F0.004666666666666667\n+tp38\n+a(g3\n+I47512000\n+I47517000\n+F0.0036\n+tp39\n+a(g3\n+I47604000\n+I47613000\n+F0.0034444444444
444444\n+tp40\n+a(g3\n+I55116000\n+I55121000\n+F0.003\n+tp41\n+a(g3\n+I57255000\n+I57260000\n+F0.0048\n+tp42\n+a(g3\n+I57474000\n+I57483000\n+F0.00385\n+tp43\n+a(g3\n+I62670000\n+I62679000\n+F0.0035833333333333333\n+tp44\n+a(g3\n+I62735000\n+I62742000\n+F0.004571428571428572\n+tp45\n+a(g3\n+I74504000\n+I74509000\n+F0.0036\n+tp46\n+a(g3\n+I75034000\n+I75041000\n+F0.010285714285714285\n+tp47\n+a(g3\n+I75053000\n+I75057000\n+F0.003\n+tp48\n+a(g3\n+I78956000\n+I78961000\n+F0.0044\n+tp49\n+a(g3\n+I82406000\n+I82411000\n+F0.0034\n+tp50\n+a(g3\n+I85329000\n+I85333000\n+F0.0035\n+tp51\n+a(g3\n+I100150000\n+I100157000\n+F0.003142857142857143\n+tp52\n+a(g3\n+I111057000\n+I111063000\n+F0.005\n+tp53\n+a(g3\n+I112522000\n+I112527000\n+F0.0052\n+tp54\n+a(g3\n+I117309000\n+I117313000\n+F0.0035\n+tp55\n+a(g3\n+I118163000\n+I118168000\n+F0.0032\n+tp56\n+a(g3\n+I119425000\n+I119430000\n+F0.0052\n+tp57\n+a(g3\n+I120054000\n+I120059000\n+F0.003\n+tp58\n+a(g3\n+I120434000\n+I120440000\n+F0.0055\n+tp59\n+a(g3\n+I143765000\n+I143769000\n+F0.00425\n+tp60\n+a(g3\n+I145412000\n+I145418000\n+F0.0033333333333333335\n+tp61\n+a(g3\n+I145556000\n+I145564000\n+F0.003625\n+tp62\n+a(g3\n+I147228000\n+I147233000\n+F0.0032\n+tp63\n+a(g3\n+I147378000\n+I147383000\n+F0.0066\n+tp64\n+a(g3\n+I148887000\n+I148893000\n+F0.0031666666666666666\n+tp65\n+a(g3\n+I150441000\n+I150447000\n+F0.004\n+tp66\n+a(g3\n+I151771000\n+I151777000\n+F0.0033333333333333335\n+tp67\n+a(g3\n+I152055000\n+I152062000\n+F0.009857142857142858\n+tp68\n+a(g3\n+I152078000\n+I152088000\n+F0.0039\n+tp69\n+a(g3\n+I152125000\n+I152132000\n+F0.008571428571428572\n+tp70\n+a(g3\n+I152183000\n+I152197000\n+F0.006801587301587302\n+tp71\n+a(g3\n+I152273000\n+I152289000\n+F0.01525\n+tp72\n+a(g3\n+I152321000\n+I152333000\n+F0.010083333333333333\n+tp73\n+a(g3\n+I152380000\n+I152386000\n+F0.005333333333333333\n+tp74\n+a(g3\n+I152550000\n+I152554000\n+F0.003\n+tp75\n+a(g3\n+I152730000\n+I152735000\n+F0.0076\n+tp76\n+a(g3\n+I152782000\n+I152787000\n+F0
.003\n+tp77\n+a(g3\n+I152880000\n+I152885000\n+F0.005\n+tp78\n+a(g3\n+I152973000\n+I152977000\n+F0.00475\n+tp79\n+a(g3\n+I153175000\n+I153179000\n+F0.003\n+tp80\n+a(g3\n+I156637000\n+I156644000\n+F0.004142857142857143\n+tp81\n+a(g3\n+I156808000\n+I156818000\n+F0.0041\n+tp82\n+a(g3\n+I157510000\n+I157518000\n+F0.004125\n+tp83\n+a(g3\n+I157663000\n+I157671000\n+F0.00375\n+tp84\n+a(g3\n+I157769000\n+I157775000\n+F0"..b"2\n+s(S'CTT'\n+p40842\n+S'CAT'\n+p40843\n+tp40844\n+F0.18346281908990011\n+s(S'ACT'\n+p40845\n+S'AAT'\n+p40846\n+tp40847\n+F0.07618294451034664\n+s(S'CGT'\n+p40848\n+S'CCT'\n+p40849\n+tp40850\n+F0.13817495873614713\n+s(S'AGC'\n+p40851\n+S'AAC'\n+p40852\n+tp40853\n+F0.9428863868986694\n+s(S'TTG'\n+p40854\n+S'TGG'\n+p40855\n+tp40856\n+F0.17662337662337663\n+s(S'CCT'\n+p40857\n+S'CGT'\n+p40858\n+tp40859\n+F0.012925383343783845\n+s(S'GTC'\n+p40860\n+S'GAC'\n+p40861\n+tp40862\n+F0.1945316001792918\n+s(S'CAG'\n+p40863\n+S'CGG'\n+p40864\n+tp40865\n+F0.5335125792202804\n+s(S'TCC'\n+p40866\n+S'TTC'\n+p40867\n+tp40868\n+F0.9789979998095056\n+s(S'AAT'\n+p40869\n+S'ACT'\n+p40870\n+tp40871\n+F0.182884448305821\n+s(S'TTA'\n+p40872\n+S'TGA'\n+p40873\n+tp40874\n+F0.13382789317507418\n+s(S'CCC'\n+p40875\n+S'CAC'\n+p40876\n+tp40877\n+F0.014793608066337413\n+s(S'GTA'\n+p40878\n+S'GGA'\n+p40879\n+tp40880\n+F0.10955961331901182\n+s(S'TGG'\n+p40881\n+S'TAG'\n+p40882\n+tp40883\n+F0.9118915159944367\n+s(S'TAG'\n+p40884\n+S'TTG'\n+p40885\n+tp40886\n+F0.2581374321880651\n+s(S'AGG'\n+p40887\n+S'AAG'\n+p40888\n+tp40889\n+F0.956564235468345\n+s(S'TTC'\n+p40890\n+S'TGC'\n+p40891\n+tp40892\n+F0.2053956019043301\n+s(S'CGG'\n+p40893\n+S'CCG'\n+p40894\n+tp40895\n+F0.053501827040194884\n+s(S'GTG'\n+p40896\n+S'GAG'\n+p40897\n+tp40898\n+F0.1759090909090909\n+s(S'AAC'\n+p40899\n+S'ACC'\n+p40900\n+tp40901\n+F0.1224735322425409\n+s(S'CTG'\n+p40902\n+S'CGG'\n+p40903\n+tp40904\n+F0.31238707086930334\n+s(S'TCA'\n+p40905\n+S'TTA'\n+p40906\n+tp40907\n+F0.967667230782934\n+s(S'GGA'\n+p40908\n+S'GTA'
\n+p40909\n+tp40910\n+F0.015084859375088014\n+s(S'ACG'\n+p40911\n+S'AAG'\n+p40912\n+tp40913\n+F0.08193944526783824\n+s(S'GGT'\n+p40914\n+S'GAT'\n+p40915\n+tp40916\n+F0.9348146103597387\n+s(S'TAC'\n+p40917\n+S'TTC'\n+p40918\n+tp40919\n+F0.20689655172413793\n+s(S'TTG'\n+p40920\n+S'TAG'\n+p40921\n+tp40922\n+F0.18677685950413223\n+s(S'AAT'\n+p40923\n+S'ATT'\n+p40924\n+tp40925\n+F0.4430929626411816\n+s(S'GCC'\n+p40926\n+S'GGC'\n+p40927\n+tp40928\n+F0.030840687412912217\n+s(S'CTC'\n+p40929\n+S'CGC'\n+p40930\n+tp40931\n+F0.22385997236296637\n+s(S'CGG'\n+p40932\n+S'CTG'\n+p40933\n+tp40934\n+F0.06434226552984165\n+s(S'TGT'\n+p40935\n+S'TCT'\n+p40936\n+tp40937\n+F0.18178796942505815\n+s(S'ACA'\n+p40938\n+S'AAA'\n+p40939\n+tp40940\n+F0.3379728765167737\n+s(S'CGC'\n+p40941\n+S'CAC'\n+p40942\n+tp40943\n+F0.8391111111111111\n+s(S'GGT'\n+p40944\n+S'GTT'\n+p40945\n+tp40946\n+F0.040114085932468485\n+s(S'TTC'\n+p40947\n+S'TAC'\n+p40948\n+tp40949\n+F0.3044661074586262\n+s(S'GGG'\n+p40950\n+S'GCG'\n+p40951\n+tp40952\n+F0.011424739230925835\n+s(S'GCA'\n+p40953\n+S'GAA'\n+p40954\n+tp40955\n+F0.26330759853718\n+s(S'CCG'\n+p40956\n+S'CTG'\n+p40957\n+tp40958\n+F0.8896537212937358\n+s(S'CGA'\n+p40959\n+S'CAA'\n+p40960\n+tp40961\n+F0.9877651545245094\n+ssS'AVG_MUT_RATE'\n+p40962\n+F0.0005808273307844995\n+sS'INDEL_FREQ'\n+p40963\n+(dp40964\n+I1\n+F0.15646889163576436\n+sI2\n+F0.02101359703337454\n+sI3\n+F0.01581170168932839\n+sI4\n+F0.008549649773382777\n+sI5\n+F0.00303873094355171\n+sI6\n+F0.0019056448290070047\n+sI8\n+F0.00046353522867737955\n+sI9\n+F0.00319324268644417\n+sI10\n+F0.00010300782859497325\n+sI11\n+F0.0005150391429748661\n+sI12\n+F0.0012360939431396787\n+sI14\n+F0.0007210548001648126\n+sI15\n+F0.0003090234857849197\n+sI18\n+F0.0002060156571899465\n+sI-1\n+F0.36686238154099715\n+sI-37\n+F0.00015451174289245984\n+sI-30\n+F0.0014421096003296252\n+sI-28\n+F0.0005150391429748661\n+sI-27\n+F0.003090234857849197\n+sI-26\n+F0.000824062628759786\n+sI-25\n+F0.0016996291718170584\n+sI-24\
n+F0.003914297486608983\n+sI-23\n+F0.0012875978574371655\n+sI-22\n+F0.001493613514627112\n+sI-21\n+F0.005871446229913474\n+sI-20\n+F0.000412031314379893\n+sI-19\n+F0.004532344458178823\n+sI-18\n+F0.007828594973217966\n+sI-17\n+F0.0028842192006592504\n+sI-16\n+F0.0019571487433044914\n+sI-15\n+F0.004068809229501443\n+sI-14\n+F0.0036052740008240634\n+sI-13\n+F0.005562422744128555\n+sI-12\n+F0.008034610630407913\n+sI-11\n+F0.002987227029254224\n+sI-10\n+F0.00319324268644417\n+sI-9\n+F0.01112484548825711\n+sI-8\n+F0.009373712402142565\n+sI-7\n+F0.004892871858261229\n+sI-6\n+F0.018695920889987644\n+sI-5\n+F0.008910177173465185\n+sI-4\n+F0.017717346518335394\n+sI-3\n+F0.16002266172229093\n+sI-2\n+F0.12350638648537289\n+ss.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 models/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/README.md Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,34 @@
+# models
+Used by genReads.py for simulating various characteristics of NGS datasets.
+
+
+## mutation models
+
+* Used via '-m' parameter
+
+**MutModel_NA12878.p** - Mimic mutations via statistics derived from NA12878 germline variant calls.
+(Note: due to file size restrictions, this model is distributed gzip-compressed as MutModel_NA12878.p.gz and must be decompressed before use.)
+**MutModel_BRCA_US_ICGC.p** - Aggregate breast cancer mutation statistics from deidentified ICGC data.
+**MutModel_CLLE-ES_ICGC.p** - Aggregate leukemia mutation statistics from deidentified ICGC data.
+**MutModel_SKCM-US_ICGC.p** - Aggregate melanoma mutation statistics from deidentified ICGC data.
+
+
+## sequencing error models
+
+* Used via '-e' parameter
+
+**errorModel_toy.p** - Sequencing error statistics derived from in-house NGS data.
+
+
+## paired-end fragment length distribution model
+
+* Used via '--pe-model' parameter
+
+**fraglenModel_toy.p** - Fragment length statistics derived from in-house NGS data.
+
+
+## GC% coverage bias model
+
+* Used via '--gc-model' parameter
+
+**gcBias_toy.p** - GC% coverage bias statistics derived from in-house NGS data.
b
diff -r 000000000000 -r 6e75a84e9338 models/errorModel_pacbio_toy.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/errorModel_pacbio_toy.p Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,92 @@
+(lp1
+(lp2
+I0
+aI1
+aI2
+aI3
+aI4
+aI5
+aI6
+aI7
+aI8
+aI9
+aI10
+aI11
+aI12
+aI13
+aI14
+aI15
+aI16
+aI17
+aI18
+aI19
+aI20
+aI21
+aI22
+aI23
+aI24
+aI25
+aI26
+aI27
+aI28
+aI29
+aI30
+aI31
+aI32
+aI33
+aI34
+aI35
+aI36
+aI37
+aI38
+aI39
+aI40
+aI41
+aaI33
+aF0.050000000000000003
+a(lp3
+(lp4
+(lp5
+F0
+aF0.33000000000000002
+aF0.33000000000000002
+aF0.33000000000000002
+aa(lp6
+F0.33000000000000002
+aF0
+aF0.33000000000000002
+aF0.33000000000000002
+aa(lp7
+F0.33000000000000002
+aF0.33000000000000002
+aF0
+aF0.33000000000000002
+aa(lp8
+F0.33000000000000002
+aF0.33000000000000002
+aF0.33000000000000002
+aF0
+aaaF0.9375
+a(lp9
+F0.80000000000000004
+aF0.10000000000000001
+aF0.050000000000000003
+aF0.02
+aF0.014999999999999999
+aF0.01
+aF0.0050000000000000001
+aa(lp10
+I1
+aI2
+aI3
+aI4
+aI5
+aI6
+aI7
+aaF0.25
+a(lp11
+F0.25
+aF0.25
+aF0.25
+aF0.25
+aaa.
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 models/errorModel_toy.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/errorModel_toy.p Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,676666 @@\n+(lp1\n+(lp2\n+(lp3\n+cnumpy.core.multiarray\n+scalar\n+p4\n+(cnumpy\n+dtype\n+p5\n+(S\'f8\'\n+I0\n+I1\n+tRp6\n+(I3\n+S\'<\'\n+NNNI-1\n+I-1\n+I0\n+tbS\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp7\n+ag4\n+(g6\n+S\'\\xb0j\\xfd5\\xb1\\xee,?\'\n+tRp8\n+ag4\n+(g6\n+S\'r["\\xe5\\x97\\xccH?\'\n+tRp9\n+ag4\n+(g6\n+S\'5LG\\x94~\\xaaT?\'\n+tRp10\n+ag4\n+(g6\n+S\'\\xe1\\xa6\\xc6\\xe1*\\xe6K?\'\n+tRp11\n+ag4\n+(g6\n+S\'O\\xf2j\\xde\\xbd\\xffN?\'\n+tRp12\n+ag4\n+(g6\n+S\'\\x04\\x10~\\xe8\\x04\\xb3E?\'\n+tRp13\n+ag4\n+(g6\n+S\'\\xd4\\xd3\\xb4<\\x8b\\xbbF?\'\n+tRp14\n+ag4\n+(g6\n+S\'\\xdf\\x9e\\x87m\\xa8\\x0cQ?\'\n+tRp15\n+ag4\n+(g6\n+S\'~&\\xf5\\x15\\xb5\\x1dS?\'\n+tRp16\n+ag4\n+(g6\n+S\'5LG\\x94~\\xaaT?\'\n+tRp17\n+ag4\n+(g6\n+S\'\\x96\\xc4\\xd9\\xebq\\x99R?\'\n+tRp18\n+ag4\n+(g6\n+S\'5LG\\x94~\\xaaT?\'\n+tRp19\n+ag4\n+(g6\n+S\'7T\\x86\\x08\\x01\\x84_?\'\n+tRp20\n+ag4\n+(g6\n+S\'B\\x1fY9\\x1e\\xd5Y?\'\n+tRp21\n+ag4\n+(g6\n+S\'\\xdf\\x9e\\x87m\\xa8\\x0ca?\'\n+tRp22\n+ag4\n+(g6\n+S\'\\xaeb\\xbe\\xc1.\\x15b?\'\n+tRp23\n+ag4\n+(g6\n+S\'\\x04\\x10~\\xe8\\x04\\xb3e?\'\n+tRp24\n+ag4\n+(g6\n+S\'q\\xd7\\x02\\xab\\xd6_c?\'\n+tRp25\n+ag4\n+(g6\n+S\'\\x05\\x94\\x9d"\\xc6\\x1fk?\'\n+tRp26\n+ag4\n+(g6\n+S\'O\\xf2j\\xde\\xbd\\xffn?\'\n+tRp27\n+ag4\n+(g6\n+S\'}d\\xe5xTgp?\'\n+tRp28\n+ag4\n+(g6\n+S\'A\\x9b9\\xff\\\\ht?\'\n+tRp29\n+ag4\n+(g6\n+S\'\\xdf`\\x97\\n\\t\\xc3s?\'\n+tRp30\n+ag4\n+(g6\n+S\'\\x10\\x9d`\\xb6\\x82\\xbar?\'\n+tRp31\n+ag4\n+(g6\n+S\'\\xed\\xb7\\xc8\\xe9iZ~?\'\n+tRp32\n+ag4\n+(g6\n+S\'\\xbc\\xb9\\xef\\xa0\\x8f\\xac|?\'\n+tRp33\n+ag4\n+(g6\n+S\'\\xa4\\x1b\\x0b\\xcb\\xd20}?\'\n+tRp34\n+ag4\n+(g6\n+S\'(\\x1c\\xcd\\xdao\\x91\\x83?\'\n+tRp35\n+ag4\n+(g6\n+S\'Z\\xfb-rz\\x9a\\x86?\'\n+tRp36\n+ag4\n+(g6\n+S\'\\xa4Y\\xfb-rz\\x8a?\'\n+tRp37\n+ag4\n+(g6\n+S\'\\xb0\\x89u\\xe7\\x80\\x93\\x8b?\'\n+tRp38\n+ag4\n+(g6\n+S\'M\\t\\xa4\\x1b\\x0b\\xcb\\x92?\'\n+tRp39\n+ag4\n+(g6\n+S\'\\xa3\\xb6cB\\xe1h\\x96?\'\n+tRp40\n+ag4\n+(g6\n+S\'U\\n(;E\\x8c\\x9f?\'\n+tRp41\n+ag4
\n+(g6\n+S"\\x13\\xeb\\xce\\x01\'\\xb7\\xa5?"\n+tRp42\n+ag4\n+(g6\n+S\'%\\xaf\\xe6\\xdd\\xfb\\xef\\xb1?\'\n+tRp43\n+ag4\n+(g6\n+S\'Q\\xdb1\\xa1p4\\xc7?\'\n+tRp44\n+ag4\n+(g6\n+S\'U\\n(;E\\x8c\\xe0?\'\n+tRp45\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp46\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp47\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp48\n+aa(lp49\n+g4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp50\n+ag4\n+(g6\n+S\'\\x11\\xe3\\x8f\\x8d\\xa4\\xdd:?\'\n+tRp51\n+ag4\n+(g6\n+S\'e\\x88\\x10@\\xf8\\xa1C?\'\n+tRp52\n+ag4\n+(g6\n+S\'\\xf7<lCe\\x88P?\'\n+tRp53\n+ag4\n+(g6\n+S\'e\\x88\\x10@\\xf8\\xa1S?\'\n+tRp54\n+ag4\n+(g6\n+S\'B\\x1fY9\\x1e\\xd5I?\'\n+tRp55\n+ag4\n+(g6\n+S\'\\xe1\\xa6\\xc6\\xe1*\\xe6K?\'\n+tRp56\n+ag4\n+(g6\n+S\'\\xf7<lCe\\x88P?\'\n+tRp57\n+ag4\n+(g6\n+S\'M\\xea+j;&T?\'\n+tRp58\n+ag4\n+(g6\n+S\'\\x8b\\xf9\\x06\\xbbTHX?\'\n+tRp59\n+ag4\n+(g6\n+S\'5LG\\x94~\\xaaT?\'\n+tRp60\n+ag4\n+(g6\n+S\'\\xd4\\xd3\\xb4<\\x8b\\xbbV?\'\n+tRp61\n+ag4\n+(g6\n+S\'\\xe1\\xa6\\xc6\\xe1*\\xe6[?\'\n+tRp62\n+ag4\n+(g6\n+S\'\\x0f\\xdbP\\x19"\\x04`?\'\n+tRp63\n+ag4\n+(g6\n+S\'\\x11\\xe3\\x8f\\x8d\\xa4\\xddZ?\'\n+tRp64\n+ag4\n+(g6\n+S\'7T\\x86\\x08\\x01\\x84_?\'\n+tRp65\n+ag4\n+(g6\n+S\'\\x8b\\xf9\\x06\\xbbTHh?\'\n+tRp66\n+ag4\n+(g6\n+S\'Z\\xbd=\\x0f\\xdbPi?\'\n+tRp67\n+ag4\n+(g6\n+S\'6\\xd0f\\xce?\\x17j?\'\n+tRp68\n+ag4\n+(g6\n+S\'Z\\xbd=\\x0f\\xdbPi?\'\n+tRp69\n+ag4\n+(g6\n+S\'\\x11\\xe3\\x8f\\x8d\\xa4\\xddj?\'\n+tRp70\n+ag4\n+(g6\n+S\'}d\\xe5xTgp?\'\n+tRp71\n+ag4\n+(g6\n+S\'~\\xe8\\x04\\xb3\\x15\\xd4u?\'\n+tRp72\n+ag4\n+(g6\n+S\'\\xbbs\\xc0\\xc9m\\x89t?\'\n+tRp73\n+ag4\n+(g6\n+S\'\\x10_pS\\xe3pu?\'\n+tRp74\n+ag4\n+(g6\n+S\'g\\xce?\\x17\\x1a\\xc5{?\'\n+tRp75\n+ag4\n+(g6\n+S\'\\xd5\\x19\\xe4\\x13\\xad\\xde~?\'\n+tRp76\n+ag4\n+(g6\n+S\'Yw\\x0e8\\xb9-\\x81?\'\n+tRp77\n+ag4\n+(g6\n+S\'~\\x07}d\\xe5x\\x84?\'\n+tRp78\n+ag4\n+(g6\n+S\'\\x97H\\xf9%3\\x06\\x88?\'\n+tRp79\n+ag4\n+(g6\n+S\'\\x8b\\xda\\x8e\\t
\\x85\\xa3\\x89?\'\n+tRp80\n+ag4\n+(g6\n+S\'\\x03\\x8c^\\xaeCF\\x90?\'\n+tRp81\n+ag4\n+(g6\n+S\'"\\x04\\x10~\\xe8\\x04\\x93?\'\n+tRp82\n+ag4\n+(g6\n+S\'x\\xb1\\xcf\\xa4\\xbe\\xa2\\x96?\'\n+tRp83\n+ag4\n+(g6\n+S\'\\x0f\\xdbP\\x19"\\x04\\xa0?\'\n+tRp84\n+ag4\n+(g6\n+S\'\\x81tcaY\\x1a\\xa6?\'\n+tRp85\n+ag4\n+(g6\n+S\'YX\\x96\\x86\\xe9\\x88\\xb2?\'\n+tRp86\n+ag4\n+(g6\n+S\'\\xb1\\xcf\\xa4\\xbe\\xa2\\xb6\\xc7?\'\n+tRp87\n+ag4\n+(g6\n+S\'\\xe6:d\\x04\\xf1\\x05\\xe0?\'\n+tRp88\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp89\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp90\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp91\n+aa(lp92\n+g4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp93\n+ag4\n+(g6\n+S\'\\xf7<lCe\\x88 ?\'\n+tRp94\n+ag4\n+(g6\n+S\'\\xf7<lCe\\x88P?\'\n+tRp95\n+ag4\n+(g6\n+S\'~&\\xf5\\x15\\xb5\\x1dS?\'\n+tRp96\n+ag4\n+(g6\n+S\'\\xf7<lCe\\x88P?\'\n+tRp97\n+ag4\n+(g6\n+'..b'168192\n+ag4\n+(g6\n+S\']\\xe2\\xd9\\xbaQ\\t\\xa4?\'\n+tRp168193\n+ag4\n+(g6\n+S\'\\x809\\xd50\\x9c\\x1c\\xa7?\'\n+tRp168194\n+ag4\n+(g6\n+S\'R\\xb0\\x14F0N\\xab?\'\n+tRp168195\n+ag4\n+(g6\n+S\'wsk\\rP\\x1f\\xb0?\'\n+tRp168196\n+ag4\n+(g6\n+S\'k2\\xf9\\xca\\x9a\\xd9\\xb3?\'\n+tRp168197\n+ag4\n+(g6\n+S\'t\\xf8b\\xee\\xe6\\xd6\\xba?\'\n+tRp168198\n+ag4\n+(g6\n+S\'&\\xb8XX$\\xce\\xc4?\'\n+tRp168199\n+ag4\n+(g6\n+S\'\\xc0\\xf5i=\\xf79\\xb6?\'\n+tRp168200\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp168201\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp168202\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp168203\n+aa(lp168204\n+g4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp168205\n+ag4\n+(g6\n+S\'\\x08\\x06_\\x0b1P)?\'\n+tRp168206\n+ag4\n+(g6\n+S\'\\x86D\\x87\\xc8$\\xfcR?\'\n+tRp168207\n+ag4\n+(g6\n+S\'G%\\xf3\\xe9*&v?\'\n+tRp168208\n+ag4\n+(g6\n+S\'\\xf5[v/\\xa0\\x9cp?\'\n+tRp168209\n+ag4\n+(g6\n+S\'*\\xd7\\x80=:\\x0fn?\'\n+tRp168210\n+ag4\n+(g6\n+S\'\\x8a\\x
c76N=\\xa4o?\'\n+tRp168211\n+ag4\n+(g6\n+S\'\\xc9\\xe6\\xca,7zl?\'\n+tRp168212\n+ag4\n+(g6\n+S\'\\xb6<\\xe2P\\xa6\\xc6s?\'\n+tRp168213\n+ag4\n+(g6\n+S\'\\x86D\\x87\\xc8$\\xfcr?\'\n+tRp168214\n+ag4\n+(g6\n+S"\\xe74=\\xd9\'\\x91t?"\n+tRp168215\n+ag4\n+(g6\n+S\'\\xf5[v/\\xa0\\x9cp?\'\n+tRp168216\n+ag4\n+(g6\n+S\'\\xb6<\\xe2P\\xa6\\xc6s?\'\n+tRp168217\n+ag4\n+(g6\n+S\'w\\x1dNr\\xac\\xf0v?\'\n+tRp168218\n+ag4\n+(g6\n+S\'\\xb6<\\xe2P\\xa6\\xc6s?\'\n+tRp168219\n+ag4\n+(g6\n+S\'\\x99\\xeeo\\xa4\\xb5\\xaf{?\'\n+tRp168220\n+ag4\n+(g6\n+S\'&T\\xd1\\xb7!g\\x81?\'\n+tRp168221\n+ag4\n+(g6\n+S\'\\x17-\\x98a\\xa9[\\x85?\'\n+tRp168222\n+ag4\n+(g6\n+S\'>\\xd0\\xfe{b\\xcc\\x81?\'\n+tRp168223\n+ag4\n+(g6\n+S\'\\x8f\\x99{6\\xedU\\x87?\'\n+tRp168224\n+ag4\n+(g6\n+S\'\\xce\\xb8\\x0f\\x15\\xe7+\\x84?\'\n+tRp168225\n+ag4\n+(g6\n+S\'\\xa8\\x15\\xa9\\xfa-\\xbb\\x87?\'\n+tRp168226\n+ag4\n+(g6\n+S\'\\xc0\\x91\\xd6\\xben \\x88?\'\n+tRp168227\n+ag4\n+(g6\n+S\'Pz\\xe7W\\xf3\\x7f\\x8a?\'\n+tRp168228\n+ag4\n+(g6\n+S\'\\x99\\xeeo\\xa4\\xb5\\xaf\\x8b?\'\n+tRp168229\n+ag4\n+(g6\n+S\'\\r\\xd8\\xa3\\xf3\\xe0\\x01\\x91?\'\n+tRp168230\n+ag4\n+(g6\n+S\'\\xe9\\x9d_\\xcd\\xffi\\x90?\'\n+tRp168231\n+ag4\n+(g6\n+S\'\\x0bo\\x81\\xff\\x08)\\x95?\'\n+tRp168232\n+ag4\n+(g6\n+S\'i\\xf6\\x14\\x1c4\\xe5\\x9a?\'\n+tRp168233\n+ag4\n+(g6\n+S\',@\\xa31\\x12\\xe8\\x99?\'\n+tRp168234\n+ag4\n+(g6\n+S\'\\xe1b\\xf8\\xf0w\\xdf\\x9c?\'\n+tRp168235\n+ag4\n+(g6\n+S\',\\xb3\\xdc\\xe8q\\x80\\xa1?\'\n+tRp168236\n+ag4\n+(g6\n+S\'>\\xd0\\xfe{b\\xcc\\xa1?\'\n+tRp168237\n+ag4\n+(g6\n+S\'}|Y\\xa3\\xfc\\t\\xa7?\'\n+tRp168238\n+ag4\n+(g6\n+S\'\\xc9\\xe6\\xca,7z\\xac?\'\n+tRp168239\n+ag4\n+(g6\n+S\'\\xfd5\\xf6~,\\xd6\\xb2?\'\n+tRp168240\n+ag4\n+(g6\n+S\'L\\x96PE\\xdf\\x86\\xbc?\'\n+tRp168241\n+ag4\n+(g6\n+S\'\\xf6\\xe8<x@\\x04\\xc9?\'\n+tRp168242\n+ag4\n+(g6\n+S\'\\x01#\\xb6\\xa6\\xf2\\xf2\\xc3?\'\n+tRp168243\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp168244\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+
tRp168245\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp168246\n+aa(lp168247\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp168248\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp168249\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aaaa(lp168250\n+I0\n+aI1\n+aI2\n+aI3\n+aI4\n+aI5\n+aI6\n+aI7\n+aI8\n+aI9\n+aI10\n+aI11\n+aI12\n+aI13\n+aI14\n+aI15\n+aI16\n+aI17\n+aI18\n+aI19\n+aI20\n+aI21\n+aI22\n+aI23\n+aI24\n+aI25\n+aI26\n+aI27\n+aI28\n+aI29\n+aI30\n+aI31\n+aI32\n+aI33\n+aI34\n+aI35\n+aI36\n+aI37\n+aI38\n+aI39\n+aI40\n+aI41\n+aaI33\n+aF0.0066381646884956557\n+a(lp168251\n+(lp168252\n+(lp168253\n+F0\n+aF0.49180000000000001\n+aF0.3377\n+aF0.17050000000000001\n+aa(lp168254\n+F0.52380000000000004\n+aF0\n+aF0.2661\n+aF0.21010000000000001\n+aa(lp168255\n+F0.37540000000000001\n+aF0.23549999999999999\n+aF0\n+aF0.38900000000000001\n+aa(lp168256\n+F0.2505\n+aF0.25519999999999998\n+aF0.49419999999999997\n+aF0\n+aaaF0.01\n+a(lp168257\n+F0.999\n+aF0.001\n+aa(lp168258\n+I1\n+aI2\n+aaF0.40000000000000002\n+a(lp168259\n+F0.25\n+aF0.25\n+aF0.25\n+aF0.25\n+aaa.\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 6e75a84e9338 models/fraglenModel_toy.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/fraglenModel_toy.p Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,1536 @@\n+(lp1\n+(lp2\n+I1\n+aI32\n+aI33\n+aI34\n+aI35\n+aI36\n+aI37\n+aI38\n+aI39\n+aI40\n+aI41\n+aI42\n+aI43\n+aI44\n+aI45\n+aI46\n+aI47\n+aI48\n+aI49\n+aI50\n+aI51\n+aI52\n+aI53\n+aI54\n+aI55\n+aI56\n+aI57\n+aI58\n+aI59\n+aI60\n+aI61\n+aI62\n+aI63\n+aI64\n+aI65\n+aI66\n+aI67\n+aI68\n+aI69\n+aI70\n+aI71\n+aI72\n+aI73\n+aI74\n+aI75\n+aI76\n+aI77\n+aI78\n+aI79\n+aI80\n+aI81\n+aI82\n+aI83\n+aI84\n+aI85\n+aI86\n+aI87\n+aI88\n+aI89\n+aI90\n+aI91\n+aI92\n+aI93\n+aI94\n+aI95\n+aI96\n+aI97\n+aI98\n+aI99\n+aI100\n+aI101\n+aI102\n+aI103\n+aI104\n+aI105\n+aI106\n+aI107\n+aI108\n+aI109\n+aI110\n+aI111\n+aI112\n+aI113\n+aI114\n+aI115\n+aI116\n+aI117\n+aI118\n+aI119\n+aI120\n+aI121\n+aI122\n+aI123\n+aI124\n+aI125\n+aI126\n+aI127\n+aI128\n+aI129\n+aI130\n+aI131\n+aI132\n+aI133\n+aI134\n+aI135\n+aI136\n+aI137\n+aI138\n+aI139\n+aI140\n+aI141\n+aI142\n+aI143\n+aI144\n+aI145\n+aI146\n+aI147\n+aI148\n+aI149\n+aI150\n+aI151\n+aI152\n+aI153\n+aI154\n+aI155\n+aI156\n+aI157\n+aI158\n+aI159\n+aI160\n+aI161\n+aI162\n+aI163\n+aI164\n+aI165\n+aI166\n+aI167\n+aI168\n+aI169\n+aI170\n+aI171\n+aI172\n+aI173\n+aI174\n+aI175\n+aI176\n+aI177\n+aI178\n+aI179\n+aI180\n+aI181\n+aI182\n+aI183\n+aI184\n+aI185\n+aI186\n+aI187\n+aI188\n+aI189\n+aI190\n+aI191\n+aI192\n+aI193\n+aI194\n+aI195\n+aI196\n+aI197\n+aI198\n+aI199\n+aI200\n+aI201\n+aI202\n+aI203\n+aI204\n+aI205\n+aI206\n+aI207\n+aI208\n+aI209\n+aI210\n+aI211\n+aI212\n+aI213\n+aI214\n+aI215\n+aI216\n+aI217\n+aI218\n+aI219\n+aI220\n+aI221\n+aI222\n+aI223\n+aI224\n+aI225\n+aI226\n+aI227\n+aI228\n+aI229\n+aI230\n+aI231\n+aI232\n+aI233\n+aI234\n+aI235\n+aI236\n+aI237\n+aI238\n+aI239\n+aI240\n+aI241\n+aI242\n+aI243\n+aI244\n+aI245\n+aI246\n+aI247\n+aI248\n+aI249\n+aI250\n+aI251\n+aI252\n+aI253\n+aI254\n+aI255\n+aI256\n+aI257\n+aI258\n+aI259\n+aI260\n+aI261\n+aI262\n+aI263\n+aI264\n+aI265\n+aI266\n+aI267\n+aI268\n+aI269\n+aI270\n+aI271\n+aI272\n+aI273\n+aI274\n+aI275\n+aI276\n+aI277\n+aI278\n+aI279\n+aI280\n+aI281\n+aI282\n+aI283\n+aI284\n+aI
285\n+aI286\n+aI287\n+aI288\n+aI289\n+aI290\n+aI291\n+aI292\n+aI293\n+aI294\n+aI295\n+aI296\n+aI297\n+aI298\n+aI299\n+aI300\n+aI301\n+aI302\n+aI303\n+aI304\n+aI305\n+aI306\n+aI307\n+aI308\n+aI309\n+aI310\n+aI311\n+aI312\n+aI313\n+aI314\n+aI315\n+aI316\n+aI317\n+aI318\n+aI319\n+aI320\n+aI321\n+aI322\n+aI323\n+aI324\n+aI325\n+aI326\n+aI327\n+aI328\n+aI329\n+aI330\n+aI331\n+aI332\n+aI333\n+aI334\n+aI335\n+aI336\n+aI337\n+aI338\n+aI339\n+aI340\n+aI341\n+aI342\n+aI343\n+aI344\n+aI345\n+aI346\n+aI347\n+aI348\n+aI349\n+aI350\n+aI351\n+aI352\n+aI353\n+aI354\n+aI355\n+aI356\n+aI357\n+aI358\n+aI359\n+aI360\n+aI361\n+aI362\n+aI363\n+aI364\n+aI365\n+aI366\n+aI367\n+aI368\n+aI369\n+aI370\n+aI371\n+aI372\n+aI373\n+aI374\n+aI375\n+aI376\n+aI377\n+aI378\n+aI379\n+aI380\n+aI381\n+aI382\n+aI383\n+aI384\n+aI385\n+aI386\n+aI387\n+aI388\n+aI389\n+aI390\n+aI391\n+aI392\n+aI393\n+aI394\n+aI395\n+aI396\n+aI397\n+aI398\n+aI399\n+aI400\n+aI401\n+aI402\n+aI403\n+aI404\n+aI405\n+aI406\n+aI407\n+aI408\n+aI409\n+aI410\n+aI411\n+aI412\n+aI413\n+aI414\n+aI415\n+aI416\n+aI417\n+aI418\n+aI419\n+aI420\n+aI421\n+aI422\n+aI423\n+aI424\n+aI425\n+aI426\n+aI427\n+aI428\n+aI429\n+aI430\n+aI431\n+aI432\n+aI433\n+aI434\n+aI435\n+aI436\n+aI437\n+aI438\n+aI439\n+aI440\n+aI441\n+aI442\n+aI443\n+aI444\n+aI445\n+aI446\n+aI447\n+aI448\n+aI449\n+aI450\n+aI451\n+aI452\n+aI453\n+aI454\n+aI455\n+aI456\n+aI457\n+aI458\n+aI459\n+aI460\n+aI461\n+aI462\n+aI463\n+aI464\n+aI465\n+aI466\n+aI467\n+aI468\n+aI469\n+aI470\n+aI471\n+aI472\n+aI473\n+aI474\n+aI475\n+aI476\n+aI477\n+aI478\n+aI479\n+aI480\n+aI481\n+aI482\n+aI483\n+aI484\n+aI485\n+aI486\n+aI487\n+aI488\n+aI489\n+aI490\n+aI491\n+aI492\n+aI493\n+aI494\n+aI495\n+aI496\n+aI497\n+aI498\n+aI499\n+aI500\n+aI501\n+aI502\n+aI503\n+aI504\n+aI505\n+aI506\n+aI507\n+aI508\n+aI509\n+aI510\n+aI511\n+aI512\n+aI513\n+aI514\n+aI515\n+aI516\n+aI517\n+aI518\n+aI519\n+aI520\n+aI521\n+aI522\n+aI523\n+aI524\n+aI525\n+aI526\n+aI527\n+aI528\n+aI529\n+aI530\n+aI531\n+aI532\n+aI533\n+aI534\n+aI
535\n+aI536\n+aI537\n+aI538\n+aI539\n+aI540\n+aI541\n+aI542\n+aI543\n+aI544\n+aI545\n+aI546\n+aI547\n+aI548\n+aI549\n+aI550\n+aI551\n+aI552\n+aI553\n+aI554\n+aI555\n+aI556\n+aI557\n+aI558\n+aI559\n+aI560\n+aI561\n+aI562\n+aI563\n+aI564\n+aI565\n+aI566\n+aI567\n+aI568\n+aI569\n+aI570\n+aI571\n+aI572\n+aI573\n+aI574\n+aI575\n+aI576\n+aI577\n+aI578\n+aI579\n+aI580\n+aI581\n+aI582\n+aI583\n+aI584\n+aI585\n+aI586\n+aI587\n+aI588\n+aI589\n+aI590\n+aI591\n+aI592\n+aI593\n+aI594\n+aI595\n+aI596\n+aI597\n+aI598\n+aI599\n+aI600\n+aI601\n+aI602\n+aI603\n+aI604\n+aI605\n+aI606\n+aI607\n'..b'374e-05\n+aF1.6167025312883347e-05\n+aF1.4814598099342955e-05\n+aF1.4262586991775448e-05\n+aF1.4462691018268669e-05\n+aF1.4428190324045701e-05\n+aF1.4345388657910574e-05\n+aF1.4027982271059258e-05\n+aF1.4345388657910574e-05\n+aF1.3558772829626877e-05\n+aF1.3068862971660715e-05\n+aF1.3489771441180939e-05\n+aF1.260655366907293e-05\n+aF1.2544452419471585e-05\n+aF1.2116643811106768e-05\n+aF1.1750936452343293e-05\n+aF1.2758356723653994e-05\n+aF1.1343828260512259e-05\n+aF1.206834283919461e-05\n+aF1.2254646587998644e-05\n+aF1.1454230482025759e-05\n+aF1.1433530065491978e-05\n+aF1.0550312293383967e-05\n+aF1.0398509238802903e-05\n+aF1.0198405212309682e-05\n+aF1.0488211043782622e-05\n+aF9.8740986866137725e-06\n+aF9.9292997973705235e-06\n+aF8.8183774433909164e-06\n+aF9.3082873013570785e-06\n+aF9.2254856352219511e-06\n+aF9.0322817475733251e-06\n+aF8.8252775822355101e-06\n+aF8.5837727226747251e-06\n+aF8.2663663358234098e-06\n+aF8.4319696680936611e-06\n+aF8.0524620316410011e-06\n+aF8.2525660581342224e-06\n+aF8.0524620316410011e-06\n+aF7.5763524513640264e-06\n+aF7.5280514794518699e-06\n+aF7.3210473141140549e-06\n+aF7.2589460645127102e-06\n+aF7.4797505075397125e-06\n+aF7.0243413437965197e-06\n+aF7.279646481046492e-06\n+aF6.679334401566828e-06\n+aF6.016921072485821e-06\n+aF7.2589460645127102e-06\n+aF6.4999307916073885e-06\n+aF6.3619280147155118e-06\n+aF5.9755202394182573e-06\n+aF6.1549238493776968e-06\n+aF5.9
06518850972319e-06\n+aF6.2515257932020106e-06\n+aF5.7133149633236921e-06\n+aF5.8582178790601625e-06\n+aF5.5546117698980336e-06\n+aF5.906518850972319e-06\n+aF5.5546117698980336e-06\n+aF5.0578017730872779e-06\n+aF5.0785021896210598e-06\n+aF4.9680999681075586e-06\n+aF5.1889044111345609e-06\n+aF4.7886963581481191e-06\n+aF4.7541956639251499e-06\n+aF4.9473995515737767e-06\n+aF4.8024966358373064e-06\n+aF4.6920944143238052e-06\n+aF4.5126908043643657e-06\n+aF4.4505895547630211e-06\n+aF4.0710819183103602e-06\n+aF4.2228849728914242e-06\n+aF4.6299931647224606e-06\n+aF4.4298891382292392e-06\n+aF4.3401873332495199e-06\n+aF3.9468794191076717e-06\n+aF3.9468794191076717e-06\n+aF3.7053745595468871e-06\n+aF3.4293690057631342e-06\n+aF3.98138011333064e-06\n+aF3.8709778918171389e-06\n+aF3.4293690057631342e-06\n+aF3.3879681726955709e-06\n+aF3.5811720603441982e-06\n+aF3.7674758091482318e-06\n+aF3.2844660900266634e-06\n+aF3.2775659511820697e-06\n+aF3.1809640073577559e-06\n+aF2.8083565097496891e-06\n+aF3.4431692834523215e-06\n+aF2.8428572039726583e-06\n+aF3.1188627577564117e-06\n+aF2.8980583147294089e-06\n+aF2.9532594254861595e-06\n+aF2.697954288236188e-06\n+aF2.9739598420199413e-06\n+aF2.649653316324031e-06\n+aF2.442649150986216e-06\n+aF2.7048544270807816e-06\n+aF2.4495492898308101e-06\n+aF2.2218447079592137e-06\n+aF2.490950122898373e-06\n+aF2.5185506782767485e-06\n+aF2.1114424864457121e-06\n+aF2.2494452633375888e-06\n+aF1.9941401260876173e-06\n+aF2.1942441525808382e-06\n+aF2.1528433195132754e-06\n+aF1.9389390153308667e-06\n+aF1.9320388764862726e-06\n+aF1.876837765729522e-06\n+aF1.7319348499930515e-06\n+aF1.5732316565673934e-06\n+aF1.759535405371427e-06\n+aF1.71123443345927e-06\n+aF1.6146324896349564e-06\n+aF1.6974341557700825e-06\n+aF1.4421290185201106e-06\n+aF1.428328740830923e-06\n+aF1.7871359607498023e-06\n+aF1.6008322119457689e-06\n+aF1.3041262416282341e-06\n+aF1.1523231870471697e-06\n+aF1.3524272135403908e-06\n+aF1.3248266581620155e-06\n+aF1.1868238812701388e-06\n+aF1.3731276300741724
e-06\n+aF1.3938280466079539e-06\n+aF1.0971220762904191e-06\n+aF1.1730236035809513e-06\n+aF1.0764216597566377e-06\n+aF1.2558252697160772e-06\n+aF1.2351248531822958e-06\n+aF9.1081832748638571e-07\n+aF1.104022215135013e-06\n+aF1.1730236035809513e-06\n+aF9.5911929939854242e-07\n+aF9.9361999362151159e-07\n+aF9.5911929939854242e-07\n+aF9.3151874402016713e-07\n+aF7.9351596712829056e-07\n+aF7.9351596712829056e-07\n+aF9.8671985477691793e-07\n+aF8.4181693904044738e-07\n+aF7.8661582828369668e-07\n+aF7.7971568943910291e-07\n+aF7.6591541174991527e-07\n+aF8.6941749441882267e-07\n+aF7.5211513406072762e-07\n+aF7.5211513406072762e-07\n+aF7.038141621485707e-07\n+aF7.8661582828369668e-07\n+aF7.1761443983775845e-07\n+aa.\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 6e75a84e9338 models/gcBias_toy.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/models/gcBias_toy.p Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,106 @@
+(lp1
+(lp2
+I0
+aI1
+aI2
+aI3
+aI4
+aI5
+aI6
+aI7
+aI8
+aI9
+aI10
+aI11
+aI12
+aI13
+aI14
+aI15
+aI16
+aI17
+aI18
+aI19
+aI20
+aI21
+aI22
+aI23
+aI24
+aI25
+aI26
+aI27
+aI28
+aI29
+aI30
+aI31
+aI32
+aI33
+aI34
+aI35
+aI36
+aI37
+aI38
+aI39
+aI40
+aI41
+aI42
+aI43
+aI44
+aI45
+aI46
+aI47
+aI48
+aI49
+aI50
+aa(lp3
+F0.25948222342397126
+aF0.39624266403291142
+aF0.4887289131998323
+aF0.58490142216892149
+aF0.72795041437474761
+aF0.83142976109689437
+aF0.90278995651523053
+aF0.94308737171956836
+aF0.9647837065785595
+aF0.98043833696270022
+aF0.98678943129526919
+aF0.99333196124574297
+aF0.99931031002740578
+aF0.99891722324641796
+aF0.99938114762812635
+aF1.0012185663383528
+aF1.0108245084926835
+aF1.0061430923013108
+aF1.0482627430005476
+aF1.0553377046446224
+aF1.1361109625621573
+aF1.0035314993564972
+aF0.99118143691482441
+aF0.98603848996853349
+aF1.0310994134455291
+aF0.98087666280842301
+aF0.97459828603729215
+aF0.96834984174682803
+aF0.96150287684694935
+aF0.95255766555556642
+aF0.9408772817987261
+aF0.92826381877804132
+aF0.9087390607016832
+aF0.87689221775375203
+aF0.84997147227954473
+aF0.82049155767913973
+aF0.78099591550325942
+aF0.74627130435236266
+aF0.72038401772286875
+aF0.65924330136812492
+aF0.64676003293480311
+aF0.59094303136879611
+aF0.55917194725499264
+aF0.56567660927491992
+aF0.46950852172658802
+aF0.41737461022004291
+aF0.37560923897559001
+aF0.28556141460184581
+aF0.26371003192674147
+aF0.20507778097581639
+aF0
+aa.
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 models/genReadsTumorTutorial.zip
b
Binary file models/genReadsTumorTutorial.zip has changed
b
diff -r 000000000000 -r 6e75a84e9338 neat_genreads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/neat_genreads.xml Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,632 @@\n+<tool id="neat_genreads"\n+      name="NEAT-genReads"\n+      version="1.0.0"\n+\t  profile="16.04">\n+  <description>is a fine-grained read simulator</description>\n+  <requirements>\n+    <requirement type="package" version="1.9.1">numpy</requirement>\n+  </requirements>\n+  <command detect_errors="exit_code">\n+    <![CDATA[\n+    python2 $__tool_directory__/genReads.py\n+\t\t#if $in_type.input_type == "built-in":\n+\t\t  -r ${in_type.reference.path}\n+\t\t#else:\n+\t\t  -r ${in_type.reference}\n+\t\t#end if\n+\t\t-R $read_length\n+\t\t-c $coverage\n+\t\t#if $stats.error_model_cond.error_model == \'average_rate\' and $stats.error_model_cond.error_rate != \'\':\n+\t\t  -E $stats.error_model_cond.error_rate\n+\t\t#elif $stats.error_model_cond.error_model == \'error_model_file\':\n+\t\t  -e $stats.error_model_cond.error_file\n+\t\t#end if\n+\t\t#if $stats.mut_rate_cond.mut_rate == \'average_rate\' and $stats.mut_rate_cond.error_rate != \'\':\n+\t\t  -M $stats.mut_rate_cond.error_rate\n+\t\t#elif $stats.mut_rate_cond.mut_rate == \'error_model_file\':\n+\t\t  -m stats.mut_rate_cond.error_file\n+\t\t#elif $stats.mut_rate_cond.mut_rate == \'error_model_bed\':\n+\t\t  -Mb stats.mut_rate_cond.error_file_bed\n+\t\t#end if\n+\t\t#if $stats.gc_file:\n+\t\t  --gc-model $stats.gc_file\n+\t\t#end if\n+\t\t#if $stats.ploidy != 2 and $stats.ploidy > 0:\n+\t\t  -p $stats.ploidy\n+\t\t#end if\n+\t\t-o $out_options.prefix\n+\t\t#if $lib_type_cond.lib_type == "paired":\n+\t\t  #if $lib_type_cond.insert_mod_cond.insert_mod == \'fixed_insert\':\n+\t\t    --pe $lib_type_cond.insert_mod_cond.frag_length $lib_type_cond.insert_mod_cond.frag_length_sd\n+\t\t  #else:\n+\t\t    --pe-model $lib_type_cond.insert_mod_cond.insert_file\n+\t\t  #end if\n+\t    #end if\n+\t\t$out_options.golden_bam\n+\t\t$out_options.golden_vcf\n+\t\t#if $target_vcf_cond.target_vcf == "use_vcf":\n+\t\t  -v ${target_vcf_cond.target_file}\n+\t\t#end if\n+\t\t#if $target_cond.target == 
"targeted":\n+\t\t  -t ${target_cond.target_file}\n+\t\t  -to ${target_cond.off_target}\n+\t\t#end if\n+\t\t#if $stats.seed != "":\n+\t\t  --rng $stats.seed\n+\t\t#end if\n+\t\t$out_options.compress\n+\t\t$out_options.bypass\n+\t\t#if $out_options.compress:\n+\t\t\t#if not $out_options.bypass:\n+\t\t\t  && mv "${out_options.prefix}_read1.fq.gz" read1.fq.gz\n+\t\t\t  #if $lib_type_cond.lib_type == "paired":\n+\t\t\t\t  && mv "${out_options.prefix}_read2.fq.gz" read2.fq.gz\n+\t\t\t\t#end if\n+\t\t\t#end if\n+\t\t\t#if $out_options.golden_bam != "":\n+\t\t\t  && mv "${out_options.prefix}_golden.bam" golden.bam\n+\t\t\t#end if\n+\t\t\t#if $out_options.golden_vcf != "":\n+\t\t\t  && gunzip "${out_options.prefix}_golden.vcf.gz" && mv "${out_options.prefix}_golden.vcf" golden.vcf\n+\t\t\t#end if\n+\t\t#else:\n+\t\t\t#if not $out_options.bypass:\n+\t\t\t  && mv "${out_options.prefix}_read1.fq" read1.fq\n+\t\t\t  #if $lib_type_cond.lib_type == "paired":\n+\t\t\t\t  && mv "${out_options.prefix}_read2.fq" read2.fq\n+\t\t\t\t#end if\n+\t\t\t#end if\n+\t\t\t#if $out_options.golden_bam != "":\n+\t\t\t  && mv "${out_options.prefix}_golden.bam" golden.bam\n+\t\t\t#end if\n+\t\t\t#if $out_options.golden_vcf != "":\n+\t\t\t  && mv "${out_options.prefix}_golden.vcf" golden.vcf\n+\t\t\t#end if\n+\t\t#end if\n+    ]]>\n+  </command>\n+  <inputs>\n+\n+\t<conditional name="in_type">\n+\t  <param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?">\n+\t\t<option value="built-in">Built-in</option>\n+\t\t<option value="history">History file</option>\n+\t  </param>\n+\t  <when value="built-in">\n+\t\t<param name="reference"\n+\t\t\t   type="select"\n+\t\t\t   label="Select a built-in reference sequence"\n+\t\t\t   help="The reference sequence that will be used as the basis for the simulated reads">\n+\t\t  <options from_data_table="all_fasta" />\n+\t\t</param>\n+\t  </when>\n+\t  <when value="history">\n+\t\t<param name="reference"\n+\t\t\t   
type="data"\n+\t\t\t   format="fasta"\n+\t\t\t   label="The reference sequence (FASTA format)"\n+\t\t\t   help="The reference sequence that will be used as the basis for the simulated reads"\n+\t\t/>\n+\t  </when>\n+\t</conditional>\n+\t\n+\t<param name="read_length"\n+\t\t type="integer"\n+\t\t value="101"\n+\t\t size="4"\n+\t\t min="30"\n+\t\t label="Read length"\n+\t\t help="The read l'..b'+\t\t\t</conditional>\n+\t\t\t<section name="stats">\n+\t\t\t\t<param name="seed" value="1"/>\n+\t\t\t</section>\n+\t\t\t<param name="read_length" value="101"/>\n+\t\t\t<section name="out_options">\n+\t\t\t\t<param name="prefix" value="out"/>\n+\t\t\t\t<param name="golden_bam" value="true"/>\n+\t\t\t\t<param name="golden_vcf" value="true"/>\n+\t\t\t\t<param name="compress" value="false"/>\n+\t\t\t</section>\n+\t\t\t<conditional name="target_cond">\n+\t\t\t  <param name="target" value="targeted"/>\n+\t\t\t  <param name="target_file" value="chrMT-Targets.bed"/>\n+\t\t\t  <param name="off_target" value="0.02"/>\n+\t\t\t</conditional>\n+\t\t\t<output name="out_file1" file="chrMT-PE-VCF-BAM-Targeted_read1.fq" compare="diff"/>\n+\t\t\t<output name="out_file2" file="chrMT-PE-VCF-BAM-Targeted_read2.fq" compare="diff"/>\n+\t\t\t<output name="out_bam" file="chrMT-PE-VCF-BAM-Targeted.bam" compare="diff"/>\n+\t\t\t<output name="out_vcf" file="chrMT-PE-VCF-BAM-Targeted.vcf" compare="diff" lines_diff="2"/>\n+\t\t\t<assert_stdout has_text="Writing output VCF..."/>\n+\t\t</test>\n+\n+\t\t<test>\n+\t\t\t<conditional name="in_type">\n+\t\t\t\t<param name="input_type" value="history"/>\n+\t\t\t  <param name="reference" value="chrMT.fa" format="fasta"/>\n+\t\t\t</conditional>\n+\t\t\t<conditional name="lib_type_cond">\n+\t\t\t\t<param name="lib_type" value="paired"/>\n+\t\t\t\t<conditional name="insert_mod_cond">\n+\t\t\t\t\t<param name="insert_mod" value="fixed_insert"/>\n+\t\t\t\t\t<param name="frag_length" value="300"/>\n+\t\t\t\t\t<param name="frag_length_sd" 
value="30"/>\n+\t\t\t\t</conditional>\n+\t\t\t</conditional>\n+\t\t\t<section name="stats">\n+\t\t\t\t<param name="seed" value="1"/>\n+\t\t\t</section>\n+\t\t\t<param name="read_length" value="101"/>\n+\t\t\t<section name="out_options">\n+\t\t\t\t<param name="prefix" value="out"/>\n+\t\t\t\t<param name="golden_bam" value="true"/>\n+\t\t\t\t<param name="golden_vcf" value="true"/>\n+\t\t\t\t<param name="compress" value="true"/>\n+\t\t\t</section>\n+\t\t\t<!-- The decompress does not seem to work \n+\t\t\t<output name="out_file1" file="chrMT-PE-VCF-BAM-gz_read1.fq.gz" compare="diff" decompress="true"/>\n+\t\t\t<output name="out_file2" file="chrMT-PE-VCF-BAM-gz_read2.fq.gz" compare="diff" decompress="true"/>\n+\t\t\t-->\n+\t\t\t<output name="out_bam" file="chrMT-PE-VCF-BAM-gz.bam" compare="diff"/>\n+\t\t\t<output name="out_vcf" file="chrMT-PE-VCF-BAM-gz.vcf" compare="diff" lines_diff="2"/>\n+\t\t\t<assert_stdout has_text="Writing output VCF..."/>\n+\t\t</test>\n+\n+\t\t<test>\n+\t\t\t<conditional name="in_type">\n+\t\t\t\t<param name="input_type" value="history"/>\n+\t\t\t  <param name="reference" value="chrMT.fa" format="fasta"/>\n+\t\t\t</conditional>\n+\t\t\t<conditional name="lib_type_cond">\n+\t\t\t\t<param name="lib_type" value="paired"/>\n+\t\t\t\t<conditional name="insert_mod_cond">\n+\t\t\t\t\t<param name="insert_mod" value="fixed_insert"/>\n+\t\t\t\t\t<param name="frag_length" value="500"/>\n+\t\t\t\t\t<param name="frag_length_sd" value="50"/>\n+\t\t\t\t</conditional>\n+\t\t\t</conditional>\n+\t\t\t<conditional name="error_model_cond">\n+\t\t\t\t<param name="error_model" value="average_rate"/>\n+\t\t\t\t<param name="error_rate" value="0.123"/>\n+\t\t\t</conditional>\n+\t\t\t<conditional name="mut_rate_cond">\n+\t\t\t\t<param name="mut_rate" value="average_rate"/>\n+\t\t\t\t<param name="error_rate" value="0.123"/>\n+\t\t\t</conditional>\n+\t\t\t<section name="stats">\n+\t\t\t\t<param name="seed" value="123"/>\n+\t\t\t\t<param name="ploidy" 
value="3"/>\n+\t\t\t</section>\n+\t\t\t<param name="read_length" value="151"/>\n+\t\t\t<param name="coverage" value="20"/>\n+\t\t\t<section name="out_options">\n+\t\t\t\t<param name="prefix" value="out"/>\n+\t\t\t\t<param name="golden_bam" value="true"/>\n+\t\t\t\t<param name="golden_vcf" value="true"/>\n+\t\t\t\t<param name="compress" value="false"/>\n+\t\t\t</section>\n+\t\t\t<output name="out_file1" file="chrMT-PE-VCF-BAM-panic_read1.fq" compare="diff"/>\n+\t\t\t<output name="out_file2" file="chrMT-PE-VCF-BAM-panic_read2.fq" compare="diff"/>\n+\t\t\t<output name="out_bam" file="chrMT-PE-VCF-BAM-panic.bam" compare="diff"/>\n+\t\t\t<output name="out_vcf" file="chrMT-PE-VCF-BAM-panic.vcf" compare="diff" lines_diff="2"/>\n+\t\t\t<assert_stdout has_text="Writing output VCF..."/>\n+\t\t</test>\n+\n+\n+\t</tests>\n+</tool>\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-Targeted.bam
b
Binary file new/chrMT-PE-VCF-BAM-Targeted.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-Targeted.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM-Targeted.vcf Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.1
+##reference=chrMT.fa
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">
+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">
+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">
+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##ALT=<ID=TRANS,Description="Translocation">
+##ALT=<ID=INV-TRANS,Description="Inverted translocation">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+MT 335 . A AATTA . PASS WP=0/1
+MT 4078 . T C . PASS WP=0/1
+MT 4682 . A G . PASS WP=0/1
+MT 5713 . C T . PASS WP=0/1
+MT 6503 . A G . PASS WP=0/1
+MT 7425 . C T . PASS WP=1/0
+MT 7778 . T C . PASS WP=1/0
+MT 8471 . C T . PASS WP=1/0
+MT 8776 . C T . PASS WP=0/1
+MT 10750 . A G . PASS WP=0/1
+MT 11273 . G A . PASS WP=0/1
+MT 11743 . C T . PASS WP=1/0
+MT 13132 . C CT . PASS WP=0/1
+MT 13273 . A G . PASS WP=1/0
+MT 13804 . G A . PASS WP=0/1
+MT 14067 . C T . PASS WP=0/1
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-Targeted_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM-Targeted_read1.fq Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,68 @@
+@out-MT-1/1
+CATCATAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCAGACTAATCTTAGTTA
++
+GGGGGGF:GGDFFFFGGGF5@AGGGGGEGG>'%GE?FF<?FBDGFEF2DFB=GFCGG1&/GBGGGDGGFBFEFDF6@GGEGEGGFA&F0FGF=GGGFC@9C
+@out-MT-3/1
+CCTATTAACCACTCACGGGAGCTCTCCCTGCATTTGGTATTTTCGTCTGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATG
++
+FFEFGGGGFD&;GGGGGFGFGGFBGGF'EFFGGGGGFGGGGFGGGGGGGGFDGGGBGFF6EGFEG1EGGFGGC@GD7GFEFG@FGGCE?E<FGD=GGG/FD
+@out-MT-5/1
+CCCGCCGCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAG
++
+F0;BGGEGEGGGFGFGGGG?=GE3DFGFGG?GGGGFGG?FFG@DDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFD98DEGFF;A>E>3
+@out-MT-7/1
+ATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCAAAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTT
++
+AG=AFEGGFGDGFG>GDFEGGGGE:GFGGG@GGECF@GGGFGAGE'1BGFFGEGGGFGGGFD;G@FF5>GFGG=F4BEGGBEF@BGGG7GEG@7GC.AGGB
+@out-MT-9/1
+TATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGA
++
+EEFD6BAFFGGGGFFDGGGG@:GGGGGGFFFFF/9=FGGBFGA:FDGBGEFGGGGG;@FGGCDGFGGGGGE<G4>FGCAGEED1GG(*0?G=:>CGFGFCF
+@out-MT-11/1
+GCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTC
++
+BFAGF;FGGGDG+G8GDGGG@F?GGGGGBEEGFFGD9B.GFGG:GG)DGFGGGGGG9EGFGFGDGF?+2=E.2GGFFF:EFGGFEGFEFE3DGE71G;E2=
+@out-MT-13/1
+TATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAA
++
+GGGECFFGD17EDFGGGGG<GAFFE6CGGG3FCFGC>F:GG0GG8FFBGGGGGGG;DFFEGGGFGFGGFG>FFG;DF6GGGGGFFGGCEFG<6EFFEBGA9
+@out-MT-15/1
+ACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCCGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGT
++
+GFG>FGEFGDGBDGDGF@GFG@6DFCCGEEGGGBG>GEDFAGGGGBDGFGGBBGGG0)GGGEG;FGFGBGGEDGEFGGGDC6GGBDB?(AG?FGFGEE22?
+@out-MT-17/1
+TTCCGCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTAACGAACGAAAAATTCTA
++
+GFEB/GFE?FGGG2FFGFGGFGG+FGGDG7GBGC@BFGG=GGF6EGGBAGFGGGGGGDGC<GGGG=FAFF?FGG@GGEGBDG:C'5@GF8FGD;FGA@?G/
+@out-MT-19/1
+AGCCTACTCCAATGCTAAAACTAATCGTCCCAACAAGTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACCACCACCCAC
++
+G@GFGA8GFGFFFGDGEGGFGG8GFFGC1G2GGFGG3GGF@F0EFCGGFFB)ECG?FFG9GBGGGG<DFEGGCCGE8GEF7EGFDGFE<FG#,D9G=<B7F
+@out-MT-21/1
+TGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTCATTACCTCAGAGGTTTTTTTCT
++
+GEGFAAGFGGEGGGGCFFEGGF=DGF8BFECE>GFFFFGBFB8GFGF67+FFEDFGGGG?EGCG>GDCG>GEC=4CED'GGG?:D=?:FF>GFGFEDC0GA
+@out-MT-23/1
+ACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAACTCAGACGCTCAG
++
+GFGGGGGGEGGFCEFGF98G>GGGGGGGGGEFGGFG9@@EGGGFGFFGFGGFG;GFEG>FGFDFGDGGGFFCDFEF)FGG<@?FGEG2ECAFGGEED.BFG
+@out-MT-25/1
+TACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTCCATGCTAA
++
+EGGGGFDC7GFFF9EA@GCGFFGGGDGEG<CGGFFGGFFAFCGFGGGGDFEF;GGGB7FGGG7.GFFFFF<BA8DFGGGEF=ABFG38CFGA$G;A=7ED=
+@out-MT-27/1
+ACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCG
++
+EBGGG=GGGEFFG@GFGFD;F?GGEFE0FGFFFFFEGGG<?F@DG>EFGGGGGFDGF-FF:E=GGGFFF1AGFFGB?DBGGFE<GDFF/C@CGGGGEFEFF
+@out-MT-29/1
+AAAGCTGGTTTCAAGCCAACCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTCTGTCAAAGTTAACTTATAGGCTAAAT
++
+F>F?BGGGGEFGGGGGGGFEGGGEGGGG:GGGG;G;G7FGCBBGG8GGGGGFGGDGD<DAGFEEGEFFGA3FDF17<GGFB4F>CFG$%C4FDGGGGD5=9
+@out-MT-31/1
+GAGCTCACCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCA
++
+FGEGGGGADEFFGG.GGGG1FGGEGGGGGGGGGGGFFFG?GGDGGG:GDGGGF87GGGGEGGG>GFFGFGGG1GGGGB6FGG09G8B?BGGE3;)?FD;7-
+@out-MT-33/1
+GTGAGGCCAAATATCATTCTGAGGGGCCACACTAATTACAAACTTACTATCCGCCACCCCATACATTGGGACATACCTAGTTCAATGAATCCGAGGAGGCT
++
+GEGCGEA6FFGGFGGFB:DEGEAGGGGFGG>%G0GFG95GDGCFCFGGGEFGGG3F+CGFFGG>GGGGGA5)D"F>GEF68$D/-?GFFB&$CGFA@GAGC
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-Targeted_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM-Targeted_read2.fq Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,68 @@
+@out-MT-1/2
+GAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATTTGTTGTGGGTCTCATGAGTTGGAGTGTAGGATAAATCATGCCAAGGCGAGGA
++
+GEFEE<DGFGGGFEFFGGEFGGGE1A<G==FEGFGBFG=GCGGGF>FGEGAG6BGGGGGGCE@GGD6GGFFGFEDB,FEF7B;6=GECF@%CDFDFF?CC7
+@out-MT-3/2
+GGGGTTTGGTGGAAATTTTTTGTTATGATGTCTGTGTGGAAAGTGGCTGTGCAGACATTCAATTGTTATTATTATGTCCTACAAGCATTAATTAATTAACA
++
+FFCGFGGGGGEGCGGG::GFFG9FF@FGG3F=G>/DGFAGGGEFDFEEGGG?GFGGGGGGGE@DF=CF5GGECDDGFF>GEC4GFGGGGGGG=CEGGGGDF
+@out-MT-5/2
+TGGCCCAGCTCGGCTCGAATAAGGAGGCTTAGAGCTGTGAATAGGACTCCAGCTCATGCGCCGAATAATAGGTATAGTGTTCCAATGTCTTTGTGGTTTGT
++
+FGCGBGDGGFGCGFF@GEGGG+GGGFGGFGGFGFG:F6G##'4FFDGGGGGEGDFEAGGEGFGGG9DEDGFGGF6FGAFFDGG/FDG@F8>GFGEDDFF==
+@out-MT-7/2
+TGTTTGGATGTAAAGTGAAATATTAGTTGGCGTATGAAGCAGATAGTGAGGAAAGTTGAGCCAATAATGACGTGAAGTCCGTGGAAGCCTGTGGCTACAAA
++
+:GFGGDG-G7CG.G>G7FDEFFFGDBGEGFF2&1GGGFGF<EGGGGGGFFGGGFFGGFFGGEDF6DGDFGGGGFFGEG)FGD(<FGGA44GF:EGF?GG>D
+@out-MT-9/2
+GGCAAGGTCGAAGGGGGTTCGGTTGGTCTCTGCTAGTGTGGAGATAAATCATATTATGGCCAAGGGTCATGATGGCAGGAGTAATCAGAGGTGTTCTTGTA
++
+GF=GFBFGGGGCF+;G;GGGGADGFBGGEGFFGCGGGGEA-G@EGGGFGFG:G>EFBFG5=GFEGGBFAGGG9GCGFG=3DGFF?EDGGF;5GF?F?BG9*
+@out-MT-11/2
+CCGTATCGAAGGCCTTTTTGGACAGGTGGTGTGTGGTGGCCTTGGTATGTGCTTTCTCGTGTTACATCGCGCCATCATTGGTATATGGTTAGTGTGGTGGT
++
+GDG5GGFGFFGGGEGFGFDDGGG@GGG%<GGGGGGF6GFGAGGGGFF>BFBEGGGFFFGGGA>BFGCCFDG>GAGGGG1FGGFGDGEG0D8FGFGA(FD1B
+@out-MT-13/2
+TGATAAGTGTAGAGGGAAGGTTAATGGTTGATATTGCTAGGGTGGCGCTTCCAATTAGGTGCATGAGTAGGTGGCCTGCAGTAATGTTAGCGGTTAGGCGT
++
+GDG?GGGF=GFFFGGFEA7GG<GGGGBGG@AGEGGGGEGBFFG79D?GFGFGGG=FGGGFB**=GFFDGGDE;BAGEG1FFCGGGGFFGDGGGG4FDFFCE
+@out-MT-15/2
+GTCCGTGCGAGAATAATGATGTATGCTTTGTTTCTGTTGAGTGTGGGTTTAGTAATGGGGTTTGTGGGGTTTTCTTCTAAGCCTTCACCTATTTATGGGGG
++
+:@DB8F.F&F?AG=GGGGGDF0%DGGG:GFFB<FGGGCGGEEDGBGFGFCGG3@?D@GGFDGFGGGGEGFGGECGF?F>/;F@G4?$DFGGGECED<BBG2
+@out-MT-17/2
+GGGGTTGGGTATGGGGAGGGGGGTTCATAGTAGAAGAGCGATGGTGAGAGCTAAGGTCGGGGCGGTGATGTAGAGGGTGATGGTAGATGTGGCGGGTTTTA
++
+GGGF;GEFFFGG=GFGF@EDEGGDGGCG;GEGGGGGEF:FGGGGGAGFF:GGGCGCECFGGBCBFGGG=-EBDG<GFF3,F2FCAGGDC@6GFFD=E>GGG
+@out-MT-19/2
+GGGAGATTAGTATACAGAGGTAGAGTTTTTTTCGTGATAGTGGTTCACTGGATAAGTGGCGTTGGCTTGCCATGATTGTGAGGGGTAGGAGTCAGGTAGTT
++
+GBGGGF?GFG?GFG+?FFDGGGGGGBGGGF8GG<4FGFGGGGGFCGEGGGGGGEGGGCGGFEGGFGGGGGEGGGEGGGGFGGGF6<;8AEG9GFGGDFFBF
+@out-MT-21/2
+ATAAGCAGTGCTTGAATTCTTTGGTTTCGGTTGTTTTCTATTAGACTATGGTGAGCTCAGCTGATTGATACTCCGGATGCGAGTAATCCGGATGTGTTTAG
++
+AGGGFGGGGG?EFFGFF3'FED.3AFDFG=FGGGC5GGGGG.ECA:GFGGGEGFGGGGFE%GGAFGGGEC$0;F'GGGDFG=4799(/>?BDFFGGGF?A?
+@out-MT-23/2
+GGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCCGTAGTCGGTGTACTCGTAGGTTCAGTACCATTGGTGGCCAATT
++
+DDGFFG?2@GEGGGG<GGGFGFG%FGC%$BGDGGEGGFFBGGD9?EEGGCGGGFGGCGEEDGGDBGGEGGFGGDGEGFE@EFC@EEFGGFF8:@C<CFFGE
+@out-MT-25/2
+ACCGACCTGGATTACTCCGGTCTGAACTCAGATCACGTAGGACTTTAATCGTTGAACAAACGAACCTTTAATAGCGGCTGCACCATCGGGATGTCCTGAGC
++
+GFGFFGGFAGFGGFF=GAG7>GE7<CGDBAEEGFGGG6@GGDGGGGGGGGGFGGG?DGDB@GGGGGDFFGGAGG>EFGGGGCGAF6G9FFEAFEFB8BF<D
+@out-MT-27/2
+AGTCCTTGAGAGAGGATTATGATGCGACTGTGAGTGCGTTCGTAGTTTGAGTTTGCTAGGCAGAATAGTAATGAGGATGTAAGCCCGTGGGCGATTATGAG
++
+FGGEFCGGG>GCDEGG:G*G.GFCFAFG@GGFFGCFFG>BGBDCGFFGD23DGGFC7;GGFFDGGGFDBFEFGFFFGGF/B?6BFGFFGEF>FFFFG&FF@
+@out-MT-29/2
+CTGACGGTTTCTATTTCCTGAGCGTCTGAGATGTTAGTATTAGTTAGTTTTGTTGTGAGTGTTAGGAAAAGGGCATACAGGACTAGGAAGCAGATAAGGAA
++
+'$FGGFGGBFGGGGGGEFEEG<7E?FEG;GFFGGFG<FFGFFFF<FFCG7;GFGGGG>GDG0FFDDG=GDEGGGGFFGDGAFGGGAF@CF;EDGFGGGG90
+@out-MT-31/2
+CCACATCTACAAAATGCCAGTATCAGGCGGCGGCTTCGAAGCCAAAGTGATGTTTGGATGTAAAGTGAAATATTAGTTGGCGGATGAAGCAGATAGTGAGG
++
+GEGF:GG9FGDGCD/FGFFFBEGGFFGCGGGGGCGFG@F8GDFGGFEGGFGGEG.E6FGGG5<GGF*EGCFFGGGGGGFDGB@FG@@29G8FGBA8D=0A3
+@out-MT-33/2
+AGGAGAGAAGGAAGAGAAGTAAGCCGAGGGCGTCTTTGATTGTGTAGTAAGGGTGGAAGGTGATTTTATCGGAATGGGAGGTGATTCCTAGGGGGTTGTTT
++
+DGG?A?5GGGGGGGGGG@GEFGCEGGGCFGGGGGGGFGBGGFEGFGGFFGGGGGGE??G>8GG9EF/ECGFGEFGGCG:GGDG=G?B9FG;GA=E.;GD@B
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-vcf.bam
b
Binary file new/chrMT-PE-VCF-BAM-vcf.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-vcf.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM-vcf.vcf Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,48 @@
+##fileformat=VCFv4.1
+##reference=chrMT.fa
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">
+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">
+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">
+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##ALT=<ID=TRANS,Description="Translocation">
+##ALT=<ID=INV-TRANS,Description="Inverted translocation">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+MT 335 . A AATTA . PASS WP=0/1
+MT 798 . C CTC . PASS WP=0/1
+MT 1009 . C T . PASS WP=1/0
+MT 1828 . A ACTACGA . PASS WP=0/1
+MT 2268 . G A . PASS WP=0/1
+MT 3073 . C T . PASS WP=0/1
+MT 3860 . G A . PASS WP=1/0
+MT 3908 . C T . PASS WP=1/0
+MT 4078 . T C . PASS WP=0/1
+MT 4250 . C T . PASS WP=0/1
+MT 4682 . A G . PASS WP=0/1
+MT 5713 . C T . PASS WP=0/1
+MT 6503 . A G . PASS WP=0/1
+MT 7168 . A G . PASS WP=0/1
+MT 7425 . C T . PASS WP=1/0
+MT 7770 . A G . PASS WP=0/1
+MT 7778 . T C . PASS WP=1/0
+MT 8471 . C T . PASS WP=1/0
+MT 8776 . C T . PASS WP=0/1
+MT 9353 . C T . PASS WP=1/0
+MT 10750 . A G . PASS WP=0/1
+MT 11273 . G A . PASS WP=0/1
+MT 11738 . T C . PASS WP=1/0
+MT 11743 . C T . PASS WP=1/0
+MT 12964 . C T . PASS WP=1/0
+MT 13049 . A G . PASS WP=0/1
+MT 13132 . C CT . PASS WP=0/1
+MT 13273 . A G . PASS WP=1/0
+MT 13804 . G A . PASS WP=0/1
+MT 14067 . C T . PASS WP=0/1
+MT 14296 . A G . PASS WP=0/1
+MT 14657 . A G . PASS WP=1/0
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-vcf_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM-vcf_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,3308 @@\n+@out-MT-1/1\n+AGCAGAAAATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTCCACAAAATGACATCAAAA\n++\n+GGF8GGDFFFFGGGF5AAGGGGGEGG>/-GE?GF<?FBDGFEF2EFB<GGDGG1&5GBGGGDGGFCFFFDF7>GGEGEGGFB(G3GGF=GGGFCA<EC;1B\n+@out-MT-3/1\n+AATAATTTTCATGATTTGAGAAGCCCTCGCTTCGAAGCGAAAAGTCCTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCAC\n++\n+FFGGGGFD)@GGGGGFGFGGEBGGF\'@FFGGGGGFGGGGFGGGGGGGGFDGGGAGFF6FGFEG0EGGFGGC@GD6GFEFGAFGGDFAE<FGD>GGG0GFA=\n+@out-MT-5/1\n+CCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTT\n++\n+CEGGEGEGGGFGGGGGG?@GD3AFGFGG?GGGGFGG@FFG@CDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFE;;DFGFF;A?F@;==\n+@out-MT-7/1\n+ACTAGTATCCTTAATCATTTTTATTGCCACAACTAACCTCCTCTGACTCCTGCCTCACTCATTTACACCAACCACCCAACTATCTATAAACCTAGCCATGG\n++\n+=DFDGGFGEGFG?GDFEGGGGE9GFGGG@GGECF@GGGFGAGE\'8>GFFGEGGGFGGGFD;G@FF6@GFGG=F4DEGGBFF@CGGG7GFG?8GC-BGGEGF\n+@out-MT-9/1\n+TCTTCCTAGGAACAACATATGACGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGA\n++\n+6DCEFGGGGFFDGGGG@;GGGGGGFFFFF18AGGGBFGA;GEGCGEFGGGGG<=FGGCDGFGGGGGE>G4?FGCBGEED3GG\'(5@G><>CGFGFDGFED=\n+@out-MT-11/1\n+AATGGGACTTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATTAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAAAAAGGCGGGAGAAGC\n++\n+<FGGGDG(G9GDGGG@F?GGGGGBEEGFFGD;D/GFGG:GG+EGFGGGGGG9EGGGFGDGF@/3@E//GGFFF;FGGGFEGFFGF6EGE70G;F6CGA>DB\n+@out-MT-13/1\n+GCGCTAAGCTCGCACTGATTTTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTCGTTCCACAGA\n++\n+CFFGD21CDFGGGGG<GAFFE7AGGG4FCFGC<F:GG0GG8GFBGGGGGGG;CFFFGGGFGFGGFG>GGG;DF7GGGGGFFGGDFGG=9FFFECGB?)(CD\n+@out-MT-15/1\n+ACAACCCCCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATCGCAAGCCAACGCCACTTATCCAGTGAACCACTATCACGAAAAAAACTCTA\n++\n+EFGCGBBGEGFAGFG@7DGDCGEFGGGBG?GECFAGGGGCDGFGGBBGGG/-GGGEG<GGFGCGGEDGEFGGGDD7GGCEDB.EGAFGFGEF47DGEGFCE\n+@out-MT-17/1\n+TCATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCACTCACACTACTCCCCATCTCTAAC\n++\n+E?FGGG2FFGFGGFGG,FGGDG8GAGCADFGG=GGF6EGG
CCGFGGGGGGDGD>GGGG>FBFF?FGGAGGFGBEG<C\'4@GG<GGD<FGAA@G4%0BG1FC\n+@out-MT-19/1\n+GTGTTTTAGATCACCCCCTCCCCAATAAAACTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGATACAAAATAGACTACGAATGTGGCTTTAACATATC\n++\n+GFGEFFGDGEGGFGG8GFFGC1G1FGFGG3GGF@G0EFDGGFGC*FDG?FFG:GBGGGG<EGEGGCDGE9GEF7FGFEGFF?FG#*D:G=;A9GFDEFEED\n+@out-MT-21/1\n+AAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGCTCAAGCTCAACGCCCACTACCTAAAAA\n++\n+BGFGGEGGGGCFFEGGF=DGF8CGFCF?GFFFFGBFB8GFGF681FFEEFGGGG@EGCG?GDCG>GED=2DED\'GGG@<E?A?GGBGFGGFED2GD01=EA\n+@out-MT-23/1\n+CTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGT\n++\n+GGGGGEGGFBDFGF93G>GGGGGGGGGEFGGFG9BAEGGGFGFGGFGGFG<GFEG>FGFDFGDGGGFFCDFEF(GGG=A@FGEG4FDBFGGEED1AFGADE\n+@out-MT-25/1\n+TCACTTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACATATTCCCCCGAGC\n++\n+FDC5GFFF7FBAGCGFFGGGDFDG<CGGFFGGGFAFCGFGGGGEFEF<GGGB5EGGG70GFFFFE;BB;EGGGGEF>ACFG4;DFGB$G;A>8FECAGG3E\n+@out-MT-27/1\n+ATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCCACCCCTCACAATCAT\n++\n+GG=FGGEFFG@GFGFD>F?GGEFE1GGFFFFFDGGG<=E?DG>EFGGGGGFDGF.FF:E<GGGFEE/BGFFGB?DBGGFE=GDFF.B?DGGGGEFEGGE?F\n+@out-MT-29/1\n+GCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCGAGGTCGGTTTCTGGCTACATTCAAATTCCTC\n++\n+GGGGFFGGGGGGGFEGGGEGGGG:GGGG;G;G8DGCBCGG9FGGGGFGGEGD=DAGFFEGFFFGB8GEF2;=GGFB5F?EFG##F6FDGGGGD6@?E<GGF\n+@out-MT-31/1\n+GGCCACCTACTCATGCACCTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGAC\n++\n+GGBBEFFGG.GGGG0FGGEGGGGGGGGGGGFFFG?GGDGGG;GDGGGF97GGGGEGGG?GFFGFGGG3GGGGB7FGG2<G8C@DGGF58(?GE=8/EBGFC\n+@out-MT-33/1\n+ATAACCTCCCCCAAAATTCAGAATAAGAACACACCCGACCACACCGCTAACCATCAATACTAAACCCCAATAAATAGGAGAAGGCTTAGAAGAAAACCCCA\n++\n+EB1CFGGFGGFC<DEGFBGGGGFGG>-G0GFG93GDGDGDFGGGEGGGG3F.?GFFGG?GGGGGB9(D"E>GEF7:$D10@GFFC%&?GFAAGAGEFE.*D\n+@out-MT-35/1\n+ATCATCCCTCTACTATTTTCTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTTACAACCCCCCTCCTAATACTAAC\n
++\n+GDGGGGEGB+GGGDB@AFG%FGGGGD;GFGCGG1EGGGC>FGGDF,GGGGCFFGGGFFF8CGD=DACEGDGAF'..b"TACTCATTCAACCAATAGCCCTGGCCGTACCCCTAACCGCTAACATTACTGCAGGCCACCTACTCAT\n++\n+GGBGGG=GCE:FG<FGGFGGEFF<AGCAGFGGGEDFGFGGGGGFG(DGFF8DFGGEDFGGG1GG3FGFBGGFBFGFF5GG%60EA;D?EGGAG5F?>>EE9\n+@out-MT-1621/1\n+CTCTTCACCAAAGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACAGCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGCACCCC\n++\n+GE?GGFGEDGGGDGGG?GGEFGCGGG=G7F:FGGGCG;EGGG9G=EG>FGF@%EGGGGGF?FGFF@<BGFF@4$GFEDGF7B)EG?CCFGGFE;;*2);G8\n+@out-MT-1623/1\n+TAGGAATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAAGCCTATACAACCGTATCGGCGATATC\n++\n+EGGGGGGGFE5EGGEGGCGGGGG:F/GAGGFFGE;FAGBFFFG?CCFF8GFGGGGGEBGEGGEG4E@6FGGEFE>&FGGFGG6$14.1<CCEGGGGGEFGE\n+@out-MT-1625/1\n+TATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAGGAAAACCCC\n++\n+EGGGBFGGF;CG49GGFGGEAFGGGGFGCFG6FGFGGGGFFDGG=EGB3GGFEGGGGG4GABDB=FFCGF@F>EFFFA8*GGGGGGG=>AF+GGE@F@>DE\n+@out-MT-1627/1\n+CACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCA\n++\n+GGGGFGGFF<FGGGFGGFGEGGGG<G;GDGCE;FFGF3%GGGFGGGEBGGGEFGGB<GFCGGDGGGEGGGDFFCF@<4FCEG:*F8;CEGFFGGG2AFBF+\n+@out-MT-1629/1\n+GAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAGCAGACGAGGTCAACGATCCCTCCCTT\n++\n+GGGG?GGFEGGGFF4=GGFFEGFFCGDFFAFGGGEGGGGFECGFDE;GGFBFA:GGD9FGF;/GGEGGFFFFFE$-G<FFFFGG>FGC38@FFGFFBCEDF\n+@out-MT-1631/1\n+ATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGGAACCGTCTGAACTAT\n++\n+FGGGGFBGGGGFGGGFGCGGGG=G=>FGFEGFGGDGGBFGGGFGFGGGFFGGGGFGGGAGGGFD<G;GFGF2C@FGG2GFBGEGFCEF=?-C?G7>EDA?C\n+@out-MT-1633/1\n+AGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATATTCATCCCTGTAGCAT\n++\n+;GG=GFGEGGGEGFGFGGG8DG@GFGGFGG:CFGGGGGEEGGFEGGG?8FGDFA;GEGGBGGFDDGG&EFFADGGFC.=G.FGGECD6GG=F@6AGFCFG6\n+@out-MT-1635/1\n+GCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACACTACTATA\n++\n+G;GGGFGGGGF
FEGFFFGF9FGGEGGDGGGGGF?GGGEGEEGGEGGGEFGGAFAG;GDGGCGGGBFCFG@7ECGGDG2DGF@GEBDC35CG@GGEBE5>*F\n+@out-MT-1637/1\n+CGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGC\n++\n+FGFGGGFFFF;GEDFG?GG;G9BGGGGGGDFGEEG;FGGEBGFGFEGC:EF4GG@FGCF:GGGFGGFDGG?GGGBFF49F=EGGGGEA9C=BGCG;EAA@B\n+@out-MT-1639/1\n+ACCCACAGCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAAA\n++\n+GGGGE@GGGGECFGC@GCGDG?C@AAGFGF?7GGGGGGDFGGF6G>C?GCBFGDFGFFDGGGBGC1FGFGGEG?B2EAEBFGG;G?EGG81GF76FF@;B'\n+@out-MT-1641/1\n+CTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAG\n++\n+FG7FF0FFFGGFGC;GGGFDEG<FFFCGCD>EF<EGGGGDGG:FGG>FF>FEGFEGGDCGFEEGEF9GGGGGGGGGE7GGEFB;@G;@BGFEFGGFGGF?6\n+@out-MT-1643/1\n+TAATAATAACAATTGAATGTATGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACT\n++\n+G;AFGGGGFG@GFGF7EGGF#GGGFEGGGFDGGD*FE6GFGGFE;GF?GG@GGGFGG>FAFEEEFF5FF9GGGB=$A;?G<CCFGGGG?@B$E@AC>E?.>\n+@out-MT-1645/1\n+CTACACAACACATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACC\n++\n+GGGFGDFF(GGFFGGF.GFGGFGGGAEF;DGAFFCFEDGFGG-GFEGGDFB6GEEGBFEGBCGFGGGGGGGFG<E:=G7FCG<?@G@BBGEE=GBDFAFFE\n+@out-MT-1647/1\n+ACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACT\n++\n+CFFGFF<?FEFDEGGCGGF:GEEGGGGFFGEGAEGEFEFG;;GCFGGGDFG8DGFGGGGGFFGGGEFG/;FGGGGGF2CGGFFDEG?F@GGEFCDDGFD@A\n+@out-MT-1649/1\n+CAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCACACACTAGGATACC\n++\n+GGFEFCGFGGEFFGGGFDGG@DGGG@GGFCDEFEGGFAGFEGFFGAGGGCGGGG@F>EG:@FGB>GGFD:FGGGGFEBGGGGB)-DGG,EGGF9ED94GCD\n+@out-MT-1651/1\n+TCCTATCTCTCCCAGTCCTAGCTGCTGGCATCACTATACTACTAACAGACCGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATT\n++\n+GGFD@DFGGFDGAGFCGG@GDGGGGGF/GGGGGECGGF3G>GDGEFE9FDGGEEGFFGG;DGGFGFE3<;GE>G=@GGGFGAFFFGGGGGFA;GBBF?;>C\n+@out-MT-1653/1\n+TCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAA
ACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATACCGC\n++\n+F3BGGGFGEFGFGGGGGGGB9CGGGGGGEGDGGDEGGBFE;GA:DGGBGGGGG?FGGEEG>DDFEF@B>:?GGGG/G7FGFFEGFGF@FFC@8G?GGGDEE\n"
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM-vcf_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM-vcf_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,3308 @@\n+@out-MT-1/2\n+AGGCTGCCAATGGTGAGGGAGGTTGAAGTGAGAGGTATGGTTTTGAGTAGTCCTCCTATTTTTCGAATATCTTGTTCATTGTTAAGGTTGTGGATGATGGA\n++\n+E=DGFGGGFEFEGGFFGGGE3E;F=;GEGGGBFG=GCGGGF?FGEGAG6CGGGGGGCFAGGD7GGGFGFEEC.FFF7B;7>GFEFA&?DFDFF@CC=BEGG\n+@out-MT-3/2\n+AGTCATGGAGGCCATGGGGTTGGCTTGAAACCAGCTTTGGGGGGTTCGATTCCTTCCTTTTTTGTCTAGATTTTATGTATACGGATTCTTCGAATGTGTGG\n++\n+DGFGGGGGEGBGGG::GFFG8FF@FGG4F>G>1EGF@GGGEFDFEEGGG@GFGGGGGGGEADF<CF4GGEDDDGFF?GEC5GFGGGGGGG=DEGGGGDGF@\n+@out-MT-5/2\n+GGAGGTAAGCTACATAAACTGTGGGGGGTGTCTTTGGCCTTTGGTTGGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGTGTGCTGGGTAGGATGGGCGGGG\n++\n+BGBGDGGFGCGFF@GEGGG,GGGFGGFGGFGFG:F7G###%DFEGGGGGEGDFFAGGEGFGGG9DEDGFGGF6FGAFFDGG0GEGAF8?GFGFEDFF=ACB\n+@out-MT-7/2\n+TAGGTGCATGCGTAGGTGGCCTGCAGTAATGTTAGCGGTTAGGCGTACGGCCAGGGCTATTGGTTGAATGAGTAGGTTGATGGTTTCGATAATAACTAGTA\n++\n+FGGDG-G6CG,G?G7GDEFFFGDCGEGFF3+=FGGFGF=EGGGGGGFFGGGFFGGGGGGDDF6EGDEGGGGFFGEG(FGE*@FGGB53GF:EGF?GG?FDB\n+@out-MT-9/2\n+TATTATTTACTCTATCAAAGTAACTCTTTTATCAGATATATTTCTTAGGTTTGAGGGGGAATGCTGGAGATTGTAATGGGTATGGAGACATATCATATAAG\n++\n+FBFGGGGCF+DG<GGGGADGFCGGFGFFGCGGGGEB+G@EGGGFGFG:G?EFBFG7=GFFGGBFAGGG:GDGFG=0CGFF?EEGGF<6GF?F?CG;1*3<:\n+@out-MT-11/2\n+AGAGAATAGTCAACGGTCGGCGAACATCAGTGGGGGTGAGGTAAAATGGCTGAGTGAAGCATTGGACTGTAAATCTAAAGACAGGGGTTAGGCCTCTTTTA\n++\n+GFGFFGGGEGFGFDDGGG@GGG&=GGGGGGF5GFGAGGGGFF>CFBEGGGFGFGGGA=CGGCCFDG>GAGGGG2FGGFGDGFG2D:FGFGB)FD4EG8D@(\n+@out-MT-13/2\n+CCTTGGGTAACCTCTGGGACTCAGAAGTGAAAGGGGGCTATTCCTAGTTTTATTGCTCTAGCTATTATGATTATTAATGATGAGTATTGATTGGTAGTATT\n++\n+GGGF=GFFFGGFEB9GG<GGGGBGG@CGDGGGGEGBFFG76EBGFGFGGG=GGGGFB))8GFFDGGDE<ECGEG1FGEGGGGFFGDGGGG4FEFFDFGG*F\n+@out-MT-15/2\n+GTGTGTGCCTGCGTTCTGGCGTTCTGGCTGGTTGCCTCATCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTA\n++\n+,E':<@G=GGGGGDF1)CGGG:GFFB>FGGGDGGEEDGBGFGFDGG1B@EAGGFDGFGGGGEGFGGEDGF?F?4>GBG5A&FFGGGECFE=BBG8FFGFF4\n+@out-MT-17/2\n+ATTTAACCTAAATTTCTATAAGATTATTAGTATAAAAGGGGAGATAGGTAGGAGTAGCGTGGTAAGGGCGATGAGTGTGGGGAGGAATGGGGTGGGTTTTG\n++\n+FFFFG=GFGF@EDEGGDGGCG;GEGGGGGEF9FGGGGGAGGF:
GGGCGCEDFGGBCBFGGG=*EBEG=GFF4/F3FDCGGEDB9GGGE>E?GGGA?GEEF:\n+@out-MT-19/2\n+AGATGGCGGTATATAGGCTGAGCAAGAGGTGGTGAGGTTGATCGGGGTTTATCGATTACAGAACAGGCTCCTCTAGAGGGATATGAAGCACCGCCAGGTCC\n++\n+GFG>GFG,CFFDGGGGGGBGGGG:GG<2FGFGGGGGFCGEGGGGGGFGGGCGGFEGGFGGGGGEGGGEGGGGFGGGG9?=:BFG;GFGGDFFCGG'FCG4E\n+@out-MT-21/2\n+AACCTTTCCTTATGAGCATGCCTGTGTTGGGTTGACAGTGAGGGTAATAATGACTAGTTGGTTGACTGTGGATATTGGGCTGTTAATTGTCAGTTCAGTGT\n++\n+GGGGG?FFFGFF50FFD17AFDFG=GGGGC5GGGGG,ECB:GFGGGEGFGGGGFE%GGBFGGGEB&*?F(GGGDFG=6:>=-7DCDEFFGGGF@BDDGFG9\n+@out-MT-23/2\n+GTTAGTTTGTTAGTTAGTAGGCCTAGTCTGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCC\n++\n+DGFFG?1@GEGGGG<GGGFGFG%FGC%$;GDGGEGGFFBGGD:?EFGGCGGGFGGCGEECGGDAGGDGGFGGDGEGFE@EFD@EEFGGFF7:@D=CFFGFF\n+@out-MT-25/2\n+TAGCGATGGAGGTAGGATTGGTGCTGTGGGTGAAAGAGTATGATGGGGTGGTGGTTGTGGTAAACTTTAATAGTGTAGGAAGCTGAATAATCTATGAAGGA\n++\n+GGF@GFGGFF=GBG7>GE6;CGDBCFEGFGGG6=GGDGGGGGGGGGFGGG?EGDBAGGGGGDGFGGAGG?EGGGGGDGBF7G:GGFCFFFB8CF=FF$%AF\n+@out-MT-27/2\n+GTTGCCTCATCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATGTTATAATTAAGG\n++\n+CGGG>GCCEGG:G*G0GFCFAFG@GGFFGCFFG>BGBDDGFFGD64EGGGD7<GGFFDGGGGEBFEFGFFFGGF1D@7EFGFFGFF?FFFFG'FFDFE@DE\n+@out-MT-29/2\n+ATGGGTATGTTGTTAAGAAGAGGAATTGAACCTCTGACTGTAAAGTTTTAAGTTTTATGCGATTACCGGGCTCTGCCATCTTAACAAACCCTGTTCTTGGG\n++\n+GBFGGGGGGEFEEF;3E?GEG;GFFGGGG<FFGFFFF=FFCG7=GFGGGG>GDG0GFDEG=GEFGGGGFGGDGBFGGGBFBDG?FEGFGGGG:9.C=?@78\n+@out-MT-31/2\n+ATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGTTTTACTATATGATAGGCATGTGATTGGTGGGTCATTAT\n++\n+GG:FGDGCD/GGFFFBEGGFFGCGGFGGCGFG@F9GDGGGFFGGFGGEG.E8FGGG5=GGF+FGDFFGGGGGGFEGCAFGAA49G:FGCB8E>1C;,A,GE\n+@out-MT-33/2\n+GAGTGAGCCGAAGTTTCATCATGCGGAGATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTG\n++\n+@A?0GGGGGGGGGG@FDFGCEGGGCFGGGGGGGFGBGGFEGGGGFFGGGGGGE?@G>5GG:FF0DCGFGEGGGDG:GGEG=G?C;FG;G@=E/=GDBEFFB\n+@out-MT-35/2\n+GCTGGTTGCCTCATCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATGTTATAATT\n++\n+
=C?.G(GGEGGDG6GGGAFGFGG<EFBGDEGGEEGGGBFGGGEFDA@GGEC?G<GG?D*CGG:F>EGDFGCGF"..b"GCATGTGATTGGTGGGTCATTATGTGTTGCCGTGCAGGTAGAGGCTTACTAGAAGTGTGAAAACGTA\n++\n+GG9G<EFGGFGGFGGGFGGCGGG>E>.G8?F,'GFGGEGC=GG>FGGGFFEGGG<FGGEF,E)#;BG9DGGGBFG5<DGGFCGG=>GF8FEGGFDEG=8/<\n+@out-MT-1621/2\n+AATGTTGATAGTAGAATGATGGCTAGGGTGACTTCATATGAGATTGTTTGGGCTACTGCTCGCAGTGCGCCGATCAGGGCGTAGTTTGAGTTTGATGCTCA\n++\n+FGGGCFEFF8?DFFGBEGGGFGGG7GDG@CDFGGFF2GGGGGDGGAFG?FG@<GGGEFFGGFG2GG@GFGGFFFBGGGEC5&GEFGFFDGGFDGFEC??>@\n+@out-MT-1623/2\n+GAGTCAGGGGTGGAGACCTAATTGGGCTGATTTGCCTGCTGCTGCTAGGAGGAGGCCTAGTAGTGGGGTGAAGCTTGGATTAGCGTTTAGAAGGGCTATTT\n++\n+GBGFGGGGFGEG:;FGFGEEG+G4GGGGGGGGGGGGFGGGG?FGE8=G@GGFGFD-FFCFGGEGE?DCGDFFFG,.FGGDCGGGFFD?FC<+)CA4FGD-=\n+@out-MT-1625/2\n+AGTTTCATCATGCGGAGATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTGGGGTCATTGGT\n++\n+EGGA:DEGGGGDGGGGGCGGEDBBAGGFEGC:GFG9GG'6G;FFGFFGE<GEGGG<C:GGGEGEEGGBGGE8=AG6FGDE24%(GB?AGE?3*%DBAD6D2\n+@out-MT-1627/2\n+GTAGGTGTGCCTTATGGTAAGAAGTGGGCTAGGGCATTTTTAATCTTAGAGCGAAAGCCTATAATCACTGTGCCCGCTCATAAGGGGATGGCCATGGCTAG\n++\n+GGF?GGG8-GGGG(3GEGFG:FGGGGGGGFGFF7GG)EGEGGAFEFG7GG@1GG@FGGGFGCGFGDGFG7G-GFGF+GB?GGGGFGG.'?<GGEBGGEDF>\n+@out-MT-1629/2\n+CAGCGGTGAAAGTGGTTTGGTTTAGACGTCCGGGAATTGCATCTGTTTTTAAGCCTAATGTGGGGACAGCTCATGAGTGCAAGACGTCTTGTGATGTAATT\n++\n+6GEGGGG>GFGC;GB3BGA3GGGGFEEG=G>GGGFGGFG>CGF;EF@GE-<GDGFF=GGGGE0GGGECF<E@FF<GBG:>>C:GACGFFFFEEE;GG@FFF\n+@out-MT-1631/2\n+ACTACTCGATTGTCAACGTCAAGGAGTCGCAGGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGACCGCCGTAGTCGGTGTA\n++\n+GGGF2B29FGFGGGFGG?EGFGGGGG5GECGEGGGGEGFC@F?F;FGFGGGE>?ADE=CGEGFEFEGG>@GGF3GF6GGE?FG(C4EGGFGFGGEF4;D>2\n+@out-MT-1633/2\n+ATCATCAACTGATGAGCAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATT\n++\n+GGG9EGGGGGGEGG?G?@DGGAFGGG>FEEEFF@D>GEDGF?>=?DGFEGGFEGG:DFGCFEGFGFGG$<GAFGFGGECFGFBFFGGF>E7GF<:?A@E21\n+@out-MT-1635/2\n+GGTTGTTTGGGTTGTGGCTCAGTGTCAGTTCGAGATAAGAACTTCTTGGTCTAGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGAGG\n++\n+GFGGE?GGGG<FFFG;
GGFG<GGGGFFGGGGGDGFGEG%@FGFFGEGFGGG>D0CGGF=DGFG9:@>@94G=FFGD>?CGGGEE4'G=GFEB=4:EC=<#C\n+@out-MT-1637/2\n+TGGGTTTTGTATGTTCAAACTGTCATTTTATTTTTACGTTGTTAGATATGGGGAGTAGTGTGATTGAGGTGGAGTAGATTAGGCGTAGGTAGAAGTAGAAG\n++\n+FGGGGGG5GGGGGGGGGEFGG?GEF=0EGGG/8CBGE=GFGGEG;FFG;CGG9EGGFFGFGGGEFGFDC3B.FE;G=FFGGGFBGFD=D?CFEED>FFC#A\n+@out-MT-1639/2\n+TCAGGCGTTCTGGCTGGTTGCCTCAGCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTG\n++\n+FEDGFFDGAGFGGFGGGFFGGGGGG$G;GG@GGGFEGGGGGGGGGFCBEFF@GFDG2BGGFGGG7GG@FGGGEE<FFGGGGFGGGGF*G=EAFEBB@?-7A\n+@out-MT-1641/2\n+CTAGTATGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGT\n++\n+GFGGGGDGGGG=EDFFGGGCFGFFGFG3AEGEGFFGACGFDGFFEGGGGG?AGFEAFGGG5DB>GCGC@FFEG5<GEFGGGGGGCE@EFBAEGDFEGDAFC\n+@out-MT-1643/2\n+GGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGGGTGCTGGGTAGGATGGGCGGGGGTTGTATTGATGAGATTAGTAGTATGGGAGTGGGAGGGGAAAATAA\n++\n+FFGGGGFGFAFGGDGFFDEFGDGG1GGG?EGG;DFBFGGCGGG:GGGGD4GEC*GGGGEGGGGDFGGFGEDAG<FG<GF)CEBCGGE@FGFEFEGEDGGFF\n+@out-MT-1645/2\n+GATAGGTGGCACGGAGAATTTTGGATTCTCAGGGATGGGTTCGATTCTCATAGTCCTAGAAATAAGGGGGTTTAAGCTCCTATTATTTACTCTATCAAAGT\n++\n+GGGGGBGDFGEGGG;?GGCFFG@EDGGEGGGEGGGGGFF@GFGDD6EGG#GFC=GGGFGFGFGFFGGF@AFGGF9>G><FGC;D<GEFFFFGGDDEEGFAC\n+@out-MT-1647/2\n+GGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTTTTTTTGTTAGGGTTAACGAGGGTGGTAAGGATGGGGGG\n++\n+GGGGDGGGGG7EGGGGGGGGAGGGBFFFGEGAGGFGGGEGGFGGFGGGGFGGGFEGCEFGGGFGGGGDGF@>GFGGGGFFGGGGFG8FEFF@FF>2EGFE?\n+@out-MT-1649/2\n+TTCACTTTAGCTACCCCCAAGTGTTATGGGCCCGGAGCGAGGAGAGTAGCACTCTTGTGCGGGATATTGATTTCACGGAGGATGGTGGTCAAGGGACCCCT\n++\n+BGG@4:GGG2?FF=FGF?GGEGGGGFFGGGGGGGG<>GGFGEG>FFGFAEF:F?FGG-@)EEGFEGAEGGFFF:/GAFBCEGGFGGG697B=?EGGFFDDC\n+@out-MT-1651/2\n+CCTAGGAAGCCAATTGATATCATAGCTCAGACCATACCTATGTATCCAAATGGTTCTTTTTTTCCGGAGTAGTAAGTTACAATATGGGAGATTATTCCGAA\n++\n+GGEF9EFGDGDGGFEGGFGEG<GGGBGCGGGEFEAEF=GGGG)+GGGG@CDGGGEFFGEGEGGE9FFD9GG=6ADBEDE>GD)E;FF6C@F1@BEFBGG@=\n+@out-MT-1653/2\n+CTAAGCACTCTACTCTTAGTTTACTGCTAAATCCACCTTCGACCCTTAAGTTTCATAAGGGCT
ATTGTATTTTTCTGGGGTAGAAAATGTAGCCCATTTCT\n++\n+G=@GGDA@FG=GFGGGGGFCGEG@G?GAFG??G>GGGGGFGEGGFGEFGGE8FGGBGDFG;AFEF$,DG'GGGG6FGFDC/3DEA?4DFFGGGAGF?A<BC\n"
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM.bam
b
Binary file new/chrMT-PE-VCF-BAM.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM.bam.bai
b
Binary file new/chrMT-PE-VCF-BAM.bam.bai has changed
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM.vcf Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.1
+##reference=chrMT.fa
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">
+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">
+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">
+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##ALT=<ID=TRANS,Description="Translocation">
+##ALT=<ID=INV-TRANS,Description="Inverted translocation">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+MT 335 . A AATTA . PASS WP=0/1
+MT 4078 . T C . PASS WP=0/1
+MT 4682 . A G . PASS WP=0/1
+MT 5713 . C T . PASS WP=0/1
+MT 6503 . A G . PASS WP=0/1
+MT 7425 . C T . PASS WP=1/0
+MT 7778 . T C . PASS WP=1/0
+MT 8471 . C T . PASS WP=1/0
+MT 8776 . C T . PASS WP=0/1
+MT 10750 . A G . PASS WP=0/1
+MT 11273 . G A . PASS WP=0/1
+MT 11743 . C T . PASS WP=1/0
+MT 13132 . C CT . PASS WP=0/1
+MT 13273 . A G . PASS WP=1/0
+MT 13804 . G A . PASS WP=0/1
+MT 14067 . C T . PASS WP=0/1
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,3296 @@\n+@out-MT-1/1\n+CATCATAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCAGACTAATCTTAGTTA\n++\n+GGGGGGF:GGDFFFFGGGF5@AGGGGGEGG>\'%GE?FF<?FBDGFEF2DFB=GFCGG1&/GBGGGDGGFBFEFDF6@GGEGEGGFA&F0FGF=GGGFC@9C\n+@out-MT-3/1\n+CCTATTAACCACTCACGGGAGCTCTCCCTGCATTTGGTATTTTCGTCTGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATG\n++\n+FFEFGGGGFD&;GGGGGFGFGGFBGGF\'EFFGGGGGFGGGGFGGGGGGGGFDGGGBGFF6EGFEG1EGGFGGC@GD7GFEFG@FGGCE?E<FGD=GGG/FD\n+@out-MT-5/1\n+CCCGCCGCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAG\n++\n+F0;BGGEGEGGGFGFGGGG?=GE3DFGFGG?GGGGFGG?FFG@DDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFD98DEGFF;A>E>3\n+@out-MT-7/1\n+ATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCAAAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTT\n++\n+AG=AFEGGFGDGFG>GDFEGGGGE:GFGGG@GGECF@GGGFGAGE\'1BGFFGEGGGFGGGFD;G@FF5>GFGG=F4BEGGBEF@BGGG7GEG@7GC.AGGB\n+@out-MT-9/1\n+TATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGA\n++\n+EEFD6BAFFGGGGFFDGGGG@:GGGGGGFFFFF/9=FGGBFGA:FDGBGEFGGGGG;@FGGCDGFGGGGGE<G4>FGCAGEED1GG(*0?G=:>CGFGFCF\n+@out-MT-11/1\n+GCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTC\n++\n+BFAGF;FGGGDG+G8GDGGG@F?GGGGGBEEGFFGD9B.GFGG:GG)DGFGGGGGG9EGFGFGDGF?+2=E.2GGFFF:EFGGFEGFEFE3DGE71G;E2=\n+@out-MT-13/1\n+TATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAA\n++\n+GGGECFFGD17EDFGGGGG<GAFFE6CGGG3FCFGC>F:GG0GG8FFBGGGGGGG;DFFEGGGFGFGGFG>FFG;DF6GGGGGFFGGCEFG<6EFFEBGA9\n+@out-MT-15/1\n+ACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCCGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGT\n++\n+GFG>FGEFGDGBDGDGF@GFG@6DFCCGEEGGGBG>GEDFAGGGGBDGFGGBBGGG0)GGGEG;FGFGBGGEDGEFGGGDC6GGBDB?(AG?FGFGEE22?\n+@out-MT-17/1\n+TTCCGCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTAACGAACGAAAAATTCTA\n++\n+GFEB/GFE?FGGG2FFGFGGFGG+FGGDG7GBGC@BFGG=
GGF6EGGBAGFGGGGGGDGC<GGGG=FAFF?FGG@GGEGBDG:C\'5@GF8FGD;FGA@?G/\n+@out-MT-19/1\n+AGCCTACTCCAATGCTAAAACTAATCGTCCCAACAAGTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACCACCACCCAC\n++\n+G@GFGA8GFGFFFGDGEGGFGG8GFFGC1G2GGFGG3GGF@F0EFCGGFFB)ECG?FFG9GBGGGG<DFEGGCCGE8GEF7EGFDGFE<FG#,D9G=<B7F\n+@out-MT-21/1\n+TGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTCATTACCTCAGAGGTTTTTTTCT\n++\n+GEGFAAGFGGEGGGGCFFEGGF=DGF8BFECE>GFFFFGBFB8GFGF67+FFEDFGGGG?EGCG>GDCG>GEC=4CED\'GGG?:D=?:FF>GFGFEDC0GA\n+@out-MT-23/1\n+ACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAACTCAGACGCTCAG\n++\n+GFGGGGGGEGGFCEFGF98G>GGGGGGGGGEFGGFG9@@EGGGFGFFGFGGFG;GFEG>FGFDFGDGGGFFCDFEF)FGG<@?FGEG2ECAFGGEED.BFG\n+@out-MT-25/1\n+TACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTCCATGCTAA\n++\n+EGGGGFDC7GFFF9EA@GCGFFGGGDGEG<CGGFFGGFFAFCGFGGGGDFEF;GGGB7FGGG7.GFFFFF<BA8DFGGGEF=ABFG38CFGA$G;A=7ED=\n+@out-MT-27/1\n+ACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCG\n++\n+EBGGG=GGGEFFG@GFGFD;F?GGEFE0FGFFFFFEGGG<?F@DG>EFGGGGGFDGF-FF:E=GGGFFF1AGFFGB?DBGGFE<GDFF/C@CGGGGEFEFF\n+@out-MT-29/1\n+AAAGCTGGTTTCAAGCCAACCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTCTGTCAAAGTTAACTTATAGGCTAAAT\n++\n+F>F?BGGGGEFGGGGGGGFEGGGEGGGG:GGGG;G;G7FGCBBGG8GGGGGFGGDGD<DAGFEEGEFFGA3FDF17<GGFB4F>CFG$%C4FDGGGGD5=9\n+@out-MT-31/1\n+GAGCTCACCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCA\n++\n+FGEGGGGADEFFGG.GGGG1FGGEGGGGGGGGGGGFFFG?GGDGGG:GDGGGF87GGGGEGGG>GFFGFGGG1GGGGB6FGG09G8B?BGGE3;)?FD;7-\n+@out-MT-33/1\n+GTGAGGCCAAATATCATTCTGAGGGGCCACACTAATTACAAACTTACTATCCGCCACCCCATACATTGGGACATACCTAGTTCAATGAATCCGAGGAGGCT\n++\n+GEGCGEA6FFGGFGGFB:DEGEAGGGGFGG>%G0GFG95GDGCFCFGGGEFGGG3F+CGFFGG>GGGGGA5)D"F>GEF68$D/-?GFFB&$CGFA@GAGC\n+@out-MT-35/1\n+CCAGGTCGGTTTCTATCTACATCCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTGCCCCCGTAAATGATATCATC\n
++\n+GFGFDGGGGEGB-GGGDBAAFG$FGGGGD=GFGCGG1EGGGC=GGGCF.GGGGCFFGGGFFF8AGD<EBCDGD'..b"TCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAA\n++\n+2BEGBGGGFGCF>GG8G?GGFG=GFEGG.GAEFGFGFAG>BDDGFF0GF8FGGFGGG<GGDGGGF=GGFGBEEGD@CGCGGGG;ACEE;F7CG?GC+<:'=\n+@out-MT-1615/1\n+CCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTGCTTCTTCCCACTCAT\n++\n+GGGGCGGGFGEGFGDD@GEGGFGGGF?GGG:GF1GGGEGFGGGGC?GGGGGGGE<GGFGFEGGGDD;GB@@C?DBFFGEGG>GGB#GFE>FC>E+E79G>E\n+@out-MT-1617/1\n+TTCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCCAAGCCTCACCCCACTACTAGG\n++\n+GFAFE?GGGEGF@-GEBGGGGAGF>GFFCGFGGGDGFGFGGFDFGAGCGGGFG@GD=FFGGF@FEGGFDFGFDE=FGEGGED;GGBD*=E:CE47EGFGG,\n+@out-MT-1619/1\n+TATCCCCATACTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTAAGCCTAACCGCTAACATTACTGCAGGCCACCTACTCA\n++\n+GGBGGG=GCE:FG<FGGFGGEFF<AGCAGFGGGEDFGFGGGGGFG(DGFF8DFGGEDFGGG1GG3FGFBGGFBFGFF5GG%60EA;D?EGGAG5F?>>EE9\n+@out-MT-1621/1\n+GCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCGTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCAAACCCA\n++\n+GE?GGFGEDGGGDGGG?GGEFGCGGG=G7F:FGGGCG;EGGG9G=EG>FGF@%EGGGGGF?FGFF@<BGFF@4$GFEDGF7B)EG?CCFGGFE;;*2);G8\n+@out-MT-1623/1\n+AGGAATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATGCTATACAACCGTATCGGCGATATCG\n++\n+EGGGGGGGFE5EGGEGGCGGGGG:F/GAGGFFGE;FAGBFFFG?CCFF8GFGGGGGEBGEGGEG4E@6FGGEFE>&FGGFGG6$14.1<CCEGGGGGEFGE\n+@out-MT-1625/1\n+AAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCCATAAAGAGGAGAAGG\n++\n+EGGGBFGGF;CG49GGFGGEAFGGGGFGCFG6FGFGGGGFFDGG=EGB3GGFEGGGGG4GABDB=FFCGF@F>EFFFA8*GGGGGGG=>AF+GGE@F@>DE\n+@out-MT-1627/1\n+ACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCAT\n++\n+GGGGFGGFF<FGGGFGGFGEGGGG<G;GDGCE;FFGF3%GGGFGGGEBGGGEFGGB<GFCGGDGGGEGGGDFFCF@<4FCEG:*F8;CEGFFGGG2AFBF+\n+@out-MT-1629/1\n+ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTAGATAACAGACGAGGTCAACGATCCCTC\n++\n+GGGG?GGFEGG
GFF4=GGFFEGFFCGDFFAFGGGEGGGGFECGFDE;GGFBFA:GGD9FGF;/GGEGGFFFFFE$-G<FFFFGG>FGC38@FFGFFBCEDF\n+@out-MT-1631/1\n+TCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGA\n++\n+FGGGGFBGGGGFGGGFGCGGGG=G=>FGFEGFGGDGGBFGGGFGFGGGFFGGGGFGGGAGGGFD<G;GFGF2C@FGG2GFBGEGFCEF=?-C?G7>EDA?C\n+@out-MT-1633/1\n+TGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATA\n++\n+;GG=GFGEGGGEGFGFGGG8DG@GFGGFGG:CFGGGGGEEGGFEGGG?8FGDFA;GEGGBGGFDDGG&EFFADGGFC.=G.FGGECD6GG=F@6AGFCFG6\n+@out-MT-1635/1\n+TGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCA\n++\n+G;GGGFGGGGFFEGFFFGF9FGGEGGDGGGGGF?GGGEGEEGGEGGGEFGGAFAG;GDGGCGGGBFCFG@7ECGGDG2DGF@GEBDC35CG@GGEBE5>*F\n+@out-MT-1637/1\n+AACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACA\n++\n+FGFGGGFFFF;GEDFG?GG;G9BGGGGGGDFGEEG;FGGEBGFGFEGC:EF4GG@FGCF:GGGFGGFDGG?GGGBFF49F=EGGGGEA9C=BGCG;EAA@B\n+@out-MT-1639/1\n+AACCACCCACAGCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCA\n++\n+GGGGE@GGGGECFGC@GCGDG?C@AAGFGF?7GGGGGGDFGGF6G>C?GCBFGDFGFFDGGGBGC1FGFGGEG?B2EAEBFGG;G?EGG81GF76FF@;B'\n+@out-MT-1641/1\n+CTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAG\n++\n+FG7FF0FFFGGFGC;GGGFDEG<FFFCGCD>EF<EGGGGDGG:FGG>FF>FEGFEGGDCGFEEGEF9GGGGGGGGGE7GGEFB;@G;@BGFEFGGFGGF?6\n+@out-MT-1643/1\n+TAATAATAACAATTGAATGTATGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACT\n++\n+G;AFGGGGFG@GFGF7EGGF#GGGFEGGGFDGGD*FE6GFGGFE;GF?GG@GGGFGG>FAFEEEFF5FF9GGGB=$A;?G<CCFGGGG?@B$E@AC>E?.>\n+@out-MT-1645/1\n+CATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAA\n++\n+GGGFGDFF(GGFFGGF.GFGGFGGGAEF;DGAFFCFEDGFGG-GFEGGDFB6GEEGBFEGBCGFGGGGGGGFG<E:=G7FCG<?@G@BBGEE=GBDFAFFE\n+@out-MT-1647/1\n+ACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAA
CAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACT\n++\n+CFFGFF<?FEFDEGGCGGF:GEEGGGGFFGEGAEGEFEFG;;GCFGGGDFG8DGFGGGGGFFGGGEFG/;FGGGGGF2CGGFFDEG?F@GGEFCDDGFD@A\n"
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE-VCF-BAM_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE-VCF-BAM_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,3296 @@\n+@out-MT-1/2\n+GAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATTTGTTGTGGGTCTCATGAGTTGGAGTGTAGGATAAATCATGCCAAGGCGAGGA\n++\n+GEFEE<DGFGGGFEFFGGEFGGGE1A<G==FEGFGBFG=GCGGGF>FGEGAG6BGGGGGGCE@GGD6GGFFGFEDB,FEF7B;6=GECF@%CDFDFF?CC7\n+@out-MT-3/2\n+GGGGTTTGGTGGAAATTTTTTGTTATGATGTCTGTGTGGAAAGTGGCTGTGCAGACATTCAATTGTTATTATTATGTCCTACAAGCATTAATTAATTAACA\n++\n+FFCGFGGGGGEGCGGG::GFFG9FF@FGG3F=G>/DGFAGGGEFDFEEGGG?GFGGGGGGGE@DF=CF5GGECDDGFF>GEC4GFGGGGGGG=CEGGGGDF\n+@out-MT-5/2\n+TGGCCCAGCTCGGCTCGAATAAGGAGGCTTAGAGCTGTGAATAGGACTCCAGCTCATGCGCCGAATAATAGGTATAGTGTTCCAATGTCTTTGTGGTTTGT\n++\n+FGCGBGDGGFGCGFF@GEGGG+GGGFGGFGGFGFG:F6G##'4FFDGGGGGEGDFEAGGEGFGGG9DEDGFGGF6FGAFFDGG/FDG@F8>GFGEDDFF==\n+@out-MT-7/2\n+TGTTTGGATGTAAAGTGAAATATTAGTTGGCGTATGAAGCAGATAGTGAGGAAAGTTGAGCCAATAATGACGTGAAGTCCGTGGAAGCCTGTGGCTACAAA\n++\n+:GFGGDG-G7CG.G>G7FDEFFFGDBGEGFF2&1GGGFGF<EGGGGGGFFGGGFFGGFFGGEDF6DGDFGGGGFFGEG)FGD(<FGGA44GF:EGF?GG>D\n+@out-MT-9/2\n+GGCAAGGTCGAAGGGGGTTCGGTTGGTCTCTGCTAGTGTGGAGATAAATCATATTATGGCCAAGGGTCATGATGGCAGGAGTAATCAGAGGTGTTCTTGTA\n++\n+GF=GFBFGGGGCF+;G;GGGGADGFBGGEGFFGCGGGGEA-G@EGGGFGFG:G>EFBFG5=GFEGGBFAGGG9GCGFG=3DGFF?EDGGF;5GF?F?BG9*\n+@out-MT-11/2\n+CCGTATCGAAGGCCTTTTTGGACAGGTGGTGTGTGGTGGCCTTGGTATGTGCTTTCTCGTGTTACATCGCGCCATCATTGGTATATGGTTAGTGTGGTGGT\n++\n+GDG5GGFGFFGGGEGFGFDDGGG@GGG%<GGGGGGF6GFGAGGGGFF>BFBEGGGFFFGGGA>BFGCCFDG>GAGGGG1FGGFGDGEG0D8FGFGA(FD1B\n+@out-MT-13/2\n+TGATAAGTGTAGAGGGAAGGTTAATGGTTGATATTGCTAGGGTGGCGCTTCCAATTAGGTGCATGAGTAGGTGGCCTGCAGTAATGTTAGCGGTTAGGCGT\n++\n+GDG?GGGF=GFFFGGFEA7GG<GGGGBGG@AGEGGGGEGBFFG79D?GFGFGGG=FGGGFB**=GFFDGGDE;BAGEG1FFCGGGGFFGDGGGG4FDFFCE\n+@out-MT-15/2\n+GTCCGTGCGAGAATAATGATGTATGCTTTGTTTCTGTTGAGTGTGGGTTTAGTAATGGGGTTTGTGGGGTTTTCTTCTAAGCCTTCACCTATTTATGGGGG\n++\n+:@DB8F.F&F?AG=GGGGGDF0%DGGG:GFFB<FGGGCGGEEDGBGFGFCGG3@?D@GGFDGFGGGGEGFGGECGF?F>/;F@G4?$DFGGGECED<BBG2\n+@out-MT-17/2\n+GGGGTTGGGTATGGGGAGGGGGGTTCATAGTAGAAGAGCGATGGTGAGAGCTAAGGTCGGGGCGGTGATGTAGAGGGTGATGGTAGATGTGGCGGGTTTTA\n++\n+GGGF;GEFFFGG=GFGF@EDEGGDGGCG;GEGGGGGEF:FGGG
GGAGFF:GGGCGCECFGGBCBFGGG=-EBDG<GFF3,F2FCAGGDC@6GFFD=E>GGG\n+@out-MT-19/2\n+GGGAGATTAGTATACAGAGGTAGAGTTTTTTTCGTGATAGTGGTTCACTGGATAAGTGGCGTTGGCTTGCCATGATTGTGAGGGGTAGGAGTCAGGTAGTT\n++\n+GBGGGF?GFG?GFG+?FFDGGGGGGBGGGF8GG<4FGFGGGGGFCGEGGGGGGEGGGCGGFEGGFGGGGGEGGGEGGGGFGGGF6<;8AEG9GFGGDFFBF\n+@out-MT-21/2\n+ATAAGCAGTGCTTGAATTCTTTGGTTTCGGTTGTTTTCTATTAGACTATGGTGAGCTCAGCTGATTGATACTCCGGATGCGAGTAATCCGGATGTGTTTAG\n++\n+AGGGFGGGGG?EFFGFF3'FED.3AFDFG=FGGGC5GGGGG.ECA:GFGGGEGFGGGGFE%GGAFGGGEC$0;F'GGGDFG=4799(/>?BDFFGGGF?A?\n+@out-MT-23/2\n+GGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCCGTAGTCGGTGTACTCGTAGGTTCAGTACCATTGGTGGCCAATT\n++\n+DDGFFG?2@GEGGGG<GGGFGFG%FGC%$BGDGGEGGFFBGGD9?EEGGCGGGFGGCGEEDGGDBGGEGGFGGDGEGFE@EFC@EEFGGFF8:@C<CFFGE\n+@out-MT-25/2\n+ACCGACCTGGATTACTCCGGTCTGAACTCAGATCACGTAGGACTTTAATCGTTGAACAAACGAACCTTTAATAGCGGCTGCACCATCGGGATGTCCTGAGC\n++\n+GFGFFGGFAGFGGFF=GAG7>GE7<CGDBAEEGFGGG6@GGDGGGGGGGGGFGGG?DGDB@GGGGGDFFGGAGG>EFGGGGCGAF6G9FFEAFEFB8BF<D\n+@out-MT-27/2\n+AGTCCTTGAGAGAGGATTATGATGCGACTGTGAGTGCGTTCGTAGTTTGAGTTTGCTAGGCAGAATAGTAATGAGGATGTAAGCCCGTGGGCGATTATGAG\n++\n+FGGEFCGGG>GCDEGG:G*G.GFCFAFG@GGFFGCFFG>BGBDCGFFGD23DGGFC7;GGFFDGGGFDBFEFGFFFGGF/B?6BFGFFGEF>FFFFG&FF@\n+@out-MT-29/2\n+CTGACGGTTTCTATTTCCTGAGCGTCTGAGATGTTAGTATTAGTTAGTTTTGTTGTGAGTGTTAGGAAAAGGGCATACAGGACTAGGAAGCAGATAAGGAA\n++\n+'$FGGFGGBFGGGGGGEFEEG<7E?FEG;GFFGGFG<FFGFFFF<FFCG7;GFGGGG>GDG0FFDDG=GDEGGGGFFGDGAFGGGAF@CF;EDGFGGGG90\n+@out-MT-31/2\n+CCACATCTACAAAATGCCAGTATCAGGCGGCGGCTTCGAAGCCAAAGTGATGTTTGGATGTAAAGTGAAATATTAGTTGGCGGATGAAGCAGATAGTGAGG\n++\n+GEGF:GG9FGDGCD/FGFFFBEGGFFGCGGGGGCGFG@F8GDFGGFEGGFGGEG.E6FGGG5<GGF*EGCFFGGGGGGFDGB@FG@@29G8FGBA8D=0A3\n+@out-MT-33/2\n+AGGAGAGAAGGAAGAGAAGTAAGCCGAGGGCGTCTTTGATTGTGTAGTAAGGGTGGAAGGTGATTTTATCGGAATGGGAGGTGATTCCTAGGGGGTTGTTT\n++\n+DGG?A?5GGGGGGGGGG@GEFGCEGGGCFGGGGGGGFGBGGFEGFGGFFGGGGGGE??G>8GG9EF/ECGFGEFGGCG:GGDG=G?B9FG;GA=E.;GD@B\n+@out-MT-35/2\n+GAGGAGTATGAGGTTGGCCATGGGTATGTTGTTAAGAAGAGGAATTGAACCTCTGACTGTAAAGTTTTAAGTTTTATGCGATTACCGGGCTCTGCCATCTT\n++\n+
GFF;B>3G(GGEGGDG5GGGBFGFGG<EFBGDEGGEEGGGBFGGGEFDAAGGDB@G<GG?D(CGG:F?EGDFG"..b"GTATGTGCTTTCTCGTGTTACATTGCGCCATCATTGGTATATGGTTAGTGTGTTGGTTAGTAGGCCT\n++\n+GGGGG:FGGGEGGGGGG=FGFFAFGG3GGFEGG8EGGFGG-?EGGGG<GGF@GB:CF(G=GGGEG8GGEGGG@FEFG==+FFGEG6:AFFFGFGG???940\n+@out-MT-1615/2\n+AATTTATGAAGGAGAGGGGTCAGGGTTGATTCGGGAGGAACCTATTGGTGCGGGGGCTTTGTATGATTATGGGCGTTGATTAGTAGTAGTTACTGGTTGAA\n++\n+GGEGGB;FFGGGE?@GFGGF-GGFGCGCE=GGGEFFGGG$FDGGGGGGB:FFDDGGFE=EGGFGDAGFFA@EBCFGGFFAEGGFFGFFG7D1-=EGGGF>C\n+@out-MT-1617/2\n+AAGGGCGCAGACTGCTGCGAACAGAGTGGTGATAGCGCCTAAGCATAGTGTTAGAGTTTGGATTAGTGGGCTATTTTCGGCTAGGGGGTGGAAGAGGATGA\n++\n+@FG7;FG@GGBGDGGGGFG=F<GECFG?GFEGGGGFEFGCGG<GGGFGCFE?GDGGGFGGGE@AGGF?<FGGGFGFE=-FE=9GGF5GGF@-G&+EFG;;B\n+@out-MT-1619/2\n+AGGGGTCATGGGCTGGGTTTTACTATATGATAGGCATGTGATTGGTGGGTCATTATGTGTTGTGGTGCAGGTAGAGGCTTACTAGAAGTGTGAAAACGTAG\n++\n+GG9G<EFGGFGGFGGGFGGCGGG>E>.G8?F,'GFGGEGC=GG>FGGGFFEGGG<FGGEF,E)#;BG9DGGGBFG5<DGGFCGG=>GF8FEGGFDEG=8/<\n+@out-MT-1621/2\n+AGCCACTTATTAGTAATGTTGATAGTAGAATGATGGCTAGGGTGACTTCATATGAGATTGTTTGGGCTACTGCTCGCAGTGCGCCGATCAGGGCGTAGTTT\n++\n+FGGGCFEFF8?DFFGBEGGGFGGG7GDG@CDFGGFF2GGGGGDGGAFG?FG@<GGGEFFGGFG2GG@GFGGFFFBGGGEC5&GEFGFFDGGFDGFEC??>@\n+@out-MT-1623/2\n+GGAGTCAGGGGTGGAGACCTAATTGGGCTGATTTGCCTGCTGCTGCTAGGAGGAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATT\n++\n+GBGFGGGGFGEG:;FGFGEEG+G4GGGGGGGGGGGGFGGGG?FGE8=G@GGFGFD-FFCFGGEGE?DCGDFFFG,.FGGDCGGGFFD?FC<+)CA4FGD-=\n+@out-MT-1625/2\n+GATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTGGGGTCATTGGTGTTCTTGTAGTTGAAA\n++\n+EGGA:DEGGGGDGGGGGCGGEDBBAGGFEGC:GFG9GG'6G;FFGFFGE<GEGGG<C:GGGEGEEGGBGGE8=AG6FGDE24%(GB?AGE?3*%DBAD6D2\n+@out-MT-1627/2\n+TGTAGGTGTGCCTAGTGGTAAGAAGTGGGCTAGGGCATTTTTAATCTTAGAGCGAAAGCCTATAATCACTGTGCCCGCTCATAAGGGGATGGCCATGGCTA\n++\n+GGF?GGG8-GGGG(3GEGFG:FGGGGGGGFGFF7GG)EGEGGAFEFG7GG@1GG@FGGGFGCGFGDGFG7G-GFGF+GB?GGGGFGG.'?<GGEBGGEDF>\n+@out-MT-1629/2\n+CGTGAAAGTGGTTTGGTTTAGACGTCCGGGAATTGCATCTGTTTTTAAGCCTAATGTGGGGACAGCTCATGAGTGCAAGACGTCTTGTGATGTAATTATTA\n++\n+6GEGGGG>GFGC;GB3
BGA3GGGGFEEG=G>GGGFGGFG>CGF;EF@GE-<GDGFF=GGGGE0GGGECF<E@FF<GBG:>>C:GACGFFFFEEE;GG@FFF\n+@out-MT-1631/2\n+TCGATTGTCAACGTCAAGGAGTCGCAGGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCAGTAGTCGGTGTACTCGT\n++\n+GGGF2B29FGFGGGFGG?EGFGGGGG5GECGEGGGGEGFC@F?F;FGFGGGE>?ADE=CGEGFEFEGG>@GGF3GF6GGE?FG(C4EGGFGFGGEF4;D>2\n+@out-MT-1633/2\n+CAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATTAGGAAGATGAGTAGAT\n++\n+GGG9EGGGGGGEGG?G?@DGGAFGGG>FEEEFF@D>GEDGF?>=?DGFEGGFEGG:DFGCFEGFGFGG$<GAFGFGGECFGFBFFGGF>E7GF<:?A@E21\n+@out-MT-1635/2\n+TGGCTCAGTGTCAGTTCGAGATAATAACTTCTTGGTCTTGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTGT\n++\n+GFGGE?GGGG<FFFG;GGFG<GGGGFFGGGGGDGFGEG%@FGFFGEGFGGG>D0CGGF=DGFG9:@>@94G=FFGD>?CGGGEE4'G=GFEB=4:EC=<#C\n+@out-MT-1637/2\n+GGCGATGAGTGTGGGGAGGAATGGGGTGGGTTTTGTATGTTCAAACTGTCATTTTATTTTTACGTTGTTAGATATGGGGAGTAGTGTGATTGAGGTGGAAT\n++\n+FGGGGGG5GGGGGGGGGEFGG?GEF=0EGGG/8CBGE=GFGGEG;FFG;CGG9EGGFFGFGGGEFGFDC3B.FE;G=FFGGGFBGFD=D?CFEED>FFC#A\n+@out-MT-1639/2\n+GCGTTCTGGCTGGTTGCCTCATCGGTTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATG\n++\n+FEDGFFDGAGFGGFGGGFFGGGGGG$G;GG@GGGFEGGGGGGGGGFCBEFF@GFDG2BGGFGGG7GG@FGGGEE<FFGGGGFGGGGF*G=EAFEBB@?-7A\n+@out-MT-1641/2\n+CTAGTATGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGT\n++\n+GFGGGGDGGGG=EDFFGGGCFGFFGFG3AEGEGFFGACGFDGFFEGGGGG?AGFEAFGGG5DB>GCGC@FFEG5<GEFGGGGGGCE@EFBAEGDFEGDAFC\n+@out-MT-1643/2\n+GGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGGGTGCTGGGTAGGATGGGCGGGGGTTGTATTGATGAGATTAGTAGTATGGGAGTGGGAGGGGAAAATAA\n++\n+FFGGGGFGFAFGGDGFFDEFGDGG1GGG?EGG;DFBFGGCGGG:GGGGD4GEC*GGGGEGGGGDFGGFGEDAG<FG<GF)CEBCGGE@FGFEFEGEDGGFF\n+@out-MT-1645/2\n+GATGGGGTGTGATAGGTGGCACGGAGAATTTTGGATTCTCAGGGATGGGTTCGATTCTCATAGTCCTAGAAATAAGGGGGTTTAAGCTCCTATTATTTACT\n++\n+GGGGGBGDFGEGGG;?GGCFFG@EDGGEGGGEGGGGGFF@GFGDD6EGG#GFC=GGGFGFGFGFFGGF@AFGGF9>G><FGC;D<GEFFFFGGDDEEGFAC\n+@out-MT-1647/2\n+GGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTT
TTTTTGTTAGGGTTAACGAGGGTGGTAAGGATGGGGGG\n++\n+GGGGDGGGGG7EGGGGGGGGAGGGBFFFGEGAGGFGGGEGGFGGFGGGGFGGGFEGCEFGGGFGGGGDGF@>GFGGGGFFGGGGFG8FEFF@FF>2EGFE?\n"
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,3296 @@\n+@out-MT-1/1\n+CATCATAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCAGACTAATCTTAGTTA\n++\n+GGGGGGF:GGDFFFFGGGF5@AGGGGGEGG>\'%GE?FF<?FBDGFEF2DFB=GFCGG1&/GBGGGDGGFBFEFDF6@GGEGEGGFA&F0FGF=GGGFC@9C\n+@out-MT-3/1\n+CCTATTAACCACTCACGGGAGCTCTCCCTGCATTTGGTATTTTCGTCTGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATG\n++\n+FFEFGGGGFD&;GGGGGFGFGGFBGGF\'EFFGGGGGFGGGGFGGGGGGGGFDGGGBGFF6EGFEG1EGGFGGC@GD7GFEFG@FGGCE?E<FGD=GGG/FD\n+@out-MT-5/1\n+CCCGCCGCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAG\n++\n+F0;BGGEGEGGGFGFGGGG?=GE3DFGFGG?GGGGFGG?FFG@DDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFD98DEGFF;A>E>3\n+@out-MT-7/1\n+ATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCAAAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTT\n++\n+AG=AFEGGFGDGFG>GDFEGGGGE:GFGGG@GGECF@GGGFGAGE\'1BGFFGEGGGFGGGFD;G@FF5>GFGG=F4BEGGBEF@BGGG7GEG@7GC.AGGB\n+@out-MT-9/1\n+TATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGA\n++\n+EEFD6BAFFGGGGFFDGGGG@:GGGGGGFFFFF/9=FGGBFGA:FDGBGEFGGGGG;@FGGCDGFGGGGGE<G4>FGCAGEED1GG(*0?G=:>CGFGFCF\n+@out-MT-11/1\n+GCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTC\n++\n+BFAGF;FGGGDG+G8GDGGG@F?GGGGGBEEGFFGD9B.GFGG:GG)DGFGGGGGG9EGFGFGDGF?+2=E.2GGFFF:EFGGFEGFEFE3DGE71G;E2=\n+@out-MT-13/1\n+TATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAA\n++\n+GGGECFFGD17EDFGGGGG<GAFFE6CGGG3FCFGC>F:GG0GG8FFBGGGGGGG;DFFEGGGFGFGGFG>FFG;DF6GGGGGFFGGCEFG<6EFFEBGA9\n+@out-MT-15/1\n+ACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCCGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGT\n++\n+GFG>FGEFGDGBDGDGF@GFG@6DFCCGEEGGGBG>GEDFAGGGGBDGFGGBBGGG0)GGGEG;FGFGBGGEDGEFGGGDC6GGBDB?(AG?FGFGEE22?\n+@out-MT-17/1\n+TTCCGCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTAACGAACGAAAAATTCTA\n++\n+GFEB/GFE?FGGG2FFGFGGFGG+FGGDG7GBGC@BFGG=
GGF6EGGBAGFGGGGGGDGC<GGGG=FAFF?FGG@GGEGBDG:C\'5@GF8FGD;FGA@?G/\n+@out-MT-19/1\n+AGCCTACTCCAATGCTAAAACTAATCGTCCCAACAAGTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACCACCACCCAC\n++\n+G@GFGA8GFGFFFGDGEGGFGG8GFFGC1G2GGFGG3GGF@F0EFCGGFFB)ECG?FFG9GBGGGG<DFEGGCCGE8GEF7EGFDGFE<FG#,D9G=<B7F\n+@out-MT-21/1\n+TGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTCATTACCTCAGAGGTTTTTTTCT\n++\n+GEGFAAGFGGEGGGGCFFEGGF=DGF8BFECE>GFFFFGBFB8GFGF67+FFEDFGGGG?EGCG>GDCG>GEC=4CED\'GGG?:D=?:FF>GFGFEDC0GA\n+@out-MT-23/1\n+ACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAACTCAGACGCTCAG\n++\n+GFGGGGGGEGGFCEFGF98G>GGGGGGGGGEFGGFG9@@EGGGFGFFGFGGFG;GFEG>FGFDFGDGGGFFCDFEF)FGG<@?FGEG2ECAFGGEED.BFG\n+@out-MT-25/1\n+TACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTCCATGCTAA\n++\n+EGGGGFDC7GFFF9EA@GCGFFGGGDGEG<CGGFFGGFFAFCGFGGGGDFEF;GGGB7FGGG7.GFFFFF<BA8DFGGGEF=ABFG38CFGA$G;A=7ED=\n+@out-MT-27/1\n+ACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCG\n++\n+EBGGG=GGGEFFG@GFGFD;F?GGEFE0FGFFFFFEGGG<?F@DG>EFGGGGGFDGF-FF:E=GGGFFF1AGFFGB?DBGGFE<GDFF/C@CGGGGEFEFF\n+@out-MT-29/1\n+AAAGCTGGTTTCAAGCCAACCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTCTGTCAAAGTTAACTTATAGGCTAAAT\n++\n+F>F?BGGGGEFGGGGGGGFEGGGEGGGG:GGGG;G;G7FGCBBGG8GGGGGFGGDGD<DAGFEEGEFFGA3FDF17<GGFB4F>CFG$%C4FDGGGGD5=9\n+@out-MT-31/1\n+GAGCTCACCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCA\n++\n+FGEGGGGADEFFGG.GGGG1FGGEGGGGGGGGGGGFFFG?GGDGGG:GDGGGF87GGGGEGGG>GFFGFGGG1GGGGB6FGG09G8B?BGGE3;)?FD;7-\n+@out-MT-33/1\n+GTGAGGCCAAATATCATTCTGAGGGGCCACACTAATTACAAACTTACTATCCGCCACCCCATACATTGGGACATACCTAGTTCAATGAATCCGAGGAGGCT\n++\n+GEGCGEA6FFGGFGGFB:DEGEAGGGGFGG>%G0GFG95GDGCFCFGGGEFGGG3F+CGFFGG>GGGGGA5)D"F>GEF68$D/-?GFFB&$CGFA@GAGC\n+@out-MT-35/1\n+CCAGGTCGGTTTCTATCTACATCCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTGCCCCCGTAAATGATATCATC\n
++\n+GFGFDGGGGEGB-GGGDBAAFG$FGGGGD=GFGCGG1EGGGC=GGGCF.GGGGCFFGGGFFF8AGD<EBCDGD'..b"TCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAA\n++\n+2BEGBGGGFGCF>GG8G?GGFG=GFEGG.GAEFGFGFAG>BDDGFF0GF8FGGFGGG<GGDGGGF=GGFGBEEGD@CGCGGGG;ACEE;F7CG?GC+<:'=\n+@out-MT-1615/1\n+CCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTGCTTCTTCCCACTCAT\n++\n+GGGGCGGGFGEGFGDD@GEGGFGGGF?GGG:GF1GGGEGFGGGGC?GGGGGGGE<GGFGFEGGGDD;GB@@C?DBFFGEGG>GGB#GFE>FC>E+E79G>E\n+@out-MT-1617/1\n+TTCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCCAAGCCTCACCCCACTACTAGG\n++\n+GFAFE?GGGEGF@-GEBGGGGAGF>GFFCGFGGGDGFGFGGFDFGAGCGGGFG@GD=FFGGF@FEGGFDFGFDE=FGEGGED;GGBD*=E:CE47EGFGG,\n+@out-MT-1619/1\n+TATCCCCATACTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTAAGCCTAACCGCTAACATTACTGCAGGCCACCTACTCA\n++\n+GGBGGG=GCE:FG<FGGFGGEFF<AGCAGFGGGEDFGFGGGGGFG(DGFF8DFGGEDFGGG1GG3FGFBGGFBFGFF5GG%60EA;D?EGGAG5F?>>EE9\n+@out-MT-1621/1\n+GCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCGTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCAAACCCA\n++\n+GE?GGFGEDGGGDGGG?GGEFGCGGG=G7F:FGGGCG;EGGG9G=EG>FGF@%EGGGGGF?FGFF@<BGFF@4$GFEDGF7B)EG?CCFGGFE;;*2);G8\n+@out-MT-1623/1\n+AGGAATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATGCTATACAACCGTATCGGCGATATCG\n++\n+EGGGGGGGFE5EGGEGGCGGGGG:F/GAGGFFGE;FAGBFFFG?CCFF8GFGGGGGEBGEGGEG4E@6FGGEFE>&FGGFGG6$14.1<CCEGGGGGEFGE\n+@out-MT-1625/1\n+AAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCCATAAAGAGGAGAAGG\n++\n+EGGGBFGGF;CG49GGFGGEAFGGGGFGCFG6FGFGGGGFFDGG=EGB3GGFEGGGGG4GABDB=FFCGF@F>EFFFA8*GGGGGGG=>AF+GGE@F@>DE\n+@out-MT-1627/1\n+ACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCAT\n++\n+GGGGFGGFF<FGGGFGGFGEGGGG<G;GDGCE;FFGF3%GGGFGGGEBGGGEFGGB<GFCGGDGGGEGGGDFFCF@<4FCEG:*F8;CEGFFGGG2AFBF+\n+@out-MT-1629/1\n+ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTAGATAACAGACGAGGTCAACGATCCCTC\n++\n+GGGG?GGFEGG
GFF4=GGFFEGFFCGDFFAFGGGEGGGGFECGFDE;GGFBFA:GGD9FGF;/GGEGGFFFFFE$-G<FFFFGG>FGC38@FFGFFBCEDF\n+@out-MT-1631/1\n+TCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGA\n++\n+FGGGGFBGGGGFGGGFGCGGGG=G=>FGFEGFGGDGGBFGGGFGFGGGFFGGGGFGGGAGGGFD<G;GFGF2C@FGG2GFBGEGFCEF=?-C?G7>EDA?C\n+@out-MT-1633/1\n+TGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATA\n++\n+;GG=GFGEGGGEGFGFGGG8DG@GFGGFGG:CFGGGGGEEGGFEGGG?8FGDFA;GEGGBGGFDDGG&EFFADGGFC.=G.FGGECD6GG=F@6AGFCFG6\n+@out-MT-1635/1\n+TGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCA\n++\n+G;GGGFGGGGFFEGFFFGF9FGGEGGDGGGGGF?GGGEGEEGGEGGGEFGGAFAG;GDGGCGGGBFCFG@7ECGGDG2DGF@GEBDC35CG@GGEBE5>*F\n+@out-MT-1637/1\n+AACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACA\n++\n+FGFGGGFFFF;GEDFG?GG;G9BGGGGGGDFGEEG;FGGEBGFGFEGC:EF4GG@FGCF:GGGFGGFDGG?GGGBFF49F=EGGGGEA9C=BGCG;EAA@B\n+@out-MT-1639/1\n+AACCACCCACAGCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCA\n++\n+GGGGE@GGGGECFGC@GCGDG?C@AAGFGF?7GGGGGGDFGGF6G>C?GCBFGDFGFFDGGGBGC1FGFGGEG?B2EAEBFGG;G?EGG81GF76FF@;B'\n+@out-MT-1641/1\n+CTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAG\n++\n+FG7FF0FFFGGFGC;GGGFDEG<FFFCGCD>EF<EGGGGDGG:FGG>FF>FEGFEGGDCGFEEGEF9GGGGGGGGGE7GGEFB;@G;@BGFEFGGFGGF?6\n+@out-MT-1643/1\n+TAATAATAACAATTGAATGTATGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACT\n++\n+G;AFGGGGFG@GFGF7EGGF#GGGFEGGGFDGGD*FE6GFGGFE;GF?GG@GGGFGG>FAFEEEFF5FF9GGGB=$A;?G<CCFGGGG?@B$E@AC>E?.>\n+@out-MT-1645/1\n+CATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAA\n++\n+GGGFGDFF(GGFFGGF.GFGGFGGGAEF;DGAFFCFEDGFGG-GFEGGDFB6GEEGBFEGBCGFGGGGGGGFG<E:=G7FCG<?@G@BBGEE=GBDFAFFE\n+@out-MT-1647/1\n+ACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAA
CAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACT\n++\n+CFFGFF<?FEFDEGGCGGF:GEEGGGGFFGEGAEGEFEFG;;GCFGGGDFG8DGFGGGGGFFGGGEFG/;FGGGGGF2CGGFFDEG?F@GGEFCDDGFD@A\n"
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT-PE_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT-PE_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,3296 @@\n+@out-MT-1/2\n+GAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATTTGTTGTGGGTCTCATGAGTTGGAGTGTAGGATAAATCATGCCAAGGCGAGGA\n++\n+GEFEE<DGFGGGFEFFGGEFGGGE1A<G==FEGFGBFG=GCGGGF>FGEGAG6BGGGGGGCE@GGD6GGFFGFEDB,FEF7B;6=GECF@%CDFDFF?CC7\n+@out-MT-3/2\n+GGGGTTTGGTGGAAATTTTTTGTTATGATGTCTGTGTGGAAAGTGGCTGTGCAGACATTCAATTGTTATTATTATGTCCTACAAGCATTAATTAATTAACA\n++\n+FFCGFGGGGGEGCGGG::GFFG9FF@FGG3F=G>/DGFAGGGEFDFEEGGG?GFGGGGGGGE@DF=CF5GGECDDGFF>GEC4GFGGGGGGG=CEGGGGDF\n+@out-MT-5/2\n+TGGCCCAGCTCGGCTCGAATAAGGAGGCTTAGAGCTGTGAATAGGACTCCAGCTCATGCGCCGAATAATAGGTATAGTGTTCCAATGTCTTTGTGGTTTGT\n++\n+FGCGBGDGGFGCGFF@GEGGG+GGGFGGFGGFGFG:F6G##'4FFDGGGGGEGDFEAGGEGFGGG9DEDGFGGF6FGAFFDGG/FDG@F8>GFGEDDFF==\n+@out-MT-7/2\n+TGTTTGGATGTAAAGTGAAATATTAGTTGGCGTATGAAGCAGATAGTGAGGAAAGTTGAGCCAATAATGACGTGAAGTCCGTGGAAGCCTGTGGCTACAAA\n++\n+:GFGGDG-G7CG.G>G7FDEFFFGDBGEGFF2&1GGGFGF<EGGGGGGFFGGGFFGGFFGGEDF6DGDFGGGGFFGEG)FGD(<FGGA44GF:EGF?GG>D\n+@out-MT-9/2\n+GGCAAGGTCGAAGGGGGTTCGGTTGGTCTCTGCTAGTGTGGAGATAAATCATATTATGGCCAAGGGTCATGATGGCAGGAGTAATCAGAGGTGTTCTTGTA\n++\n+GF=GFBFGGGGCF+;G;GGGGADGFBGGEGFFGCGGGGEA-G@EGGGFGFG:G>EFBFG5=GFEGGBFAGGG9GCGFG=3DGFF?EDGGF;5GF?F?BG9*\n+@out-MT-11/2\n+CCGTATCGAAGGCCTTTTTGGACAGGTGGTGTGTGGTGGCCTTGGTATGTGCTTTCTCGTGTTACATCGCGCCATCATTGGTATATGGTTAGTGTGGTGGT\n++\n+GDG5GGFGFFGGGEGFGFDDGGG@GGG%<GGGGGGF6GFGAGGGGFF>BFBEGGGFFFGGGA>BFGCCFDG>GAGGGG1FGGFGDGEG0D8FGFGA(FD1B\n+@out-MT-13/2\n+TGATAAGTGTAGAGGGAAGGTTAATGGTTGATATTGCTAGGGTGGCGCTTCCAATTAGGTGCATGAGTAGGTGGCCTGCAGTAATGTTAGCGGTTAGGCGT\n++\n+GDG?GGGF=GFFFGGFEA7GG<GGGGBGG@AGEGGGGEGBFFG79D?GFGFGGG=FGGGFB**=GFFDGGDE;BAGEG1FFCGGGGFFGDGGGG4FDFFCE\n+@out-MT-15/2\n+GTCCGTGCGAGAATAATGATGTATGCTTTGTTTCTGTTGAGTGTGGGTTTAGTAATGGGGTTTGTGGGGTTTTCTTCTAAGCCTTCACCTATTTATGGGGG\n++\n+:@DB8F.F&F?AG=GGGGGDF0%DGGG:GFFB<FGGGCGGEEDGBGFGFCGG3@?D@GGFDGFGGGGEGFGGECGF?F>/;F@G4?$DFGGGECED<BBG2\n+@out-MT-17/2\n+GGGGTTGGGTATGGGGAGGGGGGTTCATAGTAGAAGAGCGATGGTGAGAGCTAAGGTCGGGGCGGTGATGTAGAGGGTGATGGTAGATGTGGCGGGTTTTA\n++\n+GGGF;GEFFFGG=GFGF@EDEGGDGGCG;GEGGGGGEF:FGGG
GGAGFF:GGGCGCECFGGBCBFGGG=-EBDG<GFF3,F2FCAGGDC@6GFFD=E>GGG\n+@out-MT-19/2\n+GGGAGATTAGTATACAGAGGTAGAGTTTTTTTCGTGATAGTGGTTCACTGGATAAGTGGCGTTGGCTTGCCATGATTGTGAGGGGTAGGAGTCAGGTAGTT\n++\n+GBGGGF?GFG?GFG+?FFDGGGGGGBGGGF8GG<4FGFGGGGGFCGEGGGGGGEGGGCGGFEGGFGGGGGEGGGEGGGGFGGGF6<;8AEG9GFGGDFFBF\n+@out-MT-21/2\n+ATAAGCAGTGCTTGAATTCTTTGGTTTCGGTTGTTTTCTATTAGACTATGGTGAGCTCAGCTGATTGATACTCCGGATGCGAGTAATCCGGATGTGTTTAG\n++\n+AGGGFGGGGG?EFFGFF3'FED.3AFDFG=FGGGC5GGGGG.ECA:GFGGGEGFGGGGFE%GGAFGGGEC$0;F'GGGDFG=4799(/>?BDFFGGGF?A?\n+@out-MT-23/2\n+GGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCCGTAGTCGGTGTACTCGTAGGTTCAGTACCATTGGTGGCCAATT\n++\n+DDGFFG?2@GEGGGG<GGGFGFG%FGC%$BGDGGEGGFFBGGD9?EEGGCGGGFGGCGEEDGGDBGGEGGFGGDGEGFE@EFC@EEFGGFF8:@C<CFFGE\n+@out-MT-25/2\n+ACCGACCTGGATTACTCCGGTCTGAACTCAGATCACGTAGGACTTTAATCGTTGAACAAACGAACCTTTAATAGCGGCTGCACCATCGGGATGTCCTGAGC\n++\n+GFGFFGGFAGFGGFF=GAG7>GE7<CGDBAEEGFGGG6@GGDGGGGGGGGGFGGG?DGDB@GGGGGDFFGGAGG>EFGGGGCGAF6G9FFEAFEFB8BF<D\n+@out-MT-27/2\n+AGTCCTTGAGAGAGGATTATGATGCGACTGTGAGTGCGTTCGTAGTTTGAGTTTGCTAGGCAGAATAGTAATGAGGATGTAAGCCCGTGGGCGATTATGAG\n++\n+FGGEFCGGG>GCDEGG:G*G.GFCFAFG@GGFFGCFFG>BGBDCGFFGD23DGGFC7;GGFFDGGGFDBFEFGFFFGGF/B?6BFGFFGEF>FFFFG&FF@\n+@out-MT-29/2\n+CTGACGGTTTCTATTTCCTGAGCGTCTGAGATGTTAGTATTAGTTAGTTTTGTTGTGAGTGTTAGGAAAAGGGCATACAGGACTAGGAAGCAGATAAGGAA\n++\n+'$FGGFGGBFGGGGGGEFEEG<7E?FEG;GFFGGFG<FFGFFFF<FFCG7;GFGGGG>GDG0FFDDG=GDEGGGGFFGDGAFGGGAF@CF;EDGFGGGG90\n+@out-MT-31/2\n+CCACATCTACAAAATGCCAGTATCAGGCGGCGGCTTCGAAGCCAAAGTGATGTTTGGATGTAAAGTGAAATATTAGTTGGCGGATGAAGCAGATAGTGAGG\n++\n+GEGF:GG9FGDGCD/FGFFFBEGGFFGCGGGGGCGFG@F8GDFGGFEGGFGGEG.E6FGGG5<GGF*EGCFFGGGGGGFDGB@FG@@29G8FGBA8D=0A3\n+@out-MT-33/2\n+AGGAGAGAAGGAAGAGAAGTAAGCCGAGGGCGTCTTTGATTGTGTAGTAAGGGTGGAAGGTGATTTTATCGGAATGGGAGGTGATTCCTAGGGGGTTGTTT\n++\n+DGG?A?5GGGGGGGGGG@GEFGCEGGGCFGGGGGGGFGBGGFEGFGGFFGGGGGGE??G>8GG9EF/ECGFGEFGGCG:GGDG=G?B9FG;GA=E.;GD@B\n+@out-MT-35/2\n+GAGGAGTATGAGGTTGGCCATGGGTATGTTGTTAAGAAGAGGAATTGAACCTCTGACTGTAAAGTTTTAAGTTTTATGCGATTACCGGGCTCTGCCATCTT\n++\n+
GFF;B>3G(GGEGGDG5GGGBFGFGG<EFBGDEGGEEGGGBFGGGEFDAAGGDB@G<GG?D(CGG:F?EGDFG"..b"GTATGTGCTTTCTCGTGTTACATTGCGCCATCATTGGTATATGGTTAGTGTGTTGGTTAGTAGGCCT\n++\n+GGGGG:FGGGEGGGGGG=FGFFAFGG3GGFEGG8EGGFGG-?EGGGG<GGF@GB:CF(G=GGGEG8GGEGGG@FEFG==+FFGEG6:AFFFGFGG???940\n+@out-MT-1615/2\n+AATTTATGAAGGAGAGGGGTCAGGGTTGATTCGGGAGGAACCTATTGGTGCGGGGGCTTTGTATGATTATGGGCGTTGATTAGTAGTAGTTACTGGTTGAA\n++\n+GGEGGB;FFGGGE?@GFGGF-GGFGCGCE=GGGEFFGGG$FDGGGGGGB:FFDDGGFE=EGGFGDAGFFA@EBCFGGFFAEGGFFGFFG7D1-=EGGGF>C\n+@out-MT-1617/2\n+AAGGGCGCAGACTGCTGCGAACAGAGTGGTGATAGCGCCTAAGCATAGTGTTAGAGTTTGGATTAGTGGGCTATTTTCGGCTAGGGGGTGGAAGAGGATGA\n++\n+@FG7;FG@GGBGDGGGGFG=F<GECFG?GFEGGGGFEFGCGG<GGGFGCFE?GDGGGFGGGE@AGGF?<FGGGFGFE=-FE=9GGF5GGF@-G&+EFG;;B\n+@out-MT-1619/2\n+AGGGGTCATGGGCTGGGTTTTACTATATGATAGGCATGTGATTGGTGGGTCATTATGTGTTGTGGTGCAGGTAGAGGCTTACTAGAAGTGTGAAAACGTAG\n++\n+GG9G<EFGGFGGFGGGFGGCGGG>E>.G8?F,'GFGGEGC=GG>FGGGFFEGGG<FGGEF,E)#;BG9DGGGBFG5<DGGFCGG=>GF8FEGGFDEG=8/<\n+@out-MT-1621/2\n+AGCCACTTATTAGTAATGTTGATAGTAGAATGATGGCTAGGGTGACTTCATATGAGATTGTTTGGGCTACTGCTCGCAGTGCGCCGATCAGGGCGTAGTTT\n++\n+FGGGCFEFF8?DFFGBEGGGFGGG7GDG@CDFGGFF2GGGGGDGGAFG?FG@<GGGEFFGGFG2GG@GFGGFFFBGGGEC5&GEFGFFDGGFDGFEC??>@\n+@out-MT-1623/2\n+GGAGTCAGGGGTGGAGACCTAATTGGGCTGATTTGCCTGCTGCTGCTAGGAGGAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATT\n++\n+GBGFGGGGFGEG:;FGFGEEG+G4GGGGGGGGGGGGFGGGG?FGE8=G@GGFGFD-FFCFGGEGE?DCGDFFFG,.FGGDCGGGFFD?FC<+)CA4FGD-=\n+@out-MT-1625/2\n+GATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTGGGGTCATTGGTGTTCTTGTAGTTGAAA\n++\n+EGGA:DEGGGGDGGGGGCGGEDBBAGGFEGC:GFG9GG'6G;FFGFFGE<GEGGG<C:GGGEGEEGGBGGE8=AG6FGDE24%(GB?AGE?3*%DBAD6D2\n+@out-MT-1627/2\n+TGTAGGTGTGCCTAGTGGTAAGAAGTGGGCTAGGGCATTTTTAATCTTAGAGCGAAAGCCTATAATCACTGTGCCCGCTCATAAGGGGATGGCCATGGCTA\n++\n+GGF?GGG8-GGGG(3GEGFG:FGGGGGGGFGFF7GG)EGEGGAFEFG7GG@1GG@FGGGFGCGFGDGFG7G-GFGF+GB?GGGGFGG.'?<GGEBGGEDF>\n+@out-MT-1629/2\n+CGTGAAAGTGGTTTGGTTTAGACGTCCGGGAATTGCATCTGTTTTTAAGCCTAATGTGGGGACAGCTCATGAGTGCAAGACGTCTTGTGATGTAATTATTA\n++\n+6GEGGGG>GFGC;GB3
BGA3GGGGFEEG=G>GGGFGGFG>CGF;EF@GE-<GDGFF=GGGGE0GGGECF<E@FF<GBG:>>C:GACGFFFFEEE;GG@FFF\n+@out-MT-1631/2\n+TCGATTGTCAACGTCAAGGAGTCGCAGGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCAGTAGTCGGTGTACTCGT\n++\n+GGGF2B29FGFGGGFGG?EGFGGGGG5GECGEGGGGEGFC@F?F;FGFGGGE>?ADE=CGEGFEFEGG>@GGF3GF6GGE?FG(C4EGGFGFGGEF4;D>2\n+@out-MT-1633/2\n+CAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATTAGGAAGATGAGTAGAT\n++\n+GGG9EGGGGGGEGG?G?@DGGAFGGG>FEEEFF@D>GEDGF?>=?DGFEGGFEGG:DFGCFEGFGFGG$<GAFGFGGECFGFBFFGGF>E7GF<:?A@E21\n+@out-MT-1635/2\n+TGGCTCAGTGTCAGTTCGAGATAATAACTTCTTGGTCTTGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTGT\n++\n+GFGGE?GGGG<FFFG;GGFG<GGGGFFGGGGGDGFGEG%@FGFFGEGFGGG>D0CGGF=DGFG9:@>@94G=FFGD>?CGGGEE4'G=GFEB=4:EC=<#C\n+@out-MT-1637/2\n+GGCGATGAGTGTGGGGAGGAATGGGGTGGGTTTTGTATGTTCAAACTGTCATTTTATTTTTACGTTGTTAGATATGGGGAGTAGTGTGATTGAGGTGGAAT\n++\n+FGGGGGG5GGGGGGGGGEFGG?GEF=0EGGG/8CBGE=GFGGEG;FFG;CGG9EGGFFGFGGGEFGFDC3B.FE;G=FFGGGFBGFD=D?CFEED>FFC#A\n+@out-MT-1639/2\n+GCGTTCTGGCTGGTTGCCTCATCGGTTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATG\n++\n+FEDGFFDGAGFGGFGGGFFGGGGGG$G;GG@GGGFEGGGGGGGGGFCBEFF@GFDG2BGGFGGG7GG@FGGGEE<FFGGGGFGGGGF*G=EAFEBB@?-7A\n+@out-MT-1641/2\n+CTAGTATGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGT\n++\n+GFGGGGDGGGG=EDFFGGGCFGFFGFG3AEGEGFFGACGFDGFFEGGGGG?AGFEAFGGG5DB>GCGC@FFEG5<GEFGGGGGGCE@EFBAEGDFEGDAFC\n+@out-MT-1643/2\n+GGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGGGTGCTGGGTAGGATGGGCGGGGGTTGTATTGATGAGATTAGTAGTATGGGAGTGGGAGGGGAAAATAA\n++\n+FFGGGGFGFAFGGDGFFDEFGDGG1GGG?EGG;DFBFGGCGGG:GGGGD4GEC*GGGGEGGGGDFGGFGEDAG<FG<GF)CEBCGGE@FGFEFEGEDGGFF\n+@out-MT-1645/2\n+GATGGGGTGTGATAGGTGGCACGGAGAATTTTGGATTCTCAGGGATGGGTTCGATTCTCATAGTCCTAGAAATAAGGGGGTTTAAGCTCCTATTATTTACT\n++\n+GGGGGBGDFGEGGG;?GGCFFG@EDGGEGGGEGGGGGFF@GFGDD6EGG#GFC=GGGFGFGFGFFGGF@AFGGF9>G><FGC;D<GEFFFFGGDDEEGFAC\n+@out-MT-1647/2\n+GGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTT
TTTTTGTTAGGGTTAACGAGGGTGGTAAGGATGGGGGG\n++\n+GGGGDGGGGG7EGGGGGGGGAGGGBFFFGEGAGGFGGGEGGFGGFGGGGFGGGFEGCEFGGGFGGGGDGF@>GFGGGGFFGGGGFG8FEFF@FF>2EGFE?\n"
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT.fa.fai Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,1 @@
+MT 16569 4 60 61
b
diff -r 000000000000 -r 6e75a84e9338 new/chrMT_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new/chrMT_read1.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,6576 @@\n+@out-MT-1/1\n+CGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTAGCGCACCTACGTTCAATATTACAGGCGAACA\n++\n+GGFCF5AFFDDDFE2GGGCGGAEGGGFGGEGGGGG7CGFBFGGFFFGGFF3,GGGFAFGGGGDFGGFDFG'GGGGGFFE4GF@EEDBEFFE*99EGGGGAE\n+@out-MT-2/1\n+CCTGTTTACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAAG\n++\n+FFEGGGGAG@89GGGFGGFGD<DGGGFEGG/F>?G8@GF?ACG?GFGGEDF=G6(GEGFE9GGG?@FGFGGDFDA6?GFGGGEBABGGDAEEF@)8/B2+A\n+@out-MT-3/1\n+ACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATTCA\n++\n+G9GGGEGGBGGGGFGGFGFBE@GG@GF@EDG'BF2GGGDEGEGEGGGGGFGGGFE=G@GGGGG@FFDFEF$BEDEGGFGE@$5EGGFGFGEFGC:DEB$8;\n+@out-MT-4/1\n+TCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTAATTAATTAA\n++\n+GGGGFG@G?FFFCDGFGDGFGGG9GGFFGGFGGBG/9G;E?FGG77G8E>:/FGGGGFGGGD6GGFFFF8CEAGFGGGGGDG?GGG6-FEBG3DC8@FG-=\n+@out-MT-5/1\n+GAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATT\n++\n+GGGFGGGGGGGEDGGGFFGGG8GFCEG)2CGGGGGGGGGGFGEEFGGGBGGGGFFGGGGGGFFGFGFEF>DE*:@GGEGDEGGFGFGFGG94GA)9AGEFF\n+@out-MT-6/1\n+TGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTC\n++\n+FG>BGGFGG3CADGG98G@EE857FGGEGEFEGFGGGGGFGGGGGFGGG:FBBFFDDFFF=DGFFGGG4?GAG@GCGGEG@GEE;EAFGF&GGFCEGEFE@\n+@out-MT-7/1\n+CAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAA\n++\n+GCGFFGGGGGGGEGAGFGGFGE-GFEFGGGFGFFFBFFCGFAGGGGGFACEDFA>B@GGCEGGGFBF:G9GFGG?F9?DBFGEGCFDE6F>C@FFFF@/E<\n+@out-MT-8/1\n+GAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACA\n++\n+FFGGGGGGFGG'5DCGB>EGGGGG79GGEGGGG=@GE=CGECDFGEGGG=DGGAFFG4DGG6FGGFGC<<F>B?2GEGGCG)G3=E'F7F.B==CAAE<6A\n+@out-MT-9/1\n+GAGCTAAACCTAGCCCCAAACCCACTCCACCTAACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCGATAGAAATTGAAACCTGGC\n++\n+GGFGGFGE-?:FFGGGGEFD?FGAGG<$%FG<&FE/$2CEGEDCGDF
FEF.;FGDAG>GGAGA2?DFFGG??G?GGCBADA.19ADGGFGEC>FGGF8/EF\n+@out-MT-10/1\n+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCCACGCATTTATATAG\n++\n+75FFA:EGGGGGGFF<9GG3?EEGEEFGEG:FGDFGGGGGFFFFGGFFGGDGEG7FFGGGGF+FFGAF6GGFGGE:GE?DGGFGAE'8G5EEGG<<GD;GE\n+@out-MT-11/1\n+TCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATC\n++\n+EDGDGGGFGFFGGFGGEDFGGCFGGGGGD;GFGEFG?FBFGGGBFFGGGGD@GBG@F?;BGGE=D?@FFG<FF?D<GE7DFGGBG'F0F>FGG;@1EGFFE\n+@out-MT-12/1\n+CAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTACCTGTTAGTCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGAAG\n++\n+GFGE:BGGGEGGGF?GGGFGGGAGGGG<F/?GGG@GGGGGFGGGGC590FEGB@G>DGGE?FCGDG1FDFFGBG3FGFGEDGGGCGEGD=<FGF8EGG$2D\n+@out-MT-13/1\n+AATGTTTAGACGGGCTCACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGGTCCA\n++\n+CGGGGFGD8CGGFDGGGF=:BFGG-GFGDDCGGEFE3$GG91EEDGCBGGGG4G5=FGG+7-5G<FA(3<FGFFD?EEGB.+>>CGGFGG6E8AB>*3EGG\n+@out-MT-14/1\n+TATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGA\n++\n+FGEFC+GGCGDGGGFGGGFGEEG=GGCFGGGGCGGEFGGGEFGGGD,FDGBGGD.G??E@;FGG97EGCF=GGGF:GFEEGGFB;0GG7EGFG=FG;8G@B\n+@out-MT-15/1\n+CCAAGCATAATATAGCAAGGACTTACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCA\n++\n+GFFGGGBGEED)*FG@GFG96G?2G9FG@GGGGGBGGGCGFFGGFGGG:)F=;G9BDGGGF>GCFGFGGFG=EGDDGCG@>FAGE;FEF9,<C;9F@@GGF\n+@out-MT-16/1\n+ACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTT\n++\n+G9GGFGFF?GGA+@FFGG6.EBGGGGDGEB5EGGGFFGGFGFCF=GFGGDGGF;GGDGED:FGGED49@@.D&D$A9;F8GGGFGBC)$<EGG4GEC91@@\n+@out-MT-17/1\n+AATTTTATCTTTTGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCAATACTACTAATCTCATCAATACAACCC\n++\n+GGEG=AGGDFGC;FCGECGGGGGGGF=FCGFGGGGGAGGFFGFGG9GGGFGFFAAFDBGEG=@EAGF8>GEDA(FDB8DGGG,DEFEGFEGF&CGF>F-F5\n+@out-MT-18/1\n+CCAACGGAACAAGTTACCCTAGGGATAACATCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCGA\n++\n+GGGB
A@GGD@GGA3BD+1G<FGGFAFFFEFG,5GGG>G&GEGGGGDGGCC;GFE?GGCGGGGFEGB:GGGGGF7FCE"..b"ACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACTCAAC\n++\n+<GGFGGGG=FGG>@GBG?F<FGGGGGGFGA?GFGGGGGGFF@8FG<DGGGGGEDFGAGG:2FE-GGGEG#G@GGCFE5C.DFF-EFGGGE8.G$8=GFGE&\n+@out-MT-1628/1\n+AAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCCTATAGTAAAACCCAGCCCATGACCCCTAACAGGGGTCCTCTCAGCCCT\n++\n+GFGG@8GGFF2GGGEFGGEEGFGGFFGGCGFFCFGGFGF6FFGGGEFGDGFF(GGGAGBGGG@FEFGDGEFB=FDGB6<BCEGG<GD@%;3GDGFGEGG4@\n+@out-MT-1629/1\n+CCACAGAACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTGGCTATCATCACCCGATGAGGCAACCAGCCAGTACGCCTGAACGCA\n++\n+GFCGGGEG/F1F?GE?EDC@GFGGG<GGGGBGGEGEFGFAGGGFG<E0GGGG9AGGC>FGG9EGGBFGGGGFD,CDF@AF9GFGGF;%(,4;5EFF1<FDA\n+@out-MT-1630/1\n+TATCAAACTCCTGAGCCAACAACTTAATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACTTATGACTCCCTAAAGCC\n++\n+FGGGGDGFFFF8GDF?GDGGGBGGFGDGGGFBGFGFGFGGCGC+5FGGGFFDAAGGFGFGGAD)GFE4EFFGFEFAG9G@=FGGFFE3GFFGF1EFFCDFE\n+@out-MT-1631/1\n+TACCACAACCACCACCCCATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCTGACC\n++\n+GGDG>GGF3G(GGEBG>GFCGGGEGGGGFFFGFGEG?GE@GG@GEEFGGG10GEDG>GG:4==FFB<+GABGE21GFCGGG@<CBC<GF@FDGF@3FGGG1\n+@out-MT-1632/1\n+GTTTCCCCGCATAAACAACATAAGCTTCTGCCTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAA\n++\n+DFGGGF<GG&6FFGCFFEDDFGFF.:DG?G#FGGGEG3FGFGFFEDFGEDGGCFGFCGDGGFGGGGEGFG,FGGFDG%FGDCFFFGGBG5FB;GCCGDFFF\n+@out-MT-1633/1\n+TATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTTTAGGTAACGAC\n++\n+GDGFFG<GGGGGGFG@FGDC2GG?GEFGGGGGBGGDEGGEGGGGGFGGGGFFCGCFFCAG;GGFGGG:GFF<;0/8G1FGGGFBFE@>C7GEGGGFEF+DE\n+@out-MT-1634/1\n+ATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGTAAAT\n++\n+G(GGGGDGGGFB><GGEGGGEGEGGG6GGGGDGGGG>G3FEGGG8GGGGGGFGFGFFGEGGG5GFEEG7G?FF?AGBF?<5GGGGG8=GGGFFGGFA7GGG\n+@out-MT-1635/1\n+AAGTCCTAATAGTAGAGGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGT\n++\n+GFGD@@EGGGFFGAFG
/BG?GGGGGGGGGGG?DFG:GGGDGFGGGGGGGCGBFF>6FGGGG@G?A<CGD@GGF@GCE8=GGAGG@2GF4;?2?><FED=C0\n+@out-MT-1636/1\n+TCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAGCAGGGAACTACTCCCACCCTG\n++\n+GGFGGFGDGAAGFGGAAGGGGG4FBG1GFGGGGCGGG<GGGBGEGF.GG9?GFGGGGG7A&GA=DGEB0G7DEG?GE4F<GGDCG4GE7CEAG0CC'?BBA\n+@out-MT-1637/1\n+TAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCT\n++\n+GGG=FGEGGGAFGFDGGGEFGGGGGGGGAGGGGEGFC8C>GFEFCAG6GFEGBBGGGFGGCGGGGGGGG>GGGG?C5E?AGGEFEDFDBGEFGGGADGGG2\n+@out-MT-1638/1\n+AAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCTGACCGGAATCGGAGGACAACCAATAAGCTACCCTT\n++\n+GGFGGGGDGFGGGG4CGGG@GDFGFCFGEDCDGGFEGGGGEGEDFGG@FFF?GG3GEGEAGGDEEB&GEG$BGG<=GD0AF-E846>F*$F35CEEFEAF4\n+@out-MT-1639/1\n+TTTAGTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCT\n++\n+G?DGCE:CE1E>GGGGGF3G5EGCGG5GEED@GCG3GGGG:E$A=F4GGGGGGGCBFGFGDGF:(:GBCGF=GCGGDDFEGEF*<BFGGGDGD9BFF>FB<\n+@out-MT-1640/1\n+AAGGATTAGACTGAACCGAATTGGTATATAGTTTAAACAAAACGAATGACTTCGACTCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTAC\n++\n+F1GEG?AGGGBGFEGGFGFGGFEGGGAGAGA=GGGFFF>GGGGGG==DD$@G4?GDGFFEFG>@GFG>:CE>7FGAEGGAFGGEGGCFGGEF?CB$FED@E\n+@out-MT-1641/1\n+CCCCATCCTTACCACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCATCCACCTTTATTATCAGTCTCTTCCCCA\n++\n+F=GECFGG6CGF6E8G;CGGGCBBGF;GGGGG3:FGGFF=FG/GGGGGGGAFG7GGGGGGEGBEGGGGGGC=DFGFA@G;1BFGGFGF<G@CGBFFF24BA\n+@out-MT-1642/1\n+ATGACCCCAATACGCAAAACTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGG\n++\n+F?GGGDEFBGGE4B)GDGGFG<GDGGFGFFGGGBGFGEGGGFFGDGDFGGGGGDGFDGGGGGFGF3B?BGDFGG>GGDCEF;?G@EDGDC34?>A<;GG?C\n+@out-MT-1643/1\n+ACTCACCCTAGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTT\n++\n+GEE=AFGGGAFG;@GG0GFEFGGFFFGFEGGDFFFE<AGG@CDGG*51+GEFGFEGG:;FGGFFGEGGFD5CEECGFGFGGGGGFF?GGFED->GGGFFFF\n+@out-MT-1644/1\n+CCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCGCTCCTACTCCTGCTCGCATCT
GCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGT\n++\n+GCFGGGGGGGDGGGFC/G?G@FFGGE=GEDF7GGFGFEGGD$4FEFGGGF?FFFGGFGGCFGGFFG8GF2DGGGF>FF<GGGGFFEDB;GEFFBDDGGGE8\n"
b
diff -r 000000000000 -r 6e75a84e9338 py/OutputFileWriter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/OutputFileWriter.py Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,251 @@\n+import sys\n+import os\n+import re\n+import gzip\n+from struct import pack\n+\n+from biopython_modified_bgzf import BgzfWriter\n+\n+BAM_COMPRESSION_LEVEL = 6\n+\n+# return the reverse complement of a string\n+RC_DICT = {\'A\':\'T\',\'C\':\'G\',\'G\':\'C\',\'T\':\'A\',\'N\':\'N\'}\n+def RC(s):\n+\treturn \'\'.join(RC_DICT[n] for n in s[::-1])\n+\n+# SAMtools reg2bin function\n+def reg2bin(a,b):\n+\tb -= 1\n+\tif (a>>14 == b>>14): return ((1<<15)-1)/7 + (a>>14)\n+\tif (a>>17 == b>>17): return ((1<<12)-1)/7 + (a>>17)\n+\tif (a>>20 == b>>20): return  ((1<<9)-1)/7 + (a>>20)\n+\tif (a>>23 == b>>23): return  ((1<<6)-1)/7 + (a>>23)\n+\tif (a>>26 == b>>26): return  ((1<<3)-1)/7 + (a>>26)\n+\treturn 0\n+\n+CIGAR_PACKED = {\'M\':0, \'I\':1, \'D\':2, \'N\':3, \'S\':4, \'H\':5, \'P\':6, \'=\':7, \'X\':8}\n+SEQ_PACKED   = {\'=\':0, \'A\':1, \'C\':2, \'M\':3, \'G\':4, \'R\':5, \'S\':6, \'V\':7,\n+                \'T\':8, \'W\':9, \'Y\':10,\'H\':11,\'K\':12,\'D\':13,\'B\':14,\'N\':15}\n+\n+BUFFER_BATCH_SIZE = 1000\t\t# write out to file after this many reads\n+\n+#\n+#\toutFQ      = path to output FASTQ prefix\n+#\tpaired     = True for PE reads, False for SE\n+#\tBAM_header = [refIndex]\n+#\tVCF_header = [path_to_ref]\n+#\tgzipped    = True for compressed FASTQ/VCF, False for uncompressed\n+#\n+class OutputFileWriter:\n+\tdef __init__(self, outPrefix, paired=False, BAM_header=None, VCF_header=None, gzipped=False, jobTuple=(1,1), noFASTQ=False):\n+\t\t\n+\t\tjobSuffix = \'\'\n+\t\tif jobTuple[1] > 1:\n+\t\t\tjsl = len(str(jobTuple[1]))\n+\t\t\tjsb = \'0\'*(jsl-len(str(jobTuple[0])))\n+\t\t\tjobSuffix = \'.job\'+jsb+str(jobTuple[0])+\'of\'+str(jobTuple[1])\n+\n+\t\tfq1 = outPrefix+\'_read1.fq\'+jobSuffix\n+\t\tfq2 = outPrefix+\'_read2.fq\'+jobSuffix\n+\t\tbam = outPrefix+\'_golden.bam\'+jobSuffix\n+\t\tvcf = outPrefix+\'_golden.vcf\'+jobSuffix\n+\n+\t\tself.noFASTQ = noFASTQ\n+\t\tif not self.noFASTQ:\n+\t\t\tif gzipped:\n+\t\t\t\tself.fq1_file = 
gzip.open(fq1+\'.gz\', \'wb\')\n+\t\t\telse:\n+\t\t\t\tself.fq1_file = open(fq1,\'w\')\n+\n+\t\t\tself.fq2_file = None\n+\t\t\tif paired:\n+\t\t\t\tif gzipped:\n+\t\t\t\t\tself.fq2_file = gzip.open(fq2+\'.gz\', \'wb\')\n+\t\t\t\telse:\n+\t\t\t\t\tself.fq2_file = open(fq2,\'w\')\n+\n+\t\t#\n+\t\t#\tVCF OUTPUT\n+\t\t#\n+\t\tself.vcf_file = None\n+\t\tif VCF_header != None:\n+\t\t\tif gzipped:\n+\t\t\t\tself.vcf_file = gzip.open(vcf+\'.gz\', \'wb\')\n+\t\t\telse:\n+\t\t\t\tself.vcf_file = open(vcf, \'wb\')\n+\n+\t\t\t# WRITE VCF HEADER (if parallel: only for first job)\n+\t\t\tif jobTuple[0] == 1:\n+\t\t\t\tself.vcf_file.write(\'##fileformat=VCFv4.1\\n\')\n+\t\t\t\tself.vcf_file.write(\'##reference=\'+VCF_header[0]+\'\\n\')\n+\t\t\t\tself.vcf_file.write(\'##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">\\n\')\n+\t\t\t\t#self.vcf_file.write(\'##INFO=<ID=READS,Number=1,Type=String,Description="Names of Reads Covering this Variant">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##ALT=<ID=DEL,Description="Deletion">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##ALT=<ID=DUP,Description="Duplication">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##ALT=<ID=INS,Description="Insertion of novel sequence">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##ALT=<ID=INV,Description="Inversion">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##ALT=<ID=CNV,Description="Copy number variable 
region">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##ALT=<ID=TRANS,Description="Translocation">\\n\')\n+\t\t\t\tself.vcf_file.write(\'##ALT=<ID=INV-TRANS,Description="Inverted translocation">\\n\')\n+\t\t\t\tself.vcf_file.write(\'#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\n\')\n+\n+\t\t#\n+\t\t#\tBAM OUTPUT\n+\t\t#\n+\t\tself.bam_file = None\n+\t\tif BAM_header != None:\n+\t\t\tself.bam_file = BgzfWriter(bam, \'w\', compresslevel=BAM_COMPRESSION_LEVEL)\n+\n+\t\t\t# WRITE BAM HEADER (if parallel: only for first job)'..b'ar)[1:]\n+\t\tcig_numbers = [int(n) for n in re.findall(r"\\d+",cigar)]\n+\t\tcig_ops     = len(cig_letters)\n+\t\tnext_refID = refID\n+\t\tif matePos == None:\n+\t\t\tnext_pos = 0\n+\t\t\tmy_tlen  = 0\n+\t\telse:\n+\t\t\tnext_pos = matePos\n+\t\t\tif pos_0 < next_pos:\n+\t\t\t\tmy_tlen = next_pos + len(seq) - pos_0\n+\t\t\telse:\n+\t\t\t\tmy_tlen = -pos_0 - len(seq) + next_pos\n+\n+\t\tencodedCig = \'\'\n+\t\tfor i in xrange(cig_ops):\n+\t\t\tencodedCig += pack(\'<I\',(cig_numbers[i]<<4) + CIGAR_PACKED[cig_letters[i]])\n+\t\tencodedSeq = \'\'\n+\t\tencodedLen = (len(seq)+1)/2\n+\t\tseqLen     = len(seq)\n+\t\tif seqLen&1:\n+\t\t\tseq += \'=\'\n+\t\tfor i in xrange(encodedLen):\n+\t\t\tencodedSeq += pack(\'<B\',(SEQ_PACKED[seq[2*i]]<<4) + SEQ_PACKED[seq[2*i+1]])\n+\n+\t\t# apparently samtools automatically adds 33 to the quality score string...\n+\t\tencodedQual = \'\'.join([chr(ord(n)-33) for n in qual])\n+\n+\t\t#blockSize = 4 +\t\t# refID \t\tint32\n+\t\t#            4 +\t\t# pos\t\t\tint32\n+\t\t#            4 +\t\t# bin_mq_nl\t\tuint32\n+\t\t#            4 +\t\t# flag_nc\t\tuint32\n+\t\t#            4 +\t\t# l_seq\t\t\tint32\n+\t\t#            4 +\t\t# next_refID\tint32\n+\t\t#            4 +\t\t# next_pos\t\tint32\n+\t\t#            4 +\t\t# tlen\t\t\tint32\n+\t\t#            len(readName)+1 +\n+\t\t#            4*cig_ops +\n+\t\t#            encodedLen +\n+\t\t#            len(seq)\n+\n+\t\t#blockSize = 32 + len(readName)+1 + 4*cig_ops 
+ encodedLen + len(seq)\n+\t\tblockSize = 32 + len(readName)+1 + len(encodedCig) + len(encodedSeq) + len(encodedQual)\n+\n+\t\t####self.bam_file.write(pack(\'<i\',blockSize))\n+\t\t####self.bam_file.write(pack(\'<i\',refID))\n+\t\t####self.bam_file.write(pack(\'<i\',pos_0))\n+\t\t####self.bam_file.write(pack(\'<I\',(myBin<<16) + (myMapQual<<8) + len(readName)+1))\n+\t\t####self.bam_file.write(pack(\'<I\',(samFlag<<16) + cig_ops))\n+\t\t####self.bam_file.write(pack(\'<i\',seqLen))\n+\t\t####self.bam_file.write(pack(\'<i\',next_refID))\n+\t\t####self.bam_file.write(pack(\'<i\',next_pos))\n+\t\t####self.bam_file.write(pack(\'<i\',my_tlen))\n+\t\t####self.bam_file.write(readName+\'\\0\')\n+\t\t####self.bam_file.write(encodedCig)\n+\t\t####self.bam_file.write(encodedSeq)\n+\t\t####self.bam_file.write(encodedQual)\n+\n+\t\t# a horribly compressed line, I\'m sorry.\n+\t\t# (ref_index, position, data)\n+\t\tself.bam_buffer.append((refID, pos_0, pack(\'<i\',blockSize) + pack(\'<i\',refID) + pack(\'<i\',pos_0) + pack(\'<I\',(myBin<<16) + (myMapQual<<8) + len(readName)+1) + pack(\'<I\',(samFlag<<16) + cig_ops) + pack(\'<i\',seqLen) + pack(\'<i\',next_refID) + pack(\'<i\',next_pos) + pack(\'<i\',my_tlen) + readName+\'\\0\' + encodedCig + encodedSeq + encodedQual))\n+\n+\n+\tdef flushBuffers(self,bamMax=None,lastTime=False):\n+\t\tif (len(self.fq1_buffer) >= BUFFER_BATCH_SIZE or len(self.bam_buffer) >= BUFFER_BATCH_SIZE) or (len(self.fq1_buffer) and lastTime) or (len(self.bam_buffer) and lastTime):\n+\t\t\t# fq\n+\t\t\tif not self.noFASTQ:\n+\t\t\t\tself.fq1_file.write(\'\'.join(self.fq1_buffer))\n+\t\t\t\tif len(self.fq2_buffer):\n+\t\t\t\t\tself.fq2_file.write(\'\'.join(self.fq2_buffer))\n+\t\t\t# bam\n+\t\t\tif len(self.bam_buffer):\n+\t\t\t\tbam_data = sorted(self.bam_buffer)\n+\t\t\t\tif lastTime:\n+\t\t\t\t\tself.bam_file.write(\'\'.join([n[2] for n in bam_data]))\n+\t\t\t\t\tself.bam_buffer = []\n+\t\t\t\telse:\n+\t\t\t\t\tind_to_stop_at = 0\n+\t\t\t\t\tfor i in 
xrange(0,len(bam_data)):\n+\t\t\t\t\t\t# if we are from previous reference, or have coordinates lower than next window position, it\'s safe to write out to file\n+\t\t\t\t\t\tif bam_data[i][0] != bam_data[-1][0] or bam_data[i][1] < bamMax:\n+\t\t\t\t\t\t\tind_to_stop_at = i+1\n+\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\tbreak\n+\t\t\t\t\tself.bam_file.write(\'\'.join([n[2] for n in bam_data[:ind_to_stop_at]]))\n+\t\t\t\t\t####print \'BAM WRITING:\',ind_to_stop_at,\'/\',len(bam_data)\n+\t\t\t\t\tif ind_to_stop_at >= len(bam_data):\n+\t\t\t\t\t\tself.bam_buffer = []\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tself.bam_buffer = bam_data[ind_to_stop_at:]\n+\t\t\tself.fq1_buffer = []\n+\t\t\tself.fq2_buffer = []\n+\n+\n+\tdef closeFiles(self):\n+\t\tself.flushBuffers(lastTime=True)\n+\t\tif not self.noFASTQ:\n+\t\t\tself.fq1_file.close()\n+\t\t\tif self.fq2_file != None:\n+\t\t\t\tself.fq2_file.close()\n+\t\tif self.vcf_file != None:\n+\t\t\tself.vcf_file.close()\n+\t\tif self.bam_file != None:\n+\t\t\tself.bam_file.close()\n+\n+\n+\n+\n'
b
diff -r 000000000000 -r 6e75a84e9338 py/OutputFileWriter.pyc
b
Binary file py/OutputFileWriter.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 py/SequenceContainer.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/SequenceContainer.py Tue May 15 02:39:53 2018 -0400
[
b"@@ -0,0 +1,1058 @@\n+import random\n+import copy\n+import re\n+import os\n+import bisect\n+import cPickle as pickle\n+import numpy as np\n+\n+from probability import DiscreteDistribution, poisson_list, quantize_list\n+from cigar import CigarString\n+\n+MAX_ATTEMPTS = 100\t# max attempts to insert a mutation into a valid position\n+MAX_MUTFRAC  = 0.3\t# the maximum percentage of a window that can contain mutations\n+\n+NUCL    = ['A','C','G','T']\n+TRI_IND = {'AA':0,  'AC':1,  'AG':2,   'AT':3,  'CA':4,  'CC':5,  'CG':6,  'CT':7,\n+           'GA':8,  'GC':9,  'GG':10,  'GT':11, 'TA':12, 'TC':13, 'TG':14, 'TT':15}\n+NUC_IND = {'A':0, 'C':1, 'G':2, 'T':3}\n+ALL_TRI = [NUCL[i]+NUCL[j]+NUCL[k] for i in xrange(len(NUCL)) for j in xrange(len(NUCL)) for k in xrange(len(NUCL))]\n+ALL_IND = {ALL_TRI[i]:i for i in xrange(len(ALL_TRI))}\n+\n+# DEBUG\n+IGNORE_TRINUC = False\n+\n+# percentile resolution used for fraglen quantizing\n+COV_FRAGLEN_PERCENTILE = 10.\n+LARGE_NUMBER = 9999999999\n+\n+#\n+#\tContainer for reference sequences, applies mutations\n+#\n+class SequenceContainer:\n+\tdef __init__(self, xOffset, sequence, ploidy, windowOverlap, readLen, mutationModels=[], mutRate=None, onlyVCF=False):\n+\t\t# initialize basic variables\n+\t\tself.onlyVCF = onlyVCF\n+\t\tself.init_basicVars(xOffset, sequence, ploidy, windowOverlap, readLen)\n+\t\t# initialize mutation models\n+\t\tself.init_mutModels(mutationModels, mutRate)\n+\t\t# sample the number of variants that will be inserted into each ploid\n+\t\tself.init_poisson()\n+\t\tself.indelsToAdd = [n.sample() for n in self.ind_pois]\n+\t\tself.snpsToAdd   = [n.sample() for n in self.snp_pois]\n+\t\t# initialize trinuc snp bias\n+\t\tself.init_trinucBias()\n+\n+\tdef init_basicVars(self, xOffset, sequence, ploidy, windowOverlap, readLen):\n+\t\tself.x         = xOffset\n+\t\tself.ploidy    = ploidy\n+\t\tself.readLen   = readLen\n+\t\tself.sequences = [bytearray(sequence) for n in xrange(self.ploidy)]\n+\t\tself.seqLen    = 
len(sequence)\n+\t\tself.indelList = [[] for n in xrange(self.ploidy)]\n+\t\tself.snpList   = [[] for n in xrange(self.ploidy)]\n+\t\tself.allCigar  = [[] for n in xrange(self.ploidy)]\n+\t\tself.FM_pos    = [[] for n in xrange(self.ploidy)]\n+\t\tself.FM_span   = [[] for n in xrange(self.ploidy)]\n+\t\tself.adj       = [None for n in xrange(self.ploidy)]\n+\t\t# blackList[ploid][pos] = 0\t\tsafe to insert variant here\n+\t\t# blackList[ploid][pos] = 1\t\tindel inserted here\n+\t\t# blackList[ploid][pos] = 2\t\tsnp inserted here\n+\t\t# blackList[ploid][pos] = 3\t\tinvalid position for various processing reasons\n+\t\tself.blackList = [np.zeros(self.seqLen,dtype='<i4') for n in xrange(self.ploidy)]\n+\n+\t\t# disallow mutations to occur on window overlap points\n+\t\tself.winBuffer = windowOverlap\n+\t\tfor p in xrange(self.ploidy):\n+\t\t\tself.blackList[p][-self.winBuffer]   = 3\n+\t\t\tself.blackList[p][-self.winBuffer-1] = 3\n+\n+\tdef init_coverage(self,coverageDat,fragDist=None):\n+\t\t# if we're only creating a vcf, skip some expensive initialization related to coverage depth\n+\t\tif not self.onlyVCF:\n+\t\t\t(self.windowSize, gc_scalars, targetCov_vals) = coverageDat\n+\t\t\tgcCov_vals = [[] for n in self.sequences]\n+\t\t\ttrCov_vals = [[] for n in self.sequences]\n+\t\t\tself.coverage_distribution = []\n+\t\t\tavg_out = []\n+\t\t\tfor i in xrange(len(self.sequences)):\n+\t\t\t\t# compute gc-bias\n+\t\t\t\tj = 0\n+\t\t\t\twhile j+self.windowSize < len(self.sequences[i]):\n+\t\t\t\t\tgc_c = self.sequences[i][j:j+self.windowSize].count('G') + self.sequences[i][j:j+self.windowSize].count('C')\n+\t\t\t\t\tgcCov_vals[i].extend([gc_scalars[gc_c]]*self.windowSize)\n+\t\t\t\t\tj += self.windowSize\n+\t\t\t\tgc_c = self.sequences[i][-self.windowSize:].count('G') + 
self.sequences[i][-self.windowSize:].count('C')\n+\t\t\t\tgcCov_vals[i].extend([gc_scalars[gc_c]]*(len(self.sequences[i])-len(gcCov_vals[i])))\n+\t\t\t\t#\n+\t\t\t\ttrCov_vals[i].append(targetCov_vals[0])\n+\t\t\t\tprevVal = self.FM_pos[i][0]\n+\t\t\t\tfor j in xrange(1,len(self.sequences[i])-self.readLen):\n+\t\t\t\t\tif self.FM_pos[i][j] == None:\n+\t\t\t\t\t\ttrCov_vals[i].append(targetCov_vals[prevVal])\n+\t\t\t\t\telse:\n+\t\t\t\t\t\ttrCov_vals[i].append(sum(target"..b":] == '.trinuc']\n+\t\tlisting  = sorted(listing1) + sorted(listing2)\n+\t\tfor l in listing:\n+\t\t\tf = open(prefix+l,'r')\n+\t\t\tfr = [n.split('\\t') for n in f.read().split('\\n')]\n+\t\t\tf.close()\n+\n+\t\t\tif '_overall.prob' in l:\n+\t\t\t\tmyIns = None\n+\t\t\t\tmyDel = None\n+\t\t\t\tfor dat in fr[1:]:\n+\t\t\t\t\tif len(dat) == 2:\n+\t\t\t\t\t\tif dat[0] == 'insertion':\n+\t\t\t\t\t\t\tmyIns = float(dat[1])\n+\t\t\t\t\t\telif dat[0] == 'deletion':\n+\t\t\t\t\t\t\tmyDel = float(dat[1])\n+\t\t\t\tif myIns != None and myDel != None:\n+\t\t\t\t\toutModel[2] = myIns + myDel\n+\t\t\t\t\toutModel[3] = myIns / (myIns + myDel)\n+\t\t\t\t\tprint '-',l\n+\n+\t\t\tif '_insLength.prob' in l:\n+\t\t\t\tinsVals = {}\n+\t\t\t\tfor dat in fr[1:]:\n+\t\t\t\t\tif len(dat) == 2:\n+\t\t\t\t\t\tinsVals[int(dat[0])] = float(dat[1])\n+\t\t\t\tif len(insVals):\n+\t\t\t\t\toutModel[4] = sorted(insVals.keys())\n+\t\t\t\t\toutModel[5] = [insVals[n] for n in outModel[4]]\n+\t\t\t\t\tprint '-',l\n+\n+\t\t\tif '_delLength.prob' in l:\n+\t\t\t\tdelVals = {}\n+\t\t\t\tfor dat in fr[1:]:\n+\t\t\t\t\tif len(dat) == 2:\n+\t\t\t\t\t\tdelVals[int(dat[0])] = float(dat[1])\n+\t\t\t\tif len(delVals):\n+\t\t\t\t\toutModel[6] = sorted(delVals.keys())\n+\t\t\t\t\toutModel[7] = [delVals[n] for n in outModel[6]]\n+\t\t\t\t\tprint '-',l\n+\n+\t\t\tif '.trinuc' == l[-7:]:\n+\t\t\t\tcontext_ind = TRI_IND[l[-10]+l[-8]]\n+\t\t\t\tp_matrix    = [[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]]\n+\t\t\t\tfor i in 
xrange(len(p_matrix)):\n+\t\t\t\t\tfor j in xrange(len(fr[i])):\n+\t\t\t\t\t\tp_matrix[i][j] = float(fr[i][j])\n+\t\t\t\tanyNone = False\n+\t\t\t\tfor i in xrange(len(p_matrix)):\n+\t\t\t\t\tfor j in xrange(len(p_matrix[i])):\n+\t\t\t\t\t\tif p_matrix[i][j] == -1:\n+\t\t\t\t\t\t\tanyNone = True\n+\t\t\t\tif not anyNone:\n+\t\t\t\t\toutModel[8][context_ind] = copy.deepcopy(p_matrix)\n+\t\t\t\t\tprint '-',l\n+\n+\treturn outModel\n+\n+######################\n+#\tDEFAULT VALUES   #\n+######################\n+\n+DEFAULT_1_OVERALL_MUT_RATE   = 0.001\n+DEFAULT_1_HOMOZYGOUS_FREQ    = 0.010\n+DEFAULT_1_INDEL_FRACTION     = 0.05\n+DEFAULT_1_INS_VS_DEL         = 0.6\n+DEFAULT_1_INS_LENGTH_VALUES  = [1,2,3,4,5,6,7,8,9,10]\n+DEFAULT_1_INS_LENGTH_WEIGHTS = [0.4, 0.2, 0.1, 0.05, 0.05, 0.05, 0.05, 0.034, 0.033, 0.033]\n+DEFAULT_1_DEL_LENGTH_VALUES  = [1,2,3,4,5]\n+DEFAULT_1_DEL_LENGTH_WEIGHTS = [0.3,0.2,0.2,0.2,0.1]\n+example_matrix_1             = [[0.0, 0.15, 0.7, 0.15],\n+\t\t\t\t\t\t        [0.15, 0.0, 0.15, 0.7],\n+\t\t\t\t\t\t        [0.7, 0.15, 0.0, 0.15],\n+\t\t\t\t\t\t        [0.15, 0.7, 0.15, 0.0]]\n+DEFAULT_1_TRI_FREQS          = [copy.deepcopy(example_matrix_1) for n in xrange(16)]\n+DEFAULT_1_TRINUC_BIAS        = [1./float(len(ALL_TRI)) for n in ALL_TRI]\n+DEFAULT_MODEL_1              = [DEFAULT_1_OVERALL_MUT_RATE,\n+\t\t\t\t\t\t\t    DEFAULT_1_HOMOZYGOUS_FREQ,\n+\t\t\t\t\t\t\t    DEFAULT_1_INDEL_FRACTION,\n+\t\t\t\t\t\t\t    DEFAULT_1_INS_VS_DEL,\n+\t\t\t\t\t\t\t    DEFAULT_1_INS_LENGTH_VALUES,\n+\t\t\t\t\t\t\t    DEFAULT_1_INS_LENGTH_WEIGHTS,\n+\t\t\t\t\t\t\t    DEFAULT_1_DEL_LENGTH_VALUES,\n+\t\t\t\t\t\t\t    DEFAULT_1_DEL_LENGTH_WEIGHTS,\n+\t\t\t\t\t\t\t    DEFAULT_1_TRI_FREQS,\n+\t\t\t\t\t\t\t    DEFAULT_1_TRINUC_BIAS]\n+\n+DEFAULT_2_OVERALL_MUT_RATE   = 0.002\n+DEFAULT_2_HOMOZYGOUS_FREQ    = 0.200\n+DEFAULT_2_INDEL_FRACTION     = 0.1\n+DEFAULT_2_INS_VS_DEL         = 0.3\n+DEFAULT_2_INS_LENGTH_VALUES  = [1,2,3,4,5,6,7,8,9,10]\n+DEFAULT_2_INS_LENGTH_WEIGHTS = 
[0.1, 0.1, 0.2, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05]\n+DEFAULT_2_DEL_LENGTH_VALUES  = [1,2,3,4,5]\n+DEFAULT_2_DEL_LENGTH_WEIGHTS = [0.3,0.2,0.2,0.2,0.1]\n+example_matrix_2             = [[0.0, 0.15, 0.7, 0.15],\n+\t\t\t\t\t\t        [0.15, 0.0, 0.15, 0.7],\n+\t\t\t\t\t\t        [0.7, 0.15, 0.0, 0.15],\n+\t\t\t\t\t\t        [0.15, 0.7, 0.15, 0.0]]\n+DEFAULT_2_TRI_FREQS          = [copy.deepcopy(example_matrix_2) for n in xrange(16)]\n+DEFAULT_2_TRINUC_BIAS        = [1./float(len(ALL_TRI)) for n in ALL_TRI]\n+DEFAULT_MODEL_2              = [DEFAULT_2_OVERALL_MUT_RATE,\n+\t\t\t\t\t\t\t    DEFAULT_2_HOMOZYGOUS_FREQ,\n+\t\t\t\t\t\t\t    DEFAULT_2_INDEL_FRACTION,\n+\t\t\t\t\t\t\t    DEFAULT_2_INS_VS_DEL,\n+\t\t\t\t\t\t\t    DEFAULT_2_INS_LENGTH_VALUES,\n+\t\t\t\t\t\t\t    DEFAULT_2_INS_LENGTH_WEIGHTS,\n+\t\t\t\t\t\t\t    DEFAULT_2_DEL_LENGTH_VALUES,\n+\t\t\t\t\t\t\t    DEFAULT_2_DEL_LENGTH_WEIGHTS,\n+\t\t\t\t\t\t\t    DEFAULT_2_TRI_FREQS,\n+\t\t\t\t\t\t\t    DEFAULT_2_TRINUC_BIAS]\n+\n+\n"
b
diff -r 000000000000 -r 6e75a84e9338 py/SequenceContainer.pyc
b
Binary file py/SequenceContainer.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 py/biopython_modified_bgzf.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/biopython_modified_bgzf.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# Copyright 2010-2013 by Peter Cock.
+# All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license.  Please see the LICENSE file that should have been included
+# as part of this package.
+
+""" ############################################################################
+#######                                                                  #######
+#######    06/02/2015:                                                   #######
+#######        - I picked out the bits and pieces of code needed         #######
+#######          to write BAM files, removed python 3.0 compatibility    #######
+#######                                                                  #######
+############################################################################ """
+
+import zlib
+import struct
+
_bgzf_header = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00"
_bgzf_eof    = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00"

# Raw bytes handed to a single block by write(); _write_block() rejects more.
_BGZF_BLOCK_SIZE = 65536
# The block-size field is packed as an unsigned 16-bit "len(compressed) + 25",
# so the deflated payload of one block must not exceed 65535 - 25 bytes.
_BGZF_MAX_COMPRESSED = 65535 - 25


class BgzfWriter(object):
    """Minimal writer producing BGZF (blocked gzip) output.

    Data passed to write() is buffered and emitted as independent blocks,
    each made of ``_bgzf_header``, a 16-bit size field, a raw-deflate payload,
    the CRC32 of the uncompressed data and its length.  close() appends the
    ``_bgzf_eof`` marker block.
    """

    def __init__(self, filename=None, mode="w", fileobj=None, compresslevel=6):
        """Open *filename* for binary writing/appending, or wrap *fileobj*.

        Raises ValueError when *mode* contains neither "w" nor "a" (only
        checked when opening by filename).
        """
        if fileobj:
            assert filename is None
            handle = fileobj
        else:
            lowered = mode.lower()
            if "w" not in lowered and "a" not in lowered:
                raise ValueError("Must use write or append mode, not %r" % mode)
            if "a" in lowered:
                handle = open(filename, "ab")
            else:
                handle = open(filename, "wb")
        self._text = "b" not in mode.lower()
        self._handle = handle
        self._buffer = b""
        self.compresslevel = compresslevel

    def _write_block(self, block):
        """Compress *block* (at most 65536 bytes) and write it as one or more BGZF blocks."""
        assert len(block) <= _BGZF_BLOCK_SIZE
        # Giving a negative window bits means no gzip/zlib headers, -15 used in samtools
        c = zlib.compressobj(self.compresslevel,
                             zlib.DEFLATED,
                             -15,
                             zlib.DEF_MEM_LEVEL,
                             0)
        compressed = c.compress(block) + c.flush()
        del c
        if len(compressed) > _BGZF_MAX_COMPRESSED and len(block) > 1:
            # Incompressible data can deflate to more than fits in the 16-bit
            # size field; emit the two halves as separate blocks instead of
            # failing.
            half = len(block) // 2
            self._write_block(block[:half])
            self._write_block(block[half:])
            return
        assert len(compressed) <= _BGZF_MAX_COMPRESSED, "TODO - Didn't compress enough, try less data in this block"
        bsize = struct.pack("<H", len(compressed) + 25)  # includes -1
        # Masking keeps the CRC unsigned regardless of the platform's crc32 sign.
        crc = struct.pack("<I", zlib.crc32(block) & 0xffffffff)
        uncompressed_length = struct.pack("<I", len(block))
        data = _bgzf_header + bsize + compressed + crc + uncompressed_length
        self._handle.write(data)

    def write(self, data):
        """Buffer *data*, writing out every complete 65536-byte block."""
        self._buffer += data
        while len(self._buffer) >= _BGZF_BLOCK_SIZE:
            self._write_block(self._buffer[:_BGZF_BLOCK_SIZE])
            self._buffer = self._buffer[_BGZF_BLOCK_SIZE:]

    def flush(self):
        """Write everything still buffered (even if empty) and flush the handle."""
        while len(self._buffer) >= 65536:
            self._write_block(self._buffer[:65535])
            self._buffer = self._buffer[65535:]
        self._write_block(self._buffer)
        self._buffer = b""
        self._handle.flush()

    def close(self):
        """Flush data, write 28 bytes empty BGZF EOF marker, and close the BGZF file."""
        if self._buffer:
            self.flush()
        # samtools will look for a magic EOF marker, just a 28 byte empty BGZF block,
        # and if it is missing warns the BAM file may be truncated. In addition to
        # samtools writing this block, so too does bgzip - so we should too.
        self._handle.write(_bgzf_eof)
        self._handle.flush()
        self._handle.close()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()


if __name__ == "__main__":
    pass
b
diff -r 000000000000 -r 6e75a84e9338 py/biopython_modified_bgzf.pyc
b
Binary file py/biopython_modified_bgzf.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 py/cigar.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/cigar.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,108 @@
+import re
+
class CigarString:
    """CIGAR string held as a per-base list of operation symbols.

    ``cigarData`` has one entry per 'M' or 'I' base.  A deletion is folded
    into the entry that follows it: 'DDM' means "two deleted bases, then a
    match".  Only the 'M', 'I' and 'D' operations are interpreted.
    """

    def __init__(self, stringIn=None, listIn=None):
        """Build from a CIGAR string and/or an already expanded symbol list."""
        if stringIn is None and listIn is None:
            print('\nError: CigarString object not initialized.\n')
            raise SystemExit(1)

        self.cigarData = []

        if stringIn is not None:
            self.joinCigar(j_stringIn=stringIn)

        if listIn is not None:
            self.joinCigar(j_listIn=listIn)

    def stringToList(self, s):
        """Expand a CIGAR string such as '3M2D1I' into ['M','M','M','DDI']."""
        cigarDat = []
        letters = re.split(r"\d+", s)[1:]
        numbers = [int(n) for n in re.findall(r"\d+", s)]
        dReserve = 0
        for op, count in zip(letters, numbers):
            if op == 'D':
                # remembered until the next M/I entry, which carries it
                dReserve = count
            if op == 'M' or op == 'I':
                if dReserve:
                    cigarDat += ['D' * dReserve + op] + [op] * (count - 1)
                else:
                    cigarDat += [op] * count
                dReserve = 0
        return cigarDat

    def listToString(self, l):
        """Collapse an expanded symbol list back into a CIGAR string.

        Returns '' for an empty list.  Deletions attached to the very first
        entry are not emitted.
        """
        if not l:
            return ''

        parts = []
        currentSym = l[0]
        currentCount = 1
        if 'D' in currentSym:
            currentSym = currentSym[-1]
        for nextSym in l[1:]:
            if len(nextSym) == 1 and nextSym == currentSym:
                currentCount += 1
                continue
            parts.append(str(currentCount) + currentSym)
            if 'D' in nextSym:
                parts.append(str(nextSym.count('D')) + 'D')
                currentSym = nextSym[-1]
            else:
                currentSym = nextSym
            currentCount = 1
        parts.append(str(currentCount) + currentSym)
        return ''.join(parts)

    def getList(self):
        """Return the expanded symbol list (the live object, not a copy)."""
        return self.cigarData

    def getString(self):
        """Return the CIGAR string for the current symbol list."""
        return self.listToString(self.cigarData)

    def joinCigar(self, j_stringIn=None, j_listIn=None):
        """Append a CIGAR string and/or an expanded symbol list."""
        if j_stringIn is None and j_listIn is None:
            print('\nError: Invalid join operation in CigarString\n')
            raise SystemExit(1)

        if j_stringIn is not None:
            self.cigarData += self.stringToList(j_stringIn)

        if j_listIn is not None:
            self.cigarData += j_listIn

    def insertCigarElement(self, pos, i_stringIn=None, i_listIn=None):
        """Insert a CIGAR string and/or symbol list before index *pos*.

        *pos* must satisfy 0 <= pos < len(cigarData); otherwise an error is
        printed and the process exits with status 1.
        """
        if i_stringIn is None and i_listIn is None:
            print('\nError: Invalid insertion operation in CigarString\n')
            raise SystemExit(1)

        if pos < 0 or pos >= len(self.cigarData):
            print('\nError: Invalid insertion position in CigarString\n')
            raise SystemExit(1)

        if i_stringIn is not None:
            self.cigarData = self.cigarData[:pos] + self.stringToList(i_stringIn) + self.cigarData[pos:]

        if i_listIn is not None:
            self.cigarData = self.cigarData[:pos] + i_listIn + self.cigarData[pos:]


if __name__ == '__main__':
    print('testing CigarString class...')

    str1 = '50M10D7I23M'
    str2 = '10I25M'
    iPos = 20
    myCigar = CigarString(stringIn=str1)
    myCigar.insertCigarElement(iPos, i_stringIn=str2)
    print(' '.join([str1, '+', str2, '[inserted at position', str(iPos) + ']', '=', myCigar.getString()]))
+
b
diff -r 000000000000 -r 6e75a84e9338 py/cigar.pyc
b
Binary file py/cigar.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 py/inputChecking.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/inputChecking.py Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,29 @@
+import os
+import sys
+
def requiredField(s, errString):
    """Print errString and exit with status 1 when s is None; otherwise do nothing."""
    if s is None:
        print('\n' + errString + '\n')
        sys.exit(1)
+
def checkFileOpen(fn, errString, required=False):
    """Verify that fn can be opened for reading.

    A fn of None is accepted unless required is true.  On failure errString
    is printed and the process exits with status 1.
    """
    if fn is None:
        if required:
            print('\n' + errString + '\n')
            sys.exit(1)
        return

    try:
        handle = open(fn, 'r')
    except (IOError, OSError, TypeError, ValueError):
        # anything open() reports for an unusable path, without swallowing
        # KeyboardInterrupt / SystemExit the way a bare except would
        print('\n' + errString + '\n')
        sys.exit(1)
    # the handle was only a probe; release it immediately
    handle.close()
+
def checkDir(dir, errString):
    """Print errString and exit with status 1 unless dir is an existing directory."""
    # NOTE: the parameter name shadows the builtin dir(); it is kept because
    # callers may pass it by keyword.
    if not os.path.isdir(dir):
        print('\n' + errString + '\n')
        sys.exit(1)
+
def isInRange(val, lb, ub, errString):
    """Print errString and exit with status 1 when val lies outside [lb, ub]."""
    if val < lb or val > ub:
        print('\n' + errString + '\n')
        sys.exit(1)
b
diff -r 000000000000 -r 6e75a84e9338 py/inputChecking.pyc
b
Binary file py/inputChecking.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 py/probability.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/probability.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,146 @@
+import math
+import random
+import bisect
+import copy
+import numpy as np
+
LOW_PROB_THRESH = 1e-12


def mean_ind_of_weighted_list(l):
    """Return the first index at which the running sum of l reaches half of sum(l).

    Returns None when l is empty.
    """
    myMid = sum(l) / 2.0
    mySum = 0.0
    for i, weight in enumerate(l):
        mySum += weight
        if mySum >= myMid:
            return i
    return None


class DiscreteDistribution:
    """Sampler over a finite set of values with the given relative weights.

    method='bisect' samples by bisecting a cumulative-probability table;
    method='alias' precomputes two tables and samples with one random index
    plus one random float.  If the weights sum to less than LOW_PROB_THRESH,
    or degenerateVal is given, sample() always returns one fixed value.
    """

    def __init__(self, weights, values, degenerateVal=None, method='bisect'):

        # some sanity checking
        if not len(weights) or not len(values):
            print('\nError: weight or value vector given to DiscreteDistribution() are 0-length.\n')
            # fail loudly with a traceback so the offending caller is visible
            raise ValueError('DiscreteDistribution() requires non-empty weights and values')

        self.method = method
        sumWeight = float(sum(weights))

        # if probability of all input events is 0, consider it degenerate and always return the first value
        if sumWeight < LOW_PROB_THRESH:
            self.degenerate = values[0]
            return

        self.weights = [n / sumWeight for n in weights]
        self.values = copy.deepcopy(values)
        if len(self.values) != len(self.weights):
            print('\nError: length and weights and values vectors must be the same.\n')
            raise SystemExit(1)
        self.degenerate = degenerateVal

        if self.method == 'alias':
            K = len(self.weights)
            q = np.zeros(K)
            # builtin int: the np.int alias no longer exists in current NumPy
            J = np.zeros(K, dtype=int)
            smaller = []
            larger = []
            for kk, prob in enumerate(self.weights):
                q[kk] = K * prob
                if q[kk] < 1.0:
                    smaller.append(kk)
                else:
                    larger.append(kk)
            # pair every under-full slot with an over-full one
            while len(smaller) > 0 and len(larger) > 0:
                small = smaller.pop()
                large = larger.pop()
                J[small] = large
                q[large] = (q[large] + q[small]) - 1.0
                if q[large] < 1.0:
                    smaller.append(large)
                else:
                    larger.append(large)

            self.a1 = len(J) - 1
            self.a2 = J.tolist()
            self.a3 = q.tolist()

        elif self.method == 'bisect':
            # cumP[i] is the probability mass strictly below values[i]
            self.cumP = np.cumsum(self.weights).tolist()[:-1]
            self.cumP.insert(0, 0.)

    def __str__(self):
        if not hasattr(self, 'weights'):
            # all-zero weights: no tables were built, only the fixed value exists
            return 'degenerate ' + str(self.degenerate) + ' ' + self.method
        return str(self.weights) + ' ' + str(self.values) + ' ' + self.method

    def sample(self):
        """Draw one value (always the fixed value for a degenerate distribution)."""
        if self.degenerate is not None:
            return self.degenerate

        if self.method == 'alias':
            r1 = random.randint(0, self.a1)
            r2 = random.random()
            if r2 < self.a3[r1]:
                return self.values[r1]
            return self.values[self.a2[r1]]

        elif self.method == 'bisect':
            r = random.random()
            return self.values[bisect.bisect(self.cumP, r) - 1]


# takes k_range ([0,1,2,..]) and lambda, returns a DiscreteDistribution object corresponding to a poisson distribution
MIN_WEIGHT = 1e-12
def poisson_list(k_range, l):
    """Return a DiscreteDistribution over k_range weighted by Poisson(l).

    k_range must be [0, 1, 2, ...] because k is used to index the table of
    log-factorials.  Outcomes whose weight is below MIN_WEIGHT are dropped;
    if l is below MIN_WEIGHT or at most one outcome survives, a distribution
    that always yields 0 is returned.
    """
    if l < MIN_WEIGHT:
        return DiscreteDistribution([1], [0], degenerateVal=0)
    logFactorial_list = [0.0]
    for k in k_range[1:]:
        logFactorial_list.append(np.log(float(k)) + logFactorial_list[k - 1])
    # keep each weight paired with its own k so that dropping low-weight
    # outcomes below the mode cannot shift the remaining weights onto smaller k
    kept = []
    for k in k_range:
        w = np.exp(k * np.log(l) - l - logFactorial_list[k])
        if w >= MIN_WEIGHT:
            kept.append((k, w))
    if len(kept) <= 1:
        return DiscreteDistribution([1], [0], degenerateVal=0)
    return DiscreteDistribution([w for _, w in kept], [k for k, _ in kept])
+
# quantize a list of values into blocks
MIN_PROB = 1e-12
QUANT_BLOCKS = 10
def quantize_list(l):
    """Group consecutive entries of l that fall into the same quantization level.

    Entries below MIN_PROB * sum(l) are ignored.  The remaining value range
    is divided into QUANT_BLOCKS evenly spaced levels starting at the smallest
    kept value.  Returns a list of [first_index, last_index, level] runs, or
    None when no entry passes the threshold.
    """
    suml = float(sum(l))
    kept = [n for n in l if n >= MIN_PROB * suml]
    if len(kept) == 0:
        return None
    lowest = min(kept)
    span = max(kept) - lowest
    qi = [lowest + (i / float(QUANT_BLOCKS)) * span for i in range(QUANT_BLOCKS)]
    # sentinel so that bisect never runs off the end of the level table
    qi.append(1e12)

    runningList = []
    prevBi = None
    previ = None
    for i, val in enumerate(l):
        if val >= MIN_PROB * suml:
            bi = bisect.bisect(qi, val)
            if prevBi is not None and bi == prevBi and previ == i - 1:
                # same level as the directly preceding kept entry: extend the run
                runningList[-1][1] += 1
            else:
                runningList.append([i, i, qi[bi - 1]])
            prevBi = bi
            previ = i
    return runningList
+
b
diff -r 000000000000 -r 6e75a84e9338 py/probability.pyc
b
Binary file py/probability.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 py/refFunc.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/refFunc.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,210 @@
+import sys
+import time
+import os
+import random
+
+OK_CHR_ORD   = {ord('A'):True,ord('C'):True,ord('G'):True,ord('T'):True,ord('U'):True}
+ALLOWED_NUCL = ['A','C','G','T']
+
+#
+# Index reference fasta
+#
def indexRef(refPath):
    """Return [(name, start_offset, end_offset, seq_len), ...] for a fasta file.

    If refPath+'i' or refPath+'.fai' exists (the latter wins when both do)
    the entries are derived from that index; otherwise the fasta itself is
    scanned.  Offsets are file positions: start is the first sequence
    character, end is just past the last line of the record.
    """
    tt = time.time()

    fn = None
    if os.path.isfile(refPath + 'i'):
        print('found index ' + refPath + 'i')
        fn = refPath + 'i'
    if os.path.isfile(refPath + '.fai'):
        print('found index ' + refPath + '.fai')
        fn = refPath + '.fai'

    ref_inds = []
    if fn is not None:
        with open(fn, 'r') as fai:
            for line in fai:
                splt = line.rstrip('\n').split('\t')
                seqLen = int(splt[1])
                offset = int(splt[2])
                lineLn = int(splt[3])
                # one newline per sequence line, including a partial last line
                nLines = seqLen // lineLn
                if seqLen % lineLn != 0:
                    nLines += 1
                ref_inds.append((splt[0], offset, offset + seqLen + nLines, seqLen))
        return ref_inds

    sys.stdout.write('index not found, creating one... ')
    sys.stdout.flush()
    prevR = None
    prevP = None
    seqLen = 0
    with open(refPath, 'r') as refFile:
        while True:
            data = refFile.readline()
            if not data:
                ref_inds.append((prevR, prevP, refFile.tell() - len(data), seqLen))
                break
            if data[0] == '>':
                if prevP is not None:
                    ref_inds.append((prevR, prevP, refFile.tell() - len(data), seqLen))
                seqLen = 0
                prevP = refFile.tell()
                prevR = data[1:-1]
            else:
                # count bases only; a final line without a newline is not short-changed
                seqLen += len(data.rstrip('\n'))

    print('{0:.3f} (sec)'.format(time.time() - tt))
    return ref_inds
+
+
+#
+# Read in sequence data from reference fasta
+#
+# N_unknowns  = True --> all ambiguous characters will be treated as Ns
+# N_handling  = (mode,params)
+# - ('random',read/frag len)      --> all regions of Ns smaller than read or fragment
+#                                           length (whichever is bigger) will be replaced
+#                                           with uniformly random nucleotides
+# - ('allChr',read/frag len, chr) --> same as above, but replaced instead with a string
+#                                           of 'chr's
+# - ('ignore')                    --> do not alter nucleotides in N regions
+#
def readRef(refPath,ref_inds_i,N_handling,N_unknowns=True,quiet=False):
    """Load one reference sequence and classify its N regions.

    ref_inds_i is a (name, start_offset, end_offset, ...) entry; the bytes
    between the two offsets are read, newlines removed and the result
    upper-cased into a bytearray.

    N_handling is (mode, params...):
      ('random', length)      -- N runs no longer than length are replaced by
                                 random nucleotides
      ('allChr', length, chr) -- same, but replaced by chr
      ('ignore',)             -- N runs are left untouched
    With N_unknowns true, any character not in OK_CHR_ORD counts as N.

    Returns (sequence, N_info) where N_info['all'] and N_info['big'] are flat
    [start, end, start, end, ...] lists and N_info['non_N'] is a list of
    (start, end) tuples lying between the 'big' runs.
    """
    tt = time.time()
    if not quiet:
        sys.stdout.write('reading '+ref_inds_i[0]+'... ')
        sys.stdout.flush()

    # the handle is only needed for this one read; do not leave it open
    with open(refPath,'r') as refFile:
        refFile.seek(ref_inds_i[1])
        myDat = ''.join(refFile.read(ref_inds_i[2]-ref_inds_i[1]).split('\n'))
    myDat = bytearray(myDat.upper())

    # find N regions
    # data explanation: myDat[N_atlas[0][0]:N_atlas[0][1]] = solid block of Ns
    ordN = ord('N')
    N_atlas = []
    runStart = None
    for i, base in enumerate(myDat):
        if base == ordN or (N_unknowns and base not in OK_CHR_ORD):
            if runStart is None:
                runStart = i
        elif runStart is not None:
            N_atlas.append((runStart,i))
            runStart = None
    if runStart is not None:
        # sequence ends inside a run of Ns
        N_atlas.append((runStart,len(myDat)))

    # handle N base-calls as desired
    N_info = {}
    N_info['all']   = []
    N_info['big']   = []
    N_info['non_N'] = []
    if N_handling[0] == 'random':
        for region in N_atlas:
            N_info['all'].extend(region)
            if region[1]-region[0] <= N_handling[1]:
                for i in range(region[0],region[1]):
                    myDat[i] = random.choice(ALLOWED_NUCL)
            else:
                N_info['big'].extend(region)
    elif N_handling[0] == 'allChr' and N_handling[2] in OK_CHR_ORD:
        for region in N_atlas:
            N_info['all'].extend(region)
            if region[1]-region[0] <= N_handling[1]:
                for i in range(region[0],region[1]):
                    myDat[i] = N_handling[2]
            else:
                N_info['big'].extend(region)
    elif N_handling[0] == 'ignore':
        for region in N_atlas:
            N_info['all'].extend(region)
            N_info['big'].extend(region)
    else:
        print('\nERROR: UNKNOWN N_HANDLING MODE\n')
        sys.exit(1)

    big = N_info['big']
    if big == []:
        N_info['non_N'] = [(0,len(myDat))]
    else:
        # big is flat: even positions are run starts, odd positions run ends
        habitableRegions = []
        for i in range(0,len(big),2):
            if i == 0:
                habitableRegions.append((0,big[0]))
            else:
                habitableRegions.append((big[i-1],big[i]))
        habitableRegions.append((big[-1],len(myDat)))
        for n in habitableRegions:
            if n[0] != n[1]:
                N_info['non_N'].append(n)

    if not quiet:
        print('{0:.3f} (sec)'.format(time.time()-tt))
    return (myDat,N_info)
+
+#
+# find all non-N regions in reference sequence ahead of time, for computing jobs in parallel
+#
def getAllRefRegions(refPath,ref_inds,N_handling,saveOutput=False):
    """Return {ref_name: [(start, end), ...]} of non-N regions for every reference.

    If refPath+'.nnr' exists and saveOutput is false, the regions are read
    from that tab-separated cache.  Otherwise each entry of ref_inds is
    loaded with readRef(), and the result is written to the cache when
    saveOutput is true.
    """
    outRegions = {}
    fn = refPath+'.nnr'
    if os.path.isfile(fn) and not saveOutput:
        print('found list of preidentified non-N regions...')
        with open(fn,'r') as f:
            for line in f:
                splt = line.strip().split('\t')
                if splt == ['']:
                    # tolerate blank lines in the cache file
                    continue
                outRegions.setdefault(splt[0],[]).append((int(splt[1]),int(splt[2])))
        return outRegions

    print('enumerating all non-N regions in reference sequence...')
    for ref_ind in ref_inds:
        (refSequence,N_regions) = readRef(refPath,ref_ind,N_handling,quiet=True)
        outRegions[ref_ind[0]] = list(N_regions['non_N'])
    if saveOutput:
        with open(fn,'w') as f:
            for k in outRegions.keys():
                for n in outRegions[k]:
                    f.write(k+'\t'+str(n[0])+'\t'+str(n[1])+'\n')
    return outRegions
+
+#
+# find which of the non-N regions are going to be used for this job
+#
def partitionRefRegions(inRegions,ref_inds,myjob,njobs):
    """Select the regions that job number myjob (1-based) of njobs should process.

    Regions are walked in ref_inds order and handed to the current job until
    that job has accumulated roughly total_size/njobs bases, then to the next
    one; the last job takes whatever remains.

    Returns (reference_names, regions), where regions is a list of
    (ref_name, start, end) tuples.
    """
    totSize = 0
    for ref_ind in ref_inds:
        for region in inRegions[ref_ind[0]]:
            totSize += region[1] - region[0]
    sizePerJob = int(totSize/float(njobs)-0.5)

    regionsPerJob = [[] for n in range(njobs)]
    refsPerJob    = [{} for n in range(njobs)]
    currentInd    = 0
    currentCount  = 0
    for ref_ind in ref_inds:
        refName = ref_ind[0]
        for region in inRegions[refName]:
            regionsPerJob[currentInd].append((refName,region[0],region[1]))
            refsPerJob[currentInd][refName] = True
            currentCount += region[1] - region[0]
            if currentCount >= sizePerJob:
                currentCount = 0
                # never advance past the last job
                currentInd   = min([currentInd+1,njobs-1])

    relevantRefs = list(refsPerJob[myjob-1].keys())
    relevantRegs = regionsPerJob[myjob-1]
    return (relevantRefs,relevantRegs)
b
diff -r 000000000000 -r 6e75a84e9338 py/refFunc.pyc
b
Binary file py/refFunc.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 py/vcfFunc.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/vcfFunc.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,182 @@
+import sys
+import time
+import os
+import re
+import random
+
INCLUDE_HOMS = False
INCLUDE_FAIL = False
CHOOSE_RANDOM_PLOID_IF_NO_GT_FOUND = True

def parseLine(splt,colDict,colSamp):
    """Extract alleles, allele frequencies and genotypes from one VCF record.

    splt    -- the record split on tabs
    colDict -- {column name: index} built from the header line
    colSamp -- indices of the sample columns (may be empty)

    Returns None when the record has the wrong number of columns, has no
    usable ALT allele (unless INCLUDE_HOMS) or did not pass its FILTER
    (unless INCLUDE_FAIL).  Otherwise returns
    (alt_alleles, alt_freqs, gt_perSamp); gt_perSamp entries are None when no
    genotype could be found.
    """
    # enough columns?  (checked first so short lines cannot raise IndexError)
    if len(splt) != len(colDict):
        return None
    ra = splt[colDict['REF']]
    aa = splt[colDict['ALT']]
    # exclude homs / filtered?
    if not INCLUDE_HOMS and (aa == '.' or aa == '' or aa == ra):
        return None
    if not INCLUDE_FAIL and (splt[colDict['FILTER']] != 'PASS' and splt[colDict['FILTER']] != '.'):
        return None

    # default vals
    alt_alleles = [aa]
    alt_freqs   = []

    # any alt alleles?
    alt_split = aa.split(',')
    if len(alt_split) > 1:
        alt_alleles = alt_split

    # check INFO for AF
    af = None
    if 'INFO' in colDict and ';AF=' in ';'+splt[colDict['INFO']]:
        info = splt[colDict['INFO']]+';'
        # anchor on the key boundary so e.g. "CAF=" is not mistaken for "AF="
        af   = re.findall(r"(?:^|;)AF=(.*?)(?=;)",info)[0]
    if af is not None:
        af_splt = af.split(',')
        while len(af_splt) < len(alt_alleles):  # are we lacking enough AF values for some reason?
            af_splt.append(af_splt[-1])         # reuse the last one
        if len(af_splt) != 0 and af_splt[0] != '.' and af_splt[0] != '':  # missing data
            alt_freqs = [float(n) for n in af_splt]
    else:
        alt_freqs = [None]*max([len(alt_alleles),1])

    gt_perSamp = None
    # if available (i.e. we simulated it) look for WP in info
    if len(colSamp) == 0 and 'INFO' in colDict and 'WP=' in splt[colDict['INFO']]:
        info       = splt[colDict['INFO']]+';'
        gt_perSamp = [re.findall(r"WP=.*?(?=;)",info)[0][3:]]
    else:
        # if no sample columns, check info for GT
        if len(colSamp) == 0 and 'INFO' in colDict and 'GT=' in splt[colDict['INFO']]:
            info       = splt[colDict['INFO']]+';'
            gt_perSamp = [re.findall(r"GT=.*?(?=;)",info)[0][3:]]
        elif len(colSamp):
            fmt = ':'+splt[colDict['FORMAT']]+':'
            if ':GT:' in fmt:
                # fmt has a leading ':' so its indices are shifted by one
                gtInd = fmt.split(':').index('GT')
                gt_perSamp = [splt[sampCol].split(':')[gtInd-1].replace('.','0') for sampCol in colSamp]
    if gt_perSamp is None:
        gt_perSamp = [None]*max([len(colSamp),1])

    return (alt_alleles, alt_freqs, gt_perSamp)
+
+
+
def parseVCF(vcfPath,tumorNormal=False,ploidy=2):
    """Read a VCF file and collect its usable variants.

    Records are parsed with parseLine().  A record is skipped when parseLine
    rejects it, when no evaluated genotype contains a '1' allele, when its
    position is <= 0, or when a variant was already stored for the same
    chromosome and position.  Only the first sample column is evaluated (the
    first two when tumorNormal is true).  When a genotype is missing and
    CHOOSE_RANDOM_PLOID_IF_NO_GT_FOUND is set, a heterozygous genotype with
    the alt allele on a random copy is substituted.

    Returns (sampNames, varsOut) where varsOut maps each chromosome to a
    position-sorted list of (pos, ref, alt_alleles, alt_freqs, genotypes).
    Exits with status 1 if a record appears before any header line.
    """
    print('--------------------------------')
    sys.stdout.write('reading input VCF...\n')
    sys.stdout.flush()

    colDict   = {}
    colSamp   = []
    nSkipped  = 0
    nSkipped_becauseHash = 0
    allVars   = {}  # [ref][pos]
    sampNames = []
    alreadyPrintedWarning = False
    with open(vcfPath,'r') as f:
        for line in f:

            if line[0] != '#':
                if len(colDict) == 0:
                    print('\n\nERROR: VCF has no header?\n'+vcfPath+'\n\n')
                    sys.exit(1)
                splt = line[:-1].split('\t')
                plOut = parseLine(splt,colDict,colSamp)
                if plOut is None:
                    nSkipped += 1
                    continue
                (aa, af, gt) = plOut

                # make sure at least one allele somewhere contains the variant
                if tumorNormal:
                    gtEval = gt[:2]
                else:
                    gtEval = gt[:1]
                if None in gtEval:
                    if CHOOSE_RANDOM_PLOID_IF_NO_GT_FOUND:
                        if not alreadyPrintedWarning:
                            print('Warning: Found variants without a GT field, assuming heterozygous...')
                            alreadyPrintedWarning = True
                        for i in range(len(gtEval)):
                            tmp = ['0']*ploidy
                            tmp[random.randint(0,ploidy-1)] = '1'
                            gtEval[i] = '/'.join(tmp)
                    else:
                        # skip because no GT field was found
                        nSkipped += 1
                        continue
                isNonReference = any(gtVal is not None and '1' in gtVal for gtVal in gtEval)
                if not isNonReference:
                    # skip if no genotype actually contains this variant
                    nSkipped += 1
                    continue

                chrom = splt[0]
                pos   = int(splt[1])
                ref   = splt[3]
                # skip if position is <= 0
                if pos <= 0:
                    nSkipped += 1
                    continue

                # hash variants to avoid inserting duplicates (there are some messy VCFs out there...)
                if chrom not in allVars:
                    allVars[chrom] = {}
                if pos not in allVars[chrom]:
                    allVars[chrom][pos] = (pos,ref,aa,af,gtEval)
                else:
                    nSkipped_becauseHash += 1

            else:
                if line[1] != '#':
                    cols = line[1:-1].split('\t')
                    for i in range(len(cols)):
                        # every column after FORMAT is a sample column
                        if 'FORMAT' in colDict:
                            colSamp.append(i)
                        colDict[cols[i]] = i
                    if len(colSamp):
                        sampNames = cols[-len(colSamp):]
                        if len(colSamp) == 1:
                            pass
                        elif len(colSamp) == 2 and tumorNormal:
                            print('Detected 2 sample columns in input VCF, assuming tumor/normal.')
                        else:
                            print('Warning: Multiple sample columns present in input VCF. By default genReads uses only the first column.')
                    else:
                        sampNames = ['Unknown']
                    if tumorNormal:
                        # NOTE(review): only reported, parsing continues afterwards
                        if 'NORMAL' not in sampNames or 'TUMOR' not in sampNames:
                            print('\n\nERROR: Input VCF must have a "NORMAL" and "TUMOR" column.\n')

    varsOut = {}
    for r in allVars.keys():
        varsOut[r] = [allVars[r][k] for k in sorted(allVars[r].keys())]

    print('found '+str(sum([len(n) for n in allVars.values()]))+' valid variants in input vcf.')
    print(' * '+str(nSkipped)+' variants skipped: (qual filtered / ref genotypes / invalid syntax)')
    print(' * '+str(nSkipped_becauseHash)+' variants skipped due to multiple variants found per position')
    print('--------------------------------')
    return (sampNames, varsOut)
+
+
+
b
diff -r 000000000000 -r 6e75a84e9338 py/vcfFunc.pyc
b
Binary file py/vcfFunc.pyc has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-100reads_read1_genSeqErrorModel.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-100reads_read1_genSeqErrorModel.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,857175 @@\n+(lp1\n+(lp2\n+(lp3\n+cnumpy.core.multiarray\n+scalar\n+p4\n+(cnumpy\n+dtype\n+p5\n+(S'f8'\n+I0\n+I1\n+tRp6\n+(I3\n+S'<'\n+NNNI-1\n+I-1\n+I0\n+tbS'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp7\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp8\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp9\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x84?'\n+tRp10\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp11\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp12\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp13\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp14\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp15\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp16\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp17\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp18\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp19\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp20\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp21\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp22\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp23\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp24\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp25\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp26\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp27\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp28\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp29\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp30\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp31\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp32\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x94?'\n+tRp33\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp34\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\
\x00\\x00\\x00\\x00\\x00'\n+tRp35\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x84?'\n+tRp36\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp37\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x94?'\n+tRp38\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x94?'\n+tRp39\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x94?'\n+tRp40\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x94?'\n+tRp41\n+ag4\n+(g6\n+S'\\xb8\\x1e\\x85\\xebQ\\xb8\\x9e?'\n+tRp42\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp43\n+ag4\n+(g6\n+S'q=\\n\\xd7\\xa3p\\xcd?'\n+tRp44\n+ag4\n+(g6\n+S'=\\n\\xd7\\xa3p=\\xe2?'\n+tRp45\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp46\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp47\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp48\n+aa(lp49\n+g4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp50\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp51\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp52\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp53\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp54\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp55\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp56\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp57\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp58\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp59\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp60\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp61\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp62\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp63\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp64\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x84?'\n+tRp65\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp66\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp67\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0
0'\n+tRp68\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp69\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp70\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp71\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp72\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp73\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x94?'\n+tRp74\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp75\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp76\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x84?'\n+tRp77\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x84?'\n+tRp78\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp79\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp80\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x94?'\n+tRp81\n+ag4\n+(g6\n+S'{\\x14\\xaeG\\xe1z\\x84?'\n+tRp82\n+ag"..b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171092\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171093\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171094\n+aa(lp171095\n+g4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171096\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171097\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171098\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171099\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171100\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171101\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171102\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171103\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171104\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171105\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171106\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171107\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp171108\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\
\x00\\x00\\x00\\x00\\x00'\n+tRp171109\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171110\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171111\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171112\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp171113\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp171114\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171115\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171116\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171117\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171118\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171119\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171120\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp171121\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171122\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171123\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171124\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp171125\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171126\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171127\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xa9?'\n+tRp171128\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171129\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171130\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xb9?'\n+tRp171131\n+ag4\n+(g6\n+S'\\x9a\\x99\\x99\\x99\\x99\\x99\\xc9?'\n+tRp171132\n+ag4\n+(g6\n+S'333333\\xc3?'\n+tRp171133\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\xd0?'\n+tRp171134\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171135\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171136\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp171137\n+aa(lp171138\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n
+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp171139\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp171140\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aaaa(lp171141\n+I0\n+aI1\n+aI2\n+aI3\n+aI4\n+aI5\n+aI6\n+aI7\n+aI8\n+aI9\n+aI10\n+aI11\n+aI12\n+aI13\n+aI14\n+aI15\n+aI16\n+aI17\n+aI18\n+aI19\n+aI20\n+aI21\n+aI22\n+aI23\n+aI24\n+aI25\n+aI26\n+aI27\n+aI28\n+aI29\n+aI30\n+aI31\n+aI32\n+aI33\n+aI34\n+aI35\n+aI36\n+aI37\n+aI38\n+aI39\n+aI40\n+aI41\n+aaI33\n+aF0.0071828128863052863\n+a(lp171142\n+(lp171143\n+(lp171144\n+F0\n+aF0.49180000000000001\n+aF0.3377\n+aF0.17050000000000001\n+aa(lp171145\n+F0.52380000000000004\n+aF0\n+aF0.2661\n+aF0.21010000000000001\n+aa(lp171146\n+F0.37540000000000001\n+aF0.23549999999999999\n+aF0\n+aF0.38900000000000001\n+aa(lp171147\n+F0.2505\n+aF0.25519999999999998\n+aF0.49419999999999997\n+aF0\n+aaaF0.01\n+a(lp171148\n+F0.999\n+aF0.001\n+aa(lp171149\n+I1\n+aI2\n+aaF0.40000000000000002\n+a(lp171150\n+F0.25\n+aF0.25\n+aF0.25\n+aF0.25\n+aaa.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-BOOLEANS-genMutModel.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-BOOLEANS-genMutModel.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,1270 @@\n+(dp0\n+S'SNP_TRANS_FREQ'\n+p1\n+(dp2\n+(S'A'\n+p3\n+S'G'\n+p4\n+tp5\n+F1.0\n+s(S'C'\n+p6\n+S'T'\n+p7\n+tp8\n+F1.0\n+s(g7\n+g6\n+tp9\n+F1.0\n+s(g4\n+g3\n+tp10\n+F1.0\n+ssS'TRINUC_MUT_PROB'\n+p11\n+(dp12\n+S'ACC'\n+p13\n+F0.003883495145631068\n+sS'ATG'\n+p14\n+F0.0\n+sS'AAG'\n+p15\n+F0.0\n+sS'AAA'\n+p16\n+F0.0\n+sS'ATC'\n+p17\n+F0.0\n+sS'AAC'\n+p18\n+F0.00202020202020202\n+sS'ATA'\n+p19\n+F0.0027247956403269754\n+sS'AGG'\n+p20\n+F0.005747126436781609\n+sS'CCT'\n+p21\n+F0.0036900369003690036\n+sS'CTC'\n+p22\n+F0.0\n+sS'AGC'\n+p23\n+F0.0035460992907801418\n+sS'ACA'\n+p24\n+F0.0\n+sS'AGA'\n+p25\n+F0.0\n+sS'CAT'\n+p26\n+F0.002403846153846154\n+sS'AAT'\n+p27\n+F0.0\n+sS'ATT'\n+p28\n+F0.0\n+sS'CTG'\n+p29\n+F0.005555555555555556\n+sS'CTA'\n+p30\n+F0.0\n+sS'ACT'\n+p31\n+F0.0\n+sS'CAC'\n+p32\n+F0.0\n+sS'ACG'\n+p33\n+F0.0\n+sS'CAA'\n+p34\n+F0.0\n+sS'AGT'\n+p35\n+F0.0\n+sS'CCA'\n+p36\n+F0.0\n+sS'CCG'\n+p37\n+F0.0\n+sS'CCC'\n+p38\n+F0.0\n+sS'TAT'\n+p39\n+F0.0\n+sS'GGT'\n+p40\n+F0.0\n+sS'TGT'\n+p41\n+F0.0\n+sS'CGA'\n+p42\n+F0.0\n+sS'CAG'\n+p43\n+F0.005025125628140704\n+sS'CGC'\n+p44\n+F0.0\n+sS'GAT'\n+p45\n+F0.0\n+sS'CGG'\n+p46\n+F0.0\n+sS'CTT'\n+p47\n+F0.0\n+sS'TGC'\n+p48\n+F0.0\n+sS'GGG'\n+p49\n+F0.0\n+sS'TAG'\n+p50\n+F0.0\n+sS'GGA'\n+p51\n+F0.0\n+sS'TAA'\n+p52\n+F0.0024154589371980675\n+sS'GGC'\n+p53\n+F0.0\n+sS'TAC'\n+p54\n+F0.0\n+sS'GAG'\n+p55\n+F0.0\n+sS'TCG'\n+p56\n+F0.0\n+sS'TTA'\n+p57\n+F0.0\n+sS'TTT'\n+p58\n+F0.0\n+sS'GAC'\n+p59\n+F0.0\n+sS'CGT'\n+p60\n+F0.0\n+sS'GAA'\n+p61\n+F0.0\n+sS'TCA'\n+p62\n+F0.0024096385542168677\n+sS'GCA'\n+p63\n+F0.0\n+sS'GTA'\n+p64\n+F0.0\n+sS'GCC'\n+p65\n+F0.0036900369003690036\n+sS'GTC'\n+p66\n+F0.0\n+sS'GCG'\n+p67\n+F0.0\n+sS'GTG'\n+p68\n+F0.0\n+sS'TTC'\n+p69\n+F0.0\n+sS'GTT'\n+p70\n+F0.0\n+sS'GCT'\n+p71\n+F0.0\n+sS'TGA'\n+p72\n+F0.0\n+sS'TTG'\n+p73\n+F0.0\n+sS'TCC'\n+p74\n+F0.0\n+sS'TGG'\n+p75\n+F0.0\n+sS'TCT'\n+p76\n+F0.0\n+ssS'AVG_MUT_RATE'\n+p77\n+F0.0009053597295992274\n+sS'INDEL_FREQ'\n+p78\n+(dp79\n+I1\n+F1.000
0000000000002\n+ssS'SNP_FREQ'\n+p80\n+F0.9333333333333333\n+sS'TRINUC_TRANS_PROBS'\n+p81\n+(dp82\n+(S'TTA'\n+p83\n+S'TAA'\n+p84\n+tp85\n+F0.0\n+s(S'TGA'\n+p86\n+S'TCA'\n+p87\n+tp88\n+F0.0\n+s(S'GAA'\n+p89\n+S'GCA'\n+p90\n+tp91\n+F0.0\n+s(S'ACG'\n+p92\n+S'AGG'\n+p93\n+tp94\n+F0.0\n+s(g86\n+S'TTA'\n+p95\n+tp96\n+F0.0\n+s(S'AAG'\n+p97\n+S'ATG'\n+p98\n+tp99\n+F0.0\n+s(S'TCA'\n+p100\n+S'TGA'\n+p101\n+tp102\n+F0.0\n+s(S'GAC'\n+p103\n+S'GCC'\n+p104\n+tp105\n+F0.0\n+s(S'CTT'\n+p106\n+S'CGT'\n+p107\n+tp108\n+F0.0\n+s(S'GGC'\n+p109\n+S'GTC'\n+p110\n+tp111\n+F0.0\n+s(g89\n+S'GTA'\n+p112\n+tp113\n+F0.0\n+s(S'AAT'\n+p114\n+S'AGT'\n+p115\n+tp116\n+F0.0\n+s(S'GCG'\n+p117\n+S'GTG'\n+p118\n+tp119\n+F0.0\n+s(S'TAA'\n+p120\n+S'TCA'\n+p121\n+tp122\n+F0.0\n+s(S'CCT'\n+p123\n+S'CAT'\n+p124\n+tp125\n+F0.0\n+s(S'GAG'\n+p126\n+S'GCG'\n+p127\n+tp128\n+F0.0\n+s(S'ATT'\n+p129\n+S'AGT'\n+p130\n+tp131\n+F0.0\n+s(S'CGT'\n+p132\n+S'CTT'\n+p133\n+tp134\n+F0.0\n+s(S'CAT'\n+p135\n+S'CGT'\n+p136\n+tp137\n+F1.0\n+s(g117\n+S'GAG'\n+p138\n+tp139\n+F0.0\n+s(g89\n+S'GGA'\n+p140\n+tp141\n+F0.0\n+s(g109\n+S'GAC'\n+p142\n+tp143\n+F0.0\n+s(S'TTG'\n+p144\n+S'TCG'\n+p145\n+tp146\n+F0.0\n+s(S'ATA'\n+p147\n+S'AGA'\n+p148\n+tp149\n+F0.0\n+s(S'TGG'\n+p150\n+S'TCG'\n+p151\n+tp152\n+F0.0\n+s(S'GAT'\n+p153\n+S'GCT'\n+p154\n+tp155\n+F0.0\n+s(S'GGG'\n+p156\n+S'GAG'\n+p157\n+tp158\n+F0.0\n+s(S'AAA'\n+p159\n+S'AGA'\n+p160\n+tp161\n+F0.0\n+s(S'TCA'\n+p162\n+S'TTA'\n+p163\n+tp164\n+F1.0\n+s(S'ACC'\n+p165\n+S'AGC'\n+p166\n+tp167\n+F0.0\n+s(g117\n+S'GGG'\n+p168\n+tp169\n+F0.0\n+s(S'AAC'\n+p170\n+S'AGC'\n+p171\n+tp172\n+F1.0\n+s(S'GTA'\n+p173\n+S'GCA'\n+p174\n+tp175\n+F0.0\n+s(S'ACA'\n+p176\n+S'AGA'\n+p177\n+tp178\n+F0.0\n+s(S'GCA'\n+p179\n+S'GTA'\n+p180\n+tp181\n+F0.0\n+s(S'CCG'\n+p182\n+S'CGG'\n+p183\n+tp184\n+F0.0\n+s(S'TAT'\n+p185\n+S'TTT'\n+p186\n+tp187\n+F0.0\n+s(S'AGC'\n+p188\n+S'ACC'\n+p189\n+tp190\n+F0.0\n+s(S'AGA'\n+p191\n+S'ATA'\n+p192\n+tp193\n+F0.0\n+s(S'CCA'\n+p194\n+S'CGA'\n+p195\n+tp196\n+F0.0\n+s(S'TGC'\n+p197\
n+S'TAC'\n+p198\n+tp199\n+F0.0\n+s(g106\n+S'CCT'\n+p200\n+tp201\n+F0.0\n+s(S'ACT'\n+p202\n+S'AGT'\n+p203\n+tp204\n+F0.0\n+s(S'AGG'\n+p205\n+S'ACG'\n+p206\n+tp207\n+F0.0\n+s(S'CAC'\n+p208\n+S'CCC'\n+p209\n+tp210\n+F0.0\n+s(S'TAC'\n+p211\n+S'TCC'\n+p212\n+tp213\n+F0.0\n+s(g191\n+S'AAA'\n+p214\n+tp215\n+F0.0\n+s(S'CTG'\n+p216\n+S'CCG'\n+p217\n+tp218\n+F1.0\n+s(S'CAA'\n+p219\n+S'CCA'\n+p220\n+tp221\n+F0.0\n+s(g188\n+S'ATC'\n+p222\n+tp223\n+F0.0\n+s(g219\n+S'CGA'\n+p224\n+tp225\n+F0.0\n+s(S'AGT'\n+p226\n+S'AAT'\n+p227\n+tp228\n+F0.0\n+s"..b"F0.0\n+s(S'TTT'\n+p306\n+S'TAT'\n+p307\n+tp308\n+F0.0\n+s(g194\n+S'CTA'\n+p309\n+tp310\n+F0.0\n+s(S'ATG'\n+p311\n+S'AGG'\n+p312\n+tp313\n+F0.0\n+s(S'TAG'\n+p314\n+S'TCG'\n+p315\n+tp316\n+F0.0\n+s(g153\n+S'GTT'\n+p317\n+tp318\n+F0.0\n+s(S'TGT'\n+p319\n+S'TTT'\n+p320\n+tp321\n+F0.0\n+s(g176\n+S'AAA'\n+p322\n+tp323\n+F0.0\n+s(g202\n+S'ATT'\n+p324\n+tp325\n+F0.0\n+s(g278\n+S'TGC'\n+p326\n+tp327\n+F0.0\n+s(g159\n+S'ATA'\n+p328\n+tp329\n+F0.0\n+s(g109\n+S'GCC'\n+p330\n+tp331\n+F0.0\n+s(S'GCC'\n+p332\n+S'GAC'\n+p333\n+tp334\n+F0.0\n+s(g247\n+S'AAC'\n+p335\n+tp336\n+F0.0\n+s(S'CAT'\n+p337\n+S'CTT'\n+p338\n+tp339\n+F0.0\n+s(g268\n+S'CCA'\n+p340\n+tp341\n+F0.0\n+s(S'TCT'\n+p342\n+S'TGT'\n+p343\n+tp344\n+F0.0\n+s(S'GGA'\n+p345\n+S'GCA'\n+p346\n+tp347\n+F0.0\n+s(S'TCG'\n+p348\n+S'TGG'\n+p349\n+tp350\n+F0.0\n+s(g173\n+S'GAA'\n+p351\n+tp352\n+F0.0\n+s(g229\n+S'CTG'\n+p353\n+tp354\n+F0.0\n+s(g311\n+S'AAG'\n+p355\n+tp356\n+F0.0\n+s(S'CCT'\n+p357\n+S'CTT'\n+p358\n+tp359\n+F1.0\n+s(g179\n+S'GGA'\n+p360\n+tp361\n+F0.0\n+s(S'CCC'\n+p362\n+S'CTC'\n+p363\n+tp364\n+F0.0\n+s(g345\n+S'GAA'\n+p365\n+tp366\n+F0.0\n+s(g103\n+S'GTC'\n+p367\n+tp368\n+F0.0\n+s(g83\n+S'TCA'\n+p369\n+tp370\n+F0.0\n+s(g306\n+S'TGT'\n+p371\n+tp372\n+F0.0\n+s(g362\n+S'CGC'\n+p373\n+tp374\n+F0.0\n+s(g247\n+S'ACC'\n+p375\n+tp376\n+F0.0\n+s(S'GCT'\n+p377\n+S'GGT'\n+p378\n+tp379\n+F0.0\n+s(g314\n+S'TGG'\n+p380\n+tp381\n+F0.0\n+s(g219\n+S'CTA'\n+p382\n+tp383\n+F0.0\n+s(S'CTG'\n+p384\n+S'CAG'
\n+p385\n+tp386\n+F0.0\n+s(g208\n+S'CTC'\n+p387\n+tp388\n+F0.0\n+s(g126\n+S'GTG'\n+p389\n+tp390\n+F0.0\n+s(S'GGT'\n+p391\n+S'GCT'\n+p392\n+tp393\n+F0.0\n+s(g197\n+S'TCC'\n+p394\n+tp395\n+F0.0\n+s(g311\n+S'ACG'\n+p396\n+tp397\n+F0.0\n+s(g182\n+S'CAG'\n+p398\n+tp399\n+F0.0\n+s(g156\n+S'GTG'\n+p400\n+tp401\n+F0.0\n+s(S'GCC'\n+p402\n+S'GTC'\n+p403\n+tp404\n+F1.0\n+s(g185\n+S'TCT'\n+p405\n+tp406\n+F0.0\n+s(g342\n+S'TAT'\n+p407\n+tp408\n+F0.0\n+s(g377\n+S'GTT'\n+p409\n+tp410\n+F0.0\n+s(g191\n+S'ACA'\n+p411\n+tp412\n+F0.0\n+s(g240\n+S'CAC'\n+p413\n+tp414\n+F0.0\n+s(S'CGG'\n+p415\n+S'CAG'\n+p416\n+tp417\n+F0.0\n+s(g252\n+S'CTA'\n+p418\n+tp419\n+F0.0\n+s(g299\n+S'GGG'\n+p420\n+tp421\n+F0.0\n+s(g147\n+S'AAA'\n+p422\n+tp423\n+F0.0\n+s(g197\n+S'TTC'\n+p424\n+tp425\n+F0.0\n+s(g337\n+S'CCT'\n+p426\n+tp427\n+F0.0\n+s(g265\n+S'CTC'\n+p428\n+tp429\n+F0.0\n+s(g132\n+S'CAT'\n+p430\n+tp431\n+F0.0\n+s(g86\n+S'TAA'\n+p432\n+tp433\n+F0.0\n+s(g348\n+S'TAG'\n+p434\n+tp435\n+F0.0\n+s(g262\n+S'GGC'\n+p436\n+tp437\n+F0.0\n+s(g120\n+S'TTA'\n+p438\n+tp439\n+F0.0\n+s(g97\n+S'AGG'\n+p440\n+tp441\n+F0.0\n+s(g150\n+S'TTG'\n+p442\n+tp443\n+F0.0\n+s(g319\n+S'TAT'\n+p444\n+tp445\n+F0.0\n+s(g306\n+S'TCT'\n+p446\n+tp447\n+F0.0\n+s(g342\n+S'TTT'\n+p448\n+tp449\n+F0.0\n+s(g377\n+S'GAT'\n+p450\n+tp451\n+F0.0\n+s(g194\n+S'CAA'\n+p452\n+tp453\n+F0.0\n+s(g208\n+S'CGC'\n+p454\n+tp455\n+F0.0\n+s(g348\n+S'TTG'\n+p456\n+tp457\n+F0.0\n+s(g129\n+S'ACT'\n+p458\n+tp459\n+F0.0\n+s(g106\n+S'CAT'\n+p460\n+tp461\n+F0.0\n+s(g202\n+S'AAT'\n+p462\n+tp463\n+F0.0\n+s(g132\n+S'CCT'\n+p464\n+tp465\n+F0.0\n+s(S'AGC'\n+p466\n+S'AAC'\n+p467\n+tp468\n+F1.0\n+s(g144\n+S'TGG'\n+p469\n+tp470\n+F0.0\n+s(g123\n+S'CGT'\n+p471\n+tp472\n+F0.0\n+s(g262\n+S'GAC'\n+p473\n+tp474\n+F0.0\n+s(S'CAG'\n+p475\n+S'CGG'\n+p476\n+tp477\n+F1.0\n+s(g278\n+S'TTC'\n+p478\n+tp479\n+F0.0\n+s(g114\n+S'ACT'\n+p480\n+tp481\n+F0.0\n+s(g83\n+S'TGA'\n+p482\n+tp483\n+F0.0\n+s(g362\n+S'CAC'\n+p484\n+tp485\n+F0.0\n+s(g173\n+S'GGA'\n+p486\n+tp487\n+F0.0\n+s(g150\n+S'TA
G'\n+p488\n+tp489\n+F0.0\n+s(g314\n+S'TTG'\n+p490\n+tp491\n+F0.0\n+s(S'AGG'\n+p492\n+S'AAG'\n+p493\n+tp494\n+F1.0\n+s(g237\n+S'TGC'\n+p495\n+tp496\n+F0.0\n+s(g415\n+S'CCG'\n+p497\n+tp498\n+F0.0\n+s(g299\n+S'GAG'\n+p499\n+tp500\n+F0.0\n+s(g292\n+S'ACC'\n+p501\n+tp502\n+F0.0\n+s(g384\n+S'CGG'\n+p503\n+tp504\n+F0.0\n+s(g268\n+S'CGA'\n+p505\n+tp506\n+F0.0\n+s(g345\n+S'GTA'\n+p507\n+tp508\n+F0.0\n+s(g92\n+S'AAG'\n+p509\n+tp510\n+F0.0\n+s(g391\n+S'GAT'\n+p511\n+tp512\n+F0.0\n+s(g211\n+S'TTC'\n+p513\n+tp514\n+F0.0\n+s(g144\n+S'TAG'\n+p515\n+tp516\n+F0.0\n+s(g114\n+S'ATT'\n+p517\n+tp518\n+F0.0\n+s(g332\n+S'GGC'\n+p519\n+tp520\n+F0.0\n+s(g240\n+S'CGC'\n+p521\n+tp522\n+F0.0\n+s(g415\n+S'CTG'\n+p523\n+tp524\n+F0.0\n+s(g319\n+S'TCT'\n+p525\n+tp526\n+F0.0\n+s(g97\n+S'ACG'\n+p527\n+tp528\n+F0.0\n+s(g265\n+S'CAC'\n+p529\n+tp530\n+F0.0\n+s(g391\n+S'GTT'\n+p531\n+tp532\n+F0.0\n+s(g237\n+S'TAC'\n+p533\n+tp534\n+F0.0\n+s(g156\n+S'GCG'\n+p535\n+tp536\n+F0.0\n+s(g179\n+S'GAA'\n+p537\n+tp538\n+F0.0\n+s(g182\n+S'CTG'\n+p539\n+tp540\n+F0.0\n+s(g252\n+S'CAA'\n+p541\n+tp542\n+F0.0\n+ss.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-EXCLUDELIST-genMutModel.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-EXCLUDELIST-genMutModel.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,1327 @@\n+(dp0\n+S'HIGH_MUT_REGIONS'\n+p1\n+(lp2\n+(S'chrMT'\n+p3\n+I8000\n+I16000\n+F0.000875\n+tp4\n+asS'COMMON_VARIANTS'\n+p5\n+(lp6\n+(g3\n+I10749\n+S'A'\n+p7\n+S'G'\n+p8\n+F1e-05\n+tp9\n+a(g3\n+I11272\n+S'G'\n+p10\n+S'A'\n+p11\n+F1e-05\n+tp12\n+a(g3\n+I11742\n+S'C'\n+p13\n+S'T'\n+p14\n+F1e-05\n+tp15\n+a(g3\n+I13131\n+S'-'\n+p16\n+S'T'\n+p17\n+F1e-05\n+tp18\n+a(g3\n+I13272\n+S'A'\n+p19\n+S'G'\n+p20\n+F1e-05\n+tp21\n+a(g3\n+I13803\n+S'G'\n+p22\n+S'A'\n+p23\n+F1e-05\n+tp24\n+a(g3\n+I14066\n+S'C'\n+p25\n+S'T'\n+p26\n+F1e-05\n+tp27\n+asS'SNP_TRANS_FREQ'\n+p28\n+(dp29\n+(S'A'\n+p30\n+S'G'\n+p31\n+tp32\n+F1.0\n+s(S'C'\n+p33\n+g17\n+tp34\n+F1.0\n+s(g31\n+g30\n+tp35\n+F1.0\n+ssS'SNP_FREQ'\n+p36\n+F0.8571428571428571\n+sS'TRINUC_MUT_PROB'\n+p37\n+(dp38\n+S'ACC'\n+p39\n+F0.0\n+sS'ATG'\n+p40\n+F0.0\n+sS'AAG'\n+p41\n+F0.0\n+sS'AAA'\n+p42\n+F0.0\n+sS'ATC'\n+p43\n+F0.0\n+sS'AAC'\n+p44\n+F0.0041841004184100415\n+sS'ATA'\n+p45\n+F0.0\n+sS'AGG'\n+p46\n+F0.017241379310344827\n+sS'CCT'\n+p47\n+F0.003816793893129771\n+sS'CTC'\n+p48\n+F0.0\n+sS'AGC'\n+p49\n+F0.007751937984496124\n+sS'ACA'\n+p50\n+F0.0\n+sS'AGA'\n+p51\n+F0.0\n+sS'CAT'\n+p52\n+F0.004878048780487805\n+sS'AAT'\n+p53\n+F0.0\n+sS'ATT'\n+p54\n+F0.0\n+sS'CTG'\n+p55\n+F0.0\n+sS'CTA'\n+p56\n+F0.0\n+sS'ACT'\n+p57\n+F0.0\n+sS'CAC'\n+p58\n+F0.0\n+sS'ACG'\n+p59\n+F0.0\n+sS'CAA'\n+p60\n+F0.0\n+sS'AGT'\n+p61\n+F0.0\n+sS'CCA'\n+p62\n+F0.0\n+sS'CCG'\n+p63\n+F0.0\n+sS'CCC'\n+p64\n+F0.0\n+sS'TAT'\n+p65\n+F0.0\n+sS'GGT'\n+p66\n+F0.0\n+sS'TGT'\n+p67\n+F0.0\n+sS'CGA'\n+p68\n+F0.0\n+sS'CAG'\n+p69\n+F0.0\n+sS'CGC'\n+p70\n+F0.0\n+sS'GAT'\n+p71\n+F0.0\n+sS'CGG'\n+p72\n+F0.0\n+sS'CTT'\n+p73\n+F0.0\n+sS'TGC'\n+p74\n+F0.0\n+sS'GGG'\n+p75\n+F0.0\n+sS'TAG'\n+p76\n+F0.0\n+sS'GGA'\n+p77\n+F0.0\n+sS'TAA'\n+p78\n+F0.0\n+sS'GGC'\n+p79\n+F0.0\n+sS'TAC'\n+p80\n+F0.0\n+sS'GAG'\n+p81\n+F0.0\n+sS'TCG'\n+p82\n+F0.0\n+sS'TTA'\n+p83\n+F0.0\n+sS'TTT'\n+p84\n+F0.0\n+sS'GAC'\n+p85\n+F0.0\n+sS'CGT'\n+p86\n+F0.0\n+sS'GAA'\n+p87\n+F0.0\n+sS'TCA'\n+p88\n
+F0.0\n+sS'GCA'\n+p89\n+F0.0\n+sS'GTA'\n+p90\n+F0.0\n+sS'GCC'\n+p91\n+F0.008264462809917356\n+sS'GTC'\n+p92\n+F0.0\n+sS'GCG'\n+p93\n+F0.0\n+sS'GTG'\n+p94\n+F0.0\n+sS'TTC'\n+p95\n+F0.0\n+sS'GTT'\n+p96\n+F0.0\n+sS'GCT'\n+p97\n+F0.0\n+sS'TGA'\n+p98\n+F0.0\n+sS'TTG'\n+p99\n+F0.0\n+sS'TCC'\n+p100\n+F0.0\n+sS'TGG'\n+p101\n+F0.0\n+sS'TCT'\n+p102\n+F0.0\n+ssS'TRINUC_TRANS_PROBS'\n+p103\n+(dp104\n+(S'TTA'\n+p105\n+S'TAA'\n+p106\n+tp107\n+F0.0\n+s(S'TGA'\n+p108\n+S'TCA'\n+p109\n+tp110\n+F0.0\n+s(S'GAA'\n+p111\n+S'GCA'\n+p112\n+tp113\n+F0.0\n+s(S'ACG'\n+p114\n+S'AGG'\n+p115\n+tp116\n+F0.0\n+s(g108\n+S'TTA'\n+p117\n+tp118\n+F0.0\n+s(S'AAG'\n+p119\n+S'ATG'\n+p120\n+tp121\n+F0.0\n+s(S'TCA'\n+p122\n+S'TGA'\n+p123\n+tp124\n+F0.0\n+s(S'GAC'\n+p125\n+S'GCC'\n+p126\n+tp127\n+F0.0\n+s(S'CTT'\n+p128\n+S'CGT'\n+p129\n+tp130\n+F0.0\n+s(S'GGC'\n+p131\n+S'GTC'\n+p132\n+tp133\n+F0.0\n+s(g111\n+S'GTA'\n+p134\n+tp135\n+F0.0\n+s(S'AGA'\n+p136\n+S'ACA'\n+p137\n+tp138\n+F0.0\n+s(S'GCG'\n+p139\n+S'GTG'\n+p140\n+tp141\n+F0.0\n+s(S'TAA'\n+p142\n+S'TCA'\n+p143\n+tp144\n+F0.0\n+s(S'CCT'\n+p145\n+S'CAT'\n+p146\n+tp147\n+F0.0\n+s(S'GAG'\n+p148\n+S'GCG'\n+p149\n+tp150\n+F0.0\n+s(S'ATT'\n+p151\n+S'AGT'\n+p152\n+tp153\n+F0.0\n+s(S'CGT'\n+p154\n+S'CTT'\n+p155\n+tp156\n+F0.0\n+s(S'CAT'\n+p157\n+S'CGT'\n+p158\n+tp159\n+F1.0\n+s(g139\n+S'GAG'\n+p160\n+tp161\n+F0.0\n+s(g111\n+S'GGA'\n+p162\n+tp163\n+F0.0\n+s(g131\n+S'GAC'\n+p164\n+tp165\n+F0.0\n+s(S'TTG'\n+p166\n+S'TCG'\n+p167\n+tp168\n+F0.0\n+s(S'ATA'\n+p169\n+S'AGA'\n+p170\n+tp171\n+F0.0\n+s(S'TGG'\n+p172\n+S'TCG'\n+p173\n+tp174\n+F0.0\n+s(S'GAT'\n+p175\n+S'GCT'\n+p176\n+tp177\n+F0.0\n+s(S'GGG'\n+p178\n+S'GAG'\n+p179\n+tp180\n+F0.0\n+s(S'AAA'\n+p181\n+S'AGA'\n+p182\n+tp183\n+F0.0\n+s(S'CTA'\n+p184\n+S'CGA'\n+p185\n+tp186\n+F0.0\n+s(S'ACC'\n+p187\n+S'AGC'\n+p188\n+tp189\n+F0.0\n+s(g139\n+S'GGG'\n+p190\n+tp191\n+F0.0\n+s(S'AAC'\n+p192\n+S'AGC'\n+p193\n+tp194\n+F1.0\n+s(S'GTA'\n+p195\n+S'GCA'\n+p196\n+tp197\n+F0.0\n+s(S'ACA'\n+p198\n+S'AGA'\n+p199\n+tp200\n+F0.0
\n+s(S'GCA'\n+p201\n+S'GTA'\n+p202\n+tp203\n+F0.0\n+s(S'CCG'\n+p204\n+S'CGG'\n+p205\n+tp206\n+F0.0\n+s(S'TAT'\n+p207\n+S'TTT'\n+p208\n+tp209\n+F0.0\n+s(S'AGC'\n+p210\n+S'ACC'\n+p211\n+tp212\n+F0.0\n+s(g136\n+S'ATA'\n+p213\n+tp214\n+F0.0\n+s(S'CCA'\n+p215\n+S'CGA'\n+p216\n+tp217\n+F0.0\n+s(S'TGC'\n+p218\n+S'TAC'\n+p219\n+tp220\n+F0.0\n+s(g128\n+S'CCT'\n+p221\n+tp222\n+F0.0\n+s(S'ACT'\n+p223\n+S'AGT'\n+p224\n+tp225\n+F0.0\n+s(S'AGG'\n+p226\n+S'ACG'\n+p227\n+tp228\n+F0.0\n+s(S'CAC'\n+p229\n+S'CCC'\n+p230\n+tp231\n+F0.0\n+s(S'TAC'\n+p232\n+S'TCC'\n+p233"..b"AGG'\n+p329\n+tp330\n+F0.0\n+s(S'TAG'\n+p331\n+S'TCG'\n+p332\n+tp333\n+F0.0\n+s(g175\n+S'GTT'\n+p334\n+tp335\n+F0.0\n+s(S'TGT'\n+p336\n+S'TTT'\n+p337\n+tp338\n+F0.0\n+s(g119\n+S'ACG'\n+p339\n+tp340\n+F0.0\n+s(g223\n+S'ATT'\n+p341\n+tp342\n+F0.0\n+s(g296\n+S'TGC'\n+p343\n+tp344\n+F0.0\n+s(g181\n+S'ATA'\n+p345\n+tp346\n+F0.0\n+s(g131\n+S'GCC'\n+p347\n+tp348\n+F0.0\n+s(S'GCC'\n+p349\n+S'GAC'\n+p350\n+tp351\n+F0.0\n+s(g267\n+S'AAC'\n+p352\n+tp353\n+F0.0\n+s(S'CAT'\n+p354\n+S'CTT'\n+p355\n+tp356\n+F0.0\n+s(g184\n+S'CCA'\n+p357\n+tp358\n+F0.0\n+s(S'TCT'\n+p359\n+S'TGT'\n+p360\n+tp361\n+F0.0\n+s(S'GGA'\n+p362\n+S'GCA'\n+p363\n+tp364\n+F0.0\n+s(S'TCG'\n+p365\n+S'TGG'\n+p366\n+tp367\n+F0.0\n+s(g195\n+S'GAA'\n+p368\n+tp369\n+F0.0\n+s(g250\n+S'CTG'\n+p370\n+tp371\n+F0.0\n+s(g328\n+S'AAG'\n+p372\n+tp373\n+F0.0\n+s(S'CCT'\n+p374\n+S'CTT'\n+p375\n+tp376\n+F1.0\n+s(g201\n+S'GGA'\n+p377\n+tp378\n+F0.0\n+s(S'CCC'\n+p379\n+S'CTC'\n+p380\n+tp381\n+F0.0\n+s(g362\n+S'GAA'\n+p382\n+tp383\n+F0.0\n+s(g125\n+S'GTC'\n+p384\n+tp385\n+F0.0\n+s(g105\n+S'TCA'\n+p386\n+tp387\n+F0.0\n+s(g323\n+S'TGT'\n+p388\n+tp389\n+F0.0\n+s(g379\n+S'CGC'\n+p390\n+tp391\n+F0.0\n+s(g267\n+S'ACC'\n+p392\n+tp393\n+F0.0\n+s(S'GCT'\n+p394\n+S'GGT'\n+p395\n+tp396\n+F0.0\n+s(g331\n+S'TGG'\n+p397\n+tp398\n+F0.0\n+s(g240\n+S'CTA'\n+p399\n+tp400\n+F0.0\n+s(g237\n+S'CAG'\n+p401\n+tp402\n+F0.0\n+s(g229\n+S'CTC'\n+p403\n+tp404\n+F0.0\n+s(g148\n+S'GTG'\n+p405\n+tp406\n+F0.0\n+s
(S'GGT'\n+p407\n+S'GCT'\n+p408\n+tp409\n+F0.0\n+s(g218\n+S'TCC'\n+p410\n+tp411\n+F0.0\n+s(g328\n+S'ACG'\n+p412\n+tp413\n+F0.0\n+s(g204\n+S'CAG'\n+p414\n+tp415\n+F0.0\n+s(g178\n+S'GTG'\n+p416\n+tp417\n+F0.0\n+s(S'GCC'\n+p418\n+S'GTC'\n+p419\n+tp420\n+F1.0\n+s(g207\n+S'TCT'\n+p421\n+tp422\n+F0.0\n+s(g359\n+S'TAT'\n+p423\n+tp424\n+F0.0\n+s(g394\n+S'GTT'\n+p425\n+tp426\n+F0.0\n+s(S'AAT'\n+p427\n+S'AGT'\n+p428\n+tp429\n+F0.0\n+s(g260\n+S'CAC'\n+p430\n+tp431\n+F0.0\n+s(S'CGG'\n+p432\n+S'CAG'\n+p433\n+tp434\n+F0.0\n+s(g272\n+S'CTA'\n+p435\n+tp436\n+F0.0\n+s(g316\n+S'GGG'\n+p437\n+tp438\n+F0.0\n+s(g169\n+S'AAA'\n+p439\n+tp440\n+F0.0\n+s(g218\n+S'TTC'\n+p441\n+tp442\n+F0.0\n+s(g354\n+S'CCT'\n+p443\n+tp444\n+F0.0\n+s(g284\n+S'CTC'\n+p445\n+tp446\n+F0.0\n+s(g154\n+S'CAT'\n+p447\n+tp448\n+F0.0\n+s(g108\n+S'TAA'\n+p449\n+tp450\n+F0.0\n+s(g365\n+S'TAG'\n+p451\n+tp452\n+F0.0\n+s(g281\n+S'GGC'\n+p453\n+tp454\n+F0.0\n+s(g142\n+S'TTA'\n+p455\n+tp456\n+F0.0\n+s(g119\n+S'AGG'\n+p457\n+tp458\n+F0.0\n+s(g172\n+S'TTG'\n+p459\n+tp460\n+F0.0\n+s(g336\n+S'TAT'\n+p461\n+tp462\n+F0.0\n+s(g323\n+S'TCT'\n+p463\n+tp464\n+F0.0\n+s(g359\n+S'TTT'\n+p465\n+tp466\n+F0.0\n+s(g394\n+S'GAT'\n+p467\n+tp468\n+F0.0\n+s(g215\n+S'CAA'\n+p469\n+tp470\n+F0.0\n+s(g229\n+S'CGC'\n+p471\n+tp472\n+F0.0\n+s(g365\n+S'TTG'\n+p473\n+tp474\n+F0.0\n+s(g151\n+S'ACT'\n+p475\n+tp476\n+F0.0\n+s(g128\n+S'CAT'\n+p477\n+tp478\n+F0.0\n+s(g223\n+S'AAT'\n+p479\n+tp480\n+F0.0\n+s(g154\n+S'CCT'\n+p481\n+tp482\n+F0.0\n+s(S'AGC'\n+p483\n+S'AAC'\n+p484\n+tp485\n+F1.0\n+s(g166\n+S'TGG'\n+p486\n+tp487\n+F0.0\n+s(g145\n+S'CGT'\n+p488\n+tp489\n+F0.0\n+s(g281\n+S'GAC'\n+p490\n+tp491\n+F0.0\n+s(g250\n+S'CGG'\n+p492\n+tp493\n+F0.0\n+s(g296\n+S'TTC'\n+p494\n+tp495\n+F0.0\n+s(g427\n+S'ACT'\n+p496\n+tp497\n+F0.0\n+s(g105\n+S'TGA'\n+p498\n+tp499\n+F0.0\n+s(g379\n+S'CAC'\n+p500\n+tp501\n+F0.0\n+s(g195\n+S'GGA'\n+p502\n+tp503\n+F0.0\n+s(g172\n+S'TAG'\n+p504\n+tp505\n+F0.0\n+s(g331\n+S'TTG'\n+p506\n+tp507\n+F0.0\n+s(S'AGG'\n+p508\n+S'AAG'\n+p509\n+tp
510\n+F1.0\n+s(g257\n+S'TGC'\n+p511\n+tp512\n+F0.0\n+s(g432\n+S'CCG'\n+p513\n+tp514\n+F0.0\n+s(g316\n+S'GAG'\n+p515\n+tp516\n+F0.0\n+s(g309\n+S'ACC'\n+p517\n+tp518\n+F0.0\n+s(g237\n+S'CGG'\n+p519\n+tp520\n+F0.0\n+s(g122\n+S'TTA'\n+p521\n+tp522\n+F0.0\n+s(g362\n+S'GTA'\n+p523\n+tp524\n+F0.0\n+s(g114\n+S'AAG'\n+p525\n+tp526\n+F0.0\n+s(g407\n+S'GAT'\n+p527\n+tp528\n+F0.0\n+s(g232\n+S'TTC'\n+p529\n+tp530\n+F0.0\n+s(g166\n+S'TAG'\n+p531\n+tp532\n+F0.0\n+s(g427\n+S'ATT'\n+p533\n+tp534\n+F0.0\n+s(g349\n+S'GGC'\n+p535\n+tp536\n+F0.0\n+s(g260\n+S'CGC'\n+p537\n+tp538\n+F0.0\n+s(g432\n+S'CTG'\n+p539\n+tp540\n+F0.0\n+s(g336\n+S'TCT'\n+p541\n+tp542\n+F0.0\n+s(g198\n+S'AAA'\n+p543\n+tp544\n+F0.0\n+s(g284\n+S'CAC'\n+p545\n+tp546\n+F0.0\n+s(g407\n+S'GTT'\n+p547\n+tp548\n+F0.0\n+s(g257\n+S'TAC'\n+p549\n+tp550\n+F0.0\n+s(g178\n+S'GCG'\n+p551\n+tp552\n+F0.0\n+s(g201\n+S'GAA'\n+p553\n+tp554\n+F0.0\n+s(g204\n+S'CTG'\n+p555\n+tp556\n+F0.0\n+s(g272\n+S'CAA'\n+p557\n+tp558\n+F0.0\n+ssS'AVG_MUT_RATE'\n+p559\n+F0.0009251916468411313\n+sS'INDEL_FREQ'\n+p560\n+(dp561\n+I1\n+F0.9999999999999997\n+ss.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-INCLUDELIST-genMutModel.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-INCLUDELIST-genMutModel.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,1335 @@\n+(dp0\n+S'HIGH_MUT_REGIONS'\n+p1\n+(lp2\n+(S'chrMT'\n+p3\n+I2000\n+I10000\n+F0.001\n+tp4\n+asS'COMMON_VARIANTS'\n+p5\n+(lp6\n+(g3\n+I4077\n+S'T'\n+p7\n+S'C'\n+p8\n+F1e-05\n+tp9\n+a(g3\n+I4681\n+S'A'\n+p10\n+S'G'\n+p11\n+F1e-05\n+tp12\n+a(g3\n+I5712\n+S'C'\n+p13\n+S'T'\n+p14\n+F1e-05\n+tp15\n+a(g3\n+I6502\n+S'A'\n+p16\n+S'G'\n+p17\n+F1e-05\n+tp18\n+a(g3\n+I7424\n+S'C'\n+p19\n+S'T'\n+p20\n+F1e-05\n+tp21\n+a(g3\n+I7777\n+S'T'\n+p22\n+S'C'\n+p23\n+F1e-05\n+tp24\n+a(g3\n+I8470\n+S'C'\n+p25\n+S'T'\n+p26\n+F1e-05\n+tp27\n+a(g3\n+I8775\n+S'C'\n+p28\n+S'T'\n+p29\n+F1e-05\n+tp30\n+asS'SNP_TRANS_FREQ'\n+p31\n+(dp32\n+(S'A'\n+p33\n+S'G'\n+p34\n+tp35\n+F1.0\n+s(S'C'\n+p36\n+S'T'\n+p37\n+tp38\n+F1.0\n+s(g37\n+g36\n+tp39\n+F1.0\n+ssS'SNP_FREQ'\n+p40\n+F1.0\n+sS'TRINUC_MUT_PROB'\n+p41\n+(dp42\n+S'ACC'\n+p43\n+F0.007067137809187279\n+sS'ATG'\n+p44\n+F0.0\n+sS'AAG'\n+p45\n+F0.0\n+sS'AAA'\n+p46\n+F0.0\n+sS'ATC'\n+p47\n+F0.0\n+sS'AAC'\n+p48\n+F0.0\n+sS'ATA'\n+p49\n+F0.005050505050505051\n+sS'AGG'\n+p50\n+F0.0\n+sS'CCT'\n+p51\n+F0.0035714285714285713\n+sS'CTC'\n+p52\n+F0.0\n+sS'AGC'\n+p53\n+F0.0\n+sS'ACA'\n+p54\n+F0.0\n+sS'AGA'\n+p55\n+F0.0\n+sS'CAT'\n+p56\n+F0.0\n+sS'AAT'\n+p57\n+F0.0\n+sS'ATT'\n+p58\n+F0.0\n+sS'CTG'\n+p59\n+F0.008928571428571428\n+sS'CTA'\n+p60\n+F0.0\n+sS'ACT'\n+p61\n+F0.0\n+sS'CAC'\n+p62\n+F0.0\n+sS'ACG'\n+p63\n+F0.0\n+sS'CAA'\n+p64\n+F0.0\n+sS'AGT'\n+p65\n+F0.0\n+sS'CCA'\n+p66\n+F0.0\n+sS'CCG'\n+p67\n+F0.0\n+sS'CCC'\n+p68\n+F0.0\n+sS'TAT'\n+p69\n+F0.0\n+sS'GGT'\n+p70\n+F0.0\n+sS'TGT'\n+p71\n+F0.0\n+sS'CGA'\n+p72\n+F0.0\n+sS'CAG'\n+p73\n+F0.009433962264150943\n+sS'CGC'\n+p74\n+F0.0\n+sS'GAT'\n+p75\n+F0.0\n+sS'CGG'\n+p76\n+F0.0\n+sS'CTT'\n+p77\n+F0.0\n+sS'TGC'\n+p78\n+F0.0\n+sS'GGG'\n+p79\n+F0.0\n+sS'TAG'\n+p80\n+F0.0\n+sS'GGA'\n+p81\n+F0.0\n+sS'TAA'\n+p82\n+F0.0043859649122807015\n+sS'GGC'\n+p83\n+F0.0\n+sS'TAC'\n+p84\n+F0.0\n+sS'GAG'\n+p85\n+F0.0\n+sS'TCG'\n+p86\n+F0.0\n+sS'TTA'\n+p87\n+F0.0\n+sS'TTT'\n+p88\n+F0.0\n+sS'GAC'\n+p89\n+F0.0\n+sS'C
GT'\n+p90\n+F0.0\n+sS'GAA'\n+p91\n+F0.0\n+sS'TCA'\n+p92\n+F0.0048543689320388345\n+sS'GCA'\n+p93\n+F0.0\n+sS'GTA'\n+p94\n+F0.0\n+sS'GCC'\n+p95\n+F0.0\n+sS'GTC'\n+p96\n+F0.0\n+sS'GCG'\n+p97\n+F0.0\n+sS'GTG'\n+p98\n+F0.0\n+sS'TTC'\n+p99\n+F0.0\n+sS'GTT'\n+p100\n+F0.0\n+sS'GCT'\n+p101\n+F0.0\n+sS'TGA'\n+p102\n+F0.0\n+sS'TTG'\n+p103\n+F0.0\n+sS'TCC'\n+p104\n+F0.0\n+sS'TGG'\n+p105\n+F0.0\n+sS'TCT'\n+p106\n+F0.0\n+ssS'TRINUC_TRANS_PROBS'\n+p107\n+(dp108\n+(S'TTA'\n+p109\n+S'TAA'\n+p110\n+tp111\n+F0.0\n+s(S'TGA'\n+p112\n+S'TCA'\n+p113\n+tp114\n+F0.0\n+s(S'GAA'\n+p115\n+S'GCA'\n+p116\n+tp117\n+F0.0\n+s(S'ACG'\n+p118\n+S'AGG'\n+p119\n+tp120\n+F0.0\n+s(g112\n+S'TTA'\n+p121\n+tp122\n+F0.0\n+s(S'AAG'\n+p123\n+S'ATG'\n+p124\n+tp125\n+F0.0\n+s(S'TCA'\n+p126\n+S'TGA'\n+p127\n+tp128\n+F0.0\n+s(S'GAC'\n+p129\n+S'GCC'\n+p130\n+tp131\n+F0.0\n+s(S'CTT'\n+p132\n+S'CGT'\n+p133\n+tp134\n+F0.0\n+s(S'GGC'\n+p135\n+S'GTC'\n+p136\n+tp137\n+F0.0\n+s(g115\n+S'GTA'\n+p138\n+tp139\n+F0.0\n+s(S'AGA'\n+p140\n+S'ACA'\n+p141\n+tp142\n+F0.0\n+s(S'GCG'\n+p143\n+S'GTG'\n+p144\n+tp145\n+F0.0\n+s(S'TAA'\n+p146\n+S'TCA'\n+p147\n+tp148\n+F0.0\n+s(S'CCT'\n+p149\n+S'CAT'\n+p150\n+tp151\n+F0.0\n+s(S'GAG'\n+p152\n+S'GCG'\n+p153\n+tp154\n+F0.0\n+s(S'ATT'\n+p155\n+S'AGT'\n+p156\n+tp157\n+F0.0\n+s(S'CGT'\n+p158\n+S'CTT'\n+p159\n+tp160\n+F0.0\n+s(S'CAT'\n+p161\n+S'CGT'\n+p162\n+tp163\n+F0.0\n+s(g143\n+S'GAG'\n+p164\n+tp165\n+F0.0\n+s(g115\n+S'GGA'\n+p166\n+tp167\n+F0.0\n+s(g135\n+S'GAC'\n+p168\n+tp169\n+F0.0\n+s(S'TTG'\n+p170\n+S'TCG'\n+p171\n+tp172\n+F0.0\n+s(S'ATA'\n+p173\n+S'AGA'\n+p174\n+tp175\n+F0.0\n+s(S'TGG'\n+p176\n+S'TCG'\n+p177\n+tp178\n+F0.0\n+s(S'GAT'\n+p179\n+S'GCT'\n+p180\n+tp181\n+F0.0\n+s(S'GGG'\n+p182\n+S'GAG'\n+p183\n+tp184\n+F0.0\n+s(S'AAA'\n+p185\n+S'AGA'\n+p186\n+tp187\n+F0.0\n+s(S'TCA'\n+p188\n+S'TTA'\n+p189\n+tp190\n+F1.0\n+s(S'ACC'\n+p191\n+S'AGC'\n+p192\n+tp193\n+F0.0\n+s(g143\n+S'GGG'\n+p194\n+tp195\n+F0.0\n+s(S'AAC'\n+p196\n+S'AGC'\n+p197\n+tp198\n+F0.0\n+s(S'GTA'\n+p199\n+S'GCA'\n+p200\n
+tp201\n+F0.0\n+s(S'ACA'\n+p202\n+S'AGA'\n+p203\n+tp204\n+F0.0\n+s(S'GCA'\n+p205\n+S'GTA'\n+p206\n+tp207\n+F0.0\n+s(S'CCG'\n+p208\n+S'CGG'\n+p209\n+tp210\n+F0.0\n+s(S'TAT'\n+p211\n+S'TTT'\n+p212\n+tp213\n+F0.0\n+s(S'AGC'\n+p214\n+S'ACC'\n+p215\n+tp216\n+F0.0\n+s(g140\n+S'ATA'\n+p217\n+tp218\n+F0.0\n+s(S'CCA'\n+p219\n+S'CGA'\n+p220\n+tp221\n+F0.0\n+s(S'TGC'\n+p222\n+S'TAC'\n+p223\n+tp224\n+F0.0\n+s(g132\n+S'CCT'\n+p225\n+tp226\n+F0.0\n+s(S'ACT'\n+p227\n+S'AGT'\n+p228\n+tp229\n+F0.0\n+s(S'AGG'\n+p230\n+S'ACG'\n+p231\n+tp232\n+F0.0\n+s(S'CAC'\n+p233\n+S'"..b"TA'\n+p333\n+tp334\n+F0.0\n+s(S'ATG'\n+p335\n+S'AGG'\n+p336\n+tp337\n+F0.0\n+s(S'TAG'\n+p338\n+S'TCG'\n+p339\n+tp340\n+F0.0\n+s(g179\n+S'GTT'\n+p341\n+tp342\n+F0.0\n+s(S'TGT'\n+p343\n+S'TTT'\n+p344\n+tp345\n+F0.0\n+s(g123\n+S'ACG'\n+p346\n+tp347\n+F0.0\n+s(g227\n+S'ATT'\n+p348\n+tp349\n+F0.0\n+s(g303\n+S'TGC'\n+p350\n+tp351\n+F0.0\n+s(g185\n+S'ATA'\n+p352\n+tp353\n+F0.0\n+s(g135\n+S'GCC'\n+p354\n+tp355\n+F0.0\n+s(S'GCC'\n+p356\n+S'GAC'\n+p357\n+tp358\n+F0.0\n+s(g272\n+S'AAC'\n+p359\n+tp360\n+F0.0\n+s(g161\n+S'CTT'\n+p361\n+tp362\n+F0.0\n+s(g293\n+S'CCA'\n+p363\n+tp364\n+F0.0\n+s(S'TCT'\n+p365\n+S'TGT'\n+p366\n+tp367\n+F0.0\n+s(S'GGA'\n+p368\n+S'GCA'\n+p369\n+tp370\n+F0.0\n+s(S'TCG'\n+p371\n+S'TGG'\n+p372\n+tp373\n+F0.0\n+s(g199\n+S'GAA'\n+p374\n+tp375\n+F0.0\n+s(g254\n+S'CTG'\n+p376\n+tp377\n+F0.0\n+s(g335\n+S'AAG'\n+p378\n+tp379\n+F0.0\n+s(S'CCT'\n+p380\n+S'CTT'\n+p381\n+tp382\n+F1.0\n+s(g205\n+S'GGA'\n+p383\n+tp384\n+F0.0\n+s(S'CCC'\n+p385\n+S'CTC'\n+p386\n+tp387\n+F0.0\n+s(g368\n+S'GAA'\n+p388\n+tp389\n+F0.0\n+s(g129\n+S'GTC'\n+p390\n+tp391\n+F0.0\n+s(g109\n+S'TCA'\n+p392\n+tp393\n+F0.0\n+s(g330\n+S'TGT'\n+p394\n+tp395\n+F0.0\n+s(g385\n+S'CGC'\n+p396\n+tp397\n+F0.0\n+s(g272\n+S'ACC'\n+p398\n+tp399\n+F0.0\n+s(S'GCT'\n+p400\n+S'GGT'\n+p401\n+tp402\n+F0.0\n+s(g338\n+S'TGG'\n+p403\n+tp404\n+F0.0\n+s(g244\n+S'CTA'\n+p405\n+tp406\n+F0.0\n+s(S'CTG'\n+p407\n+S'CAG'\n+p408\n+tp409\n+F0.0\n+s(g233\n+S'CTC'\n+p410\n+tp411\
n+F0.0\n+s(g152\n+S'GTG'\n+p412\n+tp413\n+F0.0\n+s(S'GGT'\n+p414\n+S'GCT'\n+p415\n+tp416\n+F0.0\n+s(g222\n+S'TCC'\n+p417\n+tp418\n+F0.0\n+s(g335\n+S'ACG'\n+p419\n+tp420\n+F0.0\n+s(g208\n+S'CAG'\n+p421\n+tp422\n+F0.0\n+s(g182\n+S'GTG'\n+p423\n+tp424\n+F0.0\n+s(g356\n+S'GTC'\n+p425\n+tp426\n+F0.0\n+s(g211\n+S'TCT'\n+p427\n+tp428\n+F0.0\n+s(g365\n+S'TAT'\n+p429\n+tp430\n+F0.0\n+s(g400\n+S'GTT'\n+p431\n+tp432\n+F0.0\n+s(S'AAT'\n+p433\n+S'AGT'\n+p434\n+tp435\n+F0.0\n+s(g265\n+S'CAC'\n+p436\n+tp437\n+F0.0\n+s(S'CGG'\n+p438\n+S'CAG'\n+p439\n+tp440\n+F0.0\n+s(g277\n+S'CTA'\n+p441\n+tp442\n+F0.0\n+s(g323\n+S'GGG'\n+p443\n+tp444\n+F0.0\n+s(g173\n+S'AAA'\n+p445\n+tp446\n+F0.0\n+s(g222\n+S'TTC'\n+p447\n+tp448\n+F0.0\n+s(g161\n+S'CCT'\n+p449\n+tp450\n+F0.0\n+s(g290\n+S'CTC'\n+p451\n+tp452\n+F0.0\n+s(g158\n+S'CAT'\n+p453\n+tp454\n+F0.0\n+s(g112\n+S'TAA'\n+p455\n+tp456\n+F0.0\n+s(g371\n+S'TAG'\n+p457\n+tp458\n+F0.0\n+s(g287\n+S'GGC'\n+p459\n+tp460\n+F0.0\n+s(g146\n+S'TTA'\n+p461\n+tp462\n+F0.0\n+s(g123\n+S'AGG'\n+p463\n+tp464\n+F0.0\n+s(g176\n+S'TTG'\n+p465\n+tp466\n+F0.0\n+s(g343\n+S'TAT'\n+p467\n+tp468\n+F0.0\n+s(g330\n+S'TCT'\n+p469\n+tp470\n+F0.0\n+s(g365\n+S'TTT'\n+p471\n+tp472\n+F0.0\n+s(g400\n+S'GAT'\n+p473\n+tp474\n+F0.0\n+s(g219\n+S'CAA'\n+p475\n+tp476\n+F0.0\n+s(g233\n+S'CGC'\n+p477\n+tp478\n+F0.0\n+s(g371\n+S'TTG'\n+p479\n+tp480\n+F0.0\n+s(g155\n+S'ACT'\n+p481\n+tp482\n+F0.0\n+s(g132\n+S'CAT'\n+p483\n+tp484\n+F0.0\n+s(g227\n+S'AAT'\n+p485\n+tp486\n+F0.0\n+s(g158\n+S'CCT'\n+p487\n+tp488\n+F0.0\n+s(g214\n+S'AAC'\n+p489\n+tp490\n+F0.0\n+s(g170\n+S'TGG'\n+p491\n+tp492\n+F0.0\n+s(g149\n+S'CGT'\n+p493\n+tp494\n+F0.0\n+s(g287\n+S'GAC'\n+p495\n+tp496\n+F0.0\n+s(S'CAG'\n+p497\n+S'CGG'\n+p498\n+tp499\n+F1.0\n+s(g303\n+S'TTC'\n+p500\n+tp501\n+F0.0\n+s(g433\n+S'ACT'\n+p502\n+tp503\n+F0.0\n+s(g109\n+S'TGA'\n+p504\n+tp505\n+F0.0\n+s(g385\n+S'CAC'\n+p506\n+tp507\n+F0.0\n+s(g199\n+S'GGA'\n+p508\n+tp509\n+F0.0\n+s(g176\n+S'TAG'\n+p510\n+tp511\n+F0.0\n+s(g338\n+S'TTG'\n+p512\n+tp513\n+F0
.0\n+s(g230\n+S'AAG'\n+p514\n+tp515\n+F0.0\n+s(g262\n+S'TGC'\n+p516\n+tp517\n+F0.0\n+s(g438\n+S'CCG'\n+p518\n+tp519\n+F0.0\n+s(g323\n+S'GAG'\n+p520\n+tp521\n+F0.0\n+s(g196\n+S'ACC'\n+p522\n+tp523\n+F0.0\n+s(g407\n+S'CGG'\n+p524\n+tp525\n+F0.0\n+s(g293\n+S'CGA'\n+p526\n+tp527\n+F0.0\n+s(g368\n+S'GTA'\n+p528\n+tp529\n+F0.0\n+s(g118\n+S'AAG'\n+p530\n+tp531\n+F0.0\n+s(g414\n+S'GAT'\n+p532\n+tp533\n+F0.0\n+s(g236\n+S'TTC'\n+p534\n+tp535\n+F0.0\n+s(g170\n+S'TAG'\n+p536\n+tp537\n+F0.0\n+s(g433\n+S'ATT'\n+p538\n+tp539\n+F0.0\n+s(g356\n+S'GGC'\n+p540\n+tp541\n+F0.0\n+s(g265\n+S'CGC'\n+p542\n+tp543\n+F0.0\n+s(g438\n+S'CTG'\n+p544\n+tp545\n+F0.0\n+s(g343\n+S'TCT'\n+p546\n+tp547\n+F0.0\n+s(g202\n+S'AAA'\n+p548\n+tp549\n+F0.0\n+s(g290\n+S'CAC'\n+p550\n+tp551\n+F0.0\n+s(g414\n+S'GTT'\n+p552\n+tp553\n+F0.0\n+s(g262\n+S'TAC'\n+p554\n+tp555\n+F0.0\n+s(g182\n+S'GCG'\n+p556\n+tp557\n+F0.0\n+s(g205\n+S'GAA'\n+p558\n+tp559\n+F0.0\n+s(g208\n+S'CTG'\n+p560\n+tp561\n+F0.0\n+s(g277\n+S'CAA'\n+p562\n+tp563\n+F0.0\n+ssS'AVG_MUT_RATE'\n+p564\n+F0.0008886914019106865\n+sS'INDEL_FREQ'\n+p565\n+(dp566\n+s.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-Targeted.bam
b
Binary file test-data/chrMT-PE-VCF-BAM-Targeted.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-Targeted.bam.bai
b
Binary file test-data/chrMT-PE-VCF-BAM-Targeted.bam.bai has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-Targeted.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-Targeted.vcf Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.1
+##reference=chrMT.fa
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">
+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">
+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">
+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##ALT=<ID=TRANS,Description="Translocation">
+##ALT=<ID=INV-TRANS,Description="Inverted translocation">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+MT 335 . A AATTA . PASS WP=0/1
+MT 4078 . T C . PASS WP=0/1
+MT 4682 . A G . PASS WP=0/1
+MT 5713 . C T . PASS WP=0/1
+MT 6503 . A G . PASS WP=0/1
+MT 7425 . C T . PASS WP=1/0
+MT 7778 . T C . PASS WP=1/0
+MT 8471 . C T . PASS WP=1/0
+MT 8776 . C T . PASS WP=0/1
+MT 10750 . A G . PASS WP=0/1
+MT 11273 . G A . PASS WP=0/1
+MT 11743 . C T . PASS WP=1/0
+MT 13132 . C CT . PASS WP=0/1
+MT 13273 . A G . PASS WP=1/0
+MT 13804 . G A . PASS WP=0/1
+MT 14067 . C T . PASS WP=0/1
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-Targeted_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-Targeted_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,1840 @@\n+@out-MT-1/1\n+CCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAGTAATTACATCACAA\n++\n+GGGGGGF:GGDFFFFGGGF5@AGGGGGEGG>\'%GE?FF<?FBDGFEF2DFB=GFCGG1&/GBGGGDGGFBFEFDF6@GGEGEGGFA&F0FGF=GGGFC@9C\n+@out-MT-3/1\n+TTCAAATTTTATCTTTTGGCGGTATGCCCTTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACA\n++\n+FFEFGGGGFD&;GGGGGFGFGGFBGGF\'EFFGGGGGFGGGGFGGGGGGGGFDGGGBGFF6EGFEG1EGGFGGC@GD7GFEFG@FGGCE?E<FGD=GGG/FD\n+@out-MT-5/1\n+CACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTAC\n++\n+F0;BGGEGEGGGFGFGGGG?=GE3DFGFGG?GGGGFGG?FFG@DDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFD98DEGFF;A>E>3\n+@out-MT-7/1\n+GAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAGCAAGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTACACCT\n++\n+AG=AFEGGFGDGFG>GDFEGGGGE:GFGGG@GGECF@GGGFGAGE\'1BGFFGEGGGFGGGFD;G@FF5>GFGG=F4BEGGBEF@BGGG7GEG@7GC.AGGB\n+@out-MT-9/1\n+CTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAA\n++\n+EEFD6BAFFGGGGFFDGGGG@:GGGGGGFFFFF/9=FGGBFGA:FDGBGEFGGGGG;@FGGCDGFGGGGGE<G4>FGCAGEED1GG(*0?G=:>CGFGFCF\n+@out-MT-11/1\n+AGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGA\n++\n+BFAGF;FGGGDG+G8GDGGG@F?GGGGGBEEGFFGD9B.GFGG:GG)DGFGGGGGG9EGFGFGDGF?+2=E.2GGFFF:EFGGFEGFEFE3DGE71G;E2=\n+@out-MT-13/1\n+GGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGC\n++\n+GGGECFFGD17EDFGGGGG<GAFFE6CGGG3FCFGC>F:GG0GG8FFBGGGGGGG;DFFEGGGFGFGGFG>FFG;DF6GGGGGFFGGCEFG<6EFFEBGA9\n+@out-MT-15/1\n+ACCACAAGGCACACCTACACCCCTTATCCCCATACTAGTTATTATCGAAACCATCAACCTACTCATTCAACCAATAGCCCTGGCCGTACGCCTAACCGCTA\n++\n+GFG>FGEFGDGBDGDGF@GFG@6DFCCGEEGGGBG>GEDFAGGGGBDGFGGBBGGG0)GGGEG;FGFGBGGEDGEFGGGDC6GGBDB?(AG?FGFGEE22?\n+@out-MT-17/1\n+CCCGGGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACACACAGGTCCTAAACTA\n++\n+GFEB/GFE?FGGG2FFGFGGFGG+FGGDG7GBGC@BFGG=
GGF6EGGBAGFGGGGGGDGC<GGGG=FAFF?FGG@GGEGBDG:C\'5@GF8FGD;FGA@?G/\n+@out-MT-19/1\n+AGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCGTAGGATTCATCTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAACCTCATCACT\n++\n+G@GFGA8GFGFFFGDGEGGFGG8GFFGC1G2GGFGG3GGF@F0EFCGGFFB)ECG?FFG9GBGGGG<DFEGGCCGE8GEF7EGFDGFE<FG#,D9G=<B7F\n+@out-MT-21/1\n+GCCCATGCATTTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTACTAATCGGTGCCGCCGATATGGC\n++\n+GEGFAAGFGGEGGGGCFFEGGF=DGF8BFECE>GFFFFGBFB8GFGF67+FFEDFGGGG?EGCG>GDCG>GEC=4CED\'GGG?:D=?:FF>GFGFEDC0GA\n+@out-MT-23/1\n+ATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAATACCATCCACCCTCC\n++\n+GFGGGGGGEGGFCEFGF98G>GGGGGGGGGEFGGFG9@@EGGGFGFFGFGGFG;GFEG>FGFDFGDGGGFFCDFEF)FGG<@?FGEG2ECAFGGEED.BFG\n+@out-MT-25/1\n+ACAATTAACAGCCCAATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGAAAGGTTAACAAAAGTAA\n++\n+EGGGGFDC7GFFF9EA@GCGFFGGGDGEG<CGGFFGGFFAFCGFGGGGDFEF;GGGB7FGGG7.GFFFFF<BA8DFGGGEF=ABFG38CFGA$G;A=7ED=\n+@out-MT-27/1\n+GATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTCCTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACAT\n++\n+EBGGG=GGGEFFG@GFGFD;F?GGEFE0FGFFFFFEGGG<?F@DG>EFGGGGGFDGF-FF:E=GGGFFF1AGFFGB?DBGGFE<GDFF/C@CGGGGEFEFF\n+@out-MT-29/1\n+ACTCCTCAATTACCCACATAGGATGAATAATAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTACCTATTTATATTAACCTAACTACTACC\n++\n+F>F?BGGGGEFGGGGGGGFEGGGEGGGG:GGGG;G;G7FGCBBGG8GGGGGFGGDGD<DAGFEEGEFFGA3FDF17<GGFB4F>CFG$%C4FDGGGGD5=9\n+@out-MT-31/1\n+CCTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAGCAGGGAACTACTCCCACCCTGGAGCCTCCG\n++\n+FGEGGGGADEFFGG.GGGG1FGGEGGGGGGGGGGGFFFG?GGDGGG:GDGGGF87GGGGEGGG>GFFGFGGG1GGGGB6FGG09G8B?BGGE3;)?FD;7-\n+@out-MT-33/1\n+CACTAACCATATACCAATGATGGCGCGATGTGACACGAGAAAGCACATACCAAGGCAACCACACACCACCTGTTCAAAAAGGCCTTCGATAAGGGATAATC\n++\n+GEGCGEA6FFGGFGGFB:DEGEAGGGGFGG>%G0GFG95GDGCFCFGGGEFGGG3F+CGFFGG>GGGGGA5)D"F>GEF68$D/-?GFFB&$CGFA@GAGC\n+@out-MT-35/1\n+TGCAAAGGTAGCATAATCACTTCTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCTTACTTGTAACCAGTGAAATTGACCTG\n
++\n+GFGFDGGGGEGB-GGGDBAAFG$FGGGGD=GFGCGG1EGGGC=GGGCF.GGGGCFFGGGFFF8AGD<EBCDGD'..b"GTAACTATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCGCCCTTACCACGCTACTCCTACCT\n++\n+GFG<GBF.GGGFGGGFFGFFG$..GGEGGEF?2/$A?GGG:GGBGBGGCCGGD?6AGCC3G9;4FEGACF5EGEFFGGEGFGFFC5A@CDFGD&EGFGGDE\n+@out-MT-887/1\n+ACTATACCTATTATTCTGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGGTAACG\n++\n+GGGFGEGFDGGGGGEG#EFGCGGFGGGGGFGGGBFGFDG5GBEG>3CGGFGFGGGGGGGE?DF%>E-=DD@GGGF98GFFCFGEFDGBF:9C=A?F;GGDF\n+@out-MT-889/1\n+TCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGC\n++\n+E?EGFFDGG<EB<FGGGEFGG?;DF=DGGFGFFFFGGB?GC6?@@BGGGGGGFGGGCGGGD,GF;@EE9CGF@)GGGFEFD+D<GGGE<DE@*21GGGCF6\n+@out-MT-891/1\n+GGATAACAGCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCGATGGTGCAGCCGCTATTAAAGGT\n++\n+GAGGGG8GGGDGGGGG8GFFGCDEGG:@EGG>GFG<GGGFF?CDAFGFGGFGEGFGGCGEFGGGFGCDGGGGFBG,FBFE@DBEGFAFFD@EEEG3+FG6-\n+@out-MT-893/1\n+TCTACGGCTCAACATTTTTTGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCCTCACTATCTGCTTCATCCGCCAACTAATATTT\n++\n+FGGDFDGFG;EDGDACFEFFGGGGGGFGFE@@EG8FFGB>GGBGGG/>GC=3=GBG:%)2&GBEEC/GGGDGG*G<=GFG3FDE9B5FGGGGGD?GECEGE\n+@out-MT-895/1\n+CATCGTACTACACGACACGTACTACGTTGTAGCCCACTTCCACTATGTCCTATCAATAGGAGCTGTATTTGCCATCATAGGAGGCTTCATTCACTGATTTC\n++\n+GA=EGFGGGEGGFFGGFGGF=FGGGFGGBGGBFEGGGGBGF&FFGGGG:G><-EGFGGFFBG(:GGFFGDGG@GC<;FGGEGGGGFFEFD<+F>BF,B9C>\n+@out-MT-897/1\n+CTGAGAACCAAAATGAACGAAAATCTGTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTCTATTTCCCCCTCTATT\n++\n+5-3GGFDAGG@FFGGFFF;D:AGG*FFGGGGGG9GGG;EGGG<GGGFEDGFGGGGEGC<<FFFGAG=GGGEGGGGGDF+1EGAGF<FGGDFCG=FG4BAFE\n+@out-MT-899/1\n+TCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAA\n++\n+FGGGG>FGGFEFGFGGGFG=AFGGGFEGGFFGEF=G=GFGGG<BFGCGGFFGGAFGGDFGFFG<7DD=DB@FBFEED<GFGGGEDGG@/01B6FDFFF;@2\n+@out-MT-901/1\n+AAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCTACACAGGCATGCTCATAAGGAAATG\n++\n+GF
GGGGADGGFDA>GGGEGFGGDCGDBFGFAGFCGGFGFGGGFFFEFGGFGGGGGGFG;)ACFFGG6FGGFFE;G,DEDG=:BEFGGGF/9FD?DGE3=$1\n+@out-MT-903/1\n+CTACCACTCACCCTAGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTT\n++\n+GFG?EB2GFGGGAGGGGGG4G.FGGGEGG$8GFF&G26GGFGGDEBFGG9GDG),<GFG=FBFGDEGGFGGF?FGGGFG/=;F+G,EG?FFGGFG@F0DA9\n+@out-MT-905/1\n+CACTCTCTCAATCTTATCCATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGCTACGCAAAATCTTAGAATACTCCTCAATTACCCACATAGGAT\n++\n+GFFGFGGGFGGGGGGGFFEDFGGGEFFGFGFAGEFGGFFFEGBEFGG-FD9CFG<GEGGDBBFEGFFGGGGGGF7GFG9GGEFEFGGEFCA76G6GGFGF?\n+@out-MT-907/1\n+CCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTA\n++\n+@GCFGGGGFABGFCBGGGGDEDGGGFGFGGAEBG;F;CGG8EDFAEGGGCFGFGGFGFGDG<GGF83GGGE@FFFFGGGGEFFDE9FGG>;93FG'8GFFD\n+@out-MT-909/1\n+CCCCCAACACGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTCCTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCATAG\n++\n+GCG.DFFG1%FG;AG?GGGGG=G>/8FAECAGA@GGFGFGFDGGGG4GF><F#BFGF>GGDEFFGGDFDGC;=FFBGFEFFFFFEFFD6FFFG;;F>6GF1\n+@out-MT-911/1\n+TAATGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAAC\n++\n+GE=@GC6GG;GFAFFEGG9G@GGDFGGFGGGGG;GDGFGEGGFCG6$GEG:FGEGG8GGF6GBFGEG@FFDDBGDEA>G<FFF?CGGBBDEFGGDEGE>B>\n+@out-MT-913/1\n+CTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAA\n++\n+@GG8EGGFFG:GBEG9FGG(*@GGG9GFG>ECEF9FGGGFEGGD;CGECEFEEGEEDG?8GGFF1GBGFGEGFGE@GFFEDAGECGBGGGGGGFBFFG?3:\n+@out-MT-915/1\n+CGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTT\n++\n+DGEGFGGG<6GD?G?BFBGGGGFGFGG2G8EBGGFG?CGGGFA5FGGGGGGGCGGFG9GGFGGGGGGCFF<FGGGG?BEFG@GDG4A67F>934FEC1;B@\n+@out-MT-917/1\n+ACATTTAACTAAAACCCCTACGCATTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAAC\n++\n+DFEGFGFFGDGGGGCFFCGGFGFG7GDDGGEF@G6-1GB'GFEEGGFFGGEG@GGGDGGGGFGGE<EG:C::>8CGCGGGEGFF=FCEDE-FF;EDBGA4@\n+@out-MT-919/1\n+GAGCACACCATATATTTACAGTAGGAATAGACGTAGACACACGAGCATATTTCACCTC
CGCTACCATAATCATCGCTATCCCCACCGGCGTCAAAGTATTT\n++\n+DFGAFAGGGDGGGGGGFG5G<GGFG?GGGGEDGDDGCFFF9GDF7FG@=EEEGF1FFGFGEGCFEG8GAGEGFGFEEFFE;;;GGF?GG1E:FA:E'?BBG\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-Targeted_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-Targeted_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,1840 @@\n+@out-MT-1/2\n+TAGCTTTACAGTGGGCTCTAGAGGGGGTAGAGGGGGTGCTATAGGGTAAATACGGGCCCTATTTCAAAGATTTTTAGGGGAATTAATTCTGGGACGATGGG\n++\n+GEFEE<DGFGGGFEFFGGEFGGGE1A<G==FEGFGBFG=GCGGGF>FGEGAG6BGGGGGGCE@GGD6GGFFGFEDB,FEF7B;6=GECF@%CDFDFF?CC7\n+@out-MT-3/2\n+AGAGCTAATAGAAAGGCTAGGACCAAACCTATTTGTTTATGGGGTGATGTGAGCCCGTCTAAACATTTTCAGTGTATTGCTTTGAGGAGGTAAGCTACATA\n++\n+FFCGFGGGGGEGCGGG::GFFG9FF@FGG3F=G>/DGFAGGGEFDFEEGGG?GFGGGGGGGE@DF=CF5GGECDDGFF>GEC4GFGGGGGGG=CEGGGGDF\n+@out-MT-5/2\n+TTCGATTCTCATAGTCCTAGAAATAAGGGGGTTTAAGCTAATATTATTTACTCTATCAAAGTAACTCTTTTATCAGACATATTTCTTAGGTTTGAGGGGGA\n++\n+FGCGBGDGGFGCGFF@GEGGG+GGGFGGFGGFGFG:F6G##'4FFDGGGGGEGDFEAGGEGFGGG9DEDGFGGF6FGAFFDGG/FDG@F8>GFGEDDFF==\n+@out-MT-7/2\n+GAGGTTGCGGTCTGTTAGTAGTATAGTGATGCTAGCAGCTAGGACCGGGAGAGATAGGAGAAGTAGGACTGCTGTGATTAGGACGGATCAGACGAAGAGGG\n++\n+:GFGGDG-G7CG.G>G7FDEFFFGDBGEGFF2&1GGGFGF<EGGGGGGFFGGGFFGGFFGGEDF6DGDFGGGGFFGEG)FGD(<FGGA44GF:EGF?GG>D\n+@out-MT-9/2\n+GGGGAAGGCGCTTTGTGAAGTAGGCCTTATTTCTCTTGTCCTTTCGTACAGGGAGGAATTTGAATGTAGATAGAAACCGACCTGGATTACTCCGGTCTGAC\n++\n+GF=GFBFGGGGCF+;G;GGGGADGFBGGEGFFGCGGGGEA-G@EGGGFGFG:G>EFBFG5=GFEGGBFAGGG9GCGFG=3DGFF?EDGGF;5GF?F?BG9*\n+@out-MT-11/2\n+GTAGACTGTTCAACCTGTTCCTGCTCCGGCCTCCACTATAGCAGATGCGAGCAGGAGTAGGAGAGAGGGAGGTAAGAGTCAGAAGCTTATGTTGTTGATGC\n++\n+GDG5GGFGFFGGGEGFGFDDGGG@GGG%<GGGGGGF6GFGAGGGGFF>BFBEGGGFFFGGGA>BFGCCFDG>GAGGGG1FGGFGDGEG0D8FGFGA(FD1B\n+@out-MT-13/2\n+GTTACCTAGAAGGTTGCCTGGCTGGCCCAGCTCGGCTCGAATAAGGAGGCTTAGAGCTGTGCCTAGGACTCCAGCTCATGCGCCGAATAATAGGTATAGTG\n++\n+GDG?GGGF=GFFFGGFEA7GG<GGGGBGG@AGEGGGGEGBFFG79D?GFGFGGG=FGGGFB**=GFFDGGDE;BAGEG1FFCGGGGFFGDGGGG4FDFFCE\n+@out-MT-15/2\n+ATATGATAGGCATGTGATTGGTGGGTCATTATGTGTTGTCGTGCAGGTAGAGGCTTACTAGAAGTGTGAAAACGTAGGCTTGGATTCAGGCGACAGCGATT\n++\n+:@DB8F.F&F?AG=GGGGGDF0%DGGG:GFFB<FGGGCGGEEDGBGFGFCGG3@?D@GGFDGFGGGGEGFGGECGF?F>/;F@G4?$DFGGGECED<BBG2\n+@out-MT-17/2\n+GTCGTAAACCCTATTGTTGATATGGACTCTAGAATAGGATTGCGCTGTTATCCCTAGGGTAACTTGTTCCGTTGGTCAAGTTATTGGATCAATTGAGTATA\n++\n+GGGF;GEFFFGG=GFGF@EDEGGDGGCG;GEGGGGGEF:FGGG
GGAGFF:GGGCGCECFGGBCBFGGG=-EBDG<GFF3,F2FCAGGDC@6GFFD=E>GGG\n+@out-MT-19/2\n+GGATAGGCCGAGAACGTGTTGTGGGAAGAAAGTTAGATTTACGCCGATGAATATGATAGTGAAATGGATTTTGGCGTAGGTTTGGTCTAGGGTGTAGCCTG\n++\n+GBGGGF?GFG?GFG+?FFDGGGGGGBGGGF8GG<4FGFGGGGGFCGEGGGGGGEGGGCGGFEGGFGGGGGEGGGEGGGGFGGGF6<;8AEG9GFGGDFFBF\n+@out-MT-21/2\n+GAAATTGATGGCCCCTAACATAGAGGAGACACCTGCTAGGTGTAAGGAGAAGATGGTTAGCTCTACGGAGGCTCTAGGGTGGGAGTATTTCCCTGCTAAGG\n++\n+AGGGFGGGGG?EFFGFF3'FED.3AFDFG=FGGGC5GGGGG.ECA:GFGGGEGFGGGGFE%GGAFGGGEC$0;F'GGGDFG=4799(/>?BDFFGGGF?A?\n+@out-MT-23/2\n+GGTTTTGTATGTTCAAACTGTCATTTTATTTTTACGTTGTTAGATATGGGGAGTAGTGTGATTGAGGTGGAGTAGATTAGGCGTAGGTAGAAGTAGAGGTT\n++\n+DDGFFG?2@GEGGGG<GGGFGFG%FGC%$BGDGGEGGFFBGGD9?EEGGCGGGFGGCGEEDGGDBGGEGGFGGDGEGFE@EFC@EEFGGFF8:@C<CFFGE\n+@out-MT-25/2\n+TGCCCGCCTCTTCACGGGCAGGTCAATTTCACTGGTTAAAAGTAAGAGACAGCTGAACCCTCGTGGAGCCATTCATACAGGTCCCTATTTAAGGAACAATT\n++\n+GFGFFGGFAGFGGFF=GAG7>GE7<CGDBAEEGFGGG6@GGDGGGGGGGGGFGGG?DGDB@GGGGGDFFGGAGG>EFGGGGCGAF6G9FFEAFEFB8BF<D\n+@out-MT-27/2\n+CTACTTGCGCTGCATGTGCCATTAAGATATATAGGATTTAGCCTATAATTTAACTTTGACAAAGTTATGAAATGGTTTTTCTAATACCTTTTTGAAAAAGT\n++\n+FGGEFCGGG>GCDEGG:G*G.GFCFAFG@GGFFGCFFG>BGBDCGFFGD23DGGFC7;GGFFDGGGFDBFEFGFFFGGF/B?6BFGFFGEF>FFFFG&FF@\n+@out-MT-29/2\n+GTCTATGATGGTGGGGATGATGAGGCTATTGTTTTTTGTGAATTCTTCGATAATGGCCCATTTGGGCAAAAAGCCGGTTAGCGGGGGCAGGCCTCCTAGGG\n++\n+'$FGGFGGBFGGGGGGEFEEG<7E?FEG;GFFGGFG<FFGFFFF<FFCG7;GFGGGG>GDG0FFDDG=GDEGGGGFFGDGAFGGGAF@CF;EDGFGGGG90\n+@out-MT-31/2\n+GTTAGTAGTATAGTGATGCCAGCAGCTAGGACTGGGAGAGATAGGAGAAGTAGGACTGCTGTGATTAGGACGGATCAGACGAAGAGGGGCGTTTGGTATTG\n++\n+GEGF:GG9FGDGCD/FGFFFBEGGFFGCGGGGGCGFG@F8GDFGGFEGGFGGEG.E6FGGG5<GGF*EGCFFGGGGGGFDGB@FG@@29G8FGBA8D=0A3\n+@out-MT-33/2\n+GTTGTTTTCTATTAGACTATGGTGAGCTCAGGTGATTGATACTCCTGATGCGAGTAATACGGATGTGTTTAGGAGTGGGACTTCTAGGGGATTTAGCGGGG\n++\n+DGG?A?5GGGGGGGGGG@GEFGCEGGGCFGGGGGGGFGBGGFEGFGGFFGGGGGGE??G>8GG9EF/ECGFGEFGGCG:GGDG=G?B9FG;GA=E.;GD@B\n+@out-MT-35/2\n+TCTGCTCCTAGGTCGCCCCAACCGAAATTTTTAATGCAGGTTTGGTAGTTTAGGACCTGTGGGTTTGTTAGGTACTGTTTGCATTAATAAATTAAAGCTCC\n++\n+
GFF;B>3G(GGEGGDG5GGGBFGFGG<EFBGDEGGEEGGGBFGGGEFDAAGGDB@G<GG?D(CGG:F?EGDFG"..b"CTGATTTGCGTTCAGTTGATGCAGAGTGGGGTTTTTCAGTCCTTAGCTGTTACAGAAATTAAGTATTGCAACTTACTGAGTGCT\n++\n+DFEDFFEDFGDEGGDCGGGGGGCE<GGF;EFGGDGDFFEEGGGGGGGBEGGE-GGFGGDCGGGGF6ECGFGGF%GGGGDGGGGE:CDFCDF>FDE(?%EEC\n+@out-MT-887/2\n+CTATAGCAGATGCGAGCAGGAGTAGGAGAGAGGGAGGTAAGAGTCAGAAGCTTATGTTGTTTATGCGGGGAAACGCCATATCGGGGGCACCGATTATTAGG\n++\n+GF?AC;EGGEDGABGG'GGGGFFG?FGGDGEEGFGGG<GFDGGGGGGE?GGFF@CF?E-D<:CGGGGGECGGGD9@?GG;DF1E6E%@=GGGEC@FC38;7\n+@out-MT-889/2\n+GCCACTTATTAGTAATGTTGATAGTAGAATGATGGCTAGGGTGACTTCATATGAGATTGTTTGGGCTACTGCTCGCAGTGCGCCGATCAGGGCGTAGTTTG\n++\n+E@6CGFGAGGEFGGGGGFGEBGGGGGEGGGDEGGEE<GG4FGGGGGEG3GGFGGGFB2?GGEGF<FFFFBGGFFGG@1FGF6FE7GDGGEGGFDG5EFD?&\n+@out-MT-891/2\n+GCCATCTTAACAAACCCTGTTCTTGGGTGGGTGTGGGTATAATACTAAGTTGAGATGATATCATTTACGGGGGAAGGCGCTTTGTGAAGTAGGCCTTATTT\n++\n+AFG=DGGGFFGFD?GG@FGEEGFFFFDGGG+=GDEGGGGGG8DF5GGGGGDGFE8BGFDDG;GGGC<GGGEGGAF>C>EGGGD<<A.EA>G<3GGGEB?A?\n+@out-MT-893/2\n+CGAAGTTTATTACTCTTTTTTGAATGTTGTCAAAACTAGTTAATTGGAAGTTAACGGTACTATTTATACTAAAAGAGTAAGACCCTCATCAATAGACGGAG\n++\n+GFFGGFGE<GGGGEFGGDF:GGAFGGBEGDG:GFGG=G9FG?EGGGGGFG;>7GGFGGGF4AB?GG@04GG:5GEG;FE>G+FGFG8GGF5DFGFB&E93<\n+@out-MT-895/2\n+AATATTACTGCTGTTAGAGAAATGAATGAGCCTACCGATGATAGGATGTTTCATGTGGTGTATGCATCGGGGTAGTCCGAGTAACGTCGGGGCATTCCCGA\n++\n+F@GGFFGG=FEEFG@FFGGB%GGG>GGGGGGGFGG(EDGGGGA>GGGEGE;D2GGEG>=GEGGFE=AFFGG=0GGGGE+GGGDGFGFGGC?EAGE+>C%6A\n+@out-MT-897/2\n+GGTGGTTGGTGTAAATGAGTGAGGCAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAG\n++\n+BGGGFGFBGFFGEGGFG=FGGFEGGFFGDAG>GFD@G8/GGGFGDFF9GGG=FCCFE%EFGFFFEBG>E>5=FGDFDEGGABG@E@2GGGFDE@BG2;?G<\n+@out-MT-899/2\n+TATTGTGTGTTCAGATATGTTAAAGCCACTTTCGTAGTCTATTTTGTGTCAACTGGAGTTTTTTACAACTCAGGTGAGTTTTAGCTTGATTGGGGAGGGGG\n++\n+FFGDGFCCGGGFGFGFGGGGG68GF39G:FCG7GGGGEG+DECGDFGFGGFGGFDFFGGGF89DG14>?F<;)FGADGGGAGDGGA/$(=>@F@FCFFB/F\n+@out-MT-901/2\n+CTTCACGGGCAGGTCAATTTCACTGGTTAAAAGTAAGAGACAGCTGAACCCTCGTGGAGCCATTCATACAGGTCCCTATTTAATGAACAAGTGATTATGCT\n++\n+FGGGGGE
GGFGGGFFGGGGG=?GGGGGGF:GGBEGGBFGFGEEGGGGFGFCGFGG=GFGGFGGGFGFGGAF+GF<DC=?FFG&$E<3=FF@=%EGGG54DF\n+@out-MT-903/2\n+GAGTGTGCGTGCAAAGATGGTAGAGTAGATGACGGGTTGGGCCAGGGGATTAATTAGTACGGGAAGGGTATAACCAACATTTTCGGGGTATCGGCCCGATA\n++\n+FDFBGGGF%%FGFG>GEGGF/+DGG@F:FFCFGGGFEFGCGG=DGFGGEFGGFFDGFGGFGFE=FEG>FGCFEGGGCGGEGF=GBF:FAC?07GGF=?5/-\n+@out-MT-905/2\n+AGGGTGGATGGAATTAAGGGTGTTAGTCATGTTAGCTTGTTTCAGGTGCGAGATAGTAGTAGGGTCGTGGTGCTGGAGTTTAAGTTGAGTAGTAGGAATGC\n++\n+FDCFFGGFGEDFFG>D+F>GGDGGGGGGD>FGCGGFGFFEC*FGGGGFGEGGFFFFGG=A@GGEGGEG;CBGGFE<G.CGFCEAFEG7EEGGGGGG@>>ED\n+@out-MT-907/2\n+TGCGGCGGGTAGGCCTAGGATTGTGGGGGCAATGAATGAAGCGAACAGATTTTCGTTCATTTTGGTTCTCAGGGTTTGTTATAATTTTTTATTTTTATGGG\n++\n+GGGG9DGG4G@GGAG@8G?:GGGFGGGGGDCFGGFFGGFGG>GGGGGFF<GGEGCBGGGF>6GEGEFGGFGF:GBGGGDF;DGFEEAGBFGFEFGF=-A=)\n+@out-MT-909/2\n+AAGCAGATAGTGAGGAAAGTTGAGCCAATAATGAGGTGAAGTCCGTGGAAGCCTGTGGCTACAAAAAATGTTGAGCAGTAGATGCCGTCGGAAATGGTGAA\n++\n+FGBEFGGGFGGCFG;9GCFGGFGEFFGGGFEADG(GGGF2GFGGGG0<DGGGFFGEGGEGF4GF37FFGGG?C=BG(CB@>FDGFEG7D:E<2CFGFFGED\n+@out-MT-911/2\n+TTAAGGAACAAGTGATTATGCTACCTTTGCACGGTTAGGGTACCGCGGCCGTTAAACAGGTGTCACTGGGCAGGCGGTGCCTCTAATACTGGTGATGCTAG\n++\n+GF5GGEFGGFGEGG<CGFGEGFFGEGEFG09GGGGEGGFGFBE=FFEGFGGFGECDBF-3GGF8?GGGGDE;FBCFB6EEGFEGGG65CGGB5;EGGCA58\n+@out-MT-913/2\n+CTTGGGTAACCTCTGGGACTCAGAAGTGAAAGGGGGCTATTCCTAGTTTTATTGCTATAGCTATTATGATTATTAATGATGAGTATTGATTGGTAGTATTG\n++\n+GGF38EGFFFF/<GFG0GGFGGFGBGCDGGGGGCB;4?G;GGGGDGGG>6FGFFGGGGG:=FGGFFGFE8@BE1FFGDCFGGFG5GGE0;0AEFDEF@GGC\n+@out-MT-915/2\n+TCTTCTATAGGGTGATAGATTGGTCCAATTGGGTGTGAGGAGTTCAGTTATATGTTTGGGATTTTTTAGGTAGTGGGTGTTGAGCTTGAACGCTTTCTTAA\n++\n+FEDEGEGFGFGGGFFEEFFGGGFFGFGGFGGGGFGFGGDG>FGGEG?GGF<;FED0FFGGGAFGGG@DGFG)FGDEGEFGDCFD15<FEFE<FEE8BB1EF\n+@out-MT-917/2\n+GAAGGTATAGGGGTTAGTCCTTGCTATATTATGCTTGGTTATAATTTTTCATCTTTCCCTTGCGGTACTATATCTATTGCGCCAGGTTTCAATTTCTATCG\n++\n+G>GGGGFGGGFGGGGGGEFDGFGFGEGGFGGGD7GEGFGGA>GGB4GGF@C>EFD;E>GFFFG/'9FD8*AFGBFC3=B@EGF;?FAF@>GFEDEE522EF\n+@out-MT-919/2\n+GCAAATACAGCTCCTATTGATAGGACATAGTGGAAGTGGGCTACAACGTAGTACGTGTCGTGT
AGTACGATGTCTAGTGATGAGGTTGCTAATACAATGCC\n++\n+GFGGFGG<EGAGGG@GGFEDFGFFD,-FFFCGGGGGDCC=FG>GEGCGFGEGGFFGGFFFG<D@GF=-ECD<GDGE<GA9DGGF3FGFGEG5EF%FDEFD7\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-computeGC.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-computeGC.p Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,72 @@
+(lp1
+(lp2
+I0
+aI1
+aI2
+aI3
+aI4
+aI5
+aI6
+aI7
+aI8
+aI9
+aI10
+aa(lp3
+cnumpy.core.multiarray
+scalar
+p4
+(cnumpy
+dtype
+p5
+(S'f8'
+I0
+I1
+tRp6
+(I3
+S'<'
+NNNI-1
+I-1
+I0
+tbS'\x06NA\\8\xdd\xee?'
+tRp7
+ag4
+(g6
+S'\x96\xae\xe2\xed\x85:\xf0?'
+tRp8
+ag4
+(g6
+S'\xcc\xb0\xbaf\xf3\x1f\xf1?'
+tRp9
+ag4
+(g6
+S'\x02DS)\x055\xef?'
+tRp10
+ag4
+(g6
+S'\x0eX7\xe2L&\xf0?'
+tRp11
+ag4
+(g6
+S'\x85\x00\xb4\xd6?\xca\xef?'
+tRp12
+ag4
+(g6
+S'qs\n"\x02\xd7\xef?'
+tRp13
+ag4
+(g6
+S'\x17\xf1&\xbe*3\xef?'
+tRp14
+ag4
+(g6
+S'\x03\xa6\xd4\x8a\xedT\xf0?'
+tRp15
+ag4
+(g6
+S'\x17\xeb\xc7\x18\x96\x0e\xf0?'
+tRp16
+ag4
+(g6
+S'\x00\x00\x00\x00\x00\x00\x00\x00'
+tRp17
+aa.
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-fraglen.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-fraglen.p Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,4 @@
+(lp1
+(lp2
+a(lp3
+a.
\ No newline at end of file
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-genMutModel.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-genMutModel.p Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,1401 @@\n+(dp0\n+S'HIGH_MUT_REGIONS'\n+p1\n+(lp2\n+(S'chrMT'\n+p3\n+I2000\n+I16000\n+F0.0010714285714285715\n+tp4\n+asS'COMMON_VARIANTS'\n+p5\n+(lp6\n+(g3\n+I4077\n+S'T'\n+p7\n+S'C'\n+p8\n+F1e-05\n+tp9\n+a(g3\n+I4681\n+S'A'\n+p10\n+S'G'\n+p11\n+F1e-05\n+tp12\n+a(g3\n+I5712\n+S'C'\n+p13\n+S'T'\n+p14\n+F1e-05\n+tp15\n+a(g3\n+I6502\n+S'A'\n+p16\n+S'G'\n+p17\n+F1e-05\n+tp18\n+a(g3\n+I7424\n+S'C'\n+p19\n+S'T'\n+p20\n+F1e-05\n+tp21\n+a(g3\n+I7777\n+S'T'\n+p22\n+S'C'\n+p23\n+F1e-05\n+tp24\n+a(g3\n+I8470\n+S'C'\n+p25\n+S'T'\n+p26\n+F1e-05\n+tp27\n+a(g3\n+I8775\n+S'C'\n+p28\n+S'T'\n+p29\n+F1e-05\n+tp30\n+a(g3\n+I10749\n+S'A'\n+p31\n+S'G'\n+p32\n+F1e-05\n+tp33\n+a(g3\n+I11272\n+S'G'\n+p34\n+S'A'\n+p35\n+F1e-05\n+tp36\n+a(g3\n+I11742\n+S'C'\n+p37\n+S'T'\n+p38\n+F1e-05\n+tp39\n+a(g3\n+I13131\n+S'-'\n+p40\n+S'T'\n+p41\n+F1e-05\n+tp42\n+a(g3\n+I13272\n+S'A'\n+p43\n+S'G'\n+p44\n+F1e-05\n+tp45\n+a(g3\n+I13803\n+S'G'\n+p46\n+S'A'\n+p47\n+F1e-05\n+tp48\n+a(g3\n+I14066\n+S'C'\n+p49\n+S'T'\n+p50\n+F1e-05\n+tp51\n+asS'SNP_TRANS_FREQ'\n+p52\n+(dp53\n+(S'A'\n+p54\n+S'G'\n+p55\n+tp56\n+F1.0\n+s(S'C'\n+p57\n+g41\n+tp58\n+F1.0\n+s(g41\n+g57\n+tp59\n+F1.0\n+s(g55\n+g54\n+tp60\n+F1.0\n+ssS'SNP_FREQ'\n+p61\n+F0.9333333333333333\n+sS'TRINUC_MUT_PROB'\n+p62\n+(dp63\n+S'ACC'\n+p64\n+F0.003883495145631068\n+sS'ATG'\n+p65\n+F0.0\n+sS'AAG'\n+p66\n+F0.0\n+sS'AAA'\n+p67\n+F0.0\n+sS'ATC'\n+p68\n+F0.0\n+sS'AAC'\n+p69\n+F0.00202020202020202\n+sS'ATA'\n+p70\n+F0.0027247956403269754\n+sS'AGG'\n+p71\n+F0.005747126436781609\n+sS'CCT'\n+p72\n+F0.0036900369003690036\n+sS'CTC'\n+p73\n+F0.0\n+sS'AGC'\n+p74\n+F0.0035460992907801418\n+sS'ACA'\n+p75\n+F0.0\n+sS'AGA'\n+p76\n+F0.0\n+sS'CAT'\n+p77\n+F0.002403846153846154\n+sS'AAT'\n+p78\n+F0.0\n+sS'ATT'\n+p79\n+F0.0\n+sS'CTG'\n+p80\n+F0.005555555555555556\n+sS'CTA'\n+p81\n+F0.0\n+sS'ACT'\n+p82\n+F0.0\n+sS'CAC'\n+p83\n+F0.0\n+sS'ACG'\n+p84\n+F0.0\n+sS'CAA'\n+p85\n+F0.0\n+sS'AGT'\n+p86\n+F0.0\n+sS'CCA'\n+p87\n+F0.0\n+sS'CCG'\n+p88\n+F0.0\n+sS'CCC'\n+p89\n+F0
.0\n+sS'TAT'\n+p90\n+F0.0\n+sS'GGT'\n+p91\n+F0.0\n+sS'TGT'\n+p92\n+F0.0\n+sS'CGA'\n+p93\n+F0.0\n+sS'CAG'\n+p94\n+F0.005025125628140704\n+sS'CGC'\n+p95\n+F0.0\n+sS'GAT'\n+p96\n+F0.0\n+sS'CGG'\n+p97\n+F0.0\n+sS'CTT'\n+p98\n+F0.0\n+sS'TGC'\n+p99\n+F0.0\n+sS'GGG'\n+p100\n+F0.0\n+sS'TAG'\n+p101\n+F0.0\n+sS'GGA'\n+p102\n+F0.0\n+sS'TAA'\n+p103\n+F0.0024154589371980675\n+sS'GGC'\n+p104\n+F0.0\n+sS'TAC'\n+p105\n+F0.0\n+sS'GAG'\n+p106\n+F0.0\n+sS'TCG'\n+p107\n+F0.0\n+sS'TTA'\n+p108\n+F0.0\n+sS'TTT'\n+p109\n+F0.0\n+sS'GAC'\n+p110\n+F0.0\n+sS'CGT'\n+p111\n+F0.0\n+sS'GAA'\n+p112\n+F0.0\n+sS'TCA'\n+p113\n+F0.0024096385542168677\n+sS'GCA'\n+p114\n+F0.0\n+sS'GTA'\n+p115\n+F0.0\n+sS'GCC'\n+p116\n+F0.0036900369003690036\n+sS'GTC'\n+p117\n+F0.0\n+sS'GCG'\n+p118\n+F0.0\n+sS'GTG'\n+p119\n+F0.0\n+sS'TTC'\n+p120\n+F0.0\n+sS'GTT'\n+p121\n+F0.0\n+sS'GCT'\n+p122\n+F0.0\n+sS'TGA'\n+p123\n+F0.0\n+sS'TTG'\n+p124\n+F0.0\n+sS'TCC'\n+p125\n+F0.0\n+sS'TGG'\n+p126\n+F0.0\n+sS'TCT'\n+p127\n+F0.0\n+ssS'TRINUC_TRANS_PROBS'\n+p128\n+(dp129\n+(S'TTA'\n+p130\n+S'TAA'\n+p131\n+tp132\n+F0.0\n+s(S'TGA'\n+p133\n+S'TCA'\n+p134\n+tp135\n+F0.0\n+s(S'GAA'\n+p136\n+S'GCA'\n+p137\n+tp138\n+F0.0\n+s(S'ACG'\n+p139\n+S'AGG'\n+p140\n+tp141\n+F0.0\n+s(g133\n+S'TTA'\n+p142\n+tp143\n+F0.0\n+s(S'AAG'\n+p144\n+S'ATG'\n+p145\n+tp146\n+F0.0\n+s(S'TCA'\n+p147\n+S'TGA'\n+p148\n+tp149\n+F0.0\n+s(S'GAC'\n+p150\n+S'GCC'\n+p151\n+tp152\n+F0.0\n+s(S'CTT'\n+p153\n+S'CGT'\n+p154\n+tp155\n+F0.0\n+s(S'GGC'\n+p156\n+S'GTC'\n+p157\n+tp158\n+F0.0\n+s(g136\n+S'GTA'\n+p159\n+tp160\n+F0.0\n+s(S'AAT'\n+p161\n+S'AGT'\n+p162\n+tp163\n+F0.0\n+s(S'GCG'\n+p164\n+S'GTG'\n+p165\n+tp166\n+F0.0\n+s(S'TAA'\n+p167\n+S'TCA'\n+p168\n+tp169\n+F0.0\n+s(S'CCT'\n+p170\n+S'CAT'\n+p171\n+tp172\n+F0.0\n+s(S'GAG'\n+p173\n+S'GCG'\n+p174\n+tp175\n+F0.0\n+s(S'ATT'\n+p176\n+S'AGT'\n+p177\n+tp178\n+F0.0\n+s(S'CGT'\n+p179\n+S'CTT'\n+p180\n+tp181\n+F0.0\n+s(S'CAT'\n+p182\n+S'CGT'\n+p183\n+tp184\n+F1.0\n+s(g164\n+S'GAG'\n+p185\n+tp186\n+F0.0\n+s(g136\n+S'GGA'\n+p187\n+tp1
88\n+F0.0\n+s(g156\n+S'GAC'\n+p189\n+tp190\n+F0.0\n+s(S'TTG'\n+p191\n+S'TCG'\n+p192\n+tp193\n+F0.0\n+s(S'ATA'\n+p194\n+S'AGA'\n+p195\n+tp196\n+F0.0\n+s(S'TGG'\n+p197\n+S'TCG'\n+p198\n+tp199\n+F0.0\n+s(S'GAT'\n+p200\n+S'GCT'\n+p201\n+tp202\n+F0.0\n+s(S'GGG'\n+p203\n+S'GAG'\n+p204\n+tp205\n+F0.0\n+s(S'AAA'\n+p206\n+S'AGA'\n+p207\n+tp208\n+F0.0\n+s(S'TCA'\n+p209\n+S'TTA'\n+p210\n+tp211\n+F1.0\n+s(S'ACC'\n+p212\n+S'AGC'\n+p213\n+tp214\n+F0.0\n+s(g164\n+S'GGG'\n+p215\n+tp216\n+F0.0\n+s(S'AAC'\n+p217\n+S'AGC'\n+p218\n+tp219\n+F1.0\n+s(S'GTA'\n+p"..b"59\n+tp360\n+F0.0\n+s(S'TAG'\n+p361\n+S'TCG'\n+p362\n+tp363\n+F0.0\n+s(g200\n+S'GTT'\n+p364\n+tp365\n+F0.0\n+s(S'TGT'\n+p366\n+S'TTT'\n+p367\n+tp368\n+F0.0\n+s(g223\n+S'AAA'\n+p369\n+tp370\n+F0.0\n+s(g249\n+S'ATT'\n+p371\n+tp372\n+F0.0\n+s(g325\n+S'TGC'\n+p373\n+tp374\n+F0.0\n+s(g206\n+S'ATA'\n+p375\n+tp376\n+F0.0\n+s(g156\n+S'GCC'\n+p377\n+tp378\n+F0.0\n+s(S'GCC'\n+p379\n+S'GAC'\n+p380\n+tp381\n+F0.0\n+s(g294\n+S'AAC'\n+p382\n+tp383\n+F0.0\n+s(S'CAT'\n+p384\n+S'CTT'\n+p385\n+tp386\n+F0.0\n+s(g315\n+S'CCA'\n+p387\n+tp388\n+F0.0\n+s(S'TCT'\n+p389\n+S'TGT'\n+p390\n+tp391\n+F0.0\n+s(S'GGA'\n+p392\n+S'GCA'\n+p393\n+tp394\n+F0.0\n+s(S'TCG'\n+p395\n+S'TGG'\n+p396\n+tp397\n+F0.0\n+s(g220\n+S'GAA'\n+p398\n+tp399\n+F0.0\n+s(g276\n+S'CTG'\n+p400\n+tp401\n+F0.0\n+s(g358\n+S'AAG'\n+p402\n+tp403\n+F0.0\n+s(S'CCT'\n+p404\n+S'CTT'\n+p405\n+tp406\n+F1.0\n+s(g226\n+S'GGA'\n+p407\n+tp408\n+F0.0\n+s(S'CCC'\n+p409\n+S'CTC'\n+p410\n+tp411\n+F0.0\n+s(g392\n+S'GAA'\n+p412\n+tp413\n+F0.0\n+s(g150\n+S'GTC'\n+p414\n+tp415\n+F0.0\n+s(g130\n+S'TCA'\n+p416\n+tp417\n+F0.0\n+s(g353\n+S'TGT'\n+p418\n+tp419\n+F0.0\n+s(g409\n+S'CGC'\n+p420\n+tp421\n+F0.0\n+s(g294\n+S'ACC'\n+p422\n+tp423\n+F0.0\n+s(S'GCT'\n+p424\n+S'GGT'\n+p425\n+tp426\n+F0.0\n+s(g361\n+S'TGG'\n+p427\n+tp428\n+F0.0\n+s(g266\n+S'CTA'\n+p429\n+tp430\n+F0.0\n+s(S'CTG'\n+p431\n+S'CAG'\n+p432\n+tp433\n+F0.0\n+s(g255\n+S'CTC'\n+p434\n+tp435\n+F0.0\n+s(g173\n+S'GTG'\n+p436\n+tp437\n+F0.0\n+s(S'GGT'\n+
p438\n+S'GCT'\n+p439\n+tp440\n+F0.0\n+s(g244\n+S'TCC'\n+p441\n+tp442\n+F0.0\n+s(g358\n+S'ACG'\n+p443\n+tp444\n+F0.0\n+s(g229\n+S'CAG'\n+p445\n+tp446\n+F0.0\n+s(g203\n+S'GTG'\n+p447\n+tp448\n+F0.0\n+s(S'GCC'\n+p449\n+S'GTC'\n+p450\n+tp451\n+F1.0\n+s(g232\n+S'TCT'\n+p452\n+tp453\n+F0.0\n+s(g389\n+S'TAT'\n+p454\n+tp455\n+F0.0\n+s(g424\n+S'GTT'\n+p456\n+tp457\n+F0.0\n+s(g238\n+S'ACA'\n+p458\n+tp459\n+F0.0\n+s(g287\n+S'CAC'\n+p460\n+tp461\n+F0.0\n+s(S'CGG'\n+p462\n+S'CAG'\n+p463\n+tp464\n+F0.0\n+s(g299\n+S'CTA'\n+p465\n+tp466\n+F0.0\n+s(g346\n+S'GGG'\n+p467\n+tp468\n+F0.0\n+s(g194\n+S'AAA'\n+p469\n+tp470\n+F0.0\n+s(g244\n+S'TTC'\n+p471\n+tp472\n+F0.0\n+s(g384\n+S'CCT'\n+p473\n+tp474\n+F0.0\n+s(g312\n+S'CTC'\n+p475\n+tp476\n+F0.0\n+s(g179\n+S'CAT'\n+p477\n+tp478\n+F0.0\n+s(g133\n+S'TAA'\n+p479\n+tp480\n+F0.0\n+s(g395\n+S'TAG'\n+p481\n+tp482\n+F0.0\n+s(g309\n+S'GGC'\n+p483\n+tp484\n+F0.0\n+s(g167\n+S'TTA'\n+p485\n+tp486\n+F0.0\n+s(g144\n+S'AGG'\n+p487\n+tp488\n+F0.0\n+s(g197\n+S'TTG'\n+p489\n+tp490\n+F0.0\n+s(g366\n+S'TAT'\n+p491\n+tp492\n+F0.0\n+s(g353\n+S'TCT'\n+p493\n+tp494\n+F0.0\n+s(g389\n+S'TTT'\n+p495\n+tp496\n+F0.0\n+s(g424\n+S'GAT'\n+p497\n+tp498\n+F0.0\n+s(g241\n+S'CAA'\n+p499\n+tp500\n+F0.0\n+s(g255\n+S'CGC'\n+p501\n+tp502\n+F0.0\n+s(g395\n+S'TTG'\n+p503\n+tp504\n+F0.0\n+s(g176\n+S'ACT'\n+p505\n+tp506\n+F0.0\n+s(g153\n+S'CAT'\n+p507\n+tp508\n+F0.0\n+s(g249\n+S'AAT'\n+p509\n+tp510\n+F0.0\n+s(g179\n+S'CCT'\n+p511\n+tp512\n+F0.0\n+s(S'AGC'\n+p513\n+S'AAC'\n+p514\n+tp515\n+F1.0\n+s(g191\n+S'TGG'\n+p516\n+tp517\n+F0.0\n+s(g170\n+S'CGT'\n+p518\n+tp519\n+F0.0\n+s(g309\n+S'GAC'\n+p520\n+tp521\n+F0.0\n+s(S'CAG'\n+p522\n+S'CGG'\n+p523\n+tp524\n+F1.0\n+s(g325\n+S'TTC'\n+p525\n+tp526\n+F0.0\n+s(g161\n+S'ACT'\n+p527\n+tp528\n+F0.0\n+s(g130\n+S'TGA'\n+p529\n+tp530\n+F0.0\n+s(g409\n+S'CAC'\n+p531\n+tp532\n+F0.0\n+s(g220\n+S'GGA'\n+p533\n+tp534\n+F0.0\n+s(g197\n+S'TAG'\n+p535\n+tp536\n+F0.0\n+s(g361\n+S'TTG'\n+p537\n+tp538\n+F0.0\n+s(S'AGG'\n+p539\n+S'AAG'\n+p540\n+tp541\n+F1.0
\n+s(g284\n+S'TGC'\n+p542\n+tp543\n+F0.0\n+s(g462\n+S'CCG'\n+p544\n+tp545\n+F0.0\n+s(g346\n+S'GAG'\n+p546\n+tp547\n+F0.0\n+s(g339\n+S'ACC'\n+p548\n+tp549\n+F0.0\n+s(g431\n+S'CGG'\n+p550\n+tp551\n+F0.0\n+s(g315\n+S'CGA'\n+p552\n+tp553\n+F0.0\n+s(g392\n+S'GTA'\n+p554\n+tp555\n+F0.0\n+s(g139\n+S'AAG'\n+p556\n+tp557\n+F0.0\n+s(g438\n+S'GAT'\n+p558\n+tp559\n+F0.0\n+s(g258\n+S'TTC'\n+p560\n+tp561\n+F0.0\n+s(g191\n+S'TAG'\n+p562\n+tp563\n+F0.0\n+s(g161\n+S'ATT'\n+p564\n+tp565\n+F0.0\n+s(g379\n+S'GGC'\n+p566\n+tp567\n+F0.0\n+s(g287\n+S'CGC'\n+p568\n+tp569\n+F0.0\n+s(g462\n+S'CTG'\n+p570\n+tp571\n+F0.0\n+s(g366\n+S'TCT'\n+p572\n+tp573\n+F0.0\n+s(g144\n+S'ACG'\n+p574\n+tp575\n+F0.0\n+s(g312\n+S'CAC'\n+p576\n+tp577\n+F0.0\n+s(g438\n+S'GTT'\n+p578\n+tp579\n+F0.0\n+s(g284\n+S'TAC'\n+p580\n+tp581\n+F0.0\n+s(g203\n+S'GCG'\n+p582\n+tp583\n+F0.0\n+s(g226\n+S'GAA'\n+p584\n+tp585\n+F0.0\n+s(g229\n+S'CTG'\n+p586\n+tp587\n+F0.0\n+s(g299\n+S'CAA'\n+p588\n+tp589\n+F0.0\n+ssS'AVG_MUT_RATE'\n+p590\n+F0.0009053597295992274\n+sS'INDEL_FREQ'\n+p591\n+(dp592\n+I1\n+F1.0000000000000002\n+ss.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-gz.bam
b
Binary file test-data/chrMT-PE-VCF-BAM-gz.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-gz.bam.bai
b
Binary file test-data/chrMT-PE-VCF-BAM-gz.bam.bai has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-gz.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-gz.vcf Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.1
+##reference=chrMT.fa
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">
+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">
+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">
+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##ALT=<ID=TRANS,Description="Translocation">
+##ALT=<ID=INV-TRANS,Description="Inverted translocation">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+MT 335 . A AATTA . PASS WP=0/1
+MT 4078 . T C . PASS WP=0/1
+MT 4682 . A G . PASS WP=0/1
+MT 5713 . C T . PASS WP=0/1
+MT 6503 . A G . PASS WP=0/1
+MT 7425 . C T . PASS WP=1/0
+MT 7778 . T C . PASS WP=1/0
+MT 8471 . C T . PASS WP=1/0
+MT 8776 . C T . PASS WP=0/1
+MT 10750 . A G . PASS WP=0/1
+MT 11273 . G A . PASS WP=0/1
+MT 11743 . C T . PASS WP=1/0
+MT 13132 . C CT . PASS WP=0/1
+MT 13273 . A G . PASS WP=1/0
+MT 13804 . G A . PASS WP=0/1
+MT 14067 . C T . PASS WP=0/1
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-gz_read1.fq.gz
b
Binary file test-data/chrMT-PE-VCF-BAM-gz_read1.fq.gz has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-gz_read2.fq.gz
b
Binary file test-data/chrMT-PE-VCF-BAM-gz_read2.fq.gz has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-panic.bam
b
Binary file test-data/chrMT-PE-VCF-BAM-panic.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-panic.bam.bai
b
Binary file test-data/chrMT-PE-VCF-BAM-panic.bam.bai has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-panic.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-panic.vcf Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,581 @@\n+##fileformat=VCFv4.1\n+##reference=chrMT.fa\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">\n+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">\n+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">\n+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">\n+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">\n+##ALT=<ID=DEL,Description="Deletion">\n+##ALT=<ID=DUP,Description="Duplication">\n+##ALT=<ID=INS,Description="Insertion of novel sequence">\n+##ALT=<ID=INV,Description="Inversion">\n+##ALT=<ID=CNV,Description="Copy number variable region">\n+##ALT=<ID=TRANS,Description="Translocation">\n+##ALT=<ID=INV-TRANS,Description="Inverted translocation">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+MT\t550\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t553\t.\tC\tCGAGGC\t.\tPASS\tWP=0/0/1\n+MT\t622\t.\tG\tT\t.\tPASS\tWP=0/1/0\n+MT\t626\t.\tG\tA\t.\tPASS\tWP=0/0/1\n+MT\t627\t.\tG\tA\t.\tPASS\tWP=0/0/1\n+MT\t669\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t677\t.\tCTCT\tC\t.\tPASS\tWP=0/0/1\n+MT\t689\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t691\t.\tA\tT\t.\tPASS\tWP=0/0/1\n+MT\t702\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t798\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t821\t.\tT\tG\t.\tPASS\tWP=1/0/0\n+MT\t858\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t866\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t915\t.\tC\tG\t.\tPASS\tWP=0/0/1\n+MT\t980\t.\tT\tA\t.\tPASS\tWP=1/0/0\n+MT\t1000\t.\tT\tA\t.\tPASS\tWP=1/0/0\n+MT\t1008\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t1041\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t1072\t.\tG\tC\t.\tPASS\tWP=1/0/0\n+MT\t1082\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t1099\t.\tC\tA\t.\tPASS\tWP=0/0/1\n+MT\t1116\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t1124\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t1136\t.\tC\tG\t.\tPASS\tWP=0/1/0\n+MT\t1210\t.\tT\tTC\t.\tPASS\tWP=0/0/1\n+MT\t1224\t.\tC\tG\t.\tPASS\tWP=0/0/1\n+MT\t1250\t.\tC\
tCTTCTC\t.\tPASS\tWP=1/0/0\n+MT\t1292\t.\tA\tT\t.\tPASS\tWP=1/0/0\n+MT\t1314\t.\tC\tG\t.\tPASS\tWP=0/0/1\n+MT\t1330\t.\tC\tCG\t.\tPASS\tWP=0/0/1\n+MT\t1330\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t1334\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t1361\t.\tG\tT\t.\tPASS\tWP=0/1/0\n+MT\t1378\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t1400\t.\tT\tG\t.\tPASS\tWP=1/0/0\n+MT\t1404\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t1423\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t1432\t.\tT\tTCATTGTTT\t.\tPASS\tWP=0/0/1\n+MT\t1453\t.\tA\tC\t.\tPASS\tWP=0/1/0\n+MT\t1456\t.\tT\tA\t.\tPASS\tWP=0/1/0\n+MT\t1516\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t1528\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t1544\t.\tA\tACGGGA\t.\tPASS\tWP=0/0/1\n+MT\t1633\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t1635\t.\tC\tG\t.\tPASS\tWP=0/1/0\n+MT\t1683\t.\tC\tCA\t.\tPASS\tWP=0/0/1\n+MT\t1725\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t1776\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t1792\t.\tG\tGT\t.\tPASS\tWP=0/0/1\n+MT\t1797\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t1802\t.\tA\tC\t.\tPASS\tWP=1/0/0\n+MT\t1878\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t1879\t.\tG\tC\t.\tPASS\tWP=1/0/0\n+MT\t1909\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t1937\t.\tA\tAC\t.\tPASS\tWP=0/0/1\n+MT\t1982\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t1984\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t1991\t.\tA\tAATGCT\t.\tPASS\tWP=0/0/1\n+MT\t2027\t.\tA\tT\t.\tPASS\tWP=1/0/0\n+MT\t2034\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t2075\t.\tT\tC\t.\tPASS\tWP=1/0/1\n+MT\t2120\t.\tGGAC\tG\t.\tPASS\tWP=0/1/0\n+MT\t2135\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t2157\t.\tT\tC\t.\tPASS\tWP=0/0/1\n+MT\t2177\t.\tT\tG\t.\tPASS\tWP=0/0/1\n+MT\t2225\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t2229\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t2312\t.\tAACAT\tA\t.\tPASS\tWP=1/0/0\n+MT\t2361\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t2363\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t2384\t.\tA\tG\t.\tPASS\tWP=1/1/1\n+MT\t2402\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t2494\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t2497\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t2510\t.\tT\tG\t.\tPASS\tWP=0/1/0\n+MT\t2528\t.\tG\tGATAAACT\t.\tPASS\tWP=1/0/0\n+MT\t2530\t.\tA\tG\t.\tPA
SS\tWP=0/0/1\n+MT\t2554\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t2564\t.\tA\tT\t.\tPASS\tWP=1/0/0\n+MT\t2626\t.\tT\tTT\t.\tPASS\tWP=0/0/1\n+MT\t2636\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t2646\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t2663\t.\tC\tG\t.\tPASS\tWP=1/0/0\n+MT\t2673\t.\tG\tA\t.\tPASS\tWP=0/0/1\n+MT\t2717\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t2737\t.\tT\tC\t.\tPASS\tWP=0/0/1\n+MT\t2739\t.\tT\tC\t.\tPASS\tWP=0/0/1\n+MT\t2756\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t2846\t.\tG\tT\t.\tPASS\tWP=0/1/0\n+MT\t2851\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t2889\t.\tC\tA\t.\tPASS\tWP=1/0/0\n+MT\t2938\t.\tA\tAG\t.\tPASS\tWP=1/0/0\n+MT\t2947\t.\tTC\tT\t.\tPASS\tWP=1/0/0\n+MT\t2979\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t3010\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t3011\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t3043\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t3066\t.\tC'..b'PASS\tWP=1/0/0\n+MT\t13103\t.\tG\tGA\t.\tPASS\tWP=0/1/0\n+MT\t13232\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t13263\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t13265\t.\tT\tG\t.\tPASS\tWP=0/1/0\n+MT\t13289\t.\tG\tA\t.\tPASS\tWP=0/0/1\n+MT\t13298\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t13310\t.\tCATT\tC\t.\tPASS\tWP=0/0/1\n+MT\t13323\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t13344\t.\tAGCCA\tA\t.\tPASS\tWP=1/0/0\n+MT\t13369\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t13380\t.\tC\tCGTC\t.\tPASS\tWP=0/1/0\n+MT\t13384\t.\tC\tA\t.\tPASS\tWP=0/0/1\n+MT\t13386\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t13411\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t13462\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t13488\t.\tT\tA\t.\tPASS\tWP=1/0/0\n+MT\t13493\t.\tT\tG\t.\tPASS\tWP=0/1/0\n+MT\t13501\t.\tT\tTCGTCT\t.\tPASS\tWP=0/0/1\n+MT\t13617\t.\tT\tC\t.\tPASS\tWP=0/0/1\n+MT\t13619\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t13639\t.\tCCT\tC\t.\tPASS\tWP=0/0/1\n+MT\t13672\t.\tAA\tA\t.\tPASS\tWP=0/0/1\n+MT\t13758\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t13763\t.\tC\tA\t.\tPASS\tWP=0/0/1\n+MT\t13787\t.\tT\tG\t.\tPASS\tWP=0/0/1\n+MT\t13808\t.\tT\tA\t.\tPASS\tWP=0/0/1\n+MT\t13894\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t13941\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t13943\t.\tC\tA\t.\tPASS
\tWP=0/1/0\n+MT\t14005\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t14008\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t14060\t.\tT\tG\t.\tPASS\tWP=1/0/0\n+MT\t14067\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t14114\t.\tT\tTA\t.\tPASS\tWP=1/0/0\n+MT\t14137\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t14175\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t14187\t.\tC\tA\t.\tPASS\tWP=0/1/0\n+MT\t14242\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t14326\t.\tT\tC\t.\tPASS\tWP=1/1/1\n+MT\t14356\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t14373\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t14383\t.\tC\tCTTCGTCC\t.\tPASS\tWP=1/0/0\n+MT\t14405\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t14410\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t14460\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t14492\t.\tC\tG\t.\tPASS\tWP=1/0/0\n+MT\t14520\t.\tC\tA\t.\tPASS\tWP=1/0/0\n+MT\t14522\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t14595\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t14612\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t14653\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t14665\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t14673\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t14699\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t14703\t.\tA\tT\t.\tPASS\tWP=1/0/0\n+MT\t14727\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t14740\t.\tA\tG\t.\tPASS\tWP=0/0/1\n+MT\t14807\t.\tCT\tC\t.\tPASS\tWP=1/0/0\n+MT\t14809\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t14831\t.\tG\tA\t.\tPASS\tWP=0/0/1\n+MT\t14851\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t14895\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t14900\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t14941\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t15009\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t15050\t.\tC\tCAAGGCATAT\t.\tPASS\tWP=0/1/0\n+MT\t15066\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t15073\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t15081\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t15110\t.\tGCAACT\tG\t.\tPASS\tWP=0/0/1\n+MT\t15123\t.\tC\tG\t.\tPASS\tWP=0/0/1\n+MT\t15123\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t15125\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t15190\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t15231\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t15234\t.\tG\tA\t.\tPASS\tWP=0/0/1\n+MT\t15270\t.\tT\tA\t.\tPASS\tWP=0/0/1\n+MT\t15280\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t
15293\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t15307\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t15311\t.\tA\tAT\t.\tPASS\tWP=0/1/0\n+MT\t15317\t.\tG\tA\t.\tPASS\tWP=0/1/0\n+MT\t15324\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t15375\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t15427\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t15477\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t15527\t.\tC\tG\t.\tPASS\tWP=1/0/0\n+MT\t15565\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t15569\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t15596\t.\tG\tGAGTCC\t.\tPASS\tWP=1/0/0\n+MT\t15608\t.\tC\tG\t.\tPASS\tWP=1/0/0\n+MT\t15611\t.\tG\tC\t.\tPASS\tWP=0/1/0\n+MT\t15683\t.\tCA\tC\t.\tPASS\tWP=0/1/0\n+MT\t15748\t.\tT\tC\t.\tPASS\tWP=0/0/1\n+MT\t15762\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t15807\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t15822\t.\tT\tG\t.\tPASS\tWP=0/0/1\n+MT\t15833\t.\tC\tA\t.\tPASS\tWP=0/0/1\n+MT\t15847\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t15957\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t15968\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t15997\t.\tT\tC\t.\tPASS\tWP=0/0/1\n+MT\t15999\t.\tA\tC\t.\tPASS\tWP=0/0/1\n+MT\t16062\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t16067\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t16088\t.\tT\tC\t.\tPASS\tWP=0/0/1\n+MT\t16116\t.\tA\tG\t.\tPASS\tWP=1/0/0\n+MT\t16132\t.\tA\tG\t.\tPASS\tWP=0/1/0\n+MT\t16201\t.\tC\tA\t.\tPASS\tWP=1/0/0\n+MT\t16203\t.\tA\tT\t.\tPASS\tWP=0/1/0\n+MT\t16217\t.\tT\tC\t.\tPASS\tWP=1/0/0\n+MT\t16228\t.\tC\tT\t.\tPASS\tWP=1/0/0\n+MT\t16266\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t16274\t.\tG\tA\t.\tPASS\tWP=0/0/1\n+MT\t16277\t.\tA\tC\t.\tPASS\tWP=0/0/1\n+MT\t16295\t.\tC\tT\t.\tPASS\tWP=0/0/1\n+MT\t16301\t.\tC\tT\t.\tPASS\tWP=0/1/0\n+MT\t16308\t.\tT\tC\t.\tPASS\tWP=1/1/1\n+MT\t16345\t.\tA\tT\t.\tPASS\tWP=1/0/0\n+MT\t16390\t.\tG\tA\t.\tPASS\tWP=1/0/0\n+MT\t16409\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t16413\t.\tT\tC\t.\tPASS\tWP=0/1/0\n+MT\t16555\t.\tT\tC\t.\tPASS\tWP=0/0/1\n'
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-panic_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-panic_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,4416 @@\n+@out-MT-1/1\n+ACCAGAGTGTAGCTTAACACAAGGCACCCAACTCAGACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACTCACTCCACATTACTACCAGACAACCTTAGCCAAATCATTTACCCAAATCAAATATAGGCG\n++\n+G6FGGFFGGGGGGFG6GDFEGA;FGFEGFGG@FCFGFFGFGGGFEEFFFGBFDGGEGGG?EFGGFGFGGDGGGGFFGF?G<@4GGCGE9G/GAG@GGGG#EGGG=BDGGFGGGGFGGGGG1DAGG7FGCE:FDEG<GGC5GF-@CB>FDGG\n+@out-MT-3/1\n+CTAGCATCACACACCGTAAACTCCCCTATCTAGGCCTTCTTACGCGCCAAAACCTGCCCCTACTCCTCCTAGACCTAACCCGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACTTAATCTCCACCTCCATCATCACCTCAACCCA\n++\n+GGGGEFD@GGG?GGGGGGG6*GGBGGAGGGFGGGGFGGGFGGGGGDGFGGEGGGCFGGGFGFGCFGBGFGGFGGFGFGGBFGGGGEGFGEC?EGFGGG:=GGDG:BGGGBGGDFGG@GA64FBGEBF<G@GEGC8=FG<>?@DF=7GEGGG\n+@out-MT-5/1\n+CGCTCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCCTATTAATATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCTCTCACGTTATTGACTCCTAGCCGCAGACC\n++\n+GG11GGGDGGEGEFG@;FBGGGDGGG@BGCFGGGGGF:GDGF3EGFGFDFGGGGF@GGEGC1BGAGG=GGGGG=GDGDF66:EGGDGGF>8FFGGCGGFGGGGGGGGGCGGGGFA9DGGG@&$FGGF<GEFC:GGGEF;@FE@?EBFCB8C\n+@out-MT-7/1\n+TCCTAACTCCTACCGCATTCCTACTGATCAACTTAAACTCCAGCACCACGTCCCTACTACTATCTCGCACCTGAAACAAGCTAAAATGACTAACAACCTTAATTCTATCCACCCTCCTCTCCCTATGAGGCCTGCCCCCGCTACCCGGCTA\n++\n+EGGCGGG?5GE3GFEGAGGGFGGGBDDCGE>GFGGGGF@CGGDGGGDGGG-FGGGGGGBDFGCGFF8GGGF?BBDG8C<GGFGG%?E:GEGGGGA\'GFEGGGFGG%FA9AGGCGFGGGGG@FGGA+FDEDC=F9=G?A@GFFE/>GFGF9E\n+@out-MT-9/1\n+AGCTAAACCTAGCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCGATAGAAATTGCAACCTGTCGCAATAGATATAGGGCCGCAAGGGGAAGATGACAAATTATAAGCAAGCATA\n++\n+G<CGGGGEGGGGGG=FGEGGFGGFADGFFF3EGBEGEGBFDFEGGGGFGFFFFGB8FGG?GFEFGDEGGGGGFGCGGGGGGGGFGGGFDFG*CGEDGF(GG:8DGDBG=DFFB&$FE=DGGGG3.FFFEFF4=FECB8FEBA4:GAFFAGE\n+@out-MT-11/1\n+ATTCAAATTCCTCCCTGTACGAAACCACAAGAGAAATAAGGCATACTTCACAAATCGCCTGACCCCGTAAATGATATCATCTCAACTTAGTATTGTACCCACACCCACCCAAGAACAGGGTTTGTTAAGATGGCAGCGCCCGGTAATCGCA\n++\n+GGFGGDGGGFGEGGDFFAGDGG<CA1?GGFGD=FDGGGAFGE0FGGGG9GGEEE6GGGAE%%FFGFDG9G<BDGCGEEGGGFGGG?DGBCFGGGGGG<8BFGGAGGEAGEB:G5GDEFFFGDFG1GGGGGF@DFGF-:CGGDFE?E?;>C;\n+@out-MT-13/1\n+AAAA
CAATTTCACAGCACCATATCTCCACCTCCAGCATCACTTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTACCCACTAGTCCTAACCCTACTCTAAAGAACATAACCTCTTCCCTAGAGCCATCTCAATTAC\n++\n+DGGFGFGAGGGBGGGGGGBF\'BFG?FGCGF:FFFGGFFFGBGGFF?FG>GG;GA@EGGEDDGFC=BFEF:E<ADG?BGFAF5:GGGGCDGG9FGFF,/GGD>GFGGGGGFFCG.GF1-=GFCFC<:4\'A?GE=5(2FGF\'6FGCGGEFB;F\n+@out-MT-15/1\n+CTCTAGTATATCCAAAGACAACCATCCTTCCCGCTAAATAAATTAAAAAGACTATTAAACACATATAACCTCCCGCAGAATTCAGAATAATAACACACCCGACCTCACCGCTAACAATCAATACTAAACTCACATAAATAGGAGAAGGGTG\n++\n+G?GFGGEGGGFBFGGG;GGGFGGFGG*GGGAE<GGGG>DFGGEAGFGGF5FG?GGF6GGE<DGGGGG?GGFEGD?FF.8?GGF8FE>DGGAGGEGBFCFGEG<E:@FBEFF>E@ECBFFGFFACGEE??%F(=B6>BFEGEGGB;FGF/=-\n+@out-MT-17/1\n+AGAAAATACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTGGTCCAAGGACAAATCAGCGAAAAAGTCTTTAAGTCCACCACTAGCACCCAAAGCTAAGATTCTAATTTAAACTAT\n++\n+G"GGCAFFFGFGCGEBGGGFGGGGD@GGGFGGG=C?GGGCFFFFGG@GGAGGFGGGGFGFGFGGGDBGGEFGGEB\'.9GFGFFBFGGFF7GGF)4DFAGEGAFGGGGF2GD<GEFD,F:FGGECGCFF?4F2;<GGEFGGED@GAGGA9GB\n+@out-MT-19/1\n+TCTGATATCCGGCCTGCTTCTTCTCACATGCATAAAAACTAGCCCCCATCTCAAGCATATACCAAATCACTACCTCACTAAACGGAAGCCTTCTCCTCACTCTCTCAACCTTATCCATCATCGCAGGCAGTTGAGGTGGACTAAACCAAAC\n++\n+GFGGG0FGGGFFGDF8FFGGGFGGCFFGFDGGFFFFBFGGFFFCDGBGGGGGGG1FFGE=A=?FGGCC/GF5GGGDGGF<GGG0@@@GGGGFF7FGCGDFGFGDGGFF<GCGGBG?;FEGD0=BFGFF@CC3BFFFGBGEF5CGF@;AC=D\n+@out-MT-21/1\n+CCGCCTGCCCAGTGACACATGTTTTACGGCCGCGGTACCCTAACCGTGCAAAGCTAGCATAATCACTTGTTCCGGAGATAGGGACCTGTATGAATGGCTCCACGATAGTTCAGCTGTATCTTAGTTTTAACCAGTGAAATTGACCTGCTCG\n++\n+G:GGGCCAGFGGGEGFCGGGG/EGFGG@EFGGEE3EGGG?GCFEF<@AGFGG4GF<F?DG?FFGGECFGGGGE-+@.;GGG5GF;<GFB=GDDFFGGGEEGEF>G5G=EGGFG@13;%<EE;928G3FBBGFGGDFBD3GGDGG=5@3.F>\n+@out-MT-23/1\n+AATAAAATAACAGTTTGAACATCCAAAACCCACCCCATTCCTCCCCAACTCATCGCCCTTACCAAGCTACTCCTACCTGTCTCCCCTTTTATACTAATACAGATCTTATAGAAATTTAGGTTAAATACAGACCAAGAGCCGTCAGAGCCCT\n++\n+FBGGGGG?3EGGGBGFGGGGFF\'FGGFGF@GGGGG@FGGGDFGFGD2BE9EGGDFFFGG=FG>GG=GEE1@GGGGFEDGGG>GDGAFC@:FGFGB>BD:?3GC>@GGAEGEFG6?FCGFGFGBG=F<G;;GG=G65@E5DB-E:9\'EFEG6\n+@out-MT-25/1\n+CTTCAAAGTAAGCGCAAGTACCCACGTAAACCCGTT
AGGTCAAGATGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTGTCTACCCTAAACA'..b'FGFGFFG<AGGGDGGDE9FG/?GFEG@GEFGGGGACGEGGGEFFFGGGDCGG;GGGDFGGFEFF>E7GFGGB<GEGBF(F:GFGGDGF:FG<G7F2.A\n+@out-MT-2185/1\n+CCCTCCTAATGACCTCCGGCCTAGCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATCTAGGCCTACTAGCCAACACACTAACCATATACCCATGATTGCGCGCTGTAACGCGAGAAAGCAAATGCCAAGGCCACCACAGACCACCT\n++\n+EGGFFGGGGG@GGD;=GGFGGFGFFGFGGGGFGCDGCGGGAGGGGGDG:GFGEGEGD:GGGGGGGGGFGFFFG$8CGGG?F:DFG?FB0G@DE<G*6GGG=EGGGFF/FFFFGF0FDGGE9GGFF/E3.G:=GFBCE?FGGEG0GG9:GCF\n+@out-MT-2187/1\n+TCTTCACAATTCTAATTCTATTGACTATCCTAGAAATCGCTGTCGCCATAATCCTCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCACATAGTTTAACCCAGCC\n++\n+7AGCDGG=FGG1FFGGFGGG$GGGFGG8:GGGFGDGGGEFGGGGDGE@GGFFGGFGGDGGDEGEGGFFFGBEGACGFGF?GBGG;EFGGFGGG6GGGEGGGGFBFGEF7;FGEEEDCGDGG??<?;GGEGG>FFGGF=E5,$CB:EFCGGF\n+@out-MT-2189/1\n+TAACACCAGCCTAACCAGGTTTCAAATTTTATCTTTTGGCGGTATGCACTTTGAACAGGCACCCCCCAACTAACACATGATTTTCCCCTCCCACTCCCATACTAATAATCTCATCCATACAACCCCCGCCCATCCTACACACCACACACAC\n++\n+GGDGFEGGFGGFGGGG2FF=G<8EDFGGGGGEF>9DCEGGGGEGG?>FGEEG?GFCGG-GGGCGFFGFGGGC=EGGG533GG=EGEEGFFGGGGGFG>FBGFFG.GGGBDFGGEF\'?GFFDBDBGEFGGE;<@;FGD?.EF-A?GGFF;3F\n+@out-MT-2191/1\n+CCCTACTAGTGTCAATCTCCAACACATATGGCCTAGACTACGTACATAAGCTAAACTTACTCCAATGCTAAAACTAATCGTCCCAACAATCATATTACTACCACTGACATGACTTTCCGAAAAACACATCATTTGGAGCCCGACAACCACC\n++\n+EFGGF=FF6G4DGEFDGGGDDFFGG5GFGEFGFBDEGGGGGGGG@FFG5,<GBEGGGFA6EGGGGGGGFEGGF:GGGGBG4GFGEGGEDB/FGGF3FE@A?G?FGGDGGFFGFF<EE>*DDGGGGGGFG)F>CE11C\'9DF.;FFFD>EAE\n+@out-MT-2193/1\n+AAACTAGGCGGCTATGGTATAATACGCATCGCACTCCTGCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATACTTATAACAAGCTCCATCTGCCGGCGACAAACAGACCTAAAATCCCTCATGT\n++\n+DADEEEEFGGGCGGGDEG@GGGFFDGC3GG8BGGCGBE6CEDBG9GG>G5GAGFGGGFGGEFGGGGGEGGGFF@GGGEFFGDGFFGG6EGGGGC?GFBG1EGFGG>FFEFGCBFGGGFGGF5/FEGFGGEGFB8FEGF>FBFE4D2;AE)0\n+@out-MT-2195/1\n+AATAGCTTTTTGATGACTTCTAGCACGCCTCGCAAACCTCGCGTTACCCCCCACTATTAACCTACTGGGAGAACTCTCTGTGTTAGTGACCACGTCCTCCTGATCAAATATCACTCTCCTGCTTACAGGACTCAACATGCTAGTCACACCC\n++\n+GGGFGFGFGFDDDE
GGFGGCGGGFG5FGGEEGF%GFGA?GGFBGGFBFGGEFCFG@;F@GECFGGFGGGGG@GFFFBBFDGF,EGFA*FGFGGG>8GGG?7EFGGGEGFAFDGFGGEFGD;GGDGGG=GE>GFEFG=<#ADFGGGGGB2D9\n+@out-MT-2197/1\n+AACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAGTAGATAACATTCACCGCCGCATAAGCCTGCGTCAGAATAAAACAATGAACTGACAATTAACAGCACAATGTCTACAATCAAACAATAGGT\n++\n+=EGGGGFGG8BFGFBFGEE;GGFGG>GGFGFFDBFGG@GGGEAGFF>?GGEEAFGG1GCGFG:GG%FGGEEGF,DD@FGF6F6AFGEGFGGGGGG=G,BFGDGGG<FFGGDF@EGGCGGEGCFGG-GGGCDCFFGCE>BBGE\'9FD-F@C?\n+@out-MT-2199/1\n+GCACATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAGGTAGCAGAATCACTTGTTCCTTAAAAGCGGACCCGTATGAATGACTCCACGAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTAACCGGTGAAGAGGCAGGC\n++\n+G?GCGGGGGEGFFA8?FBFGFFGBGFFFFGGGFEFEEFGDDGGGFBGGG?GGGEFEAGGGFEGG,$-=DGGF<GGGGGGGDEEGFGFGG=FGGGF>FFEDEFGBGGAFGAGC9GGDF<9AFFGGGGBCEFD?/&AG$=G5GGCGFE1GA9E\n+@out-MT-2201/1\n+GGCAAGAAATGGGCTACATTTTCTACCCTAGAAAACTACGATAGCCCTTAGGAAGCTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGGACTGAAGCGCGTACACACCGCCCGTCACCATCCTC\n++\n+EGEGGGGGFAFGGGG97GGFDFGGGGCGGEGGGGF@=GFGAGFGGGA@GFFFGGGG2GFEFDGG6FGGFFBFG?GGEFGGGDGFGBEFFCAFGEADGGDG?EA3GFEGD2GDFG40)GGGFCE?FAGGGDGCFG>GFCGD:FG5G5BFEDE\n+@out-MT-2203/1\n+CGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGACCAATGTCTACAATCAACCAACAAGTCATTATTACACTCACTGTCAACCAAGCCCAGGCATGCTCATAAGGAAAGGTTAAAAAAAGTAAAAGGCCATCGGCAAA\n++\n+G@G9CD=GGGGFGBFGF7BCFFCA;DGGGBGGC;GGEGFGGGGGGF6FGGDA?GDFEFAGG>GED9GGGF8EGFGCGGGGG?\'GDFCF/GFCGGDC7?/G"FGGFC=>FFGGGFFGFGGGGFG=ACGGGFA<@GC<G665\'#-GB=GCEF8\n+@out-MT-2205/1\n+CGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACCCTTCGCTTACACCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCACACCTACCATCACCCTCTACACCACCGCCCCGACCTTAGCACATCTCGCCATCGGGC\n++\n+GGGGGG6GGFGGFFCGFGG;GGGGG?FGGFFGGDGCFG=FGGFFFGGEBF?)FGGGAGFEC?7FE=GEGGFGGDGFGGCEGEG7GFG@GEDEFFGG5FGGFFFGFF?GDGGGDG;-8DFGGG@FEE@EGECFBGG@C=FBD7FDEDCF((F\n+@out-MT-2207/1\n+TACATAAAATCTAGACAAAAAAGGAAGGAATAGAACCCCCCAAAGCTGGTTTCAAGCCAACCCCATAGCCTCCATTACTTTACTTCAAAAAGGTATTAGAAAAACCGTTTCGTAACTTTGTCACAATTAAATTATAGGCTAAAGCCTATAT\n++\n+GGGGGGGGFGFGBGGCG@GGGDGG@FGGEGCFGCGGG
GBGGGCFFGFC19GEEGDFGAAFGEGGF54FEGDGGFG:E<GFGFGGFGGBGE=G>:FCGFGFFG<GG@F0GGGEGG@CGFEG:>G%8&@GFDGGGFF@FEBGG5>.FGGFGBB\n'
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-panic_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-panic_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,4416 @@\n+@out-MT-1/2\n+TCTTCTCTATCTATAAGTCTTCCCACTATTTTGCTACATAGACGGGGGTGCTCTTTTGGCTGTTCTTAAGAAGCTCGTCTGGTTTCGGGGGTCTTAGCTTTGGCTCTCCTTGCTAAGTTATTTCTAGTTAATTCATTAGGCAGAAGGTATT\n++\n+FG%#GFGGGGDGGGG:.FEGAFDCGG@<EEGGFB;GGGG>GGFEGG)@FEEGDF8DG3CGGGGB=GBG0G1BB>DGGF5CGGDFA0FGGEGCFE?GEGGFBGGF?GEGGG2@=2C?GGG?CG@BGGGGCAFGEE;AB9)FGCGFGFFEDF$\n+@out-MT-3/2\n+GATGGAGGTAAGATTGGTGCTGTGGGTAAACTAGTATGTTGGGGTGGTGGGTGTGGTGAACTTTAATAGTGTAGGAAGCTGAATAATTTATGAAGGAGAGGGGTCAGGGTTGATTCGGGAGGATCCTATTGGTGCGGTGGCTTTGTATGAC\n++\n+CGFFCGGGGAG<GCGGBGFGGGGGFGFGFG>,GGGFGG.G2CGGGGGGFF-BG9FE?GGEGGFGGFFFFGFE<F5FDBGBGGGGGFFGGEGGGEFGBF6BGGFFG0GGEG8GGG>A@GDGGD>6FE>BGFGGEDGDD$E2FECGDFG@DE1\n+@out-MT-5/2\n+GAATAGTTTAAAGTAGAATCGTGGCTTTGGGTGCTAATGGTGGAGTTAAAGACTTTTTCTCTGATTTGTCCTTGGAAAAAGGTTTTCATCTCCGGTTTACAAGACTGGTTTATTAGTTTATACTACAAGGACAGGCCCATTGGCGTATTTT\n++\n+GGGGE@?FEF:GGFGECG=GGGFEGGGAGGBGGGGCGFFDFGGDEAGFGGF8EG9GBECGGGFGG@=FDF?GGFGGGGGECFGG:@GGGCGGGGGGGGEGGG@F@GGFG(FEFDGC8FDCG6CGE5G:EGGAD9D:GEGGF&:'AE>DA<F\n+@out-MT-7/2\n+CAACTTACTGAGGGCTTTGAAGGCTCTTGGTCTGTATTTAACCTAAATTTCGATAAGATTATTAGTGTAAAAGGGGAGATAGGTAGGAGTAGCGTGGTAAGGGCGAGTAGTGGGGGGAGGAATGGGGTGGGTTGTGCATGTTCAAACTGTC\n++\n+GGGG?CG8EGGGEGCGGGGFCGFFFFFG9GGGDGCGGCEGGG@GGGGGFD>#GGGG>GGEGGGGFCGDFFGGGGFFGGFGGE@E?GGFGGGGEGAEGGGFGGGG02,-EEGG2<E57GGFGGEEDGG@ED;FC7/9(:B5AFFEFGBGGEF\n+@out-MT-9/2\n+CTAGTGTCCAAAGAGCTGTTCCTCTTTGGACTAACAGTTAAATTGACAAGGGGGTTTAGAGGGTTCTGTGGGCAAATTTAAAGTTGAATTAAGACTCTATCATGGACAACCAGCTATCACCAGGCCCGGTAGGTTTGTCGCCTCTACCAAT\n++\n+GGDBFGFFGEGFGEGGFFGGFGGFEDDFGGEAGFGFEGFFGG?G6GGGFFEGCFGGGGGGGFG?GFFFGF7DEDEFEGAFDDGGCGGB'EGGGGBAB?EFGEGGC;6>GGFGFGFFGGFGFGGFF-6FEGGEG:F?ECGFGDDCCEFF'??\n+@out-MT-11/2\n+GGGGCCTAGGTTGAGGTTGTCCAGGGGGTTGGGTATGGGGAGGGGGGTTCATAGTAGAAGAGCGGTGGTGAGAGCCAAGGTCGGGGCGGTGATGTAGAGGGTGATGGTAGATGTGGCGGGTTTTATGGCCTCTTTGGTGAAGAGTTTTATG\n++\n+G5GGEGGGGGGFGGDGG?G)DEGGFGGGDGEECE9CGEGGGFGGGD5GGFGEGEGGF@FEGGGG3GEEGEGCFG7'6GEGFGF;DGGGGAFFF4?6G:>F;EEEC>FDFFDFFGFGGGGFFFFGF5G>-FGGFDD>DF6FEG:F89EGEBC\n+@out-MT-13/2\n+CATGG
CGGTCAGGGGTTGAGGTCTTGGTGAGTGTTTTAGTGGGGTTAGCGGACGAAGATGGAGGTAGGATTGGTGCTGTGGGTGAAAGAATTCGATGGGGTGGTGGTTGTGGTGAACTTTAATAGGGTAGCAAGCTGAATTATTCATGAAG\n++\n+GFFGE3GGF9GGGFCGEFFGGGCFFGG6GC=GFGGGDE?D=FGGGAGGGEGEGFGGDFGEGGGFGFGGGCFEB7FFG<;EA3GG6DGG?%E&+E999BFEFFG7CGGG;DGFD>FGGBEGFGGGF17/EF+DBFGGFGGG,AGG&4DGFBG\n+@out-MT-15/2\n+GACTAGTGCATGGCTAGGGATAGTCCTGTGGTGATTTGGAGGATCAGGCAGGCGCCAAGGAGCGAGCCGAAGTTTCATCATGCCGAGATGTTGGCTGGGGTGGGAGGTCCATGAATGAGTGGTTAATTCAATTTCTTAGGGGGTTAGTTGT\n++\n+GB'5FGFFG80GEFEDGGGGGFGDGGGGGGGG4FGGGFGGGGGG=GGGBGDGDDGGFGFGFGFBGGGGFG<G<BCDGFG3FGE-4GFGGEGGGE;GG=GC2CG;>GGGG*ECGGEGFGG@FGFEFG?7.F>GFGGFCG8GGGCAFD=GG$A\n+@out-MT-17/2\n+TCATCCATGGGGACCAGAAGGGATTTGACGGTAATGTGCTATGTACGGTAAATGGGTTTATGTACTGTGTACTATTAAGGGTGGGTAGCGTTGTTGGAATCGTAGTGGATGAGGGGTGGCTTTGGAGTCGCAGTCTATGTGTGATAGTTGA\n++\n+GGGGFFGGGFGGGD5<GGGGGGF:GGGGG2GGGGGG?DFFGGCF9CDC@DFG:?3,CGG=EEGGFEGFF=G?>ADGGCEGDGFEGGF?:$GGGGFFG*EDF4>=<DFEBFFGGGEFFAD9?<>ACGED4EGG<G,CFGCAFAECAGGEGF@\n+@out-MT-19/2\n+GTTAGATATGGGGAGTAGTGTGATTGAGGTGGAGTAGATTAGGCGTAGTTGGAAGTAACGGTTAAGGAGGGTGATGATGGCTATGATGGTGGGGATGATGAGGATATTGTTTTTTGTGAATTCTTCGATAATGGCCCATTTCGGCTAGAAG\n++\n+AGAFGEDEFGGCGBF4FFGE@FGGG0=FEGCGFCGGGGG>FG>GGEFD&=5EF:GGG&+GGFEFGEGGFBGCGGGG'BGEFGGGGGCCBFGFFEFGGGGAFG:,GECA@>G2DGFFD7GDGFGGF?G=GFFBEF4FGF?DG1GAE9F0BGF\n+@out-MT-21/2\n+CCTAGGGTAACTTGTTCCGTTGGTCAAGTTATTGGATCAATTGATTATAGTAGCTCGCTTTGACTGGTGAAGTCTTAGCATGTACTGCTCGGAGGTTGGGTTCTGCTCCGAGTTCGCCCCAACCGAAATTTTTAATGCAGGTTTGGTAGTT\n++\n+GGFFGFGDFGEEGFG@FGCAFGBFD?G<GBGGGGFGBGGEGFFG3@?GGGD@26G>GGGFGFG=EFFGEGFFEGGFGGEGFFE9GEDED?G7FCEGFFGGGCFGGFBGGDG9,GGGCGAB?FAFEGGFD>FDF@DBFFGD6C8G@>EF?4E\n+@out-MT-23/2\n+ATGGCTGAGTGAAGCAATGGACTGTAAATCTAAAGACAGGGGTTAGGACTCTTTTTACCAGCTCCGAGGTGATTTTCATATTGAATTGCAAATTCGAAGAAGCAGCTTCAAACCTGCCGGGGCTTCTCCCGCCTTTGTTCCCGGCGGTGGG\n++\n+FGFGGFGBBEFGCGGA4GGGC@GFGGFGFAG@GBFF5GGGFGGGGGG1GGGGCFEGFFFGGFDGGGFGGFFDFAGGFGGFGFDGFFGGBGC?FGDF6FG?BFGEGGGEDGCCA8F;3GGGEGE2=FEGGGFE@5A9$E5F2>7B@G@4EGE\n+@out-MT-25/2\n+ATTTCTATCGCCTATACTCTATTTGGTTAAATGGTTTGGCTAA
GGTTGTCTGGTAGTAAGGTTGAGGGGGTTTGGGGCTAGGTTTAGCTCAGAG"..b'DGFF?G7GBFGGD@GGFGFFEDFDBDFGGFGG=FFFGFBGFFBE>FGFD:GGFG9CGGFBGGFFFFGGEF4GGGGGGDGAGC=E;CGEG8?CDF;:GF\n+@out-MT-2185/2\n+TGGAAGCCTGTGGCTACAAAAAATGTTGAGCCGTAGATGCCGTCGGGAATGGTGAAGGGAGACTCGAAGTACTTTGAGTCTTGTAGGAGGGTAAAATAGAGACCCACTAAAATTGTAATAAGCAGTCCTTGAATTATTTGGTTTCGGTTGT\n++\n+GGGBGGEFGFGB@F@GE<F5CGBGGFG7?FGFGFGFGGG?GF92GGFFAGGGEGBGG>GCGBB5GGFAG@EGGBBFG<)<>EFFCGFGGAGGGBGGEGGGBGGGGF0FGBGGEFGFFGFFGGGG?E8@GAAA9E@AD=EDAGDAD;AFFFF\n+@out-MT-2187/2\n+TTGTAACAAGCAGTGCTTGAATTATTTGGTTTCGGTTGTTTTCTATTAGACTATTGAGCTCAGGGGATTGATACTCCTGATTCGAGTAATACGGTTGTGTTTAGGAGTGGGACTTCTAGGGGGTTGAGCGGGGGGGATGCCGGTCTGGGGC\n++\n+GEFGFGGFGGFGDFGGGG;DGBGGGE<FGCF56>GFGFGE<GGEGEGGGGG=DGGFGF=GBFGA6GG8DGD=FGGGG;DBGGBGFF@FG9EGD2-;GGFF7ACFF@B;>1FDGCGGFFGFGF,A82G>FDFGFF?;@GED83CF,->?DF8\n+@out-MT-2189/2\n+AAACTTTCGTTTATTGCTAAAGGTTAGCCACTGCTGTTTCCGGGGGGGGTGTGGCTAGGCTAAGCGTTTTGAGCTGCATTGCTGCGTGCTTGATGCCTGTTCCTTTTGATCGTGGTGATTTAGAGGGTGACCTCACTGGAACAGGGATGCT\n++\n+CGGGAGGGGFGGGGF9GGGGGGFFG6(AGGGGFGGGEGFBA7BE>DGGGFCEG;EFFGGGGGGEGGGGGGGGGGGGFGGGGGGFF>FF?8:<FGGF(GGGF>=EEGGGGGFDA8@GGFGDGGGF1EG?GG*F?>GGGGFD0@><BGB=EG1\n+@out-MT-2191/2\n+TGCGTTCAGGCGTTCTGGCTGGGTGCCTCATCGGGTCCTGGTAGCCAAGGTGGGGATAAGTGTGGTTTCGCAGAAGATATAAAATACGATTAGTTCTGTGGCTGTGAATGTTATAATTAAGGAAATTTGTAGGGAGATTATTATAGAGAGG\n++\n+BGGGEGGGFGFFFG8GGDFCGG?FEDGGFEGGDGDFG%F5GDEF>GGGGFFGBGCDGGGGFGDGGGGGG>&GGCGFEGGF<EGGEGGG=F8GC;:4FFGBEG<G@FGG4DGGGG:GGCGGF<<1GGD0;DE@FGFE@0@@\'9FGFGG4GGF\n+@out-MT-2193/2\n+TATGTTGAGTCCTGTAAGTAGGAGAGCGATATTTGATCAGGAGAACGTGGTTACTAGCACAGAGAGTTCGCCCAGTAGGGTAATAGTGTGGGGTAAGGCGAGGTTAGCGAGGCTTGCTAGAAGTCATTAAAAAGCTATTAGTGGGAGTACG\n++\n+FGGGGFEFGGGGFGGFGGGFF<=FGGEGGGGGDEFGGFGGGGGGFA8CB:=DDEFEGGDGGFFFGGGG8+CEGDGFGGG0EGFCGFDF,D=E8AFGDGFEFGG?G@FFGGG@GGGDC@GBFCEG=G?0FGFBGB4GFGGFGGCB?@;ED1:\n+@out-MT-2195/2\n+TGCAACAAAATCTTTGGGGCCTAAGACCAATGGATAGCTGTTATCCTTTACAAGTTGAGAAAGCCATGTTGTTAGATGGGGGCATGAGTTAGCAGTTCTTGTGAGCTTTCTCTGTAAATAAGGGGTCGAAAGCCTCTGCTGTCAGAGTCAC\n++\n+GGFG=7FE5DG:GGGGGGD@GG
FGGGGGGE;GFDGGGGA?7EEG:CG<GGEFDEGG?GGFCGGFGGEFGFGECEGFFBGFGG>FGGGDFGGGGGGGGFGGFGFGG@@:@EF:=FGFFF;F=CEF=9F9(GFAE9EG8F*FGEDGE=,CGGB\n+@out-MT-2197/2\n+GTTGGGTTCTGCTCCGAGGTCGCCCCACCCGAAATTTTTAATGCAGGTTTGGTAGTTTAGGACCTGTGCGTTTGTAAGGTACTATTTGCATTAATAAATTAAAGCTCCATCGGGGATTCTCGCCTTGCTTTGTGATGCCCGCACCTTCACG\n++\n+GGECGGFA6GGBDGCGGGEBGG@GAFG+:;FGFFG>GGFGGDGAF<GGGG6GGGGGGAGGFFFF?EDG.CEEBG;<GGGFAGFGGG<DG@GFGGGFDEGGGGEGGGFEGG.?FE+$CFDBFEDDGBFGD1CBC&?BGGFGFE3%?@FFGEE\n+@out-MT-2199/2\n+GCACCATCGGGATGTCCTGATCCAACATGGAGGTCGTAAACCCTATTGTTGATATGGACTCTAGAATAGGATTGCGCTGTTATCTCTAGGGTAACTTGTTCAGTTGGTCAAGTTATTGGATAAATTGAGTAGCCTAGTTCGCTTTGCCTGT\n++\n+FGCF@FGG:<GGG@>FD>GGGF8FGF<F-GG8E;GGGFBBFGFGFGGGDEFFFF<@9GGGFGGGFGDGEG8CGEFEBGGCEFGG"FGDGDGEGG;DBGGG:,F5>DFGFGGGGEGGGFCEA04?<;GFGFB.$+FGG?FF>GFEGG\'DC?%\n+@out-MT-2201/2\n+AGCTTTGGCTCTCCTTGGAAAGTTATTTCTATTTAATTCACTATGCAGAAGGTATAGTGGTTAGTCCTTGCTATATTATGCGTGGTTCTAATTTGTCATCTTGTCCTTGCGGTACTATATCTATTGCGCCAGGTTTCAATTTCTATCGCGT\n++\n+GGGGFGFCCCFGGDBEGGGFFGEGFEF6<GG&EGG?G=GF?CGFGFGGGGFEFGGED\'EGEGFFGFBGFGG8DAEGG>GGGGGEGFB+GGGFGFGFBGB<GB-%FFGEGGDEF=FBGBDCGGGGGDCFEEFD97G;CGDGFGDGGG=EG+C\n+@out-MT-2203/2\n+AGGACCTGTGGGTTTGTTAGGTTCTGTTTGCATTAATAAATTTAGGCCCCATAGGGTCCTCTCGTCTTGCTGTGTTATCCCCGCCTCTTCACGGGCAGGTCAATTTCATTGGTTAAAAGTAATAGACCGCTGGACCCTCGTGGAGCCATTC\n++\n+FFCGGGGGGEFBGFGGBEGFFE.G@AFF1FGGGD@FGF7GE;(GFGG.GGGEGGGE<G,FGGGCCGGFG@DEFGGEFE-GGGGGGBCFFFGFGF;GGGGFGCGGGGGECG;GGGGB??3DFF,EDEG.:;EC%EEFDDFG@DFGGCCAGCC\n+@out-MT-2205/2\n+TTTCTGCCAGTGTGGAGATAAATCATATTATGGCCAAGGGTCATGATGGCAGGAGTAATCAGAGGTGTTCTTGTGTTGTGATAAGGGTGGAGAGGTTAAAGGAGCCACTTATTAGTAATGTTGATAGTAGAATGATGGCTAGGGTGACTTC\n++\n+G.G?BBG88D?GGCGFGDEGEGFGGEAGG=DGGEGFEGFGC9GGDFAGGGGGGFGEGF@F3EGGEFBAEG@@B9DGEGGGDG:GEFEAGGG7GBGGGGCGGGGFGG@GFEAFGGGGFGFF2GD>EGEGD?FD9AFF@D/GGFGGDF>AGGE\n+@out-MT-2207/2\n+TAGGTCCAGTACCATTGCTGGCCAATTGATTTGATGGTAAGGGAGGGATCGTTGACCTCGTCTGTTATGTAAAGGATGCGCAGGGATGGGAGGGCGATGAGGACTAGGATGATGGCGGGCAGGATAGTCCAGACGGTTTCTCTTTCCTTAG\n++\n+DGEGB2GGEFGGGGGFGGGEGFGFGGFGGCDBEGFGEGFFF;FGDF
FGF?FAFGGGGG=FGF:FGG?GGGCFGFFGGFGGGGF<GFD@BGFGFFDGFEGGFGFF>GG>BGGFEEGGEGGF@EF@BGGGEGGBFCDGGGFF?.FFGCFF*GA\n'
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-vcf.bam
b
Binary file test-data/chrMT-PE-VCF-BAM-vcf.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-vcf.bam.bai
b
Binary file test-data/chrMT-PE-VCF-BAM-vcf.bam.bai has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-vcf.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-vcf.vcf Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,48 @@
+##fileformat=VCFv4.1
+##reference=chrMT.fa
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">
+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">
+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">
+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##ALT=<ID=TRANS,Description="Translocation">
+##ALT=<ID=INV-TRANS,Description="Inverted translocation">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+MT 335 . A AATTA . PASS WP=0/1
+MT 798 . C CTC . PASS WP=0/1
+MT 1009 . C T . PASS WP=1/0
+MT 1828 . A ACTACGA . PASS WP=0/1
+MT 2268 . G A . PASS WP=0/1
+MT 3073 . C T . PASS WP=0/1
+MT 3860 . G A . PASS WP=1/0
+MT 3908 . C T . PASS WP=1/0
+MT 4078 . T C . PASS WP=0/1
+MT 4250 . C T . PASS WP=0/1
+MT 4682 . A G . PASS WP=0/1
+MT 5713 . C T . PASS WP=0/1
+MT 6503 . A G . PASS WP=0/1
+MT 7168 . A G . PASS WP=0/1
+MT 7425 . C T . PASS WP=1/0
+MT 7770 . A G . PASS WP=0/1
+MT 7778 . T C . PASS WP=1/0
+MT 8471 . C T . PASS WP=1/0
+MT 8776 . C T . PASS WP=0/1
+MT 9353 . C T . PASS WP=1/0
+MT 10750 . A G . PASS WP=0/1
+MT 11273 . G A . PASS WP=0/1
+MT 11738 . T C . PASS WP=1/0
+MT 11743 . C T . PASS WP=1/0
+MT 12964 . C T . PASS WP=1/0
+MT 13049 . A G . PASS WP=0/1
+MT 13132 . C CT . PASS WP=0/1
+MT 13273 . A G . PASS WP=1/0
+MT 13804 . G A . PASS WP=0/1
+MT 14067 . C T . PASS WP=0/1
+MT 14296 . A G . PASS WP=0/1
+MT 14657 . A G . PASS WP=1/0
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-vcf_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-vcf_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,3308 @@\n+@out-MT-1/1\n+AGCAGAAAATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTCCACAAAATGACATCAAAA\n++\n+GGF8GGDFFFFGGGF5AAGGGGGEGG>/-GE?GF<?FBDGFEF2EFB<GGDGG1&5GBGGGDGGFCFFFDF7>GGEGEGGFB(G3GGF=GGGFCA<EC;1B\n+@out-MT-3/1\n+AATAATTTTCATGATTTGAGAAGCCCTCGCTTCGAAGCGAAAAGTCCTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCAC\n++\n+FFGGGGFD)@GGGGGFGFGGEBGGF\'@FFGGGGGFGGGGFGGGGGGGGFDGGGAGFF6FGFEG0EGGFGGC@GD6GFEFGAFGGDFAE<FGD>GGG0GFA=\n+@out-MT-5/1\n+CCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTT\n++\n+CEGGEGEGGGFGGGGGG?@GD3AFGFGG?GGGGFGG@FFG@CDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFE;;DFGFF;A?F@;==\n+@out-MT-7/1\n+ACTAGTATCCTTAATCATTTTTATTGCCACAACTAACCTCCTCTGACTCCTGCCTCACTCATTTACACCAACCACCCAACTATCTATAAACCTAGCCATGG\n++\n+=DFDGGFGEGFG?GDFEGGGGE9GFGGG@GGECF@GGGFGAGE\'8>GFFGEGGGFGGGFD;G@FF6@GFGG=F4DEGGBFF@CGGG7GFG?8GC-BGGEGF\n+@out-MT-9/1\n+TCTTCCTAGGAACAACATATGACGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGA\n++\n+6DCEFGGGGFFDGGGG@;GGGGGGFFFFF18AGGGBFGA;GEGCGEFGGGGG<=FGGCDGFGGGGGE>G4?FGCBGEED3GG\'(5@G><>CGFGFDGFED=\n+@out-MT-11/1\n+AATGGGACTTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATTAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAAAAAGGCGGGAGAAGC\n++\n+<FGGGDG(G9GDGGG@F?GGGGGBEEGFFGD;D/GFGG:GG+EGFGGGGGG9EGGGFGDGF@/3@E//GGFFF;FGGGFEGFFGF6EGE70G;F6CGA>DB\n+@out-MT-13/1\n+GCGCTAAGCTCGCACTGATTTTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTCGTTCCACAGA\n++\n+CFFGD21CDFGGGGG<GAFFE7AGGG4FCFGC<F:GG0GG8GFBGGGGGGG;CFFFGGGFGFGGFG>GGG;DF7GGGGGFFGGDFGG=9FFFECGB?)(CD\n+@out-MT-15/1\n+ACAACCCCCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATCGCAAGCCAACGCCACTTATCCAGTGAACCACTATCACGAAAAAAACTCTA\n++\n+EFGCGBBGEGFAGFG@7DGDCGEFGGGBG?GECFAGGGGCDGFGGBBGGG/-GGGEG<GGFGCGGEDGEFGGGDD7GGCEDB.EGAFGFGEF47DGEGFCE\n+@out-MT-17/1\n+TCATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCACTCACACTACTCCCCATCTCTAAC\n++\n+E?FGGG2FFGFGGFGG,FGGDG8GAGCADFGG=GGF6EGG
CCGFGGGGGGDGD>GGGG>FBFF?FGGAGGFGBEG<C\'4@GG<GGD<FGAA@G4%0BG1FC\n+@out-MT-19/1\n+GTGTTTTAGATCACCCCCTCCCCAATAAAACTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGATACAAAATAGACTACGAATGTGGCTTTAACATATC\n++\n+GFGEFFGDGEGGFGG8GFFGC1G1FGFGG3GGF@G0EFDGGFGC*FDG?FFG:GBGGGG<EGEGGCDGE9GEF7FGFEGFF?FG#*D:G=;A9GFDEFEED\n+@out-MT-21/1\n+AAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGCTCAAGCTCAACGCCCACTACCTAAAAA\n++\n+BGFGGEGGGGCFFEGGF=DGF8CGFCF?GFFFFGBFB8GFGF681FFEEFGGGG@EGCG?GDCG>GED=2DED\'GGG@<E?A?GGBGFGGFED2GD01=EA\n+@out-MT-23/1\n+CTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGT\n++\n+GGGGGEGGFBDFGF93G>GGGGGGGGGEFGGFG9BAEGGGFGFGGFGGFG<GFEG>FGFDFGDGGGFFCDFEF(GGG=A@FGEG4FDBFGGEED1AFGADE\n+@out-MT-25/1\n+TCACTTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACATATTCCCCCGAGC\n++\n+FDC5GFFF7FBAGCGFFGGGDFDG<CGGFFGGGFAFCGFGGGGEFEF<GGGB5EGGG70GFFFFE;BB;EGGGGEF>ACFG4;DFGB$G;A>8FECAGG3E\n+@out-MT-27/1\n+ATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCCACCCCTCACAATCAT\n++\n+GG=FGGEFFG@GFGFD>F?GGEFE1GGFFFFFDGGG<=E?DG>EFGGGGGFDGF.FF:E<GGGFEE/BGFFGB?DBGGFE=GDFF.B?DGGGGEFEGGE?F\n+@out-MT-29/1\n+GCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCGAGGTCGGTTTCTGGCTACATTCAAATTCCTC\n++\n+GGGGFFGGGGGGGFEGGGEGGGG:GGGG;G;G8DGCBCGG9FGGGGFGGEGD=DAGFFEGFFFGB8GEF2;=GGFB5F?EFG##F6FDGGGGD6@?E<GGF\n+@out-MT-31/1\n+GGCCACCTACTCATGCACCTAATTGGAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGAC\n++\n+GGBBEFFGG.GGGG0FGGEGGGGGGGGGGGFFFG?GGDGGG;GDGGGF97GGGGEGGG?GFFGFGGG3GGGGB7FGG2<G8C@DGGF58(?GE=8/EBGFC\n+@out-MT-33/1\n+ATAACCTCCCCCAAAATTCAGAATAAGAACACACCCGACCACACCGCTAACCATCAATACTAAACCCCAATAAATAGGAGAAGGCTTAGAAGAAAACCCCA\n++\n+EB1CFGGFGGFC<DEGFBGGGGFGG>-G0GFG93GDGDGDFGGGEGGGG3F.?GFFGG?GGGGGB9(D"E>GEF7:$D10@GFFC%&?GFAAGAGEFE.*D\n+@out-MT-35/1\n+ATCATCCCTCTACTATTTTCTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTTACAACCCCCCTCCTAATACTAAC\n
++\n+GDGGGGEGB+GGGDB@AFG%FGGGGD;GFGCGG1EGGGC>FGGDF,GGGGCFFGGGFFF8CGD=DACEGDGAF'..b"TACTCATTCAACCAATAGCCCTGGCCGTACCCCTAACCGCTAACATTACTGCAGGCCACCTACTCAT\n++\n+GGBGGG=GCE:FG<FGGFGGEFF<AGCAGFGGGEDFGFGGGGGFG(DGFF8DFGGEDFGGG1GG3FGFBGGFBFGFF5GG%60EA;D?EGGAG5F?>>EE9\n+@out-MT-1621/1\n+CTCTTCACCAAAGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACAGCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGCACCCC\n++\n+GE?GGFGEDGGGDGGG?GGEFGCGGG=G7F:FGGGCG;EGGG9G=EG>FGF@%EGGGGGF?FGFF@<BGFF@4$GFEDGF7B)EG?CCFGGFE;;*2);G8\n+@out-MT-1623/1\n+TAGGAATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAAGCCTATACAACCGTATCGGCGATATC\n++\n+EGGGGGGGFE5EGGEGGCGGGGG:F/GAGGFFGE;FAGBFFFG?CCFF8GFGGGGGEBGEGGEG4E@6FGGEFE>&FGGFGG6$14.1<CCEGGGGGEFGE\n+@out-MT-1625/1\n+TATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAGGAAAACCCC\n++\n+EGGGBFGGF;CG49GGFGGEAFGGGGFGCFG6FGFGGGGFFDGG=EGB3GGFEGGGGG4GABDB=FFCGF@F>EFFFA8*GGGGGGG=>AF+GGE@F@>DE\n+@out-MT-1627/1\n+CACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCA\n++\n+GGGGFGGFF<FGGGFGGFGEGGGG<G;GDGCE;FFGF3%GGGFGGGEBGGGEFGGB<GFCGGDGGGEGGGDFFCF@<4FCEG:*F8;CEGFFGGG2AFBF+\n+@out-MT-1629/1\n+GAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAGCAGACGAGGTCAACGATCCCTCCCTT\n++\n+GGGG?GGFEGGGFF4=GGFFEGFFCGDFFAFGGGEGGGGFECGFDE;GGFBFA:GGD9FGF;/GGEGGFFFFFE$-G<FFFFGG>FGC38@FFGFFBCEDF\n+@out-MT-1631/1\n+ATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGGAACCGTCTGAACTAT\n++\n+FGGGGFBGGGGFGGGFGCGGGG=G=>FGFEGFGGDGGBFGGGFGFGGGFFGGGGFGGGAGGGFD<G;GFGF2C@FGG2GFBGEGFCEF=?-C?G7>EDA?C\n+@out-MT-1633/1\n+AGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATATTCATCCCTGTAGCAT\n++\n+;GG=GFGEGGGEGFGFGGG8DG@GFGGFGG:CFGGGGGEEGGFEGGG?8FGDFA;GEGGBGGFDDGG&EFFADGGFC.=G.FGGECD6GG=F@6AGFCFG6\n+@out-MT-1635/1\n+GCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACACTACTATA\n++\n+G;GGGFGGGGF
FEGFFFGF9FGGEGGDGGGGGF?GGGEGEEGGEGGGEFGGAFAG;GDGGCGGGBFCFG@7ECGGDG2DGF@GEBDC35CG@GGEBE5>*F\n+@out-MT-1637/1\n+CGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGC\n++\n+FGFGGGFFFF;GEDFG?GG;G9BGGGGGGDFGEEG;FGGEBGFGFEGC:EF4GG@FGCF:GGGFGGFDGG?GGGBFF49F=EGGGGEA9C=BGCG;EAA@B\n+@out-MT-1639/1\n+ACCCACAGCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAAA\n++\n+GGGGE@GGGGECFGC@GCGDG?C@AAGFGF?7GGGGGGDFGGF6G>C?GCBFGDFGFFDGGGBGC1FGFGGEG?B2EAEBFGG;G?EGG81GF76FF@;B'\n+@out-MT-1641/1\n+CTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAG\n++\n+FG7FF0FFFGGFGC;GGGFDEG<FFFCGCD>EF<EGGGGDGG:FGG>FF>FEGFEGGDCGFEEGEF9GGGGGGGGGE7GGEFB;@G;@BGFEFGGFGGF?6\n+@out-MT-1643/1\n+TAATAATAACAATTGAATGTATGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACT\n++\n+G;AFGGGGFG@GFGF7EGGF#GGGFEGGGFDGGD*FE6GFGGFE;GF?GG@GGGFGG>FAFEEEFF5FF9GGGB=$A;?G<CCFGGGG?@B$E@AC>E?.>\n+@out-MT-1645/1\n+CTACACAACACATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACC\n++\n+GGGFGDFF(GGFFGGF.GFGGFGGGAEF;DGAFFCFEDGFGG-GFEGGDFB6GEEGBFEGBCGFGGGGGGGFG<E:=G7FCG<?@G@BBGEE=GBDFAFFE\n+@out-MT-1647/1\n+ACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACT\n++\n+CFFGFF<?FEFDEGGCGGF:GEEGGGGFFGEGAEGEFEFG;;GCFGGGDFG8DGFGGGGGFFGGGEFG/;FGGGGGF2CGGFFDEG?F@GGEFCDDGFD@A\n+@out-MT-1649/1\n+CAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCACACACTAGGATACC\n++\n+GGFEFCGFGGEFFGGGFDGG@DGGG@GGFCDEFEGGFAGFEGFFGAGGGCGGGG@F>EG:@FGB>GGFD:FGGGGFEBGGGGB)-DGG,EGGF9ED94GCD\n+@out-MT-1651/1\n+TCCTATCTCTCCCAGTCCTAGCTGCTGGCATCACTATACTACTAACAGACCGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATT\n++\n+GGFD@DFGGFDGAGFCGG@GDGGGGGF/GGGGGECGGF3G>GDGEFE9FDGGEEGFFGG;DGGFGFE3<;GE>G=@GGGFGAFFFGGGGGFA;GBBF?;>C\n+@out-MT-1653/1\n+TCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAA
ACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATACCGC\n++\n+F3BGGGFGEFGFGGGGGGGB9CGGGGGGEGDGGDEGGBFE;GA:DGGBGGGGG?FGGEEG>DDFEF@B>:?GGGG/G7FGFFEGFGF@FFC@8G?GGGDEE\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM-vcf_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM-vcf_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,3308 @@\n+@out-MT-1/2\n+AGGCTGCCAATGGTGAGGGAGGTTGAAGTGAGAGGTATGGTTTTGAGTAGTCCTCCTATTTTTCGAATATCTTGTTCATTGTTAAGGTTGTGGATGATGGA\n++\n+E=DGFGGGFEFEGGFFGGGE3E;F=;GEGGGBFG=GCGGGF?FGEGAG6CGGGGGGCFAGGD7GGGFGFEEC.FFF7B;7>GFEFA&?DFDFF@CC=BEGG\n+@out-MT-3/2\n+AGTCATGGAGGCCATGGGGTTGGCTTGAAACCAGCTTTGGGGGGTTCGATTCCTTCCTTTTTTGTCTAGATTTTATGTATACGGATTCTTCGAATGTGTGG\n++\n+DGFGGGGGEGBGGG::GFFG8FF@FGG4F>G>1EGF@GGGEFDFEEGGG@GFGGGGGGGEADF<CF4GGEDDDGFF?GEC5GFGGGGGGG=DEGGGGDGF@\n+@out-MT-5/2\n+GGAGGTAAGCTACATAAACTGTGGGGGGTGTCTTTGGCCTTTGGTTGGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGTGTGCTGGGTAGGATGGGCGGGG\n++\n+BGBGDGGFGCGFF@GEGGG,GGGFGGFGGFGFG:F7G###%DFEGGGGGEGDFFAGGEGFGGG9DEDGFGGF6FGAFFDGG0GEGAF8?GFGFEDFF=ACB\n+@out-MT-7/2\n+TAGGTGCATGCGTAGGTGGCCTGCAGTAATGTTAGCGGTTAGGCGTACGGCCAGGGCTATTGGTTGAATGAGTAGGTTGATGGTTTCGATAATAACTAGTA\n++\n+FGGDG-G6CG,G?G7GDEFFFGDCGEGFF3+=FGGFGF=EGGGGGGFFGGGFFGGGGGGDDF6EGDEGGGGFFGEG(FGE*@FGGB53GF:EGF?GG?FDB\n+@out-MT-9/2\n+TATTATTTACTCTATCAAAGTAACTCTTTTATCAGATATATTTCTTAGGTTTGAGGGGGAATGCTGGAGATTGTAATGGGTATGGAGACATATCATATAAG\n++\n+FBFGGGGCF+DG<GGGGADGFCGGFGFFGCGGGGEB+G@EGGGFGFG:G?EFBFG7=GFFGGBFAGGG:GDGFG=0CGFF?EEGGF<6GF?F?CG;1*3<:\n+@out-MT-11/2\n+AGAGAATAGTCAACGGTCGGCGAACATCAGTGGGGGTGAGGTAAAATGGCTGAGTGAAGCATTGGACTGTAAATCTAAAGACAGGGGTTAGGCCTCTTTTA\n++\n+GFGFFGGGEGFGFDDGGG@GGG&=GGGGGGF5GFGAGGGGFF>CFBEGGGFGFGGGA=CGGCCFDG>GAGGGG2FGGFGDGFG2D:FGFGB)FD4EG8D@(\n+@out-MT-13/2\n+CCTTGGGTAACCTCTGGGACTCAGAAGTGAAAGGGGGCTATTCCTAGTTTTATTGCTCTAGCTATTATGATTATTAATGATGAGTATTGATTGGTAGTATT\n++\n+GGGF=GFFFGGFEB9GG<GGGGBGG@CGDGGGGEGBFFG76EBGFGFGGG=GGGGFB))8GFFDGGDE<ECGEG1FGEGGGGFFGDGGGG4FEFFDFGG*F\n+@out-MT-15/2\n+GTGTGTGCCTGCGTTCTGGCGTTCTGGCTGGTTGCCTCATCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTA\n++\n+,E':<@G=GGGGGDF1)CGGG:GFFB>FGGGDGGEEDGBGFGFDGG1B@EAGGFDGFGGGGEGFGGEDGF?F?4>GBG5A&FFGGGECFE=BBG8FFGFF4\n+@out-MT-17/2\n+ATTTAACCTAAATTTCTATAAGATTATTAGTATAAAAGGGGAGATAGGTAGGAGTAGCGTGGTAAGGGCGATGAGTGTGGGGAGGAATGGGGTGGGTTTTG\n++\n+FFFFG=GFGF@EDEGGDGGCG;GEGGGGGEF9FGGGGGAGGF:
GGGCGCEDFGGBCBFGGG=*EBEG=GFF4/F3FDCGGEDB9GGGE>E?GGGA?GEEF:\n+@out-MT-19/2\n+AGATGGCGGTATATAGGCTGAGCAAGAGGTGGTGAGGTTGATCGGGGTTTATCGATTACAGAACAGGCTCCTCTAGAGGGATATGAAGCACCGCCAGGTCC\n++\n+GFG>GFG,CFFDGGGGGGBGGGG:GG<2FGFGGGGGFCGEGGGGGGFGGGCGGFEGGFGGGGGEGGGEGGGGFGGGG9?=:BFG;GFGGDFFCGG'FCG4E\n+@out-MT-21/2\n+AACCTTTCCTTATGAGCATGCCTGTGTTGGGTTGACAGTGAGGGTAATAATGACTAGTTGGTTGACTGTGGATATTGGGCTGTTAATTGTCAGTTCAGTGT\n++\n+GGGGG?FFFGFF50FFD17AFDFG=GGGGC5GGGGG,ECB:GFGGGEGFGGGGFE%GGBFGGGEB&*?F(GGGDFG=6:>=-7DCDEFFGGGF@BDDGFG9\n+@out-MT-23/2\n+GTTAGTTTGTTAGTTAGTAGGCCTAGTCTGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCC\n++\n+DGFFG?1@GEGGGG<GGGFGFG%FGC%$;GDGGEGGFFBGGD:?EFGGCGGGFGGCGEECGGDAGGDGGFGGDGEGFE@EFD@EEFGGFF7:@D=CFFGFF\n+@out-MT-25/2\n+TAGCGATGGAGGTAGGATTGGTGCTGTGGGTGAAAGAGTATGATGGGGTGGTGGTTGTGGTAAACTTTAATAGTGTAGGAAGCTGAATAATCTATGAAGGA\n++\n+GGF@GFGGFF=GBG7>GE6;CGDBCFEGFGGG6=GGDGGGGGGGGGFGGG?EGDBAGGGGGDGFGGAGG?EGGGGGDGBF7G:GGFCFFFB8CF=FF$%AF\n+@out-MT-27/2\n+GTTGCCTCATCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATGTTATAATTAAGG\n++\n+CGGG>GCCEGG:G*G0GFCFAFG@GGFFGCFFG>BGBDDGFFGD64EGGGD7<GGFFDGGGGEBFEFGFFFGGF1D@7EFGFFGFF?FFFFG'FFDFE@DE\n+@out-MT-29/2\n+ATGGGTATGTTGTTAAGAAGAGGAATTGAACCTCTGACTGTAAAGTTTTAAGTTTTATGCGATTACCGGGCTCTGCCATCTTAACAAACCCTGTTCTTGGG\n++\n+GBFGGGGGGEFEEF;3E?GEG;GFFGGGG<FFGFFFF=FFCG7=GFGGGG>GDG0GFDEG=GEFGGGGFGGDGBFGGGBFBDG?FEGFGGGG:9.C=?@78\n+@out-MT-31/2\n+ATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGTTTTACTATATGATAGGCATGTGATTGGTGGGTCATTAT\n++\n+GG:FGDGCD/GGFFFBEGGFFGCGGFGGCGFG@F9GDGGGFFGGFGGEG.E8FGGG5=GGF+FGDFFGGGGGGFEGCAFGAA49G:FGCB8E>1C;,A,GE\n+@out-MT-33/2\n+GAGTGAGCCGAAGTTTCATCATGCGGAGATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTG\n++\n+@A?0GGGGGGGGGG@FDFGCEGGGCFGGGGGGGFGBGGFEGGGGFFGGGGGGE?@G>5GG:FF0DCGFGEGGGDG:GGEG=G?C;FG;G@=E/=GDBEFFB\n+@out-MT-35/2\n+GCTGGTTGCCTCATCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATGTTATAATT\n++\n+
=C?.G(GGEGGDG6GGGAFGFGG<EFBGDEGGEEGGGBFGGGEFDA@GGEC?G<GG?D*CGG:F>EGDFGCGF"..b"GCATGTGATTGGTGGGTCATTATGTGTTGCCGTGCAGGTAGAGGCTTACTAGAAGTGTGAAAACGTA\n++\n+GG9G<EFGGFGGFGGGFGGCGGG>E>.G8?F,'GFGGEGC=GG>FGGGFFEGGG<FGGEF,E)#;BG9DGGGBFG5<DGGFCGG=>GF8FEGGFDEG=8/<\n+@out-MT-1621/2\n+AATGTTGATAGTAGAATGATGGCTAGGGTGACTTCATATGAGATTGTTTGGGCTACTGCTCGCAGTGCGCCGATCAGGGCGTAGTTTGAGTTTGATGCTCA\n++\n+FGGGCFEFF8?DFFGBEGGGFGGG7GDG@CDFGGFF2GGGGGDGGAFG?FG@<GGGEFFGGFG2GG@GFGGFFFBGGGEC5&GEFGFFDGGFDGFEC??>@\n+@out-MT-1623/2\n+GAGTCAGGGGTGGAGACCTAATTGGGCTGATTTGCCTGCTGCTGCTAGGAGGAGGCCTAGTAGTGGGGTGAAGCTTGGATTAGCGTTTAGAAGGGCTATTT\n++\n+GBGFGGGGFGEG:;FGFGEEG+G4GGGGGGGGGGGGFGGGG?FGE8=G@GGFGFD-FFCFGGEGE?DCGDFFFG,.FGGDCGGGFFD?FC<+)CA4FGD-=\n+@out-MT-1625/2\n+AGTTTCATCATGCGGAGATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTGGGGTCATTGGT\n++\n+EGGA:DEGGGGDGGGGGCGGEDBBAGGFEGC:GFG9GG'6G;FFGFFGE<GEGGG<C:GGGEGEEGGBGGE8=AG6FGDE24%(GB?AGE?3*%DBAD6D2\n+@out-MT-1627/2\n+GTAGGTGTGCCTTATGGTAAGAAGTGGGCTAGGGCATTTTTAATCTTAGAGCGAAAGCCTATAATCACTGTGCCCGCTCATAAGGGGATGGCCATGGCTAG\n++\n+GGF?GGG8-GGGG(3GEGFG:FGGGGGGGFGFF7GG)EGEGGAFEFG7GG@1GG@FGGGFGCGFGDGFG7G-GFGF+GB?GGGGFGG.'?<GGEBGGEDF>\n+@out-MT-1629/2\n+CAGCGGTGAAAGTGGTTTGGTTTAGACGTCCGGGAATTGCATCTGTTTTTAAGCCTAATGTGGGGACAGCTCATGAGTGCAAGACGTCTTGTGATGTAATT\n++\n+6GEGGGG>GFGC;GB3BGA3GGGGFEEG=G>GGGFGGFG>CGF;EF@GE-<GDGFF=GGGGE0GGGECF<E@FF<GBG:>>C:GACGFFFFEEE;GG@FFF\n+@out-MT-1631/2\n+ACTACTCGATTGTCAACGTCAAGGAGTCGCAGGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGACCGCCGTAGTCGGTGTA\n++\n+GGGF2B29FGFGGGFGG?EGFGGGGG5GECGEGGGGEGFC@F?F;FGFGGGE>?ADE=CGEGFEFEGG>@GGF3GF6GGE?FG(C4EGGFGFGGEF4;D>2\n+@out-MT-1633/2\n+ATCATCAACTGATGAGCAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATT\n++\n+GGG9EGGGGGGEGG?G?@DGGAFGGG>FEEEFF@D>GEDGF?>=?DGFEGGFEGG:DFGCFEGFGFGG$<GAFGFGGECFGFBFFGGF>E7GF<:?A@E21\n+@out-MT-1635/2\n+GGTTGTTTGGGTTGTGGCTCAGTGTCAGTTCGAGATAAGAACTTCTTGGTCTAGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGAGG\n++\n+GFGGE?GGGG<FFFG;
GGFG<GGGGFFGGGGGDGFGEG%@FGFFGEGFGGG>D0CGGF=DGFG9:@>@94G=FFGD>?CGGGEE4'G=GFEB=4:EC=<#C\n+@out-MT-1637/2\n+TGGGTTTTGTATGTTCAAACTGTCATTTTATTTTTACGTTGTTAGATATGGGGAGTAGTGTGATTGAGGTGGAGTAGATTAGGCGTAGGTAGAAGTAGAAG\n++\n+FGGGGGG5GGGGGGGGGEFGG?GEF=0EGGG/8CBGE=GFGGEG;FFG;CGG9EGGFFGFGGGEFGFDC3B.FE;G=FFGGGFBGFD=D?CFEED>FFC#A\n+@out-MT-1639/2\n+TCAGGCGTTCTGGCTGGTTGCCTCAGCGGGTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTG\n++\n+FEDGFFDGAGFGGFGGGFFGGGGGG$G;GG@GGGFEGGGGGGGGGFCBEFF@GFDG2BGGFGGG7GG@FGGGEE<FFGGGGFGGGGF*G=EAFEBB@?-7A\n+@out-MT-1641/2\n+CTAGTATGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGT\n++\n+GFGGGGDGGGG=EDFFGGGCFGFFGFG3AEGEGFFGACGFDGFFEGGGGG?AGFEAFGGG5DB>GCGC@FFEG5<GEFGGGGGGCE@EFBAEGDFEGDAFC\n+@out-MT-1643/2\n+GGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGGGTGCTGGGTAGGATGGGCGGGGGTTGTATTGATGAGATTAGTAGTATGGGAGTGGGAGGGGAAAATAA\n++\n+FFGGGGFGFAFGGDGFFDEFGDGG1GGG?EGG;DFBFGGCGGG:GGGGD4GEC*GGGGEGGGGDFGGFGEDAG<FG<GF)CEBCGGE@FGFEFEGEDGGFF\n+@out-MT-1645/2\n+GATAGGTGGCACGGAGAATTTTGGATTCTCAGGGATGGGTTCGATTCTCATAGTCCTAGAAATAAGGGGGTTTAAGCTCCTATTATTTACTCTATCAAAGT\n++\n+GGGGGBGDFGEGGG;?GGCFFG@EDGGEGGGEGGGGGFF@GFGDD6EGG#GFC=GGGFGFGFGFFGGF@AFGGF9>G><FGC;D<GEFFFFGGDDEEGFAC\n+@out-MT-1647/2\n+GGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTTTTTTTGTTAGGGTTAACGAGGGTGGTAAGGATGGGGGG\n++\n+GGGGDGGGGG7EGGGGGGGGAGGGBFFFGEGAGGFGGGEGGFGGFGGGGFGGGFEGCEFGGGFGGGGDGF@>GFGGGGFFGGGGFG8FEFF@FF>2EGFE?\n+@out-MT-1649/2\n+TTCACTTTAGCTACCCCCAAGTGTTATGGGCCCGGAGCGAGGAGAGTAGCACTCTTGTGCGGGATATTGATTTCACGGAGGATGGTGGTCAAGGGACCCCT\n++\n+BGG@4:GGG2?FF=FGF?GGEGGGGFFGGGGGGGG<>GGFGEG>FFGFAEF:F?FGG-@)EEGFEGAEGGFFF:/GAFBCEGGFGGG697B=?EGGFFDDC\n+@out-MT-1651/2\n+CCTAGGAAGCCAATTGATATCATAGCTCAGACCATACCTATGTATCCAAATGGTTCTTTTTTTCCGGAGTAGTAAGTTACAATATGGGAGATTATTCCGAA\n++\n+GGEF9EFGDGDGGFEGGFGEG<GGGBGCGGGEFEAEF=GGGG)+GGGG@CDGGGEFFGEGEGGE9FFD9GG=6ADBEDE>GD)E;FF6C@F1@BEFBGG@=\n+@out-MT-1653/2\n+CTAAGCACTCTACTCTTAGTTTACTGCTAAATCCACCTTCGACCCTTAAGTTTCATAAGGGCT
ATTGTATTTTTCTGGGGTAGAAAATGTAGCCCATTTCT\n++\n+G=@GGDA@FG=GFGGGGGFCGEG@G?GAFG??G>GGGGGFGEGGFGEFGGE8FGGBGDFG;AFEF$,DG'GGGG6FGFDC/3DEA?4DFFGGGAGF?A<BC\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM.bam
b
Binary file test-data/chrMT-PE-VCF-BAM.bam has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM.bam.bai
b
Binary file test-data/chrMT-PE-VCF-BAM.bam.bai has changed
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM.genomecov
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM.genomecov Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,16569 @@\n+MT\t1\t0\n+MT\t2\t0\n+MT\t3\t0\n+MT\t4\t0\n+MT\t5\t0\n+MT\t6\t0\n+MT\t7\t0\n+MT\t8\t0\n+MT\t9\t0\n+MT\t10\t0\n+MT\t11\t0\n+MT\t12\t0\n+MT\t13\t0\n+MT\t14\t0\n+MT\t15\t0\n+MT\t16\t0\n+MT\t17\t0\n+MT\t18\t1\n+MT\t19\t1\n+MT\t20\t1\n+MT\t21\t1\n+MT\t22\t1\n+MT\t23\t1\n+MT\t24\t1\n+MT\t25\t1\n+MT\t26\t1\n+MT\t27\t1\n+MT\t28\t1\n+MT\t29\t1\n+MT\t30\t1\n+MT\t31\t1\n+MT\t32\t1\n+MT\t33\t1\n+MT\t34\t1\n+MT\t35\t1\n+MT\t36\t1\n+MT\t37\t1\n+MT\t38\t1\n+MT\t39\t1\n+MT\t40\t1\n+MT\t41\t1\n+MT\t42\t1\n+MT\t43\t1\n+MT\t44\t1\n+MT\t45\t1\n+MT\t46\t1\n+MT\t47\t1\n+MT\t48\t1\n+MT\t49\t1\n+MT\t50\t1\n+MT\t51\t1\n+MT\t52\t1\n+MT\t53\t2\n+MT\t54\t2\n+MT\t55\t2\n+MT\t56\t2\n+MT\t57\t2\n+MT\t58\t2\n+MT\t59\t2\n+MT\t60\t2\n+MT\t61\t2\n+MT\t62\t3\n+MT\t63\t3\n+MT\t64\t3\n+MT\t65\t3\n+MT\t66\t3\n+MT\t67\t3\n+MT\t68\t3\n+MT\t69\t3\n+MT\t70\t3\n+MT\t71\t3\n+MT\t72\t3\n+MT\t73\t3\n+MT\t74\t3\n+MT\t75\t3\n+MT\t76\t3\n+MT\t77\t3\n+MT\t78\t4\n+MT\t79\t4\n+MT\t80\t4\n+MT\t81\t4\n+MT\t82\t4\n+MT\t83\t4\n+MT\t84\t4\n+MT\t85\t4\n+MT\t86\t4\n+MT\t87\t4\n+MT\t88\t4\n+MT\t89\t4\n+MT\t90\t4\n+MT\t91\t4\n+MT\t92\t4\n+MT\t93\t4\n+MT\t94\t4\n+MT\t95\t4\n+MT\t96\t4\n+MT\t97\t4\n+MT\t98\t4\n+MT\t99\t4\n+MT\t100\t4\n+MT\t101\t4\n+MT\t102\t4\n+MT\t103\t4\n+MT\t104\t4\n+MT\t105\t4\n+MT\t106\t4\n+MT\t107\t4\n+MT\t108\t4\n+MT\t109\t4\n+MT\t110\t4\n+MT\t111\t4\n+MT\t112\t4\n+MT\t113\t4\n+MT\t114\t4\n+MT\t115\t4\n+MT\t116\t4\n+MT\t117\t4\n+MT\t118\t4\n+MT\t119\t3\n+MT\t120\t3\n+MT\t121\t3\n+MT\t122\t3\n+MT\t123\t3\n+MT\t124\t3\n+MT\t125\t3\n+MT\t126\t3\n+MT\t127\t3\n+MT\t128\t3\n+MT\t129\t3\n+MT\t130\t3\n+MT\t131\t3\n+MT\t132\t3\n+MT\t133\t3\n+MT\t134\t3\n+MT\t135\t3\n+MT\t136\t3\n+MT\t137\t3\n+MT\t138\t3\n+MT\t139\t3\n+MT\t140\t3\n+MT\t141\t3\n+MT\t142\t3\n+MT\t143\t3\n+MT\t144\t3\n+MT\t145\t4\n+MT\t146\t5\n+MT\t147\t5\n+MT\t148\t5\n+MT\t149\t5\n+MT\t150\t5\n+MT\t151\t5\n+MT\t152\t5\n+MT\t153\t5\n+MT\t154\t4\n+MT\t155\t4\n+MT\t156\t4\n+MT\t157\t4\n+MT\t158\t5\n+MT\t159\t5\n+MT\t160\t5\n+MT\t
161\t5\n+MT\t162\t5\n+MT\t163\t4\n+MT\t164\t4\n+MT\t165\t4\n+MT\t166\t4\n+MT\t167\t4\n+MT\t168\t5\n+MT\t169\t5\n+MT\t170\t5\n+MT\t171\t6\n+MT\t172\t6\n+MT\t173\t6\n+MT\t174\t6\n+MT\t175\t6\n+MT\t176\t6\n+MT\t177\t6\n+MT\t178\t6\n+MT\t179\t5\n+MT\t180\t5\n+MT\t181\t5\n+MT\t182\t5\n+MT\t183\t5\n+MT\t184\t5\n+MT\t185\t5\n+MT\t186\t5\n+MT\t187\t5\n+MT\t188\t5\n+MT\t189\t5\n+MT\t190\t5\n+MT\t191\t6\n+MT\t192\t6\n+MT\t193\t6\n+MT\t194\t6\n+MT\t195\t6\n+MT\t196\t6\n+MT\t197\t6\n+MT\t198\t6\n+MT\t199\t6\n+MT\t200\t6\n+MT\t201\t6\n+MT\t202\t7\n+MT\t203\t7\n+MT\t204\t7\n+MT\t205\t7\n+MT\t206\t8\n+MT\t207\t8\n+MT\t208\t8\n+MT\t209\t8\n+MT\t210\t8\n+MT\t211\t8\n+MT\t212\t8\n+MT\t213\t8\n+MT\t214\t8\n+MT\t215\t8\n+MT\t216\t8\n+MT\t217\t8\n+MT\t218\t8\n+MT\t219\t8\n+MT\t220\t8\n+MT\t221\t8\n+MT\t222\t8\n+MT\t223\t8\n+MT\t224\t8\n+MT\t225\t9\n+MT\t226\t9\n+MT\t227\t9\n+MT\t228\t9\n+MT\t229\t9\n+MT\t230\t9\n+MT\t231\t10\n+MT\t232\t10\n+MT\t233\t11\n+MT\t234\t11\n+MT\t235\t11\n+MT\t236\t11\n+MT\t237\t11\n+MT\t238\t11\n+MT\t239\t11\n+MT\t240\t12\n+MT\t241\t12\n+MT\t242\t12\n+MT\t243\t13\n+MT\t244\t13\n+MT\t245\t13\n+MT\t246\t12\n+MT\t247\t11\n+MT\t248\t11\n+MT\t249\t11\n+MT\t250\t11\n+MT\t251\t11\n+MT\t252\t11\n+MT\t253\t11\n+MT\t254\t11\n+MT\t255\t11\n+MT\t256\t11\n+MT\t257\t11\n+MT\t258\t11\n+MT\t259\t10\n+MT\t260\t10\n+MT\t261\t10\n+MT\t262\t10\n+MT\t263\t10\n+MT\t264\t11\n+MT\t265\t11\n+MT\t266\t11\n+MT\t267\t11\n+MT\t268\t11\n+MT\t269\t10\n+MT\t270\t10\n+MT\t271\t10\n+MT\t272\t9\n+MT\t273\t9\n+MT\t274\t9\n+MT\t275\t9\n+MT\t276\t9\n+MT\t277\t10\n+MT\t278\t10\n+MT\t279\t10\n+MT\t280\t10\n+MT\t281\t10\n+MT\t282\t11\n+MT\t283\t11\n+MT\t284\t11\n+MT\t285\t11\n+MT\t286\t11\n+MT\t287\t11\n+MT\t288\t11\n+MT\t289\t12\n+MT\t290\t12\n+MT\t291\t12\n+MT\t292\t12\n+MT\t293\t12\n+MT\t294\t12\n+MT\t295\t12\n+MT\t296\t12\n+MT\t297\t12\n+MT\t298\t12\n+MT\t299\t13\n+MT\t300\t13\n+MT\t301\t13\n+MT\t302\t13\n+MT\t303\t12\n+MT\t304\t12\n+MT\t305\t12\n+MT\t306\t12\n+MT\t307\t12\n+MT\t308\t12\n+MT\t309
\t12\n+MT\t310\t12\n+MT\t311\t14\n+MT\t312\t14\n+MT\t313\t14\n+MT\t314\t15\n+MT\t315\t15\n+MT\t316\t15\n+MT\t317\t15\n+MT\t318\t15\n+MT\t319\t15\n+MT\t320\t15\n+MT\t321\t15\n+MT\t322\t15\n+MT\t323\t15\n+MT\t324\t15\n+MT\t325\t15\n+MT\t326\t14\n+MT\t327\t14\n+MT\t328\t15\n+MT\t329\t15\n+MT\t330\t15\n+MT\t331\t15\n+MT\t332\t15\n+MT\t333\t15\n+MT\t334\t14\n+MT\t335\t14\n+MT\t336\t14\n+MT\t337\t14\n+MT\t338\t14\n+MT\t339\t14\n+MT\t340\t13\n+MT\t341\t13\n+MT\t342\t13\n+MT\t343\t13\n+MT\t344\t13\n+MT\t345\t13\n+MT\t346\t13\n+MT\t347\t13\n+MT\t348\t13\n+MT\t349\t13\n+MT\t350\t13\n+MT\t351\t13\n+MT\t352\t13\n+MT\t353\t13\n+MT\t354\t13\n+MT\t355\t13\n+MT\t356\t14\n+MT\t357\t14\n+MT\t358\t14\n+MT\t359\t14\n+MT\t360\t16\n+MT\t361\t16\n+MT\t362\t16\n+MT\t363\t16\n+MT\t364\t16\n+MT\t365\t15\n+MT\t366\t15\n+MT\t367\t15\n+MT\t368\t15\n+MT\t369\t15\n+MT\t370\t15\n+MT\t371\t15\n+MT\t372\t15\n+MT\t373\t15\n+MT\t374\t15\n+MT\t375\t15\n+MT\t376\t15\n+MT\t377\t15\n+MT\t378\t14\n+MT\t379\t13\n+MT\t380\t13\n+MT\t381\t13\n+MT\t382\t13\n+MT\t383\t13\n+MT\t384\t13\n+MT\t385\t13\n+MT\t386\t12\n+MT\t387\t12\n+MT\t388\t12\n+MT\t389\t11\n+MT\t390\t11\n+MT\t391\t11\n+MT\t392\t11\n+MT\t393\t11'..b'+MT\t16242\t4\n+MT\t16243\t5\n+MT\t16244\t5\n+MT\t16245\t5\n+MT\t16246\t5\n+MT\t16247\t5\n+MT\t16248\t5\n+MT\t16249\t5\n+MT\t16250\t5\n+MT\t16251\t5\n+MT\t16252\t5\n+MT\t16253\t6\n+MT\t16254\t6\n+MT\t16255\t6\n+MT\t16256\t6\n+MT\t16257\t6\n+MT\t16258\t6\n+MT\t16259\t6\n+MT\t16260\t6\n+MT\t16261\t6\n+MT\t16262\t6\n+MT\t16263\t6\n+MT\t16264\t7\n+MT\t16265\t7\n+MT\t16266\t7\n+MT\t16267\t7\n+MT\t16268\t7\n+MT\t16269\t7\n+MT\t16270\t7\n+MT\t16271\t7\n+MT\t16272\t8\n+MT\t16273\t8\n+MT\t16274\t9\n+MT\t16275\t10\n+MT\t16276\t10\n+MT\t16277\t10\n+MT\t16278\t10\n+MT\t16279\t10\n+MT\t16280\t10\n+MT\t16281\t10\n+MT\t16282\t10\n+MT\t16283\t10\n+MT\t16284\t10\n+MT\t16285\t11\n+MT\t16286\t11\n+MT\t16287\t11\n+MT\t16288\t10\n+MT\t16289\t10\n+MT\t16290\t10\n+MT\t16291\t10\n+MT\t16292\t10\n+MT\t16293\t10\n+MT\t16294\t10\n
+MT\t16295\t10\n+MT\t16296\t10\n+MT\t16297\t10\n+MT\t16298\t10\n+MT\t16299\t10\n+MT\t16300\t10\n+MT\t16301\t10\n+MT\t16302\t10\n+MT\t16303\t10\n+MT\t16304\t10\n+MT\t16305\t10\n+MT\t16306\t10\n+MT\t16307\t11\n+MT\t16308\t12\n+MT\t16309\t12\n+MT\t16310\t12\n+MT\t16311\t12\n+MT\t16312\t12\n+MT\t16313\t12\n+MT\t16314\t12\n+MT\t16315\t12\n+MT\t16316\t12\n+MT\t16317\t12\n+MT\t16318\t12\n+MT\t16319\t12\n+MT\t16320\t12\n+MT\t16321\t12\n+MT\t16322\t12\n+MT\t16323\t12\n+MT\t16324\t12\n+MT\t16325\t12\n+MT\t16326\t12\n+MT\t16327\t12\n+MT\t16328\t11\n+MT\t16329\t11\n+MT\t16330\t11\n+MT\t16331\t11\n+MT\t16332\t11\n+MT\t16333\t11\n+MT\t16334\t11\n+MT\t16335\t10\n+MT\t16336\t10\n+MT\t16337\t10\n+MT\t16338\t9\n+MT\t16339\t9\n+MT\t16340\t9\n+MT\t16341\t9\n+MT\t16342\t9\n+MT\t16343\t9\n+MT\t16344\t8\n+MT\t16345\t8\n+MT\t16346\t8\n+MT\t16347\t8\n+MT\t16348\t8\n+MT\t16349\t8\n+MT\t16350\t8\n+MT\t16351\t8\n+MT\t16352\t8\n+MT\t16353\t8\n+MT\t16354\t7\n+MT\t16355\t7\n+MT\t16356\t7\n+MT\t16357\t7\n+MT\t16358\t8\n+MT\t16359\t8\n+MT\t16360\t8\n+MT\t16361\t8\n+MT\t16362\t8\n+MT\t16363\t8\n+MT\t16364\t8\n+MT\t16365\t7\n+MT\t16366\t7\n+MT\t16367\t7\n+MT\t16368\t7\n+MT\t16369\t7\n+MT\t16370\t7\n+MT\t16371\t7\n+MT\t16372\t7\n+MT\t16373\t6\n+MT\t16374\t6\n+MT\t16375\t5\n+MT\t16376\t4\n+MT\t16377\t4\n+MT\t16378\t5\n+MT\t16379\t5\n+MT\t16380\t5\n+MT\t16381\t5\n+MT\t16382\t5\n+MT\t16383\t5\n+MT\t16384\t5\n+MT\t16385\t5\n+MT\t16386\t4\n+MT\t16387\t4\n+MT\t16388\t4\n+MT\t16389\t4\n+MT\t16390\t4\n+MT\t16391\t4\n+MT\t16392\t4\n+MT\t16393\t4\n+MT\t16394\t4\n+MT\t16395\t4\n+MT\t16396\t4\n+MT\t16397\t4\n+MT\t16398\t4\n+MT\t16399\t4\n+MT\t16400\t4\n+MT\t16401\t4\n+MT\t16402\t4\n+MT\t16403\t4\n+MT\t16404\t4\n+MT\t16405\t4\n+MT\t16406\t4\n+MT\t16407\t4\n+MT\t16408\t3\n+MT\t16409\t2\n+MT\t16410\t2\n+MT\t16411\t2\n+MT\t16412\t2\n+MT\t16413\t2\n+MT\t16414\t2\n+MT\t16415\t2\n+MT\t16416\t2\n+MT\t16417\t2\n+MT\t16418\t2\n+MT\t16419\t2\n+MT\t16420\t2\n+MT\t16421\t2\n+MT\t16422\t2\n+MT\t16423\t2\n+MT\t16424\t2\n+MT\t16
425\t2\n+MT\t16426\t2\n+MT\t16427\t2\n+MT\t16428\t2\n+MT\t16429\t2\n+MT\t16430\t2\n+MT\t16431\t2\n+MT\t16432\t2\n+MT\t16433\t2\n+MT\t16434\t2\n+MT\t16435\t2\n+MT\t16436\t2\n+MT\t16437\t3\n+MT\t16438\t3\n+MT\t16439\t3\n+MT\t16440\t3\n+MT\t16441\t3\n+MT\t16442\t3\n+MT\t16443\t3\n+MT\t16444\t3\n+MT\t16445\t3\n+MT\t16446\t3\n+MT\t16447\t3\n+MT\t16448\t3\n+MT\t16449\t3\n+MT\t16450\t3\n+MT\t16451\t3\n+MT\t16452\t3\n+MT\t16453\t3\n+MT\t16454\t3\n+MT\t16455\t3\n+MT\t16456\t3\n+MT\t16457\t3\n+MT\t16458\t3\n+MT\t16459\t3\n+MT\t16460\t3\n+MT\t16461\t3\n+MT\t16462\t3\n+MT\t16463\t3\n+MT\t16464\t3\n+MT\t16465\t3\n+MT\t16466\t3\n+MT\t16467\t3\n+MT\t16468\t3\n+MT\t16469\t3\n+MT\t16470\t3\n+MT\t16471\t3\n+MT\t16472\t3\n+MT\t16473\t3\n+MT\t16474\t3\n+MT\t16475\t3\n+MT\t16476\t3\n+MT\t16477\t3\n+MT\t16478\t3\n+MT\t16479\t2\n+MT\t16480\t2\n+MT\t16481\t2\n+MT\t16482\t2\n+MT\t16483\t2\n+MT\t16484\t2\n+MT\t16485\t2\n+MT\t16486\t2\n+MT\t16487\t2\n+MT\t16488\t2\n+MT\t16489\t2\n+MT\t16490\t2\n+MT\t16491\t2\n+MT\t16492\t2\n+MT\t16493\t2\n+MT\t16494\t2\n+MT\t16495\t2\n+MT\t16496\t2\n+MT\t16497\t2\n+MT\t16498\t2\n+MT\t16499\t2\n+MT\t16500\t2\n+MT\t16501\t2\n+MT\t16502\t2\n+MT\t16503\t2\n+MT\t16504\t2\n+MT\t16505\t2\n+MT\t16506\t2\n+MT\t16507\t2\n+MT\t16508\t2\n+MT\t16509\t2\n+MT\t16510\t2\n+MT\t16511\t2\n+MT\t16512\t2\n+MT\t16513\t2\n+MT\t16514\t2\n+MT\t16515\t2\n+MT\t16516\t2\n+MT\t16517\t2\n+MT\t16518\t2\n+MT\t16519\t2\n+MT\t16520\t2\n+MT\t16521\t2\n+MT\t16522\t2\n+MT\t16523\t2\n+MT\t16524\t2\n+MT\t16525\t2\n+MT\t16526\t2\n+MT\t16527\t2\n+MT\t16528\t2\n+MT\t16529\t2\n+MT\t16530\t2\n+MT\t16531\t2\n+MT\t16532\t2\n+MT\t16533\t2\n+MT\t16534\t2\n+MT\t16535\t2\n+MT\t16536\t2\n+MT\t16537\t2\n+MT\t16538\t1\n+MT\t16539\t1\n+MT\t16540\t1\n+MT\t16541\t1\n+MT\t16542\t1\n+MT\t16543\t1\n+MT\t16544\t1\n+MT\t16545\t1\n+MT\t16546\t1\n+MT\t16547\t1\n+MT\t16548\t1\n+MT\t16549\t1\n+MT\t16550\t1\n+MT\t16551\t1\n+MT\t16552\t1\n+MT\t16553\t1\n+MT\t16554\t1\n+MT\t16555\t1\n+MT\t16556\t1\n+MT\t16557\t1\n+MT\t16558\t
1\n+MT\t16559\t1\n+MT\t16560\t0\n+MT\t16561\t0\n+MT\t16562\t0\n+MT\t16563\t0\n+MT\t16564\t0\n+MT\t16565\t0\n+MT\t16566\t0\n+MT\t16567\t0\n+MT\t16568\t0\n+MT\t16569\t0\n'
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM.vcf Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,32 @@
+##fileformat=VCFv4.1
+##reference=chrMT.fa
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=VMX,Number=1,Type=String,Description="SNP is Missense in these Read Frames">
+##INFO=<ID=VNX,Number=1,Type=String,Description="SNP is Nonsense in these Read Frames">
+##INFO=<ID=VFX,Number=1,Type=String,Description="Indel Causes Frameshift">
+##INFO=<ID=WP,Number=A,Type=Integer,Description="NEAT-GenReads ploidy indicator">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##ALT=<ID=TRANS,Description="Translocation">
+##ALT=<ID=INV-TRANS,Description="Inverted translocation">
+#CHROM POS ID REF ALT QUAL FILTER INFO
+MT 335 . A AATTA . PASS WP=0/1
+MT 4078 . T C . PASS WP=0/1
+MT 4682 . A G . PASS WP=0/1
+MT 5713 . C T . PASS WP=0/1
+MT 6503 . A G . PASS WP=0/1
+MT 7425 . C T . PASS WP=1/0
+MT 7778 . T C . PASS WP=1/0
+MT 8471 . C T . PASS WP=1/0
+MT 8776 . C T . PASS WP=0/1
+MT 10750 . A G . PASS WP=0/1
+MT 11273 . G A . PASS WP=0/1
+MT 11743 . C T . PASS WP=1/0
+MT 13132 . C CT . PASS WP=0/1
+MT 13273 . A G . PASS WP=1/0
+MT 13804 . G A . PASS WP=0/1
+MT 14067 . C T . PASS WP=0/1
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,3296 @@\n+@out-MT-1/1\n+CATCATAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCAGACTAATCTTAGTTA\n++\n+GGGGGGF:GGDFFFFGGGF5@AGGGGGEGG>\'%GE?FF<?FBDGFEF2DFB=GFCGG1&/GBGGGDGGFBFEFDF6@GGEGEGGFA&F0FGF=GGGFC@9C\n+@out-MT-3/1\n+CCTATTAACCACTCACGGGAGCTCTCCCTGCATTTGGTATTTTCGTCTGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATG\n++\n+FFEFGGGGFD&;GGGGGFGFGGFBGGF\'EFFGGGGGFGGGGFGGGGGGGGFDGGGBGFF6EGFEG1EGGFGGC@GD7GFEFG@FGGCE?E<FGD=GGG/FD\n+@out-MT-5/1\n+CCCGCCGCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAG\n++\n+F0;BGGEGEGGGFGFGGGG?=GE3DFGFGG?GGGGFGG?FFG@DDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFD98DEGFF;A>E>3\n+@out-MT-7/1\n+ATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCAAAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTT\n++\n+AG=AFEGGFGDGFG>GDFEGGGGE:GFGGG@GGECF@GGGFGAGE\'1BGFFGEGGGFGGGFD;G@FF5>GFGG=F4BEGGBEF@BGGG7GEG@7GC.AGGB\n+@out-MT-9/1\n+TATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGA\n++\n+EEFD6BAFFGGGGFFDGGGG@:GGGGGGFFFFF/9=FGGBFGA:FDGBGEFGGGGG;@FGGCDGFGGGGGE<G4>FGCAGEED1GG(*0?G=:>CGFGFCF\n+@out-MT-11/1\n+GCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTC\n++\n+BFAGF;FGGGDG+G8GDGGG@F?GGGGGBEEGFFGD9B.GFGG:GG)DGFGGGGGG9EGFGFGDGF?+2=E.2GGFFF:EFGGFEGFEFE3DGE71G;E2=\n+@out-MT-13/1\n+TATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAA\n++\n+GGGECFFGD17EDFGGGGG<GAFFE6CGGG3FCFGC>F:GG0GG8FFBGGGGGGG;DFFEGGGFGFGGFG>FFG;DF6GGGGGFFGGCEFG<6EFFEBGA9\n+@out-MT-15/1\n+ACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCCGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGT\n++\n+GFG>FGEFGDGBDGDGF@GFG@6DFCCGEEGGGBG>GEDFAGGGGBDGFGGBBGGG0)GGGEG;FGFGBGGEDGEFGGGDC6GGBDB?(AG?FGFGEE22?\n+@out-MT-17/1\n+TTCCGCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTAACGAACGAAAAATTCTA\n++\n+GFEB/GFE?FGGG2FFGFGGFGG+FGGDG7GBGC@BFGG=
GGF6EGGBAGFGGGGGGDGC<GGGG=FAFF?FGG@GGEGBDG:C\'5@GF8FGD;FGA@?G/\n+@out-MT-19/1\n+AGCCTACTCCAATGCTAAAACTAATCGTCCCAACAAGTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACCACCACCCAC\n++\n+G@GFGA8GFGFFFGDGEGGFGG8GFFGC1G2GGFGG3GGF@F0EFCGGFFB)ECG?FFG9GBGGGG<DFEGGCCGE8GEF7EGFDGFE<FG#,D9G=<B7F\n+@out-MT-21/1\n+TGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTCATTACCTCAGAGGTTTTTTTCT\n++\n+GEGFAAGFGGEGGGGCFFEGGF=DGF8BFECE>GFFFFGBFB8GFGF67+FFEDFGGGG?EGCG>GDCG>GEC=4CED\'GGG?:D=?:FF>GFGFEDC0GA\n+@out-MT-23/1\n+ACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAACTCAGACGCTCAG\n++\n+GFGGGGGGEGGFCEFGF98G>GGGGGGGGGEFGGFG9@@EGGGFGFFGFGGFG;GFEG>FGFDFGDGGGFFCDFEF)FGG<@?FGEG2ECAFGGEED.BFG\n+@out-MT-25/1\n+TACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTCCATGCTAA\n++\n+EGGGGFDC7GFFF9EA@GCGFFGGGDGEG<CGGFFGGFFAFCGFGGGGDFEF;GGGB7FGGG7.GFFFFF<BA8DFGGGEF=ABFG38CFGA$G;A=7ED=\n+@out-MT-27/1\n+ACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCG\n++\n+EBGGG=GGGEFFG@GFGFD;F?GGEFE0FGFFFFFEGGG<?F@DG>EFGGGGGFDGF-FF:E=GGGFFF1AGFFGB?DBGGFE<GDFF/C@CGGGGEFEFF\n+@out-MT-29/1\n+AAAGCTGGTTTCAAGCCAACCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTCTGTCAAAGTTAACTTATAGGCTAAAT\n++\n+F>F?BGGGGEFGGGGGGGFEGGGEGGGG:GGGG;G;G7FGCBBGG8GGGGGFGGDGD<DAGFEEGEFFGA3FDF17<GGFB4F>CFG$%C4FDGGGGD5=9\n+@out-MT-31/1\n+GAGCTCACCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCA\n++\n+FGEGGGGADEFFGG.GGGG1FGGEGGGGGGGGGGGFFFG?GGDGGG:GDGGGF87GGGGEGGG>GFFGFGGG1GGGGB6FGG09G8B?BGGE3;)?FD;7-\n+@out-MT-33/1\n+GTGAGGCCAAATATCATTCTGAGGGGCCACACTAATTACAAACTTACTATCCGCCACCCCATACATTGGGACATACCTAGTTCAATGAATCCGAGGAGGCT\n++\n+GEGCGEA6FFGGFGGFB:DEGEAGGGGFGG>%G0GFG95GDGCFCFGGGEFGGG3F+CGFFGG>GGGGGA5)D"F>GEF68$D/-?GFFB&$CGFA@GAGC\n+@out-MT-35/1\n+CCAGGTCGGTTTCTATCTACATCCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTGCCCCCGTAAATGATATCATC\n
++\n+GFGFDGGGGEGB-GGGDBAAFG$FGGGGD=GFGCGG1EGGGC=GGGCF.GGGGCFFGGGFFF8AGD<EBCDGD'..b"TCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAA\n++\n+2BEGBGGGFGCF>GG8G?GGFG=GFEGG.GAEFGFGFAG>BDDGFF0GF8FGGFGGG<GGDGGGF=GGFGBEEGD@CGCGGGG;ACEE;F7CG?GC+<:'=\n+@out-MT-1615/1\n+CCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTGCTTCTTCCCACTCAT\n++\n+GGGGCGGGFGEGFGDD@GEGGFGGGF?GGG:GF1GGGEGFGGGGC?GGGGGGGE<GGFGFEGGGDD;GB@@C?DBFFGEGG>GGB#GFE>FC>E+E79G>E\n+@out-MT-1617/1\n+TTCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCCAAGCCTCACCCCACTACTAGG\n++\n+GFAFE?GGGEGF@-GEBGGGGAGF>GFFCGFGGGDGFGFGGFDFGAGCGGGFG@GD=FFGGF@FEGGFDFGFDE=FGEGGED;GGBD*=E:CE47EGFGG,\n+@out-MT-1619/1\n+TATCCCCATACTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTAAGCCTAACCGCTAACATTACTGCAGGCCACCTACTCA\n++\n+GGBGGG=GCE:FG<FGGFGGEFF<AGCAGFGGGEDFGFGGGGGFG(DGFF8DFGGEDFGGG1GG3FGFBGGFBFGFF5GG%60EA;D?EGGAG5F?>>EE9\n+@out-MT-1621/1\n+GCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCGTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCAAACCCA\n++\n+GE?GGFGEDGGGDGGG?GGEFGCGGG=G7F:FGGGCG;EGGG9G=EG>FGF@%EGGGGGF?FGFF@<BGFF@4$GFEDGF7B)EG?CCFGGFE;;*2);G8\n+@out-MT-1623/1\n+AGGAATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATGCTATACAACCGTATCGGCGATATCG\n++\n+EGGGGGGGFE5EGGEGGCGGGGG:F/GAGGFFGE;FAGBFFFG?CCFF8GFGGGGGEBGEGGEG4E@6FGGEFE>&FGGFGG6$14.1<CCEGGGGGEFGE\n+@out-MT-1625/1\n+AAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCCATAAAGAGGAGAAGG\n++\n+EGGGBFGGF;CG49GGFGGEAFGGGGFGCFG6FGFGGGGFFDGG=EGB3GGFEGGGGG4GABDB=FFCGF@F>EFFFA8*GGGGGGG=>AF+GGE@F@>DE\n+@out-MT-1627/1\n+ACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCAT\n++\n+GGGGFGGFF<FGGGFGGFGEGGGG<G;GDGCE;FFGF3%GGGFGGGEBGGGEFGGB<GFCGGDGGGEGGGDFFCF@<4FCEG:*F8;CEGFFGGG2AFBF+\n+@out-MT-1629/1\n+ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTAGATAACAGACGAGGTCAACGATCCCTC\n++\n+GGGG?GGFEGG
GFF4=GGFFEGFFCGDFFAFGGGEGGGGFECGFDE;GGFBFA:GGD9FGF;/GGEGGFFFFFE$-G<FFFFGG>FGC38@FFGFFBCEDF\n+@out-MT-1631/1\n+TCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGA\n++\n+FGGGGFBGGGGFGGGFGCGGGG=G=>FGFEGFGGDGGBFGGGFGFGGGFFGGGGFGGGAGGGFD<G;GFGF2C@FGG2GFBGEGFCEF=?-C?G7>EDA?C\n+@out-MT-1633/1\n+TGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATA\n++\n+;GG=GFGEGGGEGFGFGGG8DG@GFGGFGG:CFGGGGGEEGGFEGGG?8FGDFA;GEGGBGGFDDGG&EFFADGGFC.=G.FGGECD6GG=F@6AGFCFG6\n+@out-MT-1635/1\n+TGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCA\n++\n+G;GGGFGGGGFFEGFFFGF9FGGEGGDGGGGGF?GGGEGEEGGEGGGEFGGAFAG;GDGGCGGGBFCFG@7ECGGDG2DGF@GEBDC35CG@GGEBE5>*F\n+@out-MT-1637/1\n+AACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACA\n++\n+FGFGGGFFFF;GEDFG?GG;G9BGGGGGGDFGEEG;FGGEBGFGFEGC:EF4GG@FGCF:GGGFGGFDGG?GGGBFF49F=EGGGGEA9C=BGCG;EAA@B\n+@out-MT-1639/1\n+AACCACCCACAGCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCA\n++\n+GGGGE@GGGGECFGC@GCGDG?C@AAGFGF?7GGGGGGDFGGF6G>C?GCBFGDFGFFDGGGBGC1FGFGGEG?B2EAEBFGG;G?EGG81GF76FF@;B'\n+@out-MT-1641/1\n+CTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAG\n++\n+FG7FF0FFFGGFGC;GGGFDEG<FFFCGCD>EF<EGGGGDGG:FGG>FF>FEGFEGGDCGFEEGEF9GGGGGGGGGE7GGEFB;@G;@BGFEFGGFGGF?6\n+@out-MT-1643/1\n+TAATAATAACAATTGAATGTATGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACT\n++\n+G;AFGGGGFG@GFGF7EGGF#GGGFEGGGFDGGD*FE6GFGGFE;GF?GG@GGGFGG>FAFEEEFF5FF9GGGB=$A;?G<CCFGGGG?@B$E@AC>E?.>\n+@out-MT-1645/1\n+CATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAA\n++\n+GGGFGDFF(GGFFGGF.GFGGFGGGAEF;DGAFFCFEDGFGG-GFEGGDFB6GEEGBFEGBCGFGGGGGGGFG<E:=G7FCG<?@G@BBGEE=GBDFAFFE\n+@out-MT-1647/1\n+ACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAA
CAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACT\n++\n+CFFGFF<?FEFDEGGCGGF:GEEGGGGFFGEGAEGEFEFG;;GCFGGGDFG8DGFGGGGGFFGGGEFG/;FGGGGGF2CGGFFDEG?F@GGEFCDDGFD@A\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE-VCF-BAM_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE-VCF-BAM_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,3296 @@\n+@out-MT-1/2\n+GAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATTTGTTGTGGGTCTCATGAGTTGGAGTGTAGGATAAATCATGCCAAGGCGAGGA\n++\n+GEFEE<DGFGGGFEFFGGEFGGGE1A<G==FEGFGBFG=GCGGGF>FGEGAG6BGGGGGGCE@GGD6GGFFGFEDB,FEF7B;6=GECF@%CDFDFF?CC7\n+@out-MT-3/2\n+GGGGTTTGGTGGAAATTTTTTGTTATGATGTCTGTGTGGAAAGTGGCTGTGCAGACATTCAATTGTTATTATTATGTCCTACAAGCATTAATTAATTAACA\n++\n+FFCGFGGGGGEGCGGG::GFFG9FF@FGG3F=G>/DGFAGGGEFDFEEGGG?GFGGGGGGGE@DF=CF5GGECDDGFF>GEC4GFGGGGGGG=CEGGGGDF\n+@out-MT-5/2\n+TGGCCCAGCTCGGCTCGAATAAGGAGGCTTAGAGCTGTGAATAGGACTCCAGCTCATGCGCCGAATAATAGGTATAGTGTTCCAATGTCTTTGTGGTTTGT\n++\n+FGCGBGDGGFGCGFF@GEGGG+GGGFGGFGGFGFG:F6G##'4FFDGGGGGEGDFEAGGEGFGGG9DEDGFGGF6FGAFFDGG/FDG@F8>GFGEDDFF==\n+@out-MT-7/2\n+TGTTTGGATGTAAAGTGAAATATTAGTTGGCGTATGAAGCAGATAGTGAGGAAAGTTGAGCCAATAATGACGTGAAGTCCGTGGAAGCCTGTGGCTACAAA\n++\n+:GFGGDG-G7CG.G>G7FDEFFFGDBGEGFF2&1GGGFGF<EGGGGGGFFGGGFFGGFFGGEDF6DGDFGGGGFFGEG)FGD(<FGGA44GF:EGF?GG>D\n+@out-MT-9/2\n+GGCAAGGTCGAAGGGGGTTCGGTTGGTCTCTGCTAGTGTGGAGATAAATCATATTATGGCCAAGGGTCATGATGGCAGGAGTAATCAGAGGTGTTCTTGTA\n++\n+GF=GFBFGGGGCF+;G;GGGGADGFBGGEGFFGCGGGGEA-G@EGGGFGFG:G>EFBFG5=GFEGGBFAGGG9GCGFG=3DGFF?EDGGF;5GF?F?BG9*\n+@out-MT-11/2\n+CCGTATCGAAGGCCTTTTTGGACAGGTGGTGTGTGGTGGCCTTGGTATGTGCTTTCTCGTGTTACATCGCGCCATCATTGGTATATGGTTAGTGTGGTGGT\n++\n+GDG5GGFGFFGGGEGFGFDDGGG@GGG%<GGGGGGF6GFGAGGGGFF>BFBEGGGFFFGGGA>BFGCCFDG>GAGGGG1FGGFGDGEG0D8FGFGA(FD1B\n+@out-MT-13/2\n+TGATAAGTGTAGAGGGAAGGTTAATGGTTGATATTGCTAGGGTGGCGCTTCCAATTAGGTGCATGAGTAGGTGGCCTGCAGTAATGTTAGCGGTTAGGCGT\n++\n+GDG?GGGF=GFFFGGFEA7GG<GGGGBGG@AGEGGGGEGBFFG79D?GFGFGGG=FGGGFB**=GFFDGGDE;BAGEG1FFCGGGGFFGDGGGG4FDFFCE\n+@out-MT-15/2\n+GTCCGTGCGAGAATAATGATGTATGCTTTGTTTCTGTTGAGTGTGGGTTTAGTAATGGGGTTTGTGGGGTTTTCTTCTAAGCCTTCACCTATTTATGGGGG\n++\n+:@DB8F.F&F?AG=GGGGGDF0%DGGG:GFFB<FGGGCGGEEDGBGFGFCGG3@?D@GGFDGFGGGGEGFGGECGF?F>/;F@G4?$DFGGGECED<BBG2\n+@out-MT-17/2\n+GGGGTTGGGTATGGGGAGGGGGGTTCATAGTAGAAGAGCGATGGTGAGAGCTAAGGTCGGGGCGGTGATGTAGAGGGTGATGGTAGATGTGGCGGGTTTTA\n++\n+GGGF;GEFFFGG=GFGF@EDEGGDGGCG;GEGGGGGEF:FGGG
GGAGFF:GGGCGCECFGGBCBFGGG=-EBDG<GFF3,F2FCAGGDC@6GFFD=E>GGG\n+@out-MT-19/2\n+GGGAGATTAGTATACAGAGGTAGAGTTTTTTTCGTGATAGTGGTTCACTGGATAAGTGGCGTTGGCTTGCCATGATTGTGAGGGGTAGGAGTCAGGTAGTT\n++\n+GBGGGF?GFG?GFG+?FFDGGGGGGBGGGF8GG<4FGFGGGGGFCGEGGGGGGEGGGCGGFEGGFGGGGGEGGGEGGGGFGGGF6<;8AEG9GFGGDFFBF\n+@out-MT-21/2\n+ATAAGCAGTGCTTGAATTCTTTGGTTTCGGTTGTTTTCTATTAGACTATGGTGAGCTCAGCTGATTGATACTCCGGATGCGAGTAATCCGGATGTGTTTAG\n++\n+AGGGFGGGGG?EFFGFF3'FED.3AFDFG=FGGGC5GGGGG.ECA:GFGGGEGFGGGGFE%GGAFGGGEC$0;F'GGGDFG=4799(/>?BDFFGGGF?A?\n+@out-MT-23/2\n+GGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCCGTAGTCGGTGTACTCGTAGGTTCAGTACCATTGGTGGCCAATT\n++\n+DDGFFG?2@GEGGGG<GGGFGFG%FGC%$BGDGGEGGFFBGGD9?EEGGCGGGFGGCGEEDGGDBGGEGGFGGDGEGFE@EFC@EEFGGFF8:@C<CFFGE\n+@out-MT-25/2\n+ACCGACCTGGATTACTCCGGTCTGAACTCAGATCACGTAGGACTTTAATCGTTGAACAAACGAACCTTTAATAGCGGCTGCACCATCGGGATGTCCTGAGC\n++\n+GFGFFGGFAGFGGFF=GAG7>GE7<CGDBAEEGFGGG6@GGDGGGGGGGGGFGGG?DGDB@GGGGGDFFGGAGG>EFGGGGCGAF6G9FFEAFEFB8BF<D\n+@out-MT-27/2\n+AGTCCTTGAGAGAGGATTATGATGCGACTGTGAGTGCGTTCGTAGTTTGAGTTTGCTAGGCAGAATAGTAATGAGGATGTAAGCCCGTGGGCGATTATGAG\n++\n+FGGEFCGGG>GCDEGG:G*G.GFCFAFG@GGFFGCFFG>BGBDCGFFGD23DGGFC7;GGFFDGGGFDBFEFGFFFGGF/B?6BFGFFGEF>FFFFG&FF@\n+@out-MT-29/2\n+CTGACGGTTTCTATTTCCTGAGCGTCTGAGATGTTAGTATTAGTTAGTTTTGTTGTGAGTGTTAGGAAAAGGGCATACAGGACTAGGAAGCAGATAAGGAA\n++\n+'$FGGFGGBFGGGGGGEFEEG<7E?FEG;GFFGGFG<FFGFFFF<FFCG7;GFGGGG>GDG0FFDDG=GDEGGGGFFGDGAFGGGAF@CF;EDGFGGGG90\n+@out-MT-31/2\n+CCACATCTACAAAATGCCAGTATCAGGCGGCGGCTTCGAAGCCAAAGTGATGTTTGGATGTAAAGTGAAATATTAGTTGGCGGATGAAGCAGATAGTGAGG\n++\n+GEGF:GG9FGDGCD/FGFFFBEGGFFGCGGGGGCGFG@F8GDFGGFEGGFGGEG.E6FGGG5<GGF*EGCFFGGGGGGFDGB@FG@@29G8FGBA8D=0A3\n+@out-MT-33/2\n+AGGAGAGAAGGAAGAGAAGTAAGCCGAGGGCGTCTTTGATTGTGTAGTAAGGGTGGAAGGTGATTTTATCGGAATGGGAGGTGATTCCTAGGGGGTTGTTT\n++\n+DGG?A?5GGGGGGGGGG@GEFGCEGGGCFGGGGGGGFGBGGFEGFGGFFGGGGGGE??G>8GG9EF/ECGFGEFGGCG:GGDG=G?B9FG;GA=E.;GD@B\n+@out-MT-35/2\n+GAGGAGTATGAGGTTGGCCATGGGTATGTTGTTAAGAAGAGGAATTGAACCTCTGACTGTAAAGTTTTAAGTTTTATGCGATTACCGGGCTCTGCCATCTT\n++\n+
GFF;B>3G(GGEGGDG5GGGBFGFGG<EFBGDEGGEEGGGBFGGGEFDAAGGDB@G<GG?D(CGG:F?EGDFG"..b"GTATGTGCTTTCTCGTGTTACATTGCGCCATCATTGGTATATGGTTAGTGTGTTGGTTAGTAGGCCT\n++\n+GGGGG:FGGGEGGGGGG=FGFFAFGG3GGFEGG8EGGFGG-?EGGGG<GGF@GB:CF(G=GGGEG8GGEGGG@FEFG==+FFGEG6:AFFFGFGG???940\n+@out-MT-1615/2\n+AATTTATGAAGGAGAGGGGTCAGGGTTGATTCGGGAGGAACCTATTGGTGCGGGGGCTTTGTATGATTATGGGCGTTGATTAGTAGTAGTTACTGGTTGAA\n++\n+GGEGGB;FFGGGE?@GFGGF-GGFGCGCE=GGGEFFGGG$FDGGGGGGB:FFDDGGFE=EGGFGDAGFFA@EBCFGGFFAEGGFFGFFG7D1-=EGGGF>C\n+@out-MT-1617/2\n+AAGGGCGCAGACTGCTGCGAACAGAGTGGTGATAGCGCCTAAGCATAGTGTTAGAGTTTGGATTAGTGGGCTATTTTCGGCTAGGGGGTGGAAGAGGATGA\n++\n+@FG7;FG@GGBGDGGGGFG=F<GECFG?GFEGGGGFEFGCGG<GGGFGCFE?GDGGGFGGGE@AGGF?<FGGGFGFE=-FE=9GGF5GGF@-G&+EFG;;B\n+@out-MT-1619/2\n+AGGGGTCATGGGCTGGGTTTTACTATATGATAGGCATGTGATTGGTGGGTCATTATGTGTTGTGGTGCAGGTAGAGGCTTACTAGAAGTGTGAAAACGTAG\n++\n+GG9G<EFGGFGGFGGGFGGCGGG>E>.G8?F,'GFGGEGC=GG>FGGGFFEGGG<FGGEF,E)#;BG9DGGGBFG5<DGGFCGG=>GF8FEGGFDEG=8/<\n+@out-MT-1621/2\n+AGCCACTTATTAGTAATGTTGATAGTAGAATGATGGCTAGGGTGACTTCATATGAGATTGTTTGGGCTACTGCTCGCAGTGCGCCGATCAGGGCGTAGTTT\n++\n+FGGGCFEFF8?DFFGBEGGGFGGG7GDG@CDFGGFF2GGGGGDGGAFG?FG@<GGGEFFGGFG2GG@GFGGFFFBGGGEC5&GEFGFFDGGFDGFEC??>@\n+@out-MT-1623/2\n+GGAGTCAGGGGTGGAGACCTAATTGGGCTGATTTGCCTGCTGCTGCTAGGAGGAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATT\n++\n+GBGFGGGGFGEG:;FGFGEEG+G4GGGGGGGGGGGGFGGGG?FGE8=G@GGFGFD-FFCFGGEGE?DCGDFFFG,.FGGDCGGGFFD?FC<+)CA4FGD-=\n+@out-MT-1625/2\n+GATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTGGGGTCATTGGTGTTCTTGTAGTTGAAA\n++\n+EGGA:DEGGGGDGGGGGCGGEDBBAGGFEGC:GFG9GG'6G;FFGFFGE<GEGGG<C:GGGEGEEGGBGGE8=AG6FGDE24%(GB?AGE?3*%DBAD6D2\n+@out-MT-1627/2\n+TGTAGGTGTGCCTAGTGGTAAGAAGTGGGCTAGGGCATTTTTAATCTTAGAGCGAAAGCCTATAATCACTGTGCCCGCTCATAAGGGGATGGCCATGGCTA\n++\n+GGF?GGG8-GGGG(3GEGFG:FGGGGGGGFGFF7GG)EGEGGAFEFG7GG@1GG@FGGGFGCGFGDGFG7G-GFGF+GB?GGGGFGG.'?<GGEBGGEDF>\n+@out-MT-1629/2\n+CGTGAAAGTGGTTTGGTTTAGACGTCCGGGAATTGCATCTGTTTTTAAGCCTAATGTGGGGACAGCTCATGAGTGCAAGACGTCTTGTGATGTAATTATTA\n++\n+6GEGGGG>GFGC;GB3
BGA3GGGGFEEG=G>GGGFGGFG>CGF;EF@GE-<GDGFF=GGGGE0GGGECF<E@FF<GBG:>>C:GACGFFFFEEE;GG@FFF\n+@out-MT-1631/2\n+TCGATTGTCAACGTCAAGGAGTCGCAGGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCAGTAGTCGGTGTACTCGT\n++\n+GGGF2B29FGFGGGFGG?EGFGGGGG5GECGEGGGGEGFC@F?F;FGFGGGE>?ADE=CGEGFEFEGG>@GGF3GF6GGE?FG(C4EGGFGFGGEF4;D>2\n+@out-MT-1633/2\n+CAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATTAGGAAGATGAGTAGAT\n++\n+GGG9EGGGGGGEGG?G?@DGGAFGGG>FEEEFF@D>GEDGF?>=?DGFEGGFEGG:DFGCFEGFGFGG$<GAFGFGGECFGFBFFGGF>E7GF<:?A@E21\n+@out-MT-1635/2\n+TGGCTCAGTGTCAGTTCGAGATAATAACTTCTTGGTCTTGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTGT\n++\n+GFGGE?GGGG<FFFG;GGFG<GGGGFFGGGGGDGFGEG%@FGFFGEGFGGG>D0CGGF=DGFG9:@>@94G=FFGD>?CGGGEE4'G=GFEB=4:EC=<#C\n+@out-MT-1637/2\n+GGCGATGAGTGTGGGGAGGAATGGGGTGGGTTTTGTATGTTCAAACTGTCATTTTATTTTTACGTTGTTAGATATGGGGAGTAGTGTGATTGAGGTGGAAT\n++\n+FGGGGGG5GGGGGGGGGEFGG?GEF=0EGGG/8CBGE=GFGGEG;FFG;CGG9EGGFFGFGGGEFGFDC3B.FE;G=FFGGGFBGFD=D?CFEED>FFC#A\n+@out-MT-1639/2\n+GCGTTCTGGCTGGTTGCCTCATCGGTTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATG\n++\n+FEDGFFDGAGFGGFGGGFFGGGGGG$G;GG@GGGFEGGGGGGGGGFCBEFF@GFDG2BGGFGGG7GG@FGGGEE<FFGGGGFGGGGF*G=EAFEBB@?-7A\n+@out-MT-1641/2\n+CTAGTATGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGT\n++\n+GFGGGGDGGGG=EDFFGGGCFGFFGFG3AEGEGFFGACGFDGFFEGGGGG?AGFEAFGGG5DB>GCGC@FFEG5<GEFGGGGGGCE@EFBAEGDFEGDAFC\n+@out-MT-1643/2\n+GGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGGGTGCTGGGTAGGATGGGCGGGGGTTGTATTGATGAGATTAGTAGTATGGGAGTGGGAGGGGAAAATAA\n++\n+FFGGGGFGFAFGGDGFFDEFGDGG1GGG?EGG;DFBFGGCGGG:GGGGD4GEC*GGGGEGGGGDFGGFGEDAG<FG<GF)CEBCGGE@FGFEFEGEDGGFF\n+@out-MT-1645/2\n+GATGGGGTGTGATAGGTGGCACGGAGAATTTTGGATTCTCAGGGATGGGTTCGATTCTCATAGTCCTAGAAATAAGGGGGTTTAAGCTCCTATTATTTACT\n++\n+GGGGGBGDFGEGGG;?GGCFFG@EDGGEGGGEGGGGGFF@GFGDD6EGG#GFC=GGGFGFGFGFFGGF@AFGGF9>G><FGC;D<GEFFFFGGDDEEGFAC\n+@out-MT-1647/2\n+GGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTT
TTTTTGTTAGGGTTAACGAGGGTGGTAAGGATGGGGGG\n++\n+GGGGDGGGGG7EGGGGGGGGAGGGBFFFGEGAGGFGGGEGGFGGFGGGGFGGGFEGCEFGGGFGGGGDGF@>GFGGGGFFGGGGFG8FEFF@FF>2EGFE?\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE_read1.fq Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,3296 @@\n+@out-MT-1/1\n+CATCATAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCAGACTAATCTTAGTTA\n++\n+GGGGGGF:GGDFFFFGGGF5@AGGGGGEGG>\'%GE?FF<?FBDGFEF2DFB=GFCGG1&/GBGGGDGGFBFEFDF6@GGEGEGGFA&F0FGF=GGGFC@9C\n+@out-MT-3/1\n+CCTATTAACCACTCACGGGAGCTCTCCCTGCATTTGGTATTTTCGTCTGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATG\n++\n+FFEFGGGGFD&;GGGGGFGFGGFBGGF\'EFFGGGGGFGGGGFGGGGGGGGFDGGGBGFF6EGFEG1EGGFGGC@GD7GFEFG@FGGCE?E<FGD=GGG/FD\n+@out-MT-5/1\n+CCCGCCGCCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAG\n++\n+F0;BGGEGEGGGFGFGGGG?=GE3DFGFGG?GGGGFGG?FFG@DDFGGGG>EGEFG@G?EGCFFGGEFGFBGEF(GGGGDFEGGCAFD98DEGFF;A>E>3\n+@out-MT-7/1\n+ATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCAAAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTT\n++\n+AG=AFEGGFGDGFG>GDFEGGGGE:GFGGG@GGECF@GGGFGAGE\'1BGFFGEGGGFGGGFD;G@FF5>GFGG=F4BEGGBEF@BGGG7GEG@7GC.AGGB\n+@out-MT-9/1\n+TATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGA\n++\n+EEFD6BAFFGGGGFFDGGGG@:GGGGGGFFFFF/9=FGGBFGA:FDGBGEFGGGGG;@FGGCDGFGGGGGE<G4>FGCAGEED1GG(*0?G=:>CGFGFCF\n+@out-MT-11/1\n+GCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTC\n++\n+BFAGF;FGGGDG+G8GDGGG@F?GGGGGBEEGFFGD9B.GFGG:GG)DGFGGGGGG9EGFGFGDGF?+2=E.2GGFFF:EFGGFEGFEFE3DGE71G;E2=\n+@out-MT-13/1\n+TATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCACAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAA\n++\n+GGGECFFGD17EDFGGGGG<GAFFE6CGGG3FCFGC>F:GG0GG8FFBGGGGGGG;DFFEGGGFGFGGFG>FFG;DF6GGGGGFFGGCEFG<6EFFEBGA9\n+@out-MT-15/1\n+ACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCCGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGT\n++\n+GFG>FGEFGDGBDGDGF@GFG@6DFCCGEEGGGBG>GEDFAGGGGBDGFGGBBGGG0)GGGEG;FGFGBGGEDGEFGGGDC6GGBDB?(AG?FGFGEE22?\n+@out-MT-17/1\n+TTCCGCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTAACGAACGAAAAATTCTA\n++\n+GFEB/GFE?FGGG2FFGFGGFGG+FGGDG7GBGC@BFGG=
GGF6EGGBAGFGGGGGGDGC<GGGG=FAFF?FGG@GGEGBDG:C\'5@GF8FGD;FGA@?G/\n+@out-MT-19/1\n+AGCCTACTCCAATGCTAAAACTAATCGTCCCAACAAGTATATTACTACCACTGACATGACTTTCCAAAAAACACATAATTTGAATCAACACCACCACCCAC\n++\n+G@GFGA8GFGFFFGDGEGGFGG8GFFGC1G2GGFGG3GGF@F0EFCGGFFB)ECG?FFG9GBGGGG<DFEGGCCGE8GEF7EGFDGFE<FG#,D9G=<B7F\n+@out-MT-21/1\n+TGTAACACGAGAAAGCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTCATTACCTCAGAGGTTTTTTTCT\n++\n+GEGFAAGFGGEGGGGCFFEGGF=DGF8BFECE>GFFFFGBFB8GFGF67+FFEDFGGGG?EGCG>GDCG>GEC=4CED\'GGG?:D=?:FF>GFGFEDC0GA\n+@out-MT-23/1\n+ACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAACTCAGACGCTCAG\n++\n+GFGGGGGGEGGFCEFGF98G>GGGGGGGGGEFGGFG9@@EGGGFGFFGFGGFG;GFEG>FGFDFGDGGGFFCDFEF)FGG<@?FGEG2ECAFGGEED.BFG\n+@out-MT-25/1\n+TACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTCCATGCTAA\n++\n+EGGGGFDC7GFFF9EA@GCGFFGGGDGEG<CGGFFGGFFAFCGFGGGGDFEF;GGGB7FGGG7.GFFFFF<BA8DFGGGEF=ABFG38CFGA$G;A=7ED=\n+@out-MT-27/1\n+ACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACAGACCTAAAATCG\n++\n+EBGGG=GGGEFFG@GFGFD;F?GGEFE0FGFFFFFEGGG<?F@DG>EFGGGGGFDGF-FF:E=GGGFFF1AGFFGB?DBGGFE<GDFF/C@CGGGGEFEFF\n+@out-MT-29/1\n+AAAGCTGGTTTCAAGCCAACCCCATGGCCTCCATGACTTTTTCAAAAAGGTATTAGAAAAACCATTTCATAACTCTGTCAAAGTTAACTTATAGGCTAAAT\n++\n+F>F?BGGGGEFGGGGGGGFEGGGEGGGG:GGGG;G;G7FGCBBGG8GGGGGFGGDGD<DAGFEEGEFFGA3FDF17<GGFB4F>CFG$%C4FDGGGGD5=9\n+@out-MT-31/1\n+GAGCTCACCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCA\n++\n+FGEGGGGADEFFGG.GGGG1FGGEGGGGGGGGGGGFFFG?GGDGGG:GDGGGF87GGGGEGGG>GFFGFGGG1GGGGB6FGG09G8B?BGGE3;)?FD;7-\n+@out-MT-33/1\n+GTGAGGCCAAATATCATTCTGAGGGGCCACACTAATTACAAACTTACTATCCGCCACCCCATACATTGGGACATACCTAGTTCAATGAATCCGAGGAGGCT\n++\n+GEGCGEA6FFGGFGGFB:DEGEAGGGGFGG>%G0GFG95GDGCFCFGGGEFGGG3F+CGFFGG>GGGGGA5)D"F>GEF68$D/-?GFFB&$CGFA@GAGC\n+@out-MT-35/1\n+CCAGGTCGGTTTCTATCTACATCCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTGCCCCCGTAAATGATATCATC\n
++\n+GFGFDGGGGEGB-GGGDBAAFG$FGGGGD=GFGCGG1EGGGC=GGGCF.GGGGCFFGGGFFF8AGD<EBCDGD'..b"TCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACAACACATAATGACCCACCAA\n++\n+2BEGBGGGFGCF>GG8G?GGFG=GFEGG.GAEFGFGFAG>BDDGFF0GF8FGGFGGG<GGDGGGF=GGFGBEEGD@CGCGGGG;ACEE;F7CG?GC+<:'=\n+@out-MT-1615/1\n+CCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTGCTTCTTCCCACTCAT\n++\n+GGGGCGGGFGEGFGDD@GEGGFGGGF?GGG:GF1GGGEGFGGGGC?GGGGGGGE<GGFGFEGGGDD;GB@@C?DBFFGEGG>GGB#GFE>FC>E+E79G>E\n+@out-MT-1617/1\n+TTCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCCAAGCCTCACCCCACTACTAGG\n++\n+GFAFE?GGGEGF@-GEBGGGGAGF>GFFCGFGGGDGFGFGGFDFGAGCGGGFG@GD=FFGGF@FEGGFDFGFDE=FGEGGED;GGBD*=E:CE47EGFGG,\n+@out-MT-1619/1\n+TATCCCCATACTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGTAAGCCTAACCGCTAACATTACTGCAGGCCACCTACTCA\n++\n+GGBGGG=GCE:FG<FGGFGGEFF<AGCAGFGGGEDFGFGGGGGFG(DGFF8DFGGEDFGGG1GG3FGFBGGFBFGFF5GG%60EA;D?EGGAG5F?>>EE9\n+@out-MT-1621/1\n+GCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCGTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCAAACCCA\n++\n+GE?GGFGEDGGGDGGG?GGEFGCGGG=G7F:FGGGCG;EGGG9G=EG>FGF@%EGGGGGF?FGFF@<BGFF@4$GFEDGF7B)EG?CCFGGFE;;*2);G8\n+@out-MT-1623/1\n+AGGAATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATGCTATACAACCGTATCGGCGATATCG\n++\n+EGGGGGGGFE5EGGEGGCGGGGG:F/GAGGFFGE;FAGBFFFG?CCFF8GFGGGGGEBGEGGEG4E@6FGGEFE>&FGGFGG6$14.1<CCEGGGGGEFGE\n+@out-MT-1625/1\n+AAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAATACTAAACCCCCATAAAGAGGAGAAGG\n++\n+EGGGBFGGF;CG49GGFGGEAFGGGGFGCFG6FGFGGGGFFDGG=EGB3GGFEGGGGG4GABDB=FFCGF@F>EFFFA8*GGGGGGG=>AF+GGE@F@>DE\n+@out-MT-1627/1\n+ACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGATAACCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCAT\n++\n+GGGGFGGFF<FGGGFGGFGEGGGG<G;GDGCE;FFGF3%GGGFGGGEBGGGEFGGB<GFCGGDGGGEGGGDFFCF@<4FCEG:*F8;CEGFFGGG2AFBF+\n+@out-MT-1629/1\n+ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTAGATAACAGACGAGGTCAACGATCCCTC\n++\n+GGGG?GGFEGG
GFF4=GGFFEGFFCGDFFAFGGGEGGGGFECGFDE;GGFBFA:GGD9FGF;/GGEGGFFFFFE$-G<FFFFGG>FGC38@FFGFFBCEDF\n+@out-MT-1631/1\n+TCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGA\n++\n+FGGGGFBGGGGFGGGFGCGGGG=G=>FGFEGFGGDGGBFGGGFGFGGGFFGGGGFGGGAGGGFD<G;GFGF2C@FGG2GFBGEGFCEF=?-C?G7>EDA?C\n+@out-MT-1633/1\n+TGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAATA\n++\n+;GG=GFGEGGGEGFGFGGG8DG@GFGGFGG:CFGGGGGEEGGFEGGG?8FGDFA;GEGGBGGFDDGG&EFFADGGFC.=G.FGGECD6GG=F@6AGFCFG6\n+@out-MT-1635/1\n+TGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCA\n++\n+G;GGGFGGGGFFEGFFFGF9FGGEGGDGGGGGF?GGGEGEEGGEGGGEFGGAFAG;GDGGCGGGBFCFG@7ECGGDG2DGF@GEBDC35CG@GGEBE5>*F\n+@out-MT-1637/1\n+AACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACA\n++\n+FGFGGGFFFF;GEDFG?GG;G9BGGGGGGDFGEEG;FGGEBGFGFEGC:EF4GG@FGCF:GGGFGGFDGG?GGGBFF49F=EGGGGEA9C=BGCG;EAA@B\n+@out-MT-1639/1\n+AACCACCCACAGCCTAATTATTAGCATCATCCCTCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCA\n++\n+GGGGE@GGGGECFGC@GCGDG?C@AAGFGF?7GGGGGGDFGGF6G>C?GCBFGDFGFFDGGGBGC1FGFGGEG?B2EAEBFGG;G?EGG81GF76FF@;B'\n+@out-MT-1641/1\n+CTTCCCTCTACACTTATCATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGTAAG\n++\n+FG7FF0FFFGGFGC;GGGFDEG<FFFCGCD>EF<EGGGGDGG:FGG>FF>FEGFEGGDCGFEEGEF9GGGGGGGGGE7GGEFB;@G;@BGFEFGGFGGF?6\n+@out-MT-1643/1\n+TAATAATAACAATTGAATGTATGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACT\n++\n+G;AFGGGGFG@GFGF7EGGF#GGGFEGGGFDGGD*FE6GFGGFE;GF?GG@GGGFGG>FAFEEEFF5FF9GGGB=$A;?G<CCFGGGG?@B$E@AC>E?.>\n+@out-MT-1645/1\n+CATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAA\n++\n+GGGFGDFF(GGFFGGF.GFGGFGGGAEF;DGAFFCFEDGFGG-GFEGGDFB6GEEGBFEGBCGFGGGGGGGFG<E:=G7FCG<?@G@BBGEE=GBDFAFFE\n+@out-MT-1647/1\n+ACTGCTAACTCATGCCCCCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAA
CAGCTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACT\n++\n+CFFGFF<?FEFDEGGCGGF:GEEGGGGFFGEGAEGEFEFG;;GCFGGGDFG8DGFGGGGGFFGGGEFG/;FGGGGGF2CGGFFDEG?F@GGEFCDDGFD@A\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE_read1_genSeqErrorModel.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE_read1_genSeqErrorModel.p Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,1239459 @@\n+(lp1\n+(lp2\n+(lp3\n+cnumpy.core.multiarray\n+scalar\n+p4\n+(cnumpy\n+dtype\n+p5\n+(S\'f8\'\n+I0\n+I1\n+tRp6\n+(I3\n+S\'<\'\n+NNNI-1\n+I-1\n+I0\n+tbS\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp7\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp8\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp9\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2c?\'\n+tRp10\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp11\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp12\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp13\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp14\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp15\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp16\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp17\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp18\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp19\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2c?\'\n+tRp20\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp21\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp22\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp23\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp24\n+ag4\n+(g6\n+S\'C\\xd3\\x1d\\xec\\xb7\\xdax?\'\n+tRp25\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp26\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3}?\'\n+tRp27\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp28\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp29\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2c?\'\n+tRp30\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2c?\'\n+tRp31\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp32\n+ag4\n+(g6\n+S\'ch\\xba\\x83\\xfdV\\x8b?\'\n+tRp33\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp34\n+ag4\n+(g6\n+S\'C\\xd3\\x1d\\xec\\xb7\\xdax?\'\n+tRp35\n+ag4\n+(g6\n+S\'#>\\x81Tr^\\x86?\'\n+tRp36\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2\\x83?\'\n+tRp37\n+ag4\n+(g6\n+S\'C\\xd3
\\x1d\\xec\\xb7\\xda\\x88?\'\n+tRp38\n+ag4\n+(g6\n+S\'\\x93\\xf3\\xb2\\x88O \\x95?\'\n+tRp39\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2\\x93?\'\n+tRp40\n+ag4\n+(g6\n+S\'\\xba\\x83\\xfdV\\x1bC\\xa3?\'\n+tRp41\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3\\xad?\'\n+tRp42\n+ag4\n+(g6\n+S\'\\xe2\\x13H%\\xe7e\\xb1?\'\n+tRp43\n+ag4\n+(g6\n+S\'C\\xd3\\x1d\\xec\\xb7\\xda\\xc8?\'\n+tRp44\n+ag4\n+(g6\n+S\'Ur^\\x16\\xf1\\t\\xe0?\'\n+tRp45\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp46\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp47\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp48\n+aa(lp49\n+g4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp50\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp51\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp52\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp53\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp54\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp55\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp56\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2c?\'\n+tRp57\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp58\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp59\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp60\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp61\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp62\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp63\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp64\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp65\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp66\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2s?\'\n+tRp67\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp68\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp69\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2S?\'\n+tRp70\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2s?\'\n+tRp71\n+ag
4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp72\n+ag4\n+(g6\n+S\'\\x84\\xfdV\\x1bC\\xd3m?\'\n+tRp73\n+ag4\n+(g6\n+S\'\\xe2\\x13H%\\xe7e\\x81?\'\n+tRp74\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2s?\'\n+tRp75\n+ag4\n+(g6\n+S\'\\x02\\xa9\\xe4\\xbc,\\xe2\\x83?\'\n+tRp76\n+ag4\n+(g6\n+S\'C\\xd3\\x1d\\xec\\xb7\\xdax?\'\n+tRp77\n+ag4\n+(g6\n+S\'\\xe2\\x13H%\\xe7e\\x81?\'\n+tRp78\n+ag4\n+(g6\n+S"R\\xc9yY\\xc4\'\\x90?"\n+tRp79\n+ag4\n+(g6\n+S\'C\\xd3\\x1d\\xec\\xb7\\xda\\x88?\'\n+tRp80\n+ag4\n+(g6\n+S\'\\xe2\\x13H%\\xe7e\\x91?\'\n+tRp81\n+ag4\n+(g6\n+S\'r^\\x16\\xf1\\t\\xa4\\x92?\'\n+tRp82\n+ag4\n+(g6\n+S\'r^\\x16\\xf1\\t\\xa4\\x92?\'\n+tRp83\n+ag4\n+(g6\n+S"R\\xc9yY\\xc4\'\\xa0?"\n+tRp84\n+ag4\n+(g6\n+S\'r^\\x16\\xf1\\t\\xa4\\xa2?\'\n+tRp85\n+ag4\n+(g6\n+S\'\\xe2\\x13H%\\xe7e\\xb1?\'\n+tRp86\n+ag4\n+(g6\n+S\'#>\\x81Tr^\\xc6?\'\n+tRp87\n+ag4\n+(g6\n+S\'\\xe7e\\x11\\x9f@*\\xe1?\'\n+tRp88\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp89\n+ag4\n+(g6\n+S\'\\x'..b"2\\xb3?'\n+tRp298515\n+ag4\n+(g6\n+S'\\xbecj`\\xef\\x98\\xba?'\n+tRp298516\n+ag4\n+(g6\n+S'\\xbecj`\\xef\\x98\\xba?'\n+tRp298517\n+ag4\n+(g6\n+S'\\x92$I\\x92$I\\xc2?'\n+tRp298518\n+ag4\n+(g6\n+S'qV~B\\x9c\\x95\\xbf?'\n+tRp298519\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298520\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298521\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298522\n+aa(lp298523\n+g4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298524\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298525\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298526\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x80?'\n+tRp298527\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298528\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298529\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298530\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298531\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x
00\\x00\\x00\\x00'\n+tRp298532\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298533\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x80?'\n+tRp298534\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298535\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x80?'\n+tRp298536\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298537\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298538\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x80?'\n+tRp298539\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x80?'\n+tRp298540\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x80?'\n+tRp298541\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298542\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x80?'\n+tRp298543\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298544\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298545\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298546\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298547\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298548\n+ag4\n+(g6\n+S'\\x18\\x86a\\x18\\x86a\\x98?'\n+tRp298549\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298550\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298551\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298552\n+ag4\n+(g6\n+S'\\x14EQ\\x14EQ\\xa4?'\n+tRp298553\n+ag4\n+(g6\n+S'\\x18\\x86a\\x18\\x86a\\x98?'\n+tRp298554\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298555\n+ag4\n+(g6\n+S'\\x1c\\xc7q\\x1c\\xc7q\\xac?'\n+tRp298556\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\x90?'\n+tRp298557\n+ag4\n+(g6\n+S'\\x92$I\\x92$I\\xb2?'\n+tRp298558\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\xb0?'\n+tRp298559\n+ag4\n+(g6\n+S'\\x10\\x04A\\x10\\x04A\\xc0?'\n+tRp298560\n+ag4\n+(g6\n+S'\\x18\\x86a\\x18\\x86a\\xc8?'\n+tRp298561\n+ag4\n+(g6\n+S'\\x96eY\\x96eY\\xc6?'\n+tRp298562\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298563\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp298564\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00
\\x00\\x00\\x00'\n+tRp298565\n+aa(lp298566\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp298567\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp298568\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aaaa(lp298569\n+I0\n+aI1\n+aI2\n+aI3\n+aI4\n+aI5\n+aI6\n+aI7\n+aI8\n+aI9\n+aI10\n+aI11\n+aI12\n+aI13\n+aI14\n+aI15\n+aI16\n+aI17\n+aI18\n+aI19\n+aI20\n+aI21\n+aI22\n+aI23\n+aI24\n+aI25\n+aI26\n+aI27\n+aI28\n+aI29\n+aI30\n+aI31\n+aI32\n+aI33\n+aI34\n+aI35\n+aI36\n+aI37\n+aI38\n+aI39\n+aI40\n+aI41\n+aaI33\n+aF0.006529258107000625\n+a(lp298570\n+(lp298571\n+(lp298572\n+F0\n+aF0.49180000000000001\n+aF0.3377\n+aF0.17050000000000001\n+aa(lp298573\n+F0.52380000000000004\n+aF0\n+aF0.2661\n+aF0.21010000000000001\n+aa(lp298574\n+F0.37540000000000001\n+aF0.23549999999999999\n+aF0\n+aF0.38900000000000001\n+aa(lp298575\n+F0.2505\n+aF0.25519999999999998\n+aF0.49419999999999997\n+aF0\n+aaaF0.01\n+a(lp298576\n+F0.999\n+aF0.001\n+aa(lp298577\n+I1\n+aI2\n+aaF0.40000000000000002\n+a(lp298578\n+F0.25\n+aF0.25\n+aF0.25\n+aF0.25\n+aaa.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-PE_read2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-PE_read2.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,3296 @@\n+@out-MT-1/2\n+GAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATTTGTTGTGGGTCTCATGAGTTGGAGTGTAGGATAAATCATGCCAAGGCGAGGA\n++\n+GEFEE<DGFGGGFEFFGGEFGGGE1A<G==FEGFGBFG=GCGGGF>FGEGAG6BGGGGGGCE@GGD6GGFFGFEDB,FEF7B;6=GECF@%CDFDFF?CC7\n+@out-MT-3/2\n+GGGGTTTGGTGGAAATTTTTTGTTATGATGTCTGTGTGGAAAGTGGCTGTGCAGACATTCAATTGTTATTATTATGTCCTACAAGCATTAATTAATTAACA\n++\n+FFCGFGGGGGEGCGGG::GFFG9FF@FGG3F=G>/DGFAGGGEFDFEEGGG?GFGGGGGGGE@DF=CF5GGECDDGFF>GEC4GFGGGGGGG=CEGGGGDF\n+@out-MT-5/2\n+TGGCCCAGCTCGGCTCGAATAAGGAGGCTTAGAGCTGTGAATAGGACTCCAGCTCATGCGCCGAATAATAGGTATAGTGTTCCAATGTCTTTGTGGTTTGT\n++\n+FGCGBGDGGFGCGFF@GEGGG+GGGFGGFGGFGFG:F6G##'4FFDGGGGGEGDFEAGGEGFGGG9DEDGFGGF6FGAFFDGG/FDG@F8>GFGEDDFF==\n+@out-MT-7/2\n+TGTTTGGATGTAAAGTGAAATATTAGTTGGCGTATGAAGCAGATAGTGAGGAAAGTTGAGCCAATAATGACGTGAAGTCCGTGGAAGCCTGTGGCTACAAA\n++\n+:GFGGDG-G7CG.G>G7FDEFFFGDBGEGFF2&1GGGFGF<EGGGGGGFFGGGFFGGFFGGEDF6DGDFGGGGFFGEG)FGD(<FGGA44GF:EGF?GG>D\n+@out-MT-9/2\n+GGCAAGGTCGAAGGGGGTTCGGTTGGTCTCTGCTAGTGTGGAGATAAATCATATTATGGCCAAGGGTCATGATGGCAGGAGTAATCAGAGGTGTTCTTGTA\n++\n+GF=GFBFGGGGCF+;G;GGGGADGFBGGEGFFGCGGGGEA-G@EGGGFGFG:G>EFBFG5=GFEGGBFAGGG9GCGFG=3DGFF?EDGGF;5GF?F?BG9*\n+@out-MT-11/2\n+CCGTATCGAAGGCCTTTTTGGACAGGTGGTGTGTGGTGGCCTTGGTATGTGCTTTCTCGTGTTACATCGCGCCATCATTGGTATATGGTTAGTGTGGTGGT\n++\n+GDG5GGFGFFGGGEGFGFDDGGG@GGG%<GGGGGGF6GFGAGGGGFF>BFBEGGGFFFGGGA>BFGCCFDG>GAGGGG1FGGFGDGEG0D8FGFGA(FD1B\n+@out-MT-13/2\n+TGATAAGTGTAGAGGGAAGGTTAATGGTTGATATTGCTAGGGTGGCGCTTCCAATTAGGTGCATGAGTAGGTGGCCTGCAGTAATGTTAGCGGTTAGGCGT\n++\n+GDG?GGGF=GFFFGGFEA7GG<GGGGBGG@AGEGGGGEGBFFG79D?GFGFGGG=FGGGFB**=GFFDGGDE;BAGEG1FFCGGGGFFGDGGGG4FDFFCE\n+@out-MT-15/2\n+GTCCGTGCGAGAATAATGATGTATGCTTTGTTTCTGTTGAGTGTGGGTTTAGTAATGGGGTTTGTGGGGTTTTCTTCTAAGCCTTCACCTATTTATGGGGG\n++\n+:@DB8F.F&F?AG=GGGGGDF0%DGGG:GFFB<FGGGCGGEEDGBGFGFCGG3@?D@GGFDGFGGGGEGFGGECGF?F>/;F@G4?$DFGGGECED<BBG2\n+@out-MT-17/2\n+GGGGTTGGGTATGGGGAGGGGGGTTCATAGTAGAAGAGCGATGGTGAGAGCTAAGGTCGGGGCGGTGATGTAGAGGGTGATGGTAGATGTGGCGGGTTTTA\n++\n+GGGF;GEFFFGG=GFGF@EDEGGDGGCG;GEGGGGGEF:FGGG
GGAGFF:GGGCGCECFGGBCBFGGG=-EBDG<GFF3,F2FCAGGDC@6GFFD=E>GGG\n+@out-MT-19/2\n+GGGAGATTAGTATACAGAGGTAGAGTTTTTTTCGTGATAGTGGTTCACTGGATAAGTGGCGTTGGCTTGCCATGATTGTGAGGGGTAGGAGTCAGGTAGTT\n++\n+GBGGGF?GFG?GFG+?FFDGGGGGGBGGGF8GG<4FGFGGGGGFCGEGGGGGGEGGGCGGFEGGFGGGGGEGGGEGGGGFGGGF6<;8AEG9GFGGDFFBF\n+@out-MT-21/2\n+ATAAGCAGTGCTTGAATTCTTTGGTTTCGGTTGTTTTCTATTAGACTATGGTGAGCTCAGCTGATTGATACTCCGGATGCGAGTAATCCGGATGTGTTTAG\n++\n+AGGGFGGGGG?EFFGFF3'FED.3AFDFG=FGGGC5GGGGG.ECA:GFGGGEGFGGGGFE%GGAFGGGEC$0;F'GGGDFG=4799(/>?BDFFGGGF?A?\n+@out-MT-23/2\n+GGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCCGTAGTCGGTGTACTCGTAGGTTCAGTACCATTGGTGGCCAATT\n++\n+DDGFFG?2@GEGGGG<GGGFGFG%FGC%$BGDGGEGGFFBGGD9?EEGGCGGGFGGCGEEDGGDBGGEGGFGGDGEGFE@EFC@EEFGGFF8:@C<CFFGE\n+@out-MT-25/2\n+ACCGACCTGGATTACTCCGGTCTGAACTCAGATCACGTAGGACTTTAATCGTTGAACAAACGAACCTTTAATAGCGGCTGCACCATCGGGATGTCCTGAGC\n++\n+GFGFFGGFAGFGGFF=GAG7>GE7<CGDBAEEGFGGG6@GGDGGGGGGGGGFGGG?DGDB@GGGGGDFFGGAGG>EFGGGGCGAF6G9FFEAFEFB8BF<D\n+@out-MT-27/2\n+AGTCCTTGAGAGAGGATTATGATGCGACTGTGAGTGCGTTCGTAGTTTGAGTTTGCTAGGCAGAATAGTAATGAGGATGTAAGCCCGTGGGCGATTATGAG\n++\n+FGGEFCGGG>GCDEGG:G*G.GFCFAFG@GGFFGCFFG>BGBDCGFFGD23DGGFC7;GGFFDGGGFDBFEFGFFFGGF/B?6BFGFFGEF>FFFFG&FF@\n+@out-MT-29/2\n+CTGACGGTTTCTATTTCCTGAGCGTCTGAGATGTTAGTATTAGTTAGTTTTGTTGTGAGTGTTAGGAAAAGGGCATACAGGACTAGGAAGCAGATAAGGAA\n++\n+'$FGGFGGBFGGGGGGEFEEG<7E?FEG;GFFGGFG<FFGFFFF<FFCG7;GFGGGG>GDG0FFDDG=GDEGGGGFFGDGAFGGGAF@CF;EDGFGGGG90\n+@out-MT-31/2\n+CCACATCTACAAAATGCCAGTATCAGGCGGCGGCTTCGAAGCCAAAGTGATGTTTGGATGTAAAGTGAAATATTAGTTGGCGGATGAAGCAGATAGTGAGG\n++\n+GEGF:GG9FGDGCD/FGFFFBEGGFFGCGGGGGCGFG@F8GDFGGFEGGFGGEG.E6FGGG5<GGF*EGCFFGGGGGGFDGB@FG@@29G8FGBA8D=0A3\n+@out-MT-33/2\n+AGGAGAGAAGGAAGAGAAGTAAGCCGAGGGCGTCTTTGATTGTGTAGTAAGGGTGGAAGGTGATTTTATCGGAATGGGAGGTGATTCCTAGGGGGTTGTTT\n++\n+DGG?A?5GGGGGGGGGG@GEFGCEGGGCFGGGGGGGFGBGGFEGFGGFFGGGGGGE??G>8GG9EF/ECGFGEFGGCG:GGDG=G?B9FG;GA=E.;GD@B\n+@out-MT-35/2\n+GAGGAGTATGAGGTTGGCCATGGGTATGTTGTTAAGAAGAGGAATTGAACCTCTGACTGTAAAGTTTTAAGTTTTATGCGATTACCGGGCTCTGCCATCTT\n++\n+
GFF;B>3G(GGEGGDG5GGGBFGFGG<EFBGDEGGEEGGGBFGGGEFDAAGGDB@G<GG?D(CGG:F?EGDFG"..b"GTATGTGCTTTCTCGTGTTACATTGCGCCATCATTGGTATATGGTTAGTGTGTTGGTTAGTAGGCCT\n++\n+GGGGG:FGGGEGGGGGG=FGFFAFGG3GGFEGG8EGGFGG-?EGGGG<GGF@GB:CF(G=GGGEG8GGEGGG@FEFG==+FFGEG6:AFFFGFGG???940\n+@out-MT-1615/2\n+AATTTATGAAGGAGAGGGGTCAGGGTTGATTCGGGAGGAACCTATTGGTGCGGGGGCTTTGTATGATTATGGGCGTTGATTAGTAGTAGTTACTGGTTGAA\n++\n+GGEGGB;FFGGGE?@GFGGF-GGFGCGCE=GGGEFFGGG$FDGGGGGGB:FFDDGGFE=EGGFGDAGFFA@EBCFGGFFAEGGFFGFFG7D1-=EGGGF>C\n+@out-MT-1617/2\n+AAGGGCGCAGACTGCTGCGAACAGAGTGGTGATAGCGCCTAAGCATAGTGTTAGAGTTTGGATTAGTGGGCTATTTTCGGCTAGGGGGTGGAAGAGGATGA\n++\n+@FG7;FG@GGBGDGGGGFG=F<GECFG?GFEGGGGFEFGCGG<GGGFGCFE?GDGGGFGGGE@AGGF?<FGGGFGFE=-FE=9GGF5GGF@-G&+EFG;;B\n+@out-MT-1619/2\n+AGGGGTCATGGGCTGGGTTTTACTATATGATAGGCATGTGATTGGTGGGTCATTATGTGTTGTGGTGCAGGTAGAGGCTTACTAGAAGTGTGAAAACGTAG\n++\n+GG9G<EFGGFGGFGGGFGGCGGG>E>.G8?F,'GFGGEGC=GG>FGGGFFEGGG<FGGEF,E)#;BG9DGGGBFG5<DGGFCGG=>GF8FEGGFDEG=8/<\n+@out-MT-1621/2\n+AGCCACTTATTAGTAATGTTGATAGTAGAATGATGGCTAGGGTGACTTCATATGAGATTGTTTGGGCTACTGCTCGCAGTGCGCCGATCAGGGCGTAGTTT\n++\n+FGGGCFEFF8?DFFGBEGGGFGGG7GDG@CDFGGFF2GGGGGDGGAFG?FG@<GGGEFFGGFG2GG@GFGGFFFBGGGEC5&GEFGFFDGGFDGFEC??>@\n+@out-MT-1623/2\n+GGAGTCAGGGGTGGAGACCTAATTGGGCTGATTTGCCTGCTGCTGCTAGGAGGAGGCCTAGTAGTGGGGTGAGGCTTGGATTAGCGTTTAGAAGGGCTATT\n++\n+GBGFGGGGFGEG:;FGFGEEG+G4GGGGGGGGGGGGFGGGG?FGE8=G@GGFGFD-FFCFGGEGE?DCGDFFFG,.FGGDCGGGFFD?FC<+)CA4FGD-=\n+@out-MT-1625/2\n+GATGTTGGATGGGGTGGGGAGGTCGATGAATGAGTGGTTAATTAATTTTATTAGGGGGTTAGTTTTGCGTATTGGGGTCATTGGTGTTCTTGTAGTTGAAA\n++\n+EGGA:DEGGGGDGGGGGCGGEDBBAGGFEGC:GFG9GG'6G;FFGFFGE<GEGGG<C:GGGEGEEGGBGGE8=AG6FGDE24%(GB?AGE?3*%DBAD6D2\n+@out-MT-1627/2\n+TGTAGGTGTGCCTAGTGGTAAGAAGTGGGCTAGGGCATTTTTAATCTTAGAGCGAAAGCCTATAATCACTGTGCCCGCTCATAAGGGGATGGCCATGGCTA\n++\n+GGF?GGG8-GGGG(3GEGFG:FGGGGGGGFGFF7GG)EGEGGAFEFG7GG@1GG@FGGGFGCGFGDGFG7G-GFGF+GB?GGGGFGG.'?<GGEBGGEDF>\n+@out-MT-1629/2\n+CGTGAAAGTGGTTTGGTTTAGACGTCCGGGAATTGCATCTGTTTTTAAGCCTAATGTGGGGACAGCTCATGAGTGCAAGACGTCTTGTGATGTAATTATTA\n++\n+6GEGGGG>GFGC;GB3
BGA3GGGGFEEG=G>GGGFGGFG>CGF;EF@GE-<GDGFF=GGGGE0GGGECF<E@FF<GBG:>>C:GACGFFFFEEE;GG@FFF\n+@out-MT-1631/2\n+TCGATTGTCAACGTCAAGGAGTCGCAGGTCGCCTGGTTCTAGGAATAATGGGGGAAGTATGTAGGAGTTGAAGATTAGTCCGCAGTAGTCGGTGTACTCGT\n++\n+GGGF2B29FGFGGGFGG?EGFGGGGG5GECGEGGGGEGFC@F?F;FGFGGGE>?ADE=CGEGFEFEGG>@GGF3GF6GGE?FG(C4EGGFGFGGEF4;D>2\n+@out-MT-1633/2\n+CAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATTAGGAAGATGAGTAGAT\n++\n+GGG9EGGGGGGEGG?G?@DGGAFGGG>FEEEFF@D>GEDGF?>=?DGFEGGFEGG:DFGCFEGFGFGG$<GAFGFGGECFGFBFFGGF>E7GF<:?A@E21\n+@out-MT-1635/2\n+TGGCTCAGTGTCAGTTCGAGATAATAACTTCTTGGTCTTGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTGT\n++\n+GFGGE?GGGG<FFFG;GGFG<GGGGFFGGGGGDGFGEG%@FGFFGEGFGGG>D0CGGF=DGFG9:@>@94G=FFGD>?CGGGEE4'G=GFEB=4:EC=<#C\n+@out-MT-1637/2\n+GGCGATGAGTGTGGGGAGGAATGGGGTGGGTTTTGTATGTTCAAACTGTCATTTTATTTTTACGTTGTTAGATATGGGGAGTAGTGTGATTGAGGTGGAAT\n++\n+FGGGGGG5GGGGGGGGGEFGG?GEF=0EGGG/8CBGE=GFGGEG;FFG;CGG9EGGFFGFGGGEFGFDC3B.FE;G=FFGGGFBGFD=D?CFEED>FFC#A\n+@out-MT-1639/2\n+GCGTTCTGGCTGGTTGCCTCATCGGTTGATGATAGCCAAGGTGGGGATAAGTGTGGTTTCGAAGAAGATATAAAATATGATTAGTTCTGTGGCTGTGAATG\n++\n+FEDGFFDGAGFGGFGGGFFGGGGGG$G;GG@GGGFEGGGGGGGGGFCBEFF@GFDG2BGGFGGG7GG@FGGGEE<FFGGGGFGGGGF*G=EAFEBB@?-7A\n+@out-MT-1641/2\n+CTAGTATGAGGAGCGTTATGGAGTGGAAGTGAAATCACATGGCTAGGCCGGAGGTCATTAGGAGGGCTGAGAGGGCCCCTGTTAGGGGTCATGGGCTGGGT\n++\n+GFGGGGDGGGG=EDFFGGGCFGFFGFG3AEGEGFFGACGFDGFFEGGGGG?AGFEAFGGG5DB>GCGC@FFEG5<GEFGGGGGGCE@EFBAEGDFEGDAFC\n+@out-MT-1643/2\n+GGTTCGGGGTATGGGGTTAGCAGCGGTGTGTGGGTGCTGGGTAGGATGGGCGGGGGTTGTATTGATGAGATTAGTAGTATGGGAGTGGGAGGGGAAAATAA\n++\n+FFGGGGFGFAFGGDGFFDEFGDGG1GGG?EGG;DFBFGGCGGG:GGGGD4GEC*GGGGEGGGGDFGGFGEDAG<FG<GF)CEBCGGE@FGFEFEGEDGGFF\n+@out-MT-1645/2\n+GATGGGGTGTGATAGGTGGCACGGAGAATTTTGGATTCTCAGGGATGGGTTCGATTCTCATAGTCCTAGAAATAAGGGGGTTTAAGCTCCTATTATTTACT\n++\n+GGGGGBGDFGEGGG;?GGCFFG@EDGGEGGGEGGGGGFF@GFGDD6EGG#GFC=GGGFGFGFGFFGGF@AFGGF9>G><FGC;D<GEFFFFGGDDEEGFAC\n+@out-MT-1647/2\n+GGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTT
TTTTTGTTAGGGTTAACGAGGGTGGTAAGGATGGGGGG\n++\n+GGGGDGGGGG7EGGGGGGGGAGGGBFFFGEGAGGFGGGEGGFGGFGGGGFGGGFEGCEFGGGFGGGGDGF@>GFGGGGFFGGGGFG8FEFF@FF>2EGFE?\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT-Targets.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT-Targets.bed Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,1 @@
+MT 1000 10001
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT.fa Tue May 15 02:39:53 2018 -0400
b
b'@@ -0,0 +1,278 @@\n+>MT\n+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTT\n+CGTCTGGGGGGTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTC\n+GCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATT\n+ACAGGCGAACATACTTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATA\n+ACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATCATAACAAAAAATTTCCACCA\n+AACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCAAACCCCAAAA\n+ACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC\n+TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAAT\n+CTCATCAATACAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATA\n+CCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAA\n+GCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAAATAGGTTTGGTC\n+CTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGT\n+TCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGCAATGCAGCTC\n+AAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA\n+ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGC\n+GGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCC\n+TCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGAC\n+TACGAAAGTGGCTTTAACATATCTGAACACACAATAGCTAAGACCCAAACTGGGATTAGA\n+TACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAA\n+CACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGG\n+AGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA\n+CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAG\n+ACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAG\n+AAAACTACGATAGCCCTTATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAG\n+AGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTC\n+AAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATAGAGGAGACAAGT\n+CGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACACA\n+AAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA\n+GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAA\n+AGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATG\n+AAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAA\n+TTAACTAGAAATAACTTTG
CAAGGAGAGCCAAAGCTAAGACCCCCGAAACCAGACGAGCT\n+ACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATA\n+GGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAG\n+TTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC\n+CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTA\n+ACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCA\n+CTACCTAAAAAATCCCAAACATATAACTGAACTCCTCACACCCAATTGGACCAATCTATC\n+ACCCTATAGAAGAACTAATGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGC\n+CTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCAAC\n+AAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGAAAGGTTAAAA\n+AAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC\n+ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCT\n+AACCGTGCAAAGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCC\n+ACGAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCG\n+GGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAGTA\n+CCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGA\n+CCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAAAGCGAA\n+CTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA\n+GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCA\n+GGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTAC\n+GTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCC\n+CTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTTCCCCCGTAAATGA\n+TATCATCTCAACTTAGTATTATACCCACACCCACCCAAGAACAGGGTTTGTTAAGATGGC\n+AGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAATTCCTCTTCTT\n+AACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA\n+TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAAC\n+GTTGTAGGCCCCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAA\n+GAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCT\n+CTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTC\n+AACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGA\n+TCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCAGTAGCCCAA\n+ACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGC\n+TCC
TTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCA\n+TGACCCT'..b'TAGTTACCGCTAACAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGA\n+ATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCC\n+ATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATGA\n+TTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCCA\n+AGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT\n+CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCAC\n+TCAAGCACTATAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAA\n+AATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCA\n+GCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCTCCACTTCAAGT\n+CAACTAGGACTCATAATAGTTACAATCGGCATCAACCAACCACACCTAGCATTCCTGCAC\n+ATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTCCATCATCCAC\n+AACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC\n+ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGT\n+TTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCC\n+CTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTT\n+CTCACCCTAACAGGTCAACCTCGCTTCCCCACCCTTACTAACATTAACGAAAATAACCCC\n+ACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATT\n+ACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACTC\n+ACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC\n+AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTC\n+TACCCTAGCATCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTG\n+CCCCTACTCCTCCTAGACCTAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAG\n+CACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTC\n+CTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACCTATTCCCCCG\n+AGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAACTACTACTAA\n+TCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA\n+CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCAC\n+CCCATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAAC\n+ACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGC\n+TGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATAAATTAAAAAAACTATTAAACC\n+CATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCAC
ACCGCTAACAATCAA\n+TACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAA\n+ACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC\n+CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACG\n+CAAAACTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATC\n+CAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAAT\n+CACCACAGGACTATTCCTAGCCATGCACTACTCACCAGACGCCTCAACCGCCTTTTCATC\n+AATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCAA\n+TGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACGG\n+ATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC\n+AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGT\n+AATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTG\n+AGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTT\n+GCCCTTCATTATTGCAGCCCTAGCAACACTCCACCTCCTATTCTTGCACGAAACGGGATC\n+AAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACAC\n+AATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACATTAACACTATT\n+CTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC\n+TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCC\n+TAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCC\n+CATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTA\n+TTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCTA\n+CCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCT\n+AATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTAT\n+AAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA\n+GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCT\n+CTGTTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACA\n+ACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAAT\n+ACTTGACCACCTGTAGTACATAAAAACCCAATCCACATCAAAACCCCCTCCCCATGCTTA\n+CAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACC\n+CCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAAGC\n+CATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC\n+TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCT\n+ACTCTCCTCGCTCCGGGCCCATAACACTT
GGGGGTAGCTAAAGTGAACTGTATCCGACAT\n+CTGGTTCCTACTTCAGGGTCATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGAC\n+ATCACGATG\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT.fa.fai Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,1 @@
+MT 16569 4 60 61
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT.fa.trinucCounts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT.fa.trinucCounts Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,64 @@
+AAA 524
+AAC 495
+AAG 209
+AAT 376
+ACA 448
+ACC 515
+ACG 119
+ACT 412
+AGA 178
+AGC 282
+AGG 174
+AGT 161
+ATA 367
+ATC 371
+ATG 162
+ATT 330
+CAA 465
+CAC 454
+CAG 199
+CAT 416
+CCA 464
+CCC 624
+CCG 141
+CCT 542
+CGA 122
+CGC 155
+CGG 80
+CGT 78
+CTA 523
+CTC 419
+CTG 180
+CTT 318
+GAA 201
+GAC 169
+GAG 129
+GAT 114
+GCA 207
+GCC 271
+GCG 54
+GCT 179
+GGA 122
+GGC 151
+GGG 72
+GGT 80
+GTA 154
+GTC 106
+GTG 55
+GTT 104
+TAA 414
+TAC 377
+TAG 258
+TAT 324
+TCA 415
+TCC 361
+TCG 121
+TCT 307
+TGA 190
+TGC 123
+TGG 99
+TGT 100
+TTA 329
+TTC 308
+TTG 116
+TTT 251
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT_read1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT_read1.fq Tue May 15 02:39:53 2018 -0400
b
b"@@ -0,0 +1,6576 @@\n+@out-MT-1/1\n+CGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTAGCGCACCTACGTTCAATATTACAGGCGAACA\n++\n+GGFCF5AFFDDDFE2GGGCGGAEGGGFGGEGGGGG7CGFBFGGFFFGGFF3,GGGFAFGGGGDFGGFDFG'GGGGGFFE4GF@EEDBEFFE*99EGGGGAE\n+@out-MT-2/1\n+CCTGTTTACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAAG\n++\n+FFEGGGGAG@89GGGFGGFGD<DGGGFEGG/F>?G8@GF?ACG?GFGGEDF=G6(GEGFE9GGG?@FGFGGDFDA6?GFGGGEBABGGDAEEF@)8/B2+A\n+@out-MT-3/1\n+ACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATTCA\n++\n+G9GGGEGGBGGGGFGGFGFBE@GG@GF@EDG'BF2GGGDEGEGEGGGGGFGGGFE=G@GGGGG@FFDFEF$BEDEGGFGE@$5EGGFGFGEFGC:DEB$8;\n+@out-MT-4/1\n+TCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTAATTAATTAA\n++\n+GGGGFG@G?FFFCDGFGDGFGGG9GGFFGGFGGBG/9G;E?FGG77G8E>:/FGGGGFGGGD6GGFFFF8CEAGFGGGGGDG?GGG6-FEBG3DC8@FG-=\n+@out-MT-5/1\n+GAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATT\n++\n+GGGFGGGGGGGEDGGGFFGGG8GFCEG)2CGGGGGGGGGGFGEEFGGGBGGGGFFGGGGGGFFGFGFEF>DE*:@GGEGDEGGFGFGFGG94GA)9AGEFF\n+@out-MT-6/1\n+TGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTC\n++\n+FG>BGGFGG3CADGG98G@EE857FGGEGEFEGFGGGGGFGGGGGFGGG:FBBFFDDFFF=DGFFGGG4?GAG@GCGGEG@GEE;EAFGF&GGFCEGEFE@\n+@out-MT-7/1\n+CAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAA\n++\n+GCGFFGGGGGGGEGAGFGGFGE-GFEFGGGFGFFFBFFCGFAGGGGGFACEDFA>B@GGCEGGGFBF:G9GFGG?F9?DBFGEGCFDE6F>C@FFFF@/E<\n+@out-MT-8/1\n+GAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACA\n++\n+FFGGGGGGFGG'5DCGB>EGGGGG79GGEGGGG=@GE=CGECDFGEGGG=DGGAFFG4DGG6FGGFGC<<F>B?2GEGGCG)G3=E'F7F.B==CAAE<6A\n+@out-MT-9/1\n+GAGCTAAACCTAGCCCCAAACCCACTCCACCTAACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCGATAGAAATTGAAACCTGGC\n++\n+GGFGGFGE-?:FFGGGGEFD?FGAGG<$%FG<&FE/$2CEGEDCGDF
FEF.;FGDAG>GGAGA2?DFFGG??G?GGCBADA.19ADGGFGEC>FGGF8/EF\n+@out-MT-10/1\n+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCCACGCATTTATATAG\n++\n+75FFA:EGGGGGGFF<9GG3?EEGEEFGEG:FGDFGGGGGFFFFGGFFGGDGEG7FFGGGGF+FFGAF6GGFGGE:GE?DGGFGAE'8G5EEGG<<GD;GE\n+@out-MT-11/1\n+TCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATC\n++\n+EDGDGGGFGFFGGFGGEDFGGCFGGGGGD;GFGEFG?FBFGGGBFFGGGGD@GBG@F?;BGGE=D?@FFG<FF?D<GE7DFGGBG'F0F>FGG;@1EGFFE\n+@out-MT-12/1\n+CAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTACCTGTTAGTCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGAAG\n++\n+GFGE:BGGGEGGGF?GGGFGGGAGGGG<F/?GGG@GGGGGFGGGGC590FEGB@G>DGGE?FCGDG1FDFFGBG3FGFGEDGGGCGEGD=<FGF8EGG$2D\n+@out-MT-13/1\n+AATGTTTAGACGGGCTCACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGGTCCA\n++\n+CGGGGFGD8CGGFDGGGF=:BFGG-GFGDDCGGEFE3$GG91EEDGCBGGGG4G5=FGG+7-5G<FA(3<FGFFD?EEGB.+>>CGGFGG6E8AB>*3EGG\n+@out-MT-14/1\n+TATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGA\n++\n+FGEFC+GGCGDGGGFGGGFGEEG=GGCFGGGGCGGEFGGGEFGGGD,FDGBGGD.G??E@;FGG97EGCF=GGGF:GFEEGGFB;0GG7EGFG=FG;8G@B\n+@out-MT-15/1\n+CCAAGCATAATATAGCAAGGACTTACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCA\n++\n+GFFGGGBGEED)*FG@GFG96G?2G9FG@GGGGGBGGGCGFFGGFGGG:)F=;G9BDGGGF>GCFGFGGFG=EGDDGCG@>FAGE;FEF9,<C;9F@@GGF\n+@out-MT-16/1\n+ACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTT\n++\n+G9GGFGFF?GGA+@FFGG6.EBGGGGDGEB5EGGGFFGGFGFCF=GFGGDGGF;GGDGED:FGGED49@@.D&D$A9;F8GGGFGBC)$<EGG4GEC91@@\n+@out-MT-17/1\n+AATTTTATCTTTTGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCAATACTACTAATCTCATCAATACAACCC\n++\n+GGEG=AGGDFGC;FCGECGGGGGGGF=FCGFGGGGGAGGFFGFGG9GGGFGFFAAFDBGEG=@EAGF8>GEDA(FDB8DGGG,DEFEGFEGF&CGF>F-F5\n+@out-MT-18/1\n+CCAACGGAACAAGTTACCCTAGGGATAACATCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCGA\n++\n+GGGB
A@GGD@GGA3BD+1G<FGGFAFFFEFG,5GGG>G&GEGGGGDGGCC;GFE?GGCGGGGFEGB:GGGGGF7FCE"..b"ACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACTCAAC\n++\n+<GGFGGGG=FGG>@GBG?F<FGGGGGGFGA?GFGGGGGGFF@8FG<DGGGGGEDFGAGG:2FE-GGGEG#G@GGCFE5C.DFF-EFGGGE8.G$8=GFGE&\n+@out-MT-1628/1\n+AAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCCTATAGTAAAACCCAGCCCATGACCCCTAACAGGGGTCCTCTCAGCCCT\n++\n+GFGG@8GGFF2GGGEFGGEEGFGGFFGGCGFFCFGGFGF6FFGGGEFGDGFF(GGGAGBGGG@FEFGDGEFB=FDGB6<BCEGG<GD@%;3GDGFGEGG4@\n+@out-MT-1629/1\n+CCACAGAACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTGGCTATCATCACCCGATGAGGCAACCAGCCAGTACGCCTGAACGCA\n++\n+GFCGGGEG/F1F?GE?EDC@GFGGG<GGGGBGGEGEFGFAGGGFG<E0GGGG9AGGC>FGG9EGGBFGGGGFD,CDF@AF9GFGGF;%(,4;5EFF1<FDA\n+@out-MT-1630/1\n+TATCAAACTCCTGAGCCAACAACTTAATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACTTATGACTCCCTAAAGCC\n++\n+FGGGGDGFFFF8GDF?GDGGGBGGFGDGGGFBGFGFGFGGCGC+5FGGGFFDAAGGFGFGGAD)GFE4EFFGFEFAG9G@=FGGFFE3GFFGF1EFFCDFE\n+@out-MT-1631/1\n+TACCACAACCACCACCCCATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCCCTGACC\n++\n+GGDG>GGF3G(GGEBG>GFCGGGEGGGGFFFGFGEG?GE@GG@GEEFGGG10GEDG>GG:4==FFB<+GABGE21GFCGGG@<CBC<GF@FDGF@3FGGG1\n+@out-MT-1632/1\n+GTTTCCCCGCATAAACAACATAAGCTTCTGCCTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAA\n++\n+DFGGGF<GG&6FFGCFFEDDFGFF.:DG?G#FGGGEG3FGFGFFEDFGEDGGCFGFCGDGGFGGGGEGFG,FGGFDG%FGDCFFFGGBG5FB;GCCGDFFF\n+@out-MT-1633/1\n+TATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTTTAGGTAACGAC\n++\n+GDGFFG<GGGGGGFG@FGDC2GG?GEFGGGGGBGGDEGGEGGGGGFGGGGFFCGCFFCAG;GGFGGG:GFF<;0/8G1FGGGFBFE@>C7GEGGGFEF+DE\n+@out-MT-1634/1\n+ATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGTAAAT\n++\n+G(GGGGDGGGFB><GGEGGGEGEGGG6GGGGDGGGG>G3FEGGG8GGGGGGFGFGFFGEGGG5GFEEG7G?FF?AGBF?<5GGGGG8=GGGFFGGFA7GGG\n+@out-MT-1635/1\n+AAGTCCTAATAGTAGAGGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGT\n++\n+GFGD@@EGGGFFGAFG
/BG?GGGGGGGGGGG?DFG:GGGDGFGGGGGGGCGBFF>6FGGGG@G?A<CGD@GGF@GCE8=GGAGG@2GF4;?2?><FED=C0\n+@out-MT-1636/1\n+TCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAGCAGGGAACTACTCCCACCCTG\n++\n+GGFGGFGDGAAGFGGAAGGGGG4FBG1GFGGGGCGGG<GGGBGEGF.GG9?GFGGGGG7A&GA=DGEB0G7DEG?GE4F<GGDCG4GE7CEAG0CC'?BBA\n+@out-MT-1637/1\n+TAGAATTCTCACTGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCT\n++\n+GGG=FGEGGGAFGFDGGGEFGGGGGGGGAGGGGEGFC8C>GFEFCAG6GFEGBBGGGFGGCGGGGGGGG>GGGG?C5E?AGGEFEDFDBGEFGGGADGGG2\n+@out-MT-1638/1\n+AAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCTGACCGGAATCGGAGGACAACCAATAAGCTACCCTT\n++\n+GGFGGGGDGFGGGG4CGGG@GDFGFCFGEDCDGGFEGGGGEGEDFGG@FFF?GG3GEGEAGGDEEB&GEG$BGG<=GD0AF-E846>F*$F35CEEFEAF4\n+@out-MT-1639/1\n+TTTAGTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCT\n++\n+G?DGCE:CE1E>GGGGGF3G5EGCGG5GEED@GCG3GGGG:E$A=F4GGGGGGGCBFGFGDGF:(:GBCGF=GCGGDDFEGEF*<BFGGGDGD9BFF>FB<\n+@out-MT-1640/1\n+AAGGATTAGACTGAACCGAATTGGTATATAGTTTAAACAAAACGAATGACTTCGACTCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTAC\n++\n+F1GEG?AGGGBGFEGGFGFGGFEGGGAGAGA=GGGFFF>GGGGGG==DD$@G4?GDGFFEFG>@GFG>:CE>7FGAEGGAFGGEGGCFGGEF?CB$FED@E\n+@out-MT-1641/1\n+CCCCATCCTTACCACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCATCCACCTTTATTATCAGTCTCTTCCCCA\n++\n+F=GECFGG6CGF6E8G;CGGGCBBGF;GGGGG3:FGGFF=FG/GGGGGGGAFG7GGGGGGEGBEGGGGGGC=DFGFA@G;1BFGGFGF<G@CGBFFF24BA\n+@out-MT-1642/1\n+ATGACCCCAATACGCAAAACTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGG\n++\n+F?GGGDEFBGGE4B)GDGGFG<GDGGFGFFGGGBGFGEGGGFFGDGDFGGGGGDGFDGGGGGFGF3B?BGDFGG>GGDCEF;?G@EDGDC34?>A<;GG?C\n+@out-MT-1643/1\n+ACTCACCCTAGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTT\n++\n+GEE=AFGGGAFG;@GG0GFEFGGFFFGFEGGDFFFE<AGG@CDGG*51+GEFGFEGG:;FGGFFGEGGFD5CEECGFGFGGGGGFF?GGFED->GGGFFFF\n+@out-MT-1644/1\n+CCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCGCTCCTACTCCTGCTCGCATCT
GCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGT\n++\n+GCFGGGGGGGDGGGFC/G?G@FFGGE=GEDF7GGFGFEGGD$4FEFGGGF?FFFGGFGGCFGGFFG8GF2DGGGF>FF<GGGGFFEDB;GEFFBDDGGGE8\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/chrMT_read1_genSeqErrorModel.p
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chrMT_read1_genSeqErrorModel.p Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,656758 @@\n+(lp1\n+(lp2\n+(lp3\n+cnumpy.core.multiarray\n+scalar\n+p4\n+(cnumpy\n+dtype\n+p5\n+(S\'f8\'\n+I0\n+I1\n+tRp6\n+(I3\n+S\'<\'\n+NNNI-1\n+I-1\n+I0\n+tbS\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp7\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp8\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp9\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeec?\'\n+tRp10\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5]?\'\n+tRp11\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp12\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp13\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp14\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp15\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp16\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp17\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp18\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp19\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp20\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xeah?\'\n+tRp21\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5]?\'\n+tRp22\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp23\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp24\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5]?\'\n+tRp25\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5m?\'\n+tRp26\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5m?\'\n+tRp27\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp28\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xees?\'\n+tRp29\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xeax?\'\n+tRp30\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeec?\'\n+tRp31\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xees?\'\n+tRp32\n+ag4\n+(g6\n+S\'\\xe8d\\xfa*alv?\'\n+tRp33\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xee\\x83?\'\n+tRp34\n+ag4\n+(g6\n+S\'\\xd8\\xbf$\\x1fJ\\xab\\x87?\'\n+tRp35\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5\\x8d?\'\n+tRp36\n+ag4\n+(g6\n+S\'{\\xe1"\\xd8\\xbf$\\x8f?\'\n+tRp37\n+ag4\n+(g6\n+S\'\\x9a+\\xce\\xef\\xed\\xa6\\x8c?\'\n+tRp38\n+ag
4\n+(g6\n+S\'\\xf7\\t\\xd06x-\\x95?\'\n+tRp39\n+ag4\n+(g6\n+S\'\\xb9uy\\x07\\x1c)\\x9a?\'\n+tRp40\n+ag4\n+(g6\n+S\'"\\xfe\\xb8uy\\x07\\x9c?\'\n+tRp41\n+ag4\n+(g6\n+S\'\\xa4\\xfb\\x04h\\x1b\\xbc\\xa6?\'\n+tRp42\n+ag4\n+(g6\n+S\'F\\x1d\\x03!\\x915\\xae?\'\n+tRp43\n+ag4\n+(g6\n+S\'S\\xa1G\\xf7\\t\\xd0\\xc6?\'\n+tRp44\n+ag4\n+(g6\n+S\'\\xf1\\xc7\\xad\\xcb;\\xe0\\xe0?\'\n+tRp45\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp46\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp47\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp48\n+aa(lp49\n+g4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp50\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp51\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp52\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeC?\'\n+tRp53\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp54\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp55\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5]?\'\n+tRp56\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeec?\'\n+tRp57\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5]?\'\n+tRp58\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5]?\'\n+tRp59\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp60\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5]?\'\n+tRp61\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp62\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xeah?\'\n+tRp63\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp64\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeeS?\'\n+tRp65\n+ag4\n+(g6\n+S\'\\x07\\xaf\\xa5B\\x8f\\xeec?\'\n+tRp66\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xeah?\'\n+tRp67\n+ag4\n+(g6\n+S\'&\\xf9PZ\\xbdpq?\'\n+tRp68\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xeah?\'\n+tRp69\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xeah?\'\n+tRp70\n+ag4\n+(g6\n+S\'&\\xf9PZ\\xbdpq?\'\n+tRp71\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xeah?\'\n+tRp72\n+ag4\n+(g6\n+S\'\\xe8d\\xfa*alv?\'\n+tRp73\n+ag4\n+(g6\n+S\'&\\xf9PZ\\xbdpq?\'\n+tRp74\n+ag4\n+(g6\n+S\'\\x07\\xaf
\\xa5B\\x8f\\xees?\'\n+tRp75\n+ag4\n+(g6\n+S\'\\x16T{N\\xa6\\xaf\\x82?\'\n+tRp76\n+ag4\n+(g6\n+S\'6\\x9e&f\\xd41\\x80?\'\n+tRp77\n+ag4\n+(g6\n+S\'\\xd8\\xbf$\\x1fJ\\xab\\x87?\'\n+tRp78\n+ag4\n+(g6\n+S\'\\xc9\\x1aO\\x133\\xea\\x88?\'\n+tRp79\n+ag4\n+(g6\n+S\'&\\xf9PZ\\xbdp\\x91?\'\n+tRp80\n+ag4\n+(g6\n+S\'\\xa9\\xd0\\xa3\\xfb\\x04h\\x8b?\'\n+tRp81\n+ag4\n+(g6\n+S\'\\x9e&f\\xd41\\x10\\x92?\'\n+tRp82\n+ag4\n+(g6\n+S\'o7\\xe5\\xb0\\xec\\xcc\\x95?\'\n+tRp83\n+ag4\n+(g6\n+S\'\\x8a\\x86\\xf8\\xe3\\xd6\\xe5\\x9d?\'\n+tRp84\n+ag4\n+(g6\n+S\'\\xfd\\xden\\xcaa\\xd9\\xa9?\'\n+tRp85\n+ag4\n+(g6\n+S\'\\xc8\\xad\\xcb;\\xe0H\\xb1?\'\n+tRp86\n+ag4\n+(g6\n+S\'\\x14\\xa0m\\xf0Z*\\xc4?\'\n+tRp87\n+ag4\n+(g6\n+S"\\x85\\x1e\\xdd\'@\\xdb\\xe0?"\n+tRp88\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp89\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp90\n+ag4\n+(g6\n+S\'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\'\n+tRp91\n+aa(lp92\n+g4\n+(g6\n+S\''..b"?'\n+tRp161559\n+ag4\n+(g6\n+S'\\xbecj`\\xef\\x98\\xaa?'\n+tRp161560\n+ag4\n+(g6\n+S'(\\xc4Y\\xf9\\tq\\xb6?'\n+tRp161561\n+ag4\n+(g6\n+S'\\xf9\\tqV~B\\xbc?'\n+tRp161562\n+ag4\n+(g6\n+S']t\\xd1E\\x17]\\xc4?'\n+tRp161563\n+ag4\n+(g6\n+S'5\\xb0wL\\r\\xec\\xad?'\n+tRp161564\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161565\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161566\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161567\n+aa(lp161568\n+g4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161569\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161570\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161571\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x8c?'\n+tRp161572\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161573\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4l?'\n+tRp161574\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x8c?'\n+tRp161575\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4l?'\n+tRp161576\n+ag4\n+(g6
\n+S'\\x91\\xe1^\\x05\\xb3\\xa4l?'\n+tRp161577\n+ag4\n+(g6\n+S'-)\\x07D\\x86{\\x85?'\n+tRp161578\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4l?'\n+tRp161579\n+ag4\n+(g6\n+S'\\xfbL[\\xe3\\xef\\xe6\\x91?'\n+tRp161580\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4|?'\n+tRp161581\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4|?'\n+tRp161582\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4|?'\n+tRp161583\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4|?'\n+tRp161584\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x8c?'\n+tRp161585\n+ag4\n+(g6\n+S'\\xfbL[\\xe3\\xef\\xe6\\x91?'\n+tRp161586\n+ag4\n+(g6\n+S'-)\\x07D\\x86{\\x95?'\n+tRp161587\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4l?'\n+tRp161588\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4|?'\n+tRp161589\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4|?'\n+tRp161590\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x8c?'\n+tRp161591\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x8c?'\n+tRp161592\n+ag4\n+(g6\n+S'\\xfbL[\\xe3\\xef\\xe6\\x91?'\n+tRp161593\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x8c?'\n+tRp161594\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x8c?'\n+tRp161595\n+ag4\n+(g6\n+S'\\xfbL[\\xe3\\xef\\xe6\\x91?'\n+tRp161596\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x9c?'\n+tRp161597\n+ag4\n+(g6\n+S'_\\x05\\xb3\\xa4\\x1c\\x10\\x99?'\n+tRp161598\n+ag4\n+(g6\n+S'_\\x05\\xb3\\xa4\\x1c\\x10\\xa9?'\n+tRp161599\n+ag4\n+(g6\n+S'-)\\x07D\\x86{\\xa5?'\n+tRp161600\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x9c?'\n+tRp161601\n+ag4\n+(g6\n+S'\\x91\\xe1^\\x05\\xb3\\xa4\\x9c?'\n+tRp161602\n+ag4\n+(g6\n+S'\\x14;\\xb1\\x13;\\xb1\\xa3?'\n+tRp161603\n+ag4\n+(g6\n+S' 
2\\xdc\\xab`\\x96\\xb4?'\n+tRp161604\n+ag4\n+(g6\n+S'\\xb7\\xc6\\xdf\\xcd#T\\xbf?'\n+tRp161605\n+ag4\n+(g6\n+S'\\x81\\xc8p\\xaf\\x82Y\\xc2?'\n+tRp161606\n+ag4\n+(g6\n+S'\\x9a\\xb6\\xc6\\xdf\\xcd#\\xc4?'\n+tRp161607\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161608\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161609\n+ag4\n+(g6\n+S'\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n+tRp161610\n+aa(lp161611\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp161612\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aa(lp161613\n+F0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aF0\n+aaaa(lp161614\n+I0\n+aI1\n+aI2\n+aI3\n+aI4\n+aI5\n+aI6\n+aI7\n+aI8\n+aI9\n+aI10\n+aI11\n+aI12\n+aI13\n+aI14\n+aI15\n+aI16\n+aI17\n+aI18\n+aI19\n+aI20\n+aI21\n+aI22\n+aI23\n+aI24\n+aI25\n+aI26\n+aI27\n+aI28\n+aI29\n+aI30\n+aI31\n+aI32\n+aI33\n+aI34\n+aI35\n+aI36\n+aI37\n+aI38\n+aI39\n+aI40\n+aI41\n+aaI33\n+aF0.0067017413981956637\n+a(lp161615\n+(lp161616\n+(lp161617\n+F0\n+aF0.49180000000000001\n+aF0.3377\n+aF0.17050000000000001\n+aa(lp161618\n+F0.52380000000000004\n+aF0\n+aF0.2661\n+aF0.21010000000000001\n+aa(lp161619\n+F0.37540000000000001\n+aF0.23549999999999999\n+aF0\n+aF0.38900000000000001\n+aa(lp161620\n+F0.2505\n+aF0.25519999999999998\n+aF0.49419999999999997\n+aF0\n+aaaF0.01\n+a(lp161621\n+F0.999\n+aF0.001\n+aa(lp161622\n+I1\n+aI2\n+aaF0.40000000000000002\n+a(lp
161623\n+F0.25\n+aF0.25\n+aF0.25\n+aF0.25\n+aaa.\n\\ No newline at end of file\n"
b
diff -r 000000000000 -r 6e75a84e9338 test-data/create_test-data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/create_test-data Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,104 @@
+#!/bin/bash
+source ~/env/bin/activate
+
+samtools faidx chrMT.fa
+#TEST1: single read, with everything default
+python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1
+mv out_read1.fq chrMT_read1.fq
+
+#TEST2: PE reads, with everything default
+python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30
+mv out_read1.fq chrMT-PE_read1.fq
+mv out_read2.fq chrMT-PE_read2.fq
+
+#TEST3: PE reads, with everything default, now with VCF and BAM files
+python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam
+mv out_read1.fq chrMT-PE-VCF-BAM_read1.fq
+mv out_read2.fq chrMT-PE-VCF-BAM_read2.fq
+mv out_golden.bam chrMT-PE-VCF-BAM.bam
+mv out_golden.vcf chrMT-PE-VCF-BAM.vcf
+
+samtools index chrMT-PE-VCF-BAM.bam
+
+#TEST4: PE reads, with VCF and BAM files and VCF file from TEST3 as the seed
+python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam -v chrMT-PE-VCF-BAM.vcf
+mv out_read1.fq chrMT-PE-VCF-BAM-vcf_read1.fq
+mv out_read2.fq chrMT-PE-VCF-BAM-vcf_read2.fq
+mv out_golden.bam chrMT-PE-VCF-BAM-vcf.bam
+mv out_golden.vcf chrMT-PE-VCF-BAM-vcf.vcf
+
+samtools index chrMT-PE-VCF-BAM-vcf.bam
+
+#TEST5: PE reads, with VCF and BAM files and BED file as the targeted region
+python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam -t chrMT-Targets.bed -to 0.02
+mv out_read1.fq chrMT-PE-VCF-BAM-Targeted_read1.fq
+mv out_read2.fq chrMT-PE-VCF-BAM-Targeted_read2.fq
+mv out_golden.bam chrMT-PE-VCF-BAM-Targeted.bam
+mv out_golden.vcf chrMT-PE-VCF-BAM-Targeted.vcf
+
+samtools index chrMT-PE-VCF-BAM-Targeted.bam
+
+#TEST6: PE reads, with everything default, now with VCF and BAM files GZIPPED
+python2 ../genReads.py -r chrMT.fa -R 101 -o out --rng 1 --pe 300 30 --vcf --bam --gz
+mv out_read1.fq.gz chrMT-PE-VCF-BAM-gz_read1.fq.gz
+mv out_read2.fq.gz chrMT-PE-VCF-BAM-gz_read2.fq.gz
+mv out_golden.bam chrMT-PE-VCF-BAM-gz.bam
+#Galaxy does not support gzipped VCF file (not sure if this is BGZIPPED)
+gunzip out_golden.vcf.gz && mv out_golden.vcf chrMT-PE-VCF-BAM-gz.vcf
+
+samtools index chrMT-PE-VCF-BAM-gz.bam
+
+#TEST7: PE reads, with all error parameters changed, with VCF and BAM files, not compressed 
+python2 ../genReads.py -r chrMT.fa -R 151 -o out --rng 123 --pe 500 50 --vcf --bam -c 20 -E 0.123 -M 0.123 -p 3
+mv out_read1.fq chrMT-PE-VCF-BAM-panic_read1.fq
+mv out_read2.fq chrMT-PE-VCF-BAM-panic_read2.fq
+mv out_golden.bam chrMT-PE-VCF-BAM-panic.bam
+mv out_golden.vcf chrMT-PE-VCF-BAM-panic.vcf
+
+samtools index chrMT-PE-VCF-BAM-panic.bam
+
+#####
+# computeGC TESTS
+#
+
+#TEST1: Use BAM from TEST3 to create the model file. Window size = 10
+bedtools genomecov -d -ibam chrMT-PE-VCF-BAM.bam -g chrMT.fa > chrMT-PE-VCF-BAM.genomecov
+python2 ../utilities/computeGC.py -r chrMT.fa -i chrMT-PE-VCF-BAM.genomecov -w 10 -o chrMT-PE-VCF-BAM-computeGC.p
+
+#####
+# computeFraglen
+#
+samtools view chrMT-PE-VCF-BAM.bam | python2 ../utilities/computeFraglen.py
+mv fraglen.p chrMT-PE-VCF-BAM-fraglen.p
+
+#####
+# genMutModel
+#
+
+#TEST1: Default settings
+python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-genMutModel.p
+
+#TEST2: Defined include list
+python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-INCLUDELIST-genMutModel.p\
+        -bi chrMT-Targets.bed
+
+#TEST3: Defined exclude list
+python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-EXCLUDELIST-genMutModel.p\
+        -be chrMT-Targets.bed
+
+#TEST4: Default settings with all booleans set to YES
+python2 ../utilities/genMutModel.py -r chrMT.fa -m chrMT-PE-VCF-BAM.vcf -o chrMT-PE-VCF-BAM-BOOLEANS-genMutModel.p\
+        --save-trinuc --no-whitelist --skip-common
+
+#####
+# genSeqErrorModel
+#
+
+#TEST1 - 100,000 simulations - Single read
+python2 ../utilities/genSeqErrorModel.py -i chrMT_read1.fq -s 100000 -o chrMT_read1_genSeqErrorModel.p
+
+#TEST2 - 100,000 simulations - Paired reads
+python2 ../utilities/genSeqErrorModel.py -i chrMT-PE_read1.fq -i2 chrMT-PE_read2.fq -s 100000 -o chrMT-PE_read1_genSeqErrorModel.p
+
+#TEST3 - 100,000 simulations - Only 100 reads
+python2 ../utilities/genSeqErrorModel.py -i chrMT-PE_read1.fq -i2 chrMT-PE_read2.fq -s 100000 -o chrMT-PE-100reads_read1_genSeqErrorModel.p -n 100
b
diff -r 000000000000 -r 6e75a84e9338 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r 6e75a84e9338 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 6e75a84e9338 utilities/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/README.md Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,100 @@
+# computeGC.py
+
+Takes .genomecov files produced by BEDtools genomeCov (with -d option).
+
+```
+bedtools genomecov                  \
+        -d                          \
+        -ibam normal.bam            \
+        -g reference.fa             \
+        > normal.genomecov
+```
+
+```
+python computeGC.py                 \
+        -r reference.fa             \
+        -i genomecovfile            \
+        -w [sliding window length]  \
+        -o /path/to/model.p
+```
+
+# computeFraglen.py
+
+Takes SAM file via stdin:
+
+./samtools view toy.bam | python computeFraglen.py
+
+and creates fraglen.p model in working directory.
+
+
+# genMutModel.py
+
+Takes a reference genome and a TSV file to generate mutation models:
+
+```
+python genMutModel.py               \
+        -r hg19.fa                  \
+        -m inputVariants.tsv        \
+        -o /home/me/models.p
+```
+
+Trinucleotides are identified in the reference genome and the variant file. Frequencies of each trinucleotide transition are calculated and output as a pickle (.p) file.
+
+# genSeqErrorModel.py
+
+Generates sequence error model for genReads.py -e option.
+
+```
+python genSeqErrorModel.py                            \
+        -i input_read1.fq (.gz) / input_read1.sam     \
+        -o output.p                                   \
+        -i2 input_read2.fq (.gz) / input_read2.sam    \
+        -p input_alignment.pileup                     \
+        -q quality score offset [33]                  \
+        -Q maximum quality score [41]                 \
+        -n maximum number of reads to process [all]   \
+        -s number of simulation iterations [1000000]  \
+        --plot perform some optional plotting
+```
+
+# plotMutModel.py
+
+Performs plotting and comparison of mutation models generated from genMutModel.py.
+
+```
+python plotMutModel.py                                        \
+        -i model1.p [model2.p] [model3.p]...                  \
+        -l legend_label1 [legend_label2] [legend_label3]...   \
+        -o path/to/pdf_plot_prefix
+```
+
+# vcf_compare_OLD.py
+
+Tool for comparing VCF files.
+
+```
+python vcf_compare_OLD.py
+        --version          show program's version number and exit      \
+        -h, --help         show this help message and exit             \
+        -r <ref.fa>        * Reference Fasta                           \
+        -g <golden.vcf>    * Golden VCF                                \
+        -w <workflow.vcf>  * Workflow VCF                              \
+        -o <prefix>        * Output Prefix                             \
+        -m <track.bed>     Mappability Track                           \
+        -M <int>           Maptrack Min Len                            \
+        -t <regions.bed>   Targetted Regions                           \
+        -T <int>           Min Region Len                              \
+        -c <int>           Coverage Filter Threshold [15]              \
+        -a <float>         Allele Freq Filter Threshold [0.3]          \
+        --vcf-out          Output Match/FN/FP variants [False]         \
+        --no-plot          No plotting [False]                         \
+        --incl-homs        Include homozygous ref calls [False]        \
+        --incl-fail        Include calls that failed filters [False]   \
+        --fast             No equivalent variant detection [False]     
+```
+Mappability track examples: https://github.com/zstephens/neat-repeat/tree/master/example_mappabilityTracks
+
+## Controlled Data and Germline-Reference Allele Mismatch Information
+ICGC's "Access Controlled Data" documentation can be found at http://docs.icgc.org/access-controlled-data. To have access to controlled germline data, a DACO must be
+submitted. Open tier data can be obtained without a DACO, but germline alleles that do not match the reference genome are masked and replaced with the reference
+allele. Controlled data includes unmasked germline alleles.
+
b
diff -r 000000000000 -r 6e75a84e9338 utilities/computeFraglen.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/computeFraglen.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,88 @@
+#
+#
+#      Compute Fragment Length Model for genReads.py
+#                  computeFraglen.py
+#
+#
+#      Usage: samtools view normal.bam | python computeFraglen.py
+#
+#
+
+import sys
+import fileinput
+import cPickle as pickle
+import numpy as np
+
+FILTER_MAPQUAL  = 10 # only count reads whose mapping quality is strictly greater than this value
+FILTER_MINREADS = 100 # only keep fragment lengths supported by at least this many read pairs
+FILTER_MEDDEV_M = 10 # only keep fragment lengths below (median + this many median deviations)
+
def quick_median(countDict):
    """Return the median value of a counting dictionary.

    countDict maps a numeric value to the number of times that value was
    observed. The keys are walked in ascending order while their counts are
    accumulated; the result is the first key at which the running total
    reaches half of the overall count (floor division).

    The key itself is returned, not its position in the sorted key list, so
    the result is expressed in the same units as the keys even when the keys
    are sparse. An empty dictionary yields 0.
    """
    sk = sorted(countDict)
    if not sk:
        return 0
    # Floor division gives the same midpoint under Python 2 and Python 3.
    midPoint = sum(countDict.values()) // 2
    mySum = 0
    for key in sk:
        mySum += countDict[key]
        if mySum >= midPoint:
            return key
    # Only reachable with negative counts; fall back to the largest key.
    return sk[-1]
+
def median_deviation_from_median(countDict):
    """Return the median absolute deviation of a counting dictionary.

    Each key's absolute distance from quick_median(countDict) is tallied
    together with that key's count, and quick_median is then applied to the
    resulting distance histogram.

    Keys lying at the same distance on either side of the median contribute
    to the same bucket, so their counts are summed rather than one replacing
    the other.
    """
    myMedian = quick_median(countDict)
    deviations = {}
    for k in countDict:
        d = abs(k - myMedian)
        deviations[d] = deviations.get(d, 0) + countDict[k]
    return quick_median(deviations)
+
# The script takes no arguments: SAM records are read from stdin.
if len(sys.argv) != 1:
    print("Usage: samtools view normal.bam | python computeFraglen.py")
    sys.exit(1)

# all_tlens maps |TLEN| (SAM column 9) to the number of accepted read pairs.
all_tlens = {}
PRINT_EVERY = 100000
i = 0
for line in fileinput.input():
    # Header lines (e.g. from `samtools view -h`) carry no alignment data.
    if line.startswith('@'):
        continue
    splt = line.strip().split('\t')
    # Blank or truncated records do not have the columns read below.
    if len(splt) < 9:
        continue
    samFlag = int(splt[1])
    myRef   = splt[2]
    mapQual = int(splt[4])
    mateRef = splt[6]
    myTlen  = abs(int(splt[8]))

    # read is paired, is first in pair, and is confidently mapped...
    if not (samFlag & 1 and samFlag & 64 and mapQual > FILTER_MAPQUAL):
        continue
    # ...and its mate is mapped to the same reference
    if mateRef != '=' and mateRef != myRef:
        continue

    all_tlens[myTlen] = all_tlens.get(myTlen, 0) + 1
    i += 1
    if i % PRINT_EVERY == 0:
        # progress report: pairs accepted so far, running median and deviation
        print('--- %s %s %s' % (i, quick_median(all_tlens), median_deviation_from_median(all_tlens)))

med = quick_median(all_tlens)
mdm = median_deviation_from_median(all_tlens)

# Keep positive fragment lengths that are below the outlier cutoff and that
# are supported by enough read pairs.
outVals  = []
outProbs = []
for k in sorted(all_tlens.keys()):
    if k > 0 and k < med + FILTER_MEDDEV_M * mdm:
        if all_tlens[k] >= FILTER_MINREADS:
            print('%s %s' % (k, all_tlens[k]))
            outVals.append(k)
            outProbs.append(all_tlens[k])

# Turn the retained counts into probabilities (nothing to divide if empty).
countSum = float(sum(outProbs))
outProbs = [n / countSum for n in outProbs]

print('\nsaving model...')
# Context manager guarantees the pickle is flushed and the handle closed.
with open('fraglen.p', 'wb') as outFile:
    pickle.dump([outVals, outProbs], outFile)
+
+
b
diff -r 000000000000 -r 6e75a84e9338 utilities/computeGC.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/computeGC.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,115 @@
+#
+#
+#            computeGC.py
+#            Compute GC and coverage model for genReads.py
+#
+#            Takes output file from bedtools genomecov to generate GC/coverage model
+#
+#            Usage: bedtools genomecov -d -ibam normal.bam -g reference.fa > genomecovfile
+#                   python computeGC.py -r reference.fa -i genomecovfile -w [sliding window length] -o path/to/output_name.p
+#
+#
+
+
+import time
+import sys
+import argparse
+import numpy as np
+import cPickle as pickle
+
#
#   parse input options
#
parser = argparse.ArgumentParser(description='computeGC.py')
parser.add_argument('-i', type=str, required=True, metavar='<str>', help="input.genomecov")
parser.add_argument('-r', type=str, required=True, metavar='<str>', help="reference.fa")
parser.add_argument('-w', type=int, required=True, metavar='<int>', help="sliding window length")
parser.add_argument('-o', type=str, required=True, metavar='<str>', help="output.p")
args = parser.parse_args()

(IN_GCB, REF_FILE, WINDOW_SIZE, OUT_P) = (args.i, args.r, args.w, args.o)

# A non-positive window would make every later division by WINDOW_SIZE fail.
if WINDOW_SIZE < 1:
    parser.error('sliding window length (-w) must be a positive integer')

# GC_BINS[g] collects the summed coverage of each window holding exactly g G/C bases.
GC_BINS = {n: [] for n in range(WINDOW_SIZE + 1)}

#
#   load the reference, one upper-cased string per sequence
#
print('reading ref...')
allRefs = {}
with open(REF_FILE, 'r') as f:
    for line in f:
        if line[0] == '>':
            refName = line.strip()[1:]
            allRefs[refName] = []
            print(refName)
        else:
            allRefs[refName].append(line.strip())

print('capitalizing ref...')
for k in sorted(allRefs.keys()):
    print(k)
    allRefs[k] = ''.join(allRefs[k]).upper()

#
#   walk the per-base coverage file in consecutive windows of WINDOW_SIZE lines
#
print('reading genomecov file...')
tt = time.time()
currentLine = 0     # positions accumulated so far in the current window
currentRef  = None  # sequence name the current window started on
currentCov  = 0     # summed coverage of the current window
sPos        = 0     # 0-based start of the current window
linesProcessed = 0
PRINT_EVERY    = 1000000
with open(IN_GCB, 'r') as f:
    for line in f:
        splt = line.strip().split('\t')
        if linesProcessed % PRINT_EVERY == 0:
            print(linesProcessed)
        linesProcessed += 1

        # blank or truncated lines lack the name / position / depth columns
        if len(splt) < 3:
            continue

        # Start a new window at the first line, after a completed window, or
        # when the sequence name changes mid-window so that a window never
        # mixes coverage from two different sequences.
        if currentLine == 0 or splt[0] != currentRef:
            currentRef  = splt[0]
            sPos        = int(splt[1]) - 1
            currentLine = 0
            currentCov  = 0

        if currentRef not in allRefs:
            continue

        currentLine += 1
        currentCov  += int(splt[2])

        if currentLine == WINDOW_SIZE:
            seq = allRefs[currentRef][sPos:sPos + WINDOW_SIZE]
            # skip windows that contain N or run past the end of the reference
            if len(seq) == WINDOW_SIZE and 'N' not in seq:
                gc_count = seq.count('G') + seq.count('C')
                GC_BINS[gc_count].append(currentCov)
            currentLine = 0
            currentCov  = 0

#
#   mean coverage per GC bin, and the count-weighted mean across all bins
#
runningTot = 0
allMean    = 0.0
for k in sorted(GC_BINS.keys()):
    if len(GC_BINS[k]) == 0:
        print('%s %s %s' % ('{0:0.2%}'.format(k / float(WINDOW_SIZE)), 0.0, 0))
        GC_BINS[k] = 0
    else:
        myMean = np.mean(GC_BINS[k])
        myLen  = len(GC_BINS[k])
        print('%s %s %s' % ('{0:0.2%}'.format(k / float(WINDOW_SIZE)), myMean, myLen))
        allMean += myMean * myLen
        runningTot += myLen
        GC_BINS[k] = myMean

# Without any usable window, or with zero coverage everywhere, the
# normalisation below would divide by zero.
if runningTot == 0 or allMean == 0:
    print('\nError: no covered windows matched the reference, cannot compute a GC model\n')
    sys.exit(1)

avgCov = allMean / float(runningTot)
print('AVERAGE COVERAGE = %s' % avgCov)

# express every bin relative to the average coverage
y_out = []
for k in sorted(GC_BINS.keys()):
    GC_BINS[k] /= avgCov
    y_out.append(GC_BINS[k])

print('saving model...')
# Context manager guarantees the pickle is flushed and the handle closed.
with open(OUT_P, 'wb') as outFile:
    pickle.dump([list(range(WINDOW_SIZE + 1)), y_out], outFile)

print('%s (sec)' % (time.time() - tt))
+
b
diff -r 000000000000 -r 6e75a84e9338 utilities/deprecated/FindNucleotideContextOnReference.healthy.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/deprecated/FindNucleotideContextOnReference.healthy.pl Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,508 @@\n+#!/usr/bin/perl\n+\n+use strict;\n+use Math::Round;\n+\n+\n+if ($#ARGV < 1) {\n+   print "parameter mismatch\\nTo run type this command:\\nperl $0 fastahack reference input_pos_file output_file human_gff_file\\n\\n";\n+\n+   print " first argument = full path to fastahack\\n"; \n+   print " second argument = full path to reference genome\\n"; \n+   print " third argument = input file with arbitrary number of columns, but 1st col=chromosome name and 2nd col=position\\n"; \n+   print " fourth argument = output file with three columns: chromosome name, position of the center nucleotide, and the thre-nucleotide context for that position\\n";\n+   print " fifth argument = full path to human gff file\\n\\n\\n"; \n+   exit 1;\n+}\n+\n+\n+my $Fastahack=$ARGV[0];\n+my $Reference=$ARGV[1];\n+open(InputPositions,             \'<\', $ARGV[2]) || die("Could not open file!");\n+open(OutputTrinucleotideContext, \'>\', $ARGV[3]) || die("Could not open file!");\n+open(HumanGFF,                   \'<\', $ARGV[4]) || die("Could not open file!");\n+\n+\n+\n+################ read in one coordinate at a time and execute fastahack on it\n+\n+# reading the header\n+my $head = <InputPositions>;\n+$head =~ s/\\n|\\r//;\n+print OutputTrinucleotideContext "$head\\tContext\\n";\n+my $gffHead = <HumanGFF>;\n+chomp $gffHead;\n+\n+# creating trinucleotide context data hash, insertion and deletion counts\n+my %trinucleotide_context_data;\n+my %context_tally_across_mutated_to;\n+my %gff_hash;\n+my $gffMatch;\n+my %location;\n+# my %genotype_hash;\n+my %insertion_hash;\n+my %deletion_hash;   \n+my $insertion_total;\n+my $deletion_total;\n+my $zygotes_total;\n+my %annotation_hash;\n+my $annotation_total;\n+my %exonic_consequence_hash;\n+my $intronic;\n+my $exonic;\n+my $intergenic;\n+\n+# reading the positional information\n+my $line_count = 1;\n+while (<InputPositions>) {\n+   $_ =~ s/\\n|\\r//;\n+   #print "$_\\n";\n+   my @line = split(\'\\t\', $_);\n+\n+   # getting the 
chromosome and coordinate fields from input file\n+   # fastahack will need to the chromosome and coordinate to read the information from the reference\n+   my $chromosome = $line[0];\n+   my $coordinate = $line[1];\n+\n+   # get coordinates of first and last character in the context\n+   my $start_region = $coordinate - 1;\n+   my $end_region = $coordinate + 1;\n+\n+   # if the coordinate is the very first letter on the chromosome, then do not read before that position\n+   # the context becomes 2 letter code, as opposed to a trinucleotide\n+   if ( $start_region == 0 ) {\n+      $start_region = 1;\n+      $end_region = 2;\n+   }\n+\n+   #print "$Fastahack -r $chromosome:$start_region..$end_region $Reference\\n";\n+   my $context = `$Fastahack -r $chromosome:$start_region..$end_region $Reference`;\n+   \n+   # capitalize context letters\n+   $context = uc($context);\n+\n+   #### IF USING CONTROLLED DATA, split germline column into germline allele and mutated_to allele\n+   # my @germline = split (\'/\', $line[6]);\n+\n+   # if germline allele does not equal reference allele, print "start_region germline allele end_region"\n+   # specifically, replace the middle letter of the context with the germline allele\n+   #print "$germline[0], $germline[1]\\n";\n+   # if ($germline[0] ne $germline[1]) {\n+      # print "germline/reference mismatch, line number $line_count\\n";\n+      # if ($coordinate != 1) {\n+         # substr($context,1,1)= $germline[1];   \n+      # }\n+      # else {\n+        #  substr($context,0,1)= $germline[1];\n+      # }\n+   # }\n+\n+   print OutputTrinucleotideContext "$_\\t$context";\n+   \n+\n+   ###############################\n+   # new section: forming the data structure\n+   ###############################\n+\n+   # to create N_N contexts for data structure, context_code is defined as the trinucleotide context with a blank middle allele\n+   my $context_code=$context;\n+   $context_code =~ s/\\n|\\r//;\n+   substr($context_code,1,1) = 
"_";\n+   \n+   # create variables for mutated_from and  mutated_to nucleotides\n+   my $mutated_from = $line[3];\n+   my $mutated_to = $line[4];\n+\n+   # crea'..b'_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to};\n+               }# end else statement\n+               # print "$context_code, $mutated_from, $mutated_to-- $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}\\n";\n+            }# end of loop over mutated_to\n+\n+            # print "\\nProbabilities for mutated_from $mutated_from:\\n";\n+\n+\n+            foreach my $mutated_to (@nucleotides) {\n+            #foreach $mutated_to_nucl_key (keys %{ $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key} }) {\n+               my $mutated_from_length = length( $mutated_from);\n+               my $mutated_to_length = length( $mutated_to);\n+               if ( $mutated_from_length == 1 ) {\n+                  if ( $mutated_from ne "-" ) {\n+                     if ( $mutated_to_length == 1 ) {\n+                        if ( $mutated_to ne "-" ) {\n+                           my $SNP_probability;\n+                           if ( $context_sum_across_mutated_to == 0 ) {\n+                              $SNP_probability = 0;\n+                           }\n+                           else {\n+                              $SNP_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_mutated_to;\n+                           }\n+                           if ( $mutated_to eq "T" ) {\n+                              print $trinuc_prob_handle "$SNP_probability";\n+                           }\n+                           else {\n+                              # print "$context_code, $mutated_from, $mutated_to, context_sum_across_mutated_to=$context_sum_across_mutated_to -- $SNP_probability\\n";\n+                              print $trinuc_prob_handle 
"$SNP_probability\\t";\n+                           }\n+                        }# end of if statement\n+                        else {\n+                           my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                           # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+                        }# end else statement\n+                     }# end of if statement\n+                     else {\n+                        # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                        # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+                     }# end else statement\n+                  }# end of if statement\n+                  else {\n+                     # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                     # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+                  }# end else statement\n+               }# end of if statement\n+               else {\n+                  my $indel_probability;\n+                  if ( $context_sum_across_indel = 0 ) {\n+                     $indel_probability = 0;\n+                  }\n+                  else {\n+                     # $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                     # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+                  }\n+        
       }# end else statement\n+            }# end of loop over mutated_to\n+            print $trinuc_prob_handle "\\n";\n+\n+         }# end of loop over mutated_from\n+\n+     # print "\\n\\n";\n+     \n+\n+  }# end loop over nt3\n+}# end loop over nt1\n+\n+\n+\n+\n'
b
diff -r 000000000000 -r 6e75a84e9338 utilities/deprecated/FindNucleotideContextOnReference.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/deprecated/FindNucleotideContextOnReference.pl Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,307 @@\n+#!/usr/bin/perl\n+\n+use strict;\n+\n+\n+if ($#ARGV < 1) {\n+   print "parameter mismatch\\nTo run type this command:\\nperl $0 fastahack reference input_pos_file output_file\\n\\n";\n+\n+   print " first argument = full path to fastahack\\n"; \n+   print " second argument = full path to reference genome\\n"; \n+   print " third argument = input file with arbitrary number of columns, but 1st col=chromosome name and 2nd col=position\\n"; \n+   print " fourth argument = output file with three columns: chromosome name, position of the center nucleotide, and the thre-nucleotide context for that position\\n\\n\\n"; \n+   exit 1;\n+}\n+\n+\n+my $Fastahack=$ARGV[0];\n+my $Reference=$ARGV[1];\n+open(InputPositions,             \'<\', $ARGV[2]) || die("Could not open file!");\n+open(OutputTrinucleotideContext, \'>\', $ARGV[3]) || die("Could not open file!");\n+\n+\n+\n+\n+################ read in one coordinate at a time and execute fastahack on it\n+\n+# reading the header\n+my $head = <InputPositions>;\n+$head =~ s/\\n|\\r//;\n+print OutputTrinucleotideContext "$head\\tContext\\n";\n+\n+# creating trinucleotide context data hash, insertion and deletion counts\n+my %trinucleotide_context_data;\n+my %context_tally_across_mutated_to;\n+my %insertion_hash;\n+my %deletion_hash;   \n+my $insertion_total;\n+my $deletion_total;\n+\n+\n+# reading the positional information\n+my $line_count = 1;\n+while (<InputPositions>) {\n+   $_ =~ s/\\n|\\r//;\n+   #print "$_\\n";\n+   my @line = split(\'\\t\', $_);\n+\n+   # getting the chromosome and coordinate fields from input file\n+   # fastahack will need to the chromosome and coordinate to read the information from the reference\n+   my $chromosome = $line[0];\n+   my $coordinate = $line[1];\n+\n+   # get coordinates of first and last character in the context\n+   my $start_region = $coordinate - 1;\n+   my $end_region = $coordinate + 1;\n+\n+   # if the coordinate is the very first letter on the chromosome, then 
do not read before that position\n+   # the context becomes 2 letter code, as opposed to a trinucleotide\n+   if ( $start_region == 0 ) {\n+      $start_region = 1;\n+      $end_region = 2;\n+   }\n+\n+   #print "$Fastahack -r $chromosome:$start_region..$end_region $Reference\\n";\n+   my $context = `$Fastahack -r $chromosome:$start_region..$end_region $Reference`;\n+   \n+   # capitalize context letters\n+   $context = uc($context);\n+\n+   # split germline column into germline allele and mutated_to allele\n+   # my @germline = split (\'/\', $line[6]);\n+\n+   # if germline allele does not equal reference allele, print "start_region germline allele end_region"\n+   # specifically, replace the middle letter of the context with the germline allele\n+   #print "$germline[0], $germline[1]\\n";\n+   # if ($germline[0] ne $germline[1]) {\n+      # print "germline/reference mismatch, line number $line_count\\n";\n+      # if ($coordinate != 1) {\n+         # substr($context,1,1)= $germline[1];   \n+      # }\n+      # else {\n+        #  substr($context,0,1)= $germline[1];\n+      # }\n+   # }\n+\n+   print OutputTrinucleotideContext "$_\\t$context";\n+   \n+\n+\n+   ###############################\n+   # new section: forming the data structure\n+   ###############################\n+\n+   # to create N_N contexts for data structure, context_code is defined as the trinucleotide context with a blank middle allele\n+   my $context_code=$context;\n+   $context_code =~ s/\\n|\\r//;\n+   substr($context_code,1,1) = "_";\n+   \n+   # create variables for mutated_from and  mutated_to nucleotides\n+   my $mutated_from = $line[9];\n+   my $mutated_to = $line[10];\n+\n+   # define length of insertions and deletions\n+   # if ($mutated_from eq "-") {\n+   my $insertion_length = length( $mutated_to );\n+   # }\n+\n+   # if ($mutated_to eq "-") {\n+   my $deletion_length = length( $mutated_from );\n+   # }\n+\n+   # context_codes are totalled\n+   
$trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to} = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to} + 1; \n+   $context_tally_across_mutated_to'..b'_across_indel = $context_sum_across_indel + $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to};\n+               }# end else statement\n+               # print "$context_code, $mutated_from, $mutated_to-- $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}\\n";\n+            }# end of loop over mutated_to\n+\n+            # print "\\nProbabilities for mutated_from $mutated_from:\\n";\n+\n+\n+            foreach my $mutated_to (@nucleotides) {\n+            #foreach $mutated_to_nucl_key (keys %{ $trinucleotide_context_data{$context_code}{$mutated_from_nucl_key} }) {\n+               my $mutated_from_length = length( $mutated_from);\n+               my $mutated_to_length = length( $mutated_to);\n+               if ( $mutated_from_length == 1 ) {\n+                  if ( $mutated_from ne "-" ) {\n+                     if ( $mutated_to_length == 1 ) {\n+                        if ( $mutated_to ne "-" ) {\n+                           my $SNP_probability;\n+                           if ( $context_sum_across_mutated_to == 0 ) {\n+                              $SNP_probability = 0;\n+                           }\n+                           else {\n+                              $SNP_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_mutated_to;\n+                           }\n+                           if ( $mutated_to eq "T" ) {\n+                              print $trinuc_prob_handle "$SNP_probability";\n+                           }\n+                           else {\n+                              # print "$context_code, $mutated_from, $mutated_to, context_sum_across_mutated_to=$context_sum_across_mutated_to -- $SNP_probability\\n";\n+                              print 
$trinuc_prob_handle "$SNP_probability\\t";\n+                           }\n+                        }# end of if statement\n+                        else {\n+                           my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                           # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+                        }# end else statement\n+                     }# end of if statement\n+                     else {\n+                        # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                        # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+                     }# end else statement\n+                  }# end of if statement\n+                  else {\n+                     # my $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                     # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+                  }# end else statement\n+               }# end of if statement\n+               else {\n+                  my $indel_probability;\n+                  if ( $context_sum_across_indel = 0 ) {\n+                     $indel_probability = 0;\n+                  }\n+                  else {\n+                     # $indel_probability = $trinucleotide_context_data{$context_code}{$mutated_from}{$mutated_to}/$context_sum_across_indel;\n+                     # print $indel_prob_handle "$context_code, $mutated_from, $mutated_to, context_sum_across_indel=$context_sum_across_indel -- $indel_probability\\n";\n+          
        }\n+               }# end else statement\n+            }# end of loop over mutated_to\n+            print $trinuc_prob_handle "\\n";\n+\n+         }# end of loop over mutated_from\n+\n+     # print "\\n\\n";\n+     \n+\n+  }# end loop over nt3\n+}# end loop over nt1\n+\n+\n+\n+\n'
b
diff -r 000000000000 -r 6e75a84e9338 utilities/deprecated/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/deprecated/README.md Tue May 15 02:39:53 2018 -0400
b
@@ -0,0 +1,16 @@
+# Deprecated Perl Scripts
+These scripts were updated and rewritten in Python to improve ease of use and speed. Usage and a quick description of the deprecated scripts can be found below. Please use genMutModel.py to generate mutation models.
+
+## FindNucleotideContextOnReference.pl
+This script takes in VCF files and generates variant frequency models for NEAT. Coordinates for each variant are located within the HG19 human reference. The corresponding trinucleotide context around that location on the reference is returned into a new column.
+
+## Running the Script
+The script requires 5 arguments to be entered after the full path to FindNucleotideContextOnReference.healthy.pl
+
+```
+1. Full path to Fastahack
+2. Full path to Reference Genome
+3. Full path to input VCF
+4. Full path to output file
+5. Full path to human GFF
+```
b
diff -r 000000000000 -r 6e75a84e9338 utilities/genMutModel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/genMutModel.py Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,583 @@\n+#!/usr/bin/env python\n+\n+import sys\n+import os\n+import re\n+import bisect\n+import pickle\n+import argparse\n+import numpy as np\n+#matplotlib is not used as far as i can see\n+#import matplotlib.pyplot as mpl\n+\n+# absolute path to the directory above this script\n+SIM_PATH = \'/\'.join(os.path.realpath(__file__).split(\'/\')[:-2])\n+sys.path.append(SIM_PATH+\'/py/\')\n+\n+from refFunc import indexRef\n+\n+REF_WHITELIST =  [str(n) for n in xrange(1,30)] + [\'x\',\'y\',\'X\',\'Y\',\'mt\',\'Mt\',\'MT\']\n+REF_WHITELIST += [\'chr\'+n for n in REF_WHITELIST]\n+VALID_NUCL    =  [\'A\',\'C\',\'G\',\'T\']\n+VALID_TRINUC  =  [VALID_NUCL[i]+VALID_NUCL[j]+VALID_NUCL[k] for i in xrange(len(VALID_NUCL)) for j in xrange(len(VALID_NUCL)) for k in xrange(len(VALID_NUCL))]\n+# if parsing a dbsnp vcf, and no CAF= is found in info tag, use this as default val for population freq\n+VCF_DEFAULT_POP_FREQ = 0.00001\n+\n+\n+#########################################################\n+#\t\t\t\tVARIOUS HELPER FUNCTIONS\t\t\t\t#\n+#########################################################\n+\n+\n+# given a reference index, grab the sequence string of a specified reference\n+def getChrFromFasta(refPath,ref_inds,chrName):\n+\tfor i in xrange(len(ref_inds)):\n+\t\tif ref_inds[i][0] == chrName:\n+\t\t\tref_inds_i = ref_inds[i]\n+\t\t\tbreak\n+\trefFile = open(refPath,\'r\')\n+\trefFile.seek(ref_inds_i[1])\n+\tmyDat = \'\'.join(refFile.read(ref_inds_i[2]-ref_inds_i[1]).split(\'\\n\'))\n+\treturn myDat\n+\n+# cluster a sorted list\n+def clusterList(l,delta):\n+\toutList    = [[l[0]]]\n+\tprevVal    = l[0]\n+\tcurrentInd = 0\n+\tfor n in l[1:]:\n+\t\tif n-prevVal <= delta:\n+\t\t\toutList[currentInd].append(n)\n+\t\telse:\n+\t\t\tcurrentInd += 1\n+\t\t\toutList.append([])\n+\t\t\toutList[currentInd].append(n)\n+\t\tprevVal = n\n+\treturn outList\n+\n+def list_2_countDict(l):\n+\tcDict = {}\n+\tfor n in l:\n+\t\tif n not in cDict:\n+\t\t\tcDict[n] = 0\n+\t\tcDict[n] += 
1\n+\treturn cDict\n+\n+def getBedTracks(fn):\n+\tf = open(fn,\'r\')\n+\ttrackDict = {}\n+\tfor line in f:\n+\t\tsplt = line.strip().split(\'\\t\')\n+\t\tif splt[0] not in trackDict:\n+\t\t\ttrackDict[splt[0]] = []\n+\t\ttrackDict[splt[0]].extend([int(splt[1]),int(splt[2])])\n+\tf.close()\n+\treturn trackDict\n+\n+def getTrackLen(trackDict):\n+\ttotSum = 0\n+\tfor k in trackDict.keys():\n+\t\tfor i in xrange(0,len(trackDict[k]),2):\n+\t\t\ttotSum += trackDict[k][i+1] - trackDict[k][i] + 1\n+\treturn totSum\n+\n+def isInBed(track,ind):\n+\tmyInd = bisect.bisect(track,ind)\n+\tif myInd&1:\n+\t\treturn True\n+\tif myInd < len(track):\n+\t\tif track[myInd-1] == ind:\n+\t\t\treturn True\n+\treturn False\n+\n+## return the mean distance to the median of a cluster\n+#def mean_dist_from_median(c):\n+#\tcentroid = np.median([n for n in c])\n+#\tdists    = []\n+#\tfor n in c:\n+#\t\tdists.append(abs(n-centroid))\n+#\treturn np.mean(dists)\n+#\n+## get median value from counting dictionary\n+#def quick_median(countDict):\n+#\tmidPoint = sum(countDict.values())/2\n+#\tmySum    = 0\n+#\tmyInd    = 0\n+#\tsk       = sorted(countDict.keys())\n+#\twhile mySum < midPoint:\n+#\t\tmySum += countDict[sk[myInd]]\n+#\t\tif mySum >= midPoint:\n+#\t\t\tbreak\n+#\t\tmyInd += 1\n+#\treturn myInd\n+#\n+## get median deviation from median of counting dictionary\n+#def median_deviation_from_median(countDict):\n+#\tmyMedian = quick_median(countDict)\n+#\tdeviations = {}\n+#\tfor k in sorted(countDict.keys()):\n+#\t\td = abs(k-myMedian)\n+#\t\tdeviations[d] = countDict[k]\n+#\treturn quick_median(deviations)\n+\n+\n+#################################################\n+#\t\t\t\tPARSE INPUT OPTIONS\t\t\t\t#\n+#################################################\n+\n+\n+parser = argparse.ArgumentParser(description=\'genMutModel.py\')\n+parser.add_argument(\'-r\',  type=str, required=True,  metavar=\'<str>\',                   help="* ref.fa")\n+parser.add_argument(\'-m\',  type=str, required=True,  
metavar=\'<str>\',                   help="* mutations.tsv [.vcf]")\n+parser.add_argument(\'-o\',  type=str, required=True,  metavar=\'<str>\',                   help="* output.p")\n+parser.add_argument(\'-bi\', type=str, required=False, metavar=\'<str>\',    default=None,  help="only_use_these_regions.bed")\n+parser.add_argu'..b'TE PROBABILITIES\t\t\t\t\t\t   ###\n+\t########################################################################## """\n+\n+\n+\t#for k in sorted(TRINUC_REF_COUNT.keys()):\n+\t#\t\tprint k, TRINUC_REF_COUNT[k]\n+\t#\n+\t#for k in sorted(TRINUC_TRANSITION_COUNT.keys()):\n+\t#\tprint k, TRINUC_TRANSITION_COUNT[k]\n+\n+\t# frequency that each trinuc mutated into anything else\n+\tTRINUC_MUT_PROB = {}\n+\t# frequency that a trinuc mutates into another trinuc, given that it mutated\n+\tTRINUC_TRANS_PROBS = {}\n+\t# frequency of snp transitions, given a snp occurs.\n+\tSNP_TRANS_FREQ = {}\n+\n+\tfor trinuc in sorted(TRINUC_REF_COUNT.keys()):\n+\t\tmyCount = 0\n+\t\tfor k in sorted(TRINUC_TRANSITION_COUNT.keys()):\n+\t\t\tif k[0] == trinuc:\n+\t\t\t\tmyCount += TRINUC_TRANSITION_COUNT[k]\n+\t\tTRINUC_MUT_PROB[trinuc] = myCount / float(TRINUC_REF_COUNT[trinuc])\n+\t\tfor k in sorted(TRINUC_TRANSITION_COUNT.keys()):\n+\t\t\tif k[0] == trinuc:\n+\t\t\t\tTRINUC_TRANS_PROBS[k] = TRINUC_TRANSITION_COUNT[k] / float(myCount)\n+\n+\tfor n1 in VALID_NUCL:\n+\t\trollingTot = sum([SNP_TRANSITION_COUNT[(n1,n2)] for n2 in VALID_NUCL if (n1,n2) in SNP_TRANSITION_COUNT])\n+\t\tfor n2 in VALID_NUCL:\n+\t\t\tkey2 = (n1,n2)\n+\t\t\tif key2 in SNP_TRANSITION_COUNT:\n+\t\t\t\tSNP_TRANS_FREQ[key2] = SNP_TRANSITION_COUNT[key2] / float(rollingTot)\n+\n+\t# compute average snp and indel frequencies\n+\tSNP_FREQ       = SNP_COUNT/float(totalVar)\n+\tAVG_INDEL_FREQ = 1.-SNP_FREQ\n+\tINDEL_FREQ     = {k:(INDEL_COUNT[k]/float(totalVar))/AVG_INDEL_FREQ for k in INDEL_COUNT.keys()}\n+\tif MYBED != None:\n+\t\tif MYBED[1] == True:\n+\t\t\tAVG_MUT_RATE = 
totalVar/float(getTrackLen(MYBED[0]))\n+\t\telse:\n+\t\t\tAVG_MUT_RATE = totalVar/float(TOTAL_REFLEN - getTrackLen(MYBED[0]))\n+\telse:\n+\t\tAVG_MUT_RATE = totalVar/float(TOTAL_REFLEN)\n+\n+\t#\n+\t#\tif values weren\'t found in data, appropriately append null entries\n+\t#\n+\tprintTrinucWarning = False\n+\tfor trinuc in VALID_TRINUC:\n+\t\ttrinuc_mut = [trinuc[0]+n+trinuc[2] for n in VALID_NUCL if n != trinuc[1]]\n+\t\tif trinuc not in TRINUC_MUT_PROB:\n+\t\t\tTRINUC_MUT_PROB[trinuc] = 0.\n+\t\t\tprintTrinucWarning = True\n+\t\tfor trinuc2 in trinuc_mut:\n+\t\t\tif (trinuc,trinuc2) not in TRINUC_TRANS_PROBS:\n+\t\t\t\tTRINUC_TRANS_PROBS[(trinuc,trinuc2)] = 0.\n+\t\t\t\tprintTrinucWarning = True\n+\tif printTrinucWarning:\n+\t\tprint \'Warning: Some trinucleotides transitions were not encountered in the input dataset, probabilities of 0.0 have been assigned to these events.\'\n+\n+\t#\n+\t#\tprint some stuff\n+\t#\n+\tfor k in sorted(TRINUC_MUT_PROB.keys()):\n+\t\tprint \'p(\'+k+\' mutates) =\',TRINUC_MUT_PROB[k]\n+\n+\tfor k in sorted(TRINUC_TRANS_PROBS.keys()):\n+\t\tprint \'p(\'+k[0]+\' --> \'+k[1]+\' | \'+k[0]+\' mutates) =\',TRINUC_TRANS_PROBS[k]\n+\n+\tfor k in sorted(INDEL_FREQ.keys()):\n+\t\tif k > 0:\n+\t\t\tprint \'p(ins length = \'+str(abs(k))+\' | indel occurs) =\',INDEL_FREQ[k]\n+\t\telse:\n+\t\t\tprint \'p(del length = \'+str(abs(k))+\' | indel occurs) =\',INDEL_FREQ[k]\n+\n+\tfor k in sorted(SNP_TRANS_FREQ.keys()):\n+\t\tprint \'p(\'+k[0]+\' --> \'+k[1]+\' | SNP occurs) =\',SNP_TRANS_FREQ[k]\n+\n+\t#for n in COMMON_VARIANTS:\n+\t#\tprint n\n+\n+\t#for n in HIGH_MUT_REGIONS:\n+\t#\tprint n\n+\n+\tprint \'p(snp)   =\',SNP_FREQ\n+\tprint \'p(indel) =\',AVG_INDEL_FREQ\n+\tprint \'overall average mut rate:\',AVG_MUT_RATE\n+\tprint \'total variants processed:\',totalVar\n+\n+\t#\n+\t# save variables to file\n+\t#\n+\tif SKIP_COMMON:\n+\t\tOUT_DICT = {\'AVG_MUT_RATE\':AVG_MUT_RATE,\n+\t\t            \'SNP_FREQ\':SNP_FREQ,\n+\t\t            
\'SNP_TRANS_FREQ\':SNP_TRANS_FREQ,\n+\t\t            \'INDEL_FREQ\':INDEL_FREQ,\n+\t\t            \'TRINUC_MUT_PROB\':TRINUC_MUT_PROB,\n+\t\t            \'TRINUC_TRANS_PROBS\':TRINUC_TRANS_PROBS}\n+\telse:\n+\t\tOUT_DICT = {\'AVG_MUT_RATE\':AVG_MUT_RATE,\n+\t\t            \'SNP_FREQ\':SNP_FREQ,\n+\t\t            \'SNP_TRANS_FREQ\':SNP_TRANS_FREQ,\n+\t\t            \'INDEL_FREQ\':INDEL_FREQ,\n+\t\t            \'TRINUC_MUT_PROB\':TRINUC_MUT_PROB,\n+\t\t            \'TRINUC_TRANS_PROBS\':TRINUC_TRANS_PROBS,\n+\t\t            \'COMMON_VARIANTS\':COMMON_VARIANTS,\n+\t\t            \'HIGH_MUT_REGIONS\':HIGH_MUT_REGIONS}\n+\tpickle.dump( OUT_DICT, open( OUT_PICKLE, "wb" ) )\n+\n+\n+if __name__ == "__main__":\n+\tmain()\n+\n+\n+\n+\n'
b
diff -r 000000000000 -r 6e75a84e9338 utilities/genSeqErrorModel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/genSeqErrorModel.py Tue May 15 02:39:53 2018 -0400
[
b"@@ -0,0 +1,300 @@\n+#!/usr/bin/env python\n+\n+#\n+#\n+#          genSeqErrorModel.py\n+#          Computes sequencing error model for genReads.py\n+#\n+#         \n+#          Usage: python genSeqErrorModel.py -i input_reads.fq -o path/to/output_name.p\n+#\n+#\n+\n+\n+import os\n+import sys\n+import gzip\n+import random\n+import numpy as np\n+import argparse\n+import cPickle as pickle\n+\n+# absolute path to this script\n+SIM_PATH = '/'.join(os.path.realpath(__file__).split('/')[:-2])+'/py/'\n+sys.path.append(SIM_PATH)\n+\n+from probability\t\timport DiscreteDistribution\n+\n+def parseFQ(inf):\n+\tprint 'reading '+inf+'...'\n+\tif inf[-3:] == '.gz':\n+\t\tprint 'detected gzip suffix...'\n+\t\tf = gzip.open(inf,'r')\n+\telse:\n+\t\tf = open(inf,'r')\n+\n+\tIS_SAM = False\n+\tif inf[-4:] == '.sam':\n+\t\tprint 'detected sam input...'\n+\t\tIS_SAM = True\n+\n+\trRead  = 0\n+\tactual_readlen = 0\n+\tqDict  = {}\n+\twhile True:\n+\n+\t\tif IS_SAM:\n+\t\t\tdata4 = f.readline()\n+\t\t\tif not len(data4):\n+\t\t\t\tbreak\n+\t\t\ttry:\n+\t\t\t\tdata4 = data4.split('\\t')[10]\n+\t\t\texcept IndexError:\n+\t\t\t\tbreak\n+\t\t\t# need to add some input checking here? 
Yup, probably.\n+\t\telse:\n+\t\t\tdata1 = f.readline()\n+\t\t\tdata2 = f.readline()\n+\t\t\tdata3 = f.readline()\n+\t\t\tdata4 = f.readline()\n+\t\t\tif not all([data1,data2,data3,data4]):\n+\t\t\t\tbreak\n+\n+\t\tif actual_readlen == 0:\n+\t\t\tif inf[-3:] != '.gz' and not IS_SAM:\n+\t\t\t\ttotalSize = os.path.getsize(inf)\n+\t\t\t\tentrySize = sum([len(n) for n in [data1,data2,data3,data4]])\n+\t\t\t\tprint 'estimated number of reads in file:',int(float(totalSize)/entrySize)\n+\t\t\tactual_readlen = len(data4)-1\n+\t\t\tprint 'assuming read length is uniform...'\n+\t\t\tprint 'detected read length (from first read found):',actual_readlen\n+\t\t\tpriorQ = np.zeros([actual_readlen,RQ])\n+\t\t\ttotalQ = [None] + [np.zeros([RQ,RQ]) for n in xrange(actual_readlen-1)]\n+\n+\t\t# sanity-check readlengths\n+\t\tif len(data4)-1 != actual_readlen:\n+\t\t\tprint 'skipping read with unexpected length...'\n+\t\t\tcontinue\n+\n+\t\tfor i in range(len(data4)-1):\n+\t\t\tq = ord(data4[i])-offQ\n+\t\t\tqDict[q] = True\n+\t\t\tif i == 0:\n+\t\t\t\tpriorQ[i][q] += 1\n+\t\t\telse:\n+\t\t\t\ttotalQ[i][prevQ,q] += 1\n+\t\t\t\tpriorQ[i][q] += 1\n+\t\t\tprevQ = q\n+\n+\t\trRead += 1\n+\t\tif rRead%PRINT_EVERY == 0:\n+\t\t\tprint rRead\n+\t\tif MAX_READS > 0 and rRead >= MAX_READS:\n+\t\t\tbreak\n+\tf.close()\n+\n+\t# some sanity checking again...\n+\tQRANGE = [min(qDict.keys()),max(qDict.keys())]\n+\tif QRANGE[0] < 0:\n+\t\tprint '\\nError: Read in Q-scores below 0\\n'\n+\t\texit(1)\n+\tif QRANGE[1] > RQ:\n+\t\tprint '\\nError: Read in Q-scores above specified maximum:',QRANGE[1],'>',RQ,'\\n'\n+\t\texit(1)\n+\n+\tprint 'computing probabilities...'\n+\tprobQ  = [None] + [[[0. 
for m in xrange(RQ)] for n in xrange(RQ)] for p in xrange(actual_readlen-1)]\n+\tfor p in xrange(1,actual_readlen):\n+\t\tfor i in xrange(RQ):\n+\t\t\trowSum = float(np.sum(totalQ[p][i,:]))+PROB_SMOOTH*RQ\n+\t\t\tif rowSum <= 0.:\n+\t\t\t\tcontinue\n+\t\t\tfor j in xrange(RQ):\n+\t\t\t\tprobQ[p][i][j] = (totalQ[p][i][j]+PROB_SMOOTH)/rowSum\n+\n+\tinitQ  = [[0. for m in xrange(RQ)] for n in xrange(actual_readlen)]\n+\tfor i in xrange(actual_readlen):\n+\t\trowSum = float(np.sum(priorQ[i,:]))+INIT_SMOOTH*RQ\n+\t\tif rowSum <= 0.:\n+\t\t\tcontinue\n+\t\tfor j in xrange(RQ):\n+\t\t\tinitQ[i][j] = (priorQ[i][j]+INIT_SMOOTH)/rowSum\n+\n+\tif PLOT_STUFF:\n+\t\tmpl.rcParams.update({'font.size': 14, 'font.weight':'bold', 'lines.linewidth': 3})\n+\n+\t\tmpl.figure(1)\n+\t\tZ = np.array(initQ).T\n+\t\tX, Y = np.meshgrid( range(0,len(Z[0])+1), range(0,len(Z)+1) )\n+\t\tmpl.pcolormesh(X,Y,Z,vmin=0.,vmax=0.25)\n+\t\tmpl.axis([0,len(Z[0]),0,len(Z)])\n+\t\tmpl.yticks(range(0,len(Z),10),range(0,len(Z),10))\n+\t\tmpl.xticks(range(0,len(Z[0]),10),range(0,len(Z[0]),10))\n+\t\tmpl.xlabel('Read Position')\n+\t\tmpl.ylabel('Quality Score')\n+\t\tmpl.title('Q-Score Prior Probabilities')\n+\t\tmpl.colorbar()\n+\n+\t\tmpl.show()\n+\n+\t\tVMIN_LOG = [-4,0]\n+\t\tminVal   = 10**VMIN_LOG[0]\n+\t\tqLabels  = [str(n) for n in range(QRANGE[0],QRANGE[1]+1) if n%5==0]\n+\t\tprint qLabels\n+\t\tqTicksx  = [int(n)+0.5 for n in qLabels]\n+\t\tqTicksy  = [(RQ-int(n))-0.5 for n in qLabels]\n+\n+\t\tfor p in xrange(1,actual_readlen,10):\n+\t\t\tcurrentDat"..b'[-1].append(DiscreteDistribution([1],[Qscores[j]],degenerateVal=Qscores[j]))\n+\t\t\telse:\n+\t\t\t\tprobDistByPosByPrevQ[-1].append(DiscreteDistribution(probQ[i][j],Qscores))\n+\n+\tcountDict = {}\n+\tfor q in Qscores:\n+\t\tcountDict[q] = 0\n+\tfor samp in xrange(1,N_SAMP+1):\n+\t\tif samp%PRINT_EVERY == 0:\n+\t\t\tprint samp\n+\t\tmyQ = initDistByPos[0].sample()\n+\t\tcountDict[myQ] += 1\n+\t\tfor i in xrange(1,len(initQ)):\n+\t\t\tmyQ = 
probDistByPosByPrevQ[i][myQ].sample()\n+\t\t\tcountDict[myQ] += 1\n+\n+\ttotBases = float(sum(countDict.values()))\n+\tavgError = 0.\n+\tfor k in sorted(countDict.keys()):\n+\t\teVal = 10.**(-k/10.)\n+\t\t#print k, eVal, countDict[k]\n+\t\tavgError += eVal * (countDict[k]/totBases)\n+\tprint \'AVG ERROR RATE:\',avgError\n+\n+\treturn (initQ, probQ, avgError)\n+\n+parser = argparse.ArgumentParser(description=\'genSeqErrorModel.py\')\n+parser.add_argument(\'-i\',  type=str, required=True,  metavar=\'<str>\',                      help="* input_read1.fq (.gz) / input_read1.sam")\n+parser.add_argument(\'-o\',  type=str, required=True,  metavar=\'<str>\',                      help="* output.p")\n+parser.add_argument(\'-i2\', type=str, required=False, metavar=\'<str>\',     default=None,    help="input_read2.fq (.gz) / input_read2.sam")\n+parser.add_argument(\'-p\',  type=str, required=False, metavar=\'<str>\',     default=None,    help="input_alignment.pileup")\n+parser.add_argument(\'-q\',  type=int, required=False, metavar=\'<int>\',     default=33,      help="quality score offset [33]")\n+parser.add_argument(\'-Q\',  type=int, required=False, metavar=\'<int>\',     default=41,      help="maximum quality score [41]")\n+parser.add_argument(\'-n\',  type=int, required=False, metavar=\'<int>\',     default=-1,      help="maximum number of reads to process [all]")\n+parser.add_argument(\'-s\',  type=int, required=False, metavar=\'<int>\',     default=1000000, help="number of simulation iterations [1000000]")\n+parser.add_argument(\'--plot\',        required=False, action=\'store_true\', default=False,   help=\'perform some optional plotting\')\n+args = parser.parse_args()\n+\n+(INF, OUF, offQ, maxQ, MAX_READS, N_SAMP) = (args.i, args.o, args.q, args.Q, args.n, args.s)\n+(INF2, PILEUP) = (args.i2, args.p)\n+\n+RQ = maxQ+1\n+\n+INIT_SMOOTH = 0.\n+PROB_SMOOTH = 0.\n+PRINT_EVERY = 10000\n+PLOT_STUFF  = args.plot\n+if PLOT_STUFF:\n+\tprint \'plotting is desired, lets import 
matplotlib...\'\n+\timport matplotlib.pyplot as mpl\n+\n+def main():\n+\n+\tQscores = range(RQ)\n+\tif INF2 == None:\n+\t\t(initQ, probQ, avgError) = parseFQ(INF)\n+\telse:\n+\t\t(initQ, probQ, avgError1)   = parseFQ(INF)\n+\t\t(initQ2, probQ2, avgError2) = parseFQ(INF2)\n+\t\tavgError = (avgError1+avgError2)/2.\n+\n+\t#\n+\t#\tembed some default sequencing error parameters if no pileup is provided\n+\t#\n+\tif PILEUP == None:\n+\n+\t\tprint \'Using default sequencing error parameters...\'\n+\n+\t\t# sequencing substitution transition probabilities\n+\t\tSSE_PROB   = [[0.,     0.4918, 0.3377, 0.1705 ],\n+\t\t\t\t\t  [0.5238,     0., 0.2661, 0.2101 ],\n+\t\t\t\t\t  [0.3754, 0.2355,     0., 0.3890 ],\n+\t\t\t\t\t  [0.2505, 0.2552, 0.4942, 0.     ]]\n+\t\t# if a sequencing error occurs, what are the odds it\'s an indel?\n+\t\tSIE_RATE     = 0.01\n+\t\t# sequencing indel error length distribution\n+\t\tSIE_PROB     = [0.999,0.001]\n+\t\tSIE_VAL      = [1,2]\n+\t\t# if a sequencing indel error occurs, what are the odds it\'s an insertion as opposed to a deletion?\n+\t\tSIE_INS_FREQ = 0.4\n+\t\t# if a sequencing insertion error occurs, what\'s the probability of it being an A, C, G, T...\n+\t\tSIE_INS_NUCL = [0.25, 0.25, 0.25, 0.25]\n+\n+\t#\n+\t#\totherwise we need to parse a pileup and compute statistics!\n+\t#\n+\telse:\n+\t\tprint \'\\nPileup parsing coming soon!\\n\'\n+\t\texit(1)\n+\n+\terrorParams  = [SSE_PROB, SIE_RATE, SIE_PROB, SIE_VAL, SIE_INS_FREQ, SIE_INS_NUCL]\n+\n+\t#\n+\t#\tfinally, let\'s save our output model\n+\t#\n+\tprint \'saving model...\'\n+\tif INF2 == None:\n+\t\tpickle.dump([initQ,probQ,Qscores,offQ,avgError,errorParams],open(OUF,\'wb\'))\n+\telse:\n+\t\tpickle.dump([initQ,probQ,initQ2,probQ2,Qscores,offQ,avgError,errorParams],open(OUF,\'wb\'))\n+\n+if __name__ == \'__main__\':\n+\tmain()\n'
b
diff -r 000000000000 -r 6e75a84e9338 utilities/plotMutModel.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/plotMutModel.py Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,278 @@\n+#!/usr/bin/env python\n+\n+#\n+#\ta quick script for comparing mutation models\n+#\n+#\tpython plotMutModel.py -i model1.p [model2.p] [model3.p]... -l legend_label1 [legend_label2] [legend_label3]... -o path/to/pdf_plot_prefix \n+#\n+\n+import sys\n+import pickle\n+import bisect\n+import numpy as np\n+import matplotlib.pyplot as mpl\n+import matplotlib.colors as colors\n+import matplotlib.cm as cmx\n+import argparse\n+\n+#mpl.rc(\'text\',usetex=True)\n+#mpl.rcParams[\'text.latex.preamble\']=[r"\\usepackage{amsmath}"]\n+\n+parser = argparse.ArgumentParser(description=\'Plot and compare mutation models from genMutModel.py Usage: python plotMutModel.py -i model1.p [model2.p] [model3.p]... -l legend_label1 [legend_label2] [legend_label3]... -o path/to/pdf_plot_prefix\')\n+parser.add_argument(\'-i\',  type=str,   required=True,   metavar=\'<str>\',   nargs=\'+\',                help="* mutation_model_1.p [mutation_model_2.p] [mutation_model_3] ...")\n+parser.add_argument(\'-l\',  type=str,   required=True,   metavar=\'<str>\',   nargs=\'+\',                help="* legend labels: model1_name [model2_name] [model3_name]...")\n+parser.add_argument(\'-o\',  type=str,   required=True,   metavar=\'<str>\',                             help="* output pdf prefix")\n+args = parser.parse_args()\n+\n+\n+\n+def getColor(i,N,colormap=\'jet\'):\n+\tcm = mpl.get_cmap(colormap) \n+\tcNorm  = colors.Normalize(vmin=0, vmax=N+1)\n+\tscalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)\n+\tcolorVal = scalarMap.to_rgba(i)\n+\treturn colorVal\n+\n+def isInBed(track,ind):\n+\tif ind in track:\n+\t\treturn True\n+\telif bisect.bisect(track,ind)%1 == 1:\n+\t\treturn True\n+\telse:\n+\t\treturn False\n+\n+def getBedOverlap(track,ind_s,ind_e):\n+\tif ind_s in track:\n+\t\tmyInd = track.index(ind_s)\n+\t\treturn min([track[myInd+1]-ind_s+1,ind_e-ind_s+1])\n+\telse:\n+\t\tmyInd = bisect.bisect(track,ind_s)\n+\t\tif myInd%1 and myInd < len(track)-1:\n+\t\t\treturn 
min([track[myInd+1]-ind_s+1,ind_e-ind_s+1])\n+\treturn 0\n+\n+# a waaaaaaay slower version of the above function ^^\n+#def getTrackOverlap(track1,track2):\n+#\totrack = [0 for n in xrange(max(track1+track2)+1)]\n+#\tfor i in xrange(0,len(track1),2):\n+#\t\tfor j in xrange(track1[i],track1[i+1]+1):\n+#\t\t\totrack[j] = 1\n+#\tocount = 0\n+#\tfor i in xrange(0,len(track2),2):\n+#\t\tfor j in xrange(track2[i],track2[i+1]+1):\n+#\t\t\tif otrack[j]:\n+#\t\t\t\tocount += 1\n+#\treturn ocount\n+\n+OUP  = args.o\n+LAB = args.l\n+#print LAB\n+INP  = args.i\n+\n+N_FILES = len(INP)\n+\n+mpl.rcParams.update({\'font.size\': 13, \'font.weight\':\'bold\', \'lines.linewidth\': 3})\n+\n+#################################################\n+#\n+#\tBASIC STATS\n+#\n+#################################################\n+mpl.figure(0,figsize=(12,10))\n+\n+mpl.subplot(2,2,1)\n+colorInd = 0\n+for fn in INP:\n+\tmyCol = getColor(colorInd,N_FILES)\n+\tcolorInd += 1\n+\tDATA_DICT = pickle.load( open( fn, "rb" ) )\n+\t[AVG_MUT_RATE, SNP_FREQ, INDEL_FREQ] = [DATA_DICT[\'AVG_MUT_RATE\'], DATA_DICT[\'SNP_FREQ\'], DATA_DICT[\'INDEL_FREQ\']]\n+\tmpl.bar([colorInd-1],[AVG_MUT_RATE],1.,color=myCol)\n+mpl.xlim([-1,N_FILES+1])\n+mpl.grid()\n+mpl.xticks([],[])\n+mpl.ylabel(\'Frequency\')\n+mpl.title(\'Overall mutation rate (1/bp)\')\n+\n+mpl.subplot(2,2,2)\n+colorInd = 0\n+for fn in INP:\n+\tmyCol = getColor(colorInd,N_FILES)\n+\tcolorInd += 1\n+\tDATA_DICT = pickle.load( open( fn, "rb" ) )\n+\t[AVG_MUT_RATE, SNP_FREQ, INDEL_FREQ] = [DATA_DICT[\'AVG_MUT_RATE\'], DATA_DICT[\'SNP_FREQ\'], DATA_DICT[\'INDEL_FREQ\']]\n+\tmpl.bar([colorInd-1],[SNP_FREQ],1.,color=myCol)\n+\tmpl.bar([colorInd-1],[1.-SNP_FREQ],1.,color=myCol,bottom=[SNP_FREQ],hatch=\'/\')\n+mpl.axis([-1,N_FILES+1,0,1.2])\n+mpl.grid()\n+mpl.xticks([],[])\n+mpl.yticks([0,.2,.4,.6,.8,1.],[0,0.2,0.4,0.6,0.8,1.0])\n+mpl.ylabel(\'Frequency\')\n+mpl.title(\'SNP freq [  ] & indel freq [//]\')\n+\n+mpl.subplot(2,1,2)\n+colorInd = 0\n+legText  = LAB\n+for 
fn in INP:\n+\tmyCol = getColor(colorInd,N_FILES)\n+\tcolorInd += 1\n+\tDATA_DICT = pickle.load( open( fn, "rb" ) )\n+\t[AVG_MUT_RATE, SNP_FREQ, INDEL_FREQ] = [DATA_DICT[\'AVG_MUT_RATE\'], DATA_DICT[\'SNP_FREQ\'], DATA_DICT[\'INDEL_FREQ\']]\n+\tx = sorted(INDEL_FREQ'..b'##\n+#\n+#\tTRINUC TRANS PROB\n+#\n+#################################################\n+plotNum = 3\n+for fn in INP:\n+\tfig = mpl.figure(plotNum,figsize=(12,10))\n+\tDATA_DICT = pickle.load( open( fn, "rb" ) )\n+\tTRINUC_TRANS_PROBS = DATA_DICT[\'TRINUC_TRANS_PROBS\']\n+\n+\txt2 = [m[3] for m in sorted([(n[0],n[2],n[1],n) for n in xt])]\n+\treverse_dict = {xt2[i]:i for i in xrange(len(xt2))}\n+\tZ = np.zeros((64,64))\n+\tL = [[\'\' for n in xrange(64)] for m in xrange(64)]\n+\tfor k in TRINUC_TRANS_PROBS:\n+\t\ti = reverse_dict[k[0]]\n+\t\tj = reverse_dict[k[1]]\n+\t\tZ[i][j] = TRINUC_TRANS_PROBS[k]\n+\n+\tHARDCODED_LABEL = [\'A_A\',\'A_C\',\'A_G\',\'A_T\',\n+\t                   \'C_A\',\'C_C\',\'C_G\',\'C_T\',\n+\t                   \'G_A\',\'G_C\',\'G_G\',\'G_T\',\n+\t                   \'T_A\',\'T_C\',\'T_G\',\'T_T\']\n+\n+\tfor pi in xrange(16):\n+\t\tmpl.subplot(4,4,pi+1)\n+\t\tZ2 = Z[pi*4:(pi+1)*4,pi*4:(pi+1)*4]\n+\t\tX, Y = np.meshgrid( range(0,len(Z2[0])+1), range(0,len(Z2)+1) )\n+\t\tim = mpl.pcolormesh(X,Y,Z2[::-1,:],vmin=0.0,vmax=0.5)\n+\t\tmpl.axis([0,4,0,4])\n+\t\tmpl.xticks([0.5,1.5,2.5,3.5],[\'A\',\'C\',\'G\',\'T\'])\n+\t\tmpl.yticks([0.5,1.5,2.5,3.5],[\'T\',\'G\',\'C\',\'A\'])\n+\t\tmpl.text(1.6, 1.8, HARDCODED_LABEL[pi], color=\'white\')\n+\n+\t# colorbar haxx\n+\tfig.subplots_adjust(right=0.8)\n+\tcbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])\n+\tcb = fig.colorbar(im,cax=cbar_ax)\n+\tcb.set_label(r"p(X$Y_1$Z->X$Y_2$Z | X_Z mutates)")\n+\n+\t#mpl.tight_layout()\n+\t#mpl.figtext(0.24,0.94,\'Trinucleotide Mutation Frequency\',size=20)\n+\t#mpl.show()\n+\tmpl.savefig(OUP+\'_plot\'+str(plotNum)+\'_trinucTrans.pdf\')\n+\tplotNum += 
1\n+\n+#################################################\n+#\n+#\tHIGH MUT REGIONS\n+#\n+#################################################\n+track_byFile_byChr = [{} for n in INP]\n+bp_total_byFile    = [0 for n in INP]\n+colorInd = 0\n+for fn in INP:\n+\tDATA_DICT = pickle.load( open( fn, "rb" ) )\n+\tHIGH_MUT_REGIONS = DATA_DICT[\'HIGH_MUT_REGIONS\']\n+\tfor region in HIGH_MUT_REGIONS:\n+\t\tif region[0] not in track_byFile_byChr[colorInd]:\n+\t\t\ttrack_byFile_byChr[colorInd][region[0]] = []\n+\t\ttrack_byFile_byChr[colorInd][region[0]].extend([region[1],region[2]])\n+\t\tbp_total_byFile[colorInd] += region[2]-region[1]+1\n+\tcolorInd += 1\n+\n+bp_overlap_count = [[0 for m in INP] for n in INP]\n+for i in xrange(N_FILES):\n+\tbp_overlap_count[i][i] = bp_total_byFile[i]\n+\tfor j in xrange(i+1,N_FILES):\n+\t\tfor k in track_byFile_byChr[i].keys():\n+\t\t\tif k in track_byFile_byChr[j]:\n+\t\t\t\tfor ii in xrange(len(track_byFile_byChr[i][k][::2])):\n+\t\t\t\t\tbp_overlap_count[i][j] += getBedOverlap(track_byFile_byChr[j][k],track_byFile_byChr[i][k][ii*2],track_byFile_byChr[i][k][ii*2+1])\n+\n+print \'\'\t\t\t\t\n+print \'HIGH_MUT_REGION OVERLAP BETWEEN \'+str(N_FILES)+\' MODELS...\'\n+for i in xrange(N_FILES):\n+\tfor j in xrange(i,N_FILES):\n+\t\tnDissimilar = (bp_overlap_count[i][i]-bp_overlap_count[i][j]) + (bp_overlap_count[j][j]-bp_overlap_count[i][j])\n+\t\tif bp_overlap_count[i][j] == 0:\n+\t\t\tpercentageV = 0.0\n+\t\telse:\n+\t\t\tpercentageV = bp_overlap_count[i][j]/float(bp_overlap_count[i][j]+nDissimilar)\n+\t\tprint \'overlap[\'+str(i)+\',\'+str(j)+\'] = \'+str(bp_overlap_count[i][j])+\' bp ({0:.3f}%)\'.format(percentageV*100.)\n+print \'\'\n+\n+#################################################\n+#\n+#\tCOMMON VARIANTS\n+#\n+#################################################\n+setofVars = [set([]) for n in INP]\n+colorInd = 0\n+for fn in INP:\n+\tDATA_DICT = pickle.load( open( fn, "rb" ) )\n+\tCOMMON_VARIANTS = DATA_DICT[\'COMMON_VARIANTS\']\n+\tfor n 
in COMMON_VARIANTS:\n+\t\tsetofVars[colorInd].add(n)\n+\tcolorInd += 1\n+\n+print \'\'\n+print \'COMMON_VARIANTS OVERLAP BETWEEN \'+str(N_FILES)+\' MODELS...\'\n+for i in xrange(N_FILES):\n+\tfor j in xrange(i,N_FILES):\n+\t\toverlapCount = len(setofVars[i].intersection(setofVars[j]))\n+\t\tnDissimilar  = (len(setofVars[i])-overlapCount) + (len(setofVars[j])-overlapCount)\n+\t\tif overlapCount == 0:\n+\t\t\tpercentageV = 0.0\n+\t\telse:\n+\t\t\tpercentageV = overlapCount/float(overlapCount+nDissimilar)\n+\t\tprint \'overlap[\'+str(i)+\',\'+str(j)+\'] = \'+str(overlapCount)+\' variants ({0:.3f}%)\'.format(percentageV*100.)\n+print \'\'\n+\n+\n+\n'
b
diff -r 000000000000 -r 6e75a84e9338 utilities/validateBam.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/validateBam.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import gzip
+from struct import unpack
+
# Expected final 28 bytes of the input file, one two-digit lowercase hex
# string per byte.  The script compares this list against a hex dump of the
# file's tail and warns when they differ (presumably the BGZF end-of-file
# marker block described in the SAM/BAM specification).
BAM_EOF = ('1f 8b 08 04 00 00 00 00 00 ff 06 00 42 43 '
           '02 00 1b 00 03 00 00 00 00 00 00 00 00 00').split()
+
def getBytes(fmt, amt, stream=None):
    """Read and decode `amt` little-endian values of type `fmt` from a stream.

    fmt    -- struct format for ONE value: '<i' / '<I' (4 bytes) or
              '<c' / '<b' / '<B' (1 byte).  Any other format prints an
              error and exits with status 1.
    amt    -- number of consecutive values to read.
    stream -- binary file-like object to read from; defaults to the
              module-level handle `f` opened by the script.

    Returns the decoded value itself when amt == 1, a tuple of `amt` values
    when amt > 1, and None when the stream yields no more data.
    """
    if stream is None:
        # historical behavior: read from the file the script currently has open
        stream = f
    if fmt in ('<i', '<I'):
        mySize = 4
    elif fmt in ('<c', '<b', '<B'):
        mySize = 1
    else:
        print('\nError, unknown format: ' + str(fmt) + ' \n')
        exit(1)
    fread = stream.read(mySize * amt)
    if not fread:
        return None
    if amt == 1:
        return unpack(fmt, fread)[0]
    # A single-item format cannot decode several items; insert a repeat
    # count after the byte-order character (e.g. '<i' -> '<3i').
    return unpack(fmt[0] + str(amt) + fmt[1:], fread)
+
# check eof: dump the tail of the raw (still compressed) file as hex and
# compare it against the expected end-of-file marker in BAM_EOF
if len(sys.argv) < 2:
    print('\nUsage: python validateBam.py input.bam\n')
    exit(1)
IN_BAM = sys.argv[1]
f = open(IN_BAM,'rb')
try:
    # clamp at 0 so a file shorter than the marker is reported as a
    # mismatch instead of failing on a negative seek offset
    f.seek(max(0, os.path.getsize(IN_BAM)-len(BAM_EOF)))
    # bytearray yields integers on every Python version, so no ord() needed
    EOF = [format(n,'02x') for n in bytearray(f.read())]
finally:
    f.close()
print('EOF_MARKER:  ' + ' ' + ' '.join(EOF))
if EOF != BAM_EOF:
    print('\nWARNING: BAM EOF DOES NOT MATCH EXPECTED STRING.\n')
+
# check other stuff: decompress the file and print the header fields, the
# reference list and the fixed-size fields of every alignment record
def _emit(*items):
    """Write the items separated by single spaces plus a newline (same layout as a multi-item Python 2 print statement)."""
    sys.stdout.write(' '.join([str(item) for item in items]) + '\n')

f = gzip.open(IN_BAM,'rb')
try:
    _emit('MAGIC STRING:', f.read(4))
    l_text = getBytes('<i',1)
    _emit('l_text:      ', l_text)
    # the label ends in a newline, so no separating space is written here
    _emit('text:      \n' + str(f.read(l_text)))
    n_ref = getBytes('<i',1)
    _emit('n_ref:       ', n_ref)

    # n_ref is None when the stream ends before the reference count
    for i in range(n_ref or 0):
        l_name = getBytes('<i',1)
        _emit('ref'+str(i)+' - l_name:', l_name)
        _emit('ref'+str(i)+' - name:  ', f.read(l_name))
        _emit('ref'+str(i)+' - l_ref: ', getBytes('<i',1))

    print('\nEXAMINING ALIGNMENT DATA:\n')
    aln_N = 0
    while True:
        blockSize = getBytes('<i',1)
        if blockSize is None:
            break
        # count only records that actually exist
        aln_N += 1
        _emit('['+str(aln_N)+']:', 'blockSize:', blockSize)
        _emit('-- refID:', getBytes('<i',1))
        _emit('-- pos:  ', getBytes('<i',1))
        bmqnl = getBytes('<I',1)
        if bmqnl is None:
            print('\nWARNING: ALIGNMENT RECORD IS TRUNCATED.\n')
            break
        # packed word: bin (high 16 bits), mapq (next 8), read-name length (low 8)
        binv  = (bmqnl>>16)&65535
        mapq  = (bmqnl>>8)&255
        lrn   = bmqnl&255
        _emit('-- bmqnl:', bmqnl, '(bin='+str(binv)+', mapq='+str(mapq)+', l_readname+1='+str(lrn)+')')
        flgnc = getBytes('<I',1)
        if flgnc is None:
            print('\nWARNING: ALIGNMENT RECORD IS TRUNCATED.\n')
            break
        # packed word: flag (high 16 bits), number of cigar operations (low 16)
        flag  = (flgnc>>16)&65535
        ncig  = flgnc&65535
        _emit('-- flgnc:', flgnc, '(flag='+str(flag)+', ncig='+str(ncig)+')')
        _emit('-- l_seq:', getBytes('<i',1))
        _emit('-- nxtID:', getBytes('<i',1))
        _emit('-- nxtPo:', getBytes('<i',1))
        _emit('-- tlen: ', getBytes('<i',1))
        # wrap in a list and strip the brackets to show the repr of the name
        _emit('-- rname:', str([f.read(lrn)])[1:-1])

        # skip the rest of the record: 32 bytes of fixed fields and the
        # read name have already been consumed
        f.read(blockSize-32-lrn)
finally:
    f.close()
b
diff -r 000000000000 -r 6e75a84e9338 utilities/validateFQ.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/validateFQ.py Tue May 15 02:39:53 2018 -0400
[
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+#
+# A quickie tool for validating the correctness of a FASTQ file
+#
+# python validateFQ.py read1.fq [read2.fq]
+#
+
+import sys
+
def get4lines(fn):
    """Read the next four lines from the open file `fn`, whitespace-stripped.

    Returns a 4-tuple of strings; all four are empty once the file is
    exhausted.  If only some of the four lines are empty, the record is
    incomplete: the lines are printed and the program exits with status 1.
    """
    record = tuple(fn.readline().strip() for _ in range(4))
    present = [bool(field) for field in record]
    if True in present and False in present:
        print('\nError: missing lines:\n')
        print('\n'.join(record) + '\n')
        exit(1)
    return record
+
# characters accepted in the quality line and in the sequence line
ALLOWED_QUAL = '!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJ'
ALLOWED_NUCL = 'ACGTN'

def validate4lines(l1,l2,l3,l4):
    """Check one 4-line FASTQ record; print a diagnosis and exit(1) if malformed.

    l1 -- read name line: must start with '@' and have '/' as its
          second-to-last character (e.g. a '/1' or '/2' suffix)
    l2 -- sequence line: only characters from ALLOWED_NUCL
    l3 -- separator line: must start with '+'
    l4 -- quality line: same length as l2, only characters from ALLOWED_QUAL

    Returns None for a valid record.  When several checks fail, only the
    last failing check is reported.
    """
    failed = 0
    # make sure lines contain correct delimiters; slices are used instead of
    # indexing so an empty or one-character line is reported as a delimiter
    # error rather than raising IndexError
    if l1[:1] != '@' or l1[-2:-1] != '/' or l3[:1] != '+':
        failed = 1
    # make sure seq len == qual length
    if len(l2) != len(l4):
        failed = 2
    # make sure seq string contains only valid characters
    if any(n not in ALLOWED_NUCL for n in l2):
        failed = 3
    # make sure qual string contains only valid characters
    if any(n not in ALLOWED_QUAL for n in l4):
        failed = 4
    if failed:
        reasons = {1: ' ---- invalid delimiters\n',
                   2: ' ---- seq len != qual len\n',
                   3: ' ---- seq contains invalid characters\n',
                   4: ' ---- qual contains invalid characters\n'}
        print('\nError: malformed lines:')
        print(reasons[failed])
        print(l1+'\n'+l2+'\n'+l3+'\n'+l4+'\n')
        exit(1)
+
# driver: validate every record of read1 (and of read2, when given) and make
# sure paired records carry the same name
if len(sys.argv) < 2:
    print('\nUsage: python validateFQ.py read1.fq [read2.fq]\n')
    exit(1)

f1 = open(sys.argv[1],'r')
(l1_r1, l2_r1, l3_r1, l4_r1) = get4lines(f1)
f2 = None
if len(sys.argv) == 3:
    f2 = open(sys.argv[2],'r')
    (l1_r2, l2_r2, l3_r2, l4_r2) = get4lines(f2)

while l1_r1:
    # check line syntax
    validate4lines(l1_r1,l2_r1,l3_r1,l4_r1)
    if f2 is not None:
        # read2 ran out of records while read1 still has some
        if not l1_r2:
            print('\nError: read2 file has fewer records than read1 file:\n')
            print(l1_r1+'\n')
            exit(1)
        validate4lines(l1_r2,l2_r2,l3_r2,l4_r2)
        # make sure seq id is same for r1/r2 (the last character is ignored)
        if l1_r1[:-1] != l1_r2[:-1]:
            print('\nError: mismatched r1/r2 name:\n')
            print(l1_r1+'\n'+l1_r2+'\n')
            exit(1)

    # grab next 4 lines...
    (l1_r1, l2_r1, l3_r1, l4_r1) = get4lines(f1)
    if f2 is not None:
        (l1_r2, l2_r2, l3_r2, l4_r2) = get4lines(f2)

if f2 is not None:
    # read1 is exhausted; leftover read2 records have no mate
    if l1_r2:
        print('\nError: read2 file has more records than read1 file:\n')
        print(l1_r2+'\n')
        exit(1)
    f2.close()
f1.close()

print('\nPASSED WITH FLYING COLORS. GOOD DAY.\n')
+
b
diff -r 000000000000 -r 6e75a84e9338 utilities/vcf_compare_OLD.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utilities/vcf_compare_OLD.py Tue May 15 02:39:53 2018 -0400
[
b'@@ -0,0 +1,721 @@\n+#!/usr/bin/env python\n+# encoding: utf-8\n+\n+""" **************************************************\n+\n+vcf_compare.py\n+\n+- compare vcf file produced by workflow to golden vcf produced by simulator\n+\n+Written by:\t\tZach Stephens\n+Date:\t\t\tJanuary 20, 2015\n+Contact:\t\tzstephe2@illinois.edu\n+\n+************************************************** """\n+\n+import sys\n+import os\n+import copy\n+import time\n+import bisect\n+import re\n+import numpy as np\n+import optparse\n+\n+\n+EV_BPRANGE = 50\t\t# how far to either side of a particular variant location do we want to check for equivalents?\n+\n+DEFAULT_QUAL = -666\t# if we can\'t find a qual score, use this instead so we know it\'s missing\n+\n+MAX_VAL = 9999999999999\t# an unreasonably large value that no reference fasta could concievably be longer than\n+\n+DESC   = """%prog: vcf comparison script."""\n+VERS   = 0.1\n+\n+PARSER = optparse.OptionParser(\'python %prog [options] -r <ref.fa> -g <golden.vcf> -w <workflow.vcf>\',description=DESC,version="%prog v"+str(VERS))\n+\n+PARSER.add_option(\'-r\', help=\'* Reference Fasta\', dest=\'REFF\', action=\'store\', metavar=\'<ref.fa>\')\n+PARSER.add_option(\'-g\', help=\'* Golden VCF\',      dest=\'GVCF\', action=\'store\', metavar=\'<golden.vcf>\')\n+PARSER.add_option(\'-w\', help=\'* Workflow VCF\',    dest=\'WVCF\', action=\'store\', metavar=\'<workflow.vcf>\')\n+PARSER.add_option(\'-o\', help=\'* Output Prefix\',   dest=\'OUTF\', action=\'store\', metavar=\'<prefix>\')\n+PARSER.add_option(\'-m\', help=\'Mappability Track\', dest=\'MTRK\', action=\'store\', metavar=\'<track.bed>\')\n+PARSER.add_option(\'-M\', help=\'Maptrack Min Len\',  dest=\'MTMM\', action=\'store\', metavar=\'<int>\')\n+PARSER.add_option(\'-t\', help=\'Targetted Regions\', dest=\'TREG\', action=\'store\', metavar=\'<regions.bed>\')\n+PARSER.add_option(\'-T\', help=\'Min Region Len\',    dest=\'MTRL\', action=\'store\', metavar=\'<int>\')\n+PARSER.add_option(\'-c\', 
help=\'Coverage Filter Threshold [%default]\',       dest=\'DP_THRESH\', default=15, action=\'store\', metavar=\'<int>\')\n+PARSER.add_option(\'-a\', help=\'Allele Freq Filter Threshold [%default]\',    dest=\'AF_THRESH\', default=0.3, action=\'store\', metavar=\'<float>\')\n+\n+PARSER.add_option(\'--vcf-out\',   help="Output Match/FN/FP variants [%default]",       dest=\'VCF_OUT\', default=False, action=\'store_true\')\n+PARSER.add_option(\'--no-plot\',   help="No plotting [%default]",                       dest=\'NO_PLOT\', default=False, action=\'store_true\')\n+PARSER.add_option(\'--incl-homs\', help="Include homozygous ref calls [%default]",      dest=\'INCL_H\',  default=False, action=\'store_true\')\n+PARSER.add_option(\'--incl-fail\', help="Include calls that failed filters [%default]", dest=\'INCL_F\',  default=False, action=\'store_true\')\n+PARSER.add_option(\'--fast\',      help="No equivalent variant detection [%default]",   dest=\'FAST\',    default=False, action=\'store_true\')\n+\n+(OPTS,ARGS) = PARSER.parse_args()\n+\n+REFERENCE    = OPTS.REFF\n+GOLDEN_VCF   = OPTS.GVCF\n+WORKFLOW_VCF = OPTS.WVCF\n+OUT_PREFIX   = OPTS.OUTF\n+MAPTRACK     = OPTS.MTRK\n+MIN_READLEN  = OPTS.MTMM\n+BEDFILE      = OPTS.TREG\n+DP_THRESH    = int(OPTS.DP_THRESH)\n+AF_THRESH    = float(OPTS.AF_THRESH)\n+\n+VCF_OUT      = OPTS.VCF_OUT\n+NO_PLOT      = OPTS.NO_PLOT\n+INCLUDE_HOMS = OPTS.INCL_H\n+INCLUDE_FAIL = OPTS.INCL_F\n+FAST         = OPTS.FAST\n+\n+if len(sys.argv[1:]) == 0:\n+\tPARSER.print_help()\n+\texit(1)\n+\n+if OPTS.MTRL != None:\n+\tMINREGIONLEN = int(OPTS.MTRL)\n+else:\n+\tMINREGIONLEN = None\n+\n+if MIN_READLEN == None:\n+\tMIN_READLEN = 0\n+else:\n+\tMIN_READLEN = int(MIN_READLEN)\n+\n+if REFERENCE == None:\n+\tprint \'Error: No reference provided.\'\n+\texit(1)\n+if GOLDEN_VCF == None:\n+\tprint \'Error: No golden VCF provided.\'\n+\texit(1)\n+if WORKFLOW_VCF == None:\n+\tprint \'Error: No workflow VCF provided.\'\n+\texit(1)\n+if OUT_PREFIX == 
None:\n+\tprint \'Error: No output prefix provided.\'\n+\texit(1)\n+if (BEDFILE != None and MINREGIONLEN == None) or (BEDFILE == None and MINREGIONLEN != None):\n+\tprint \'Error: Both -t and -T must be specified\'\n+\texit(1)\n+\n+if NO_PLOT == False:\n+\timport matplotlib\n+\tmatplotlib.us'..b's[refName]):\n+\t\t\t\t\tif mappability_tracks[refName][var[0]]:\n+\t\t\t\t\t\tmappability_vs_FN[1] += 1\n+\t\t\t\t\t\tvenn_data[i][0] = 1\n+\t\t\t\t\t\tnoReason = False\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tmappability_vs_FN[0] += 1\n+\n+\t\t\t#\tcoverage?\n+\t\t\tif var in correctCov:\n+\t\t\t\tc = correctCov[var]\n+\t\t\t\tif c != None:\n+\t\t\t\t\tif c not in coverage_vs_FN:\n+\t\t\t\t\t\tcoverage_vs_FN[c] = 0\n+\t\t\t\t\tcoverage_vs_FN[c] += 1\n+\t\t\t\t\tif c < DP_THRESH:\n+\t\t\t\t\t\tvenn_data[i][1] = 1\n+\t\t\t\t\t\tnoReason = False\n+\n+\t\t\t#\theterozygous genotype messing things up?\n+\t\t\t#if var in correctAF:\n+\t\t\t#\ta = correctAF[var]\n+\t\t\t#\tif a != None:\n+\t\t\t#\t\ta = AF_KEYS[quantize_AF(a)]\n+\t\t\t#\t\tif a not in alleleBal_vs_FN:\n+\t\t\t#\t\t\talleleBal_vs_FN[a] = 0\n+\t\t\t#\t\talleleBal_vs_FN[a] += 1\n+\t\t\t#\t\tif a < AF_THRESH:\n+\t\t\t#\t\t\tvenn_data[i][2] = 1\n+\n+\t\t\t#\tno reason?\n+\t\t\tif noReason:\n+\t\t\t\tvenn_data[i][2] += 1\n+\n+\t\tfor i in xrange(len(notFound)):\n+\t\t\tif venn_data[i][0]: set1.append(i+varAdj)\n+\t\t\tif venn_data[i][1]: set2.append(i+varAdj)\n+\t\t\tif venn_data[i][2]: set3.append(i+varAdj)\n+\t\tvarAdj += len(notFound)\n+\n+\t\t#\n+\t\t#\tif desired, write out vcf files.\n+\t\t#\n+\t\tnotFound   = sorted(notFound)\n+\t\tFPvariants = sorted(FPvariants)\n+\t\tif VCF_OUT:\n+\t\t\tfor line in open(GOLDEN_VCF,\'r\'):\n+\t\t\t\tif line[0] != \'#\':\n+\t\t\t\t\tsplt = line.split(\'\\t\')\n+\t\t\t\t\tif splt[0] == refName:\n+\t\t\t\t\t\tvar  = (int(splt[1]),splt[3],splt[4])\n+\t\t\t\t\t\tif var in notFound:\n+\t\t\t\t\t\t\tvcfo2.write(line)\n+\t\t\tfor line in open(WORKFLOW_VCF,\'r\'):\n+\t\t\t\tif line[0] != 
\'#\':\n+\t\t\t\t\tsplt = line.split(\'\\t\')\n+\t\t\t\t\tif splt[0] == refName:\n+\t\t\t\t\t\tvar  = (int(splt[1]),splt[3],splt[4])\n+\t\t\t\t\t\tif var in FPvariants:\n+\t\t\t\t\t\t\tvcfo3.write(line)\n+\n+\t\tprint \'{0:.3f} (sec)\'.format(time.time()-tt)\n+\n+\t#\n+\t#\tclose vcf output\n+\t#\n+\tprint \'\'\n+\tif VCF_OUT:\n+\t\tprint OUT_PREFIX+\'_FN.vcf\'\n+\t\tprint OUT_PREFIX+\'_FP.vcf\'\n+\t\tvcfo2.close()\n+\t\tvcfo3.close()\n+\n+\t#\n+\t#\tplot some FN stuff\n+\t#\n+\tif NO_PLOT == False:\n+\t\tnDetected = len(set(set1+set2+set3))\n+\t\tset1 = set(set1)\n+\t\tset2 = set(set2)\n+\t\tset3 = set(set3)\n+\n+\t\tif len(set1): s1 = \'Unmappable\'\n+\t\telse: s1 = \'\'\n+\t\tif len(set2): s2 = \'DP < \'+str(DP_THRESH)\n+\t\telse: s2 = \'\'\n+\t\t#if len(set3): s3 = \'AF < \'+str(AF_THRESH)\n+\t\tif len(set3): s3 = \'Unknown\'\n+\t\telse: s3 = \'\'\n+\n+\t\tmpl.figure(0)\n+\t\ttstr1 = \'False Negative Variants (Missed Detections)\'\n+\t\t#tstr2 = str(nDetected)+\' / \'+str(znF)+\' FN variants categorized\'\n+\t\ttstr2 = \'\'\n+\t\tif MAPTRACK != None:\n+\t\t\tv = venn3([set1, set2, set3], (s1, s2, s3))\n+\t\telse:\n+\t\t\tv = venn2([set2, set3], (s2, s3))\n+\t\tmpl.figtext(0.5,0.95,tstr1,fontdict={\'size\':14,\'weight\':\'bold\'},horizontalalignment=\'center\')\n+\t\tmpl.figtext(0.5,0.03,tstr2,fontdict={\'size\':14,\'weight\':\'bold\'},horizontalalignment=\'center\')\n+\n+\t\touf = OUT_PREFIX+\'_FNvenn.pdf\'\n+\t\tprint ouf\n+\t\tmpl.savefig(ouf)\n+\n+\t#\n+\t#\tspit out results to console\n+\t#\n+\tprint \'\\n**********************************\\n\'\n+\tif BEDFILE != None:\n+\t\tprint \'ONLY CONSIDERING VARIANTS FOUND WITHIN TARGETED REGIONS\\n\\n\'\n+\tprint \'Total Golden Variants:  \',ztV,\'\\t[\',zgF,\'filtered,\',zgM,\'merged,\',zgR,\'redundant ]\'\n+\tprint \'Total Workflow Variants:\',ztW,\'\\t[\',zwF,\'filtered,\',zwM,\'merged,\',zwR,\'redundant ]\'\n+\tprint \'\'\n+\tif ztV > 0 and ztW > 0:\n+\t\tprint \'Perfect 
Matches:\',znP,\'({0:.2f}%)\'.format(100.*float(znP)/ztV)\n+\t\tprint \'FN variants:    \',znF,\'({0:.2f}%)\'.format(100.*float(znF)/ztV)\n+\t\tprint \'FP variants:    \',zfP#,\'({0:.2f}%)\'.format(100.*float(zfP)/ztW)\n+\tif FAST == False:\n+\t\tprint \'\\nNumber of equivalent variants denoted differently between the two vcfs:\',znE\n+\tif BEDFILE != None:\n+\t\tprint \'\\nNumber of golden variants located in targeted regions that were too small to be sampled from:\',zbM\n+\tif FAST:\n+\t\tprint "\\nWarning! Running with \'--fast\' means that identical variants denoted differently between the two vcfs will not be detected! The values above may be lower than the true accuracy."\n+\t#if NO_PLOT:\n+\tif True:\n+\t\tprint \'\\n#unmappable:  \',len(set1)\n+\t\tprint \'#low_coverage:\',len(set2)\n+\t\tprint \'#unknown:     \',len(set3)\n+\tprint \'\\n**********************************\\n\'\n+\n+\n+\n+\n+\n+if __name__ == \'__main__\':\n+\tmain()\n'