Repository 'medaka_variant_pipeline'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/medaka_variant_pipeline

Changeset 10:7623e5888be9 (2021-04-22)
Previous changeset 9:336b3def9b2b (2021-03-29) Next changeset 11:11fedf536104 (2021-09-12)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 0faf0ade3f13d7c78d93869823ea9fdf25c21b13"
modified:
convert_VCF_info_fields.py
macros.xml
medaka_variant.xml
test-data/all_fasta.loc
removed:
annotateVCF.py
test-data/bwa-mem-mt-genome.fa
test-data/bwa-mem-mt-genome.fa.fai
b
diff -r 336b3def9b2b -r 7623e5888be9 annotateVCF.py
--- a/annotateVCF.py Mon Mar 29 20:06:01 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,409 +0,0 @@\n-#!/usr/bin/env python3\n-\n-# Takes in VCF file and a samtools mpileup output file\n-# Fills in annotation for the VCF file including AF, DP\n-# SB, and DP4\n-#\n-# Usage statement:\n-# python annotateVCF.py in_vcf.vcf in_mpileup.txt out_vcf.vcf\n-#\n-# Can generate in_mileup.txt with samtools mpileup (and can restrict which sites to generate pileups for with in_vcf.vcf)\n-\n-# 08/24/2020 - Nathan P. Roach, natproach@gmail.com\n-\n-import sys\n-from math import isnan, log10\n-\n-from scipy.stats import fisher_exact\n-\n-\n-def pval_to_phredqual(pval):\n-    return int(round(-10. * log10(pval)))\n-\n-\n-def parseSimpleSNPpileup(fields, ref_base, alt_base):\n-    base_to_idx = {\n-        \'A\': 0,\n-        \'a\': 0,\n-        \'T\': 1,\n-        \'t\': 1,\n-        \'C\': 2,\n-        \'c\': 2,\n-        \'G\': 3,\n-        \'g\': 3\n-    }\n-\n-    base_to_idx_stranded = {\n-        \'A\': 0,\n-        \'T\': 1,\n-        \'C\': 2,\n-        \'G\': 3,\n-        \'a\': 4,\n-        \'t\': 5,\n-        \'c\': 6,\n-        \'g\': 7\n-    }\n-    ref_base2 = fields[2]\n-    counts = [0, 0, 0, 0]\n-    stranded_counts = [0, 0, 0, 0, 0, 0, 0, 0]\n-    ref_idx = base_to_idx[fields[2]]\n-    dp = int(fields[3])\n-    carrot_flag = False\n-    ins_flag = False\n-    ins_str = ""\n-    ins_len = 0\n-    insertion = ""\n-    del_flag = False\n-    del_str = ""\n-    del_len = 0\n-    deletion = ""\n-    # dollar_flag = False\n-    for base in fields[4]:\n-        if carrot_flag:\n-            carrot_flag = False\n-            continue\n-        if ins_len > 0:\n-            insertion += base\n-            ins_len -= 1\n-            continue\n-        if del_len > 0:\n-            deletion += base\n-            del_len -= 1\n-            continue\n-        if ins_flag:\n-            if base.isdigit():\n-                ins_str += base\n-            else:\n-                ins_len = int(ins_str) - 1\n-                ins_str = ""\n-                insertion = base\n-                ins_flag = False\n-        elif del_flag:\n-            if base.isdigit():\n-                del_str += base\n-            else:\n-                del_len = int(del_str) - 1\n-                del_str = ""\n-                deletion = base\n-                del_flag = False\n-        else:\n-            if base == \'^\':\n-                carrot_flag = True\n-                continue\n-            elif base == \'$\':\n-                continue\n-            elif base == \'+\':\n-                ins_flag = True\n-            elif base == \'-\':\n-                del_flag = True\n-            elif base == \'.\':\n-                counts[ref_idx] += 1\n-                stranded_counts[base_to_idx_stranded[ref_base2]] += 1\n-            elif base == \',\':\n-                counts[ref_idx] += 1\n-                stranded_counts[base_to_idx_stranded[ref_base2.lower()]] += 1\n-            elif base == \'N\' or base == \'n\':\n-                continue\n-            elif base == \'*\':\n-                continue\n-            else:\n-                counts[base_to_idx[base]] += 1\n-                stranded_counts[base_to_idx_stranded[base]] += 1\n-    if sum(counts) == 0:\n-        af = float("nan")\n-    else:\n-        af = float(counts[base_to_idx[alt_base]]) / float(sum(counts))\n-    if float(sum(stranded_counts[0:4])) == 0:\n-        faf = float("nan")\n-    else:\n-        faf = float(stranded_counts[base_to_idx_stranded[alt_base]]) / float(sum(stranded_counts[0:4]))\n-    if float(sum(stranded_counts[4:])) == 0:\n-        raf = float("nan")\n-    else:\n-        raf = float(stranded_counts[base_to_idx_stranded[alt_base.lower()]]) / float(sum(stranded_counts[4:]))\n-    dp4 = [stranded_counts[base_to_idx_stranded[ref_base]],\n-           stranded_counts[base_to_idx_stranded[ref_base.lower()]],\n-           stranded_counts[base_to_idx_stranded[alt_base]],\n-           stranded_counts[base_to_idx_stranded[alt_base.lower()]]]\n-    return (dp, af, faf, raf, dp4)\n-\n-\n-def parseIndelPileup(fields, ref_base, alt_base):\n-    counts = [0, 0, 0, 0, 0, 0, 0, 0, 0]  # indel ref match, indel fwd ref match, indel rev ref match, '..b'    out_vcf.write(line)\n-        elif line[0] == "#":\n-            out_vcf.write("##annotateVCFVersion=0.2\\n")\n-            out_vcf.write("##INFO=<ID=DP,Number=1,Type=Integer,Description=\\"Raw Depth\\">\\n")\n-            out_vcf.write("##INFO=<ID=AF,Number=1,Type=Float,Description=\\"Allele Frequency\\">\\n")\n-            out_vcf.write("##INFO=<ID=FAF,Number=1,Type=Float,Description=\\"Forward Allele Frequency\\">\\n")\n-            out_vcf.write("##INFO=<ID=RAF,Number=1,Type=Float,Description=\\"Reverse Allele Frequency\\">\\n")\n-            out_vcf.write("##INFO=<ID=SB,Number=1,Type=Integer,Description=\\"Phred-scaled strand bias at this position\\">\\n")\n-            out_vcf.write("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\\">\\n")\n-            out_vcf.write(line)\n-        else:\n-            fields = line.strip().split()\n-            if fields[0] in to_examine:\n-                to_examine[fields[0]][int(fields[1])] = (fields[3], fields[4])\n-            else:\n-                to_examine[fields[0]] = {int(fields[1]): (fields[3], fields[4])}\n-    in_vcf.close()\n-    data = {}\n-\n-    # Populate data dictionary, which relates chromosome and position to the following:\n-    #  depth of coverage\n-    #  allele frequency\n-    #  forward strand allele frequency\n-    #  reverse strand allele frequency\n-    #  dp4 - depth of coverage of ref allele fwd strand, DOC of ref allele rev strand, DOC of alt allele fwd strand, DOC of alt allele rev strand\n-    for line in in_mpileup:\n-        fields = line.strip().split()\n-        if fields[0] not in to_examine:\n-            continue\n-        if int(fields[1]) not in to_examine[fields[0]]:\n-            continue\n-        (ref_base, alt_base) = to_examine[fields[0]][int(fields[1])]\n-        if len(ref_base.split(\',\')) > 1:  # Can\'t handle multiple ref alleles\n-            continue\n-        if len(alt_base.split(\',\')) > 1:  # Can\'t handle multiple alt alleles\n-            continue\n-        if len(ref_base) > 1 or len(alt_base) > 1:\n-            if len(ref_base) > 1 and len(alt_base) > 1:  # Can\'t handle complex indels\n-                continue\n-            data[(fields[0], int(fields[1]))] = parseIndelPileup(fields, ref_base, alt_base)\n-        if len(ref_base) == 1 and len(alt_base) == 1:\n-            data[(fields[0], int(fields[1]))] = parseSimpleSNPpileup(fields, ref_base, alt_base)\n-    in_mpileup.close()\n-    # Reopen vcf, this time, skip header, annotate all the sites for which there is an entry in data dictionary\n-    # (Sites without entries have either multiple ref or alt bases, or have complex indels. Not supported (for now), and not reported as a result)\n-    in_vcf = open(in_vcf_filepath, \'r\')\n-    for line in in_vcf:\n-        if line[0] == \'#\':\n-            continue\n-        fields = line.strip().split(\'\\t\')\n-        if (fields[0], int(fields[1])) not in data:\n-            continue\n-        (dp, af, faf, raf, dp4) = data[(fields[0], int(fields[1]))]\n-        dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]\n-        _, p_val = fisher_exact(dp2x2)\n-        sb = pval_to_phredqual(p_val)\n-        if fields[7] == "":\n-            info = []\n-        else:\n-            info = fields[7].split(\';\')\n-        info.append("DP=%d" % (dp))\n-        if isnan(af):\n-            info.append("AF=NaN")\n-        else:\n-            info.append("AF=%.6f" % (af))\n-        if isnan(faf):\n-            info.append("FAF=NaN")\n-        else:\n-            info.append("FAF=%.6f" % (faf))\n-        if isnan(raf):\n-            info.append("RAF=NaN")\n-        else:\n-            info.append("RAF=%.6f" % (raf))\n-        info.append("SB=%d" % (sb))\n-        info.append("DP4=%s" % (\',\'.join([str(x) for x in dp4])))\n-        new_info = \';\'.join(info)\n-        fields[7] = new_info\n-        out_vcf.write("%s\\n" % ("\\t".join(fields)))\n-    in_vcf.close()\n-    out_vcf.close()\n-\n-\n-if __name__ == "__main__":\n-    annotateVCF(sys.argv[1], sys.argv[2], sys.argv[3])\n'
b
diff -r 336b3def9b2b -r 7623e5888be9 convert_VCF_info_fields.py
--- a/convert_VCF_info_fields.py Mon Mar 29 20:06:01 2021 +0000
+++ b/convert_VCF_info_fields.py Thu Apr 22 20:24:49 2021 +0000
[
@@ -11,7 +11,8 @@
 from collections import OrderedDict
 from math import log10
 
-from scipy.stats import fisher_exact
+import scipy
+import scipy.stats
 
 
 def pval_to_phredqual(pval):
@@ -69,7 +70,7 @@
                 for j, i in enumerate(range(2, len(sr_list), 2)):
                     dp4 = (sr_list[ref_fwd], sr_list[ref_rev], sr_list[i], sr_list[i + 1])
                     dp2x2 = [[dp4[0], dp4[1]], [dp4[2], dp4[3]]]
-                    _, p_val = fisher_exact(dp2x2)
+                    _, p_val = scipy.stats.fisher_exact(dp2x2)
                     sb = pval_to_phredqual(p_val)
 
                     as_ = (sc_list[ref_fwd], sc_list[ref_rev], sc_list[i], sc_list[i + 1])
b
diff -r 336b3def9b2b -r 7623e5888be9 macros.xml
--- a/macros.xml Mon Mar 29 20:06:01 2021 +0000
+++ b/macros.xml Thu Apr 22 20:24:49 2021 +0000
b
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">1.0.3</token>
-    <token name="@PROFILE@">18.01</token>
+    <token name="@TOOL_VERSION@">1.3.2</token>
+    <token name="@PROFILE@">20.01</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">medaka</requirement>
@@ -48,29 +48,32 @@
     </xml>
     <xml name="model" token_argument="-m" token_label="Select model">
         <param argument="@ARGUMENT@" type="select" label="@LABEL@">
-            <option value="r10_min_high_g303">r10_min_high_g303</option>
-            <option value="r10_min_high_g340">r10_min_high_g340</option>
             <option value="r103_min_high_g345">r103_min_high_g345</option>
             <option value="r103_min_high_g360">r103_min_high_g360</option>
             <option value="r103_prom_high_g360">r103_prom_high_g360</option>
             <option value="r103_prom_snp_g3210">r103_prom_snp_g3210</option>
             <option value="r103_prom_variant_g3210">r103_prom_variant_g3210</option>
+            <option value="r10_min_high_g303">r10_min_high_g303</option>
+            <option value="r10_min_high_g340">r10_min_high_g340</option>
             <option value="r941_min_fast_g303">r941_min_fast_g303</option>
             <option value="r941_min_high_g303">r941_min_high_g303</option>
             <option value="r941_min_high_g330">r941_min_high_g330</option>
             <option value="r941_min_high_g340_rle">r941_min_high_g340_rle</option>
             <option value="r941_min_high_g344">r941_min_high_g344</option>
             <option value="r941_min_high_g351">r941_min_high_g351</option>
-            <option value="r941_min_high_g360">r941_min_high_g360</option>
+            <option value="r941_min_high_g360" selected="true">r941_min_high_g360</option>
             <option value="r941_prom_fast_g303">r941_prom_fast_g303</option>
             <option value="r941_prom_high_g303">r941_prom_high_g303</option>
             <option value="r941_prom_high_g330">r941_prom_high_g330</option>
             <option value="r941_prom_high_g344">r941_prom_high_g344</option>
-            <option value="r941_prom_high_g360" selected="true">r941_prom_high_g360</option>
+            <option value="r941_prom_high_g360">r941_prom_high_g360</option>
+            <option value="r941_prom_high_g4011">r941_prom_high_g4011</option>
             <option value="r941_prom_snp_g303">r941_prom_snp_g303</option>
             <option value="r941_prom_snp_g322">r941_prom_snp_g322</option>
+            <option value="r941_prom_snp_g360">r941_prom_snp_g360</option>
             <option value="r941_prom_variant_g303">r941_prom_variant_g303</option>
             <option value="r941_prom_variant_g322">r941_prom_variant_g322</option>
+            <option value="r941_prom_variant_g360">r941_prom_variant_g360</option>
         </param>
     </xml>
     <xml name="reference">
b
diff -r 336b3def9b2b -r 7623e5888be9 medaka_variant.xml
--- a/medaka_variant.xml Mon Mar 29 20:06:01 2021 +0000
+++ b/medaka_variant.xml Thu Apr 22 20:24:49 2021 +0000
b
@@ -1,4 +1,4 @@
-<tool id="medaka_variant_pipeline" name="medaka variant pipeline" version="@TOOL_VERSION@+galaxy2" profile="@PROFILE@">
+<tool id="medaka_variant_pipeline" name="medaka variant pipeline" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
     <description>via neural networks</description>
     <macros>
         <import>macros.xml</import>
@@ -41,8 +41,8 @@
         <expand macro="model" argument="-s" label="Select model for initial SNP calling from mixed reads prior to phasing"/>
         <expand macro="model" argument="-m" label="Select model for final variant calling from phased reads"/>
         <expand macro="b"/>
-        <param argument="-N" type="integer" value="14" label="Set threshold for filtering indels in final VCF"/>
-        <param argument="-P" type="integer" value="12" label="Set threshold for filtering SNPs in final VCF"/>
+        <param argument="-N" type="integer" value="9" label="Set threshold for filtering indels in final VCF"/>
+        <param argument="-P" type="integer" value="8" label="Set threshold for filtering SNPs in final VCF"/>
         <param argument="-U" type="boolean" truevalue="-U" falsevalue="" label="Avoid filtering of final VCF?"/>
         <param argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Stop after initial SNP calling from mixed reads prior to phasing?"/>
         <param name="out" type="select" multiple="true" optional="false" label="Select out file(s)5">
@@ -105,21 +105,21 @@
     <tests>
         <!-- #1 default -->
         <test>
-            <param name="i" value="alignment.bam"/>
+            <param name="i" value="medaka_test.bam"/>
             <conditional name="reference_source">
                 <param name="reference_source_selector" value="cached"/>
-                <param name="ref_file" value="bwa-mem-mt-genome"/>
+                <param name="ref_file" value="ref_fasta"/>
             </conditional>
             <param name="out" value="round_0_hap_mixed_probs.hdf,round_0_hap_mixed_unphased.vcf,log"/>
             <output name="out_round_0_hap_mixed_unphased_vcf">
                 <assert_contents>
-                    <has_n_lines n="6"/>
+                    <has_n_lines n="7"/>
                     <has_line line="##fileformat=VCFv4.1"/>
                 </assert_contents>
             </output>
             <output name="out_round_0_hap_mixed_probs_hdf">
                 <assert_contents>
-                    <has_size value="32624"/>
+                    <has_size value="108753" delta="100"/>
                 </assert_contents>
             </output>
             <output name="out_log">
b
diff -r 336b3def9b2b -r 7623e5888be9 test-data/all_fasta.loc
--- a/test-data/all_fasta.loc Mon Mar 29 20:06:01 2021 +0000
+++ b/test-data/all_fasta.loc Thu Apr 22 20:24:49 2021 +0000
b
@@ -1,1 +1,1 @@
-bwa-mem-mt-genome bwa-mem-mt-genome bwa-mem-mt-genome ${__HERE__}/bwa-mem-mt-genome.fa
\ No newline at end of file
+ref_fasta ref_fasta ref_fasta ${__HERE__}/ref.fasta
\ No newline at end of file
b
diff -r 336b3def9b2b -r 7623e5888be9 test-data/bwa-mem-mt-genome.fa
--- a/test-data/bwa-mem-mt-genome.fa Mon Mar 29 20:06:01 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,238 +0,0 @@\n->gi|251831106|ref|NC_012920.1|\n-GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGG\n-GTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC\n-CTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTA\n-ATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATC\n-ATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA\n-AACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC\n-TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATA\n-CAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCC\n-AAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC\n-ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA\n-GCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC\n-AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA\n-ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA\n-TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACT\n-CACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACAC\n-ACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC\n-AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC\n-CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA\n-CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC\n-AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTAT\n-GAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGA\n-AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA\n-TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA\n-GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA\n-GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCG\n-ATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA\n-ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCC\n-AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA\n-AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGAT\n-AGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC\n-CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG\n-TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC\n-ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAG\n-TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC\n-AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA\n-AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC\n-ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA\n-AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT\n-TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA\n-TGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT\n-AAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG\n-TCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA\n-GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG\n-ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG\n-AGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCT\n-ACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGA\n-ACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAAT\n-TCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA\n-TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCC\n-CCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCAC\n-ATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCC\n-CTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAG\n-CCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGC\n-AGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGC\n-TCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCA'..b'CAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAA\n-CAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGA\n-TACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTT\n-TCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAA\n-CGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT\n-CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTA\n-TAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAAC\n-TCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATC\n-AAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAAC\n-CACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTC\n-CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC\n-ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCA\n-AAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGC\n-TACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCC\n-ACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCC\n-TATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTA\n-CCTAAAACTCACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC\n-AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCA\n-TCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCT\n-AACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCA\n-ACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAA\n-TCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAA\n-CTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA\n-CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATAC\n-TCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCC\n-CTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCC\n-CCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCG\n-ACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACC\n-CCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC\n-CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAAC\n-CCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAA\n-ACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTA\n-CTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATC\n-ATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCC\n-TATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC\n-AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAAC\n-TTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACA\n-GTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACT\n-CCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATC\n-ACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACAT\n-TAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC\n-TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTA\n-GGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC\n-AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCT\n-AACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATAC\n-TTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTC\n-CTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA\n-GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTC\n-ATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACA\n-TTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCA\n-ATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCA\n-ACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAG\n-TACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC\n-TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCG\n-CTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTC\n-ATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG\n'
b
diff -r 336b3def9b2b -r 7623e5888be9 test-data/bwa-mem-mt-genome.fa.fai
--- a/test-data/bwa-mem-mt-genome.fa.fai Mon Mar 29 20:06:01 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-gi|251831106|ref|NC_012920.1| 16569 31 70 71