Mercurial > repos > iuc > mummer_nucmer
changeset 4:7cd7a55a678d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mummer4 commit 026db7297e987c1b7ce7f5dd4f8746d1bd435538
author | iuc |
---|---|
date | Mon, 18 Mar 2024 12:41:25 +0000 |
parents | e18267f90096 |
children | |
files | macros.xml nucmer.xml test-data/1delta.txt test-data/delta.txt test-data/mdelta.txt test-data/nucmer.txt test-data/out.bam test-data/out.cram test-data/out.sam test-data/report.txt |
diffstat | 10 files changed, 180 insertions(+), 107 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Sat Nov 27 09:59:16 2021 +0000 +++ b/macros.xml Mon Mar 18 12:41:25 2024 +0000 @@ -2,7 +2,7 @@ <token name="@MUMMER_GNUPLOT_MANUAL@"><![CDATA[&& gnuplot < out.gp]]></token> <xml name="bio_tools"> <xrefs> - <xref type="bio.tools">mumer4</xref> + <xref type="bio.tools">mummer4</xref> </xrefs> </xml> <xml name="citation"> @@ -10,15 +10,16 @@ <citation type="doi">10.1371/journal.pcbi.1005944</citation> </citations> </xml> + <token name="@TOOL_VERSION@">4.0.0rc1</token> + <token name="@VERSION_SUFFIX@">3</token> + <token name="@PROFILE@">20.05</token> <xml name="gnuplot_requirement"> - <requirement type="package" version="5.4.1">gnuplot</requirement> + <requirement type="package" version="5.4.8">gnuplot</requirement> </xml> - <token name="@TOOL_VERSION@">4.0.0rc1</token> - <token name="@VERSION_SUFFIX@">2</token> - <token name="@PROFILE@">20.05</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">mummer4</requirement> + <requirement type="package" version="1.19.2">samtools</requirement> <yield /> </requirements> </xml> @@ -30,13 +31,13 @@ <option value="">Color</option> <option value="-color">No color (-color)</option> </param> - <param name="coverage" type="select" label="Coverage Plot" help="Generate a reference coverage plot (default for .tiling) or the defualt dotplot." > + <param name="coverage" type="select" label="Coverage Plot" help="Generate a reference coverage plot (default for .tiling) or the default dotplot." > <option value="">Dotplot</option> <option value="-c">Coverage Plot (-c)</option> </param> - <param name="filter" type="boolean" argument="--filter" truevalue="--filter" falsevalue="" label="Filter" + <param type="boolean" argument="--filter" truevalue="--filter" falsevalue="" label="Filter" help="Only display .delta alignments which represent the 'best' hit to any particular spot on either sequence, i.e. a one-to-one mapping of reference and query subsequences. (--filter)" /> - <param name="fat" type="boolean" argument="--fat" truevalue="--fat" falsevalue="" label="Layout sequences using fattest alignment only" help="(--fat)" /> + <param type="boolean" argument="--fat" truevalue="--fat" falsevalue="" label="Layout sequences using fattest alignment only" help="(--fat)" /> <conditional name="labels" > <param name="IDs" type="select" label="Plot a particular reference or query sequence?" help="For alignments that used more than one reference/query." > <option value="no">NO</option> @@ -54,7 +55,7 @@ <option value="large">Large</option> </param> <param name="snp" type="boolean" argument="--SNP" truevalue="--SNP" falsevalue="" label="SNPs" help="Highlight SNP locations in each alignment. (--SNP)" /> - <param name="title" type="text" argument="-title" value="Title" label="Plot Title" help="(-title)" /> + <param type="text" argument="-title" value="Title" label="Plot Title" help="(-title)" /> <conditional name="range" > <param name="custom" type="select" label="Choose custom X and Y axis ranges?" > <option value="no">NO</option>
--- a/nucmer.xml Sat Nov 27 09:59:16 2021 +0000 +++ b/nucmer.xml Mon Mar 18 12:41:25 2024 +0000 @@ -11,8 +11,11 @@ <![CDATA[ ln -s $reference_sequence reference.fa && ln -s $query_sequence query.fa && - nucmer + nucmer $anchoring + #if $outform.out_format != "delta": + --sam-long=outsam.sam + #end if -b '$breaklen' -c '$mincluster' -D '$diagdiff' @@ -31,111 +34,145 @@ $options.genome -M '$options.max_chunk' #end if - 'reference.fa' 'query.fa' - #if $mumplot.plot == 'yes': + 'reference.fa' 'query.fa' + #if $outform.out_format == "delta": + #if $mumplot.plot == 'yes' : && mummerplot - #if $mumplot.sequences.seq_input == 'yes': + #if $outform.mumplot.sequences.seq_input == 'yes': -R '$reference_sequence' -Q '$query_sequence' - $mumplot.sequences.layout + $outform.mumplot.sequences.layout #end if - -b '$mumplot.breaklen' - $mumplot.color - $mumplot.coverage - $mumplot.filter - $mumplot.fat - #if $mumplot.labels.IDs == 'yes': - -IdR '$mumplot.labels.ref_id' - -IdQ '$mumplot.labels.query_id' + -b '$outform.mumplot.breaklen' + $outform.mumplot.color + $outform.mumplot.coverage + $outform.mumplot.filter + $outform.mumplot.fat + #if $outform.mumplot.labels.IDs == 'yes': + -IdR '$outform.mumplot.labels.ref_id' + -IdQ '$outform.mumplot.labels.query_id' #end if - -s '$mumplot.size' + -s '$outform.mumplot.size' -terminal png - -title '$mumplot.title' - $mumplot.snp - #if $mumplot.range.custom == 'yes': - -x [$mumplot.range.min_x:$mumplot.range.max_x] - -y [$mumplot.range.min_y:$mumplot.range.max_y] + -title '$outform.mumplot.title' + $outform.mumplot.snp + #if $outform.mumplot.range.custom == 'yes': + -x [$outform.mumplot.range.min_x:$outform.mumplot.range.max_x] + -y [$outform.mumplot.range.min_y:$outform.mumplot.range.max_y] #end if 'out.delta' @MUMMER_GNUPLOT_MANUAL@ + #end if + #else: + && samtools dict reference.fa > outsamhead + && tail -n +3 outsam.sam >> outsamhead + && samtools sort -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" outsamhead | + #if $outform.out_format == 'bam-long': + samtools calmd -b --threads {GALAXY_SLOTS:-1} - reference.fa > outsam + #else if $outform.out_format == 'cram-long': + samtools view -C --reference reference.fa -o outsam - + #end if #end if ]]> - </command> + </command> <inputs> <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="FastA or multi-FastA" /> <param name="query_sequence" type="data" format="fasta" label="Query Sequence" help="FastA or multi-FastA" /> + <conditional name="outform"> + <param name="out_format" type="select" label="Output format" help="Select delta format if a plot is needed. Jbrowse is a good choice to view cram and bam tracks"> + <option value="bam-long">bam format</option> + <option value="cram-long">cram format</option> + <option value="delta">Mummer delta format - allows plots</option> + </param> + <when value="delta"> + <conditional name="mumplot" > + <param name="plot" type="select" label="Create a 2-D dotplot of the input sequences?" > + <option value="no">No plot</option> + <option value="yes">Plot</option> + </param> + <when value="yes" > + <expand macro="mumplot_input" > + <conditional name="sequences" > + <param name="seq_input" type="select" label="Plot an ordered set of reference/query sequences?" > + <option value="no">NO</option> + <option value="yes">YES</option> + </param> + <when value="yes"> + <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="(-R)" /> + <param name="query_sequence" type="data" format="fasta" multiple="True" label="Query Sequence(s)" help="(-Q)" /> + <param argument="--layout" type="boolean" truevalue="--layout" falsevalue="" label="Layout" help="Layout a .delta multiplot in an intelligible fashion." /> + </when> + <when value="no" /> + </conditional> + </expand> + </when> + <when value="no" /> + </conditional> + </when> + <when value="bam-long"/> + <when value="cram-long"/> + </conditional> <param name="anchoring" type="select" label="Anchoring" help="Choose a match anchoring strategy"> <option value="">Use default</option> <option value="--mum">Unique matches only (--mum)</option> <option value="--maxmatch">All matches (--maxmatch)</option> </param> + <param name="breaklen" type="integer" argument="-b" value="200" label="Break Length" - help="Set the distance an alignment extension will attempt to extend poor scoring regions before giving up. (-b)" /> - <param name="mincluster" type="integer" argument="-c" value="65" label="Minumum Cluster Length" help="Sets the minimum length of a cluster of matches. (-c)" /> + help="Set the distance an alignment extension will attempt to extend poor scoring regions before giving up." /> + <param name="mincluster" type="integer" argument="-c" value="65" label="Minumum Cluster Length" help="Sets the minimum length of a cluster of matches." /> <param name="diagdiff" type="integer" argument="-D" value="5" label="Maximum Diagonal Difference" - help="Set the maximum diagonal difference between two adjacent anchors in a cluster. (-D)" /> + help="Set the maximum diagonal difference between two adjacent anchors in a cluster." /> <param name="diagfactor" type="float" argument="-d" value="0.12" label="Maximum Diagonal Difference" - help="Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length. (-d)" /> - <param name="noextend" type="boolean" argument="--noextend" truevalue="--noextend" falsevalue="" label="No Extend" help="Do not perform cluster extension step. (--noextend)" /> + help="Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length." /> + <param type="boolean" argument="--noextend" truevalue="--noextend" falsevalue="" label="No Extend" help="Do not perform cluster extension step." /> <param name="direction" type="select" label="Direction" help="Choose a direction of Query Sequence to Use"> - <option value="">Use foward and reverse sequences</option> + <option value="">Use forward and reverse sequences</option> <option value="-f">Use only forward sequence of query (-f)</option> - <option value="-r">Use only reverese sequence of query (-r)</option> + <option value="-r">Use only reverse sequence of query (-r)</option> </param> - <param name="maxgap" type="integer" argument="-g" value="90" label="Maximum Gap Distance" help="Set the maximum gap between two adjacent matches in a cluster. (-g)" /> - <param name="minmatch" type="integer" argument="-l" value="20" label="Minimum Match Length" help="Set the minimum length of a single exact match. (-l)" /> - <param name="minalign" type="integer" argument="-L" value="0" label="Minumum Alignment Length" help="Minimum length of an alignment, after clustering and extension. (-L)" /> - <param name="nooptimize" type="boolean" argument="--nooptimize" truevalue="--nooptimize" falsevalue="" label="No Alignment Score Optimization" + <param name="maxgap" type="integer" argument="-g" value="90" label="Maximum Gap Distance" help="Set the maximum gap between two adjacent matches in a cluster." /> + <param name="minmatch" type="integer" argument="-l" value="20" label="Minimum Match Length" help="Set the minimum length of a single exact match." /> + <param name="minalign" type="integer" argument="-L" value="0" label="Minumum Alignment Length" help="Minimum length of an alignment, after clustering and extension." /> + <param type="boolean" argument="--nooptimize" truevalue="--nooptimize" falsevalue="" label="No Alignment Score Optimization" help="No alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will not backtrack to optimize the alignment score and instead terminate the alignment at the end of the sequence. (--nooptimize)" /> - <param name="nosimplify" type="boolean" argument="--nosimplify" truevalue="--nosimplify" falsevalue="" label="Don't Simplify Alignments" - help="Don't simplify alignments by removing shadowed clusters. Use this option when aligning a sequence to itself to look for repeats. (--nosimplify)" /> + <param type="boolean" argument="--nosimplify" truevalue="--nosimplify" falsevalue="" label="Don't Simplify Alignments" + help="Don't simplify alignments by removing shadowed clusters. Use this option when aligning a sequence to itself to look for repeats." /> <conditional name="options"> <param name="advanced" type="select" label="Additional options"> <option value="defaults">Use defaults</option> <option value="enable">Select additional options</option> </param> <when value="enable"> - <param name="banded" type="boolean" argument="--banded" truevalue="--banded" falsevalue="" label="Banding" + <param type="boolean" argument="--banded" truevalue="--banded" falsevalue="" label="Banding" help="Enforce absolute banding of dynamic programming matrix based on diagdiff parameter. (--banded)" /> - <param name="large" type="boolean" argument="--large" truevalue="--large" falsevalue="" label="Offsets" help="Force the use of large offsets. (--large)" /> + <param type="boolean" argument="--large" truevalue="--large" falsevalue="" label="Offsets" help="Force the use of large offsets. (--large)" /> <param name="genome" type="boolean" argument="-G" truevalue="-G" falsevalue="" label="Map genome to genome" help="For long query sequences. (-G)" /> <param name="max_chunk" type="integer" argument="-M" value="50000" label="Max Chunk" help="Stop adding sequence for a thread if more than MAX already. (-M)" /> </when> <when value="defaults" /> </conditional> - <conditional name="mumplot" > - <param name="plot" type="select" label="Do you want to output a 2-D dotplot of the input sequences? (mummerplot)" > - <option value="yes">YES</option> - <option value="no">NO</option> - </param> - <when value="yes" > - <expand macro="mumplot_input" > - <conditional name="sequences" > - <param name="seq_input" type="select" label="Plot an ordered set of reference/query sequences?" > - <option value="no">NO</option> - <option value="yes">YES</option> - </param> - <when value="yes"> - <param name="reference_sequence" type="data" format="fasta" label="Reference Sequence" help="(-R)" /> - <param name="query_sequence" type="data" format="fasta" multiple="True" label="Query Sequence(s)" help="(-Q)" /> - <param name="layout" type="boolean" argument="--layout" truevalue="--layout" falsevalue="" label="Layout" help="Layout a .delta multiplot in an intelligible fashion. (--layout)" /> - </when> - <when value="no" /> - </conditional> - </expand> - </when> - <when value="no" /> - </conditional> </inputs> <outputs> - <data name="delta_output" format="tabular" from_work_dir="out.delta" label="${tool.name} on ${on_string}: alignment" /> + <data name="delta_output" format="tabular" from_work_dir="out.delta" label="${tool.name} on ${on_string}: delta format"> + <filter>outform["out_format"] == "delta"</filter> + </data> + <data name="sam_output" format="bam" from_work_dir="outsam" label="${tool.name} on ${on_string}"> + <filter>outform["out_format"] != "delta"</filter> + <change_format> + <when input="outform.out_format" value="cram-long" format="cram" /> + </change_format> + </data> <data name="png_output" format="png" from_work_dir="out.png" label="${tool.name} on ${on_string}: plot" > - <filter>mumplot['plot'] == 'yes'</filter> + <filter>outform["out_format"] == "delta" and outform['mumplot']['plot'] == 'yes'</filter> </data> </outputs> <tests> - <test> + <test expect_num_outputs="2"> <param name="advanced" value="defaults" /> + <conditional name="outform"> + <param name="out_format" value="delta" /> + </conditional> <param name="plot" value="yes" /> <param name="seq_input" value="yes" /> <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/> @@ -143,11 +180,34 @@ <output name="delta_output" ftype="tabular" compare="diff" lines_diff="2" value="nucmer.txt"/> <output name="png_output" ftype="png" compare="sim_size" value="plot.png" /> </test> + <test expect_num_outputs="1"> + <param name="advanced" value="defaults" /> + <conditional name="outform"> + <param name="out_format" value="bam-long" /> + </conditional> + <param name="seq_input" value="yes" /> + <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/> + <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" /> + <output name="sam_output" ftype="bam" compare="sim_size" value="out.bam" /> + </test> + <test expect_num_outputs="1"> + <param name="advanced" value="defaults" /> + <conditional name="outform"> + <param name="out_format" value="cram-long" /> + </conditional> + <param name="seq_input" value="yes" /> + <param name="reference_sequence" ftype="fasta" value="human_aqp3.fasta"/> + <param name="query_sequence" ftype="fasta" value="mouse_aqp3.fasta" /> + <output name="sam_output" ftype="cram" compare="sim_size" value="out.cram" /> + </test> </tests> <help><![CDATA[ - nucmer is for the all-vs-all comparison of nucleotide sequences contained in multi-FastA data files. It is best used for highly similar sequence that may have large rearrangements. Common use cases are: comparing two unfinished shotgun sequencing assemblies, mapping an unfinished sequencing assembly to a finished genome, and comparing two fairly similar genomes that may have large rearrangements and duplications. + nucmer is for the all-vs-all comparison of nucleotide sequences contained in multi-FastA data files. It is best used for highly similar sequence that may + have large rearrangements. Common use cases are: comparing two unfinished shotgun sequencing assemblies, mapping an unfinished sequencing assembly + to a finished genome, and comparing two fairly similar genomes that may have large rearrangements and duplications. - All output coordinates reference the forward strand of the involved sequence, regardless of the match direction. Also, nucmer now uses only matches that are unique in the reference sequence by default, use different Anchoring options to change this behavior. + All output coordinates reference the forward strand of the involved sequence, regardless of the match direction. Also, nucmer now uses only matches that + are unique in the reference sequence by default, use different Anchoring options to change this behavior. **Options:**:: @@ -155,18 +215,24 @@ nucmer + --sam-long The original output format of nucmer, the delta format, contains only the minimum information necessary to quickly recreate the alignment. + It contains the name of the matching sequences, the length of the match, number of errors and positions of indels. + With --sam-long, it additionally reports the MD string (which specifies the mismatching positions), the sequence and, if applicable, + the quality values of the matching sequence. The long format is more expensive to compute and it generates larger output files, + but this option allows nucmer4 to match the behavior of other aligners such as Bowtie2 or BWA. + --mum Use anchor matches that are unique in both the reference and query (false) --maxmatch Use all anchor matches regardless of their uniqueness (false) - -b Set the distance an alignment extension will attempt to extend poor scoring regions + -b Set the distance an alignment extension will attempt to extend poor scoring regions before giving up (200) -c Sets the minimum length of a cluster of matches (65) -D Set the maximum diagonal difference between two adjacent anchors in a cluster (5) - -d Set the maximum diagonal difference between two adjacent anchors in a cluster as a + -d Set the maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length (0.12) --noextend Do not perform cluster extension step (false) @@ -181,7 +247,7 @@ -L Minimum length of an alignment, after clustering and extension (0) - --nooptimize No alignment score optimization, i.e. if an alignment extension reaches the end of a + --nooptimize No alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will not backtrack to optimize the alignment score and instead terminate the alignment at the end of the sequence (false) @@ -198,16 +264,16 @@ mummerplot - -b Highlight alignments with breakpoints further than breaklen nucleotides from the nearest + -b Highlight alignments with breakpoints further than breaklen nucleotides from the nearest sequence end - -color Color plot lines with a percent similarity gradient or turn off all plot color (default - color by match dir) If the plot is very sparse, edit the .gp script to plot with + -color Color plot lines with a percent similarity gradient or turn off all plot color (default + color by match dir) If the plot is very sparse, edit the .gp script to plot with 'linespoints' instead of 'lines' -c Generate a reference coverage plot (default for .tiling) - --filter Only display .delta alignments which represent the "best" hit to any particular spot on + --filter Only display .delta alignments which represent the "best" hit to any particular spot on either sequence, i.e. a one-to-one mapping of reference and query subsequences --fat Layout sequences using fattest alignment only
--- a/test-data/1delta.txt Sat Nov 27 09:59:16 2021 +0000 +++ b/test-data/1delta.txt Mon Mar 18 12:41:25 2024 +0000 @@ -1,4 +1,4 @@ -/tmp/tmpm1aw7z9b/files/6/1/9/dataset_619c747c-cdb5-4d85-adda-e0ec5f7fa2fe.dat /tmp/tmpm1aw7z9b/files/f/e/c/dataset_fec4ecfe-55bb-4ff9-86e1-292952d00dd1.dat +/tmp/tmpmprg208i/files/f/1/b/dataset_f1b7734e-9d42-4c7c-9775-5f18da934854.dat /tmp/tmpmprg208i/files/6/f/5/dataset_6f561e62-f409-4d5b-9469-6080685d3666.dat NUCMER >NG_007476.1:4960-11439 NC_000070.6:c41098183-41092724 6480 5460 74 223 43 194 19 19 0
--- a/test-data/delta.txt Sat Nov 27 09:59:16 2021 +0000 +++ b/test-data/delta.txt Mon Mar 18 12:41:25 2024 +0000 @@ -1,4 +1,4 @@ -/tmp/tmpm1aw7z9b/files/6/1/9/dataset_619c747c-cdb5-4d85-adda-e0ec5f7fa2fe.dat /tmp/tmpm1aw7z9b/files/f/e/c/dataset_fec4ecfe-55bb-4ff9-86e1-292952d00dd1.dat +/tmp/tmpmprg208i/files/f/1/b/dataset_f1b7734e-9d42-4c7c-9775-5f18da934854.dat /tmp/tmpmprg208i/files/6/f/5/dataset_6f561e62-f409-4d5b-9469-6080685d3666.dat NUCMER >NG_007476.1:4960-11439 NC_000070.6:c41098183-41092724 6480 5460 74 223 43 194 19 19 0
--- a/test-data/mdelta.txt Sat Nov 27 09:59:16 2021 +0000 +++ b/test-data/mdelta.txt Mon Mar 18 12:41:25 2024 +0000 @@ -1,4 +1,4 @@ -/tmp/tmpm1aw7z9b/files/6/1/9/dataset_619c747c-cdb5-4d85-adda-e0ec5f7fa2fe.dat /tmp/tmpm1aw7z9b/files/f/e/c/dataset_fec4ecfe-55bb-4ff9-86e1-292952d00dd1.dat +/tmp/tmpmprg208i/files/f/1/b/dataset_f1b7734e-9d42-4c7c-9775-5f18da934854.dat /tmp/tmpmprg208i/files/6/f/5/dataset_6f561e62-f409-4d5b-9469-6080685d3666.dat NUCMER >NG_007476.1:4960-11439 NC_000070.6:c41098183-41092724 6480 5460 74 223 43 194 19 19 0
--- a/test-data/nucmer.txt Sat Nov 27 09:59:16 2021 +0000 +++ b/test-data/nucmer.txt Mon Mar 18 12:41:25 2024 +0000 @@ -1,4 +1,4 @@ -reference.fa query.fa +/tmp/tmpk0v09mol/files/2/f/7/dataset_2f771d7a-b9f1-4fc9-b1ea-d06b84d013a0.dat /tmp/tmpk0v09mol/files/d/1/b/dataset_d1bcd825-fbc8-415a-9896-a6af5fd5ebaa.dat NUCMER >NG_007476.1:4960-11439 NC_000070.6:c41098183-41092724 6480 5460 74 223 43 194 19 19 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out.sam Mon Mar 18 12:41:25 2024 +0000 @@ -0,0 +1,6 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:NG_007476.1:4960-11439 LN:6480 M5:dcb745df791cdf1218f74d62ab4e1507 UR:file:///tmp/tmp885k3_3o/files/8/f/5/dataset_8f5d35c8-47ee-4bac-87e6-449833607f46.dat +@PG ID:samtools PN:samtools VN:1.19.2 CL:samtools sort -o outsamheadsort outsamhead +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.19.2 CL:samtools calmd outsamheadsort reference.fa +NC_000070.6:c41098183-41092724 0 NG_007476.1:4960-11439 74 10 42S25M1I1M1I124M5266S * 0 0 ACCGCTCTCGGTGCCTTGCGCTAGCTACTTTGCACTCGTACGCCGCCGGACCTCGCCGCTGCCTGCCTCGCGCCATGGGTCGACAGAAGGAGTTGATGAATCGTTGTGGGGAGATGCTTCACATCCGCTACCGGCTGCTTCGCCAGGCGCTGGCGGAGTGCCTGGGGACCCTCATCCTTGTGGTGAGTGCAGGGTAGTGAGCAGTCCTATCTATTTCAGGCCCCGTGGTCCCCAACTCCTTATTCACTCCAGGGACAGTTTTCCAGGCAGACACGACCAGTTCCCCAGCTCTGCTCCCGAGGCTTAACCCTCAGGGTCAAGCTGACCTCCAAAGCCTTCAACAGTCTTGACAGTTCTAACTCAACTTGTGACAGTGCTACTTACCCAAGAACAGCTCTGCTATTTTGCCTTTCCCCCTGGAGACAGAGACCAAGGCTGCGGAGGGTCACCCCTTAGTGTGGGACCTCCCACACGTTTCCTGGCTTCCCAACTTTCCCAGGGCTCCAACCTCAGGACACAGCAGAGAAAGGCAAATGGACACCCCTATTAAGTTATTTGGGTCTTGGGTGTCTGGCCCCTAATGAATAATTAAGCCTCAGAAAGTCCAAAAGTTGCAGTGAAGTGAGGGCGGATTACGGCAGGTAGGGGCAGAGGGCGCAGATTTCTTGGGCTGTAAGCAGCAAAATCGGAAGAACGGAGATTGTGGAAGGAAACAGAGCCTAACAGAACAGTGGGCGCGTCTAAATGTGTTGTGCACACGCTGTAGGAGTCCTTCGGATCCTGCGACCCAGATGCTTCCAGCGTCTTTGCTGGCCTTTTGATCCCCCCTCTGCCTCTCCTCTCAATGGCCCCTGGTGAACCCCCAGATCTCAGGCTTCTGCAGTTTTGCCCCGAGTTCTGGTTAACTTGGCGCACTTCTTGATCAGACAGACACTATGGCTTTCATTTCCTGTCCCAGTTACTCTGATTTTCAGCCACTGACTTGTTTTTCTGAGTCTCTTCTCAATTCCGGTGTCCCTTTTTGCAGTGGGGTGGGTTACTTGAGTTCCAGCAACAGCCTCAGTCTTGGGTAGTGCAGATCAGTCAACTGGCGAGGGAACATGGTGGGGCAGGCTACACATGGAGAAAGGGGGCCCAAATGTGCCTCTGACTTCATAGACTAGTGGCCTATCAGTTGTCCCCACTCCCTCCTGCCTCGGTGGTTCCCTGTTTCCTATCTTGATTTGTGAGTCTGGCCACCTGTTCGGTTGAGAGCAGCAAGAGGACAAGGGAGGGTGTCTTACAGTTTCAAAATGGGGCCCCCTCATCCCGTCCCAGTTCTCAGAGCCTATTCCTCTGTAAAAGAACAGTCAGTCCCCCCACTGAGTCCCAGGATCACTTGGCTTAGCCTAGACCACAAAAAGAGGGTTCTGGATTTGTGGTGAAGGCTTTCAGGCTCAGGTAGATGCGAGAGATCAGGTGTCAGGGAAAAGCTTGAGGAGAGGGGAGAGCAAGCCTAGTGGATCCCCTGATGACTGAGGGCCTGCTCGTGTCCCTGTGTTGGGGACTTGAGACCTCTGTGTCAGGATCCAGGTTGACTTTGGGGTGAGGAAAGCCCCCTTTTTTCTGAGGTATATAGTTGGGTGGTATATGGGGTGAGGGGCTGGCTAAGAACCTGCAGCTGGGTGAAAGTGCAGGATAAGGAAGAGGAAAGATTAGCCCCAAGGTGGTGGAACTGGCTTTGACAGCCTCCTCCTCCGGAGGCCCCTTGGCACGGAGCCAAGTCTGGGCCTCAGGCAAAGAGCTCAGGCCAGGGTCTGACTGGACATAGTGCCCGGGACGGCCACATGCCTCCACCCCTTGCTCTTACAATCTCTGACCTTTGCTCTCACCTGCCGGAACACCTTTGACCTACATCGCTGCCACTGGCCTCCCTGCCTCATCTTCCCGAGTGATAATAATCAGTGCTTAACAACCCAGCACCCAGGCTTGGACCCATTAGAGTTAATGAGGCACAGCTTGCTGCTGGCCAGCGTAGCACTTACTTGTTTGTGAGACCCCGGGTCTCCTTCGCCCTAGTCCTTGGCTGTTCTAGAACTCACTCTGTAGACCAGGCTGGTCTCTCTGCCTCGAGAGTGCAGGGATCAGGCCTGGCTCAGCATAGCACTGGTTCAGTTATGGAGAACTGAGAAAGGAGTCCCAGGGCCGGGCTGGAGGAGCGTGGAGGGCCACTAGAGACTTGAGGCAACAGCACTTAACAGTTGGTGGAGAGTATTCACACTTGAACATATGTCTGTCTGTCCCTGCCGATCCTGAGGCACCTTTTAGTCCCTCTATCCCAGGGTTAGAACAGATCTCACGGGACCTAGAGGGAAGAGCTGCTTAAAACCTTCTATTTAGGACTCTACAGATCAGAGTGGAAATCCTGGAGACAGCGGGCTGGAGAAGGAAGAGGAAAAGTGGCCTGGGGAGGGACCGGCAGGGACAGATGTGAACAGGGAGGCTATAATGAGGAACACGTGAGGACCAGATACGCAGACATTTCTCCATGCAGGAGTAGATTCAACTTCTCTTTTGATCTCTCTCCGTGGCCCCCACCTCCTCCACTATGACCTGGGCTTTGGAAGAGTCCAGGACGATGGGGTATTTCCTACCTCTGTGTGCCAAATCTGGAGGACGGGCAGAAACTGCTGTAGTGATAGCTCCCTTCTGTGCCCTTCCCGGATTCAAGAAAAACCAGTGGATGTCATTTAACCACCTGGGTCCCCAGTCATGTACCCATTACTGATTCCCCCCCATCCCCCCATGAGGCTCTGCACGTCTCCTCTTTCTGGGACTTAAGGAGGGATTTGGGTTATATCCCAGGAAGCACCTTCAGTGGTGTCAGGCATGTCTGATCTCAGTGGGACTTCACTTGCTTTGTTTTCCGACAGATGTTTGGCTGTGGCTCCGTGGCTCAGGTGGTGCTCAGCCGTGGCACCCATGGTGGCTTCCTCACCATCAACTTGGCTTTTGGCTTCGCTGTCACCCTTGGCATCTTGGTGGCTGGCCAGGTGTCTGGTAAGGCCTCAACCCCAGCTTCAGTTTTCAGCCCTCACCAGCATTTCCAACAAGTATCTGCCTAGAGAGCAGAGGGGGAGGAACAACTCCAACCAAGGACGCACACTAACAGCACCTCAGCTTTGGGCCCTTTGGAGGCAAGGGTGGAAGAGACTTCACAGAACCTTTGACTTTCACCTTGGAATCAAAGATCAGTCTGTGACATAGTCCAACTGCCATCCAGGCTAAGAAAACCTACCTACACCCCAGAAAGGAAGAGCCCAAGGTGGCAGGCTGTAACTAATGGATGCTATCTCCTCTTCGCAGGTGCCCACTTGAACCCCGCTGTGACCTTCGCAATGTGCTTCCTGGCACGAGAGCCCTGGATCAAGCTGCCCATCTATGCACTGGCACAGACACTGGGGGCCTTCTTGGGCGCTGGGATTGTTTTTGGGCTGTACTACGGTAAGCATTCCCCATCCCGCCCTCCCTTCTCCACACTTTCCCTCTTTAAGTACTTGTTGGCACCAAGCCCACTGATGACAACCGGGGCCTGCCCAGGCCCAGGGCCCGTGACTCATTCACGAACACTCAGGCCCAGGTTGGGGGCCTAGGGGAAAGAAACGAGTTGGGCAACAACAGAATCTCAGGTCCTCCACCCCGCCCCACCCCCTGAGCCTCTACAGTCATATGCTTACCCATGACCCCTGGCGGGGTGGGGAGGGCAGCTCTGAGAGGAGAGGCTCTGCCCTCACTCACAATGGCTCTAATCTGTCACCAGATGCAATCTGGGCCTTTGCCAACAATGAGCTTTTCGTCTCTGGCCCCAACGGCACAGCTGGAATCTTTGCCACCTATCCCTCTGGACACTTGGACATGGTCAATGGCTTCTTTGATCAGGTATGGACTAGGGACATGTGAAGTAAAGGTAGAGGGAGGAACAGTCTTGTTTTGGACAACGCTCCTTGATTGTAGCAGGATTTCTTCTCAGTTCGTGAGAACCCCAACCTCAGAACATGGTGGTGGCGTTGTCTTTTATAAAGCATGGCGCTTCCCAGTGAGTTCTTGGTTTTGCGATCATCCTAGAGTCAGGATGGTCTCTAGCGTGGAGGACTGGAGCGTGGGAGAAGGAGCTGGCCCTCACCGTGCTCTCTCCCCTCTCCCAGTTCATAGGCACAGCCGCCCTTATTGTGTGTGTACTGGCCATCGTTGACCCTTATAACAACCCTGTGCCCCGTGGCCTGGAGGCTTTCACTGTGGGCCTGGTGGTCCTGGTCATTGGAACCTCCATGGGCTTCAATTCTGGCTATGCCGTCAACCCTGCCCGTGACTTTGGACCTCGCCTCTTCACCGCCCTGGCTGGCTGGGGCTCAGAAGTCTTCACGTGAGTACAGTCCCCACTCCCCAGCTTGCCTCCCCTCTTCCTGCCGACCTGTCTCTGATTTCCGGTGTTCTCCCTCCAGGACTGGCCGGCACTGGTGGTGGGTACCCATTGTCTCCCCACTCCTGGGTTCCATCGCTGGTGTCTTCGTGTACCAGCTCATGATTGGTTGCCACCTGGAGCAGCCCCCACCCTCCACCGAGGAAGAGAATGTGAAGCTGGCCCACATGAAACACAAGGAGCAGATCTGAGTGGGCAGCAGCCCCCCTCCCCCACTGTGCACTCTCCTGAGTGTCCACTGACTGTGTGGGGACCAGTCCCCGAAAGCCCTTTGTGATGCCTCTCTCGGGCTAAACCGCTCCCTGTGTCCACCCCTGCTGGATGGGCCCTCCAGAATTTCTATGAACTCTGCCCATTAGGGCATTAGGTTCCCACCCACCTTTAAGCCAAGGTAGGATAGCAAATAAGATGGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAATGAATGTGTACATGTGTGCTGTTTTCTAAGCTGAATGATGCAAAGGCAAGGGACCAAGTTTTCAAAACAAACTGTAGCAGCTCAGGGGAAGGGAGCCCAGGGGAAGGGAGAAAGTGAGTCAGGAATGTGCCAGAGTGTGCATGCTTCAGGGACTCCTCCATGTGGAGGTGGACCCAGAAGTGAGTTTCTAAGTATGCGTGTGCCTACTGTTTTTTTTTTTTTTTTTGAAATGGACTTCTAGGCTTGGGGAGGGGGAAGGGATAAGAAGGGTGTAGCTCACATCTGGAGCTATGACCCTTGACTGGGGGCTGTGTAATATGTTTCTGTTATAAGATAGACATTGGGAGGGGCTGAAGTCCAGGTCGTAAGTTTCATAATTTGTTTTTTAAATATATAAATATATACATACATATATGTTACAGCCCTAGGAATAGGGGTGGGAAACTCCACTTTTTAAAAGGGGTTTCCTTTCTTTAATCCTCCAATCAACAATGTACTGTTGCCTTTTATATATAAAAAAGAATAAAACGTATACATGCTACAGG * NM:i:19 MD:Z:5C3A0G6C30C2G2T0C0C2C2C11C20C2A11C23G10G4 +NC_000070.6:c41098183-41092724 2048 NG_007476.1:4960-11439 5080 10 4147H31M1I239M5D2M2D20M1D11M1D19M1D188M802H * 0 0 GGAGCTGGCCCTCACCGTGCTCTCTCCCCTCTCCCAGTTCATAGGCACAGCCGCCCTTATTGTGTGTGTACTGGCCATCGTTGACCCTTATAACAACCCTGTGCCCCGTGGCCTGGAGGCTTTCACTGTGGGCCTGGTGGTCCTGGTCATTGGAACCTCCATGGGCTTCAATTCTGGCTATGCCGTCAACCCTGCCCGTGACTTTGGACCTCGCCTCTTCACCGCCCTGGCTGGCTGGGGCTCAGAAGTCTTCACGTGAGTACAGTCCCCACTCCCCAGCTTGCCTCCCCTCTTCCTGCCGACCTGTCTCTGATTTCCGGTGTTCTCCCTCCAGGACTGGCCGGCACTGGTGGTGGGTACCCATTGTCTCCCCACTCCTGGGTTCCATCGCTGGTGTCTTCGTGTACCAGCTCATGATTGGTTGCCACCTGGAGCAGCCCCCACCCTCCACCGAGGAAGAGAATGTGAAGCTGGCCCACATGAAACACAAGGAGCAGATCTGAGTGGGCAG * NM:i:72 MD:Z:7C0A0G6C2T4C2A23T7C8G8T8C2C8C2C5A11C5C26C17C2C23G8C2C5T2T2A5T2G11T1C19C5^CCCAG2^CA7C6T5^G0C6C0C2^A4C4C1A0T0G0A4^C1G4C7C4A3T11G5C2G14C5T2G20G5C2C28A27T0G4G26
--- a/test-data/report.txt Sat Nov 27 09:59:16 2021 +0000 +++ b/test-data/report.txt Mon Mar 18 12:41:25 2024 +0000 @@ -1,4 +1,4 @@ -/tmp/tmpm1aw7z9b/files/6/1/9/dataset_619c747c-cdb5-4d85-adda-e0ec5f7fa2fe.dat /tmp/tmpm1aw7z9b/files/f/e/c/dataset_fec4ecfe-55bb-4ff9-86e1-292952d00dd1.dat +/tmp/tmpmprg208i/files/f/1/b/dataset_f1b7734e-9d42-4c7c-9775-5f18da934854.dat /tmp/tmpmprg208i/files/6/f/5/dataset_6f561e62-f409-4d5b-9469-6080685d3666.dat NUCMER [REF] [QRY] @@ -39,49 +39,49 @@ [SNPs] TotalSNPs 78 78 +AT 2(2.5641%) 2(2.5641%) +AC 6(7.6923%) 5(6.4103%) +AG 2(2.5641%) 5(6.4103%) +TA 2(2.5641%) 2(2.5641%) TG 2(2.5641%) 5(6.4103%) TC 9(11.5385%) 28(35.8974%) -TA 2(2.5641%) 2(2.5641%) -GC 6(7.6923%) 6(7.6923%) -GA 5(6.4103%) 2(2.5641%) -GT 5(6.4103%) 2(2.5641%) CT 28(35.8974%) 9(11.5385%) CA 5(6.4103%) 6(7.6923%) CG 6(7.6923%) 6(7.6923%) -AT 2(2.5641%) 2(2.5641%) -AG 2(2.5641%) 5(6.4103%) -AC 6(7.6923%) 5(6.4103%) +GT 5(6.4103%) 2(2.5641%) +GA 5(6.4103%) 2(2.5641%) +GC 6(7.6923%) 6(7.6923%) TotalGSNPs 1 1 -TA 0(0.0000%) 0(0.0000%) +GT 0(0.0000%) 0(0.0000%) +GA 0(0.0000%) 0(0.0000%) +GC 0(0.0000%) 0(0.0000%) +CG 0(0.0000%) 0(0.0000%) +CT 0(0.0000%) 0(0.0000%) +CA 0(0.0000%) 1(100.0000%) +TC 0(0.0000%) 0(0.0000%) TG 0(0.0000%) 0(0.0000%) -TC 0(0.0000%) 0(0.0000%) +TA 0(0.0000%) 0(0.0000%) +AT 0(0.0000%) 0(0.0000%) AG 0(0.0000%) 0(0.0000%) AC 1(100.0000%) 0(0.0000%) -AT 0(0.0000%) 0(0.0000%) -GC 0(0.0000%) 0(0.0000%) -GA 0(0.0000%) 0(0.0000%) -GT 0(0.0000%) 0(0.0000%) -CT 0(0.0000%) 0(0.0000%) -CG 0(0.0000%) 0(0.0000%) -CA 0(0.0000%) 1(100.0000%) TotalIndels 13 13 +A. 3(23.0769%) 0(0.0000%) T. 0(0.0000%) 2(15.3846%) -G. 2(15.3846%) 1(7.6923%) C. 5(38.4615%) 0(0.0000%) -A. 3(23.0769%) 0(0.0000%) -.G 1(7.6923%) 2(15.3846%) -.C 0(0.0000%) 5(38.4615%) +G. 2(15.3846%) 1(7.6923%) .A 0(0.0000%) 3(23.0769%) .T 2(15.3846%) 0(0.0000%) +.C 0(0.0000%) 5(38.4615%) +.G 1(7.6923%) 2(15.3846%) TotalGIndels 0 0 +G. 0(0.0000%) 0(0.0000%) +C. 0(0.0000%) 0(0.0000%) T. 0(0.0000%) 0(0.0000%) A. 0(0.0000%) 0(0.0000%) -G. 0(0.0000%) 0(0.0000%) -C. 0(0.0000%) 0(0.0000%) -.A 0(0.0000%) 0(0.0000%) .C 0(0.0000%) 0(0.0000%) .G 0(0.0000%) 0(0.0000%) +.A 0(0.0000%) 0(0.0000%) .T 0(0.0000%) 0(0.0000%)