Galaxy |

Changeset 0:aa82b2e54055 (2017-11-11)

Next changeset 1:1425ea794026 (2017-12-18)

Commit message:
planemo upload for repository https://github.com/wm75/mimodd_galaxy_wrappers commit b36048cd608ede0ec6f6559648525c9350caae34-dirty

added:
bamsort.xml
cloudmap.xml
convert.xml
covstats.xml
deletion_predictor.xml
fileinfo.xml
macros.xml
rebase.xml
reheader.xml
sam_header.xml
snp_caller_caller.xml
test-data/a.bam
test-data/a.bcf
test-data/a.fa
test-data/a.sam
test-data/a.vcf
test-data/a_part1.bam
test-data/a_part2.bam
test-data/a_part2.bcf
test-data/ce11ToCe10.over.chain
test-data/header_only.bam
test-data/header_only.sam
test-data/reads_1_w_header.bam
test-data/reads_1and2_w_header.bam
test-data/rebased.vcf
test-data/so_coordinate.bam
test-data/so_queryname.sam
test-data/split_pair_reads_1.fastqsanger
test-data/split_pair_reads_2.fastqsanger
test-data/vaf_linkage.pdf
tool-data/all_fasta.loc.sample
tool_data_table_conf.xml.sample
varextract.xml
varreport.xml
vcf_filter.xml

diff -r 000000000000 -r aa82b2e54055 bamsort.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bamsort.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,97 @@
+<tool id="mimodd_sort" name="MiModD Sort" version="@MIMODD_WRAPPER_VERSION@">
+    <!-- Galaxy wrapper around "mimodd sort": writes a sorted SAM/BAM copy of a single SAM/BAM input dataset. -->
+    <description>takes a SAM/BAM dataset and generates a coordinate/name-sorted copy</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+ mimodd sort '$input.ifile'
+     -o '$ofile' --iformat $input.iformat --oformat $oformat $by_name
+    </command>
+
+    <inputs>
+        <!-- The input format selector decides which datatype the dataset chooser accepts. -->
+        <conditional name="input">
+            <param name="iformat" type="select" label="Input data format">
+                <option value="bam">bam</option>
+                <option value="sam">sam</option>
+            </param>
+            <when value="bam">
+                <param name="ifile" type="data" format="bam"
+                label="BAM input dataset to sort" />
+            </when>
+            <when value="sam">
+                <param name="ifile" type="data" format="sam"
+                label="SAM input dataset to sort" />
+            </when>
+        </conditional>
+        <param name="oformat" type="select" label="Output format for the sorted data">
+            <option value="bam">bam</option>
+            <option value="sam">sam</option>
+        </param>
+        <param name="by_name" type="boolean" truevalue="-n" falsevalue="" checked="false"
+        label="Sort by read names instead of coordinates"
+        help="A less common option, but necessary, e.g., if you want to re-align coordinate sorted paired-end reads with the MiModD Read Alignment Tool or other NGS mapping tools." />
+    </inputs>
+
+    <outputs>
+        <!-- The output datatype is bam unless oformat is "sam". The label uses tool.name only, because that name already starts with "MiModD". -->
+        <data name="ofile" format="bam"
+        label="Sorted output from ${tool.name} on ${on_string}">
+            <change_format>
+                <when input="oformat" value="sam" format="sam" />
+            </change_format>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- Both tests look for " -n" with its leading space, so only the flag itself is matched and not a hyphen-n sequence elsewhere in the command line. -->
+        <test>
+            <conditional name="input">
+                <param name="iformat" value="sam" />
+                <param name="ifile" value="header_only.sam" />
+            </conditional>
+            <param name="oformat" value="bam" />
+            <assert_command>
+                <not_has_text text=" -n" />
+            </assert_command>
+            <output name="ofile" ftype="bam" file="so_coordinate.bam" />
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="iformat" value="bam" />
+                <param name="ifile" value="header_only.bam" />
+            </conditional>
+            <param name="oformat" value="sam" />
+            <param name="by_name" value="true" />
+            <assert_command>
+                <has_text text=" -n" />
+            </assert_command>
+            <output name="ofile" ftype="sam" file="so_queryname.sam" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+The tool sorts an aligned reads input dataset, typically by the reference
+genome coordinates that the reads have been mapped to.
+
+Coordinate-sorted input files are expected by most downstream MiModD tools, but
+note that the *MiModD Read Alignment* produces coordinate-sorted output by
+default and it is only necessary to sort files that come from other sources or
+from *MiModD Read Alignment* jobs with a custom sort order.
+
+The option *Sort by read names instead of coordinates* is useful if you want to
+re-align coordinate-sorted paired-end data. In *paired-end mode*, the
+*MiModD Read Alignment* tool expects the reads in the input file to be arranged
+in read pairs, *i.e.*, the forward read information of a pair must be followed
+immediately by its reverse mate information, which is typically not the case in
+coordinate-sorted data. Resorting such data by read names fixes this problem.
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
+

diff -r 000000000000 -r aa82b2e54055 cloudmap.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cloudmap.xml Sat Nov 11 18:19:22 2017 -0500

b'@@ -0,0 +1,591 @@\n+<tool id="mimodd_map" name="MiModD NacreousMap" version="@MIMODD_WRAPPER_VERSION@">\n+ <description>maps phenotypically selected variants by multi-variant linkage analysis</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ <macro name="svd_unconditional">\n+ <expand macro="hidden_algo_params" />\n+ <expand macro="seqdict_param" />\n+ <expand macro="bins" />\n+ <conditional name="plotopts">\n+ <param name="plots" type="select" label="graphical output settings">\n+ <option value="-p">Give me graphics.</option>\n+ <option value="">Do not generate graphs.</option>\n+ </param>\n+ <when value="">\n+ </when>\n+ <when value="-p">\n+ <expand macro="scatter_default" />\n+ <param name="show_kde" type="boolean" truevalue="" falsevalue="--no-kde" checked="true"\n+ label="show kde line in histogram plots"\n+ help="The tool can calculate a kernel density estimate for the linkage data based on a bin size of 10 kilobases and display it as a solid line in the histogram plots." 
/>\n+ <param name="hylim" type="text" \n+ label="upper limit for the histogram y-axis (leave blank for automatic scaling)" />\n+ <param name="xlim" type="select" label="x-axis scaling">\n+ <option value="">preserve relative contig sizes</option>\n+ <option value="--fit-width">scale each contig to fit the plot width</option>\n+ </param>\n+ <expand macro="hist_colors" />\n+ </when>\n+ </conditional>\n+ </macro>\n+ <macro name="vaf_unconditional">\n+ <expand macro="bins" />\n+ <conditional name="plotopts">\n+ <param name="plots" type="select" label="graphical output settings">\n+ <option value="-p">Give me everything (scatter plots and histograms)</option>\n+ <option value="--no-scatter -p">Generate only histograms</option>\n+ <option value="--no-hist -p">Generate only scatter plots</option>\n+ <option value="">Do not generate graphs.</option>\n+ </param>\n+ <when value="">\n+ </when>\n+ <when value="--no-scatter -p">\n+ <expand macro="scatter_default" />\n+ <param name="show_kde" type="boolean" truevalue="" falsevalue="--no-kde" checked="true"\n+ label="show kde line in histogram plots"\n+ help="The tool can calculate a kernel density estimate for the linkage data based on a bin size of 10 kilobases and display it as a solid line in the histogram plots." 
/>\n+ <param name="hylim" type="text" \n+ label="upper limit for the histogram y-axis (leave blank for automatic scaling)" />\n+ <param name="xlim" type="select" label="x-axis scaling">\n+ <option value="">preserve relative contig sizes</option>\n+ <option value="--fit-width">scale each contig to fit the plot width</option>\n+ </param>\n+ <expand macro="hist_colors" />\n+ </when>\n+ <when value="--no-hist -p">\n+ <expand macro="hist_default" />\n+ <param name="sylim" type="text" \n+ label="upper limit for the scatter plot y-axis (default: 1)" />\n+ <param name="xlim" type="select" label="x-axis scaling">\n+ <option value="">preserve relative contig sizes</option>\n+ <option value="--fit-width">scale each contig to fit the plot width</option>\n+ </param>\n+ <para'..b' to reduce analysis time. \n+\n+-------------\n+\n+**Output:**\n+\n+The tool produces up to three output files:\n+\n+1) a default tabular report of binned variant counts that can be used to plot\n+ the data with external software such as Excel,\n+\n+2) an optional pdf containing linkage plots, and\n+ \n+3) an optional tabular per-variant report, which can be configured to be either\n+ valid input for the corresponding original CloudMap tool (for users who\n+ really, really want to continue using CloudMap for plotting) or to be\n+ reusable in fast reruns of the tool (which can be useful to experiment with\n+ different plotting parameters).\n+\n+-------------\n+\n+**Settings:**\n+\n+1) Analysis settings\n+\n+ *bin size to analyze variants in* - determines the width of the regions\n+ along each chromosome, in which variants are counted and analyzed together. 
\n+ \n+ Several bin sizes can be specified and for each size you will get a\n+ corresponding results section in the binned variant counts report and a\n+ linkage histogram plot.\n+ \n+ *sample names (in VAF and VAC modes only)* - to analyze inheritance\n+ patterns, the VAF and VAC modes need information about the relationship\n+ between the samples defined in the input. While VAC mode simply requires\n+ you to name the two contrasting samples for the analysis, the sample roles\n+ in VAF mode are a bit more complicated to understand. Specifically:\n+\n+ The *mapping sample name* should be set to the name of the sample for which\n+ the inheritance pattern is to be analyzed (the pooled progeny population). \n+\n+ The *name of the related sample* should indicate the parent sample that\n+ carried and brought in the unknown mutation to be mapped (or, alternatively,\n+ a closely related ancestor). \n+\n+ The *name of the unrelated sample* should be that of the other parent strain\n+ used in the cross. \n+ \n+ At least one of the parent samples MUST be specified, but if the input\n+ contains variant information for both parents, they can be analyzed together\n+ for higher mapping accuracy. If you are reanalyzing a tabular report from a\n+ previous tool run or from CloudMap, the association between variants and\n+ samples is already stored in the input dataset and cannot be specified\n+ again.\n+\n+2) Graphical output settings\n+\n+ .. class:: warningmark\n+ \n+ To be able to generate plots, the system running MiModD needs to have the\n+ statistical programming environment R and its Python interface rpy2\n+ installed. 
Disable graphical output if this is not the case.\n+\n+ *y-axes scaling* - if you want to override the defaults\n+\n+ *x-axis scaling* - choose *preserve relative contig sizes* if you want the\n+ largest chromosome to fit the page width and smaller chromosomes to appear\n+ according to their relative size or choose *scale each contig to fit the\n+ plot width* if all chromosomes should exploit the available space\n+\n+ *span value to be used in calculating the Loess regression line* - this\n+ value determines the degree of smoothing of the regression line through the\n+ scatterplot data. Information on loess regression and the loess span\n+ parameter can be found at http://en.wikipedia.org/wiki/Local_regression.\n+ \n+ *colors used for plotting* - can be selected freely from the offered\n+ palette. For histogram colors, the list of selected colors will be used to\n+ provide the colors for the different histograms plotted. If less colors than\n+ histograms (determined by the number of bin sizes selected) are specified,\n+ colors from the list will be recycled.\n+\n+.. _CloudMap: https://usegalaxy.org/u/gm2123/p/cloudmap\n+.. _mapping-by-sequencing analysis workflows in MiModD: http://mimodd.readthedocs.io/en/latest/nacreousmap.html\n+.. _CloudMap-style sequence dictionary: http://mimodd.readthedocs.io/en/latest/fileformats.html#cloudmap-style-sequence-dictionary\n+\n+@HELP_FOOTER@\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n'

diff -r 000000000000 -r aa82b2e54055 convert.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/convert.xml Sat Nov 11 18:19:22 2017 -0500

[

b'@@ -0,0 +1,439 @@\n+<tool id="mimodd_convert" name="MiModD Convert" version="@MIMODD_WRAPPER_VERSION@">\n+ <description>converts sequence data into different formats</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ <macro name="to_format">\n+ <param name="oformat" type="select" label="to">\n+ \t<option value="sam">sam</option>\n+\t <option value="bam">bam</option>\n+\t <yield />\n+\t </param>\n+ </macro>\n+ <macro name="se_selector" token_format="fastq">\n+ <conditional name="input">\n+ <param name="repr" type="select"\n+ label="Single-end input data provided as">\n+ <option value="individual">Individual datasets</option>\n+ <option value="collection">Collection of datasets</option>\n+ </param>\n+ <when value="individual">\n+ <repeat name="input_data" title="fastq input datasets"\n+ default="1" min="1">\n+\t <param name="file1" type="data" format="@FORMAT@"\n+\t label="single-end read data"/>\n+ </repeat>\n+ </when>\n+ <when value="collection">\n+ <param name="input_data" type="data_collection"\n+ collection_type="list" format="fastq, fastq.gz"\n+ label="collection of single-end read input datasets" /> \n+ </when>\n+ </conditional>\n+ <param name="header" type="data" format="sam" label="Use Header File"\n+ help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>\n+\n+ </macro>\n+ <macro name="pe_selector" token_format="fastq">\n+ <conditional name="input">\n+ <param name="repr" type="select"\n+ label="Paired-end input data provided as">\n+ <option value="individual">Individual datasets</option>\n+ <option value="collection">Paired collection</option>\n+ <option value="list_of_pairs">List of pairs</option>\n+ </param>\n+ <when value="individual">\n+ <repeat name="input_data" title="fastq input datasets"\n+ default="1" min="1">\n+\t <param name="file1" type="data" format="@FORMAT@"\n+\t label="first set of reads of paired-end data"/>\n+\t 
<param name="file2" type="data" format="@FORMAT@"\n+\t label="second set of reads of paired-end data"/>\n+ </repeat>\n+ </when>\n+ <when value="collection">\n+ <param name="input_data" type="data_collection"\n+ collection_type="paired" format="fastq, fastq.gz"\n+ label="paired input dataset collection" /> \n+ </when>\n+ <when value="list_of_pairs">\n+ <param name="input_data" type="data_collection"\n+ collection_type="list:paired" format="fastq, fastq.gz"\n+ label="nested collection of paired input datasets" />\n+ </when>\n+ </conditional>\n+ <param name="header" type="data" format="sam" label="Use Header File"\n+ help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."/>\n+\n+ </macro>\n+ <macro name="sam_bam_selector" token_format="sam">\n+ <param name="input_data" type="data" format="@FORMAT@"\n+ label="input dataset"/>\n+ <param name="header" type="hidden" value="None"/>\n+ </macro>\n+ <macro name="fastq_output_choices">\n+ <param name="split_on_rgs" type="hidden" value'..b'ditional name="output">\n+ <param name="oformat" value="bam" />\n+ <conditional name="input">\n+ <param name="repr" value="collection" />\n+ <param name="input_data">\n+ <collection type="paired">\n+ <element name="forward"\n+ value="split_pair_reads_1.fastqsanger" />\n+ <element name="reverse"\n+ value="split_pair_reads_2.fastqsanger" />\n+ </collection>\n+ </param>\n+ </conditional>\n+ <param name="header" value="header_only.sam" />\n+ </conditional>\n+ </conditional>\n+ <output name="outputname" file="reads_1and2_w_header.bam" ftype="bam" />\n+ </test>\n+ <test expect_num_outputs="1">\n+ <conditional name="mode">\n+ <param name="iformat" value="bam" />\n+ <conditional name="output">\n+ <param name="oformat" value="sam" />\n+ <param name="input_data" value="a.bam" />\n+ </conditional>\n+ </conditional>\n+ <output name="outputname" file="a.sam" ftype="sam" />\n+ </test>\n+ <test>\n+ 
<conditional name="mode">\n+ <param name="iformat" value="sam" />\n+ <conditional name="output">\n+ <param name="oformat" value="bam" />\n+ <param name="input_data" value="a.sam" />\n+ <param name="split_on_rgs" value="true" />\n+ </conditional>\n+ </conditional>\n+ <output_collection name="bam_split_on_read_groups" type="list" count="2">\n+ <element name="read_group_000" file="a_part1.bam" ftype="bam" />\n+ </output_collection>\n+ </test>\n+ </tests>\n+ \n+ <help><![CDATA[\n+.. class:: infomark\n+\n+ **What it does**\n+\n+The tool converts between different file formats used for storing\n+next-generation sequencing data.\n+\n+As input file types it can handle fastq, SAM or BAM format, which it can\n+convert to SAM or BAM format.\n+\n+**Notes:**\n+\n+1) The tool can convert fastq files representing data from paired-end\n+sequencing runs to appropriate SAM/BAM format provided that the mate\n+information is split over two fastq files in corresponding order.\n+\n+ **TIP:** If your paired-end data is arranged differently, you may look into\n+ the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the\n+ `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can\n+ convert your files to the expected format.\n+ \n+2) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is\n+supported both for single-end and paired-end data. Simply add additional input\n+datasets and select the appropriate files (pairs of files in case of paired-end\n+data).\n+\n+ Concatenation of SAM/BAM file during conversion is currently not supported.\n+\n+3) For input in fastq format a SAM header file providing run metadata\n+**has to be specified**. The information in this file will be used as the\n+header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool\n+to generate a new header file for your data.\n+\n+ For input in SAM/BAM format the tool will simply copy the existing header\n+ data to the new file. 
To modify the header of an existing SAM/BAM file, use\n+ the *Reheader BAM file* tool instead.\n+\n+.. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531\n+.. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy\n+.. _MiModD user guide: http://mimodd.readthedocs.org/en/latest\n+\n+@HELP_FOOTER@\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n'

diff -r 000000000000 -r aa82b2e54055 covstats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/covstats.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,57 @@
+<tool id="mimodd_covstats" name="MiModD Coverage Statistics"
+version="@MIMODD_WRAPPER_VERSION@">
+    <!-- Galaxy wrapper around "mimodd covstats": turns one BCF input dataset into a tabular coverage report. -->
+    <description>
+    calculates coverage statistics for a BCF file as generated by the MiModD Variant Calling tool
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command><![CDATA[
+    mimodd covstats '$ifile' --ofile '$ofile'
+    ]]></command>
+
+    <inputs>
+        <param name="ifile" type="data" format="bcf" label="BCF input file"
+        help="Use the Variant Calling tool to generate input for this tool."/>
+    </inputs>
+    <outputs>
+        <data name="ofile" format="tabular"
+        label="Coverage Statistics for ${on_string}"/>
+    </outputs>
+
+    <tests>
+        <!-- The column separator is written as the regex escape \t. A literal tab typed into an attribute value is replaced by a space during XML attribute-value normalization and would then never match a tab-separated line. -->
+        <test>
+            <param name="ifile" value="a.bcf" />
+            <output name="ofile" ftype="tabular">
+                <assert_contents>
+                    <has_n_columns n="3" />
+                    <has_line_matching expression="^chrI\t.+" />
+                    <has_line_matching expression="^chrII\t.+" />
+                    <has_line_matching expression="^chrIII\t.+" />
+                    <has_line_matching expression="^chrIV\t.+" />
+                    <has_line_matching expression="^chrV\t.+" />
+                    <has_line_matching expression="^chrX\t.+" />
+                    <has_line_matching expression="^MtDNA\t.+" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+The tool takes as input a BCF file produced by the *Variant Calling* tool, and calculates per-chromosome read coverage from it.
+
+.. class:: warningmark
+
+   The tool treats genome positions missing from the BCF input as zero coverage, so it is safe to use ONLY with BCF files produced by the *Variant Calling* tool or through other commands that keep the information for all sites.
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r aa82b2e54055 deletion_predictor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deletion_predictor.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,117 @@
+<tool id="mimodd_delcall" name="MiModD Deletion Calling (for PE data)"
+version="@MIMODD_WRAPPER_VERSION@">
+    <!-- Galaxy wrapper around "mimodd delcall": predicts deletions from one or more BAM datasets plus a BCF coverage dataset and writes a GFF report. -->
+    <description>
+    predicts deletions in one or more aligned paired-end read samples based on coverage of the reference genome and on insert sizes
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <!-- The BAM inputs are listed twice: first the datasets themselves, then, after the index-files option, the bam_index metadata file of each dataset in the same order. -->
+    <command><![CDATA[
+    mimodd delcall
+      #for $bam_file in $list_input
+        '$bam_file'
+      #end for
+        '$covfile' -o '$ofile'
+        --index-files
+      #for $bam_file in $list_input
+        '${bam_file.metadata.bam_index}'
+      #end for
+        --max-cov $max_cov --min-size $min_size
+        $include_uncovered
+        $group_by_id
+        --verbose
+    ]]></command>
+
+    <inputs>
+        <param name="list_input" type="data" multiple="true" format="bam"
+        label="Aligned reads input dataset(s)" />
+        <param name="covfile" type="data" format="bcf"
+        label="BCF variant call dataset to extract coverage from"
+        help="Use the MiModD Variant Calling Tool to generate this file."/>
+        <param name="group_by_id" type="boolean" truevalue="-i" falsevalue="" checked="false"
+        label="group reads based on read group id only"
+        help="If selected, reads from different read groups will be treated strictly separate. If turned off, read groups with identical sample names are used together for identifying uncovered regions, but are still treated separately for the prediction of deletions." />
+        <param name="include_uncovered" type="boolean" truevalue="-u" falsevalue="" checked="false"
+        label="include low-coverage regions"
+        help="If selected, regions that fulfill the coverage criteria below, but are not statistically significant deletions, will be included in the output." />
+        <param name="max_cov" type="integer" value="0"
+        label="maximal coverage allowed inside a low-coverage region (default: 0)"
+        help="The maximal coverage at a site allowed to consider it as part of a low-coverage region" />
+        <param name="min_size" type="integer" value="100"
+        label="minimal deletion size (default: 100)"
+        help="A low-coverage region must consist of at least this number of consecutive bases below the maximal coverage to consider it in further analyses."/>
+    </inputs>
+
+    <outputs>
+        <data name="ofile" format="gff" />
+    </outputs>
+
+    <tests>
+        <!-- Column separators are written as the regex escape \t. A literal tab typed into an attribute value is replaced by a space during XML attribute-value normalization and would then never match a tab-separated GFF line. -->
+        <test>
+            <param name="list_input" value="a.bam" />
+            <param name="covfile" value="a.bcf" />
+            <param name="include_uncovered" value="true" />
+            <assert_command>
+                <has_text text=" -u " />
+                <not_has_text text=" -i " />
+            </assert_command>
+            <output name="ofile" ftype="gff">
+                <assert_contents>
+                    <has_n_columns n="9" />
+                    <has_line_matching
+                    expression="^chrI\tMiModD\tUncovered_Region.+" />
+                    <has_line_matching
+                    expression="^chrII\tMiModD\tUncovered_Region.+" />
+                    <has_line_matching
+                    expression="^chrIII\tMiModD\tUncovered_Region.+" />
+                    <has_line_matching
+                    expression="^chrIV\tMiModD\tUncovered_Region.+" />
+                    <has_line_matching
+                    expression="^chrV\tMiModD\tUncovered_Region.+" />
+                    <has_line_matching
+                    expression="^chrX\tMiModD\tUncovered_Region.+" />
+                    <has_line_matching
+                    expression="^MtDNA\tMiModD\tUncovered_Region.+" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+The tool predicts deletions from paired-end data in a two-step process:
+
+1) It finds regions of low-coverage, i.e., candidate regions for deletions, by scanning a BCF file produced by the *Variant Calling* tool.
+
+   The *maximal coverage allowed inside a low-coverage region* and the *minimal deletion size* parameters are used at this step to define what is considered a low-coverage region.
+
+   .. class:: warningmark
+
+      The tool treats genome positions missing from the BCF input as zero coverage, so it is safe to use ONLY with BCF files produced by the *Variant Calling* tool or through other commands that keep the information for all sites.
+
+2) It assesses every low-coverage region statistically for evidence of it being a real deletion. **This step requires paired-end data** since it relies on shifts in the distribution of read pair insert sizes around real deletions.
+
+By default, the tool only reports Deletions, i.e., the subset of low-coverage regions that pass the statistical test.
+If *include low-coverage regions* is selected, regions that failed the test will also be reported.
+
+With *group reads based on read group id only* selected, grouping of reads into samples is done strictly based on their read group IDs.
+With the option deselected, as it is by default, grouping is done based on sample names in the first step of the analysis, i.e. the reads of all samples with a shared sample name are used to identify low-coverage regions.
+In the second step, however, reads will be regrouped by their read group IDs again, i.e. the statistical assessment for real deletions is always done on a per read group basis.
+
+**TIP:**
+Deselecting *group reads based on read group id only* can be useful, for example, if you have both paired-end and single-end sequencing data for the same sample.
+
+In this case, the two sets of reads will usually share a common sample name, but differ in their read groups.
+With grouping based on sample names, the single-end data can be used together with the paired-end data to identify low-coverage regions, thus increasing overall coverage and reliability of this step.
+Still, the assessment of deletions will use only the paired-end data (auto-detecting that the single-end reads do not provide insert size information).
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r aa82b2e54055 fileinfo.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fileinfo.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,102 @@
+<tool id="mimodd_info" name="MiModD File Information" version="@MIMODD_WRAPPER_VERSION@">
+    <!-- Galaxy wrapper around "mimodd info": writes a txt or html summary report for one history dataset or one installed genome. -->
+    <description>provides summary reports for supported sequence data formats.</description>
+    <macros>
+        <import>macros.xml</import>
+        <!-- Test template: runs the tool on the history dataset named by the IFILE token with the output format named by the OFORMAT token, then checks the output datatype and that the report mentions the chromosome names. The OFORMAT default is the option value "txt" rather than the option's display label. -->
+        <xml name="test_with_formats" token_ifile="a.sam" token_oformat="txt"
+        token_expectformat="txt">
+            <test>
+                <conditional name="input_selection">
+                    <param name="source" value="history" />
+                    <param name="ifile" value="@IFILE@" />
+                </conditional>
+                <param name="oformat" value="@OFORMAT@" />
+                <output name="ofile" ftype="@EXPECTFORMAT@">
+                    <assert_contents>
+                        <has_text text="chrI" />
+                        <has_text text="chrII" />
+                        <has_text text="chrIII" />
+                        <has_text text="chrIV" />
+                        <has_text text="chrV" />
+                        <has_text text="chrX" />
+                    </assert_contents>
+                </output>
+            </test>
+        </xml>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <!-- A history dataset is passed directly; for an installed genome the path field of the selected all_fasta entry is passed instead. -->
+    <command><![CDATA[
+    mimodd info -o '$ofile' --verbose --oformat $oformat
+    #if str($input_selection.source) == "history":
+      '$input_selection.ifile'
+    #else:
+      '$input_selection.ifile.fields.path'
+    #end if
+    ]]></command>
+
+    <inputs>
+        <conditional name="input_selection">
+            <param name="source" type="select" label="Input is a">
+                <option value="history">Dataset in history</option>
+                <option value="cached">Genome on server</option>
+            </param>
+            <when value="history">
+                <param name="ifile" type="data" format="bam,sam,vcf,bcf,fasta"
+                label="input dataset" />
+            </when>
+            <when value="cached">
+                <param name="ifile" type="select" label="installed genome">
+                    <options from_data_table="all_fasta" />
+                </param>
+            </when>
+        </conditional>
+        <param name="oformat" type="select" display="radio"
+        label="output format">
+            <option value="txt">text</option>
+            <option value="html">html</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <!-- The output datatype is txt unless oformat is "html". For an installed genome the dbkey metadata is taken from column 1 of the all_fasta row whose column 0 equals the selected value. -->
+        <data name="ofile" format="txt" label="Sample Info on ${on_string}">
+            <change_format>
+                <when input="oformat" value="html" format="html"/>
+            </change_format>
+            <actions>
+                <conditional name="input_selection.source">
+                    <when value="cached">
+                        <action type="metadata" name="dbkey">
+                            <option type="from_data_table" name="all_fasta" column="1" offset="0">
+                                <filter type="param_value" ref="input_selection.ifile" column="0" />
+                            </option>
+                        </action>
+                    </when>
+                </conditional>
+            </actions>
+        </data>
+    </outputs>
+
+    <tests>
+        <expand macro="test_with_formats" ifile="a.bam" />
+        <expand macro="test_with_formats" ifile="a.sam" />
+        <expand macro="test_with_formats" ifile="a.bcf" />
+        <expand macro="test_with_formats" ifile="a.vcf" />
+        <expand macro="test_with_formats" ifile="a.fa" />
+        <expand macro="test_with_formats" ifile="a.bam" oformat="html"
+        expectformat="html" />
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+The tool inspects the input dataset and generates a report summarizing its contents.
+
+It autodetects and works with most file formats produced by MiModD, i.e., **SAM / BAM, vcf / bcf and fasta**, and produces a standardized report for all of them.
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r aa82b2e54055 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,46 @@
+<macros>
+    <!-- Version tokens referenced by the tool wrappers that import this file. -->
+    <token name="@MIMODD_VERSION_REQUIRED@">0.1.8</token>
+    <token name="@MIMODD_REAL_VERSION@">0.1.8</token>
+    <token name="@MIMODD_WRAPPER_VERSION@">0.1.8_0</token>
+    <!-- reStructuredText footer appended to each tool's help section. -->
+    <token name="@HELP_FOOTER@"><![CDATA[
+----
+
+.. class:: infomark
+
+   For **additional help** see these resources:
+
+- The complete `MiModD User Guide <http://mimodd.readthedocs.io/en/v0.1.8/usage_toc.html>`__
+
+- The `MiModD help forum <https://groups.google.com/forum/#!forum/mimodd>`__ reachable also via `email <mailto:mimodd@googlegroups.com>`__
+
+   ]]></token>
+    <!-- Package requirement on MiModD; the yield lets an expanding tool add more requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@MIMODD_VERSION_REQUIRED@">MiModD</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Error detection: matches every exit code of 1 or higher. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:"/>
+        </stdio>
+    </xml>
+    <!-- Command used to report the installed MiModD version. -->
+    <xml name="version_command">
+        <version_command>mimodd version -q</version_command>
+    </xml>
+    <!-- BibTeX citation shared by the tools. -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+@misc{
+maier_mimodd_2014,
+title = {{MiModD} - {Mutation} {Identification} in {Model} {Organism} {Genomes}},
+shorttitle = {{MiModD}},
+url = {https://sourceforge.net/projects/mimodd/},
+author = {Maier, Wolfgang and Moos, Katharina and Seifert, Mark and Baumeister, Ralf},
+year = {2014}, publisher={SourceForge.net} }
+            </citation>
+        </citations>
+    </xml>
+</macros>
+
+

diff -r 000000000000 -r aa82b2e54055 rebase.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rebase.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,75 @@
+<tool id="mimodd_rebase" name="MiModD Rebase Sites"
+version="@MIMODD_WRAPPER_VERSION@">
+    <description>from a VCF file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <!-- Runs "mimodd rebase" on the VCF input and the chain file. The reverse
+         parameter expands to "-r" or to nothing; the filter parameter is one of
+         the fixed select values declared below. -->
+    <command><![CDATA[
+ mimodd rebase '$ifile'
+   '$chainfile'
+   $reverse
+   --ofile '$output_vcf'
+   --filter $filter
+   --verbose
+    ]]></command>
+
+    <inputs>
+        <param name="ifile" type="data" format="vcf" label="VCF input file"
+        help="the VCF file to rebase"/>
+        <param name="chainfile" type="data" format="tabular" label="Input chainfile"
+        help="the UCSC chain file to calculate new coordinates from"/>
+        <param name="reverse" type="boolean" truevalue="-r" falsevalue="" checked="false"
+        label="reverse the target and query sites of the initial chainfile"
+        help="swap the genome versions specified in the chain file, i.e., assume the coordinates in the input file are based on the chain file target genome version and should be mapped to the source genome version" />
+        <param name="filter" type="select" display="radio"
+        label="Ambiguously mapping variants"
+        help="How to treat variants that map to more than one position in the target coordinate system" >
+            <option value="unique">Ignore</option>
+            <option value="best">Report only the mapping with the highest score</option>
+            <option value="all">Report all possible mappings</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Label spells the product name "MiModD", matching the tool name above. -->
+        <data name="output_vcf" format="vcf"
+        label="Variants rebased with MiModD from ${on_string}"/>
+    </outputs>
+
+    <!-- Test 1 maps a.vcf through the chain file and checks that "-r" is absent
+         from the command line. Test 2 feeds the expected output of test 1 back
+         in with reverse enabled and expects a.vcf, tolerating 20 differing lines. -->
+    <tests>
+        <test>
+            <param name="ifile" value="a.vcf" />
+            <param name="chainfile" value="ce11ToCe10.over.chain" />
+            <assert_command>
+                <not_has_text text=" -r" />
+            </assert_command>
+            <output name="output_vcf" file="rebased.vcf" ftype="vcf"
+            lines_diff="0" />
+        </test>
+        <test>
+            <param name="ifile" value="rebased.vcf" />
+            <param name="chainfile" value="ce11ToCe10.over.chain" />
+            <param name="reverse" value="true" />
+            <assert_command>
+                <has_text text=" -r" />
+            </assert_command>
+            <output name="output_vcf" file="a.vcf" ftype="vcf"
+            lines_diff="20" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+The tool takes as input a VCF file like the ones produced by the
+**Extract Variant** tool and a chain file in UCSC Genome Browser chain
+format, and maps the variant positions found in the VCF file to a different
+reference genome coordinate system according to the mapping defined in the
+chain file.
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r aa82b2e54055 reheader.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reheader.xml Sat Nov 11 18:19:22 2017 -0500

[

b'@@ -0,0 +1,270 @@\n+<tool id="mimodd_reheader" name="MiModD Reheader" version="@MIMODD_WRAPPER_VERSION@">\n+ <description>\n+ takes a BAM file and generates a copy with the original header (if any) replaced or modified by that found in a template SAM file\n+ </description>\n+ <macros>\n+ <import>macros.xml</import>\n+ <macro name="getreadgroupinfo">\n+ <conditional name="rginfo">\n+ <param name="source" type="select"\n+ label="source of new read-group information">\n+ <option value="from_file">existing SAM file</option>\n+ <option value="from_form">input form</option>\n+ </param>\n+ <when value="from_file">\n+ <param name="data" type="data" format="sam"\n+ label="read-group template file in SAM format"\n+ help="use the read group information found in this file" />\n+ <repeat name="rg" title="custom read-group mapping" default="0" min="0"\n+ help="read-group information found in the input file, by default, gets updated / replaced with information from template file read-groups with matching IDs. 
Alternatively, you may specify explicit read-group mappings below.">\n+ <param name="source_id" type="text"\n+ label="modify input file information for read-group ID (will create the read-group if it does not exist)" />\n+ <param name="rg_id" type="text"\n+ label="with template file information for read-group ID" />\n+ </repeat>\n+ </when>\n+ <when value="from_form">\n+ <repeat name="rg" title="new read-group info" default="1" min="1">\n+ <param name="source_id" type="text"\n+ label="read-group ID (will create the read-group if it does not exist)"\n+ help="required field">\n+ <validator type="empty_field" />\n+ </param>\n+ <param name="rg_id" type="hidden" value="" />\n+ <param name="rg_sm" type="text" label="sample name"\n+ help="required field">\n+ <validator type="empty_field" />\n+ </param>\n+ <param name="rg_ds" type="text" label="description" />\n+ <param name="rg_date" type="text"\n+ label="date (YY-MM-DD format) the run was produced" />\n+ <param name="rg_cn" type="text"\n+ label="name of sequencing center" />\n+ <param name="rg_lb" type="text" label="read-group library" />\n+ <param name="rg_pl" type="text"\n+ label="platform/technology used to produce the reads" />\n+ <param name="rg_pi" type="text"\n+ label="predicted median insert size" />\n+ <param name="rg_pu" type="text" \n+ label="platform unit; unique identifier" />\n+ </repeat> \n+ </when>\n+ </conditional>\n+ </macro>\n+ </macros>\n+ <expand macro="requirements" />\n+ <expand macro="stdio" />\n+ <expand macro="version_command" />\n+ <command><![CDATA[\n+ #if ($str($rg.treat_rg) != "ignore" and $str($rg.rginfo.source) == "from_form") or $str($co.treat_co) != "ignore":\n+ mimodd header\n+ #if $str($rg.treat_rg) != "ignore" and $str($rg.rginfo.source) == "from_form":\n+ #for $rginfo in $rg.rginfo.rg\n+ \t #if $str($rginfo.source_id):\n+ --rg-id \'${rginfo.source_id}\'\n+ #end if\n+ #if $str($rginfo.rg_sm):\n+ \t --rg-sm \'${rginfo.rg_sm}\'\n+ #end if\n+\t #if $str($rginfo.rg_cn):\n+\t\t --rg-cn 
\'${rginfo.rg_cn}\'\n+'..b' </when>\n+ <when value="replace">\n+ <repeat name="coinfo" title="comment line" default="0" min="0">\n+ <param name="line" type="text" size="80" />\n+ </repeat>\n+ </when>\n+ <when value="ignore">\n+ </when>\n+ </conditional>\n+ <repeat name="rg_renaming" title="rename read-group" default="0" min="0"\n+ help="Warning: changing read-group IDs may increase job runtime substantially.">\n+ <param name="from" type="text" size="30" label="old name"\n+ help="as it appears in the current input file header"/>\n+ <param name="to" type="text" size="30" label="new name" />\n+ </repeat>\n+ <repeat name="sq_renaming" title="rename sequence" default="0" min="0"\n+ help="Warning: changing sequence names may increase job runtime substantially.">\n+ <param name="from" type="text" size="30" label="old name"\n+ help="as it appears in the current input file header"/>\n+ <param name="to" type="text" size="30" label="new name" />\n+ </repeat>\n+ </inputs>\n+ \n+ <outputs>\n+ <data name="output" format="bam"\n+ label="(Re)headered bam file from MiModd ${tool.name} on ${on_string}" />\n+ </outputs>\n+\n+ <tests>\n+ <test>\n+ <param name="inputfile" value="a.bam" />\n+ <conditional name="co">\n+ <param name="treat_co" value="update" />\n+ <repeat name="coinfo">\n+ <param name="line" value="ceterum censeo ..." 
/>\n+ </repeat>\n+ </conditional>\n+ <output name="output" file="a.bam" ftype="bam" lines_diff="1" />\n+ </test>\n+ <test>\n+ <param name="inputfile" value="header_only.bam" />\n+ <conditional name="rg">\n+ <param name="treat_rg" value="update" />\n+ <conditional name="rginfo">\n+ <param name="source" value="from_form" />\n+ <repeat name="rg">\n+ <param name="source_id" value="000" />\n+ <param name="rg_sm" value="Bristol" />\n+ </repeat>\n+ </conditional>\n+ </conditional>\n+ <output name="output" file="header_only.bam" ftype="bam" lines_diff="2" />\n+ </test>\n+ <test>\n+ <param name="inputfile" value="header_only.bam" />\n+ <conditional name="rg">\n+ <param name="treat_rg" value="update" />\n+ <conditional name="rginfo">\n+ <param name="source" value="from_form" />\n+ <repeat name="rg">\n+ <param name="source_id" value="001" />\n+ <param name="rg_sm" value="Hawaiian" />\n+ </repeat>\n+ </conditional>\n+ </conditional>\n+ <output name="output" file="header_only.bam" ftype="bam" lines_diff="1" />\n+ </test> \n+ </tests>\n+ \n+ <help><![CDATA[\n+.. class:: infomark\n+\n+ **What it does**\n+\n+The tool generates a copy of the BAM input file with a modified header (i.e., metadata). \n+\n+It can update or replace read-group information (i.e., information about the samples in the file), add or replace comment lines, and rename reference sequences declared in the header.\n+\n+The tool ensures that the resulting BAM file is valid and can be further processed by other MiModD tools and standard software like samtools. It aborts with an error message if a valid BAM file cannot be generated with the user-specified settings.\n+ \n+The template information used to modify or replace the input file metadata is provided through forms or, in the case of read-group information, can be taken from an existing SAM file as can be generated, for example, with the *NGS Run Annotation* tool.\n+\n+@HELP_FOOTER@\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n'

diff -r 000000000000 -r aa82b2e54055 sam_header.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_header.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,169 @@
+<tool id="mimodd_header" name="MiModD Run Annotation" version="@MIMODD_WRAPPER_VERSION@">
+    <description>
+    writes run metadata in SAM format for attaching it to sequenced reads data
+    </description>
+    <macros>
+        <import>macros.xml</import>
+        <!-- Sanitizer for free-text values that the command section places inside
+             single quotes. Every printable character is passed through except the
+             single quote, which is rewritten to the shell sequence '"'"' (close
+             quote, double-quoted single quote, reopen quote) so a value can never
+             terminate its own quoting. Double quotes need no escaping inside
+             single quotes and are therefore left untouched. -->
+        <xml name="single_quoted_text_sanitizer">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;" />
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;" />
+                </mapping>
+            </sanitizer>
+        </xml>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <!-- Builds a "mimodd header" call. Read-group ID and sample name are always
+         passed; each optional field is added only when its form value is
+         non-empty. All values are single-quoted for the shell. -->
+    <command><![CDATA[
+    mimodd header
+        --rg-id '$rg_id'
+        --rg-sm '$rg_sm'
+    #if $str($rg_cn):
+        --rg-cn '$rg_cn'
+    #end if
+    #if $str($rg_ds):
+        --rg-ds '$rg_ds'
+    #end if
+    #if $str($rg_date):
+        --rg-dt '$rg_date'
+    #end if
+    #if $str($rg_lb):
+        --rg-lb '$rg_lb'
+    #end if
+    #if $str($rg_pl):
+        --rg-pl '$rg_pl'
+    #end if
+    #if $str($rg_pi):
+        --rg-pi '$rg_pi'
+    #end if
+    #if $str($rg_pu):
+        --rg-pu '$rg_pu'
+    #end if
+        --ofile '$ofile'
+    ]]></command>
+
+    <inputs>
+        <param name="rg_id" type="text" size="80"
+        label="read-group ID (required)">
+            <validator type="empty_field" />
+            <expand macro="single_quoted_text_sanitizer" />
+        </param>
+        <param name="rg_sm" type="text" size="80"
+        label="sample name (required)">
+            <validator type="empty_field" />
+            <expand macro="single_quoted_text_sanitizer" />
+        </param>
+        <param name="rg_ds" type="text" size="80" label="description">
+            <expand macro="single_quoted_text_sanitizer" />
+        </param>
+        <!-- An empty value is accepted; otherwise exactly three dash-separated
+             all-digit parts of lengths 4, 2 and 2 are required. -->
+        <param name="rg_date" type="text"
+        label="date (YYYY-MM-DD) the run was produced">
+            <validator type="expression" message="YYYY-MM-DD date format required">not value or (len(value.split('-')) == 3 and all(part.isdigit() and len(part)==expect_len for part, expect_len in zip(value.split('-'), [4,2,2])))</validator>
+        </param>
+        <param name="rg_cn" type="text" size="80"
+        label="name of sequencing center">
+            <expand macro="single_quoted_text_sanitizer" />
+        </param>
+        <param name="rg_lb" type="text" size="80" label="read-group library">
+            <expand macro="single_quoted_text_sanitizer" />
+        </param>
+        <param name="rg_pl" type="text"
+        label="platform/technology used to produce the reads">
+            <option value="ILLUMINA">ILLUMINA</option>
+            <option value="PACBIO">PACBIO</option>
+            <option value="IONTORRENT">IONTORRENT</option>
+            <option value="ONT">ONT</option>
+            <option value="LS454">LS454</option>
+            <option value="SOLID">SOLID</option>
+            <option value="HELICOS">HELICOS</option>
+            <option value="CAPILLARY">CAPILLARY</option>
+        </param>
+        <param name="rg_pi" type="text" label="predicted median insert size" />
+        <param name="rg_pu" type="text" size="80"
+        label="platform unit; unique identifier">
+            <expand macro="single_quoted_text_sanitizer" />
+        </param>
+    </inputs>
+
+    <outputs>
+        <!-- The tool name already starts with "MiModD", and the tool has no
+             dataset inputs to list, so the label relies on the tool name alone. -->
+        <data name="ofile" format="sam"
+        label="${rg_sm} (${rg_id}) header information from ${tool.name}"/>
+    </outputs>
+
+    <!-- Fills in every form field and compares the result with header_only.sam. -->
+    <tests>
+        <test>
+            <param name="rg_id" value="000" />
+            <param name="rg_sm" value="N2" />
+            <param name="rg_ds" value="C. elegans wt" />
+            <param name="rg_date" value="2017-06-27" />
+            <param name="rg_cn" value="ABC" />
+            <param name="rg_lb" value="XYZ" />
+            <param name="rg_pl" value="ILLUMINA" />
+            <param name="rg_pi" value="400" />
+            <param name="rg_pu" value="SEQ123" />
+            <output name="ofile" ftype="sam" file="header_only.sam" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+This tool takes user-provided information about a next-generation sequencing
+run and constructs a valid SAM format header from it.
+
+The resulting dataset can be used by the *MiModD Convert*, *MiModD Reheader*
+and the *MiModD Read Alignment* tool to add run metadata to sequenced reads
+input datasets (or to overwrite pre-existing information).
+
+**Note:**
+
+**MiModD requires run metadata for every input file at the Alignment step!**
+
+**Tip:**
+
+While you can do Alignments from fastq file format by providing a custom header file directly to the *MiModD Read Alignment* tool, we **recommend** that you first convert all input files to SAM/BAM format with appropriate header information, and archive all datasets in that format, prior to any downstream analysis. Although a bit more time-consuming, this practice protects against information loss and ensures that the input datasets will remain useful for others in the future.
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
+

diff -r 000000000000 -r aa82b2e54055 snp_caller_caller.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snp_caller_caller.xml Sat Nov 11 18:19:22 2017 -0500

[

b'@@ -0,0 +1,191 @@\n+<tool id="mimodd_varcall" name="MiModD Variant Calling"\n+version="@MIMODD_WRAPPER_VERSION@">\n+ <description>\n+ generates a BCF file of position-specific variant likelihoods and coverage information based on a reference sequence and reads aligned against it\n+ </description>\n+ <macros>\n+ <import>macros.xml</import>\n+ <macro name="test_mentions_samples">\n+ <assert_stdout>\n+ <has_text_matching expression="000.+N2" />\n+ <has_text_matching expression="266-1.+ot266" />\n+ </assert_stdout>\n+ </macro>\n+ </macros>\n+ <expand macro="requirements" />\n+ <expand macro="stdio" />\n+ <expand macro="version_command" />\n+ <command><![CDATA[\n+\tmimodd varcall\n+\t #if str($reference.source) == "cached":\n+ \'$reference.genome.fields.path\'\n+ #else:\n+ \'$reference.genome\'\n+ #end if\n+\t #for $input_file in $list_input\n+ \'$input_file\'\n+ #end for\n+ --index-files \n+ #for $input_file in $list_input\n+ \'${input_file.metadata.bam_index}\'\n+ #end for\n+\t --ofile \'$ofile\'\n+\t $group_by_id\n+\t $adv_settings.md5_check\n+ --max-depth $adv_settings.max_depth\n+ --verbose\n+ --quiet\n+ ]]></command>\n+\n+ <inputs>\n+ <conditional name="reference">\n+ <param name="source" type="select"\n+ label="Will you select a reference genome from your history or use a built-in genome?">\n+ <option value="cached">Use a built-in genome</option>\n+ <option value="history">Use a genome from my history</option>\n+ </param>\n+ <when value="cached">\n+ <param name="genome" type="select"\n+ label="reference genome"\n+ help="The fasta reference genome that variants should be called against.">\n+ <options from_data_table="all_fasta" />\n+ </param>\n+ </when>\n+ <when value="history">\n+ <param name="genome" type="data" format="fasta"\n+ label="reference genome"\n+ help="The fasta reference genome that variants should be called against."/>\n+ </when>\n+ </conditional>\n+ <param name="list_input" type="data" multiple="true" format="bam"\n+ label="Aligned reads input 
dataset(s)"\n+ help="Select at least one dataset to call variants on. If you select several datasets or a dataset collection, this tool will perform joint variant calling on all of them and produce a single, possibly multisample, output dataset." />\n+ <param name="group_by_id" type="boolean" truevalue="-i" falsevalue="" checked="false" \n+ label="group reads based on read group id only" \n+ help="If selected, this option ensures that only the read group id (but not the sample name) is considered in grouping reads in the input file(s). If turned off, read groups with identical sample names are automatically pooled and analyzed together even if they come from different NGS runs." />\n+ \t<section name="adv_settings" title="More options" expanded="False">\n+ <param name="md5_check" type="boolean" truevalue="" falsevalue="-x" checked="true" \n+ label="md5 sum verification of contigs/chromosomes" \n+ help="leave turned on to avoid accidental variant calling against a wrong reference genome version (see the tool help below)." 
/>\n+ <param name="max_depth" type="integer" value="250" min="0"\n+ label="average sample depth cap limit (default: 250)" \n+ help="only relevant for very large sample numbers and/or very high sample coverage; increase to use more of the data, decrease to save memory"/>\n+ </section>\n+ </inputs>\n+\n+ <outputs>\n+ <data name="ofile" format="bcf" \n+ label="Variant Calls from MiModd Variant Calling on ${on_string}">\n+ <actions>\n+ <conditional name="re'..b' <param name="genome" value="a.fa" />\n+ </conditional>\n+ <param name="list_input" value="a.bam" />\n+ <expand macro="test_mentions_samples" />\n+ </test>\n+ <test>\n+ <conditional name="reference">\n+ <param name="source" value="history" />\n+ <param name="genome" value="a.fa" />\n+ </conditional>\n+ <param name="list_input" value="a_part1.bam,a_part2.bam" />\n+ <expand macro="test_mentions_samples" />\n+ </test>\n+ <test>\n+ <conditional name="reference">\n+ <param name="source" value="history" />\n+ <param name="genome" value="a.fa" />\n+ </conditional>\n+ <param name="list_input" value="a.bam" />\n+ <param name="group_by_id" value="true" />\n+ <section name="adv_settings">\n+ <param name="md5_check" value="false" />\n+ <param name="max_depth" value="1000" />\n+ </section>\n+ <assert_command>\n+ <has_text text="-i" />\n+ <has_text text="-x" />\n+ <has_text text="--max-depth 1000" />\n+ </assert_command>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+.. 
class:: infomark\n+\n+ **What it does**\n+\n+The tool transforms the read-centered information in the aligned reads input\n+datasets into position-centered information including variant call statistics\n+(using samtools mpileup and bcftools internally).\n+\n+**It produces a BCF file that serves as the basis for all further variant\n+analyses with MiModD**.\n+\n+-----\n+\n+**Notes on Advanced Settings:**\n+\n+**MD5 checksums**\n+\n+By default, the tool will check whether the input BAM dataset(s) provide(s) MD5\n+checksums for the reference genome contig/chromosome sequences used during read\n+alignment (e.g., the *MiModD Read Alignment* tool stores these in the BAM file\n+header). If it finds MD5 sums for all sequences, it will compare them to the\n+checksums of the reference genome sequences used in the current tool run and\n+abort with an error message if there is a discrepancy between them. If it finds\n+contigs/chromosomes with matching checksum, but different names in the aligned\n+reads dataset(s) and the reference genome dataset, it will use the name from\n+the reference genome in its output.\n+\n+This behavior has two benefits:\n+\n+1) It protects from accidental variant calling against a wrong reference genome\n+(*i.e.*, a different one than that used during the alignment step), which would\n+result in wrong calls. 
This is the primary reason why we recommend to leave the\n+check activated.\n+\n+2) It provides an opportunity to change sequence names between aligned reads\n+files and variant call files by providing a reference genome file with altered\n+sequence names (but identical sequence data).\n+\n+Since there may be rare cases where you *really* want to align against a\n+reference genome with different checksums (e.g., you may have edited the\n+reference sequence based on the alignment results), the check can be turned\n+off, but only do this if you know *exactly* why.\n+\n+\n+**Average sample depth cap limit**\n+\n+For each of a total of ``M`` BAM input datasets, the tool will only pile up a\n+maximum number of reads ``N`` per position to avoid excessive memory usage with\n+very large numbers of samples sequenced at high coverage.\n+N will be calculated as the maximum of ``8000/M`` and ``DEPTH*S``, where ``S``\n+is the maximum number of samples found in a single input dataset and ``DEPTH``\n+is the *average sample depth cap limit* specified in the tool form.\n+\n+This parameter, thus sets the average depth of the pile-up per sample that is\n+guaranteed to be used even when there is a very large number of samples. As can\n+be seen from the formula above, however, it will rarely become relevant for any\n+regular-size analysis.\n+\n+\n+@HELP_FOOTER@\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n'

diff -r 000000000000 -r aa82b2e54055 test-data/a.bam

Binary file test-data/a.bam has changed

diff -r 000000000000 -r aa82b2e54055 test-data/a.bcf

Binary file test-data/a.bcf has changed

diff -r 000000000000 -r aa82b2e54055 test-data/a.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/a.fa Sat Nov 11 18:19:22 2017 -0500

b'@@ -0,0 +1,3289 @@\n+>chrI\n+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc\n+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct\n+aagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaa\n+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc\n+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct\n+aagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaa\n+gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc\n+ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct\n+aagcctaagcctaagcctaagcctaagcctaaaaaattgagataagaaaa\n+cattttactttttcaaaattgttttcatgctaaattcaaaacgttttttt\n+tttagtgaagcttctagatatttggcgggtacctctaattttgcctgcct\n+gccaacctatatgctcctgtgtttaggcctaatactaagcctaagcctaa\n+gcctaatactaagcctaagcctaagactaagcctaatactaagcctaagc\n+ctaagactaagcctaagactaagcctaagactaagcctaatactaagcct\n+aagcctaagactaagcctaagcctaatactaagcctaagcctaagactaa\n+gcctaatactaagcctaagcctaagactaagcctaagactaagcctaaga\n+ctaagcctaatactaagcctaagcctaagactaagcctaagcctaaaaga\n+atatggtagctacagaaacggtagtacactcttctgaaaatacaaaaaat\n+ttgcaatttttatagctagggcactttttgtctgcccaaatataggcaac\n+caaaaataattgccaagtttttaatgatttgttgcatattgaaaaaaaca\n+tttttcgggttttttgaaatgaatatcgtagctacagaaacggttgtgca\n+ctcatctgaaagtttgtttttcttgttttcttgcactttgtgcagaattc\n+ttgattcttgattcttgcagaaatttgcaagaaaattcgcaagaaatttg\n+tattaaaaactgttcaaaatttttggaaattagtttaaaaatctcacatt\n+ttttttagaaaaattatttttaagaatttttcattttaggaatattgtta\n+tttcagaaaatagctaaatgtgatttctgtaattttgcctgccaaattcg\n+tgaaatgcaataaaaatctaatatccctcatcagtgcgatttccgaatca\n+gtatatttttacgtaatagcttctttgacatcaataagtatttgcctata\n+tgactttagacttgaaattggctattaatgccaatttcatgatatctagc\n+cactttagtataattgtttttagtttttggcaaaactattgtctaaacag\n+atattcgtgttttcaagaaatttttcatggtttttcttggtcttttcttg\n+gtatttttttgacaaaaatttttgtttcttgattcttgcaaaaatttttc\n+cgtttgacggccttgatgtgcactaccttcgcttaaatactacattttct\n+gaaaatgttataatagtgttcattgtttcatacaaatacttatttaatag\n+tatttctggttatataatttgtataaaaagtggttgacataacaaggctg\n+acgaaactttgtgatggctgaaaatattttcctagctttattgattttta\n+tttatacgtgtttgaataacttggccaaatcgccgagaaggaatagaata\n+ctggacga
cattgtacatattttccaaaaaatcagaaagtagatgacggg\n+accaattctttctgtcaggttttacaaccgcccagtgcgtctacgtcaca\n+tgttgtataaatggttgtaaacaatatgcggaaacaatcaaatgcattcc\n+cataaggcataatatagaggctacaggcaatgagtatcgctctttgcttt\n+gtttaaagggggagtagagtttgtggggaaatatatgtttctgactctaa\n+ttttgcccctgataccgaatatcgatgtgaaaaaatttaaaaaaatttcc\n+ctgattttatattaatttttaaaatccgaaaatccattggatgcctatat\n+gtgagtttttaaacgcaaaattttcccggcagagacgccccgcccacgaa\n+accgtgccgcacgtgtgggtttacgagctgaatattttccttctattttt\n+atttgattttataccgattttcgtcgatttttctcattttttctcttttt\n+tttggtgttttttattgaaaattttgtgattttcgtaaatttattcctat\n+ttattaataaaaacaaaaacaattccattaaatatcccattttcagcgca\n+aaatcgactggagactaggaaaatcgtctggagatagaacggatcaacaa\n+gattattattatatcattaataatatttatcaattttcttctgagagtct\n+cattgagactcttatttacgccaagaaataaatttaacattaaaattgtt\n+catttttgaaaaaaaaataattaaaaaaacacattttttggaaaaaaaaa\n+taaataaaaaaaattgtcctcgaggatcctccggagcgcgtcgaatcaat\n+gtttccggaactctgaaaattaaatgtttgtatgattgtagaaccctttc\n+gctattgagatttgataacttttaagtaataaaattttcgcagtaagaca\n+ttaaaacatttcacaattaagctggttctgaactgtgtgaagtatattga\n+aaaaaactaactgatacaaaaatataattttatgatagttttctggatgt\n+cccaatataaacgatgtcaattctgcgacatgctacagtcatccacgaaa\n+gtaacccgaataccgacaaaagaagaggaacgccaactttggatagacgc\n+tctaggggctgattttggtcggaaaatagtcgggaaaaaatagaggacat\n+tacagatgaggatgaggatgaagatagaaatttgccgacaacttcgtcat\n+gccgctgatttttttgatgttctacgcttaaattttcagcgaacgaacta\n+ttttttatattttgattgtttttaaataatatttgccataagaaattctc\n+acttttccaggaaacgtcgtttcgccgcgattttcctcgtctccagtcga\n+ttttgcgctgaaaatgggatatttaatggaattgtttttgtttttattaa\n+taaataggaataaatttacgaaaatcacaaaattttcaataaaaaacacc\n+aaaaaaaaagagaaaaaatgagaaaaatcgacgaaaatcggtataaaatc\n+aaataaaaatagaaggaaaatattcagctcgtaaacccgcaagtgcggca\n+cggtttcgtgggcggggcgtctctggcgggaaaattttgcgtttgaaaac\n+tcacatataggcatccaatggattttcggattttcaaaattaatataaaa\n+tcagggaaatttttttaaattttgtcacatcgatattcggtatcaggggc\n+aaaattagagtcagaaacatatatttccccacaaactctactcccccttt\n+aacaaccacccgaggatatattcgacaaacgatctatctactaggaataa\n+ctcgattattgacatattatagacttcttttagtatttgtaaaatag
agg\n+atcagacccaaaattcagcccgcgaaggcatgacgtcagcgcgaggcagt\n+agtttccagaagaactctgt'..b'\n+ggttacaggacatcagtgatattgaagatatgaatatagagatattcctg\n+ggttagaatttgactcttatataaaatcactagatcaattaagtttaggt\n+gaaccacgtttattagaagttgataatcgttgtgttattccttgtgatac\n+taacattcgtttttgtattacatctgctgatgttattcatgcttgggcat\n+taaattctttatctgtaaaattagatgctataagaggaattttaagaaca\n+tttagttacaggtttcctatagtgggagttttttatggtcaatgttcaga\n+gatttgtggagcaaatcatagttttataccaattgctttagaggtaacat\n+tattggataattttaaaagatgatgttttggtactatagaataatttagc\n+ttaatagtttatattaaaatgtttacttgtggtgtaagagaatatagagc\n+tttaaattttacttgtttaaatattggtattgcatactattacaataaaa\n+tttcatgttaatgaaaaatagaaacaaagggtagagtaaatattagtttt\n+attgtttcatactaaaaattatatttattagagttgatatgtcgaccttt\n+gtgataactgtttttatttttatattagaaaattatatattatataatta\n+ttttaggaaatttaaaatttgaagtgttttaaatttatgttttacaacat\n+tttcctaattttatttaagtttaatttttaatttaataaagttttattaa\n+ataaataatttgtaaattagtaaattttataaatttaatttattattaaa\n+atataattgaagaacttgaagtcttgatcaaatgttttttaaagacttag\n+gctttatattaaagctggcttctgccctatgatatttaaatggcagtctt\n+agcgtgaggacattaaggtagcaaaataatttgtgcttttattgagttcc\n+agtatgaatgaagttattggttagttctatttatgttttatgtttgaatt\n+taatttttatttaagaaaaaataaatatatttatacaaagataagtcttc\n+ggaaattctgttattacacaattaaataattgtgtaataaattttctagg\n+gcagaatattatataatagtatttcactatatttaatttaaagaattact\n+ccggagttaacagaaaatcatacctaatctagtacttatagtaaggtaag\n+ttttacatcgatgttgtattcagataatctaagagaggagaaggcttagt\n+agtttagactgttcttctattaattaatctgacgtgatattagtttaatt\n+cattgtgagatagaattgtttatcttgataaatatttatatttaatacat\n+ttagtacgaaaggaacattgtaaaagttttaaactttaaagattttgaaa\n+tctttattttagtgctattaatagttttagtgtttacgctagttttatta\n+tttgctttttatttgattaattttttattaagaattaaggatataggaaa\n+aaataaaattagagcgtttgaatgtggttttgtaagagttggaaaaattc\n+aaaattcttttagaattcatttttttattatgatattgatatttgttatt\n+tttgatttagaaattgttatgtttttaggtattttagtatcagatttaag\n+ttcgtatatcaggtttttaataatattcatcttcatcttgggaggatttt\n+acatagagtgatgatatggtaaattagtttgagtaatttaattaatattt\n+ctatttttttgattggatttgttttttttataggtggaattagtgtttgg\n+cttat
acccacatttaaattaggaatcttttttttagaatgagatttttt\n+aaggttaaaatttaatttttattttaatagaatcttattttcgtttattc\n+tttttttggtaacgtttagagttttagtttttagtacttattatttaaat\n+agtgagttaaactttaattattattattttgtattgttaattttcgtagg\n+tagaatgtttaggctaaattttagaaacagtatttttacaatgttactaa\n+gatgagatttattgggtatttctaggttttttttagttttattttataat\n+aattgagatagatgtaggggtgcaataaatacagcattaactaatcgtct\n+aggtgattattttatatttgtcttttttggtttatcggtttttagaggtt\n+attattttttaagatttagaatatttagaagttatatatctttattatta\n+cttttaacagcttttactaaaagagcacaatttccatttagatcttggtt\n+acccaaagctataagagcccccacaccggtgaggtctttggttcatagta\n+gaactttagttacagcaggattaattctattaataaattttaataattta\n+gtaatacagaaagattttatcagttttgttctgattattggcctatttac\n+tatatttttttctagcttagcaagtttggtcgaagaagatttgaagaagg\n+tggtagccttgaggacactttcacaaataggtttttcaatagttactttg\n+ggcctagggcttaggtttatttcatttattcatttagttagacatgcttt\n+gttcaaaagatgtttatttatacaagtaggttacattattcattgttcat\n+ttggacaacaagatggacgtaattataggaataatggtaatttaccaaat\n+tttattcagttacaaatattagtaaccctattttgtttatgtggattaat\n+tttctcaaggggtgctgtaagaaaagattttattttagaattattttttt\n+ctaataactatataatgttttttagattaatattttttgtttcagtgttt\n+ttgacttttggttacagttttcgtctttgaaaaagattttttttaaggtt\n+taataaagtaataaatcattacagtagcacagtatttataaattttttaa\n+gtttagtattagttattttttctattagatttttatgatgaataaatttt\n+aatcttcttaacatcccaagacttttcttatacgtagatttttttggccc\n+tctagtatttttatttataataatttttttatcttttttaattttaaaaa\n+tattatttaaggagttaatatacaagtttttagttgattatttggctaaa\n+aatagtatttataaaataaagaatttaaaatttatagatttatttttaaa\n+taatattaattctaaggggtacaccttatttttaagcagtggtatattta\n+aaaattactatttaaaaaggttaaattttaatagtgtagtagttttaatt\n+tttattttttttataatttgttaagggattttagtttaataaaaatatat\n+gttttgcatacataagataataattctagatagttttacacgcgcgtata\n+cgcgcgttataaatatatatatatatatatatatatatatatatatataa\n+taataatattatatttatattataaatataatatttattataaattatat\n+attatatttatattataaatataatatttattataaattatatattatat\n+ttatattataaatataatatttattataaattatatattatatttatatt\n+ataaatataatatttattataaattatatattatatttatattataaata\n+taatatttattatatattatatatattatatttatattataaat
ataata\n+tttattattaataattcatcatatttatattataaatatgatgaagtact\n+aaaaaaaagatgaatattctataatatatttagatatattatagagtatt\n+tatcttattatttatagatatatactttgtatatatctatatta\n'

diff -r 000000000000 -r aa82b2e54055 test-data/a.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/a.sam Sat Nov 11 18:19:22 2017 -0500

b'@@ -0,0 +1,824 @@\n+@HD\tVN:1.5\tSO:coordinate\n+@RG\tID:000\tSM:N2\n+@RG\tID:266-1\tSM:ot266\tDS:mutant strain carrying the vab-3 allele ot266\tCN:Columbia University\tPL:Illumina\n+@SQ\tSN:chrI\tLN:25050\tM5:7d9f340160ce45568f9bf48d8e16dcf4\n+@SQ\tSN:chrII\tLN:25050\tM5:ba5a89bf12f55a4bd88ddf14e6d27bd6\n+@SQ\tSN:chrIII\tLN:25050\tM5:ba78739f84b6c850448bec286bdd8798\n+@SQ\tSN:chrIV\tLN:25050\tM5:0807d4df5c3593363a5b015f19ef81c2\n+@SQ\tSN:chrV\tLN:25050\tM5:cf942c1556ef72fd9ba352e4a7d9cf9e\n+@SQ\tSN:chrX\tLN:25050\tM5:5ce1b12d74140cfc288b493c16fe280b\n+@SQ\tSN:MtDNA\tLN:13794\tM5:199e147d502d88e45047413dc83c039c\n+@PG\tID:SNAP\tPN:MiModD snap\tVN:0.1.8.0\tCL:paired /var/tmp/batch_dataset_548966_dataset_35_20_0.3 /var/tmp/batch_dataset_54896612531.sam -d 8 -S 200 -n 25 -a 7 -F s -o /var/tmp/batch_dataset_54896615435.tmp -h 250 -c 2 -C++ -M -s 100 10000 -t 2\n+BS-DSFCONTROL03:121:D0M8KACXX:5:1314:19049:65217\t99\tchrI\t556\t60\t100M\t=\t702\t246\tCCTATATGCTCCTGTGTTTAGGCCTAATACTAAGCCTAAGCCTAAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAATACTAAGCCTAAGCCTAAG\tCCCFFFFDHHHHHJJJJJJJIJJJJJJIIIIIJIJJJJJGIIJIIHIJIJJHIJIJJHIJJIIJJJIIIIGGGIJHHHHHFFEFFFEECEEDDCACDDC?\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n+EAGLE:1:56:17087:17617/1\t0\tchrI\t575\t60\t101M\t*\t0\t0\tAGGCCTAATACTAAGCCTAAGCCTAAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAAGACTAAGCC\tGGFGGGDGFGEFBGFGFGGGGGGFGGEGGGGGGGGGGGGGGGGGEGGFFEFGGFFGEAFGFDEGGGGGGGEEFDDEEEEEGFGGEGEDBEEAEDEECFEBC\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:0\n+BS-DSFCONTROL03:121:D0M8KACXX:5:1314:19049:65217\t147\tchrI\t702\t60\t100M\t=\t556\t-246\tAGCCTAAGACTAAGCCTAAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAAGACTAAGCCTAAGAC\tCCC>AA>;(CCC;C@3HEEEGCIGED=EGGGJHGJIGGCIJIIDFGGIGDIHJIHEGIHJIIEGGIIGJJJIGIGJIJJJIGDIHGIHHFHGFFFFFCB@\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n+EAGLE:2:81:6795:13435/1\t16\tchrI\t842\t60\t101M\t*\t0\t0\tTTGAAATGAATATCGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAAGTTGCAATTTTTTTAGCTAGGACACTTTTTGTCTGCCCAAATA\tA:@EEEEEBEEEEEEEEEGEEE
EGEEEEEEEED=BFEEEGEEGEGFGGGEEGGGGGGGGFGGGFDDAGG=GGEGGGFFFEFEECBFDGGEGDGDGGGFGEG\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:8\n+EAGLE:1:46:9721:15037/1\t16\tchrI\t842\t60\t101M\t*\t0\t0\tTTGAAATGAATATCGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAATTTGCAATTTTTTTAGCTAGGACACTTTTTGTCTGCCCAAATA\tAEEBEBBEEEAAEFEFFFE?FCCCBC?DDBD5DD=D:DDC:CC?C>6EB@CEEEDDEECEEEEE8BFFEEFEEADDCCC@CBCCEEFDFFAEFGFDFGGGD\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:7\n+EAGLE:2:77:15674:6412/1\t0\tchrI\t844\t60\t101M\t*\t0\t0\tGAAATGAATATCGTAGCTACAGAAACGGCAGTACACTCTTCTGAAAATACAAAAAATTTGCAATTTTTATAGCTAGGACACTTTTTGTCTGCCCAAACATA\tGFFGGGCGDFGEGDFEEFEGGFEGFEEEACC?BCCDDCDECBECDEFEECEEDEEEEEEEEBEEEEEEAEEEEE=AC:=CBBCCCC@::?CACC@BBBABE\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:6\n+EAGLE:2:88:10221:3318/1\t16\tchrI\t845\t60\t101M\t*\t0\t0\tAAATGAATATCGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAATTTGCAATTTTTTTAGCTAGGACACTTTTTGTCTGCCCATATATAG\tAEBABBEEBGEGEBGGGEEEEGEEGGEEGGCEEEFEFEEBGGFDEGAEFGGEDFF=EEGEGGGGGG?DBDFGGFGGDFECCFDDCGEEEE:AG?GFFGDGG\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:5\n+EAGLE:2:87:4119:14575/1\t16\tchrI\t845\t60\t101M\t*\t0\t0\tAAATGAATATCGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAATTTGCAATTTTTATAGCTAGGACACTTTTTGTCTGCCCAAATTTAA\tGEEGEDEEGEGGEAEFEB?DEEDBGGBGGFGEFBGFGGBGGGGGFGGEGFGGGGGGGGGGGGFEFAGGGGEGGDGGGGDFGGBFGGGGDFDGGDGGGGGEG\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:5\n+EAGLE:2:95:9634:15406/1\t16\tchrI\t845\t60\t101M\t*\t0\t0\tAAATGAATATCGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAATTTGCAATTTTTTTAGCTAGGACACTTTTTGTCTGCCCAAATATAA\tFEEE?GEFEBE:ECE=BEGGGEGEGBFFFEFAFDFFGEBBGGGFGDGEGGGEEEEE5FFFFCFGAEGGEGGGGGGGFGDGGGFGGGGGGGFGDFFFGGGFD\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:5\n+BS-DSFCONTROL03:121:D0M8KACXX:5:2113:3662:39793\t73\tchrI\t846\t60\t100M\t=\t846\t0\tAATGAATATCGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAATTTGCAATTTTTATAGCTAGGACACTTTTTGTCTGCCCAAATTTAA\tCCCFFFFFHHHHHJJJJIIIIJIJJJFGIGIIJJIGIJIJJGEHIHJHEHHHJIIJJJJJIJJIGIJIHHHHHHEFFFFECEEDBDDCCDDDDD?CDDDC\tPG:Z:SNAP\tRG:Z:000\tNM:i:5\n+BS-DSFCONTROL03:121:D0M8KACXX:5:2113:3662:39793\t133\tchrI\t846\t0\t*\t=\t846\t0\tGT
GTACTACCGTTTCTGGAGCTACGATTTATTTTTCAAAACACGCAAAAAGTGTTTTTCACTATGTACAAAATCATCTCAAAATCGGAAATATTTTTTGT\t<<<@@@@@@@@@@???#################################################################################'..b"FCCE>FDEDEAAGAGCCEEGEEGGFEFEFEGD@EFADEFCC>E?E=BBBAEBF\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:0\n+EAGLE:2:35:14999:3587/1\t0\tMtDNA\t12745\t60\t101M\t*\t0\t0\tTTTTTTCTAATAACTATATAATGTTTTTTAGATTAATATTTTTTGTTTCAGTGTTTTTGACTTTTGGTTACAGTTTTCGTCTTTGAAAAAGATTTTTTTTC\tDEFFFFFFFFBFD?FFFBFFFFFAFEEBECFDFFFFFDAFFFFFFCFFFBECECEFFEC?FBFFFDDEE??CDBEFFFFEEFFFFABFFFE8CFFFFFFE;\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:1\n+BS-DSFCONTROL03:121:D0M8KACXX:5:1101:18737:28666\t99\tMtDNA\t12755\t60\t100M\t=\t12783\t128\tTAACTATATAATGTTTTTTAGATTAATATTTTTTGTTTCAGTGTTTTTGACTTTTGGTTACAGTTTTCGTCTTTGAAAAAGATTTTTTTTAAGGTTTAAT\tCCCFFFFEHHHHHJJJJJJGJGHJJJIGIIJJJJJHIJIIIGHIJJJJJHIIJJJJJIJJJJJIGHHHHFFFFEECECEDDBDDDDDDDDBCCDCCDDDA\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n+EAGLE:1:53:16100:4061/1\t0\tMtDNA\t12755\t60\t101M\t*\t0\t0\tTAACTATATAATGTTTTTTAGATTAATATTTTTTGTTTCAGTGTTTTTGACTTTTGGTTACAGTTTTCGTCTTTGAAAAAGATTTTTTTTAAGGTTTAATA\tFFDFFDEFDFAFE=FDFE@AFB?DFFF5FF?D@<@ACAEEE:ECEEEAA=FFDFDFADDFEEE@EEEEE@AABBB=AEEEDCCFDBEFE@@@DBADEDBBF\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:0\n+BS-DSFCONTROL03:121:D0M8KACXX:5:1303:13602:38947\t147\tMtDNA\t12781\t60\t100M\t=\t12495\t-386\tTATTTTTTGTTTCAGTGTTTTTGACTTTTGGTTACAGTTTTCGTCTTTGAAAAAGATTTTTTTTAAGGTTTAATAAAGTAATAAATCATTACAGTAGCAC\t:?BBBB?CC@5CCCA9'=>>5(;>C;;A>>?;B@BFEFHEBG@=@>FB;BCGB?<GDEDC9<F<EEA<4D>HEGJHE<C<?<?A<2,,?B=2A4DDD8@?\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n+BS-DSFCONTROL03:121:D0M8KACXX:5:1101:18737:28666\t147\tMtDNA\t12783\t60\t100M\t=\t12755\t-128\tTTTTTTGTTTCAGTGTTTTTGACTTTTGGTTACAGTTTTCGTCTTTGAAAAAGATTTTTTTTAAGGTTTAATAAAGTAATAAATCATTACAGTAGCACAG\tBDBDDDDCCDDDDBBDDDDEDCDDDDEEEFFFFFFHHFIJJJJJJJJIIHJIIJJJJJJJHGJJJJIJIJJIJJJJIJJJJJJJJJJGHHHHFFFDFCCC\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n+EAGLE:2:109:6839:9395/1\t16\tMtDNA\t12786\t60\t101M\t*\t0\t0\tTTTGTTTCAGTGTTTTTGACTTTTGGTTACAGTTTTCGTCTTTGAAAAAGATTTTTTTTAAGGTTT
AATAAAGTAATAAATCATTACAGTAGCACAGTATT\tGGGGGFEFFGGGGGGDGGDGFDGGGGGGFGGGGGGFFGGGFGGGGGGGGGGGGGGGGGGFBGGGGGDGGGGGGGGGGGGGGFGGGGGGGGEGGFGGGGGGG\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:0\n+EAGLE:2:89:16184:15896/1\t16\tMtDNA\t12850\t60\t101M\t*\t0\t0\tTTAATAAAGTAATAAATCATTACAGTAGCACAGTATTTATAAATTTTTTAAGTTTAGTATTAGTTATTTTTTCTATTAGATTTTTATGATGAATAAATTTT\tEEEEFEEDFEEEGEBEEGEEBEGEEFGEGEGDGBFGFGGFGDFGEGGFEGGGFGEGGGGGFFGCEGGGGGGDGFFGDEGDGGGEGFDGGGGEGFGFGGGGG\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:0\n+EAGLE:2:73:18789:5004/1\t0\tMtDNA\t12862\t60\t97M1I3M\t*\t0\t0\tTAAATCATTACAGTAGCACAGTATTTATAAATTTTTTAAGTTTAGTATTAGTTATTTTTTCTATTAGATTTTTATGATGAATAAATTTTAATCTAATAGAA\tGFEGFGGF=FEADEEEEEEDEEDEEFFFFFGGGGGGDGGGEGDFABE:EEBA=BBFFCCCGE@FF?BBBCACCB8>BEEEFFFGGFFGGE?EEEBD5?BBE\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:4\n+BS-DSFCONTROL03:121:D0M8KACXX:5:1114:11635:22812\t99\tMtDNA\t12940\t60\t100M\t=\t13083\t243\tGAATAAATTTTAATCTTCTTAACATCCCAAGACTTTTCTTATACGTAGATTTTTTTGGTCCTCTAGTATTTTTATTTATAATAATTTTTTTATCTTTTTT\tCCCFFFFFHHHHHJJJJIJJJJJJJIJJJJJJJJJJJJJJJJIIJHIJIJGHJJJJJIHHIJJJFHEHHHHHFFFFFFFEEFEEEFEDDDDDDDDDDDD<\tPG:Z:SNAP\tRG:Z:000\tNM:i:1\n+EAGLE:1:50:13913:15312/1\t16\tMtDNA\t13005\t60\t101M\t*\t0\t0\tGTATTTTTATTTATAATAATTTTTTTATCTTTTTTAATTTTAAAAATATTATTTAAGGAGTTAATATACAAGTTTTTAGTTGATTATTTGGCTAAAAATAG\t?>:>;::BCBB::FFF@@8<96BABC=@DFFBF=EDFDEFCAE=?EE?FAFEEC?=EEEC=FDEEBEEDAFF?EFFEBEDDEEEEBCDDD?E@<DDDEECE\tPG:Z:SNAP\tRG:Z:266-1\tNM:i:0\n+BS-DSFCONTROL03:121:D0M8KACXX:5:1114:11635:22812\t147\tMtDNA\t13083\t60\t100M\t=\t12940\t-243\tGTTGATTATTTGGCTAAAAATAGTATTTATAAAATAAAGAATTTAAAATTTATAGATTTATTTTTAAATAATATTAATTCTAAGGGGTACACCTTATTTT\t@CFEDEFFFHGHGIJJJJJJJIHEGJJIHCJJJJJJJJJJJJJJJJJJJJJJJIJJIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHDHFFFFFCC@\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n+BS-DSFCONTROL03:121:D0M8KACXX:5:2115:12001:40456\t99\tMtDNA\t13159\t60\t100M\t=\t13242\t183\tATTCTAAGGGGTACACCTTATTTTTAAGCAGTGGTATATTTAAAAATTACTATTTAAAAAGGTTAAATTTTAATAGTGTAGTAGTTTTAATTTTTATTTT\t@@@FFFFDFFHDFGGEB;AB<EHGGGBH@FH?CC?F@GGIII>DGHGEDBDHHEDHGHIIGCHGI@@FGHIHG
GIC==EC>)=A?CD>DDDEEECAACDD\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n+BS-DSFCONTROL03:121:D0M8KACXX:5:2115:12001:40456\t147\tMtDNA\t13242\t60\t100M\t=\t13159\t-183\tGTTTTAATTTTTATTTTTTTTATAATTTGTTAAGGGATTTTAGTTTAATAAAAATATATGTTTTGCATACATAAGATAATAATTCTAGATAGTTTTACAC\t@DDDDDDDCACDDDFGJIIJIJIIG@HIIJIHIHJJIIGJJIHGGIEIIGIIIIJIGIJJJJIGGCIIIGJIJJJIJJJIHIIJJJJHHFDGFFDDD@@@\tPG:Z:SNAP\tRG:Z:000\tNM:i:0\n"

diff -r 000000000000 -r aa82b2e54055 test-data/a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/a.vcf Sat Nov 11 18:19:22 2017 -0500

b'@@ -0,0 +1,192 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##reference=/home/wolma/galaxy_2017/database/files/000/dataset_35.dat\n+##contig=<ID=chrI,length=25050>\n+##contig=<ID=chrII,length=25050>\n+##contig=<ID=chrIII,length=25050>\n+##contig=<ID=chrIV,length=25050>\n+##contig=<ID=chrV,length=25050>\n+##contig=<ID=chrX,length=25050>\n+##contig=<ID=MtDNA,length=13794>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=ICB,Number=1,Type=Float,Description="Inbreeding Coefficient Binomial test (bigger is better)">\n+##INFO=<ID=HOB,Number=1,Type=Float,Description="Bias in the number of HOMs number (smaller is better)">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases">\n+##FORMAT=<ID=DPR,Number=R,Type=Integer,Description="Number of high-quality bases observed for each allele">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Phred-scaled Genotype Quality">\n+##source=MiModD varcall (version 0.1.8.0)\n+##MiModDCommand=mimodd varcall /home/wolma/galaxy_2017/database/files/000/dataset_35.dat /home/wolma/galaxy_2017/database/files/000/dataset_54.dat --index-files /home/wolma/galaxy_2017/database/files/_metadata_files/000/metadata_14.dat --ofile /home/wolma/galaxy_2017/database/files/000/dataset_58.dat --depth 250 --verbose --quiet\n+##samtoolsCommand=n.a. (wrapped by MiModD)\n+##bcftools_callCommand=n.a. 
(wrapped by MiModD)\n+##rginfo=<ID=0,Rgid="000",Name="N2">\n+##rginfo=<ID=1,Rgid="266-1",Name="ot266",Description="mutant strain carrying the vab-3 allele ot266">\n+##samtoolsVersion=1.2+htslib-1.2.1\n+##bcftools_callVersion=1.2+htslib-1.2.1\n+##bcftools_concatVersion=1.2+htslib-1.2.1\n+##bcftools_viewVersion=1.2+htslib-1.2.1\n+##bcftools_viewCommand=view /home/wolma/galaxy_2017/database/files/000/dataset_58.dat\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tN2\tot266\n+chrI\t855\t.\tG\tC\t231.0\t.\tDP=27;VDB=2.48853e-13;SGB=13.8822;MQSB=1;MQ0F=0;AC=2;AN=2;DP4=0,0,11,13;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:255,72,0:24:0,24:86\n+chrI\t885\t.\tt\ttTCT\t69.0\t.\tINDEL;IDV=4;IMF=0.0769231;DP=56;VDB=2.27029e-09;SGB=1.27626;MQSB=1;MQ0F=0;ICB=0.3;HOB=0.125;AC=1;AN=4;DP4=16,17,3,1;MQ=60\tGT:PL:DP:DPR:GQ\t0/0:0,3,60:1:1,0:11\t0/1:105,0,255:36:32,4:96\n+chrI\t912\t.\tA\tT\t195.0\t'..b',0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t1622\t.\tG\tT\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t1648\t.\tA\tC\t12.5173\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:37,3,0:1:0,1:4\n+chrX\t1650\t.\tT\tA\t10.6576\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:35,3,0:1:0,1:4\n+chrX\t6015\t.\tA\tT\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:38,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6032\t.\tG\tA\t8.01478\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:32,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6058\t.\tta\tt\t15.3937\t.\tINDEL;IDV=1;IMF=1;DP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:40,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6118\t.\tA\tG\t15.394\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:40,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6143\t.\tG\tA\t12.
5173\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:37,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6984\t.\tT\tA\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t7025\t.\tA\tG\t12.5173\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:37,3,0:1:0,1:4\n+chrX\t10527\t.\tc\tcC\t8.86989\t.\tINDEL;IDV=1;IMF=1;DP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:33,3,0:1:0,1:4\n+chrX\t10535\t.\tT\tC\t11.8912\t.\tDP=2;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:36,3,0:1:0,1:6\n+chrX\t10562\t.\tC\tG\t18.2649\t.\tDP=7;VDB=0.18;SGB=0.00473136;RPB=0.8;MQB=1;MQSB=1;BQB=0.9;MQ0F=0;ICB=1;HOB=0.5;AC=1;AN=2;DP4=4,1,2,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t0/1:49,0,131:7:5,2:48\n+chrX\t10595\t.\tT\tG\t148.0\t.\tDP=7;VDB=0.00429645;SGB=2.55596;MQSB=1;MQ0F=0;AC=2;AN=2;DP4=0,0,6,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:172,21,0:7:0,7:29\n+chrX\t10629\t.\tgt\tg\t45.8065\t.\tINDEL;IDV=4;IMF=0.666667;DP=6;VDB=0.14;SGB=0.0985265;MQ0F=0;AC=2;AN=2;DP4=0,0,2,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:70,6,0:2:0,2:13\n+chrX\t10630\t.\tT\tC,G\t10.4616\t.\tDP=2;VDB=0.32;SGB=0.0985265;MQSB=1;MQ0F=0;AC=1,1;AN=2;DP4=0,0,1,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0,0,0,0:0:0,0,0:0\t1/2:58,29,26,32,0,29:2:0,1,1:25\n+chrX\t10643\t.\tG\tT\t34.5919\t.\tDP=3;VDB=0.02;SGB=0.0985265;MQSB=1;MQ0F=0;AC=2;AN=2;DP4=0,0,1,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:59,6,0:2:0,2:11\n+chrX\t15480\t.\tT\tC\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t15495\t.\tG\tC\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t24890\t.\tC\tG\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=
60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t24935\t.\tA\tC\t11.5799\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:36,3,0:1:0,1:4\n+MtDNA\t954\t.\tG\tT\t4.79534\t.\tDP=2;SGB=-0.516033;RPB=1;MQB=1;BQB=1;MQ0F=0;ICB=0.5;HOB=0.5;AC=2;AN=4;DP4=0,1,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t0/1:36,3,0:1:0,1:3\t0/1:0,3,33:1:1,0:3\n+MtDNA\t5979\t.\tC\tA\t7.18923\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:31,3,0:1:0,1:4\n+MtDNA\t8429\t.\tA\tG\t38.5774\t.\tDP=2;VDB=0.36;SGB=-0.759771;MQSB=1;MQ0F=0;AC=4;AN=4;DP4=0,0,1,1;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:26,3,0:1:0,1:9\t1/1:37,3,0:1:0,1:9\n+MtDNA\t12134\t.\tA\tG\t6.22224\t.\tDP=2;SGB=-0.516033;RPB=1;MQB=1;BQB=1;MQ0F=0;ICB=0.5;HOB=0.5;AC=2;AN=4;DP4=0,1,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t0/1:0,3,41:1:1,0:3\t0/1:38,3,0:1:0,1:3\n+MtDNA\t12637\t.\tG\tT\t8.72328\t.\tDP=2;SGB=-0.516033;RPB=1;MQB=1;BQB=1;MQ0F=0;ICB=0.5;HOB=0.5;AC=2;AN=4;DP4=1,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t0/1:41,3,0:1:0,1:3\t0/1:0,3,25:1:1,0:3\n+MtDNA\t12998\t.\tC\tT\t14.4264\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:39,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n'

diff -r 000000000000 -r aa82b2e54055 test-data/a_part1.bam

Binary file test-data/a_part1.bam has changed

diff -r 000000000000 -r aa82b2e54055 test-data/a_part2.bam

Binary file test-data/a_part2.bam has changed

diff -r 000000000000 -r aa82b2e54055 test-data/a_part2.bcf

Binary file test-data/a_part2.bcf has changed

diff -r 000000000000 -r aa82b2e54055 test-data/ce11ToCe10.over.chain
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ce11ToCe10.over.chain Sat Nov 11 18:19:22 2017 -0500

b'@@ -0,0 +1,871 @@\n+chain 1420037770 chrI 15072434 + 0 15072434 chrI 15072423 + 0 15072423 5\n+288367\t0\t1\n+49582\t0\t1\n+3348\t0\t1\n+884584\t1\t0\n+731267\t0\t1\n+203378\t1\t0\n+1324625\t0\t1\n+399852\t13\t13\n+122370\t1\t0\n+213502\t1\t0\n+782497\t1\t0\n+39234\t0\t3\n+867656\t1\t0\n+595979\t1\t0\n+295566\t2\t0\n+147478\t1\t1\n+23\t1\t1\n+191487\t13\t13\n+8983\t1\t0\n+213892\t1\t0\n+58238\t1\t0\n+125173\t11\t11\n+206247\t1\t0\n+126100\t1\t1\n+46\t1\t1\n+281857\t1\t0\n+118832\t1\t0\n+67661\t0\t1\n+470697\t1\t0\n+457535\t1\t0\n+70047\t4\t4\n+38252\t0\t1\n+84452\t1\t0\n+164033\t1\t0\n+319488\t1\t0\n+78543\t1\t0\n+82353\t0\t1\n+129773\t0\t1\n+246657\t1\t0\n+5362\t1\t0\n+354010\t0\t4\n+296600\t1\t0\n+240963\t1\t0\n+37688\t1\t0\n+81200\t0\t1\n+883251\t1\t0\n+528183\t1\t0\n+98784\t1\t1\n+39\t1\t1\n+65027\t1\t0\n+20874\t0\t1\n+542549\t1\t0\n+216555\t1\t1\n+30\t1\t1\n+489109\t1\t0\n+27002\t0\t1\n+222382\t0\t1\n+473092\n+\n+chain 1440101603 chrII 15279421 + 0 15279421 chrII 15279345 + 0 15279345 
4\n+523340\t1\t0\n+72281\t0\t8\n+1033090\t1\t0\n+219329\t1\t0\n+1025107\t1\t0\n+69649\t1\t0\n+298884\t0\t1\n+80283\t1\t0\n+210227\t6\t6\n+264469\t1\t0\n+150004\t0\t1\n+195807\t4\t4\n+19\t1\t1\n+130806\t1\t0\n+48603\t0\t1\n+10950\t0\t1\n+40978\t1\t0\n+8097\t1\t0\n+8850\t1\t0\n+8123\t1\t0\n+69125\t11\t11\n+10907\t1\t0\n+5353\t1\t0\n+8256\t1\t0\n+17809\t0\t1\n+114\t1\t0\n+227028\t1\t0\n+45383\t1\t0\n+58370\t0\t1\n+20457\t1\t0\n+58010\t1\t0\n+87001\t1\t0\n+14949\t1\t0\n+74773\t1\t0\n+28654\t0\t1\n+10953\t1\t0\n+15259\t1\t0\n+40597\t1\t0\n+27717\t1\t0\n+6740\t1\t0\n+4824\t1\t1\n+45\t1\t1\n+17113\t1\t0\n+10452\t1\t0\n+10008\t1\t0\n+40\t1\t0\n+325484\t1\t0\n+101544\t1\t0\n+37009\t1\t0\n+81375\t1\t0\n+31951\t0\t1\n+15484\t1\t0\n+2002\t1\t0\n+18194\t1\t0\n+49533\t1\t0\n+10227\t1\t0\n+24688\t1\t0\n+679\t0\t1\n+16673\t0\t1\n+7053\t1\t0\n+1411\t1\t0\n+47812\t1\t0\n+17219\t1\t0\n+61795\t1\t0\n+86135\t1\t0\n+946\t1\t0\n+111201\t1\t0\n+218005\t0\t1\n+11971\t1\t0\n+73658\t1\t0\n+19417\t1\t0\n+72871\t1\t0\n+283\t1\t0\n+2842\t1\t0\n+3514\t0\t1\n+840\t1\t0\n+3474\t1\t0\n+955\t1\t0\n+2287\t1\t0\n+2361\t1\t0\n+1089\t0\t1\n+14383\t1\t0\n+5488\t1\t0\n+4796\t0\t1\n+4398\t16\t15\n+5031\t1\t0\n+113079\t1\t0\n+23\t1\t1\n+66852\t0\t1\n+5830\t0\t1\n+190601\t0\t1\n+198305\t1\t0\n+4222\t1\t0\n+53719\t1\t0\n+756\t0\t1\n+4814\t0\t1\n+30348\t1\t0\n+24557\t1\t0\n+20774\t1\t0\n+27794\t1\t0\n+5248\t1\t0\n+16975\t1\t0\n+88212\t0\t1\n+8139\t1\t0\n+9120\t1\t0\n+5934\t1\t0\n+299\t1\t0\n+3783\t0\t1\n+29302\t1\t0\n+22820\t1\t0\n+2328\t1\t0\n+55710\t0\t1\n+45380\t0\t1\n+3738\t0\t1\n+6318\t1\t0\n+15813\t1\t0\n+53875\t0\t1\n+153420\t1\t0\n+153960\t0\t1\n+23506\t0\t1\n+150079\t1\t0\n+261706\t0\t1\n+119773\t1\t0\n+980\t1\t0\n+5188\t1\t0\n+7342\t1\t0\n+3721\t1\t0\n+38949\t1\t0\n+15594\t1\t0\n+7944\t1\t0\n+2944\t1\t0\n+8840\t1\t0\n+63008\t1\t0\n+20236\t1\t0\n+117305\t0\t1\n+56817\t1\t0\n+60924\t0\t1\n+29445\t0\t1\n+118537\t0\t1\n+119230\t1\t0\n+8658\t1\t0\n+63097\t0\t1\n+2875\t0\t1\n+33991\t1\t0\n+20186\t0\t1\n+284
50\t1\t0\n+33646\t0\t1\n+347416\t0\t1\n+7259\t1\t0\n+241675\t1\t0\n+220929\t0\t1\n+3002\t1\t0\n+136217\t0\t1\n+262982\t1\t0\n+147119\t0\t1\n+54695\t0\t1\n+131126\t1\t0\n+32574\t1\t0\n+1218\t1\t0\n+716\t1\t0\n+3556\t1\t0\n+2353\t1\t0\n+348169\t1\t0\n+3214\t1\t0\n+57644\t0\t1\n+81\t0\t1\n+90130\t0\t1\n+2762\t0\t1\n+14905\t1\t0\n+171057\t1\t0\n+7929\t0\t1\n+62035\t1\t0\n+269321\t1\t0\n+42811\t0\t1\n+289174\t1\t0\n+45545\t1\t0\n+61150\t1\t0\n+6930\t1\t0\n+1603\t1\t0\n+11627\t1\t0\n+75337\t0\t1\n+20219\t1\t1\n+38\t1\t0\n+11375\t0\t1\n+4399\t0\t1\n+17225\t1\t0\n+205976\t1\t0\n+8055\t1\t0\n+2824\t0\t1\n+701191\t0\t1\n+99519\t3\t0\n+48815\t1\t0\n+21995\t1\t0\n+138638\t1\t0\n+223081\t1\t0\n+196633\t0\t1\n+274626\t1\t0\n+540443\n+\n+chain 1298422871 chrIII 13783801 + 0 13783801 chrIII 13783700 + 0 13783700 6\n+38559\t1\t0\n+441671\t1\t0\n+63422\t1\t0\n+121743\t1\t0\n+6719\t1\t0\n+86754\t0\t1\n+32684\t1\t0\n+12353\t0\t1\n+1849\t1\t0\n+5970\t1\t0\n+10118\t1\t0\n+25395\t3\t0\n+24340\t1\t0\n+157191\t0\t1\n+124351\t0\t1\n+24720\t0\t1\n+422326\t1\t0\n+36527\t1\t1\n+85\t1\t1\n+324735\t1\t0\n+292394\t1\t0\n+73267\t0\t1\n+214327\t0\t1\n+111579\t0\t1\n+508663\t1\t0\n+882\t6\t6\n+92472\t21\t21\n+16578\t1\t1\n+27\t1\t1\n+68156\t0\t1\n+7040\t1\t0\n+8771\t0\t1\n+22246\t1\t0\n+331\t1\t0\n+12248\t1\t0\n+35539\t0\t1\n+4829\t1\t0\n+5824\t1\t0\n+37908\t1\t0\n+120270\t1\t0\n+5051\t1\t0\n+13804\t1\t0\n+11789\t1\t0\n+15400\t1\t0\n+74896\t0\t1\n+719\t1\t0\n+113657\t1\t0\n+6963\t0\t1\n+30063\t1\t0\n+18197\t1\t0\n+105520\t0\t1\n+4628\t1\t0\n+19202\t1\t0\n+4259\t1\t0\n+104\t1\t0\n+13678\t1\t0\n+15727\t1\t0\n+43284\t1\t0\n+117374\t1\t1\n+21\t1\t1\n+1574\t1\t0\n+129530\t1\t0\n+104666\t1\t0\n+2053\t1\t0\n+573\t1\t0\n+55164\t1\t0\n+102195\t0\t1\n+7782\t1\t0\n+15760\t1\t0\n+11953\t1\t0\n+43896\t0\t1\n+3280\t1\t0\n+1883\t1\t0\n+119248\t1\t0\n+130004\t1\t0\n+9095\t1\t0\n+14169\t0\t1\n+11083\t0\t1\n+10070\t0\t1\n+34120\t1\t0\n+12857\t0\t1\n+2225\t1\t0\n+16098\t1\t0\n+18356\t1\t0\n+39055\t1\t0\n+362323\t0\t1\n
+3676\t1\t0\n+40883\t1\t0\n+'..b'820\t1\t0\n+512830\t0\t1\n+500725\t1\t0\n+176265\t1\t0\n+515683\t1\t0\n+67080\t0\t1\n+19249\t1\t0\n+380483\t1\t0\n+46682\t1\t0\n+260457\t1\t0\n+209117\t1\t0\n+552612\t1\t0\n+816610\t1\t0\n+106895\t1\t0\n+12606\t0\t1\n+53468\t1\t0\n+26564\t1\t0\n+275666\t1\t0\n+217083\t0\t1\n+1009889\t1\t0\n+114897\t0\t1\n+500861\t1\t0\n+279757\t1\t0\n+54691\t1\t0\n+209554\t0\t1\n+331356\t1\t1\n+31\t1\t1\n+349157\t1\t0\n+14536\t1\t0\n+82179\t6\t6\n+66027\t0\t1\n+40176\t1\t0\n+32467\t1\t0\n+64194\t1\t0\n+464096\t1\t0\n+1609\t1\t0\n+37905\t1\t0\n+113455\t1\t0\n+243754\t1\t0\n+217119\t0\t1\n+79872\t0\t1\n+15789\t0\t1\n+95\t1\t0\n+423256\t1\t0\n+337914\t1\t0\n+365409\t1\t0\n+469901\t0\t1\n+313459\t1\t0\n+1617\t0\t1\n+4139\t1\t0\n+84\t1\t0\n+75194\t0\t1\n+98987\t0\t1\n+23655\t1\t0\n+32932\t1\t0\n+25720\t0\t1\n+173956\t1\t0\n+1012065\t1\t0\n+484749\t1\t0\n+14808\t1\t0\n+15495\t0\t1\n+14960\t1\t0\n+233789\t1\t0\n+93485\t1\t0\n+21643\t1\t0\n+7923\t1\t0\n+36842\t1\t0\n+1761795\t1\t0\n+465931\t1\t0\n+26392\t1\t0\n+141077\t1\t0\n+126208\t1\t0\n+250326\t0\t1\n+12211\t1\t1\n+18\t1\t1\n+2465\n+\n+chain 1284774 chrM 13794 + 0 13794 chrM 13794 + 0 13794 21\n+13794\n+\n+chain 1970778631 chrV 20924180 + 0 20924180 chrV 20924149 + 0 20924149 
1\n+46067\t12\t12\n+120476\t0\t1\n+1109151\t1\t0\n+501367\t1\t0\n+809514\t1\t0\n+689715\t1\t0\n+310727\t10\t0\n+163621\t1\t0\n+61321\t1\t0\n+327298\t0\t1\n+849676\t1\t0\n+173352\t1\t0\n+594036\t1\t0\n+1109131\t1\t0\n+91244\t1\t1\n+31\t2\t2\n+152235\t1\t0\n+96646\t0\t1\n+181165\t1\t0\n+505137\t1\t0\n+533871\t1\t0\n+107988\t1\t0\n+436036\t1\t0\n+38066\t0\t1\n+147968\t1\t0\n+134182\t1\t0\n+408407\t1\t0\n+52531\t0\t1\n+316822\t1\t0\n+96262\t1\t0\n+71747\t0\t1\n+100380\t0\t1\n+119\t0\t1\n+177955\t1\t0\n+34488\t0\t1\n+47765\t0\t1\n+2476\t0\t1\n+97350\t0\t1\n+166629\t0\t1\n+162898\t0\t1\n+756298\t1\t0\n+808254\t0\t1\n+57008\t0\t1\n+220276\t0\t1\n+117644\t0\t1\n+164846\t0\t1\n+128420\t0\t1\n+139963\t1\t0\n+87896\t0\t1\n+33363\t0\t1\n+166892\t1\t0\n+68228\t1\t0\n+183704\t1\t0\n+971868\t1\t0\n+8203\t1\t0\n+501310\t1\t0\n+87822\t1\t0\n+407506\t1\t0\n+524751\t1\t0\n+230024\t1\t0\n+184578\t1\t0\n+30419\t1\t0\n+284334\t1\t0\n+279971\t1\t0\n+506490\t1\t0\n+13210\t1\t0\n+544744\t0\t1\n+23456\t1\t0\n+228549\t1\t0\n+699556\t1\t0\n+308085\t1\t0\n+1130593\n+\n+chain 1668452079 chrX 17718942 + 0 17718942 chrX 17718866 + 0 17718866 
2\n+365023\t0\t1\n+59520\t1\t1\n+83\t1\t1\n+64150\t1\t1\n+24\t1\t1\n+93332\t1\t0\n+50100\t1\t0\n+239882\t1\t0\n+56187\t1\t0\n+205216\t1\t1\n+31\t1\t1\n+58960\t1\t0\n+6010\t1\t0\n+2980\t0\t1\n+1468\t1\t0\n+599940\t1\t0\n+362963\t1\t0\n+53132\t1\t0\n+151385\t0\t1\n+65081\t1\t0\n+86753\t0\t1\n+76638\t1\t0\n+92856\t0\t1\n+102821\t1\t0\n+99213\t1\t0\n+5814\t1\t0\n+318947\t1\t0\n+373\t1\t0\n+74234\t1\t0\n+788\t0\t1\n+29243\t1\t0\n+53254\t1\t0\n+35608\t0\t1\n+309218\t1\t0\n+21126\t1\t0\n+8877\t1\t0\n+51664\t1\t0\n+8450\t1\t0\n+667\t1\t0\n+88862\t1\t0\n+73166\t1\t0\n+43899\t1\t0\n+7206\t1\t0\n+61617\t1\t0\n+259376\t1\t0\n+144538\t1\t0\n+14187\t1\t0\n+8035\t1\t0\n+35410\t0\t1\n+1696\t0\t1\n+27102\t1\t0\n+109466\t1\t0\n+15952\t1\t0\n+18537\t1\t0\n+193640\t1\t0\n+280834\t1\t0\n+311195\t1\t0\n+1312\t1\t0\n+1534\t1\t0\n+713059\t1\t0\n+2203\t0\t1\n+2892\t1\t0\n+147098\t1\t0\n+286378\t1\t0\n+26340\t1\t0\n+145722\t1\t0\n+84317\t1\t0\n+32118\t1\t0\n+38351\t1\t0\n+4806\t0\t1\n+6350\t1\t0\n+38251\t1\t0\n+48873\t0\t1\n+148664\t1\t0\n+8155\t1\t0\n+66967\t1\t0\n+101375\t0\t1\n+96243\t1\t0\n+52201\t1\t0\n+18554\t1\t0\n+59690\t1\t0\n+5989\t1\t0\n+5241\t1\t0\n+191703\t0\t1\n+17466\t0\t1\n+7649\t0\t1\n+117783\t0\t1\n+42759\t1\t0\n+7022\t1\t0\n+10\t1\t1\n+19190\t1\t0\n+23579\t0\t1\n+87055\t1\t0\n+43801\t1\t0\n+67035\t1\t0\n+99540\t1\t0\n+176603\t0\t1\n+74325\t1\t0\n+5510\t0\t1\n+149444\t1\t0\n+148509\t1\t0\n+56325\t1\t0\n+93537\t1\t0\n+59304\t1\t0\n+60888\t1\t0\n+65\t1\t0\n+6876\t0\t1\n+7468\t1\t0\n+276908\t0\t1\n+8530\t0\t1\n+170578\t1\t0\n+403043\t1\t0\n+107346\t1\t0\n+88001\t0\t1\n+23258\t1\t0\n+173118\t1\t0\n+30465\t1\t0\n+100620\t1\t0\n+62280\t0\t1\n+4126\t1\t0\n+20159\t1\t0\n+48967\t1\t0\n+205739\t1\t0\n+230925\t0\t1\n+102121\t0\t1\n+159162\t1\t0\n+64642\t1\t0\n+71164\t1\t0\n+90786\t0\t1\n+34364\t1\t0\n+835\t1\t0\n+11432\t0\t1\n+94422\t1\t0\n+620787\t0\t1\n+23610\t1\t0\n+52377\t1\t0\n+107672\t0\t1\n+42438\t1\t0\n+235\t1\t0\n+10781\t1\t0\n+13289\t1\t0\n+34685\t0\t1\n+23366\t0\t1\n+267663
\t0\t1\n+126899\t1\t0\n+47857\t0\t1\n+212992\t0\t1\n+15516\t0\t1\n+8186\t1\t0\n+76902\t1\t0\n+3098\t1\t0\n+223975\t0\t1\n+30765\t0\t1\n+197912\t1\t0\n+330005\t1\t0\n+108070\t0\t1\n+254\t1\t0\n+198485\t0\t1\n+22006\t0\t1\n+19263\t1\t0\n+78226\t1\t0\n+39665\t0\t1\n+184579\t1\t0\n+4456\t9\t9\n+270328\t0\t1\n+269016\t0\t1\n+355613\t1\t0\n+87979\t1\t0\n+40892\t13\t13\n+7866\t1\t0\n+57294\t4\t4\n+18253\t1\t0\n+53857\t1\t0\n+4785\t0\t1\n+32952\t0\t1\n+59549\t1\t0\n+40816\t1\t0\n+29424\t1\t0\n+5468\t0\t1\n+36229\t0\t1\n+167876\t1\t1\n+46\t1\t1\n+1224\t1\t0\n+585897\t1\t1\n+20\t1\t1\n+7231\t1\t0\n+323928\t0\t1\n+64761\t1\t0\n+42818\t1\t0\n+105115\t1\t0\n+127679\n+\n'

diff -r 000000000000 -r aa82b2e54055 test-data/header_only.bam

Binary file test-data/header_only.bam has changed

diff -r 000000000000 -r aa82b2e54055 test-data/header_only.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/header_only.sam Sat Nov 11 18:19:22 2017 -0500

@@ -0,0 +1,2 @@
+@HD VN:1.5
+@RG ID:000 SM:N2 DS:C. elegans wt CN:ABC DT:2017-06-27 LB:XYZ PI:400 PL:ILLUMINA PU:SEQ123

diff -r 000000000000 -r aa82b2e54055 test-data/reads_1_w_header.bam

Binary file test-data/reads_1_w_header.bam has changed

diff -r 000000000000 -r aa82b2e54055 test-data/reads_1and2_w_header.bam

Binary file test-data/reads_1and2_w_header.bam has changed

diff -r 000000000000 -r aa82b2e54055 test-data/rebased.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rebased.vcf Sat Nov 11 18:19:22 2017 -0500

b'@@ -0,0 +1,186 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##reference=/home/wolma/galaxy_2017/database/files/000/dataset_35.dat\n+##contig=<ID=chrI,length=15072423>\n+##contig=<ID=chrII,length=15279345>\n+##contig=<ID=chrIII,length=13783700>\n+##contig=<ID=chrIV,length=17493793>\n+##contig=<ID=chrM,length=13794>\n+##contig=<ID=chrV,length=20924149>\n+##contig=<ID=chrX,length=17718866>\n+##ALT=<ID=X,Description="Represents allele(s) other than observed.">\n+##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL.">\n+##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel">\n+##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel">\n+##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth">\n+##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)",Version="3">\n+##INFO=<ID=RPB,Number=1,Type=Float,Description="Mann-Whitney U test of Read Position Bias (bigger is better)">\n+##INFO=<ID=MQB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality Bias (bigger is better)">\n+##INFO=<ID=BQB,Number=1,Type=Float,Description="Mann-Whitney U test of Base Quality Bias (bigger is better)">\n+##INFO=<ID=MQSB,Number=1,Type=Float,Description="Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)">\n+##INFO=<ID=SGB,Number=1,Type=Float,Description="Segregation based metric.">\n+##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)">\n+##INFO=<ID=ICB,Number=1,Type=Float,Description="Inbreeding Coefficient Binomial test (bigger is better)">\n+##INFO=<ID=HOB,Number=1,Type=Float,Description="Bias in the number of HOMs number (smaller is better)">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">\n+##INFO=<ID=DP4,Number=4,Type=Integer,Description="Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases">\n+##INFO=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases">\n+##FORMAT=<ID=DPR,Number=R,Type=Integer,Description="Number of high-quality bases observed for each allele">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Phred-scaled Genotype Quality">\n+##source=MiModD varcall (version 0.1.8.0)\n+##MiModDCommand=mimodd varcall /home/wolma/galaxy_2017/database/files/000/dataset_35.dat /home/wolma/galaxy_2017/database/files/000/dataset_54.dat --index-files /home/wolma/galaxy_2017/database/files/_metadata_files/000/metadata_14.dat --ofile /home/wolma/galaxy_2017/database/files/000/dataset_58.dat --depth 250 --verbose --quiet\n+##samtoolsCommand=n.a. (wrapped by MiModD)\n+##bcftools_callCommand=n.a. 
(wrapped by MiModD)\n+##rginfo=<ID=0,Rgid="000",Name="N2">\n+##rginfo=<ID=1,Rgid="266-1",Name="ot266",Description="mutant strain carrying the vab-3 allele ot266">\n+##samtoolsVersion=1.2+htslib-1.2.1\n+##bcftools_callVersion=1.2+htslib-1.2.1\n+##bcftools_concatVersion=1.2+htslib-1.2.1\n+##bcftools_viewVersion=1.2+htslib-1.2.1\n+##bcftools_viewCommand=view /home/wolma/galaxy_2017/database/files/000/dataset_58.dat\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tN2\tot266\n+chrI\t855\t.\tG\tC\t231.0\t.\tDP=27;VDB=2.48853e-13;SGB=13.8822;MQSB=1;MQ0F=0;AC=2;AN=2;DP4=0,0,11,13;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:255,72,0:24:0,24:86\n+chrI\t885\t.\tt\ttTCT\t69.0\t.\tINDEL;IDV=4;IMF=0.0769231;DP=56;VDB=2.27029e-09;SGB=1.27626;MQSB=1;MQ0F=0;ICB=0.3;HOB=0.125;AC=1;AN=4;DP4=16,17,3,1;MQ=60\tGT:PL:DP:DPR:GQ\t0/0:0,3,60:1:1,0:11\t0/1:105,0,255:36:32,4:96\n+chrI'..b'6,0:2:0,2:9\n+chrV\t14481\t.\tG\tA\t4.15167\t.\tDP=2;SGB=-0.516033;RPB=1;MQB=1;BQB=1;MQ0F=0;ICB=1;HOB=0.5;AC=1;AN=2;DP4=0,1,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t0/1:32,0,32:2:1,1:32\n+chrV\t14488\t.\tG\tT\t4.15167\t.\tDP=2;SGB=-0.516033;RPB=1;MQB=1;BQB=1;MQ0F=0;ICB=1;HOB=0.5;AC=1;AN=2;DP4=0,1,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t0/1:32,0,32:2:1,1:32\n+chrV\t14528\t.\tG\tA\t3.56356\t.\tDP=2;SGB=-0.516033;RPB=1;MQB=1;BQB=1;MQ0F=0;ICB=1;HOB=0.5;AC=1;AN=2;DP4=0,1,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t0/1:31,0,32:2:1,1:31\n+chrV\t14530\t.\tA\tG\t4.13076\t.\tDP=2;SGB=-0.516033;RPB=1;MQB=1;BQB=1;MQ0F=0;ICB=1;HOB=0.5;AC=1;AN=2;DP4=0,1,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t0/1:32,0,22:2:1,1:23\n+chrV\t14538\t.\tT\tC\t32.4055\t.\tDP=2;VDB=0.06;SGB=0.0985265;MQ0F=0;AC=2;AN=2;DP4=0,0,0,2;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:57,6,0:2:0,2:9\n+chrX\t1592\t.\tT\tC\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t1622\t.\tG\tT\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;
MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t1648\t.\tA\tC\t12.5173\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:37,3,0:1:0,1:4\n+chrX\t1650\t.\tT\tA\t10.6576\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:35,3,0:1:0,1:4\n+chrX\t6015\t.\tA\tT\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:38,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6032\t.\tG\tA\t8.01478\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:32,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6058\t.\tta\tt\t15.3937\t.\tINDEL;IDV=1;IMF=1;DP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:40,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6118\t.\tA\tG\t15.394\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:40,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6143\t.\tG\tA\t12.5173\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60\tGT:PL:DP:DPR:GQ\t1/1:37,3,0:1:0,1:4\t./.:0,0,0:0:0,0:0\n+chrX\t6984\t.\tT\tA\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t7025\t.\tA\tG\t12.5173\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:37,3,0:1:0,1:4\n+chrX\t10527\t.\tc\tcC\t8.86989\t.\tINDEL;IDV=1;IMF=1;DP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:33,3,0:1:0,1:4\n+chrX\t10535\t.\tT\tC\t11.8912\t.\tDP=2;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:36,3,0:1:0,1:6\n+chrX\t10562\t.\tC\tG\t18.2649\t.\tDP=7;VDB=0.18;SGB=0.00473136;RPB=0.8;MQB=1;MQSB=1;BQB=0.9;MQ0F=0;ICB=1;HOB=0.5;AC=1;AN=2;DP4=4,1,2,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t0/1:49,0,131:7:5,2:48\n+chrX\t10595\t.\tT\tG\t148.0\t.\tDP=7;VDB=0.00429645;SGB=2.55596;MQSB=1;MQ0F=0;AC=2;AN=2;DP4=0,0,6,1;
MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:172,21,0:7:0,7:29\n+chrX\t10629\t.\tgt\tg\t45.8065\t.\tINDEL;IDV=4;IMF=0.666667;DP=6;VDB=0.14;SGB=0.0985265;MQ0F=0;AC=2;AN=2;DP4=0,0,2,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:70,6,0:2:0,2:13\n+chrX\t10630\t.\tT\tC,G\t10.4616\t.\tDP=2;VDB=0.32;SGB=0.0985265;MQSB=1;MQ0F=0;AC=1,1;AN=2;DP4=0,0,1,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0,0,0,0:0:0,0,0:0\t1/2:58,29,26,32,0,29:2:0,1,1:25\n+chrX\t10643\t.\tG\tT\t34.5919\t.\tDP=3;VDB=0.02;SGB=0.0985265;MQSB=1;MQ0F=0;AC=2;AN=2;DP4=0,0,1,1;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:59,6,0:2:0,2:11\n+chrX\t15480\t.\tT\tC\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t15495\t.\tG\tC\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t24890\t.\tC\tG\t13.4669\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:38,3,0:1:0,1:4\n+chrX\t24935\t.\tA\tC\t11.5799\t.\tDP=1;SGB=-0.157211;MQ0F=0;AC=2;AN=2;DP4=0,0,1,0;MQ=60\tGT:PL:DP:DPR:GQ\t./.:0,0,0:0:0,0:0\t1/1:36,3,0:1:0,1:4\n'

diff -r 000000000000 -r aa82b2e54055 test-data/so_coordinate.bam

Binary file test-data/so_coordinate.bam has changed

diff -r 000000000000 -r aa82b2e54055 test-data/so_queryname.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/so_queryname.sam Sat Nov 11 18:19:22 2017 -0500

@@ -0,0 +1,2 @@
+@HD VN:1.5 SO:queryname
+@RG ID:000 SM:N2 DS:C. elegans wt CN:ABC DT:2017-06-27 LB:XYZ PI:400 PL:ILLUMINA PU:SEQ123

diff -r 000000000000 -r aa82b2e54055 test-data/split_pair_reads_1.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_pair_reads_1.fastqsanger Sat Nov 11 18:19:22 2017 -0500

@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA
++HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h
+@HWI-EAS91_1_30788AAXX:7:45:408:807/1
+TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
++HWI-EAS91_1_30788AAXX:7:45:408:807/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA
++HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:64:947:234/1
+TATCAAAAAAGAATATAATCTGAATCAACACTACAA
++HWI-EAS91_1_30788AAXX:7:64:947:234/1
+hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ

diff -r 000000000000 -r aa82b2e54055 test-data/split_pair_reads_2.fastqsanger
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_pair_reads_2.fastqsanger Sat Nov 11 18:19:22 2017 -0500

@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
++HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+@HWI-EAS91_1_30788AAXX:7:22:1621:462/2
+ACTAGCCCCAATATCAATCCTATATCAAATCTCACC
++HWI-EAS91_1_30788AAXX:7:22:1621:462/2
+hJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh
+@HWI-EAS91_1_30788AAXX:7:45:408:807/2
+ATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT
++HWI-EAS91_1_30788AAXX:7:45:408:807/2
+hhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439/2
+TAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG
++HWI-EAS91_1_30788AAXX:7:49:654:1439/2
+hhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@
+@HWI-EAS91_1_30788AAXX:7:64:947:234/2
+CCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG
++HWI-EAS91_1_30788AAXX:7:64:947:234/2
+hhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd

diff -r 000000000000 -r aa82b2e54055 test-data/vaf_linkage.pdf

Binary file test-data/vaf_linkage.pdf has changed

diff -r 000000000000 -r aa82b2e54055 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Sat Nov 11 18:19:22 2017 -0500

@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#

diff -r 000000000000 -r aa82b2e54055 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sat Nov 11 18:19:22 2017 -0500

@@ -0,0 +1,7 @@
+<tables>
+    <!-- Data table "all_fasta": columns value, dbkey, name, path are read from tool-data/all_fasta.loc; "#" is the comment character -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>

diff -r 000000000000 -r aa82b2e54055 varextract.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varextract.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,184 @@
+<tool id="mimodd_varextract" name="MiModD Extract Variant Sites"
+version="@MIMODD_WRAPPER_VERSION@">
+    <description>from a BCF file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command><![CDATA[
+        mimodd varextract '${ifile}'
+        #if len($sitesinfo) > 0:
+            -p
+            #for $extra_vcf in $sitesinfo:
+                '${extra_vcf.pre_vcf}'
+            #end for
+        #end if
+        --ofile '${output_vcf}'
+        ${keep_alts}
+        --verbose
+    ]]></command>
+
+    <inputs> <!-- ifile: BCF dataset to extract from; sitesinfo: zero or more extra VCF datasets listed after -p; keep_alts: contributes -a when checked -->
+        <param name="ifile" type="data" format="bcf" label="BCF input file"
+        help="Use the MiModD Variant Calling tool to generate the input for this tool."/>
+        <repeat name="sitesinfo" title="include information from pre-calculated vcf dataset" default="0">
+         <param name="pre_vcf" type="data" format="vcf"
+         label="independently generated vcf dataset" />
+        </repeat>
+        <param name="keep_alts" type="boolean" truevalue="-a" falsevalue="" checked="false"
+        label="keep all sites with alternate bases"
+        help="If selected, the VCF output will include ALL sites for which non-reference bases have been observed, i.e., even those not considered allelic sites by the variant caller." />
+    </inputs>
+    <outputs> <!-- single VCF dataset; the command hands it to mimodd as the ofile target -->
+        <data name="output_vcf" format="vcf"
+        label="Variants extracted with MiModD from ${on_string}"/>
+    </outputs>
+
+    <tests>
+        <test> <!-- BCF input with defaults: the header line must list samples N2 and ot266, and -a must not appear in the command -->
+            <param name="ifile" value="a.bcf" />
+            <output name="output_vcf" ftype="vcf">
+                <assert_contents>
+                    <has_line_matching expression="#CHROM.POS.ID.REF.ALT.QUAL.FILTER.INFO.FORMAT.N2.ot266" />
+                </assert_contents>
+            </output>
+            <assert_command>
+                <not_has_text text="-a" />
+            </assert_command>
+        </test>
+        <test> <!-- keep_alts enabled plus a.vcf as pre_vcf: the header must gain external_source_1_N2 and external_source_1_ot266 columns, and -a must appear in the command -->
+            <param name="ifile" value="a_part2.bcf" />
+            <param name="keep_alts" value="true" />
+            <param name="pre_vcf" value="a.vcf" />
+            <output name="output_vcf" ftype="vcf">
+                <assert_contents>
+                    <has_line_matching expression="#CHROM.POS.ID.REF.ALT.QUAL.FILTER.INFO.FORMAT.ot266.external_source_1_N2.external_source_1_ot266" />
+                </assert_contents>
+            </output>
+            <assert_command>
+                <has_text text="-a" />
+            </assert_command>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+The tool takes as input a BCF dataset like the ones produced by the
+*MiModD Variant Calling* tool, extracts just the variant sites from it and
+reports them in VCF format.
+
+If the BCF input file specifies multiple samples, sites are included if they qualify as variant sites in at least one sample.
+
+----------
+
+**Options:**
+
+**keep all sites with alternate bases**
+
+By default, a variant site is considered to be a position in the genome for
+which a non-reference allele appears in the inferred genotype of any sample.
+
+You can select the *keep all sites with alternate bases* option, if instead
+you want to extract all sites, for which at least one non-reference base has
+been observed (whether resulting in a non-reference allele call or not).
+Using this option should rarely be necessary, but could be occasionally
+helpful for closer inspection of candidate genomic regions.
+
+
+**include information from pre-calculated vcf dataset**
+
+During the process of variant extraction the tool can take into account
+genome positions specified in one or more independently generated VCF datasets.
+If such additional VCF input is provided, the tool output will contain the
+samples found in these files as additional samples and sites from the main BCF
+dataset will be included not only if they qualify as variant sites in at least
+one sample specified in the BCF, but also if they are listed in any of the
+additional VCF datasets.
+
+Optional VCF input can be particularly useful in one of the following
+situations:
+
+1) you have prior information that leads you to think that certain genome
+   positions are of special relevance for your project and, thus, you are
+   interested in the statistics produced by the variant caller for these
+   positions even if they are not considered variant sites. In this case you
+   can use a minimal VCF dataset to guide the variant extraction process to
+   include these positions. This dataset needs a minimal header of the form:
+
+   ``##fileformat=VCFv4.2``
+
+   followed by positional information like in this example::
+
+     #CHROM POS ID REF ALT QUAL FILTER INFO
+     chrI 1222 . . . . . .
+     chrI 2651 . . . . . .
+     chrI 3659 . . . . . .
+     chrI 3731 . . . . . .
+
+   , where columns are tab-separated and . serves as a placeholder for missing
+   information.
+
+2) you have actual variant calls from an additional sample, but you do not
+   have access to the original sequenced reads data (if you had, the
+   recommended approach would be to include that data in the
+   *MiModD Variant Calling* step).
+
+   This situation is often encountered with published datasets. Assume you
+   have obtained a list of known single nucleotide variants (SNVs) found in
+   one particular strain of your favorite model organism and you would like
+   to know which of these SNVs are present in the related strains you have
+   sequenced. You have aligned the sequenced reads from your samples and have
+   used the *MiModD Variant Calling* tool, which has generated a BCF dataset
+   ready for variant extraction. If the SNV list for the previously sequenced
+   strain is in VCF format already, you can now just plug it into the
+   analysis process by specifying it in the tool interface as an
+   *independently generated vcf dataset*.
+   The resulting vcf output will contain all SNV sites along with the variant
+   sites found in the BCF alone. You can then proceed to the
+   *MiModD VCF Filter* tool to look at the original SNV sites only or to
+   investigate any other interesting subset of sites. If the SNV list is in
+   some other format, you will have to convert it to VCF first. At a minimum,
+   the dataset must have a ``##fileformat`` header line like the previous
+   example and have the ``REF`` and ``ALT`` column filled in like so::
+
+     #CHROM POS ID REF ALT QUAL FILTER INFO
+     chrI 1897409 . A G . . .
+     chrI 1897492 . C T . . .
+     chrI 1897616 . C A . . .
+     chrI 1897987 . A T . . .
+     chrI 1898185 . C T . . .
+     chrI 1898715 . G A . . .
+     chrI 1898729 . T C . . .
+     chrI 1900288 . T A . . .
+
+   , in which case the tool will assume that the corresponding sample is
+   homozygous for each of the SNVs.
+   If you need to distinguish between homozygous and heterozygous SNVs you
+   will have to extend the format to include a format and a sample column
+   with genotype (GT) information like in this example::
+
+     #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sampleX
+     chrI 1897409 . A G . . . GT 1/1
+     chrI 1897492 . C T . . . GT 0/1
+     chrI 1897616 . C A . . . GT 0/1
+     chrI 1897987 . A T . . . GT 0/1
+     chrI 1898185 . C T . . . GT 0/1
+     chrI 1898715 . G A . . . GT 0/1
+     chrI 1898729 . T C . . . GT 0/1
+     chrI 1900288 . T A . . . GT 0/1
+
+   , in which sampleX would be heterozygous for all SNVs except the first.
+
+.. class:: warningmark
+
+   If the optional VCF input contains INDEL calls, these will be ignored by the
+   tool.
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r aa82b2e54055 varreport.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varreport.xml Sat Nov 11 18:19:22 2017 -0500

[

@@ -0,0 +1,140 @@
+<tool id="mimodd_varreport" name="MiModD Report Variants"
+version="@MIMODD_WRAPPER_VERSION@">
+    <description>
+    in a human-friendly format that simplifies data exploration
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command><![CDATA[
+        #set $as_html = $str($formatting.oformat) == 'html'
+        #if $as_html:
+            mkdir '${ofile.files_path}' &&
+        #end if
+        mimodd varreport '${inputfile}' --oformat ${formatting.oformat}
+        #if $as_html:
+            #if $str($formatting.species):
+                --species '${formatting.species}'
+            #end if
+            #if $formatting.link_formatter:
+                --link '${formatting.link_formatter}'
+            #end if
+            --ofile '${ofile.files_path}/variant_report.html'
+            && echo
+            '<html><body><p>MiModD has generated the variant report you requested, but for technical reasons, we have to take you through this little detour to display the report in Galaxy.</p><p>Proceed to <a href="variant_report.html">the variant report</a>.</p></body></html>'
+            > '${ofile}'
+        #else:
+            --ofile '${ofile}'
+        #end if
+    ]]></command>
+
+    <inputs> <!-- inputfile: VCF to report on; formatting: chooses html or text output and, for html, the optional hyperlink settings -->
+        <param name="inputfile" type="data" format="vcf"
+        label="The VCF input with the variants to be reported" />
+        <conditional name="formatting"> <!-- the command reads formatting.oformat, formatting.species and formatting.link_formatter -->
+            <param name="oformat" type="select"
+            label="Format to use for the report">
+                <option value="html">HTML</option>
+                <option value="text">Tab-separated plain text</option>
+            </param>
+            <when value="html">
+                <param name="species" type="text" label="Species"
+                help="If you declare the species your input data comes from, variant reports in html format can have hyperlinks to species-specific databases and genome browsers added. If you have used the MiModD Variant Annotation tool to generate the input dataset, the species information will already be recorded in the dataset, but you can overwrite it if you wish." />
+                <param name="link_formatter" type="data" format="txt" optional="true"
+                label="Optional file with species-specific hyperlink formatting instructions"
+                help="If the tool has no built-in support for your species, i.e., it does not know how to generate hyperlinks for it, you can provide a custom recipe here." />
+            </when>
+            <when value="text"> <!-- NOTE(review): hidden empty species; presumably keeps formatting.species defined in both branches, confirm -->
+                <param name="species" type="hidden" value="" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="ofile" format="html"> <!-- html by default; change_format switches it to tabular when formatting.oformat is "text" -->
+            <change_format>
+             <when input="formatting.oformat" value="text" format="tabular"/>
+            </change_format>
+        </data>
+    </outputs>
+
+    <tests>
+        <test> <!-- html report with species "C. elegans": the extra file variant_report.html must contain a line with a gbrowse/c_elegans_PRJNA13758 link -->
+            <param name="inputfile" value="a.vcf" />
+            <conditional name="formatting">
+                <param name="oformat" value="html" />
+                <param name="species" value="C. elegans" />
+            </conditional>
+            <output name="ofile" ftype="html">
+                <extra_files type="file" name="variant_report.html" ftype="html">
+                    <assert_contents>
+                        <has_line_matching expression=".+tools/genome/gbrowse/c_elegans_PRJNA13758.+" />
+                    </assert_contents>
+                </extra_files>
+            </output>
+        </test>
+        <test> <!-- text report: tabular output with 11 columns, the expected header line and at least one chrI row -->
+            <param name="inputfile" value="a.vcf" />
+            <conditional name="formatting">
+                <param name="oformat" value="text" />
+            </conditional>
+            <output name="ofile" ftype="tabular">
+                <assert_contents>
+                    <has_n_columns n="11" />
+                    <has_line_matching expression="Chromosome	Position	Affected Gene	Transcript	Effects	genotype N2	.+genotype ot266	.+" />
+                    <has_line_matching expression="chrI	.+" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+   **What it does**
+
+The tool turns a variant list in VCF format into a more readable summary table
+listing variant sites and effects.
+
+
+**Html output enriched with species-specific hyperlinks**
+
+If you select html as the output format, the tool can insert species- and
+variant-specific hyperlinks to public genome browsers and databases into the
+report. This is a useful feature to explore medium-size lists of variants, but
+requires that the tool knows:
+
+1) the species that you are analyzing data for
+
+   The tool can autodetect the species if the input dataset has been generated
+   with the *MiModD Variant Annotation* tool. Alternatively, you can declare
+   the species you are working with explicitly.
+
+2) how to generate hyperlinks for this species
+
+   The tool has built-in support for a number of standard model organisms.
+   If your organism is not in that list or if you find that the default
+   hyperlinks for a supported species are outdated, you can provide your own
+   recipe to generate correct hyperlinks through a `custom hyperlink template
+   file`_.
+
+   **TIP:**
+   MiModD's built-in hyperlink formatting tables are actively maintained and
+   extended with every new version! If you find the tool produces outdated
+   hyperlinks for any supported species or if you would like to see additional
+   species supported, do not hesitate to `tell us about it`_.
+   If you have a custom hyperlink template file that is working for you, that
+   is even better. We may use it as a starting point for a built-in recipe and,
+   while we are working on that and with your permission, we can post it on the
+   package home page for other users who may need it.
+
+.. _custom hyperlink template file: http://mimodd.readthedocs.io/en/@MIMODD_REAL_VERSION@/recipes.html#hyperlink-template-file
+.. _tell us about it: mailto:mimodd@googlegroups.com
+
+@HELP_FOOTER@
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r aa82b2e54055 vcf_filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf_filter.xml Sat Nov 11 18:19:22 2017 -0500

[

b'@@ -0,0 +1,191 @@\n+<tool id="mimodd_vcf_filter" name="MiModD VCF Filter" version="@MIMODD_WRAPPER_VERSION@">\n+ <description>\n+ extracts lines from a vcf variant file based on field-specific filters\n+ </description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <expand macro="requirements" />\n+ <expand macro="stdio" />\n+ <expand macro="version_command" />\n+ <command><![CDATA[\n+\tmimodd vcf-filter\n+\t \'$inputfile\' -o \'$outputfile\'\n+\t #if len($datasets):\n+\t -s\n+\t #for $i in $datasets\n+\t\t \'$i.sample\'\n+\t #end for\n+\t --gt\n+\t #for $i in $datasets\n+\t ## remove whitespace from free-text input\n+\t \'#echo ("".join($i.GT.split()) or "ANY")#\'\n+\t #echo " "\n+\t #end for\n+\t --dp\n+\t #for $i in $datasets\n+\t $i.DP\n+\t #end for\n+\t --gq\n+\t #for $i in $datasets\n+\t $i.GQ\n+\t #end for\n+\t --af\n+\t #for $i in $datasets\n+\t \'#echo ($i.AF or "::")#\'\n+\t #end for\n+\t #end if\n+\t #if len($regions):\n+\t -r\n+\t #for $i in $regions\n+\t #if $i.stop:\n+\t \'$i.chrom:$i.start-$i.stop\'\n+\t #else:\n+\t \'$i.chrom:$i.start\'\n+\t #end if\n+\t #end for\n+\t #end if\n+\t #if $vfilter:\n+\t --vfilter\n+\t ## remove \',\' and replace with \' \'\n+\t \'#echo (\'" "\'.join($vfilter.split(\',\')))#\'\n+\t #end if\n+\t $vartype\n+ ]]></command>\n+ \n+ <inputs>\n+ <param name="inputfile" type="data" format="vcf" label="VCF input file" />\n+ <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">\n+ <param name="sample" type="text" label="sample"\n+ help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to." />\n+\t <param name="GT" type="text" \n+\t label="genotype pattern(s) for the inclusion of variants"\n+\t help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list." 
/>\n+\t <param name="DP" type="integer" value="0" \n+\t label="depth of coverage for the sample at the variant site"\n+\t help="keep only variants with at least this sample-specific coverage at the variant site" />\n+ \t<param name="GQ" type="integer" value="0"\n+ \tlabel="genotype quality for the variant in the sample"\n+ \thelp="keep only variants for which the genotype prediction for the sample has at least this quality" />\n+\t <param name="AF" type="text"\n+\t label="allelic fraction filter"\n+\t help="expected format: [allele number]:[minimal fraction]:[maximal fraction]; keep only variants for which the fraction of sample-specific reads supporting a given allele number is between minimal and maximal fraction; if allele number is omitted, the filter operates on the most frequent non-reference allele instead" />\n+ </repeat>\n+ <repeat name="regions" title="Region Filter" default="0" min="0"\n+ help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">\n+ <param name="chrom" type="text" label="Chromosome" />\n+ <param name="start" type="text" label="Region Start" />\n+ <param name="stop" type="text" label="Region End" />\n+ </repeat>\n+ <param name="vartype" type="select" \n+ label="Select the types of variants to include in the output">\n+ <option value="">all types of variants</option>\n+ <option value="--no-indels">exclude indels</option>\n+ <option value="--indels-only">only indels</option>\n+ </param>\n+ <param name="vfilter" type="text" label="sample"\n+ help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output." 
/>\n+ '..b'text="GT:PL:DP:DPR:GQ	0/1" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <test>\n+ <param name="inputfile" value="a.vcf" />\n+ <repeat name="regions">\n+ <param name="chrom" value="chrX" />\n+ </repeat>\n+ <output name="outputfile" ftype="vcf">\n+ <assert_contents>\n+ <has_text text="chrX	" />\n+ <not_has_text text="chrI	" />\n+ <not_has_text text="chrII	" />\n+ <not_has_text text="chrIII	" />\n+ <not_has_text text="chrIV	" />\n+ <not_has_text text="chrV	" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <test>\n+ <param name="inputfile" value="a.vcf" />\n+ <param name="vartype" value="--no-indels" />\n+ <param name="vfilter" value="ot266" />\n+ <output name="outputfile" ftype="vcf">\n+ <assert_contents>\n+ <not_has_text text="INDEL;" />\n+ <has_line line="#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ot266" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+ \n+ <help><![CDATA[\n+.. class:: infomark\n+\n+ **What it does**\n+\n+The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.\n+\n+The following types of variant filters can be set up:\n+\n+1) Sample-specific filters:\n+ \n+ Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.\n+ \n+2) Region filters:\n+ \n+ Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.\n+ \n+3) Variant type filter:\n+\n+ Filter variants by their type, i.e. 
whether they are single nucleotide variations (SNVs) or indels\n+ \n+In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.\n+The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multisample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field.\n+\n+**Examples of sample-specific filters:**\n+\n+*Simple genotype pattern*\n+\n+genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample\'s genotype is homozygous mutant\n+\n+*Complex genotype pattern*\n+\n+genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample\'s genotype is either heterozygous or homozygous wildtype\n+\n+*Multiple sample-specific filters*\n+\n+Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern 1/1:\n+==> keep all variants for which the first sample\'s gentoype is homozygous wildtype **and** the second sample\'s genotype is homozygous mutant\n+\n+*Combining sample-specific filter criteria*\n+\n+genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9\n+==> keep variants for which the sample\'s genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9\n+**and** at least three reads from the sample cover the variant site\n+\n+**TIP:**\n+\n+As in the example above, genotype quality is typically most useful in combination with a genotype pattern.\n+It acts then, effectively, to make the genotype filter more stringent.\n+\n+@HELP_FOOTER@\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n'