Mercurial > repos > iuc > delly_call

--- a/call.xml	Thu Oct 29 20:50:39 2020 +0000
+++ b/call.xml	Fri Jan 22 14:31:44 2021 +0000
@@ -35,89 +35,84 @@
 #if 'dump' in $oo.out
     --dump 'dump.tsv.gz'
 #end if
-## samples
-#for $i, $current in enumerate($samples)
-    'sample_${i}.bam'
+## input
+#for $i, $current in enumerate($input)
+    'input_${i}.bam'
 #end for

 ## postprocessing
 @LOG@
+@DUMP@
 @VCF@
-@DUMP@
     ]]></command>
     <inputs>
-        <expand macro="samples"/>
+        <expand macro="input" format="bam" multiple="true" label="Select input file(s)"/>
         <section name="generic" title="Generic options" expanded="true">
             <expand macro="svtype"/>
             <expand macro="genome"/>
             <expand macro="exclude"/>
         </section>
         <section name="discovery" title="Discovery options" expanded="true">
-            <param name="mapqual" type="integer" value="1" label="Set minimum paired-end (PE) mapping quality" help="(--map-qual)"/>
-            <param name="qualtra" type="integer" value="20" label="Set minimum PE quality for translocation" help="(--qual-tra)"/>
+            <param name="mapqual" type="integer" value="1" label="Set minimum paired-end mapping quality" help="(--map-qual)"/>
+            <param name="qualtra" type="integer" value="20" label="Set minimum paired-end quality for translocation" help="(--qual-tra)"/>
             <param name="madcutoff" type="integer" value="9" label="Set insert size cutoff" help="median+s*MAD, deletions only (--mad-cutoff)"/>
             <expand macro="minclip"/>
             <expand macro="mincliquesize"/>
-            <expand macro="minrefsep"/>
-            <expand macro="maxreadsep"/>
+            <expand macro="minrefsep" default="25"/>
+            <expand macro="maxreadsep" default="40"/>
         </section>
         <section name="genotyping" title="Genotyping options" expanded="true">
             <expand macro="vcffile"/>
             <expand macro="genoqual"/>
         </section>
-        <section name="oo" title="Output options">
+        <section name="oo" title="Output options" expanded="true">
             <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
                 <option value="bcf" selected="true">BCF</option>
-                <option value="vcf">VCF</option>
+                <option value="log">Log</option>
                 <option value="dump">SV-reads (--dump)</option>
-                <option value="log">Log</option>
+                <option value="vcf">VCF</option>
             </param>
         </section>
     </inputs>
     <outputs>
-        <expand macro="vcf"/>
         <expand macro="bcf"/>
         <expand macro="dump"/>
         <expand macro="log"/>
+        <expand macro="vcf"/>
     </outputs>
     <tests>
         <!-- no test implemented for parameter vcffile -->

         <!-- #1 default, single -->
         <test expect_num_outputs="2">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
             </section>
             <section name="oo">
                 <param name="out" value="vcf,bcf"/>
             </section>
+            <output name="out_bcf">
+                <assert_contents>
+                    <has_size value="1184" delta="10"/>
+                </assert_contents>
+            </output>
             <output name="out_vcf">
                 <assert_contents>
                     <has_size value="3661" delta="10"/>
                     <has_line line="#CHROM&#009;POS&#009;ID&#009;REF&#009;ALT&#009;QUAL&#009;FILTER&#009;INFO&#009;FORMAT&#009;normal"/>
                 </assert_contents>
             </output>
-            <output name="out_bcf">
-                <assert_contents>
-                    <has_size value="1184" delta="10"/>
-                </assert_contents>
-            </output>
         </test>
         <!-- #2 default, multi; test data to small, results are empty -->
         <test expect_num_outputs="3">
-            <param name="samples" value="normal.bam,tumor.bam"/>
+            <param name="input" value="normal.bam,tumor.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
             </section>
             <section name="oo">
                 <param name="out" value="vcf,bcf,log"/>
             </section>
-            <output name="out_vcf">
-                <assert_contents>
-                    <has_size value="0"/>
-                </assert_contents>
-            </output>
             <output name="out_bcf">
                 <assert_contents>
                     <has_size value="0"/>
@@ -128,10 +123,15 @@
                     <has_text_matching expression="Sample has not enough data to estimate library parameters.+"/>
                 </assert_contents>
             </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_size value="0"/>
+                </assert_contents>
+            </output>
         </test>
-       <!-- #3; results are empty due to exclude file -->
+        <!-- #3; results are empty due to exclude file -->
         <test expect_num_outputs="4">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
                 <param name="exclude" value="exclude.tsv"/>
@@ -139,11 +139,6 @@
             <section name="oo">
                 <param name="out" value="vcf,bcf,dump,log"/>
             </section>
-            <output name="out_vcf">
-                <assert_contents>
-                    <has_size value="0"/>
-                </assert_contents>
-            </output>
             <output name="out_bcf">
                 <assert_contents>
                     <has_size value="0"/>
@@ -159,10 +154,15 @@
                     <has_text_matching expression="Sample has not enough data to estimate library parameters.+"/>
                 </assert_contents>
             </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_size value="0"/>
+                </assert_contents>
+            </output>
         </test>
         <!-- #4 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="svtype" value="ALL"/>
                 <param name="genome" value="genome.fasta"/>
@@ -191,7 +191,7 @@
         </test>
         <!-- #5 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="svtype" value="DEL"/>
                 <param name="genome" value="genome.fasta"/>
@@ -207,7 +207,7 @@
         </test>
         <!-- #6 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="svtype" value="INS"/>
                 <param name="genome" value="genome.fasta"/>
@@ -223,7 +223,7 @@
         </test>
         <!-- #7 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="svtype" value="INV"/>
                 <param name="genome" value="genome.fasta"/>
@@ -239,7 +239,7 @@
         </test>
         <!-- #8 -->
         <test expect_num_outputs="2">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="svtype" value="BND"/>
                 <param name="genome" value="genome.fasta"/>
@@ -269,11 +269,11 @@

 **Input**

-Delly *call* needs a sorted, indexed and duplicate marked BAM file for every input sample. An indexed reference genome is required to identify split-reads. Additionally a VCF/BCF file for genotyping can be applied.
+Delly *call* requires a sorted, indexed and duplicate-marked BAM file for every input sample. An indexed reference genome is required to identify split-reads. Additionally, a BCF/VCF file can be supplied for genotyping.

 **Output**

-The output is available in BCF and VCF format. Additionally an output file for SV-reads is provided.
+The output is available in BCF and VCF format. Additionally, an output file for SV-reads and a log file are provided.

 .. class:: infomark
--- a/macros.xml	Thu Oct 29 20:50:39 2020 +0000
+++ b/macros.xml	Fri Jan 22 14:31:44 2021 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">0.8.5</token>
+    <token name="@TOOL_VERSION@">0.8.7</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
@@ -17,14 +17,12 @@
         </citations>
     </xml>

-    <!--
-        command
-    -->
+    <!-- command -->

     <token name="@BAM@"><![CDATA[
-#for $i, $current in enumerate($samples)
-    ln -s '${current}' 'sample_${i}.bam' &&
-    ln -s '${current.metadata.bam_index}' 'sample_${i}.bam.bai' &&
+#for $i, $current in enumerate($input)
+    ln -s '${current}' 'input_${i}.bam' &&
+    ln -s '${current.metadata.bam_index}' 'input_${i}.bam.bai' &&
 #end for
     ]]></token>
     <token name="@DUMP@"><![CDATA[
@@ -43,68 +41,79 @@
 #end if
     ]]></token>

-    <!--
-        input
-    -->
+    <!-- input -->

+    <xml name="cnoffset" token_default="">
+        <param name="cnoffset" type="float" min="0.0" max="1.0" value="@DEFAULT@" label="Set minimum CN offset" help="(--cn-offset)"/>
+    </xml>
+    <xml name="coverage" token_label="">
+        <param argument="--coverage" type="integer" value="10" label="@LABEL@"/>
+    </xml>
     <xml name="exclude">
         <param argument="--exclude" type="data" format="tabular" optional="true" label="Select file with regions to exclude"/>
     </xml>
     <xml name="genome">
-        <param argument="--genome" type="data" format="fasta" label="Select genome"/>
+        <param argument="--genome" type="data" format="fasta" label="Select genome file"/>
     </xml>
     <xml name="genoqual">
         <param name="genoqual" type="integer" value="5" label="Set minimum mapping quality for genotyping" help="(--geno-qual)"/>
     </xml>
+    <xml name="input" token_format="" token_multiple="false" token_label="">
+        <param name="input" type="data" format="@FORMAT@" multiple="@MULTIPLE@" label="@LABEL@"/>
+    </xml>
+    <xml name="maxreadsep" token_default="">
+        <param argument="--maxreadsep" type="integer" value="@DEFAULT@" label="Set maximum read separation"/>
+    </xml>
+    <xml name="maxsize" token_default="" token_label="">
+        <param argument="--maxsize" type="integer" value="@DEFAULT@" label="@LABEL@"/>
+    </xml>
     <xml name="minclip">
         <param argument="--minclip" type="integer" value="25" label="Set minimum clipping length"/>
     </xml>
-    <xml name="maxreadsep" token_default="40">
-        <param argument="--maxreadsep" type="integer" value="@DEFAULT@" label="Set maximum read separation"/>
+    <xml name="mincliquesize">
+        <param name="mincliquesize" type="integer" value="2" label="Set minimum paired-end/split-read clique size" help="(--min-clique-size)"/>
     </xml>
-    <xml name="maxsize" token_default="1000000">
-        <param argument="--maxsize" type="integer" value="@DEFAULT@" label="Set maximum SV size"/>
-    </xml>
-    <xml name="mincliquesize">
-        <param name="mincliquesize" type="integer" value="2" label="Set minimum min. PE/SR clique size" help="(--min-clique-size)"/>
-    </xml>
-    <xml name="minrefsep" token_default="25">
+    <xml name="minrefsep" token_default="">
         <param argument="--minrefsep" type="integer" value="@DEFAULT@" label="Set minimum reference separation"/>
     </xml>
-    <xml name="minsize">
-        <param argument="--minsize" type="integer" value="0" label="Set minimum SV size"/>
+    <xml name="minsize" token_default="" token_label="">
+        <param argument="--minsize" type="integer" value="@DEFAULT@" label="@LABEL@"/>
+    </xml>
+    <xml name="pass">
+        <param argument="--pass" type="boolean" truevalue="--pass" falsevalue="" label="Filter sites for PASS?"/>
     </xml>
-    <xml name="samples" token_format="bam" token_multiple="true" token_label="Select sample file(s)">
-        <param name="samples" type="data" format="@FORMAT@" multiple="@MULTIPLE@" label="@LABEL@"/>
+    <xml name="ploidy">
+        <param argument="--ploidy" type="integer" value="2" label="Set baseline ploidy"/>
+    </xml>
+    <xml name="samples">
+        <param argument="--samples" type="data" format="tabular" label="Select sample file" help="Two-column sample file listing sample name and tumor or control."/>
     </xml>
     <xml name="svtype">
         <param argument="--svtype" type="select" label="Select type(s) of structural variants to detect">
             <option value="ALL" selected="true">All types (ALL)</option>
             <option value="DEL">Deletion (DEL)</option>
+            <option value="DUP">Duplication (DUP)</option>
             <option value="INS">Insertion (INS)</option>
-            <option value="DUP">Duplication (DUP)</option>
             <option value="INV">Inversion (INV)</option>
             <option value="BND">Translocation (BND)</option>
         </param>
     </xml>
     <xml name="vcffile">
-        <param argument="--vcffile" type="data" format="vcf,bcf" optional="true" label="Select genotyping file"/>
+        <param argument="--vcffile" type="data" format="bcf,vcf" optional="true" label="Select genotyping file"/>
     </xml>

-    <!--
-        output
-    -->
+    <!-- output -->

+    <xml name="bcf">
+        <data name="out_bcf" format="bcf" from_work_dir="result.bcf" label="${tool.name} on ${on_string}: Result (BCF)">
+            <filter>'bcf' in oo['out']</filter>
+        </data>
+    </xml>
     <xml name="vcf">
         <data name="out_vcf" format="vcf" from_work_dir="result.vcf" label="${tool.name} on ${on_string}: Result (VCF)">
             <filter>'vcf' in oo['out']</filter>
         </data>
     </xml>
-     <xml name="bcf">
-        <data name="out_bcf" format="bcf" from_work_dir="result.bcf" label="${tool.name} on ${on_string}: Result (BCF)">
-            <filter>'bcf' in oo['out']</filter>
-        </data>
-    </xml>
     <xml name="dump">
         <data name="out_dump" format="tabular" from_work_dir="dump.tsv" label="${tool.name} on ${on_string}: SV-reads">
             <filter>'dump' in oo['out']</filter>
@@ -116,12 +125,25 @@
         </data>
     </xml>

-    <!--
-        Help
-    -->
+    <!-- help -->

     <token name="@WID@"><![CDATA[
 Delly is an integrated structural variant (SV) prediction method that can discover, genotype and visualize deletions, tandem duplications, inversions and translocations at single-nucleotide resolution in short-read massively parallel sequencing data. It uses paired-ends, split-reads and read-depth to sensitively and accurately delineate genomic rearrangements throughout the genome.
+
+Short-read SV calling
+
+- *call* to discover and genotype structural variants
+- *merge* structural variants across VCF/BCF files and within a single VCF/BCF file
+- *filter* somatic or germline structural variants
+
+Long-read SV calling
+
+- *lr* for long-read SV discovery
+
+Copy-number variant calling
+
+- *cnv* to discover and genotype copy-number variants
+- *classify* somatic or germline copy-number variants
     ]]></token>
     <token name="@REFERENCES@"><![CDATA[
 More information are available on `GitHub <https://github.com/dellytools/delly>`_.