diff basil.xml @ 0:e6ef29001647 draft default tip

planemo upload commit b89c8017aeef91f940543a1cc7dadb4a85290865
author iuc
date Thu, 30 May 2019 21:14:58 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/basil.xml	Thu May 30 21:14:58 2019 -0400
@@ -0,0 +1,61 @@
+<tool id="basil" name="basil" version="1.2.0">
+    <description>Breakpoint detection, including large insertions</description>
+    <requirements>
+        <requirement type="package" version="1.2.0">anise_basil</requirement>
+    </requirements>
+    <version_command>basil --version 2>&amp;1 | grep 'basil version' | cut -f 3 -d ' '</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+        ln -s '$ref' 'ref.fa' &&
+        ln -s '$bam' 'in.bam' &&
+        ln -s '$vcf' 'out.vcf' &&
+        basil
+        --input-reference 'ref.fa'
+        --input-mapping 'in.bam'
+        --out-vcf 'out.vcf'
+        --oea-min-support-each-side '$min_oea_each_side'
+    ]]></command>
+    <inputs>
+        <param name="ref" argument="--input-reference" type="data" format="Fasta" label="Reference Sequence File" help="FASTA file with the reference."/>
+        <param name="bam" argument="--input-mapping" type="data" format="sam,bam" label="Alignment File" help="SAM/BAM file to use as the input."/>
+        <param name="min_oea_each_side" argument="--oea-min-support-each-side" type="integer" value="2" label="Minimum supporting reads, each side" help="Smallest number of OEA (one-end-anchor) reads on each side to support an insertion.  In range [1..inf].  This is the minimum number of supporting reads (without mapped partners) on each side of an insertion breakpoint required to not be filtered." />
+    </inputs>
+    <outputs>
+        <data name="vcf" format="vcf" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="ref" value="ref.fa"/>
+            <param name="bam" value="simulated.bam"/>
+            <param name="min_oea_each_side"  value="2"/>
+            <output name="vcf" file="basil.vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        BASIL is a method to detect breakpoints for structural variants (including insertion breakpoints) from aligned paired HTS reads in BAM format.  Use BASIL to analyze BAM files for tentative insertion sites.
+
+        Note that BASIL will in general detect all kinds of breakpoints, e.g. for inversions on real-world data.
+
+        BASIL VCF fields
+
+        A typical line in BASIL might look as follows.
+
+        1 5001 site_0 T <INS>   . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT    46.4256:10:12:35:32
+
+        The first seven columns are as usually in VCF files (ref name, 1-based position, reference base, abbreviation for long insertion, no assigned quality, passing all filters, imprecise insertion SV).
+
+        The eighth column contains the names of the score values given in the ninth column:
+
+        GSCORE Geometric mean of the sum of "1 + $score" for all of the following scores.
+        CLEFT Number of clipping signatures supporting the site from the left side.
+        CRIGHT Number of clipping signatures supporting the site from the right side.
+        OEALEFT Number of OEA alignments supporting the site from the left.
+        OEARIGHT Number of OEA alignmetns supproting the site from the right.
+
+        Generally, one should filter for a minimum support of OEA records on each side, e.g. a value of 10 makes sense for a 30x coverage and showed good results on simulated data.
+
+        For a ranking, GSCORE is a suitable measure but we did not develop any statistical model for BASIL matches and it is a mean of pseudocounts only. It carries no statistically precise meaning.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv051</citation>
+    </citations>
+</tool>