Mercurial > repos > iuc > basil
view basil.xml @ 1:77fc7640abc7 draft default tip
planemo upload commit 49a2861a9b3480ea25f1e5526d2edf9dc8cb5334
author | iuc |
---|---|
date | Sun, 11 Aug 2024 21:06:14 +0000 |
parents | e6ef29001647 |
children |
line wrap: on
line source
<!--
    Galaxy tool wrapper for the `basil` command-line program.

    The wrapper links the chosen reference FASTA, the alignment file and the
    output dataset to fixed file names in the job working directory and then
    runs `basil` once on them. Inputs:
      * reference_source: reference taken from the `all_fasta` data table
        ("cached") or from a FASTA dataset in the history ("history");
      * bam: the SAM/BAM alignment to scan;
      * min_oea_each_side: passed to `basil` as its
        oea-min-support-each-side option.
    Output: a single VCF dataset (`vcf`).
-->
<tool id="basil" name="basil" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
    <description>Breakpoint detection, including large insertions</description>
    <macros>
        <token name="@TOOL_VERSION@">1.2.0</token>
        <token name="@VERSION_SUFFIX@">1</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">anise_basil</requirement>
    </requirements>
    <!-- The ampersand of the shell redirection must be written as an entity to keep the file well-formed. -->
    <version_command>basil --version 2>&amp;1 | grep 'basil version' | cut -f 3 -d ' '</version_command>
    <command detect_errors="aggressive"><![CDATA[
        ## Link the reference under a fixed name, whichever source was selected.
        #if $reference_source.reference_source_selector == 'history':
            ln -f -s '$reference_source.ref' ref.fa &&
        #else:
            ln -f -s '$reference_source.ref.fields.path' ref.fa &&
        #end if
        ln -s '$bam' 'in.bam' &&
        ## out.vcf is a symlink to the output dataset, so basil is given a path ending in .vcf.
        ln -s '$vcf' 'out.vcf' &&
        basil
            --input-reference 'ref.fa'
            --input-mapping 'in.bam'
            --out-vcf 'out.vcf'
            --oea-min-support-each-side '$min_oea_each_side'
    ]]></command>
    <inputs>
        <!-- NOTE(review): the selector help mentions an `Indexes` help section and the "history" label mentions building an index; neither is present in this wrapper. Confirm the wording. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below. If you would like to perform self-mapping select `history` here, then choose your input file as reference.">
                <option value="cached">Use a built-in genome index</option>
                <option value="history">Use a genome from history and build index</option>
            </param>
            <when value="cached">
                <param name="ref" type="select" label="Using reference genome" help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2"/>
                        <validator type="no_options" message="No reference genomes are available"/>
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="ref" argument="--input-reference" type="data" format="fasta" label="Reference Sequence File" help="FASTA file with the reference."/>
            </when>
        </conditional>
        <!-- NOTE(review): SAM datasets are accepted but are always linked as `in.bam`; confirm that basil does not pick the parser from the file extension. -->
        <param name="bam" argument="--input-mapping" type="data" format="sam,bam" label="Alignment File" help="SAM/BAM file to use as the input."/>
        <!-- min="1" enforces the lower bound that the help text documents. -->
        <param name="min_oea_each_side" argument="--oea-min-support-each-side" type="integer" min="1" value="2" label="Minimum supporting reads, each side" help="Smallest number of OEA (one-end-anchor) reads on each side to support an insertion. In range [1..inf]. This is the minimum number of supporting reads (without mapped partners) on each side of an insertion breakpoint required to not be filtered."/>
    </inputs>
    <outputs>
        <data name="vcf" format="vcf"/>
    </outputs>
    <tests>
        <!-- Reference supplied from the history. -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref" ftype="fasta" value="ref.fa"/>
            </conditional>
            <param name="bam" value="simulated.bam"/>
            <param name="min_oea_each_side" value="2"/>
            <output name="vcf" file="basil.vcf"/>
        </test>
        <!-- Reference taken from the `all_fasta` data table. -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="cached"/>
                <param name="ref" value="genome"/>
            </conditional>
            <param name="bam" value="simulated.bam"/>
            <param name="min_oea_each_side" value="2"/>
            <output name="vcf" file="basil.vcf"/>
        </test>
    </tests>
    <help><![CDATA[
BASIL is a method to detect breakpoints for structural variants (including insertion breakpoints) from aligned paired HTS reads in BAM format.

Use BASIL to analyze BAM files for tentative insertion sites. Note that BASIL will in general detect all kinds of breakpoints, e.g. for inversions on real-world data.

BASIL VCF fields

A typical line in BASIL might look as follows.

::

    1 5001 site_0 T <INS> . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32

The first seven columns are as usual in VCF files (ref name, 1-based position, reference base, abbreviation for long insertion, no assigned quality, passing all filters, imprecise insertion SV). The eighth column contains the names of the score values given in the ninth column:

GSCORE
    Geometric mean of the sum of "1 + $score" for all of the following scores.

CLEFT
    Number of clipping signatures supporting the site from the left side.

CRIGHT
    Number of clipping signatures supporting the site from the right side.

OEALEFT
    Number of OEA alignments supporting the site from the left.

OEARIGHT
    Number of OEA alignments supporting the site from the right.

Generally, one should filter for a minimum support of OEA records on each side, e.g. a value of 10 makes sense for a 30x coverage and showed good results on simulated data. For a ranking, GSCORE is a suitable measure but we did not develop any statistical model for BASIL matches and it is a mean of pseudocounts only. It carries no statistically precise meaning.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv051</citation>
    </citations>
</tool>