Mercurial > repos > yhoogstrate > fuma

<?xml version="1.0" encoding="UTF-8"?>
<tool id="fuma" name="FuMa" version="2.10.0.a">
    <description>match detected fusion genes based on gene names (in particular for RNA-Seq).</description>

    <!-- Packages that must be available to the job: a Python 2.7 interpreter and
         FuMa 2.10.0 (presumably the provider of the `fuma` and
         `fuma-list-to-boolean-list` executables called in the command; confirm
         against the package recipe). -->
    <requirements>
        <requirement type="package" version="2.7">python</requirement>
        <requirement type="package" version="2.10.0">fuma</requirement>
    </requirements>

    <!-- Records the FuMa version: stderr is merged into stdout and only the first
         line of the combined output is kept. -->
    <version_command>fuma --version 2>&amp;1 | head -n 1</version_command><!-- -V also works, but is not GNU standard -->

    <command><![CDATA[
        ## pipes.quote() shell-escapes every argument token that contains
        ## user-controlled text (dataset names) or file system paths.
        #import pipes

        ## One entry per item of the "datasets" repeat:
        ##   -a  ga_<i>:<BED path>                   gene annotation under alias ga_<i>
        ##   -s  <sample name>:<format>:<file path>  fusion gene dataset
        ##   -l  <sample name>:ga_<i>                links a sample to its annotation
        #set $gene_annotations = []
        #set $samples = []
        #set $links = []

        #for $i, $d in enumerate( $datasets )
            #set $ga_alias = "ga_" + str($i)
            #set $sample_name = str($d['sample'].name)

            ## Quote each complete token (not only the sample name) so that
            ## unusual characters in names or paths cannot split the argument.
            #set $gene_annotations = $gene_annotations + [ pipes.quote($ga_alias + ":" + str($d['gene_annotation'].file_name)) ]
            #set $samples = $samples + [ pipes.quote($sample_name + ":" + str($d['format']) + ":" + str($d['sample'].file_name)) ]
            #set $links = $links + [ pipes.quote($sample_name + ":" + $ga_alias) ]
        #end for

        #set $gene_annotations_str = " ".join($gene_annotations)
        #set $samples_str = " ".join($samples)
        #set $links_str = " ".join($links)

        fuma
          -m
            $params.matching_method

          $params.strand_specific_matching
          $params.acceptor_donor_order_specific_matchig

          -a
            $gene_annotations_str
          -s
            $samples_str
          -l
            $links_str
        ## "list_boolean" is not passed to fuma itself: fuma writes a plain
        ## list, which is converted to the boolean list afterwards.
        #if $params.output_format.value == "list_boolean"
          -f list
        #else
          -f $params.output_format.value
        #end if
          -o '$fuma_overview'

        ## Chain with && (not ;) so the conversion never runs on the output of
        ## a failed fuma call and a failure is not masked by a later command.
        #if $params.output_format.value == "list_boolean"
            &&
            fuma-list-to-boolean-list -o tmp.txt '$fuma_overview'
            &&
            mv tmp.txt '$fuma_overview'
        #end if
    ]]></command>

    <inputs>
        <!-- One entry per fusion gene detection result; at least two entries are
             required (min="2"). Each entry combines the result file, its format and
             a BED gene annotation. The selected option value is inserted unchanged
             as the format field of the -s argument built in the command template. -->
        <repeat name="datasets" title="FusionGene Datasets" min="2">
            <param name="sample" type="data" format="txt,tabular" label="Dataset (RNA-Seq fusion gene detection experiment)" />
            <param name="format" type="select" label="Format of dataset">
                <option value="chimera">Chimera prettyPrint()</option>
                <option value="chimerascan">ChimeraScan</option>
                <option value="defuse">DeFuse</option>
                <option value="complete-genomics">Complete Genomics var/mastervar</option>
                <option value="fusion-catcher_final">Fusion Catcher (final-list file)</option>
                <option value="fusionmap">FusionMap</option>
                <option value="trinity-gmap">GMAP (As step after Trinity)</option>
                <option value="oncofuse">OncoFuse</option>
                <option value="rna-star_chimeric">STAR (chimeric file)</option>
                <option value="star-fusion_final">STAR-Fusion (candidates.final)</option>
                <option value="tophat-fusion_pre">Tophat Fusion Pre (fusions.out)</option>
                <option value="tophat-fusion_post_potential_fusion">Tophat Fusion Post (potential_fusion.txt)</option>
                <option value="tophat-fusion_post_result">Tophat Fusion Post (result.txt)</option>
                <option value="tophat-fusion_post_result_html">Tophat Fusion Post (result.html)</option>
            </param>
            <!-- Referenced in the command template under the alias ga_<index of this repeat entry>. -->
            <param name="gene_annotation" type="data" format="bed" label="Corresponding gene-name annotation file (BED format)" help="Make use of persistent gene annotations! Gene annotations should only be different if different reference genome builds were used." />
        </repeat>

        <!-- Matching and output settings. The command template reads
             $params.matching_method, $params.strand_specific_matching,
             $params.acceptor_donor_order_specific_matchig and $params.output_format
             regardless of the selected branch, so BOTH branches must define all
             four parameters. -->
        <conditional name="params">
            <param name="settingsType" type="select" label="Settings to use" help="You can use the default settings or set custom values for any FuMa parameter.">
                <option value="preSet" selected="true">Use Defaults</option>
                <option value="full">Full parameter list</option>
            </param>
            <when value="preSet">
                <!-- Hidden parameters carrying the same defaults as the "full" branch
                     (subset matching, both specificity flags enabled, boolean list
                     output). Without matching_method and output_format the command
                     template cannot be evaluated when "Use Defaults" is selected. -->
                <param name="matching_method" type="hidden" value="subset" />
                <param name="strand_specific_matching" type="hidden" value="--strand-specific-matching" />
                <param name="acceptor_donor_order_specific_matchig" type="hidden" value="--acceptor-donor-order-specific-matching" />
                <param name="output_format" type="hidden" value="list_boolean" />
            </when>
            <when value="full">
                <param name="matching_method" type="select" label="Matching method: technique used to match fusion genes based on annotated gene sets" help="Overlap is the most sensitive but also more sensitive for long gene artefacts; subset is the recommended technique and EGM is conservative.">
                    <option value="overlap">Overlap</option>
                    <option value="subset" selected="True">Subset</option>
                    <option value="egm">Exact Geneset Matching (EGM)</option>
                </param>

                <!-- The truevalue of each boolean is the literal command line flag; the
                     empty falsevalue simply omits the flag from the command. -->
                <param name="strand_specific_matching" type="boolean" checked="True" truevalue="--strand-specific-matching" falsevalue="" label="Consider fusion genes distinct when the breakpoints have different strands" help="Only a limited number of file formats support this feature." />
                <param name="acceptor_donor_order_specific_matchig" type="boolean" checked="True" truevalue="--acceptor-donor-order-specific-matching" falsevalue="" label="Consider fusion genes distinct when the donor and acceptor sites are swapped (A,B) != (B,A)" help="This settings is not recommended when fusion genes detected in DNA-Seq are used" />

                <!-- "list_boolean" is handled by the wrapper: the command template asks
                     fuma for a plain list and converts it afterwards. -->
                <param name="output_format" type="select" label="Output format">
                    <option value="list_boolean" selected="true">List (Boolean)</option>
                    <option value="list">List</option>
                    <option value="summary">Count summary</option>
                </param>
            </when>
        </conditional>
    </inputs>

    <!-- Single tabular result file. Its history label is the tool name followed by
         the "hid: name" of every input sample dataset, separated by commas. -->
    <outputs>
        <data format="tabular" name="fuma_overview" label="${tool.name} on ${', '.join([ str(d['sample'].hid)+': '+d['sample'].name for d in $datasets ])}" />
    </outputs>

    <tests>
        <!-- Specific matching: four Edgren datasets (ChimeraScan, DeFuse, FusionMap
             and a true positives list supplied in FusionMap format) against one
             hg19 RefSeq annotation, using the full parameter list with subset
             matching, both specificity flags enabled and boolean list output. -->
        <test>
            <!-- <repeat name="datasets"> -->
                <param name="datasets_0|sample" value="edgren_chimerascan.txt" ftype="tabular" />
                <param name="datasets_0|format" value="chimerascan" />
                <param name="datasets_0|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->
            <!-- <repeat name="datasets"> -->
                <param name="datasets_1|sample" value="edgren_defuse.txt" ftype="tabular" />
                <param name="datasets_1|format" value="defuse" />
                <param name="datasets_1|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->
            <!-- <repeat name="datasets"> -->
                <param name="datasets_2|sample" value="edgren_fusion-map.txt" ftype="tabular" />
                <param name="datasets_2|format" value="fusionmap" />
                <param name="datasets_2|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->
            <!-- <repeat name="datasets"> -->
                <param name="datasets_3|sample" value="edgren_true_positives.txt" ftype="tabular" />
                <param name="datasets_3|format" value="fusionmap" />
                <param name="datasets_3|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->

            <param name="settingsType" value="full" />

            <param name="matching_method" value="subset" />
            <!-- Both booleans are given their truevalue, i.e. the flags are switched on. -->
            <param name="strand_specific_matching" value="--strand-specific-matching" />
            <param name="acceptor_donor_order_specific_matchig" value="--acceptor-donor-order-specific-matching" />
            <param name="output_format" value="list_boolean" />

            <output name="fuma_overview" file="edgren_test_01_specifc_matching_output.txt" />
        </test>
        <!-- Unspecific matching: four Edgren datasets (ChimeraScan, DeFuse, FusionMap
             and a true positives list supplied in FusionMap format) against one
             hg19 RefSeq annotation, using the full parameter list with subset
             matching, both specificity flags disabled and boolean list output. -->
        <test>
            <!-- <repeat name="datasets"> -->
                <param name="datasets_0|sample" value="edgren_chimerascan.txt" ftype="tabular" />
                <param name="datasets_0|format" value="chimerascan" />
                <param name="datasets_0|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->
            <!-- <repeat name="datasets"> -->
                <param name="datasets_1|sample" value="edgren_defuse.txt" ftype="tabular" />
                <param name="datasets_1|format" value="defuse" />
                <param name="datasets_1|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->
            <!-- <repeat name="datasets"> -->
                <param name="datasets_2|sample" value="edgren_fusion-map.txt" ftype="tabular" />
                <param name="datasets_2|format" value="fusionmap" />
                <param name="datasets_2|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->
            <!-- <repeat name="datasets"> -->
                <param name="datasets_3|sample" value="edgren_true_positives.txt" ftype="tabular" />
                <param name="datasets_3|format" value="fusionmap" />
                <param name="datasets_3|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
            <!-- </repeat> -->

            <param name="settingsType" value="full" />

            <param name="matching_method" value="subset" />
            <!-- The empty value equals the falsevalue of both booleans, i.e. the flags are switched off. -->
            <param name="strand_specific_matching" value="" />
            <param name="acceptor_donor_order_specific_matchig" value="" />
            <param name="output_format" value="list_boolean" />

            <output name="fuma_overview" file="edgren_test_02_unspecifc_matching_output.txt" />
        </test>
    </tests>

    <help><![CDATA[
============
Introduction
============

FuMa (Fusion Matcher) matches predicted fusion events (both genomic and transcriptomic) according to chromosomal location or associated gene annotation(s), where the latter should be independent of the genome build.

Because RNA-Sequencing deals with samples that may have undergone splicing, reads may be split up because of biological processes. If a fusion event takes place, the same thing may happen. Therefore we hypothesize that using spanning read distances may be unreliable, because there are known introns of > 100kb. Therefore, FuMa translates the breakpoints to gene names, and only overlaps breakpoints with the same gene name(s).

=====
Usage
=====

After you have uploaded the results of your Fusion Gene detection experiment, and selected the format to be *tabular*, you can start the FuMa wrapper. For each dataset you simply have to add another repeat. Then you have to select a corresponding format:

*******
Formats
*******

+-------------------+-----------------------+-------------------------------------+
|Tools              | File                  | Format string                       |
+===================+=======================+=====================================+
|Chimera            | prettyPrint() output  | chimera                             |
+-------------------+-----------------------+-------------------------------------+
|ChimeraScan        | chimeras.bedpe        | chimerascan                         |
+-------------------+-----------------------+-------------------------------------+
|Complete Genomics  | highConfidenceJu*.tsv | complete-genomics                   |
+-------------------+-----------------------+-------------------------------------+
|Complete Genomics  | allJunctionsBeta*.tsv | complete-genomics                   |
+-------------------+-----------------------+-------------------------------------+
|DeFuse             | results.txt           | defuse                              |
+-------------------+-----------------------+-------------------------------------+
|DeFuse             | results.classify.txt  | defuse                              |
+-------------------+-----------------------+-------------------------------------+
|DeFuse             | results.filtered.txt  | defuse                              |
+-------------------+-----------------------+-------------------------------------+
|Fusion Catcher     | final-list_cand*.txt  | fusion-catcher_final                |
+-------------------+-----------------------+-------------------------------------+
|FusionMap          |                       | fusionmap                           |
+-------------------+-----------------------+-------------------------------------+
|Trinity + GMAP     |                       | trinity-gmap                        |
+-------------------+-----------------------+-------------------------------------+
|OncoFuse           |                       | oncofuse                            |
+-------------------+-----------------------+-------------------------------------+
|RNA STAR           | Chimeric.out.junction | rna-star_chimeric                   |
+-------------------+-----------------------+-------------------------------------+
|STAR Fusion        | _candidates.final     | star-fusion_final                   |
+-------------------+-----------------------+-------------------------------------+
|TopHat Fusion pre  | fusions.out           | tophat-fusion_pre                   |
+-------------------+-----------------------+-------------------------------------+
|TopHat Fusion post | potential_fusion.txt  | tophat-fusion_post_potential_fusion |
+-------------------+-----------------------+-------------------------------------+
|TopHat Fusion post | result.txt            | tophat-fusion_post_result           |
+-------------------+-----------------------+-------------------------------------+
|TopHat Fusion post | result.html           | tophat-fusion_post_result_html      |
+-------------------+-----------------------+-------------------------------------+

To annotate genes upon the breakpoints you must provide a BED file that contains gene annotations for the genome build that was used. Make sure **your BED file contains one gene per line**. You should use BED files that contain one exon per line only if you want to restrict your analysis to fusion genes detected within exons.

UCSC genome browser provides a very simple way of obtaining BED files with one gene per line by selecting their *RefSeq Genes*-track and *knownGene*-table and putting the export format to BED. Galaxy should have a built-in UCSC table browser.

    ]]></help>

    <!-- BibTeX entry for FuMa; marked as unpublished and pointing to the GitHub
         repository given in the url field. -->
    <citations>
        <citation type="bibtex">
           @unpublished{fuma,
              author       = {Youri Hoogstrate},
              title        = {FuMa: reporting overlap in RNA-seq detected fusion genes},
              url          = { https://github.com/yhoogstrate/fuma }
            }
        </citation>
    </citations>
</tool>
author	yhoogstrate
date	Tue, 20 Oct 2015 10:12:08 -0400
parents	86526900cb8f
children	cb0543909e83