comparison fuma.xml @ 0:a4cfaa0e3e5d draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/fuma commit f56125b28ec44aa28943ed040b7b202fed9c875b-dirty
author yhoogstrate
date Thu, 21 May 2015 09:56:41 -0400
parents
children 54ce44828e1b
comparison
equal deleted inserted replaced
-1:000000000000 0:a4cfaa0e3e5d
1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="fuma" name="FuMa" version="2.7.1.b">
3 <description>FuMa (FusionMatcher) matches detected fusion genes based on gene name subset matching (designed in particular for RNA-Seq).</description>
4
5 <requirements>
6 <requirement type="package" version="2.7.1">fuma</requirement>
7 </requirements>
8 <!-- The version command below keeps only the first line of the fuma version output; stderr is merged into stdout first. -->
9 <version_command>fuma --version 2>&amp;1 | head -n 1</version_command><!-- -V also works, but is not GNU standard -->
10
11 <command>
12 #import pipes
13 ## Build the three space-separated argument lists passed to fuma (-a, -s and -l), one entry per repeat block.
14 #set $gene_annotations = []
15 #set $samples = []
16 #set $links = []
17
18 #for $i, $d in enumerate( $datasets )
19 ## The dataset name is user-controlled text, so it is shell-quoted before being placed on the command line.
20 #set $sample_name = pipes.quote(str($d['sample'].name))
21 ## Every dataset gets its own annotation alias "ga_N" (N = repeat index) pointing at its BED file.
22 #set $gene_annotations = $gene_annotations + [ "ga_" + str($i) + ":" + str($d['gene_annotation'].file_name) ]
23 ## A sample entry is name:format:path; the link entry ties the sample name to its "ga_N" alias.
24 #set $samples = $samples + [ $sample_name + ":" + str($d['format']) + ":" + str($d['sample'].file_name) ]
25 #set $links = $links + [ $sample_name + ":" + str("ga_") + str($i) ]
26 #end for
27
28 #set $gene_annotations_str = " ".join(gene_annotations)
29 #set $samples_str = " ".join(samples)
30 #set $links_str = " ".join(links)
31
32 fuma
33 -a
34 $gene_annotations_str
35 -s
36 $samples_str
37 -l
38 $links_str
39 #if $output_format.value == "list_boolean"
40 -f list
41 #else
42 -f $output_format.value
43 #end if
44 -o $fuma_overview
45 ## "list_boolean" output is produced by running fuma with "-f list" and converting the result afterwards.
46 ## The conversion is chained with a logical AND so it only runs, and the output is only replaced, when fuma itself succeeded.
47
48 #if $output_format.value == "list_boolean"
49 &amp;&amp; fuma-list-to-boolean-list -o tmp.txt $fuma_overview &amp;&amp;
50 mv tmp.txt $fuma_overview
51 #end if
52 </command>
53
54 <inputs>
55 <repeat name="datasets" title="FusionGene Datasets" min="2"> <!-- min="2": at least two datasets must be supplied -->
56 <param name="sample" type="data" format="txt,tabular" label="Dataset (RNA-Seq fusion gene detection experiment)" />
57 <param name="format" type="select" label="Format of dataset">
58 <option value="chimerascan">ChimeraScan</option>
59 <option value="defuse">DeFuse</option>
60 <option value="complete-genomics">Complete Genomics</option>
61 <option value="fusion-catcher_final">Fusion Catcher (final-list file)</option>
62 <option value="fusionmap">FusionMap</option>
63 <option value="trinity-gmap">GMAP (As step after Trinity)</option>
64 <option value="oncofuse">OncoFuse</option>
65 <option value="rna-star_chimeric">STAR (chimeric file)</option>
66 <option value="tophat-fusion_pre">Tophat Fusion Pre (fusions.out)</option>
67 <option value="tophat-fusion_post_potential_fusion">Tophat Fusion Post (potential_fusion.txt)</option>
68 <option value="tophat-fusion_post_result">Tophat Fusion Post (result.txt)</option>
69 </param>
70 <param name="gene_annotation" type="data" format="bed" label="Corresponding gene-name annotation file (BED format)" help="Make use of persistent gene annotations! Gene annotations should only be different if different reference genome builds were used." />
71 </repeat>
72 <!-- Output format selector; "list_boolean" is the default and is handled in the command by running fuma with "-f list" followed by a conversion step. -->
73 <param name="output_format" type="select" label="Output format">
74 <option value="list_boolean" selected="true">List (Boolean)</option>
75 <option value="list">List</option>
76 <option value="summary">Count summary</option>
77 </param>
78 </inputs>
79
80 <outputs> <!-- Single tabular result; its label lists every input dataset as "hid: name", joined by commas. -->
81 <data format="tabular" name="fuma_overview" label="${tool.name} on ${', '.join([ str(d['sample'].hid)+': '+d['sample'].name for d in $datasets ])}" />
82 </outputs>
83
84 <tests>
85 <test>
86 <!-- <repeat name="datasets"> -->
87 <param name="datasets_0|sample" value="chimerascan.txt" ftype="tabular" />
88 <param name="datasets_0|format" value="chimerascan" />
89 <param name="datasets_0|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
90 <!-- </repeat> -->
91 <!-- <repeat name="datasets"> -->
92 <param name="datasets_1|sample" value="defuse.txt" ftype="tabular" />
93 <param name="datasets_1|format" value="defuse" />
94 <param name="datasets_1|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
95 <!-- </repeat> -->
96 <!-- <repeat name="datasets"> -->
97 <param name="datasets_2|sample" value="fusion-map.txt" ftype="tabular" />
98 <param name="datasets_2|format" value="fusionmap" />
99 <param name="datasets_2|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
100 <!-- </repeat> -->
101 <!-- <repeat name="datasets"> -->
102 <param name="datasets_3|sample" value="edgren_tp.txt" ftype="tabular" />
103 <param name="datasets_3|format" value="fusionmap" />
104 <param name="datasets_3|gene_annotation" value="refseq_genes_hg19.bed" ftype="bed" />
105 <!-- </repeat> -->
106 <!-- Four datasets (one chimerascan, one defuse, two fusionmap) share the same refseq_genes_hg19.bed annotation; the "summary" output is compared against output.txt. -->
107 <param name="output_format" value="summary" />
108
109 <output name="fuma_overview" file="output.txt" />
110 </test>
111 </tests>
112
113 <help>============
114 Introduction
115 ============
116
117 FuMa (Fusion Matcher) matches predicted fusion events (both genomic and transcriptomic) according to chromosomal location or associated gene annotation(s), where the latter should be independent of the genome build.
118
119 Because RNA-Sequencing deals with samples that may have undergone splicing, reads may be split up because of biological processes. If a fusion event takes place, the same thing may happen. Therefore we hypothesize that using spanning read distances may be unreliable, because there are known introns of > 100kb. Therefore, FuMa translates the breakpoint to gene names, and only overlaps breakpoints with the same gene name(s).
120
121 =====
122 Usage
123 =====
124
125 After you have uploaded the results of your Fusion Gene detection experiment, and selected the format to be *tabular*, you can start the FuMa wrapper. For each dataset you simply have to add another repeat. Then you have to select a corresponding format:
126
127 *******
128 Formats
129 *******
130
131 +-------------------+-----------------------+-------------------------------------+
132 |Tools | File | Format string |
133 +===================+=======================+=====================================+
134 |ChimeraScan | chimeras.bedpe | chimerascan |
135 +-------------------+-----------------------+-------------------------------------+
136 |Complete Genomics | highConfidenceJu*.tsv | complete-genomics |
137 +-------------------+-----------------------+-------------------------------------+
138 |Complete Genomics | allJunctionsBeta*.tsv | complete-genomics |
139 +-------------------+-----------------------+-------------------------------------+
140 |DeFuse | results.txt | defuse |
141 +-------------------+-----------------------+-------------------------------------+
142 |DeFuse | results.classify.txt | defuse |
143 +-------------------+-----------------------+-------------------------------------+
144 |DeFuse | results.filtered.txt | defuse |
145 +-------------------+-----------------------+-------------------------------------+
146 |Fusion Catcher | final-list_cand*.txt | fusion-catcher_final |
147 +-------------------+-----------------------+-------------------------------------+
148 |FusionMap | | fusionmap |
149 +-------------------+-----------------------+-------------------------------------+
150 |Trinity + GMAP | | trinity-gmap |
151 +-------------------+-----------------------+-------------------------------------+
152 |OncoFuse | | oncofuse |
153 +-------------------+-----------------------+-------------------------------------+
154 |RNA STAR | Chimeric.out.junction | rna-star_chimeric |
155 +-------------------+-----------------------+-------------------------------------+
156 |TopHat Fusion pre | fusions.out | tophat-fusion_pre |
157 +-------------------+-----------------------+-------------------------------------+
158 |TopHat Fusion post | potential_fusion.txt | tophat-fusion_post_potential_fusion |
159 +-------------------+-----------------------+-------------------------------------+
160 |TopHat Fusion post | result.txt | tophat-fusion_post_result |
161 +-------------------+-----------------------+-------------------------------------+
162
163 To annotate genes upon the breakpoints you must provide a BED file that contains gene annotations for the genome build that was used. Make sure **your BED file contains one gene per line**. You should use BED files that contain one exon per line only if you want to restrict your analysis to fusion genes detected within exons.
164
165 UCSC genome browser provides a very simple way of obtaining BED files with one gene per line by selecting their *RefSeq Genes*-track and *knownGene*-table and putting the export format to BED. Galaxy should have a built-in UCSC table browser.
166
167 </help>
168 </tool>