diff arriba.xml @ 9:8c4c97fd0555 draft

"planemo upload for repository https://github.com/jj-umn/tools-iuc/tree/arriba/tools/arriba commit bd2c6bea7cb7dc30ca57f9d69ad49460ddf7f14b"
author jjohnson
date Wed, 13 Oct 2021 18:45:16 +0000
parents 1a56888ddb7d
children c58d1774c762
line wrap: on
line diff
--- a/arriba.xml	Mon Oct 11 19:00:45 2021 +0000
+++ b/arriba.xml	Wed Oct 13 18:45:16 2021 +0000
@@ -6,23 +6,38 @@
     <expand macro="requirements" />
     <expand macro="version_command" />
     <command detect_errors="exit_code"><![CDATA[
+@GENOME_SOURCE@
 #if str($input_params.input_source) == "use_fastq"
     #set $readFilesCommand = ''
-    #if $input_params.left_fq.is_of_type("fastq.gz"):
-        #set read1 = 'input_1.fastq.gz'
-        #set $readFilesCommand = '--readFilesCommand zcat'
-    #else:
-        #set read1 = 'input_1.fastq'
+    #set $read2 = ''
+    #if str($input_params.singlePaired.sPaired) == 'paired_collection':
+        #if $input_params.singlePaired.input.forward.is_of_type('fastq.gz', 'fastqsanger.gz'):
+            #set $readFilesCommand = '--readFilesCommand zcat'
+            #set read1 = 'input_1.fastq.gz'
+            #set read2 = 'input_2.fastq.gz'
+        #else
+            #set read1 = 'input_1.fastq'
+            #set read2 = 'input_2.fastq'
+        #end if
+        ln -sf '${$input_params.singlePaired.input.forward}' ${read1} &&
+        ln -sf '${$input_params.singlePaired.input.reverse}' ${read2} &&
+    #else
+        #if $input_params.singlePaired.input1.is_of_type('fastq.gz', 'fastqsanger.gz'):
+            #set $readFilesCommand = '--readFilesCommand zcat'
+            #set read1 = 'input_1.fastq.gz'
+        #else
+            #set read1 = 'input_1.fastq'
+        #end if
+        ln -sf '$input_params.singlePaired.input1' ${read1} &&
+        #if str($input_params.singlePaired.sPaired) == 'paired':
+            #set $read2 = $read1.replace('1','2')
+            ln -sf '$input_params.singlePaired.input2' ${read2} &&
+        #end if
     #end if
-    ln -f -s '${input_params.left_fq}' ${read1} &&
-    #if $input_params.right_fq.is_of_type("fastq.gz"):
-        #set read2 = 'input_2.fastq.gz'
-    #else:
-        #set read2 = 'input_2.fastq'
-    #end if
-    ln -f -s '${input_params.right_fq}' ${read2} &&
     #if str($input_params.index.index_source) == "history"
         #set $star_index_dir = $input_params.index.star_index.extra_files_path
+    #else 
+        #set $star_index_dir = $input_params.index.arriba_ref.fields.star_index
     #end if
     STAR 
     --runThreadN \${GALAXY_SLOTS:-1} 
@@ -57,7 +72,7 @@
     #end if
 #end if
     -a '$genome_assembly'
-    -g '$annotation'
+    -g '$genome_annotation'
     #if $blacklist
         -b '$blacklist'
     #else
@@ -174,29 +189,43 @@
                 </param>
             </when>
             <when value="use_fastq">
-                <param name="left_fq"
-                       type="data"
-                       format="fastqsanger,fastqsanger.gz"
-                       argument="--left_fq"
-                       label="left.fq file"/>
-                <param name="right_fq"
-                       type="data"
-                       format="fastqsanger,fastqsanger.gz"
-                       argument="--right_fq"
-                       label="right.fq file"/>
+                <conditional name="singlePaired">
+                    <param name="sPaired" type="select" label="Single-end or paired-end reads">
+                        <option value="single" selected="true">Single-end</option>
+                        <option value="paired">Paired-end (as individual datasets)</option>
+                        <option value="paired_collection">Paired-end (as collection)</option>
+                    </param>
+                    <when value="single">
+                        <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file"/>
+                    </when>
+                    <when value="paired">
+                        <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file, forward reads"/>
+                        <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" label="RNA-Seq FASTQ/FASTA file, reverse reads"/>
+                    </when>
+                    <when value="paired_collection">
+                        <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input" type="data_collection" collection_type="paired" label="RNA-Seq FASTQ/FASTA paired reads"/>
+                    </when>
+                </conditional>
                 <conditional name="index">
                     <param name="index_source" type="select" label="Arriba STAR index source">
                         <option value="history">Arriba STAR index from your history</option>
+                        <option value="cached">Use a built-in Arriba STAR index</option>
                     </param>
                     <when value="history">
                         <param name="star_index" argument="--genomeDir" type="data" format="txt" label="Arriba STAR index"
                             help="generated by:  Arriba Reference"/>
                     </when> 
+                    <when value="cached">
+                        <param name="arriba_ref" type="select" label="Arriba STAR index">
+                            <options from_data_table="arriba_indexes">
+                            </options>
+                        </param>
+                    </when> 
                 </conditional>
+
             </when>
         </conditional>
-        <param name="genome_assembly" argument="-a" type="data" format="fasta" label="genome assembly fasta"/>
-        <param name="annotation" argument="-g" type="data" format="gtf" label="GTF file with gene annotation"/>
+        <expand macro="genome_source" />
         <param name="blacklist" argument="-b" type="data" format="tabular,tabular.gz" optional="true" label="File containing blacklisted ranges."/>
         <param name="protein_domains" argument="-p" type="data" format="gff3" optional="true" label="File containing protein domains"/>
         <param name="known_fusions" argument="-k" type="data" format="tabular,tabular.gz"  optional="true" label="File containing known fusions">
@@ -395,7 +424,7 @@
     <outputs>
         <data name="fusions_tsv" format="tabular" label="${tool.name} on ${on_string}: fusions.tsv" from_work_dir="fusions.tsv"/>
         <data name="discarded_fusions_tsv" format="tabular" label="${tool.name} on ${on_string}: fusions.discarded.tsv" from_work_dir="fusions.discarded.tsv">
-            <filter> output_fusions_discarded == "yes"</filter>
+            <filter> output_fusions_discarded == True</filter>
         </data> 
         <data name="aligned_bam" format="bam" label="${tool.name} on ${on_string}: Aligned.bam" from_work_dir="Aligned.sortedByCoord.out.bam">
             <filter>input_params['input_source'] == "use_fastq"</filter>
@@ -411,8 +440,11 @@
                 <param name="input_source" value="use_star"/>
                 <param name="input" ftype="sam" value="Aligned.out.sam"/>
             </conditional>
-            <param name="genome_assembly" ftype="fasta" value="genome.fasta"/>
-            <param name="annotation" ftype="gtf" value="genome.gtf"/>
+            <conditional name="genome">
+                <param name="genome_source" value="history"/>
+                <param name="assembly" ftype="fasta" value="genome.fasta"/>
+                <param name="annotation" ftype="gtf" value="genome.gtf"/>
+            </conditional>
             <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
             <conditional name="visualization">
                 <param name="do_viz" value="no"/>
@@ -425,13 +457,17 @@
             </output>
         </test>
        <!-- Test 2 - From existing BAM with protein_domains and visualization -->
+
         <test> 
             <conditional name="input_params">
                 <param name="input_source" value="use_star"/>
                 <param name="input" ftype="sam" value="Aligned.out.sam"/>
             </conditional>
-            <param name="genome_assembly" ftype="fasta" value="genome.fasta"/>
-            <param name="annotation" ftype="gtf" value="genome.gtf"/>
+            <conditional name="genome">
+                <param name="genome_source" value="history"/>
+                <param name="assembly" ftype="fasta" value="genome.fasta"/>
+                <param name="annotation" ftype="gtf" value="genome.gtf"/>
+            </conditional>
             <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
             <conditional name="visualization">
                 <param name="do_viz" value="yes"/>
@@ -448,6 +484,27 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Test 3 - From existing BAM using cached genome source -->
+        <test> 
+            <conditional name="input_params">
+                <param name="input_source" value="use_star"/>
+                <param name="input" ftype="sam" value="Aligned.out.sam"/>
+            </conditional>
+            <conditional name="genome">
+                <param name="genome_source" value="cached"/>
+                <param name="arriba_ref" value="GRCh38+ENSEMBL93"/>
+            </conditional>
+            <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
+            <conditional name="visualization">
+                <param name="do_viz" value="no"/>
+                <param name="cytobands" ftype="tabular" value="cytobands.tsv"/>
+            </conditional>
+            <output name="fusions_tsv">
+                <assert_contents>
+                    <has_text_matching expression="BCR\tABL1"/>
+                </assert_contents>
+            </output>
+        </test>
 
     </tests>
     <help><![CDATA[
@@ -601,6 +658,13 @@
     NOTE: Arriba was designed for alignments from RNA-Seq data. It should not be run on WGS data directly. Many assumptions made by Arriba about the data (statistical models, blacklist, etc.) only apply to RNA-Seq data and are not valid for DNA-Seq data. For such data, a structural variant calling algorithm should be used and the results should be passed to Arriba.
 
 
+**OPTIONS**
+
+  - Arriba: https://arriba.readthedocs.io/en/latest/command-line-options/#arriba
+  - Visualization: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr
+  - RNA STAR: https://arriba.readthedocs.io/en/latest/workflow/
+
+
 **OUTPUTS**
 
 See:  https://arriba.readthedocs.io/en/latest/output-files/
@@ -662,12 +726,9 @@
 
     A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and if the column fusion_transcript has a value an excerpt of the sequence around the breakpoint.
 
-
-**OPTIONS**
-
-  - Arriba: https://arriba.readthedocs.io/en/latest/command-line-options/#arriba
-  - Visualization: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr
-  - RNA STAR: https://arriba.readthedocs.io/en/latest/workflow/
+.. image:: draw-fusions-example.png
+  :width: 800
+  :height: 467
 
 
 .. _Arriba: https://arriba.readthedocs.io/en/latest/