diff TargetFinder.xml @ 1:c9d56748fbde draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/targetfinder/ commit fe9dd99a14770b6c5f26e24893599acb577e304f"
author rnateam
date Thu, 25 Mar 2021 19:53:15 +0000
parents 773fdd1a02ea
children
line wrap: on
line diff
--- a/TargetFinder.xml	Wed Dec 21 17:31:28 2016 -0500
+++ b/TargetFinder.xml	Thu Mar 25 19:53:15 2021 +0000
@@ -1,73 +1,224 @@
-<tool id="targetfinder" name="Plant small RNA target prediction tool" version="1.7.0">
-    <requirements>
-        <requirement type="package" version="1.7">targetfinder</requirement>
-    </requirements>
+<tool id='targetfinder' name='TargetFinder' version='1.7.0+galaxy1' profile='20.01'>
+    <description>plant small RNA target prediction tool</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro='edam' />
+    <expand macro='requirements' />
     <stdio>
-        <exit_code range="1:" />
+        <exit_code range='1:' />
     </stdio>
     <command><![CDATA[
         targetfinder_threads.pl
-            -f $f
-            -d $d
+            -f '$f'
+            -d '$d'
             -c $c
+            -p $output_format
             -t "\${GALAXY_SLOTS:-12}"
             $r
-            -o $output1
+            -o '$output_file'
+     
     ]]></command>
     <inputs>
-        <param label="Input small RNA sequences file" help="(-f) FASTA-format" name="f" type="data" format="fasta" />
-        <param label="Target sequence database file" help="(-d) FASTA-format" name="d" type="data" format="fasta" />
-        <param label="Prediction score cutoff value" help="(-c) DEFAULT = 4" name="c" type="float" value="4.0" />
-        <param label="Search reverse strand for targets?" help="(-r) Use this option if the database is genomic DNA." name="r" type="boolean" falsevalue="" truevalue="-r" checked="false" />
+        <param argument='-f' type='data' format='fasta' label='Input small RNA sequences file' help='FASTA-format' />
+        <param argument='-d' type='data' format='fasta' label='Target sequence database file' help='FASTA-format' />
+        <param argument='-c' type='float' value='4.0' label='Prediction score cutoff value' help='Predicted targets are printed out if they are equal to or lower than the cutoff score specified. DEFAULT = 4' />
+        <param argument='-c' type='float' value='4.0' label='Prediction score cutoff value' help='Predicted targets are printed out if they are equal to or lower than the cutoff score specified. DEFAULT = 4' />
+        <param argument='-r' type='boolean' falsevalue='' truevalue='-r' checked='false' label='Search reverse strand for targets?' help='Use this option if the database is genomic DNA.'  />
+        <param name='output_format' argument='-p' type='select' label='Output format' help='Output format for small RNA-target pairs (Default = classic).' >
+                <option value='classic'>Original TargetFinder base-pair format</option>
+                <option value='gff'>GFF</option>
+                <option value='json'>JavaScript Object Notation (JSON)</option>
+                <option value='table'>Tab-delimited format</option>
+        </param>
     </inputs>
     <outputs>
-        <data name="output1" format="data"/>
+        <data name='output_file' format='txt'>
+            <change_format>
+                <when input="output_format" value="gff" format="gff" />
+                <when input="output_format" value="json" format="json" />
+                <when input="output_format" value="table" format="tabular" />
+            </change_format>
+        </data>
     </outputs>
     <tests>
         <test>
-            <param name="f" value="ath_miRNAs_test.fa"/>
-            <param name="d" value="Antirrhinum_majus.mRNA.EST.fasta"/>
-            <param name="r" value=""/>
-            <param name="c" value="4.0"/>
-            <param name="t" value="1"/>
-            <output name="output1" file="ath_miRNAs_predicted_targets.txt" compare="contains"/>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='false'/>
+            <param name='c' value='4.0'/>
+            <param name='output_format' value='classic'/>
+            <output name='output_file' file='targetfinder_test_01.txt' compare='contains'/>
+        </test>
+        <test>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='true'/>
+            <param name='c' value='4.0'/>
+            <param name='output_format' value='classic'/>
+            <output name='output_file' file='targetfinder_test_02.txt' compare='contains'/>
+        </test>
+        <test>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='false'/>
+            <param name='c' value='6.0'/>
+            <param name='output_format' value='classic'/>
+            <output name='output_file' file='targetfinder_test_03.txt' compare='contains'/>
+        </test>
+        <test>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='false'/>
+            <param name='c' value='4.0'/>
+            <param name='output_format' value='classic'/>
+            <output name='output_file' file='targetfinder_test_04.txt' compare='contains'/>
+        </test>
+        <test>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='false'/>
+            <param name='c' value='4.0'/>
+            <param name='output_format' value='gff'/>
+            <output name='output_file' file='targetfinder_test_05.gff' compare='contains'/>
+        </test>
+        <test>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='false'/>
+            <param name='c' value='4.0'/>
+            <param name='output_format' value='table'/>
+            <output name='output_file' file='targetfinder_test_06.tab' compare='contains'/>
+        </test>
+        <test>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='false'/>
+            <param name='c' value='4.0'/>
+            <param name='output_format' value='json'/>
+            <output name='output_file' file='targetfinder_test_07.json' compare='contains' lines_diff='1'/>
+        </test>
+        <test>
+            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
+            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
+            <param name='r' value='true'/>
+            <param name='c' value='6.5'/>
+            <param name='output_format' value='table'/>
+            <output name='output_file' file='targetfinder_test_08.tab' compare='contains'/>
         </test>
     </tests>
     <help><![CDATA[
+.. class:: infomark
+
 **What it does**
 
 TargetFinder will computationally predict small RNA binding sites on target transcripts from a sequence database.
 
-This is done by aligning the input small RNA sequence against all transcripts,
-followed by site scoring using a position-weighted scoring matrix.
+This is done by aligning the input small RNA sequence against all transcripts, followed by site scoring using a position-weighted scoring matrix.
+
+----
+
+.. class:: infomark
 
 **Input**
 
--f Input small RNA sequences file (FASTA-format).
+This tool requires two fasta files:
 
--d Target sequence database file (FASTA-format)
+    ::
+
+        -f Input small RNA sequences file (FASTA-format).
+        -d Target sequence database file (FASTA-format)
 
-**Output**
+----
+
+.. class:: infomark
 
-Each predicted target site is printed out separately.
-The output consists of two parts.
+**Original TargetFinder Output**
+
+
+Each predicted target site is printed out separately. 
 
-The first is a description line and the second is a base-pairing diagram of the target and small RNA (query) sequence.
+The output consists of two parts. The first is a description line and the second is a base-pairing diagram of the target and small RNA (query) sequence. The description line contains the query name, the description line from the target sequence database, and the target prediction score.
 
-The description line contains the query name, the description line from the target sequence database, and the target prediction score.
+    ::
+
+            query=miR399a, target=AT2G33770.1 | Symbol: None |  ubiquitin-conjugating enzyme family protein, low similarity to u, score=1.5
+
 
-The base-pairing diagram has the target site sequence on top in 5'-3' orientation and the query sequence on the bottom in 3'-5' orientation.
+The base-pairing diagram has the target site sequence on top in 5'-3' orientation and the query sequence on the bottom in 3'-5' orientation. Between the target site sequece and the query sequence are base pair symbols. A ':' (colon) symbol represents an ordinary Watson-Crick base pair, a '.' (period) represents a G:U base pair, and a ' ' (space) represents a mismatch.
 
-Between the target site sequece and the query sequence are base pair symbols.
+    ::
+        
+        target  5' UAGGGCAAAUCUUCUUUGGCA 3'  
+                   .:::::::::::.::::::::  
+        query   3' GUCCCGUUUAGAGGAAACCGU 5'
 
-A ":" (colon) symbol represents an ordinary Watson-Crick base pair, a "." (period) represents a G:U base pair, and a " " (space) represents a mismatch.
 
 If a small RNA is predicted to target a sequence more than once, each target site will be output as separate output.
+
+
+----
+
+.. class:: infomark
+
+**Additional output formats**
+
+In addition to the output described above ('classic' output), three new output format options were added to TargetFinder.
+
+Generic Feature Format (GFF3):
+
+    ::
+
+        AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN  targetfinder  rna_target  607 627 1.5 + . smallRNA=miR399a;target_seq=UAGGGCAAAUCUUCUUUGGCA;base_pairs=.:::::::::::.::::::::;miR_seq=GUCCCGUUUAGAGGAAACCGU
+        AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN  targetfinder  rna_target  740 760 1.5 + . smallRNA=miR399a;target_seq=UAGGGCAUAUCUCCUUUGGCA;base_pairs=.:::::: :::::::::::::;miR_seq=GUCCCGUUUAGAGGAAACCGU
+        AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN  targetfinder  rna_target  829 849 1.5 + . smallRNA=miR399a;target_seq=UUGGGCAAAUCUCCUUUGGCA;base_pairs=. :::::::::::::::::::;miR_seq=GUCCCGUUUAGAGGAAACCGU
+
+
+Tab-deliminated Format:
+
+    ::
+
+        miR399a	AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN	607	627	+	1.5	UAGGGCAAAUCUUCUUUGGCA	.:::::::::::.::::::::	GUCCCGUUUAGAGGAAACCGU
+        miR399a	AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN	740	760	+	1.5	UAGGGCAUAUCUCCUUUGGCA	.:::::: :::::::::::::	GUCCCGUUUAGAGGAAACCGU
+        miR399a	AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN	829	849	+	1.5	UUGGGCAAAUCUCCUUUGGCA	. :::::::::::::::::::	GUCCCGUUUAGAGGAAACCGU
+
+JavaScript Object Notation Format (JSON):
+
+    ::
+
+        {
+            'miR399a': {
+                'hits' : [
+                    {
+                        'Target accession': 'AT2G33770.1 | Symbols: UBC24, ATUBC24, PHO2 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN',
+                        'Score': '1.5',
+                        'Coordinates': '607-627',
+                        'Strand': '+',
+                        'Target sequence': 'UAGGGCAAAUCUUCUUUGGCA',
+                        'Base pairing': '.:::::::::::.::::::::',
+                        'amiRNA sequence': 'GUCCCGUUUAGAGGAAACCGU'
+                    }
+                ]
+            }
+        }
+
+
+----
+
+.. class:: infomark
+
+**Method**
+
+TargetFinder searches for potential miRNA target sites in a FASTA-formated sequence database using three main steps.
+
+
+        1. The small RNA query sequence is aligned to every sequence in the FASTA-formated sequence database using `Smith-Waterman (SW) alignments <https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm>`_ implemented in the FASTA package (ssearch35_t).
+        2. The SW alignments are converted into RNA duplexes.
+        3. Each duplex is scored using a position-dependent scoring matrix.
+
+SW alignments are used to identify the best complementary regions between the small RNA query sequence and every sequence in the FASTA-formated sequence database.
+
 ]]></help>
-    <citations>
-        <citation type="doi">10.1016/j.cell.2005.04.004</citation>
-        <citation type="doi">10.1371/journal.pone.0000219</citation>
-        <citation type="doi">10.1007/978-1-60327-005-2_4</citation>
-    </citations>
+    <expand macro='citations' />
 </tool>