view TargetFinder.xml @ 1:c9d56748fbde draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/targetfinder/ commit fe9dd99a14770b6c5f26e24893599acb577e304f"
author rnateam
date Thu, 25 Mar 2021 19:53:15 +0000
parents 773fdd1a02ea
children
line wrap: on
line source

<tool id='targetfinder' name='TargetFinder' version='1.7.0+galaxy1' profile='20.01'>
    <description>plant small RNA target prediction tool</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro='edam' />
    <expand macro='requirements' />
    <!-- Error detection: the rule below matches every exit code from 1 upward. -->
    <stdio>
        <exit_code range='1:' />
    </stdio>
    <!--
        Command line for targetfinder_threads.pl. Flags map to the tool inputs:
        -f small RNA FASTA, -d target database FASTA, -c score cutoff,
        -p output format, -o output dataset. $r expands to '-r' or to an
        empty string (see the boolean parameter's truevalue/falsevalue).
        -t receives the value of GALAXY_SLOTS, falling back to 12 when that
        variable is unset or empty; the leading backslash keeps the shell
        expansion from being interpreted by the template engine.
    -->
    <command><![CDATA[
        targetfinder_threads.pl
            -f '$f'
            -d '$d'
            -c $c
            -p $output_format
            -t "\${GALAXY_SLOTS:-12}"
            $r
            -o '$output_file'
     
    ]]></command>
    <inputs>
        <!-- Query and target sequences; both are restricted to FASTA datasets. -->
        <param argument='-f' type='data' format='fasta' label='Input small RNA sequences file' help='FASTA-format' />
        <param argument='-d' type='data' format='fasta' label='Target sequence database file' help='FASTA-format' />
        <!-- Score cutoff. Declared exactly once so that $c in the command template is unambiguous. -->
        <param argument='-c' type='float' value='4.0' label='Prediction score cutoff value' help='Predicted targets are printed out if they are equal to or lower than the cutoff score specified. DEFAULT = 4' />
        <!-- Yields the literal flag '-r' when checked and an empty string otherwise. -->
        <param argument='-r' type='boolean' falsevalue='' truevalue='-r' checked='false' label='Search reverse strand for targets?' help='Use this option if the database is genomic DNA.' />
        <!-- Carries an explicit name because the command template and the output's change_format rules refer to 'output_format'. -->
        <param name='output_format' argument='-p' type='select' label='Output format' help='Output format for small RNA-target pairs (Default = classic).'>
            <option value='classic'>Original TargetFinder base-pair format</option>
            <option value='gff'>GFF</option>
            <option value='json'>JavaScript Object Notation (JSON)</option>
            <option value='table'>Tab-delimited format</option>
        </param>
    </inputs>
    <!-- Single result dataset: typed as plain text by default and re-typed below according to the selected output_format. -->
    <outputs>
        <data name='output_file' format='txt'>
            <change_format>
                <when input='output_format' value='gff' format='gff' />
                <when input='output_format' value='json' format='json' />
                <when input='output_format' value='table' format='tabular' />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: classic output, reverse-strand search off, cutoff 4.0. -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='false'/>
            <param name='c' value='4.0'/>
            <param name='output_format' value='classic'/>
            <output name='output_file' file='targetfinder_test_01.txt' compare='contains'/>
        </test>
        <!-- Test 2: classic output, reverse-strand search on, cutoff 4.0. -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='true'/>
            <param name='c' value='4.0'/>
            <param name='output_format' value='classic'/>
            <output name='output_file' file='targetfinder_test_02.txt' compare='contains'/>
        </test>
        <!-- Test 3: classic output, reverse-strand search off, raised cutoff 6.0. -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='false'/>
            <param name='c' value='6.0'/>
            <param name='output_format' value='classic'/>
            <output name='output_file' file='targetfinder_test_03.txt' compare='contains'/>
        </test>
        <!-- Test 4: classic output, reverse-strand search off, cutoff 4.0.
             NOTE(review): the parameters are identical to test 1 and only the expected file differs; confirm this is intended. -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='false'/>
            <param name='c' value='4.0'/>
            <param name='output_format' value='classic'/>
            <output name='output_file' file='targetfinder_test_04.txt' compare='contains'/>
        </test>
        <!-- Test 5: GFF output, reverse-strand search off, cutoff 4.0. -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='false'/>
            <param name='c' value='4.0'/>
            <param name='output_format' value='gff'/>
            <output name='output_file' file='targetfinder_test_05.gff' compare='contains'/>
        </test>
        <!-- Test 6: tab-delimited output, reverse-strand search off, cutoff 4.0. -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='false'/>
            <param name='c' value='4.0'/>
            <param name='output_format' value='table'/>
            <output name='output_file' file='targetfinder_test_06.tab' compare='contains'/>
        </test>
        <!-- Test 7: JSON output, reverse-strand search off, cutoff 4.0; the comparison is given a tolerance of one line (lines_diff='1'). -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='false'/>
            <param name='c' value='4.0'/>
            <param name='output_format' value='json'/>
            <output name='output_file' file='targetfinder_test_07.json' compare='contains' lines_diff='1'/>
        </test>
        <!-- Test 8: tab-delimited output, reverse-strand search on, cutoff 6.5. -->
        <test>
            <param name='f' value='arabidopsis_thaliana_miRNA.fasta'/>
            <param name='d' value='arabidopsis_thaliana_mRNA.fasta'/>
            <param name='r' value='true'/>
            <param name='c' value='6.5'/>
            <param name='output_format' value='table'/>
            <output name='output_file' file='targetfinder_test_08.tab' compare='contains'/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

TargetFinder will computationally predict small RNA binding sites on target transcripts from a sequence database.

This is done by aligning the input small RNA sequence against all transcripts, followed by site scoring using a position-weighted scoring matrix.

----

.. class:: infomark

**Input**

This tool requires two fasta files:

    ::

        -f Input small RNA sequences file (FASTA-format).
        -d Target sequence database file (FASTA-format)

----

.. class:: infomark

**Original TargetFinder Output**


Each predicted target site is printed out separately. 

The output consists of two parts. The first is a description line and the second is a base-pairing diagram of the target and small RNA (query) sequence. The description line contains the query name, the description line from the target sequence database, and the target prediction score.

    ::

            query=miR399a, target=AT2G33770.1 | Symbol: None |  ubiquitin-conjugating enzyme family protein, low similarity to u, score=1.5


The base-pairing diagram has the target site sequence on top in 5'-3' orientation and the query sequence on the bottom in 3'-5' orientation. Between the target site sequence and the query sequence are base pair symbols. A ':' (colon) symbol represents an ordinary Watson-Crick base pair, a '.' (period) represents a G:U base pair, and a ' ' (space) represents a mismatch.

    ::
        
        target  5' UAGGGCAAAUCUUCUUUGGCA 3'  
                   .:::::::::::.::::::::  
        query   3' GUCCCGUUUAGAGGAAACCGU 5'


If a small RNA is predicted to target a sequence more than once, each target site is reported as a separate entry.


----

.. class:: infomark

**Additional output formats**

In addition to the output described above ('classic' output), three new output format options were added to TargetFinder.

Generic Feature Format (GFF3):

    ::

        AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN  targetfinder  rna_target  607 627 1.5 + . smallRNA=miR399a;target_seq=UAGGGCAAAUCUUCUUUGGCA;base_pairs=.:::::::::::.::::::::;miR_seq=GUCCCGUUUAGAGGAAACCGU
        AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN  targetfinder  rna_target  740 760 1.5 + . smallRNA=miR399a;target_seq=UAGGGCAUAUCUCCUUUGGCA;base_pairs=.:::::: :::::::::::::;miR_seq=GUCCCGUUUAGAGGAAACCGU
        AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN  targetfinder  rna_target  829 849 1.5 + . smallRNA=miR399a;target_seq=UUGGGCAAAUCUCCUUUGGCA;base_pairs=. :::::::::::::::::::;miR_seq=GUCCCGUUUAGAGGAAACCGU


Tab-delimited Format:

    ::

        miR399a	AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN	607	627	+	1.5	UAGGGCAAAUCUUCUUUGGCA	.:::::::::::.::::::::	GUCCCGUUUAGAGGAAACCGU
        miR399a	AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN	740	760	+	1.5	UAGGGCAUAUCUCCUUUGGCA	.:::::: :::::::::::::	GUCCCGUUUAGAGGAAACCGU
        miR399a	AT2G33770.1 | Symbols: UBC24 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN	829	849	+	1.5	UUGGGCAAAUCUCCUUUGGCA	. :::::::::::::::::::	GUCCCGUUUAGAGGAAACCGU

JavaScript Object Notation Format (JSON):

    ::

        {
            'miR399a': {
                'hits' : [
                    {
                        'Target accession': 'AT2G33770.1 | Symbols: UBC24, ATUBC24, PHO2 | phosphate 2 | chr2:14277558-14283040 REVERSE LEN',
                        'Score': '1.5',
                        'Coordinates': '607-627',
                        'Strand': '+',
                        'Target sequence': 'UAGGGCAAAUCUUCUUUGGCA',
                        'Base pairing': '.:::::::::::.::::::::',
                        'amiRNA sequence': 'GUCCCGUUUAGAGGAAACCGU'
                    }
                ]
            }
        }


----

.. class:: infomark

**Method**

TargetFinder searches for potential miRNA target sites in a FASTA-formatted sequence database using three main steps.


        1. The small RNA query sequence is aligned to every sequence in the FASTA-formatted sequence database using `Smith-Waterman (SW) alignments <https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm>`_ implemented in the FASTA package (ssearch35_t).
        2. The SW alignments are converted into RNA duplexes.
        3. Each duplex is scored using a position-dependent scoring matrix.

SW alignments are used to identify the best complementary regions between the small RNA query sequence and every sequence in the FASTA-formatted sequence database.

]]></help>
    <expand macro='citations' />
</tool>