view taxonomy_filter_refseq.xml @ 1:d9662c76b6d5 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc commit e68b434ae52d29f038bac26874ceb51d4911b4fb"
author iuc
date Wed, 29 Apr 2020 16:50:22 -0400
parents 28e95c6a944d
children
line wrap: on
line source

<tool id="taxonomy_filter_refseq" name="Filter RefSeq by taxonomy" version="@TOOL_VERSION@+galaxy0">
    <!-- Short summary of what the tool does. -->
    <description>Only retain sequences that are descendants of a given taxonomic node.</description>
    <macros>
        <!-- Single place where the version is defined; @TOOL_VERSION@ is reused by the tool's
             version attribute and by the package requirement below. -->
        <token name="@TOOL_VERSION@">0.3.0</token>
    </macros>
    <!-- EDAM ontology annotations (topic and operation) for this tool. -->
    <edam_topics>
        <edam_topic>topic_0091</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3460</edam_operation>
    </edam_operations>
    <!-- Package dependency, pinned to the same version as the tool via @TOOL_VERSION@. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">rust-ncbitaxonomy</requirement>
    </requirements>
    <!-- Currently using a shell redirect to capture the output, rather than having the tool write
         the output file itself, due to a bug in pre-0.1.4 rust-ncbitaxonomy.
         NOTE(review): the requirement is now pinned via @TOOL_VERSION@; confirm whether this
         workaround is still needed. -->
    <command detect_errors="aggressive"><![CDATA[
        ## RefSeq input: either the path stored in the selected data table entry,
        ## or a symlink (refseq.fasta) pointing at the history dataset.
        #if str($refseq.refseq_source) == 'cached':
            #set $refseq_input = str( $refseq.cached_refseq.fields.path )
        #else:
            ln -s '$refseq.history_refseq' refseq.fasta &&
            #set $refseq_input = "refseq.fasta"
        #end if
        ## Taxonomy input: either the directory stored in the selected data table entry,
        ## or a local directory holding symlinks named nodes.dmp and names.dmp.
        #if str($taxonomy.taxonomy_source) == 'cached':
            #set $taxonomy_dir = $taxonomy.taxonomy_table.fields.path
        #else:
            mkdir taxonomy && ln -s '$taxonomy.nodes' taxonomy/nodes.dmp && ln -s '$taxonomy.names' taxonomy/names.dmp &&
            #set $taxonomy_dir = 'taxonomy'
        #end if
        taxonomy_filter_refseq $no_curated $no_predicted '$refseq_input' '$taxonomy_dir' '$ancestor_name' >'$refseq_output_fasta'
    ]]></command>
    <inputs>
        <!-- Source of the NCBI Taxonomy: a server-side data table entry, or nodes.dmp and
             names.dmp datasets picked from the history. -->
        <conditional name="taxonomy">
            <param name="taxonomy_source" type="select" label="Choose source of NCBI Taxonomy">
                <option value="cached" selected="true">Use built-in NCBI Taxonomy database</option>
                <option value="history">Datasets from history</option>
            </param>
            <when value="cached">
                <param type="select" name="taxonomy_table" label="NCBI Taxonomy database">
                    <options from_data_table="ncbi_taxonomy">
                        <filter type="sort_by" column="name" />
                        <validator type="no_options" message="No NCBI Taxonomy downloads are available" />
                    </options>
                    <validator type="no_options" message="No NCBI Taxonomy database download is available" />
                </param>
            </when>
            <when value="history">
                <param name="nodes" type="data" format="txt" label="NCBI Taxonomy nodes.dmp file" />
                <param name="names" type="data" format="txt" label="NCBI Taxonomy names.dmp file" />
            </when>
        </conditional>
        <!-- Source of the RefSeq sequences: a server-side FASTA from the all_fasta data table,
             or a FASTA dataset picked from the history. -->
        <conditional name="refseq">
            <param name="refseq_source" type="select" label="Choose source of RefSeq sequences">
                <option value="cached" selected="true">Use a built-in RefSeq FASTA file</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param type="select" name="cached_refseq" label="Select RefSeq FASTA file">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="name" />
                        <validator type="no_options" message="No FASTA data is available" />
                    </options>
                    <validator type="no_options" message="No FASTA data is available" />
                </param>
            </when>
            <when value="history">
                <param type="data" name="history_refseq" format="fasta" label="RefSeq FASTA file" />
            </when>
        </conditional>
        <!-- Name of the taxon whose descendants are kept; passed to the tool as a positional argument. -->
        <param name="ancestor_name" type="text" label="Ancestor taxon (scientific) name" />
        <!-- Optional flags: each expands to its command-line switch when checked and to an
             empty string otherwise. -->
        <param argument="--no_curated" type="boolean" truevalue="--no_curated" falsevalue="" checked="false"
            label="Exclude curated sequences from dataset"
            help="Curated sequences are identified by their accession number prefixes and excluded if this option is checked"
        />
        <param argument="--no_predicted" type="boolean" truevalue="--no_predicted" falsevalue="" checked="false"
            label="Exclude predicted sequences from the dataset"
            help="Computationally predicted (but not curated) sequences are identified by their accession number prefix and excluded if this option is checked"
        />
    </inputs>
    <outputs>
        <!-- Filtered sequences; the command writes them here through its shell redirect. -->
        <data name="refseq_output_fasta"
              format="fasta"
              label="${tool.name} on ${on_string}" />
    </outputs>
    <tests>
        <!-- History inputs, both exclusion flags left at their defaults. -->
        <test>
            <param name="taxonomy_source" value="history" />
            <param name="refseq_source" value="history" />
            <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" />
            <param name="names" ftype="txt" value="sample_tree_names.dmp" />
            <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" />
            <param name="ancestor_name" value="unclassified bacterial viruses" />
            <output name="refseq_output_fasta" value="output1.fasta" />
        </test>
        <!-- Cached (data table) inputs, no exclusions. Only parameters of the selected
             "cached" branches are set; the history-only dataset parameters are not used here. -->
        <test>
            <param name="taxonomy_source" value="cached" />
            <param name="taxonomy_table" value="2019-01-01" />
            <param name="refseq_source" value="cached" />
            <param name="cached_refseq" value="refseq_sample" />
            <param name="ancestor_name" value="unclassified bacterial viruses" />
            <param name="no_predicted" value="false" />
            <param name="no_curated" value="false" />
            <output name="refseq_output_fasta" value="output1.fasta" />
        </test>
        <!-- History inputs, predicted sequences excluded. -->
        <test>
            <param name="taxonomy_source" value="history" />
            <param name="refseq_source" value="history" />
            <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" />
            <param name="names" ftype="txt" value="sample_tree_names.dmp" />
            <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" />
            <param name="ancestor_name" value="unclassified bacterial viruses" />
            <param name="no_predicted" value="true" />
            <param name="no_curated" value="false" />
            <output name="refseq_output_fasta" value="output2.fasta" />
        </test>
        <!-- History inputs, curated sequences excluded. -->
        <test>
            <param name="taxonomy_source" value="history" />
            <param name="refseq_source" value="history" />
            <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" />
            <param name="names" ftype="txt" value="sample_tree_names.dmp" />
            <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" />
            <param name="ancestor_name" value="unclassified bacterial viruses" />
            <param name="no_predicted" value="false" />
            <param name="no_curated" value="true" />
            <output name="refseq_output_fasta" value="output3.fasta" />
        </test>
    </tests>
    <help><![CDATA[
        This tool filters NCBI RefSeq sequences so that only those whose source species is a
        descendant of a given taxon in the NCBI taxonomy are retained. For example, from the NCBI RefSeq
        "other vertebrate" file only ray-finned fishes can be retained by filtering for "Actinopterygii".

        The NCBI RefSeq FASTA files can either be provided by the Galaxy administrator or from the user
        history. The NCBI taxonomy should be provided by the Galaxy administrator but if that is not
        possible the nodes.dmp and names.dmp files from the NCBI taxonomy can be provided in the history.
    ]]></help>
    <citations>
        <!-- BibTeX entry for the ncbitaxonomy crate; every field value is brace-delimited. -->
        <citation type="bibtex">
    @misc{vanHeusden2019,
    author = {van Heusden, Peter},
    year = {2019},
    title = {ncbitaxonomy Rust crate},
    publisher = {crates.io},
    journal = {Rust Package Registry},
    url = {https://crates.io/crates/ncbitaxonomy},
    }
        </citation>
    </citations>
</tool>