Mercurial > repos > iuc > taxonomy_filter_refseq
diff taxonomy_filter_refseq.xml @ 0:28e95c6a944d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc commit 665d364fb51576608b0eb66a4f89d92159925ccc
author | iuc |
---|---|
date | Wed, 16 Jan 2019 08:30:47 -0500 |
parents | |
children | d9662c76b6d5 |
line wrap: on
line diff
<tool id="taxonomy_filter_refseq" name="Filter RefSeq by taxonomy" version="@TOOL_VERSION@+galaxy0">
    <description>Only retain sequences that are descendants of a given taxonomic node.</description>
    <macros>
        <token name="@TOOL_VERSION@">0.1.4</token>
    </macros>
    <edam_topics>
        <edam_topic>topic_0091</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3460</edam_operation>
    </edam_operations>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">rust-ncbitaxonomy</requirement>
    </requirements>
    <!-- currently using redirect to output rather than output to file due to bug in pre-0.1.4 rust-ncbitaxonomy -->
    <command detect_errors="aggressive"><![CDATA[
        ## Resolve the RefSeq FASTA: either a path from the data table or a symlink to the history dataset.
        #if str($refseq.refseq_source) == 'cached':
            #set $refseq_input = str($refseq.cached_refseq.fields.path)
        #else:
            ln -s '$refseq.history_refseq' refseq.fasta &&
            #set $refseq_input = "refseq.fasta"
        #end if
        ## Resolve the taxonomy directory: either a path from the data table or a local
        ## directory holding symlinks named nodes.dmp and names.dmp.
        #if str($taxonomy.taxonomy_source) == 'cached':
            #set $taxonomy_dir = str($taxonomy.taxonomy_table.fields.path)
        #else:
            mkdir taxonomy &&
            ln -s '$taxonomy.nodes' taxonomy/nodes.dmp &&
            ln -s '$taxonomy.names' taxonomy/names.dmp &&
            #set $taxonomy_dir = 'taxonomy'
        #end if
        taxonomy_filter_refseq '$refseq_input' '$taxonomy_dir' '$ancestor_name' >'$refseq_output_fasta'
    ]]></command>
    <inputs>
        <conditional name="taxonomy">
            <param name="taxonomy_source" type="select" label="Choose source of NCBI Taxonomy">
                <option value="cached" selected="true">Use built-in NCBI Taxonomy database</option>
                <option value="history">Datasets from history</option>
            </param>
            <when value="cached">
                <param type="select" name="taxonomy_table" label="NCBI Taxonomy database">
                    <options from_data_table="ncbi_taxonomy">
                        <filter type="sort_by" column="name" />
                        <validator type="no_options" message="No NCBI Taxonomy downloads are available" />
                    </options>
                    <validator type="no_options" message="No NCBI Taxonomy database download is available" />
                </param>
            </when>
            <when value="history">
                <param name="nodes" type="data" format="txt" label="NCBI Taxonomy nodes.dmp file" />
                <param name="names" type="data" format="txt" label="NCBI Taxonomy names.dmp file" />
            </when>
        </conditional>
        <conditional name="refseq">
            <param name="refseq_source" type="select" label="Choose source of RefSeq sequences">
                <option value="cached" selected="true">Use a built-in RefSeq FASTA file</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param type="select" name="cached_refseq" label="Select RefSeq FASTA file">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="name" />
                        <validator type="no_options" message="No FASTA data is available" />
                    </options>
                    <validator type="no_options" message="No FASTA data is available" />
                </param>
            </when>
            <when value="history">
                <param type="data" name="history_refseq" format="fasta" label="RefSeq FASTA file" />
            </when>
        </conditional>
        <param name="ancestor_name" type="text" label="Ancestor taxon (scientific) name">
            <!-- An empty name would be passed to the executable as '' -->
            <validator type="empty_field" message="An ancestor taxon name is required" />
        </param>
    </inputs>
    <outputs>
        <data name="refseq_output_fasta" format="fasta" label="${tool.name} on ${on_string}" />
    </outputs>
    <tests>
        <!-- Both taxonomy and RefSeq supplied as history datasets -->
        <test>
            <param name="taxonomy_source" value="history" />
            <param name="refseq_source" value="history" />
            <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" />
            <param name="names" ftype="txt" value="sample_tree_names.dmp" />
            <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" />
            <param name="ancestor_name" value="unclassified bacterial viruses" />
            <output name="refseq_output_fasta" value="output1.fasta" />
        </test>
        <!-- Both taxonomy and RefSeq selected from data tables; history-branch params do not apply here -->
        <test>
            <param name="taxonomy_source" value="cached" />
            <param name="taxonomy_table" value="2019-01-01" />
            <param name="refseq_source" value="cached" />
            <param name="cached_refseq" value="refseq_sample" />
            <param name="ancestor_name" value="unclassified bacterial viruses" />
            <output name="refseq_output_fasta" value="output1.fasta" />
        </test>
    </tests>
    <help><![CDATA[
This tool allows NCBI RefSeq sequences to be filtered so that only those whose species are
descendants of a given taxon in the NCBI taxonomy are retained. For example, from the NCBI RefSeq
"other vertebrate" file only ray finned fishes can be retained by filtering for "Actinopterygii".

The NCBI RefSeq FASTA files can either be provided by the Galaxy administrator or from the user
history. The NCBI taxonomy should be provided by the Galaxy administrator but if that is not
possible the nodes.dmp and names.dmp files from the NCBI taxonomy can be provided in the history.
    ]]></help>
    <citations>
        <citation type="bibtex">
            @misc{vanHeusden2019,
                author = {van Heusden, Peter},
                year = {2019},
                title = {ncbitaxonomy Rust crate},
                publisher = {crates.io},
                journal = {Rust Package Registry},
                url = {https://crates.io/crates/ncbitaxonomy},
            }
        </citation>
    </citations>
</tool>