view data_manager/fetch_refseq.xml @ 1:937f167631b1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit 8cad1e44b2393cb36f60a4d705536dee235a8434
author iuc
date Sun, 13 Jan 2019 09:26:57 -0500
parents 8b91891ae805
children 5d2ef93c6314
line wrap: on
line source

<tool id="data_manager_fetch_refseq" name="RefSeq data manager" version="0.0.18" tool_type="manage_data">
    <description>Fetch FASTA data from NCBI RefSeq and update all_fasta data table</description>
    <!-- Packages Galaxy must resolve before the command block of this tool is run. -->
    <!-- NOTE(review): neither requirement carries a version attribute, so the resolved
         environment is not pinned; consider adding explicit versions. -->
    <requirements>
        <requirement type="package">python</requirement>
        <requirement type="package">requests</requirement>
    </requirements>
    <!--
        Builds the fetch_refseq.py command line.
          * The script path and every substituted value are single-quoted so that
            paths or values containing shell-significant characters stay one argument.
          * The compress flag is only emitted when the "advanced" branch is selected;
            it is left unquoted because the boolean renders as either the flag or an
            empty string, and a quoted empty string would become a spurious argument.
          * The pin_date option is forwarded unless it still holds the sentinel 'NO'.
    -->
    <command detect_errors="exit_code"><![CDATA[
    python '$__tool_directory__/fetch_refseq.py'
      #if str( $advanced.advanced_selector ) == 'advanced':
        ${advanced.compress}
      #end if
      --galaxy_datamanager_filename '${output_file}'
      --division_names '${division_names}'
      --mol_types '${mol_types}'
      #if str( $pin_date ) != 'NO':
        --pin_date '${pin_date}'
      #end if
    ]]></command>
    <inputs>
        <!-- Multiple selects are optional by default in Galaxy; optional="false" stops an
             empty selection from reaching the command line. -->
        <param argument="--division_names" type="select" multiple="true" optional="false" label="RefSeq division" help="Select at least one RefSeq division to download">
            <!-- NOTE(review): the value keeps the historical spelling "archea" because it is
                 passed verbatim to fetch_refseq.py, which is not visible here. NCBI spells the
                 division "archaea"; confirm the script accepts this value before changing it. -->
            <option value="archea">Archaea</option>
            <option value="bacteria">Bacteria</option>
            <option value="complete">Complete</option>
            <option value="fungi">Fungi</option>
            <option value="invertebrate">Invertebrate</option>
            <option value="mitochondrion">Mitochondrion</option>
            <option value="other">Other</option>
            <option value="plant">Plant</option>
            <option value="plasmid">Plasmid</option>
            <option value="plastid">Plastid</option>
            <option value="protozoa">Protozoa</option>
            <option value="vertebrate_mammalian">Mammalian Vertebrate</option>
            <option value="vertebrate_other">Other Vertebrate</option>
            <option value="viral">Viral</option>
        </param>
        <!-- Also required: the help text already asks for at least one molecule type. -->
        <param argument="--mol_types" type="select" multiple="true" optional="false" label="Molecule type" help="Select at least one of genomic, protein or rna sequence">
            <option value="protein">Protein</option>
            <option value="genomic">Genomic (DNA)</option>
            <option value="rna">RNA</option>
        </param>
        <!-- The "advanced" branch exposes the compression switch; "basic" adds nothing. -->
        <conditional name="advanced">
            <param name="advanced_selector" type="select" label="Advanced Options">
                <option value="basic" selected="true">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic">
            </when>
            <when value="advanced">
                <!-- Renders as the literal flag when checked and as an empty string otherwise. -->
                <param argument="--compress" type="boolean" truevalue="--compress" falsevalue="" label="Compress FASTA files"
                    help="Compress downloaded FASTA files (with gzip). Limits compatibility with tools expecting uncompressed FASTA."/>
            </when>
        </conditional>
        <!-- Hidden parameter; the command block skips it while it holds the default "NO". -->
        <param argument="--pin_date" type="hidden" value="NO" help="Used for testing"/>
    </inputs>
    <outputs>
        <!-- Data-manager JSON file; its path is handed to the script through the
             galaxy_datamanager_filename option in the command block. -->
        <data name="output_file" format="data_manager_json"/>
    </outputs>
    <tests>
        <!-- Basic branch: plastid proteins with a pinned date. -->
        <test>
            <param name="division_names" value="plastid"/>
            <param name="mol_types" value="protein"/>
            <param name="pin_date" value="2018-03-14"/>
            <conditional name="advanced">
                <param name="advanced_selector" value="basic"/>
            </conditional>
            <output name="output_file">
                <assert_contents>
                    <has_text text="2018-03-14"/>
                    <has_text text="refseq_plastid"/>
                    <has_text text="/refseq_plastid."/>
                </assert_contents>
            </output>
        </test>
        <!-- Advanced branch: same selection, with the compress parameter supplied as an empty value. -->
        <test>
            <param name="division_names" value="plastid"/>
            <param name="mol_types" value="protein"/>
            <param name="pin_date" value="2018-03-14"/>
            <conditional name="advanced">
                <param name="advanced_selector" value="advanced"/>
                <param name="compress" value=""/>
            </conditional>
            <output name="output_file">
                <assert_contents>
                    <has_text text="2018-03-14"/>
                    <has_text text="refseq_plastid"/>
                    <has_text text="/refseq_plastid."/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
This data manager fetches FASTA-format collections of protein, genomic (DNA) and RNA sequences
from NCBI's RefSeq_ data collection.

RefSeq is released every two months and consists of a number of divisions. Some sequences are shared
between multiple divisions. This data manager allows the Galaxy administrator to select which
divisions, and which molecule types within each division, to download. Once downloaded, the
files are made accessible by adding an entry to the *all_fasta* data table.

.. _RefSeq: https://www.ncbi.nlm.nih.gov/refseq/
    ]]>
    </help>
    <!-- Publication Galaxy lists when this tool is cited (presumably the NCBI RefSeq
         database paper; verify the DOI target). -->
    <citations>
        <citation type="doi">10.1093/nar/gkv1189</citation>
    </citations>
</tool>