view fetch_fasta_from_NCBI.xml @ 5:706fe8139955 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit b5ef783237b244d684e26b1ed1cc333a8305ce3e"
author artbio
date Tue, 16 Mar 2021 23:26:58 +0000
parents c667d0ee39f5
children 4af77e1af12a
line wrap: on
line source

<tool id="retrieve_fasta_from_NCBI" name="Retrieve FASTA from NCBI" version="3.0.0">
    <description></description>
    <!-- Package dependencies resolved by Galaxy before the command runs.
         urllib3 is pinned to 1.25.9; presumably it is the HTTP client used by
         fetch_fasta_from_NCBI.py (the script is not visible here; confirm). -->
    <requirements>
        <requirement type="package" version="1.25.9">urllib3</requirement>
    </requirements>
    <!-- Cheetah template for the fetch_fasta_from_NCBI.py command line.
         * The "query" conditional selects between passing the Entrez query
           string and passing a dataset holding a list of UIDs.
         * The database name and the log dataset are always passed.
         * The fasta output dataset is passed only when fetch_option is
           'fasta'; otherwise the fasta option is omitted.
         Every substituted value is single-quoted so the shell treats it as
         one argument. -->
    <command><![CDATA[
    python '$__tool_directory__'/fetch_fasta_from_NCBI.py
        #if $query.option == 'query':
            --query '$query.queryString'
        #else:
            --iud_file '$query.iud_list'
        #end if
        --dbname '$dbname'
        --logfile '$logfile'
        #if $fetch_option == 'fasta':
            --fasta '$fasta'
        #end if
  ]]></command>
  <inputs>

    <!-- Source of the records to fetch: either a free-text Entrez query or a
         dataset containing one UID per line. -->
    <conditional name="query">
        <param name="option" type="select" label="retrieve data from query or UID list" display="radio">
            <option value="query" selected="true">Query string</option>
            <option value="list">UID list</option>
        </param>
        <when value="query">
            <param name="queryString" type="text" size="5x80" area="True"
                   value=""
                   label="Query to NCBI in entrez format"
                   help="example: `Drosophila melanogaster[Organism] AND Gcn5[Title]`">
            <!-- The command template embeds this value between single quotes,
                 so a literal single quote typed by the user must not reach the
                 shell unescaped: it is removed from the valid set and rewritten
                 as '"'"' (close quote, double-quoted quote, reopen quote).
                 NOTE(review): the backslash escapes applied to double quotes
                 and backslashes below look written for a double-quoted
                 argument; inside single quotes the added backslashes reach the
                 script literally. Confirm the script expects that. -->
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="&quot;"/>
                    <remove value="\"/>
                    <remove value="&apos;"/>
                </valid>
                <mapping initial="none">
                    <add source="&quot;" target="\&quot;"/>
                    <add source="\" target="\\"/>
                    <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
                </mapping>
            </sanitizer>
            </param>
        </when>
        <when value="list">
            <param name="iud_list" format="txt,tabular" type="data" label="A list of NCBI UIDs"
                   help="a file with a single column of UIDs, in txt or tabular format"/>
        </when>
    </conditional>
    <!-- NCBI database to search and fetch from. -->
    <param name="dbname" type="select" label="NCBI database">
      <option value="nuccore">Nucleotide</option>
      <option value="protein">Protein</option>
    </param>
    <!-- 'fasta' also produces the fasta output; 'justiuds' skips it. -->
    <param name="fetch_option" type="select" label="select what will be retrieved">
      <option value="fasta" selected="true">Fasta and UIDs</option>
      <option value="justiuds">Only UIDs</option>
    </param>
  </inputs>
  <!-- Datasets produced by the tool; filters decide which ones are created. -->
  <outputs>
    <!-- Retrieved sequences; created only when fetch_option is "fasta". -->
    <data name="fasta" format="fasta" label="Fasta sequences retrieved from NCBI">
      <filter>fetch_option == "fasta"</filter>
    </data>
    <!-- UID list collected from retrieved_uid_list.txt in the job working
         directory; created only when the input was a query string. -->
    <data name="UIDs" format="txt" label="UIDs" from_work_dir="retrieved_uid_list.txt">
      <filter>query['option'] == "query"</filter>
    </data>
    <!-- Log dataset; always created. -->
    <data name="logfile" format="txt" label="logs"/>
  </outputs>
  <tests>
    <!-- Query by GI number; the fasta output must match output.fa. -->
    <test>
      <param name="queryString" value="9629650[gi]"/>
      <param name="dbname" value="nuccore"/>
      <param name="fetch_option" value="fasta"/>
      <output name="fasta" ftype="fasta" file="output.fa"/>
    </test>
    <!-- Query by accession without fasta retrieval; the log is compared to
         dry_run.log by similar size. -->
    <test>
      <param name="queryString" value="CU929326[Accession]"/>
      <param name="dbname" value="nuccore"/>
      <param name="fetch_option" value="justiuds"/>
      <output name="logfile" ftype="txt" file="dry_run.log" compare="sim_size"/>
    </test>
    <!-- UID list input; the fasta output must match output_list.fa. -->
    <test>
      <param name="option" value="list"/>
      <param name="iud_list" value="input_list.txt" ftype="txt"/>
      <param name="dbname" value="nuccore"/>
      <param name="fetch_option" value="fasta"/>
      <output name="fasta" ftype="fasta" file="output_list.fa"/>
    </test>
    <!-- Multi-term query; the fasta output must report 9 sequences in its
         metadata. -->
    <test>
      <param name="queryString" value="Drosophila[Organism] AND 2017[Modification Date] AND virus"/>
      <param name="dbname" value="nuccore"/>
      <param name="fetch_option" value="fasta"/>
      <output name="fasta" ftype="fasta">
        <metadata name="sequences" value="9"/>
      </output>
    </test>
    <!-- Nonsense title query; the log must contain a "Found 0 UIDs" line. -->
    <test>
      <param name="queryString" value="labalbalbalbaalablalbabal[Title]"/>
      <param name="dbname" value="nuccore"/>
      <param name="fetch_option" value="justiuds"/>
      <output name="logfile" ftype="txt">
        <assert_contents>
          <has_line_matching expression=".*Found\s+0\s+UIDs"/>
        </assert_contents>
      </output>
    </test>
  </tests>
  <help>
**What it does**

This tool retrieves nucleotide/peptide sequences from the corresponding NCBI database (nuccore or protein) for a given entrez query.

For use with metaVisitor, the tool can be run with the query "txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]".

See `Entrez help`_ for explanation of query formats

Be sure to use the appropriate NCBI query syntax. Always use [] to specify the search fields.

By setting "select what will be retrieved" to the UIDs-only option, you can also run your query without sequence retrieval and get the number of sequences your query will fetch.

Note that the tool may fail if the connection to the NCBI database is interrupted (see the log dataset).

Retrieval progress is reported in the log dataset.

**Options**::
  <![CDATA[
  usage: fetch_fasta_from_NCBI.py [-h] [--query QUERY_STRING]
                                  [--iud_file IUDS_FILE] [--output OUTNAME]
                                  [--dbname DBNAME] [--fasta GET_FASTA]
                                  [--logfile LOGFILE]
                                  [--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
  
  Retrieve data from NCBI
  
  optional arguments:
    -h, --help            show this help message and exit
    --query QUERY_STRING, -i QUERY_STRING
                          NCBI Query String
    --iud_file IUDS_FILE  input list of iuds to be fetched
    --output OUTNAME, -o OUTNAME
                          output file name
    --dbname DBNAME, -d DBNAME
                          database type
    --fasta GET_FASTA, -F GET_FASTA
                          retrieve fasta sequences
    --logfile LOGFILE, -l LOGFILE
                          log file (default=stderr)
    --loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}
                          logging level (default: INFO)
  ]]>

**Acknowledgments**

This Galaxy tool has been adapted from the galaxy tool `get_fasta_from_taxon`_.

It is Copyright © 2014-2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_.

.. _Entrez help: https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options
.. _get_fasta_from_taxon: https://toolshed.g2.bx.psu.edu/view/crs4/get_fasta_from_taxon
.. _CNRS and University Pierre et Marie Curie: http://www.ibps.upmc.fr/en
.. _MIT license: http://opensource.org/licenses/MIT

  </help>
  <!-- Reference shown in Galaxy's citation panel for this tool, given as a DOI. -->
  <citations>
      <citation type="doi">10.1186/1471-2105-14-73</citation>
  </citations>
</tool>