Next changeset 1:0719cfc3ebcb (2017-11-09) |
Commit message:
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/enasearch/ commit 6eda25f5cccc0cf9be09c38a8b48d37aff56ed87 |
added:
README.md enasearch_retrieve_data.xml generate_macros.py macros.xml search_macros.xml |
b |
diff -r 000000000000 -r cbc24be56f7b README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Tue Aug 29 04:13:09 2017 -0400 |
[ |
@@ -0,0 +1,11 @@ +ENASearch +========= + +[ENASearch](https://github.com/bebatut/enasearch) is a Python library for interacting with [ENA](http://www.ebi.ac.uk/ena/browse/programmatic-access)'s API. + +To change `search_macros.xml`, do not edit it by hand: edit [`generate_macros.py`](generate_macros.py) and regenerate `search_macros.xml` with + +``` +$ conda install enasearch +$ python generate_macros.py +``` \ No newline at end of file |
b |
diff -r 000000000000 -r cbc24be56f7b enasearch_retrieve_data.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/enasearch_retrieve_data.xml Tue Aug 29 04:13:09 2017 -0400 |
[ |
@@ -0,0 +1,72 @@ +<tool id="enasearch_retrieve_data" name="Retrieve ENA data" version="@WRAPPER_VERSION@.0"> + <description>(other than taxon and project)</description> + <macros> + <import>macros.xml</import> + <import>search_macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version"/> + <command detect_errors="aggressive"><![CDATA[ +enasearch + retrieve_data + @IDS@ + @DISPLAY@ + $expanded + $header + --file '$output' + --download 'txt' + ]]></command> + <inputs> + <param argument="--ids" type="text" multiple="true" label="Ids for records to return" help="Other than Taxon and Project. It can also be range of ids (e.g. ERA000010-ERA000020)"/> + <expand macro="display_opt"/> + <expand macro="expanded"/> + <expand macro="header"/> + </inputs> + <outputs> + <data name="output" format="tabular" label="${tool.name} on ${ids}"> + <expand macro="change_format"/> + </data> + </outputs> + <tests> + <test> + <param name="ids" value="ERA000010-ERA000020"/> + <conditional name="display_opt"> + <param name="display" value="xml"/> + </conditional> + <output name="output" ftype="xml" md5="d42dec30c41578705ba9b167dc0c30b1"/> + </test> + <test> + <param name="ids" value="A00145"/> + <conditional name="display_opt"> + <param name="display" value="fasta"/> + <param name="range_start" value="3"/> + <param name="range_stop" value="63"/> + </conditional> + <output name="output" ftype="fasta" md5="758cb77161dcf6f8cf841cb141e9277e"/> + </test> + <test> + <param name="ids" value="AL513382"/> + <conditional name="display_opt"> + <param name="display" value="text"/> + <param name="offset" value="0"/> + <param name="length" value="100"/> + </conditional> + <param name="expanded" value="true"/> + <output name="output" md5="e77ed0fb6f75093251b6ddc98f9db835"/> + </test> + <test> + <param name="ids" value="PRJEB2772,AL513382"/> + <conditional name="display_opt"> + <param name="display" value="html"/> + </conditional> + <param name="header" value="true"/> + <output 
name="output" md5="7b23a8c12893728272f4993073e341f6"/> + </test> + </tests> + <help><![CDATA[ +**What it does** + +This tool retrieves ENA data (other than taxon and project) + ]]></help> + <expand macro="citations"/> +</tool> |
b |
diff -r 000000000000 -r cbc24be56f7b generate_macros.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate_macros.py Tue Aug 29 04:13:09 2017 -0400 |
[ |
b'@@ -0,0 +1,227 @@\n+#!/usr/bin/env python\n+\n+import enasearch\n+\n+spaces = \' \'\n+operator_names = {\n+ "=": "equal",\n+ "!=": "different",\n+ "<": "lower",\n+ "<=": "equal or lower",\n+ ">": "higher",\n+ ">=": "equal or higher",\n+}\n+\n+\n+def format_name(name, alternative_name):\n+ """\n+ Format name to remove None name and & in name\n+ """\n+ if name is None:\n+ name = alternative_name\n+ name = name.replace("&", "and")\n+ return name\n+\n+\n+def sort_by_name(dict):\n+ """\n+ Sort a dictionary on the values\n+ """\n+ return sorted(dict, key=dict.get)\n+\n+\n+def write_analysis_fields():\n+ """\n+ Write the analysis fields\n+ """\n+ s = \'%s<xml name="analysis_fields">\\n\' % (spaces)\n+ fields = enasearch.get_returnable_fields(result="analysis", verbose=False)\n+ for f in fields:\n+ s += \'%s<option value="%s">%s</option>\\n\' % (2 * spaces, f, f)\n+ s += \'%s</xml>\\n\' % (spaces)\n+ return s\n+\n+\n+def write_display_options():\n+ """\n+ Write the display options\n+ """\n+ s = \'%s<xml name="display_options">\\n\' % (spaces)\n+ when_s = \'%s<xml name="when_display_options">\\n\' % (spaces)\n+ options = enasearch.get_display_options(verbose=False)\n+ for opt in options:\n+ s += \'%s<option value="%s">%s</option>\\n\' % (2 * spaces, opt, options[opt][\'description\'])\n+ when_s += \'%s<when value="%s">\\n\' % (2 * spaces, opt)\n+ if opt == \'fasta\' or opt == \'fastq\':\n+ when_s += \'%s<param name="range_start" argument="--subseq_range" type="integer" optional="true" label="Start integer for subsequences"/>\\n\' % (3 * spaces)\n+ when_s += \'%s<param name="range_stop" argument="--subseq_range" type="integer" optional="true" label="Stop integer for subsequences"/>\\n\' % (3 * spaces)\n+ else:\n+ when_s += \'%s<param argument="--offset" type="integer" optional="true" label="First record to get"/>\\n\' % (3 * spaces)\n+ when_s += \'%s<param argument="--length" type="integer" optional="true" label="Number of records to retrieve"/>\\n\' % (3 * spaces)\n+ 
when_s += \'%s</when>\\n\' % (2 * spaces)\n+ s += \'%s</xml>\\n\' % (spaces)\n+ when_s += \'%s</xml>\\n\' % (spaces)\n+ s += when_s\n+ return s\n+\n+\n+def write_run_fields():\n+ """\n+ Write the run fields\n+ """\n+ s = \'%s<xml name="run_fields">\\n\' % (spaces)\n+ fields = enasearch.get_returnable_fields(result="read_run", verbose=False)\n+ for f in fields:\n+ s += \'%s<option value="%s">%s</option>\\n\' % (2 * spaces, f, f)\n+ s += \'%s</xml>\\n\' % (spaces)\n+ return s\n+\n+\n+def write_taxonomy_results():\n+ """\n+ Write the possible taxonomy results\n+ """\n+ s = \'%s<xml name="taxonomy_results">\\n\' % (spaces)\n+ fields = enasearch.get_taxonomy_results(verbose=False)\n+ for f in fields:\n+ s += \'%s<option value="%s">%s</option>\\n\' % (2 * spaces, f, fields[f][\'description\'])\n+ s += \'%s</xml>\\n\' % (spaces)\n+ return s\n+\n+\n+def write_result_parameters(fts=False):\n+ """\n+ Write the parameters that are dependant of results\n+ """\n+ res = enasearch.get_results(verbose=False)\n+ options = enasearch.get_display_options(verbose=False)\n+ ft = enasearch.get_filter_types(verbose=False)\n+ # Format the filter type related parameters\n+ ft_parameters = {}\n+ for t in ft:\n+ s = \'\'\n+ if \'operators\' in ft[t]:\n+ s = \'%s<param name="operation" type="select" label="Operator">\\n\' % (7 * spaces)\n+ for o in ft[t][\'operators\']:\n+ on = o\n+ if o in operator_names:\n+ on = operator_names[o]\n+ s += \'%s<option value="%s">%s</option>\\n\' % (8 * spaces, on, on)\n+ s += \'%s</param>\\n\' % (7 * spaces)\n+ if \'value\' in ft[t]:\n+ value_format = \'float\' if t == \'Number\' else \'text\'\n+ s += \'%s<param name="value" type="%s" value="" label="%s"/>\\n\' % (7 * spaces, value_format, ft[t][\'value\'])\n+ elif \'values\' in ft['..b'tion operator\n+ s += \'%s<param name="combination_operation" type="select" label="Combination operation">\\n\' % (5 * spaces)\n+ s += \'%s<option value="AND">AND</option>\\n\' % (6 * spaces)\n+ s += \'%s<option 
value="OR">OR</option>\\n\' % (6 * spaces)\n+ s += \'%s<option value="NOT">NOT</option>\\n\' % (6 * spaces)\n+ s += \'%s</param>\\n\' % (5 * spaces)\n+ s += \'%s<conditional name="filter_field">\\n\' % (5 * spaces)\n+ s += \'%s<param name="field" type="select" label="Field to query">\\n\' % (6 * spaces)\n+ for f in ff:\n+ s += \'%s<option value="%s">%s</option>\\n\' % (7 * spaces, f, ff[f][\'description\'])\n+ s += \'%s</param>\\n\' % (6 * spaces)\n+ for f in ff:\n+ # Add the correct parameter given the type of field\n+ typ = ff[f][\'type\'].capitalize()\n+ if typ not in ft_parameters:\n+ if f == \'location\':\n+ typ = \'Geospatial\'\n+ else:\n+ continue\n+ s += \'%s<when value="%s">\\n\' % (6 * spaces, f)\n+ s += ft_parameters[typ]\n+ s += \'%s</when>\\n\' % (6 * spaces)\n+ s += \'%s</conditional>\\n\' % (5 * spaces)\n+ s += \'%s</repeat>\\n\' % (4 * spaces)\n+ # Add display opt\n+ s += \'%s<conditional name="display_opt">\\n\' % (4 * spaces)\n+ s += \'%s<param argument="--display" type="select" label="Display option to specify the display format">\\n\' % (5 * spaces)\n+ s += \'%s<expand macro="display_options"/>\\n\' % (6 * spaces)\n+ s += \'%s</param>\\n\' % (5 * spaces)\n+ for opt in options:\n+ s += \'%s<when value="%s"\' % (5 * spaces, opt)\n+ if opt != \'fasta\' and opt != \'fastq\':\n+ s += \'>\\n\'\n+ s += \'%s<param argument="--offset" type="integer" optional="true" label="First record to get"/>\\n\' % (6 * spaces)\n+ s += \'%s<param argument="--length" type="integer" optional="true" label="Number of records to retrieve"/>\\n\' % (6 * spaces)\n+ if opt == \'report\':\n+ s += \'%s<param argument="--fields" type="select" multiple="true" label="Fields to return">\\n\' % (6 * spaces)\n+ for f in res[r][\'returnable_fields\']:\n+ s += \'%s<option value="%s">%s</option>\\n\' % (7 * spaces, f, f)\n+ s += \'%s</param>\\n\' % (6 * spaces)\n+ s += \'%s<param argument="--sortfields" type="select" optional="true" multiple="true" label="Fields to sort the 
results">\\n\' % (6 * spaces)\n+ for f in sf:\n+ s += \'%s<option value="%s">%s</option>\\n\' % (7 * spaces, f, sf[f][\'description\'])\n+ s += \'%s</param>\\n\' % (6 * spaces)\n+ s += \'%s</when>\\n\' % (5 * spaces)\n+ else:\n+ s += \'/>\\n\'\n+ s += \'%s</conditional>\\n\' % (4 * spaces)\n+ s += \'%s</when>\\n\' % (3 * spaces)\n+ s += \'%s</conditional>\\n\' % (2 * spaces)\n+ return s\n+\n+\n+def write_search_data_parameters():\n+ """\n+ Write the parameters for search_data\n+ """\n+ fts = \'%s<xml name="free_text_search">\\n\' % (spaces)\n+ fts += write_result_parameters(True)\n+ fts += \'%s</xml>\\n\' % (spaces)\n+ cts = \'%s<xml name="conditional_text_search">\\n\' % (spaces)\n+ cts += write_result_parameters(False)\n+ cts += \'%s</xml>\\n\' % (spaces)\n+ return fts + cts\n+\n+\n+def generate_search_macros(filepath):\n+ """\n+ Generate the content of the macro file\n+ """\n+ s = \'<?xml version="1.0" ?>\\n\'\n+ s += \'<macros>\\n\'\n+ s += write_analysis_fields()\n+ s += write_display_options()\n+ s += write_run_fields()\n+ s += write_taxonomy_results()\n+ s += write_search_data_parameters()\n+ s += \'</macros>\\n\'\n+ with open(filepath, "w") as file:\n+ file.write(s)\n+\n+\n+if __name__ == \'__main__\':\n+ generate_search_macros("search_macros.xml")\n' |
b |
diff -r 000000000000 -r cbc24be56f7b macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Aug 29 04:13:09 2017 -0400 |
[ |
@@ -0,0 +1,96 @@ +<?xml version="1.0" ?> +<macros> + <token name="@WRAPPER_VERSION@">0.1.1</token> + <xml name="version"> + <version_command>@WRAPPER_VERSION@</version_command> + </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@WRAPPER_VERSION@">enasearch</requirement> + <yield/> + </requirements> + </xml> + <xml name="citations"/> + <token name="@IDS@"> +<![CDATA[ + #for $id in str($ids).split(',') + --ids '$id' + #end for +]]> + </token> + <xml name="display_opt"> + <conditional name="display_opt"> + <param argument="--display" type="select" label="Display option to specify the display format"> + <expand macro="display_options"/> + </param> + <expand macro="when_display_options"/> + </conditional> + </xml> + <token name="@DISPLAY@"> +<![CDATA[ + --display '$display_opt.display' + #if $display_opt.display == 'fasta' or $display_opt.display == 'fastq' + #if str($display_opt.range_start) != '' and str($display_opt.range_stop) != '' + --subseq_range '$display_opt.range_start'-'$display_opt.range_stop' + #end if + #else + #if str($display_opt.offset) != '' + --offset '$display_opt.offset' + #end if + #if str($display_opt.length) != '' + --length '$display_opt.length' + #end if + #end if +]]> + </token> + <xml name="expanded"> + <param argument="--expanded" type="boolean" truevalue="--expanded" falsevalue="" label="Expand a CON record?"/> + </xml> + <xml name="header"> + <param argument="--header" type="boolean" truevalue="--header" falsevalue="" label="Obtain only the header of a record?"/> + </xml> + <xml name="accession"> + <param argument="--accession" type="text" label="Accession id" help="Study accessions (ERP, SRP, DRP, PRJ prefixes), experiment accessions (ERX, SRX, DRX prefixes), sample accessions (ERS, SRS, DRS, SAM prefixes) and run accessions"/> + </xml> + <token name="@FIELDS@"> +<![CDATA[ + #if $fields + #for $f in str($fields).split(',') + --fields '$f' + #end for + #end if +]]> + </token> + <xml 
name="change_format"> + <change_format> + <when input="display_opt.display" value="fasta" format="fasta" /> + <when input="display_opt.display" value="fastq" format="fastq" /> + <when input="display_opt.display" value="html" format="html" /> + <when input="display_opt.display" value="text" format="text" /> + <when input="display_opt.display" value="xml" format="xml" /> + </change_format> + </xml> + <token name="@SEARCH_DATA_DISPLAY@"> +<![CDATA[ + --display '$query_type.res.display_opt.display' + #if $query_type.res.display_opt.display != 'fasta' and $query_type.res.display_opt.display != 'fastq' + #if str($query_type.res.display_opt.offset) != '' + --offset '$query_type.res.display_opt.offset' + #end if + #if str($query_type.res.display_opt.length) != '' + --length '$query_type.res.display_opt.length' + #end if + #if $query_type.res.display_opt.display == 'report' + #for $f in str($query_type.res.display_opt.fields).split(',') + --fields '$f' + #end for + #if $query_type.res.display_opt.sortfields + #for $f in str($query_type.res.display_opt.sortfields).split(',') + --sortfields '$f' + #end for + #end if + #end if + #end if +]]> + </token> +</macros> |
b |
diff -r 000000000000 -r cbc24be56f7b search_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/search_macros.xml Tue Aug 29 04:13:09 2017 -0400 |
b |
b'@@ -0,0 +1,9549 @@\n+<?xml version="1.0" ?>\n+<macros>\n+ <xml name="analysis_fields">\n+ <option value="analysis_accession">analysis_accession</option>\n+ <option value="study_accession">study_accession</option>\n+ <option value="secondary_study_accession">secondary_study_accession</option>\n+ <option value="sample_accession">sample_accession</option>\n+ <option value="secondary_sample_accession">secondary_sample_accession</option>\n+ <option value="analysis_title">analysis_title</option>\n+ <option value="analysis_type">analysis_type</option>\n+ <option value="center_name">center_name</option>\n+ <option value="first_public">first_public</option>\n+ <option value="last_updated">last_updated</option>\n+ <option value="study_title">study_title</option>\n+ <option value="tax_id">tax_id</option>\n+ <option value="scientific_name">scientific_name</option>\n+ <option value="analysis_alias">analysis_alias</option>\n+ <option value="study_alias">study_alias</option>\n+ <option value="submitted_bytes">submitted_bytes</option>\n+ <option value="submitted_md5">submitted_md5</option>\n+ <option value="submitted_ftp">submitted_ftp</option>\n+ <option value="submitted_aspera">submitted_aspera</option>\n+ <option value="submitted_galaxy">submitted_galaxy</option>\n+ <option value="sample_alias">sample_alias</option>\n+ <option value="broker_name">broker_name</option>\n+ </xml>\n+ <xml name="display_options">\n+ <option value="xml">Results are displayed in XML format. Supported by all ENA data classes.</option>\n+ <option value="text">Results are displayed in text format. Supported only by assembled and annotated sequence data classes.</option>\n+ <option value="fastq">Results are displayed in fastq format. Supported only by Trace data class.</option>\n+ <option value="html">Results are displayed in HTML format. Supported by all ENA data classes. 
HTML is the default display format if no other display option has been specified.</option>\n+ <option value="report">Results are displayed as a tab separated report</option>\n+ <option value="fasta">Results are displayed in fasta format. Supported by assembled and annotated sequence and Trace data classes.</option>\n+ </xml>\n+ <xml name="when_display_options">\n+ <when value="xml">\n+ <param argument="--offset" type="integer" optional="true" label="First record to get"/>\n+ <param argument="--length" type="integer" optional="true" label="Number of records to retrieve"/>\n+ </when>\n+ <when value="text">\n+ <param argument="--offset" type="integer" optional="true" label="First record to get"/>\n+ <param argument="--length" type="integer" optional="true" label="Number of records to retrieve"/>\n+ </when>\n+ <when value="fastq">\n+ <param name="range_start" argument="--subseq_range" type="integer" optional="true" label="Start integer for subsequences"/>\n+ <param name="range_stop" argument="--subseq_range" type="integer" optional="true" label="Stop integer for subsequences"/>\n+ </when>\n+ <when value="html">\n+ <param argument="--offset" type="integer" optional="true" label="First record to get"/>\n+ <param argument="--length" type="integer" optional="true" label="Number of records to retrieve"/>\n+ </when>\n+ <when value="report">\n+ <param argument="--offset" type="integer" optional="true" label="First record to get"/>\n+ <param argument="--length" type="integer" optional="true" label="Number of records to retrieve"/>\n+ </when>\n+ <when value="fasta">\n+ <param name="range_start" argument="--subseq_range" type="integer" optional="true" label="Start integer for subsequences"/>\n+ <param name="range_stop" argument="--subseq_range" type="integ'..b'idence</option>\n+ <option value="location">geographic location of isolation of the sample</option>\n+ <option value="study_accession">study accession number</option>\n+ <option value="function">function attributed to a 
sequence</option>\n+ <option value="germline">the sample is an unrearranged molecule that was inherited from the parental germline</option>\n+ <option value="product">name of the product associated with the feature</option>\n+ <option value="description">brief sequence description</option>\n+ <option value="ecotype">a population within a given species displaying traits that reflect adaptation to a local habitat</option>\n+ <option value="cell_type">cell type from which the sample was obtained</option>\n+ <option value="accession">accession number</option>\n+ <option value="isolation_source">describes the physical, environmental and/or local geographical source of the sample</option>\n+ <option value="mating_type">mating type of the organism from which the sequence was obtained</option>\n+ <option value="tissue_type">tissue type from which the sample was obtained</option>\n+ <option value="sub_strain">name or identifier of a genetically or otherwise modified strain from which sample was obtained</option>\n+ <option value="tissue_lib">tissue library from which sample was obtained</option>\n+ <option value="cultivar">cultivar (cultivated variety) of plant from which sample was obtained</option>\n+ <option value="base_count">number of base pairs</option>\n+ <option value="topology">sequence topology: circular or linear</option>\n+ <option value="sub_species">name of sub-species of organism from which sample was obtained</option>\n+ <option value="environmental_sample">identifies sequences derived by direct molecular isolation from an environmental DNA sample</option>\n+ <option value="first_public">date when made public</option>\n+ <option value="strain">strain from which sample was obtained</option>\n+ <option value="country">locality of sample isolation: country names, oceans or seas, followed by regions and localities</option>\n+ <option value="variety">variety (varietas, a formal Linnaean rank) of organism from which sample was derived</option>\n+ <option 
value="host">natural (as opposed to laboratory) host to the organism from which sample was obtained</option>\n+ <option value="gene_synonym">synonymous, replaced, obsolete or former gene symbol</option>\n+ <option value="organelle">membrane-bound intracellular structure from which the sequence was obtained</option>\n+ <option value="serovar">serological variety of a species (usually a prokaryote) characterized by its antigenic properties</option>\n+ <option value="rna_class">classification of RNA</option>\n+ <option value="dataclass">sequence data class</option>\n+ <option value="gene">symbol of the gene corresponding to a sequence region</option>\n+ <option value="culture_collection">identifier for the sample culture including institute and collection code</option>\n+ </param>\n+ </when>\n+ <when value="fasta"/>\n+ </conditional>\n+ </when>\n+ </conditional>\n+ </xml>\n+</macros>\n' |