Repository 'sbtas_se'
hg clone https://toolshed.g2.bx.psu.edu/repos/fabio/sbtas_se

Changeset 4:35593423c2e2 (2018-01-31)
Previous changeset 3:d7b97b60d0ea (2018-01-24) Next changeset 5:5e2877685afc (2018-01-31)
Commit message:
Uploaded 20180131
modified:
.shed.yml
added:
._.shed.yml
._example.tsv
._query.py
._query.xml
example.tsv
query.py
query.xml
removed:
retrieve.py
retrieve.xml
search.py
search.xml
diff -r d7b97b60d0ea -r 35593423c2e2 ._.shed.yml
Binary file ._.shed.yml has changed
diff -r d7b97b60d0ea -r 35593423c2e2 ._example.tsv
Binary file ._example.tsv has changed
diff -r d7b97b60d0ea -r 35593423c2e2 ._query.py
Binary file ._query.py has changed
diff -r d7b97b60d0ea -r 35593423c2e2 ._query.xml
Binary file ._query.xml has changed
diff -r d7b97b60d0ea -r 35593423c2e2 .shed.yml
--- a/.shed.yml Wed Jan 24 11:26:33 2018 -0500
+++ b/.shed.yml Wed Jan 31 11:28:53 2018 -0500
@@ -1,20 +1,12 @@
 name: sbtas_se
 owner: iuc
 categories:
+  - Data Source
   - Web Services
-  - Data Source
 description: AllSome Sequence Bloom Tree Search Engine
 long_description: |
-  A fast querying tool to search on the Sequence Read Archive repository
-  using Bloom Filters.
+  A fast querying tool to identify all publicly available sequenced
+  samples which express a transcript of interest
 remote_repository_url: https://github.com/fabio-cumbo/bloomtree-allsome-search-engine
 homepage_url: https://github.com/fabio-cumbo/bloomtree-allsome-search-engine
-type: unrestricted
-auto_tool_repositories:
-  name_template: "{{ tool_id }}"
-  descriptor_template: "Wrapper for AllSome Sequence Bloom Tree Search Engine application: {{ tool_name }}."
-suite:
-  name: "sbtas_se_suite"
-  description: "A suite of Galaxy tools designed to interface with the AllSome Sequence Bloom Tree Search Engine APIs."
-  long_description: |
-    Rapid querying of massive sequence datasets
\ No newline at end of file
+type: unrestricted
\ No newline at end of file
diff -r d7b97b60d0ea -r 35593423c2e2 example.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example.tsv Wed Jan 31 11:28:53 2018 -0500
@@ -0,0 +1,3 @@
+0 CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCAAAAATGGCGTTAAAATGTGTAATCAGAGAAGCGACACGAAAAGGGGATCAGCTCTTGGCTGGCAATTGGTAGGTCAGAGGTGGATTGGGAAAAGGCAAGTCAGCAACTGTCGATGACGGCGACTGACTGTTAATGAAAATTGTTTTGGCTGTGTGGAAAAAAATACGCGGGAATCCGTGAATTTTCCGAGGAGCTGGTGGAGCGAAGAAAACGGGGTGCTGCTGTTGTAAATGATTGGTGAAAGTCACACGCCCGCAGCCTTGCCAAACTAATTAACGCCAAATGGAGCTAAGGCCTTTGAATGATGGCTGCAGGCTAGCTTATGAAAAGGGGTTGAAGAGAAGTGGAAAAATTGGTAGAAAGGGATTTGCTCAAGATGCC
+1 TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAATGAATGGCCATTTCATTTGCATGTTGGGAGCAACAGAAATGAGAGAGCATCCGAAGCTAACCACAAAAATGGACTTTGCTTCATTATGCACAAACACGCCAATAAATGTAACGAGAAAGATAGTAGGAGCGAAAGACGAGACGAGACAAACAGGAAGAAGACGAGTGGACGAGTGTTTTTTGTAACGAAACTCTTAATCGCTCCTTTGCAGGCTTAAGCTGATAGTTGCTACGTTTATGCCATGAATTTCAAGATCTCTCAAATGCGTGAAAATCCAGTTTATGCGACAGACAAATTCATGTATTTGAAAAATCTTAGCTGATAGAAATCAAAGGTGATT
+2 CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC
\ No newline at end of file
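
The example.tsv added above shows the tab-delimited (id, sequence) rows that the Query tool expects. As a rough sketch (not part of the changeset), rows like these could be read into the id-to-sequence dictionary that query.py posts as its payload; the parse_tsv helper below is hypothetical and only mirrors the id/sequence convention used elsewhere in this commit.

#!/usr/bin/env python
# Hypothetical helper (not part of this changeset): read a tab-delimited
# (id, sequence) file such as example.tsv into the {id: sequence}
# dictionary that query.py sends as its query payload.
import sys

def parse_tsv(path):
    data = {}
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            fields = line.split("\t", 1)
            if len(fields) != 2:
                continue  # skip rows that are not <id, sequence> pairs
            seq_id, sequence = fields
            if seq_id in data:
                raise ValueError("duplicated id: " + seq_id)
            data[seq_id] = sequence
    return data

if __name__ == "__main__":
    payload = parse_tsv(sys.argv[1] if len(sys.argv) > 1 else "example.tsv")
    print(str(len(payload)) + " sequences loaded")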
diff -r d7b97b60d0ea -r 35593423c2e2 query.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query.py Wed Jan 31 11:28:53 2018 -0500
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+
+# https://github.com/ross/requests-futures
+# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
+
+import os, uuid, optparse, requests, json, time
+#from requests_futures.sessions import FuturesSession
+
+#### NN14 ####
+service_url = "http://nn14.galaxyproject.org:8080/";
+#service_url = "http://127.0.0.1:8082/";
+query_url = service_url+"tree/0/query";
+status_url = service_url+"status/<task_id>";
+##############
+
+def query_request( options, args, payload ):
+    # add additional parameters to the payload
+    #payload["tree_id"] = str(options.treeid);
+    payload["search_mode"] = str(options.search);
+    payload["exact_algorithm"] = int(options.exact);
+    payload["search_threshold"] = float(options.sthreshold);
+    # set the content type to application/json
+    headers = {'Content-type': 'application/json'};
+
+    # create a session
+    session = requests.Session();
+    # make a synchronous post request to the query route
+    req = session.post(query_url, headers=headers, json=payload);
+    resp_code = req.status_code;
+    #print(str(req.content)+"\n\n");
+    if resp_code == requests.codes.ok:
+        resp_content = str(req.content);
+        # convert out to json
+        json_content = json.loads(resp_content);
+        # retrieve task id
+        task_id = json_content['task_id'];
+        task_processed = False;
+        # results json content
+        json_status_content = None;
+        task_status = None;
+        while task_processed is False:
+            # create a new session
+            session = requests.Session();
+            # make a synchronous get request to the status route
+            status_query_url = status_url.replace("<task_id>", task_id);
+            status_req = session.get(status_query_url);
+            status_resp_content = str(status_req.content);
+            #print(status_resp_content+"\n\n");
+            # convert out to json
+            json_status_content = json.loads(status_resp_content);
+            # take a look at the state
+            # state attribute is always available
+            if json_status_content['state'] == 'SUCCESS':
+                task_processed = True;
+                break;
+            elif json_status_content['state'] in ['FAILURE', 'REVOKED']:
+                return "Task status: "+str(json_status_content['state']);
+            else:
+                time.sleep(60); # in seconds
+
+        # get output dir (collection) path
+        output_dir_path = options.outputdir;
+        if not os.path.exists(output_dir_path):
+            os.makedirs(output_dir_path);
+        out_file_format = "txt";
+
+        for block in json_status_content['results']:
+            seq_id = block['sequence_id'];
+            accessions = block['accession_numbers'];
+            # put response block in the output collection
+            output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format);
+            accessions_list = "";
+            for accession_number in accessions:
+                accessions_list = accessions_list + accession_number + "\n";
+            with open(output_file_path, 'w') as out:
+                out.write(accessions_list.strip());
+    else:
+        return "Unable to query the remote server. Please try again in a while.";
+
+def query( options, args ):
+    multiple_data = {};
+    comma_sep_file_paths = options.files;
+    #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
+    # check if options.files contains at least one file path
+    if comma_sep_file_paths is not None:
+        # split file paths
+        file_paths = comma_sep_file_paths.split(",");
+        # split file names
+        comma_sep_file_names = str(options.names);
+        #print("names: "+str(comma_sep_file_names));
+        file_names = comma_sep_file_names.split(",");
+        for idx, file_path in enumerate(fil
[... diff truncated ...]
s, args );
+        else:
+            return "An error has occurred. Please be sure that your input files are valid.";
+    else:
+        # try with the sequence in --sequence
+        text_content = options.sequences;
+        #print("sequences: "+text_content);
+        # check if options.sequences contains a list of sequences (one for each row)
+        if text_content is not None:
+            text_content = str(text_content);
+            if text_content.strip():
+                # populate a dictionary with the files containing the sequences to query
+                text_content = text_content.strip().split("__cn__"); # split on new line
+                for line in text_content:
+                    if line.strip() != "":
+                        line_split = line.strip().split("__tc__"); # split on tab
+                        if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
+                            seq_id = line_split[0];
+                            seq_text = line_split[1];
+                            if seq_id in multiple_data:
+                                return "Error: the id '"+seq_id+"' is duplicated";
+                            multiple_data[seq_id] = seq_text;
+                if len(multiple_data) > 0:
+                    return async_request( options, args, multiple_data );
+                    #return echo( options, args );
+                else:
+                    return "An error has occurred. Please be sure that your input files are valid.";
+            else:
+                return "You have to insert at least one row formatted as a tab delimited <id, sequence> touple";
+    return -1;
+
+def __main__():
+    # Parse the command line options
+    usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path";
+    parser = optparse.OptionParser(usage = usage);
+    parser.add_option("-f", "--files", type="string",
+                    action="store", dest="files", help="comma separated files path");
+    parser.add_option("-n", "--names", type="string",
+                    action="store", dest="names", help="comma separated names associated to the files specified in --files");
+    parser.add_option("-s", "--sequences", type="string",
+                    action="store", dest="sequences", help="contains a list of sequences (one for each row)");
+    parser.add_option("-a", "--fasta", type="string",
+                    action="store", dest="fasta", help="contains the content of a fasta file");
+    parser.add_option("-x", "--search", type="string", default=0,
+                    action="store", dest="search", help="search mode");
+    parser.add_option("-e", "--exact", type="int", default=0,
+                    action="store", dest="exact", help="exact algorithm (required if search is 1 only)");
+    parser.add_option("-t", "--sthreshold", type="float",
+                    action="store", dest="sthreshold", help="threshold applied to the search algrithm");
+    parser.add_option("-o", "--outputdir", type="string",
+                    action="store", dest="outputdir", help="output directory (collection) path");
+
+    #parser.add_option("-k", "--outfile", type="string",
+                    #action="store", dest="outfile", help="output file");
+
+    # TEST
+    #--search 'rrr'
+    #--sthreshold 0.5
+    #--exact 0
+    #--sequences 'id0__tc__CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC'
+    #--outputdir 'collection_content'
+    #sequences = 'id0__tc__CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC';
+    #print(sequences);
+    #(options, args) = parser.parse_args(['-x', 'rrr', '-t', 0.5, '-s', sequences, '-o', 'collection_content']);
+
+    (options, args) = parser.parse_args();
+    return query( options, args );
+
+if __name__ == "__main__": __main__()
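
query.py, added in full above, drives the remote service in two steps: a POST to tree/0/query returns a task_id, and the script then polls status/<task_id> until the task reports SUCCESS (or gives up on FAILURE/REVOKED). The condensed sketch below restates that protocol with the endpoints, payload fields and status states taken from the script; the helper name, the use of response.json(), and the simplified error handling are illustrative and are not the shipped code.

# Condensed sketch of the submit-and-poll protocol implemented by query.py
# above. Endpoints, payload fields and status states come from the script;
# this simplified flow is for illustration only.
import time

import requests

SERVICE_URL = "http://nn14.galaxyproject.org:8080/"

def submit_and_wait(sequences, search_mode="rrr", exact=0, threshold=0.5, poll_seconds=60):
    payload = dict(sequences)                 # {sequence_id: sequence_text}
    payload["search_mode"] = search_mode
    payload["exact_algorithm"] = exact
    payload["search_threshold"] = threshold
    # submit the query and obtain a task identifier
    response = requests.post(SERVICE_URL + "tree/0/query", json=payload)
    response.raise_for_status()
    task_id = response.json()["task_id"]
    # poll the status route until the task leaves the pending states
    while True:
        status = requests.get(SERVICE_URL + "status/" + task_id).json()
        if status["state"] == "SUCCESS":
            return status["results"]
        if status["state"] in ("FAILURE", "REVOKED"):
            raise RuntimeError("task ended in state " + status["state"])
        time.sleep(poll_seconds)

Each element of the returned results carries a sequence_id and its accession_numbers, which query.py writes out as one file per id under the collection directory.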
diff -r d7b97b60d0ea -r 35593423c2e2 query.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/query.xml Wed Jan 31 11:28:53 2018 -0500
@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<tool name="Query" id="sbtas_se_query" version="1.0.0">
+    <description>the AllSome Sequence Bloom Tree</description>
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="2.18.4">requests</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+<![CDATA[
+    python '$__tool_directory__/query.py'
+    
+    --search 'rrr'
+    --sthreshold ${sthreshold}
+    --exact 0
+    
+    #if $conditional_input.inputtype == '0':
+        #set file_paths = ','.join( [ str( $f ) for $f in $conditional_input.txtfiles ] )
+        #if $file_paths is not 'None':
+            --files '${file_paths}'
+            #set file_names = ','.join( [ str( $f.name ) for $f in $conditional_input.txtfiles ] )
+                --names '${file_names}'
+        #end if
+    #elif $conditional_input.inputtype == '1':
+        --sequences '${conditional_input.sequences}'
+    #end if
+
+    --outputdir 'collection_content'
+]]>
+    </command>
+    <inputs>
+        <conditional name="conditional_input">
+            <param name="inputtype" type="select" label="Input mode" help="Select a mode based on how do you want to specify the input">
+                <option value="0" selected="true">By file</option>
+                <option value="1">By manually inserted text</option>
+            </param>
+            <when value="0">
+                <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="true" help="Select one or more tabular files containing (ID, TRANSCRIPT) touples for each line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." />
+            </when>
+            <when value="1">
+                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="true" help="Insert a list of (ID, TRANSCRIPT) touples in a tab delimited format, one for each line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." />
+            </when>
+        </conditional>            
+        <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Search threshold" help="This threshold controls the specificity. Lower values will produce more hits to the query. Higher values are more stringent and will produce fewer hits." />
+    </inputs>
+    <outputs>
+        <collection name="output_collect" type="list" label="AllSome Sequence Bloom Tree Search Collection">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;ext&gt;[^_]+)" directory="collection_content" ext="tabular" />
+        </collection>
+    </outputs>
+
+    <help><![CDATA[
+The AllSome Sequence Bloom Tree Search Engine is a fast querying tool to identify all publicly available 
+sequenced samples which express a transcript of interest.
+
+----
+
+**Example**
+
+The input for this tool is a list of (ID, TRANSCRIPT) touples, one for each line,
+in a tab delimited format::
+    
+    seq_id_0  CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA
+    seq_id_1  TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT
+    ...
+    seq_id_n  CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC
+
+The output of the tool is a collection that contains a file for each ID with a list of
+accession numbers representing the samples that express one particular transcript.
+
+----
+
+.. class:: infomark
+
+**Notes**
+
+This Galaxy tool has been developed by Fabio Cumbo.
+
+Please visit this GithHub_repository_ for more information about the AllSome Sequence Bloom Tree Search Engine
+
+.. _GithHub_repository: https://github.com/fabio-cumbo/bloomtree-allsome-search-engine
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.1101/090464</citation>
+    </citations>
+</tool>
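
The help text above promises one output file per input id, and the <discover_datasets> pattern in query.xml is what turns the files that query.py writes (sequence id + "_" + "txt" inside collection_content) back into collection elements. A minimal sketch of that regular expression applied to such a file name; the id "id0" is borrowed from the TEST comment in query.py.

# Minimal sketch: the discover_datasets pattern from query.xml applied to a
# file name of the form written by query.py (sequence id + "_" + "txt").
import re

PATTERN = r"(?P<identifier_0>[^_]+)_(?P<ext>[^_]+)"

match = re.match(PATTERN, "id0_txt")
print(match.group("identifier_0"))  # -> id0  (element identifier in the collection)
print(match.group("ext"))           # -> txt  (extension field captured by the pattern)

Since [^_]+ stops at the first underscore, ids that themselves contain underscores are split at that point, which is worth keeping in mind when choosing sequence ids.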
diff -r d7b97b60d0ea -r 35593423c2e2 retrieve.py
--- a/retrieve.py Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-#!/usr/bin/env python
-
-# NCBI SRA Tools
-# https://galaxyproject.org/tutorials/upload/
-
-import os
-import optparse
-from subprocess import Popen, PIPE
-
-db_key = "?";
-sra_instant_url = "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/";
-
-def convertSRA(tmp_dir, accession_number, data_format):
-    absolute_tmp_dir = os.path.abspath(tmp_dir);
-    sra_file_path = os.path.join(absolute_tmp_dir, accession_number+".sra");
-    if os.path.isdir(absolute_tmp_dir) and os.path.exists(sra_file_path):
-        process = None;
-        if data_format == ".fasta.gz":
-            process = Popen(["fastq-dump", "--fasta", "--gzip", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        elif data_format == ".fastq.gz":
-            process = Popen(["fastq-dump", "--gzip", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        elif data_format == ".fasta":
-            process = Popen(["fastq-dump", "--fasta", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        elif data_format == ".fastq":
-            process = Popen(["fastq-dump", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        else:
-            process = None;
-        if process is not None:
-            (output, err) = process.communicate();
-            if err:
-                # kill the process
-                # kill_process(process.pid);
-                # remove any trace of the output file
-                an_file_path = os.path.join(tmp_dir, accession_number+data_format);
-                if os.path.exists(an_file_path):
-                    os.unlink(an_file_path);
-                # try to restart the process
-                return downloadAccessionData(tmp_dir, accession_number, data_format);
-            #exit_code = process.wait();
-            return os.path.join(tmp_dir, accession_number+data_format);
-    return "";
-
-def downloadAccessionData(accession_number, accession_path, appdata_path, data_format, limit=10):
-    split = accession_number[:6];
-    srr_path = sra_instant_url+split+"/"+accession_number+"/"+accession_number+".sra";
-    sra_file_path = os.path.join(appdata_path, accession_number+".sra");
-    process = Popen(['wget', srr_path, "--output-document="+sra_file_path], stdout=PIPE);
-    (output, err) = process.communicate();
-    if err:
-        # remove any trace of the output file
-        if os.path.exists(an_file_path):
-            os.unlink(an_file_path);
-        # try to restart the process
-        if limit > 0:
-            return downloadAccessionData(accession_number, accession_path, appdata_path, data_format, limit-1);
-        return -1;
-    if os.path.exists(sra_file_path):
-        converted_file_path = convertSRA(appdata_path, accession_number, data_format);
-        if os.path.exists(converted_file_path):
-            os.rename(converted_file_path, accession_path);
-        os.unlink(sra_file_path);
-    return 0;
-
-def process_accessions( options, args ):
-    # create appdata dir if it does not exist
-    appdata_path = options.appdata;
-    if not os.path.exists(appdata_path):
-        os.makedirs(appdata_path);
-    data_format = options.dataformat;
-    '''
-    # Collection test
-    test_file_name = "Test Collection" + "_" + "SRRtest" + "_" + data_format[1:] + "_" + db_key;
-    test_file_path = os.path.join(appdata_path, test_file_name);
-    file = open(test_file_path, "w");
-    file.write("Hello World");
-    file.close();
-    '''
-    # read inputs
-    comma_sep_file_paths = options.files;
-    #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
-    # check if options.files contains at least one file path
-    if comma_sep_file_paths is not None:
-        # split file paths
-        file_paths = comma_sep_file_paths.split(",");
-        # split file names
-        comma_sep_file_names = str(options.names);
-        #print("names: "+str(comma_sep_file_names));
-        file_names = comma_sep_file_names.split(",");
-        # populate a dictionary with the files containing the sequences to query
-        for idx, file_path in enumerate(file_paths):
-            file_name = file_names[idx];
-            #print(file_name + ": " + file_path);
-            with open(file_path) as accessions:
-                for line in accessions:
-                    if line.strip() != "" and not line.startswith(">"):
-                        accession_number = line.strip();
-                        filename_with_collection_prefix = file_name + "_" + accession_number + "_" + data_format[1:] + "_" + db_key;
-                        accession_path = os.path.join(appdata_path, filename_with_collection_prefix)
-                        # download fastq filte related to accession_number
-                        downloadAccessionData( accession_number, accession_path, appdata_path, data_format );
-    return 0;
-
-def __main__():
-    # Parse the command line options
-    usage = "Usage: retrieve.py --files comma_sep_file_paths --names comma_seq_file_names --format data_format --appdata folder_name";
-    parser = optparse.OptionParser(usage = usage);
-    parser.add_option("-f", "--files", type="string",
-                    action="store", dest="files", help="comma separated files path");
-    parser.add_option("-n", "--names", type="string",
-                    action="store", dest="names", help="comma separated names associated to the files specified in --files");
-    parser.add_option("-e", "--format", type="string",
-                    action="store", dest="dataformat", help="data format");
-    parser.add_option("-a", "--appdata", type="string",
-                    action="store", dest="appdata", help="appdata folder name");
-    (options, args) = parser.parse_args();
-    return process_accessions( options, args );
-
-if __name__ == "__main__": __main__()
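
The removed retrieve.py located each .sra file on the sra-instant FTP area by combining the first six characters of the accession with the accession itself, fetched it with wget, and converted it with fastq-dump (adding --fasta and/or --gzip depending on the requested format). A small sketch of just the URL construction it used; the example accession is hypothetical.

# Sketch of the sra-instant URL layout used by the removed retrieve.py:
# <base>/<first six characters of the accession>/<accession>/<accession>.sra
SRA_INSTANT_URL = "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/"

def sra_url(accession_number):
    prefix = accession_number[:6]   # e.g. "SRR123" for the hypothetical "SRR1234567"
    return SRA_INSTANT_URL + prefix + "/" + accession_number + "/" + accession_number + ".sra"

# sra_url("SRR1234567")
# -> ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR123/SRR1234567/SRR1234567.sra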
diff -r d7b97b60d0ea -r 35593423c2e2 retrieve.xml
--- a/retrieve.xml Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Retrieve" id="sbtas_se_retrieve" version="1.0.0">
-    <description>data from SRA</description>
-    <requirements>
-        <requirement type="package" version="2.7.10">python</requirement>
-        <requirement type="package" version="2.8.2">sra-tools</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-<![CDATA[
-    python '$__tool_directory__/retrieve.py'
-    #set file_paths = ','.join( [ str( $f ) for $f in $files ] )
-    --files '${file_paths}'
-    #set file_names = ','.join( [ str( $f.name ) for $f in $files ] )
-        --names '${file_names}'
-    --format '${dataformat}'
-    --appdata 'tmp'
-    > ${stdouterr}
-]]>
-    </command>
-    <inputs>
-        <param format="json" name="files" type="data" label="Select input files" multiple="true" optional="false" help="Select one or more json files containing a list of accession numbers (as result of the Search tool)." />
-        <param name="dataformat" type="select" label="Select a data format" help="Select a data format for the accession numbers related files that will be downloaded">
-            <option value=".fastq">.fastq</option>
-            <option value=".fastq.gz">.fastq.gz</option>
-            <option value=".fasta">.fasta</option>
-            <option value=".fasta.gz">.fasta.gz</option>
-        </param>
-    </inputs>
-    <outputs>
-        <collection name="list_output" type="list:list" label="${tool.name} Accessions: Output Collection">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)_(?P&lt;ext&gt;[^_]+)_(?P&lt;dbkey&gt;[^_]+)" ext="auto" visible="False" directory="tmp" />
-        </collection>
-        <data format="txt" name="stdouterr" />
-    </outputs>
-
-    <help><![CDATA[
-Authors: Fabio Cumbo, Robert S. Harris, Chen Sun
-
-This tool will retrieve fastq files associated to the accession numbers listed in the input files.
-    ]]></help>
-</tool>
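
retrieve.xml gathered its outputs as a nested list:list collection, so every file dropped into tmp had to carry four underscore-separated fields (outer identifier, inner identifier, extension, dbkey), which is the filename_with_collection_prefix convention built in retrieve.py. A brief sketch of that convention being matched by the discovery pattern; the concrete file name is a made-up example.

# Brief sketch: the four-field file name convention shared by retrieve.py
# (which wrote the files into tmp) and the discover_datasets pattern in
# retrieve.xml (which read them back as a list:list collection).
import re

PATTERN = (r"(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)_"
           r"(?P<ext>[^_]+)_(?P<dbkey>[^_]+)")

# retrieve.py built: file_name + "_" + accession_number + "_" + data_format[1:] + "_" + db_key
# (the name below is a made-up example; db_key defaulted to "?")
discovered = "accessions_SRR1234567_fastq_?"

print(re.match(PATTERN, discovered).groupdict())
# {'identifier_0': 'accessions', 'identifier_1': 'SRR1234567', 'ext': 'fastq', 'dbkey': '?'}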
diff -r d7b97b60d0ea -r 35593423c2e2 search.py
--- a/search.py Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,140 +0,0 @@
-#!/usr/bin/env python
-
-# https://github.com/ross/requests-futures
-# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
-
-import os, uuid
-import optparse
-import requests
-from requests_futures.sessions import FuturesSession
-
-#### UV0 ####
-# proxy to uv0
-#service_url = "http://deputy.bx.psu.edu/";
-# url to query page
-#query_url = service_url+"query.php";
-# url to echo page: just return 'it works!'
-#echo_url = service_url+"echo.php";
-#############
-
-#### NN14 ####
-service_url = "http://nn14.galaxyproject.org:8080/";
-query_url = service_url+"tree/0/query";
-##############
-
-'''
-# synchronous
-def echo( options, args ):
-    # create a session
-    session = requests.Session()
-    # make a sync get request
-    resp = session.get(echo_url)
-    # check for response status code
-    resp_code = resp.status_code;
-    if resp_code == requests.codes.ok:
-        # get output file path
-        output_file_path = options.output;
-        # write response on the output file
-        with open(output_file_path, 'w') as out:
-            #out.write(resp.data);
-            out.write(resp.content);
-        return 0;
-    else:
-        return resp_code;
-'''
-
-# asynchronous
-def async_request( options, args, payload ):
-    # add additional parameters to the payload
-    #payload["tree_id"] = str(options.treeid);
-    payload["search_mode"] = str(options.search);
-    payload["exact_algorithm"] = int(options.exact);
-    payload["search_threshold"] = float(options.sthreshold);
-    # set the content type to application/json
-    headers = {'Content-type': 'application/json'};
-    # create a session
-    session = FuturesSession();
-    # make an async post request with requests-futures
-    future_req = session.post(query_url, headers=headers, json=payload);
-    # wait for the request to complete, if it has not already
-    resp = future_req.result();
-    # check for response status code
-    resp_code = resp.status_code;
-    # get output file path
-    output_file_path = options.output;
-    # write response on the output file
-    with open(output_file_path, 'w') as out:
-        #out.write(resp.data);
-        out.write(str(resp.content));
-    if resp_code == requests.codes.ok:
-        return 0;
-    else:
-        return resp_code;
-
-def srase_query( options, args ):
-    multiple_data = {};
-    comma_sep_file_paths = options.files;
-    #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
-    # check if options.files contains at least one file path
-    if comma_sep_file_paths is not None:
-        # split file paths
-        file_paths = comma_sep_file_paths.split(",");
-        # split file names
-        comma_sep_file_names = str(options.names);
-        #print("names: "+str(comma_sep_file_names));
-        file_names = comma_sep_file_names.split(",");
-        # populate a dictionary with the files containing the sequences to query
-        sequences = [];
-        for idx, file_path in enumerate(file_paths):
-            #file_name = file_names[idx];
-            with open(file_path, 'r') as content_file:
-                content = content_file.read()
-                sequences.append(content.strip());
-                #multiple_data[file_name] = content;
-                #print(file_name+": "+content+"\n");
-        if len(sequences) > 0:
-            multiple_data['sequences'] = sequences;
-            return async_request( options, args,  multiple_data );
-            #return echo( options, args );
-        else:
-            return -1;
-    else:
-        # try with the sequence in --sequence
-        text_content = options.sequences;
-        #print("sequences: "+text_content);
-        # check if options.sequences contains a list of sequences (one for each row)
-        if text_content is not None:
-            text_content = str(text_content);
-            if text_content.strip():
-                # populate a dictionary with the files containing the sequences to query
-                multiple_data['sequences'] = text_content.strip().split("__cn__");
-                return async_request( options, args, multiple_data );
-                #return echo( options, args );
-            else:
-                return -1;
-    return -1;
-
-def __main__():
-    # Parse the command line options
-    usage = "Usage: search.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --output output_file_path";
-    parser = optparse.OptionParser(usage = usage);
-    parser.add_option("-f", "--files", type="string",
-                    action="store", dest="files", help="comma separated files path");
-    parser.add_option("-n", "--names", type="string",
-                    action="store", dest="names", help="comma separated names associated to the files specified in --files");
-    parser.add_option("-s", "--sequences", type="string",
-                    action="store", dest="sequences", help="contains a list of sequences (one for each row)");
-    parser.add_option("-a", "--fasta", type="string",
-                    action="store", dest="fasta", help="contains the content of a fasta file");
-    parser.add_option("-x", "--search", type="string", default=0,
-                    action="store", dest="search", help="search mode");
-    parser.add_option("-e", "--exact", type="int", default=0,
-                    action="store", dest="exact", help="exact algorithm (required if search is 1 only)");
-    parser.add_option("-t", "--sthreshold", type="float",
-                    action="store", dest="sthreshold", help="threshold applied to the search algrithm");
-    parser.add_option("-o", "--output", type="string",
-                    action="store", dest="output", help="output file path");
-    (options, args) = parser.parse_args();
-    return srase_query( options, args );
-
-if __name__ == "__main__": __main__()
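
The removed search.py differed from the new query.py mainly in its transport: it posted the request once through requests-futures and wrote the raw JSON response to a single output file, sending the sequences as a flat list under the 'sequences' key (one entry per input file, or the __cn__-separated rows of the text parameter). A compact sketch of that older request path, reusing the endpoint and payload keys from the script above; the helper itself is illustrative only.

# Compact sketch of the request path used by the removed search.py: a single
# asynchronous POST through requests-futures whose JSON body carries a flat
# list of sequences plus the search parameters. Illustrative only.
from requests_futures.sessions import FuturesSession

QUERY_URL = "http://nn14.galaxyproject.org:8080/tree/0/query"

def post_sequences(sequences, search_mode="rrr", exact=0, threshold=0.5):
    payload = {
        "sequences": list(sequences),
        "search_mode": search_mode,
        "exact_algorithm": exact,
        "search_threshold": threshold,
    }
    session = FuturesSession()
    future = session.post(QUERY_URL, json=payload)
    response = future.result()   # block until the request has completed
    return response.status_code, response.content

# Text pasted into the Galaxy text area arrives with "__cn__" in place of
# newlines, so search.py split on that token before posting:
#     sequences = text_content.strip().split("__cn__")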
diff -r d7b97b60d0ea -r 35593423c2e2 search.xml
--- a/search.xml Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Search" id="sbtas_se_search" version="1.0.0">
-    <description>your sequences in the big SRA data lake</description>
-    <requirements>
-        <requirement type="package" version="2.7.10">python</requirement>
-        <requirement type="package" version="2.18.4">requests</requirement>
-        <requirement type="package" version="0.9.7">requests-futures</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-<![CDATA[
-    python '$__tool_directory__/search.py'
-    
-    --search 'rrr'
-    --sthreshold ${sthreshold}
-    --exact 0
-    
-    #if $conditional_input_zero.inputtype_zero == '0':
-        #set file_paths = ','.join( [ str( $f ) for $f in $conditional_input_zero.txtfiles ] )
-        #if $file_paths is not 'None':
-            --files '${file_paths}'
-            #set file_names = ','.join( [ str( $f.name ) for $f in $conditional_input_zero.txtfiles ] )
-                --names '${file_names}'
-        #end if
-    #elif $conditional_input_zero.inputtype_zero == '1':
-        --sequences '${conditional_input_zero.sequences}'
-    #end if
-
-    --output '${output}'
-]]>
-    </command>
-    <inputs>
-        <conditional name="conditional_input_zero">
-            <param name="inputtype_zero" type="select" label="Input mode" help="Select a mode based on how do you want to specify the input">
-                <option value="0" selected="true">By file</option>
-                <option value="1">By manually inserted text</option>
-            </param>
-            <when value="0">
-                <param format="txt" name="txtfiles" type="data" label="Select sequences" multiple="true" optional="true" help="Select one or more txt files containing a sequence. A single file can contain more sequences, one for each row. Every file will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a list of accession numbers. It is worth noting that the result could be empty." />
-            </when>
-            <when value="1">
-                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequence" optional="true" help="Insert a list of sequences (one for each row) in this text field representing a query to the AllSome Sequence Bloom Tree Search Engine. It is worth noting that the result could be empty." />
-            </when>
-        </conditional>            
-        <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Threshold applied to the search algorithm" />
-    </inputs>
-    <outputs>
-        <data name="output" format="json" label="${tool.name} on ${on_string}: AllSome Sequence Bloom Tree Search Result" />
-    </outputs>
-
-    <help><![CDATA[
-Authors: Fabio Cumbo, Robert S. Harris, Chen Sun
-    ]]></help>
-
-    <citations>
-        <citation type="doi">10.1101/090464</citation>
-    </citations>
-</tool>