# HG changeset patch
# User fabio
# Date 1517416133 18000
# Node ID 35593423c2e2bbd6503563e618b8f7079cc45167
# Parent  d7b97b60d0ea1428845283ccabf9d6fe8cd9366d
Uploaded 20180131

diff -r d7b97b60d0ea -r 35593423c2e2 ._.shed.yml
Binary file ._.shed.yml has changed
diff -r d7b97b60d0ea -r 35593423c2e2 ._example.tsv
Binary file ._example.tsv has changed
diff -r d7b97b60d0ea -r 35593423c2e2 ._query.py
Binary file ._query.py has changed
diff -r d7b97b60d0ea -r 35593423c2e2 ._query.xml
Binary file ._query.xml has changed
diff -r d7b97b60d0ea -r 35593423c2e2 .shed.yml
--- a/.shed.yml	Wed Jan 24 11:26:33 2018 -0500
+++ b/.shed.yml	Wed Jan 31 11:28:53 2018 -0500
@@ -1,20 +1,12 @@
 name: sbtas_se
 owner: iuc
 categories:
+  - Data Source
   - Web Services
-  - Data Source
 description: AllSome Sequence Bloom Tree Search Engine
 long_description: |
-  A fast querying tool to search on the Sequence Read Archive repository
-  using Bloom Filters.
+  A fast querying tool to identify all publicly available sequenced
+  samples which express a transcript of interest
 remote_repository_url: https://github.com/fabio-cumbo/bloomtree-allsome-search-engine
 homepage_url: https://github.com/fabio-cumbo/bloomtree-allsome-search-engine
-type: unrestricted
-auto_tool_repositories:
-  name_template: "{{ tool_id }}"
-  descriptor_template: "Wrapper for AllSome Sequence Bloom Tree Search Engine application: {{ tool_name }}."
-suite:
-  name: "sbtas_se_suite"
-  description: "A suite of Galaxy tools designed to interface with the AllSome Sequence Bloom Tree Search Engine APIs."
-  long_description: |
-    Rapid querying of massive sequence datasets
\ No newline at end of file
+type: unrestricted
\ No newline at end of file
diff -r d7b97b60d0ea -r 35593423c2e2 example.tsv
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example.tsv	Wed Jan 31 11:28:53 2018 -0500
@@ -0,0 +1,3 @@
+0	CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCAAAAATGGCGTTAAAATGTGTAATCAGAGAAGCGACACGAAAAGGGGATCAGCTCTTGGCTGGCAATTGGTAGGTCAGAGGTGGATTGGGAAAAGGCAAGTCAGCAACTGTCGATGACGGCGACTGACTGTTAATGAAAATTGTTTTGGCTGTGTGGAAAAAAATACGCGGGAATCCGTGAATTTTCCGAGGAGCTGGTGGAGCGAAGAAAACGGGGTGCTGCTGTTGTAAATGATTGGTGAAAGTCACACGCCCGCAGCCTTGCCAAACTAATTAACGCCAAATGGAGCTAAGGCCTTTGAATGATGGCTGCAGGCTAGCTTATGAAAAGGGGTTGAAGAGAAGTGGAAAAATTGGTAGAAAGGGATTTGCTCAAGATGCC
+1	TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAATGAATGGCCATTTCATTTGCATGTTGGGAGCAACAGAAATGAGAGAGCATCCGAAGCTAACCACAAAAATGGACTTTGCTTCATTATGCACAAACACGCCAATAAATGTAACGAGAAAGATAGTAGGAGCGAAAGACGAGACGAGACAAACAGGAAGAAGACGAGTGGACGAGTGTTTTTTGTAACGAAACTCTTAATCGCTCCTTTGCAGGCTTAAGCTGATAGTTGCTACGTTTATGCCATGAATTTCAAGATCTCTCAAATGCGTGAAAATCCAGTTTATGCGACAGACAAATTCATGTATTTGAAAAATCTTAGCTGATAGAAATCAAAGGTGATT
+2	CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC
\ No newline at end of file
diff -r d7b97b60d0ea -r 35593423c2e2 query.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/query.py	Wed Jan 31 11:28:53 2018 -0500
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+
+# https://github.com/ross/requests-futures
+# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
+
+import os, uuid, optparse, requests, json, time
+#from requests_futures.sessions import FuturesSession
+
+#### NN14: endpoints of the remote search service ####
+service_url = "http://nn14.galaxyproject.org:8080/";
+#service_url = "http://127.0.0.1:8082/";
+query_url = service_url+"tree/0/query"; # route the query is POSTed to
+status_url = service_url+"status/<task_id>"; # <task_id> is replaced with the id of the submitted task
+##############
+
+def query_request( options, args, payload ):
+    """Submit the sequences to the remote search service and save the hits.
+
+    options -- parsed command line options; search, exact, sthreshold and
+               outputdir are read here
+    args    -- positional command line arguments (unused)
+    payload -- dictionary mapping each sequence id to its sequence; it is
+               extended in place with the search parameters
+
+    The payload is POSTed to query_url and the returned task is polled on
+    status_url once a minute until its state is SUCCESS, FAILURE or REVOKED.
+    On success one file per sequence id is written into options.outputdir,
+    holding one accession number per line.
+
+    Returns None on success, otherwise a string describing the failure.
+    """
+    # add the search parameters to the payload
+    payload["search_mode"] = str(options.search)
+    payload["exact_algorithm"] = int(options.exact)
+    payload["search_threshold"] = float(options.sthreshold)
+    headers = {'Content-type': 'application/json'}
+    server_error = "Unable to query the remote server. Please try again in a while."
+
+    # a single session is reused for the submission and for every status poll
+    session = requests.Session()
+    try:
+        req = session.post(query_url, headers=headers, json=payload)
+        if req.status_code != requests.codes.ok:
+            return server_error
+        # Response.json() decodes the body itself; str(req.content) would
+        # produce "b'...'" on Python 3 and make json.loads fail
+        task_id = req.json()['task_id']
+        status_query_url = status_url.replace("<task_id>", task_id)
+        while True:
+            status_req = session.get(status_query_url)
+            if status_req.status_code != requests.codes.ok:
+                return server_error
+            json_status_content = status_req.json()
+            # the state attribute is always available
+            state = json_status_content['state']
+            if state == 'SUCCESS':
+                break
+            if state in ['FAILURE', 'REVOKED']:
+                return "Task status: "+str(state)
+            time.sleep(60) # in seconds
+    finally:
+        session.close()
+
+    # get output dir (collection) path
+    output_dir_path = options.outputdir
+    if not os.path.exists(output_dir_path):
+        os.makedirs(output_dir_path)
+    out_file_format = "txt"
+
+    for block in json_status_content['results']:
+        seq_id = block['sequence_id']
+        # the name must keep the <identifier>_<ext> shape expected by the
+        # discover_datasets pattern in query.xml
+        output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format)
+        with open(output_file_path, 'w') as out:
+            # one accession number per line, no trailing newline
+            out.write("\n".join(block['accession_numbers']).strip())
+    return None
+
+def _collect_sequences( lines, multiple_data ):
+    """Add the <id, sequence> pair found on each line to multiple_data.
+
+    Lines that do not hold exactly two fields are skipped. Returns an error
+    message as soon as an id occurs twice, None otherwise.
+    """
+    for line in lines:
+        # the separator is a literal tab in the input files and the __tc__
+        # placeholder in the manually inserted text, so both are accepted
+        line_split = line.strip().replace("__tc__", "\t").split("\t")
+        if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
+            seq_id, seq_text = line_split
+            if seq_id in multiple_data:
+                return "Error: the id '"+seq_id+"' is duplicated"
+            multiple_data[seq_id] = seq_text
+    return None
+
+def query( options, args ):
+    """Collect the <id, sequence> pairs to search for and run the query.
+
+    The pairs are read from the comma separated file paths in options.files
+    when that option is set, otherwise from the rows of options.sequences
+    (separated by the __cn__ placeholder).
+
+    options -- parsed command line options
+    args    -- positional command line arguments, forwarded to query_request
+
+    Returns the result of query_request (None on success, an error message
+    otherwise), an error message when the input is not valid, or -1 when
+    neither --files nor --sequences has been specified.
+    """
+    multiple_data = {}
+    if options.files is not None:
+        for file_path in options.files.split(","):
+            if not file_path.strip():
+                continue # ignore empty entries, e.g. a trailing comma
+            with open(file_path, 'r') as content_file:
+                error = _collect_sequences(content_file, multiple_data)
+            if error is not None:
+                return error
+    elif options.sequences is None:
+        # nothing to search for
+        return -1
+    else:
+        text_content = str(options.sequences).strip()
+        if not text_content:
+            return "You have to insert at least one row formatted as a tab delimited <id, sequence> touple"
+        # one <id, sequence> pair for each row
+        error = _collect_sequences(text_content.split("__cn__"), multiple_data)
+        if error is not None:
+            return error
+    if len(multiple_data) > 0:
+        # submit the collected pairs; the request, the polling and the
+        # output files are handled by query_request
+        return query_request( options, args, multiple_data )
+    return "An error has occurred. Please be sure that your input files are valid."
+
+def __main__():
+    """Parse the command line, run the query and report any failure.
+
+    query() signals a failure by returning an error message (or -1 when no
+    input is specified). It is raised as SystemExit so that the message goes
+    to stderr with a non-zero exit code, as detect_errors="exit_code" in
+    query.xml requires. Returns None when the query succeeds.
+    """
+    usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path"
+    parser = optparse.OptionParser(usage = usage)
+    parser.add_option("-f", "--files", type="string",
+                    action="store", dest="files", help="comma separated files path")
+    parser.add_option("-n", "--names", type="string",
+                    action="store", dest="names", help="comma separated names associated to the files specified in --files")
+    parser.add_option("-s", "--sequences", type="string",
+                    action="store", dest="sequences", help="contains a list of sequences (one for each row)")
+    parser.add_option("-a", "--fasta", type="string",
+                    action="store", dest="fasta", help="contains the content of a fasta file")
+    parser.add_option("-x", "--search", type="string", default=0,
+                    action="store", dest="search", help="search mode")
+    parser.add_option("-e", "--exact", type="int", default=0,
+                    action="store", dest="exact", help="exact algorithm (required if search is 1 only)")
+    parser.add_option("-t", "--sthreshold", type="float",
+                    action="store", dest="sthreshold", help="threshold applied to the search algrithm")
+    parser.add_option("-o", "--outputdir", type="string",
+                    action="store", dest="outputdir", help="output directory (collection) path")
+    (options, args) = parser.parse_args()
+    if options.sthreshold is None or options.outputdir is None:
+        # query_request needs both values; fail early with the usage message
+        parser.error("--sthreshold and --outputdir are required")
+    result = query( options, args )
+    if result == -1:
+        result = "You have to specify either --files or --sequences"
+    if result is not None:
+        raise SystemExit(result)
+    return None
+
+if __name__ == "__main__": __main__()
diff -r d7b97b60d0ea -r 35593423c2e2 query.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/query.xml	Wed Jan 31 11:28:53 2018 -0500
@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<tool name="Query" id="sbtas_se_query" version="1.0.0">
+    <description>the AllSome Sequence Bloom Tree</description>
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="2.18.4">requests</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+<![CDATA[
+    python '$__tool_directory__/query.py'
+
+    --search 'rrr'
+    --sthreshold ${sthreshold}
+    --exact 0
+    #if $conditional_input.inputtype == '0':
+        #set file_paths = ','.join( [ str( $f ) for $f in $conditional_input.txtfiles ] )
+        ## skip --files when the selection is empty or renders as 'None'; compare by value, "is not" only tests identity
+        #if $file_paths and $file_paths != 'None':
+            --files '${file_paths}'
+            #set file_names = ','.join( [ str( $f.name ) for $f in $conditional_input.txtfiles ] )
+                --names '${file_names}'
+        #end if
+    #elif $conditional_input.inputtype == '1':
+        --sequences '${conditional_input.sequences}'
+    #end if
+
+    --outputdir 'collection_content'
+]]>
+    </command>
+    <inputs>
+        <conditional name="conditional_input">
+            <param name="inputtype" type="select" label="Input mode" help="Select a mode based on how you want to specify the input">
+                <option value="0" selected="true">By file</option>
+                <option value="1">By manually inserted text</option>
+            </param>
+            <when value="0">
+                <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="true" help="Select one or more tabular files containing (ID, TRANSCRIPT) tuples for each line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." />
+            </when>
+            <when value="1">
+                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="true" help="Insert a list of (ID, TRANSCRIPT) tuples in a tab delimited format, one for each line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each id. The content of these files as result of the tool will be a list of accession numbers." />
+            </when>
+        </conditional>            
+        <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Search threshold" help="This threshold controls the specificity. Lower values will produce more hits to the query. Higher values are more stringent and will produce fewer hits." />
+    </inputs>
+    <outputs>
+        <collection name="output_collect" type="list" label="AllSome Sequence Bloom Tree Search Collection">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;ext&gt;[^_]+)" directory="collection_content" ext="tabular" />
+        </collection>
+    </outputs>
+
+    <help><![CDATA[
+The AllSome Sequence Bloom Tree Search Engine is a fast querying tool to identify all publicly available 
+sequenced samples which express a transcript of interest.
+
+----
+
+**Example**
+
+The input for this tool is a list of (ID, TRANSCRIPT) tuples, one for each line,
+in a tab delimited format::
+    
+    seq_id_0  CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA
+    seq_id_1  TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAAT
+    ...
+    seq_id_n  CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC
+
+The output of the tool is a collection that contains a file for each ID with a list of
+accession numbers representing the samples that express one particular transcript.
+
+----
+
+.. class:: infomark
+
+**Notes**
+
+This Galaxy tool has been developed by Fabio Cumbo.
+
+Please visit this GitHub_repository_ for more information about the AllSome Sequence Bloom Tree Search Engine
+
+.. _GitHub_repository: https://github.com/fabio-cumbo/bloomtree-allsome-search-engine
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.1101/090464</citation>
+    </citations>
+</tool>
diff -r d7b97b60d0ea -r 35593423c2e2 retrieve.py
--- a/retrieve.py	Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-#!/usr/bin/env python
-
-# NCBI SRA Tools
-# https://galaxyproject.org/tutorials/upload/
-
-import os
-import optparse
-from subprocess import Popen, PIPE
-
-db_key = "?";
-sra_instant_url = "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/";
-
-def convertSRA(tmp_dir, accession_number, data_format):
-    absolute_tmp_dir = os.path.abspath(tmp_dir);
-    sra_file_path = os.path.join(absolute_tmp_dir, accession_number+".sra");
-    if os.path.isdir(absolute_tmp_dir) and os.path.exists(sra_file_path):
-        process = None;
-        if data_format == ".fasta.gz":
-            process = Popen(["fastq-dump", "--fasta", "--gzip", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        elif data_format == ".fastq.gz":
-            process = Popen(["fastq-dump", "--gzip", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        elif data_format == ".fasta":
-            process = Popen(["fastq-dump", "--fasta", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        elif data_format == ".fastq":
-            process = Popen(["fastq-dump", sra_file_path, "--outdir", absolute_tmp_dir], stdout=PIPE);
-        else:
-            process = None;
-        if process is not None:
-            (output, err) = process.communicate();
-            if err:
-                # kill the process
-                # kill_process(process.pid);
-                # remove any trace of the output file
-                an_file_path = os.path.join(tmp_dir, accession_number+data_format);
-                if os.path.exists(an_file_path):
-                    os.unlink(an_file_path);
-                # try to restart the process
-                return downloadAccessionData(tmp_dir, accession_number, data_format);
-            #exit_code = process.wait();
-            return os.path.join(tmp_dir, accession_number+data_format);
-    return "";
-
-def downloadAccessionData(accession_number, accession_path, appdata_path, data_format, limit=10):
-    split = accession_number[:6];
-    srr_path = sra_instant_url+split+"/"+accession_number+"/"+accession_number+".sra";
-    sra_file_path = os.path.join(appdata_path, accession_number+".sra");
-    process = Popen(['wget', srr_path, "--output-document="+sra_file_path], stdout=PIPE);
-    (output, err) = process.communicate();
-    if err:
-        # remove any trace of the output file
-        if os.path.exists(an_file_path):
-            os.unlink(an_file_path);
-        # try to restart the process
-        if limit > 0:
-            return downloadAccessionData(accession_number, accession_path, appdata_path, data_format, limit-1);
-        return -1;
-    if os.path.exists(sra_file_path):
-        converted_file_path = convertSRA(appdata_path, accession_number, data_format);
-        if os.path.exists(converted_file_path):
-            os.rename(converted_file_path, accession_path);
-        os.unlink(sra_file_path);
-    return 0;
-
-def process_accessions( options, args ):
-    # create appdata dir if it does not exist
-    appdata_path = options.appdata;
-    if not os.path.exists(appdata_path):
-        os.makedirs(appdata_path);
-    data_format = options.dataformat;
-    '''
-    # Collection test
-    test_file_name = "Test Collection" + "_" + "SRRtest" + "_" + data_format[1:] + "_" + db_key;
-    test_file_path = os.path.join(appdata_path, test_file_name);
-    file = open(test_file_path, "w");
-    file.write("Hello World");
-    file.close();
-    '''
-    # read inputs
-    comma_sep_file_paths = options.files;
-    #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
-    # check if options.files contains at least one file path
-    if comma_sep_file_paths is not None:
-        # split file paths
-        file_paths = comma_sep_file_paths.split(",");
-        # split file names
-        comma_sep_file_names = str(options.names);
-        #print("names: "+str(comma_sep_file_names));
-        file_names = comma_sep_file_names.split(",");
-        # populate a dictionary with the files containing the sequences to query
-        for idx, file_path in enumerate(file_paths):
-            file_name = file_names[idx];
-            #print(file_name + ": " + file_path);
-            with open(file_path) as accessions:
-                for line in accessions:
-                    if line.strip() != "" and not line.startswith(">"):
-                        accession_number = line.strip();
-                        filename_with_collection_prefix = file_name + "_" + accession_number + "_" + data_format[1:] + "_" + db_key;
-                        accession_path = os.path.join(appdata_path, filename_with_collection_prefix)
-                        # download fastq filte related to accession_number
-                        downloadAccessionData( accession_number, accession_path, appdata_path, data_format );
-    return 0;
-
-def __main__():
-    # Parse the command line options
-    usage = "Usage: retrieve.py --files comma_sep_file_paths --names comma_seq_file_names --format data_format --appdata folder_name";
-    parser = optparse.OptionParser(usage = usage);
-    parser.add_option("-f", "--files", type="string",
-                    action="store", dest="files", help="comma separated files path");
-    parser.add_option("-n", "--names", type="string",
-                    action="store", dest="names", help="comma separated names associated to the files specified in --files");
-    parser.add_option("-e", "--format", type="string",
-                    action="store", dest="dataformat", help="data format");
-    parser.add_option("-a", "--appdata", type="string",
-                    action="store", dest="appdata", help="appdata folder name");
-    (options, args) = parser.parse_args();
-    return process_accessions( options, args );
-
-if __name__ == "__main__": __main__()
diff -r d7b97b60d0ea -r 35593423c2e2 retrieve.xml
--- a/retrieve.xml	Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Retrieve" id="sbtas_se_retrieve" version="1.0.0">
-    <description>data from SRA</description>
-    <requirements>
-        <requirement type="package" version="2.7.10">python</requirement>
-        <requirement type="package" version="2.8.2">sra-tools</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-<![CDATA[
-    python '$__tool_directory__/retrieve.py'
-    #set file_paths = ','.join( [ str( $f ) for $f in $files ] )
-    --files '${file_paths}'
-    #set file_names = ','.join( [ str( $f.name ) for $f in $files ] )
-        --names '${file_names}'
-    --format '${dataformat}'
-    --appdata 'tmp'
-    > ${stdouterr}
-]]>
-    </command>
-    <inputs>
-        <param format="json" name="files" type="data" label="Select input files" multiple="true" optional="false" help="Select one or more json files containing a list of accession numbers (as result of the Search tool)." />
-        <param name="dataformat" type="select" label="Select a data format" help="Select a data format for the accession numbers related files that will be downloaded">
-            <option value=".fastq">.fastq</option>
-            <option value=".fastq.gz">.fastq.gz</option>
-            <option value=".fasta">.fasta</option>
-            <option value=".fasta.gz">.fasta.gz</option>
-        </param>
-    </inputs>
-    <outputs>
-        <collection name="list_output" type="list:list" label="${tool.name} Accessions: Output Collection">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)_(?P&lt;ext&gt;[^_]+)_(?P&lt;dbkey&gt;[^_]+)" ext="auto" visible="False" directory="tmp" />
-        </collection>
-        <data format="txt" name="stdouterr" />
-    </outputs>
-
-    <help><![CDATA[
-Authors: Fabio Cumbo, Robert S. Harris, Chen Sun
-
-This tool will retrieve fastq files associated to the accession numbers listed in the input files.
-    ]]></help>
-</tool>
diff -r d7b97b60d0ea -r 35593423c2e2 search.py
--- a/search.py	Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,140 +0,0 @@
-#!/usr/bin/env python
-
-# https://github.com/ross/requests-futures
-# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
-
-import os, uuid
-import optparse
-import requests
-from requests_futures.sessions import FuturesSession
-
-#### UV0 ####
-# proxy to uv0
-#service_url = "http://deputy.bx.psu.edu/";
-# url to query page
-#query_url = service_url+"query.php";
-# url to echo page: just return 'it works!'
-#echo_url = service_url+"echo.php";
-#############
-
-#### NN14 ####
-service_url = "http://nn14.galaxyproject.org:8080/";
-query_url = service_url+"tree/0/query";
-##############
-
-'''
-# synchronous
-def echo( options, args ):
-    # create a session
-    session = requests.Session()
-    # make a sync get request
-    resp = session.get(echo_url)
-    # check for response status code
-    resp_code = resp.status_code;
-    if resp_code == requests.codes.ok:
-        # get output file path
-        output_file_path = options.output;
-        # write response on the output file
-        with open(output_file_path, 'w') as out:
-            #out.write(resp.data);
-            out.write(resp.content);
-        return 0;
-    else:
-        return resp_code;
-'''
-
-# asynchronous
-def async_request( options, args, payload ):
-    # add additional parameters to the payload
-    #payload["tree_id"] = str(options.treeid);
-    payload["search_mode"] = str(options.search);
-    payload["exact_algorithm"] = int(options.exact);
-    payload["search_threshold"] = float(options.sthreshold);
-    # set the content type to application/json
-    headers = {'Content-type': 'application/json'};
-    # create a session
-    session = FuturesSession();
-    # make an async post request with requests-futures
-    future_req = session.post(query_url, headers=headers, json=payload);
-    # wait for the request to complete, if it has not already
-    resp = future_req.result();
-    # check for response status code
-    resp_code = resp.status_code;
-    # get output file path
-    output_file_path = options.output;
-    # write response on the output file
-    with open(output_file_path, 'w') as out:
-        #out.write(resp.data);
-        out.write(str(resp.content));
-    if resp_code == requests.codes.ok:
-        return 0;
-    else:
-        return resp_code;
-
-def srase_query( options, args ):
-    multiple_data = {};
-    comma_sep_file_paths = options.files;
-    #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
-    # check if options.files contains at least one file path
-    if comma_sep_file_paths is not None:
-        # split file paths
-        file_paths = comma_sep_file_paths.split(",");
-        # split file names
-        comma_sep_file_names = str(options.names);
-        #print("names: "+str(comma_sep_file_names));
-        file_names = comma_sep_file_names.split(",");
-        # populate a dictionary with the files containing the sequences to query
-        sequences = [];
-        for idx, file_path in enumerate(file_paths):
-            #file_name = file_names[idx];
-            with open(file_path, 'r') as content_file:
-                content = content_file.read()
-                sequences.append(content.strip());
-                #multiple_data[file_name] = content;
-                #print(file_name+": "+content+"\n");
-        if len(sequences) > 0:
-            multiple_data['sequences'] = sequences;
-            return async_request( options, args,  multiple_data );
-            #return echo( options, args );
-        else:
-            return -1;
-    else:
-        # try with the sequence in --sequence
-        text_content = options.sequences;
-        #print("sequences: "+text_content);
-        # check if options.sequences contains a list of sequences (one for each row)
-        if text_content is not None:
-            text_content = str(text_content);
-            if text_content.strip():
-                # populate a dictionary with the files containing the sequences to query
-                multiple_data['sequences'] = text_content.strip().split("__cn__");
-                return async_request( options, args, multiple_data );
-                #return echo( options, args );
-            else:
-                return -1;
-    return -1;
-
-def __main__():
-    # Parse the command line options
-    usage = "Usage: search.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --output output_file_path";
-    parser = optparse.OptionParser(usage = usage);
-    parser.add_option("-f", "--files", type="string",
-                    action="store", dest="files", help="comma separated files path");
-    parser.add_option("-n", "--names", type="string",
-                    action="store", dest="names", help="comma separated names associated to the files specified in --files");
-    parser.add_option("-s", "--sequences", type="string",
-                    action="store", dest="sequences", help="contains a list of sequences (one for each row)");
-    parser.add_option("-a", "--fasta", type="string",
-                    action="store", dest="fasta", help="contains the content of a fasta file");
-    parser.add_option("-x", "--search", type="string", default=0,
-                    action="store", dest="search", help="search mode");
-    parser.add_option("-e", "--exact", type="int", default=0,
-                    action="store", dest="exact", help="exact algorithm (required if search is 1 only)");
-    parser.add_option("-t", "--sthreshold", type="float",
-                    action="store", dest="sthreshold", help="threshold applied to the search algrithm");
-    parser.add_option("-o", "--output", type="string",
-                    action="store", dest="output", help="output file path");
-    (options, args) = parser.parse_args();
-    return srase_query( options, args );
-
-if __name__ == "__main__": __main__()
diff -r d7b97b60d0ea -r 35593423c2e2 search.xml
--- a/search.xml	Wed Jan 24 11:26:33 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Search" id="sbtas_se_search" version="1.0.0">
-    <description>your sequences in the big SRA data lake</description>
-    <requirements>
-        <requirement type="package" version="2.7.10">python</requirement>
-        <requirement type="package" version="2.18.4">requests</requirement>
-        <requirement type="package" version="0.9.7">requests-futures</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-<![CDATA[
-    python '$__tool_directory__/search.py'
-    
-    --search 'rrr'
-    --sthreshold ${sthreshold}
-    --exact 0
-    
-    #if $conditional_input_zero.inputtype_zero == '0':
-        #set file_paths = ','.join( [ str( $f ) for $f in $conditional_input_zero.txtfiles ] )
-        #if $file_paths is not 'None':
-            --files '${file_paths}'
-            #set file_names = ','.join( [ str( $f.name ) for $f in $conditional_input_zero.txtfiles ] )
-                --names '${file_names}'
-        #end if
-    #elif $conditional_input_zero.inputtype_zero == '1':
-        --sequences '${conditional_input_zero.sequences}'
-    #end if
-
-    --output '${output}'
-]]>
-    </command>
-    <inputs>
-        <conditional name="conditional_input_zero">
-            <param name="inputtype_zero" type="select" label="Input mode" help="Select a mode based on how do you want to specify the input">
-                <option value="0" selected="true">By file</option>
-                <option value="1">By manually inserted text</option>
-            </param>
-            <when value="0">
-                <param format="txt" name="txtfiles" type="data" label="Select sequences" multiple="true" optional="true" help="Select one or more txt files containing a sequence. A single file can contain more sequences, one for each row. Every file will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a list of accession numbers. It is worth noting that the result could be empty." />
-            </when>
-            <when value="1">
-                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequence" optional="true" help="Insert a list of sequences (one for each row) in this text field representing a query to the AllSome Sequence Bloom Tree Search Engine. It is worth noting that the result could be empty." />
-            </when>
-        </conditional>            
-        <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Threshold applied to the search algorithm" />
-    </inputs>
-    <outputs>
-        <data name="output" format="json" label="${tool.name} on ${on_string}: AllSome Sequence Bloom Tree Search Result" />
-    </outputs>
-
-    <help><![CDATA[
-Authors: Fabio Cumbo, Robert S. Harris, Chen Sun
-    ]]></help>
-
-    <citations>
-        <citation type="doi">10.1101/090464</citation>
-    </citations>
-</tool>