changeset 2:4291c9d1ff07 draft

Uploaded 20180124
author fabio
date Wed, 24 Jan 2018 11:26:24 -0500
parents ad40eae04cdc
children d7b97b60d0ea
files ._retrieve.xml ._search.py ._search.xml retrieve.py retrieve.xml search.py search.xml
diffstat 7 files changed, 31 insertions(+), 52 deletions(-) [+]
Binary file ._retrieve.xml has changed
Binary file ._search.py has changed
Binary file ._search.xml has changed
--- a/retrieve.py	Mon Jan 22 16:42:08 2018 -0500
+++ b/retrieve.py	Wed Jan 24 11:26:24 2018 -0500
@@ -112,12 +112,7 @@
                     action="store", dest="dataformat", help="data format");
     parser.add_option("-a", "--appdata", type="string",
                     action="store", dest="appdata", help="appdata folder name");
-    parser.add_option("-v", "--version", action="store_true", dest="version",
-                    default=False, help="display version and exit");
     (options, args) = parser.parse_args();
-    if options.version:
-        print __version__;
-    else:
-        return process_accessions( options, args );
+    return process_accessions( options, args );
 
 if __name__ == "__main__": __main__()
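The retrieve.py hunk above drops the -v/--version flag, so the entry point now always falls through to process_accessions. For reference, a minimal sketch of the resulting entry point is shown below; process_accessions is the existing handler defined earlier in retrieve.py, and options whose flag strings are not visible in this hunk are omitted.

    # Sketch of retrieve.py's entry point after this change: no --version
    # branch, parsing always dispatches to process_accessions (defined
    # earlier in retrieve.py and not reproduced here).
    import optparse

    def __main__():
        parser = optparse.OptionParser()
        parser.add_option("-a", "--appdata", type="string",
                          action="store", dest="appdata", help="appdata folder name")
        # further options from the unchanged part of the file are omitted
        (options, args) = parser.parse_args()
        return process_accessions(options, args)

    if __name__ == "__main__":
        __main__()
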
--- a/retrieve.xml	Mon Jan 22 16:42:08 2018 -0500
+++ b/retrieve.xml	Wed Jan 24 11:26:24 2018 -0500
@@ -18,7 +18,7 @@
 ]]>
     </command>
     <inputs>
-        <param format="txt" name="files" type="data" label="Select input files" multiple="true" optional="false" help="Select one or more txt files containing a list of accession numbers." />
+        <param format="json" name="files" type="data" label="Select input files" multiple="true" optional="false" help="Select one or more json files containing a list of accession numbers (as result of the Search tool)." />
         <param name="dataformat" type="select" label="Select a data format" help="Select a data format for the accession numbers related files that will be downloaded">
             <option value=".fastq">.fastq</option>
             <option value=".fastq.gz">.fastq.gz</option>
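Alongside this, the Retrieve tool's input format switches from txt to json so that it accepts the datasets produced by the Search tool (whose output format is changed to json further below). Purely as an illustration, and assuming a hypothetical result layout that maps each query to a list of accession numbers, such a file could be read as sketched here; the actual schema comes from the AllSome Sequence Bloom Tree service and is not part of this changeset.

    # Illustrative only: the JSON layout assumed here is hypothetical and not
    # defined by this changeset. It shows one way a Search-tool result file
    # could be turned into a flat list of accession numbers for retrieval.
    import json

    def load_accessions(json_path):
        with open(json_path, "r") as handle:
            result = json.load(handle)
        accessions = []
        # assumed layout: {"<query sequence>": ["SRR...", "ERR...", ...], ...}
        for matches in result.values():
            accessions.extend(matches)
        return accessions
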
--- a/search.py	Mon Jan 22 16:42:08 2018 -0500
+++ b/search.py	Wed Jan 24 11:26:24 2018 -0500
@@ -46,14 +46,16 @@
 # asynchronous
 def async_request( options, args, payload ):
     # add additional parameters to the payload
-    payload["tree_id"] = str(options.treeid);
+    #payload["tree_id"] = str(options.treeid);
     payload["search_mode"] = str(options.search);
-    payload["exact_algorithm"] = str(options.exact);
-    payload["search_threshold"] = str(options.sthreshold);
+    payload["exact_algorithm"] = int(options.exact);
+    payload["search_threshold"] = float(options.sthreshold);
+    # set the content type to application/json
+    headers = {'Content-type': 'application/json'};
     # create a session
     session = FuturesSession();
     # make an async post request with requests-futures
-    future_req = session.post(query_url, data=payload);
+    future_req = session.post(query_url, headers=headers, json=payload);
     # wait for the request to complete, if it has not already
     resp = future_req.result();
     # check for response status code
@@ -63,14 +65,14 @@
     # write response on the output file
     with open(output_file_path, 'w') as out:
         #out.write(resp.data);
-        out.write(str(resp_code)+"\n"+str(resp.content));
+        out.write(str(resp.content));
     if resp_code == requests.codes.ok:
         return 0;
     else:
         return resp_code;
 
 def srase_query( options, args ):
-    multiple_files = {};
+    multiple_data = {};
     comma_sep_file_paths = options.files;
     #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
     # check if options.files contains at least one file path
@@ -82,41 +84,31 @@
         #print("names: "+str(comma_sep_file_names));
         file_names = comma_sep_file_names.split(",");
         # populate a dictionary with the files containing the sequences to query
+        sequences = [];
         for idx, file_path in enumerate(file_paths):
-            file_name = file_names[idx];
+            #file_name = file_names[idx];
             with open(file_path, 'r') as content_file:
                 content = content_file.read()
-                multiple_files[file_name] = content;
+                sequences.append(content.strip());
+                #multiple_data[file_name] = content;
                 #print(file_name+": "+content+"\n");
-        if len(multiple_files) > 0:
-            return async_request( options, args,  multiple_files );
+        if len(sequences) > 0:
+            multiple_data['sequences'] = sequences;
+            return async_request( options, args,  multiple_data );
             #return echo( options, args );
+        else:
+            return -1;
     else:
-        search_mode = str(options.search);
-        text_content = "";
-        if search_mode == "0":
-            # try with the sequence in --sequence
-            text_content = options.sequences;
-        elif search_mode == "1":
-            # try with the fasta content in --fasta
-            text_content = options.fasta;
+        # try with the sequence in --sequence
+        text_content = options.sequences;
         #print("sequences: "+text_content);
         # check if options.sequences contains a list of sequences (one for each row)
         if text_content is not None:
             text_content = str(text_content);
             if text_content.strip():
-                if search_mode == "0":
-                    # populate a dictionary with the files containing the sequences to query
-                    seq_counter = 0;
-                    sequences_arr = text_content.split("__cn__");
-                    for seq in sequences_arr:
-                        seq_index = 'sequence'+str(seq_counter);
-                        multiple_files[seq_index] = seq;
-                        #print(str(seq_counter)+": "+seq);
-                        seq_counter += 1;
-                elif search_mode == "1":
-                    multiple_files["fasta"] = text_content;
-                return async_request( options, args, multiple_files );
+                # populate a dictionary with the files containing the sequences to query
+                multiple_data['sequences'] = text_content.strip().split("__cn__");
+                return async_request( options, args, multiple_data );
                 #return echo( options, args );
             else:
                 return -1;
@@ -126,8 +118,6 @@
     # Parse the command line options
     usage = "Usage: search.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --output output_file_path";
     parser = optparse.OptionParser(usage = usage);
-    parser.add_option("-i", "--treeid", type="string",
-                    action="store", dest="treeid", help="string representing the tree id");
     parser.add_option("-f", "--files", type="string",
                     action="store", dest="files", help="comma separated files path");
     parser.add_option("-n", "--names", type="string",
@@ -136,20 +126,15 @@
                     action="store", dest="sequences", help="contains a list of sequences (one for each row)");
     parser.add_option("-a", "--fasta", type="string",
                     action="store", dest="fasta", help="contains the content of a fasta file");
-    parser.add_option("-x", "--search", type="int", default=0,
+    parser.add_option("-x", "--search", type="string", default=0,
                     action="store", dest="search", help="search mode");
     parser.add_option("-e", "--exact", type="int", default=0,
                     action="store", dest="exact", help="exact algorithm (required if search is 1 only)");
-    parser.add_option("-t", "--sthreshold", type="string",
+    parser.add_option("-t", "--sthreshold", type="float",
                     action="store", dest="sthreshold", help="threshold applied to the search algrithm");
     parser.add_option("-o", "--output", type="string",
                     action="store", dest="output", help="output file path");
-    parser.add_option("-v", "--version", action="store_true", dest="version",
-                    default=False, help="display version and exit");
     (options, args) = parser.parse_args();
-    if options.version:
-        print __version__;
-    else:
-        srase_query( options, args );
+    return srase_query( options, args );
 
 if __name__ == "__main__": __main__()
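Taken together, the search.py hunks replace the per-file payload with a single JSON body: all sequences are collected into one "sequences" list, the Content-type header is set to application/json, and the request goes out through requests-futures. A compact sketch of the resulting request flow follows; query_url and output_file_path stand in for module-level values that this diff does not show.

    # Sketch of the request flow after this changeset. query_url and
    # output_file_path are placeholders for values defined elsewhere in
    # search.py and not shown in this diff.
    import requests
    from requests_futures.sessions import FuturesSession

    def post_sequences(query_url, sequences, search_mode, exact, sthreshold,
                       output_file_path):
        payload = {
            "sequences": [s.strip() for s in sequences],   # one entry per query
            "search_mode": str(search_mode),
            "exact_algorithm": int(exact),
            "search_threshold": float(sthreshold),
        }
        headers = {"Content-type": "application/json"}
        session = FuturesSession()
        # asynchronous POST; result() blocks until the response arrives
        resp = session.post(query_url, headers=headers, json=payload).result()
        with open(output_file_path, "w") as out:
            out.write(str(resp.content))
        return 0 if resp.status_code == requests.codes.ok else resp.status_code
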
--- a/search.xml	Mon Jan 22 16:42:08 2018 -0500
+++ b/search.xml	Wed Jan 24 11:26:24 2018 -0500
@@ -10,10 +10,9 @@
 <![CDATA[
     python '$__tool_directory__/search.py'
     
-    --treeid '0'
-    --search '0'
-    --sthreshold '${sthreshold}'
-    --exact '0'
+    --search 'rrr'
+    --sthreshold ${sthreshold}
+    --exact 0
     
     #if $conditional_input_zero.inputtype_zero == '0':
         #set file_paths = ','.join( [ str( $f ) for $f in $conditional_input_zero.txtfiles ] )
@@ -45,7 +44,7 @@
         <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Threshold applied to the search algorithm" />
     </inputs>
     <outputs>
-        <data name="output" format="txt" label="${tool.name} on ${on_string}: AllSome Sequence Bloom Tree Search Result" />
+        <data name="output" format="json" label="${tool.name} on ${on_string}: AllSome Sequence Bloom Tree Search Result" />
     </outputs>
 
     <help><![CDATA[
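Finally, the wrapper's <command> block now hard-codes --search 'rrr' and --exact 0, forwards the threshold unquoted, and declares the output dataset as json. For local testing outside Galaxy, roughly the same invocation can be reproduced with the small driver below; the input file names, dataset labels, and output path are placeholders.

    # Rough stand-in for what the updated wrapper runs: search.py with the
    # hard-coded --search/--exact values and a user-chosen threshold.
    # All paths and names below are placeholders for local testing.
    import subprocess

    cmd = [
        "python", "search.py",
        "--search", "rrr",
        "--sthreshold", "0.5",
        "--exact", "0",
        "--files", "queries_1.txt,queries_2.txt",   # comma separated, as in the wrapper
        "--names", "queries_1,queries_2",
        "--output", "search_result.json",
    ]
    subprocess.check_call(cmd)
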