diff search.py @ 0:854be3d51221 draft

Uploaded 20171204
author fabio
date Mon, 04 Dec 2017 16:05:45 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/search.py	Mon Dec 04 16:05:45 2017 -0500
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+
+# https://github.com/ross/requests-futures
+# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
+
+import os, uuid
+import optparse
+import requests
+from requests_futures.sessions import FuturesSession
+
+# Base URL of the service (a proxy to uv0).
+service_url = "http://deputy.bx.psu.edu/"
+# Query page: the URL that async_request() posts the search payload to.
+query_url = service_url + "query.php"
+# Echo page (just returns 'it works!'); unused while echo() is disabled.
+#echo_url = service_url+"echo.php";
+
+'''
+# synchronous
+def echo( options, args ):
+    # create a session
+    session = requests.Session()
+    # make a sync get request
+    resp = session.get(echo_url)
+    # check for response status code
+    resp_code = resp.status_code;
+    if resp_code == requests.codes.ok:
+        # get output file path
+        output_file_path = options.output;
+        # write response on the output file
+        with open(output_file_path, 'w') as out:
+            #out.write(resp.data);
+            out.write(resp.content);
+        return 0;
+    else:
+        return resp_code;
+'''
+
+# asynchronous
+def async_request( options, args, payload ):
+    """POST payload to query_url and save the reply in options.output.
+
+    The search settings (options.search, options.exact, options.sthreshold)
+    are stored in payload, in place, as strings. The request is issued via
+    requests-futures but awaited immediately, so this call blocks. The
+    output file receives the HTTP status code on its first line followed
+    by the response body. args is accepted but never used.
+
+    Returns 0 if the status equals requests.codes.ok, else the status code.
+    """
+    for key, value in (("search_mode", options.search),
+                       ("exact_algorithm", options.exact),
+                       ("search_threshold", options.sthreshold)):
+        payload[key] = str(value)
+    session = FuturesSession()
+    # result() waits for the request to complete, if it has not already
+    response = session.post(query_url, data=payload).result()
+    status = response.status_code
+    # the status and body are written out whatever the status is
+    with open(options.output, 'w') as out:
+        out.write(str(status) + "\n" + str(response.content))
+    return 0 if status == requests.codes.ok else status
+
+def _files_payload( comma_sep_file_paths, comma_sep_file_names ):
+    # Map each name to the content of its file; blank path entries are skipped.
+    payload = {}
+    if not comma_sep_file_paths:
+        return payload
+    file_names = comma_sep_file_names.split(",") if comma_sep_file_names else []
+    for idx, file_path in enumerate(comma_sep_file_paths.split(",")):
+        if not file_path.strip():
+            continue
+        # a missing or empty name falls back to the file's base name
+        file_name = file_names[idx] if idx < len(file_names) else ""
+        with open(file_path, 'r') as content_file:
+            payload[file_name or os.path.basename(file_path)] = content_file.read()
+    return payload
+
+def _sequences_payload( sequences_text ):
+    # Map 'sequence<i>' to the i-th "__cn__"-separated chunk of the text.
+    if sequences_text is None or not str(sequences_text).strip():
+        return {}
+    chunks = str(sequences_text).split("__cn__")
+    return {'sequence'+str(i): seq for i, seq in enumerate(chunks)}
+
+def srase_query( options, args ):
+    """Build the query payload from the options and send it.
+
+    The files listed in options.files (comma separated) are read and keyed
+    by the matching entry of options.names; when a name is missing the
+    file's base name is used instead of failing. If the files yield nothing,
+    the text in options.sequences is split on "__cn__" and each chunk is
+    keyed 'sequence0', 'sequence1', ...
+
+    Returns the result of async_request() for the payload, or -1 when
+    neither option yields anything to query.
+    Raises IOError (OSError on Python 3) if a listed file cannot be read.
+    """
+    payload = _files_payload(options.files, options.names)
+    if not payload:
+        # try with the sequences in --sequences
+        payload = _sequences_payload(options.sequences)
+    if not payload:
+        return -1
+    return async_request( options, args, payload )
+
+def __main__():
+    """Parse the command line, then print the version or run the query.
+
+    Raises SystemExit(1) when srase_query() returns a non-zero status, so
+    that a failed or empty query is visible in the process exit code.
+    """
+    usage = "Usage: search.py --files comma_sep_file_paths --names comma_sep_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --output output_file_path"
+    parser = optparse.OptionParser(usage=usage)
+    # action="store" and type="string" are the optparse defaults
+    parser.add_option("-f", "--files", dest="files", help="comma separated files path")
+    parser.add_option("-n", "--names", dest="names", help="comma separated names associated to the files specified in --files")
+    parser.add_option("-s", "--sequences", dest="sequences", help="optional field, contains a list of sequences (one for each row)")
+    parser.add_option("-x", "--search", dest="search", type="int", default=0, help="search mode")
+    parser.add_option("-e", "--exact", dest="exact", type="int", default=0, help="exact algorithm (required if search is 1 only)")
+    parser.add_option("-t", "--sthreshold", dest="sthreshold", help="threshold applied to the search algorithm")
+    parser.add_option("-o", "--output", dest="output", help="output file path")
+    parser.add_option("-v", "--version", dest="version", action="store_true", default=False, help="display version and exit")
+    (options, args) = parser.parse_args()
+    if options.version:
+        # __version__ may be undefined at module level; do not crash if it is
+        print(globals().get("__version__", "unknown"))
+        return
+    status = srase_query( options, args )
+    if status != 0:
+        raise SystemExit(1)
+
+if __name__ == "__main__": __main__()