Repository 'sbtas_se'
hg clone https://toolshed.g2.bx.psu.edu/repos/fabio/sbtas_se

Changeset 12:039e8e1e8b1f (2018-02-01)
Previous changeset 11:0d0f7080b55c (2018-01-31) Next changeset 13:b5f070767ed4 (2018-02-01)
Commit message:
Uploaded 20180201
modified:
query.py
query.xml
added:
._.shed.yml
._example.tsv
._query.py
._query.xml
b
diff -r 0d0f7080b55c -r 039e8e1e8b1f ._.shed.yml
b
Binary file ._.shed.yml has changed
b
diff -r 0d0f7080b55c -r 039e8e1e8b1f ._example.tsv
b
Binary file ._example.tsv has changed
b
diff -r 0d0f7080b55c -r 039e8e1e8b1f ._query.py
b
Binary file ._query.py has changed
b
diff -r 0d0f7080b55c -r 039e8e1e8b1f ._query.xml
b
Binary file ._query.xml has changed
b
diff -r 0d0f7080b55c -r 039e8e1e8b1f query.py
--- a/query.py Wed Jan 31 17:29:13 2018 -0500
+++ b/query.py Thu Feb 01 16:23:17 2018 -0500
[
b'@@ -3,7 +3,7 @@\n # https://github.com/ross/requests-futures\n # http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests\n \n-import os, uuid, optparse, requests, json, time\n+import sys, os, uuid, optparse, requests, json, time\n #from requests_futures.sessions import FuturesSession\n \n #### NN14 ####\n@@ -16,9 +16,17 @@\n QUERY_DELAY = 30;\n ##############\n \n+__version__ = "1.0.0";\n VALID_CHARS = \'.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ \'\n \n+# in the case of collections, exitcodes equal to 0 and 1 are not considered errors\n+def raiseException( exitcode, message, errorfilepath ):\n+    with open(errorfilepath, \'w\') as out:\n+        out.write(message);\n+    sys.exit(exitcode);\n+\n def query_request( options, args, payload ):\n+    output_dir_path = options.outputdir;\n     # add additional parameters to the payload\n     #payload["tree_id"] = str(options.treeid);\n     payload["search_mode"] = str(options.search);\n@@ -32,7 +40,7 @@\n     # make a synchronous post request to the query route\n     req = session.post(QUERY_URL, headers=headers, json=payload);\n     resp_code = req.status_code;\n-    print(str(req.content)+"\\n\\n");\n+    #print(str(req.content)+"\\n\\n");\n     if resp_code == requests.codes.ok:\n         resp_content = str(req.content);\n         # convert out to json\n@@ -42,7 +50,6 @@\n         task_processed = False;\n         # results json content\n         json_status_content = None;\n-        task_status = None;\n         while task_processed is False:\n             # create a new session\n             session = requests.Session();\n@@ -50,7 +57,7 @@\n             status_query_url = STATUS_URL.replace("<task_id>", task_id);\n             status_req = session.get(status_query_url);\n             status_resp_content = str(status_req.content);\n-            print(status_resp_content+"\\n\\n");\n+            #print(status_resp_content+"\\n\\n");\n             # 
convert out to json\n             json_status_content = json.loads(status_resp_content);\n             # take a look at the state\n@@ -59,16 +66,11 @@\n                 task_processed = True;\n                 break;\n             elif json_status_content[\'state\'] in [\'FAILURE\', \'REVOKED\']:\n-                return "Task status: "+str(json_status_content[\'state\']);\n+                return raiseException( 1, "Task ID: "+str(task_id)+"\\nTask status: "+str(json_status_content[\'state\']), str(options.errorfile) );\n             else:\n                 time.sleep(QUERY_DELAY); # in seconds\n         \n-        # get output dir (collection) path\n-        output_dir_path = options.outputdir;\n-        if not os.path.exists(output_dir_path):\n-            os.makedirs(output_dir_path);\n         out_file_format = "tabular";\n-\n         for block in json_status_content[\'results\']:\n             seq_id = block[\'sequence_id\'];\n             accessions = block[\'accession_numbers\'];\n@@ -79,10 +81,12 @@\n                 accessions_list = accessions_list + accession_number + "\\n";\n             with open(output_file_path, \'w\') as out:\n                 out.write(accessions_list.strip());\n+        return sys.exit(0);\n     else:\n-        return "Unable to query the remote server. Please try again in a while.";\n+        return raiseException( 1, "Unable to query the remote server. 
Please try again in a while.", str(options.errorfile) );\n \n def query( options, args ):\n+    output_dir_path = options.outputdir;\n     multiple_data = {};\n     comma_sep_file_paths = options.files;\n     #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));\n@@ -106,13 +110,13 @@\n                             seq_id = \'\'.join(e for e in seq_id if e in VALID_CHARS)\n                             seq_text = line_split[1];\n                             if seq_id in multiple_data:\n-                                return "Error: the id \'"+seq_id+"\' is duplicated";\n+                                return raiseException( 1, "Error: the id \'"+seq_id+"\' is duplicated", str(options.e'..b'est="outfile", help="output file");\n-    \n     # TEST\n-    #--search \'rrr\'\n-    #--sthreshold 0.5\n-    #--exact 0\n-    #--sequences \'id0__tc__CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC\'\n-    #--outputdir \'collection_content\'\n     #sequences = 
\'NM_001169378.2__tc__atttcggatgctttggagggaggaactctagtgctgcattgattggggcgtgtgttaatgatattcccagttcgcatggcgagcatcgattcctggtacgtatgtgggccccttgactcccacttatcgcacttgtcgttcgcaatttgcatgaattccgcttcgtctgaaacgcacttgcgccagacttctccggctggtctgatctggtctgtgatccggtctggtggggcgccagttgcgtttcgagctcatcaccagtcactccgcagtcgcattctgccagaggtctccgatcaagagcgcttctccattcgagattcaaacgcagcgcggtctgacgccgccacatcgagtgaaatccatatcgatggccacattcacacaggacgagatcgacttcctgcgcagccatggcaacgagctgtgtgccaagacctggctgggattgtgggatccgaagcgggctgtgcaccagcaggagcagcgcgaactgatgatggacaagtatgagcggaagcgatactacctggagccggccagtcctcttaagtcgctggccaatgcggtcaacctgaagtcgtctgctccggcgacgaaccacactcagaatggccaccaaaatgggtatgccagcatccatttgacgcctcctgctgcccagcggacctcggccaatggattgcagaaggtggccaactcgtcgagtaactcttctggaaagacctcatcctcgatcagtaggccacactataatcaccagaacaacagccaaaacaacaatcacgatgcctttggcctgggtggcggattgagcagcctgaacagcgccggttccacatccactggagctctttccgacaccagcagttgtgctagcaatggcttcggtgcggactgcgactttgtggctgactttggctcggccaacattttcgacgccacatcggcgcgttccacaggatcgccggcggtgtcgtccgtgtcctcagtgggttccagcaatggctacgccaaggtgcagcccatccgggcagctcatctccagcagcaacagcagttgcagcagcagctgcatcagcagcagctcctcaatggcaatggtcatcagggcactgagaactttgccgacttcgatcacgctcccatctacaatgcagtggctccaccgacttttaacgattggatcagcgactggagcaggcggggcttccacgatcccttcgacgattgcgatgactcgccaccaggtgcccgccctccagcacctgcgccagctcctgctcaagttcccgcagtatcatcaccattgccaaccgtccgagaagaaccagagcttgcgtggaatttttgggaggacgagatgcgaatagaggcgcaggaaaaggagtcccaaactaaacagccggagttgggctactccttttcgattagtactactacgcccctttccccttcgaatcccttcctgccctaccttgtcagtgaggagcagcatcgaaatcatccagagaagccctccttttcgtattcgttgttcagctccatatcaaatagttcgcaagaagatcaggcggatgatcatgagatgaatgttttaaatgccaatttccatgatttctttacgtggagtgctcccttgcagaacggccatacgaccagtccgcccaagggcggaaatgcagcgatggcgcccagtgaggatcgatatgccgctcttaaggatctcgacgagcagctgcgagaactgaaggccagcgaaagcgccacagagacgcccacgcccaccagtggcaatgttcaggccacagatgcctttggtggagccctcaacaacaatccaaatcccttcaagggccagcaacagcagcagctcagcagccatgtggtgaatccattccagcagcagcaacagcagcagcaccagcagaatctctatggccagttgacgctcataccaaatgcctacggcagcagt
tcccagcagcagatggggcaccatctcctccagcagcagcagcagcaacagcagagcttcttcaacttcaacaacaacgggttcgccatctcgcagggtctgcccaacggctgcggcttcggcagcatgcaacccgctcctgtgatggccaacaatccctttgcagccagcggcgccatgaacaccaacaatccattcttatgagactcaacccgggagaatccgcctcgcgccacctggcagaggcgctgagccagcgaacaaagagcagacgcggaggaaccgaaccgaaattagtccattttactaacaatagcgttaatctatgtatacataatgcacgccggagagcactctttgtgtacatagcccaaatatgtacacccgaaaggctccacgctgacgctagtcctcgcggatggcggaggcggactggggcgttgatatattcttttacatggtaactctactctaacgtttacggatacggatatttgtatttgccgtttgccctagaactctatacttgtactaagcgcccatgaacacttcatccactaacatagctactaatcctcatcctagtggaggatgcagttggtccagacactctgttatttgttttatccatcctcgtacttgtctttgtcccatttagcactttcgttgcggataagaactttgtcagttattgattgtgtggccttaataagattataaaactaaatattataacgtacgactatacatatacggatacagatacagattcagacacagttagtacagatacagatatacatatacgcttttgtacctaatgaattgcttcttgtttccattgctaatcatctgcttttcgtgtgctaattttatacactagtacgtgcgatatcggccgtgcagatagattgctcagctcgcgagtcaagcctcttttggttgcacccacggcagacatttgtacatatactgtctgattgtaagcctcgtgtaatacctccattaacaccactcccccaccacccatccatcgaaccccgaatccatgactcaattcactgctcacatgtccatgcccatgccttaacgtgtcaaacattatcgaagccttaaagttatttaaaactacgaaatttcaataaaaacaaataagaacgctatc\';\n-    #print(sequences);\n     #(options, args) = parser.parse_args([\'-x\', \'rrr\', \'-t\', 0.5, \'-s\', sequences, \'-o\', \'collection_content\']);\n-    \n+\n     (options, args) = parser.parse_args();\n-    return query( options, args );\n+    if options.version:\n+        print __version__;\n+    else:\n+        # create output dir (collection)\n+        output_dir_path = options.outputdir;\n+        if not os.path.exists(output_dir_path):\n+            os.makedirs(output_dir_path);\n+\n+        return query( options, args );\n \n if __name__ == "__main__": __main__()\n'
b
diff -r 0d0f7080b55c -r 039e8e1e8b1f query.xml
--- a/query.xml Wed Jan 31 17:29:13 2018 -0500
+++ b/query.xml Thu Feb 01 16:23:17 2018 -0500
[
@@ -34,17 +34,17 @@
                 <option value="1">By manually inserted text</option>
             </param>
             <when value="0">
-                <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="true" help="Select one or more tabular files containing (ID, TRANSCRIPT) touples for each line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each ID. The content of these files as result of the tool will be a list of accession numbers." />
+                <param format="tabular" name="txtfiles" type="data" label="Select files" multiple="true" optional="false" help="Select one or more tabular files containing one (ID, TRANSCRIPT) pair per line. The content of these files will be merged and the result will represent a query to the AllSome Sequence Bloom Tree Search Engine, which will return a collection containing one file for each ID. Each file in the resulting collection will contain a list of accession numbers." />
             </when>
             <when value="1">
-                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="true" help="Insert a list of (ID, TRANSCRIPT) touples in a tab delimited format, one for each line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine that will return a collection containing a file for each ID. The content of these files as result of the tool will be a list of accession numbers." />
+                <param name="sequences" type="text" area="True" size="5x25" label="Manually insert sequences" optional="false" help="Insert a list of (ID, TRANSCRIPT) pairs in a tab delimited format, one per line. The content of this text box will represent a query to the AllSome Sequence Bloom Tree Search Engine, which will return a collection containing one file for each ID. Each file in the resulting collection will contain a list of accession numbers." />
             </when>
         </conditional>            
         <param name="sthreshold" size="3" type="float" value="0.5" min="0.0" max="1.0" label="Search threshold" help="This threshold controls the specificity. Lower values will produce more hits to the query. Higher values are more stringent and will produce fewer hits." />
     </inputs>
     <outputs>
         <collection name="output_collect" type="list" label="AllSome Sequence Bloom Tree Search Collection">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;ext&gt;[^_]+)" directory="collection_content" ext="tabular" />
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;ext&gt;[^_]+)" directory="collection_content" ext="auto" />
         </collection>
     </outputs>
 
@@ -54,9 +54,7 @@
 
 ----
 
-**Example**
-
-The input for this tool is a list of (ID, TRANSCRIPT) touples, one for each line,
+The input for this tool is a list of (ID, TRANSCRIPT) pairs, one per line,
 in a tab delimited format::
     
     id0  CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCA