diff blast_unmatched.py @ 0:f3b63b59a1ea draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
author artbio
date Tue, 03 Oct 2017 07:19:17 -0400
parents
children 50c1fa95a076
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast_unmatched.py	Tue Oct 03 07:19:17 2017 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+
+import optparse
+
+
+def parse_options():
+    """
+    Parse the options guiven to the script
+    """
+    parser = optparse.OptionParser(description='Get unmatched blast queries')
+    parser.add_option('-f','--fasta', dest='fasta_file', help='Query fasta file\
+used during blast')
+    parser.add_option('-b','--blast', dest='blast_file', help='Blast tabular\
+output (queries in 1rst column)')
+    parser.add_option('-o','--output', dest='output_file', help='Output file name')
+    (options, args) = parser.parse_args()
+    if len(args) > 0:
+        parser.error('Wrong number of arguments')
+    return options
+
+def get_matched(blast_file):
+    """
+    Get a dictionary of all the queries that got a match
+    """
+    matched = dict()
+    blast_file_handle = open(blast_file, 'r')
+    for line in blast_file_handle.readlines():
+        fields = line.split("\t")
+        query_id = fields[0]
+        matched[query_id] = 1
+    blast_file_handle.close()
+    return matched
+
+def get_unmatched(output_file, fasta_file, matched):
+    """
+    Compares matched queries to query fasta file and print unmatched to ouput
+    """
+    output_file_handle = open(output_file, 'w')
+    fasta_file_handle = open(fasta_file, 'r')
+    unmatched = False
+    for line in fasta_file_handle.readlines():
+        if line.startswith('>'):
+            subline = line[1:100].rstrip() #qid are 100chars long in blast
+            if subline not in matched:
+                output_file_handle.write(line)
+                unmatched = True
+            else:
+                unmatched = False
+        elif unmatched:
+            output_file_handle.write(line)
+    fasta_file_handle.close()
+    output_file_handle.close()
+
+def __main__():
+    opts = parse_options()
+    matched = get_matched(opts.blast_file)
+    get_unmatched(opts.output_file, opts.fasta_file, matched)
+
+if __main__():
+    __main__()