annotate BlastParser_and_hits.py @ 2:36103afa0934 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 22ac2287a510708784dec78647afea4eff658f02
author artbio
date Tue, 19 Jun 2018 05:18:31 -0400
parents 9dfb65ebb02e
children b4c9c085d709
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
1 #!/usr/bin/python
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
2 import argparse
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
3 from collections import defaultdict
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
4
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
5
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def Parser():
    """Build the command-line interface and return the parsed arguments.

    Options are read from ``sys.argv``. ``--sequences``, ``--blast``,
    ``--fastaOutput`` and ``--tabularOutput`` must all be given a non-empty
    value; otherwise the parser reports an error and exits.

    Returns:
        argparse.Namespace: the parsed options. ``flanking`` is 0 when the
        option is not supplied.
    """
    the_parser = argparse.ArgumentParser()
    # Multi-line messages use implicit string concatenation rather than a
    # backslash inside the literal, which would embed the source indentation
    # of the continuation line into the text shown to the user.
    the_parser.add_argument('--blast', action="store", type=str,
                            help="Path to the blast output "
                                 "(tabular format, 12 column)")
    the_parser.add_argument('--sequences', action="store", type=str,
                            help="Path to the fasta file with blasted "
                                 "sequences")
    the_parser.add_argument('--fastaOutput', action="store", type=str,
                            help="fasta output file of blast hits")
    the_parser.add_argument('--tabularOutput', action="store", type=str,
                            help="tabular output file of blast analysis")
    the_parser.add_argument('--flanking', action="store", type=int,
                            default=0,
                            help="number of flanking nucleotides "
                                 "added to the hit sequences")
    the_parser.add_argument('--mode', action="store",
                            choices=["verbose", "short"], type=str,
                            help="reporting (verbose) or not reporting "
                                 "(short) oases contigs")
    the_parser.add_argument('--filter_relativeCov', action="store",
                            type=float, default=0,
                            help="filter out relative coverages "
                                 "below the specified ratio (float number)")
    the_parser.add_argument('--filter_maxScore', action="store", type=float,
                            default=0,
                            help="filter out best BitScores below "
                                 "the specified float number")
    the_parser.add_argument('--filter_meanScore', action="store", type=float,
                            default=0,
                            help="filter out mean BitScores below the "
                                 "specified float number")
    the_parser.add_argument('--filter_term_in', action="store", type=str,
                            default="",
                            help="select the specified term in the "
                                 "subject list")
    the_parser.add_argument('--filter_term_out', action="store", type=str,
                            default="",
                            help="exclude the specified term from "
                                 "the subject list")
    the_parser.add_argument('--al_sequences', action="store", type=str,
                            help="sequences that have been blast aligned")
    the_parser.add_argument('--un_sequences', action="store", type=str,
                            help="sequences that have not been blast aligned")
    the_parser.add_argument('--dataset_name', action="store", type=str,
                            default="",
                            help="the name of the dataset that has been "
                                 "parsed, to be reported in the output")
    args = the_parser.parse_args()
    # These four paths are mandatory; error() prints the usage and exits.
    if not all((args.sequences, args.blast, args.fastaOutput,
                args.tabularOutput)):
        the_parser.error('argument(s) missing, call the '
                         '-h option of the script')
    return args
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
60
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
61
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def median(lst):
    """Return the median of the values in ``lst``.

    The input is not modified; a sorted copy is used.

    Args:
        lst: iterable of numbers.

    Returns:
        None when ``lst`` is empty, the middle element when the number of
        values is odd, or the mean of the two middle elements (as a float)
        when it is even.
    """
    ordered = sorted(lst)
    size = len(ordered)
    if size < 1:
        return None
    # Floor division keeps the index an int on Python 3 as well as Python 2
    # (plain "/" yields a float there, which cannot index a list).
    middle = size // 2
    if size % 2 == 1:
        return ordered[middle]
    return float(sum(ordered[middle - 1:middle + 1])) / 2.0
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
70
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
71
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def mean(lst):
    """Return the arithmetic mean of ``lst`` as a float.

    An empty sequence yields the integer 0 instead of raising.
    """
    count = len(lst)
    if count == 0:
        return 0
    total = sum(lst)
    return total / float(count)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
76
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
77
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def getfasta(fastafile):
    """Load a FASTA file into a dictionary of sequences.

    Args:
        fastafile: path to the FASTA file.

    Returns:
        dict mapping each header (the full line after ">", without the line
        ending) to its sequence joined into a single string. When a header
        occurs more than once, the last record wins.

    Raises:
        ValueError: if sequence data appears before the first header.
    """
    chunks = {}
    header = None
    # The context manager closes the handle even when parsing fails.
    with open(fastafile) as handle:
        for line in handle:
            # Strip the line ending explicitly instead of dropping the last
            # character: the final line may have no newline, and files with
            # Windows line endings carry a "\r" that must not leak into
            # headers or sequences.
            line = line.rstrip("\r\n")
            if not line:
                continue
            if line[0] == ">":
                header = line[1:]
                chunks[header] = []
            elif header is None:
                raise ValueError("sequence data found before the first "
                                 "FASTA header in %s" % fastafile)
            else:
                chunks[header].append(line)
    # Joining once per record avoids repeated string concatenation.
    return dict((name, "".join(parts)) for name, parts in chunks.items())
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
89
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
90
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def insert_newlines(string, every=60):
    """Wrap ``string`` by placing a newline after each ``every`` characters.

    No trailing newline is added; an empty string is returned unchanged.
    """
    chunks = [string[start:start + every]
              for start in range(0, len(string), every)]
    return '\n'.join(chunks)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
96
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
97
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def getblast(blastfile):
    """Parse a tab-separated blast output into a nested dictionary.

    Each line must hold 13 columns: query id, subject id, then the eleven
    values stored, in order, in the per-hit ``blastinfo`` list:

        blastinfo [0]  Percentage of identical matches (float)
        blastinfo [1]  Alignment length
        blastinfo [2]  Number of mismatches
        blastinfo [3]  Number of gap openings
        blastinfo [4]  Start of alignment in query
        blastinfo [5]  End of alignment in query
        blastinfo [6]  Start of alignment in subject (database hit)
        blastinfo [7]  End of alignment in subject (database hit)
        blastinfo [8]  Expectation value (E-value), kept as a string
        blastinfo [9]  Bit score (float)
        blastinfo [10] Subject length
                       (NEED TO BE SPECIFIED WHEN RUNNING BLAST)

    Args:
        blastfile: path to the tabular blast output.

    Returns:
        defaultdict(dict) such that ``result[subject][transcript]`` is the
        list of ``blastinfo`` lists for that subject/query pair, in file
        order.

    Raises:
        IndexError: if a non-blank line has fewer than 13 columns.
        ValueError: if a numeric column cannot be converted.
    """
    blastdic = defaultdict(dict)
    # The context manager closes the handle even when a line fails to parse.
    with open(blastfile) as handle:
        for line in handle:
            # Strip the line ending explicitly: dropping the last character
            # would truncate the subject length when the final line has no
            # newline.
            line = line.rstrip("\r\n")
            if not line:
                continue
            fields = line.split("\t")
            transcript = fields[0]
            subject = fields[1]
            # blastinfo[0]
            blastinfo = [float(fields[2])]
            # blastinfo[1:8], the seven integer columns
            blastinfo.extend(int(value) for value in fields[3:10])
            # blastinfo[8] E-value remains as a string type
            blastinfo.append(fields[10])
            # blastinfo[9] Bit score
            blastinfo.append(float(fields[11]))
            # blastinfo[10] Subject length MUST BE RETRIEVED
            # THROUGH A 13 COLUMN BLAST OUTPUT
            blastinfo.append(int(fields[12]))
            # setdefault creates the hit list on first sight of the pair,
            # without using a broad exception handler as control flow.
            blastdic[subject].setdefault(transcript, []).append(blastinfo)
    return blastdic
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
132
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
133
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Extract a subsequence of ``fastadict[transcript]``.

    Args:
        fastadict: dict mapping sequence names to sequence strings.
        transcript: key of the sequence to slice.
        up: 1-based position of the first base to return; values below 1
            are treated as 1.
        down: 1-based position of the last base to return (inclusive).
        orientation: "direct" returns the slice unchanged; any other value
            returns its reverse complement.

    Returns:
        str: the selected bases.

    Raises:
        KeyError: if ``transcript`` is not in ``fastadict``, or if a
            reverse complement is requested for a sequence containing a
            character that is not a nucleotide code.
    """
    def reverse(seq):
        """Return the reverse complement of ``seq``, preserving case."""
        # Standard bases plus the IUPAC ambiguity codes.
        revdict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N",
                   "R": "Y", "Y": "R", "S": "S", "W": "W",
                   "K": "M", "M": "K", "B": "V", "V": "B",
                   "D": "H", "H": "D"}
        # Soft-masked (lowercase) sequences are complemented in lowercase.
        revdict.update([(base.lower(), comp.lower())
                        for base, comp in list(revdict.items())])
        return "".join([revdict[base] for base in seq[::-1]])

    # Clamp the start so that a position before the first base does not
    # become a negative index that wraps around to the end of the sequence.
    start = max(up - 1, 0)
    pickseq = fastadict[transcript][start:down]
    if orientation == "direct":
        return pickseq
    return reverse(pickseq)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
144
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
145
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def subjectCoverage(fastadict, blastdict, subject,
                    QueriesFlankingNucleotides=0):
    """Summarise every hit recorded against one subject.

    fastadict -- mapping of transcript name -> sequence, handed on to
                 GetHitSequence
    blastdict -- blastdict[subject][transcript] is a list of hit records
    subject   -- the subject whose hits are summarised
    QueriesFlankingNucleotides -- flanking size handed on to GetHitSequence

    Fields read from each hit record: [0] and [1] are reported as IdMatch
    and AligLength, [4]/[5] are the query coordinates, [6]/[7] the subject
    coordinates, [8] is reported as E-val, [9] is the bit score and [10]
    the subject length.

    Returns a tuple (HitDic, subjectLength, TotalSubjectCoverage,
    RelativeSubjectCoverage, max bit score, mean bit score), where HitDic
    maps a descriptive header to the extracted hit sequence.
    """
    hit_sequences = {}
    covered_positions = set()
    scores = []
    for transcript, hits in blastdict[subject].items():
        prefix = "%s--%s_" % (subject, transcript)
        for rank, hit in enumerate(hits, 1):
            suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (
                rank, hit[0], hit[1], hit[8])
            # The hit's span on the query is hit[4]..hit[5].
            hit_sequences[prefix + suffix] = GetHitSequence(
                fastadict, transcript, hit[4], hit[5],
                QueriesFlankingNucleotides)
            # The hit's span on the subject is hit[6]..hit[7], in either
            # order; record every covered position once.
            first = min(hit[6], hit[7])
            last = max(hit[6], hit[7])
            covered_positions.update(range(first, last + 1))
            scores.append(hit[9])
            # Same value for every hit of a given subject.
            subjectLength = hit[10]
    total_covered = len(covered_positions)
    relative_covered = total_covered / float(subjectLength)
    return (hit_sequences, subjectLength, total_covered, relative_covered,
            max(scores), mean(scores))
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
174
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
175
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def GetHitSequence(fastadict, FastaHeader, leftCoordinate, rightCoordinate,
                   FlankingValue):
    """Extract the sequence of a hit, plus flanking nucleotides.

    fastadict       -- mapping of sequence name -> nucleotide string
    FastaHeader     -- key of the sequence the hit lies on
    leftCoordinate, rightCoordinate -- 1-based hit coordinates; when the
                       right one is not greater than the left one the hit
                       is treated as reverse-oriented and the extracted
                       sequence is reverse-complemented by getseq
    FlankingValue   -- number of nucleotides added on EACH side of the hit,
                       truncated at the sequence boundaries

    Returns the extracted sequence string.
    """
    if rightCoordinate > leftCoordinate:
        polarity = "direct"
    else:
        polarity = "reverse"
        # Work on an ascending interval from here on.
        leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate
    # Extend to the left without running before the first position.
    leftCoordinate = max(1, leftCoordinate - FlankingValue)
    # Extend to the right as well (previously only the left side was
    # extended), without running past the end of the sequence.
    rightCoordinate = min(len(fastadict[FastaHeader]),
                          rightCoordinate + FlankingValue)
    return getseq(fastadict, FastaHeader, leftCoordinate, rightCoordinate,
                  polarity)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
189
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
190
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
191 def outputParsing(dataset_name, F, Fasta, results, Xblastdict, fastadict,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
192 filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
193 filter_term_in="", filter_term_out="", mode="verbose"):
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
194 def filter_results(results, filter_relativeCov=0, filter_maxScore=0,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
195 filter_meanScore=0, filter_term_in="",
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
196 filter_term_out=""):
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
197 for subject in results.keys():
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
198 if results[subject][
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
199 "RelativeSubjectCoverage"] < filter_relativeCov:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
200 del results[subject]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
201 continue
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
202 if results[subject]["maxBitScores"] < filter_maxScore:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
203 del results[subject]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
204 continue
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
205 if results[subject]["meanBitScores"] < filter_meanScore:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
206 del results[subject]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
207 continue
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
208 if filter_term_in in subject:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
209 pass
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
210 else:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
211 del results[subject]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
212 continue
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
213 if filter_term_out and filter_term_out in subject:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
214 del results[subject]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
215 continue
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
216 return results
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
217
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
218 F = open(F, "w")
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
219 Fasta = open(Fasta, "w")
2
36103afa0934 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 22ac2287a510708784dec78647afea4eff658f02
artbio
parents: 0
diff changeset
220 blasted_transcripts = dict()
0
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
221 filter_results(results, filter_relativeCov, filter_maxScore,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
222 filter_meanScore, filter_term_in, filter_term_out)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
223 for subject in results:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
224 for transcript in Xblastdict[subject]:
2
36103afa0934 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 22ac2287a510708784dec78647afea4eff658f02
artbio
parents: 0
diff changeset
225 blasted_transcripts[transcript] = ">%s\n%s\n" % (transcript,
36103afa0934 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 22ac2287a510708784dec78647afea4eff658f02
artbio
parents: 0
diff changeset
226 insert_newlines(
36103afa0934 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 22ac2287a510708784dec78647afea4eff658f02
artbio
parents: 0
diff changeset
227 fastadict[
36103afa0934 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 22ac2287a510708784dec78647afea4eff658f02
artbio
parents: 0
diff changeset
228 transcript
36103afa0934 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 22ac2287a510708784dec78647afea4eff658f02
artbio
parents: 0
diff changeset
229 ]))
0
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
230 if mode == "verbose":
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
231 F.write("--- %s ---\n" % dataset_name)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
232 F.write("# %s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ("SeqId", "%Identity",
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
233 "AlignLength",
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
234 "StartSubject",
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
235 "EndSubject",
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
236 "%QueryHitCov",
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
237 "E-value",
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
238 "BitScore"))
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
239 for subject in sorted(results,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
240 key=lambda x: results[x]["meanBitScores"],
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
241 reverse=True):
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
242 F.write(" \n# %s\n" % subject)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
243 F.write("# Suject Length: %s\n" %
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
244 results[subject]["subjectLength"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
245 F.write("# Total Subject Coverage: %s\n" %
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
246 results[subject]["TotalCoverage"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
247 F.write("# Relative Subject Coverage: %s\n" %
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
248 results[subject]["RelativeSubjectCoverage"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
249 F.write("# Best Bit Score: %s\n" % results[subject][
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
250 "maxBitScores"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
251 F.write("# Mean Bit Score: %s\n" % results[subject][
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
252 "meanBitScores"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
253 for header in results[subject]["HitDic"]:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
254 Fasta.write(">%s\n%s\n" % (header,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
255 insert_newlines(results[subject][
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
256 "HitDic"][
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
257 header])))
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
258 Fasta.write("\n") # final carriage return for the sequence
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
259 for transcript in Xblastdict[subject]:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
260 transcriptSize = float(len(fastadict[transcript]))
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
261 for hit in Xblastdict[subject][transcript]:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
262 percentIdentity = hit[0]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
263 alignLenght = hit[1]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
264 subjectStart = hit[6]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
265 subjectEnd = hit[7]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
266 queryCov = "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
267 Eval, BitScore = hit[8], hit[9]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
268 info = [transcript] + [percentIdentity, alignLenght,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
269 subjectStart, subjectEnd, queryCov,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
270 Eval, BitScore]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
271 info = [str(i) for i in info]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
272 info = "\t".join(info)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
273 F.write("%s\n" % info)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
274 else:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
275 F.write("--- %s ---\n" % dataset_name)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
276 F.write("# subject\tsubject length\tTotal Subject Coverage\tRelative\
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
277 Subject Coverage\tBest Bit Score\tMean Bit Score\n")
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
278 for subject in sorted(results,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
279 key=lambda x: results[x]["meanBitScores"],
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
280 reverse=True):
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
281 line = []
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
282 line.append(subject)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
283 line.append(results[subject]["subjectLength"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
284 line.append(results[subject]["TotalCoverage"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
285 line.append(results[subject]["RelativeSubjectCoverage"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
286 line.append(results[subject]["maxBitScores"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
287 line.append(results[subject]["meanBitScores"])
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
288 line = [str(i) for i in line]
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
289 F.write("%s\n" % "\t".join(line))
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
290 for header in results[subject]["HitDic"]:
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
291 Fasta.write(">%s\n%s\n" % (header,
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
292 insert_newlines(
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
293 results[subject][
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
294 "HitDic"][header])))
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
295 Fasta.write("\n") # final carriage return for the sequence
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
296 F.close()
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
297 Fasta.close()
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
298 return blasted_transcripts
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
299
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
300
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences,
                       unmatched_sequences):
    '''Write the sequences that matched, and did not match, in the blast.

    Every key of ``fastadict`` is looked up in ``blasted_transcripts``:

    * when the key is present, the stored value is written verbatim to the
      matched file (presumably an already formatted FASTA record - verify
      against the function that builds ``blasted_transcripts``);
    * when the lookup raises ``KeyError``, a FASTA record is built from the
      key and ``insert_newlines(fastadict[key])`` and written to the
      unmatched file.

    fastadict -- mapping of sequence name to sequence
    blasted_transcripts -- mapping of sequence name to the text to emit
    matched_sequences -- path of the output file for matched sequences
    unmatched_sequences -- path of the output file for unmatched sequences

    Both files are opened in "w" mode (created or truncated) and are closed
    even if an error occurs while writing. Returns None.
    '''
    with open(matched_sequences, "w") as matched_out, \
            open(unmatched_sequences, "w") as unmatched_out:
        for transcript in fastadict:
            # Only the lookup is guarded, so a KeyError raised anywhere else
            # is not mistaken for "this transcript had no blast hit".
            try:
                record = blasted_transcripts[transcript]
            except KeyError:
                unmatched_out.write(">%s\n%s\n" % (
                    transcript, insert_newlines(fastadict[transcript])))
            else:
                matched_out.write(record)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
315
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
316
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
def __main__():
    '''Entry point: parse the blast table and write every output file.

    Steps, in order:
    1. read the command line through Parser();
    2. load the sequences (getfasta) and the blast hits (getblast);
    3. compute, for each subject of the blast hits, the six values
       returned by subjectCoverage and store them under fixed keys;
    4. write the tabular and fasta reports with outputParsing;
    5. split the input sequences into matched / unmatched files with
       dispatch_sequences.
    '''
    args = Parser()
    fastadict = getfasta(args.sequences)
    Xblastdict = getblast(args.blast)

    results = defaultdict(dict)
    for subject in Xblastdict:
        (hit_dic, subject_length, total_coverage, relative_coverage,
         max_bit_score, mean_bit_score) = subjectCoverage(
            fastadict, Xblastdict, subject, args.flanking)
        entry = results[subject]
        entry["HitDic"] = hit_dic
        entry["subjectLength"] = subject_length
        entry["TotalCoverage"] = total_coverage
        entry["RelativeSubjectCoverage"] = relative_coverage
        entry["maxBitScores"] = max_bit_score
        entry["meanBitScores"] = mean_bit_score

    # Filtering options are forwarded unchanged as keyword arguments.
    filters = {
        "filter_relativeCov": args.filter_relativeCov,
        "filter_maxScore": args.filter_maxScore,
        "filter_meanScore": args.filter_meanScore,
        "filter_term_in": args.filter_term_in,
        "filter_term_out": args.filter_term_out,
    }
    blasted_transcripts = outputParsing(
        args.dataset_name, args.tabularOutput, args.fastaOutput,
        results, Xblastdict, fastadict, mode=args.mode, **filters)

    dispatch_sequences(fastadict, blasted_transcripts, args.al_sequences,
                       args.un_sequences)
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
339
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
340
9dfb65ebb02e planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
artbio
parents:
diff changeset
# Run the tool only when this file is executed as a script; importing the
# module must not trigger argument parsing or any file output.
if __name__ == "__main__":
    __main__()