comparison BlastParser_and_hits.py @ 4:60b6bd959929 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
author drosofff
date Fri, 15 Jan 2016 12:29:30 -0500
parents 8f5d48294f70
children a0dec1a0f2ef
comparison
equal deleted inserted replaced
3:8f5d48294f70 4:60b6bd959929
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # blastn blastx parser revised debugged: 3-4-2015. Commit issue. 2 # blastn tblastn blastx parser revised 14-1-2016.
3 # drosofff@gmail.com 3 # drosofff@gmail.com
4 4
5 import sys 5 import sys
6 import argparse 6 import argparse
7 from collections import defaultdict 7 from collections import defaultdict
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences")
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")
17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") 17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)")
18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number") 18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number")
19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number")
20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list")
21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list")
20 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") 22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned")
21 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") 23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned")
22 args = the_parser.parse_args() 24 args = the_parser.parse_args()
23 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): 25 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):
24 the_parser.error('argument(s) missing, call the -h option of the script') 26 the_parser.error('argument(s) missing, call the -h option of the script')
125 leftCoordinate -= FlankingValue 127 leftCoordinate -= FlankingValue
126 else: 128 else:
127 leftCoordinate = 1 129 leftCoordinate = 1
128 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) 130 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
129 131
130 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): 132 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"):
133 def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""):
134 print "###", filter_term_in
135 for subject in results.keys():
136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov:
137 del results[subject]
138 continue
139 if results[subject]["maxBitScores"]<filter_maxScore:
140 del results[subject]
141 continue
142 if results[subject]["meanBitScores"]<filter_meanScore:
143 del results[subject]
144 continue
145 if filter_term_in in subject:
146 pass
147 else:
148 del results[subject]
149 continue
150 if filter_term_out and filter_term_out in subject:
151 del results[subject]
152 continue
153 return results
154
131 F= open(F, "w") 155 F= open(F, "w")
132 Fasta=open(Fasta, "w") 156 Fasta=open(Fasta, "w")
133 blasted_transcripts = [] 157 blasted_transcripts = []
158 filter_results (results, filter_relativeCov, filter_maxScore, filter_meanScore, filter_term_in, filter_term_out)
134 for subject in results: 159 for subject in results:
135 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
136 continue
137 for transcript in Xblastdict[subject]: 160 for transcript in Xblastdict[subject]:
138 blasted_transcripts.append(transcript) 161 blasted_transcripts.append(transcript)
139 blasted_transcripts = list( set( blasted_transcripts)) 162 blasted_transcripts = list( set( blasted_transcripts))
140 if mode == "verbose": 163 if mode == "verbose":
141 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" 164 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n"
142 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): 165 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
143 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
144 continue
145 print >> F, "#\n# %s" % subject 166 print >> F, "#\n# %s" % subject
146 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) 167 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
147 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) 168 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
148 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) 169 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])
149 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) 170 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"])
161 info = "\t".join(info) 182 info = "\t".join(info)
162 print >> F, info 183 print >> F, info
163 else: 184 else:
164 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" 185 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score"
165 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): 186 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
166 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
167 continue
168 line = [] 187 line = []
169 line.append(subject) 188 line.append(subject)
170 line.append(results[subject]["subjectLength"]) 189 line.append(results[subject]["subjectLength"])
171 line.append(results[subject]["TotalCoverage"]) 190 line.append(results[subject]["TotalCoverage"])
172 line.append(results[subject]["RelativeSubjectCoverage"]) 191 line.append(results[subject]["RelativeSubjectCoverage"])
201 results = defaultdict(dict) 220 results = defaultdict(dict)
202 for subject in Xblastdict: 221 for subject in Xblastdict:
203 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) 222 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
204 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, 223 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
205 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, 224 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,
206 filter_meanScore=args.filter_meanScore, mode=args.mode) 225 filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in,
226 filter_term_out=args.filter_term_out, mode=args.mode)
207 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) 227 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)
208 228
209 if __name__=="__main__": __main__() 229 if __name__=="__main__": __main__()