Mercurial > repos > drosofff > msp_blastparser_and_hits
comparison BlastParser_and_hits.py @ 1:1964514aabde draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 1cc2b50091f512593c502176619998f5908fc8e8
author | drosofff |
---|---|
date | Mon, 14 Sep 2015 12:18:46 -0400 |
parents | 69ea2a13947f |
children | bb0d4cd765c5 |
comparison
equal
deleted
inserted
replaced
0:69ea2a13947f | 1:1964514aabde |
---|---|
12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") | 12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") |
13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") | 13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") |
14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") | 14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") |
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") | 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") |
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") | 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") |
17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") | |
18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") | |
19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") | |
20 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") | |
21 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") | |
17 args = the_parser.parse_args() | 22 args = the_parser.parse_args() |
18 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): | 23 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): |
19 the_parser.error('argument(s) missing, call the -h option of the script') | 24 the_parser.error('argument(s) missing, call the -h option of the script') |
20 if not args.flanking: | 25 if not args.flanking: |
21 args.flanking = 0 | 26 args.flanking = 0 |
120 leftCoordinate -= FlankingValue | 125 leftCoordinate -= FlankingValue |
121 else: | 126 else: |
122 leftCoordinate = 1 | 127 leftCoordinate = 1 |
123 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) | 128 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) |
124 | 129 |
125 def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"): | 130 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): |
126 F= open(F, "w") | 131 F= open(F, "w") |
127 Fasta=open(Fasta, "w") | 132 Fasta=open(Fasta, "w") |
128 if mode == "verbose": | 133 if mode == "verbose": |
129 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" | 134 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" |
130 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 135 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
137 continue | |
131 print >> F, "#\n# %s" % subject | 138 print >> F, "#\n# %s" % subject |
132 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) | 139 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) |
133 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) | 140 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) |
134 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) | 141 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) |
135 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"]) | 142 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"]) |
147 info = "\t".join(info) | 154 info = "\t".join(info) |
148 print >> F, info | 155 print >> F, info |
149 else: | 156 else: |
150 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" | 157 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" |
151 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 158 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
159 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
160 continue | |
152 line = [] | 161 line = [] |
153 line.append(subject) | 162 line.append(subject) |
154 line.append(results[subject]["subjectLength"]) | 163 line.append(results[subject]["subjectLength"]) |
155 line.append(results[subject]["TotalCoverage"]) | 164 line.append(results[subject]["TotalCoverage"]) |
156 line.append(results[subject]["RelativeSubjectCoverage"]) | 165 line.append(results[subject]["RelativeSubjectCoverage"]) |
162 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) | 171 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) |
163 print >> Fasta, "" # final carriage return for the sequence | 172 print >> Fasta, "" # final carriage return for the sequence |
164 F.close() | 173 F.close() |
165 Fasta.close() | 174 Fasta.close() |
166 | 175 |
167 | 176 def sort_sequences (fastadict, blastdict, matched_sequences, unmatched_sequences): |
177 '''to output the sequences that matched and did not matched in the blast''' | |
178 blasted_transcripts = [] | |
179 for subject in blastdict: | |
180 for transcript in blastdict[subject]: | |
181 blasted_transcripts.append(transcript) | |
182 blasted_transcripts = list( set( blasted_transcripts)) | |
183 F_matched = open (matched_sequences, "w") | |
184 F_unmatched = open (unmatched_sequences, "w") | |
185 for transcript in fastadict: | |
186 if transcript in blasted_transcripts: | |
187 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) | |
188 else: | |
189 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) | |
190 F_matched.close() | |
191 F_unmatched.close() | |
192 return | |
168 | 193 |
169 def __main__ (): | 194 def __main__ (): |
170 args = Parser() | 195 args = Parser() |
171 fastadict = getfasta (args.sequences) | 196 fastadict = getfasta (args.sequences) |
172 Xblastdict = getblast (args.blast) | 197 Xblastdict = getblast (args.blast) |
198 sort_sequences (fastadict, Xblastdict, args.al_sequences, args.un_sequences) | |
173 results = defaultdict(dict) | 199 results = defaultdict(dict) |
174 for subject in Xblastdict: | 200 for subject in Xblastdict: |
175 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 201 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
176 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode) | 202 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, |
203 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, | |
204 filter_meanScore=args.filter_meanScore, mode=args.mode) | |
177 if __name__=="__main__": __main__() | 205 if __name__=="__main__": __main__() |