Mercurial > repos > artbio > blastx_to_scaffold
comparison blastx_to_scaffold.py @ 0:bdf781f2658b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
| author | artbio |
|---|---|
| date | Sun, 15 Oct 2017 13:16:03 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bdf781f2658b |
|---|---|
| 1 #!/usr/bin/python | |
| 2 import argparse | |
| 3 | |
| 4 | |
def insert_newlines(string, every=60):
    """Break ``string`` into newline-separated chunks of ``every`` characters.

    The string is cut into consecutive slices of length ``every`` (the last
    slice may be shorter) and the slices are joined with ``"\\n"``. No
    trailing newline is added, and an empty string yields an empty string.
    """
    return '\n'.join(
        string[start:start + every]
        for start in range(0, len(string), every)
    )
| 10 | |
| 11 | |
def Parser():
    """Parse the command-line options of the script.

    All three options are mandatory: the main routine opens each of the
    corresponding paths unconditionally, so a missing option is reported
    here as a usage error instead of failing later inside ``open()``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying
        the attributes ``sequences``, ``blastx_tab`` and ``output``.
    """
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastx alignment of Contigs")
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--blastx-tab', dest="blastx_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastx output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    return the_parser.parse_args()
| 23 | |
| 24 | |
def _read_fasta(path):
    """Return a dict mapping each FASTA header (text after '>') to its sequence."""
    chunks_by_header = {}
    header = None
    with open(path, "r") as handle:
        for raw_line in handle:
            # Strip only the line terminator; slicing off the last character
            # would eat a real base when the final line has no newline.
            line = raw_line.rstrip("\r\n")
            if not line:
                continue
            if line[0] == ">":
                header = line[1:]
                chunks_by_header[header] = []
            elif header is None:
                raise ValueError(
                    "sequence data found before the first header in %s" % path)
            else:
                chunks_by_header[header].append(line)
    return {name: "".join(chunks)
            for name, chunks in chunks_by_header.items()}


def _read_blastx(path):
    """Return the non-blank rows of a tabular blastx file as lists of fields."""
    with open(path, "r") as handle:
        return [line.rstrip("\r\n").split("\t")
                for line in handle if line.strip()]


def _build_scaffold(hits, contigs):
    """Assemble the scaffold sequence from blastx rows and contig sequences."""
    # The 13th column of the first row is used as the protein length.
    protein_length = int(hits[0][12])
    # One "NNN" placeholder per protein position (1-based).
    scaffold = {pos: "NNN" for pos in range(1, protein_length + 1)}
    for fields in hits:
        query_start = int(fields[6])
        query_stop = int(fields[7])
        subject_start = int(fields[8])
        subject_stop = int(fields[9])
        sequence = contigs[fields[0]]
        # Drop the placeholders covered by this hit and store the aligned
        # contig slice at the last covered position.
        # NOTE(review): hits overlapping on the subject raise KeyError here,
        # and a hit with query_start > query_stop yields an empty slice --
        # both behaviours are kept as in the original; confirm intent.
        for pos in range(subject_start, subject_stop):
            del scaffold[pos]
        scaffold[subject_stop] = sequence[query_start - 1:query_stop]
    return "".join(scaffold[pos] for pos in sorted(scaffold))


def __main__():
    """Build a DNA scaffold from contigs and their blastx alignments.

    Reads the options from the command line, loads the contigs and the
    tabular blastx rows, replaces the "NNN" placeholders covered by each
    alignment with the aligned contig slice, and writes the result as a
    single FASTA record named ``Scaffold`` to the output path.

    Raises:
        ValueError: if the blastx table contains no alignment rows, or if
            the FASTA file has sequence data before its first header.
        KeyError: if a blastx query id is not a header of the FASTA file.
    """
    args = Parser()
    hits = _read_blastx(args.blastx_tab)
    if not hits:
        raise ValueError(
            "no alignment rows found in %s" % args.blastx_tab)
    contigs = _read_fasta(args.sequences)
    final_sequence = insert_newlines(_build_scaffold(hits, contigs))
    with open(args.output, "w") as out:
        out.write(">Scaffold\n")
        out.write("%s\n" % final_sequence)


if __name__ == "__main__":
    __main__()
