comparison blastx_to_scaffold.py @ 0:bdf781f2658b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
author artbio
date Sun, 15 Oct 2017 13:16:03 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bdf781f2658b
1 #!/usr/bin/python
2 import argparse
3
4
def insert_newlines(string, every=60):
    """Wrap ``string`` into fixed-width lines.

    Args:
        string: text to wrap (in this script, the scaffold sequence).
        every: maximum number of characters per line; must be >= 1.

    Returns:
        ``string`` cut into consecutive chunks of ``every`` characters joined
        by newlines. The last chunk may be shorter and no trailing newline is
        added; an empty input yields an empty string.

    Raises:
        ValueError: if ``every`` is smaller than 1.
    """
    if every < 1:
        # A zero step makes range() raise, and a negative step yields no
        # chunk at all, which would silently drop the whole sequence.
        raise ValueError(
            "'every' must be a positive integer, got %r" % (every,))
    return '\n'.join(
        string[i:i + every] for i in range(0, len(string), every))
10
11
def Parser():
    """Parse the command-line arguments of the script.

    All three options are mandatory: the rest of the script opens each of
    the paths unconditionally, so a missing option is reported here as an
    argparse usage error instead of failing later in ``open(None)``.

    Returns:
        argparse.Namespace: with the attributes ``sequences`` (input fasta
        path), ``blastx_tab`` (tabular blastx path) and ``output`` (output
        fasta path).
    """
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastx alignment of Contigs")
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--blastx-tab', dest="blastx_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastx output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    return the_parser.parse_args()
23
24
# Complement of each nucleotide code (IUPAC ambiguity codes included, both
# cases). Characters missing from the table are left unchanged.
_COMPLEMENT = dict(zip("ACGTURYSWKMBDHVNacgturyswkmbdhvn",
                       "TGCAAYRSWMKVHDBNtgcaayrswmkvhdbn"))


def _reverse_complement(sequence):
    """Return the reverse complement of a nucleotide sequence."""
    return "".join(_COMPLEMENT.get(base, base) for base in reversed(sequence))


def _read_fasta(path):
    """Load a fasta file into a dict mapping record name to sequence.

    Every record is stored under its full header line (without the leading
    ``>``). It is also reachable through the first whitespace-delimited token
    of that header, because that token is what BLAST reports as query id when
    the header carries a description. A full-header key always wins over
    such an alias.

    Raises:
        ValueError: if sequence data appears before the first header.
    """
    chunks_by_header = {}
    header = None
    with open(path, "r") as handle:
        for line in handle:
            # rstrip instead of line[:-1]: does not eat a real character
            # when the last line has no trailing newline, and drops "\r".
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks_by_header[header] = []
            elif line:
                if header is None:
                    raise ValueError(
                        "%s: sequence data found before any fasta header"
                        % path)
                chunks_by_header[header].append(line)
    contigs = {}
    for header, chunks in chunks_by_header.items():
        contigs[header] = "".join(chunks)
    for header in chunks_by_header:
        tokens = header.split(None, 1)
        if tokens:
            contigs.setdefault(tokens[0], contigs[header])
    return contigs


def _build_scaffold(blastx_path, contigs):
    """Assemble the scaffold sequence from the hits of a tabular blastx file.

    The scaffold starts as one ``NNN`` placeholder per subject position,
    the number of positions being read from the 13th column of the first
    hit (taken as the subject protein length). For each hit, placeholders
    from subject start to subject stop - 1 are removed and the aligned part
    of the query contig is stored at subject stop. Hits are applied in file
    order, so a later hit overrides an overlapping earlier one.

    Raises:
        ValueError: if the file contains no hit.
        KeyError: if a hit refers to a contig missing from ``contigs``.
    """
    scaffold = None
    with open(blastx_path, "r") as handle:
        for line in handle:
            if not line.strip():
                continue
            fields = line.rstrip("\r\n").split("\t")
            if scaffold is None:
                scaffold = {i: "NNN" for i in range(1, int(fields[12]) + 1)}
            query_start = int(fields[6])
            query_stop = int(fields[7])
            subject_start = int(fields[8])
            subject_stop = int(fields[9])
            if fields[0] not in contigs:
                raise KeyError(
                    "contig %r from %s not found in the fasta input"
                    % (fields[0], blastx_path))
            sequence = contigs[fields[0]]
            if query_start <= query_stop:
                fragment = sequence[query_start - 1:query_stop]
            else:
                # Minus-frame hit: query coordinates are reported in
                # decreasing order, so the aligned DNA is the reverse
                # complement of the spanned region (a plain slice would
                # be empty and lose the hit).
                fragment = _reverse_complement(
                    sequence[query_stop - 1:query_start])
            for position in range(subject_start, subject_stop):
                # pop() rather than del: the position may already have been
                # removed by an overlapping hit, or lie outside the scaffold.
                scaffold.pop(position, None)
            scaffold[subject_stop] = fragment
    if scaffold is None:
        raise ValueError("no blastx hit found in %s" % blastx_path)
    return "".join(scaffold[position] for position in sorted(scaffold))


def __main__():
    """Build a DNA scaffold from blastx hits and write it as fasta.

    Reads the contigs (``--sequences``) and the tabular blastx output
    (``--blastx-tab``), assembles the scaffold and writes it to ``--output``
    as a single record named ``Scaffold``, wrapped by ``insert_newlines``.
    """
    args = Parser()
    contigs = _read_fasta(args.sequences)
    scaffold = _build_scaffold(args.blastx_tab, contigs)
    # The output is opened last so that a failure while parsing the inputs
    # does not leave an empty output file behind.
    with open(args.output, "w") as out:
        out.write(">Scaffold\n")
        out.write("%s\n" % insert_newlines(scaffold))
60
61
# Run the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()