Mercurial > repos > artbio > blastx_to_scaffold
comparison blastx_to_scaffold.py @ 0:bdf781f2658b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
author | artbio |
---|---|
date | Sun, 15 Oct 2017 13:16:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bdf781f2658b |
---|---|
1 #!/usr/bin/python | |
2 import argparse | |
3 | |
4 | |
def insert_newlines(string, every=60):
    """Wrap *string* into lines of at most *every* characters.

    The text is cut at the fixed offsets 0, every, 2 * every, ... and the
    pieces are joined with a newline. No trailing newline is appended, and
    an empty input yields an empty string.
    """
    chunk_starts = range(0, len(string), every)
    return '\n'.join(string[start:start + every] for start in chunk_starts)
10 | |
11 | |
def Parser():
    """Parse the command-line options of the script.

    Returns:
        argparse.Namespace with three string attributes: ``sequences``
        (input fasta path), ``blastx_tab`` (tabular blastx path) and
        ``output`` (fasta path to write).

    The three options are mandatory: the caller opens each of them as a
    file, so a missing one is reported here by argparse (usage message,
    exit status 2) rather than failing later on ``open(None)``.
    """
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastx alignment of Contigs")
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--blastx-tab', dest="blastx_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastx output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    return the_parser.parse_args()
23 | |
24 | |
def _read_fasta(path):
    """Return a dict mapping each fasta header to its joined sequence."""
    chunks = {}
    header = None
    with open(path, "r") as handle:
        for line in handle:
            # Strip only the line terminator, so a last line without a
            # trailing newline keeps its final character.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif header is not None:
                # Lines found before the first header belong to no record.
                chunks[header].append(line)
    contigs = dict((name, "".join(parts)) for name, parts in chunks.items())
    # Also index every record by the first word of its header, without
    # overriding a full-header key.
    # NOTE(review): assumes blastx reports only the first whitespace-delimited
    # word of the fasta header as query id - confirm against the blastx run.
    for name in list(contigs):
        words = name.split(None, 1)
        if words:
            contigs.setdefault(words[0], contigs[name])
    return contigs


def _read_blastx_hits(path):
    """Return the non-blank rows of a tabular blastx file as field lists."""
    with open(path, "r") as handle:
        return [line.rstrip("\r\n").split("\t")
                for line in handle if line.strip()]


def _build_scaffold(hits, contigs, prot_length):
    """Return the scaffold sequence assembled from the blastx hits."""
    # One "NNN" placeholder codon per protein position, 1-based.
    scaffold = {position: "NNN" for position in range(1, prot_length + 1)}
    for fields in hits:
        query_start = int(fields[6])
        query_stop = int(fields[7])
        subject_start = int(fields[8])
        subject_stop = int(fields[9])
        sequence = contigs[fields[0]]
        # pop() rather than del: a position may already have been removed by
        # an earlier hit overlapping this one on the protein.
        for position in range(subject_start, subject_stop):
            scaffold.pop(position, None)
        # The aligned stretch of the contig is stored at the last covered
        # protein position, so sorting the keys restores the protein order.
        # NOTE(review): a hit whose query start is greater than its query stop
        # yields an empty slice here; no reverse complement is taken.
        scaffold[subject_stop] = sequence[query_start - 1:query_stop]
    return "".join(scaffold[position] for position in sorted(scaffold))


def __main__():
    """Build a DNA scaffold from contigs aligned to a protein by blastx.

    Reads the options returned by Parser(), loads the contigs and the
    tabular blastx rows, and writes a single fasta record named "Scaffold"
    to the output path. Positions of the protein covered by no hit are left
    as "NNN".

    Columns read from each blastx row (0-based): 0 query id, 6 and 7 query
    start/stop, 8 and 9 subject start/stop. Column 12 of the first row is
    used as the protein length.

    Raises:
        ValueError: the blastx file contains no hit.
        KeyError: a blastx query id matches no contig header.
        IndexError: a blastx row has fewer columns than are read.
    """
    args = Parser()
    hits = _read_blastx_hits(args.blastx_tab)
    if not hits:
        raise ValueError("no blastx hit found in %s" % args.blastx_tab)
    prot_length = int(hits[0][12])
    contigs = _read_fasta(args.sequences)
    final_sequence = insert_newlines(
        _build_scaffold(hits, contigs, prot_length))
    # The output is opened last, so a failure above leaves no partial file.
    with open(args.output, "w") as out:
        out.write(">Scaffold\n")
        out.write("%s\n" % final_sequence)
60 | |
61 | |
# Run the scaffolding only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    __main__()