Mercurial > repos > artbio > blastx_to_scaffold
annotate blastx_to_scaffold.py @ 2:f03154ac4ed8 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/blastx_to_scaffold commit 7777a0e34cc2ccd228183af74809ae0d5d9e9b85
author | artbio |
---|---|
date | Wed, 11 Oct 2023 13:03:33 +0000 |
parents | bdf781f2658b |
children |
rev | line source |
---|---|
0
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
1 #!/usr/bin/python |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
2 import argparse |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
3 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
4 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
def insert_newlines(string, every=60):
    """Wrap *string* into fixed-width lines separated by newlines.

    Args:
        string: text to wrap (used in this script for the scaffold
            nucleotide sequence).
        every: maximum number of characters per output line.

    Returns:
        The consecutive ``every``-character chunks of ``string`` joined
        with a newline character. No trailing newline is appended, and an
        empty input yields an empty string.

    Raises:
        ValueError: if ``every`` is smaller than 1.
    """
    if every < 1:
        # A negative step makes range() empty, which would silently throw
        # the whole sequence away; fail loudly instead.
        raise ValueError("every must be a positive integer, got %r" % every)
    return '\n'.join(
        string[i:i + every] for i in range(0, len(string), every))
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
10 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
11 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
def Parser(argv=None):
    """Parse the command-line options of the scaffold builder.

    Args:
        argv: optional list of argument strings. When None (the default)
            argparse reads ``sys.argv[1:]``, exactly as before.

    Returns:
        An ``argparse.Namespace`` with the attributes ``sequences``,
        ``blastx_tab`` and ``output`` (all strings).

    Raises:
        SystemExit: raised by argparse when an option is missing or
            unknown, after printing a usage message.
    """
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastx alignment of Contigs")
    # All three paths are mandatory: without required=True a forgotten
    # option only surfaced later as an obscure failure in open(None).
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--blastx-tab', dest="blastx_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastx output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    return the_parser.parse_args(argv)
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
23 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
24 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
# Complement of every IUPAC nucleotide code (upper case); lower case is
# derived below. Symbols not listed (e.g. gaps) are left unchanged.
_IUPAC_COMPLEMENT = {
    "A": "T", "T": "A", "C": "G", "G": "C", "U": "A",
    "R": "Y", "Y": "R", "K": "M", "M": "K",
    "B": "V", "V": "B", "D": "H", "H": "D",
    "S": "S", "W": "W", "N": "N",
}
_IUPAC_COMPLEMENT.update(
    dict((k.lower(), v.lower()) for k, v in list(_IUPAC_COMPLEMENT.items())))


def _reverse_complement(dna):
    """Return the reverse complement of the nucleotide string *dna*."""
    return "".join(_IUPAC_COMPLEMENT.get(base, base) for base in reversed(dna))


def _read_fasta(path):
    """Load the FASTA file at *path* into a dict {header: sequence}.

    The key is the header line without the leading ">". The first
    whitespace-delimited word of each header is registered as an extra
    key (unless that key already exists), because blast tabular output
    reports only that word as the query id.
    """
    chunks = {}
    header = None
    with open(path, "r") as handle:
        for raw in handle:
            # rstrip instead of [:-1]: keeps the last character of a file
            # lacking a final newline and drops "\r" from CRLF files.
            line = raw.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif header is not None:
                chunks[header].append(line)
            elif line.strip():
                raise ValueError(
                    "%s: sequence data found before the first FASTA header"
                    % path)
    contigs = dict((name, "".join(parts)) for name, parts in chunks.items())
    for name in list(contigs):
        words = name.split(None, 1)
        if words:
            contigs.setdefault(words[0], contigs[name])
    return contigs


def __main__():
    """Build a DNA scaffold from contigs aligned to one protein by blastx.

    Reads the options returned by ``Parser()``, then:

    1. loads the contigs from the FASTA file ``--sequences``;
    2. reads the protein length from column 13 of the first blastx hit
       and creates one "NNN" placeholder per protein position;
    3. for every hit, removes the placeholders from subject start up to
       (not including) subject stop and stores the aligned part of the
       contig at the subject stop position;
    4. concatenates everything in position order and writes it, wrapped
       by ``insert_newlines``, to ``--output`` under the header
       ">Scaffold".

    Raises:
        ValueError: if the blastx file has no hit line, its first line
            has fewer than 13 columns, or the FASTA file starts with
            sequence data.
        KeyError: if a hit refers to a contig absent from the FASTA file.
    """
    args = Parser()
    contigs = _read_fasta(args.sequences)

    # Context managers close the handles even when parsing fails; the
    # original leaked the handle used to peek at the first line.
    with open(args.blastx_tab, "r") as blastx_handle:
        hits = [line.rstrip("\r\n").split("\t")
                for line in blastx_handle if line.strip()]
    if not hits or len(hits[0]) < 13:
        raise ValueError(
            "%s: expected a 13-column blastx table with at least one hit"
            % args.blastx_tab)
    prot_length = int(hits[0][12])

    scaffold = dict((pos, "NNN") for pos in range(1, prot_length + 1))
    for fields in hits:
        query_start = int(fields[6])
        query_stop = int(fields[7])
        subject_start = int(fields[8])
        subject_stop = int(fields[9])
        sequence = contigs[fields[0]]
        for pos in range(subject_start, subject_stop):
            # pop() rather than del: positions already consumed by an
            # overlapping earlier hit must not abort the run. A later hit
            # therefore replaces whatever an earlier one stored there.
            scaffold.pop(pos, None)
        if query_start <= query_stop:
            fragment = sequence[query_start - 1:query_stop]
        else:
            # blastx reports minus-frame hits with start > stop; the plain
            # slice is empty in that case, so take the reverse strand.
            fragment = _reverse_complement(
                sequence[query_stop - 1:query_start])
        scaffold[subject_stop] = fragment

    final_sequence = insert_newlines(
        "".join(scaffold[pos] for pos in sorted(scaffold)))
    with open(args.output, "w") as out:
        out.write(">Scaffold\n")
        out.write("%s\n" % final_sequence)
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
60 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
61 |
bdf781f2658b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff
changeset
|
# Run the tool only when the file is executed as a script, so importing
# the module does not trigger argument parsing or file I/O.
if __name__ == "__main__":
    __main__()