annotate blastx_to_scaffold.py @ 2:f03154ac4ed8 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/blastx_to_scaffold commit 7777a0e34cc2ccd228183af74809ae0d5d9e9b85
author artbio
date Wed, 11 Oct 2023 13:03:33 +0000
parents bdf781f2658b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
1 #!/usr/bin/python
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
2 import argparse
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
3
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
4
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
def insert_newlines(string, every=60):
    """Wrap *string* into lines of at most *every* characters.

    Args:
        string: the text to wrap (used here for the scaffold sequence).
        every: maximum number of characters per output line.

    Returns:
        The consecutive ``every``-character chunks of ``string`` joined with
        newlines. There is no trailing newline, and an empty input yields an
        empty string.

    Raises:
        ValueError: if ``every`` is smaller than 1. A zero step would make
            ``range`` fail with an unrelated message and a negative step
            would silently drop the whole sequence.
    """
    if every < 1:
        raise ValueError(
            "every must be a positive integer, got %r" % (every,))
    return '\n'.join(
        string[start:start + every]
        for start in range(0, len(string), every))
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
10
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
11
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
def Parser():
    """Parse the command-line options of the script.

    The options are read from ``sys.argv`` by argparse. All three are
    mandatory: without any of them the script cannot open its input or
    output, so a missing option is reported by argparse (usage message,
    exit status 2) instead of failing later on ``open(None)``.

    Returns:
        argparse.Namespace with the string attributes ``sequences``,
        ``blastx_tab`` and ``output``.
    """
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastx alignment of Contigs")
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--blastx-tab', dest="blastx_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastx output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    return the_parser.parse_args()
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
23
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
24
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
def _read_fasta(path):
    """Load a FASTA file into a dict mapping header text to sequence."""
    chunks = {}
    header = None
    with open(path, "r") as handle:
        for line in handle:
            # Strip only line terminators: slicing off the last character
            # would eat a base when the file lacks a final newline.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif header is not None:
                chunks[header].append(line)
    sequences = {name: "".join(parts) for name, parts in chunks.items()}
    # The first column of the blastx table is used as lookup key. BLAST
    # normally reports only the first whitespace-delimited word of the
    # FASTA header there, so register that word as an alias unless it is
    # already a header of its own.
    for name in list(sequences):
        words = name.split(None, 1)
        if words:
            sequences.setdefault(words[0], sequences[name])
    return sequences


def _read_blastx_rows(path):
    """Return the non-blank rows of a tabular blastx file as field lists."""
    with open(path, "r") as handle:
        return [line.rstrip("\r\n").split("\t")
                for line in handle if line.strip()]


def __main__():
    """Build a DNA scaffold from contigs aligned to one protein by blastx.

    Reads the contigs (``--sequences``) and the tabular blastx output
    (``--blastx-tab``), then writes a single FASTA record named
    ``Scaffold`` to ``--output``.

    The scaffold is laid out along the protein: every protein position
    starts as the placeholder codon ``NNN``. For each blastx row the
    positions from subject start (column 9) up to, but excluding, subject
    end (column 10) are dropped, and the contig slice between query start
    and query end (columns 7 and 8, 1-based inclusive) is stored at the
    subject end position. The remaining entries are concatenated in
    position order and wrapped with ``insert_newlines``.

    The protein length is taken from column 13 of the first row
    (presumably the subject length -- confirm against the blastx call).

    NOTE(review): a row whose query start is greater than its query end
    produces an empty slice, so such hits contribute no sequence; confirm
    whether reverse-strand hits are filtered or oriented upstream.

    Raises:
        ValueError: if the blastx file has no rows or its first row has
            fewer than 13 columns.
        KeyError: if a blastx row names a contig absent from the FASTA file.
    """
    args = Parser()
    contigs = _read_fasta(args.sequences)
    rows = _read_blastx_rows(args.blastx_tab)
    if not rows or len(rows[0]) < 13:
        raise ValueError(
            "%s: expected a non-empty 13-columns tabular blastx output"
            % args.blastx_tab)
    prot_length = int(rows[0][12])

    scaffold = {pos: "NNN" for pos in range(1, prot_length + 1)}
    for fields in rows:
        query_start = int(fields[6])
        query_stop = int(fields[7])
        subject_start = int(fields[8])
        subject_stop = int(fields[9])
        sequence = contigs[fields[0]]
        for pos in range(subject_start, subject_stop):
            # pop() rather than del: overlapping hits, or hits reaching past
            # the protein length, refer to positions that no longer exist.
            scaffold.pop(pos, None)
        scaffold[subject_stop] = sequence[query_start - 1:query_stop]

    final_sequence = insert_newlines(
        "".join(scaffold[pos] for pos in sorted(scaffold)))
    with open(args.output, "w") as out:
        out.write(">Scaffold\n")
        out.write("%s\n" % final_sequence)
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
60
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
61
bdf781f2658b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastx_to_scaffold commit 1353e75b8459213e88f32744a759ce4d7b43826d
artbio
parents:
diff changeset
# Run the conversion only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    __main__()