annotate blast_to_scaffold.py @ 0:7d96b28eec49 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
author artbio
date Sun, 15 Oct 2017 12:52:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
1 #!/usr/bin/env python
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
2 import argparse
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
3
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
4
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def insert_newlines(string, every=60):
    """Wrap *string* into lines of at most *every* characters.

    Args:
        string: the text to wrap (here, the assembled scaffold sequence).
        every: maximum number of characters per output line (default 60).

    Returns:
        The successive chunks of *string* joined with newline characters.
        No trailing newline is added; an empty input gives an empty string.

    Raises:
        ValueError: if *every* is smaller than 1.
    """
    # A negative step makes range() empty, which would silently discard the
    # whole sequence; refuse non-positive widths explicitly instead.
    if every < 1:
        raise ValueError(
            "every must be a positive integer, got %r" % (every,))
    return '\n'.join(string[start:start + every]
                     for start in range(0, len(string), every))
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
10
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
11
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
# Complement of every IUPAC nucleotide code; lower-case entries are added
# below so that soft-masked sequences keep their case.
_COMPLEMENT = {
    "A": "T", "T": "A", "G": "C", "C": "G", "N": "N",
    "R": "Y", "Y": "R", "S": "S", "W": "W", "K": "M", "M": "K",
    "B": "V", "V": "B", "D": "H", "H": "D",
}
_COMPLEMENT.update(
    dict((base.lower(), comp.lower())
         for base, comp in list(_COMPLEMENT.items())))


def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Return a slice of a stored sequence, optionally reverse-complemented.

    Args:
        fastadict: mapping of sequence name to sequence string.
        transcript: key of the sequence to slice in *fastadict*.
        up: 1-based position of the first nucleotide to include.
        down: 1-based position of the last nucleotide to include.
        orientation: "direct" returns the slice unchanged; any other value
            returns its reverse complement.

    Returns:
        The selected sub-sequence as a string.

    Raises:
        KeyError: if *transcript* is not a key of *fastadict*.
    """
    # Convert the 1-based inclusive interval to a Python slice.
    pickseq = fastadict[transcript][up - 1:down]
    if orientation == "direct":
        return pickseq
    # Symbols without a known complement (gaps, '*', ...) pass through
    # unchanged rather than aborting the whole scaffold.
    return "".join(_COMPLEMENT.get(base, base)
                   for base in reversed(pickseq))
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
22
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
23
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def Parser(argv=None):
    """Declare the command-line options and parse them.

    Args:
        argv: optional list of argument strings to parse. When None (the
            default) argparse falls back to sys.argv[1:].

    Returns:
        The argparse namespace, with attributes sequences, guideSequence,
        blast_tab, output, scaffold_prefix and scaffold_suffix.
    """
    # Adjacent string literals are used instead of backslash continuations
    # inside the literal, which either fuse two words together or embed the
    # source indentation in the message.
    the_parser = argparse.ArgumentParser(
        description="Generate DNA scaffold from blastn or tblastx "
                    "alignment of Contigs")
    # The four file paths are all opened unconditionally by the script, so
    # a missing one is reported as a usage error rather than a traceback.
    the_parser.add_argument('--sequences', action="store", type=str,
                            required=True,
                            help="input sequence file in fasta format")
    the_parser.add_argument('--guideSequence', action="store", type=str,
                            required=True,
                            help="the reference sequence to guide the "
                                 "scaffold assembly in fasta format")
    the_parser.add_argument('--blast-tab', dest="blast_tab", action="store",
                            type=str, required=True,
                            help="13-columns tabular blastn or tblastx "
                                 "output")
    the_parser.add_argument('--output', action="store", type=str,
                            required=True,
                            help="output file path, fasta format")
    the_parser.add_argument('--scaffold_prefix', action="store", type=str,
                            help="the prefix that will be used for the "
                                 "header of the fasta scaffold")
    the_parser.add_argument('--scaffold_suffix', action="store", type=str,
                            help="the suffix that will be used for the "
                                 "header of the fasta scaffold")
    return the_parser.parse_args(argv)
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
46
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
47
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def blatnInfo(file):
    """Read a tabular BLAST report and return the hit coordinates.

    Each retained line is split on whitespace; the first field (name of
    the blasted contig) and fields 7 to 10 (query start, query stop,
    subject start, subject stop) are kept, all as strings.

    Args:
        file: path of the tabular BLAST output.

    Returns:
        A list of [name, queryStart, queryStop, subjectStart, subjectStop]
        lists, ordered by decreasing subject start.

    Raises:
        ValueError: if a data line has fewer than 10 fields or its subject
            start is not an integer.
    """
    blastlist = []
    with open(file, "r") as f:
        for number, line in enumerate(f, 1):
            fields = line.split()
            # Blank lines and '#' comment lines carry no hit.
            if not fields or fields[0].startswith("#"):
                continue
            if len(fields) < 10:
                raise ValueError(
                    "line %d of %s has %d fields, at least 10 expected"
                    % (number, file, len(fields)))
            blastlist.append([fields[0]] + fields[6:10])
    # Compare subject starts as numbers: as strings "30" sorts after "1000".
    blastlist.sort(key=lambda hit: int(hit[3]), reverse=True)
    return blastlist
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
59
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
60
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def myContigs(file):
    """Load a fasta file into a dictionary of sequences.

    The key of each record is its whole header line without the leading
    ">"; the value is the concatenation of the record's sequence lines.

    Args:
        file: path of the fasta file holding the contigs.

    Returns:
        A dict mapping header to sequence string.

    Raises:
        ValueError: if sequence data appears before the first header.
    """
    chunks = {}
    header = None
    with open(file, "r") as f:
        for line in f:
            # Strip only line terminators: slicing off the last character
            # would eat a nucleotide when the file lacks a final newline.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif line:
                if header is None:
                    raise ValueError(
                        "%s: sequence data found before any fasta header"
                        % file)
                chunks[header].append(line)
    # Join once per record instead of growing a string line by line.
    return dict((name, "".join(parts)) for name, parts in chunks.items())
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
71
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
72
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def myGuide(file):
    """Load the guide fasta file as a coordinate-indexed dictionary.

    Header lines are skipped, so every sequence line of the file is
    numbered consecutively, starting at 1.

    Args:
        file: path of the fasta file holding the guide sequence.

    Returns:
        A dict mapping each 1-based coordinate to its nucleotide, in lower
        case.
    """
    Guide = {}
    coordinate = 0
    with open(file, "r") as f:
        for line in f:
            if line.startswith(">"):
                continue
            # Strip only line terminators so that a last line without a
            # trailing newline keeps its final nucleotide.
            for nucleotide in line.rstrip("\r\n"):
                coordinate += 1
                Guide[coordinate] = nucleotide.lower()
    return Guide
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
85
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
86
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def updateGuide(blastlist, GuideDict, ContigsDict):
    """Overlay the aligned contig segments on the guide, in place.

    For every hit, the guide positions covered by the subject interval are
    removed, then the matching contig segment is written starting at the
    lower subject coordinate. A hit whose subject start is greater than
    its subject stop is inserted as a reverse complement.

    Args:
        blastlist: list of hits, each a list with
            element [0]: name of the blasted contig
            element [1]: queryStart of the alignment to the reference
            element [2]: queryStop of the alignment to the reference
            element [3]: subjectStart of the alignment to the reference
            element [4]: subjectStop of the alignment to the reference
        GuideDict: dict mapping guide coordinate to nucleotide; modified
            in place.
        ContigsDict: dict mapping contig name to contig sequence.

    Returns:
        None.
    """
    for fields in blastlist:
        seqHeader = fields[0]
        queryStart = int(fields[1])
        queryStop = int(fields[2])
        subjectStart = int(fields[3])
        subjectStop = int(fields[4])
        orientation = "direct"
        if subjectStart > subjectStop:
            # Descending subject coordinates: normalise the interval and
            # remember that the contig must be reverse-complemented.
            subjectStart, subjectStop = subjectStop, subjectStart
            orientation = "reverse"
        sequence = getseq(ContigsDict, seqHeader, queryStart, queryStop,
                          orientation)
        # Positions may already be absent (removed by a previous hit), so
        # drop them without failing.
        for position in range(subjectStart, subjectStop + 1):
            GuideDict.pop(position, None)
        # NOTE(review): when the contig segment is longer than the subject
        # interval, the extra nucleotides overwrite the guide positions
        # that follow subjectStop - confirm this is the intended behaviour.
        for offset, nucleotide in enumerate(sequence):
            GuideDict[subjectStart + offset] = nucleotide
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
116
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
117
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def finalAssembly(GuideDict, outputfile, prefix, suffix):
    """Write the scaffold held in *GuideDict* as a one-record fasta file.

    Args:
        GuideDict: dict mapping coordinate to nucleotide; the nucleotides
            are emitted in increasing coordinate order.
        outputfile: path of the fasta file to create or overwrite.
        prefix: text inserted after "Scaffold_from_" in the header.
        suffix: text inserted after "_guided_by_" in the header.

    Returns:
        None.
    """
    finalSequence = insert_newlines(
        "".join(GuideDict[position] for position in sorted(GuideDict)))
    # The context manager closes the file even if a write fails.
    with open(outputfile, "w") as Out:
        Out.write(">Scaffold_from_%s_guided_by_%s\n" % (prefix, suffix))
        Out.write("%s\n" % finalSequence)
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
127
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
128
7d96b28eec49 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_to_scaffold commit 48a4098045106f363e92357949b32617a2e868c1
artbio
parents:
diff changeset
def __main__():
    """Run the whole pipeline: parse options, load inputs, write scaffold.

    The contigs, the guide and the blast hits are read from the files
    given on the command line; the guide is then updated with the aligned
    contig segments and written to the requested output file.
    """
    options = Parser()
    contigs = myContigs(options.sequences)
    guide = myGuide(options.guideSequence)
    hits = blatnInfo(options.blast_tab)
    # updateGuide works in place on the guide dictionary.
    updateGuide(hits, guide, contigs)
    finalAssembly(
        guide,
        options.output,
        options.scaffold_prefix,
        options.scaffold_suffix,
    )


# Only run the pipeline when executed as a script, not on import.
if __name__ == "__main__":
    __main__()