annotate RaGOO/ragoo_utilities/get_contig_borders.py @ 13:b9a3aeb162ab draft default tip

Uploaded
author dereeper
date Mon, 26 Jul 2021 18:22:37 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
import os


def parse_orderings(lines):
    """Return the contig names listed in an orderings file, in file order.

    The contig name is taken from the first tab-separated field of each
    line.  Blank lines are skipped so that a trailing empty line does not
    become an empty contig name.

    :param lines: iterable of text lines (e.g. an open file handle).
    :returns: list of contig names.
    """
    return [line.rstrip().split('\t')[0] for line in lines if line.strip()]


def parse_fai(lines):
    """Return a dict mapping sequence name to length from a .fai index.

    The name is read from the first tab-separated field and the length
    from the second; any further fields are ignored.  Blank lines are
    skipped.

    :param lines: iterable of text lines (e.g. an open file handle).
    :returns: dict of ``{name: length}`` with integer lengths.
    :raises IndexError: if a non-blank line has fewer than two fields.
    :raises ValueError: if the second field is not an integer.
    """
    lengths = {}
    for line in lines:
        if not line.strip():
            continue
        fields = line.rstrip('\n').split('\t')
        lengths[fields[0]] = int(fields[1])
    return lengths


def pseudomolecule_header(orderings_path):
    """Derive the pseudomolecule name from an orderings file path.

    The name is the file's base name up to (not including) its last
    underscore, with ``_RaGOO`` appended, e.g.
    ``out/Chr1_orderings.txt`` -> ``Chr1_RaGOO``.

    Only the base name is searched for the underscore, so underscores in
    directory names cannot corrupt the result.  If the base name has no
    underscore, the base name without its extension is used instead.

    :param orderings_path: path to the orderings file.
    :returns: the pseudomolecule header string.
    """
    base = os.path.basename(orderings_path)
    cut = base.rfind('_')
    stem = base[:cut] if cut != -1 else os.path.splitext(base)[0]
    return stem + '_RaGOO'


def contig_borders(ctgs, ctg_lens, gap_len, pm_header):
    """Build one ``header<TAB>start<TAB>end`` line per placed contig.

    Contigs are laid end to end in the given order, starting at position
    0, with ``gap_len`` positions of padding between consecutive contigs.

    :param ctgs: contig names in placement order.
    :param ctg_lens: dict mapping contig name to integer length.
    :param gap_len: padding inserted after each contig.
    :param pm_header: value written in the first column of every line.
    :returns: list of tab-separated lines (no trailing newlines).
    :raises KeyError: if a contig has no entry in ``ctg_lens``.
    """
    rows = []
    position = 0
    for ctg in ctgs:
        length = ctg_lens.get(ctg)
        if length is None:
            raise KeyError(
                'contig %r from the orderings file is missing from the fasta index' % ctg
            )
        rows.append('%s\t%d\t%d' % (pm_header, position, position + length))
        # The next contig starts after this one plus the padding gap.
        position += length + gap_len
    return rows


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='given and orderings file and a contigs fasta index, print a bed file of contig placements in the pseudomolecules.')
    parser.add_argument("orderings", metavar="<orderings.txt>", type=str, help="orderings file from RaGOO")
    parser.add_argument("fai", metavar="<contigs.fasta.fai>", type=str, help="index file for contigs (samtools faidx contigs.fasta)")
    parser.add_argument("gap_len", metavar="100", type=int, help="Gap size used for pseudomolecule padding.")

    # Get the command line arguments
    args = parser.parse_args()
    orderings_file = args.orderings
    fai_file = args.fai
    gap_len = args.gap_len

    # Save the contig orderings
    with open(orderings_file, 'r') as f:
        ctgs = parse_orderings(f)

    # Get contig lengths
    with open(fai_file, 'r') as f:
        ctg_lens = parse_fai(f)

    # Get contig borders; the header depends only on the file name, so it
    # is computed once rather than once per contig.
    pm_header = pseudomolecule_header(orderings_file)
    final_bed = contig_borders(ctgs, ctg_lens, gap_len, pm_header)

    print('\n'.join(final_bed))