comparison RaGOO/ragoo_utilities/get_contig_borders.py @ 13:b9a3aeb162ab draft default tip

Uploaded
author dereeper
date Mon, 26 Jul 2021 18:22:37 +0000
parents
children
comparison
equal deleted inserted replaced
12:68a9ec9ce51e 13:b9a3aeb162ab
1
2 if __name__ == "__main__":
3 import argparse
4
5 parser = argparse.ArgumentParser(description='given and orderings file and a contigs fasta index, print a bed file of contig placements in the pseudomolecules.')
6 parser.add_argument("orderings", metavar="<orderings.txt>", type=str, help="orderings file from RaGOO")
7 parser.add_argument("fai", metavar="<contigs.fasta.fai>", type=str, help="index file for contigs (samtools faidx contigs.fasta)")
8 parser.add_argument("gap_len", metavar="100", type=int, help="Gap size used for pseudomolecule padding.")
9
10 # Get the command line arguments
11 args = parser.parse_args()
12 orderings_file = args.orderings
13 fai_file = args.fai
14 gap_len = args.gap_len
15
16 # Save the contig orderings
17 ctgs = []
18 with open(orderings_file, 'r') as f:
19 for line in f:
20 ctgs.append(line.rstrip().split('\t')[0])
21
22 # Get contig lengths
23 ctg_lens = dict()
24 with open(fai_file, 'r') as f:
25 for line in f:
26 L1 = line.split('\t')
27 ctg_lens[L1[0]] = int(L1[1])
28
29 # Get contig borders
30 final_bed = []
31 current_pos = 0
32
33 for ctg in ctgs:
34 start = current_pos
35 end = current_pos + ctg_lens[ctg]
36 current_pos += ctg_lens[ctg]
37 current_pos += gap_len
38 pm_header = orderings_file[orderings_file.rfind('/')+1:orderings_file.rfind('_')] + '_RaGOO'
39 final_bed.append('%s\t%r\t%r' % (pm_header, start, end))
40
41 print('\n'.join(final_bed))