Mercurial > repos > dereeper > ragoo
diff RaGOO/ragoo_utilities/get_contig_borders.py @ 13:b9a3aeb162ab draft default tip
Uploaded
author | dereeper |
---|---|
date | Mon, 26 Jul 2021 18:22:37 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/RaGOO/ragoo_utilities/get_contig_borders.py Mon Jul 26 18:22:37 2021 +0000 @@ -0,0 +1,41 @@ + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description='given and orderings file and a contigs fasta index, print a bed file of contig placements in the pseudomolecules.') + parser.add_argument("orderings", metavar="<orderings.txt>", type=str, help="orderings file from RaGOO") + parser.add_argument("fai", metavar="<contigs.fasta.fai>", type=str, help="index file for contigs (samtools faidx contigs.fasta)") + parser.add_argument("gap_len", metavar="100", type=int, help="Gap size used for pseudomolecule padding.") + + # Get the command line arguments + args = parser.parse_args() + orderings_file = args.orderings + fai_file = args.fai + gap_len = args.gap_len + + # Save the contig orderings + ctgs = [] + with open(orderings_file, 'r') as f: + for line in f: + ctgs.append(line.rstrip().split('\t')[0]) + + # Get contig lengths + ctg_lens = dict() + with open(fai_file, 'r') as f: + for line in f: + L1 = line.split('\t') + ctg_lens[L1[0]] = int(L1[1]) + + # Get contig borders + final_bed = [] + current_pos = 0 + + for ctg in ctgs: + start = current_pos + end = current_pos + ctg_lens[ctg] + current_pos += ctg_lens[ctg] + current_pos += gap_len + pm_header = orderings_file[orderings_file.rfind('/')+1:orderings_file.rfind('_')] + '_RaGOO' + final_bed.append('%s\t%r\t%r' % (pm_header, start, end)) + + print('\n'.join(final_bed)) \ No newline at end of file