Mercurial > repos > dereeper > ragoo
view RaGOO/Assemblytics_within_alignment.py @ 13:b9a3aeb162ab draft default tip
Uploaded
author | dereeper |
---|---|
date | Mon, 26 Jul 2021 18:22:37 +0000 |
parents | |
children |
line wrap: on
line source
#! /usr/bin/env python import argparse import gzip # Author: Maria Nattestad # Email: mnattest@cshl.edu # This script is part of Assemblytics, a program to detect and analyze structural variants from an assembly aligned to a reference genome using MUMmer. def run(args): filename = args.delta minimum_variant_size = args.minimum_variant_size f = open(filename) header1 = f.readline() if header1[0:2]=="\x1f\x8b": f.close() f = gzip.open(filename) header1 = f.readline() # Ignore the first two lines for now f.readline() linecounter = 0 current_reference_name = "" current_reference_position = 0 current_query_name = "" current_query_position = 0 variants = [] for line in f: if line[0]==">": # linecounter += 1 # if linecounter > 1: # break fields = line.strip().split() current_reference_name = fields[0][1:] current_query_name = fields[1] else: fields = line.strip().split() if len(fields) > 4: # current_reference_position = int(fields[0]) current_reference_position = min(int(fields[0]),int(fields[1])) # fields[1] is the reference position at the end of the alignment # current_query_position = int(fields[2]) current_query_position = min(int(fields[2]),int(fields[3])) # fields[3] is the query position at the end of the alignment else: tick = int(fields[0]) if abs(tick) == 1: # then go back and edit the last entry to add 1 more to its size report = variants[-1] report[4] = report[4] + 1 # size if tick > 0: # deletion, moves in reference report[2] = report[2] + 1 # reference end position report[7] = report[7] + 1 # reference gap size current_reference_position += 1 # update reference position after deletion elif tick < 0: # insertion, moves in query report[8] = report[8] + 1 # query gap size report[12] = report[12] + 1 # query end position current_query_position += 1 # update query position after insertion else: # report the last one and continue current_reference_position += abs(tick) - 1 current_query_position += abs(tick) - 1 if tick > 0: size = 1 # report = "%s\t%d\t%d\tAssemblytics_%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (current_reference_name,current_reference_position,current_reference_position+size,len(variants)+1,size,"+","Deletion",size,0,current_query_name,"within_alignment") report = [current_reference_name,current_reference_position,current_reference_position+size,"Assemblytics_w_"+str(len(variants)+1),size,"+","Deletion",size,0,current_query_name,"within_alignment",current_query_position,current_query_position] current_reference_position += size # update reference position after deletion variants.append(report) elif tick < 0: size = 1 # report = "%s\t%d\t%d\tAssemblytics_%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % (current_reference_name,current_reference_position,current_reference_position,len(variants)+1,size,"+","Insertion",0,size,current_query_name,"within_alignment") report = [current_reference_name,current_reference_position,current_reference_position,"Assemblytics_w_"+str(len(variants)+1),size,"+","Insertion",0,size,current_query_name,"within_alignment",current_query_position,current_query_position+size] current_query_position += size # update query position after insertion variants.append(report) # TESTING # print line, report f.close() newcounter = 1 for line in variants: # report = "%s\t%d\t%d\tAssemblytics_%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s\n" % line if line[4] >= minimum_variant_size: line[3] = "Assemblytics_w_%d" % (newcounter) print ("\t".join(map(str,line[0:10])) + ":" + str(line[11]) + "-" + str(line[12]) + ":+\t" + line[10]) # print "\t".join(map(str,line)) newcounter += 1 def main(): parser=argparse.ArgumentParser(description="Outputs MUMmer coordinates annotated with length of unique sequence for each alignment") parser.add_argument("--delta",help="delta file" ,dest="delta", type=str, required=True) parser.add_argument("--min",help="Minimum size (bp) of variant to include, default = 50" ,dest="minimum_variant_size",type=int, default=50) parser.set_defaults(func=run) args=parser.parse_args() args.func(args) if __name__=="__main__": main()