Mercurial > repos > artbio > lumpy_smoove
view vcf2hrdetect.py @ 6:ad8853ee9909 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
author | artbio |
---|---|
date | Sun, 14 Feb 2021 23:59:42 +0000 |
parents | |
children | 7dcf61950215 |
line wrap: on
line source
import sys handle = open(sys.argv[1], 'r') vcfdict = dict() tabdict = dict() for line in handle: if line[0] == "#": continue else: tabline = line[:-1].split("\t") vcfdict[tabline[2]] = tabline for id in vcfdict.keys(): if "_1" in id: newid = id[:-2] pointbreak = vcfdict[id][4] if "]" in pointbreak: coordbreak = pointbreak.split("]")[1].split(":")[1] chrom = pointbreak.split("]")[1].split(":")[0] elif "[" in pointbreak: coordbreak = pointbreak.split("[")[1].split(":")[1] chrom = pointbreak.split("[")[1].split(":")[0] if vcfdict[id][0] == chrom: tabdict[newid] = [chrom, vcfdict[id][1], chrom, coordbreak, "INV"] else: tabdict[newid] = [vcfdict[id][0], vcfdict[id][1], chrom, coordbreak, "TRA"] for id in list(vcfdict): if "_" in id: del(vcfdict[id]) for id in vcfdict.keys(): # only sv that are not of type TRA or INV chr1 = vcfdict[id][0] chr2 = vcfdict[id][0] pos1 = vcfdict[id][1] pos2 = vcfdict[id][7].split("END=")[1].split(";")[0] type = vcfdict[id][7].split("SVTYPE=")[1].split(";")[0] tabdict[id] = [chr1, pos1, chr2, pos2, type] out = open(sys.argv[2], 'w') out.write("chr1\tpos1\tchr2\tpos2\ttype\n") for key in tabdict: line = "\t".join(tabdict[key]) + "\n" out.write(line)