comparison vcf_gff.py @ 3:402c3f0fe807 draft

Uploaded revised vcf_gff.py from GitHub to fix a bug
author john-mccallum
date Thu, 18 Oct 2012 17:54:38 -0400
parents a0689dc29b7f
children b321e0517be3
comparison
equal deleted inserted replaced
2:ea2117a7b363 3:402c3f0fe807
108 gen = "HET" 108 gen = "HET"
109 if genotypes == "0/0": 109 if genotypes == "0/0":
110 gen = "HOM_ref" 110 gen = "HOM_ref"
111 else: 111 else:
112 gen = "NA" 112 gen = "NA"
113 geno = ("%s:%s;" % (reads, gen)) 113 geno = ("%s:%s " % (reads, gen))
114 genos += geno 114 genos += geno
115 sample_dict = {} 115 sample_dict = {}
116 return genos 116 return genos
117 117
118 attributes = {} 118 attributes = {}
119 """ 119 """
120 Get relevant info from vcf file and put to proper gff columns 120 Get relevant info from vcf file and put to proper gff columns
121 """ 121 """
122 122
123 out_gff_file.write("#gff-version 3\n")
123 for line in in_vcf_file: 124 for line in in_vcf_file:
124 if line.startswith("#") == False: 125 if line.startswith("#") == False:
125 info = line.split() 126 info = line.split()
126 seqid = info[0].strip() 127 seqid = info[0].strip()
127 source = "SAMTOOLS" 128 source = "SAMTOOLS"
139 else: 140 else:
140 end = start 141 end = start
141 gen = get_gen(info[8:], reference) 142 gen = get_gen(info[8:], reference)
142 out_gff_file.write( 143 out_gff_file.write(
143 ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\tID=%s:%s:%d;Variant" + 144 ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\tID=%s:%s:%d;Variant" +
144 "_seq=%s;Reference_seq=%s;Total_reads=%s:Zygosity=%s\n") % 145 "_seq=%s;Reference_seq=%s;Total_reads=%s;Zygosity=%s\n") %
145 ( seqid, source,record_type, start, end, score, strand, phase,seqid, 146 ( seqid, source,record_type, start, end, score, strand, phase,seqid,
146 record_type, start, variant, reference, reads, gen)) 147 record_type, start, variant, reference, reads, gen))
147 148
148 out_gff_file.close() 149 out_gff_file.close()
149 150