comparison vcf_gff.py @ 5:b321e0517be3 draft

Uploaded
author ben-warren
date Thu, 22 May 2014 20:30:19 -0400
parents 402c3f0fe807
children
comparison
equal deleted inserted replaced
4:be070a68521e 5:b321e0517be3
44 rec = rec[1:] 44 rec = rec[1:]
45 else: 45 else:
46 record_type = "SNP" 46 record_type = "SNP"
47 for entry in rec: 47 for entry in rec:
48 detail = entry.split("=") 48 detail = entry.split("=")
49 if len(detail) < 2:
50 continue
49 INFO[detail[0]] = detail[1] 51 INFO[detail[0]] = detail[1]
50 if INFO.has_key("DP"): 52 if INFO.has_key("DP"):
51 reads = INFO.get("DP") 53 reads = INFO.get("DP")
52 else: 54 else:
53 reads = "NA" 55 reads = "NA"
106 gen = "HOM_mut" 108 gen = "HOM_mut"
107 if genotypes == "0/1": 109 if genotypes == "0/1":
108 gen = "HET" 110 gen = "HET"
109 if genotypes == "0/0": 111 if genotypes == "0/0":
110 gen = "HOM_ref" 112 gen = "HOM_ref"
111 else: 113 try: # set gen to 'NA' if still unset
114 gen
115 except NameError:
112 gen = "NA" 116 gen = "NA"
113 geno = ("%s:%s " % (reads, gen)) 117 geno = ("%s:%s " % (reads, gen))
114 genos += geno 118 genos += geno
115 sample_dict = {} 119 sample_dict = {}
116 return genos 120 return genos
139 end = start + len(reference) 143 end = start + len(reference)
140 else: 144 else:
141 end = start 145 end = start
142 gen = get_gen(info[8:], reference) 146 gen = get_gen(info[8:], reference)
143 out_gff_file.write( 147 out_gff_file.write(
144 ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\tID=%s:%s:%d;Variant" + 148 ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\tID=%s:%s:%s:%d;Variant" +
145 "_seq=%s;Reference_seq=%s;Total_reads=%s;Zygosity=%s\n") % 149 "_seq=%s;Reference_seq=%s;Total_reads=%s;Zygosity=%s\n") %
146 ( seqid, source,record_type, start, end, score, strand, phase,seqid, 150 ( seqid, source,record_type, start, end, score, strand, phase,seqid,
147 record_type, start, variant, reference, reads, gen)) 151 source, record_type, start, variant, reference, reads, gen))
148 152
149 out_gff_file.close() 153 out_gff_file.close()
150 154
151 155