Mercurial > repos > jjohnson > ensembl_variant_report
changeset 2:f87fe6bc48f4 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit 6b6e5c13531bf909c4c70b7f8f9e28b4206d9068-dirty
author | jjohnson |
---|---|
date | Mon, 18 Mar 2019 21:43:34 -0400 |
parents | a67b4de184c2 |
children | 652d35c42bca |
files | ensembl_variant_report.py ensembl_variant_report.xml |
diffstat | 2 files changed, 24 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/ensembl_variant_report.py Thu Jun 14 18:08:15 2018 -0400 +++ b/ensembl_variant_report.py Mon Mar 18 21:43:34 2019 -0400 @@ -96,7 +96,8 @@ dp = int(fields[di]) dpr = [int(x) for x in fields[fi].split(',')] for i,alt in enumerate(alts.split(',')): - freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None + freq = float(dpr[i+1])/float(dp) if dp and dpr else \ + float(dpr[i+1])/float(sum(dpr)) if dpr else None yield (transcript,pos,ref,alt,dp,freq) def parse_snpeff_vcf(): @@ -115,13 +116,16 @@ qual = float(qual) dp = None dpr = None + af = None for info_item in info.split(';'): if info_item.find('=') < 0: continue (key, val) = info_item.split('=', 1) if key == 'DP': dp = int(val) - if key == 'DPR': + if key == 'DPR' or key == 'AD': dpr = [int(x) for x in val.split(',')] + if key == 'AF': + af = [float(x) for x in val.split(',')] if key in ['EFF','ANN']: for effect in val.split(','): if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|')) @@ -131,8 +135,15 @@ (eff, effs) = effect.rstrip(')').split('(') (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11] i = alt_list.index(alt) if alt in alt_list else 0 - freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None - yield (transcript,pos,ref,alt,dp,freq) + if af: + freq = af[i] + elif dpr: + freq = float(dpr[i+1])/float(dp) if dp else \ + float(dpr[i+1])/float(sum(dpr)) + else: + freq = None + if freq: + yield (transcript,pos,ref,alt,dp,freq) #Process gene model @@ -179,6 +190,8 @@ alt_seq = alt if tx.gene.strand else reverse_complement(alt) ref_seq = ref if tx.gene.strand else reverse_complement(ref) cds_pos = ens_ref.genome_to_cds_pos(tid, spos) + if cds_pos is None: + continue if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos)) alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else '' offset = 0
--- a/ensembl_variant_report.xml Thu Jun 14 18:08:15 2018 -0400 +++ b/ensembl_variant_report.xml Mon Mar 18 21:43:34 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="ensembl_variant_report" name="Ensembl Variant Report" version="0.1.0"> +<tool id="ensembl_variant_report" name="Ensembl Variant Report" version="0.2.0"> <requirements> <requirement type="package" version="1.40">gtf_to_genes</requirement> <requirement type="package" version="3.1.4">twobitreader</requirement> @@ -94,7 +94,12 @@ </inputs> <outputs> - <data name="output" format="tabular" /> + <data name="output" format="tabular" > + <actions> + <action name="column_names" type="metadata" + default="Gene,Ref_location,Ref_seq,Var_seq,Frequency,DP,Ensemble_Gene_transcript,AA_pos,AA_var,Protein_len,Stop_Codon,Variant_Peptide,Transcipt_type"/> + </actions> + </data> </outputs> <tests> <test>