Mercurial > repos > jjohnson > ensembl_variant_report
changeset 3:652d35c42bca draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit d3b7ff1c2f0a1eed7a65af7208e987a35627402c-dirty
author | jjohnson |
---|---|
date | Wed, 20 Mar 2019 15:27:02 -0400 |
parents | f87fe6bc48f4 |
children | 7fc91849ab21 |
files | ensembl_variant_report.py ensembl_variant_report.xml ensemblref.pyc |
diffstat | 3 files changed, 39 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/ensembl_variant_report.py Mon Mar 18 21:43:34 2019 -0400 +++ b/ensembl_variant_report.py Wed Mar 20 15:27:02 2019 -0400 @@ -116,6 +116,7 @@ qual = float(qual) dp = None dpr = None + ao = None af = None for info_item in info.split(';'): if info_item.find('=') < 0: continue @@ -124,6 +125,8 @@ dp = int(val) if key == 'DPR' or key == 'AD': dpr = [int(x) for x in val.split(',')] + if key == 'AO': + ao = [int(x) for x in val.split(',')] if key == 'AF': af = [float(x) for x in val.split(',')] if key in ['EFF','ANN']: @@ -135,11 +138,14 @@ (eff, effs) = effect.rstrip(')').split('(') (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11] i = alt_list.index(alt) if alt in alt_list else 0 - if af: - freq = af[i] + if ao: + freq = float(ao[i])/float(dp) if dp else \ + float(dpr[i])/float(sum(ao)) elif dpr: freq = float(dpr[i+1])/float(dp) if dp else \ float(dpr[i+1])/float(sum(dpr)) + elif af: + freq = af[i] else: freq = None if freq:
--- a/ensembl_variant_report.xml Mon Mar 18 21:43:34 2019 -0400 +++ b/ensembl_variant_report.xml Wed Mar 20 15:27:02 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="ensembl_variant_report" name="Ensembl Variant Report" version="0.2.0"> +<tool id="ensembl_variant_report" name="Ensembl Variant Report" version="0.3.0"> <requirements> <requirement type="package" version="1.40">gtf_to_genes</requirement> <requirement type="package" version="3.1.4">twobitreader</requirement> @@ -97,7 +97,7 @@ <data name="output" format="tabular" > <actions> <action name="column_names" type="metadata" - default="Gene,Ref_location,Ref_seq,Var_seq,Frequency,DP,Ensemble_Gene_transcript,AA_pos,AA_var,Protein_len,Stop_Codon,Variant_Peptide,Transcipt_type"/> + default="Gene,Ref_location,Ref_seq,Var_seq,Frequency,DP,Ensemble_Gene_transcript,AA_pos,AA_var,Protein_len,Stop_Codon,Variant_Peptide,Transcript_type"/> </actions> </data> </outputs> @@ -179,18 +179,43 @@ </tests> <help><![CDATA[ Uses an Essembl GTF and a genome 2bit reference to report variant peptides from snpEff reported missense and frameshift variants. -Allows readthrough of stop codons, and reports the stop codons. +Allows readthrough of stop codons, and reports the stop codons. Translation readthrough is known to occur with some antibiotics. + +The variant peptides can be converted to a fasta file with text and fasta tools, then used as input to epitope binding prediction +applications such as netMHC or IEDB. + +**Input** Input can be a snpEff vcf file using either ANN or EFF annotations. + Alternatively, the input can be a tabular file that has columns: - - pos - - ref - - alt + - genomic_location + - reference_bases + - variant_bases - Ensembl Transcript ID - Read Depth (DP) - AlleleDepth (DPR) +**Output** + +Sample Output :: + + ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= ======================= + Gene Ref_location Ref_seq Var_seq Frequency DP Ensemble_Gene_transcript AA_pos AA_var Protein_len Stop_Codon Variant_Peptide Transcript_type + ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= ======================= + ACTL8 1:18149510 + G T 1.00 12 ENSG00000117148|ENST00000375406 3 A3S 367 G-TGA MA_S_RTVIIDHGSG protein_coding + BDH2 4:104013796 - A G 0.47 159 ENSG00000164039|ENST00000511354 70 N70S 91 c-tag TKKKQIDQFA_S_EVERLDVLFN nonsense_mediated_decay + CENPE 4:104061993 - G C 0.83 6 ENSG00000138778|ENST00000265148 1911 S1911T 2702 G-TAG LKLERDQLKE_T_LQETKARDLE protein_coding + CCHCR1 6:31110391 - C G 0.40 65 ENSG00000204536|ENST00000396268 865 S865C 872 C-TAA QGDNLDRCSS_C_NPQMSS* protein_coding + NPRL3 16:138772 - CT CCT 0.58 123 ENSG00000103148|ENST00000399953 489 S489L 569 A-TGA-C,C-TGA-G LGA*TRSHPQCTRSPEP* protein_coding + ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= ======================= + +The Variant_Peptide column: + - misense: prior amino acids _ variant amino acid _ following amino acids + - frameshift: variant amino acids with stop codons indicated by * + + ]]></help> <citations> <citation type="doi">10.1093/bioinformatics/btt385</citation>