view ensembl_variant_report.xml @ 4:7fc91849ab21 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit 4078a495c5569e9055b4eba33004fe609ee42aff"
author jjohnson
date Sat, 25 Jan 2020 15:22:46 -0500
parents 652d35c42bca
children
line wrap: on
line source

<tool id="ensembl_variant_report" name="Ensembl Variant Report" version="0.4.0">
    <!-- Packages Galaxy must provide in the job environment, each pinned to an exact version.
         Presumably these are the libraries imported by ensembl_variant_report.py (verify against the script). -->
    <requirements>
        <requirement version="1.40" type="package">gtf_to_genes</requirement>
        <requirement version="3.1.7" type="package">twobitreader</requirement>
        <requirement version="1.70" type="package">biopython</requirement>
    </requirements>
    <!-- Error detection: the rule below matches every exit code of 1 or greater. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!-- Cheetah command template: runs ensembl_variant_report.py from the tool directory.
         Path-like values are wrapped in single quotes so the shell passes them through literally;
         numeric options (column numbers, thresholds, counts) are left unquoted. -->
    <command><![CDATA[
         python '$__tool_directory__/ensembl_variant_report.py'
         ## Reference genome: path field of the selected twobit data table entry, or a history dataset
         #if $ref.ref_source == 'cached':
             --twobit='$ref.ref_loc.fields.path'
         #else
             --twobit='$ref.ref_file'
         #end if
         --gene_model='$gtf_file'
         ## Variant input: a snpEff VCF, or a tabular file together with its column numbers
         #if $variant.fmt == 'vcf':
             --input='$variant.input_vcf'
             --format=snpeff
         #else
             --input='$variant.input_tsv'
             --pos_column=$variant.pos_column
             --ref_column=$variant.ref_column
             --alt_column=$variant.alt_column
             --transcript_column=$variant.transcript_column
             --dp_column=$variant.dp_column
             --dpr_column=$variant.dpr_column
         #end if
         ## Optional filter and report settings: each option is emitted only when a value is set
         #if str($filter.min_depth) != '':
             --min_depth=$filter.min_depth
         #end if
         #if str($filter.min_freq) != '':
             --min_freq=$filter.min_freq
         #end if
         #if str($report.readthrough) != '':
             --readthrough=$report.readthrough
         #end if
         #if str($report.leading_aa) != '':
             --leading_aa=$report.leading_aa
         #end if
         #if str($report.trailing_aa) != '':
             --trailing_aa=$report.trailing_aa
         #end if
         --output='$output'
    ]]></command>
    <inputs>
        <!-- Variant input: either a snpEff-annotated VCF, or a tabular file for which the user
             picks the column holding each required field. The selected branch decides which
             input options the command template emits. -->
        <conditional name="variant">
            <param name="fmt" type="select" label="Input format for variants">
                <option value="vcf">snpEff vcf</option>
                <option value="tsv">tabular from snpsift extract</option>
            </param>
            <when value="vcf">
                <param name="input_vcf" type="data" format="vcf" label="snpEff VCF file"/>
            </when>
            <when value="tsv">
                <param name="input_tsv" type="data" format="tabular" label="tabular file"/>
                <!-- Each data_column parameter offers the columns of input_tsv. -->
                <param name="pos_column" type="data_column" data_ref="input_tsv" label="POS column"/>
                <param name="ref_column" type="data_column" data_ref="input_tsv" label="REF column"/>
                <param name="alt_column" type="data_column" data_ref="input_tsv" label="ALT column"/>
                <param name="transcript_column" type="data_column" data_ref="input_tsv" label="Transcript ID column"/>
                <param name="dp_column" type="data_column" data_ref="input_tsv" label="Read Depth (DP) column"/>
                <param name="dpr_column" type="data_column" data_ref="input_tsv" label="Allele Count (DPR or AN) column"/>
            </when>
        </conditional>
        <!-- Reference genome in 2bit format: chosen from the "twobit" data table,
             or supplied as a twobit dataset from the history. -->
        <conditional name="ref">
            <param type="select" name="ref_source" label="Source for Genomic Data">
                <option value="cached">Locally cached</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param type="select" name="ref_loc" label="Select reference 2bit file">
                    <options from_data_table="twobit"/>
                </param>
            </when>
            <when value="history">
                <param type="data" format="twobit" name="ref_file" label="reference 2bit file"/>
            </when>
        </conditional>
        <!-- Gene model in GTF format; the command template passes it as the gene_model option. -->
        <param name="gtf_file" type="data" format="gtf" label="Ensembl GTF file"/>
        <!-- Filter Options: both values are optional and empty by default; the command template
             passes a filter to the script only when a value has been entered. -->
        <section name="filter" expanded="false" title="Filter Options">
            <param name="min_depth" type="integer" value="" optional="true" min="0" label="Minimum Read Depth to report"
                   help="If a value is entered, ignore variants with DP INFO field less than this value"/>
            <param name="min_freq" type="float" value="" optional="true" min="0.0" max="1.0" label="Minimum Alt frequency to report"
                   help="If a value is entered, ignore variant alleles whose DPR count divided by the sum of DPR is less than this value"/>
        </section>
        <!-- Report Options: stop-codon readthrough count and the number of flanking amino acids
             shown on each side of the variant. -->
        <section name="report" title="Report Options" expanded="false">
            <param name="readthrough" type="integer" optional="true" value="0" min="0" max="4"
                   label="Number of readthrough stop codons to display"/>
            <param name="leading_aa" type="integer" optional="true" value="10" min="0"
                   label="Number of Amino Acids prior to variant to display"
                   help="Ignored for frame shifts"/>
            <param name="trailing_aa" type="integer" optional="true" value="10" min="0"
                   label="Number of Amino Acids following the variant to display"
                   help="Ignored for frame shifts"/>
        </section>
        
    </inputs>
    <!-- Single tabular report; the column_names metadata is preset to the header names below. -->
    <outputs>
        <data format="tabular" name="output">
            <actions>
                <action type="metadata" name="column_names"
                        default="Gene,Ref_location,Ref_seq,Var_seq,Frequency,DP,Ensemble_Gene_transcript,AA_pos,AA_var,Protein_len,Stop_Codon,Variant_Peptide,Transcript_type"/>
            </actions>
        </data>
    </outputs>
    <!-- Functional tests. All five use the GRCh38_X.2bit reference and GRCh38_X.83.gtf gene model
         from the history and check the output for the presence or absence of specific peptide strings. -->
    <tests>
        <!-- Test 1: ANN-annotated VCF with default options; the readthrough peptide must be absent. -->
        <test>
            <param name="fmt" value="vcf"/>
            <param name="input_vcf" value="GRCh38_X400k_ANN.vcf" ftype="vcf"/>
            <param name="ref_source" value="history"/>
            <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <not_has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: same assertions as test 1, using the EFF-annotated VCF. -->
        <test>
            <param name="fmt" value="vcf"/>
            <param name="input_vcf" value="GRCh38_X400k_EFF.vcf" ftype="vcf"/>
            <param name="ref_source" value="history"/>
            <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <not_has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: readthrough set to 1; the peptide beginning with a stop codon (*) must now be present. -->
        <test>
            <param name="fmt" value="vcf"/>
            <param name="input_vcf" value="GRCh38_X400k_ANN.vcf" ftype="vcf"/>
            <param name="ref_source" value="history"/>
            <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <param name="readthrough" value="1"/>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: depth and frequency filters set; one peptide must be kept and another must be absent. -->
        <test>
            <param name="fmt" value="vcf"/>
            <param name="input_vcf" value="GRCh38_X400k_ANN.vcf" ftype="vcf"/>
            <param name="ref_source" value="history"/>
            <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <param name="min_depth" value="100"/>
            <param name="min_freq" value=".80"/>
            <output name="output">
                <assert_contents>
                    <has_text text="EQLDAGVRYL_E_LRIAHMLEGS" />
                    <not_has_text text="EYDILVAEET_V_GEPWEDGFEA" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 5: tabular input with explicit column numbers; same assertions as test 1. -->
        <test>
            <param name="fmt" value="tsv"/>
            <param name="input_tsv" value="GRCh38_X400k.tsv" ftype="tabular"/>
            <param name="ref_source" value="history"/>
            <param name="ref_file" value="GRCh38_X.2bit" ftype="twobit"/>
            <param name="gtf_file" value="GRCh38_X.83.gtf" ftype="gtf"/>
            <param name="pos_column" value="2"/>
            <param name="ref_column" value="3"/>
            <param name="alt_column" value="4"/>
            <param name="transcript_column" value="7"/>
            <param name="dp_column" value="8"/>
            <param name="dpr_column" value="9"/>
            <output name="output">
                <assert_contents>
                    <has_text text="MGGQ_A_SASNSFSRLH" />
                    <not_has_text text="*REDHAAGPEA" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Uses an Ensembl GTF and a genome 2bit reference to report variant peptides from snpEff-reported missense and frameshift variants.
Allows readthrough of stop codons, and reports the stop codons.  Translation readthrough is known to occur with some antibiotics.

The variant peptides can be converted to a fasta file with text and fasta tools, then used as input to epitope binding prediction 
applications such as netMHC or IEDB.

**Input**

Input can be a snpEff vcf file using either ANN or EFF annotations.

Alternatively, the input can be a tabular file that has columns:

  - genomic_location
  - reference_bases
  - variant_bases
  - Ensembl Transcript ID
  - Read Depth (DP)
  - AlleleDepth (DPR)

**Output**

Sample Output ::

  ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= =======================
  Gene   Ref_location  Ref_seq Var_seq Frequency DP  Ensemble_Gene_transcript        AA_pos AA_var Protein_len Stop_Codon      Variant_Peptide         Transcript_type
  ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= =======================
  ACTL8  1:18149510 +  G       T       1.00      12  ENSG00000117148|ENST00000375406 3      A3S    367         G-TGA           MA_S_RTVIIDHGSG         protein_coding
  BDH2   4:104013796 - A       G       0.47      159 ENSG00000164039|ENST00000511354 70     N70S   91          c-tag           TKKKQIDQFA_S_EVERLDVLFN nonsense_mediated_decay
  CENPE  4:104061993 - G       C       0.83      6   ENSG00000138778|ENST00000265148 1911   S1911T 2702        G-TAG           LKLERDQLKE_T_LQETKARDLE protein_coding
  CCHCR1 6:31110391 -  C       G       0.40      65  ENSG00000204536|ENST00000396268 865    S865C  872         C-TAA           QGDNLDRCSS_C_NPQMSS*    protein_coding
  NPRL3  16:138772 -   CT      CCT     0.58      123 ENSG00000103148|ENST00000399953 489    S489L  569         A-TGA-C,C-TGA-G LGA*TRSHPQCTRSPEP*      protein_coding
  ====== ============= ======= ======= ========= === =============================== ====== ====== =========== =============== ======================= =======================

The Variant_Peptide column:
  - missense:  prior amino acids _ variant amino acid _ following amino acids
  - frameshift: variant amino acids with stop codons indicated by *


    ]]></help>
    <!-- Publication, identified by DOI, listed as the citation for this tool. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btt385</citation>
    </citations>
</tool>