<!--
view snpEff_create_db.xml @ 13:f0ee2b470481 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 0310c734a944901fffb975937b20f55bfc1d9f46
author iuc
date Sat, 07 Apr 2018 15:46:09 -0400
parents 5a29ab10dba6
children 479c4f2f4826
line wrap: on
line source
-->

<tool id="snpEff_build_gb" name="SnpEff build:" version="@wrapper_version@.galaxy2">
    <!-- Free-text description accompanying the tool name ("SnpEff build:") declared on the tool element. -->
    <description> database from Genbank record</description>
    <!-- Shared definitions. The "requirement", "stdio", "version_command" and
         "citations" macros expanded in this file, as well as the @...@ tokens,
         are expected to be defined in snpEff_macros.xml (not visible here). -->
    <macros>
        <import>snpEff_macros.xml</import>
    </macros>
    <requirements>
        <!-- NOTE(review): presumably pins the snpEff package itself; confirm in snpEff_macros.xml. -->
        <expand macro="requirement" />
        <!-- The command section invokes gbk2fa.py through python3. -->
        <requirement type="package" version="3.6">python</requirement>
        <!-- NOTE(review): biopython, python-magic and libmagic are presumably
             dependencies of gbk2fa.py; confirm against that script. -->
        <requirement type="package" version="1.70">biopython</requirement>
        <requirement type="package" version="0.4.15">python-magic</requirement>
        <requirement type="package" version="5.32">libmagic</requirement>
    </requirements>
    <!-- Error detection and version reporting are delegated to imported macros. -->
    <expand macro="stdio" />
    <expand macro="version_command" />
    <!--
        Command template (Cheetah). The shell steps are chained with "&&" so the
        job stops at the first failing step:
          1. Optionally convert the Genbank input to FASTA with gbk2fa.py.
          2. Create the per-genome directory inside the output's files path and
             place the Genbank record there as genes.gbk.
          3. Run "snpEff build -genbank" with that files path as the data directory.
          4. Append the genome and codon table entries to snpEff.config in the
             same files path.
    -->
    <command><![CDATA[

        ## Optional FASTA export; the boolean's truevalue is passed through as a flag.
        #if str( $fasta.fasta_selector ) == "yes":
            python3 '$__tool_directory__/gbk2fa.py' '${input_gbk}' '${output_fasta}'
            #if $fasta.remove_version:
                '${fasta.remove_version}'
            #end if
            &&
        #end if

        mkdir -p '${snpeff_output.files_path}/${genome_version}' &&

        ## The record is always exposed to snpEff under the name genes.gbk.
        ## A gzip-compressed dataset is decompressed instead of linked, so that
        ## genes.gbk is plain text no matter how snpEff treats compressed input.
        #if $input_gbk.is_of_type('genbank.gz'):
            gzip -dc '${input_gbk}' > '${snpeff_output.files_path}/${genome_version}/genes.gbk' &&
        #else
            ln -s '${input_gbk}' '${snpeff_output.files_path}/${genome_version}/genes.gbk' &&
        #end if

        snpEff @java_options@ build -v
        -configOption '${genome_version}.genome=${genome_version}'
        -configOption '${genome_version}.codonTable=${codon_table}'
        -genbank -dataDir '${snpeff_output.files_path}' '${genome_version}' &&

        ## Record the same two settings in a config file stored next to the database.
        ## Single quotes keep the shell from interpreting the interpolated text.
        echo '${genome_version}.genome : ${genome_version}' >> '${snpeff_output.files_path}/snpEff.config' &&
        echo '${genome_version}.codonTable : ${codon_table}' >> '${snpeff_output.files_path}/snpEff.config'

    ]]></command>
    <inputs>
        <!-- Database identifier. The command uses it as the directory name under
             the output files path, as the genome argument to snpEff build, and as
             the key prefix written to snpEff.config. -->
        <param name="genome_version" type="text" value="" label="Name for the database" help="for E. coli K12 you may want to use 'EcK12' etc.">
            <validator type="regex" message="A genome version name is required">\S+</validator>
        </param>
        <!-- Genbank record (plain or gzip-compressed) the database is built from. -->
        <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/>
        <!-- The selected value is passed verbatim to snpEff as the codonTable
             config option and written to snpEff.config. -->
        <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options">
            <option selected="true" value="Standard">Standard</option>
            <option value="Vertebrate_Mitochondrial">Vertebrate_Mitochondrial</option>
            <option value="Yeast_Mitochondrial">Yeast_Mitochondrial</option>
            <option value="Mold_Mitochondrial">Mold_Mitochondrial</option>
            <option value="Protozoan_Mitochondrial">Protozoan_Mitochondrial</option>
            <option value="Coelenterate">Coelenterate</option>
            <option value="Mitochondrial">Mitochondrial</option>
            <option value="Mycoplasma">Mycoplasma</option>
            <option value="Spiroplasma">Spiroplasma</option>
            <option value="Invertebrate_Mitochondrial">Invertebrate_Mitochondrial</option>
            <option value="Ciliate_Nuclear">Ciliate_Nuclear</option>
            <option value="Dasycladacean_Nuclear">Dasycladacean_Nuclear</option>
            <option value="Hexamita_Nuclear">Hexamita_Nuclear</option>
            <option value="Echinoderm_Mitochondrial">Echinoderm_Mitochondrial</option>
            <option value="Flatworm_Mitochondrial">Flatworm_Mitochondrial</option>
            <option value="Euplotid_Nuclear">Euplotid_Nuclear</option>
            <option value="Bacterial_and_Plant_Plastid">Bacterial_and_Plant_Plastid</option>
            <option value="Alternative_Yeast_Nuclear">Alternative_Yeast_Nuclear</option>
            <option value="Ascidian_Mitochondrial">Ascidian_Mitochondrial</option>
            <option value="Alternative_Flatworm_Mitochondrial">Alternative_Flatworm_Mitochondrial</option>
            <option value="Blepharisma_Macronuclear">Blepharisma_Macronuclear</option>
            <option value="Chlorophycean_Mitochondrial">Chlorophycean_Mitochondrial</option>
            <option value="Trematode_Mitochondrial">Trematode_Mitochondrial</option>
            <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option>
            <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option>
        </param>
        <!-- Controls whether gbk2fa.py is run and whether the output_fasta
             dataset is produced (see the filter on that output). -->
        <conditional name="fasta">
            <param name="fasta_selector" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format">
                <option value="yes" selected="true">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes">
                <!-- When checked, the truevalue is appended to the gbk2fa.py command line. -->
                <param type="boolean" name="remove_version" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have version numbers such as B000564.2. This option removes them leaving only B000564" argument="--remove_version"/>
            </when>
            <when value="no"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- The snpEff database. The command builds it inside this dataset's
             files path (per-genome directory plus snpEff.config). -->
        <data name="snpeff_output" format="snpeffdb" label="@snpeff_version@ database for ${genome_version}"/>
        <!-- FASTA written by gbk2fa.py; only produced when "Parse Genbank into Fasta" is set to yes. -->
        <data name="output_fasta" format="fasta" label="Fasta sequences for ${genome_version}">
            <filter>fasta['fasta_selector'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default settings: FASTA export is enabled, so both the database and
             the FASTA output are checked. -->
        <test>
            <param name="genome_version" value="pBR322"/>
            <param name="input_gbk" value="pBR322.gbk" />
            <output name="snpeff_output">
                <assert_contents>
                    <has_text text="pBR322" />
                </assert_contents>
            </output>
            <output name="output_fasta" value="pBR322.fna"/>
        </test>
        <!-- FASTA export disabled: output_fasta is filtered out, so only the
             database output is checked. Exercises the branch of the command that
             skips gbk2fa.py. -->
        <test>
            <param name="genome_version" value="pBR322"/>
            <param name="input_gbk" value="pBR322.gbk" />
            <conditional name="fasta">
                <param name="fasta_selector" value="no"/>
            </conditional>
            <output name="snpeff_output">
                <assert_contents>
                    <has_text text="pBR322" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool uses `"snpEff build -genbank"` command to create a snpEff database from a Genbank dataset. If **Parse Genbank into Fasta** is selected (the default behavior) the tool will also convert Genbank dataset into a single FASTA dataset.


Using Genbank data for creating databases has several advantages:

 #. Genbank files contain annotations (such as locations of genes) together with sequences. This way one ensures that these two are in sync with each other.
 #. When you are analyzing small genomes it is much more convenient to create a database on the fly and use it.

-------

.. class:: infomark

**The usage scenario**

Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool enables the following analysis steps:

 #. Download genome from `NCBI <https://www.ncbi.nlm.nih.gov>`_ into Galaxy.
 #. Use this tool to generate a snpEff database and FASTA sequences from the file you downloaded at step 1.
 #. Map your Illumina reads against the FASTA dataset generated in the previous step using **BWA-MEM**.
 #. Call variants using **Freebayes**.
 #. Annotate vcf output of Freebayes with **SnpEff eff** using database generated at step 2 (using *Custom* option for **Genome source** parameter).

In this scenario the Genbank dataset is used twice. First, it is used to produce FASTA sequences that are used by BWA to map against. Second, it is used to create the snpEff database. This guarantees that you will not have any issues related to reference sequence naming.

@snpeff_in_galaxy_info@
@external_documentation@
]]>
    </help>
    <expand macro="citations" />
</tool>