comparison snpEff_create_db.xml @ 10:5b4ac70948d2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
author iuc
date Tue, 27 Mar 2018 09:44:18 -0400
parents
children bfa6c1b8a03c
comparison
equal deleted inserted replaced
9:68693743661e 10:5b4ac70948d2
1 <tool id="snpEff_build_gb" name="SnpEff build:" version="@wrapper_version@.1">
2 <description> database from Genbank record</description>
3 <macros>
4 <import>snpEff_macros.xml</import>
5 </macros>
6 <requirements>
7 <expand macro="requirement" />
8 <requirement type="package" version="3.6">python</requirement>
9 <requirement type="package" version="1.70">biopython</requirement>
10 <requirement type="package" version="0.4.15">python-magic</requirement>
11 <requirement type="package" version="5.32">libmagic</requirement>
12 </requirements>
13 <expand macro="stdio" />
14 <expand macro="version_command" />
<!--
Command template (Cheetah directives mixed with shell). Steps, in order:
1. Only when the "fasta_selector" input is "yes": run gbk2fa.py from the tool
   directory with the Genbank input and the FASTA output as arguments; the value
   of the remove_version boolean is appended as an extra argument when it is set.
2. Create the directory <files_path>/<genome_version> under the snpeff_output dataset.
3. Symlink the Genbank input into that directory under the name genes.gbk.
4. Run "snpEff build -genbank" with -dataDir set to files_path, passing the genome
   name and the selected codon table as -configOption entries.
All steps are joined with "&&", so a failing step prevents the following ones from running.
-->
15 <command><![CDATA[
16
17 #if str( $fasta.fasta_selector ) == "yes":
18 python3 '$__tool_directory__/gbk2fa.py' '${input_gbk}' '${output_fasta}'
19 #if $fasta.remove_version:
20 '${fasta.remove_version}'
21 #end if
22 &&
23 #end if
24
25 mkdir -p '${snpeff_output.files_path}'/'${genome_version}' &&
26
27 ln -s '${input_gbk}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gbk &&
28
29 snpEff @java_options@ build -v
30 -configOption '${genome_version}'.genome='${genome_version}'
31 -configOption '${genome_version}'.codonTable='${codon_table}'
32 -genbank -dataDir '$snpeff_output.files_path' '$genome_version'
33
34 ]]></command>
<!-- User-facing parameters: database name, Genbank input, genetic code, and the optional FASTA export. -->
35 <inputs>
<!-- Used by the command template both as the snpEff genome identifier and as the data sub-directory name; validated against the regex \S+. -->
36 <param name="genome_version" type="text" value="" label="Name for the database" help="for E. coli K12 you may want to use 'EcK12' etc.">
37 <validator type="regex" message="A genome version name is required">\S+</validator>
38 </param>
39 <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/>
<!-- The selected value is passed verbatim to snpEff as the <genome_version>.codonTable config option. -->
40 <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options">
41 <option selected="true" value="Standard">Standard</option>
42 <option value="Vertebrate_Mitochondrial">Vertebrate_Mitochondrial</option>
43 <option value="Yeast_Mitochondrial">Yeast_Mitochondrial</option>
44 <option value="Mold_Mitochondrial">Mold_Mitochondrial</option>
45 <option value="Protozoan_Mitochondrial">Protozoan_Mitochondrial</option>
46 <option value="Coelenterate">Coelenterate</option>
47 <option value="Mitochondrial">Mitochondrial</option>
48 <option value="Mycoplasma">Mycoplasma</option>
49 <option value="Spiroplasma">Spiroplasma</option>
50 <option value="Invertebrate_Mitochondrial">Invertebrate_Mitochondrial</option>
51 <option value="Ciliate_Nuclear">Ciliate_Nuclear</option>
52 <option value="Dasycladacean_Nuclear">Dasycladacean_Nuclear</option>
53 <option value="Hexamita_Nuclear">Hexamita_Nuclear</option>
54 <option value="Echinoderm_Mitochondrial">Echinoderm_Mitochondrial</option>
55 <option value="Flatworm_Mitochondrial">Flatworm_Mitochondrial</option>
56 <option value="Euplotid_Nuclear">Euplotid_Nuclear</option>
57 <option value="Bacterial_and_Plant_Plastid">Bacterial_and_Plant_Plastid</option>
58 <option value="Alternative_Yeast_Nuclear">Alternative_Yeast_Nuclear</option>
59 <option value="Ascidian_Mitochondrial">Ascidian_Mitochondrial</option>
60 <option value="Alternative_Flatworm_Mitochondrial">Alternative_Flatworm_Mitochondrial</option>
61 <option value="Blepharisma_Macronuclear">Blepharisma_Macronuclear</option>
62 <option value="Chlorophycean_Mitochondrial">Chlorophycean_Mitochondrial</option>
63 <option value="Trematode_Mitochondrial">Trematode_Mitochondrial</option>
64 <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option>
65 <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option>
66 </param>
<!-- Controls whether the command template runs gbk2fa.py and whether the output_fasta dataset is created (see the output filter). -->
67 <conditional name="fasta">
68 <param name="fasta_selector" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format">
69 <option value="yes" selected="true">Yes</option>
70 <option value="no">No</option>
71 </param>
72 <when value="yes">
73 <param type="boolean" name="remove_version" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have version numbers such as B000564.2. This option removes them leaving only B000564" argument="--remove_version"/>
74 </when>
75 <when value="no"/>
76 </conditional>
77 </inputs>
<!-- Outputs: the snpEff database is always produced; the FASTA dataset is produced only when the "fasta_selector" input is "yes". -->
78 <outputs>
79 <data name="snpeff_output" format="snpeffdb" label="@snpeff_version@ database for ${genome_version}"/>
80 <data name="output_fasta" format="fasta" label="Fasta sequences for ${genome_version}">
81 <filter>fasta['fasta_selector'] == 'yes'</filter>
82 </data>
83 </outputs>
<!--
Single test case: builds a database named pBR322 from pBR322.gbk, leaving every
other input at its default. It asserts that the snpeff_output dataset contains
the text "pBR322" and compares the output_fasta dataset with pBR322.fna.
-->
84 <tests>
85 <test>
86 <param name="genome_version" value="pBR322"/>
87 <param name="input_gbk" value="pBR322.gbk" />
88 <output name="snpeff_output">
89 <assert_contents>
90 <has_text text="pBR322" />
91 </assert_contents>
92 </output>
93 <output name="output_fasta" value="pBR322.fna"/>
94 </test>
95 </tests>
96 <help><![CDATA[
97 **What it does**
98
99 This tool uses the `"snpEff build -genbank"` command to create a snpEff database from a Genbank dataset. If **Parse Genbank into Fasta** is selected (the default behavior), the tool will also convert the Genbank dataset into a single FASTA dataset.
100
101
102 Using Genbank data for creating databases has several advantages:
103
104 #. Genbank files contain annotations (such as locations of genes) together with sequences. This way one ensures that the two are in sync with each other.
105 #. When you are analyzing small genomes it is much more convenient to create a database on the fly and use it.
106
107 -------
108
109 .. class:: infomark
110
111 **The usage scenario**
112
113 Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool enables you to follow the following analysis steps:
114
115 #. Download genome from `NCBI <https://www.ncbi.nlm.nih.gov>`_ into Galaxy.
116 #. Use this tool to generate a snpEff database and FASTA sequences from the file you downloaded at step 1.
117 #. Map your Illumina reads against the FASTA dataset generated in the previous step using BWA-MEM.
118 #. Call variants using **Freebayes**.
119 #. Annotate vcf output of Freebayes with **SnpEff eff** using database generated at step 2 (using *Custom* option for **Genome source** parameter).
120
121 In this scenario the Genbank dataset is used twice. First, it is used to produce the FASTA sequences that BWA maps the reads against. Second, it is used to create the snpEff database. This guarantees that you will not have any issues related to reference sequence naming.
122
123 @snpeff_in_galaxy_info@
124 @external_documentation@
125 ]]>
126 </help>
127 <expand macro="citations" />
128 </tool>