comparison snpEff_create_db.xml @ 17:65ae79bddc69 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 5ab504d384299d8c2ed496650f1f9e4a887cd102
author iuc
date Thu, 06 Sep 2018 13:23:57 -0400
parents 479c4f2f4826
children de67e5082c48
comparison
equal deleted inserted replaced
16:c9ecd2a96ecf 17:65ae79bddc69
1 <tool id="snpEff_build_gb" name="SnpEff build:" version="@wrapper_version@.galaxy3"> 1 <tool id="snpEff_build_gb" name="SnpEff build:" version="@WRAPPER_VERSION@.galaxy3">
2 <description> database from Genbank or GFF record</description> 2 <description> database from Genbank or GFF record</description>
3 <macros> 3 <macros>
4 <import>snpEff_macros.xml</import> 4 <import>snpEff_macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
11 <requirement type="package" version="5.32">libmagic</requirement> 11 <requirement type="package" version="5.32">libmagic</requirement>
12 </requirements> 12 </requirements>
13 <expand macro="stdio" /> 13 <expand macro="stdio" />
14 <expand macro="version_command" /> 14 <expand macro="version_command" />
15 <command><![CDATA[ 15 <command><![CDATA[
16
17 #if str( $input_type.input_type_selector ) == "gb": 16 #if str( $input_type.input_type_selector ) == "gb":
18 #if str( $input_type.fasta ) == "yes": 17 #if str( $input_type.fasta ) == "yes":
19 python3 '$__tool_directory__/gbk2fa.py' '${input_type.input_gbk}' '${output_fasta}' 18 python3 '$__tool_directory__/gbk2fa.py' '${input_type.input_gbk}' '${output_fasta}'
20 #if $input_type.remove_version: 19 #if $input_type.remove_version:
21 '${input_type.remove_version}' 20 '${input_type.remove_version}'
39 ln -s '${input_type.input_fasta}' '${snpeff_output.files_path}'/'${genome_version}'/sequences.fa.gz && 38 ln -s '${input_type.input_fasta}' '${snpeff_output.files_path}'/'${genome_version}'/sequences.fa.gz &&
40 #end if 39 #end if
41 ln -s '${input_type.input_gff}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gff && 40 ln -s '${input_type.input_gff}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gff &&
42 #end if 41 #end if
43 42
44 snpEff @java_options@ build -v 43 snpEff @JAVA_OPTIONS@ build -v
45 -configOption '${genome_version}'.genome='${genome_version}' 44 -configOption '${genome_version}'.genome='${genome_version}'
46 -configOption '${genome_version}'.codonTable='${codon_table}' 45 -configOption '${genome_version}'.codonTable='${codon_table}'
47 #if str( $input_type.input_type_selector ) == "gb": 46 #if str( $input_type.input_type_selector ) == "gb":
48 -genbank 47 -genbank
49 #elif str( $input_type.input_type_selector ) == "gff": 48 #elif str( $input_type.input_type_selector ) == "gff":
53 echo "${genome_version}.genome : ${genome_version}" >> '${snpeff_output.files_path}'/snpEff.config && 52 echo "${genome_version}.genome : ${genome_version}" >> '${snpeff_output.files_path}'/snpEff.config &&
54 echo "${genome_version}.codonTable : ${codon_table}" >> '${snpeff_output.files_path}'/snpEff.config 53 echo "${genome_version}.codonTable : ${codon_table}" >> '${snpeff_output.files_path}'/snpEff.config
55 54
56 ]]></command> 55 ]]></command>
57 <inputs> 56 <inputs>
58 <param name="genome_version" type="text" value="" label="Name for the database" help="for E. coli K12 you may want to use 'EcK12' etc."> 57 <param name="genome_version" type="text" value="" label="Name for the database" help="For E. coli K12 you may want to use 'EcK12' etc.">
59 <validator type="regex" message="A genome version name is required">\S+</validator> 58 <validator type="empty_field" message="A genome version name is required" />
60 </param> 59 </param>
61 <conditional name="input_type"> 60 <conditional name="input_type">
62 <param name="input_type_selector" type="select" display="radio" label="Input annotations are in" help="Specify format for annotations you are using to create SnpEff database"> 61 <param name="input_type_selector" type="select" display="radio" label="Input annotations are in" help="Specify format for annotations you are using to create SnpEff database">
63 <option value="gb" selected="true">GenBank</option> 62 <option value="gb" selected="true">GenBank</option>
64 <option value="gff">GFF</option> 63 <option value="gff">GFF</option>
67 <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/> 66 <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/>
68 <param name="fasta" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format"> 67 <param name="fasta" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format">
69 <option value="yes" selected="true">Yes</option> 68 <option value="yes" selected="true">Yes</option>
70 <option value="no">No</option> 69 <option value="no">No</option>
71 </param> 70 </param>
72 <param type="boolean" name="remove_version" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have version numbers such as B000564.2. This option removes them leaving only B000564" argument="--remove_version"/> 71 <param argument="--remove_version" type="boolean" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have version numbers such as B000564.2. This option removes them leaving only B000564" />
73 </when> 72 </when>
74 <when value="gff"> 73 <when value="gff">
75 <param name="input_gff" type="data" format="gff3" label="GFF dataset to build database from" help="This GFF file will be used to generate snpEff database"/> 74 <param name="input_gff" type="data" format="gff3" label="GFF dataset to build database from" help="This GFF file will be used to generate snpEff database"/>
76 <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Genome in FASTA format" help="This dataset is required for generating SnpEff database. See help section below."/> 75 <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Genome in FASTA format" help="This dataset is required for generating SnpEff database. See help section below."/>
77 </when> 76 </when>
78 </conditional> 77 </conditional>
79 <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options"> 78 <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options">
103 <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option> 102 <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option>
104 <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option> 103 <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option>
105 </param> 104 </param>
106 </inputs> 105 </inputs>
107 <outputs> 106 <outputs>
108 <data name="snpeff_output" format="snpeffdb" label="@snpeff_version@ database for ${genome_version}"/> 107 <data name="snpeff_output" format="snpeffdb" label="@SNPEFF_VERSION@ database for ${genome_version}"/>
109 <data name="output_fasta" format="fasta" label="Fasta sequences for ${genome_version}"> 108 <data name="output_fasta" format="fasta" label="Fasta sequences for ${genome_version}">
110 <filter>input_type['input_type_selector'] == 'gb'</filter> 109 <filter>input_type['input_type_selector'] == 'gb'</filter>
111 <filter>input_type['fasta'] == 'yes'</filter> 110 <filter>input_type['fasta'] == 'yes'</filter>
112 </data> 111 </data>
113 </outputs> 112 </outputs>
158 </test> 157 </test>
159 </tests> 158 </tests>
160 <help><![CDATA[ 159 <help><![CDATA[
161 **What it does** 160 **What it does**
162 161
163 This tool uses `"snpEff build -genbank"` or `"snpEff build -gff3"` commands to create a snpEff database. 162 This tool uses `"snpEff build -genbank"` or `"snpEff build -gff3"` commands to create a snpEff database.
164 163
165 ------ 164 ------
166 165
167 .. class:: infomark 166 .. class:: infomark
168 167
169 **Working with Genbank files** 168 **Working with Genbank files**
170 169
171 Using Genbank data for creating databases has several advantages: 170 Using Genbank data for creating databases has several advantages:
172 171
173 #. Genbank files contain annotations (such as locations of genes) together with sequences. This ensures that these two are in sync with each other. 172 #. Genbank files contain annotations (such as locations of genes) together with sequences. This ensures that these two are in sync with each other.
174 #. When you are analyzing small genomes (or not so small) it is much more convenient to create a database on the fly and use it. 173 #. When you are analyzing small genomes (or not so small) it is much more convenient to create a database on the fly and use it.
175 174
176 .. class:: warningmark 175 .. class:: warningmark
177 176
178 SnpEff errors out on highly fragmented genomes containing multiple scaffolds. This is because a single gene may be split between multiple scaffolds causing SnpEff to crash. If this happens, use the GFF route described below. 177 SnpEff errors out on highly fragmented genomes containing multiple scaffolds. This is because a single gene may be split between multiple scaffolds causing SnpEff to crash. If this happens, use the GFF route described below.
179 178
180 ------- 179 -------
181 180
182 **Genbank usage scenario** 181 **Genbank usage scenario**
183 182
184 Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool lets you carry out the following analysis steps: 183 Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool lets you carry out the following analysis steps:
185 184
186 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome (note that all NCBI genomes have a similar list of files associated with them). 185 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome (note that all NCBI genomes have a similar list of files associated with them).
187 #. Copy URL for file with extension `gbff.gz` 186 #. Copy URL for file with extension `gbff.gz`
188 #. Paste the URL into upload tool and set datatype to `genbank.gz`. 187 #. Paste the URL into upload tool and set datatype to `genbank.gz`.
189 #. Use this tool to generate a snpEff database and FASTA sequences from the dataset you've uploaded during the previous step. 188 #. Use this tool to generate a snpEff database and FASTA sequences from the dataset you've uploaded during the previous step.
190 #. Use your Illumina reads to map against FASTA dataset generated in the previous step using BWA-MEM. 189 #. Use your Illumina reads to map against FASTA dataset generated in the previous step using BWA-MEM.
191 #. Call variants using **Freebayes**. 190 #. Call variants using **Freebayes**.
192 #. Annotate vcf output of Freebayes with **SnpEff eff** using database generated at step 4 (using *Custom* option for **Genome source** parameter). 191 #. Annotate vcf output of Freebayes with **SnpEff eff** using database generated at step 4 (using *Custom* option for **Genome source** parameter).
206 205
207 The GFF file contains coordinates of various features, but does not contain underlying sequences. This is why a FASTA file needs to be provided as well. 206 The GFF file contains coordinates of various features, but does not contain underlying sequences. This is why a FASTA file needs to be provided as well.
208 207
209 ------ 208 ------
210 209
211 **GFF usage scenario** 210 **GFF usage scenario**
212 211
213 The following example also uses *E. coli* K-12 MG1655: 212 The following example also uses *E. coli* K-12 MG1655:
214 213
215 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome. 214 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome.
216 #. Copy URLs for files with `gff.gz` and `fna.gz` extensions. The first file contains annotations in GFF3 format. The second file contains entire genome as a FASTA record. 215 #. Copy URLs for files with `gff.gz` and `fna.gz` extensions. The first file contains annotations in GFF3 format. The second file contains entire genome as a FASTA record.
217 #. Paste URLs into upload tool and set datatypes to `gff3` and `fasta.gz` for annotations and genome, respectively. 216 #. Paste URLs into upload tool and set datatypes to `gff3` and `fasta.gz` for annotations and genome, respectively.
218 #. Use this tool to generate a snpEff database from the GFF dataset. 217 #. Use this tool to generate a snpEff database from the GFF dataset.
219 #. Map your reads against the FASTA dataset and continue as described in the above example. 218 #. Map your reads against the FASTA dataset and continue as described in the above example.
220 219
221 220
222 @snpeff_in_galaxy_info@ 221 @SNPEFF_IN_GALAXY_INFO@
223 @external_documentation@ 222 @EXTERNAL_DOCUMENTATION@
224 ]]> 223 ]]>
225 </help> 224 </help>
226 <expand macro="citations" /> 225 <expand macro="citations" />
227 </tool> 226 </tool>