Mercurial > repos > iuc > snpeff
comparison snpEff_create_db.xml @ 17:65ae79bddc69 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 5ab504d384299d8c2ed496650f1f9e4a887cd102
author | iuc |
---|---|
date | Thu, 06 Sep 2018 13:23:57 -0400 |
parents | 479c4f2f4826 |
children | de67e5082c48 |
comparison
equal
deleted
inserted
replaced
16:c9ecd2a96ecf | 17:65ae79bddc69 |
---|---|
1 <tool id="snpEff_build_gb" name="SnpEff build:" version="@wrapper_version@.galaxy3"> | 1 <tool id="snpEff_build_gb" name="SnpEff build:" version="@WRAPPER_VERSION@.galaxy3"> |
2 <description> database from Genbank or GFF record</description> | 2 <description> database from Genbank or GFF record</description> |
3 <macros> | 3 <macros> |
4 <import>snpEff_macros.xml</import> | 4 <import>snpEff_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
11 <requirement type="package" version="5.32">libmagic</requirement> | 11 <requirement type="package" version="5.32">libmagic</requirement> |
12 </requirements> | 12 </requirements> |
13 <expand macro="stdio" /> | 13 <expand macro="stdio" /> |
14 <expand macro="version_command" /> | 14 <expand macro="version_command" /> |
15 <command><![CDATA[ | 15 <command><![CDATA[ |
16 | |
17 #if str( $input_type.input_type_selector ) == "gb": | 16 #if str( $input_type.input_type_selector ) == "gb": |
18 #if str( $input_type.fasta ) == "yes": | 17 #if str( $input_type.fasta ) == "yes": |
19 python3 '$__tool_directory__/gbk2fa.py' '${input_type.input_gbk}' '${output_fasta}' | 18 python3 '$__tool_directory__/gbk2fa.py' '${input_type.input_gbk}' '${output_fasta}' |
20 #if $input_type.remove_version: | 19 #if $input_type.remove_version: |
21 '${input_type.remove_version}' | 20 '${input_type.remove_version}' |
39 ln -s '${input_type.input_fasta}' '${snpeff_output.files_path}'/'${genome_version}'/sequences.fa.gz && | 38 ln -s '${input_type.input_fasta}' '${snpeff_output.files_path}'/'${genome_version}'/sequences.fa.gz && |
40 #end if | 39 #end if |
41 ln -s '${input_type.input_gff}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gff && | 40 ln -s '${input_type.input_gff}' '${snpeff_output.files_path}'/'${genome_version}'/genes.gff && |
42 #end if | 41 #end if |
43 | 42 |
44 snpEff @java_options@ build -v | 43 snpEff @JAVA_OPTIONS@ build -v |
45 -configOption '${genome_version}'.genome='${genome_version}' | 44 -configOption '${genome_version}'.genome='${genome_version}' |
46 -configOption '${genome_version}'.codonTable='${codon_table}' | 45 -configOption '${genome_version}'.codonTable='${codon_table}' |
47 #if str( $input_type.input_type_selector ) == "gb": | 46 #if str( $input_type.input_type_selector ) == "gb": |
48 -genbank | 47 -genbank |
49 #elif str( $input_type.input_type_selector ) == "gff": | 48 #elif str( $input_type.input_type_selector ) == "gff": |
53 echo "${genome_version}.genome : ${genome_version}" >> '${snpeff_output.files_path}'/snpEff.config && | 52 echo "${genome_version}.genome : ${genome_version}" >> '${snpeff_output.files_path}'/snpEff.config && |
54 echo "${genome_version}.codonTable : ${codon_table}" >> '${snpeff_output.files_path}'/snpEff.config | 53 echo "${genome_version}.codonTable : ${codon_table}" >> '${snpeff_output.files_path}'/snpEff.config |
55 | 54 |
56 ]]></command> | 55 ]]></command> |
57 <inputs> | 56 <inputs> |
58 <param name="genome_version" type="text" value="" label="Name for the database" help="for E. coli K12 you may want to use 'EcK12' etc."> | 57 <param name="genome_version" type="text" value="" label="Name for the database" help="For E. coli K12 you may want to use 'EcK12' etc."> |
59 <validator type="regex" message="A genome version name is required">\S+</validator> | 58 <validator type="empty_field" message="A genome version name is required" /> |
60 </param> | 59 </param> |
61 <conditional name="input_type"> | 60 <conditional name="input_type"> |
62 <param name="input_type_selector" type="select" display="radio" label="Input annotations are in" help="Specify format for annotations you are using to create SnpEff database"> | 61 <param name="input_type_selector" type="select" display="radio" label="Input annotations are in" help="Specify format for annotations you are using to create SnpEff database"> |
63 <option value="gb" selected="true">GenBank</option> | 62 <option value="gb" selected="true">GenBank</option> |
64 <option value="gff">GFF</option> | 63 <option value="gff">GFF</option> |
67 <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/> | 66 <param name="input_gbk" type="data" format="genbank,genbank.gz" label="Genbank dataset to build database from" help="This Genbank file will be used to generate snpEff database"/> |
68 <param name="fasta" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format"> | 67 <param name="fasta" type="select" display="radio" label="Parse Genbank into Fasta" help="This will generate an additional dataset containing all sequences from Genbank file in FASTA format"> |
69 <option value="yes" selected="true">Yes</option> | 68 <option value="yes" selected="true">Yes</option> |
70 <option value="no">No</option> | 69 <option value="no">No</option> |
71 </param> | 70 </param> |
72 <param type="boolean" name="remove_version" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have version numbers such as B000564.2. This option removes them leaving only B000564" argument="--remove_version"/> | 71 <param argument="--remove_version" type="boolean" truevalue="--remove_version" falsevalue="" checked="true" label="Remove sequence version label?" help="Genbank sequences have version numbers such as B000564.2. This option removes them leaving only B000564" /> |
73 </when> | 72 </when> |
74 <when value="gff"> | 73 <when value="gff"> |
75 <param name="input_gff" type="data" format="gff3" label="GFF dataset to build database from" help="This GFF file will be used to generate snpEff database"/> | 74 <param name="input_gff" type="data" format="gff3" label="GFF dataset to build database from" help="This GFF file will be used to generate snpEff database"/> |
76 <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Genome in FASTA format" help="This dataset is required for generating SnpEff database. See help section below."/> | 75 <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Genome in FASTA format" help="This dataset is required for generating SnpEff database. See help section below."/> |
77 </when> | 76 </when> |
78 </conditional> | 77 </conditional> |
79 <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options"> | 78 <param name="codon_table" type="select" label="Select genetic code for this sequence" help="If this sequence uses non-standard genetic code, select one from these options"> |
103 <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option> | 102 <option value="Scenedesmus_obliquus_Mitochondrial">Scenedesmus_obliquus_Mitochondrial</option> |
104 <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option> | 103 <option value="Thraustochytrium_Mitochondrial">Thraustochytrium_Mitochondrial</option> |
105 </param> | 104 </param> |
106 </inputs> | 105 </inputs> |
107 <outputs> | 106 <outputs> |
108 <data name="snpeff_output" format="snpeffdb" label="@snpeff_version@ database for ${genome_version}"/> | 107 <data name="snpeff_output" format="snpeffdb" label="@SNPEFF_VERSION@ database for ${genome_version}"/> |
109 <data name="output_fasta" format="fasta" label="Fasta sequences for ${genome_version}"> | 108 <data name="output_fasta" format="fasta" label="Fasta sequences for ${genome_version}"> |
110 <filter>input_type['input_type_selector'] == 'gb'</filter> | 109 <filter>input_type['input_type_selector'] == 'gb'</filter> |
111 <filter>input_type['fasta'] == 'yes'</filter> | 110 <filter>input_type['fasta'] == 'yes'</filter> |
112 </data> | 111 </data> |
113 </outputs> | 112 </outputs> |
158 </test> | 157 </test> |
159 </tests> | 158 </tests> |
160 <help><![CDATA[ | 159 <help><![CDATA[ |
161 **What it does** | 160 **What it does** |
162 | 161 |
163 This tool uses `"snpEff build -genbank"` or `"snpEff build -gff3"` commands to create a snpEff database. | 162 This tool uses `"snpEff build -genbank"` or `"snpEff build -gff3"` commands to create a snpEff database. |
164 | 163 |
165 ------ | 164 ------ |
166 | 165 |
167 .. class:: infomark | 166 .. class:: infomark |
168 | 167 |
169 **Working with Genbank files** | 168 **Working with Genbank files** |
170 | 169 |
171 Using Genbank data for creating databases has several advantages: | 170 Using Genbank data for creating databases has several advantages: |
172 | 171 |
173 #. Genbank files contain annotations (such as locations of genes) together with sequences. This ensures that these two are in sync with each other. | 172 #. Genbank files contain annotations (such as locations of genes) together with sequences. This ensures that these two are in sync with each other. |
174 #. When you are analyzing small genomes (or not so small) it is much more convenient to create a database on the fly and use it. | 173 #. When you are analyzing small genomes (or not so small) it is much more convenient to create a database on the fly and use it. |
175 | 174 |
176 .. class:: warningmark | 175 .. class:: warningmark |
177 | 176 |
178 SnpEff errors out on highly fragmented genomes containing multiple scaffolds. This is because a single gene may be split between multiple scaffolds causing SnpEff to crash. If this happens, use the GFF route described below. | 177 SnpEff errors out on highly fragmented genomes containing multiple scaffolds. This is because a single gene may be split between multiple scaffolds causing SnpEff to crash. If this happens, use the GFF route described below. |
179 | 178 |
180 ------- | 179 ------- |
181 | 180 |
182 **Genbank usage scenario** | 181 **Genbank usage scenario** |
183 | 182 |
184 Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool makes the following analysis steps possible: | 183 Suppose you have a series of Illumina reads from an experiment involving *E. coli* K-12 MG1655. You want to map these reads to the reference genome of K-12 MG1655, call variants, and annotate them using snpEff. This tool makes the following analysis steps possible: |
185 | 184 |
186 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome (note that all NCBI genomes have similar list of files associated with them). | 185 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome (note that all NCBI genomes have similar list of files associated with them). |
187 #. Copy URL for file with extension `gbff.gz` | 186 #. Copy URL for file with extension `gbff.gz` |
188 #. Paste the URL into upload tool and set datatype to `genbank.gz`. | 187 #. Paste the URL into upload tool and set datatype to `genbank.gz`. |
189 #. Use this tool to generate a snpEff database and FASTA sequences from the dataset you've uploaded during the previous step. | 188 #. Use this tool to generate a snpEff database and FASTA sequences from the dataset you've uploaded during the previous step. |
190 #. Use your Illumina reads to map against FASTA dataset generated in the previous step using BWA-MEM. | 189 #. Use your Illumina reads to map against FASTA dataset generated in the previous step using BWA-MEM. |
191 #. Call variants using **Freebayes**. | 190 #. Call variants using **Freebayes**. |
192 #. Annotate vcf output of Freebayes with **SnpEff eff** using database generated at step 4 (using *Custom* option for **Genome source** parameter). | 191 #. Annotate vcf output of Freebayes with **SnpEff eff** using database generated at step 4 (using *Custom* option for **Genome source** parameter). |
206 | 205 |
207 The GFF file contains coordinates of various features, but does not contain underlying sequences. This is why a FASTA file needs to be provided as well. | 206 The GFF file contains coordinates of various features, but does not contain underlying sequences. This is why a FASTA file needs to be provided as well. |
208 | 207 |
209 ------ | 208 ------ |
210 | 209 |
211 **GFF usage scenario** | 210 **GFF usage scenario** |
212 | 211 |
213 The following example also uses *E. coli* K-12 MG1655: | 212 The following example also uses *E. coli* K-12 MG1655: |
214 | 213 |
215 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome. | 214 #. Go to `NCBI <http://www.ncbi.nlm.nih.gov>`_ page for K-12 MG1655 genome. |
216 #. Copy URLs for files with `gff.gz` and `fna.gz` extensions. The first file contains annotations in GFF3 format. The second file contains entire genome as a FASTA record. | 215 #. Copy URLs for files with `gff.gz` and `fna.gz` extensions. The first file contains annotations in GFF3 format. The second file contains entire genome as a FASTA record. |
217 #. Paste URLs into upload tool and set datatypes to `gff3` and `fasta.gz` for annotations and genome, respectively. | 216 #. Paste URLs into upload tool and set datatypes to `gff3` and `fasta.gz` for annotations and genome, respectively. |
218 #. Use this tool to generate a snpEff database from the GFF dataset. | 217 #. Use this tool to generate a snpEff database from the GFF dataset. |
219 #. Map your reads against the FASTA dataset and continue as described in the above example. | 218 #. Map your reads against the FASTA dataset and continue as described in the above example. |
220 | 219 |
221 | 220 |
222 @snpeff_in_galaxy_info@ | 221 @SNPEFF_IN_GALAXY_INFO@ |
223 @external_documentation@ | 222 @EXTERNAL_DOCUMENTATION@ |
224 ]]> | 223 ]]> |
225 </help> | 224 </help> |
226 <expand macro="citations" /> | 225 <expand macro="citations" /> |
227 </tool> | 226 </tool> |