diff bcftools_csq.xml @ 0:2a6c13f8cc5a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 9d03fe38504a35d11660dadb44cb1beee32fcf4e
author iuc
date Thu, 13 Apr 2017 17:47:16 -0400
parents
children 039ea3f1dea9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bcftools_csq.xml	Thu Apr 13 17:47:16 2017 -0400
@@ -0,0 +1,153 @@
+<?xml version='1.0' encoding='utf-8'?>
+<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@.0">
+    <description>Haplotype aware consequence predictor</description>
+    <macros>
+        <token name="@EXECUTABLE@">csq</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <expand macro="samtools_requirement"/>
+    </expand>
+    <expand macro="version_command" />
+    <command detect_errors="aggressive"><![CDATA[
+@PREPARE_ENV@
+@PREPARE_INPUT_FILE@
+#set $section = $sec_required
+@PREPARE_FASTA_REF@
+
+#set $section = $sec_restrict
+@PREPARE_TARGETS_FILE@
+@PREPARE_REGIONS_FILE@
+
+bcftools @EXECUTABLE@
+
+## csq required inputs section
+#set $section = $sec_required
+@FASTA_REF@
+--gff-annot '$section.gff_annot'
+
+## csq options section
+#set $section = $sec_csq_opts
+#if str($section.ncsq):
+    --ncsq $section.ncsq
+#end if
+$section.local_csq
+#if $section.phase:
+    --phase $section.phase
+#end if
+#if str($section.custom_tag):
+    --custom-tag '$section.custom_tag'
+#end if
+
+## Subset section
+#set $section = $sec_subset
+@SAMPLES@
+
+## Filter section
+#set $section = $sec_restrict
+@INCLUDE@
+@EXCLUDE@
+@REGIONS@
+@TARGETS@
+
+@OUTPUT_TYPE@
+
+## Primary Input/Outputs
+@INPUT_FILE@
+> '$output_file'
+
+]]>
+    </command>
+    <inputs>
+        <expand macro="macro_input" />
+        <section name="sec_required" expanded="true" title="Required References">
+            <expand macro="macro_fasta_ref" />
+            <param name="gff_annot" type="data" format="gff3" label="GFF3 annotation file" 
+                 help="From Ensembl:  ftp://ftp.ensembl.org/pub/current_gff3/"/>
+        </section>
+        <section name="sec_csq_opts" expanded="true" title="CSQ Options">
+            <param name="ncsq" type="integer" value="16" min="1" max="50" label="maximum number of consequences to consider per site"
+                help="-ncsq 16"/>
+            <param name="local_csq" type="boolean" truevalue="--local-csq" falsevalue="" checked="false" label="run localized predictions considering only one VCF record at a time"
+                 help="--local-csq switch off haplotype-aware calling, run localized predictions considering only one VCF record at a time"/>
+            <param name="phase" type="select" optional="true" label="phase" 
+                 help="how to construct haplotypes and how to deal with unphased data">
+                <option value="a">take GTs as is, create haplotypes regardless of phase (0/1 -> 0|1)</option>
+                <option value="m">merge *all* GTs into a single haplotype (0/1 -> 1, 1/2 -> 1)</option>
+                <option value="r">require phased GTs, throw an error on unphased het GTs</option>
+                <option value="R">create non-reference haplotypes if possible (0/1 -> 1|1, 1/2 -> 1|2)</option>
+                <option value="s">skip unphased GTs</option>
+            </param>
+            <param name="custom_tag" type="text" value="" optional="true" 
+                 label="use this custom tag to store consequences rather than the default BCSQ tag">
+                 <validator type="regex" message="">^(\w+)?$</validator>
+            </param>
+        </section>
+
+        <section name="sec_restrict" expanded="false" title="Restrict to">
+            <expand macro="macro_regions" />
+            <expand macro="macro_targets" />
+            <expand macro="macro_include" />
+            <expand macro="macro_exclude" />
+        </section>
+        <section name="sec_subset" expanded="false" title="Subset Options">
+            <expand macro="macro_samples" />
+        </section>
+        <expand macro="macro_select_output_type" />
+    </inputs>
+    <outputs>
+        <expand macro="macro_vcf_output" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" ftype="vcf" value="csq.vcf" />
+            <param name="fasta_ref" ftype="fasta" value="csq.fa" />
+            <param name="gff_annot" ftype="gff3" value="csq.gff3" />
+            <param name="output_type" value="v" />
+            <output name="output_file">
+                <assert_contents>
+                    <has_text text="BCSQ" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+=====================================
+ bcftools @EXECUTABLE@
+=====================================
+
+Haplotype aware consequence predictor which correctly handles combined variants such as MNPs split over
+multiple VCF records, SNPs separated by an intron (but adjacent in the spliced transcript) or nearby
+frame-shifting indels which in combination in fact are not frame-shifting.
+
+The output VCF is annotated with INFO/BCSQ and FORMAT/BCSQ tag (configurable with the -c option).
+The latter is a bitmask of indexes to INFO/BCSQ, with interleaved haplotypes. See the usage examples
+below for using the %TBCSQ converter in query for extracting a more human readable form from this bitmask.
+The contruction of the bitmask limits the number of consequences that can be referenced in the FORMAT/BCSQ tags.
+By default this is 16, but if more are required, see the --ncsq option.
+
+The program requires on input a VCF/BCF file, the reference genome in fasta format (--fasta-ref)
+and genomic features in the GFF3 format downloadable from the Ensembl website (--gff-annot),
+and outputs an annotated VCF/BCF file. Currently, only Ensembl GFF3 files are supported.
+
+By default, the input VCF should be phased. If phase is unknown, or only partially known,
+the --phase option can be used to indicate how to handle unphased data. Alternatively,
+haplotype aware calling can be turned off with the --local-csq option.
+
+If conflicting (overlapping) variants within one haplotype are detected, a warning will
+be emitted and predictions will be based on only the first variant in the analysis.
+
+Symbolic alleles are not supported. They will remain unannotated in the output VCF and are
+ignored for the prediction analysis.
+
+
+@REGIONS_HELP@
+@TARGETS_HELP@
+
+@BCFTOOLS_MANPAGE@#@EXECUTABLE@
+
+@BCFTOOLS_WIKI@
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>