Mercurial > repos > devteam > samtools_mpileup

diff samtools_mpileup.xml @ 4:c6fdfe3331d6 draft
Uploaded
author: devteam
date: Tue, 21 Apr 2015 16:29:10 -0400
parents: 973fea5b4bdf
children: aa0ef6f0ee89
--- a/samtools_mpileup.xml	Thu Mar 27 15:27:36 2014 -0400
+++ b/samtools_mpileup.xml	Tue Apr 21 16:29:10 2015 -0400
@@ -1,213 +1,371 @@
-<tool id="samtools_mpileup" name="MPileup" version="0.0.3">
-  <description>SNP and indel caller</description>
-  <requirements>
-      <requirement type="package" version="0.1.19">samtools</requirement>
-  </requirements>
-  <command interpreter="python">samtools_wrapper.py
-    -p 'samtools mpileup'
-    --stdout "${output_log}"
+<tool id="samtools_mpileup" name="MPileup" version="2.0">
+    <description>call variants</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+    <![CDATA[
+    #if $reference_source.reference_source_selector == "history":
+       ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup
+    #else:
+        samtools mpileup
+    #end if
     #if $reference_source.reference_source_selector != "history":
-        -p '-f "${reference_source.ref_file.fields.path}"'
+        -f "${reference_source.ref_file.fields.path}"
     #else:
-        -d "-f" "${reference_source.ref_file}" "fa" "reference_input"
+        -f "${reference_source.ref_file}"
     #end if
     #for $i, $input_bam in enumerate( $reference_source.input_bams ):
-        -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}"
-        -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index
+        "${input_bam.input_bam}"
     #end for
-    -p '
     #if str( $advanced_options.advanced_options_selector ) == "advanced":
-        ${advanced_options.skip_anomalous_read_pairs}
+        #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
+            #if $advanced_options.filter_by_flags.require_flags:
+                --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])}
+            #end if
+            #if $advanced_options.filter_by_flags.exclude_flags:
+                --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])}
+            #end if
+        #end if
+        #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
+            -l "$pasted_regions"
+        #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history"
+            -l "$bed_regions"
+        #end if
+        #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
+            -G "$excluded_read_groups"
+        #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history"
+            -G "$read_groups"
+        #end if
+            ${advanced_options.skip_anomalous_read_pairs}
         ${advanced_options.disable_probabilistic_realignment}
         -C "${advanced_options.coefficient_for_downgrading}"
         -d "${advanced_options.max_reads_per_bam}"
         ${advanced_options.extended_BAQ_computation}
-        #if str( $advanced_options.position_list ) != 'None':
-          -l "${advanced_options.position_list}"
-        #end if
         -q "${advanced_options.minimum_mapping_quality}"
         -Q "${advanced_options.minimum_base_quality}"
         #if str( $advanced_options.region_string ):
             -r "${advanced_options.region_string}"
         #end if
-        ${advanced_options.output_per_sample_read_depth}
-        ${advanced_options.output_per_sample_strand_bias_p_value}
+        
     #end if
     #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation':
-        ##-g or -u
-        -g
-        -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}"
-        -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}"
+        ##
+
+        ${genotype_likelihood_computation_type.output_format}
+        ${genotype_likelihood_computation_type.compressed}
+        
+        #if str( $genotype_likelihood_computation_type.output_tags ) != "None":
+            -output-tags "${genotype_likelihood_computation_type.output_tags}"
+        #end if
+
         #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
+            -o "${genotype_likelihood_computation_type.perform_indel_calling.gap_open_sequencing_error_probability}"
+            -e "${genotype_likelihood_computation_type.perform_indel_calling.gap_extension_sequencing_error_probability}"
+            -h "${genotype_likelihood_computation_type.perform_indel_calling.coefficient_for_modeling_homopolymer_errors}"
             -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}"
-        #else:
+            -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}"
+            --open-prob "${genotype_likelihood_computation_type.perform_indel_calling.open_seq_error_probability}"
+            -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}"
+            ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample}
+            #if len( $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ):
+                -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.perform_indel_calling.platform_list_repeat ] ) }"
+            #end if
+        #elif str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'do_not_perform_indel_calling':
             -I
         #end if
-        -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}"
-        #if len( $genotype_likelihood_computation_type.platform_list_repeat ):
-            -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }"
-        #end if
+        
+         
+    #else:
+        ${genotype_likelihood_computation_type.base_position_on_reads}
+        ${genotype_likelihood_computation_type.output_mapping_quality}
     #end if
-    &gt; "${output_mpileup}"
-    '
-  </command>
-  <inputs>
-    <conditional name="reference_source">
-      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
-        <option value="cached">Locally cached</option>
-        <option value="history">History</option>
-      </param>
-      <when value="cached">
-        <repeat name="input_bams" title="BAM file" min="1">
-          <param name="input_bam" type="data" format="bam" label="BAM file">
-            <validator type="unspecified_build" />
-            <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
-          </param>
-        </repeat>
-        <param name="ref_file" type="select" label="Using reference genome">
-          <options from_data_table="fasta_indexes">
-            <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...-->
-          </options>
-        </param>
-      </when>
-      <when value="history"> <!-- FIX ME!!!! -->
-        <repeat name="input_bams" title="BAM file" min="1">
-          <param name="input_bam" type="data" format="bam" label="BAM file">
-            <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." />
-          </param>
-        </repeat>
-        <param name="ref_file" type="data" format="fasta" label="Using reference file" />
-      </when>
-    </conditional>
-
-    
-    <conditional name="genotype_likelihood_computation_type">
-      <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
-        <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option>
-        <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option>
-      </param>
-      <when value="perform_genotype_likelihood_computation">
-          <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" />
-          <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." />
-          <conditional name="perform_indel_calling">
-            <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
-              <option value="perform_indel_calling" selected="True">Perform INDEL calling</option>
-              <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
+    --output "$output_mpileup" 2> "$output_log"
+    ]]>
+    </command>
+    <inputs>
+        <conditional name="reference_source">
+            <param label="Choose the source for the reference genome" name="reference_source_selector" type="select">
+                <option value="cached">Use a built-in genome</option>
+                <option value="history">Use a genome from the history</option>
+            </param>
+            <when value="cached">
+                <repeat min="1" name="input_bams" title="BAM file">
+                    <param format="bam" label="BAM file" name="input_bam" type="data">
+                        <validator type="unspecified_build" />
+                        <validator message="Sequences are not currently available for the specified build." metadata_column="1" metadata_name="dbkey" table_name="fasta_indexes" type="dataset_metadata_in_data_table" />
+                    </param>
+                </repeat>
+                <param label="Using reference genome" name="ref_file" type="select">
+                    <options from_data_table="fasta_indexes" />
+                </param>
+            </when>
+            <when value="history">
+                <repeat min="1" name="input_bams" title="BAM file">
+                    <param format="bam" label="BAM file" name="input_bam" type="data">
+                        <validator check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." type="metadata" />
+                    </param>
+                </repeat>
+                <param format="fasta" label="Using reference genome" name="ref_file" type="data" />
+            </when>
+        </conditional>
+        <conditional name="genotype_likelihood_computation_type">
+            <param label="Genotype Likelihood Computation" name="genotype_likelihood_computation_type_selector" type="select">
+                <option selected="True" value="perform_genotype_likelihood_computation">Perform genotype likelihood computation (--VCF, --BCF options)</option>
+                <option value="do_not_perform_genotype_likelihood_computation">Do not perform genotype likelihood computation (output pileup)</option>
             </param>
-            <when value="perform_indel_calling">
-              <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" />
+            <when value="perform_genotype_likelihood_computation">
+                <param label="Choose the output format" name="output_format" type="select">
+                    <option value="--VCF">VCF</option>
+                    <option value="--BCF">BCF</option>
+                </param>
+                <param checked="False" falsevalue="--uncompressed" label="Compress output" name="compressed" truevalue="" type="boolean" help="--incompressed; default=False"/>
+                <param name="output_tags" optional="True" type="select" multiple="True" display="checkboxes" label="Optional tags to output" help="--output-tags">
+                    <option value="DP">DP (Number of high-quality bases)</option>
+                    <option value="DPR">DRP (Number of high-quality bases for each observed allele)</option>
+                    <option value="DV">DV (Number of high-quality non-reference bases)</option>
+                    <option value="DP4">DP4 (Number of high-quality ref-forward, ref-reverse, alt-forward and alt-reverse bases)</option>
+                    <option value="INFO/DPR">INFO/DPR (Number of high-quality bases for each observed allele)</option>
+                    <option value="SP">SP (Phred-scaled strand bias P-value)</option>
+                </param>
+                <conditional name="perform_indel_calling">
+                    <param label="Perform INDEL calling" name="perform_indel_calling_selector" type="select">
+                        <option selected="True" value="perform_indel_calling_def">Perform INDEL calling using default options</option>
+                        <option value="perform_indel_calling">Perform INDEL calling and set advanced options</option>
+                        <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
+                    </param>
+                    <when value="perform_indel_calling_def" />
+                    <when value="perform_indel_calling">
+                        <param label="Phred-scaled gap open sequencing error probability" name="gap_open_sequencing_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
+                        <param label="Phred-scaled gap extension sequencing error probability" name="gap_extension_sequencing_error_probability" type="integer" value="20" help="--ext-prob;  Reducing this value leads to longer indels. default=20"/>
+                        <param label="Coefficient for modeling homopolymer errors." name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" help="--tandem-qual; default=100"/>
+                        <param label="Skip INDEL calling if the average per-sample depth is above" name="skip_indel_calling_above_sample_depth" type="integer" value="250" help="--max-idepth; default=250"/>
+                        <param label="Minimum gapped reads for indel candidates" name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" help="--min-ireads; default=1"/>
+                        <param label="Phred-scaled gap open sequencing error probability" name="open_seq_error_probability" type="integer" value="40" help="--open-prob; Reducing this value leads to more indel calls; default=40"/>
+                        <param label="Minimum fraction of gapped reads" name="minimum_gapped_read_fraction" type="float" value="0.002" help="--gap-frac; default=0.002"/>
+                        <param checked="False" falsevalue="" label="Apply --min-ireads and --gap-frac values on a per-sample basis" name="gapped_read_per_sample" truevalue="-p" type="boolean" help="--per-sample-mF;  by default both options are applied to reads pooled from all samples"/>
+                        <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
+                            <param label="Platform to use for INDEL candidates" name="platform_entry" type="text" value="" help="It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA"/>
+                        </repeat>
+                    </when>
+                    <when value="do_not_perform_indel_calling" />
+                </conditional>
+                
+            </when>
+            <when value="do_not_perform_genotype_likelihood_computation">
+                <param checked="False" falsevalue="" label="Output base positions on reads" name="base_position_on_reads" truevalue="-O" type="boolean" help="--output-BP"/>
+                <param checked="False" falsevalue="" label="Output mapping quality" name="output_mapping_quality" truevalue="-s" type="boolean" help="--output-MQ"/>
             </when>
-            <when value="do_not_perform_indel_calling" />
-          </conditional>
-          <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" />
-          <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
-            <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" />
-          </repeat>
-      </when>
-      <when value="do_not_perform_genotype_likelihood_computation">
-          <!-- Do nothing here -->
-      </when>
-    </conditional>
-    <conditional name="advanced_options">
-      <param name="advanced_options_selector" type="select" label="Set advanced options">
-        <option value="basic" selected="True">Basic</option>
-        <option value="advanced">Advanced</option>
-      </param>
-      <when value="advanced">
-        <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" />
-        <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label="	Disable probabilistic realignment for the computation of base alignment quality (BAQ)" />
-        <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" />
-        <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" />
-        <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" />
-        <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" />
-        <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" />
-        <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" />
-        <param name="region_string" type="text" value="" label="Only generate pileup in region" />
-        <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" />
-        <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" />
-      </when>
-      <when value="basic" />
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}">
-      <change_format>
-        <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" />
-      </change_format>
-    </data>
-    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
-  </outputs>
-  <tests>
-      <test>
-          <param name="reference_source_selector" value="history" />
-          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
-          <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
-          <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
-          <param name="advanced_options_selector" value="basic" />
-          <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> 
-          <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" />
-      </test>
-      <test>
-          <param name="reference_source_selector" value="history" />
-          <param name="ref_file" value="phiX.fasta" ftype="fasta" />
-          <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
-          <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
-          <param name="gap_extension_sequencing_error_probability" value="20" />
-          <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
-          <param name="perform_indel_calling_selector" value="perform_indel_calling" />
-          <param name="skip_indel_calling_above_sample_depth" value="250" />
-          <param name="gap_open_sequencing_error_probability" value="40" />
-          <param name="platform_list_repeat" value="0" />
-          <param name="advanced_options_selector" value="basic" />
-          <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> 
-          <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" />
-      </test>
-  </tests>
-  <help>
+        </conditional>
+        <conditional name="advanced_options">
+            <param label="Set advanced options" name="advanced_options_selector" type="select">
+                <option selected="True" value="basic">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="advanced">
+                <conditional name="filter_by_flags">
+                    <param label="Set filter by flags" name="filter_flags" type="select">
+                        <option selected="True" value="nofilter">Do not filter</option>
+                        <option value="filter">Filter by flags to exclude or require</option>
+                    </param>
+                    <when value="filter">
+                        <param display="checkboxes" label="Require" multiple="True" name="require_flags" type="select" help="--incl-flags">
+                            <option value="1">Read is paired</option>
+                            <option value="2">Read is mapped in a proper pair</option>
+                            <option value="4">The read is unmapped</option>
+                            <option value="8">The mate is unmapped</option>
+                            <option value="16">Read strand</option>
+                            <option value="32">Mate strand</option>
+                            <option value="64">Read is the first in a pair</option>
+                            <option value="128">Read is the second in a pair</option>
+                            <option value="256">The alignment or this read is not primary</option>
+                            <option value="512">The read fails platform/vendor quality checks</option>
+                            <option value="1024">The read is a PCR or optical duplicate</option>
+                        </param>
+                        <param display="checkboxes" label="Exclude" multiple="True" name="exclude_flags" type="select" help="--excl-flags">
+                            <option value="1">Read is paired</option>
+                            <option value="2">Read is mapped in a proper pair</option>
+                            <option value="4">The read is unmapped</option>
+                            <option value="8">The mate is unmapped</option>
+                            <option value="16">Read strand</option>
+                            <option value="32">Mate strand</option>
+                            <option value="64">Read is the first in a pair</option>
+                            <option value="128">Read is the second in a pair</option>
+                            <option value="256">The alignment or this read is not primary</option>
+                            <option value="512">The read fails platform/vendor quality checks</option>
+                            <option value="1024">The read is a PCR or optical duplicate</option>
+                        </param>
+                    </when>
+                    <when value="nofilter" />
+                </conditional>
+                <conditional name="limit_by_region">
+                    <param label="Select regions to call" name="limit_by_regions" type="select">
+                        <option selected="True" value="no_limit">Do not limit</option>
+                        <option value="history">From an uploaded BED file (--positions)</option>
+                        <option value="paste">Paste a list of regions or BED (--region)</option>
+                    </param>
+                    <when value="history">
+                        <param format="bed" label="BED file" name="bed_regions" type="data" help="--positions">
+                            <validator type="dataset_ok_validator" />
+                        </param>
+                    </when>
+                    <when value="paste">
+                        <param area="true" help="Paste a list of regions in BED format or as a list of chromosomes and positions" label="Regions" name="region_paste" size="10x35" type="text"/>
+                    </when>
+                    <when value="no_limit" />
+                </conditional>
+                <conditional name="exclude_read_group">
+                    <param label="Select read groups to exclude" name="exclude_read_groups" type="select" help="--exclude-RG">
+                        <option selected="True" value="no_limit">Do not exclude</option>
+                        <option value="history">From an uploaded text file</option>
+                        <option value="paste">Paste a list of read groups</option>
+                    </param>
+                    <when value="history">
+                        <param format="txt" label="Text file" name="read_groups" type="data">
+                            <validator type="dataset_ok_validator" />
+                        </param>
+                    </when>
+                    <when value="paste">
+                        <param area="true" help="Paste a list of read groups" label="Read groups" name="group_paste" size="10x35" type="text" />
+                    </when>
+                    <when value="no_limit" />
+                </conditional>
+                <param checked="False" falsevalue="" label="Disable read-pair overlap detection" name="ignore_overlaps" truevalue="-x" type="boolean" help="--ignore-overlaps"/>
+                <param checked="False" falsevalue="" label="Do not skip anomalous read pairs in variant calling" name="skip_anomalous_read_pairs" truevalue="-A" type="boolean" help="--count-orphans"/>
+                <param checked="False" falsevalue="" label="Disable probabilistic realignment for the computation of base alignment quality (BAQ)" name="disable_probabilistic_realignment" truevalue="-B" type="boolean" help="--no-BAQ; BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments"/>
+                <param label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" name="coefficient_for_downgrading" type="integer" value="0" help="--adjust-MQ; Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. default=0"/>
+                <param label="Max reads per BAM" max="1024" min="1" name="max_reads_per_bam" type="integer" value="250" help="--max-depth; default=250"/>
+                <param checked="False" falsevalue="" label="Redo BAQ computation" name="extended_BAQ_computation" truevalue="-E" type="boolean" help="--redo-BAQ; ignore existing BQ tags"/>
+                <param label="Minimum mapping quality for an alignment to be used" name="minimum_mapping_quality" type="integer" value="0" help="-min-MQ; default=0"/>
+                <param label="Minimum base quality for a base to be considered" name="minimum_base_quality" type="integer" value="13" help="--min-BQ; default=13"/>
+            </when>
+            <when value="basic" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="pileup" label="${tool.name} on ${on_string}" name="output_mpileup">
+            <change_format>
+                <when format="bcf" input="genotype_likelihood_computation_type.output_format" value="--BCF" />
+                <when format="vcf" input="genotype_likelihood_computation_type.output_format" value="--VCF" />
+            </change_format>
+        </data>
+        <data format="txt" label="${tool.name} on ${on_string} (log)" name="output_log" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="reference_source_selector" value="history" />
+            <param ftype="fasta" name="ref_file" value="phiX.fasta" />
+            <param ftype="bam" name="input_bam" value="samtools_mpileup_in_1.bam" />
+            <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
+            <param name="advanced_options_selector" value="basic" />
+            <param name="base_position_on_reads" value="true" />
+            <param name="output_mapping_quality" value="true" />
+            <output file="samtools_mpileup_out_1.pileup" name="output_mpileup" />
+            <output file="samtools_mpileup_out_1.log" name="output_log" />
+        </test>
+        <test>
+            <param name="reference_source_selector" value="history" />
+            <param ftype="fasta" name="ref_file" value="phiX.fasta" />
+            <param ftype="bam" name="input_bam" value="phiX.bam" />
+            <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
+            <param name="gap_extension_sequencing_error_probability" value="20" />
+            <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
+            <param name="perform_indel_calling_selector" value="perform_indel_calling" />
+            <param name="skip_indel_calling_above_sample_depth" value="250" />
+            <param name="gap_open_sequencing_error_probability" value="40" />
+            <param name="platform_list_repeat" value="0" />
+            <param name="advanced_options_selector" value="basic" />
+            <param name="genotype_likelihood_computation_type|output_format" value="VCF" />
+            <output file="samtools_mpileup_out_2.vcf" ftype="vcf" lines_diff="8" name="output_mpileup" />
+            <output file="samtools_mpileup_out_2.log" name="output_log" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
 **What it does**
 
- Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. 
+Report variants for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. 
 
 ------
 
-**Settings**::
+**Input options**::
+
+  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding
+  -A, --count-orphans     do not discard anomalous read pairs
+  -b, --bam-list FILE     list of input BAM filenames, one per line
+  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)
+  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]
+  -d, --max-depth INT     max per-BAM depth; avoids excessive memory usage [250]
+  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs
+  -f, --fasta-ref FILE    faidx indexed reference sequence file
+  -G, --exclude-RG FILE   exclude read groups listed in FILE
+  -l, --positions FILE    skip unlisted positions (chr pos) or regions (BED)
+  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [0]
+  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [13]
+  -r, --region REG        region in which pileup is generated
+  -R, --ignore-RG         ignore RG tags (one BAM = one sample)
+  --rf, --incl-flags STR|INT  required flags: skip reads with mask bits unset []
+  --ff, --excl-flags STR|INT  filter flags: skip reads with mask bits set
+                                            [UNMAP,SECONDARY,QCFAIL,DUP]
+  -x, --ignore-overlaps   disable read-pair overlap detection
+
+**Output options**::
+
+  -o, --output FILE       write output to FILE [standard output]
+  -g, --BCF               generate genotype likelihoods in BCF format
+  -v, --VCF               generate genotype likelihoods in VCF format
+
+**Output options for mpileup format** (without -g/-v)::
+
+  -O, --output-BP         output base positions on reads
+  -s, --output-MQ         output mapping quality
+
+**Output options for genotype likelihoods** (when -g/-v is used)::
+
+  -t, --output-tags LIST  optional tags to output: DP,DPR,DV,DP4,INFO/DPR,SP []
+  -u, --uncompressed      generate uncompressed VCF/BCF output
+
+**SNP/INDEL genotype likelihoods options** (effective with -g/-v)::
 
- Input Options:
- -6 	Assume the quality is in the Illumina 1.3+ encoding.
- -A Do not skip anomalous read pairs in variant calling.
- -B 	Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
- -b FILE 	List of input BAM files, one file per line [null]
- -C INT 	Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0]
- -d INT 	At a position, read maximally INT reads per input BAM. [250]
- -E 	Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit.
- -f FILE 	The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null]
- -l FILE 	BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null]
- -q INT 	Minimum mapping quality for an alignment to be used [0]
- -Q INT 	Minimum base quality for a base to be considered [13]
- -r STR 	Only generate pileup in region STR [all sites]
- Output Options:
- 	
- -D 	Output per-sample read depth
- -g 	Compute genotype likelihoods and output them in the binary call format (BCF).
- -S 	Output per-sample Phred-scaled strand bias P-value
- -u 	Similar to -g except that the output is uncompressed BCF, which is preferred for piping.
- 
- Options for Genotype Likelihood Computation (for -g or -u):
-  	
- -e INT 	Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20]
- -h INT 	Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100]
- -I 	Do not perform INDEL calling
- -L INT 	Skip INDEL calling if the average per-sample depth is above INT. [250]
- -o INT 	Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40]
- -P STR 	Comma dilimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all]
+  -e, --ext-prob INT      Phred-scaled gap extension seq error probability [20]
+  -F, --gap-frac FLOAT    minimum fraction of gapped reads [0.002]
+  -h, --tandem-qual INT   coefficient for homopolymer errors [100]
+  -I, --skip-indels       do not perform indel calling
+  -L, --max-idepth INT    maximum per-sample depth for INDEL calling [250]
+  -m, --min-ireads INT    minimum number gapped reads for indel candidates [1]
+  -o, --open-prob INT     Phred-scaled gap open seq error probability [40]
+  -p, --per-sample-mF     apply -m and -F per-sample for increased sensitivity
+  -P, --platforms STR     comma separated list of platforms for indels [all]
 
-------
-
-**Citation**
-
-For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_
-
-If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
-
-  </help>
+**Notes**: Assuming diploid individuals.
+]]>
+    </help>
+    <configfiles>
+        <configfile name="excluded_read_groups">
+<![CDATA[
+#set pasted_data = ''
+#if str( $advanced_options.advanced_options_selector ) == "advanced":
+    #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
+        #set pasted_data = '\t'.join( str( $advanced_options.exclude_read_group['read_groups'] ).split() )
+    #end if
+#end if
+${pasted_data}
+]]>
+        </configfile>
+        <configfile name="pasted_regions">
+<![CDATA[
+#set pasted_data = ''
+#if str( $advanced_options.advanced_options_selector ) == "advanced":
+    #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
+        #set pasted_data = '\t'.join( str( $advanced_options.limit_by_region['region_paste'] ).split() )
+    #end if
+#end if
+${pasted_data}
+]]>
+        </configfile>
+    </configfiles>
+    <expand macro="citations" />
 </tool>
author	devteam
date	Tue, 21 Apr 2015 16:29:10 -0400
parents	973fea5b4bdf
children	aa0ef6f0ee89