# HG changeset patch
# User devteam
# Date 1377541416 14400
# Node ID 44a18a94d7a9ba97e0872ad9f90b7e8f96e005d9
Uploaded tool tarball.
diff -r 000000000000 -r 44a18a94d7a9 samtools_mpileup.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_mpileup.xml Mon Aug 26 14:23:36 2013 -0400
@@ -0,0 +1,213 @@
+
+ SNP and indel caller
+
+ samtools
+
+ samtools_wrapper.py
+ -p 'samtools mpileup'
+ --stdout "${output_log}"
+ #if $reference_source.reference_source_selector != "history":
+ -p '-f "${reference_source.ref_file.fields.path}"'
+ #else:
+ -d "-f" "${reference_source.ref_file}" "fa" "reference_input"
+ #end if
+ #for $i, $input_bam in enumerate( $reference_source.input_bams ):
+ -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}"
+ -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index
+ #end for
+ -p '
+ #if str( $advanced_options.advanced_options_selector ) == "advanced":
+ ${advanced_options.skip_anomalous_read_pairs}
+ ${advanced_options.disable_probabilistic_realignment}
+ -C "${advanced_options.coefficient_for_downgrading}"
+ -d "${advanced_options.max_reads_per_bam}"
+ ${advanced_options.extended_BAQ_computation}
+ #if str( $advanced_options.position_list ) != 'None':
+ -l "${advanced_options.position_list}"
+ #end if
+ -q "${advanced_options.minimum_mapping_quality}"
+ -Q "${advanced_options.minimum_base_quality}"
+ #if str( $advanced_options.region_string ):
+ -r "${advanced_options.region_string}"
+ #end if
+ ${advanced_options.output_per_sample_read_depth}
+ ${advanced_options.output_per_sample_strand_bias_p_value}
+ #end if
+ #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation':
+ ##-g or -u
+ -g
+ -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}"
+ -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}"
+ #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
+ -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}"
+ #else:
+ -I
+ #end if
+ -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}"
+ #if len( $genotype_likelihood_computation_type.platform_list_repeat ):
+ -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }"
+ #end if
+ #end if
+ > "${output_mpileup}"
+ '
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+ Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.
+
+------
+
+**Settings**::
+
+ Input Options:
+ -6 Assume the quality is in the Illumina 1.3+ encoding.
+ -A Do not skip anomalous read pairs in variant calling.
+ -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
+ -b FILE List of input BAM files, one file per line [null]
+ -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0]
+ -d INT At a position, read maximally INT reads per input BAM. [250]
+ -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit.
+ -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null]
+ -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null]
+ -q INT Minimum mapping quality for an alignment to be used [0]
+ -Q INT Minimum base quality for a base to be considered [13]
+ -r STR Only generate pileup in region STR [all sites]
+ Output Options:
+
+ -D Output per-sample read depth
+ -g Compute genotype likelihoods and output them in the binary call format (BCF).
+ -S Output per-sample Phred-scaled strand bias P-value
+ -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping.
+
+ Options for Genotype Likelihood Computation (for -g or -u):
+
+ -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20]
+ -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100]
+ -I Do not perform INDEL calling
+ -L INT Skip INDEL calling if the average per-sample depth is above INT. [250]
+ -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40]
+ -P STR Comma delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all]
+
+------
+
+**Citation**
+
+For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_
+
+If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
+
+
+
diff -r 000000000000 -r 44a18a94d7a9 samtools_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_wrapper.py Mon Aug 26 14:23:36 2013 -0400
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+#Dan Blankenberg
+
+"""
+A wrapper script for running SAMTools commands.
+"""
+
+import sys, optparse, os, tempfile, subprocess, shutil
+from string import Template
+
+GALAXY_EXT_TO_SAMTOOLS_EXT = { 'bam_index':'bam.bai', } #filename suffix to use per galaxy extension; items not listed here will use the galaxy extension as-is
+GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE = GALAXY_EXT_TO_SAMTOOLS_EXT #value substituted for %(file_type)s in arguments; for now, these are the same, but could be different if needed
+DEFAULT_SAMTOOLS_PREFIX = "SAMTools_file" #filename prefix used for linked files when the caller does not supply one
+CHUNK_SIZE = 2**20 #1mb (1048576)
+
+
+def cleanup_before_exit( tmp_dir ):
+ if tmp_dir and os.path.exists( tmp_dir ):
+ shutil.rmtree( tmp_dir )
+
+def SAMTOOLS_filename_from_galaxy( galaxy_filename, galaxy_ext, target_dir = None, prefix = None ):
+ suffix = GALAXY_EXT_TO_SAMTOOLS_EXT.get( galaxy_ext, galaxy_ext )
+ if prefix is None:
+ prefix = DEFAULT_SAMTOOLS_PREFIX
+ if target_dir is None:
+ target_dir = os.getcwd()
+ SAMTools_filename = os.path.join( target_dir, "%s.%s" % ( prefix, suffix ) )
+ os.symlink( galaxy_filename, SAMTools_filename )
+ return SAMTools_filename
+
+def SAMTOOLS_filetype_argument_substitution( argument, galaxy_ext ):
+ return argument % dict( file_type = GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE.get( galaxy_ext, galaxy_ext ) )
+
+def open_file_from_option( filename, mode = 'rb' ):
+ if filename:
+ return open( filename, mode = mode )
+ return None
+
+def html_report_from_directory( html_out, dir ):
+ html_out.write( '\n
\nGalaxy - SAMTOOLS Output\n\n\n\n
\n' )
+ for fname in sorted( os.listdir( dir ) ):
+ html_out.write( '