Mercurial > repos > scisjnu123 > test
changeset 26:81a543035990 draft
Uploaded
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/samtools_mpileup.xml Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,213 @@ +<tool id="samtools_mpileup" name="MPileup" version="0.0.3"> + <description>SNP and indel caller</description> + <requirements> + <requirement type="package" version="0.1.19">samtools</requirement> + </requirements> + <command interpreter="python">samtools_wrapper.py + -p 'samtools mpileup' + --stdout "${output_log}" + #if $reference_source.reference_source_selector != "history": + -p '-f "${reference_source.ref_file.fields.path}"' + #else: + -d "-f" "${reference_source.ref_file}" "fa" "reference_input" + #end if + #for $i, $input_bam in enumerate( $reference_source.input_bams ): + -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}" + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index + #end for + -p ' + #if str( $advanced_options.advanced_options_selector ) == "advanced": + ${advanced_options.skip_anomalous_read_pairs} + ${advanced_options.disable_probabilistic_realignment} + -C "${advanced_options.coefficient_for_downgrading}" + -d "${advanced_options.max_reads_per_bam}" + ${advanced_options.extended_BAQ_computation} + #if str( $advanced_options.position_list ) != 'None': + -l "${advanced_options.position_list}" + #end if + -q "${advanced_options.minimum_mapping_quality}" + -Q "${advanced_options.minimum_base_quality}" + #if str( $advanced_options.region_string ): + -r "${advanced_options.region_string}" + #end if + ${advanced_options.output_per_sample_read_depth} + ${advanced_options.output_per_sample_strand_bias_p_value} + #end if + #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation': + ##-g or -u + -g + -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" + -h 
"${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" + #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': + -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" + #else: + -I + #end if + -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" + #if len( $genotype_likelihood_computation_type.platform_list_repeat ): + -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" + #end if + #end if + > "${output_mpileup}" + ' + </command> + <inputs> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"> + <repeat name="input_bams" title="BAM file" min="1"> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> + </param> + </repeat> + <param name="ref_file" type="select" label="Using reference genome"> + <options from_data_table="fasta_indexes"> + <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> + </options> + </param> + </when> + <when value="history"> <!-- FIX ME!!!! 
--> + <repeat name="input_bams" title="BAM file" min="1"> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> + </param> + </repeat> + <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + </when> + </conditional> + + + <conditional name="genotype_likelihood_computation_type"> + <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> + <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> + <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> + </param> + <when value="perform_genotype_likelihood_computation"> + <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> + <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." 
/> + <conditional name="perform_indel_calling"> + <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> + <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> + <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> + </param> + <when value="perform_indel_calling"> + <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> + </when> + <when value="do_not_perform_indel_calling" /> + </conditional> + <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> + <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> + <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> + </repeat> + </when> + <when value="do_not_perform_genotype_likelihood_computation"> + <!-- Do nothing here --> + </when> + </conditional> + <conditional name="advanced_options"> + <param name="advanced_options_selector" type="select" label="Set advanced options"> + <option value="basic" selected="True">Basic</option> + <option value="advanced">Advanced</option> + </param> + <when value="advanced"> + <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> + <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> + <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> + <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> + <param name="extended_BAQ_computation" type="boolean" 
truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> + <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" /> + <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> + <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> + <param name="region_string" type="text" value="" label="Only generate pileup in region" /> + <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> + <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> + </when> + <when value="basic" /> + </conditional> + </inputs> + <outputs> + <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> + <change_format> + <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> + </change_format> + </data> + <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" value="phiX.fasta" ftype="fasta" /> + <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> + <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> + <param name="advanced_options_selector" value="basic" /> + <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> + <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> + </test> + <test> + <param 
name="reference_source_selector" value="history" /> + <param name="ref_file" value="phiX.fasta" ftype="fasta" /> + <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> + <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> + <param name="gap_extension_sequencing_error_probability" value="20" /> + <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> + <param name="perform_indel_calling_selector" value="perform_indel_calling" /> + <param name="skip_indel_calling_above_sample_depth" value="250" /> + <param name="gap_open_sequencing_error_probability" value="40" /> + <param name="platform_list_repeat" value="0" /> + <param name="advanced_options_selector" value="basic" /> + <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> + <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> + </test> + </tests> + <help> +**What it does** + + Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. + +------ + +**Settings**:: + + Input Options: + -6 Assume the quality is in the Illumina 1.3+ encoding. + -A Do not skip anomalous read pairs in variant calling. + -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. + -b FILE List of input BAM files, one file per line [null] + -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. 
A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] + -d INT At a position, read maximally INT reads per input BAM. [250] + -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. + -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null] + -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] + -q INT Minimum mapping quality for an alignment to be used [0] + -Q INT Minimum base quality for a base to be considered [13] + -r STR Only generate pileup in region STR [all sites] + Output Options: + + -D Output per-sample read depth + -g Compute genotype likelihoods and output them in the binary call format (BCF). + -S Output per-sample Phred-scaled strand bias P-value + -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping. + + Options for Genotype Likelihood Computation (for -g or -u): + + -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20] + -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100] + -I Do not perform INDEL calling + -L INT Skip INDEL calling if the average per-sample depth is above INT. [250] + -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40] + -P STR Comma delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all] + +------ + +**Citation** + +For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. 
The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + </help> +</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/samtools_wrapper.py Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,110 @@
#!/usr/bin/env python
#Dan Blankenberg

"""
A wrapper script for running SAMTools commands.

The wrapper symlinks every Galaxy dataset into a private temporary directory
under a name carrying the extension SAMTools expects, splices the resulting
paths into the pass-through command line, runs the command through the shell
and finally relays whatever the command printed on stderr.
"""

import sys, optparse, os, tempfile, subprocess, shutil
from string import Template
from xml.sax.saxutils import escape

GALAXY_EXT_TO_SAMTOOLS_EXT = { 'bam_index':'bam.bai', } #items not listed here will use the galaxy extension as-is
GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE = GALAXY_EXT_TO_SAMTOOLS_EXT #for now, these are the same, but could be different if needed
DEFAULT_SAMTOOLS_PREFIX = "SAMTools_file"
CHUNK_SIZE = 2**20 #1mb


def cleanup_before_exit( tmp_dir ):
    """Remove the temporary working directory, if one was created and still exists."""
    if tmp_dir and os.path.exists( tmp_dir ):
        shutil.rmtree( tmp_dir )

def SAMTOOLS_filename_from_galaxy( galaxy_filename, galaxy_ext, target_dir = None, prefix = None ):
    """Symlink a Galaxy dataset under a SAMTools-friendly name and return the link path.

    galaxy_filename -- path of the dataset to link to
    galaxy_ext      -- Galaxy extension; mapped through GALAXY_EXT_TO_SAMTOOLS_EXT,
                       unknown extensions are used as-is
    target_dir      -- directory that receives the link (default: current directory)
    prefix          -- base name of the link (default: DEFAULT_SAMTOOLS_PREFIX)

    os.symlink raises OSError if the link name already exists.
    """
    suffix = GALAXY_EXT_TO_SAMTOOLS_EXT.get( galaxy_ext, galaxy_ext )
    if prefix is None:
        prefix = DEFAULT_SAMTOOLS_PREFIX
    if target_dir is None:
        target_dir = os.getcwd()
    SAMTools_filename = os.path.join( target_dir, "%s.%s" % ( prefix, suffix ) )
    os.symlink( galaxy_filename, SAMTools_filename )
    return SAMTools_filename

def SAMTOOLS_filetype_argument_substitution( argument, galaxy_ext ):
    """Fill a '%(file_type)s' placeholder in argument with the SAMTools file type for galaxy_ext.

    Arguments without a placeholder are returned unchanged.
    """
    return argument % dict( file_type = GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE.get( galaxy_ext, galaxy_ext ) )

def open_file_from_option( filename, mode = 'rb' ):
    """Open filename with the given mode, or return None when no filename was supplied."""
    if filename:
        return open( filename, mode = mode )
    return None

def html_report_from_directory( html_out, dir ):
    """Write a minimal HTML index of the (sorted) entries of dir to the text stream html_out.

    File names are HTML-escaped so that characters such as '&', '<' or '"'
    cannot break the generated markup.
    """
    html_out.write( '<html>\n<head>\n<title>Galaxy - SAMTOOLS Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' )
    for fname in sorted( os.listdir( dir ) ):
        safe_name = escape( fname, { '"': '&quot;' } )
        html_out.write( '<li><a href="%s">%s</a></li>\n' % ( safe_name, safe_name ) )
    html_out.write( '</ul>\n</body>\n</html>\n' )

def _binary_stream( stream ):
    """Return the byte-oriented side of a standard stream (Python 3 '.buffer'), or the stream itself."""
    return getattr( stream, 'buffer', stream )

def _relay_captured_stderr( captured, stdout_file, failed ):
    """Copy the captured stderr of the child process to its final destination.

    On failure the text goes to the wrapper's own stderr; on success it is
    appended to the --stdout file when one was given, else to the wrapper's stdout.
    """
    if failed:
        target = _binary_stream( sys.stderr )
    elif stdout_file:
        target = stdout_file
    else:
        target = _binary_stream( sys.stdout )
    captured.flush()
    captured.seek( 0 )
    shutil.copyfileobj( captured, target, CHUNK_SIZE )
    target.flush()

def __main__():
    """Parse the command line, run the SAMTools command and exit with its return code."""
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '-p', '--pass_through', dest='pass_through_options', action='append', type="string", help='These options are passed through directly to SAMTOOLS, without any modification.' )
    parser.add_option( '-d', '--dataset', dest='datasets', action='append', type="string", nargs=4, help='"-argument" "original_filename" "galaxy_filetype" "name_prefix"' )
    parser.add_option( '', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.' )
    parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' )
    parser.add_option( '', '--html_report_from_directory', dest='html_report_from_directory', action='append', type="string", nargs=2, help='"Target HTML File" "Directory"')
    (options, args) = parser.parse_args()

    tmp_dir = tempfile.mkdtemp( prefix='tmp-SAMTOOLS-' )
    stdout = None
    stderr = None
    return_code = None
    try:
        #set up stdout and stderr output options
        stdout = open_file_from_option( options.stdout, mode = 'wb' )
        #'w+b' rather than 'wb': the captured stderr is read back below
        stderr = open_file_from_option( options.stderr, mode = 'w+b' )
        #if no stderr file is specified, we'll use our own
        if stderr is None:
            stderr = tempfile.NamedTemporaryFile( prefix="SAMTOOLS-stderr-", dir=tmp_dir )

        cmd = ' '.join( options.pass_through_options or [] )
        for ( dataset_arg, filename, galaxy_ext, prefix ) in ( options.datasets or [] ):
            SAMTools_filename = SAMTOOLS_filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )
            if dataset_arg:
                dataset_cli = '%s "%s"' % ( SAMTOOLS_filetype_argument_substitution( dataset_arg, galaxy_ext ), SAMTools_filename )
                if '>' in cmd:
                    #inputs must precede the shell redirection, so insert before the first '>'
                    #NOTE(review): a '>' inside an earlier pass-through value would be mistaken for the redirection
                    cmd = cmd.replace( '>', ' %s >' % dataset_cli, 1 )
                else:
                    cmd = '%s %s' % ( cmd, dataset_cli )
            #auto index fasta files:
            if galaxy_ext == 'fa':
                #argument list without a shell: no quoting problems with the path
                proc = subprocess.Popen( args=[ 'samtools', 'faidx', SAMTools_filename ], stdout=stdout, stderr=stderr, cwd=tmp_dir )
                return_code = proc.wait()
                if return_code:
                    break
        if not return_code:
            #the command carries its own '>' redirection, hence shell=True
            proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
            return_code = proc.wait()

        _relay_captured_stderr( stderr, stdout, failed = bool( return_code ) )

        #generate html reports
        if options.html_report_from_directory:
            for ( html_filename, html_dir ) in options.html_report_from_directory:
                with open( html_filename, 'w' ) as html_out:
                    html_report_from_directory( html_out, html_dir )
    finally:
        #always release the handles and the temporary directory, even on errors
        for handle in ( stderr, stdout ):
            if handle is not None:
                handle.close()
        cleanup_before_exit( tmp_dir )

    #propagate a failure of the wrapped command to the caller
    if return_code:
        sys.exit( return_code )

if __name__=="__main__": __main__()
Binary file samtools/samtools_mpileup/test-data/gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/test-data/phiX.fasta Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT 
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA 
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/test-data/samtools/mpileup/samtools_mpileup_out_1.log Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,2 @@ +[mpileup] 1 samples in 1 input files +<mpileup> Set max per-file depth to 8000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/test-data/samtools/mpileup/samtools_mpileup_out_1.pileup Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,43 @@ +phiX174 1411 A 1 ^P. $ +phiX174 1412 G 3 .^D.^F. "$$ +phiX174 1413 C 5 ...^D.^F. """$$ +phiX174 1414 G 6 .....^F. #####$ +phiX174 1415 C 7 ......^F. %%%%%%& +phiX174 1416 C 8 .......^F. $$$$$$$$ +phiX174 1417 G 9 ........^F. "#######$ +phiX174 1418 T 10 .........^F. """""""""$ +phiX174 1419 G 10 .......... """""'&'%$ +phiX174 1420 G 10 .......... """""""""" +phiX174 1421 A 10 .......... """""""""" +phiX174 1422 T 10 .......... """""""""" +phiX174 1423 G 10 .......... """""""""# +phiX174 1424 C 10 ..A.AAAAAA %""""""""" +phiX174 1425 C 10 .......... $$$""""""" +phiX174 1426 T 10 .......... #####""""" +phiX174 1427 G 10 .......... ######"""" +phiX174 1428 A 10 .......... """""""""" +phiX174 1429 C 10 .......... ((((((&("" +phiX174 1430 C 10 .......... $$$$$$$$$" +phiX174 1431 G 10 .......... ########## +phiX174 1432 T 10 .......... """""""""" +phiX174 1433 A 10 .......... ########## +phiX174 1434 C 10 .......... ((((((&(%$ +phiX174 1435 C 10 .......... $$$$$$$$$$ +phiX174 1436 G 10 .......... ########## +phiX174 1437 A 10 .......... """""""""! +phiX174 1438 G 10 .......... """""####! +phiX174 1439 G 10 .......... """""""""! +phiX174 1440 C 10 .......... """""""""! +phiX174 1441 T 10 .......... """"""""#! +phiX174 1442 A 10 .......... $$$%%%&&%! +phiX174 1443 A 10 .-1C.-1C..-1C...... """""""""! +phiX174 1444 C 10 **.*...... &%"!"""""! +phiX174 1445 C 10 .......... &%&!%%%&%! +phiX174 1446 C 10 .......... """!"""""! +phiX174 1447 T 10 .$..$....... #"#!"""""! +phiX174 1448 A 8 .$..$..... #!#%%$$! +phiX174 1449 A 6 .$.$.... !""""! +phiX174 1450 T 4 .$... """! +phiX174 1451 G 3 .$.. #"! +phiX174 1452 A 2 .$. "! +phiX174 1453 G 1 .$ !
Binary file samtools/samtools_mpileup/test-data/samtools/mpileup/samtools_mpileup_out_2.bcf has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/tool-data/fasta_indexes.loc.sample Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/tool_data_table_conf.xml.sample Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,7 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_mpileup/tool_dependencies.xml Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="samtools" version="0.1.19"> + <repository changeset_revision="1ef76f8d8e52" name="package_samtools_0_1_19" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_sort/macros.xml Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,70 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + <yield/> + </requirements> + </xml> + <xml name="citations"> + <citations> + <citation type="bibtex"> + @misc{SAM_def, + title={Definition of SAM/BAM format}, + url = {https://samtools.github.io/hts-specs/SAMv1.pdf},} + </citation> + <citation type="doi">10.1093/bioinformatics/btp352</citation> + <citation type="doi">10.1093/bioinformatics/btr076</citation> + <citation type="doi">10.1093/bioinformatics/btr509</citation> + <citation type="bibtex"> + @misc{Danecek_et_al, + Author={Danecek, P., Schiffels, S., Durbin, R.}, + title={Multiallelic calling model in bcftools (-m)}, + url = {http://samtools.github.io/bcftools/call-m.pdf},} + </citation> + <citation type="bibtex"> + @misc{Durbin_VCQC, + Author={Durbin, R.}, + title={Segregation based metric for variant call QC}, + url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} + </citation> + <citation type="bibtex"> + @misc{Li_SamMath, + Author={Li, H.}, + title={Mathematical Notes on SAMtools Algorithms}, + url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} + </citation> + <citation type="bibtex"> + @misc{SamTools_github, + title={SAMTools GitHub page}, + url = {https://github.com/samtools/samtools},} + </citation> + </citations> + </xml> + <xml name="version_command"> + <version_command>samtools --version | head -n 1 | awk '{ print $2 }'</version_command> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + </xml> + <token name="@no-chrom-options@"> +----- + +.. class:: warningmark + +**No options available? 
How to re-detect metadata** + +If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps: + +1. Click on the **pencil** icon adjacent to the dataset in the history +2. A new menu will appear in the center pane of the interface +3. Click **Datatype** tab +4. Set **New Type** to **BAM** +5. Click **Save** + +The metadata will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. + + </token> + +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_sort/samtools_sort.xml Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,40 @@ +<tool id="samtools_sort" name="Sort" version="2.0"> + <description>BAM dataset</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"></expand> + <expand macro="stdio"></expand> + <expand macro="version_command"></expand> + <command>samtools sort $sort_mode -@ \${GALAXY_SLOTS:-1} -o "${output1}" -O bam -T dataset "${input1}"</command> + <inputs> + <param name="input1" type="data" format="bam" label="BAM File" /> + <param name="sort_mode" type="select" label="Sort by "> + <option value="" selected="True">Chromosomal coordinates</option> + <option value="-n">Read names (-n)</option> + </param> + </inputs> + <outputs> + <data name="output1" format="bam" /> + </outputs> + <tests> + <test> + <param name="input1" value="1.bam" ftype="bam" /> + <output name="output1" file="1_sort.bam" ftype="bam" sort="True"/> + </test> + <test> + <param name="input1" value="1.bam" ftype="bam" /> + <param name="sort_mode" value="-n"/> + <output name="output1" file="1_sort_read_names.bam" ftype="bam" sort="True"/> + </test> + </tests> + <help> + +**What it does** + +This tool uses ``samtools sort`` command to sort BAM datasets in coordinate or read name order. + + + </help> + <expand macro="citations"></expand> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/samtools_sort/tool_dependencies.xml Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="samtools" version="1.2"> + <repository changeset_revision="6eea04363026" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools/suite_samtools_1_2/repository_dependencies.xml Thu Oct 03 11:02:57 2019 -0400 @@ -0,0 +1,17 @@ +<?xml version="1.0"?> +<repositories description="A suite of Galaxy tools designed to work with version 1.2 of the SAMtools package."> + <repository changeset_revision="cf875cbe2df4" name="data_manager_sam_fasta_index_builder" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="af7c50162f0b" name="bam_to_sam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="d04d9f1c6791" name="sam_to_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="8c3472790020" name="samtools_bedcov" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="1ebb4ecdc1ef" name="samtools_calmd" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="0072bf593791" name="samtools_flagstat" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="87398ae795c7" name="samtools_idxstats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c6fdfe3331d6" name="samtools_mpileup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="020e144b5f78" name="samtools_reheader" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="3735f950b2f5" name="samtools_rmdup" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="2b474ebbfc7d" name="samtools_slice_bam" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="a430da4f04cd" name="samtools_sort" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="57f3e32f809d" name="samtools_split" owner="devteam" 
toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="0d71d9467847" name="samtools_stats" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> +</repositories>
--- a/samtools_sort/macros.xml Fri Sep 27 08:55:46 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -<macros> - <xml name="requirements"> - <requirements> - <requirement type="package" version="1.2">samtools</requirement> - <yield/> - </requirements> - </xml> - <xml name="citations"> - <citations> - <citation type="bibtex"> - @misc{SAM_def, - title={Definition of SAM/BAM format}, - url = {https://samtools.github.io/hts-specs/SAMv1.pdf},} - </citation> - <citation type="doi">10.1093/bioinformatics/btp352</citation> - <citation type="doi">10.1093/bioinformatics/btr076</citation> - <citation type="doi">10.1093/bioinformatics/btr509</citation> - <citation type="bibtex"> - @misc{Danecek_et_al, - Author={Danecek, P., Schiffels, S., Durbin, R.}, - title={Multiallelic calling model in bcftools (-m)}, - url = {http://samtools.github.io/bcftools/call-m.pdf},} - </citation> - <citation type="bibtex"> - @misc{Durbin_VCQC, - Author={Durbin, R.}, - title={Segregation based metric for variant call QC}, - url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} - </citation> - <citation type="bibtex"> - @misc{Li_SamMath, - Author={Li, H.}, - title={Mathematical Notes on SAMtools Algorithms}, - url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} - </citation> - <citation type="bibtex"> - @misc{SamTools_github, - title={SAMTools GitHub page}, - url = {https://github.com/samtools/samtools},} - </citation> - </citations> - </xml> - <xml name="version_command"> - <version_command>samtools --version | head -n 1 | awk '{ print $2 }'</version_command> - </xml> - <xml name="stdio"> - <stdio> - <exit_code range="1:" level="fatal" description="Error" /> - </stdio> - </xml> - <token name="@no-chrom-options@"> ------ - -.. class:: warningmark - -**No options available? 
How to re-detect metadata** - -If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps: - -1. Click on the **pencil** icon adjacent to the dataset in the history -2. A new menu will appear in the center pane of the interface -3. Click **Datatype** tab -4. Set **New Type** to **BAM** -5. Click **Save** - -The medatada will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down. - - </token> - -</macros>
--- a/samtools_sort/samtools_sort.xml Fri Sep 27 08:55:46 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -<tool id="samtools_sort" name="Sort" version="2.0"> - <description>BAM dataset</description> - <macros> - <import>macros.xml</import> - </macros> - <expand macro="requirements"></expand> - <expand macro="stdio"></expand> - <expand macro="version_command"></expand> - <command>samtools sort "${input1}" "${output1}"</command> - <inputs> - <param name="input1" type="data" format="bam" label="BAM File" /> - <param name="sort_mode" type="select" label="Sort by "> - <option value="" selected="True">Chromosomal coordinates</option> - <option value="-n">Read names (-n)</option> - </param> - </inputs> - <outputs> - <data name="output1" format="bam" /> - </outputs> - - <expand macro="citations"></expand> -</tool>
--- a/samtools_sort/tool_dependencies.xml Fri Sep 27 08:55:46 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="samtools" version="1.2"> - <repository changeset_revision="6eea04363026" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>