Mercurial > repos > devteam > bam_to_sam
changeset 3:c1419fa820c6 draft
Uploaded
author | devteam |
---|---|
date | Wed, 18 Mar 2015 15:44:51 -0400 |
parents | c09a20532957 |
children | af7c50162f0b |
files | bam_to_sam.py bam_to_sam.xml macros.xml tool_dependencies.xml |
diffstat | 4 files changed, 65 insertions(+), 186 deletions(-) [+] |
line wrap: on
line diff
--- a/bam_to_sam.py Thu Mar 05 21:22:11 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -#!/usr/bin/env python -""" -Converts BAM data to sorted SAM data. -usage: bam_to_sam.py [options] - --input1: SAM file to be converted - --output1: output dataset in bam format -""" - -import optparse, os, sys, subprocess, tempfile, shutil -from galaxy import eggs -import pkg_resources; pkg_resources.require( "bx-python" ) -from bx.cookbook import doc_optparse -#from galaxy import util - -def stop_err( msg ): - sys.stderr.write( '%s\n' % msg ) - sys.exit() - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' ) - parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' ) - parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' ) - ( options, args ) = parser.parse_args() - - # output version # of tool - try: - tmp = tempfile.NamedTemporaryFile().name - tmp_stdout = open( tmp, 'wb' ) - proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout ) - tmp_stdout.close() - returncode = proc.wait() - stdout = None - for line in open( tmp_stdout.name, 'rb' ): - if line.lower().find( 'version' ) >= 0: - stdout = line.strip() - break - if stdout: - sys.stdout.write( 'Samtools %s\n' % stdout ) - else: - raise Exception - except: - sys.stdout.write( 'Could not determine Samtools version\n' ) - - tmp_dir = tempfile.mkdtemp( dir='.' ) - - try: - # exit if input file empty - if os.path.getsize( options.input1 ) == 0: - raise Exception, 'Initial BAM file empty' - # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command - # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted - # into memory ( controlled by option -m ). - tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir ) - tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name - tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name - tmp_sorted_aligns_file.close() - command = 'samtools sort %s %s' % ( options.input1, tmp_sorted_aligns_file_base ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - # exit if sorted BAM file empty - if os.path.getsize( tmp_sorted_aligns_file_name) == 0: - raise Exception, 'Intermediate sorted BAM file empty' - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error sorting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - - - try: - # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ). - if options.header: - view_options = "-h" - else: - view_options = "" - command = 'samtools view %s -o %s %s' % ( view_options, options.output1, tmp_sorted_aligns_file_name ) - tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name - tmp_stderr = open( tmp, 'wb' ) - proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for case where it's very large - tmp_stderr = open( tmp, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - except Exception, e: - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) ) - #clean up temp files - if os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - # check that there are results in the output file - if os.path.getsize( options.output1 ) > 0: - sys.stdout.write( 'BAM file converted to SAM' ) - else: - stop_err( 'The output file is empty, there may be an error with your input file.' ) - -if __name__=="__main__": __main__()
--- a/bam_to_sam.xml Thu Mar 05 21:22:11 2015 -0500 +++ b/bam_to_sam.xml Wed Mar 18 15:44:51 2015 -0400 @@ -1,63 +1,50 @@ -<tool id="bam_to_sam" name="BAM-to-SAM" version="1.0.4"> - <requirements> - <requirement type="package" version="0.1.19">samtools</requirement> - </requirements> - <description>converts BAM format to SAM format</description> - <command interpreter="python"> - bam_to_sam.py - --input1=$input1 - --output1=$output1 - $header - </command> - <inputs> - <param name="input1" type="data" format="bam" label="BAM File to Convert" /> - <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" label="Include header in output" /> - </inputs> - <outputs> - <data format="sam" name="output1" label="${tool.name} on ${on_string}: converted SAM" /> - </outputs> - <tests> - <test> - <!-- - Bam-to-Sam command: - samtools view -o bam_to_sam_out1.sam test-data/bam_to_sam_in1.bam - bam_to_sam_in1.bam can be created from bam_to_sam_in1.sam - --> - <param name="input1" value="bam_to_sam_in1.bam" ftype="bam" /> - <param name="header" value="" /> - <output name="output1" file="bam_to_sam_out1.sam" sorted="True" /> - </test> - <test> - <!-- - Bam-to-Sam command: - samtools view -o bam_to_sam_out2.sam test-data/bam_to_sam_in2.bam - bam_to_sam_in2.bam can be created from bam_to_sam_in2.sam - --> - <param name="input1" value="bam_to_sam_in2.bam" ftype="bam" /> - <param name="header" value="" /> - <output name="output1" file="bam_to_sam_out2.sam" sorted="True" /> - </test> - <test> - <!-- - Bam-to-Sam command: - samtools view -h -o bam_to_sam_out3.sam test-data/bam_to_sam_in1.bam - bam_to_sam_in1.bam can be created from bam_to_sam_in1.sam - --> - <param name="input1" value="bam_to_sam_in1.bam" ftype="bam" /> - <param name="header" value="--header" /> - <output name="output1" file="bam_to_sam_out3.sam" sorted="True" lines_diff="6" /><!-- header param not working in func tests so won't produce correct 6-line header (fine in browser) --> - </test> - </tests> - <help> +<tool id="bam_to_sam" name="BAM-to-SAM" version="1.0.6"> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"></expand> + <expand macro="version_command"></expand> + <expand macro="stdio"></expand> + <description>converts BAM format to SAM format</description> + <command> +<![CDATA[ + samtools sort -O bam -@ \${GALAXY_SLOTS:-1} "$input1" -o sorted_input.bam -T temp && + samtools view sorted_input.bam $header -o $output1 -@ \${GALAXY_SLOTS:-1} +]]> + </command> + <inputs> + <param format="bam" label="BAM File to Convert" name="input1" type="data" /> + <param checked="False" falsevalue="" label="Include header in output" name="header" truevalue="-h" type="boolean" /> + </inputs> + <outputs> + <data format="sam" label="${tool.name} on ${on_string}: converted SAM" name="output1" /> + </outputs> + <tests> + <test> + <param ftype="bam" name="input1" value="bam_to_sam_in1.bam" /> + <param name="header" value="" /> + <output file="bam_to_sam_out1.sam" name="output1" sorted="True" /> + </test> + <test> + <param ftype="bam" name="input1" value="bam_to_sam_in2.bam" /> + <param name="header" value="" /> + <output file="bam_to_sam_out2.sam" name="output1" sorted="True" /> + </test> + <test> + <param ftype="bam" name="input1" value="bam_to_sam_in1.bam" /> + <param name="header" value="True" /> + <output file="bam_to_sam_out3.sam" name="output1" sorted="True" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** This tool uses the SAMTools_ toolkit to produce a SAM file from a BAM file. .. _SAMTools: http://samtools.sourceforge.net/samtools.shtml - +]]> </help> - <citations> - <citation type="doi">10.1093/bioinformatics/btp352</citation> - </citations> + <expand macro="citations"></expand> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Mar 18 15:44:51 2015 -0400 @@ -0,0 +1,21 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + <yield/> + </requirements> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1093/bioinformatics/btp352</citation> + </citations> + </xml> + <xml name="version_command"> + <version_command>samtools --version | head -n 1 | awk '{ print $2 }'</version_command> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + </xml> +</macros>
--- a/tool_dependencies.xml Thu Mar 05 21:22:11 2015 -0500 +++ b/tool_dependencies.xml Wed Mar 18 15:44:51 2015 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="samtools" version="0.1.19"> - <repository changeset_revision="95d2c4aefb5f" name="package_samtools_0_1_19" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="samtools" version="1.2"> + <repository changeset_revision="6eea04363026" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>