Mercurial > repos > brad-chapman > bam_to_fastq
changeset 0:5a9ada9a3191 default tip
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
author | brad-chapman |
---|---|
date | Tue, 07 Jun 2011 16:27:36 -0400 |
parents | |
children | |
files | bam_to_fastq/bam_to_fastq-readme.txt bam_to_fastq/bam_to_fastq.xml bam_to_fastq/bam_to_fastq_wrapper.py |
diffstat | 3 files changed, 84 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_fastq/bam_to_fastq-readme.txt Tue Jun 07 16:27:36 2011 -0400 @@ -0,0 +1,9 @@ +Use Picard's SamToFastq program to convert BAM files to fastq. This makes it +easy to store reads in Galaxy as compressed, accessible BAM files but then +allow them to be extracted to feed into programs requiring fastq. + +Requires: + Picard (http://picard.sourceforge.net/) + The SamToFastq.jar file needs to be linked from this directory or available + in a standard directory like /usr/share/java/picard. + pysam (http://code.google.com/p/pysam/)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_fastq/bam_to_fastq.xml Tue Jun 07 16:27:36 2011 -0400 @@ -0,0 +1,27 @@ +<tool id="bam_to_fastq" name="BAM to fastq" force_history_refresh="True" version="0.0.1"> + <description>Convert BAM file to fastq</description> + <command interpreter="python">bam_to_fastq_wrapper.py $in_bam $out $out.id $__new_file_path__</command> + <inputs> + <param format="bam" name="in_bam" type="data" label="BAM file"/> + </inputs> + <outputs> + <data format="fastqsanger" name="out" metadata_source="in_bam"/> + </outputs> + +<help> +**What it does** + +Extract sequences and quality scores from a BAM file, converting into fastq files. + +**Input** + +A BAM alignment file. + +**Output** + +Fastq files with sequence and quality data. Output qualities are in Sanger format. +For single end data, one fastq file is produced; paired end data will have separate +fastq files for the forward and reverse reads. +</help> + +</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_fastq/bam_to_fastq_wrapper.py Tue Jun 07 16:27:36 2011 -0400 @@ -0,0 +1,48 @@
"""Wrapper script providing conversion from BAM to fastq, handling paired ends.

Requires:
  Picard (http://picard.sourceforge.net/)
    The SamToFastq.jar file needs to be linked from this directory or available
    in a standard directory like /usr/share/java/picard.
  pysam (http://code.google.com/p/pysam/)
"""
import os
import sys
import subprocess


def main(in_bam, out_fastq, out_id, extra_file_dir):
    """Convert a BAM file to fastq by running Picard's SamToFastq jar.

    in_bam -- path of the BAM file to convert.
    out_fastq -- path of the fastq file to write (first reads of each pair
      when the input is paired).
    out_id -- identifier embedded in the name of the second fastq file.
    extra_file_dir -- directory in which the second fastq file is created
      when the input is paired.

    Raises ValueError when the jar cannot be located and
    subprocess.CalledProcessError when java exits with a non-zero status.
    """
    out_fastq2 = check_for_paired(in_bam, out_id, extra_file_dir)
    picard_jar = find_picard_jar("SamToFastq")
    subprocess.check_call(
        build_command(picard_jar, in_bam, out_fastq, out_fastq2))


def build_command(picard_jar, in_bam, out_fastq, out_fastq2=None):
    """Return the java command line (a list of strings) for SamToFastq.

    SECOND_END_FASTQ is only passed when out_fastq2 is given, i.e. for
    paired input.
    """
    opts = [("INPUT", in_bam), ("FASTQ", out_fastq),
            ("QUIET", "true"), ("VERBOSITY", "WARNING")]
    if out_fastq2:
        opts.append(("SECOND_END_FASTQ", out_fastq2))
    return (["java", "-jar", picard_jar] +
            ["%s=%s" % (key, value) for key, value in opts])


def find_picard_jar(name):
    """Return the path of '<name>.jar', searching the known directories.

    The directory holding this script is tried first, then
    /usr/share/java/picard. Raises ValueError when neither contains the jar.
    """
    test_dirs = [os.path.dirname(__file__), "/usr/share/java/picard"]
    for d in test_dirs:
        f = os.path.join(d, "%s.jar" % name)
        if os.path.exists(f):
            return f
    raise ValueError("Could not find %s in %s" % (name, test_dirs))


def check_for_paired(in_bam, out_id, extra_file_dir):
    """Return the path for the second-end fastq file, or None if unpaired.

    The file name is assembled as primary_<out_id>_pair2_visible_fastqsanger
    inside extra_file_dir.
    NOTE(review): this presumably follows the naming Galaxy uses to pick up
    additional output datasets -- confirm against the Galaxy version in use.
    """
    if is_paired(in_bam):
        return os.path.join(extra_file_dir, "%s_%s_%s_%s_%s" %
                            ('primary', out_id, 'pair2', 'visible',
                             'fastqsanger'))
    else:
        return None


def is_paired(in_bam):
    """Report whether the first read in the BAM file is flagged as paired.

    Only the first read is inspected. A BAM file without any reads is
    treated as unpaired.
    """
    # Imported here because this is the only function that needs pysam; the
    # other helpers stay importable on machines without it.
    import pysam

    samfile = pysam.Samfile(in_bam, "rb")
    try:
        return first_read_is_paired(samfile.fetch(until_eof=True))
    finally:
        # Close the handle even if reading the first record fails.
        samfile.close()


def first_read_is_paired(reads):
    """Return the is_paired attribute of the first item in reads.

    Returns False when reads is empty instead of letting StopIteration
    escape.
    """
    # The builtin next() works on both Python 2.6+ and Python 3, unlike the
    # Python 2 only iterator method .next().
    first = next(iter(reads), None)
    if first is None:
        return False
    return first.is_paired


if __name__ == "__main__":
    if len(sys.argv) != 5:
        sys.stderr.write("Usage: bam_to_fastq_wrapper.py <in_bam> <out_fastq> "
                         "<out_id> <extra_file_dir>\n")
        sys.exit(1)
    main(*sys.argv[1:])