# HG changeset patch
# User devteam
# Date 1429652269 14400
# Node ID 2b474ebbfc7d377c87ce58af6959d691b2dc9b31
# Parent 74a8d2d6025821f4d0cacb604473633d687d3daf
Uploaded
diff -r 74a8d2d60258 -r 2b474ebbfc7d macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 21 17:37:49 2015 -0400
@@ -0,0 +1,70 @@
+
+
+
+ samtools
+
+
+
+
+
+
+ @misc{SAM_def,
+ title={Definition of SAM/BAM format},
+ url = {https://samtools.github.io/hts-specs/SAMv1.pdf},}
+
+ 10.1093/bioinformatics/btp352
+ 10.1093/bioinformatics/btr076
+ 10.1093/bioinformatics/btr509
+
+ @misc{Danecek_et_al,
+ Author={Danecek, P., Schiffels, S., Durbin, R.},
+ title={Multiallelic calling model in bcftools (-m)},
+ url = {http://samtools.github.io/bcftools/call-m.pdf},}
+
+
+ @misc{Durbin_VCQC,
+ Author={Durbin, R.},
+ title={Segregation based metric for variant call QC},
+ url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
+
+
+ @misc{Li_SamMath,
+ Author={Li, H.},
+ title={Mathematical Notes on SAMtools Algorithms},
+ url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
+
+
+ @misc{SamTools_github,
+ title={SAMTools GitHub page},
+ url = {https://github.com/samtools/samtools},}
+
+
+
+
+ samtools --version | head -n 1 | awk '{ print $2 }'
+
+
+
+
+
+
+
+-----
+
+.. class:: warningmark
+
+**No options available? How to re-detect metadata**
+
+If you see "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down, you need to re-detect metadata for the dataset you are trying to process. To do this, follow these steps:
+
+1. Click on the **pencil** icon adjacent to the dataset in the history
+2. A new menu will appear in the center pane of the interface
+3. Click the **Datatype** tab
+4. Set **New Type** to **BAM**
+5. Click **Save**
+
+The metadata will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down.
+
+
+
+
diff -r 74a8d2d60258 -r 2b474ebbfc7d samtools_slice_bam.py
--- a/samtools_slice_bam.py Thu Mar 27 15:28:06 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-
-"""
-A wrapper script for slicing a BAM file by provided BED file using SAMTools.
-%prog input_filename.sam output_filename.bam
-"""
-#TODO: Confirm that the sort is necessary e.g. if input regions are out of order
-
-
-import sys, optparse, os, tempfile, subprocess, shutil
-
-CHUNK_SIZE = 2**20 #1mb
-
-def cleanup_before_exit( tmp_dir ):
- if tmp_dir and os.path.exists( tmp_dir ):
- shutil.rmtree( tmp_dir )
-
-def __main__():
- #Parse Command Line
- parser = optparse.OptionParser()
- (options, args) = parser.parse_args()
-
- assert len( args ) == 4, "Invalid command line: samtools_slice_bam.py input.bam input.bam.bai input.interval output.bam"
- input_bam_filename, input_index_filename, input_interval_filename, output_bam_filename = args
-
- tmp_dir = tempfile.mkdtemp( prefix='tmp-samtools_slice_bam-' )
-
- tmp_input_bam_filename = os.path.join( tmp_dir, 'input_bam.bam' )
- os.symlink( input_bam_filename, tmp_input_bam_filename )
- os.symlink( input_index_filename, "%s.bai" % tmp_input_bam_filename )
-
- #Slice BAM
- unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' )
- unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' )
- cmd = 'samtools view -b -L "%s" "%s" > "%s"' % ( input_interval_filename, tmp_input_bam_filename, unsorted_bam_filename )
- proc = subprocess.Popen( args=cmd, stderr=open( unsorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir )
- return_code = proc.wait()
- if return_code:
- stderr_target = sys.stderr
- else:
- stderr_target = sys.stdout
- stderr = open( unsorted_stderr_filename )
- while True:
- chunk = stderr.read( CHUNK_SIZE )
- if chunk:
- stderr_target.write( chunk )
- else:
- break
- stderr.close()
-
- #sort sam, so indexing will not fail
- #TODO: confirm if sorting is necessary (is original BAM order maintained, or does the output follow the order of input intervals?)
- sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' )
- sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' )
- cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_bam_filename )
- proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir )
- return_code = proc.wait()
-
- if return_code:
- stderr_target = sys.stderr
- else:
- stderr_target = sys.stdout
- stderr = open( sorted_stderr_filename )
- while True:
- chunk = stderr.read( CHUNK_SIZE )
- if chunk:
- stderr_target.write( chunk )
- else:
- break
- stderr.close()
-
- cleanup_before_exit( tmp_dir )
-
-if __name__=="__main__": __main__()
diff -r 74a8d2d60258 -r 2b474ebbfc7d samtools_slice_bam.xml
--- a/samtools_slice_bam.xml Thu Mar 27 15:28:06 2014 -0400
+++ b/samtools_slice_bam.xml Tue Apr 21 17:37:49 2015 -0400
@@ -1,40 +1,123 @@
-
- by provided regions
-
- samtools
-
- samtools_slice_bam.py
- "${input_bam}"
- "${input_bam.metadata.bam_index}"
- "${input_interval}"
- "${output_bam}"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ BAM by genomic regions
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UCSC Main**.
-------
-
-**Citation**
+This tool is based on the ``samtools view`` command.
-For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_
+@no-chrom-options@
-If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
-
+]]>
+
diff -r 74a8d2d60258 -r 2b474ebbfc7d test-data/bam-slice-input.bam
Binary file test-data/bam-slice-input.bam has changed
diff -r 74a8d2d60258 -r 2b474ebbfc7d test-data/bam-slice-test1.bam
Binary file test-data/bam-slice-test1.bam has changed
diff -r 74a8d2d60258 -r 2b474ebbfc7d test-data/bam-slice-test2.bam
Binary file test-data/bam-slice-test2.bam has changed
diff -r 74a8d2d60258 -r 2b474ebbfc7d test-data/bam-slice-test3.bam
Binary file test-data/bam-slice-test3.bam has changed
diff -r 74a8d2d60258 -r 2b474ebbfc7d test-data/bam-slice.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bam-slice.bed Tue Apr 21 17:37:49 2015 -0400
@@ -0,0 +1,38 @@
+chrM 5 1000 myInterval 0 +
+chrM 577 647 TRNF 0 +
+chrM 648 1601 RNR1 0 +
+chrM 1602 1670 TRNV 0 +
+chrM 1671 3229 RNR2 0 +
+chrM 3230 3304 TRNL1 0 +
+chrM 3307 4262 ND1 0 +
+chrM 4263 4331 TRNI 0 +
+chrM 4329 4400 TRNQ 0 -
+chrM 4402 4469 TRNM 0 +
+chrM 4470 5511 ND2 0 +
+chrM 5512 5579 TRNW 0 +
+chrM 5587 5655 TRNA 0 -
+chrM 5657 5729 TRNN 0 -
+chrM 5761 5826 TRNC 0 -
+chrM 5826 5891 TRNY 0 -
+chrM 5904 7445 COX1 0 +
+chrM 7446 7514 TRNS1 0 -
+chrM 7518 7585 TRND 0 +
+chrM 7586 8269 COX2 0 +
+chrM 8295 8364 TRNK 0 +
+chrM 8366 8572 ATP8 0 +
+chrM 8527 9207 ATP6 0 +
+chrM 9207 9990 COX3 0 +
+chrM 9991 10058 TRNG 0 +
+chrM 10059 10404 ND3 0 +
+chrM 10405 10469 TRNR 0 +
+chrM 10470 10766 ND4L 0 +
+chrM 10760 12137 ND4 0 +
+chrM 12138 12206 TRNH 0 +
+chrM 12207 12265 TRNS2 0 +
+chrM 12266 12336 TRNL2 0 +
+chrM 12337 14148 ND5 0 +
+chrM 14149 14673 ND6 0 -
+chrM 14674 14742 TRNE 0 -
+chrM 14747 15887 CYTB 0 +
+chrM 15888 15953 TRNT 0 +
+chrM 15956 16023 TRNP 0 -
diff -r 74a8d2d60258 -r 2b474ebbfc7d test-data/gatk/fake_phiX_reads_1.bam
Binary file test-data/gatk/fake_phiX_reads_1.bam has changed
diff -r 74a8d2d60258 -r 2b474ebbfc7d test-data/gatk/fake_phiX_variant_locations.bed
--- a/test-data/gatk/fake_phiX_variant_locations.bed Thu Mar 27 15:28:06 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-phiX174 1442 1443
-phiX174 1445 1446
diff -r 74a8d2d60258 -r 2b474ebbfc7d tool_dependencies.xml
--- a/tool_dependencies.xml Thu Mar 27 15:28:06 2014 -0400
+++ b/tool_dependencies.xml Tue Apr 21 17:37:49 2015 -0400
@@ -1,6 +1,6 @@
-
-
+
+