# HG changeset patch
# User bccarstens
# Date 1326771490 18000
# Node ID 2b6d577dd1abda4d838caeb9b46fcfb5e7c48d37
Uploaded
diff -r 000000000000 -r 2b6d577dd1ab barcode_splitter-bc23f6946bb8/.hg_archival.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/barcode_splitter-bc23f6946bb8/.hg_archival.txt Mon Jan 16 22:38:10 2012 -0500
@@ -0,0 +1,5 @@
+repo: bc23f6946bb86468e0ee41aec073a208bfb010fb
+node: bc23f6946bb86468e0ee41aec073a208bfb010fb
+branch: default
+latesttag: null
+latesttagdistance: 1
diff -r 000000000000 -r 2b6d577dd1ab barcode_splitter-bc23f6946bb8/fastx_barcode_splitter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/barcode_splitter-bc23f6946bb8/fastx_barcode_splitter.xml Mon Jan 16 22:38:10 2012 -0500
@@ -0,0 +1,88 @@
+
+
+ fastx_toolkit
+ fastx_barcode_splitter_galaxy_wrapper.py
+ ## params for galaxy wrapper
+ $output
+ "$output.id"
+ "$input.ext"
+ "$__new_file_path__"
+ --barcodes='$barcodes'
+ $BARCODE $input "$input.name" "$output.extra_files_path"
+ ## params for fastx_barcode_splitter
+ --mismatches $mismatches --partial $partial $EOL
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool splits a Solexa library (FASTQ file) or a regular FASTA file into several files, using barcodes as the split criteria.
+
+--------
+
+**Barcode file Format**
+
+Barcode files are simple text files.
+Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character.
+Example::
+
+ #This line is a comment (starts with a 'number' sign)
+ BC1 GATCT
+ BC2 ATCGT
+ BC3 GTGAT
+ BC4 TGTCT
+
+For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name).
+Sequences matching the barcode will be stored in the appropriate file.
+
+One additional FASTQ file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored.
+
+The output of this tool is an HTML file, displaying the split counts and the file locations.
+
+**Output Example**
+
+.. image:: ./static/fastx_icons/barcode_splitter_output_example.png
+
+
+
+
diff -r 000000000000 -r 2b6d577dd1ab barcode_splitter-bc23f6946bb8/fastx_barcode_splitter_galaxy_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/barcode_splitter-bc23f6946bb8/fastx_barcode_splitter_galaxy_wrapper.py Mon Jan 16 22:38:10 2012 -0500
@@ -0,0 +1,91 @@
+import sys, os, os.path, tempfile, shutil, re, shlex, subprocess
+
+def stop_err( msg ):
+    """
+    Report a fatal error and terminate the script.
+
+    msg is written to stderr followed by a newline. The process then exits
+    with status 1, so the failure is visible through the exit code as well
+    as through the stderr text.
+    """
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit( 1 )
+
+# transform fastx_barcode_splitter result to html
+def results_to_html(results_path,html_path,basepath,print_stdout ):
+ # Rewrite the tab-separated text file at results_path as the file at html_path.
+ #   results_path: file to read, one record per line
+ #   html_path:    file to (over)write
+ #   basepath:     leading path that is removed from each line
+ #   print_stdout: when true, also print the first two tab-separated fields
+ #                 of every line to stdout
+ # NOTE(review): the string literals passed to html.write() below are split
+ # across lines and look as if their HTML markup was stripped from this copy;
+ # confirm them against the original wrapper before using this code.
+ # pat matches basepath, an optional '/', and captures the text up to the next
+ # tab; rep keeps only the captured text, so the basepath prefix is dropped.
+ # NOTE(review): basepath is interpolated into the pattern without re.escape().
+ pat = '%s[/]?([^\t]*)' % basepath
+ rep = '\\1'
+ txt = open(results_path,'r')
+ html = open(html_path,'w')
+ html.write('
\n')
+ try:
+ for line in txt:
+ # strip the basepath prefix, then replace the tabs, and write the line
+ html.write('%s |
' % re.sub('\t','',re.sub(pat,rep,line)))
+ if print_stdout:
+ print >> sys.stdout, '\t'.join(line.split('\t')[:2])
+ except Exception, e:
+ # an error while converting is printed to stdout and not propagated
+ print(str(e))
+ pass
+ html.write(' |
\n')
+ html.close()
+ txt.close()
+
+def __main__():
+    """
+    Split a library with fastx_barcode_splitter.pl and report the result.
+
+    Positional command-line arguments, in the order listed in
+    fastx_barcode_splitter.xml:
+      1    output: file that receives the HTML summary
+      2    output id: used to name the additional datasets
+      3    input extension: used as suffix of the split files
+      4    new file path: directory that receives the additional datasets
+      5    --barcodes=ID[,ID...]: barcode identifiers to publish as datasets
+      6    barcode file
+      7    input file (read through 'zcat -f')
+      8    input name: sanitized into the prefix of the split files
+      9    extra files path: directory that receives the split files
+      10+  options handed to fastx_barcode_splitter.pl unchanged
+
+    Exits through stop_err when the input or barcode file is missing; any
+    other failure is re-raised as Exception with a descriptive prefix.
+    """
+    output = sys.argv[1]
+    output_id = sys.argv[2]
+    file_ext = sys.argv[3]
+    new_file_path = sys.argv[4]
+    select_barcodes = sys.argv[5].replace('--barcodes=','')
+    barcodes = sys.argv[6]
+    fastx = sys.argv[7]
+    fastx_name = sys.argv[8]
+    extra_files_path = sys.argv[9]
+    script_args = ' '.join(sys.argv[10:])
+    # Sanitize library name, make sure we can create a file with this name
+    lib_name = re.sub(r'\W','_',re.sub(r'\.\W*$','',fastx_name))+'_'
+    prefix = os.path.join(extra_files_path,lib_name)
+    # Check that input datasets exist
+    if not os.path.isfile(fastx):
+        stop_err('Error: Input file (%s) not found!' % fastx)
+    if not os.path.isfile(barcodes):
+        stop_err('Error: barcode file (%s) not found!' % barcodes)
+    try:
+        # Check that extra_files_path exists
+        if not os.path.isdir(extra_files_path):
+            os.makedirs(extra_files_path)
+        # NOTE(review): the paths are placed into the shell command unquoted
+        cmd_line = 'zcat -f %s | fastx_barcode_splitter.pl --bcfile %s --prefix %s --suffix %s %s' %(fastx,barcodes,prefix,'.'+file_ext,script_args)
+        tmp_dir = tempfile.mkdtemp()
+        try:
+            # File that collects what the splitter writes to stdout.
+            # mkstemp hands back an open descriptor, so the file is never
+            # deleted and re-created between naming it and opening it.
+            fd, result_path = tempfile.mkstemp(dir=tmp_dir, prefix='results_', suffix='.out')
+            result_file = os.fdopen(fd, 'wb')
+            try:
+                proc = subprocess.Popen( args=cmd_line, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE, stdout=result_file )
+                # communicate() drains stderr while waiting; wait() followed
+                # by read() can block forever once the pipe buffer fills up
+                stderr = proc.communicate()[1]
+            finally:
+                result_file.close()
+            if proc.returncode != 0:
+                raise Exception(stderr)
+            # copy results to output
+            results_to_html(result_path,output,extra_files_path,True)
+        finally:
+            # the split files live in extra_files_path, so the scratch
+            # directory is no longer needed once the summary is written
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+        # make new datasets for selected barcodes
+        if select_barcodes:
+            flist = os.listdir(extra_files_path)
+            for barcode in select_barcodes.split(','):
+                wanted = '_' + barcode + '.' + file_ext
+                for fname in flist:
+                    if wanted not in fname:
+                        continue
+                    fpath = os.path.join(extra_files_path,fname)
+                    # filename pattern required by galaxy
+                    fn = "%s_%s_%s_%s_%s" % ( 'primary', output_id, barcode, 'visible', file_ext )
+                    npath = os.path.join(new_file_path,fn)
+                    try:
+                        os.link(fpath, npath)
+                    except (OSError, AttributeError):
+                        # hard link not possible (or os.link unavailable): copy instead
+                        shutil.copy2(fpath, npath)
+    except Exception as e:
+        raise Exception('Exception caught attempting conversion: ' + str( e ))
+
+if __name__ == "__main__": __main__()
diff -r 000000000000 -r 2b6d577dd1ab barcode_splitter-bc23f6946bb8/fastx_barcode_splitter_single.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/barcode_splitter-bc23f6946bb8/fastx_barcode_splitter_single.xml Mon Jan 16 22:38:10 2012 -0500
@@ -0,0 +1,63 @@
+
+
+ fastx_toolkit
+ fastx_barcode_splitter_single_galaxy_wrapper.py
+ $matched_output
+ $unmatched_output
+ "$input.ext"
+ --barcodes='$barcode'
+ $input "$input.name"
+ --mismatches $mismatches --partial $partial $EOL
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool splits a Solexa library (FASTQ file) or a regular FASTA file into two files using a barcode as the split criterion.
+
+--------
+
+A new FASTQ file will be created (with the barcode's identifier as part of the file name).
+Sequences matching the barcode will be stored in the appropriate file.
+
+An additional FASTQ file will be created (the 'unmatched' file), where sequences not matching this barcode will be stored.
+
+
+
+
diff -r 000000000000 -r 2b6d577dd1ab barcode_splitter-bc23f6946bb8/fastx_barcode_splitter_single_galaxy_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/barcode_splitter-bc23f6946bb8/fastx_barcode_splitter_single_galaxy_wrapper.py Mon Jan 16 22:38:10 2012 -0500
@@ -0,0 +1,86 @@
+import sys, os, os.path, tempfile, shutil, re, shlex, subprocess
+
+def stop_err( msg ):
+    """
+    Report a fatal error and terminate the script.
+
+    msg is written to stderr followed by a newline. The process then exits
+    with status 1, so the failure is visible through the exit code as well
+    as through the stderr text.
+    """
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit( 1 )
+
+# transform fastx_barcode_splitter result to html
+def results_to_html(results_path,html_path,basepath,print_stdout ):
+ # Rewrite the tab-separated text file at results_path as the file at html_path.
+ #   results_path: file to read, one record per line
+ #   html_path:    file to (over)write
+ #   basepath:     leading path that is removed from each line
+ #   print_stdout: when true, also print the first two tab-separated fields
+ #                 of every line to stdout
+ # This helper is not called by __main__ in this wrapper.
+ # NOTE(review): the string literals passed to html.write() below look as if
+ # their HTML markup was stripped from this copy (two of them are split across
+ # lines); confirm them against the original wrapper before using this code.
+ # pat matches basepath, an optional '/', and captures the text up to the next
+ # tab; rep keeps only the captured text, so the basepath prefix is dropped.
+ # NOTE(review): basepath is interpolated into the pattern without re.escape().
+ pat = '%s[/]?([^\t]*)' % basepath
+ rep = '\\1'
+ txt = open(results_path,'r')
+ html = open(html_path,'w')
+ html.write('\n')
+ try:
+ for line in txt:
+ # strip the basepath prefix, then replace the tabs, and write the line
+ html.write('%s |
' % re.sub('\t','',re.sub(pat,rep,line)))
+ if print_stdout:
+ print >> sys.stdout, '\t'.join(line.split('\t')[:2])
+ except Exception, e:
+ # an error while converting is printed to stdout and not propagated
+ print(str(e))
+ pass
+ html.write(' |
\n')
+ html.close()
+ txt.close()
+
+def __main__():
+    """
+    Split a library on one barcode with fastx_barcode_splitter.pl.
+
+    Positional command-line arguments, in the order listed in
+    fastx_barcode_splitter_single.xml:
+      1    matched output: receives the split file of the barcode
+      2    unmatched output: receives the 'unmatched' split file
+      3    input extension: used as suffix of the split files
+      4    --barcodes=SEQUENCE: the barcode to split on
+      5    input file (read through 'zcat -f')
+      6    input name: sanitized into the prefix of the split files
+      7+   options handed to fastx_barcode_splitter.pl unchanged
+
+    A one-line barcode file named 'barcodes' is written to the current
+    directory. Exits through stop_err when the input file is missing; any
+    other failure is re-raised as Exception with a descriptive prefix.
+    """
+    output = sys.argv[1]
+    output_unmatched = sys.argv[2]
+    file_ext = sys.argv[3]
+    select_barcode = sys.argv[4].replace('--barcodes=','')
+    # Identifier given to the single barcode; the splitter is invoked with
+    # the library name as prefix, so the matching reads end up in
+    # <prefix><identifier><suffix>.
+    barcode_id = 'barcode'
+    # absolute path, because the splitter runs with a different working directory
+    barcodes = os.path.abspath("barcodes")
+    with open(barcodes, 'w') as f:
+        f.write("%s\t%s\n" % (barcode_id, select_barcode))
+
+    fastx = sys.argv[5]
+    fastx_name = sys.argv[6]
+    script_args = ' '.join(sys.argv[7:])
+    # Sanitize library name, make sure we can create a file with this name
+    lib_name = re.sub(r'\W','_',re.sub(r'\.\W*$','',fastx_name))+'_'
+    # Check that input datasets exist
+    if not os.path.isfile(fastx):
+        stop_err('Error: Input file (%s) not found!' % fastx)
+    try:
+        # relative prefix: the split files are created in the scratch directory
+        prefix = lib_name
+        # NOTE(review): the paths are placed into the shell command unquoted
+        cmd_line = 'zcat -f %s | fastx_barcode_splitter.pl --bcfile %s --prefix %s --suffix %s %s' %(fastx,barcodes,prefix,'.'+file_ext,script_args)
+        tmp_dir = tempfile.mkdtemp()
+        try:
+            # File that collects what the splitter writes to stdout.
+            # mkstemp hands back an open descriptor, so the file is never
+            # deleted and re-created between naming it and opening it.
+            fd, result_path = tempfile.mkstemp(dir=tmp_dir, prefix='results_', suffix='.out')
+            result_file = os.fdopen(fd, 'wb')
+            try:
+                proc = subprocess.Popen( args=cmd_line, shell=True, cwd=tmp_dir, stderr=subprocess.PIPE, stdout=result_file )
+                # communicate() drains stderr while waiting; wait() followed
+                # by read() can block forever once the pipe buffer fills up
+                stderr = proc.communicate()[1]
+            finally:
+                result_file.close()
+            if proc.returncode != 0:
+                raise Exception(stderr)
+            # copy the two split files to the output datasets
+            matched_tail = '_' + barcode_id + '.' + file_ext
+            unmatched_tail = '_unmatched.' + file_ext
+            for fname in os.listdir(tmp_dir):
+                fpath = os.path.join(tmp_dir, fname)
+                if matched_tail in fname:
+                    shutil.copy2(fpath, output)
+                if unmatched_tail in fname:
+                    shutil.copy2(fpath, output_unmatched)
+        finally:
+            # everything needed has been copied out of the scratch directory
+            shutil.rmtree(tmp_dir, ignore_errors=True)
+    except Exception as e:
+        raise Exception('Exception caught attempting conversion: ' + str( e ))
+
+if __name__ == "__main__": __main__()