annotate barcode_splitter-bc23f6946bb8/fastx_barcode_splitter_galaxy_wrapper.py @ 0:2b6d577dd1ab default tip

Uploaded
author bccarstens
date Mon, 16 Jan 2012 22:38:10 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
1 import sys, os, os.path, tempfile, shutil, re, shlex, subprocess
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
2
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
def stop_err(msg):
    """Write *msg* (plus a newline) to stderr and terminate the script.

    The process exits with status 1 so that callers which inspect the exit
    code (rather than only stderr) also see the run as failed.

    Raises:
        SystemExit: always.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
6
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
7 # transform fastx_barcode_splitter result to html
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
def results_to_html(results_path, html_path, basepath, print_stdout):
    """Convert a tab-separated results file into a one-table HTML page.

    Every input line becomes one table row and every tab-separated field one
    cell.  Any occurrence of *basepath* (optionally followed by ``/``) is
    replaced by a hyperlink whose target and text are the remainder of that
    field, i.e. the path relative to *basepath*.

    Args:
        results_path: path of the tab-separated text file to read.
        html_path: path of the HTML file to write (overwritten).
        basepath: directory prefix to strip from paths and turn into links.
        print_stdout: when true, also echo the first two columns of every
            line to stdout.
    """
    # basepath is a literal filesystem path, not a regex: escape it so that
    # characters such as '.', '+' or '(' match themselves.
    link_pattern = re.compile('%s[/]?([^\t]*)' % re.escape(basepath))
    link_template = '<a href=\"\\1\">\\1</a>'
    with open(results_path, 'r') as txt:
        with open(html_path, 'w') as html:
            html.write('<html><body><table border=1>\n')
            try:
                for line in txt:
                    # Drop the line terminator so it does not end up inside
                    # the last cell (or inside the href of a linked path).
                    row = line.rstrip('\r\n')
                    cells = link_pattern.sub(link_template, row).replace('\t', '</td><td>')
                    html.write('<tr><td>%s</td></tr>\n' % cells)
                    if print_stdout:
                        sys.stdout.write('\t'.join(row.split('\t')[:2]) + '\n')
            except Exception as e:
                # Best effort: report the problem but still emit the closing
                # tags so the page written so far remains well-formed.
                print(str(e))
            html.write('</table></body></html>\n')
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
25
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
def __main__():
    """Run fastx_barcode_splitter.pl on the input and publish its results.

    Command line expected from the Galaxy tool wrapper:

        ##params for galaxy wrapper
        $output
        "$output.id"
        "$input.ext"
        "$__new_file_path__"
        --barcodes='$barcodes'
        $BARCODE $input "$input.name" "$output.extra_files_path"
        ## params for fastx_barcode_splitter
        --mismatches $mismatches --partial $partial $EOL

    Steps:
      1. Pipe the input through ``zcat -f`` into ``fastx_barcode_splitter.pl``;
         the per-barcode files are written under ``extra_files_path``.
      2. Render the splitter's stdout summary as HTML into ``output``.
      3. For every barcode listed in ``--barcodes=``, hard-link (or copy) the
         matching split file into ``new_file_path`` under the
         ``primary_<output_id>_<barcode>_visible_<ext>`` name.

    Raises:
        SystemExit: via stop_err() when arguments or input files are missing.
        Exception: when the splitter fails or any later step raises; the
            message is prefixed with 'Exception caught attempting conversion: '.
    """
    try:
        from shlex import quote as shell_quote  # Python 3.3+
    except ImportError:
        from pipes import quote as shell_quote  # Python 2

    if len(sys.argv) < 10:
        stop_err('Error: expected at least 9 arguments, got %d' % (len(sys.argv) - 1))

    output = sys.argv[1]
    output_id = sys.argv[2]
    file_ext = sys.argv[3]
    new_file_path = sys.argv[4]
    select_barcodes = sys.argv[5].replace('--barcodes=', '')
    barcodes = sys.argv[6]
    fastx = sys.argv[7]
    fastx_name = sys.argv[8]
    extra_files_path = sys.argv[9]
    # Remaining arguments are passed through to fastx_barcode_splitter.pl;
    # each one is quoted because the pipeline below runs through a shell.
    script_args = ' '.join(shell_quote(arg) for arg in sys.argv[10:])

    # Sanitize library name, make sure we can create a file with this name
    # NOTE(review): the inner pattern '\.\W*$' only strips a trailing dot
    # followed by non-word characters; if the intent was to drop the file
    # extension it should probably be '\.\w*$' - confirm before changing,
    # since it alters the generated file names.
    lib_name = re.sub(r'\W', '_', re.sub(r'\.\W*$', '', fastx_name)) + '_'
    prefix = os.path.join(extra_files_path, lib_name)

    # Check that input datasets exist
    if not os.path.isfile(fastx):
        stop_err('Error: Input file (%s) not found!' % fastx)
    if not os.path.isfile(barcodes):
        stop_err('Error: barcode file (%s) not found!' % barcodes)

    tmp_dir = None
    try:
        # Check that extra_files_path exists
        if not os.path.isdir(extra_files_path):
            os.makedirs(extra_files_path)
        cmd_line = 'zcat -f %s | fastx_barcode_splitter.pl --bcfile %s --prefix %s --suffix %s %s' % (
            shell_quote(fastx),
            shell_quote(barcodes),
            shell_quote(prefix),
            shell_quote('.' + file_ext),
            script_args,
        )
        # Create file to collect results written to stdout
        tmp_dir = tempfile.mkdtemp()
        result_path = os.path.join(tmp_dir, 'results.out')
        with open(result_path, 'wb') as result_file:
            proc = subprocess.Popen(args=cmd_line, shell=True, cwd=tmp_dir,
                                    stderr=subprocess.PIPE, stdout=result_file)
            # communicate() drains stderr while waiting; calling wait() first
            # can deadlock once the child fills the stderr pipe buffer.
            stderr = proc.communicate()[1]
        if proc.returncode != 0:
            if not isinstance(stderr, str):
                # Python 3 returns bytes from the pipe.
                stderr = stderr.decode('utf-8', 'replace')
            raise Exception(stderr)
        # copy results to output
        results_to_html(result_path, output, extra_files_path, True)
        # make new datasets for selected barcodes
        if select_barcodes:
            flist = os.listdir(extra_files_path)
            for barcode in select_barcodes.split(','):
                wanted = '_' + barcode + '.' + file_ext
                for fname in flist:
                    if wanted not in fname:
                        continue
                    fpath = os.path.join(extra_files_path, fname)
                    # filename pattern required by galaxy
                    fn = "%s_%s_%s_%s_%s" % ('primary', output_id, barcode, 'visible', file_ext)
                    npath = os.path.join(new_file_path, fn)
                    try:
                        os.link(fpath, npath)
                    except (OSError, AttributeError):
                        # Hard link failed or os.link is unavailable on this
                        # platform: fall back to copying.
                        shutil.copy2(fpath, npath)
    except Exception as e:
        raise Exception('Exception caught attempting conversion: ' + str(e))
    finally:
        # The scratch directory only holds the captured stdout summary.
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir, ignore_errors=True)
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
90
2b6d577dd1ab Uploaded
bccarstens
parents:
diff changeset
# Script entry point: run the wrapper only when executed directly.
if __name__ == "__main__":
    __main__()