Mercurial > repos > fangly > grinder
diff tools/ngs_simulation/grinder_multiple_outputs.py @ 1:7d26d64539b2
Uploaded
author | fangly |
---|---|
date | Mon, 19 Sep 2011 01:07:28 -0400 |
parents | |
children | 27a15723d4f0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ngs_simulation/grinder_multiple_outputs.py Mon Sep 19 01:07:28 2011 -0400 @@ -0,0 +1,98 @@ +#!/usr/bin/env python + +""" +Move files create by Grinder to a location where it is going to be recognized by +Galaxy as multiple output files with the right format. See +http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple Output Files +Example: python grinder_move_outputs output_dir output_id +Author: Florent Angly +""" + +import sys, os, re + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + # Get output dir and ID + args = sys.argv + output_dir = args[1] + output_id = args[2] + + # Move Grinder files to the proper output + # Grinder filenames look like this + # grinder-ranks.txt + # grinder-reads.fa + # grinder-reads.qual + # grinder-1-ranks.txt + # grinder-1-reads.fa + # grinder-1-reads.qual + # grinder-2-ranks.txt + # grinder-2-reads.fa + # grinder-2-reads.qual + + p = re.compile(output_id) + q = re.compile('-(\d+)-') + r = re.compile('-(\w+)$') + + + for fname in os.listdir(output_dir): + + # Skip files that do not start with the output_id + source = os.path.join( output_dir, fname ) + basename, extension = os.path.splitext(fname) + if not p.match(fname): + continue + + # Assign the dataset format + if extension == '.txt': + format = 'text' + elif extension == '.fa': + format = 'fasta' + elif extension == '.fna': + format = 'fasta' + elif extension == '.faa': + format = 'fasta' + elif extension == '.fasta': + format = 'fasta' + elif extension == '.fq': + format = 'fastq' + elif extension == '.fastq': + format = 'fastq' + elif extension == '.qual': + format = 'qual' + else: + stop_err( 'Error: File %s had the unknown extension %s' % ( fname, extension ) ) + + # Assign the dataset name + name = '' + match = q.search(basename) + if match != None: + lib_num = match.group(1) + name = 'lib%s' % lib_num + + match = r.search(basename) + if match == None: + stop_err( 'Error: File with basename %s did not have a recognized name' % (basename) ) + + lib_type = match.group(1) + if format == 'qual': + lib_type = 'qual' + + name = name + '-' + lib_type + + # Move the dataset to the proper place + optional_spec = 'asdf' + destination = os.path.join( output_dir, 'primary_%s_%s_visible_%s_%s' % ( output_id, name, format, optional_spec ) ) + + print "moving %s to %s" % (source, destination) + + try: + os.rename(source, destination) + except Exception, e: + stop_err( 'Error: ' + str( e ) ) + +if __name__ == "__main__": __main__()