view tools/ngs_simulation/grinder_multiple_outputs.py @ 2:27a15723d4f0

Uploaded
author fangly
date Tue, 04 Oct 2011 01:02:12 -0400
parents 7d26d64539b2
children
line wrap: on
line source

#!/usr/bin/env python

"""
Move files created by Grinder to a location where they are going to be
recognized by Galaxy as multiple output files with the right format. See
http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple Output Files
Example: python grinder_multiple_outputs.py output_dir output_id
Author: Florent Angly
"""

import sys, os, re

# Refuse to run on Python interpreters older than 2.4
assert sys.version_info[:2] >= ( 2, 4 )

def stop_err( msg ):
    """
    Write msg (followed by a newline) to standard error and terminate the
    script by raising SystemExit.

    The exit status is 1 so that the calling process can tell the run failed;
    a bare sys.exit() would report status 0, i.e. success.
    """
    sys.stderr.write( "%s\n" % msg )
    sys.exit( 1 )

def __main__():
    """
    Rename the Grinder output files found in a directory so that each one is
    called 'primary_<output_id>_<name>_visible_<format>'.

    Command-line arguments (read from sys.argv):
      output_dir  directory that is scanned for Grinder output files
      output_id   prefix a filename must start with in order to be processed

    Files whose name does not start with output_id are left untouched. The
    script stops through stop_err() when an argument is missing, when a
    matching file has an unknown extension or an unrecognized name, or when
    a rename fails.
    """
    # Get output dir and ID
    args = sys.argv
    if len(args) < 3:
        stop_err('Error: expected two arguments: output_dir output_id')
    output_dir = args[1]
    output_id = args[2]

    # Dataset format assigned to each known file extension
    formats = {
        '.txt':   'text',
        '.fq':    'fastqsanger',
        '.fastq': 'fastqsanger',
        '.fa':    'fasta',
        '.fna':   'fasta',
        '.faa':   'fasta',
        '.fasta': 'fasta',
        '.qual':  'qual',
    }

    # Move Grinder files to the proper output
    # Grinder filenames look like this
    #   grinder-ranks.txt
    #   grinder-reads.fa
    #   grinder-reads.qual
    #   grinder-1-ranks.txt
    #   grinder-1-reads.fa
    #   grinder-1-reads.qual
    #   grinder-2-ranks.txt
    #   grinder-2-reads.fa
    #   grinder-2-reads.qual

    # Optional library number enclosed in dashes, e.g. the '1' of '-1-'
    lib_num_re = re.compile(r'-(\d+)-')
    # Word that follows the last dash of the basename, e.g. 'reads' or 'ranks'
    lib_type_re = re.compile(r'-(\w+)$')

    for fname in os.listdir(output_dir):

        # Skip files that do not start with the output_id. This is a literal
        # prefix test: the ID is not a pattern, so regex metacharacters in it
        # must not be interpreted.
        if not fname.startswith(output_id):
            continue

        source = os.path.join(output_dir, fname)
        basename, extension = os.path.splitext(fname)

        # Assign the dataset format
        dataset_format = formats.get(extension)
        if dataset_format is None:
            stop_err('Error: File %s had the unknown extension %s' % (fname, extension))

        # Assign the dataset name: optional 'lib<N>-' prefix + library type
        name = ''
        lib_num = lib_num_re.search(basename)
        if lib_num is not None:
            name = 'lib%s-' % lib_num.group(1)

        lib_type = lib_type_re.search(basename)
        if lib_type is None:
            stop_err('Error: File with basename %s did not have a recognized name' % basename)

        # Quality files are always labelled 'qual', whatever the basename says
        if dataset_format == 'qual':
            name = name + 'qual'
        else:
            name = name + lib_type.group(1)

        # Move the dataset to the proper place
        destination = os.path.join(output_dir, 'primary_%s_%s_visible_%s' % (output_id, name, dataset_format))

        # Single parenthesized argument: valid as a Python 2 print statement
        # and as a Python 3 print() call, with identical output
        print("moving %s to %s" % (source, destination))

        try:
            os.rename(source, destination)
        except OSError:
            # sys.exc_info() is used instead of "except ... as e" (2.6+) or
            # "except ..., e" (2.x only) so the code parses on 2.4 through 3.x
            stop_err('Error: ' + str(sys.exc_info()[1]))

# Run the renaming only when this file is executed as a script, not on import
if __name__ == "__main__":
    __main__()