view SMART/DiffExpAnal/countNumber_parallel_unSQL.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

#! /usr/bin/env python


import optparse, os, sys, tarfile, random
from optparse import OptionParser
from commons.core.launcher.Launcher import Launcher
from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory

def stop_err(msg):
	"""Report a fatal error on standard error and terminate the script.

	The message is written followed by a newline, then the interpreter
	exits with status 1. A bare sys.exit() would exit with status 0, which
	tells the calling process that the run succeeded.

	Args:
		msg: text of the error message.

	Raises:
		SystemExit: always, with exit code 1.
	"""
	sys.stderr.write('%s\n' % msg)
	# Non-zero status: this function is only meant for failures.
	sys.exit(1)

def toTar(tarFileName, outCountNames):
	"""Pack the count files into a flat tar archive named tarFileName.

	Every entry of outCountNames is reduced to its base name. The file with
	that name is read from the directory that holds tarFileName (the current
	directory when tarFileName has no directory part) and stored in the
	archive under the bare base name, so the archive has no sub-directories.

	The archive is first written to tarFileName + ".tmp.tar" and renamed to
	tarFileName only once it has been closed, so tarFileName never exists
	in a half-written state.

	Args:
		tarFileName: path of the tar archive to produce.
		outCountNames: iterable of paths; only their base names are used.

	Raises:
		OSError / IOError: when a member file is missing or the archive
			cannot be written or renamed.
	"""
	tmpTarName = tarFileName + ".tmp.tar"
	# os.path.dirname() returns "" for a bare file name; fall back to the
	# current directory instead of failing on an empty path.
	memberDir = os.path.dirname(tarFileName) or os.curdir
	tfile = tarfile.open(tmpTarName, "w")
	try:
		for countName in outCountNames:
			memberName = os.path.basename(countName)
			# arcname gives the flat member name without having to change
			# the process working directory.
			tfile.add(os.path.join(memberDir, memberName), arcname=memberName)
	finally:
		# Close before renaming so the end-of-archive blocks are on disk.
		tfile.close()
	# Both names live in the same directory, so a plain rename is enough and
	# avoids building a shell command from file names.
	os.rename(tmpTarName, tarFileName)

def _map(iLauncher, cmd, cmdStart, cmdFinish ):
	"""Wrap one command and its start/finish hooks for the launcher.

	Each of the three values is placed in its own one-element list and the
	lists are handed to iLauncher.prepareCommands_withoutIndentation().

	Args:
		iLauncher: object providing prepareCommands_withoutIndentation().
		cmd: the command to run.
		cmdStart: command to run before cmd.
		cmdFinish: command to run after cmd.

	Returns:
		Whatever prepareCommands_withoutIndentation() returns.
	"""
	return iLauncher.prepareCommands_withoutIndentation([cmd], [cmdStart], [cmdFinish])

def _createCountNumberCommand(iLauncher, inputFile, outputFile):
	"""Build the countNumber.pl command for one input/output pair.

	The perl script is located under the directory named by the REPET_PATH
	environment variable. Both file arguments are converted to strings and
	passed, together with the program string, to
	iLauncher.getSystemCommand().

	Args:
		iLauncher: object providing getSystemCommand(program, arguments).
		inputFile: first argument given to the perl script.
		outputFile: second argument given to the perl script.

	Returns:
		Whatever getSystemCommand() returns.

	Raises:
		KeyError: when REPET_PATH is not set in the environment.
	"""
	scriptArgs = ["%s" % fileName for fileName in (inputFile, outputFile)]
	perlCall = "perl %s/SMART/DiffExpAnal/countNumber.pl " % os.environ["REPET_PATH"]
	return iLauncher.getSystemCommand(perlCall, scriptArgs)

def __main__():
	"""Run countNumber.pl on every listed file and write the summary table.

	Command line:
		-i/--input   text file with one whitespace-separated record per line;
		             field 0 is a label, field 1 the path of a file to count.
		-o/--output  tab-separated table (label, files, group) to write.
		-t/--tar     optional tar file receiving all the count files.

	For each record a count file name "<label>_outCount_<random>.csv" is
	chosen in the directory of the output table, one job per record is
	submitted through the Launcher, and, when --tar is given, the count
	files are archived with toTar().

	Raises:
		SystemExit: with status 2 (optparse usage error) when --input or
			--output is missing.
		IndexError: when a non-blank record has fewer than two fields or a
			label shorter than six characters.
	"""
	#Parse Command Line
	parser = optparse.OptionParser()
	parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
	parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
	parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
	(options, args) = parser.parse_args()
	# Fail with a usage message rather than an opaque open(None) traceback.
	if not options.inputFile or not options.outputFile:
		parser.error("both --input and --output are required")

	#Parse the input txt file and read a list of transcripts files.
	with open(options.inputFile, "r") as listFile:
		lines = listFile.readlines()
	inputFileNames = []
	outCountNames = []
	outputName = options.outputFile
	# Count files go next to the output table. When the output has no
	# directory part the prefix stays empty (current directory) instead of
	# becoming "/", which would point at the filesystem root.
	resDirName = os.path.dirname(outputName)
	resDirPrefix = resDirName + '/' if resDirName else ''

	#Write output txt file and define all output count file names
	with open(outputName, "w") as out:
		out.write("label\tfiles\tgroup\n")
		for line in lines:
			tab = line.split()
			if not tab:
				# Blank line (e.g. trailing newline at end of the list).
				continue
			inputFileNames.append(tab[1])
			# The random suffix keeps names from different records apart.
			outCountName = resDirPrefix + tab[0] + "_outCount_%s.csv" % random.randrange(0, 10000)
			outCountNames.append(outCountName)
			# The group column is the sixth character of the label.
			# NOTE(review): confirm the label format this relies on.
			out.write(tab[0] + '\t' + outCountName + '\t' + tab[0][5] + '\n')

	#Launch on nodes
	acronym = "countNumber"
	jobdb = TableJobAdaptatorFactory.createJobInstance()
	iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
	lCmdsTuples = []
	for inputFile, outputFile in zip(inputFileNames, outCountNames):	#Construct the lines commands
		cmd2Launch = _createCountNumberCommand(iLauncher, inputFile, outputFile)
		# No start/finish hooks are needed around the perl call.
		lCmdsTuples.append(_map(iLauncher, cmd2Launch, "", ""))

	iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)

	if options.outputTar is not None:
		toTar(options.outputTar, outCountNames)


# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
	__main__()