view SMART/DiffExpAnal/countNumber_parallel.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

#! /usr/bin/env python


import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
from optparse import OptionParser

def stop_err(msg):
	"""Report a fatal error and terminate the script.

	Writes msg followed by a newline to standard error, then exits with
	status 1 so that the calling process can detect the failure (a bare
	sys.exit() would report status 0, i.e. success).
	"""
	sys.stderr.write('%s\n' % msg)
	sys.exit(1)

def toTar(tarFileName, outCountNames):
	"""Bundle the count files into an uncompressed tar archive.

	tarFileName   -- path of the archive to create.
	outCountNames -- paths of the files to store; each one is recorded in
	                 the archive under its base name only.

	The archive is first written to tarFileName + ".tmp.tar" and moved to
	its final name only once it has been completely written and closed.
	The current working directory is never changed.
	"""
	tmpTarName = tarFileName + ".tmp.tar"
	tfile = tarfile.open(tmpTarName, "w")
	try:
		for countFileName in outCountNames:
			# arcname keeps the archive flat without having to chdir into
			# the directory that holds the files.
			tfile.add(countFileName, arcname=os.path.basename(countFileName))
	finally:
		# Closing writes the end-of-archive blocks; it must happen before
		# the file is moved into place.
		tfile.close()
	shutil.move(tmpTarName, tarFileName)


def _readInputList(listFileName):
	"""Return the (label, transcriptsFileName) pairs listed in listFileName.

	Each non-blank line must hold at least two whitespace-separated fields:
	a label and a file name. The label must be at least six characters
	long because its sixth character is used as the group. Blank lines are
	skipped. Raises ValueError on a malformed line.
	"""
	entries = []
	with open(listFileName, "r") as handle:
		for lineNumber, line in enumerate(handle, 1):
			fields = line.split()
			if not fields:
				continue
			if len(fields) < 2:
				raise ValueError("Error: line %d of %s must contain a label and a file name." % (lineNumber, listFileName))
			if len(fields[0]) < 6:
				raise ValueError("Error: label '%s' (line %d of %s) is too short: its 6th character is used as the group." % (fields[0], lineNumber, listFileName))
			entries.append((fields[0], fields[1]))
	return entries


def _runCommand(argv):
	"""Run argv (a list, no shell) and return (returncode, stderrText).

	Standard output is discarded. Standard error is sent to an anonymous
	temporary file rather than a pipe, so a very large error output cannot
	block the child; the file disappears as soon as it is closed.
	Raises OSError if the program cannot be started.
	"""
	with open(os.devnull, 'wb') as devnull:
		with tempfile.TemporaryFile() as errFile:
			returncode = subprocess.call(argv, stdout=devnull, stderr=errFile)
			errFile.seek(0)
			stderr = errFile.read()
	# The file is binary: under Python 3 the content must be decoded before
	# it can be concatenated to an error message.
	if not isinstance(stderr, str):
		stderr = stderr.decode("utf-8", "replace")
	return returncode, stderr


def __main__():
	"""Run countNumber.pl on every file named in the input list.

	Command line:
	  -i/--input   text file listing, one per line, a label and a file.
	  -o/--output  summary table to write (columns: label, files, group).
	  -t/--tar     optional tar archive gathering all the count files.

	For each input line a count file named <label>_outCount_<random>.csv
	is declared next to the output file, recorded in the summary table
	(the group being the sixth character of the label), and then produced
	by $REPET_PATH/SMART/DiffExpAnal/countNumber.pl. The commands are run
	one after the other; the first failure stops the script via stop_err.
	"""
	#Parse Command Line
	parser = optparse.OptionParser()
	parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
	parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
	parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
	(options, args) = parser.parse_args()
	if options.inputFile is None or options.outputFile is None:
		parser.error("both --input and --output are required")

	#Parse the input txt file and read a list of transcripts files.
	try:
		entries = _readInputList(options.inputFile)
	except ValueError as e:
		stop_err(str(e))

	outputName = options.outputFile
	resDirName = os.path.dirname(outputName)
	# Only add the separator when there is a directory part: otherwise the
	# count files would be targeted at the filesystem root.
	if resDirName:
		resDirName += '/'

	#Write output txt file and define all output count file names
	outCountNames = []
	with open(outputName, "w") as out:
		out.write("label\tfiles\tgroup\n")
		for label, inputFileName in entries:
			# NOTE(review): the random suffix does not guarantee uniqueness
			# when the same label appears twice.
			outCountName = resDirName + label + "_outCount_%s.csv" % random.randrange(0, 10000)
			outCountNames.append(outCountName)
			out.write(label + '\t' + outCountName + '\t' + label[5] + '\n')

	#Run the counting script on each input file
	if entries:
		try:
			repetPath = os.environ["REPET_PATH"]
		except KeyError:
			stop_err("Error: the REPET_PATH environment variable is not set.")
		script = "%s/SMART/DiffExpAnal/countNumber.pl" % repetPath
		for (label, inputFileName), outCountName in zip(entries, outCountNames):
			try:
				returncode, stderr = _runCommand(["perl", script, inputFileName, outCountName])
			except OSError as e:
				stop_err('Error in :\n' + str(e))
			if returncode != 0:
				stop_err('Error in :\n' + stderr)

	if options.outputTar is not None:
		toTar(options.outputTar, outCountNames)

# Run the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
	__main__()