6
|
1 import unittest
|
|
2 import os
|
|
3 import time
|
|
4 from optparse import OptionParser
|
|
5 from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample
|
|
6 from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
|
|
7
|
|
if __name__ == '__main__':
    # Benchmark driver: builds a reference and a query GFF3 file through
    # MockFindOverlaps_randomExample, sorts both with the external `sort`
    # tool, runs the overlap search once, and records the elapsed wall-clock
    # time together with the number of overlaps in 'timeResult.dat'.
    #
    # Local imports: only this script entry point needs them.
    import subprocess
    import sys

    description = "runRandomJobs: create random ref/query files (with size given), and run the jobs on cluster with help of runJobs.sh"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--inputRef", dest="inputRefGff3FileName", action="store", type="string", help="Reference input file [compulsory] [format: file in gff3 format]")
    parser.add_option("-j", "--inputQuery", dest="inputQueryGff3FileName", action="store", type="string", help="Query input file [compulsory] [format: file in gff3 format]")
    parser.add_option("-m", "--inputRefSize", dest="numberOfRefReads", action="store", type="int", help="The number of Reference")
    parser.add_option("-n", "--inputQuerySize", dest="numberOfQReads", action="store", type="int", help="The number of Query")
    parser.add_option("-o", "--output", dest="outputGff3FileName", action="store", type="string", help="output file [compulsory] [format: output file in gff3 format]")
    (options, args) = parser.parse_args()

    # Every option is used unconditionally below (the sizes go through '%d'),
    # so reject a missing one here, before the timing file is truncated.
    compulsoryOptions = (
        ("-i/--inputRef", options.inputRefGff3FileName),
        ("-j/--inputQuery", options.inputQueryGff3FileName),
        ("-m/--inputRefSize", options.numberOfRefReads),
        ("-n/--inputQuerySize", options.numberOfQReads),
        ("-o/--output", options.outputGff3FileName),
    )
    for flag, value in compulsoryOptions:
        if value is None:
            parser.error("option %s is required" % flag)

    def sortFileInPlace(fileName, extraSortArgs = ()):
        """Sort fileName in place with the external `sort` command.

        The base keys are '-f -n -k4 -k5.4rn'; extraSortArgs are appended
        after them. The command is passed as an argument list (no shell), so
        file names containing spaces or shell metacharacters are handled.
        A failing sort only produces a warning on stderr, it does not abort
        the run.
        """
        command = ['sort', '-f', '-n', '-k4', '-k5.4rn']
        command.extend(extraSortArgs)
        command.extend(['-o', fileName, fileName])
        try:
            status = subprocess.call(command)
        except OSError as error:
            sys.stderr.write("warning: could not run '%s': %s\n" % (' '.join(command), error))
            return
        if status != 0:
            sys.stderr.write("warning: '%s' exited with status %d\n" % (' '.join(command), status))

    outputDataName = 'timeResult.dat'
    chromSize = 100000

    # 'with' guarantees the timing file is closed even if a step below raises.
    with open(outputDataName, 'w') as fTime:
        fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')
        print('ref size = %d, query size = %d' % (options.numberOfRefReads, options.numberOfQReads))

        # Generate and sort the reference file.
        iMFOR_ref = MockFindOverlaps_randomExample(options.inputRefGff3FileName, 'ref', options.numberOfRefReads, chromSize)
        iMFOR_ref.write()
        sortFileInPlace(options.inputRefGff3FileName)

        # Generate and sort the query file.
        iMFOR_query = MockFindOverlaps_randomExample(options.inputQueryGff3FileName, 'q', options.numberOfQReads, chromSize)
        iMFOR_query.write()
        sortFileInPlace(options.inputQueryGff3FileName)

        # The original code called the undefined name 'FindOverlaps_optim';
        # the class actually imported by this file is FindOverlapsOptim.
        # NOTE(review): confirm that FindOverlapsOptim accepts (ref, query)
        # file names and provides the methods used below.
        iFOO = FindOverlapsOptim(options.inputRefGff3FileName, options.inputQueryGff3FileName)
        iFOO.setOutputGff3FileName(options.outputGff3FileName)

        # The timed section covers run(), close() and getNbOverlap().
        startTime_optim = time.time()
        iFOO.run()
        iFOO.close()
        nbOverlap = iFOO.getNbOverlap()
        endTime_optim = time.time()

        # Sort the result file; ';' is the field separator for this sort.
        sortFileInPlace(options.outputGff3FileName, ['-k9.5', '-t', ';'])

        totalTime_optim = endTime_optim - startTime_optim
        print('we take %s second.' % (totalTime_optim))
        fTime.write('%d\t%d\t%d\t%.2f\n' % (options.numberOfRefReads, options.numberOfQReads, nbOverlap, totalTime_optim))
        iFOO.deletIntermediateFiles()
|