#!/usr/bin/env python

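# TopHat wrapper script: reads a tab-delimited list of FASTQ files, splits each
# file into batches of reads, runs TopHat on every batch through the Launcher
# job system, merges the per-batch BAM files back into one BAM per sample with
# samtools, and optionally bundles the results into a tar archive.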

import optparse, os, shutil, subprocess, sys, tempfile, fileinput, tarfile, glob
from commons.core.launcher.Launcher import Launcher
from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
from commons.core.utils.FileUtils import FileUtils

def stop_err( msg ):
    sys.stderr.write( "%s\n" % msg )
    sys.exit()

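# Collect the per-sample accepted_hits BAM files into a single tar archive.
# The archive is built as <tarFileName>.tmp.tar and renamed once complete.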
def toTar(tarFileName, accepted_hits_outputNames):
    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
    currentPath = os.getcwd()
    # The BAM files are added by base name, so work from the directory that contains them.
    os.chdir(os.path.dirname(accepted_hits_outputNames[0]))
    for file in accepted_hits_outputNames:
        relativeFileName = os.path.basename(file)
        tfile.add(relativeFileName)
    # Close the archive before renaming it to its final name.
    tfile.close()
    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
    os.chdir(currentPath)

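# Split a FASTQ file into batches of nbOfSeqPerBatch reads (four lines per read).
# When the file is empty or the batch size is 0, the whole file is copied as a
# single batch. Returns the list of batch file names.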
def splitFastQ(fileName, nbOfSeqPerBatch):
    nbOfLinesPerFile = nbOfSeqPerBatch * 4
    lOutput = []
    filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
    resDir = os.path.dirname(fileName)
    with open(fileName) as inF:
        fileNb = 1
        line = inF.readline()
        if not line or nbOfLinesPerFile == 0:
            outFileName = "%s/%s-%s%s" % (resDir, filePrefix, fileNb, fileExt)
            lOutput.append(outFileName)
            f = open(outFileName, "wb")
            shutil.copyfileobj(open(fileName, "rb"), f)
            f.close()
        else:
            while line:
                outFileName = "%s/%s-%s%s" % (resDir, filePrefix, fileNb, fileExt)
                lOutput.append(outFileName)
                with open(outFileName, "w") as outF:
                    lineNb = 1
                    while lineNb <= nbOfLinesPerFile and line:
                        outF.write(line)
                        line = inF.readline()
                        lineNb += 1
                fileNb += 1
    return lOutput

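# Merge the per-batch BAM files of each sample into its final output BAM with
# "samtools merge -f". At most 224 batch files are merged per samtools call;
# when the limit is reached the partial result is moved to tmpBAM and merged
# with the remaining files.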
def joinBAM(dCutOut2Out):
    for key in dCutOut2Out.keys():
        fh = open(key, "w")
        fh.close()
        nbFile = 0
        cmd = "samtools merge -f %s" % key
        for fileName in dCutOut2Out[key]:
            nbFile = nbFile + 1
            if nbFile < 225:
                cmd += " %s" % fileName
            else:
                nbFile = 0
                cmd += ";mv %s tmpBAM;" % (key)
                cmd += "samtools merge -f %s tmpBAM %s" % (key, fileName)
        proc = subprocess.Popen( args=cmd , shell=True)
        returncode = proc.wait()

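# Wrap a command and its pre-/post-processing statements into the structure
# expected by Launcher.prepareCommands_withoutIndentation().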
def _map(iLauncher, cmd, cmdStart, cmdFinish):
    lCmds = []
    lCmds.extend(cmd)
    lCmdStart = []
    lCmdStart.extend(cmdStart)
    lCmdFinish = []
    lCmdFinish.extend(cmdFinish)
    return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))

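# Build the tophat command line from the wrapper options. With the 'preSet'
# settings only the thread count (and mate inner distance for paired reads) is
# set; otherwise every exposed TopHat option is translated into its flag.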
def _createTopHatCommand(iLauncher, options, index_paths, inputFileNames, inputRevFilesNames, space):
    lArgs = []
    lArgs.append('-p %s %s' % ( options.num_threads, space ))
    if options.single_paired == 'paired':
        lArgs.append('-r %s ' % options.mate_inner_dist)
    if options.settings == 'preSet':
        lArgs.append(index_paths)
        lArgs.append(inputFileNames)
        if options.input2:
            lArgs.append(inputRevFilesNames)
        return iLauncher.getSystemCommand("tophat", lArgs)
    else:
        if int( options.min_anchor_length ) >= 3:
            lArgs.append('-a %s ' % options.min_anchor_length)
        else:
            raise Exception, 'Minimum anchor length must be 3 or greater'
        lArgs.append('-m %s ' % options.splice_mismatches)
        lArgs.append('-i %s ' % options.min_intron_length)
        lArgs.append('-I %s ' % options.max_intron_length)
        if float( options.junction_filter ) != 0.0:
            lArgs.append('-F %s ' % options.junction_filter)
        lArgs.append('-g %s ' % options.max_multihits)
        # Custom junctions options.
        if options.gene_model_annotations:
            lArgs.append('-G %s ' % options.gene_model_annotations)
        if options.raw_juncs:
            lArgs.append('-j %s ' % options.raw_juncs)
        if options.no_novel_juncs:
            lArgs.append('--no-novel-juncs ')
        if options.library_type:
            lArgs.append('--library-type %s ' % options.library_type)
        if options.no_novel_indels:
            lArgs.append('--no-novel-indels ')
        else:
            if options.max_insertion_length:
                lArgs.append('--max-insertion-length %i ' % int( options.max_insertion_length ))
            if options.max_deletion_length:
                lArgs.append('--max-deletion-length %i ' % int( options.max_deletion_length ))
            # Max options do not work for Tophat v1.2.0, despite documentation to the contrary. (Fixed in version 1.3.1)
            # need to warn user of this fact
            #sys.stdout.write( "Max insertion length and max deletion length options don't work in Tophat v1.2.0\n" )

        # Search type options.
        if options.coverage_search:
            lArgs.append('--coverage-search --min-coverage-intron %s --max-coverage-intron %s ' % ( options.min_coverage_intron, options.max_coverage_intron ))
        else:
            lArgs.append('--no-coverage-search ')
        if options.closure_search:
            lArgs.append('--closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s ' % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron ))
        else:
            lArgs.append('--no-closure-search ')
        if options.microexon_search:
            lArgs.append('--microexon-search ')
        if options.single_paired == 'paired':
            lArgs.append('--mate-std-dev %s ' % options.mate_std_dev)
        if options.initial_read_mismatches:
            lArgs.append('--initial-read-mismatches %d ' % int( options.initial_read_mismatches ))
        if options.seg_mismatches:
            lArgs.append('--segment-mismatches %d ' % int( options.seg_mismatches ))
        if options.seg_length:
            lArgs.append('--segment-length %d ' % int( options.seg_length ))
        if options.min_segment_intron:
            lArgs.append('--min-segment-intron %d ' % int( options.min_segment_intron ))
        if options.max_segment_intron:
            lArgs.append('--max-segment-intron %d ' % int( options.max_segment_intron ))
        lArgs.append(index_paths)
        lArgs.append(inputFileNames)
        if options.input2:
            lArgs.append(inputRevFilesNames)
        return iLauncher.getSystemCommand("tophat", lArgs)

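# Main entry point: parse the options, prepare the bowtie index, split the read
# files into batches, launch one TopHat job per batch, then merge the results
# and clean up.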
def __main__():
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-o', '--outputTxtFile', dest='outputTxtFile', help='For the differential expression analysis pipeline, this option gives a txt output containing the list of mapping results.')
    parser.add_option('-t', '--tar', dest='outputTar', default=None, help='Output all accepted hits results in a tar file.' )
    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
    parser.add_option( '-C', '--color-space', dest='color_space', action='store_true', help='This indicates color-space data' )
    parser.add_option( '-J', '--junctions-output', dest='junctions_output_file', default='junctions_output.bed', help='Junctions output file; format is BED.' )
    parser.add_option( '-H', '--hits-output', dest='accepted_hits_output_file', default='hits_output.bam', help='Accepted hits output file; format is BAM.' )
    parser.add_option( '', '--own-file', dest='own_file', help='' )
    parser.add_option( '-D', '--indexes-path', dest='index_path', help='Indexes directory; location of .ebwt and .fa files.' )
    parser.add_option( '-r', '--mate-inner-dist', dest='mate_inner_dist', help='This is the expected (mean) inner distance between mate pairs. \
                                                                                For example, for paired end runs with fragments selected at 300bp, \
                                                                                where each end is 50bp, you should set -r to be 200. There is no default, \
                                                                                and this parameter is required for paired end runs.')
    parser.add_option( '', '--mate-std-dev', dest='mate_std_dev', help='Standard deviation of the distribution of inner distances between mate pairs.' )
    parser.add_option( '-a', '--min-anchor-length', dest='min_anchor_length',
                        help='The "anchor length". TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.' )
    parser.add_option( '-m', '--splice-mismatches', dest='splice_mismatches', help='The maximum number of mismatches that can appear in the anchor region of a spliced alignment.' )
    parser.add_option( '-i', '--min-intron-length', dest='min_intron_length',
                        help='The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.' )
    parser.add_option( '-I', '--max-intron-length', dest='max_intron_length',
                        help='The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.' )
    parser.add_option( '-F', '--junction_filter', dest='junction_filter', help='Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)' )
    parser.add_option( '-g', '--max_multihits', dest='max_multihits', help='Maximum number of alignments to be allowed' )
    parser.add_option( '', '--initial-read-mismatches', dest='initial_read_mismatches', help='Number of mismatches allowed in the initial read mapping' )
    parser.add_option( '', '--seg-mismatches', dest='seg_mismatches', help='Number of mismatches allowed in each segment alignment for reads mapped independently' )
    parser.add_option( '', '--seg-length', dest='seg_length', help='Minimum length of read segments' )
    parser.add_option( '', '--library-type', dest='library_type', help='TopHat will treat the reads as strand specific. Every read alignment will have an XS attribute tag. Consider supplying library type options below to select the correct RNA-seq protocol.' )
    parser.add_option( '', '--allow-indels', action="store_true", help='Allow indel search. Indel search is disabled by default. (Not used since version 1.3.0)' )
    parser.add_option( '', '--max-insertion-length', dest='max_insertion_length', help='The maximum insertion length. The default is 3.' )
    parser.add_option( '', '--max-deletion-length', dest='max_deletion_length', help='The maximum deletion length. The default is 3.' )

    # Options for supplying own junctions
    parser.add_option( '-G', '--GTF', dest='gene_model_annotations', help='Supply TopHat with a list of gene model annotations. \
                                                                           TopHat will use the exon records in this file to build \
                                                                           a set of known splice junctions for each gene, and will \
                                                                           attempt to align reads to these junctions even if they \
                                                                           would not normally be covered by the initial mapping.')
    parser.add_option( '-j', '--raw-juncs', dest='raw_juncs', help='Supply TopHat with a list of raw junctions. Junctions are \
                                                                    specified one per line, in a tab-delimited format. Records \
                                                                    look like: <chrom> <left> <right> <+/->. left and right are \
                                                                    zero-based coordinates, and specify the last character of the \
                                                                    left sequence to be spliced to the first character of the right \
                                                                    sequence, inclusive.')
    parser.add_option( '', '--no-novel-juncs', action="store_true", dest='no_novel_juncs', help="Only look for junctions indicated in the \
                                                                                            supplied GFF file. (ignored without -G)")
    parser.add_option( '', '--no-novel-indels', action="store_true", dest='no_novel_indels', help="Skip indel search. Indel search is enabled by default.")
    # Types of search.
    parser.add_option( '', '--microexon-search', action="store_true", dest='microexon_search', help='With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.')
    parser.add_option( '', '--closure-search', action="store_true", dest='closure_search', help='Enables the mate pair closure-based search for junctions. Closure-based search should only be used when the expected inner distance between mates is small (<= 50bp)')
    parser.add_option( '', '--no-closure-search', action="store_false", dest='closure_search' )
    parser.add_option( '', '--coverage-search', action="store_true", dest='coverage_search', help='Enables the coverage-based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.')
    parser.add_option( '', '--no-coverage-search', action="store_false", dest='coverage_search' )
    parser.add_option( '', '--min-segment-intron', dest='min_segment_intron', help='Minimum intron length that may be found during split-segment search' )
    parser.add_option( '', '--max-segment-intron', dest='max_segment_intron', help='Maximum intron length that may be found during split-segment search' )
    parser.add_option( '', '--min-closure-exon', dest='min_closure_exon', help='Minimum length for exonic hops in the potential splice graph' )
    parser.add_option( '', '--min-closure-intron', dest='min_closure_intron', help='Minimum intron length that may be found during closure search' )
    parser.add_option( '', '--max-closure-intron', dest='max_closure_intron', help='Maximum intron length that may be found during closure search' )
    parser.add_option( '', '--min-coverage-intron', dest='min_coverage_intron', help='Minimum intron length that may be found during coverage search' )
    parser.add_option( '', '--max-coverage-intron', dest='max_coverage_intron', help='Maximum intron length that may be found during coverage search' )

    # Wrapper options.
    parser.add_option( '-1', '--input1', dest='input1', help='A txt file listing the (forward or single-end) reads files, in Sanger FASTQ format' )
    parser.add_option( '-2', '--input2', dest='input2', help='A txt file listing the reverse reads files, in Sanger FASTQ format' )
    parser.add_option( '', '--single-paired', dest='single_paired', help='' )
    parser.add_option( '', '--settings', dest='settings', help='' )

    (options, args) = parser.parse_args()

    # output version # of tool
    try:
        tmp_files = []
        tmp = tempfile.NamedTemporaryFile().name
        tmp_files.append(tmp)
        tmp_stdout = open( tmp, 'wb' )
        proc = subprocess.Popen( args='tophat -v', shell=True, stdout=tmp_stdout )
        tmp_stdout.close()
        returncode = proc.wait()
        stdout = open( tmp_stdout.name, 'rb' ).readline().strip()
        if stdout:
            sys.stdout.write( '%s\n' % stdout )
        else:
            raise Exception
    except:
        sys.stdout.write( 'Could not determine Tophat version\n' )

    # Color or base space
    space = ''
    if options.color_space:
        space = '-C'

    # options.input1 lists one sample per line: <sample label> <FASTQ path>.
    # Record one accepted_hits output name per sample in the output txt file.
    file = open(options.input1, "r")
    lines = file.readlines()
    inputFileNames = []
    accepted_hits_outputNames = []
    outputName = options.outputTxtFile
    resDirName = os.path.dirname(outputName) + '/'
    out = open(outputName, "w")
    for line in lines:
        tab = line.split()
        inputFileNames.append(tab[1])
        aHitOutName = resDirName + tab[0] + '_' + options.accepted_hits_output_file
        accepted_hits_outputNames.append(aHitOutName)
        out.write(tab[0] + '\t' + aHitOutName + '\n')
    file.close()
    out.close()

    if options.input2:
        revFile = open(options.input2, "r")
        lines = revFile.readlines()
        inputRevFileNames = []
        for line in lines:
            revTab = line.split()
            inputRevFileNames.append(revTab[1])
        revFile.close()

    # Create the bowtie index if necessary.
    tmp_index_dirs = []
    index_paths = []
    tmp_index_dir = tempfile.mkdtemp(dir="%s" % os.getcwd())
    tmp_index_dirs.append(tmp_index_dir)
    if options.own_file:
        index_path = os.path.join( tmp_index_dir, '.'.join( os.path.split( options.own_file )[1].split( '.' )[:-1] ) )
        index_paths.append(index_path)
        try:
            os.link( options.own_file, index_path + '.fa' )
        except:
            # TopHat prefers (but doesn't require) the fasta file to be in the same directory, with a .fa extension.
            pass
        lCmdsTuples = []
        acronym = "tophat_index"
        jobdb = TableJobAdaptatorFactory.createJobInstance()
        iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
        cmd_index = iLauncher.getSystemCommand("bowtie-build", [space, "-f %s" % options.own_file, index_path])
        cmd2Launch = []
        cmdStart = []
        cmdFinish = []
        cmd2Launch.append(cmd_index)
        lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))
        iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)
    else:
        for file in inputFileNames:
            tmp_index_dir = tempfile.mkdtemp()
            index_path = tmp_index_dir + '/' + os.path.basename(file).split('.')[0]
            index_paths.append(index_path)
            tmp_index_dirs.append(tmp_index_dir)

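    # Launch TopHat: one job per FASTQ batch, grouped per sample so that the
    # batch outputs can be merged back into a single BAM per input file.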
    acronym = "tophat"
    jobdb = TableJobAdaptatorFactory.createJobInstance()
    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
    lCmdsTuples = []
    dCutOut2Out = {}
    lAllFile2remove = []
    for i in range(len(inputFileNames)):
        lCutOutput = []
        lCutInputFile = splitFastQ(inputFileNames[i], 20000)
        lAllFile2remove.extend(lCutInputFile)
        if options.input2:
            lCutPairInputFile = splitFastQ(inputRevFileNames[i], 20000)
            lAllFile2remove.extend(lCutPairInputFile)
        for j in range(len(lCutInputFile)):
            cutOutput = "%s_out" % lCutInputFile[j]
            lCutOutput.append(cutOutput)
            lAllFile2remove.append(cutOutput)
            cmd2Launch = []
            if options.input2:
                inputRevFile = lCutPairInputFile[j]
            else:
                inputRevFile = ""
            if options.own_file:
                cmd2Launch.append(_createTopHatCommand(iLauncher, options, index_paths[0], lCutInputFile[j], inputRevFile, space))
            else:
                cmd2Launch.append(_createTopHatCommand(iLauncher, options, index_paths[i], lCutInputFile[j], inputRevFile, space))
            cmdStart = []
            cmdFinish = ["shutil.copyfile( os.path.join( 'tophat_out', 'accepted_hits.bam' ), '%s')" % cutOutput]
            lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))
        dCutOut2Out[accepted_hits_outputNames[i]] = lCutOutput
    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)

    joinBAM(dCutOut2Out)
    FileUtils.removeFilesFromListIfExist(lAllFile2remove)

    if options.outputTar is not None:
        toTar(options.outputTar, accepted_hits_outputNames)

    # Clean up temp dirs
    for tmp_index_dir in tmp_index_dirs:
        if os.path.exists( tmp_index_dir ):
            shutil.rmtree( tmp_index_dir )

    for tmp in tmp_files:
        os.remove(tmp)


if __name__ == "__main__": __main__()