Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/ncList/ConvertToNCList.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 #! /usr/bin/env python | |
2 # | |
3 # Copyright INRA-URGI 2009-2012 | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 # | |
31 | |
32 import random, os, time, shutil | |
33 from optparse import OptionParser | |
34 from commons.core.parsing.ParserChooser import ParserChooser | |
35 from SMART.Java.Python.structure.Transcript import Transcript | |
36 from SMART.Java.Python.structure.Interval import Interval | |
37 from SMART.Java.Python.ncList.NCList import NCList | |
38 from SMART.Java.Python.ncList.NCListCursor import NCListCursor | |
39 from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle | |
40 from SMART.Java.Python.ncList.FileSorter import FileSorter | |
41 from SMART.Java.Python.ncList.NCListMerger import NCListMerger | |
42 from SMART.Java.Python.misc.Progress import Progress | |
43 from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress | |
44 try: | |
45 import cPickle as pickle | |
46 except: | |
47 import pickle | |
48 | |
class ConvertToNCList(object):
    """Convert a mapping/transcript file into an NC-list file.

    The full pipeline (see run()) is:
      1. sortFile():        split the input per chromosome with FileSorter,
      2. createNCLists():   build one NCList per chromosome,
      3. writeOutputFile(): merge the NC-lists into the output file,
      4. cleanFiles():      remove the temporary working directory.

    setInputFileName() and setOutputFileName() must be called before run().
    """

    def __init__(self, verbosity = 1):
        """Create a converter.

        verbosity -- trace level; messages are printed when it is > 2
                     (per-chromosome messages when it is > 3).
        """
        self._parsers                 = {}
        self._parser                  = None
        # Placeholder; setOutputFileName() replaces it with a path prefix (string).
        self._sortedFileNames         = {}
        self._inputFileName           = None
        self._outputFileName          = None
        self._directory               = None
        self._index                   = False
        self._ncLists                 = {}
        self._splittedFileNames       = {}
        self._nbElements              = 0
        self._nbElementsPerChromosome = {}
        # Suffix used to build the name of the temporary directory.
        self._randomNumber            = random.randint(0, 10000)
        self._sorted                  = False
        self._verbosity               = verbosity

    def setInputFileName(self, fileName, format):
        """Record the input file and build the parser matching 'format'."""
        self._inputFileName = fileName
        chooser = ParserChooser(self._verbosity)
        chooser.findFormat(format)
        self._parser = chooser.getParser(fileName)

    def setOutputFileName(self, fileName):
        """Record the output file and create the temporary working directory.

        The directory is named '<output without extension>_<random>_files'
        and is created next to the output file if it does not exist yet.
        The prefix handed to the file sorter always lies inside it.
        """
        self._outputFileName = fileName
        fileNameNoExtension  = os.path.splitext(fileName)[0]
        baseName             = "%s_%d" % (fileNameNoExtension, self._randomNumber)
        self._directory      = "%s_files" % (baseName)
        if not os.path.exists(self._directory):
            os.makedirs(self._directory)
        # Only the last path component of baseName may be joined here:
        # baseName still contains the directory part of the output file, and
        # os.path.join() would either discard self._directory (absolute
        # path) or point into a sub-directory that was never created
        # (relative path with a directory part).
        self._sortedFileNames = os.path.join(self._directory, os.path.basename(baseName))

    def setIndex(self, boolean):
        """Choose whether an index is created along with the NC-lists."""
        self._index = boolean

    def setSorted(self, boolean):
        """Declare whether the input file is already sorted."""
        self._sorted = boolean

    def sortFile(self):
        """Sort (or simply rewrite, if presorted) the input, one file per chromosome.

        Stores the per-chromosome file names and the element counts reported
        by the FileSorter.
        """
        if self._verbosity > 2:
            print("%s file %s..." % ("Rewriting" if self._sorted else "Sorting", self._inputFileName))
        startTime = time.time()
        # The sorter is run with a lowered verbosity (current level minus 4).
        fs = FileSorter(self._parser, self._verbosity-4)
        fs.setPresorted(self._sorted)
        fs.perChromosome(True)
        fs.setOutputFileName(self._sortedFileNames)
        fs.sort()
        self._splittedFileNames       = fs.getOutputFileNames()
        self._nbElementsPerChromosome = fs.getNbElementsPerChromosome()
        self._nbElements              = fs.getNbElements()
        endTime = time.time()
        if self._verbosity > 2:
            print("    ...done (%ds)" % (endTime - startTime))

    def createNCLists(self):
        """Build one NCList per chromosome from the files written by sortFile()."""
        self._ncLists = {}
        if self._verbosity > 2:
            print("Creating NC-list for %s..." % (self._inputFileName))
        startTime = time.time()
        # items() (rather than iteritems()) runs on both Python 2 and 3.
        for chromosome, fileName in self._splittedFileNames.items():
            if self._verbosity > 3:
                print("  chromosome %s" % (chromosome))
            ncList = NCList(self._verbosity)
            if self._index:
                ncList.createIndex(True)
            ncList.setChromosome(chromosome)
            ncList.setFileName(fileName)
            ncList.setNbElements(self._nbElementsPerChromosome[chromosome])
            ncList.buildLists()
            self._ncLists[chromosome] = ncList
        endTime = time.time()
        if self._verbosity > 2:
            print("    ...done (%ds)" % (endTime - startTime))

    def writeOutputFile(self):
        """Merge the per-chromosome NC-lists into the output file."""
        merger = NCListMerger(self._verbosity)
        merger.setFileName(self._outputFileName)
        merger.addIndex(self._index)
        merger.setNCLists(self._ncLists)
        merger.merge()

    def cleanFiles(self):
        """Remove the temporary working directory, if there is one.

        Does nothing when no directory was set or when it is already gone,
        so that it can safely be called from a 'finally' clause.
        """
        if self._directory is not None and os.path.exists(self._directory):
            shutil.rmtree(self._directory)

    def run(self):
        """Run the whole conversion; temporary files are removed even on failure."""
        try:
            self.sortFile()
            self.createNCLists()
            self.writeOutputFile()
        finally:
            self.cleanFiles()

    def getSortedFileNames(self):
        """Return the per-chromosome file names stored by sortFile()."""
        return self._splittedFileNames

    def getNbElements(self):
        """Return the number of elements reported by the sorter."""
        return self._nbElements

    def getNbElementsPerChromosome(self):
        """Return the per-chromosome element counts reported by the sorter."""
        return self._nbElementsPerChromosome

    def getNCLists(self):
        """Return the dictionary chromosome -> NCList built by createNCLists()."""
        return self._ncLists

    def getTmpDirectory(self):
        """Return the temporary working directory (None before setOutputFileName())."""
        return self._directory
152 | |
153 | |
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the conversion.
    description = "Convert To NC-List v1.0.0: Convert a mapping or transcript file into a NC-List. [Category: NC-List]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="Query input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of previous file [compulsory] [format: transcript file format]")
    parser.add_option("-d", "--index",     dest="index",          action="store_true", default=False,                help="create an index [default: false] [format: boolean]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="Output file [compulsory] [format: output file in NCList format]")
    parser.add_option("-s", "--sorted",    dest="sorted",         action="store_true", default=False,                help="input file is already sorted [format: boolean] [default: False]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="Trace level [format: int] [default: 1]")
    (options, args) = parser.parse_args()

    # The options documented as compulsory have no default: report a missing
    # one with a usage message instead of passing None into the converter.
    for dest, flags in (("inputFileName", "-i/--input"), ("format", "-f/--format"), ("outputFileName", "-o/--output")):
        if getattr(options, dest) is None:
            parser.error("option %s is compulsory" % (flags))

    ctncl = ConvertToNCList(options.verbosity)
    ctncl.setInputFileName(options.inputFileName, options.format)
    ctncl.setOutputFileName(options.outputFileName)
    ctncl.setIndex(options.index)
    ctncl.setSorted(options.sorted)
    ctncl.run()
172 |