6
|
1 import os
|
|
2 import random
|
|
3 from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
|
|
4 from commons.core.writer.TranscriptWriter import TranscriptWriter
|
|
5 from SMART.Java.Python.structure.Transcript import Transcript
|
|
6 from commons.core.parsing.GffParser import GffParser
|
|
7
|
|
class MockFindOverlaps_randomExample(object):
    """Generate a random region file and sort it in place.

    The unsorted content is produced by
    ``MockFindOverlaps_randomExample_NonOrder``; this class only adds the
    sorting step on top of it.
    """

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Store the generation parameters.

        :param fileName: path of the file to generate and then sort.
        :param ID: identifier forwarded to the unsorted generator.
        :param numberOfReads: number of regions forwarded to the generator.
        :param chromSize: chromosome size forwarded to the generator.
        """
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Write the random regions, then sort the output file in place.

        The external ``sort`` program is invoked with the options
        ``-f -n -k4 -k5.4rn`` and the same file as input and output.  Its
        exit status is ignored.

        :raises OSError: if the ``sort`` executable cannot be started.
        """
        # Function-scope import: the module header is left untouched.
        import subprocess

        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(
            self._fileName, self._ID, self._numberOfReads, self._chromSize)
        iMFO_RE.write()
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact instead of letting the shell split or
        # interpret them.
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn',
                         '-o', self._fileName, self._fileName])
|
|
21
|
|
class MockFindOverlaps_randomExample_NonOrder(object):
    """Write random regions to a file without sorting them afterwards."""

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Remember the output path and the generation parameters.

        :param fileName: path handed to the generator as its output file.
        :param ID: identifier kept on the instance (not read by ``write``).
        :param numberOfReads: number of regions requested from the generator.
        :param chromSize: value handed to the generator as the genome size.
        """
        self._fileName, self._ID = fileName, ID
        self._numberOfReads, self._chromSize = numberOfReads, chromSize

    def write(self):
        """Configure a ``RandomRegionsGenerator`` and run it.

        Region sizes are bounded by 36 and 100, the chromosome name is
        ``"chr1"``; genome size, region count and output file come from the
        constructor arguments.
        """
        generator = RandomRegionsGenerator(2)
        # Fixed settings.
        generator.setMinSize(36)
        generator.setMaxSize(100)
        generator.setGenomeSize(self._chromSize)
        generator.setChromosomeName("chr1")
        generator.setStrands(False)
        # Per-instance settings.
        generator.setNumber(self._numberOfReads)
        generator.setOutputFile(self._fileName)
        generator.run()
|
|
40
|
|
41
|
|
class MockFindOverlaps_randomExample_MOverlaps(object):
    """Build a random reference file and a query file for overlap tests.

    The reference file is produced by ``MockFindOverlaps_randomExample``;
    the query file receives random transcripts chosen so that the running
    overlap count against the reference ends at ``overlapNumber``.
    """

    def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
        """Store the file names and generation parameters.

        :param refFileName: path of the reference file to generate.
        :param queryFileName: path of the query file to generate.
        :param overlapNumber: overlap count the query set must reach.
        :param numberOfReads: number of reference regions to generate.
        :param chromSize: chromosome size used for the reference file.
        """
        self._refFileName = refFileName
        self._queryFileName = queryFileName
        self._overlapNumber = overlapNumber
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def createRandomExample(self):
        """Write the reference file and the query file, then sort both.

        Rounds of 10 random query transcripts are written until a round
        finishes with a running overlap count equal to ``overlapNumber``.
        Within a round, a candidate whose running count would exceed the
        target is redrawn.

        NOTE(review): the loop nesting below was reconstructed from the
        statement order of a listing that had lost its indentation --
        confirm against the canonical file.
        """
        refId = 'reference'
        iRSS = MockFindOverlaps_randomExample(
            self._refFileName, refId, self._numberOfReads, self._chromSize)
        iRSS.write()
        self.queryWriter = TranscriptWriter(self._queryFileName, 'gff3')
        totalOverlap = 0
        while totalOverlap != self._overlapNumber:
            totalOverlap = 0
            for i in range(10):
                query = self.createRandomTranscript(i, refId)
                overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                # Redraw until the running count does not overshoot the target.
                while overlapNumber > self._overlapNumber:
                    query = self.createRandomTranscript(i, refId)
                    overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                totalOverlap = overlapNumber
                self.queryWriter.addTranscript(query)
                self.queryWriter.write()
        self.queryWriter.close()

        self._sortFile(self._refFileName)
        self._sortFile(self._queryFileName)

    def _sortFile(self, fileName):
        """Sort *fileName* in place with ``sort -f -n -k4 -k5.4rn``.

        The exit status of ``sort`` is ignored.

        :raises OSError: if the ``sort`` executable cannot be started.
        """
        # Function-scope import: the module header is left untouched.
        import subprocess

        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact.
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn',
                         '-o', fileName, fileName])

    def createRandomTranscript(self, cpt, id):
        """Return a random transcript on the '+' strand of 'chr1'.

        The size is drawn from 36..100 and the start from 0..(1000 - size),
        both inclusive.  The transcript's ``ID`` tag and name are set to
        ``'<id>_<cpt>'``.

        :param cpt: counter passed to the generator and used in the name.
        :param id: prefix used in the transcript's ID and name.
        """
        iRRG = RandomRegionsGenerator(2)
        strand = '+'
        chromosome = 'chr1'
        size = random.randint(36, 100)
        iRRG.setSize(size)
        # NOTE(review): the upper bound is a fixed 1000, not self._chromSize
        # -- confirm this is intended.
        start = random.randint(0, 1000 - size)
        transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
        IDdetail = '%s_%d' % (id, cpt)
        transcript.setTagValue('ID', IDdetail)
        transcript.setName(IDdetail)
        return transcript

    def isOverlap(self, query, ref):
        """Return True when *query* and *ref* share at least one position.

        Both arguments must provide ``getStart()`` and ``getEnd()``; the
        bounds are compared inclusively.
        """
        return query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()

    def getIntervalFromAdress(self, fileName, address):
        """Return the transcript read from *fileName* at offset *address*.

        :param fileName: path of the GFF file to parse.
        :param address: file offset; converted with ``int()``.
        """
        iParser = GffParser(fileName)
        try:
            iParser.gotoAddress(int(address))
            return iParser.getNextTranscript()
        finally:
            # Close the parser even when seeking or parsing raises.
            iParser.close()

    def getOverlapNumber(self, query, refFileName, totalOverlap):
        """Return *totalOverlap* plus the number of reference lines that
        overlap *query*.

        :param query: transcript tested against every reference line.
        :param refFileName: path of the reference file.
        :param totalOverlap: count carried over from previous queries.
        """
        count = totalOverlap
        with open(refFileName, 'r') as fRef:
            # tell() is taken before each readline() so that the offset of
            # the line's first byte can be handed to the parser.
            address = fRef.tell()
            line = fRef.readline()
            while line != '':
                ref = self.getIntervalFromAdress(refFileName, address)
                if self.isOverlap(query, ref):
                    count += 1
                address = fRef.tell()
                line = fRef.readline()
        return count
|
|
117
|
|
118
|