diff smart_toolShed/SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,118 @@
+import os
+import random
+import subprocess
+from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.parsing.GffParser import GffParser
+
+class MockFindOverlaps_randomExample(object):
+    """Write a file of random regions and sort it in place.
+
+    The unsorted content is produced by
+    MockFindOverlaps_randomExample_NonOrder; the file is then reordered
+    with the external ``sort`` command.
+    """
+
+    def __init__(self, fileName, ID, numberOfReads, chromSize):
+        """Store the generation parameters.
+
+        fileName      -- path of the file to create and sort in place
+        ID            -- identifier handed to the unsorted generator
+        numberOfReads -- handed to the unsorted generator
+        chromSize     -- handed to the unsorted generator
+        """
+        self._fileName = fileName
+        self._ID = ID
+        self._numberOfReads = numberOfReads
+        self._chromSize = chromSize
+
+    def write(self):
+        """Generate the random file, then sort it in place.
+
+        The sort command is started from an argument list instead of a
+        shell command line, so a file name containing spaces or shell
+        metacharacters reaches ``sort`` unchanged.  The exit status of
+        ``sort`` is ignored; OSError is raised if the command cannot be
+        started at all.
+        """
+        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
+        iMFO_RE.write()
+        # Same options as before: keys 4 and 5.4rn, output written over the input.
+        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', self._fileName, self._fileName])
+		
+class MockFindOverlaps_randomExample_NonOrder(object):
+    """Produce a file of random regions, left in generation order."""
+
+    def __init__(self, fileName, ID, numberOfReads, chromSize):
+        """Record the output path, the identifier, the read count and the chromosome size."""
+        self._fileName, self._ID = fileName, ID
+        self._numberOfReads, self._chromSize = numberOfReads, chromSize
+
+    def write(self):
+        """Configure a RandomRegionsGenerator and run it on the output file."""
+        # NOTE(review): the meaning of the constructor argument 2 is not visible here.
+        generator = RandomRegionsGenerator(2)
+        # Settings are applied one after the other, in the listed order.
+        configuration = (
+            (generator.setMinSize, 36),
+            (generator.setMaxSize, 100),
+            (generator.setGenomeSize, self._chromSize),
+            (generator.setChromosomeName, "chr1"),
+            (generator.setStrands, False),
+            (generator.setNumber, self._numberOfReads),
+            (generator.setOutputFile, self._fileName),
+        )
+        for applySetting, value in configuration:
+            applySetting(value)
+        generator.run()
+		
+
+class MockFindOverlaps_randomExample_MOverlaps(object):
+    """Create a reference file and a query file with a requested overlap count.
+
+    The reference file is written by MockFindOverlaps_randomExample; the
+    queries are random transcripts drawn until their cumulative number of
+    overlaps with the reference equals the requested value.
+    """
+
+    # Number of query transcripts drawn in one generation round.
+    _QUERIES_PER_ROUND = 10
+    # Upper bound used when drawing the start of a query.
+    # NOTE(review): fixed value, independent of chromSize -- confirm this is intended.
+    _QUERY_RANGE = 1000
+
+    def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
+        """Store the generation parameters.
+
+        refFileName   -- path of the reference file to create
+        queryFileName -- path given to the query TranscriptWriter
+        overlapNumber -- cumulative overlap count the queries must reach
+        numberOfReads -- handed to the reference generator
+        chromSize     -- handed to the reference generator
+        """
+        self._refFileName = refFileName
+        self._queryFileName = queryFileName
+        self._overlapNumber = overlapNumber
+        self._numberOfReads = numberOfReads
+        self._chromSize = chromSize
+
+    def createRandomExample(self):
+        """Write the reference file and the query file, then sort both.
+
+        Queries are drawn in rounds of _QUERIES_PER_ROUND.  A round is kept
+        only when its cumulative overlap count equals the requested number;
+        otherwise it is discarded and a new round is drawn.  Queries are
+        buffered and handed to the writer once a round succeeds, so queries
+        of discarded rounds never reach the output.  When the requested
+        number is 0 no round is drawn and no query is added.
+        """
+        refId = 'reference'
+        iRSS = MockFindOverlaps_randomExample(self._refFileName, refId, self._numberOfReads, self._chromSize)
+        iRSS.write()
+        self.queryWriter = TranscriptWriter(self._queryFileName, 'gff3')
+        queries = []
+        totalOverlap = 0
+        while totalOverlap != self._overlapNumber:
+            # Start a fresh round: forget the queries of the failed one.
+            queries = []
+            totalOverlap = 0
+            for i in range(self._QUERIES_PER_ROUND):
+                query = self.createRandomTranscript(i, refId)
+                overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
+                # Redraw this query while it would push the running total past the target.
+                while overlapNumber > self._overlapNumber:
+                    query = self.createRandomTranscript(i, refId)
+                    overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
+                totalOverlap = overlapNumber
+                queries.append(query)
+        for query in queries:
+            self.queryWriter.addTranscript(query)
+        self.queryWriter.write()
+        self.queryWriter.close()
+        # NOTE(review): a rename of "<queryFileName>.gff3" to queryFileName was
+        # disabled here; the sort below expects the file under queryFileName.
+#        os.rename("%s.gff3" % (self._queryFileName), self._queryFileName)
+
+        self._sortFile(self._refFileName)
+        self._sortFile(self._queryFileName)
+
+    def _sortFile(self, fileName):
+        """Sort fileName in place with the external ``sort`` command.
+
+        The command is started from an argument list, not through a shell,
+        so the file name is passed to ``sort`` unchanged.  The exit status
+        is ignored.
+        """
+        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, fileName])
+
+    def createRandomTranscript(self, cpt, id):
+        """Return a random '+' strand transcript on chr1 named '<id>_<cpt>'.
+
+        The size is drawn in [36, 100] and the start in
+        [0, _QUERY_RANGE - size].
+        """
+        iRRG = RandomRegionsGenerator(2)
+        strand = '+'
+        chromosome = 'chr1'
+        size = random.randint(36, 100)
+        iRRG.setSize(size)
+        start = random.randint(0, self._QUERY_RANGE - size)
+        transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
+        IDdetail = '%s_%d' % (id, cpt)
+        transcript.setTagValue('ID', IDdetail)
+        transcript.setName(IDdetail)
+        return transcript
+
+    def isOverlap(self, query, ref):
+        """Tell whether query and ref share at least one position (bounds inclusive)."""
+        return query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()
+
+    def getIntervalFromAdress(self, fileName, address):
+        """Return the transcript read by a GffParser positioned at address in fileName."""
+        iParser = GffParser(fileName)
+        try:
+            iParser.gotoAddress(int(address))
+            return iParser.getNextTranscript()
+        finally:
+            # Release the parser even when reading fails.
+            iParser.close()
+
+    def getOverlapNumber(self, query, refFileName, totalOverlap):
+        """Return totalOverlap plus the number of reference records overlapping query.
+
+        The reference file is walked line by line; the offset of each line
+        start is used to load the corresponding transcript.
+        """
+        count = totalOverlap
+        with open(refFileName, 'r') as fRef:
+            address = fRef.tell()
+            line = fRef.readline()
+            while line != '':
+                ref = self.getIntervalFromAdress(refFileName, address)
+                if self.isOverlap(query, ref):
+                    count += 1
+                address = fRef.tell()
+                line = fRef.readline()
+        return count
+	
+