view SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
line wrap: on
line source

import os
import random
import subprocess

from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Transcript import Transcript
from commons.core.parsing.GffParser import GffParser

class MockFindOverlaps_randomExample(object):
    """Generate a random example file and sort it in place.

    The generation itself is delegated to
    MockFindOverlaps_randomExample_NonOrder; this class only adds the
    sorting step on top of it.
    """

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Store the generation parameters.

        Args:
            fileName: path of the file that is generated, then sorted in place.
            ID: identifier forwarded to the non-ordered generator.
            numberOfReads: forwarded to the non-ordered generator.
            chromSize: forwarded to the non-ordered generator.
        """
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Write the random file, then sort it in place with ``sort``.

        The exit status of ``sort`` is ignored, so a failed sort leaves the
        generated file unsorted rather than raising.
        NOTE(review): unlike a shell invocation, a missing ``sort`` executable
        now surfaces as OSError instead of being silently ignored.
        """
        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
        iMFO_RE.write()
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact; the sort options are unchanged.
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', self._fileName, self._fileName])
		
class MockFindOverlaps_randomExample_NonOrder(object):
    """Produce a file of random regions on "chr1".

    This class does not sort the output itself; it only configures a
    RandomRegionsGenerator and runs it.
    """

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Remember the output path and the generation settings.

        Args:
            fileName: path handed to the generator as its output file.
            ID: stored on the instance; write() does not read it.
            numberOfReads: number passed to the generator's setNumber().
            chromSize: value passed to the generator's setGenomeSize().
        """
        self._fileName, self._ID = fileName, ID
        self._numberOfReads, self._chromSize = numberOfReads, chromSize

    def write(self):
        """Configure a RandomRegionsGenerator and run it."""
        generator = RandomRegionsGenerator(2)

        # Size bounds handed to the generator.
        generator.setMinSize(36)
        generator.setMaxSize(100)

        # Where the regions live.
        generator.setGenomeSize(self._chromSize)
        generator.setChromosomeName("chr1")
        generator.setStrands(False)

        # How many regions, and where they are written.
        generator.setNumber(self._numberOfReads)
        generator.setOutputFile(self._fileName)

        generator.run()
		

class MockFindOverlaps_randomExample_MOverlaps(object):
    """Build a random reference file plus a query file with a target overlap count.

    The reference file is produced by MockFindOverlaps_randomExample; ten
    random query transcripts are then drawn until the accumulated number of
    query/reference overlaps equals ``overlapNumber``.
    """

    def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
        """Store the parameters of the example.

        Args:
            refFileName: path of the reference file to generate.
            queryFileName: path of the query file to generate.
            overlapNumber: total number of overlaps the queries must reach.
            numberOfReads: forwarded to the reference generator.
            chromSize: forwarded to the reference generator.
        """
        self._refFileName = refFileName
        self._queryFileName = queryFileName
        self._overlapNumber = overlapNumber
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def createRandomExample(self):
        """Write the reference file and a matching query file, then sort both.

        Each attempt draws ten queries while keeping the running overlap
        total at or below the target; an attempt is accepted only when the
        total is exactly ``overlapNumber``. If the target is 0 the loop
        condition is satisfied immediately and no query is generated.
        """
        refId = 'reference'
        iRSS = MockFindOverlaps_randomExample(self._refFileName, refId, self._numberOfReads, self._chromSize)
        iRSS.write()
        self.queryWriter = TranscriptWriter(self._queryFileName, 'gff3')
        # Queries are buffered per attempt and handed to the writer only once
        # an attempt succeeds, so that queries drawn during abandoned attempts
        # do not end up in the query file.
        acceptedQueries = []
        totalOverlap = 0
        while totalOverlap != self._overlapNumber:
            acceptedQueries = []
            totalOverlap = 0
            for i in range(10):
                query = self.createRandomTranscript(i, refId)
                overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                # Redraw this query until it no longer overshoots the target.
                while overlapNumber > self._overlapNumber:
                    query = self.createRandomTranscript(i, refId)
                    overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                totalOverlap = overlapNumber
                acceptedQueries.append(query)
        for query in acceptedQueries:
            self.queryWriter.addTranscript(query)
        self.queryWriter.write()
        self.queryWriter.close()

        self._sortFile(self._refFileName)
        self._sortFile(self._queryFileName)

    def _sortFile(self, fileName):
        """Sort ``fileName`` in place with the external ``sort`` command.

        An argument list is used instead of a shell string so that file names
        containing spaces or shell metacharacters are passed through intact.
        The exit status is ignored.
        """
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, fileName])

    def createRandomTranscript(self, cpt, id):
        """Create one random '+' strand transcript on chr1.

        Args:
            cpt: counter passed to the generator and used in the name suffix.
            id: prefix of the transcript ID and name ('<id>_<cpt>').

        Returns:
            The transcript returned by RandomRegionsGenerator.createTranscript,
            with its 'ID' tag and name set.
        """
        iRRG = RandomRegionsGenerator(2)
        strand = '+'
        chromosome = 'chr1'
        size = random.randint(36, 100)
        iRRG.setSize(size)
        # NOTE(review): the start is bounded by a hard-coded 1000 rather than
        # self._chromSize -- confirm whether that is intended.
        start = random.randint(0, 1000 - size)
        transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
        IDdetail = '%s_%d' % (id, cpt)
        transcript.setTagValue('ID', IDdetail)
        transcript.setName(IDdetail)
        return transcript

    def isOverlap(self, query, ref):
        """Return True when the closed intervals of query and ref intersect.

        Both arguments must provide getStart() and getEnd().
        """
        return query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()

    def getIntervalFromAdress(self, fileName, address):
        """Return the next transcript parsed from ``fileName`` at ``address``.

        Args:
            fileName: path of the GFF file to parse.
            address: file offset (converted with int()) to jump to first.
        """
        iParser = GffParser(fileName)
        try:
            iParser.gotoAddress(int(address))
            return iParser.getNextTranscript()
        finally:
            # Close the parser even if seeking or parsing fails.
            iParser.close()

    def getOverlapNumber(self, query, refFileName, totalOverlap):
        """Return ``totalOverlap`` plus the number of reference lines overlapping ``query``.

        The reference file is walked line by line; the offset of each line is
        used to parse the corresponding transcript with getIntervalFromAdress.
        """
        count = totalOverlap
        with open(refFileName, 'r') as fRef:
            # readline() (not file iteration) is used so that tell() stays
            # usable between reads.
            address = fRef.tell()
            line = fRef.readline()
            while line != '':
                ref = self.getIntervalFromAdress(refFileName, address)
                if self.isOverlap(query, ref):
                    count += 1
                address = fRef.tell()
                line = fRef.readline()
        return count