Mercurial > repos > yufei-luo > s_mart
view SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line source
"""Builders of random reference/query interval files used to test FindOverlaps."""

import os
import random
import subprocess

from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.Transcript import Transcript
from commons.core.parsing.GffParser import GffParser

# Bounds (inclusive) of the size of every randomly generated interval.
_MIN_READ_SIZE = 36
_MAX_READ_SIZE = 100


def _sortFileInPlace(fileName):
    """Sort *fileName* in place with the external ``sort`` program.

    The keys are field 4, then field 5 with the ``rn`` modifiers (presumably
    the start and end columns of a GFF file -- confirm against the format
    emitted by the writers).  The command is passed as an argument list, so
    file names containing spaces or shell metacharacters are handled safely.
    The exit status of ``sort`` is ignored; ``OSError`` is raised if the
    ``sort`` executable cannot be started at all.
    """
    subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, fileName])


class MockFindOverlaps_randomExample(object):
    """Write a file of random regions on chr1, then sort it in place."""

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Store the output file name, an ID, the number of regions and the genome size."""
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Generate the unsorted random file, then sort it in place."""
        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
        iMFO_RE.write()
        _sortFileInPlace(self._fileName)


class MockFindOverlaps_randomExample_NonOrder(object):
    """Write a file of random regions on chr1, in generation order (unsorted)."""

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Store the output file name, an ID, the number of regions and the genome size.

        The ID is stored but not used by write().
        """
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Configure a RandomRegionsGenerator and let it write the output file."""
        # NOTE(review): the constructor argument is presumably a verbosity level -- confirm.
        iRRG = RandomRegionsGenerator(2)
        iRRG.setMinSize(_MIN_READ_SIZE)
        iRRG.setMaxSize(_MAX_READ_SIZE)
        iRRG.setGenomeSize(self._chromSize)
        iRRG.setChromosomeName("chr1")
        iRRG.setStrands(False)
        iRRG.setNumber(self._numberOfReads)
        iRRG.setOutputFile(self._fileName)
        iRRG.run()


class MockFindOverlaps_randomExample_MOverlaps(object):
    """Build a random reference file and a query file with a fixed total number of overlaps."""

    # Number of query transcripts written to the query file.
    _NB_QUERIES = 10
    # Query transcripts start within [0, _QUERY_REGION_SIZE - size].
    # NOTE(review): this is independent of chromSize -- confirm that is intended.
    _QUERY_REGION_SIZE = 1000

    def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
        """Store the file names, the target overlap count and the reference parameters."""
        self._refFileName = refFileName
        self._queryFileName = queryFileName
        self._overlapNumber = overlapNumber
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def createRandomExample(self):
        """Write the reference file, then a query file matching the overlap target.

        Sets of _NB_QUERIES random queries are drawn until the cumulated
        number of query/reference overlaps equals the requested overlap
        number.  Only the queries of the successful set are written; queries
        drawn during discarded attempts are dropped, so the query file always
        matches the requested overlap number.  If the requested overlap number
        is 0, no set is drawn and the query file is left without transcripts.
        Both files are finally sorted in place.
        """
        referenceId = 'reference'
        iRSS = MockFindOverlaps_randomExample(self._refFileName, referenceId, self._numberOfReads, self._chromSize)
        iRSS.write()
        self.queryWriter = TranscriptWriter(self._queryFileName, 'gff3')
        totalOverlap = 0
        queries = []
        while totalOverlap != self._overlapNumber:
            # Start a fresh attempt: forget the queries of the failed one.
            totalOverlap = 0
            queries = []
            for i in range(self._NB_QUERIES):
                query = self.createRandomTranscript(i, referenceId)
                overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                # Redraw this query until it does not overshoot the target.
                while overlapNumber > self._overlapNumber:
                    query = self.createRandomTranscript(i, referenceId)
                    overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                totalOverlap = overlapNumber
                queries.append(query)
        for query in queries:
            self.queryWriter.addTranscript(query)
        self.queryWriter.write()
        self.queryWriter.close()
        _sortFileInPlace(self._refFileName)
        _sortFileInPlace(self._queryFileName)

    def createRandomTranscript(self, cpt, id):
        """Return a random '+' strand transcript on chr1, named '<id>_<cpt>'."""
        iRRG = RandomRegionsGenerator(2)
        strand = '+'
        chromosome = 'chr1'
        size = random.randint(_MIN_READ_SIZE, _MAX_READ_SIZE)
        iRRG.setSize(size)
        start = random.randint(0, self._QUERY_REGION_SIZE - size)
        transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
        IDdetail = '%s_%d' % (id, cpt)
        transcript.setTagValue('ID', IDdetail)
        transcript.setName(IDdetail)
        return transcript

    def isOverlap(self, query, ref):
        """Return True if the [start, end] ranges of *query* and *ref* intersect (bounds inclusive)."""
        return query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()

    def getIntervalFromAdress(self, fileName, address):
        """Return the transcript that GffParser reads at offset *address* of *fileName*."""
        iParser = GffParser(fileName)
        try:
            iParser.gotoAddress(int(address))
            return iParser.getNextTranscript()
        finally:
            # Release the parser even if seeking or parsing fails.
            iParser.close()

    def getOverlapNumber(self, query, refFileName, totalOverlap):
        """Return *totalOverlap* plus the number of reference intervals overlapping *query*.

        The reference file is scanned line by line; the offset of each line is
        handed to getIntervalFromAdress to parse the interval found there.
        """
        count = totalOverlap
        with open(refFileName, 'r') as fRef:
            while True:
                # The offset must be taken before the line is consumed.
                address = fRef.tell()
                if fRef.readline() == '':
                    break
                ref = self.getIntervalFromAdress(refFileName, address)
                if self.isOverlap(query, ref):
                    count += 1
        return count