diff uniqprimer-0.5.0/primertools/includefilemanager.py @ 3:3249d78ecfc2 draft

Uploaded
author dereeper
date Mon, 03 Jan 2022 09:56:55 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uniqprimer-0.5.0/primertools/includefilemanager.py	Mon Jan 03 09:56:55 2022 +0000
@@ -0,0 +1,132 @@
+'''
+Created on Jan 1, 2011
+
+@author: John L. Herndon
+@contact: herndon@cs.colostate.edu
+@organization: Colorado State University
+@group: Computer Science Department, Asa Ben-Hur's laboratory 
+'''
+
+
+import fastaparser
+import utils
+import os
+import programs
+import nucmerparser
+import copy
+
+class IncludeFileManager( object ):
+    """
+    A class to manage include files
+    """
+    #This class needs some work. Need to come up with a way to find unique sequences between all include files....
+
+    def __init__( self ):
+        """
+        Constructor
+        """
+        self.includeFiles = [ ]
+        self.nucmer = programs.Nucmer( )
+        self.isExcludeFileInitialized = False
+        self.isReferenceFileInitialized = False
+        self.referenceFile = None
+        self.referenceSequence = None
+        self.uniqueSequences = None
+          
+    def setExcludeFile( self, excludeFileName ):
+        """
+        A function to set the exclude file that will be used when nucmer is called
+        """
+        
+        utils.logMessage( "IncludeFileManager::setExcludeFile( )", "fileName {0}".format( excludeFileName ) )
+        self.excludeFileName = excludeFileName
+        self.isExcludeFileInitialized = True
+    
+  
+    def findUniqueSequencesInFile(self, doWantFile, doNotWantFile ):
+        utils.logMessage( "IncludeFileManager::findUniqueSequence( )", "running nucmer for reference file: {0}".format( doWantFile ) )
+        coordFile = self.nucmer.execute( [ doWantFile, doNotWantFile ] )
+        
+        matches = nucmerparser.parseCoordMatchFile( coordFile )
+        sequences = fastaparser.parseFastaFileAsPrimerSequence( doWantFile )
+        
+        for match in matches:
+            if sequences.has_key( match.seqID ):
+                primerData = sequences[ match.seqID ]
+                primerData.addMatch( match )
+            else:
+                print "Warning: id from .coords file not found in sequence data..."
+                utils.logMessage( "IncludeFileManager::processMatches( )", "WARNING - an ID was read in a Match that does not correspond to a sequence read from the fasta file!" )
+        
+        returnValue = [ ]
+    
+        for key in sequences.keys( ):
+            sequence = sequences[ key ]
+            subSequences = sequence.getNonMatchedSubSequences( )
+            returnValue.extend( subSequences )
+            
+        return returnValue
+        
+        
+    def findCommonSequencesInFile(self, want, alsoWant ):
+         utils.logMessage( "IncludeFileManager::findUniqueSequence( )", "running nucmer for reference file: {0}".format( want ) )
+         
+         print want, alsoWant
+         coordFile = self.nucmer.execute( [ want, alsoWant ] )
+         
+         matches = nucmerparser.parseCoordMatchFile( coordFile )
+         sequences = fastaparser.parseFastaFileAsPrimerSequence( want )
+         
+         for match in matches:
+             if sequences.has_key( match.seqID ):
+                 primerData = sequences[ match.seqID ]
+                 primerData.addMatch( match )
+         
+         returnValue = [ ]
+         for key in sequences:
+             sequence = sequences[ key ]
+             subSequences = sequence.getMatchedSubSequences( )
+             returnValue.extend( subSequences )
+             
+             
+         return returnValue
+         
+ 
+    def processIncludeFile( self, includeFileName ):
+        """
+        A function that adds and processes and include file.
+        An exclude file must be set for this function to be called.
+        """
+        
+        utils.logMessage( "IncludeFileManager::processIncludeFile( )", "processing {0}".format( includeFileName ) )
+        
+        if self.isExcludeFileInitialized == False:
+            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "no exclude file set".format( includeFileName ) )
+            raise utils.ModuleNotInitializedException( "includefilemanager", "no exclude file set" )
+        
+        if self.isReferenceFileInitialized == False:
+            
+            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "running nucmer for reference file: {0}".format( includeFileName ) )
+            self.uniqueSequences = self.findUniqueSequencesInFile( includeFileName, self.excludeFileName )
+            
+            self.referenceFile = includeFileName
+            self.isReferenceFileInitialized = True
+            
+        else:
+            #write the unique sequences to a temp file
+            tempSequences = utils.getTemporaryDirectory( ) + "/tempSequences.fasta"
+            fastaparser.writeFastaFile( self.uniqueSequences, tempSequences )
+            self.findCommonSequencesInFile( includeFileName, tempSequences )
+            self.includeFiles.append( includeFileName )
+            
+
+    def getUniqueSequences( self ):
+        """
+        getUniqueSequences - return a dictionary of all sequences that are found in include fasta files, but not the 
+        combined exclude fasta files. The dictionary is indexed by the file ID
+        """
+        
+        return self.uniqueSequences
+        
+        
+   
\ No newline at end of file