comparison uniqprimer-0.5.0/primertools/includefilemanager.py @ 3:3249d78ecfc2 draft

Uploaded
author dereeper
date Mon, 03 Jan 2022 09:56:55 +0000
parents
children
comparison
equal deleted inserted replaced
2:7e0438dad4e9 3:3249d78ecfc2
1 '''
2 Created on Jan 1, 2011
3
4 @author: John L. Herndon
5 @contact: herndon@cs.colostate.edu
6 @organization: Colorado State University
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
8 '''
9
10
11 import fastaparser
12 import utils
13 import os
14 import programs
15 import nucmerparser
16 import copy
17
class IncludeFileManager( object ):
    """
    A class to manage include files.

    The first include file handed to processIncludeFile( ) becomes the
    reference file: nucmer is run on it against the exclude file and the
    sub-sequences reported as non-matched are stored in self.uniqueSequences.
    Later include files are aligned against those stored sequences.
    """
    #This class needs some work. Need to come up with a way to find unique sequences between all include files....

    def __init__( self ):
        """
        Constructor
        """
        self.includeFiles = [ ]
        self.nucmer = programs.Nucmer( )
        self.isExcludeFileInitialized = False
        self.isReferenceFileInitialized = False
        # Set by setExcludeFile( ); initialised here so the attribute always exists.
        self.excludeFileName = None
        self.referenceFile = None
        self.referenceSequence = None
        self.uniqueSequences = None

    def setExcludeFile( self, excludeFileName ):
        """
        A function to set the exclude file that will be used when nucmer is called
        """

        utils.logMessage( "IncludeFileManager::setExcludeFile( )", "fileName {0}".format( excludeFileName ) )
        self.excludeFileName = excludeFileName
        self.isExcludeFileInitialized = True

    def findUniqueSequencesInFile( self, doWantFile, doNotWantFile ):
        """
        Run nucmer on doWantFile against doNotWantFile and return a list holding
        the non-matched sub-sequences of every sequence read from doWantFile.

        Matches whose seqID is not present in the parsed fasta data are
        reported (stdout and log) and otherwise ignored.
        """
        utils.logMessage( "IncludeFileManager::findUniqueSequencesInFile( )", "running nucmer for reference file: {0}".format( doWantFile ) )
        coordFile = self.nucmer.execute( [ doWantFile, doNotWantFile ] )

        matches = nucmerparser.parseCoordMatchFile( coordFile )
        sequences = fastaparser.parseFastaFileAsPrimerSequence( doWantFile )

        for match in matches:
            # "in" replaces dict.has_key( ), which no longer exists in Python 3.
            if match.seqID in sequences:
                sequences[ match.seqID ].addMatch( match )
            else:
                # Single-argument print( ) emits the same text on Python 2 and 3.
                print( "Warning: id from .coords file not found in sequence data..." )
                utils.logMessage( "IncludeFileManager::findUniqueSequencesInFile( )", "WARNING - an ID was read in a Match that does not correspond to a sequence read from the fasta file!" )

        returnValue = [ ]
        for key in sequences:
            returnValue.extend( sequences[ key ].getNonMatchedSubSequences( ) )

        return returnValue

    def findCommonSequencesInFile( self, want, alsoWant ):
        """
        Run nucmer on want against alsoWant and return a list holding the
        matched sub-sequences of every sequence read from want.

        Matches whose seqID is not present in the parsed fasta data are
        silently skipped.
        """
        utils.logMessage( "IncludeFileManager::findCommonSequencesInFile( )", "running nucmer for reference file: {0}".format( want ) )

        # Same output as the Python 2 statement "print want, alsoWant".
        print( "{0} {1}".format( want, alsoWant ) )
        coordFile = self.nucmer.execute( [ want, alsoWant ] )

        matches = nucmerparser.parseCoordMatchFile( coordFile )
        sequences = fastaparser.parseFastaFileAsPrimerSequence( want )

        for match in matches:
            if match.seqID in sequences:
                sequences[ match.seqID ].addMatch( match )

        returnValue = [ ]
        for key in sequences:
            returnValue.extend( sequences[ key ].getMatchedSubSequences( ) )

        return returnValue

    def processIncludeFile( self, includeFileName ):
        """
        A function that adds and processes an include file.
        An exclude file must be set for this function to be called.

        Raises utils.ModuleNotInitializedException when no exclude file has
        been set through setExcludeFile( ).
        """

        utils.logMessage( "IncludeFileManager::processIncludeFile( )", "processing {0}".format( includeFileName ) )

        if not self.isExcludeFileInitialized:
            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "no exclude file set" )
            raise utils.ModuleNotInitializedException( "includefilemanager", "no exclude file set" )

        if not self.isReferenceFileInitialized:
            # First include file: it becomes the reference file.
            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "running nucmer for reference file: {0}".format( includeFileName ) )
            self.uniqueSequences = self.findUniqueSequencesInFile( includeFileName, self.excludeFileName )

            self.referenceFile = includeFileName
            self.isReferenceFileInitialized = True

        else:
            #write the unique sequences to a temp file
            tempSequences = os.path.join( utils.getTemporaryDirectory( ), "tempSequences.fasta" )
            fastaparser.writeFastaFile( self.uniqueSequences, tempSequences )
            # NOTE(review): the returned common sequences are discarded, so
            # self.uniqueSequences is never narrowed by later include files.
            # Kept as-is; confirm the intended behaviour before assigning it.
            self.findCommonSequencesInFile( includeFileName, tempSequences )
            # NOTE(review): assumed to belong to this branch (the reference
            # file is tracked separately in self.referenceFile) - confirm.
            self.includeFiles.append( includeFileName )

    def getUniqueSequences( self ):
        """
        getUniqueSequences - return the list of sub-sequences stored by
        processIncludeFile( ) for the reference include file, or None when
        no include file has been processed yet.
        """

        return self.uniqueSequences
130
131
132