Mercurial > repos > yufei-luo > s_mart
changeset 32:3441fe98a2ba
Deleted selected files
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:34:10 -0400 |
parents | 0ab839023fe4 |
children | aa0420172fc6 |
files | commons/launcher/BlatClusterLauncher.py commons/launcher/BlatProgramLauncher.py commons/launcher/LaunchBlastclust.py commons/launcher/LaunchLastZ.py commons/launcher/LaunchMCL.py commons/launcher/LaunchMap.py commons/launcher/LaunchMatcher.py commons/launcher/LaunchMummerPlot.py commons/launcher/LaunchNucmer.py commons/launcher/LaunchPhyML.py commons/launcher/LaunchPromer.py commons/launcher/LaunchRefAlign.py commons/launcher/LaunchRefalign_old.py commons/launcher/LaunchRepeatMasker.py commons/launcher/LaunchTRF.py commons/launcher/LaunchTallymer.py commons/launcher/MafftClusterLauncher.py commons/launcher/MafftProgramLauncher.py commons/launcher/MapClusterLauncher.py commons/launcher/MapProgramLauncher.py commons/launcher/NWalignProgramLauncher.py commons/launcher/RepeatMaskerClusterLauncher.py commons/launcher/RepeatMaskerProgramLauncher.py commons/launcher/YassClusterLauncher.py commons/launcher/YassProgramLauncher.py commons/launcher/__init__.py commons/launcher/launchBlasterMatcherPerQuery.py commons/launcher/launchMafft.py commons/launcher/launchMreps.py commons/launcher/launchPhyML.py commons/launcher/launchPrank.py commons/launcher/launchTCoffee.py commons/launcher/launchTEclass.py commons/launcher/tests/MockDataBankForBlat.py commons/launcher/tests/MockESTBankForBlat.py commons/launcher/tests/MockOutputForBlat.py commons/launcher/tests/Test_BlatClusterLauncher.py commons/launcher/tests/Test_BlatProgramLauncher.py commons/launcher/tests/Test_F_BlatProgramLauncher.py commons/launcher/tests/Test_F_LaunchBlastclust.py commons/launcher/tests/Test_F_LaunchLastZ.py commons/launcher/tests/Test_F_LaunchMCL.py commons/launcher/tests/Test_F_LaunchMap.py commons/launcher/tests/Test_F_LaunchMatcher.py commons/launcher/tests/Test_F_LaunchMummerPlot.py commons/launcher/tests/Test_F_LaunchNucmer.py commons/launcher/tests/Test_F_LaunchPhyML.py commons/launcher/tests/Test_F_LaunchPromer.py commons/launcher/tests/Test_F_LaunchRefAlign.py 
commons/launcher/tests/Test_F_LaunchRefalign.py commons/launcher/tests/Test_F_LaunchRepeatMasker.py commons/launcher/tests/Test_F_LaunchTRF.py commons/launcher/tests/Test_F_LaunchTallymer.py commons/launcher/tests/Test_LaunchBlastclust.py commons/launcher/tests/Test_LaunchTallymer.py commons/launcher/tests/Test_MafftClusterLauncher.py commons/launcher/tests/Test_MafftProgramLauncher.py commons/launcher/tests/Test_MapClusterLauncher.py commons/launcher/tests/Test_MapProgramLauncher.py commons/launcher/tests/Test_NWalignProgramLauncher.py commons/launcher/tests/Test_RepeatMaskerClusterLauncher.py commons/launcher/tests/Test_RepeatMaskerProgramLauncher.py commons/launcher/tests/Test_YassClusterLauncher.py commons/launcher/tests/Test_YassProgramLauncher.py commons/launcher/tests/Test_launchTEclass.py commons/launcher/tests/__init__.py commons/pyRepetUnit/__init__.py commons/pyRepetUnit/align/AlignList.py commons/pyRepetUnit/align/AlignListUtils.py commons/pyRepetUnit/align/__init__.py commons/pyRepetUnit/align/hmmOutputParsing/HmmpfamOutput2align.py commons/pyRepetUnit/align/hmmOutputParsing/HmmscanOutput2align.py commons/pyRepetUnit/align/hmmOutputParsing/__init__.py commons/pyRepetUnit/align/hmmOutputParsing/tests/HmmpfamOutput2AlignTestSuite.py commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_Hmmerpfam2align.py commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmpfamOutput2align.py commons/pyRepetUnit/align/hmmOutputParsing/tests/Test_F_HmmscanOutput2align.py commons/pyRepetUnit/align/hmmOutputParsing/tests/__init__.py commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/ConsensusTestFile_nt.fsa commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/OutputHmmpfamTest commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/PostPostProcessTestFiltered.align commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/hmmscanTransformedExpected.align commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/repetHmmscan.fa 
commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output commons/pyRepetUnit/align/hmmOutputParsing/tests/datas/test_hmmpfam_output.align commons/pyRepetUnit/align/tests/Test_AlignListUtils.py commons/pyRepetUnit/align/tests/__init__.py commons/pyRepetUnit/align/tests/hmmpfamOutputParsingTestSuite.py commons/pyRepetUnit/align/transformAACoordIntoNtCoord/TransformAACoordIntoNtCoordInAlignFormat.py commons/pyRepetUnit/align/transformAACoordIntoNtCoord/__init__.py commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_F_TransformAACoordIntoNtCoordAndScoreFiltering.py commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/Test_TransformAACoordIntoNtCoordInAlignFormat.py commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/TransformAACoordIntoNtCoordTestSuite.py commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/__init__.py commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/ConsensusTestFile_nt.fsa commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/OutputHmmpfamTest.align commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTest.align commons/pyRepetUnit/align/transformAACoordIntoNtCoord/tests/datas/PostPostProcessTestFiltered.align commons/pyRepetUnit/blastnForClassifierStep1/RepbaseBLRnForClassifierStep1.py commons/pyRepetUnit/blastnForClassifierStep1/__init__.py commons/pyRepetUnit/blastnForClassifierStep1/tests/Test_RepbaseBLRnForClassifierStep1.py commons/pyRepetUnit/blastnForClassifierStep1/tests/__init__.py commons/pyRepetUnit/components/AbstractClusterLauncher.py commons/pyRepetUnit/components/AbstractProgramLauncher.py commons/pyRepetUnit/components/IClusterLauncher.py commons/pyRepetUnit/components/__init__.py commons/pyRepetUnit/components/blastx2GFF/__init__.py commons/pyRepetUnit/components/blastx2GFF/tests/__init__.py commons/pyRepetUnit/components/blastx2GFF/tests/blastx2GFFTestSuite.py 
commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.path commons/pyRepetUnit/components/blastx2GFF/tests/datas/dummy.align.match.tab commons/pyRepetUnit/components/blastx2GFF/tests/datas/exp_dummy.gff commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.map commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.path commons/pyRepetUnit/components/blastx2GFF/tests/datas/lm_supctg_v2_29_30_vs_BlastBank.align.match.tab commons/pyRepetUnit/convCoord/ConvMapChr2Chunk.py commons/pyRepetUnit/convCoord/ConvPathChr2Chunk.py commons/pyRepetUnit/convCoord/ConvSetChr2Chunk.py commons/pyRepetUnit/convCoord/PathChunkConnector.py commons/pyRepetUnit/convCoord/__init__.py commons/pyRepetUnit/convCoord/test/TestConvCoordWithOverlapps.py commons/pyRepetUnit/convCoord/test/Test_ConvMapChr2Chunk.py commons/pyRepetUnit/convCoord/test/Test_ConvPathChr2Chunk.py commons/pyRepetUnit/convCoord/test/Test_ConvSetChr2Chunk.py commons/pyRepetUnit/convCoord/test/Test_PathChunkConnector.py commons/pyRepetUnit/convCoord/test/__init__.py commons/pyRepetUnit/convCoord/test/convCoordTestSuite.py commons/pyRepetUnit/doc/__init__.py commons/pyRepetUnit/doc/api-objects.txt commons/pyRepetUnit/doc/class-tree.html commons/pyRepetUnit/doc/commons.Checker-module.html commons/pyRepetUnit/doc/commons.Checker-pysrc.html commons/pyRepetUnit/doc/commons.Checker.Checker-class.html commons/pyRepetUnit/doc/commons.Checker.CheckerException-class.html commons/pyRepetUnit/doc/commons.Checker.ConfigChecker-class.html commons/pyRepetUnit/doc/commons.Checker.ConfigException-class.html commons/pyRepetUnit/doc/commons.Checker.IChecker-class.html commons/pyRepetUnit/doc/commons.Checker._Logger-class.html commons/pyRepetUnit/doc/commons.IComponentWrapper-module.html commons/pyRepetUnit/doc/commons.IComponentWrapper-pysrc.html 
commons/pyRepetUnit/doc/commons.IComponentWrapper.IComponentWrapper-class.html commons/pyRepetUnit/doc/commons.IDataProcessor-module.html commons/pyRepetUnit/doc/commons.IDataProcessor-pysrc.html commons/pyRepetUnit/doc/commons.IDataProcessor.IDataProcessor-class.html commons/pyRepetUnit/doc/crarr.png commons/pyRepetUnit/doc/epydoc.css commons/pyRepetUnit/doc/epydoc.js commons/pyRepetUnit/doc/frames.html commons/pyRepetUnit/doc/help.html commons/pyRepetUnit/doc/identifier-index.html commons/pyRepetUnit/doc/index.html commons/pyRepetUnit/doc/module-tree.html commons/pyRepetUnit/doc/redirect.html commons/pyRepetUnit/doc/toc-commons.Checker-module.html commons/pyRepetUnit/doc/toc-commons.IComponentWrapper-module.html commons/pyRepetUnit/doc/toc-commons.IDataProcessor-module.html commons/pyRepetUnit/doc/toc-everything.html commons/pyRepetUnit/doc/toc.html commons/pyRepetUnit/fastaTranslation/__init__.py commons/pyRepetUnit/fastaTranslation/allFrames/TranslateInAllFramesAndReplaceStopByX.py commons/pyRepetUnit/fastaTranslation/allFrames/__init__.py commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_F_TranslateInAllFramesAndReplaceStopByX.py commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateAfastaFileInAllFrameAndReplaceStopsByX.py commons/pyRepetUnit/fastaTranslation/allFrames/tests/Test_TranslateInAllFramesAndReplaceStopByX.py commons/pyRepetUnit/fastaTranslation/allFrames/tests/__init__.py commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_aaWithoutStop.fsa commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/ConsensusTestFile_nt.fsa commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/__init__.py commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_aa.fa commons/pyRepetUnit/fastaTranslation/allFrames/tests/datas/test_input_nt.fa 
commons/pyRepetUnit/fastaTranslation/allFrames/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py commons/pyRepetUnit/hmmer/HmmpfamClusterComponent.py commons/pyRepetUnit/hmmer/LaunchPreProcessHmmpfamPostProcessNotInParallel.py commons/pyRepetUnit/hmmer/__init__.py commons/pyRepetUnit/hmmer/check/OldDetectFeatureConfigChecker.py commons/pyRepetUnit/hmmer/check/__init__.py commons/pyRepetUnit/hmmer/check/test/Test_OldDetectFeaturesConfigChecker.py commons/pyRepetUnit/hmmer/check/test/__init__.py commons/pyRepetUnit/hmmer/check/test/detectFeatureConfigCheckerTestSuite.py commons/pyRepetUnit/hmmer/hmmOutput/HmmOutput.py commons/pyRepetUnit/hmmer/hmmOutput/HmmOutputProcessing.py commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py commons/pyRepetUnit/hmmer/hmmOutput/HmmscanOutputProcessing.py commons/pyRepetUnit/hmmer/hmmOutput/__init__.py commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmOutputProcessing.py commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmpfamOutputProcessing.py commons/pyRepetUnit/hmmer/hmmOutput/tests/TestHmmscanOutputProcessing.py commons/pyRepetUnit/hmmer/hmmOutput/tests/Test_HmmOutput.py commons/pyRepetUnit/hmmer/hmmOutput/tests/__init__.py commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/Outputhmmpfam commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutput.align commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/hmmscanOutputTab.txt commons/pyRepetUnit/hmmer/hmmOutput/tests/datas/test_hmmpfam_output commons/pyRepetUnit/hmmer/hmmOutput/tests/hmmOutputTestSuite.py commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/__init__.py commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/Test_ProfilesSearch.py commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/__init__.py commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/ConsensusFile_test.fa 
commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_aa.fa commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/DummyRepbase_nt.fa commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmbank_test commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/tests/datas/hmmpfamOut.align.clean_match.path commons/pyRepetUnit/hmmer/tests/TestAcceptanceHmmpfamAndParse2alignInparallel.py commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamAndParse2alignLauncherInParallel.py commons/pyRepetUnit/hmmer/tests/TestFunctionalHmmpfamClusterComponent.py commons/pyRepetUnit/hmmer/tests/TestHmmpfamAndParse2alignLauncher.py commons/pyRepetUnit/hmmer/tests/TestHmmpfamClusterComponent.py commons/pyRepetUnit/hmmer/tests/TestHmmpfamLauncher.py commons/pyRepetUnit/hmmer/tests/TestLaunchPreProcessHmmpfamPostProcessNotInParallel.py commons/pyRepetUnit/hmmer/tests/TestProgramLauncher.py commons/pyRepetUnit/hmmer/tests/__init__.py commons/pyRepetUnit/hmmer/tests/datas/Outputhmmpfam commons/pyRepetUnit/hmmer/tests/datas/config.cfg commons/pyRepetUnit/hmmer/tests/datas/configTestAcceptanceHmmpfamAndParse2alignLauncherInparallel.cfg commons/pyRepetUnit/hmmer/tests/datas/configTestFunctionalHmmpfamLauncherInparallel.cfg commons/pyRepetUnit/hmmer/tests/datas/configTestLaunchPreProcessHmmpfamPostProcessNotInParallel.cfg commons/pyRepetUnit/hmmer/tests/datas/myhmms commons/pyRepetUnit/hmmer/tests/datas/test_input_aa.fa commons/pyRepetUnit/hmmer/tests/datas/test_input_nt.fa commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py commons/pyRepetUnit/profilesDB/InsertProfilesMapFileInDB.py commons/pyRepetUnit/profilesDB/Profiles.py commons/pyRepetUnit/profilesDB/ProfilesDB2Map.py commons/pyRepetUnit/profilesDB/ProfilesDB4Repet.py commons/pyRepetUnit/profilesDB/ProfilesDatabank.py commons/pyRepetUnit/profilesDB/ProfilesDatabankUtils.py commons/pyRepetUnit/profilesDB/__init__.py 
commons/pyRepetUnit/profilesDB/tests/InsertProfilesMapFileInDBTestRessources.py commons/pyRepetUnit/profilesDB/tests/TestCompleteProfilesDBFromProfilesNameListOrAccNumber.py commons/pyRepetUnit/profilesDB/tests/TestInsertProfilesMapFileInDB.py commons/pyRepetUnit/profilesDB/tests/TestProfiles.py commons/pyRepetUnit/profilesDB/tests/TestProfilesDB2Map.py commons/pyRepetUnit/profilesDB/tests/TestProfilesDatabankUtils.py commons/pyRepetUnit/profilesDB/tests/Test_F_CompleteProfilDB.py commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB2Map.py commons/pyRepetUnit/profilesDB/tests/Test_F_ProfilesDB4Repet.py commons/pyRepetUnit/profilesDB/tests/Test_ProfilesDB4Repet.py commons/pyRepetUnit/profilesDB/tests/__init__.py commons/pyRepetUnit/profilesDB/tests/completeProfilesDBFromAFileWithProfilesList_script.py commons/pyRepetUnit/profilesDB/tests/datas/ListPfamProfilsInRepbase.txt commons/pyRepetUnit/profilesDB/tests/datas/ListpfamAccNumber.txt commons/pyRepetUnit/profilesDB/tests/datas/myhmms commons/pyRepetUnit/profilesDB/tests/datas/profilesDBTest.hmm commons/pyRepetUnit/profilesDB/tests/profilesDBTestSuite.py commons/tools/AlignTEOnGenomeAccordingToAnnotation.py commons/tools/AnnotationStats.py commons/tools/BenchmarkTEconsensus.py commons/tools/CalcCoordCumulLength.py commons/tools/ChangeSequenceHeaders.py commons/tools/CheckMysqlConnect.py commons/tools/CleanClusterNodesAfterRepet.py commons/tools/CorrelateTEageWithGCcontent.py commons/tools/FilterAlign.py commons/tools/GFF3Maker.py commons/tools/GameXmlMaker.py commons/tools/GetMultAlignAndPhylogenyPerTErefSeq.py commons/tools/GetSpecificTELibAccordingToAnnotation.py commons/tools/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py commons/tools/LaunchBlaster.py commons/tools/LaunchBlasterInParallel.py commons/tools/LaunchMatcherInParallel.py commons/tools/ListAndDropTables.py commons/tools/MergeMatchsFiles.py commons/tools/MysqlConnect.py commons/tools/OrientSequences.py commons/tools/PostAnalyzeTELib.py 
commons/tools/PrepareBatches.py commons/tools/RetrieveInitHeaders.py commons/tools/RmvPairAlignInChunkOverlaps.py commons/tools/SpliceTEsFromGenome.py commons/tools/SplicerFromAnnotation.py commons/tools/TEclassifierPE.py commons/tools/TEclassifierPE_parallelized.py commons/tools/__init__.py commons/tools/blast2align.py commons/tools/dbBestLength.py commons/tools/dbConsensus.py commons/tools/dbShuffle.py commons/tools/dbSplit.py commons/tools/filterOutMatcher.py commons/tools/getCumulLengthFromTEannot.py commons/tools/pathnum2id.py commons/tools/refalign2fasta.py commons/tools/removeDescriptionInFastaHeaderProgramLauncher.py commons/tools/replaceGreaterThanSymbolInFastaHeaderProgramLauncher.py commons/tools/setnum2id.py commons/tools/srptBlasterMatcher.py commons/tools/srptCreateTable.py commons/tools/srptExportTable.py commons/tools/srptGameXmlMaker.py commons/tools/srptPhyML.py commons/tools/srptTableOverlap.py commons/tools/tabFileReader.py commons/tools/tests/MockFastaForReplaceGreaterThanSymbolInFastaHeader.py commons/tools/tests/Test_AlignTEOnGenomeAccordingToAnnotation.py commons/tools/tests/Test_CalcCoordCumulLength.py commons/tools/tests/Test_ChangeSequenceHeaders.py commons/tools/tests/Test_CorrelateTEageWithGCcontent.py commons/tools/tests/Test_F_AlignTEOnGenomeAccordingToAnnotation.py commons/tools/tests/Test_F_CheckMysqlConnect.py commons/tools/tests/Test_F_FilterAlign.py commons/tools/tests/Test_F_GFF3Maker.py commons/tools/tests/Test_F_GameXmlMaker.py commons/tools/tests/Test_F_GetMultiAlignAndPhylogenyPerTErefSeq.py commons/tools/tests/Test_F_GetSpecificTELibAccordingToAnnotation.py commons/tools/tests/Test_F_HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py commons/tools/tests/Test_F_LaunchBlaster.py commons/tools/tests/Test_F_LaunchBlasterInParallel.py commons/tools/tests/Test_F_LaunchMatcherInParallel.py commons/tools/tests/Test_F_MergeMatchsFiles.py commons/tools/tests/Test_F_PostAnalyzeTELib.py 
commons/tools/tests/Test_F_ReplaceGreaterThanSymbolInFastaHeader.py commons/tools/tests/Test_F_RetrieveInitHeaders.py commons/tools/tests/Test_F_SplicerFromAnnotation.py commons/tools/tests/Test_F_TEclassifierPE.py commons/tools/tests/Test_GetMultAlignAndPhylogenyPerTErefSeq.py commons/tools/tests/Test_GetSpecificTELibAccordingToAnnotation.py commons/tools/tests/Test_OrientSequences.py commons/tools/tests/Test_RmvPairAlignInChunkOverlaps.py commons/tools/tests/Test_SpliceTEsFromGenome.py commons/tools/tests/Test_getCumulLengthFromTEannot.py commons/tools/tests/Test_pathnum2id.py commons/tools/tests/Test_refalign2fasta.py commons/tools/tests/Test_srptTableOverlap.py commons/tools/tests/__init__.py |
diffstat | 296 files changed, 0 insertions(+), 75738 deletions(-) [+] |
line wrap: on
line diff
--- a/commons/launcher/BlatClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -##@file -# Launch BlatProgramLauncher on several files in parallel on a cluster. - - -from pyRepet.launcher.AbstractClusterLauncher import * -from commons.launcher.BlatProgramLauncher import BlatProgramLauncher -from commons.core.coord.AlignUtils import AlignUtils -from commons.tools import srptBlasterMatcher - - -class BlatClusterLauncher( AbstractClusterLauncher ): - """ - Launch Blat on several files in parallel on a cluster. - """ - - def __init__( self ): - """ - Constructor. - """ - AbstractClusterLauncher.__init__( self ) - AbstractClusterLauncher.setAcronym( self, "Blat" ) - - self._cmdLineSpecificOptions = "s:p:A" - - self._exeWrapper = "BlatProgramLauncher.py" - self._prgLauncher = BlatProgramLauncher() - self._prgLauncher.setInputFile( GENERIC_IN_FILE ) - self._prgLauncher.setClean() - self._prgLauncher.setVerbosityLevel( 1 ) - self._prgLauncher.setListFilesToKeep() - self._prgLauncher.setListFilesToRemove() - - - def getSpecificHelpAsString( self ): - """ - Return the specific help as a string. 
- """ - string = "" - string += "\nspecific options:" - string += "\n -s: name of the subject file (format='fasta')" - string += "\n -p: parameters for '%s'" % ( self._prgLauncher.getProgramName() ) - string += "\n -Z: concatenate output files" - string += "\n -A: same sequences (all-by-all)" - return string - - - def getSubjectFile( self ): - return self._prgLauncher.getSubjectFile() - - - def getProgramParameters( self ): - return self._prgLauncher.getProgramParameters() - - - def processOutputFile( self, tmpFile, outFile ): - sortFile = "%s.sort" % ( tmpFile ) - AlignUtils.sortAlignFile( tmpFile, sortFile ) - if self._prgLauncher.getAllByAll(): - srptBlasterMatcher.filterRedundantMatches( sortFile, - outFile ) - os.remove( sortFile ) - else: - os.rename( sortFile, outFile ) - - def setASpecificAttributeFromCmdLine( self, o, a="" ): - if o =="-s": - self._prgLauncher.setSubjectFile( a ) - -if __name__ == "__main__": - i = BlatClusterLauncher() - i.setAttributesFromCmdLine() - i.run()
--- a/commons/launcher/BlatProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,192 +0,0 @@ -#!/usr/bin/env python - -##@file -# Launch Blat (pairwise alignment). -# -# options: -# -h: this help -# -i: name of the input file (queries, format='fasta') -# -s: name of the subject file (format='fasta') -# -p: parameters for 'blat' (default='-d 2') -# -o: name of the output file (format='align', default=inFile+'.align') -# -c: clean -# -v: verbosity level (default=0/1) - - -import os -import sys - -from commons.pyRepetUnit.components.AbstractProgramLauncher import AbstractProgramLauncher - - -class BlatProgramLauncher( AbstractProgramLauncher ): - """ - Launch Blat (pairwise alignment). - """ - - def __init__( self ): - """ - Constructor. - """ - AbstractProgramLauncher.__init__( self ) - self._prgName = "blat" - self._formatInFile = "fasta" - self._sbjFile = "" - self._prgParam = "" - self._allByAll = False - - - def getHelpAsString( self ): - string = AbstractProgramLauncher.getHelpAsString(self) - string += "\nspecific options:" - string += "\n -s: name of the subject file (database, format='fasta')" - string += "\n -p: parameters for '%s'" % ( self.getProgramName() ) - string += "\n -A: same sequences (all-by-all)" - string += "\n -o: name of the output file (format='align', default=inFile+'.align')" - return string - - def getCmdLineOptions(self): - return AbstractProgramLauncher.getCmdLineOptions(self) + "s:p:Ao:" - - def setAttributesFromCmdLine( self, o, a = "" ): - AbstractProgramLauncher.setAttributesFromCmdLine(self, o, a) - if o == "-s": - self.setSubjectFile( a ) - elif o == "-p": - self.setProgramParameters( a ) - elif o == "-A": - self.setAllByAll() - elif o == "-o": - self.setOutputFile( a ) - - - def setSubjectFile( self, arg ): - self._sbjFile = arg - - - def getSubjectFile( self ): - return self._sbjFile - - - def setAllByAll( self ): - self._allByAll = True - - - def getAllByAll( self ): - return 
self._allByAll - - - def check( self ): - """ - Check the specific attributes before running the program. - """ - AbstractProgramLauncher.check(self) - if self._sbjFile == "": - string = "ERROR: missing subject file (-s)" - print string - print self.getHelpAsString() - sys.exit(1) - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - - - def setWrapperCommandLine( self ): - """ - Set the command-line of the wrapper. - Required for BlatClusterLauncher. - """ - self._wrpCmdLine = self.getWrapperName() - self._wrpCmdLine += " -i %s" % ( self.getInputFile() ) - self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() ) - if self.getProgramParameters() != "": - self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() ) - if self.getAllByAll(): - self._wrpCmdLine += " -A" - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - self._wrpCmdLine += " -o %s" % ( self.getOutputFile() ) - if self.getClean(): - self._wrpCmdLine += " -c" - self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() ) - - - def setProgramCommandLine( self ): - """ - Set the command-line of the program. - """ - self._prgCmdLine = self.getProgramName() - self._prgCmdLine += " %s" % ( self.getSubjectFile() ) - self._prgCmdLine += " %s" % ( self.getInputFile() ) - if self.getProgramParameters() != "": - self._prgCmdLine += " %s" % ( self.getProgramParameters() ) - self._prgCmdLine += " -out=blast8" - self._prgCmdLine += " %s.blast" % ( self.getInputFile() ) - - - def setListFilesToKeep( self ): - """ - Set the list of files to keep. - """ - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - self.appendFileToKeep( self.getOutputFile() ) - - - def setListFilesToRemove( self ): - """ - Set the list of files to remove. 
- """ - self.appendFileToRemove( "%s.blast" % ( self.getInputFile() ) ) - - - def convertBlastIntoAlign( self ): - """ - Convert a 'blast' file into the 'align' format. - """ - cmd = os.environ["REPET_PATH"] + "/bin/blast2align.py" - cmd += " -i %s.blast" % ( self.getInputFile() ) - cmd += " -o %s" % ( self.getOutputFile() ) - exitStatus = os.system( cmd ) - if exitStatus != 0: - string = "ERROR while converting 'blast' file into 'align' format" - print string - sys.exit(1) - - - def setSummary( self ): - self._summary = "input file: %s" % ( self.getInputFile() ) - self._summary += "\nsubject file: %s" % ( self.getSubjectFile() ) - self._summary += "\nparameters: %s" % ( self.getProgramParameters() ) - if self.getAllByAll(): - self._summary += "\nall-by-all" - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - self._summary += "\noutput file: %s" % ( self.getOutputFile() ) - - - def run( self ): - """ - Run the program. - """ - self.start() - - self.setProgramCommandLine() - cmd = self.getProgramCommandLine() - if self.getVerbosityLevel() > 0: - print "LAUNCH: %s" % ( cmd ) - sys.stdout.flush() - exitStatus = os.system( cmd ) - if exitStatus != 0: - string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus ) - print string - sys.exit(1) - - self.convertBlastIntoAlign() - - self.end() - - -if __name__ == "__main__": - i = BlatProgramLauncher() - i.checkAttributesFromCmdLine() - i.run()
--- a/commons/launcher/LaunchBlastclust.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,372 +0,0 @@ -#!/usr/bin/env python - -""" -Launch Blastclust on nucleotide sequences and return a fasta file. -""" - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
- -import os -import sys -import subprocess -from commons.core.seq.BioseqDB import BioseqDB -from commons.core.seq.Bioseq import Bioseq -from commons.core.utils.RepetOptionParser import RepetOptionParser -from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders - -class LaunchBlastclust(object): - """ - Launch Blastclust on nucleotide sequences and return a fasta file. - """ - - def __init__(self, input = "", outFilePrefix = "", clean = False, verbose = 0): - """ - Constructor. - """ - self._inFileName = input - self._identityThreshold = 95 - self._coverageThreshold = 0.9 - self._bothSeq = "T" - self._filterUnclusteredSeq = False - self._outFilePrefix = outFilePrefix - self._isBlastToMap = False - self._isHeaderForTEdenovo = False - self._nbCPUs = 1 - self._clean = clean - self._verbose = verbose - self._tmpFileName = "" - - def setAttributesFromCmdLine(self): - """ - Set the attributes from the command-line. - """ - - description = "Launch Blastclust on nucleotide sequences and return a fasta file." 
- usage = "LaunchBlastclust.py -i inputFileName [options]" - - examples = "\nExample 1: launch Blastclust with default options, highest verbose and clean temporary files.\n" - examples += "\t$ python ./LaunchBlastclust.py -i MyBank.fa -v 2 -c" - examples += "\n\t" - examples += "\t\nExample 2: launch Blastclust with an identity threshold of 90%, rename output files and generate a map file corresponding to the fasta output.\n" - examples += "\t$ python ./LaunchBlastclust.py -i MyBank.fa -S 90 -o SpecialOutputName -m" - examples += "\n\tWARNING: Please refer to -m option limitations in the description above.\n" - - #TODO: check if the optionParser can handle '\' into strings for a better code readability in -m option - - parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples) - parser.add_option("-i", "--input", dest = "inFileName", type = "string", help = "name of the input fasta file (nucleotides)", default = "") - parser.add_option("-L", "--length", dest = "coverageThreshold", type = "float", help = "length coverage threshold (default=0.9)", default = 0.9) - parser.add_option("-S", "--ident", dest = "identityThreshold", type = "int", help = "identity threshold (default=95)", default = 95) - parser.add_option("-b", "--both", dest = "bothSeq", type = "string", help = "require coverage on both neighbours (default=T/F)", default = "T") - parser.add_option("-f", "--filter", dest = "filterUnclusteredSeq", help = "filter unclustered sequences", default = False, action="store_true") - parser.add_option("-o", "--out", dest = "outFilePrefix", type = "string", help = "prefix of the output files (default=input fasta file name)", default = "") - parser.add_option("-m", "--map", dest = "isBlast2Map", help = "generate an additional output file in map format (Warning: only works if blastclust's fasta input headers are formated like LTRharvest fasta output)", default = False, action="store_true") - parser.add_option("", 
"--TEdenovoHeader", dest = "isHeaderForTEdenovo", help = "format headers for TEdenovo pipeline", default = False, action="store_true") - parser.add_option("-n", "--num", dest = "nbCPUs", type = "int", help = "number of CPU's to use (default=1)", default = 1) - parser.add_option("-c", "--clean", dest = "clean", help = "clean temporary files", default = False, action="store_true") - parser.add_option("-v", "--verbose", dest = "verbose", type = "int", help = "verbosity level (default=0/1/2)", default = 0) - - options = parser.parse_args()[0] - self._setAttributesFromOptions(options) - - def _setAttributesFromOptions(self, options): - self.setInputFileName(options.inFileName) - self.setCoverageThreshold(options.coverageThreshold) - self.setIdentityThreshold(options.identityThreshold) - self.setBothSequences(options.bothSeq) - self.setNbCPUs(options.nbCPUs) - self.setIsHeaderForTEdenovo(options.isHeaderForTEdenovo) - if options.filterUnclusteredSeq: - self.setFilterUnclusteredSequences() - if options.outFilePrefix != "": - self.setOutputFilePrefix(options.outFilePrefix) - else: - self._outFilePrefix = self._inFileName - if options.isBlast2Map: - self.setIsBlastToMap() - if options.clean: - self.setClean() - self.setVerbosityLevel(options.verbose) - - def setInputFileName(self , inFileName): - self._inFileName = inFileName - - def setCoverageThreshold(self, lengthThresh): - self._coverageThreshold = float(lengthThresh) - - def setIdentityThreshold(self, identityThresh): - self._identityThreshold = int(identityThresh) - - def setBothSequences(self, bothSeq): - self._bothSeq = bothSeq - - def setNbCPUs(self, nbCPUs): - self._nbCPUs = int(nbCPUs) - - def setFilterUnclusteredSequences(self): - self._filterUnclusteredSeq = True - - def setOutputFilePrefix(self, outFilePrefix): - self._outFilePrefix = outFilePrefix - - def setIsBlastToMap(self): - self._isBlastToMap = True - - def setIsHeaderForTEdenovo(self, isHeaderForTEdenovo): - self._isHeaderForTEdenovo = 
isHeaderForTEdenovo - - def setClean(self): - self._clean = True - - def setVerbosityLevel(self, verbose): - self._verbose = int(verbose) - - def setTmpFileName(self, tmpFileName): - self._tmpFileName = tmpFileName - - - def checkAttributes(self): - """ - Check the attributes are valid before running the algorithm. - """ - if self._inFileName == "": - print "ERROR: missing input file name (-i)" - sys.exit(1) - if self._outFilePrefix == "": - self._outFilePrefix = self._inFileName - self._tmpFileName = "%s_blastclust.txt" % (self._outFilePrefix) - - - def launchBlastclust(self, inFile): - """ - Launch Blastclust in command-line. - """ - if os.path.exists(os.path.basename(inFile)): - inFile = os.path.basename(inFile) - prg = "blastclust" - cmd = prg - cmd += " -i %s" % (inFile) - cmd += " -o %s" % (self._tmpFileName) - cmd += " -S %i" % (self._identityThreshold) - cmd += " -L %f" % (self._coverageThreshold) - cmd += " -b %s" % (self._bothSeq) - cmd += " -p F" - cmd += " -a %i" % (self._nbCPUs) - if self._verbose == 0: - cmd += " -v blastclust.log" - if self._verbose > 0: - print cmd - sys.stdout.flush() - process = subprocess.Popen(cmd, shell = True) - process.communicate() - if process.returncode != 0: - raise Exception("ERROR when launching '%s'" % cmd) - if self._clean and os.path.exists("error.log"): - os.remove("error.log") - if self._clean and os.path.exists("blastclust.log"): - os.remove("blastclust.log") - - - def getClustersFromTxtFile(self): - """ - Return a dictionary with cluster IDs as keys and sequence headers as values. 
- """ - dClusterId2SeqHeaders = {} - inF = open(self._tmpFileName, "r") - line = inF.readline() - clusterId = 1 - while True: - if line == "": - break - tokens = line[:-1].split(" ") - dClusterId2SeqHeaders[clusterId] = [] - for seqHeader in tokens: - if seqHeader != "": - dClusterId2SeqHeaders[clusterId].append(seqHeader) - line = inF.readline() - clusterId += 1 - inF.close() - if self._verbose > 0: - print "nb of clusters: %i" % (len(dClusterId2SeqHeaders.keys())) - sys.stdout.flush() - return dClusterId2SeqHeaders - - - def filterUnclusteredSequences(self, dClusterId2SeqHeaders): - """ - Filter clusters having only one sequence. - """ - for clusterId in dClusterId2SeqHeaders.keys(): - if len(dClusterId2SeqHeaders[clusterId]) == 1: - del dClusterId2SeqHeaders[clusterId] - if self._verbose > 0: - print "nb of clusters (>1seq): %i" % (len(dClusterId2SeqHeaders.keys())) - sys.stdout.flush() - return dClusterId2SeqHeaders - - - def getClusteringResultsInFasta(self, inFile): - """ - Write a fasta file whose sequence headers contain the clustering IDs. 
- """ - dClusterId2SeqHeaders = self.getClustersFromTxtFile() - if self._filterUnclusteredSeq: - dClusterId2SeqHeaders = self.filterUnclusteredSequences(dClusterId2SeqHeaders) - inDB = BioseqDB(inFile) - outFileName = "%s_Blastclust.fa" % (inFile) - outF = open(outFileName, "w") - for clusterId in dClusterId2SeqHeaders.keys(): - memberId = 1 - for seqHeader in dClusterId2SeqHeaders[clusterId]: - bs = inDB.fetch(seqHeader) - bs.header = "BlastclustCluster%iMb%i_%s" % (clusterId, memberId, seqHeader) - bs.write(outF) - memberId += 1 - outF.close() - - - def getLinkInitNewHeaders(self): - dNew2Init = {} - linkFileName = "%s.shortHlink" % (self._inFileName) - linkFile = open(linkFileName,"r") - line = linkFile.readline() - while True: - if line == "": - break - data = line.split("\t") - dNew2Init[data[0]] = data[1] - line = linkFile.readline() - linkFile.close() - return dNew2Init - - - def retrieveInitHeaders(self, dNewH2InitH): - tmpFaFile = "%s.shortH_Blastclust.fa" % (self._inFileName) - tmpFaFileHandler = open(tmpFaFile, "r") - outFaFile = "%s_Blastclust.fa" % (self._outFilePrefix) - outFaFileHandler = open(outFaFile, "w") - while True: - line = tmpFaFileHandler.readline() - if line == "": - break - if line[0] == ">": - tokens = line[1:-1].split("_") - initHeader = dNewH2InitH[tokens[1]] - if self._isHeaderForTEdenovo: - classif = initHeader.split("_")[0] - consensusName = "_".join(initHeader.split("_")[1:]) - clusterId = tokens[0].split("Cluster")[1].split("Mb")[0] - newHeader = "%s_Blc%s_%s" % (classif, clusterId, consensusName) - else: - newHeader = "%s_%s" % (tokens[0], initHeader) - outFaFileHandler.write(">%s\n" % (newHeader)) - else: - outFaFileHandler.write(line) - tmpFaFileHandler.close() - outFaFileHandler.close() - if self._clean: - os.remove(tmpFaFile) - - - def blastclustToMap(self, blastclustFastaOut): - """ - Write a map file from blastclust fasta output. 
- Warning: only works if blastclust's fasta input headers are formated like LTRharvest fasta output. - """ - fileDb = open(blastclustFastaOut , "r") - mapFilename = "%s.map" % (os.path.splitext(blastclustFastaOut)[0]) - fileMap = open(mapFilename, "w") - seq = Bioseq() - numseq = 0 - while 1: - seq.read(fileDb) - if seq.sequence == None: - break - numseq = numseq + 1 - ID = seq.header.split(' ')[0].split('_')[0] - chunk = seq.header.split(' ')[0].split('_')[1] - start = seq.header.split(' ')[-1].split(',')[0][1:] - end = seq.header.split(' ')[-1].split(',')[1][:-1] - line= '%s\t%s\t%s\t%s' % (ID, chunk, start, end) - fileMap.write(line + "\n") - - fileDb.close() - fileMap.close() - print "saved in %s" % mapFilename - - - def start(self): - """ - Useful commands before running the program. - """ - self.checkAttributes() - if self._verbose > 0: - print "START %s" % (type(self).__name__) - - - def end(self): - """ - Useful commands before ending the program. - """ - if self._verbose > 0: - print "END %s" % (type(self).__name__) - - - def run(self): - """ - Run the program. - """ - self.start() - - iCSH = ChangeSequenceHeaders(inFile = self._inFileName, format = "fasta", step = 1, outFile = "%s.shortH" % self._inFileName, linkFile = "%s.shortHlink" % self._inFileName) - iCSH.run() - - self.launchBlastclust("%s.shortH" % (self._inFileName)) - - self.getClusteringResultsInFasta("%s.shortH" % (self._inFileName)) - - dNewH2InitH = self.getLinkInitNewHeaders() - self.retrieveInitHeaders(dNewH2InitH) - - if self._isBlastToMap: - blastclustFileName = "%s_Blastclust.fa" % (self._outFilePrefix) - self.blastclustToMap(blastclustFileName) - - if self._clean: - os.remove("%s.shortH" % (self._inFileName)) - os.remove("%s.shortHlink" % (self._inFileName)) - - self.end() - -if __name__ == "__main__": - i = LaunchBlastclust() - i.setAttributesFromCmdLine() - i.run()
--- a/commons/launcher/LaunchLastZ.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,133 +0,0 @@ -#! /usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
import sys
import os
import optparse
from commons.core.checker.CheckerUtils import CheckerUtils
from commons.core.utils.FileUtils import FileUtils
from subprocess import Popen
from commons.core.utils.RepetOptionParser import RepetOptionParser
import subprocess
import tempfile


class LaunchLastZ(object):
    """
    Build and run a `lastz` command line aligning a query fasta file
    against a reference fasta file.
    """

    def __init__(self, queryFileName="", refFileName="", outputFileName=None, outputFileFormat="axt", noTransition=True, ambiguous=None, step=1, gfextend=False, chain=False, verbosity=1):
        self.queryFileName = queryFileName
        self.refFileName = refFileName
        self.outputFileName = outputFileName
        self.outputFileFormat = outputFileFormat
        self.noTransition = noTransition
        self.step = step
        self.ambiguous = ambiguous
        self.gfextend = gfextend
        self.chain = chain
        self.verbosity = verbosity

    def setAttributesFromCmdLine(self):
        """
        Parse the command line and copy every option onto the instance.
        """
        description = "LaunchLastZ runs the LastZ program ."
        parser = RepetOptionParser(description = description)
        parser.add_option("-q", "--query", dest="queryFileName", default = None, action="store", type="string", help="input query file [compulsory] [format: fasta]")
        parser.add_option("-r", "--ref", dest="refFileName", default = None, action="store", type="string", help="input ref file [compulsory] [format: fasta]")
        parser.add_option("-o", "--output", dest="outputFileName", default = None, action="store", type="string", help="output file [compulsory] ")
        parser.add_option("-f", "--format", dest="outputFileFormat", default = "axt", action="store", type="string", help="output file format[optional] ")
        # NOTE(review): with action="store_false" and default=True, passing
        # -n/--notransition turns the flag OFF, i.e. lastz is then run
        # WITHOUT --notransition. Kept as-is; confirm this is intended.
        parser.add_option("-n", "--notransition", dest="noTransition", action="store_false", default=True, help="noTransition (default True) [optional] ")
        parser.add_option("-a", "--ambiguous", dest="ambiguous", action="store", type="string", help="ambiguous [optional] ")
        parser.add_option("-s", "--step", dest="step", default = 1, action="store", type="int", help="stepsize (default 1) [optional] ")
        parser.add_option("-g", "--gfextend", dest="gfextend", action="store_true", help="extend gf (default false)[optional] ")
        parser.add_option("-c", "--chain", dest="chain", action="store_true", help="chain (default false)[optional] ")
        parser.add_option("-v", "--verbosity", dest="verbosity", default = 1, action="store", type="int", help="verbosity [optional] ")
        (self._options, args) = parser.parse_args()
        self._setAttributesFromOptions(self._options)

    def _setAttributesFromOptions(self, options):
        """
        Copy the parsed option values onto the matching attributes.
        """
        self.queryFileName = options.queryFileName
        self.refFileName = options.refFileName
        self.outputFileName = options.outputFileName
        self.outputFileFormat = options.outputFileFormat
        self.ambiguous = options.ambiguous
        self.noTransition = options.noTransition
        self.step = options.step
        self.gfextend = options.gfextend
        self.chain = options.chain
        self.verbosity = options.verbosity

    def checkOptions(self):
        """
        Validate the query/reference files and fill in the default output name.

        Raises an Exception when the query or the reference file name is
        missing (empty string or None) or does not exist.
        """
        # The constructor defaults to "" but the command-line parser defaults
        # to None: both mean "option not given".
        if not self.queryFileName:
            raise Exception("ERROR: No specified --query option!")
        if not FileUtils.isRessourceExists(self.queryFileName):
            raise Exception("ERROR: Query file does not exist!")
        if not self.refFileName:
            raise Exception("ERROR: No specified --ref option!")
        if not FileUtils.isRessourceExists(self.refFileName):
            raise Exception("ERROR: Ref file does not exist!")
        if self.outputFileName is None:
            self.outputFileName = "%s_%s.axt" % (os.path.basename(self.queryFileName), os.path.basename(self.refFileName))

    def _buildCommand(self):
        """
        Return the lastz invocation as an argument list (one item per argument).
        """
        cmd = ["lastz",
               "%s[format=fasta]" % self.refFileName,
               "%s[format=fasta]" % self.queryFileName,
               "--output=%s" % self.outputFileName,
               "--format=%s" % self.outputFileFormat]
        if self.ambiguous is not None:
            cmd.append("--ambiguous=%s" % self.ambiguous)
        if self.noTransition:
            cmd.append("--notransition")
        cmd.append("--step=%i" % self.step)
        if self.gfextend:
            cmd.append("--gfextend")
        if self.chain:
            cmd.append("--chain")
        return cmd

    def run(self):
        """
        Run lastz and return its exit status.

        Returns None (after printing an error) when `lastz` is not found in
        the user's path.
        """
        if not CheckerUtils.isExecutableInUserPath("lastz"):
            print("ERROR: LastZ must be in your path")
            return None
        self.checkOptions()
        # An argument list (instead of a string split on whitespace) keeps
        # file names containing spaces in one piece.
        cmd = self._buildCommand()
        if self.verbosity > 0:
            print("Running LastZ with following commands : %s" % " ".join(cmd))
            sys.stdout.flush()
        process = subprocess.Popen(cmd)
        process.wait()
        return process.returncode


if __name__ == "__main__":
    iLaunchLastZ = LaunchLastZ()
    iLaunchLastZ.setAttributesFromCmdLine()
    iLaunchLastZ.run()
--- a/commons/launcher/LaunchMCL.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,239 +0,0 @@ -#!/usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.seq.FastaUtils import FastaUtils
from commons.core.coord.MatchUtils import MatchUtils
import subprocess
import os
import time
import shutil
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders

LOG_DEPTH = "repet.base"


class LaunchMCL(object):
    """
    Launch the MCL clustering pipeline on a fasta file: shorten the headers,
    run LaunchBlaster.py and matcher, convert the matches for mcxload/mcl,
    then write the clustered sequences as "<prefix>_MCL.fa".
    """

    def __init__(self, fastaFileName = "", outFilePrefix = "", inflate = 1.5, covThres = 0.0, isJoined = False, isCluster2Map = False, isClusterConsensusHeaders = False, doClean = False, verbosity = 0):
        self._fastaFileName = fastaFileName
        self.setOutFilePrefix(outFilePrefix)
        self._inflate = inflate
        self._coverageThreshold = covThres
        self._isJoined = isJoined
        self._isCluster2Map = isCluster2Map
        self._isClusterConsensusHeaders = isClusterConsensusHeaders
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """
        Parse the command line and copy every option onto the instance.
        """
        description = "Launch MCL clustering program."
        epilog = "\nExample: launch without verbosity and keep temporary files.\n"
        epilog += "\t$ python LaunchMCL.py -i file.fa -v 0\n"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFilePrefix", action = "store", type = "string", help = "prefix of the output files [default=input fasta file name]", default = "")
        parser.add_option("-I", "--inflate", dest = "inflate", action = "store", type = "float", help = "inflate parameter of MCL [optional] [default: 1.5]", default = 1.5)
        parser.add_option("-T", "--coverage", dest = "coverageThreshold", action = "store", type = "float", help = "length coverage threshold (default=0.0, 0.0 <= value <= 1.0)", default = 0.0)
        parser.add_option("-j", "--join", dest = "isJoined", action = "store_true", help = "join hits after alignement [optional] [default: False]", default = False)
        parser.add_option("-m", "--map", dest = "isCluster2Map", action = "store_true", help = "generate an additional output file in map format (Warning: only works if MCL's fasta input headers are formated like LTRharvest fasta output)", default = False)
        parser.add_option("", "--isClusterConsensusHeaders", dest = "isClusterConsensusHeaders", action="store_true", help = "format headers for Cluster Consensus tool", default = False)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """
        Forward each parsed option to its setter.
        """
        self.setFastaFileName(options.fastaFileName)
        self.setOutFilePrefix(options.outFilePrefix)
        self.setInflate(options.inflate)
        self.setCoverageThreshold(options.coverageThreshold)
        self.setIsJoined(options.isJoined)
        self.setIsCluster2Map(options.isCluster2Map)
        self.setIsClusterConsensusHeaders(options.isClusterConsensusHeaders)
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def setFastaFileName(self, fastaFileName):
        self._fastaFileName = fastaFileName
        # Keep a defaulted output prefix in step with the input name, so that
        # LaunchMCL() followed by setFastaFileName() does not keep a stale
        # (possibly empty) prefix.
        if getattr(self, "_isDefaultOutFilePrefix", False):
            self._outFilePrefix = os.path.splitext(fastaFileName)[0]

    def setOutFilePrefix(self, outFilePrefix):
        """
        Set the output prefix; "" means "input fasta file name without extension".
        """
        self._isDefaultOutFilePrefix = (outFilePrefix == "")
        if self._isDefaultOutFilePrefix:
            self._outFilePrefix = os.path.splitext(self._fastaFileName)[0]
        else:
            self._outFilePrefix = outFilePrefix

    def setInflate(self, inflate):
        self._inflate = inflate

    def setCoverageThreshold(self, covThres):
        self._coverageThreshold = float(covThres)

    def setIsJoined(self, isJoined):
        self._isJoined = isJoined

    def setDoClean(self, doClean):
        self._doClean = doClean

    def setIsCluster2Map(self, isCluster2Map):
        self._isCluster2Map = isCluster2Map

    def setIsClusterConsensusHeaders(self, isClusterConsensusHeaders):
        self._isClusterConsensusHeaders = isClusterConsensusHeaders

    def setVerbosity(self, verbosity):
        self._verbosity = verbosity

    def _checkOptions(self):
        """
        Raise (after logging) when the option combination is not usable.
        """
        if self._fastaFileName == "":
            self._logAndRaise("ERROR: Missing input fasta file name")
        if self._isCluster2Map and self._isClusterConsensusHeaders:
            self._logAndRaise("ERROR: You can't use both '--isClusterConsensusHeaders' and '-m' options")
        if self._coverageThreshold > 1 or self._coverageThreshold < 0:
            self._logAndRaise("ERROR: Coverage Threshold must be in [0.0 , 1.0]")

    def _logAndRaise(self, errorMsg):
        """
        Log errorMsg at error level, then raise it as an Exception.
        """
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _runShellCommand(self, cmd):
        """
        Run cmd through the shell and raise if its exit status is not 0.
        """
        process = subprocess.Popen(cmd, shell = True)
        self._log.debug("Running : %s" % cmd)
        process.communicate()
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

    def run(self):
        """
        Run the whole pipeline inside a temporary "MCLtmpDirectory".

        Raises an Exception if the options are invalid, if the working
        directory already exists or if one of the external programs fails.
        The caller's working directory is restored in every case.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START Launch MCL")
        self._log.debug("With parameters: -i %s -o %s -I %.2f -T %.2f -j %r -m %r -clusterHeaders %r " % (self._fastaFileName, self._outFilePrefix , self._inflate , self._coverageThreshold, self._isJoined, self._isCluster2Map, self._isClusterConsensusHeaders))
        self._log.debug("Fasta file name: %s" % self._fastaFileName)
        workingDir = "MCLtmpDirectory"
        if os.path.exists(workingDir):
            self._logAndRaise("ERROR: %s already exists." % workingDir)
        initialDir = os.getcwd()
        os.mkdir(workingDir)
        os.chdir(workingDir)
        try:
            # NOTE(review): the relative symlink below presumably requires the
            # input fasta to be a plain file name located in the directory the
            # program was started from - confirm against callers.
            linkToFastaFile = "%s2.fa" % os.path.splitext(self._fastaFileName)[0]
            os.symlink("../%s" % self._fastaFileName, self._fastaFileName)
            fastaFileNameShorten = "%s.shortH" % self._fastaFileName
            iChangeSequenceHeaders = ChangeSequenceHeaders(inFile=self._fastaFileName, format="fasta", step=1, outFile=fastaFileNameShorten, verbosity=self._verbosity - 1)
            iChangeSequenceHeaders.run()
            os.symlink(fastaFileNameShorten, linkToFastaFile)

            self._log.info("START Blaster-Matcher (%s)" % time.strftime("%Y-%m-%d %H:%M:%S"))
            # NOTE(review): in "1>&2 >> file" the later redirection wins, so
            # only stdout ends up in the log file; "2>&1" after the file may
            # have been intended. Left unchanged.
            cmd = "LaunchBlaster.py"
            cmd += " -q %s" % fastaFileNameShorten
            cmd += " -s %s" % linkToFastaFile
            cmd += " -a"
            cmd += " 1>&2 >> blasterMatcher.log"
            self._runShellCommand(cmd)
            outBlasterFileName = "%s.align" % fastaFileNameShorten

            cmd = "matcher"
            cmd += " -m %s" % outBlasterFileName
            cmd += " -q %s" % fastaFileNameShorten
            cmd += " -s %s" % linkToFastaFile
            cmd += " -a"
            if self._isJoined:
                cmd += " -j"
            cmd += " 1>&2 >> blasterMatcher.log"
            self._runShellCommand(cmd)
            self._log.info("END Blaster-Matcher (%s)" % time.strftime("%Y-%m-%d %H:%M:%S"))

            outMatcherFileName = "%s.match.tab" % outBlasterFileName
            inputABCFileName = "%s.shortH.abc" % os.path.splitext(fastaFileNameShorten)[0]
            MatchUtils.convertMatchFileIntoABCFileOnQueryCoverage(outMatcherFileName, inputABCFileName, coverage = self._coverageThreshold)
            outMCLPreprocessFileName = "MCLPreprocess.out"

            self._log.info("START MCL (%s)" % time.strftime("%Y-%m-%d %H:%M:%S"))
            cmd = "mcxload"
            cmd += " -abc %s" % inputABCFileName
            cmd += " --stream-mirror"
            cmd += " --stream-neg-log10"
            cmd += " -stream-tf 'ceil(200)'"
            cmd += " -o %s" % outMCLPreprocessFileName
            cmd += " -write-tab %s.tab" % outMCLPreprocessFileName
            cmd += " 1>&2 > MCLpreprocess.log"
            self._runShellCommand(cmd)

            outMCLFileName = "out.shortH.mcl"
            cmd = "mcl"
            cmd += " %s" % outMCLPreprocessFileName
            cmd += " -I %s" % self._inflate
            cmd += " -use-tab %s.tab" % outMCLPreprocessFileName
            cmd += " -o %s" % outMCLFileName
            cmd += " 1>&2 > MCL.log"
            self._runShellCommand(cmd)
            self._log.info("END MCL (%s)" % time.strftime("%Y-%m-%d %H:%M:%S"))

            outFastaFileNameShorten = "%s.fa" % os.path.splitext(outMCLFileName)[0]

            FastaUtils.convertClusterFileToFastaFile(outMCLFileName, fastaFileNameShorten, outFastaFileNameShorten, "MCL", verbosity = self._verbosity - 1)

            outFastaFileName = "%s_MCL.fa" % self._outFilePrefix
            linkFileName = "%s.newHlink" % self._fastaFileName
            headerStyle = "A"
            if self._isClusterConsensusHeaders:
                headerStyle = "B"
            iChangeSequenceHeaders = ChangeSequenceHeaders(inFile=outFastaFileNameShorten, format="fasta", step=2, outFile=outFastaFileName, linkFile=linkFileName, whichCluster = headerStyle, verbosity=self._verbosity - 1)
            iChangeSequenceHeaders.run()

            if self._isCluster2Map:
                outMapFileName = "%s_MCL.map" % self._outFilePrefix
                FastaUtils.convertClusteredFastaFileToMapFile(outFastaFileName, outMapFileName)
                shutil.move(outMapFileName, "..")

            shutil.move(outFastaFileName, "..")
        finally:
            # Always give the caller its working directory back, even when a
            # step above raised; the temporary directory itself is kept on
            # failure so that the logs can be inspected.
            os.chdir(initialDir)
        if self._doClean:
            self._log.warning("Working directory will be cleaned")
            shutil.rmtree(workingDir)
        self._log.info("END Launch MCL")


if __name__ == "__main__":
    iLaunch = LaunchMCL()
    iLaunch.setAttributesFromCmdLine()
    iLaunch.run()
--- a/commons/launcher/LaunchMap.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,186 +0,0 @@ -#!/usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
from commons.core.seq.FastaUtils import FastaUtils
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
from commons.core.utils.FileUtils import FileUtils
import os
import subprocess

LOG_DEPTH = "repet.tools"


class LaunchMap(object):
    """
    Launch the `rpt_map` program on a fasta file: shorten the headers, run
    rpt_map, restore the initial headers and write the aligned sequences,
    upper-cased and in their input order, to the output file.
    """

    def __init__(self, fastaFileName = "", outFileName = "", gapSize = 50, mismatchPenalty = -8, gapOpenPenalty = 16, gapExtendPenalty = 4, doClean = False, verbosity = 0):
        self._fastaFileName = fastaFileName
        self.setOutFileName(outFileName)
        self._gapSize = gapSize
        self._mismatchPenalty = mismatchPenalty
        self._gapOpenPenalty = gapOpenPenalty
        self._gapExtendPenalty = gapExtendPenalty
        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """
        Parse the command line and copy every option onto the instance.
        """
        parser = RepetOptionParser(description = "", epilog = "")
        parser.add_option("-i", "--fasta", dest = "fastaFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.fa_aln]", default = "")
        parser.add_option("-s", "--gapSize", dest = "gapSize", action = "store", type = "int", help = "size above which a gap is not penalized anymore [optional] [default: 50]", default = 50)
        parser.add_option("-m", "--mismatch", dest = "mismatch", action = "store", type = "int", help = "penalty for a mismatch [optional] [default: -8]", default = -8)
        parser.add_option("-O", "--gapOpen", dest = "gapOpen", action = "store", type = "int", help = "penalty for a gap opening [optional] [default: 16]", default = 16)
        parser.add_option("-e", "--gapExtend", dest = "gapExtend", action = "store", type = "int", help = "penalty for a gap extension [optional] [default: 4]", default = 4)
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """
        Forward each parsed option to its setter.
        """
        self.setFastaFileName(options.fastaFileName)
        self.setOutFileName(options.outFileName)
        self.setGapSize(options.gapSize)
        self.setMismatchPenalty(options.mismatch)
        self.setGapOpenPenalty(options.gapOpen)
        self.setGapExtendPenalty(options.gapExtend)
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def setFastaFileName(self, fastaFileName):
        self._fastaFileName = fastaFileName
        # Keep a defaulted output name in step with the input name, so that
        # LaunchMap() followed by setFastaFileName() does not write to the
        # stale ".fa_aln".
        if getattr(self, "_isDefaultOutFileName", False):
            self._outFileName = "%s.fa_aln" % fastaFileName

    def setOutFileName(self, outFileName):
        """
        Set the output file name; "" means "<input fasta file name>.fa_aln".
        """
        self._isDefaultOutFileName = (outFileName == "")
        if self._isDefaultOutFileName:
            self._outFileName = "%s.fa_aln" % self._fastaFileName
        else:
            self._outFileName = outFileName

    def setGapSize(self, gapSize):
        self._gapSize = gapSize

    def setMismatchPenalty(self, mismatchPenalty):
        self._mismatchPenalty = mismatchPenalty

    def setGapOpenPenalty(self, gapOpenPenalty):
        self._gapOpenPenalty = gapOpenPenalty

    def setGapExtendPenalty(self, gapExtendPenalty):
        self._gapExtendPenalty = gapExtendPenalty

    def setDoClean(self, doClean):
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        self._verbosity = verbosity

    def _checkOptions(self):
        """
        Raise (after logging) when the input fasta file is missing.
        """
        if self._fastaFileName == "":
            self._logAndRaise("ERROR: Missing input fasta file name")
        if not FileUtils.isRessourceExists(self._fastaFileName):
            self._logAndRaise("ERROR: Input fasta file name %s doesn't exist." % self._fastaFileName)

    def _logAndRaise(self, errorMsg):
        """
        Log errorMsg at error level, then raise it as an Exception.
        """
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def getMapCmd(self):
        """
        Return the rpt_map shell command line; it reads "<fasta>.shortH" and
        redirects the program output to "<fasta>.shortH.fa_aln".
        """
        cmd = "rpt_map"
        cmd += " %s.shortH" % self._fastaFileName
        cmd += " %i" % self._gapSize
        cmd += " %i" % self._mismatchPenalty
        cmd += " %i" % self._gapOpenPenalty
        cmd += " %i" % self._gapExtendPenalty
        cmd += " > %s.shortH.fa_aln" % self._fastaFileName
        return cmd

    def run(self):
        """
        Run the whole alignment.

        Raises an Exception if the input file is missing or if rpt_map exits
        with a non-zero status.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchMap")
        self._log.debug("Fasta file name: %s" % self._fastaFileName)

        # Remember the initial headers: they drive the order of the output.
        lInitHeaders = FastaUtils.dbHeaders(self._fastaFileName, self._verbosity - 1)

        shortHFileName = "%s.shortH" % self._fastaFileName
        linkFileName = "%s.shortHlink" % self._fastaFileName
        alignedFileName = "%s.shortH.fa_aln" % self._fastaFileName
        initHFileName = "%s.shortH.fa_aln.initH" % self._fastaFileName

        # Step 1: replace the headers by short ones before running rpt_map.
        csh = ChangeSequenceHeaders()
        csh.setInputFile(self._fastaFileName)
        csh.setFormat("fasta")
        csh.setStep(1)
        csh.setPrefix("seq")
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(shortHFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        cmd = self.getMapCmd()
        process = subprocess.Popen(cmd, shell = True)
        self._log.debug("Running : %s" % cmd)
        process.communicate()
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

        # Step 2: put the initial headers back on the aligned sequences.
        csh.setInputFile(alignedFileName)
        csh.setFormat("fasta")
        csh.setStep(2)
        csh.setLinkFile(linkFileName)
        csh.setOutputFile(initHFileName)
        csh.setVerbosityLevel(self._verbosity - 1)
        csh.run()

        absDB = AlignedBioseqDB(initHFileName)
        # "with" closes the output file even if a fetch or a write fails.
        with open(self._outFileName, "w") as outFileHandler:
            for header in lInitHeaders:
                bs = absDB.fetch(header)
                bs.upCase()
                bs.write(outFileHandler)
        if self._doClean:
            for tmpFileName in (shortHFileName, linkFileName, alignedFileName, initHFileName):
                os.remove(tmpFileName)
        self._log.info("END Launch")


if __name__ == "__main__":
    iLaunch = LaunchMap()
    iLaunch.setAttributesFromCmdLine()
    iLaunch.run()
--- a/commons/launcher/LaunchMatcher.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,168 +0,0 @@ -#!/usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
import subprocess

LOG_DEPTH = "repet.tools"


##Launch MATCHER
#
class LaunchMatcher(object):
    """Build and run a ``matcher`` command line on an align file.

    The instance can be configured through the constructor, through the
    setters, or from the command line (``setAttributesFromCmdLine``).
    ``run()`` validates the options, launches ``matcher`` through the shell
    and raises ``Exception`` when the program exits with a non-zero status.
    """

    def __init__(self, align="", queryFileName="", subjectFileName="", evalue="1e-10", doJoin=False, keepConflict=False, prefix="", doClean = False, verbosity = 0):
        self._alignFileName = align
        self._queryFileName = queryFileName
        self.setSubjectFileName(subjectFileName)
        # Must come after _alignFileName: an empty prefix falls back on it.
        self.setOutPrefix(prefix)
        self._doJoin = doJoin
        self._eValue = evalue
        self._keepConflict = keepConflict

        self._doClean = doClean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def setAttributesFromCmdLine(self):
        """Parse ``sys.argv`` and copy the option values onto the instance."""
        description = "Launch Matcher."
        epilog = "\nExample 1: launch without verbosity and keep temporary files.\n"
        epilog += "\t$ python LaunchMatcher.py -a in.align -v 0"
        epilog += "\n\t"
        epilog += "\nExample 2: launch with verbosity to have errors (level 1) and basic information (level 2), and delete temporary files.\n"
        epilog += "\t$ python LaunchMatcher.py -a in.align -q query.fa -s nr.fa -c -v 2"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-a", "--align", dest = "align", action = "store", type = "string", help = "input align file name [compulsory] [format: align]", default = "")
        parser.add_option("-q", "--query", dest = "query", action = "store", type = "string", help = "query fasta file name [optional] [format: fasta]", default = "")
        parser.add_option("-s", "--subject", dest = "subject", action = "store", type = "string", help = "subject fasta file name [optional] [format: fasta]", default = "")
        parser.add_option("-e", "--evalue", dest = "evalue", action = "store", type = "string", help = "E-value filter [default: 1e-10]", default = "1e-10")
        parser.add_option("-j", "--join", dest = "doJoin", action = "store_true", help = "join matches [default: False]", default = False)
        parser.add_option("-k", "--keepConflict",dest = "keepConflict", action = "store_true", help = "keep conflicting subjects [default: False]", default = False)
        parser.add_option("-o", "--outPrefix", dest = "outPrefix", action = "store", type = "string", help = "output file prefix [default: align file name]", default = "")
        parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [default: False]", default = False)
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy parsed options onto the instance (align name before prefix)."""
        self.setAlignFileName(options.align)
        self.setQueryFileName(options.query)
        self.setSubjectFileName(options.subject)
        self.setEvalue(options.evalue)
        self.setDoJoin(options.doJoin)
        self.setKeepConflicts(options.keepConflict)
        self.setOutPrefix(options.outPrefix)
        self.setDoClean(options.doClean)
        self.setVerbosity(options.verbosity)

    def setAlignFileName(self, alignFileName):
        self._alignFileName = alignFileName

    def setQueryFileName(self, queryFileName):
        self._queryFileName = queryFileName

    def setSubjectFileName(self, subjectFileName):
        self._subjectFileName = subjectFileName

    def setEvalue(self, evalue):
        self._eValue = evalue

    def setDoJoin(self, doJoin):
        self._doJoin = doJoin

    def setKeepConflicts(self, keepConflict):
        self._keepConflict = keepConflict

    def setOutPrefix(self, outPrefix):
        """Set the output prefix; an empty value means "use the align file name"."""
        if outPrefix == "":
            self._outPrefix = self._alignFileName
        else:
            self._outPrefix = outPrefix

    def setDoClean(self, doClean):
        self._doClean = doClean

    def setVerbosity(self, verbosity):
        self._verbosity = verbosity

    def _checkOptions(self):
        """Raise ``Exception`` when the compulsory align file name is missing."""
        if self._alignFileName == "":
            self._logAndRaise("ERROR: Missing input align file name")

    def _logAndRaise(self, errorMsg):
        """Log ``errorMsg`` at error level, then raise it as an ``Exception``."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _getMatcherCmd(self):
        """Return the ``matcher`` command line as a single string."""
        # The prefix may still be empty when the align file name was set after
        # construction (setOutPrefix ran while the align name was still "").
        # Resolve the fallback here so "-B" never ends up without a value and
        # swallows the next flag.
        outPrefix = self._outPrefix or self._alignFileName

        lArgs = ["-m %s" % self._alignFileName]
        if self._queryFileName:
            lArgs.append("-q %s" % self._queryFileName)
        if self._subjectFileName:
            lArgs.append("-s %s" % self._subjectFileName)
        if self._doJoin:
            lArgs.append("-j")
        lArgs.append("-E %s" % self._eValue)
        lArgs.append("-B %s" % outPrefix)
        if self._keepConflict:
            lArgs.append("-a")
        # matcher receives the launcher verbosity lowered by one.
        lArgs.append("-v %i" % (self._verbosity - 1))
        return self._getSystemCommand("matcher", lArgs)

    def _getSystemCommand(self, prg, lArgs):
        """Join the program name and its arguments with single spaces."""
        return " ".join([prg] + list(lArgs))

    def run(self):
        """Validate the options and run ``matcher``.

        Raises ``Exception`` when the align file name is missing or when the
        command exits with a non-zero return code.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchMatcher")
        self._log.debug("Align file name: %s" % self._alignFileName)
        self._log.debug("Query file name: %s" % self._queryFileName)
        self._log.debug("Subject file name: %s" % self._subjectFileName)
        #TODO: clean files (self._doClean is stored but not used yet)
        cmd = self._getMatcherCmd()
        # Log before launching so the command is recorded even if it hangs.
        self._log.debug("Running : %s" % cmd)
        process = subprocess.Popen(cmd, shell = True)
        process.communicate()
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)
        self._log.info("END LaunchMatcher")


if __name__ == "__main__":
    iLaunch = LaunchMatcher()
    iLaunch.setAttributesFromCmdLine()
    iLaunch.run()
--- a/commons/launcher/LaunchMummerPlot.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,150 +0,0 @@ -#! /usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
from commons.core.checker.CheckerUtils import CheckerUtils
from commons.core.utils.FileUtils import FileUtils
from commons.core.utils.RepetOptionParser import RepetOptionParser
import subprocess
from commons.core.LoggerFactory import LoggerFactory
import os
import shutil

LOG_DEPTH = "repet.tools"


class LaunchMummerPlot(object):
    """Run ``mummerplot`` (MUMmer package) on a delta file and produce a PNG.

    File names may be given as ``""`` (constructor default) or ``None``
    (command-line default); both mean "not provided".
    """

    # Output prefix mummerplot uses when --prefix is not given.
    _DEFAULT_PREFIX = "out"

    def __init__(self, inputFileName="", queryFileName="", refFileName ="", prefix = None, fat=False, filter=False,clean=False, verbosity=0):
        self._inputFileName = inputFileName
        self._queryFileName = queryFileName
        self._refFileName = refFileName
        self._prefix = prefix
        self._fat = fat
        self._filter = filter
        self.doClean = clean
        self.verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)

    def setAttributesFromCmdLine(self):
        """Parse ``sys.argv`` and copy the option values onto the instance."""
        description = "LaunchMummerPlot runs the MummerPlot program (part of the mummer package) ."
        parser = RepetOptionParser(description = description)
        parser.add_option("-i", "--input", dest="inputFileName", default = None, action="store", type="string", help="input file[mandatory] [format: delta]")
        parser.add_option("-q", "--Qfile", dest="queryFileName", default = None, action="store", type="string", help="Plot an ordered set of reference sequences from Qfile [optional] [format: fasta]")
        parser.add_option("-r", "--Rfile", dest="refFileName", default = None, action="store", type="string", help="Plot an ordered set of reference sequences from Rfile [optional] [format: fasta]")
        parser.add_option("-p", "--prefix", dest="prefix", default = None, action="store", type="string", help="prefix name [mandatory]")
        parser.add_option("-o","--fat", dest="fat",action="store_true", help="Layout sequences using fattest alignment only[optional] ")
        parser.add_option("-s","--filter", dest="filter",action="store_true", help="Only display .delta alignments which represent the 'best' hit [optional] ")
        parser.add_option("-c", "--clean", dest = "clean", help = "clean temporary files", default = False, action="store_true")
        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0, action="store", type="int", help="verbosity [optional] ")

        (self._options, args) = parser.parse_args()
        self._setAttributesFromOptions(self._options)

    def _setAttributesFromOptions(self, options):
        """Copy parsed options onto the instance."""
        self._inputFileName = options.inputFileName
        self._queryFileName = options.queryFileName
        self._refFileName = options.refFileName
        self._prefix = options.prefix
        self._fat = options.fat
        self._filter = options.filter
        # The -c flag was parsed but never stored; tolerate option objects
        # that do not carry it.
        self.doClean = getattr(options, "clean", self.doClean)
        self.verbosity = options.verbosity

    def _logAndRaise(self, errorMsg):
        """Log ``errorMsg`` at error level, then raise it as an ``Exception``."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def checkOptions(self):
        """Raise ``Exception`` if the input is missing or a given file does not exist."""
        # Truthiness covers both "" and None (the command-line default).
        if not self._inputFileName:
            self._logAndRaise("ERROR: No specified --input option!")
        if not FileUtils.isRessourceExists(self._inputFileName):
            self._logAndRaise("ERROR: Input file: %s does not exist!" % self._inputFileName)

        if self._queryFileName and not FileUtils.isRessourceExists(self._queryFileName):
            self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)

        if self._refFileName and not FileUtils.isRessourceExists(self._refFileName):
            self._logAndRaise("ERROR: Ref file: %s does not exist!" % self._refFileName)

    def clean(self):
        """Remove the ``.filter``, ``.fplot`` and ``.rplot`` files of this run.

        A file that cannot be removed is logged at error level and does not
        stop the cleanup of the others.
        """
        prefix = self._prefix
        if prefix is None:
            prefix = self._DEFAULT_PREFIX
        for suffix in ("filter", "fplot", "rplot"):
            try:
                os.remove("%s.%s" % (prefix, suffix))
            except OSError as inst:
                self._log.error(inst)

    def run(self):
        """Check the options, run ``mummerplot --png`` and return its exit code.

        Raises ``Exception`` when ``mummerplot`` is not in the user path or
        when the options are invalid.
        """
        LoggerFactory.setLevel(self._log, self.verbosity)
        if not CheckerUtils.isExecutableInUserPath("mummerplot") :
            self._logAndRaise("ERROR: mummerplot must be in your path")
        self.checkOptions()

        # Build the argv list directly: no shell is involved and names that
        # contain blanks stay in one argument.
        lArgs = ["mummerplot", self._inputFileName]
        if self._prefix is not None:
            lArgs.append("--prefix=%s" % self._prefix)
        if self._refFileName:
            lArgs.extend(["-R", self._refFileName])
        if self._queryFileName:
            lArgs.extend(["-Q", self._queryFileName])
        if self._fat:
            lArgs.append("--fat")
        if self._filter:
            lArgs.append("-f")
        lArgs.append("--png")

        self._log.debug("Running mummerplot with following commands : %s" % " ".join(lArgs))
        process = subprocess.Popen(lArgs)
        process.wait()

        # NOTE(review): cleanup has always been unconditional here; doClean is
        # recorded but not consulted. Confirm before gating on it.
        self.clean()
        return process.returncode


if __name__ == "__main__":
    iLaunchNucmer = LaunchMummerPlot()
    iLaunchNucmer.setAttributesFromCmdLine()
    iLaunchNucmer.run()
--- a/commons/launcher/LaunchNucmer.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,158 +0,0 @@ -#! /usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
from commons.core.checker.CheckerUtils import CheckerUtils
from commons.core.utils.FileUtils import FileUtils
from commons.core.utils.RepetOptionParser import RepetOptionParser
import subprocess
from commons.core.LoggerFactory import LoggerFactory
import os

LOG_DEPTH = "repet.tools"


class LaunchNucmer(object):
    """Run ``nucmer`` (MUMmer package) and optionally ``show-coords``.

    ``run()`` returns the exit code of ``nucmer``. When ``showCoords`` is set,
    ``show-coords`` is run on ``<prefix>.delta`` and its output is written to
    ``<prefix>.coords``.
    """

    # Output prefix nucmer uses when --prefix is not given.
    _DEFAULT_PREFIX = "out"

    def __init__(self,queryFileName="", refFileName ="", prefix = None, genCoords=False, showCoords = False, mum=False, maxGaps=90, minMatch=20, nooptimize=False,mincluster=65, minIdentity=50, minLength=100, verbosity=0):
        self._queryFileName = queryFileName
        self._refFileName = refFileName
        self._prefix = prefix
        self._genCoords = genCoords
        self._showCoords = showCoords
        self._mum = mum
        self._maxgaps = maxGaps
        self._minMatch = minMatch
        self._nooptimize = nooptimize
        self._mincluster = mincluster
        self._minIdentity = minIdentity
        self._minLength = minLength
        self.verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)

    def setMincluster(self, value):
        self._mincluster = value

    def getMincluster(self):
        return self._mincluster

    mincluster = property(getMincluster, setMincluster)

    def setAttributesFromCmdLine(self):
        """Parse ``sys.argv`` and copy the option values onto the instance."""
        description = "LaunchNucmer runs the Nucmer program (part of the mummer package) ."
        parser = RepetOptionParser(description = description)
        parser.add_option("-q", "--query", dest="queryFileName", default = "", action="store", type="string", help="input query file [compulsory] [format: fasta]")
        parser.add_option("-r", "--ref", dest="refFileName", default = "", action="store", type="string", help="input ref file [compulsory] [format: fasta]")
        parser.add_option("-p", "--prefix", dest="prefix", default = None, action="store", type="string", help="prefix name [optional]")
        parser.add_option("-o","--gencoords", dest="genCoords",action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ")
        parser.add_option("-s","--showcoords", dest="showCoords",action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 100 -T [optional] ")
        parser.add_option("-m", "--mum", dest="mum", action="store_true", help="Use anchor matches that are unique in both the reference and query [optional] ")
        parser.add_option("-g", "--maxgaps", dest="maxgaps", default = 90, action="store", type="int", help="Maximum gap between two adjacent matches in a cluster (default 90) [optional] ")
        parser.add_option("-l", "--minmatch", dest="minMatch", default = 20, action="store", type="int", help="Minimum length of an maximal exact match (default 20) [optional] ")
        parser.add_option("-n", "--nooptimize", dest="nooptimize", action="store_true", help="nooptimize (default --optimize) [optional] ")
        parser.add_option("-j", "--mincluster", dest="mincluster", default = 65, action="store", type="int", help="Minimum length of a cluster of matches (default 65) [optional] ")

        parser.add_option("-i", "--minIdentity", dest="minIdentity", default = 50, action="store", type="int", help="Minimum identity for show_coords (default 50) [optional] ")
        parser.add_option("-u", "--minLength", dest="minLength", default = 100, action="store", type="int", help="Minimum alignment length for show_coords (default 100) [optional] ")
        parser.add_option("-v", "--verbosity", dest="verbosity", default = 0, action="store", type="int", help="verbosity [optional] ")

        (self._options, args) = parser.parse_args()
        self._setAttributesFromOptions(self._options)

    def _setAttributesFromOptions(self, options):
        """Copy parsed options onto the instance."""
        self._queryFileName = options.queryFileName
        self._refFileName = options.refFileName
        self._prefix = options.prefix
        self._genCoords = options.genCoords
        self._showCoords = options.showCoords
        self._mum = options.mum
        self._maxgaps = options.maxgaps
        self._minMatch = options.minMatch
        self._nooptimize = options.nooptimize
        self._mincluster = options.mincluster

        self._minIdentity = options.minIdentity
        self._minLength = options.minLength

        self.verbosity = options.verbosity

    def _logAndRaise(self, errorMsg):
        """Log ``errorMsg`` at error level, then raise it as an ``Exception``."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def checkOptions(self):
        """Raise ``Exception`` unless both the query and ref files are given and exist."""
        if not self._queryFileName:
            self._logAndRaise("ERROR: No specified --query option!")
        if not FileUtils.isRessourceExists(self._queryFileName):
            self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName)

        if not self._refFileName:
            self._logAndRaise("ERROR: No specified --ref option!")
        if not FileUtils.isRessourceExists(self._refFileName):
            # The original message had no placeholder, so the "%" raised
            # TypeError instead of reporting the missing file.
            self._logAndRaise("ERROR: Ref file: %s does not exist!" % self._refFileName)

    def run(self):
        """Run ``nucmer`` (then ``show-coords`` if requested); return nucmer's exit code.

        Raises ``Exception`` when ``nucmer`` is not in the user path or when
        the options are invalid. A non-zero exit code from ``nucmer`` is
        logged and returned, not raised.
        """
        LoggerFactory.setLevel(self._log, self.verbosity)
        if not CheckerUtils.isExecutableInUserPath("nucmer") :
            self._logAndRaise("ERROR: nucmer must be in your path")
        self.checkOptions()

        lArgs = ["nucmer", self._refFileName, self._queryFileName]
        if self._prefix is not None:
            lArgs.append("--prefix=%s" % self._prefix)
        if self._genCoords:
            lArgs.append("-o")
        if self._mum:
            lArgs.append("--mum")
        lArgs.append("-g=%d" % self._maxgaps)
        lArgs.append("-l=%d" % self._minMatch)
        if self._nooptimize:
            lArgs.append("--nooptimize")
        else:
            lArgs.append("--optimize")
        lArgs.append("-c=%d" % self._mincluster)

        self._log.debug("Running nucmer with following commands : %s" % " ".join(lArgs))
        process = subprocess.Popen(lArgs, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes; wait() alone can block forever once
        # the child has filled a pipe buffer.
        stderrData = process.communicate()[1]
        if process.returncode != 0:
            self._log.error("nucmer exited with code %d:\n%s" % (process.returncode, stderrData))

        if self._showCoords:
            prefix = self._prefix
            if prefix is None:
                # Without --prefix the delta file is named after the default.
                prefix = self._DEFAULT_PREFIX
            lCoordsArgs = ["show-coords", "-r", "-c", "-l", "-d",
                           "-I", "%d" % self._minIdentity,
                           "-L", "%d" % self._minLength,
                           "-T", "%s.delta" % prefix]
            self._log.debug("Running show-coords with following commands : %s > %s.coords" % (" ".join(lCoordsArgs), prefix))
            # Redirect through a file handle rather than a shell ">".
            with open("%s.coords" % prefix, "w") as coordsFile:
                subprocess.call(lCoordsArgs, stdout=coordsFile)

        return process.returncode


if __name__ == "__main__":
    iLaunchNucmer = LaunchNucmer()
    iLaunchNucmer.setAttributesFromCmdLine()
    iLaunchNucmer.run()
--- a/commons/launcher/LaunchPhyML.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,177 +0,0 @@ -#!/usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
from commons.core.LoggerFactory import LoggerFactory
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
import subprocess
import os
from commons.core.seq.Bioseq import Bioseq
import shutil

LOG_DEPTH = "repet.core.launchers"


class LaunchPhyML(object):
    """
    Launch 'PhyML'

    ``run()`` shortens the fasta headers, converts the alignment to phylip
    with ``sreformat``, runs ``phyml`` and writes the resulting tree, with
    the initial headers restored, to ``outFileName``.
    """
    def __init__(self, inputFileName="", outFileName="",dataType= "nt", interleavedFormat= True, nbDataSets=1, nbBootDataSets=0, substModel="HKY85", ratioTsTv=4.0, propInvSites= 0.0, nbCat=1, gammaParam=1.0, startTree="BIONJ", paramOptimisation = "tlr", clean=False, verbosity=3 ):
        self.inputFileName = inputFileName
        self.outFileName=outFileName
        self.dataType = dataType #"nt or aa"
        self._setSeqFormat(interleavedFormat) #if False -q"
        self.nbDataSets = nbDataSets
        self.nbBootDataSets = nbBootDataSets
        self.substModel = substModel
        self.ratioTsTv = ratioTsTv
        self.propInvSites = propInvSites # propInvSites="e" replaced by 0.0; should be in [0-1]
        self.nbCat = nbCat # Number of categories less than four or higher than eight are not recommended.
        self.gammaParam = gammaParam
        # NOTE(review): startTree is stored but never read by run().
        self.startTree = startTree #by default is BIONJ used reformatedInputFileName+"_phyml_tree.txt" instead
        self.paramOptimisation = paramOptimisation # used instead of self.optTopology="y", self.optBranchRate="y"
        #This option focuses on specific parameter optimisation.
        #tlr : tree topology (t), branch length (l) and rate parameters (r) are optimised.
        #tl : tree topology and branch length are optimised.
        #lr : branch length and rate parameters are optimised.
        #l : branch length are optimised.
        #r : rate parameters are optimised.
        #n : no parameter is optimised.

        self._clean = clean
        self._verbosity = verbosity
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)

    def _setSeqFormat(self, interleavedFormat):
        """Store the phyml flag for the sequence layout (" -q" when not interleaved)."""
        if interleavedFormat:
            self.seqFormat = ""
        else:
            self.seqFormat = " -q"

    def setAttributesFromCmdLine(self):
        """Parse ``sys.argv`` and copy the option values onto the instance."""
        description = "usage: LaunchPhyML.py [ options ]"
        epilog = "\n -h: this help\n"
        epilog += "\t -i: name of the input file (refseq is first, format='fasta')"
        epilog += "\n\t"
        parser = RepetOptionParser(description = description, epilog = epilog)
        parser.add_option("-i", "--fasta", dest = "inputFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "")
        parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.out]", default = "")
        parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Copy parsed options onto the instance."""
        self.inputFileName = options.inputFileName
        # Was assigned to a stray "setOutFileName" attribute, so -o was ignored.
        self.outFileName = options.outFileName
        self._verbosity = options.verbosity

    def _checkOptions(self):
        """Require an input file and derive the default output file name."""
        if self.inputFileName == "":
            self._logAndRaise("ERROR: Missing input file name")

        if self.outFileName == "":
            self.outFileName = "%s_phyml.newick" % (self.inputFileName)

    def _logAndRaise(self, errorMsg):
        """Log ``errorMsg`` at error level, then raise it as an ``Exception``."""
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def _shortenHeaders(self):
        """Write ``<input>.shortH`` with short headers and the ``.shortHlink`` map."""
        self.csh = ChangeSequenceHeaders()
        self.csh.setInputFile(self.inputFileName)
        self.csh.setFormat("fasta")
        self.csh.setStep(1)
        self.csh.setPrefix("seq")
        self.csh.setLinkFile(self.inputFileName+".shortHlink")
        self.csh.setOutputFile(self.inputFileName+".shortH")
        self.csh.setVerbosityLevel(self._verbosity-1)
        self.csh.run()
        self.shortInputFileName = self.inputFileName+".shortH"

    def _renameHeaders(self):
        """Restore the initial headers in the phyml tree and write ``outFileName``."""
        self.csh.setInputFile(self.phyml_tree)
        self.csh.setFormat("newick")
        self.csh.setStep(2)
        self.csh.setLinkFile(self.inputFileName+".shortHlink" )
        self.csh.setOutputFile(self.outFileName)
        self.csh.setVerbosityLevel(self._verbosity-1)
        self.csh.run()

    def _runSreformat(self):
        """Convert the short-header fasta file to phylip with ``sreformat``."""
        lArgs = ["sreformat", "phylip", self.shortInputFileName]
        cmd = " ".join(lArgs)
        with open(self.reformatedInputFileName, "w") as fPhylip:
            process = subprocess.Popen(lArgs, stdout=fPhylip, stderr=subprocess.PIPE)
            self._log.debug("Running : %s" % cmd)
            # stdout goes to the phylip file, so only stderr is captured.
            stderrData = process.communicate()[1]
        self._log.debug("Output:\n%s" % stderrData)
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

    def _runPhyml(self):
        """Run ``phyml`` on the phylip file; raise ``Exception`` if it fails."""
        lArgs = ["phyml", "-i", self.reformatedInputFileName, "-d", self.dataType]
        lArgs.extend(self.seqFormat.split())
        lArgs.extend(["-n", "%d" % self.nbDataSets,
                      "-b", "%d" % self.nbBootDataSets,
                      "-m", self.substModel,
                      "-t", "%f" % self.ratioTsTv,
                      "-v", "%f" % self.propInvSites,
                      "-c", "%d" % self.nbCat,
                      "-a", "%f" % self.gammaParam])
        # A tree left by a previous run is copied and given as starting tree.
        # The copy used to be unconditional and crashed on a first run, when
        # no tree exists yet; without -u phyml builds its own starting tree.
        if os.path.exists(self.phyml_tree):
            cpyPhyml_tree = "%s_cpy" % self.phyml_tree
            shutil.copyfile(self.phyml_tree, cpyPhyml_tree)
            lArgs.extend(["-u", cpyPhyml_tree])
        lArgs.extend(["-o", self.paramOptimisation])

        cmd = " ".join(lArgs)
        print(cmd)
        process = subprocess.Popen(lArgs, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        self._log.debug("Running : %s" % cmd)
        output = process.communicate()
        self._log.debug("Output:\n%s" % output[0])
        if process.returncode != 0:
            self._logAndRaise("ERROR when launching '%s'" % cmd)

    def run(self):
        """Run the whole pipeline: shorten headers, sreformat, phyml, restore headers.

        Raises ``Exception`` when the input file name is missing or when
        ``sreformat`` or ``phyml`` exits with a non-zero return code.
        """
        LoggerFactory.setLevel(self._log, self._verbosity)
        self._checkOptions()
        self._log.info("START LaunchPhyML")
        self._log.debug("building a multiple alignment from '%s'..." % ( self.inputFileName))

        # Work on a same-named entry in the current directory, creating a
        # symlink to the input when there is none.
        # NOTE(review): the recovered source lost its indentation; the
        # rebinding below is assumed to sit outside the "if" - confirm.
        inputFileName = "%s/%s" % (os.getcwd(), os.path.basename(self.inputFileName))
        if not os.path.exists(inputFileName):
            os.symlink(self.inputFileName, inputFileName)
        self.inputFileName = inputFileName

        self._shortenHeaders()

        # These names must be set before the phylip file is opened; they used
        # to be assigned only after the first use, which raised AttributeError.
        self.reformatedInputFileName = "%s.phylip" % self.shortInputFileName
        self.phyml_tree = "%s_phyml_tree.txt" % self.reformatedInputFileName

        self._runSreformat()
        self._runPhyml()
        self._renameHeaders()

        if self._clean:
            for f in [ self.shortInputFileName, self.inputFileName+".shortHlink", self.inputFileName+".shortH.phylip",self.inputFileName+".shortH.phylip_phyml_lk.txt", self.phyml_tree ]:
                os.remove(f)
            # NOTE(review): assumed to belong to the cleanup branch (the
            # indentation is lost in the recovered source) - confirm.
            statFileName = "%s.phylip_phyml_stat.txt" % self.shortInputFileName
            if os.path.exists(statFileName):
                shutil.move(statFileName, "%s_phyml.txt" % self.inputFileName)

        self._log.info("Finished running LaunchPhyML")
--- a/commons/launcher/LaunchPromer.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,147 +0,0 @@ -#! /usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
- -from commons.core.checker.CheckerUtils import CheckerUtils -from commons.core.utils.FileUtils import FileUtils -from commons.core.utils.RepetOptionParser import RepetOptionParser -import subprocess -from commons.core.LoggerFactory import LoggerFactory -import os - -LOG_DEPTH = "repet.tools" - -class LaunchPromer(object): - - def __init__(self,queryFileName="", refFileName ="", prefix = None, genCoords=False, showCoords = False, mum=False, maxGaps=30, minMatch=6, nooptimize=False,mincluster=20, verbosity=0): - self._queryFileName = queryFileName - self._refFileName = refFileName - self._prefix = prefix - self._genCoords = genCoords - self._showCoords = showCoords - self._mum = mum - self._maxgaps = maxGaps - self._minMatch = minMatch - self._nooptimize = nooptimize - self._mincluster = mincluster - self.verbosity = verbosity - self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity) - - def setMincluster(self, value): - self._mincluster = value - def getMincluster(self): - return self._mincluster - - mincluster = property(getMincluster, setMincluster) - - def setAttributesFromCmdLine(self): - description = "LaunchPromer runs the Promer program (part of the mummer package) ." 
- parser = RepetOptionParser(description = description) - parser.add_option("-q", "--query", dest="queryFileName", default = "", action="store", type="string", help="input query file [compulsory] [format: fasta]") - parser.add_option("-r", "--ref", dest="refFileName", default = "", action="store", type="string", help="input ref file [compulsory] [format: fasta]") - parser.add_option("-p", "--prefix", dest="prefix", default = None, action="store", type="string", help="prefix name [optional]") - parser.add_option("-o","--gencoords", dest="genCoords",action="store_true", help="generate coords file with minimal option (show-coords -r) [optional] ") - parser.add_option("-s","--showcoords", dest="showCoords",action="store_true", help="generate coords file with: show-coords -r -c -l -d -I 50 -L 100 -T [optional] ") - parser.add_option("-m", "--mum", dest="mum", action="store_true", help="Use anchor matches that are unique in both the reference and query [optional] ") - parser.add_option("-g", "--maxgaps", dest="maxgaps", default = 30, action="store", type="int", help="Maximum gap between two adjacent matches in a cluster (default 30) [optional] ") - parser.add_option("-l", "--minmatch", dest="minMatch", default = 6, action="store", type="int", help="Minimum length of an maximal exact match (default 6) [optional] ") - parser.add_option("-n", "--nooptimize", dest="nooptimize", action="store_true", help="nooptimize (default --optimize) [optional] ") - parser.add_option("-j", "--mincluster", dest="mincluster", default = 20, action="store", type="int", help="Minimum length of a cluster of matches (default 20) [optional] ") - parser.add_option("-v", "--verbosity", dest="verbosity", default = 0, action="store", type="int", help="verbosity [optional] ") - - (self._options, args) = parser.parse_args() - self._setAttributesFromOptions(self._options) - - def _setAttributesFromOptions(self, options): - self._queryFileName = options.queryFileName - self._refFileName = 
options.refFileName - self._prefix = options.prefix - self._genCoords = options.genCoords - self._showCoords = options.showCoords - self._mum = options.mum - self._maxgaps = options.maxgaps - self._minMatch = options.minMatch - self._nooptimize = options.nooptimize - self._mincluster = options.mincluster - self.verbosity = options.verbosity - - def _logAndRaise(self, errorMsg): - self._log.error(errorMsg) - raise Exception(errorMsg) - - def checkOptions(self): - if self._queryFileName != "": - if not FileUtils.isRessourceExists(self._queryFileName): - self._logAndRaise("ERROR: Query file: %s does not exist!" % self._queryFileName) - else: - self._logAndRaise("ERROR: No specified --query option!") - - if self._refFileName != "": - if not FileUtils.isRessourceExists(self._refFileName): - self._logAndRaise("ERROR: Ref file does not exist!"% self._refFileName) - else: - self._logAndRaise("ERROR: No specified --ref option!") - - def run(self): - if not CheckerUtils.isExecutableInUserPath("promer") : - self._logAndRaise("ERROR: promer must be in your path") - self.checkOptions() - - genCoords = "" - if self._genCoords: - genCoords = "-o" - mum = "" - if self._mum: - mum = "--mum" - nooptimize = "--optimize" - if self._nooptimize: - nooptimize = "--nooptimize" - prefix = "" - if self._prefix is not None: - prefix = "--prefix=%s" %(self._prefix) - - cmd = "promer %s %s %s %s %s -g=%d -l=%d %s -c=%d" % (self._refFileName,self._queryFileName, prefix, genCoords, mum, self._maxgaps, self._minMatch, nooptimize, self._mincluster) - self._log.debug("Running promer with following commands : %s" %cmd) - cmd = cmd.split() - process = subprocess.Popen(cmd) - process.wait() - - if self._showCoords: - #use of os.system because redirect on process is broken in python < 3.0 - cmd = "show-coords -r -c -l -d -I 50 -L 100 -T %s.delta > %s.coords" % (self._prefix, self._prefix) - os.system(cmd) - - return process.returncode - -if __name__ == "__main__": - iLaunchPromer = LaunchPromer() - 
iLaunchPromer.setAttributesFromCmdLine() - iLaunchPromer.run() \ No newline at end of file
--- a/commons/launcher/LaunchRefAlign.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,208 +0,0 @@ -#!/usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
- -from commons.core.LoggerFactory import LoggerFactory -from commons.core.utils.RepetOptionParser import RepetOptionParser -from commons.core.checker.ConfigChecker import ConfigRules -from commons.core.checker.ConfigChecker import ConfigChecker -import subprocess -import os -from commons.core.seq.Bioseq import Bioseq - -LOG_DEPTH = "repet.core.launchers" - -from commons.core.seq.BioseqDB import BioseqDB -from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders - - -class LaunchRefAlign(object): - """ - Launch 'refalign' to build a master-slave multiple sequence alignment. - """ - def __init__(self, inputFileName="", outFileName="", gapSize=10, match=10, mismatch=8, gapOpen=16, gapExtend=4, refseqName="", keepRefseq =False, verbosity=3 ): - self.inputFileName = inputFileName - self.outFileName=outFileName - self.gapSize = gapSize - self.match = match - self.mismatch = mismatch - self.gapOpen = gapOpen - self.gapExtend = gapExtend - self.gapExtend = gapExtend - self.refseqName = refseqName - self.keepRefseq = keepRefseq - self._verbosity = verbosity - self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity) - - def setAttributesFromCmdLine(self): - description = "usage: LaunchRefalign.py [ options ]" - epilog = "\n -h: this help\n" - epilog += "\t -i: name of the input file (refseq is first, format='fasta')" - epilog += "\t -r: keep the reference sequence" - epilog += "\t -o: name of the output file (default=inFileName+'.fa_aln')" - epilog += "\t -v: verbosity (default=0)" - epilog += "\n\t" - parser = RepetOptionParser(description = description, epilog = epilog) - parser.add_option("-i", "--fasta", dest = "inputFileName", action = "store", type = "string", help = "input fasta file name [compulsory] [format: fasta]", default = "") - parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.out]", default = "") - 
parser.add_option("-r", "--keepRefseq", dest = "keepRefseq", action = "store_true", help = "keep reference sequence [optional] [default: False]", default = False) - parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1) - options = parser.parse_args()[0] - self._setAttributesFromOptions(options) - - def _setAttributesFromOptions(self, options): - self.inputFileName = options.inputFileName - self.setOutFileName = options.outFileName - self.keepRefseq = options.keepRefseq - self._verbosity = options.verbosity - - def _checkOptions(self): - if self.inputFileName == "": - self._logAndRaise("ERROR: Missing input file name") - - if self.outFileName == "": - self.outFileName = "%s.fa_aln" % (self.inputFileName) - - def _logAndRaise(self, errorMsg): - self._log.error(errorMsg) - raise Exception(errorMsg) - - def _prepareRefAlign(self): - self.shortInputFileName = self.inputFileName+".shortH" - self.refFileName= self.shortInputFileName + ".ref" - self.cpyFileName=self.shortInputFileName + ".cpy" - - file_db = open(self.shortInputFileName) - file_ref = open(self.refFileName,"w") - file_cpy = open(self.cpyFileName,"w") - - self._numseq=0 - while 1: - seq=Bioseq() - seq.read(file_db) - if seq.sequence==None: - break - self._numseq+=1 - if self._numseq==1: - seq.write(file_ref) - else: - seq.write(file_cpy) - file_db.close() - file_ref.close() - file_cpy.close() - - def _shortenHeaders(self): - self.csh = ChangeSequenceHeaders() - self.csh.setInputFile(self.inputFileName) - self.csh.setFormat("fasta") - self.csh.setStep(1) - self.csh.setPrefix("seq") - self.csh.setLinkFile(self.inputFileName+".shortHlink") - self.csh.setOutputFile(self.inputFileName+".shortH") - self.csh.setVerbosityLevel(self._verbosity-1) - self.csh.run() - - bsDB = BioseqDB(self.inputFileName+".shortH") - bsDB.upCase() - bsDB.save(self.inputFileName+".shortHtmp") - del bsDB - 
os.rename(self.inputFileName+".shortHtmp", self.inputFileName+".shortH") - - def _renameHeaders(self): - self.csh.setInputFile(self.inputFileName+".shortH.fa_aln") - self.csh.setFormat("fasta") - self.csh.setStep(2) - self.csh.setLinkFile(self.inputFileName+".shortHlink" ) - self.csh.setOutputFile(self.outFileName) - self.csh.setVerbosityLevel(self._verbosity-1) - self.csh.run() - - def run(self): - LoggerFactory.setLevel(self._log, self._verbosity) - self._checkOptions() - self._log.info("START LaunchRefAlign") - self._log.debug("building a multiple alignment from '%s'..." % ( self.inputFileName)) - - inputFileName = "%s/%s" % (os.getcwd(), os.path.basename(self.inputFileName)) - if not os.path.exists(inputFileName): - os.symlink(self.inputFileName, inputFileName) - self.inputFileName = inputFileName - - self._shortenHeaders() - if self.keepRefseq: - self.refseqName="seq1" - self._prepareRefAlign() - - if self._numseq > 1: - cmd = "refalign %s %s -m %d -l %d -d %d -g %d -e %d" % (self.refFileName, self.cpyFileName, self.match, self.gapSize, self.mismatch, self.gapOpen, self.gapExtend) - - process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - self._log.debug("Running : %s" % cmd) - output = process.communicate() - self._log.debug("Output:\n%s" % output[0]) - if process.returncode != 0: - self._logAndRaise("ERROR when launching '%s'" % cmd) - refseqNameParam = "" - if self.refseqName != "": - refseqNameParam = "-r %s " % (self.refseqName) - outFileName = self.inputFileName+".shortH.fa_aln" - #self.cpyFileName = os.path.join(os.getcwd(),os.path.basename(self.cpyFileName)) - - self._log.info("Copy file path %s " % self.cpyFileName) - print("Copy file path %s " % self.cpyFileName) - cmd = "refalign2fasta.py -i %s.aligner %s-g d -o %s -v 1" % (self.cpyFileName, refseqNameParam, outFileName) - self._log.debug("Running : %s" % cmd) - process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output = 
process.communicate() - self._log.debug("Output:\n%s" % output[0]) - - if process.returncode != 0: - self._logAndRaise("ERROR when launching '%s'" % cmd) - - cmd = "rm -f "+ self.refFileName + " " + self.cpyFileName + " " + self.cpyFileName + ".aligner " + self.cpyFileName + ".oriented " + self.cpyFileName + ".refalign.stat" - os.system(cmd) - - else: - self._logAndRaise("Only one sequence available") - cmd = "echo empty" - - self._renameHeaders() - - for fileName in [self.inputFileName + ".shortH", self.inputFileName + ".shortHlink", self.inputFileName + ".shortH.fa_aln"]: - os.remove(fileName) - self._log.info("END LaunchRefAlign") - return 0 - - -if __name__ == "__main__": - iLaunchRefAlign = LaunchRefAlign() - iLaunchRefAlign.setAttributesFromCmdLine() - iLaunchRefAlign.run()
--- a/commons/launcher/LaunchRefalign_old.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import getopt - -import pyRepet.launcher.programLauncher -from commons.core.seq.BioseqDB import BioseqDB -from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders - - -def help(): - print - print "usage: launchRefalign.py [ options ]" - print "options:" - print " -h: this help" - print " -i: name of the input file (refseq is first, format='fasta')" - print " -r: keep the reference sequence" - print " -o: name of the output file (default=inFileName+'.fa_aln')" - print " -v: verbose (default=0)" - print - - -def main(): - """ - Launch 'refalign' to build a master-slave multiple sequence alignment. - """ - inFileName = "" - keepRefseq = False - outFileName = "" - verbose = 0 - try: - opts,args=getopt.getopt(sys.argv[1:],"hi:ro:v:") - except getopt.GetoptError, err: - print str(err); help(); sys.exit(1) - for o,a in opts: - if o == "-h": - help() - sys.exit(0) - elif o == "-i": - inFileName = a - elif o == "-r": - keepRefseq = True - elif o == "-o": - outFileName = a - elif o == "-v": - verbose = int(a) - if inFileName == "": - print "ERROR: missing compulsory options" - help() - sys.exit(1) - - if verbose > 0: - print "START %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - if verbose > 0: - print "build a multiple alignment from '%s'..." 
% ( inFileName ) - sys.stdout.flush() - - if outFileName == "": - outFileName = "%s.fa_aln" % ( inFileName ) - - csh = ChangeSequenceHeaders() - csh.setInputFile( inFileName ) - csh.setFormat( "fasta" ) - csh.setStep( 1 ) - csh.setPrefix( "seq" ) - csh.setLinkFile( inFileName+".shortHlink" ) - csh.setOutputFile( inFileName+".shortH" ) - csh.setVerbosityLevel( verbose - 1 ) - csh.run() - - bsDB = BioseqDB( inFileName+".shortH" ) - bsDB.upCase() - bsDB.save( inFileName+".shortHtmp" ) - del bsDB - os.rename( inFileName+".shortHtmp", inFileName+".shortH" ) - - pL = pyRepet.launcher.programLauncher.programLauncher( inFileName+".shortH" ) - if keepRefseq: - pL.launchRefalign( outFileName=inFileName+".shortH.fa_aln", refseqName="seq1", verbose=verbose ) - else: - pL.launchRefalign( outFileName=inFileName+".shortH.fa_aln", verbose=verbose ) - - csh.setInputFile( inFileName+".shortH.fa_aln" ) - csh.setFormat( "fasta" ) - csh.setStep( 2 ) - csh.setLinkFile( inFileName+".shortHlink" ) - csh.setOutputFile( outFileName ) - csh.setVerbosityLevel( verbose - 1 ) - csh.run() - - for f in [ inFileName+".shortH", inFileName+".shortHlink", inFileName+".shortH.fa_aln" ]: - os.remove( f ) - - if verbose > 0: - print "END %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - return 0 - - -if __name__ == "__main__": - main()
--- a/commons/launcher/LaunchRepeatMasker.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -#! /usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
- -from commons.core.checker.CheckerUtils import CheckerUtils -from commons.core.utils.FileUtils import FileUtils -from commons.core.utils.RepetOptionParser import RepetOptionParser -import subprocess - -class LaunchRepeatMasker(object): - - def __init__(self,queryFileName="", libFileName ="",sensitivity="", engine="wu", cutOff=225, outputDir = ".",verbosity=0): - self._queryFileName = queryFileName - self._libFileName = libFileName - self._engine = engine - self._sensitivity = sensitivity - self._cutOff = cutOff - self._outputDir = outputDir - self._verbosity = verbosity - - def setAttributesFromCmdLine(self): - description = "LaunchRepeatMasker runs the RepeatMasker program ." - parser = RepetOptionParser(description = description) - parser.add_option("-q", "--query", dest="queryFileName", default = None, action="store", type="string", help="input query file [compulsory] [format: fasta]") - parser.add_option("-l", "--libFileName", dest="libFileName", default = None, action="store", type="string", help="custom library [optional]") - parser.add_option("-n", "--outputDir", dest="outputDir", default=".", action="store", type="string", help="outputDir (default : current directory) [optional] ") - parser.add_option("-c", "--cutOff", dest="cutOff", default=225, action="store", type="int", help="Sets cutoff score for masking repeats when using -lib (default 225) [optional] ") - parser.add_option("-e", "--engine", dest="engine", default = "wu", action="store", type="string", help="engine [optional] ") - parser.add_option("-u", "--sensitivity", dest="sensitivity", default = "", action="store", type="string", help="sensitivity can be s, q, qq[optional] ") - parser.add_option("-v", "--verbosity", dest="verbosity", default = 0, action="store", type="int", help="verbosity [optional] ") - - (options, args) = parser.parse_args() - self._setAttributesFromOptions(options) - - def _setAttributesFromOptions(self, options): - self._queryFileName = options.queryFileName - 
self._libFileName = options.libFileName - self._outputDir = options.outputDir - self._engine = options.engine - self._sensitivity = options.sensitivity - self._cutOff = options.cutOff - self._verbosity = options.verbosity - - def checkOptions(self): - if self._queryFileName != "": - if not FileUtils.isRessourceExists(self._queryFileName): - raise Exception("ERROR: Query file: %s does not exist!" % self._queryFileName) - else: - raise Exception("ERROR: No specified --query option!") - - def run(self): - if not CheckerUtils.isExecutableInUserPath("RepeatMasker") : - print "ERROR: RepeatMasker must be in your path" - else: - self.checkOptions() - - engine = "" - if self._engine == "wu": - engine = "-e wublast" - elif self._engine == "cm": - engine = "-e crossmatch" - sensitivity = "" - if self._sensitivity: - sensitivity = "-%s" % self._sensitivity - libFileName = "" - if self._libFileName != "": - libFileName = "-lib %s" % self._libFileName - - cmd = "RepeatMasker %s -dir %s -pa 1 -gccalc -no_is -nolow %s %s %s" % (self._queryFileName,self._outputDir,engine,sensitivity,libFileName) - cmd = cmd.split() - - if self._verbosity>0: - print("Running RepeatMasker with following commands : %s" %cmd) - - process = subprocess.Popen(cmd, stdout=subprocess.PIPE,stderr = subprocess.PIPE) -# process.wait() - output= process.communicate() - if self._verbosity>0: - print("".join(output)) - return process.returncode - -if __name__ == "__main__": - iLaunchRepeatMasker = LaunchRepeatMasker() - iLaunchRepeatMasker.setAttributesFromCmdLine() - iLaunchRepeatMasker.run() \ No newline at end of file
--- a/commons/launcher/LaunchTRF.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,155 +0,0 @@ -#!/usr/bin/env python - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
- -from commons.core.LoggerFactory import LoggerFactory -from commons.core.utils.RepetOptionParser import RepetOptionParser -from commons.core.checker.ConfigChecker import ConfigRules -from commons.core.checker.ConfigChecker import ConfigChecker -import subprocess -import glob -import os - -LOG_DEPTH = "repet.launchers" - -class LaunchTRF(object): - - def __init__(self, inFileName = "", outFileName = "", maxPeriod=15, doClean = False, verbosity = 0): - self.inFileName = inFileName - self.setOutFileName(outFileName) - self.maxPeriod=maxPeriod - self._doClean = doClean - self._verbosity = verbosity - self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity) - - def setAttributesFromCmdLine(self): - description = "Launch TRF to detect micro-satellites in sequences." - epilog = "\nExample 1: launch without verbosity and keep temporary files.\n" - epilog += "\t$ python LaunchTemplate.py -i file.fa -v 0" - epilog += "\n\t" - parser = RepetOptionParser(description = description, epilog = epilog) - parser.add_option("-i", "--in", dest = "inFileName", action = "store", type = "string", help = "input file name [compulsory] [format: fasta]", default = "") - parser.add_option("-o", "--out", dest = "outFileName", action = "store", type = "string", help = "output file name [default: <input>.TRF.set]", default = "") - parser.add_option("-m", "--maxPeriod", dest = "maxPeriod", action = "store", type = "int", help = " maximum period size to report [default: 15]", default = 15) - parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False) - parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1) - options = parser.parse_args()[0] - self._setAttributesFromOptions(options) - - def _setAttributesFromOptions(self, options): - 
self.setInFileName(options.inFileName) - self.setOutFileName(options.outFileName) - self.maxPeriod = options.maxPeriod - self.setDoClean(options.doClean) - self.setVerbosity(options.verbosity) - - def setInFileName(self, inFileName): - self.inFileName = inFileName - - def setOutFileName(self, outFileName): - if outFileName == "": - self._outFileName = "%s.TRF.set" % self.inFileName - else: - self._outFileName = outFileName - - def setDoClean(self, doClean): - self._doClean = doClean - - def setVerbosity(self, verbosity): - self._verbosity = verbosity - - def _checkOptions(self): - if self.inFileName == "": - self._logAndRaise("ERROR: Missing input file name") - - def _logAndRaise(self, errorMsg): - self._log.error(errorMsg) - raise Exception(errorMsg) - - - def _launchTRF(self): - cmd = "trf %s 2 3 5 80 10 20 %d -h -d" % (self.inFileName, self.maxPeriod) - self._log.debug("Running : %s" % cmd) - process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output = process.communicate() - self._log.debug("Output:\n%s" % output[0]) - - def _parseTRF(self): - self._log.debug("Parsing TRF output") - with open( "%s.2.3.5.80.10.20.%d.dat" % (self.inFileName, self.maxPeriod),'r') as inFile: - with open(self._outFileName,'w') as outFile: - nbPatterns = 0 - nbInSeq = 0 - for line in inFile.readlines(): - if line == "": - break - data = line.split(" ") - if len(data) > 1 and "Sequence:" in data[0]: - nbInSeq += 1 - seqName = data[1][:-1] - if len(data) < 14: - continue - nbPatterns += 1 - consensus = data[13] - copyNb = int( float(data[3]) + 0.5 ) - start = data[0] - end = data[1] - outFile.write( "%i\t(%s)%i\t%s\t%s\t%s\n" % ( nbPatterns, consensus, copyNb, seqName, start, end ) ) - self._log.debug("Finished Parsing TRF output") - - def _clean(self): - try: - os.remove("%s.2.3.5.80.10.20.%d.dat" % (self.inFileName, self.maxPeriod)) - except:pass - - - def run(self): - """ - Launch TRF to detect micro-satellites in sequences. 
- """ - LoggerFactory.setLevel(self._log, self._verbosity) - self._checkOptions() - self._log.info("START Launch") - self._log.debug("Input file name: %s" % self.inFileName) - - self._launchTRF() - self._parseTRF() - - if self._doClean: - self._log.warning("Files will be cleaned") - self._clean() - self._log.info("END Launch") - -if __name__ == "__main__": - iLaunchTRF = LaunchTRF() - iLaunchTRF.setAttributesFromCmdLine() - iLaunchTRF.run() -
--- a/commons/launcher/LaunchTallymer.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,328 +0,0 @@ -#!/usr/bin/env python - -""" -Launch Tallymer's sub programs, generate map file, and convert output to gff and wig, as well as visual (RPlot) data -""" - -# Copyright INRA (Institut National de la Recherche Agronomique) -# http://www.inra.fr -# http://urgi.versailles.inra.fr -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. 
import os
import shutil
import subprocess
import time
from commons.core.utils.RepetOptionParser import RepetOptionParser
from commons.core.LoggerFactory import LoggerFactory
from SMART.Java.Python.convertTranscriptFile import ConvertTranscriptFile
from commons.core.seq.BioseqUtils import BioseqUtils
from commons.core.seq.BioseqDB import BioseqDB
from Tallymer_pipe.PlotBenchMarkGFFFiles import PlotBenchMarkGFFFiles

LOG_DEPTH = "repet.tools"


class LaunchTallymer(object):
    """
    Launch Tallymer's sub programs, generate map file, and convert output to
    gff and wig, as well as visual (RPlot) data
    """

    _lValidFormats = ["gff", "gff3", "wig", "bed", "map"]

    def __init__(self, inputFasta="", indexationFasta=None, merSize=20, minOccs=4, outputFormats="gff", nLargestScaffoldsToPlot=0, clean=False, verbosity=0):
        """
        @param inputFasta: fasta file whose k-mers are searched
        @param indexationFasta: fasta file used to build the k-mer index (defaults to inputFasta)
        @param merSize: k-mer size
        @param minOccs: minimal number of occurrences for a k-mer to be indexed
        @param outputFormats: comma-separated list of output formats (see _lValidFormats)
        @param nLargestScaffoldsToPlot: number of largest sequences to plot (0 disables plotting)
        @param clean: remove the working directory at the end of run()
        @param verbosity: verbosity level handed to the logger
        """
        self.inputFasta = inputFasta
        self.indexationFasta = indexationFasta if indexationFasta is not None else inputFasta
        self.merSize = merSize
        self.minOccs = minOccs
        self.outputFormats = outputFormats
        self.nLargestScaffoldsToPlot = nLargestScaffoldsToPlot
        self.doClean = clean
        self.verbosity = verbosity

        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
        # time-stamped working directory created under the current directory
        self._workdir = os.path.join(os.getcwd(), "launchTallymer_%s" % time.strftime("%Y%m%d%H%M%S"))
        self._tmpSearchFileName = None
        self._tmpMapFileName = None
        self._tmpStatFileName = None
        self._tmpPngFileName = None
        self._plot_data = {}
        self._plot_data2 = {}

    def setAttributesFromCmdLine(self):
        """
        Parse the command line and set the attributes accordingly.
        """
        description = "Generates stats from the results of the tallymer search ."
        parser = RepetOptionParser(description=description)
        parser.add_option("-i", "--inputFasta", dest="inputFasta", default = "", action="store", type="string", help="input fasta file [compulsory] [format: fasta]")
        parser.add_option("-u", "--indexationFasta", dest="indexationFasta", default = "", action="store", type="string", help="input indexation fasta file used to generate kmer index (defaults to input fasta)[optional] [format: fasta]")
        parser.add_option("-s", "--merSize", dest="merSize", default = 20, action="store", type="int", help="input merSize [optional][default:20]")
        parser.add_option("-m", "--minOccs", dest="minOccs", default = 4, action="store", type="int", help="input minimal kmer occurence count [default:4]")
        parser.add_option("-f", "--outputFormats", dest="outputFormats", default = "gff", action="store", type="string", help="comma separated list of output file formats (can be %s) [optional] [default:gff]" % ", ".join(self._lValidFormats))
        parser.add_option("-n", "--nLargestScaffoldsToPlot",default = 0, type="int", action="store", dest = "nLargestScaffoldsToPlot", help = "generate graph of Kmer repartition along the input sequence for the n biggest scaffolds")
        parser.add_option("-c", "--clean", dest = "clean", help = "clean temporary files", default = False, action="store_true")
        parser.add_option("-v", "--verbosity", dest="verbosity", default = 1, action="store", type="int", help="verbosity [optional] ")
        options, args = parser.parse_args()
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """
        Copy the parsed options onto the instance.
        """
        self.inputFasta = options.inputFasta
        # an empty -u means "index the input fasta itself"
        self.indexationFasta = options.indexationFasta if options.indexationFasta != "" else options.inputFasta
        self.merSize = options.merSize
        self.minOccs = options.minOccs
        self.outputFormats = options.outputFormats
        self.nLargestScaffoldsToPlot = options.nLargestScaffoldsToPlot
        self.doClean = options.clean
        self.verbosity = options.verbosity

    def _checkOptions(self):
        """
        Validate the attributes and turn outputFormats into a list of valid formats.
        Raise an Exception when the input file is missing, the k-mer size is
        not positive or no valid output format remains.
        """
        if self.inputFasta == "":
            self._logAndRaise("Error: missing input fasta file")
        if self.merSize < 1:
            self._logAndRaise("Error: invalid kmer size '%i'; must be a positive integer" % self.merSize)

        # accept the comma-separated string as well as an already split list,
        # so that the check can be run more than once on the same instance
        if isinstance(self.outputFormats, (list, tuple)):
            lRequested = [fmt.strip().lower() for fmt in self.outputFormats]
        else:
            lRequested = [fmt.strip() for fmt in self.outputFormats.lower().split(',')]
        sRequested = set(lRequested)
        lInvalidFormats = [fmt for fmt in sRequested if fmt not in self._lValidFormats]
        # keep the order of _lValidFormats so that the result is deterministic
        self.outputFormats = [fmt for fmt in self._lValidFormats if fmt in sRequested]
        if lInvalidFormats:
            self._log.warning("Warning: ignoring invalid formats: <%s>" % " ".join(lInvalidFormats))
        if not self.outputFormats:
            self._logAndRaise("Error: no valid output formats specified")

    def _logAndRaise(self, errorMsg):
        """
        Log the message as an error and raise it as an Exception.
        """
        self._log.error(errorMsg)
        raise Exception(errorMsg)

    def clean(self):
        """
        Remove the working directory; failures are logged, not raised.
        """
        try:
            shutil.rmtree(self._workdir)
        except Exception as inst:
            self._log.error(inst)

    def run(self):
        """
        Run the whole pipeline inside the working directory: tallymer search,
        conversion to map format, output files, optional plots. The stat file
        (and the plot, if requested) are copied to the parent directory.
        """
        LoggerFactory.setLevel(self._log, self.verbosity)
        self._checkOptions()
        self._log.debug("Input fasta file: %s; K-mer size: %s; Output formats: %s; Cleaning: %s" % (self.inputFasta, self.merSize, str(self.outputFormats), self.doClean))
        try:
            os.makedirs(self._workdir)
        except OSError:
            # an already existing directory is fine, anything else is not
            if not os.path.isdir(self._workdir):
                raise

        srcPath = os.path.abspath(self.inputFasta)
        dstPath = os.path.join(self._workdir, os.path.basename(self.inputFasta))
        os.symlink(srcPath, dstPath)

        if self.indexationFasta != self.inputFasta:
            srcPath = os.path.abspath(self.indexationFasta)
            dstPath = os.path.join(self._workdir, os.path.basename(self.indexationFasta))
            try:
                os.symlink(srcPath, dstPath)
            except OSError as inst:
                # best effort, as before: the link may already be there
                self._log.debug("Could not link indexation fasta: %s" % inst)

        os.chdir(self._workdir)
        try:
            # from now on both files are reached through their links
            self.indexationFasta = os.path.basename(self.indexationFasta)
            self.inputFasta = os.path.basename(self.inputFasta)

            self._tmpSearchFileName = "%s.tallymer" % os.path.splitext(os.path.basename(self.inputFasta))[0]
            self._tmpMapFileName = "%s_tmp.map" % self._tmpSearchFileName
            self._tmpStatFileName = "%s.stat" % self._tmpSearchFileName
            self._tmpPngFileName = "%s.png" % self._tmpSearchFileName

            self._runTallymerSearch()
            self._convertTallymerToMap()
            self._writeOutputFiles()

            if self.nLargestScaffoldsToPlot > 0:
                self._doPlot()
                # NOTE(review): _doPlot names its files "<search>_<header>.png";
                # confirm that the plotter also produces this file
                shutil.copy2(self._tmpPngFileName, "../.")

            shutil.copy2(self._tmpStatFileName, "../.")
        finally:
            # always leave the working directory, even when a step failed
            os.chdir("..")

        if self.doClean:
            self.clean()

    def _runTallymerSearch(self):
        """
        Index the fasta file, index its k-mers and search them in the query.
        """
        self._log.info("Starting to run tallymer search of sequence %s " % (self.inputFasta))
        self._indexInputFastaFile()
        self._countAndIndexKmersForGivenK()
        self._searchKmersListInTallymerIndex()
        self._log.info("Finished running tallymer scan of sequence %s " % (self.inputFasta))

    def _convertTallymerToMap(self):
        """
        Convert the tallymer search output into a map file and write the k-mer statistics.
        """
        self._log.info("Starting to run tallymer search to map conversion")
        totalNbOcc, dKmer2Occ, self._plot_data, self._plot_data2 = ConvertUtils.convertTallymerFormatIntoMapFormatAndGenerateData(self.inputFasta, self._tmpSearchFileName, self._tmpMapFileName)
        self._log.debug("totalNbOcc=%i" % totalNbOcc)
        self._writeOccurencesNbAndFrequencyOfKmers(totalNbOcc, dKmer2Occ)
        self._log.info("Finished tallymer search to map conversion")

    def _doPlot(self):
        """
        Plot the k-mer occurrences along each of the n largest sequences.
        """
        iBioseqDB = BioseqDB()
        iBioseqDB.load(self.inputFasta)
        largest_seqsDb = iBioseqDB.bestLength(self.nLargestScaffoldsToPlot)

        for seq in largest_seqsDb.db:
            headerCleaned = seq.header.replace(" ", "_")
            shortFastaName = "%s_%s" % (os.path.basename(self.inputFasta), headerCleaned)
            # NOTE(review): raises KeyError for a sequence without any k-mer hit
            data = self._plot_data2[seq.header]
            gffPlotter = PlotBenchMarkGFFFiles(yLabel="Number of Kmer Occurences", maxLength=1, title="Kmer repartition along the input sequence: %s; MerSize: %i" % (shortFastaName, self.merSize) )
            gffPlotter.setOutFileName("%s_%s.png" % (self._tmpSearchFileName, headerCleaned))
            gffPlotter._title = "Mers along the input sequence: %s MerSize: %i" % (shortFastaName, self.merSize)
            gffPlotter._xLabel = "Coordinates along the input sequence (%s)" % shortFastaName
            gffPlotter._rawData = data
            gffPlotter.run()

    def _runCommand(self, lArgs, programLabel, errorMsg):
        """
        Run an external command given as an argument list and return its
        standard output. Log and raise errorMsg on a non-zero exit status.
        """
        self._log.debug("Running %s with following params %s" % (programLabel, " ".join(lArgs)))
        # an argument list (no shell, no split) keeps file names with spaces intact
        process = subprocess.Popen(lArgs, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()
        self._log.debug("%s Output:\n%s" % (programLabel[0].upper() + programLabel[1:], stdout))
        if process.returncode != 0:
            self._log.debug("%s stderr:\n%s" % (programLabel, stderr))
            self._logAndRaise(errorMsg)
        return stdout

    def _indexInputFastaFile(self):
        """
        Build the enhanced suffix array of the indexation fasta file.
        """
        self._log.debug("index the input fasta file: get an enhanced suffix array.")
        lArgs = ["gt", "suffixerator", "-dna", "-pl", "-tis", "-suf", "-lcp", "-v",
                 "-db", self.indexationFasta,
                 "-indexname", "%s.reads" % self.indexationFasta]
        self._runCommand(lArgs, "suffixerator", "Error in generating enhanced suffix array in %s" % self.indexationFasta)

    def _countAndIndexKmersForGivenK(self):
        """
        Count and index the k-mers of the suffix array for the chosen k.
        """
        self._log.debug("Counting and indexing k-mers for k = %i " % self.merSize)
        lArgs = ["gt", "tallymer", "mkindex",
                 "-mersize", "%i" % self.merSize,
                 "-minocc", "%i" % self.minOccs,
                 "-indexname", "%s.tyr-reads" % self.indexationFasta,
                 "-counts", "-pl",
                 "-esa", "%s.reads" % self.indexationFasta]
        self._runCommand(lArgs, "tallymer mkindex", "Error in indexing kmers in %s.reads" % self.indexationFasta)

    def _searchKmersListInTallymerIndex(self):
        """
        Search the k-mers of the input fasta in the index and save the result
        in the temporary search file.
        """
        self._log.debug("Searching list of kmers in tallymer-index ")
        lArgs = ["gt", "tallymer", "search",
                 "-output", "qseqnum", "qpos", "counts", "sequence",
                 "-tyr", "%s.tyr-reads" % self.indexationFasta,
                 "-q", self.inputFasta]
        searchOutput = self._runCommand(lArgs, "tallymer search", "Error in searching for kmers in %s.tyr-reads" % self.indexationFasta)
        # binary mode: the captured output is written back unchanged
        with open(self._tmpSearchFileName, 'wb') as tmpOutputFile:
            tmpOutputFile.write(searchOutput)

    def _writeOccurencesNbAndFrequencyOfKmers(self, totalNbOcc, dKmer2Occ):
        """
        Write one line per k-mer: k-mer, number of hits, frequency among all hits.
        """
        with open(self._tmpStatFileName, "w") as statFile:
            statFile.write("kmer\tocc\tfreq\n")
            for kmer, nbOcc in dKmer2Occ.items():
                statFile.write("%s\t%i\t%.10f\n" % (kmer, nbOcc, nbOcc / float(totalNbOcc)))

    def _writeOutputFiles(self):
        """
        Convert the map file into each requested format and copy the result
        to the parent directory. A failing conversion is logged and skipped.
        """
        for outFormat in self.outputFormats:
            self._log.info("Generating %s file" % outFormat)
            outputFileName = "%s.tallymer.%s" % (os.path.splitext(self.inputFasta)[0], outFormat)
            try:
                iConvertTranscriptFile = ConvertTranscriptFile(inputFileName=self._tmpMapFileName, name="Tallymer",
                                                               inputFormat="map", outputFileName=outputFileName, outputFormat=outFormat, feature="Match", featurePart="Match-part", verbosity=0) #self.verbosity
                iConvertTranscriptFile.run()
            except Exception as inst:
                self._log.error("Error: %s - Failed to generate %s format ouput, skipping" % (inst, outFormat))
            else:
                # copy only what was really generated
                shutil.copy2(outputFileName, "../.")


class ConvertUtils(object):
    """
    Helpers converting the output of 'gt tallymer search' into map format.
    """

    @staticmethod
    def convertTallymerFormatIntoMapFormatAndGenerateData(fastaFileName, searchFileName, mapFileName):
        """
        Convert a tallymer search file into a map file.

        Each input line holds four tab-separated fields: sequence index,
        signed position ('+' or '-' followed by the offset), count, k-mer.

        @param fastaFileName: fasta file that was searched (gives names and lengths)
        @param searchFileName: tallymer search output
        @param mapFileName: map file to write
        @return: (number of hits, {k-mer: number of hits},
                  {start: count}, {sequence name: {start: summed counts}})
        @raise ValueError: when a position does not start with '+' or '-'
        """
        dIndex2NameLengthList = ConvertUtils._createDictOfNameLengthAccordingToSeqOrder(fastaFileName)
        plotData = {}
        plotData2 = {}
        totalNbOcc = 0
        dKmer2Occ = {}
        with open(searchFileName, "r") as talFile:
            with open(mapFileName, "w") as mapFile:
                for line in talFile:
                    line = line.rstrip("\r\n")
                    if not line:
                        continue
                    data = line.split("\t")
                    seqIndex, position, kmer = int(data[0]), data[1], data[3]
                    nbOccs = int(data[2])
                    name = "%s_%s" % (kmer, data[2])
                    chrname, seqLength = dIndex2NameLengthList[seqIndex][0], dIndex2NameLengthList[seqIndex][1]
                    if position[0] == "+":
                        start = int(position[1:]) + 1
                        end = start + len(kmer)
                    elif position[0] == "-":
                        start_revcomp = int(position[1:])
                        start = seqLength - start_revcomp - 1
                        # the end is derived from this hit's start; the previous
                        # code reused the end of the preceding line
                        end = start - len(kmer) + 1
                    else:
                        raise ValueError("unexpected strand in tallymer position '%s'" % position)
                    mapFile.write("%s\t%s\t%s\t%s\t%i\n" % (name, chrname, start, end, nbOccs))

                    dStart2Occ = plotData2.setdefault(chrname, {})
                    dStart2Occ[start] = dStart2Occ.get(start, 0) + nbOccs

                    totalNbOcc += 1
                    dKmer2Occ[kmer] = dKmer2Occ.get(kmer, 0) + 1
                    plotData[start] = nbOccs
        return totalNbOcc, dKmer2Occ, plotData, plotData2

    @staticmethod
    def _createDictOfNameLengthAccordingToSeqOrder(fastaFileName):
        """
        Return {index: [header, length]} for the sequences of a fasta file,
        indexed from 0 in file order. An empty file gives an empty dict.
        """
        dIndex2Name = {}
        index = -1
        with open(fastaFileName) as fastaFile:
            for line in fastaFile:
                line = line.rstrip("\r\n")
                if line.startswith(">"):
                    index += 1
                    dIndex2Name[index] = [line[1:], 0]
                elif index >= 0:
                    dIndex2Name[index][1] += len(line)
        return dIndex2Name


if __name__ == "__main__":
    iLaunchTallymer = LaunchTallymer()
    iLaunchTallymer.setAttributesFromCmdLine()
    iLaunchTallymer.run()
#!/usr/bin/env python

##@file
# Launch MafftProgramLauncher on several files in parallel on a cluster.
#
# Source path: commons/launcher/MafftClusterLauncher.py


from pyRepet.launcher.AbstractClusterLauncher import *
from commons.launcher.MafftProgramLauncher import MafftProgramLauncher


class MafftClusterLauncher( AbstractClusterLauncher ):
    """
    Launch Mafft on several files in parallel on a cluster.
    """

    def __init__( self ):
        """
        Constructor.
        """
        AbstractClusterLauncher.__init__( self )
        AbstractClusterLauncher.setAcronym( self, "Mafft" )

        self._cmdLineSpecificOptions = "p:"

        self._exeWrapper = "MafftProgramLauncher.py"
        # must exist before getProgramLauncherInstance() tests it
        self._prgLauncher = None
        self._prgLauncher = self.getProgramLauncherInstance()


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -p: parameters for 'mafft' (default='--auto')"
        return string


    def getProgramParameters( self ):
        """
        Return the parameters of the wrapped program launcher.
        """
        return self._prgLauncher.getProgramParameters()


    def getProgramLauncherInstance( self ):
        """
        Return the wrapped MafftProgramLauncher, creating and configuring it
        on the first call.
        """
        if self._prgLauncher is None:
            self._prgLauncher = MafftProgramLauncher()
            self._prgLauncher.setInputFile( GENERIC_IN_FILE )
            self._prgLauncher.setOutputFile( "%s.fa_aln" % ( GENERIC_IN_FILE ) )
            self._prgLauncher.setClean()
            self._prgLauncher.setVerbosityLevel( 1 )
            self._prgLauncher.setListFilesToKeep()
            self._prgLauncher.setListFilesToRemove()
        return self._prgLauncher


if __name__ == "__main__":
    i = MafftClusterLauncher()
    i.setAttributesFromCmdLine()
    i.run()
#!/usr/bin/env python

##@file
# Launch Mafft (multiple alignment).
#
# Source path: commons/launcher/MafftProgramLauncher.py
#
# options:
#      -h: this help
#      -i: name of the input file (format='fasta')
#      -p: parameters for 'mafft' (default='--auto')
#      -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
#      -c: clean
#      -v: verbosity level (default=0/1)


import os
import sys
import getopt
import exceptions

from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
from pyRepet.seq.fastaDB import *
from commons.core.seq.FastaUtils import FastaUtils
from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders


class MafftProgramLauncher( AbstractProgramLauncher ):
    """
    Launch Mafft (multiple alignment).
    """


    def __init__( self ):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "mafft"
        self._formatInFile = "fasta"
        self._prgParam = "--auto"
        self._cmdLineSpecificOptions = "p:o:"


    def _setDefaultOutputFileIfMissing( self ):
        """
        Name the output file after the input file when none was given.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -p: parameters for '%s' (default='--auto')" % ( self.getProgramName() )
        string += "\n -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
        return string


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.
        """
        if o == "-p":
            self.setProgramParameters( a )
        elif o == "-o":
            self.setOutputFile( a )


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.
        """
        self._setDefaultOutputFileIfMissing()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for MafftClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfMissing()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        The program reads the short-header copy of the input file and its
        standard output is redirected to '<input>.shortH.fa_aln'.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s" % ( self.getProgramParameters() )
        if self.getVerbosityLevel() == 0 and "--quiet" not in self._prgCmdLine:
            self._prgCmdLine += " --quiet"
        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )
        # NOTE(review): reads self._verbose directly, presumably the attribute
        # behind getVerbosityLevel() - confirm in AbstractProgramLauncher
        if self._verbose < 2:
            self._prgCmdLine += " 2> /dev/null"


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.
        """
        self._setDefaultOutputFileIfMissing()
        self.appendFileToKeep( self.getOutputFile() )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) )


    def setSummary( self ):
        """
        Set the summary (input file, parameters, output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfMissing()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.
        Headers are shortened before the alignment and restored afterwards;
        sequences are written upper-cased, in the order of the input file.
        Exit with status 1 when the program fails.
        """
        self.start()

        inFile = self.getInputFile()
        lInitHeaders = FastaUtils.dbHeaders( inFile, self.getVerbosityLevel()-1 )

        # step 1: replace the headers by short ones, keeping a link file
        csh = ChangeSequenceHeaders()
        csh.setInputFile( inFile )
        csh.setFormat( "fasta" )
        csh.setStep( 1 )
        csh.setPrefix( "seq" )
        csh.setLinkFile( "%s.shortHlink" % ( inFile ) )
        csh.setOutputFile( "%s.shortH" % ( inFile ) )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        # upper-case the sequences in place, through a temporary file
        bsDB = BioseqDB( "%s.shortH" % ( inFile ) )
        bsDB.upCase()
        bsDB.save( "%s.shortHtmp" % ( inFile ) )
        del bsDB
        os.rename( "%s.shortHtmp" % ( inFile ),
                   "%s.shortH" % ( inFile ) )

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        # step 2: put the initial headers back in the alignment
        csh.setInputFile( "%s.shortH.fa_aln" % ( inFile ) )
        csh.setFormat( "fasta" )
        csh.setStep( 2 )
        csh.setLinkFile( "%s.shortHlink" % ( inFile ) )
        csh.setOutputFile( "%s.shortH.fa_aln.initH" % ( inFile ) )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        absDB = AlignedBioseqDB( "%s.shortH.fa_aln.initH" % ( inFile ) )
        # 'with' closes the output file even when a fetch or a write fails
        with open( self.getOutputFile(), "w" ) as outFileHandler:
            for header in lInitHeaders:
                bs = absDB.fetch( header )
                bs.upCase()
                bs.write( outFileHandler )
        os.remove( "%s.shortH.fa_aln.initH" % ( inFile ) )

        self.end()


if __name__ == "__main__":
    i = MafftProgramLauncher()
    i.setAttributesFromCmdLine()
    i.run()
#!/usr/bin/env python

##@file
# Launch MapProgramLauncher.py on several files in parallel on a cluster.
#
# Source path: commons/launcher/MapClusterLauncher.py


import os
import sys
import getopt
import exceptions

from pyRepet.launcher.AbstractClusterLauncher import *
from commons.launcher.MapProgramLauncher import MapProgramLauncher


class MapClusterLauncher( AbstractClusterLauncher ):
    """
    Launch Map on several files in parallel on a cluster.
    """


    def __init__( self ):
        """
        Constructor.
        """
        AbstractClusterLauncher.__init__( self )
        AbstractClusterLauncher.setAcronym( self, "Map" )

        self._cmdLineSpecificOptions = "s:m:O:e:"

        self._exeWrapper = "MapProgramLauncher.py"
        # must exist before getProgramLauncherInstance() tests it
        self._prgLauncher = None
        self._prgLauncher = self.getProgramLauncherInstance()


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -s: size above which a gap is not penalized anymore (default='%i')" % ( self.getGapSize() )
        string += "\n -m: penalty for a mismatch (default='%i')" % ( self.getMismatchPenalty() )
        string += "\n -O: penalty for a gap openning (default='%i')" % ( self.getGapOpenPenalty() )
        string += "\n -e: penalty for a gap extension (default='%i')" % ( self.getGapExtendPenalty() )
        return string


    def getGapSize( self ):
        """
        Return the gap size of the wrapped program launcher.
        """
        return self._prgLauncher.getGapSize()


    def getMismatchPenalty( self ):
        """
        Return the mismatch penalty of the wrapped program launcher.
        """
        return self._prgLauncher.getMismatchPenalty()


    def getGapOpenPenalty( self ):
        """
        Return the gap opening penalty of the wrapped program launcher.
        """
        return self._prgLauncher.getGapOpenPenalty()


    def getGapExtendPenalty( self ):
        """
        Return the gap extension penalty of the wrapped program launcher.
        """
        return self._prgLauncher.getGapExtendPenalty()


    def getProgramLauncherInstance( self ):
        """
        Return the wrapped MapProgramLauncher, creating and configuring it
        on the first call.
        """
        if self._prgLauncher is None:
            self._prgLauncher = MapProgramLauncher()
            self._prgLauncher.setInputFile( GENERIC_IN_FILE )
            self._prgLauncher.setOutputFile( "%s.fa_aln" % ( GENERIC_IN_FILE ) )
            # SATannot test: check whether clean is set to true
            #self._prgLauncher.setClean()
            self._prgLauncher.setVerbosityLevel( 1 )
            self._prgLauncher.setListFilesToKeep()
            self._prgLauncher.setListFilesToRemove()
        return self._prgLauncher


if __name__ == "__main__":
    i = MapClusterLauncher()
    i.setAttributesFromCmdLine()
    i.run()
#!/usr/bin/env python

##@file
# Launch Map (multiple alignment).
#
# Source path: commons/launcher/MapProgramLauncher.py
#
# options:
#      -h: this help
#      -i: name of the input file (format='fasta')
#      -s: size above which a gap is not penalized anymore (default='50')
#      -m: penalty for a mismatch (default='-8')
#      -O: penalty for a gap opening (default='16')
#      -e: penalty for a gap extension (default='4')
#      -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')
#      -c: clean
#      -v: verbosity level (default=0/1)


import sys
import os

from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
from commons.core.seq.FastaUtils import FastaUtils
from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders


class MapProgramLauncher( AbstractProgramLauncher ):
    """
    Launch Map (multiple alignment).
    """


    def __init__( self ):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "rpt_map"
        self._formatInFile = "fasta"
        self._cmdLineSpecificOptions = "s:m:O:e:o:"
        self._gapSize = 50
        self._mismatchPenalty = -8
        self._gapOpenPenalty = 16
        self._gapExtendPenalty = 4
        self._outFile = ""


    def _setDefaultOutputFileIfMissing( self ):
        """
        Name the output file after the input file when none was given.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.fa_aln" % ( self.getInputFile() ) )


    def _exitWithError( self, string ):
        """
        Print the error message and the help, then exit with status 1.
        """
        print( string )
        print( self.getHelpAsString() )
        sys.exit(1)


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -s: size above which a gap is not penalized anymore (default='%i')" % ( self.getGapSize() )
        string += "\n -m: penalty for a mismatch (default='%i', match=10)" % ( self.getMismatchPenalty() )
        string += "\n -O: penalty for a gap opening (default='%i')" % ( self.getGapOpenPenalty() )
        string += "\n -e: penalty for a gap extension (default='%i')" % ( self.getGapExtendPenalty() )
        string += "\n -o: name of the output file (format='aligned fasta', default=inFile+'.fa_aln')"
        return string


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.
        """
        if o == "-s":
            self.setGapSize( a )
        elif o == "-m":
            self.setMismatchPenalty( a )
        elif o == "-O":
            self.setGapOpenPenalty( a )
        elif o == "-e":
            self.setGapExtendPenalty( a )
        elif o == "-o":
            self.setOutputFile( a )


    def setGapSize( self, arg ):
        self._gapSize = int(arg)


    def setMismatchPenalty( self, arg ):
        self._mismatchPenalty = int(arg)


    def setGapOpenPenalty( self, arg ):
        self._gapOpenPenalty = int(arg)


    def setGapExtendPenalty( self, arg ):
        self._gapExtendPenalty = int(arg)


    def getGapSize( self ):
        return self._gapSize


    def getMismatchPenalty( self ):
        return self._mismatchPenalty


    def getGapOpenPenalty( self ):
        return self._gapOpenPenalty


    def getGapExtendPenalty( self ):
        return self._gapExtendPenalty


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.
        Exit with status 1 on the first invalid value.
        """
        if self.getGapSize() <= 0:
            self._exitWithError( "ERROR: gap size should be > 0" )
        if self.getMismatchPenalty() >= 0:
            self._exitWithError( "ERROR: mismatch penalty should be < 0" )
        if self.getGapOpenPenalty() < 0:
            self._exitWithError( "ERROR: gap opening penalty should be >= 0" )
        if self.getGapExtendPenalty() < 0:
            self._exitWithError( "ERROR: gap extension penalty should be >= 0" )
        self._setDefaultOutputFileIfMissing()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for MapClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        self._wrpCmdLine += " -s %i" % ( self.getGapSize() )
        self._wrpCmdLine += " -m %i" % ( self.getMismatchPenalty() )
        self._wrpCmdLine += " -O %i" % ( self.getGapOpenPenalty() )
        self._wrpCmdLine += " -e %i" % ( self.getGapExtendPenalty() )
        self._setDefaultOutputFileIfMissing()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        The program reads the short-header copy of the input file and its
        standard output is redirected to '<input>.shortH.fa_aln'.
        """
        self._prgCmdLine = self.getProgramName()
        self._prgCmdLine += " %s.shortH" % ( self.getInputFile() )
        self._prgCmdLine += " %i" % ( self.getGapSize() )
        self._prgCmdLine += " %i" % ( self.getMismatchPenalty() )
        self._prgCmdLine += " %i" % ( self.getGapOpenPenalty() )
        self._prgCmdLine += " %i" % ( self.getGapExtendPenalty() )
        self._prgCmdLine += " > %s.shortH.fa_aln" % ( self.getInputFile() )


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.
        """
        self._setDefaultOutputFileIfMissing()
        self.appendFileToKeep( self.getOutputFile() )


    def setListFilesToRemove( self ):
        """
        Set the list of files to remove.
        """
        self.appendFileToRemove( "%s.shortH" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortH.fa_aln" % ( self.getInputFile() ) )
        self.appendFileToRemove( "%s.shortHlink" % ( self.getInputFile() ) )


    def setSummary( self ):
        """
        Set the summary (input file, penalties, output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\ngap size: %i" % ( self.getGapSize() )
        self._summary += "\nmismatch penalty: %i" % ( self.getMismatchPenalty() )
        self._summary += "\ngap openning penalty: %i" % ( self.getGapOpenPenalty() )
        self._summary += "\ngap extension penalty: %i" % ( self.getGapExtendPenalty() )
        self._setDefaultOutputFileIfMissing()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.
        Headers are shortened before the alignment and restored afterwards;
        sequences are written upper-cased, in the order of the input file.
        Exit with status 1 when the program fails.
        """
        self.start()

        inFile = self.getInputFile()
        lInitHeaders = FastaUtils.dbHeaders( inFile, self.getVerbosityLevel()-1 )

        # step 1: replace the headers by short ones, keeping a link file
        csh = ChangeSequenceHeaders()
        csh.setInputFile( inFile )
        csh.setFormat( "fasta" )
        csh.setStep( 1 )
        csh.setPrefix( "seq" )
        csh.setLinkFile( "%s.shortHlink" % ( inFile ) )
        csh.setOutputFile( "%s.shortH" % ( inFile ) )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        returnStatus = os.system( cmd )
        if returnStatus != 0:
            string = "ERROR: program '%s' returned status '%i'" % ( self.getProgramName(), returnStatus )
            print( string )
            sys.exit(1)

        # step 2: put the initial headers back in the alignment
        csh.setInputFile( "%s.shortH.fa_aln" % ( inFile ) )
        csh.setFormat( "fasta" )
        csh.setStep( 2 )
        csh.setLinkFile( "%s.shortHlink" % ( inFile ) )
        csh.setOutputFile( "%s.shortH.fa_aln.initH" % ( inFile ) )
        csh.setVerbosityLevel( self.getVerbosityLevel() - 1 )
        csh.run()

        absDB = AlignedBioseqDB( "%s.shortH.fa_aln.initH" % ( inFile ) )
        # 'with' closes the output file even when a fetch or a write fails
        with open( self.getOutputFile(), "w" ) as outFileHandler:
            for header in lInitHeaders:
                bs = absDB.fetch( header )
                bs.upCase()
                bs.write( outFileHandler )
        if self.getClean():
            os.remove( "%s.shortH.fa_aln.initH" % ( inFile ) )

        self.end()


if __name__ == "__main__":
    i = MapProgramLauncher()
    i.setAttributesFromCmdLine()
    i.run()
#!/usr/bin/env python

##@file
# Launch NWalign (pairwise alignment).
#
# Source path: commons/launcher/NWalignProgramLauncher.py
#
# options:
#      -h: this help
#      -i: name of the input file (queries, format='fasta')
#      -s: name of the subject file (format='fasta')
#      -p: parameters for 'NWalign' (none by default)
#      -o: name of the output file (format='align', default=inFile+'.align')
#      -c: clean
#      -v: verbosity level (default=0/1)


import os
import sys

from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher
from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB
from commons.core.coord.AlignUtils import AlignUtils


class NWalignProgramLauncher( AbstractProgramLauncher ):
    """
    Launch NWalign (pairwise alignment).
    """

    def __init__( self ):
        """
        Constructor.
        """
        AbstractProgramLauncher.__init__( self )
        self._prgName = "NWalign"
        self._formatInFile = "fasta"
        self._sbjFile = ""
        self._prgParam = ""
        self._cmdLineSpecificOptions = "s:p:o:"


    def _setDefaultOutputFileIfMissing( self ):
        """
        Name the output file '<input>.align' when none was given.
        """
        if self.getOutputFile() == "":
            self.setOutputFile( "%s.align" % ( self.getInputFile() ) )


    def getSpecificHelpAsString( self ):
        """
        Return the specific help as a string.
        """
        string = ""
        string += "\nspecific options:"
        string += "\n -s: name of the subject file (format='fasta')"
        string += "\n -p: parameters for '%s'" % ( self.getProgramName() )
        string += "\n -o: name of the output file (format='align', default=inFile+'.align')"
        return string


    def setASpecificAttributeFromCmdLine( self, o, a="" ):
        """
        Set a specific attribute from the command-line arguments.
        """
        if o =="-s":
            self.setSubjectFile( a )
        elif o == "-p":
            self.setProgramParameters( a )
        elif o == "-o":
            self.setOutputFile( a )


    def setSubjectFile( self, arg ):
        self._sbjFile = arg


    def getSubjectFile( self ):
        return self._sbjFile


    def checkSpecificAttributes( self ):
        """
        Check the specific attributes before running the program.
        Exit with status 1 when the subject file is missing.
        """
        if self._sbjFile == "":
            string = "ERROR: missing subject file (-s)"
            print( string )
            print( self.getHelpAsString() )
            sys.exit(1)
        self._setDefaultOutputFileIfMissing()


    def setWrapperCommandLine( self ):
        """
        Set the command-line of the wrapper.
        Required for NWalignClusterLauncher.
        """
        self._wrpCmdLine = self.getWrapperName()
        self._wrpCmdLine += " -i %s" % ( self.getInputFile() )
        self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() )
        if self.getProgramParameters() != "":
            self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfMissing()
        self._wrpCmdLine += " -o %s" % ( self.getOutputFile() )
        if self.getClean():
            self._wrpCmdLine += " -c"
        self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() )


    def setProgramCommandLine( self ):
        """
        Set the command-line of the program.
        The program writes its alignment to the temporary file '<input>.afa'.
        """
        self._prgCmdLine = self.getProgramName()
        if self.getProgramParameters() != "":
            self._prgCmdLine += " %s" % ( self.getProgramParameters() )
        self._prgCmdLine += " -o %s.afa" % ( self.getInputFile() )
        self._prgCmdLine += " -v %i" % ( self.getVerbosityLevel() )
        self._prgCmdLine += " %s" % ( self.getSubjectFile() )
        self._prgCmdLine += " %s" % ( self.getInputFile() )


    def setListFilesToKeep( self ):
        """
        Set the list of files to keep.
        """
        # the default must be '<input>.align': '<input>.afa' is the temporary
        # file that postprocess() removes after having written the output
        self._setDefaultOutputFileIfMissing()
        self.appendFileToKeep( self.getOutputFile() )


    def postprocess( self ):
        """
        Convert the aligned fasta file into the 'align' output file and
        remove the aligned fasta file.
        Coordinates of 're-oriented' sequences are recomputed from the
        sequence length, and alignments whose query is not on the direct
        strand are reversed.
        """
        absDB = AlignedBioseqDB( "%s.afa" % ( self.getInputFile() ) )
        lHeaders = absDB.getHeaderList()
        # the first two records are taken as query and subject
        queryHeader = lHeaders[0]
        subjectHeader = lHeaders[1]
        queryLength = absDB.fetch( queryHeader ).getLength()
        subjectLength = absDB.fetch( subjectHeader ).getLength()
        lAligns = absDB.getAlignList( queryHeader, subjectHeader )
        for i in lAligns:
            if "re-oriented" in i.getQueryName():
                i.setQueryName( queryHeader.replace(" re-oriented","") )
                start = i.getQueryStart()
                end = i.getQueryEnd()
                i.setQueryStart( queryLength - end + 1 )
                i.setQueryEnd( queryLength - start + 1 )
            if "re-oriented" in i.getSubjectName():
                i.setSubjectName( subjectHeader.replace(" re-oriented","") )
                start = i.getSubjectStart()
                end = i.getSubjectEnd()
                # NOTE(review): unlike the query, start and end are not swapped
                # here - presumably to mark the reverse strand; confirm
                i.setSubjectEnd( subjectLength - end + 1 )
                i.setSubjectStart( subjectLength - start + 1 )
            if not i.isQueryOnDirectStrand():
                i.reverse()
        AlignUtils.writeListInFile( lAligns, self.getOutputFile() )
        os.remove( "%s.afa" % ( self.getInputFile() ) )


    def setSummary( self ):
        """
        Set the summary (input file, subject file, parameters, output file).
        """
        self._summary = "input file: %s" % ( self.getInputFile() )
        self._summary += "\nsubject file: %s" % ( self.getSubjectFile() )
        self._summary += "\nparameters: %s" % ( self.getProgramParameters() )
        self._setDefaultOutputFileIfMissing()
        self._summary += "\noutput file: %s" % ( self.getOutputFile() )


    def run( self ):
        """
        Run the program.
        Exit with status 1 when the program fails.
        """
        self.start()

        self.setProgramCommandLine()
        cmd = self.getProgramCommandLine()
        if self.getVerbosityLevel() > 0:
            print( "LAUNCH: %s" % ( cmd ) )
            sys.stdout.flush()
        exitStatus = os.system( cmd )
        if exitStatus != 0:
            string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus )
            print( string )
            sys.exit(1)

        self.postprocess()

        self.end()


if __name__ == "__main__":
    i = NWalignProgramLauncher()
    i.setAttributesFromCmdLine()
    i.run()
--- a/commons/launcher/RepeatMaskerClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -##@file -# Launch RepeatMaskerProgramLauncher on several files in parallel on a cluster. - - -import os -import sys -import getopt -import exceptions - -from pyRepet.launcher.AbstractClusterLauncher import * -from commons.launcher.RepeatMaskerProgramLauncher import RepeatMaskerProgramLauncher - - -class RepeatMaskerClusterLauncher( AbstractClusterLauncher ): - """ - Launch RepeatMasker on several files in parallel on a cluster. - """ - - def __init__( self ): - """ - Constructor. - """ - AbstractClusterLauncher.__init__( self ) - AbstractClusterLauncher.setAcronym( self, "RM" ) - - self._cmdLineSpecificOptions = "s:n:gbl" - - self._exeWrapper = "RepeatMaskerProgramLauncher.py" - self._prgLauncher = RepeatMaskerProgramLauncher() - self._prgLauncher.setInputFile( GENERIC_IN_FILE ) - self._prgLauncher.setOutputFile( GENERIC_IN_FILE ) - self._prgLauncher.setClean() - self._prgLauncher.setVerbosityLevel( 1 ) - self._prgLauncher.setListFilesToKeep() - self._prgLauncher.setListFilesToRemove() - - - def getSpecificHelpAsString( self ): - """ - Return the specific help as a string. 
- """ - string = "" - string += "\nspecific options:" - string += "\n -s: name of the subject file (repeats, format='fasta')" - string += "\n -n: nb of processors to use in parallel (default=1)" - string += "\n -g: calculate the GC content" - string += "\n -b: skip bacterial insertion element check" - string += "\n -l: mask low-complexity DNA or simple repeats" - return string - - - def getSubjectFile( self ): - return self._prgLauncher.getSubjectFile() - - - def getNbProcessors( self ): - return self._prgLauncher.getNbProcessors() - - - def getCalculateGCcontent( self ): - return self._prgLauncher.getCalculateGCcontent() - - - def getSkipBacterialIsCheck( self ): - return self._prgLauncher.getSkipBacterialIsCheck() - - - def getMaskSsr( self ): - return self._prgLauncher.getMaskSsr() - - -if __name__ == "__main__": - i = RepeatMaskerClusterLauncher() - i.setAttributesFromCmdLine() - i.run()
--- a/commons/launcher/RepeatMaskerProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,286 +0,0 @@ -#!/usr/bin/env python - -##@file -# Launch RepeatMasker (pairwise alignment for repeat detection). - - -import os -import sys - -from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher - - -class RepeatMaskerProgramLauncher( AbstractProgramLauncher ): - """ - Launch RepeatMasker (pairwise alignment for repeat detection). - """ - - def __init__( self ): - """ - Constructor. - """ - AbstractProgramLauncher.__init__( self ) - self._prgName = "RepeatMasker" - self._formatInFile = "fasta" - self._sbjFile = "" - self._nbProc = 1 - self._calcGc = False - self._skipIs = False - self._maskSsr = True - self._onlySsr = False - self._cmdLineSpecificOptions = "s:n:gblmo:" - - - def getSpecificHelpAsString( self ): - """ - Return the specific help as a string. - """ - string = "" - string += "\nspecific options:" - string += "\n -s: name of the subject file (repeats, format='fasta')" - string += "\n -n: nb of processors to use in parallel (default='%i')" % ( self.getNbProcessors() ) - string += "\n -g: calculate the GC content" - string += "\n -b: skip bacterial insertion element check" - string += "\n -l: does not mask low-complexity DNA or simple repeats" - string += "\n -m: only masks low complex/simple repeats (no interspersed repeats)" - string += "\n -o: name of the output file" - string += "\n with -s: format='align', default=inFile+'.cat.align')" - string += "\n with -m: format='path', default=inFile+'.cat.path')" - return string - - - def setASpecificAttributeFromCmdLine( self, o, a="" ): - """ - Set a specific attribute from the command-line arguments. 
- """ - if o =="-s": - self.setSubjectFile( a ) - elif o == "-n": - self.setNbProcessors( a ) - elif o == "-g": - self.setCalculateGCcontent() - elif o == "-b": - self.setSkipBacterialIsCheck() - elif o == "-l": - self.unsetMaskSsr() - elif o == "-m": - self.setOnlySsr() - elif o == "-o": - self.setOutputFile( a ) - - - def setSubjectFile( self, arg ): - self._sbjFile = arg - - - def setNbProcessors( self, arg ): - self._nbProc = int(arg) - - - def setCalculateGCcontent( self ): - self._calcGc = True - - - def setSkipBacterialIsCheck( self ): - self._skipIs = True - - - def unsetMaskSsr( self ): - self._maskSsr = False - - - def setOnlySsr( self ): - self._onlySsr = True - - - def getSubjectFile( self ): - return self._sbjFile - - - def getNbProcessors( self ): - return self._nbProc - - - def getCalculateGCcontent( self ): - return self._calcGc - - - def getSkipBacterialIsCheck( self ): - return self._skipIs - - - def getMaskSsr( self ): - return self._maskSsr - - - def getOnlySsr( self ): - return self._onlySsr - - - def checkSpecificAttributes( self ): - """ - Check the specific attributes before running the program. - """ - if ( self.getSubjectFile() == "" and not self.getOnlySsr() ) \ - or ( self.getSubjectFile() != "" and self.getOnlySsr() ): - string = "ERROR: need to specify -s or -m" - print string - print self.getHelpAsString() - sys.exit(1) - if self.getOutputFile() == "": - if not self.getOnlySsr(): - self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) ) - else: - self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) ) - - - def setWrapperCommandLine( self ): - """ - Set the command-line of the wrapper. - Required for RepeatMaskerClusterLauncher. 
- """ - self._wrpCmdLine = self.getWrapperName() - self._wrpCmdLine += " -i %s" % ( self.getInputFile() ) - if self.getSubjectFile() != "": - self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() ) - self._wrpCmdLine += " -n %i" %( self.getNbProcessors() ) - if self.getCalculateGCcontent(): - self._wrpCmdLine += " -g" - if self.getSkipBacterialIsCheck(): - self._wrpCmdLine += " -b" - if not self.getMaskSsr(): - self._wrpCmdLine += " -l" - if self.getOnlySsr(): - self._wrpCmdLine += " -m" - if self.getOutputFile() != "": - self._wrpCmdLine += " -o %s" % ( self.getOutputFile() ) - if self.getClean(): - self._wrpCmdLine += " -c" - if self.getVerbosityLevel() != 0: - self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() ) - - - def setProgramCommandLine( self ): - """ - Set the command-line of the program. - """ - self._prgCmdLine = self.getProgramName() - self._prgCmdLine += " -dir ." - self._prgCmdLine += " -pa %i" % ( self.getNbProcessors() ) - if self.getCalculateGCcontent(): - self._prgCmdLine += " -gccalc" - if self.getSkipBacterialIsCheck(): - self._prgCmdLine += " -no_is" - if self.getMaskSsr(): - self._prgCmdLine += " -nolow" - if self.getOnlySsr(): - self._prgCmdLine += " -int" - if self.getSubjectFile() != "": - self._prgCmdLine += " -lib %s" % ( self.getSubjectFile() ) - self._prgCmdLine += " %s" % ( self.getInputFile() ) - - - def setListFilesToKeep( self ): - """ - Set the list of files to keep. - """ - if self.getOutputFile() == "": - if not self.getOnlySsr(): - self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) ) - else: - self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) ) - self.appendFileToKeep( self.getOutputFile() ) - self.appendFileToKeep( "%s.cat" % ( self.getInputFile() ) ) - - - def setListFilesToRemove( self ): - """ - Set the list of files to remove. 
- """ - self.appendFileToRemove( "%s.stderr" % ( self.getInputFile() ) ) - self.appendFileToRemove( "%s.tbl" % ( self.getInputFile() ) ) - self.appendFileToRemove( "%s.ori.out" % ( self.getInputFile() ) ) - self.appendFileToRemove( "%s.masked" % ( self.getInputFile() ) ) - self.appendFileToRemove( "%s.out" % ( self.getInputFile() ) ) - self.appendFileToRemove( "%s.log" % ( self.getInputFile() ) ) - self.appendFileToRemove( "%s.ref" % ( self.getInputFile() ) ) - - - def convertCatIntoAlign( self ): - """ - Convert a 'cat' file into the 'align' format. - """ - cmd = os.environ["REPET_PATH"] + "/bin/RMcat2align.py" - cmd += " -i %s.cat" % ( self.getInputFile() ) - cmd += " -o %s.cat.align" % ( self.getInputFile() ) - exitStatus = os.system( cmd ) - if exitStatus != 0: - string = "ERROR while converting 'cat' file into 'align' format" - print string - sys.exit(1) - - - def convertCatIntoPath( self ): - """ - Convert a 'cat' file into the 'path' format. - """ - cmd = os.environ["REPET_PATH"] + "/bin/RMcat2path.py" - cmd += " -i %s.cat" % ( self.getInputFile() ) - cmd += " -o %s.cat.path" % ( self.getInputFile() ) - exitStatus = os.system( cmd ) - if exitStatus != 0: - string = "ERROR while converting 'cat' file into 'path' format" - print string - sys.exit(1) - - - def setSummary( self ): - self._summary = "input file: %s" % ( self.getInputFile() ) - if self.getSubjectFile() != "": - self._summary += "\nsubject file: %s" % ( self.getSubjectFile() ) - self._summary += "\nnb processors: %i" % ( self.getNbProcessors() ) - if self.getCalculateGCcontent(): - self._summary += "\ncalculate the GC content" - if self.getSkipBacterialIsCheck(): - self._summary += "\nskip bacterial insertion element check" - if self.getMaskSsr(): - self._summary += "\nmask low-complexity DNA or simple repeats" - if self.getOnlySsr(): - self._summary = "\nonly masks low complex/simple repeats (no interspersed repeats)" - if self.getOutputFile() == "": - if not self.getMaskSsr(): - 
self.setOutputFile( "%s.cat.align" % ( self.getInputFile() ) ) - else: - self.setOutputFile( "%s.cat.path" % ( self.getInputFile() ) ) - self._summary += "\noutput file: %s" % ( self.getOutputFile() ) - - - def run( self ): - """ - Run the program. - """ - self.start() - - self.setProgramCommandLine() - cmd = self.getProgramCommandLine() - if self.getVerbosityLevel() > 0: - print "LAUNCH: %s" % ( cmd ) - sys.stdout.flush() - exitStatus = os.system( cmd ) - if exitStatus != 0: - string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus ) - print string - sys.exit(1) - - if not self.getOnlySsr(): - self.convertCatIntoAlign() - else: - self.convertCatIntoPath() - - self.end() - - -if __name__ == "__main__": - i = RepeatMaskerProgramLauncher() - i.setAttributesFromCmdLine() - i.run()
--- a/commons/launcher/YassClusterLauncher.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -#!/usr/bin/env python - -##@file -# Launch YassProgramLauncher on several files in parallel on a cluster. - - -from pyRepet.launcher.AbstractClusterLauncher import * -from commons.core.coord.AlignUtils import AlignUtils -from commons.launcher.YassProgramLauncher import YassProgramLauncher -from commons.tools import srptBlasterMatcher - - -class YassClusterLauncher( AbstractClusterLauncher ): - """ - Launch Yass on several files in parallel on a cluster. - """ - - def __init__( self ): - """ - Constructor. - """ - AbstractClusterLauncher.__init__( self ) - AbstractClusterLauncher.setAcronym( self, "Yass" ) - - self._cmdLineSpecificOptions = "s:p:A" - - self._exeWrapper = "YassProgramLauncher.py" - self._prgLauncher = None - self._prgLauncher = self.getProgramLauncherInstance() - - - def getSpecificHelpAsString( self ): - """ - Return the specific help as a string. 
- """ - string = "" - string += "\nspecific options:" - string += "\n -s: name of the subject file (format='fasta')" - string += "\n -p: parameters for 'yass'" - string += "\n -Z: concatenate output files" - string += "\n -A: same sequences (all-by-all)" - return string - - - def getSubjectFile( self ): - return self._prgLauncher.getSubjectFile() - - - def getProgramParameters( self ): - return self._prgLauncher.getProgramParameters() - - - def getProgramLauncherInstance( self ): - if self._prgLauncher == None: - self._prgLauncher = YassProgramLauncher() - self._prgLauncher.setInputFile( GENERIC_IN_FILE ) - self._prgLauncher.setClean() - self._prgLauncher.setVerbosityLevel( 1 ) - self._prgLauncher.setListFilesToKeep() - self._prgLauncher.setListFilesToRemove() - return self._prgLauncher - - - def processOutputFile( self, tmpFile, outFile ): - sortFile = "%s.sort" % ( tmpFile ) - AlignUtils.sortAlignFile( tmpFile, sortFile ) - if self._prgLauncher.getAllByAll(): - srptBlasterMatcher.filterRedundantMatches( sortFile, - outFile ) - os.remove( sortFile ) - else: - os.rename( sortFile, outFile ) - - -if __name__ == "__main__": - i = YassClusterLauncher() - i.setAttributesFromCmdLine() - i.run()
--- a/commons/launcher/YassProgramLauncher.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,195 +0,0 @@ -#!/usr/bin/env python - -##@file -# Launch Yass (pairwise alignment). -# -# options: -# -h: this help -# -i: name of the input file (queries, format='fasta') -# -s: name of the subject file (format='fasta') -# -p: parameters for 'yass' (default='-d 2') -# -o: name of the output file (format='align', default=inFile+'.align') -# -c: clean -# -v: verbosity level (default=0/1) - - -import os -import sys - -from pyRepet.launcher.AbstractProgramLauncher import AbstractProgramLauncher - - -class YassProgramLauncher( AbstractProgramLauncher ): - """ - Launch Yass (pairwise alignment). - """ - - def __init__( self ): - """ - Constructor. - """ - AbstractProgramLauncher.__init__( self ) - self._prgName = "yass" - self._formatInFile = "fasta" - self._sbjFile = "" - self._prgParam = "" - self._allByAll = False - self._cmdLineSpecificOptions = "s:p:Ao:" - - - def getSpecificHelpAsString( self ): - """ - Return the specific help as a string. - """ - string = "" - string += "\nspecific options:" - string += "\n -s: name of the subject file (format='fasta')" - string += "\n -p: parameters for '%s'" % ( self.getProgramName() ) - string += "\n -A: same sequences (all-by-all)" - string += "\n -o: name of the output file (format='align', default=inFile+'.align')" - return string - - - def setASpecificAttributeFromCmdLine( self, o, a="" ): - """ - Set a specific attribute from the command-line arguments. 
- """ - if o =="-s": - self.setSubjectFile( a ) - elif o == "-p": - self.setProgramParameters( a ) - elif o == "-A": - self.setAllByAll() - elif o == "-o": - self.setOutputFile( a ) - - - def setSubjectFile( self, arg ): - self._sbjFile = arg - - - def getSubjectFile( self ): - return self._sbjFile - - - def setAllByAll( self ): - self._allByAll = True - - - def getAllByAll( self ): - return self._allByAll - - - def checkSpecificAttributes( self ): - """ - Check the specific attributes before running the program. - """ - if self._sbjFile == "": - string = "ERROR: missing subject file (-s)" - print string - print self.getHelpAsString() - sys.exit(1) - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - - - def setWrapperCommandLine( self ): - """ - Set the command-line of the wrapper. - Required for YassClusterLauncher. - """ - self._wrpCmdLine = self.getWrapperName() - self._wrpCmdLine += " -i %s" % ( self.getInputFile() ) - self._wrpCmdLine += " -s %s" % ( self.getSubjectFile() ) - if self.getProgramParameters() != "": - self._wrpCmdLine += " -p '%s'" % ( self.getProgramParameters() ) - if self.getAllByAll(): - self._wrpCmdLine += " -A" - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - self._wrpCmdLine += " -o %s" % ( self.getOutputFile() ) - if self.getClean(): - self._wrpCmdLine += " -c" - self._wrpCmdLine += " -v %i" % ( self.getVerbosityLevel() ) - - - def setProgramCommandLine( self ): - """ - Set the command-line of the program. - """ - self._prgCmdLine = self.getProgramName() - self._prgCmdLine += " -d 2" - if self.getProgramParameters() != "": - self._prgCmdLine += " %s" % ( self.getProgramParameters() ) - self._prgCmdLine += " -o %s.blast" % ( self.getInputFile() ) - self._prgCmdLine += " %s" % ( self.getInputFile() ) - self._prgCmdLine += " %s" % ( self.getSubjectFile() ) - - - def setListFilesToKeep( self ): - """ - Set the list of files to keep. 
- """ - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - self.appendFileToKeep( self.getOutputFile() ) - - - def setListFilesToRemove( self ): - """ - Set the list of files to remove. - """ - pass - - - def convertBlastIntoAlign( self ): - """ - Convert a 'blast' file into the 'align' format. - """ - cmd = os.environ["REPET_PATH"] + "/bin/blast2align.py" - cmd += " -i %s.blast" % ( self.getInputFile() ) - cmd += " -o %s" % ( self.getOutputFile() ) - exitStatus = os.system( cmd ) - if exitStatus != 0: - string = "ERROR while converting 'blast' file into 'align' format" - print string - sys.exit(1) - - - def setSummary( self ): - self._summary = "input file: %s" % ( self.getInputFile() ) - self._summary += "\nsubject file: %s" % ( self.getSubjectFile() ) - self._summary += "\nparameters: %s" % ( self.getProgramParameters() ) - if self.getAllByAll(): - self._summary += "\nall-by-all" - if self.getOutputFile() == "": - self.setOutputFile( "%s.align" % ( self.getInputFile() ) ) - self._summary += "\noutput file: %s" % ( self.getOutputFile() ) - - - def run( self ): - """ - Run the program. - """ - self.start() - - self.setProgramCommandLine() - cmd = self.getProgramCommandLine() - if self.getVerbosityLevel() > 0: - print "LAUNCH: %s" % ( cmd ) - sys.stdout.flush() - exitStatus = os.system( cmd ) - if exitStatus != 0: - string = "ERROR: program '%s' returned exit status '%i'" % ( self.getProgramName(), exitStatus ) - print string - sys.exit(1) - - self.convertBlastIntoAlign() - - self.end() - - -if __name__ == "__main__": - i = YassProgramLauncher() - i.setAttributesFromCmdLine() - i.run()
--- a/commons/launcher/launchBlasterMatcherPerQuery.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,196 +0,0 @@ -#!/usr/bin/env python - -""" -This program splits the input fasta file in a given number of files, launch Blaster and/or Matcher on them in parallel and collect the results afterwards. -""" - -import os -import sys -import getopt -import exceptions -import logging -import ConfigParser - -if not os.environ.has_key( "REPET_PATH" ): - print "*** Error: no environment variable REPET_PATH" - sys.exit(1) -sys.path.append( os.environ["REPET_PATH"] ) - -import pyRepet.launcher.programLauncher -import pyRepet.seq.fastaDB - -#----------------------------------------------------------------------------- - -def help(): - - """ - Give the list of the command-line options. - """ - - print - print "usage:",sys.argv[0]," [ options ]" - print "options:" - print " -h: this help" - print " -q: fasta filename of the queries" - print " -s: fasta filename of the subjects (same as queries if not specified)" - print " -Q: queue name on the cluster" - print " -d: absolute path to the temporary directory" - print " -C: configuration file" - print " -n: max. number of jobs (default=10,given a min. of 1 query per job)" - print " -m: mix of Blaster and/or Matcher" - print " 1: launch Blaster only" - print " 2: launch Matcher only (on '*.align' query files)" - print " 3: launch Blaster+Matcher in the same job (default)" - print " -B: parameters for Blaster (e.g. \"-a -n tblastx\")" - print " -M: parameters for Matcher (e.g. \"-j\")" - print " -Z: collect all the results into a single file (format 'align', 'path' or 'tab')" - print " -c: clean" - print " -v: verbose (default=0/1/2)" - print - -#----------------------------------------------------------------------------- - -def main(): - - """ - This program splits the input fasta file in a given number of files, launch Blaster and/or Matcher on them in parallel and collect the results afterwards. 
- """ - - qryFileName = "" - sbjFileName = "" - queue = "" - tmpDir = "" - configFileName = "" - maxNbJobs = 10 - minQryPerJob = 1 - mix = "3" - paramBlaster = "" - paramMatcher = "" - collectFormat = "" - clean = False - verbose = 0 - - try: - opts, args = getopt.getopt(sys.argv[1:],"hq:s:Q:d:C:n:m:B:M:Z:cv:") - except getopt.GetoptError, err: - print str(err) - help() - sys.exit(1) - for o,a in opts: - if o == "-h": - help() - sys.exit(0) - elif o == "-q": - qryFileName = a - elif o == "-s": - sbjFileName = a - elif o == "-Q": - queue = a - elif o == "-d": - tmpDir = a - elif o == "-C": - configFileName = a - elif o == "-n": - maxNbJobs = int(a) - elif o == "-m": - mix = a - elif o == "-B": - paramBlaster = a - elif o == "-M": - paramMatcher = a - elif o == "-Z": - collectFormat = a - elif o == "-c": - clean = True - elif o == "-v": - verbose = int(a) - - if qryFileName == "" or configFileName == "" or collectFormat == "": - print "*** Error: missing compulsory options" - help() - sys.exit(1) - - if verbose > 0: - print "\nbeginning of %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - if not os.path.exists( qryFileName ): - print "*** Error: query file '%s' doesn't exist" % ( qryFileName ) - sys.exit(1) - if sbjFileName != "": - if not os.path.exists( sbjFileName ): - print "*** Error: subject file '%s' doesn't exist" % ( sbjFileName ) - sys.exit(1) - else: - sbjFileName = qryFileName - - pL = pyRepet.launcher.programLauncher.programLauncher() - - nbSeqQry = pyRepet.seq.fastaDB.dbSize( qryFileName ) - qryPerJob = nbSeqQry / float(maxNbJobs) - - # split the input query file in single files into a new directory - prg = os.environ["REPET_PATH"] + "/bin/dbSplit.py" - cmd = prg - cmd += " -i %s" % ( qryFileName ) - if qryPerJob <= 1.0: - cmd += " -n %i" % ( minQryPerJob ) - else: - cmd += " -n %i" % ( qryPerJob + 1 ) - cmd += " -d" - pL.launch( prg, cmd ) - - # prepare the subject databank - if sbjFileName != qryFileName: - prg = "blaster" - cmd = prg - cmd 
+= " -q %s" % ( sbjFileName ) - cmd += " -P" - pL.launch( prg, cmd ) - - # launch Blaster+Matcher in parallel - prg = "srptBlasterMatcher.py" - cmd = prg - cmd += " -g %s_vs_%s" % ( qryFileName, sbjFileName ) - cmd += " -q %s/batches" % ( os.getcwd() ) - cmd += " -s %s/%s" % ( os.getcwd(), sbjFileName ) - cmd += " -Q '%s'" % ( queue ) - if tmpDir != "": - cmd += " -d %s" % ( tmpDir ) - cmd += " -m %s" % ( mix ) - if paramBlaster != "": - cmd += " -B \"%s\"" % ( paramBlaster ) - if paramMatcher != "": - cmd += " -M \"%s\"" % ( paramMatcher ) - cmd += " -Z %s" % ( collectFormat ) - cmd += " -C %s" % ( configFileName ) - if clean == True: - cmd += " -c" - cmd += " -v %i" % ( verbose - 1 ) - pL.launch( prg, cmd ) - - suffix = "" - if mix in ["2","3"]: - if "-a" in paramMatcher: - suffix = "match.%s" % ( collectFormat ) - else: - suffix = "clean_match.%s" % ( collectFormat ) - os.system( "mv %s_vs_%s.%s %s_vs_%s.align.%s" % ( qryFileName, sbjFileName, collectFormat, qryFileName, sbjFileName, suffix ) ) - - # clean - if clean == True: - prg = "rm" - cmd = prg - cmd += " -rf batches formatdb.log %s_cut* %s.Nstretch.map" % ( sbjFileName, sbjFileName ) - pL.launch( prg, cmd ) - - if verbose > 0: - print "%s finished successfully\n" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - return 0 - -#---------------------------------------------------------------------------- - -if __name__ == '__main__': - main()
--- a/commons/launcher/launchMafft.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -#!/usr/bin/env python - -# DEPRECATED - -import user, os, sys, getopt, exceptions - -if not os.environ.has_key( "REPET_PATH" ): - print "*** Error: no environment variable REPET_PATH" - sys.exit(1) -sys.path.append( os.environ["REPET_PATH"] ) - -import pyRepet.launcher.programLauncher -import pyRepet.seq.fastaDB -from pyRepet.seq.BioseqDB import * - -#------------------------------------------------------------------------------ - -def help(): - - print - print "DEPRECATED" - print - print "usage: ",sys.argv[0],"[ options ]" - print "options:" - print " -h: this help" - print " -i: name of the input file (format='fasta')" - print " -o: name of the output file (default=inFileName+'.fa_aln')" - print " -v: verbose (default=0/1)" - print - -#------------------------------------------------------------------------------ - -def main(): - - """ - This program launches MAFFT to build a multiple sequence alignment. - """ - - inFileName = "" - outFileName = "" - verbose = 0 - - try: - opts,args=getopt.getopt(sys.argv[1:],"hi:o:v:") - except getopt.GetoptError: - help() - sys.exit(1) - for o,a in opts: - if o == "-h": - help() - sys.exit(0) - elif o == "-i": - inFileName = a - elif o == "-o": - outFileName = a - elif o == "-v": - verbose = int(a) - - if inFileName == "": - print "*** Error: missing compulsory options" - help() - sys.exit(1) - - if verbose > 0: - print "beginning of %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - if verbose > 0: - print "build a multiple alignment from '%s'..." 
% ( inFileName ) - sys.stdout.flush() - - pyRepet.seq.fastaDB.shortenSeqHeaders( inFileName ) - - bsDB = BioseqDB( inFileName+".shortH" ) - bsDB.upCase() - bsDB.save( inFileName+".shortHtmp" ) - del bsDB - os.rename( inFileName+".shortHtmp", inFileName+".shortH" ) - - pL = pyRepet.launcher.programLauncher.programLauncher( inFileName+".shortH" ) - pL.launchMafft( outFileName=inFileName+".shortH.fa_aln", verbose=verbose ) - - pyRepet.seq.fastaDB.retrieveInitSeqHeaders( inFileName+".shortH.fa_aln", - inFileName+".shortHlink", - inFileName+".shortH.fa_aln.initH", - verbose-1 ) - - if outFileName == "": - outFileName = "%s.fa_aln" % ( inFileName ) - os.system( "mv %s.shortH.fa_aln.initH %s" % ( inFileName, outFileName ) ) - - for f in [inFileName+".shortH",inFileName+".shortH.fa_aln",inFileName+".shortHlink"]: - os.remove( f ) - - if verbose > 0: - print "%s finished successfully" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - return 0 - -#------------------------------------------------------------------------------ - -if __name__ == '__main__': - main()
--- a/commons/launcher/launchMreps.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,140 +0,0 @@ -#!/usr/bin/env python - -from commons.core.seq.BioseqDB import BioseqDB -from commons.core.parsing.MrepsToSet import MrepsToSet -import subprocess -import os -import sys -import getopt - -def help(): - """ - Give the list of the command-line options. - """ - print - print "usage: ",sys.argv[0],"[ options ]" - print "options:" - print " -h: this help" - print " -i: name of the input file (format='fasta')" - print " -o: name of the output file (default=inFileName+'.Mreps.set')" - print " -f: error filter (default=1.0)" - print " -c: clean" - print " -v: verbosity level (default=0/1)" - print - -def main(): - """ - Launch Mreps. - """ - inFileName = "" - outFileName = "" - errorFilter = 1.0 - clean = False - verbose = 0 - - try: - opts=getopt.getopt(sys.argv[1:],"hi:o:f:cv:")[0] - except getopt.GetoptError, err: - print str(err) - help() - sys.exit(1) - for o,a in opts: - if o == "-h": - help() - sys.exit(0) - elif o == "-i": - inFileName = a - elif o == "-o": - outFileName = a - elif o == "-f": - errorFilter = float(a) - elif o == "-c": - clean = True - elif o == "-v": - verbose = int(a) - - if inFileName == "": - print "ERROR: missing compulsory options" - help() - sys.exit(1) - - if verbose > 0: - print "beginning of %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - # Mreps 2.5 doesn't fully support IUPAC nomenclature - if verbose > 0: - print "* check IUPAC symbols"; sys.stdout.flush() - tmpInFileName = "%s.tmp%i" % ( inFileName, os.getpid() ) - if os.path.exists( tmpInFileName ): - os.system( "rm -f %s" % ( tmpInFileName ) ) - bsDB = BioseqDB( inFileName ) - for bs in bsDB.db: - if verbose > 0: - print bs.header; sys.stdout.flush() - bs.partialIUPAC() - onlyN = True - for nt in ["A","T","G","C"]: - if nt in bs.sequence: - onlyN = False - if onlyN == True: - if verbose > 0: - print "** Warning: only Ns"; sys.stdout.flush() 
- else: - bsDB.save( tmpInFileName ) - - if not os.path.exists( tmpInFileName ): - sys.exit(0) - - if verbose > 0: - print "* remove N stretches"; sys.stdout.flush() - prg = os.environ["REPET_PATH"] + "/bin/cutterDB" - cmd = prg - cmd += " -l 200000" - cmd += " -o 0" - cmd += " -w 11" - cmd += " %s" % ( tmpInFileName ) - if verbose > 0: - print cmd; sys.stdout.flush() - log = os.system( cmd ) - if log != 0: - print "ERROR: %s returned %i" % ( prg, log ) - sys.exit(1) - - # launch Mreps on the input file - MrepsOutFileName = "%s.Mreps.xml" % ( tmpInFileName ) - prg = "mreps" - cmd = prg - cmd += " -res 3" - cmd += " -exp 3.0" - cmd += " -maxsize 50" - cmd += " -xmloutput %s" % MrepsOutFileName - cmd += " -fasta %s_cut" % tmpInFileName - process = subprocess.Popen(cmd, shell = True) - process.communicate() - if process.returncode != 0: - raise Exception("ERROR when launching '%s'" % cmd) - - if outFileName == "": - outFileName = inFileName + ".Mreps.set" - - # parse Mreps results in xml format - iMrepsToSet = MrepsToSet(inFileName, MrepsOutFileName, outFileName, errorFilter) - iMrepsToSet.run() - if clean: - iMrepsToSet.clean() - - # remove temporary input filename - os.remove(tmpInFileName) - os.remove("%s_cut" % tmpInFileName) - os.remove("%s.Nstretch.map" % tmpInFileName) - - if verbose > 0: - print "%s finished successfully\n" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - return 0 - - -if __name__ == '__main__': - main()
--- a/commons/launcher/launchPhyML.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import getopt - -from pyRepet.launcher.programLauncher import programLauncher -from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders - - -def help(): - print - print "usage: ",sys.argv[0],"[ options ]" - print "options:" - print " -h: this help" - print " -i: name of the input file (aligned fasta)" - print " -c: clean" - print " -v: verbose (default=0)" - print - - -def main(): - - inFileName = "" - clean = False - verbose = 0 - try: - opts,args=getopt.getopt(sys.argv[1:],"hi:cv:") - except getopt.GetoptError, err: - print str(err) - help(); sys.exit(1) - for o,a in opts: - if o == "-h": - help(); sys.exit(0) - elif o == "-i": - inFileName = a - elif o == "-c": - clean = True - elif o == "-v": - verbose = int(a) - if inFileName == "": - print "ERROR: missing compulsory options" - help(); sys.exit(1) - - if verbose > 0: - print "START %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - csh = ChangeSequenceHeaders() - csh.setInputFile( inFileName ) - csh.setFormat( "fasta" ) - csh.setStep( 1 ) - csh.setPrefix( "seq" ) - csh.setLinkFile( inFileName+".shortHlink" ) - csh.setOutputFile( inFileName+".shortH" ) - csh.run() - - pL = programLauncher( inFileName+".shortH" ) - - pL.launchSreformat( outFormat="phylip", outFileName=inFileName+".shortH.phylip", verbose=verbose ) - - pL.reset( inFileName+".shortH.phylip" ) - - pL.launchPhyML( verbose=verbose ) - - csh.setInputFile( inFileName+".shortH.phylip_phyml_tree.txt" ) - csh.setFormat( "newick" ) - csh.setStep( 2 ) - csh.setLinkFile( inFileName+".shortHlink" ) - csh.setOutputFile( inFileName+"_phyml.newick" ) - csh.run() - - if clean: - for f in [ inFileName+".shortH", inFileName+".shortHlink", inFileName+".shortH.phylip", - inFileName+".shortH.phylip_phyml_lk.txt", inFileName+".shortH.phylip_phyml_tree.txt" ]: - 
os.remove( f ) - os.system( "mv %s.shortH.phylip_phyml_stat.txt %s_phyml.txt" % ( inFileName, inFileName ) ) - - if verbose > 0: - print "END %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - return 0 - - -if __name__ == "__main__": - main()
--- a/commons/launcher/launchPrank.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import getopt - -from pyRepet.launcher.programLauncher import programLauncher - - -def help(): - print - print "usage: launchPrank.py [ options ]" - print "options:" - print " -h: this help" - print " -i: name of the input file (format=fasta)" - print " -o: name of the output file (format=aligned fasta, default='inFileName'+fa_aln)" - print " -P: Prank's parameters" - print " -c: clean" - print " -v: verbose (default=0/1)" - print - - -def main(): - """ - Launch PRANK. - """ - inFileName = "" - outFileName = "" - parameters = "" - clean = False - verbose = 0 - - try: - opts, args = getopt.getopt( sys.argv[1:], "hi:o:P:cv:" ) - except getopt.GetoptError, err: - print str(err) - help() - sys.exit(1) - for o,a in opts: - if o == "-h": - help() - sys.exit(0) - elif o == "-i": - inFileName = a - elif o == "-o": - outFileName = a - elif o == "-P": - parameters = a - elif o == "-c": - clean = True - elif o == "-v": - verbose = int(a) - - if inFileName == "": - print "ERROR: missing input file (-i)" - help() - sys.exit(1) - - if not os.path.exists( inFileName ): - print "ERROR: can't find file '%s'" % ( inFileName ) - help() - sys.exit(1) - - if verbose > 0: - print "START %s" % ( sys.argv[0].split("/")[-1] ) - sys.stdout.flush() - - if outFileName == "": - outFileName = "%s.fa_aln" % ( inFileName ) - - pL = programLauncher( inFileName ) - returnStatus = pL.launchPrank( outFileName, parameters, "yes", verbose ) - if returnStatus != 0: - print "ERROR: launchPrank() returned '%i'" % ( returnStatus ) - sys.exit(1) - - if verbose > 0: - print "END %s" % ( sys.argv[0].split("/")[-1] ) - sys.stdout.flush() - - return 0 - - -if __name__ == "__main__": - main()
--- a/commons/launcher/launchTCoffee.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import getopt -import exceptions - -if not os.environ.has_key( "REPET_PATH" ): - print "ERROR: no environment variable REPET_PATH" - sys.exit(1) -sys.path.append( os.environ["REPET_PATH"] ) - -from pyRepet.launcher.programLauncher import programLauncher - - -def help(): - print - print "usage: ",sys.argv[0],"[ options ]" - print "options:" - print " -h: this help" - print " -i: name of the input file (format='fasta')" - print " -P: parameters" - print " -o: name of the output file (format='aligned fasta', default='inFileName'+fa_aln)" - print " -c: clean" - print " -v: verbosity level (default=0/1)" - print - - -def main(): - - inFileName = "" - parameters = "" - outFileName = "" - clean = False - verbose = 0 - - try: - opts, args = getopt.getopt(sys.argv[1:],"hi:P:o:cv:") - except getopt.GetoptError, err: - print str(err); help(); sys.exit(1) - for o,a in opts: - if o == "-h": - help(); sys.exit(0) - elif o == "-i": - inFileName = a - elif o == "-P": - parameters = a - elif o == "-o": - outFileName = a - elif o == "-c": - clean = True - elif o == "-v": - verbose = "yes" - - if inFileName == "" and parameters == "": - print "ERROR: missing compulsory options" - help() - sys.exit(1) - - if outFileName == "": - outFileName = "%s.fa_aln" % ( inFileName ) - - if verbose > 0: - print "START %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - pL = programLauncher( inFileName ) - pL.launchTcoffee( outFileName, parameters ) - - if verbose > 0: - print "END %s" % (sys.argv[0].split("/")[-1]) - sys.stdout.flush() - - return 0 - - -if __name__ == "__main__": - main()
--- a/commons/launcher/launchTEclass.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,138 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import getopt -import glob -import shutil - - -def help(): - print - print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] ) - print "options:" - print " -h: this help" - print " -i: name of the input file (format='fasta')" - print " -o: name of the output file (format='map', default=inFileName+'.map')" - print " -c: clean" - print " -v: verbosity level (default=0/1)" - print - -def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ): - tmpHandler = open( inFile, "r" ) - outHandler = open( outFile, "w" ) - dClassif2Count = {} - header = "" - classif = "" - while True: - line = tmpHandler.readline() - if line == "": - break - if line[0] == ">": - header = line[1:].split("|")[0] - classif = line[1:-1].split(": ")[1].split("|")[0] - if not dClassif2Count.has_key( classif ): - dClassif2Count[ classif ] = 0 - dClassif2Count[ classif ] += 1 - else: - seqLength = len(line[:-1]) - outHandler.write( "%s\t%s\t%i\t%i\n" % ( classif, header, 1, seqLength ) ) - tmpHandler.close() - outHandler.close() - if verbose > 0: - for classif in dClassif2Count.keys(): - print "%s: %i sequences" % ( classif, dClassif2Count[ classif ] ) - sys.stdout.flush() - - -def main(): - """ - Launch TEclass to classify TE sequences. 
- """ - inFileName = "" - outFileName = "" - clean = False - verbose = 0 - - try: - opts, args = getopt.getopt( sys.argv[1:], "hi:o:cv:" ) - except getopt.GetoptError, err: - print str(err) - help() - sys.exit(1) - for o,a in opts: - if o == "-h": - help() - sys.exit(0) - elif o == "-i": - inFileName = a - elif o == "-o": - outFileName = a - elif o == "-c": - clean = True - elif o == "-v": - verbose = int(a) - - if inFileName == "": - print "ERROR: missing input file (-i)" - help() - sys.exit(1) - if not os.path.exists( inFileName ): - print "ERROR: can't find input file '%s'" % ( inFileName ) - help() - sys.exit(1) - if outFileName == "": - outFileName = "%s.TEclass.map" % ( inFileName ) - - if verbose > 0: - print "START %s" % ( sys.argv[0].split("/")[-1] ) - sys.stdout.flush() - - if verbose > 0: - print "launch TEclass..." - sys.stdout.flush() - prg = "test_consensi_2.1.pl" - cmd = prg - cmd += " %s" % ( inFileName ) - returnValue = os.system( cmd ) - if returnValue != 0: - print "ERROR: '%s' returned %i" % ( prg, returnValue ) - sys.exit(1) - - lOut1 = glob.glob( "%s_*" % ( inFileName ) ) - outDir = "" - for i in lOut1: - if os.path.isdir( i ): - lOut2 = glob.glob( "%s/*" % ( i ) ) - if len(lOut2) == 4 and "%s/%s.lib" % ( i, inFileName ) in lOut2: - outDir = i - break - if outDir == "": - print "ERROR: can't find output directory" - sys.exit(1) - os.chdir( outDir ) - - if verbose > 0: - print "parse the results..." - sys.stdout.flush() - parseFastaFileFromTEclass( "%s.lib" % ( inFileName ), - outFileName, - verbose ) - os.system( "mv %s .." % ( outFileName ) ) - os.chdir( ".." ) - - if clean: - if verbose > 0: - print "clean the temporary files..." - sys.stdout.flush() - shutil.rmtree( outDir ) - - if verbose > 0: - print "END %s" % ( sys.argv[0].split("/")[-1] ) - sys.stdout.flush() - - return 0 - - -if __name__ == "__main__": - main()
--- a/commons/launcher/tests/MockDataBankForBlat.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,299 +0,0 @@ -class MockDataBankForBlat(object): - - def write(self, inFileName): - f = open(inFileName, 'w') - f.write('>Vein_ctg_2286\n') - f.write('AGCAAAAaCGTATTTTCTAAGCTAAAATTTAGCGTAGAAGCTTGGACTCGCTtATTTTTT\n') - f.write('ATCCTAGGAATTAGTAGAATCTCTCTATAAATATTTATTTTTTATCCGCGCAAAGCTAGG\n') - f.write('AAACTATAGAATCTTATATATTAGTATATTCTAGCGATTATCTAGAGATTGTaGGGGGGG\n') - f.write('ggAAAGGTAGTAATAGGCGTGTAAGAGAGATAAAGAGTTtATCTTAGAATCTCCCTACGC\n') - f.write('CAAGCGAGATTCCTAAGGTAAGAATCCTAAGGTAAAGAATCTAAGCGAGAATTTAGAATC\n') - f.write('ACGTGCTCGCTTCCTACTACCGTAATTTTACGGTTATAGAGTACGACAGTTCCTATATAT\n') - f.write('AACCTATACTTATAACAACGAATATAaTATAAGACGTTAATTTGATTTTTTACCGTGTTA\n') - f.write('CTTATAATAAGGTGTCCCAAAGTTGTTCGAATTTTTTTTGTCCAAGATTGAATACGGTGT\n') - f.write('TGGTCATGTGATAGTATTTTCACGGTATTTAAACGGAAATTTTACCTACGGCCCCCcGCC\n') - f.write('TATGTACATCATGAGGATGCACGATGGAGATGAAATGCTTCGTTTCATACTGAGTGGCCG\n') - f.write('ATTGATGGACGCGTGAACGTCAAAGTAACTTCCATTTGTTTCCTGTCGTTGTCCGGTAGC\n') - f.write('CTGGCAAATAGTGACATGTTTGATGTCTTTGGAACAGGGTGGAAACTTTTTTTGGGTtGC\n') - f.write('AAATCGTCAAGAGTCACGTGTCCACACAGTGGCGCAAGTGGGGGCTTAAAATGGTGGACT\n') - f.write('TGCGCGAGGGCGATTTCTTTTCTTCCCCAACCACAATCTATCGTGATCAAAGAAGCACCC\n') - f.write('ATCCGGTTGTTGAGGTATGCCAGCCAGCATCCAATGATGCGCTCAAACAACAACTAACTC\n') - f.write('TCATACAGATCGCCATGCAGATTTTCGTGAAGACCCTCACGGGCAAGACTATCACCCTcG\n') - f.write('AGGTGGAGTCCTCCGACACCATTGACAATGTCAAGACCAAGATTCAGGGTACGTGGCCTC\n') - f.write('GCGCAAGCATTTGTAACCATCTTCTAACCCTCTGCCGTAGACAAGGAAGgAATCCCCCCA\n') - f.write('GACCAGCAGCGCTTGATCTTCGCCGGCAAGCAGCTCGAAGATGGACGCACCCTATCGGAC\n') - f.write('TACAACATCCAGAAGGAGTCGACCCTCCACCTCGTCCTCCGACTCCGTGGTGGTATGGCC\n') - f.write('AAGAaGCGCAAGAAGAAGGTCTACACCACCCcAAAGAaGATCAAGCACAAGCGCAAGAaG\n') - f.write('ACCAAGTTGGCTGTCcTCAAGTACTACAAGGTGGATGGCGATGGAAAgaTCGAGcGCcTC\n') - 
f.write('CGACGAGAGTGCCcAGCTGACGAGTGTGGTgCCGGTgTCTTCATGGCTGCCATGCACAAC\n') - f.write('CGtcAATACTGCGGAAAGTGCCACCTCACCtACGTTTTCGACGAGAGCAAGTAGACGGTT\n') - f.write('CTGGAAAGCATACAATGACAACCATGATTATGCTGTGGGATGATGGATGCTGAACAAAGG\n') - f.write('GAGGGAATAGCGACGGACACCGCGTTCTGTAGCTACACAATCAAGCAAAACCTTCACCTC\n') - f.write('GATAGCGGGCCTCGATCCATGTACTATCTTATGATATGTGACATTGAATGTGCGAGTGTT\n') - f.write('CATTATTCTTTCGCCGTAGCTTCGTTCTCTCTGTGTTCACCCAACCTCCAAGAATCCCGA\n') - f.write('CAGTTTCTGGCGATGTTCAAGAATGATATTGCCAGAAATTTATCGGTACCGAACCCTAGA\n') - f.write('TAAAAGGATGGGTTCGGAGTTAACGTAATTTCGAAGGGCAGATGCCATGTGCTTGGGTGC\n') - f.write('CGCGTTACGAGAATTACTATGATGGTGCTGTGTATCTTCACAATGGCAACATGGCAGAAC\n') - f.write('AACGCCAAGAAGTTTCTCCAATGTCAACCCAAGGTCTCCGGACGAATCTAGATGCACCCT\n') - f.write('GACTGCATCGTTATCGGTCATGCAAAAGCACGCCTAATGTGCAGCGTGATGACAAGGTGG\n') - f.write('AAAaGGAGGCAAATGCTGGTCTAGTGTCTTGTGGCAGAAGTCAGAGTCACGATGAGCAAG\n') - f.write('CATGGCGCATGAGGGTTGCCTCCGCAAGCATCCGACAGAGTAGAGACAGAGAAAACACCA\n') - f.write('CAGCAAAAGATGTCACGGCACTCTCGATTCGCTTGTGACGGCCAAGTCTAGGTCCAGGTC\n') - f.write('CAGGTCCAGGATACGGATACAAGGCGAATGTGGACACCCCAGCCTCCCGATTAGGGAACT\n') - f.write('ATACAGCGGGCTGGGTTTATGAATAATGAATCCAATTGCAGCATGAGATCATAGCGTTTG\n') - f.write('TCTAAGAGGCTTTTTGCTACTGTACATGGCGATTGGCGAGTGTTGGGGGTTAGGTGAGAC\n') - f.write('GGTCTTGGCTGGCAACCCTGTGAGCGCAAAGATGGAGAAGGGAATGGCAGGCGATCAGGA\n') - f.write('CGATTGTTTCATGGATGAGCAATGGGCATCTCAAaGAAACACGGATCGTTTCTGATGGAC\n') - f.write('AGACGGGCCAAGACTGACCAAGCATTTGTGAAATTGGGACAGGAGAAGGGATGGTGGCTT\n') - f.write('CGACGTGCTGCAGCTGCATACTGTGTAGCTGCGTGTTGATAGCTGCACAACTGCATTGGC\n') - f.write('TCGACCAGACACATTCCCGGAGCGTTATGCATCCAGCGCCTGATTCGCTTGGGACTTGGG\n') - f.write('TCGCGACTCGCGAGAGAATTGGTACTCGTAGTCGGTACCTAAGCTGCACCTTGTCCCCGA\n') - f.write('AGTAGACTGTCGAGACTGTATAGtAGAGGTCGAGGTATATTCTATGCTATACTGTACATT\n') - f.write('ATTGAAGTGCTCCCATCATATCACCAACCCTCCCGCCTCGTCTTCCATGTCcGTTTCCGC\n') - f.write('CTTCACATTTCAAAAAGTCGTTGAGTGGTCCTAGTGCTCAATTAAATTGCTTACCAGAGC\n') - 
f.write('GTCAGAGCTCTCCTGGAGTCCTGGCAAAAGCCAaaGCTCAACGACCAATTGCCCCATCCT\n') - f.write('CTCTCCTACCGAGTACTCCGTACCGCTCATCTCGTCAACCACCACCGCAGACAACAATCG\n') - f.write('CACCACTTCTCCAGCGCGTCCACTGAACTCAGGTCTCGCTGTGGTTGCGCAGTATTGCCT\n') - f.write('GTCGGACCCATCTCGCGCGCGCTACTGTCATTCCAACTCCCGGATCTTTCGGCCACAAAT\n') - f.write('TGGTCCGTGGCCTGCTCGTCACTCTCCGTCACCATCAAAACTCAACAGGCCGCCTTCACC\n') - f.write('TCACCAAAGTCACCCTTCCTACCACCACCGCGACCACCGCGACCACCACCACCACCTACA\n') - f.write('ATGTCGCCCACGCTGCCCTCGTTCGACTTCGCCACTACCAGAGGGAAGCGGTCGCGGATG\n') - f.write('CCTTCGTTCGCGCCGGCCCTTCCGGACGGACTTTCACACCTACCAAACATGCGATACATC\n') - f.write('ATTCCTCGCCAGGTGAGAAGGAGATGGAGAGCAACAAAGGGCAGACTACGTGTTGGACAG\n') - f.write('TCACCTGCTTCGAATGTCTTTAGACTGCAGACTTCTTGGTCGCCCACCGACACAATAGCT\n') - f.write('GCAcTGCGCACGCATCAGTGGTCATTGTATGACTTTCAGTACTTGGGACTGGCAATTCTG\n') - f.write('GGCATTTTCTGCTTGAGCATTATGGAgACTCcAGGTCCTATGGTTAAGACGTTCGGCGCT\n') - f.write('ACCGCACTCTTGCTATCTCTTTGTTTtCCCGTTACAAGTCAATTCTGGCTCCCAACGTTA\n') - f.write('CCCGTTGTAGGCTGGATCATTCTGTTCTTCTCCTGCAGGTACGTACACCCTTGCTTAATT\n') - f.write('TCTAATCGACCATTTCCAACTCGATACTAACACGCGTTTCTTTGTAGATTCATCGATGGC\n') - f.write('GCATATCGTCCTTCCATCCACGTCCGCGTTCTCCCTGCCCTAGAAAACATATTTTACGGC\n') - f.write('GCCAATCTCAGCAACATCCTATCTAGCCaCAAAAaTTCCTTCCTCGAtGTCCTCGCATGG\n') - f.write('CTACCATATGGCATCACCCACTTTGGAGCGCCCTTCGTCGTTTCAATCATCATGTTCATC\n') - f.write('TGGGGACCTCCAGGGATCGTCCCGACCTTTGCCCGCGCTTTTGGTTACaTGAACATTGCA\n') - f.write('GGTGTTCTCATCCAaCTGCTGTTTCCATGCTCTCCACCGTGGTATGAAAATCTATACGGT\n') - f.write('TTGGTGCCTGCAAACTACAGCATCCCGGGCTCACCGGCGGGTTTGGCGGCTGTGGACAAG\n') - f.write('CTCTTTGGGGTCGACATGTACACTACAGCGTTCACCGGCTCTCCCCAGGTTTTCGGCGCC\n') - f.write('TTCCCATCTTTACACTCCGCCAACGCCACCGTCGAGGCTCTCTTCATGAGCTTTGTTTTC\n') - f.write('CCAAAGCTTGCACCCCTATTCATTGCTTACACGCTTTGGCTTTGGTGGGCAACTATGTAC\n') - f.write('CTGTCGCACCACTACGCTGTAGATCTCGTTGGCGGAAGTCTTCTCGCTGGCATTGCTTTT\n') - f.write('TATATTGCTAAAGCAAAGTTCCTCCCACGCATGCAACCCGACAAGGAATTTCGATGGGAT\n') - 
f.write('TACGACTACCTTGAGATTGGCGAATCTCAGGACAATTATTCAGCCAAGGATGGCACTGGG\n') - f.write('TTTTACGAAGAGTTTCAGACCGGCGTATCTGATGACGAATGGACCGTTGGCTCAAGTTCC\n') - f.write('AGCATCTCGTCCAGAAGTAGAAGCCCTTCCAACACCAACAGATCCTCTAGCGAAACATCA\n') - f.write('ATGTGGGAAGGTGAAACCCTGGCCTCGACTtCCGACACCGAACAACGAAGCAGGTAGTAC\n') - f.write('CAGCAAaTTAAtctCAACATCCTCGCcTtCCCcGCGACCCAAGCTACATTCTCAaCcAAC\n') - f.write('CTGGTTCcTCGATTTTtCGTATCGGTACCCTTGTTGTTCACACGGGACACAATCACGTCA\n') - f.write('ACATTGGGCGGAGTTTTGCATCTCTGCATTTGCATGTCACATCTGCAGttagg\n') - f.write('>Vein_ctg_2288\n') - f.write('GCTGACTTGATATGGAGACAATAACGCAAGGCTGACCGACTGACtGTTTGATtATGTGCA\n') - f.write('GGAaTAAAAATACAAAATTGGAATTCGCAAGAGGACAAACAAAACTCTTTTGAACAGCTG\n') - f.write('TAGAGATCCACAAAAGATTACAGACCGACCCGGTAATCTGAGACGCTCAGCCTGCAACAC\n') - f.write('TTCACCCGTACGAAACAATGGAAACCTGGGCAGACGAATGAGAGGAAGTGGATATTCGCG\n') - f.write('GAGTAATTTACATCGAGAAACATTCAGCCACCAGATCCATGATCGTGAAACGCCAAAGGT\n') - f.write('CGAAATGGGTTAGCTGGAAGAGGGCGTTTTTGTGGAGAGTCGTCGACGACAACAATGGAT\n') - f.write('CCACAGCTTTCTATCAATTCAGTAATCCAACAGTAGCAGCTGACCTAAACATGGCCAGCT\n') - f.write('ATAGATCGATCAATATATCCAAGCTTCAGATACCTCCCGAcTCCGCCGGACGATATGGAG\n') - f.write('AGCGCACGTCGTGCAAGCTACGCACTTGATGCCCGAGAACGAGAAGGAAAGGAACCGACA\n') - f.write('ATGTAAACACCGAACGGATTACACTCAACTACATTAGTaTAAAAAATCGCCAGAGCGACG\n') - f.write('TCGTAAACCCCTCAATATGCCATAACAACGCGCCCCGACCTGGTATAAAACTGCCAAATT\n') - f.write('TCCCATAAACACCACCCCATCGCACGAATGTCCTCCTGCGCAACAGGATAAAAGAGGCCT\n') - f.write('TACCGAGCATCCACGTACCTCTCCCCCTTGTCCACACCCCcAACCTCCCTCTTCTCCTTC\n') - f.write('CTCGCCCCcACAAGCGCAAGCCCCGCCAACACCAACCCAACAACATCCGTCAAGAAAATA\n') - f.write('GGAATCAACATCCACCTCCAAGCCTCCATATTCCGAACCTTGTTCCTAAACGTCTTCTTC\n') - f.write('AAGCTCTCCTCGATGGGAAGCTTGAGGACGGCTTTGTACCATGTTTCTGGCGTCCAGCGA\n') - f.write('TGGTCTGGGTAGGGTTCTCCTGCGCGAAGGAGGGAGGGCGTGATTtGTTTGTGCTTGGTT\n') - f.write('TGGGAGGTTACGGTGAAGGTGAAGATGAGGGCGGAGAGTGTGAAGAGCGTTGCTAGTGTA\n') - f.write('AGGAGGATTGTCAGGACCAGGACGAGGGTCGGTGTACTCTATTCACAAACACATATTAGC\n') - 
f.write('CTGATATCGGAAGAACATTCGATACAGGAAGCGAGGGAGCGATACATAcTTTtCCCTTTT\n') - f.write('tGTATCGGAAGGCAATGAACATGCCCAGGAGACCTAGGAAAAACCCATAACCGGCCACGC\n') - f.write('CGTTAGATGTGTGGCCTTGATCGAGCCAGATGTGGGCTGGGAGTGCTTTCATCTGGATGC\n') - f.write('TCTCGCTTGACGACACTTGAACGGAATAACCAGGCCCGTATTTTTGGACGTGCAGGAAAC\n') - f.write('CGACCATGGAACTTATGAGGCCTAGATTCGTGATGGAGAGTGCACTCAGCACGAATTGCG\n') - f.write('AGACGAAAAGGGGGAGgAGTAACATGATTCCTGGTGTGATTGCTAGGTGTGTAGTGTACT\n') - f.write('CTGAGGAATAAAAGAGTATGTAATAGCCAAATTCCAAAATATTAGGGCGTTTGGAATGGA\n') - f.write('CGTGGAACGAGGAACGATGAaCAAAGAGAAGAATAAGCGTCCCGTCGAGATCACAAACAT\n') - f.write('CCCGCGCCGCCTTGTTTCTCTATCTATACGAACACACACACACATACATACCTCAGCCGC\n') - f.write('AAGGGAACCGCATTGGCAAAGCTTGTGCAAAATATGCATGGTTGCCGGTAGGTATGTAAG\n') - f.write('ACGCGCAGCAGCGGCACACAAGAGTCAAGACACATCTTGTCCAAAAGCCCCGGCCACATG\n') - f.write('GAATGCAGATTCGTTGGCACCCTCTCAGCACGGATATGTGTATCCATTACCGTTATCGGT\n') - f.write('TTTCGGGTTAGTGCTGACTGATGCGATTTTTTACATGCTGAGAGTGTGGGTATTAGGAAG\n') - f.write('CAGTATGTATGTAATCATGCTGCGGTCTACGGTCTGCGGGTCGGTCAGGCTGAGGCTGAG\n') - f.write('ACATTCACCGCTTGGTATCTTGATCATGATAATGTTTGTTGCTGCGCAGAGGATAGCGAT\n') - f.write('TGACCAGTGTTTGTATGTATTAACCATGATTGAAGCTTTTTTTTTCATTGCCAATTTTCT\n') - f.write('GGTTGTCGAGTTCGGCCCAAGCTTTtGTCTTGTGGATGGAACGTTTTCAGCTCCTGTTCA\n') - f.write('GATCGGGCAAAGGCCGGGTTTTGAATGTTGGGGTGTGGCATTTCGCTATTTGGTGGGGTT\n') - f.write('GTGCAGACTGCATAAAGAGTGATTACTACAACTGTTTGCATGTGGAGCTGTGTGACCAGT\n') - f.write('TTGTATGCACAGAATCTTTCAAAATGGCATAATCACTGCCATAGGAGGTTCGATTCGATG\n') - f.write('GAACCTTTGCAGGGACAGTCTTCCTTCTCGAATATCGAGTTCAGTATTTGGGGCGTCTTT\n') - f.write('GCTCATCTTCGTTGGCATCGTCTTTGCAAATTGTGATCAGTTGCCTTTGCACCTGCGTGC\n') - f.write('CTTACAAGTCCCCGTTGCTGGTTCGAATTATTTCATGTCTGATAGGATCCTTGACATTGA\n') - f.write('TGTGTTCGGGCAATAGCTTGGAGTAAGCACAGAATGCTGGAACGTCTACAGGGTAATTAT\n') - f.write('GAATACGCAAAGTGGAACGAGGCATGTCACAAACCAGCTGGTGCGACAGTAGGATGGACA\n') - f.write('ATTCCATCGAAACTCACGGGGAAAGGCCAAAAGGGACGGCATCACGGTGTCACACATCGG\n') - 
f.write('GGAGCGGCATCTCCAGGAGAATATGCCAGCAGCATTCCTGCTTTGCCATGCTCTGCTCAT\n') - f.write('GCCTCATGCATAGCGGCATGAATTGGTCTGTTCAGTCTGGgTAAATTGTATTCGGTTTGA\n') - f.write('CTACAGTTTACACCATCCTCAGCCACCACGCTGAAAGAGCCCCGGTTTGGGAAGCCCTAA\n') - f.write('TGCCATATCCTtCCTTTTGTCATGATTCTTGGTATAATGCGTGTGTATCTAACCGACCAA\n') - f.write('TTGGACGACTCCACCGGCGTCCTTAAtCTTTTtCTCGGCCTCTGCGGAGAAGtATCGGGC\n') - f.write('GCGAACGACGAGTGGGATTTctGGGAGACGACCCTTTCCGAGGACCTTGGAGTAACCGAA\n') - f.write('GGAGAGCTTTtCAAAGATTAGCGATTGCGATGGAATCGGCGCGTAGAGATACGTACGAGG\n') - f.write('TCGAGGACTGGAGCGGTGTCGGACTTCTTGTTTGAGAGGTACTTCTCACGCTGCTCGAGA\n') - f.write('GGAACGAGGGACCAAAGCTGTACTGGTTAGTAGCGGTACAATTTGAATTGGACTGCAGAA\n') - f.write('CTGACCTTGTCCAAGTTGATGGTTGGCTTCCAGAAGTGGTTGCCTTGCTTGTGGAAGTAT\n') - f.write('CGCATACCGACCTTTCCGAAGTAACCTGGATGGTATTTATCCATGTTGGTTCTGTGGTGG\n') - f.write('TGCTGACCACCAGCGAGACCACGACCACCGGGATGCTTGCGGCTATGCGACTGTTAGCCA\n') - f.write('ATGCCCTCTGATTCAATTAAGCCCAGTCCTGTAGGCGCTCTTCTTTCAAGTAGCAGTGAG\n') - f.write('TGTGGGCGAGAGCATTCGCAAAGCTGAAGAATTCCTCCCACACTGACTGCAATCGCGAAA\n') - f.write('TTTCGAAGATCCATTCGACATCGAGGAGAGGGTTATTGCTGAGGGCGTAACATACTGCTT\n') - f.write('GCCGACACGACCGTGACCGGCAGAGACGTGACCGCGGTGCTTGCGGGTGTTCGATAATCT\n') - f.write('GGTAGGCATCTTGGATGTGAAGGTGCTTGTCGTTGCTTCGGGATGGGAGATAGCGCGAAG\n') - f.write('TTTCGATATTCTCGTCTCTGTGTAGCGAGATTTTCCCGACGCCAATATAATAGTGACGCT\n') - f.write('AGCCGCGTCGGGAACTCTGATCATGTGACCACATCGGACTACCAaGAAGTTGGAAATGAT\n') - f.write('TGATTTGATTCATCTTGGTTCAAATTTACTTTGAAGCAAGATCTAATGATTCTAATCCCT\n') - f.write('CTTCTTACAAATGGAAGTGCCCAATATCAACGCTCATTTTCCACCACGGAATGTCGAGCC\n') - f.write('CCAGAGTCCAACCGAAGCCAGACAGAGACCACAATCTTCCAGTTCAAGATCAAACCAaCG\n') - f.write('CAGTTCAATCTCTCCGCGCACAAACAATACcATAAACCCGAATAAAGGGTGTACCAATTG\n') - f.write('ACTAACCAATCGCAGCCCGCTCAAATCCAATCGGCCCTCTCTCCCCACATCTCCAGCCTC\n') - f.write('GGTATCATGGCAAAGaTATCACCGTCGGCCTAAGAACcTtGAAATGCCGGGGAATAaaCC\n') - f.write('ACCGCCGCATAAGCATTCCAACTCTTAAGAACTGCATTTGTGAAAACCaTATCCATCTAT\n') - 
f.write('CTCATTCCCCGCTCCAGCACTCTCACAGTCCTTTAAaTACTTCTGCGCGCGCAAGTCTCT\n') - f.write('GTCTTCTAAGCTCATCCCCTCCACCTTACCTCGTACTTCTACCGAAGCAATCCACAATGT\n') - f.write('CAAAACAACTCTCCACAAAGGAAGTCGCAGAACACAAGTCCGTCGAAGAGGGTCTCTGGA\n') - f.write('TCATAATCGACAACGAAGTCTACGACGTCACAAAATTCATCGACGAACACCCCGGCGGTG\n') - f.write('TCAAGATCCTAAAGCGCGTCGCTGGAAAGGATGCCAGCAAGCAATTCTGGAAGGTGAGAG\n') - f.write('ATAATCCTCTTTCCCCCCAGACGTTCTGTAGTTGTAACTAACCCcGCTCGTGTTCATAGT\n') - f.write('ACCACAGCGACAACGTCCTAAAAAAGTACAAGCCAAAACTCAAAATCGGAAaCGTAAAGG\n') - f.write('AAGACGCCAAaCTATAAAACCCCCGCGCACACACAAGGAGAAACATTGAAGAGTGGAGGA\n') - f.write('TATGACACGAGTACAAGGCAGGCAGGCAGGCAGGCGGgCGGGCGGTCAGGCAGGCACGAG\n') - f.write('GCCGTCCAGCGTTATACCGGCATGATGAGAGAACGGCATTGCATATACACCCACCAGTAA\n') - f.write('TCTTGTCTACCTTGGGGGGgTTATATAGACGCATGCATAGATGAGCGGAGCGTACTAGTT\n') - f.write('TTACTTGCAGCAGACACTGCTGAACATCATCATTTTGGATACTTCACTTTCAATTCGTTG\n') - f.write('GTTTGGGCTACTTTACTCCTTTGCGGAAAGGAAGTGATTGGTCAATGTAGAATCAAGGAG\n') - f.write('CCATTCCAAATTTCCGTTGTTCCAATCCTGATGCTTTCATGAATGGCAATCGGCTGAAGT\n') - f.write('TTTCAGGGCGAAGAGTATTGATTCGTGGGATAGACAACCCCAACTTCACCACAAAGGAAA\n') - f.write('GGAGAAACAGAACCAATGGACAAAAACAACGGGGAGAGTGGTCATAAAAAGCACAATGGT\n') - f.write('ATCAATTACTAAAAGCAATAGtGACGAtCGAATTtCCCTCCCAAACTCATATtCTGAGTT\n') - f.write('CCCACAGCCGTGATCCCAAACGCCACGACATTGAGAAaTGAAAAGTCAGACAAGCAACGC\n') - f.write('CATCGGGGTgTCCGAATCAATAAGAAGAAAaTAAAGGAATAAAGCAACGCAACGCGACAT\n') - f.write('TCCATCGTATTGGTTCAACCATTCTCACAATGCCCAGTACCATGTCCGCTAATCACCGAG\n') - f.write('ATGGATGTTTCCCCAAACTCGCATCCAGCCGTGCTGGCCTCGACATAAATgCTTGATAGG\n') - f.write('ATCGCATAGTTGCGGCAGTCTCACTTTTGTGCTCTTGGGTCGTCGTCCGAGGTCGCAGTT\n') - f.write('GTTTTGTATCTGAAAACCCCGCAGGGAGGGCCCTCAAAAGCTGTCGATTCGGACAGCCCA\n') - f.write('TGGGATTTGTACCACCAAGATCATTCGCGCCACTCTGAGCAGACCTCTCATTGTCATTTG\n') - f.write('AAGATTTGGGACCAGACTGGCAGGAAATCACAGCACTTGTAAGCTGAGTGGgCGAGTGGA\n') - f.write('GAGAATTGACACCATTTGCAGGCTGAGTAGGCGCACCATTGACCACCAGGCCTTGTTGGG\n') - 
f.write('ATTGTCCGCCTCTCTGTATATGATCCGTtACCCGCTTGCAGCGCCCATCGTCCATCGTCT\n') - f.write('GAAGTTGGAGCTTGATATTTGCTGGCATGGTCGCAAGAACATGACTAGGTAGGTGgCTTG\n') - f.write('CAACGACATCCGCACTTCTCAAaaCTGGTTTGGGCTTACTGCCGAATCCATCAGGGCGCT\n') - f.write('TTTGAGCAATGCTTTGATTGCCATCTGAAGAGTCGAAACCAGAACTGCGCAAAACATGAA\n') - f.write('TGAGCAACGATAATTCTAACGAGACGTAGAGCTTTGTGGGCTGTTTTTTTAGTAGATGGT\n') - f.write('GAGATGGAAATGATATTTGGCTGATGAACCGAAACGCCCCAAAATTGTGGGTCTCCGCAT\n') - f.write('GGAGACTTACGACTTGAGGAGTTGGGCGTCCAACCTCTTCAAATCGCT\n') - f.write('>Vein_ctg_5197\n') - f.write('AAACCATTTGTACAGTCCTAGCCTCCTCTTCAAGAACCAGACGCTAAACAAGCCGACGAA\n') - f.write('GAGTACTAAAGCACCACGCGTCATCTGGTAGATAGACGCGGCCACAAACAACAAGCCCAC\n') - f.write('ATTCATAAGCGTAGTGCCTATGATATCGCAGATCGCTGGCAGAGCTAGATAtGTCACTTT\n') - f.write('CCAGCCTCCCAAAGTCGCTCTTCCATCGTGTGTAAGtGTGAGAGAGgTCGCTAGCGGATC\n') - f.write('GTCCGAGTATGCATCAtcGTCATCGTCTTCGGGCTCTTCATTGTTAACGAGTAAAGGATC\n') - f.write('GGGGGCGGAGGAAGGGATGGAGAAGTAGCCGGCCTGCTTCCCTCGTCGTTCCTGGATACG\n') - f.write('TCTGTAGAGAGAGTGTGCTCCGACAACGAGCCAGCAGCCcATCTcGCCCATGAACaTTTG\n') - f.write('CAGCGTTTGTATGACtGGTTGTTCGAAGTGTcTCCTGTtCTTTGGGTTGGTAGCTTTGCA\n') - f.write('GTTGCCAGTACaTTGCATGtCCTTTCGCGATCCAGTCAGTTAAGGCCTCATATCAGCTAG\n') - f.write('CAGGAGACAACCATACCTGGTACTTCGTGAGCAGAGTATTACACACTCCCGTCGTCAACA\n') - f.write('TCATAGCCaCCAAAAACGGAATCACCGCCTTCACGGCcATGGCTGCGTGCGAGGCCGGAG\n') - f.write('CTCAACGAGCTCAAGGCAGGGGATGGGAGAGCTTCGggCAGGCCTATAAGACGCAAGCTG\n') - f.write('ATGAATAGTTGTCAGTGATATTGGAGGTACTTTAGCTGGAGGAGAGCTGTTGGGTATTTT\n') - f.write('CAGTGTTTGAAGGTAGGCACTATAGAAGCTCGTCGCGTGATGGTGATTCAGACAGGGTCT\n') - f.write('GTGATAAGTAACGACCTCGGGAGGTCCGTTCACAAGCTACGATAGTACCGATTTACCAGT\n') - f.write('GCTTGGCACTGTTGGCAGACATCGCGGTCGCGAAAGACGAACGCATGAGGCATCTGATTA\n') - f.write('CAGCGAGCACGCTAGGAAAGATATGAGTGTGGGATTCTCCACACAAAAAGTGGCACAGTA\n') - f.write('TCAGTCATAACACATCGCTTCTATTGTTTGTTTGAACTCGCAGAAGTGAAATCTAGAGTC\n') - f.write('CCCAGGAGCAGACTCCTTCGCATAACCGTTCCTTCGTGAGCCGCATTAACACATGGAAAG\n') - 
f.write('AAGTTGACCGGCTATTTTtGCATACTTTGACGAACGTCGAACCATGCCCCATGAGAAAAA\n') - f.write('AAAaaCaGATGCAGTGGATATATTGAAGACCTGAGAATGCAAaGAtAACTTTGCGGgAGA\n') - f.write('GGTTCCTACCCGACGTTTCCATCAGAAAACCCATTCCGCTAATCACCCTCATCGAACTAT\n') - f.write('GGGTCGTGGAGCATCTACTTCTTGGCACCAGCCTtGACGGCAGCCTTGGTAACCTTACCG\n') - f.write('GCTTTGTCGACCTTGACGACGCTCTTGATGACACCGACAGCGACAGTCTGTCTCATGTCA\n') - f.write('CGGACAGCGAAACGTCCGAGTGGTGGGTAGTCAgTGAAAGCCTCAACaCACATtGgCTTG\n') - f.write('GAaGgAAtcATCTTGACAaTGGCGGCGTCACCAGACTtGATGAACTTGGGGCTGGTTTCG\n') - f.write('GTGGCTTtACCAGTACGGCGATCGATCTtCTCCAAAAGCTCAGCAAACTTGCAAGCAATG\n') - f.write('TGGGCAGTGTGGCAATCcAAGACTGGAGCGTAACCAGCACcGACCTGACCTGGGTGGTTG\n') - f.write('AGGACGATGACCTGGGCGTTGAAGGACTCGGCACCCTTTGGAGGGTCGTTCTTGGAGTCA\n') - f.write('CCGGCGACGTtACCACGACGAATTTCCTTGACGGAAACGTTCTTGACGTTGAATCCGACG\n') - f.write('TTGTCACCTGGGAGACCCTCAACGAGCTGCTCGTGGTGCATCTCGACGGACTTGACTTCA\n') - f.write('GTGGTGACACCAGCTGGGGCGAaGGTAACGACCATACcGGCCTTGATGACACCGGTCTCG\n') - f.write('ACACGACCGACTGGCACTGTTCCAATACCACCGATCTTGTAGACATCCTGGAGAGGAAGA\n') - f.write('CGgAGGGgCTTGTCGGATGgACGGGATGGTGGgTCGATGGCATCAATGGCCTCGAGGAGG\n') - f.write('GtCTTTCCGGTGACCTTGGACTtGGTCTCCTTCTCCCAACCCTTGTACCATGGGCAGTTG\n') - f.write('CTGGAGTTGTCGATCATGTTGTCACCGTTGAAACCGGAGATTGGCACGAATGGGACGTGC\n') - f.write('TTTGGGTTGTAACCAACCTTCTTGATGAAGGAGGATGTCTCCTTGATGATTTCGTTGAAT\n') - f.write('CGCTCCTCGGACCACTTGGTGGTGTCCATCTTGTTGATGGCGACGATGAGCTGCTTGACA\n') - f.write('CCcAAGGTGTAGGCGAGCAGGGCGTGCTCACGAGTCTGACCATCCTTGGAGATACCAGCC\n') - f.write('TCGAACTCACCAGTACCAGCGGCAAtGATGAGAATGGCGCAATCAGCCTGGGAGGTACCA\n') - f.write('GTGATCATGTTCTTGATGAAATCaCGGTgACCgtGGGgCGTCAATGACGGTGAC\n') - f.write('>Vein_ctg_10638\n') - f.write('AGTCATAGTTACTCCCGCCGTTTACCCGCGCTTGGTTGAATTTCTTCACTTTGACATTCA\n') - f.write('GAGCACTGGGCAGAAATCACATTGCGTCAACACCACTTTCTGGCCATCGCAATGCTATGT\n') - f.write('TTTAATTAGACAGTCAGATTCCCCTTGTCCGTACCAGTTCTAAGTTGGTTGTTAAGCGTA\n') - f.write('GACTGGGTTCCCTTGGTTGTAGACCAAGAAGATGAAAGAACAGAGTTGCCCCCATTCTCT\n') 
- f.write('CTTCCTCAAGGTCAATACCAAGGGTCCATTTCTACCCAACCCTTAGAGCCAATCCTTATT\n') - f.write('CCGAAGTTACGGATCTATTTTGCCGACTTCCCTTATCTACATTGTTCTATCAACTAGAGG\n') - f.write('CTTTTCACCTTGGAGACCTGCTGCGGTTATGAGTACGACTAGGCGAGAAAATTATTCTTT\n') - f.write('CCCTTGGATTTTCAAGGATCGTTATGAATGCACCGGACACAGCAAAAaGTACTGTGCTCT\n') - f.write('ACCAGCAGATAAACCTTTTCTCCTGCTAAACAGATTTCAAGGTTTATACTAACTGTCAAA\n') - f.write('AAGAAAAGATAACTCTTCCCGGGACCCATACTGATGTCTCCAAGTTCAATCGCGTTGCCG\n') - f.write('CAAAAAATCCTCGTCCTAGTACCGGAATATTAACCGGTTTCCCTTTCGATAGGCGGCCCA\n') - f.write('AAAATTGGGCGCTTTGAAAAAACGGAACTTCCCTATCTCTTAGGATCGACTAACCCTGGA\n') - f.write('CCAACTGATGTTCTCCAGGAACCTTTCTCCACTTCAGTCTTCAAAGTTCTCATTTGAATA\n') - f.write('TTTGCTACTACCACCAAGATCTGCACTAGAGGCTGTTTCACTCCGGTTCACACCAAGAGC\n') - f.write('TTCTTAACAGTTTATAAAAACCTCCACGCCTGCCTACTCGTTATTGCTTCGCTTTTACAA\n') - f.write('TAACGGCAGAGTATGGGTAACACGCTTAAGCGCCATCCATTTTCAGGGctAGTTCATTCG\n') - f.write('GCAGGTGAGTTGTTACACACTCCTTAGCGGATTCCGACTTCCATGGCCACCGTCCTGCTG\n') - f.write('TCTAGATGAACTAACACCTTTTGTGGTGTCTGATGAGCGTGTATTCCGGCACCTTAACTC\n') - f.write('TGCGTTCGGTTCATCCCGCATCGCCAGTTCTGCTTACCAAAAaTGGCCCACTAGAAACTC\n') - f.write('TGCATTCAATGACCTGCTTCAATTAAGCAAACAGGTCGTCTTACATATTTAAAGTTTGAG\n') - f.write('AGTGGTTGAAGGGCGTTTAGCCCCCCGAGACCCCAATCATTCGCTTTACCACATAAAACT\n') - f.write('GCGTATAAGTTTCTGCTATCCTGAGGGAAACTTCGGCAGGAACCAGCTACTAGATGGTTC\n') - f.write('GATTAGtCTTTcGCCCCTATaCCCAAATTTGACGATCGATTTGCACGTCAGAATCGCTAC\n') - f.write('GAGCCTCCACCAGAGTTTCCTCTGGCTTCACCCTATTCAGGCATAGTTCACCATCTTTCG\n') - f.write('GGTCCCATCATTAGTGCTTTGTCTCGGTCAATTCAGTATAAAACGTCAGCGCCGGACGAT\n') - f.write('ACTGCCTCCTTAATGGATTCGTATCAATCAGTTTCCTTACGCATATGGGTTTGGCACCCA\n') - f.write('AATACTCGCACTAATGGTGGACTCCTTGGTCCGTGTTTCAAGACGGGTCATTTAGAGTCA\n') - f.write('TTAAGCCAACAACCTAAGCGAATAGAAGTATAACCAAAAAGATCAACCTTGATACCGTAG\n') - f.write('TACCTCAGAAAACCTTCCTGGAAAACTCGCCAATAAGCATTCGCTGCGTTCCTCAATCCA\n') - f.write('ACCCAAGGTATTTTCTAAGGGACTATAACACCCACAAGTGGGCCACATTTCCCCTAGTTT\n') - 
f.write('TTTCCCTCAAGTCAAATTGTCGTTGGCAGGCATAGCCTGCAAGTGCATCCAGGCCGAAGC\n') - f.write('CTAGATTGATTACAGACAAGCCAGTCTGGCTCCAAACGGTTCCCTTTTAACAATTTCACA\n') - f.write('TACTGTTTAACTCTCTTTTCAAAGTTCTTTTCATCTTTCCCTCACGGTACTTGTTCGCTA\n') - f.write('TCGGTTTCTCGCCAATATTTAGCTTTAGGTGAGATTTACCACCCAATTTAGGCTGCATTC\n') - f.write('CCAAACAACCTGACTCTTTGAAAGCGTATCACAAAAGGCAAATGCTCAAGCCAAAGACGG\n') - f.write('GATTCTCACCCTCTATGATGCCCTGTTCCAAAGGACTTATTTACTCGGCTTGCCTGGAAA\n') - f.write('ACACTTCTACAGTCTACAATCCGGTTTAGCTAGGCCAAACAGGTTCCAACTTTGAGCTCT\n') - f.write('TTCCTCTTCACTCGCCGTTACTAGGGAAATCATTGTTATTTTCTTTTCCTCCGCTTATTG\n') - f.write('ATATGCTTAAGTTCAGCGGGTAATCCCACCTGACTTCAGATCATAGTTTGAAAGTTACTG\n') - f.write('GATTATACTCTTGTACTTTACTTCCTGGGCGAACCAAAAAAAAaGATCCTGAGACCAGCG\n') - f.write('TAATATTCCTGCCTAGCAAGCCAGACAGAAAATCACACACATTTTAGGTGCTCACTGTAA\n') - f.write('TAAAACAGCGATGCGACCCATCACCACATAAACAAATGTTATGTGTGGGTTTGTGATGAT\n') - f.write('ACTGAAGCAGGCGTACTCTATAGAAAAACCAT\n') - f.write('>Vein_ctg_10639\n') - f.write('ACAAACAGACAAACAGACAAACAGACAAACAGACAAACAGACAAACAGAGAGGCAGACAA\n') - f.write('ACAGAGAGGCAGACAAGCAGACAAACTTAACATAATGCTTGCATACAAGTATCCTTGAAG\n') - f.write('ATCAGAAGCCAAGTGTCAAACTGCTAAAACTGAATTACATAAGTGAATCTAGATAAAGAA\n') - f.write('TCACATGTGGGGGAAGAACATTAAACTAATACTGTTTACATAAAAAAAAaGCAAAAAAAA\n') - f.write('ATAATATTTAATTTGTATAGCTGAAAGTGTTCCCGTAAGGAACAAATTCAATGACAAGGG\n') - f.write('CTTAATCTCAGTACATCGTAGCAACAAAGGCTACTCTAGTACTTACAATACCCCGTCCAT\n') - f.write('TTCATGTCGTCTGCATGCGATTTATCACTTTGATCATTTGCATTATCATCACAGGGTAGT\n') - f.write('GAATCACAGCATTTCCGCTGCAAAGCCTATCCCGCAAGTAAGGTTTTCAAGCCGAAGCTT\n') - f.write('TATTTGTACACAACTAGTACAATCAAAGCACAGTAGTATCGCTTCCAGCATGGATTCTGA\n') - f.write('CTTAGAGGCGTTCAGCCATTATCCAGCAGATGGTAGCTTCGCGGCATTGGCCTTTCAACC\n') - f.write('AGCCGCAAATACCAATTATCTGAATGAAGGGTTCCTCTCGTACTAACTTCAATTACTGTT\n') - f.write('GCGATACCAATACCATCAGTAGGGTAAAACTAACCTGTCTCACGACGGTCTAAACCCAGC\n') - f.write('TCACGTTCCCTATTAGTGGGTGAACAATCCAACACTTAATGAATTCTGCTTCATTATGAT\n') - 
f.write('AGGAAGAGCCGACATCGAAGAATCAAAAAGCAACGTCGCTATGAACGCTTGGCTGCCACA\n') - f.write('AGCCAGTTATCCCTGTGGTAACTTTTCTGGCACCTCTAGCCTCAAATCTTGAGATTCTAA\n') - f.write('AGGATCGATAGGCCACACTTTCATGGTTTGTATTCACACTGAAAATCAAAATCAAGGGGA\n') - f.write('CTTTTACCCTTTTATTCTACAGGAGATTTCTGTTCTCCTTGAGTCCCCCTTAGGACACCT\n') - f.write('GCGTTATCTTTTAACAGATGTGCCGCCCCAGCCAAACTCCCCACTTGACAATGTCAATAA\n') - f.write('CATGGGTCGCACCCTAATGGATGCTTAAAGCTAGAAGGTGAGTCTTGCGACTCAATCCCA\n') - f.write('CTTAATTATTTAAGTAAAAAAaCAATAGA\n') - f.close()
--- a/commons/launcher/tests/MockESTBankForBlat.py Tue Apr 30 14:33:21 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5996 +0,0 @@ -class MockESTBankForBlat(object): - - def write(self, inFileName): - f = open(inFileName, 'w') - f.write(">gi|226792376|gb|GO546081.1|GO546081 Mdas9010M17_e784.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_850886.1| expressed protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("GTTTCACGCTCTTCCTCAGCTTCATCAACCTCCGAACCCTAAATTCCCAAATGGCCTCCTCGCCGGACCA\n") - f.write("CCACATCAACGGCGACACCAAAACCCACATCTCCAAAAAACCTAAACTTTCCCCAAATTTCATCACTGCC\n") - f.write("GCCGAAATCGCCGCCGAATTCTCCCACCACGACCCGAACGTCGCCCGGATCAACAACGGCAGCTTCGGCT\n") - f.write("CCTGCCCCGCCTCCTTGATCGAAGCCCAGCGGCGGTGGCAGCTCAAAAACCTCGCCCAGCCCGACCATTT\n") - f.write("CTACGTCAACGAGCTCAAGAAGGGAATCCACCGTTCCAGAACCATTATCAAGGAGCTCATCAATGCGGAC\n") - f.write("CATGTCAACGAGGTCTCACTCGTCGACAACGCCACCACCGCCGCCGCCATCGTGCTTCAGCAGACGGC\n") - f.write("\n") - f.write(">gi|226792375|gb|GO546080.1|GO546080 Mdas9010J22_e767.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTTTTTCCCATTAAAGAATTATAAAAGATCAATACATAGAGAAATAAGAGAGGGACATATATATTGTCA\n") - f.write("CATATACTGGCTAGATATATATATATCTAGCTAGCTACTTGACAGTTATTTGTACGTAATTACTTGACAA\n") - f.write("TTAAAAGAAGAAGAAGCAAAGAAAAGTGAACAAGACAAAGATGGAAGTAAGACCATGAGGGTGGAAGTGC\n") - f.write("ATTCTCAAATGAACCGATTTAGAAAAATAAGGTGGCGGGGGACTGTAAGGATTTACAGGATAAAGGCTTG\n") - f.write("GTGGTACTATAGAGGGCGCAAATGGA\n") - f.write("\n") - f.write(">gi|226792374|gb|GO546079.1|GO546079 Mdas9010L17_e779.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAG35782.1|AF280060_1 tonneau 1 [Oryza sativa], mRNA sequence\n") - f.write("TTTTTTTTTTTTTTCAGTTACTAATTTATATTTCAAGAAATCAATCAAAAAGCACTCCAACAAAGCACTA\n") - f.write("AATCAAATCAAAATAGGGTATAATCTTACATAACATGCCTATCATTTGCGGGTGAAATACACAAAATCAA\n") - f.write("ATACCGTGCAGTTATACAAAGGTTGTCATCGTGCAGCTACATCTGGTCAGCCCTGCCGTCATCCTCACTA\n") - 
f.write("ATTCCATCTCCTGCGTGCCGCCAAAATGTTGTTAAATTGCGAGCTTTTCTATCCAACTGGAGGTTTTCCA\n") - f.write("TCGCACTTGAAGCTCGAATGACATCCTCTGGACTTTCATCACTGTCGTATCTATAGCTGTAGTCATCTTT\n") - f.write("CCTGTAGCCAGACATGGAGGACCCACCTCTTCGATCAAATGACTGGGAAGATGCACCAGGCCTTCCTAGT\n") - f.write("GGAGGTAGCCCACCAGCAACAGATGATGAAGAAGGTCTTCTCATATTGCGAGAATCTAAATTGGACAGGG\n") - f.write("ATTCTGTTTCTGAAGTCGTTAGCCTCCTTCCGGTACCCCTTGATTGAGATAGATTCTCAAATTTCAAAAA\n") - f.write("TCCTTCAAGCACATCCAAAAGCAAAGGACCACTATCTCCATTTCGGTTAAGATCATATCCATTCTTGCTA\n") - f.write("CTAAAGTCCTTCAACTCAGCTTTCCAAGAATCCTTTT\n") - f.write("\n") - f.write(">gi|226792373|gb|GO546078.1|GO546078 Mdas9010H22_e754.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAC34983.1| light harvesting chlorophyll A/B binding protein [Prunus persica], mRNA sequence\n") - f.write("AAAAAAAGCAGCAAGCAATGGCAACCTCTGCAATCCAACAATCAGCATTTGCTGGCCAGACTGCTTTGAA\n") - f.write("GCAGTCCAATGAGCTCGTCCGAAAGATCGGCGGCCTTGGCGGCGGCCGCTTCTCCATGCGGCGCACCGTC\n") - f.write("AAAAGTGCCCCCCAGAGCATATGGTACGGCCCAGACCGCCCCAAGTACTTGGGACCATTCTCCGAGCAAA\n") - f.write("CTCCGTCATACTTGACCGGTGAATTCCCCGGAGACTACGGATGGGACACTGCTGGACTATCTGCAGACCC\n") - f.write("CGAGACATTTGCCAAGAACCGTGAGCTTGAGGTGATCCACTCCAGATGGGCCATGCTTGGTGCACTGGGA\n") - f.write("TGCGTCTTCACAGAAATCTTGTCAAGGAATGGCGTCAAGTTCGGCGAGGCTGTCTGGTTCAAGGCTGGAT\n") - f.write("CGCAAATCTTCTCTGAGGGCGGCCTTGACTACCTTGGGCACCCAAACCTTATCCATGCTCAGAGCATCTT\n") - f.write("GGCAATCTGGGCTGTCCAGGTCGTGCTCATGGGATTCATTGAGGGATACAGAGTTGGAGGAGGAACCACT\n") - f.write("CGGTGAAGGACTAGACCCACTTTACCCAGGAGGGGCCTTTGACCCCCTTGGACTTGCTGATGATCCCGAA\n") - f.write("GCTTTTGCTGAACTGAAGGT\n") - f.write("\n") - f.write(">gi|226792372|gb|GO546077.1|GO546077 Mdas9010K23_e774.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_187130.1| ribose 5-phosphate isomerase -related [Arabidopsis thaliana] gb|AAF04905.1|AC011437_20 putative ribose 5-phosphate isomerase [Arabidopsis thaliana] gb|AAG51427.1|AC009465_27 putative ribose 5-phosphate isomerase; 91580-90750 [Arabi, 
mRNA sequence\n") - f.write("TTTTTTTGAAGCACGAAGGTAATTCATTTCACATTATTTCACAATTCACAATTCCATCATTCTTTAGTAT\n") - f.write("CATTAGTATACCACTGAGTCGAAACCGCAATGCTCAACAAACAATAAGACTATCGAATGCAGAATTTTCG\n") - f.write("ACAAATTTCCCATTTCGCCAATCTAAAATTACACCAAAGCAGCTAAATTTCACCAGTAAATACTGAACAA\n") - f.write("CAAGCAGCAAGTACCAAAACCCCCCAAAAATATGGAAAAAAACAGAGTACTAATGGAGATTCCAAATACT\n") - f.write("CACCACCACCGGAGAACCCGAAAAAAGCCAATCACTTGGTCTTCACATCCACTCCATCCGTGCCCGCAAT\n") - f.write("GATCACCGCCGTCGCCATGTCCAAGAACAACCCATGTTCCACAACCCCTTCAAATTTCAAAATCTCCTTC\n") - f.write("CCTGCCGCCGGCCCGTCTTTAATCGGAGTCTGGAAGTACAAATCCACAATGTAATTGAAGTTATCAGCCA\n") - f.write("CATACGGCTTCCCGTCGCCGTCGATCC\n") - f.write("\n") - f.write(">gi|226792371|gb|GO546076.1|GO546076 Mdas9010F21_e739.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P32869|PSAD_CUCSA Photosystem I reaction center subunit II, chloroplast precursor (Photosystem I 20 kDa subunit) (PSI-D) (PS I subunit 5) pir|A60695 photosystem I chain II precursor - cucumber prf|1710320A photosystem I 20kD protein, mRNA sequence\n") - f.write("TCACTGAAGTTGCATTTTAATGAAATCAAATTACAAGAAGCACAAATTATACTCACATAGTCATCATCAT\n") - f.write("GATCTTAAATACATTGCAAATTACACAAATCAATATCATGATCATATGTCATAAACTTGCTTGCCGGTGA\n") - f.write("ACTTGACCTCAATTGGGTTGACATTCTTCCCAATCGACCTAAAGTTCTGGCCAACCCCTTGGCGCCCAGG\n") - f.write("GTTCACCTTCTCAGGGTACACACCATCCTTGGGGTGCAAGTATTGGACCTCCCCGTTAGGGAAAACCCTG\n") - f.write("TAAAACTGGTACTTAATCTTGTACTTTGACCTCAGCCTTGTCCCAAGAACCAAGCACTGCTCTTTCCTAA\n") - f.write("CCAGTTTCAACAAGTTAGGACCCTCCCTCATGATGGCTGCTCCGCCGGTCGGCATCTCAAAAATCTGCTC\n") - f.write("CTTTGGTGACTCCCATGTGATCACGTAAAACTCTTCCTCCTGCGCTTTCCTCAACAGCCCGCCGGTGCTG\n") - f.write("CCGCCAAAGATCGGCGAGGGAGTTGGTGGGTCCAGCTCGGG\n") - f.write("\n") - f.write(">gi|226792370|gb|GO545820.1|GO545820 Mdas9004G01_e277.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_192425.1| 4-coumarate:CoA ligase (4-coumaroyl-CoA synthase) family [Arabidopsis thaliana] pir|H85064 4-coumarate-CoA ligase-like protein 
[imported] - Arabidopsis thaliana emb|CAB81058.1| 4-coumarate--CoA ligase-like protein [Arabidopsis tha, mRNA sequence\n") - f.write("TTTTTTTTTTTTTTTTTTTTACTGTAGAAGAAGACTGTATACTCCATATGGTAAAATGATATACCTCTGC\n") - f.write("CCTTTTCGGGCAAGAATGCTAGATCCACCGAAAAGTACGTATACTCGTTATGGTGAAATGAAAAATATAA\n") - f.write("ATAATTTGCATTAAAAGCAACGTACTGAGGTTGTTGCTATTTTTCTTTTTGGGTATAGGAGTTTCTATAT\n") - f.write("GGACATAATGAAACTGCTCTTCTTTATTTAAATCCAAAAACTTACTGCTCAGATTGTTGAAGAGAATCGT\n") - f.write("AGAGAGTTGGTACTAAGAGCTACGTCCTGCTCTTCTTCCCTATAAAGAATGGCAGCACAGCTGAATATCG\n") - f.write("TAACATACCTGCAACTTTAAAACAAGCGTACGGAACTTGTAGTGTTCCTACAAGCCTTCCCGATAAAGCA\n") - f.write("AACATCGCATCAATGAGGAGCCATGATACAATTTCATATTTTCGACCGCACTTTCTCGATGAGCTCTCTT\n") - f.write("CTGAGGATTTTGCCCGATGCTGACTTGGGGACACTGTTTATGAATGATACTCTTCGTAGTCGTTTGAAAG\n") - f.write("ATGCAACTTGACTAGCAATAAAGCTCTTGACATCTTCTTCAGTTAGAGAACTATTTGGCGTACGTACACA\n") - f.write("ATATGCAGCGGGGACCTCACCAGCTTCAGCATCAGGG\n") - f.write("\n") - f.write(">gi|226792369|gb|GO545819.1|GO545819 Mdas9003M23_e232.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTGTGAAGAATTTCTAGTTTTGAGGATTCTAGAAAACTGAATGTAG\n") - f.write("AATTTATAGCCCAACAGTTACATCAACAAGTATTCTGTGGCTTTTATATCTCAGTTCAATGTTTCCAGCT\n") - f.write("ATAGATTGGCTAGCTCATGCTGGTTGAGATGGTGTTGTCTCTTTCTCTTATGAATAAAATAATTCTTTGT\n") - f.write("GCCGATGTGTTGCCTCAACCACAGCACGTACGGCATCGAAGTCAACATTGGGGCCGACATCGGTGGCCTT\n") - f.write("GAGCACTCGAATGCTCAAATTCTGATTAACCTTGCCGATGATTTCAGCAGCATGACCAACCGTCAGATGT\n") - f.write("GCTTCAGGATTAAATTGTAGCGTTACAGCACCATCGCTGTCCAA\n") - f.write("\n") - f.write(">gi|226792368|gb|GO545818.1|GO545818 Mdas9003L19_e221.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|T02087 gag/pol polyprotein - maize retrotransposon Hopscotch gb|AAA57005.1| copia-like retrotransposon Hopscotch polyprotein, mRNA sequence\n") - f.write("TCGAGAAACAAATATTTTTCCAATTTGGAGATCATAAAGACGATGCCATTTTTGCCTATAAAGGTATCCA\n") - 
f.write("AGGAATATGCATTGGGAGGCACATGCATCAAATTTGGTGAGTCGTTTATCATCTGTGGAGCTTATGGTGG\n") - f.write("ATGGTGAACGGGTATGTTTAGCTAAAGGGCAAACTGAACAATAATTAATTTCAGAAAACTGAATGTCTTT\n") - f.write("TACATATGCTTGCATTAATCGGAAATGTTTATCGGAAAGGTGCCCAAGGCGTTCATGCCAAAGGCAGGAA\n") - f.write("ATAGCGGACTTGCTGACAAAATTGCAGATAGCCTCCATAGTAACAATGTTGAGATAGTAGAGTCATCCCC\n") - f.write("TTTCAGTTCTCGTCCCAATCATCATCCCCAAGCATTGGTCCTGTAACACAAATAGGTTTTTAGCAAAAAA\n") - f.write("GGCAATGTAGTCCGAGTGTTGGACAAGTTTAGCGACGGATATTAGATTCAATTGGAATGAAGGAACACAT\n") - f.write("AAGACATTACGAATGGTCAAATTGGGAGAGAATTTGACACTCCCTACGTGTGTAACAGTAGCACTCATGC\n") - f.write("CATTTGGTAATTGTATGGTTCGATTTTGTATTAATGAGCACGTAATAAGTAGGGCAGGAGAGTACACTAT\n") - f.write("ATGATCAGTTGCTCCTGTGTCCAAGATCCAAGCTACGTCTTTCCAAAGAGCGAAGCCACGAAAAGATTTA\n") - f.write("CCTGAGAGACTA\n") - f.write("\n") - f.write(">gi|226792367|gb|GO545817.1|GO545817 Mdas9004B15_e260.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|O24059|MT3_MALDO Metallothionein-like protein type 3 pir|T17015 metallothionein-like protein AMT2 - apple tree gb|AAC23698.1| metallothionein-like protein [Malus x domestica], mRNA sequence\n") - f.write("CAAAATACCATTCAAGCGAAAACCCTAATTTAAACACATCTTCAGCTCCAAGTTCTTAAGTTTATCTTCA\n") - f.write("ACATGTCGGGCAAGTGCGGCAACTGCGATTGTGCTGACAGCTCCCAGTGCGTGAAGAAGGGAAACGGCTA\n") - f.write("CGACTTGGTGATCGTGGAGACTGAGAACCGCTCCATGGACACCGTCGTCGTGGACGCTCCTGCAGCCGAG\n") - f.write("AACGACGGAAAGTGCAAGTGTGGCACAACCTGCCCATGTGTGAACTGCACCTGTGGTCAGTAAGCCCAGA\n") - f.write("TAACCAAATTAAAGATGTGATTAATAAAAGTGTCATATTAATTAAGGGATTATAGACCCTTAATTAATGA\n") - f.write("AAAGTGTTTGTGGGATAAAATAACGTTGTGGCTTTGTCTCTTGTTTGCTTATAGTATTTGAGTCTGTCGA\n") - f.write("GTGACATGTTGTACGTCTGTGAAAACATGTCACTCATATGTTTGTGTTTCTGTGATTGTGTCATGTAATG\n") - f.write("GCCATGTTCATGGCCCTTTGCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792366|gb|GO545816.1|GO545816 Mdas9004B22_e262.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to emb|CAA10129.1| hypothetical protein [Cicer arietinum], mRNA sequence\n") - 
f.write("AATTAGGGTTTCTAGCTCCTCCTCCTTATCTCTCGATTTACCAGACGGCATGGGAGGAGGCAACGGTCAG\n") - f.write("AAGGCCAAGATGGCCCGCGAGAAGAACTTGGAGAAGCAGAAAGCTGTCGGCAAGGGAAGTCAGCTTAAAA\n") - f.write("CAAACGAGAAAGCCATGTCAATCCAGTGTAAGGTGTGTATGCAAGCATTTATGTGCACCACATCGGAAGT\n") - f.write("GAAATGCAGGGAGCATGCTGAGGCAAAGCATCCCAAGTCTGATGTCTACGCTTGTTTCCCTCATCTCAAG\n") - f.write("AAATGAAAGAACATTTGGTAAGAAGTCACAGGGACAAATTGTGAACGTCGTGGCCCTTTATATTAAAGCC\n") - f.write("TGGAATGAATTTACCGAGTTCTTCTAGAGCGATAGCTATTTAATACTGCTTGTGGAGTTGTTTTTGTGTG\n") - f.write("TATGTGGTGTGTGTGAGTGAGGGAGAAAGAAAGAGATGTATCTCCTTGTTGAACCGAAACTCGGTCTTAA\n") - f.write("ATGATTGTGAATGTTGTTGGTGTAACTCTTGAAAAGAGTTTGTTTCATGGAACCTTGGTTCACTGCGTTG\n") - f.write("TGTCGTCTGTACGGGTTGAGTGTCGAGTTTGTGGAACTGCTGTTTGTCTGGCAACTCCGAGGCAAATTCA\n") - f.write("CACCAAACCTTGCCTCTTTGGTAATTAATTTGTGTGTCGAAGGTTCTGTTAAAA\n") - f.write("\n") - f.write(">gi|226792365|gb|GO545815.1|GO545815 Mdas9004A17_e252.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAA33652.1| carbonic anhydrase prf|1710354A carbonic anhydrase, mRNA sequence\n") - f.write("GAAGGAACCAACCTGGATCTCCGTTTATTATTTTTTCGCAAACGATTACAATCTCAAGGAAATTATACGA\n") - f.write("TTACATTTGCAACGACGCTGCAGATGAAATTAAGAAGGAAATTACAAACAGGAAGCAATTGAGTTGCATG\n") - f.write("GGAATGTGATTGCGATCATACGGAGAAGATGGGGTTGAGGCTGAAATCAACATCCCAGAGCTCGAAACGT\n") - f.write("CCGTTGACAAAGTCATAGTATCCTCCCTTGATTGCTAGGGTTTTCTTCAGCAAACCCTCTCTCACAAACG\n") - f.write("GATAGCTCAGCAGGTTTCCGATCGATACATTCACCGCCTCCTTTTCACAGTGGCCGCAGAGATCAGCGAA\n") - f.write("AGGAGCATCGGCACCATGATCTGCCTTCACCTTCTTCCCTGCCGGCGATGCGATACCAATCCAGTCTTCT\n") - f.write("ATGAAGTCAGTGCCAAGGGTAGACCCATCTTCTGGAATCGACAAGAGCGCCTTGATTCCTCCACAGGCGC\n") - f.write("TGTGCCCAATCACCACTATGTATGAAACCTTGAGATGCAAGACAGCATACTCAACGGCCGCCCCAGTTCC\n") - f.write("AGCGTATTTAGTC\n") - f.write("\n") - f.write(">gi|226792355|gb|GO546075.1|GO546075 Mdas9010C10_e729.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - 
f.write("TCTAGTTTTTTTTTTTTTTTTTTCTTGGAAAGCAGTCACCATATCGGAAAAATACTGCTGCTTGTGTGAT\n") - f.write("CATATAAATGTTATTAAATAATCAAGTGA\n") - f.write("\n") - f.write(">gi|226792354|gb|GO546074.1|GO546074 Mdas9010K17_e772.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("GTAAGTAAATATAAAATACCACTTGAAAATTTCCCCAGTTGACTTACAAATAACATCCCACTTGATTTTC\n") - f.write("ATTGCTACCAAAAATCACAGAATTTTGCCTTATAGGGCAAACTTGCCATTAGTTATAACCAAATTGAATA\n") - f.write("AAACAAAAAAAACAACTTGCTAACCCTGTTTTGAAAAAAAAAAATGGAAATAGGGATGGAAACCCAAGCA\n") - f.write("CCTAAAAAAATGAAA\n") - f.write("\n") - f.write(">gi|226792353|gb|GO546073.1|GO546073 Mdas9010D09_e734.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAA33866.1| ribulose 1,5-bisphosphate carboxylase small subunit, mRNA sequence\n") - f.write("TTTGAGGTCTTGAAAGCCCTAATCAAATACAAACTTAAACAGACCACCGGGACCCTTGGCTAAGAGCCAC\n") - f.write("AATGGTACATACAACAACAACATTATTTATGAAAATTGTAGAACAACTTAGACGCCTGGAGGCTTGTAAG\n") - f.write("CGATGAAACTGATGCACTGCACTTGACGGACATTGTCGAATCCGATGATACGGATGAAGGATTGGGGGTA\n") - f.write("GGCCTTCTTGGCCTCTTCCAGCTCTTTCAACACCTGGGAAGAGTCGGTGCATCCGAACATGGGCAGCTTC\n") - f.write("CACATTGTCCAGTACCTTCCATCATAGTATCCTGGGGATCTGTGGTTCTCACGGTACACGAATCCATGCT\n") - f.write("CCAACTCAAATTCCAAGCAGGGAACCCAGTTTTTGCGGAGGAGGTAGTCAACTTCCTTGGCCAAGGACTC\n") - f.write("GGTAGAAAGGGGAGGAAGGTAAGAGAGGGTCTCGAACTTCTTCAGTCCAAGTGGAGGCCACACCTGCATG\n") - f.write("CATTGCACTCTTCCTCCATTGCTTGCAATAGAGGTAATGTCATTGCTTTTTCTGGTGACAGGGAAAGCTG\n") - f.write("AGGAGGACTTGAGGCCGGTGAATGGAGCAACCATTCTGGCTTGAGCGGGGGCGGAAACTGTAGCCACGGC\n") - f.write("ACCGGAGGAAATCATGGAGGAAGCCATTGCTCTCTCTCTCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792352|gb|GO546072.1|GO546072 Mdas9010N19_e792.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAL18930.1|AF429389_1 hydroxymethylglutaryl coenzyme A synthase [Hevea brasiliensis], mRNA sequence\n") - f.write("ATTTTTTACGCTGACAATACTTAGCTGCCAATCAAACTATTGGCTTTTAACATACAAACTTAGGGAGGCC\n") - 
f.write("AGCATGTTCTGTTTTACGAGTTAGAACTGGCATCTAGCTACATAAATCTCAACTTAACCATGTGACCGGC\n") - f.write("TTGAATTACGTCCATCAGTGACCGTTGGCAACCACACCGTTGGCATTGGTGGTGGCGGCCGCATCCTTCT\n") - f.write("TGGCATAGAATCTACGGTACTTAGAGTCGACTTCGGTAAGATAGTATGTGCCTGGAAGTAGAAGGCTAAT\n") - f.write("GTCCTGGTTGGTCACAAAGTCTTTGCCTCCATACCTGTGCTCCATGATCTTCATCACTTCAACAAACTTC\n") - f.write("TCGGGAGGGAACTCATGTCTTGCCTTCAACTTCTCTCCAACATTCATCATGGCTGCGATGTTCTCCAAGC\n") - f.write("TAAAGGGCTGTTGACCAGCATTGAGACGCAATGAGAACATGGTTGCAGTTGAGCCACTTCCATAAGAGAA\n") - f.write("CAGTATCACCCGCTTGCCATCCAAGGAGCTGTGCTTGTTGTGGAGGAGGGATACAAATGCCGCATAAAGA\n") - f.write("GATGCAGTGTACATGTTTCCCACTTGCTTTGGTACCAAAGTTGTTGGTACAACCCTTAC\n") - f.write("\n") - f.write(">gi|226792351|gb|GO546071.1|GO546071 Mdas9010O08_e797.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|S14305 chlorophyll a/b-binding protein (cab-11) - tomato, mRNA sequence\n") - f.write("TTTTCTTGACTTAATTCAGGTGGGGAAAACAATGGCAACAGTCACAACTCAGGCCTCGGCCGCCGTTTTC\n") - f.write("CGGCCATGCGTCAACTCGAAATCGAGGTTCCTTTCGGGTTCTTCTAGTAAGTTAAATAGGGAAGTGGCTT\n") - f.write("TTAGGCCTATGGCTTCTCCTCCTGCTTCTTCTTTTAAAGTTGAGGCTAAGAAGGGTGAGTGGTTACCTGG\n") - f.write("CTTGCCTTCACCAGACTACCTCACCGGCAGCCTTCCTGGTGACAATGGATTTGATCCTCTGGCACTTGCT\n") - f.write("GAGGACCCAGAGAACTTAAGGTGGTATATCCAAGCTGATCTTGTCAATGGCAGATGGGCAATGTTGGGCG\n") - f.write("TTGTTGGGATGCTATTGCCTGATGTGTTCACCATCATTGGGATCCTTATCTTTTCCTTTTGTCATG\n") - f.write("\n") - f.write(">gi|226792350|gb|GO546070.1|GO546070 Mdas9010L11_e778.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTACATTAACTACGATTACAATATCCTCCAACTACACAAACGGCCAACATATTTGACAAACGTTGAAAAC\n") - f.write("AATTGAG\n") - f.write("\n") - f.write(">gi|226792349|gb|GO546069.1|GO546069 Mdas9010J21_e766.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTAATGAAAAATCAGAGTTGTATCATATATAACCAGATTGTTGTACAG\n") - f.write("CACAGACCATCAACGAAGCTGCAGAGTTGTCAACACATCTTTTATTCTCGACATTTCATCTCCTGTCACG\n") - 
f.write("ATAGCACACCATCCGCGGTTGCAATTTTATCTGAGGATACAAATTTCTTCATATGTTTTGTCTTGATTCT\n") - f.write("GTGCTACCCATAAAAATTTCTCCTTGTCATGGGTTGCGAAATCTGGTGGTGCACATGAATATAGAACATG\n") - f.write("ATTAACATTCAATCCATGACCAACATTCTTATTTAGTCCAGAACATCGAACGAGGCCATACAAACAAGCT\n") - f.write("CTTTGAAATTCCAAGAGGAATCGAGAGTGACCTCCAAATATAGAAATAAAGGGAAAGGCTACCCAATAAC\n") - f.write("TAATTTGACCACAGTAAAGAAAATAACTAACAGACTTATTCCCTTGAAACAAAAAACAAGCAGCCTTGTC\n") - f.write("AGCCAACGAATCATAACCCAAAGTCTGTAGAGTACTGATGGAAAATCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792348|gb|GO546068.1|GO546068 Mdas9010I09_e759.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to emb|CAA10284.1| chlorophyll a/b binding protein [Cicer arietinum], mRNA sequence\n") - f.write("TTTTTTTTTTTCAGACACATATTGTAGCACATAGTTCAAATTATTGTTGTCCAATACTTGGTACCAAATT\n") - f.write("TACATCACCGACAACAATACAAACGAAAGTACAAGACAACTCCTGAGCTCTCACTTTCCGGGAACAAAGT\n") - f.write("TTGTGGCGTAGTTCCAGGCATTGTTGGCGACTGGGTCAGCGAGGTGGTCGGCAAGGTTCTCCAATGGTCC\n") - f.write("CTTTCCGGTGACGATGGCCTGGACAAAGAATCCGAACATGGAGAACATGCCAACCTTCCATTCTTGAGCT\n") - f.write("CCTTCACCTTGAGCTCGGCAAAGGCTTCTGGGTCGTCAGCAAGGCCCAAGGGGTCGAAGCTTCCGCCGGG\n") - f.write("GTAAAGGGGGTCGGTCACCTCTCCGAGGGGTCCGCCGGCAATTCTGTATCCCTCAACTGCTCCCATCAAC\n") - f.write("ACAACTTGGGTAGCCCAGATGGCCAAGATGCTCTGTGCATGGACCAAGCTAGGGTTTCCCAGGTAGTCGA\n") - f.write("GACCGCCCTCGCTGAAGATCTGGGCTCCGGCCTTGAACCACACAGCCTCGCCGAACTTGACACCGTTGCG\n") - f.write("GGACAAGAGCTCGGGGAAAACGCATCCAAGAGCTCCGAGCATGGCCCATCTGGAGTGGATCACTTCGAGC\n") - f.write("TCACGGTTCTTGGCGAAGGTCTCTG\n") - f.write("\n") - f.write(">gi|226792347|gb|GO545814.1|GO545814 Mdas9003N22_e235.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAB01236.1| gene_id:MDJ14.23 unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TTTTTTTTTTTGTCTATCAAAGTGATCATAAATCTACAAGCATTTAGAACATATTTAGTCGACTAATTAT\n") - f.write("TGATAATTACAATTATTTACAGATTGAATGGGGAATTGCCCCAAAGATGACTGGGAGATTTGAACTGAAT\n") - 
f.write("TTACAGCTATCTATTTTAGCAGAGAATTCCTATGCCTTTAGTTGCTGATATCAAAGTATTTCGGATCGGA\n") - f.write("AGGAAAGTGATATTCCACATGTAACTTGCCCTCATCAGCATCCGTCTGGCGTTGAGGGAACACGACCCTC\n") - f.write("AAATCATTATACAAGTAGAAGCGCCTCCCCCCTTCGACATCGATACTGTTTGTCGCTTGTCCAGATGATG\n") - f.write("GATCTAACTTGCATCTCTGGACTGACCTCGAACATTTCTTAGGAAATAGGCATGAGAAACGAAGGTGAAG\n") - f.write("AGCATAACGCAGAACGCCAGCACCAGCAGTTTTCTTGCTCGTCTTTAAAGGGCTACGCACCCTACTTTCA\n") - f.write("CTAATCTCACTGGTCTTTTCAGACGTACAAAGGCTACAATCACTGGGTTCACTGTGATAACAGGCCTTTG\n") - f.write("TGCGTTCTCCCTCTCTTTGGTACTGATTAACTACATCCTTAAACTCGTTTGAGTCCGCATCATTGTC\n") - f.write("\n") - f.write(">gi|226792346|gb|GO545813.1|GO545813 Mdas9003P22_e248.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("CTGCTACTATAATAGCTCTCCAATTGCAATATATATATTCAAATTATCCTCACAACTGTAACATGCTCTA\n") - f.write("TTTACTAACTGGACATATTCCCTCTATG\n") - f.write("\n") - f.write(">gi|226792345|gb|GO545812.1|GO545812 Mdas9004H08_e282.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_198322.1| GDSL-motif lipase/hydrolase protein [Arabidopsis thaliana] gb|AAM64916.1| putative GDSL-motif lipase/acylhydrolase [Arabidopsis thaliana] gb|AAO42146.1| unknown protein [Arabidopsis thaliana] gb|AAO50514.1| unknown protein [Arabid, mRNA sequence\n") - f.write("TTTCACGACACAAGGAAAATTATAATAAATGGGCAATACTAACGAAGACACATTGAGAAACTGAAGAAAG\n") - f.write("TCTTATGATAGAAAAACAAACACACCCCTTGTAGTTCTAGAACTAGTTCTAGGTCTTAGAGTCCAAGGCC\n") - f.write("AAAATAGTGCTGAGGTTCATTGGATTCATGTACTTGTTGGACCCAGTAAGGATTTGCTGCGCAATAATTC\n") - f.write("TGCTTGCCTTCTCAGAAGGATGGAATGCATCCCAAAACGCGTACAAATCTCTATTTGGGCACAAGTTGGA\n") - f.write("GGCAATTGTGCAGAGTCCGATCCCATTGTAGGGTCCTTGGCCACAGCAAGCTATCTTTGAAGTCACAAAT\n") - f.write("CCATAAGCTTGAGGATTGGAGATAAAATCCATGTGCATTTGAAATGCATTTGCAGCAACAAATACATCTG\n") - f.write("AGCCAATTTGGCCATTCAGGCTGTTGATCATATCGACTAGCTGTGGGTTAAATAAAGAAGCAGCTCGCTG\n") - f.write("CAGCTCCACCGCACATTCGCCTCTTCTGCTGCGCTGAGCCAACTCTGCTGGAGCACACCCTAGTGGCCCT\n") - 
f.write("GTGCCTGTCACCAAAACCTTACGAGCTCCTAATTCATACAGCCTGCTAGAACTTTGCGATACTCGGAGAT\n") - f.write("GAGGTAGACGACATAGTCCGGGAGAGAGAATTG\n") - f.write("\n") - f.write(">gi|226792344|gb|GO545811.1|GO545811 Mdas9003P14_e245.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|T12041 cysteine proteinase (EC 3.4.22.-) 3 precursor - kidney bean emb|CAB17076.1| cysteine proteinase precursor [Phaseolus vulgaris], mRNA sequence\n") - f.write("AAACATGCGGCCGTACCGATCATCGCCGGTCGTGGCGATCCTCCTCCTCCTCACGATCTTCACCGTCTCA\n") - f.write("TCGGCCCTCGACATGTCGATCATCTCCTACGACAGCAACAACCACGTCGGAGACAGCAAGTCGTCGGGCA\n") - f.write("GCTGGAGGACAGACGATGAGGTGATGTCGATATACGAGGGGTGGCTGGCGGAGCACGGAAAGGCATACAA\n") - f.write("CGCTTTGGGAGAGAAGGAGAGAAGGTTCCAGATCTTCAAGGACAACCTTAGGTACATCGACGAACAAAAC\n") - f.write("TCCAAGAACCTCAGCTACAAGCTCGGTCTGAACAGGTTCGCCGATCTGTCGAACGAGGAGTACCGGAACA\n") - f.write("CTTACCTCGGCGCCAAGACTCGCGCGCAGATGAAGCGGGTGTCCAACAGGAACACCAAGAGCGACCGGTA\n") - f.write("CGCGCCACGTGTCGGCGATTCGTTGCCCGACTCCGTTGATTGGAGGAAGGAGGGCGCGGTGAGTCCAGTC\n") - f.write("AAAGACCAAGGCAGCTGCGGGAGTTGCTGGGCATTCTCAACTATCTCCGCCGTGGAAGGTATCAATAAGC\n") - f.write("TAGTCACCGGAGATCTCATCTCCCTATCTGAGCAGGAGCTGGTAGACTGCGATAGAACGTACAATGAA\n") - f.write("\n") - f.write(">gi|226792343|gb|GO545810.1|GO545810 Mdas9004J05_e291.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_187574.1| 60S ribosomal protein L4/L1 (RPL4A) [Arabidopsis thaliana] sp|Q9SF40|RL4B_ARATH 60S ribosomal protein L4-2 (L1) gb|AAF23293.1|AC016661_18 putative 60S ribosomal protein L1 [Arabidopsis thaliana] gb|AAL09727.1| AT3g09630/F11F8_22 [, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTGTCACGAACGAAACTAGATTGATAAACTAAGCTACAAATATGA\n") - f.write("ATCCAGTGCCCGTTATACAAAATCTCCAACCAGATCGAACGGAGGAAAAACACCTTGAAACTTGCTTGTT\n") - f.write("AAACTCATTTCCCTGCAACTTATCATTCATAACTCAAAAGTAAACAAAAGACACAGATAAACACCACAAC\n") - f.write("AAAATAAACTAACAAAGGCAATGTCTTGTAAAATTTTAAAGCAATAAACTAAATCACTGGGCTACACCGA\n") - 
f.write("GCCACTTTGTGAAGTTGTCGAATTCTGTGTAATCGCTGTCAGAAATCATAGTTTTGTACCAAGCCTTTCC\n") - f.write("AGCTGCCTTGATGGCGGTAGCTTCCTCCTTAGTAATGGGCTTTCTCTTCTTGTCAAGCTTCTCCTTCTTG\n") - f.write("GCCTTCACGCGCTGCTCCTCAGCCAAAAGTGACATTCTCCTCGCTGTCTTAACATAGGGGTTCAACTTGA\n") - f.write("GCAGTGTGTTGAGATTTTTGAGAGGGTTTTTCTTCATTGGAGCTCTCTTGGCATCCTTCTTGATTGGCCT\n") - f.write("GACAACAGATTGAACTTCATCAGAGTTGATGATCCTCGCCAAGTCAGCATTGACCATCTTCGACCTCGGC\n") - f.write("AGCACATATCCCTTCTTCTTCTCCGAGGGCTTGTCAAACGACCCGTAAATTGAG\n") - f.write("\n") - f.write(">gi|226792342|gb|GO545809.1|GO545809 Mdas9004B06_e256.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAA24493.1| chlorophyll a/b-binding protein [Fagus crenata], mRNA sequence\n") - f.write("TTTTCAAGATAAAGTTAAAATTGTCCATTGATGCAGTTTTTAAAGCTTGAGCTTACATACAAGCAGGTAG\n") - f.write("CTTAAAGATCATCTATTGTACACACTGATTAGGTTTCCTCACTTCCCGGGCACGAAGTTGGTGGCATAGG\n") - f.write("CCCAGGCATTGTTGGTAACAGGATCAGCCAGATGATCGGCAAGGTTCTCGATAGGGCCCTTTCCAGTGAC\n") - f.write("AATGGCCTGCACAAAGAAGCCGAACATTGAGAACATGGCAAGTCTTCCATTCTTGAGTTCTTTGACCTTA\n") - f.write("AGCTCTGCAAATGCCTCTGGGTCATCTGCAAGTCCCAAAGGATCAAAGCTGCCTCCGGGGTACAGTGGGT\n") - f.write("CAGTGATCTCACCAAGGGGTCCTCCGGCGATGCGGTAACCCTCAACCGCACCCATGAGGACCACCTGGGT\n") - f.write("TGCCCAGATTGCTAGAATGCTCTGTGCATGAACCAGGCTTGGGTTTCCAAGGTAGTCAAGGCCACCTTCG\n") - f.write("CTGAAGATTTGAGCTCCGGCTTTGAACCACACTGCTTCACCAAACTTGACTCCGTTCCTGGAGAGGAGCT\n") - f.write("CCGGGAAAACACAGCCCAAAGCACCAAGCATTGCCCATCTTGAGTGAATGACTTCAAGTTCACGGTTC\n") - f.write("\n") - f.write(">gi|226792333|gb|GO546067.1|GO546067 Mdas9010O09_e798.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAK38744.1| carotenoid 9,10-9',10' cleavage dioxygenase [Phaseolus vulgaris], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTAATCAACATAATTAAGATGGTATAGGTATTGCTATGTGACCCAC\n") - f.write("TGATTCTTTTTGTTCAAAGTAATTAAAACACCATCCGGTAGTTTAAACACAGAGGTGTATACATTATTTA\n") - f.write("TTGGTGTACATGTGGAGTAGACCGCCTACACGACAACAGGTTCAGTTTACGCTGGCGTGCCTTCCCCTTG\n") - 
f.write("TACGTTCTAACAATGATGAATAATTTTAGAGCTTTGCTTGTTCTTGCAGTTGCTCCTCTGTCACGAAGAA\n") - f.write("GGCATGGAAACCATATGGAACTCTGTGTGGCAATTCCACGACTGCAACGGGCTCTGATGACATTGTCTTT\n") - f.write("GCATCAACTACATGAACTGCTGATTTTCCAGTGTTCTCGTCATGAACAAAGAGTATTAAGTAGCCATCAT\n") - f.write("CTTCTTCAGAAGTAACGCCAGGAATACGAGGGACAAAAATAGCTTCAGAACCAAATCTGCCAGGTCCCAG\n") - f.write("GTCATAGAGGCCTTGGACATTTCCTCCAACCTCAATTTTTGTTTTTCCGACCTCTGGTTCAGCATGCAAA\n") - f.write("TCAAACTTGGCTACCCCTGTAACCTTG\n") - f.write("\n") - f.write(">gi|226792332|gb|GO546066.1|GO546066 Mdas9010M20_e786.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to emb|CAD31838.1| putative quinone oxidoreductase [Cicer arietinum], mRNA sequence\n") - f.write("TTTTTTTTTTTTTTTTTTTTTTTCAGTAATAAAAACTGTTTATTCAATAATAAAACTTGTTACAACAATT\n") - f.write("AAATTTGACAAAGCAAATTATCCTTTACAAAAACACGAAATTGCAAATTGAAAACTGGACTGAAAATCAA\n") - f.write("GAACAAAAAAAATGAAAGTATGGAACACTCAACAGCGATCGTATAATTGATCATCATAATTAGGCAAATC\n") - f.write("CCTTAAGCTTCTTTGCGATGCCAGCAAAGTACTTTCCCTGGTGGCTAGCTGTTTCTAGTTCTAAATCAAA\n") - f.write("AGGTTGTCTTGTGCCATCCCCAGCATAAGTTCCCGCACCATAGGGGCTTCCACCCTTGACCTGCTCCATC\n") - f.write("TCGAACATGCCAGCTCCACTCGTGTATCCGATAGGCACAAAAAGCATTCCGTGGTGAACAAGCTGAGTAA\n") - f.write("TGGTCGTCAAGGGGGTAGTCTCTTGTCCACCACCTTGAGATCCAGTGCTGTAAAAAATTCCAGCGGGCTT\n") - f.write("GCCAGCAAGTGCTTGAGTTCTCCATAAGCCTCCAGTTGAATCGAAAAATGCTTTAAACTGTGCAGCCATC\n") - f.write("ATTCCAAATCTGGTAGGGAAACCGAAAATTATCCCATCAGCCTCCACAAGTTCCTCAGGTGAAATTACAG\n") - f.write("GCACTTCACTCTTTGGTGGTGCACCCATTTTTCCAAGAACTTCTGCTGGCAGCGTTTCTGGTACC\n") - f.write("\n") - f.write(">gi|226792331|gb|GO546065.1|GO546065 Mdas9011C01_e819.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_191781.1| serine/threonine protein kinase, putative [Arabidopsis thaliana] pir|T48014 serine/threonine protein kinase-like protein - Arabidopsis thaliana emb|CAB71882.1| serine/threonine protein kinase-like protein [Arabidopsis thaliana], mRNA sequence\n") - 
f.write("TTTTTAAAACATGAAATACCACCTTGCATTATGAACAAATCAAATACACCGCAAATAATGAAGCAATATA\n") - f.write("GTACATCTCTTTCAAAAAGAGGAAAAAAAGAAGAAAACGAAGCATATAGAACATTGTGCAAACAATATAC\n") - f.write("CACCAGACCACCCTCAGAACTCTATCCCCTTTTGTTACAATAATTGCATGCTGAATTACTTATACATGCA\n") - f.write("CACATGCCACACCTTCAGAGGCAAAGGGAGAGACAGAGGGAGATTGAGAGATTCACAAGTTTGGTGATTC\n") - f.write("ACCAGCAGGTCCATGACGAGCATTTAACAGGGGTTGGAGGGCTTTAACAACAATGCTCATGTTTGGGCGG\n") - f.write("AAATCAGCCTCATATTGCACGCACAAGGCAGCAACTGCAGCCATCTTTGCAACCGCCTTTGGTGGGTACT\n") - f.write("CTCCTCCAAGTCTTGAATCAATACACTGCCTAACTTTGTCTTCACTAAGCTTTGGTGTAGCCCATGTTAC\n") - f.write("TAGACTCTGCTGTCCCCGTGGTAATGTATGATCAACAGGTTTTCGTCCAGTCAAAAGCTCAAGCAGGACA\n") - f.write("ACACCAAAACTGTAAACATCACTCTTGGCATTCAACTGTCCTGTCATTGCATATTCAGGGGCATGATAGC\n") - f.write("CAAAAGTTCCTAGCACACGAGTGGAGTGAAGACGTGCTGC\n") - f.write("\n") - f.write(">gi|226792330|gb|GO546064.1|GO546064 Mdas9011C14_e822.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAC32136.1| YGL010w-like protein [Picea mariana], mRNA sequence\n") - f.write("CTAATTTTCTCGGAAAATTCTCAAATGGGGAAGACCGGACTGTTTGATCTCGAAAAGCACTTTGCTTTCT\n") - f.write("ATGGAGCTTACCACAGCAACCCGGTCAACATAGCGATTCATATGCTCTTCGTCTGGCCTATCCTCTTCAC\n") - f.write("CGCTCTGCTCATCCTTTACTTCACGCCCTCGCTGCTCAGTTTTGGGGTTTCGGTGTTTGGTAACGACGTC\n") - f.write("GCTCTGCCTTTCAACGTCGGCCTGCTGTTGACTATAATCTATTCGGCGTTTTACATCTGTTTGGACGCAA\n") - f.write("AAGCTGGCTCCTTGGGTGCTCTGCTCTGCGTAATTTGCTGGGTTGGTAGCTGTTTCCTTGGAACCCTACT\n") - f.write("CGGATTTTCCATCTCTTGGAAGGTTGTTCTGGTGGCTCAGATAGTGTGTTGGACTGGACAATTTATCGGC\n") - f.write("CATGGAGTCTTTGAGAAAAGGGCACCAGCTTTGTTGGACAATCTTGCACAAGCCTTTATCATGGCTCCCT\n") - f.write("TCTTTGTTCTGTTAGAGGCTCTGCAGATCTTCTTCGGGTACGAACCGTACCCCGGGTTTCAAGCAATTGT\n") - f.write("TCAAGCAAAGATTGATGCTGAAATCAGTGAATGGCAAGAGAATAAGAAGAAACTGATCTCCTAATCTGTA\n") - f.write("ACTGGATTTACTTTTACTGTAATATCTGGGGTGTGGTTAAAAGCAATACCAAAGCTTTTATCTGTAAACA\n") - f.write("TGT\n") - f.write("\n") - f.write(">gi|226792329|gb|GO546063.1|GO546063 Mdas9010N01_e787.b1 
Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_568147.1| coatomer delta subunit (delta-coat protein) (delta-COP) [Arabidopsis thaliana] gb|AAK96656.1| coatomer delta subunit (delta-coat protein) (delta-COP) [Arabidopsis thaliana] gb|AAM47945.1| coatomer delta subunit (delta-coat protein), mRNA sequence\n") - f.write("ACGAATAGGCATCCACTTTTATTATTAACATTCCTTGGGGGGGAAGGGGATAGTATCAAATAACATGAAA\n") - f.write("GAATGCACATCTATAGCATCCAAATTCTGGCAGGCTTCCCTTCATCTCAAATTTAATGGGAATTAAAACA\n") - f.write("CCACAAAATGAAAACTCCTGGTTTTCGAATTCAATCCATATCCAAAAAGGTTGCAGATCCACACACCCCA\n") - f.write("AACCAATCACACCACTTGGTAATTCTCCGTCACCAACTGCGTTCTTTGAGAAAACTTGGGAGCGGCTCCA\n") - f.write("CCTTGCAGAGGTATAATGTTCAAAACCTTCAGGTCACTGAATGTCTCAGTGGCAGCGAAGCGCACAGAAA\n") - f.write("TGGGGAAAAATACAGATGGGTCTGCTGGGGGAACAACAAACTCCATTGATCCACTGCGGTTCGAGTTATC\n") - f.write("TACGAGAAGTATGGACCACTCCAACATTGAATTCCTAGCATCGTATCTCCATTCACCTTCAATCTGGTTT\n") - f.write("ACACGTGGAGCCTCTCTCGCAGCTGGGAGAGGTACAGAGATAACAACATTTCTCAGATCAAACATCTCTG\n") - f.write("AAGCCTCATACTCAATGTTGACATAAGTTTCATTCCCGGAAACAGTGGGCCAACAGTTGATTGTCAGGGG\n") - f.write("CACCATTGACTCATCTGAAGTCTGCATTCTCCATCTCAAAAGACCAACAC\n") - f.write("\n") - f.write(">gi|226792328|gb|GO546062.1|GO546062 Mdas9010O18_e800.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P54260|GCST_SOLTU Aminomethyltransferase, mitochondrial precursor (Glycine cleavage system T protein) (GCVT) pir|S59948 aminomethyltransferase (EC 2.1.2.10) precursor - potato emb|CAA81081.1| T-protein [Solanum tuberosum], mRNA sequence\n") - f.write("TCGAGTTGCTTGAGTATTACCTCAGCGCCAAGAAAGCCGCCCTCTGCCCTTCTTCTCTTCCCTATGGCCC\n") - f.write("ATGTGAGTCCAGCCTCCACAGGTGTTGTGTGTTGCTCCATGTCATTGCCATATAAACACAACCCAGCTTC\n") - f.write("CAGTCGGAGACTGTCACGAGCACCCAGACCTGTCAGCCTCACCTTCCCCTCGGATTTCTCCAAGATTGCC\n") - f.write("TTGGCAAGATCCACTGCATTCTCATCAGGAACCGAGATTTCAAATCCACAAACAATGTAGCATACACGAC\n") - f.write("ATGAGTGATTACAGTTCGATAAAAAGAAGCAAGTCTACTTGAAGAAGTAAGGAAGGTTAACAACATAGAG\n") - 
f.write("GAGGAAGTAGGCCCAAGTGACACCCGAAATGCCTCCGAAGAAGAAGCCGCCGGTGAATTTAGACCATCCC\n") - f.write("TCCGCGGTTTGAAGCTGGTCGGGCTCCTTCTTCCTGCCAGTCAATGTCAAAGCGGGCGCGGAGGACGGCT\n") - f.write("CTCCTTCCTTGAAGGAGGAAATTCCATAGATTGTCAAGCACAGGGTTTAGGATGACCACAAGACCAGCAG\n") - f.write("CAGCCAAGGAGCCAGCTCCCCAGCATAAAGGGGTGTTCCTCAATGGGCCAGTCTTGACAAACGGGCCTAC\n") - f.write("CAGAAAGAAAACCGTGGGCCAGGCCCACCTCAAATACCCCTGAGAAGTGGGCTGAC\n") - f.write("\n") - f.write(">gi|226792327|gb|GO545808.1|GO545808 Mdas9004B24_e263.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAO85557.1| photosystem I subunit XI [Nicotiana attenuata], mRNA sequence\n") - f.write("TTGGAGACTCCAGTGACATCAAGCCCTTTGATTGCCTGGTACTTGTCCAACCTCCCAGCCTACAGGACAG\n") - f.write("CAGTCAGCCCACTTCTCAAGGGTATTGAGGTGGGCCTGGCCCACGGTTTCCTTCTGGTAGGCCCGTTTGT\n") - f.write("CAAGACTGGCCCATTGAGGAACACCCCCTATGCTGGGGGAGCTGGCTCCTTGGCTGCTGCTGGTCTTGTG\n") - f.write("GTCATCCTAACCCTGTGCTTGACAATCTATGGAATTTCCTCCTTCAAGGAAGGAGAGCCGTCCTCCGCGC\n") - f.write("CCGCTTTGACATTGACTGGCAGGAAGAAGGAGCCCGACCAGCTTCAAACCGCGGAGGGATGGTCTAAATT\n") - f.write("CACCGGCGGCTTCTTCTTCGGAGGCATTTCGGGTGTCACTTGTGCCTACTTCCTCCTCTATGTTGTTAAC\n") - f.write("CTTCCTTACTTCTTCAAGTAGACTTGCTTCTTTTTATCGAACTGTAATCACTCATGTCGTGTATGCTACA\n") - f.write("TTGTTTGTGCCTCGCGCCGAA\n") - f.write("\n") - f.write(">gi|226792326|gb|GO545807.1|GO545807 Mdas9003M08_e227.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|T07086 acid phosphatase (EC 3.1.3.-) - soybean emb|CAA11075.1| acid phosphatase [Glycine max], mRNA sequence\n") - f.write("AGGAGTATTACTCTTCCTTCTTGCCATATTTTTCTCAACAACTCAAGCTTATGAGCCACTTATAAACCAC\n") - f.write("CATATCCATCTCCTCAGGCCGAAATCCGGGGCGGGTGGCAGCAGCGTTCCCGGCGTGTCTTGCCTGAGTT\n") - f.write("GGCGTTTGGGGGTGGAAGTTAGGAACATTATCAACTGGAAGACTGTTCCGGCACAGTGTGAAAGCTACGT\n") - f.write("TGGGCACTACATGCTTGGGCACCAGTACAGGAAGGACTCAAAAGCGGTCACTGATGTGGCTTGGCTCTAT\n") - f.write("GCTAAGAGCCTCAATCTTACAAAGGATGGCAAGAACGTTTGGGTCTTTGATATCGATGAAACTACACTCT\n") - 
f.write("CTAATCTACCGTATTATGCTCGTCATGGATTCGGGACCGAGGTATACAATTCCACTTCATTCAATGAATG\n") - f.write("GGTGTTGGAAGGCACAGCCCCAGCTTTGCCCGAGAGTCTTCAGCTGTACAAGAAATTGTTGAAACTTGGA\n") - f.write("GTCAAGGTCGTGTTCATAACAGGTAGAGGAGAAGATCAGAGAAGTGTCACAATCGCCAATCTCAAAAATG\n") - f.write("TCGGATATCACACCTGGGAGAAGCTTGTCTTAAAGGGATCAGCTTATTCTGGGAAGACATCGTACGAGTA\n") - f.write("\n") - f.write(">gi|226792325|gb|GO545806.1|GO545806 Mdas9004D04_e268.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P50694|TLP_PRUAV Thaumatin-like protein precursor gb|AAB38064.1| thaumatin-like protein precursor, mRNA sequence\n") - f.write("GGGAATGCAAGAGTATTATTCTTTATTTATTTCATTGAAATTATTACAAATTCATGACAATACTAAGCGT\n") - f.write("CAAAATTCATAAAGTTTATTTCTCCTTTAGTATGATATAAAGGGATATTACATAATATCCTTTCTAGTTC\n") - f.write("TATTTTTAGGGTGATGGACAGAATGTAATAACGTAGTTTGGTCCACCGCTGCATGTAAAGGTGCTGTTGA\n") - f.write("CATCATCATAAGCGTAGCTATAAGCCTGAGGGCACTGGTCCTCAAAGATCTTAGAGTAGTCTGTGGGTGG\n") - f.write("ACATGTAGCTGGTGTGTCATTAGGAGGAGTGCAGCAGTATTGTGGTTGATTAAATTGTGTACACGCACTC\n") - f.write("TTGCATGCAATTACGCTCCCATCGGACCCCTTCACTTGCAGCTCAGCTGGGCAAACAGCGTTGACGTTGG\n") - f.write("CGGGGCAAGAAGAGGTCTGGCAGTCGCCCGTGCCGCCTTGTGGGGCAACAGACACGGGCAAGTTGAAGCC\n") - f.write("ATCAACAAGACTTACGTCGTAGAAATCTTGGCCTCCGTTTGCGACAATATTTATTTCGACTAGGCTTGCT\n") - f.write("GGTGGAACTGCTCCGTTACCGTTGCATGTGACTTGGCCAGAGCTACACTCTGCTGTGGCACAACTGAACT\n") - f.write("TTCCTGAAGCGTCAGTGGAGCATCCGGTTCGGCCCCAGAAGCGGCCTTTCCATGGGACTGGAGTG\n") - f.write("\n") - f.write(">gi|226792324|gb|GO545805.1|GO545805 Mdas9004F09_e274.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_179752.1| 40S ribosomal protein S25 (RPS25B) [Arabidopsis thaliana] sp|Q9SIK2|R25A_ARATH 40S ribosomal protein S25-1 pir|H84602 40S ribosomal protein S25 [imported] - Arabidopsis thaliana gb|AAD23647.1| 40S ribosomal protein S25 [Arabidops, mRNA sequence\n") - f.write("TAAAATTGTAGATTAAACAAAATCGACGCTAAATAGTCTGAAACGACATATAACGATACCCGAAACGACA\n") - 
f.write("ATTAAACACGAAAGAGCAAAGAGACCTAGGTGTTGGTAGCCCTGGTGTAGATCTGCTGGCTGGCGTGGGC\n") - f.write("AGAGACCATCCTAATCGAGCCTCTAGCCATCAACTCCCTGATGGCCTTCCTGGCCAGGGAACCGTTGATC\n") - f.write("CTGAGACGGTCGGAGAGAATGGATGGTGTGATGAGCTTGTACTTGGGGGCCTCAGTGAGGAGCTTGTCGT\n") - f.write("AGGTGCCCTGGTCGAAGAGCACCATGTTGTTCACCTTCTCCTTCTGCTTTCCCTTGCTCCACTTCTTCTT\n") - f.write("CTTCTGCTTGCCTCCTCCGGACTTCGCCGGCTTCGACGACGGCGGCGGTGCCTTGTCCTTCTTCGGCGCC\n") - f.write("ATGATCGGATTACAGCGACAGGGAAGGGAGGAGAGAGGAGGAGAGAGGGAGGGATCGGCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792323|gb|GO545804.1|GO545804 Mdas9003O06_e237.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAO85557.1| photosystem I subunit XI [Nicotiana attenuata], mRNA sequence\n") - f.write("CAACTGTGTCTGTGTATCTTCTGCAGTTGGTGCAAAAGTGATGGCCTCAGCTTCTCCAATGGCCAGCCAG\n") - f.write("CTTAAATCCAACTTCACCTCTCCGATCACCACCAGACCTGCCTTGTTTTCTCCCAAAGGCCTCTCTGCCT\n") - f.write("CCCCACTCAGGCTCTTCCCTTCCAAGAGACTCTCTTCCTTCTCCATCAAGGCTGTCCAATCTGACAAGCA\n") - f.write("AAATTTCCAAGTGATTCAACCCATCAACGGAGATCCTTTCATTGGAAGCTTGGAGACTCCAGTGACATCA\n") - f.write("AGCCCTTTGATTGCCTGGTACTTGTCCAACCTCCCAGCCTACAGGACAGCGGTCAGCCCACTTCTCAGGG\n") - f.write("GTATTGAGGTGGGCCTGGCCCACGGTTTCCTTCTGGTAGGCCCGTTTGTCAAGACTGGCCCATTGAGGAA\n") - f.write("CACCCCCTATGCTGGGGGAGCTGGCTCCTTGGCTGCTGCTGGTCTTGTGGTCATCCTAACCTTGTGCTTG\n") - f.write("ACAATCTATGGAATTTCCTCCTTCAAGGAAGGAGAGCCGTCCTCCGCGCCTGCTTTGACATTAACTGGTC\n") - f.write("GGAAGAAGGAGC\n") - f.write("\n") - f.write(">gi|226792322|gb|GO545803.1|GO545803 Mdas9004J23_e295.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P27608|AROF_TOBAC Phospho-2-dehydro-3-deoxyheptonate aldolase 1, chloroplast precursor (Phospho-2-keto-3-deoxyheptonate aldolase 1) (DAHP synthetase 1) (3-deoxy-D-arabino-heptulosonate 7-phosphate synthase 1) gb|AAA34068.1| 3-deoxy-D-arabino-hept, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTGTCAAAATCATACGCATTATTCAATTTATTTTTGCTTCCCTTCAA\n") - 
f.write("AGTTCAAAGTCTCCAACCATCACTTCCAGTATATACGTACAACACACAACATAAATTATCACTAAACACC\n") - f.write("AGTGACGTATTAACACAGTGCAGTTTTGAAAAGATGAATAAAATAACTTCCAGGGTTCTTAACCCTTGAG\n") - f.write("ACCATTCTACAGACCTAAAGCGGCAAGCGGGTTCTGTGACTTGATCCTTCTTTTCCTGAGGCGCTCGGCA\n") - f.write("ATGATAAAGGCGAGCTCAAGAGATTGTGAGGCGTTGAGCCTTGGGTCGCAGTGAGTGTGGTAGCGTGAGC\n") - f.write("TAAGGTCGTCGAATGTCACAGTGCGTGAACCTCCAATGCATTCCGTCACATTCTGGCCTGTCATCTCTAG\n") - f.write("ATGAACACCACCTGGGTGGCTTCCTTCTTGCTCGTGTACATCAAAGAACGCTCTCACCTCCGCCCTTATG\n") - f.write("GCATCAAAAGGACGTGTTTTGAGGCCGCAGGGAGCCTTAATGGTGTTTCCATGCATTGGGTCACTGACCC\n") - f.write("ATGTGACAATCTGTCCAGCCCTGCGG\n") - f.write("\n") - f.write(">gi|226792314|gb|GO546061.1|GO546061 Mdas9010N21_e794.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P36491|PSBA_POPDE Photosystem Q(B) protein (32 kDa thylakoid membrane protein) (Photosystem II protein D1) pir|S42492 photosystem II protein D1 precursor - cottonwood emb|CAA55040.1| D1 protein [Populus deltoides], mRNA sequence\n") - f.write("GAGAATTCGTGTGCTTGGGAGTCCCTGATGATTAATTATTAAATAAACCAAGATTTTACCATGACTGCAA\n") - f.write("TTTTAGAGAGACGCGAAAGCGAAAGCCTATGGGGTCGCTTTTGTAACTGGATAACCAGCACTGAAAACCG\n") - f.write("TCTTTACATTGGATGGTTTGGTGTTTTGATGATCCCTACTTTATTGACCGCAACTTCTGTATTTATTATT\n") - f.write("GCTTTCATTGCTGCACCTCCGGTAGATATTGATGGTATTCGTGAACCTGTTTCTGGATCTTTACTTTATG\n") - f.write("GAAACAATATTATTTCTGGTGCCATTATTCCTACTTCTGCAGCTATAGGTTTGCACTTTTACCCTATATG\n") - f.write("GGAAGCGGCTTCCGTTGATGAATGGTTATACAATGGTGGTCCTTATGAGCTAATTGTTCTACACTTCTTA\n") - f.write("CTTGGTGTAGCTTGCTACATGGGTCGTGAGTGGGAACTTAGTTTCCGTCTGGGTATGCGCC\n") - f.write("\n") - f.write(">gi|226792313|gb|GO546060.1|GO546060 Mdas9010O06_e796.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAN62336.1|AF506028_3 CTV.2 [Poncirus trifoliata], mRNA sequence\n") - f.write("TGCAAATCTCAGACTGCGGTGTCGTATTAATCCTTCTGCTTATCTTCCTGCTAATGTTAGCCCTCCGCTT\n") - f.write("CCCCTTTGCAACAGTTCTAACGTACAGCCCCTTGTTATTGCGGCGCATCCACAAGAAGCAAATCAGTTTG\n") - 
f.write("CATTAGGACTATCGGATGGTGCTGTTCACGTCTTTGAGCCCCTTGAATCTGAAGGCAAATGGGGTGTGGC\n") - f.write("TCCACCTGTTGAAAATGGTTCAGCAAGCAGTGCGCCGGCTACTCAAGTTGGAACTTCAGGTTCAGATCAA\n") - f.write("GCTCAGAGATGATCAAGAGGCTGGCAGAGCACAAGTTCTTTTACATGCTTTGCTTGCTGGCCTATTCCAA\n") - f.write("ATACAGTTTGTGTTCTTCATGCAGTGTAATTTAATCAAGGAAGGATGTGCAGATATGTATATTTAACAAG\n") - f.write("TTGGTTCAGAATCTCAAGTAAGGTA\n") - f.write("\n") - f.write(">gi|226792312|gb|GO546059.1|GO546059 Mdas9010C05_e728.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTGAAACTTTGAAAGCATATATATTTATGAATGTTTAATGGTACATGCTCATACACCTCACATTTAACA\n") - f.write("TTTGAAAATAATTACAAACTAAGCAAAACAAGTAATTAGTTTTTGCATCAAAAAGTGTCAAGTATGTTTG\n") - f.write("ATGCATTGTGTAAGATAAAGCCGAACACAGCTGACAAATATGTCAATATAGCTCAGCTTTATCCACACAT\n") - f.write("TATTTTAAGAAATACTATGGTAAAAACCATAAGCATCTGAAAGAGTAATGGCAGTGGACTCGCTCCGCCG\n") - f.write("GCTTGTGTCCTTCACTTTGTTGAAGTTTTTCGAGCCATTGCCGCCTTCAGTTTCTCCTCCCTTCTCACCC\n") - f.write("CCGCGCTGTTGCTTGAGAAAGATTTTGGAATCCAGTCCTGAGATTAAAAGAATGGATGACTGGAATATGC\n") - f.write("TCAACCGCCTGACGATCTTCACCATCCCTTTGAAGCTTGGCAATGAGTACAGTGGGCATGTCATGGAGAA\n") - f.write("ACAGTTCCTCGAGTTCCCGTAGACCCTCCATGCCGAGTGGAGGTAAATTCAAACTCTTACAATTGGACAG\n") - f.write("GGTTACCTTCTTAAGATTANGCATTGCTCCAGACTCAACTTTCATGGTATGTAGTTGATCAAATTGTTTG\n") - f.write("ATACTTAATATCTTTAGCTTCATGAACGTTTTAGCCTCAGATTCTAACACTTGTCCGGTATAATAGCAAA\n") - f.write("CCAAATGAAGCTCCATCATATTCGGCAATGCTT\n") - f.write("\n") - f.write(">gi|226792311|gb|GO546058.1|GO546058 Mdas9010C19_e731.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P93111|HMA1_CUCSA Glutamyl-tRNA reductase 1, chloroplast precursor (GluTR) pir|T10186 glutamyl-tRNA reductase (EC 1.2.1.-) - cucumber dbj|BAA08910.1| glutamyl-tRNA reductase [Cucumis sativus], mRNA sequence\n") - f.write("TTTAATTACTTTTTTTGAGAGTATTACTGATATCTCATATTTTCCATATGTTGCCGAACAGCCACTTACG\n") - f.write("AACATGTGGTGGTAGACATATAAATCACTCCCACAGAAAAATCATGTTACTGTGTCTTTCACCAGTGAAT\n") - 
f.write("GAATTGCAAGATCCCCCAGCCGGGACCACCACAATTTGTCTATCGAGTTGAAGATTACACCAGACTGAAC\n") - f.write("ACCAAAATGACAATGTGAGGATGTTTTCCCCTTATTCGAGTATGAGGAACTCCAGGTGTCATGAGAGATA\n") - f.write("TTTGACATTGTACCTTTAGTTCTGGGTTTGTTCCACCTTACCTCGAATTTTCTGTTCCCATACGGATCTC\n") - f.write("TCCGTCTCGAGGCTGAACATTCTGTTAATAGCGTGCATGTTCTCAATGGTCTCACTCAGAGTTCGACTAT\n") - f.write("CTCTCCCATCACATCTTAGATGATG\n") - f.write("\n") - f.write(">gi|226792310|gb|GO546057.1|GO546057 Mdas9010I08_e758.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTCATTTCGAATGGAACGTTTAATATAAATTGCTTGTTTACCAAAAA\n") - f.write("AGACATCAAGGATTTGATCCTTGTATTAGTGAATATACAAGAAAAATCCTACGAGTAACCAGGGAGGGCG\n") - f.write("TTAAAGCGCTGTACAACCGGTAAAGATTAGCAACGAGGCAGTTAGGAGTTGAGAGGGCGGCCTACTTAAT\n") - f.write("GTTTTAGCCGCTGGCGTTTATCCTGATGCCTGGAAAATGTTGGACTGCAAATTAAGTCAGGAGAAACAGA\n") - f.write("AAATTGATTTGGGCAACATGTCCATAAAGAATCTCCCCTAATTGCACTGGCACTACAACCGACTCAATAA\n") - f.write("TTCAAGAAACTCCACTGGTTCTTCATTTTCTTTAACAATCTTAATGATTGTGTCCTTTGGCAGACCCATT\n") - f.write("TGAACAAGAACATGAAAACCAACTTGATCCCAGTCACTCTCTTCTACATCAGTTCTCTCTCTTCCACTAT\n") - f.write("CTAGTTGAATGGAAAACTTATCACTGTTGAAAAATCTCCCCACCCAGAGGTACAAAACCCGGTCTTTACT\n") - f.write("TTTTCCAAAATCTGTATTTGGTGAGAAAATAGTGTAAGCAGCTTTAGAATCTAGTTCCCTCACACCAAAT\n") - f.write("GTCGCAATCTTCTCTAAACTGGGCCAACGGCACACAAAAGGTTGGATAAAATTGCATTGCGCTGGACAAC\n") - f.write("TT\n") - f.write("\n") - f.write(">gi|226792309|gb|GO546056.1|GO546056 Mdas9010P01_e803.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_177574.1| dolichyl-phosphate mannosyltransferase polypeptide 2 -related [Arabidopsis thaliana] pir|A96772 hypothetical protein F1M20.2 [imported] - Arabidopsis thaliana gb|AAG52357.1|AC011765_9 putative dolichyl-phosphate mannosyltransferas, mRNA sequence\n") - f.write("TTTTTTTAACAAGGTAATCAGTATCTGATCATTTCAGACACACTTGGATTCGGCTCCAAGGAAATGAAAA\n") - f.write("GAAACTAAAAGTGGGGGAAATACTGTCATCAAGCTTTCTTCTTTTTGGATTTTAACATGACAAATCCAAT\n") - 
f.write("GAAAATGCATAAAAAGCAGAGAAGCACCATGCCAGCAAACACAGGTATTAGTATGGCGTACTCTTGTGGG\n") - f.write("AGGAAATACCGGTGGATGAAGTGATCGTTGTTTACAAACGGCAGGATAATCACCCAAAATGTGTAATAAG\n") - f.write("TGAATATTGACAAACTGATGTAGGATAACAGAAATCCAACGGCCTTGTCCGCCAATTCCATTGTCGTGAT\n") - f.write("GTTTGTGGCTTCTGGTACCTGTAAATGGGGGAGAGGGACGATATGGCTACTGAGATGGTCTGCCTCGTGC\n") - f.write("CGAA\n") - f.write("\n") - f.write(">gi|226792308|gb|GO545802.1|GO545802 Mdas9004F21_e276.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_175036.1| histidine decarboxylase -related [Arabidopsis thaliana] pir|E96500 probable histidine decarboxylase [imported] - Arabidopsis thaliana gb|AAF63121.1|AC009526_6 Putative histidine decarboxylase [Arabidopsis thaliana] gb|AAK25943.1|, mRNA sequence\n") - f.write("TGCAGCTCGGCCACCTCTCGACCGTTCGATCTTTCAACATTACAAATTTCATGGATCACGAGAACGGAGC\n") - f.write("TGTGGCGGTGGAGATGCTATCCGATGATTTCGATCCCACGGCTGTGGTTGCGGAGCCTTTGCCTCCGGTT\n") - f.write("GTAACCAGCACCGACGACTGCGATCTCCTCGGAAAATTGGCCGAAGACCGCAAAGGTAGCCGGGAGAAAC\n") - f.write("AGATGGTTCTCGGCCGGAACGTGCACACCACGTGCCTCGCCGTGACGGAGCCCGAATCCAACGACGATTT\n") - f.write("CACCGGCGATAAGGAGGCGTACATGGCCAGCGTCCTCGCCCGCTACCGCAAAACCCTCATCGAGAGGACC\n") - f.write("AAGCACCATCTGGGTTATCCTTACAATTTGGACTTCGACTATGGTGCTCTGACGCAGCTTCAGCATTTCT\n") - f.write("CCATCAACAATTTGGGCGATCCGTTTATCGAGAGCAACTACGGTGTCCATTCAAGGCAGTTCGAGGGTCG\n") - f.write("GTGTTCTCGATTGGTTTGCTCGGTTGTGGGAGATTGAGA\n") - f.write("\n") - f.write(">gi|226792307|gb|GO545801.1|GO545801 Mdas9003I20_e214.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAN03474.1| syntaxin [Glycine max], mRNA sequence\n") - f.write("GGCGAGGATTTTTCTCATTGGCAAGCGGGACCCCACCCTAGCTGTGGCCTCCGGCATTTTCCAGATCGGC\n") - f.write("ACCGCCGTATCCACCTTCCAGAGGCTCGTCAACACCGTCGGCTCCCCCAAAGACACGCCGGATCTCCGCG\n") - f.write("AATAGCTGCACAAGACAAGACTACAGTATCGGGCAATTGGTAGAAGGATACTTCATAAAAACTGACTCTA\n") - f.write("GTCAGTGAAAGAGATCATCAGACCGAAGTTAATGCAAGCCAGAAGATTACAGATGCTAAACTTGCAAAAG\n") - 
f.write("ATTTTCAAGCGGTGCTTAGAGAATTTCAGAAAGCTCAACGACTTGCAGCTGAGAGGGAAACAACATATGC\n") - f.write("TCCTTTTGTTCCTCACGCAGTTCTTCCGACAAGTTACACCGCTAGCGAGATAGATGTAAACTCAGACAAA\n") - f.write("AGTCTGGAACAGCGTGCTCTCCTTGTGGAATCTAGAAGACAAGAGGTCTTGCTACTGGACAATAGATTGC\n") - f.write("TTTCAATGAGGCTATCATCGAGGAAAGAGAACAGGGAATACAAGAGATCCAGCAGCAAATTGGTGAAGTG\n") - f.write("AATGAGATTTTTAAAGATCTTGCTATACTGGTTCATGAACAACGAACAATGATTGATGACATTGGCTCCA\n") - f.write("ATATTGAGAGTGCTCATGCTGCTACTGGGCAGGCAAA\n") - f.write("\n") - f.write(">gi|226792306|gb|GO545800.1|GO545800 Mdas9004H09_e283.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P49313|ROC1_NICPL 30 kDa ribonucleoprotein, chloroplast precursor (CP-RBP30) pir|S26203 RNA-binding protein 30 - curled-leaved tobacco emb|CAA46234.1| RNA binding protein 30 [Nicotiana plumbaginifolia], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTAATGATTTTGATGTACCAAATCTATAAACTGTCAAAACAAGTTGATC\n") - f.write("ACAATCAGTTCCTTAACTGGGCCAAGTCACTCGGAATATAAATCTTACAACTCTCAAAACAGTCTTCTCT\n") - f.write("GCTCTTGTCCATCTACAGGCAGATGTAAAAAAACCACCACTACCGCTCACAAAGCCAACCATGGCGCAAG\n") - f.write("GCAGCATGATCTTGTCACTAACCGCTCCTAAAAATCTGAAATGAGTATCAGCAGATATTATGATCCTAAA\n") - f.write("TGCAAACGGAAAGAAAAGAAACCCATCAAGTATTCTGTTGGCCAATGACTCTGCAAAGTTTACATTTAAA\n") - f.write("ACTGACGCCTCGGCCTAGGTTCTGCCGCAGACACTCGGATAGATCTTCCATTTAAGTCAACTCCATCCAA\n") - f.write("TGATTCAATGGCACTGTTCATTTCATCAGCAGTATCATAAGTTACAAAACCGAAACCCCTCGATCTGCCA\n") - f.write("CTGTCCCTGTCAAAAACTACCTTGGCTTCCAAAACCTTTCCTTGCTCACTAAACAAGTTCTCAAGAGCCA\n") - f.write("AATTGTCAACACCCCATGCAAGGTTACCCACGTAAAGGCGGTTGTTAGAGTCATAACCACCACCACCTCT\n") - f.write("GGGACCTCTGGCACCTCTGAAAGAGGAAT\n") - f.write("\n") - f.write(">gi|226792305|gb|GO545799.1|GO545799 Mdas9004D05_e269.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("CTACCCGCCGCTCGGTGGGAATTACGATTCAAGGCGGCCACCGCGGCTCGTCCGCCGCGAGGGCCAGACC\n") - f.write("ATCGACATGAGCCTTTGGGGGCCGGGGCCCCTACTGCAGGTCGGCAATCGGGCGGCGGGCGCAGGCGTCG\n") - 
f.write("CTTCTAGCCCGGATTCTGACTTAGAGGCGTTCAGTCATAATCCAGCGCACGGTAGCTTCGCGCCACTGGC\n") - f.write("TTTTCAACCAAGCGCGATGACCAATTGTGCGAATCAACGGTTCCTCTCGTACTAGGTTGAATTACTATTG\n") - f.write("CGACGCTGTCATCAGTAGGGTAAAACTAACCTGTCTCACGACGGTCTAAACCCAGCTCACGTTCCCTATT\n") - f.write("GGTGGGTGAACAATCCAACACTTGGTGAATTCTGCTTCACAATGATAGGAAGAGCCGACATCGAAGGATC\n") - f.write("AAAAAGCAACGTCGCTATGAACGCTTGGCTGCCACAAGCCAGTTATCCCTGTGGTAACTTTTCTGACACC\n") - f.write("TCTAGCTTCAAATTCCGAAGGTCTAAAGGATCGATAGGCCACGCTTTCACGGTTCGTATTCGTACTGGAA\n") - f.write("ATCAGAATCAAACGAGCTTTTACCCTTTTGTTCCACACGAGATTTCTGTTCTCGTTGAGCTCATCTTANG\n") - f.write("ACACCTGCGTTATCTTTTAACAGATGTGCCGCCCCAGCCAAACTCCCCACCTGACA\n") - f.write("\n") - f.write(">gi|226792304|gb|GO545798.1|GO545798 Mdas9004G02_e278.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTTTTTTAGAAAACGATATAGACAATATAAAATTTCAAGATACACACGATGTGGAAGTACATATAGAAA\n") - f.write("CGATGAACTAGCGCATAATTTGTACTAGCTAGCTCGATCCCTCTTCCCTTAGCAAACAGCTTTATAAAGC\n") - f.write("TAGCAGTGGAAGAGCTAGCTAGATTAATACTATAACAACAAAAGGACAACTAGCTAGGGCATTTGTTTCC\n") - f.write("CATACATATATGTGTTCAAGCATGACATTTGACGGGCTAACCAGTCCCATAATAACGCTTATTATTGATC\n") - f.write("GGTTTACCGGTAGAAGTACGAGTACCATATACTTCTGTGATGGTTATCCCACCAAAGATTCTCGCTGCCT\n") - f.write("CATCAGAATTGATGAGCTTGTTATAATTATTAGGCCTCTGAAGGTTCTGTCGAGGTCCATGATTCTCCAT\n") - f.write("GTTGGCTGCAGTCGCCAGTCGTGGGTATCCTTGGTTATCATGATCACCGGTTGAAAGAGGTGTATTAGGA\n") - f.write("TGAAGAGAAGAGACTTGACTCGCCTGTCGTGGGTATCCTTGGTAATCATGATCACCGGTTGGAAAAGTTG\n") - f.write("TATCAGGATGAAGAGAAGAAATTTGACTCCCCATGTTAGCGAGAAATTCCAAATAATGAACTAGGACAAG\n") - f.write("TTCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792303|gb|GO545797.1|GO545797 Mdas9004B01_e254.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAD13683.1| major allergen Mal d 1 [Malus x domestica], mRNA sequence\n") - f.write("TTTTTTTTTTGCAAGTGAGACATTAACTGATTTCATTATATTTATAGATAATACACTTACAGTCTGCAAC\n") - 
f.write("TTTTTTTTCAAACAAATACATAAAGGGCAACCAGAAGATGATCACAACCATAAACTTATTTTTCAAAGCC\n") - f.write("AAAGTGCTCAAAACCATGGCTACTTTGGGCTGACTAGAAAAAGCATACAAAGAAAAGCCACACAACCTTC\n") - f.write("GACTGAAGGGTGACACCCAAAATACCATAGAAACATATTAATTTAGTTGTATGCGTCGGGGTGGCCCTTG\n") - f.write("AGGTAGCTCTCAATAAGCTTGAACAAACCATGAGCCTTCTCTTTGCCAGCCTTGACGTGCTCTTCCTTGA\n") - f.write("TCTCAACATCACCCTTGGTGTGGTAGTGGCTGATACTCTTGATGATGGAACCACTTCCAGATGCCACCAA\n") - f.write("CTTGGTCTCGTAAGAGACCTTCTCAATGGTGTCTGTCAAAGCATCTCCTTCAATCAAAGTGTAGGCGTAT\n") - f.write("GAGTAGTTTGCTTCGTCAACCGAGTCGATCTTGTGCTTCACGTAGCCGTATTGGCTGCCTTCACCAAAAG\n") - f.write("TGATCTTCTTGATGGTTCCAGGGCCACCGTCTCCTTCAAGGATCTCAGCATGCTTGATTGCCTGGGGTGC\n") - f.write("AATCTTGGGGATGAGGTTATCAG\n") - f.write("\n") - f.write(">gi|226792302|gb|GO545796.1|GO545796 Mdas9004D23_e271.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAF73132.1|AF149017_1 homogentisate 1,2-dioxygenase [Lycopersicon esculentum], mRNA sequence\n") - f.write("TTTTTTTTGAAGGCCTATACCACTAGCACTGGTGTGGTGATTATTATTATTTTGCAAATTTAAGTATATA\n") - f.write("ATAGGCATCTGTTTACATGTAAAGAAACTTTGGGGGCCCATACGAATCCGGGGCACCTTTTGCACACCCT\n") - f.write("CAAGGCCGGCTCAGGTGCTTTAATACAGAAGTGGATACAAATAAGCATATTTGGTACACTTAACACTTTT\n") - f.write("TATAGTACTATGTACTCAGTAGGATTGTATTTGCTTAGTCTTTGTATGAACTATAGTTTCCATTCGTCTA\n") - f.write("AAGCAGCCACGGCAGTCTCCACTTTAATCCTCTTTCTTCCCATTCTGGACATCTCCGTCTTCAGCATTCA\n") - f.write("TCCTTCCACGCGTAAAATGTGATTTCAGTCCAGTCCAGCACTGGTAATAATCACGATCGATGACTGGAGA\n") - f.write("CTCGAGAGCCCATGTGCATACTCTGGGGATTAAACACGATTCAAACATAAAAGCCAAAGTATCGGTGATT\n") - f.write("CTGAATGGTCCTGCCTCATTTCCACGCGCAATGGTAGCCTCATATGTTTTGGTATCAGGACCATGGGGAG\n") - f.write("TCATGCAGCTATGAAGGCTTGCACCACCAG\n") - f.write("\n") - f.write(">gi|226792293|gb|GO546055.1|GO546055 Mdas9010K09_e770.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("GGGGGCCTTCCCCGGGCAGCGAACGTTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAAT\n") - 
f.write("TAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGT\n") - f.write("CAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAA\n") - f.write("TGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCC\n") - f.write("AGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACT\n") - f.write("CTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGTATAAGTGGGAGCCTCGCGGCGAAATTGAAATAC\n") - f.write("CACTACTTTTAACGTTATTTTACTTATTCCGTGAATCGGAGGCGGGGCATCGCCCCTCTTTTTGGAACCA\n") - f.write("AGGCCCGCTCGCGGGCCGATCCGGGCGGAAGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCT\n") - f.write("GTTAAAAGATAACGCAGGTGTCCTAAAATGAGCTCAACGAGAACAGAAATC\n") - f.write("\n") - f.write(">gi|226792292|gb|GO546054.1|GO546054 Mdas9010G23_e750.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to emb|CAB90634.1| protein phosphatase 2C (PP2C) [Fagus sylvatica], mRNA sequence\n") - f.write("TCACTTGTCACAAGAGCACAATGCCTGTATAGAATCTGTGAGACAGGAGCTGCAGTCTTTGCACCCTGAT\n") - f.write("GATCCACATATCGTTGTTTTGAAACATAATGTATGGCGTGTGAAAGGCCTAATACAGATATCTAGATCTA\n") - f.write("TTGGTGATGTGTATTTGAAAAAGGCTGAATTTAACAGGGAGCCTTTGTATTCCAAGTTTCCTCTTCCGGA\n") - f.write("CCCTATCCAAAGACCAATTTTGAACGCTGATCCAGCAATTTCAGTGATCCCATTGCAGCCACACCATCAA\n") - f.write("TTTATTATATTTGCATCTGATGGGCTATGGGAACACCTCACCAATCAGGAAGCAGTGGACATAGTTCAAA\n") - f.write("AACATCCACGAAGTGGAAGTGCAAAAAGGCTGGTGAAAACCGCCCTGCAAGAAGCAGCAAAAAAGAGAGA\n") - f.write("AATGAGGTATTCGGACTTGAAGAAGATTGACCGCGGGGTTCGCCGCCATTTCCATGATGACATAACCGTG\n") - f.write("ATTGTTGTGTTTATTGACTCAAACCTTGTGAGCAAGGCTAGTTCTGTCAAGGGCCCAAATCTATCGGTGA\n") - f.write("GAGGAGGTGGAATCAACCTGCCTCCTAACACTCTGGCTCCATACGCCTCGGCCACGGAAGCTGGCAATAC\n") - f.write("CTGATTAAACTGTATCTCTCATGTGTGTTCTGTTGCAC\n") - f.write("\n") - f.write(">gi|226792291|gb|GO546053.1|GO546053 Mdas9010J19_e765.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAO63778.1| unknown [Populus tremuloides], mRNA sequence\n") - f.write("TTTTTTTTTATTAATGAAATTGCTAAGACAGCTGATTTTCATTCACTGATTATTCGAGTAAATACTATGT\n") 
- f.write("AAAAGAAATACAAAGTAATACATCAAAAATACCTCAAAGACTTCTGGATCATGGAAAATTCAAGACCAAC\n") - f.write("CACAATATTTTAACCAAAATTTTATCCCAATTTCAACCACTGTACATATGATTTCTCTCCTTGAAAAAAC\n") - f.write("ACCGTATCAACTGAATTTTGCCAACCATTTAGCAAATGACACAAGCATTGGGATCCTCTTCTGAACTTCT\n") - f.write("TACGTAATAATACGCAGGCATTGGAGGATAAAGTGCTGGGTAGGCCTTCATAAGCTCAAGCATTTCGTCC\n") - f.write("TTTGGATCTTTCTTCTTCGGCTCCTCCTTCTTCGGCTCCTCTTTCTTCTTCTCCTCCTTTGCCGGTCCGA\n") - f.write("CTGAGACTATCTCCGTCCGGCAAAGCTTCCTCAATCTTCCCACCAAATGCACGGGATCTATGTCCCCTGT\n") - f.write("GGCCGTCAACTTCTTGTCCTTCATATCCATAGAAATTGAATCAAGGCCTTCAAGCCCCGAGACGGCCCTC\n") - f.write("ATGACTTTCTTCTTACACTTCTCGACGTATAATTCCAATTTCAACACTACTTTCTTCATTTTTCTGTGCA\n") - f.write("GCTTG\n") - f.write("\n") - f.write(">gi|226792290|gb|GO546052.1|GO546052 Mdas9010M15_e783.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAC55114.1| S-adenosylmethionine decarboxylase [Malus x domestica], mRNA sequence\n") - f.write("TGACGGCCATTTTGGCAAGCTTGGTTTGGCAAGCAAGGCATATGTTATGGGAAGCCCTGACAAAACTCAG\n") - f.write("AAATGGCATATTTACTCTGCATCGGCGGAGTTGGCAAGCTTATTTTGGGGTTCACGCCAATCAGGCCCTA\n") - f.write("CATACACTCTGGAGATGTGCATGACTGGTTTAGACAGGAAGAGGGCTTCTGTCTTTTACAAATCCGACGC\n") - f.write("AAGTTCAGCTGCTGGTATGACCGTAGAATCCGGCATCAGGAACATCCTCCCGCAGTCCGATATATGTGA\n") - f.write("\n") - f.write(">gi|226792289|gb|GO546051.1|GO546051 Mdas9011B22_e817.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAA24493.1| chlorophyll a/b-binding protein [Fagus crenata], mRNA sequence\n") - f.write("AAAATTGTCCATTGATGCAGTTTTTAAAGCTTGAGCTTACATACAAGCAGGTAGCTTAAAGATCATCTAT\n") - f.write("TGTACACACTGATTAGGTTTCCTCACTTCCCGGGCACGAAGTTGGTGGCATAGGCCCAGGCATTGTTGGT\n") - f.write("AACAGGATCAGCCAGATGATCGGCAAGGTTCTCGATAGGGCCCTTTCCAGTGACAATGGCCTGCACAAAG\n") - f.write("AAGCCGAACATTGAGAACATGGCAAGTCTTCCATTCTTGAGTTCTTTGACCTTAAGCTCTGCAAATGCCT\n") - f.write("CTGGGTCATCTGCAAGTCCCAAAGGATCAAAGCTGCCTCCGGGGTACAGTGGGTCAGTGATCTCACCAAG\n") - 
f.write("GGGTCCTCCGGCGATGCGGTAACCCTCAACCGCACCCATGAGGACCACCTGGGTTGCCCAGATTGCTAGA\n") - f.write("ATGCTCTGTGCATGAACCAGGCTTGGGTTTCCAAGGTAGTCAAGGCCACCTTCGCTGAAGATTTGAGCTC\n") - f.write("CGGCTTTGAACCACACTGCTTCACCAAACTTGACTCCGTTCCTGGAGAGGAGCTCCGGGAAAACACAGCC\n") - f.write("CAAAGCACCAAGCATTGCCCATCTTGAGTGAATGACTTCAAGTT\n") - f.write("\n") - f.write(">gi|226792288|gb|GO546050.1|GO546050 Mdas9010G22_e749.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTTTTTTTTTGTTTAATACACTGAGAAATATATTCTATGGCTTAAATTGTGCCACTTGTCAAAGGATTG\n") - f.write("GAATGTTAGTTACTCAAATTCGTTAGTGAGCTGGTATTATCTACAAATTAATACATACTAAAAAAAGGGG\n") - f.write("ATATCACAATCTTCAATTCTACGTTGACGTATTAACTTAAATGAGATTGAAACTTTACACTTCTTGGAGC\n") - f.write("ACTATTTTGATTCTCATCAAGGTTGATCACTCTAATGAAGGGCCTTCCTCCTTTGTGTGCATGCTCCATC\n") - f.write("TCTAAAGACCAAGAACACCCTTCATAGATTTTCGGTTGCACCTAGCCTTGTCAAGAGTAGGTTGCCTACT\n") - f.write("TACTTCATCTCCAACAAAGATCAAACCGTCGTATTTCAGCCTCAGAACTAACGTAAGGATGGAGGAGGAG\n") - f.write("CAAGAGTGTTAAACTAGGACACACTCAAAAGAGATGGATGGCTTTGGATGTGTTTAATAAGTGAT\n") - f.write("\n") - f.write(">gi|226792287|gb|GO546049.1|GO546049 Mdas9010N22_e795.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_198367.2| adenylate kinase -related protein [Arabidopsis thaliana] gb|AAL49859.1| putative adenylate kinase [Arabidopsis thaliana] gb|AAM91697.1| putative adenylate kinase [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGAATGTGGAACGTTATTCAATTTAGTAGGAATTCCCCTCCAGTTATCCTCAACTGAAGACGGATTT\n") - f.write("GAGCTACTCCAACTTTTTCCTAATTTGATCTTCATTTCTTTATCTTTCTGCACTTGCGATAGTATAGAGT\n") - f.write("CTATTACTTCGAAAACAACATCCTTCGAATGGTTTCCATCGATCTTTTTCATGATATTCGAGTATGCGGC\n") - f.write("TGAGATTGAATCTGCATTTTTCTTGTATATTTCAAGACGCGACTTCACTTTTTCCTCGGTGTCATCAGGA\n") - f.write("CGAGTAATAAGTCTTGCTTTAATCTCCTGATTCTCCGGAGGGAAACTTTTTATATGGTAGATCTTCCCTG\n") - f.write("TAACTGGGTCTAGCCTTCTTCCAACACATCTGTCGATTAGAGTTTCATCAGGAACATCTAACACAATGTA\n") - 
f.write("CACATCTGGTATAATCTTCAATTTTTGCAGACTTTGCGCTTGGTTAAAACTCCGCGGATATCCATCAAGA\n") - f.write("AGCCACCCTTTTTCCTTCGCATCATCGCGTGATAACCGTGCTGTCACCATCGCTGTCACGACTTCATCGG\n") - f.write("GGACCAGACGACCGGCATTCATGAACTCTTTGGCCTTGTTTCCAATTTCCGTCCCGGAATGACACTTCAG\n") - f.write("CTCTGAGAAGGTCCCCCGTTGATATGT\n") - f.write("\n") - f.write(">gi|226792286|gb|GO545795.1|GO545795 Mdas9003L23_e224.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_568299.1| 30S ribosomal protein S13, chloroplast precursor (CS13) [Arabidopsis thaliana] sp|P42732|RR13_ARATH 30S ribosomal protein S13, chloroplast precursor (CS13) pir|S59594 ribosomal protein S13 precursor, chloroplast - Arabidopsis thal, mRNA sequence\n") - f.write("TTTTTTTGTAAATAATTTCAGCAAGTTCATCACTTCTGATTAAATTTCATAGCATTCCAAGTAGTCAAGA\n") - f.write("ATCTACTGCAGTGTGACCAAGTGATTAGACAACATTTCCTTCTCAACATTTGACAATCCTATGAATAAAA\n") - f.write("CTGTGATCAAACAGGTACACCGACATTCACCTCTCCCAGACCCTGTGTAAAGCGGGAGCCTTGTGCACTG\n") - f.write("GGTACGACCTTATACATTGATGATCCCAACTACATTTCCAAAGGTGAGGGCACAGCTCAACGAAGCAAAA\n") - f.write("GCGATCAGAACAATTGTAGTTAATACTAAAACACAAAACCAGACAAAGGAATTCATTACCAATTACAAAA\n") - f.write("AGAGTCGAAAACCCATTCAATTACTTAACGGGCCTTTTTCTTTCCAGCAACAGTGACCCTCTTACCCTTC\n") - f.write("AGGGTCCTAGTGTTGTTCTTGGTGCGCTGTCCTCTGCAAGGCAATCCCTGGCTGTGGCGTATGCCTCTGT\n") - f.write("AGCACTGAATCTCCTTGAGTCTCCTTATATTCAAGGCATTGAACCTCCTCAAATCCCCTTCAATCATGTA\n") - f.write("CTTGGAGACTTCATCTCGGATAATGGTGAGCTCTTCTTCCGAAAAATCTTTGGTGATTTTGTTCT\n") - f.write("\n") - f.write(">gi|226792285|gb|GO545794.1|GO545794 Mdas9003H05_e208.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TACCATTCCATGATATTATTAGTTTTTTGGGAAATACATGCATGAGTTTTTACTATTTACTATTTACATG\n") - f.write("CACAACAAGATGAAGTAAATGCAGATAGGCTAGTCAACCCTGCATGAACTCCCATTAAATAGCTAATCAA\n") - f.write("AGGCCTTGAGACGTTAGATCGCCATCTGACGAGGATTCGGAGATGCTACACTTGATACTTTCCAGCAGTG\n") - f.write("GGCTGAGTAGATATGGTGGTGTATTCATCTGCCAAGTATTCTCGCTAGTACCAGTAGTGGTAGCAGTGCA\n") - 
f.write("CTCAAAGTCGATGCTGCTTGGTTGGTACCCGACATTGATAGATTTATGATGATGATCGGCTGCGCCTAGG\n") - f.write("TGTCCATTTTGTTGATCGAATGCTCTCACCGTTGCATGAGGATGATGGTTCGGATATTGCTCCACTACAT\n") - f.write("GATCCCCTTGTGTCAATAAAGCAGTCTCATTCAACATTGCATCGACCACTTCCAACGAATTCCAGTTTAC\n") - f.write("ATAGGCTTCCCTTGCTAAATTCTCAATGTAGGTCCTATTGATATTGCTGAGGTCTCGAATGGGGAAATTT\n") - f.write("TGGCCATTAACCACGGCCCTCAACACTTGACATATAGGATTTAGGAAAATCAAGAAATGAGAACCGCGAA\n") - f.write("ATATGAACATTTTGTTACCCATCACGCAGGTTTTGGCGTGCTTTACTGTCACTCCC\n") - f.write("\n") - f.write(">gi|226792284|gb|GO545793.1|GO545793 Mdas9004A01_e249.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAD27878.1|AF139466_1 chlorophyll a/b binding protein CP29 [Vigna radiata], mRNA sequence\n") - f.write("TTTTTTTATGATAAGCTCACATGCCATTGCATACACTAATCCTTTGATCCCATCACAATCTTTACAAGCT\n") - f.write("TAAGCAGTACATATCATGATTCAGATAATTAATCATGGTCTACTGATTAAAAAACCAAACATGATATGCA\n") - f.write("CATAAAGATACTTCTCTAAGAAGAGAAGGCGTCAATAATGGTGGTGTGGAGCGGGTCACTCAAATGGGTA\n") - f.write("GCCCAGTTGTTAAGTGGTCCCTTCCCGGTGACGGCGGCTTGCACGGCAAACCCCAGAAACGCAACCATGG\n") - f.write("CGAGGCGGGCGTGCTTGATTTCGGCCAGCTGAAGAGTGGCCTTCTTTTCCGGGTCAGCAGCAAGGCCCAA\n") - f.write("CGGGTCGAAGAACTTGCCACCCGGGTACAACCTCTTTTCCGGGTCGAGCTCGGCGTTCCTCTGGAACTCG\n") - f.write("ATGTAGCCAATCACTAGAACTTCGATCCAGATCAACGTGGTTAGTGAGAAAGGGAGTGGTTGACCCAAGT\n") - f.write("AGGACGACCCTTCGATGAGCTCCACCTTTCCGGCGTCTTGCCATGTGATCCCGGTGACGGACTCGACAGT\n") - f.write("GAGAGCACCGAGCGTGGCGAGCATGGCCCACCTTCCATGAATCAGCTCGCACTCCCTAAACCTCTGCAGC\n") - f.write("CCAAACACCTCCGAGTAAGGCT\n") - f.write("\n") - f.write(">gi|226792283|gb|GO545792.1|GO545792 Mdas9003M19_e231.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("GCAAGATAAATCTCTTTACTTCTTAGATATCGAGGAAATTAGATTATGCAGATACATACATGATATATCT\n") - f.write("GAATCGGAAAATCTGTAAGGCTTTCAAATTAAAATTTACAGATGGGCATAAAATCTCTTTAGCTTAGTCA\n") - f.write("TACGTAATTGAATCATATGCTGTTTCCTGCCATATAGAGTTGTACACATGCTCCTCTTCTATGCCAACTC\n") - 
f.write("CATTATAATAAACATGAGAATAATCACGAACCCATTCACAATGATCTTTTGGACAATTATCCATCTGATC\n") - f.write("ATACTCACAAGAATCCGCATACCAATTCATGCCATTACTTGGTGAGCAAGCCACCTGATCCATTTCAAAC\n") - f.write("CATCCCAACGGATCGCAATCATCTGATGAAGGTGATCGTAAGCAATCCTGACCGTCCAAGCCTCCTACGA\n") - f.write("TGTTCCACAAGTTACTATCTTCCAAATCTTCGATGGTGATCCATTCAGTCTGTGTTACTCGAATCATTGC\n") - f.write("GGTCCATGGTTGTTTTGATTATTTCTGCCTCTAGAGATTGGATCACACTGTTTAATCTCTCCTCATCTCG\n") - f.write("ATCTTCCTCCTGCCATTCTTCTAGCAACGACATGAGAATGG\n") - f.write("\n") - f.write(">gi|226792282|gb|GO545791.1|GO545791 Mdas9003O20_e241.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTAGGGTTATGAATTGAAATCAAATCTTGTATTTGATCTCGTTTTT\n") - f.write("TTTTTTTATTATATTACATAAGCATTACATACATAAGAGGGGCAATACACGATCGCCTCCTTCGCTACAA\n") - f.write("GCATGCTACTACTAATAACCCAGTACGTACTATAATTAATTTAGCTTATACCCACATTCTAGGGCATAGT\n") - f.write("TGACAAGGTCTTTGATCTCCTCTTTAGAAATGGTCCCGTCACGGTTGGCATCTGCATGACCCAAAGCTCT\n") - f.write("CCTCGCCCTGAAACCACCCCAACGCGACCCGAGCTTTTTGAAGGCTGCCTTCACCTCCTCTTTGGACAGC\n") - f.write("ATGCCGTCGCCGTCCAAGTCAAAACTATTGAAGAGATTCACCACCTGCTCCCTAGTCCATGGAACACCGG\n") - f.write("TTGGCTCCTTATAAATGACATCCCTAACCATGATACTATATAGATATATATAGTGATCTTGAAATTTTCT\n") - f.write("TCTGTGTTTTGTTCTACACACTCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792281|gb|GO545790.1|GO545790 Mdas9004E19_e273.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_188937.1| expressed protein [Arabidopsis thaliana] dbj|BAB03045.1| gene_id:F5N5.17 pir|G71408 similar to unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TGTTATTAACACTTACCCTCCAAACTTTATATATTAAAAAAAAAAAATACACTTGTGAGGAGTGTAAATT\n") - f.write("GAAATTTTTTGAGTACCAATAATACTTCTCTAAAAAAAATGATTGTCAAAAATTAAATTAAAAAGTGGGG\n") - f.write("GCCCCACAACCTTCAAATCACCGAATCCACCTTCTTTTCTAACATCTCCCCAATCAGTCTGGAAATCTCA\n") - f.write("AATTCCGATCTGAAATCGAGGTCCAAACTGTTGTTGCAGTTTTCATTGAAGCAGTTGCAGCGGTTCTGGC\n") - 
f.write("TACACCTCGTCACAGCCAATGATTGCTTTTCGTTGCTTTCCTCGATGAAATTCTGAACCATCTTTGTCAA\n") - f.write("AAACATGGAGCTCGGCTCGAACTCGGCTGATCCTCCGCCGTCCCCGTCCCCGTCATCTTTGTTGAAATGC\n") - f.write("AGCTCTTTGATTCCAATTCCACCGTTGGGCTTCTCCGTGGCCACCGAGCTTTTCAATACATTCGTGAGCT\n") - f.write("TAATCGAATTTCCCTCATTTCCACAAAGTTAAAGCTTCCTCTTGAGCTTAATAAAAAAATCAAGCTTGAT\n") - f.write("TCTCTACCTATTATACAATCAATTTTCTCTCCCAATCAATT\n") - f.write("\n") - f.write(">gi|226792280|gb|GO545789.1|GO545789 Mdas9004J06_e292.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAB40142.1| plasma membrane intrinsic protein 1-1 [Pyrus communis], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTTGGGGTGAAATAAAGGTTGGTGTTATTTCTGAATTGAAAAATA\n") - f.write("CACAAATTCACAGCCACAGGATACAAAGGAAATGAGCACTCACATCAGACGGACGGGATTGGTGGGGCCT\n") - f.write("CTCCCTAAAAAAGAAGGAACAGAGGGCCCCTTGATTCTTGAGACATATCCATCAACACACATGCCTTTTG\n") - f.write("CTTGCTCATATCACAATAAATTTAACAGCTTAATCCCTTAATTACGTACAAAATACGCTTAAAACAACAA\n") - f.write("GCATTACAGATGAAAAATAACACAAACTTGATCGAAGATTGGCTTGAAAGACGATCATAAAACCGATTTA\n") - f.write("AGACGTAATTAACAGTAATCACTTGGACTTGAAAGGGATGGCCCTGATCACAACCACGTGGTACAAAGCT\n") - f.write("GCAAGTGCTGCCCCAATGAACGGTCCAACCCAGAATATCCAGTGGTCATCCCAGGCATGCCGCTTGTTGT\n") - f.write("AGATGATGGCAGCGCCGAGACTCCTGGCTGGGTTGATGCCAGTTCCGGTGACGGGGATGGTGGCCAAGTG\n") - f.write("CACCAAGAACACGGCGAACCCAATTGGCAACGGAGCCAAAATCGGAACGTGGGAGTCTC\n") - f.write("\n") - f.write(">gi|226792272|gb|GO546048.1|GO546048 Mdas9010P15_e808.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_192772.1| light-harvesting chlorophyll a/b binding protein [Arabidopsis thaliana] sp|Q9XF89|CB26_ARATH Chlorophyll A/B-binding protein CP26, chloroplast precursor (Light-harvesting complex II protein 5) (LHCB5) (LHCIIc) pir|T04049 chlorophy, mRNA sequence\n") - f.write("TTTTTTAAGGATCAAGGTTAAAAGCCTTTAAAAGGACTCAATTTGATATACAATGCTTTTAATTTTGATT\n") - f.write("GGAGGAGAATAAGGAGGAAACTTAAAATTACAGGGTTGGAGCTCTTTCGATAGATCCACCAATGACAGTA\n") - 
f.write("AGCAAGTTGTTGCCGAAGGGATCGCTCAGATGCTTTGATAGGTTTTCGACAGGACCTTCTCCGGTGACAT\n") - f.write("ACGCTTGGAGGAAGAAACCGAGCATGGAAAACATGGCAAGTCTTCCGTTCTTAATCTCCTTCACCTTTAG\n") - f.write("CAATGCAGCCTGGTCTGGATCCTTAGCCAGCCCCAGTGGGTCAAAAGGACCACCTGGGTGAAGCTTGTCC\n") - f.write("TCCAATTCCAGACCGTTAGTGATTCTGTA\n") - f.write("\n") - f.write(">gi|226792271|gb|GO546047.1|GO546047 Mdas9011C03_e820.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTTTTTTTTGCAAACCAGTTTTGCTTATTTAGGCATCGGCTAACATCCAATAAAAGGTACAGACATTAA\n") - f.write("CCCAAATCGAATACTGCAATACTAAATTCACATTGTAAACACCCACCAATCTGCTCATTTACTGCAGAAC\n") - f.write("AATGCATTCTTGTGTCTCACCAACCATAGCAGCGCGGGAGCTTTTGTTAATGGTAGCGTCGACTTCTTCA\n") - f.write("TGAGAACGAGGCTTAACTTCTGGATCCTCCGGCAAATTTTCATTCCCATTTATTACCGGAAGCAATACGT\n") - f.write("TCGAAAATAAGAGCATTCACTTCTTGGAACCTCTCTTCAGAAAGGGAAACCTCTGCCAG\n") - f.write("\n") - f.write(">gi|226792270|gb|GO546046.1|GO546046 Mdas9010J15_e764.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_197427.1| expressed protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("ACAAAATTGTGAAAATTTGATTACACGGCTGAAAGATCCAATATTATACTTAATAAACTTTTAATTATTA\n") - f.write("ATAATCCAACTTAAGTAGCCACCCAAAAGGGCTCCCAATACTTTAACAATCCTTTCTTCTCACACTCACC\n") - f.write("GCCACCTTCTTTGCACCTGCTTGGCCTTTCAAACTTTTCTATTCCATCTTTACCCCTAACATTCAAACTA\n") - f.write("TTCCCATCCACCTCCTCACCCTCTATCAAACACACATTTGAACTATTCCCTTCTTCTTCTTCTTCTTCTT\n") - f.write("CTTCCTCTTCCTCCTCCTTATCGCCTTCTAATTGTTCTACCAATTCTTCTTCTGTATTTTCTCCCACCAC\n") - f.write("TTCTTGATCTTGCTCTTCTGTTTCCTCCTCAATTATGTCATTTTCACACCCTTCATTGTATTCTTGAATG\n") - f.write("GCTGCTGCTTCTTCCTCTGAGTCAAAACTGGGAGTAAAGGTTTTTGAAAGGCACAATCGAAGGCGGCCAT\n") - f.write("TGCTTCTTTCAGCTTGGAAGCATGAATGACTTAAAGGGGCCTTGACAGCTTTGATGATCAGCCTCCCATC\n") - f.write("TTCCCTGTGAGGCCTAAAGGGTAGAGAATCCTCGCCGCTTA\n") - f.write("\n") - f.write(">gi|226792269|gb|GO546045.1|GO546045 Mdas9011B15_e816.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAB91757.1| putative 60S ribosomal 
protein [Oryza sativa (japonica cultivar-group)], mRNA sequence\n") - f.write("CGACATCCAGTTTTAGGGTTTCCTTCTTCAGGAAGTTTTTTCGCAGCGAGGACAAAGATCTAACTAACCA\n") - f.write("TGAGTAAGCTTTCAAGTGATGGGCTGAGGGAGGCCATTTCTCAGATTAAGACAAATTCTGAGACAAAGAA\n") - f.write("GAGGAAGTTCACTGAGACGGTTGAGCTTCAGATTGGTCTGAAGAACTATGACCCACAAAAGGACAAGCGT\n") - f.write("TTCAGCGGTTCTGTCAGGCTTCCACATATCCCTCGCCCTAAGATGAGGGTTTGCATGCTTGGAGATGCTC\n") - f.write("AGCATGTTGAAGAGGCAGAGAAGATGGGTTTGGACTACATGGACGTTGAGAGCTTGAAAAAGCTTAACAA\n") - f.write("GAACAAGAAACTTGTTAAGAAGCTTGCCAAGAAATACCATGCATTTTTGGCTTCTGAAGCTGTCATTAAG\n") - f.write("CAGATTCCTCGTCTTCTTGGCCCTGGTCTTAACAAGGCAGGTAAGTTCCCAACCTTGGCGAGTCATCAGG\n") - f.write("AATCTCTGGAGGCAAAACTTAACGAGACCCAA\n") - f.write("\n") - f.write(">gi|226792268|gb|GO546044.1|GO546044 Mdas9010P11_e807.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_200125.1| cyclic nucleotide-regulated ion channel (CNGC1) [Arabidopsis thaliana] pir|T51354 cyclic nucleotide-regulated ion channel 1 [validated] - Arabidopsis thaliana emb|CAA76178.1| putative cyclic nucleotide-regulated ion channel [Arabi, mRNA sequence\n") - f.write("ATCGACTTCTTGCAAATAATCCACTGTGGAACAAAATATTTATAATTTCATGTGTGATTGCTGTGTCACT\n") - f.write("GGACCCTTTGTTCTTTTACATTCCAATCATCGATCAAGACAACAAGTGCCTTGGAATGGACAAAAAACTG\n") - f.write("AAGAATGTAACTCTTGTTTTGCGACTGCTCACAGATCT\n") - f.write("\n") - f.write(">gi|226792267|gb|GO546043.1|GO546043 Mdas9010L02_e776.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P28475|S6PD_MALDO NADP-dependent D-sorbitol-6-phosphate dehydrogenase (Aldose-6-phosphate reductase [NADPH]) (NADP-S6PDH) pir|T17013 D-sorbitol-6-phosphate dehydrogenase, NADP-dependent - apple tree dbj|BAA01853.1| NADP-dependent D-sorbitol-6-p, mRNA sequence\n") - f.write("TCGAGAAGCTTCAGATAGATTATCTGGATCTCTACCTGGTTCACTACCCAATGCCCACAAAGCACAATGC\n") - f.write("AATTGGTAAAACTGCCAGTCTTTTGGGCGAGGATAAGGTGTTGGACATCGATGTAACAATTTCCCTTCAA\n") - 
f.write("CAAACCTGGGAGGGCATGGAAAAGACCGTCTCTTTGGGCTTAGTTCGCAGCATTGGTCTCAGCAACTATG\n") - f.write("AGCTCTTTCTAACTAGAGATTGCTTGGCTTACTCCAAAATAAAGCCTGCTGTGAGCCAATTTGAAACCCA\n") - f.write("CCCCTATTTCCAGCGCGACTCTCTCGTCAAATTCTGTATGAAACACGGCGTTCTTCCCACAGCTCACACC\n") - f.write("CCTCTCGGAGGTGCTGCTGCCAACAAGGATATGTTTGGTTCTGTTTCACCTTTGGATGATCCAGTTCTCA\n") - f.write("ATGATGTGGCTAAGAAATACGGAAAGAGCGTGGCACAAATCTGTCTGAGGTGGGGAATTCAGAGGAAAAC\n") - f.write("AGCAGTGATTCCAAAATCATCGAAAATTCAGCGATTGAAAGAGAATTTGGAGGTTCTTGAATTCCAGCTG\n") - f.write("AGCGATGAAGACATGCAGCTCATCTACAGTATCGACAGGAAGTATCGTACCAGTCTACCTTCCAAGACTT\n") - f.write("GGGGCTTA\n") - f.write("\n") - f.write(">gi|226792266|gb|GO545788.1|GO545788 Mdas9003G21_e205.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGAAAAGTTGTGTAAATTGTCCCAATGGAAAGTGGACCAAAGGGTTCTTGGGGCATTAGAGGGCACA\n") - f.write("GGGTTAGATAATCTATATACTTTAGCAACTATTGTTGGTAGAAATGCCCATGACAATAGCATGATAGATG\n") - f.write("CTCTTGTTAGGCATTATGATGTTAGAGATAGGTGTTTTAAGATTAATAATCAAAATTTATTTTTCGGATT\n") - f.write("AGAAGATGTGCTTTTAATCACTGGGCTTCCAGTAGAAGGGACCCCGGTGATAAAGTCAACAACAAACATT\n") - f.write("CATGCCCTTGAAAACTTGTTAGGGGGAGTGCCTGAAAGTGAGTTGAAGGATAAATCAAATGCTGTGAACC\n") - f.write("TTAGTTGGTTACAAAAAGAGTTTGAGCATGTACCTGAAAATGTAGATGCCGATAGATTTTCCTGTCATGT\n") - f.write("TAGAGGATTTGTCCTCTACATATTAGGCACGGTAATGGTTCCCAGTTTAGACCATGTGCACGTGAATCTG\n") - f.write("GGCTACTTGGGCTGCATGTCCGAGATAGGTGAGATAAAGAAGTATGCATGGGGAGTTGCA\n") - f.write("\n") - f.write(">gi|226792265|gb|GO545787.1|GO545787 Mdas9003N07_e234.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_196452.1| expressed protein [Arabidopsis thaliana] emb|CAC08335.1| putative protein [Arabidopsis thaliana] gb|AAM63112.1| unknown [Arabidopsis thaliana], mRNA sequence\n") - f.write("TTTTTTTTTTTTTTTTTTCATGAAATTGACATGATTGATATTATAAACCCAGGCAAAAAAAGGGTAAAAC\n") - f.write("TAAAACCTTGAGGAAAATAAAAACACATGTAGAAGCTAATTACCGGAGATGAGCCAGCTCTCGTTAATTT\n") - 
f.write("GCTCATCCCCAAATTACAAAACACGTCTTGCTAATTGCTAATGTGCTGCCTGCATTTTCTAGTTGTGGGA\n") - f.write("CTGGTTGTGGTTGGCAATGGAAAGGGCCTTGTGAAGATTTCTGAAAGCTTTCTCGTAGCGCAAGAATCCC\n") - f.write("ATGAACCAGAAGTCGAAATCATCGTGGGTAACTATTTCAATGTACTTCTGTTGTGGTTTGTTGACATTCT\n") - f.write("CGCTTTGGTTCACTGTTTTGATCTTCCTTATTGGTATCCGAACTTTGTAATGCGTTCTGAGGAGTTGCAG\n") - f.write("TTGTCCAGCAGCAGCAGGACCAGAAGCAGGAGGAGAAGGGAGAGTGATGGGTCTTTCGCTGCAAAAAGCA\n") - f.write("AGCTTTTGGGTAGAAATGAAAAGGAGGCCAGGAATGGGACCGGCGGTGGTGGATAGGTAGCATTGGGAGG\n") - f.write("CCTTCAAGAGCTCTTCTCCTTGGCTGACACCGAATATCTGTTTAAAAATATTGGCCCTCCCACCCTCTTG\n") - f.write("AA\n") - f.write("\n") - f.write(">gi|226792264|gb|GO545786.1|GO545786 Mdas9003K17_e220.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_172258.1| histone H2B, putative [Arabidopsis thaliana] pir|D86213 hypothetical protein [imported] - Arabidopsis thaliana gb|AAF75074.1|AC007583_10 Strong similarity to histone H2B like protein from Arabidopsis thaliana gb|Y07745. ESTs gb|R8, mRNA sequence\n") - f.write("TTTTTTTTTTTGTTTTTTTTTTTTAGGTAAATCGTACTTTCATTTAGAATTTAAACCGCAATAGCACAAT\n") - f.write("ATCAGTACTTAATACTTACATAATCAAAAGCATTAAAACCCATAAACCCCTAACCCTAACCCTAATTCCT\n") - f.write("TCACAACCAATCTCAAACCCAAATCCACAGAATTAAACACATTAACCCTAACTCTTCAAGAGCTAGTAAA\n") - f.write("CTTAGTCACCGCCTTAGTCCCCTCAGACACCGCATGCTTAGCAAGCTCACCAGGCAGCACCAATCTCACA\n") - f.write("GCAGTCTGGATCTCCCGGGAAGTAATCGTCGGCTTCTTGTTGTACCTCGCGAGCCTGGATGACTCCTGGG\n") - f.write("CGAGCTTCTCGAAGATGTCGTTGATGAAGCTGTTCATGATTCCCATGGCCTTGCTGGAGATCCCGATGTC\n") - f.write("AGGGTGGACCTGCTTCAGCACCTTGAAGATGTAGATCTTGTAGGTCTCCACGCTCTTCTTCGATCTCTTC\n") - f.write("TTCTTCTTGTCTCCGGCGGCGGCTCCGGCCTCCTTCGGGAGCTTCTTCCCGGCCTTAGGCTTCTTCTCGG\n") - f.write("GCGGGGGCTTTCTCCGCCACGGCGGACTTCTTTTCCTCCGCGGGCTTCTTCTCGGCGGGGCTCTTCTCAG\n") - f.write("CT\n") - f.write("\n") - f.write(">gi|226792263|gb|GO545785.1|GO545785 Mdas9004B14_e259.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAP47161.1|AF514908_1 dehydration responsive 
element binding protein [Glycine max], mRNA sequence\n") - f.write("TTTTTTTTTTTTTTGGCCATAAAATTCAAAGACTATAATTTCAAGAAAAGAAACATTACTGATTGCCCTT\n") - f.write("CAGTTGGTTTGTTGTTACAATTGAGAAACTATATCAAATTACCAATCGAAACGGAATTCTTGTGATTCCA\n") - f.write("TTTCCTGATCGGACCCAATAAACCCTTCTCTGTATTTTTACTTTATTGCACACCTAAATACCTACTCATA\n") - f.write("ACAATAAACAGCTGGTGATCGGTCGGCCTTCTCAATTAACTTTCATCGGAGTTTTCTGGGTCGGGGTACT\n") - f.write("CATTTAAGTCGGGCTTCAAAGAGCCACCCCGGCTCAACGTTGTTTTGGACTGCGACGTCGGCGATGAGCG\n") - f.write("GAGAGCGGTTTGAACAGCGTCGACCTTAGCCCCGACCTCCGTAGCTTTCTTCCGTATCGAAGCCGCCGAC\n") - f.write("ATGTCGTGGAGCTGCTGGCCGTCGTCTTGGAATACAAGTTCCGGGAAGTTGAGGCGGGCAGAAGGGCCTC\n") - f.write("GAAGATAGAAAACGGCGGTGTCGTAAGCTCGGGCGGCAGCCATGGGGGTCGTGTAGGAGCCGAGCCAAAT\n") - f.write("TCGAGAGCGCTTGTTGGGTTCTCGAATCTCCGCCACCCACTTCCCCCACTTCCTC\n") - f.write("\n") - f.write(">gi|226792262|gb|GO545784.1|GO545784 Mdas9004I10_e289.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_188060.1| glycolate oxidase -related [Arabidopsis thaliana] ref|NP_850584.1| glycolate oxidase -related [Arabidopsis thaliana] sp|Q9LRR9|GOX2_ARATH Probable (S)-2-hydroxy-acid oxidase, peroxisomal 2 (Glycolate oxidase 2) (GOX 2) (Short chain, mRNA sequence\n") - f.write("TTTTTATCCATGGAAATATTATCTCACCATCCAGTAATCAGTATCATTACAGAAAACGCAGAAATAATAA\n") - f.write("GGAATCCAAGAACAAAGGTTTATCCACCGCTGTTCATTTGAATGTTTCTATAACCTTGGCTGATGGCGAG\n") - f.write("GGCGAGGAGCGTCCCAGTCAGTCACAATGTGATCGCGGGTGATCTCTTTGAGCGAACGGCATCCACTTAA\n") - f.write("TGTCATGGTTAGCTCAAACTCTTCGCGCAGCATTTGGAGTACCTTTCTAATGCCAGCCTCTCCTTCAGCC\n") - f.write("GCGAGAGAGAACACCACAGGGCGCCCAATAAATATGCCAGAGGCACCCAAAGCTAATGCTTTGAAGACAT\n") - f.write("CTGTTCCTCGACGAACACCACCATCCAAGAAAACAGGAATGCGTCCTTGTGCACCTTTAACAACCTCTTC\n") - f.write("GAGGGCCATGATAGTCGAAGGGACGTAATCAAGTTGGCGAGCTCCATGGTTGGACACAATAATCCCAGCT\n") - f.write("GCTCCAGCTTGTACTGCAATCCTTGCATCTTCGGCAGTGAGAACACCCTTAACTAGGATAGGCAGTGAAG\n") - f.write("TGATTGTCTGAAGCCACTGCACATCCTTCCAGCTAAGAGAACGATCGATTTGACCAGCAACGTATGAAGC\n") - 
f.write("AAGTCCAGAGTCAGCAGCTTTGTCATCTTTCCGAGG\n") - f.write("\n") - f.write(">gi|226792261|gb|GO545783.1|GO545783 Mdas9004E14_e272.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAC24587.1| AP2 domain containing protein [Prunus armeniaca], mRNA sequence\n") - f.write("TTTTACACGAAACGAAAATGGCATGGCCTTTTGGTAATTCCGTCATCCTCAAAGGTCAGATTGTCATACA\n") - f.write("GATCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTACAAACACCAAGTCAAAAGTACATAGAACTG\n") - f.write("AAAACCAAACAATGCAAAATCTGATGGCCTCTTGTAAAAAGGAAGACCGAAATAAATTTAAACAAGTACA\n") - f.write("GAATTATGAGGTTTCAGTATCATCATAACTCACACAATTTCATCGTTACAAACATGAAAGTTCAGAAACC\n") - f.write("ACAAGTTCTTTGGACGAGAAAGTGGACCCATTACCTATACGATCTCCATACGCCATTAGCTACGGCTTAA\n") - f.write("ATGCTCCCCCGTACAAGCCTTATCCATGTCAACAGCTTGCACAGATGTACTCCAACCCGAAACACATCAT\n") - f.write("GTAGCTTTATTTACATAAACTGGAAAGCTGGGGTAAGGTTCATCTCTCAGAAAGTTCCTCCAACAATTGC\n") - f.write("GGGCAGATCGTCAAAGGTCCATAGGTCCCCCGGATTACCACCATCCTGAGTTGCGTCTCCGTTAAGGAAG\n") - f.write("GCGTCCACTGAAGCATCCCAGCTCCCATCAAGATATGGGGTCTGAAAGTACTTCATCTCAAAAGCTGAGA\n") - f.write("GCTCATCAGAC\n") - f.write("\n") - f.write(">gi|226792254|gb|GO546042.1|GO546042 Mdas9011B07_e815.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|S28827 chlorophyll a/b-binding protein type I - common tobacco emb|CAA45523.1| photosystem I light-harvesting chlorophyll a/b-binding protein [Nicotiana tabacum], mRNA sequence\n") - f.write("GCTGTGGCATCGCCACTACTGCCTTCCCTTCAGTCCTCTCATCCTCCAAGTCTAAATTTGCGACCAGCGC\n") - f.write("AGTCCAGCTCCCAAGTATTGGTGCCAATGCCTCCTCCAGGTTCTCCATGTCCGCCGAGTGGATGCCCGGC\n") - f.write("GAGCCCCGCCCTCCTTACCTCGACGGCTCTGCCCCCGGTGACTTTGGATTTGACCCACTTCGGCTAGGAG\n") - f.write("AAGTGCCAGAGAACTTAGAGAGGTTCAAGGAGTCCGAGCTCATTCACTGCAGATGGGCAATGCTTGCTGT\n") - f.write("TCCAGGGATTCTAGTACCAGAGGCTTTGGGATTGGGCAACTGGGTAAAAGCACAGGAGTGGGCTGCCGTT\n") - f.write("CCAGGAGGCCAAGCCACCTACTTAGGCAACCCAGTTCCATGGGGCACTTTGCCTACAATTTTGGTCATCG\n") - f.write("AATTCCTTTCCATTGCCTTTGTAGAACACCAACGCAGCATGGAGAAGGACCCTGAGAAGAAGAAGTACCC\n") - 
f.write("TGGTGGCGCTTTTGACCCCTTGGGCTACTCCAAGGACCCCAAGAAGTTCGAGGAATACAAAGTCAAAGAG\n") - f.write("GTCAAAAATGGCCGGCTTGCGTTGTTGGC\n") - f.write("\n") - f.write(">gi|226792253|gb|GO546041.1|GO546041 Mdas9010G14_e745.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTTTTTTTCTTTTTTTTTTGGTTGAGATTAGTGAAATTGCTTTCATACCGAATTTTATCAATAAGCTAG\n") - f.write("TAGATGCTACAAGTACCTATAACTTTCTATCATGTGCCTTAATTCTAAAAAAACCTTGACGCCAACCTTT\n") - f.write("GAGCACAGGGTGACTTCAATATTTTTCACCTTCAGCAGTTCCATTGTGTTTCGGTAACTGAGTACCATAT\n") - f.write("AGGCTACCATAGCCTGTGTCATGCTCGCCCTCTGTCTCTGTCTGTCTAGCATCAAGCGTATTTTTGGTTG\n") - f.write("TTTCCGCGCTTGGAAAAATCG\n") - f.write("\n") - f.write(">gi|226792252|gb|GO546040.1|GO546040 Mdas9010M10_e782.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("ATTATTGTTGTCCAATACTTGGTACCAAATTTACCTCACCGACAACTATACAAACGAAAGTACAAGACAA\n") - f.write("CTCCTGAGCTCTCACTTTCCGGGAACAAAGTTTGAGGCGTATT\n") - f.write("\n") - f.write(">gi|226792251|gb|GO546039.1|GO546039 Mdas9010J03_e761.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAO61490.1| arm repeat-containing protein [Nicotiana tabacum], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTGAAATATATAAATGTCTATACTACCAAGACAATGGGGGCATAACATC\n") - f.write("AACAATTCCAAATACAAATTTGTCATCATAATCATGCCTAGCCGGACTTACAGGTATGCTATTAACTTTT\n") - f.write("TCTTCCCACATTCACGTAGGTATTTTACAAATCAGACGTACAAATCTTCTATCCATCGTTTGTTATTCTG\n") - f.write("TCTTCTCTTGTCTATGGTTTCTGAAGTAGCTAAGGAGTGTCTGTGCCTTTTCTTTTGCCCTAGGGGTGCC\n") - f.write("GGATTGTGACAATACCACTAATGGTGGGACAGCTCCTTCTTGGAGGACCTGACTGCAATATCTATTACTG\n") - f.write("TTTGTGCAGAGCTGTAAAAGAGCAGCAGCAGCGTTCTCCTTCCCTCTTGGAGAACCCAACTCAACAACCT\n") - f.write("CGACAAGAACAGGGATCCCACCCTCCTGACCAATTGCTGTCCTTCCCTCGGGAATTGTCGAAAGATTAGC\n") - f.write("CAAAACAGCAACTGCCTTGTCAACCATTCCAGCTGCAGGGTCCATCAACTCCACTAGGTACTTTACAGCA\n") - f.write("CCAGCTTGAACAATTCGACCCTTGTTCTCATGAAAAATTGACAAATTAAACAAAGCTGTGGCTGCATCTT\n") - f.write("TCCTCCCTCTGGGACTCCCATTCCC\n") - f.write("\n") - 
f.write(">gi|226792250|gb|GO546038.1|GO546038 Mdas9011A18_e814.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_191019.1| histone H2A.F/Z [Arabidopsis thaliana] pir|T47615 histone H2A.F/Z - Arabidopsis thaliana emb|CAA73155.1| histone H2A.F/Z [Arabidopsis thaliana] emb|CAB77576.1| histone H2A.F/Z [Arabidopsis thaliana] gb|AAM64788.1| histone H2A.F/, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTGAGCTTATAAAAGTCGAATCATATAAATAAAGACAGAGAATACCAG\n") - f.write("GTTAAACTATCCAACAAAACACATAAAGATGAACAAACATACAGATAATGGTTCAAAGGGACAAGCACTA\n") - f.write("GATTGCCCATATGTACTACTAAATCCAGCTAACCTAAACAGCTACTAAAACACGAGAAATAAACAGTCAT\n") - f.write("AATATTAAACAACACCATCAGTTAATACCCGTTTGACTCATCAGATCTGAATTACTCCTTGGAAGTCTTG\n") - f.write("TTGATGAGGGACTTGTGGATGTGGGGGATGACACCACCGCCAGCAATGGTTCCCTTGATCAGAGTGTCCA\n") - f.write("GCTCCTCATCTCCCCTGATGGCTAGTTGCAGATGCCTGGGTGTGATTCTCTTGACCTTGAGATCCTTGCT\n") - f.write("AGCATTCCCGGCCAACTCAAGAACCTCAGCAGTTAGGTACTCGAGGATTGAGGCCAAGTACACAGCAGCA\n") - f.write("GTGGCGCCAACACGGCCATGAGCAGCAATTCGCTGTTTCAGCTGCCTATGAATTCGACCCACCGGAAACT\n") - f.write("GGATACCAGCGCGAGATGAACGAGAAACAGGCCTCTTCTTGTCCTTCTCCTTTTCTTTGTCCTTGTTAGC\n") - f.write("CCGCTGTGGGTATTCGCTGCAATAAGCCCCTTCCCGCCTTTGCCCGCCA\n") - f.write("\n") - f.write(">gi|226792249|gb|GO546037.1|GO546037 Mdas9010F06_e738.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAA33866.1| ribulose 1,5-bisphosphate carboxylase small subunit, mRNA sequence\n") - f.write("TCTCATCAACACCATCTTGAATCCGATAAAACACCAAATTCGAAAACAAACGAGAGTGCCTCGAAAAAGG\n") - f.write("GACAGAGAATTAAAGAAACAAATAGACAGAGGTCTTGAAAGCCCTAATCAAATACAAACTTAAACAGACC\n") - f.write("ACCGGGACCCTTGGCTAAGAGCCACAATGGTACATACAACAACAACATTATTTATGAAAATTGTAGAACA\n") - f.write("ACTTAGACGCCTGGAGGCTTGTAAGCGATGAAACTGATGCACTGCACTTGACGGACATTGTCGAATCCGA\n") - f.write("TGATACGGATGAAGGATTGGGGGTAGGCCTTCTTGGCCTCTTCCAGCTCTTTCAACACCTGGGAAGAGTC\n") - f.write("GGTGCATCCGAACATGGGCAGCTTCCACATTGTCCAGTACCTTCCATCATAGTATCCTGGGGATCTGTGG\n") - 
f.write("TTCTCACGGTACACGAATCCATGCTCCAACTCAAATTCCAAGCAGGGAACCCAGTTTTTGCGGAGGAGGT\n") - f.write("AGTCAACTTCCTTGGCCAAGGACTCGGTAGAAAGGGGAGGAAGGTAAGAGAGGGTCTCGAACTTCTTCAG\n") - f.write("TCCAAGTGGAGGCCACACCTGCATGCATTGCACTCTTCCTCCATTG\n") - f.write("\n") - f.write(">gi|226792248|gb|GO545782.1|GO545782 Mdas9004H05_e281.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|Q42434|BIP_SPIOL Luminal binding protein precursor (BiP) (78 kDa glucose-regulated protein homolog) (GRP 78) gb|AAA21808.1| ER-lumenal protein gb|AAA21806.1| ER-lumenal protein, mRNA sequence\n") - f.write("GAGTCCGATGAGAAGGAAAAGATTGAGACCGCGACAAAGGAAGCCCTCGAGTGGTTGGATGACAACCAGA\n") - f.write("CCGCCGAGAAGGAAGACTATGACGAGAAGCTCAAGGAGGTTGAAGCCGTGTGCAACCCCATAATCTCAGC\n") - f.write("CGTCTACCAGAGGTCAGGAGGTGCCCCGGGTGGTGCCAGAGCCTCGGAGGAGGACGATGAGTCACATGAC\n") - f.write("GAGCTTTAAAAGAGTAGTTTATTTCATTTTCATCGAACACGGTTGGAAGCAGTGAGGAGAGAGAGGGGTT\n") - f.write("ATAGGTTTAGGGGAAGAAGACTTTTACTGTAATTTTTCCGATGGGAATAGAAGAAGCTGAGGGAGACGAA\n") - f.write("TTTCGACTCTCTCGGCTTATCGGGTCTTTTTGTTAGAACGTGTAATTTCATTATTTTGATAAAAAAAACA\n") - f.write("CAGTTTCAA\n") - f.write("\n") - f.write(">gi|226792247|gb|GO545781.1|GO545781 Mdas9004I24_e290.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_187919.1| chloroplast 30S ribosomal protein S10, putative [Arabidopsis thaliana] dbj|BAB01403.1| unnamed protein product [Arabidopsis thaliana] gb|AAK60293.1|AF385700_1 AT3g13120/MJG19_6 [Arabidopsis thaliana] gb|AAL16207.1|AF428438_1 AT3g1, mRNA sequence\n") - f.write("TTTTTAAAGTGAAATGAATCCTAGACATGGAGAATAATCGAAAAGATACACAATGCGGAACTAATATGCA\n") - f.write("ACAAGGATATCAAATAATGTCGAAAGAGGAACTCTTTATCTCTATTCTTCAACTGAGATCCAAGGGCAGA\n") - f.write("AATTGCTTACAATTGCTTACAAAATACACAAAAGAAATGAATTTCAGGAAAGGATCAAAATGCATTCGAT\n") - f.write("AAGCGTAATCCTGCAGTCATTCAGCCTCTACAAAATAAACGTGAACCTTTGTAATCTCTTTCCCATGTTC\n") - f.write("TTCAAAGAAAACACCTAAGCTGCTGCCCTGCTTCTTCACAGTTTGACCTCCACATCAACTCCAGCAGGAA\n") - 
f.write("GGTCAAGTTGCATTAAGGAATCTATTGTTTGTGCAGTTGGGTATAGAATATCAATGAGGCGTTGGTGGGT\n") - f.write("ACGAATCTCAAAATGGAACCTTGCATCCTTGTGGACATGGGGAGATTTGAGAACACAGTAGATTCGCTTT\n") - f.write("TTAGTTGGCAAGGGTACAGGACCCATTGTTTTCGCATTCGTGTTCCTAGCAGCATCCATTATCTGCTTGC\n") - f.write("AGGAATCTTCTATNCATGGCACAAAGTAAGACCTCAGTTTAATCCTGATTTTCTGCTTGAGTGAGGCCTT\n") - f.write("ATCTCCATCTTCACCAATACTGAGTGATGAAGTACTCGG\n") - f.write("\n") - f.write(">gi|226792246|gb|GO545780.1|GO545780 Mdas9003G22_e206.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|S16294 chlorophyll a/b-binding protein type I precursor - tomato, mRNA sequence\n") - f.write("TTTTTTTTTTTTTGAAAAGATAGAGCAATTCCATTTAAGGATCAAGGTTAAAAGCCTTTAAAAGGACTCA\n") - f.write("ATTTGATATACAATGCTTTTAATTTTGATTGGAGGAGAATAAGGAGGAAACTTAAAATTACAGGGTTGGA\n") - f.write("GCTCTTTCGATAGATCCACCAATGACAGTAAGCAAGTTGTTGCCGAAGGGATCGCTCAGATGCTTTGATA\n") - f.write("GGTTTTCGACAGGACCTTCTCCGGTGACATACGCTTGGAGGAAGAAACCGAGCATGGAAAACATGGCAAG\n") - f.write("TCTTCCGTTCTTAATCTCCTTCACCTTTAGCAATGCAGCCTGGTCTGGATCCTTAGCCAGCCCCAGTGGG\n") - f.write("TCAAAAGGACCACCTGGGTGAAGCTTGTCCTCCAATTCCAAACCGTTAGTGATTCTGTAGTATTCAGCTC\n") - f.write("CACCAAGAAGAACAACCTCAGCAACAACAGCGAAGATAAGATTAATCGGTATGTTCTTCCCAAAGTAATT\n") - f.write("CAGTGTGTTCCCATCAAGGAGTAGAGCTCCTGTCTTGAACCAAACAGCCTCAGGGCCGCAGTTAGCTCCA\n") - f.write("AATTTGTTGAAGGCCTCTGGAATGATGAATCCGGCTGCACCAAGCATGGCCCACCGAGCGTGGATCAGCT\n") - f.write("CATATGCTTGGTATTTGCTGAAGTTTTCTGGCTTCTTGCCTAGGCCAAAAGGA\n") - f.write("\n") - f.write(">gi|226792245|gb|GO545779.1|GO545779 Mdas9004D14_e270.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_198099.1| seryl-tRNA synthetase [Arabidopsis thaliana] sp|Q39230|SYS_ARATH Seryl-tRNA synthetase (Serine--tRNA ligase) (SerRS) pir|S71293 serine-tRNA ligase (EC 6.1.1.11) - Arabidopsis thaliana emb|CAA94388.1| seryl-tRNA Synthetase [Arabid, mRNA sequence\n") - f.write("TTTTTTAAATCAGCTCTGTTAAACATTACGTAATTAAGGTTTCTTGCGGCGCAAGCATACTCCTCATCAG\n") - 
f.write("ATATAACTGTGCATATCAGAAATCATAAGGCAAAATATGAATCTTGTAGGACAAACCAAAACAAAACAAG\n") - f.write("TTGATCTGAATATATTTAATTCATTACATCAAAGATGAAGACATGGAATTTGGGTTTCAGGGTTTCTCAA\n") - f.write("AACAATCATCAGAAGAGTTTACGCACGCCATGTATCATGAACTTCTCTTGTTAATCTATTCTTCATTAAA\n") - f.write("GACTGAAAAAATTACGAATTTTAGGCCTTCGATTTCTTCCCTTTGACTTCAGGGGCTGGTTTGGCCTTGA\n") - f.write("ATGGTAAGAACTCCTTTCCACCCATGAAGGGTTGAAGAACTTTCGGTACGTTTACACCCTCTTCAGTCTG\n") - f.write("GTAGTTCTCGAGAATGCAGCATACAGTTCTCTCACTCGCTGTAAGAGTCGAGTTCAATAAGTGAACATAT\n") - f.write("TGCTTTGTCTGCTCGTTGCTCTTTTTCTGCCCATATCGAATTTCTAACTTTCTTGACTGGTAGTCCGTAC\n") - f.write("AGTTTGAACAACACACCAGCTCTCTGTATGTCTGAGATGCAAGATACCATCCGTTCAGATCATACTTCTT\n") - f.write("TGCTGCGGCATCATTCAAAGCACCAGAGACGATGGACACAATATGATATGCGAGCCCTAACGCTTGATAA\n") - f.write("AAT\n") - f.write("\n") - f.write(">gi|226792244|gb|GO545778.1|GO545778 Mdas9003O19_e240.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_192721.1| expressed protein [Arabidopsis thaliana] pir|T04031 hypothetical protein F17A8.180 - Arabidopsis thaliana emb|CAB39650.1| putative protein [Arabidopsis thaliana] emb|CAB78106.1| putative protein [Arabidopsis thaliana] gb|AAL6703, mRNA sequence\n") - f.write("CAGGGATAATATTTCAACATTAAAAGCAATGCCTAATAGAAACACAACGGTTTTTGGTAAAACAAAACGA\n") - f.write("AACAAAAAGCAATACAAAAAAGATGCTTACAGAAGATGGAGAAGACCAAGCTATTCTCAAACAAGACAGC\n") - f.write("AAAAGTAAATTAAATTGATTGCATTTACCGCTTCCATGTTGTGCCGTCTTCCCTTATGGAATTGCACCAG\n") - f.write("AGAGCAGCTTTGTATTGTCGTGGTTCTTCCAAAGACTTTTGACACCGTCCCATGGGAAATACTTGGATGA\n") - f.write("ACCAACTGAACGCTTTCAGTAGGTAGAGTCATGACCAGGACTATCTGCATGCATGTCCATCGAGGTATCA\n") - f.write("TTGGAACTTGGAGGATTAGCATCCCGGTTTGGGTGGAGAAAGCTAGGTTCATTGTTTCGGGCTCCATTTC\n") - f.write("CAGATGGCAGAACTGCTCCTGGATAATATCCATCTTGAGAAATGTGATAGTGCTGAAGACTCTGGCGAAC\n") - f.write("AGGGCTCGACAAAGCATTTGAGAAGACCGAATTCTTAGATTGATGATCGCACTGCTCAGAACGAATTCCC\n") - f.write("TGCCCAGTAAATTGTTGGGACAGATGAACCAGAAGT\n") - f.write("\n") - f.write(">gi|226792236|gb|GO546036.1|GO546036 
Mdas9010N20_e793.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTCTTGGGGTCGCAGTCAGGGGAACAAGCAGTGGAGGTCGGATTCAAGTAACCAGTGGAATGGAGCACA\n") - f.write("CTATGGAGGACAGGGTTATGGCGGGTATGGACATGCTGTGCCACAGGGTGAGGACCTGAGCATGCACACC\n") - f.write("GCAGCTGCGGTTAACGGGGCTTCTTAACAGGGCTGTCGCAGTTGCCAACAGCGTGTAAACTGAACTTACA\n") - f.write("AAAGTGGCATTTTAGATATAGTTTATCAATGCCTTTAATTGAAGTAGGGTCGATTTAGCTAAATATGTAA\n") - f.write("TGTGAGGAATTTTTGATACCGGATTTAGCAGCCCTTGTGTGAGATGAATACTTGTGTCTATGCCTGGCAA\n") - f.write("TCTTGGAATCAACAGGAATTCTTTTGACCCGGAAAATACGTTTATACCGA\n") - f.write("\n") - f.write(">gi|226792235|gb|GO546035.1|GO546035 Mdas9010O21_e801.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_172650.1| aminomethyltransferase-related precursor protein [Arabidopsis thaliana] ref|NP_849646.1| aminomethyltransferase-related precursor protein [Arabidopsis thaliana] sp|O65396|GCST_ARATH Aminomethyltransferase, mitochondrial precursor (, mRNA sequence\n") - f.write("AACCCAGCTTCCACCCCGAAACTGTCACTAGCACCCAGACCTGACAGCCTCACCTTCCCCTCGGATTTCT\n") - f.write("CACAGATTGCCTAGGCAAGATCCACTGCATTCTCATCAGGAACCGAGATTTCAAATCCACAAACAATGTA\n") - f.write("GCATACACGACATGAGTGATTAC\n") - f.write("\n") - f.write(">gi|226792234|gb|GO546034.1|GO546034 Mdas9011B24_e818.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P07030|PLAS_SILPR Plastocyanin, chloroplast precursor emb|CAA26709.1| precursor [Silene latifolia subsp. 
alba], mRNA sequence\n") - f.write("TTGCTAGAACACATTCAATCAACTACATACAATGGCCTTCGTCAACCCCGCCGCGGTCGCTATCCCATCA\n") - f.write("TTCACCGGACTTAAATCCGCCGGCGCAGCCAAGGTTAACGCTGCAGCTAAGGTCTCAGCCTCCCCAAGGC\n") - f.write("AGATGTGCATCGTCAAGTCATCCCTCAAGGATGTTGGTGTGGCCGTTGCCGCCACTGCCGCCAGTGCTAT\n") - f.write("CCTCGCCAGTAATGCCATGGCCATTGAGGTCTTGCTAGGCAGCGATGACGGTGGCTTAGCCTTTGTCCCC\n") - f.write("AACAGCTTCTCCATCGCCCCCGGTGAGAAGATTGTGTTCAAGAACAATGCTGGATTCCCACACAACGTTT\n") - f.write("TGTTTGACGAGGACGAGGTTCCCAGCGGTGTGGATGCTGGAAAAATCTCGATGAGCGAGGAGGACCTCCT\n") - f.write("GAATGCCCCCGGGGAAACCTACGCCATCACCTTGACCGAAAAAGGTTCATACTCTTTCTACTGCTCTCCT\n") - f.write("CACCAGGGAGCTGGCATGGTCGGCAAAGTTACCGTTAACTAATTCAGTTGTCTGATTAATAATAT\n") - f.write("\n") - f.write(">gi|226792233|gb|GO546033.1|GO546033 Mdas9010M04_e781.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAF78526.1|AF195234_1 metallothionein-like protein [Pyrus pyrifolia], mRNA sequence\n") - f.write("TTTTTTTTTTTTTTTTGATGAAAAATAAGATGGTAATTAACAGAGCCATAGCATGTATTCTAATATTTGC\n") - f.write("ATATAAATCAAAGGGCCATGAACATGGCCATTACATGACACAATCACAGAAACACAAACATATGAGTGAC\n") - f.write("ATGTTTTCACAGACGTACAACATGTCACTCGACAGACTCAAATACTATAAGCAAACAAGAGACAAAGCCA\n") - f.write("CAACGTTATTTTATCCCACAAACACTTTTCATTAATTAAGGGTCTATAATCCCTTAATTAATATGACACT\n") - f.write("TTTATTAATCACATCTTTAATTTGGTTATCTGGGCTTACTGACCACAGGTGCAGTTCACACATGGGCAGG\n") - f.write("TTGCGCCACACTTGCACTTTCCGTCGTTCTCGGCTGCAGGAGCGTCCACGACGACGGTGTCCATGGCGCG\n") - f.write("GTTCTCATTCTCCACGATCACCAAGT\n") - f.write("\n") - f.write(">gi|226792232|gb|GO546032.1|GO546032 Mdas9010J06_e762.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|T12317 26S proteasome regulatory subunit S5A - common ice plant gb|AAC19402.1| 26S proteasome regulatory subunit S5A [Mesembryanthemum crystallinum], mRNA sequence\n") - f.write("TCGGGGAAGAGGATGATGGGAAGCCAGAGAAGCTGGAGGCCCTTCTGTCTGCAGTTAATAATAATGACAG\n") - f.write("CAGTCATATAGTACATGTCCCCCCCGGTCCAAATGCTCTCTCAGATGTTCTCATAAGTACACCTGTATTT\n") - 
f.write("ACTGGTGACGGAGAAGGAGGAAGTGGTTTTGCAGTGGCTGCCGCAGCAGCAACTGCTGCTGCTATTGGTG\n") - f.write("GCTCTGGGTATGACTT\n") - f.write("\n") - f.write(">gi|226792231|gb|GO546031.1|GO546031 Mdas9010K13_e771.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAM63765.1| unknown [Arabidopsis thaliana], mRNA sequence\n") - f.write("TTGGTTAGAATATGCTTATGCAAAGTTGCAAACAACATGAAGAATGTTCACATCCTTATGAACAATGCTG\n") - f.write("CGAGATATTTACGTATACAGATAGATGCTTCAAAAACCGGCCACGAGTTCTTATCTTAAACAACATTACT\n") - f.write("TTTCGGGTTGTTCTAGCTGCTGCCGATTATATCTTTGTATGTAGCTTTAATCTTCAGTGTCAAAGCCTCT\n") - f.write("CTGTCTGGTTTGTAAACAAGGTTCTTGTATGCAAACCACCCAGTGTAGCCAATTCCTACAAGCTCAAGAA\n") - f.write("CACCAGGAACTAAAGGAAGCCTATCAATTGCCGAAATCAGTCCAGTGGAGCCCCATAGTGCAGCAGCACC\n") - f.write("AGCTACTGCAAGTGAAGAAACTGCATATTTATCTTCCACTTTCTCCCAAGCTTCTTGAACGGTCTTCAC\n") - f.write("\n") - f.write(">gi|226792230|gb|GO546030.1|GO546030 Mdas9010P21_e809.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAL26889.1|AF317062_1 abscisic stress ripening-like protein [Prunus persica], mRNA sequence\n") - f.write("ACCGAGTAAACAGTATTATTATGAACAATAACAATGACATACATACATACATACATACATAGATGGCATG\n") - f.write("CTAAAAAAAAACCAAATAGCACCCATTAATTGAAAAGGACATTTAACCACTCAACTATTGTAGCGATATT\n") - f.write("GTTGAAATTAAGCATCACACTCTCACTGATCACACCACATGATAACTAGTAAAAACACAACATTGCATAT\n") - f.write("GTAGTCAAAAAATGATGTTCCACAATCCACATAAATCTAAAAGTGGTGGTGCTTCTTCTTTCCATAAGCC\n") - f.write("TCTTCCTCTTCTTCCTTTGTCTCTTTCTTCTCATGATGCTCATGGAAGGCAAATCCACCCGAACCAACTG\n") - f.write("CAGCTGCTGCAGCAATCTCCTCTTCTATCTTGTGCCTGTGGGCATGCTCTGGGTCTTTCTTTTCATTGTG\n") - f.write("CTTCTCATGCAAGGCAAAAGCGCCGGCAGCAGCTACACCGGCCTCGCCGAGGTGCTCGAGATGCTTGTGG\n") - f.write("TGCTTCTCCTCCT\n") - f.write("\n") - f.write(">gi|226792229|gb|GO545777.1|GO545777 Mdas9003K16_e219.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_566036.1| protease inhibitor/seed storage/lipid transfer protein (LTP) family [Arabidopsis thaliana] pir|D84887 probable proline-rich protein [imported] - 
Arabidopsis thaliana gb|AAB82643.1| expressed protein [Arabidopsis thaliana] gb|AAL1, mRNA sequence\n") - f.write("CAGTGATATCCCCTTTTGTAGTAGTGTGAATAATCAATAATACAAAAACCATCAATGCTCAATGACTGAA\n") - f.write("AAGATAATAAGTTCACAATCCAAAGGGAAACACAAGAAACACAAGAAAATGTTCCTTGGAGGGAACACTT\n") - f.write("TACATATCAATTACACTTTGAAGGAAACATGCACAATGCATTAATGCAGCCAAAGATTAGTGCCCTTTGT\n") - f.write("ATTTTTATTCACACAAAGCTGTGTAAGGGCAGGTACTTACGTAGTATGCAGATGAGAAGATCATCGTGCA\n") - f.write("TGTCTCTAAGAGCATTGGAAGCCATTCGGGACAGACTTGCCACAGTAATTCAAAAGCAAGCTCAAGGAGA\n") - f.write("CTGGCACGTCAAGATTGATGCCCAAAACGTTGGCCTTAATTGCAGTGCAAAGGCACACAGCAGCTTCAAG\n") - f.write("ATCGGCAAGTCCACCAATGAGGCTACAGCATGGGGTCTTTGGTGGGGTCCCCACAACAATGGTCACCAAG\n") - f.write("TCATTTAACAAGTCGCCACATACCCCTAGCTTAAGGGTGTCCTTAGGGCAAGAAGCTTTAGCTGGAGGGT\n") - f.write("TTTGGGGGCTAGCGGGGGTTGCAGGAGGAGGACATTTAGGGGTTTTCGGGGGTGGTGGTGGTGGACATTT\n") - f.write("AGAAGCATGATGGTTTGCACTGACTGT\n") - f.write("\n") - f.write(">gi|226792228|gb|GO545776.1|GO545776 Mdas9004J18_e293.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTCCAGTCTCTCCGCGTCTTAAAAAACCCTAGATTCCCGAAAGAACACAGAGTTCGAAACCCTATCAACA\n") - f.write("ATGGCAGTCTCAAAGGCATCCTCCGTCATCGCTCTCATGGCGGTCCTCTTCGCCGTCCTCTCCGCCATCG\n") - f.write("GCGCGGCCCAGGAGTCTCCGGCTCCCAGCCCAACCTCCCCCGCCGCATCCATCTCCCCCTCGTTCGTCTC\n") - f.write("CGCCCTCTTCGCCGCCGTCACCGCTCTGGCCTTCGGATCTGCGCTCAGGGTTTGAGGTCGCGGTTGCTGC\n") - f.write("TTGTCTGGATGTGGCTGTGTAGAGCGGTTTGGGATTTTGTGATTCGTTTTGTTTGGTAGCGAGTGTCGTT\n") - f.write("TTAGTAGGTGGTGGTCATGGATCTATAGGGATTTCGTTTACATATGATGGATTTTATTATTATATTATTG\n") - f.write("CTACTGTGATGGATTTCTGCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792227|gb|GO545775.1|GO545775 Mdas9004F13_e275.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("ACGGATGGTACACCACGTGTTTTTATATAAGTGGTGGAAAATTGCATTTTTTAATACATATATCTCACCA\n") - f.write("TTTATATAACGACATGTAATATACCATGTATTCCGATCATATTGAAAAATCTCTAGTAGATGGGCTTTTC\n") - 
f.write("CCATCACATGACTCTAATCCACGTAACCACGGCCATCCATGTGGGTCCACCTCCCAAAAATCACATGGGC\n") - f.write("AGAAAAGAAAAAAGTTGAAAGGTCCACCATCCTAAATTGGGCCCATGTTATTGAATCACATGGCCCAACC\n") - f.write("AACAGCAACTCAGTTAGTTTGGAAATCTCAAAGAAACATTAAAAACCACTATTTTTTTACACTAAATCAT\n") - f.write("ATAGCACAGCATTGAAATCTCACCAAATTATAAAACGCAATTTTGCAATCTGTTGGAACATTCCAACTCC\n") - f.write("TACACTGGAACATTTCAACCCCAAGCGCTGCCGCACTCAAGATCGTCAAGTTCATAGACAAATCCTTCTA\n") - f.write("CAACCCACATGCCATCTATTACAAAGGTCCCAAGATAAAAGGGGTACATAGTACAAATTCTATGAACCAG\n") - f.write("AAGACCTGTTACTGTGCTTCACTGGATTCGCGCTTTGATGCTTCCTTGATTTCGTCCCCGGCATCATCCG\n") - f.write("TGATATC\n") - f.write("\n") - f.write(">gi|226792226|gb|GO545774.1|GO545774 Mdas9004C13_e265.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_172536.1| expressed protein [Arabidopsis thaliana] gb|AAN41360.1| unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TTTTTTTTTGAAAAATCAATCATAACGTTGTTATGTGCAAGTCTAAATGCCATAGCTGCTTCAGAAGAAC\n") - f.write("AATCATCTAGGGCAGCTAATTACAAATGGAAGCCTGAACCATCAAAATTTATCCTTTATCCAGGCCAAAA\n") - f.write("AAATCATTAATCTGTGGTGCCGCGGTGGCACCATGTAGCCTGGGATTTACAAACGCAGTTCTCATCCGCT\n") - f.write("TCCTCATCTCTCGTTCTGATAAGACTCCGGAAATGCTCTTGACAACAGCAAGTTCTTTATCCTAATAATT\n") - f.write("AGAGCGAAGACGCCGTAGCATGGGAGATGCATCAATCCAACCCCAAGATACCATAAGGGAGCATACGAGG\n") - f.write("ATGACAGGTCGAGAAATGGATCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226792225|gb|GO545773.1|GO545773 Mdas9003L22_e223.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P12372|PSAD_LYCES Photosystem I reaction center subunit II, chloroplast precursor (Photosystem I 20 kDa subunit) (PSI-D) pir|S00449 photosystem I chain II precursor - tomato gb|AAA34185.1| photosystem I subunit II protein precursor prf|160151, mRNA sequence\n") - f.write("CACAAACCAACCACAACTCTCTCTCTCTCTCTTTCCTCTCTCTAGTTCCTCATGGCCATGGCAACACAAG\n") - f.write("CCAGCCTCTTCACCCCAACTCCCTCAGCCCCAAAGACCACTGCTGACCGTACAACCACCCCATGGAAGCA\n") - 
f.write("ATCAGTCTCCTCCTCCTTCATGGCCCCCAAGCCACTCAAGCTCTCCACCGCAAGAACGATGAGGATCAAT\n") - f.write("GCCTCCGCTGAGGAAAAAACCGTGACCCCCACAAAGGAGGCCCCGGTAGGCTTCACCCCACCCGAGCTGG\n") - f.write("ACCCAACAACACCCTCACCGATCTTCGGTGGCAGCACTGGCGGGCTGTTGAGGAAAGCGCAGGAGGAAGA\n") - f.write("GTTTTACGTGATCACATGGGAGTCACCAAAGGAGCAGATATTTGAGATGCCGACTGGCGGAGCTGCCATC\n") - f.write("ATGAGGGAGGGTCCTAACTTGCTGAAACTGGCCAGGAAAGAGCAGTGCTTGGCTCTTGGAACTAGGCTTA\n") - f.write("GGTCCAAGTACAAGATTAAGTACCAGTTTTACAGGGTTTTCCCTAACGGGGAGGTCCAATACTTGCACCC\n") - f.write("CAAGGATGGTGTGTACCCTGAGAAGGTGAACCCTGGGCGCCAAGGGGTTGGCCAGAACT\n") - f.write("\n") - f.write(">gi|226792224|gb|GO545772.1|GO545772 Mdas9004G04_e279.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P46280|EFT2_SOYBN ELONGATION FACTOR TU, CHLOROPLAST PRECURSOR (EF-TU) pir|S60659 translation elongation factor EF-Tu precursor, chloroplast - soybean emb|CAA61444.1| EF-Tu protein [Glycine max], mRNA sequence\n") - f.write("TTTTTTTTTGGGATGATGAAACCTACAAAATATATAACATTTCGTTGTGGAAATTGCTTGGAAATGTCAA\n") - f.write("AAGCTAGATAAAATTGAAGCTAAGAGAGTTCAAGTTTGACACCAAATTGCAAAAATAAGTTCAGACCACC\n") - f.write("ACATAATTTCTATTTTCTATCACTCGATAATGGATTGAATAACGCCAGCTCCAACAGTCTTCCCTCCTTC\n") - f.write("TCTGATAGCAAACCTCATCCCCTGCTCACACGCCACCGGAACAATCAGCTCGACAACAATCTTAACGCGA\n") - f.write("TCGCCAGGCATCACCATCTTCGACTCCTCATCCTTGTCGTTCATAATCGAAGTCACCTTCCCGGTGACAT\n") - f.write("CAGTGGTCCTCATGTAAAACTGAGGCCTGTAACCCGAAAAGAACGGCGAATGCCTCCCCCCCTCTTCCTT\n") - f.write("CTTCAGCACATACACAATGGCCTCGAATTTGGTATGCGGGGTAATAGTCCCGGGTTTGGCCAAAACCATC\n") - f.write("CCTCTCTGGATGTCAATCTTCTGAATACCCCTAAGCAACAACCCCACATTGTCACCAGCCATGGCATCAT\n") - f.write("CAAGAATTTTCTGAGACATTTCGACACCGGTGACAGTTGTGTTCCTGGTGTCCTTCAAGCCAACAATGTC\n") - f.write("GACGGTGTCTCCGACCT\n") - f.write("\n") - f.write(">gi|226791953|gb|GO546029.1|GO546029 Mdas9010L01_e775.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to emb|CAA04664.1| hypothetical protein [Citrus x paradisi], mRNA sequence\n") - 
f.write("GTCGATCTGAGTAGAGTTTGCAAGATGGAGAGCTTGAACAGCTTTTGGCAATTGGGTGATGAGCTCCGAG\n") - f.write("GGCAGTCAAAAGTCGCAGAAGATCACAAATGGTTAATGGCTGCTTCAAAATTGGCTGAGCAGACAAGGGT\n") - f.write("AAAGGGCGAGCGTATGAATAACCTTGATCTTTCAAAGGGCCCAGCTGAACAAAGGGCAAGGGATAAATTT\n") - f.write("GGGTTCCAGGAAGATAACAAATTTGAAGGCCAATACTTTAACATGCTGAGCTTGGATTCTAAAGTAAATG\n") - f.write("AAAATGTGAGCAAAAGTTCCTTCACGAATGGTATTTATAACATGAATGCAGTTTACCAGAAGAACAATGC\n") - f.write("AAGCATTGTGGGAAACATGACTGG\n") - f.write("\n") - f.write(">gi|226791952|gb|GO546028.1|GO546028 Mdas9011A09_e811.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_177142.1| expressed protein [Arabidopsis thaliana] ref|NP_849870.1| expressed protein [Arabidopsis thaliana] pir|F96720 unknown protein, 58197-59415 [imported] - Arabidopsis thaliana gb|AAG52556.1|AC010675_4 unknown protein; 58197-59415 [A, mRNA sequence\n") - f.write("GCAAATATTTTGATTAACTTGTAGACTATATTACAAAGTTGAAGCATCACCCTTCAGTTTCAAAAAGCCA\n") - f.write("AAACAAGCATAGAAAACTCACACTGAAAATTATATACGTGACGATACGTCCAATAATTCAACAATAAAAT\n") - f.write("TAACCACGCTGTTTTAGATCCTAAATCACACCCAAATTCGAGTCTAGCGTAACCGCGCTAACAAACCATC\n") - f.write("GGAAGCTTAAAGATGCCGAGCCTCCCACTACTTACCATGTAACGGCATTACACCCGACTTCCTTGAAGGA\n") - f.write("GGCCGTCTCTTATCTGTGAAGAGATATCTTTCACAGCACCGGGTCCGTGCGGGATGAAAACCGAGTTGGA\n") - f.write("CTTTGAGGACGCACCGACGTCCTTCAATGTGTCGAAGTACTGGGTCACCATAA\n") - f.write("\n") - f.write(">gi|226791951|gb|GO546027.1|GO546027 Mdas9010I01_e756.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_564093.1| expressed protein [Arabidopsis thaliana] gb|AAK59521.1| unknown protein [Arabidopsis thaliana] gb|AAL15188.1| unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TTTTTTTTTTGCAGAGACCAACAAGTGCCATTACATTGCTAGTAAGTTTTACCTATGCTCTATACATACA\n") - f.write("TATAATTATTTACTATATTCACAAGCTTTTTACATCATATATTCATATATATATATATATAAGAAAAGCG\n") - f.write("GAGATGCGCGGCGGATTGCGGGTGTAGCCGATGAGCTGATCAAACTTCCAAGAATGCTGTTTATGTACAG\n") - 
f.write("TTTTAAATCCCTACGGGCGATCCGTTCATGCCAAGTTCTTTCTAACACGAAGTTGGAAGAACTTGGCCCT\n") - f.write("CCATTGAGCAGCATCTCTATAGCGTTCCTCCACTGCCGCAATAAGCATGGTGCGCACAAGAATAAACTCC\n") - f.write("TTCCTTTCAACACATGGTTGACCATTTGGCCGATCCAGTTGAGTAAATAATAATCCATCTGAAGGGCCTT\n") - f.write("GAGTGGAAAACTTTCCAGATTCGATAACTTTCATTCCATCACTGTATGCCAGGTACTTGTTGACCTGTAT\n") - f.write("GGGAACCTTGCACCGTACAGCAATGTTGATGGCGTCTGAAGGTCGAAGGTCGAAGCTGACACACTCTGTC\n") - f.write("TCACTACCTGCCTTTCTGAGGTATAGCTGAGCAAAATACGCCTCAT\n") - f.write("\n") - f.write(">gi|226791950|gb|GO546026.1|GO546026 Mdas9011A16_e813.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTTTTTTTTTTTTTTTTTTTTGACATAAACCTTATTCTTCATAAAGCAAAAATAACAAAATACAAGAAA\n") - f.write("AGAGGGAGGGATATACACCTTACCCCTCTTGAACAAGAAAATACAAGAAAAAGAGAAGGGGAGACATAGA\n") - f.write("CTTTACCCCTATTAAAAAATAAAATACAATAAAAAGAGAGGGGCGAACATAGGACCAAACCCCTCTAAAA\n") - f.write("TAAAACTTAAAAGGTAAGAACCTGCAAGACAAGATATGGTTCGATCGGTTTAATGCGCACCCCTATTTTT\n") - f.write("TTTTTCCATGCCCCGGAAAAGGTACGGTCTTATAGTGCACCGGTTCCTCTTCCTTTGCCACAACATGCAG\n") - f.write("ATGCGACCTACTGATGATCTTTTCTGCTACATTGTGCTTTGGTTTTTCAGAAACATTGGCAAAGACTAAT\n") - f.write("TTGGATGAGAACAATAGCAATAGGATGATGGAAATCATGTAAACAAAGTTGGCCTTGTGAACCATATTGT\n") - f.write("ATGATCAGCCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791949|gb|GO546025.1|GO546025 Mdas9010N10_e788.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P54260|GCST_SOLTU Aminomethyltransferase, mitochondrial precursor (Glycine cleavage system T protein) (GCVT) pir|S59948 aminomethyltransferase (EC 2.1.2.10) precursor - potato emb|CAA81081.1| T-protein [Solanum tuberosum], mRNA sequence\n") - f.write("AAGCCGCCCTCTGCCCTTCTTCTCTTCCCTATGGCCCATGTGAGTCCGTCCTCCACAGGTGTTGTGAGTT\n") - f.write("GCTCCATGTCATTGACGTATAAGCACAACCCAGCTTCCAGTCGGAGACTGTTACGAACACCCAGACCTGG\n") - f.write("CAGCCTCACCTTCCCCTCGGATTTCTCACAGATTGCCTTGGCAAGATCCACTGC\n") - f.write("\n") - f.write(">gi|226791948|gb|GO546024.1|GO546024 Mdas9010P08_e806.b1 Apple_EST_Mdas Malus 
x domestica cDNA 3' similar to ref|NP_114288.1| cytochrome b6/f complex subunit IV [Triticum aestivum] sp|P12119|PETD_WHEAT Cytochrome B6-F complex subunit 4 (17 kDa polypeptide) dbj|BAB47064.1| sytochrome b/f complex subunit 4 [Triticum aestivum], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTCTAAAGTTTTGTCTAAACAGATAAAAATTCGTTTTGAATCTTGAATC\n") - f.write("TTTAGCACAATCCATATATTCCGAATAGGTCCAAAATTAAATTGAATAGATGTATCTAGGGAAAATTCAC\n") - f.write("TTTGAAGCGACTATTCCCTAGATACACACGTCATAATATACTTTTTTTTATTTCACAGTTGAATCGATTT\n") - f.write("AAAAAAGACCTAAAGTTAAGGACTTATCAATAGGTAGTGTTGCTCCAATACCCAACCAAAGGGCTACTAC\n") - f.write("AGTACCAATCAAAAAGACGGTTGTCGCTACGGGACGACGAAATGGATTTTGGAATTTATTAACATTCTCC\n") - f.write("AAAAAAGGTACTGTTAATAATCCCGCAGGTACTGAAACCATTAAAAGAACGCCCAATAACTTATTTGGCA\n") - f.write("CTGTACGAAGTATTTGAAATACGGGAAAGAAATACCATTCAGGCAATATTTCCAAAGGAGTTGCAAATGG\n") - f.write("ATCCGCGGGTTCACCAATCATTGATGGTTCTAGAACCGCTAAGCCTACGTTACATGCAATAGTACCTAAA\n") - f.write("ATTACTACTGGAAAAATATATAAAAGATCGTTTGGCCATGCGGGCTCTCCGTAATAATTATGACCCATCC\n") - f.write("CTTTAGCCAATTTAGCTCTTAACACAGGATCGTTCAAGTCAGGTTTTTTTGTTACTCCCATAATCCAT\n") - f.write("\n") - f.write(">gi|226791921|gb|GO546023.1|GO546023 Mdas9007M09_e535.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCCAGTTTTTTTTTTTTTTTTTTTTTGGATATCAAGGAGAATTAAATTTATTCAATTTCCATACAAAGAA\n") - f.write("AATGAATTAATGCAGGAACAACATTTGTGCACATTGAGAATGAAATTAACTAATTAACTTACTATCCCCT\n") - f.write("ACTGCAAATTTCACAACTATGCTCAAAACAGATTTTGTAACCAATATCCCAAAAAAGGGGCAAAAGGAAA\n") - f.write("GAAAAAAAAACTTTTGTATGGTATACTGCAGCGTTATCTGTGTAACCGAACTCAACCTAGCTACCTCCTT\n") - f.write("GTACACTTGGAAAACCCAAACCAACAGTAGCCTTCGAACTCCTGTTAAATCTCAGATTTATCAGCGGAGT\n") - f.write("AAACCAAATGCAAAATCCGCTCTTAACCGACAAAGCCTCGACAAACTGAGTTAAATAAAAAAAAAATACG\n") - f.write("ATGATCTGAACAAAGATCGCACCCAGACCGCACCAAAAATTGGTCGGAAAGCCGATGACAATTGGGGAGA\n") - f.write("AACGATTTACAGAGATGCAAAACTTGTCTCAAAACAGAGAACCCAAACAAACTCCCCAGAAAAAAATAAC\n") - 
f.write("ACGACGATCTTGGCGATGAATATTTTGCAAGACGAAAAACACAGGATTTCAAACGAGCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791920|gb|GO546022.1|GO546022 Mdas9007I12_e518.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTGGACGTAAATTCTAAATTGCATTACAAAAGGCGCTACTTTTTACACT\n") - f.write("TCTACAGCGGTTAGCAGCTTTAATTACCCTCCAAAAACTCCAACTCATGAAAAACCCAACGCCTAAAAAT\n") - f.write("TGGCCAAAATCTTTGATTAGAGAAACCTTTTCCTTCCTCTTTTGCTATTTAACTCTTTACTACTTCTCCA\n") - f.write("TTCACTTGGCTCTTGTGGACGAGAAATTTTCATTGTGCCGGAAACATAACCCGATATACCAAATGTCATA\n") - f.write("ATACAATTGATTGAAAGCTTAAAAAAAAATCCCAACCAATGGTATTACAACACTTGGTGTACCAAATCGT\n") - f.write("GTTCCCGACCGACTGAAAAACCTCCCGTTGTTGAAGGGGGTTTTTTTCTCTTTCTTTTTTTTCACCCCTC\n") - f.write("CTTACTTTTTACCTTTTTTTTCTTTGTTACTTCATTTCTCTCCCAATTTGAAGAGGGAAAATTACAAGTG\n") - f.write("AAACAAAAAATAACCGACTCAAAGTTACCGACTAGAAAGTAGTTAAGTGAGCAGGGGAAAGAGTGTGTCA\n") - f.write("CACCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791919|gb|GO546021.1|GO546021 Mdas9008A08_e556.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P58419|NU4C_OENHO NAD(P)H-quinone oxidoreductase chain 4, chloroplast (NAD(P)H dehydrogenase, chain 4) (NADH-plastoquinone oxidoreductase chain 4), mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTATAATCATTCCAATAGCCATTACAAAAGTAAAATATTTGTCACTA\n") - f.write("TAAGATATTTTTTACTAGCATGTATTCCAAAAATAAAACACTAACAATTCATCAACAAAACAGCTCATGC\n") - f.write("CCGGTGATGCAAGAGAAGGCATTGGCATAATAGTTATCAAACGGGTCCTAAAGATTTCAGACGCTTTTTT\n") - f.write("GTTTGATGGAACACGAAATTATACCAACTATTCCACATATCACTTGTCAAACTACACTAAACTGGCTTGT\n") - f.write("CACAGTGTGTATGCGGTAATGCTGCAAACCGCGACATAACTCGGGAACTGAAACCACTTCCCTATATACA\n") - f.write("GCAAATGCTAGTTCACAGTAACTGTAATCTAGCTCGTGTATCCACTCGTGAAAAATAATAGGAAATGTCT\n") - f.write("ATGTAGTTCCATGAACCAGCATAACTTAATGAAATGAAACTCACTGGCTATCAGGCTTGCCCGAGCATTT\n") - f.write("CTTGCTCCAAACAACCGAACCTGCTTTTCGCTCCCCTCGCTTTCTCTTGTTTCTATGAAAACTTACATCT\n") - 
f.write("GAAGCCAAGCATGACTCTAGAGTCGATTGCTCCTGAGGAAGAGCAGCTT\n") - f.write("\n") - f.write(">gi|226791918|gb|GO546020.1|GO546020 Mdas9007K24_e528.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_564931.1| expressed protein [Arabidopsis thaliana] pir|A96709 unknown protein, 35272-36292 [imported] - Arabidopsis thaliana gb|AAG52398.1|AC011915_12 unknown protein; 35272-36292 [Arabidopsis thaliana] gb|AAM65154.1| unknown [Arabidopsis, mRNA sequence\n") - f.write("TCCAGCTTTTTTTTTTTTTTTTTTTTTGACTAAAACACAAATGCTTTATTTACACACTAAACGACTGTTT\n") - f.write("AAATCGACTCAACAGCGGCTTCAATTTATACAGATTCGCTTCGCAAGCCGCTCTATGAAACTCCGAAACA\n") - f.write("CCTAAATGGAAACACAACGAAAATTTTCCCACAGTACTCTTTTTACTTTTTACAATACAAGAGGGTCGTA\n") - f.write("AACACTCCTAGACACGATCGATGAACCCGTTTATTATTTGGTTAACATCTCATTTACTTACACGACACTG\n") - f.write("ACATTTCATTTACACAAATCCCTCGTACATATATACACAAGGTAGAAGGATCGTCGTAGTCTTCTCGTGC\n") - f.write("TGAAAATTTCAAAATTTCCTCACAGTGCTATCAAAGTAATGTGGTTGGCTATATCAAACTTTAAGCTCAG\n") - f.write("TCTGTTCCTATGTACCTTCATCGATTGAATTGGGATCTCATGCCAGGGTAGGGATACTGCAATTACGTCG\n") - f.write("ATCCCTGTCCAGGCAATCAAACCCCTCGATCCTCACTGCTGGTTTGCTCCCGAAACTGGCCCGAGCATAG\n") - f.write("CCTCCTTTCCTAGCAGATGATGAAGGAGACGATGAAAGGCCCATTGCGGGAGCGGTTGGCGACAGTGGTG\n") - f.write("ACAATGG\n") - f.write("\n") - f.write(">gi|226791917|gb|GO546019.1|GO546019 Mdas9008A19_e559.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTGAAACCATGTACTTCAAACCGAATTTAAAGTTATTGAAAAGGAACA\n") - f.write("CTACCAATCTAACTGATTGGATATAAATTGACCGTGTTTACAGTTGAAACGTTGCAAAAAGGATGATAAG\n") - f.write("TTTTACTTCCAAGACAACTACTCTTTTTTTGATAAACATGCTCCCTA\n") - f.write("\n") - f.write(">gi|226791916|gb|GO546018.1|GO546018 Mdas9007E02_e499.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P29790|ATPG_TOBAC ATP synthase gamma chain, chloroplast precursor pir|PWNTG H+-transporting two-sector ATPase (EC 3.6.3.14) gamma chain precursor, chloroplast - common tobacco emb|CAA45152.1| ATP synthase (gamma subunit) 
[Nicotiana tabacum], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTCCTTAATTACATGATTAGCAGTATTATTAACTATGATGGAAAAA\n") - f.write("AATGACATACAATATATATATATGTGGAATGCTCTCTTTTGTTCTCATTTGTGCACACAAAGTTTATTGG\n") - f.write("CAGCTGAAAATTACAATGGAAGTGTTAAGTCTGCAAATTTAAAACAATTGGCACAACTCAAAGTGGAGGA\n") - f.write("AAAAACAAGTACCATGTTGTTCTTTGAATCGTCTAAACTAAAGCGTTGGCACCCGAAACAATCTCCAAAA\n") - f.write("TCTCTCCGGTGATTTTTGCCTGGCGCTGCCGATTGTAGGTCTGCGACAAGGTTTTCTTCAGCTCCACAGC\n") - f.write("GTTATCGGTTGCATTGCTCATAGCGGTCATCCTAGCAGCGAGCTCACTGGCTAGAGACTCCTGCAATGCC\n") - f.write("CTCAAAATCTGACTGTTGAGGTACAAGGGCAGCAAAGCGTCGAGAATCTGAACCGGATCCTGCTCAAACT\n") - f.write("GCAAAACCGGTGTGAAATCCACGGTCTGCGTCTTGATGACATCTCTCTCCACTGTCAATTTCCCTTCCTT\n") - f.write("TGTTGTCAGCCTGAAGAACTCATCATCCACGGCGTCCACACAAACGCCATTGATGTCGCAAATCTCTCCT\n") - f.write("TTTGGCGAGAGTGGAAGCAGGGTGTGA\n") - f.write("\n") - f.write(">gi|226791894|gb|GO546017.1|GO546017 Mdas9007P13_e550.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAB62573.1| P0454H12.5 [Oryza sativa (japonica cultivar-group)] dbj|BAB64826.1| B1148D12.11 [Oryza sativa (japonica cultivar-group)], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTCTGCAAGTAAAGGGCTTAGATATTGACTCGAACTCTTCACGAAAA\n") - f.write("ATGGCGACGTATATGAGTTTTCGAGTTTAATCTCCAGTAGTAACAAAAAATATGACATAAAAGTTTATGA\n") - f.write("GAACCAAAAATTTATGTACCAACAAGTTATCCACATCTAGTGATTGTTCCATTCGCCTTTCACATTCAAT\n") - f.write("CAAGAAACAAGGGAAGAAGACGTACAAGTAATGTGCATTTGGGTAGGTCGAATTCCCGACTTGCAAGATA\n") - f.write("GAAGGATCAATCACTGTGTTTGGCCTTTGAAACAGTTTTGATCAAACCTTTGGTAATCTTCCAAAACCAA\n") - f.write("ATGAGATTCATCATAGACAACATTGGAGGCACCAGGAGCAAGCTGTAAAATCCTAAAGGGAAGATTGTCT\n") - f.write("TGACTTGATCGAAATGGATAAACATGTGGACGAAAAAGTAAATGAACAAAAGAATCCTTGCAACCAGCCG\n") - f.write("CCCGAGGAACAATGCTACGCCATTGAAGATGTACAGGTTAGAGTTCTTCTTACCAGCAACATCCAAATAC\n") - f.write("CATCTCATGTTTACGAGAGGAGTCGTGCTCT\n") - f.write("\n") - f.write(">gi|226791893|gb|GO546016.1|GO546016 Mdas9007K23_e527.b1 Apple_EST_Mdas Malus x domestica 
cDNA 3' similar to dbj|BAB03143.1| ankyrin-like protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TACCCTCTGATGGAGTTTATACATATTACCTTCTGATGGGATTTATACAGATATCTCCGAAAGATAATAA\n") - f.write("AAATAAATTACATCATTCGGGTGGGGTGTTTATTCTTTGCTTTTGCAGTGTTTTTCTGCTGCCCTCTTCC\n") - f.write("TGTATCTTCACCTGGTAGACAGAAGGAATCATAATGATATGAAAATCTTTGATTTTCTTCTTTTCTCTTT\n") - f.write("TCCTTTTCCTTTTCACTTTTGCTTTGCACCTCTGTCTTTGCTTTTCTCCAGACATCATTGTTTTTCTCTT\n") - f.write("TGTCTTTGTCTCGGCATGGGTCAGGCAAATTATTGAAACAAATGGGAAATTATAATCTTTCCCTTGAAGC\n") - f.write("TCCGTGTTGTTATGCCGCCCCGTCACCTGCTTCATGTACTTCTTCTTCTTCGCCTGGTCAATCCGTTTTC\n") - f.write("CTGAGTTAGAAAACACTAAAGTCGGCGAGTTGGAAGTCAGAGACGACTGGCGAGTTGGAAGTCAGAGACG\n") - f.write("ACGTGGAAGTCAGAGACGACGAATTGGAAAGCACCAAAGCCGACGCTGACGCAATGGAAGATGGAGACGG\n") - f.write("CGATGACGATGGCTCGGCCAGAGGGTTTTGGCTCGGCTGTGCAGATGAAAGATGGTAGTGCAGAGCCCGT\n") - f.write("TTGTGATGGCTGGTAATGCAAAGCACATTCCTC\n") - f.write("\n") - f.write(">gi|226791892|gb|GO546015.1|GO546015 Mdas9007L11_e530.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAB92380.1| P0512C01.35 [Oryza sativa (japonica cultivar-group)], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTGGTGAAACTGAACTGTATTGGGAATATACTTCTACAATCTAAACACC\n") - f.write("TTCTGTAATTTATTGGGGAAGCAAATCAAATACAACGAAGGGGAAGAAAAAATGCACAACAGGGAACTTT\n") - f.write("GCTAGTGGCAGTTTCATCTCTCACACCAACAAGGTCCGACCTCGGCCATTCTGACCAGTTATCGACTTGA\n") - f.write("AAGTGGCTTACAAAAGCTCCAAAACAAGGGCAGATGCACCTCCTCCACCGTTGCAAATACCAGCAGCACC\n") - f.write("GTATTTCCCGTTCTTCTGCTTCAGTACCCCTAAAAGGGTGACCAAGATACGAGCTCCACTGCAACCAAGA\n") - f.write("GGATGTCCCCCTCGTGCCGAATTCGGCACGAGGGTCATCCCTCTCCCAAACCCTAAAGCGCTACCTCAAG\n") - f.write("AAACCGTGGGAGATAACCGGTCCGTGCGCGGATCCGGAGTACAAATTGGCCGTACCCGGCGCGCTCGAGT\n") - f.write("ACCGGCTCGAGTGCCCTGCCACCACCAAGGTCCAGGCGTGCGTGCCCACCTCCAACCCGGAAACCGTTTA\n") - f.write("CGATATCAAGTATTTCGCCCGCGACCAGCGGCGCAAC\n") - f.write("\n") - f.write(">gi|226791891|gb|GO546014.1|GO546014 Mdas9008E07_e580.b1 Apple_EST_Mdas Malus x 
domestica cDNA 3' similar to ref|NP_191320.1| adenylosuccinate synthetase [Arabidopsis thaliana] sp|Q96529|PURA_ARATH Adenylosuccinate synthetase, chloroplast precursor (IMP--aspartate ligase) (AdSS) (AMPSase) pir|T06759 adenylosuccinate synthase (EC 6.3.4.4) - Arabidopsis th, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTAAATGGAGACAAAAGTGAATGCGATTGATTTTCTCACACATAAATTG\n") - f.write("TCATATTAATGTGAGATGAGCCTAGGCATCTGTTACCTGCTATCAAATAATAGTATAGCCCATAACTAAT\n") - f.write("ACATGAAAACTACGATAACGACCGACCCCGAGTACTCTGACCTCAATTCATTCCAACAAATTCATACCCT\n") - f.write("AGTAATCCATGCATTACAACAAGCAATCCTAAACACAACATAGATTTCTTGTGGCCGCCTCGGGTGGAAA\n") - f.write("ATGTGTTGAATCAAACTGTGTCTTGAAAATTACTTCTAATCCGAAAAATCAACTCAAGTATCAAGGGAAA\n") - f.write("ATCGAGATTACTATTTGTAAATCAGTGCATCGCGTCCAGGCCCTACACCAATGTAATGGACGGGTACACC\n") - f.write("AAGAAGTTCTTCTATCGTTTCCACATATTGACGTGCGGCCTGCGGAAGGTCGGAGTAATTTCTGACAGAA\n") - f.write("GAAATATCCGATTCCCATCCGGGCAATTCTTCATAGTCGACCTCTAATTGCTCAAGATCACGAAGGTCTG\n") - f.write("AAGGGAACGATTTAATCGGTGTACCATCCTTCTGTTTATAGGCAACACCCAACTGAATTTTGGGAAGATC\n") - f.write("AGACAAAACATCAAGTTTGGTGAGATTGAGAGAAGAAAATCCATTGATCTGACAGCAA\n") - f.write("\n") - f.write(">gi|226791890|gb|GO546013.1|GO546013 Mdas9008A06_e555.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_173710.1| expressed protein [Arabidopsis thaliana] pir|G86363 F19G10.8 protein - Arabidopsis thaliana gb|AAB72164.1| unknown protein [Arabidopsis thaliana] gb|AAL49950.1| At1g22970/F19G10_8 [Arabidopsis thaliana] gb|AAM70551.1| At1g22970/, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTTTACCGGAAAAACATAGCTTTATTGATTGATATTTCAGAAACA\n") - f.write("AAACAAAGAGGTCTTTATATAGAGAAAGACCAAAGCAAACCCTAGACTAAAACCCTAAACCAAATCCTAA\n") - f.write("ACAAACTCAAATACTTAATGGGCAAGTAACTAATCCTTGGCCCGCAAGATAACAACCTAAATTCCAAACT\n") - f.write("AAATTCCAACAATATGGACACAAGAAGCCAGCATAGGCCCTGCACCGACTGTACTCCCATGTGATAGCAG\n") - f.write("AAGAAAACCTTGCAGGGTATTGAAGTAGGAAACCATGTTTTCTTCAAGTGCTCTAACTTGCGGTGTTTCT\n") - 
f.write("CCTCCGGTCCAAAGCATTCCAAAAATGGTGGCTTGTTTGGAGACTTGATCGCCCATCTTTGTGACCTCCA\n") - f.write("CCCAGCTCACCTTCTCAAGTTTTGAAGGTGCAGTTTGATCCAACACCTGGAAGGTCTCGTGGACGGTGTT\n") - f.write("GAGGTGGTGATTGAGAGTTCGAGTCAGTTGGTCACTCTCGGCTCGGCCCATCGCTCGGCGTTTGACGGAA\n") - f.write("GGAATTGCAGCTGCAGCAGTGGCGTTTGACGAGGGACTCGGCCCACTGTCCATCACTCGGATTTTTTTTT\n") - f.write("ACTATTTTTCCACGACTGGCAATGAAGTGAGAGATGGCC\n") - f.write("\n") - f.write(">gi|226791860|gb|GO546012.1|GO546012 Mdas9007F15_e504.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTAAAACTTCGTAACTCATTCTATTTGTGCATATTTCTTGCAAAGCTAT\n") - f.write("GAACAACAGCAGAGATTTCGTGTATGCACCAAG\n") - f.write("\n") - f.write(">gi|226791859|gb|GO546011.1|GO546011 Mdas9007L14_e531.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_187796.1| amino acid transporter family [Arabidopsis thaliana] gb|AAF23206.1|AC016795_19 putative amino acid transporter protein [Arabidopsis thaliana] gb|AAP37661.1| At3g11900 [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTGATTTCTGCATCATTGGGCTGAAGAATATTATACACTGGTGGTAC\n") - f.write("ATCGAATATTGGAATCCAGATACAGATTAAAGTTCTTCTGCTTCCCCATTGTACAGTTTGTATTCATGTA\n") - f.write("TCTCTTACTTGTAAAGTACTAATTACGACAAGCGATTCAAACGCCAACTACGGCATTGTAAGCGCCGTAT\n") - f.write("GCGGCAAAAAGCATTCCGCAAAATAATATGCAACAATCCAAGGTTCTTTGCCACAATTTTAGGGAAGAAC\n") - f.write("CCAACAATGCTAAGTGAAATGAAGCAGGTAGAACAAATGAGATCAGTGCACATACAGTACTTCCCACGAG\n") - f.write("CGACGCAAATACTGCAAACCCTGGAACGCACGAGGCCAAGACGGCCAGTATGATCACCACTATTGCTCGA\n") - f.write("CTCACGTACACTGCAAACCTTCCTAGAGGCGGCATCGTTGAATAATCGTCATTGTCATCATGAGCTTTCT\n") - f.write("GAAACCATTTGCCTTTTACCAACTTCCCTTCTATGATCTCGTTTATCGGGTGTAGCATGATATGGGAGGG\n") - f.write("TAAATATTAGCCCCAAGCACAAACCAATCTGAACCACCATGGCAGACCAA\n") - f.write("\n") - f.write(">gi|226791858|gb|GO546010.1|GO546010 Mdas9007E17_e501.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_191612.1| GATA zinc finger protein [Arabidopsis thaliana] 
pir|T47864 GATA transcription factor 4 - Arabidopsis thaliana emb|CAA74002.1| homologous to GATA-binding transcription factors [Arabidopsis thaliana] emb|CAB81839.1| GATA transcript, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTAATTAATACATTTTCTTTTAATTAATTAAATATCCAAAATAGTAATG\n") - f.write("GAAATTAATTTCAACTTTTTTCTTTGCCTCAAAAACAAAAAATTCCCATTGTTTATGGTCAAATTTCCAC\n") - f.write("ACGGCCCATCAACTGAGAGACCGCCACGTGTTCCACACGTCAGCAAACTCGAAACTCGTCGCGGTGAAGA\n") - f.write("TAGAATTGCTGCTGATGCGGACGGTGTTGCTGTGGTTGCTCTTGCGGTGCCTGCTGCTGAGAAGCCTCCT\n") - f.write("TCTGGCGACGGAGCTCCAGAACCTTCCGGTGCGAGTTCGAGTGCTGAGTCAGCACGAAGGTCGGACTCGC\n") - f.write("CGCCGGCCGGTACTCGGGCACGAGCCGACCCGACTTGTACCGGACCCCGCACGCGTTGCAGAGAGTCTTG\n") - f.write("GGGCCCATCGGTCCTGCCCGCCACTGCGGCGTCTTCTCCGAGGCGCAGTGCGTGCACCTGAGCGGTGAAG\n") - f.write("GGGACGCTTCCGACGGCTCTCTCTTAGGCCTGTTGGGCCTC\n") - f.write("\n") - f.write(">gi|226791857|gb|GO546009.1|GO546009 Mdas9008C11_e569.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("AAATTTCTTACAAAAAAGTTGGCATCAAATAATCCATGTCCCACCCACATTTGGGTGATTTTAAAAAACT\n") - f.write("TATTTTAATTATCTTTTTACAAAAGATAAGCTTAATAACCAATACACAGCAATACATGGTTCTGTTCATT\n") - f.write("GAGTTCCCCATTTGCCTGCTACTTAAAATACGTTTGAAGATCCAACCCCAAGACCAAAAATACCAAACGA\n") - f.write("AATCGGAAAAAAAGCAAATTACCTGTCAACAACTACCGGTAAGCATATAGCAGTAGGTACCCCATGGCTT\n") - f.write("CAATTGTTCAACATTCCTCCACCCCCCCATCGTTTTGTTCCATCATTGTCCCCCTAATTTTTCAACAGAA\n") - f.write("AACTAATGCAGGAACCCCGCTGTATTCGGCTACATCAAATCTACATCGGCAGCACAATGTAAATAACAGG\n") - f.write("GGAGGGAGAAGGCCAACAAATTTCTGAAGAAATGAATTCCTCTCAATTTACAGGCCT\n") - f.write("\n") - f.write(">gi|226791856|gb|GO546008.1|GO546008 Mdas9007G24_e510.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_568305.1| mannosyltransferase, putative [Arabidopsis thaliana] gb|AAK91470.1| AT5g14850/T9L3_150 [Arabidopsis thaliana] gb|AAM52236.1| AT5g14850/T9L3_150 [Arabidopsis thaliana], mRNA sequence\n") - 
f.write("ACATTTTCAAACCACATCACTAAAGGGGTCAAAACTGATATCATGTTTAACCATGACAGCCTATCCATGC\n") - f.write("TCACAGGCTGTATCCAGTTGAAAAATCAAGTTTAACACAACATAAATAGACACACAAAAACGCAGGAAAA\n") - f.write("ACAACAAAAACAAAGCATTTAATAAAATCCGGGGTTAATGAATGGAAAAAAATTTGTTACATACTAATTG\n") - f.write("GCGGTCTGAAATTGGAATCTGCTGAAGCGGGCATGTCTCCAATAAACCCCCATGCTCTGGCCATTCACTC\n") - f.write("CATTCATCAAGACATAAAGTCAATAAATTCTTCCAGCCCATCATACTGGAATGGTTCTAGTTGCACATAC\n") - f.write("AATAATCAGTCGCCTGTCAAACCGTACACAACAACTGACGCTTGCAAATCACGGTCCACTTTGAAGTGCG\n") - f.write("CATGGAAAAACCTTTTTATCTTTTTGAAAGAATGTGAAATCAAAAAAGCCCTCAACTGTTTTTCTTCCGA\n") - f.write("ATCAAATACTACAATGTGACTAGCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791855|gb|GO546007.1|GO546007 Mdas9008D01_e573.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_191593.1| palmitoyl protein thioesterase precursor, putative [Arabidopsis thaliana] ref|NP_850728.1| palmitoyl protein thioesterase precursor, putative [Arabidopsis thaliana] pir|T49229 palmitoyl-protein thioesterase-like protein F27H5.130, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTGAAGTAATGGTCCGAAAGGTGGAAGATATAAAAGTACAATAAAGTTG\n") - f.write("GGAGTCATGTACATACTCTACAACGTAGAGTCTCCGTAAAATTACTTAACAAAGTACAACATCAATGATT\n") - f.write("ACGAAACAATATGGCAGCCGGCAGCCGGCAGCCGGCAGCCGGCAATCGTAAAAGGAATTGTCACTCACTC\n") - f.write("TCCCTTCTCCAATGGTGCAAAAAATAAACAGACATAACAAGTCCCGTTCGACTATAGTGCATAGAGTAGT\n") - f.write("AAGAATATACGTAGTAGCGATCATTTCAGAGCGTTCTTGTGTATATTTCGTGTTACCTTTAAACCCCGAA\n") - f.write("ACTCACATTACACGACCCTCAGCACCACCTTCTGATGCTCACCGAGTCCAACAAGATCCAAGTAGTTGTT\n") - f.write("CCAGATTAACGAAAGTGTCGTTGGAAATGAAGGCTCTGCTATCTTGTGCTGCGTTGAGTCTTCGTCTTCC\n") - f.write("AAGTATGGCACAATGTACGTTTTCATGTCGCTTCGAGAGATCTGCAGATGGCCCCCCGACACATTTATGA\n") - f.write("ACTTAACTTTTCCAGCTTCGTCCAAGGTTTTCAAACCAATCCAGTCTTCAGTGTACAACATGGTCTCTTG\n") - f.write("CGCAGCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791828|gb|GO546006.1|GO546006 Mdas9008C21_e572.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to 
dbj|BAB92996.1| chalcone synthase [Malus x domestica], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTCATAATTATCATCTATTTGTACTATCTCTTTAATAAAAATAGGATCC\n") - f.write("GCCAATATATAGGGATTCCATCTCTATTAATAAAACGATACAAATAAATGATGAAAATAGCATTTTTGCA\n") - f.write("GCAAATATTACTCCAAAGTTTCCCCAAGTCAAAGTAAAAAATCCAAAAACCCAAAGAAATAACCACAAGA\n") - f.write("TATACCGGTGGAAAATTTAATACAAATACCACGCAGATATTATAAATAGTTATTGAAACTTCACCTTCAA\n") - f.write("CCCGTTAAACCCACGCTGTGAAGCACGACGGTCTCCACCGTGAGGCCGGGCCCAAATCCGAAAAGCACAC\n") - f.write("CCCACTCCTTTCCCTCCCCGGTCGTTTTGAGTCCTTTCTCGGAGGACTTCCTCCTGACCTCGTCCGAAAT\n") - f.write("AAAAAGCACACAAGCACTCGACATGTTACCGTAATCCGACAACACTTGCCTTGTTGCTTCTAGTTTCTCC\n") - f.write("GGCTTCAATGCCAACTTGGCCTCTACTTGGT\n") - f.write("\n") - f.write(">gi|226791827|gb|GO546005.1|GO546005 Mdas9007H11_e512.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_850250.1| expressed protein [Arabidopsis thaliana] gb|AAL67023.1| unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TTTTCCATTTCTTTTATTGGAGCTACATATGAAGATTATGAATGATACACCTGAAAAAGATTTCTTTAAT\n") - f.write("ACACAACACTATTTCTCCCAGTCTTAATATGGATGCAGTCCTCTTCGTTTCTATTCATGTAGGTAAGTTA\n") - f.write("GGAACTAATTCGTTTGTCAAATACTTTCCCATGCTACTCGTCTTTGTATTTCCTTTGACAGATACATGTA\n") - f.write("CACGTATCGCTCGGTTTCTCTTACGGTGGTGGGTGATCGTCCTCACGGGTTTGAGGAGGGATAGGTCTCC\n") - f.write("AACCTGGTTTCTCATCCCAGTACATGTCATAGTATATGTGACCAGGAGTGTGTTCGGCCACACAACACCA\n") - f.write("TATACCAACATACCGCTTCGCACGATTCCTGAATTTCTCTTCCCTTGCCTTGTCGGTGAAACCTGGGAAT\n") - f.write("GCATCTTGCATAGAAGAAAACTGAATTACTTTTACATCCTTGAACGAGGAGAATACTGTCTTGAACGTTT\n") - f.write("CTTCGTTGCTTCTCTTTGGAAATCTGAGTGCTCCTGAAGGGGTGGTAGTATTTGAAGCAATGCAGTCTCG\n") - f.write("CGTTCCTTGTTGACAAAGCTGGACATCAAGCCATGATTCTTTCACCTGTTTTGGCATCAATGGATTGTCA\n") - f.write("AAGAATGAGTATTCTCTAATACCGATTGGGGGCCCAAATAT\n") - f.write("\n") - f.write(">gi|226791826|gb|GO546004.1|GO546004 Mdas9008B14_e564.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - 
f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTTTGCAGACAAAATATATAAAATATATGTATAGGTATAAAACAG\n") - f.write("AAATAGTAAACCCAGCATCCTACCAATTTATTTACATAAAAGGTTCAAACCATAATCATTGTCAATCGCC\n") - f.write("GTCACAAACAGTTTGCAGCAAACTCCACATTAGCCAACCGTTAAACGAGGAGAAAGATTGATTCCTTACC\n") - f.write("ACGCATCTTTATCAAAAGTTGCATTTATAGAATGACACACTTGGTTTCACCTCTTTTACGAAGCTCCTCC\n") - f.write("TCTGCTTTCATTAAATGTTCTCGAAGCTTTCGAATTTCATCATCAGACTTCAGCTGAGCCTGTTGAGCAT\n") - f.write("TCTCTTCTGCTCTCAGACGTGCAGCTTGTTCATCTGCCAACTGTTGCTCAAGCCTCATAGTGGTCTCTCT\n") - f.write("TAACTTTGACTCGACCATCTCAGTAATCCGCTTAAGCTGCTCGTCATGTGCACGCTGCATCTGATCCTTC\n") - f.write("AATTCAGATATTTCTCGTTTGGAATACCCCTTCAAGGAGTTGACCTCTTCTTGTTGATCACGAAGTTTAA\n") - f.write("TGGCCCCTTTCTTCACTTCAGCAAATATCTCATCTGTGTACGGTCGCCCACCATTTTGTGATATG\n") - f.write("\n") - f.write(">gi|226791825|gb|GO546003.1|GO546003 Mdas9007P19_e553.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_196647.1| CBS domain containing protein [Arabidopsis thaliana] sp|Q9LEV3|UMP3_ARATH Protein At5g10860, mitochondrial precursor pir|T50795 hypothetical protein T30N20_130 - Arabidopsis thaliana emb|CAB96841.1| putative protein [Arabidopsis, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTGTGAAAAAGAAATAAAAATTAGTACCATACTTACACGAATATGTGA\n") - f.write("ACGTGTGAACGTGCGTGATACAAAACGCACTACCATCTACCAGAGAGAGAGAGGATTTAAGCAAAAACAG\n") - f.write("AACGTACACTGCTTATTGTTAGGTACTTGACAGTCCCAAAGGTTTGTAATTAGACAGTGATTGGAAGTAC\n") - f.write("AGAACCAAACAACAACAACAACCGGAGATGCACAAACGAACACACAATACATACAAGCACTCGGGCTTAT\n") - f.write("TTCTGTTTGTCATTCCGGCGTGTCAACTAAAACAGCGTCTCACGTCTTCGTCATCATCATTATCTAGTAA\n") - f.write("CCACCTTGAATAAAAGCATTCAAGCGGTCTAGCTCCTCCCGGTGCTCGCTCACCACAGCACGAACCACGT\n") - f.write("CACCGATGGACACCATTCCAATCATTCCCCCGTTGTCTATTACTGGAATGTGCCTGATTCGGTTATCCGT\n") - f.write("CATAAGTTGCATTGCCCGCAAAACTTTGGTGTCAGGGGTGACAGTGATAAGCTTGTTCTCCTCAGTCATG\n") - f.write("ATATCCCCAACCTTTGTTGACTTGGATGATCTTCCCTGAACTATGATCTTCCTGAGATAATCTCTCTCTG\n") - 
f.write("TTATGATTCCTGCAAGAGACTTTTGCACTCCGGGTTTCACAACAACTAAAGCTCCAACATTGTGCTGGG\n") - f.write("\n") - f.write(">gi|226791824|gb|GO546002.1|GO546002 Mdas9007H05_e511.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_568870.1| SKP1 interacting partner 1 (SKIP1) [Arabidopsis thaliana] dbj|BAB08860.1| contains similarity to N7-like protein gene_id:MTI20.16 [Arabidopsis thaliana] gb|AAG21976.1| SKP1 interacting partner 1 [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTACAAAGAGAAAAGCATTAATACATGGTTTATACATGAACTCCATACA\n") - f.write("ATTTGACTTATTGGCGTTACCGAACAAGGTTTTCGATAAGTAGAAGTGAAAGAGATACAGAACCTTTTGT\n") - f.write("AAAGAGCTCCACACACCAGCATATACAATATGCTTTTCCCACAATTGATCAGAGCAACACAACACGGTCA\n") - f.write("TACATGTCTTATGTCCGAGCTTAACAGACTAGATTCTGAAAACATCTGTCTGGAATCGCTCGTCATACAA\n") - f.write("CCTCCAATGGCCATACCTCTCCGAATGGAAGACTGACCGTGGAATGTAGAAGTTTGGCTTTTTAATCTCC\n") - f.write("TTCAAATTCTTCAGATTGCATGTTGTGTTCTCAATATCCCGGCTAGTCAAGTTCACACACCCAAATAAAT\n") - f.write("CCAAGTACTCGAGATTTGGACATCCTACAGGTATCAAAGTAAACCCTTTAGCCGATACCTTTGAGAACCG\n") - f.write("AAGTTCAAGGTGCTCCAAATTTGGCATAGATTTGCCAATTGCAGCAGCTTCTGAATTTCCATCCTGAGGG\n") - f.write("CAAGTTTTTAAGTACTCATCTGGAACAACTCCAGTGTATTCGGATGGATCTAACCAGTTCAGCATATTCC\n") - f.write("GCTTTCAGATTTTCAGATTT\n") - f.write("\n") - f.write(">gi|226791823|gb|GO546001.1|GO546001 Mdas9007N23_e544.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("ATCGCTCCAATAAAAACCACATTACATTTTGCAACTAAATACAGATGCAGTTACTCAAGCTACATCCAAT\n") - f.write("TACATCATGGATACAAATTAAAGTTCGACATTTATTAAATTAAGCTATCTATTTAGCTGCTCCTTCAATT\n") - f.write("AATTATCACAAAAAAAAAAAAAAACTTTTGGGTACCCAAATCAACAAAATGGGTCGGACCCACGACAATC\n") - f.write("AAGAAATTGCAGGAGGAACCCATTACACAAATTGCATTCAAAATACCAAAAATTGAGGGAAAAAAACATC\n") - f.write("AACGAAATTGAGAGGAGGGATCGGAGGATCAACCTGCATTCGAATCAATGCCGGTGTCTAGAGAGAGAAA\n") - f.write("AAATCGGATGCAATCTTGAAAATGAGGAAGCAAAGGAACGCGAAAAAGATCGAGAGGAAGAGGTAATCCA\n") - 
f.write("CGAAGACGAAGAACTCGGTGGAGCCGTGGCTCCTCTTCTTCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791795|gb|GO546000.1|GO546000 Mdas9008B02_e561.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAB89355.1| ubiquitin-conjugating enzyme OsUBC5b [Oryza sativa (japonica cultivar-group)], mRNA sequence\n") - f.write("TCGACTTCCACCCGAATATCAATAGCAATGGAAGCATTTGTCTTGACATCCTAAAAGAGCAATGGAGCCC\n") - f.write("AGCACTTACTATCTCTAAGGTATTGCTCTCAATCTGCTCATTGCTGACTGACCCAAATCCTGATGACCCT\n") - f.write("CTCGTGCCGGAGATCGCCCATATGTACAAAACTGATCGAGCCAAGTACGAATCCACTGCACGTATCTGGA\n") - f.write("CTCACAAATATGCAATGGGTTGAAGAAGTCTTTTCCGCCAAATGTAGAGTTTGTATTTGGGGGGAGGGGG\n") - f.write("ACAGGTTCGGGGCTTTTCAATTGAGGAAGAACAGTTTCCTTAATTATATCAAAAAAAGAAGGGTATGAAT\n") - f.write("GGGATGTAATTCTGGCTTTACCAGAACTTTAATTTCTTGTTTGATGTTGTTCCTTCCAGATTTCTAGACA\n") - f.write("GAAACTTAAGTGTTCAAGTTTTAATTCCATCGACTGAGGAATCGGTGTTGATTTTTCATCTGCCATTTCT\n") - f.write("TGTTGTGTCTTGAAAGATTGAGAAATGTATAAATCCAATGAACTGCTATGTTTGCTATTTACTCTTCCAA\n") - f.write("TAAATTGTTCTTGCAAAAAAACAAA\n") - f.write("\n") - f.write(">gi|226791794|gb|GO545999.1|GO545999 Mdas9007N14_e542.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTAAACAATTACAAATTTCAAATTTCAATTTTTAATTTAAAAAATATAG\n") - f.write("TAACTTACATTATTACATTAATGTCAGGGACCTCAATCAAAAACTAAAAACTAACAAGGTTTCAATCAAA\n") - f.write("GAATATTGATAGTTAGGGACCGCATCCAAAGTGTCCCTTTTAAATAACATCATTTAATCAAATACAAGAG\n") - f.write("AGGGTATGAATCTGTTCCACTTATTTTAGCCGATCCAATCAAATATAATCTGT\n") - f.write("\n") - f.write(">gi|226791793|gb|GO545998.1|GO545998 Mdas9007M06_e534.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTAACTACATGAAGAAACAATTATGTTACAAAACCCTTACATAATTA\n") - f.write("TGCAGCAGTCTCGCAATCATATAACTCTCTCCCCCTTTCAATTACAAAGAATTAATACATCAACAGGTAC\n") - f.write("CTAACCTATATACTTAAATATTTAGCTCACAGAACACACTTGTATGAAACTTGACTTTGTGGTAAATTAG\n") - 
f.write("GAAGGAAATTTGAAAAGAGATATCGAGAACGAAAGGGCCCCATTTTCTCCCAAGTTCCACATTCCTCTCC\n") - f.write("CCATTTCTGCTCAGTTTCCCCCTCCCCTTCCTAGTGCAGGCACTCCTCCTCATCATGTCTGGCTCCGCCA\n") - f.write("CTCCAAACCGATGACTTCGGAATGTTAGAGCTTAGTTAGGTAATTAACTAAGTAGCATCAGAGGATGAGA\n") - f.write("GAAGGGGTTCATGTTCCCTTCCTTCTTCCAACTGTTCGCCATCTGAAATCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791792|gb|GO545997.1|GO545997 Mdas9007G21_e509.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_196186.1| signal recognition particle receptor beta subunit-related protein [Arabidopsis thaliana] dbj|BAB09661.1| signal recognition particle receptor beta subunit-like protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("GCATCCCAAATGCAATGAGCCATAATCACTTGGAACATCTTTTAACTTACAAGAAATTACCACAAATTCA\n") - f.write("TGAAATGGGATCAAATATCCTACGACTTTACGTTATCTCTGATAAATTGCTCCACCTGGGAAATTTCACC\n") - f.write("ACCAAGGCCAGAAGCTTCTCCAACTGTAACTTTGTTTTGACACTGCAAAAATGCAAATCGTTCTCCAGGT\n") - f.write("GCTCCGAGTGTAAACTCATTTGTAATATCAGCTGTTGATACTGCACTTCTTGATGCTCGTAATTTGTCAA\n") - f.write("TTTCCTTCTCCAATTGTTTTCGAATAAATTCCTTGCTATGTGCAGTCACTTTGTCTGTCTTATTGCAGAA\n") - f.write("AATAAGAACTGGAATTTTCTTCCTCACCACACTCGCCTTGGTCAAAATATCATACAGGTACTCTGAAGCA\n") - f.write("GCACGGCAATTTGGTAAGAATTCCACAGCATCAACCACAAACACTATACCAGCAGCTTGAGGGAGGAAAT\n") - f.write("CATCTAGTTTGGCTCTAAGGCGAGAATGCCCGGGAACATCAATAACATGAATAGGATTTAGCTTTCCATT\n") - f.write("CTTTGATTTTTCAGAATGAAGCACAAAAGTTCCCTCATTTGGTTCCATAGATGTGACAG\n") - f.write("\n") - f.write(">gi|226791791|gb|GO545996.1|GO545996 Mdas9007E08_e500.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_566716.1| protein kinase, putative [Arabidopsis thaliana] dbj|BAB01250.1| kinase-like protein [Arabidopsis thaliana] gb|AAK59509.1| unknown protein [Arabidopsis thaliana] gb|AAL34187.1| unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TGGAAAAGCTTGATTTCCCTACTAATTTACAATAAGAATGAAGAAGGAACCCCAAACAACAATATGGCTT\n") - 
f.write("TTCAAAAACGCTGTTTGTGCATTATTCAAAAATGAAAAGTGATTATTCAAAGACTGACTGCTGCCTGCAG\n") - f.write("TCTAGCGCTGAACTGGTAAACTGGTTTTAATATGCAATGCACAGTCTGTAAAGCTATATTACACCAAACA\n") - f.write("AAGAAGGTTATGGACCGCGAGGTGTGCCGAAACAAAAACAGCCGGTTGACTGGTCTTCAGGTATCATGCC\n") - f.write("CCCTCCCTTGCTCGTATCTATTGCGTACAACAGTTTCACAACCTCTTCCATTTCAGGGCGTTTATCCGGG\n") - f.write("CTTGCATCCCAGCATTTTCGCATGACGCTAGCCAATGTGCTCGGACAACATCTTGGGATTTCTGGTCGTA\n") - f.write("AATTCTGTCGCACTACTGCAGATGAAACATCAGCAAAACTTAAATTAAGATAAGGCATGTCGCAACAATA\n") - f.write("GATTTCCCATAAGCAAATACCAAAACTATAGACA\n") - f.write("\n") - f.write(">gi|226791790|gb|GO545995.1|GO545995 Mdas9007K19_e526.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|H71447 trehalose-6-phosphate synthase homolog DL4920W - Arabidopsis thaliana emb|CAB10557.1| trehalose-6-phosphate synthase like protein [Arabidopsis thaliana] emb|CAB78780.1| trehalose-6-phosphate synthase like protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTTGAAGGCCAACATTAAGATACGAGAAAATTTTACAATTTCGCT\n") - f.write("ACTCACACACTAGTCACTGCATAATAACAGAACGGGGTTTCAATGAATAATCTATTTACTTTTCTAAAGT\n") - f.write("AAGTACTCGCTGAAAAAGTTGCCCTGCAGCGCTTTTGGCCGACTGCTCCGAAGCATTGGCCAGCCCCTGT\n") - f.write("AACATTCTCAAAATCTCAGTTGTGTCTTCCAGGTAGTACTTGGCCTTGCTGGGCTTCTGACCGACGGTGC\n") - f.write("AAGCAAACACTTCAGCCACCGGAGAGAGAGAGTCCCTTGAACTCATTATCGCCTCAAACATGTCTTCGTC\n") - f.write("GGACCTGTCATCCCCAATACACAGAACAAAATCTGGAAGCATTGCTTTCTGTTTCATTGCTACGAGGAGA\n") - f.write("CGTTCTGCTACAAGGCCTTTGTTGACACCCTGAGGTTTAACTTCTACAATGTGCTGACCACTCTTGACAG\n") - f.write("ACACTGGCTCATTGGCAAGAACACTTTCCAGGTGATCTAGAAGCTCCTTAGCCTGACAAAAGCCAAAATC\n") - f.write("TGGATCTGCGTATTGATAATTCCAAACAAGAGCACTTTCTTTG\n") - f.write("\n") - f.write(">gi|226791762|gb|GO545994.1|GO545994 Mdas9007K12_e524.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("CAGGAGGTACTCTCTTGGCTTTCTTAATTCAAAAGTAATAAAGTAGTCAAAAAAGTAGTCAATAAAGTCC\n") - 
f.write("TCAATAAAGGTTCCCAAGTACTACAGTAAATGCCACTAAACAAAGTAAAATCCCTAAAAAACATGATTTC\n") - f.write("CCTGCCATCCAATCCAACCGCGGGTTGTAAATTTTCCATCCCCCCCCCCTGCTCTGGATTTCCTCTTCTG\n") - f.write("CCCTCTTGGTACCAGGCCTTCGCTTCATGTTCTCTTTCCCTTGAGGGTCTTAAAACTCTTTAAAAAAAAA\n") - f.write("CTGAAAACCCCCTTCAGGGGAGCTTTCCTTCCTTCTTTAAAGTTCCAAAGTTCAAAAAAAACATCCCCCC\n") - f.write("CCCTTGGATTGGAATCATTAAGTTCTTTC\n") - f.write("\n") - f.write(">gi|226791761|gb|GO545993.1|GO545993 Mdas9007C18_e496.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_567082.2| serine/threonine protein kinase, putative [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTTTTGCAAACAACATTCAGTGAAAAATATATCCACCTCACAATC\n") - f.write("CAACCCCCAAGTCAATTCATGGCGAAAAAATTAGACATACAGCACTCAGCAATGACATGAAACTGGCCAA\n") - f.write("ACCAATCCTACACATCCAATCTCATATATGAAATGTAAAAAAATGAAACTATATATAATGTTTACGATGA\n") - f.write("AATCAGCGGGAGGCAGTTGAAAGCAGAGTGTTTCAAGGACGCAGAAATTCCAGCACAAGGAAAGACTGAG\n") - f.write("GCCCTTGCCTTCAAGACTCCGGAGCTGCTGCTGGTGGCTTCAGAAGCGGTTGAAGAGCCTTCACAACAAT\n") - f.write("GCTCATATTTGGCCGGAATTCAGATTCATATTGCACACACAGTGCTGCAACGGCTGCCAGCTTCGCGACC\n") - f.write("CCCTTAGCAGGATAATCCTTTAGCTTTGGATCAACACATTGTTTAACTTTGTCTTCGCTCAATCTTGGAG\n") - f.write("TAGCCCAAGTAACAAGACTCTGCTGTCCACGAGGCATTGTATGATCAACAGGTTTCCTCCCGGTCAGAAG\n") - f.write("TTCTAGTAGAACCACTCCAAAGCTGTACACATCACTCTTCTGTGTCAATTGGCCTGTCATTGCATACTCT\n") - f.write("GGGGCATGATAACCAAATGTTCCCAAAAATCGAGTAGAATGAAGACGAGCAGCCATATCAGGAGCCTGA\n") - f.write("\n") - f.write(">gi|226791760|gb|GO545992.1|GO545992 Mdas9007L07_e529.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTGGACAACTCAATTTATTTAGACCGGGTCATAATGTTACACACT\n") - f.write("GAAATTCACTCGAGCATCTCGCGCACATTAGATGCTCGACAATAACATGAATTGAAAGACGTTAACACAA\n") - f.write("CAGAGTTCATAATTACAGCTCAAGAACAATATATTGTATCCCGGATTGGGGTTCGATAAACCCACTCATT\n") - f.write("TCTTCCCGCTTTTCTTGAGCCCAGAACCTCCGAAGGACCCTTTCTGTTGTGCCTTGGCTCTAAGCTCCTT\n") - 
f.write("CAGTGCCTTTTCCTCCTCTTTCTTCTTCTGAAGGTTGGCCAAATCGGACTCGTCGTACTCCTTCTTGTCG\n") - f.write("GCCTTGGGTGCCTTCAAGGGCT\n") - f.write("\n") - f.write(">gi|226791759|gb|GO545991.1|GO545991 Mdas9007H16_e514.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_568754.1| expressed protein [Arabidopsis thaliana] ref|NP_851167.1| expressed protein [Arabidopsis thaliana] gb|AAK92812.1| unknown protein [Arabidopsis thaliana] gb|AAM14150.1| unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("GGGAGTCGCTACGTTTGGAGCTCCAAGGAGTCCGAATATTCCTCCGCTGATCATCTCGTTGTCATGGTCC\n") - f.write("ATGGAATCATGGGAACTGCCGCGGATTGGAAGTTTGGGGCGGAGCAATTTGTTAAAACACTTCCAGACAA\n") - f.write("AGTTATTGTTCATTGTAGTGAACGGAATGGCTCTAGGCTGACTCTAGATGGTGTGGATGTAATGGGGGAG\n") - f.write("CGATTGGCAGAGGAGGTTATCGAACTGACTCGAAAAAAACCTAATCTACGAAAGATCTCATTTATTGGAC\n") - f.write("ATTCTGTTGGAGGATTAGTGGCAAGATATGCAATTGGGAGGCTATATAGACCCGGTCCCCCTAAAAGTGA\n") - f.write("CAATACGGAACCTAAAAGTGAGAATACGGAACATTCATCTCCTGATGGATGTGAAGAGGATGCAAGGAGT\n") - f.write("ACATTAGCTGGCTTGGAGCCTTTGAACTTTATTACTGTTGCCACGCCTCATCTTGGATCAAGGGGTAACA\n") - f.write("AGCAGGTGCCATTTCTCTTTGGTGTACCTGCCTTTGAAAGACTTGCTAGTGCGGTTATTCATTTGATATT\n") - f.write("TAGGAGAACAGGTCGGCATCTTTTTCTTAATGATGACGATGATGGGAAGCCCTCACTG\n") - f.write("\n") - f.write(">gi|226791758|gb|GO545990.1|GO545990 Mdas9008A20_e560.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAB89355.1| ubiquitin-conjugating enzyme OsUBC5b [Oryza sativa (japonica cultivar-group)], mRNA sequence\n") - f.write("ACGTCTTCCACCCGAATATCAATAGCAATGGAAGCATTTGTCTTGACATCCTAAAAGAGCAATGGAGCCC\n") - f.write("AGCACTTACTATCTCTAAGGTATTGCTCTCAATCTGCTCATTGCTGACTGACCCAAATCCTGATGACCCT\n") - f.write("CTCGTGCCGGAGATCGCCCATATGTACAAAACTGATCGAGCCAAGTACGAATCCACTGCACGTATCTGGA\n") - f.write("CTCAGAAATATGCAATGGGTTGAAGAAGTCTTTTCAGCCAAATGTAGAGTTTGTATTTGGGGGGAGGGGG\n") - f.write("ACAGGTTCGGGGCTTTTCAATTGAGGAAGAACAGTTTCCTTAATTATATCAAAAAAAGAAGGGTATGAAT\n") - 
f.write("GGGATGTAATTCTGGCTTTACCAGAACTTTAATTTCTTGTTTGATGTTGTTCCTTCCAGATTTCTAGACA\n") - f.write("GAAACTTAAGTGTTCAAGTTTTAATTCCATCGACTGAGGAATCGGTGTTGATTTTTCATCTGCCATTTCT\n") - f.write("TGTTGTGTCTTGAAAGATTGAGAAATGTATAAATCCAATGAACTGCTATGTTTGCTATTTACTCTTCCAA\n") - f.write("TAAATTGTTCTTGTAAAAAA\n") - f.write("\n") - f.write(">gi|226791757|gb|GO545989.1|GO545989 Mdas9007D02_e497.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_567082.2| serine/threonine protein kinase, putative [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTTTTGCAAACAACATTCAGTGAAAAATATATCCACCTCACAATC\n") - f.write("CAACCCCCAAGTCAATTCATGGCGAAAAAATTAGACATACAGCACTCAGCAATGACATGAAACTGGCCAA\n") - f.write("ACCAATCCTACACATCCAATCTCATATATGAAATGTAAAAAAATGAAACTATATATAATGTTTACGATGA\n") - f.write("AATCAGCGGGAGGCAGTTGAAAGCAGAGTGTTTCAAGGACGCAGAAATTCCAGCACAAGGAAAGACTGAG\n") - f.write("GCCCTTGCCTTCAAGACTCCGGAGCTGCTGCTGGTGGCTTCAGAAGCGGTTGAAGAGCCTTCACAACAAT\n") - f.write("GCTCATATTTGGCCGGAATTCAGATTCATATTGCACACACAGTGCTGCAACGGCTGCCAGCTTCGCGACC\n") - f.write("CCCTTAGCAGGATAATCCTTTAGCTTTGGATCAACACATTGTTTAACTTTGTCTTCGCTCAATCTTGGAG\n") - f.write("TAGCCCAAGTAACAAGACTCTGCTGTCCACGAGGCATTGTATGATCAAAAGGTTTCCTCCCGGGCAGAAA\n") - f.write("TTCTAGTAGAACCACTCCCAAGCTGTACACATCACTCTTCTGTGTCAATTGG\n") - f.write("\n") - f.write(">gi|226791756|gb|GO545988.1|GO545988 Mdas9007J16_e521.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_194589.2| calmodulin-binding protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTTAGATTATAACTCAATATATTTTCAAGAGAAGGAAGGATACTTC\n") - f.write("AACATTTTTCGTATTCTAATGTATTCACTTCTTTGTATCATTTCAAAATACATATATATAACTTACAAAC\n") - f.write("CACTTTCTGATTCTATCATACATGGTTTACAAGGTGCAACAAATCGAGGTGGTCATCGAAAGGGTTCGAT\n") - f.write("CGGCGCATGTTCTTCAAAAAAAGCTGCTGCCTCAAAGCACTCGGCTGTTTCTAGCGTTGATGCCCTCGCA\n") - f.write("TCAACTTTGTACAGCAGCCCCAGATTGTACCAAGCAGAGGGGTTTCCTCTGTCAAGTCGTAGTGCATCCA\n") - 
f.write("TGAGAAAGCTTCTAACGACTGGTAGAGATTGGCCACCGAACTGTCTGAGTATACAGGCAGTGGATATCAA\n") - f.write("GCTTGGGACGTGGGTAGGTTCGACATCTAATGCCTTCCTGAATGATTCCAAAGCTTCTTGCTGGAGACCC\n") - f.write("TTGGCTTCGTAGAGTAAACCTGTAGAGTGCCATCTGGAAGCAGAATGAGGATTGATGCTCTGGGATTTTG\n") - f.write("AAAGGCAGACCTCAGCATCCCGCCATTGGGACAAGCTTGTATATAAATTAGCTAAAT\n") - f.write("\n") - f.write(">gi|226791728|gb|GO545987.1|GO545987 Mdas9007C14_e495.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_566740.1| expressed protein [Arabidopsis thaliana] dbj|BAB01857.1| dbj|BAA85220.1 gene_id:MYM9.15 similar to unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("CATGCTAATCATACATGATCTAGTACAACATTTCAAAACAAGGGTTCTCAAATATACATTCATACCCAAA\n") - f.write("ACACCCCCATCACAATACACCTTTAACATTTAACAGATTTGAATGAAATCAGCAGTTCCATTCTCTCCAA\n") - f.write("ACTAGGGTTCCACATCCTGTAATTATATCAAGTTTCCATGTCAATTACCTCTTGCACATCTTGTGATGAC\n") - f.write("CTCACACCCTCTGTTGTAAGGATTCGCCGCCGCAGCGTGACAATTGTAATAAGAAGCCCCTGGTTTTCCA\n") - f.write("CAGGGAACCAAGTCCCTGTTGAGAGTCCCATAACTTATGTACTTCTTCTGCATTGCCAGAATTCTCCTGC\n") - f.write("TGCTCTCTGACTCCATCTCTGGCTCTGTCAAGCACTCCCCAATCTTCTTGCTGCAGACGCTTCTTCCCCC\n") - f.write("CATGGCAGCCTCGATTTCGCTTCCTTTGAGTGGATGATTCAGGTCAAGAACTGAAACCCCATTGCAGATT\n") - f.write("GAGAAGTGGATGTGGATT\n") - f.write("\n") - f.write(">gi|226791727|gb|GO545986.1|GO545986 Mdas9007D21_e498.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTGAACTTTGAAGACATACTCAATAATATCAGTCAAACTAGGCAAAGT\n") - f.write("GAACACCTAAAGCTCTACCTTGAAAATAACCTGATTATCTATCTACATCAAGACAGGCAACAAACAGACC\n") - f.write("CGAAAGACGATGAAAGAGTAGAAGTGTCATTGTAAATTCCAGAATTATAAAAAGGTCGTACCCAGTGCAC\n") - f.write("AAGGCTCCCGCTTTACGCAAGGTCTGGGAGAGGTGAATGTCGGCTAGCCTTACCCCCATTTATGGAGAGG\n") - f.write("CTGCTCCCCAGTCTCGAACCCGAGACCTACCGCTCATGCGCGAAGGCACTTGTCATTGTAAATTCCAGAA\n") - f.write("TTATGAGGAAA\n") - f.write("\n") - f.write(">gi|226791726|gb|GO545985.1|GO545985 Mdas9007A14_e487.b1 Apple_EST_Mdas Malus x domestica cDNA 
3' similar to ref|NP_199600.1| NADH dehydrogenase 10.5K chain-related protein [Arabidopsis thaliana] dbj|BAB11336.1| NADH dehydrogenase 10.5K chain-like protein [Arabidopsis thaliana] gb|AAL38780.1| putative NADH dehydrogenase 10.5K chain [Arabidopsis thaliana], mRNA sequence\n") - f.write("AATTTGTTCTTAATGGAAGATGGAAAATACATGAATTATTCAAACTACAACACACGCATTACTGCGTAAG\n") - f.write("CAACCCAAATAATCATCTTACACAAAAAACGCCTGATCTAAATAAAGGGCAGGCGATCAAAGGAATAAAT\n") - f.write("TTCCCGTGAAGGTAAATTTATTTCATTTCACACTGTCGATTATCAGGCTTTTAAAGACTCCCCAACTTTA\n") - f.write("ACAAGTTCCTCTAGTGCCTTCGAAATCTGTGGCTCTGTTAAACCTTCCAATCGAACGCCCCTCTCAACAC\n") - f.write("CCATGTCATATCTAGCCCACAACTGAGGTTCAATCCCTCTGCATTCACGAATCAAGATGGGCAATTTGGG\n") - f.write("GTTCGCACTCTTCAAATCCTTGTAATTCTTCTCCACAAATGTTCTGGTGGATGCGCTTGAAGGGGACGAT\n") - f.write("TGGCAAAGCAAAACCCTAATCTCCTTCAAGTTCTGAGATAGCTTCCCTCTCCATGCCATTTTCTCCCTCC\n") - f.write("CCCTCTCTCTCTCTGCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791725|gb|GO545984.1|GO545984 Mdas9007K14_e525.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TTTATATAAAAAGCAAAGAGAGACATACAAAAAAATTATGAAGGAAAAAAAAAACAAGTGCTTGGTCCAT\n") - f.write("GAAATCGTTCGTTCATATAATTCTTGTACCACTGAACTGTCCCCTCTTCCACCGGCGCTTCGGCCTAACT\n") - f.write("TCGGATATAAAAGTTCCAAGGGGAGCACAACCCGATGATCGTATTGTGACACAAGGCCAACCGACAATAA\n") - f.write("CAAAAAGGCAGCTGGCATAAGGCCAATTCCTTCTAGCAGGAATGAAACAACAAAAAGGCAGCTGGCATAA\n") - f.write("TGCCAATTCCTTCTAGCAGGAACGAAAACACAACAACAAATGGCGACCAAAACAAATGAAACCTAAATTC\n") - f.write("TCAACTGTGCACAACCATAAACGCTTCCTTTGCACCGTGTCATTGGAGCAGCCTGGCGCCAAGTTGGTCT\n") - f.write("TTCACCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791724|gb|GO545983.1|GO545983 Mdas9007A17_e488.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTTTACAATTAAAACATTCCAATTTCTTCACTAAAATGCAACACTTTA\n") - f.write("CATGAGAACAAGCTAGGAAACCATAAAAATGAAAAAAGAAAAGTGAAAGAAAAGGAGACTCTTAGAACTA\n") - 
f.write("CTTAGAGATCTGCCTAAAGCATATTATAAGACATATTCGATCCAATTGACAGGGCTACGGGCGTTACAAT\n") - f.write("CGTTTTACTTTATCACCACATATCCGA\n") - f.write("\n") - f.write(">gi|226791723|gb|GO545982.1|GO545982 Mdas9007P05_e547.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("ATACTAATGGGGCAGGAAAAACCACCTTTTATCCTTTTAACCATTTAAAAACGGCATTCATGCAAATAAC\n") - f.write("TCCAGTATTGATACAATCAAATGCGATTCCGGCAACGGAAGTTGAAAATTCAAAAGCCGAAACTGCTCGT\n") - f.write("TGTCCCTTCACTAAACAAAAAAAATCAATTGTCGTCCTCCTTTAACTCAAAAACTATAATAAACATTGAT\n") - f.write("TTGAGTCCTGGTAAATACAAATGGGAATTCAAGTGTGACCGCTACAGAACAAACATTCTGGTTTCCACAA\n") - f.write("CCTTCTCCTCCTGCCGAA\n") - f.write("\n") - f.write(">gi|226791722|gb|GO545981.1|GO545981 Mdas9007F21_e505.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_179548.1| senescence-associated protein- related [Arabidopsis thaliana] pir|E84578 probable senescence-associated protein 5 [imported] - Arabidopsis thaliana gb|AAD10165.1| putative senescence-associated protein 5 [Arabidopsis thaliana], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTGAAAACTTAGTTTCCTACTAGAATTTCACTGCCTGTCAATCATCTAT\n") - f.write("CCATCCATAAAGTCCAGATAGAAAAAGAACATACGAATATGTACACTGACTACACTAGTCACAAACAAAG\n") - f.write("ACTCATCGAAAGTCATCAAAAGATAAGAAATTGAAAACAAACAGACGGTTTGTTGTGTTTGGATACTGGT\n") - f.write("TAAGCCCGACCCTGCTTGTTCCGGCGGAAGAGTTCCTCCGTTTGGGCGTTCTTGAAGGCGCTACAGGCGA\n") - f.write("CGAGATAGACGCAGATGAGGATCACCACAGCCACAATAAGGATGACATTGACCCTCCTCCATTCTTTCCT\n") - f.write("CAGGTTCCCCAAAAGTCCAGCCCTGCAGGAATTGCAGTTGTAGCACAAGAGGTTTTGGTCATTGTTCCAT\n") - f.write("ATCAAGCAGTCCGGGTCAGCTCCTGGGTTCACTGGGTTTATCCACAGTGTTGGATTCACATAGTTGTATC\n") - f.write("CACACATTGTTGGAGGTTCACAACATCCTGACTGAAGGGGAGAGATGTGAGCATAGAGGAACTGATCAGC\n") - f.write("TGACATGTATTGTTGTGTAAGCTTANGGCAAGTATCAGACTCAGCCAAACAAGTCCTTATTTTCTGCCAA\n") - f.write("GTCGAAGAGTAAGTAACATAGTCCCTCAGCCACTTGGAAAACCCTTTCAGCCGGT\n") - f.write("\n") - f.write(">gi|226791460|gb|GO545771.1|GO545771 Mdas9004H11_e284.b1 Apple_EST_Mdas Malus x 
domestica cDNA 3' similar to gb|AAG16977.1|AF184076_1 ACC synthase [Prunus armeniaca], mRNA sequence\n") - f.write("GAATGAATGAACAATATTTCAACTGTGCGATTTACAGGTTTCGGAAATTTTCCGAATTCGTTCCATAACA\n") - f.write("ACCGGAATATCTTTTTCAGTCAACGTTGTAAAACAAAACCGAAACCATCCGGGTTCAATACAATGACATG\n") - f.write("ATGAACCGGGAGTTACATTTAGCTTAGCTACATTCAACAACCTATCCCAGAGCTCAAGCTCCCCTTTCTC\n") - f.write("GCTGTAAGAGCGGATTAACCCGCTCATGTCAGCCCAACAGCTGAAACCCCCATTGCTCTTTGTGCACTCA\n") - f.write("ATGCCCAATTGCTTCAAACCTGTCACAAATTTTAGGTACATTGCACGGAGCCTTTCCCTACTAGTATCAA\n") - f.write("TGAACTTCCGGATAAATTTGGTGTCTGAAAGCATAGAGATGAGCAACCGTTGGGATGGGGAGGAAATGGA\n") - f.write("CGAGAACCTTGTCAACTTTTTAGCGGCAGCCAGAACATTCTTGTTATAGGAGTAGATAGCGCCAGCCCTA\n") - f.write("AAACCTGGAAGAGAGAGGTCTTTCGATAGACCATATACTATATGAACTCTGTTCTGGTCCAGATCTTCCA\n") - f.write("ATTCAACAATTTCTGCCATGCTTACAAACTCTTCACTAC\n") - f.write("\n") - f.write(">gi|226791459|gb|GO545770.1|GO545770 Mdas9004C19_e267.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|S68805 pectin acetylesterase (EC 3.1.1.-) precursor - mung bean emb|CAA67728.1| pectinacetylesterase precursor [Vigna radiata], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTCTTATTGAATTTTTACCTTCAATTTACACAATCACACTCCTAGAAT\n") - f.write("AACTTTATTCGCAACACCATACAAGATGCCCGCTACCACTTTCAAATCAAAGCTCAAAAGAATAAATATT\n") - f.write("ACCCAGAACCATAAGTACGTAGGCCATTACATCAATAGGGTTTAGGTCATATAGCTCATAAAAACCATAT\n") - f.write("TTTCGACATAATGAAATTAATCTTCCTGATTGTTTGAGTCAAATTCACGGTTTTTGCAAGTGGGATTGCA\n") - f.write("CGGGTAAGGGCAATCAAACTTTTTGAACGCAGTGCGGTCGTAATACCAGTCTCCAACTGCCTTTGCAATT\n") - f.write("GTCGTCTTGCTCAACATAGGAGAGTCAGCTGCCAGCCATGTCTCCTGCGTTCCAATTTGGCAGTGGGCAA\n") - f.write("AGCAAGAGTCTATGAACGCTCCATGAGATGAAGAAATCCTCGTGCCGAA\n") - f.write("\n") - f.write(">gi|226791458|gb|GO545769.1|GO545769 Mdas9003H06_e209.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to dbj|BAA24493.1| chlorophyll a/b-binding protein [Fagus crenata], mRNA sequence\n") - 
f.write("GTTCAATCAATTGTTGTCCATTACTTGGTACCAAATTTACATCACCGACAACACTACAAACAAAAGTACA\n") - f.write("AAACAATTCCGAGCTCTCACTTTCCGGGAACGAAGTTTGTGGCATAGTTCCAGGCATTGTTGGCGACCGG\n") - f.write("GTCGGCAAGGTGGTCGGCAAGGTTCTCCAATGGTCCCTTTCCGGTGACGATGGCCTGGACAAAGAATCCA\n") - f.write("AACATGGAGAACATGGCCAACCTTCCATTCTTGAGCTCCTTCACCTTGAGCTCAGCAAAGGCTTCTGGGT\n") - f.write("CATCGGCAAGGCCCAACGGGTCGAAGCTTCCTCCGGGGTAAAGAGGGTCGGTCACCTCTCCGAGGGGTCC\n") - f.write("GCCGGCAATTCTGTATCCCTCAACTGCGCCCATCAACACCACTTGGGTAGCCCAGATGGCCAAGATGCTC\n") - f.write("TGTGCATGGACCAAGCTAGGGTTTCCCAAGTAGTCGAGACCGCCCTCGCTGAAGATCTGGGCTCCGGCCT\n") - f.write("TGAACCACACGGCTTCGCCGAACTTGACACCGTTGCGGGACAAGAGCTCTGGGAAAACGCATCCAAGAGC\n") - f.write("TCCGAGCATGGCCCATCTGGAGTGGATCACTTCGAGCTCACGGTTCTTGGCGAAGGTCTCTGGGTCAGCT\n") - f.write("GAGAGTCCGGCGGTGTCCCAGCCGTAGTCACCGGGGA\n") - f.write("\n") - f.write(">gi|226791457|gb|GO545768.1|GO545768 Mdas9003G16_e204.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAF86906.1|AF223358_1 triose phosphate/phosphate translocator precursor [Mesembryanthemum crystallinum], mRNA sequence\n") - f.write("TTTTTTTTTTAACAAAAAACATGATAGAGAAGTAATTAAAACATTATCTAGAATCTCATGAGACCATTAC\n") - f.write("AAATTAGAATGTCTTCGTACTCTCAATCAATGGGGAAAAAAAAAGGGCAGGGGGGGGGGGCGGCGGAGGA\n") - f.write("GGAAAAGAAACAAACTATGACATAGATAACAGATACTCAATTTGCAATTTCTTCTGAAGGGAATTTACAA\n") - f.write("AATTTACAATGCATGGTTCCCTCCATGTCCGTTCTTCGTTCCGTCTCATGCCGCTTTTCCTTGTCGTTTC\n") - f.write("TCTTCTTCTATCTTGGCCTTGAGGTAAGAGTAGATTGCCACTCCTGCAATTGCAATGGCAGTTCCAATAC\n") - f.write("CGGTTTGTGTTGAAATCTTGTTACCAAAGATCACGATCGAGAAGCCAATCACAAACACACGCTTCAGCAC\n") - f.write("GTTTCCAACTGCGTGCGTAAGAGGCGCCACTCTCTCCAGGGTGTTGGTGGCCAACTGGTTATAGAGATGG\n") - f.write("TAAAACAATCCAACCCAGAAGAGGTCGGTGACGAACTTGACAAGTCCTACTTTAGCAATTGCATCATTGA\n") - f.write("AGCCATACTTGATCAGTTGAGGTCCCTCTAAGATGAGAGCGGGTGGAATGCAGACAATGAGTGCAATGAT\n") - f.write("TGAAATATAAGCATAGAGATTTGTACTATCCATATCAGTCATGGCTTTCTTTGAATAGATACTCCTGTAA\n") - f.write("GTGAAGGAGATA\n") - f.write("\n") - 
f.write(">gi|226791456|gb|GO545767.1|GO545767 Mdas9003G14_e203.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to emb|CAD40837.1| OSJNBa0086B14.9 [Oryza sativa (japonica cultivar-group)], mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTTAACAACAAACTAAACTAGATTAGACGAAATATTGTTCAATAGTGAG\n") - f.write("GCTGCAGACAACCAAATGAACAAAAAAACGCAAATTAAACACATCTAGGTCCTTCATATTCTACGAAACA\n") - f.write("AACCAACCAAAAGCAGCAGCCGCAGATCTTAAAAGGAGTTTTGCCTATAACCATAATATCTGACCACATC\n") - f.write("TTGTCCCCAACCCTAACCTCCTATATCATCCTATCACAAAAAAACACCACCTTCTGTCCGCAAAAGCATT\n") - f.write("ATACATATCAGTCTTCAATCCCGTCTGCAGCCACCCCACAATTAACCCCGCTCCATTGCAAGGCAGGCGG\n") - f.write("GTTGCTTAGGCACGCTCACCCCTGATCCTCCTGGCAAGCTGGATATCCTTCGGCATGATGGTAACCCGCT\n") - f.write("TGGCATGGATAGCGCAAAGGTTGGTGTCCTCAAACAGACCCACCAGGTAAGCCTCAGCAGCCTCCTGCAG\n") - f.write("TGCCAGCACTGCATGGCTCTGGAAACGCAGATCAGTCTTGAAGTCCTGAGCAATTTCACGAACAAGCCTC\n") - f.write("TGGAATGGCAACTTCCTGATCAACAGCTCAGTA\n") - f.write("\n") - f.write(">gi|226791455|gb|GO545766.1|GO545766 Mdas9004B19_e261.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|F86339 protein F2D10.18 [imported] - Arabidopsis thaliana gb|AAF80615.1|AC069251_8 F2D10.18 [Arabidopsis thaliana], mRNA sequence\n") - f.write("TTTTTTTTATAACGAAGGCAAACTGAAATTCATTGAACATAAGGAGGATCATTAAGTACAGAAATGACGA\n") - f.write("CATTGGAATCGCGGGGAGAAGACGATCCTGAAACTGAATCCGATTTTGTTTTTGTGTTTTCCTAAAAACT\n") - f.write("AACCCCTAAAACAAAAGGTAGAGAGCCAGCAACTTATCCCATAGTGAAGAAAAGGTGCAGGCAGGCCTTT\n") - f.write("CAATTATCTTCTAAGCAACCCTACACATATCGCTTCTCCCCCTCCATTCAATTCCCGTTGCTCTACTCAT\n") - f.write("CGTCGTCTTCCTCCTCGCCGCTCTCGTCCTCATCCTCATCATCATCATCATTTACCTCAGACTTGGACTT\n") - f.write("GTCAGACTCTTCTTCGTCAGCTTCATTAGGTCCTTCAGCTAATTGCTTGTTGTATGCCTGAATGTTCTTG\n") - f.write("TTATACTCAACCTTCCTCTTATCTGCCTTGGCTTGATAAGGAGCTTTCTCAGCATCTGACAACGATTTCC\n") - f.write("ATTTCTCACCGCCAGCCTTACCGACAGCAGCAACCGACTTGTTGTTTGGATGATCCTTCTTGTACTTCTC\n") - f.write("TCTGAACTCCTCCATGAAGACGAAGAAGGCACTCGCAGGCCTCTTCGGCTTGTTCGGATCCTTTCCGGCT\n") - 
f.write("TTCTTCGCCGGCTTCTTGCTCGCTCCG\n") - f.write("\n") - f.write(">gi|226791454|gb|GO545765.1|GO545765 Mdas9003J01_e215.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("AAAAGGAAAAACGGGGGCCGGGGCACCCCGGCCAATTTAATTTTTTTTTTTTGGGAAAAAATTGGGCGGG\n") - f.write("ATAAAACCCCCGCAAAACCCCCCCCCCTTTTTTTAAAAACCCCCCAAATTTCCAGTAATTTTAAAACCCC\n") - f.write("CCTTTCCATTTTCCCCCCCGAAAAGGGGGCCAAACCCAAAAAAAAAAAAAAAAAGGCCCCCCCAAAAGGA\n") - f.write("ACCCCCAAAAAAACCCCAAACCACCCCCCGAAAAACTTCTGCCCCAACCCCCCCCCCCCAAAAAAAACCC\n") - f.write("CCCCCCCCCCCGGAAAATCCCCCCAC\n") - f.write("\n") - f.write(">gi|226791448|gb|GO545764.1|GO545764 Mdas9003P03_e242.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAD28640.2| geranylgeranyl hydrogenase [Glycine max], mRNA sequence\n") - f.write("AACGTATTATTAGTTTAAGATACTATTACAATGATATTTTGAAGGAGAAATTTTACAACGATTCTAAAAC\n") - f.write("AATGACGGACATAGAAATTTATATATGCTAGATCTCATTACACAAACTTGGAATTTCCACTCAAATCTGT\n") - f.write("TGAATTCAAAGAGTTCATTCCTCGTTACATGAGGAAAAACAAAAAATTACACAAACAACAAGGCTTCCTC\n") - f.write("CTTTCATACAGTAATTTTCTCCATCTCCCTCCTGAGTGCATTAGCCCTAACCAAGCTCCCAATGGTATTC\n") - f.write("ACAGCCAACTTCAAATCCTCCCACGGATTCCCCGGCACCACCCTCTTGTACAAGTAGCTATCGAACGTCA\n") - f.write("TCTTCTGCACGTACTCATCCGCACACATCTCCACGAACGCTTCCCTCGCGGGGTTCGACCTATAGAACAC\n") - f.write("CTTCTGCAACACATCCAACACCTTGTAAGTAGGCCAATAGGTCTTGTCCCACTTCTCCAAGTACGTCCTC\n") - f.write("AAGTCCGCCTCATTCACCATCCTCTTCCCATTCTCCGACCCCTCAACTATCGCCTCGGCACACATCCTGC\n") - f.write("CACTCTTTGCTGCAAAGTAGATGCCTTCACCAGAACACTTTGTTACGTACCCTGCTGCATCACCAACGAG\n") - f.write("TGCTACTCTGCCCGCCAACCTTCGTGGGCCGGG\n") - f.write("\n") - f.write(">gi|226791447|gb|GO545763.1|GO545763 Mdas9004C18_e266.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAL49750.1|AF452012_1 aquaporin-like protein [Petunia x hybrida], mRNA sequence\n") - f.write("TTTTTTTTTAGCGTAAAAAGGTAGATTAAGGTAGAGAGAGGAGAGAAGACATTGTAATCATAAAAAGAAA\n") - f.write("ACACCACTACTCATGAACAATAATTAATGAACATAATTAAAGTTAAGTAATTAGATTATAAATAGAGAAC\n") 
- f.write("CCAGAAAGATTGAAAGATCATCATCCATCCTCTCAATACACACACTAACAAACAGAGAACAAAATATCAA\n") - f.write("CAGAACATTAATGTAGCTTTCTTTTCTTCAATCTTTGTTTTAGTTGGTTGGGTTGCTGCGGAAGGATCCC\n") - f.write("AACGCCTTGAGCGCCGCCGCCCTCAGAATGTACTGGTGGTACGCCGCTGCAGCCAGTGCTCCCACAAATG\n") - f.write("GTCCGACCCAGAAGATCCAGTGGTCATCCCAGATTTTATCGTTGTTGTAGATCACAGCAGCGCCGAAACT\n") - f.write("CCTTGCTGGGTTGATTCCAGTTCCAGTAATGGGGATTGTTGCGAGGTGCACAATAAACACAGCAAACCCA\n") - f.write("ATGGGCAGTGGAGCCAAAACAGGGACGTGGGAGTCGCGTGCGCTCCTCTTGGGGTCGGTGGCCGAGAAGA\n") - f.write("CGGTGTAGACGAGGACGAAGGTGCCGATGATCTCAGCTCCCAGAGCTGTGCCCTTGCTGTAGCCACTAGC\n") - f.write("CACAGAGTTGGCGCCGCCGCCGA\n") - f.write("\n") - f.write(">gi|226791446|gb|GO545762.1|GO545762 Mdas9003B18_e180.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P28475|S6PD_MALDO NADP-dependent D-sorbitol-6-phosphate dehydrogenase (Aldose-6-phosphate reductase [NADPH]) (NADP-S6PDH) pir|T17013 D-sorbitol-6-phosphate dehydrogenase, NADP-dependent - apple tree dbj|BAA01853.1| NADP-dependent D-sorbitol-6-p, mRNA sequence\n") - f.write("TCGAGTTTTTTTTTTTTTTTTTTCTTTAATAACTCATTTATTAAGTACCAAAAAAAATACAAGGGCCGTT\n") - f.write("TATTATATTTTCCTTTGCAAGGCCTAATGCAAAGGGCGAATAATGAAGTTTTCAAACTAGCAGATAGATG\n") - f.write("AGCTTCTGGAAAATTATATGAAGCTCAGTTTATTGTAAGTAAGATTCACGATAGTTTCCATTTCGCTTCC\n") - f.write("GTTGAACAGCCTTGGAAGAAGTTGCGGAGGCAGCAATTCGAAGGTTTTTGAATGGCACGCTTATGCATAC\n") - f.write("ACGTCTAAGCCCCAAGTCTTGGAAGGTAGACTGGTACGATACTTCCTGTCGATACTGTAGATGAGCTGCA\n") - f.write("TGTCTTCATCGCTCAGCTGGAATTCAAGAACCTCCAAATTCTCTTTCAATCGCTGAATTTTCGATGATTT\n") - f.write("TGGAATCACTGCTGTTTTCCTCTGAATTCCCCACCTCAGACAGATTTGTGCCACGCTCTTTCCGTATTTC\n") - f.write("TTAGCCACATCATTGAGAACTGGATCATCCAAAGGTGAA\n") - f.write("\n") - f.write(">gi|226791445|gb|GO545761.1|GO545761 Mdas9003B06_e177.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to sp|P16059|PSBP_PEA Oxygen-evolving enhancer protein 2, chloroplast precursor (OEE2) (23 kDa subunit of oxygen evolving system of photosystem II) (OEC 23 kDa 
subunit) (23 kDa thylakoid membrane protein) pir|JS0771 photosystem II oxygen-evolving comp, mRNA sequence\n") - f.write("TTTTAGAGAGCTCAAAGACAATTCACAATCTTAAGGTAATTTACATTACACAAACCCTCTCAAATAAATT\n") - f.write("CTTATGCAACACTGAAAGAACTTGCAGTGCTCTCTACAAACTTCCTTGCTCCTTTGAACCACCTCTTGTC\n") - f.write("TCCGGCTTGTGCCTTTAGAATGTAGAGCTTGCCGTCTTTCACGGTGGCTGTGATAATCTGGTGCTTGCCT\n") - f.write("CCTTCATCTCCGTCGGCTGTCCTTGTCAACACAGATACCTAGTAGTACTGCTTTCCGTCAATCACTCGAC\n") - f.write("TACAACTCTCCAATATGTTGGCTGTGGCCACGGCACCCGGGTCGAAACCACCCTCGGATTCATTCTTGCC\n") - f.write("AAAGTAGGATTGTTTGCCTAGCAAGTAGTCCACCTTCGCGAGGAATTCCTCGGGGGAGCCATAGTCGGCG\n") - f.write("ATGGATTTCATGTCGGTTAAGGTGATTGTGACGGACACATTGCTGTTGCTGTCGAAGTTGTCCTCGTACC\n") - f.write("TAAGAACTTGACCATGGAAC\n") - f.write("\n") - f.write(">gi|226791444|gb|GO545760.1|GO545760 Mdas9011H23_e848.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_680161.1| No apical meristem (NAM) protein family [Arabidopsis thaliana] emb|CAC35884.1| ATAF2 protein [Arabidopsis thaliana] gb|AAL87335.1| unknown protein [Arabidopsis thaliana] gb|AAM91696.1| unknown protein [Arabidopsis thaliana], mRNA sequence\n") - f.write("CAGAAATGGCACTTTACGGAGAGAAAGAATGGTATTTCTTTTCGCCAAGGGACAGAAAATATCCAAACGG\n") - f.write("TTCAAGGCCGAACCGGGCAGCCGGAACCGGGTACTGGAAGGCGACCGGGGCTGACAAGCACATTGGAAAA\n") - f.write("CCCAAGGCACTCGGGATTAAAAAGGCACTCGTGTTCTACGCTGGTAAAGCCCCCAAAGGAATCAAAACCA\n") - f.write("ATTGGATCATGCACGAGTACCGCCTCGCCAATGTCGACCGGTCCGCCGCCGCTGCCAAGAAAAATCAAAA\n") - f.write("CCTGAGGGTACGTCCGTCTTTCCATTACCCTTTTTCTCTTTTACCATTATACCCTTGGACCCACACACGT\n") - f.write("CATTGACCGTGGACCATATTTTGTAATTTGACTAAAATATAAATGCATTTATTATTTCGACGGTCAGCTC\n") - f.write("AGATGTGCAATCTAAAAGAAAGAATATCATATCTAATTATATTTTACATATTATAAGATGAATGTATTAA\n") - f.write("GTAAATTCATGTACCTTTTTCGGTCTCTCAAATTTTGATCATGTCATCAATGCCTCTTAAGAATGCATGT\n") - f.write("TGTAACTTCTGGTAGATTAAATTAGAATATCGCTTTCGAAAAATAAATGAATCCGGCTTCAAATATTTTA\n") - 
f.write("ATCTCAATTTTGTTTAATTACTCAGAGTGCTTATCGTTCTAAATTGATGTTTATGTAATGCAGCTTGATG\n") - f.write("ATTGGGTACTATGCCGCATATACAACAAG\n") - f.write("\n") - f.write(">gi|226791443|gb|GO545759.1|GO545759 Mdas9011M20_e875.b1 Apple_EST_Mdas Malus x domestica cDNA 3', mRNA sequence\n") - f.write("AAATTAAAGAATTATAAAAGATCAATACATAGAGAAATAAGAGAGGGACATATATATTGTCACATATACT\n") - f.write("GGCTAGATATATATTTATCTAGCTAGCTACTTGACAGTTATTTGTACGTAATTACTTGACAATTAAAAGA\n") - f.write("AGAAGAAGCAAAGAAAAGTGAACAAGACAAAGATGGAAGTAAGACCATGAGGGTGGAAGTGCATTCTCAA\n") - f.write("ATGAACCGATTTAGAAAAATAAGGTGGCGGGGGACTGTAAGGATTTACAGGATAAAGGCTTGGTGGTACT\n") - f.write("ATAGAGGGCGCAAATGGAGGCCGATTCGGGATTAATGAGGGAACATCTGCCGCGGGAGGAGAGGACGAGG\n") - f.write("ATCCGCAAGTGTCTTGACACGGAGGCCTCACAGGTGGTGGTGGTTGAGGTTGAGGGTGGGTGGTGGACAT\n") - f.write("TCACGGAGAGCTGGTGGTGGTGAAGAAGGAGGCGCAGGAGGTGGTGGGCATTCAATGATAACCGGTGGTG\n") - f.write("GCGGCAATGGTGGTGGTGAAGAAGGAGTCGCAGGAGGTGGTGGGCATTCAATGATAACTGGTGGTGGTGG\n") - f.write("CGATGGTGGAGGTAGAGATGGCGCGGGGCATGGGTTGGGGCACTCATTGCA\n") - f.write("\n") - f.write(">gi|226791435|gb|GO545758.1|GO545758 Mdas9011H02_e843.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to gb|AAK25798.1|AF338237_1 rubisco activase [Zantedeschia aethiopica], mRNA sequence\n") - f.write("CCACTAGGGACGACCGGATCGGTGTTTGCACCGGAATCTTCAAGCCTGATAATGTGTCCCAGGAAGATGT\n") - f.write("TGTCAAGATTGTTGACACATTCCCCGGTCAATCCATTGATTTCTTCGGTGCCCTAAGGGCTCGAGTTTAT\n") - f.write("GACGATGAAGTGAGGAAGTGGATTTCCGGTGTTGGAATTGACGGGATTGGGAAGAAGCTTGTGAACTCAA\n") - f.write("AAGAAGGTCCCCCGACTTTCGATCAACCTAAGATGACTCTGGAGAAGCTCCTTCACTACGGACAAATGCT\n") - f.write("TGTTCAGGAGCAGGACAATGTGAAGAGAGTCCAACTGGCTGACAAGTACTTGGCCGATGCAGCCCTTGGA\n") - f.write("GATGCAAACCAGGACTCCATTAACCGAGGAGAATTCTATGGAAAGGCAGCCCAACAAGTTAATGTACCAG\n") - f.write("TCCCAGAAGGCTGCACTGATCGAACAGCTGCAAACTTCAACCCAGCAGCAAGGAGTGACGACGGTAGCTG\n") - f.write("TCAGTACGAGTAATTGGAGCTGAAGAAAATAAATATTCTGTTTGCTGGGTTATTGATATTTTCTTTGTAA\n") - 
f.write("TTTGCGCTTGTATTTCTATGGAAATTCTTGCAATTATTTTGTTTTAATTATTAAACAAGCTATTGATACT\n") - f.write("CCGGCTATTGAACAGTCGATAAGAACATAATGTTATGTTGTTGAGAATATGTTGTCATTGTCGCAAACAA\n") - f.write("AACTTCACAACATGTTATCAATACATTCATG\n") - f.write("\n") - f.write(">gi|226791434|gb|GO545757.1|GO545757 Mdas9011N23_e879.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to pir|T02946 hypothetical protein 3 - common tobacco chloroplast gb|AAA84680.1| unknown protein prf|1102209C ORF 3, mRNA sequence\n") - f.write("TTTTTTTTCAAGAATTCTTCTTACTTTAATACATAATACATAGGTCATCGATTCAGCATTGGATAAAAAA\n") - f.write("GATAAAACGTGGGATGAAATACCCATTTTTTCCAATCAAATAACGGATTCAAATCATTTTATCGACATGA\n") - f.write("GTGTTTTATATCGAAAAAAAATTCCAACTATTTGTTTTGAAACCATTTCTGTCTTAACTTATTAACTAAC\n") - f.write("ATAGTAGTAGAAAGAATACCATGCTGCATCTGAACTTCAAACGGTTTAGCTTTAACCCTGTTAATGGTTT\n") - f.write("ACATTATTGGTTGATAGAGAATCAAAGTAGATTTACCAATGAATCGCGAAATGCTATGGTTCTTCAATTT\n") - f.write("TTTTTTCAGAAGAAATTCGCGGAATCATGCACCTTTTTTTTTCGCGTTATAACGAAAAAATGCAGTTGGT\n") - f.write("CGTATCCAGCCTATTCTTGAAATAAACAACTCGCACACACTCCCTTTCCAAAAAAAATCAACACACCAAG\n") - f.write("CACTACGCTTAGATTTATTAGATTTGTTGCTAAAATATCGGTATTAAACCCGAAACTCCCGGCGGATGGC\n") - f.write("CAATAACCCAAGGAAAGGAAAGAATCGGTTACATTTTTCATATGATATCCTCTTTCTTATAGATAGACTA\n") - f.write("ATTATTTTATTTATATTATTTTTGTATTATTTTTATTATGAATTTCCCTATTTCTAAATAGAATATACTA\n") - f.write("AATAGAGTCAAAAAATA\n") - f.write("\n") - f.write(">gi|226791433|gb|GO545756.1|GO545756 Mdas9011C24_e824.b1 Apple_EST_Mdas Malus x domestica cDNA 3' similar to ref|NP_566564.1| expressed protein [Arabidopsis thaliana] dbj|BAA94980.1| gb|AAF26101.1 gene_id:K14A17.9 similar to unknown protein [Arabidopsis thaliana] gb|AAK55691.1|AF378888_1 AT3g17020/K14A17_14 [Arabidopsis thaliana] gb|AAK91493.1| AT3g17020, mRNA sequence\n") - f.write(&qu