Mercurial > repos > qfab > usearch_map_reads_to_otus
diff usearch_map_reads_to_otu/scripts/uc.py @ 0:c10d09023766 draft
Uploaded
author | qfab |
---|---|
date | Thu, 29 May 2014 00:51:18 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch_map_reads_to_otu/scripts/uc.py Thu May 29 00:51:18 2014 -0400 @@ -0,0 +1,149 @@ +import re +import sys +import progress + +# Tab-separated fields: +# 1=Type, 2=ClusterNr, 3=SeqLength or ClusterSize, 4=PctId, 5=Strand, 6=QueryStart, 7=SeedStart, 8=Alignment, 9=Label +# Record types (field 1): L=LibSeed, S=NewSeed, H=Hit, R=Reject, D=LibCluster, C=NewCluster, N=NotMatched +# For C and D types, PctId is average id with seed. +# QueryStart and SeedStart are zero-based relative to start of sequence. +# If minus strand, SeedStart is relative to reverse-complemented seed. + +MaxError = -1 + +Type = '?' +ClusterNr = -1 +Size = -1 +PctId = -1.0 +LocalScore = -1.0 +Evalue = -1.0 +Strand = '.' +QueryStart = -1 +SeedStart = -1 +Alignment = "" +QueryLabel = "" +TargetLabel = "" +FileName = "?" +Line = "" + +def Die(s): + print >> sys.stderr, "*** ERROR ***", s, sys.argv + sys.exit(1) + +def ProgressFile(File, FileSize): +# if not sys.stderr.isatty(): +# return + Pos = File.tell() + Pct = (100.0*Pos)/FileSize + Str = "%s %5.1f%%\r" % (FileName, Pct) + sys.stderr.write(Str) + +def Progress(i, N): +# if not sys.stderr.isatty(): + return + Pct = (100.0*i)/N + Str = "%5.1f%%\r" % Pct + sys.stderr.write(Str) + +def PrintLine(): + print Line + +def ParseRec(Line): + global Type + global ClusterNr + global Size + global PctId + global Strand + global QueryStart + global SeedStart + global Alignment + global QueryLabel + global TargetLabel + global LocalScore + global Evalue + + Fields = Line.split("\t") + N = len(Fields) + if N != 9 and N != 10: + Die("Expected 9 or 10 fields in .uc record, got: " + Line) + Type = Fields[0] + + try: + ClusterNr = int(Fields[1]) + except: + ClusterNr = -1 + + try: + Size = int(Fields[2]) + except: + Size = -1 + + Fields2 = Fields[3].split('/') + LocalScore = -1.0 + Evalue = -1.0 + if len(Fields2) == 3: + try: + PctId = float(Fields2[0]) + LocalScore = float(Fields2[1]) + Evalue = float(Fields2[2]) + except: + PctId = -1.0 + else: + try: + PctId = float(Fields[3]) + except: + PctId = -1.0 + + Strand = Fields[4] + + try: + QueryStart = int(Fields[5]) + except: + QueryStart = -1 + + try: + SeedStart = int(Fields[6]) + except: + SeedStart = -1 + + Alignment = Fields[7] + QueryLabel = Fields[8] + + if len(Fields) > 9: + TargetLabel = Fields[9] + +def GetRec(File, OnRecord): + global Line + while 1: + Line = File.readline() + if len(Line) == 0: + return 0 + if Line[0] == '#': + continue + Line = Line.strip() + if len(Line) == 0: + return 1 + ParseRec(Line) + Ok = OnRecord() + if Ok != None and Ok == 0: + return 0 + return 1 + +def ReadRecs(argFileName, OnRecord, ShowProgress = True): + return ReadFile(argFileName, OnRecord, ShowProgress) + +def GetRecs(argFileName, OnRecord, ShowProgress = True): + return ReadFile(argFileName, OnRecord, ShowProgress) + +def ReadFile(argFileName, OnRecord, ShowProgress = True): + global FileName + FileName = argFileName + File = open(FileName) + + if ShowProgress: + progress.InitFile(File, FileName) + while GetRec(File, OnRecord): + if ShowProgress: + progress.File() + if ShowProgress: + progress.FileDone()