# HG changeset patch # User xuebing # Date 1333244677 14400 # Node ID 3af9a3c6aa6f8f03f944892e37f06c700fefe710 # Parent c7659d44142a5e21276bd6309cc63edce8f951f6 Uploaded diff -r c7659d44142a -r 3af9a3c6aa6f revcompl.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/revcompl.py Sat Mar 31 21:44:37 2012 -0400 @@ -0,0 +1,84 @@ +import sys + +compldna = {'A':'T', + 'C':'G', + 'G':'C', + 'T':'A', + 'U':'A', + 'M':'K', + 'K':'M', + 'W':'W', + 'S':'S', + 'R':'Y', + 'Y':'R', + 'N':'N'} +complrna = {'A':'U', + 'C':'G', + 'G':'C', + 'T':'A', + 'U':'A', + 'M':'K', + 'K':'M', + 'W':'W', + 'S':'S', + 'R':'Y', + 'Y':'R', + 'N':'N'} +def complement(seq,compl): + complseq = [compl[base] for base in seq] + return complseq + +def reverse_complement(seq,compl): + seq = list(seq) + seq.reverse() + return ''.join(complement(seq,compl)) + +def readFastaFile(infile,outfile,compl): + + fin = open(infile) + out = open(outfile,'w') + + currSeq='' + currSeqname=None + for line in fin: + if '>' in line: + if currSeqname !=None: + out.write(currSeqname+reverse_complement(currSeq,compl)+'\n') + currSeqname=None + currSeq='' + currSeqname=line + else: + currSeq=currSeq+line.strip().upper() + + if currSeqname!=None: + out.write(currSeqname+reverse_complement(currSeq,compl)+'\n') + + fin.close() + out.close() + +def readrawseq(infile,outfile,compl): + ''' + each line is a sequence + ''' + fin = open(infile) + out = open(outfile,'w') + for line in fin: + out.write(reverse_complement(line.strip().upper(),compl)+'\n') + fin.close() + out.close() + +def main(): + seqfile = sys.argv[1] + outfile = sys.argv[2] + fasta = sys.argv[3] + rna = sys.argv[4] + if rna == 'rna': + compl = complrna + else: + compl = compldna + if fasta == 'fasta': + readFastaFile(seqfile,outfile,compl) + else: + readrawseq(seqfile,outfile,compl) + +main()