comparison weblogolib/__init__.py @ 9:f3462128e87c

Minor alterations to the Galaxy interface, with some better examples and error messages added.
author davidmurphy
date Mon, 30 Jan 2012 08:17:57 -0500
parents 5149eb3a89c2
children 20716450be87
comparison
equal deleted inserted replaced
8:5149eb3a89c2 9:f3462128e87c
980 980
981 if weight is None and alphabet is not None: weight = float(len(alphabet)) 981 if weight is None and alphabet is not None: weight = float(len(alphabet))
982 if comp.lower() == 'equiprobable' : 982 if comp.lower() == 'equiprobable' :
983 prior = weight * equiprobable_distribution(len(alphabet)) 983 prior = weight * equiprobable_distribution(len(alphabet))
984 984
985 985 elif comp.lower() == 'escherichiacoli' :
986 composition="{'CTT': 0.7616, 'ATG': 1.5872, 'ACA': 0.4096, 'ACG': 0.736, 'ATC': 1.1648, 'AAC': 1.5615999999999999, 'ATA': 0.2368, 'AGG': 0.1024, 'CCT': 0.5376000000000001, 'ACT': 0.512, 'AGC': 1.0624, 'AAG': 0.7744, 'AGA': 0.0896, 'CAT': 1.0112, 'AAT': 1.4016, 'ATT': 1.952, 'CTG': 3.0016, 'CTA': 0.3392, 'CTC': 0.672, 'CAC': 0.8383999999999999, 'AAA': 2.1248, 'CCG': 1.7087999999999999, 'AGT': 0.4608, 'CCA': 0.4224, 'CAA': 0.7744, 'CCC': 0.4096, 'TAT': 1.0752000000000002, 'GGT': 1.3632, 'TGT': 0.37760000000000005, 'CGA': 0.2752, 'CAG': 1.7728, 'TCT': 0.3648, 'GAT': 2.4255999999999998, 'CGG': 0.26239999999999997, 'TTT': 1.2608, 'TGC': 0.512, 'GGG': 0.5504, 'TAG': 1e-06, 'GGA': 0.5888, 'TAA': 0.1152, 'GGC': 2.1376, 'TAC': 0.9344, 'TTC': 0.96, 'TCG': 0.512, 'TTA': 0.9728, 'TTG': 0.7616, 'TCC': 0.352, 'ACC': 1.4592, 'TCA': 0.4992, 'GCA': 1.3504, 'GTA': 0.736, 'GCC': 2.0224, 'GTC': 0.7487999999999999, 'GCG': 2.464, 'GTG': 1.6896, 'GAG': 1.1776, 'GTT': 1.0752000000000002, 'GCT': 0.6848, 'TGA': 0.064, 'GAC': 1.312, 'CGT': 1.3504, 'TGG': 0.6848, 'GAA': 2.7968, 'CGC': 1.664}"
987 elif comp.lower() == 'homosapiens' :
988 composition="{'CTT': 0.8448, 'ATG': 1.408, 'ACA': 0.9663999999999999, 'ACG': 0.39039999999999997, 'ATC': 1.3312, 'AAC': 1.2224000000000002, 'ATA': 0.48, 'AGG': 0.768, 'CCT': 1.12, 'ACT': 0.8383999999999999, 'AGC': 1.248, 'AAG': 2.0416, 'AGA': 0.7807999999999999, 'CAT': 0.6976, 'AAT': 1.088, 'ATT': 1.024, 'CTG': 2.5344, 'CTA': 0.4608, 'CTC': 1.2544000000000002, 'CAC': 0.9663999999999999, 'AAA': 1.5615999999999999, 'CCG': 0.44160000000000005, 'AGT': 0.7744, 'CCA': 1.0816, 'CAA': 0.7872, 'CCC': 1.2672, 'TAT': 0.7807999999999999, 'GGT': 0.6912, 'TGT': 0.6784, 'CGA': 0.3968, 'CAG': 2.1888, 'TCT': 0.9728, 'GAT': 1.3952, 'CGG': 0.7296, 'TTT': 1.1264, 'TGC': 0.8064, 'GGG': 1.056, 'TAG': 0.0512, 'GGA': 1.056, 'TAA': 0.064, 'GGC': 1.4208, 'TAC': 0.9792000000000001, 'TTC': 1.2992000000000001, 'TCG': 0.2816, 'TTA': 0.4928, 'TTG': 0.8256, 'TCC': 1.1328, 'ACC': 1.2096, 'TCA': 0.7807999999999999, 'GCA': 1.0112, 'GTA': 0.45439999999999997, 'GCC': 1.7728, 'GTC': 0.928, 'GCG': 0.4736, 'GTG': 1.7984, 'GAG': 2.5344, 'GTT': 0.704, 'GCT': 1.1776, 'TGA': 0.1024, 'GAC': 1.6064, 'CGT': 0.288, 'TGG': 0.8448, 'GAA': 1.856, 'CGC': 0.6656}"
989 elif comp.lower() == 'saccharomycescerevisiae' :
990 composition="{'CTT': 0.7872, 'ATG': 1.3376, 'ACA': 1.1392, 'ACG': 0.512, 'ATC': 1.1008, 'AAC': 1.5872, 'ATA': 1.1392, 'AGG': 0.5888, 'CCT': 0.864, 'ACT': 1.2992000000000001, 'AGC': 0.6272000000000001, 'AAG': 1.9712, 'AGA': 1.3632, 'CAT': 0.8704, 'AAT': 2.2848, 'ATT': 1.9264000000000001, 'CTG': 0.672, 'CTA': 0.8576, 'CTC': 0.3456, 'CAC': 0.4992, 'AAA': 2.6816, 'CCG': 0.3392, 'AGT': 0.9087999999999999, 'CCA': 1.1712, 'CAA': 1.7472, 'CCC': 0.4352, 'TAT': 1.2032, 'GGT': 1.5295999999999998, 'TGT': 0.5184, 'CGA': 0.192, 'CAG': 0.7744, 'TCT': 1.504, 'GAT': 2.4064, 'CGG': 0.1088, 'TTT': 1.6704, 'TGC': 0.3072, 'GGG': 0.384, 'TAG': 0.032, 'GGA': 0.6976, 'TAA': 0.0704, 'GGC': 0.6272000000000001, 'TAC': 0.9472, 'TTC': 1.1776, 'TCG': 0.5504, 'TTA': 1.6767999999999998, 'TTG': 1.7408, 'TCC': 0.9087999999999999, 'ACC': 0.8128, 'TCA': 1.1967999999999999, 'GCA': 1.0368, 'GTA': 0.7552000000000001, 'GCC': 0.8064, 'GTC': 0.7552000000000001, 'GCG': 0.3968, 'GTG': 0.6912, 'GAG': 1.2288, 'GTT': 1.4144, 'GCT': 1.3568, 'TGA': 0.0448, 'GAC': 1.2928, 'CGT': 0.4096, 'TGG': 0.6656, 'GAA': 2.9184, 'CGC': 0.1664}"
986 elif comp.lower() == 'auto' or comp.lower() == 'automatic': 991 elif comp.lower() == 'auto' or comp.lower() == 'automatic':
987 if alphabet == unambiguous_protein_alphabet : 992 if alphabet == unambiguous_protein_alphabet :
988 prior = weight * asarray(aa_composition, float64) 993 prior = weight * asarray(aa_composition, float64)
989 else : 994 else :
990 prior = weight * equiprobable_distribution(len(alphabet)) 995 prior = weight * equiprobable_distribution(len(alphabet))
1254 x[counter/3][ (alphabet.index(str(seqs[i][(counter):(counter+3)]))) ]+=1 1259 x[counter/3][ (alphabet.index(str(seqs[i][(counter):(counter+3)]))) ]+=1
1255 elif show_warnings: 1260 elif show_warnings:
1256 if len(seqs[i][(counter):(counter+3)].strip("GATUC"))==1 or len(seqs[i][(counter):(counter+3)].strip("GATUC"))==2 : 1261 if len(seqs[i][(counter):(counter+3)].strip("GATUC"))==1 or len(seqs[i][(counter):(counter+3)].strip("GATUC"))==2 :
1257 print >>sys.stderr, 'Warning:Incomplete or non GATUC codon detected:', seqs[i][(counter):(counter+3)] 1262 print >>sys.stderr, 'Warning:Incomplete or non GATUC codon detected:', seqs[i][(counter):(counter+3)]
1258 print >>sys.stderr, 'Position:',counter 1263 print >>sys.stderr, 'Position:',counter
1259 print >>sys.stderr, 'Sequence:',i 1264 print >>sys.stderr, 'Sequence:',(i+1)
1260 print >>sys.stderr, 'This will be treated as ---' 1265 print >>sys.stderr, 'This will be treated as ---'
1261 1266
1262 1267
1263 counter=counter+3 1268 counter=counter+3
1264 counts=asarray(x) 1269 counts=asarray(x)
1384 1389
1385 if(altype=="codonsT"): 1390 if(altype=="codonsT"):
1386 priordict[line[0].upper().replace("U", "T")]=(float(line[1])/1000)*64 1391 priordict[line[0].upper().replace("U", "T")]=(float(line[1])/1000)*64
1387 else: 1392 else:
1388 priordict[line[0].upper().replace("T", "U")]=(float(line[1])/1000)*64 1393 priordict[line[0].upper().replace("T", "U")]=(float(line[1])/1000)*64
1389 1394 if priordict[line[0].upper().replace("U", "T")] == 0:
1395 priordict[line[0].upper().replace("U", "T")] = 0.000001
1390 return priordict 1396 return priordict
1391 1397
1392 def _build_logodata(options) : 1398 def _build_logodata(options) :
1393 global offset 1399 global offset
1394 offset=options.frame 1400 offset=options.frame
1630 data_grp.add_option( "", "--composition", 1636 data_grp.add_option( "", "--composition",
1631 dest="composition", 1637 dest="composition",
1632 action="store", 1638 action="store",
1633 type="string", 1639 type="string",
1634 default = "auto", 1640 default = "auto",
1635 help="The expected composition of the sequences: 'auto' (default), 'equiprobable', 'none' (Do not perform any compositional adjustment), ", 1641 help="The expected composition of the sequences: 'auto' (default), 'equiprobable', 'none' (Do not perform any compositional adjustment), or 'escherichiacoli' 'homosapiens' 'saccharomycescerevisiae' for ecoli, human and SC codon frequencies.",
1636 metavar="COMP.") 1642 metavar="COMP.")
1637 1643
1638 data_grp.add_option( "", "--weight", 1644 data_grp.add_option( "", "--weight",
1639 dest="weight", 1645 dest="weight",
1640 action="store", 1646 action="store",