comparison WeightedAverage.py @ 2:efa2b391e887 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/weightedaverage commit f770c3c58f1e7e1fa5ed22d7f7aca856d36729e8
author devteam
date Wed, 05 Oct 2016 13:39:38 -0400
parents 90611e86a998
children
comparison
equal deleted inserted replaced
1:90611e86a998 2:efa2b391e887
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """ 2 """
3 usage: %prog bed_file_1 bed_file_2 out_file 3 usage: %prog bed_file_1 bed_file_2 out_file
4 -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file 4 -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
5 -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file 5 -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file
6 -z, --allow_zeros: Include zeros in calculations
6 """ 7 """
7 8
8 import collections 9 import collections
9 import sys 10 import sys
10 from galaxy.tools.util.galaxyops import * 11 from galaxy.tools.util.galaxyops import *
40 41
41 42
42 def GetOverlap(a, b): 43 def GetOverlap(a, b):
43 return min(a[1], b[1])-max(a[0], b[0]) 44 return min(a[1], b[1])-max(a[0], b[0])
44 45
46 def get_float_no_zero( field ):
47 rval = float( field )
48 assert rval
49 return rval
50
45 51
46 options, args = doc_optparse.parse( __doc__ ) 52 options, args = doc_optparse.parse( __doc__ )
47 53
48 try: 54 try:
49 chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 ) 55 chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 )
51 input1, input2, input3 = args 57 input1, input2, input3 = args
52 except Exception, eee: 58 except Exception, eee:
53 print eee 59 print eee
54 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) 60 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
55 61
56 fd2 = open(input2) 62 if options.allow_zeros:
57 lines2 = fd2.readlines() 63 get_value = float
64 else:
65 get_value = get_float_no_zero
58 RecombChrDict = collections.defaultdict(list) 66 RecombChrDict = collections.defaultdict(list)
59 67
60 skipped = 0 68 skipped = 0
61 for line in lines2: 69 for line in open( input2 ):
62 temp = line.strip().split() 70 temp = line.strip().split()
63 try: 71 try:
64 assert float(temp[int(name_col_2)]) 72 value = get_value( temp[ name_col_2 ] )
65 except: 73 except Exception:
66 skipped += 1 74 skipped += 1
67 continue 75 continue
68 tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])] 76 tempIndex = [ int( temp[ start_col_2 ] ), int( temp[ end_col_2 ] ), value ]
69 RecombChrDict[temp[int(chr_col_2)]].append(tempIndex) 77 RecombChrDict[ temp[ chr_col_2 ] ].append( tempIndex )
70 78
71 print "Skipped %d features with invalid values" % (skipped) 79 print "Skipped %d features with invalid values" % (skipped)
72 80
73 fd1 = open(input1) 81 fdd = open( input3, 'w' )
74 lines = fd1.readlines() 82 for line in open( input1 ):
75 finalProduct = '' 83 line = line.strip()
76 for line in lines: 84 temp = line.split('\t')
77 temp = line.strip().split('\t') 85 chromosome = temp[ chr_col_1 ]
78 chromosome = temp[int(chr_col_1)] 86 start = int( temp[ start_col_1 ] )
79 start = int(temp[int(start_col_1)]) 87 stop = int( temp[ end_col_1 ] )
80 stop = int(temp[int(end_col_1)])
81 start_stop = [start, stop] 88 start_stop = [start, stop]
82 RecombRate = FindRate( chromosome, start_stop, RecombChrDict ) 89 RecombRate = FindRate( chromosome, start_stop, RecombChrDict )
83 try: 90 try:
84 RecombRate = "%.4f" % (float(RecombRate)) 91 RecombRate = "%.4f" % (float(RecombRate))
85 except: 92 except:
86 RecombRate = RecombRate 93 RecombRate = RecombRate
87 finalProduct += line.strip()+'\t'+str(RecombRate)+'\n' 94 fdd.write( "%s\t%s\n" % ( line, RecombRate ) )
88 fdd = open(input3, 'w')
89 fdd.writelines(finalProduct)
90 fdd.close() 95 fdd.close()