comparison flanking_features.py @ 2:a09d13b108fd draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
author devteam
date Thu, 22 Jun 2017 18:41:16 -0400
parents 8307665c4b6c
children
comparison
equal deleted inserted replaced
1:8307665c4b6c 2:a09d13b108fd
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 #By: Guruprasad Ananda 2 # By: Guruprasad Ananda
3 """ 3 """
4 Fetch closest up/downstream interval from features corresponding to every interval in primary 4 Fetch closest up/downstream interval from features corresponding to every interval in primary
5 5
6 usage: %prog primary_file features_file out_file direction 6 usage: %prog primary_file features_file out_file direction
7 -1, --cols1=N,N,N,N: Columns for start, end, strand in first file 7 -1, --cols1=N,N,N,N: Columns for start, end, strand in first file
8 -2, --cols2=N,N,N,N: Columns for start, end, strand in second file 8 -2, --cols2=N,N,N,N: Columns for start, end, strand in second file
9 -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval 9 -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
10 -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval 10 -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
11 """ 11 """
12 from __future__ import print_function
12 13
13 import fileinput 14 import fileinput
14 import sys 15 import sys
16
15 from bx.cookbook import doc_optparse 17 from bx.cookbook import doc_optparse
16 from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper 18 from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper
17 from bx.intervals.operations import quicksect 19 from bx.intervals.operations import quicksect
18 from bx.tabular.io import ParseError 20 from bx.tabular.io import ParseError
19 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped 21 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped
22
20 from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper 23 from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper
21 24
22 assert sys.version_info[:2] >= ( 2, 4 ) 25 assert sys.version_info[:2] >= ( 2, 4 )
23 26
24 27
25 def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): 28 def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down):
26 #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases 29 # direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases
27 #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand 30 # threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand
28 #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand 31 # threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand
29 if direction == 1: 32 if direction == 1:
30 if node.maxend <= threshold_up: 33 if node.maxend <= threshold_up:
31 if node.end == node.maxend: 34 if node.end == node.maxend:
32 report_func_up(node) 35 report_func_up(node)
33 elif node.right and node.left: 36 elif node.right and node.left:
101 else: 104 else:
102 root = rightTree.chroms[chrom] # root node for the chrom tree 105 root = rightTree.chroms[chrom] # root node for the chrom tree
103 result_up = [] 106 result_up = []
104 result_down = [] 107 result_down = []
105 if (strand == '+' and up) or (strand == '-' and down): 108 if (strand == '+' and up) or (strand == '-' and down):
106 #upstream +ve strand and downstream -ve strand cases 109 # upstream +ve strand and downstream -ve strand cases
107 get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) 110 get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None)
108 111
109 if (strand == '+' and down) or (strand == '-' and up): 112 if (strand == '+' and down) or (strand == '-' and up):
110 #downstream +ve strand and upstream -ve strand case 113 # downstream +ve strand and upstream -ve strand case
111 get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) 114 get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node ))
112 115
113 if result_up: 116 if result_up:
114 if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval. 117 if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval.
115 ends = [] 118 ends = []
121 if not(either): 124 if not(either):
122 yield [ interval, result_up[res_ind].other ] 125 yield [ interval, result_up[res_ind].other ]
123 126
124 if result_down: 127 if result_down:
125 if not(either): 128 if not(either):
126 #The last element of result_down will be the closest element to the given interval 129 # The last element of result_down will be the closest element to the given interval
127 yield [ interval, result_down[-1].other ] 130 yield [ interval, result_down[-1].other ]
128 131
129 if either and (result_up or result_down): 132 if either and (result_up or result_down):
130 iter_val = [] 133 iter_val = []
131 if result_up and result_down: 134 if result_up and result_down:
132 if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): 135 if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)):
133 iter_val = [ interval, result_up[res_ind].other ] 136 iter_val = [ interval, result_up[res_ind].other ]
134 else: 137 else:
135 #The last element of result_down will be the closest element to the given interval 138 # The last element of result_down will be the closest element to the given interval
136 iter_val = [ interval, result_down[-1].other ] 139 iter_val = [ interval, result_down[-1].other ]
137 elif result_up: 140 elif result_up:
138 iter_val = [ interval, result_up[res_ind].other ] 141 iter_val = [ interval, result_up[res_ind].other ]
139 elif result_down: 142 elif result_down:
140 #The last element of result_down will be the closest element to the given interval 143 # The last element of result_down will be the closest element to the given interval
141 iter_val = [ interval, result_down[-1].other ] 144 iter_val = [ interval, result_down[-1].other ]
142 yield iter_val 145 yield iter_val
143 146
144 147
145 def main(): 148 def main():
201 output_line_fields.extend( line.fields ) 204 output_line_fields.extend( line.fields )
202 output_line_fields.extend( closest_feature.fields ) 205 output_line_fields.extend( closest_feature.fields )
203 out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) 206 out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) )
204 else: 207 else:
205 out_file.write( "%s\n" % result ) 208 out_file.write( "%s\n" % result )
206 except ParseError, exc: 209 except ParseError as exc:
207 fail( "Invalid file format: %s" % str( exc ) ) 210 fail( "Invalid file format: %s" % str( exc ) )
208 211
209 print "Direction: %s" % (direction) 212 print("Direction: %s" % (direction))
210 if g1.skipped > 0: 213 if g1.skipped > 0:
211 print skipped( g1, filedesc=" of 1st dataset" ) 214 print(skipped( g1, filedesc=" of 1st dataset" ))
212 if g2.skipped > 0: 215 if g2.skipped > 0:
213 print skipped( g2, filedesc=" of 2nd dataset" ) 216 print(skipped( g2, filedesc=" of 2nd dataset" ))
217
214 218
215 if __name__ == "__main__": 219 if __name__ == "__main__":
216 main() 220 main()