comparison gops_cluster.py @ 3:765ceb06c3e2 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/cluster commit a1517c9d22029095120643bbe2c8fa53754dd2b7
author devteam
date Wed, 11 Nov 2015 12:47:24 -0500
parents d5677eecbad4
children 05696474ee89
comparison
equal deleted inserted replaced
2:520de69b107a 3:765ceb06c3e2
7 -d, --distance=N: Maximum distance between clustered intervals 7 -d, --distance=N: Maximum distance between clustered intervals
8 -v, --overlap=N: Minimum overlap require (negative distance) 8 -v, --overlap=N: Minimum overlap require (negative distance)
9 -m, --minregions=N: Minimum regions per cluster 9 -m, --minregions=N: Minimum regions per cluster
10 -o, --output=N: 1)merged 2)filtered 3)clustered 4) minimum 5) maximum 10 -o, --output=N: 1)merged 2)filtered 3)clustered 4) minimum 5) maximum
11 """ 11 """
12 import sys, traceback, fileinput 12 import fileinput
13 from warnings import warn 13 import sys
14 from bx.intervals import * 14 from bx.intervals.io import GenomicInterval, NiceReaderWrapper
15 from bx.intervals.io import * 15 from bx.intervals.operations.find_clusters import find_clusters
16 from bx.intervals.operations.find_clusters import *
17 from bx.cookbook import doc_optparse 16 from bx.cookbook import doc_optparse
18 from galaxy.tools.util.galaxyops import * 17 from bx.tabular.io import ParseError
18 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped
19 19
20 assert sys.version_info[:2] >= ( 2, 4 ) 20 assert sys.version_info[:2] >= ( 2, 4 )
21
21 22
22 def main(): 23 def main():
23 distance = 0 24 distance = 0
24 minregions = 2 25 minregions = 2
25 output = 1 26 output = 1
26 upstream_pad = 0
27 downstream_pad = 0
28 27
29 options, args = doc_optparse.parse( __doc__ ) 28 options, args = doc_optparse.parse( __doc__ )
30 try: 29 try:
31 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 ) 30 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
32 if options.distance: distance = int( options.distance ) 31 if options.distance:
33 if options.overlap: distance = -1 * int( options.overlap ) 32 distance = int( options.distance )
34 if options.output: output = int( options.output ) 33 if options.overlap:
35 if options.minregions: minregions = int( options.minregions ) 34 distance = -1 * int( options.overlap )
35 if options.output:
36 output = int( options.output )
37 if options.minregions:
38 minregions = int( options.minregions )
36 in_fname, out_fname = args 39 in_fname, out_fname = args
37 except: 40 except:
38 doc_optparse.exception() 41 doc_optparse.exception()
39 42
40 g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ), 43 g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
50 except ParseError, exc: 53 except ParseError, exc:
51 fail( "Invalid file format: %s" % str( exc ) ) 54 fail( "Invalid file format: %s" % str( exc ) )
52 55
53 f1 = open( in_fname, "r" ) 56 f1 = open( in_fname, "r" )
54 out_file = open( out_fname, "w" ) 57 out_file = open( out_fname, "w" )
55 58
56 # If "merge" 59 # If "merge"
57 if output == 1: 60 if output == 1:
58 fields = ["." for x in range(max(g1.chrom_col, g1.start_col, g1.end_col)+1)] 61 fields = ["." for x in range(max(g1.chrom_col, g1.start_col, g1.end_col) + 1)]
59 for chrom, tree in clusters.items(): 62 for chrom, tree in clusters.items():
60 for start, end, lines in tree.getregions(): 63 for start, end, lines in tree.getregions():
61 fields[g1.chrom_col] = chrom 64 fields[g1.chrom_col] = chrom
62 fields[g1.start_col] = str(start) 65 fields[g1.start_col] = str(start)
63 fields[g1.end_col] = str(end) 66 fields[g1.end_col] = str(end)
89 if output == 4 or output == 5: 92 if output == 4 or output == 5:
90 linenums = list() 93 linenums = list()
91 f1.seek(0) 94 f1.seek(0)
92 fileLines = f1.readlines() 95 fileLines = f1.readlines()
93 for chrom, tree in clusters.items(): 96 for chrom, tree in clusters.items():
94 regions = tree.getregions()
95 for start, end, lines in tree.getregions(): 97 for start, end, lines in tree.getregions():
96 outsize = -1 98 outsize = -1
97 outinterval = None 99 outinterval = None
98 for line in lines: 100 for line in lines:
99 # three nested for loops? 101 # three nested for loops?
100 # should only execute this code once per line 102 # should only execute this code once per line
101 fileline = fileLines[line].rstrip("\n\r") 103 fileline = fileLines[line].rstrip("\n\r")
102 try: 104 try:
103 cluster_interval = GenomicInterval( g1, fileline.split("\t"), 105 cluster_interval = GenomicInterval( g1, fileline.split("\t"),
104 g1.chrom_col, 106 g1.chrom_col,
105 g1.start_col, 107 g1.start_col,
106 g1.end_col, 108 g1.end_col,
107 g1.strand_col, 109 g1.strand_col,
108 g1.default_strand, 110 g1.default_strand,
109 g1.fix_strand ) 111 g1.fix_strand )
110 except Exception, exc: 112 except Exception, exc:
111 print >> sys.stderr, str( exc ) 113 print >> sys.stderr, str( exc )
112 f1.close() 114 f1.close()
113 sys.exit() 115 sys.exit()
114 interval_size = cluster_interval.end - cluster_interval.start 116 interval_size = cluster_interval.end - cluster_interval.start
115 if outsize == -1 or \ 117 if outsize == -1 or \
116 ( outsize > interval_size and output == 4 ) or \ 118 ( outsize > interval_size and output == 4 ) or \
117 ( outsize < interval_size and output == 5 ) : 119 ( outsize < interval_size and output == 5 ):
118 outinterval = cluster_interval 120 outinterval = cluster_interval
119 outsize = interval_size 121 outsize = interval_size
120 out_file.write( "%s\n" % outinterval ) 122 out_file.write( "%s\n" % outinterval )
121 123
122 f1.close() 124 f1.close()
123 out_file.close() 125 out_file.close()
124 126
125 if g1.skipped > 0: 127 if g1.skipped > 0:
126 print skipped( g1, filedesc="" ) 128 print skipped( g1, filedesc="" )
127 129
128 if __name__ == "__main__": 130 if __name__ == "__main__":
129 main() 131 main()