Mercurial > repos > iuc > genetrack
comparison genetrack.py @ 0:25cd59a002d9 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
author | iuc |
---|---|
date | Tue, 22 Dec 2015 17:03:27 -0500 |
parents | |
children | 41887967ef14 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:25cd59a002d9 |
---|---|
1 """ | |
2 genetrack.py | |
3 | |
4 Input: either scidx or gff format of reads | |
5 Output: Called peaks in gff format | |
6 """ | |
7 import optparse | |
8 import csv | |
9 import os | |
10 import genetrack_util | |
11 | |
12 CHUNK_SIZE = 10000000 | |
13 | |
14 | |
15 if __name__ == '__main__': | |
16 parser = optparse.OptionParser() | |
17 parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format') | |
18 parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets') | |
19 parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.') | |
20 parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.') | |
21 parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.') | |
22 parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.') | |
23 parser.add_option('-f', '--filter', dest='filter', type='int', default=1, help='Absolute read filter.') | |
24 options, args = parser.parse_args() | |
25 | |
26 os.mkdir('output') | |
27 for (dataset_path, hid) in options.inputs: | |
28 if options.input_format == 'gff': | |
29 # Make sure the reads for each chromosome are sorted by index. | |
30 input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path) | |
31 else: | |
32 # We're processing scidx data. | |
33 input_path = dataset_path | |
34 output_name = 's%se%su%sd%sF%s_on_data_%s' % (options.sigma, | |
35 options.exclusion, | |
36 options.up_width, | |
37 options.down_width, | |
38 options.filter, | |
39 hid) | |
40 output_path = os.path.join('output', output_name) | |
41 reader = csv.reader(open(input_path, 'rU'), delimiter='\t') | |
42 writer = csv.writer(open(output_path, 'wt'), delimiter='\t') | |
43 width = options.sigma * 5 | |
44 manager = genetrack_util.ChromosomeManager(reader) | |
45 while not manager.done: | |
46 cname = manager.chromosome_name() | |
47 # Should we process this chromosome? | |
48 data = manager.load_chromosome() | |
49 if not data: | |
50 continue | |
51 keys = genetrack_util.make_keys(data) | |
52 lo, hi = genetrack_util.get_range(data) | |
53 for chunk in genetrack_util.get_chunks(lo, hi, size=CHUNK_SIZE, overlap=width): | |
54 (slice_start, slice_end), process_bounds = chunk | |
55 window = genetrack_util.get_window(data, slice_start, slice_end, keys) | |
56 genetrack_util.process_chromosome(cname, | |
57 window, | |
58 writer, | |
59 process_bounds, | |
60 width, | |
61 options.sigma, | |
62 options.up_width, | |
63 options.down_width, | |
64 options.exclusion, | |
65 options.filter) |