Mercurial > repos > iuc > genetrack
annotate genetrack.py @ 3:41887967ef14 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
author | iuc |
---|---|
date | Sat, 21 Jan 2017 14:41:43 -0500 |
parents | 25cd59a002d9 |
children |
rev | line source |
---|---|
0
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
1 """ |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
2 genetrack.py |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
3 |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
4 Input: either scidx or gff format of reads |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
5 Output: Called peaks in gff format |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
6 """ |
3
41887967ef14
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
0
diff
changeset
|
7 import csv |
0
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
8 import optparse |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
9 import os |
3
41887967ef14
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
0
diff
changeset
|
10 |
0
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
11 import genetrack_util |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
12 |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
13 CHUNK_SIZE = 10000000 |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
14 |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
15 |
25cd59a002d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff
changeset
|
def main(argv=None):
    """Call peaks for every input dataset given on the command line.

    Every ``--input`` option supplies two values: the path of a dataset and
    an identifier (``hid``) that is embedded in the output file name.  For
    each dataset one tab-separated file is written into the ``output``
    directory (created in the current working directory when missing); the
    file name encodes sigma, exclusion zone, up/down widths, the read filter
    and the hid.

    argv: optional list of command line arguments.  ``None`` (the default)
        lets optparse read ``sys.argv[1:]``.

    Exits with a usage error (status 2) when no ``--input`` option is given.
    """
    parser = optparse.OptionParser()
    parser.add_option('-t', '--input_format', dest='input_format', type='string', help='Input format')
    parser.add_option('-i', '--input', dest='inputs', type='string', action='append', nargs=2, help='Input datasets')
    parser.add_option('-s', '--sigma', dest='sigma', type='int', default=5, help='Sigma.')
    parser.add_option('-e', '--exclusion', dest='exclusion', type='int', default=20, help='Exclusion zone.')
    parser.add_option('-u', '--up_width', dest='up_width', type='int', default=10, help='Upstream width of called peaks.')
    parser.add_option('-d', '--down_width', dest='down_width', type='int', default=10, help='Downstream width of called peaks.')
    parser.add_option('-f', '--filter', dest='filter', type='int', default=1, help='Absolute read filter.')
    options, args = parser.parse_args(argv)

    # Without any --input option optparse leaves ``inputs`` as None; report a
    # usage error instead of failing later with a TypeError.
    if not options.inputs:
        parser.error('At least one --input <dataset_path> <hid> pair is required.')

    # Tolerate an already existing output directory (e.g. on a re-run).
    if not os.path.isdir('output'):
        os.mkdir('output')

    # Five times sigma; used both as the overlap between consecutive chunks
    # and as the width argument of process_chromosome.  It does not depend on
    # the dataset, so compute it once.
    width = options.sigma * 5

    for (dataset_path, hid) in options.inputs:
        if options.input_format == 'gff':
            # Make sure the reads for each chromosome are sorted by index.
            input_path = genetrack_util.sort_chromosome_reads_by_index(dataset_path)
        else:
            # We're processing scidx data.
            input_path = dataset_path
        output_name = 's%se%su%sd%sF%s_on_data_%s' % (options.sigma,
                                                      options.exclusion,
                                                      options.up_width,
                                                      options.down_width,
                                                      options.filter,
                                                      hid)
        output_path = os.path.join('output', output_name)
        # Plain 'r' replaces the former 'rU': the 'U' flag was removed in
        # Python 3.11 and universal newlines are the Python 3 default anyway.
        # The with-statement guarantees both files are closed (and the output
        # flushed) even when processing raises.
        with open(input_path, 'r') as input_fh, open(output_path, 'wt') as output_fh:
            reader = csv.reader(input_fh, delimiter='\t')
            writer = csv.writer(output_fh, delimiter='\t')
            manager = genetrack_util.ChromosomeManager(reader)
            while not manager.done:
                # The name has to be fetched before the chromosome is loaded.
                cname = manager.chromosome_name()
                data = manager.load_chromosome()
                if not data:
                    # Nothing was loaded for this chromosome; move on.
                    continue
                keys = genetrack_util.make_keys(data)
                lo, hi = genetrack_util.get_range(data)
                # Walk the chromosome in chunks of CHUNK_SIZE so that only a
                # window of the data is handed to the peak caller at a time.
                for chunk in genetrack_util.get_chunks(lo, hi, size=CHUNK_SIZE, overlap=width):
                    (slice_start, slice_end), process_bounds = chunk
                    window = genetrack_util.get_window(data, slice_start, slice_end, keys)
                    genetrack_util.process_chromosome(cname,
                                                      window,
                                                      writer,
                                                      process_bounds,
                                                      width,
                                                      options.sigma,
                                                      options.up_width,
                                                      options.down_width,
                                                      options.exclusion,
                                                      options.filter)


if __name__ == '__main__':
    main()