Mercurial > repos > rnateam > graphprot_predict_profile
annotate gplib.py @ 2:7bbb7bf6304f draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit afbe0da7a518d6fc60ea066917b1e567c8c42391"
author | bgruening |
---|---|
date | Mon, 27 Jan 2020 18:37:05 -0500 |
parents | 20429f4c1b95 |
children | ace92c9a4653 |
rev | line source |
---|---|
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
2 from distutils.spawn import find_executable |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
3 import subprocess |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
4 import statistics |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
5 import random |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
6 import gzip |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
7 import uuid |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
8 import sys |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
9 import re |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
10 import os |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
11 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
12 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
13 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
14 Run doctests: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
15 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
16 python3 -m doctest gplib.py |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
17 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
18 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
19 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
20 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
21 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
22 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
23 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def graphprot_predictions_get_median(predictions_file):
    """
    Given a GraphProt .predictions file, read in site scores and return
    the median value.

    The file is read as tab-separated text, with the site score taken
    from column 3. Empty (whitespace-only) lines are skipped, so a
    trailing blank line does not break parsing.

    Raises statistics.StatisticsError if the file contains no score lines,
    and ValueError / IndexError if a non-empty line has no numeric third
    column.

    >>> test_file = "test-data/test.predictions"
    >>> graphprot_predictions_get_median(test_file)
    0.571673

    """
    # Site scores list.
    sc_list = []
    # The with statement closes the file, no explicit close() is needed.
    with open(predictions_file) as f:
        for line in f:
            line = line.strip()
            # Skip empty lines instead of failing on the column access.
            if not line:
                continue
            cols = line.split("\t")
            sc_list.append(float(cols[2]))
    # Return the median.
    return statistics.median(sc_list)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
44 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
45 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
46 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
47 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def graphprot_profile_get_top_scores_median(profile_file,
                                            profile_type="profile",
                                            avg_profile_extlr=5):

    """
    Given a GraphProt .profile file, extract for each site (identified by
    column 1 ID) the top (= highest) score. Then return the median of these
    top scores.

    The file is read as tab-separated text, with the site ID taken from
    column 1 and the position-wise score from column 3. Empty
    (whitespace-only) lines are skipped.

    profile_type can be either "profile" or "avg_profile".
    "avg_profile" means that the position-wise scores will first get smoothed
    out by calculating for each position a new score through taking a
    sequence window -avg_profile_extlr to +avg_profile_extlr of the position
    and calculate the mean score over this window and assign it to the position.
    After that, the maximum score of each site is chosen, and the median over
    all maximum scores is returned.
    "profile" leaves the position-wise scores as they are, directly extracting
    the maximum for each site and then reporting the median.

    Raises AssertionError if profile_type is neither "profile" nor
    "avg_profile", and statistics.StatisticsError if the file contains no
    score lines.

    >>> test_file = "test-data/test.profile"
    >>> graphprot_profile_get_top_scores_median(test_file)
    3.2

    """
    # Check the argument once, before any work is done. An explicit raise
    # (same exception type as the former assert statement) is used so the
    # check also runs under "python -O" and for files without any sites.
    if profile_type not in ("profile", "avg_profile"):
        raise AssertionError(
            "invalid profile_type argument given: \"%s\"" % (profile_type))
    # Dictionary of lists, with list of scores (value) for each site (key).
    lists_dic = {}
    # The with statement closes the file, no explicit close() is needed.
    with open(profile_file) as f:
        for line in f:
            line = line.strip()
            # Skip empty lines instead of failing on the column access.
            if not line:
                continue
            cols = line.split("\t")
            lists_dic.setdefault(cols[0], []).append(float(cols[2]))
    # A window extension of 0 covers only the position itself, so the
    # averaged profile equals the raw profile and smoothing can be skipped.
    smooth = profile_type == "avg_profile" and avg_profile_extlr != 0
    # For each site, extract maximum and store in new list.
    max_list = []
    for sc_list in lists_dic.values():
        if smooth:
            # Convert profile score list to average profile scores list.
            sc_list = list_moving_window_average_values(
                sc_list, win_extlr=avg_profile_extlr)
        max_list.append(max(sc_list))
    # Return the median.
    return statistics.median(max_list)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
101 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
102 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
103 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
104 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
105 def list_moving_window_average_values(in_list, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
106 win_extlr=5, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
107 method=1): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
108 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
109 Take a list of numeric values, and calculate for each position a new value, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
110 by taking the mean value of the window of positions -win_extlr and |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
111 +win_extlr. If full extension is not possible (at list ends), it just |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
112 takes what it gets. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
113 Two implementations of the task are given, chose by method=1 or method=2. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
114 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
115 >>> test_list = [2, 3, 5, 8, 4, 3, 7, 1] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
116 >>> list_moving_window_average_values(test_list, win_extlr=2, method=1) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
117 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
118 >>> list_moving_window_average_values(test_list, win_extlr=2, method=2) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
119 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
120 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
121 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
122 l_list = len(in_list) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
123 assert l_list, "Given list is empty" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
124 new_list = [0] * l_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
125 if win_extlr == 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
126 return l_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
127 if method == 1: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
128 for i in range(l_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
129 s = i - win_extlr |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
130 e = i + win_extlr + 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
131 if s < 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
132 s = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
133 if e > l_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
134 e = l_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
135 # Extract portion and assign value to new list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
136 new_list[i] = statistics.mean(in_list[s:e]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
137 elif method == 2: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
138 for i in range(l_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
139 s = i - win_extlr |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
140 e = i + win_extlr + 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
141 if s < 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
142 s = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
143 if e > l_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
144 e = l_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
145 l = e-s |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
146 sc_sum = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
147 for j in range(l): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
148 sc_sum += in_list[s+j] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
149 new_list[i] = sc_sum / l |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
150 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
151 assert 0, "invalid method ID given (%i)" %(method) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
152 return new_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
153 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
154 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
155 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
156 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def echo_add_to_file(echo_string, out_file):
    """
    Append echo_string, followed by a newline, to out_file.

    The line is written directly from Python rather than through a shell
    'echo "..." >> file' command, so quotes, "$", backticks or backslashes
    in echo_string and whitespace in out_file are taken literally instead
    of being interpreted by the shell.

    Raises AssertionError if out_file cannot be opened or written to
    (same exception type the shell-based implementation raised).

    """
    try:
        # Append mode creates the file if it does not exist yet,
        # just like the shell ">>" redirection did.
        with open(out_file, "a") as out_fh:
            out_fh.write("%s\n" % (echo_string,))
    except OSError as err:
        # Explicit raise instead of an assert statement, so the check
        # is not stripped when Python runs with -O.
        raise AssertionError("could not append to file \"%s\":\n%s"
                             % (out_file, err)) from err
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
168 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
169 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
170 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
171 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def is_tool(name):
    """
    Check whether tool "name" is in PATH and executable.

    Returns True if an executable called name can be located, else False.

    """
    # shutil.which replaces distutils.spawn.find_executable: distutils is
    # deprecated (PEP 632) and no longer shipped with Python >= 3.12.
    # Unlike find_executable, which also verifies the executable bit.
    from shutil import which
    return which(name) is not None
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
175 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
176 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
177 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
178 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def count_fasta_headers(fasta_file):
    """
    Count number of FASTA headers in fasta_file.

    Every line containing a ">" character is counted, which is what the
    former 'grep -c ">"' call did. The file is read directly, so file
    names with whitespace or shell metacharacters are handled correctly.

    Raises FileNotFoundError (an OSError) if fasta_file does not exist.

    >>> test_file = "test-data/test.fa"
    >>> count_fasta_headers(test_file)
    2
    >>> test_file = "test-data/empty_file"
    >>> count_fasta_headers(test_file)
    0

    """
    # Binary mode: no decoding needed just to look for the ">" byte, and
    # non-UTF-8 bytes in the file can not trigger a decoding error.
    with open(fasta_file, "rb") as f:
        return sum(1 for line in f if b">" in line)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
195 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
196 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
197 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
198 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def make_file_copy(in_file, out_file):
    """
    Make a file copy by copying the content of in_file to out_file.

    An existing out_file is overwritten. The copy is done with
    shutil.copyfile instead of a shell "cat in > out" command, so paths
    containing whitespace or shell metacharacters work.

    Raises AssertionError if in_file and out_file are the same, or if
    the copy fails (same exception type the shell-based version raised).

    """
    from shutil import copyfile
    # Explicit raise instead of an assert statement: with python -O an
    # assert is removed, and copying a file onto itself must never be
    # attempted (the shell redirect would have truncated the file).
    if in_file == out_file:
        raise AssertionError("cannot copy file onto itself "
                             "(in_file: %s, out_file: %s)"
                             % (in_file, out_file))
    try:
        # copyfile also refuses (SameFileError, an OSError) when both
        # paths point to the same file under different names.
        copyfile(in_file, out_file)
    except OSError as err:
        raise AssertionError("file copy failed (in_file: %s, "
                             "out_file: %s):\n%s"
                             % (in_file, out_file, err)) from err
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
211 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
212 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
213 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
214 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def split_fasta_into_test_train_files(in_fasta, test_out_fa, train_out_fa,
                                      test_size=500):
    """
    Split in_fasta .fa file into two files (e.g. test, train).

    The sequences are read with read_fasta_into_dic() and their IDs are
    put into the order returned by random_order_dic_keys_into_list().
    The first test_size IDs of that list are written to test_out_fa,
    all remaining ones to train_out_fa. Both output files are
    (over)written, one ">id" header line plus one sequence line per entry.

    """
    # Read in in_fasta.
    seqs_dic = read_fasta_into_dic(in_fasta)
    # Shuffle IDs.
    rand_ids_list = random_order_dic_keys_into_list(seqs_dic)
    # Context manager closes both files even if writing fails midway.
    with open(test_out_fa, "w") as test_out, \
            open(train_out_fa, "w") as train_out:
        for idx, seq_id in enumerate(rand_ids_list):
            # Entries 0 .. test_size-1 form the test set.
            out_fh = test_out if idx < test_size else train_out
            out_fh.write(">%s\n%s\n" % (seq_id, seqs_dic[seq_id]))
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
237 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
238 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
239 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
240 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
241 def read_fasta_into_dic(fasta_file, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
242 seqs_dic=False, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
243 ids_dic=False, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
244 read_dna=False, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
245 reject_lc=False, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
246 convert_to_uc=False, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
247 skip_n_seqs=True): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
248 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
249 Read in FASTA sequences, convert to RNA, store in dictionary |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
250 and return dictionary. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
251 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
252 >>> test_fasta = "test-data/test.fa" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
253 >>> read_fasta_into_dic(test_fasta) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
254 {'seq1': 'acguACGUacgu', 'seq2': 'ugcaUGCAugcaACGUacgu'} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
255 >>> test_fasta = "test-data/test2.fa" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
256 >>> read_fasta_into_dic(test_fasta) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
257 {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
258 >>> test_fasta = "test-data/test.ensembl.fa" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
259 >>> read_fasta_into_dic(test_fasta, read_dna=True) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
260 {'ENST00000415118': 'GAAATAGT', 'ENST00000448914': 'ACTGGGGGATACGAAAA'} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
261 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
262 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
263 if not seqs_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
264 seqs_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
265 seq_id = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
266 seq = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
267 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
268 # Go through FASTA file, extract sequences. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
269 if re.search(".+\.gz$", fasta_file): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
270 f = gzip.open(fasta_file, 'rt') |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
271 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
272 f = open(fasta_file, "r") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
273 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
274 if re.search(">.+", line): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
275 m = re.search(">(.+)", line) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
276 seq_id = m.group(1) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
277 # If there is a ".", take only first part of header. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
278 # This assumes ENSEMBL header format ">ENST00000631435.1 cdna ..." |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
279 if re.search(".+\..+", seq_id): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
280 m = re.search("(.+?)\..+", seq_id) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
281 seq_id = m.group(1) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
282 assert seq_id not in seqs_dic, "non-unique FASTA header \"%s\" in \"%s\"" % (seq_id, fasta_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
283 if ids_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
284 if seq_id in ids_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
285 seqs_dic[seq_id] = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
286 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
287 seqs_dic[seq_id] = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
288 elif re.search("[ACGTUN]+", line, re.I): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
289 if seq_id in seqs_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
290 m = re.search("([ACGTUN]+)", line, re.I) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
291 seq = m.group(1) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
292 if reject_lc: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
293 assert not re.search("[a-z]", seq), "lowercase characters detected in sequence \"%i\" (reject_lc=True)" %(seq_id) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
294 if convert_to_uc: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
295 seq = seq.upper() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
296 # If sequences with N nucleotides should be skipped. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
297 if skip_n_seqs: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
298 if "n" in m.group(1) or "N" in m.group(1): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
299 print ("WARNING: \"%s\" contains N nucleotides. Discarding sequence ... " % (seq_id)) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
300 del seqs_dic[seq_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
301 continue |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
302 # Convert to RNA, concatenate sequence. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
303 if read_dna: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
304 seqs_dic[seq_id] += m.group(1).replace("U","T").replace("u","t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
305 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
306 seqs_dic[seq_id] += m.group(1).replace("T","U").replace("t","u") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
307 f.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
308 return seqs_dic |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
309 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
310 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
311 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
312 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def random_order_dic_keys_into_list(in_dic):
    """
    Collect the keys of the given dictionary and return them as a list
    in random order.

    The input dictionary is not modified. The order is produced by
    random.shuffle(), so it depends on the state of the module-level
    random number generator.

    """
    # Iterating a dictionary yields its keys, so list() copies them in
    # the same order a manual append loop would.
    shuffled_ids = list(in_dic)
    random.shuffle(shuffled_ids)
    return shuffled_ids
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
323 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
324 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
325 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
326 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def graphprot_get_param_string(params_file):
    """
    Get parameter string from GraphProt .params file.

    Every non-empty line of the file is expected to have the form
    "<name>: <value>". The lines are turned into a command line fragment
    of the form "-<name> <value> " (note the trailing space), with these
    special cases:

    - lines containing "pos_train" followed by at least one character
      are skipped
    - "model_type: sequence" adds "-onlyseq ", any other model_type
      setting adds nothing
    - blank / whitespace-only lines are ignored

    Raises AssertionError if the first column of a line does not look
    like "<name>:" or if a line has no value column.

    >>> test_params = "test-data/test.params"
    >>> graphprot_get_param_string(test_params)
    '-epochs 20 -lambda 0.01 -R 1 -D 3 -bitsize 14 -onlyseq '

    """
    param_string = ""
    with open(params_file) as f:
        for line in f:
            # Split on any whitespace run, so tabs or repeated spaces
            # between name and value do not produce empty columns.
            cols = line.split()
            # Tolerate empty lines (e.g. a trailing newline at file end)
            # instead of failing with an IndexError.
            if not cols:
                continue
            param = cols[0]
            # Extract the name from the first column only. Matching on the
            # whole line would be greedy up to the last colon and so would
            # swallow part of a value that itself contains a colon.
            m = re.search("(.+):", param)
            if m is None:
                # Explicit raise instead of "assert 0", which would be
                # stripped when running with python -O.
                raise AssertionError(
                    "pattern matching failed for string \"%s\"" % (param))
            par = m.group(1)
            if re.search("pos_train.+", line):
                continue
            if len(cols) < 2:
                raise AssertionError(
                    "pattern matching failed for string \"%s\"" % (param))
            setting = cols[1]
            if par == "model_type":
                if setting == "sequence":
                    param_string += "-onlyseq "
            else:
                param_string += "-%s %s " % (par, setting)
    return param_string
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
355 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
356 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
357 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
358 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def seqs_dic_count_uc_nts(seqs_dic):
    """
    Return the total number of uppercase characters (A-Z) found in all
    sequences of the given sequence dictionary (sequence ID -> sequence).

    An empty dictionary triggers an assertion error.

    >>> seqs_dic = {'seq1': "acgtACGTacgt", 'seq2': 'acgtACacgt'}
    >>> seqs_dic_count_uc_nts(seqs_dic)
    6
    >>> seqs_dic = {'seq1': "acgtacgt", 'seq2': 'acgtacgt'}
    >>> seqs_dic_count_uc_nts(seqs_dic)
    0

    """
    assert seqs_dic, "Given sequence dictionary empty"
    # One findall() per sequence, summed up over the whole dictionary.
    return sum(len(re.findall(r'[A-Z]', seq)) for seq in seqs_dic.values())
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
377 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
378 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
379 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
380 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def seqs_dic_count_lc_nts(seqs_dic):
    """
    Return the total number of lowercase characters (a-z) found in all
    sequences of the given sequence dictionary (sequence ID -> sequence).

    An empty dictionary triggers an assertion error.

    >>> seqs_dic = {'seq1': "gtACGTac", 'seq2': 'cgtACacg'}
    >>> seqs_dic_count_lc_nts(seqs_dic)
    10
    >>> seqs_dic = {'seq1': "ACGT", 'seq2': 'ACGTAC'}
    >>> seqs_dic_count_lc_nts(seqs_dic)
    0

    """
    assert seqs_dic, "Given sequence dictionary empty"
    lc_total = 0
    for _seq_id, seq in seqs_dic.items():
        lc_hits = re.findall(r'[a-z]', seq)
        lc_total += len(lc_hits)
    return lc_total
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
399 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
400 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
401 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
402 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def count_file_rows(in_file):
    """
    Count number of file rows for given input file.

    Rows are counted like "wc -l" does, i.e. as the number of newline
    characters in the file. A last line without trailing newline is
    therefore not counted.

    in_file:
        Path to the file whose rows should be counted.

    Returns the row count as integer (0 for an empty file).
    Raises OSError (e.g. FileNotFoundError) if the file can not be read.

    >>> test_file = "test-data/test1.bed"
    >>> count_file_rows(test_file)
    7
    >>> test_file = "test-data/empty_file"
    >>> count_file_rows(test_file)
    0

    """
    row_count = 0
    # Count in Python instead of calling "cat ... | wc -l" through a shell:
    # a shell command string breaks on paths containing spaces or shell
    # metacharacters. Binary chunks keep memory use constant and avoid
    # any text decoding.
    with open(in_file, "rb") as f:
        for chunk in iter(lambda: f.read(1048576), b""):
            row_count += chunk.count(b"\n")
    return row_count
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
419 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
420 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
421 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
422 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def bed_check_six_col_format(bed_file):
    """
    Check whether given .bed file has 6 columns.

    The file is accepted as soon as one row with exactly six
    tab-separated columns is found (reading stops at that row).
    Rows are stripped of surrounding whitespace before splitting.
    An empty file, or a file without any 6-column row, yields False.

    bed_file:
        Path to the .bed file to check.

    Returns True if a 6-column row was found, otherwise False.

    >>> test_bed = "test-data/test1.bed"
    >>> bed_check_six_col_format(test_bed)
    True
    >>> test_bed = "test-data/empty_file"
    >>> bed_check_six_col_format(test_bed)
    False

    """
    with open(bed_file) as f:
        # any() stops reading at the first 6-column row; the with-block
        # closes the file, so no explicit close handling is needed.
        return any(len(line.strip().split("\t")) == 6 for line in f)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
445 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
446 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
447 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
448 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def bed_check_unique_ids(bed_file):
    """
    Check whether .bed file (6 column format with IDs in column 4)
    has unique column 4 IDs.

    The ID of a row is selected like "cut -f 4" would select it:
    the 4th tab-separated field, an empty ID if the row has tabs but
    fewer than four fields, and the whole row if it contains no tab.
    Empty IDs (e.g. from blank rows) are not treated as duplicates.

    bed_file:
        Path to the .bed file to check.

    Returns True if no ID occurs more than once, otherwise False.
    Raises OSError (e.g. FileNotFoundError) if the file can not be read.

    >>> test_bed = "test-data/test1.bed"
    >>> bed_check_unique_ids(test_bed)
    True
    >>> test_bed = "test-data/test2.bed"
    >>> bed_check_unique_ids(test_bed)
    False

    """
    # Done in Python instead of a "cut | sort | uniq -d" shell pipeline:
    # a concatenated shell string breaks on paths with spaces or shell
    # metacharacters, and any error text printed by the shell would be
    # mistaken for a duplicated ID.
    seen_ids = set()
    with open(bed_file) as f:
        for line in f:
            row = line.rstrip("\n")
            cols = row.split("\t")
            if len(cols) == 1:
                # No tab in row: the whole row acts as the ID.
                site_id = row
            elif len(cols) < 4:
                site_id = ""
            else:
                site_id = cols[3]
            if not site_id:
                continue
            if site_id in seen_ids:
                return False
            seen_ids.add(site_id)
    return True
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
469 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
470 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
471 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
472 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def get_seq_lengths_from_seqs_dic(seqs_dic):
    """
    Given a dictionary of sequences, return dictionary of sequence lengths.
    Mapping is sequence ID -> sequence length.

    seqs_dic:
        Dictionary with mapping sequence ID -> sequence.

    Returns a new dictionary with the same keys and the length of each
    sequence as value.
    Raises AssertionError if the given dictionary is empty.

    >>> seqs_dic = {'seq1': 'ACGU', 'seq2': 'AC'}
    >>> get_seq_lengths_from_seqs_dic(seqs_dic)
    {'seq1': 4, 'seq2': 2}

    """
    assert seqs_dic, "sequence dictionary seems to be empty"
    return {seq_id: len(seq) for seq_id, seq in seqs_dic.items()}
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
484 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
485 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
486 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
487 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def bed_get_region_lengths(bed_file):
    """
    Read in .bed file, store and return region lengths in dictionary.
    key   :  region ID (.bed col4)
    value :  region length (.bed col3-col2)

    bed_file:
        Path to a tab-separated .bed file with at least 4 columns
        (integer start in column 2, integer end in column 3, region ID
        in column 4).

    Returns the dictionary region ID -> region length.
    Raises AssertionError if a column 4 ID occurs more than once or if
    no region was read (e.g. empty file).

    >>> test_file = "test-data/test4.bed"
    >>> bed_get_region_lengths(test_file)
    {'CLIP1': 10, 'CLIP2': 10}

    """
    id2len_dic = {}
    with open(bed_file) as f:
        for line in f:
            cols = line.strip().split("\t")
            site_s = int(cols[1])
            site_e = int(cols[2])
            site_id = cols[3]
            assert site_id not in id2len_dic, "column 4 IDs not unique in given .bed file \"%s\"" % (bed_file)
            id2len_dic[site_id] = site_e - site_s
    # Report the file that was actually read; the message has to use the
    # bed_file parameter, as no other file name exists in this function.
    assert id2len_dic, "No IDs read into dictionary (input file \"%s\" empty or malformatted?)" % (bed_file)
    return id2len_dic
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
513 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
514 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
515 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
516 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def graphprot_get_param_dic(params_file):
    """
    Read in GraphProt .params file and store in dictionary.
    key = parameter
    value = parameter value

    Each row is expected to look like "parameter: value". The first
    whitespace-separated token has to contain the parameter name followed
    by a colon, the second token is stored as (string) value. Rows with
    fewer than two tokens (e.g. blank rows) and rows whose first token
    contains no "name:" part are skipped.

    params_file:
        Path to the GraphProt .params file.

    Returns the dictionary parameter -> parameter value (values are
    kept as strings).

    >>> params_file = "test-data/test.params"
    >>> graphprot_get_param_dic(params_file)
    {'epochs': '20', 'lambda': '0.01', 'R': '1', 'D': '3', 'bitsize': '14', 'model_type': 'sequence', 'pos_train_ws_pred_median': '0.760321', 'pos_train_profile_median': '5.039610', 'pos_train_avg_profile_median_1': '4.236340', 'pos_train_avg_profile_median_2': '3.868431', 'pos_train_avg_profile_median_3': '3.331277', 'pos_train_avg_profile_median_4': '2.998667', 'pos_train_avg_profile_median_5': '2.829782', 'pos_train_avg_profile_median_6': '2.626623', 'pos_train_avg_profile_median_7': '2.447083', 'pos_train_avg_profile_median_8': '2.349919', 'pos_train_avg_profile_median_9': '2.239829', 'pos_train_avg_profile_median_10': '2.161676'}

    """
    param_dic = {}
    with open(params_file) as f:
        for line in f:
            cols = line.split()
            # Blank or single-token rows carry no "parameter: value" pair.
            if len(cols) < 2:
                continue
            # Take the name from the first token only, so a colon inside
            # the value can not end up in the dictionary key.
            m = re.search(r"(.+):", cols[0])
            if m:
                param_dic[m.group(1)] = cols[1]
    return param_dic
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
540 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
541 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
542 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
543 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def graphprot_filter_predictions_file(in_file, out_file,
                                      sc_thr=0):
    """
    Filter GraphProt .predictions file by given score threshold sc_thr.

    Each row of in_file is expected to be tab-separated, with the score
    stored in the third column. Rows with score >= sc_thr are written to
    out_file (stripped of surrounding whitespace, one row per line), rows
    with score < sc_thr are dropped. Empty lines are skipped.

    in_file:
        Path of the .predictions file to read.
    out_file:
        Path of the filtered file to write (opened in "w" mode, so an
        existing file gets overwritten).
    sc_thr:
        Minimum score a row needs to have to be kept.

    Raises IndexError if a non-empty row has less than three columns,
    and ValueError if the third column is not a number.

    """
    # Both handles are managed by the with statement, so the output file
    # also gets closed if a malformed row raises in the loop.
    with open(out_file, "w") as out_pred, open(in_file) as f:
        for line in f:
            row = line.strip()
            if not row:
                # Tolerate empty lines (e.g. trailing newline at file end).
                continue
            cols = row.split("\t")
            score = float(cols[2])
            if score < sc_thr:
                continue
            out_pred.write("%s\n" % (row))
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
560 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
561 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
562 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
563 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def fasta_read_in_ids(fasta_file):
    """
    Given a .fa file, read in header IDs in order appearing in file,
    and store in list.

    The ID is everything following the ">" character up to the end of
    the line. Lines without ">" followed by at least one character
    are ignored. Returns the list of IDs (empty list if no header
    lines are found).

    >>> test_file = "test-data/test3.fa"
    >>> fasta_read_in_ids(test_file)
    ['SERBP1_K562_rep01_544', 'SERBP1_K562_rep02_709', 'SERBP1_K562_rep01_316']

    """
    ids_list = []
    with open(fasta_file) as f:
        for line in f:
            # Search once per line and reuse the match object, instead of
            # testing and extracting with two separate regex searches.
            m = re.search(">(.+)", line)
            if m:
                ids_list.append(m.group(1))
    return ids_list
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
583 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
584 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
585 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
586 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
587 def graphprot_profile_calculate_avg_profile(in_file, out_file, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
588 ap_extlr=5, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
589 seq_ids_list=False, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
590 method=1): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
591 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
592 Given a GraphProt .profile file, calculate average profiles and output |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
593 average profile file. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
594 Average profile means that the position-wise scores will get smoothed |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
595 out by calculating for each position a new score, taking a sequence |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
596 window -ap_extlr to +ap_extlr relative to the position |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
597 and calculate the mean score over this window. The mean score then |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
598 becomes the new average profile score at this position. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
599 Two different implementations of the task are given: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
600 method=1 (new python implementation, slower + more memory but easy to read) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
601 method=2 (old perl implementation, faster and less memory but more code) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
602 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
603 >>> in_file = "test-data/test2.profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
604 >>> out_file1 = "test-data/test2_1.avg_profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
605 >>> out_file2 = "test-data/test2_2.avg_profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
606 >>> out_file4 = "test-data/test2_3.avg_profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
607 >>> graphprot_profile_calculate_avg_profile(in_file, out_file1, ap_extlr=2, method=1) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
608 >>> graphprot_profile_calculate_avg_profile(in_file, out_file2, ap_extlr=2, method=2) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
609 >>> diff_two_files_identical(out_file1, out_file2) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
610 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
611 >>> test_list = ["s1", "s2", "s3", "s4"] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
612 >>> out_file3_exp = "test-data/test3_added_ids_exp.avg_profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
613 >>> out_file3 = "test-data/test3_added_ids_out.avg_profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
614 >>> graphprot_profile_calculate_avg_profile(in_file, out_file3, ap_extlr=2, method=1, seq_ids_list=test_list) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
615 >>> diff_two_files_identical(out_file3_exp, out_file3) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
616 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
617 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
618 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
619 if method == 1: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
620 # Dictionary of lists, with list of scores (value) for each site (key). |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
621 lists_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
622 site_starts_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
623 with open(in_file) as f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
624 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
625 cols = line.strip().split("\t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
626 site_id = int(cols[0]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
627 pos = int(cols[1]) # 0-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
628 score = float(cols[2]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
629 # Store first position of site. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
630 if site_id not in site_starts_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
631 site_starts_dic[site_id] = pos |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
632 if site_id in lists_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
633 lists_dic[site_id].append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
634 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
635 lists_dic[site_id] = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
636 lists_dic[site_id].append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
637 f.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
638 # Check number of IDs (# FASTA sequence IDs has to be same as # site IDs). |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
639 if seq_ids_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
640 c_seq_ids = len(seq_ids_list) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
641 c_site_ids = len(site_starts_dic) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
642 assert c_seq_ids == c_site_ids, "# sequence IDs != # site IDs (%i != %i)" %(c_seq_ids, c_site_ids) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
643 OUTPROF = open(out_file, "w") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
644 # For each site, calculate average profile scores list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
645 max_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
646 for site_id in lists_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
647 # Convert profile score list to average profile scores list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
648 aps_list = list_moving_window_average_values(lists_dic[site_id], |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
649 win_extlr=ap_extlr) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
650 start_pos = site_starts_dic[site_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
651 # Get original FASTA sequence ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
652 if seq_ids_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
653 site_id = seq_ids_list[site_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
654 for i, sc in enumerate(aps_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
655 pos = i + start_pos + 1 # make 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
656 OUTPROF.write("%s\t%i\t%f\n" %(site_id, pos, sc)) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
657 OUTPROF.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
658 elif method == 2: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
659 OUTPROF = open(out_file, "w") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
660 # Old site ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
661 old_id = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
662 # Current site ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
663 cur_id = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
664 # Scores list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
665 scores_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
666 site_starts_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
667 with open(in_file) as f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
668 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
669 cols = line.strip().split("\t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
670 cur_id = int(cols[0]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
671 pos = int(cols[1]) # 0-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
672 score = float(cols[2]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
673 # Store first position of site. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
674 if cur_id not in site_starts_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
675 site_starts_dic[cur_id] = pos |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
676 # Case: new site (new column 1 ID). |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
677 if cur_id != old_id: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
678 # Process old id scores. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
679 if scores_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
680 aps_list = list_moving_window_average_values(scores_list, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
681 win_extlr=ap_extlr) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
682 start_pos = site_starts_dic[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
683 seq_id = old_id |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
684 # Get original FASTA sequence ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
685 if seq_ids_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
686 seq_id = seq_ids_list[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
687 for i, sc in enumerate(aps_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
688 pos = i + start_pos + 1 # make 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
689 OUTPROF.write("%s\t%i\t%f\n" %(seq_id, pos, sc)) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
690 # Reset list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
691 scores_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
692 old_id = cur_id |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
693 scores_list.append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
694 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
695 # Add to scores_list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
696 scores_list.append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
697 f.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
698 # Process last block. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
699 if scores_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
700 aps_list = list_moving_window_average_values(scores_list, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
701 win_extlr=ap_extlr) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
702 start_pos = site_starts_dic[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
703 seq_id = old_id |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
704 # Get original FASTA sequence ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
705 if seq_ids_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
706 seq_id = seq_ids_list[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
707 for i, sc in enumerate(aps_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
708 pos = i + start_pos + 1 # make 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
709 OUTPROF.write("%s\t%i\t%f\n" %(seq_id, pos, sc)) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
710 OUTPROF.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
711 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
712 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
713 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
714 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
715 def graphprot_profile_extract_peak_regions(in_file, out_file, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
716 max_merge_dist=0, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
717 sc_thr=0): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
718 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
719 Extract peak regions from GraphProt .profile file. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
720 Store the peak regions (defined as regions with scores >= sc_thr) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
721 to out_file in 6-column .bed. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
722 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
723 TODO: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
724 Add option for genomic coordinates input (+ - polarity support). |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
725 Output genomic regions instead of sequence regions. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
726 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
727 >>> in_file = "test-data/test4.avg_profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
728 >>> out_file = "test-data/test4_out.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
729 >>> exp_file = "test-data/test4_out_exp.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
730 >>> exp2_file = "test-data/test4_out_exp2.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
731 >>> empty_file = "test-data/empty_file" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
732 >>> graphprot_profile_extract_peak_regions(in_file, out_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
733 >>> diff_two_files_identical(out_file, exp_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
734 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
735 >>> graphprot_profile_extract_peak_regions(in_file, out_file, sc_thr=10) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
736 >>> diff_two_files_identical(out_file, empty_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
737 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
738 >>> graphprot_profile_extract_peak_regions(in_file, out_file, max_merge_dist=2) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
739 >>> diff_two_files_identical(out_file, exp2_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
740 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
741 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
742 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
743 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
744 OUTPEAKS = open(out_file, "w") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
745 # Old site ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
746 old_id = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
747 # Current site ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
748 cur_id = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
749 # Scores list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
750 scores_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
751 site_starts_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
752 with open(in_file) as f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
753 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
754 cols = line.strip().split("\t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
755 cur_id = cols[0] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
756 pos = int(cols[1]) # 0-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
757 score = float(cols[2]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
758 # Store first position of site. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
759 if cur_id not in site_starts_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
760 # If first position != zero, we assume positions are 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
761 if pos != 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
762 # Make index 0-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
763 site_starts_dic[cur_id] = pos - 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
764 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
765 site_starts_dic[cur_id] = pos |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
766 # Case: new site (new column 1 ID). |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
767 if cur_id != old_id: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
768 # Process old id scores. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
769 if scores_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
770 # Extract peaks from region. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
771 peak_list = list_extract_peaks(scores_list, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
772 max_merge_dist=max_merge_dist, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
773 coords="bed", |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
774 sc_thr=sc_thr) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
775 start_pos = site_starts_dic[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
776 # Print out peaks in .bed format. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
777 for l in peak_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
778 peak_s = start_pos + l[0] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
779 peak_e = start_pos + l[1] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
780 site_id = "%s,%i" %(old_id, l[2]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
781 OUTPEAKS.write("%s\t%i\t%i\t%s\t%f\t+\n" %(old_id, peak_s, peak_e, site_id, l[3])) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
782 # Reset list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
783 scores_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
784 old_id = cur_id |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
785 scores_list.append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
786 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
787 # Add to scores_list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
788 scores_list.append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
789 f.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
790 # Process last block. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
791 if scores_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
792 # Extract peaks from region. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
793 peak_list = list_extract_peaks(scores_list, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
794 max_merge_dist=max_merge_dist, |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
795 coords="bed", |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
796 sc_thr=sc_thr) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
797 start_pos = site_starts_dic[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
798 # Print out peaks in .bed format. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
799 for l in peak_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
800 peak_s = start_pos + l[0] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
801 peak_e = start_pos + l[1] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
802 site_id = "%s,%i" %(old_id, l[2]) # best score also 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
803 OUTPEAKS.write("%s\t%i\t%i\t%s\t%f\t+\n" %(old_id, peak_s, peak_e, site_id, l[3])) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
804 OUTPEAKS.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
805 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
806 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
807 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
808 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def list_extract_peaks(in_list,
                       max_merge_dist=0,
                       coords="list",
                       sc_thr=0):
    """
    Extract peak regions from list.
    Peak region is defined as region >= score threshold.

    in_list:
        Non-empty sequence of position-wise scores.
    max_merge_dist:
        Merge neighbouring peak regions if (start of next region - end of
        previous region) <= max_merge_dist. 0 disables merging.
    coords:
        coords=bed  : peak start 0-based, peak end 1-based
                      (top score position is reported 1-based too).
        coords=list : peak start 0-based, peak end 0-based.
    sc_thr:
        Score threshold, positions with score >= sc_thr belong to a peak.

    Returns a list of peak regions, each region being a list
    [start, end, top_score_position, top_score]. If several positions
    share the top score, the leftmost position is reported.
    Raises AssertionError if in_list is empty.

    >>> test_list = [-1, 0, 2, 4.5, 1, -1, 5, 6.5]
    >>> list_extract_peaks(test_list)
    [[1, 4, 3, 4.5], [6, 7, 7, 6.5]]
    >>> list_extract_peaks(test_list, sc_thr=2)
    [[2, 3, 3, 4.5], [6, 7, 7, 6.5]]
    >>> list_extract_peaks(test_list, sc_thr=2, coords="bed")
    [[2, 4, 4, 4.5], [6, 8, 8, 6.5]]
    >>> list_extract_peaks(test_list, sc_thr=10)
    []
    >>> test_list = [2, -1, 3, -1, 4, -1, -1, 6, 9]
    >>> list_extract_peaks(test_list, max_merge_dist=2)
    [[0, 4, 4, 4], [7, 8, 8, 9]]
    >>> list_extract_peaks(test_list, max_merge_dist=3)
    [[0, 8, 8, 9]]
    >>> list_extract_peaks([-200000, -150000], sc_thr=-300000)
    [[0, 1, 1, -150000]]

    """
    # Check.
    assert len(in_list), "Given list is empty"
    # Peak regions list.
    peak_list = []
    # State of the peak region currently being scanned.
    inside = False
    pr_s = 0
    pr_e = 0
    pr_top_pos = 0
    pr_top_sc = 0
    for i, sc in enumerate(in_list):
        # Part of peak region?
        if sc >= sc_thr:
            if not inside:
                # At peak start: the first score is the initial top score.
                # Seeding from the data (instead of a fixed very small
                # number) keeps the result correct for arbitrarily low
                # scores.
                inside = True
                pr_s = i
                pr_top_pos = i
                pr_top_sc = sc
            elif sc > pr_top_sc:
                # Strictly greater, so ties keep the leftmost position.
                pr_top_pos = i
                pr_top_sc = sc
            pr_e = i
        elif inside:
            # Score dropped below threshold, store finished peak region.
            peak_list.append([pr_s, pr_e, pr_top_pos, pr_top_sc])
            inside = False
    # If peak at the end, also report.
    if inside:
        peak_list.append([pr_s, pr_e, pr_top_pos, pr_top_sc])
    # Merge peaks.
    if max_merge_dist and len(peak_list) > 1:
        # Peaks are sorted by position, so one left-to-right pass that
        # keeps extending the last merged region is sufficient.
        merged_peak_list = [peak_list[0]]
        for peak in peak_list[1:]:
            last = merged_peak_list[-1]
            if (peak[0] - last[1]) <= max_merge_dist:
                last[1] = peak[1]
                # On equal top scores keep the earlier (left) position.
                if last[3] < peak[3]:
                    last[2] = peak[2]
                    last[3] = peak[3]
            else:
                merged_peak_list.append(peak)
        peak_list = merged_peak_list
    # If peak coordinates should be in .bed format, make peak ends 1-based.
    if coords == "bed":
        for peak in peak_list:
            peak[1] += 1
            peak[2] += 1  # 1-base best score position too.
    return peak_list
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
920 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
921 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
922 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
923 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def bed_peaks_to_genomic_peaks(peak_file, genomic_peak_file, genomic_sites_bed, print_rows=False):
    """
    Given a .bed file of sequence peak regions (possible coordinates from
    0 to length of s), convert peak coordinates to genomic coordinates.
    Do this by taking genomic regions of sequences as input.

    peak_file:
        Tab-separated peak regions with coordinates relative to their
        site: site ID, peak start, peak end, "<ID>,<position>" (position
        is 1-based), peak score.
    genomic_peak_file:
        Output file; one row per peak with genomic coordinates
        (chromosome, start, end, "<site ID>,<genomic position>", score,
        strand).
    genomic_sites_bed:
        Tab-separated .bed file with the genomic regions of the sites
        (columns 1-3: region, column 4: site ID, column 6: strand).
        Column 4 IDs have to be unique.
    print_rows:
        If True, additionally print each output row.

    Raises AssertionError if column 4 IDs in genomic_sites_bed are not
    unique, if a peak's column 4 does not end in ",<digits>", or if a
    peak's site ID is missing from genomic_sites_bed.

    >>> test_in = "test-data/test.peaks.bed"
    >>> test_exp = "test-data/test_exp.peaks.bed"
    >>> test_out = "test-data/test_out.peaks.bed"
    >>> gen_in = "test-data/test.peaks_genomic.bed"
    >>> bed_peaks_to_genomic_peaks(test_in, test_out, gen_in)
    >>> diff_two_files_identical(test_out, test_exp)
    True

    """
    # Read in genomic region info (site ID -> .bed columns).
    id2cols_dic = {}
    with open(genomic_sites_bed) as f:
        for line in f:
            cols = line.strip().split("\t")
            site_id = cols[3]
            assert site_id not in id2cols_dic, "column 4 IDs not unique in given .bed file \"%s\"" % (genomic_sites_bed)
            id2cols_dic[site_id] = cols

    # Pattern extracting the position from "<ID>,<position>".
    sc_pos_re = re.compile(r".+,(\d+)")

    # Read in peaks file and convert coordinates. The output file is opened
    # first (as before) and closed reliably even if a row fails.
    with open(genomic_peak_file, "w") as out_peaks, open(peak_file) as f:
        for line in f:
            cols = line.strip().split("\t")
            site_id = cols[0]
            site_s = int(cols[1])
            site_e = int(cols[2])
            site_id2 = cols[3]
            site_sc = float(cols[4])
            m = sc_pos_re.search(site_id2)
            assert m, "regular expression failed for ID \"%s\"" % (site_id2)
            sc_pos = int(m.group(1))  # 1-based.
            assert site_id in id2cols_dic, "site ID \"%s\" not found in genomic sites dictionary" % (site_id)
            gen_cols = id2cols_dic[site_id]
            gen_chr = gen_cols[0]
            gen_s = int(gen_cols[1])
            gen_e = int(gen_cols[2])
            gen_pol = gen_cols[5]
            if gen_pol == "-":
                # Minus strand: site coordinates run from the genomic end
                # backwards, so start and end swap roles.
                new_s = gen_e - site_e
                new_e = gen_e - site_s
                new_sc_pos = gen_e - sc_pos + 1  # keep 1-based.
            else:
                new_s = site_s + gen_s
                new_e = site_e + gen_s
                new_sc_pos = sc_pos + gen_s
            new_row = "%s\t%i\t%i\t%s,%i\t%f\t%s" % (gen_chr, new_s, new_e, site_id, new_sc_pos, site_sc, gen_pol)
            out_peaks.write("%s\n" % (new_row))
            if print_rows:
                print(new_row)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
983 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
984 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
985 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
986 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def diff_two_files_identical(file1, file2):
    """
    Check whether two files are identical. Return true if diff reports no
    differences.

    file1, file2:
        Paths of the two files to compare.

    Returns False if diff produces any output, i.e. also if diff reports
    an error (for example because one of the files does not exist).

    >>> file1 = "test-data/file1"
    >>> file2 = "test-data/file2"
    >>> diff_two_files_identical(file1, file2)
    True
    >>> file1 = "test-data/test1.bed"
    >>> diff_two_files_identical(file1, file2)
    False

    """
    # Pass the arguments as a list (no shell involved), so paths containing
    # spaces or shell metacharacters reach diff unchanged. "--" stops a
    # path starting with "-" from being read as an option.
    result = subprocess.run(["diff", "--", file1, file2],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # Any output (differences or an error message) means "not identical".
    return not result.stdout
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1007 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1008 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1009 ################################################################################ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1010 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1011 |