Mercurial > repos > rnateam > graphprot_predict_profile
annotate gplib.py @ 6:33b590aa07c1 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit 902e994cb04db968ce797afa676f5fa6512ab6d3
author | bgruening |
---|---|
date | Tue, 06 Aug 2024 14:55:18 +0000 |
parents | ddcf35a868b8 |
children |
rev | line source |
---|---|
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1 import gzip |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
2 import random |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
3 import re |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
4 import statistics |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
5 import subprocess |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
6 from distutils.spawn import find_executable |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
7 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
8 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
9 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
10 Run doctests: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
11 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
12 python3 -m doctest gplib.py |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
13 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
14 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
15 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
16 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
17 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
18 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
19 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
20 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def graphprot_predictions_get_median(predictions_file):
    """
    Given a GraphProt .predictions file, read in site scores and return
    the median value.

    The file is read as tab-separated text; the score of a site is taken
    from the third column of its line. Blank lines are skipped.

    Raises statistics.StatisticsError if the file contains no scores.

    >>> test_file = "test-data/test.predictions"
    >>> graphprot_predictions_get_median(test_file)
    0.571673

    """
    # Site scores list.
    sc_list = []
    # The with-statement closes the file, no explicit close() needed.
    with open(predictions_file) as f:
        for line in f:
            line = line.strip()
            # Tolerate empty lines (e.g. a trailing newline at file end).
            if not line:
                continue
            cols = line.split("\t")
            sc_list.append(float(cols[2]))
    # Return the median.
    return statistics.median(sc_list)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
41 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
42 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
43 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
44 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
45 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
def graphprot_profile_get_tsm(
    profile_file, profile_type="profile", avg_profile_extlr=5
):
    """
    Given a GraphProt .profile file, extract for each site (identified by
    column 1 ID) the top (= highest) score. Then return the median of these
    top scores.

    profile_type can be either "profile" or "avg_profile".
    "avg_profile" means that the position-wise scores will first get
    smoothed out by calculating for each position a new score through
    taking a sequence window -avg_profile_extlr to +avg_profile_extlr of
    the position and calculate the mean score over this window and assign
    it to the position. After that, the maximum score of each site is
    chosen, and the median over all maximum scores is returned.
    "profile" leaves the position-wise scores as they are, directly
    extracting the maximum for each site and then reporting the median.

    The file is read as tab-separated text, with the site ID in column 1
    and the score in column 3. Blank lines are skipped.

    Raises AssertionError if profile_type is not one of the two supported
    values, and statistics.StatisticsError if the file contains no scores.

    >>> test_file = "test-data/test.profile"
    >>> graphprot_profile_get_tsm(test_file)
    3.2

    """
    # Validate up front with an explicit raise: a plain assert statement
    # would be removed when Python runs with -O.
    if profile_type not in ("profile", "avg_profile"):
        raise AssertionError(
            'invalid profile_type argument given: "%s"' % (profile_type)
        )
    # Dictionary of lists, with list of scores (value) for each site (key).
    lists_dic = {}
    # The with-statement closes the file, no explicit close() needed.
    with open(profile_file) as f:
        for line in f:
            line = line.strip()
            # Tolerate empty lines (e.g. a trailing newline at file end).
            if not line:
                continue
            cols = line.split("\t")
            lists_dic.setdefault(cols[0], []).append(float(cols[2]))
    # For each site, extract maximum and store in new list.
    max_list = []
    for sc_list in lists_dic.values():
        # A zero-width window leaves every score unchanged, so smoothing
        # is only applied for a non-zero window extension.
        if profile_type == "avg_profile" and avg_profile_extlr != 0:
            # Convert profile score list to average profile scores list.
            sc_list = list_moving_window_average_values(
                sc_list, win_extlr=avg_profile_extlr
            )
        max_list.append(max(sc_list))
    # Return the median.
    return statistics.median(max_list)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
100 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
101 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
102 ####################################################################### |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
103 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
104 |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
105 def list_moving_window_average_values(in_list, win_extlr=5, method=1): |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
106 """ |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
107 Take a list of numeric values, and calculate for each position a new value, |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
108 by taking the mean value of the window of positions -win_extlr and |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
109 +win_extlr. If full extension is not possible (at list ends), it just |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
110 takes what it gets. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
111 Two implementations of the task are given, chose by method=1 or method=2. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
112 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
113 >>> test_list = [2, 3, 5, 8, 4, 3, 7, 1] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
114 >>> list_moving_window_average_values(test_list, win_extlr=2, method=1) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
115 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
116 >>> list_moving_window_average_values(test_list, win_extlr=2, method=2) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
117 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
118 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
119 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
120 l_list = len(in_list) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
121 assert l_list, "Given list is empty" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
122 new_list = [0] * l_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
123 if win_extlr == 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
124 return l_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
125 if method == 1: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
126 for i in range(l_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
127 s = i - win_extlr |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
128 e = i + win_extlr + 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
129 if s < 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
130 s = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
131 if e > l_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
132 e = l_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
133 # Extract portion and assign value to new list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
134 new_list[i] = statistics.mean(in_list[s:e]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
135 elif method == 2: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
136 for i in range(l_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
137 s = i - win_extlr |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
138 e = i + win_extlr + 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
139 if s < 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
140 s = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
141 if e > l_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
142 e = l_list |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
143 ln = e - s |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
144 sc_sum = 0 |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
145 for j in range(ln): |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
146 sc_sum += in_list[s + j] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
147 new_list[i] = sc_sum / ln |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
148 else: |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
149 assert 0, "invalid method ID given (%i)" % (method) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
150 return new_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
151 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
152 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
153 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
154 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
155 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def echo_add_to_file(echo_string, out_file):
    """
    Append echo_string, followed by a newline, to out_file.

    The text is written directly from Python rather than being pasted into
    an ``echo "..." >> file`` shell command, so quotes, ``$``, backticks and
    whitespace in either argument are stored / used literally instead of
    being interpreted by a shell.

    Raises AssertionError if out_file cannot be opened or written, which is
    the failure type the shell-based implementation reported.

    """
    try:
        with open(out_file, "a") as out_fh:
            out_fh.write("%s\n" % (echo_string))
    except OSError as err:
        # Raised explicitly (not via "assert") so the check survives "python -O".
        raise AssertionError(
            "could not append to file %s:\n%s" % (out_file, err)
        ) from err
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
167 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
168 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
169 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
170 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
171 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def is_tool(name):
    """
    Check whether tool "name" is in PATH.

    Returns True if an executable called name can be located, else False.

    """
    # shutil.which is the supported replacement for
    # distutils.spawn.find_executable (distutils was removed in Python 3.12).
    # Imported locally so this function does not depend on the module header.
    from shutil import which

    return which(name) is not None
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
175 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
176 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
177 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
178 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
179 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def count_fasta_headers(fasta_file):
    """
    Count number of FASTA headers in fasta_file.

    Every line containing a ">" character is counted, which matches what
    the former ``grep -c ">"`` call reported. The file is read directly in
    Python, so paths containing spaces or shell metacharacters work, and a
    missing file raises the usual OSError from open() instead of an
    unparsable grep message.

    >>> test_file = "test-data/test.fa"
    >>> count_fasta_headers(test_file)
    2
    >>> test_file = "test-data/empty_file"
    >>> count_fasta_headers(test_file)
    0

    """
    # Binary mode: spotting ">" needs no decoding, so odd bytes cannot raise.
    with open(fasta_file, "rb") as fasta_fh:
        return sum(1 for line in fasta_fh if b">" in line)
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
196 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
197 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
198 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
199 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
200 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def make_file_copy(in_file, out_file):
    """
    Make a file copy by copying in_file to out_file.

    Only the file content is copied (like ``cat in_file > out_file``), but
    the copy is done with shutil.copyfile instead of a shell command, so
    paths containing spaces or shell metacharacters are handled correctly.

    Raises AssertionError if in_file and out_file are the same path, or if
    the copy fails (e.g. in_file missing, out_file not writable).

    """
    # Imported locally so this function does not depend on the module header.
    from shutil import copyfile

    # Raised explicitly (not via "assert") so the checks survive "python -O".
    if in_file == out_file:
        raise AssertionError(
            "cannot copy file onto itself (in_file: %s, out_file: %s)"
            % (in_file, out_file)
        )
    try:
        copyfile(in_file, out_file)
    except OSError as err:
        raise AssertionError(
            "copying failed (in_file: %s, out_file: %s):\n%s"
            % (in_file, out_file, err)
        ) from err
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
219 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
220 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
221 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
222 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
223 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
def split_fasta_into_test_train_files(
    in_fasta, test_out_fa, train_out_fa, test_size=500
):
    """
    Split in_fasta .fa file into two files (e.g. test, train).

    Sequences are read with read_fasta_into_dic() and their IDs are taken
    in the order returned by random_order_dic_keys_into_list(). The first
    test_size IDs are written to test_out_fa, all remaining IDs to
    train_out_fa. Both output files are always created, even if one of
    them ends up empty.

    """
    # Read in in_fasta.
    seqs_dic = read_fasta_into_dic(in_fasta)
    # Shuffle IDs.
    rand_ids_list = random_order_dic_keys_into_list(seqs_dic)
    # Context managers close both handles even if a write fails midway.
    with open(test_out_fa, "w") as test_out, open(train_out_fa, "w") as train_out:
        for idx, seq_id in enumerate(rand_ids_list):
            out_fh = test_out if idx < test_size else train_out
            out_fh.write(">%s\n%s\n" % (seq_id, seqs_dic[seq_id]))
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
247 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
248 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
249 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
250 |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
251 |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
252 def check_seqs_dic_format(seqs_dic): |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
253 """ |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
254 Check sequence dictionary for lowercase-only sequences or sequences |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
255 wich have lowercase nts in between uppercase nts. |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
256 Return suspicious IDs as list or empty list if not hits. |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
257 IDs with lowercase-only sequences. |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
258 |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
259 >>> seqs_dic = {"id1" : "acguACGU", "id2" : "acgua", "id3" : "acgUUaUcc"} |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
260 >>> check_seqs_dic_format(seqs_dic) |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
261 ['id2', 'id3'] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
262 >>> seqs_dic = {"id1" : "acgAUaa", "id2" : "ACGUACUA"} |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
263 >>> check_seqs_dic_format(seqs_dic) |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
264 [] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
265 |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
266 """ |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
267 assert seqs_dic, "given seqs_dic empty" |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
268 bad_seq_ids = [] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
269 for seq_id in seqs_dic: |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
270 seq = seqs_dic[seq_id] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
271 if re.search("^[acgtun]+$", seq): |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
272 bad_seq_ids.append(seq_id) |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
273 if re.search("[ACGTUN][acgtun]+[ACGTUN]", seq): |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
274 bad_seq_ids.append(seq_id) |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
275 return bad_seq_ids |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
276 |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
277 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
278 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
279 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
280 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
def read_fasta_into_dic(
    fasta_file,
    seqs_dic=False,
    ids_dic=False,
    read_dna=False,
    short_ensembl=False,
    reject_lc=False,
    convert_to_uc=False,
    skip_n_seqs=True,
):
    """
    Read in FASTA sequences, convert to RNA (or to DNA if read_dna=True),
    store in dictionary and return dictionary.

    fasta_file:
        Path to FASTA file. A path ending in ".gz" is read with gzip.
    seqs_dic:
        Dictionary (sequence ID -> sequence) to add the sequences to.
        If not given or empty, a new dictionary is created.
    ids_dic:
        If given and non-empty, only sequences whose ID is a key of
        ids_dic are read in.
    read_dna:
        Store sequences as DNA (U -> T) instead of RNA (T -> U).
    short_ensembl:
        Keep only the header part before the first "." that is followed
        by more characters. This assumes ENSEMBL header format
        ">ENST00000631435.1 cdna ...".
    reject_lc:
        Assert that no sequence line contains lowercase characters.
    convert_to_uc:
        Convert sequences to uppercase before storing them.
    skip_n_seqs:
        Print a warning and discard sequences containing N nucleotides.

    Raises AssertionError on a non-unique FASTA header, or (with
    reject_lc=True) on a lowercase character inside a sequence.

    >>> test_fasta = "test-data/test.fa"
    >>> read_fasta_into_dic(test_fasta)
    {'seq1': 'acguACGUacgu', 'seq2': 'ugcaUGCAugcaACGUacgu'}
    >>> test_fasta = "test-data/test2.fa"
    >>> read_fasta_into_dic(test_fasta)
    {}
    >>> test_fasta = "test-data/test.ensembl.fa"
    >>> read_fasta_into_dic(test_fasta, read_dna=True, short_ensembl=True)
    {'ENST00000415118': 'GAAATAGT', 'ENST00000448914': 'ACTGGGGGATACGAAAA'}
    >>> test_fasta = "test-data/test4.fa"
    >>> read_fasta_into_dic(test_fasta)
    {'1': 'gccuAUGUuuua', '2': 'cugaAACUaugu'}

    """
    if not seqs_dic:
        seqs_dic = {}
    seq_id = ""

    # Compile once; both patterns are applied to every line of the file.
    header_pattern = re.compile(">(.+)")
    seq_pattern = re.compile("([ACGTUN]+)", re.I)

    # gzip.open needs explicit text mode to yield str lines like open() does.
    open_func = gzip.open if re.search(r".+\.gz$", fasta_file) else open

    # Go through FASTA file, extract sequences.
    # The context manager closes the file even if an assertion below fails.
    with open_func(fasta_file, "rt") as f:
        for line in f:
            header = header_pattern.search(line)
            if header:
                seq_id = header.group(1)
                # If there is a ".", take only first part of header.
                # This assumes ENSEMBL header format ">ENST00000631435.1 cdna ..."
                if short_ensembl:
                    short_id = re.search(r"(.+?)\..+", seq_id)
                    if short_id:
                        seq_id = short_id.group(1)
                assert (
                    seq_id not in seqs_dic
                ), 'non-unique FASTA header "%s" in "%s"' % (seq_id, fasta_file)
                # Register the ID unless an ID filter is set and excludes it.
                if not ids_dic or seq_id in ids_dic:
                    seqs_dic[seq_id] = ""
                continue

            # Sequence lines of filtered-out or discarded entries are ignored.
            if seq_id not in seqs_dic:
                continue
            m = seq_pattern.search(line)
            if not m:
                continue
            seq = m.group(1)
            if reject_lc:
                # seq_id is a string, so it has to be formatted with %s.
                assert not re.search(
                    "[a-z]", seq
                ), 'lc char detected in seq "%s" (reject_lc=True)' % (seq_id)
            if convert_to_uc:
                seq = seq.upper()
            # If sequences with N nucleotides should be skipped.
            if skip_n_seqs and ("n" in seq or "N" in seq):
                print(
                    'WARNING: "%s" contains N. Discarding '
                    "sequence ... " % (seq_id)
                )
                # Removing the entry also makes the remaining lines of
                # this sequence get skipped by the membership check above.
                del seqs_dic[seq_id]
                continue
            # Convert to DNA or RNA, concatenate sequence. Use seq (not the
            # raw match) so that convert_to_uc actually takes effect.
            if read_dna:
                seqs_dic[seq_id] += seq.replace("U", "T").replace("u", "t")
            else:
                seqs_dic[seq_id] += seq.replace("T", "U").replace("t", "u")
    return seqs_dic
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
364 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
365 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
366 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
367 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
368 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def random_order_dic_keys_into_list(in_dic):
    """
    Collect the keys of in_dic in a new list, shuffle that list in place
    with random.shuffle and return it. The dictionary itself is not
    modified.

    """
    shuffled_ids = list(in_dic)
    random.shuffle(shuffled_ids)
    return shuffled_ids
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
379 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
380 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
381 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
382 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
383 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
384 def graphprot_get_param_string(params_file): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
385 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
386 Get parameter string from GraphProt .params file. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
387 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
388 >>> test_params = "test-data/test.params" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
389 >>> graphprot_get_param_string(test_params) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
390 '-epochs 20 -lambda 0.01 -R 1 -D 3 -bitsize 14 -onlyseq ' |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
391 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
392 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
393 param_string = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
394 with open(params_file) as f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
395 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
396 cols = line.strip().split(" ") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
397 param = cols[0] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
398 setting = cols[1] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
399 if re.search(".+:", param): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
400 m = re.search("(.+):", line) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
401 par = m.group(1) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
402 if re.search("pos_train.+", line): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
403 continue |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
404 if par == "model_type": |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
405 if setting == "sequence": |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
406 param_string += "-onlyseq " |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
407 else: |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
408 param_string += "-%s %s " % (par, setting) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
409 else: |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
410 assert 0, 'pattern matching failed for string "%s"' % (param) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
411 return param_string |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
412 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
413 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
414 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
415 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
416 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def seqs_dic_count_uc_nts(seqs_dic):
    """
    Return the total number of uppercase characters (A-Z) found in all
    sequences of the given dictionary (sequence ID -> sequence string).

    An empty dictionary triggers an AssertionError.

    >>> seqs_dic = {'seq1': "acgtACGTacgt", 'seq2': 'acgtACacgt'}
    >>> seqs_dic_count_uc_nts(seqs_dic)
    6
    >>> seqs_dic = {'seq1': "acgtacgt", 'seq2': 'acgtacgt'}
    >>> seqs_dic_count_uc_nts(seqs_dic)
    0

    """
    assert seqs_dic, "Given sequence dictionary empty"
    # Compile once, then add up the per-sequence match counts.
    uppercase = re.compile(r"[A-Z]")
    return sum(
        len(uppercase.findall(seqs_dic[seq_id])) for seq_id in seqs_dic
    )
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
435 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
436 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
437 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
438 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
439 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def seqs_dic_count_lc_nts(seqs_dic):
    """
    Return the total number of lowercase characters (a-z) found in all
    sequences of the given dictionary (sequence ID -> sequence string).

    An empty dictionary triggers an AssertionError.

    >>> seqs_dic = {'seq1': "gtACGTac", 'seq2': 'cgtACacg'}
    >>> seqs_dic_count_lc_nts(seqs_dic)
    10
    >>> seqs_dic = {'seq1': "ACGT", 'seq2': 'ACGTAC'}
    >>> seqs_dic_count_lc_nts(seqs_dic)
    0

    """
    assert seqs_dic, "Given sequence dictionary empty"
    # Compile once, then add up the per-sequence match counts.
    lowercase = re.compile(r"[a-z]")
    return sum(
        len(lowercase.findall(seqs_dic[seq_id])) for seq_id in seqs_dic
    )
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
458 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
459 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
460 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
461 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
462 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def count_file_rows(in_file):
    """
    Count number of file rows for given input file.

    Rows are counted the way "wc -l" counts them: one row per newline
    character, so a final line without a trailing newline is not counted.
    The file is read directly in Python rather than through a shell
    pipeline, so paths containing spaces or shell metacharacters are safe.

    in_file:
        Path of the file to count rows in.

    Returns the row count as int. Raises OSError (e.g. FileNotFoundError)
    if the file cannot be opened.

    >>> test_file = "test-data/test1.bed"
    >>> count_file_rows(test_file)
    7
    >>> test_file = "test-data/empty_file"
    >>> count_file_rows(test_file)
    0

    """
    row_count = 0
    # Binary mode and fixed-size chunks: no decoding errors, constant memory.
    with open(in_file, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            row_count += chunk.count(b"\n")
    return row_count
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
479 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
480 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
481 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
482 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
483 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def bed_check_six_col_format(bed_file):
    """
    Check whether given .bed file has 6 columns.

    The first line of the file is stripped of surrounding whitespace and
    split on tabs; the file counts as 6-column format if that yields
    exactly 6 fields. An empty file returns False.

    bed_file:
        Path of the .bed file to check.

    Returns True or False. Raises OSError if the file cannot be opened.

    >>> test_bed = "test-data/test1.bed"
    >>> bed_check_six_col_format(test_bed)
    True
    >>> test_bed = "test-data/empty_file"
    >>> bed_check_six_col_format(test_bed)
    False

    """
    # NOTE(review): the flattened annotate view does not show whether the
    # original "break" sat inside the "if" (scan until a 6-column line) or
    # after it (inspect first line only). This keeps the first-line-only
    # reading -- confirm against the repository before applying.
    with open(bed_file) as f:
        first_line = f.readline()
    # An empty file gives "" -> [""] -> 1 field -> False.
    return len(first_line.strip().split("\t")) == 6
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
506 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
507 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
508 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
509 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
510 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def bed_check_unique_ids(bed_file):
    """
    Check whether .bed file (6 column format with IDs in column 4)
    has unique column 4 IDs.

    The file is scanned directly in Python rather than through a
    "cut | sort | uniq -d" shell pipeline, so paths containing spaces or
    shell metacharacters are safe and an unreadable file raises instead
    of being reported as containing duplicates.

    Column extraction follows "cut -f 4": a line without any tab is taken
    whole as the ID, a line with tabs but fewer than 4 fields has an empty
    ID. Empty IDs are ignored, as the pipeline version never reported
    them as duplicates either.

    bed_file:
        Path of the .bed file to check.

    Returns True if no non-empty column 4 ID occurs more than once, else
    False. Raises OSError if the file cannot be opened.

    >>> test_bed = "test-data/test1.bed"
    >>> bed_check_unique_ids(test_bed)
    True
    >>> test_bed = "test-data/test2.bed"
    >>> bed_check_unique_ids(test_bed)
    False

    """
    seen_ids = set()
    # Binary mode: IDs are compared byte-wise, no decoding errors possible.
    with open(bed_file, "rb") as f:
        for line in f:
            cols = line.rstrip(b"\n").split(b"\t")
            if len(cols) == 1:
                col4_id = cols[0]
            elif len(cols) >= 4:
                col4_id = cols[3]
            else:
                col4_id = b""
            if not col4_id:
                continue
            if col4_id in seen_ids:
                # First repeated ID settles the answer.
                return False
            seen_ids.add(col4_id)
    return True
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
531 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
532 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
533 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
534 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
535 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def get_seq_lengths_from_seqs_dic(seqs_dic):
    """
    Given a dictionary of sequences, return dictionary of sequence lengths.
    Mapping is sequence ID -> sequence length.

    Raises AssertionError if the given dictionary is empty.

    >>> get_seq_lengths_from_seqs_dic({'s1': 'ACGU', 's2': 'AC'})
    {'s1': 4, 's2': 2}

    """
    # Raise explicitly instead of using an assert statement, so the check
    # is still enforced when Python runs with -O (which strips asserts).
    # AssertionError is kept so existing callers see the same exception.
    if not seqs_dic:
        raise AssertionError("sequence dictionary seems to be empty")
    return {seq_id: len(seq) for seq_id, seq in seqs_dic.items()}
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
547 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
548 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
549 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
550 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
551 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def bed_get_region_lengths(bed_file):
    """
    Read in .bed file, store and return region lengths in dictionary.
    key   :  region ID (.bed col4)
    value :  region length (.bed col3-col2)

    Blank lines are skipped. Raises AssertionError if a column 4 ID occurs
    more than once, or if no region was read from the file.

    >>> test_file = "test-data/test4.bed"
    >>> bed_get_region_lengths(test_file)
    {'CLIP1': 10, 'CLIP2': 10}

    """
    id2len_dic = {}
    with open(bed_file) as f:
        for line in f:
            row = line.strip()
            # Tolerate empty lines (e.g. a trailing newline at end of file)
            # instead of failing on the column access below.
            if not row:
                continue
            cols = row.split("\t")
            site_s = int(cols[1])
            site_e = int(cols[2])
            site_id = cols[3]
            # Explicit raises (not assert statements) so the checks are
            # still enforced under "python -O".
            if site_id in id2len_dic:
                raise AssertionError(
                    'column 4 IDs not unique in given .bed file "%s"'
                    % (bed_file)
                )
            id2len_dic[site_id] = site_e - site_s
    if not id2len_dic:
        raise AssertionError(
            'No IDs read into dic (input file "%s" empty or malformatted?)'
            % (bed_file)
        )
    return id2len_dic
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
580 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
581 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
582 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
583 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
584 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def graphprot_get_param_dic(params_file):
    """
    Read in GraphProt .params file and store in dictionary.
    key = parameter
    value = parameter value

    Each line is split on single spaces; the first token must end in ":"
    (parameter name) and the second token is stored as its value (string).
    Lines with fewer than two tokens, or whose first token carries no
    "name:" part, are ignored.

    >>> params_file = "test-data/test.params"
    >>> graphprot_get_param_dic(params_file)
    {'epochs': '20', 'lambda': '0.01', 'R': '1', 'D': '3', 'bitsize': '14', \
'model_type': 'sequence', 'pos_train_ws_pred_median': '0.760321', \
'pos_train_profile_median': '5.039610', \
'pos_train_avg_profile_median_1': '4.236340', \
'pos_train_avg_profile_median_2': '3.868431', \
'pos_train_avg_profile_median_3': '3.331277', \
'pos_train_avg_profile_median_4': '2.998667', \
'pos_train_avg_profile_median_5': '2.829782', \
'pos_train_avg_profile_median_6': '2.626623', \
'pos_train_avg_profile_median_7': '2.447083', \
'pos_train_avg_profile_median_8': '2.349919', \
'pos_train_avg_profile_median_9': '2.239829', \
'pos_train_avg_profile_median_10': '2.161676'}

    """
    param_dic = {}
    with open(params_file) as f:
        for line in f:
            cols = line.strip().split(" ")
            # Blank or single-token lines cannot hold a "name: value" pair.
            if len(cols) < 2:
                continue
            param, setting = cols[0], cols[1]
            # Extract the name from the parameter token only (not the whole
            # line), so a ":" inside the value cannot leak into the key.
            m = re.search(r"(.+):", param)
            if m:
                param_dic[m.group(1)] = setting
    return param_dic
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
620 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
621 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
622 ####################################################################### |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
623 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
624 |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
def graphprot_filter_predictions_file(in_file, out_file, sc_thr=0):
    """
    Filter GraphProt .predictions file by given score threshold sc_thr.

    Reads tab-separated rows from in_file, takes the score from column 3
    and writes every row with score >= sc_thr to out_file (rows are
    written stripped of surrounding whitespace, one per line).
    Blank lines are skipped.
    """
    # Both handles live in one with-statement so the output file is closed
    # even if a row fails to parse. The output file is opened first, as
    # before, so it is created/truncated before the input is read.
    with open(out_file, "w") as out_pred, open(in_file) as f:
        for line in f:
            row = line.strip()
            if not row:
                continue
            cols = row.split("\t")
            if float(cols[2]) < sc_thr:
                continue
            out_pred.write("%s\n" % (row))
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
640 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
641 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
642 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
643 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
644 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def fasta_read_in_ids(fasta_file):
    """
    Given a .fa file, read in header IDs in order appearing in file,
    and store in list.

    The ID is everything following the first ">" on a line, up to the
    end of that line.

    >>> test_file = "test-data/test3.fa"
    >>> fasta_read_in_ids(test_file)
    ['SERBP1_K562_rep01_544', 'SERBP1_K562_rep02_709', 'SERBP1_K562_rep01_316']

    """
    ids_list = []
    with open(fasta_file) as f:
        for line in f:
            # One search per line: the capture group yields the ID directly.
            m = re.search(r">(.+)", line)
            if m:
                ids_list.append(m.group(1))
    return ids_list
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
664 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
665 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
666 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
667 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
668 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
def _write_avg_profile_site(out, seq_id, start_pos, scores, ap_extlr):
    """Smooth one site's scores and write them as average profile rows.

    out       : open, writable text file handle.
    seq_id    : value written in column 1 (site ID or FASTA sequence ID).
    start_pos : 0-based position of the first score of the site.
    scores    : list of position-wise scores of the site.
    ap_extlr  : window extension passed on as win_extlr to
                list_moving_window_average_values().

    """
    aps_list = list_moving_window_average_values(scores, win_extlr=ap_extlr)
    for i, sc in enumerate(aps_list):
        pos = i + start_pos + 1  # make 1-based.
        out.write("%s\t%i\t%f\n" % (seq_id, pos, sc))


def graphprot_profile_calc_avg_profile(
    in_file, out_file, ap_extlr=5, seq_ids_list=False, method=1
):
    """
    Given a GraphProt .profile file, calculate average profiles and output
    average profile file.
    Average profile means that the position-wise scores will get smoothed
    out by calculating for each position a new score, taking a sequence
    window -ap_extlr to +ap_extlr relative to the position
    and calculate the mean score over this window. The mean score then
    becomes the new average profile score at this position.
    Two different implementations of the task are given:
    method=1 (new python implementation, slower + more memory but easy to read)
    method=2 (old perl implementation, faster and less memory but more code)

    in_file      : tab-separated input file with the columns
                   site ID (integer), 0-based position, score.
                   Empty lines are skipped.
    out_file     : output file with the columns
                   ID, 1-based position, average profile score.
    ap_extlr     : window extension to the left and right of each position.
    seq_ids_list : optional list of sequence IDs. If given, the integer
                   site ID is used as index into this list and the
                   list entry is written instead of the site ID.
                   method=1 additionally asserts that the number of list
                   entries equals the number of site IDs read; method=2
                   streams the input and does not perform this check.
    method       : 1 or 2 (see above). Any other value does nothing.

    >>> in_file = "test-data/test2.profile"
    >>> out_file1 = "test-data/test2_1.avg_profile"
    >>> out_file2 = "test-data/test2_2.avg_profile"
    >>> out_file4 = "test-data/test2_3.avg_profile"
    >>> graphprot_profile_calc_avg_profile(in_file, \
                                           out_file1, ap_extlr=2, method=1)
    >>> graphprot_profile_calc_avg_profile(in_file, \
                                           out_file2, ap_extlr=2, method=2)
    >>> diff_two_files_identical(out_file1, out_file2)
    True
    >>> test_list = ["s1", "s2", "s3", "s4"]
    >>> out_file3_exp = "test-data/test3_added_ids_exp.avg_profile"
    >>> out_file3 = "test-data/test3_added_ids_out.avg_profile"
    >>> graphprot_profile_calc_avg_profile(in_file, out_file3, \
                                           ap_extlr=2, method=1, seq_ids_list=test_list)
    >>> diff_two_files_identical(out_file3_exp, out_file3)
    True

    """
    if method == 1:
        # Dictionary of lists, with list of scores (value) for each site (key).
        lists_dic = {}
        # First (0-based) position seen for each site.
        site_starts_dic = {}
        with open(in_file) as f:
            for line in f:
                line = line.strip()
                # Tolerate blank / trailing empty lines.
                if not line:
                    continue
                cols = line.split("\t")
                site_id = int(cols[0])
                pos = int(cols[1])  # 0-based.
                score = float(cols[2])
                # Store first position of site.
                if site_id not in site_starts_dic:
                    site_starts_dic[site_id] = pos
                lists_dic.setdefault(site_id, []).append(score)
        # Check number of IDs (# FASTA IDs has to be same as # site IDs).
        # Done before the output file is opened, so a mismatch does not
        # leave a truncated output file behind.
        if seq_ids_list:
            c_seq_ids = len(seq_ids_list)
            c_site_ids = len(site_starts_dic)
            assert (
                c_seq_ids == c_site_ids
            ), "# sequence IDs != # site IDs (%i != %i)" % (c_seq_ids, c_site_ids)
        # The with block closes the output file even if an ID lookup raises.
        with open(out_file, "w") as out:
            # For each site, calculate and write average profile scores.
            for site_id, scores in lists_dic.items():
                # Get original FASTA sequence ID.
                seq_id = seq_ids_list[site_id] if seq_ids_list else site_id
                _write_avg_profile_site(
                    out, seq_id, site_starts_dic[site_id], scores, ap_extlr
                )
    elif method == 2:
        # ID of the site currently being collected (None before first row).
        old_id = None
        # Scores of the site currently being collected.
        scores_list = []
        site_starts_dic = {}

        # Output is opened first (as before), then the input is streamed
        # and each site is written as soon as the next site ID shows up.
        with open(out_file, "w") as out, open(in_file) as f:

            def flush_site():
                # Write the collected block of old_id, if there is one.
                if not scores_list:
                    return
                # Get original FASTA sequence ID.
                seq_id = seq_ids_list[old_id] if seq_ids_list else old_id
                _write_avg_profile_site(
                    out, seq_id, site_starts_dic[old_id], scores_list, ap_extlr
                )

            for line in f:
                line = line.strip()
                # Tolerate blank / trailing empty lines.
                if not line:
                    continue
                cols = line.split("\t")
                cur_id = int(cols[0])
                pos = int(cols[1])  # 0-based.
                score = float(cols[2])
                # Store first position of site.
                if cur_id not in site_starts_dic:
                    site_starts_dic[cur_id] = pos
                # Case: new site (new column 1 ID).
                if cur_id != old_id:
                    # Process old id scores, then start a new block.
                    flush_site()
                    scores_list = []
                    old_id = cur_id
                scores_list.append(score)
            # Process last block.
            flush_site()
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
799 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
800 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
801 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
802 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
803 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
804 def graphprot_profile_extract_peak_regions( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
805 in_file, out_file, max_merge_dist=0, sc_thr=0 |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
806 ): |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
807 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
808 Extract peak regions from GraphProt .profile file. |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
809 Store the peak regions (defined as regions with scores >= sc_thr) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
810 to out_file in 6-column .bed format. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
811 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
812 TODO: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
813 Add option for genomic coordinates input (+ - polarity support). |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
814 Output genomic regions instead of sequence regions. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
815 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
816 >>> in_file = "test-data/test4.avg_profile" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
817 >>> out_file = "test-data/test4_out.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
818 >>> exp_file = "test-data/test4_out_exp.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
819 >>> exp2_file = "test-data/test4_out_exp2.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
820 >>> empty_file = "test-data/empty_file" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
821 >>> graphprot_profile_extract_peak_regions(in_file, out_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
822 >>> diff_two_files_identical(out_file, exp_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
823 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
824 >>> graphprot_profile_extract_peak_regions(in_file, out_file, sc_thr=10) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
825 >>> diff_two_files_identical(out_file, empty_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
826 True |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
827 >>> graphprot_profile_extract_peak_regions(in_file, out_file, \ |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
828 max_merge_dist=2) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
829 >>> diff_two_files_identical(out_file, exp2_file) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
830 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
831 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
832 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
833 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
834 OUTPEAKS = open(out_file, "w") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
835 # Old site ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
836 old_id = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
837 # Current site ID. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
838 cur_id = "" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
839 # Scores list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
840 scores_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
841 site_starts_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
842 with open(in_file) as f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
843 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
844 cols = line.strip().split("\t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
845 cur_id = cols[0] |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
846 pos = int(cols[1]) # 0- or 1-based (site start is made 0-based below). |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
847 score = float(cols[2]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
848 # Store first position of site. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
849 if cur_id not in site_starts_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
850 # If first position != zero, we assume positions are 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
851 if pos != 0: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
852 # Make index 0-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
853 site_starts_dic[cur_id] = pos - 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
854 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
855 site_starts_dic[cur_id] = pos |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
856 # Case: new site (new column 1 ID). |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
857 if cur_id != old_id: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
858 # Process old id scores. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
859 if scores_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
860 # Extract peaks from region. |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
861 peak_list = list_extract_peaks( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
862 scores_list, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
863 max_merge_dist=max_merge_dist, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
864 coords="bed", |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
865 sc_thr=sc_thr, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
866 ) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
867 start_pos = site_starts_dic[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
868 # Print out peaks in .bed format. |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
869 for ln in peak_list: |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
870 peak_s = start_pos + ln[0] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
871 peak_e = start_pos + ln[1] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
872 site_id = "%s,%i" % (old_id, ln[2]) |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
873 OUTPEAKS.write( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
874 "%s\t%i\t%i" |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
875 "\t%s\t%f\t+\n" % (old_id, peak_s, peak_e, site_id, ln[3]) |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
876 ) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
877 # Reset list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
878 scores_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
879 old_id = cur_id |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
880 scores_list.append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
881 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
882 # Add to scores_list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
883 scores_list.append(score) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
884 f.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
885 # Process last block. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
886 if scores_list: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
887 # Extract peaks from region. |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
888 peak_list = list_extract_peaks( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
889 scores_list, max_merge_dist=max_merge_dist, coords="bed", sc_thr=sc_thr |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
890 ) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
891 start_pos = site_starts_dic[old_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
892 # Print out peaks in .bed format. |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
893 for ln in peak_list: |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
894 peak_s = start_pos + ln[0] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
895 peak_e = start_pos + ln[1] |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
896 site_id = "%s,%i" % (old_id, ln[2]) # best score also 1-based. |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
897 OUTPEAKS.write( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
898 "%s\t%i\t%i\t%s\t%f\t+\n" % (old_id, peak_s, peak_e, site_id, ln[3]) |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
899 ) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
900 OUTPEAKS.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
901 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
902 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
903 ####################################################################### |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
904 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
905 |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
906 def list_extract_peaks(in_list, max_merge_dist=0, coords="list", sc_thr=0): |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
907 """ |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
908 Extract peak regions from list. |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
909 A peak region is defined as a region with scores >= score threshold (sc_thr). |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
910 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
911 coords=bed : peak start 0-based, peak end 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
912 coords=list : peak start 0-based, peak end 0-based. |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
913 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
914 >>> test_list = [-1, 0, 2, 4.5, 1, -1, 5, 6.5] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
915 >>> list_extract_peaks(test_list) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
916 [[1, 4, 3, 4.5], [6, 7, 7, 6.5]] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
917 >>> list_extract_peaks(test_list, sc_thr=2) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
918 [[2, 3, 3, 4.5], [6, 7, 7, 6.5]] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
919 >>> list_extract_peaks(test_list, sc_thr=2, coords="bed") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
920 [[2, 4, 4, 4.5], [6, 8, 8, 6.5]] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
921 >>> list_extract_peaks(test_list, sc_thr=10) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
922 [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
923 >>> test_list = [2, -1, 3, -1, 4, -1, -1, 6, 9] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
924 >>> list_extract_peaks(test_list, max_merge_dist=2) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
925 [[0, 4, 4, 4], [7, 8, 8, 9]] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
926 >>> list_extract_peaks(test_list, max_merge_dist=3) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
927 [[0, 8, 8, 9]] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
928 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
929 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
930 # Check. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
931 assert len(in_list), "Given list is empty" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
932 # Peak regions list. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
933 peak_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
934 # Help me. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
935 inside = False |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
936 pr_s = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
937 pr_e = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
938 pr_top_pos = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
939 pr_top_sc = -100000 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
940 for i, sc in enumerate(in_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
941 # Part of peak region? |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
942 if sc >= sc_thr: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
943 # At peak start. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
944 if not inside: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
945 pr_s = i |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
946 pr_e = i |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
947 inside = True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
948 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
949 # Inside peak region. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
950 pr_e = i |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
951 # Store top position. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
952 if sc > pr_top_sc: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
953 pr_top_sc = sc |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
954 pr_top_pos = i |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
955 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
956 # Before was peak region? |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
957 if inside: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
958 # Store peak region. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
959 peak_infos = [pr_s, pr_e, pr_top_pos, pr_top_sc] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
960 peak_list.append(peak_infos) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
961 inside = False |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
962 pr_top_pos = 0 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
963 pr_top_sc = -100000 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
964 # If peak at the end, also report. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
965 if inside: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
966 # Store peak region. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
967 peak_infos = [pr_s, pr_e, pr_top_pos, pr_top_sc] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
968 peak_list.append(peak_infos) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
969 # Merge peaks. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
970 if max_merge_dist and len(peak_list) > 1: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
971 iterate = True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
972 while iterate: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
973 merged_peak_list = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
974 added_peaks_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
975 peaks_merged = False |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
976 for i, l in enumerate(peak_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
977 if i in added_peaks_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
978 continue |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
979 j = i + 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
980 # Last element. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
981 if j == len(peak_list): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
982 if i not in added_peaks_dic: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
983 merged_peak_list.append(peak_list[i]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
984 break |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
985 # Compare two elements. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
986 new_peak = [] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
987 if (peak_list[j][0] - peak_list[i][1]) <= max_merge_dist: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
988 peaks_merged = True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
989 new_top_pos = peak_list[i][2] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
990 new_top_sc = peak_list[i][3] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
991 if peak_list[i][3] < peak_list[j][3]: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
992 new_top_pos = peak_list[j][2] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
993 new_top_sc = peak_list[j][3] |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
994 new_peak = [ |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
995 peak_list[i][0], |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
996 peak_list[j][1], |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
997 new_top_pos, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
998 new_top_sc, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
999 ] |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1000 # If two peaks were merged. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1001 if new_peak: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1002 merged_peak_list.append(new_peak) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1003 added_peaks_dic[i] = 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1004 added_peaks_dic[j] = 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1005 else: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1006 merged_peak_list.append(peak_list[i]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1007 added_peaks_dic[i] = 1 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1008 if not peaks_merged: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1009 iterate = False |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1010 peak_list = merged_peak_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1011 peaks_merged = False |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1012 # If peak coordinates should be in .bed format, make peak ends 1-based. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1013 if coords == "bed": |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1014 for i in range(len(peak_list)): |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1015 peak_list[i][1] += 1 |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1016 peak_list[i][2] += 1 # 1-base best score position too. |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1017 return peak_list |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1018 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1019 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1020 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1021 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1022 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1023 def bed_peaks_to_genomic_peaks( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1024 peak_file, genomic_peak_file, genomic_sites_bed, print_rows=False |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1025 ): |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1026 """ |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1027 Given a .bed file of sequence peak regions (possible coordinates from |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1028 0 to length of s), convert peak coordinates to genomic coordinates. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1029 Do this by taking genomic regions of sequences as input. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1030 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1031 >>> test_in = "test-data/test.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1032 >>> test_exp = "test-data/test_exp.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1033 >>> test_out = "test-data/test_out.peaks.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1034 >>> gen_in = "test-data/test.peaks_genomic.bed" |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1035 >>> bed_peaks_to_genomic_peaks(test_in, test_out, gen_in) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1036 >>> diff_two_files_identical(test_out, test_exp) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1037 True |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1038 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1039 """ |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1040 # Read in genomic region info. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1041 id2row_dic = {} |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1042 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1043 with open(genomic_sites_bed) as f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1044 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1045 row = line.strip() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1046 cols = line.strip().split("\t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1047 site_id = cols[3] |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1048 assert ( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1049 site_id not in id2row_dic |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1050 ), 'column 4 IDs not unique in given .bed file "%s"' % (genomic_sites_bed) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1051 id2row_dic[site_id] = row |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1052 f.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1053 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1054 # Read in peaks file and convert coordinates. |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1055 OUTPEAKS = open(genomic_peak_file, "w") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1056 with open(peak_file) as f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1057 for line in f: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1058 cols = line.strip().split("\t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1059 site_id = cols[0] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1060 site_s = int(cols[1]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1061 site_e = int(cols[2]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1062 site_id2 = cols[3] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1063 site_sc = float(cols[4]) |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1064 assert re.search( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1065 ".+,.+", site_id2 |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1066 ), 'regular expression failed for ID "%s"' % (site_id2) |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1067 m = re.search(r".+,(\d+)", site_id2) |
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1068 sc_pos = int(m.group(1)) # 1-based. |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1069 assert ( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1070 site_id in id2row_dic |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1071 ), 'site ID "%s" not found in genomic sites dictionary' % (site_id) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1072 row = id2row_dic[site_id] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1073 rowl = row.split("\t") |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1074 gen_chr = rowl[0] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1075 gen_s = int(rowl[1]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1076 gen_e = int(rowl[2]) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1077 gen_pol = rowl[5] |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1078 new_s = site_s + gen_s |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1079 new_e = site_e + gen_s |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1080 new_sc_pos = sc_pos + gen_s |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1081 if gen_pol == "-": |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1082 new_s = gen_e - site_e |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1083 new_e = gen_e - site_s |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1084 new_sc_pos = gen_e - sc_pos + 1 # keep 1-based. |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1085 new_row = "%s\t%i\t%i\t%s,%i\t%f\t%s" % ( |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1086 gen_chr, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1087 new_s, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1088 new_e, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1089 site_id, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1090 new_sc_pos, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1091 site_sc, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1092 gen_pol, |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1093 ) |
3
ace92c9a4653
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents:
1
diff
changeset
|
1094 OUTPEAKS.write("%s\n" % (new_row)) |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1095 if print_rows: |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1096 print(new_row) |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1097 OUTPEAKS.close() |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1098 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1099 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1100 ####################################################################### |
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1101 |
1
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1102 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
def diff_two_files_identical(file1, file2):
    """
    Check whether two files are identical. Return True if diff reports no
    differences.

    The external "diff" program is called with the two paths passed as
    separate arguments (no shell involved), so paths containing spaces or
    shell metacharacters are handed to diff unchanged.

    file1, file2:
        Paths of the two files to compare.

    Returns True if diff produces no output at all. Returns False if diff
    prints anything (reported differences, or an error message such as for
    a missing file), or if the diff program could not be started.

    >>> file1 = "test-data/file1"
    >>> file2 = "test-data/file2"
    >>> diff_two_files_identical(file1, file2)
    True
    >>> file1 = "test-data/test1.bed"
    >>> diff_two_files_identical(file1, file2)
    False

    """
    # Argument list instead of a shell string: no word splitting, no shell
    # interpretation of the paths. "--" ends option parsing so that a path
    # starting with "-" is not mistaken for a diff option.
    check_cmd = ["diff", "--", file1, file2]
    try:
        result = subprocess.run(
            check_cmd,
            stdout=subprocess.PIPE,
            # Merge stderr into stdout so error messages also count as
            # "diff reported something", as with subprocess.getoutput().
            stderr=subprocess.STDOUT,
        )
    except OSError:
        # diff could not be executed (e.g. not installed). The previous
        # shell-based call yielded an error message here, i.e. "not same".
        return False
    # Any output (differences or errors) means the files are not identical.
    return not result.stdout
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1123 |
20429f4c1b95
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff
changeset
|
1124 |
5
ddcf35a868b8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents:
3
diff
changeset
|
1125 ####################################################################### |