annotate gplib.py @ 5:ddcf35a868b8 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
author bgruening
date Wed, 05 Jun 2024 16:40:51 +0000
parents ace92c9a4653
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
1 import gzip
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
2 import random
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
3 import re
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
4 import statistics
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
5 import subprocess
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
6 from distutils.spawn import find_executable
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
7
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
8 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
9
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
10 Run doctests:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
11
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
12 python3 -m doctest gplib.py
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
13
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
14
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
15 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
16
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
17
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
18 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
19
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
20
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def graphprot_predictions_get_median(predictions_file):
    """
    Given a GraphProt .predictions file, read in site scores and return
    the median value.

    Every non-blank line is split on tabs and its third column is parsed
    as the site score. Blank lines (e.g. a trailing empty line) are
    skipped instead of aborting the whole read.

    Raises:
        IndexError: a non-blank line has fewer than three columns.
        ValueError: the third column is not a valid float.
        statistics.StatisticsError: the file contains no scores.

    >>> test_file = "test-data/test.predictions"
    >>> graphprot_predictions_get_median(test_file)
    0.571673

    """
    # Site scores list.
    sc_list = []
    # The with-statement closes the file; no explicit close() is needed.
    with open(predictions_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            cols = line.split("\t")
            sc_list.append(float(cols[2]))
    # Return the median.
    return statistics.median(sc_list)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
41
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
42
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
43 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
44
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
45
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
def graphprot_profile_get_tsm(
    profile_file, profile_type="profile", avg_profile_extlr=5
):
    """
    Given a GraphProt .profile file, extract for each site (identified by
    column 1 ID) the top (= highest) score. Then return the median of these
    top scores.

    profile_type can be either "profile" or "avg_profile".
    "avg_profile" means that the position-wise scores will first get
    smoothed out by calculating for each position a new score through
    taking a sequence window -avg_profile_extlr to +avg_profile_extlr of
    the position and calculate the mean score over this window and assign
    it to the position. After that, the maximum score of each site is
    chosen, and the median over all maximum scores is returned.
    "profile" leaves the position-wise scores as they are, directly
    extracting the maximum for each site and then reporting the median.

    Blank lines in the file are skipped.

    Raises:
        AssertionError: profile_type is neither "profile" nor
            "avg_profile". Raised explicitly so the check also holds
            when Python runs with -O.
        IndexError: a non-blank line has fewer than three columns.
        ValueError: the third column is not a valid float.
        statistics.StatisticsError: the file contains no sites.

    >>> test_file = "test-data/test.profile"
    >>> graphprot_profile_get_tsm(test_file)
    3.2

    """
    # Validate once up front. The exception type stays AssertionError for
    # backward compatibility, but a bare assert would vanish under -O.
    if profile_type not in ("profile", "avg_profile"):
        raise AssertionError(
            'invalid profile_type argument given: "%s"' % (profile_type)
        )
    # Dictionary of lists, with list of scores (value) for each site (key).
    lists_dic = {}
    # The with-statement closes the file; no explicit close() is needed.
    with open(profile_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            cols = line.split("\t")
            lists_dic.setdefault(cols[0], []).append(float(cols[2]))
    # For each site, extract maximum and store in new list.
    max_list = []
    for scores in lists_dic.values():
        # A window extension of 0 averages each position with itself only,
        # so the scores are already final and smoothing can be skipped.
        if profile_type == "avg_profile" and avg_profile_extlr:
            # Convert profile score list to average profile scores list.
            scores = list_moving_window_average_values(
                scores, win_extlr=avg_profile_extlr
            )
        max_list.append(max(scores))
    # Return the median.
    return statistics.median(max_list)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
100
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
101
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
102 #######################################################################
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
103
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
104
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
105 def list_moving_window_average_values(in_list, win_extlr=5, method=1):
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
106 """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
107 Take a list of numeric values, and calculate for each position a new value,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
108 by taking the mean value of the window of positions -win_extlr and
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
109 +win_extlr. If full extension is not possible (at list ends), it just
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
110 takes what it gets.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
111 Two implementations of the task are given, chosen by method=1 or method=2.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
112
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
113 >>> test_list = [2, 3, 5, 8, 4, 3, 7, 1]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
114 >>> list_moving_window_average_values(test_list, win_extlr=2, method=1)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
115 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
116 >>> list_moving_window_average_values(test_list, win_extlr=2, method=2)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
117 [3.3333333333333335, 4.5, 4.4, 4.6, 5.4, 4.6, 3.75, 3.6666666666666665]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
118
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
119 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
120 l_list = len(in_list)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
121 assert l_list, "Given list is empty"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
122 new_list = [0] * l_list
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
123 if win_extlr == 0:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
124 return l_list
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
125 if method == 1:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
126 for i in range(l_list):
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
127 s = i - win_extlr
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
128 e = i + win_extlr + 1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
129 if s < 0:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
130 s = 0
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
131 if e > l_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
132 e = l_list
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
133 # Extract portion and assign value to new list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
134 new_list[i] = statistics.mean(in_list[s:e])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
135 elif method == 2:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
136 for i in range(l_list):
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
137 s = i - win_extlr
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
138 e = i + win_extlr + 1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
139 if s < 0:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
140 s = 0
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
141 if e > l_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
142 e = l_list
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
143 ln = e - s
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
144 sc_sum = 0
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
145 for j in range(ln):
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
146 sc_sum += in_list[s + j]
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
147 new_list[i] = sc_sum / ln
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
148 else:
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
149 assert 0, "invalid method ID given (%i)" % (method)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
150 return new_list
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
151
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
152
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
153 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
154
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
155
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def echo_add_to_file(echo_string, out_file):
    """
    Append echo_string, followed by a newline, to out_file.

    out_file is created if it does not exist yet. The text is written
    directly from Python rather than through a shell 'echo "..." >> file'
    command, so quotes, "$", backticks or whitespace inside echo_string
    or out_file are taken literally and can neither break the command
    nor be executed by a shell.

    Raises AssertionError if out_file can not be opened or written.

    """
    try:
        with open(out_file, "a") as out:
            out.write("%s\n" % (echo_string))
    except OSError as err:
        # Explicit raise (instead of an assert statement) so the check is
        # not stripped under "python -O"; the exception type stays the same.
        raise AssertionError(
            "echo is complaining:\nappend to %s\n%s" % (out_file, err)
        ) from err
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
167
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
168
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
169 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
170
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
171
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def is_tool(name):
    """
    Check whether tool "name" is in PATH.

    Returns True if an executable called name can be located, False
    otherwise.

    """
    # shutil.which replaces distutils.spawn.find_executable: distutils was
    # removed from the standard library in Python 3.12, and which() also
    # verifies that the file found is actually executable.
    import shutil

    return shutil.which(name) is not None
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
175
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
176
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
177 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
178
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
179
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def count_fasta_headers(fasta_file):
    """
    Count number of FASTA headers in fasta_file.

    Every line containing a ">" character is counted, which gives the
    same number as 'grep -c ">" fasta_file'. The file is read directly
    from Python, so file names containing spaces or shell metacharacters
    are handled correctly and no external grep binary is needed.

    Raises OSError (e.g. FileNotFoundError) if fasta_file can not be read.

    >>> test_file = "test-data/test.fa"
    >>> count_fasta_headers(test_file)
    2
    >>> test_file = "test-data/empty_file"
    >>> count_fasta_headers(test_file)
    0

    """
    # Binary mode: only the ">" byte matters, so no text decoding (and no
    # decoding errors on unexpected bytes) is involved.
    with open(fasta_file, "rb") as fasta:
        return sum(1 for line in fasta if b">" in line)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
196
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
197
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
198 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
199
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
200
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def make_file_copy(in_file, out_file):
    """
    Make a file copy by copying the content of in_file to out_file.

    An existing out_file is overwritten. The copy is done with
    shutil.copyfile instead of a shell "cat in_file > out_file" command,
    so paths containing spaces or shell metacharacters work and can not
    be interpreted by a shell.

    Raises AssertionError if in_file and out_file are the same, or if
    the copy fails (e.g. in_file missing, out_file not writable).

    """
    import shutil

    # Explicit raises (instead of assert statements) keep these checks
    # active under "python -O" while preserving the exception type.
    if in_file == out_file:
        raise AssertionError(
            "cat does not like to cat file into same file (%s)"
            % ("cat " + in_file + " > " + out_file)
        )
    try:
        shutil.copyfile(in_file, out_file)
    except OSError as err:
        # Also covers shutil.SameFileError (two different paths pointing
        # to the same file), which is an OSError subclass.
        raise AssertionError(
            "cat did not like your input (in_file: %s, out_file: %s):\n%s"
            % (in_file, out_file, err)
        ) from err
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
219
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
220
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
221 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
222
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
223
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
def split_fasta_into_test_train_files(
    in_fasta, test_out_fa, train_out_fa, test_size=500
):
    """
    Split in_fasta .fa file into two files (e.g. test, train).

    The sequences are read with read_fasta_into_dic and their IDs are
    put in the order returned by random_order_dic_keys_into_list. The
    first test_size IDs of that list are written to test_out_fa, all
    remaining ones to train_out_fa, each as a ">ID" header line followed
    by one sequence line. Both output files are overwritten.

    """
    # Read in in_fasta.
    seqs_dic = read_fasta_into_dic(in_fasta)
    # Shuffle IDs.
    rand_ids_list = random_order_dic_keys_into_list(seqs_dic)
    # The with statement closes both files even if a write fails.
    with open(test_out_fa, "w") as test_out, open(train_out_fa, "w") as train_out:
        for c_out, seq_id in enumerate(rand_ids_list):
            out = train_out if c_out >= test_size else test_out
            out.write(">%s\n%s\n" % (seq_id, seqs_dic[seq_id]))
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
247
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
248
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
249 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
250
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
251
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
def check_seqs_dic_format(seqs_dic):
    """
    Check sequence dictionary for lowercase-only sequences or sequences
    which have lowercase nts in between uppercase nts.
    Return suspicious IDs as list, or an empty list if there are no hits.

    seqs_dic maps sequence ID -> sequence string. Only the characters
    a, c, g, t, u, n (and their uppercase forms) are considered by the
    checks.

    Raises AssertionError if seqs_dic is empty.

    >>> seqs_dic = {"id1" : "acguACGU", "id2" : "acgua", "id3" : "acgUUaUcc"}
    >>> check_seqs_dic_format(seqs_dic)
    ['id2', 'id3']
    >>> seqs_dic = {"id1" : "acgAUaa", "id2" : "ACGUACUA"}
    >>> check_seqs_dic_format(seqs_dic)
    []

    """
    # Explicit raise (instead of an assert statement) so the check is not
    # stripped under "python -O"; the exception type stays the same.
    if not seqs_dic:
        raise AssertionError("given seqs_dic empty")
    bad_seq_ids = []
    for seq_id, seq in seqs_dic.items():
        # The two patterns can never both match (the second one needs an
        # uppercase nt), so each ID is reported at most once.
        lowercase_only = re.search(r"^[acgtun]+$", seq)
        lowercase_inside = re.search(r"[ACGTUN][acgtun]+[ACGTUN]", seq)
        if lowercase_only or lowercase_inside:
            bad_seq_ids.append(seq_id)
    return bad_seq_ids
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
276
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
277
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
278 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
279
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
280
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
def read_fasta_into_dic(
    fasta_file,
    seqs_dic=False,
    ids_dic=False,
    read_dna=False,
    short_ensembl=False,
    reject_lc=False,
    convert_to_uc=False,
    skip_n_seqs=True,
):
    """
    Read in FASTA sequences, convert to RNA, store in dictionary
    and return dictionary.

    fasta_file:
        Path to the FASTA file. A name ending in ".gz" is read via gzip.
    seqs_dic:
        Optional (non-empty) dictionary to add the sequences to. If it is
        empty or not given, a new dictionary is created.
    ids_dic:
        If given (non-empty), only sequences whose ID is a key of ids_dic
        are stored.
    read_dna:
        Store sequences as DNA (U -> T) instead of RNA (T -> U).
    short_ensembl:
        Keep only the header part in front of the first "." as ID
        (e.g. ">ENST00000631435.1 cdna ..." -> "ENST00000631435").
    reject_lc:
        Fail with an AssertionError if a sequence contains lowercase
        characters.
    convert_to_uc:
        Convert sequences to uppercase before storing them.
    skip_n_seqs:
        Print a warning and discard every sequence that contains N/n.

    Raises AssertionError on non-unique FASTA headers, and on lowercase
    characters if reject_lc is set.

    >>> test_fasta = "test-data/test.fa"
    >>> read_fasta_into_dic(test_fasta)
    {'seq1': 'acguACGUacgu', 'seq2': 'ugcaUGCAugcaACGUacgu'}
    >>> test_fasta = "test-data/test2.fa"
    >>> read_fasta_into_dic(test_fasta)
    {}
    >>> test_fasta = "test-data/test.ensembl.fa"
    >>> read_fasta_into_dic(test_fasta, read_dna=True, short_ensembl=True)
    {'ENST00000415118': 'GAAATAGT', 'ENST00000448914': 'ACTGGGGGATACGAAAA'}
    >>> test_fasta = "test-data/test4.fa"
    >>> read_fasta_into_dic(test_fasta)
    {'1': 'gccuAUGUuuua', '2': 'cugaAACUaugu'}

    """
    if not seqs_dic:
        seqs_dic = {}
    seq_id = ""

    # Choose the opener by file extension; both variants yield text lines.
    if re.search(r".+\.gz$", fasta_file):
        opener, mode = gzip.open, "rt"
    else:
        opener, mode = open, "r"

    # Go through FASTA file, extract sequences. The context manager makes
    # sure the handle is closed even if an assertion below fires.
    with opener(fasta_file, mode) as f:
        for line in f:
            header_match = re.search(">(.+)", line)
            if header_match:
                seq_id = header_match.group(1)
                # If there is a ".", take only first part of header.
                # This assumes ENSEMBL header format
                # ">ENST00000631435.1 cdna ..."
                if short_ensembl:
                    short_match = re.search(r"(.+?)\..+", seq_id)
                    if short_match:
                        seq_id = short_match.group(1)
                assert (
                    seq_id not in seqs_dic
                ), 'non-unique FASTA header "%s" in "%s"' % (
                    seq_id,
                    fasta_file,
                )
                if ids_dic:
                    # Unselected IDs are not registered, so their sequence
                    # lines get ignored below.
                    if seq_id in ids_dic:
                        seqs_dic[seq_id] = ""
                else:
                    seqs_dic[seq_id] = ""
                continue

            # Only the first run of nucleotide characters on a line is used.
            m = re.search("([ACGTUN]+)", line, re.I)
            if not m or seq_id not in seqs_dic:
                continue
            seq = m.group(1)
            if reject_lc:
                # seq_id is a string, so it has to be formatted with %s.
                assert not re.search(
                    "[a-z]", seq
                ), 'lc char detected in seq "%s" (reject_lc=True)' % (seq_id)
            if convert_to_uc:
                seq = seq.upper()
            # If sequences with N nucleotides should be skipped.
            if skip_n_seqs:
                if "n" in seq or "N" in seq:
                    print(
                        'WARNING: "%s" contains N. Discarding '
                        "sequence ... " % (seq_id)
                    )
                    # Removing the entry also makes the remaining lines of
                    # this record fail the "seq_id in seqs_dic" check.
                    del seqs_dic[seq_id]
                    continue
            # Convert to RNA (or DNA), concatenate sequence.
            if read_dna:
                seqs_dic[seq_id] += seq.replace("U", "T").replace("u", "t")
            else:
                seqs_dic[seq_id] += seq.replace("T", "U").replace("t", "u")
    return seqs_dic
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
364
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
365
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
366 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
367
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
368
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def random_order_dic_keys_into_list(in_dic):
    """
    Collect the keys of in_dic in a new list, shuffle the list in place
    with random.shuffle and return it. The dictionary itself is not
    modified.

    """
    shuffled_ids = list(in_dic)
    random.shuffle(shuffled_ids)
    return shuffled_ids
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
379
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
380
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
381 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
382
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
383
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def graphprot_get_param_string(params_file):
    """
    Get parameter string from GraphProt .params file.

    Every non-blank line of params_file is expected to have the form
    "name: value". Each line is turned into "-name value " and appended
    to the returned string, with these exceptions:
    lines containing "pos_train" followed by further characters are
    skipped, and "model_type" only contributes "-onlyseq " if its value
    is "sequence" (any other model type adds nothing).

    Raises AssertionError if the first field of a line does not contain
    a ":" preceded by a name. A line with a name but no value raises
    IndexError.

    >>> test_params = "test-data/test.params"
    >>> graphprot_get_param_string(test_params)
    '-epochs 20 -lambda 0.01 -R 1 -D 3 -bitsize 14 -onlyseq '

    """
    param_string = ""
    with open(params_file) as f:
        for line in f:
            # Split on any whitespace run, so tabs or repeated spaces do
            # not produce empty fields.
            cols = line.split()
            # Blank lines carry no parameter; skip them instead of
            # failing on cols[1].
            if not cols:
                continue
            param = cols[0]
            setting = cols[1]
            # Take the name from the first field only, so a ":" inside
            # the value cannot leak into the parameter name.
            m = re.search("(.+):", param)
            if m:
                par = m.group(1)
                if re.search("pos_train.+", line):
                    continue
                if par == "model_type":
                    if setting == "sequence":
                        param_string += "-onlyseq "
                else:
                    param_string += "-%s %s " % (par, setting)
            else:
                assert 0, 'pattern matching failed for string "%s"' % (param)
    return param_string
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
412
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
413
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
414 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
415
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
416
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def seqs_dic_count_uc_nts(seqs_dic):
    """
    Return the total number of uppercase characters (A-Z) found in all
    sequences of the given sequence dictionary.

    seqs_dic maps sequence ID -> sequence string. An empty dictionary
    triggers an AssertionError.

    >>> seqs_dic = {'seq1': "acgtACGTacgt", 'seq2': 'acgtACacgt'}
    >>> seqs_dic_count_uc_nts(seqs_dic)
    6
    >>> seqs_dic = {'seq1': "acgtacgt", 'seq2': 'acgtacgt'}
    >>> seqs_dic_count_uc_nts(seqs_dic)
    0

    """
    assert seqs_dic, "Given sequence dictionary empty"
    # One regex scan per sequence; the per-sequence hit counts are summed.
    return sum(len(re.findall(r"[A-Z]", seq)) for seq in seqs_dic.values())
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
435
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
436
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
437 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
438
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
439
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def seqs_dic_count_lc_nts(seqs_dic):
    """
    Return the total number of lowercase characters (a-z) found in all
    sequences of the given sequence dictionary.

    seqs_dic maps sequence ID -> sequence string. An empty dictionary
    triggers an AssertionError.

    >>> seqs_dic = {'seq1': "gtACGTac", 'seq2': 'cgtACacg'}
    >>> seqs_dic_count_lc_nts(seqs_dic)
    10
    >>> seqs_dic = {'seq1': "ACGT", 'seq2': 'ACGTAC'}
    >>> seqs_dic_count_lc_nts(seqs_dic)
    0

    """
    assert seqs_dic, "Given sequence dictionary empty"
    lc_counts = [len(re.findall(r"[a-z]", seq)) for seq in seqs_dic.values()]
    return sum(lc_counts)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
458
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
459
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
460 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
461
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
462
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def count_file_rows(in_file):
    """
    Count number of file rows for given input file.

    Rows are counted as newline characters, i.e. the number reported by
    "wc -l": a final line lacking a trailing newline is not counted.
    The file is read directly in Python instead of through a shell
    pipeline, so paths containing spaces or shell metacharacters are
    handled safely.

    in_file:
        Path of the file to count rows in.

    Returns the row count as int. Raises OSError (e.g. FileNotFoundError)
    if the file cannot be opened.

    >>> test_file = "test-data/test1.bed"
    >>> count_file_rows(test_file)
    7
    >>> test_file = "test-data/empty_file"
    >>> count_file_rows(test_file)
    0

    """
    row_count = 0
    # Binary mode: only newline bytes matter, so no text decoding is needed.
    with open(in_file, "rb") as f:
        # Fixed-size chunks keep memory use constant for large files.
        for chunk in iter(lambda: f.read(65536), b""):
            row_count += chunk.count(b"\n")
    return row_count
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
479
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
480
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
481 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
482
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
483
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def bed_check_six_col_format(bed_file):
    """
    Check whether given .bed file has 6 columns.

    Only the first line of the file is inspected: it is stripped and
    split on tabs, and the result is True if exactly 6 columns remain.
    An empty file yields False.

    >>> test_bed = "test-data/test1.bed"
    >>> bed_check_six_col_format(test_bed)
    True
    >>> test_bed = "test-data/empty_file"
    >>> bed_check_six_col_format(test_bed)
    False

    """
    with open(bed_file) as bed_in:
        # readline() returns "" for an empty file, which splits into a
        # single (empty) column and therefore fails the check.
        first_row = bed_in.readline()
    return len(first_row.strip().split("\t")) == 6
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
506
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
507
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
508 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
509
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
510
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def bed_check_unique_ids(bed_file):
    """
    Check whether .bed file (6 column format with IDs in column 4)
    has unique column 4 IDs.

    The file is scanned directly in Python rather than through a
    "cut | sort | uniq" shell pipeline, so paths containing spaces or
    shell metacharacters are handled safely and an unreadable file is
    no longer mistaken for a file with duplicate IDs.

    bed_file:
        Path of the tab-separated .bed file to check.

    Returns True if no column 4 value occurs more than once, False
    otherwise. Raises OSError (e.g. FileNotFoundError) if the file
    cannot be opened.

    >>> test_bed = "test-data/test1.bed"
    >>> bed_check_unique_ids(test_bed)
    True
    >>> test_bed = "test-data/test2.bed"
    >>> bed_check_unique_ids(test_bed)
    False

    """
    seen_ids = set()
    # Binary mode: IDs are only compared, never decoded.
    with open(bed_file, "rb") as f:
        for line in f:
            cols = line.rstrip(b"\n").split(b"\t")
            if len(cols) == 1:
                # Same field selection as "cut -f 4": a row without any
                # tab is taken as a whole.
                col_id = cols[0]
            elif len(cols) >= 4:
                col_id = cols[3]
            else:
                col_id = b""
            if not col_id:
                # Empty values (blank rows, rows with < 4 columns) are
                # not treated as IDs and never count as duplicates.
                continue
            if col_id in seen_ids:
                return False
            seen_ids.add(col_id)
    return True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
531
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
532
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
533 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
534
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
535
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def get_seq_lengths_from_seqs_dic(seqs_dic):
    """
    Build a dictionary of sequence lengths from a dictionary of sequences.

    seqs_dic maps sequence ID -> sequence; the returned dictionary maps
    sequence ID -> sequence length. An empty seqs_dic triggers an
    AssertionError.
    """
    assert seqs_dic, "sequence dictionary seems to be empty"
    return {seq_id: len(seq) for seq_id, seq in seqs_dic.items()}
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
547
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
548
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
549 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
550
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
551
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def bed_get_region_lengths(bed_file):
    """
    Read in .bed file, store and return region lengths in dictionary.
    key   :  region ID (.bed col4)
    value :  region length (.bed col3-col2)

    Empty lines in the file are skipped. An AssertionError is raised if
    a region ID (column 4) occurs more than once, or if no region could
    be read from the file at all.

    >>> test_file = "test-data/test4.bed"
    >>> bed_get_region_lengths(test_file)
    {'CLIP1': 10, 'CLIP2': 10}

    """
    id2len_dic = {}
    with open(bed_file) as f:
        for line in f:
            row = line.strip()
            # Tolerate empty lines (e.g. trailing newline at end of file)
            # instead of failing with an IndexError on the column access.
            if not row:
                continue
            cols = row.split("\t")
            site_s = int(cols[1])
            site_e = int(cols[2])
            site_id = cols[3]
            assert (
                site_id not in id2len_dic
            ), 'column 4 IDs not unique in given .bed file "%s"' % (bed_file)
            # Region length is end minus start (col3 - col2).
            id2len_dic[site_id] = site_e - site_s
    assert (
        id2len_dic
    ), 'No IDs read into dic (input file "%s" empty or malformatted?)' % (bed_file)
    return id2len_dic
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
580
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
581
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
582 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
583
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
584
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def graphprot_get_param_dic(params_file):
    """
    Read in GraphProt .params file and store in dictionary.
    key = parameter
    value = parameter value

    Each line is expected to look like "<parameter>: <value>". The key is
    the first whitespace-separated field without its trailing colon, the
    value is the second field (kept as string). Lines with fewer than two
    fields, or whose first field contains no colon, are ignored.

    >>> params_file = "test-data/test.params"
    >>> graphprot_get_param_dic(params_file)
    {'epochs': '20', 'lambda': '0.01', 'R': '1', 'D': '3', 'bitsize': '14', \
'model_type': 'sequence', 'pos_train_ws_pred_median': '0.760321', \
'pos_train_profile_median': '5.039610', \
'pos_train_avg_profile_median_1': '4.236340', \
'pos_train_avg_profile_median_2': '3.868431', \
'pos_train_avg_profile_median_3': '3.331277', \
'pos_train_avg_profile_median_4': '2.998667', \
'pos_train_avg_profile_median_5': '2.829782', \
'pos_train_avg_profile_median_6': '2.626623', \
'pos_train_avg_profile_median_7': '2.447083', \
'pos_train_avg_profile_median_8': '2.349919', \
'pos_train_avg_profile_median_9': '2.239829', \
'pos_train_avg_profile_median_10': '2.161676'}

    """
    param_dic = {}
    with open(params_file) as f:
        for line in f:
            # split() without argument collapses runs of whitespace and
            # yields an empty list for blank lines.
            cols = line.split()
            if len(cols) < 2:
                continue
            param, setting = cols[0], cols[1]
            # Extract the key from the first field only, so that a colon
            # inside the value cannot leak into the parameter name.
            m = re.search("(.+):", param)
            if m:
                param_dic[m.group(1)] = setting
    return param_dic
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
620
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
621
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
622 #######################################################################
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
623
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
624
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
def graphprot_filter_predictions_file(in_file, out_file, sc_thr=0):
    """
    Filter GraphProt .predictions file by given score threshold sc_thr.

    Every row of in_file whose score (tab-separated column 3) is greater
    than or equal to sc_thr is written to out_file, stripped of
    surrounding whitespace and terminated by a newline. Rows with a lower
    score are dropped, empty lines are skipped.
    """
    # Output is opened first (as before), both handles are managed by the
    # with statement so they get closed even if a row fails to parse.
    with open(out_file, "w") as outpred, open(in_file) as f:
        for line in f:
            row = line.strip()
            if not row:
                continue
            score = float(row.split("\t")[2])
            if score < sc_thr:
                continue
            outpred.write("%s\n" % (row))
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
640
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
641
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
642 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
643
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
644
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def fasta_read_in_ids(fasta_file):
    """
    Given a .fa file, read in header IDs in order appearing in file,
    and store in list.

    A header ID is everything following the first ">" on a line, up to
    the end of that line. Lines without ">" followed by at least one
    character contribute nothing.

    >>> test_file = "test-data/test3.fa"
    >>> fasta_read_in_ids(test_file)
    ['SERBP1_K562_rep01_544', 'SERBP1_K562_rep02_709', 'SERBP1_K562_rep01_316']

    """
    ids_list = []
    with open(fasta_file) as f:
        for line in f:
            # One search per line: the match object both detects the
            # header and delivers the ID.
            m = re.search(">(.+)", line)
            if m:
                ids_list.append(m.group(1))
    return ids_list
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
664
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
665
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
666 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
667
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
668
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
669 def graphprot_profile_calc_avg_profile(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
670 in_file, out_file, ap_extlr=5, seq_ids_list=False, method=1
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
671 ):
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
672 """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
673 Given a GraphProt .profile file, calculate average profiles and output
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
674 average profile file.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
675 Average profile means that the position-wise scores will get smoothed
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
676 out by calculating for each position a new score, taking a sequence
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
677 window -ap_extlr to +ap_extlr relative to the position
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
678 and calculate the mean score over this window. The mean score then
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
679 becomes the new average profile score at this position.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
680 Two different implementations of the task are given:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
681 method=1 (new python implementation, slower + more memory but easy to read)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
682 method=2 (old perl implementation, faster and less memory but more code)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
683
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
684 >>> in_file = "test-data/test2.profile"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
685 >>> out_file1 = "test-data/test2_1.avg_profile"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
686 >>> out_file2 = "test-data/test2_2.avg_profile"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
687 >>> out_file4 = "test-data/test2_3.avg_profile"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
688 >>> graphprot_profile_calc_avg_profile(in_file, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
689 out_file1, ap_extlr=2, method=1)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
690 >>> graphprot_profile_calc_avg_profile(in_file, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
691 out_file2, ap_extlr=2, method=2)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
692 >>> diff_two_files_identical(out_file1, out_file2)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
693 True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
694 >>> test_list = ["s1", "s2", "s3", "s4"]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
695 >>> out_file3_exp = "test-data/test3_added_ids_exp.avg_profile"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
696 >>> out_file3 = "test-data/test3_added_ids_out.avg_profile"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
697 >>> graphprot_profile_calc_avg_profile(in_file, out_file3, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
698 ap_extlr=2, method=1, seq_ids_list=test_list)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
699 >>> diff_two_files_identical(out_file3_exp, out_file3)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
700 True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
701
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
702 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
703 if method == 1:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
704 # Dictionary of lists, with list of scores (value) for each site (key).
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
705 lists_dic = {}
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
706 site_starts_dic = {}
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
707 with open(in_file) as f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
708 for line in f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
709 cols = line.strip().split("\t")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
710 site_id = int(cols[0])
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
711 pos = int(cols[1]) # 0-based.
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
712 score = float(cols[2])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
713 # Store first position of site.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
714 if site_id not in site_starts_dic:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
715 site_starts_dic[site_id] = pos
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
716 if site_id in lists_dic:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
717 lists_dic[site_id].append(score)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
718 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
719 lists_dic[site_id] = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
720 lists_dic[site_id].append(score)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
721 f.close()
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
722 # Check ID counts (number of FASTA IDs has to equal number of site IDs).
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
723 if seq_ids_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
724 c_seq_ids = len(seq_ids_list)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
725 c_site_ids = len(site_starts_dic)
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
726 assert (
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
727 c_seq_ids == c_site_ids
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
728 ), "# sequence IDs != # site IDs (%i != %i)" % (c_seq_ids, c_site_ids)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
729 OUTPROF = open(out_file, "w")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
730 # For each site, calculate average profile scores list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
731 for site_id in lists_dic:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
732 # Convert profile score list to average profile scores list.
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
733 aps_list = list_moving_window_average_values(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
734 lists_dic[site_id], win_extlr=ap_extlr
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
735 )
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
736 start_pos = site_starts_dic[site_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
737 # Get original FASTA sequence ID.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
738 if seq_ids_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
739 site_id = seq_ids_list[site_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
740 for i, sc in enumerate(aps_list):
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
741 pos = i + start_pos + 1 # make 1-based.
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
742 OUTPROF.write("%s\t%i\t%f\n" % (site_id, pos, sc))
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
743 OUTPROF.close()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
744 elif method == 2:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
745 OUTPROF = open(out_file, "w")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
746 # Old site ID.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
747 old_id = ""
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
748 # Current site ID.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
749 cur_id = ""
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
750 # Scores list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
751 scores_list = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
752 site_starts_dic = {}
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
753 with open(in_file) as f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
754 for line in f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
755 cols = line.strip().split("\t")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
756 cur_id = int(cols[0])
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
757 pos = int(cols[1]) # 0-based.
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
758 score = float(cols[2])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
759 # Store first position of site.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
760 if cur_id not in site_starts_dic:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
761 site_starts_dic[cur_id] = pos
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
762 # Case: new site (new column 1 ID).
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
763 if cur_id != old_id:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
764 # Process old id scores.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
765 if scores_list:
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
766 aps_list = list_moving_window_average_values(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
767 scores_list, win_extlr=ap_extlr
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
768 )
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
769 start_pos = site_starts_dic[old_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
770 seq_id = old_id
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
771 # Get original FASTA sequence ID.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
772 if seq_ids_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
773 seq_id = seq_ids_list[old_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
774 for i, sc in enumerate(aps_list):
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
775 pos = i + start_pos + 1 # make 1-based.
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
776 OUTPROF.write("%s\t%i\t%f\n" % (seq_id, pos, sc))
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
777 # Reset list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
778 scores_list = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
779 old_id = cur_id
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
780 scores_list.append(score)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
781 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
782 # Add to scores_list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
783 scores_list.append(score)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
784 f.close()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
785 # Process last block.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
786 if scores_list:
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
787 aps_list = list_moving_window_average_values(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
788 scores_list, win_extlr=ap_extlr
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
789 )
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
790 start_pos = site_starts_dic[old_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
791 seq_id = old_id
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
792 # Get original FASTA sequence ID.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
793 if seq_ids_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
794 seq_id = seq_ids_list[old_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
795 for i, sc in enumerate(aps_list):
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
796 pos = i + start_pos + 1 # make 1-based.
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
797 OUTPROF.write("%s\t%i\t%f\n" % (seq_id, pos, sc))
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
798 OUTPROF.close()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
799
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
800
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
801 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
802
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
803
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
804 def graphprot_profile_extract_peak_regions(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
805 in_file, out_file, max_merge_dist=0, sc_thr=0
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
806 ):
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
807 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
808 Extract peak regions from GraphProt .profile file.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
809 Store the peak regions (defined as regions with scores >= sc_thr)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
810 to out_file in 6-column .bed format.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
811
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
812 TODO:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
813 Add option for genomic coordinates input (+ - polarity support).
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
814 Output genomic regions instead of sequence regions.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
815
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
816 >>> in_file = "test-data/test4.avg_profile"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
817 >>> out_file = "test-data/test4_out.peaks.bed"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
818 >>> exp_file = "test-data/test4_out_exp.peaks.bed"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
819 >>> exp2_file = "test-data/test4_out_exp2.peaks.bed"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
820 >>> empty_file = "test-data/empty_file"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
821 >>> graphprot_profile_extract_peak_regions(in_file, out_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
822 >>> diff_two_files_identical(out_file, exp_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
823 True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
824 >>> graphprot_profile_extract_peak_regions(in_file, out_file, sc_thr=10)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
825 >>> diff_two_files_identical(out_file, empty_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
826 True
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
827 >>> graphprot_profile_extract_peak_regions(in_file, out_file, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
828 max_merge_dist=2)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
829 >>> diff_two_files_identical(out_file, exp2_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
830 True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
831
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
832 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
833
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
834 OUTPEAKS = open(out_file, "w")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
835 # Old site ID.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
836 old_id = ""
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
837 # Current site ID.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
838 cur_id = ""
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
839 # Scores list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
840 scores_list = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
841 site_starts_dic = {}
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
842 with open(in_file) as f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
843 for line in f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
844 cols = line.strip().split("\t")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
845 cur_id = cols[0]
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
846 pos = int(cols[1]) # 0-based.
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
847 score = float(cols[2])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
848 # Store first position of site.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
849 if cur_id not in site_starts_dic:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
850 # If first position != zero, we assume positions are 1-based.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
851 if pos != 0:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
852 # Make index 0-based.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
853 site_starts_dic[cur_id] = pos - 1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
854 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
855 site_starts_dic[cur_id] = pos
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
856 # Case: new site (new column 1 ID).
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
857 if cur_id != old_id:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
858 # Process old id scores.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
859 if scores_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
860 # Extract peaks from region.
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
861 peak_list = list_extract_peaks(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
862 scores_list,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
863 max_merge_dist=max_merge_dist,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
864 coords="bed",
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
865 sc_thr=sc_thr,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
866 )
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
867 start_pos = site_starts_dic[old_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
868 # Print out peaks in .bed format.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
869 for ln in peak_list:
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
870 peak_s = start_pos + ln[0]
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
871 peak_e = start_pos + ln[1]
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
872 site_id = "%s,%i" % (old_id, ln[2])
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
873 OUTPEAKS.write(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
874 "%s\t%i\t%i"
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
875 "\t%s\t%f\t+\n" % (old_id, peak_s, peak_e, site_id, ln[3])
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
876 )
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
877 # Reset list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
878 scores_list = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
879 old_id = cur_id
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
880 scores_list.append(score)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
881 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
882 # Add to scores_list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
883 scores_list.append(score)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
884 f.close()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
885 # Process last block.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
886 if scores_list:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
887 # Extract peaks from region.
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
888 peak_list = list_extract_peaks(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
889 scores_list, max_merge_dist=max_merge_dist, coords="bed", sc_thr=sc_thr
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
890 )
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
891 start_pos = site_starts_dic[old_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
892 # Print out peaks in .bed format.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
893 for ln in peak_list:
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
894 peak_s = start_pos + ln[0]
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
895 peak_e = start_pos + ln[1]
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
896 site_id = "%s,%i" % (old_id, ln[2]) # best score also 1-based.
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
897 OUTPEAKS.write(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
898 "%s\t%i\t%i\t%s\t%f\t+\n" % (old_id, peak_s, peak_e, site_id, ln[3])
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
899 )
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
900 OUTPEAKS.close()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
901
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
902
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
903 #######################################################################
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
904
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
905
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
906 def list_extract_peaks(in_list, max_merge_dist=0, coords="list", sc_thr=0):
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
907 """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
908 Extract peak regions from list.
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
909 Peak region is defined as region >= score threshold.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
910
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
911 coords=bed : peak start 0-based, peak end 1-based.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
912 coords=list : peak start 0-based, peak end 0-based.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
913
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
914 >>> test_list = [-1, 0, 2, 4.5, 1, -1, 5, 6.5]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
915 >>> list_extract_peaks(test_list)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
916 [[1, 4, 3, 4.5], [6, 7, 7, 6.5]]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
917 >>> list_extract_peaks(test_list, sc_thr=2)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
918 [[2, 3, 3, 4.5], [6, 7, 7, 6.5]]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
919 >>> list_extract_peaks(test_list, sc_thr=2, coords="bed")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
920 [[2, 4, 4, 4.5], [6, 8, 8, 6.5]]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
921 >>> list_extract_peaks(test_list, sc_thr=10)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
922 []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
923 >>> test_list = [2, -1, 3, -1, 4, -1, -1, 6, 9]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
924 >>> list_extract_peaks(test_list, max_merge_dist=2)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
925 [[0, 4, 4, 4], [7, 8, 8, 9]]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
926 >>> list_extract_peaks(test_list, max_merge_dist=3)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
927 [[0, 8, 8, 9]]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
928
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
929 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
930 # Check.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
931 assert len(in_list), "Given list is empty"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
932 # Peak regions list.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
933 peak_list = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
934 # Helper variables tracking the current peak region.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
935 inside = False
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
936 pr_s = 0
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
937 pr_e = 0
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
938 pr_top_pos = 0
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
939 pr_top_sc = -100000
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
940 for i, sc in enumerate(in_list):
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
941 # Part of peak region?
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
942 if sc >= sc_thr:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
943 # At peak start.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
944 if not inside:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
945 pr_s = i
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
946 pr_e = i
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
947 inside = True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
948 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
949 # Inside peak region.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
950 pr_e = i
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
951 # Store top position.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
952 if sc > pr_top_sc:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
953 pr_top_sc = sc
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
954 pr_top_pos = i
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
955 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
956 # Before was peak region?
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
957 if inside:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
958 # Store peak region.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
959 peak_infos = [pr_s, pr_e, pr_top_pos, pr_top_sc]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
960 peak_list.append(peak_infos)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
961 inside = False
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
962 pr_top_pos = 0
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
963 pr_top_sc = -100000
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
964 # If peak at the end, also report.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
965 if inside:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
966 # Store peak region.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
967 peak_infos = [pr_s, pr_e, pr_top_pos, pr_top_sc]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
968 peak_list.append(peak_infos)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
969 # Merge peaks.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
970 if max_merge_dist and len(peak_list) > 1:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
971 iterate = True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
972 while iterate:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
973 merged_peak_list = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
974 added_peaks_dic = {}
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
975 peaks_merged = False
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
976 for i, l in enumerate(peak_list):
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
977 if i in added_peaks_dic:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
978 continue
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
979 j = i + 1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
980 # Last element.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
981 if j == len(peak_list):
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
982 if i not in added_peaks_dic:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
983 merged_peak_list.append(peak_list[i])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
984 break
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
985 # Compare two elements.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
986 new_peak = []
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
987 if (peak_list[j][0] - peak_list[i][1]) <= max_merge_dist:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
988 peaks_merged = True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
989 new_top_pos = peak_list[i][2]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
990 new_top_sc = peak_list[i][3]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
991 if peak_list[i][3] < peak_list[j][3]:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
992 new_top_pos = peak_list[j][2]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
993 new_top_sc = peak_list[j][3]
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
994 new_peak = [
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
995 peak_list[i][0],
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
996 peak_list[j][1],
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
997 new_top_pos,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
998 new_top_sc,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
999 ]
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1000 # If two peaks were merged.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1001 if new_peak:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1002 merged_peak_list.append(new_peak)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1003 added_peaks_dic[i] = 1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1004 added_peaks_dic[j] = 1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1005 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1006 merged_peak_list.append(peak_list[i])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1007 added_peaks_dic[i] = 1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1008 if not peaks_merged:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1009 iterate = False
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1010 peak_list = merged_peak_list
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1011 peaks_merged = False
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1012 # If peak coordinates should be in .bed format, make peak ends 1-based.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1013 if coords == "bed":
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1014 for i in range(len(peak_list)):
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1015 peak_list[i][1] += 1
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
1016 peak_list[i][2] += 1 # 1-base best score position too.
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1017 return peak_list
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1018
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1019
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1020 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1021
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1022
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1023 def bed_peaks_to_genomic_peaks(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1024 peak_file, genomic_peak_file, genomic_sites_bed, print_rows=False
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1025 ):
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1026 """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
1027 Given a .bed file of sequence peak regions (possible coordinates from
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1028 0 to length of s), convert peak coordinates to genomic coordinates.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1029 Do this by taking genomic regions of sequences as input.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1030
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1031 >>> test_in = "test-data/test.peaks.bed"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1032 >>> test_exp = "test-data/test_exp.peaks.bed"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1033 >>> test_out = "test-data/test_out.peaks.bed"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1034 >>> gen_in = "test-data/test.peaks_genomic.bed"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1035 >>> bed_peaks_to_genomic_peaks(test_in, test_out, gen_in)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1036 >>> diff_two_files_identical(test_out, test_exp)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1037 True
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1038
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1039 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1040 # Read in genomic region info.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1041 id2row_dic = {}
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1042
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1043 with open(genomic_sites_bed) as f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1044 for line in f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1045 row = line.strip()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1046 cols = line.strip().split("\t")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1047 site_id = cols[3]
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1048 assert (
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1049 site_id not in id2row_dic
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1050 ), 'column 4 IDs not unique in given .bed file "%s"' % (genomic_sites_bed)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1051 id2row_dic[site_id] = row
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1052 f.close()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1053
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1054 # Read in peaks file and convert coordinates.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1055 OUTPEAKS = open(genomic_peak_file, "w")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1056 with open(peak_file) as f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1057 for line in f:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1058 cols = line.strip().split("\t")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1059 site_id = cols[0]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1060 site_s = int(cols[1])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1061 site_e = int(cols[2])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1062 site_id2 = cols[3]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1063 site_sc = float(cols[4])
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1064 assert re.search(
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1065 ".+,.+", site_id2
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1066 ), 'regular expression failed for ID "%s"' % (site_id2)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
1067 m = re.search(r".+,(\d+)", site_id2)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
1068 sc_pos = int(m.group(1)) # 1-based.
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1069 assert (
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1070 site_id in id2row_dic
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1071 ), 'site ID "%s" not found in genomic sites dictionary' % (site_id)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1072 row = id2row_dic[site_id]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1073 rowl = row.split("\t")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1074 gen_chr = rowl[0]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1075 gen_s = int(rowl[1])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1076 gen_e = int(rowl[2])
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1077 gen_pol = rowl[5]
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1078 new_s = site_s + gen_s
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1079 new_e = site_e + gen_s
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1080 new_sc_pos = sc_pos + gen_s
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1081 if gen_pol == "-":
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1082 new_s = gen_e - site_e
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1083 new_e = gen_e - site_s
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
1084 new_sc_pos = gen_e - sc_pos + 1 # keep 1-based.
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1085 new_row = "%s\t%i\t%i\t%s,%i\t%f\t%s" % (
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1086 gen_chr,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1087 new_s,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1088 new_e,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1089 site_id,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1090 new_sc_pos,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1091 site_sc,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1092 gen_pol,
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1093 )
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
1094 OUTPEAKS.write("%s\n" % (new_row))
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1095 if print_rows:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1096 print(new_row)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1097 OUTPEAKS.close()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1098
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1099
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1100 #######################################################################
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1101
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1102
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
def diff_two_files_identical(file1, file2):
    """
    Check whether two files are identical. Return True if diff reports no
    differences.

    The external ``diff`` program is invoked with an argument list (no
    shell), so paths containing spaces or shell metacharacters are passed
    through verbatim. Any failure of diff (e.g. a missing input file, or
    diff itself not being available) yields False.

    >>> file1 = "test-data/file1"
    >>> file2 = "test-data/file2"
    >>> diff_two_files_identical(file1, file2)
    True
    >>> file1 = "test-data/test1.bed"
    >>> diff_two_files_identical(file1, file2)
    False

    """
    try:
        # "--" ends option parsing, so a path starting with "-" is still
        # treated as a file name. stderr is merged into stdout so that
        # error messages count as "not identical", as they did before.
        result = subprocess.run(
            ["diff", "--", str(file1), str(file2)],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    except OSError:
        # diff could not be executed at all; previously the shell's error
        # message made the output non-empty, i.e. the result was False.
        return False
    # diff exits with 0 and prints nothing only when the files are equal.
    return result.returncode == 0 and not result.stdout
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1123
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1124
5
ddcf35a868b8 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit ad60258f5759eaa205fec4af6143c728ea131419
bgruening
parents: 3
diff changeset
1125 #######################################################################