annotate graphprot_train_wrapper.py @ 3:ace92c9a4653 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
author bgruening
date Wed, 27 Jan 2021 19:27:47 +0000
parents 20429f4c1b95
children ddcf35a868b8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
1 #!/usr/bin/env python3
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
2
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
3 import argparse as ap
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
4 import os
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
5 import subprocess
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
6 import sys
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
7
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
8 import gplib
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
9
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
10
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
11 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
12
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
13 TOOL DEPENDENCIES
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
14 =================
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
15
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
16 GraphProt 1.1.7
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
17 Best installed via:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
18 https://anaconda.org/bioconda/graphprot
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
19 Tested with: miniconda3, conda 4.7.12
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
20
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
21
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
22 OUTPUT FILES
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
23 ============
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
24
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
25 data_id.model
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
26 data_id.params
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
27 if not --disable-cv:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
28 data_id.cv_results
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
29 if not --disable-motifs:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
30 data_id.sequence_motif
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
31 data_id.sequence_motif.png
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
32 if --str-model:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
33 data_id.structure_motif
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
34 data_id.structure_motif.png
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
35 Temporary:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
36 data_id.predictions
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
37 data_id.profile
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
38
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
39
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
40 EXAMPLE CALLS
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
41 =============
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
42
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
43 python graphprot_train_wrapper.py --pos gp_data/SERBP1_positives.train.fa
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
44 --neg gp_data/SERBP1_negatives.train.fa --data-id test2 --disable-cv
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
45 --gp-output --opt-set-size 200 --min-train 400
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
46
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
47 python graphprot_train_wrapper.py --pos gp_data/SERBP1_positives.train.fa
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
48 --neg gp_data/SERBP1_negatives.train.fa --data-id test2 --disable-cv
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
49 --opt-set-size 100 --min-train 200
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
50
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
51 python graphprot_train_wrapper.py --pos test-data/test_positives.train.fa
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
52 --neg test-data/test_negatives.train.fa --data-id gptest2 --disable-cv
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
53 --opt-pos test-data/test_positives.parop.fa
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
54 --opt-neg test-data/test_negatives.parop.fa
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
55
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
56 python graphprot_train_wrapper.py --pos test-data/test_positives.train.fa
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
57 --neg test-data/test_negatives.train.fa --data-id gptest2 --disable-cv
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
58 --disable-motifs --opt-pos test-data/test_positives.parop.fa --opt-neg
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
59 test-data/test_negatives.parop.fa
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
60
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
61
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
62 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
63
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
64
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
65 ###############################################################################
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
66
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
67 def setup_argument_parser():
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
68 """Setup argparse parser."""
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
69 help_description = """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
70 Galaxy wrapper script for GraphProt to train a GraphProt model on
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
71 a given set of input sequences (positives and negatives .fa). By
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
72 default a sequence model is trained (due to structure models
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
73 being much slower to train). Also by default take a portion of
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
74 the input sequences for hyperparameter optimization (HPO) prior to
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
75 model training, and run a 10-fold cross validation and motif
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
76 generation after model training. Thus the following output
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
77 files are produced:
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
78 .model model file, .params model parameter file, .png motif files
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
79 (sequence, or sequence+structure), .cv_results CV results file.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
80 After model training, predict on positives to get highest whole
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
81 site and profile scores found in binding sites. Take the median
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
82 score out of these to store in .params file, using it later
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
83 for outputting binding sites or peaks with higher confidence.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
84
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
85 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
86 # Define argument parser.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
87 p = ap.ArgumentParser(add_help=False,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
88 prog="graphprot_train_wrapper.py",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
89 description=help_description,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
90 formatter_class=ap.MetavarTypeHelpFormatter)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
91
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
92 # Argument groups.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
93 p_man = p.add_argument_group("REQUIRED ARGUMENTS")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
94 p_opt = p.add_argument_group("OPTIONAL ARGUMENTS")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
95
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
96 # Required arguments.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
97 p_opt.add_argument("-h", "--help",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
98 action="help",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
99 help="Print help message")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
100 p_man.add_argument("--pos",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
101 dest="in_pos_fa",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
102 type=str,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
103 required=True,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
104 help="Positive (= binding site) sequences .fa file "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
105 "for model training (option -fasta)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
106 p_man.add_argument("--neg",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
107 dest="in_neg_fa",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
108 type=str,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
109 required=True,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
110 help="Negative sequences .fa file for model "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
111 "training (option -negfasta)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
112 p_man.add_argument("--data-id",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
113 dest="data_id",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
114 type=str,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
115 required=True,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
116 help="Data ID (option -prefix)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
117 # Additional arguments.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
118 p_opt.add_argument("--opt-set-size",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
119 dest="opt_set_size",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
120 type=int,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
121 default=500,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
122 help="Hyperparameter optimization set size (taken "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
123 "away from both --pos and --neg) (default: 500)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
124 p_opt.add_argument("--opt-pos",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
125 dest="opt_pos_fa",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
126 type=str,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
127 help="Positive (= binding site) sequences .fa file "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
128 "for hyperparameter optimization (default: take "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
129 "--opt-set-size from --pos)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
130 p_opt.add_argument("--opt-neg",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
131 dest="opt_neg_fa",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
132 type=str,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
133 help="Negative sequences .fa file for hyperparameter "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
134 "optimization (default: take --opt-set-size "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
135 "from --neg)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
136 p_opt.add_argument("--min-train",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
137 dest="min_train",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
138 type=int,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
139 default=500,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
140 help="Minimum amount of training sites demanded "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
141 "(default: 500)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
142 p_opt.add_argument("--disable-cv",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
143 dest="disable_cv",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
144 default=False,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
145 action="store_true",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
146 help="Disable cross validation step (default: false)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
147 p_opt.add_argument("--disable-motifs",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
148 dest="disable_motifs",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
149 default=False,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
150 action="store_true",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
151 help="Disable motif generation step (default: false)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
152 p_opt.add_argument("--gp-output",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
153 dest="gp_output",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
154 default=False,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
155 action="store_true",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
156 help="Print output produced by GraphProt "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
157 "(default: false)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
158 p_opt.add_argument("--str-model",
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
159 dest="train_str_model",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
160 default=False,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
161 action="store_true",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
162 help="Train a structure model (default: train "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
163 "a sequence model)")
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
164 return p
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
165
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
166
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
167 ###############################################################################
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
168
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
169 if __name__ == '__main__':
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
170
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
171 # Setup argparse.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
172 parser = setup_argument_parser()
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
173 # Read in command line arguments.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
174 args = parser.parse_args()
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
175
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
176 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
177 Do all sorts of sanity checking.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
178
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
179 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
180 # Check for Linux.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
181 assert "linux" in sys.platform, "please use Linux"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
182 # Check tool availability.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
183 assert gplib.is_tool("GraphProt.pl"), "GraphProt.pl not in PATH"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
184 # Check file inputs.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
185 assert os.path.exists(args.in_pos_fa), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
186 "positives .fa file \"%s\" not found" % (args.in_pos_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
187 assert os.path.exists(args.in_neg_fa), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
188 "negatives .fa file \"%s\" not found" % (args.in_neg_fa)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
189 # Count .fa entries.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
190 c_pos_fa = gplib.count_fasta_headers(args.in_pos_fa)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
191 c_neg_fa = gplib.count_fasta_headers(args.in_neg_fa)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
192 assert c_pos_fa, "positives .fa file \"%s\" no headers found" % \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
193 (args.in_pos_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
194 assert c_neg_fa, "negatives .fa file \"%s\" no headers found" % \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
195 (args.in_neg_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
196 print("# positive .fa sequences: %i" % (c_pos_fa))
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
197 print("# negative .fa sequences: %i" % (c_neg_fa))
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
198 # Check additional files.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
199 if args.opt_pos_fa:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
200 assert args.opt_neg_fa, "--opt-pos but no --opt-neg given"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
201 if args.opt_neg_fa:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
202 assert args.opt_pos_fa, "--opt-neg but no --opt-pos given"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
203 # Check for lowercase only sequences, which cause GP to crash.
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
204 error_mess = "input sequences encountered containing "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
205 "only lowercase characters or lowercase characters in between "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
206 "uppercase characters. Please provide either all uppercase "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
207 "sequences or sequences containing uppercase regions surrounded "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
208 "by lowercase context regions for structure calculation (see "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
209 "viewpoint concept in original GraphProt publication "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
210 "for more details)"
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
211 seqs_dic = gplib.read_fasta_into_dic(args.in_pos_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
212 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
213 assert not bad_ids, "%s" % (error_mess)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
214 seqs_dic = gplib.read_fasta_into_dic(args.in_neg_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
215 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
216 assert not bad_ids, "%s" % (error_mess)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
217 if args.opt_pos_fa:
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
218 seqs_dic = gplib.read_fasta_into_dic(args.opt_pos_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
219 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
220 assert not bad_ids, "%s" % (error_mess)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
221 if args.opt_neg_fa:
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
222 seqs_dic = gplib.read_fasta_into_dic(args.opt_neg_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
223 bad_ids = gplib.check_seqs_dic_format(seqs_dic)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
224 assert not bad_ids, "%s" % (error_mess)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
225
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
226 # If separate hyperparameter optimization .fa files (--opt-pos and --opt-neg) are given.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
227 if args.opt_pos_fa and args.opt_neg_fa:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
228 c_parop_pos_fa = gplib.count_fasta_headers(args.opt_pos_fa)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
229 c_parop_neg_fa = gplib.count_fasta_headers(args.opt_neg_fa)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
230 assert c_parop_pos_fa, "--opt-pos .fa file \"%s\" no headers found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
231 % (args.opt_pos_fa)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
232 assert c_parop_neg_fa, "--opt-neg .fa file \"%s\" no headers found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
233 % (args.opt_neg_fa)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
234 # Require at least --min-train (default: 500) sequences in both --pos and --neg.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
235 assert c_pos_fa >= args.min_train, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
236 "--pos for training < %i, please provide more (try at least "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
237 "> 1000, the more the better)" % (args.min_train)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
238 assert c_neg_fa >= args.min_train, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
239 "--neg for training < %i, please provide more (try at least "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
240 "> 1000, the more the better)" % (args.min_train)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
241 # Looking closer at ratios.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
242 pos_neg_ratio = c_parop_pos_fa / c_parop_neg_fa
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
243 if pos_neg_ratio < 0.8 or pos_neg_ratio > 1.25:
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
244 assert 0, "ratio of --opt-pos to --opt-neg < 0.8 or > 1.25 "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
245 "(ratio = %f). Try to keep ratio closer to 1 or better use "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
246 "identical numbers (keep in mind that performance measures "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
247 "such as accuracy or AUROC are not suitable for imbalanced "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
248 " datasets!)" % (pos_neg_ratio)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
249 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
250 # No separate optimization sets given: compute how many sites remain for training after subtracting --opt-set-size.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
251 c_pos_train = c_pos_fa - args.opt_set_size
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
252 c_neg_train = c_neg_fa - args.opt_set_size
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
253 # Start complaining.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
254 assert c_pos_fa >= args.opt_set_size, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
255 "# positives < --opt-set-size (%i < %i)" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
256 % (c_pos_fa, args.opt_set_size)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
257 assert c_neg_fa >= args.opt_set_size, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
258 "# negatives < --opt-set-size (%i < %i)" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
259 % (c_neg_fa, args.opt_set_size)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
260 assert c_pos_train >= args.opt_set_size, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
261 "# positives remaining for training < --opt-set-size "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
262 "(%i < %i)" % (c_pos_train, args.opt_set_size)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
263 assert c_neg_train >= args.opt_set_size, "# negatives remaining "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
264 "for training < --opt-set-size (%i < %i)" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
265 % (c_neg_train, args.opt_set_size)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
266 # Require at least args.min_train positives and negatives for training.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
267 assert c_pos_train >= args.min_train, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
268 "# positives remaining for training < %i, please provide more "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
269 " (try at least > 1000, the more the better)" % (args.min_train)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
270 assert c_neg_train >= args.min_train, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
271 "# negatives remaining for training < %i, please provide more "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
272 "(try at least > 1000, the more the better)" % (args.min_train)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
273 # Reject imbalanced training sets (positives/negatives ratio outside 0.8-1.25).
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
274 pos_neg_ratio = c_pos_train / c_neg_train
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
275 if pos_neg_ratio < 0.8 or pos_neg_ratio > 1.25:
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
276 assert 0, "ratio of --pos to --neg < 0.8 or > 1.25 "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
277 "(ratio = %f). Try to keep ratio closer to 1 or better use "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
278 "identical numbers (keep in mind that performance measures "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
279 "such as accuracy or AUROC are not suitable for imbalanced "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
280 "datasets!)" % (pos_neg_ratio)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
281
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
282 """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
283 Generate parop + train .fa output files for hyperparameter
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
284 optimization + training.
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
285
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
286 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
287 # Output files for training.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
288 pos_parop_fa = args.data_id + ".positives.parop.fa"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
289 neg_parop_fa = args.data_id + ".negatives.parop.fa"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
290 pos_train_fa = args.data_id + ".positives.train.fa"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
291 neg_train_fa = args.data_id + ".negatives.train.fa"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
292
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
293 # If parop .fa files given.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
294 if args.opt_pos_fa and args.opt_neg_fa:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
295 # Just copy parop and train files.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
296 gplib.make_file_copy(args.opt_pos_fa, pos_parop_fa)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
297 gplib.make_file_copy(args.opt_neg_fa, neg_parop_fa)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
298 gplib.make_file_copy(args.in_pos_fa, pos_train_fa)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
299 gplib.make_file_copy(args.in_neg_fa, neg_train_fa)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
300 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
301 # Generate parop + train .fa files from input .fa files.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
302 gplib.split_fasta_into_test_train_files(args.in_pos_fa, pos_parop_fa,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
303 pos_train_fa,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
304 test_size=args.opt_set_size)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
305 gplib.split_fasta_into_test_train_files(args.in_neg_fa, neg_parop_fa,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
306 neg_train_fa,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
307 test_size=args.opt_set_size)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
308
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
309 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
310 Do the hyperparameter optimization.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
311
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
312 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
313 print("Starting hyperparameter optimization (-action ls) ... ")
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
314 check_cmd = "GraphProt.pl -action ls -prefix " + args.data_id + \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
315 " -fasta " + pos_parop_fa + " -negfasta " + neg_parop_fa
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
316 # If sequence model should be trained (default).
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
317 if not args.train_str_model:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
318 check_cmd += " -onlyseq"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
319 print(check_cmd)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
320 output = subprocess.getoutput(check_cmd)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
321 params_file = args.data_id + ".params"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
322 assert os.path.exists(params_file), "Hyperparameter optimization output "\
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
323 " .params file \"%s\" not found" % (params_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
324 # Add model type to params file.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
325 if args.train_str_model:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
326 gplib.echo_add_to_file("model_type: structure", params_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
327 else:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
328 gplib.echo_add_to_file("model_type: sequence", params_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
329 # Get parameter string.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
330 param_string = gplib.graphprot_get_param_string(params_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
331
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
332 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
333 Do the model training. (Yowza!)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
334
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
335 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
336 print("Starting model training (-action train) ... ")
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
337 check_cmd = "GraphProt.pl -action train -prefix " + args.data_id \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
338 + " -fasta " + pos_train_fa + " -negfasta " + neg_train_fa \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
339 + " " + param_string
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
340 print(check_cmd)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
341 output = subprocess.getoutput(check_cmd)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
342 assert output, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
343 "The following call of GraphProt.pl produced no output:\n%s" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
344 % (check_cmd)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
345 if args.gp_output:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
346 print(output)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
347 model_file = args.data_id + ".model"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
348 assert os.path.exists(model_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
349 "Training output .model file \"%s\" not found" % (model_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
350
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
351 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
352 Do the 10-fold cross validation.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
353
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
354 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
355 if not args.disable_cv:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
356 print("Starting 10-fold cross validation (-action cv) ... ")
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
357 check_cmd = "GraphProt.pl -action cv -prefix " + args.data_id \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
358 + " -fasta " + pos_train_fa + " -negfasta " + neg_train_fa \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
359 + " " + param_string + " -model " + model_file
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
360 print(check_cmd)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
361 output = subprocess.getoutput(check_cmd)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
362 assert output, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
363 "The following call of GraphProt.pl produced no output:\n%s" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
364 % (check_cmd)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
365 if args.gp_output:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
366 print(output)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
367 cv_results_file = args.data_id + ".cv_results"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
368 assert os.path.exists(cv_results_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
369 "CV output .cv_results file \"%s\" not found" % (cv_results_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
370
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
371 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
372 Do the motif generation.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
373
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
374 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
375 if not args.disable_motifs:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
376 print("Starting motif generation (-action motif) ... ")
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
377 check_cmd = "GraphProt.pl -action motif -prefix " + args.data_id \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
378 + " -fasta " + pos_train_fa + " -negfasta " + neg_train_fa \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
379 + " " + param_string + " -model " + model_file
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
380 print(check_cmd)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
381 output = subprocess.getoutput(check_cmd)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
382 assert output, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
383 "The following call of GraphProt.pl produced no output:\n%s" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
384 % (check_cmd)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
385 if args.gp_output:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
386 print(output)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
387 seq_motif_file = args.data_id + ".sequence_motif"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
388 seq_motif_png_file = args.data_id + ".sequence_motif.png"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
389 assert os.path.exists(seq_motif_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
390 "Motif output .sequence_motif file \"%s\" not found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
391 % (seq_motif_file)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
392 assert os.path.exists(seq_motif_png_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
393 "Motif output .sequence_motif.png file \"%s\" not found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
394 % (seq_motif_png_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
395 if args.train_str_model:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
396 str_motif_file = args.data_id + ".structure_motif"
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
397 str_motif_png_file = args.data_id + ".structure_motif.png"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
398 assert os.path.exists(str_motif_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
399 "Motif output .structure_motif file \"%s\" not found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
400 % (str_motif_file)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
401 assert os.path.exists(str_motif_png_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
402 "Motif output .structure_motif.png file \"%s\" not found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
403 % (str_motif_png_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
404
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
405 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
406 Do whole site predictions on positive training set.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
407
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
408 """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
409 print("Starting whole site predictions on positive training set "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
410 " (-action predict) ... ")
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
411 check_cmd = "GraphProt.pl -action predict -prefix " + args.data_id \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
412 + " -fasta " + pos_train_fa + " " + param_string \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
413 + " -model " + model_file
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
414 print(check_cmd)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
415 output = subprocess.getoutput(check_cmd)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
416 assert output, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
417 "The following call of GraphProt.pl produced no output:\n%s" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
418 % (check_cmd)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
419 if args.gp_output:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
420 print(output)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
421 ws_predictions_file = args.data_id + ".predictions"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
422 assert os.path.exists(ws_predictions_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
423 "Whole site prediction output .predictions file \"%s\" not found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
424 % (ws_predictions_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
425
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
426 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
427 Do profile predictions on positive training set.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
428
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
429 """
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
430 print("Starting profile predictions on positive training set "
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
431 "-action predict_profile) ... ")
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
432 check_cmd = "GraphProt.pl -action predict_profile -prefix " \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
433 + args.data_id + " -fasta " + pos_train_fa + " " \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
434 + param_string + " -model " + model_file
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
435 print(check_cmd)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
436 output = subprocess.getoutput(check_cmd)
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
437 assert output, \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
438 "The following call of GraphProt.pl produced no output:\n%s" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
439 % (check_cmd)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
440 if args.gp_output:
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
441 print(output)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
442 profile_predictions_file = args.data_id + ".profile"
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
443 assert os.path.exists(profile_predictions_file), \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
444 "Profile prediction output .profile file \"%s\" not found" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
445 % (profile_predictions_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
446
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
447 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
448 Get 50 % score (median) for .predictions and .profile file.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
449 For .profile, first extract for each site the maximum score, and then
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
450 from the list of maximum site scores get the median.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
451 For whole site .predictions, get the median from the site scores list.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
452
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
453 """
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
454 print("Getting .profile and .predictions median scores ... ")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
455
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
456 # Whole site scores median.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
457 ws_pred_median = \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
458 gplib.graphprot_predictions_get_median(ws_predictions_file)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
459 # Profile top site scores median.
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
460 profile_median = \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
461 gplib.graphprot_profile_get_tsm(profile_predictions_file,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
462 profile_type="profile")
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
463 ws_pred_string = "pos_train_ws_pred_median: %f" % (ws_pred_median)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
464 profile_string = "pos_train_profile_median: %f" % (profile_median)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
465 gplib.echo_add_to_file(ws_pred_string, params_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
466 gplib.echo_add_to_file(profile_string, params_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
467 # Average profile top site scores median for extlr 1 to 10.
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
468 for i in range(10):
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
469 i += 1
3
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
470 avg_profile_median = \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
471 gplib.graphprot_profile_get_tsm(profile_predictions_file,
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
472 profile_type="avg_profile",
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
473 avg_profile_extlr=i)
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
474
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
475 avg_profile_string = "pos_train_avg_profile_median_%i: %f" \
ace92c9a4653 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit efcac98677c3ea9039c1c61eaa9e58f78287ccb3"
bgruening
parents: 1
diff changeset
476 % (i, avg_profile_median)
1
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
477 gplib.echo_add_to_file(avg_profile_string, params_file)
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
478
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
479 print("Script: I'm done.")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
480 print("Author: Good. Now go back to your file system directory.")
20429f4c1b95 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit f3fb925b83a4982e0cf9a0c11ff93ecbb8e4e6d5"
bgruening
parents:
diff changeset
481 print("Script: Ok.")