Mercurial > repos > bgruening > protease_prediction
comparison protease.py @ 0:c7a363d7ab26 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/protease_prediction commit e933135e5dc9aa8c96800fd10b62b256ac3a8523-dirty
author | bgruening |
---|---|
date | Sat, 12 Mar 2016 19:28:41 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7a363d7ab26 |
---|---|
1 #!/usr/bin/env python | |
2 | |
# Passed as the ``description`` keyword to ``main_script`` in the
# ``__main__`` guard at the bottom of this file (presumably rendered in the
# command-line help -- confirm against eden.model_base).
description = """
Explicit Decomposition with Neighborhood (EDeN) utility program.
Protease modelling driver.
"""

# Passed as the ``epilog`` keyword to ``main_script``; carries authorship,
# licensing and citation details for the tool.
epilog = """
Author: Fabrizio Costa
Copyright: 2015
License: GPL
Maintainer: Fabrizio Costa
Email: costa@informatik.uni-freiburg.de
Status: Production

Cite: Costa, Fabrizio, and Kurt De Grave, 'Fast neighborhood subgraph pairwise
distance kernel', Proceedings of the 26th International Conference on Machine
Learning. 2010. """
19 | |
20 import os | |
21 import logging | |
22 | |
23 from eden.graph import Vectorizer | |
24 from eden.model_base import ModelInitializerBase, main_script | |
25 from eden.converter.fasta import fasta_to_sequence | |
26 from eden.modifier.seq import seq_to_seq | |
27 from eden.modifier.seq import shuffle_modifier | |
28 from eden.modifier.seq import mark_modifier | |
29 from eden.converter.fasta import sequence_to_eden | |
30 | |
31 | |
class ModelInitializer(ModelInitializerBase):
    """Protease-specific hooks handed to EDeN's ``main_script`` driver.

    Each method supplies one ingredient of the modelling pipeline: how to
    read the input sequences, how to derive negative examples, how to turn
    sequences into graphs, which vectorizer to use, and which command-line
    options the tool accepts.
    """

    def load_data(self, args):
        """Return the sequences read from the FASTA file ``args.input_file``."""
        return fasta_to_sequence(args.input_file)

    def load_positive_data(self, args):
        """Return the positive instances: the input sequences unchanged."""
        return self.load_data(args)

    def load_negative_data(self, args):
        """Return negative instances obtained by shuffling the input.

        The input sequences are passed through ``shuffle_modifier`` with
        ``times=args.negative_ratio`` and ``order=args.shuffle_order``.
        """
        seqs = self.load_data(args)
        return seq_to_seq(seqs,
                          modifier=shuffle_modifier,
                          times=args.negative_ratio,
                          order=args.shuffle_order)

    def pre_processor_init(self, args):
        """Return ``(pre_processor, pre_processor_parameters)``.

        ``pre_processor`` marks every sequence at three relative positions
        and converts the marked sequences to graphs with
        ``sequence_to_eden``. No tunable parameters are exposed, so the
        parameter dictionary is empty.
        """
        # (relative position, mark symbol), applied in exactly this order.
        # NOTE(review): the middle mark goes in first, presumably so that
        # position 0.5 is computed before the end marks change the sequence
        # length -- confirm against eden.modifier.seq.mark_modifier.
        marks = ((0.5, '%'), (0.0, '@'), (1.0, '*'))

        # The catch-all is named ``kwargs`` so that it no longer shadows the
        # ``args`` namespace of the enclosing method; extra keywords are
        # accepted and ignored, as before.
        def pre_processor(seqs, **kwargs):
            for position, mark in marks:
                seqs = seq_to_seq(seqs,
                                  modifier=mark_modifier,
                                  position=position,
                                  mark=mark)
            return sequence_to_eden(seqs)

        pre_processor_parameters = {}
        return pre_processor, pre_processor_parameters

    def vectorizer_init(self, args):
        """Return ``(vectorizer, vectorizer_parameters)``.

        The vectorizer is a default-constructed ``Vectorizer``; the
        parameter dictionary lists the candidate ``complexity`` values.
        """
        vectorizer = Vectorizer()
        vectorizer_parameters = {'complexity': [2, 3, 4, 5, 6]}
        return vectorizer, vectorizer_parameters

    def add_arguments(self, parser):
        """Register the ``--version`` option and return ``parser``."""
        parser.add_argument('--version', action='version', version='0.1')
        return parser

    def add_arguments_fit(self, parser):
        """Register the input and negative-sampling options; return ``parser``."""
        parser.add_argument("-i", "--input-file",
                            dest="input_file",
                            help="Path to FASTA file containing input sequences.",
                            required=True)
        # Adjacent string literals replace the old backslash continuation,
        # which embedded the source indentation in the help message.
        parser.add_argument("--negative-ratio",
                            dest="negative_ratio",
                            type=int,
                            help="Relative size ratio for the randomly permuted "
                                 "negative instances w.r.t. the positive instances.",
                            default=2)
        parser.add_argument("--shuffle-order",
                            dest="shuffle_order",
                            type=int,
                            help="Order of the k-mer for the random shuffling procedure.",
                            default=2)
        return parser

    def add_arguments_estimate(self, parser):
        """Register the same options as :meth:`add_arguments_fit`."""
        return self.add_arguments_fit(parser)
88 | |
if __name__ == "__main__":
    # Script entry point: build the protease initializer and hand control to
    # the generic EDeN driver together with the help texts defined above.
    model_initializer = ModelInitializer()
    main_script(
        model_initializer=model_initializer,
        description=description,
        epilog=epilog,
        # Use the script's own file name as the program name.
        prog_name=os.path.basename(__file__),
        # The root logger is what gets passed to the driver.
        logger=logging.getLogger(),
    )