comparison protease.py @ 0:c7a363d7ab26 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/protease_prediction commit e933135e5dc9aa8c96800fd10b62b256ac3a8523-dirty
author bgruening
date Sat, 12 Mar 2016 19:28:41 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c7a363d7ab26
1 #!/usr/bin/env python
2
# Short program summary; passed as ``description`` to ``main_script`` in the
# ``__main__`` block of this file.
description = """
Explicit Decomposition with Neighborhood (EDeN) utility program.
Protease modelling driver.
"""

# Authorship, license and citation text; passed as ``epilog`` to
# ``main_script`` in the ``__main__`` block of this file.
epilog = """
Author: Fabrizio Costa
Copyright: 2015
License: GPL
Maintainer: Fabrizio Costa
Email: costa@informatik.uni-freiburg.de
Status: Production

Cite: Costa, Fabrizio, and Kurt De Grave, 'Fast neighborhood subgraph pairwise
distance kernel', Proceedings of the 26th International Conference on Machine
Learning. 2010. """
19
20 import os
21 import logging
22
23 from eden.graph import Vectorizer
24 from eden.model_base import ModelInitializerBase, main_script
25 from eden.converter.fasta import fasta_to_sequence
26 from eden.modifier.seq import seq_to_seq
27 from eden.modifier.seq import shuffle_modifier
28 from eden.modifier.seq import mark_modifier
29 from eden.converter.fasta import sequence_to_eden
30
31
class ModelInitializer(ModelInitializerBase):
    """Protease-specific configuration object handed to ``main_script``.

    Provides FASTA data loading, shuffled negative instances, the sequence
    pre-processor, the vectorizer and the command-line options used by this
    script.
    """

    def load_data(self, args):
        """Return the sequences read from the FASTA file ``args.input_file``."""
        return fasta_to_sequence(args.input_file)

    def load_positive_data(self, args):
        """Return the input sequences, unmodified, as the positive set."""
        return self.load_data(args)

    def load_negative_data(self, args):
        """Return shuffled versions of the input sequences as the negative set.

        The input is re-read with ``load_data`` and run through
        ``seq_to_seq`` with ``shuffle_modifier``; ``args.negative_ratio`` is
        forwarded as ``times`` and ``args.shuffle_order`` as ``order``.
        """
        return seq_to_seq(self.load_data(args),
                          modifier=shuffle_modifier,
                          times=args.negative_ratio,
                          order=args.shuffle_order)

    def pre_processor_init(self, args):
        """Return ``(pre_processor, pre_processor_parameters)``.

        ``pre_processor`` marks each sequence three times and converts the
        result to graphs with ``sequence_to_eden``. The parameter dictionary
        is empty. ``args`` is not used here.
        """
        # Applied in this order. Positions are presumably relative offsets
        # (middle, start, end) -- TODO confirm against ``mark_modifier``.
        marks = ((0.5, '%'), (0.0, '@'), (1.0, '*'))

        def pre_processor(seqs, **options):
            # Extra keyword options are accepted but ignored. The catch-all is
            # named ``options`` so it no longer shadows the enclosing ``args``.
            for position, mark in marks:
                seqs = seq_to_seq(seqs,
                                  modifier=mark_modifier,
                                  position=position,
                                  mark=mark)
            return sequence_to_eden(seqs)

        pre_processor_parameters = {}
        return pre_processor, pre_processor_parameters

    def vectorizer_init(self, args):
        """Return ``(vectorizer, vectorizer_parameters)``.

        The vectorizer is a default-constructed ``Vectorizer``; the parameter
        dictionary lists the candidate ``complexity`` values 2 through 6.
        ``args`` is not used here.
        """
        vectorizer = Vectorizer()
        vectorizer_parameters = {'complexity': [2, 3, 4, 5, 6]}
        return vectorizer, vectorizer_parameters

    def add_arguments(self, parser):
        """Add the ``--version`` option to ``parser`` and return it."""
        parser.add_argument('--version', action='version', version='0.1')
        return parser

    def add_arguments_fit(self, parser):
        """Add the input-file and negative-sampling options; return ``parser``.

        Options added: ``-i/--input-file`` (required), ``--negative-ratio``
        (int, default 2) and ``--shuffle-order`` (int, default 2).
        """
        parser.add_argument("-i", "--input-file",
                            dest="input_file",
                            help="Path to FASTA file containing input sequences.",
                            required=True)
        # Adjacent literals are used instead of a backslash continuation
        # inside the string, which would embed the source indentation in the
        # help text.
        parser.add_argument("--negative-ratio",
                            dest="negative_ratio",
                            type=int,
                            help="Relative size ratio for the randomly permuted "
                                 "negative instances w.r.t. the positive instances.",
                            default=2)
        parser.add_argument("--shuffle-order",
                            dest="shuffle_order",
                            type=int,
                            help="Order of the k-mer for the random shuffling procedure.",
                            default=2)
        return parser

    def add_arguments_estimate(self, parser):
        """Add the same options as ``add_arguments_fit``; return ``parser``."""
        return self.add_arguments_fit(parser)
88
if __name__ == "__main__":
    # Script entry point: build the protease initializer and pass it, with
    # the module's help texts, to the ``main_script`` driver from
    # ``eden.model_base``.
    initializer = ModelInitializer()
    script_name = os.path.basename(__file__)
    root_logger = logging.getLogger()
    main_script(model_initializer=initializer,
                description=description,
                epilog=epilog,
                prog_name=script_name,
                logger=root_logger)