diff protease.py @ 0:c7a363d7ab26 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/protease_prediction commit e933135e5dc9aa8c96800fd10b62b256ac3a8523-dirty
author bgruening
date Sat, 12 Mar 2016 19:28:41 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/protease.py	Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+description = """
+Explicit Decomposition with Neighborhood (EDeN) utility program.
+Protease modelling driver.
+"""  # passed to main_script as ``description`` in the __main__ block
+
+epilog = """
+Author: Fabrizio Costa
+Copyright: 2015
+License: GPL
+Maintainer: Fabrizio Costa
+Email: costa@informatik.uni-freiburg.de
+Status: Production
+
+Cite:  Costa, Fabrizio, and Kurt De Grave, 'Fast neighborhood subgraph pairwise
+distance kernel', Proceedings of the 26th International Conference on Machine
+Learning. 2010. """  # passed to main_script as ``epilog`` in the __main__ block
+
+import os
+import logging
+
+from eden.graph import Vectorizer
+from eden.model_base import ModelInitializerBase, main_script
+from eden.converter.fasta import fasta_to_sequence
+from eden.modifier.seq import seq_to_seq
+from eden.modifier.seq import shuffle_modifier
+from eden.modifier.seq import mark_modifier
+from eden.converter.fasta import sequence_to_eden
+
+
+class ModelInitializer(ModelInitializerBase):
+    """Hooks handed to main_script: data loading, pre-processing, vectorizer, CLI options."""
+
+    def load_data(self, args):
+        """Return the sequences read from the FASTA file ``args.input_file``."""
+        return fasta_to_sequence(args.input_file)
+
+    def load_positive_data(self, args):
+        """Return the input sequences unchanged (same as ``load_data``)."""
+        return self.load_data(args)
+
+    def load_negative_data(self, args):
+        """Return the input sequences passed through ``shuffle_modifier``."""
+        return seq_to_seq(self.load_data(args), modifier=shuffle_modifier,
+                          times=args.negative_ratio, order=args.shuffle_order)
+
+    def pre_processor_init(self, args):
+        """Return the sequence-to-graph callable and its (empty) parameter dict."""
+        def pre_processor(seqs, **kwargs):  # was ``**args``, which shadowed the outer ``args``
+            for position, mark in ((0.5, '%'), (0.0, '@'), (1.0, '*')):
+                seqs = seq_to_seq(seqs, modifier=mark_modifier, position=position, mark=mark)
+            return sequence_to_eden(seqs)
+
+        return pre_processor, {}
+
+    def vectorizer_init(self, args):
+        """Return a default-constructed ``Vectorizer`` and its ``complexity`` value list."""
+        return Vectorizer(), {'complexity': [2, 3, 4, 5, 6]}
+
+    def add_arguments(self, parser):
+        """Register ``--version`` on *parser* and return the parser."""
+        parser.add_argument('--version', action='version', version='0.1')
+        return parser
+
+    def add_arguments_fit(self, parser):
+        """Register the input-file and negative-sampling options; return *parser*."""
+        parser.add_argument("-i", "--input-file", dest="input_file", required=True,
+                            help="Path to FASTA file containing input sequences.")
+        # The help text is built from adjacent literals: a backslash continuation
+        # inside a single literal embedded the next line's indentation in the text.
+        parser.add_argument("--negative-ratio",
+                            dest="negative_ratio",
+                            type=int,
+                            help="Relative size ratio for the randomly permuted negative "
+                                 "instances w.r.t. the positive instances.",
+                            default=2)
+        parser.add_argument("--shuffle-order",
+                            dest="shuffle_order",
+                            type=int,
+                            help="Order of the k-mer for the random shuffling procedure.",
+                            default=2)
+        return parser
+
+    def add_arguments_estimate(self, parser):
+        """Register the same options as ``add_arguments_fit`` and return *parser*."""
+        return self.add_arguments_fit(parser)
+
+if __name__ == "__main__":
+    # Script entry point: build the protease initializer and pass it, together
+    # with the help texts and the root logger, to main_script.
+    model_initializer = ModelInitializer()
+    main_script(model_initializer=model_initializer, description=description,
+                epilog=epilog, prog_name=os.path.basename(__file__),
+                logger=logging.getLogger())