Mercurial > repos > bgruening > protease_prediction
diff protease.py @ 0:c7a363d7ab26 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/protease_prediction commit e933135e5dc9aa8c96800fd10b62b256ac3a8523-dirty
author | bgruening |
---|---|
date | Sat, 12 Mar 2016 19:28:41 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
# Reconstructed from the changeset diff that adds b/protease.py
# (--- /dev/null, @@ -0,0 +1,95 @@); the leading "+" diff markers are removed.

import logging
import os

from eden.converter.fasta import fasta_to_sequence, sequence_to_eden
from eden.graph import Vectorizer
from eden.model_base import ModelInitializerBase, main_script
from eden.modifier.seq import mark_modifier, seq_to_seq, shuffle_modifier

# Program description handed to main_script() below.
description = """
Explicit Decomposition with Neighborhood (EDeN) utility program.
Protease modelling driver.
"""

# Epilog (authorship / citation) text handed to main_script() below.
epilog = """
Author: Fabrizio Costa
Copyright: 2015
License: GPL
Maintainer: Fabrizio Costa
Email: costa@informatik.uni-freiburg.de
Status: Production

Cite: Costa, Fabrizio, and Kurt De Grave, 'Fast neighborhood subgraph pairwise
distance kernel', Proceedings of the 26th International Conference on Machine
Learning. 2010. """


class ModelInitializer(ModelInitializerBase):
    """Hooks passed to ``main_script`` for the protease model.

    Provides data loading, sequence pre-processing, the vectorizer and the
    command-line arguments.  The method names are presumably the override
    points defined by ``ModelInitializerBase`` -- verify against the EDeN
    version in use.
    """

    def load_data(self, args):
        """Return the sequences read from the FASTA file ``args.input_file``."""
        return fasta_to_sequence(args.input_file)

    def load_positive_data(self, args):
        """Return the input sequences unchanged as the positive instances."""
        return self.load_data(args)

    def load_negative_data(self, args):
        """Return negative instances derived from the input sequences.

        The input sequences are passed through ``seq_to_seq`` with
        ``shuffle_modifier``; ``args.negative_ratio`` is forwarded as
        ``times`` and ``args.shuffle_order`` as ``order``.
        """
        return seq_to_seq(
            self.load_data(args),
            modifier=shuffle_modifier,
            times=args.negative_ratio,
            order=args.shuffle_order,
        )

    def pre_processor_init(self, args):
        """Return ``(pre_processor, pre_processor_parameters)``.

        ``pre_processor`` turns sequences into EDeN graphs; the parameter
        dictionary is empty.
        """

        # Extra keyword arguments are accepted and ignored.  The catch-all is
        # named ``params`` so it no longer shadows the method's ``args``.
        def pre_processor(seqs, **params):
            # Apply mark_modifier three times, in this order: '%' at relative
            # position 0.5, '@' at 0.0 and '*' at 1.0.
            # NOTE(review): presumably these tag the middle, start and end of
            # each sequence -- confirm against eden.modifier.seq.
            for position, mark in ((0.5, '%'), (0.0, '@'), (1.0, '*')):
                seqs = seq_to_seq(seqs,
                                  modifier=mark_modifier,
                                  position=position,
                                  mark=mark)
            return sequence_to_eden(seqs)

        pre_processor_parameters = {}
        return pre_processor, pre_processor_parameters

    def vectorizer_init(self, args):
        """Return ``(vectorizer, vectorizer_parameters)``.

        The vectorizer is a default-constructed ``Vectorizer``; the parameter
        dictionary lists the candidate ``complexity`` values 2 to 6.
        """
        vectorizer = Vectorizer()
        vectorizer_parameters = {'complexity': [2, 3, 4, 5, 6]}
        return vectorizer, vectorizer_parameters

    def add_arguments(self, parser):
        """Add the ``--version`` option to ``parser`` and return it."""
        parser.add_argument('--version', action='version', version='0.1')
        return parser

    def add_arguments_fit(self, parser):
        """Add the input-file and negative-sampling options; return ``parser``."""
        parser.add_argument("-i", "--input-file",
                            dest="input_file",
                            help="Path to FASTA file containing input sequences.",
                            required=True)
        # Implicit string concatenation instead of a backslash inside the
        # literal, which embedded the source indentation in the help text.
        parser.add_argument("--negative-ratio",
                            dest="negative_ratio",
                            type=int,
                            help="Relative size ratio for the randomly permuted "
                                 "negative instances w.r.t. the positive instances.",
                            default=2)
        parser.add_argument("--shuffle-order",
                            dest="shuffle_order",
                            type=int,
                            help="Order of the k-mer for the random shuffling procedure.",
                            default=2)
        return parser

    def add_arguments_estimate(self, parser):
        """Add the same options as ``add_arguments_fit``; return ``parser``."""
        return self.add_arguments_fit(parser)


if __name__ == "__main__":
    model_initializer = ModelInitializer()
    main_script(model_initializer=model_initializer,
                description=description,
                epilog=epilog,
                prog_name=os.path.basename(__file__),
                logger=logging.getLogger())