Mercurial > repos > bgruening > protease_prediction
view protease.py @ 0:c7a363d7ab26 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/protease_prediction commit e933135e5dc9aa8c96800fd10b62b256ac3a8523-dirty
author | bgruening |
---|---|
date | Sat, 12 Mar 2016 19:28:41 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Command-line driver that plugs protease sequence data into EDeN.

The module defines a ``ModelInitializer`` (a ``ModelInitializerBase``
subclass) and hands it to ``eden.model_base.main_script``, which owns the
actual command-line handling.
"""

description = """
Explicit Decomposition with Neighborhood (EDeN) utility program.
Protease modelling driver.
"""

epilog = """
Author: Fabrizio Costa
Copyright: 2015
License: GPL
Maintainer: Fabrizio Costa
Email: costa@informatik.uni-freiburg.de
Status: Production

Cite: Costa, Fabrizio, and Kurt De Grave, 'Fast neighborhood subgraph pairwise
distance kernel', Proceedings of the 26th International Conference on Machine
Learning. 2010.
"""

import os
import logging

from eden.graph import Vectorizer
from eden.model_base import ModelInitializerBase, main_script
from eden.converter.fasta import fasta_to_sequence
from eden.modifier.seq import seq_to_seq
from eden.modifier.seq import shuffle_modifier
from eden.modifier.seq import mark_modifier
from eden.converter.fasta import sequence_to_eden


class ModelInitializer(ModelInitializerBase):
    """Supplies data loaders, pre-processor, vectorizer and CLI options."""

    def load_data(self, args):
        """Return the sequences read from the FASTA file ``args.input_file``."""
        return fasta_to_sequence(args.input_file)

    def load_positive_data(self, args):
        """Return the positive instances: the input sequences as they are."""
        return self.load_data(args)

    def load_negative_data(self, args):
        """Return negative instances derived by shuffling the input sequences.

        ``args.negative_ratio`` is forwarded as ``times`` and
        ``args.shuffle_order`` as ``order`` to ``seq_to_seq`` with
        ``shuffle_modifier``.
        """
        seqs = self.load_data(args)
        return seq_to_seq(
            seqs,
            modifier=shuffle_modifier,
            times=args.negative_ratio,
            order=args.shuffle_order,
        )

    def pre_processor_init(self, args):
        """Return the ``(pre_processor, pre_processor_parameters)`` pair.

        The parameter dictionary is empty: the pre-processor has no tunable
        settings.
        """

        # The catch-all is named ``kwargs`` so that it does not shadow the
        # ``args`` namespace of the enclosing method; it is accepted and
        # ignored.
        def pre_processor(seqs, **kwargs):
            """Mark three positions in every sequence, then build graphs."""
            # NOTE(review): positions 0.5 / 0.0 / 1.0 presumably denote the
            # relative middle, start and end of a sequence -- confirm against
            # eden.modifier.seq.mark_modifier.
            seqs = seq_to_seq(seqs, modifier=mark_modifier,
                              position=0.5, mark='%')
            seqs = seq_to_seq(seqs, modifier=mark_modifier,
                              position=0.0, mark='@')
            seqs = seq_to_seq(seqs, modifier=mark_modifier,
                              position=1.0, mark='*')
            graphs = sequence_to_eden(seqs)
            return graphs

        pre_processor_parameters = {}
        return pre_processor, pre_processor_parameters

    def vectorizer_init(self, args):
        """Return the ``(vectorizer, vectorizer_parameters)`` pair."""
        vectorizer = Vectorizer()
        # NOTE(review): presumably the candidate 'complexity' values explored
        # during model selection -- confirm against eden.model_base.
        vectorizer_parameters = {'complexity': [2, 3, 4, 5, 6]}
        return vectorizer, vectorizer_parameters

    def add_arguments(self, parser):
        """Register the ``--version`` option on ``parser`` and return it."""
        parser.add_argument('--version', action='version', version='0.1')
        return parser

    def add_arguments_fit(self, parser):
        """Register the input and negative-sampling options; return ``parser``."""
        parser.add_argument(
            "-i", "--input-file",
            dest="input_file",
            help="Path to FASTA file containing input sequences.",
            required=True)
        parser.add_argument(
            "--negative-ratio",
            dest="negative_ratio",
            type=int,
            help="Relative size ratio for the randomly permuted negative "
                 "instances w.r.t. the positive instances.",
            default=2)
        parser.add_argument(
            "--shuffle-order",
            dest="shuffle_order",
            type=int,
            help="Order of the k-mer for the random shuffling procedure.",
            default=2)
        return parser

    def add_arguments_estimate(self, parser):
        """Register the same options as ``add_arguments_fit``."""
        return self.add_arguments_fit(parser)


if __name__ == "__main__":
    model_initializer = ModelInitializer()
    main_script(model_initializer=model_initializer,
                description=description,
                epilog=epilog,
                prog_name=os.path.basename(__file__),
                logger=logging.getLogger())