# HG changeset patch
# User bgruening
# Date 1457828921 18000
# Node ID c7a363d7ab26e2a7a1210cd282e47ff2708238f8
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/protease_prediction commit e933135e5dc9aa8c96800fd10b62b256ac3a8523-dirty
diff -r 000000000000 -r c7a363d7ab26 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff -r 000000000000 -r c7a363d7ab26 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,34 @@
+
+ 0.9
+
+
+ eden
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.5281/zenodo.27945
+
+
+
diff -r 000000000000 -r c7a363d7ab26 protease.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/protease.py Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+
+description = """
+Explicit Decomposition with Neighborhood (EDeN) utility program.
+Protease modelling driver.
+"""
+
+epilog = """
+Author: Fabrizio Costa
+Copyright: 2015
+License: GPL
+Maintainer: Fabrizio Costa
+Email: costa@informatik.uni-freiburg.de
+Status: Production
+
+Cite: Costa, Fabrizio, and Kurt De Grave, 'Fast neighborhood subgraph pairwise
+distance kernel', Proceedings of the 26th International Conference on Machine
+Learning. 2010. """
+
+import os
+import logging
+
+from eden.graph import Vectorizer
+from eden.model_base import ModelInitializerBase, main_script
+from eden.converter.fasta import fasta_to_sequence
+from eden.modifier.seq import seq_to_seq
+from eden.modifier.seq import shuffle_modifier
+from eden.modifier.seq import mark_modifier
+from eden.converter.fasta import sequence_to_eden
+
+
+class ModelInitializer(ModelInitializerBase):
+    """Hooks that configure the EDeN model driver for protease sequences.
+
+    An instance of this class is handed to ``main_script``; how and when
+    each hook is invoked is decided by ``eden.model_base``, which is not
+    part of this file.
+    """
+
+    def load_data(self, args):
+        """Read the sequences of the FASTA file given as ``args.input_file``."""
+        return fasta_to_sequence(args.input_file)
+
+    def load_positive_data(self, args):
+        """Return the input sequences unchanged as the positive instances."""
+        return self.load_data(args)
+
+    def load_negative_data(self, args):
+        """Return shuffled copies of the input sequences as negatives.
+
+        ``args.negative_ratio`` is passed as ``times`` and
+        ``args.shuffle_order`` as ``order`` to ``shuffle_modifier``.
+        """
+        seqs = self.load_data(args)
+        return seq_to_seq(seqs,
+                          modifier=shuffle_modifier,
+                          times=args.negative_ratio,
+                          order=args.shuffle_order)
+
+    def pre_processor_init(self, args):
+        """Return the sequence-to-graph pre-processor and its parameters."""
+        # Named ``kwargs`` so it does not shadow the enclosing ``args``; the
+        # keyword arguments are accepted but unused.
+        def pre_processor(seqs, **kwargs):
+            # NOTE(review): positions look like relative offsets (0.0 start,
+            # 0.5 middle, 1.0 end) -- confirm against mark_modifier.
+            seqs = seq_to_seq(seqs, modifier=mark_modifier, position=0.5, mark='%')
+            seqs = seq_to_seq(seqs, modifier=mark_modifier, position=0.0, mark='@')
+            seqs = seq_to_seq(seqs, modifier=mark_modifier, position=1.0, mark='*')
+            graphs = sequence_to_eden(seqs)
+            return graphs
+
+        pre_processor_parameters = {}
+        return pre_processor, pre_processor_parameters
+
+    def vectorizer_init(self, args):
+        """Return a default ``Vectorizer`` and its parameter dictionary."""
+        vectorizer = Vectorizer()
+        vectorizer_parameters = {'complexity': [2, 3, 4, 5, 6]}
+        return vectorizer, vectorizer_parameters
+
+    def add_arguments(self, parser):
+        """Add the ``--version`` option to ``parser`` and return it."""
+        parser.add_argument('--version', action='version', version='0.1')
+        return parser
+
+    def add_arguments_fit(self, parser):
+        """Add the input-file and negative-sampling options to ``parser``."""
+        parser.add_argument("-i", "--input-file",
+                            dest="input_file",
+                            help="Path to FASTA file containing input sequences.",
+                            required=True)
+        # Adjacent literals are used instead of a backslash continuation so
+        # that no source indentation ends up inside the help text.
+        parser.add_argument("--negative-ratio",
+                            dest="negative_ratio",
+                            type=int,
+                            help="Relative size ratio for the randomly permuted "
+                                 "negative instances w.r.t. the positive instances.",
+                            default=2)
+        parser.add_argument("--shuffle-order",
+                            dest="shuffle_order",
+                            type=int,
+                            help="Order of the k-mer for the random shuffling procedure.",
+                            default=2)
+        return parser
+
+    def add_arguments_estimate(self, parser):
+        """Add the same options as ``add_arguments_fit``."""
+        return self.add_arguments_fit(parser)
+
+
+if __name__ == "__main__":
+    model_initializer = ModelInitializer()
+    main_script(model_initializer=model_initializer,
+                description=description,
+                epilog=epilog,
+                prog_name=os.path.basename(__file__),
+                logger=logging.getLogger())
diff -r 000000000000 -r c7a363d7ab26 protease.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/protease.xml Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,81 @@
+
+ based on cleavage sites
+
+ macros.xml
+
+
+
+ echo "@VERSION@"
+
+
+
+
+
+
+
+
+
+ selected_tasks['selected_task'] == 'predict'
+
+
+ selected_tasks['selected_task'] == 'fit'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CTSL1
+ SSFVSNWD
+ >CTSL1
+ SSIQATTA
+ >CTSL1
+ SSLAGCQI
+ >CTSL1
+ SSLGGTVV
+
+
+ ]]>
+
+
diff -r 000000000000 -r c7a363d7ab26 test-data/CTSL_test.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CTSL_test.fasta Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,150 @@
+>a
+SSFVSNWD
+>b
+SSIQATTA
+>c
+SSLAGCQI
+>d
+SSLGGTVV
+>e
+SSLQDCLH
+>f
+SSPAGGHA
+>g
+SSVGNVAD
+>h
+SSYVHGGV
+>i
+STFEERSY
+>j
+TFPKASVP
+>k
+TFVNITPA
+>l
+TGFAGIDS
+>m
+TGFEISSS
+>n
+TGFGMIYD
+>o
+TGLRDPFN
+>p
+TGLTQIET
+>q
+THYFLPPD
+>r
+TKAQAAAP
+>s
+TLIVRPDN
+>t
+TLLNQAPD
+>u
+TLVQTQVE
+>v
+TLWTSDMQ
+>w
+TPFAATSS
+>x
+TPVATSPT
+>y
+TQVHGTIT
+>z
+TRVSHFLP
+>aa
+TSFNGHKP
+>ab
+TSVGSVNP
+>ac
+TSYQSPHG
+>ad
+TTLSGTAP
+>ae
+TTMGGPLP
+>af
+TTVNGQSP
+>ag
+TTVSNSQQ
+>ah
+TVFAEHIS
+>ai
+TVFFDIAV
+>aj
+TVIGGGDT
+>ak
+TVVMASKG
+>al
+TYPQWQPP
+>am
+VAFCDAQS
+>an
+VAFTQVNS
+>ao
+VAVAGCCH
+>ap
+VAVSAAPG
+>aq
+VAYVSFGP
+>ar
+VDIEAIFS
+>as
+VDLSHPGV
+>at
+VELNGNQP
+>au
+VEVLAGHG
+>av
+VFFDIAVD
+>aw
+VFVGGLSP
+>ax
+VGAGGPAP
+>ay
+VGFLEGGK
+>az
+VGFSSGTE
+>bb
+VGINYQPP
+>rr
+VGLTSIAN
+>ss
+VGVSGSET
+>ee
+VHIQAGQC
+>ww
+VHYGEVTN
+>qq
+VIFQGTDH
+>tt
+VIISAPSA
+>zz
+VIITGPPE
+>uu
+VILESDPQ
+>ii
+VILGSEAA
+>oo
+VILHLKED
+>pp
+VLAMSGDP
+>ll
+VLIEHIGN
+>kk
+VLLEGNPD
+>jj
+VLLQAGAD
+>hh
+VLPRSAKE
+>gg
+VLVERSAA
+>ff
+VMIQDGPQ
+>nn
+VMLGETNP
+>bb
+VNIGSIST
+>bn
+VNLQHLDL
+>mm
+VPLGSEKP
+>cc
+VPVTGIPP
diff -r 000000000000 -r c7a363d7ab26 test-data/CTSL_train.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CTSL_train.fasta Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,100 @@
+>CTSL1
+AALAAAPA
+>CTSL1
+AALAHISG
+>CTSL1
+AAMAASPH
+>CTSL1
+AAPGSAAP
+>CTSL1
+AARKSAPA
+>CTSL1
+AASGSPGP
+>CTSL1
+AATQGAAA
+>CTSL1
+AAVGGVFD
+>CTSL1
+ACLEKPLL
+>CTSL1
+ADYESVNE
+>CTSL1
+AEIGQNHQ
+>CTSL1
+AESESLVN
+>CTSL1
+AFVNQHLC
+>CTSL1
+AGCTSAGP
+>CTSL1
+AGIATHFV
+>CTSL1
+AGIQHSCQ
+>CTSL1
+AGLESGAE
+>CTSL1
+AGLVSPSL
+>CTSL1
+AGSFGGAG
+>CTSL1
+AGVGEFEA
+>CTSL1
+AGVNTVTT
+>CTSL1
+AGWMGLDC
+>CTSL1
+AGYLGQVT
+>CTSL1
+AHFGIHEE
+>CTSL1
+AHLDITPN
+>CTSL1
+AHLKNSQE
+>CTSL1
+AHLMEIQV
+>CTSL1
+AHLQTSHK
+>CTSL1
+AIFGRPVV
+>CTSL1
+AIICGSGL
+>CTSL1
+AIPMSIPP
+>CTSL1
+AIYEGQLG
+>CTSL1
+AKVKAQTA
+>CTSL1
+ALEYATDT
+>CTSL1
+ALGHRPIP
+>CTSL1
+ALKPMYSM
+>CTSL1
+ALLELQLE
+>CTSL1
+ALLGGHQG
+>CTSL1
+ALLSSAVD
+>CTSL1
+ALVAEEHL
+>CTSL1
+ALVLGGVD
+>CTSL1
+ALVQHQEW
+>CTSL1
+ALVTGGEI
+>CTSL1
+ALWDTAGQ
+>CTSL1
+ALYLVCGE
+>CTSL1
+AMLGNSED
+>CTSL1
+AMLSGPGQ
+>CTSL1
+ANIAHGNS
+>CTSL1
+ANLTQSQI
+>CTSL1
+ANVGAVPS
diff -r 000000000000 -r c7a363d7ab26 test-data/model
Binary file test-data/model has changed
diff -r 000000000000 -r c7a363d7ab26 test-data/predictions.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/predictions.txt Sat Mar 12 19:28:41 2016 -0500
@@ -0,0 +1,75 @@
+1 714865162965.0 a
+-1 -2.0633772184e+12 b
+-1 -2.63278832465e+12 c
+-1 -2.30657489269e+12 d
+-1 -1.60666238581e+12 e
+-1 -5.64892007591e+12 f
+1 1.19958430313e+12 g
+-1 -4.81891904858e+12 h
+-1 -4.26115839421e+12 i
+-1 -2.01451585778e+12 j
+-1 -3.18448213118e+12 k
+-1 -5.32148298316e+12 l
+-1 -4.25594148364e+12 m
+-1 -5.05361918097e+12 n
+-1 -2.81407147475e+12 o
+-1 -743476285794.0 p
+-1 -1.28450200191e+12 q
+-1 -6.82098953196e+12 r
+-1 -911697110363.0 s
+-1 -1.41018885051e+12 t
+1 1.54489789585e+12 u
+1 15904035492.6 v
+-1 -7.1604898574e+12 w
+-1 -291097086285.0 x
+-1 -2.94082503016e+12 y
+-1 -1.73028072922e+12 z
+-1 -1.92238905582e+12 aa
+-1 -635673300943.0 ab
+-1 -486766774604.0 ac
+-1 -1.11318146795e+12 ad
+-1 -3.65821042965e+12 ae
+-1 -114610205054.0 af
+-1 -510138596388.0 ag
+-1 -6.65599199641e+12 ah
+-1 -4.13413986663e+12 ai
+-1 -5.8294381292e+12 aj
+-1 -3.52307285487e+12 ak
+-1 -1.63846242641e+12 al
+-1 -6.2381237974e+12 am
+1 1.56329451125e+12 an
+-1 -3.41757523005e+12 ao
+-1 -3.69981770962e+12 ap
+-1 -1.26491397758e+12 aq
+-1 -6.1732488464e+12 ar
+-1 -2.93027667881e+12 as
+-1 -1.23589278355e+12 at
+-1 -7.81321990096e+12 au
+-1 -3.37867184582e+12 av
+1 1.81255065566e+12 aw
+-1 -5.8103087454e+12 ax
+-1 -7.64938989051e+12 ay
+-1 -2.56010386139e+12 az
+-1 -2.19510046853e+12 bb
+-1 -1.38509574184e+12 rr
+-1 -1.82551763609e+12 ss
+-1 -2.22551450346e+12 ee
+-1 -4.51255078762e+12 ww
+-1 -3.36285574975e+12 qq
+-1 -3.07010023516e+12 tt
+-1 -1.27965891837e+12 zz
+-1 -1.32001091916e+12 uu
+-1 -1.91484366367e+12 ii
+-1 -3.10115319124e+12 oo
+-1 -7.10850199103e+12 pp
+-1 -4.95385785405e+12 ll
+-1 -1.40493423999e+12 kk
+-1 -3.5605949667e+12 jj
+-1 -2.88491677858e+12 hh
+-1 -3.71463321771e+12 gg
+-1 -3.30053101487e+12 ff
+-1 -3.04988922726e+12 nn
+1 50271977527.9 bb
+1 772008596347.0 bn
+-1 -554618958861.0 mm
+-1 -1.30728155546e+12 cc