# HG changeset patch # User rnateam # Date 1469699725 14400 # Node ID a609d6dc8047fe528f3fc6245cf680fe5cfcb830 # Parent ecf125a1ad73b6d38d3f75c2c1192b05921d6220 planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4 diff -r ecf125a1ad73 -r a609d6dc8047 data.py --- a/data.py Tue Jul 19 10:22:02 2016 -0400 +++ b/data.py Thu Jul 28 05:55:25 2016 -0400 @@ -4,8 +4,6 @@ import pandas as pd -from theano import config - __author__ = "Gianluca Corrado" __copyright__ = "Copyright 2016, Gianluca Corrado" __license__ = "MIT" @@ -30,10 +28,10 @@ fr : str The name of the HDF5 file containing features for the RNAs. """ - self.Fp = fp.astype(config.floatX) + self.Fp = fp.astype('float32') store = pd.io.pytables.HDFStore(fr) - self.Fr = store.features.astype(config.floatX) + self.Fr = store.features.astype('float32') store.close() def load(self): @@ -74,9 +72,9 @@ protein_input_dim = self.Fp.shape[0] rna_input_dim = self.Fr.shape[0] num_examples = self.Fp.shape[1] * self.Fr.shape[1] - p = np.zeros((num_examples, protein_input_dim)).astype(config.floatX) + p = np.zeros((num_examples, protein_input_dim)).astype('float32') p_names = [] - r = np.zeros((num_examples, rna_input_dim)).astype(config.floatX) + r = np.zeros((num_examples, rna_input_dim)).astype('float32') r_names = [] index = 0 for protein in self.Fp.columns: diff -r ecf125a1ad73 -r a609d6dc8047 main.py --- a/main.py Tue Jul 19 10:22:02 2016 -0400 +++ b/main.py Thu Jul 28 05:55:25 2016 -0400 @@ -7,8 +7,6 @@ from data import PredictDataset from recommend import Predictor -from theano import config - __author__ = "Gianluca Corrado" __copyright__ = "Copyright 2016, Gianluca Corrado" __license__ = "MIT" @@ -16,7 +14,6 @@ __email__ = "gianluca.corrado@unitn.it" __status__ = "Production" -config.floatX = 'float32' if __name__ == '__main__': parser = argparse.ArgumentParser( @@ -46,4 +43,6 @@ output="output.txt") P.predict() else: - sys.exit("""The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted.""") + sys.stdout.write(""" + The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted. + """) diff -r ecf125a1ad73 -r a609d6dc8047 model.py --- a/model.py Tue Jul 19 10:22:02 2016 -0400 +++ b/model.py Thu Jul 28 05:55:25 2016 -0400 @@ -5,7 +5,7 @@ import numpy as np -from theano import config, function, shared +from theano import function, shared import theano.tensor as T __author__ = "Gianluca Corrado" @@ -61,25 +61,25 @@ self.lambda_reg = lambda_reg np.random.seed(seed) # explictit features for proteins - fp = T.matrix("Fp", dtype=config.floatX) + fp = T.matrix("Fp", dtype='float32') # explictit features for RNAs - fr = T.matrix("Fr", dtype=config.floatX) + fr = T.matrix("Fr", dtype='float32') # Correct label y = T.vector("y") # projection matrix for proteins self.Ap = shared(((.5 - np.random.rand(kp, sp)) * - irange).astype(config.floatX), name="Ap") + irange).astype('float32'), name="Ap") self.bp = shared(((.5 - np.random.rand(kp)) * - irange).astype(config.floatX), name="bp") + irange).astype('float32'), name="bp") # projection matrix for RNAs self.Ar = shared(((.5 - np.random.rand(kr, sr)) * - irange).astype(config.floatX), name="Ar") + irange).astype('float32'), name="Ar") self.br = shared(((.5 - np.random.rand(kr)) * - irange).astype(config.floatX), name="br") + irange).astype('float32'), name="br") # generalization matrix self.B = shared(((.5 - np.random.rand(kp, kr)) * - irange).astype(config.floatX), name="B") + irange).astype('float32'), name="B") # Latent space for proteins p = T.nnet.sigmoid(T.dot(fp, self.Ap.T) + self.bp) diff -r ecf125a1ad73 -r a609d6dc8047 rbpfeatures.py --- a/rbpfeatures.py Tue Jul 19 10:22:02 2016 -0400 +++ b/rbpfeatures.py Thu Jul 28 05:55:25 2016 -0400 @@ -1,6 +1,7 @@ """Compute the RBP features.""" import re +import sys import subprocess as sp import uuid from os import mkdir @@ -57,6 +58,9 @@ fasta = fasta_utils.import_fasta(self.fasta) + if len(fasta) != 1: + sys.exit("""Fasta file must contain exactly one sequence.""") + for rbp in sorted(fasta.keys()): seq = fasta[rbp] text = pfam_utils.sequence_search(rbp, seq) diff -r ecf125a1ad73 -r a609d6dc8047 recommend.py --- a/recommend.py Tue Jul 19 10:22:02 2016 -0400 +++ b/recommend.py Thu Jul 28 05:55:25 2016 -0400 @@ -53,6 +53,8 @@ """Predict interaction values.""" # predict the y_hat (p, p_names, r, r_names) = self.predict_dataset + assert p.dtype == 'float32' + assert r.dtype == 'float32' y_hat = self.model.predict(p, r) # sort the interactions according to y_hat ordering = sorted(range(len(y_hat)), diff -r ecf125a1ad73 -r a609d6dc8047 rnacommender.xml --- a/rnacommender.xml Tue Jul 19 10:22:02 2016 -0400 +++ b/rnacommender.xml Thu Jul 28 05:55:25 2016 -0400 @@ -1,20 +1,21 @@ - files into a collection + genome-wide recommendation of RNA-protein interactions sam numpy + scipy pandas pytables - theano + theano requests hide.txt && + THEANO_FLAGS=base_compiledir=./tmp python $__tool_directory__/main.py "$infile" ]]> - +