# HG changeset patch # User tyty # Date 1429035447 14400 # Node ID 75e3711e23c481584f081ee109c239ad106f1b44 # Parent 62e8f7adf1ab2b07a1ff43e1b0c996a50d48de37 Uploaded diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/._.DS_Store Binary file predict/._.DS_Store has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/._ct_to_dot.py Binary file predict/._ct_to_dot.py has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/._dot_convert.py Binary file predict/._dot_convert.py has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/._parse_dis_pac.py Binary file predict/._parse_dis_pac.py has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/._predict_RNAs.py Binary file predict/._predict_RNAs.py has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/._read_file.py Binary file predict/._read_file.py has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/._rtts_plot.py Binary file predict/._rtts_plot.py has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/ct_to_dot.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/ct_to_dot.py Tue Apr 14 14:17:27 2015 -0400 @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import shlex +import os +import subprocess +from read_file import * + +ct_file = sys.argv[1] +path = sys.argv[2] +id_s = sys.argv[3] +result_file = sys.argv[4] + +h = file(result_file, 'w') +os.system('grep "'+id_s+'" '+ct_file+' |wc -l > '+path+'/count.txt') +count = read_t_file(path+'/count.txt') +comm = '' +for i in range(int(count[0][0])): + command = shlex.split('ct2dot %s %s %s' % (ct_file, str(i+1), os.path.join(path, 'db_file_%s.dbnn' % str(i+1)))) + subprocess.call(command) + comm = comm +' '+path+'/db_file_'+str(i+1)+'.dbnn' + + + +os.system('cat'+comm+' > '+result_file) +for i in range(int(count[0][0])): + command = shlex.split('rm %s' % (os.path.join(path, 'db_file_%s.dbnn' % str(i+1)))) + subprocess.call(command) +command = shlex.split('rm %s' % (os.path.join(path, 'count.txt'))) +subprocess.call(command) + + + +h.close() + diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/dot_convert.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/dot_convert.py Tue Apr 14 14:17:27 2015 -0400 @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + +dot_file = sys.argv[1] +result_file = sys.argv[2] + +h = file(result_file, 'w') +f = open(dot_file) + + + +for aline in f.readlines(): + line = aline.strip() + if line.find('>')!=-1: + id_line = line + idt = id_line.split('>') + ids = idt[1].strip() + else: + if line.find('(')!=-1: + structure_line = line + st = structure_line.split(' ') + structure = st[0].strip() + enert = st[1].strip() + if len(enert)>1: + enertt = enert.split('(') + enertt = enertt[1].strip() + else: + enertt = st[2].strip() + enerttt = enertt.split(')') + ener = enerttt[0].strip() + h.write('>ENERGY = '+ener+' '+ids+'\n') + h.write(seq+'\n') + h.write(structure+'\n') + else: + seq = line + + + + + +f.close() +h.close() + diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/parse_dis_pac.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/parse_dis_pac.py Tue Apr 14 14:17:27 2015 -0400 @@ -0,0 +1,43 @@ +#parse reactivity file into a dictionary + +import sys + +def parse_dist(in_file): + result = [] + distribution = {} + name = [] + f = open(in_file) + for aline in f.readlines(): + line = aline.strip() + dis = line.strip() + dist = dis.split('\t') #split the line and the reactivites or reads are in a list + if len(dist) > 0: + if len(dist) == 1: + if dist[0].strip().find('coverage')==-1: + name.append(line) #add the name in the name list + flag = 1 + t_name = line + else: + distri = [] + for i in range(0, len(dist)): + distri.append(dist[i].strip()) + distribution[t_name] = distri #add the list of reactivities into a dictionary + result.append(name) + result.append(distribution) #Output the dictionary + f.close() + return result + + + + + + + + + + + + + + + diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/parse_dis_pac.pyc Binary file predict/parse_dis_pac.pyc has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/predict_RNAs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/predict_RNAs.py Tue Apr 14 14:17:27 2015 -0400 @@ -0,0 +1,219 @@ +#RNA structure prediction & Output and illustrate reactivities + +import sys +import shlex +import subprocess +import tarfile +from parse_dis_pac import * +from read_file import * +from Bio import SeqIO +import os +from rtts_plot import * +import random +import string + + +id_file = sys.argv[1] +seq_file = sys.argv[2] +predict_type = sys.argv[3] +temperature = sys.argv[4] +predict_program = sys.argv[5] +output_html = sys.argv[6] +output_directory = sys.argv[7] + + + +flag = False +if predict_type!='silico': #input reactivity file if provided + if predict_program == 'rs': + react_file = sys.argv[8] + slope = sys.argv[9] + intercept = sys.argv[10] + else: + react_file = sys.argv[8] + thres_h = sys.argv[9] + thres_h = float(thres_h) + thres_l = sys.argv[10] + thres_l = float(thres_l) + gqs = sys.argv[11] + gqs = int(gqs) + + react = parse_dist(react_file) + react = react[1] + flag = True +else: + if predict_program!='rs': + gqs = sys.argv[8] + gqs = int(gqs) + + +ospath = os.path.realpath(sys.argv[0]) +ost = ospath.split('/') +syspathpt = "" +for i in range(len(ost)-1): + syspathpt = syspathpt+ost[i].strip() + syspathpt = syspathpt+'/' + + +syspath = os.getcwd() + +ids = read_t_file(id_file) +sequences = SeqIO.parse(seq_file, 'fasta') + + +seqs = {} +for seq in sequences: + seqs[seq.id] = seq.seq.tostring() + +if len(ids)>100: #setup a limit of the number of sequence to be predicted + print("Number of sequences exceeds limitation!") + sys.exit(0) + + +#predict RNA structures + +os.mkdir(output_directory) +flag3 = 0 + +id_predicted = set() +for i in range(len(ids)): + flag2 = 0 + id_s = ids[i][0] + #print(id_s) + #Put RNA sequence and reactivities into files + if id_s in seqs: + fh = file(os.path.join(syspath,"temp.txt"), 'w') + fh.write('>'+id_s) + fh.write('\n') + fh.write(seqs[id_s]) + fh.close() + if not flag: + if predict_program == 'rs': + command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) + subprocess.call(command) + command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) + subprocess.call(command) + else: + if gqs: + os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb') + + else: + os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') + command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) + subprocess.call(command) + if not gqs: + command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) + else: + command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) + subprocess.call(command) + command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) + subprocess.call(command) + else: + if id_s in react: + fh = file(os.path.join(syspath, "constraint.txt"), 'w') + make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA + if predict_program == 'rs': + for j in range(0, (len(react[id_s]))): + if react[id_s][j]!='NA': + fh.write(str(j+1)) + fh.write('\t') + fh.write(str(react[id_s][j])) + fh.write('\n') + fh.close() + command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), + os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, + os.path.join(output_directory, "%s.ct" % id_s))) + subprocess.call(command) + command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) + subprocess.call(command) + else: + fh.write('>'+id_s) + fh.write('\n') + fh.write(seqs[id_s]) + fh.write('\n') + for j in range(0, (len(react[id_s]))): + if react[id_s][j]!='NA': + re = float(react[id_s][j]) + if re>thres_h: + fh.write('x') + else: + if re '+output_directory+'/'+id_s+'.dbnb') + + else: + os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') + command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) + subprocess.call(command) + if not gqs: + command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) + else: + command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) + subprocess.call(command) + command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) + subprocess.call(command) + + else: + print(id_s+" not in the data of react!") + flag2 = 1 + if flag2 == 0: + if predict_program == 'rs': + command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) + subprocess.call(command) + command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) + subprocess.call(command) + else: + if not gqs: + command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) + subprocess.call(command) + command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) + subprocess.call(command) + flag3 = 1 + id_predicted.add(id_s) + else: + print(id_s+" not in the data of sequences!") + +#Remove the unnecessary files +if flag3 == 1: + + tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:') + for filename in os.listdir(output_directory): + filepath = os.path.join(output_directory, filename) + print filepath + tarball.add(filepath, arcname=filename) + #print os.listdir(syspath) + #print os.listdir(output_directory) + # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) + tarball.close() + + h = open(output_html, 'wb' ) + h.write('<h1>Results of RNA structure prediction</h1>\n') + + h.write('

\n') + h.write('Click here to download the compressed file containing all prediction results.\n' % (('prediction_results.tar'))) + #h.write('<\p>\n') + h.write('


\n') + + + for id_p in id_predicted: + h.write('

'+id_p+'

\n' ) + h.write('
\n') + h.write( '\n' ) + h.close() + + + + + diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/predict_RNAs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/predict_RNAs.xml Tue Apr 14 14:17:27 2015 -0400 @@ -0,0 +1,127 @@ + + predict RNA structures with or without experimental constraints from the Reactivity Calculation module + + #if $program.wh == "rs" + #if $program.rs_reactivity.type == "restraint" + predict_RNAs.py $rna_list $reference_file $program.rs_reactivity.type $temperature $program.wh $output $output.files_path $program.rs_reactivity.reactivity_file $program.rs_reactivity.slope $program.rs_reactivity.intercept + #else + predict_RNAs.py $rna_list $reference_file $program.rs_reactivity.type $temperature $program.wh $output $output.files_path + #end if + #else + #if $program.vp_reactivity.type == "restraint" + predict_RNAs.py $rna_list $reference_file $program.vp_reactivity.type $temperature $program.wh $output $output.files_path $program.vp_reactivity.reactivity_file $program.vp_reactivity.threshold_high $program.vp_reactivity.threshold_low $program.gqs + #else + predict_RNAs.py $rna_list $reference_file $program.vp_reactivity.type $temperature $program.wh $output $output.files_path $program.gqs + #end if + #end if + + + + + + + + + rnastructure + biopython + numpy + imaging + matplotlib + vienna_rna + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**Function** + +RNA Structure Prediction uses the RNAstructure program (V5.6) and ViennaRNA package (V2.1.9) to predict RNA structures without restraints (in silico) or with restraints from structural reactivities, as provided by the Reactivity Calculation module. Users can designate the temperature under which to predict the RNA structures. + +----- + +**Input**: + +* 1. A file with transcript Ids (Max num. 100), (each ID one line) +* 2. Reference file (fasta) used to map the reads to +* 3. Temperature for RNA structure prediction +* [Optional]: +* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided +* /RNAstructure prediction mode/ +* 2. Slope used with structural restraints (default 1.8) +* 3. Intercept used with structural restraints (default -0.6) +* /ViennaRNA package prediction mode/ +* 2. Flag that determines whether to incoorporate G-Quadruplex prediction +* 3. High reactivity threshold (Any nucleotide with structural reactivity that is higher than it will be constrainted as single stranded) (default 0.6) +* 4. Low reactivity threshold (Any nucleotide with structural reactivity that is lower than it will be constrainted as double stranded) (default 0.3) + +----- + +**Output**: + +* 1. Dot bracket files with predicted RNA structures [transciptID.dbn] +* 2. .ps files which depict the predicted RNA structures [transciptID.ps] +* [Optional] +* 3. .tif files that shows the distribution of the reactivity of each nucleotide on the transcripts of interest. [transciptID.tif] + +----- + +**Attention** + +Make sure that none of the transcript Ids contains a "|" or a space! + +----- + +**Backend program**: + +* 1. This module uses RNAstructure (http://rna.urmc.rochester.edu/RNAstructure.html) or ViennaRNA package (http://www.tbi.univie.ac.at/RNA/) as the backend programs to predict RNA structures. +* 2. Default parameters are used for RNAstructure and ViennaRNA package except -T (Temperature), -sm (slope used with SHAPE restraints [RNAstructure prediction mode]), -si (intercept used with SHAPE restraints [RNAstructure prediction mode]) and thresholds for high and low reactivity [ViennaRNA package prediciton mode], for which users can specify the value + + + + + diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/read_file.py Tue Apr 14 14:17:27 2015 -0400 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/read_file.pyc Binary file predict/read_file.pyc has changed diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/rtts_plot.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict/rtts_plot.py Tue Apr 14 14:17:27 2015 -0400 @@ -0,0 +1,57 @@ +#!/usr/bin/env python +#Make a plot of reactivity distribution + +import sys +import os +import numpy as np +import matplotlib +from pylab import * +import math + +#Convert the reactivities (Make NA to 0) +def convert_react(a): + r = [] + for i in range(len(a)): + if a[i]!='NA': + r.append(float(a[i])) + else: + r.append(float(0)) + return r + + +#Make a plot of the distribution +def make_plot(ar,id_s,path): + font = {'family' : 'normal', + 'weight' : 'bold', + 'size' : 16} + matplotlib.rc('font', **font) + N = len(ar) + a = convert_react(ar) + w = 1 + ind = np.arange(N) + + fig = figure() + fig, ax = subplots() + ax.bar(ind+w, a, width = w, color = 'black',edgecolor = 'black') + ax.set_ylabel('Final Structural Reactivity (FSR)') + ax.set_xlabel('Nucleotide Number') + + + mag = int(math.log(N,10))-1 + tail = 10**mag + + intervel = int(math.ceil(float(N)/tail/5)) + tl = [] + k = 0 + upmax = int(math.ceil(float(N)/intervel/tail)*intervel*tail)+1 + ax.set_xticks(np.arange(0,upmax,intervel*tail)) + ax.set_xticklabels(np.arange(0,upmax,intervel*tail)) + savefig(os.path.join(path, id_s+'.tif')) + + + + + + + + diff -r 62e8f7adf1ab -r 75e3711e23c4 predict/rtts_plot.pyc Binary file predict/rtts_plot.pyc has changed