# HG changeset patch
# User tyty
# Date 1413830698 14400
# Node ID 1a92d934f8d17bf4632666e9337eca4a18486111
# Parent 001b4562ac14dd53d2662ba90e54b46e6d47dccd
Uploaded
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/.DS_Store
Binary file predict/.DS_Store has changed
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/._.DS_Store
Binary file predict/._.DS_Store has changed
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/._tool_dependencies.xml
Binary file predict/._tool_dependencies.xml has changed
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/parse_dis_pac.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/parse_dis_pac.py Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,43 @@
+#parse reactivity file into a dictionary
+
+import sys
+
+def parse_dist(in_file):
+    """Parse a reactivity (or read-count) file into names and a lookup table.
+
+    A line with a single tab-separated field is a transcript name, unless it
+    contains the word 'coverage', in which case it is ignored. A line with
+    several fields is the value row of the most recently seen name; if a
+    name has several value rows, the last one wins. Blank lines are skipped.
+
+    :param in_file: path of the tab-delimited file to parse.
+    :returns: ``[names, distribution]`` where ``names`` lists the transcript
+        names in file order and ``distribution`` maps each name to its list
+        of stripped field strings.
+    :raises ValueError: if a value row appears before any transcript name.
+    """
+    names = []
+    distribution = {}
+    current = None  # name that the next value row belongs to
+    # ``with`` guarantees the handle is closed even if parsing raises.
+    with open(in_file) as handle:
+        for raw in handle:
+            line = raw.strip()
+            if not line:
+                continue  # blank lines carry neither a name nor values
+            fields = line.split('\t')
+            if len(fields) == 1:
+                if 'coverage' not in fields[0]:
+                    names.append(line)
+                    current = line
+            elif current is None:
+                # Previously this surfaced as an unrelated NameError.
+                raise ValueError("value row found before any transcript "
+                                 "name in %s" % in_file)
+            else:
+                distribution[current] = [field.strip() for field in fields]
+    return [names, distribution]
+
+
+
+
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/parse_dis_pac.pyc
Binary file predict/parse_dis_pac.pyc has changed
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/predict_RNAs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.py Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,93 @@
+#RNA structure prediction & Output and illustrate reactivities
+
+import sys
+from parse_dis_pac import *
+from read_file import *
+from Bio import SeqIO
+import os
+from rtts_plot import *
+
+
+id_file = sys.argv[1]
+seq_file = sys.argv[2]
+output_file = sys.argv[4]
+# argv[1]: transcript id list; argv[2]: FASTA file; argv[3]: reactivity file
+# or the literal string 'None'; argv[4]: gzip-compressed tar archive to write.
+flag = 0
+if sys.argv[3]!='None': #input reactivity file if provided
+ react_file = sys.argv[3]
+ react = parse_dist(react_file)
+ react = react[1] #keep only the dictionary: transcript name -> list of reactivities
+ flag = 1
+# syspath becomes the directory holding this script, ending in '/'.
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspath = ""
+for i in range(len(ost)-1):
+ syspath = syspath+ost[i].strip()
+ syspath = syspath+'/'
+# Each row of ids is a list of tab-separated fields; only field 0 is used below.
+ids = read_t_file(id_file)
+sequences = SeqIO.parse(seq_file, 'fasta')
+# NOTE(review): Seq.tostring() was dropped from newer Biopython releases;
+# str(seq.seq) is the usual replacement - confirm against the pinned version.
+seqs = {}
+for seq in sequences:
+ seqs[seq.id] = seq.seq.tostring()
+
+if len(ids)>10: #setup a limit of the number of sequence to be predicted
+ print("Number of sequences exceeds limitation!")
+ sys.exit(0)
+# NOTE(review): every os.system() call below splices ids and paths into a
+# shell string unquoted; ids with shell metacharacters would break or inject.
+#predict RNA structures (Fold and draw are external programs, presumably from RNAstructure - confirm)
+os.system("mkdir "+syspath+"output_f")
+for i in range(len(ids)):
+ id_s = ids[i][0]
+ print(id_s)
+ #Put RNA sequence and reactivities into files
+ if id_s in seqs:
+ f = file(syspath+"temp.txt", 'w') #file() is a Python 2 builtin
+ f.write('>'+id_s)
+ f.write('\n')
+ f.write(seqs[id_s])
+ f.close()
+ if flag == 0:
+ os.system("Fold "+syspath+"temp.txt"+" "+syspath+"output_f/"+id_s+".ct")
+ if flag == 1:
+ if id_s in react:
+ f = file(syspath+"constraint.txt",'w')
+ make_plot(react[id_s],id_s,(syspath+"output_f/")) #make a plot of the distribution of the reactivites of the input RNA
+ #h = file(syspath+"output_f/transcript_reactivities.txt", 'w')
+ #h.write(id_s)
+ #h.write('\n')
+ for j in range(0, (len(react[id_s]))):
+ if react[id_s][j]!='NA': #positions marked NA are left out of the constraint file
+ f.write(str(j+1)) #1-based nucleotide position
+ f.write('\t')
+ f.write(str(react[id_s][j]))
+ f.write('\n')
+ #h.write(str(react[id_s][j])) #Output the reactivities
+ #h.write('\t')
+ f.close()
+ #h.write('\n')
+ #h.write('\n')
+ os.system("Fold "+syspath+"temp.txt"+" -sh"+" "+syspath+"constraint.txt"+" "+syspath+"output_f/"+id_s+".ct")
+ else:
+ print(id_s+" not in the data of react!")
+ os.system("draw "+syspath+"output_f/"+id_s+".ct "+syspath+"output_f/"+id_s+".ps")
+ else:
+ print(id_s+" not in the data of sequences!")
+# tar's stderr is redirected to log.txt next to the script; that file is not removed.
+#Pack everything in output_f into the output archive, then remove the working files
+os.system("tar -zcvPf "+output_file+" "+syspath+"output_f/"+"*.* 2>"+syspath+"log.txt")
+os.system("rm -f "+syspath+"temp.txt")
+os.system("rm -r "+syspath+"output_f")
+if flag == 1:
+ os.system("rm -f "+syspath+"constraint.txt")
+ # h.close()
+
+
+
+
+
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/predict_RNAs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.xml Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,59 @@
+
+
+ predict_RNAs.py $rna_list $reference_file $reactivity_file $output
+
+ biopython
+ numpy
+ matplotlib
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. A file with transcript IDs (at most 10), one ID per line
+* 2. Reference file (fasta) used to map the reads
+* [Optional]:
+* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided
+
+-----
+
+**Output**:
+
+* 1. .ct files with predicted RNA structures [transcriptID.ct]
+* 2. .ps files which depict the predicted RNA structures [transcriptID.ps]
+* [Optional]
+* 3. .tif files that show the distribution of the reactivity of each nucleotide on the transcripts of interest. [transcriptID.tif]
+* 4. A .txt file that includes the reactivities of all the nucleotides on the transcripts of interest. [transcriptID.txt]
+
+-----
+
+**Attention**
+
+Make sure none of the transcript IDs contains "|" or a space!
+
+
+
+
+
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/predict_RNAs.xml~
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.xml~ Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,63 @@
+
+
+ predict_RNAs.py $rna_list $reference_file $reactivity_file $output
+
+ biopython
+ numpy
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. A file with transcript IDs (at most 10), one ID per line
+* 2. Reference file (fasta) used to map the reads
+* [Optional]:
+* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided
+
+-----
+
+**Output**:
+
+* 1. .ct files with predicted RNA structures [transcriptID.ct]
+* 2. .ps files which depict the predicted RNA structures [transcriptID.ps]
+* [Optional]
+* 3. .tif files that show the distribution of the reactivity of each nucleotide on the transcripts of interest. [transcriptID.tif]
+* 4. A .txt file that includes the reactivities of all the nucleotides on the transcripts of interest. [transcriptID.txt]
+
+-----
+
+**Attention**
+
+Make sure none of the transcript IDs contains "|" or a space!
+
+
+
+
+
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/rRNA.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/rRNA.txt Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,8 @@
+>25s rRNA 3375nts
+GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCT
GCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA
+>gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence
+TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG
+>Arabidopsis thaliana 1
+GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC
+>gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA
+AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA
\ No newline at end of file
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/read_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/read_file.py Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Read a tab-delimited text file into a list of rows.
+
+    Each line is stripped, split on tabs, and every field is stripped again,
+    so a blank line yields ``['']``.
+
+    :param in_file: path of the file to read.
+    :returns: list of rows, each a list of field strings.
+    """
+    # ``with`` closes the handle even if reading raises.
+    with open(in_file) as handle:
+        return [[field.strip() for field in line.strip().split('\t')]
+                for line in handle]
+
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/read_file.pyc
Binary file predict/read_file.pyc has changed
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/rtts_plot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/rtts_plot.py Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#Make a plot of reactivity distribution
+
+import sys
+import numpy as np
+import matplotlib
+from pylab import *
+import math
+
+#Convert the reactivities (Make NA to 0)
+def convert_react(a):
+    """Convert raw reactivity entries to floats.
+
+    :param a: sequence of reactivity entries, each either the string 'NA'
+        or something float() accepts.
+    :returns: list of floats with every 'NA' entry replaced by 0.0.
+    """
+    return [float(entry) if entry != 'NA' else float(0) for entry in a]
+
+
+#Make a plot of the distribution
+def make_plot(ar,id_s,path):
+    """Save a bar chart of per-nucleotide reactivities as <path><id_s>.tif.
+
+    :param ar: reactivity entries for one transcript ('NA' is drawn as 0).
+    :param id_s: transcript id, used in the title and the file name.
+    :param path: output prefix, joined to the file name by concatenation.
+    :raises ValueError: if ar is empty.
+    """
+    N = len(ar)
+    if N == 0:
+        # math.log(0) would fail below with an opaque "math domain error".
+        raise ValueError("no reactivities to plot for " + id_s)
+    a = convert_react(ar)
+    w = 1
+    ind = np.arange(N)
+    # subplots() already creates the figure; an extra figure() call only
+    # leaked an unused, never-closed figure on every invocation.
+    fig, ax = subplots()
+    ax.bar(ind+w, a, width = w, color = 'r',edgecolor = 'r')
+    ax.set_ylabel('DMS Reactivity')
+    ax.set_xlabel('Nucleotide Index')
+
+    # Tick step is roughly N/5, rounded down to a multiple of ``tail``.
+    mag = int(math.log(N,10))-1
+    tail = 10**mag
+    intervel = int(math.ceil(float(N)/tail)/5)
+    print(N)
+    print(intervel)
+    ticks = np.arange(0,N,intervel*tail)
+    ax.set_xticks(ticks)
+    print(ticks)
+    ax.set_xticklabels(ticks)
+    ax.set_title(id_s+" reactivity distribution")
+    fig.savefig(path+id_s+'.tif')
+    close(fig)  # release the figure so repeated calls do not pile up memory
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/rtts_plot.pyc
Binary file predict/rtts_plot.pyc has changed
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/tool_dependencies.xml Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 001b4562ac14 -r 1a92d934f8d1 predict/tool_dependencies.xml~
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/tool_dependencies.xml~ Mon Oct 20 14:44:58 2014 -0400
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+