# HG changeset patch
# User tyty
# Date 1418112210 18000
# Node ID 63c41304b2210e1b3f63448bd87ce80f77c7d8f0
# Parent 1c325ff557d93db7e9d2b18305ade522b1020038
Uploaded
diff -r 1c325ff557d9 -r 63c41304b221 predict/.DS_Store
Binary file predict/.DS_Store has changed
diff -r 1c325ff557d9 -r 63c41304b221 predict/._predict_RNAs.xml
Binary file predict/._predict_RNAs.xml has changed
diff -r 1c325ff557d9 -r 63c41304b221 predict/parse_dis_pac.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/parse_dis_pac.py Tue Dec 09 03:03:30 2014 -0500
@@ -0,0 +1,43 @@
+#parse reactivity file into a dictionary
+
+import sys
+
+def parse_dist(in_file):
+ result = []
+ distribution = {}
+ name = []
+ f = open(in_file)
+ for aline in f.readlines():
+ line = aline.strip()
+ dis = line.strip()
+ dist = dis.split('\t') #split the line and the reactivites or reads are in a list
+ if len(dist) > 0:
+ if len(dist) == 1:
+ if dist[0].strip().find('coverage')==-1:
+ name.append(line) #add the name in the name list
+ flag = 1
+ t_name = line
+ else:
+ distri = []
+ for i in range(0, len(dist)):
+ distri.append(dist[i].strip())
+ distribution[t_name] = distri #add the list of reactivities into a dictionary
+ result.append(name)
+ result.append(distribution) #Output the dictionary
+ f.close()
+ return result
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 1c325ff557d9 -r 63c41304b221 predict/parse_dis_pac.pyc
Binary file predict/parse_dis_pac.pyc has changed
diff -r 1c325ff557d9 -r 63c41304b221 predict/predict_RNAs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.py Tue Dec 09 03:03:30 2014 -0500
@@ -0,0 +1,102 @@
+#RNA structure prediction & Output and illustrate reactivities
+
+import sys
+import shlex
+import subprocess
+import tarfile
+from parse_dis_pac import *
+from read_file import *
+from Bio import SeqIO
+import os
+from rtts_plot import *
+import random
+import string
+
+
+id_file = sys.argv[1]
+seq_file = sys.argv[2]
+predict_type = sys.argv[3]
+temperature = sys.argv[4]
+output_file = sys.argv[5]
+
+
+flag = False
+if predict_type!='silico': #input reactivity file if provided
+ react_file = sys.argv[6]
+ slope = sys.argv[7]
+ intercept = sys.argv[8]
+ react = parse_dist(react_file)
+ react = react[1]
+ flag = True
+
+syspath = os.getcwd()
+
+ids = read_t_file(id_file)
+sequences = SeqIO.parse(seq_file, 'fasta')
+
+
+seqs = {}
+for seq in sequences:
+ seqs[seq.id] = seq.seq.tostring()
+
+if len(ids)>100: #setup a limit of the number of sequence to be predicted
+ print("Number of sequences exceeds limitation!")
+ sys.exit(0)
+
+
+#predict RNA structures
+output_directory = os.path.join(syspath, "output_files")
+if not os.path.exists(output_directory):
+ os.makedirs(output_directory)
+for i in range(len(ids)):
+ flag2 = 0
+ id_s = ids[i][0]
+ #print(id_s)
+ #Put RNA sequence and reactivities into files
+ if id_s in seqs:
+ fh = file(os.path.join(syspath,"temp.txt"), 'w')
+ fh.write('>'+id_s)
+ fh.write('\n')
+ fh.write(seqs[id_s])
+ fh.close()
+ if not flag:
+ command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
+ subprocess.call(command)
+ else:
+ if id_s in react:
+ fh = file(os.path.join(syspath, "constraint.txt"), 'w')
+ make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
+ for j in range(0, (len(react[id_s]))):
+ if react[id_s][j]!='NA':
+ fh.write(str(j+1))
+ fh.write('\t')
+ fh.write(str(react[id_s][j]))
+ fh.write('\n')
+ #h.write(str(react[id_s][j])) #Output the reactivities
+ #h.write('\t')
+ fh.close()
+ #h.write('\n')
+ #h.write('\n')
+ command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
+ os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
+ os.path.join(output_directory, "%s.ct" % id_s)))
+ subprocess.call(command)
+ else:
+ print(id_s+" not in the data of react!")
+ flag2 = 1
+ if flag2 == 0:
+ command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
+ subprocess.call(command)
+ else:
+ print(id_s+" not in the data of sequences!")
+
+#Remove the unnecessary files
+tarball = tarfile.open(output_file, 'w:')
+for filename in os.listdir(output_directory):
+ filepath = os.path.join(output_directory, filename)
+ print filepath
+ tarball.add(filepath, arcname=filename)
+#print os.listdir(syspath)
+#print os.listdir(output_directory)
+# tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
+tarball.close()
diff -r 1c325ff557d9 -r 63c41304b221 predict/predict_RNAs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.xml Tue Dec 09 03:03:30 2014 -0500
@@ -0,0 +1,79 @@
+
+
+
+ #if $reactivity.type == "restraint"
+ predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output $reactivity.reactivity_file $reactivity.slope $reactivity.intercept
+ #else
+ predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output
+ #end if
+
+
+ biopython
+ numpy
+ matplotlib
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. A file with transcript Ids (Max num. 100), (each ID one line)
+* 2. Reference file (fasta) used to map the reads to
+* 3. Temperature for RNA structure prediction
+* [Optional]:
+* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided
+* 2. Slope used with structural restraints (default 1.8)
+* 3. Intercept used with structural restraints (default -0.6)
+
+-----
+
+**Output**:
+
+* 1. .ct files with predicted RNA structures [transciptID.ct]
+* 2. .ps files which depict the predicted RNA structures [[transciptID.ps]
+* [Optional]
+* 3. .png files that shows the distribution of the reactivity of each nucleotide on the transcripts of interest. [transciptID.png]
+
+-----
+
+**Attention**
+
+Make sure any of the transcript Ids does not contain "|" or space!
+
+-----
+
+**Backend program**:
+
+* 1. This module is using RNAstructure (http://rna.urmc.rochester.edu/RNAstructure.html) as the backend program to predict RNA structures.
+* 2. Default parameters are used for RNAstructure expect -T (Temperature), -sm (slope used with SHAPE restraints) and -si (intercept used with SHAPE restraints) which users can specify the value
+
+
+
+
+
diff -r 1c325ff557d9 -r 63c41304b221 predict/rRNA.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/rRNA.txt Tue Dec 09 03:03:30 2014 -0500
@@ -0,0 +1,8 @@
+>25s rRNA 3375nts
+GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA
+>gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence
+TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG
+>Arabidopsis thaliana 1
+GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC
+>gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA
+AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA
\ No newline at end of file
diff -r 1c325ff557d9 -r 63c41304b221 predict/read_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/read_file.py Tue Dec 09 03:03:30 2014 -0500
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+ f = open(in_file);
+ result = [];
+ for aline in f.readlines():
+ temp = [];
+ tline = aline.strip();
+ tl = tline.split('\t');
+ for i in range(0, len(tl)):
+ temp.append(tl[i].strip());
+ result.append(temp);
+ f.close();
+ return result;
+
+
diff -r 1c325ff557d9 -r 63c41304b221 predict/read_file.pyc
Binary file predict/read_file.pyc has changed
diff -r 1c325ff557d9 -r 63c41304b221 predict/rtts_plot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/rtts_plot.py Tue Dec 09 03:03:30 2014 -0500
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+#Make a plot of reactivity distribution
+
+import sys
+import os
+import numpy as np
+import matplotlib
+from pylab import *
+import math
+
+#Convert the reactivities (Make NA to 0)
+def convert_react(a):
+ r = []
+ for i in range(len(a)):
+ if a[i]!='NA':
+ r.append(float(a[i]))
+ else:
+ r.append(float(0))
+ return r
+
+
+#Make a plot of the distribution
+def make_plot(ar,id_s,path):
+ font = {'family' : 'normal',
+ 'weight' : 'bold',
+ 'size' : 16}
+ matplotlib.rc('font', **font)
+ N = len(ar)
+ a = convert_react(ar)
+ w = 1
+ ind = np.arange(N)
+
+ fig = figure()
+ fig, ax = subplots()
+ ax.bar(ind+w, a, width = w, color = 'black',edgecolor = 'black')
+ ax.set_ylabel('Final Structural Reactivity (FSR)')
+ ax.set_xlabel('Nucleotide Number')
+
+
+ mag = int(math.log(N,10))-1
+ tail = 10**mag
+
+ intervel = int(math.ceil(float(N)/tail/5))
+ print(N)
+ print(intervel)
+ tl = []
+ k = 0
+ upmax = int(math.ceil(float(N)/intervel/tail)*intervel*tail)+1
+ ax.set_xticks(np.arange(0,upmax,intervel*tail))
+ print(np.arange(0,upmax,intervel*tail))
+ ax.set_xticklabels(np.arange(0,upmax,intervel*tail))
+ savefig(os.path.join(path, id_s+'.tif'))
+
+
+
+
+
+
+
+
diff -r 1c325ff557d9 -r 63c41304b221 predict/rtts_plot.pyc
Binary file predict/rtts_plot.pyc has changed
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/._.DS_Store
Binary file reactivity_cal/._.DS_Store has changed
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/parse_dis_react.py
--- a/reactivity_cal/parse_dis_react.py Tue Dec 09 03:03:11 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-
-def parse_dist(in_file):
- result = []
- distribution = {}
- name = []
- f = open(in_file)
- flag = 0
- for aline in f.readlines():
- line = aline.strip()
- dis = line.strip()
- dist = dis.split('\t')
- if len(dist) > 0:
- if len(dist) == 1:
- if dist[0].strip().find('coverage')==-1:
- if flag == 0:
- name.append(line)
- flag = 1
- t_name = line
- else:
- distribution[t_name] = 'null'
- name.append(line)
- flag = 1
- t_name = line
- else:
- distri = []
- for i in range(0, len(dist)):
- distri.append(dist[i].strip())
- distribution[t_name] = distri
- flag = 0
- result.append(name)
- result.append(distribution)
- f.close()
- return result
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/parse_dis_react.pyc
Binary file reactivity_cal/parse_dis_react.pyc has changed
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/rRNA.txt
--- a/reactivity_cal/rRNA.txt Tue Dec 09 03:03:11 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
->25s rRNA 3375nts
-GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA
->gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence
-TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG
->Arabidopsis thaliana 1
-GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC
->gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA
-AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA
\ No newline at end of file
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/react_cal.py
--- a/reactivity_cal/react_cal.py Tue Dec 09 03:03:11 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,135 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-from Bio import SeqIO
-import math
-from parse_dis_react import *
-from react_norm_function import *
-import os
-import random
-import string
-
-
-dist_file1 = sys.argv[1] #plus library
-dist_file2 = sys.argv[2] #minus library
-seq_file = sys.argv[3] #Reference library(genome/cDNA)
-nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
-flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0)
-threshold = sys.argv[6] #Threshold to cap the reactivities
-output_file = sys.argv[7]
-
-
-distri_p = parse_dist(dist_file1)
-distri_m = parse_dist(dist_file2)
-threshold = float(threshold)
-
-
-syspathrs = os.getcwd()
-
-h = file(syspathrs+"react.txt",'w')
-flag_in = int(flag_in)
-
-seqs = SeqIO.parse(open(seq_file),'fasta');
-nt_s = set()
-for i in range(len(nt_spec)):
- nt_s.add(nt_spec[i])
-
-flag = 0
-trans = []
-distri_p = distri_p[1]
-distri_m = distri_m[1]
-
-#thres = int(threshold)
-
-
-transcripts = {}
-for seq in seqs:
- n = seq.id
- trans.append(n)
- transcripts[n] = seq.seq.tostring()
-
-
-#print(distri_p)
-
-
-for i in range(0, len(trans)):
- h.write(trans[i])
- h.write('\n')
- for j in range(len(distri_p[trans[i]])):
- distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
- for j in range(len(distri_m[trans[i]])):
- distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)
- s_p = sum(distri_p[trans[i]])
- s_m = sum(distri_m[trans[i]])
- length = len(distri_p[trans[i]])
- if s_p!= 0 and s_m!= 0:
- r = []
- for j in range(0, len(distri_p[trans[i]])):
- f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
- f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
- raw_react = f_p-f_m
- r.append(max(0, raw_react))
-
- if s_p!= 0 and s_m!= 0:
- for k in range(1,(len(r)-1)):
- if transcripts[trans[i]][k-1] in nt_s:
- h.write(str(r[k]))
- h.write('\t')
- else:
- h.write('NA')
- h.write('\t')
- k = k+1
- if transcripts[trans[i]][k-1] in nt_s:
- h.write(str(r[k]))
- h.write('\n')
- else:
- h.write('NA')
- h.write('\n')
-
-
-h.close()
-
-if flag_in:
- react_norm((syspathrs+"react.txt"),output_file, threshold)
-else:
- h_o = file(output_file, 'w')
- f_i = open(syspathrs+"react.txt")
- for aline in f_i.readlines():
- h_o.write(aline.strip())
- h_o.write('\n')
-os.system("rm -f "+syspathrs+"react.txt")
-
-#os.system("rm -r "+syspathrs)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/react_norm_function.py
--- a/reactivity_cal/react_norm_function.py Tue Dec 09 03:03:11 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-from Bio import SeqIO
-import math
-from parse_dis_react import *
-
-def cap(a,value):
- if a>=value:
- return value
- else:
- return a
-
-def react_norm(react_file, result_file, capped_value):
- print("Normalizing.....")
- react1 = parse_dist(react_file)
- react = react1[1]
- h = file(result_file, 'w')
-
- capped = int(capped_value)
-
- all_react = []
-
-
- for t in react:
- if react[t]!='null':
- for i in range(len(react[t])):
- if react[t][i]!='NA':
- all_react.append(float(react[t][i]))
-
-
- all_react.sort(reverse = True)
-
-
- eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)]
- meight = sum(eight)/len(eight)
-
- for t in react:
- h.write(t)
- h.write('\n')
- if react[t]!='null':
- for i in range((len(react[t])-1)):
- if react[t][i]!='NA':
- h.write(str(cap((float(react[t][i])/meight),capped)))
- else:
- h.write('NA')
- h.write('\t')
- if react[t][i+1]!='NA':
- h.write(str(cap((float(react[t][i+1])/meight),capped)))
- else:
- h.write('NA')
- h.write('\n')
-
- h.close()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/react_norm_function.pyc
Binary file reactivity_cal/react_norm_function.pyc has changed
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/reactivity_calculation.xml
--- a/reactivity_cal/reactivity_calculation.xml Tue Dec 09 03:03:11 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-
-
- react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output
-
- biopython
- numpy
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**TIPS**:
-
------
-
-**Input**:
-
-* 1. RTSC files (Output of Get RT Stop Counts) for (+) and (-) library
-* 2. Reference file (fasta) used to map the reads to
-* 3. Nucleotide Specificity (Type of nucleotides to have reactivity, e.g. AC for DMS and ACTG for SHAPE)
-* [Optional]:
-* 1. A threshold to cap the structural reactivities. {Default: 7}
-* 2. Flag that determines whether to perform 2%-8% normalization {Default: Yes}
-
------
-
-**Output**:
-
-A text file with structural reactivity for each nucleotide (Reactivity file)
-
-
-
-
-
diff -r 1c325ff557d9 -r 63c41304b221 reactivity_cal/read_file.py
--- a/reactivity_cal/read_file.py Tue Dec 09 03:03:11 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
- f = open(in_file);
- result = [];
- for aline in f.readlines():
- temp = [];
- tline = aline.strip();
- tl = tline.split('\t');
- for i in range(0, len(tl)):
- temp.append(tl[i].strip());
- result.append(temp);
- f.close();
- return result;
-
-