# HG changeset patch
# User tyty
# Date 1413829933 14400
# Node ID ec9348a4c42f7a5b591e39eb21ee72488801fadf
# Parent 362993bbcf2bae18739667a4b122b9d2ca52d68f
Uploaded
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/.DS_Store
Binary file reactivity_cal/.DS_Store has changed
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/parse_dis_react.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/parse_dis_react.py Mon Oct 20 14:32:13 2014 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+
+def parse_dist(in_file):
+ result = []
+ distribution = {}
+ name = []
+ f = open(in_file)
+ flag = 0
+ for aline in f.readlines():
+ line = aline.strip()
+ dis = line.strip()
+ dist = dis.split('\t')
+ if len(dist) > 0:
+ if len(dist) == 1:
+ if dist[0].strip().find('coverage')==-1:
+ if flag == 0:
+ name.append(line)
+ flag = 1
+ t_name = line
+ else:
+ distribution[t_name] = 'null'
+ name.append(line)
+ flag = 1
+ t_name = line
+ else:
+ distri = []
+ for i in range(0, len(dist)):
+ distri.append(dist[i].strip())
+ distribution[t_name] = distri
+ flag = 0
+ result.append(name)
+ result.append(distribution)
+ f.close()
+ return result
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/react_cal.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/react_cal.py Mon Oct 20 14:32:13 2014 -0400
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+from Bio import SeqIO
+import math
+from parse_dis_react import *
+from react_norm_function import *
+import os
+
+
+dist_file1 = sys.argv[1] #plus library
+dist_file2 = sys.argv[2] #minus library
+seq_file = sys.argv[3]
+nt_spec = sys.argv[4]
+flag_in = sys.argv[5]
+threshold = sys.argv[6]
+output_file = sys.argv[7]
+
+
+distri_p = parse_dist(dist_file1)
+distri_m = parse_dist(dist_file2)
+threshold = float(threshold)
+
+print(flag_in)
+
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspath = ""
+for i in range(len(ost)-1):
+ syspath = syspath+ost[i].strip()
+ syspath = syspath+'/'
+
+h = file(syspath+"react.txt",'w')
+flag_in = int(flag_in)
+
+seqs = SeqIO.parse(open(seq_file),'fasta');
+nt_s = set()
+for i in range(len(nt_spec)):
+ nt_s.add(nt_spec[i])
+
+flag = 0
+trans = []
+distri_p = distri_p[1]
+distri_m = distri_m[1]
+
+#thres = int(threshold)
+
+
+transcripts = {}
+for seq in seqs:
+ n = seq.id
+ trans.append(n)
+ transcripts[n] = seq.seq.tostring()
+
+
+#print(distri_p)
+
+
+for i in range(0, len(trans)):
+ h.write(trans[i])
+ h.write('\n')
+ if (trans[i].find('AT1G29930')==-1) and (trans[i].find('At1g29930')==-1):
+ for j in range(len(distri_p[trans[i]])):
+ distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
+ for j in range(len(distri_m[trans[i]])):
+ distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)
+ s_p = sum(distri_p[trans[i]])
+ s_m = sum(distri_m[trans[i]])
+ length = len(distri_p[trans[i]])
+ if s_p!= 0 and s_m!= 0:
+ r = []
+ for j in range(0, len(distri_p[trans[i]])):
+ f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
+ f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
+ raw_react = f_p-f_m
+ r.append(max(0, raw_react))
+ else:
+ for j in range(len(distri_p[trans[i]])):
+ distri_p[trans[i]][j] = int(distri_p[trans[i]][j])
+ for j in range(len(distri_m[trans[i]])):
+ distri_m[trans[i]][j] = int(distri_m[trans[i]][j])
+ s_p = sum(distri_p[trans[i]])
+ s_m = sum(distri_m[trans[i]])
+ if s_p!= 0 and s_m!= 0:
+ r = []
+ for j in range(0, len(distri_p[trans[i]])):
+ f_p = float(distri_p[trans[i]][j])/float(s_p)
+ f_m = float(distri_m[trans[i]][j])/float(s_m)
+ r.append((max(0,(f_p-f_m)))*100)
+
+ if s_p!= 0 and s_m!= 0:
+ for k in range(1,(len(r)-1)):
+ if transcripts[trans[i]][k-1] in nt_s:
+ h.write(str(r[k]))
+ h.write('\t')
+ else:
+ h.write('NA')
+ h.write('\t')
+ k = k+1
+ if transcripts[trans[i]][k-1] in nt_s:
+ h.write(str(r[k]))
+ h.write('\n')
+ else:
+ h.write('NA')
+ h.write('\n')
+
+
+h.close()
+
+if flag_in:
+ react_norm((syspath+"react.txt"),output_file, threshold)
+else:
+ h_o = file(output_file, 'w')
+ f_i = open(syspath+"react.txt")
+ for aline in f_i.readlines():
+ h_o.write(aline.strip())
+ h_o.write('\n')
+os.system("rm -f "+syspath+"react.txt")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/react_norm_function.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/react_norm_function.py Mon Oct 20 14:32:13 2014 -0400
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+from Bio import SeqIO
+import math
+from parse_dis_react import *
+
+def cap(a,value):
+ if a>=value:
+ return value
+ else:
+ return a
+
+def react_norm(react_file, result_file, capped_value):
+ print("Normalizing.....")
+ react1 = parse_dist(react_file)
+ react = react1[1]
+ h = file(result_file, 'w')
+
+ capped = int(capped_value)
+
+ all_react = []
+
+
+ for t in react:
+ if react[t]!='null':
+ for i in range(len(react[t])):
+ if react[t][i]!='NA':
+ all_react.append(float(react[t][i]))
+# except:
+# print(react[t][i])
+# print(t)
+# print(i)
+
+ all_react.sort(reverse = True)
+ #print((all_react))
+ #print(all_react[int(len(all_react)*0.02)])
+ #print(all_react[int(len(all_react)*0.03)])
+ #print(all_react[int(len(all_react)*0.025)])
+ #print(all_react[int(len(all_react)*0.04)])
+ #print(all_react[int(len(all_react)*0.05)])
+ '''
+ mean = sum(all_react)/len(all_react)
+ print(mean)
+ temp = 0
+
+ for i in range(len(all_react)):
+ temp = temp+all_react[i]*all_react[i]
+ temp = temp/len(all_react)
+ sd = math.sqrt(temp-mean*mean)
+ '''
+ eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)]
+ meight = sum(eight)/len(eight)
+
+ for t in react:
+ h.write(t)
+ h.write('\n')
+ if react[t]!='null':
+ if (t.find('AT1G29930')==-1) and (t.find('At1g29930')==-1):
+ for i in range((len(react[t])-1)):
+ if react[t][i]!='NA':
+ h.write(str(cap((float(react[t][i])/meight),capped)))
+ else:
+ h.write('NA')
+ h.write('\t')
+ if react[t][i+1]!='NA':
+ h.write(str(cap((float(react[t][i+1])/meight),capped)))
+ else:
+ h.write('NA')
+ h.write('\n')
+ else:
+ for i in range((len(react[t])-1)):
+ if react[t][i]!='NA':
+ h.write(str(float(react[t][i])*2.6))
+ else:
+ h.write('NA')
+ h.write('\t')
+ if react[t][i+1]!='NA':
+ h.write(str(float(react[t][i])*2.6))
+ else:
+ h.write('NA')
+ h.write('\n')
+
+
+
+ h.close()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/reactivity_calculation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/reactivity_calculation.xml Mon Oct 20 14:32:13 2014 -0400
@@ -0,0 +1,48 @@
+
+
+ react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output
+
+ biopython
+ numpy
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. RTSC files (Output of II) for (+) and (-) library
+* 2. Reference file (fasta) used to map the reads
+* 3. Nucleotide Specificity (Type of nucleotide to have reactivity, e.g. AC for DMS and ACTG for SHAPE)
+* [Optional]:
+* 1. A threshold to cap the structural reactivities. {Default: 7}
+* 2. Flag that determines whether to perform 2%-8% normalization {Default: yes}
+
+-----
+
+**Output**:
+
+A text file with structural reactivity for each nucleotide (Reactivity file)
+
+
+
+
+
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/read_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/read_file.py Mon Oct 20 14:32:13 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+ f = open(in_file);
+ result = [];
+ for aline in f.readlines():
+ temp = [];
+ tline = aline.strip();
+ tl = tline.split('\t');
+ for i in range(0, len(tl)):
+ temp.append(tl[i].strip());
+ result.append(temp);
+ f.close();
+ return result;
+
+
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/separate_rna.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/separate_rna.py Mon Oct 20 14:32:13 2014 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from parse_dis_pac import *
+
+
+dist_file = sys.argv[1]
+cdna_file = sys.argv[2]
+rrna_file = sys.argv[3]
+
+dist = parse_dist(dist_file)
+dist = dist[1]
+hc = file(cdna_file, 'w')
+hr = file(rrna_file, 'w')
+
+for t in dist:
+ if t.find('AT') != -1:
+ hc.write(t)
+ hc.write('\n')
+ for i in range(len(dist[t])-1):
+ hc.write(dist[t][i])
+ hc.write('\t')
+ i = i+1
+ hc.write(dist[t][i])
+ hc.write('\n')
+ else:
+ hr.write(t)
+ hr.write('\n')
+ for i in range(len(dist[t])-1):
+ hr.write(dist[t][i])
+ hr.write('\t')
+ i = i+1
+ hr.write(dist[t][i])
+ hr.write('\n')
+
+hc.close()
+hr.close()
+
+
+
+
diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/tool_dependencies.xml Mon Oct 20 14:32:13 2014 -0400
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+