# HG changeset patch # User tyty # Date 1413829933 14400 # Node ID ec9348a4c42f7a5b591e39eb21ee72488801fadf # Parent 362993bbcf2bae18739667a4b122b9d2ca52d68f Uploaded diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/.DS_Store Binary file reactivity_cal/.DS_Store has changed diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/parse_dis_react.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/parse_dis_react.py Mon Oct 20 14:32:13 2014 -0400 @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys + +def parse_dist(in_file): + result = [] + distribution = {} + name = [] + f = open(in_file) + flag = 0 + for aline in f.readlines(): + line = aline.strip() + dis = line.strip() + dist = dis.split('\t') + if len(dist) > 0: + if len(dist) == 1: + if dist[0].strip().find('coverage')==-1: + if flag == 0: + name.append(line) + flag = 1 + t_name = line + else: + distribution[t_name] = 'null' + name.append(line) + flag = 1 + t_name = line + else: + distri = [] + for i in range(0, len(dist)): + distri.append(dist[i].strip()) + distribution[t_name] = distri + flag = 0 + result.append(name) + result.append(distribution) + f.close() + return result + + + + + + + + + + + + + + + diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/react_cal.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/react_cal.py Mon Oct 20 14:32:13 2014 -0400 @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +from Bio import SeqIO +import math +from parse_dis_react import * +from react_norm_function import * +import os + + +dist_file1 = sys.argv[1] #plus library +dist_file2 = sys.argv[2] #minus library +seq_file = sys.argv[3] +nt_spec = sys.argv[4] +flag_in = sys.argv[5] +threshold = sys.argv[6] +output_file = sys.argv[7] + + +distri_p = parse_dist(dist_file1) +distri_m = parse_dist(dist_file2) +threshold = float(threshold) + +print(flag_in) + +ospath = os.path.realpath(sys.argv[0]) +ost = ospath.split('/') +syspath = "" +for i in range(len(ost)-1): + syspath = syspath+ost[i].strip() + syspath = syspath+'/' + +h = file(syspath+"react.txt",'w') +flag_in = int(flag_in) + +seqs = SeqIO.parse(open(seq_file),'fasta'); +nt_s = set() +for i in range(len(nt_spec)): + nt_s.add(nt_spec[i]) + +flag = 0 +trans = [] +distri_p = distri_p[1] +distri_m = distri_m[1] + +#thres = int(threshold) + + +transcripts = {} +for seq in seqs: + n = seq.id + trans.append(n) + transcripts[n] = seq.seq.tostring() + + +#print(distri_p) + + +for i in range(0, len(trans)): + h.write(trans[i]) + h.write('\n') + if (trans[i].find('AT1G29930')==-1) and (trans[i].find('At1g29930')==-1): + for j in range(len(distri_p[trans[i]])): + distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e) + for j in range(len(distri_m[trans[i]])): + distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e) + s_p = sum(distri_p[trans[i]]) + s_m = sum(distri_m[trans[i]]) + length = len(distri_p[trans[i]]) + if s_p!= 0 and s_m!= 0: + r = [] + for j in range(0, len(distri_p[trans[i]])): + f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length + f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length + raw_react = f_p-f_m + r.append(max(0, raw_react)) + else: + for j in range(len(distri_p[trans[i]])): + distri_p[trans[i]][j] = int(distri_p[trans[i]][j]) + for j in range(len(distri_m[trans[i]])): + distri_m[trans[i]][j] = int(distri_m[trans[i]][j]) + s_p = sum(distri_p[trans[i]]) + s_m = sum(distri_m[trans[i]]) + if s_p!= 0 and s_m!= 0: + r = [] + for j in range(0, len(distri_p[trans[i]])): + f_p = float(distri_p[trans[i]][j])/float(s_p) + f_m = float(distri_m[trans[i]][j])/float(s_m) + r.append((max(0,(f_p-f_m)))*100) + + if s_p!= 0 and s_m!= 0: + for k in range(1,(len(r)-1)): + if transcripts[trans[i]][k-1] in nt_s: + h.write(str(r[k])) + h.write('\t') + else: + h.write('NA') + h.write('\t') + k = k+1 + if transcripts[trans[i]][k-1] in nt_s: + h.write(str(r[k])) + h.write('\n') + else: + h.write('NA') + h.write('\n') + + +h.close() + +if flag_in: + react_norm((syspath+"react.txt"),output_file, threshold) +else: + h_o = file(output_file, 'w') + f_i = open(syspath+"react.txt") + for aline in f_i.readlines(): + h_o.write(aline.strip()) + h_o.write('\n') +os.system("rm -f "+syspath+"react.txt") + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/react_norm_function.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/react_norm_function.py Mon Oct 20 14:32:13 2014 -0400 @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +from Bio import SeqIO +import math +from parse_dis_react import * + +def cap(a,value): + if a>=value: + return value + else: + return a + +def react_norm(react_file, result_file, capped_value): + print("Normalizing.....") + react1 = parse_dist(react_file) + react = react1[1] + h = file(result_file, 'w') + + capped = int(capped_value) + + all_react = [] + + + for t in react: + if react[t]!='null': + for i in range(len(react[t])): + if react[t][i]!='NA': + all_react.append(float(react[t][i])) +# except: +# print(react[t][i]) +# print(t) +# print(i) + + all_react.sort(reverse = True) + #print((all_react)) + #print(all_react[int(len(all_react)*0.02)]) + #print(all_react[int(len(all_react)*0.03)]) + #print(all_react[int(len(all_react)*0.025)]) + #print(all_react[int(len(all_react)*0.04)]) + #print(all_react[int(len(all_react)*0.05)]) + ''' + mean = sum(all_react)/len(all_react) + print(mean) + temp = 0 + + for i in range(len(all_react)): + temp = temp+all_react[i]*all_react[i] + temp = temp/len(all_react) + sd = math.sqrt(temp-mean*mean) + ''' + eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)] + meight = sum(eight)/len(eight) + + for t in react: + h.write(t) + h.write('\n') + if react[t]!='null': + if (t.find('AT1G29930')==-1) and (t.find('At1g29930')==-1): + for i in range((len(react[t])-1)): + if react[t][i]!='NA': + h.write(str(cap((float(react[t][i])/meight),capped))) + else: + h.write('NA') + h.write('\t') + if react[t][i+1]!='NA': + h.write(str(cap((float(react[t][i+1])/meight),capped))) + else: + h.write('NA') + h.write('\n') + else: + for i in range((len(react[t])-1)): + if react[t][i]!='NA': + h.write(str(float(react[t][i])*2.6)) + else: + h.write('NA') + h.write('\t') + if react[t][i+1]!='NA': + h.write(str(float(react[t][i])*2.6)) + else: + h.write('NA') + h.write('\n') + + + + h.close() + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/reactivity_calculation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/reactivity_calculation.xml Mon Oct 20 14:32:13 2014 -0400 @@ -0,0 +1,48 @@ + + + react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output + + biopython + numpy + + + + + + + + + + + + + + + + + + + +**TIPS**: + +----- + +**Input**: + +* 1. RTSC files (Output of II) for (+) and (-) library +* 2. Reference file (fasta) used to map the reads +* 3. Nucleotide Specificity (Type of nucleotide to have reactivity, e.g. AC for DMS and ACTG for SHAPE) +* [Optional]: +* 1. A threshold to cap the structural reactivities. {Default: 7} +* 2. Flag that determines whether to perform 2%-8% normalization {Default: yes} + +----- + +**Output**: + +A text file with structural reactivity for each nucleotide (Reactivity file) + + + + + diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/read_file.py Mon Oct 20 14:32:13 2014 -0400 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/separate_rna.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/separate_rna.py Mon Oct 20 14:32:13 2014 -0400 @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from parse_dis_pac import * + + +dist_file = sys.argv[1] +cdna_file = sys.argv[2] +rrna_file = sys.argv[3] + +dist = parse_dist(dist_file) +dist = dist[1] +hc = file(cdna_file, 'w') +hr = file(rrna_file, 'w') + +for t in dist: + if t.find('AT') != -1: + hc.write(t) + hc.write('\n') + for i in range(len(dist[t])-1): + hc.write(dist[t][i]) + hc.write('\t') + i = i+1 + hc.write(dist[t][i]) + hc.write('\n') + else: + hr.write(t) + hr.write('\n') + for i in range(len(dist[t])-1): + hr.write(dist[t][i]) + hr.write('\t') + i = i+1 + hr.write(dist[t][i]) + hr.write('\n') + +hc.close() +hr.close() + + + + diff -r 362993bbcf2b -r ec9348a4c42f reactivity_cal/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/tool_dependencies.xml Mon Oct 20 14:32:13 2014 -0400 @@ -0,0 +1,9 @@ + + + + + + + + +