# HG changeset patch # User tyty # Date 1429035415 14400 # Node ID 62e8f7adf1ab2b07a1ff43e1b0c996a50d48de37 # Parent a6e3505227b59ec5f8cc425af77ae777c7bcbbcb Uploaded diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._.DS_Store Binary file reactivity_cal/._.DS_Store has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._parse_dis_react.py Binary file reactivity_cal/._parse_dis_react.py has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._parse_dis_react.pyc Binary file reactivity_cal/._parse_dis_react.pyc has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._react_cal.py Binary file reactivity_cal/._react_cal.py has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._react_norm_function.py Binary file reactivity_cal/._react_norm_function.py has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._react_norm_function.pyc Binary file reactivity_cal/._react_norm_function.pyc has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._reactivity_calculation.xml Binary file reactivity_cal/._reactivity_calculation.xml has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/._read_file.py Binary file reactivity_cal/._read_file.py has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/parse_dis_react.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/parse_dis_react.py Tue Apr 14 14:16:55 2015 -0400 @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys + +def parse_dist(in_file): + result = [] + distribution = {} + name = [] + f = open(in_file) + flag = 0 + for aline in f.readlines(): + line = aline.strip() + dis = line.strip() + dist = dis.split('\t') + if len(dist) > 0: + if len(dist) == 1: + if dist[0].strip().find('coverage')==-1: + if flag == 0: + name.append(line) + flag = 1 + t_name = line + else: + distribution[t_name] = 'null' + name.append(line) + flag = 1 + t_name = line + else: + distri = [] + for i in range(0, len(dist)): + distri.append(dist[i].strip()) + distribution[t_name] = distri + flag = 0 + result.append(name) + result.append(distribution) + f.close() + return result + + + + + + + + + + + + + + + diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/parse_dis_react.pyc Binary file reactivity_cal/parse_dis_react.pyc has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/react_cal.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/react_cal.py Tue Apr 14 14:16:55 2015 -0400 @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +from Bio import SeqIO +import math +from parse_dis_react import * +from react_norm_function import * +import os +import random +import string + + +dist_file1 = sys.argv[1] #plus library +dist_file2 = sys.argv[2] #minus library +seq_file = sys.argv[3] #Reference library(genome/cDNA) +nt_spec = sys.argv[4] #only show reactivity for AC or ATCG +flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0) +threshold = sys.argv[6] #Threshold to cap the reactivities +output_file = sys.argv[7] + + +distri_p = parse_dist(dist_file1) +distri_m = parse_dist(dist_file2) +threshold = float(threshold) + + +syspathrs = os.getcwd() + +h = file(syspathrs+"react.txt",'w') +flag_in = int(flag_in) + +seqs = SeqIO.parse(open(seq_file),'fasta'); +nt_s = set() +for i in range(len(nt_spec)): + nt_s.add(nt_spec[i]) + +flag = 0 +trans = [] +distri_p = distri_p[1] +distri_m = distri_m[1] + +#thres = int(threshold) + + +transcripts = {} +for seq in seqs: + n = seq.id + trans.append(n) + transcripts[n] = seq.seq.tostring() + + +#print(distri_p) + + +for i in range(0, len(trans)): + h.write(trans[i]) + h.write('\n') + for j in range(len(distri_p[trans[i]])): + distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e) + for j in range(len(distri_m[trans[i]])): + distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e) + s_p = sum(distri_p[trans[i]]) + s_m = sum(distri_m[trans[i]]) + length = len(distri_p[trans[i]]) + if s_p!= 0 and s_m!= 0: + r = [] + for j in range(0, len(distri_p[trans[i]])): + f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length + f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length + raw_react = f_p-f_m + r.append(max(0, raw_react)) + + if s_p!= 0 and s_m!= 0: + for k in range(1,(len(r)-1)): + if transcripts[trans[i]][k-1] in nt_s: + h.write(str(float('%.3f'%r[k]))) + h.write('\t') + else: + h.write('NA') + h.write('\t') + k = k+1 + if transcripts[trans[i]][k-1] in nt_s: + h.write(str(float('%.3f'%r[k]))) + h.write('\n') + else: + h.write('NA') + h.write('\n') + + +h.close() + +if flag_in: + react_norm((syspathrs+"react.txt"),output_file, threshold) +else: + h_o = file(output_file, 'w') + f_i = open(syspathrs+"react.txt") + for aline in f_i.readlines(): + h_o.write(aline.strip()) + h_o.write('\n') +os.system("rm -f "+syspathrs+"react.txt") + +#os.system("rm -r "+syspathrs) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/react_norm_function.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/react_norm_function.py Tue Apr 14 14:16:55 2015 -0400 @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +from Bio import SeqIO +import math +from parse_dis_react import * + +def cap(a,value): + if a>=value: + return value + else: + return a + +def react_norm(react_file, result_file, capped_value): + print("Normalizing.....") + react1 = parse_dist(react_file) + react = react1[1] + h = file(result_file, 'w') + + capped = int(capped_value) + + all_react = [] + + + for t in react: + if react[t]!='null': + for i in range(len(react[t])): + if react[t][i]!='NA': + all_react.append(float(react[t][i])) + + + all_react.sort(reverse = True) + + + eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)] + meight = sum(eight)/len(eight) + + for t in react: + h.write(t) + h.write('\n') + if react[t]!='null': + for i in range((len(react[t])-1)): + if react[t][i]!='NA': + h.write(str(float('%.3f'%cap((float(react[t][i])/meight),capped)))) + else: + h.write('NA') + h.write('\t') + if react[t][i+1]!='NA': + h.write(str(float('%.3f'%cap((float(react[t][i+1])/meight),capped)))) + else: + h.write('NA') + h.write('\n') + + h.close() + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/react_norm_function.pyc Binary file reactivity_cal/react_norm_function.pyc has changed diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/reactivity_calculation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/reactivity_calculation.xml Tue Apr 14 14:16:55 2015 -0400 @@ -0,0 +1,62 @@ + + calculates structural reactivity on each nucleotide based on RT stop counts from the Get RT Stop Counts module + react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output + + biopython + numpy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**Function** + +* Reactivity Calculation calculates the structural reactivity on each nucleotide based on an RT stop count file containing the RT stop count on each nucleotide, typically the output from the Get RT Stop Counts module. + +----- + +**Input**: + +* 1. RTSC files (Output of Get RT Stop Counts) for (+) and (-) library +* 2. Reference file (fasta) used to map the reads to +* 3. Nucleotide Specificity (Type of nucleotides to have reactivity, e.g. AC for DMS and ACTG for SHAPE) +* [Optional]: +* 1. A threshold to cap the structural reactivities. {Default: 7} +* 2. Flag that determines whether to perform 2%-8% normalization {Default: Yes} + +----- + +**Output**: + +A text file with structural reactivity for each nucleotide (Reactivity file) + + + + + diff -r a6e3505227b5 -r 62e8f7adf1ab reactivity_cal/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/read_file.py Tue Apr 14 14:16:55 2015 -0400 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + +