# HG changeset patch # User tyty # Date 1413829911 14400 # Node ID 362993bbcf2bae18739667a4b122b9d2ca52d68f # Parent 13024d65eee79cbe70cb10b03e137ce06058835d Deleted selected files diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/.DS_Store Binary file get_reads/reactivity_cal/.DS_Store has changed diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/parse_dis_react.py --- a/get_reads/reactivity_cal/parse_dis_react.py Mon Oct 20 14:31:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import sys - -def parse_dist(in_file): - result = [] - distribution = {} - name = [] - f = open(in_file) - flag = 0 - for aline in f.readlines(): - line = aline.strip() - dis = line.strip() - dist = dis.split('\t') - if len(dist) > 0: - if len(dist) == 1: - if dist[0].strip().find('coverage')==-1: - if flag == 0: - name.append(line) - flag = 1 - t_name = line - else: - distribution[t_name] = 'null' - name.append(line) - flag = 1 - t_name = line - else: - distri = [] - for i in range(0, len(dist)): - distri.append(dist[i].strip()) - distribution[t_name] = distri - flag = 0 - result.append(name) - result.append(distribution) - f.close() - return result - - - - - - - - - - - - - - - diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/react_cal.py --- a/get_reads/reactivity_cal/react_cal.py Mon Oct 20 14:31:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,151 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import sys -from Bio import SeqIO -import math -from parse_dis_react import * -from react_norm_function import * -import os - - -dist_file1 = sys.argv[1] #plus library -dist_file2 = sys.argv[2] #minus library -seq_file = sys.argv[3] -nt_spec = sys.argv[4] -flag_in = sys.argv[5] -threshold = sys.argv[6] -output_file = sys.argv[7] - - -distri_p = parse_dist(dist_file1) -distri_m = parse_dist(dist_file2) -threshold = float(threshold) - -print(flag_in) - -ospath = os.path.realpath(sys.argv[0]) -ost = ospath.split('/') -syspath = "" -for i in range(len(ost)-1): - syspath = syspath+ost[i].strip() - syspath = syspath+'/' - -h = file(syspath+"react.txt",'w') -flag_in = int(flag_in) - -seqs = SeqIO.parse(open(seq_file),'fasta'); -nt_s = set() -for i in range(len(nt_spec)): - nt_s.add(nt_spec[i]) - -flag = 0 -trans = [] -distri_p = distri_p[1] -distri_m = distri_m[1] - -#thres = int(threshold) - - -transcripts = {} -for seq in seqs: - n = seq.id - trans.append(n) - transcripts[n] = seq.seq.tostring() - - -#print(distri_p) - - -for i in range(0, len(trans)): - h.write(trans[i]) - h.write('\n') - if (trans[i].find('AT1G29930')==-1) and (trans[i].find('At1g29930')==-1): - for j in range(len(distri_p[trans[i]])): - distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e) - for j in range(len(distri_m[trans[i]])): - distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e) - s_p = sum(distri_p[trans[i]]) - s_m = sum(distri_m[trans[i]]) - length = len(distri_p[trans[i]]) - if s_p!= 0 and s_m!= 0: - r = [] - for j in range(0, len(distri_p[trans[i]])): - f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length - f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length - raw_react = f_p-f_m - r.append(max(0, raw_react)) - else: - for j in range(len(distri_p[trans[i]])): - distri_p[trans[i]][j] = int(distri_p[trans[i]][j]) - for j in range(len(distri_m[trans[i]])): - distri_m[trans[i]][j] = int(distri_m[trans[i]][j]) - s_p = sum(distri_p[trans[i]]) - s_m = sum(distri_m[trans[i]]) - if s_p!= 0 and s_m!= 0: - r = [] - for j in range(0, len(distri_p[trans[i]])): - f_p = float(distri_p[trans[i]][j])/float(s_p) - f_m = float(distri_m[trans[i]][j])/float(s_m) - r.append((max(0,(f_p-f_m)))*100) - - if s_p!= 0 and s_m!= 0: - for k in range(1,(len(r)-1)): - if transcripts[trans[i]][k-1] in nt_s: - h.write(str(r[k])) - h.write('\t') - else: - h.write('NA') - h.write('\t') - k = k+1 - if transcripts[trans[i]][k-1] in nt_s: - h.write(str(r[k])) - h.write('\n') - else: - h.write('NA') - h.write('\n') - - -h.close() - -if flag_in: - react_norm((syspath+"react.txt"),output_file, threshold) -else: - h_o = file(output_file, 'w') - f_i = open(syspath+"react.txt") - for aline in f_i.readlines(): - h_o.write(aline.strip()) - h_o.write('\n') -os.system("rm -f "+syspath+"react.txt") - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/react_norm_function.py --- a/get_reads/reactivity_cal/react_norm_function.py Mon Oct 20 14:31:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,114 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import sys -from Bio import SeqIO -import math -from parse_dis_react import * - -def cap(a,value): - if a>=value: - return value - else: - return a - -def react_norm(react_file, result_file, capped_value): - print("Normalizing.....") - react1 = parse_dist(react_file) - react = react1[1] - h = file(result_file, 'w') - - capped = int(capped_value) - - all_react = [] - - - for t in react: - if react[t]!='null': - for i in range(len(react[t])): - if react[t][i]!='NA': - all_react.append(float(react[t][i])) -# except: -# print(react[t][i]) -# print(t) -# print(i) - - all_react.sort(reverse = True) - #print((all_react)) - #print(all_react[int(len(all_react)*0.02)]) - #print(all_react[int(len(all_react)*0.03)]) - #print(all_react[int(len(all_react)*0.025)]) - #print(all_react[int(len(all_react)*0.04)]) - #print(all_react[int(len(all_react)*0.05)]) - ''' - mean = sum(all_react)/len(all_react) - print(mean) - temp = 0 - - for i in range(len(all_react)): - temp = temp+all_react[i]*all_react[i] - temp = temp/len(all_react) - sd = math.sqrt(temp-mean*mean) - ''' - eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)] - meight = sum(eight)/len(eight) - - for t in react: - h.write(t) - h.write('\n') - if react[t]!='null': - if (t.find('AT1G29930')==-1) and (t.find('At1g29930')==-1): - for i in range((len(react[t])-1)): - if react[t][i]!='NA': - h.write(str(cap((float(react[t][i])/meight),capped))) - else: - h.write('NA') - h.write('\t') - if react[t][i+1]!='NA': - h.write(str(cap((float(react[t][i+1])/meight),capped))) - else: - h.write('NA') - h.write('\n') - else: - for i in range((len(react[t])-1)): - if react[t][i]!='NA': - h.write(str(float(react[t][i])*2.6)) - else: - h.write('NA') - h.write('\t') - if react[t][i+1]!='NA': - h.write(str(float(react[t][i])*2.6)) - else: - h.write('NA') - h.write('\n') - - - - h.close() - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/reactivity_calculation.xml --- a/get_reads/reactivity_cal/reactivity_calculation.xml Mon Oct 20 14:31:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ - - - react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output - - biopython - numpy - - - - - - - - - - - - - - - - - - - -**TIPS**: - ------ - -**Input**: - -* 1. RTSC files (Output of II) for (+) and (-) library -* 2. Reference file (fasta) used to map the reads -* 3. Nucleotide Specificity (Type of nucleotide to have reactivity, e.g. AC for DMS and ACTG for SHAPE) -* [Optional]: -* 1. A threshold to cap the structural reactivities. {Default: 7} -* 2. Flag that determines whether to perform 2%-8% normalization {Default: yes} - ------ - -**Output**: - -A text file with structural reactivity for each nucleotide (Reactivity file) - - - - - diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/read_file.py --- a/get_reads/reactivity_cal/read_file.py Mon Oct 20 14:31:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_t_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split('\t'); - for i in range(0, len(tl)): - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/separate_rna.py --- a/get_reads/reactivity_cal/separate_rna.py Mon Oct 20 14:31:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from parse_dis_pac import * - - -dist_file = sys.argv[1] -cdna_file = sys.argv[2] -rrna_file = sys.argv[3] - -dist = parse_dist(dist_file) -dist = dist[1] -hc = file(cdna_file, 'w') -hr = file(rrna_file, 'w') - -for t in dist: - if t.find('AT') != -1: - hc.write(t) - hc.write('\n') - for i in range(len(dist[t])-1): - hc.write(dist[t][i]) - hc.write('\t') - i = i+1 - hc.write(dist[t][i]) - hc.write('\n') - else: - hr.write(t) - hr.write('\n') - for i in range(len(dist[t])-1): - hr.write(dist[t][i]) - hr.write('\t') - i = i+1 - hr.write(dist[t][i]) - hr.write('\n') - -hc.close() -hr.close() - - - - diff -r 13024d65eee7 -r 362993bbcf2b get_reads/reactivity_cal/tool_dependencies.xml --- a/get_reads/reactivity_cal/tool_dependencies.xml Mon Oct 20 14:31:27 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ - - - - - - - - -