changeset 78:332a0da1508d draft

Uploaded
author tyty
date Tue, 09 Dec 2014 03:05:57 -0500
parents a47006e7279b
children 8ec9acab68ab
files reactivity_cal/.DS_Store reactivity_cal/._.DS_Store reactivity_cal/parse_dis_react.py reactivity_cal/parse_dis_react.pyc reactivity_cal/rRNA.txt reactivity_cal/react_cal.py reactivity_cal/react_norm_function.py reactivity_cal/react_norm_function.pyc reactivity_cal/reactivity_calculation.xml reactivity_cal/read_file.py
diffstat 10 files changed, 357 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file reactivity_cal/.DS_Store has changed
Binary file reactivity_cal/._.DS_Store has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/parse_dis_react.py	Tue Dec 09 03:05:57 2014 -0500
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+
+def parse_dist(in_file):
+    result = []
+    distribution = {}
+    name = []
+    f = open(in_file)
+    flag = 0
+    for aline in f.readlines():
+        line = aline.strip()
+        dis = line.strip()
+        dist = dis.split('\t')
+        if len(dist) > 0:
+            if len(dist) == 1:
+                if dist[0].strip().find('coverage')==-1:
+                    if flag == 0:
+                        name.append(line)
+                        flag = 1
+                        t_name = line
+                    else:
+                        distribution[t_name] = 'null'
+                        name.append(line)
+                        flag = 1
+                        t_name = line
+            else:
+                distri = []
+                for i in range(0, len(dist)):
+                    distri.append(dist[i].strip())
+                distribution[t_name] = distri
+                flag = 0
+    result.append(name)
+    result.append(distribution)
+    f.close()
+    return result
+                
+                
+
+
+
+
+
+
+
+        
+
+
+
+
+
Binary file reactivity_cal/parse_dis_react.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/rRNA.txt	Tue Dec 09 03:05:57 2014 -0500
@@ -0,0 +1,8 @@
+>25s rRNA 3375nts
+GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA
+>gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence
+TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG
+>Arabidopsis thaliana 1
+GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC
+>gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA
+AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/react_cal.py	Tue Dec 09 03:05:57 2014 -0500
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+from Bio import SeqIO
+import math
+from parse_dis_react import *
+from react_norm_function import *
+import os
+import random
+import string
+
+
+dist_file1 = sys.argv[1] #plus library
+dist_file2 = sys.argv[2] #minus library
+seq_file = sys.argv[3] #Reference library(genome/cDNA)
+nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
+flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0)
+threshold = sys.argv[6] #Threshold to cap the reactivities
+output_file = sys.argv[7]
+
+
+distri_p = parse_dist(dist_file1)
+distri_m = parse_dist(dist_file2)
+threshold = float(threshold)
+
+
+syspathrs = os.getcwd()
+
+h = file(syspathrs+"react.txt",'w')
+flag_in = int(flag_in)
+
+seqs = SeqIO.parse(open(seq_file),'fasta');
+nt_s = set()
+for i in range(len(nt_spec)):
+    nt_s.add(nt_spec[i])
+
+flag = 0
+trans = []
+distri_p = distri_p[1]
+distri_m = distri_m[1]
+
+#thres = int(threshold)
+
+
+transcripts = {}
+for seq in seqs:
+    n = seq.id
+    trans.append(n)
+    transcripts[n] = seq.seq.tostring()
+    
+
+#print(distri_p)
+        
+
+for i in range(0, len(trans)):
+    h.write(trans[i])
+    h.write('\n')       
+    for j in range(len(distri_p[trans[i]])):
+        distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
+    for j in range(len(distri_m[trans[i]])):
+        distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)       
+    s_p = sum(distri_p[trans[i]])
+    s_m = sum(distri_m[trans[i]])
+    length = len(distri_p[trans[i]])
+    if s_p!= 0 and s_m!= 0:
+        r = []
+        for j in range(0, len(distri_p[trans[i]])):
+            f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
+            f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
+            raw_react = f_p-f_m
+            r.append(max(0, raw_react))
+                
+    if s_p!= 0 and s_m!= 0:    
+        for k in range(1,(len(r)-1)):
+            if transcripts[trans[i]][k-1] in nt_s:
+                h.write(str(r[k]))
+                h.write('\t')
+            else:
+                h.write('NA')
+                h.write('\t')
+        k = k+1
+        if transcripts[trans[i]][k-1] in nt_s:
+            h.write(str(r[k]))
+            h.write('\n')
+        else:
+            h.write('NA')
+            h.write('\n')
+            
+
+h.close()
+
+if flag_in:
+    react_norm((syspathrs+"react.txt"),output_file, threshold)
+else:
+    h_o = file(output_file, 'w')
+    f_i = open(syspathrs+"react.txt")
+    for aline in f_i.readlines():
+        h_o.write(aline.strip())
+        h_o.write('\n')
+os.system("rm -f "+syspathrs+"react.txt")
+
+#os.system("rm -r "+syspathrs)
+    
+     
+            
+    
+    
+        
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/react_norm_function.py	Tue Dec 09 03:05:57 2014 -0500
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+from Bio import SeqIO
+import math
+from parse_dis_react import *
+
+def cap(a,value):
+    if a>=value:
+        return value
+    else:
+        return a
+
+def react_norm(react_file, result_file, capped_value):
+    print("Normalizing.....")
+    react1 = parse_dist(react_file)
+    react = react1[1]
+    h = file(result_file, 'w')
+
+    capped = int(capped_value)
+
+    all_react = []
+
+
+    for t in react:
+        if react[t]!='null':
+            for i in range(len(react[t])):
+                if react[t][i]!='NA':                   
+                    all_react.append(float(react[t][i]))
+
+
+    all_react.sort(reverse = True)
+
+
+    eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)]
+    meight = sum(eight)/len(eight)
+
+    for t in react:
+        h.write(t)
+        h.write('\n')
+        if react[t]!='null':
+            for i in range((len(react[t])-1)):
+                if react[t][i]!='NA':
+                    h.write(str(cap((float(react[t][i])/meight),capped)))
+                else:
+                    h.write('NA')
+                h.write('\t')
+            if react[t][i+1]!='NA':
+                h.write(str(cap((float(react[t][i+1])/meight),capped)))
+            else:
+                h.write('NA')
+            h.write('\n')
+
+    h.close()
+        
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        
+
+
+
+
+
Binary file reactivity_cal/react_norm_function.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/reactivity_calculation.xml	Tue Dec 09 03:05:57 2014 -0500
@@ -0,0 +1,60 @@
+<tool id="react_cal_pipeline" name="Reactivity Calculation" version="1.0">
+	<description></description>
+	<command interpreter="python">react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output </command>
+        <requirements>
+                <requirement type="package" version="1.61">biopython</requirement>
+                <requirement type="package" version="1.7.1">numpy</requirement>
+        </requirements>
+	<inputs>
+                <param name="dist_file1" type="data" format="txt" label="RTSC file for (+) library"/>
+		        <param name="dist_file2" type="data" format="txt" label="RTSC file for (-) library"/>
+                <param name="seq_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
+                <param name="nt_spec" type="select" label="Nucleotide specificity">
+                    <option value="AC">AC</option>
+                    <option value="ATCG">AUCG</option>
+                </param>
+                <param name="flag_in" type="boolean" checked="true" truevalue = "1" falsevalue = "0" label="Normalization is performed if checked"/>
+                <param name="threshold" type="float" value = "7" optional = "true" label="Threshold to cap the reactivities"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="txt"/>
+	</outputs>
+    <tests>
+        <test>
+            <param name="dist_file1" value="dis_f_N1Ap_rrna.txt" />
+	        <param name="dist_file2" value="dis_f_N1Am_rrna.txt" />
+            <param name="seq_file" value="rRNA.txt" />
+            <param name="nt_spec" value="AC" />
+            <param name="flag_in" value="1" />
+            <param name="threshold" value="7" />
+	        <output name="output" file="DMS_reactivities.out" />
+ 
+          </test>
+    </tests>
+
+	<help>
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. RTSC files (Output of Get RT Stop Counts) for (+) and (-) library
+* 2. Reference file (fasta) used to map the reads to
+* 3. Nucleotide Specificity (Type of nucleotides to have reactivity, e.g. AC for DMS and ACTG for SHAPE)
+* [Optional]:
+* 1. A threshold to cap the structural reactivities. {Default: 7}
+* 2. Flag that determines whether to perform 2%-8% normalization {Default: Yes}
+
+-----
+
+**Output**:
+
+A text file with structural reactivity for each nucleotide (Reactivity file)
+
+
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/reactivity_cal/read_file.py	Tue Dec 09 03:05:57 2014 -0500
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    f = open(in_file);
+    result = [];
+    for aline in f.readlines():
+        temp = [];
+        tline = aline.strip();
+        tl = tline.split('\t');
+        for i in range(0, len(tl)):
+            temp.append(tl[i].strip());
+        result.append(temp);
+    f.close();
+    return result;
+
+