# HG changeset patch
# User tyty
# Date 1410807321 14400
# Node ID b10c6790f01e9f891319a2c757b4fc32e1ed89c3
# Parent 1a909794e94d5d88163ce7fa6b807dee5d522a0b
Uploaded
diff -r 1a909794e94d -r b10c6790f01e get_reads/.DS_Store
Binary file get_reads/.DS_Store has changed
diff -r 1a909794e94d -r b10c6790f01e get_reads/get_read.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/get_read.py Mon Sep 15 14:55:21 2014 -0400
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+#from galaxy.tools.read_file import *
+from Bio import SeqIO
+import os
+from read_file import *
+
+fasta_file = sys.argv[1]
+map_file = sys.argv[2]
+result_file = sys.argv[3]
+
+os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt")
+
+fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
+length_seq = {};
+for seq in fasta_sequences:
+ nuc = seq.id;
+ length_seq[nuc] = len(seq.seq.tostring());
+
+
+
+mapping = {}
+transcripts = []
+
+f = open("map_info.txt");
+for aline in f.readlines():
+ tline = aline.strip();
+ tl = tline.split('\t');
+ if tl[0].strip() not in transcripts:
+ transcripts.append(tl[0].strip());
+ mapping[tl[0].strip()] = [];
+
+ mapping[tl[0].strip()].append(tl[1].strip());
+
+distribution = {};
+coverage = {};
+for transcript in length_seq:
+ distribution[transcript] = [];
+ for i in range(0, length_seq[transcript]):
+ distribution[transcript].append(0);
+ sum_count = float(0);
+ if transcript in mapping:
+ for j in range(0, len(mapping[transcript])):
+ index = mapping[transcript][j];
+ #count = reads[mapping[transcript][j][0]];
+ sum_count = sum_count + 1;
+ distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
+ coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
+ else:
+ coverage[transcript] = 0
+
+
+
+
+
+h = file(result_file, 'w')
+for transcript in length_seq:
+ h.write(transcript);
+ h.write('\n')
+ for i in range(0, length_seq[transcript]):
+ h.write(str(distribution[transcript][i]))
+ h.write('\t')
+ h.write('\n')
+ h.write('\n')
+
+
+
+
+
+f.close();
+h.close()
+
+
+
+
diff -r 1a909794e94d -r b10c6790f01e get_reads/get_read.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/get_read.xml Mon Sep 15 14:55:21 2014 -0400
@@ -0,0 +1,44 @@
+
+
+ get_read.py $lib_file $map_file $output
+
+ biopython
+ numpy
+ samtools
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**TIPS**:
+
+-----
+
+**Input**
+1. A mapped (bam) file from Bowtie (or any mapping program)
+2. Reference library sequences (fasta) used to map the reads
+
+-----
+
+**Output**:
+A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file)
+
+
+
+
+
diff -r 1a909794e94d -r b10c6790f01e get_reads/read_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/read_file.py Mon Sep 15 14:55:21 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+ f = open(in_file);
+ result = [];
+ for aline in f.readlines():
+ temp = [];
+ tline = aline.strip();
+ tl = tline.split('\t');
+ for i in range(0, len(tl)):
+ temp.append(tl[i].strip());
+ result.append(temp);
+ f.close();
+ return result;
+
+
diff -r 1a909794e94d -r b10c6790f01e get_reads/test.bam
Binary file get_reads/test.bam has changed
diff -r 1a909794e94d -r b10c6790f01e get_reads/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/tool_dependencies.xml Mon Sep 15 14:55:21 2014 -0400
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 1a909794e94d -r b10c6790f01e reactivity_cal/parse_dis_react.py
--- a/reactivity_cal/parse_dis_react.py Mon Sep 15 14:54:59 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-
-def parse_dist(in_file):
- result = []
- distribution = {}
- name = []
- f = open(in_file)
- flag = 0
- for aline in f.readlines():
- line = aline.strip()
- dis = line.strip()
- dist = dis.split('\t')
- if len(dist) > 0:
- if len(dist) == 1:
- if dist[0].strip().find('coverage')==-1:
- if flag == 0:
- name.append(line)
- flag = 1
- t_name = line
- else:
- distribution[t_name] = 'null'
- name.append(line)
- flag = 1
- t_name = line
- else:
- distri = []
- for i in range(0, len(dist)):
- distri.append(dist[i].strip())
- distribution[t_name] = distri
- flag = 0
- result.append(name)
- result.append(distribution)
- f.close()
- return result
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 1a909794e94d -r b10c6790f01e reactivity_cal/react_cal.py
--- a/reactivity_cal/react_cal.py Mon Sep 15 14:54:59 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-from Bio import SeqIO
-import math
-from parse_dis_react import *
-from react_norm_function import *
-import os
-
-
-dist_file1 = sys.argv[1] #plus library
-dist_file2 = sys.argv[2] #minus library
-seq_file = sys.argv[3]
-nt_spec = sys.argv[4]
-flag_in = sys.argv[5]
-threshold = sys.argv[6]
-output_file = sys.argv[7]
-
-
-distri_p = parse_dist(dist_file1)
-distri_m = parse_dist(dist_file2)
-threshold = float(threshold)
-
-print(flag_in)
-
-ospath = os.path.realpath(sys.argv[0])
-ost = ospath.split('/')
-syspath = ""
-for i in range(len(ost)-1):
- syspath = syspath+ost[i].strip()
- syspath = syspath+'/'
-
-h = file(syspath+"react.txt",'w')
-flag_in = int(flag_in)
-
-seqs = SeqIO.parse(open(seq_file),'fasta');
-nt_s = set()
-for i in range(len(nt_spec)):
- nt_s.add(nt_spec[i])
-
-flag = 0
-trans = []
-distri_p = distri_p[1]
-distri_m = distri_m[1]
-
-#thres = int(threshold)
-
-
-transcripts = {}
-for seq in seqs:
- n = seq.id
- trans.append(n)
- transcripts[n] = seq.seq.tostring()
-
-
-#print(distri_p)
-
-
-for i in range(0, len(trans)):
- h.write(trans[i])
- h.write('\n')
- if (trans[i].find('AT1G29930')==-1) and (trans[i].find('At1g29930')==-1):
- for j in range(len(distri_p[trans[i]])):
- distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
- for j in range(len(distri_m[trans[i]])):
- distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)
- s_p = sum(distri_p[trans[i]])
- s_m = sum(distri_m[trans[i]])
- length = len(distri_p[trans[i]])
- if s_p!= 0 and s_m!= 0:
- r = []
- for j in range(0, len(distri_p[trans[i]])):
- f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
- f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
- raw_react = f_p-f_m
- r.append(max(0, raw_react))
- else:
- for j in range(len(distri_p[trans[i]])):
- distri_p[trans[i]][j] = int(distri_p[trans[i]][j])
- for j in range(len(distri_m[trans[i]])):
- distri_m[trans[i]][j] = int(distri_m[trans[i]][j])
- s_p = sum(distri_p[trans[i]])
- s_m = sum(distri_m[trans[i]])
- if s_p!= 0 and s_m!= 0:
- r = []
- for j in range(0, len(distri_p[trans[i]])):
- f_p = float(distri_p[trans[i]][j])/float(s_p)
- f_m = float(distri_m[trans[i]][j])/float(s_m)
- r.append((max(0,(f_p-f_m)))*100)
-
- if s_p!= 0 and s_m!= 0:
- for k in range(1,(len(r)-1)):
- if transcripts[trans[i]][k-1] in nt_s:
- h.write(str(r[k]))
- h.write('\t')
- else:
- h.write('NA')
- h.write('\t')
- k = k+1
- if transcripts[trans[i]][k-1] in nt_s:
- h.write(str(r[k]))
- h.write('\n')
- else:
- h.write('NA')
- h.write('\n')
-
-
-h.close()
-
-if flag_in:
- react_norm((syspath+"react.txt"),output_file, threshold)
-else:
- h_o = file(output_file, 'w')
- f_i = open(syspath+"react.txt")
- for aline in f_i.readlines():
- h_o.write(aline.strip())
- h_o.write('\n')
-os.system("rm -f "+syspath+"react.txt")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 1a909794e94d -r b10c6790f01e reactivity_cal/react_norm_function.py
--- a/reactivity_cal/react_norm_function.py Mon Sep 15 14:54:59 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,114 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-from Bio import SeqIO
-import math
-from parse_dis_react import *
-
-def cap(a,value):
- if a>=value:
- return value
- else:
- return a
-
-def react_norm(react_file, result_file, capped_value):
- print("Normalizing.....")
- react1 = parse_dist(react_file)
- react = react1[1]
- h = file(result_file, 'w')
-
- capped = int(capped_value)
-
- all_react = []
-
-
- for t in react:
- if react[t]!='null':
- for i in range(len(react[t])):
- if react[t][i]!='NA':
- all_react.append(float(react[t][i]))
-# except:
-# print(react[t][i])
-# print(t)
-# print(i)
-
- all_react.sort(reverse = True)
- #print((all_react))
- #print(all_react[int(len(all_react)*0.02)])
- #print(all_react[int(len(all_react)*0.03)])
- #print(all_react[int(len(all_react)*0.025)])
- #print(all_react[int(len(all_react)*0.04)])
- #print(all_react[int(len(all_react)*0.05)])
- '''
- mean = sum(all_react)/len(all_react)
- print(mean)
- temp = 0
-
- for i in range(len(all_react)):
- temp = temp+all_react[i]*all_react[i]
- temp = temp/len(all_react)
- sd = math.sqrt(temp-mean*mean)
- '''
- eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)]
- meight = sum(eight)/len(eight)
-
- for t in react:
- h.write(t)
- h.write('\n')
- if react[t]!='null':
- if (t.find('AT1G29930')==-1) and (t.find('At1g29930')==-1):
- for i in range((len(react[t])-1)):
- if react[t][i]!='NA':
- h.write(str(cap((float(react[t][i])/meight),capped)))
- else:
- h.write('NA')
- h.write('\t')
- if react[t][i+1]!='NA':
- h.write(str(cap((float(react[t][i+1])/meight),capped)))
- else:
- h.write('NA')
- h.write('\n')
- else:
- for i in range((len(react[t])-1)):
- if react[t][i]!='NA':
- h.write(str(float(react[t][i])*2.6))
- else:
- h.write('NA')
- h.write('\t')
- if react[t][i+1]!='NA':
- h.write(str(float(react[t][i])*2.6))
- else:
- h.write('NA')
- h.write('\n')
-
-
-
- h.close()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 1a909794e94d -r b10c6790f01e reactivity_cal/reactivity_calculation.xml
--- a/reactivity_cal/reactivity_calculation.xml Mon Sep 15 14:54:59 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-
-
- react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output
-
- biopython
- numpy
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**TIPS**:
-
------
-
-**Input**:
-
-* 1. RTSC files (Output of II) for (+) and (-) library
-* 2. Reference file (fasta) used to map the reads
-* 3. Nucleotide Specificity (Type of nucleotide to have reactivity, e.g. AC for DMS and ACTG for SHAPE)
-* [Optional]:
-* 1. A threshold to cap the structural reactivities. {Default: 7}
-* 2. Flag that determines whether to perform 2%-8% normalization {Default: yes}
-
------
-
-**Output**:
-
-A text file with structural reactivity for each nucleotide (Reactivity file)
-
-
-
-
-
diff -r 1a909794e94d -r b10c6790f01e reactivity_cal/read_file.py
--- a/reactivity_cal/read_file.py Mon Sep 15 14:54:59 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
- f = open(in_file);
- result = [];
- for aline in f.readlines():
- temp = [];
- tline = aline.strip();
- tl = tline.split('\t');
- for i in range(0, len(tl)):
- temp.append(tl[i].strip());
- result.append(temp);
- f.close();
- return result;
-
-
diff -r 1a909794e94d -r b10c6790f01e reactivity_cal/separate_rna.py
--- a/reactivity_cal/separate_rna.py Mon Sep 15 14:54:59 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from parse_dis_pac import *
-
-
-dist_file = sys.argv[1]
-cdna_file = sys.argv[2]
-rrna_file = sys.argv[3]
-
-dist = parse_dist(dist_file)
-dist = dist[1]
-hc = file(cdna_file, 'w')
-hr = file(rrna_file, 'w')
-
-for t in dist:
- if t.find('AT') != -1:
- hc.write(t)
- hc.write('\n')
- for i in range(len(dist[t])-1):
- hc.write(dist[t][i])
- hc.write('\t')
- i = i+1
- hc.write(dist[t][i])
- hc.write('\n')
- else:
- hr.write(t)
- hr.write('\n')
- for i in range(len(dist[t])-1):
- hr.write(dist[t][i])
- hr.write('\t')
- i = i+1
- hr.write(dist[t][i])
- hr.write('\n')
-
-hc.close()
-hr.close()
-
-
-
-
diff -r 1a909794e94d -r b10c6790f01e reactivity_cal/tool_dependencies.xml
--- a/reactivity_cal/tool_dependencies.xml Mon Sep 15 14:54:59 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-
-
-
-
-
-
-
-
-