# HG changeset patch
# User tyty
# Date 1429035155 14400
# Node ID e269e4c6818ea2f3bc53fb10563b50952a20d6ee
# Parent aedb21527abdb2c4f0ee7190e0560c8fbf795ede
Uploaded
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._.DS_Store
Binary file Iterative_mapping/._.DS_Store has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._iterative_map.py
Binary file Iterative_mapping/._iterative_map.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._iterative_map.xml
Binary file Iterative_mapping/._iterative_map.xml has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._map_ex.py
Binary file Iterative_mapping/._map_ex.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._read_file.py
Binary file Iterative_mapping/._read_file.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._read_s_file.py
Binary file Iterative_mapping/._read_s_file.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._remove_map.py
Binary file Iterative_mapping/._remove_map.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._seq_track.py
Binary file Iterative_mapping/._seq_track.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._truncate.py
Binary file Iterative_mapping/._truncate.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/._unmap.py
Binary file Iterative_mapping/._unmap.py has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/iterative_map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/iterative_map.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+from read_file import *
+from read_s_file import *
+import random
+import string
+
+type_input = sys.argv[1]
+seq_file = sys.argv[2]
+ref_file = sys.argv[3]
+shift = sys.argv[4]
+length = sys.argv[5]
+t_end = sys.argv[6]
+map_type = sys.argv[7]
+output_file = sys.argv[8]
+
+
+if map_type!="default":
+ s = ""
+ sm = ""
+ s = s+"-v "+sys.argv[9]
+ sm = sm+"-v "+sys.argv[9]
+ sm = sm+" -5 "+sys.argv[10]
+ sm = sm+" -3 "+sys.argv[11]
+ s = s+" -k "+sys.argv[12]
+ sm = sm+" -k "+sys.argv[12]
+ if sys.argv[13]:
+ s = s+" -a"
+ sm = sm+" -a"
+ if int(sys.argv[14])>=1:
+ s = s+" -m "+sys.argv[14]
+ sm = sm+" -m "+sys.argv[14]
+ if sys.argv[15]:
+ s = s+" --best --strata "
+ sm = sm+" --best --strata "
+
+else:
+ s = "-v 3 -a --best --strata "
+ sm = "-v 3 -a --best --strata "
+
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspath = ""
+for i in range(len(ost)-1):
+ syspath = syspath+ost[i].strip()
+ syspath = syspath+'/'
+
+syspathrs = os.getcwd()
+syspathrs = syspathrs+'/'
+
+os.system("bowtie-build -f "+ref_file+" "+syspathrs+"ref > "+syspathrs+"log.txt")
+
+os.system("cp "+seq_file+" "+syspathrs+"seq0.fa")
+
+if type_input == "fasta":
+ tp = 'fasta'
+if type_input == "fastq":
+ tp = 'fastq'
+
+k = 0
+
+if type_input == "fasta":
+ os.system("bowtie "+sm+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
+if type_input == "fastq":
+ os.system("bowtie "+sm+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
+
+while(True):
+ os.system("samtools view -Sb -F 0xfff "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get mapped reads
+ os.system("samtools view -Sb -f 0x4 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"umapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get unmapped reads
+ os.system("samtools view -Sb -f 0x10 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"rmapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get reversed mapped reads
+ os.system("samtools merge -f "+syspathrs+"unmapped"+str(k)+".bam "+syspathrs+"umapped"+str(k)+".bam "+syspathrs+"rmapped"+str(k)+".bam") #get reversed mapped reads
+ os.system("samtools view -h -o "+syspathrs+"unmapped"+str(k)+".sam "+syspathrs+"unmapped"+str(k)+".bam") #get reversed mapped reads
+ if k>0:
+ os.system("samtools view -h -o "+syspathrs+"mapped"+str(k)+".sam "+syspathrs+"mapped"+str(k)+".bam") #get reversed mapped reads
+ os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"unmapped"+str(k)+".txt")
+ os.system("cut -f 1 "+syspathrs+"mapped"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".txt")
+ os.system("python "+syspath+"remove_map.py "+syspathrs+"unmapped"+str(k)+".txt "+syspathrs+"mapped"+str(k)+".txt "+syspathrs+"runmapped"+str(k)+".txt")
+ os.system("rm "+syspathrs+"mapped"+str(k)+".sam")
+ os.system("rm "+syspathrs+"mapped"+str(k)+".txt")
+ os.system("rm "+syspathrs+"unmapped"+str(k)+".txt")
+ else:
+ os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"runmapped"+str(k)+".txt")
+
+ os.system("rm "+syspathrs+"unmapped"+str(k)+".bam")
+ os.system("rm "+syspathrs+"umapped"+str(k)+".bam")
+ os.system("rm "+syspathrs+"rmapped"+str(k)+".bam")
+ os.system("python "+syspath+"seq_track.py "+syspathrs+"runmapped"+str(k)+".txt "+syspathrs+"seq"+str(k)+".fa "+syspathrs+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence
+ os.system("python "+syspath+"truncate.py "+syspathrs+"unmap_seq"+str(k)+".fa "+shift+" "+syspathrs+"seq"+str(k+1)+".fa "+length+" "+t_end) #truncate unmapped sequence
+ os.system("rm "+syspathrs+"seq"+str(k)+".fa") #Remove sequences being mapped
+ os.system("rm "+syspathrs+"map"+str(k)+".sam") #Remove mapping file
+ os.system("rm "+syspathrs+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce
+ os.system("rm "+syspathrs+"runmapped"+str(k)+".txt")
+ os.system("rm "+syspathrs+"unmapped"+str(k)+".sam")
+
+ os.system("wc -l "+syspathrs+"seq"+str(k+1)+".fa > "+syspathrs+"count"+str(k+1)+".txt")
+ c = read_sp_file(syspathrs+"count"+str(k+1)+".txt")
+ if c[0][0] == '0': #If no reads is in the sequence file, stop
+ os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
+ os.system("rm "+syspathrs+"seq"+str(k+1)+".fa")
+ break
+ os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
+ k = k+1
+ os.system("bowtie "+s+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
+
+
+ss = ""
+for i in range(0,k+1):
+ ss = ss+" "+syspathrs+"mapped"+str(i)+".bam"
+
+
+os.system("samtools merge -f "+syspathrs+"combine.bam"+" "+ss)
+os.system("samtools sort "+syspathrs+"combine.bam sorted")
+os.system("samtools view -b -h sorted.bam > " + output_file)
+#print("samtools merge mapped_all.bam"+ss)
+os.system("rm "+syspathrs+"mapped*.bam")
+os.system("rm "+syspathrs+"combine.bam")
+os.system("rm "+syspathrs+"sorted.bam")
+os.system("rm "+syspathrs+"ref*")
+#os.system("rm -r "+syspathrs)
+
+
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/iterative_map.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/iterative_map.xml Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,104 @@
+
+ iteratively maps the raw reads of RNA structural data to the reference transcriptome
+
+ #if $mapping_file.type == "user"
+ iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best
+ #else
+ iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output
+ #end if
+
+
+ biopython
+ numpy
+ samtools
+ bowtie
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Overview of StructureFold**
+
+* StructureFold is a series of software packages that automates the process of predicting RNA secondary structure for a transcript or an entire transcriptome, with or without the inclusion of constraints on the structure(s) provided by wet bench experimentation. The process consists of mapping the raw reads of RNA structural data on every transcript in the dataset to the transcriptome, getting RT stop counts on each nucleotide, calculating structural reactivities on the nucleotides, and predicting the RNA structures. Please cite: Tang, Y, Bouvier, E, Kwok CK, Ding Y, Nekrutenko, A, Bevilacqua PC, Assmann SM, StructureFold: Genome-wide RNA secondary structure mapping and reconstruction in vivo, Bioinformatics, In press. RNA structure is predicted using the RNAstructure algorithm (http://rna.urmc.rochester.edu/RNAstructure.html) or ViennaRNA package (http://www.tbi.univie.ac.at/RNA/).
+
+-----
+
+**Function**
+
+* Iterative Mapping maps the raw reads of RNA structural data to the reference transcriptome using Bowtie (v0.12.8). It allows users to trim each read from either end to iteratively map the read to the reference transcriptome.
+
+-----
+
+**Input**:
+
+* 1. Sequence file type (FASTA/FASTQ)
+* 2. Sequence file (fasta/fastq format)
+* 3. Reference file (fasta) used to map the reads to
+* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping)
+* 5. “Length” (The minimum length of the reads for mapping after trimming)
+* [Optional]
+* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. Use -5/-3 flag to trim the nucleotides from 5'/3' end of the reads)
+
+-----
+
+**Output**:
+
+A sorted .bam file with all of the reads that are mapped
+
+
+
+
+
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/map_ex.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/map_ex.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]
+result_file = sys.argv[2]
+
+
+#reads = read_t_file(read_file);
+
+f = open(map_file);
+h = file(result_file, 'w')
+
+for aline in f.readlines():
+ tline = aline.strip();
+ tl = tline.split('\t');
+ if len(tl)>4:
+ if int(tl[1].strip())== 0:
+ h.write(tline)
+ h.write('\n')
+
+
+f.close();
+h.close()
+
+
+
+
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/read_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/read_file.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+ f = open(in_file);
+ result = [];
+ for aline in f.readlines():
+ temp = [];
+ tline = aline.strip();
+ tl = tline.split('\t');
+ for i in range(0, len(tl)):
+ temp.append(tl[i].strip());
+ result.append(temp);
+ f.close();
+ return result;
+
+
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/read_file.pyc
Binary file Iterative_mapping/read_file.pyc has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/read_s_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/read_s_file.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_sp_file(in_file):
+ f = open(in_file);
+ result = [];
+ for aline in f.readlines():
+ temp = [];
+ tline = aline.strip();
+ tl = tline.split(' ');
+ for i in range(0, len(tl)):
+ if len(tl[i].strip())>0:
+ temp.append(tl[i].strip());
+ result.append(temp);
+ f.close();
+ return result;
+
+
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/read_s_file.pyc
Binary file Iterative_mapping/read_s_file.pyc has changed
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/remove_map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/remove_map.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+
+
+unmap_file = sys.argv[1]
+map_file = sys.argv[2]
+result_file = sys.argv[3]
+
+
+unmap = read_t_file(unmap_file)
+mapped = read_t_file(map_file)
+h = file(result_file, 'w')
+
+maps = set()
+for i in range(len(mapped)):
+ maps.add(mapped[i][0])
+
+
+for i in range(len(unmap)):
+ name = unmap[i][0]
+ if name not in maps:
+ h.write(name)
+ h.write('\n')
+
+
+h.close()
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/seq_track.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/seq_track.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+unmap_file = sys.argv[1]
+reads_file = sys.argv[2]
+result_file = sys.argv[3]
+tp = sys.argv[4]
+
+
+unmap = read_t_file(unmap_file);
+
+h = file(result_file, 'w')
+
+reads = SeqIO.parse(reads_file,tp)
+um = set()
+for i in range(0, len(unmap)):
+ id_r = unmap[i][0]
+ um.add(id_r)
+
+for read in reads:
+ if read.id in um:
+ h.write('>')
+ h.write(read.id)
+ h.write('\n')
+ h.write(read.seq.tostring())
+ h.write('\n')
+
+
+
+h.close()
+
+
+
+
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/truncate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/truncate.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from Bio import SeqIO
+
+fasta_file = sys.argv[1]
+shift_in = sys.argv[2]
+result_file = sys.argv[3]
+length = sys.argv[4]
+t_end = sys.argv[5]
+
+shift = int(shift_in)
+
+fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
+h = file(result_file,'w')
+for seq in fasta_sequences:
+ nuc = seq.id;
+ sequence = seq.seq.tostring();
+ if (len(sequence)-shift)>=int(length):
+ h.write('>'+nuc)
+ h.write('\n')
+ if t_end == 'three_end':
+ h.write(sequence[0:(len(sequence)-shift)])
+ if t_end == 'five_end':
+ h.write(sequence[(shift):(len(sequence))])
+ h.write('\n')
+
+
+
+
+h.close()
+
+
+
+
diff -r aedb21527abd -r e269e4c6818e Iterative_mapping/unmap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/unmap.py Tue Apr 14 14:12:35 2015 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]
+result_file = sys.argv[2]
+
+
+#reads = read_t_file(read_file);
+
+f = open(map_file);
+h = file(result_file, 'w')
+
+for aline in f.readlines():
+ tline = aline.strip();
+ tl = tline.split('\t');
+ if len(tl)>4:
+ if int(tl[1].strip()) != 0:
+ h.write(tl[0].strip());
+ h.write('\n');
+
+
+f.close();
+h.close()
+
+
+
+
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._.DS_Store
Binary file structurefold/Iterative_mapping/._.DS_Store has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._iterative_map.py
Binary file structurefold/Iterative_mapping/._iterative_map.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._iterative_map.xml
Binary file structurefold/Iterative_mapping/._iterative_map.xml has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._map_ex.py
Binary file structurefold/Iterative_mapping/._map_ex.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._read_file.py
Binary file structurefold/Iterative_mapping/._read_file.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._read_s_file.py
Binary file structurefold/Iterative_mapping/._read_s_file.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._remove_map.py
Binary file structurefold/Iterative_mapping/._remove_map.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._seq_track.py
Binary file structurefold/Iterative_mapping/._seq_track.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._truncate.py
Binary file structurefold/Iterative_mapping/._truncate.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/._unmap.py
Binary file structurefold/Iterative_mapping/._unmap.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/iterative_map.py
--- a/structurefold/Iterative_mapping/iterative_map.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-import os
-from read_file import *
-from read_s_file import *
-import random
-import string
-
-type_input = sys.argv[1]
-seq_file = sys.argv[2]
-ref_file = sys.argv[3]
-shift = sys.argv[4]
-length = sys.argv[5]
-t_end = sys.argv[6]
-map_type = sys.argv[7]
-output_file = sys.argv[8]
-
-
-if map_type!="default":
- s = ""
- sm = ""
- s = s+"-v "+sys.argv[9]
- sm = sm+"-v "+sys.argv[9]
- sm = sm+" -5 "+sys.argv[10]
- sm = sm+" -3 "+sys.argv[11]
- s = s+" -k "+sys.argv[12]
- sm = sm+" -k "+sys.argv[12]
- if sys.argv[13]:
- s = s+" -a"
- sm = sm+" -a"
- if int(sys.argv[14])>=1:
- s = s+" -m "+sys.argv[14]
- sm = sm+" -m "+sys.argv[14]
- if sys.argv[15]:
- s = s+" --best --strata "
- sm = sm+" --best --strata "
-
-else:
- s = "-v 3 -a --best --strata "
- sm = "-v 3 -a --best --strata "
-
-ospath = os.path.realpath(sys.argv[0])
-ost = ospath.split('/')
-syspath = ""
-for i in range(len(ost)-1):
- syspath = syspath+ost[i].strip()
- syspath = syspath+'/'
-
-syspathrs = os.getcwd()
-syspathrs = syspathrs+'/'
-
-os.system("bowtie-build -f "+ref_file+" "+syspathrs+"ref > "+syspathrs+"log.txt")
-
-os.system("cp "+seq_file+" "+syspathrs+"seq0.fa")
-
-if type_input == "fasta":
- tp = 'fasta'
-if type_input == "fastq":
- tp = 'fastq'
-
-k = 0
-
-if type_input == "fasta":
- os.system("bowtie "+sm+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
-if type_input == "fastq":
- os.system("bowtie "+sm+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
-
-while(True):
- os.system("samtools view -Sb -F 0xfff "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get mapped reads
- os.system("samtools view -Sb -f 0x4 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"umapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get unmapped reads
- os.system("samtools view -Sb -f 0x10 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"rmapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get reversed mapped reads
- os.system("samtools merge -f "+syspathrs+"unmapped"+str(k)+".bam "+syspathrs+"umapped"+str(k)+".bam "+syspathrs+"rmapped"+str(k)+".bam") #get reversed mapped reads
- os.system("samtools view -h -o "+syspathrs+"unmapped"+str(k)+".sam "+syspathrs+"unmapped"+str(k)+".bam") #get reversed mapped reads
- if k>0:
- os.system("samtools view -h -o "+syspathrs+"mapped"+str(k)+".sam "+syspathrs+"mapped"+str(k)+".bam") #get reversed mapped reads
- os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"unmapped"+str(k)+".txt")
- os.system("cut -f 1 "+syspathrs+"mapped"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".txt")
- os.system("python "+syspath+"remove_map.py "+syspathrs+"unmapped"+str(k)+".txt "+syspathrs+"mapped"+str(k)+".txt "+syspathrs+"runmapped"+str(k)+".txt")
- os.system("rm "+syspathrs+"mapped"+str(k)+".sam")
- os.system("rm "+syspathrs+"mapped"+str(k)+".txt")
- os.system("rm "+syspathrs+"unmapped"+str(k)+".txt")
- else:
- os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"runmapped"+str(k)+".txt")
-
- os.system("rm "+syspathrs+"unmapped"+str(k)+".bam")
- os.system("rm "+syspathrs+"umapped"+str(k)+".bam")
- os.system("rm "+syspathrs+"rmapped"+str(k)+".bam")
- os.system("python "+syspath+"seq_track.py "+syspathrs+"runmapped"+str(k)+".txt "+syspathrs+"seq"+str(k)+".fa "+syspathrs+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence
- os.system("python "+syspath+"truncate.py "+syspathrs+"unmap_seq"+str(k)+".fa "+shift+" "+syspathrs+"seq"+str(k+1)+".fa "+length+" "+t_end) #truncate unmapped sequence
- os.system("rm "+syspathrs+"seq"+str(k)+".fa") #Remove sequences being mapped
- os.system("rm "+syspathrs+"map"+str(k)+".sam") #Remove mapping file
- os.system("rm "+syspathrs+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce
- os.system("rm "+syspathrs+"runmapped"+str(k)+".txt")
- os.system("rm "+syspathrs+"unmapped"+str(k)+".sam")
-
- os.system("wc -l "+syspathrs+"seq"+str(k+1)+".fa > "+syspathrs+"count"+str(k+1)+".txt")
- c = read_sp_file(syspathrs+"count"+str(k+1)+".txt")
- if c[0][0] == '0': #If no reads is in the sequence file, stop
- os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
- os.system("rm "+syspathrs+"seq"+str(k+1)+".fa")
- break
- os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
- k = k+1
- os.system("bowtie "+s+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
-
-
-ss = ""
-for i in range(0,k+1):
- ss = ss+" "+syspathrs+"mapped"+str(i)+".bam"
-
-
-os.system("samtools merge -f "+syspathrs+"combine.bam"+" "+ss)
-os.system("samtools sort "+syspathrs+"combine.bam sorted")
-os.system("samtools view -b -h sorted.bam > " + output_file)
-#print("samtools merge mapped_all.bam"+ss)
-os.system("rm "+syspathrs+"mapped*.bam")
-os.system("rm "+syspathrs+"combine.bam")
-os.system("rm "+syspathrs+"sorted.bam")
-os.system("rm "+syspathrs+"ref*")
-#os.system("rm -r "+syspathrs)
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/iterative_map.xml
--- a/structurefold/Iterative_mapping/iterative_map.xml Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-
- iteratively maps the raw reads of RNA structural data to the reference transcriptome
-
- #if $mapping_file.type == "user"
- iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best
- #else
- iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output
- #end if
-
-
- biopython
- numpy
- samtools
- bowtie
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**Overview of StructureFold**
-
-* StructureFold is a series of software packages that automates the process of predicting RNA secondary structure for a transcript or an entire transcriptome, with or without the inclusion of constraints on the structure(s) provided by wet bench experimentation. The process consists of mapping the raw reads of RNA structural data on every transcript in the dataset to the transcriptome, getting RT stop counts on each nucleotide, calculating structural reactivities on the nucleotides, and predicting the RNA structures. Please cite: Tang, Y, Bouvier, E, Kwok CK, Ding Y, Nekrutenko, A, Bevilacqua PC, Assmann SM, StructureFold: Genome-wide RNA secondary structure mapping and reconstruction in vivo, Bioinformatics, In press. RNA structure is predicted using the RNAstructure algorithm (http://rna.urmc.rochester.edu/RNAstructure.html) or ViennaRNA package (http://www.tbi.univie.ac.at/RNA/).
-
------
-
-**Function**
-
-* Iterative Mapping maps the raw reads of RNA structural data to the reference transcriptome using Bowtie (v0.12.8). It allows users to trim each read from either end to iteratively map the read to the reference transcriptome.
-
------
-
-**Input**:
-
-* 1. Sequence file type (FASTA/FASTQ)
-* 2. Sequence file (fasta/fastq format)
-* 3. Reference file (fasta) used to map the reads to
-* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping)
-* 5. “Length” (The minimum length of the reads for mapping after trimming)
-* [Optional]
-* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. Use -5/-3 flag to trim the nucleotides from 5'/3' end of the reads)
-
------
-
-**Output**:
-
-A sorted .bam file with all of the reads that are mapped
-
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/map_ex.py
--- a/structurefold/Iterative_mapping/map_ex.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-from Bio import SeqIO
-
-map_file = sys.argv[1]
-result_file = sys.argv[2]
-
-
-#reads = read_t_file(read_file);
-
-f = open(map_file);
-h = file(result_file, 'w')
-
-for aline in f.readlines():
- tline = aline.strip();
- tl = tline.split('\t');
- if len(tl)>4:
- if int(tl[1].strip())== 0:
- h.write(tline)
- h.write('\n')
-
-
-f.close();
-h.close()
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/read_file.py
--- a/structurefold/Iterative_mapping/read_file.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
- f = open(in_file);
- result = [];
- for aline in f.readlines():
- temp = [];
- tline = aline.strip();
- tl = tline.split('\t');
- for i in range(0, len(tl)):
- temp.append(tl[i].strip());
- result.append(temp);
- f.close();
- return result;
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/read_file.pyc
Binary file structurefold/Iterative_mapping/read_file.pyc has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/read_s_file.py
--- a/structurefold/Iterative_mapping/read_s_file.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_sp_file(in_file):
- f = open(in_file);
- result = [];
- for aline in f.readlines():
- temp = [];
- tline = aline.strip();
- tl = tline.split(' ');
- for i in range(0, len(tl)):
- if len(tl[i].strip())>0:
- temp.append(tl[i].strip());
- result.append(temp);
- f.close();
- return result;
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/read_s_file.pyc
Binary file structurefold/Iterative_mapping/read_s_file.pyc has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/remove_map.py
--- a/structurefold/Iterative_mapping/remove_map.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-
-
-unmap_file = sys.argv[1]
-map_file = sys.argv[2]
-result_file = sys.argv[3]
-
-
-unmap = read_t_file(unmap_file)
-mapped = read_t_file(map_file)
-h = file(result_file, 'w')
-
-maps = set()
-for i in range(len(mapped)):
- maps.add(mapped[i][0])
-
-
-for i in range(len(unmap)):
- name = unmap[i][0]
- if name not in maps:
- h.write(name)
- h.write('\n')
-
-
-h.close()
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/seq_track.py
--- a/structurefold/Iterative_mapping/seq_track.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-from Bio import SeqIO
-
-unmap_file = sys.argv[1]
-reads_file = sys.argv[2]
-result_file = sys.argv[3]
-tp = sys.argv[4]
-
-
-unmap = read_t_file(unmap_file);
-
-h = file(result_file, 'w')
-
-reads = SeqIO.parse(reads_file,tp)
-um = set()
-for i in range(0, len(unmap)):
- id_r = unmap[i][0]
- um.add(id_r)
-
-for read in reads:
- if read.id in um:
- h.write('>')
- h.write(read.id)
- h.write('\n')
- h.write(read.seq.tostring())
- h.write('\n')
-
-
-
-h.close()
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/truncate.py
--- a/structurefold/Iterative_mapping/truncate.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from Bio import SeqIO
-
-fasta_file = sys.argv[1]
-shift_in = sys.argv[2]
-result_file = sys.argv[3]
-length = sys.argv[4]
-t_end = sys.argv[5]
-
-shift = int(shift_in)
-
-fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
-h = file(result_file,'w')
-for seq in fasta_sequences:
- nuc = seq.id;
- sequence = seq.seq.tostring();
- if (len(sequence)-shift)>=int(length):
- h.write('>'+nuc)
- h.write('\n')
- if t_end == 'three_end':
- h.write(sequence[0:(len(sequence)-shift)])
- if t_end == 'five_end':
- h.write(sequence[(shift):(len(sequence))])
- h.write('\n')
-
-
-
-
-h.close()
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/Iterative_mapping/unmap.py
--- a/structurefold/Iterative_mapping/unmap.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-from Bio import SeqIO
-
-map_file = sys.argv[1]
-result_file = sys.argv[2]
-
-
-#reads = read_t_file(read_file);
-
-f = open(map_file);
-h = file(result_file, 'w')
-
-for aline in f.readlines():
- tline = aline.strip();
- tl = tline.split('\t');
- if len(tl)>4:
- if int(tl[1].strip()) != 0:
- h.write(tl[0].strip());
- h.write('\n');
-
-
-f.close();
-h.close()
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/._.DS_Store
Binary file structurefold/get_reads/._.DS_Store has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/._get_read.py
Binary file structurefold/get_reads/._get_read.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/._get_read.xml
Binary file structurefold/get_reads/._get_read.xml has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/._read_file.py
Binary file structurefold/get_reads/._read_file.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/get_read.py
--- a/structurefold/get_reads/get_read.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from Bio import SeqIO
-import os
-from read_file import *
-import random
-import string
-
-fasta_file = sys.argv[1]
-map_file = sys.argv[2]
-result_file = sys.argv[3]
-
-syspathrs = os.getcwd()
-
-os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt")
-
-fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
-length_seq = {};
-for seq in fasta_sequences:
- nuc = seq.id;
- length_seq[nuc] = len(seq.seq.tostring());
-
-
-
-mapping = {}
-transcripts = []
-
-f = open(syspathrs+"map_info.txt");
-for aline in f.readlines():
- tline = aline.strip();
- tl = tline.split('\t');
- if tl[0].strip() not in transcripts:
- transcripts.append(tl[0].strip());
- mapping[tl[0].strip()] = [];
-
- mapping[tl[0].strip()].append(tl[1].strip());
-
-distribution = {};
-coverage = {};
-for transcript in length_seq:
- distribution[transcript] = [];
- for i in range(0, length_seq[transcript]):
- distribution[transcript].append(0);
- sum_count = float(0);
- if transcript in mapping:
- for j in range(0, len(mapping[transcript])):
- index = mapping[transcript][j];
- #count = reads[mapping[transcript][j][0]];
- sum_count = sum_count + 1;
- distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
- coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
- else:
- coverage[transcript] = 0
-
-
-
-
-
-h = file(result_file, 'w')
-for transcript in length_seq:
- h.write(transcript);
- h.write('\n')
- for i in range(0, length_seq[transcript]):
- h.write(str(distribution[transcript][i]))
- h.write('\t')
- h.write('\n')
- h.write('\n')
-
-#os.system("rm -r "+syspathrs)
-
-
-
-f.close();
-h.close()
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/get_read.xml
--- a/structurefold/get_reads/get_read.xml Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-
- derives the reverse transcriptase (RT) stop count on each nucleotide from a mapped file provided by the Iterative Mapping module
- get_read.py $lib_file $map_file $output
-
- biopython
- numpy
- samtools
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**Function**
-
-Get RT Stop Counts derives the RT stop counts on each nucleotide of each transcript in the reference transcriptome based on a mapped file (.bam), typically the output from the Iterative Mapping module.
-
------
-
-**Input**:
-
-* 1. A mapped (.bam) file from Bowtie (or any other mapping program)
-* 2. Reference library sequences (fasta) used to map the reads to
-
------
-
-**Output**:
-
-A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file)
-
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/read_file.py
--- a/structurefold/get_reads/read_file.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
- f = open(in_file);
- result = [];
- for aline in f.readlines():
- temp = [];
- tline = aline.strip();
- tl = tline.split('\t');
- for i in range(0, len(tl)):
- temp.append(tl[i].strip());
- result.append(temp);
- f.close();
- return result;
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/get_reads/read_file.pyc
Binary file structurefold/get_reads/read_file.pyc has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/._.DS_Store
Binary file structurefold/predict/._.DS_Store has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/._ct_to_dot.py
Binary file structurefold/predict/._ct_to_dot.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/._dot_convert.py
Binary file structurefold/predict/._dot_convert.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/._parse_dis_pac.py
Binary file structurefold/predict/._parse_dis_pac.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/._predict_RNAs.py
Binary file structurefold/predict/._predict_RNAs.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/._read_file.py
Binary file structurefold/predict/._read_file.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/._rtts_plot.py
Binary file structurefold/predict/._rtts_plot.py has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/ct_to_dot.py
--- a/structurefold/predict/ct_to_dot.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-import shlex
-import os
-import subprocess
-from read_file import *
-
-ct_file = sys.argv[1]
-path = sys.argv[2]
-id_s = sys.argv[3]
-result_file = sys.argv[4]
-
-h = file(result_file, 'w')
-os.system('grep "'+id_s+'" '+ct_file+' |wc -l > '+path+'/count.txt')
-count = read_t_file(path+'/count.txt')
-comm = ''
-for i in range(int(count[0][0])):
- command = shlex.split('ct2dot %s %s %s' % (ct_file, str(i+1), os.path.join(path, 'db_file_%s.dbnn' % str(i+1))))
- subprocess.call(command)
- comm = comm +' '+path+'/db_file_'+str(i+1)+'.dbnn'
-
-
-
-os.system('cat'+comm+' > '+result_file)
-for i in range(int(count[0][0])):
- command = shlex.split('rm %s' % (os.path.join(path, 'db_file_%s.dbnn' % str(i+1))))
- subprocess.call(command)
-command = shlex.split('rm %s' % (os.path.join(path, 'count.txt')))
-subprocess.call(command)
-
-
-
-h.close()
-
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/dot_convert.py
--- a/structurefold/predict/dot_convert.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-dot_file = sys.argv[1]
-result_file = sys.argv[2]
-
-h = file(result_file, 'w')
-f = open(dot_file)
-
-
-
-for aline in f.readlines():
- line = aline.strip()
- if line.find('>')!=-1:
- id_line = line
- idt = id_line.split('>')
- ids = idt[1].strip()
- else:
- if line.find('(')!=-1:
- structure_line = line
- st = structure_line.split(' ')
- structure = st[0].strip()
- enert = st[1].strip()
- if len(enert)>1:
- enertt = enert.split('(')
- enertt = enertt[1].strip()
- else:
- enertt = st[2].strip()
- enerttt = enertt.split(')')
- ener = enerttt[0].strip()
- h.write('>ENERGY = '+ener+' '+ids+'\n')
- h.write(seq+'\n')
- h.write(structure+'\n')
- else:
- seq = line
-
-
-
-
-
-f.close()
-h.close()
-
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/parse_dis_pac.py
--- a/structurefold/predict/parse_dis_pac.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-#parse reactivity file into a dictionary
-
-import sys
-
-def parse_dist(in_file):
- result = []
- distribution = {}
- name = []
- f = open(in_file)
- for aline in f.readlines():
- line = aline.strip()
- dis = line.strip()
- dist = dis.split('\t') #split the line and the reactivites or reads are in a list
- if len(dist) > 0:
- if len(dist) == 1:
- if dist[0].strip().find('coverage')==-1:
- name.append(line) #add the name in the name list
- flag = 1
- t_name = line
- else:
- distri = []
- for i in range(0, len(dist)):
- distri.append(dist[i].strip())
- distribution[t_name] = distri #add the list of reactivities into a dictionary
- result.append(name)
- result.append(distribution) #Output the dictionary
- f.close()
- return result
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/parse_dis_pac.pyc
Binary file structurefold/predict/parse_dis_pac.pyc has changed
diff -r aedb21527abd -r e269e4c6818e structurefold/predict/predict_RNAs.py
--- a/structurefold/predict/predict_RNAs.py Tue Apr 14 14:09:42 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,219 +0,0 @@
-#RNA structure prediction & Output and illustrate reactivities
-
-import sys
-import shlex
-import subprocess
-import tarfile
-from parse_dis_pac import *
-from read_file import *
-from Bio import SeqIO
-import os
-from rtts_plot import *
-import random
-import string
-
-
-id_file = sys.argv[1]
-seq_file = sys.argv[2]
-predict_type = sys.argv[3]
-temperature = sys.argv[4]
-predict_program = sys.argv[5]
-output_html = sys.argv[6]
-output_directory = sys.argv[7]
-
-
-
-flag = False
-if predict_type!='silico': #input reactivity file if provided
- if predict_program == 'rs':
- react_file = sys.argv[8]
- slope = sys.argv[9]
- intercept = sys.argv[10]
- else:
- react_file = sys.argv[8]
- thres_h = sys.argv[9]
- thres_h = float(thres_h)
- thres_l = sys.argv[10]
- thres_l = float(thres_l)
- gqs = sys.argv[11]
- gqs = int(gqs)
-
- react = parse_dist(react_file)
- react = react[1]
- flag = True
-else:
- if predict_program!='rs':
- gqs = sys.argv[8]
- gqs = int(gqs)
-
-
-ospath = os.path.realpath(sys.argv[0])
-ost = ospath.split('/')
-syspathpt = ""
-for i in range(len(ost)-1):
- syspathpt = syspathpt+ost[i].strip()
- syspathpt = syspathpt+'/'
-
-
-syspath = os.getcwd()
-
-ids = read_t_file(id_file)
-sequences = SeqIO.parse(seq_file, 'fasta')
-
-
-seqs = {}
-for seq in sequences:
- seqs[seq.id] = seq.seq.tostring()
-
-if len(ids)>100: #setup a limit of the number of sequence to be predicted
- print("Number of sequences exceeds limitation!")
- sys.exit(0)
-
-
-#predict RNA structures
-
-os.mkdir(output_directory)
-flag3 = 0
-
-id_predicted = set()
-for i in range(len(ids)):
- flag2 = 0
- id_s = ids[i][0]
- #print(id_s)
- #Put RNA sequence and reactivities into files
- if id_s in seqs:
- fh = file(os.path.join(syspath,"temp.txt"), 'w')
- fh.write('>'+id_s)
- fh.write('\n')
- fh.write(seqs[id_s])
- fh.close()
- if not flag:
- if predict_program == 'rs':
- command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
- subprocess.call(command)
- command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
- subprocess.call(command)
- else:
- if gqs:
- os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
-
- else:
- os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
- command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
- subprocess.call(command)
- if not gqs:
- command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
- else:
- command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
- subprocess.call(command)
- command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
- subprocess.call(command)
- else:
- if id_s in react:
- fh = file(os.path.join(syspath, "constraint.txt"), 'w')
- make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
- if predict_program == 'rs':
- for j in range(0, (len(react[id_s]))):
- if react[id_s][j]!='NA':
- fh.write(str(j+1))
- fh.write('\t')
- fh.write(str(react[id_s][j]))
- fh.write('\n')
- fh.close()
- command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
- os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
- os.path.join(output_directory, "%s.ct" % id_s)))
- subprocess.call(command)
- command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
- subprocess.call(command)
- else:
- fh.write('>'+id_s)
- fh.write('\n')
- fh.write(seqs[id_s])
- fh.write('\n')
- for j in range(0, (len(react[id_s]))):
- if react[id_s][j]!='NA':
- re = float(react[id_s][j])
- if re>thres_h:
- fh.write('x')
- else:
- if re '+output_directory+'/'+id_s+'.dbnb')
-
- else:
- os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
- command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
- subprocess.call(command)
- if not gqs:
- command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
- else:
- command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
- subprocess.call(command)
- command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
- subprocess.call(command)
-
- else:
- print(id_s+" not in the data of react!")
- flag2 = 1
- if flag2 == 0:
- if predict_program == 'rs':
- command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
- subprocess.call(command)
- command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
- subprocess.call(command)
- else:
- if not gqs:
- command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
- subprocess.call(command)
- command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
- subprocess.call(command)
- flag3 = 1
- id_predicted.add(id_s)
- else:
- print(id_s+" not in the data of sequences!")
-
-#Remove the unnecessary files
-if flag3 == 1:
-
- tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:')
- for filename in os.listdir(output_directory):
- filepath = os.path.join(output_directory, filename)
- print filepath
- tarball.add(filepath, arcname=filename)
- #print os.listdir(syspath)
- #print os.listdir(output_directory)
- # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
- tarball.close()
-
- h = open(output_html, 'wb' )
- h.write('
Results of RNA structure prediction
\n')
-
- h.write('
\n')
- h.write('Click here to download the compressed file containing all prediction results.\n' % (('prediction_results.tar')))
- #h.write('<\p>\n')
- h.write('