# HG changeset patch
# User tyty
# Date 1429034982 14400
# Node ID aedb21527abdb2c4f0ee7190e0560c8fbf795ede
# Parent  87ec0ecdc2afcdf0e7f3bf95b57411aaf06f051c
Uploaded

diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._.DS_Store
Binary file Iterative_mapping/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._iterative_map.py
Binary file Iterative_mapping/._iterative_map.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._iterative_map.xml
Binary file Iterative_mapping/._iterative_map.xml has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._map_ex.py
Binary file Iterative_mapping/._map_ex.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._read_file.py
Binary file Iterative_mapping/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._read_s_file.py
Binary file Iterative_mapping/._read_s_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._remove_map.py
Binary file Iterative_mapping/._remove_map.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._seq_track.py
Binary file Iterative_mapping/._seq_track.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._truncate.py
Binary file Iterative_mapping/._truncate.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/._unmap.py
Binary file Iterative_mapping/._unmap.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/iterative_map.py
--- a/Iterative_mapping/iterative_map.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-import os
-from read_file import *
-from read_s_file import *
-import random
-import string
-
-type_input = sys.argv[1]
-seq_file = sys.argv[2]
-ref_file = sys.argv[3]
-shift = sys.argv[4]
-length = sys.argv[5]
-t_end = sys.argv[6]
-map_type = sys.argv[7]
-output_file = sys.argv[8]
-
-
-if map_type!="default":
-    s = ""
-    sm = ""
-    s = s+"-v "+sys.argv[9]
-    sm = sm+"-v "+sys.argv[9]
-    sm = sm+" -5 "+sys.argv[10]
-    sm = sm+" -3 "+sys.argv[11]
-    s = s+" -k "+sys.argv[12]
-    sm = sm+" -k "+sys.argv[12]
-    if sys.argv[13]:
-        s = s+" -a"
-        sm = sm+" -a"
-    if int(sys.argv[14])>=1:
-        s = s+" -m "+sys.argv[14]
-        sm = sm+" -m "+sys.argv[14]
-    if sys.argv[15]:
-        s = s+" --best --strata "
-        sm = sm+" --best --strata "
-    
-else:
-    s = "-v 3 -a --best --strata "
-    sm = "-v 3 -a --best --strata "
-
-ospath = os.path.realpath(sys.argv[0])
-ost = ospath.split('/')
-syspath = ""
-for i in range(len(ost)-1):
-    syspath = syspath+ost[i].strip()
-    syspath = syspath+'/'
-
-syspathrs = os.getcwd()
-syspathrs = syspathrs+'/'
-
-os.system("bowtie-build -f "+ref_file+" "+syspathrs+"ref > "+syspathrs+"log.txt")
-
-os.system("cp "+seq_file+" "+syspathrs+"seq0.fa")
-
-if type_input == "fasta":
-    tp = 'fasta'
-if type_input == "fastq":
-    tp = 'fastq'
-
-k = 0
-
-if type_input == "fasta":
-    os.system("bowtie "+sm+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
-if type_input == "fastq":
-    os.system("bowtie "+sm+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
-
-while(True):
-    os.system("samtools view -Sb -F 0xfff "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get mapped reads
-    os.system("samtools view -Sb -f 0x4 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"umapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get unmapped reads
-    os.system("samtools view -Sb -f 0x10 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"rmapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get reversed mapped reads
-    os.system("samtools merge -f "+syspathrs+"unmapped"+str(k)+".bam "+syspathrs+"umapped"+str(k)+".bam "+syspathrs+"rmapped"+str(k)+".bam") #get reversed mapped reads
-    os.system("samtools view -h -o "+syspathrs+"unmapped"+str(k)+".sam "+syspathrs+"unmapped"+str(k)+".bam") #get reversed mapped reads
-    if k>0:
-        os.system("samtools view -h -o "+syspathrs+"mapped"+str(k)+".sam "+syspathrs+"mapped"+str(k)+".bam") #get reversed mapped reads
-        os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"unmapped"+str(k)+".txt")
-        os.system("cut -f 1 "+syspathrs+"mapped"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".txt")
-        os.system("python "+syspath+"remove_map.py "+syspathrs+"unmapped"+str(k)+".txt "+syspathrs+"mapped"+str(k)+".txt "+syspathrs+"runmapped"+str(k)+".txt")
-        os.system("rm "+syspathrs+"mapped"+str(k)+".sam")
-        os.system("rm "+syspathrs+"mapped"+str(k)+".txt")
-        os.system("rm "+syspathrs+"unmapped"+str(k)+".txt")
-    else:
-        os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"runmapped"+str(k)+".txt")
-    
-    os.system("rm "+syspathrs+"unmapped"+str(k)+".bam")
-    os.system("rm "+syspathrs+"umapped"+str(k)+".bam")
-    os.system("rm "+syspathrs+"rmapped"+str(k)+".bam")
-    os.system("python "+syspath+"seq_track.py "+syspathrs+"runmapped"+str(k)+".txt "+syspathrs+"seq"+str(k)+".fa "+syspathrs+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence
-    os.system("python "+syspath+"truncate.py "+syspathrs+"unmap_seq"+str(k)+".fa "+shift+" "+syspathrs+"seq"+str(k+1)+".fa "+length+" "+t_end) #truncate unmapped sequence
-    os.system("rm "+syspathrs+"seq"+str(k)+".fa") #Remove sequences being mapped
-    os.system("rm "+syspathrs+"map"+str(k)+".sam") #Remove mapping file
-    os.system("rm "+syspathrs+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce
-    os.system("rm "+syspathrs+"runmapped"+str(k)+".txt")
-    os.system("rm "+syspathrs+"unmapped"+str(k)+".sam")
-    
-    os.system("wc -l "+syspathrs+"seq"+str(k+1)+".fa > "+syspathrs+"count"+str(k+1)+".txt")
-    c = read_sp_file(syspathrs+"count"+str(k+1)+".txt")
-    if c[0][0] == '0': #If no reads is in the sequence file, stop
-        os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
-        os.system("rm "+syspathrs+"seq"+str(k+1)+".fa")
-        break
-    os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
-    k = k+1
-    os.system("bowtie "+s+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
-
-
-ss = ""
-for i in range(0,k+1):
-    ss = ss+" "+syspathrs+"mapped"+str(i)+".bam"
-
-
-os.system("samtools merge -f "+syspathrs+"combine.bam"+" "+ss)
-os.system("samtools sort "+syspathrs+"combine.bam sorted")
-os.system("samtools view -b -h sorted.bam > " + output_file)
-#print("samtools merge mapped_all.bam"+ss)
-os.system("rm "+syspathrs+"mapped*.bam")
-os.system("rm "+syspathrs+"combine.bam")
-os.system("rm "+syspathrs+"sorted.bam")
-os.system("rm "+syspathrs+"ref*")
-#os.system("rm -r "+syspathrs)
-
-
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/iterative_map.xml
--- a/Iterative_mapping/iterative_map.xml	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-<tool id="iterative_map_pipeline" name="Iterative Mapping" version="1.0">
-	<description>iteratively maps the raw reads of RNA structural data to the reference transcriptome</description>
-	<command interpreter="python">
-        #if $mapping_file.type == "user"
-            iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best  
-        #else
-            iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output
-        #end if
-    </command>
-        <requirements>
-                <requirement type="package" version="1.61">biopython</requirement>
-                <requirement type="package" version="1.7.1">numpy</requirement>
-                <requirement type="package" version="0.1.18">samtools</requirement>
-                <requirement type="package" version="0.12.7">bowtie</requirement>
-        </requirements>
-	<inputs>
-                <conditional name="file_format">
-                  <param name="type" type="select" label="File format of the reads (Default FASTQ)">
-                    <option value="fastq">FASTQ</option>
-                    <option value="fasta">FASTA</option>
-                  </param>
-                  <when value="fastq">
-                    <param name="seq_file" type="data" format="fastq" label="Fastq file"/>
-                  </when>
-                  <when value="fasta">
-                    <param name="seq_file" type="data" format="fasta" label="Fasta file"/>
-                  </when>
-                </conditional>
-		        <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
-                <param name="shift" type="integer" value="1" label="Number of nucleotides trimmed each round"/>
-                <param name="length" type="integer" value="21" label="Minimum requirement of read length for mapping"/>
-                <param name="t_end" type="select" label="Trimming end">
-                    <option value="five_end">5' end</option>
-                    <option value="three_end">3' end</option>
-                </param>
-                
-                <conditional name="mapping_file">
-                  <param name="type" type="select" label="Bowtie mapping flags (Default -v 0 -a --best --strata)">
-                    <option value="default">Default</option>
-                    <option value="user">User specified</option>
-                  </param>
-                  <when value="default"/>
-                  <when value="user"> 
-                    <param name="param_v" type="integer" value="0" label="Number of mismatches for SOAP-like alignment policy (-v)"/>
-                    <param name="param_five" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)"/>
-                    <param name="param_three" type="integer" value="0" label="Trim n bases from high-quality (right) end of each read before alignment (-3)"/>
-                    <param name="param_k" type="integer" value="1" label="Report up to n valid alignments per read (-k)"/>
-                    <param name="param_a" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to report all valid alignments per read (-a)"/>
-                    <param name="param_m" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m), -1 for unlimited"/>
-                    <param name="param_best" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best --strata)"/>
-                  </when>
-                </conditional>
-
-	</inputs>
-	<outputs>
-		<data name="output" type="data" format="bam"/>
-	</outputs>
-    <tests>
-        <test>
-            <param name="file_format.type" value="fasta" />
-            <param name="file_format.seq_file" value="sample.fasta" />
-	        <param name="reference_file" value="rRNA.txt" />
-            <param name="shift" value="1" />
-            <param name="length" value="21" />
-            <param name="mapping_file.type" value="default" />
-	        <output name="output" file="mapped.out" />
-        </test>
-    </tests>
-
-	<help>
-
-
-**Overview of StructureFold**
-
-* StructureFold is a series of software packages that automates the process of predicting RNA secondary structure for a transcript or an entire transcriptome, with or without the inclusion of constraints on the structure(s) provided by wet bench experimentation. The process consists of mapping the raw reads of RNA structural data on every transcript in the dataset to the transcriptome, getting RT stop counts on each nucleotide, calculating structural reactivities on the nucleotides, and predicting the RNA structures. Please cite: Tang, Y, Bouvier, E, Kwok CK, Ding Y, Nekrutenko, A, Bevilacqua PC, Assmann SM, StructureFold: Genome-wide RNA secondary structure mapping and reconstruction in vivo, submitted. RNA structure is predicted using the RNAstructure algorithm (http://rna.urmc.rochester.edu/RNAstructure.html).
-
------
-
-**Function**
-
-* Iterative Mapping maps the raw reads of RNA structural data to the reference transcriptome using Bowtie (v0.12.8). It allows users to trim each read from either end to iteratively map the read to the reference transcriptome. 
-
------
-
-**Input**:
-
-* 1. Sequence file type (FASTA/FASTQ)
-* 2. Sequence file (fasta/fastq format)
-* 3. Reference file (fasta) used to map the reads to
-* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping)
-* 5. “Length” (The minimum length of the reads for mapping after trimming)
-* [Optional]
-* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. Use -5/-3 flag to trim the nucleotides from 5'/3' end of the reads)
-
------
-
-**Output**:
-
-A sorted .bam file with all of the reads that are mapped
-
-
-
-	</help>
-</tool>
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/map_ex.py
--- a/Iterative_mapping/map_ex.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-from Bio import SeqIO
-
-map_file = sys.argv[1]
-result_file = sys.argv[2]
-
-
-#reads = read_t_file(read_file);
-
-f = open(map_file);
-h = file(result_file, 'w')
-
-for aline in f.readlines():
-    tline = aline.strip();
-    tl = tline.split('\t');
-    if len(tl)>4:
-        if int(tl[1].strip())== 0:
-            h.write(tline)
-            h.write('\n')
-
-
-f.close();
-h.close()
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/read_file.py
--- a/Iterative_mapping/read_file.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
-    f = open(in_file);
-    result = [];
-    for aline in f.readlines():
-        temp = [];
-        tline = aline.strip();
-        tl = tline.split('\t');
-        for i in range(0, len(tl)):
-            temp.append(tl[i].strip());
-        result.append(temp);
-    f.close();
-    return result;
-
-
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/read_file.pyc
Binary file Iterative_mapping/read_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/read_s_file.py
--- a/Iterative_mapping/read_s_file.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_sp_file(in_file):
-    f = open(in_file);
-    result = [];
-    for aline in f.readlines():
-        temp = [];
-        tline = aline.strip();
-        tl = tline.split(' ');
-        for i in range(0, len(tl)):
-            if len(tl[i].strip())>0:
-                temp.append(tl[i].strip());
-        result.append(temp);
-    f.close();
-    return result;
-
-
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/read_s_file.pyc
Binary file Iterative_mapping/read_s_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/remove_map.py
--- a/Iterative_mapping/remove_map.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-
-
-unmap_file = sys.argv[1]
-map_file = sys.argv[2]
-result_file = sys.argv[3]
-
-
-unmap = read_t_file(unmap_file)
-mapped = read_t_file(map_file)
-h = file(result_file, 'w')
-
-maps = set()
-for i in range(len(mapped)):
-    maps.add(mapped[i][0])
-
-
-for i in range(len(unmap)):
-    name = unmap[i][0]
-    if name not in maps:
-        h.write(name)
-        h.write('\n')
-
-
-h.close()
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/seq_track.py
--- a/Iterative_mapping/seq_track.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-from Bio import SeqIO
-
-unmap_file = sys.argv[1]
-reads_file = sys.argv[2]
-result_file = sys.argv[3]
-tp = sys.argv[4]
-
-
-unmap = read_t_file(unmap_file);
-
-h = file(result_file, 'w')
-
-reads = SeqIO.parse(reads_file,tp)
-um = set()
-for i in range(0, len(unmap)):
-    id_r = unmap[i][0]
-    um.add(id_r)
-
-for read in reads:
-    if read.id in um:
-        h.write('>')
-        h.write(read.id)
-        h.write('\n')
-        h.write(read.seq.tostring())
-        h.write('\n')
-    
-
-
-h.close()
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/truncate.py
--- a/Iterative_mapping/truncate.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from Bio import SeqIO
-
-fasta_file = sys.argv[1]
-shift_in = sys.argv[2]
-result_file = sys.argv[3]
-length = sys.argv[4]
-t_end = sys.argv[5]
-
-shift = int(shift_in)
-    
-fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
-h = file(result_file,'w')
-for seq in fasta_sequences:
-        nuc = seq.id;
-        sequence = seq.seq.tostring();
-        if (len(sequence)-shift)>=int(length):
-                h.write('>'+nuc)
-                h.write('\n')
-                if t_end == 'three_end':
-                        h.write(sequence[0:(len(sequence)-shift)])
-                if t_end == 'five_end':
-                        h.write(sequence[(shift):(len(sequence))])
-                h.write('\n')
-
-
-
-
-h.close()
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd Iterative_mapping/unmap.py
--- a/Iterative_mapping/unmap.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from read_file import *
-from Bio import SeqIO
-
-map_file = sys.argv[1]
-result_file = sys.argv[2]
-
-
-#reads = read_t_file(read_file);
-
-f = open(map_file);
-h = file(result_file, 'w')
-
-for aline in f.readlines():
-    tline = aline.strip();
-    tl = tline.split('\t');
-    if len(tl)>4:
-        if int(tl[1].strip()) != 0:
-            h.write(tl[0].strip());
-            h.write('\n');
-
-
-f.close();
-h.close()
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/._.DS_Store
Binary file get_reads/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/._get_read.py
Binary file get_reads/._get_read.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/._get_read.xml
Binary file get_reads/._get_read.xml has changed
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/._read_file.py
Binary file get_reads/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/get_read.py
--- a/get_reads/get_read.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-from Bio import SeqIO
-import os
-from read_file import *
-import random
-import string
-
-fasta_file = sys.argv[1]
-map_file = sys.argv[2]
-result_file = sys.argv[3]
-
-syspathrs = os.getcwd()
-
-os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt") 
-
-fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
-length_seq = {};
-for seq in fasta_sequences:
-        nuc = seq.id;
-        length_seq[nuc] = len(seq.seq.tostring());
-
-
-
-mapping = {}
-transcripts = []
-
-f = open(syspathrs+"map_info.txt");
-for aline in f.readlines():
-    tline = aline.strip();
-    tl = tline.split('\t');
-    if tl[0].strip() not in transcripts:
-        transcripts.append(tl[0].strip());
-        mapping[tl[0].strip()] = [];
-
-    mapping[tl[0].strip()].append(tl[1].strip());
-
-distribution = {};
-coverage = {};
-for transcript in length_seq:
-    distribution[transcript] = [];
-    for i in range(0, length_seq[transcript]):
-        distribution[transcript].append(0);
-    sum_count = float(0);
-    if transcript in mapping:
-        for j in range(0, len(mapping[transcript])):
-            index = mapping[transcript][j];
-            #count = reads[mapping[transcript][j][0]];
-            sum_count = sum_count + 1;
-            distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
-            coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
-    else:
-        coverage[transcript] = 0
-
-        
-        
-    
-
-h = file(result_file, 'w')
-for transcript in length_seq:
-    h.write(transcript);
-    h.write('\n')
-    for i in range(0, length_seq[transcript]):
-        h.write(str(distribution[transcript][i]))
-        h.write('\t')
-    h.write('\n')
-    h.write('\n')
-
-#os.system("rm -r "+syspathrs)
-
-    
-
-f.close();
-h.close()
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/get_read.xml
--- a/get_reads/get_read.xml	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-<tool id="get_read_pipeline" name="Get RT Stop Counts" version="1.0">
-	<description>derives the reverse transcriptase (RT) stop count on each nucleotide from a mapped file provided by the Iterative Mapping module</description>
-	<command interpreter="python">get_read.py $lib_file $map_file $output </command>
-        <requirements>
-                <requirement type="package" version="1.61">biopython</requirement>
-                <requirement type="package" version="1.7.1">numpy</requirement>
-                <requirement type="package" version="0.1.18">samtools</requirement>
-        </requirements>
-	<inputs>
-                <param name="lib_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
-		<param name="map_file" type="data" format="bam" label="Mapped file"/>
-	</inputs>
-	<outputs>
-		<data name="output" format="txt"/>
-	</outputs>
-    <tests>
-        <test>
-            <param name="lib_file" value="test.bam" />
-	        <param name="map_file" value="com_rna.txt" />
-	        <output name="output" file="get_RT_stop_test.out" /> 
-        </test>
-    </tests>
-	<help>
-
-
-**Function**
-
-Get RT Stop Counts derives the RT stop counts on each nucleotide of each transcript in the reference transcriptome based on a mapped file (.bam), typically the output from the Iterative Mapping module.
-
------
-
-**Input**:
-
-* 1. A mapped (.bam) file from Bowtie (or any other mapping program)
-* 2. Reference library sequences (fasta) used to map the reads to
-
------
-
-**Output**:
-
-A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file)
-
-
-
-	</help>
-</tool>
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/read_file.py
--- a/get_reads/read_file.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
-    f = open(in_file);
-    result = [];
-    for aline in f.readlines():
-        temp = [];
-        tline = aline.strip();
-        tl = tline.split('\t');
-        for i in range(0, len(tl)):
-            temp.append(tl[i].strip());
-        result.append(temp);
-    f.close();
-    return result;
-
-
diff -r 87ec0ecdc2af -r aedb21527abd get_reads/read_file.pyc
Binary file get_reads/read_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/._.DS_Store
Binary file predict/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/._ct_to_dot.py
Binary file predict/._ct_to_dot.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/._dot_convert.py
Binary file predict/._dot_convert.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/._parse_dis_pac.py
Binary file predict/._parse_dis_pac.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/._predict_RNAs.py
Binary file predict/._predict_RNAs.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/._read_file.py
Binary file predict/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/._rtts_plot.py
Binary file predict/._rtts_plot.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/ct_to_dot.py
--- a/predict/ct_to_dot.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-import shlex
-import os
-import subprocess
-from read_file import *
-
-ct_file = sys.argv[1]
-path = sys.argv[2]
-id_s = sys.argv[3]
-result_file = sys.argv[4]
-
-h = file(result_file, 'w')
-os.system('grep "'+id_s+'" '+ct_file+' |wc -l > '+path+'/count.txt')
-count = read_t_file(path+'/count.txt')
-comm = ''
-for i in range(int(count[0][0])):
-    command = shlex.split('ct2dot %s %s %s' % (ct_file, str(i+1), os.path.join(path, 'db_file_%s.dbnn' % str(i+1))))
-    subprocess.call(command)
-    comm = comm +' '+path+'/db_file_'+str(i+1)+'.dbnn' 
-
-
-
-os.system('cat'+comm+' > '+result_file)
-for i in range(int(count[0][0])):
-    command = shlex.split('rm %s' % (os.path.join(path, 'db_file_%s.dbnn' % str(i+1))))
-    subprocess.call(command)
-command = shlex.split('rm %s' % (os.path.join(path, 'count.txt')))
-subprocess.call(command)
-    
-
-
-h.close()
-
diff -r 87ec0ecdc2af -r aedb21527abd predict/dot_convert.py
--- a/predict/dot_convert.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-dot_file = sys.argv[1]
-result_file = sys.argv[2]
-
-h = file(result_file, 'w')
-f = open(dot_file)
-
-
-
-for aline in f.readlines():
-    line = aline.strip()
-    if line.find('>')!=-1:
-        id_line = line
-        idt = id_line.split('>')
-        ids = idt[1].strip()
-    else:
-        if line.find('(')!=-1:
-            structure_line = line
-            st = structure_line.split(' ')
-            structure = st[0].strip()
-            enert = st[1].strip()
-            if len(enert)>1:
-                enertt = enert.split('(')
-                enertt = enertt[1].strip()
-            else:
-                enertt = st[2].strip()
-            enerttt = enertt.split(')')
-            ener = enerttt[0].strip()
-            h.write('>ENERGY = '+ener+'  '+ids+'\n')
-            h.write(seq+'\n')
-            h.write(structure+'\n')
-        else:
-            seq = line
-
-
-    
-
-
-f.close()
-h.close()
-
diff -r 87ec0ecdc2af -r aedb21527abd predict/parse_dis_pac.py
--- a/predict/parse_dis_pac.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-#parse reactivity file into a dictionary
-
-import sys
-
-def parse_dist(in_file):
-    result = []
-    distribution = {}
-    name = []
-    f = open(in_file)
-    for aline in f.readlines():
-        line = aline.strip()
-        dis = line.strip()
-        dist = dis.split('\t') #split the line and the reactivites or reads are in a list
-        if len(dist) > 0:
-            if len(dist) == 1:
-                if dist[0].strip().find('coverage')==-1:
-                    name.append(line) #add the name in the name list
-                    flag = 1
-                    t_name = line
-            else:
-                distri = []
-                for i in range(0, len(dist)):
-                    distri.append(dist[i].strip())
-                distribution[t_name] = distri #add the list of reactivities into a dictionary
-    result.append(name)
-    result.append(distribution) #Output the dictionary
-    f.close()
-    return result
-                
-                
-
-
-
-
-
-
-
-        
-
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd predict/parse_dis_pac.pyc
Binary file predict/parse_dis_pac.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/predict_RNAs.py
--- a/predict/predict_RNAs.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,189 +0,0 @@
-#RNA structure prediction & Output and illustrate reactivities
-
-import sys
-import shlex
-import subprocess
-import tarfile
-from parse_dis_pac import *
-from read_file import *
-from Bio import SeqIO
-import os
-from rtts_plot import *
-import random
-import string
-
-
-id_file = sys.argv[1]
-seq_file = sys.argv[2]
-predict_type = sys.argv[3]
-temperature = sys.argv[4]
-predict_program = sys.argv[5]
-output_file = sys.argv[6]
-
-
-flag = False
-if predict_type!='silico': #input reactivity file if provided
-    if predict_program == 'rs':
-        react_file = sys.argv[7]
-        slope = sys.argv[8]
-        intercept = sys.argv[9]
-    else:
-        react_file = sys.argv[7]
-        thres_h = sys.argv[8]
-        thres_h = float(thres_h)
-        thres_l = sys.argv[9]
-        thres_l = float(thres_l)
-        gqs = sys.argv[10]
-        gqs = int(gqs)
-        
-    react = parse_dist(react_file)
-    react = react[1]
-    flag = True
-else:
-    if predict_program!='rs':
-        gqs = sys.argv[7]
-        gqs = int(gqs)
-
-
-ospath = os.path.realpath(sys.argv[0])
-ost = ospath.split('/')
-syspathpt = ""
-for i in range(len(ost)-1):
-    syspathpt = syspathpt+ost[i].strip()
-    syspathpt = syspathpt+'/'
-
-
-syspath = os.getcwd()
-
-ids = read_t_file(id_file)
-sequences = SeqIO.parse(seq_file, 'fasta')
-
-
-seqs = {}
-for seq in sequences:
-    seqs[seq.id] = seq.seq.tostring()
-
-if len(ids)>100: #setup a limit of the number of sequence to be predicted
-    print("Number of sequences exceeds limitation!")
-    sys.exit(0)
-    
-
-#predict RNA structures
-output_directory = os.path.join(syspath, "output_files")
-if not os.path.exists(output_directory):
-    os.makedirs(output_directory)
-flag3 = 0
-for i in range(len(ids)):
-    flag2 = 0
-    id_s = ids[i][0]
-    #print(id_s)
-    #Put RNA sequence and reactivities into files
-    if id_s in seqs:
-        fh = file(os.path.join(syspath,"temp.txt"), 'w')        
-        fh.write('>'+id_s)
-        fh.write('\n')
-        fh.write(seqs[id_s])
-        fh.close()
-        if not flag:
-            if predict_program == 'rs':
-                command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
-                subprocess.call(command)
-                command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
-                subprocess.call(command)               
-            else:
-                if gqs:
-                    os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
-                    
-                else:
-                    os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
-                command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
-                subprocess.call(command)
-                if not gqs:
-                    command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
-                else:
-                    command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
-                subprocess.call(command)
-                command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
-                subprocess.call(command)
-        else:
-            if id_s in react:
-                fh = file(os.path.join(syspath, "constraint.txt"), 'w')
-                make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
-                if predict_program == 'rs': 
-                    for j in range(0, (len(react[id_s]))):
-                        if react[id_s][j]!='NA':
-                            fh.write(str(j+1))
-                            fh.write('\t')
-                            fh.write(str(react[id_s][j]))
-                            fh.write('\n')
-                    fh.close()
-                    command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), 
-                                                                 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, 
-                                                                 os.path.join(output_directory, "%s.ct" % id_s)))
-                    subprocess.call(command)
-                    command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
-                    subprocess.call(command)
-                else:
-                    fh.write('>'+id_s)
-                    fh.write('\n')
-                    fh.write(seqs[id_s])
-                    fh.write('\n')
-                    for j in range(0, (len(react[id_s]))):
-                        if react[id_s][j]!='NA':
-                            re = float(react[id_s][j])
-                            if re>thres_h:
-                                fh.write('x')
-                            else:
-                                if re<thres_l:
-                                    fh.write('|')
-                                else:
-                                    fh.write('.')
-                        else:
-                            fh.write('.')
-                    fh.write('.')
-                    fh.close()
-                    if gqs:
-                        os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
-                        
-                    else:
-                        os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
-                    command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
-                    subprocess.call(command)
-                    if not gqs:
-                        command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
-                    else:
-                        command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
-                    subprocess.call(command)
-                    command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
-                    subprocess.call(command)                  
-
-            else:
-                print(id_s+" not in the data of react!")
-                flag2 = 1
-        if flag2 == 0:
-            if predict_program == 'rs':
-                command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
-                subprocess.call(command)
-                command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
-                subprocess.call(command)
-            else:
-                if not gqs:
-                    command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
-                    subprocess.call(command)
-                    command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
-                    subprocess.call(command)
-            flag3 = 1
-    else:
-        print(id_s+" not in the data of sequences!")
-
-#Remove the unnecessary files
-if flag3 == 1:
-    tarball = tarfile.open(output_file, 'w:')
-    for filename in os.listdir(output_directory):
-        filepath = os.path.join(output_directory, filename)
-        print filepath
-        tarball.add(filepath, arcname=filename)
-    #print os.listdir(syspath)
-    #print os.listdir(output_directory)
-    # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
-    tarball.close()
diff -r 87ec0ecdc2af -r aedb21527abd predict/predict_RNAs.xml
--- a/predict/predict_RNAs.xml	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-<tool id="predict_pipeline" name="RNA Structure Prediction" version="1.0">
-	<description>predict RNA structures with or without experimental constraints from the Reactivity Calculation module</description>
-	<command interpreter="python">
-        #if $program.wh == "rs"
-            #if $program.rs_reactivity.type == "restraint"
-                predict_RNAs.py $rna_list $reference_file $program.rs_reactivity.type $temperature $program.wh $output $program.rs_reactivity.reactivity_file $program.rs_reactivity.slope $program.rs_reactivity.intercept
-            #else
-                predict_RNAs.py $rna_list $reference_file $program.rs_reactivity.type $temperature $program.wh $output
-            #end if
-        #else
-            #if $program.vp_reactivity.type == "restraint"
-                predict_RNAs.py $rna_list $reference_file $program.vp_reactivity.type $temperature $program.wh $output $program.vp_reactivity.reactivity_file $program.vp_reactivity.threshold_high $program.vp_reactivity.threshold_low $program.gqs
-            #else
-                predict_RNAs.py $rna_list $reference_file $program.vp_reactivity.type $temperature $program.wh $output $program.gqs
-            #end if
-        #end if
-    </command>
-        <stdio>
-            <exit_code range="1:" />
-            <exit_code range=":-1" />
-            <regex match="Error:" />
-            <regex match="Exception:" />
-        </stdio>
-        <requirements>
-                <requirement type="package" version="5.7">rnastructure</requirement>
-                <requirement type="package" version="1.61">biopython</requirement>
-                <requirement type="package" version="1.7.1">numpy</requirement>
-                <requirement type="package" version="1.1.7">imaging</requirement>
-                <requirement type="package" version="1.2.1">matplotlib</requirement>
-                <requirement type="package" version="2.1">vienna_rna</requirement>
-        </requirements>
-	<inputs>
-        <param name="rna_list" type="data" format="txt" label="List of RNA ids to predict"/>
-        <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
-        <param name="temperature" type="float" value="310.15" label="Temperature (K)"/>
-        <conditional name="program">
-            <param name="wh" type="select" label="Program for RNA structure prediction">
-                <option value="rs">RNAstructure</option>
-                <option value="vp">ViennaRNA Package</option>
-            </param>
-            <when value="rs">
-                <conditional name="rs_reactivity">
-                    <param name="type" type="select" label="RNA structure prediction type">
-                        <option value="silico">In silico</option>
-                        <option value="restraint">With experimental restraints</option>
-                    </param>
-                    <when value="silico"/>
-                    <when value="restraint">
-                        <param name="reactivity_file" type="data" label="Reactivity file"/>
-                        <param name="slope" type="float" value="1.8" label="Slope used with structural restraints"/>
-                        <param name="intercept" type="float" value="-0.6" label="Intercept used with structural restraints"/>
-                    </when>
-                </conditional>
-            </when>
-            <when value="vp">
-                <conditional name="vp_reactivity">
-                    <param name="type" type="select" label="RNA structure prediction type">
-                        <option value="silico">In silico</option>
-                        <option value="restraint">With experimental restraints</option>
-                    </param>
-                    <when value="silico"/>
-                    <when value="restraint">
-                        <param name="reactivity_file" type="data" label="Reactivity file"/>
-                        <param name="threshold_high" type="float" value="0.6" label="Threshold for high reactivities"/>
-                        <param name="threshold_low" type="float" value="0.3" label="Threshold for low reactivities"/>
-                    </when>
-                </conditional>
-                <param name="gqs" type="boolean" checked="false" truevalue = "1" falsevalue = "0" label="Incoorporate G-Quadruplex prediction if checked"/>
-            </when>
-         </conditional>
-            
-	
-	</inputs>
-	<outputs>
-		<data name="output" format=".tar"/>
-	</outputs>
-
-	<help>
-
-
-**Function**
-
-RNA Structure Prediction uses the RNAstructure program (V5.6) and ViennaRNA package (V2.1.9) to predict RNA structures without restraints (in silico) or with restraints from structural reactivities, as provided by the Reactivity Calculation module. Users can designate the temperature under which to predict the RNA structures.
-
------
-
-**Input**:
-
-* 1. A file with transcript Ids (Max num. 100), (each ID one line)
-* 2. Reference file (fasta) used to map the reads to
-* 3. Temperature for RNA structure prediction
-* [Optional]:
-* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided
-* /RNAstructure prediction mode/
-* 2. Slope used with structural restraints (default 1.8)
-* 3. Intercept used with structural restraints (default -0.6)
-* /ViennaRNA package prediction mode/
-* 2. Flag that determines whether to incoorporate G-Quadruplex prediction
-* 3. High reactivity threshold (Any nucleotide with structural reactivity that is higher than it will be constrainted as single stranded) (default 0.6)
-* 4. Low reactivity threshold (Any nucleotide with structural reactivity that is lower than it will be constrainted as double stranded) (default 0.3)
-
------
-
-**Output**:
-
-* 1. Dot bracket files with predicted RNA structures [transciptID.dbn]
-* 2. .ps files which depict the predicted RNA structures [transciptID.ps]
-* [Optional]
-* 3. .png files that shows the distribution of the reactivity of each nucleotide on the transcripts of interest. [transciptID.png]
-
------
-
-**Attention**
-
-Make sure that none of the transcript Ids contains a "|" or a space!
-
------
-
-**Backend program**:
-
-* 1. This module uses RNAstructure (http://rna.urmc.rochester.edu/RNAstructure.html) or ViennaRNA package (http://www.tbi.univie.ac.at/RNA/) as the backend programs to predict RNA structures.
-* 2. Default parameters are used for RNAstructure and ViennaRNA package except -T (Temperature), -sm (slope used with SHAPE restraints [RNAstructure prediction mode]), -si (intercept used with SHAPE restraints [RNAstructure prediction mode]) and thresholds for high and low reactivity [ViennaRNA package prediciton mode], for which users can specify the value
-
-
-
-	</help>
-</tool>
diff -r 87ec0ecdc2af -r aedb21527abd predict/read_file.py
--- a/predict/read_file.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
-    f = open(in_file);
-    result = [];
-    for aline in f.readlines():
-        temp = [];
-        tline = aline.strip();
-        tl = tline.split('\t');
-        for i in range(0, len(tl)):
-            temp.append(tl[i].strip());
-        result.append(temp);
-    f.close();
-    return result;
-
-
diff -r 87ec0ecdc2af -r aedb21527abd predict/read_file.pyc
Binary file predict/read_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd predict/rtts_plot.py
--- a/predict/rtts_plot.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-#Make a plot of reactivity distribution
-
-import sys
-import os
-import numpy as np
-import matplotlib
-from pylab import *
-import math
-
-#Convert the reactivities (Make NA to 0)
-def convert_react(a):
-    r = []
-    for i in range(len(a)):
-        if a[i]!='NA':
-            r.append(float(a[i]))
-        else:
-            r.append(float(0))
-    return r
-        
-
-#Make a plot of the distribution
-def make_plot(ar,id_s,path):
-    font = {'family' : 'normal',
-            'weight' : 'bold',
-            'size'   : 16}
-    matplotlib.rc('font', **font)
-    N = len(ar)
-    a = convert_react(ar)
-    w = 1
-    ind = np.arange(N)
-
-    fig = figure()
-    fig, ax = subplots()
-    ax.bar(ind+w, a, width = w, color = 'black',edgecolor = 'black')
-    ax.set_ylabel('Final Structural Reactivity (FSR)')
-    ax.set_xlabel('Nucleotide Number')
-
-    
-    mag = int(math.log(N,10))-1
-    tail = 10**mag
-
-    intervel = int(math.ceil(float(N)/tail/5))
-    tl = []
-    k = 0
-    upmax = int(math.ceil(float(N)/intervel/tail)*intervel*tail)+1
-    ax.set_xticks(np.arange(0,upmax,intervel*tail))
-    ax.set_xticklabels(np.arange(0,upmax,intervel*tail))
-    savefig(os.path.join(path, id_s+'.tif'))
-
-
-
-    
-    
-    
-
-
diff -r 87ec0ecdc2af -r aedb21527abd predict/rtts_plot.pyc
Binary file predict/rtts_plot.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._.DS_Store
Binary file reactivity_cal/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._parse_dis_react.py
Binary file reactivity_cal/._parse_dis_react.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._parse_dis_react.pyc
Binary file reactivity_cal/._parse_dis_react.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._react_cal.py
Binary file reactivity_cal/._react_cal.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._react_norm_function.py
Binary file reactivity_cal/._react_norm_function.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._react_norm_function.pyc
Binary file reactivity_cal/._react_norm_function.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._reactivity_calculation.xml
Binary file reactivity_cal/._reactivity_calculation.xml has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/._read_file.py
Binary file reactivity_cal/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/parse_dis_react.py
--- a/reactivity_cal/parse_dis_react.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-
-def parse_dist(in_file):
-    result = []
-    distribution = {}
-    name = []
-    f = open(in_file)
-    flag = 0
-    for aline in f.readlines():
-        line = aline.strip()
-        dis = line.strip()
-        dist = dis.split('\t')
-        if len(dist) > 0:
-            if len(dist) == 1:
-                if dist[0].strip().find('coverage')==-1:
-                    if flag == 0:
-                        name.append(line)
-                        flag = 1
-                        t_name = line
-                    else:
-                        distribution[t_name] = 'null'
-                        name.append(line)
-                        flag = 1
-                        t_name = line
-            else:
-                distri = []
-                for i in range(0, len(dist)):
-                    distri.append(dist[i].strip())
-                distribution[t_name] = distri
-                flag = 0
-    result.append(name)
-    result.append(distribution)
-    f.close()
-    return result
-                
-                
-
-
-
-
-
-
-
-        
-
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/parse_dis_react.pyc
Binary file reactivity_cal/parse_dis_react.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/react_cal.py
--- a/reactivity_cal/react_cal.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,135 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-from Bio import SeqIO
-import math
-from parse_dis_react import *
-from react_norm_function import *
-import os
-import random
-import string
-
-
-dist_file1 = sys.argv[1] #plus library
-dist_file2 = sys.argv[2] #minus library
-seq_file = sys.argv[3] #Reference library(genome/cDNA)
-nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
-flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0)
-threshold = sys.argv[6] #Threshold to cap the reactivities
-output_file = sys.argv[7]
-
-
-distri_p = parse_dist(dist_file1)
-distri_m = parse_dist(dist_file2)
-threshold = float(threshold)
-
-
-syspathrs = os.getcwd()
-
-h = file(syspathrs+"react.txt",'w')
-flag_in = int(flag_in)
-
-seqs = SeqIO.parse(open(seq_file),'fasta');
-nt_s = set()
-for i in range(len(nt_spec)):
-    nt_s.add(nt_spec[i])
-
-flag = 0
-trans = []
-distri_p = distri_p[1]
-distri_m = distri_m[1]
-
-#thres = int(threshold)
-
-
-transcripts = {}
-for seq in seqs:
-    n = seq.id
-    trans.append(n)
-    transcripts[n] = seq.seq.tostring()
-    
-
-#print(distri_p)
-        
-
-for i in range(0, len(trans)):
-    h.write(trans[i])
-    h.write('\n')       
-    for j in range(len(distri_p[trans[i]])):
-        distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
-    for j in range(len(distri_m[trans[i]])):
-        distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)       
-    s_p = sum(distri_p[trans[i]])
-    s_m = sum(distri_m[trans[i]])
-    length = len(distri_p[trans[i]])
-    if s_p!= 0 and s_m!= 0:
-        r = []
-        for j in range(0, len(distri_p[trans[i]])):
-            f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
-            f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
-            raw_react = f_p-f_m
-            r.append(max(0, raw_react))
-                
-    if s_p!= 0 and s_m!= 0:    
-        for k in range(1,(len(r)-1)):
-            if transcripts[trans[i]][k-1] in nt_s:
-                h.write(str(float('%.3f'%r[k])))
-                h.write('\t')
-            else:
-                h.write('NA')
-                h.write('\t')
-        k = k+1
-        if transcripts[trans[i]][k-1] in nt_s:
-            h.write(str(float('%.3f'%r[k])))
-            h.write('\n')
-        else:
-            h.write('NA')
-            h.write('\n')
-            
-
-h.close()
-
-if flag_in:
-    react_norm((syspathrs+"react.txt"),output_file, threshold)
-else:
-    h_o = file(output_file, 'w')
-    f_i = open(syspathrs+"react.txt")
-    for aline in f_i.readlines():
-        h_o.write(aline.strip())
-        h_o.write('\n')
-os.system("rm -f "+syspathrs+"react.txt")
-
-#os.system("rm -r "+syspathrs)
-    
-     
-            
-    
-    
-        
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        
-
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/react_norm_function.py
--- a/reactivity_cal/react_norm_function.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import sys
-from Bio import SeqIO
-import math
-from parse_dis_react import *
-
-def cap(a,value):
-    if a>=value:
-        return value
-    else:
-        return a
-
-def react_norm(react_file, result_file, capped_value):
-    print("Normalizing.....")
-    react1 = parse_dist(react_file)
-    react = react1[1]
-    h = file(result_file, 'w')
-
-    capped = int(capped_value)
-
-    all_react = []
-
-
-    for t in react:
-        if react[t]!='null':
-            for i in range(len(react[t])):
-                if react[t][i]!='NA':                   
-                    all_react.append(float(react[t][i]))
-
-
-    all_react.sort(reverse = True)
-
-
-    eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)]
-    meight = sum(eight)/len(eight)
-
-    for t in react:
-        h.write(t)
-        h.write('\n')
-        if react[t]!='null':
-            for i in range((len(react[t])-1)):
-                if react[t][i]!='NA':
-                    h.write(str(float('%.3f'%cap((float(react[t][i])/meight),capped))))
-                else:
-                    h.write('NA')
-                h.write('\t')
-            if react[t][i+1]!='NA':
-                h.write(str(float('%.3f'%cap((float(react[t][i+1])/meight),capped))))
-            else:
-                h.write('NA')
-            h.write('\n')
-
-    h.close()
-        
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        
-
-
-
-
-
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/react_norm_function.pyc
Binary file reactivity_cal/react_norm_function.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/reactivity_calculation.xml
--- a/reactivity_cal/reactivity_calculation.xml	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-<tool id="react_cal_pipeline" name="Reactivity Calculation" version="1.0">
-	<description>calculates structural reactivity on each nucleotide based on RT stop counts from the Get RT Stop Counts module</description>
-	<command interpreter="python">react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output </command>
-        <requirements>
-                <requirement type="package" version="1.61">biopython</requirement>
-                <requirement type="package" version="1.7.1">numpy</requirement>
-        </requirements>
-	<inputs>
-                <param name="dist_file1" type="data" format="txt" label="RTSC file for (+) library"/>
-		        <param name="dist_file2" type="data" format="txt" label="RTSC file for (-) library"/>
-                <param name="seq_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
-                <param name="nt_spec" type="select" label="Nucleotide specificity">
-                    <option value="AC">AC</option>
-                    <option value="ATCG">AUCG</option>
-                </param>
-                <param name="flag_in" type="boolean" checked="true" truevalue = "1" falsevalue = "0" label="Normalization is performed if checked"/>
-                <param name="threshold" type="float" value = "7" optional = "true" label="Threshold to cap the reactivities"/>
-	</inputs>
-	<outputs>
-		<data name="output" format="txt"/>
-	</outputs>
-    <tests>
-        <test>
-            <param name="dist_file1" value="dis_f_N1Ap_rrna.txt" />
-	        <param name="dist_file2" value="dis_f_N1Am_rrna.txt" />
-            <param name="seq_file" value="rRNA.txt" />
-            <param name="nt_spec" value="AC" />
-            <param name="flag_in" value="1" />
-            <param name="threshold" value="7" />
-	        <output name="output" file="DMS_reactivities.out" />
- 
-          </test>
-    </tests>
-
-	<help>
-
-
-**Function**
-
-* Reactivity Calculation calculates the structural reactivity on each nucleotide based on an RT stop count file containing the RT stop count on each nucleotide, typically the output from the Get RT Stop Counts module.
-
------
-
-**Input**:
-
-* 1. RTSC files (Output of Get RT Stop Counts) for (+) and (-) library
-* 2. Reference file (fasta) used to map the reads to
-* 3. Nucleotide Specificity (Type of nucleotides to have reactivity, e.g. AC for DMS and ACTG for SHAPE)
-* [Optional]:
-* 1. A threshold to cap the structural reactivities. {Default: 7}
-* 2. Flag that determines whether to perform 2%-8% normalization {Default: Yes}
-
------
-
-**Output**:
-
-A text file with structural reactivity for each nucleotide (Reactivity file)
-
-
-
-	</help>
-</tool>
diff -r 87ec0ecdc2af -r aedb21527abd reactivity_cal/read_file.py
--- a/reactivity_cal/read_file.py	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
-    f = open(in_file);
-    result = [];
-    for aline in f.readlines():
-        temp = [];
-        tline = aline.strip();
-        tl = tline.split('\t');
-        for i in range(0, len(tl)):
-            temp.append(tl[i].strip());
-        result.append(temp);
-    f.close();
-    return result;
-
-
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/.DS_Store
Binary file structurefold/.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/.DS_Store
Binary file structurefold/Iterative_mapping/.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._.DS_Store
Binary file structurefold/Iterative_mapping/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._iterative_map.py
Binary file structurefold/Iterative_mapping/._iterative_map.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._iterative_map.xml
Binary file structurefold/Iterative_mapping/._iterative_map.xml has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._map_ex.py
Binary file structurefold/Iterative_mapping/._map_ex.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._read_file.py
Binary file structurefold/Iterative_mapping/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._read_s_file.py
Binary file structurefold/Iterative_mapping/._read_s_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._remove_map.py
Binary file structurefold/Iterative_mapping/._remove_map.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._seq_track.py
Binary file structurefold/Iterative_mapping/._seq_track.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._truncate.py
Binary file structurefold/Iterative_mapping/._truncate.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/._unmap.py
Binary file structurefold/Iterative_mapping/._unmap.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/iterative_map.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/iterative_map.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Iterative mapping driver: map reads with bowtie, truncate the reads that did not map, and map them again until none are left.
+import sys
+import os
+from read_file import *
+from read_s_file import *
+import random
+import string
+# Positional command-line arguments; all of them are kept as strings.
+type_input = sys.argv[1]
+seq_file = sys.argv[2]
+ref_file = sys.argv[3]
+shift = sys.argv[4]
+length = sys.argv[5]
+t_end = sys.argv[6]
+map_type = sys.argv[7]
+output_file = sys.argv[8]
+
+# Build the bowtie option strings: sm is used for the first round, s for every later round.
+if map_type!="default":
+    s = ""
+    sm = ""
+    s = s+"-v "+sys.argv[9]
+    sm = sm+"-v "+sys.argv[9]
+    sm = sm+" -5 "+sys.argv[10] #-5/-3 are only added to the first-round options
+    sm = sm+" -3 "+sys.argv[11]
+    s = s+" -k "+sys.argv[12]
+    sm = sm+" -k "+sys.argv[12]
+    if sys.argv[13]: # NOTE(review): argv values are strings, so any non-empty value (including "0") is true here - confirm intent
+        s = s+" -a"
+        sm = sm+" -a"
+    if int(sys.argv[14])>=1:
+        s = s+" -m "+sys.argv[14]
+        sm = sm+" -m "+sys.argv[14]
+    if sys.argv[15]: # NOTE(review): same string-truthiness caveat as for sys.argv[13]
+        s = s+" --best --strata "
+        sm = sm+" --best --strata "
+    
+else:
+    s = "-v 3 -a --best --strata "
+    sm = "-v 3 -a --best --strata "
+# Directory that contains this script (with a trailing '/'); the helper scripts are invoked from it.
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspath = ""
+for i in range(len(ost)-1):
+    syspath = syspath+ost[i].strip()
+    syspath = syspath+'/'
+# Current working directory (with a trailing '/'); every intermediate file is written there.
+syspathrs = os.getcwd()
+syspathrs = syspathrs+'/'
+# Build the bowtie index with the base name "ref"; stdout is redirected to log.txt.
+os.system("bowtie-build -f "+ref_file+" "+syspathrs+"ref > "+syspathrs+"log.txt")
+
+os.system("cp "+seq_file+" "+syspathrs+"seq0.fa") #the input is copied to seq0.fa whatever its format
+# NOTE(review): tp is only assigned for "fasta"/"fastq"; any other type_input leaves it undefined for the seq_track.py call below.
+if type_input == "fasta":
+    tp = 'fasta'
+if type_input == "fastq":
+    tp = 'fastq'
+
+k = 0 #mapping round counter
+# First round: map seq0.fa with the sm options, read as FASTA (-f) or FASTQ (-q).
+if type_input == "fasta":
+    os.system("bowtie "+sm+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
+if type_input == "fastq":
+    os.system("bowtie "+sm+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam")
+
+while(True):
+    os.system("samtools view -Sb -F 0xfff "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get mapped reads (records with none of the 0xfff flag bits set)
+    os.system("samtools view -Sb -f 0x4 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"umapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get unmapped reads
+    os.system("samtools view -Sb -f 0x10 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"rmapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get reads carrying the reverse-strand flag (0x10)
+    os.system("samtools merge -f "+syspathrs+"unmapped"+str(k)+".bam "+syspathrs+"umapped"+str(k)+".bam "+syspathrs+"rmapped"+str(k)+".bam") #merge unmapped and reverse-strand reads into one BAM
+    os.system("samtools view -h -o "+syspathrs+"unmapped"+str(k)+".sam "+syspathrs+"unmapped"+str(k)+".bam") #convert the merged BAM to SAM (with header)
+    if k>0:
+        os.system("samtools view -h -o "+syspathrs+"mapped"+str(k)+".sam "+syspathrs+"mapped"+str(k)+".bam") #convert the mapped BAM to SAM (with header)
+        os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"unmapped"+str(k)+".txt") #keep only the first tab-separated column
+        os.system("cut -f 1 "+syspathrs+"mapped"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".txt") #keep only the first tab-separated column
+        os.system("python "+syspath+"remove_map.py "+syspathrs+"unmapped"+str(k)+".txt "+syspathrs+"mapped"+str(k)+".txt "+syspathrs+"runmapped"+str(k)+".txt") # NOTE(review): presumably drops names that also occur in the mapped list - confirm in remove_map.py
+        os.system("rm "+syspathrs+"mapped"+str(k)+".sam")
+        os.system("rm "+syspathrs+"mapped"+str(k)+".txt")
+        os.system("rm "+syspathrs+"unmapped"+str(k)+".txt")
+    else:
+        os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"runmapped"+str(k)+".txt") #first round: the first column is used without filtering
+    
+    os.system("rm "+syspathrs+"unmapped"+str(k)+".bam")
+    os.system("rm "+syspathrs+"umapped"+str(k)+".bam")
+    os.system("rm "+syspathrs+"rmapped"+str(k)+".bam")
+    os.system("python "+syspath+"seq_track.py "+syspathrs+"runmapped"+str(k)+".txt "+syspathrs+"seq"+str(k)+".fa "+syspathrs+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence
+    os.system("python "+syspath+"truncate.py "+syspathrs+"unmap_seq"+str(k)+".fa "+shift+" "+syspathrs+"seq"+str(k+1)+".fa "+length+" "+t_end) #truncate unmapped sequence
+    os.system("rm "+syspathrs+"seq"+str(k)+".fa") #Remove sequences being mapped
+    os.system("rm "+syspathrs+"map"+str(k)+".sam") #Remove mapping file
+    os.system("rm "+syspathrs+"unmap_seq"+str(k)+".fa") #Remove unmapped sequence
+    os.system("rm "+syspathrs+"runmapped"+str(k)+".txt")
+    os.system("rm "+syspathrs+"unmapped"+str(k)+".sam")
+    
+    os.system("wc -l "+syspathrs+"seq"+str(k+1)+".fa > "+syspathrs+"count"+str(k+1)+".txt") #count the lines of the next round's input
+    c = read_sp_file(syspathrs+"count"+str(k+1)+".txt")
+    if c[0][0] == '0': #If the next sequence file has no lines, stop
+        os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
+        os.system("rm "+syspathrs+"seq"+str(k+1)+".fa")
+        break
+    os.system("rm "+syspathrs+"count"+str(k+1)+".txt")
+    k = k+1
+    os.system("bowtie "+s+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") #later rounds use the s options and always pass -f
+
+# Collect mapped0.bam .. mapped<k>.bam for the final merge.
+ss = ""
+for i in range(0,k+1):
+    ss = ss+" "+syspathrs+"mapped"+str(i)+".bam"
+
+# Merge all rounds, sort, write the result to output_file, then delete the intermediate files.
+os.system("samtools merge -f "+syspathrs+"combine.bam"+" "+ss)
+os.system("samtools sort "+syspathrs+"combine.bam sorted") #output prefix "sorted"; sorted.bam is read on the next line
+os.system("samtools view -b -h sorted.bam > " + output_file)
+#print("samtools merge mapped_all.bam"+ss)
+os.system("rm "+syspathrs+"mapped*.bam")
+os.system("rm "+syspathrs+"combine.bam")
+os.system("rm "+syspathrs+"sorted.bam")
+os.system("rm "+syspathrs+"ref*")
+#os.system("rm -r "+syspathrs)
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/iterative_map.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/iterative_map.xml	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,104 @@
+<tool id="iterative_map_pipeline" name="Iterative Mapping" version="1.0">
+	<description>iteratively maps the raw reads of RNA structural data to the reference transcriptome</description>
+	<command interpreter="python">
+        #if $mapping_file.type == "user"
+            iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best  
+        #else
+            iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output
+        #end if
+    </command>
+        <requirements>
+                <requirement type="package" version="1.61">biopython</requirement>
+                <requirement type="package" version="1.7.1">numpy</requirement>
+                <requirement type="package" version="0.1.18">samtools</requirement>
+                <requirement type="package" version="0.12.7">bowtie</requirement>
+        </requirements>
+	<inputs>
+                <conditional name="file_format">
+                  <param name="type" type="select" label="File format of the reads (Default FASTQ)">
+                    <option value="fastq">FASTQ</option>
+                    <option value="fasta">FASTA</option>
+                  </param>
+                  <when value="fastq">
+                    <param name="seq_file" type="data" format="fastq" label="Fastq file"/>
+                  </when>
+                  <when value="fasta">
+                    <param name="seq_file" type="data" format="fasta" label="Fasta file"/>
+                  </when>
+                </conditional>
+		        <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
+                <param name="shift" type="integer" value="1" label="Number of nucleotides trimmed each round"/>
+                <param name="length" type="integer" value="21" label="Minimum requirement of read length for mapping"/>
+                <param name="t_end" type="select" label="Trimming end">
+                    <option value="five_end">5' end</option>
+                    <option value="three_end">3' end</option>
+                </param>
+                
+                <conditional name="mapping_file">
+                  <param name="type" type="select" label="Bowtie mapping flags (Default -v 0 -a --best --strata)">
+                    <option value="default">Default</option>
+                    <option value="user">User specified</option>
+                  </param>
+                  <when value="default"/>
+                  <when value="user"> 
+                    <param name="param_v" type="integer" value="0" label="Number of mismatches for SOAP-like alignment policy (-v)"/>
+                    <param name="param_five" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)"/>
+                    <param name="param_three" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)"/>
+                    <param name="param_k" type="integer" value="1" label="Report up to n valid alignments per read (-k)"/>
+                    <param name="param_a" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to report all valid alignments per read (-a)"/>
+                    <param name="param_m" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m), -1 for unlimited"/>
+                    <param name="param_best" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best --strata)"/>
+                  </when>
+                </conditional>
+
+	</inputs>
+	<outputs>
+		<data name="output" type="data" format="bam"/>
+	</outputs>
+    <tests>
+        <test>
+            <param name="file_format.type" value="fasta" />
+            <param name="file_format.seq_file" value="sample.fasta" />
+	        <param name="reference_file" value="rRNA.txt" />
+            <param name="shift" value="1" />
+            <param name="length" value="21" />
+            <param name="mapping_file.type" value="default" />
+	        <output name="output" file="mapped.out" />
+        </test>
+    </tests>
+
+	<help>
+
+
+**Overview of StructureFold**
+
+* StructureFold is a series of software packages that automates the process of predicting RNA secondary structure for a transcript or an entire transcriptome, with or without the inclusion of constraints on the structure(s) provided by wet bench experimentation. The process consists of mapping the raw reads of RNA structural data on every transcript in the dataset to the transcriptome, getting RT stop counts on each nucleotide, calculating structural reactivities on the nucleotides, and predicting the RNA structures. Please cite: Tang, Y, Bouvier, E, Kwok CK, Ding Y, Nekrutenko, A, Bevilacqua PC, Assmann SM, StructureFold: Genome-wide RNA secondary structure mapping and reconstruction in vivo, Bioinformatics, In press. RNA structure is predicted using the RNAstructure algorithm (http://rna.urmc.rochester.edu/RNAstructure.html) or ViennaRNA package (http://www.tbi.univie.ac.at/RNA/).
+
+-----
+
+**Function**
+
+* Iterative Mapping maps the raw reads of RNA structural data to the reference transcriptome using Bowtie (v0.12.8). It allows users to trim each read from either end to iteratively map the read to the reference transcriptome. 
+
+-----
+
+**Input**:
+
+* 1. Sequence file type (FASTA/FASTQ)
+* 2. Sequence file (fasta/fastq format)
+* 3. Reference file (fasta) used to map the reads to
+* 4. “Shift” (The number of nucleotides that will be trimmed from the selected end (5’ or 3’) of the reads before each round of mapping)
+* 5. “Length” (The minimum length of the reads for mapping after trimming)
+* [Optional]
+* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. Use -5/-3 flag to trim the nucleotides from 5'/3' end of the reads)
+
+-----
+
+**Output**:
+
+A sorted .bam file with all of the reads that are mapped
+
+
+
+	</help>
+</tool>
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/map_ex.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/map_ex.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]      # input mapping file (SAM text)
+result_file = sys.argv[2]   # output: the selected alignment lines
+
+# Copy every alignment line whose FLAG (2nd tab-separated field) is 0 to
+# result_file, stripped of surrounding whitespace. Lines with fewer than 5
+# fields are ignored, as before.
+#
+# Header lines (leading '@') are now skipped explicitly: a header with five or
+# more fields, such as an @PG line carrying ID/PN/VN/CL, used to reach int()
+# and abort the script with ValueError. The input is streamed rather than read
+# whole, and both files are closed even when an error is raised.
+with open(map_file) as f:
+    with open(result_file, 'w') as h:
+        for aline in f:
+            if aline.startswith('@'):
+                continue
+            tline = aline.strip()
+            tl = tline.split('\t')
+            if len(tl) > 4 and int(tl[1].strip()) == 0:
+                h.write(tline)
+                h.write('\n')
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/read_file.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/read_file.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Read a tab-delimited text file into a list of rows.
+
+    Each line is stripped, split on tabs, and every field stripped again;
+    a blank line yields [''].
+    """
+    handle = open(in_file)
+    rows = []
+    for raw in handle:
+        rows.append([field.strip() for field in raw.strip().split('\t')])
+    handle.close()
+    return rows
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/read_file.pyc
Binary file structurefold/Iterative_mapping/read_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/read_s_file.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/read_s_file.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_sp_file(in_file):
+    """Read a space-delimited text file into a list of rows.
+
+    Each line is stripped and split on single spaces; empty pieces (from runs
+    of spaces) are dropped, so a blank line yields [].
+    """
+    handle = open(in_file)
+    rows = []
+    for raw in handle:
+        pieces = [piece.strip() for piece in raw.strip().split(' ')]
+        rows.append([piece for piece in pieces if len(piece) > 0])
+    handle.close()
+    return rows
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/read_s_file.pyc
Binary file structurefold/Iterative_mapping/read_s_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/remove_map.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/remove_map.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+
+
+unmap_file = sys.argv[1]
+map_file = sys.argv[2]
+result_file = sys.argv[3]
+
+# First column of every row in each input; the mapped names go into a set so
+# the membership test below is a constant-time lookup.
+unmap = read_t_file(unmap_file)
+maps = set(row[0] for row in read_t_file(map_file))
+
+# Write, one per line and in input order, each name from unmap_file that is
+# absent from map_file.
+h = file(result_file, 'w')
+for row in unmap:
+    if row[0] not in maps:
+        h.write(row[0])
+        h.write('\n')
+h.close()
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/seq_track.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/seq_track.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+unmap_file = sys.argv[1]   # text file whose first tab-separated column lists read IDs
+reads_file = sys.argv[2]   # sequence file to filter
+result_file = sys.argv[3]  # FASTA output
+tp = sys.argv[4]           # Bio.SeqIO format name used to parse reads_file
+
+# IDs to keep; a set gives constant-time membership tests.
+um = set(row[0] for row in read_t_file(unmap_file))
+
+# Stream the reads and write each one whose ID is listed as a two-line FASTA
+# record. str(seq) replaces the deprecated Seq.tostring(), which newer
+# Biopython releases no longer provide; the text written is identical.
+# The output file is closed even if parsing fails part-way through.
+with open(result_file, 'w') as h:
+    for read in SeqIO.parse(reads_file, tp):
+        if read.id in um:
+            h.write('>')
+            h.write(read.id)
+            h.write('\n')
+            h.write(str(read.seq))
+            h.write('\n')
+
+
+
+
+
+
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/truncate.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/truncate.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from Bio import SeqIO
+
+fasta_file = sys.argv[1]    # FASTA file of reads to trim
+shift_in = sys.argv[2]      # number of nucleotides to remove
+result_file = sys.argv[3]   # FASTA output
+length = sys.argv[4]        # minimum length a trimmed read must keep
+t_end = sys.argv[5]         # 'three_end' or 'five_end': which end to trim
+
+shift = int(shift_in)
+min_length = int(length)    # parsed once instead of once per record
+
+# Trim `shift` bases from the chosen end of every read and keep only reads
+# that are still at least `min_length` long. Both files are now closed on
+# exit, and str(seq) replaces the deprecated Seq.tostring() (same text).
+# Any other t_end value leaves the sequence line empty, as before.
+with open(fasta_file) as f:
+    with open(result_file, 'w') as h:
+        for seq in SeqIO.parse(f, 'fasta'):
+            sequence = str(seq.seq)
+            kept = len(sequence) - shift
+            if kept >= min_length:
+                h.write('>' + seq.id)
+                h.write('\n')
+                if t_end == 'three_end':
+                    h.write(sequence[0:kept])
+                if t_end == 'five_end':
+                    h.write(sequence[shift:])
+                h.write('\n')
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/Iterative_mapping/unmap.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/Iterative_mapping/unmap.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]      # input mapping file (SAM text)
+result_file = sys.argv[2]   # output: one read name per line
+
+# Write the read name (1st tab-separated field) of every alignment line whose
+# FLAG (2nd field) is not 0. Lines with fewer than 5 fields are ignored, as
+# before.
+#
+# Header lines (leading '@') are now skipped explicitly: a header with five or
+# more fields, such as an @PG line carrying ID/PN/VN/CL, used to reach int()
+# and abort the script with ValueError. The input is streamed rather than read
+# whole, and both files are closed even when an error is raised.
+with open(map_file) as f:
+    with open(result_file, 'w') as h:
+        for aline in f:
+            if aline.startswith('@'):
+                continue
+            tline = aline.strip()
+            tl = tline.split('\t')
+            if len(tl) > 4 and int(tl[1].strip()) != 0:
+                h.write(tl[0].strip())
+                h.write('\n')
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/.DS_Store
Binary file structurefold/get_reads/.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/._.DS_Store
Binary file structurefold/get_reads/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/._get_read.py
Binary file structurefold/get_reads/._get_read.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/._get_read.xml
Binary file structurefold/get_reads/._get_read.xml has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/._read_file.py
Binary file structurefold/get_reads/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/get_read.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/get_reads/get_read.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from Bio import SeqIO
+import os
+from read_file import *
+import random
+import string
+
+# Usage: get_read.py <reference.fasta> <mapped.bam> <output.txt>
+#
+# For every sequence in the reference FASTA, count how many retained
+# alignments are reported at each position and write the counts as text.
+fasta_file = sys.argv[1]    # reference FASTA file
+map_file = sys.argv[2]      # BAM file of mapped reads
+result_file = sys.argv[3]   # output text file
+
+syspathrs = os.getcwd()
+
+# Scratch file for the columns extracted by samtools. os.path.join places it
+# inside the working directory; the previous plain concatenation with
+# os.getcwd() (which has no trailing separator) created a stray
+# "<cwd>map_info.txt" beside that directory instead.
+map_info_file = os.path.join(syspathrs, "map_info.txt")
+
+# -F 0xfff discards every alignment with any of the twelve low FLAG bits set;
+# cut keeps SAM columns 3 and 4 (reference name and 1-based position).
+os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+map_info_file)
+
+
+# Length of every reference sequence, keyed by its FASTA ID.
+length_seq = {}
+with open(fasta_file) as fasta_handle:
+    for seq in SeqIO.parse(fasta_handle, 'fasta'):
+        length_seq[seq.id] = len(seq.seq)
+
+
+# Positions of the retained alignments, grouped by reference name. A dict
+# lookup replaces the former linear scan of a list of names per input line.
+mapping = {}
+with open(map_info_file) as f:
+    for aline in f:
+        tl = aline.strip().split('\t')
+        mapping.setdefault(tl[0].strip(), []).append(tl[1].strip())
+
+
+# distribution[t][i] counts the alignments whose position is i+1 on transcript
+# t; a transcript without alignments keeps a row of zeros. The unused coverage
+# bookkeeping of the earlier version has been dropped.
+distribution = {}
+for transcript in length_seq:
+    counts = [0] * length_seq[transcript]
+    for index in mapping.get(transcript, []):
+        counts[int(index)-1] += 1
+    distribution[transcript] = counts
+
+
+# Per transcript: its name, one line of counts each followed by a tab, and an
+# empty line.
+with open(result_file, 'w') as h:
+    for transcript in length_seq:
+        h.write(transcript)
+        h.write('\n')
+        for count in distribution[transcript]:
+            h.write(str(count))
+            h.write('\t')
+        h.write('\n')
+        h.write('\n')
+
+
+
+
+
+
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/get_read.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/get_reads/get_read.xml	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,46 @@
+<tool id="get_read_pipeline" name="Get RT Stop Counts" version="1.0">
+	<description>derives the reverse transcriptase (RT) stop count on each nucleotide from a mapped file provided by the Iterative Mapping module</description>
+	<command interpreter="python">get_read.py $lib_file $map_file $output </command>
+        <requirements>
+                <requirement type="package" version="1.61">biopython</requirement>
+                <requirement type="package" version="1.7.1">numpy</requirement>
+                <requirement type="package" version="0.1.18">samtools</requirement>
+        </requirements>
+	<inputs>
+                <param name="lib_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
+		<param name="map_file" type="data" format="bam" label="Mapped file"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="txt"/>
+	</outputs>
+    <tests>
+        <test> <!-- lib_file is the FASTA reference, map_file is the BAM file -->
+            <param name="lib_file" value="com_rna.txt" />
+	        <param name="map_file" value="test.bam" />
+	        <output name="output" file="get_RT_stop_test.out" /> 
+        </test>
+    </tests>
+	<help>
+
+
+**Function**
+
+Get RT Stop Counts derives the RT stop counts on each nucleotide of each transcript in the reference transcriptome based on a mapped file (.bam), typically the output from the Iterative Mapping module.
+
+-----
+
+**Input**:
+
+* 1. A mapped (.bam) file from Bowtie (or any other mapping program)
+* 2. Reference library sequences (fasta) used to map the reads to
+
+-----
+
+**Output**:
+
+A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file)
+
+
+
+	</help>
+</tool>
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/read_file.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/get_reads/read_file.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Return the rows of a tab-delimited text file.
+
+    Every line is stripped and split on tabs, then each field is stripped;
+    an empty line produces the row [''].
+    """
+    source = open(in_file)
+    table = []
+    for text in source:
+        table.append([cell.strip() for cell in text.strip().split('\t')])
+    source.close()
+    return table
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/get_reads/read_file.pyc
Binary file structurefold/get_reads/read_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/.DS_Store
Binary file structurefold/predict/.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/._.DS_Store
Binary file structurefold/predict/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/._ct_to_dot.py
Binary file structurefold/predict/._ct_to_dot.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/._dot_convert.py
Binary file structurefold/predict/._dot_convert.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/._parse_dis_pac.py
Binary file structurefold/predict/._parse_dis_pac.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/._predict_RNAs.py
Binary file structurefold/predict/._predict_RNAs.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/._read_file.py
Binary file structurefold/predict/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/._rtts_plot.py
Binary file structurefold/predict/._rtts_plot.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/ct_to_dot.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/predict/ct_to_dot.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import shlex
+import os
+import subprocess
+from read_file import *
+
+ct_file = sys.argv[1]  # CT file handed to ct2dot
+path = sys.argv[2]  # directory used for the temporary files
+id_s = sys.argv[3]  # pattern that grep searches for in ct_file
+result_file = sys.argv[4]  # receives the concatenated ct2dot output
+# Opening in 'w' mode creates/truncates result_file; nothing is written through h.
+h = file(result_file, 'w')
+os.system('grep "'+id_s+'" '+ct_file+' |wc -l > '+path+'/count.txt')  # count.txt <- number of lines of ct_file matching id_s
+count = read_t_file(path+'/count.txt')
+comm = ''  # accumulates the list of files passed to cat below
+for i in range(int(count[0][0])):  # ct2dot is given the 1-based numbers 1..count
+    command = shlex.split('ct2dot %s %s %s' % (ct_file, str(i+1), os.path.join(path, 'db_file_%s.dbnn' % str(i+1))))
+    subprocess.call(command)
+    comm = comm +' '+path+'/db_file_'+str(i+1)+'.dbnn' 
+# Concatenate the per-number files, in order, into result_file.
+# NOTE(review): when the count is 0, comm is empty and this runs a bare `cat`,
+# which reads standard input instead of files - confirm that case cannot occur.
+os.system('cat'+comm+' > '+result_file)
+for i in range(int(count[0][0])):  # remove the temporary per-number files
+    command = shlex.split('rm %s' % (os.path.join(path, 'db_file_%s.dbnn' % str(i+1))))
+    subprocess.call(command)
+command = shlex.split('rm %s' % (os.path.join(path, 'count.txt')))
+subprocess.call(command)
+    
+
+
+h.close()
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/dot_convert.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/predict/dot_convert.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+dot_file = sys.argv[1]      # input: '>id' / sequence / 'structure (energy)' lines
+result_file = sys.argv[2]   # output: reformatted records
+
+# Rewrites each record as three lines:
+#   >ENERGY = <energy>  <id>
+#   <sequence>
+#   <structure>
+# A line containing '>' is a header, a line containing '(' is a structure line,
+# and any other line is remembered as the sequence.
+#
+# The energy is taken as whatever follows the structure, minus the enclosing
+# parentheses and padding. The earlier split(' ') logic returned an empty
+# energy when the value was padded with two spaces, e.g. '.... (  0.00)'.
+with open(dot_file) as f:
+    with open(result_file, 'w') as h:
+        for aline in f:
+            line = aline.strip()
+            if line.find('>') != -1:
+                ids = line.split('>')[1].strip()
+            elif line.find('(') != -1:
+                fields = line.split(None, 1)
+                structure = fields[0]
+                ener = fields[1].strip().lstrip('(').rstrip(')').strip()
+                h.write('>ENERGY = '+ener+'  '+ids+'\n')
+                h.write(seq+'\n')
+                h.write(structure+'\n')
+            else:
+                seq = line
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/parse_dis_pac.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/predict/parse_dis_pac.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,43 @@
+#parse reactivity file into a dictionary
+
+import sys
+
+def parse_dist(in_file):
+    """Parse a text file of names and tab-separated value rows.
+
+    A stripped line without tabs is recorded as a name unless it contains
+    'coverage'; a line with tabs is split into stripped fields and stored
+    under the most recently seen name (replacing any earlier row for it).
+
+    Returns a two-item list: [list of names, dict mapping name -> fields].
+    """
+    names = []
+    values_by_name = {}
+    handle = open(in_file)
+    for raw in handle:
+        text = raw.strip()
+        fields = text.split('\t')
+        if len(fields) > 1:
+            # Value row: file it under the last name encountered.
+            values_by_name[current] = [item.strip() for item in fields]
+        elif 'coverage' not in fields[0].strip():
+            # Name line (an empty line also lands here, as the name '').
+            names.append(text)
+            current = text
+    handle.close()
+    return [names, values_by_name]
+                
+                
+
+
+
+
+
+
+
+        
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/parse_dis_pac.pyc
Binary file structurefold/predict/parse_dis_pac.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/predict_RNAs.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/predict/predict_RNAs.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,219 @@
+#RNA structure prediction & Output and illustrate reactivities
+
+import sys
+import shlex
+import subprocess
+import tarfile
+from parse_dis_pac import *
+from read_file import *
+from Bio import SeqIO
+import os
+from rtts_plot import *
+import random
+import string
+
+
+id_file = sys.argv[1]  # text file of IDs to predict; the first tab-separated column is used
+seq_file = sys.argv[2]  # FASTA file of candidate sequences
+predict_type = sys.argv[3]  # 'silico' = no reactivity data; any other value = use reactivities
+temperature = sys.argv[4]  # passed unchanged to Fold -T; 273.15 is subtracted for RNAfold -T
+predict_program = sys.argv[5]  # 'rs' = Fold/ct2dot/draw commands; any other value = RNAfold
+output_html = sys.argv[6]  # HTML index page written at the end
+output_directory = sys.argv[7]  # directory created for the result files
+# The remaining arguments depend on predict_type and predict_program:
+#   reactivities + 'rs' : argv[8] reactivity file, argv[9] slope, argv[10] intercept
+#   reactivities + other: argv[8] reactivity file, argv[9]/argv[10] high/low thresholds, argv[11] gqs
+flag = False  # becomes True when a reactivity file was supplied
+if predict_type!='silico': #input reactivity file if provided
+    if predict_program == 'rs':
+        react_file = sys.argv[8]
+        slope = sys.argv[9]
+        intercept = sys.argv[10]
+    else:
+        react_file = sys.argv[8]
+        thres_h = sys.argv[9]
+        thres_h = float(thres_h)
+        thres_l = sys.argv[10]
+        thres_l = float(thres_l)
+        gqs = sys.argv[11]
+        gqs = int(gqs)
+    # parse_dist returns [names, {name: values}]; only the dict is kept.
+    react = parse_dist(react_file)
+    react = react[1]
+    flag = True
+else:
+    if predict_program!='rs':
+        gqs = sys.argv[8]  # without reactivities the gqs switch is the only extra argument
+        gqs = int(gqs)
+
+
+# Directory of this script with a trailing '/', rebuilt from the resolved path;
+# used to locate the helper scripts ct_to_dot.py and dot_convert.py.
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspathpt = ""
+for i in range(len(ost)-1):
+    syspathpt = syspathpt+ost[i].strip()
+    syspathpt = syspathpt+'/'
+syspath = os.getcwd()  # working directory; temp.txt and constraint.txt are written here
+
+ids = read_t_file(id_file)
+sequences = SeqIO.parse(seq_file, 'fasta')
+
+# Sequence text keyed by FASTA ID.
+seqs = {}
+for seq in sequences:
+    seqs[seq.id] = seq.seq.tostring()
+
+if len(ids)>100: #setup a limit of the number of sequence to be predicted
+    print("Number of sequences exceeds limitation!")
+    sys.exit(0)  # NOTE(review): exit status 0 reports success although nothing was predicted - confirm intended
+    
+
+#predict RNA structures
+
+os.mkdir(output_directory)
+flag3 = 0  # set to 1 once at least one ID got through prediction with flag2 == 0
+# IDs handled by the loop below; each one gets a section in the HTML page.
+id_predicted = set()
+for i in range(len(ids)):
+    flag2 = 0  # set to 1 when reactivities are required but missing for this ID
+    id_s = ids[i][0]
+    #print(id_s)
+    #Put RNA sequence and reactivities into files
+    if id_s in seqs:
+        fh = file(os.path.join(syspath,"temp.txt"), 'w')        
+        fh.write('>'+id_s)
+        fh.write('\n')
+        fh.write(seqs[id_s])
+        fh.close()
+        if not flag:  # no reactivity data: prediction without constraints
+            if predict_program == 'rs':
+                command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
+                subprocess.call(command)
+                command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
+                subprocess.call(command)               
+            else:
+                if gqs:
+                    os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
+                    # no --noPS in this branch: the mv below picks up <id>_ss.ps from the working directory
+                else:
+                    os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
+                command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
+                subprocess.call(command)
+                if not gqs:
+                    command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
+                else:
+                    command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
+                subprocess.call(command)
+                command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
+                subprocess.call(command)
+        else:  # reactivity data supplied
+            if id_s in react:
+                fh = file(os.path.join(syspath, "constraint.txt"), 'w')
+                make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
+                if predict_program == 'rs': 
+                    for j in range(0, (len(react[id_s]))):
+                        if react[id_s][j]!='NA':
+                            fh.write(str(j+1))  # 1-based position
+                            fh.write('\t')
+                            fh.write(str(react[id_s][j]))
+                            fh.write('\n')
+                    fh.close()
+                    command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), 
+                                                                 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, 
+                                                                 os.path.join(output_directory, "%s.ct" % id_s)))
+                    subprocess.call(command)
+                    command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
+                    subprocess.call(command)
+                else:
+                    fh.write('>'+id_s)
+                    fh.write('\n')
+                    fh.write(seqs[id_s])
+                    fh.write('\n')
+                    for j in range(0, (len(react[id_s]))):  # one constraint symbol per reactivity value
+                        if react[id_s][j]!='NA':
+                            re = float(react[id_s][j])
+                            if re>thres_h:
+                                fh.write('x')  # above the high threshold
+                            else:
+                                if re<thres_l:
+                                    fh.write('|')  # below the low threshold
+                                else:
+                                    fh.write('.')  # between the two thresholds
+                        else:
+                            fh.write('.')  # no value for this position
+                    fh.write('.')  # NOTE(review): one extra '.' follows the per-position symbols - confirm the reactivity list is one shorter than the sequence
+                    fh.close()
+                    if gqs:
+                        os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
+                        # no --noPS in this branch: the mv below picks up <id>_ss.ps from the working directory
+                    else:
+                        os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
+                    command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
+                    subprocess.call(command)
+                    if not gqs:
+                        command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
+                    else:
+                        command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
+                    subprocess.call(command)
+                    command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
+                    subprocess.call(command)                  
+
+            else:
+                print(id_s+" not in the data of react!")
+                flag2 = 1
+        if flag2 == 0:  # draw the structure from the .ct file, then delete the .ct file
+            if predict_program == 'rs':
+                command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
+                subprocess.call(command)
+                command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
+                subprocess.call(command)
+            else:
+                if not gqs:
+                    command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
+                    subprocess.call(command)
+                    command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
+                    subprocess.call(command)
+            flag3 = 1
+        id_predicted.add(id_s)  # NOTE(review): also runs when flag2 == 1, so the HTML page links files that were never produced - confirm intended
+    else:
+        print(id_s+" not in the data of sequences!")
+
+#Package the results and write the HTML index page
+if flag3 == 1:
+    # Add every entry of output_directory to an uncompressed tar archive that
+    # is itself created inside output_directory.
+    tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:')
+    for filename in os.listdir(output_directory):
+        filepath = os.path.join(output_directory, filename)
+        print(filepath)
+        tarball.add(filepath, arcname=filename)
+    tarball.close()
+
+    # Page header followed by the download link for the archive.
+    page = [
+        '<html><head><title><h1>Results of RNA structure prediction</h1></title></head><body>\n',
+        '<p>\n',
+        'Click <a href="%s">here</a> to download the compressed file containing all prediction results.\n' % 'prediction_results.tar',
+        '<hr>\n',
+    ]
+    # One section per ID with links to its .dbn and .ps files, plus the .tif
+    # file when reactivity data was used (flag is true).
+    link = '<li><a href="%s">%s</a></li>\n'
+    extensions = ['dbn', 'ps', 'tif'] if flag else ['dbn', 'ps']
+    for id_p in id_predicted:
+        page.append('<h4>'+id_p+'</h4><p><ul>\n')
+        for extension in extensions:
+            name = '%s.%s' % (id_p, extension)
+            page.append(link % (name, name))
+        page.append('</ul></p>\n<hr>\n')
+
+    h = open(output_html, 'wb' )
+    h.write(''.join(page))
+    h.close()
+                
+                
+        
+    
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/predict_RNAs.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/predict/predict_RNAs.xml	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,127 @@
+<tool id="predict_pipeline" name="RNA Structure Prediction" version="1.0">
+	<description>predict RNA structures with or without experimental constraints from the Reactivity Calculation module</description>
+	<command interpreter="python">
+        #if $program.wh == "rs"
+            #if $program.rs_reactivity.type == "restraint"
+                predict_RNAs.py $rna_list $reference_file $program.rs_reactivity.type $temperature $program.wh $output $output.files_path $program.rs_reactivity.reactivity_file $program.rs_reactivity.slope $program.rs_reactivity.intercept
+            #else
+                predict_RNAs.py $rna_list $reference_file $program.rs_reactivity.type $temperature $program.wh $output $output.files_path
+            #end if
+        #else
+            #if $program.vp_reactivity.type == "restraint"
+                predict_RNAs.py $rna_list $reference_file $program.vp_reactivity.type $temperature $program.wh $output $output.files_path $program.vp_reactivity.reactivity_file $program.vp_reactivity.threshold_high $program.vp_reactivity.threshold_low $program.gqs
+            #else
+                predict_RNAs.py $rna_list $reference_file $program.vp_reactivity.type $temperature $program.wh $output $output.files_path $program.gqs
+            #end if
+        #end if
+    </command>
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+        <requirements>
+                <requirement type="package" version="5.7">rnastructure</requirement>
+                <requirement type="package" version="1.61">biopython</requirement>
+                <requirement type="package" version="1.7.1">numpy</requirement>
+                <requirement type="package" version="1.1.7">imaging</requirement>
+                <requirement type="package" version="1.2.1">matplotlib</requirement>
+                <requirement type="package" version="2.1">vienna_rna</requirement>
+        </requirements>
+	<inputs>
+        <param name="rna_list" type="data" format="txt" label="List of RNA ids to predict"/>
+        <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
+        <param name="temperature" type="float" value="310.15" label="Temperature (K)"/>
+        <conditional name="program">
+            <param name="wh" type="select" label="Program for RNA structure prediction">
+                <option value="rs">RNAstructure</option>
+                <option value="vp">ViennaRNA Package</option>
+            </param>
+            <when value="rs">
+                <conditional name="rs_reactivity">
+                    <param name="type" type="select" label="RNA structure prediction type">
+                        <option value="silico">In silico</option>
+                        <option value="restraint">With experimental restraints</option>
+                    </param>
+                    <when value="silico"/>
+                    <when value="restraint">
+                        <param name="reactivity_file" type="data" label="Reactivity file"/>
+                        <param name="slope" type="float" value="1.8" label="Slope used with structural restraints"/>
+                        <param name="intercept" type="float" value="-0.6" label="Intercept used with structural restraints"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="vp">
+                <conditional name="vp_reactivity">
+                    <param name="type" type="select" label="RNA structure prediction type">
+                        <option value="silico">In silico</option>
+                        <option value="restraint">With experimental restraints</option>
+                    </param>
+                    <when value="silico"/>
+                    <when value="restraint">
+                        <param name="reactivity_file" type="data" label="Reactivity file"/>
+                        <param name="threshold_high" type="float" value="0.6" label="Threshold for high reactivities"/>
+                        <param name="threshold_low" type="float" value="0.3" label="Threshold for low reactivities"/>
+                    </when>
+                </conditional>
+                <param name="gqs" type="boolean" checked="false" truevalue = "1" falsevalue = "0" label="Incorporate G-Quadruplex prediction if checked"/>
+            </when>
+         </conditional>
+            
+	
+	</inputs>
+	<outputs>
+		<data name="output" format="html" />
+	</outputs>
+
+	<help>
+
+
+**Function**
+
+RNA Structure Prediction uses the RNAstructure program (V5.6) and ViennaRNA package (V2.1.9) to predict RNA structures without restraints (in silico) or with restraints from structural reactivities, as provided by the Reactivity Calculation module. Users can designate the temperature under which to predict the RNA structures.
+
+-----
+
+**Input**:
+
+* 1. A file with transcript IDs (at most 100), one ID per line
+* 2. Reference file (fasta) used to map the reads to
+* 3. Temperature for RNA structure prediction
+* [Optional]:
+* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided
+* /RNAstructure prediction mode/
+* 2. Slope used with structural restraints (default 1.8)
+* 3. Intercept used with structural restraints (default -0.6)
+* /ViennaRNA package prediction mode/
+* 2. Flag that determines whether to incorporate G-Quadruplex prediction
+* 3. High reactivity threshold (any nucleotide whose structural reactivity is higher than this value will be constrained as single stranded) (default 0.6)
+* 4. Low reactivity threshold (any nucleotide whose structural reactivity is lower than this value will be constrained as double stranded) (default 0.3)
+
+-----
+
+**Output**:
+
+* 1. Dot bracket files with predicted RNA structures [transcriptID.dbn]
+* 2. .ps files which depict the predicted RNA structures [transcriptID.ps]
+* [Optional]
+* 3. .tif files that show the distribution of the reactivity of each nucleotide on the transcripts of interest. [transcriptID.tif]
+
+-----
+
+**Attention**
+
+Make sure that none of the transcript Ids contains a "|" or a space!
+
+-----
+
+**Backend program**:
+
+* 1. This module uses RNAstructure (http://rna.urmc.rochester.edu/RNAstructure.html) or ViennaRNA package (http://www.tbi.univie.ac.at/RNA/) as the backend programs to predict RNA structures.
+* 2. Default parameters are used for RNAstructure and ViennaRNA package except -T (Temperature), -sm (slope used with SHAPE restraints [RNAstructure prediction mode]), -si (intercept used with SHAPE restraints [RNAstructure prediction mode]) and thresholds for high and low reactivity [ViennaRNA package prediction mode], for which users can specify the values.
+
+
+
+	</help>
+</tool>
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/read_file.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/predict/read_file.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Read a tab-delimited text file into a list of rows.
+
+    Each line of in_file is stripped and split on tabs, and every field is
+    stripped again, so an empty line yields ['']. Returns one list of
+    field strings per line, in file order.
+    """
+    # Opening via a with-block guarantees the handle is closed, even if
+    # reading raises part-way through.
+    with open(in_file) as f:
+        return [[field.strip() for field in aline.strip().split('\t')]
+                for aline in f.readlines()]
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/read_file.pyc
Binary file structurefold/predict/read_file.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/rtts_plot.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/predict/rtts_plot.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+#Make a plot of reactivity distribution
+
+import sys
+import os
+import numpy as np
+import matplotlib
+from pylab import *
+import math
+
+#Convert the reactivities (Make NA to 0)
+def convert_react(a):
+    """Convert reactivity strings to floats.
+
+    Entries equal to the string 'NA' become 0.0; every other entry is
+    passed through float(). Returns a new list of the same length; a is
+    left untouched.
+    """
+    return [float(0) if value == 'NA' else float(value) for value in a]
+
+
+#Make a plot of the distribution
+def make_plot(ar,id_s,path):
+    """Save a bar chart of per-nucleotide reactivities as <id_s>.tif.
+
+    ar   -- reactivity values (strings or numbers); 'NA' is drawn as 0
+    id_s -- transcript id, used as the stem of the output file name
+    path -- directory that receives the .tif file
+    """
+    font = {'family' : 'normal',
+            'weight' : 'bold',
+            'size'   : 16}
+    matplotlib.rc('font', **font)
+    N = len(ar)
+    a = convert_react(ar)
+    w = 1
+    ind = np.arange(N)
+
+    # subplots() already creates the figure; an additional figure() call
+    # would only leave an unused, never-closed figure behind.
+    fig, ax = subplots()
+    ax.bar(ind+w, a, width = w, color = 'black',edgecolor = 'black')
+    ax.set_ylabel('Final Structural Reactivity (FSR)')
+    ax.set_xlabel('Nucleotide Number')
+
+    # Tick step: a multiple of the power of ten one order of magnitude
+    # below N, chosen so that roughly five steps span the sequence.
+    mag = int(math.log(N,10))-1
+    tail = 10**mag
+    intervel = int(math.ceil(float(N)/tail/5))
+    upmax = int(math.ceil(float(N)/intervel/tail)*intervel*tail)+1
+    ticks = np.arange(0,upmax,intervel*tail)
+    ax.set_xticks(ticks)
+    ax.set_xticklabels(ticks)
+    fig.savefig(os.path.join(path, id_s+'.tif'))
+    # pyplot keeps every figure alive until it is closed explicitly.
+    close(fig)
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/predict/rtts_plot.pyc
Binary file structurefold/predict/rtts_plot.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/.DS_Store
Binary file structurefold/reactivity_cal/.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._.DS_Store
Binary file structurefold/reactivity_cal/._.DS_Store has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._parse_dis_react.py
Binary file structurefold/reactivity_cal/._parse_dis_react.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._parse_dis_react.pyc
Binary file structurefold/reactivity_cal/._parse_dis_react.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._react_cal.py
Binary file structurefold/reactivity_cal/._react_cal.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._react_norm_function.py
Binary file structurefold/reactivity_cal/._react_norm_function.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._react_norm_function.pyc
Binary file structurefold/reactivity_cal/._react_norm_function.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._reactivity_calculation.xml
Binary file structurefold/reactivity_cal/._reactivity_calculation.xml has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/._read_file.py
Binary file structurefold/reactivity_cal/._read_file.py has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/parse_dis_react.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/reactivity_cal/parse_dis_react.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+
+def parse_dist(in_file):
+    """Parse a name/value text file into names and per-name field lists.
+
+    The file is read line by line. A stripped line without a tab (an
+    empty line included) is taken as a name unless it contains 'coverage',
+    in which case it is ignored; a line with tabs is split into stripped
+    fields and stored under the most recent name. A name that is directly
+    followed by another name gets the placeholder string 'null'. A name
+    with no data line after it at the end of the file gets no entry.
+
+    Returns [names, distribution]: the list of names in file order and a
+    dict mapping a name to its list of field strings or to 'null'.
+    """
+    name = []
+    distribution = {}
+    pending = False  # True while the latest name has no data line yet
+    # The with-block closes the handle even if parsing raises.
+    with open(in_file) as f:
+        for aline in f.readlines():
+            line = aline.strip()
+            fields = line.split('\t')
+            if len(fields) > 1:
+                distribution[t_name] = [field.strip() for field in fields]
+                pending = False
+            elif 'coverage' not in line:
+                if pending:
+                    distribution[t_name] = 'null'
+                name.append(line)
+                t_name = line
+                pending = True
+    return [name, distribution]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/parse_dis_react.pyc
Binary file structurefold/reactivity_cal/parse_dis_react.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/react_cal.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/reactivity_cal/react_cal.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+from Bio import SeqIO
+import math
+from parse_dis_react import *
+from react_norm_function import *
+import os
+import random
+import string
+
+
+# Command-line arguments, in the order passed by reactivity_calculation.xml.
+dist_file1 = sys.argv[1] #plus library
+dist_file2 = sys.argv[2] #minus library
+seq_file = sys.argv[3] #Reference library(genome/cDNA)
+nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
+flag_in = int(sys.argv[5]) # perform 2-8% normalization (1) or not (0)
+threshold = float(sys.argv[6]) #Threshold to cap the reactivities
+output_file = sys.argv[7]
+
+# parse_dist returns [names, {name: counts}]; only the mapping is used here.
+distri_p = parse_dist(dist_file1)[1]
+distri_m = parse_dist(dist_file2)[1]
+
+# Nucleotides that are allowed to carry a reactivity value.
+nt_s = set(nt_spec)
+
+# Un-normalized reactivities go to a scratch file in the working directory.
+# os.path.join supplies the path separator; gluing "react.txt" directly onto
+# os.getcwd() would name a file next to the directory instead of inside it.
+syspathrs = os.getcwd()
+react_path = os.path.join(syspathrs, "react.txt")
+
+# Reference sequences, in file order.
+trans = []
+transcripts = {}
+for seq in SeqIO.parse(seq_file, 'fasta'):
+    trans.append(seq.id)
+    # str() is the supported way to get the sequence text; Seq.tostring()
+    transcripts[seq.id] = str(seq.seq)  # is deprecated in newer Biopython.
+
+with open(react_path, 'w') as h:
+    for t in trans:
+        h.write(t)
+        h.write('\n')
+        counts_p = distri_p.get(t, 'null')
+        counts_m = distri_m.get(t, 'null')
+        # parse_dist stores 'null' for a name without a data line. Such a
+        # transcript, or one missing from a library, gets only its name
+        # line, the same output as a transcript with zero total signal.
+        if counts_p == 'null' or counts_m == 'null':
+            continue
+        # Natural log of (count + 1) at every position.
+        log_p = [math.log(int(c) + 1) for c in counts_p]
+        log_m = [math.log(int(c) + 1) for c in counts_m]
+        s_p = sum(log_p)
+        s_m = sum(log_m)
+        if s_p == 0 or s_m == 0:
+            continue
+        length = len(log_p)
+        # Share of the log signal at each position, scaled by the length,
+        # (+) minus (-); negative differences are clamped to 0.
+        r = [max(0, log_p[j]/s_p*length - log_m[j]/s_m*length)
+             for j in range(length)]
+        seq_str = transcripts[t]
+        # r[k] is reported against nucleotide k-1 of the sequence; r[0] is
+        # not written. Nucleotides outside nt_s are reported as NA.
+        fields = [str(float('%.3f'%r[k])) if seq_str[k-1] in nt_s else 'NA'
+                  for k in range(1, len(r))]
+        h.write('\t'.join(fields))
+        h.write('\n')
+
+if flag_in:
+    react_norm(react_path, output_file, threshold)
+else:
+    # No normalization requested: copy the raw values, one stripped line each.
+    with open(react_path) as f_i:
+        with open(output_file, 'w') as h_o:
+            for aline in f_i.readlines():
+                h_o.write(aline.strip())
+                h_o.write('\n')
+# The scratch file is no longer needed once the output has been written.
+if os.path.exists(react_path):
+    os.remove(react_path)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/react_norm_function.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/reactivity_cal/react_norm_function.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+from Bio import SeqIO
+import math
+from parse_dis_react import *
+
+def cap(a,value):
+    """Return a, limited from above by value.
+
+    When a >= value the bound itself is returned, otherwise a."""
+    return value if a >= value else a
+
+def react_norm(react_file, result_file, capped_value):
+    """Apply 2%-8% normalization to a reactivity file and cap the result.
+
+    All numeric reactivities in react_file are pooled and sorted in
+    descending order; the top 2% are skipped and the mean of the next 8%
+    is the scale. Every reactivity is divided by that scale, capped at
+    capped_value and written to result_file rounded to three decimals;
+    'NA' entries are copied through. Transcripts whose data is 'null'
+    only get their name line.
+
+    react_file   -- path readable by parse_dist
+    result_file  -- path of the normalized output file
+    capped_value -- upper bound for normalized reactivities (number or
+                    numeric string)
+
+    Raises ValueError when there are too few reactivities to fill the
+    2%-8% window or when the values in that window average to zero.
+    """
+    print("Normalizing.....")
+    react = parse_dist(react_file)[1]
+    # float(), not int(): a fractional cap such as 7.5 must not be truncated.
+    capped = float(capped_value)
+
+    all_react = [float(value)
+                 for t in react if react[t] != 'null'
+                 for value in react[t] if value != 'NA']
+    all_react.sort(reverse = True)
+
+    eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)]
+    if not eight:
+        raise ValueError("Error: too few reactivities for 2%-8% normalization")
+    meight = sum(eight)/len(eight)
+    if meight == 0:
+        raise ValueError("Error: 2%-8% normalization scale is zero")
+
+    with open(result_file, 'w') as h:
+        for t in react:
+            h.write(t)
+            h.write('\n')
+            if react[t] == 'null':
+                continue
+            fields = []
+            for value in react[t]:
+                if value != 'NA':
+                    value = str(float('%.3f'%cap(float(value)/meight, capped)))
+                fields.append(value)
+            h.write('\t'.join(fields))
+            h.write('\n')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/react_norm_function.pyc
Binary file structurefold/reactivity_cal/react_norm_function.pyc has changed
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/reactivity_calculation.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/reactivity_cal/reactivity_calculation.xml	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,62 @@
+<tool id="react_cal_pipeline" name="Reactivity Calculation" version="1.0">
+	<description>calculates structural reactivity on each nucleotide based on RT stop counts from the Get RT Stop Counts module</description>
+	<command interpreter="python">react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output </command>
+        <requirements>
+                <requirement type="package" version="1.61">biopython</requirement>
+                <requirement type="package" version="1.7.1">numpy</requirement>
+        </requirements>
+	<inputs>
+                <param name="dist_file1" type="data" format="txt" label="RTSC file for (+) library"/>
+		        <param name="dist_file2" type="data" format="txt" label="RTSC file for (-) library"/>
+                <param name="seq_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
+                <param name="nt_spec" type="select" label="Nucleotide specificity">
+                    <option value="AC">AC</option>
+                    <option value="ATCG">AUCG</option>
+                </param>
+                <param name="flag_in" type="boolean" checked="true" truevalue = "1" falsevalue = "0" label="Normalization is performed if checked"/>
+                <param name="threshold" type="float" value = "7" optional = "true" label="Threshold to cap the reactivities"/>
+	</inputs>
+	<outputs>
+		<data name="output" format="txt"/>
+	</outputs>
+    <tests>
+        <test>
+            <param name="dist_file1" value="dis_f_N1Ap_rrna.txt" />
+	        <param name="dist_file2" value="dis_f_N1Am_rrna.txt" />
+            <param name="seq_file" value="rRNA.txt" />
+            <param name="nt_spec" value="AC" />
+            <param name="flag_in" value="1" />
+            <param name="threshold" value="7" />
+	        <output name="output" file="DMS_reactivities.out" />
+ 
+          </test>
+    </tests>
+
+	<help>
+
+
+**Function**
+
+* Reactivity Calculation calculates the structural reactivity on each nucleotide based on an RT stop count file containing the RT stop count on each nucleotide, typically the output from the Get RT Stop Counts module.
+
+-----
+
+**Input**:
+
+* 1. RTSC files (Output of Get RT Stop Counts) for (+) and (-) library
+* 2. Reference file (fasta) used to map the reads to
+* 3. Nucleotide Specificity (Type of nucleotides to have reactivity, e.g. AC for DMS and ACTG for SHAPE)
+* [Optional]:
+* 1. A threshold to cap the structural reactivities. {Default: 7}
+* 2. Flag that determines whether to perform 2%-8% normalization {Default: Yes}
+
+-----
+
+**Output**:
+
+A text file with structural reactivity for each nucleotide (Reactivity file)
+
+
+
+	</help>
+</tool>
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/reactivity_cal/read_file.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/reactivity_cal/read_file.py	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Read a tab-delimited text file into a list of rows.
+
+    Each line of in_file is stripped and split on tabs, and every field is
+    stripped again, so an empty line yields ['']. Returns one list of
+    field strings per line, in file order.
+    """
+    # Opening via a with-block guarantees the handle is closed, even if
+    # reading raises part-way through.
+    with open(in_file) as f:
+        return [[field.strip() for field in aline.strip().split('\t')]
+                for aline in f.readlines()]
+
+
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/tool_dependencies.xml	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="rnastructure" version="5.7">
+        <repository changeset_revision="5a621464e533" name="package_rnastructure_5_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="biopython" version="1.61">
+        <repository changeset_revision="ae9dda584395" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="numpy" version="1.7.1">
+        <repository changeset_revision="ef12a3a11d5b" name="package_numpy_1_7" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="samtools" version="0.1.18">
+        <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bowtie" version="0.12.7">
+        <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="imaging" version="1.1.7">
+      <repository changeset_revision="d5f2627f4cfd" name="package_imaging_1_1_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="matplotlib" version="1.2.1">
+        <repository changeset_revision="fe60617380df" name="package_matplotlib_1_2" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="vienna_rna" version="2.1">
+        <repository changeset_revision="3b53eda26527" name="package_vienna_rna_2_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
diff -r 87ec0ecdc2af -r aedb21527abd structurefold/tool_dependencies.xml.orig
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/structurefold/tool_dependencies.xml.orig	Tue Apr 14 14:09:42 2015 -0400
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="rnastructure" version="5.7">
+      <repository changeset_revision="a7e4328e1919" name="package_rnastructure_5_7" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
+    </package>
+    <package name="biopython" version="1.61">
+      <repository changeset_revision="12eb2448f7e3" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://gaius.bx.psu.edu:9009" />
+    </package>
+    <package name="numpy" version="1.7.1">
+      <repository changeset_revision="b8c74cd7b4e9" name="package_numpy_1_7" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
+    </package>
+    <package name="samtools" version="0.1.18">
+      <repository changeset_revision="f1885df2a943" name="package_samtools_0_1_18" owner="devteam" toolshed="http://gaius.bx.psu.edu:9009" />
+    </package>
+    <package name="bowtie" version="0.12.7">
+      <repository changeset_revision="de68886e7e18" name="package_bowtie_0_12_7" owner="devteam" toolshed="http://gaius.bx.psu.edu:9009" />
+    </package>
+    <package name="imaging" version="1.1.7">
+      <repository changeset_revision="213af494db5a" name="package_imaging_1_1_7" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
+    </package>
+    <package name="matplotlib" version="1.2.1">
+      <repository changeset_revision="ef93c83f8c07" name="package_matplotlib_1_2" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
+    </package>
+</tool_dependency>
diff -r 87ec0ecdc2af -r aedb21527abd tool_dependencies.xml
--- a/tool_dependencies.xml	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="rnastructure" version="5.7">
-        <repository changeset_revision="5a621464e533" name="package_rnastructure_5_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="biopython" version="1.61">
-        <repository changeset_revision="ae9dda584395" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="numpy" version="1.7.1">
-        <repository changeset_revision="ef12a3a11d5b" name="package_numpy_1_7" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="samtools" version="0.1.18">
-        <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="bowtie" version="0.12.7">
-        <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="imaging" version="1.1.7">
-      <repository changeset_revision="d5f2627f4cfd" name="package_imaging_1_1_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="matplotlib" version="1.2.1">
-        <repository changeset_revision="fe60617380df" name="package_matplotlib_1_2" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="vienna_rna" version="2.1">
-        <repository changeset_revision="3b53eda26527" name="package_vienna_rna_2_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>
diff -r 87ec0ecdc2af -r aedb21527abd tool_dependencies.xml.orig
--- a/tool_dependencies.xml.orig	Sun Apr 12 14:28:30 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="rnastructure" version="5.7">
-      <repository changeset_revision="a7e4328e1919" name="package_rnastructure_5_7" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
-    </package>
-    <package name="biopython" version="1.61">
-      <repository changeset_revision="12eb2448f7e3" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://gaius.bx.psu.edu:9009" />
-    </package>
-    <package name="numpy" version="1.7.1">
-      <repository changeset_revision="b8c74cd7b4e9" name="package_numpy_1_7" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
-    </package>
-    <package name="samtools" version="0.1.18">
-      <repository changeset_revision="f1885df2a943" name="package_samtools_0_1_18" owner="devteam" toolshed="http://gaius.bx.psu.edu:9009" />
-    </package>
-    <package name="bowtie" version="0.12.7">
-      <repository changeset_revision="de68886e7e18" name="package_bowtie_0_12_7" owner="devteam" toolshed="http://gaius.bx.psu.edu:9009" />
-    </package>
-    <package name="imaging" version="1.1.7">
-      <repository changeset_revision="213af494db5a" name="package_imaging_1_1_7" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
-    </package>
-    <package name="matplotlib" version="1.2.1">
-      <repository changeset_revision="ef93c83f8c07" name="package_matplotlib_1_2" owner="iuc" toolshed="http://gaius.bx.psu.edu:9009" />
-    </package>
-</tool_dependency>