changeset 75:c2c90f3604e0 draft

Deleted selected files
author tyty
date Tue, 09 Dec 2014 03:04:10 -0500
parents 63c41304b221
children 20b74fd7b58a
files Iterative_mapping/.DS_Store get_reads/.DS_Store predict/.DS_Store predict/._predict_RNAs.xml predict/parse_dis_pac.py predict/parse_dis_pac.pyc predict/predict_RNAs.py predict/predict_RNAs.xml predict/rRNA.txt predict/read_file.py predict/read_file.pyc predict/rtts_plot.py predict/rtts_plot.pyc reactivity_cal/.DS_Store
diffstat 14 files changed, 0 insertions(+), 313 deletions(-) [+]
line wrap: on
line diff
Binary file Iterative_mapping/.DS_Store has changed
Binary file get_reads/.DS_Store has changed
Binary file predict/.DS_Store has changed
Binary file predict/._predict_RNAs.xml has changed
--- a/predict/parse_dis_pac.py	Tue Dec 09 03:03:30 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-#parse reactivity file into a dictionary
-
-import sys
-
-def parse_dist(in_file):
-    result = []
-    distribution = {}
-    name = []
-    f = open(in_file)
-    for aline in f.readlines():
-        line = aline.strip()
-        dis = line.strip()
-        dist = dis.split('\t') #split the line and the reactivites or reads are in a list
-        if len(dist) > 0:
-            if len(dist) == 1:
-                if dist[0].strip().find('coverage')==-1:
-                    name.append(line) #add the name in the name list
-                    flag = 1
-                    t_name = line
-            else:
-                distri = []
-                for i in range(0, len(dist)):
-                    distri.append(dist[i].strip())
-                distribution[t_name] = distri #add the list of reactivities into a dictionary
-    result.append(name)
-    result.append(distribution) #Output the dictionary
-    f.close()
-    return result
-                
-                
-
-
-
-
-
-
-
-        
-
-
-
-
-
Binary file predict/parse_dis_pac.pyc has changed
--- a/predict/predict_RNAs.py	Tue Dec 09 03:03:30 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-#RNA structure prediction & Output and illustrate reactivities
-
-import sys
-import shlex
-import subprocess
-import tarfile
-from parse_dis_pac import *
-from read_file import *
-from Bio import SeqIO
-import os
-from rtts_plot import *
-import random
-import string
-
-
-id_file = sys.argv[1]
-seq_file = sys.argv[2]
-predict_type = sys.argv[3]
-temperature = sys.argv[4]
-output_file = sys.argv[5]
-
-
-flag = False
-if predict_type!='silico': #input reactivity file if provided
-    react_file = sys.argv[6]
-    slope = sys.argv[7]
-    intercept = sys.argv[8]
-    react = parse_dist(react_file)
-    react = react[1]
-    flag = True
-
-syspath = os.getcwd()
-
-ids = read_t_file(id_file)
-sequences = SeqIO.parse(seq_file, 'fasta')
-
-
-seqs = {}
-for seq in sequences:
-    seqs[seq.id] = seq.seq.tostring()
-
-if len(ids)>100: #setup a limit of the number of sequence to be predicted
-    print("Number of sequences exceeds limitation!")
-    sys.exit(0)
-    
-
-#predict RNA structures
-output_directory = os.path.join(syspath, "output_files")
-if not os.path.exists(output_directory):
-    os.makedirs(output_directory)
-for i in range(len(ids)):
-    flag2 = 0
-    id_s = ids[i][0]
-    #print(id_s)
-    #Put RNA sequence and reactivities into files
-    if id_s in seqs:
-        fh = file(os.path.join(syspath,"temp.txt"), 'w')        
-        fh.write('>'+id_s)
-        fh.write('\n')
-        fh.write(seqs[id_s])
-        fh.close()
-        if not flag:
-            command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
-            subprocess.call(command)
-        else:
-            if id_s in react:
-                fh = file(os.path.join(syspath, "constraint.txt"), 'w')
-                make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
-                for j in range(0, (len(react[id_s]))):
-                    if react[id_s][j]!='NA':
-                        fh.write(str(j+1))
-                        fh.write('\t')
-                        fh.write(str(react[id_s][j]))
-                        fh.write('\n')
-                    #h.write(str(react[id_s][j])) #Output the reactivities
-                    #h.write('\t')
-                fh.close()
-                #h.write('\n')
-                #h.write('\n')
-                command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), 
-                                                             os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, 
-                                                             os.path.join(output_directory, "%s.ct" % id_s)))
-                subprocess.call(command)
-            else:
-                print(id_s+" not in the data of react!")
-                flag2 = 1
-        if flag2 == 0:
-            command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
-            subprocess.call(command)
-    else:
-        print(id_s+" not in the data of sequences!")
-
-#Remove the unnecessary files
-tarball = tarfile.open(output_file, 'w:')
-for filename in os.listdir(output_directory):
-    filepath = os.path.join(output_directory, filename)
-    print filepath
-    tarball.add(filepath, arcname=filename)
-#print os.listdir(syspath)
-#print os.listdir(output_directory)
-# tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
-tarball.close()
--- a/predict/predict_RNAs.xml	Tue Dec 09 03:03:30 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-<tool id="predict_pipeline" name="RNA Structure Prediction" version="1.0">
-	<description></description>
-	<command interpreter="python">
-        #if $reactivity.type == "restraint"
-            predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output $reactivity.reactivity_file $reactivity.slope $reactivity.intercept
-        #else
-            predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output
-        #end if
-    </command>
-        <requirements>
-                <requirement type="package" version="1.61">biopython</requirement>
-                <requirement type="package" version="1.7.1">numpy</requirement>
-                <requirement type="package" version="1.2.1">matplotlib</requirement>
-        </requirements>
-	<inputs>
-        <param name="rna_list" type="data" format="txt" label="List of RNA ids to predict"/>
-        <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
-        <param name="temperature" type="float" value="310.15" label="Temperature (K)"/>
-        <conditional name="reactivity">
-            <param name="type" type="select" label="RNA structure prediction type">
-                <option value="silico">In silico</option>
-                <option value="restraint">With experimental restraints</option>
-            </param>
-            <when value="silico"/>
-            <when value="restraint">
-                <param name="reactivity_file" type="data" label="Reactivity file"/>
-                <param name="slope" type="float" value="1.8" label="Slope used with structural restraints"/>
-                <param name="intercept" type="float" value="-0.6" label="Intercept used with structural restraints"/>
-            </when>
-        </conditional>
-	
-	</inputs>
-	<outputs>
-		<data name="output" format=".tgz"/>
-	</outputs>
-
-	<help>
-
-
-**TIPS**:
-
------
-
-**Input**:
-
-* 1. A file with transcript Ids (Max num. 100), (each ID one line)
-* 2. Reference file (fasta) used to map the reads to
-* 3. Temperature for RNA structure prediction
-* [Optional]:
-* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided
-* 2. Slope used with structural restraints (default 1.8)
-* 3. Intercept used with structural restraints (default -0.6)
-
------
-
-**Output**:
-
-* 1. .ct files with predicted RNA structures [transciptID.ct]
-* 2. .ps files which depict the predicted RNA structures [[transciptID.ps]
-* [Optional]
-* 3. .png files that shows the distribution of the reactivity of each nucleotide on the transcripts of interest. [transciptID.png]
-
------
-
-**Attention**
-
-Make sure any of the transcript Ids does not contain "|" or space!
-
------
-
-**Backend program**:
-
-* 1. This module is using RNAstructure (http://rna.urmc.rochester.edu/RNAstructure.html) as the backend program to predict RNA structures.
-* 2. Default parameters are used for RNAstructure expect -T (Temperature), -sm (slope used with SHAPE restraints) and -si (intercept used with SHAPE restraints) which users can specify the value
-
-
-
-	</help>
-</tool>
--- a/predict/rRNA.txt	Tue Dec 09 03:03:30 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
->25s rRNA 3375nts
-GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA
->gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence
-TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG
->Arabidopsis thaliana 1
-GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC
->gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA
-AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA
\ No newline at end of file
--- a/predict/read_file.py	Tue Dec 09 03:03:30 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
-    f = open(in_file);
-    result = [];
-    for aline in f.readlines():
-        temp = [];
-        tline = aline.strip();
-        tl = tline.split('\t');
-        for i in range(0, len(tl)):
-            temp.append(tl[i].strip());
-        result.append(temp);
-    f.close();
-    return result;
-
-
Binary file predict/read_file.pyc has changed
--- a/predict/rtts_plot.py	Tue Dec 09 03:03:30 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-#Make a plot of reactivity distribution
-
-import sys
-import os
-import numpy as np
-import matplotlib
-from pylab import *
-import math
-
-#Convert the reactivities (Make NA to 0)
-def convert_react(a):
-    r = []
-    for i in range(len(a)):
-        if a[i]!='NA':
-            r.append(float(a[i]))
-        else:
-            r.append(float(0))
-    return r
-        
-
-#Make a plot of the distribution
-def make_plot(ar,id_s,path):
-    font = {'family' : 'normal',
-            'weight' : 'bold',
-            'size'   : 16}
-    matplotlib.rc('font', **font)
-    N = len(ar)
-    a = convert_react(ar)
-    w = 1
-    ind = np.arange(N)
-
-    fig = figure()
-    fig, ax = subplots()
-    ax.bar(ind+w, a, width = w, color = 'black',edgecolor = 'black')
-    ax.set_ylabel('Final Structural Reactivity (FSR)')
-    ax.set_xlabel('Nucleotide Number')
-
-    
-    mag = int(math.log(N,10))-1
-    tail = 10**mag
-
-    intervel = int(math.ceil(float(N)/tail/5))
-    print(N)
-    print(intervel)
-    tl = []
-    k = 0
-    upmax = int(math.ceil(float(N)/intervel/tail)*intervel*tail)+1
-    ax.set_xticks(np.arange(0,upmax,intervel*tail))
-    print(np.arange(0,upmax,intervel*tail))
-    ax.set_xticklabels(np.arange(0,upmax,intervel*tail))
-    savefig(os.path.join(path, id_s+'.tif'))
-
-
-
-    
-    
-    
-
-
Binary file predict/rtts_plot.pyc has changed
Binary file reactivity_cal/.DS_Store has changed