Mercurial > repos > tyty > structurefold
changeset 90:024a4ac5db36 draft
Deleted selected files
author | tyty |
---|---|
date | Mon, 16 Feb 2015 02:20:53 -0500 |
parents | 361dc1047dc0 |
children | 1866eeef792f |
files | predict/.DS_Store predict/._predict_RNAs.xml predict/parse_dis_pac.py predict/parse_dis_pac.pyc predict/predict_RNAs.py predict/predict_RNAs.xml predict/rRNA.txt predict/read_file.py predict/read_file.pyc predict/rtts_plot.py predict/rtts_plot.pyc |
diffstat | 11 files changed, 0 insertions(+), 321 deletions(-) [+] |
line wrap: on
line diff
# predict/parse_dis_pac.py
# parse reactivity file into a dictionary

import sys


def parse_dist(in_file):
    """Parse a reactivity (or read-count) file into names and value lists.

    The file is read line by line. After stripping surrounding whitespace,
    each non-blank line is split on tabs:

    * a line with a single field is taken as a record name, unless it
      contains the substring ``coverage``, in which case it is ignored;
    * a line with two or more fields is a data row; its fields are stripped
      and stored, as strings, under the most recently seen name (a later
      data row for the same name overwrites the earlier one).

    Blank lines are skipped so they can neither register an empty name nor
    capture the data row that follows them.

    Args:
        in_file: Path of the tab-delimited text file to read.

    Returns:
        A two-element list ``[names, distribution]`` where ``names`` is the
        list of record names in file order and ``distribution`` maps each
        name that had a data row to its list of string values.

    Raises:
        ValueError: If a data row appears before any record name.
    """
    names = []
    distribution = {}
    current_name = None
    # The context manager closes the file even if parsing raises.
    with open(in_file) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            fields = line.split('\t')
            if len(fields) == 1:
                # Single-field lines are names; 'coverage' marker lines are
                # ignored and do not change the current name.
                if 'coverage' not in line:
                    names.append(line)
                    current_name = line
                continue
            if current_name is None:
                raise ValueError(
                    "%s: data row found before any record name" % in_file)
            distribution[current_name] = [field.strip() for field in fields]
    return [names, distribution]
# predict/predict_RNAs.py
# RNA structure prediction & Output and illustrate reactivities
#
# Usage:
#   predict_RNAs.py ID_FILE SEQ_FILE PREDICT_TYPE TEMPERATURE OUTPUT_FILE \
#                   [REACT_FILE SLOPE INTERCEPT]
#
# For every id listed in ID_FILE that is present in the FASTA SEQ_FILE, the
# external `Fold` command is run to produce <id>.ct and `draw` to produce
# <id>.ps inside ./output_files. When PREDICT_TYPE is anything other than
# 'silico', the three extra arguments are required: reactivities from
# REACT_FILE are plotted with make_plot() and handed to `Fold` through
# -sh/-si/-sm. Finally everything in ./output_files is packed into the
# uncompressed tar archive OUTPUT_FILE.

import sys
import shlex
import subprocess
import tarfile
from parse_dis_pac import *
from read_file import *
from Bio import SeqIO
import os
from rtts_plot import *
import random
import string

# Upper bound on the number of ids accepted in one run.
MAX_SEQUENCES = 100

id_file = sys.argv[1]
seq_file = sys.argv[2]
predict_type = sys.argv[3]
temperature = sys.argv[4]
output_file = sys.argv[5]

# Any prediction type other than 'silico' comes with a reactivity file plus
# the slope/intercept that are forwarded to Fold.
flag = False
if predict_type != 'silico':
    react_file = sys.argv[6]
    slope = sys.argv[7]
    intercept = sys.argv[8]
    # parse_dist returns [names, {name: values}]; only the mapping is needed.
    react = parse_dist(react_file)[1]
    flag = True

syspath = os.getcwd()

ids = read_t_file(id_file)

# Refuse oversized requests before doing any expensive work. Exit with a
# non-zero status so the caller sees a failure instead of an apparently
# successful run that produced no archive.
if len(ids) > MAX_SEQUENCES:
    print("Number of sequences exceeds limitation!")
    sys.exit(1)

seqs = {}
for record in SeqIO.parse(seq_file, 'fasta'):
    # str() is the portable way to get the sequence text from a Seq object.
    seqs[record.id] = str(record.seq)

# predict RNA structures
output_directory = os.path.join(syspath, "output_files")
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# Scratch files, rewritten for every id.
seq_path = os.path.join(syspath, "temp.txt")
constraint_path = os.path.join(syspath, "constraint.txt")

for row in ids:
    id_s = row[0]
    if id_s not in seqs:
        print(id_s+" not in the data of sequences!")
        continue

    # Put the RNA sequence into a single-record FASTA file for Fold.
    with open(seq_path, 'w') as fh:
        fh.write('>' + id_s + '\n' + seqs[id_s])

    ct_path = os.path.join(output_directory, '%s.ct' % id_s)
    ps_path = os.path.join(output_directory, '%s.ps' % id_s)

    # Commands are passed as argument lists so that paths or ids containing
    # spaces or quotes reach the programs unchanged.
    if not flag:
        subprocess.call(['Fold', seq_path, '-T', temperature, ct_path])
    elif id_s in react:
        values = react[id_s]
        # make a plot of the distribution of the reactivities of this RNA
        make_plot(values, id_s, output_directory)
        # One "<1-based position>\t<reactivity>" line per nucleotide that
        # has a value; 'NA' entries are left out.
        with open(constraint_path, 'w') as fh:
            for position, value in enumerate(values, 1):
                if value != 'NA':
                    fh.write('%d\t%s\n' % (position, value))
        subprocess.call(['Fold', seq_path,
                         '-sh', constraint_path,
                         '-si', intercept,
                         '-sm', slope,
                         '-T', temperature,
                         ct_path])
    else:
        # No structure was predicted, so there is nothing to draw.
        print(id_s+" not in the data of react!")
        continue

    subprocess.call(['draw', ct_path, ps_path])

# Bundle every file in the output directory into the result archive.
tarball = tarfile.open(output_file, 'w:')
try:
    for filename in os.listdir(output_directory):
        filepath = os.path.join(output_directory, filename)
        print(filepath)
        tarball.add(filepath, arcname=filename)
finally:
    tarball.close()
--- a/predict/predict_RNAs.xml Fri Dec 19 13:58:50 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -<tool id="predict_pipeline" name="RNA Structure Prediction" version="1.0"> - <description>predict RNA structures with or without experimental constraints from the Reactivity Calculation module</description> - <command interpreter="python"> - #if $reactivity.type == "restraint" - predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output $reactivity.reactivity_file $reactivity.slope $reactivity.intercept - #else - predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output - #end if - </command> - <stdio> - <exit_code range="1:" /> - <exit_code range=":-1" /> - <regex match="Error:" /> - <regex match="Exception:" /> - </stdio> - <requirements> - <requirement type="package" version="1.61">biopython</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="1.2.1">matplotlib</requirement> - </requirements> - <inputs> - <param name="rna_list" type="data" format="txt" label="List of RNA ids to predict"/> - <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/> - <param name="temperature" type="float" value="310.15" label="Temperature (K)"/> - <conditional name="reactivity"> - <param name="type" type="select" label="RNA structure prediction type"> - <option value="silico">In silico</option> - <option value="restraint">With experimental restraints</option> - </param> - <when value="silico"/> - <when value="restraint"> - <param name="reactivity_file" type="data" label="Reactivity file"/> - <param name="slope" type="float" value="1.8" label="Slope used with structural restraints"/> - <param name="intercept" type="float" value="-0.6" label="Intercept used with structural restraints"/> - </when> - </conditional> - - </inputs> - <outputs> - <data name="output" format=".tar"/> - </outputs> - - <help> - - 
-**Function** - -RNA Structure Prediction uses the RNAstructure algorithm (Version 5.6, http://rna.urmc.rochester.edu/RNAstructure.html) to predict RNA structures without restraints (in silico) or with restraints from structural reactivities, typically provided by the Reactivity Calculation module. Users can designate the temperature under which to predict the RNA structures. - ------ - -**Input**: - -* 1. A file with transcript Ids (max. 100, one ID per line) -* 2. Reference file (fasta) used to map the reads to -* 3. Temperature for RNA structure prediction -* [Optional]: -* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided -* 2. Slope used with structural restraints (default 1.8) -* 3. Intercept used with structural restraints (default -0.6) - ------ - -**Output**: - -* 1. .ct files with predicted RNA structures [transcriptID.ct] -* 2. .ps files which depict the predicted RNA structures [transcriptID.ps] -* [Optional] -* 3. .tif files that show the distribution of the reactivity of each nucleotide on the transcripts of interest. [transcriptID.tif] - ------ - -**Attention** - -Make sure that none of the transcript Ids contains a "|" or a space! - ------ - -**Backend program**: - -* 1. This module uses RNAstructure (http://rna.urmc.rochester.edu/RNAstructure.html) as the backend program to predict RNA structures. -* 2. Default parameters are used for RNAstructure except -T (Temperature), -sm (slope used with SHAPE restraints) and -si (intercept used with SHAPE restraints), for which users can specify the values - - - - </help> -</tool>
--- a/predict/rRNA.txt Fri Dec 19 13:58:50 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ ->25s rRNA 3375nts -GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCT
CTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA ->gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence 
-TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG ->Arabidopsis thaliana 1 -GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC ->gi|186498419|ref|NR_022453.1| 
Arabidopsis thaliana (AT2G01020) rRNA -AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA \ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# predict/read_file.py

import sys


def read_t_file(in_file):
    """Read a tab-delimited text file into a list of rows.

    Each line is stripped of surrounding whitespace and split on tabs, and
    every resulting field is stripped again. A blank line therefore yields
    the single-element row ``['']``.

    Args:
        in_file: Path of the text file to read.

    Returns:
        A list with one entry per line, each entry being the list of that
        line's fields as strings.
    """
    rows = []
    # The context manager closes the file even if reading raises.
    with open(in_file) as handle:
        for raw_line in handle:
            rows.append([cell.strip() for cell in raw_line.strip().split('\t')])
    return rows
#!/usr/bin/env python
# predict/rtts_plot.py
# Make a plot of reactivity distribution

import sys
import os
import numpy as np
import matplotlib
from pylab import *
import math


def convert_react(a):
    """Convert reactivity strings to floats, mapping 'NA' to 0.0.

    Args:
        a: Sequence of reactivity values; each item is either the string
            'NA' or something ``float()`` accepts.

    Returns:
        A list of floats of the same length as ``a``.
    """
    return [0.0 if value == 'NA' else float(value) for value in a]


def make_plot(ar, id_s, path):
    """Save a bar chart of per-nucleotide reactivities as ``<id_s>.tif``.

    Bars are drawn at positions 1..N with width 1, and the x axis is
    labelled with ticks starting at 0. The tick step is derived from the
    order of magnitude of N. N, the tick interval and the tick positions
    are printed to stdout.

    Args:
        ar: Sequence of reactivity values ('NA' or numeric strings), one per
            nucleotide.
        id_s: Identifier used as the output file's base name.
        path: Directory in which the image is written.

    Raises:
        ValueError: If ``ar`` is empty (there is nothing to plot and the
            tick computation is undefined for N == 0).
    """
    N = len(ar)
    if N == 0:
        raise ValueError("no reactivity values to plot for %s" % id_s)

    # NOTE(review): 'normal' is kept from the original settings; it may not
    # be a font family matplotlib recognises - confirm.
    font = {'family': 'normal',
            'weight': 'bold',
            'size': 16}
    matplotlib.rc('font', **font)

    a = convert_react(ar)
    w = 1
    ind = np.arange(N)

    # A single figure per call; it is closed below so that repeated calls
    # do not accumulate open figures.
    fig, ax = subplots()
    try:
        ax.bar(ind+w, a, width=w, color='black', edgecolor='black')
        ax.set_ylabel('Final Structural Reactivity (FSR)')
        ax.set_xlabel('Nucleotide Number')

        # Tick spacing: a multiple of 10**mag, chosen so that roughly five
        # intervals cover the N positions.
        mag = int(math.log(N, 10))-1
        tail = 10**mag

        intervel = int(math.ceil(float(N)/tail/5))
        print(N)
        print(intervel)
        upmax = int(math.ceil(float(N)/intervel/tail)*intervel*tail)+1
        ticks = np.arange(0, upmax, intervel*tail)
        ax.set_xticks(ticks)
        print(ticks)
        ax.set_xticklabels(ticks)
        fig.savefig(os.path.join(path, id_s+'.tif'))
    finally:
        close(fig)