Mercurial > repos > tyty > structurefold
changeset 75:c2c90f3604e0 draft
Deleted selected files
author | tyty |
---|---|
date | Tue, 09 Dec 2014 03:04:10 -0500 |
parents | 63c41304b221 |
children | 20b74fd7b58a |
files | Iterative_mapping/.DS_Store get_reads/.DS_Store predict/.DS_Store predict/._predict_RNAs.xml predict/parse_dis_pac.py predict/parse_dis_pac.pyc predict/predict_RNAs.py predict/predict_RNAs.xml predict/rRNA.txt predict/read_file.py predict/read_file.pyc predict/rtts_plot.py predict/rtts_plot.pyc reactivity_cal/.DS_Store |
diffstat | 14 files changed, 0 insertions(+), 313 deletions(-) [+] |
line wrap: on
line diff
# Source: predict/parse_dis_pac.py (listed as removed in this changeset diff)
# parse reactivity file into a dictionary

import sys


def parse_dist(in_file):
    """Parse a tab-separated reactivity/read file into names and value lists.

    Each line is stripped and split on tabs. A line with a single field is
    taken as a record name unless it contains the substring ``coverage``,
    in which case it is ignored. A line with several fields is taken as the
    list of values belonging to the most recently seen name; if the same
    name receives several value lines, the last one wins.

    Args:
        in_file: Path of the file to read.

    Returns:
        A two-element list ``[names, distribution]`` where ``names`` is the
        list of record names in file order and ``distribution`` maps each
        name to its list of (stripped, still string-typed) values.

    Raises:
        ValueError: If a multi-field value line appears before any name line.
    """
    names = []
    distribution = {}
    current_name = None

    # The context manager closes the file even if parsing raises.
    with open(in_file) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                # Blank lines carry no record; do not register '' as a name.
                continue

            fields = [field.strip() for field in line.split('\t')]

            if len(fields) == 1:
                # Single-field lines are names, except 'coverage' markers.
                if 'coverage' not in fields[0]:
                    names.append(line)
                    current_name = line
                continue

            if current_name is None:
                raise ValueError(
                    "value line found before any name line in %s" % in_file)
            distribution[current_name] = fields

    return [names, distribution]
# Source: predict/predict_RNAs.py (listed as removed in this changeset diff)
# RNA structure prediction & Output and illustrate reactivities
#
# Command line (positional):
#   1 id_file       file of RNA ids, read with read_t_file (first column used)
#   2 seq_file      FASTA file with the sequences
#   3 predict_type  'silico' for prediction without reactivity data
#   4 temperature   passed to Fold as -T
#   5 output_file   tar archive receiving everything in ./output_files
#   6 react_file    } only read when predict_type != 'silico'
#   7 slope         } passed to Fold as -sm
#   8 intercept     } passed to Fold as -si

import sys
import shlex
import subprocess
import tarfile
from parse_dis_pac import *
from read_file import *
from Bio import SeqIO
import os
from rtts_plot import *
import random
import string


id_file = sys.argv[1]
seq_file = sys.argv[2]
predict_type = sys.argv[3]
temperature = sys.argv[4]
output_file = sys.argv[5]


use_restraints = False
if predict_type != 'silico':  # input reactivity file if provided
    react_file = sys.argv[6]
    slope = sys.argv[7]
    intercept = sys.argv[8]
    react = parse_dist(react_file)[1]  # name -> list of reactivities
    use_restraints = True

syspath = os.getcwd()

ids = read_t_file(id_file)

# str() on the Seq object replaces the deprecated Seq.tostring().
seqs = {}
for record in SeqIO.parse(seq_file, 'fasta'):
    seqs[record.id] = str(record.seq)

if len(ids) > 100:  # setup a limit of the number of sequence to be predicted
    print("Number of sequences exceeds limitation!")
    # NOTE(review): exits with status 0 although nothing was predicted;
    # kept as in the original -- confirm how the caller detects failure.
    sys.exit(0)


# predict RNA structures
output_directory = os.path.join(syspath, "output_files")
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# Scratch files, rewritten for every id.
sequence_path = os.path.join(syspath, "temp.txt")
constraint_path = os.path.join(syspath, "constraint.txt")

for row in ids:
    id_s = row[0]
    if id_s not in seqs:
        print(id_s+" not in the data of sequences!")
        continue

    # Put the RNA sequence into a FASTA-style file for Fold.
    with open(sequence_path, 'w') as fh:
        fh.write('>' + id_s + '\n' + seqs[id_s])

    ct_path = os.path.join(output_directory, '%s.ct' % id_s)
    ps_path = os.path.join(output_directory, '%s.ps' % id_s)

    # Commands are passed as argument lists so that paths or ids containing
    # whitespace or quote characters are not re-tokenised.
    if not use_restraints:
        subprocess.call(['Fold', sequence_path, '-T', temperature, ct_path])
    elif id_s in react:
        # make a plot of the distribution of the reactivities of the input RNA
        make_plot(react[id_s], id_s, output_directory)
        # Write "<1-based position>\t<reactivity>" per line, skipping 'NA'.
        with open(constraint_path, 'w') as fh:
            for position, value in enumerate(react[id_s], 1):
                if value != 'NA':
                    fh.write('%d\t%s\n' % (position, value))
        subprocess.call(['Fold', sequence_path,
                         '-sh', constraint_path,
                         '-si', intercept,
                         '-sm', slope,
                         '-T', temperature,
                         ct_path])
    else:
        print(id_s+" not in the data of react!")
        continue  # no .ct file was produced, so there is nothing to draw

    subprocess.call(['draw', ct_path, ps_path])

# Bundle every file of the output directory into the result archive.
# Mode 'w:' writes an uncompressed tar file.
tarball = tarfile.open(output_file, 'w:')
try:
    for filename in os.listdir(output_directory):
        filepath = os.path.join(output_directory, filename)
        print(filepath)
        tarball.add(filepath, arcname=filename)
finally:
    tarball.close()
--- a/predict/predict_RNAs.xml Tue Dec 09 03:03:30 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -<tool id="predict_pipeline" name="RNA Structure Prediction" version="1.0"> - <description></description> - <command interpreter="python"> - #if $reactivity.type == "restraint" - predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output $reactivity.reactivity_file $reactivity.slope $reactivity.intercept - #else - predict_RNAs.py $rna_list $reference_file $reactivity.type $temperature $output - #end if - </command> - <requirements> - <requirement type="package" version="1.61">biopython</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="1.2.1">matplotlib</requirement> - </requirements> - <inputs> - <param name="rna_list" type="data" format="txt" label="List of RNA ids to predict"/> - <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/> - <param name="temperature" type="float" value="310.15" label="Temperature (K)"/> - <conditional name="reactivity"> - <param name="type" type="select" label="RNA structure prediction type"> - <option value="silico">In silico</option> - <option value="restraint">With experimental restraints</option> - </param> - <when value="silico"/> - <when value="restraint"> - <param name="reactivity_file" type="data" label="Reactivity file"/> - <param name="slope" type="float" value="1.8" label="Slope used with structural restraints"/> - <param name="intercept" type="float" value="-0.6" label="Intercept used with structural restraints"/> - </when> - </conditional> - - </inputs> - <outputs> - <data name="output" format=".tgz"/> - </outputs> - - <help> - - -**TIPS**: - ------ - -**Input**: - -* 1. A file with transcript Ids (Max num. 100), (each ID one line) -* 2. Reference file (fasta) used to map the reads to -* 3. Temperature for RNA structure prediction -* [Optional]: -* 1. 
A reactivity file with structural reactivity for each nucleotide on the sequence provided -* 2. Slope used with structural restraints (default 1.8) -* 3. Intercept used with structural restraints (default -0.6) - ------ - -**Output**: - -* 1. .ct files with predicted RNA structures [transcriptID.ct] -* 2. .ps files which depict the predicted RNA structures [transcriptID.ps] -* [Optional] -* 3. .tif files that show the distribution of the reactivity of each nucleotide on the transcripts of interest. [transcriptID.tif] - ------ - -**Attention** - -Make sure none of the transcript IDs contains "|" or a space! - ------ - -**Backend program**: - -* 1. This module is using RNAstructure (http://rna.urmc.rochester.edu/RNAstructure.html) as the backend program to predict RNA structures. -* 2. Default parameters are used for RNAstructure except -T (Temperature), -sm (slope used with SHAPE restraints) and -si (intercept used with SHAPE restraints), whose values users can specify - - - - </help> -</tool>
--- a/predict/rRNA.txt Tue Dec 09 03:03:30 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ ->25s rRNA 3375nts -GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCT
CTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA ->gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence 
-TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG ->Arabidopsis thaliana 1 -GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC ->gi|186498419|ref|NR_022453.1| 
Arabidopsis thaliana (AT2G01020) rRNA -AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA \ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Source: predict/read_file.py (listed as removed in this changeset diff)

import sys


def read_t_file(in_file):
    """Read a tab-separated text file into a list of rows.

    Every line is stripped of surrounding whitespace and split on tabs;
    each resulting field is stripped as well. A blank line therefore
    yields the row ``['']``.

    Args:
        in_file: Path of the file to read.

    Returns:
        A list with one list of string fields per line, in file order.
    """
    # The context manager closes the file even if reading raises.
    with open(in_file) as handle:
        return [
            [field.strip() for field in line.strip().split('\t')]
            for line in handle
        ]
#!/usr/bin/env python
# Source: predict/rtts_plot.py (listed as removed in this changeset diff)
# Make a plot of reactivity distribution

import sys
import os
import numpy as np
import matplotlib
from pylab import *
import math


# Convert the reactivities (Make NA to 0)
def convert_react(a):
    """Return the values of *a* as floats, mapping the string 'NA' to 0.0."""
    return [float(0) if value == 'NA' else float(value) for value in a]


# Make a plot of the distribution
def make_plot(ar, id_s, path):
    """Save a bar chart of the reactivities in *ar* as ``<path>/<id_s>.tif``.

    Args:
        ar: Sequence of reactivities (numbers or numeric strings); the
            string 'NA' is plotted as 0.
        id_s: Identifier used as the base name of the image file.
        path: Directory the image is written to.

    Raises:
        ValueError: From ``math.log`` when *ar* is empty.
    """
    font = {'family': 'normal',
            'weight': 'bold',
            'size': 16}
    matplotlib.rc('font', **font)
    N = len(ar)
    a = convert_react(ar)
    w = 1
    ind = np.arange(N)

    # A single subplots() call is enough; the original also called figure()
    # first, which left an extra empty figure open on every call.
    fig, ax = subplots()
    try:
        ax.bar(ind + w, a, width=w, color='black', edgecolor='black')
        ax.set_ylabel('Final Structural Reactivity (FSR)')
        ax.set_xlabel('Nucleotide Number')

        # Tick spacing is intervel * tail, where tail is a power of ten one
        # order of magnitude below N and intervel is chosen so that roughly
        # five ticks span the axis.
        mag = int(math.log(N, 10)) - 1
        tail = 10 ** mag

        intervel = int(math.ceil(float(N) / tail / 5))
        print(N)
        print(intervel)
        upmax = int(math.ceil(float(N) / intervel / tail) * intervel * tail) + 1
        ticks = np.arange(0, upmax, intervel * tail)
        ax.set_xticks(ticks)
        print(ticks)
        ax.set_xticklabels(ticks)
        fig.savefig(os.path.join(path, id_s + '.tif'))
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        close(fig)