# HG changeset patch # User tyty # Date 1413830620 14400 # Node ID 003096bd038cd56002be15a5878fcd1b72b1c3fc # Parent cfb6e6e68917e12234e1643f877d1fd7fd6ee4cf Deleted selected files diff -r cfb6e6e68917 -r 003096bd038c Iterative_mapping/.DS_Store Binary file Iterative_mapping/.DS_Store has changed diff -r cfb6e6e68917 -r 003096bd038c get_reads/.DS_Store Binary file get_reads/.DS_Store has changed diff -r cfb6e6e68917 -r 003096bd038c predict/.DS_Store Binary file predict/.DS_Store has changed diff -r cfb6e6e68917 -r 003096bd038c predict/._.DS_Store Binary file predict/._.DS_Store has changed diff -r cfb6e6e68917 -r 003096bd038c predict/._tool_dependencies.xml Binary file predict/._tool_dependencies.xml has changed diff -r cfb6e6e68917 -r 003096bd038c predict/parse_dis_pac.py --- a/predict/parse_dis_pac.py Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -#parse reactivity file into a dictionary - -import sys - -def parse_dist(in_file): - result = [] - distribution = {} - name = [] - f = open(in_file) - for aline in f.readlines(): - line = aline.strip() - dis = line.strip() - dist = dis.split('\t') #split the line and the reactivites or reads are in a list - if len(dist) > 0: - if len(dist) == 1: - if dist[0].strip().find('coverage')==-1: - name.append(line) #add the name in the name list - flag = 1 - t_name = line - else: - distri = [] - for i in range(0, len(dist)): - distri.append(dist[i].strip()) - distribution[t_name] = distri #add the list of reactivities into a dictionary - result.append(name) - result.append(distribution) #Output the dictionary - f.close() - return result - - - - - - - - - - - - - - - diff -r cfb6e6e68917 -r 003096bd038c predict/parse_dis_pac.pyc Binary file predict/parse_dis_pac.pyc has changed diff -r cfb6e6e68917 -r 003096bd038c predict/predict_RNAs.py --- a/predict/predict_RNAs.py Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ -#RNA structure prediction & Output and illustrate reactivities - -import sys -from parse_dis_pac import * -from read_file import * -from Bio import SeqIO -import os -from rtts_plot import * - - -id_file = sys.argv[1] -seq_file = sys.argv[2] -output_file = sys.argv[4] - - -flag = 0 -if sys.argv[3]!='None': #input reactivity file if provided - react_file = sys.argv[3] - react = parse_dist(react_file) - react = react[1] - flag = 1 - -ospath = os.path.realpath(sys.argv[0]) -ost = ospath.split('/') -syspath = "" -for i in range(len(ost)-1): - syspath = syspath+ost[i].strip() - syspath = syspath+'/' - -ids = read_t_file(id_file) -sequences = SeqIO.parse(seq_file, 'fasta') - - -seqs = {} -for seq in sequences: - seqs[seq.id] = seq.seq.tostring() - -if len(ids)>10: #setup a limit of the number of sequence to be predicted - print("Number of sequences exceeds limitation!") - sys.exit(0) - - -#predict RNA structures -os.system("mkdir "+syspath+"output_f") -for i in range(len(ids)): - id_s = ids[i][0] - print(id_s) - #Put RNA sequence and reactivities into files - if id_s in seqs: - f = file(syspath+"temp.txt", 'w') - f.write('>'+id_s) - f.write('\n') - f.write(seqs[id_s]) - f.close() - if flag == 0: - os.system("Fold "+syspath+"temp.txt"+" "+syspath+"output_f/"+id_s+".ct") - if flag == 1: - if id_s in react: - f = file(syspath+"constraint.txt",'w') - make_plot(react[id_s],id_s,(syspath+"output_f/")) #make a plot of the distribution of the reactivites of the input RNA - #h = file(syspath+"output_f/transcript_reactivities.txt", 'w') - #h.write(id_s) - #h.write('\n') - for j in range(0, (len(react[id_s]))): - if react[id_s][j]!='NA': - f.write(str(j+1)) - f.write('\t') - f.write(str(react[id_s][j])) - f.write('\n') - #h.write(str(react[id_s][j])) #Output the reactivities - #h.write('\t') - f.close() - #h.write('\n') - #h.write('\n') - os.system("Fold "+syspath+"temp.txt"+" -sh"+" "+syspath+"constraint.txt"+" "+syspath+"output_f/"+id_s+".ct") - else: - print(id_s+" not in the data of react!") - os.system("draw "+syspath+"output_f/"+id_s+".ct "+syspath+"output_f/"+id_s+".ps") - else: - print(id_s+" not in the data of sequences!") - -#Remove the unnecessary files -os.system("tar -zcvPf "+output_file+" "+syspath+"output_f/"+"*.* 2>"+syspath+"log.txt") -os.system("rm -f "+syspath+"temp.txt") -os.system("rm -r "+syspath+"output_f") -if flag == 1: - os.system("rm -f "+syspath+"constraint.txt") - # h.close() - - - - - diff -r cfb6e6e68917 -r 003096bd038c predict/predict_RNAs.xml --- a/predict/predict_RNAs.xml Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ - - - predict_RNAs.py $rna_list $reference_file $reactivity_file $output - - biopython - numpy - matplotlib - - - - - - - - - - - - - - - - - - - - - -**TIPS**: - ------ - -**Input**: - -* 1. A file with transcript Ids (Max num. 20), (each ID one line) -* 2. Reference file (fasta) used to map the reads -* [Optional]: -* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided - ------ - -**Output**: - -* 1. .ct files with predicted RNA structures [transciptID.ct] -* 2. .ps files which depict the predicted RNA structures [[transciptID.ps] -* [Optional] -* 3. .png files that shows the distribution of the reactivity of each nucleotide on the transcripts of interest. [transciptID.png] -* 4. A .txt file that includes the reactivities of all the nucleotides on the transcripts of interest. [transciptID.txt] - ------ - -**Attention** - -Make sure any of the transcript Ids does not contain "|" or space! - - - - - diff -r cfb6e6e68917 -r 003096bd038c predict/predict_RNAs.xml~ --- a/predict/predict_RNAs.xml~ Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ - - - predict_RNAs.py $rna_list $reference_file $reactivity_file $output - - biopython - numpy - - - - - - - - - - - - - - -**TIPS**: - ------ - -**Input**: - -* 1. A file with transcript Ids (Max num. 20), (each ID one line) -* 2. Reference file (fasta) used to map the reads -* [Optional]: -* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided - ------ - -**Output**: - -* 1. .ct files with predicted RNA structures [transciptID.ct] -* 2. .ps files which depict the predicted RNA structures [[transciptID.ps] -* [Optional] -* 3. .png files that shows the distribution of the reactivity of each nucleotide on the transcripts of interest. [transciptID.png] -* 4. A .txt file that includes the reactivities of all the nucleotides on the transcripts of interest. [transciptID.txt] - ------ - -**Attention** - -Make sure any of the transcript Ids does not contain "|" or space! - - - - - diff -r cfb6e6e68917 -r 003096bd038c predict/rRNA.txt --- a/predict/rRNA.txt Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ ->25s rRNA 3375nts -GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA ->gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG ->Arabidopsis thaliana 1 -GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC ->gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA -AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA \ No newline at end of file diff -r cfb6e6e68917 -r 003096bd038c predict/read_file.py --- a/predict/read_file.py Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_t_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split('\t'); - for i in range(0, len(tl)): - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r cfb6e6e68917 -r 003096bd038c predict/read_file.pyc Binary file predict/read_file.pyc has changed diff -r cfb6e6e68917 -r 003096bd038c predict/rtts_plot.py --- a/predict/rtts_plot.py Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -#!/usr/bin/env python -#Make a plot of reactivity distribution - -import sys -import numpy as np -import matplotlib -from pylab import * -import math - -#Convert the reactivities (Make NA to 0) -def convert_react(a): - r = [] - for i in range(len(a)): - if a[i]!='NA': - r.append(float(a[i])) - else: - r.append(float(0)) - return r - - -#Make a plot of the distribution -def make_plot(ar,id_s,path): - N = len(ar) - a = convert_react(ar) - w = 1 - ind = np.arange(N) - - fig = figure() - fig, ax = subplots() - ax.bar(ind+w, a, width = w, color = 'r',edgecolor = 'r') - ax.set_ylabel('DMS Reactivity') - ax.set_xlabel('Nucleotide Index') - - - mag = int(math.log(N,10))-1 - tail = 10**mag - - intervel = int(math.ceil(float(N)/tail)/5) - print(N) - print(intervel) - tl = [] - k = 0 - ax.set_xticks(np.arange(0,N,intervel*tail)) - print(np.arange(0,N,intervel*tail)) - ax.set_xticklabels(np.arange(0,N,intervel*tail)) - - ax.set_title(id_s+" reactivity distribution") - savefig(path+id_s+'.tif') - - - - - - - - diff -r cfb6e6e68917 -r 003096bd038c predict/rtts_plot.pyc Binary file predict/rtts_plot.pyc has changed diff -r cfb6e6e68917 -r 003096bd038c predict/tool_dependencies.xml --- a/predict/tool_dependencies.xml Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff -r cfb6e6e68917 -r 003096bd038c predict/tool_dependencies.xml~ --- a/predict/tool_dependencies.xml~ Mon Oct 20 14:42:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ - - - - - - - - -