changeset 64:a1ce42d5258d draft

Uploaded
author tyty
date Tue, 18 Nov 2014 15:54:31 -0500
parents c1f1b552c1b8
children 36d912d5b1ac
files predict/.DS_Store predict/._.DS_Store predict/._predict_RNAs.py predict/id_list_test.txt predict/log.txt predict/parse_dis_pac.py predict/parse_dis_pac.pyc predict/predict_RNAs.py predict/predict_RNAs.xml predict/rRNA.txt predict/read_file.py predict/read_file.pyc predict/rtts_plot.py predict/rtts_plot.pyc predict/test_reactivity.txt predict/test_reference.fa
diffstat 16 files changed, 293 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file predict/.DS_Store has changed
Binary file predict/._.DS_Store has changed
Binary file predict/._predict_RNAs.py has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/id_list_test.txt	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,1 @@
+AT3G05880.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/log.txt	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,3 @@
+a /Users/yintang/Project/galaxy/galaxy-dist/tools/pipeline_programs/predict/output_qicbsuLr/AT3G05880.1.ct
+a /Users/yintang/Project/galaxy/galaxy-dist/tools/pipeline_programs/predict/output_qicbsuLr/AT3G05880.1.ps
+a /Users/yintang/Project/galaxy/galaxy-dist/tools/pipeline_programs/predict/output_qicbsuLr/AT3G05880.1.tif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/parse_dis_pac.py	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,43 @@
+#parse reactivity file into a dictionary
+
+import sys
+
+def parse_dist(in_file):
+    """Parse a reactivity (or read-count) file into names and values.
+
+    A line with a single tab-separated field is a transcript name, unless
+    it contains the word 'coverage', in which case it is ignored. A line
+    with several fields holds the values for the most recent name; a later
+    value line for the same name replaces the earlier one.
+
+    Blank lines are skipped (they used to be recorded as an empty name).
+
+    in_file -- path of the tab-delimited text file to read.
+
+    Returns [names, values]: the list of names in file order, and a dict
+    mapping each name to its list of stripped value strings.
+
+    Raises ValueError if a value line appears before any name line.
+    """
+    names = []
+    distribution = {}
+    current = None
+    # The with-block closes the file even if parsing raises.
+    with open(in_file) as handle:
+        for raw in handle:
+            line = raw.strip()
+            if not line:
+                continue
+            fields = line.split('\t')
+            if len(fields) == 1:
+                if 'coverage' not in line:
+                    names.append(line)
+                    current = line
+            elif current is None:
+                raise ValueError("values found before any name in %s" % (in_file,))
+            else:
+                distribution[current] = [field.strip() for field in fields]
+    return [names, distribution]
+
+
+
Binary file predict/parse_dis_pac.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.py	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,91 @@
+#RNA structure prediction & Output and illustrate reactivities
+
+import sys
+from parse_dis_pac import *
+from read_file import *
+from Bio import SeqIO
+import os
+from rtts_plot import *
+import random
+import string
+import glob
+import shutil
+import subprocess
+
+# Usage: predict_RNAs.py <id_list> <reference_fasta> <reactivity|None> <output>
+# Folds each listed transcript with the external `Fold` program, draws it
+# with `draw`, and packs the files written to the scratch directory into
+# <output> with tar. Reactivities, if given, go to Fold via -sh and are plotted.
+
+
+def run_tool(args, **kwargs):
+    """Run an external program without a shell and return its exit status.
+
+    Argument lists keep ids containing spaces or "|" away from a shell. A
+    program that cannot be started is reported, not raised, so the run
+    carries on as it did with os.system.
+    """
+    try:
+        return subprocess.call(args, **kwargs)
+    except OSError as err:
+        print("Could not run " + args[0] + ": " + str(err))
+        return 127
+
+
+id_file = sys.argv[1]
+seq_file = sys.argv[2]
+output_file = sys.argv[4]
+flag = 0
+if sys.argv[3] != 'None':  # the literal 'None' means no reactivity file
+    react_file = sys.argv[3]
+    react = parse_dist(react_file)[1]
+    flag = 1
+
+syspath = os.getcwd()
+# Joined with a separator: syspath+"temp.txt" named a file beside the cwd.
+temp_fasta = os.path.join(syspath, "temp.txt")
+constraint_file = os.path.join(syspath, "constraint.txt")
+output_directory = os.path.join(syspath, "output_files/")
+
+ids = read_t_file(id_file)
+# str() instead of Seq.tostring(), which newer Biopython no longer provides.
+seqs = {}
+for record in SeqIO.parse(seq_file, 'fasta'):
+    seqs[record.id] = str(record.seq)
+if len(ids) > 100:  # limit on the number of sequences to be predicted
+    # A string argument is printed to stderr and gives exit status 1.
+    sys.exit("Number of sequences exceeds limitation!")
+
+os.makedirs(output_directory)
+for entry in ids:
+    id_s = entry[0]
+    print(id_s)
+    if id_s not in seqs:
+        print(id_s+" not in the data of sequences!")
+        continue
+    ct_file = output_directory + id_s + ".ct"
+    with open(temp_fasta, 'w') as f:  # one-record FASTA input for Fold
+        f.write('>' + id_s + '\n' + seqs[id_s])
+    if flag == 0:
+        run_tool(["Fold", temp_fasta, ct_file])
+    elif id_s not in react:
+        print(id_s+" not in the data of react!")
+        continue  # no .ct file was written, so there is nothing to draw
+    else:
+        make_plot(react[id_s], id_s, output_directory)  # reactivity plot
+        # Constraint lines: 1-based position, tab, value; 'NA' is skipped.
+        with open(constraint_file, 'w') as f:
+            for j, value in enumerate(react[id_s]):
+                if value != 'NA':
+                    f.write(str(j + 1) + '\t' + str(value) + '\n')
+        run_tool(["Fold", temp_fasta, "-sh", constraint_file, ct_file])
+    run_tool(["draw", ct_file, output_directory + id_s + ".ps"])
+
+# The list is built before log.txt exists, so the log is not archived.
+products = sorted(glob.glob(output_directory + "*.*"))
+with open(output_directory + "log.txt", 'w') as log_file:
+    run_tool(["tar", "-zcvPf", output_file] + products, stderr=log_file)
+for scratch in (temp_fasta, constraint_file):  # remove the scratch files
+    if os.path.exists(scratch):
+        os.remove(scratch)
+shutil.rmtree(output_directory)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.xml	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,59 @@
+<tool id="predict_pipeline" name="RNA Structure Prediction" version="1.0">
+	<description></description>
+	<command interpreter="python">predict_RNAs.py $rna_list $reference_file $reactivity_file $output </command>
+        <requirements>
+                <requirement type="package" version="1.61">biopython</requirement>
+                <requirement type="package" version="1.7.1">numpy</requirement>
+                <requirement type="package" version="1.2.1">matplotlib</requirement>
+        </requirements>
+	<inputs>
+        <param name="rna_list" type="data" format="txt" label="List of RNA ids to predict"/>
+        <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/>
+        <param name="reactivity_file" type="data" optional = "true" label="Reactivity file"/>
+	
+	</inputs>
+	<outputs>
+		<data name="output" format=".tgz"/>
+	</outputs>
+    <tests>
+        <test>
+            <param name="rna_list" value="id_list_test.txt" />
+	        <param name="reference_file" value="cdna.txt" />
+            <param name="reactivity_file" value="mRNA_react_test2.txt" />
+	        <output name="output" file="structures.out" />
+        </test>
+    </tests>
+	<help>
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. A file with transcript IDs (max. 100), one ID per line
+* 2. Reference file (fasta) used to map the reads to
+* [Optional]:
+* 1. A reactivity file with structural reactivity for each nucleotide on the sequence provided
+
+-----
+
+**Output**:
+
+* 1. .ct files with predicted RNA structures [transcriptID.ct]
+* 2. .ps files which depict the predicted RNA structures [transcriptID.ps]
+* [Optional]
+* 3. .tif files that show the distribution of the reactivity of each nucleotide on the transcripts of interest. [transcriptID.tif]
+* 4. A .txt file that includes the reactivities of all the nucleotides on the transcripts of interest. [transcriptID.txt]
+
+-----
+
+**Attention**
+
+Make sure that none of the transcript IDs contains "|" or a space!
+
+
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/rRNA.txt	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,8 @@
+>25s rRNA 3375nts
+GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCT
GCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA
+>gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence
+TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG
+>Arabidopsis thaliana 1
+GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC
+>gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA
+AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/read_file.py	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Read a tab-delimited text file into a list of rows.
+
+    in_file -- path of the file to read.
+
+    Returns one list per line, holding that line's tab-separated fields
+    with surrounding whitespace stripped. A blank line yields [''].
+    """
+    # The with-block closes the file even if reading raises.
+    with open(in_file) as handle:
+        return [[field.strip() for field in line.strip().split('\t')]
+                for line in handle]
+
+
Binary file predict/read_file.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/rtts_plot.py	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+#Make a plot of reactivity distribution
+
+import sys
+import numpy as np
+import matplotlib
+from pylab import *
+import math
+
+#Convert the reactivities (Make NA to 0)
+def convert_react(a):
+    """Turn reactivity strings into floats.
+
+    Every item of *a* goes through float(), except the literal string
+    'NA', which becomes 0.0.
+    Returns a new list in the same order as *a*.
+    """
+    return [0.0 if value == 'NA' else float(value) for value in a]
+
+
+#Make a plot of the distribution
+def make_plot(ar,id_s,path):
+    """Save a bar chart of per-nucleotide reactivities as a .tif file.
+
+    ar   -- reactivity values; 'NA' entries are drawn as 0.
+    id_s -- transcript id, used in the title and the file name.
+    path -- prefix; the image is saved as path + id_s + '.tif'.
+
+    Raises ValueError if ar is empty.
+    """
+    N = len(ar)
+    if N == 0:
+        raise ValueError("no reactivities to plot for " + id_s)
+    a = convert_react(ar)
+    w = 1
+    ind = np.arange(N)
+
+    # subplots() makes the figure; the old extra figure() call leaked one.
+    fig, ax = subplots()
+    ax.bar(ind+w, a, width = w, color = 'r',edgecolor = 'r')
+    ax.set_ylabel('Structural Reactivity')
+    ax.set_xlabel('Nucleotide Index')
+
+    # Tick step is intervel * tail: roughly N / 5, rounded via a power of ten.
+    mag = int(math.log(N,10))-1
+    tail = 10**mag
+    intervel = int(math.ceil(float(N)/tail/5))
+    print(N)
+    print(intervel)
+    upmax = int(math.ceil(float(N)/intervel/tail)*intervel*tail)+1
+    ticks = np.arange(0,upmax,intervel*tail)
+    ax.set_xticks(ticks)
+    print(ticks)
+    ax.set_xticklabels(ticks)
+    ax.set_title(id_s+" reactivity distribution")
+    fig.savefig(path+id_s+'.tif')
+    close(fig)  # release the figure; the caller plots one per transcript
Binary file predict/rtts_plot.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/test_reactivity.txt	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,2 @@
+AT3G05880.1
+0.943887685769	0.421815158787	NA	1.01455360981	NA	NA	NA	NA	0.503726666615	NA	0.562759466181	0.53286496306	NA	0.452307806554	NA	NA	NA	NA	0.234006609126	NA	0.356521303582	0.3345952334	NA	0.938455477986	0.961629159648	0.58445845106	0.277563382428	NA	NA	NA	0.381285618597	0.510385809404	0.263300197836	0.351983737127	NA	0.478451311944	NA	0.0	NA	NA	0.249061701962	NA	0.602014314955	0.768409570219	0.479108914417	0.609654847688	0.395147907741	NA	NA	0.316409963987	NA	NA	1.01642560569	0.178529288881	1.1258499175	NA	0.102264245055	NA	0.588862283199	0.675885983569	NA	NA	NA	0.0	NA	NA	NA	NA	0.8152009763	NA	0.730574123452	NA	NA	0.196012732449	NA	NA	0.748188202713	NA	0.0	NA	0.925269643553	0.0	0.506395998703	NA	0.511028818599	0.354285255052	NA	1.01563235674	NA	NA	NA	0.602118316823	0.486534824365	NA	0.266635693932	0.176995791343	0.887089878761	0.654802870139	NA	0.24940376078	NA	NA	NA	NA	NA	0.835049477972	NA	NA	NA	0.230188979227	NA	0.145932219541	0.510982455489	NA	0.70545494854	NA	NA	NA	NA	NA	NA	NA	NA	0.0	NA	NA	NA	NA	NA	NA	NA	0.0607994838688	NA	NA	NA	NA	NA	NA	NA	0.0	NA	0.510982455489	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.805809423851	0.214474701586	NA	0.320112197187	NA	0.886851602907	0.0	NA	NA	NA	NA	NA	NA	0.173824155265	NA	0.499351609605	NA	NA	0.119452482914	0.0	NA	NA	NA	NA	0.985352919102	NA	0.0	0.0	NA	0.925509063242	NA	0.54084940725	0.0	NA	0.622064968928	0.285526636999	0.373974554632	NA	NA	NA	0.0	NA	NA	NA	0.0	0.369187155673	NA	0.644843692277	0.0	0.0	0.0	0.0	0.155248951804	0.0	NA	NA	0.0	NA	NA	NA	0.0	0.0	0.0464264694222	0.0	NA	0.0	NA	0.250790026642	NA	0.11120052998	0.0299680537584	NA	0.0723351276735	0.0	NA	0.069968383925	NA	0.394969636508	0.0	NA	NA	NA	0.270970925021	0.0436999866019	0.0	0.172881011784	NA	1.24794080936	NA	NA	NA	NA	0.0	0.0	NA	NA	0.241516034212	NA	NA	NA	NA	NA	0.19903265234	NA	0.289893769912	0.835049477972	NA	0.83678872047	NA	0.769301566905	NA	NA	NA	NA	NA	NA	0.486001309495	1.00671172955	NA	NA	0.392970275151	NA	NA	0.369187155673	0.0330308799953	NA	0.0	NA	NA	NA	NA	
0.322006332632	NA	NA	NA	NA	NA	1.10146992643	NA	NA	NA	0.602394928175	0.052285391313	NA	0.0	0.474447727012	NA	0.258266798648	NA	NA	NA	NA	NA	NA	0.173824155265	NA	NA	NA	NA	NA	0.435796833817	0.341824194398	0.0	NA	NA	0.971161850563	0.0	NA	NA	NA	0.594354216766	0.0	NA	NA	0.0	0.0666024493389	NA	0.36331086056	0.0	NA	0.838839492047	0.078362328999	0.54084940725	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.0	0.0	NA	NA	NA	NA	NA	0.454308208169	NA	0.0	NA	0.0	0.653920441757	0.369187155673	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.0	NA	0.0	NA	0.0	0.0	0.602118316823	0.0	NA	0.0	0.0	NA	0.0	NA	0.0	NA	0.0	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	0.0	0.0	NA	NA	NA	NA	NA	NA	NA	NA	0.0	0.0	NA	0.0	NA	NA	NA	NA	NA	NA	0.0	0.0	0.0	0.0	0.0	NA	0.0	NA	0.0	NA	NA	NA	NA	NA	NA	NA	NA	0.0	NA	NA	0.0	0.0	0.465862322301	0.0	NA	0.0	0.0	0.0	NA	NA	NA	NA	0.0	0.0	0.0	NA	NA	NA	NA	NA	NA	NA	NA	0.0	0.0	NA	NA	NA	NA	NA	NA	NA	0.0	NA	0.0	0.0	0.0	0.0	NA	NA	0.0	0.0	0.0	0.0	0.0	0.0	0.0	NA	NA	0.0	0.0	0.0	0.0	0.0	NA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/test_reference.fa	Tue Nov 18 15:54:31 2014 -0500
@@ -0,0 +1,8 @@
+>AT3G05880.1 | Symbols: RCI2A | Low temperature and salt responsive protein family | chr3:1755497-1756540 REVERSE LENGTH=495
+AAGCTTTTATAATATTTTCTCAGAAACTTTCAAAGAGCTTAGAAAAATGAGTACAGCTACTTTCGTTGATATTATTATCG
+CCATCCTCTTGCCTCCACTCGGTGTCTTTCTCAGATTTGGTTGCGGGGTTGAGTTTTGGATATGTTTGGTTTTGACGCTA
+CTTGGGTATATTCCTGGGATCATATACGCCATTTATGTCCTCACCAAATGATTTACCATCTATCATCATCTCCTTGAACA
+GCTGTTCCGTCGTGTTCTCCTATCTTTGTGACTGATTCAGCGTTTCTTTTTCTTTCATCAGAGTTTTTATGTTTCAAGTA
+ATTTAATTAATCATCACTGTTGTGTTTGCATTGTTATATAAATGTTGTGTTGATATAAAAGAAGAGAGCGTTGGTTTGTA
+CTTTGTGTGAAGATTTTTTAAAAATATAGTTGGTTTATTACAATAAATTGGAAATTGTGTTGCCTTGGTGGATCACAGGA
+CCACCATTAACCATT