Repository 'protk_proteogenomics'
hg clone https://toolshed.g2.bx.psu.edu/repos/iracooke/protk_proteogenomics

Changeset 0:28067ed4ea0e (2015-03-26)
Next changeset 1:a85c8de9630a (2015-03-26)
Commit message:
Docker support and update for protk 1.4
added:
README.rst
gff3_to_fasta.xml
protxml_to_gff.xml
repository_dependencies.xml
test-data/augustus_sample.fasta
test-data/augustus_sample.gff
test-data/small.prot.xml
test-data/small_combined.gff
test-data/small_prot.fasta
tool_dependencies.xml
b
diff -r 000000000000 -r 28067ed4ea0e README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu Mar 26 20:11:34 2015 -0400
b
@@ -0,0 +1,35 @@
+Protk Proteogenomics
+====================
+
+Map peptides and proteins to genomic (or transcriptomic) coordinates
+
+Requirements
+------------
+
+This package uses protk_ and blast_ which need to be present in order for the tools to work.
+
+.. _protk: https://github.com/iracooke/protk
+.. _blast: https://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus
+
+When installing this package, if you instruct Galaxy to install dependencies, it will automatically install blast_, but installing protk_ may require additional steps (see below).
+
+There are two ways you can install protk_ (choose one):
+
+1. **Manual Install:** Details on how to install protk_ manually are available here_.
+
+2. **Use Docker:** These tools are designed to run inside a docker_ container. If your Galaxy instance supports `running tools within a docker container`__, you don't need to worry about dependencies: simply install the tools and they should just work. The Docker container itself is versioned, and new versions of this tool will automatically download an update to the container if needed.
+
+.. _docker: https://www.docker.com/
+.. _here: https://github.com/iracooke/protk/#galaxy-integration
+.. _container: https://wiki.galaxyproject.org/Admin/Tools/Docker
+__ container_
+
+
+Further Info
+------------
+
+The source code for this tool and other protk Galaxy tools is on github_. Please visit the GitHub page to contribute to the project or to `report an issue`__.
+
+.. _github: https://github.com/iracooke/protk-galaxytools
+.. _issue: https://github.com/iracooke/protk-galaxytools/issues
+__ issue_
\ No newline at end of file
b
diff -r 000000000000 -r 28067ed4ea0e gff3_to_fasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_to_fasta.xml Thu Mar 26 20:11:34 2015 -0400
b
@@ -0,0 +1,52 @@
+<tool id="gff3_to_fasta" name="Extract proteins from gff3" version="1.1.0">
+    <!-- Runs augustus_to_proteindb.rb on one gff3 dataset and collects the result as a fasta dataset. -->
+    <requirements>
+        <container type="docker">iracooke/protk-1.4.1</container>
+        <requirement type="package" version="1.4">protk</requirement>
+    </requirements>
+
+    <description>Extract proteins from gff3 and encode genomic coordinates in the fasta header</description>
+
+    <!-- Dataset paths are single-quoted so the shell passes each one as a single argument.
+         $coords is left bare: it expands to the boolean's truevalue or to an empty string. -->
+    <command>
+        augustus_to_proteindb.rb '$gff_file' -o '$output' $coords
+    </command>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Failure" />
+    </stdio>
+
+    <inputs>
+        <param name="gff_file" type="data" format="gff3" label="Augustus Generated gff3 File" />
+        <param name="coords" type="boolean" label="Write genomic coordinates" help="" truevalue="--info" falsevalue="" />
+    </inputs>
+
+    <outputs>
+        <data format="fasta" name="output" />
+    </outputs>
+
+    <tests>
+        <!-- Just test that the tool runs and produces vaguely correct output -->
+        <test>
+            <!-- Test elements take ftype (not format) to declare the dataset type. -->
+            <param name="gff_file" value="augustus_sample.gff" ftype="gff3"/>
+            <output name="output" file="augustus_sample.fasta" ftype="fasta"/>
+        </test>
+    </tests>
+
+  <help>
+
+**What it does**
+
+Extract proteins from gff3 and encode genomic coordinates in the fasta header.
+Currently this only works with gff3 generated using the Augustus gene finder
+
+----
+
+**References**
+
+
+  </help>
+
+</tool>
b
diff -r 000000000000 -r 28067ed4ea0e protxml_to_gff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/protxml_to_gff.xml Thu Mar 26 20:11:34 2015 -0400
[
@@ -0,0 +1,102 @@
+<tool id="protxml_to_gff" name="Proteomics to GFF" version="1.1.0">
+    <!-- Runs protxml_to_gff.rb on a ProtXML file, a protein database and a gff3 of protein coordinates; the output is gff3. -->
+    <description>Export Proteomics Data to GFF</description>
+
+    <requirements>
+        <container type="docker">iracooke/protk-1.4.1</container>
+        <requirement type="package" version="1.4">protk</requirement>
+        <requirement type="package" version="2.2.29">blast+</requirement>
+    </requirements>
+
+    <!-- Every substituted value is single-quoted so the shell passes it as one argument. -->
+    <command>
+        protxml_to_gff.rb '$protxml_file'
+
+        #if $database.source_select=="built_in":
+            -d '$database.dbkey'
+        #else
+            -d '$database.fasta_file'
+        #end if
+
+        -c '$gene_file'
+
+        --gff-idregex='$gffidpattern'
+
+        -o '$output'
+    </command>
+
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Failure" />
+    </stdio>
+
+    <inputs>
+        <conditional name="database">
+            <param name="source_select" type="select" label="Database source used for Proteomics Searches" help="Database should be an amino acid fasta file with entry id's that can be parsed to obtain contig or scaffold ids referenced in your gff file">
+                <option value="input_ref">Your Upload File</option>
+                <option value="built_in">Built-In</option>
+            </param>
+            <when value="built_in">
+                <!-- Option names/values come from columns 0 and 2 of pepxml_databases.loc.
+                     NOTE(review): confirm that loc file is installed with the tool; it is referenced only by name here. -->
+                <param name="dbkey" type="select" label="Database">
+                    <options from_file="pepxml_databases.loc">
+                        <column name="name" index="0" />
+                        <column name="value" index="2" />
+                    </options>
+                </param>
+            </when>
+            <when value="input_ref">
+                <param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />
+            </when>
+        </conditional>
+
+        <param name="protxml_file" type="data" format="protxml" multiple="false" label="Proteomics Search Results" help="A ProtXML file produced by Protein Prophet"/>
+
+        <param name="gene_file" type="data" format="gff3" multiple="false" label="Protein coordinates" help="A gff3 file with coordinates for all protein entries used for proteomics searches. Coordinates should correspond to entries in the genome fasta file"/>
+
+        <!-- A single quote in the regex is replaced with __sq__ so it cannot end the single-quoted argument in the command. -->
+        <param name="gffidpattern" size="40" type="text" value="lcl\|([^ ]*)" label="gff id regex" help="Regex with capture group for parsing gff ids from protein ids">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target="__sq__"/>
+                </mapping>
+            </sanitizer>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="gff3" name="output" />
+    </outputs>
+
+    <tests>
+        <!-- Test elements take ftype (not format) to declare the dataset type. -->
+        <test>
+            <param name="source_select" value="input_ref"/>
+            <param name="fasta_file" value="small_prot.fasta" ftype="fasta"/>
+            <param name="protxml_file" value="small.prot.xml" ftype="protxml"/>
+            <param name="gene_file" value="small_combined.gff" ftype="gff3"/>
+            <output name="output" ftype="gff3">
+                <assert_contents>
+                    <has_text text="polypeptide" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+  <help>
+
+**What it does**
+
+Exports peptides and proteins to gff
+
+----
+
+**References**
+
+
+  </help>
+
+</tool>
b
diff -r 000000000000 -r 28067ed4ea0e repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Thu Mar 26 20:11:34 2015 -0400
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="Proteomics datatypes">
+  <repository name="proteomics_datatypes" owner="iracooke" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="ac51d9dbfb4d"/>
+</repositories>
b
diff -r 000000000000 -r 28067ed4ea0e test-data/augustus_sample.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustus_sample.fasta Thu Mar 26 20:11:34 2015 -0400
b
@@ -0,0 +1,24 @@
+>lcl|scaffold14_rev_g1.t1 2517|3803
+MAGLAAGIVGVVSAGTKVAIVLSQYGNEVGAAGQEARMIASEIRGSCTVLTTLHSTLKHVQTSPYYANCAELISDMTDASLEMYTEIMEVVEGLSAMTSDSKMNLRKRLLWTFQKPKIVMLRTALEAYRSNLALMLGTLDMAEKASRSYVALTEEIVQEDEMDCAKLQDLQLEQQMSLLKVQELDPESIELPPSPTGAGRGFWGSSNKESAFPVEEGYVSALREEIATLKRSRTVYLTDPEKVRDRVARQSNRLSQLLVQDQRRISRRWSQSLPERRMSMYSDLAAERSRSPTSTPGSSSASPSESSDDLSSDYAQVNEPMVRDFYAWMSAQTGVQRSIVLRQLQARFGDGRGTTVRKPVNGVGIHPGASEDTLCADVSKLELEKDAVAAETYERQAEPAPVVGASKEKKSSFLKRSMGLKRRTPSAS
+>lcl|scaffold14_rev_g2.t1 6798|7541
+MPTRLSNTRKHRGHVSAGHGRVGKQYVTPLSNPLLDVERIFQFSPLHRKHPGGRGLAGGQHHHRTNMDKYHPGYFGKVGMRYFHKQGNHFWKPTINLDKLWSLVPLEQREKYISNKKSDTAPVLDLLSFGYSKVLGKGRLPEIPLVVRARYFSAEAEKKIKEAGGVVQLVG
+>lcl|scaffold14_rev_g2.t2 6798|7541
+MPTRLSNTRKHRGHVSAGHGRVGKHRKHPGGRGLAGGQHHHRTNMDKYHPGYFGKVGMRYFHKQGNHFWKPTINLDKLWSLVPLEQREKYISNKKSDTAPVLDLLSFGYSKVLGKGRLPEIPLVVRARYFSAEAEKKIKEAGGVVQLVG
+>lcl|scaffold14_fwd_g3.t1 8211|8576
+MSKQLSTKEVSTHKSVEEGLWIIIDNEVYDVTKFIEEHPGGPKILKRVGGKDASKQFWKASNPIPLQPSHQKHPLSHPLPSSVLIGARVLTLSASQYHSDNVLKKYKPKLMIGSVKEDAKL
+>lcl|scaffold14_rev_g4.t1 9661|12081
+METYQGKVQTPQDAIILFEACRLGLTPRVQRRLSEKERQQIKSGSVFVWDEREAGMRRWTDGKSWSASRVSGSFLTYREMEGKRGGTGLPAGATPPALRKSSSGTRSSANSNNGGSEDDEEGPDGYKYKADGLMKQSFSITTSDGRHLHLISYFARNPAHPLKQPSTDPQLRNIVPQKGMYPEATVNEATNIPPVTRGPMGGTSPHPPAHPPPQMYVAAYPFPPPPGYQAGPMHHHYPPQGWPPSPGHLGPGGVPPGAFYYPAPPPGYVYPNSPYAPSPYPYGHPPPHAQERGPPPPPQSSLPPLSGPIHPSAGQWAPPAARTSSATYAQSRPPTSLQESYSRSAAEQRQEETSRAPGPQLAPVVAFQNKPPTPPESRNGATPPAKSYPTDPLPTRETNTTTGPTKTFPSIKALINGDVITSRREEDRTRTRSRSRSPLGSRIRDVPSAMAAGGRERDRSDLKRLDAQLLKSSGLESSDGNGKLAQNHPGAHSKPISRSIDVIAKVIPGHVLATMPENIELEPQDFGRWPLQSGNGRYTEIQPTPTSSEDGQKLQLQTMPDDNFGRVMEPSLKEVLDGMTGRQRGLGQLGQMRLTTQDVNSKRFDLYQALKYATPSSTITRILNELKTGVLPSEKLLRETNIGVEVDGLRWLTKIPVIAKLSTEIVNEWRTVIESGPRPHPSNDEIFGPYWRLSNNFYVSNHGSESPFPRLSLGNEKLGLGSLNDNKNLGTENGMIPKTKEDRL
+>lcl|scaffold14_fwd_g5.t1 11112|11486
+MYAQLEGPIAQQRDVSGHSEGADLIEAAEVDLSPARAEEGGHTDMGMARMVNSDKHIPEEEQDNKMLQVVLLQGLDVPGLAASLVEGSDDALVLLGILEAVEMDRLRRTSVEADAQEDAAMSPP
+>lcl|scaffold14_fwd_g6.t1 22383|25038
+MLIRSNPSLHYPITITKLLAKPDQSVDRFAPLFSYTYKTTVTEGNKYGDEPIQFQKDFPAEFQSELEGTLRVWKIKPGDVLYESGIQLVDIDEPCTHDTVFGGLCANCGQEMDKLDNYLTTERASQRATVFASHGNTALLVSKKEAGRINEEAKRRLLQGRKLSLVVDLDQTIIHATVDPTVAEWQQDPANPNYEAVKGVRQFQLVDDGPGGRGCNYYIKLRPGLEEFLEDIAKKYELHIYTMGTRAYAQKIAEIVDPQRRFFGDRILSRDESGSLTNKTLQRIFPVDTDMVVIIDDRGDVWQWSPNLIKVTPYDFFVGIGDINSSFLPKKTDSISKRTRTPPPPPPPAEIPEDKETNDSFEEAESPTNGLAIDTTVILEDSDAPTLETQLVAMAGGDDPSIIEEKSNTQAETLAAQVTDRPLMKKQEMLDKLDEEEEAKENSEEERPVETTPDTKQRHNLLHDDDDELRHLQANLESIHRLFYEEYDQNIIASPPRLSQLRGERSAKKTPVDNLELVPDVKDIMPRMKLDVLKHVVICFTGVIPQGLNHETSDIGMWARSFGARVSPNLTKSTTHVVAHKDRRTSKVRQAARHPNIKIVATSWLLECFVQWVAAPEGPHTIKVESDEHDAHDSLPFEELEEASMLTPSEDGGADDAVLPSEVEEADEEPNSPVLDILENVEWGDLEDEMKDFYDSEDETEAEDTDEGHDEGMESDASGKSGRSSRSTSSRKAKRKRRTESVNGDYEEAGESTSALQKRRKMAAERTTGLANVETLENPSGLPSPDTTGPEEEAGEKDKGLSNGAEGDVSDDIDEFEKDMMAMFDKDSDAED
+>lcl|scaffold14_fwd_g7.t1 25584|27469
+MTTDNPEAKMVEVAQPTKMAVVPVEKEVPKRPSKLRFMDLSVDIKTLIVSHVHRPTELKNLCLTCSQMHKITVRKLYHEVTLEVGSSTDAKLTSFINPRNIGLPHIRKLDLYLAEVADKCNQLQQAHFAIRMILEFLPENILEKFSWHPWSPFLADNLVLLYKKQRRMEWLEGIALDRNVLPELEKQSDFDQIFTRTRRLGLYPDSRDVLNFCANLIKRTKKVEKITLHASFDEDSDRTPIPSRELNDSSTDLGLITRTMFGHMEPFEKCTPLSLKDLTLQKINLRYAANSYCKFIDFRVMKALRIFGCPGADSLFAELSKSHKLPERLETLEFKHDDNQENDALNALDGFLKLVSGIKVLTMDICYAKQLPSADGIIRHSRTLRELNVHGSRGDGEEEELIIDFDDFEKICKSCSSIEQLSMAFPTTSLIRPISDSFQAFENALGDLANLITLNITTWPTNTPSTSRLPRKTYEILLQSLANSGFEHSIHHASSRTPPRSSKLAVIAFGSSDRVYDREDSKNQIIFVKGRQTDPFGGEKPLAVQVGWCLRKFVEPRSDVLDFSLARSCRPPTREPPASEDSE
+>lcl|scaffold14_rev_g8.t1 29180|30106
+MFRARDSLRKLRSQGLPSTKIWLPTPRSQRLLASFAKSRNTFPWHYAVPAAMGAAVFSFVFIAQSQKGSLPYPGGCCSRDRKPLLVSYYTDKPCTLRDVTDIAGLMAAMWVSERINEQSSSKDALTVKDDLLFDIASRLALLRNGGVEGSSLGKTECLQMFAESESNRLKDSPFGRESCDLSLMSGDAKTAWVADSIGYFIFSDRMDEPHPLSKLDDWIDHLIDRGSFVESKFMTVLNDVERVIMIEMYKEARKAYQNAETRKEMEAMGLPVASAQDMMLIDAMMHVYRRLSGDRVLKDVGHEDDVGH
+>lcl|scaffold14_fwd_g9.t1 30672|31824
+MPIENIQNPSAKHWNCTNTLNPSILPFHQSLPDYAITPLTPLEALAEDLGIGHIYVKDEGTRFGLPAFKILGASWAVYRAIAAQLGKPTTTSLDDLCNAAEGRGIRIVTTSEGNWGRAVARMGRYLGVAVTVYVPRYMDEATRGKISREGAEVILHKGEYDDCLFTCRHVSEETGALLVLDTSFDGYTEIPQWVTDGYSSMLEEVDSQLRAAIDRPATHAIASVGVGSWAHAVVAHYKSKDPTAAVATVEPKTANCLATSLDAGKIISVKTDVTIMNGMNCGTVSDIAWPYLRDGVDASVTVSEMQAHEAVLYLRAHGVNAGPCGAAPLAALRELKKLNWLGLGSDSVVVLYCTEAARAYKEPTSE
+>lcl|scaffold14_rev_g10.t1 31923|33129
+MSHHESYDSHKMPNMHVQQGPDMAMTDALRKHLTRHILSPGPVTQRRLDFEHARPTWLREMFAEATGVFFYVYPGIAATAAFILQKESPALGSVFTIGLAYGIGVSLGIICCAGTSGGHFNPSITICFAIWQGFPWRKVPYYIFAQIAGAFFAGLILYGQYHQQIEVYRLGLEKLGMPEVFMGSPASIFTSYPLGSQTNQAWLVLIEFFVDAFLGFAIWAVLDPSNPFISAGIAPFVIGLAYSLMIWGFASVTISTNLARDLGGRLVAMLFYGSDAFTYMSYWWIGIFINIPATIFATCFYELVFRDSLDKIALGHAQHEDGVDGIARHLSQTGLMDPYPEESSLRRKEYASQSSRIS
+>lcl|scaffold14_rev_g10.t2 31923|33087
+MHVQQGPDMAMTDALRKHLTRHILSPGPVTQRRLDFEHARPTWLREMFAEATGVFFYVYPGIAATAAFILQKESPALGSVFTIGLAYGIGVSLGIICCAGTSGGHFNPSITICFAIWQGFPWRKVPYYIFAQIAGAFFAGLILYGQYHQQIEVYRLGLEKLGMPEVFMGSPASIFTSYPLGSQTNQAWLVLIEFFVDAFLGFAIWAVLDPSNPFISAGIAPFVIGLAYSLMIWGFASVTISTNLARDLGGRLVAMLFYGSDAFTYMSYWWIGIFINIPATIFATCFYELVFRDSLDKIALGHAQHEDGVDGIARHLSQTGLMDPYPEESSLRRKEYASQSSRIS
b
diff -r 000000000000 -r 28067ed4ea0e test-data/augustus_sample.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustus_sample.gff Thu Mar 26 20:11:34 2015 -0400
[
b'@@ -0,0 +1,453 @@\n+##gff-version 3\n+# This output was generated with AUGUSTUS (version 2.5.5).\n+# AUGUSTUS is a gene prediction tool for eukaryotes written by Mario Stanke (mario.stanke@uni-greifswald.de)\n+# and Oliver Keller (keller@cs.uni-goettingen.de).\n+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),\n+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding\n+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013\n+# No extrinsic information on sequences given.\n+# Initialising the parameters ...\n+# venturia_inaequalis version. Use default transition matrix.\n+# Looks like /home/virtualdan/winshare/vp/final_vp_assembly/Pirina.100.fasta is in fasta format.\n+# We have hints for 0 sequences and for 0 of the sequences in the input set.\n+#\n+# ----- prediction on sequence number 1 (length = 532736, name = scaffold14) -----\n+#\n+# Predicted genes for sequence number 1 on both strands\n+# start gene g1\n+scaffold14\tAUGUSTUS\tgene\t2517\t3803\t.\t-\t.\tID=g1\n+scaffold14\tAUGUSTUS\ttranscript\t2517\t3803\t0.92\t-\t.\tID=g1.t1;Parent=g1\n+scaffold14\tAUGUSTUS\tstop_codon\t2517\t2519\t.\t-\t0\tParent=g1.t1\n+scaffold14\tAUGUSTUS\tCDS\t2517\t3803\t0.92\t-\t0\tID=g1.t1.cds;Parent=g1.t1\n+scaffold14\tAUGUSTUS\tstart_codon\t3801\t3803\t.\t-\t0\tParent=g1.t1\n+# coding sequence = [atggcagggctagcagcaggcattgttggtgtcgtgtcagctggtaccaaagtcgccatcgtcctttcgcagtatggca\n+# atgaagtgggagcagctggccaagaagcgcgaatgatcgcgtcggaaatccgaggatcatgcacagttctcacgaccctccactcgacattgaaacat\n+# gtccagacatcgccgtactacgcgaattgcgctgaactgatcagcgatatgaccgatgcgagtctggagatgtatacggaaatcatggaggtcgtcga\n+# gggattgtcggcaatgacgagcgacagcaagatgaatttgaggaagcggctgctgtggacctttcaaaagccgaagatcgttatgttgagaacggcac\n+# tggaggcctatagatcgaatttggctcttatgcttggaacgttggatatggctgagaaggcctcgcgaagctacgttgctttgactgaggagattgtg\n+# caggaagatgaaatggactgcgcaaagcttcaagacctacaactggaacaacaaatgtctctgcttaaggttcaagagttggacccggaaagtatcga\n+# 
gcttccgcctagcccaacgggcgcagggcggggattttgggggtcgtcaaataaagaatctgcatttccagtcgaggagggttatgtcagtgccctga\n+# gagaagagattgcgactctcaagaggagccgaactgtctacctgacagaccccgaaaaggtgcgcgatcgagtagctcgacagagcaatcgtttgtcc\n+# caactcctcgtccaggatcagaggagaatttcgcgaagatggtctcaatccctgccagagagacgtatgagcatgtacagcgacttggcagcagagag\n+# gtcacggtcgcctacgagtacgcctggcagctcatcagcctccccgagcgaatccagcgatgatctatccagcgattatgcccaggtgaacgaaccca\n+# tggttagagacttttatgcttggatgtctgcgcaaactggcgttcagcggagcattgtgcttcgacagctgcaagcgcggtttggtgatgggcgaggg\n+# acaactgtccgcaagcccgtcaacggtgttggaatccatccaggagcaagcgaggacacactatgcgccgatgtctccaagcttgaattggagaagga\n+# tgcagttgcagcagagacctacgagagacaggctgagccagctcctgtggttggagccagtaaggaaaagaagagctcgttcttgaagaggtcaatgg\n+# ggttgaaaagacgaacgccgtcagcgtcgtga]\n+# protein sequence = [MAGLAAGIVGVVSAGTKVAIVLSQYGNEVGAAGQEARMIASEIRGSCTVLTTLHSTLKHVQTSPYYANCAELISDMTD\n+# ASLEMYTEIMEVVEGLSAMTSDSKMNLRKRLLWTFQKPKIVMLRTALEAYRSNLALMLGTLDMAEKASRSYVALTEEIVQEDEMDCAKLQDLQLEQQM\n+# SLLKVQELDPESIELPPSPTGAGRGFWGSSNKESAFPVEEGYVSALREEIATLKRSRTVYLTDPEKVRDRVARQSNRLSQLLVQDQRRISRRWSQSLP\n+# ERRMSMYSDLAAERSRSPTSTPGSSSASPSESSDDLSSDYAQVNEPMVRDFYAWMSAQTGVQRSIVLRQLQARFGDGRGTTVRKPVNGVGIHPGASED\n+# TLCADVSKLELEKDAVAAETYERQAEPAPVVGASKEKKSSFLKRSMGLKRRTPSAS]\n+# end gene g1\n+###\n+# start gene 
g2\n+scaffold14\tAUGUSTUS\tgene\t6798\t7541\t.\t-\t.\tID=g2\n+scaffold14\tAUGUSTUS\ttranscript\t6798\t7541\t0.56\t-\t.\tID=g2.t1;Parent=g2\n+scaffold14\tAUGUSTUS\tstop_codon\t6798\t6800\t.\t-\t0\tParent=g2.t1\n+scaffold14\tAUGUSTUS\tintron\t6936\t6986\t1\t-\t.\tParent=g2.t1\n+scaffold14\tAUGUSTUS\tintron\t7068\t7117\t1\t-\t.\tParent=g2.t1\n+scaffold14\tAUGUSTUS\tintron\t7275\t7401\t0.63\t-\t.\tParent=g2.t1\n+scaffold14\tAUGUSTUS\tCDS\t6798\t6935\t0.97\t-\t0\tID=g2.t1.cds;Parent=g2.t1\n+scaffold14\tAUGUSTUS\tCDS\t6987\t7067\t1\t-\t0\tID=g2.t1.cds;Parent=g2.t1\n+scaffold14\tAUGUSTUS\tCDS\t7118\t7274\t1\t-\t1\tID=g2.t1.cds;Parent=g2.t1\n+scaffold14\tAUGUSTUS\tCDS\t7402\t7541\t0.59\t-\t0\tID=g2.t1.cds;Parent=g2.t1\n+scaffold14\tAUGUSTUS\tstart_codon\t7539\t7541\t.\t-\t0\tParent=g2.t1\n+# coding sequence = [atgcctaccagattatccaacacccgcaagcaccgcggtcacgtctctgccggtcacggtcgtgtcggcaagcagtatg\n+# tcacacccctcagcaaccctctcctcgacgttgaacggatcttc'..b't2\n+scaffold14\tAUGUSTUS\tCDS\t36126\t36289\t1\t-\t2\tID=g11.t2.cds;Parent=g11.t2\n+scaffold14\tAUGUSTUS\tCDS\t36341\t36977\t0.88\t-\t0\tID=g11.t2.cds;Parent=g11.t2\n+scaffold14\tAUGUSTUS\tCDS\t37203\t37919\t0.89\t-\t0\tID=g11.t2.cds;Parent=g11.t2\n+scaffold14\tAUGUSTUS\tCDS\t37966\t38117\t0.25\t-\t2\tID=g11.t2.cds;Parent=g11.t2\n+scaffold14\tAUGUSTUS\tCDS\t38193\t38214\t0.96\t-\t0\tID=g11.t2.cds;Parent=g11.t2\n+scaffold14\tAUGUSTUS\tstart_codon\t38212\t38214\t.\t-\t0\tParent=g11.t2\n+# coding sequence = [atgtctagatactctcgcattggagagagcggccgctacgatgaagacgctcctaaccgctacagtcggcctgatgcgg\n+# tgccgcttcaacatattcccagtcatgaggactttgctacggatggacaacattcctacatggatacccgggatcgactcgccgcccagccgacctac\n+# tccgttgataatctagcttactcttatggacttagagaagcatacgagtcaacattccagaacatcccagagcatccgggccgccagcattatgattc\n+# acatccagctgaagaagcagatccagcgtattatgacgatgatgataccagaccaatgctagatcgtcaccacagtaacgatgccagtagttcttcgg\n+# tgacactgacagtgcaggtcccgagatgggccaccatccagtatgtcccattgttcaaaggcaacctggttttggactgtccagtcccatcaaaattg\n+# 
gtcaatcagttggcgacaggggatgaacagcctcgtgaatctacgcacatgagatattcagcagcaacttgcgatcccgccgattttcttgacaaccg\n+# attcacattgcgccaaaaattatatcaacaaccgagagcagttgaactcttcatcgttgtcacaatgtacaatgaagacgatgcgctgctggcgagaa\n+# cattgatagggattttccagaatgtcaattacatggaaagtttgagggatagcacgatgtggggtgctaacgcctggaagaagatagtagtgtgcatc\n+# gttagtgacggaatcgcgaagatcaatccccgttctgcagcacttcttgctggtctgggtgtctttcagcaaggcattgccaaagctaagatcaacaa\n+# actcgagaccgaggcccatatattcgagtacacaacgcaaatggaaccagtaatgagaggcgagacagtctccgtccagaagggcaacactccggttc\n+# aactgctcttctgcttgaagcaaaagaatgcgcagaagatcaattcgcatcgctggttctttcaagcctttggcagatgtctcgagcctaatatttgt\n+# gtactgctcgatgctggaaccaaacctggtaagcactcgatattcaaactttggaacgttttcgatcgaaacagacattgtgccggcgcttgtggaga\n+# gatcaaggcagacttaggaagagggggtaaaaacttgttgaatccattggtcgcgactcaaaactttgaatataagatgtcgaatatacttgataagc\n+# ctctggaatcagctttcggcttcatctcggtccttccaggagccttctctgcttataggtacgaggctcttcaaggagaacccatggtcaaatatttt\n+# gctggagagaaaatgcacaataacagcaatttctttactgcgaatatgtacctggctgaggaccgaattctatgttacgagctggtggcaaagaaggg\n+# cagcaaatgggttctccaatacgtaaagaactccacaggagaaacagatgttccgacagaagtgcctgacttcatctcgcaacgtcggagatggctta\n+# acggtagtctcttcgctgctgtttacaccgtggtccatttctggaaaatctggcaatccggccacgggtttgttcgaaagtgcatgctgcagatcgaa\n+# gtagtctaccaggtcattagcatgatctttgcttggttcgccttgggcaacttttacttggtcttcaaaattcttgtgggctcgctgggtgatccgac\n+# tttacttggaaaagcaggtttggtgctatcggtattcttcgagtggtcttatgtcatctgcctcatcgcttgcttcattctcgccttgggcaatacgc\n+# cgaaaggaacgaagaaggtctatgttggcatgaccatctattgggcaatcgtgatgacttacctcatgtttgccactgtctatctcagcgtcaaatct\n+# gtccagtcggaaatcaaagatggctttaagattggcgatttgctgacgaataagacctttgcgacacttattctctctatgctttccacctacgtctt\n+# gtgtcaaaccctcaaatgtatcggctcatttgctaacaacacaatagggggcactaaaggcgccaacgatgcacctggaggcggttcagtcgcctctg\n+# gaaaagacggcaaagcaggtgtcgagcttccatccgatccagacgctcaatacaagaaagagctcgaagtcctcgccgaggcacgcaaagaagaagag\n+# tcgaaaacaagcgacgaagatgtcaagaaattctactacgcttctgttcgcagctggatcgtgatggcgtggatattcagcaacttgtcgttgatcac\n+# 
ccttgtgatgaaagccggttcggtcgggatcattaccaagaccaagacgagtgaggaggaggcgcagcgcaagaactctgacctttacctctacatta\n+# tcttgtggagtgtagcgggactcagtgcgttcagatttgttggctcggtttggttcttgattcatagaaagtttgctgggatctga]\n+# protein sequence = [MSRYSRIGESGRYDEDAPNRYSRPDAVPLQHIPSHEDFATDGQHSYMDTRDRLAAQPTYSVDNLAYSYGLREAYESTF\n+# QNIPEHPGRQHYDSHPAEEADPAYYDDDDTRPMLDRHHSNDASSSSVTLTVQVPRWATIQYVPLFKGNLVLDCPVPSKLVNQLATGDEQPRESTHMRY\n+# SAATCDPADFLDNRFTLRQKLYQQPRAVELFIVVTMYNEDDALLARTLIGIFQNVNYMESLRDSTMWGANAWKKIVVCIVSDGIAKINPRSAALLAGL\n+# GVFQQGIAKAKINKLETEAHIFEYTTQMEPVMRGETVSVQKGNTPVQLLFCLKQKNAQKINSHRWFFQAFGRCLEPNICVLLDAGTKPGKHSIFKLWN\n+# VFDRNRHCAGACGEIKADLGRGGKNLLNPLVATQNFEYKMSNILDKPLESAFGFISVLPGAFSAYRYEALQGEPMVKYFAGEKMHNNSNFFTANMYLA\n+# EDRILCYELVAKKGSKWVLQYVKNSTGETDVPTEVPDFISQRRRWLNGSLFAAVYTVVHFWKIWQSGHGFVRKCMLQIEVVYQVISMIFAWFALGNFY\n+# LVFKILVGSLGDPTLLGKAGLVLSVFFEWSYVICLIACFILALGNTPKGTKKVYVGMTIYWAIVMTYLMFATVYLSVKSVQSEIKDGFKIGDLLTNKT\n+# FATLILSMLSTYVLCQTLKCIGSFANNTIGGTKGANDAPGGGSVASGKDGKAGVELPSDPDAQYKKELEVLAEARKEEESKTSDEDVKKFYYASVRSW\n+# IVMAWIFSNLSLITLVMKAGSVGIITKTKTSEEEAQRKNSDLYLYIILWSVAGLSAFRFVGSVWFLIHRKFAGI]\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 28067ed4ea0e test-data/small.prot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.prot.xml Thu Mar 26 20:11:34 2015 -0400
b
b'@@ -0,0 +1,11012 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+<protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v6.xsd" summary_xml="/home/iracooke/venturiapg/search_results/Pirina/Pirina_pproph.prot.xml">\n+<protein_summary_header reference_database="/home/iracooke/venturiapg/vpirina6frame_decoy.fasta" residue_substitution_list="I -&gt; L" source_files="/home/iracooke/venturiapg/search_results/Pirina/Pirina_iproph.pep.xml" source_files_alt="/home/iracooke/venturiapg/search_results/Pirina/Pirina_iproph.pep.xml" min_peptide_probability="0.20" min_peptide_weight="0.50" num_predicted_correct_prots="1787.3" num_input_1_spectra="5700" num_input_2_spectra="24737" num_input_3_spectra="8680" num_input_4_spectra="714" num_input_5_spectra="124" initial_min_peptide_prob="0.05" total_no_spectrum_ids="28710.0" sample_enzyme="trypsin">\n+<program_details analysis="proteinprophet" time="2014-01-20T14:17:37" version=" Insilicos_LabKey_C++ (TPP v0.0 Development trunk rev 0, Build 201307090846 (linux))">\n+<proteinprophet_details occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" initial_peptide_wt_iters="2" nsp_distribution_iters="2" final_peptide_wt_iters="3">\n+      <nsp_information neighboring_bin_smoothing="Y">\n+         <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_incl="0.00" pos_freq="0.057" neg_freq="0.625" pos_to_neg_ratio="0.09"/>\n+         <nsp_distribution bin_no="1" nsp_lower_bound_excl="0.00" nsp_upper_bound_incl="0.31" pos_freq="0.037" neg_freq="0.152" pos_to_neg_ratio="0.24"/>\n+         <nsp_distribution bin_no="2" nsp_lower_bound_excl="0.31" nsp_upper_bound_incl="1.00" pos_freq="0.077" neg_freq="0.032" pos_to_neg_ratio="2.42"/>\n+         <nsp_distribution bin_no="3" nsp_lower_bound_excl="1.00" nsp_upper_bound_incl="2.50" pos_freq="0.113" neg_freq="0.033" 
pos_to_neg_ratio="3.39"/>\n+         <nsp_distribution bin_no="4" nsp_lower_bound_excl="2.50" nsp_upper_bound_incl="4.63" pos_freq="0.123" neg_freq="0.032" pos_to_neg_ratio="3.91"/>\n+         <nsp_distribution bin_no="5" nsp_lower_bound_excl="4.63" nsp_upper_bound_incl="7.90" pos_freq="0.143" neg_freq="0.032" pos_to_neg_ratio="4.50"/>\n+         <nsp_distribution bin_no="6" nsp_lower_bound_excl="7.90" nsp_upper_bound_incl="14.92" pos_freq="0.196" neg_freq="0.041" pos_to_neg_ratio="4.78"/>\n+         <nsp_distribution bin_no="7" nsp_lower_bound_excl="14.92" nsp_upper_bound_excl="inf" pos_freq="0.254" neg_freq="0.054" pos_to_neg_ratio="4.72" alt_pos_to_neg_ratio="4.78"/>\n+      </nsp_information>\n+      <ni_information>\n+      </ni_information>\n+      <protein_summary_data_filter min_probability="0.00" sensitivity="1.000" false_positive_error_rate="0.835" predicted_num_correct="1787" predicted_num_incorrect="9044"/>\n+      <protein_summary_data_filter min_probability="0.10" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>\n+      <protein_summary_data_filter min_probability="0.20" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>\n+      <protein_summary_data_filter min_probability="0.30" sensitivity="0.956" false_positive_error_rate="0.151" predicted_num_correct="1709" predicted_num_incorrect="305"/>\n+      <protein_summary_data_filter min_probability="0.40" sensitivity="0.916" false_positive_error_rate="0.095" predicted_num_correct="1638" predicted_num_incorrect="171"/>\n+      <protein_summary_data_filter min_probability="0.50" sensitivity="0.887" false_positive_error_rate="0.063" predicted_num_correct="1585" predicted_num_incorrect="106"/>\n+      <protein_summary_data_filter min_probability="0.60" sensitivity="0.853" false_positive_error_rate="0.036" predicted_num_correct="1525" predicted_num_incorrect="58"/>\n+      
<protein_summary_data_filter min_probability="0.70" '..b'\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n
+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\
n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+\n+
\n+\n+\n+\n+\n+\n+\n+\n+\n+</protein_summary>\n'
b
diff -r 000000000000 -r 28067ed4ea0e test-data/small_combined.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small_combined.gff Thu Mar 26 20:11:34 2015 -0400
[
@@ -0,0 +1,31 @@
+##gff-version 3
+scaffold10 sixframe CDS 513537 514187 . + 0 ID=scaffold10_frame_3_orf_6477
+scaffold10 sixframe CDS 346373 347986 . - 0 ID=scaffold10_frame_6_orf_3129
+# start gene g447
+scaffold10 AUGUSTUS gene 195564 196835 0.75 + . ID=g447
+scaffold10 AUGUSTUS transcript 195564 196835 0.75 + . ID=g447.t1;Parent=g447
+scaffold10 AUGUSTUS start_codon 195564 195566 . + 0 Parent=g447.t1
+scaffold10 AUGUSTUS intron 195714 195776 0.93 + . Parent=g447.t1
+scaffold10 AUGUSTUS CDS 195564 195713 0.82 + 0 ID=g447.t1.cds;Parent=g447.t1
+scaffold10 AUGUSTUS CDS 195777 196835 0.92 + 0 ID=g447.t1.cds;Parent=g447.t1
+scaffold10 AUGUSTUS stop_codon 196833 196835 . + 0 Parent=g447.t1
+# coding sequence = [atggcggacaatagagattacgcgccagatactctggccgttcacgccgacgatgcgatgaactcatatacagatgtcg
+# cgcctgctttacacgtttctacaacctaccggtatgataattcaagactggtcccgatttcagatgaagagctagatcccctcaccgccctccctgta
+# tactcccgcctctcggccccaaacatctcgcgcctcgaacttatactgagctccctgacgaacggacaggcactgacttattcgtctggtttaagtgc
+# atttcatgccatgcttgtgtttttacgaccgaaagttatcgctatcagcgacggctaccacggatgtcatggtgttattgctatactggaaaaattat
+# acggattgaagaaggtcaacctctgggatgagaaggaatgggatactttcggtttcggcgaaggcgatgtggtacacgtcgaaactcctctcaatcct
+# accggcgaagccatcaatcttgaggttttcagggagaaagctaggaaaaggggcgcgtatctgacagttgattcgacgtttgggccgccggggttgca
+# ggatccgttcgtgcaaggggcagatgtggttatgcattctgggaccaagtattttggaggacatagtgatatgctttgtggtgtcttggttgttcgga
+# aagaaaagaaagaatggattagggggttgtggaatgagaggatgtatttagggagtgtaatggggaatatggagagttggcttggtgtgaggagtttg
+# aggacattggagttgagggtggcgaggcagagtaagaatgctgagagtattgtgaagtggcttgacgcttccttgcggggggagggggatgatgccgt
+# cgttgtacagaagacggttttgaagattcagcatgcgagtttgcagaaggatgatatggattggattaagacgcaaatgccgggtgggtttggaccgg
+# tgtttgtgatatggatgaagtcgatgcagttagcaagggctcttcccgggaagctgaagctatttcatcatgctacgagtcttggaggcgtggaaagt
+# ctgattgaatggaggaaaatgagcgatagcggtgtagatgtcagagtgttgagagtcagcataggtgtagagcactgggaagacctgagggccgactt
+# cttggaagggttcaaggctttggctctgtcggacgaagtaaatggagcataa]
+# protein sequence = [MADNRDYAPDTLAVHADDAMNSYTDVAPALHVSTTYRYDNSRLVPISDEELDPLTALPVYSRLSAPNISRLELILSSL
+# TNGQALTYSSGLSAFHAMLVFLRPKVIAISDGYHGCHGVIAILEKLYGLKKVNLWDEKEWDTFGFGEGDVVHVETPLNPTGEAINLEVFREKARKRGA
+# YLTVDSTFGPPGLQDPFVQGADVVMHSGTKYFGGHSDMLCGVLVVRKEKKEWIRGLWNERMYLGSVMGNMESWLGVRSLRTLELRVARQSKNAESIVK
+# WLDASLRGEGDDAVVVQKTVLKIQHASLQKDDMDWIKTQMPGGFGPVFVIWMKSMQLARALPGKLKLFHHATSLGGVESLIEWRKMSDSGVDVRVLRV
+# SIGVEHWEDLRADFLEGFKALALSDEVNGA]
+# end gene g447
+###
\ No newline at end of file
b
diff -r 000000000000 -r 28067ed4ea0e test-data/small_prot.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small_prot.fasta Thu Mar 26 20:11:34 2015 -0400
b
b'@@ -0,0 +1,36668 @@\n+>lcl|scaffold10_frame_1_orf_3 64|150\n+FTQNEKTNRSCFHFQYPHLCRHPLKPDTL\n+>lcl|scaffold10_frame_1_orf_5 172|321\n+LHKPHRPTTKDRNLFIYRTSRWISKSRTQFSTKKTGSDKASKASVTIHQK\n+>lcl|scaffold10_frame_1_orf_7 337|507\n+IISPPLNSPIRPRTKDKSTSYRAIKHGLVTSNQTLQVGTHDQFCSHHLSKYVYTSQI\n+>lcl|scaffold10_frame_1_orf_8 511|672\n+TVQTIKAAFETRNHQMGIYLSENYTLPLISNANAAVLSRLKLGSVHCDGDRLKR\n+>lcl|scaffold10_frame_1_orf_9 676|879\n+TSMTQAIQKLPALNAKDYLRSLVSLSAFSFILVMANVVGVWLPGLPPGVGVVARLGAALGAALRLCAM\n+>lcl|scaffold10_frame_1_orf_11 898|1035\n+PAARAEQRLSSPARTPAAITRASCLALSPGVVGWGPRTPSKSSMAD\n+>lcl|scaffold10_frame_1_orf_12 1039|1131\n+GSRMVPPPMVPTSIQGMETEICRLPPTLYIL\n+>lcl|scaffold10_frame_1_orf_18 1231|1305\n+PRRLQRRCWRHGHCFHQRCLPLLIR\n+>lcl|scaffold10_frame_1_orf_28 1591|1656\n+CRDKCLQEQFSYFGDERSDWWS\n+>lcl|scaffold10_frame_1_orf_30 1675|1773\n+LSYQLQSRANFWLRLLCGHFQSSRKDRESFHHQ\n+>lcl|scaffold10_frame_1_orf_34 1816|1932\n+RLCLYLQPEHPLLLPSARSSSDRRSSWDQRHRLEGVSNQ\n+>lcl|scaffold10_frame_1_orf_35 1936|2043\n+RSHSKHHERARNRSACTASWKIPGDPRFLLLRECER\n+>lcl|scaffold10_frame_1_orf_39 2179|2274\n+AHLHTLALFECLPMSVARSMESSSLDHHPQTQ\n+>lcl|scaffold10_frame_1_orf_41 2341|2409\n+DWWRMEQPSFHLPWVADQLVLAP\n+>lcl|scaffold10_frame_1_orf_46 2506|2637\n+SSLESCRQLCLLCHQASGRRIGCQIDHLHFDQTRNELTDTRAHT\n+>lcl|scaffold10_frame_1_orf_48 2659|2724\n+QWWIASCVVLLKGCGQARCLRL\n+>lcl|scaffold10_frame_1_orf_49 2728|2793\n+KDPLEAASRHWKGLKEPARHCF\n+>lcl|scaffold10_frame_1_orf_55 2860|2976\n+TEEYWCRLLAFQRWVSRWGCCLMWTKHRGLLRHFRRHCG\n+>lcl|scaffold10_frame_1_orf_61 3073|3216\n+TGGCSQRTESMQQGSRIGGDRLARQTQLQPPKLQSQPSSHELSRRHNP\n+>lcl|scaffold10_frame_1_orf_63 3247|3327\n+SVQHWFYRFQAGHPRVFCPKNGPRRLW\n+>lcl|scaffold10_frame_1_orf_64 3331|3399\n+GLWAFRSPRSQPKTVVPRIAAPY\n+>lcl|scaffold10_frame_1_orf_66 3466|3561\n+ERLRQARNLSPIFSWTRKEPQMSDAWTGTQVS\n+>lcl|scaffold10_frame_1_orf_69 3610|3705\n+SLCYLSACIPSEDQESRSKHFATIISLLPMCE\n+>lcl|scaffold10_frame_1_orf_72 
3793|3912\n+IQGSLSRHIENFQPPRRAWNRPKLCPVKFARGDSWNMEER\n+>lcl|scaffold10_frame_1_orf_76 4006|4269\n+GRNSSLCVRVALRTYCTNSRCPLSCRDHSLGSANSSCCLLLRINICYLQASYQNQLFLQCQQAKIAFRKDCFPTTPDMSCLLLLPQIL\n+>lcl|scaffold10_frame_1_orf_78 4327|4533\n+FAVITRLVCGACAHCQSCAIALVRKRTQALCQRISDQAPSPSMYSLPSVFLSWMPRVFYVLERRRVGEA\n+>lcl|scaffold10_frame_1_orf_80 4582|4644\n+GSGAKGLQGAEGRYKAKWLCS\n+>lcl|scaffold10_frame_1_orf_86 4795|5052\n+RSSSLSCCGSLHQWSRLQEPDFFPKEGVHRRDGEGKGGKKKNRRCSWSSRGWAVPWKGEVLRNCAGPGGKVENISKDFHRNLEFVT\n+>lcl|scaffold10_frame_1_orf_92 5233|5385\n+VDIAFVLNFPITIHPHHGSSLILVTDSTIGSQPGMKNVCPAFGVQCIKYAC\n+>lcl|scaffold10_frame_1_orf_94 5392|5472\n+QTTAIVRRCVPALNSMYLPLWTLKLRY\n+>lcl|scaffold10_frame_1_orf_99 5608|5670\n+RALHRIVLLMLQRGGRRRGWN\n+>lcl|scaffold10_frame_1_orf_100 5674|5775\n+GYTCLGRIEWVTGRRNEGGRRRWRRTWTHCGMYR\n+>lcl|scaffold10_frame_1_orf_101 5779|5898\n+IGRDQSNIKINLAGSGSDGSDGMKEVGECRHTVYPVSTTV\n+>lcl|scaffold10_frame_1_orf_109 6142|6324\n+LYEHNRRTVKLLWLEFSAEILFPSPFLDVYLRCRRDWPYWLVRPRVDTSLVIRLVTSSTCN\n+>lcl|scaffold10_frame_1_orf_111 6352|6759\n+YVLSKKLSSFLKLSITLYFDYRTKHLGFSNVIKVRLRALLRLYEPRVGLSEHEALANLANFGGYLHSTAMGWMGQARYLPDDPINKSSTIVRYFFLLKSWHVWFLHSRSKNLSTLCSVIARIIGGIKIAESLDTRL\n+>lcl|scaffold10_frame_1_orf_113 6769|6894\n+VDGPATISLIEFKRDSYYQVMELLTLVSPLMMWKPYHAEARR\n+>lcl|scaffold10_frame_1_orf_117 7045|7326\n+PYPGSIVSEPAFAANLKHASTLTSSLLSTCQRIVKTIQPILYHSDFFSKGTRQNRNLSPLRQHESKERHKSAAIIAKTKFEVTTKHLLSSDCNW\n+>lcl|scaffold10_frame_1_orf_118 7330|7731\n+LCLPKLSVILISPTNEILLLHRVQTSRSFPSAHVFPGGNVDTFHDGTVPTDGDPSRHTDSETYRLAAVRETFEESGILLAYNNGFGRLLEVEDTEREAGRKAVHKKEVPFTKWLSKKGGRPDTGMQIAQRIARA\n+>lcl|scaffold10_frame_1_orf_121 7753|7926\n+TTSSLLLDGSRHRTYPSALQLRCIYISSQCQPQTPQKLEALCQALGKPSYQTQPTTAD\n+>lcl|scaffold10_frame_1_orf_122 7930|8028\n+NTLKLNFYQQQLGWSKQYQGRSFYFLHNSFSLQ\n+>lcl|scaffold10_frame_1_orf_124 8071|8169\n+NQKHWNNKGSSSNNSSSVEGIRLGAKLAFLRRH\n+>lcl|scaffold10_frame_1_orf_129 8257|8352\n+RTSLCLSSSQRKVQESLKSRREKRSCKMFEMM\n+>lcl|scaffold10_frame_1_orf_130 
8356|8478\n+RRLGQLTRIICNICRQRRRSCEVDFSRNVSTLFIASVSFHL\n+>lcl|scaffold10_frame_1_orf_132 8485|8562\n+VHDFLFSQLISSRECKDPSPFCKLEH\n+>lcl|scaffold10_frame_1_orf_137 8719|8883\n+VS'..b'597692 597757|598152 598212|599515\n+MGHLSSNQIKPYQPPDLIHDLRRPSACLDNTMDSASATVTVTKSSSSSTASASATTSSDPLVGTHRTGPSSVWALGSTLLPMFLFASVLVFAFLVLRARHKRIYAPRTFLPTLREDEKTPSTTDGMFAWIKDFMAIPDDFVLNHQSLDQYLFLRFLKMITIMCAVGCLITWPLLLPINATGGEGESGLNAVSISNIKSPNRFYAHAIVAWLFLGFVMLMITRETIYFINLRQAFLMTPRNASRISSRTVLFTDVPDEYLNERSLRTICSSVRRIWLATDCYKLAKVVDEREDAALRLETAEVQLSQTANKERLKHKTPLSHSEGHDPEMNGSCAAKWIKHSQRPTHRLRFLGKKVDSINWTRATLPGLVEAVKNAQSAHRNGEKKYIGAVFVEFETQRAAQVAFQLTAHELPLKMQARCIGIPPPQILWQNLGMKAWQRVTKGIWATVFVTAMILFWSLPVALVGVLSNIDYLTNKIPWLGFINRMPEVILGALKGLLPSAFLALLVMVVPVVLRRLAIMAGAVSQQEVELRTQSWFFAFQVIQVFLVTTFSSGASAVVTQIVQYPTSAPKLLAQNLPKASNFYISYFLVFGLASSSKTLFNFMGLLRYGVGGILKRTPRQQYEHFLNLSHLKWGSEYPKWTLLAVIAIAYSCIAPLVLGFATIGLGLVYLVARYNSIYTLATTIDTKGQAYGRAMQQLTVGVYLAEICLIGLFAIKTGEGVASIGPLLLVVVLLITTALFHAAMRRALHPLTRTLPSSLLTRAEESRYSSAVLEEGVIGYSYEGEIRYHSSNEGSVREVSEKIGSIRPPSSHGDSIRPAPRDLDYGYLVPDTVENDSVIARVLNEEEETEKAQAISNSIIARSYSQRLASVEPRNPNSEKQQSSSENTMRTAEEGRSLEITEDDRRDFEIREMRISAAARASAAEDAWLRPTPTHHHHHHHHNNNNRDTFQSVWSASPVPSIAPPPTGSLLNTFFRPNKFATYAHLKRKFEEGLFAQPVPMYAPEVARTAYFHPAISRQCPTLWFVRDQMGISEQEMRHSGRILKVSDRGAWFDEKGRVRWDDGDLRGAVAWERGRDVLI\n+>lcl|scaffold10_fwd_g580.t4 596085|601204                 596085|596168 596225|596394 596447|596739 596787|597692 597757|598152 598212|599343 600407|601019 
601073|601204\n+MDSASATVTVTKSSSSSTASASATTSSDPLVGTHRTGPSSVWALGSTLLPMFLFASVLVFAFLVLRARHKRIYAPRTFLPTLREDEKTPSTTDGMFAWIKDFMAIPDDFVLNHQSLDQYLFLRFLKMITIMCAVGCLITWPLLLPINATGGEGESGLNAVSISNIKSPNRFYAHAIVAWLFLGFVMLMITRETIYFINLRQAFLMTPRNASRISSRTVLFTDVPDEYLNERSLRTICSSVRRIWLATDCYKLAKVVDEREDAALRLETAEVQLSQTANKERLKHKTPLSHSEGHDPEMNGSCAAKWIKHSQRPTHRLRFLGKKVDSINWTRATLPGLVEAVKNAQSAHRNGEKKYIGAVFVEFETQRAAQVAFQLTAHELPLKMQARCIGIPPPQILWQNLGMKAWQRVTKGIWATVFVTAMILFWSLPVALVGVLSNIDYLTNKIPWLGFINRMPEVILGALKGLLPSAFLALLVMVVPVVLRRLAIMAGAVSQQEVELRTQSWFFAFQVIQVFLVTTFSSGASAVVTQIVQYPTSAPKLLAQNLPKASNFYISYFLVFGLASSSKTLFNFMGLLRYGVGGILKRTPRQQYEHFLNLSHLKWGSEYPKWTLLAVIAIAYSCIAPLVLGFATIGLGLVYLVARYNSIYTLATTIDTKGQAYGRAMQQLTVGVYLAEICLIGLFAIKTGEGVASIGPLLLVVVLLITTALFHAAMRRALHPLTRTLPSSLLTRAEESRYSSAVLEEGVIGYSYEGEIRYHSSNEGSVREVSEKIGSIRPPSSHGDSIRPAPRDLDYGYLVPDTVENDSVIARVLNEEEETEKAQAISNSIIARSYSQRLASVEPRNPNSEKQQSSSENTMRTAEEGRSLEITEDDRRDFEIREMRISAAARASAAEDAWLRPTPTHHHHHHHHNNNNRDTFQSVWSASPVPSIAPPPTGSLLNTFFRPNKFATYAHLKRKFEEGLFAQPVPMYAPEVARTAYFHPAISRQCPTLXCPTVMLESSQTTLVELESWRPSRSCAICIKRSLRDFAGVRKGHGVDTSPIVSPQWLFVFSDSKQRYQSTSPSSIKVLSEDLAEERTVLKQVNREMGDEVDGFRNEQIQAQIKLHVMKGDVERLEMKLMRADAKIVILESGPAIKANGRERAVVDSSESEEEDHTIAKPDCQDGVDRCPLCGWEIEDGQCTDLVHCGKMSKESGDIDIESVSSDGSEDTDYDGENPDQYEEDLYDRNGDLKEYEELNV\n+>lcl|scaffold10_fwd_g580.t5 595745|601204                         595745|595749 595997|596168 596225|596394 596447|596739 596787|597692 597757|598152 598212|599343 600407|601019 
601073|601204\n+MGHLSSNQIKPYQPPDLIHDLRRPSACLDNTMDSASATVTVTKSSSSSTASASATTSSDPLVGTHRTGPSSVWALGSTLLPMFLFASVLVFAFLVLRARHKRIYAPRTFLPTLREDEKTPSTTDGMFAWIKDFMAIPDDFVLNHQSLDQYLFLRFLKMITIMCAVGCLITWPLLLPINATGGEGESGLNAVSISNIKSPNRFYAHAIVAWLFLGFVMLMITRETIYFINLRQAFLMTPRNASRISSRTVLFTDVPDEYLNERSLRTICSSVRRIWLATDCYKLAKVVDEREDAALRLETAEVQLSQTANKERLKHKTPLSHSEGHDPEMNGSCAAKWIKHSQRPTHRLRFLGKKVDSINWTRATLPGLVEAVKNAQSAHRNGEKKYIGAVFVEFETQRAAQVAFQLTAHELPLKMQARCIGIPPPQILWQNLGMKAWQRVTKGIWATVFVTAMILFWSLPVALVGVLSNIDYLTNKIPWLGFINRMPEVILGALKGLLPSAFLALLVMVVPVVLRRLAIMAGAVSQQEVELRTQSWFFAFQVIQVFLVTTFSSGASAVVTQIVQYPTSAPKLLAQNLPKASNFYISYFLVFGLASSSKTLFNFMGLLRYGVGGILKRTPRQQYEHFLNLSHLKWGSEYPKWTLLAVIAIAYSCIAPLVLGFATIGLGLVYLVARYNSIYTLATTIDTKGQAYGRAMQQLTVGVYLAEICLIGLFAIKTGEGVASIGPLLLVVVLLITTALFHAAMRRALHPLTRTLPSSLLTRAEESRYSSAVLEEGVIGYSYEGEIRYHSSNEGSVREVSEKIGSIRPPSSHGDSIRPAPRDLDYGYLVPDTVENDSVIARVLNEEEETEKAQAISNSIIARSYSQRLASVEPRNPNSEKQQSSSENTMRTAEEGRSLEITEDDRRDFEIREMRISAAARASAAEDAWLRPTPTHHHHHHHHNNNNRDTFQSVWSASPVPSIAPPPTGSLLNTFFRPNKFATYAHLKRKFEEGLFAQPVPMYAPEVARTAYFHPAISRQCPTLXCPTVMLESSQTTLVELESWRPSRSCAICIKRSLRDFAGVRKGHGVDTSPIVSPQWLFVFSDSKQRYQSTSPSSIKVLSEDLAEERTVLKQVNREMGDEVDGFRNEQIQAQIKLHVMKGDVERLEMKLMRADAKIVILESGPAIKANGRERAVVDSSESEEEDHTIAKPDCQDGVDRCPLCGWEIEDGQCTDLVHCGKMSKESGDIDIESVSSDGSEDTDYDGENPDQYEEDLYDRNGDLKEYEELNV\n'
b
diff -r 000000000000 -r 28067ed4ea0e tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Mar 26 20:11:34 2015 -0400
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tool_dependency>
+
+    <package name="blast+" version="2.2.29">
+        <repository changeset_revision="a2ec897aac2c" name="package_blast_plus_2_2_29" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+
+</tool_dependency>