Repository 'cherry_pick_fasta'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/cherry_pick_fasta

Changeset 0:e3aee4ba49c6 (2017-10-15)
Next changeset 1:ea8fde9c6f82 (2019-10-09)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
added:
cherry_pick_fasta.py
cherry_pick_fasta.xml
test-data/input.fa
test-data/output.fa
b
diff -r 000000000000 -r e3aee4ba49c6 cherry_pick_fasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cherry_pick_fasta.py Sun Oct 15 13:26:45 2017 -0400
b
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Cherry-pick the fasta sequences whose header/name contains a query string
+"""
+
+import argparse
+
+
+def Parser():
+    """Parse the command line of the script.
+
+    Returns:
+        The argparse.Namespace produced by parse_args(), with the
+        attributes ``input``, ``query_string`` and ``output`` holding the
+        strings given on the command line.
+
+    The three options are required: when one of them is missing, argparse
+    prints a usage message and exits instead of handing ``None`` to the
+    caller, which would then fail when opening the files.
+    """
+    the_parser = argparse.ArgumentParser(
+        description="Cherry pick fasta sequences")
+    the_parser.add_argument('--input', action="store", type=str,
+                            required=True, help="input fasta file")
+    the_parser.add_argument('--query-string', dest="query_string",
+                            action="store", type=str, required=True,
+                            help="header containing the string will be\
+                                  extracted as well as the corresponding\
+                                  sequence")
+    the_parser.add_argument(
+        '--output', action="store", type=str, required=True,
+        help="output fasta file")
+    args = the_parser.parse_args()
+    return args
+
+
+def __main__():
+    """Copy the fasta records whose header contains the query string.
+
+    The command-line arguments are read through Parser(). The input file
+    is read in one go and cut into records at each ">" character. A record
+    is written to the output file, prefixed with ">" and stripped of its
+    trailing whitespace, when the query string occurs in its first line,
+    i.e. in its header.
+    """
+    args = Parser()
+    search_term = args.query_string
+    # Context managers close both files even if reading or writing fails.
+    with open(args.input, "r") as fasta_in:
+        records = fasta_in.read().split(">")
+    with open(args.output, "w") as fasta_out:
+        # The chunk that precedes the first ">" is not a record (it is an
+        # empty string when the file starts with a header), so it is skipped
+        # rather than being written out as a bare ">" line.
+        for record in records[1:]:
+            # Only the first line of a record is its header; testing the
+            # whole record would also select records on their residues.
+            header = record.split("\n", 1)[0]
+            if search_term in header:
+                fasta_out.write(">%s\n" % record.rstrip())
+
+
+if __name__ == "__main__":
+    __main__()
b
diff -r 000000000000 -r e3aee4ba49c6 cherry_pick_fasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cherry_pick_fasta.xml Sun Oct 15 13:26:45 2017 -0400
b
@@ -0,0 +1,45 @@
+<tool id="cherry_pick_fasta" name="Pick Fasta sequences" version="1.0.0">
+  <description>with header satisfying a query string</description>
+  <command interpreter="python">cherry_pick_fasta.py
+                                   --input $input
+                                   --query-string "$query"
+                                   --output $output
+  </command>
+
+  <inputs>
+    <param name="query" type="text" size="30" value="" label="Select sequences with this string in their header" help="example: gi|40557596">
+    <sanitizer>
+        <valid initial="string.printable">
+          <remove value="&quot;"/>
+          <remove value="\"/>
+        </valid>
+        <mapping initial="none">
+          <add source="&quot;" target="\&quot;"/>
+          <add source="\" target="\\"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param format="fasta" label="Source file" name="input" type="data" />
+  </inputs>
+  <outputs>
+    <data name="output" format="fasta" label="${tool.name} on ${on_string} including '${query.value}' in header" />
+  </outputs>
+  <tests>
+    <test>
+        <param ftype="fasta" name="input" value="input.fa" />
+        <param name="query" value="gi|81971654" />
+        <output name="output" ftype="fasta" file="output.fa" />
+    </test>
+  </tests>
+  <help>
+**What it does**
+
+This tool retrieves nucleotide/peptide sequences from a fasta file whose headers match a given query string.
+
+It is Copyright © 2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_.
+
+.. _CNRS and University Pierre et Marie Curie: http://www.upmc.fr/en/index.html
+.. _MIT license: http://opensource.org/licenses/MIT
+
+  </help>
+</tool>
b
diff -r 000000000000 -r e3aee4ba49c6 test-data/input.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fa Sun Oct 15 13:26:45 2017 -0400
b
b'@@ -0,0 +1,56346 @@\n+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_1/2_Confidence_0.667_Length_1344_hit1_IdMatch=43.46,AligLength=451,E-val=2e-122\n+TAGATAAGGTTTGCTCATTTCTTGAGGATGCTTTACCAGGTATGGTCGAGCACGTTACGC\n+TCGTAGCACAAAATACATCCGCGTCAGCCAAGGTGTTATCTGACGAGTTGATCAAATCAA\n+TGCTTTGCATTGTTTTGATTTGCTTGTTGATTGAAACCAAGTTCTATAAGACCGCTTTCG\n+CGGTACTTATAGTGGTTGCTCTACGTGTTTTCGGGTACAGTGAGCAAATAATTGAGACAG\n+CTATGGACATGTATCGCGTAATTAGGGCTCCAAAGGCTCAAGGTAATATGGAAGATGTCG\n+TTTTCCATCCGTGGTTGAACACGTGTGGAAAGTTGATTTTCCTACTTATCGCTGTCCTGT\n+GTCTCAAGAAATTACCAGGAAAGAACGACGTAGACACTTTCATGCGCAGGCTCGACAGCT\n+TACCCAAAGCTGTTAAGGGTGCGACACAACTACATGAATGGGTGTCAAAATACTTCGATC\n+TCTCTTTGGATCACGTCAAGGCGATGATTGTTGGTAAATCTTGTGCCGAAATGAAGAAGG\n+CTGAATCATCAAGCGCCAAAGTTTTGGCTTGGGCCGCTAGAGTTCAAGATTTCGTCAGAT\n+TGGAACAACGAAGTAAAATCGATAGTGATATCGCTGTCGCCAACGAGGCTGAAGCCTTGT\n+ATCACACTGGATTGCAATTTGCAGGAGACACTCTGTTACCTCCAGAATTGCACAAGGTCG\n+TGAACAGTGCGCTAAGACCAGCCCGCGATATATATGAGTACGTCACCCGCTCCCCAATAA\n+AAGGAGGGAGTCGTAAGATGAGACCCTTGATGATTTGGCTAGCTGGCCAGTCAGGAATTG\n+GGAAAACCTCTATGGTGGATCCTCTATGTATCGATTTGCTTCGAGCAATGGGTTATGTGG\n+GACCTGAACATCTCCACTCGTTGGTGTATGGCCGCCAAGTTGAGACGGAGTACTGGGATG\n+GTTACAAAGCCCACAAGATAGTGATCTATGATGATGCTTTTCAGCTGAAAGATGATGCTG\n+TGAACAGGAATTTGGAGGTATTTGAGGTTATACGTTCTTGCAACACGTATCCTCAACACC\n+TTCATATGGCTTGTCTCTCGGATAAAAACACTTTTTCAGTAGCGGAAGTGTACATCTACA\n+CTACCAACGAAATGAATGTCAAACTTGAGTCGCTGACTCATGAACAAGCATTCTACAACC\n+GCATGAGTGAAAACGCGTTCACTGTGCGTCCAAAAGAGGCTTATCGTCTAGTCGAAGAAG\n+GATCAACCGGCAATAAGCAGTATCGTTTGGACAAAACGAAAACCAAAGGAGCTATCGATC\n+TCGATGTGTACGAATTCGTGCGC\n+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_63_Transcript_1/2_Confidence_0.333_Length_1607_hit1_IdMatch=52.99,AligLength=536,E-val=0.0\n+CTAGAATCACAGCTCAGATGAGTTTTGAGGCACCGAAGGACGCAATTGAAGGACCGTGTC\n+AAACCCCGGAAGGATTGTTCGCCCCTATTGGCAAAGCGCCGATAGGCGTAGGGATGTCCA\n+CCAAGACGGCTATACGACCTTCACGCTTGTATG
GAAGAATAACTAAACCGACAACTGCAC\n+CATCATACTTGGGTAAAGACGCGCTTTATCGTGGATTGACCAAGTGTGGTGTTCGCACAG\n+TTAATATTCAACCAGAATACATAGACGCAGCGGCGAATGACGTCGCACGCTATGTGTTAA\n+ACCAGCATGTTGGTCACGTGGATAGGGAACGATACACACGTATATTGTCGTACGAGGAGG\n+CTGTTAAGGGCGTGCCGTACGATGATTTCATGAAGTCAGTGACTCGAGTCACTTCCCCTG\n+GTTACCCCTATTGCTTGGATACTGGAAACATGCCAGGGAAAAGCAAATGGATGGGGCTCG\n+AACAAGATTTCGATATGACAAGTCCAGCTGCTTTGGCTTTGAGGAAAGATGTTGAAAGTT\n+TGTTGGAAGATTGCAAAAATGGCTTAGTCCGTGATGTGGTGTTTGTCGACACTCTCAAGG\n+ATGAAAGGCGCGAGCTGATAAAGGTGGAAGCAAAGAAGACTCGAGTCTTTTCTGCTGGAC\n+CACAGCATTTTGTAATAGCTTTCCGGCAATACTTTCTTCCATTCTCTGCCTGGGTCATGC\n+ATAACAGAATCGAAAACGAAGTAGCCGTTGGAACAAACCCCTTCTCAATGGATTGGCACA\n+ACATTGCTGTGCGTATGCGTAGTAAAGGGAGACACATTATTGCTGGAGATTTTAGCAATT\n+TTGATGGATCCCTCAACGCCCAAGTTCTCTGGACAATATTTTGGAAGATATTTGTCCCGT\n+GGCTTAATGATATTGAACCACTTGGTACACCCAAGAATGAGGAGAATCTGCGGGTCTGCA\n+CGAGTCTATGGACGCACTTGGTGCACTCCGTGCACATTTGTGGAGATAACTTGTACATGT\n+GGACACATTCTCAACCATCGGGCAATCCCTTCACGGTGATAATCAATAGTTTGTATAACT\n+CAGTTATCATGCGTGTCGTGTGGCAATACATAATGGCGAAAGAAGAACCTAAGTTACGCA\n+CAATGAACCATTTCAATCAACATGTTGCTATGGTTTCATATGGTGATGACAATCTACTTA\n+ACATCTCGGAAGGGGTAATTGATATCTTCAACCAACTTACCATCTCGGAAGCCATGCGTT\n+GGATAGGACACGAATACACAGATGAAACGAAAACAGGCGAGGCTGCGCCCTATCGGACAT\n+TGGAAGAAGTCCGTTTCCTTAAAAGAGGGTTCAGAATGGATCACCTCTTGTGTCGGTGGG\n+TAGCTCCTTTGAAGAAGGATGTCATCTACGAAATGCTTAATTGGACGCGCAAAGGGATTA\n+ACCCAGATGATGTGACGATGATGATCATTGATACAGCATTTAGGGAGATCTCTTATCACG\n+GAAGGGAAGCTTTCGAGAAGCTGCGAGGGCAGATACTTGAGCAGCGGGATGTGTTGGTTG\n+AATATCCTCAA\n+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_50_Transcript_1/1_Confidence_0.000_Length_1442_hit1_IdMatch=24.95,AligLength=537,E-val=8e-47\n+GAATTCGTGCGCATGCAAAGAATAAATGACCACCCTTCGGGTTGGAAAGCTACGGATGAA\n+GTATATGGCTATGCAGAGTTCTCGAAACTAATGTGTGCTGAATGGAAGAGAAGGAAGACA\n+GAACATCAGAATACTGTTGACTTCCTTAAGAAGTATGCAGAGCGACCCTTCGAGACCAAC\n+CCCGGACCAGTGGAGGATATCCCAATAAGACACGATGATGTCGAGCAGGGGGTAGAAGCG\n+CAGATGGGTCGAGATGCAGATTGGTTTAACAATGACATAGCGGAACGTATAG
CGCGTGGA\n+CAGGATATTACTGATATCTTGTATGAGTATGCTGAAGATGACGAGTTGC'..b'\n+\n+>gi|451927572|gb|AGF85450.1|_directed_RNA_polymerase_(II)_subunit_1__Moumouvirus_goulette--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=44.44,AligLength=36,E-val=4e-04\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|156068623|gb|ABU44330.1|_hypothetical_protein_AR158_C785L__Paramecium_bursaria_Chlorella_virus_A--Locus_285_Transcript_1/1_Confidence_0.000_Length_112_hit1_IdMatch=55.56,AligLength=36,E-val=7e-04\n+TATTAAAATCGAAGGGACATACTGTAGGTTTTATGGGAGACGGAATTAACGATGCAGCAG\n+CGATTAAAGAAGCTGATGTCGGAATTTCTGTAGACACCGGAGCAGATATT\n+\n+>gi|558482105|gb|AHA55668.1|_putative_DNA-dependent_RNA_polymerase_II_largest_subunit__Emiliania_hux--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=50.0,AligLength=36,E-val=8e-05\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|347601122|gb|AEP15608.1|_DNA-directed_RNA_polymerase_subunit_beta__Emiliania_huxleyi_virus_207--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=50.0,AligLength=36,E-val=8e-05\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|448932900|gb|AGE56458.1|_calcium-transporting_ATPase,_plasma_membrane-type__Paramecium_bursaria--Locus_285_Transcript_1/1_Confidence_0.000_Length_112_hit1_IdMatch=55.56,AligLength=36,E-val=3e-04\n+TATTAAAATCGAAGGGACATACTGTAGGTTTTATGGGAGACGGAATTAACGATGCAGCAG\n+CGATTAAAGAAGCTGATGTCGGAATTTCTGTAGACACCGGAGCAGATATT\n+\n+>gi|347481891|gb|AEO97877.1|_DNA-directed_RNA_polymerase_subunit_beta__Emiliania_huxleyi_virus_84--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=50.0,AligLength=36,E-val=8e-05\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|5579525
11|gb|AHA46305.1|_DNA-directed_RNA_polymerase_subunit_alpha__Insectomime_virus--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=54.84,AligLength=31,E-val=3e-04\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|326784736|gb|AEA06870.1|_DNA-directed_RNA_polymerase_subunit_alpha__Lausannevirus--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=58.06,AligLength=31,E-val=7e-05\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|327409562|ref|YP_004346982.1|_DNA-directed_RNA_polymerase_subunit_alpha__Lausannevirus--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=58.06,AligLength=31,E-val=7e-05\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|566082702|gb|AHC55112.1|_putative_DNA-directed_RNA_polymerase_subunit_Rpb1__Tunisvirus_fontaine2--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=54.84,AligLength=31,E-val=3e-04\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_5/26_Confidence_0.077_Length_365_hit1_IdMatch=89.47,AligLength=19,E-val=2e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_2/26_Confidence_0.051_Length_287_hit1_IdMatch=89.47,AligLength=19,E-val=1e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_3/26_Confidence_0.103_Length_422_hit1_IdMatch=89.47,AligLength=19,E-val=2e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_4/26_Confidence_0.051_Length_30
6_hit1_IdMatch=89.47,AligLength=19,E-val=1e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+\n'
b
diff -r 000000000000 -r e3aee4ba49c6 test-data/output.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fa Sun Oct 15 13:26:45 2017 -0400
b
@@ -0,0 +1,109 @@
+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_1/2_Confidence_0.667_Length_1344_hit1_IdMatch=43.46,AligLength=451,E-val=2e-122
+TAGATAAGGTTTGCTCATTTCTTGAGGATGCTTTACCAGGTATGGTCGAGCACGTTACGC
+TCGTAGCACAAAATACATCCGCGTCAGCCAAGGTGTTATCTGACGAGTTGATCAAATCAA
+TGCTTTGCATTGTTTTGATTTGCTTGTTGATTGAAACCAAGTTCTATAAGACCGCTTTCG
+CGGTACTTATAGTGGTTGCTCTACGTGTTTTCGGGTACAGTGAGCAAATAATTGAGACAG
+CTATGGACATGTATCGCGTAATTAGGGCTCCAAAGGCTCAAGGTAATATGGAAGATGTCG
+TTTTCCATCCGTGGTTGAACACGTGTGGAAAGTTGATTTTCCTACTTATCGCTGTCCTGT
+GTCTCAAGAAATTACCAGGAAAGAACGACGTAGACACTTTCATGCGCAGGCTCGACAGCT
+TACCCAAAGCTGTTAAGGGTGCGACACAACTACATGAATGGGTGTCAAAATACTTCGATC
+TCTCTTTGGATCACGTCAAGGCGATGATTGTTGGTAAATCTTGTGCCGAAATGAAGAAGG
+CTGAATCATCAAGCGCCAAAGTTTTGGCTTGGGCCGCTAGAGTTCAAGATTTCGTCAGAT
+TGGAACAACGAAGTAAAATCGATAGTGATATCGCTGTCGCCAACGAGGCTGAAGCCTTGT
+ATCACACTGGATTGCAATTTGCAGGAGACACTCTGTTACCTCCAGAATTGCACAAGGTCG
+TGAACAGTGCGCTAAGACCAGCCCGCGATATATATGAGTACGTCACCCGCTCCCCAATAA
+AAGGAGGGAGTCGTAAGATGAGACCCTTGATGATTTGGCTAGCTGGCCAGTCAGGAATTG
+GGAAAACCTCTATGGTGGATCCTCTATGTATCGATTTGCTTCGAGCAATGGGTTATGTGG
+GACCTGAACATCTCCACTCGTTGGTGTATGGCCGCCAAGTTGAGACGGAGTACTGGGATG
+GTTACAAAGCCCACAAGATAGTGATCTATGATGATGCTTTTCAGCTGAAAGATGATGCTG
+TGAACAGGAATTTGGAGGTATTTGAGGTTATACGTTCTTGCAACACGTATCCTCAACACC
+TTCATATGGCTTGTCTCTCGGATAAAAACACTTTTTCAGTAGCGGAAGTGTACATCTACA
+CTACCAACGAAATGAATGTCAAACTTGAGTCGCTGACTCATGAACAAGCATTCTACAACC
+GCATGAGTGAAAACGCGTTCACTGTGCGTCCAAAAGAGGCTTATCGTCTAGTCGAAGAAG
+GATCAACCGGCAATAAGCAGTATCGTTTGGACAAAACGAAAACCAAAGGAGCTATCGATC
+TCGATGTGTACGAATTCGTGCGC
+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_63_Transcript_1/2_Confidence_0.333_Length_1607_hit1_IdMatch=52.99,AligLength=536,E-val=0.0
+CTAGAATCACAGCTCAGATGAGTTTTGAGGCACCGAAGGACGCAATTGAAGGACCGTGTC
+AAACCCCGGAAGGATTGTTCGCCCCTATTGGCAAAGCGCCGATAGGCGTAGGGATGTCCA
+CCAAGACGGCTATACGACCTTCACGCTTGTATGGAAGAATAACTAAACCGACAACTGCAC
+CATCATACTTGGGTAAAGACGCGCTTTATCGTGGATTGACCAAGTGTGGTGTTCGCACAG
+TTAATATTCAACCAGAATACATAGACGCAGCGGCGAATGACGTCGCACGCTATGTGTTAA
+ACCAGCATGTTGGTCACGTGGATAGGGAACGATACACACGTATATTGTCGTACGAGGAGG
+CTGTTAAGGGCGTGCCGTACGATGATTTCATGAAGTCAGTGACTCGAGTCACTTCCCCTG
+GTTACCCCTATTGCTTGGATACTGGAAACATGCCAGGGAAAAGCAAATGGATGGGGCTCG
+AACAAGATTTCGATATGACAAGTCCAGCTGCTTTGGCTTTGAGGAAAGATGTTGAAAGTT
+TGTTGGAAGATTGCAAAAATGGCTTAGTCCGTGATGTGGTGTTTGTCGACACTCTCAAGG
+ATGAAAGGCGCGAGCTGATAAAGGTGGAAGCAAAGAAGACTCGAGTCTTTTCTGCTGGAC
+CACAGCATTTTGTAATAGCTTTCCGGCAATACTTTCTTCCATTCTCTGCCTGGGTCATGC
+ATAACAGAATCGAAAACGAAGTAGCCGTTGGAACAAACCCCTTCTCAATGGATTGGCACA
+ACATTGCTGTGCGTATGCGTAGTAAAGGGAGACACATTATTGCTGGAGATTTTAGCAATT
+TTGATGGATCCCTCAACGCCCAAGTTCTCTGGACAATATTTTGGAAGATATTTGTCCCGT
+GGCTTAATGATATTGAACCACTTGGTACACCCAAGAATGAGGAGAATCTGCGGGTCTGCA
+CGAGTCTATGGACGCACTTGGTGCACTCCGTGCACATTTGTGGAGATAACTTGTACATGT
+GGACACATTCTCAACCATCGGGCAATCCCTTCACGGTGATAATCAATAGTTTGTATAACT
+CAGTTATCATGCGTGTCGTGTGGCAATACATAATGGCGAAAGAAGAACCTAAGTTACGCA
+CAATGAACCATTTCAATCAACATGTTGCTATGGTTTCATATGGTGATGACAATCTACTTA
+ACATCTCGGAAGGGGTAATTGATATCTTCAACCAACTTACCATCTCGGAAGCCATGCGTT
+GGATAGGACACGAATACACAGATGAAACGAAAACAGGCGAGGCTGCGCCCTATCGGACAT
+TGGAAGAAGTCCGTTTCCTTAAAAGAGGGTTCAGAATGGATCACCTCTTGTGTCGGTGGG
+TAGCTCCTTTGAAGAAGGATGTCATCTACGAAATGCTTAATTGGACGCGCAAAGGGATTA
+ACCCAGATGATGTGACGATGATGATCATTGATACAGCATTTAGGGAGATCTCTTATCACG
+GAAGGGAAGCTTTCGAGAAGCTGCGAGGGCAGATACTTGAGCAGCGGGATGTGTTGGTTG
+AATATCCTCAA
+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_50_Transcript_1/1_Confidence_0.000_Length_1442_hit1_IdMatch=24.95,AligLength=537,E-val=8e-47
+GAATTCGTGCGCATGCAAAGAATAAATGACCACCCTTCGGGTTGGAAAGCTACGGATGAA
+GTATATGGCTATGCAGAGTTCTCGAAACTAATGTGTGCTGAATGGAAGAGAAGGAAGACA
+GAACATCAGAATACTGTTGACTTCCTTAAGAAGTATGCAGAGCGACCCTTCGAGACCAAC
+CCCGGACCAGTGGAGGATATCCCAATAAGACACGATGATGTCGAGCAGGGGGTAGAAGCG
+CAGATGGGTCGAGATGCAGATTGGTTTAACAATGACATAGCGGAACGTATAGCGCGTGGA
+CAGGATATTACTGATATCTTGTATGAGTATGCTGAAGATGACGAGTTGCATGAGGATTAC
+ATGGCTTACAAGAAACAGCAGGCCCAGCCTAGTAAGTGGGACAAATACGCGCGCCGTCTC
+GAAAGTGCAATTACGGAAGGAAAGAATTTCCTTGCCCGTGTGGTTTCAAAAATAGCCAGC
+GTCATTCGGGAGAACCCATATTTGACCATGATGGCAACTGTGGGGAGCGTTCTCGCTCTG
+TATGGAGCTATGCGTTGGTTTTCGAAAGGAGTAACGGAGACTTTTGACGCTGAAGAAGTT
+ACTATTCCCAACGAAACCAAGGTAGAGAACGTTGTGCGTACGGAGGGTTTTGAATCTTAC
+GACCATCGGACTCCGCGCGCTCATCGAGCCAACAGGCAATATGTGCGAGCTGAGGCGATG
+ATAGATGAAACAGGGTACCTGGTAGCCAACAACAAAGTCACTGGCAACACGTATCGAATG
+TGTATCAAGAGGGATCCTGATGATTTGGTCGTTGGAAACGCTGTGTTTATCACAGGGTGG
+ACGCTCCTCATACCGTACCACTTCGTTTGTGGACTGGCGGGACGGAGAATAGCTGCTGAT
+TCCATCGTGACTTTGTCAAAGCCAGGCTTGGATAAGATTATTGAATTCCCGTTGTCACGA
+ATCTTCCGATACGATACCTCACCAGATGGTTTTACCACTAGTGAGTATTGTGCTCGAATG
+GAACATGAAGATGGAGAATTGGTTGATGCCATCCTGGTAAACCTGCATGGTTTGGGAGTG
+CGAATCCATCCTGACCTCCGGGGAAAAATTGTGACGGTACGAGACCAAGCGCACCTGAGT
+ACGACATTTCATGCGATTCTCACAACGATGTCCAGGAAGCCACCACTAACAACGTCACAA
+CAAGTGGTAAAAGGGGTCAAGCCAATGGATAAAATCCTGCACATCAATTTACCAGTTGGG
+GACAAAACAACGCAATACACCCAACGTGACTGTTACAAGTATTATTCCGTAACGGTCGTT
+GGAGATTGTGGTGCCTTGCTGGTAGCACAAAATCATGCAATTGTGAGGAAAATATTTGCA
+ATGCATATAGCAGGTGCGGAAGAAAATGGCTATGCTTGTCCAATCAATCAGGAAATGTTA
+G
+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_63_Transcript_2/2_Confidence_0.333_Length_343_hit1_IdMatch=50.86,AligLength=116,E-val=6e-30
+ACACAGTCCACAGTCCGAAGACCAAAGCGTTGGATAGGACACGAATACACAGATGAAACG
+AAAACAGGCGAGGCTGCGCCCTATCGGACATTGGAAGAAGTCCGTTTCCTTAAAAGAGGG
+TTCAGAATGGATCACCTCTTGTGTCGGTGGGTAGCTCCTTTGAAGAAGGATGTCATCTAC
+GAAATGCTTAATTGGACGCGCAAAGGGATTAACCCAGATGATGTGACGATGATGATCATT
+GATACAGCATTTAGGGAGATCTCTTATCACGGAAGGGAAGCTTTCGAGAAGCTGCGAGGG
+CAGATACTTGAGCAGCGGGATGTGTTGGTTGAATATCCTCAA
+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_2/2_Confidence_0.333_Length_1324_hit1_IdMatch=43.5,AligLength=446,E-val=1e-119
+CTTGAGGATACTTTACCAGGTATGGTCGAGCACGTTACGCTCGTAGCACAAAATACATCC
+GCGTCAGCCAAGGTGTTATCTGACGAGTTGATCAAATCAATGCTTTGCATTGTTTTGATT
+TGCTTGTTGATTGAAACCAAGTTCTATAAGACCGCTTTCGCGGTACTTATAGTGGTTGCT
+CTACGTGTTTTCGGGTACAGTGAGCAAATAATTGAGACAGCTATGGACATGTATCGCGTA
+ATTAGGGCTCCAAAGGCTCAAGGTAATATGGAAGATGTCGTTTTCCATCCGTGGTTGAAC
+ACGTGTGGAAAGTTGATTTTCCTACTTATCGCTGTCCTGTGTCTCAAGAAATTACCAGGA
+AAGAACGACGTAGACACTTTCATGCGCAGGCTCGACAGCTTACCCAAAGCTGTTAAGGGT
+GCGACACAACTACATGAATGGGTGTCAAAATACTTCGATCTCTCTTTGGATCACGTCAAG
+GCGATGATTGTTGGTAAATCTTGTGCCGAAATGAAGAAGGCTGAATCATCAAGCGCCAAA
+GTTTTGGCTTGGGCCGCTAGAGTTCAAGATTTCGTCAGATTGGAACAACGAAGTAAAATC
+GATAGTGATATCGCTGTCGCCAACGAGGCTGAAGCCTTGTATCACACTGGATTGCAATTT
+GCAGGAGACACTCTGTTACCTCCAGAATTGCACAAGGTCGTGAACAGTGCGCTAAGACCA
+GCCCGCGATATATATGAGTACGTCACCCGCTCCCCAATAAAAGGAGGGAGTCGTAAGATG
+AGACCCTTGATGATTTGGCTAGCTGGCCAGTCAGGAATTGGGAAAACCTCTATGGTGGAT
+CCTCTATGTATCGATTTGCTTCGAGCAATGGGTTATGTGGGACCTGAACATCTCCACTCG
+TTGGTGTATGGCCGCCAAGTTGAGACGGAGTACTGGGATGGTTACAAAGCCCACAAGATA
+GTGATCTATGATGATGCTTTTCAGCTGAAAGATGATGCTGTGAACAGGAATTTGGAGGTA
+TTTGAGGTTATACGTTCTTGCAACACGTATCCTCAACACCTTCATATGGCTTGTCTCTCG
+GATAAAAACACTTTTTCAGTAGCGGAAGTGTACATCTACACTACCAACGAAATGAATGTC
+AAACTTGAGTCGCTGACTCATGAACAAGCATTCTACAACCGCATGAGTGAAAACGCGTTC
+ACTGTGCGTCCAAAAGAGGCTTATCGTCTAGTCGAAGAAGGATCAACCGGCAATAAGCAG
+TATCGTTTGGACAAAACGAAAACCAAAGGAGCTATCGATCTCGATGTGTACGAATTCGTG
+CGC