Next changeset 1:ea8fde9c6f82 (2019-10-09) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3 |
added:
cherry_pick_fasta.py cherry_pick_fasta.xml test-data/input.fa test-data/output.fa |
b |
diff -r 000000000000 -r e3aee4ba49c6 cherry_pick_fasta.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cherry_pick_fasta.py Sun Oct 15 13:26:45 2017 -0400 |
b |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Cherry pick of fasta sequences satisfying a query string in their header/name
"""

import argparse


def Parser():
    """Parse the command line and return the argparse namespace.

    The namespace exposes ``input`` (path of the fasta file to read),
    ``query_string`` (text looked for in the headers) and ``output`` (path of
    the fasta file to write). All three options are mandatory, so a missing
    one is reported as a usage error instead of failing later on ``None``.
    """
    the_parser = argparse.ArgumentParser(
        description="Cherry pick fasta sequences")
    the_parser.add_argument('--input', action="store", type=str,
                            required=True, help="input fasta file")
    the_parser.add_argument('--query-string', dest="query_string",
                            action="store", type=str, required=True,
                            help="header containing the string will be "
                                 "extracted as well as the corresponding "
                                 "sequence")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="output fasta file")
    args = the_parser.parse_args()
    return args


def _iter_fasta_records(handle):
    """Yield each fasta record of ``handle`` as a list of raw lines.

    The first item of every list is the header line without its leading
    ``>``; the following items are the lines up to the next header. Lines
    found before the first header belong to no record and are skipped.
    """
    record = None
    for line in handle:
        # Only a ">" in first column opens a record, so a ">" inside a
        # header (e.g. "A->B") no longer cuts the record in two.
        if line.startswith(">"):
            if record is not None:
                yield record
            record = [line[1:]]
        elif record is not None:
            record.append(line)
    if record is not None:
        yield record


def __main__():
    """Copy to the output file the records whose header contains the query.

    The query is compared with the header line only, never with the
    sequence itself. Each selected record is written as ``>`` + header +
    sequence lines, with trailing whitespace (including blank separator
    lines) removed and a single final newline.
    """
    args = Parser()
    search_term = args.query_string
    # Stream the input record by record; both files are closed on exit,
    # even if an error interrupts the copy.
    with open(args.input, "r") as fasta, open(args.output, "w") as output:
        for record in _iter_fasta_records(fasta):
            if search_term in record[0]:
                output.write(">%s\n" % "".join(record).rstrip())


if __name__ == "__main__":
    __main__()
b |
diff -r 000000000000 -r e3aee4ba49c6 cherry_pick_fasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cherry_pick_fasta.xml Sun Oct 15 13:26:45 2017 -0400 |
b |
@@ -0,0 +1,45 @@ +<tool id="cherry_pick_fasta" name="Pick Fasta sequences" version="1.0.0"> + <description>with header satisfying a query string</description> + <command interpreter="python">cherry_pick_fasta.py + --input $input + --query-string "$query" + --output $output + </command> + + <inputs> + <param name="query" type="text" size="30" value="" label="Select sequences with this string in their header" help="example: gi|40557596"> + <sanitizer> + <valid initial="string.printable"> + <remove value="&quot;"/> + <remove value="\"/> + </valid> + <mapping initial="none"> + <add source="&quot;" target="\&quot;"/> + <add source="\" target="\\"/> + </mapping> + </sanitizer> + </param> + <param format="fasta" label="Source file" name="input" type="data" /> + </inputs> + <outputs> + <data name="output" format="fasta" label="${tool.name} on ${on_string} including '${query.value}' in header" /> + </outputs> + <tests> + <test> + <param ftype="fasta" name="input" value="input.fa" /> + <param name="query" value="gi|81971654" /> + <output name="output" ftype="fasta" file="output.fa" /> + </test> + </tests> + <help> +**What it does** + +This tool retrieves nucleotide/peptide sequences from a fasta file whose headers match a given query string. + +It is Copyright © 2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_. + +.. _CNRS and University Pierre et Marie Curie: http://www.upmc.fr/en/index.html +.. _MIT license: http://opensource.org/licenses/MIT + + </help> +</tool> |
b |
diff -r 000000000000 -r e3aee4ba49c6 test-data/input.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fa Sun Oct 15 13:26:45 2017 -0400 |
b |
b'@@ -0,0 +1,56346 @@\n+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_1/2_Confidence_0.667_Length_1344_hit1_IdMatch=43.46,AligLength=451,E-val=2e-122\n+TAGATAAGGTTTGCTCATTTCTTGAGGATGCTTTACCAGGTATGGTCGAGCACGTTACGC\n+TCGTAGCACAAAATACATCCGCGTCAGCCAAGGTGTTATCTGACGAGTTGATCAAATCAA\n+TGCTTTGCATTGTTTTGATTTGCTTGTTGATTGAAACCAAGTTCTATAAGACCGCTTTCG\n+CGGTACTTATAGTGGTTGCTCTACGTGTTTTCGGGTACAGTGAGCAAATAATTGAGACAG\n+CTATGGACATGTATCGCGTAATTAGGGCTCCAAAGGCTCAAGGTAATATGGAAGATGTCG\n+TTTTCCATCCGTGGTTGAACACGTGTGGAAAGTTGATTTTCCTACTTATCGCTGTCCTGT\n+GTCTCAAGAAATTACCAGGAAAGAACGACGTAGACACTTTCATGCGCAGGCTCGACAGCT\n+TACCCAAAGCTGTTAAGGGTGCGACACAACTACATGAATGGGTGTCAAAATACTTCGATC\n+TCTCTTTGGATCACGTCAAGGCGATGATTGTTGGTAAATCTTGTGCCGAAATGAAGAAGG\n+CTGAATCATCAAGCGCCAAAGTTTTGGCTTGGGCCGCTAGAGTTCAAGATTTCGTCAGAT\n+TGGAACAACGAAGTAAAATCGATAGTGATATCGCTGTCGCCAACGAGGCTGAAGCCTTGT\n+ATCACACTGGATTGCAATTTGCAGGAGACACTCTGTTACCTCCAGAATTGCACAAGGTCG\n+TGAACAGTGCGCTAAGACCAGCCCGCGATATATATGAGTACGTCACCCGCTCCCCAATAA\n+AAGGAGGGAGTCGTAAGATGAGACCCTTGATGATTTGGCTAGCTGGCCAGTCAGGAATTG\n+GGAAAACCTCTATGGTGGATCCTCTATGTATCGATTTGCTTCGAGCAATGGGTTATGTGG\n+GACCTGAACATCTCCACTCGTTGGTGTATGGCCGCCAAGTTGAGACGGAGTACTGGGATG\n+GTTACAAAGCCCACAAGATAGTGATCTATGATGATGCTTTTCAGCTGAAAGATGATGCTG\n+TGAACAGGAATTTGGAGGTATTTGAGGTTATACGTTCTTGCAACACGTATCCTCAACACC\n+TTCATATGGCTTGTCTCTCGGATAAAAACACTTTTTCAGTAGCGGAAGTGTACATCTACA\n+CTACCAACGAAATGAATGTCAAACTTGAGTCGCTGACTCATGAACAAGCATTCTACAACC\n+GCATGAGTGAAAACGCGTTCACTGTGCGTCCAAAAGAGGCTTATCGTCTAGTCGAAGAAG\n+GATCAACCGGCAATAAGCAGTATCGTTTGGACAAAACGAAAACCAAAGGAGCTATCGATC\n+TCGATGTGTACGAATTCGTGCGC\n+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_63_Transcript_1/2_Confidence_0.333_Length_1607_hit1_IdMatch=52.99,AligLength=536,E-val=0.0\n+CTAGAATCACAGCTCAGATGAGTTTTGAGGCACCGAAGGACGCAATTGAAGGACCGTGTC\n+AAACCCCGGAAGGATTGTTCGCCCCTATTGGCAAAGCGCCGATAGGCGTAGGGATGTCCA\n+CCAAGACGGCTATACGACCTTCACGCTTGTATG
GAAGAATAACTAAACCGACAACTGCAC\n+CATCATACTTGGGTAAAGACGCGCTTTATCGTGGATTGACCAAGTGTGGTGTTCGCACAG\n+TTAATATTCAACCAGAATACATAGACGCAGCGGCGAATGACGTCGCACGCTATGTGTTAA\n+ACCAGCATGTTGGTCACGTGGATAGGGAACGATACACACGTATATTGTCGTACGAGGAGG\n+CTGTTAAGGGCGTGCCGTACGATGATTTCATGAAGTCAGTGACTCGAGTCACTTCCCCTG\n+GTTACCCCTATTGCTTGGATACTGGAAACATGCCAGGGAAAAGCAAATGGATGGGGCTCG\n+AACAAGATTTCGATATGACAAGTCCAGCTGCTTTGGCTTTGAGGAAAGATGTTGAAAGTT\n+TGTTGGAAGATTGCAAAAATGGCTTAGTCCGTGATGTGGTGTTTGTCGACACTCTCAAGG\n+ATGAAAGGCGCGAGCTGATAAAGGTGGAAGCAAAGAAGACTCGAGTCTTTTCTGCTGGAC\n+CACAGCATTTTGTAATAGCTTTCCGGCAATACTTTCTTCCATTCTCTGCCTGGGTCATGC\n+ATAACAGAATCGAAAACGAAGTAGCCGTTGGAACAAACCCCTTCTCAATGGATTGGCACA\n+ACATTGCTGTGCGTATGCGTAGTAAAGGGAGACACATTATTGCTGGAGATTTTAGCAATT\n+TTGATGGATCCCTCAACGCCCAAGTTCTCTGGACAATATTTTGGAAGATATTTGTCCCGT\n+GGCTTAATGATATTGAACCACTTGGTACACCCAAGAATGAGGAGAATCTGCGGGTCTGCA\n+CGAGTCTATGGACGCACTTGGTGCACTCCGTGCACATTTGTGGAGATAACTTGTACATGT\n+GGACACATTCTCAACCATCGGGCAATCCCTTCACGGTGATAATCAATAGTTTGTATAACT\n+CAGTTATCATGCGTGTCGTGTGGCAATACATAATGGCGAAAGAAGAACCTAAGTTACGCA\n+CAATGAACCATTTCAATCAACATGTTGCTATGGTTTCATATGGTGATGACAATCTACTTA\n+ACATCTCGGAAGGGGTAATTGATATCTTCAACCAACTTACCATCTCGGAAGCCATGCGTT\n+GGATAGGACACGAATACACAGATGAAACGAAAACAGGCGAGGCTGCGCCCTATCGGACAT\n+TGGAAGAAGTCCGTTTCCTTAAAAGAGGGTTCAGAATGGATCACCTCTTGTGTCGGTGGG\n+TAGCTCCTTTGAAGAAGGATGTCATCTACGAAATGCTTAATTGGACGCGCAAAGGGATTA\n+ACCCAGATGATGTGACGATGATGATCATTGATACAGCATTTAGGGAGATCTCTTATCACG\n+GAAGGGAAGCTTTCGAGAAGCTGCGAGGGCAGATACTTGAGCAGCGGGATGTGTTGGTTG\n+AATATCCTCAA\n+>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_50_Transcript_1/1_Confidence_0.000_Length_1442_hit1_IdMatch=24.95,AligLength=537,E-val=8e-47\n+GAATTCGTGCGCATGCAAAGAATAAATGACCACCCTTCGGGTTGGAAAGCTACGGATGAA\n+GTATATGGCTATGCAGAGTTCTCGAAACTAATGTGTGCTGAATGGAAGAGAAGGAAGACA\n+GAACATCAGAATACTGTTGACTTCCTTAAGAAGTATGCAGAGCGACCCTTCGAGACCAAC\n+CCCGGACCAGTGGAGGATATCCCAATAAGACACGATGATGTCGAGCAGGGGGTAGAAGCG\n+CAGATGGGTCGAGATGCAGATTGGTTTAACAATGACATAGCGGAACGTATAG
CGCGTGGA\n+CAGGATATTACTGATATCTTGTATGAGTATGCTGAAGATGACGAGTTGC'..b'\n+\n+>gi|451927572|gb|AGF85450.1|_directed_RNA_polymerase_(II)_subunit_1__Moumouvirus_goulette--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=44.44,AligLength=36,E-val=4e-04\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|156068623|gb|ABU44330.1|_hypothetical_protein_AR158_C785L__Paramecium_bursaria_Chlorella_virus_A--Locus_285_Transcript_1/1_Confidence_0.000_Length_112_hit1_IdMatch=55.56,AligLength=36,E-val=7e-04\n+TATTAAAATCGAAGGGACATACTGTAGGTTTTATGGGAGACGGAATTAACGATGCAGCAG\n+CGATTAAAGAAGCTGATGTCGGAATTTCTGTAGACACCGGAGCAGATATT\n+\n+>gi|558482105|gb|AHA55668.1|_putative_DNA-dependent_RNA_polymerase_II_largest_subunit__Emiliania_hux--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=50.0,AligLength=36,E-val=8e-05\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|347601122|gb|AEP15608.1|_DNA-directed_RNA_polymerase_subunit_beta__Emiliania_huxleyi_virus_207--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=50.0,AligLength=36,E-val=8e-05\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|448932900|gb|AGE56458.1|_calcium-transporting_ATPase,_plasma_membrane-type__Paramecium_bursaria--Locus_285_Transcript_1/1_Confidence_0.000_Length_112_hit1_IdMatch=55.56,AligLength=36,E-val=3e-04\n+TATTAAAATCGAAGGGACATACTGTAGGTTTTATGGGAGACGGAATTAACGATGCAGCAG\n+CGATTAAAGAAGCTGATGTCGGAATTTCTGTAGACACCGGAGCAGATATT\n+\n+>gi|347481891|gb|AEO97877.1|_DNA-directed_RNA_polymerase_subunit_beta__Emiliania_huxleyi_virus_84--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=50.0,AligLength=36,E-val=8e-05\n+TCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCTTA\n+AAACTGCCGATGCGGGTTACCTGACTCGTCGTTTGGTAGACGTTGCGCAGGAC\n+\n+>gi|5579525
11|gb|AHA46305.1|_DNA-directed_RNA_polymerase_subunit_alpha__Insectomime_virus--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=54.84,AligLength=31,E-val=3e-04\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|326784736|gb|AEA06870.1|_DNA-directed_RNA_polymerase_subunit_alpha__Lausannevirus--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=58.06,AligLength=31,E-val=7e-05\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|327409562|ref|YP_004346982.1|_DNA-directed_RNA_polymerase_subunit_alpha__Lausannevirus--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=58.06,AligLength=31,E-val=7e-05\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|566082702|gb|AHC55112.1|_putative_DNA-directed_RNA_polymerase_subunit_Rpb1__Tunisvirus_fontaine2--Locus_295_Transcript_1/1_Confidence_0.000_Length_115_hit1_IdMatch=54.84,AligLength=31,E-val=3e-04\n+CATCTTGGAATACTTTATCTCAACCCACGGTGCTCGTAAAGGTCTTGCGGATACCGCTCT\n+TAAAACTGCCGATGCGGGTTACCTGACTCGTCGTTTG\n+\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_5/26_Confidence_0.077_Length_365_hit1_IdMatch=89.47,AligLength=19,E-val=2e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_2/26_Confidence_0.051_Length_287_hit1_IdMatch=89.47,AligLength=19,E-val=1e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_3/26_Confidence_0.103_Length_422_hit1_IdMatch=89.47,AligLength=19,E-val=2e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+>gi|567840471|gb|AHC95553.1|_glycoprotein_precursor__Lassa_virus--Locus_1_Transcript_4/26_Confidence_0.051_Length_30
6_hit1_IdMatch=89.47,AligLength=19,E-val=1e-04\n+TTGGGCTGCATTCCCAAGCAACCCGACTCCGGGAAGACCCGAGCCCGGCGCGCCGGGGGC\n+CG\n+\n' |
b |
diff -r 000000000000 -r e3aee4ba49c6 test-data/output.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fa Sun Oct 15 13:26:45 2017 -0400 |
b |
@@ -0,0 +1,109 @@ +>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_1/2_Confidence_0.667_Length_1344_hit1_IdMatch=43.46,AligLength=451,E-val=2e-122 +TAGATAAGGTTTGCTCATTTCTTGAGGATGCTTTACCAGGTATGGTCGAGCACGTTACGC +TCGTAGCACAAAATACATCCGCGTCAGCCAAGGTGTTATCTGACGAGTTGATCAAATCAA +TGCTTTGCATTGTTTTGATTTGCTTGTTGATTGAAACCAAGTTCTATAAGACCGCTTTCG +CGGTACTTATAGTGGTTGCTCTACGTGTTTTCGGGTACAGTGAGCAAATAATTGAGACAG +CTATGGACATGTATCGCGTAATTAGGGCTCCAAAGGCTCAAGGTAATATGGAAGATGTCG +TTTTCCATCCGTGGTTGAACACGTGTGGAAAGTTGATTTTCCTACTTATCGCTGTCCTGT +GTCTCAAGAAATTACCAGGAAAGAACGACGTAGACACTTTCATGCGCAGGCTCGACAGCT +TACCCAAAGCTGTTAAGGGTGCGACACAACTACATGAATGGGTGTCAAAATACTTCGATC +TCTCTTTGGATCACGTCAAGGCGATGATTGTTGGTAAATCTTGTGCCGAAATGAAGAAGG +CTGAATCATCAAGCGCCAAAGTTTTGGCTTGGGCCGCTAGAGTTCAAGATTTCGTCAGAT +TGGAACAACGAAGTAAAATCGATAGTGATATCGCTGTCGCCAACGAGGCTGAAGCCTTGT +ATCACACTGGATTGCAATTTGCAGGAGACACTCTGTTACCTCCAGAATTGCACAAGGTCG +TGAACAGTGCGCTAAGACCAGCCCGCGATATATATGAGTACGTCACCCGCTCCCCAATAA +AAGGAGGGAGTCGTAAGATGAGACCCTTGATGATTTGGCTAGCTGGCCAGTCAGGAATTG +GGAAAACCTCTATGGTGGATCCTCTATGTATCGATTTGCTTCGAGCAATGGGTTATGTGG +GACCTGAACATCTCCACTCGTTGGTGTATGGCCGCCAAGTTGAGACGGAGTACTGGGATG +GTTACAAAGCCCACAAGATAGTGATCTATGATGATGCTTTTCAGCTGAAAGATGATGCTG +TGAACAGGAATTTGGAGGTATTTGAGGTTATACGTTCTTGCAACACGTATCCTCAACACC +TTCATATGGCTTGTCTCTCGGATAAAAACACTTTTTCAGTAGCGGAAGTGTACATCTACA +CTACCAACGAAATGAATGTCAAACTTGAGTCGCTGACTCATGAACAAGCATTCTACAACC +GCATGAGTGAAAACGCGTTCACTGTGCGTCCAAAAGAGGCTTATCGTCTAGTCGAAGAAG +GATCAACCGGCAATAAGCAGTATCGTTTGGACAAAACGAAAACCAAAGGAGCTATCGATC +TCGATGTGTACGAATTCGTGCGC +>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_63_Transcript_1/2_Confidence_0.333_Length_1607_hit1_IdMatch=52.99,AligLength=536,E-val=0.0 +CTAGAATCACAGCTCAGATGAGTTTTGAGGCACCGAAGGACGCAATTGAAGGACCGTGTC +AAACCCCGGAAGGATTGTTCGCCCCTATTGGCAAAGCGCCGATAGGCGTAGGGATGTCCA +CCAAGACGGCTATACGACCTTCACGCTTGTATGGAAGAATAACTAAACCGACAACTGCAC 
+CATCATACTTGGGTAAAGACGCGCTTTATCGTGGATTGACCAAGTGTGGTGTTCGCACAG +TTAATATTCAACCAGAATACATAGACGCAGCGGCGAATGACGTCGCACGCTATGTGTTAA +ACCAGCATGTTGGTCACGTGGATAGGGAACGATACACACGTATATTGTCGTACGAGGAGG +CTGTTAAGGGCGTGCCGTACGATGATTTCATGAAGTCAGTGACTCGAGTCACTTCCCCTG +GTTACCCCTATTGCTTGGATACTGGAAACATGCCAGGGAAAAGCAAATGGATGGGGCTCG +AACAAGATTTCGATATGACAAGTCCAGCTGCTTTGGCTTTGAGGAAAGATGTTGAAAGTT +TGTTGGAAGATTGCAAAAATGGCTTAGTCCGTGATGTGGTGTTTGTCGACACTCTCAAGG +ATGAAAGGCGCGAGCTGATAAAGGTGGAAGCAAAGAAGACTCGAGTCTTTTCTGCTGGAC +CACAGCATTTTGTAATAGCTTTCCGGCAATACTTTCTTCCATTCTCTGCCTGGGTCATGC +ATAACAGAATCGAAAACGAAGTAGCCGTTGGAACAAACCCCTTCTCAATGGATTGGCACA +ACATTGCTGTGCGTATGCGTAGTAAAGGGAGACACATTATTGCTGGAGATTTTAGCAATT +TTGATGGATCCCTCAACGCCCAAGTTCTCTGGACAATATTTTGGAAGATATTTGTCCCGT +GGCTTAATGATATTGAACCACTTGGTACACCCAAGAATGAGGAGAATCTGCGGGTCTGCA +CGAGTCTATGGACGCACTTGGTGCACTCCGTGCACATTTGTGGAGATAACTTGTACATGT +GGACACATTCTCAACCATCGGGCAATCCCTTCACGGTGATAATCAATAGTTTGTATAACT +CAGTTATCATGCGTGTCGTGTGGCAATACATAATGGCGAAAGAAGAACCTAAGTTACGCA +CAATGAACCATTTCAATCAACATGTTGCTATGGTTTCATATGGTGATGACAATCTACTTA +ACATCTCGGAAGGGGTAATTGATATCTTCAACCAACTTACCATCTCGGAAGCCATGCGTT +GGATAGGACACGAATACACAGATGAAACGAAAACAGGCGAGGCTGCGCCCTATCGGACAT +TGGAAGAAGTCCGTTTCCTTAAAAGAGGGTTCAGAATGGATCACCTCTTGTGTCGGTGGG +TAGCTCCTTTGAAGAAGGATGTCATCTACGAAATGCTTAATTGGACGCGCAAAGGGATTA +ACCCAGATGATGTGACGATGATGATCATTGATACAGCATTTAGGGAGATCTCTTATCACG +GAAGGGAAGCTTTCGAGAAGCTGCGAGGGCAGATACTTGAGCAGCGGGATGTGTTGGTTG +AATATCCTCAA +>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_50_Transcript_1/1_Confidence_0.000_Length_1442_hit1_IdMatch=24.95,AligLength=537,E-val=8e-47 +GAATTCGTGCGCATGCAAAGAATAAATGACCACCCTTCGGGTTGGAAAGCTACGGATGAA +GTATATGGCTATGCAGAGTTCTCGAAACTAATGTGTGCTGAATGGAAGAGAAGGAAGACA +GAACATCAGAATACTGTTGACTTCCTTAAGAAGTATGCAGAGCGACCCTTCGAGACCAAC +CCCGGACCAGTGGAGGATATCCCAATAAGACACGATGATGTCGAGCAGGGGGTAGAAGCG +CAGATGGGTCGAGATGCAGATTGGTTTAACAATGACATAGCGGAACGTATAGCGCGTGGA 
+CAGGATATTACTGATATCTTGTATGAGTATGCTGAAGATGACGAGTTGCATGAGGATTAC +ATGGCTTACAAGAAACAGCAGGCCCAGCCTAGTAAGTGGGACAAATACGCGCGCCGTCTC +GAAAGTGCAATTACGGAAGGAAAGAATTTCCTTGCCCGTGTGGTTTCAAAAATAGCCAGC +GTCATTCGGGAGAACCCATATTTGACCATGATGGCAACTGTGGGGAGCGTTCTCGCTCTG +TATGGAGCTATGCGTTGGTTTTCGAAAGGAGTAACGGAGACTTTTGACGCTGAAGAAGTT +ACTATTCCCAACGAAACCAAGGTAGAGAACGTTGTGCGTACGGAGGGTTTTGAATCTTAC +GACCATCGGACTCCGCGCGCTCATCGAGCCAACAGGCAATATGTGCGAGCTGAGGCGATG +ATAGATGAAACAGGGTACCTGGTAGCCAACAACAAAGTCACTGGCAACACGTATCGAATG +TGTATCAAGAGGGATCCTGATGATTTGGTCGTTGGAAACGCTGTGTTTATCACAGGGTGG +ACGCTCCTCATACCGTACCACTTCGTTTGTGGACTGGCGGGACGGAGAATAGCTGCTGAT +TCCATCGTGACTTTGTCAAAGCCAGGCTTGGATAAGATTATTGAATTCCCGTTGTCACGA +ATCTTCCGATACGATACCTCACCAGATGGTTTTACCACTAGTGAGTATTGTGCTCGAATG +GAACATGAAGATGGAGAATTGGTTGATGCCATCCTGGTAAACCTGCATGGTTTGGGAGTG +CGAATCCATCCTGACCTCCGGGGAAAAATTGTGACGGTACGAGACCAAGCGCACCTGAGT +ACGACATTTCATGCGATTCTCACAACGATGTCCAGGAAGCCACCACTAACAACGTCACAA +CAAGTGGTAAAAGGGGTCAAGCCAATGGATAAAATCCTGCACATCAATTTACCAGTTGGG +GACAAAACAACGCAATACACCCAACGTGACTGTTACAAGTATTATTCCGTAACGGTCGTT +GGAGATTGTGGTGCCTTGCTGGTAGCACAAAATCATGCAATTGTGAGGAAAATATTTGCA +ATGCATATAGCAGGTGCGGAAGAAAATGGCTATGCTTGTCCAATCAATCAGGAAATGTTA +G +>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_63_Transcript_2/2_Confidence_0.333_Length_343_hit1_IdMatch=50.86,AligLength=116,E-val=6e-30 +ACACAGTCCACAGTCCGAAGACCAAAGCGTTGGATAGGACACGAATACACAGATGAAACG +AAAACAGGCGAGGCTGCGCCCTATCGGACATTGGAAGAAGTCCGTTTCCTTAAAAGAGGG +TTCAGAATGGATCACCTCTTGTGTCGGTGGGTAGCTCCTTTGAAGAAGGATGTCATCTAC +GAAATGCTTAATTGGACGCGCAAAGGGATTAACCCAGATGATGTGACGATGATGATCATT +GATACAGCATTTAGGGAGATCTCTTATCACGGAAGGGAAGCTTTCGAGAAGCTGCGAGGG +CAGATACTTGAGCAGCGGGATGTGTTGGTTGAATATCCTCAA +>gi|81971654|sp|Q9IJX4.1|POLN_CRPVC_RecName:_Full_Replicase_polyprotein;_Contains:_RecName:_Full_Pro--Locus_65_Transcript_2/2_Confidence_0.333_Length_1324_hit1_IdMatch=43.5,AligLength=446,E-val=1e-119 +CTTGAGGATACTTTACCAGGTATGGTCGAGCACGTTACGCTCGTAGCACAAAATACATCC 
+GCGTCAGCCAAGGTGTTATCTGACGAGTTGATCAAATCAATGCTTTGCATTGTTTTGATT +TGCTTGTTGATTGAAACCAAGTTCTATAAGACCGCTTTCGCGGTACTTATAGTGGTTGCT +CTACGTGTTTTCGGGTACAGTGAGCAAATAATTGAGACAGCTATGGACATGTATCGCGTA +ATTAGGGCTCCAAAGGCTCAAGGTAATATGGAAGATGTCGTTTTCCATCCGTGGTTGAAC +ACGTGTGGAAAGTTGATTTTCCTACTTATCGCTGTCCTGTGTCTCAAGAAATTACCAGGA +AAGAACGACGTAGACACTTTCATGCGCAGGCTCGACAGCTTACCCAAAGCTGTTAAGGGT +GCGACACAACTACATGAATGGGTGTCAAAATACTTCGATCTCTCTTTGGATCACGTCAAG +GCGATGATTGTTGGTAAATCTTGTGCCGAAATGAAGAAGGCTGAATCATCAAGCGCCAAA +GTTTTGGCTTGGGCCGCTAGAGTTCAAGATTTCGTCAGATTGGAACAACGAAGTAAAATC +GATAGTGATATCGCTGTCGCCAACGAGGCTGAAGCCTTGTATCACACTGGATTGCAATTT +GCAGGAGACACTCTGTTACCTCCAGAATTGCACAAGGTCGTGAACAGTGCGCTAAGACCA +GCCCGCGATATATATGAGTACGTCACCCGCTCCCCAATAAAAGGAGGGAGTCGTAAGATG +AGACCCTTGATGATTTGGCTAGCTGGCCAGTCAGGAATTGGGAAAACCTCTATGGTGGAT +CCTCTATGTATCGATTTGCTTCGAGCAATGGGTTATGTGGGACCTGAACATCTCCACTCG +TTGGTGTATGGCCGCCAAGTTGAGACGGAGTACTGGGATGGTTACAAAGCCCACAAGATA +GTGATCTATGATGATGCTTTTCAGCTGAAAGATGATGCTGTGAACAGGAATTTGGAGGTA +TTTGAGGTTATACGTTCTTGCAACACGTATCCTCAACACCTTCATATGGCTTGTCTCTCG +GATAAAAACACTTTTTCAGTAGCGGAAGTGTACATCTACACTACCAACGAAATGAATGTC +AAACTTGAGTCGCTGACTCATGAACAAGCATTCTACAACCGCATGAGTGAAAACGCGTTC +ACTGTGCGTCCAAAAGAGGCTTATCGTCTAGTCGAAGAAGGATCAACCGGCAATAAGCAG +TATCGTTTGGACAAAACGAAAACCAAAGGAGCTATCGATCTCGATGTGTACGAATTCGTG +CGC |