Repository 'dante'
hg clone https://toolshed.g2.bx.psu.edu/repos/petr-novak/dante

Changeset 9:ed4d9ede9cb4 (2019-07-03)
Previous changeset 8:cc10173bd0c5 (2019-07-03) Next changeset 10:d0431a839606 (2019-08-14)
Commit message:
Uploaded
modified:
configuration.py
dante_gff_to_dna.xml
tests.sh
added:
dom_prot_seq.fa
removed:
tmp/multifasta.gff3
tmp/multifasta_win.gff3
tmp/single_fasta.gff3
tmp/single_fasta_filtered.gff3
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 configuration.py
--- a/configuration.py Wed Jul 03 08:10:33 2019 -0400
+++ b/configuration.py Wed Jul 03 09:21:52 2019 -0400
[
@@ -2,118 +2,6 @@
 ''' configuration file to set up the paths and constants '''
 import os
 
-######## PROFREP #######################################################
-## Constansts
-N_segment = 50
-MAX_FILES_SUBPROFILES = 1000
-MAX_PIC_NUM = 50
-IMAGE_RES = 300
-FASTA_LINE = 60
-SEQ_LEN_VIZ = 200000
-FORBIDDEN_CHARS = "\\/"
-HTML_STR = '''
- <!DOCTYPE html>
- <html>
- <body>
- <h2>PROFREP OUTPUT</h2>
- <h4> Sequences processed: </h4>
- {}
- <h4> Total length: </h4>
- <pre> {} bp </pre>
- <h4> Database: </h4>
- <pre> {} </pre>
- <hr>
- <h3> Repetitive profile(s)</h3> </br>
- {} <br/>
- <h4>References: </h4>
- {}
- </h6>
- </body>
- </html>
- '''
-
-## IO
-DOMAINS_GFF = "output_domains.gff"
-N_GFF = "N_regions.gff"
-REPEATS_GFF = "output_repeats.gff"
-HTML = "output.html"
-LOG_FILE = "log.txt"
-PROFREP_DATA = "tool_data/profrep"
-PROFREP_TBL = "prepared_datasets.txt"
-PROFREP_OUTPUT_DIR = "profrep_output_dir"
-## JBrowse and Tracks Conf
-jbrowse_data_dir = "data"
-JSON_CONF_R = """{"hooks" : {"modify": "function( track, f, fdiv ) {fdiv.style.backgroundColor = '#278ECF'}"}}"""
-JSON_CONF_N = """{"hooks" : {"modify": "function( track, f, fdiv ) {fdiv.style.background = '#474747'}"}}"""
-COLORS_HEX = ["#7F7F7F", "#00FF00", "#0000FF", "#FF0000", "#01FFFE", "#FFA6FE",
-              "#FFDB66", "#006401", "#010067", "#95003A", "#007DB5", "#FF00F6",
-              "#774D00", "#90FB92", "#0076FF", "#D5FF00", "#FF937E", "#6A826C",
-              "#FF029D", "#FE8900", "#7A4782", "#7E2DD2", "#85A900", "#FF0056",
-              "#A42400", "#00AE7E", "#683D3B", "#BDC6FF", "#263400", "#BDD393",
-              "#00B917", "#9E008E", "#001544", "#C28C9F", "#FF74A3", "#01D0FF",
-              "#004754", "#E56FFE", "#788231", "#0E4CA1", "#91D0CB", "#BE9970",
-              "#968AE8", "#BB8800", "#43002C", "#DEFF74", "#00FFC6", "#FFE502",
-              "#620E00", "#008F9C", "#98FF52", "#7544B1", "#B500FF", "#00FF78",
-              "#FF6E41", "#005F39", "#6B6882", "#5FAD4E", "#A75740", "#A5FFD2",
-              "#FFB167", "#009BFF", "#E85EBE"]
-COLORS_RGB = ["127,127,127", "0,255,0", "0,0,255", "255,0,0", "1,255,254",
-              "255,166,254", "255,219,102", "0,100,1", "1,0,103", "149,0,58",
-              "0,125,181", "255,0,246", "119,77,0", "144,251,146", "0,118,255",
-              "213,255,0", "255,147,126", "106,130,108", "255,2,157",
-              "254,137,0", "122,71,130", "126,45,210", "133,169,0", "255,0,86",
-              "164,36,0", "0,174,126", "104,61,59", "189,198,255", "38,52,0",
-              "189,211,147", "0,185,23", "158,0,142", "0,21,68", "194,140,159",
-              "255,116,163", "1,208,255", "0,71,84", "229,111,254",
-              "120,130,49", "14,76,161", "145,208,203", "190,153,112",
-              "150,138,232", "187,136,0", "67,0,44", "222,255,116",
-              "0,255,198", "255,229,2", "98,14,0", "0,143,156", "152,255,82",
-              "117,68,177", "181,0,255", "0,255,120", "255,110,65", "0,95,57",
-              "107,104,130", "95,173,78", "167,87,64", "165,255,210",
-              "255,177,103", "0,155,255", "232,94,190"]
-TRACK_LIST = '''
- \t,{}\n
- \t"storeClass" : "JBrowse/Store/SeqFeature/BigWig",
- \t"urlTemplate" : "{}",
- \t"type" : "JBrowse/View/Track/Wiggle/XYPlot",
- \t"label" : "{}",
- \t"key" : "{}",
- \t"style": {}
- \t\t"pos_color": "{}"
- \t {},
- \t"scale" : "log"
- \t{}\n
- '''
-
-## GFF tracks
-HEADER_GFF = "##gff-version 3"
-SOURCE_PROFREP = "profrep"
-SOURCE_DANTE = "dante"
-PHASE = "."
-DOMAINS_FEATURE = "protein_domain"
-REPEATS_FEATURE = "repeat"
-N_NAME = "N"
-N_FEATURE = "N_region"
-HEADER_WIG = "variableStep\tchrom="
-GFF_EMPTY = "."
-
-######### BIG WIG ######################################################
-CHROM_SIZES_FILE = "chrom_sizes.txt"
-
-######### EXTRACT_DATA_DOR_PROFREP #####################################
-HITSORT_CLS = "seqclust/clustering/hitsort.cls"
-READS_ALL = "seqclust/sequences/sequences.fasta"
-ANNOTATION = "PROFREP_CLASSIFICATION_TEMPLATE.csv"
-
-######### PROFREP_DB_REDUCING ##########################################
-MEM_LIM = 1500  # MB
-CLS_REDUCED = "hitsort_reduced.cls"
-READS_ALL_REDUCED = "reads_all_reduced"
-
-######### PROFREP_REFINING #############################################
-WITH_DOMAINS = "mobile_element"
-QUALITY_DIFF_TO_REMOVE = 0.05  # 5% tolerance of PID
-
-######### DANTE ##############################################
 MAIN_GIT_DIR = os.path.dirname(os.path.realpath(__file__))
 TOOL_DATA = os.path.join(MAIN_GIT_DIR, "tool-data")
 TMP = "tmp"
@@ -126,3 +14,10 @@
 FILT_DOM_GFF = "domains_filtered.gff"
 EXTRACT_DOM_STAT = "domains_counts.txt"
 EXTRACT_OUT_DIR = "extracted_domains"
+FASTA_LINE = 60
+SOURCE_PROFREP = "profrep"
+SOURCE_DANTE = "dante"
+DOMAINS_FEATURE = "protein_domain"
+PHASE = "."
+HEADER_GFF = "##gff-version 3"
+DOMAINS_GFF = "output_domains.gff"
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 dante_gff_to_dna.xml
--- a/dante_gff_to_dna.xml Wed Jul 03 08:10:33 2019 -0400
+++ b/dante_gff_to_dna.xml Wed Jul 03 09:21:52 2019 -0400
b
@@ -1,5 +1,8 @@
 <tool id="domains_extract" name="Extract Domains Nucleotide Sequences" version="1.0.0">
   <description> Tool to extract nucleotide sequences of protein domains found by DANTE </description>
+  <requirements>
+    <requirement type="package">biopython</requirement>
+  </requirements>
   <command>
     TEMP_DIR_LINEAGES=\$(mktemp -d) &amp;&amp;
     python3 ${__tool_directory__}/dante_gff_to_dna.py --domains_gff ${domains_gff} --input_dna ${input_dna} --out_dir \$TEMP_DIR_LINEAGES
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 dom_prot_seq.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dom_prot_seq.fa Wed Jul 03 09:21:52 2019 -0400
b
@@ -0,0 +1,57 @@
+>scaffold146.1|size86774:976-1289 RH Class_I|LTR|Ty1/copia|Bianca
+ISWRSTKQTIVAISSNHVELLAIHDTSRECVWLRFMIESIIMXXXXXXXXXXXXXXXXXX
+QLKE*YIKCDRTKHISPKFFFTQDLQKNGDVIIQQIRSNDNVVD
+>scaffold146.1|size86774:6810-7049 PROT Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand
+LVDSGASCNLMSKRVMKQMGIPDEKLEFLDATLYAFDRRTIIPAGKIQLPVTLGEEERTR
+SEMVEFIIVDMDLAYNAILG
+>scaffold146.1|size86774:8801-9241 RT Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat
+DFKGVNKHCQPDPFPLPHIDRLVDAVAGSSLLSTMDAYSGYHQISLAREDQAKSSFLTED
+GVFCYVVMPFGLRNAGATYQRLVNKIFADLLGKEMEIYVDDMIVKSLNDEDHIIYLSHCF
+EVCRTHRLKLNPAKCCFGVRSGKFLGY
+>scaffold146.1|size86774:10819-11667 INT Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand
+RDAMDCVRRCQSCQYFAPINRKPGAEITLTELPCPFDRWGIDILGPFPQSVRQRRFCIVA
+VEYHSKWIEAEAVASITSEAVKKFVMNNIIVRFGCPRVLVSDNGPQFISDKFATFCEEYG
+IQQRTSSVYHPQTNGQAEASNKIILHGLRRNLDSLGGSWPDQLPHVLWAYRTTPKSSTGE
+TPFSLVYGSEAVAPVESTIITPRIAAYMHTESANTEFRELDLDLLEERRNEVYGRVRKQQ
+RALRKRYNQRVRPRQFEKGDLILRSVESQGHKGKLDRAWEGPY
+>scaffold146.1|size86774:14592-14828 PROT Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila
+MVDLGASINLMPYSIYSALQLGPLQGTAIVIKLADRSNTHPEGVIEDVLVQVNNLVFPAD
+FYVLKMGKAENNDCPLLLG
+>scaffold146.1|size86774:15420-15995 RT Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila
+IYAISDSDWVSPVHVVPKKTGFTVERNKNGELVPKRVTNGWRVCIDYRKLNDATRKDHFP
+LPFIDQMLERLAGKKFYCFLDGYSGYNQVAIAPEDQEKTTFTCTYGTYAFRKMPFGLCNA
+PATFQRCMLSIFSEFTGKFIEVFMDDFTVYGDSFEGALENLEKVLQRCVEKKLVLNSEKC
+HFMVRQGIVLGH
+>scaffold146.1|size86774:16188-16634 RH Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila
+FNQECQEAFNKLKSLLTAAPIIQPPNWELPFELMCDASNYALGAVLGQKIEGKRHVIYYA
+SKTLSEAQIHYTTTEKELLAIVYALEKFRSYLLGTKITVHSDHAALRHLLSKKESKPRLI
+RWILLLQEFDLEIKDRAGTENAVADNLSR
+>scaffold146.1|size86774:24873-25481 INT Class_I|LTR|Ty1/copia|Bianca
+HDRLGHPGMIMMRKIIRTTSGHSLKNREILHPREYICTACAQGKLITRPSPVKIMNERIT
+FLERIQGDICGPIHPACGPFRYFIVLIDASSRWSHVSLLSTRNHAFARLLSQIIRLRAHF
+PDYPVKKIRLDNAAEFTSRTFNNYCLAMGIDVEHPVEYVHTQNGLAESLIKRLQLIARPL
+LMKSKLPVTCWGHAIIHASSLIR
+>scaffold146.1|size86774:26322-27032 RT Class_I|LTR|Ty1/copia|Bianca
+WKDAIESELKSLNKRDVFGPVVRTPEGVQPVGYKWVFVRKRNDKGEISRYKARLVAQGFS
+QRPGIDYDETYSPVMDATTFRFLISLAIEYGLDLQLMDVVTAYLYGSLDCEIYMKIPEGF
+HMPERYSSEPRTDYAIKLNKSLYGLKQSGRMWYNRLSEYLIKEGYKNNLVCPCVFMKKFE
+NEFVIIAVYVDDINIVGTQKALLDAVNCLKREFEMKDLGRTKYCLGLQIEYLKNGIF
+>scaffold146.1|size86774:27723-28124 RH Class_I|LTR|Ty1/copia|Bianca
+DAGYRSDPHNGRSQTGYVFLNKGAAISWRSTKQTIAATSSNHAELLAIHETSRECVWLRS
+MIESIYNACGLFTDKMPPTVLYEDNSACIIQLKEGYIKGDRTKHISPKFFFTHDLQKNGE
+VIIQQIRSSDNVAD
+>scaffold146.1|size86774:10299-10658 aRH Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat
+WNMYIDGSTQSGAGVGVHYITPYGDWINLAVKLQFPATNNVAEYEALLAGMNFALSLGVT
+RLKTFSDSQLVVEQFSGHFQAKEPMLEAYKSRSQLLAAKFSEFSLEHIPRESNRAADSLA
+>scaffold146.1|size86774:16812-17666 INT Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila
+HASDYGGHFGPNRTARRILDVGFYWPSIFRDVYQFCRTCDACQRVGNITNRREMPQNYIL
+ANEIFDIWGLDFMGPFPQSQGNNYILVAVDYVSKWVEAIPTRTDDGKTVTEFLRKNIFTR
+YGVPKAIISDRGTHFCNSTMRAMMKKYNVIHKTTTAYHPQGNGQAEATNREIKSILEKVV
+NKKRSNWSQKLPDALWAYRTAYKTPIGTTPFRLIYGKHCNLPVGLEHKAYWAIREMNFEE
+GGDAELRQMQLQELDALRLEAYDNSRIYKERLKTYHDKKLLQQNF
+>scaffold146.1|size86774:19976-20212 PROT Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila
+MVDLGASINLMPYYIYSALKLGSLQGTAIIIKLADRSETHPEGVVKDVLAQVNNLVFPAD
+FYVLKMGEAENDDCPLLLG
+>scaffold146.1|size86774:28912-29124 PROT Class_I|LTR|Ty1/copia|Bianca
+CLVDSATTHTILKNMRYFTSFEKRDVNIATIVCEANIVEGSGRAVIVLPSGTHIRIDDAL
+YANKSRRNLLS
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 tests.sh
--- a/tests.sh Wed Jul 03 08:10:33 2019 -0400
+++ b/tests.sh Wed Jul 03 09:21:52 2019 -0400
[
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-export test_data="$DIR/test_data"
+export TEXT_DATA="$DIR/test-data"
 export classification_tbl=${DIR}/tool-data/protein_domains/Viridiplantae_v3.0_class
 export pdb=${DIR}/tool-data/protein_domains/Viridiplantae_v3.0_pdb
 
@@ -10,13 +10,13 @@
 
 ######## DANTE
 ## single_seq, for/rev strand of mapping
-$DIR/dante.py -q $test_data/GEPY_test_long_1 -pdb $pdb -cs $classification_tbl \
+$DIR/dante.py -q ${TEXT_DATA}/GEPY_test_long_1 -pdb $pdb -cs $classification_tbl \
               --domain_gff $PWD/tmp/single_fasta.gff3
 ## multifasta
-$DIR/dante.py -q $test_data/vyber-Ty1_01.fasta -pdb $pdb -cs $classification_tbl \
+$DIR/dante.py -q ${TEXT_DATA}/vyber-Ty1_01.fasta -pdb $pdb -cs $classification_tbl \
               --domain_gff $PWD/tmp/multifasta.gff3
 ## multifasta_win
-$DIR/dante.py -q $test_data/vyber-Ty1_01.fasta -pdb $pdb -cs $classification_tbl \
+$DIR/dante.py -q ${TEXT_DATA}/vyber-Ty1_01.fasta -pdb $pdb -cs $classification_tbl \
               -wd 3100 -od 1500 --domain_gff $PWD/tmp/multifasta_win.gff3
 
 # test filtering
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 tmp/multifasta.gff3
--- a/tmp/multifasta.gff3 Wed Jul 03 08:10:33 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,58 +0,0 @@\n-##gff-version 3\n-##-----------------------------------------------\n-##PIPELINE VERSION         : dante-rv-3081(adb2509)\n-##PROTEIN DATABASE VERSION : Viridiplantae_v3.0_pdb\n-##-----------------------------------------------\n-Acoerulea195_58_rc\tdante\tprotein_domain\t1753\t1956\t368\t+\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-PROT__REXdb_ID3879|Class_I|LTR|Ty1/copia|SIRE:1753-1956[100percent];Best_Hit_DB_Pos=1:68of68;DB_Seq=WLLDSGASKHMSGNAKLFSSVTAIDGGSVTFGNGKSSPVIGKGFVAGIGLSPNDVCLLVDGLRVNLIS;Query_Seq=WLLDSGASKHMSGNAKLFSSVTAIDGGSVTFGNGKSSPVIGKGFVAGIGLSPNDVCLLVDGLRVNLIS;Identity=1.0;Similarity=1.0;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Acoerulea195_58_rc\tdante\tprotein_domain\t2140\t2742\t1162\t+\t.\tName=INT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=INT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-INT__REXdb_ID3979|Class_I|LTR|Ty1/copia|SIRE:2140-2742[100percent];Best_Hit_DB_Pos=1:201of201;DB_Seq=HQRLGHVNFEHLDNLTRNEYIKGVPRLGRNRDTVCGGCQLGKQIRSPHSKKKSITTSSPLELIHMDLMGPTRTPSLGGKRYILVMVDDYTRFTWVSFLREKSDAFLEFQGICLRIQNEKDTQIKHIRSDRGGEFTATGVIEYCIANGTWQEFSAPYTPQQNGVAERKNRVIQEMARAMLHAKDVPTKFWAEVVHTACYIMN;Query_Seq=HQRLGHVNFEHLDNLTRNEYIKGVPRLGRNRDTVCGGCQLGKQIRSPHSKKKSITTSSPLELIHMDLMGPTRTPSLGGKRYILVMVDDYTRFTWVSFLREKSDAFLEFQGICLRIQNEKDTQIKHIRSDRGGEFTATGVIEYCIANGTWQEFSAPYTPQQNGVAERKNRVIQEMARAMLHAKDVPTKFWAEVVHTACYIMN;Identity=1.0;Similarity=1.0;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Acoerulea195_58_rc\tdante\tprotein_domain\t2815\t2958\t128\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=RT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-RT__REXdb_ID4340|Class_I|LTR|Ty1/copia|SIRE:2815-2958[100percent];Best_Hit_DB_Pos=1:48of256;DB_Seq=HFHIFGIPCYILADREQRRKMDPKSDAGIFLGYSTNSRAYRVFNSRTR;Query_Seq=YMRVFGSVCYVCKDRQSLSKFDSRGEVALLLGYSSNSRAFRVFNYTTR;Identity=0.46;Similarity=0.65;Relat_Length=0.188;Relat_Interruptions=0.0;Hit_to_DB_Length=0.19\n-Acoerulea195_58_rc\tdante\tprotein_domain\t3268\t4199\t1404\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=RT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-RT__REXdb_ID3881|Class_I|LTR|Ty1/copia|SIRE:3433-4199[82percent];Best_Hit_DB_Pos=1:256of256;DB_Seq=WLVAMQEELGQFKRSDVWTLVPRPTHTNVVGTKWIFKNKLDEFGQIVRNKARLVAQGYSQIEGIDYGETFAPVARLESVRLLLAMACHLNFKLYQMDVKSAFLNGILNEEVYVEQPKGFVDHT-FPNHVFKLQKALYGLKQAPRAWYERLTSFLLGKGFVRGSVDRTLFILRKNTDVLLAQVYVDDIVFGSTCPSLSESFSQLMSSEFEMSLMGELNFFLGLQVKQFDHGAFISQTKYAKELVKKFGLSTSSGQDTP;Query_Seq=WLVAMQEELGQFKRSDVWTLVPRPTHTNVVGTKWIFKNKLDEFGQIVRNKARLVAQGYSQIEGIDYGETFAPVARLESVRLLLAMACHLNFKLYQMDVKSAFLNGILNEEVYVEQPKGFVDHT/FPNHVFKLQKALYGLKQAPRAWYERLTSFLLGKGFVRGSVDRTLFILRKNTDVLLAQVYVDDIVFGSTCPSLSESFSQLMSSEFEMSLMGELNFFLGLQVKQFDHGAFISQTKYAKELVKKFGLSTSSGQDTP;Identity=1.0;Similarity=1.0;Relat_Length=1.0;Relat_Interruptions=0.33;Hit_to_DB_Length=1.0\n-Acoerulea195_58_rc\tdante\tprotein_domain\t4260\t4346\t90\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|Ivana;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Ivana;Best_Hit=Ty1-RH__REXdb_ID3204|Class_I|LTR|Ty1/copia|Ivana:4260-4346[100percent];Best_Hit_DB_Pos=12:40of134;DB_Seq=YRSLIGCLMYLTATRPDIMHAVSLLSKKQ;Query_Seq=YRSMIGSLLYLTASRPDISYSVGVCARFQ;Identity=0.55;Similarity=0.83;Relat_Length=0.216;Relat_Interruptions=0.0;Hit_to_DB_Length=0.22\n-Acoerulea195_58_rc\tdante\tprotein_domain\t4467\t4844\t733\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-RH__REXdb_ID3879|Class_I|LTR|Ty1/copia|SIRE:4467-4844[100percent];Best_Hit_DB_Pos=1:126of126;DB_Seq=DADWGGNLDDRHSTSGGCFYVGNNLVSWHSKKQSSVSISSCEAEYIAAASACTQLLWMRQMLRDYGIQQQAMDLFCDNTSTISISKNPVQHSRTKHIDIRHHFLREAVEKGDIVMEFIPTEHQLAD;Query_Seq=DADWGGNLDDRHSTSGGCFYVGNNLVSWHSKKQSSVSISSCEAEYIAAASACTQLLWMRQMLRDYGIQQQAMDLFCDNTSTISISKNPVQHSRTKHIDIRHHFLREAVEKGDIVMEFIPTEHQLAD;Identity=1.0;Similarity=1.0;Relat_Length='..b'romovirus|OTA|Tat|Retand;Region_Hits_Classifications=RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-RH__REXdb_ID8306|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:683-830[100percent];Best_Hit_DB_Pos=57:105of253;DB_Seq=SAHPHPVSTRPG-REQGGEAPEPNGGLRPPTAGAGPPPACPTVPGAPDPQ;Query_Seq=SHHPHLEHPDQG\\EEEGMEAMGSSSSLRPPSPAIPPPPPAPPLPLTPPPE;Identity=0.4;Similarity=0.48;Relat_Length=0.194;Relat_Interruptions=1.0;Hit_to_DB_Length=0.2\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t2214\t2483\t350\t+\t.\tName=GAG;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=GAG|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-GAG__REXdb_ID2562|Class_I|LTR|Ty1/copia|Bianca:2214-2483[100percent];Best_Hit_DB_Pos=1:90of90;DB_Seq=LRHHLHPDLKSEYMTEKDPLVLWQSLKDRFDQQGSIVLPQAQHDWITLRFQDYKSVAAYNSALHRIISQLRLCGQKITDAEMIEKTLSTF;Query_Seq=LRHHLHSDLKSEYLMEEDPLVLWNSLKERYDQQRAVMLPEAQREWSLIRFQDFKSVAAYNSAVHKVNSKLRFCNQEISEEDLIEKTLCTF;Identity=0.64;Similarity=0.87;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t3336\t3548\t307\t+\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-PROT__REXdb_ID2563|Class_I|LTR|Ty1/copia|Bianca:3336-3548[100percent];Best_Hit_DB_Pos=1:71of71;DB_Seq=CLVDSCTTNSILRETKYFQTLTKRTGNVLTIAGRDATIVGSGRATITLPMGTQVTIEDALLYPDSTRTLLS;Query_Seq=CLVDSCTSNTILREIKYFQTLTKRKGNIMTIDSRDASIIGSGRATLVLPMGSTIAIEDALLYPQSTRTLLS;Identity=0.76;Similarity=0.89;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t3762\t4370\t921\t+\t.\tName=INT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=INT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-INT__REXdb_ID2589|Class_I|LTR|Ty1/copia|Bianca:3762-4370[100percent];Best_Hit_DB_Pos=1:203of203;DB_Seq=HDRLGHPGIGMMRKIISNSNGHELNTAKFPKSSDFMCTSCATGKLILRPSPVKIQNEPLKFLERIQGDICGPIQPTSGPFRYFMVLIDASTRWSHVCLLSTRNHAFAKIIAQLIKLRAHNPEHQIQSIRMDNAAEFSSKAFNDYCMALGIQVQHSVPYVHTQNGLAESLIKRIKLIARPLIQNCNLPTSCWGHAVLHAADLIQ;Query_Seq=HDRLGHPGVGMMTKIIDNSIGHSLPTINFSKLSDFVCTACATGKLIIKPSYLKVKNESLNFLERIQGDICGPIQALSGPFRYFMVLIYASTRWSHVCLLSTRNHAFSQLIDQIIKLRANHPKNRIKTIRMDNAAEFSSRAFNDYCMAMGIHLEHFVPYVHTQNGLAESLIKRVKLVARPLLQNCNLPASCWAHAVLHAADLIQ;Identity=0.77;Similarity=0.89;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t5271\t6058\t1172\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RT__REXdb_ID2589|Class_I|LTR|Ty1/copia|Bianca:5280-6058[98percent];Best_Hit_DB_Pos=1:260of260;DB_Seq=WKEAIKAEIASLTKRGVFTKAIPTPSKVFPVGFKWVFVRKRNENNEVVRYKARLVAQGFTQRPGIDYNETYSPVMSGITFRYLISLAVQNHLSMQLMDVVTAYLYGSLDSDIYMKVPDGIQIPNPNANRNMYCVKLQKSLYGLKQSGRMWYNRLSEFLLNKGYTNNDDCPCVFIKKSQTGFCIISVYVDDLNIIGSPKDIEEARKHL-KTEFEMKDLGKTKYCLGLQIEHRPLGILVHQSAYIQKILEKFNMDKSYPNKTP;Query_Seq=WKEAIEAEVRSLNKREVFSSVIPTPHNVFPVGAKWVFVRKRNENNEVVRYKARLVAQGFTQRPDIDYDDTYSPVMSGITFRYLISLAVQMNLSMQLMDVVTTYLYGSLKSDIYMKVPE*LKMSNPKENRNAYCVKLQKSLYGLKQSGRMWYNRLSEFLIQKGYSNNDDCPCVLIKKSSNGFCIISVYVDDLNIMGSTPDIEEAHNHL/NGEFEMKDLGKTKFCLGLQLEHLPSGILVYQPAYIQKVLENFNMDKSYPTKTP;Identity=0.81;Similarity=0.87;Relat_Length=1.0;Relat_Interruptions=0.67;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t6353\t6767\t384\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RH__REXdb_ID2595|Class_I|LTR|Ty1/copia|Bianca:6353-6767[100percent];Best_Hit_DB_Pos=1:133of133;DB_Seq=DAGYLSDPHNARSQSGFVFLHGGTAISWKSSKQTLVATSTNHS-EIISLFEASRECVWLRRMINHIQKSCGIGSIESPTIIYEDNAACVAQMQTRYIKTNI---TKHISPKWFFPHVLQKKGEINIL--QIKSCENLAD;Query_Seq=DXCYLSDPHNVRSQTGFVFLYGGTAFSWKSTKQTLLATSTNHS\\ELVAFFEAS*DCVWLRRMINPIQTSCGVGSLGSPTIIYEDNAASPLSQNANVVYVRK*YPHTYILPKVILILSALQKGWRNLIFSQIKSCANLAD;Identity=0.59;Similarity=0.7;Relat_Length=1.0;Relat_Interruptions=1.5;Hit_to_DB_Length=1.05\n'
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 tmp/multifasta_win.gff3
--- a/tmp/multifasta_win.gff3 Wed Jul 03 08:10:33 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,58 +0,0 @@\n-##gff-version 3\n-##-----------------------------------------------\n-##PIPELINE VERSION         : dante-rv-3081(adb2509)\n-##PROTEIN DATABASE VERSION : Viridiplantae_v3.0_pdb\n-##-----------------------------------------------\n-Acoerulea195_58_rc\tdante\tprotein_domain\t1753\t1956\t368\t+\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-PROT__REXdb_ID3879|Class_I|LTR|Ty1/copia|SIRE:1753-1956[100percent];Best_Hit_DB_Pos=1:68of68;DB_Seq=WLLDSGASKHMSGNAKLFSSVTAIDGGSVTFGNGKSSPVIGKGFVAGIGLSPNDVCLLVDGLRVNLIS;Query_Seq=WLLDSGASKHMSGNAKLFSSVTAIDGGSVTFGNGKSSPVIGKGFVAGIGLSPNDVCLLVDGLRVNLIS;Identity=1.0;Similarity=1.0;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Acoerulea195_58_rc\tdante\tprotein_domain\t2140\t2742\t1162\t+\t.\tName=INT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=INT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-INT__REXdb_ID3979|Class_I|LTR|Ty1/copia|SIRE:2140-2742[100percent];Best_Hit_DB_Pos=1:201of201;DB_Seq=HQRLGHVNFEHLDNLTRNEYIKGVPRLGRNRDTVCGGCQLGKQIRSPHSKKKSITTSSPLELIHMDLMGPTRTPSLGGKRYILVMVDDYTRFTWVSFLREKSDAFLEFQGICLRIQNEKDTQIKHIRSDRGGEFTATGVIEYCIANGTWQEFSAPYTPQQNGVAERKNRVIQEMARAMLHAKDVPTKFWAEVVHTACYIMN;Query_Seq=HQRLGHVNFEHLDNLTRNEYIKGVPRLGRNRDTVCGGCQLGKQIRSPHSKKKSITTSSPLELIHMDLMGPTRTPSLGGKRYILVMVDDYTRFTWVSFLREKSDAFLEFQGICLRIQNEKDTQIKHIRSDRGGEFTATGVIEYCIANGTWQEFSAPYTPQQNGVAERKNRVIQEMARAMLHAKDVPTKFWAEVVHTACYIMN;Identity=1.0;Similarity=1.0;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Acoerulea195_58_rc\tdante\tprotein_domain\t2815\t2958\t128\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=RT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-RT__REXdb_ID4340|Class_I|LTR|Ty1/copia|SIRE:2815-2958[100percent];Best_Hit_DB_Pos=1:48of256;DB_Seq=HFHIFGIPCYILADREQRRKMDPKSDAGIFLGYSTNSRAYRVFNSRTR;Query_Seq=YMRVFGSVCYVCKDRQSLSKFDSRGEVALLLGYSSNSRAFRVFNYTTR;Identity=0.46;Similarity=0.65;Relat_Length=0.188;Relat_Interruptions=0.0;Hit_to_DB_Length=0.19\n-Acoerulea195_58_rc\tdante\tprotein_domain\t3268\t4199\t1404\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=RT|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-RT__REXdb_ID3881|Class_I|LTR|Ty1/copia|SIRE:3433-4199[82percent];Best_Hit_DB_Pos=1:256of256;DB_Seq=WLVAMQEELGQFKRSDVWTLVPRPTHTNVVGTKWIFKNKLDEFGQIVRNKARLVAQGYSQIEGIDYGETFAPVARLESVRLLLAMACHLNFKLYQMDVKSAFLNGILNEEVYVEQPKGFVDHT-FPNHVFKLQKALYGLKQAPRAWYERLTSFLLGKGFVRGSVDRTLFILRKNTDVLLAQVYVDDIVFGSTCPSLSESFSQLMSSEFEMSLMGELNFFLGLQVKQFDHGAFISQTKYAKELVKKFGLSTSSGQDTP;Query_Seq=WLVAMQEELGQFKRSDVWTLVPRPTHTNVVGTKWIFKNKLDEFGQIVRNKARLVAQGYSQIEGIDYGETFAPVARLESVRLLLAMACHLNFKLYQMDVKSAFLNGILNEEVYVEQPKGFVDHT/FPNHVFKLQKALYGLKQAPRAWYERLTSFLLGKGFVRGSVDRTLFILRKNTDVLLAQVYVDDIVFGSTCPSLSESFSQLMSSEFEMSLMGELNFFLGLQVKQFDHGAFISQTKYAKELVKKFGLSTSSGQDTP;Identity=1.0;Similarity=1.0;Relat_Length=1.0;Relat_Interruptions=0.33;Hit_to_DB_Length=1.0\n-Acoerulea195_58_rc\tdante\tprotein_domain\t4260\t4346\t90\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|Ivana;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Ivana;Best_Hit=Ty1-RH__REXdb_ID3204|Class_I|LTR|Ty1/copia|Ivana:4260-4346[100percent];Best_Hit_DB_Pos=12:40of134;DB_Seq=YRSLIGCLMYLTATRPDIMHAVSLLSKKQ;Query_Seq=YRSMIGSLLYLTASRPDISYSVGVCARFQ;Identity=0.55;Similarity=0.83;Relat_Length=0.216;Relat_Interruptions=0.0;Hit_to_DB_Length=0.22\n-Acoerulea195_58_rc\tdante\tprotein_domain\t4467\t4844\t733\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|SIRE;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|SIRE;Best_Hit=Ty1-RH__REXdb_ID3879|Class_I|LTR|Ty1/copia|SIRE:4467-4844[100percent];Best_Hit_DB_Pos=1:126of126;DB_Seq=DADWGGNLDDRHSTSGGCFYVGNNLVSWHSKKQSSVSISSCEAEYIAAASACTQLLWMRQMLRDYGIQQQAMDLFCDNTSTISISKNPVQHSRTKHIDIRHHFLREAVEKGDIVMEFIPTEHQLAD;Query_Seq=DADWGGNLDDRHSTSGGCFYVGNNLVSWHSKKQSSVSISSCEAEYIAAASACTQLLWMRQMLRDYGIQQQAMDLFCDNTSTISISKNPVQHSRTKHIDIRHHFLREAVEKGDIVMEFIPTEHQLAD;Identity=1.0;Similarity=1.0;Relat_Length='..b'romovirus|OTA|Tat|Retand;Region_Hits_Classifications=RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-RH__REXdb_ID8306|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:683-830[100percent];Best_Hit_DB_Pos=57:105of253;DB_Seq=SAHPHPVSTRPG-REQGGEAPEPNGGLRPPTAGAGPPPACPTVPGAPDPQ;Query_Seq=SHHPHLEHPDQG\\EEEGMEAMGSSSSLRPPSPAIPPPPPAPPLPLTPPPE;Identity=0.4;Similarity=0.48;Relat_Length=0.194;Relat_Interruptions=1.0;Hit_to_DB_Length=0.2\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t2214\t2483\t350\t+\t.\tName=GAG;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=GAG|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-GAG__REXdb_ID2562|Class_I|LTR|Ty1/copia|Bianca:2214-2483[100percent];Best_Hit_DB_Pos=1:90of90;DB_Seq=LRHHLHPDLKSEYMTEKDPLVLWQSLKDRFDQQGSIVLPQAQHDWITLRFQDYKSVAAYNSALHRIISQLRLCGQKITDAEMIEKTLSTF;Query_Seq=LRHHLHSDLKSEYLMEEDPLVLWNSLKERYDQQRAVMLPEAQREWSLIRFQDFKSVAAYNSAVHKVNSKLRFCNQEISEEDLIEKTLCTF;Identity=0.64;Similarity=0.87;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t3336\t3548\t307\t+\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-PROT__REXdb_ID2563|Class_I|LTR|Ty1/copia|Bianca:3336-3548[100percent];Best_Hit_DB_Pos=1:71of71;DB_Seq=CLVDSCTTNSILRETKYFQTLTKRTGNVLTIAGRDATIVGSGRATITLPMGTQVTIEDALLYPDSTRTLLS;Query_Seq=CLVDSCTSNTILREIKYFQTLTKRKGNIMTIDSRDASIIGSGRATLVLPMGSTIAIEDALLYPQSTRTLLS;Identity=0.76;Similarity=0.89;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t3762\t4370\t921\t+\t.\tName=INT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=INT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-INT__REXdb_ID2589|Class_I|LTR|Ty1/copia|Bianca:3762-4370[100percent];Best_Hit_DB_Pos=1:203of203;DB_Seq=HDRLGHPGIGMMRKIISNSNGHELNTAKFPKSSDFMCTSCATGKLILRPSPVKIQNEPLKFLERIQGDICGPIQPTSGPFRYFMVLIDASTRWSHVCLLSTRNHAFAKIIAQLIKLRAHNPEHQIQSIRMDNAAEFSSKAFNDYCMALGIQVQHSVPYVHTQNGLAESLIKRIKLIARPLIQNCNLPTSCWGHAVLHAADLIQ;Query_Seq=HDRLGHPGVGMMTKIIDNSIGHSLPTINFSKLSDFVCTACATGKLIIKPSYLKVKNESLNFLERIQGDICGPIQALSGPFRYFMVLIYASTRWSHVCLLSTRNHAFSQLIDQIIKLRANHPKNRIKTIRMDNAAEFSSRAFNDYCMAMGIHLEHFVPYVHTQNGLAESLIKRVKLVARPLLQNCNLPASCWAHAVLHAADLIQ;Identity=0.77;Similarity=0.89;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t5271\t6058\t1172\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RT__REXdb_ID2589|Class_I|LTR|Ty1/copia|Bianca:5280-6058[98percent];Best_Hit_DB_Pos=1:260of260;DB_Seq=WKEAIKAEIASLTKRGVFTKAIPTPSKVFPVGFKWVFVRKRNENNEVVRYKARLVAQGFTQRPGIDYNETYSPVMSGITFRYLISLAVQNHLSMQLMDVVTAYLYGSLDSDIYMKVPDGIQIPNPNANRNMYCVKLQKSLYGLKQSGRMWYNRLSEFLLNKGYTNNDDCPCVFIKKSQTGFCIISVYVDDLNIIGSPKDIEEARKHL-KTEFEMKDLGKTKYCLGLQIEHRPLGILVHQSAYIQKILEKFNMDKSYPNKTP;Query_Seq=WKEAIEAEVRSLNKREVFSSVIPTPHNVFPVGAKWVFVRKRNENNEVVRYKARLVAQGFTQRPDIDYDDTYSPVMSGITFRYLISLAVQMNLSMQLMDVVTTYLYGSLKSDIYMKVPE*LKMSNPKENRNAYCVKLQKSLYGLKQSGRMWYNRLSEFLIQKGYSNNDDCPCVLIKKSSNGFCIISVYVDDLNIMGSTPDIEEAHNHL/NGEFEMKDLGKTKFCLGLQLEHLPSGILVYQPAYIQKVLENFNMDKSYPTKTP;Identity=0.81;Similarity=0.87;Relat_Length=1.0;Relat_Interruptions=0.67;Hit_to_DB_Length=1.0\n-Wicker_Bianca_AF521177-1\tdante\tprotein_domain\t6353\t6767\t384\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RH__REXdb_ID2595|Class_I|LTR|Ty1/copia|Bianca:6353-6767[100percent];Best_Hit_DB_Pos=1:133of133;DB_Seq=DAGYLSDPHNARSQSGFVFLHGGTAISWKSSKQTLVATSTNHS-EIISLFEASRECVWLRRMINHIQKSCGIGSIESPTIIYEDNAACVAQMQTRYIKTNI---TKHISPKWFFPHVLQKKGEINIL--QIKSCENLAD;Query_Seq=DXCYLSDPHNVRSQTGFVFLYGGTAFSWKSTKQTLLATSTNHS\\ELVAFFEAS*DCVWLRRMINPIQTSCGVGSLGSPTIIYEDNAASPLSQNANVVYVRK*YPHTYILPKVILILSALQKGWRNLIFSQIKSCANLAD;Identity=0.59;Similarity=0.7;Relat_Length=1.0;Relat_Interruptions=1.5;Hit_to_DB_Length=1.05\n'
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 tmp/single_fasta.gff3
--- a/tmp/single_fasta.gff3 Wed Jul 03 08:10:33 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,26 +0,0 @@\n-##gff-version 3\n-##-----------------------------------------------\n-##PIPELINE VERSION         : dante-rv-3081(adb2509)\n-##PROTEIN DATABASE VERSION : Viridiplantae_v3.0_pdb\n-##-----------------------------------------------\n-scaffold146.1|size86774\tdante\tprotein_domain\t976\t1289\t293\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RH__REXdb_ID2558|Class_I|LTR|Ty1/copia|Bianca:976-1289[100percent];Best_Hit_DB_Pos=26:134of134;DB_Seq=ISWRSVKQTITATSSNHAELLALHEASRECVWLRSMIQHIQKNCG-LSSGRMDATIIYEDNTACIAQLKEGYIKGDRTKHISPKFF-FTHDLQKDGDISIQQIRSCDNLAD;Query_Seq=ISWRSTKQTIVAISSNHVELLAIHDTSRECVWLRFMIESI-----\\IMXXXXXXXXXXXXXXXXXXQLKE*YIKCDRTKHISPKFF\\FTQDLQKNGDVIIQQIRSNDNVVD;Identity=0.59;Similarity=0.66;Relat_Length=0.813;Relat_Interruptions=1.5;Hit_to_DB_Length=0.83\n-scaffold146.1|size86774\tdante\tprotein_domain\t6810\t7049\t153\t+\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=PROT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-PROT__REXdb_ID9702|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:6810-7049[100percent];Best_Hit_DB_Pos=1:80of80;DB_Seq=LVDDGSKVNLLPYRVFQQMGIPEEQLVRDQAPVKGIGGVPVLVEGKVKLALTLGEAPRTRTHYAVFLVVKPPLSYNAILG;Query_Seq=LVDSGASCNLMSKRVMKQMGIPDEKLEFLDATLYAFDRRTIIPAGKIQLPVTLGEEERTRSEMVEFIIVDMDLAYNAILG;Identity=0.44;Similarity=0.62;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-scaffold146.1|size86774\tdante\tprotein_domain\t7656\t8296\t.\t+\t.\tName=RT/INT;Final_Classification=Ambiguous_domain;Region_Hits_Classifications_=RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[246bp],INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[468bp]\n-scaffold146.1|size86774\tdante\tprotein_domain\t8756\t9241\t538\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat;Region_Hits_Classifications=RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[486bp],RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Ogre[441bp];Best_Hit=Ty3-RT__REXdb_ID8210|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:8801-9241[90percent];Best_Hit_DB_Pos=27:173of173;DB_Seq=DFTDLNKACPKDSFPLPHIDRLVDSTAGNELLTFMDAFSGYNQIMMNPEDQEKTSFITDRGIYCYKVMPFGLKNAGATYQRLVNKMFHNHLGKTMEVYIDDMLVKSLKKEDHVKHLEECFDILNKYQMKLNPAKCTFGVPSGEFLGY;Query_Seq=DFKGVNKHCQPDPFPLPHIDRLVDAVAGSSLLSTMDAYSGYHQISLAREDQAKSSFLTEDGVFCYVVMPFGLRNAGATYQRLVNKIFADLLGKEMEIYVDDMIVKSLNDEDHIIYLSHCFEVCRTHRLKLNPAKCCFGVRSGKFLGY;Identity=0.63;Similarity=0.8;Relat_Length=0.85;Relat_Interruptions=0.0;Hit_to_DB_Length=0.85\n-scaffold146.1|size86774\tdante\tprotein_domain\t9433\t9781\t343\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-RH__REXdb_ID9729|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:9434-9772[97percent];Best_Hit_DB_Pos=1:113of149;DB_Seq=WTEECEEAFQKLKEYLGSPHLLVKPIQGEPLFLYLAVSEHATSSVLVREDDGVQRPIYYTSRALVDAETRYLSLEKIVLALIVSARRLRPYFQAHTIIVLTDQPIRQVLAKPD;Query_Seq=WTDQCDRAFKELKTYLASPPLIVSPTPTETLGLYLAVSEHAVSSVLVAERDGVQHPVYYVSHTLLPAESRYSTVEKFVLALLKSVAKLRHYFESRKVIVYTDQPIKAVLGQSD;Identity=0.58;Similarity=0.73;Relat_Length=0.758;Relat_Interruptions=0.0;Hit_to_DB_Length=0.76\n-scaffold146.1|size86774\tdante\tprotein_domain\t10810\t11667\t747\t+\t.\tName=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-INT__REXdb_ID9633|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:10819-11667[98percent];Best_Hit_DB_Pos=30:310of310;DB_Seq=RDTHQYVQRCIQCQKFAPLIHKPGEEMTIMSAPCPFAQWGIDLVGPFPQTAGRKKFFIVAVDYFTKWVEAEALSKITEDEVMHFIWKYICCRFGLPRSLVSDNGTQFNGKKIRAWCEEMKITQKFVAVAHPQANGQVESTNRTIVNGLKKRIDELGGSWVDELPSVLWSYRTSAKAATGETPFRLTYGTEAVIPVEVAMDTLRIATF--DEEANDGALRTRLDEIFDLREAAYLHMERSKNLIKARYDQGVRSRSFQIGDLILRRADALKHTGKLEANWEGPY;Query_Seq=RDAMDCVRRCQSCQYFAPINRKPGAEI'..b'rus|OTA|Tat;Region_Hits_Classifications=RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Ogre[99bp],RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[117bp];Best_Hit=Ty3-RH__REXdb_ID8372|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:10701-10817[100percent];Best_Hit_DB_Pos=279:317of317;DB_Seq=NREGTGRVVKWAIELSEFDLHFEPRHAIKSQALADFVVE;Query_Seq=NTDHTSRLAKWAIKVSAMDIAFEPRKAIKGQALADFVVE;Identity=0.64;Similarity=0.77;Relat_Length=0.123;Relat_Interruptions=0.0;Hit_to_DB_Length=0.12\n-scaffold146.1|size86774\tdante\tprotein_domain\t16797\t17666\t1057\t-\t.\tName=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-INT__REXdb_ID6633|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:16812-17666[98percent];Best_Hit_DB_Pos=1:285of313;DB_Seq=HSHSYGGHFGAKRTAHKVLESGFYWPSIFKDAYHFCKSCEKCQRTGNITHKNQMPLTNILVSEIFDVWGIDFMGPFPSSFGNLYILLVVDYVSKWIEAKATRTNDAKVVLDFVRTHIFNRFGIPKAIISDRGTHFCNRSMEALLRKYHVTHRTSTAYHPQTNGQAEISNREIKSILEKIVQPNRRDWSLRLGDALWAYRTAYKSPIGMSPYRMIYGKACHLPVELEHKAFWAIKQCNMDYDAAGIARKLQLQELEEIRNDAYENARIYKEKTKNLHDRMLTRKEF;Query_Seq=HASDYGGHFGPNRTARRILDVGFYWPSIFRDVYQFCRTCDACQRVGNITNRREMPQNYILANEIFDIWGLDFMGPFPQSQGNNYILVAVDYVSKWVEAIPTRTDDGKTVTEFLRKNIFTRYGVPKAIISDRGTHFCNSTMRAMMKKYNVIHKTTTAYHPQGNGQAEATNREIKSILEKVVNKKRSNWSQKLPDALWAYRTAYKTPIGTTPFRLIYGKHCNLPVGLEHKAYWAIREMNFEEGGDAELRQMQLQELDALRLEAYDNSRIYKERLKTYHDKKLLQQNF;Identity=0.61;Similarity=0.79;Relat_Length=0.911;Relat_Interruptions=0.0;Hit_to_DB_Length=0.91\n-scaffold146.1|size86774\tdante\tprotein_domain\t18554\t18811\t306\t-\t.\tName=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-INT__REXdb_ID6693|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:18554-18802[96percent];Best_Hit_DB_Pos=231:313of313;DB_Seq=WALRLLNFDNNACGEKRKLQLQELEEMRLNAYESSRIYKERTKAYHDKKLQRREFQPGQQVLLFNSRLRLFPGKLKSKWSGPF;Query_Seq=WAIREMNFEEGGDAELRQMQLQELDALRLEAYDNSRIYKERLKAYHDKKILQQNFREGQQVLLFNSKLRLFPGKLKSRWMGPF;Identity=0.65;Similarity=0.82;Relat_Length=0.265;Relat_Interruptions=0.0;Hit_to_DB_Length=0.27\n-scaffold146.1|size86774\tdante\tprotein_domain\t19158\t19478\t197\t-\t.\tName=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-INT__REXdb_ID6659|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:19182-19448[83percent];Best_Hit_DB_Pos=216:304of314;DB_Seq=YGKPCHLPVELEHKAWWAVKQCNMELDVAGQHRxLQLQELEEIRNDAYESSxIYKEKTKAFHDKQILRKNFEVGQKVLIFHSRLKLFPG;Query_Seq=FGKQCKVLVGMEHENYWEIREMNYEEGADVEQKQMQLQKMDALKLEAYDNSRIDKEKLKAHHAKRILQQNCKKRQQVLIFDSKLKMFPG;Identity=0.42;Similarity=0.71;Relat_Length=0.283;Relat_Interruptions=0.0;Hit_to_DB_Length=0.28\n-scaffold146.1|size86774\tdante\tprotein_domain\t19976\t20212\t259\t-\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=PROT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-PROT__REXdb_ID6659|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:19976-20212[100percent];Best_Hit_DB_Pos=1:80of80;DB_Seq=MLDLGASINVMPYSIYNSLNLGPMEETCIIIQLADRSNAYPKGVMEDVLVQVNELVFPADFYILKMEDELSPNPTPILLG;Query_Seq=MVDLGASINLMPYYIYSALKLGSLQGTAIIIKLADRSETHPEGVVKDVLAQVNNLVFPADFYVLKM-GEAENDDCPLLLG;Identity=0.62;Similarity=0.79;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-scaffold146.1|size86774\tdante\tprotein_domain\t28912\t29124\t216\t-\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-PROT__REXdb_ID2599|Class_I|LTR|Ty1/copia|Bianca:28912-29124[100percent];Best_Hit_DB_Pos=1:71of71;DB_Seq=CLADCATTHTILRDKRYFLELTLIKANVSTISGTTNLVEGSGRANIMLPNGTRFHINDALYSSKSRRNLLS;Query_Seq=CLVDSATTHTILKNMRYFTSFEKRDVNIATIVCEANIVEGSGRAVIVLPSGTHIRIDDALYANKSRRNLLS;Identity=0.59;Similarity=0.7;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n'
b
diff -r cc10173bd0c5 -r ed4d9ede9cb4 tmp/single_fasta_filtered.gff3
--- a/tmp/single_fasta_filtered.gff3 Wed Jul 03 08:10:33 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,33 +0,0 @@\n-##gff-version 3\n-##-----------------------------------------------\n-##PIPELINE VERSION         : dante-rv-3081(adb2509)\n-##PROTEIN DATABASE VERSION : Viridiplantae_v3.0_pdb\n-##-----------------------------------------------\n-##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n-##Ambiguous_domain\t1\t0\n-##Class_I|LTR|Ty1/copia|Bianca\t6\t5\n-##Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila\t7\t5\n-##Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat\t3\t2\n-##Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand\t4\t2\n-##-----------------------------------------------\n-##SEQ\tDOMAIN\tCOUNTS\n-##scaffold146.1|size86774\tINT\t3\n-##scaffold146.1|size86774\tPROT\t4\n-##scaffold146.1|size86774\tRH\t3\n-##scaffold146.1|size86774\tRT\t3\n-##scaffold146.1|size86774\taRH\t1\n-##-----------------------------------------------\n-scaffold146.1|size86774\tdante\tprotein_domain\t976\t1289\t293\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RH__REXdb_ID2558|Class_I|LTR|Ty1/copia|Bianca:976-1289[100percent];Best_Hit_DB_Pos=26:134of134;DB_Seq=ISWRSVKQTITATSSNHAELLALHEASRECVWLRSMIQHIQKNCG-LSSGRMDATIIYEDNTACIAQLKEGYIKGDRTKHISPKFF-FTHDLQKDGDISIQQIRSCDNLAD;Query_Seq=ISWRSTKQTIVAISSNHVELLAIHDTSRECVWLRFMIESI-----\\IMXXXXXXXXXXXXXXXXXXQLKE*YIKCDRTKHISPKFF\\FTQDLQKNGDVIIQQIRSNDNVVD;Identity=0.59;Similarity=0.66;Relat_Length=0.813;Relat_Interruptions=1.5;Hit_to_DB_Length=0.83\n-scaffold146.1|size86774\tdante\tprotein_domain\t6810\t7049\t153\t+\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=PROT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-PROT__REXdb_ID9702|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:6810-7049[100percent];Best_Hit_DB_Pos=1:80of80;DB_Seq=LVDDGSKVNLLPYRVFQQMGIPEEQLVRDQAPVKGIGGVPVLVEGKVKLALTLGEAPRTRTHYAVFLVVKPPLSYNAILG;Query_Seq=LVDSGASCNLMSKRVMKQMGIPDEKLEFLDATLYAFDRRTIIPAGKIQLPVTLGEEERTRSEMVEFIIVDMDLAYNAILG;Identity=0.44;Similarity=0.62;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-scaffold146.1|size86774\tdante\tprotein_domain\t8756\t9241\t538\t+\t.\tName=RT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat;Region_Hits_Classifications=RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[486bp],RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Ogre[441bp];Best_Hit=Ty3-RT__REXdb_ID8210|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:8801-9241[90percent];Best_Hit_DB_Pos=27:173of173;DB_Seq=DFTDLNKACPKDSFPLPHIDRLVDSTAGNELLTFMDAFSGYNQIMMNPEDQEKTSFITDRGIYCYKVMPFGLKNAGATYQRLVNKMFHNHLGKTMEVYIDDMLVKSLKKEDHVKHLEECFDILNKYQMKLNPAKCTFGVPSGEFLGY;Query_Seq=DFKGVNKHCQPDPFPLPHIDRLVDAVAGSSLLSTMDAYSGYHQISLAREDQAKSSFLTEDGVFCYVVMPFGLRNAGATYQRLVNKIFADLLGKEMEIYVDDMIVKSLNDEDHIIYLSHCFEVCRTHRLKLNPAKCCFGVRSGKFLGY;Identity=0.63;Similarity=0.8;Relat_Length=0.85;Relat_Interruptions=0.0;Hit_to_DB_Length=0.85\n-scaffold146.1|size86774\tdante\tprotein_domain\t10810\t11667\t747\t+\t.\tName=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-INT__REXdb_ID9633|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:10819-11667[98percent];Best_Hit_DB_Pos=30:310of310;DB_Seq=RDTHQYVQRCIQCQKFAPLIHKPGEEMTIMSAPCPFAQWGIDLVGPFPQTAGRKKFFIVAVDYFTKWVEAEALSKITEDEVMHFIWKYICCRFGLPRSLVSDNGTQFNGKKIRAWCEEMKITQKFVAVAHPQANGQVESTNRTIVNGLKKRIDELGGSWVDELPSVLWSYRTSAKAATGETPFRLTYGTEAVIPVEVAMDTLRIATF--DEEANDGALRTRLDEIFDLREAAYLHMERSKNLIKARYDQGVRSRSFQIGDLILRRADALKHTGKLEANWEGPY;Query_Seq=RDAMDCVRRCQSCQYFAPINRKPGAEITLTELPCPFDRWGIDILGPFPQSVRQRRFCIVAVEYHSKWIEAEAVASITSEAVKKFVMNNIIVRFGCPRVLVSDNGPQFISDKFATFCEEYGIQQRTSSVYHPQTNGQAEASNKIILHGLRRNLDSLGGSWPDQLPHVLWAYRTTPKSSTGETPFSLVYGSEAVAPVESTIITPRIAAYMHTESANTEFRELDLDLLEERRNEVYGRVRKQQRALRKRYNQRVRPRQFEKGDLILRSVESQGHKGKLDRAWEGPY;Identity=0.49;Similarity=0.66;Relat_Length=0.906;Relat_Interruptions=0.0;Hit_to_DB_Length=0.91\n-scaffold146.1|size86774\tdante\tprotein_domain\t14592'..b'PVMDATTFRFLISLAIEYGLDLQLMDVVTAYLYGSLDCEIYMKIPEGFHMPERYSSEPRTDYAIKLNKSLYGLKQSGRMWYNRLSEYLIKEGYKNNLVCPCVFMKKFENEFVIIAVYVDDINIVGTQKALLDAVNCLKREFEMKDLGRTKYCLGLQIEYLKNGIF;Identity=0.78;Similarity=0.91;Relat_Length=0.905;Relat_Interruptions=0.0;Hit_to_DB_Length=0.9\n-scaffold146.1|size86774\tdante\tprotein_domain\t27723\t28124\t581\t+\t.\tName=RH;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RH__REXdb_ID2558|Class_I|LTR|Ty1/copia|Bianca:27723-28124[100percent];Best_Hit_DB_Pos=1:134of134;DB_Seq=DAGYLSDPHHGRSQTGYLFTSGNTAISWRSVKQTITATSSNHAELLALHEASRECVWLRSMIQHIQKNCGLSSGRMDATIIYEDNTACIAQLKEGYIKGDRTKHISPKFFFTHDLQKDGDISIQQIRSCDNLAD;Query_Seq=DAGYRSDPHNGRSQTGYVFLNKGAAISWRSTKQTIAATSSNHAELLAIHETSRECVWLRSMIESIYNACGLFTDKMPPTVLYEDNSACIIQLKEGYIKGDRTKHISPKFFFTHDLQKNGEVIIQQIRSSDNVAD;Identity=0.75;Similarity=0.84;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-scaffold146.1|size86774\tdante\tprotein_domain\t10299\t10658\t303\t-\t.\tName=aRH;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat;Region_Hits_Classifications=aRH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[360bp],aRH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Ogre[360bp],aRH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|TatII[360bp];Best_Hit=Ty3-aRH__REXdb_ID9546|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:10299-10658[100percent];Best_Hit_DB_Pos=1:121of121;DB_Seq=WILHVDGASSKQGSGIGIRLQSPYGEVIEQSFCLAFNASNNEAEYESLLAGLRLAVGIGVTKLRAFCNSQLVANQFSGDYEAKDSRMEAYLAQVQELSKKFLSFELARIPRSENSAADSLA;Query_Seq=WNMYIDG-STQSGAGVGVHYITPYGDWINLAVKLQFPATNNVAEYEALLAGMNFALSLGVTRLKTFSDSQLVVEQFSGHFQAKEPMLEAYKSRSQLLAAKFSEFSLEHIPRESNRAADSLA;Identity=0.49;Similarity=0.7;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-scaffold146.1|size86774\tdante\tprotein_domain\t16797\t17666\t1057\t-\t.\tName=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-INT__REXdb_ID6633|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:16812-17666[98percent];Best_Hit_DB_Pos=1:285of313;DB_Seq=HSHSYGGHFGAKRTAHKVLESGFYWPSIFKDAYHFCKSCEKCQRTGNITHKNQMPLTNILVSEIFDVWGIDFMGPFPSSFGNLYILLVVDYVSKWIEAKATRTNDAKVVLDFVRTHIFNRFGIPKAIISDRGTHFCNRSMEALLRKYHVTHRTSTAYHPQTNGQAEISNREIKSILEKIVQPNRRDWSLRLGDALWAYRTAYKSPIGMSPYRMIYGKACHLPVELEHKAFWAIKQCNMDYDAAGIARKLQLQELEEIRNDAYENARIYKEKTKNLHDRMLTRKEF;Query_Seq=HASDYGGHFGPNRTARRILDVGFYWPSIFRDVYQFCRTCDACQRVGNITNRREMPQNYILANEIFDIWGLDFMGPFPQSQGNNYILVAVDYVSKWVEAIPTRTDDGKTVTEFLRKNIFTRYGVPKAIISDRGTHFCNSTMRAMMKKYNVIHKTTTAYHPQGNGQAEATNREIKSILEKVVNKKRSNWSQKLPDALWAYRTAYKTPIGTTPFRLIYGKHCNLPVGLEHKAYWAIREMNFEEGGDAELRQMQLQELDALRLEAYDNSRIYKERLKTYHDKKLLQQNF;Identity=0.61;Similarity=0.79;Relat_Length=0.911;Relat_Interruptions=0.0;Hit_to_DB_Length=0.91\n-scaffold146.1|size86774\tdante\tprotein_domain\t19976\t20212\t259\t-\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=PROT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-PROT__REXdb_ID6659|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:19976-20212[100percent];Best_Hit_DB_Pos=1:80of80;DB_Seq=MLDLGASINVMPYSIYNSLNLGPMEETCIIIQLADRSNAYPKGVMEDVLVQVNELVFPADFYILKMEDELSPNPTPILLG;Query_Seq=MVDLGASINLMPYYIYSALKLGSLQGTAIIIKLADRSETHPEGVVKDVLAQVNNLVFPADFYVLKM-GEAENDDCPLLLG;Identity=0.62;Similarity=0.79;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n-scaffold146.1|size86774\tdante\tprotein_domain\t28912\t29124\t216\t-\t.\tName=PROT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-PROT__REXdb_ID2599|Class_I|LTR|Ty1/copia|Bianca:28912-29124[100percent];Best_Hit_DB_Pos=1:71of71;DB_Seq=CLADCATTHTILRDKRYFLELTLIKANVSTISGTTNLVEGSGRANIMLPNGTRFHINDALYSSKSRRNLLS;Query_Seq=CLVDSATTHTILKNMRYFTSFEKRDVNIATIVCEANIVEGSGRAVIVLPSGTHIRIDDALYANKSRRNLLS;Identity=0.59;Similarity=0.7;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0\n'