Previous changeset 21:b575af79e250 (2019-04-20) Next changeset 23:a455e43048cd (2019-04-20) |
Commit message:
Uploaded |
added:
test-data/NC_015264.fasta test-data/NC_015264.gb test-data/output.fasta test-data/output.gb test-data/output.html |
removed:
README.rst auxiliar.py model1600.sav model2400.sav phagepromoter.py phagepromoter.xml pssm10_6.txt pssm10_8.txt pssm35_6.txt pssm35_9.txt pssm35_cbb.txt pssm35_lb.txt pssm35_t4.txt pssm_21.txt pssm_23.txt pssm_27.txt pssm_32.txt scaler1600.sav scaler2400.sav tool_dependencies.xml |
b |
diff -r b575af79e250 -r 5acc4fa8b62d README.rst --- a/README.rst Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -=============== -PhagePromoter -=============== - -Get promoters of phage genomes - -PhagePromoter is a Python script that predicts promoter sequences in phage genomes, using a machine learning SVM model. This model was built from a training dataset with 19 features and 3200 examples (800 positives and 2400 negatives), each representing a 65 bp sequence of a phage genome. The positive cases represent the phage sequences that are already identified as promoters. - -**Inputs:** - -* genome format: fasta vs genbank; -* genome file: accepts both genbank and fasta formats; -* both strands (yes or no): allows the search in both DNA strands; -* threshold: represents the probability of the test sequence being a promoter (float between 0 and 1); -* family: The family of the testing phage - Podoviridae, Siphoviridae or Myoviridae; -* Bacteria: The host of the phage. The training dataset includes the following hosts: Bacillus, EColi, Salmonella, Pseudomonas, Yersinia, Klebsiella, Pectobacterium, Morganella, Cronobacter, Staphylococcus, Streptococcus, Streptomyces, Lactococcus. If the testing phage has a different host, select the option 'other'; in that case, using a higher threshold value is recommended for more accurate results. -* phage type: The type of the phage, according to its lifecycle: virulent or temperate; - -**Outputs:** -This tool outputs two files: a FASTA file and a table in HTML, with the locations, sequence, score and type (recognized by host or phage RNAP) of the predicted promoters. - -**Requirements:** - -* Biopython -* Sklearn -* Numpy -* Pandas - - |
b |
diff -r b575af79e250 -r 5acc4fa8b62d auxiliar.py --- a/auxiliar.py Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
# -*- coding: utf-8 -*-
"""
Helper functions for PhagePromoter: metadata loaders, PSSM scoring and
simple sequence features.

Created on Sun May 27 17:37:09 2018

@author: Marta
"""

# Dinucleotide -> free-energy contribution, summed by free_energy().
# Built once at import time instead of on every call.
_DINUCLEOTIDE_ENERGY = {
    'AA': -1.00,
    'TT': -1.00,
    'AT': -0.88,
    'TA': -0.58,
    'CA': -1.45,
    'AC': -1.44,
    'GG': -1.84,
    'CC': -1.84,
    'GA': -1.30,
    'AG': -1.28,
    'TC': -1.30,
    'CT': -1.28,
    'TG': -1.45,
    'GT': -1.44,
    'GC': -2.24,
    'CG': -2.17,
}


def _read_column(file, column):
    """Return {index label: value of *column*} for every row of an Excel sheet.

    The first row is used as the header and the first column as the index.
    When an index label is repeated, the last row wins.
    """
    import pandas as pd
    df = pd.read_excel(file, header=0, index_col=0)
    return {ind: row[column] for ind, row in df.iterrows()}


def get_bacteria(file):
    """Get the phage host from an Excel file (column 'Bacteria')."""
    return _read_column(file, 'Bacteria')


def get_families(file):
    """Get the phage family from an Excel file (column 'Family')."""
    return _read_column(file, 'Family')


def get_lifecycle(file):
    """Get the phage lifecycle from an Excel file (column 'lifecycle')."""
    return _read_column(file, 'lifecycle')


def _load_pssm(file_pssm):
    """Parse a tab-separated PSSM file into a Biopython scoring matrix.

    The first four lines of the file are taken as the per-position scores
    for A, C, G and T, in that order.

    Raises:
        ValueError: if a field cannot be parsed as a float (this includes
            blank lines).
        IndexError: if the file has fewer than four lines.
    """
    # NOTE(review): Bio.Alphabet was removed in Biopython 1.78; this module
    # needs an older Biopython release - confirm the pinned version.
    from Bio.Alphabet import IUPAC
    from Bio.motifs import matrix
    # 'with' guarantees the handle is closed; the previous code leaked it.
    with open(file_pssm, 'r') as handle:
        rows = [[float(val) for val in line.strip().split('\t')]
                for line in handle]
    counts = {'A': rows[0], 'C': rows[1], 'G': rows[2], 'T': rows[3]}
    return matrix.PositionSpecificScoringMatrix(IUPAC.unambiguous_dna, counts)


def get_max_pssm(file_pssm):
    """Read a file with a PSSM and return the max possible score of that PSSM."""
    return _load_pssm(file_pssm).max


def get_scores(file_pssm, seq):
    """Score *seq* against the PSSM stored in *file_pssm*.

    Only the given strand is searched (both=False) and hits scoring at or
    below -50 are discarded by the search threshold.

    Args:
        file_pssm: path to a tab-separated PSSM file (see _load_pssm).
        seq: sequence object to scan; its ``alphabet`` attribute is
            overwritten with the unambiguous DNA alphabet as a side effect.

    Returns:
        (scores, positions): two parallel lists. Each score is the raw PSSM
        score divided by the maximum possible score of the matrix.
    """
    from Bio.Alphabet import IUPAC
    # Parse the file once; the maximum comes from the same matrix object.
    pssm = _load_pssm(file_pssm)
    maxi = pssm.max
    seq.alphabet = IUPAC.unambiguous_dna
    scores = []
    positions = []
    for pos, score in pssm.search(seq, both=False, threshold=-50):
        scores.append(score / maxi)
        positions.append(pos)
    return scores, positions


def freq_base(seq):
    """Return the number of A and T bases in a sequence.

    This is an absolute count, not a fraction of the sequence length, and
    only upper-case 'A' and 'T' are counted.
    """
    return seq.count('A') + seq.count('T')


def free_energy(seq):
    """Return the free energy value of a sequence.

    The value is the sum of the table entries for every overlapping
    dinucleotide. Sequences shorter than two bases give 0.

    Raises:
        KeyError: if a dinucleotide contains anything other than
            upper-case A, C, G or T.
    """
    return sum(_DINUCLEOTIDE_ENERGY[seq[i] + seq[i + 1]]
               for i in range(len(seq) - 1))
b |
diff -r b575af79e250 -r 5acc4fa8b62d model1600.sav |
b |
Binary file model1600.sav has changed |
b |
diff -r b575af79e250 -r 5acc4fa8b62d model2400.sav |
b |
Binary file model2400.sav has changed |
b |
diff -r b575af79e250 -r 5acc4fa8b62d phagepromoter.py --- a/phagepromoter.py Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,570 +0,0 @@\n-# -*- coding: utf-8 -*-\n-"""\n-Created on Thu Jul 19 13:45:05 2018\n-\n-@author: Marta\n-"""\n-\n-from Bio import SeqIO\n-import numpy as np\n-import pandas as pd\n-from auxiliar import free_energy,freq_base\n-from Bio.Seq import Seq\n-from Bio.SeqRecord import SeqRecord\n-from Bio.Alphabet import IUPAC\n-from auxiliar import get_bacteria, get_families, get_max_pssm, get_scores, get_lifecycle\n-\n-#division of the test genome in sequences of 65 bp\n-def get_testseqs65(form,fic,both=False):\n- ALL = []\n- indexes = []\n- a = 0\n- rec = SeqIO.read(fic,form)\n- genome = rec.seq\n- i = 0\n- j = 65\n- while j < len(genome):\n- s = genome[i:j]\n- ALL.append([1,i,j,s])\n- i += 20\n- j += 20\n- a += 1\n- indexes.append(rec.name+":"+str(a))\n- if both:\n- comp = genome.reverse_complement()\n- size = len(rec.seq)\n- i = 0\n- j = 65\n- while j < len(comp):\n- s = comp[i:j]\n- ALL.append([-1,size-j,size-i,s])\n- i += 20\n- j += 20\n- a += 1\n- indexes.append(rec.name+":"+str(a))\n- df = pd.DataFrame(ALL, index=indexes, columns=[\'strand\',\'iniprom\',\'endprom\',\'seq\'])\n- return df\n-\n-#calculate the scores of all sequences (similar to get_posScores and get_negScores)\n-def get_testScores(loc,test):\n- scores = []\n- posis = []\n- sizes = []\n- dic = {}\n- for ind,row in test.iterrows():\n- _,window = ind.split(\':\')\n- strand = row[\'strand\']\n- ini = row[\'iniprom\']\n- end = row[\'endprom\']\n- seq = row[\'seq\']\n- pos = [ini,end,strand]\n- dic[window] = pos\n- s = seq\n- score10_6,pos10_6 = get_scores(os.path.join(loc,\'pssm10_6.txt\'), s)\n- maxi10_6 = get_max_pssm(os.path.join(loc,\'pssm10_6.txt\'))\n- score10_8,pos10_8 = get_scores(os.path.join(loc,\'pssm10_8.txt\'), s)\n- maxi10_8 = get_max_pssm(os.path.join(loc,\'pssm10_8.txt\'))\n- scores23,pos23 = get_scores(os.path.join(loc,\'pssm_23.txt\'), s)\n- maxi23 = get_max_pssm(os.path.join(loc,\'pssm_23.txt\'))\n- scores21,pos21 = get_scores(os.path.join(loc,\'pssm_21.txt\'), s)\n- maxi21 = 
get_max_pssm(os.path.join(loc,\'pssm_21.txt\'))\n- scores27,pos27 = get_scores(os.path.join(loc,\'pssm_27.txt\'), s)\n- maxi27 = get_max_pssm(os.path.join(loc,\'pssm_27.txt\'))\n- scores32,pos32 = get_scores(os.path.join(loc,\'pssm_32.txt\'), s)\n- maxi32 = get_max_pssm(os.path.join(loc,\'pssm_32.txt\'))\n- score23 = max(scores23)\n- score21 = max(scores21)\n- score27 = max(scores27)\n- score32 = max(scores32)\n- maxiphage = max(score23,score21,score27,score32)\n- if maxiphage == score23: phage_max = score23*maxi23\n- elif maxiphage == score21: phage_max = score21*maxi21\n- elif maxiphage == score27: phage_max = score27*maxi27\n- elif maxiphage == score32: phage_max = score32*maxi32\n- score35_6,pos35_6 = get_scores(os.path.join(loc,\'pssm35_6.txt\'), s)\n- maxi35_6 = get_max_pssm(os.path.join(loc,\'pssm35_6.txt\'))\n- score35_9,pos35_9 = get_scores(os.path.join(loc,\'pssm35_9.txt\'), s)\n- maxi35_9 = get_max_pssm(os.path.join(loc,\'pssm35_9.txt\'))\n- score35_t4,pos35_t4 = get_scores(os.path.join(loc,\'pssm35_t4.txt\'), s)\n- maxi35_t4 = get_max_pssm(os.path.join(loc,\'pssm35_t4.txt\'))\n- score35_cbb,pos35_cbb = get_scores(os.path.join(loc,\'pssm35_cbb.txt\'), s)\n- maxi35_cbb = get_max_pssm(os.path.join(loc,\'pssm35_cbb.txt\'))\n- score35_lb,pos35_lb = get_scores(os.path.join(loc,\'pssm35_lb.txt\'),s)\n- maxi35_lb = get_max_pssm(os.path.join(loc,\'pssm35_lb.txt\'))\n- score35_mu, pos35_mu = get_scores(os.path.join(loc,\'pssm35_mu.txt\'),s)\n- maxi35_mu = get_max_pssm(os.path.join(loc,\'pssm35_mu.txt\'))\n- \n- dists6 = []\n- score6 = []\n- for p in pos10_6:\n- for a in range(14,22):\n- d = p-a-6\n- '..b' inds = new_df.groups[g]\n- if len(inds) == 1: new_inds.append(inds[0])\n- else:\n- maxi = max(new_df.get_group(g)[\'Scores\'])\n- i = new_df.groups[g][new_df.get_group(g)[\'Scores\']==maxi][0]\n- new_inds.append(i)\n- \n- output = test.loc[new_inds,:]\n- strands = []\n- new_pos = []\n- old_pos = output[\'Positions\'].tolist()\n- \n- from Bio.SeqFeature import 
SeqFeature, FeatureLocation\n- feats = rec.features\n- for ind, row in output.iterrows():\n- pos = row[\'Positions\']\n- if \'complement\' in pos: \n- strands.append(\'-\')\n- new_pos.append(pos[10:])\n- ini,end= pos[11:-1].split(\'..\')\n- new_loc = FeatureLocation(int(ini),int(end),strand=-1)\n- else: \n- strands.append(\'+\')\n- new_pos.append(pos)\n- ini,end= pos[1:-1].split(\'..\')\n- new_loc = FeatureLocation(int(ini),int(end),strand=1)\n- feat = SeqFeature(new_loc, type=\'regulatory\',qualifiers={\'regulatory_class\':[\'promoter\'], \'note=\':[\'predicted by PhagePromoter\']})\n- feats.append(feat) \n- \n- output.insert(loc=0, column=\'Strand\', value=strands)\n- output[\'Positions\'] = new_pos\n- output.to_html(\'output.html\',index=False, justify=\'center\')\n- recs = []\n- i = 0\n- for ind,row in output.iterrows():\n- s = Seq(row[\'Promoter Sequence\'])\n- posis = old_pos[i]\n- typ = row[\'Type\']\n- score = row[\'Scores\']\n- sq = SeqRecord(seq=s, id=ind, description=typ+\' \'+posis+\' score=\'+str(score))\n- recs.append(sq)\n- i += 1\n- SeqIO.write(recs, \'output.fasta\',\'fasta\')\n- new_rec = rec\n- new_rec.seq.alphabet = IUPAC.IUPACAmbiguousDNA()\n- new_feats = sorted(feats, key=lambda x: x.location.start)\n- new_rec.features = new_feats\n- SeqIO.write(new_rec,\'output.gb\',\'genbank\')\n-\n-if __name__== "__main__":\n- \n- import sys\n- import os\n- __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))\n- \n- gen_format = sys.argv[1]\n- genome_file = sys.argv[2]\n- both = sys.argv[3]\n- threshold = sys.argv[4]\n- family = sys.argv[5]\n- host = sys.argv[6]\n- phage_type = sys.argv[7]\n- model = sys.argv[8]\n- \'\'\'\n- gen_format = \'genbank\'\n- genome_file = \'test-data/NC_015264.gb\'\n- both = False\n- threshold = \'0.50\'\n- family = \'Podoviridae\'\n- host = \'Pseudomonas\'\n- phage_type = \'virulent\'\n- model = \'SVM2400\'\n- #model = \'ANN1600\'\n- \'\'\'\n- \n- rec = SeqIO.read(genome_file, gen_format)\n- 
test_windows = get_testseqs65(gen_format, genome_file,both)\n- try: score_test,dic_window = get_testScores(__location__,test_windows)\n- except IndexError: print(\'Error. Input sequence can only have A,C,G or T\')\n- else:\n- df_test,df_testinfo = create_dftest(score_test,dic_window,family,host,phage_type)\n- if model == \'ANN1600\':\n- scaler_file = os.path.join(__location__, \'scaler1600.sav\')\n- model_file = os.path.join(__location__, \'model1600.sav\')\n- preds = get_predictions(scaler_file, model_file, df_test,df_testinfo,threshold)\n- if preds is None: print(\'There is no sequence with a score value higher or equal to the threshold \'+str(threshold))\n- elif type(preds) == str: print(preds)\n- else: output = get_finaldf(preds,rec)\n- else:\n- scaler_file = os.path.join(__location__, \'scaler2400.sav\')\n- model_file = os.path.join(__location__, \'model2400.sav\')\n- new_df_test = df_test.iloc[:,[0,1,2,3,4,5,6,7,8,9,13,14,16,17,19,20,22,24,25]]\n- preds = get_predictions(scaler_file, model_file, new_df_test,df_testinfo,threshold)\n- if preds is None: print(\'There is no sequence with a score value higher or equal to the threshold \'+str(threshold))\n- elif type(preds) == str: print(preds)\n- else: output = get_finaldf(preds,rec)\n- \n' |
b |
diff -r b575af79e250 -r 5acc4fa8b62d phagepromoter.xml --- a/phagepromoter.xml Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
<tool id="get_proms" name="PhagePromoter" version="0.1.0">
    <!-- Galaxy wrapper for phagepromoter.py: predicts promoter sequences in a phage genome. -->
    <description>
Get promoters of phage genomes
    </description>
    <requirements>
        <requirement type="package">biopython</requirement>
        <requirement type="package">scikit-learn</requirement>
        <requirement type="package">numpy</requirement>
        <requirement type="package">pandas</requirement>
    </requirements>
    <!-- Positional arguments, in the order phagepromoter.py reads them from sys.argv:
         format, genome file, both-strands flag, threshold, family, host, phage type, model. -->
    <command detect_errors="exit_code" interpreter="python3"><![CDATA[
        phagepromoter.py "$input_type.genome_format" "$genome" "$both" "$threshold" "$family" "$bacteria" "$lifecycle" "$adv.model" ]]>
    </command>
    <inputs>
        <conditional name="input_type">
            <param type="select" name="genome_format" label="file format">
                <option value="genbank" selected="yes">genbank</option>
                <option value="fasta">fasta</option>
            </param>
            <when value="genbank">
                <param type="data" name="genome" format="genbank" label="genome"/>
            </when>
            <when value="fasta">
                <param type="data" name="genome" format="fasta" label="genome"/>
            </when>
        </conditional>
        <!-- The flag reaches the script as a string: "-both" when checked, empty when unchecked. -->
        <param type="boolean" name="both" label="Search both strands" checked="false" truevalue="-both" falsevalue="" />
        <!-- min/max enforce the 0..1 range that the help text documents. -->
        <param name="threshold" type="float" value="0.50" min="0" max="1" label="Threshold" help="Probability of being a promoter (float between 0 and 1)" />
        <param type="select" name="family" label="Phage family">
            <option value="Podoviridae" selected="yes">Podoviridae</option>
            <option value="Siphoviridae">Siphoviridae</option>
            <option value="Myoviridae">Myoviridae</option>
        </param>
        <param type="select" name="bacteria" label="Host bacteria Genus">
            <option value="Escherichia coli" selected="yes">Escherichia coli</option>
            <option value="Salmonella">Salmonella</option>
            <option value="Pseudomonas">Pseudomonas</option>
            <option value="Yersinia">Yersinia</option>
            <option value="Morganella">Morganella</option>
            <option value="Cronobacter">Cronobacter</option>
            <option value="Staphylococcus">Staphylococcus</option>
            <option value="Streptococcus">Streptococcus</option>
            <option value="Lactococcus">Lactococcus</option>
            <option value="Streptomyces">Streptomyces</option>
            <option value="Klebsiella">Klebsiella</option>
            <option value="Bacillus">Bacillus</option>
            <option value="Pectobacterium">Pectobacterium</option>
            <option value="other">other</option>
        </param>
        <param type="select" name="lifecycle" label="Phage type">
            <option value="virulent" selected="yes">virulent</option>
            <option value="temperate">temperate</option>
        </param>
        <section name="adv" title="Advanced Options" expanded="False">
            <param type="select" name="model" label="Model">
                <option value="SVM2400" selected="yes">SVM2400</option>
                <option value="ANN1600">ANN1600</option>
            </param>
        </section>
    </inputs>
    <!-- The script writes fixed file names into its working directory; they are collected from there. -->
    <outputs>
        <data name="output1" format="html" from_work_dir="output.html" />
        <data name="output2" format="fasta" from_work_dir="output.fasta" />
        <data name="output3" format="genbank" from_work_dir="output.gb" />
    </outputs>
    <tests>
        <test>
            <param name="genome_format" value="genbank"/>
            <param name="genome" value="NC_015264.gb"/>
            <param name="both" value="False"/>
            <param name="threshold" value="0.50"/>
            <param name="family" value="Podoviridae"/>
            <param name="bacteria" value="Pseudomonas"/>
            <param name="lifecycle" value="virulent"/>
            <param name="model" value="SVM2400"/>
            <output name="output1" file="output.html"/>
            <output name="output2" file="output.fasta"/>
            <output name="output3" file="output.gb"/>
        </test>
    </tests>
    <help><![CDATA[

===============
PhagePromoter
===============

Get promoters of phage genomes

PhagePromoter is a Python script that predicts promoter sequences in phage genomes, using machine learning models.
Two different datasets were used to develop two models: the ANN model was built using a dataset with 26 features and 2400 examples (800 positives and 1600 negatives) and the SVM model was created using a dataset with 19 features and 3200 examples (800 positives and 2400 negatives).
Each example represents a sequence of 65 base pairs of a phage genome. The positive examples correspond to phage sequences already identified as promoters.

**Inputs:**

* genome format: fasta vs genbank (default);
* genome file: accepts both GenBank and FASTA formats;
* both strands: yes or no (default). Allows the search only in the direct strand or in both DNA strands;
* threshold: represents the probability of the test sequence being a promoter (a float between 0 and 1, default=0.50). For example, if threshold=0.90, the model will only return predicted sequences with more than 90% probability of being a promoter. The larger the genome, the higher the threshold should be.
* Family: The family of the testing phage - Podoviridae (default), Siphoviridae or Myoviridae;
* Host: The host of the phage. The training dataset includes the following hosts: Bacillus, Escherichia coli (default), Salmonella, Pseudomonas, Yersinia, Klebsiella, Pectobacterium, Morganella, Cronobacter, Staphylococcus, Streptococcus, Streptomyces, Lactococcus. If the testing phage has a different host, select the option 'other'.
* Phage type: The type of the phage, according to its lifecycle: virulent or temperate;

**Advanced options:**

* Model: the user can choose which model to run: the SVM model (default) or the ANN model. The SVM model uses more negative data, so it will return fewer promoters but with a higher probability of being real promoters. However, it can fail to detect some of the real promoters. On the other hand, the ANN model will predict more promoters, so it can identify more real promoters, but it is expected to predict more false positives.

**Outputs:**

This tool outputs three files: a FASTA file and a table in HTML, with the locations, sequence, score and type (recognized by host or phage RNAP) of the predicted promoters, and a GenBank file with the predicted promoters as features.

**Requirements:**

* Biopython
* Sklearn
* Numpy
* Pandas

    ]]> </help>
</tool>
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm10_6.txt --- a/pssm10_6.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ --3.24 1.93 -0.34 1.38 1.43 -3.05 --2.14 -4.24 -1.03 -1.44 -1.19 -4.05 --2.29 -4.46 -1.12 -1.44 -1.53 -3.59 -1.79 -3.59 1.17 -0.61 -0.96 1.9 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm10_8.txt --- a/pssm10_8.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -0.09 1.72 -4.49 1.92 1.95 1.95 -4.49 1.92 --1.68 -1.32 -4.49 -4.49 -4.49 -4.49 -3.49 -4.49 --0.79 -2.49 -4.49 -2.91 -4.49 -4.49 -2.49 -4.49 -1.03 -2.91 1.95 -4.49 -4.49 -4.49 1.88 -2.91 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm35_6.txt --- a/pssm35_6.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ --2.84 -2.12 -2.4 1.49 -0.74 1.16 --2.95 -4.65 -2.56 -0.26 1.13 -1.56 --4.33 -2.48 1.73 -3.33 -1.69 -1.65 -1.88 1.83 -1.65 -1.95 -0.14 0.15 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm35_9.txt --- a/pssm35_9.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -0.93 0.79 -0.65 -1.87 -0.14 -1.46 -1.14 -0.14 1.13 --1.46 -1.87 -1.46 -2.46 1.35 -3.46 -2.46 -0.14 -0.65 --0.87 -1.87 -1.87 1.79 -1.87 -1.87 -3.46 -1.46 -1.87 -0.24 0.79 1.45 -3.46 -1.87 1.71 1.71 0.86 -0.14 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm35_cbb.txt --- a/pssm35_cbb.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ --2.46 -2.46 1.45 1.79 1.79 -2.46 0.13 --2.46 -2.46 -2.46 -2.46 -2.46 1.54 -2.46 --2.46 1.79 -0.14 -2.46 -2.46 -2.46 1.24 -1.79 -2.46 -2.46 -2.46 -2.46 -0.46 -1.46 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm35_lb.txt --- a/pssm35_lb.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ --0.81 -0.81 -0.81 -0.81 -0.81 0.19 -0.81 -0.81 0.78 0.19 -0.81 -0.81 -0.81 -0.81 --0.81 -0.81 -0.81 1.19 -0.81 -0.81 -0.81 0.19 -0.81 0.19 -0.81 -0.81 -0.81 1.19 --0.81 -0.81 1.19 -0.81 1.19 -0.81 0.78 -0.81 0.19 -0.81 -0.81 -0.81 1.19 -0.81 -1.19 1.19 -0.81 -0.81 -0.81 0.78 0.19 0.78 -0.81 0.19 1.19 1.19 -0.81 -0.81 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm35_t4.txt --- a/pssm35_t4.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ --2.43 -3.43 -2.43 -2.43 1.86 -2.43 1.33 --3.43 -3.43 -3.43 -3.43 -3.43 1.86 -3.43 -1.82 -3.43 -2.43 -3.43 -3.43 -3.43 -3.43 --2.43 1.9 1.82 1.86 -2.43 -3.43 0.38 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm_21.txt --- a/pssm_21.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ --2.0 -1.0 -0.42 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 1.7 -2.0 -1.0 -1.0 -2.0 -2.0 -2.0 -2.0 0.81 0.58 -1.17 1.46 -2.0 -2.0 -1.0 -2.0 -1.0 -2.0 1.7 1.7 -0.42 -2.0 1.7 -2.0 0.81 1.7 1.7 -2.0 -2.0 0.81 0.81 --0.42 -2.0 1.46 1.7 1.58 -2.0 -2.0 1.7 -2.0 -2.0 1.46 -2.0 -2.0 -1.0 -0.42 -2.0 -2.0 -2.0 -2.0 -2.0 -1.0 --0.42 -1.0 -2.0 -2.0 -2.0 1.7 1.58 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 1.46 0.0 -2.0 -2.0 1.7 1.7 -2.0 -2.0 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm_23.txt --- a/pssm_23.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -0.07 1.71 1.18 -0.18 1.65 0.6 -1.86 0.82 -2.86 -0.79 -5.18 1.88 -1.86 -4.18 1.69 0.97 1.26 -0.05 -0.79 0.07 1.11 0.28 1.25 --0.32 -2.86 -2.86 -4.18 -2.86 0.49 0.6 0.52 1.68 -2.37 1.88 -1.86 1.84 -1.09 -2.86 -3.18 -1.18 -5.18 -0.54 -1.86 -1.86 -1.48 -1.09 --3.59 -2.86 -3.59 -4.18 -3.18 -0.32 0.95 -0.86 -1.37 -1.59 -1.86 -5.18 -3.18 -5.18 -2.59 -3.59 -1.18 1.56 1.41 1.3 0.6 1.16 -0.18 -1.05 -1.18 0.6 1.59 -0.72 -1.86 -1.86 -2.01 -1.86 1.53 -5.18 -5.18 -5.18 1.79 -1.09 0.89 -0.48 -4.18 -3.59 -2.18 -4.18 -2.37 -1.86 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm_27.txt --- a/pssm_27.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ --3.09 -2.09 1.67 -1.5 -1.09 -0.28 -3.09 -3.09 -2.09 -3.09 -3.09 -3.09 1.87 -3.09 -3.09 -3.09 -3.09 -3.09 -1.5 -3.09 -3.09 -2.09 -3.09 1.72 1.56 1.5 -1.5 -1.82 -3.09 -3.09 1.67 1.56 1.3 -1.09 -0.5 1.16 1.67 1.67 -1.09 -3.09 1.87 -3.09 1.87 1.67 -0.77 -3.09 0.82 1.16 -0.28 0.91 -2.09 -2.09 -0.5 0.08 --3.09 1.82 -2.09 -2.09 -3.09 -0.77 0.08 1.16 0.5 -3.09 -0.77 -2.09 -3.09 -3.09 -2.09 -3.09 -1.09 1.67 1.77 1.0 0.61 -3.09 -3.09 -1.5 -3.09 -3.09 -0.5 --2.09 -3.09 -1.09 -2.09 -1.09 -3.09 1.23 -0.09 -3.09 -0.77 -3.09 1.67 -3.09 -3.09 1.82 -3.09 -2.09 -3.09 -3.09 -3.09 -3.09 1.5 0.91 -3.09 -0.5 -1.5 0.91 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d pssm_32.txt --- a/pssm_32.txt Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 0.78 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 1.65 --1.81 -1.81 1.65 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 1.65 1.65 -1.81 -1.81 1.19 -1.81 -1.81 -1.81 -1.81 0.78 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 --1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 0.78 -1.81 -1.81 -1.81 -0.81 -1.81 -1.81 1.51 1.65 1.65 1.65 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 --1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 1.51 -1.81 0.19 -0.81 -1.81 -1.81 -1.81 0.78 -1.81 -1.81 1.65 -1.81 1.65 -1.81 1.65 -1.81 -1.81 -1.81 |
b |
diff -r b575af79e250 -r 5acc4fa8b62d scaler1600.sav |
b |
Binary file scaler1600.sav has changed |
b |
diff -r b575af79e250 -r 5acc4fa8b62d scaler2400.sav |
b |
Binary file scaler2400.sav has changed |
b |
diff -r b575af79e250 -r 5acc4fa8b62d test-data/NC_015264.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NC_015264.fasta Sat Apr 20 11:07:28 2019 -0400 |
b |
b'@@ -0,0 +1,588 @@\n+>NC_015264.1 Pseudomonas phage phiIBB-PF7A, complete genome\n+AAGGGTTACTGGGACACGTTCAATCCGCTCGACAAGCACCCAACAATGGCTGACCACGGGATAGCGGAAT\n+GGGTCAGCCCGGATTCAACGCAGAAGACACGGAACAGCATCCAATGAACGTGACTGTCCGGGGTCTCTTC\n+AAGCTCTGCAATAGCTGCAACCAACGGGGCCAAGTGGCTCAAGTCTGGCTTGTCGATCACTTCGCTTGCC\n+GCTGCATCTTCCTACCTTACGACCATCAATAAGGAGTCACCATGACCAACGCAAAGCGCACCACAGCTCA\n+GACCATCAAGCACAAGGGTCGCAAGTACGTCGTAATTCACCGCTCGGTTACCCTGCTGTTGCTGAAGCGT\n+GCCGGTAAGCCTCACCACTTCACCCTTGAAGGCGGCAAAGAGGGCACCCTGAAGCGTCATAAAGACTTCT\n+GGGCGGCACTGCAACACTACAGCGACCGTCAGGACGCACTGGGCATCAAGGGCCACGCTGTAACCGCTGT\n+GGTTGGCACCGAGGACGGCCCTGTTGTCACTGAAGGCGAGCTGGCCGCAATCGAACTGATGGCATCGCAA\n+GGGGCCACAACGGGCCGTGTTAGCGCCAAGGAACCGAACTAATGCCAAAGCCTAATAAGTACAAGGGTGA\n+CGGCTCCAAGAAGCCTGAAGGGACCGTAGAGGGCGCCTACGTCATGCACAAGGGTCGCATGGTCCCGAAC\n+TTTCAAGCCACCGACTCGGCTATCGAGCGGGGCATCAACGCCTACAAGGCATTCAAGGGGAACGCTATGC\n+TGAAGTGTGTGACTCGCATCATGCTGTACATCATGGTCGCTGCAATGTGTGCGGCGCTGCTGTTCGGGAT\n+GACTGGCTGTCAAGTCAACGTCGTGAACGTGATTCACAGTGACATCGGGCTGGATGCGTCCAGTAACCTC\n+AACGCTTTGACCGAGTAGCGTTAATCGGTGGCCTTCTCTTGAGGGTCACCCGTTAAATCCACTCACTGCT\n+AATGCAGGTATCACCATGATCGCTCTAAACTACACCAGCTTCACCAGCCGCGAAGTCGCCGCTAAGATCC\n+TTGCAGCAATGCAGGAAGTCCGGGCCACAGGTAACGCTGTGCGTGTCCTCAATCGTCGCGGTAAGGCTTT\n+CCTGCTGGTCACAATCCACAAGGACGCCTTAGGGTACGCCTTCAAGTTCATCGCTGAGGATGGCACCGAG\n+GTCGGCCAGATGATCCAACGTGCGTCTAACGACTGGGATAACCCGACGTTTACCGCTTACTGGTCTATCC\n+TGAGCTGGGCATGGGACCTGAAGGAACACCCACTGCTGAGCCTGTCTAAGCTGGCCGCACAGGCCGAAGC\n+GATGAAAGGGCAAGGGGCAACCCACAAAGTGACCTGCTACGGCGGTACGGTGCAGTACGGGGCCTATCAG\n+CGTGACTGGCTGTGTCGTCGCCGGTTGTACCTGTTCGGTAAGGACGGAGTATGGCGCAAGGTTGACGATG\n+ACCAAGCCGCTCAAATCTGCTGGATCGAGGTGCTGAAATGATCTTGAGTGACCCAGCGGTAGCCATTTGC\n+TCAGGCGAGAAGTATGCACCGGGCCTGTACTGGGAGGGTCGCCAGATCCTTGACCAGATTGACCAGTACA\n+TGGACGCCTACGGGCCGATCCAACCGGGCGTTGACTATGCGCCAGATAGCGAATGGGCCAAGTTCTTCGC\n+CGATAACGTCATCATGTATGCACCGGGCGTGTATGCCATCCGCTACCTGAACCCGGACTACTGCAAACGT\n+CTGCTGGTCGAGCTGTCCGATGTCCAGTACACGGTCAATGAAGAGGAACCCGAAGACGCTCAGATCCCTG\n+AGGTCGTCCTTGAGGAAC
ACCACTATGGCCTCTTCCAGTGCCTGAGAGGGCTTTTCGAGGGGTATGCAAG\n+GAAACTGGCGTACATCCTCATGGGTCTCGAAACGGGGCCTTGTGTGTCCATACAGGCCGCACGGTACACC\n+CCAATGAACACGCCCCACGGCTGCTGGCACACTGATCGGGACTCTGAGGTGACCCTTGTGGTCGCCCTGA\n+GCGATGACCATGAAGGCGGTGGCACTCAGGTATATCAAGGCCCGTTGCTGCCTCCTGTGACGGTCCCACA\n+GCTTCCCACAGGCTGGGCCATGCTCTTCGCTGGTCGCACAAATGAACACATGGGTCTGCCGGTCACCAAA\n+GGCACCCGTAATTTGCTCGTACACTGGTACGGATTGGAGAAATAAAATGTCCCGCTTCGCAAACGTAGGT\n+TCCCAAGCATGTAACGCACTGGCTGTCAACATGGTTCACGCCATGGATGCAGACTTCTCGTCTCTTGAGC\n+GCCGTGCAATGGGTCACACACTGGCCGAGATCACTGGCCGCAAGATCCGCAAGCCGGGTCTGTATGACAA\n+GCATGTCTCGGACGCCAAACAGGGTTCCATTGCAGCGACCTACGTGGCCCACTCTGAAGGCAAGGCCGCA\n+GTAATGACCATGGCCTACGGGATGCGTCCTCAGACCGACCTACAACACGCTCTGGACGCCCGGTATCGTC\n+AACCCGGCTTCGCTGGTGCGCAGTTCTTCACCGAGCGCGGCGACTTCACTCATCTGGCGGGGCGGGGCGT\n+ATGAACCTGCACAGCATCAAGTTCGCCGTAGAGGTCGAGGCACGGCTACTGAATCCAGACCACAGCCATG\n+CGGCTGACGTGAAGGGTCATAAAGTCCTCGACTGGAAACTCGGCTTGGGTCCTTGTGGCCGCTGGGTTGA\n+CCAGTTGACCGCTGAGATCGACATGGCAACGCTGAGAATCCGCCAAACGTCCTACAGCTCAGACCCCACA\n+AAGGAACTCGAAAGGGTCCAGCGTGGGCTGTACAAGGACGATGGGGCGCTTGAGCCGTTCGGTCCAAACA\n+AGCGGGATCGTATGGCCGCACTTGAGGCCCGCCAGCGACTCATCAAGGATCGCAAGATTGAGACGTTTAT\n+CTACAAGTTAGAAGACGTTCGCGGCAGGATCAAGGCAGTCGAGCGCTGAGGACGTTAAAAACCCTCACCA\n+GAACAGGGACCACTTAAGGGGCCTTTAACTTAAAGACCCTTTAAGATCCTTTAAGATTACTCTTATAGTA\n+ATTATCATTAAGTAAAATCATTAAGTAAGAGGGTGTACCGCAGTGGCATTCATTGAGAAACAGAAACACA\n+ACTTCGACGACGTGAAGACCTCATGGGCCTTTGATACCCTGAGCAAGCTTTACGGTGAAGACCTCGCCGC\n+TGCCCAACTGGCTCTCGAACATGAGTCGCACATGATGGGTGAAGAGCGGTTCCATAAGGCGCTTGATCGC\n+CAGATTGAACGCTCGGAATTCGCTGAAACATCCGTCGCAAAACCACTGGTCGCCATGCTGGTGCCGATGT\n+TCGTCAAGGAGTTCGCCGAATGGACTGAACACCAGATGACCAAGGTGCGCCGCAAGTCGGTGGCCCTGAA\n+GTACCTCAACATGGTTCGCCCTGAACGGGTCGCTGCGTTGACGATCAAGCTGGTCATTACGGAAATGGCT\n+CGCCAGCGTCAAGACCTCGTGTCGGTGACCACTCGAATCGGTCGGACCATCGAAGAGGAAGCCCGCTTTG\n+GTCGCATCCGTGACGAAGAAGCGAAGCACTTTCAGAAGCACATCAAGGACGCTCTCAACAAGCGCAACGG\n+CCACACCTACAAGAAAGCCTTCATGGAAGCTGTCGAGTCCAAGATGCTGGAAGCTGGTGAGCTGAACGGT\n+GCGTGGGCTGACTGGGCAAACTCTGACTCGGACGTAATGCACCACAT
CGGCGCTCGCTGCCTTGAAATCC\n+TGATTAAGTCCACAGCACTGGTCGAGATCGT'..b'ATGGAGTGGGG\n+CGGCTTCCGTGGTGGCTACGAGGACACAACCCTCGAAGCTCTGGCGAAGGTCGGCAAGAAGCACAAGGTA\n+AGCGAAGTGGTTATTGAAGGTAACTTCGGTGACGGTATGTACACCAAGCTGTTCAGCCCTGTGATGACCC\n+GTATCCACCGTTGCGCTATCACTGAGGTGAAGTCCAAGGGTCAGAAAGAAATGCGTATCTGCGACGTGCT\n+GGAGCCTGTACTGGGTTCTCACAAGCTGATCGTGCATGAGTCTGTCATTGAACAGGACTACCGTACAGCC\n+CTCAACGCTGACGGGACGACTGACGTTGTGTACTCCGGGTTCCACCAACTGACCCGACTGACCAAGGAGC\n+GTGGCTCTCTGGGCCATGATGACCGTCTGGATGCTCTCGCCATCGGCGTGCAGTTCTTCACGGACTCCAT\n+GGAGAAGGACAGTGAGCGTGGCGCTGAGGAAATGCTGGAAGGCTTCCTTGAGGATCACCTTGAGAATCAC\n+ATGGTCGGCTTCGAGCAGGCCCGTGAGATCAGCCTCGGGAATGGCGTAAACATCCAGTGGGACGACGATG\n+ACGGCACTGAAGGGAACTTCATGGGCTGGTAACGAAGTCTGCACGATAGCGACACGTTCGACGAAGATTA\n+AAAACCCTCACCTAAAGAGGGACGGTGGGGGTCCTATATGATAAACACAGAAGATACTTAAAGGTGACCT\n+CAGCACGTTCTGAGAACTTTGAGAATCATGTGCTATCTGAGGACCCTATGCCGTTCTACTCATTGATGGT\n+GATTATTGCTGATAAGCATCACCTCCACATAGGAGCATACAGATGACCAAGAAAGCTACCGCAACCTTTG\n+TGGCAGTGCTGGTCAGTCTGGCGAAACACCGTGCGACCTATCGCTTCCTCGCTGTTCTTCTCGTTGCCCT\n+TGGTATCTCCAATGGCGAAGCGATTATGTCTGGGATCGAGACCGTAGCTTGTGCGTACCTTGGCTGCATC\n+GGCTGACGCCCAACGAGGCTCATTACGGGTCTGCTAAAGCATACCCTCCATGAGCATTGACTTACAGTAC\n+ATTCTTCAAGGGACCTTTCAATGGGTCATCCAGTACCCTTTAGGGTGCAGCACATCAAGGCCCTTCAAGG\n+GTCCCTTCAAGGGTCCCTTCAAGAATTTACCATAAATTTTCCTTACAGGACCTCTCATAGACTAACCCAC\n+CGAGTACCCCCGTGGCCCCTCTCGTCAACCCTAACGGCCTCTGGGGCACCCCTAAGGGGATCATTACGGT\n+GCTGGGCACCTACAGACAATCATCAAGGGATACCGGTAGGCGGTCCCTCTATGGGCTACCTCATGTCCTA\n+CCTTGGGGCACATCATGAACCCTTTAAGGGGAGCTGTCAACCCTCAAGGGATCATTAAGGGGACATCAAG\n+GGACTTAAGGGGAGACCTACAGACCATTACGTCTCCTTGTCCTAATCTGTAAGGCCCATCAAGGAACCAT\n+TGAGAGACCATCAAGGCCACCGTCAAGGCACACACCCTAAGGATAGACCATAGGAGACCCATCAAGGCTG\n+GACATCAAGGGATTGACAAAGCGATCCCTGTGATCTATTACGTGCGCTACGCGGTCATTAAGGGGACTTT\n+AAAACCCTCACCTAAACAGGGATGAACAATTGTTGAAATAAGTGGTTGACACCCTCAAAGGAGTCTGTAG\n+AATGGCCACCACAACGAAGCGACACGCCAAGGCGGATCACTGAGTTGCCTTGAAGCTTAGTAGTTACGGT\n+CTGTGCGATCCTAAGGGTGCCAAACGGGCTTAACGTAACGAACACGGATAGGGTGAAACAAAACGCTTGA\n+CACTGAGTAACAACGCTGTAGAATAC
GCCACATCAGCTACACACCGCTCTTTAACAACTTGGATGCAACA\n+TGGTCTGCAAGGCTCCGAAAGGACACCTACAGGCATCGCTACGAAGGCAAGCACTGAGCCTTCTCATAGC\n+GCCTTTGAGAACCTGAACGGATGTTACGGGTAGGGTTAGCAAGGGCCTTATGGGAACCACTTAGGAGCCT\n+CACACAATGGCACGCTTAGACATGCACGTCACTGGTCTTGAATATGATCTTTCCCTTGAGGAAAACGCAC\n+AGGGCCAGCAAGGATTCACCGTGACATACGGTGCGCAAGTCAATCATTACGAACGGTTTGATGAAGCCTT\n+TATGGACTTCAATGAATCGCTTAGCCACGCTCTGGCCCTTCAGGGTCTGTAAATCCTCACTGATAAGGTA\n+CAATCGCATGAACACGTCACAGAACCGCTTCATTCTGGCCCACGCAGCAAGCCTGCTGGTCACCGCTTAC\n+AAAGAGTGCATCGCTGAGTATCAGACGGTGCTGCCGCTTAACCTGAGCATCGGCCATGATGCACCTGATA\n+GCTACGCTGCGCTCCGTTCGCAAGCGGCTCAGGGTCAACTGAAGGTAAGCACGGCGCACAACGCCTCGTC\n+GATCTACGGGGCTTCGGGTAACCTCACGTTCCGCATCTTTCACGACTACGGTCACCTGCTGTATGACGCT\n+GAGTTCACCACTGAGCAAGAGGTTAGCCTTGCGCTGACTCAATGGCGTGACCTGATCCGCTACATCCCTC\n+AGGAATGGCAAGGCATCTGCTATGTGGTCTACCGCGCCGACACCGTGGCACAAAGCGAGTATGAAGCGAT\n+CCATAAGGACTTCCCTGTGGATCAAAAGGCATTCGTTCTGGACATCCTGAACAAGCACTTCGAGGCTGAG\n+CCACGATGAGCATGAACACGAATGAAACACTGGACGCCGTGCTGATTACCCGTGAAGAACGCAAGGCGCT\n+GGCCGCTCTGCTGTACTCAGGGTTGACCGGTAACGCAGTCGATAAGCTGGGCCTTCGGGCCTTGCAGGAA\n+AAGCTTTCATCGGCCTTTAAGGGTTACTGGGACACGTTCAATCCGCTCGACAAGCACCCAACAATGGCTG\n+ACCACGGGATAGCGGAATGGGTCAGCCCGGATTCAACGCAGAAGACACGGAACAGCATCCAATGAACGTG\n+ACTGTCCGGGGTCTCTTCAAGCTCTGCAATAGCTGCAACCAACGGGGCCAAGTGGCTCAAGTCTGGCTTG\n+TCGATCACTTCGCTTGCCGCTGCATCTTCCTACCTTACGACCATCAATAAGGAGTCACCATGACCAACGC\n+AAAGCGCACCACAGCTCAGACCATCAAGCACAAGGGTCGCAAGTACGTCGTAATTCACCGCTCGGTTACC\n+CTGCTGTTGCTGAAGCGTGCCGGTAAGCCTCACCACTTCACCCTTGAAGGCGGCAAAGAGGGCACCCTGA\n+AGCGTCATAAAGACTTCTGGGCGGCACTGCAACACTACAGCGACCGTCAGGACGCACTGGGCATCAAGGG\n+CCACGCTGTAACCGCTGTGGTTGGCACCGAGGACGGCCCTGTTGTCACTGAAGGCGAGCTGGCCGCAATC\n+GAACTGATGGCATCGCAAGGGGCCACAACGGGCCGTGTTAGCGCCAAGGAACCGAACTAATGCCAAAGCC\n+TAATAAGTACAAGGGTGACGGCTCCAAGAAGCCTGAAGGGACCGTAGAGGGCGCCTACGTCATGCACAAG\n+GGTCGCATGGTCCCGAACTTTCAAGCCACCGACTCGGCTATCGAGCGGGGCATCAACGCCTACAAGGCAT\n+TCAAGGGGAACGCTATGCTGAAGTGTGTGACTCGCATCATGCTGTACATCATGGTCGCTGCAATGTGTGC\n+GGCGCTGCTGTTCGGGATGACTGGCTGTCAAGTCAACGTCGTGAACGTGATTCAC
AGTGACATCGGGCTG\n+GATGCGTCCAGTAACCTCAACGCTTTGACCGAGTAGCGTTAATCGGTGGCCTTCTCTTGAGGGTCACCCG\n+TTAAATCCACTCACTGCTAATGC\n+\n' |
b |
diff -r b575af79e250 -r 5acc4fa8b62d test-data/NC_015264.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NC_015264.gb Sat Apr 20 11:07:28 2019 -0400 |
b |
b'@@ -0,0 +1,1688 @@\n+LOCUS NC_015264 40973 bp DNA linear PHG 18-APR-2011\n+DEFINITION Pseudomonas phage phiIBB-PF7A, complete genome.\n+ACCESSION NC_015264\n+VERSION NC_015264.1\n+DBLINK BioProject: PRJNA64561\n+KEYWORDS RefSeq.\n+SOURCE Pseudomonas phage phiIBB-PF7A\n+ ORGANISM Pseudomonas phage phiIBB-PF7A\n+ Viruses; dsDNA viruses, no RNA stage; Caudovirales; Podoviridae;\n+ Autographivirinae; T7virus.\n+REFERENCE 1 (bases 1 to 40973)\n+ AUTHORS Sillankorva,S., Kluskens,L.D., Lingohr,E.J., Kropinski,A.M.,\n+ Neubauer,P. and Azeredo,J.\n+ TITLE Complete genome sequence of the lytic Pseudomonas fluorescens phage\n+ phiIBB-PF7A\n+ JOURNAL Virol. J. 8 (1), 142 (2011)\n+ PUBMED 21439081\n+ REMARK Publication Status: Online-Only\n+REFERENCE 2 (bases 1 to 40973)\n+ CONSRTM NCBI Genome Project\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-MAR-2011) National Center for Biotechnology\n+ Information, NIH, Bethesda, MD 20894, USA\n+REFERENCE 3 (bases 1 to 40973)\n+ AUTHORS Sillankorva,S., Kluskens,L.H., Lingohr,E.J., Kropinski,A.M.,\n+ Neubauer,P. and Azeredo,J.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (28-JAN-2010) Biological Engineering, University of\n+ Minho, Campus de Gualtar, Braga 4710-057, Portugal\n+COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final\n+ NCBI review. 
The reference sequence is identical to GU583987.\n+ COMPLETENESS: full length.\n+FEATURES Location/Qualifiers\n+ source 1..40973\n+ /organism="Pseudomonas phage phiIBB-PF7A"\n+ /mol_type="genomic DNA"\n+ /host="Pseudomonas fluorescens"\n+ /db_xref="taxon:942165"\n+ repeat_region 1..985\n+ /note="redundant terminal repeat"\n+ /rpt_type=direct\n+ gene 252..602\n+ /locus_tag="phiIBB-PF7Ap00"\n+ /db_xref="GeneID:10323835"\n+ CDS 252..602\n+ /locus_tag="phiIBB-PF7Ap00"\n+ /note="orf1"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /protein_id="YP_004306315.1"\n+ /db_xref="GeneID:10323835"\n+ /translation="MTNAKRTTAQTIKHKGRKYVVIHRSVTLLLLKRAGKPHHFTLEG\n+ GKEGTLKRHKDFWAALQHYSDRQDALGIKGHAVTAVVGTEDGPVVTEGELAAIELMAS\n+ QGATTGRVSAKEPN"\n+ gene 602..928\n+ /locus_tag="phiIBB-PF7Ap01"\n+ /db_xref="GeneID:10323784"\n+ CDS 602..928\n+ /locus_tag="phiIBB-PF7Ap01"\n+ /note="orf2"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /protein_id="YP_004306316.1"\n+ /db_xref="GeneID:10323784"\n+ /translation="MPKPNKYKGDGSKKPEGTVEGAYVMHKGRMVPNFQATDSAIERG\n+ INAYKAFKGNAMLKCVTRIMLYIMVAAMCAALLFGMTGCQVNVVNVIHSDIGLDASSN\n+ LNALTE"\n+ gene 996..1511\n+ /locus_tag="phiIBB-PF7Ap02"\n+ /db_xref="GeneID:10323785"\n+ CDS 996..1511\n+ /locus_tag="phiIBB-PF7Ap02"\n+ /note="orf3"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /protein_id="YP_004306317.1"\n+ /db_xref="GeneID:10323785"\n+ /translation="MIALNYTSFTSREVAAKILAAMQEVRATGNAVRVLNRRGKAFLL\n+ VTIHKDALGYAFKFIAEDGTEVGQMIQRASNDWDNPTFTAYWSILSWAWDLKEHPLLS\n+ LSKLAAQAEAMKGQ'..b' 37861 cattgatggt gattattgct gataagcatc acctccacat aggagcatac agatgaccaa\n+ 37921 gaaagctacc gcaacctttg tggcagtgct ggtcagtctg gcgaaacacc gtgcgaccta\n+ 37981 tcgcttcctc gctgttcttc tcgttgccct tggtatctcc aatggcgaag cgattatgtc\n+ 38041 tgggatcgag accgtagctt gtgcgtacct tggctgcatc ggctgacgcc caacgaggct\n+ 38101 cattacgggt ctgctaaagc ataccctcca tgagcattga cttacagtac attcttcaag\n+ 38161 ggacctttca 
atgggtcatc cagtaccctt tagggtgcag cacatcaagg cccttcaagg\n+ 38221 gtcccttcaa gggtcccttc aagaatttac cataaatttt ccttacagga cctctcatag\n+ 38281 actaacccac cgagtacccc cgtggcccct ctcgtcaacc ctaacggcct ctggggcacc\n+ 38341 cctaagggga tcattacggt gctgggcacc tacagacaat catcaaggga taccggtagg\n+ 38401 cggtccctct atgggctacc tcatgtccta ccttggggca catcatgaac cctttaaggg\n+ 38461 gagctgtcaa ccctcaaggg atcattaagg ggacatcaag ggacttaagg ggagacctac\n+ 38521 agaccattac gtctccttgt cctaatctgt aaggcccatc aaggaaccat tgagagacca\n+ 38581 tcaaggccac cgtcaaggca cacaccctaa ggatagacca taggagaccc atcaaggctg\n+ 38641 gacatcaagg gattgacaaa gcgatccctg tgatctatta cgtgcgctac gcggtcatta\n+ 38701 aggggacttt aaaaccctca cctaaacagg gatgaacaat tgttgaaata agtggttgac\n+ 38761 accctcaaag gagtctgtag aatggccacc acaacgaagc gacacgccaa ggcggatcac\n+ 38821 tgagttgcct tgaagcttag tagttacggt ctgtgcgatc ctaagggtgc caaacgggct\n+ 38881 taacgtaacg aacacggata gggtgaaaca aaacgcttga cactgagtaa caacgctgta\n+ 38941 gaatacgcca catcagctac acaccgctct ttaacaactt ggatgcaaca tggtctgcaa\n+ 39001 ggctccgaaa ggacacctac aggcatcgct acgaaggcaa gcactgagcc ttctcatagc\n+ 39061 gcctttgaga acctgaacgg atgttacggg tagggttagc aagggcctta tgggaaccac\n+ 39121 ttaggagcct cacacaatgg cacgcttaga catgcacgtc actggtcttg aatatgatct\n+ 39181 ttcccttgag gaaaacgcac agggccagca aggattcacc gtgacatacg gtgcgcaagt\n+ 39241 caatcattac gaacggtttg atgaagcctt tatggacttc aatgaatcgc ttagccacgc\n+ 39301 tctggccctt cagggtctgt aaatcctcac tgataaggta caatcgcatg aacacgtcac\n+ 39361 agaaccgctt cattctggcc cacgcagcaa gcctgctggt caccgcttac aaagagtgca\n+ 39421 tcgctgagta tcagacggtg ctgccgctta acctgagcat cggccatgat gcacctgata\n+ 39481 gctacgctgc gctccgttcg caagcggctc agggtcaact gaaggtaagc acggcgcaca\n+ 39541 acgcctcgtc gatctacggg gcttcgggta acctcacgtt ccgcatcttt cacgactacg\n+ 39601 gtcacctgct gtatgacgct gagttcacca ctgagcaaga ggttagcctt gcgctgactc\n+ 39661 aatggcgtga cctgatccgc tacatccctc aggaatggca aggcatctgc tatgtggtct\n+ 39721 accgcgccga caccgtggca caaagcgagt atgaagcgat ccataaggac 
ttccctgtgg\n+ 39781 atcaaaaggc attcgttctg gacatcctga acaagcactt cgaggctgag ccacgatgag\n+ 39841 catgaacacg aatgaaacac tggacgccgt gctgattacc cgtgaagaac gcaaggcgct\n+ 39901 ggccgctctg ctgtactcag ggttgaccgg taacgcagtc gataagctgg gccttcgggc\n+ 39961 cttgcaggaa aagctttcat cggcctttaa gggttactgg gacacgttca atccgctcga\n+ 40021 caagcaccca acaatggctg accacgggat agcggaatgg gtcagcccgg attcaacgca\n+ 40081 gaagacacgg aacagcatcc aatgaacgtg actgtccggg gtctcttcaa gctctgcaat\n+ 40141 agctgcaacc aacggggcca agtggctcaa gtctggcttg tcgatcactt cgcttgccgc\n+ 40201 tgcatcttcc taccttacga ccatcaataa ggagtcacca tgaccaacgc aaagcgcacc\n+ 40261 acagctcaga ccatcaagca caagggtcgc aagtacgtcg taattcaccg ctcggttacc\n+ 40321 ctgctgttgc tgaagcgtgc cggtaagcct caccacttca cccttgaagg cggcaaagag\n+ 40381 ggcaccctga agcgtcataa agacttctgg gcggcactgc aacactacag cgaccgtcag\n+ 40441 gacgcactgg gcatcaaggg ccacgctgta accgctgtgg ttggcaccga ggacggccct\n+ 40501 gttgtcactg aaggcgagct ggccgcaatc gaactgatgg catcgcaagg ggccacaacg\n+ 40561 ggccgtgtta gcgccaagga accgaactaa tgccaaagcc taataagtac aagggtgacg\n+ 40621 gctccaagaa gcctgaaggg accgtagagg gcgcctacgt catgcacaag ggtcgcatgg\n+ 40681 tcccgaactt tcaagccacc gactcggcta tcgagcgggg catcaacgcc tacaaggcat\n+ 40741 tcaaggggaa cgctatgctg aagtgtgtga ctcgcatcat gctgtacatc atggtcgctg\n+ 40801 caatgtgtgc ggcgctgctg ttcgggatga ctggctgtca agtcaacgtc gtgaacgtga\n+ 40861 ttcacagtga catcgggctg gatgcgtcca gtaacctcaa cgctttgacc gagtagcgtt\n+ 40921 aatcggtggc cttctcttga gggtcacccg ttaaatccac tcactgctaa tgc\n+//\n+\n' |
b |
diff -r b575af79e250 -r 5acc4fa8b62d test-data/output.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fasta Sat Apr 20 11:07:28 2019 -0400 |
b |
@@ -0,0 +1,60 @@ +>NC_015264:12 phage (238..260) score=0.606 +AATAAGGAGTCACCATGACCAAC +>NC_015264:150 phage (2996..3018) score=0.931 +TAAAAACCCTCACCAGAACAGGG +>NC_015264:154 host (3062..3088) score=0.729 +TAAGATTACTCTTATAGTAATTATCAT +>NC_015264:216 host (4313..4342) score=0.653 +TGAAATGAACGTGGAGACGGCTCGTAAGTT +>NC_015264:242 phage (4835..4855) score=0.516 +ACTGGTCTCCGACGGTGTTAA +>NC_015264:244 host (4876..4907) score=0.519 +TTGATAACGGCACAGATGATTCCTCGGAGACT +>NC_015264:300 phage (6000..6022) score=0.981 +TAAAACCCCTCACCAAAACAGGG +>NC_015264:412 phage (8257..8279) score=0.985 +TAAAAACCCTCACCAAAACAGGG +>NC_015264:473 phage (9458..9480) score=0.966 +TAAAAACCCTCACCAGAACAGGG +>NC_015264:532 phage (10632..10654) score=0.657 +CCAAACCGATCCCTAAAGGGGTC +>NC_015264:557 phage (11141..11163) score=0.948 +TAAAAACCCTCACCAGAACAGGG +>NC_015264:586 phage (11710..11730) score=0.557 +CTGACTTGCCGATACCCTGAA +>NC_015264:698 phage (13956..13978) score=0.677 +CAAATACCCTCACCTAAACAGCT +>NC_015264:826 host (16512..16542) score=0.748 +TTGACATCGAAGAGGTCTTCGTTGAGACAGT +>NC_015264:890 phage (17793..17815) score=0.99 +TAAAACCCCTCACCTAAACAGGG +>NC_015264:892 phage (17842..17864) score=0.7 +ACTTAAAGATCACTCTAAGGGAG +>NC_015264:893 host (17869..17896) score=0.777 +TATGCTTAAAGAGATCCAGCACTATCTG +>NC_015264:979 host (19585..19615) score=0.529 +TTGAGAACCTGCACGAAGCCATGATTAAGTT +>NC_015264:1019 phage (20361..20383) score=0.993 +TAAAAACCCTCACCTAAACAGGG +>NC_015264:1068 phage (21354..21376) score=0.99 +TAAAAACCCTCACCTAAACAGGG +>NC_015264:1303 phage (26071..26093) score=0.953 +TAAAAACACTCACCACAACAGGG +>NC_015264:1616 phage (32321..32341) score=0.665 +GCCGCTCACCAAGTTCCTTAC +>NC_015264:1667 phage (33327..33349) score=0.991 +TAAAACCCCTCACCTAAACAGGG +>NC_015264:1886 phage (37729..37751) score=0.991 +TAAAAACCCTCACCTAAAGAGGG +>NC_015264:1932 host (38653..38681) score=0.875 +TTGACAAAGCGATCCCTGTGATCTATTAC +>NC_015264:1936 phage (38709..38731) score=0.938 +TTAAAACCCTCACCTAAACAGGG +>NC_015264:1937 host (38756..38783) 
score=1.0 +TTGACACCCTCAAAGGAGTCTGTAGAAT +>NC_015264:1945 host (38917..38944) score=0.997 +TTGACACTGAGTAACAACGCTGTAGAAT +>NC_015264:1958 host (39147..39178) score=0.697 +TAGACATGCACGTCACTGGTCTTGAATATGAT +>NC_015264:2011 phage (40226..40248) score=0.63 +AATAAGGAGTCACCATGACCAAC |
b |
diff -r b575af79e250 -r 5acc4fa8b62d test-data/output.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.gb Sat Apr 20 11:07:28 2019 -0400 |
b |
b'@@ -0,0 +1,1739 @@\n+LOCUS NC_015264 40973 bp DNA linear PHG 18-APR-2011\r\n+DEFINITION Pseudomonas phage phiIBB-PF7A, complete genome.\r\n+ACCESSION NC_015264\r\n+VERSION NC_015264.1\r\n+DBLINK BioProject: PRJNA64561\r\n+KEYWORDS RefSeq.\r\n+SOURCE Pseudomonas phage phiIBB-PF7A\r\n+ ORGANISM Pseudomonas phage phiIBB-PF7A\r\n+ Viruses; dsDNA viruses, no RNA stage; Caudovirales; Podoviridae;\r\n+ Autographivirinae; T7virus.\r\n+REFERENCE 1 (bases 1 to 40973)\r\n+ AUTHORS Sillankorva,S., Kluskens,L.D., Lingohr,E.J., Kropinski,A.M.,\r\n+ Neubauer,P. and Azeredo,J.\r\n+ TITLE Complete genome sequence of the lytic Pseudomonas fluorescens phage\r\n+ phiIBB-PF7A\r\n+ JOURNAL Virol. J. 8 (1), 142 (2011)\r\n+ PUBMED 21439081\r\n+ REMARK Publication Status: Online-Only\r\n+REFERENCE 2 (bases 1 to 40973)\r\n+ CONSRTM NCBI Genome Project\r\n+ TITLE Direct Submission\r\n+ JOURNAL Submitted (21-MAR-2011) National Center for Biotechnology\r\n+ Information, NIH, Bethesda, MD 20894, USA\r\n+REFERENCE 3 (bases 1 to 40973)\r\n+ AUTHORS Sillankorva,S., Kluskens,L.H., Lingohr,E.J., Kropinski,A.M.,\r\n+ Neubauer,P. and Azeredo,J.\r\n+ TITLE Direct Submission\r\n+ JOURNAL Submitted (28-JAN-2010) Biological Engineering, University of Minho,\r\n+ Campus de Gualtar, Braga 4710-057, Portugal\r\n+COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final\r\n+ NCBI review. 
The reference sequence is identical to GU583987.\r\n+ COMPLETENESS: full length.\r\n+FEATURES Location/Qualifiers\r\n+ source 1..40973\r\n+ /organism="Pseudomonas phage phiIBB-PF7A"\r\n+ /mol_type="genomic DNA"\r\n+ /host="Pseudomonas fluorescens"\r\n+ /db_xref="taxon:942165"\r\n+ repeat_region 1..985\r\n+ /note="redundant terminal repeat"\r\n+ /rpt_type=direct\r\n+ regulatory 239..260\r\n+ /regulatory_class="promoter"\r\n+ /note=="predicted by PhagePromoter"\r\n+ gene 252..602\r\n+ /locus_tag="phiIBB-PF7Ap00"\r\n+ /db_xref="GeneID:10323835"\r\n+ CDS 252..602\r\n+ /locus_tag="phiIBB-PF7Ap00"\r\n+ /note="orf1"\r\n+ /codon_start=1\r\n+ /transl_table=11\r\n+ /product="hypothetical protein"\r\n+ /protein_id="YP_004306315.1"\r\n+ /db_xref="GeneID:10323835"\r\n+ /translation="MTNAKRTTAQTIKHKGRKYVVIHRSVTLLLLKRAGKPHHFTLEGG\r\n+ KEGTLKRHKDFWAALQHYSDRQDALGIKGHAVTAVVGTEDGPVVTEGELAAIELMASQG\r\n+ ATTGRVSAKEPN"\r\n+ gene 602..928\r\n+ /locus_tag="phiIBB-PF7Ap01"\r\n+ /db_xref="GeneID:10323784"\r\n+ CDS 602..928\r\n+ /locus_tag="phiIBB-PF7Ap01"\r\n+ /note="orf2"\r\n+ /codon_start=1\r\n+ /transl_table=11\r\n+ /product="hypothetical protein"\r\n+ /protein_id="YP_004306316.1"\r\n+ /db_xref="GeneID:10323784"\r\n+ /translation="MPKPNKYKGDGSKKPEGTVEGAYVMHKGRMVPNFQATDSAIERGI\r\n+ NAYKAFKGNAMLKCVTRIMLYIMVAAMCAALLFGMTGCQVNVVNVIHSDIGLDASSNLN\r\n+ ALTE"\r\n+ gene 996..1511\r\n+ /locus_tag="phiIBB-PF7Ap02"\r\n+ /db_xref="GeneID:10323785"\r\n+ CDS 996..1511\r\n+ /locus_tag="phiIBB-PF7Ap02"\r\n+ /note="orf3"\r\n+ /codon_start=1\r\n+ /transl_table=11\r\n+ /product="hypothetical protein"\r\n+ /protein_id="YP_004306317.1"\r\n+ /db'..b'aggagcatac agatgaccaa\r\n+ 37921 gaaagctacc gcaacctttg tggcagtgct ggtcagtctg gcgaaacacc gtgcgaccta\r\n+ 37981 tcgcttcctc gctgttcttc tcgttgccct tggtatctcc aatggcgaag cgattatgtc\r\n+ 38041 tgggatcgag accgtagctt gtgcgtacct tggctgcatc ggctgacgcc caacgaggct\r\n+ 38101 cattacgggt ctgctaaagc ataccctcca tgagcattga cttacagtac attcttcaag\r\n+ 38161 ggacctttca atgggtcatc 
cagtaccctt tagggtgcag cacatcaagg cccttcaagg\r\n+ 38221 gtcccttcaa gggtcccttc aagaatttac cataaatttt ccttacagga cctctcatag\r\n+ 38281 actaacccac cgagtacccc cgtggcccct ctcgtcaacc ctaacggcct ctggggcacc\r\n+ 38341 cctaagggga tcattacggt gctgggcacc tacagacaat catcaaggga taccggtagg\r\n+ 38401 cggtccctct atgggctacc tcatgtccta ccttggggca catcatgaac cctttaaggg\r\n+ 38461 gagctgtcaa ccctcaaggg atcattaagg ggacatcaag ggacttaagg ggagacctac\r\n+ 38521 agaccattac gtctccttgt cctaatctgt aaggcccatc aaggaaccat tgagagacca\r\n+ 38581 tcaaggccac cgtcaaggca cacaccctaa ggatagacca taggagaccc atcaaggctg\r\n+ 38641 gacatcaagg gattgacaaa gcgatccctg tgatctatta cgtgcgctac gcggtcatta\r\n+ 38701 aggggacttt aaaaccctca cctaaacagg gatgaacaat tgttgaaata agtggttgac\r\n+ 38761 accctcaaag gagtctgtag aatggccacc acaacgaagc gacacgccaa ggcggatcac\r\n+ 38821 tgagttgcct tgaagcttag tagttacggt ctgtgcgatc ctaagggtgc caaacgggct\r\n+ 38881 taacgtaacg aacacggata gggtgaaaca aaacgcttga cactgagtaa caacgctgta\r\n+ 38941 gaatacgcca catcagctac acaccgctct ttaacaactt ggatgcaaca tggtctgcaa\r\n+ 39001 ggctccgaaa ggacacctac aggcatcgct acgaaggcaa gcactgagcc ttctcatagc\r\n+ 39061 gcctttgaga acctgaacgg atgttacggg tagggttagc aagggcctta tgggaaccac\r\n+ 39121 ttaggagcct cacacaatgg cacgcttaga catgcacgtc actggtcttg aatatgatct\r\n+ 39181 ttcccttgag gaaaacgcac agggccagca aggattcacc gtgacatacg gtgcgcaagt\r\n+ 39241 caatcattac gaacggtttg atgaagcctt tatggacttc aatgaatcgc ttagccacgc\r\n+ 39301 tctggccctt cagggtctgt aaatcctcac tgataaggta caatcgcatg aacacgtcac\r\n+ 39361 agaaccgctt cattctggcc cacgcagcaa gcctgctggt caccgcttac aaagagtgca\r\n+ 39421 tcgctgagta tcagacggtg ctgccgctta acctgagcat cggccatgat gcacctgata\r\n+ 39481 gctacgctgc gctccgttcg caagcggctc agggtcaact gaaggtaagc acggcgcaca\r\n+ 39541 acgcctcgtc gatctacggg gcttcgggta acctcacgtt ccgcatcttt cacgactacg\r\n+ 39601 gtcacctgct gtatgacgct gagttcacca ctgagcaaga ggttagcctt gcgctgactc\r\n+ 39661 aatggcgtga cctgatccgc tacatccctc aggaatggca aggcatctgc tatgtggtct\r\n+ 39721 accgcgccga 
caccgtggca caaagcgagt atgaagcgat ccataaggac ttccctgtgg\r\n+ 39781 atcaaaaggc attcgttctg gacatcctga acaagcactt cgaggctgag ccacgatgag\r\n+ 39841 catgaacacg aatgaaacac tggacgccgt gctgattacc cgtgaagaac gcaaggcgct\r\n+ 39901 ggccgctctg ctgtactcag ggttgaccgg taacgcagtc gataagctgg gccttcgggc\r\n+ 39961 cttgcaggaa aagctttcat cggcctttaa gggttactgg gacacgttca atccgctcga\r\n+ 40021 caagcaccca acaatggctg accacgggat agcggaatgg gtcagcccgg attcaacgca\r\n+ 40081 gaagacacgg aacagcatcc aatgaacgtg actgtccggg gtctcttcaa gctctgcaat\r\n+ 40141 agctgcaacc aacggggcca agtggctcaa gtctggcttg tcgatcactt cgcttgccgc\r\n+ 40201 tgcatcttcc taccttacga ccatcaataa ggagtcacca tgaccaacgc aaagcgcacc\r\n+ 40261 acagctcaga ccatcaagca caagggtcgc aagtacgtcg taattcaccg ctcggttacc\r\n+ 40321 ctgctgttgc tgaagcgtgc cggtaagcct caccacttca cccttgaagg cggcaaagag\r\n+ 40381 ggcaccctga agcgtcataa agacttctgg gcggcactgc aacactacag cgaccgtcag\r\n+ 40441 gacgcactgg gcatcaaggg ccacgctgta accgctgtgg ttggcaccga ggacggccct\r\n+ 40501 gttgtcactg aaggcgagct ggccgcaatc gaactgatgg catcgcaagg ggccacaacg\r\n+ 40561 ggccgtgtta gcgccaagga accgaactaa tgccaaagcc taataagtac aagggtgacg\r\n+ 40621 gctccaagaa gcctgaaggg accgtagagg gcgcctacgt catgcacaag ggtcgcatgg\r\n+ 40681 tcccgaactt tcaagccacc gactcggcta tcgagcgggg catcaacgcc tacaaggcat\r\n+ 40741 tcaaggggaa cgctatgctg aagtgtgtga ctcgcatcat gctgtacatc atggtcgctg\r\n+ 40801 caatgtgtgc ggcgctgctg ttcgggatga ctggctgtca agtcaacgtc gtgaacgtga\r\n+ 40861 ttcacagtga catcgggctg gatgcgtcca gtaacctcaa cgctttgacc gagtagcgtt\r\n+ 40921 aatcggtggc cttctcttga gggtcacccg ttaaatccac tcactgctaa tgc\r\n+//\r\n' |
b |
diff -r b575af79e250 -r 5acc4fa8b62d test-data/output.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.html Sat Apr 20 11:07:28 2019 -0400 |
b |
@@ -0,0 +1,192 @@ +<table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th>positions</th> + <th>promoter_seq</th> + <th>promoter_type</th> + <th>scores</th> + </tr> + </thead> + <tbody> + <tr> + <td>(238..260)</td> + <td>AATAAGGAGTCACCATGACCAAC</td> + <td>phage</td> + <td>0.606</td> + </tr> + <tr> + <td>(2996..3018)</td> + <td>TAAAAACCCTCACCAGAACAGGG</td> + <td>phage</td> + <td>0.931</td> + </tr> + <tr> + <td>(3062..3088)</td> + <td>TAAGATTACTCTTATAGTAATTATCAT</td> + <td>host</td> + <td>0.729</td> + </tr> + <tr> + <td>(4313..4342)</td> + <td>TGAAATGAACGTGGAGACGGCTCGTAAGTT</td> + <td>host</td> + <td>0.653</td> + </tr> + <tr> + <td>(4835..4855)</td> + <td>ACTGGTCTCCGACGGTGTTAA</td> + <td>phage</td> + <td>0.516</td> + </tr> + <tr> + <td>(4876..4907)</td> + <td>TTGATAACGGCACAGATGATTCCTCGGAGACT</td> + <td>host</td> + <td>0.519</td> + </tr> + <tr> + <td>(6000..6022)</td> + <td>TAAAACCCCTCACCAAAACAGGG</td> + <td>phage</td> + <td>0.981</td> + </tr> + <tr> + <td>(8257..8279)</td> + <td>TAAAAACCCTCACCAAAACAGGG</td> + <td>phage</td> + <td>0.985</td> + </tr> + <tr> + <td>(9458..9480)</td> + <td>TAAAAACCCTCACCAGAACAGGG</td> + <td>phage</td> + <td>0.966</td> + </tr> + <tr> + <td>(10632..10654)</td> + <td>CCAAACCGATCCCTAAAGGGGTC</td> + <td>phage</td> + <td>0.657</td> + </tr> + <tr> + <td>(11141..11163)</td> + <td>TAAAAACCCTCACCAGAACAGGG</td> + <td>phage</td> + <td>0.948</td> + </tr> + <tr> + <td>(11710..11730)</td> + <td>CTGACTTGCCGATACCCTGAA</td> + <td>phage</td> + <td>0.557</td> + </tr> + <tr> + <td>(13956..13978)</td> + <td>CAAATACCCTCACCTAAACAGCT</td> + <td>phage</td> + <td>0.677</td> + </tr> + <tr> + <td>(16512..16542)</td> + <td>TTGACATCGAAGAGGTCTTCGTTGAGACAGT</td> + <td>host</td> + <td>0.748</td> + </tr> + <tr> + <td>(17793..17815)</td> + <td>TAAAACCCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.990</td> + </tr> + <tr> + <td>(17842..17864)</td> + <td>ACTTAAAGATCACTCTAAGGGAG</td> + <td>phage</td> + <td>0.700</td> + </tr> + <tr> + 
<td>(17869..17896)</td> + <td>TATGCTTAAAGAGATCCAGCACTATCTG</td> + <td>host</td> + <td>0.777</td> + </tr> + <tr> + <td>(19585..19615)</td> + <td>TTGAGAACCTGCACGAAGCCATGATTAAGTT</td> + <td>host</td> + <td>0.529</td> + </tr> + <tr> + <td>(20361..20383)</td> + <td>TAAAAACCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.993</td> + </tr> + <tr> + <td>(21354..21376)</td> + <td>TAAAAACCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.990</td> + </tr> + <tr> + <td>(26071..26093)</td> + <td>TAAAAACACTCACCACAACAGGG</td> + <td>phage</td> + <td>0.953</td> + </tr> + <tr> + <td>(32321..32341)</td> + <td>GCCGCTCACCAAGTTCCTTAC</td> + <td>phage</td> + <td>0.665</td> + </tr> + <tr> + <td>(33327..33349)</td> + <td>TAAAACCCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.991</td> + </tr> + <tr> + <td>(37729..37751)</td> + <td>TAAAAACCCTCACCTAAAGAGGG</td> + <td>phage</td> + <td>0.991</td> + </tr> + <tr> + <td>(38653..38681)</td> + <td>TTGACAAAGCGATCCCTGTGATCTATTAC</td> + <td>host</td> + <td>0.875</td> + </tr> + <tr> + <td>(38709..38731)</td> + <td>TTAAAACCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.938</td> + </tr> + <tr> + <td>(38756..38783)</td> + <td>TTGACACCCTCAAAGGAGTCTGTAGAAT</td> + <td>host</td> + <td>1.000</td> + </tr> + <tr> + <td>(38917..38944)</td> + <td>TTGACACTGAGTAACAACGCTGTAGAAT</td> + <td>host</td> + <td>0.997</td> + </tr> + <tr> + <td>(39147..39178)</td> + <td>TAGACATGCACGTCACTGGTCTTGAATATGAT</td> + <td>host</td> + <td>0.697</td> + </tr> + <tr> + <td>(40226..40248)</td> + <td>AATAAGGAGTCACCATGACCAAC</td> + <td>phage</td> + <td>0.630</td> + </tr> + </tbody> +</table> \ No newline at end of file |
b |
diff -r b575af79e250 -r 5acc4fa8b62d tool_dependencies.xml --- a/tool_dependencies.xml Sat Apr 20 11:06:06 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -<tool_dependency> - <package name="biopython"></package> - <package name="numpy" ></package> - <package name="pandas"></package> - <package name="scikit-learn"></package> -</tool_dependency> |