Repository 'phage_promoters'
hg clone https://toolshed.g2.bx.psu.edu/repos/martasampaio/phage_promoters

Changeset 0:34fb34df4473 (2018-07-08)
Next changeset 1:9a357864ec5c (2018-07-09)
Commit message:
planemo upload for repository https://github.com/martaS95/PhagePromoters commit 7f30028d32b56eeeaacc6a874f8f1f2e312b9b49-dirty
added:
README.rst
__pycache__/auxiliar.cpython-35.pyc
__pycache__/auxiliar.cpython-36.pyc
auxiliar.py
auxiliar.pyc
model_SVM_2400.sav
phage_promoters.py
phage_promoters.xml
pssm10_6.txt
pssm10_8.txt
pssm35_6.txt
pssm35_9.txt
pssm35_cbb.txt
pssm35_lb.txt
pssm35_mu.txt
pssm35_t4.txt
pssm_21.txt
pssm_23.txt
pssm_27.txt
pssm_32.txt
scaler_2400.sav
test-data/NC_015264.fasta
test-data/NC_015264.gb
test-data/output.fasta
test-data/output.html
tool_dependencies.xml
b
diff -r 000000000000 -r 34fb34df4473 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,29 @@
+===============
+PhagePromoters
+===============
+
+Get promoters of phage genomes
+
+PhagePromoters is a Python script that predicts promoter sequences in phage genomes using a machine-learning SVM model. This model was built from a training dataset with 25 features and 3200 examples (800 positives and 2400 negatives), each representing a 65 bp sequence of a phage genome. The positive cases represent phage sequences that are already identified as promoters.
+
+**Inputs:**
+
+* genome format: fasta vs genbank; 
+* genome file: accepts both genbank and fasta formats;
+* both strands (yes or no): allows the search in both DNA strands;
+* threshold: represents the probability of the test sequence being a promoter (float between 0 and 1);
+* family: The family of the testing phage - Podoviridae, Siphoviridae or Myoviridae;
+* Bacteria: The host of the phage. The training dataset includes the following hosts: Bacillus, EColi, Salmonella, Pseudomonas, Yersinia, Klebsiella, Pectobacterium, Morganella, Cronobacter, Staphylococcus, Streptococcus, Streptomyces, Lactococcus. If the testing phage has a different host, select the option 'other'; in that case a higher threshold value is recommended for more accurate results.
+* phage type: The type of the phage, according to its lifecycle: virulent or temperate;
+
+**Outputs:**
+This tool outputs two files: a FASTA file and a table in HTML, with the locations, sequence, score and type (recognized by host or phage RNAP) of the predicted promoters.
+
+**Requirements:**
+
+* Biopython
+* Sklearn 
+* Numpy
+* Pandas 
+
+
b
diff -r 000000000000 -r 34fb34df4473 __pycache__/auxiliar.cpython-35.pyc
b
Binary file __pycache__/auxiliar.cpython-35.pyc has changed
b
diff -r 000000000000 -r 34fb34df4473 __pycache__/auxiliar.cpython-36.pyc
b
Binary file __pycache__/auxiliar.cpython-36.pyc has changed
b
diff -r 000000000000 -r 34fb34df4473 auxiliar.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/auxiliar.py Sun Jul 08 11:58:28 2018 -0400
[
@@ -0,0 +1,144 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun May 27 17:37:09 2018
+
+@author: Marta
+"""
+
+
+
+def get_bacteria(file):
+    import pandas as pd
+    df = pd.read_excel(file,header=0,index_col=0)
+    bacteria = {}
+    for ind,row in df.iterrows():
+        bac = row['Bacteria']
+        bacteria[ind] = bac
+    return bacteria
+
+# Returns the phage family
+def get_families(file):
+    import pandas as pd
+    df = pd.read_excel(file,header=0,index_col=0)
+    families = {}
+    for ind,row in df.iterrows():
+        fam = row['Family']
+        families[ind] = fam
+    return families
+
+def get_lifecycle(file):
+    import pandas as pd
+    df = pd.read_excel(file,header=0,index_col=0)
+    types = {}
+    for ind,row in df.iterrows():
+        lc = row['lifecycle']
+        types[ind] = lc
+    return types
+
+# Gives the scores and positions of the motif in a sequence, by reading the file with the PSSM
+
+def get_max_pssm(file_pssm):
+    from Bio.Alphabet import IUPAC
+    from Bio.motifs import matrix
+    m = []
+    fic = open(file_pssm,'r')
+    rf = fic.readline()
+    while rf:
+        new_l = []
+        l = rf.strip().split('\t')
+        for val in l:
+            x = float(val)
+            new_l.append(x)
+        m.append(new_l)
+        rf = fic.readline()
+    a = IUPAC.unambiguous_dna
+    dic = {'A':m[0],'C':m[1], 'G':m[2], 'T':m[3]}
+    pssm = matrix.PositionSpecificScoringMatrix(a,dic)
+    return pssm.max
+
+
+def get_scores(file_pssm, seq):
+    from Bio.Alphabet import IUPAC
+    from Bio.motifs import matrix
+    maxi = get_max_pssm(file_pssm)
+    m = []
+    fic = open(file_pssm,'r')
+    rf = fic.readline()
+    while rf:
+        new_l = []
+        l = rf.strip().split('\t')
+        for val in l:
+            x = float(val)
+            new_l.append(x)
+        m.append(new_l)
+        rf = fic.readline()
+    a = IUPAC.unambiguous_dna
+    dic = {'A':m[0],'C':m[1], 'G':m[2], 'T':m[3]}
+    pssm = matrix.PositionSpecificScoringMatrix(a,dic)
+    scores = []
+    positions = []
+    a = IUPAC.unambiguous_dna
+    seq.alphabet = a
+    for pos, score in pssm.search(seq, both=False,threshold=-50):
+        scores.append(score/maxi)
+        positions.append(pos)
+    return scores,positions
+
+def get_genes(fic_name):
+    from Bio import SeqIO
+    numbers = []
+    fic = open(fic_name,'r')
+    rf = fic.readline()
+    while rf:
+        phage = rf.strip()
+        numbers.append(phage)
+        rf = fic.readline()
+    fic.close()
+    dic = {}
+    for number in numbers:
+        rec = SeqIO.read('genomas/'+number+'.gb','gb')
+        comp = []
+        dire = []
+        for feat in rec.features:
+            if feat.type == 'gene':
+                loc = feat.location
+                if loc.strand == 1: dire.append(loc)
+                else: comp.append(loc)
+            dic[number] = {'comp':comp, 'dir':dire}
+    return dic
+        
+def freq_base(seq):
+    A = seq.count('A')
+    C = seq.count('C')
+    G = seq.count('G')
+    T = seq.count('T')
+    AT = A+T
+    CG = C+G
+    return AT,CG
+
+def free_energy(seq):
+    dic1 = {'AA':-1.00, 
+        'TT':-1.00, 
+        'AT':-0.88, 
+        'TA':-0.58, 
+        'CA':-1.45,
+        'AC':-1.44, 
+        'GG':-1.84, 
+        'CC':-1.84, 
+        'GA':-1.30, 
+        'AG':-1.28, 
+        'TC':-1.30, 
+        'CT':-1.28, 
+        'TG':-1.45,
+        'GT':-1.44,
+        'GC':-2.24,
+        'CG':-2.17}
+    total = 0
+    i = 0
+    j = 1
+    while i < len(seq)-1:
+        dint = seq[i]+seq[j]
+        total += dic1[dint]
+        i += 1
+        j += 1
+    return total
b
diff -r 000000000000 -r 34fb34df4473 auxiliar.pyc
b
Binary file auxiliar.pyc has changed
b
diff -r 000000000000 -r 34fb34df4473 model_SVM_2400.sav
b
Binary file model_SVM_2400.sav has changed
b
diff -r 000000000000 -r 34fb34df4473 phage_promoters.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phage_promoters.py Sun Jul 08 11:58:28 2018 -0400
[
b'@@ -0,0 +1,533 @@\n+# -*- coding: utf-8 -*-\r\n+"""\r\n+Created on Mon Jun 11 21:08:47 2018\r\n+\r\n+@author: Marta\r\n+"""\r\n+\r\n+from Bio import SeqIO\r\n+import numpy as np\r\n+import pandas as pd\r\n+from Bio.Seq import Seq\r\n+from Bio.SeqRecord import SeqRecord\r\n+from Bio.Alphabet import IUPAC\r\n+from auxiliar import get_max_pssm, get_scores, free_energy,freq_base\r\n+\r\n+def get_testseqs65(form,fic,both=False):\r\n+    ALL = []\r\n+    indexes = []\r\n+    a = 0\r\n+    rec = SeqIO.read(fic, form)\r\n+    genome = rec.seq\r\n+    i = 0\r\n+    j = 65\r\n+    while j < len(genome):\r\n+        s = genome[i:j]\r\n+        ALL.append([1,i,j,s])\r\n+        i += 20\r\n+        j += 20\r\n+        a += 1\r\n+        indexes.append(rec.name+":"+str(a))\r\n+    if both:\r\n+        i = 0\r\n+        j = 65\r\n+        while j < len(genome):\r\n+            s = genome[i:j].reverse_complement()\r\n+            ALL.append([-1,i,j,s])\r\n+            i += 20\r\n+            j += 20\r\n+            a += 1\r\n+            indexes.append(rec.name+":"+str(a))\r\n+    df = pd.DataFrame(ALL, index=indexes, columns=[\'strand\',\'iniprom\',\'endprom\',\'seq\'])\r\n+    return df\r\n+\r\n+\r\n+def get_dftest(loc, test):\r\n+    scores = []\r\n+    posis = []\r\n+    sizes = []\r\n+    dic = {}\r\n+    for ind,row in test.iterrows():\r\n+        _,window = ind.split(\':\')\r\n+        strand = row[\'strand\']\r\n+        ini = row[\'iniprom\']\r\n+        end = row[\'endprom\']\r\n+        seq = row[\'seq\']\r\n+        pos = [ini,end,strand]\r\n+        dic[window] = pos\r\n+        s = seq\r\n+        score10_6,pos10_6 = get_scores(os.path.join(loc,\'pssm10_6.txt\'), s)\r\n+        maxi10_6 = get_max_pssm(os.path.join(loc,\'pssm10_6.txt\'))\r\n+        score10_8,pos10_8 = get_scores(os.path.join(loc,\'pssm10_8.txt\'), s)\r\n+        maxi10_8 = get_max_pssm(os.path.join(loc,\'pssm10_8.txt\'))\r\n+        scores23,pos23 = get_scores(os.path.join(loc,\'pssm_23.txt\'), 
s)\r\n+        maxi23 = get_max_pssm(os.path.join(loc,\'pssm_23.txt\'))\r\n+        scores21,pos21 = get_scores(os.path.join(loc,\'pssm_21.txt\'), s)\r\n+        maxi21 = get_max_pssm(os.path.join(loc,\'pssm_21.txt\'))\r\n+        scores27,pos27 = get_scores(os.path.join(loc,\'pssm_27.txt\'), s)\r\n+        maxi27 = get_max_pssm(os.path.join(loc,\'pssm_27.txt\'))\r\n+        scores32,pos32 = get_scores(os.path.join(loc,\'pssm_32.txt\'), s)\r\n+        maxi32 = get_max_pssm(os.path.join(loc,\'pssm_32.txt\'))\r\n+        score23 = max(scores23)\r\n+        score21 = max(scores21)\r\n+        score27 = max(scores27)\r\n+        score32 = max(scores32)\r\n+        maxiphage = max(score23,score21,score27,score32)\r\n+        if maxiphage == score23: phage_max = score23*maxi23\r\n+        elif maxiphage == score21: phage_max = score21*maxi21\r\n+        elif maxiphage == score27: phage_max = score27*maxi27\r\n+        elif maxiphage == score32: phage_max = score32*maxi32\r\n+        score35_6,pos35_6 = get_scores(os.path.join(loc,\'pssm35_6.txt\'), s)\r\n+        maxi35_6 = get_max_pssm(os.path.join(loc,\'pssm35_6.txt\'))\r\n+        score35_9,pos35_9 = get_scores(os.path.join(loc,\'pssm35_9.txt\'), s)\r\n+        maxi35_9 = get_max_pssm(os.path.join(loc,\'pssm35_9.txt\'))\r\n+        score35_t4,pos35_t4 = get_scores(os.path.join(loc,\'pssm35_t4.txt\'), s)\r\n+        maxi35_t4 = get_max_pssm(os.path.join(loc,\'pssm35_t4.txt\'))\r\n+        score35_cbb,pos35_cbb = get_scores(os.path.join(loc,\'pssm35_cbb.txt\'), s)\r\n+        maxi35_cbb = get_max_pssm(os.path.join(loc,\'pssm35_cbb.txt\'))\r\n+        score35_lb,pos35_lb = get_scores(os.path.join(loc,\'pssm35_lb.txt\'),s)\r\n+        maxi35_lb = get_max_pssm(os.path.join(loc,\'pssm35_lb.txt\'))\r\n+        score35_mu, pos35_mu = get_scores(os.path.join(loc,\'pssm35_mu.txt\'),s)\r\n+        maxi35_mu = get_max_pssm(os.path.join(loc,\'pssm35_mu.txt\'))\r\n+        \r\n+        dists6 = []\r\n+        score6 = []\r\n+       
 for p in pos10_6:\r\n+            for a in range(14,22):\r\n+                d = p-a-6\r\n+                if d >= 0: \r\n+                    s10 = score10_6[p]\r\n+                    s35_6 = score35_6[d]\r\n+                    score6.append([s35_6,s10])\r\n+           '..b'                inij = int(groups[j][11:].split(\'..\')[0])\r\n+            else:\r\n+                inij = int(groups[j][1:].split(\'..\')[0])\r\n+            if inij < inii:\r\n+                temp = groups[i]\r\n+                groups[i] = groups[j]\r\n+                groups[j] = temp\r\n+    new_inds = []\r\n+    for g in groups:\r\n+        inds = new_df.groups[g]\r\n+        if len(inds) == 1: new_inds.append(inds[0])\r\n+        else:\r\n+            #maxi = max(g[\'scores\'])\r\n+            maxi = max(new_df.get_group(g)[\'scores\'])\r\n+            i = new_df.groups[g][new_df.get_group(g)[\'scores\']==maxi][0]\r\n+            new_inds.append(i)\r\n+    output = test.loc[new_inds,:]\r\n+    #output.to_excel(\'output.xlsx\', header=True, index=True)\r\n+    output.to_html(\'output.html\',index=False)\r\n+    recs = []\r\n+    for ind,row in output.iterrows():\r\n+        s = Seq(row[\'promoter_seq\'])\r\n+        posis = row[\'positions\']\r\n+        typ = row[\'promoter_type\']\r\n+        score = row[\'scores\']\r\n+        sq = SeqRecord(seq=s, id=ind, description=typ+\' \'+posis+\' score=\'+str(score))\r\n+        recs.append(sq)\r\n+    SeqIO.write(recs, \'output.fasta\',\'fasta\')\r\n+    \r\n+            \r\n+def get_predictions(scaler_file,model_file,test,df_testinfo,threshold):\r\n+    from sklearn.externals import joblib\r\n+    scaler = joblib.load(scaler_file)\r\n+    model = joblib.load(model_file)\r\n+    feat_scaled = pd.DataFrame(scaler.transform(test.iloc[:,:7]),index =test.index, columns=test.columns[:7])\r\n+    TEST_scaled = pd.concat([feat_scaled,test.iloc[:,7:]],axis=1)\r\n+    #pred = model.predict(TEST_scaled)\r\n+    scores = 
model.predict_proba(TEST_scaled)\r\n+    pos_scores = np.empty((TEST_scaled.shape[0],0), float)\r\n+    for x in scores: pos_scores = np.append(pos_scores,x[1])\r\n+    try: positive_indexes = np.nonzero(pos_scores>=float(threshold))[0] #escolher os positivos, podia ser escolher com score > x\r\n+    except ValueError: return \'The threshold value is not a float\'\r\n+    else:\r\n+        if len(positive_indexes) == 0: return None\r\n+        else:\r\n+            positive_windows = TEST_scaled.index[positive_indexes]\r\n+            INFO = df_testinfo.loc[positive_windows,[\'positions\',\'promoter_seq\']]\r\n+            promoter_type = []\r\n+            for x in df_testinfo.loc[positive_windows,\'host\'].tolist():\r\n+                if x == 0: promoter_type.append(\'phage\')\r\n+                else: promoter_type.append(\'host\')\r\n+            INFO[\'promoter_type\'] = promoter_type\r\n+            INFO[\'scores\'] = np.around(pos_scores[positive_indexes],decimals=3)\r\n+            INFO.index = positive_windows\r\n+            return INFO\r\n+\r\n+if __name__== "__main__":\r\n+    import sys\r\n+    import os\r\n+    __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))\r\n+    scaler_file = os.path.join(__location__, \'scaler_2400.sav\')\r\n+    model_file = os.path.join(__location__, \'model_SVM_2400.sav\')\r\n+    \r\n+    gen_format = sys.argv[1]\r\n+    genome_file = sys.argv[2]\r\n+    both = sys.argv[3]\r\n+    threshold = sys.argv[4]\r\n+    family = sys.argv[5]\r\n+    host = sys.argv[6]\r\n+    phage_type = sys.argv[7]\r\n+    \'\'\'\r\n+    \r\n+    gen_format = \'gb\'\r\n+    genome_file = \'test-data/NC_015264.gb\'\r\n+    genbank_fasta = \'genbank\'\r\n+    both = False\r\n+    threshold = \'0.50\'\r\n+    family = \'Podoviridae\'\r\n+    host = \'Pseudomonas\'\r\n+    phage_type = \'virulent\'\r\n+    \'\'\'\r\n+    test_windows = get_testseqs65(gen_format, genome_file,both)\r\n+    try: score_test,dic_window 
= get_dftest(__location__,test_windows)\r\n+    except IndexError: print(\'Error. Input sequence can only have A,C,G or T\')\r\n+    else:\r\n+        df_test,df_testinfo = create_dftest(score_test,dic_window,family,host,phage_type)\r\n+        preds =  get_predictions(scaler_file, model_file, df_test,df_testinfo,threshold)\r\n+        if preds is None: print(\'There is no sequence with a score value higher or equal to the threshold \'+str(threshold))\r\n+        elif type(preds) == str: print(preds)\r\n+        else: output = get_finaldf(preds)\r\n+    \r\n'
b
diff -r 000000000000 -r 34fb34df4473 phage_promoters.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/phage_promoters.xml Sun Jul 08 11:58:28 2018 -0400
[
@@ -0,0 +1,104 @@
+<tool id="get_proms" name="PhagePromoters" version="0.1.0">
+    <description>
+Get promoters of phage genomes
+    </description>
+    <requirements>
+        <requirement type="package">biopython</requirement>
+        <requirement type="package">scikit-learn</requirement>
+        <requirement type="package">numpy</requirement>
+        <requirement type="package">pandas</requirement>
+    </requirements>
+    <command detect_errors="exit_code" interpreter="python3"><![CDATA[ 
+ phage_promoters.py "$input_type.genome_format" "$genome" "$both" "$threshold" "$family" "$bacteria"  "$lifecycle"
+ ]]>
+    </command>
+    <inputs>
+ <conditional name="input_type">
+           <param type="select" name="genome_format" label='file format'>
+              <option value="genbank" selected="yes">genbank</option>
+              <option value="fasta">fasta</option>
+    </param>
+    <when value="genbank">
+              <param type="data" name="genome" format="genbank" label='genome'/>
+    </when>
+    <when value="fasta">
+              <param type="data" name="genome" format="fasta" label='genome'/>
+    </when>
+        </conditional>
+        <param type="boolean" name="both" label='Search both strands' checked="false" truevalue="-both" falsevalue="" />
+ <param name="threshold" type="float" value="0.50" label="Threshold" help="Probability of being a promoter (float between 0 and 1)" />
+        <param type="select" name="family" label='Phage family'>
+   <option value="Podoviridae" selected="yes">Podoviridae</option>
+   <option value="Siphoviridae">Siphoviridae</option>
+   <option value="Myoviridae">Myoviridae</option>
+ </param>
+        <param type="select" name="bacteria" label='Host bacteria Genus'>
+   <option value="Escherichia coli" selected="yes">Escherichia coli</option>
+   <option value="Salmonella">Salmonella</option>
+   <option value="Pseudomonas">Pseudomonas</option>
+   <option value="Yersinia">Yersinia</option>
+   <option value="Morganella">Morganella</option>
+   <option value="Cronobacter">Cronobacter</option>
+   <option value="Staphylococcus">Staphylococcus</option>
+   <option value="Streptococcus">Streptococcus</option>
+   <option value="Lactococcus">Lactococcus</option>
+   <option value="Streptomyces">Streptomyces</option>
+   <option value="Klebsiella">Klebsiella</option>
+   <option value="Bacillus">Bacillus</option>
+   <option value="Pectobacterium">Pectobacterium</option>
+   <option value="other">other</option>
+ </param>
+        <param type="select" name="lifecycle" label='Phage type'>
+   <option value="virulent" selected="yes">virulent</option>
+   <option value="temperate">temperate</option>
+ </param>
+    </inputs>
+    <outputs>
+        <data name="output1" format="html" from_work_dir="output.html" />
+        <data name="output2" format="fasta" from_work_dir="output.fasta" />
+    </outputs>
+    <tests>
+        <test>
+     <param name="genome_format" value="genbank"/>
+            <param name="genome" value="NC_015264.gb"/>
+            <param name="both" value="False"/>
+     <param name="threshold" value="0.50"/>
+            <param name="family" value="Podoviridae"/>
+            <param name="bacteria" value="Pseudomonas"/>
+            <param name="lifecycle" value="virulent"/>
+            <output name="output1" file="output.html"/>
+            <output name="output2" file="output.fasta"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+===============
+PhagePromoters
+===============
+
+Get promoters of phage genomes
+
+PhagePromoters is a Python script that predicts promoter sequences in phage genomes using a machine-learning SVM model. This model was built from a training dataset with 25 features and 3200 examples (800 positives and 2400 negatives), each representing a 65 bp sequence of a phage genome. The positive cases represent phage sequences that are already identified as promoters.
+
+**Inputs:**
+
+* genome format: fasta vs genbank; 
+* genome file: accepts both genbank and fasta formats;
+* both strands (yes or no): allows the search in both DNA strands;
+* threshold: represents the probability of the test sequence being a promoter (float between 0 and 1);
+* family: The family of the testing phage - Podoviridae, Siphoviridae or Myoviridae;
+* Bacteria: The host of the phage. The training dataset includes the following hosts: Bacillus, EColi, Salmonella, Pseudomonas, Yersinia, Klebsiella, Pectobacterium, Morganella, Cronobacter, Staphylococcus, Streptococcus, Streptomyces, Lactococcus. If the testing phage has a different host, select the option 'other'; in that case a higher threshold value is recommended for more accurate results.
+* phage type: The type of the phage, according to its lifecycle: virulent or temperate;
+
+**Outputs:**
+This tool outputs two files: a FASTA file and a table in HTML, with the locations, sequence, score and type (recognized by host or phage RNAP) of the predicted promoters.
+
+**Requirements:**
+
+* Biopython
+* Sklearn 
+* Numpy
+* Pandas  
+
+    ]]></help>
+</tool>
b
diff -r 000000000000 -r 34fb34df4473 pssm10_6.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm10_6.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-3.24 1.93 -0.34 1.38 1.43 -3.05
+-2.14 -4.24 -1.03 -1.44 -1.19 -4.05
+-2.29 -4.46 -1.12 -1.44 -1.53 -3.59
+1.79 -3.59 1.17 -0.61 -0.96 1.9
b
diff -r 000000000000 -r 34fb34df4473 pssm10_8.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm10_8.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+0.09 1.72 -4.49 1.92 1.95 1.95 -4.49 1.92
+-1.68 -1.32 -4.49 -4.49 -4.49 -4.49 -3.49 -4.49
+-0.79 -2.49 -4.49 -2.91 -4.49 -4.49 -2.49 -4.49
+1.03 -2.91 1.95 -4.49 -4.49 -4.49 1.88 -2.91
b
diff -r 000000000000 -r 34fb34df4473 pssm35_6.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm35_6.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-2.84 -2.12 -2.4 1.49 -0.74 1.16
+-2.95 -4.65 -2.56 -0.26 1.13 -1.56
+-4.33 -2.48 1.73 -3.33 -1.69 -1.65
+1.88 1.83 -1.65 -1.95 -0.14 0.15
b
diff -r 000000000000 -r 34fb34df4473 pssm35_9.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm35_9.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+0.93 0.79 -0.65 -1.87 -0.14 -1.46 -1.14 -0.14 1.13
+-1.46 -1.87 -1.46 -2.46 1.35 -3.46 -2.46 -0.14 -0.65
+-0.87 -1.87 -1.87 1.79 -1.87 -1.87 -3.46 -1.46 -1.87
+0.24 0.79 1.45 -3.46 -1.87 1.71 1.71 0.86 -0.14
b
diff -r 000000000000 -r 34fb34df4473 pssm35_cbb.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm35_cbb.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-2.46 -2.46 1.45 1.79 1.79 -2.46 0.13
+-2.46 -2.46 -2.46 -2.46 -2.46 1.54 -2.46
+-2.46 1.79 -0.14 -2.46 -2.46 -2.46 1.24
+1.79 -2.46 -2.46 -2.46 -2.46 -0.46 -1.46
b
diff -r 000000000000 -r 34fb34df4473 pssm35_lb.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm35_lb.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-0.81 -0.81 -0.81 -0.81 -0.81 0.19 -0.81 -0.81 0.78 0.19 -0.81 -0.81 -0.81 -0.81
+-0.81 -0.81 -0.81 1.19 -0.81 -0.81 -0.81 0.19 -0.81 0.19 -0.81 -0.81 -0.81 1.19
+-0.81 -0.81 1.19 -0.81 1.19 -0.81 0.78 -0.81 0.19 -0.81 -0.81 -0.81 1.19 -0.81
+1.19 1.19 -0.81 -0.81 -0.81 0.78 0.19 0.78 -0.81 0.19 1.19 1.19 -0.81 -0.81
b
diff -r 000000000000 -r 34fb34df4473 pssm35_mu.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm35_mu.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-1.17 -0.17 1.15 -1.17 1.15 1.15 -1.17 -0.17 0.42 -1.17 0.42 -1.17 0.83 -1.17
+0.83 0.83 -1.17 -0.17 -1.17 -1.17 0.83 1.15 0.83 1.42 -1.17 0.42 -1.17 0.83
+-0.17 -1.17 -0.17 -1.17 -1.17 -0.17 -1.17 -1.17 -1.17 -1.17 0.42 0.42 -0.17 -1.17
+-0.17 -0.17 -1.17 1.15 -0.17 -1.17 0.42 -1.17 -1.17 -1.17 -0.17 -0.17 -0.17 0.42
b
diff -r 000000000000 -r 34fb34df4473 pssm35_t4.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm35_t4.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-2.43 -3.43 -2.43 -2.43 1.86 -2.43 1.33
+-3.43 -3.43 -3.43 -3.43 -3.43 1.86 -3.43
+1.82 -3.43 -2.43 -3.43 -3.43 -3.43 -3.43
+-2.43 1.9 1.82 1.86 -2.43 -3.43 0.38
b
diff -r 000000000000 -r 34fb34df4473 pssm_21.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm_21.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-2.0 -1.0 -0.42 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 1.7 -2.0 -1.0 -1.0 -2.0 -2.0 -2.0 -2.0 0.81 0.58
+1.17 1.46 -2.0 -2.0 -1.0 -2.0 -1.0 -2.0 1.7 1.7 -0.42 -2.0 1.7 -2.0 0.81 1.7 1.7 -2.0 -2.0 0.81 0.81
+-0.42 -2.0 1.46 1.7 1.58 -2.0 -2.0 1.7 -2.0 -2.0 1.46 -2.0 -2.0 -1.0 -0.42 -2.0 -2.0 -2.0 -2.0 -2.0 -1.0
+-0.42 -1.0 -2.0 -2.0 -2.0 1.7 1.58 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 1.46 0.0 -2.0 -2.0 1.7 1.7 -2.0 -2.0
b
diff -r 000000000000 -r 34fb34df4473 pssm_23.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm_23.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+0.07 1.71 1.18 -0.18 1.65 0.6 -1.86 0.82 -2.86 -0.79 -5.18 1.88 -1.86 -4.18 1.69 0.97 1.26 -0.05 -0.79 0.07 1.11 0.28 1.25
+-0.32 -2.86 -2.86 -4.18 -2.86 0.49 0.6 0.52 1.68 -2.37 1.88 -1.86 1.84 -1.09 -2.86 -3.18 -1.18 -5.18 -0.54 -1.86 -1.86 -1.48 -1.09
+-3.59 -2.86 -3.59 -4.18 -3.18 -0.32 0.95 -0.86 -1.37 -1.59 -1.86 -5.18 -3.18 -5.18 -2.59 -3.59 -1.18 1.56 1.41 1.3 0.6 1.16 -0.18
+1.05 -1.18 0.6 1.59 -0.72 -1.86 -1.86 -2.01 -1.86 1.53 -5.18 -5.18 -5.18 1.79 -1.09 0.89 -0.48 -4.18 -3.59 -2.18 -4.18 -2.37 -1.86
b
diff -r 000000000000 -r 34fb34df4473 pssm_27.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm_27.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+-3.09 -2.09 1.67 -1.5 -1.09 -0.28 -3.09 -3.09 -2.09 -3.09 -3.09 -3.09 1.87 -3.09 -3.09 -3.09 -3.09 -3.09 -1.5 -3.09 -3.09 -2.09 -3.09 1.72 1.56 1.5 -1.5
+1.82 -3.09 -3.09 1.67 1.56 1.3 -1.09 -0.5 1.16 1.67 1.67 -1.09 -3.09 1.87 -3.09 1.87 1.67 -0.77 -3.09 0.82 1.16 -0.28 0.91 -2.09 -2.09 -0.5 0.08
+-3.09 1.82 -2.09 -2.09 -3.09 -0.77 0.08 1.16 0.5 -3.09 -0.77 -2.09 -3.09 -3.09 -2.09 -3.09 -1.09 1.67 1.77 1.0 0.61 -3.09 -3.09 -1.5 -3.09 -3.09 -0.5
+-2.09 -3.09 -1.09 -2.09 -1.09 -3.09 1.23 -0.09 -3.09 -0.77 -3.09 1.67 -3.09 -3.09 1.82 -3.09 -2.09 -3.09 -3.09 -3.09 -3.09 1.5 0.91 -3.09 -0.5 -1.5 0.91
b
diff -r 000000000000 -r 34fb34df4473 pssm_32.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pssm_32.txt Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,4 @@
+1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 0.78 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 1.65
+-1.81 -1.81 1.65 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 1.65 1.65 -1.81 -1.81 1.19 -1.81 -1.81 -1.81 -1.81 0.78 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81
+-1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 0.78 -1.81 -1.81 -1.81 -0.81 -1.81 -1.81 1.51 1.65 1.65 1.65 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81
+-1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 1.51 -1.81 0.19 -0.81 -1.81 -1.81 -1.81 0.78 -1.81 -1.81 1.65 -1.81 1.65 -1.81 1.65 -1.81 -1.81 -1.81
b
diff -r 000000000000 -r 34fb34df4473 scaler_2400.sav
b
Binary file scaler_2400.sav has changed
b
diff -r 000000000000 -r 34fb34df4473 test-data/NC_015264.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NC_015264.fasta Sun Jul 08 11:58:28 2018 -0400
b
b'@@ -0,0 +1,588 @@\n+>NC_015264.1 Pseudomonas phage phiIBB-PF7A, complete genome\n+AAGGGTTACTGGGACACGTTCAATCCGCTCGACAAGCACCCAACAATGGCTGACCACGGGATAGCGGAAT\n+GGGTCAGCCCGGATTCAACGCAGAAGACACGGAACAGCATCCAATGAACGTGACTGTCCGGGGTCTCTTC\n+AAGCTCTGCAATAGCTGCAACCAACGGGGCCAAGTGGCTCAAGTCTGGCTTGTCGATCACTTCGCTTGCC\n+GCTGCATCTTCCTACCTTACGACCATCAATAAGGAGTCACCATGACCAACGCAAAGCGCACCACAGCTCA\n+GACCATCAAGCACAAGGGTCGCAAGTACGTCGTAATTCACCGCTCGGTTACCCTGCTGTTGCTGAAGCGT\n+GCCGGTAAGCCTCACCACTTCACCCTTGAAGGCGGCAAAGAGGGCACCCTGAAGCGTCATAAAGACTTCT\n+GGGCGGCACTGCAACACTACAGCGACCGTCAGGACGCACTGGGCATCAAGGGCCACGCTGTAACCGCTGT\n+GGTTGGCACCGAGGACGGCCCTGTTGTCACTGAAGGCGAGCTGGCCGCAATCGAACTGATGGCATCGCAA\n+GGGGCCACAACGGGCCGTGTTAGCGCCAAGGAACCGAACTAATGCCAAAGCCTAATAAGTACAAGGGTGA\n+CGGCTCCAAGAAGCCTGAAGGGACCGTAGAGGGCGCCTACGTCATGCACAAGGGTCGCATGGTCCCGAAC\n+TTTCAAGCCACCGACTCGGCTATCGAGCGGGGCATCAACGCCTACAAGGCATTCAAGGGGAACGCTATGC\n+TGAAGTGTGTGACTCGCATCATGCTGTACATCATGGTCGCTGCAATGTGTGCGGCGCTGCTGTTCGGGAT\n+GACTGGCTGTCAAGTCAACGTCGTGAACGTGATTCACAGTGACATCGGGCTGGATGCGTCCAGTAACCTC\n+AACGCTTTGACCGAGTAGCGTTAATCGGTGGCCTTCTCTTGAGGGTCACCCGTTAAATCCACTCACTGCT\n+AATGCAGGTATCACCATGATCGCTCTAAACTACACCAGCTTCACCAGCCGCGAAGTCGCCGCTAAGATCC\n+TTGCAGCAATGCAGGAAGTCCGGGCCACAGGTAACGCTGTGCGTGTCCTCAATCGTCGCGGTAAGGCTTT\n+CCTGCTGGTCACAATCCACAAGGACGCCTTAGGGTACGCCTTCAAGTTCATCGCTGAGGATGGCACCGAG\n+GTCGGCCAGATGATCCAACGTGCGTCTAACGACTGGGATAACCCGACGTTTACCGCTTACTGGTCTATCC\n+TGAGCTGGGCATGGGACCTGAAGGAACACCCACTGCTGAGCCTGTCTAAGCTGGCCGCACAGGCCGAAGC\n+GATGAAAGGGCAAGGGGCAACCCACAAAGTGACCTGCTACGGCGGTACGGTGCAGTACGGGGCCTATCAG\n+CGTGACTGGCTGTGTCGTCGCCGGTTGTACCTGTTCGGTAAGGACGGAGTATGGCGCAAGGTTGACGATG\n+ACCAAGCCGCTCAAATCTGCTGGATCGAGGTGCTGAAATGATCTTGAGTGACCCAGCGGTAGCCATTTGC\n+TCAGGCGAGAAGTATGCACCGGGCCTGTACTGGGAGGGTCGCCAGATCCTTGACCAGATTGACCAGTACA\n+TGGACGCCTACGGGCCGATCCAACCGGGCGTTGACTATGCGCCAGATAGCGAATGGGCCAAGTTCTTCGC\n+CGATAACGTCATCATGTATGCACCGGGCGTGTATGCCATCCGCTACCTGAACCCGGACTACTGCAAACGT\n+CTGCTGGTCGAGCTGTCCGATGTCCAGTACACGGTCAATGAAGAGGAACCCGAAGACGCTCAGATCCCTG\n+AGGTCGTCCTTGAGGAAC
ACCACTATGGCCTCTTCCAGTGCCTGAGAGGGCTTTTCGAGGGGTATGCAAG\n+GAAACTGGCGTACATCCTCATGGGTCTCGAAACGGGGCCTTGTGTGTCCATACAGGCCGCACGGTACACC\n+CCAATGAACACGCCCCACGGCTGCTGGCACACTGATCGGGACTCTGAGGTGACCCTTGTGGTCGCCCTGA\n+GCGATGACCATGAAGGCGGTGGCACTCAGGTATATCAAGGCCCGTTGCTGCCTCCTGTGACGGTCCCACA\n+GCTTCCCACAGGCTGGGCCATGCTCTTCGCTGGTCGCACAAATGAACACATGGGTCTGCCGGTCACCAAA\n+GGCACCCGTAATTTGCTCGTACACTGGTACGGATTGGAGAAATAAAATGTCCCGCTTCGCAAACGTAGGT\n+TCCCAAGCATGTAACGCACTGGCTGTCAACATGGTTCACGCCATGGATGCAGACTTCTCGTCTCTTGAGC\n+GCCGTGCAATGGGTCACACACTGGCCGAGATCACTGGCCGCAAGATCCGCAAGCCGGGTCTGTATGACAA\n+GCATGTCTCGGACGCCAAACAGGGTTCCATTGCAGCGACCTACGTGGCCCACTCTGAAGGCAAGGCCGCA\n+GTAATGACCATGGCCTACGGGATGCGTCCTCAGACCGACCTACAACACGCTCTGGACGCCCGGTATCGTC\n+AACCCGGCTTCGCTGGTGCGCAGTTCTTCACCGAGCGCGGCGACTTCACTCATCTGGCGGGGCGGGGCGT\n+ATGAACCTGCACAGCATCAAGTTCGCCGTAGAGGTCGAGGCACGGCTACTGAATCCAGACCACAGCCATG\n+CGGCTGACGTGAAGGGTCATAAAGTCCTCGACTGGAAACTCGGCTTGGGTCCTTGTGGCCGCTGGGTTGA\n+CCAGTTGACCGCTGAGATCGACATGGCAACGCTGAGAATCCGCCAAACGTCCTACAGCTCAGACCCCACA\n+AAGGAACTCGAAAGGGTCCAGCGTGGGCTGTACAAGGACGATGGGGCGCTTGAGCCGTTCGGTCCAAACA\n+AGCGGGATCGTATGGCCGCACTTGAGGCCCGCCAGCGACTCATCAAGGATCGCAAGATTGAGACGTTTAT\n+CTACAAGTTAGAAGACGTTCGCGGCAGGATCAAGGCAGTCGAGCGCTGAGGACGTTAAAAACCCTCACCA\n+GAACAGGGACCACTTAAGGGGCCTTTAACTTAAAGACCCTTTAAGATCCTTTAAGATTACTCTTATAGTA\n+ATTATCATTAAGTAAAATCATTAAGTAAGAGGGTGTACCGCAGTGGCATTCATTGAGAAACAGAAACACA\n+ACTTCGACGACGTGAAGACCTCATGGGCCTTTGATACCCTGAGCAAGCTTTACGGTGAAGACCTCGCCGC\n+TGCCCAACTGGCTCTCGAACATGAGTCGCACATGATGGGTGAAGAGCGGTTCCATAAGGCGCTTGATCGC\n+CAGATTGAACGCTCGGAATTCGCTGAAACATCCGTCGCAAAACCACTGGTCGCCATGCTGGTGCCGATGT\n+TCGTCAAGGAGTTCGCCGAATGGACTGAACACCAGATGACCAAGGTGCGCCGCAAGTCGGTGGCCCTGAA\n+GTACCTCAACATGGTTCGCCCTGAACGGGTCGCTGCGTTGACGATCAAGCTGGTCATTACGGAAATGGCT\n+CGCCAGCGTCAAGACCTCGTGTCGGTGACCACTCGAATCGGTCGGACCATCGAAGAGGAAGCCCGCTTTG\n+GTCGCATCCGTGACGAAGAAGCGAAGCACTTTCAGAAGCACATCAAGGACGCTCTCAACAAGCGCAACGG\n+CCACACCTACAAGAAAGCCTTCATGGAAGCTGTCGAGTCCAAGATGCTGGAAGCTGGTGAGCTGAACGGT\n+GCGTGGGCTGACTGGGCAAACTCTGACTCGGACGTAATGCACCACAT
CGGCGCTCGCTGCCTTGAAATCC\n+TGATTAAGTCCACAGCACTGGTCGAGATCGT'..b'ATGGAGTGGGG\n+CGGCTTCCGTGGTGGCTACGAGGACACAACCCTCGAAGCTCTGGCGAAGGTCGGCAAGAAGCACAAGGTA\n+AGCGAAGTGGTTATTGAAGGTAACTTCGGTGACGGTATGTACACCAAGCTGTTCAGCCCTGTGATGACCC\n+GTATCCACCGTTGCGCTATCACTGAGGTGAAGTCCAAGGGTCAGAAAGAAATGCGTATCTGCGACGTGCT\n+GGAGCCTGTACTGGGTTCTCACAAGCTGATCGTGCATGAGTCTGTCATTGAACAGGACTACCGTACAGCC\n+CTCAACGCTGACGGGACGACTGACGTTGTGTACTCCGGGTTCCACCAACTGACCCGACTGACCAAGGAGC\n+GTGGCTCTCTGGGCCATGATGACCGTCTGGATGCTCTCGCCATCGGCGTGCAGTTCTTCACGGACTCCAT\n+GGAGAAGGACAGTGAGCGTGGCGCTGAGGAAATGCTGGAAGGCTTCCTTGAGGATCACCTTGAGAATCAC\n+ATGGTCGGCTTCGAGCAGGCCCGTGAGATCAGCCTCGGGAATGGCGTAAACATCCAGTGGGACGACGATG\n+ACGGCACTGAAGGGAACTTCATGGGCTGGTAACGAAGTCTGCACGATAGCGACACGTTCGACGAAGATTA\n+AAAACCCTCACCTAAAGAGGGACGGTGGGGGTCCTATATGATAAACACAGAAGATACTTAAAGGTGACCT\n+CAGCACGTTCTGAGAACTTTGAGAATCATGTGCTATCTGAGGACCCTATGCCGTTCTACTCATTGATGGT\n+GATTATTGCTGATAAGCATCACCTCCACATAGGAGCATACAGATGACCAAGAAAGCTACCGCAACCTTTG\n+TGGCAGTGCTGGTCAGTCTGGCGAAACACCGTGCGACCTATCGCTTCCTCGCTGTTCTTCTCGTTGCCCT\n+TGGTATCTCCAATGGCGAAGCGATTATGTCTGGGATCGAGACCGTAGCTTGTGCGTACCTTGGCTGCATC\n+GGCTGACGCCCAACGAGGCTCATTACGGGTCTGCTAAAGCATACCCTCCATGAGCATTGACTTACAGTAC\n+ATTCTTCAAGGGACCTTTCAATGGGTCATCCAGTACCCTTTAGGGTGCAGCACATCAAGGCCCTTCAAGG\n+GTCCCTTCAAGGGTCCCTTCAAGAATTTACCATAAATTTTCCTTACAGGACCTCTCATAGACTAACCCAC\n+CGAGTACCCCCGTGGCCCCTCTCGTCAACCCTAACGGCCTCTGGGGCACCCCTAAGGGGATCATTACGGT\n+GCTGGGCACCTACAGACAATCATCAAGGGATACCGGTAGGCGGTCCCTCTATGGGCTACCTCATGTCCTA\n+CCTTGGGGCACATCATGAACCCTTTAAGGGGAGCTGTCAACCCTCAAGGGATCATTAAGGGGACATCAAG\n+GGACTTAAGGGGAGACCTACAGACCATTACGTCTCCTTGTCCTAATCTGTAAGGCCCATCAAGGAACCAT\n+TGAGAGACCATCAAGGCCACCGTCAAGGCACACACCCTAAGGATAGACCATAGGAGACCCATCAAGGCTG\n+GACATCAAGGGATTGACAAAGCGATCCCTGTGATCTATTACGTGCGCTACGCGGTCATTAAGGGGACTTT\n+AAAACCCTCACCTAAACAGGGATGAACAATTGTTGAAATAAGTGGTTGACACCCTCAAAGGAGTCTGTAG\n+AATGGCCACCACAACGAAGCGACACGCCAAGGCGGATCACTGAGTTGCCTTGAAGCTTAGTAGTTACGGT\n+CTGTGCGATCCTAAGGGTGCCAAACGGGCTTAACGTAACGAACACGGATAGGGTGAAACAAAACGCTTGA\n+CACTGAGTAACAACGCTGTAGAATAC
GCCACATCAGCTACACACCGCTCTTTAACAACTTGGATGCAACA\n+TGGTCTGCAAGGCTCCGAAAGGACACCTACAGGCATCGCTACGAAGGCAAGCACTGAGCCTTCTCATAGC\n+GCCTTTGAGAACCTGAACGGATGTTACGGGTAGGGTTAGCAAGGGCCTTATGGGAACCACTTAGGAGCCT\n+CACACAATGGCACGCTTAGACATGCACGTCACTGGTCTTGAATATGATCTTTCCCTTGAGGAAAACGCAC\n+AGGGCCAGCAAGGATTCACCGTGACATACGGTGCGCAAGTCAATCATTACGAACGGTTTGATGAAGCCTT\n+TATGGACTTCAATGAATCGCTTAGCCACGCTCTGGCCCTTCAGGGTCTGTAAATCCTCACTGATAAGGTA\n+CAATCGCATGAACACGTCACAGAACCGCTTCATTCTGGCCCACGCAGCAAGCCTGCTGGTCACCGCTTAC\n+AAAGAGTGCATCGCTGAGTATCAGACGGTGCTGCCGCTTAACCTGAGCATCGGCCATGATGCACCTGATA\n+GCTACGCTGCGCTCCGTTCGCAAGCGGCTCAGGGTCAACTGAAGGTAAGCACGGCGCACAACGCCTCGTC\n+GATCTACGGGGCTTCGGGTAACCTCACGTTCCGCATCTTTCACGACTACGGTCACCTGCTGTATGACGCT\n+GAGTTCACCACTGAGCAAGAGGTTAGCCTTGCGCTGACTCAATGGCGTGACCTGATCCGCTACATCCCTC\n+AGGAATGGCAAGGCATCTGCTATGTGGTCTACCGCGCCGACACCGTGGCACAAAGCGAGTATGAAGCGAT\n+CCATAAGGACTTCCCTGTGGATCAAAAGGCATTCGTTCTGGACATCCTGAACAAGCACTTCGAGGCTGAG\n+CCACGATGAGCATGAACACGAATGAAACACTGGACGCCGTGCTGATTACCCGTGAAGAACGCAAGGCGCT\n+GGCCGCTCTGCTGTACTCAGGGTTGACCGGTAACGCAGTCGATAAGCTGGGCCTTCGGGCCTTGCAGGAA\n+AAGCTTTCATCGGCCTTTAAGGGTTACTGGGACACGTTCAATCCGCTCGACAAGCACCCAACAATGGCTG\n+ACCACGGGATAGCGGAATGGGTCAGCCCGGATTCAACGCAGAAGACACGGAACAGCATCCAATGAACGTG\n+ACTGTCCGGGGTCTCTTCAAGCTCTGCAATAGCTGCAACCAACGGGGCCAAGTGGCTCAAGTCTGGCTTG\n+TCGATCACTTCGCTTGCCGCTGCATCTTCCTACCTTACGACCATCAATAAGGAGTCACCATGACCAACGC\n+AAAGCGCACCACAGCTCAGACCATCAAGCACAAGGGTCGCAAGTACGTCGTAATTCACCGCTCGGTTACC\n+CTGCTGTTGCTGAAGCGTGCCGGTAAGCCTCACCACTTCACCCTTGAAGGCGGCAAAGAGGGCACCCTGA\n+AGCGTCATAAAGACTTCTGGGCGGCACTGCAACACTACAGCGACCGTCAGGACGCACTGGGCATCAAGGG\n+CCACGCTGTAACCGCTGTGGTTGGCACCGAGGACGGCCCTGTTGTCACTGAAGGCGAGCTGGCCGCAATC\n+GAACTGATGGCATCGCAAGGGGCCACAACGGGCCGTGTTAGCGCCAAGGAACCGAACTAATGCCAAAGCC\n+TAATAAGTACAAGGGTGACGGCTCCAAGAAGCCTGAAGGGACCGTAGAGGGCGCCTACGTCATGCACAAG\n+GGTCGCATGGTCCCGAACTTTCAAGCCACCGACTCGGCTATCGAGCGGGGCATCAACGCCTACAAGGCAT\n+TCAAGGGGAACGCTATGCTGAAGTGTGTGACTCGCATCATGCTGTACATCATGGTCGCTGCAATGTGTGC\n+GGCGCTGCTGTTCGGGATGACTGGCTGTCAAGTCAACGTCGTGAACGTGATTCAC
AGTGACATCGGGCTG\n+GATGCGTCCAGTAACCTCAACGCTTTGACCGAGTAGCGTTAATCGGTGGCCTTCTCTTGAGGGTCACCCG\n+TTAAATCCACTCACTGCTAATGC\n+\n'
b
diff -r 000000000000 -r 34fb34df4473 test-data/NC_015264.gb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NC_015264.gb Sun Jul 08 11:58:28 2018 -0400
b
b'@@ -0,0 +1,1688 @@\n+LOCUS       NC_015264              40973 bp    DNA     linear   PHG 18-APR-2011\n+DEFINITION  Pseudomonas phage phiIBB-PF7A, complete genome.\n+ACCESSION   NC_015264\n+VERSION     NC_015264.1\n+DBLINK      BioProject: PRJNA64561\n+KEYWORDS    RefSeq.\n+SOURCE      Pseudomonas phage phiIBB-PF7A\n+  ORGANISM  Pseudomonas phage phiIBB-PF7A\n+            Viruses; dsDNA viruses, no RNA stage; Caudovirales; Podoviridae;\n+            Autographivirinae; T7virus.\n+REFERENCE   1  (bases 1 to 40973)\n+  AUTHORS   Sillankorva,S., Kluskens,L.D., Lingohr,E.J., Kropinski,A.M.,\n+            Neubauer,P. and Azeredo,J.\n+  TITLE     Complete genome sequence of the lytic Pseudomonas fluorescens phage\n+            phiIBB-PF7A\n+  JOURNAL   Virol. J. 8 (1), 142 (2011)\n+   PUBMED   21439081\n+  REMARK    Publication Status: Online-Only\n+REFERENCE   2  (bases 1 to 40973)\n+  CONSRTM   NCBI Genome Project\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (21-MAR-2011) National Center for Biotechnology\n+            Information, NIH, Bethesda, MD 20894, USA\n+REFERENCE   3  (bases 1 to 40973)\n+  AUTHORS   Sillankorva,S., Kluskens,L.H., Lingohr,E.J., Kropinski,A.M.,\n+            Neubauer,P. and Azeredo,J.\n+  TITLE     Direct Submission\n+  JOURNAL   Submitted (28-JAN-2010) Biological Engineering, University of\n+            Minho, Campus de Gualtar, Braga 4710-057, Portugal\n+COMMENT     PROVISIONAL REFSEQ: This record has not yet been subject to final\n+            NCBI review. 
The reference sequence is identical to GU583987.\n+            COMPLETENESS: full length.\n+FEATURES             Location/Qualifiers\n+     source          1..40973\n+                     /organism="Pseudomonas phage phiIBB-PF7A"\n+                     /mol_type="genomic DNA"\n+                     /host="Pseudomonas fluorescens"\n+                     /db_xref="taxon:942165"\n+     repeat_region   1..985\n+                     /note="redundant terminal repeat"\n+                     /rpt_type=direct\n+     gene            252..602\n+                     /locus_tag="phiIBB-PF7Ap00"\n+                     /db_xref="GeneID:10323835"\n+     CDS             252..602\n+                     /locus_tag="phiIBB-PF7Ap00"\n+                     /note="orf1"\n+                     /codon_start=1\n+                     /transl_table=11\n+                     /product="hypothetical protein"\n+                     /protein_id="YP_004306315.1"\n+                     /db_xref="GeneID:10323835"\n+                     /translation="MTNAKRTTAQTIKHKGRKYVVIHRSVTLLLLKRAGKPHHFTLEG\n+                     GKEGTLKRHKDFWAALQHYSDRQDALGIKGHAVTAVVGTEDGPVVTEGELAAIELMAS\n+                     QGATTGRVSAKEPN"\n+     gene            602..928\n+                     /locus_tag="phiIBB-PF7Ap01"\n+                     /db_xref="GeneID:10323784"\n+     CDS             602..928\n+                     /locus_tag="phiIBB-PF7Ap01"\n+                     /note="orf2"\n+                     /codon_start=1\n+                     /transl_table=11\n+                     /product="hypothetical protein"\n+                     /protein_id="YP_004306316.1"\n+                     /db_xref="GeneID:10323784"\n+                     /translation="MPKPNKYKGDGSKKPEGTVEGAYVMHKGRMVPNFQATDSAIERG\n+                     INAYKAFKGNAMLKCVTRIMLYIMVAAMCAALLFGMTGCQVNVVNVIHSDIGLDASSN\n+                     LNALTE"\n+     gene            996..1511\n+                     /locus_tag="phiIBB-PF7Ap02"\n+                     
/db_xref="GeneID:10323785"\n+     CDS             996..1511\n+                     /locus_tag="phiIBB-PF7Ap02"\n+                     /note="orf3"\n+                     /codon_start=1\n+                     /transl_table=11\n+                     /product="hypothetical protein"\n+                     /protein_id="YP_004306317.1"\n+                     /db_xref="GeneID:10323785"\n+                     /translation="MIALNYTSFTSREVAAKILAAMQEVRATGNAVRVLNRRGKAFLL\n+                     VTIHKDALGYAFKFIAEDGTEVGQMIQRASNDWDNPTFTAYWSILSWAWDLKEHPLLS\n+                     LSKLAAQAEAMKGQ'..b' 37861 cattgatggt gattattgct gataagcatc acctccacat aggagcatac agatgaccaa\n+    37921 gaaagctacc gcaacctttg tggcagtgct ggtcagtctg gcgaaacacc gtgcgaccta\n+    37981 tcgcttcctc gctgttcttc tcgttgccct tggtatctcc aatggcgaag cgattatgtc\n+    38041 tgggatcgag accgtagctt gtgcgtacct tggctgcatc ggctgacgcc caacgaggct\n+    38101 cattacgggt ctgctaaagc ataccctcca tgagcattga cttacagtac attcttcaag\n+    38161 ggacctttca atgggtcatc cagtaccctt tagggtgcag cacatcaagg cccttcaagg\n+    38221 gtcccttcaa gggtcccttc aagaatttac cataaatttt ccttacagga cctctcatag\n+    38281 actaacccac cgagtacccc cgtggcccct ctcgtcaacc ctaacggcct ctggggcacc\n+    38341 cctaagggga tcattacggt gctgggcacc tacagacaat catcaaggga taccggtagg\n+    38401 cggtccctct atgggctacc tcatgtccta ccttggggca catcatgaac cctttaaggg\n+    38461 gagctgtcaa ccctcaaggg atcattaagg ggacatcaag ggacttaagg ggagacctac\n+    38521 agaccattac gtctccttgt cctaatctgt aaggcccatc aaggaaccat tgagagacca\n+    38581 tcaaggccac cgtcaaggca cacaccctaa ggatagacca taggagaccc atcaaggctg\n+    38641 gacatcaagg gattgacaaa gcgatccctg tgatctatta cgtgcgctac gcggtcatta\n+    38701 aggggacttt aaaaccctca cctaaacagg gatgaacaat tgttgaaata agtggttgac\n+    38761 accctcaaag gagtctgtag aatggccacc acaacgaagc gacacgccaa ggcggatcac\n+    38821 tgagttgcct tgaagcttag tagttacggt ctgtgcgatc ctaagggtgc caaacgggct\n+    38881 taacgtaacg aacacggata gggtgaaaca aaacgcttga cactgagtaa caacgctgta\n+    38941 
gaatacgcca catcagctac acaccgctct ttaacaactt ggatgcaaca tggtctgcaa\n+    39001 ggctccgaaa ggacacctac aggcatcgct acgaaggcaa gcactgagcc ttctcatagc\n+    39061 gcctttgaga acctgaacgg atgttacggg tagggttagc aagggcctta tgggaaccac\n+    39121 ttaggagcct cacacaatgg cacgcttaga catgcacgtc actggtcttg aatatgatct\n+    39181 ttcccttgag gaaaacgcac agggccagca aggattcacc gtgacatacg gtgcgcaagt\n+    39241 caatcattac gaacggtttg atgaagcctt tatggacttc aatgaatcgc ttagccacgc\n+    39301 tctggccctt cagggtctgt aaatcctcac tgataaggta caatcgcatg aacacgtcac\n+    39361 agaaccgctt cattctggcc cacgcagcaa gcctgctggt caccgcttac aaagagtgca\n+    39421 tcgctgagta tcagacggtg ctgccgctta acctgagcat cggccatgat gcacctgata\n+    39481 gctacgctgc gctccgttcg caagcggctc agggtcaact gaaggtaagc acggcgcaca\n+    39541 acgcctcgtc gatctacggg gcttcgggta acctcacgtt ccgcatcttt cacgactacg\n+    39601 gtcacctgct gtatgacgct gagttcacca ctgagcaaga ggttagcctt gcgctgactc\n+    39661 aatggcgtga cctgatccgc tacatccctc aggaatggca aggcatctgc tatgtggtct\n+    39721 accgcgccga caccgtggca caaagcgagt atgaagcgat ccataaggac ttccctgtgg\n+    39781 atcaaaaggc attcgttctg gacatcctga acaagcactt cgaggctgag ccacgatgag\n+    39841 catgaacacg aatgaaacac tggacgccgt gctgattacc cgtgaagaac gcaaggcgct\n+    39901 ggccgctctg ctgtactcag ggttgaccgg taacgcagtc gataagctgg gccttcgggc\n+    39961 cttgcaggaa aagctttcat cggcctttaa gggttactgg gacacgttca atccgctcga\n+    40021 caagcaccca acaatggctg accacgggat agcggaatgg gtcagcccgg attcaacgca\n+    40081 gaagacacgg aacagcatcc aatgaacgtg actgtccggg gtctcttcaa gctctgcaat\n+    40141 agctgcaacc aacggggcca agtggctcaa gtctggcttg tcgatcactt cgcttgccgc\n+    40201 tgcatcttcc taccttacga ccatcaataa ggagtcacca tgaccaacgc aaagcgcacc\n+    40261 acagctcaga ccatcaagca caagggtcgc aagtacgtcg taattcaccg ctcggttacc\n+    40321 ctgctgttgc tgaagcgtgc cggtaagcct caccacttca cccttgaagg cggcaaagag\n+    40381 ggcaccctga agcgtcataa agacttctgg gcggcactgc aacactacag cgaccgtcag\n+    40441 gacgcactgg gcatcaaggg ccacgctgta accgctgtgg 
ttggcaccga ggacggccct\n+    40501 gttgtcactg aaggcgagct ggccgcaatc gaactgatgg catcgcaagg ggccacaacg\n+    40561 ggccgtgtta gcgccaagga accgaactaa tgccaaagcc taataagtac aagggtgacg\n+    40621 gctccaagaa gcctgaaggg accgtagagg gcgcctacgt catgcacaag ggtcgcatgg\n+    40681 tcccgaactt tcaagccacc gactcggcta tcgagcgggg catcaacgcc tacaaggcat\n+    40741 tcaaggggaa cgctatgctg aagtgtgtga ctcgcatcat gctgtacatc atggtcgctg\n+    40801 caatgtgtgc ggcgctgctg ttcgggatga ctggctgtca agtcaacgtc gtgaacgtga\n+    40861 ttcacagtga catcgggctg gatgcgtcca gtaacctcaa cgctttgacc gagtagcgtt\n+    40921 aatcggtggc cttctcttga gggtcacccg ttaaatccac tcactgctaa tgc\n+//\n+\n'
b
diff -r 000000000000 -r 34fb34df4473 test-data/output.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fasta Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,60 @@
+>NC_015264:12 phage (238..260) score=0.606
+AATAAGGAGTCACCATGACCAAC
+>NC_015264:150 phage (2996..3018) score=0.931
+TAAAAACCCTCACCAGAACAGGG
+>NC_015264:154 host (3062..3088) score=0.729
+TAAGATTACTCTTATAGTAATTATCAT
+>NC_015264:216 host (4313..4342) score=0.653
+TGAAATGAACGTGGAGACGGCTCGTAAGTT
+>NC_015264:242 phage (4835..4855) score=0.516
+ACTGGTCTCCGACGGTGTTAA
+>NC_015264:244 host (4876..4907) score=0.519
+TTGATAACGGCACAGATGATTCCTCGGAGACT
+>NC_015264:300 phage (6000..6022) score=0.981
+TAAAACCCCTCACCAAAACAGGG
+>NC_015264:412 phage (8257..8279) score=0.985
+TAAAAACCCTCACCAAAACAGGG
+>NC_015264:473 phage (9458..9480) score=0.966
+TAAAAACCCTCACCAGAACAGGG
+>NC_015264:532 phage (10632..10654) score=0.657
+CCAAACCGATCCCTAAAGGGGTC
+>NC_015264:557 phage (11141..11163) score=0.948
+TAAAAACCCTCACCAGAACAGGG
+>NC_015264:586 phage (11710..11730) score=0.557
+CTGACTTGCCGATACCCTGAA
+>NC_015264:698 phage (13956..13978) score=0.677
+CAAATACCCTCACCTAAACAGCT
+>NC_015264:826 host (16512..16542) score=0.748
+TTGACATCGAAGAGGTCTTCGTTGAGACAGT
+>NC_015264:890 phage (17793..17815) score=0.99
+TAAAACCCCTCACCTAAACAGGG
+>NC_015264:892 phage (17842..17864) score=0.7
+ACTTAAAGATCACTCTAAGGGAG
+>NC_015264:893 host (17869..17896) score=0.777
+TATGCTTAAAGAGATCCAGCACTATCTG
+>NC_015264:979 host (19585..19615) score=0.529
+TTGAGAACCTGCACGAAGCCATGATTAAGTT
+>NC_015264:1019 phage (20361..20383) score=0.993
+TAAAAACCCTCACCTAAACAGGG
+>NC_015264:1068 phage (21354..21376) score=0.99
+TAAAAACCCTCACCTAAACAGGG
+>NC_015264:1303 phage (26071..26093) score=0.953
+TAAAAACACTCACCACAACAGGG
+>NC_015264:1616 phage (32321..32341) score=0.665
+GCCGCTCACCAAGTTCCTTAC
+>NC_015264:1667 phage (33327..33349) score=0.991
+TAAAACCCCTCACCTAAACAGGG
+>NC_015264:1886 phage (37729..37751) score=0.991
+TAAAAACCCTCACCTAAAGAGGG
+>NC_015264:1932 host (38653..38681) score=0.875
+TTGACAAAGCGATCCCTGTGATCTATTAC
+>NC_015264:1936 phage (38709..38731) score=0.938
+TTAAAACCCTCACCTAAACAGGG
+>NC_015264:1937 host (38756..38783) score=1.0
+TTGACACCCTCAAAGGAGTCTGTAGAAT
+>NC_015264:1945 host (38917..38944) score=0.997
+TTGACACTGAGTAACAACGCTGTAGAAT
+>NC_015264:1958 host (39147..39178) score=0.697
+TAGACATGCACGTCACTGGTCTTGAATATGAT
+>NC_015264:2011 phage (40226..40248) score=0.63
+AATAAGGAGTCACCATGACCAAC
b
diff -r 000000000000 -r 34fb34df4473 test-data/output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.html Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,192 @@
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th>positions</th>
+      <th>promoter_seq</th>
+      <th>promoter_type</th>
+      <th>scores</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>(238..260)</td>
+      <td>AATAAGGAGTCACCATGACCAAC</td>
+      <td>phage</td>
+      <td>0.606</td>
+    </tr>
+    <tr>
+      <td>(2996..3018)</td>
+      <td>TAAAAACCCTCACCAGAACAGGG</td>
+      <td>phage</td>
+      <td>0.931</td>
+    </tr>
+    <tr>
+      <td>(3062..3088)</td>
+      <td>TAAGATTACTCTTATAGTAATTATCAT</td>
+      <td>host</td>
+      <td>0.729</td>
+    </tr>
+    <tr>
+      <td>(4313..4342)</td>
+      <td>TGAAATGAACGTGGAGACGGCTCGTAAGTT</td>
+      <td>host</td>
+      <td>0.653</td>
+    </tr>
+    <tr>
+      <td>(4835..4855)</td>
+      <td>ACTGGTCTCCGACGGTGTTAA</td>
+      <td>phage</td>
+      <td>0.516</td>
+    </tr>
+    <tr>
+      <td>(4876..4907)</td>
+      <td>TTGATAACGGCACAGATGATTCCTCGGAGACT</td>
+      <td>host</td>
+      <td>0.519</td>
+    </tr>
+    <tr>
+      <td>(6000..6022)</td>
+      <td>TAAAACCCCTCACCAAAACAGGG</td>
+      <td>phage</td>
+      <td>0.981</td>
+    </tr>
+    <tr>
+      <td>(8257..8279)</td>
+      <td>TAAAAACCCTCACCAAAACAGGG</td>
+      <td>phage</td>
+      <td>0.985</td>
+    </tr>
+    <tr>
+      <td>(9458..9480)</td>
+      <td>TAAAAACCCTCACCAGAACAGGG</td>
+      <td>phage</td>
+      <td>0.966</td>
+    </tr>
+    <tr>
+      <td>(10632..10654)</td>
+      <td>CCAAACCGATCCCTAAAGGGGTC</td>
+      <td>phage</td>
+      <td>0.657</td>
+    </tr>
+    <tr>
+      <td>(11141..11163)</td>
+      <td>TAAAAACCCTCACCAGAACAGGG</td>
+      <td>phage</td>
+      <td>0.948</td>
+    </tr>
+    <tr>
+      <td>(11710..11730)</td>
+      <td>CTGACTTGCCGATACCCTGAA</td>
+      <td>phage</td>
+      <td>0.557</td>
+    </tr>
+    <tr>
+      <td>(13956..13978)</td>
+      <td>CAAATACCCTCACCTAAACAGCT</td>
+      <td>phage</td>
+      <td>0.677</td>
+    </tr>
+    <tr>
+      <td>(16512..16542)</td>
+      <td>TTGACATCGAAGAGGTCTTCGTTGAGACAGT</td>
+      <td>host</td>
+      <td>0.748</td>
+    </tr>
+    <tr>
+      <td>(17793..17815)</td>
+      <td>TAAAACCCCTCACCTAAACAGGG</td>
+      <td>phage</td>
+      <td>0.990</td>
+    </tr>
+    <tr>
+      <td>(17842..17864)</td>
+      <td>ACTTAAAGATCACTCTAAGGGAG</td>
+      <td>phage</td>
+      <td>0.700</td>
+    </tr>
+    <tr>
+      <td>(17869..17896)</td>
+      <td>TATGCTTAAAGAGATCCAGCACTATCTG</td>
+      <td>host</td>
+      <td>0.777</td>
+    </tr>
+    <tr>
+      <td>(19585..19615)</td>
+      <td>TTGAGAACCTGCACGAAGCCATGATTAAGTT</td>
+      <td>host</td>
+      <td>0.529</td>
+    </tr>
+    <tr>
+      <td>(20361..20383)</td>
+      <td>TAAAAACCCTCACCTAAACAGGG</td>
+      <td>phage</td>
+      <td>0.993</td>
+    </tr>
+    <tr>
+      <td>(21354..21376)</td>
+      <td>TAAAAACCCTCACCTAAACAGGG</td>
+      <td>phage</td>
+      <td>0.990</td>
+    </tr>
+    <tr>
+      <td>(26071..26093)</td>
+      <td>TAAAAACACTCACCACAACAGGG</td>
+      <td>phage</td>
+      <td>0.953</td>
+    </tr>
+    <tr>
+      <td>(32321..32341)</td>
+      <td>GCCGCTCACCAAGTTCCTTAC</td>
+      <td>phage</td>
+      <td>0.665</td>
+    </tr>
+    <tr>
+      <td>(33327..33349)</td>
+      <td>TAAAACCCCTCACCTAAACAGGG</td>
+      <td>phage</td>
+      <td>0.991</td>
+    </tr>
+    <tr>
+      <td>(37729..37751)</td>
+      <td>TAAAAACCCTCACCTAAAGAGGG</td>
+      <td>phage</td>
+      <td>0.991</td>
+    </tr>
+    <tr>
+      <td>(38653..38681)</td>
+      <td>TTGACAAAGCGATCCCTGTGATCTATTAC</td>
+      <td>host</td>
+      <td>0.875</td>
+    </tr>
+    <tr>
+      <td>(38709..38731)</td>
+      <td>TTAAAACCCTCACCTAAACAGGG</td>
+      <td>phage</td>
+      <td>0.938</td>
+    </tr>
+    <tr>
+      <td>(38756..38783)</td>
+      <td>TTGACACCCTCAAAGGAGTCTGTAGAAT</td>
+      <td>host</td>
+      <td>1.000</td>
+    </tr>
+    <tr>
+      <td>(38917..38944)</td>
+      <td>TTGACACTGAGTAACAACGCTGTAGAAT</td>
+      <td>host</td>
+      <td>0.997</td>
+    </tr>
+    <tr>
+      <td>(39147..39178)</td>
+      <td>TAGACATGCACGTCACTGGTCTTGAATATGAT</td>
+      <td>host</td>
+      <td>0.697</td>
+    </tr>
+    <tr>
+      <td>(40226..40248)</td>
+      <td>AATAAGGAGTCACCATGACCAAC</td>
+      <td>phage</td>
+      <td>0.630</td>
+    </tr>
+  </tbody>
+</table>
\ No newline at end of file
b
diff -r 000000000000 -r 34fb34df4473 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Sun Jul 08 11:58:28 2018 -0400
b
@@ -0,0 +1,6 @@
+<tool_dependency>
+     <package name="biopython"></package>
+    <package name="numpy" ></package>
+    <package name="pandas"></package>
+    <package name="scikit-learn"></package>
+</tool_dependency>