Next changeset 1:9a357864ec5c (2018-07-09) |
Commit message:
planemo upload for repository https://github.com/martaS95/PhagePromoters commit 7f30028d32b56eeeaacc6a874f8f1f2e312b9b49-dirty |
added:
README.rst __pycache__/auxiliar.cpython-35.pyc __pycache__/auxiliar.cpython-36.pyc auxiliar.py auxiliar.pyc model_SVM_2400.sav phage_promoters.py phage_promoters.xml pssm10_6.txt pssm10_8.txt pssm35_6.txt pssm35_9.txt pssm35_cbb.txt pssm35_lb.txt pssm35_mu.txt pssm35_t4.txt pssm_21.txt pssm_23.txt pssm_27.txt pssm_32.txt scaler_2400.sav test-data/NC_015264.fasta test-data/NC_015264.gb test-data/output.fasta test-data/output.html tool_dependencies.xml |
b |
diff -r 000000000000 -r 34fb34df4473 README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,29 @@ +=============== +PhagePromoters +=============== + +Get promoters of phage genomes + +PhagePromoters is a Python script that predicts promoter sequences in phage genomes, using a machine learning SVM model. This model was built from a training dataset with 25 features and 3200 examples (800 positives and 2400 negatives), each representing a 65 bp sequence of a phage genome. The positive cases represent the phage sequences that are already identified as promoters. + +**Inputs:** + +* genome format: fasta or genbank; +* genome file: accepts both genbank and fasta formats; +* both strands (yes or no): allows the search in both DNA strands; +* threshold: the minimum predicted probability for a test sequence to be reported as a promoter (float between 0 and 1); +* family: The family of the testing phage - Podoviridae, Siphoviridae or Myoviridae; +* Bacteria: The host of the phage. The training dataset includes the following hosts: Bacillus, EColi, Salmonella, Pseudomonas, Yersinia, Klebsiella, Pectobacterium, Morganella, Cronobacter, Staphylococcus, Streptococcus, Streptomyces, Lactococcus. If the testing phage has a different host, select the option 'other'; in that case a higher threshold value is recommended for more accurate results. +* phage type: The type of the phage, according to its lifecycle: virulent or temperate; + +**Outputs:** +This tool outputs two files: a FASTA file and a table in HTML, with the locations, sequence, score and type (recognized by host or phage RNAP) of the predicted promoters. + +**Requirements:** + +* Biopython +* Sklearn +* Numpy +* Pandas + + |
b |
diff -r 000000000000 -r 34fb34df4473 __pycache__/auxiliar.cpython-35.pyc |
b |
Binary file __pycache__/auxiliar.cpython-35.pyc has changed |
b |
diff -r 000000000000 -r 34fb34df4473 __pycache__/auxiliar.cpython-36.pyc |
b |
Binary file __pycache__/auxiliar.cpython-36.pyc has changed |
b |
diff -r 000000000000 -r 34fb34df4473 auxiliar.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/auxiliar.py Sun Jul 08 11:58:28 2018 -0400 |
[ |
# -*- coding: utf-8 -*-
"""
Auxiliary helpers for PhagePromoters: metadata lookup from Excel sheets,
PSSM loading/scoring with Biopython, gene-location extraction and simple
sequence composition/energy features.

Created on Sun May 27 17:37:09 2018

@author: Marta
"""

# Dinucleotide lookup table summed by free_energy().
# NOTE(review): presumably nearest-neighbour stacking free energies
# (kcal/mol) -- confirm the source of these values.
_DINUCLEOTIDE_ENERGY = {
    'AA': -1.00,
    'TT': -1.00,
    'AT': -0.88,
    'TA': -0.58,
    'CA': -1.45,
    'AC': -1.44,
    'GG': -1.84,
    'CC': -1.84,
    'GA': -1.30,
    'AG': -1.28,
    'TC': -1.30,
    'CT': -1.28,
    'TG': -1.45,
    'GT': -1.44,
    'GC': -2.24,
    'CG': -2.17,
}


def _read_column(file, column):
    """Return ``{row index: value}`` for one column of an Excel sheet.

    The sheet is read with its first row as the header and its first column
    as the row index. If an index value occurs more than once, the last row
    wins.
    """
    import pandas as pd
    df = pd.read_excel(file, header=0, index_col=0)
    return {ind: row[column] for ind, row in df.iterrows()}


def get_bacteria(file):
    """Map each row index of the Excel sheet *file* to its 'Bacteria' value."""
    return _read_column(file, 'Bacteria')


def get_families(file):
    """Map each row index of the Excel sheet *file* to its 'Family' value.

    (Original comment: returns the family of the phage.)
    """
    return _read_column(file, 'Family')


def get_lifecycle(file):
    """Map each row index of the Excel sheet *file* to its 'lifecycle' value."""
    return _read_column(file, 'lifecycle')


def _load_pssm(file_pssm):
    """Build a Biopython PositionSpecificScoringMatrix from a text file.

    The file holds one row of numbers per line; the first four non-blank
    rows are taken as the A, C, G and T rows, in that order. Fields may be
    separated by tabs or any other whitespace, and blank lines are skipped.

    Raises:
        IndexError: if the file has fewer than four non-blank rows.
        ValueError: if a field is not a valid float.
    """
    # NOTE(review): Bio.Alphabet only exists in older Biopython releases
    # (it was removed in 1.78) -- confirm the pinned Biopython version.
    from Bio.Alphabet import IUPAC
    from Bio.motifs import matrix
    rows = []
    # The context manager closes the handle even when float() raises; this
    # function is called many times per genome, so leaked handles add up.
    with open(file_pssm, 'r') as handle:
        for line in handle:
            fields = line.split()
            if fields:
                rows.append([float(val) for val in fields])
    values = {'A': rows[0], 'C': rows[1], 'G': rows[2], 'T': rows[3]}
    return matrix.PositionSpecificScoringMatrix(IUPAC.unambiguous_dna, values)


def get_max_pssm(file_pssm):
    """Return the maximum score attainable with the PSSM stored in *file_pssm*."""
    return _load_pssm(file_pssm).max


def get_scores(file_pssm, seq):
    """Score every motif position of *seq* against the PSSM in *file_pssm*.

    (Original comment: gives the scores and the positions of the motif in a
    sequence, by reading the file with the PSSM.)

    Args:
        file_pssm: path of the PSSM text file (see ``_load_pssm``).
        seq: a Biopython ``Seq``; its ``alphabet`` attribute is overwritten
            with ``IUPAC.unambiguous_dna`` as a side effect.

    Returns:
        ``(scores, positions)``: two parallel lists. Each score is the raw
        PSSM score divided by the PSSM maximum. Only forward-strand hits
        with a raw score above -50 are reported, so the lists can be shorter
        than the number of positions in *seq*.
    """
    from Bio.Alphabet import IUPAC
    # Parse the file once and derive both the matrix and its maximum from it.
    pssm = _load_pssm(file_pssm)
    maxi = pssm.max
    seq.alphabet = IUPAC.unambiguous_dna
    scores = []
    positions = []
    for pos, score in pssm.search(seq, both=False, threshold=-50):
        scores.append(score / maxi)
        positions.append(pos)
    return scores, positions


def get_genes(fic_name, genomes_dir='genomas'):
    """Collect gene locations, split by strand, for a list of genomes.

    Args:
        fic_name: text file with one genome identifier per line; blank
            lines are ignored.
        genomes_dir: directory holding ``<identifier>.gb`` GenBank files
            (defaults to the previously hard-coded ``genomas``).

    Returns:
        ``{identifier: {'comp': [...], 'dir': [...]}}`` where ``'dir'`` holds
        the locations of 'gene' features with ``strand == 1`` and ``'comp'``
        the locations of all other 'gene' features.
    """
    import os
    from Bio import SeqIO
    with open(fic_name, 'r') as handle:
        numbers = [line.strip() for line in handle if line.strip()]
    dic = {}
    for number in numbers:
        rec = SeqIO.read(os.path.join(genomes_dir, number + '.gb'), 'gb')
        comp = []
        dire = []
        for feat in rec.features:
            if feat.type != 'gene':
                continue
            loc = feat.location
            if loc.strand == 1:
                dire.append(loc)
            else:
                comp.append(loc)
        dic[number] = {'comp': comp, 'dir': dire}
    return dic


def freq_base(seq):
    """Return ``(A + T count, C + G count)`` for *seq*.

    Counting is case-sensitive (upper-case letters only); any other symbol
    is ignored.
    """
    at = seq.count('A') + seq.count('T')
    cg = seq.count('C') + seq.count('G')
    return at, cg


def free_energy(seq):
    """Sum the dinucleotide table values over all overlapping pairs of *seq*.

    Returns 0 for sequences shorter than two symbols.

    Raises:
        KeyError: if a dinucleotide contains a symbol other than upper-case
            A, C, G or T.
    """
    total = 0
    for i in range(len(seq) - 1):
        total += _DINUCLEOTIDE_ENERGY[seq[i] + seq[i + 1]]
    return total
b |
diff -r 000000000000 -r 34fb34df4473 auxiliar.pyc |
b |
Binary file auxiliar.pyc has changed |
b |
diff -r 000000000000 -r 34fb34df4473 model_SVM_2400.sav |
b |
Binary file model_SVM_2400.sav has changed |
b |
diff -r 000000000000 -r 34fb34df4473 phage_promoters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phage_promoters.py Sun Jul 08 11:58:28 2018 -0400 |
[ |
b'@@ -0,0 +1,533 @@\n+# -*- coding: utf-8 -*-\r\n+"""\r\n+Created on Mon Jun 11 21:08:47 2018\r\n+\r\n+@author: Marta\r\n+"""\r\n+\r\n+from Bio import SeqIO\r\n+import numpy as np\r\n+import pandas as pd\r\n+from Bio.Seq import Seq\r\n+from Bio.SeqRecord import SeqRecord\r\n+from Bio.Alphabet import IUPAC\r\n+from auxiliar import get_max_pssm, get_scores, free_energy,freq_base\r\n+\r\n+def get_testseqs65(form,fic,both=False):\r\n+ ALL = []\r\n+ indexes = []\r\n+ a = 0\r\n+ rec = SeqIO.read(fic, form)\r\n+ genome = rec.seq\r\n+ i = 0\r\n+ j = 65\r\n+ while j < len(genome):\r\n+ s = genome[i:j]\r\n+ ALL.append([1,i,j,s])\r\n+ i += 20\r\n+ j += 20\r\n+ a += 1\r\n+ indexes.append(rec.name+":"+str(a))\r\n+ if both:\r\n+ i = 0\r\n+ j = 65\r\n+ while j < len(genome):\r\n+ s = genome[i:j].reverse_complement()\r\n+ ALL.append([-1,i,j,s])\r\n+ i += 20\r\n+ j += 20\r\n+ a += 1\r\n+ indexes.append(rec.name+":"+str(a))\r\n+ df = pd.DataFrame(ALL, index=indexes, columns=[\'strand\',\'iniprom\',\'endprom\',\'seq\'])\r\n+ return df\r\n+\r\n+\r\n+def get_dftest(loc, test):\r\n+ scores = []\r\n+ posis = []\r\n+ sizes = []\r\n+ dic = {}\r\n+ for ind,row in test.iterrows():\r\n+ _,window = ind.split(\':\')\r\n+ strand = row[\'strand\']\r\n+ ini = row[\'iniprom\']\r\n+ end = row[\'endprom\']\r\n+ seq = row[\'seq\']\r\n+ pos = [ini,end,strand]\r\n+ dic[window] = pos\r\n+ s = seq\r\n+ score10_6,pos10_6 = get_scores(os.path.join(loc,\'pssm10_6.txt\'), s)\r\n+ maxi10_6 = get_max_pssm(os.path.join(loc,\'pssm10_6.txt\'))\r\n+ score10_8,pos10_8 = get_scores(os.path.join(loc,\'pssm10_8.txt\'), s)\r\n+ maxi10_8 = get_max_pssm(os.path.join(loc,\'pssm10_8.txt\'))\r\n+ scores23,pos23 = get_scores(os.path.join(loc,\'pssm_23.txt\'), s)\r\n+ maxi23 = get_max_pssm(os.path.join(loc,\'pssm_23.txt\'))\r\n+ scores21,pos21 = get_scores(os.path.join(loc,\'pssm_21.txt\'), s)\r\n+ maxi21 = get_max_pssm(os.path.join(loc,\'pssm_21.txt\'))\r\n+ scores27,pos27 = get_scores(os.path.join(loc,\'pssm_27.txt\'), 
s)\r\n+ maxi27 = get_max_pssm(os.path.join(loc,\'pssm_27.txt\'))\r\n+ scores32,pos32 = get_scores(os.path.join(loc,\'pssm_32.txt\'), s)\r\n+ maxi32 = get_max_pssm(os.path.join(loc,\'pssm_32.txt\'))\r\n+ score23 = max(scores23)\r\n+ score21 = max(scores21)\r\n+ score27 = max(scores27)\r\n+ score32 = max(scores32)\r\n+ maxiphage = max(score23,score21,score27,score32)\r\n+ if maxiphage == score23: phage_max = score23*maxi23\r\n+ elif maxiphage == score21: phage_max = score21*maxi21\r\n+ elif maxiphage == score27: phage_max = score27*maxi27\r\n+ elif maxiphage == score32: phage_max = score32*maxi32\r\n+ score35_6,pos35_6 = get_scores(os.path.join(loc,\'pssm35_6.txt\'), s)\r\n+ maxi35_6 = get_max_pssm(os.path.join(loc,\'pssm35_6.txt\'))\r\n+ score35_9,pos35_9 = get_scores(os.path.join(loc,\'pssm35_9.txt\'), s)\r\n+ maxi35_9 = get_max_pssm(os.path.join(loc,\'pssm35_9.txt\'))\r\n+ score35_t4,pos35_t4 = get_scores(os.path.join(loc,\'pssm35_t4.txt\'), s)\r\n+ maxi35_t4 = get_max_pssm(os.path.join(loc,\'pssm35_t4.txt\'))\r\n+ score35_cbb,pos35_cbb = get_scores(os.path.join(loc,\'pssm35_cbb.txt\'), s)\r\n+ maxi35_cbb = get_max_pssm(os.path.join(loc,\'pssm35_cbb.txt\'))\r\n+ score35_lb,pos35_lb = get_scores(os.path.join(loc,\'pssm35_lb.txt\'),s)\r\n+ maxi35_lb = get_max_pssm(os.path.join(loc,\'pssm35_lb.txt\'))\r\n+ score35_mu, pos35_mu = get_scores(os.path.join(loc,\'pssm35_mu.txt\'),s)\r\n+ maxi35_mu = get_max_pssm(os.path.join(loc,\'pssm35_mu.txt\'))\r\n+ \r\n+ dists6 = []\r\n+ score6 = []\r\n+ for p in pos10_6:\r\n+ for a in range(14,22):\r\n+ d = p-a-6\r\n+ if d >= 0: \r\n+ s10 = score10_6[p]\r\n+ s35_6 = score35_6[d]\r\n+ score6.append([s35_6,s10])\r\n+ '..b' inij = int(groups[j][11:].split(\'..\')[0])\r\n+ else:\r\n+ inij = int(groups[j][1:].split(\'..\')[0])\r\n+ if inij < inii:\r\n+ temp = groups[i]\r\n+ groups[i] = groups[j]\r\n+ groups[j] = temp\r\n+ new_inds = []\r\n+ for g in groups:\r\n+ inds = new_df.groups[g]\r\n+ if len(inds) == 1: 
new_inds.append(inds[0])\r\n+ else:\r\n+ #maxi = max(g[\'scores\'])\r\n+ maxi = max(new_df.get_group(g)[\'scores\'])\r\n+ i = new_df.groups[g][new_df.get_group(g)[\'scores\']==maxi][0]\r\n+ new_inds.append(i)\r\n+ output = test.loc[new_inds,:]\r\n+ #output.to_excel(\'output.xlsx\', header=True, index=True)\r\n+ output.to_html(\'output.html\',index=False)\r\n+ recs = []\r\n+ for ind,row in output.iterrows():\r\n+ s = Seq(row[\'promoter_seq\'])\r\n+ posis = row[\'positions\']\r\n+ typ = row[\'promoter_type\']\r\n+ score = row[\'scores\']\r\n+ sq = SeqRecord(seq=s, id=ind, description=typ+\' \'+posis+\' score=\'+str(score))\r\n+ recs.append(sq)\r\n+ SeqIO.write(recs, \'output.fasta\',\'fasta\')\r\n+ \r\n+ \r\n+def get_predictions(scaler_file,model_file,test,df_testinfo,threshold):\r\n+ from sklearn.externals import joblib\r\n+ scaler = joblib.load(scaler_file)\r\n+ model = joblib.load(model_file)\r\n+ feat_scaled = pd.DataFrame(scaler.transform(test.iloc[:,:7]),index =test.index, columns=test.columns[:7])\r\n+ TEST_scaled = pd.concat([feat_scaled,test.iloc[:,7:]],axis=1)\r\n+ #pred = model.predict(TEST_scaled)\r\n+ scores = model.predict_proba(TEST_scaled)\r\n+ pos_scores = np.empty((TEST_scaled.shape[0],0), float)\r\n+ for x in scores: pos_scores = np.append(pos_scores,x[1])\r\n+ try: positive_indexes = np.nonzero(pos_scores>=float(threshold))[0] #escolher os positivos, podia ser escolher com score > x\r\n+ except ValueError: return \'The threshold value is not a float\'\r\n+ else:\r\n+ if len(positive_indexes) == 0: return None\r\n+ else:\r\n+ positive_windows = TEST_scaled.index[positive_indexes]\r\n+ INFO = df_testinfo.loc[positive_windows,[\'positions\',\'promoter_seq\']]\r\n+ promoter_type = []\r\n+ for x in df_testinfo.loc[positive_windows,\'host\'].tolist():\r\n+ if x == 0: promoter_type.append(\'phage\')\r\n+ else: promoter_type.append(\'host\')\r\n+ INFO[\'promoter_type\'] = promoter_type\r\n+ INFO[\'scores\'] = 
np.around(pos_scores[positive_indexes],decimals=3)\r\n+ INFO.index = positive_windows\r\n+ return INFO\r\n+\r\n+if __name__== "__main__":\r\n+ import sys\r\n+ import os\r\n+ __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))\r\n+ scaler_file = os.path.join(__location__, \'scaler_2400.sav\')\r\n+ model_file = os.path.join(__location__, \'model_SVM_2400.sav\')\r\n+ \r\n+ gen_format = sys.argv[1]\r\n+ genome_file = sys.argv[2]\r\n+ both = sys.argv[3]\r\n+ threshold = sys.argv[4]\r\n+ family = sys.argv[5]\r\n+ host = sys.argv[6]\r\n+ phage_type = sys.argv[7]\r\n+ \'\'\'\r\n+ \r\n+ gen_format = \'gb\'\r\n+ genome_file = \'test-data/NC_015264.gb\'\r\n+ genbank_fasta = \'genbank\'\r\n+ both = False\r\n+ threshold = \'0.50\'\r\n+ family = \'Podoviridae\'\r\n+ host = \'Pseudomonas\'\r\n+ phage_type = \'virulent\'\r\n+ \'\'\'\r\n+ test_windows = get_testseqs65(gen_format, genome_file,both)\r\n+ try: score_test,dic_window = get_dftest(__location__,test_windows)\r\n+ except IndexError: print(\'Error. Input sequence can only have A,C,G or T\')\r\n+ else:\r\n+ df_test,df_testinfo = create_dftest(score_test,dic_window,family,host,phage_type)\r\n+ preds = get_predictions(scaler_file, model_file, df_test,df_testinfo,threshold)\r\n+ if preds is None: print(\'There is no sequence with a score value higher or equal to the threshold \'+str(threshold))\r\n+ elif type(preds) == str: print(preds)\r\n+ else: output = get_finaldf(preds)\r\n+ \r\n' |
b |
diff -r 000000000000 -r 34fb34df4473 phage_promoters.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phage_promoters.xml Sun Jul 08 11:58:28 2018 -0400 |
[ |
@@ -0,0 +1,104 @@ +<tool id="get_proms" name="PhagePromoters" version="0.1.0"> + <description> +Get promoters of phage genomes + </description> + <requirements> + <requirement type="package">biopython</requirement> + <requirement type="package">scikit-learn</requirement> + <requirement type="package">numpy</requirement> + <requirement type="package">pandas</requirement> + </requirements> + <command detect_errors="exit_code" interpreter="python3"><![CDATA[ + phage_promoters.py "$input_type.genome_format" "$genome" "$both" "$threshold" "$family" "$bacteria" "$lifecycle" + ]]> + </command> + <inputs> + <conditional name="input_type"> + <param type="select" name="genome_format" label='file format'> + <option value="genbank" selected="yes">genbank</option> + <option value="fasta">fasta</option> + </param> + <when value="genbank"> + <param type="data" name="genome" format="genbank" label='genome'/> + </when> + <when value="fasta"> + <param type="data" name="genome" format="fasta" label='genome'/> + </when> + </conditional> + <param type="boolean" name="both" label='Search both strands' checked="false" truevalue="-both" falsevalue="" /> + <param name="threshold" type="float" value="0.50" label="Threshold" help="Probabilty of being a promoter (float between 0 and 1)" /> + <param type="select" name="family" label='Phage family'> + <option value="Podoviridae" selected="yes">Podoviridae</option> + <option value="Siphoviridae">Siphoviridae</option> + <option value="Myoviridae">Myoviridae</option> + </param> + <param type="select" name="bacteria" label='Host bacteria Genus'> + <option value="Escherichia coli" selected="yes">Escherichia coli</option> + <option value="Salmonella">Salmonella</option> + <option value="Pseudomonas">Pseudomonas</option> + <option value="Yersinia">Yersinia</option> + <option value="Morganella">Morganella</option> + <option value="Cronobacter">Cronobacter</option> + <option value="Staphylococcus">Staphylococcus</option> + <option 
value="Streptococcus">Streptococcus</option> + <option value="Lactococcus">Lactococcus</option> + <option value="Streptomyces">Streptomyces</option> + <option value="Klebsiella">Klebsiella</option> + <option value="Bacillus">Bacillus</option> + <option value="Pectobacterium">Pectobacterium</option> + <option value="other">other</option> + </param> + <param type="select" name="lifecycle" label='Phage type'> + <option value="virulent" selected="yes">virulent</option> + <option value="temperate">temperate</option> + </param> + </inputs> + <outputs> + <data name="output1" format="html" from_work_dir="output.html" /> + <data name="output2" format="fasta" from_work_dir="output.fasta" /> + </outputs> + <tests> + <test> + <param name="genome_format" value="genbank"/> + <param name="genome" value="NC_015264.gb"/> + <param name="both" value="False"/> + <param name="threshold" value="0.50"/> + <param name="family" value="Podoviridae"/> + <param name="bacteria" value="Pseudomonas"/> + <param name="lifecycle" value="virulent"/> + <output name="output1" file="output.html"/> + <output name="output2" file="output.fasta"/> + </test> + </tests> + <help><![CDATA[ + +=============== +PhagePromoters +=============== + +Get promoters of phage genomes + +PhagePromoters is a python script that predicts promoter sequences in phage genomes, using a machine learning SVM model. This model was built from a train dataset with 25 features and 3200 examples (800 positives and 2400 negatives), each representing a 65 bp sequence of a phage genome. The positive cases represent the phage sequences that are already identified as promoters. 
+ +**Inputs:** + +* genome format: fasta or genbank; +* genome file: accepts both genbank and fasta formats; +* both strands (yes or no): allows the search in both DNA strands; +* threshold: the minimum predicted probability for a test sequence to be reported as a promoter (float between 0 and 1); +* family: The family of the testing phage - Podoviridae, Siphoviridae or Myoviridae; +* Bacteria: The host of the phage. The training dataset includes the following hosts: Bacillus, EColi, Salmonella, Pseudomonas, Yersinia, Klebsiella, Pectobacterium, Morganella, Cronobacter, Staphylococcus, Streptococcus, Streptomyces, Lactococcus. If the testing phage has a different host, select the option 'other'; in that case a higher threshold value is recommended for more accurate results. +* phage type: The type of the phage, according to its lifecycle: virulent or temperate; + +**Outputs:** +This tool outputs two files: a FASTA file and a table in HTML, with the locations, sequence, score and type (recognized by host or phage RNAP) of the predicted promoters. + +**Requirements:** + +* Biopython +* Sklearn +* Numpy +* Pandas + + ]]></help> +</tool> |
b |
diff -r 000000000000 -r 34fb34df4473 pssm10_6.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm10_6.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-3.24 1.93 -0.34 1.38 1.43 -3.05 +-2.14 -4.24 -1.03 -1.44 -1.19 -4.05 +-2.29 -4.46 -1.12 -1.44 -1.53 -3.59 +1.79 -3.59 1.17 -0.61 -0.96 1.9 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm10_8.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm10_8.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +0.09 1.72 -4.49 1.92 1.95 1.95 -4.49 1.92 +-1.68 -1.32 -4.49 -4.49 -4.49 -4.49 -3.49 -4.49 +-0.79 -2.49 -4.49 -2.91 -4.49 -4.49 -2.49 -4.49 +1.03 -2.91 1.95 -4.49 -4.49 -4.49 1.88 -2.91 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm35_6.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm35_6.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-2.84 -2.12 -2.4 1.49 -0.74 1.16 +-2.95 -4.65 -2.56 -0.26 1.13 -1.56 +-4.33 -2.48 1.73 -3.33 -1.69 -1.65 +1.88 1.83 -1.65 -1.95 -0.14 0.15 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm35_9.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm35_9.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +0.93 0.79 -0.65 -1.87 -0.14 -1.46 -1.14 -0.14 1.13 +-1.46 -1.87 -1.46 -2.46 1.35 -3.46 -2.46 -0.14 -0.65 +-0.87 -1.87 -1.87 1.79 -1.87 -1.87 -3.46 -1.46 -1.87 +0.24 0.79 1.45 -3.46 -1.87 1.71 1.71 0.86 -0.14 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm35_cbb.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm35_cbb.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-2.46 -2.46 1.45 1.79 1.79 -2.46 0.13 +-2.46 -2.46 -2.46 -2.46 -2.46 1.54 -2.46 +-2.46 1.79 -0.14 -2.46 -2.46 -2.46 1.24 +1.79 -2.46 -2.46 -2.46 -2.46 -0.46 -1.46 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm35_lb.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm35_lb.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-0.81 -0.81 -0.81 -0.81 -0.81 0.19 -0.81 -0.81 0.78 0.19 -0.81 -0.81 -0.81 -0.81 +-0.81 -0.81 -0.81 1.19 -0.81 -0.81 -0.81 0.19 -0.81 0.19 -0.81 -0.81 -0.81 1.19 +-0.81 -0.81 1.19 -0.81 1.19 -0.81 0.78 -0.81 0.19 -0.81 -0.81 -0.81 1.19 -0.81 +1.19 1.19 -0.81 -0.81 -0.81 0.78 0.19 0.78 -0.81 0.19 1.19 1.19 -0.81 -0.81 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm35_mu.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm35_mu.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-1.17 -0.17 1.15 -1.17 1.15 1.15 -1.17 -0.17 0.42 -1.17 0.42 -1.17 0.83 -1.17 +0.83 0.83 -1.17 -0.17 -1.17 -1.17 0.83 1.15 0.83 1.42 -1.17 0.42 -1.17 0.83 +-0.17 -1.17 -0.17 -1.17 -1.17 -0.17 -1.17 -1.17 -1.17 -1.17 0.42 0.42 -0.17 -1.17 +-0.17 -0.17 -1.17 1.15 -0.17 -1.17 0.42 -1.17 -1.17 -1.17 -0.17 -0.17 -0.17 0.42 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm35_t4.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm35_t4.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-2.43 -3.43 -2.43 -2.43 1.86 -2.43 1.33 +-3.43 -3.43 -3.43 -3.43 -3.43 1.86 -3.43 +1.82 -3.43 -2.43 -3.43 -3.43 -3.43 -3.43 +-2.43 1.9 1.82 1.86 -2.43 -3.43 0.38 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm_21.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm_21.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-2.0 -1.0 -0.42 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 1.7 -2.0 -1.0 -1.0 -2.0 -2.0 -2.0 -2.0 0.81 0.58 +1.17 1.46 -2.0 -2.0 -1.0 -2.0 -1.0 -2.0 1.7 1.7 -0.42 -2.0 1.7 -2.0 0.81 1.7 1.7 -2.0 -2.0 0.81 0.81 +-0.42 -2.0 1.46 1.7 1.58 -2.0 -2.0 1.7 -2.0 -2.0 1.46 -2.0 -2.0 -1.0 -0.42 -2.0 -2.0 -2.0 -2.0 -2.0 -1.0 +-0.42 -1.0 -2.0 -2.0 -2.0 1.7 1.58 -2.0 -2.0 -2.0 -2.0 -2.0 -2.0 1.46 0.0 -2.0 -2.0 1.7 1.7 -2.0 -2.0 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm_23.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm_23.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +0.07 1.71 1.18 -0.18 1.65 0.6 -1.86 0.82 -2.86 -0.79 -5.18 1.88 -1.86 -4.18 1.69 0.97 1.26 -0.05 -0.79 0.07 1.11 0.28 1.25 +-0.32 -2.86 -2.86 -4.18 -2.86 0.49 0.6 0.52 1.68 -2.37 1.88 -1.86 1.84 -1.09 -2.86 -3.18 -1.18 -5.18 -0.54 -1.86 -1.86 -1.48 -1.09 +-3.59 -2.86 -3.59 -4.18 -3.18 -0.32 0.95 -0.86 -1.37 -1.59 -1.86 -5.18 -3.18 -5.18 -2.59 -3.59 -1.18 1.56 1.41 1.3 0.6 1.16 -0.18 +1.05 -1.18 0.6 1.59 -0.72 -1.86 -1.86 -2.01 -1.86 1.53 -5.18 -5.18 -5.18 1.79 -1.09 0.89 -0.48 -4.18 -3.59 -2.18 -4.18 -2.37 -1.86 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm_27.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm_27.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +-3.09 -2.09 1.67 -1.5 -1.09 -0.28 -3.09 -3.09 -2.09 -3.09 -3.09 -3.09 1.87 -3.09 -3.09 -3.09 -3.09 -3.09 -1.5 -3.09 -3.09 -2.09 -3.09 1.72 1.56 1.5 -1.5 +1.82 -3.09 -3.09 1.67 1.56 1.3 -1.09 -0.5 1.16 1.67 1.67 -1.09 -3.09 1.87 -3.09 1.87 1.67 -0.77 -3.09 0.82 1.16 -0.28 0.91 -2.09 -2.09 -0.5 0.08 +-3.09 1.82 -2.09 -2.09 -3.09 -0.77 0.08 1.16 0.5 -3.09 -0.77 -2.09 -3.09 -3.09 -2.09 -3.09 -1.09 1.67 1.77 1.0 0.61 -3.09 -3.09 -1.5 -3.09 -3.09 -0.5 +-2.09 -3.09 -1.09 -2.09 -1.09 -3.09 1.23 -0.09 -3.09 -0.77 -3.09 1.67 -3.09 -3.09 1.82 -3.09 -2.09 -3.09 -3.09 -3.09 -3.09 1.5 0.91 -3.09 -0.5 -1.5 0.91 |
b |
diff -r 000000000000 -r 34fb34df4473 pssm_32.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pssm_32.txt Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 0.78 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 1.65 +-1.81 -1.81 1.65 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 1.65 1.65 -1.81 -1.81 1.19 -1.81 -1.81 -1.81 -1.81 0.78 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 +-1.81 1.65 -1.81 -1.81 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 0.78 -1.81 -1.81 -1.81 -0.81 -1.81 -1.81 1.51 1.65 1.65 1.65 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 +-1.81 -1.81 -1.81 -1.81 1.65 -1.81 1.65 -1.81 -1.81 -1.81 -1.81 1.65 -1.81 -1.81 1.51 -1.81 0.19 -0.81 -1.81 -1.81 -1.81 0.78 -1.81 -1.81 1.65 -1.81 1.65 -1.81 1.65 -1.81 -1.81 -1.81 |
b |
diff -r 000000000000 -r 34fb34df4473 scaler_2400.sav |
b |
Binary file scaler_2400.sav has changed |
b |
diff -r 000000000000 -r 34fb34df4473 test-data/NC_015264.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NC_015264.fasta Sun Jul 08 11:58:28 2018 -0400 |
b |
b'@@ -0,0 +1,588 @@\n+>NC_015264.1 Pseudomonas phage phiIBB-PF7A, complete genome\n+AAGGGTTACTGGGACACGTTCAATCCGCTCGACAAGCACCCAACAATGGCTGACCACGGGATAGCGGAAT\n+GGGTCAGCCCGGATTCAACGCAGAAGACACGGAACAGCATCCAATGAACGTGACTGTCCGGGGTCTCTTC\n+AAGCTCTGCAATAGCTGCAACCAACGGGGCCAAGTGGCTCAAGTCTGGCTTGTCGATCACTTCGCTTGCC\n+GCTGCATCTTCCTACCTTACGACCATCAATAAGGAGTCACCATGACCAACGCAAAGCGCACCACAGCTCA\n+GACCATCAAGCACAAGGGTCGCAAGTACGTCGTAATTCACCGCTCGGTTACCCTGCTGTTGCTGAAGCGT\n+GCCGGTAAGCCTCACCACTTCACCCTTGAAGGCGGCAAAGAGGGCACCCTGAAGCGTCATAAAGACTTCT\n+GGGCGGCACTGCAACACTACAGCGACCGTCAGGACGCACTGGGCATCAAGGGCCACGCTGTAACCGCTGT\n+GGTTGGCACCGAGGACGGCCCTGTTGTCACTGAAGGCGAGCTGGCCGCAATCGAACTGATGGCATCGCAA\n+GGGGCCACAACGGGCCGTGTTAGCGCCAAGGAACCGAACTAATGCCAAAGCCTAATAAGTACAAGGGTGA\n+CGGCTCCAAGAAGCCTGAAGGGACCGTAGAGGGCGCCTACGTCATGCACAAGGGTCGCATGGTCCCGAAC\n+TTTCAAGCCACCGACTCGGCTATCGAGCGGGGCATCAACGCCTACAAGGCATTCAAGGGGAACGCTATGC\n+TGAAGTGTGTGACTCGCATCATGCTGTACATCATGGTCGCTGCAATGTGTGCGGCGCTGCTGTTCGGGAT\n+GACTGGCTGTCAAGTCAACGTCGTGAACGTGATTCACAGTGACATCGGGCTGGATGCGTCCAGTAACCTC\n+AACGCTTTGACCGAGTAGCGTTAATCGGTGGCCTTCTCTTGAGGGTCACCCGTTAAATCCACTCACTGCT\n+AATGCAGGTATCACCATGATCGCTCTAAACTACACCAGCTTCACCAGCCGCGAAGTCGCCGCTAAGATCC\n+TTGCAGCAATGCAGGAAGTCCGGGCCACAGGTAACGCTGTGCGTGTCCTCAATCGTCGCGGTAAGGCTTT\n+CCTGCTGGTCACAATCCACAAGGACGCCTTAGGGTACGCCTTCAAGTTCATCGCTGAGGATGGCACCGAG\n+GTCGGCCAGATGATCCAACGTGCGTCTAACGACTGGGATAACCCGACGTTTACCGCTTACTGGTCTATCC\n+TGAGCTGGGCATGGGACCTGAAGGAACACCCACTGCTGAGCCTGTCTAAGCTGGCCGCACAGGCCGAAGC\n+GATGAAAGGGCAAGGGGCAACCCACAAAGTGACCTGCTACGGCGGTACGGTGCAGTACGGGGCCTATCAG\n+CGTGACTGGCTGTGTCGTCGCCGGTTGTACCTGTTCGGTAAGGACGGAGTATGGCGCAAGGTTGACGATG\n+ACCAAGCCGCTCAAATCTGCTGGATCGAGGTGCTGAAATGATCTTGAGTGACCCAGCGGTAGCCATTTGC\n+TCAGGCGAGAAGTATGCACCGGGCCTGTACTGGGAGGGTCGCCAGATCCTTGACCAGATTGACCAGTACA\n+TGGACGCCTACGGGCCGATCCAACCGGGCGTTGACTATGCGCCAGATAGCGAATGGGCCAAGTTCTTCGC\n+CGATAACGTCATCATGTATGCACCGGGCGTGTATGCCATCCGCTACCTGAACCCGGACTACTGCAAACGT\n+CTGCTGGTCGAGCTGTCCGATGTCCAGTACACGGTCAATGAAGAGGAACCCGAAGACGCTCAGATCCCTG\n+AGGTCGTCCTTGAGGAAC
ACCACTATGGCCTCTTCCAGTGCCTGAGAGGGCTTTTCGAGGGGTATGCAAG\n+GAAACTGGCGTACATCCTCATGGGTCTCGAAACGGGGCCTTGTGTGTCCATACAGGCCGCACGGTACACC\n+CCAATGAACACGCCCCACGGCTGCTGGCACACTGATCGGGACTCTGAGGTGACCCTTGTGGTCGCCCTGA\n+GCGATGACCATGAAGGCGGTGGCACTCAGGTATATCAAGGCCCGTTGCTGCCTCCTGTGACGGTCCCACA\n+GCTTCCCACAGGCTGGGCCATGCTCTTCGCTGGTCGCACAAATGAACACATGGGTCTGCCGGTCACCAAA\n+GGCACCCGTAATTTGCTCGTACACTGGTACGGATTGGAGAAATAAAATGTCCCGCTTCGCAAACGTAGGT\n+TCCCAAGCATGTAACGCACTGGCTGTCAACATGGTTCACGCCATGGATGCAGACTTCTCGTCTCTTGAGC\n+GCCGTGCAATGGGTCACACACTGGCCGAGATCACTGGCCGCAAGATCCGCAAGCCGGGTCTGTATGACAA\n+GCATGTCTCGGACGCCAAACAGGGTTCCATTGCAGCGACCTACGTGGCCCACTCTGAAGGCAAGGCCGCA\n+GTAATGACCATGGCCTACGGGATGCGTCCTCAGACCGACCTACAACACGCTCTGGACGCCCGGTATCGTC\n+AACCCGGCTTCGCTGGTGCGCAGTTCTTCACCGAGCGCGGCGACTTCACTCATCTGGCGGGGCGGGGCGT\n+ATGAACCTGCACAGCATCAAGTTCGCCGTAGAGGTCGAGGCACGGCTACTGAATCCAGACCACAGCCATG\n+CGGCTGACGTGAAGGGTCATAAAGTCCTCGACTGGAAACTCGGCTTGGGTCCTTGTGGCCGCTGGGTTGA\n+CCAGTTGACCGCTGAGATCGACATGGCAACGCTGAGAATCCGCCAAACGTCCTACAGCTCAGACCCCACA\n+AAGGAACTCGAAAGGGTCCAGCGTGGGCTGTACAAGGACGATGGGGCGCTTGAGCCGTTCGGTCCAAACA\n+AGCGGGATCGTATGGCCGCACTTGAGGCCCGCCAGCGACTCATCAAGGATCGCAAGATTGAGACGTTTAT\n+CTACAAGTTAGAAGACGTTCGCGGCAGGATCAAGGCAGTCGAGCGCTGAGGACGTTAAAAACCCTCACCA\n+GAACAGGGACCACTTAAGGGGCCTTTAACTTAAAGACCCTTTAAGATCCTTTAAGATTACTCTTATAGTA\n+ATTATCATTAAGTAAAATCATTAAGTAAGAGGGTGTACCGCAGTGGCATTCATTGAGAAACAGAAACACA\n+ACTTCGACGACGTGAAGACCTCATGGGCCTTTGATACCCTGAGCAAGCTTTACGGTGAAGACCTCGCCGC\n+TGCCCAACTGGCTCTCGAACATGAGTCGCACATGATGGGTGAAGAGCGGTTCCATAAGGCGCTTGATCGC\n+CAGATTGAACGCTCGGAATTCGCTGAAACATCCGTCGCAAAACCACTGGTCGCCATGCTGGTGCCGATGT\n+TCGTCAAGGAGTTCGCCGAATGGACTGAACACCAGATGACCAAGGTGCGCCGCAAGTCGGTGGCCCTGAA\n+GTACCTCAACATGGTTCGCCCTGAACGGGTCGCTGCGTTGACGATCAAGCTGGTCATTACGGAAATGGCT\n+CGCCAGCGTCAAGACCTCGTGTCGGTGACCACTCGAATCGGTCGGACCATCGAAGAGGAAGCCCGCTTTG\n+GTCGCATCCGTGACGAAGAAGCGAAGCACTTTCAGAAGCACATCAAGGACGCTCTCAACAAGCGCAACGG\n+CCACACCTACAAGAAAGCCTTCATGGAAGCTGTCGAGTCCAAGATGCTGGAAGCTGGTGAGCTGAACGGT\n+GCGTGGGCTGACTGGGCAAACTCTGACTCGGACGTAATGCACCACAT
CGGCGCTCGCTGCCTTGAAATCC\n+TGATTAAGTCCACAGCACTGGTCGAGATCGT'..b'ATGGAGTGGGG\n+CGGCTTCCGTGGTGGCTACGAGGACACAACCCTCGAAGCTCTGGCGAAGGTCGGCAAGAAGCACAAGGTA\n+AGCGAAGTGGTTATTGAAGGTAACTTCGGTGACGGTATGTACACCAAGCTGTTCAGCCCTGTGATGACCC\n+GTATCCACCGTTGCGCTATCACTGAGGTGAAGTCCAAGGGTCAGAAAGAAATGCGTATCTGCGACGTGCT\n+GGAGCCTGTACTGGGTTCTCACAAGCTGATCGTGCATGAGTCTGTCATTGAACAGGACTACCGTACAGCC\n+CTCAACGCTGACGGGACGACTGACGTTGTGTACTCCGGGTTCCACCAACTGACCCGACTGACCAAGGAGC\n+GTGGCTCTCTGGGCCATGATGACCGTCTGGATGCTCTCGCCATCGGCGTGCAGTTCTTCACGGACTCCAT\n+GGAGAAGGACAGTGAGCGTGGCGCTGAGGAAATGCTGGAAGGCTTCCTTGAGGATCACCTTGAGAATCAC\n+ATGGTCGGCTTCGAGCAGGCCCGTGAGATCAGCCTCGGGAATGGCGTAAACATCCAGTGGGACGACGATG\n+ACGGCACTGAAGGGAACTTCATGGGCTGGTAACGAAGTCTGCACGATAGCGACACGTTCGACGAAGATTA\n+AAAACCCTCACCTAAAGAGGGACGGTGGGGGTCCTATATGATAAACACAGAAGATACTTAAAGGTGACCT\n+CAGCACGTTCTGAGAACTTTGAGAATCATGTGCTATCTGAGGACCCTATGCCGTTCTACTCATTGATGGT\n+GATTATTGCTGATAAGCATCACCTCCACATAGGAGCATACAGATGACCAAGAAAGCTACCGCAACCTTTG\n+TGGCAGTGCTGGTCAGTCTGGCGAAACACCGTGCGACCTATCGCTTCCTCGCTGTTCTTCTCGTTGCCCT\n+TGGTATCTCCAATGGCGAAGCGATTATGTCTGGGATCGAGACCGTAGCTTGTGCGTACCTTGGCTGCATC\n+GGCTGACGCCCAACGAGGCTCATTACGGGTCTGCTAAAGCATACCCTCCATGAGCATTGACTTACAGTAC\n+ATTCTTCAAGGGACCTTTCAATGGGTCATCCAGTACCCTTTAGGGTGCAGCACATCAAGGCCCTTCAAGG\n+GTCCCTTCAAGGGTCCCTTCAAGAATTTACCATAAATTTTCCTTACAGGACCTCTCATAGACTAACCCAC\n+CGAGTACCCCCGTGGCCCCTCTCGTCAACCCTAACGGCCTCTGGGGCACCCCTAAGGGGATCATTACGGT\n+GCTGGGCACCTACAGACAATCATCAAGGGATACCGGTAGGCGGTCCCTCTATGGGCTACCTCATGTCCTA\n+CCTTGGGGCACATCATGAACCCTTTAAGGGGAGCTGTCAACCCTCAAGGGATCATTAAGGGGACATCAAG\n+GGACTTAAGGGGAGACCTACAGACCATTACGTCTCCTTGTCCTAATCTGTAAGGCCCATCAAGGAACCAT\n+TGAGAGACCATCAAGGCCACCGTCAAGGCACACACCCTAAGGATAGACCATAGGAGACCCATCAAGGCTG\n+GACATCAAGGGATTGACAAAGCGATCCCTGTGATCTATTACGTGCGCTACGCGGTCATTAAGGGGACTTT\n+AAAACCCTCACCTAAACAGGGATGAACAATTGTTGAAATAAGTGGTTGACACCCTCAAAGGAGTCTGTAG\n+AATGGCCACCACAACGAAGCGACACGCCAAGGCGGATCACTGAGTTGCCTTGAAGCTTAGTAGTTACGGT\n+CTGTGCGATCCTAAGGGTGCCAAACGGGCTTAACGTAACGAACACGGATAGGGTGAAACAAAACGCTTGA\n+CACTGAGTAACAACGCTGTAGAATAC
GCCACATCAGCTACACACCGCTCTTTAACAACTTGGATGCAACA\n+TGGTCTGCAAGGCTCCGAAAGGACACCTACAGGCATCGCTACGAAGGCAAGCACTGAGCCTTCTCATAGC\n+GCCTTTGAGAACCTGAACGGATGTTACGGGTAGGGTTAGCAAGGGCCTTATGGGAACCACTTAGGAGCCT\n+CACACAATGGCACGCTTAGACATGCACGTCACTGGTCTTGAATATGATCTTTCCCTTGAGGAAAACGCAC\n+AGGGCCAGCAAGGATTCACCGTGACATACGGTGCGCAAGTCAATCATTACGAACGGTTTGATGAAGCCTT\n+TATGGACTTCAATGAATCGCTTAGCCACGCTCTGGCCCTTCAGGGTCTGTAAATCCTCACTGATAAGGTA\n+CAATCGCATGAACACGTCACAGAACCGCTTCATTCTGGCCCACGCAGCAAGCCTGCTGGTCACCGCTTAC\n+AAAGAGTGCATCGCTGAGTATCAGACGGTGCTGCCGCTTAACCTGAGCATCGGCCATGATGCACCTGATA\n+GCTACGCTGCGCTCCGTTCGCAAGCGGCTCAGGGTCAACTGAAGGTAAGCACGGCGCACAACGCCTCGTC\n+GATCTACGGGGCTTCGGGTAACCTCACGTTCCGCATCTTTCACGACTACGGTCACCTGCTGTATGACGCT\n+GAGTTCACCACTGAGCAAGAGGTTAGCCTTGCGCTGACTCAATGGCGTGACCTGATCCGCTACATCCCTC\n+AGGAATGGCAAGGCATCTGCTATGTGGTCTACCGCGCCGACACCGTGGCACAAAGCGAGTATGAAGCGAT\n+CCATAAGGACTTCCCTGTGGATCAAAAGGCATTCGTTCTGGACATCCTGAACAAGCACTTCGAGGCTGAG\n+CCACGATGAGCATGAACACGAATGAAACACTGGACGCCGTGCTGATTACCCGTGAAGAACGCAAGGCGCT\n+GGCCGCTCTGCTGTACTCAGGGTTGACCGGTAACGCAGTCGATAAGCTGGGCCTTCGGGCCTTGCAGGAA\n+AAGCTTTCATCGGCCTTTAAGGGTTACTGGGACACGTTCAATCCGCTCGACAAGCACCCAACAATGGCTG\n+ACCACGGGATAGCGGAATGGGTCAGCCCGGATTCAACGCAGAAGACACGGAACAGCATCCAATGAACGTG\n+ACTGTCCGGGGTCTCTTCAAGCTCTGCAATAGCTGCAACCAACGGGGCCAAGTGGCTCAAGTCTGGCTTG\n+TCGATCACTTCGCTTGCCGCTGCATCTTCCTACCTTACGACCATCAATAAGGAGTCACCATGACCAACGC\n+AAAGCGCACCACAGCTCAGACCATCAAGCACAAGGGTCGCAAGTACGTCGTAATTCACCGCTCGGTTACC\n+CTGCTGTTGCTGAAGCGTGCCGGTAAGCCTCACCACTTCACCCTTGAAGGCGGCAAAGAGGGCACCCTGA\n+AGCGTCATAAAGACTTCTGGGCGGCACTGCAACACTACAGCGACCGTCAGGACGCACTGGGCATCAAGGG\n+CCACGCTGTAACCGCTGTGGTTGGCACCGAGGACGGCCCTGTTGTCACTGAAGGCGAGCTGGCCGCAATC\n+GAACTGATGGCATCGCAAGGGGCCACAACGGGCCGTGTTAGCGCCAAGGAACCGAACTAATGCCAAAGCC\n+TAATAAGTACAAGGGTGACGGCTCCAAGAAGCCTGAAGGGACCGTAGAGGGCGCCTACGTCATGCACAAG\n+GGTCGCATGGTCCCGAACTTTCAAGCCACCGACTCGGCTATCGAGCGGGGCATCAACGCCTACAAGGCAT\n+TCAAGGGGAACGCTATGCTGAAGTGTGTGACTCGCATCATGCTGTACATCATGGTCGCTGCAATGTGTGC\n+GGCGCTGCTGTTCGGGATGACTGGCTGTCAAGTCAACGTCGTGAACGTGATTCAC
AGTGACATCGGGCTG\n+GATGCGTCCAGTAACCTCAACGCTTTGACCGAGTAGCGTTAATCGGTGGCCTTCTCTTGAGGGTCACCCG\n+TTAAATCCACTCACTGCTAATGC\n+\n' |
b |
diff -r 000000000000 -r 34fb34df4473 test-data/NC_015264.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NC_015264.gb Sun Jul 08 11:58:28 2018 -0400 |
b |
b'@@ -0,0 +1,1688 @@\n+LOCUS NC_015264 40973 bp DNA linear PHG 18-APR-2011\n+DEFINITION Pseudomonas phage phiIBB-PF7A, complete genome.\n+ACCESSION NC_015264\n+VERSION NC_015264.1\n+DBLINK BioProject: PRJNA64561\n+KEYWORDS RefSeq.\n+SOURCE Pseudomonas phage phiIBB-PF7A\n+ ORGANISM Pseudomonas phage phiIBB-PF7A\n+ Viruses; dsDNA viruses, no RNA stage; Caudovirales; Podoviridae;\n+ Autographivirinae; T7virus.\n+REFERENCE 1 (bases 1 to 40973)\n+ AUTHORS Sillankorva,S., Kluskens,L.D., Lingohr,E.J., Kropinski,A.M.,\n+ Neubauer,P. and Azeredo,J.\n+ TITLE Complete genome sequence of the lytic Pseudomonas fluorescens phage\n+ phiIBB-PF7A\n+ JOURNAL Virol. J. 8 (1), 142 (2011)\n+ PUBMED 21439081\n+ REMARK Publication Status: Online-Only\n+REFERENCE 2 (bases 1 to 40973)\n+ CONSRTM NCBI Genome Project\n+ TITLE Direct Submission\n+ JOURNAL Submitted (21-MAR-2011) National Center for Biotechnology\n+ Information, NIH, Bethesda, MD 20894, USA\n+REFERENCE 3 (bases 1 to 40973)\n+ AUTHORS Sillankorva,S., Kluskens,L.H., Lingohr,E.J., Kropinski,A.M.,\n+ Neubauer,P. and Azeredo,J.\n+ TITLE Direct Submission\n+ JOURNAL Submitted (28-JAN-2010) Biological Engineering, University of\n+ Minho, Campus de Gualtar, Braga 4710-057, Portugal\n+COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final\n+ NCBI review. 
The reference sequence is identical to GU583987.\n+ COMPLETENESS: full length.\n+FEATURES Location/Qualifiers\n+ source 1..40973\n+ /organism="Pseudomonas phage phiIBB-PF7A"\n+ /mol_type="genomic DNA"\n+ /host="Pseudomonas fluorescens"\n+ /db_xref="taxon:942165"\n+ repeat_region 1..985\n+ /note="redundant terminal repeat"\n+ /rpt_type=direct\n+ gene 252..602\n+ /locus_tag="phiIBB-PF7Ap00"\n+ /db_xref="GeneID:10323835"\n+ CDS 252..602\n+ /locus_tag="phiIBB-PF7Ap00"\n+ /note="orf1"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /protein_id="YP_004306315.1"\n+ /db_xref="GeneID:10323835"\n+ /translation="MTNAKRTTAQTIKHKGRKYVVIHRSVTLLLLKRAGKPHHFTLEG\n+ GKEGTLKRHKDFWAALQHYSDRQDALGIKGHAVTAVVGTEDGPVVTEGELAAIELMAS\n+ QGATTGRVSAKEPN"\n+ gene 602..928\n+ /locus_tag="phiIBB-PF7Ap01"\n+ /db_xref="GeneID:10323784"\n+ CDS 602..928\n+ /locus_tag="phiIBB-PF7Ap01"\n+ /note="orf2"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /protein_id="YP_004306316.1"\n+ /db_xref="GeneID:10323784"\n+ /translation="MPKPNKYKGDGSKKPEGTVEGAYVMHKGRMVPNFQATDSAIERG\n+ INAYKAFKGNAMLKCVTRIMLYIMVAAMCAALLFGMTGCQVNVVNVIHSDIGLDASSN\n+ LNALTE"\n+ gene 996..1511\n+ /locus_tag="phiIBB-PF7Ap02"\n+ /db_xref="GeneID:10323785"\n+ CDS 996..1511\n+ /locus_tag="phiIBB-PF7Ap02"\n+ /note="orf3"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /protein_id="YP_004306317.1"\n+ /db_xref="GeneID:10323785"\n+ /translation="MIALNYTSFTSREVAAKILAAMQEVRATGNAVRVLNRRGKAFLL\n+ VTIHKDALGYAFKFIAEDGTEVGQMIQRASNDWDNPTFTAYWSILSWAWDLKEHPLLS\n+ LSKLAAQAEAMKGQ'..b' 37861 cattgatggt gattattgct gataagcatc acctccacat aggagcatac agatgaccaa\n+ 37921 gaaagctacc gcaacctttg tggcagtgct ggtcagtctg gcgaaacacc gtgcgaccta\n+ 37981 tcgcttcctc gctgttcttc tcgttgccct tggtatctcc aatggcgaag cgattatgtc\n+ 38041 tgggatcgag accgtagctt gtgcgtacct tggctgcatc ggctgacgcc caacgaggct\n+ 38101 cattacgggt ctgctaaagc ataccctcca tgagcattga cttacagtac attcttcaag\n+ 38161 ggacctttca 
atgggtcatc cagtaccctt tagggtgcag cacatcaagg cccttcaagg\n+ 38221 gtcccttcaa gggtcccttc aagaatttac cataaatttt ccttacagga cctctcatag\n+ 38281 actaacccac cgagtacccc cgtggcccct ctcgtcaacc ctaacggcct ctggggcacc\n+ 38341 cctaagggga tcattacggt gctgggcacc tacagacaat catcaaggga taccggtagg\n+ 38401 cggtccctct atgggctacc tcatgtccta ccttggggca catcatgaac cctttaaggg\n+ 38461 gagctgtcaa ccctcaaggg atcattaagg ggacatcaag ggacttaagg ggagacctac\n+ 38521 agaccattac gtctccttgt cctaatctgt aaggcccatc aaggaaccat tgagagacca\n+ 38581 tcaaggccac cgtcaaggca cacaccctaa ggatagacca taggagaccc atcaaggctg\n+ 38641 gacatcaagg gattgacaaa gcgatccctg tgatctatta cgtgcgctac gcggtcatta\n+ 38701 aggggacttt aaaaccctca cctaaacagg gatgaacaat tgttgaaata agtggttgac\n+ 38761 accctcaaag gagtctgtag aatggccacc acaacgaagc gacacgccaa ggcggatcac\n+ 38821 tgagttgcct tgaagcttag tagttacggt ctgtgcgatc ctaagggtgc caaacgggct\n+ 38881 taacgtaacg aacacggata gggtgaaaca aaacgcttga cactgagtaa caacgctgta\n+ 38941 gaatacgcca catcagctac acaccgctct ttaacaactt ggatgcaaca tggtctgcaa\n+ 39001 ggctccgaaa ggacacctac aggcatcgct acgaaggcaa gcactgagcc ttctcatagc\n+ 39061 gcctttgaga acctgaacgg atgttacggg tagggttagc aagggcctta tgggaaccac\n+ 39121 ttaggagcct cacacaatgg cacgcttaga catgcacgtc actggtcttg aatatgatct\n+ 39181 ttcccttgag gaaaacgcac agggccagca aggattcacc gtgacatacg gtgcgcaagt\n+ 39241 caatcattac gaacggtttg atgaagcctt tatggacttc aatgaatcgc ttagccacgc\n+ 39301 tctggccctt cagggtctgt aaatcctcac tgataaggta caatcgcatg aacacgtcac\n+ 39361 agaaccgctt cattctggcc cacgcagcaa gcctgctggt caccgcttac aaagagtgca\n+ 39421 tcgctgagta tcagacggtg ctgccgctta acctgagcat cggccatgat gcacctgata\n+ 39481 gctacgctgc gctccgttcg caagcggctc agggtcaact gaaggtaagc acggcgcaca\n+ 39541 acgcctcgtc gatctacggg gcttcgggta acctcacgtt ccgcatcttt cacgactacg\n+ 39601 gtcacctgct gtatgacgct gagttcacca ctgagcaaga ggttagcctt gcgctgactc\n+ 39661 aatggcgtga cctgatccgc tacatccctc aggaatggca aggcatctgc tatgtggtct\n+ 39721 accgcgccga caccgtggca caaagcgagt atgaagcgat ccataaggac 
ttccctgtgg\n+ 39781 atcaaaaggc attcgttctg gacatcctga acaagcactt cgaggctgag ccacgatgag\n+ 39841 catgaacacg aatgaaacac tggacgccgt gctgattacc cgtgaagaac gcaaggcgct\n+ 39901 ggccgctctg ctgtactcag ggttgaccgg taacgcagtc gataagctgg gccttcgggc\n+ 39961 cttgcaggaa aagctttcat cggcctttaa gggttactgg gacacgttca atccgctcga\n+ 40021 caagcaccca acaatggctg accacgggat agcggaatgg gtcagcccgg attcaacgca\n+ 40081 gaagacacgg aacagcatcc aatgaacgtg actgtccggg gtctcttcaa gctctgcaat\n+ 40141 agctgcaacc aacggggcca agtggctcaa gtctggcttg tcgatcactt cgcttgccgc\n+ 40201 tgcatcttcc taccttacga ccatcaataa ggagtcacca tgaccaacgc aaagcgcacc\n+ 40261 acagctcaga ccatcaagca caagggtcgc aagtacgtcg taattcaccg ctcggttacc\n+ 40321 ctgctgttgc tgaagcgtgc cggtaagcct caccacttca cccttgaagg cggcaaagag\n+ 40381 ggcaccctga agcgtcataa agacttctgg gcggcactgc aacactacag cgaccgtcag\n+ 40441 gacgcactgg gcatcaaggg ccacgctgta accgctgtgg ttggcaccga ggacggccct\n+ 40501 gttgtcactg aaggcgagct ggccgcaatc gaactgatgg catcgcaagg ggccacaacg\n+ 40561 ggccgtgtta gcgccaagga accgaactaa tgccaaagcc taataagtac aagggtgacg\n+ 40621 gctccaagaa gcctgaaggg accgtagagg gcgcctacgt catgcacaag ggtcgcatgg\n+ 40681 tcccgaactt tcaagccacc gactcggcta tcgagcgggg catcaacgcc tacaaggcat\n+ 40741 tcaaggggaa cgctatgctg aagtgtgtga ctcgcatcat gctgtacatc atggtcgctg\n+ 40801 caatgtgtgc ggcgctgctg ttcgggatga ctggctgtca agtcaacgtc gtgaacgtga\n+ 40861 ttcacagtga catcgggctg gatgcgtcca gtaacctcaa cgctttgacc gagtagcgtt\n+ 40921 aatcggtggc cttctcttga gggtcacccg ttaaatccac tcactgctaa tgc\n+//\n+\n' |
b |
diff -r 000000000000 -r 34fb34df4473 test-data/output.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fasta Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,60 @@ +>NC_015264:12 phage (238..260) score=0.606 +AATAAGGAGTCACCATGACCAAC +>NC_015264:150 phage (2996..3018) score=0.931 +TAAAAACCCTCACCAGAACAGGG +>NC_015264:154 host (3062..3088) score=0.729 +TAAGATTACTCTTATAGTAATTATCAT +>NC_015264:216 host (4313..4342) score=0.653 +TGAAATGAACGTGGAGACGGCTCGTAAGTT +>NC_015264:242 phage (4835..4855) score=0.516 +ACTGGTCTCCGACGGTGTTAA +>NC_015264:244 host (4876..4907) score=0.519 +TTGATAACGGCACAGATGATTCCTCGGAGACT +>NC_015264:300 phage (6000..6022) score=0.981 +TAAAACCCCTCACCAAAACAGGG +>NC_015264:412 phage (8257..8279) score=0.985 +TAAAAACCCTCACCAAAACAGGG +>NC_015264:473 phage (9458..9480) score=0.966 +TAAAAACCCTCACCAGAACAGGG +>NC_015264:532 phage (10632..10654) score=0.657 +CCAAACCGATCCCTAAAGGGGTC +>NC_015264:557 phage (11141..11163) score=0.948 +TAAAAACCCTCACCAGAACAGGG +>NC_015264:586 phage (11710..11730) score=0.557 +CTGACTTGCCGATACCCTGAA +>NC_015264:698 phage (13956..13978) score=0.677 +CAAATACCCTCACCTAAACAGCT +>NC_015264:826 host (16512..16542) score=0.748 +TTGACATCGAAGAGGTCTTCGTTGAGACAGT +>NC_015264:890 phage (17793..17815) score=0.99 +TAAAACCCCTCACCTAAACAGGG +>NC_015264:892 phage (17842..17864) score=0.7 +ACTTAAAGATCACTCTAAGGGAG +>NC_015264:893 host (17869..17896) score=0.777 +TATGCTTAAAGAGATCCAGCACTATCTG +>NC_015264:979 host (19585..19615) score=0.529 +TTGAGAACCTGCACGAAGCCATGATTAAGTT +>NC_015264:1019 phage (20361..20383) score=0.993 +TAAAAACCCTCACCTAAACAGGG +>NC_015264:1068 phage (21354..21376) score=0.99 +TAAAAACCCTCACCTAAACAGGG +>NC_015264:1303 phage (26071..26093) score=0.953 +TAAAAACACTCACCACAACAGGG +>NC_015264:1616 phage (32321..32341) score=0.665 +GCCGCTCACCAAGTTCCTTAC +>NC_015264:1667 phage (33327..33349) score=0.991 +TAAAACCCCTCACCTAAACAGGG +>NC_015264:1886 phage (37729..37751) score=0.991 +TAAAAACCCTCACCTAAAGAGGG +>NC_015264:1932 host (38653..38681) score=0.875 +TTGACAAAGCGATCCCTGTGATCTATTAC +>NC_015264:1936 phage (38709..38731) score=0.938 +TTAAAACCCTCACCTAAACAGGG +>NC_015264:1937 host (38756..38783) 
score=1.0 +TTGACACCCTCAAAGGAGTCTGTAGAAT +>NC_015264:1945 host (38917..38944) score=0.997 +TTGACACTGAGTAACAACGCTGTAGAAT +>NC_015264:1958 host (39147..39178) score=0.697 +TAGACATGCACGTCACTGGTCTTGAATATGAT +>NC_015264:2011 phage (40226..40248) score=0.63 +AATAAGGAGTCACCATGACCAAC |
b |
diff -r 000000000000 -r 34fb34df4473 test-data/output.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.html Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,192 @@ +<table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th>positions</th> + <th>promoter_seq</th> + <th>promoter_type</th> + <th>scores</th> + </tr> + </thead> + <tbody> + <tr> + <td>(238..260)</td> + <td>AATAAGGAGTCACCATGACCAAC</td> + <td>phage</td> + <td>0.606</td> + </tr> + <tr> + <td>(2996..3018)</td> + <td>TAAAAACCCTCACCAGAACAGGG</td> + <td>phage</td> + <td>0.931</td> + </tr> + <tr> + <td>(3062..3088)</td> + <td>TAAGATTACTCTTATAGTAATTATCAT</td> + <td>host</td> + <td>0.729</td> + </tr> + <tr> + <td>(4313..4342)</td> + <td>TGAAATGAACGTGGAGACGGCTCGTAAGTT</td> + <td>host</td> + <td>0.653</td> + </tr> + <tr> + <td>(4835..4855)</td> + <td>ACTGGTCTCCGACGGTGTTAA</td> + <td>phage</td> + <td>0.516</td> + </tr> + <tr> + <td>(4876..4907)</td> + <td>TTGATAACGGCACAGATGATTCCTCGGAGACT</td> + <td>host</td> + <td>0.519</td> + </tr> + <tr> + <td>(6000..6022)</td> + <td>TAAAACCCCTCACCAAAACAGGG</td> + <td>phage</td> + <td>0.981</td> + </tr> + <tr> + <td>(8257..8279)</td> + <td>TAAAAACCCTCACCAAAACAGGG</td> + <td>phage</td> + <td>0.985</td> + </tr> + <tr> + <td>(9458..9480)</td> + <td>TAAAAACCCTCACCAGAACAGGG</td> + <td>phage</td> + <td>0.966</td> + </tr> + <tr> + <td>(10632..10654)</td> + <td>CCAAACCGATCCCTAAAGGGGTC</td> + <td>phage</td> + <td>0.657</td> + </tr> + <tr> + <td>(11141..11163)</td> + <td>TAAAAACCCTCACCAGAACAGGG</td> + <td>phage</td> + <td>0.948</td> + </tr> + <tr> + <td>(11710..11730)</td> + <td>CTGACTTGCCGATACCCTGAA</td> + <td>phage</td> + <td>0.557</td> + </tr> + <tr> + <td>(13956..13978)</td> + <td>CAAATACCCTCACCTAAACAGCT</td> + <td>phage</td> + <td>0.677</td> + </tr> + <tr> + <td>(16512..16542)</td> + <td>TTGACATCGAAGAGGTCTTCGTTGAGACAGT</td> + <td>host</td> + <td>0.748</td> + </tr> + <tr> + <td>(17793..17815)</td> + <td>TAAAACCCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.990</td> + </tr> + <tr> + <td>(17842..17864)</td> + <td>ACTTAAAGATCACTCTAAGGGAG</td> + <td>phage</td> + <td>0.700</td> + </tr> + <tr> + 
<td>(17869..17896)</td> + <td>TATGCTTAAAGAGATCCAGCACTATCTG</td> + <td>host</td> + <td>0.777</td> + </tr> + <tr> + <td>(19585..19615)</td> + <td>TTGAGAACCTGCACGAAGCCATGATTAAGTT</td> + <td>host</td> + <td>0.529</td> + </tr> + <tr> + <td>(20361..20383)</td> + <td>TAAAAACCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.993</td> + </tr> + <tr> + <td>(21354..21376)</td> + <td>TAAAAACCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.990</td> + </tr> + <tr> + <td>(26071..26093)</td> + <td>TAAAAACACTCACCACAACAGGG</td> + <td>phage</td> + <td>0.953</td> + </tr> + <tr> + <td>(32321..32341)</td> + <td>GCCGCTCACCAAGTTCCTTAC</td> + <td>phage</td> + <td>0.665</td> + </tr> + <tr> + <td>(33327..33349)</td> + <td>TAAAACCCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.991</td> + </tr> + <tr> + <td>(37729..37751)</td> + <td>TAAAAACCCTCACCTAAAGAGGG</td> + <td>phage</td> + <td>0.991</td> + </tr> + <tr> + <td>(38653..38681)</td> + <td>TTGACAAAGCGATCCCTGTGATCTATTAC</td> + <td>host</td> + <td>0.875</td> + </tr> + <tr> + <td>(38709..38731)</td> + <td>TTAAAACCCTCACCTAAACAGGG</td> + <td>phage</td> + <td>0.938</td> + </tr> + <tr> + <td>(38756..38783)</td> + <td>TTGACACCCTCAAAGGAGTCTGTAGAAT</td> + <td>host</td> + <td>1.000</td> + </tr> + <tr> + <td>(38917..38944)</td> + <td>TTGACACTGAGTAACAACGCTGTAGAAT</td> + <td>host</td> + <td>0.997</td> + </tr> + <tr> + <td>(39147..39178)</td> + <td>TAGACATGCACGTCACTGGTCTTGAATATGAT</td> + <td>host</td> + <td>0.697</td> + </tr> + <tr> + <td>(40226..40248)</td> + <td>AATAAGGAGTCACCATGACCAAC</td> + <td>phage</td> + <td>0.630</td> + </tr> + </tbody> +</table> \ No newline at end of file |
b |
diff -r 000000000000 -r 34fb34df4473 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Sun Jul 08 11:58:28 2018 -0400 |
b |
@@ -0,0 +1,6 @@ +<tool_dependency> + <package name="biopython"></package> + <package name="numpy" ></package> + <package name="pandas"></package> + <package name="scikit-learn"></package> +</tool_dependency> |