Repository 'pmlst'
hg clone https://toolshed.g2.bx.psu.edu/repos/dcouvin/pmlst

Changeset 0:6dcb66689a7d (2021-08-25)
Commit message:
Uploaded
added:
pmlst.pl
pmlst.xml
pmlst/.gitignore
pmlst/Dockerfile
pmlst/README.md
pmlst/data.json
pmlst/pmlst.py
pmlst/pmlst_db/.gitignore
pmlst/pmlst_db/INSTALL.py
pmlst/pmlst_db/config
pmlst/pmlst_db/incac.clpx
pmlst/pmlst_db/incac.fsa
pmlst/pmlst_db/incac.txt.clean
pmlst/pmlst_db/incf.clpx
pmlst/pmlst_db/incf.fsa
pmlst/pmlst_db/incf.txt.clean
pmlst/pmlst_db/inchi1.clpx
pmlst/pmlst_db/inchi1.fsa
pmlst/pmlst_db/inchi1.txt.clean
pmlst/pmlst_db/inchi2.clpx
pmlst/pmlst_db/inchi2.fsa
pmlst/pmlst_db/inchi2.txt.clean
pmlst/pmlst_db/inci1.clpx
pmlst/pmlst_db/inci1.fsa
pmlst/pmlst_db/inci1.txt.clean
pmlst/pmlst_db/incn.clpx
pmlst/pmlst_db/incn.fsa
pmlst/pmlst_db/incn.txt.clean
pmlst/pmlst_db/pbssb1-family.clpx
pmlst/pmlst_db/pbssb1-family.fsa
pmlst/pmlst_db/pbssb1-family.txt.clean
pmlst/test.fsa
pmlst/tmp_pMLST/tmp/out_inchi2.xml
b
diff -r 000000000000 -r 6dcb66689a7d pmlst.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst.pl Wed Aug 25 21:12:34 2021 +0000
[
@@ -0,0 +1,28 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings;
+
+# Run pmlst.py on one input file and print the sequence type it reports as a tab-separated line.
+# Usage: pmlst.pl TOOL_DIR INPUT_FILE SCHEME  (TOOL_DIR must contain pmlst/pmlst.py and pmlst/pmlst_db/)
+die "Usage: $0 TOOL_DIR INPUT_FILE SCHEME\n" unless @ARGV >= 3;
+my ($path, $seq, $scheme) = @ARGV;
+my $STpmlst = "";
+
+# List-form pipe open runs python3 without a shell, so spaces or metacharacters in arguments are safe.
+my @cmd = ("python3", "$path/pmlst/pmlst.py", "-i", $seq, "-s", $scheme, "-p", "$path/pmlst/pmlst_db/");
+open(my $pmlst_fh, "-|", @cmd) or die "Cannot run '@cmd': $!\n";
+while (my $line = <$pmlst_fh>) {
+    chomp $line;
+    next unless $line =~ /sequence_type/;
+    # Split on the first ':' only, so a ':' inside the value itself is not cut off.
+    my (undef, $value) = split(/:/, $line, 2);
+    next unless defined $value;
+    $value =~ s/\s+//;                               # remove the first whitespace run
+    $value = substr($value, 1) if length $value;     # drop the first character (presumably the opening quote)
+    my @parts = split(/'/, $value);                  # keep only the text before the next single quote
+    $STpmlst = defined $parts[0] ? $parts[0] : "";
+}
+# close() on a pipe is false when the child exited non-zero; report it but still print the result.
+close($pmlst_fh) or warn "pmlst.py did not exit cleanly (wait status $?)\n";
+
+print "Sequence Type (pMLST)\t$STpmlst\n";
b
diff -r 000000000000 -r 6dcb66689a7d pmlst.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst.xml Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,49 @@
+<tool id="pmlst" name="pmlst" version="2.0">
+  <description>A plasmid multilocus sequence typing (pMLST) scheme was developed for rapid categorization of IncN plasmids</description>
+
+
+  <requirements>
+    <requirement type="package" version="3.2.0">python3-openid</requirement>
+    <requirement type="package" version="21.2.4">pip</requirement>
+    <requirement type="package" version="1.79">biopython</requirement>
+    <requirement type="package" version="1.5.6">cgecore</requirement>
+    <requirement type="package" version="0.8.9">tabulate</requirement>
+  </requirements>
+
+
+  <!--<command> python3 $__tool_directory__/pmlst/pmlst.py -i $input -s $s -p $__tool_directory__/pmlst/pmlst_db/ > $output  </command>-->
+  <!-- Parameter references are single-quoted so that paths containing spaces or shell metacharacters reach the script intact. -->
+  <command><![CDATA[ perl '$__tool_directory__/pmlst.pl' '$__tool_directory__' '$input' '$s' > '$output' ]]></command>
+
+  <inputs>
+   <param format="fasta" name="input" type="data" label="Select FASTA files: " />
+  
+   <param name="s" type="select" label="Scheme database used for pMLST prediction:">
+
+     <option value="incac">incac</option>
+     <option value="incf">incf</option>
+     <option value="inchi1">inchi1</option>
+     <option value="inchi2">inchi2</option>
+     <option value="inci1">inci1</option>
+     <option value="incn">incn</option>
+     <option value="pbssb1-family">pbssb1-family</option>
+      
+          </param>
+
+    
+ </inputs>
+
+ <outputs>  
+    <data format="tabular" name="output" />
+ </outputs>
+
+  <help>
+  
+  </help>
+  <citations>
+   <citation type="doi">10.1007/978-1-4939-9877-7_20</citation>
+         </citations> 
+</tool>
+
+
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/.gitignore Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,4 @@
+.DS_Store
+blast-2.2.26
+Output
+*.gz
\ No newline at end of file
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/Dockerfile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/Dockerfile Wed Aug 25 21:12:34 2021 +0000
[
@@ -0,0 +1,43 @@
+FROM debian:stretch
+
+# Suppress interactive package-configuration prompts during the apt-get steps below
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system dependencies; steps are chained with && so that a failing step fails the build
+RUN apt-get update -qq && \
+    apt-get install -y -qq git \
+    apt-utils \
+    wget \
+    python3-pip \
+    ncbi-blast+ \
+    libz-dev \
+    && \
+    rm -rf /var/cache/apt/* /var/lib/apt/lists/*
+
+ENV DEBIAN_FRONTEND=Teletype
+
+# Install python dependencies
+RUN pip3 install -U biopython==1.73 tabulate cgecore==1.3.2
+
+# Install kma: build from the tagged source and move the resulting kma* files into /bin
+RUN git clone --branch 1.0.1 --depth 1 https://bitbucket.org/genomicepidemiology/kma.git && \
+    cd kma && make && \
+    mv kma* /bin/
+
+COPY pmlst.py /usr/src/pmlst.py
+
+RUN chmod 755 /usr/src/pmlst.py
+
+ENV PATH=$PATH:/usr/src
+# Setup .bashrc file for convenience during debugging
+RUN echo "alias ls='ls -h --color=tty'\n"\
+"alias ll='ls -lrt'\n"\
+"alias l='less'\n"\
+"alias du='du -hP --max-depth=1'\n"\
+"alias cwd='readlink -f .'\n"\
+"PATH=$PATH\n">> ~/.bashrc
+
+WORKDIR /workdir
+
+# Execute program when running the container
+ENTRYPOINT ["/usr/src/pmlst.py"]
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/README.md Wed Aug 25 21:12:34 2021 +0000
[
@@ -0,0 +1,146 @@
+pMLST
+===================
+
+Plasmid Multi-Locus Sequence Typing
+
+
+Documentation
+=============
+
+The pMLST service contains one python script *pmlst.py* which is the script of the latest
+version of the pMLST service. The method enables investigators to determine the ST based on WGS data.
+
+## Content of the repository
+1. pmlst.py     - the program
+2. README.md
+3. Dockerfile   - dockerfile for building the pmlst docker container
+4. test.fsa     - test fasta file
+
+
+## Installation
+
+Setting up pMLST program
+```bash
+# Go to wanted location for pmlst
+cd /path/to/some/dir
+# Clone and enter the pmlst directory
+git clone https://bitbucket.org/genomicepidemiology/pmlst.git
+cd pmlst
+```
+
+Build Docker container
+```bash
+# Build container
+docker build -t pmlst .
+```
+
+Download and install pMLST database
+```bash
+# Go to the directory where you want to store the pmlst database
+cd /path/to/some/dir
+# Clone database from git repository (develop branch)
+git clone https://bitbucket.org/genomicepidemiology/pmlst_db.git
+cd pmlst_db
+pMLST_DB=$(pwd)
+# Install pMLST database with executable kma_index program
+python3 INSTALL.py kma_index
+```
+
+If kma_index has not been installed, please install it from the kma repository:
+https://bitbucket.org/genomicepidemiology/kma
+
+## Dependencies
+In order to run the program without using docker, Python 3.5 (or newer) should be installed along with the following versions of the modules (or newer).
+
+#### Modules
+- cgecore 1.5.5
+- tabulate 0.7.7
+
+Modules can be installed using the following command. Here, the installation of the module cgecore is used as an example:
+```bash
+pip3 install cgecore
+```
+#### KMA and BLAST
+Additionally KMA and BLAST version 2.8.1 or newer should be installed.
+The newest versions of KMA and BLAST can be installed from here:
+```url
+https://bitbucket.org/genomicepidemiology/kma
+```
+
+```url
+ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/
+```
+
+## Usage
+
+The program can be invoked with the -h option to get help and more information about the service.
+Run Docker container
+
+
+```bash
+# Run pmlst container
+docker run --rm -it \
+       -v $pMLST_DB:/database \
+       -v $(pwd):/workdir \
+       pmlst -i [INPUTFILE] -o . -s [SCHEME] [-x] [-mp] [-p] [-t]
+```
+
+When running the Docker container you have to mount 2 directories: 
+ 1. pmlst_db (pMLST database) downloaded from bitbucket
+ 2. An output/input folder from which the input file can be reached and where output files can be saved. 
+Here we mount the current working directory (using $(pwd)) and use this as the output directory, 
+the input file should be reachable from this directory as well.

+` -i INPUTFILE input file (fasta or fastq) relative to pwd `
+
+` -s SCHEME  pMLST scheme to be used, details are in config file `
+
+` -o OUTDIR output directory relative to pwd `
+
+` -x  extended output. Will create an extended output `
+
+` -mp METHOD_PATH Path to executable of the method to be used (kma or blast)`
+
+` -p DATABASE Path to database directory `
+
+` -t TMP_DIR Temporary directory for storage of results from external software. `
+
+
+## Web-server
+
+A webserver implementing the methods is available at the [CGE website](http://www.genomicepidemiology.org/) and can be found here:
+https://cge.cbs.dtu.dk/services/pMLST/
+
+Citation
+=======
+
+When using the method please cite:
+
+PlasmidFinder and pMLST: in silico detection and typing of plasmids.
+Carattoli A, Zankari E, Garcia-Fernandez A, Volby Larsen M, Lund O, Villa L, Aarestrup FM, Hasman H.
+Antimicrob. Agents Chemother. 2014. April 28th.
+[Epub ahead of print]
+
+References
+=======
+
+1. Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics 2009; 10:421. 
+2. Clausen PTLC, Aarestrup FM, Lund O. Rapid and precise alignment of raw reads against redundant databases with KMA. BMC Bioinformatics 2018; 19:307. 
+
+License
+=======
+
+Copyright (c) 2014, Ole Lund, Technical University of Denmark
+All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/data.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/data.json Wed Aug 25 21:12:34 2021 +0000
[
@@ -0,0 +1,1 @@
+{"pmlst": {"user_input": {"filename": ["test.fsa"], "scheme": "inchi2", "profile": "IncHI2 DLST", "file_format": "fasta"}, "run_info": {"date": "25.08.2021", "time": "08:48:07"}, "results": {"sequence_type": "Unknown", "allele_profile": {"smr0018": {"identity": "", "coverage": "", "allele": "", "allele_name": "No hit found", "align_len": "", "gaps": "", "sbj_len": ""}, "smr0199": {"identity": "", "coverage": "", "allele": "", "allele_name": "No hit found", "align_len": "", "gaps": "", "sbj_len": ""}}, "nearest_sts": "", "clonal_complex": "", "notes": "No MLST loci was found in the input data, make sure that the correct pMLST scheme was chosen."}}}
\ No newline at end of file
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst.py Wed Aug 25 21:12:34 2021 +0000
[
b'@@ -0,0 +1,691 @@\n+#!/usr/bin/env python3\n+\n+import os, sys, re, time, pprint, io, shutil\n+import argparse, subprocess\n+\n+from cgecore.alignment import extended_cigar\n+from cgecore.blaster.blaster import Blaster\n+from cgecore.cgefinder import CGEFinder\n+import json, gzip\n+from tabulate import tabulate\n+\n+\n+def get_read_filename(infiles):\n+    \'\'\' Infiles must be a list with 1 or 2 input files.\n+        Removes path from given string and removes extensions:\n+        .fq .fastq .gz and .trim\n+        extract the common sample name i 2 files are given.\n+    \'\'\'\n+    # Remove common fastq extensions\n+    seq_path = infiles[0]\n+    seq_file = os.path.basename(seq_path)\n+    seq_file = seq_file.replace(".fq", "")\n+    seq_file = seq_file.replace(".fastq", "")\n+    seq_file = seq_file.replace(".gz", "")\n+    seq_file = seq_file.replace(".trim", "")\n+    if len(infiles) == 1:\n+        return seq_file.rstrip()\n+\n+    # If two files are given get the common sample name\n+    sample_name = ""\n+    seq_file_2 = os.path.basename(infiles[1])\n+    for i in range(len(seq_file)):\n+        if seq_file_2[i] == seq_file[i]:\n+            sample_name += seq_file[i]\n+        else: \n+            break\n+    if sample_name == "":\n+        sys.error("Input error: sample names of input files, {} and {}, \\\n+                   does not share a common sample name. If these files \\\n+                   are paired end reads from the same sample, please rename \\\n+                   them with a common sample name (e.g. 
\'s22_R1.fq\', \'s22_R2.fq\') \\\n+                   or input them seperately.".format(infiles[0], infiles[1]))\n+\n+    return sample_name.rstrip("-").rstrip("_")\n+\n+def is_gzipped(file_path):\n+    \'\'\' Returns True if file is gzipped and False otherwise.\n+        The result is inferred from the first two bits in the file read\n+        from the input path.\n+        On unix systems this should be: 1f 8b\n+        Theoretically there could be exceptions to this test but it is\n+        unlikely and impossible if the input files are otherwise expected\n+        to be encoded in utf-8.\n+    \'\'\'\n+    with open(file_path, mode=\'rb\') as fh:\n+        bit_start = fh.read(2)\n+    if(bit_start == b\'\\x1f\\x8b\'):\n+        return True\n+    else:\n+        return False\n+\n+def get_file_format(input_files):\n+    """\n+    Takes all input files and checks their first character to assess\n+    the file format. Returns one of the following strings; fasta, fastq, \n+    other or mixed. fasta and fastq indicates that all input files are \n+    of the same format, either fasta or fastq. other indiates that all\n+    files are not fasta nor fastq files. 
mixed indicates that the inputfiles\n+    are a mix of different file formats.\n+    """\n+\n+    # Open all input files and get the first character\n+    file_format = []\n+    invalid_files = []\n+    for infile in input_files:\n+        if is_gzipped(infile):#[-3:] == ".gz":\n+            f = gzip.open(infile, "rb")\n+            fst_char = f.read(1);\n+        else:\n+            f = open(infile, "rb")\n+            fst_char = f.read(1);\n+        f.close()\n+        # Assess the first character\n+        if fst_char == b"@":\n+            file_format.append("fastq")\n+        elif fst_char == b">":\n+            file_format.append("fasta")\n+        else:\n+            invalid_files.append("other")\n+    if len(set(file_format)) != 1:\n+        return "mixed"\n+    return ",".join(set(file_format))\n+\n+def import_profile(database, scheme, loci_list):\n+    """Import all possible allele profiles with corresponding st\'s\n+    for the scheme into a dict. The profiles are stored in a dict \n+    of dicts, to easily look up what st types are accosiated with \n+    a specific allele number of each loci. 
\n+    """\n+    # Open allele profile file from databaseloci\n+    profile_file = open("{0}/{1}.txt.clean".format(database, scheme), "r")\n+    profile_header = profile_file.readline().strip().split("\\t")[1:len(loci_list)+1]\n+    \n+    # Create dict for looking up st-type'..b't(clpx))\n+\n+    # Write tsv table header\n+    table_header = ["Locus", "Identity", "Coverage", "Alignment Length", "Allele Length", "Gaps", "Allele"]\n+    table_file.write("\\t".join(table_header) + "\\n")\n+    rows = []\n+    for locus, allele_info in allele_matches.items():\n+\n+        identity = str(allele_info["identity"])\n+        coverage = str(allele_info["coverage"])\n+        allele = allele_info["allele"]\n+        allele_name = allele_info["allele_name"]\n+        align_len = str(allele_info["align_len"])\n+        sbj_len = str(allele_info["sbj_len"])\n+        gaps = str(allele_info["gaps"])\n+\n+        # Write alleles names with indications of imperfect hits\n+        if allele_name != "No hit found":\n+            allele_name_w_mark = locus + "_" + allele\n+        else:\n+            allele_name_w_mark = allele_name          \n+        \n+        # Write allele results to tsv table\n+        row = [locus, identity, coverage, align_len, sbj_len, gaps, allele_name_w_mark]\n+        rows.append(row)\n+        if "alternative_hit" in allele_info:\n+            for allele_name, dic in allele_info["alternative_hit"].items():\n+                row = [locus, identity, coverage, str(dic["align_len"]), str(dic["sbj_len"]), "0", allele_name + "!"]\n+                rows.append(row)                \n+        #\n+\n+        if allele_name == "No hit found":\n+            continue\n+\n+        # Write query fasta output\n+        hit_name = allele_info["hit_name"]\n+        query_seq = query_aligns[scheme][hit_name]\n+        sbjct_seq = sbjct_aligns[scheme][hit_name] \n+        homol_seq = homol_aligns[scheme][hit_name]\n+\n+        if allele_info["match_priority"] == 1:\n+      
      match = "PERFECT MATCH"\n+        else:\n+            match = "WARNING"\n+        header = ">{}:{} ID:{}% COV:{}% Best_match:{}\\n".format(locus, match, allele_info["identity"], \n+                                                  allele_info["coverage"], allele_info["allele_name"])\n+        query_file.write(header)\n+        for i in range(0,len(query_seq),60):\n+            query_file.write(query_seq[i:i+60] + "\\n")\n+\n+        # Write template fasta output\n+        header = ">{}\\n".format(allele_info["allele_name"])\n+        sbjct_file.write(header)\n+        for i in range(0,len(sbjct_seq),60):\n+            sbjct_file.write(sbjct_seq[i:i+60] + "\\n")\n+\n+        if "alternative_hit" in allele_info:\n+            for allele_name in allele_info["alternative_hit"]:\n+                header = ">{}:{} ID:{}% COV:{}% Best_match:{}\\n".format(locus, "PERFECT MATCH", 100, \n+                                                                        100, allele_name)\n+                hit_name = allele_info["alternative_hit"][allele_name]["hit_name"]\n+                query_seq = query_aligns[scheme][hit_name]\n+                sbjct_seq = sbjct_aligns[scheme][hit_name] \n+                homol_seq = homol_aligns[scheme][hit_name]\n+                query_file.write(header)\n+                for i in range(0,len(query_seq),60):\n+                    query_file.write(query_seq[i:i+60] + "\\n")\n+\n+                # Write template fasta output\n+                header = ">{}\\n".format(allele_name)\n+                sbjct_file.write(header)\n+                for i in range(0,len(sbjct_seq),60):\n+                    sbjct_file.write(sbjct_seq[i:i+60] + "\\n")\n+\n+    # Write Allele profile results tables in results file and table file\n+    rows.sort(key=lambda x: x[0])\n+    result_file.write(text_table(table_header, rows))\n+    for row in rows:\n+        table_file.write("\\t".join(row) + "\\n")\n+    # Write any notes\n+    if note != "":\n+       
result_file.write("\\nNotes: {}\\n\\n".format(note))\n+\n+    # Write allignment output\n+    result_file.write("\\n\\nExtended Output:\\n\\n")\n+    make_aln(scheme, result_file, allele_matches, query_aligns, homol_aligns, sbjct_aligns)\n+\n+    # Close all files\n+    query_file.close()\n+    sbjct_file.close()\n+    table_file.close()\n+    result_file.close()\n+\n+if args.quiet:\n+    f.close()\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/.gitignore Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,2 @@
+inc*.b
+inc*.name
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/INSTALL.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/INSTALL.py Wed Aug 25 21:12:34 2021 +0000
[
@@ -0,0 +1,61 @@
+#!/usr/bin/python3
+import shutil, os, sys
+
+# This scripts installs the pMLST database for using KMA
+# KMA should be installed before running this script
+# The scripts assumes that it is placed together with the pMLST scheme fasta files
+# First clone the repository: git clone https://bitbucket.org/genomicepidemiology/pmlst_db.git
+
+# Check if executable kma_index program is installed, if not promt the user for path
+
+interactive = True
+if len(sys.argv) >= 2:
+    kma_index = sys.argv[1]
+    if "non_interactive" in sys.argv:
+        interactive = False
+else:
+    kma_index = "kma_index"
+
+while shutil.which(kma_index) is None:
+    if not interactive:
+        sys.exit("KMA index program, {}, does not exist or is not executable".format(kma_index))
+    ans = input("Please input path to executable kma_index program or enter 'q'/'quit' to exit:")
+    if ans == "q" or ans == "quit":
+        print("Exiting!\n\n \
+               Please install executable KMA programs in order to install this database.\n\n \
+               KMA can be obtained from bitbucked:\n\n\
+               git clone https://bitbucket.org/genomicepidemiology/kma.git\n\n\
+               KMA programs must afterwards be compiled:\n\n\
+               gcc -O3 -o kma KMA.c -lm -lpthread\n\
+               gcc -O3 -o kma_index KMA_index.c -lm")
+        sys.exit()
+
+    kma_index = ans
+
+    if shutil.which(kma_index) is None:
+        print("Path, {}, is not an executable path. Please provide absolute path\n".format(ans))
+
+
+# Index databases
+
+
+# Use config_file to go through database files
+dirname = os.path.dirname(sys.argv[0])
+
+config_file = open(os.path.join(dirname, "config"), "r")
+for line in config_file:
+    if line.startswith("#"):
+        continue
+    else:
+        line = line.rstrip().split("\t")
+        scheme = line[0] 
+        f = os.path.join(dirname, scheme) + ".fsa"
+        if not os.path.isfile(f):
+            sys.exit("Database file '{}' does not exists".format(f))
+        # for each dir index the fasta files
+        os.system("{} -i {} -o {}".format(kma_index, f, os.path.join(dirname, scheme)))
+
+config_file.close() 
+
+print("Done")
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/config Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,11 @@
+# Database configuration file - Describes the content of the database
+# Each db consist of 3 files with the following extensions: fsa, clpx, txt.clean
+# Other important files are: noGenes.txt
+#db_prefix name allele_lst
+incac IncA/C PMLST repA,parA,parB,A053
+incf IncF RST FII,FIC,FIIK,FIIS,FIIY,FIA,FIB
+inchi1 IncHI1 MLST HCM1_043,HCM1_064,HCM1_099,HCM1_116,HCM1_178ac,HCM1_259
+inchi2 IncHI2 DLST smr0018,smr0199
+inci1 IncI1 MLST repI1,ardA,trbA,sogS,pilL
+incn IncN MLST repN,traJ,korA
+pbssb1-family pBSSB1-family higB,mqsA,soj
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/incac.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/incac.fsa Wed Aug 25 21:12:34 2021 +0000
b
b'@@ -0,0 +1,436 @@\n+>repA_1\n+AAGAGAACCAAAGACAAAGACCTGGAGAAACTCGACGTAATCAAAGACTCACCGCAAATG\n+AGCCTGTTTGAGATCATTGAATCTCCGGCCAAGAAAGACGACTACTCCAACACCATCGAG\n+ATCTACGATGCGCTGCCGAAGTACATTTGGGACCAAAAGCGTGAGCATGAAGATTTATCC\n+AACGCTGTAGTGACACGACAATGCACCATCAGAGGCCAGCATTTCACGGTGAAGGTGAAG\n+CCAGCCATCATCGAGAAGGATGACGGAAGAACCGTGCTGATCTACGCGGGACAGCGAGAG\n+GAAATCCTTGAGGATGCTCTACGCAAGCTCGCAGTGAACGGGAAAGGCCATATCATCGAG\n+GGCAAGGCTGGAGTCATGTTCACTCTGTACGAACTCCAGAAAGAGCTCTCGAAGATGGGT\n+CACGGTTACAACCTGAACGAAATCAAGGAAGCAATCCAGGTTTGTCGTGGCGCAACACTC\n+GAATGTATCAGTGATGACGGCGAAGCCTTCATCAGCTCCAGCTTCTTCCCGATGGTGGGA\n+CTTACCACCAGAGGTGAGTTTCGCAAGAAAGGCGGGAACGCCAGGTGCTATGTGCAGTTC\n+AACCCGCTGGTAAACGAATCGATCATGAATCTGTCGTTTCGTCAGTACAACTACAAAATC\n+GGAATGCAAATCCGCTCCCCTCTTGCACGGTACATCTACAAGCGAATGAGCCACTACTGG\n+ACTCAAGCATCGCCAGATTCGCCGTACACGCCATCGCTTATCAGCTTCCTGACACAGAGC\n+CCTCGTGAATTGAGCCCACGGATGCCGGAGAACGTCAGAGCTATGAAGCTCGCTCTGGAG\n+GCCCTCATCAAACAAGAGGTCATAAGCGACTACGACGCGAACCAGATCAAGGATGGCCGC\n+AGAGTCATCGACGTGCGGTACGTCATAAGGCCTCATGAGAACTTCGTGAAGCAGGTGATG\n+GCGTCCAACAAGCGTAAGCAGC\n+>repA_2\n+AAGAGAACCAAAGACAAAGACCTGGAGAAACTCGACGTAATCAAAGACTCACCGCAAATG\n+AGCCTGTTTGAGATCATTGAATCTCCGGCCAAGAAAGACGACTACTCCAACACCATCGAG\n+ATCTACGATGCGCTGCCGAAGTACATTTGGGACCAAAAGCGTGAGCATGAAGATTTATCC\n+AACGCTGTAGTGACACGACAATGCACCATCAGAGGCCAGCATTTCACGGTGAAGGTGAAG\n+CCAGCCATCATCGAGAAGGATGACGGAAGAACCGTGCTGATCTACGCGGGACAGCGAGAG\n+GAAATCCTTGAGGATGCTCTACGCAAGCTCGCAGTGAACGGGAAAGGCCATATCATCGAG\n+GGCAAGGCTGGAGTCATGTTCACTCTGTACGAACTCCAGAAAGAGCTCTCGAAGATGGGT\n+CACGGGTACAACCTGAACGAAATCAAGGAAGCAATCCAGGTTTGTCGTGGCGCAACACTC\n+GAATGTATCAGTGATGACGGTGAAGCCTTCATCAGCTCCAGCTTCTTCCCGATGGTGGGA\n+CTGACCACCAGAGGTGAGTTTCGCAAGAAAGGCGGGAACGCCAGGTGCTATGTGCAGTTC\n+AACCCGCTGGTAAACGAATCGATCATGAATCTGTCGTTTCGTCAGTACAACTACAAAATC\n+GGAATGCAAATCCGCTCCCCTCTTGCACGGTACATCTACAAGCGAATGAGCCACTACTGG\n+ACTCAAGCATCACCAGATTCGCCGTACACGCCATCGCTTATCAGCTTCCTGACCCAGAGC\n+CCTCGTGAATTGAGCCCACGGATGCCGGAGAACGTCAGAGCCATGAAGCTCGCTCTGGAG\n+GCCCTCATCAAACAAGAGGTCATAAGCGACTACGACGCGAACC
AGATCAAGGATGGCCGC\n+AGAGTCATCGACGTGCGGTACGTCATAAGGCCTCATGAGAACTTCGTGAAGCAGGTGATG\n+GCGTCCAACAAGCGTAAGCAGC\n+>repA_3\n+AAGAGAACCAAAGACAAAGACCTGGAGAAACTCGACGTAATCAAAGACTCACCGCAAATG\n+AGCCTGTTTGAGATCATTGAATCTCCGGCCAAGAAAGACGACTACTCCAACACCATCGAG\n+ATCTACGATGCGCTGCCGAAGTACATTTGGGACCAAAAGCGTGAGCATGAAGATTTATCC\n+AACGCTGTAGTGACACGACAATGCACCATCAGAGGCCAGCATTTCACGGTGAAGGTGAAG\n+CCAGCCATCATCGAGAAGGATGACGGAAGAACCGTGCTGATCTACGCGGGACAGCGAGAG\n+GAAATCCTTGAGGATGCTCTACGCAAGCTCGCAGTGAACGGGAAAGGCCATATCATCGAG\n+GGCAAGGCTGGAGTCATGTTCACTCTGTACGAACTCCAGAAAGAGCTCTCGAAGATGGGT\n+CACGGGTACAACCTGAACGAAATCAAGGAAGCAATCCAGGTTTGTCGTGGCGCAACACTC\n+GAATGTATCAGTGATGACGGTGAAGCCTTCATCAGCTCCAGCTTCTTCCCGATGGTGGGA\n+CTGACCACCAGAGGTGAGTTTCGCAAGAAAGGCGGGAACGCCAGGTGCTATGTGCAGTTC\n+AACCCGCTGGTAAACGAATCGATCATGAATCTGTCGTTTCGTCAGTACAACTACAAAATC\n+GGAATGCAAATCCGCTCCCCTCTTGCACGGTACATCTACAAGCGAATGAGCCACTACTGG\n+ACTCAAGCATCACCAGATTCGCCGTACACGCCATCGCTTATCAGCTTCCTGACCCAGAGC\n+CCTCGTGAATTGAGCCCACGGATGCCGGAGAACGTCAGAGCCATGAAGCTCGCTCTGGAG\n+GCCCTCATCAAACAAGAGGTCATAAGCGACTACGACGCGAACCAGATCAAGGATGGCCGC\n+AGAGTCATCGACGTGCGGTACGTCATAAGACCTCATGAGAACTTCGTGAAGCAGGTGATG\n+GCGTCCAACAAGCGTAAGCAGC\n+>repA_4\n+AAGAGAACCAAAGACAAAGACCTGGAGAAACTCGACGTAATCAAAGACTCACCGCAAATG\n+AGCCTGTTTGAGATCATTGAATCTCCGGCCAAGAAAGACGACTACTCCAACACCATCGAG\n+ATCTACGATGCGCTGCCTAAGTACATTTGGGACCAAAAGCGTGAGCATGAAGATTTGTCC\n+AACGCTGTAGTGACGCGACAATGCTCCATCAGAGGCCAGCAGTTCACGGTGAAGGTGAAG\n+CCAGCCATCATCGAGAAGGATGACGGAAGAACTGTGCTGATCTACGCGGGACAGCGAGAG\n+GAAATCCTTGAGGATGCTCTACGAAAGCTCGCAGTGAACGGGAAAGGCCATATCATCGAG\n+GGCAAGGCTGGAGTCATGTTCACTCTGTACGAACTCCAGAAAGAGCTCTCGAAGATGGGT\n+CACGGGTACAACCTGAACGAAATCAAGGAAGCAATCCAGGTTTGTCGTGGCGCAACACTC\n+GAATGTATCAGTGATGACGGTGAAGCCTTCATCAGCTCCAGCTTCTTCCCGATGGTGGGA\n+CTGACCACCAGAGGTGAGTTTCGCAAGAAAGGCGGGAACGCCAGGTGCTATGTGCAGTTC\n+AACCCGCTGGTAAACGAATCGATCATGAATCTGTCGTTTCGTCAGTACAACTACAAAATC\n+GGAATGCAAATCCGCTCCCCTCTTGCACGGTACATCTACAAGCGAATGAGCCACTACTGG\n+ACTCAAGCATCACCAGATTCGCCGTACACGCCATCGCTTATCAGCTTCCTGACCCAGAGC\n+CCTCGTGAATTGAGCCCACG
GATGCCGGAGAACGTCAGAGCCATGAAGCTCGCTCTGGAG\n+GCCCTCATCAAACAAGAGGTCATAAGCG'..b'GTTGATCGAAAACATCCAGCGGGATGATTTGACTCCGGTAGAGATTGCCGAGGCGT\n+TAAACCTGTTTATTGAAGAAGGTTGGAAACAAAAGGATATTGCTGATCGTCTTGGTAAGA\n+ATATCACTTTCGTATCTACGCATCTGTCGTTGCTCAAGCTACCTGACTGTGTGCGTGAGT\n+TGTACGATAATGAAGTATGTTCTGATACAGAAACCTTGAACAATCTCCGTCTTCTGTTTG\n+AGCTTAACGAAGAAAGATGTCGCGCCGTCTGCGCTGTAGCTATGTCTGACGGGATTACCC\n+GTAAACAAAGCCGTGAGATGTTGAATGATGCCAAACGCATCAAAGAAGAAATGGAAAAAG\n+GTCCTCTGACGGGCCCCGACCAGAATGATAAACCTGGCGCTGGTAACACCGACGAGCAAG\n+CCATTAACTCTGGGAGTGGCACATCGGTACATATCGGAAATGGCGGCCAGAGCCCTGCCA\n+ACATGGAACTGGAAGGGGGGGCAAATCTCGGTGGTCAGGGCGATGATGACCAAGACCATT\n+TTCTTGACGAAGAGGGTAAGGGAAAAGCTCCTGTCCAGCAGCCAGTTAACAATGGTAAGA\n+GCAAAAATGAAGAAGGCGGCGATGCTCTTCCTCCTCTGCCGAAGGATAAGGAATGGAAGA\n+ACGTCAGGGCTGATAGTTTGATTTTTGCTGTCAACGTTAATCTGGATGGCGAGACCAAAC\n+GTGGAGTCATCATGACTGACCGTGTTGCTCTGGTTCCGGCTACAGTCTGGGTTAAAACGC\n+TCGATGGCGAAGGCAAGGAAAAGCACGTTCATGTGCCTGTGTCAG\n+>parB_8\n+TGTCCGAACTTGCTAAAGCCGCCAAAGGCAAGAAAGGCAAAGAGGTTCTTACCGTCCCTG\n+TTGACGACGTTGTTTCCAAGGTACAGGTGCGTAAGCGCTTCCGTAACATTGAAGATCTGG\n+CGGCCACCTTGCTGACCGAAGGGCAGCAGTCTCCGATCATCGTGTTTCCGAAGAACGAAG\n+AAGGCAAGTTCGTTATCCAGAAAGGGGAGCGGCGTTGGAGGGCTTGTAAACACGCTGGTA\n+TTGAGACTATCGACCTGGTGGTTAATGATAAGGTCCAGAACAACCTGGACGAGACTGCTG\n+GTGAGTTGATCGAAAACATCCAGCGGGATGATTTGACTCCGGTAGAGATTGCCGAGGCGT\n+TAAACCTGTTTGTTGAAGAAGGTTGGAAACAAAAGGATATTGCTGATCGTCTTGGTAAGA\n+ATATCACTTTCGTATCTACGCATCTGTCGTTGCTCAAGCTACCTGACTGTGTGCGCGAGT\n+TGTACGATAATGAAGTATGTTCTGATACAGAAACCTTGAACAATCTCCGTCTTCTGTTTG\n+AGCTTAACGAAGAAAGATGTCGCGCCGTCTGCGCTGTAGCTATGTCTGACGGGATTACCC\n+GTAAACAAAGCCGTGAGATGTTGAATGATGCCAAACGCATCAAAGAAGAAATGGAAAAAG\n+GTCCTCTGACGGGCTCCGACCAGAATGATAAACCTGGCGCTGGTAACACCGACGAGCAAG\n+CCATTAACTCTGGGAGTGGCACATCGGTACATATCGGAAATGGCGACCAGAGCCCTGCCA\n+ACATGGAACTGGAAGGGGGGGCAAATCTCGGTGGTCAGGGCGATGATGACCAAGACCATT\n+TTCTTGACGAAGAGGGTAAGGGCAAAACTCCTGTCCGGCAGCCAGTTAACAATGGTAAGA\n+GCAAAGATGAAGAAGGTGGCGATGCTCTTCCTCCTCTGCCGAAGGATAAGGAATGGAAGA\n+ACGTCAGGGCTGATAGTTTGATTTTTGCTGTCAACGTTAATCT
GGATGGCGAGACCAAAC\n+GTGGAGTCATCATGACCGACCGTGTTGCTCTGGTTCCGGCTACAGTCTGGGTTAAAACGC\n+TCGATGGCGAAGGCAAGGAAAAGCACGTTCATGTGCCTGTGTCAG\n+>parB_9\n+TGTCCGAACTTGCTAAAGCCGCCAAAGGCAAGAAAGGCAAAGAGGTTCTTACCGTCCCTG\n+TTGACGACGTTGTTTCCAAGGTACAGGTGCGTAAGCGCTTCCGTAACATTGAAGATCTGG\n+CGGCCACCTTGCTGACCGAAGGGCAGCAGTCTCCGATCATCGTGTTTCCGAAGAACGAAG\n+AAGGCAAGTTCGTTATCCAAAAGGGGGAGCGGCGTTGGAGGGCTTGTAAACACGCTGGTA\n+TTGAGACTATCGACCTGGTGGTTAATGATAAGGTCCAGAACAACCTGGACGAGACTGCTG\n+GTGAGTTGATCGAAAACATCCAGCGGGATGATTTGACTCCGGTAGAGATTGCCGAGGCGT\n+TAAACCTGTTTATTGAAGAAGGTTGGAAACAAAAGGATATTGCTGATCGTCTTGGTAAGA\n+ATATCACTTTCGTATCTACGCATCTGTCGTTGCTCAAGCTACCTGACTGTGTGCGTGAGT\n+TGTACGATAATGAAGTATGTTCTGATACAGAAACCTTGAACAATCTCCGTCTTCTGTTTG\n+AGCTTAACGAAGAAAGATGTCGCGCCGTCTGCGCTGTAGCTATGTCTGACGGGATTACCC\n+GTAAACAAAGCCGTGAGATGTTGAATGATGCCAAACGCATCAAAGAAGAAATGGAAAAAG\n+GTCCTCTGACGGGCCCCGACCAGAATGATAAACCTGGCGCTGGTAACACCGACGAGCAAG\n+CCATTAACTCTGGGAGTGGCACATCGGTACATATCGGAAATGGCGGCCAGAGCCCTGACA\n+TCATGGAACTGGAAGGGGGGGCAAATCTCGGTGGTCAGGGCGATGATGACCAAGACCATT\n+TTCTTGACGAAGAGGGTAAGGGAAAAGCTCCTGTCCAGCAGCCAGTTAACAATGGTAAGG\n+GCAAAAATGAAGAAGGCGGCGATGCTCTTCCTCCTCTGCCGAAGGATAAGGAATGGAAGA\n+ACGTCAGGGCTGATAGTTTGATTTTTGCTGTCAACGTTAATCTGGATGGCGAGACCAAAC\n+GTGGAGTCATCATGACTGACCGTGTTGCTCTGGTTCCGGCTACAGTCTGGGTTAAAACGC\n+TCGATGGCGAAGGCAAGGAAAAGCACGTTCATGTGCCTGTGTCAG\n+>A053_1\n+AGATCTCACAGGACATGAAAAGAAAATTTGCTTTGGTGAATGCCCTGTCTAAAACGGAGA\n+AGCCAAGTCTCCAAGACCTTCACAAGGCAACAAATATTCCTGAATCAACGATCAAGAGAC\n+AGTTGTCTGCCCTGCGTGATGAGTTCGGAATGAATATCTTGTTCGTCAGGGAGTCTACCG\n+GCGAACGAGGTGCCACCGGCTACTACATGCTGACAGACTGGGGGATCTTAGACAGGTCTT\n+CGTTCTTGAA\n+>A053_2\n+AGATCTCACAGGACATGAAAAGAAAATTTGCTTTGGTGAATGCCCTGTCTAAAACGGAGA\n+AGCCAAGTCTCCAGGACCTTCACAAGGCAACAAATATTCCTGAATCAACGATCAAGAGAC\n+AGTTGTCTGCCCTGCGTGATGAGTTCGGAATGAATATCTTGTTCGTCAGGGAGTCTACCG\n+GCGAACGAGGTGCCACCGGCTACTACATGCTGACAGACTGGGGGATCTTAGACAGGTCTT\n+CGTTCTTGAA\n+>A053_3\n+AGATCTCACAGGACATGAAAAGAAAATTTGCTTTGGTGAATGCCCTGTCTAAAACGGAGA\n+AGCCAAGTCTCCAGGACCTTCACAAGGCAACAGAAATTCCTGAATCAACGATCA
AGAGAC\n+AGTTGTCTGCTCTGCGTGATGAGTTCGGAATGAATATATTGTTCGTCAGGGAGTCTACCG\n+GCGAACGGGGGGCCACCGGCTACTATATGCTGACAGACTGGGGGATCTTAGACAGGTCTT\n+CGTTCTTGAA\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/incac.txt.clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/incac.txt.clean Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,15 @@
+ST repA parA parB A053
+1 1 1 1 1
+2 2 1 1 1
+3 2 2 2 1
+4 2 2 3 1
+5 2 2 4 1
+6 2 2 5 1
+7 2 3 2 1
+8 2 4 2 1
+9 3 2 2 1
+10 4 5 6 2
+11 5 6 7 3
+12 6 7 8 3
+13 7 8 9 3
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/incf.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/incf.fsa Wed Aug 25 21:12:34 2021 +0000
b
b'@@ -0,0 +1,1468 @@\n+>FII_1\n+CAAAAACCCCGATAATCTTCACCAGGTTTGGCGACTAAGAGAAGATTACCGGGGCTAACA\n+AGAAACTGCATAGAAGCTGTTGCTCTATGCGGGGAGTATAGTTATATGACCGGAAAAGTT\n+CAAGACTTCTTTCTGTGCTCACTCCTTCTGTGCAACA\n+>FII_2\n+CAAAAACCCCGATAATCTTCTTCAACTTTGGCGAGTACGAAAAGATTACCGGGGCCCACT\n+TAAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTT\n+CAAGACTTCTTTCTGTGCTCGCTCCTTCTGCGCATTG\n+>FII_3\n+CGAAAACCCCGATAATCTTTCTATGTTTGGCGACGAAGAAGATTACCGGGGCCATCTAAA\n+AACCGCATAGAAGCTGTTGCTCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTCAA\n+GACTTCTTTCTGTGCTCACTCCTTCTGTGCAACA\n+>FII_4\n+TGAAAACCCCGATAATCTTCAGCAAGTTTGGCGACTGAGAAGAAGATTAACGGGGCTAAC\n+AAGAAACCGCATAGAAGCTGTTGCTCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGT\n+TCAAGACTTCTTTCTGTGCTCACTCCTTCTGTGCAACA\n+>FII_5\n+TGAAAACCCCGATAATCTTCAGCAAGTTTGGCGACTAAGAGAAGATTAACGGGGCTAACA\n+AGAAACCGCATAGAAGCTGTTGCTCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTT\n+CAAGACTTCTTTCTGTGCTCACTCCTTCTGTGCAACA\n+>FII_6\n+TGAAAACCCCGATAATCTTCAGTAAGTTTGGCGACTGAGAAGATTACCGGGGCTAACAAG\n+AAACTGCATAGAAGCTGTTGCTCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTCA\n+AGACTTCTTTCTGTGCTCACTCCTTCTGTGCAACA\n+>FII_7\n+TGAAAACCCCGATAATCTTCTTTAACTTTGGCGAGTGAGAAAGATTATCGGGGCTAACAA\n+GAAACTGCATAGAAGCGGTTGCTCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTC\n+AAGACTTCTTTCTGTGCTCACTCCTTCTGTGCAACA\n+>FII_8\n+TGAAAACCCCGATAATCTTCTTCAAGTTTGGCGACTAGAAAGATTACCGGGGCCATCTAA\n+AAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTC\n+AAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_9\n+TGAAAACCCCGATAATCTTCTTCAAGTTTGGCGACATGAAAGATTACCGGGGCCATCTAA\n+AAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTC\n+AAGGCTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_10\n+TGAAAACCCCGATAATCTTCTTCAAGTTTGGCGACATGAAAGATTACCGGGGCCATCTAA\n+AAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTC\n+AAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_11\n+CGAAAACCCCGATAATCTTCTTCAAGTTTGGCGACATGAAAGATTACCGGGGCCATCTAA\n+AAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTC\n+AAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_12\n+TGAAAACCCCGATAATCTTCTTCAAGTTTGGCGACTAGAAAGATTACCG
GGGCTAACAAG\n+AAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTC\n+AAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_13\n+CAAAAACCCCGATAATCTTCTTTAACTTTGGCGAGTACAGAAAGATTACCGGGGCCATCT\n+AAAAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGCTATATGCCCGGAAAAGT\n+TCAAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_14\n+CGAAAACCCCGATAATCTTCTCTAACTTTGGCGAGTGCAGAAAGATTACCGGGGCCATCT\n+AAAAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGT\n+TCAAGACTTCTTTCTGTGCTCGCTCCTTCTGCGCATTG\n+>FII_15\n+TGAAAACCCCGATAATCTTCTCTAACTTTGGCGAGTACAGAAAGATTACCGGGGCCATCT\n+AAATACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCTGGAAAAGT\n+TCAAGACTTCTTTCTGTGCACACTCCTTCTGCGCATTG\n+>FII_16\n+TGAAAACCCCGATAATCTTCTTTAACTTTGGCGAGTGAGAAAGATTACCGGGGCCATCTA\n+AAAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTT\n+CAAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_17\n+TGAAAACCCCGATAATCTTCTTTAACTTTGGCGAGTCAGAAAGATTACCGGGGCCATCTA\n+AAAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTT\n+CAAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_18\n+CGAAAACCCCGATAATCTTCTTCAAGTTTGGCGACTAGAAAGATTACCGGGGCCCCATAA\n+AACCGCATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTCA\n+AGACTTCTTTCTGTGCTCACTCCTTCTGTGCATTG\n+>FII_19\n+TGAAAACCCCGATAATCTTCTTCAAGTTTGGCGACTAGAAAGATTACCGGGGCCTCATAA\n+AACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCTGGAAAAGTTCA\n+AGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_20\n+CAAAAACCCCGATAATCTTCGAGAAGTTTGGCGACTAAGAGAAGATTACCGGGGCCCCAT\n+AAAACCGCATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTT\n+CAAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_21\n+CAAAAACCCCGATAATCTTCTCAAAGTTTGGCGACTAGAAAGATTACCGGGGCCCACTTA\n+AACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTTCA\n+AGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_22\n+TAAAAACCCCGATAATCTTCACCAGGTTTGGCGACTAAGAGAAGATTACCGGGGCCCACT\n+TAAACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCCCGGAAAAGTT\n+CAAGACTTCTTTCTGTGCTCACTCCTTCTGCGCATTG\n+>FII_23\n+CAAAAACCCCGATAATCTTCAGAAAGTTTGGCGACTGAGAAGATTACCGGGGCCCACTTA\n+AACCGTATAGCCAACAATTCAGCTATGCGGGGAGTATAGTTATATGCTCGGAAA
AGTTCA\n+AGACTTCTTTCTGTGCTCGCTCCTTCTGCGCATTG\n+>FII_24\n+CAAAAACCCCGATAATCTTCGTCAAGTTTGGCGACTGC'..b'TGCCTTTTGTCGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_69\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAGCTGGTGACAC\n+TGACACCAAACAATAACAACAGCGTACAACCTGTGGCGCTGATGCGTCTGGGCGTCTTTG\n+TACCGACCCTTAAGTCACTGAAGAACAGTAAAAAAAATACACTGTCACGCACCGATGCCA\n+CGGAAGAACTGACGCGTCTTTCTCTGGCCCGTGCAGAAGGATTCGATAAGGTTGAGATCA\n+CCGGCCCCCGGCTGGATATGGATAACGATTTCAAGACCTGGGTGGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGATTGGTGACAAAGTTGAACTGCCTTTTGTTGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_70\n+ATCTGGACTTAAAAAAGTTGTTTGAAGAGGTGGATAAATCCTCCGGTGAAGTCGTAAATC\n+TCACTCCTAATGCCAGTAATACAGTGCAGCCAGTGGCACTGATGCGGCTCGGAGTGTTTG\n+TCCCTACTCTCAAATCACTGAAAAACAGTAAAAAGAACACGCTTTCGCGTACTGATGCGA\n+CGGAAGAACTGACGCGTCTATCTCTCGCCAGAGCCGAAGGGTTTGATAAGGTGGAGATCA\n+CCGGTCCCCGCCTGGATATGGATAATGACTTCAAGACCTGGGTAGGGGTGATCCATTCCT\n+TTGCACGCCATAAGGTTATCGGCGATAAGGTTGAACTGCCGTTTGTTGAGTTCGCCAAAC\n+TCTGTGGGATCCC\n+>FIB_71\n+TTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAGCTGGTGACACT\n+GACACCAAACAATAACAACACCGTACAACCTGTGGCGCTGATGCGTCTGGGCGTCTTTGT\n+ACCGACCCTTAAGTCACTGAAGAACAGTAAAAAAAATACTCTGTCACGCACCGATGCCAC\n+GGAAGAACTGACGCGTCTTTCCCTGGCCCGTGCAGAAGGATTCGATAAGGTTGAGATCAC\n+CGGCCCCCGGCTGGATATGGATAACGATTTCAAGACCTGGGTGGGGATTATTCATTCCTT\n+TGCCCGCCATAACGTGATTGGTGACAAAGTTGAACTGCCTTTTGTCGAGTTTGCAAAACT\n+GTGTGGTATACC\n+>FIB_72\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAGCTGGTGACAC\n+TGACACCAAACAATAACAACACCGTACAACCTGTGGCGCTGATGCGTCTGGGCGTCTTTG\n+TACCGACCCTTAAGTCACTGAAGAACAGTAAAAAAAATACACTGTCACGCACCGATGCCA\n+CGGAAGAACTGACGCGTCTTTCTCTGGCCCGTGCAGAAGGATTCGATAAGGTTGAGATCA\n+CCGGCCCCCGCCTGGATATGGATAACGATTTCAAGACCTGGGTGGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGACTGGTGACAAAGTTGAACTGCCTTTTGTCGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_73\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAACTGGTGACAC\n+TGACACCAAACAATAACAACACCGTACAACCTGTGGCGCTGATGCGTCTGGGCGTTTTTG\n+TACCGACCCTTAAATCACTGAAGAACAGTAAAAAAAATACACTGTCACGTACTGATGCCA\n+CGGAAGAGCTGACACGTCTTTCCCTGGCCCGTGCTGAGGGATT
CGATAAGGTTGAGATCA\n+CTGGCCCCCGCCTGGATATGGATAACGATTTCAAGACCTGGGTAGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGATTGGTGACAAAGTTGAACTGCCTTTTGTTGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_74\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAGCTGGTGACAC\n+TGACACCAAACAATAACAACACCGTACAACCTGTGGCGCTGATGCGTCTGGGCGTCTTTG\n+TACCTACCCTTAAGTCACTGAAGAACAGTAAAAAAAATACACTGTCACGCACCGATGCCA\n+CGGAAGAACTGACGCGTCTTTCTCTGGCCCGTGCAGAAGGATTCGATAAGGTTGAGATCA\n+CCGGCCCCTGGCTGGATATGGATAACGATTTCAAGACCTGGGTGGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGATTGGTGACAAAGTTGAACTGCCTTTTGTCGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_75\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAGCTGGTGACAC\n+TGACACCAAACAATAACAACACCGTACAGCCTGTGGCGCTGATGCGTCTGGGCGTATTTG\n+TACCGACCCTTAAATCACTGAAGAATAGTAAAAAAAATACACTGTCACGCACCGATGCCA\n+CGGAAGAACTGACGCGTCTTTCCCTGGCCCGTGCAGAAGGATTCGATAAGGTTGAGATCA\n+CCGGCCCCCGGCTGGATATGGATAACGATTTCAAGACCTGGGTGGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGATTGGTGACAAAGTTGAACTGCCTTTTGTCGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_76\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCCTCCGGTGAGCTGGTGACAC\n+TGACACCAAACAATAACAACACCGTACAACCTGTGGCGCTGATGCGTCTGGGCGTTTTTG\n+TACCGACCCTTAAATCACTGAAGAACAGTAAAAAAAATACACTGTCACGTACTGATGCCA\n+CGGAAGAGCTGACACGTCTTTCCCTGGCCCGTGCTGAGGGATTCGATAAGGTTGAGATCA\n+CCGGCCCCCGCCTGGATATGGATAACGATTTCAAGACCTGGGTGGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGATTGGTGACAAAGTTGAACTGCCTTTTGTTGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_77\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAGCTGGTGACAC\n+TGACACCAAACAATAACAACACCGTACAACCTGTGGCGCTGATGCGTCTTGGCGTCTTTG\n+TACCGACCCTTAAGTCACTGAAGAACAGTAAAAAAAATACACTGTCACGCACTGATGCCA\n+CGGAAGAACTGACGCGTCTTTCCCTGGCCCGTGCAGAAGGATTCGATAAGGTTGAGATCA\n+CCGGTCCCCGGCTGGATATGGATAACGATTTCAAGACCTGGGTGGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGACTGGTGACAAAGTTGAACTGCCTTTTGTCGAGTTTGCAAAAC\n+TGTGTGGTATACC\n+>FIB_78\n+ATTCAGACATCAAAAAACTGTTCGGCGAGGTGGATAAGTCGTCCGGTGAGCTGGTGACAC\n+TGACACCAAACAATAACAACACCGTACAACCTGTGGCGCTGATGCGTCTGGGCGTCTTTG\n+TACCGACCCTTAAGTCACTGAAGAACAGTAAAAAAAATACTCTGTCACGCACCGATGCCA\n+CGGAAGAACTGACGCGTCTTTCC
CTGGCCCGTGCAGAAGGATTCGATAAGGTTGAGATCA\n+CCGGCCCCCGGCTGGATATGGATAATGATTTCAAGACCTGGGTGGGGATCATTCATTCCT\n+TTGCCCGCCATAACGTGATTGGTGACAAAGTTGAACTGCCTTTTGTCGAGTTTGCAAAAC\n+TGTGTGGTATACC\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/incf.txt.clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/incf.txt.clean Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,1 @@
+ST FII FIC FIIK FIIS FIIY FIA FIB
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/inchi1.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/inchi1.fsa Wed Aug 25 21:12:34 2021 +0000
b
b'@@ -0,0 +1,189 @@\n+>HCM1_043_1\n+GGCTGCTGAAGGCGCGTTGCAGAATGGCCAGCCCGTCCGCTATCTGGAAACCAAAGAAAG\n+CTCTCCGTATCTCATCGTACACCCTGACGCCTCACTGCCGTCGGTGACGCAAAACGTCTG\n+GATGCACGCTGATCATGATGGCTGGGAAGAGCATTTCAACGGAGAGACCGATGCCCTGAC\n+TCCAGAGATGAGCGAGCAGTTCCGCCAGCAGGTTTATGCACTGCTGACGCCGGAATCCCA\n+TACCACCTCTTCAGAAATGAGACTGACTTTGCAGGACTGGCAACTGGGTGAAGCCGAGAT\n+CCCAGAAGAAGACTGTCAGCCCTACCAGGTGAAGGTGTTAGCAGAAAATAA\n+>HCM1_043_2\n+GGCAGTTGATGAAGCTTTGCTGGATGGCCAGCCTGTCCGCTATCTGGAAACAAAAGAAAG\n+CTCCCCATATCTCATCGTACACCCTGACGACTCACTACCGTCTCTGACGCATAACGTTTG\n+GATGCACGCTGATCCTGATGGCTGGGAAGAACATTTCGATGGCGAGGACAATCCCCTGAT\n+GGAGGAGTTGAGCCAGCAGTTCCGCCAGCAGGTGTATGCGCTTCTGACGCCGGAATCGCA\n+GGCAACGCCTTCAGAGATTATCCGGACGTTACAGGACTGGCAACTGGGTGAAACCCAGCT\n+GCCAGAAGAAGACTGTCAGTCCTATCAGGTGAAGGTGTTGTTAGAAGAGAA\n+>HCM1_043_3\n+GGCTGCTGAAGGCGCGTTGCAGAATGGCCAGCCCGTCCGCTATCTGGAAACCAAAGAAAG\n+CTCTCCGTATCTCATCGTACACCCTGACGCCTCACTGCCGTCGGTGACGCAAAACGTCTG\n+GATGCACGCTGATCCTGATGGCTGGGAAGAGCATTTCAACGGAGAGACCGATGCCCTGAC\n+TCCAGAGATGAGCGAGCAGTTCCGCCAGCAGGTTTATGCACTGCTGACGCCGGAATCCCA\n+TACCACCTCTTCAGAAATGAGACTGACTTTGCAGGACTGGCAACTGGGTGAAGCCGAGAT\n+CCCAGAAGAAGACTGTCAGCCCTACCAGGTGAAGGTGTTAGCAGAAAATAA\n+>HCM1_064_1\n+GGTGGGAACTAAAGAGAAGCGATACAGAAATTTTGAGGTTGACGCTTCTGATGAGTTGTC\n+GAGCATGGAGCTGGCCCGTTCAGAAGGATACGATGACATTCGGATCACTGGTCTTAAGCT\n+TTCGATGTCAACCGATTTTAAGTGTTGGCTCGGTTGCATAATGGCGTTCAGCAAATATGG\n+TTTCGCTTCCGATAAAATCACATTGTCATTCAATGAGTTTGCAAAAATGTGTGGTATCAG\n+TTCTACAAACATAAACAAGCGAACTCGCTCACGATTTCAGGAGGCGCTAGCAAACCTAGC\n+TTCCGTTGTGATTTCTTTCCGTGATTCAAAAACTGAACGTTTTACTGTCACGCACCTTGT\n+GCAGAAAGCAGTGATTGATCCTAAGAAGGACACCGTAGAGTTGGTGGGCGATCCCTCAAT\n+GTGGGAGCTTTACCGGTATGATCATAAAACCTTACTTAGTTTGCAGGTGCTTTCGGTTCT\n+CGCTAAAAAAGAGGCAGCACAAAGCCTGTACATATATTTTGAGGCGA\n+>HCM1_064_2\n+GGTGGGAACTAAAGAGAAGCGATACAGAAATTTTGAGGTTGACGCTTCTGATGAGTTGTC\n+GAGCATGGAGCTGGCCCGTTCAGAAGGATACGATGACATTCGGATCACTGGTCTTAAGCT\n+TTCGATGTCAACCGATTTTAAGTGTTGGCTCGGTTGCATAATGGCGTTCAGCAAATATGG\n+TTTCGCTTCCGATAAAATCACATTGTCATTCAATGAGTTTGCAAAAATGTGTGGTATC
AG\n+TTCTACAAACATAAACAAGCGAACTCGCTCACGATTTCAGGAGGCGCTAGCAAACCTAGC\n+TTCCGTTGTGATTTCTTTCCGTGATTCAAAAACTGAACGTTTTACTGTCACGCACCTTGT\n+GCAGAAAGCAGTGATTGATCCTAAGAAGGACACCGTAGAGTTGGTGGGCGATCCCTCAAT\n+GTGGGAGCTTTACCGGTATGATCATAAAACCTTACTTAGCTTGCAGGTGCTTTCGGTTCT\n+CGCTAAAAAAGAGGCAGCACAAAGCCTGTACATATATTTTGAGGCGA\n+>HCM1_064_3\n+GGTGGGAACTAAAGAGAAGCGATACAGAAATTTTGAGGTTGACGCTTCTGATGAGTTGTC\n+GAGCATGGAGCTGGCCCGTTCAGAAGGATACGATGACATTCGGATCACTGGTCTTAAGCT\n+TTCGATGTCAACCGATTTTAAGTGTTGGCTCGGTTGCATAATGGCGTTCAGCAAATATGG\n+TTTCGCTTCCGATAAAATCACATTGTCATTCAATGAGTTTGCAAAAATGTGTGGTATCAG\n+TTCTACAAACATAAACAAGCGAACTCGCTCACGATTTCAGGAGGCGCTAGCAAACCTAGC\n+TTCCGTTGTGATTTCTTTCCGTGATTCAAAAACTGAACGTTTTACTGTCACGCACCTTGT\n+GCAGAAAGCAGTGATTGATCCTAAGAAGGACACCGTAGAGTTGGTGGGCGATCCCTCAAT\n+GTGGGAGCTTTACCGGTATGATCATAAAACCTTACTTAGCTTGCAGGTGCTTTCGGTTCT\n+CGCTAAAAAAGAGGCAGCACAAAGTCTGTACATATATTTTGAGGCGA\n+>HCM1_064_4\n+GGTGGGAACTAAAGAGAAGCGATTCAGAAATTTTGAGGTTGATGCTTCTGATGAGTTGTC\n+GAGCATGGAACTGGCCCGGTCAGAAGGATACGATGACATCCGGATCACTGGCCTTAAGCT\n+TTCGATGTCCACGGACTTCAAGTGTTGGCTTGGGTGCATTATGGCGTTCAGTAAATATGG\n+ATTCGCCTCCGAGAAAATAACATTGTCGTTCAATGAGTTTGCAAAAATGTGTGGTATCAG\n+TTCTACAAATATCAACAAGCGAACCCGTTCACGATTTCAGGAAGCGCTGGCAAACCTCGC\n+GTCCGTTGTTATTTCTTTCCGTGATTCTAAAACTGAACGCTTCACTGTCACACACCTTGT\n+GCAGAAAGCTATGATTGACCCTAAGAAGGACACAGTAGAGCTCGTGGGTGATCCCTCGAT\n+GTGGGAGCTTTACCGGTATGATCATAAAACCCTATTAAGCTTACAGGTACTGTCGGTTCT\n+CGCCAAAAAAGAAGCTGCACAGAGCCTGTACATCTATTTTGAGGCAA\n+>HCM1_099_1\n+TGCAGGCAGTTATAGACCATACGCAGGCTATCCGCGCTCAGAGTAATAACTCGGAAGCAG\n+TTAACGACATCATTCGCAGACGTGATGAGATTCAGGGAAATTCACAGCTCAGCGAGTCTG\n+CACTGAAATCCGTCGAAAATAAACCAGAAGTTATGCGCAGTCAGTCTTCGAATATCGAAA\n+AGATGTTTGGTTCATCAGGAATCACCGCCGCTGACTTCGAGCGTAAAATAGACAGCAGTC\n+GGGAAGAGGTGCTCTCGACAGAAAACGGTATCACTATCTTTGCGTCATTTAGCTTGCCTG\n+ATTACGTCCTGGAAGATCTACTCCGTACCGCGTCAGAACATAAGGCGAGAGTGGTTTTTA\n+ATGGTCTGAAGAAAGGCACTACACGCCTCCCTGAAACTCAGGCAGCTATCA\n+>HCM1_099_2\n+TGCAGGCAGTTATAGACCATACGCAGGCTATCCGCGCTCAGAGTAATAACTCGGAAGCTG\n+TTAACGACATCATTCGCAGACGTGATGAGATTCA
GGGAAATTCACAGCTCAGCGAGTCTG\n+CACTGAAATCCGTCGAAAATAAACCAGAAGTTATGCGCAGTCAGT'..b'ATCCTTG\n+CACCTGTAACT\n+>HCM1_116_3\n+GTGATTATAGCGATGAGAAATTCGTCAAGCTAATTGAAGGGTTTATATGTCATGAAGCTG\n+GACACGGTCGCTATACAGAACATGAAGTATACCGTGAAGCTTTTGTTGGGGAACTGATTA\n+ATGCAGATGGCTTCATCAGTATAGATGATAAGCTGAACGCTGAATTTCAAACCCTTAAAC\n+AAAAGAAAATCGCATATGCCCGGGCTTACCGTCTACATGGACTTATTAATCTCTTTGATG\n+ATGTCCAGATGGAAGAGAAAACGGGTATTGACTATCAGGAGGCAAAAAAGCGCCTCGCGG\n+TAAGTTATGCCTTGATGGTTGAAGCTGGGCGAATGACGGTTGATGTTTCTTCAAGCCCTC\n+AAAACCCTGTTCAGTTTATTGAAATGTTCCTGCTTAATACATTGCGGGTTAACGTTCTCC\n+AGCAAGAAGGGCATAAAGAAACGCTTGATCCATTTTTCGATTATGCAAAGAAAATCCTTG\n+AGCCTGTAATT\n+>HCM1_116_4\n+GTGATTATAGCGATGAGAAATTCGTCAAGCTAATTGAAGGGTTTATATGTCATGAAGCTG\n+GACACGGTCGCTATACAGAACATGAAGTATACCGTGAAGCTTTTGTTGGGGAACTGATTA\n+ATGCAGATGGCTTCATCAGTATAGATGATAAGCTGAACGCTGAATTTCAAACCCTTAAAC\n+AAAAGAAAATCGCATATGCCAGGGCTTACCGTCTACATGGACTTATTAATCTCTTTGATG\n+ATGTCCAGATGGAAGAGAAAACGGGTATTAACTATCAGGAGGCAAAAAAGCGCCTCGCGG\n+TAAGTTATGCCTTGATGGTTGAAGCTGGGCGAATGACGGTTGATGTTTCTTCAAGCCCTC\n+AAAACCCTGTTCAGTTTATTGAAATGTTCCTGCTTAATACATTGCGGGTTAACGTTCTCC\n+AGCAAGAAGGGCATAAAGAAACGCTTGATCCATTTTTCGATTATGCAAAGAAAATCCTTG\n+AGCCTGTAATT\n+>HCM1_178ac_1\n+GTCCAGGTTTTCGTTTCACCATTAACATCAGTGTATTTATATTTCGCAGGGCGCGGTTCG\n+CGAACTTTTTTAGGTGCTCCGGACTTTGCAGAGAAAGAACTCAGTAACTCTTCAGGGTCA\n+ATTCCATCTTCAAGCATTAATTGACGCAGGGATTCAATCTTCTCCAGACGCGCTTTCAGC\n+TCAGCTTCTTTTGAAGATTCTTCCTGACGTCGTTCTTCAACAACAACGCTTAATTTTTCC\n+AGCAGTTCTTCAAGGATCTCTAACGGCAGTTCACGGCCCTGCGCACGAAGAGTACGAATG\n+TTGTTTAATGATTTGAGTGCTTCGGACATATGACCTCACTAAGAGTAATAAATAAAATCA\n+ATAGAATACTTTGCGATATTACACTCTTTTATTATTT\n+>HCM1_178ac_2\n+GTCCAGGTTTTCGTTTTACCATTAACATCAGTGTATTTATATTTCGCAGGGCGCGGTTCG\n+CGAACTTTTTTAGGTGCTCCGGACTTTGCAGAGAAAGAACTCAGTAACTCTTCAGGGTCA\n+ATTCCATCTTCAAGCATTAATTGACGCAGGGATTCAATCTTCTCCAGACGCGCTTTCAGC\n+TCAGCTTCTTTTGAAGATTCTTCCTGACGTCGTTCTTCAACAACAACGCTTAATTTTTCC\n+AGCAGTTCTTCAAGGATCTCTAACGGCAGTTCACGGCCCTGCGCACGAAGAGTACGAATG\n+TTGTTTAATGATTTGAGTGCTTCGGACATATGACCTCACTAAGAGTAATAAATAAAATCA\n+ATAGA
ATACTTTGCGATATTACACTCTTTTATTATTT\n+>HCM1_178ac_3\n+GTCCAGGTTTTCGTTTCACCATTTTCATCGGTAAACGAATATTTCGCCGGACGCGGATCG\n+CGAGCTTTTTTTGGTGCGCTGGATTTAGCAGAGAAAGAGCTCAGAAGTTCTTCAGGATCG\n+ATTCCGTCTTCAAGCATTAATTTACGCAGGGAATCGAGTTTTTCCTGACGTGCTTCCTGC\n+TCAGCTTTTTTGGAAGATTCTTCCTGACGGCGTTCGTCTACGACAACGCTTAATTTTTCC\n+AGCAGTTCTTCAAGAACCTCTAATGGCAGTTCACGGCCCTGCGCACGAAGAGTACGAATG\n+TTGTTTAATGCTTTTAGTGCTTCGGACATATGGCCTCACTTAGAGATTTTGAAATTAAAA\n+GGATACGGAATGATAGTACACTGTTTTAATATTTTAC\n+>HCM1_259_1\n+CTCCCGTTTTTTCGGGATTGATGCGGAGTTGGTCTTTACGCTTTTTGATAATGAGGGCGG\n+AAAAGTCGGGACGTTCAGCCGTAATAAAAATGGCACTTATGATATTGGGCCTATGCAGAT\n+CAATTCGTCCAACCTGCCAGAAATACGCGACCATTTTCCGTCTGTTACGTGGCGGGTGCT\n+GGCCTATGACGCCTGTGCCAGTTTCTGGGTGGGCACCTGGTGGCTTTACCGGAAGATTGT\n+TGACCGGAAGGGTAACGTATTTGAAGGGATTGCCGATTACAACAGCAAAACCCCTAAGGT\n+CCGGGCGACGTATATCTTCAACTTCATGATCAAATACAATCGCCGGATTCAGCGCCGAAA\n+TGGAATGGACGAATTATATCAGTGGAC\n+>HCM1_259_2\n+CTCCCGTTTTTTCGGGATTGATGCGGAGTTGGTCTTTACGCTTTTTGATAATGAGGGCGG\n+AAAAGTCGGGACGTTCAGCCGTAATAAAAATGGCACTTATGATATTGGGCCTATGCAGAT\n+CAATTCGTCCAACCTGCCGGAAATACGCGACCATTTTCCGTCTGTTACGTGGCGGGTGCT\n+GGCCTATGACGCCTGTGCCAGTTTCTGGGTGGGCACCTGGTGGCTTTACCGGAAGATTGT\n+TGACCGGAAGGGTAACGTATTTGAAGGGATTGCCGATTACAACAGCAAAACCCCTAAGGT\n+CCGGGCGACGTATATCTTCAACTTCATGATCAAATACAATCGCCGGATTCAGCGCCGAAA\n+TGGAATGGACGAATTATATCAGTGGAC\n+>HCM1_259_3\n+CTCCCGTTTTTTCGGGATTGATGCGGAGTTGGTCTTTACGCTTTTTGATAATGAGGGCGG\n+AAAAGTGGGGACGTTCAGCCGTAATAAAAATGGCACTTATGATATTGGGCCTATGCAGAT\n+CAATTCGTCCAACCTGCCGGAAATACGCGACCATTTTCCGTCTGTTACGTGGCGGGTGCT\n+GGCCTATGACGCCTGTGCCAGTTTCTGGGTGGGCACCTGGTGGCTTTACCGGAAGATTGT\n+TGACCGGAAGGGTAACGTATTTGAAGGGATTGCCGATTACAACAGCAAAACCCCTAAGGT\n+CCGGGCGACGTATATCTTCAACTTCATGATCAAATACAATCGCCGGATTCAGCGCCGAAA\n+TGGAATGGACGAATTATATCAGTGGAC\n+>HCM1_259_4\n+CTCCCGCTTTTTCGGGATTGATGCAGAATTGGTCTTTACGCTTTTTGATAATGAAGGCGG\n+AAAAGTGGGAACGTTCAGCCGGAATAAAAATGGCACTTATGATATTGGACCTATGCAGAT\n+CAATTCGTCCAATCTGCCGGAAATACGTGGCCACTTTCCGTCTGTTACCTGGCGGGTATT\n+GGCCTATGACGCCTGTGCCAGTTTCTGGGTAGGTACCTGGTGGCTTTACCGGAAAATTGT\n
+AGACCGAAATGGTAATGTATTTGAAGGGATTGCCGATTACAACAGCAAAACCCCTAAGGT\n+CCGGGCGAGGTATATCTTTAACTTCATGGTCAAATACAATCGCCGGATTCAGGGCCGGAA\n+TGGCATGGACGAGTTATATCAGTGGAC\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/inchi1.txt.clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/inchi1.txt.clean Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,18 @@
+ST HCM1_043 HCM1_064 HCM1_099 HCM1_116 HCM1_178ac HCM1_259
+1 1 1 1 1 1 1
+2 1 2 1 1 1 1
+3 1 1 1 1 1 3
+4 1 2 3 1 1 1
+5 3 2 1 2 1 2
+6 3 2 2 2 2 3
+7 3 2 2 2 1 3
+8 3 2 2 0 1 3
+9 1 3 1 1 1 1
+10 1 2 1 1 1 2
+11 2 4 4 3 3 4
+12 2 4 4 3 0 4
+13 2 4 4 4 0 4
+14 0 2 1 1 1 2
+15 1 1 2 2 1 1
+16 1 1 1 1 0 2
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/inchi2.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/inchi2.fsa Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,127 @@
+>smr0018_1
+TCGTCCAATAACCATAACCACCGAGGATATTTTTTCTTTTCTCAATATAAACGGGTTCAG
+AAAATACTAAATGTTGATATTTAGGAAGAGATAAGATTTCATATTTTCCTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGGATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGGTCAGAATAGTCAGGAACGGACACTTGATCATTAGACGTAT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTATATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_2
+TCGTCCAATAACCATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTATCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_3
+TCGTCCAATAACGATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_4
+TCGTCCAATAACCATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_5
+TCGTCCAATAACCATAACCACCGAGGATATTTTTTCTTTTCTCAATATAAACGGGTTCAG
+AAAATACTAAATGTTGATATTTAGGAAGAGATAAGATTTCATATTTTCCTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGGATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGGGCAGAATAGTCAGGAACGGACACTTGATCATTAGACGTAT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTATATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_6
+TCGTCCAATAACCATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAATATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_7
+TCGTCCAATAACCATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTTAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_8
+TCGTCCAATAACCATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTATTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_9
+TCGTCCAATAACCATAACTACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTATCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_10
+TCGTCCAATAACCATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCTT
+TCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGTT
+GTTTCTTGTGATCGAAACGATAGCCTGAAG
+>smr0018_11
+TCGTCCAATAACGATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTATCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_12
+TCGTCCAATAACCATAACCACCGAGGATATCTTTTCTTTTCTCAATATAAACCGGTTCAG
+AAAATTCTAAATGTTGATATTTCGGAAGAGATAAGATTTTATATTTTACTTGTCCAACTA
+AGATGTAATTCTTCTCTTTAGGATTGATGGTTACTGAATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGATCAGAATCGTCAGGAACGGACACTTGATCATTAGATGTCT
+TTCCGGGAAGAGTAAACCCTTTATCCTGATCCGGTCCAAAGATGGCAATTACATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0018_13
+TCGTCCAATAACCATAACCACCGAGGATATTTTTTCTTTTCTCAATATAAACGGGTTCAG
+AAAATACTAAATGTTGATATTTAGGAAGATATAAGATTTCATATTTTCCTTGTCCAACTA
+AGAGGTAATTCTTCTCTTTAGGATTGATGGTTACTGGATTACCAACACTTTGAGGTATGT
+TAAGTTTTTTTATATTTAGGTCAGAATAGTCAGGAACGGACACTTGATCATTAGACGTAT
+TTCCGGGAAGAGTAAACCCTTTACCCTGATCCGGTCCAAAGATGGCAATTATATCTTTGT
+TGTTTCTTGTGATCGAAACGATAGCCTGAA
+>smr0199_1
+ACCTATTCCCGTTCCTGATGAGATTTACACTCAGTGCATTACGGATGCCGCCCGCTACTT
+CGGAATTGATGCTGAACTGGTTTTTACGTTGTTTGACAATGAAGGCGGTAAGGTTGGTAC
+TTTCAGCAGGAATACTAACGGCACTTATGATATTGGCCCAATGCAGATCAACTCATCCAA
+TCTACCTGAAATAAAAAAGCATTTCCCGACGGTAACGTGGCGGGTTCTGGCTTACGATGC
+TTGCGCAAGTTTCTGGGTTGGAACATGGTGGCTCTATAGAAAAATTGTTGATCGCAAGGG
+CAATGTGTTTGAAGGGATTGCAGATTACAACAGCAAAACCCCAAAGGTACGTGCAAAGTA
+CATATTTAACTTCATGGTAAAGTACAATCGCCGGATCCAGCAGCGTAACGGGATGGGTGA
+GCTTTATCAATGGACCCAGCAACCTCCTCGATACAATGGC
+>smr0199_2
+ACCTATTCCCGTTCCTGATGAGATTTACACTCAGTGCATTACGGATGCCGCCCGCTTCTT
+CGGAATTGATGCCGAACTGGTTTTTACGTTGTTTGACAATGAAGGCGGTAAGGTTGGTAC
+TTTCAGCAGGAATACTAACGGCACTTATGATATTGGCCCGATGCAGATCAACTCATCCAA
+TCTGCCTGAAATAAAAAAGCATTTCCCGACTGTAACGTGGCGGGTTCTGGCTTACGATGC
+TTGCGCTAGTTTCTGGGTTGGAACATGGTGGCTCTACAGAAAAATTGTTGATCGCAAGGG
+CAATGTGTTTGAAGGGATTGCAGATTACAACAGCAAAACCCCAAAGGTACGTGCAAAGTA
+CATATTTAACTTCATGGTAAAGTACAATCGCCGGATCCAGCAGCGTAACGGGATGGGTGA
+GCTTTATCAATGGACCCAACAACCTCCTCGATACAATGGC
+>smr0199_3
+ACCTATTCCCGTTCCTGATGAGATTTACACTCAGTGCATTACGGATGCCGCCCGCTACTT
+CGGAATTGATGCTGAACTGGTTTTTACGTTGTTTGACAATGAAGGCGGTAAGGTTGGTAC
+TTTCAGCAGGAATACAAACGGCACTTATGATATTGGCCCAATGCAGATCAACTCATCCAA
+TCTGCCTGAAATAAAAAAGCATTTTCCGACGGTAACGTGGCGGGTTCTGGCTTACGATGC
+TTGCGCAAGTTTCTGGGTTGGAACATGGTGGCTCTATAGAAAAATTGTTGATCGCAAGGG
+CAATGTGTTTGAAGGGATTGCAGATTACAACAGCAAAACCCCAAAGGTACGTGCAAAGTA
+CATATTTAACTTCATGGTAAAGTACAATCGCCGAATCCAGCAGCGTAACGGGATGGGTGA
+GCTTTATCAATGGACCCAGCAACCTCCTCGATACAATGGC
+>smr0199_4
+ACCTATTCCCGTTCCTGATGAAATTTACACTCAGTGCATAACGGATGCCGCCCGCTACTT
+CGGAATTGATGCTGAACTGGTTTTTACGTTGTTTGACAATGAAGGCGGTAAGGTTGGTAC
+TTTCAGCAGGAATACTAACGGCACTTATGATATTGGCCCAATGCAGATCAACTCATCCAA
+TCTGCCTGAAATAAAAAAGCATTTCCCGACGGTAACGTGGCGTGTTCTGGCTTACGATGC
+TTGCGCAAGTTTCTGGGTTGGAACATGGTGGCTCTATAGAAAAATTGTTGATCGCAAGGG
+CAATGTGTTTGAAGGGATTGCAGATTACAACAGCAAAACCCCAAAGGTACGTGCAAAGTA
+CATATTTAACTTCATGGTAAAGTACAATCGTCGGATCCAGCAGCGTAACGGGATGGGTGA
+GCTTTATCAATGGACCCAGCAACCTCCTCGATACAATGGC
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/inchi2.txt.clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/inchi2.txt.clean Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,18 @@
+ST smr0018 smr0199
+1 1 1
+2 2 2
+3 3 2
+4 4 2
+5 2 3
+6 6 2
+7 4 3
+8 7 2
+9 8 2
+10 9 2
+11 10 2
+12 11 2
+13 12 2
+14 1 2
+15 13 1
+16 1 4
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/inci1.clpx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/inci1.clpx Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,31 @@
+ST CLONAL_COMPLEX
+2 CC-2
+3 CC-3
+5 CC-5
+7 CC-7
+8 CC-31
+9 CC-9
+10 CC-5
+12 CC-12
+15 CC-7
+17 CC-2
+21 CC-5
+23 CC-2
+26 CC-26
+27 CC-26
+28 CC-3
+29 CC-26
+30 CC-7
+31 CC-31
+36 CC-5
+38 CC-3
+42 CC-3
+49 CC-9
+50 CC-12
+58 CC-58
+59 CC-58
+61 CC-61
+62 CC-61
+64 CC-3
+66 CC-12
+68 CC-31
\ No newline at end of file
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/inci1.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/inci1.fsa Wed Aug 25 21:12:34 2021 +0000
b
b'@@ -0,0 +1,1193 @@\n+>repI1_1\n+GAGAGATGGCATGTACGGGCAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTT\n+>repI1_2\n+GAGAGATGGCATGTACGGACAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTT\n+>repI1_3\n+GAGAGATGGCATGTACGGGCAGTAAGTCAGAAGACTGAAGATGCTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTT\n+>repI1_4\n+GAGAGATGGCATTTACGGGCAGTAAGTCAGAAGACCGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACAATCTTTCG\n+>repI1_5\n+GAGAGATGGCATGTACGGACAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACAATCTTTCG\n+>repI1_6\n+GAGAGATGGCACGTACGGGCAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACAATCTTTCG\n+>repI1_7\n+GAGAGATGGCATGTACGGGCAGTAAGTCAGAAGACAGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTT\n+>repI1_8\n+GAGAGATGGCATGTACGGACAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTTTCTT\n+>repI1_9\n+GATAGATGGCATGTACGGGCAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTT\n+>repI1_10\n+GAGAGATGGCATGTACGGGCAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACAATCTTTCG\n+>repI1_11\n+GAGGGATGGCATGTACGGGCAGTAAGTCAGAAGACTGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTT\n+>repI1_12\n+GAGAGATGGCAGGTACGGAGAGTAAGTCAGAAGACTGAAGATGTGCCGGAAGCCATAAAA\n+GGAAAACCCCCATAATCTTCTC\n+>repI1_13\n+GAGAGATGGCATGTACGGGCGGTAAGTCAGAAGACTGAAGATGCTCCGGAAGCCATAAAA\n+GGAAAACCCCCACTATCTTTCTT\n+>repI1_14\n+GAAAGATGGCATTTACGGGCAGTAAGTCAGAAGACCGAAGATGTTCCGGAAGCCATAAAA\n+GGAAAACCCCCACAATCTTTCG\n+>ardA_1\n+AATACAACTGTGGAAGCATCGCTGGCCGCTGGTTTGACCTGGCCACGTTTGATGATGAAC\n+GCGACTTTTTCGCCGCCTGCCGCTCTCTTCACCAGGATGAAGCCGATCCTGAACTGATGT\n+TTCAGGATTATGAGGGATTCCCGGGGAATATGGCCTCAGAATGCCATATCAACTGGGCCT\n+ATGTTGAAGGCTTCCGCCAGGCGCGGGATGAAGGCTGCGAAGAGGCTTATCGTCTCTGGG\n+TGGATGATACCGGTGAGACGGATTTTGACACCTTCCGCGATGCCTGGTGGGGCGAGGCTG\n+ACAGTGAGGAGGCTTTTGCGGTTGAGTTCGCCAGTGATACCGG\n+>ardA_2\n+AATACAACTGTGGAAGCATCGCCGGACGCTGGTTTGACCTGACCACGTTTGATGATGAGC\n+GCGACTTTTTCGCCGCCTGCCGTGCTCTTCACCAGGATGAAGCCGATCCTGAACTGATGT\n+TTCAGGATTATGAGGGATTCCCGGGGAATATGGCCTCTGAATGCCATATCAACTGGGCCT\n+GGGT
TGAAGGCTTCCGCCAGGCACGGGATGAAGGCTGCGAAGAGGCTTATCGTCTCTGGG\n+TGGAGGATACCGGTGAGACGGATTTTGACACCTTCCGCGATGCCTGGTGGGGCGAGGCTG\n+ACAGTGAGGAGGCTTTTGCGGTTGAGTTCGCCAGTGATACCGG\n+>ardA_3\n+AATACAACTGTGGAAGCATCGCCGGACGCTGGTTTGACCTGACCACGTTTGATGATGAGC\n+GCGACTTTTTCGCCGCCTGCCGTGCTCTTCACCAGGATGAAGCCGATCCTGAACTGATGT\n+TTCAGGATTATGAGGGATTCCCGGGGAATATGGCCTCTGAATGCCATATCAACTGGGCCT\n+GGGTTGAAGGCTTCCGCCGGGCACGGGATGAAGGCTGCGAAGAGGCTTATCGTCTCTGGG\n+TGGATGATACCGGTGAGACGGATTTTGACACCTTCCGCGATGCCTGGTGGGGCGAGGCTG\n+ACAGTGAGGAGGCTTTTGCGGTTGAGTTCGTCAGTGATACCGG\n+>ardA_4\n+AATACAACTGTGGAAGCATCGCCGGACGCTGGTTTGACCTGACCACGTTTGATGATGAGC\n+GCGACTTTTTCGCCGCCTGCCGTGCTCTTCACCAGGATGAAGCCGATCCTGAACTGATGT\n+TTCAGGATTATGAGGGATTCCCGGGGAATATGGCCTCTGAATGCCATATCAACTGGGCCT\n+GGGTTGAAGGCTTCCGCCTGGCACGGGATGAAGGCTGCGAAGAGGCTTATCGTCTCTGGG\n+TGGAGGATACCGGTGAGACGGATTTTGACACCTTCCGCGATGCCTGGTGGGGCGAGGCTG\n+ACAGTGAGGAGGCTTTTGCGGTTGAGTTCGCCAGTGATACCGG\n+>ardA_5\n+AATACAACTGTGGAAGCATCGCCGGACGCTGGTTTGACCTGACCACGTTTGATGATGAGC\n+GCGACTTTTTCGCCGCCTGCCGTGCTCTTCACCAGGATGAAGCCGATCCTGAACTGATGT\n+TTCAGGATTATGAGGGATTCCCGGGGAATATGGCCTCTGAATGTCATATCAACTGGGCCT\n+GGGTTGAAGGCTTCCGCCGGGCACGGGATGAAGGCTGCGAAGAGGCTTATCGTCTCTGGG\n+TGGATGATACCGGTGAGACGGATTTTGACACCTTCCGCGATGCCTGGTGGGGCGAGGCTG\n+ACAGTGAGGAGGCTTTTGCGGTTGAGTTCGCCAGTGATACCGG\n+>ardA_6\n+AATATAACTGCGGAAGCATCGCCGGCCGCTGGTTTGACCTGACCACGTTTGATGATGAAC\n+GCGATTTTTTCGCCGCCTGCCGTGCCCTTCACCAGGATGAAACCGATCCGGAACTGATGT\n+TTCAGGATTATGAGGGTTTCCCGGGGAATATGGCCTCTGAATGTCATATCAACTGGGCCT\n+GGGTTGAAGGCTTCCGCCGGGCACGGGATGAAGGCTGCGAAGAGGCTTATCGTCTCTGGG\n+TGGATGATACCGGTGAGACGGATTTTGACAGCTTCCGCGATGCCTGGTGGGGCGAGGCTG\n+ACAGTGAGGAGGCTTTTGCGGTTGAGTTCGTCAGTGATACCGG\n+>ardA_7\n+AATACAACTGTGGAAGCATCGCTGGCCGCTGGTTTGACCTGGCCACGTTTGATGATGAAC\n+GCGACTTTTTCGCCGCCTGCCGCTCTCTTCACCAGGATGAAGCCGATCCTGAACTGATGT\n+TTCAGGATTATGAGGGATTCCCGGGGAATATGGCCTCAGAATGCCATATCAACTGGGCCT\n+GGGTTGAAGGCTTCCGCCAGGCGCGGGATGAAGGCTGCGAAGAGGCTTATCGTCTCTGGG\n+TGGATGATACCGGTGAGACGGATTTTGACACCTTCCGCGATGCCTGGTGGGGCGAGGCTG\n+ACAGTGAGGAGGCTTTTGCGGTT
GAGTTCGCCAGTGATACCGG\n+>ardA_8\n+AATACAACTGTGGAAGCATCGCCGGACGCTGGTTTGACCTGACCACGTTTGATGATGAGC\n'..b'CGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCGGGGTGGCGGCGGGCGTTGACGAAGATACGCTGTTTCC\n+CCTGAATGGATTTCGTGGCTTGTTTTGGCTATTTGCTGTCGTTGGAGATTCAGGGTAAAT\n+GGATGTGGTTACCA\n+>pilL_21\n+TTTTGTTTCTTCTGCCCACTTGATAATGTTCTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTAACAGGCGTTCCGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCGGGGTGGCGGCGGGCGTTAACGAAGATACGCTGTTTCC\n+CCTGAATGGATTTCGTGGTTTGTTCTGGCTATTTGCTGTTGTTGGAGATTCAGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_22\n+TTTTGTTTCTTCTGCCCACTTAATAATGTTTTCCCTTAATGTAGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTGACAGGTGTCCCGGTCATTAGTGGGATAGACTT\n+GACTGTAGAGCCGGTCGGAGTCGGGATTGCTGCGGGCGTAGACGGAGATACGCTGTTTCC\n+CCTGAATGGGTTTCGTGGTTTGTTCTGGCTATTTGCTGTCGTTGAAGACTCCGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_23\n+TTTTGTTTCTTCTGCCCACTTAATAATGTTTTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTGACAGGCGTCCCGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCGGGGTGGCGGCGGGGGTTGACGAAGATACGCTGTTTCC\n+TCTGAATGGATTTCGTGGTTTGTTCTGGCTATTTGCTGTTGTTGGAGATTCAGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_24\n+TTTTGTTTCTTCTGCCCACTTGATAATGTTCTCTCTTAATGTGGTGCCTGCCAGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTAACAGGCGTTCCGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCGGGGTGGCGGCGGGCGTTGACGAAGATACGCTGTTTCC\n+CCTGAATGGATTTCGTGGCTTGTTTTGGCTATTTGCTGTCGTTGGAGATTCAGGGTAAGT\n+GGATGTGGTTACCA\n+>pilL_25\n+TTTTGTTTCTTCTGCCCACTTAATAATGTTTTCCCTTAATGTAGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTGACAGGTGTCCCGGTCATGAGTGGGATAGACTT\n+GACTGTAGAGCCGGTCGGAGTCGGGATTGATGCGGGCGTAGACGGAGATACGCTGTTTCC\n+CCTGAATGGGTTTCGTGGTTTGTTCTGGCTATTTGCTGTCGTTGAAGACTCCGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_26\n+TTTTGTTTCTTCTGCCCACTTAATAATGTTCTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTGACAGGCGTCCCGGTCATGAGCTGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTTGGGGTGGCGGCGGGGGTTGACGAAGATACGCTGTTTCC\n+TCTGAATGGATTTCGTGGTTTGTTCTGGCTATTTGCTGTTGTTGGAGATTCAGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_27\n+TTTGTTTCTTCTGCCCACTTAGATAATGTTTTCCCTTAATG
TAGGTGCCTGCCGGCGCAC\n+GCCACTCTTTACCCTGAGATACCGGTTTGACAGGTGTCCCGGTCATGAGTGGGATAGACT\n+TGACTGTAGAGCCGGTCGGAGTCGGGATTGCTGCGGGCGTAGACGGAGATACGCTGTTTC\n+CCCTGAATGGGTTTCGTGGTTTGTTCTGGCTATTTGCTGTCGTTGAAGACTCCGGGGAAG\n+TGGATGTGGTTACC\n+>pilL_28\n+TTTTGTTTCTTCTGCCCACTTGATAATGTTCTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTAACAGGCGTTCCGGTCATGAGAGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCGGGGTGGCGGCGGGGGTTGACGAAGATACGCTGTTTCC\n+TCTGAATGGATTTCGTGGTTTGTTCTGGCTATTTGCTGTTGTTGGAGATTCAGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_29\n+TTTTGTTTCTTCTGCCCACTTGATAATGTTCTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTAACAGGCGTTCCGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCAGGGTGGTGGCGGGCGTTGACGAAGATACGCTGTTTCC\n+CCTGAATGGATTTCGTGGTTTGTTCTGGCTATTTGCTGTCGTTGGAGATTCAGGGGTAGT\n+GGATGTGGTTACCA\n+>pilL_30\n+TTTTGTTTCTTCTGCCCACTTGATAATGTTCTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTAACAGGCATTCCGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCGGGGTGGCGGCGGGCGTTGACGAAGATACGCTGTTTCC\n+CCTGAATGGATTTCGTGGCTTGTTTTGGCTATTTGCTGTCGTTGGAGATTCAGGGTAAGT\n+GGATGTGGTTACCA\n+>pilL_31\n+TTTTGTTTCTTCTGCCCACTTAATAATGTTTTCCCTTAATGTAGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTGACAGGTGTCCCGGTCATGAGTGGGATAGAATT\n+GACTGTGGAGCCGGTCGGAGTCGGGATTGCAGCGGGCGTAGACGGAGATACGCTGTTTCC\n+CCTGAATGGGTTTCGTGGTTTGTTCTGGCTATTTGCTGTCGTTGAAGACTCCGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_32\n+TTTTGTTTCTTCTGCCCACTTGATAATGTTCTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGATACCGGTTTAACAGGCGTTCCGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGTCGGTTGGAGTCGGGGTGGCGGCGGGCGTTGACGAAGATACGCTGTTTCC\n+CCTGAATGGATTTCGTGGTTTGTTCTGGCTATTTGCTGTCGTTGGAGATTCAGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_33\n+TTTTGTTTCTTCTGCCCACTTAATAATGTTTTCCCTTAATGTAGTGCCTGCCGGCGCACG\n+CCATTCTTTACCCTGAGATACCGGTTTGACAGGTGTCCCGGTCATGAGTGGGATAGACTT\n+GACTGTAGAGCCGGTCGGAGTCGGGATTGCTGCGGGCGTAGACGGAGATACGCTGTTTCC\n+CCTGAATGGGTTTCGTGGTTTGTTCTGGCTATTTGCTGTCGTTGAAGACTCCGGGGAAGT\n+GGATGTGGTTACCA\n+>pilL_34\n+TTTTGTTTCTTCTGCCCACTTGATAATGTTCTCTCTTAATGTGGTGCCTGCCGGCGCACG\n+CCACTCTTTACCCTGAGA
TACCGGTTTAACAGGCGTCCCGGTCATGAGCGGGATAGACTT\n+GACTGTGGAGCCGGTTGGAGTCGGGGTGGTGGCGGGCGTTGACGAAGATACGCTGTTTCC\n+CCTGAATGGATTTCGTGGTTTGTTTTGGCTATTTGCTGTCGTTGGAGATTTAGGGGTAGT\n+GGATGTGGTTACCA\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/inci1.txt.clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/inci1.txt.clean Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,331 @@
+ST repI1 ardA trbA sogS pilL
+1 1 1 1 1 1
+2 1 2 3 2 1
+3 2 1 4 1 2
+4 1 3 3 4 1
+5 1 2 2 3 3
+6 1 2 5 4 2
+7 2 1 5 4 2
+8 3 2 6 3 3
+9 3 2 3 3 3
+10 1 4 2 3 3
+11 1 2 3 6 2
+12 1 4 3 4 1
+13 1 1 5 1 1
+14 1 2 9 7 5
+15 2 1 5 1 2
+16 1 5 10 8 6
+17 1 2 3 6 1
+18 1 4 3 2 1
+19 1 3 3 4 3
+20 1 1 3 9 1
+21 1 2 11 3 3
+22 1 6 3 4 1
+23 1 2 3 1 1
+24 1 4 4 1 1
+25 1 4 5 4 1
+26 1 4 13 2 1
+27 1 2 13 2 1
+28 2 4 4 1 2
+29 1 1 13 2 1
+30 2 7 5 4 2
+31 3 4 6 3 3
+32 3 4 14 3 3
+33 1 2 7 1 1
+34 1 2 8 5 4
+35 3 5 10 3 3
+36 1 4 2 9 3
+37 1 8 12 3 7
+38 1 1 4 1 2
+39 1 5 4 1 3
+40 1 6 3 8 3
+41 1 9 13 3 6
+42 2 6 4 1 2
+43 1 1 15 11 1
+44 1 5 3 2 3
+45 1 1 3 2 1
+46 1 2 2 9 2
+47 1 4 2 2 2
+48 1 4 16 9 2
+49 3 10 3 3 3
+50 1 4 12 4 1
+51 1 1 15 9 3
+52 1 4 15 11 2
+53 2 1 15 11 2
+54 1 4 5 11 2
+55 4 5 15 11 3
+56 2 1 15 11 3
+57 3 2 6 13 8
+58 1 2 17 1 7
+59 1 2 4 1 7
+60 1 2 8 6 3
+61 2 1 11 3 2
+62 2 1 11 7 2
+63 1 4 5 4 7
+64 2 1 4 1 4
+65 1 4 18 10 3
+66 1 4 11 4 1
+67 1 4 6 10 3
+68 1 4 6 3 3
+69 1 4 19 4 1
+70 1 9 5 6 9
+71 4 11 8 14 10
+72 1 4 15 4 2
+73 1 2 15 3 3
+74 1 5 15 4 2
+75 1 2 15 11 3
+76 1 8 15 1 3
+77 1 5 13 11 3
+78 4 5 13 11 3
+79 1 5 15 11 6
+80 1 4 5 4 2
+81 4 2 15 11 1
+82 2 6 15 11 3
+83 2 1 15 1 2
+84 1 12 16 3 6
+85 2 4 15 11 2
+86 1 4 3 4 2
+87 8 1 4 1 2
+88 1 16 20 16 3
+89 1 3 15 15 6
+90 4 5 15 4 2
+91 2 4 3 4 2
+92 1 1 15 1 1
+93 1 14 4 8 2
+94 1 14 15 11 2
+95 1 17 13 2 1
+96 1 5 15 11 2
+97 1 2 16 9 1
+98 1 3 16 9 10
+99 2 1 4 1 1
+100 1 4 13 2 2
+101 2 1 4 2 2
+102 1 1 16 6 10
+103 1 18 13 2 1
+104 2 3 8 6 12
+105 3 19 15 3 13
+106 4 10 3 1 7
+107 1 9 5 4 14
+108 2 13 5 1 2
+109 5 3 15 11 10
+110 2 1 15 11 15
+111 1 3 15 11 2
+112 1 5 10 6 10
+113 1 2 5 10 10
+114 1 2 8 10 3
+115 1 4 5 10 10
+116 1 2 6 2 1
+117 4 19 10 17 16
+118 4 2 21 8 17
+119 1 21 17 6 18
+120 1 2 13 1 1
+121 1 4 22 2 1
+122 1 1 5 9 1
+123 1 22 18 2 1
+124 1 14 13 2 1
+125 1 14 5 2 1
+126 3 4 18 10 3
+127 1 2 9 2 7
+128 1 4 3 1 1
+129 1 2 3 18 1
+130 1 2 8 10 19
+131 1 9 23 4 3
+132 1 2 23 4 3
+133 4 4 5 6 7
+134 1 2 8 6 10
+135 2 23 4 1 2
+136 1 2 24 9 1
+137 4 7 15 11 3
+138 1 1 15 11 2
+139 1 4 4 11 3
+140 2 4 15 1 2
+141 4 5 15 4 3
+142 2 1 14 1 2
+143 1 1 14 11 2
+144 1 5 14 11 2
+145 3 2 10 3 3
+146 1 4 3 10 1
+147 1 14 3 2 1
+148 1 4 3 4 3
+149 1 2 17 2 1
+150 1 5 3 1 1
+151 1 2 3 20 1
+152 1 4 25 19 2
+153 6 24 17 7 10
+154 1 3 8 10 10
+155 1 2 16 9 2
+156 1 2 5 4 1
+157 2 18 4 1 2
+158 3 19 15 3 8
+159 1 2 9 1 7
+160 1 2 8 1 6
+161 1 5 10 4 6
+162 1 13 5 1 1
+163 1 19 6 1 2
+164 1 4 13 2 3
+165 1 4 5 4 3
+166 1 3 8 1 6
+167 1 5 10 8 1
+168 4 4 15 11 3
+169 1 2 16 21 6
+170 3 25 6 3 3
+171 1 3 27 6 10
+172 2 26 5 1 2
+173 1 27 20 16 3
+174 3 2 6 22 3
+175 3 28 6 3 3
+176 1 8 12 3 20
+177 3 2 15 23 13
+178 1 4 3 18 1
+179 1 2 17 1 3
+180 1 1 8 3 10
+181 1 2 8 3 10
+182 1 4 13 11 2
+183 1 2 8 10 10
+184 4 5 10 8 22
+185 3 2 29 3 13
+186 1 3 6 25 3
+187 2 1 5 26 2
+188 3 4 15 3 3
+189 4 2 3 6 7
+190 1 4 5 1 1
+191 1 30 15 11 6
+192 1 31 8 6 12
+193 1 32 17 1 7
+194 2 34 4 1 2
+195 2 33 4 1 2
+196 1 3 8 3 6
+197 1 2 17 1 2
+198 1 4 4 1 2
+199 1 4 16 10 2
+200 1 2 17 3 2
+201 1 9 3 4 3
+202 1 3 5 4 3
+203 7 4 13 2 1
+204 1 2 30 1 5
+205 1 36 6 7 23
+206 1 1 5 2 1
+207 1 2 16 1 1
+208 1 2 3 10 3
+209 1 2 21 9 3
+210 1 1 9 2 7
+211 1 19 2 9 3
+212 1 4 8 6 12
+213 1 2 2 9 3
+214 2 37 4 1 2
+215 1 6 13 2 1
+216 1 38 16 3 2
+217 1 4 15 11 3
+218 2 40 4 1 2
+219 2 4 3 18 2
+220 2 1 4 27 2
+221 2 39 4 1 2
+222 1 3 5 4 1
+223 10 41 31 7 3
+224 1 4 17 1 24
+225 1 2 3 1 3
+226 1 2 5 2 1
+227 1 9 4 5 3
+228 1 2 21 10 10
+229 1 9 32 23 3
+230 1 2 5 4 3
+231 1 9 5 4 3
+232 4 29 28 24 21
+233 1 4 3 29 1
+234 1 43 3 1 1
+235 1 2 17 5 26
+236 1 25 6 3 27
+237 1 2 21 4 1
+238 1 25 6 3 3
+239 4 5 15 9 3
+240 1 2 17 9 1
+241 1 4 17 4 1
+242 1 2 16 3 6
+243 1 2 3 4 28
+244 1 2 2 10 10
+245 11 44 34 3 2
+246 2 2 5 6 12
+247 3 45 35 30 29
+248 1 2 36 6 6
+249 1 2 37 3 2
+250 1 2 38 2 3
+251 1 2 27 1 30
+252 1 46 27 1 3
+253 1 47 2 4 1
+254 1 47 2 9 1
+255 1 4 5 4 4
+256 1 2 27 2 10
+257 1 4 17 1 2
+258 4 14 15 4 4
+259 2 4 5 10 2
+260 1 4 2 3 4
+261 1 2 26 12 1
+262 1 4 9 12 1
+263 1 19 6 3 1
+264 1 2 9 1 1
+265 1 11 3 1 1
+266 1 2 3 9 2
+267 1 2 3 9 3
+268 2 3 3 10 2
+269 1 7 5 1 1
+270 1 6 5 4 3
+271 1 1 5 4 1
+272 3 14 6 3 8
+273 3 4 6 3 8
+274 1 3 8 6 10
+275 4 2 10 3 3
+276 1 37 13 2 1
+277 1 4 16 1 4
+278 1 3 16 6 3
+279 3 2 16 3 8
+280 3 29 16 3 8
+281 3 4 17 3 3
+282 1 2 39 10 1
+283 1 2 40 2 1
+284 1 4 41 9 6
+285 1 2 42 10 10
+286 3 2 43 23 13
+287 4 5 15 31 3
+288 13 1 3 30 5
+289 14 2 40 6 5
+290 1 3 3 6 31
+291 1 4 3 4 32
+292 10 15 26 7 3
+293 3 4 13 3 25
+294 2 14 5 4 2
+295 2 48 4 1 2
+296 2 49 4 1 2
+297 2 2 15 4 17
+298 1 18 3 4 1
+299 1 2 5 4 10
+300 1 4 44 4 1
+301 1 2 45 3 3
+302 1 3 3 6 10
+303 1 1 1 3 1
+304 1 5 3 2 1
+305 2 2 3 4 2
+306 1 2 2 2 1
+307 1 4 5 9 3
+308 1 2 4 2 1
+309 2 1 12 1 2
+310 1 8 33 3 7
+311 1 7 2 10 3
+312 2 50 4 1 2
+313 1 51 3 1 1
+314 1 2 17 4 1
+315 1 2 11 1 1
+316 1 38 16 2 33
+317 2 19 4 1 2
+318 2 52 4 1 2
+319 1 4 27 4 6
+320 3 2 15 3 34
+321 1 4 46 4 2
+322 3 19 15 3 34
+323 3 4 13 32 3
+324 1 38 4 11 4
+325 1 2 27 1 2
+326 1 2 47 3 3
+327 1 2 16 10 10
+328 2 2 5 4 2
+329 2 1 17 1 2
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/incn.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/incn.fsa Wed Aug 25 21:12:34 2021 +0000
b
b'@@ -0,0 +1,252 @@\n+>repN_1\n+TGCTTACTACCTCTCGCTAAAAGCAAAGCGCGTTCTCTGGTTATGTCTTATGCAGACGTA\n+TTTCACAGCTTCAGTAAGCGAAGATGATGATGAGATGGCTGTACTCGGTGACTCTACTTT\n+CAAAGTAAAGGTGGCTGACTATCAGCAAATTTTTCAGGTAAGCCGTAACCAGGCTATCAA\n+GGATGTTAAAGAAGGCGTGTTTGAGTTAAGCCGTTCTGCGGTAATCTTTTACCCGAAAGA\n+AGGGAGTTTTGACTGCGTCGCGCGCCCCTGGCTAACAGAGGCTGGCAGCCGATCAGCTCG\n+TGGTATCTGGGAAATCGAATTTAACCATAAACTCCTGCGGTACATTTACGGCCTGACGAA\n+CCAGTTCACCACCTACTCGCTCCGCGATTGTGGCAGTCTTCGAAATCCACGGACGATCCG\n+CCTTTATGAAAGTCTTGCTCAATTCAAATCTTCAGGCTTATGGG\n+>repN_2\n+TGCTTACTACCTCTCGCTAAAAGCAAAGCGCGTTCTCTGGTTATGTCTTATGCAGACGTA\n+TTTCACAGCTTCAGTAAGCGAAGATGATGATGAGATGGCTGTACTCGGTGACTCTACTTT\n+CAAAGTAAAGGTGGCTGACTATGAGCAAATTTTTCAGGTAAGCCGTAACCAGGCTATCAA\n+GGATGTTAAAGAAGGCGTGTTTGAGTTAAGCCGTTCTGCGGTAATCTTTTACCCGAAAGA\n+AGGGAGTTTTGACTGCGTCGCGCGCCCCTGGCTAACAGAGGCTGGCAGCCGATCAGCTCG\n+TGGTATCTGGGAAATCGAATTTAACCATAAACTCCTGCGGTACATTTACGGCCTGACGAA\n+CCAGTTCACCACCTACTCGCTCCGCGATTGTGGCAGTCTTCGAAATCCACGGACGATCCG\n+CCTTTATGAAAGTCTTGCTCAATTCAAATCTTCAGGCTTATGGG\n+>repN_3\n+TGCTTACTACCTCTCGCTAAAAGCAAAGCGCGTTCTCTGGTTATGTCTTATGCAGACGTA\n+TTTCACAGCTTCAGTAAGCGAAGATGATGATGAGATGGCTGTACTCGGTGACTCTACTTT\n+CAAAGTAAAGGTGGCTGACTATGAGCAAATTTTTCAGGTAAGCCGTAACCAGGCTATCAA\n+GGATGTTAAAGAAGGCGTGTTTGAGTTAAGCCGTTCTGCGGTAATCTTTTACCCGAAAGA\n+AGGAAGTTTTGACTGCGTCGCGCGCCCCTGGCTAACAGAGGCTGGCAGCCGATCAGCTCG\n+TGGTATCTGGGAAATCGAATTTAACCATAAACTCCTGCGGTACATTTACGGCCTGACGAA\n+CCAGTTCACCACCTACTCGCTCCGCGATTGTGGCAGTCTTCGAAATCCACGGACGATCCG\n+CCTTTATGAAAGTCTTGCTCAATTCAAATCTTCAGGCTTATGGG\n+>repN_4\n+TGCTTACTACCTCTCGCTAAAAGCAAAGCGCGTTATCTGGTTATGTCTTATGCAGACGTA\n+TTTCACAGCTTCAGTAAGCGAAGATGATGATGAGATGGCTGTACTCGGTGACTCTACTTT\n+CAAAGTAAAGGTGGCTGACTATGAGCAAATTTTTCAGGTAAGCCGTAACCAGGCTATCAA\n+GGATGTTAAAGAAGGCGTGTTTGAGTTAAGCCGTTCTGTGGTAATCTTTTACCCGAAAGA\n+AGGGAGTTTTGACTGCGTCGCGCGCCCCTGGCTAACAGAGGCTGGCAGCCGATCAGCTCG\n+TGGTATCTGGGAAATCGAATTTAACCATAAACTCCTGCGGTACATTTACGGCCTGACGAA\n+CCAGTTCACCACCTACTCGCTCCGCGATTGTGGCAGTCTTCGAAATCCACGGACGATCCG\n+CCTTTATGAAAGTCTTGCTCAATTCAAATCTTC
AGGCTTATGGG\n+>repN_5\n+TGCTTACTACCTCTCGCTAAAAGCAAAGCGCGTTCTCTGGTTATGCCTTATGCAGACGTA\n+TTTCACAGCTTCAGTAAGCGAAGATGATGATGAGATGGCTGTACTCGGTGACTCTACTTT\n+CAAAGTAAAGGTGGCTGACTATCAGCAAATTTTTCAGGTAAGCCGTAACCAGGCTATCAA\n+GGATGTTAAAGAAGGCGTGTTTGAGTTAAGCCGTTCTGCGGTAATCTTTTACCCGAAAGA\n+AGGGCGTTTTGACTGCGTCGCGCGCCCCTGGCTAACAGAGGCTGGCAGCCGATCAGCTCG\n+TGGTATCTGGGAAATCGAATTTAACCATAAACTCCTGCCGTACATTTACGGCCTGACGAA\n+CCAGTTCACCACCTACTCGCTCCGCGATTGTGGCAGTCTTCGAAATCCCCGGACGATCCG\n+CCTTTATGAAAGTCTTGCTCAATTCAAATCTTCAGGCTTATGGG\n+>repN_6\n+TGCTTACTACCTCTCGCTAAAAGCAAAGCGCGTTCTCTGGTTATGTCTTATGCAGACGTA\n+TTTCACAGCTTCAGTAAGCGAAGATGATGATGAGATGGCTGTACTCGGTGACTCTACTTT\n+CAAAGTAAAGGTGGCTGACTATCAGCAAATTTTTCAGGTAAGCCGTAACCAGGCTATCAA\n+GGATGTTAAAGAAGGCGTGTTTGAGTTAAGCCGTTCTGCGGTAATCTTTTACCCGAAAGA\n+AGGGCGTTTTGACTGCGTCGCGCGCCCCTGGCTAACAGAGGCTGGCAGCCGATCAGCTCG\n+TGGTATCTGGGAAATCGAATTTAACCATAAACTCCTGCGGTACATTTACGGCCTGACGAA\n+CCAGTTCACCACCTACTCGCTCCGCGATTGTGGCAGTCTTCGAAATCCCCGGACGATCCG\n+CCTTTATGAAAGTCTTGCTCAATTCAAATCTTCAGGCTTATGGG\n+>repN_7\n+TGCTTACTACCTCTCGCTAAAAGCAAAGCGCGTTCTCTGGTTATGTCTTATGCAGACGTA\n+TTTCACAGCTTCAGTAAGCGAAGATGATGATGAGATGGCTGTACTCGGTGACTCTACTTT\n+CAAAGTAAAGGTGGCTGACTATCAGCAAATTTTTCAGGTAAGCCGTAACCAGGCTATCAA\n+GGATGTTAAAGAAGGCGTGTTTGAGTTAAGCCGTTCTGCGGTAATCTTTTACCCGAAAGA\n+GGGGCGTTTTGACTGCGTCGCGCGCCCCTGGCTAACAGAGGCTGGCAGCCGATCAGCTCG\n+TGGTATCTGGGAAATCGAATTTAACCATAAACTCCTGCGGTACATTTACGGCCTGACGAA\n+CCAGTTCACCACCTACTCGCTCCGCGATTGTGGCAGTCTTCGAAATCCCCGGACGATCCG\n+CCTTTATGAAAGTCTTGCTCAATTCAAATCTTCAGGCTTATGGG\n+>traJ_1\n+GTATAGGCTGTGAAGTTTTTTCGAAACTTCACTAAAAATAAGACGGCCATAGCCGAACCA\n+TTCTTCAGTAGCCATATCAGGGCTTTCCTGAACAATAGAGTTCACTAAGCGCTCGTAATC\n+ATATGAACGGCGAATTTCATTGAAAAACACCCAGCCTTCAGTGCGTTTATCATAGGCGTT\n+TAAAATAACATCGCCGGGACGATAGAAATTCTTTAAGAACCCCCCATTTGGATCTAAAGC\n+AATATTTTTGCCGCCTCTAATGATGCTCTTAAATAACAGTTCATTGAAAATTGTGGTTTT\n+ACCAGTACCGGTTGTACCGGCAATCGAAAAATGCAAGTTCTCAGCGTATGTAGGTATGGG\n+GATATTAGCCACGGTTAACTGGTTGACACCTCTTTCGCGTGTTTTATCAGCGAGTGTTCT\n+GGCGCGAACAAGCTCTGTACCACGATAAATCTTTTTGAATC
TTTCGCCTTTAAACACGCG\n+TGATTTATCATAAATGATAAAAGCGATCAGACCGCCAACACCAATAAACCAG'..b'CTTCACTAAAAATAAGACGGCCATAGCCGAACCA\n+TTCTTCAGTAGCCATATCAGGGCTTTCCTGAACAATAGAGTTCACTAAGCGCTCGTAATC\n+ATATGAACGGCGAATTTCATTGAAAAACACCCAGCCTTCAGTGCGTTTATCATAGGCGTT\n+TAAAATAACATCGCCGGGACGATAGAAATTCTTTAAGAAGCCCCCATATGGATCTAAAGC\n+AATATTTTTGCCGCCTCTAATGATGCTCTTAAATAACAGTTCATTGAAAATTGTGGTTTT\n+ACCAGTACCGGTTGTACCGGCAATCGAAAAATGCAAGTTCTCAGCGTATGTAGGTATGGG\n+GATATTAGCCACGGTTAACTGGTTGACACCTCTTTCGCGTGTTTTATCAGCGAGTGTTCT\n+GGCGCGAACAAGCTCTGTACCACGATAAATCTTTTTGAATCTTTCGCCTTTAAACACGCG\n+TGATTTATCATAAATGATAAAAGCGATCAGACCGCCAACACCAATAAACCAGCCAGCAAT\n+TAAAGCTGACCATAAAGG\n+>traJ_12\n+GTATAGGCTGTGAAGTTTTTTCGAAACTTCACTAAAAATAAGACGGCCATAGCCGAACCA\n+TTCTTCAGTAGCCATATCAGGGCTTTCCTGAACAATAGAGTTCACTAAGCGCTCGTAATC\n+ATATGAACGGCGAATTTCATTGAAAAACACCCAGCCTTCAGTGCGTTTATCATAGGCGTT\n+TAAAATAACATCGCCGGGACGATAGAAATTCTTTAAGAACCCCCCATTTGGATCTAAAGC\n+AATATTTTTGCCGCCTCTAATGATGCTCTTAAATAACAGTTCATTGAAAATTGTGGTTTT\n+ACCAGTACCGGTTGTACCGGCAATCGAAAAATGCAAGTTCTCAGCGTATGTAGGTATGGG\n+GATATTAGCCACGGTTAACTGGTTGACACCTCTTTCGAGTGTTTTATCAGCGAGTGTTCT\n+GGCGCTAACAAGCTCTGTACCACGATAAATCTTTTTGAATCTTTCGCCTTTAAACACGCG\n+TGATTTATCATAAATGATAAAAGCGATCAGACCGCCAACACCAATAAACCAGCCAGCAAT\n+TAAAGCTGACCATAAAGG\n+>traJ_13\n+GTATAGGCTGTGAAGTTTTTTCGAAACTTCACTAAAAATAAGACGGCCATAGCCGAACCA\n+TTCTTCAGTAGCCATATCAGGGCTTTCCTGAACAATAGAGTTCACTAAGCGCTCGTAATC\n+ATATGAACGGCGAATTTCATTGAAAAACACCCAGCCTTCAGTGCGTTTATCATAGGCATT\n+TAAAATAACATCGCCGGGACGATAGAAATTCTTTAAGAACCCCCCATTTGGATCTAAAGC\n+AATATTTTTGCCGCCTCTAATGATGCTCTTAAATAACAGTTCATTGAAAATTGTGGTTTT\n+ACCAGTACCGGTTGTACCGGCAATCGAAAAATGCAAGTTCTCAGCGTATGTAGGTATGGG\n+GATATTAGCCACGGTTAACTGGTTGACACCTCTTTCGCGTGTTTTATCAGCGAGTGTTCT\n+GGCGCTAACAAGCTCTGTACCACGATAAATCTTTTTGAATCTTTCGCCTTTAAACACGCG\n+TGATTTATCATAAATGATAAAAGCGATCAGACCGCCAACACCAATAAACCAGCCAGCAAT\n+TAAAGCTGACCATAAAGG\n+>traJ_14\n+GTATAGGCTGTGAAGTTTTTTCGAAACTTCACTAAAAATAAGACGGCCATAGCCGAACCA\n+TTCTTCAGTAGCCATATCAGGGCTTTCCTGAACAATAGAGTTCACTAAGCGCTCGTAATC\n+ATATGAACGGCGAATTTCATTGAA
AAAAACCCAGCCTTCAGTGCGTTTATCATAGGCGTT\n+TAAAATAACATCGCCGGGACGATAGAAATTCTTTAAGAACCCCCCATTTGGATCTAAAGC\n+AATATTTTTGCCGCCTCTAATGATGCTCTTAAATAACAGTTCATTGAAAATTGTGGTTTT\n+ACCAGTACCGGTTGTACCGGCAATCGAAAAATGCAAGTTCTCAGCGTATGTAGGTATGGG\n+GATATTAGCCACGGTTAACTGGTTGACACCTCTTTCGCGTGTTTTATCAGCGAGTGTTCT\n+GGCGCGAACAAGCTCTGTACCACGATAAATCTTTTTGAATCTTTCGCCTTTAAACACACG\n+TGATTTATCATAAATGATAAAAGCGATCAGACCGCCAACACCAATAAACCAGCCAGCAAT\n+TAAAGCTGACCATAAAGG\n+>korA_1\n+CCTGGAGTTATTTGAAGCAGGAAAAGTGAAGGAGGTAACGATAGAACGGGTTTCATTAAA\n+GAAGTGGTATCCCGTTTTTCAGATAGACGATGAACAGCTGGGCCAGATCGCATGTTCCAT\n+TCGGGTTAACAAAGAGCATGAGCTACGAACCTGGGCTGATTTAAGGCTACTGGCAGAGTT\n+TTTGAAAGATAAGTGTGGCGTTGAAGAATGCCGGTTAAATCTG\n+>korA_2\n+CCTGGAGCTATTTGAAGCAGGAAAAGTGAAGGAGGTAACGATAGAACGGGTTTCATTAAA\n+GAAGTGGTATCCCGTTTTTCAGATAGACGATGAACAGCTGGGCCAGATCGCATGTTCCAT\n+TAGGGTTAACAAAGAGCATGAGCTACGAACCTGGGCTGATTTAAGGCTACTGGCAGAGTT\n+TTTGAAAGATAAGTGTGGCGTTGAAGAATGCCGGTTAAATCTG\n+>korA_3\n+CCTGGAGCTATTTGAAACAGGAAAAGTGAAGGAGGTAACGATAAAACGGGTTTCATTAAA\n+GACGTGGTATCCCGTTTTTCAGATAGACGATGAACAGTTGGGCCAGATCGCATGTTCCAT\n+TCGGGTTAACAAAGAGCATGAACTACGAACCTGGGCTGATTTAAGGCTACTGGCAGAGTT\n+TTTGAAAGATAAGTGTGGCGTTGAAGAATGCCGGTTAAATCTG\n+>korA_4\n+CCTGGAGCTATTTGAAGCAGGAAAAGTGAAGGAGGTAACGATAGAACGGGTTTCATTAAA\n+GAAGTGGTATCCCGTTTTTCAGATAGACGATGAACAGCTGGGCCAGATCGCATGTTCCAT\n+TCGGGTTAACAAAGAGCATGAGCTACGAACCTGGGCTGATTTAAGGCTACTGGCAGAGTT\n+TTTGAAAGATAAGTGTGGCGTTGAAGAATGCCGGTTAAATCTG\n+>korA_5\n+CCTGGAGCTATTTGAAGCAGGAAAAGTGAAGGAGGTAACGATAGAACGGGTTTCATTAAA\n+GAAGTGGTATCCCGTTTTTCAGATAGACGATGAACAGCTGGGCCAGATCGCATGTTCCAT\n+TAGGGTTAACAAAGAGCATGAGCTACGCACCTGGGCTGATTTAAGGCTACTGGCAGAGTT\n+TTTGAAAGATAAGTGTGGCGTTGAAGAATGCCGGTTAAATCTG\n+>korA_6\n+CCTGGAGCTATTTGAAGCAGGAAAAAGTGAAGGAGGTAACGATAGAACGGGTTTCATTAA\n+AGAAGTGGTATCCCGTTTTTCAGATAGACGTATGAACAGCTGGGCCAGATCGCATGTTCC\n+ATTAGGGTTAACAAAAGAGCATGAGCTACGAACCTGGGCTGATTTAAGGCTACTGGCAGA\n+GTTTTTGAAAAGATAAGTGTGGCGTTGAAGAATGCCGGTTAAATCTG\n+>korA_7\n+CCTGGAGCTATTTGAAGCAGGAAAAGTGAAGGAGGTAACGATAAAACGGGTTTCATTAAA\n+GACGTGGTATCCCGT
TTTTCAGATAGACGATGAACAGCTGGGCCAGATCGCATGTTCCAT\n+TCGGGTTAACAAAGAGCATGAGCTACGAACCTGGGCTGATTTAAGGCTACTGGCAGAGTT\n+TTTGAAAGATAAGTGTGGCGTTGAAGAATGCCGGTTAAATCTG\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/incn.txt.clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/incn.txt.clean Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,25 @@
+ST repN traJ korA
+1 1 1 1
+2 1 2 1
+3 1 8 1
+4 2 1 2
+5 2 3 2
+6 2 4 2
+7 3 4 2
+8 4 5 1
+9 5 6 3
+10 6 7 1
+11 6 7 4
+12 1 9 1
+13 2 4 1
+14 2 3 5
+15 7 6 3
+16 7 4 3
+17 1 3 2
+18 6 10 1
+19 3 11 2
+20 3 4 6
+21 2 12 2
+22 2 13 2
+23 6 14 7
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/pbssb1-family.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/pbssb1-family.fsa Wed Aug 25 21:12:34 2021 +0000
b
b'@@ -0,0 +1,544 @@\n+>higB_1\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTAATCGATGGATTGCTC\n+TCACCCGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAAATTCGCTGGGCTTTACCTGGTATGGGTAAG\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCCAAGAACGAGAAAGACAATCTCACTAATGCTGAAAAGAACCAGTTGAAA\n+AAAGCCGTAGAAGGTATAGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_2\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTGATTGATGGATTGCTT\n+TCACCTGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAA\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATAGCTTATCCCAAGAACGAGAAAGACAATCTCACTAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATAGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_3\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTAATCGATGGATTGCTC\n+TCACCCGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAG\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCTAAGAACGAGAAAGACAATCTCACTAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATCGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_4\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTGATCGATGGATTGCTT\n+TCACCTGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAA\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCTAAGAACGAGAAAGACAATCTCACTAATACTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATAGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_5\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTAATCGATGGATTGCTT\n+TCACCCGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAA\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATAGCTTATCCCAAGAACGAGAAAGACAATCTCACTAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATAGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_6\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTAATCGATGGATTGC
TT\n+TCACCCGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAG\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCTAAGAACGAGAAAGACAATCTCACCAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATAGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_7\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTAATCGATGGATTGCTT\n+TCACCCGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAG\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCTAAGAACGAGAAAGACAATCTCACCAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTTTAGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_8\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTAATCGATGGATTGCTT\n+TCACCCGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAG\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCCAAGAATGAGAAAGACAATCTCACCAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATAGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_9\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTGATTGATGGATTGCTT\n+TCACCTGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAG\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCTAAGAACGAGAAAGACAATCTCACCAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATCGAGCAAGCCTCCCAGGAGGCAAGGGGGAAGAAATGA\n+>higB_10\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTGATCGATGGATTGCTT\n+TCACCTGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACGGGCGGTTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAA\n+AGTGGTGGTATCAGGGTCATATACTACTATCTGACAGCAGATAACGAAGTATTCCTTCTT\n+ATCGCTTATCCTAAGAACGAGAAAGACAATCTCACTAATGCTGAAAAGAACCAGTTGAAG\n+AAAGCCGTAGAAGGTATAGAGCAAGCCTCCCAGGAGGCAAAGGGGAAGAAATGA\n+>higB_11\n+ATGAATTTACTACACTTCATTGAAACCAAAGTCTTTCAGAGAGTGATTGATGGATTGCTT\n+TCACCCGACGAACTCCGGGAGTTTCAGGAAGTGTTAAGGCAAGATCCTACCGCTGGCGAT\n+ACCATATCAGGTACAGGCGG
TTGCCGTAAGATTCGCTGGGCTTTGCCTGGTATGGGTAAG\n+AGTGGTGGTATCAGGGTCATATACTACTATCTG'..b'CGAATCGAAAAGCACATTGCT\n+GATATGAAAGCAAAAGAGAAGGCATAA\n+>soj_19\n+ATGATTCCGTATTCTGTCTGGAACTTCAAGGGAGGTGTCGGGAAGAGCCACACTAGTCTA\n+ACTCTCTCCGGGTATCTGGCCAGTCAGGGTAAGCGTGTGGCACTGGTTGATTATGACCCA\n+CAAAACGGGGCTATGATCTGTTCAGACATTGCCAAAGCAAATGGAAGCCCTTTACCCTTT\n+GTGGTTGTTAAAAGCCTGAGTGAAATAAACCGCGACGAGATAGATGCTGTTGTGTTCGAT\n+CACTCCCCAGGTGTTAACCCTGGCGGTCAGCTTGCCCCTATCGTTATCATTCCGACTATC\n+CTCGATGCAGCAAGCCATTCAATCACTATCAAGAGTGTTCACGAACTGGACGGAACGGAC\n+AAGCTCTGTATCGTCATTCCTAATCGTGTAGAACTGCAAAACAAAGAGCATAAAGACCTG\n+CTCGAAATGCAGTTTGAAAATGTCCCCTACATGAAGAAGCGAGTCTCTTACTCTCGTGCC\n+TACGGCATGGGTGTAACTATCTATTCGGAAGGTACAGGCTTACCGAACCTGGGGATCACT\n+CGCCAGGAGTTCGACGGTGTAGCTAAATACATTCATAAACGAATCGAAAAACACATTGCT\n+GATATGAAAGCAAAAGAGAAGGCATAA\n+>soj_20\n+ATGATTCCGTATTCTGTCTGGAATTTCAAGGGAGGTGTTGGGAAGAGCCATACTAGTCTA\n+ACTCTCTCCGGGTATCTGGCCAATCAGGGTAAGCGTGTGGCACTGGTTGATTATGACCCA\n+CAAAACGGGGCTATGATCTGTTCAGACATTGCCAAAGCAAATGGAAGCCCTTTACCCTTT\n+TTGGTTGTTAAAAGCCTGAGTGAAATAAACCGCGACGAGATAGATGCTGTTGTGTTCGAT\n+CACTCCCCAGGTGTTAACCCTGGCGGTCAGCTTGCCCCTATCGTTATCATTCCGACTATC\n+CTCGATGCAGCAAGCCATTCAATCACTATCAAGAGTGTTCACGAACTGGACGGAACGGAC\n+AAGCTCTGTATCGTCATTCCTAATCGTGTAGAACTGCAAAACAAAGAGCATAAAGACCTG\n+CTCGAAATGCAGTTTGAAAATGTCCCCTACATGAAGAAGCGAGTCTCTTACTCTCGTGCC\n+TACGGCATGGGTGTAACTATCTATTCGGAAGGTACAGGCTTACCGAACCTGGGGATCACT\n+CGCCAGGAGTTCGACGGTGTAGCTAAATACATTCATAAACGAATCGAAAAACACATTGCT\n+GATATGAAAGCAAAAGAGAAGGCATAA\n+>soj_21\n+ATGATTCCGTATTCAGTATGGAACTTCAAGGGAGGTGTTGGGAAAACCCACACTAGTTTA\n+ACTCTCTCCGGGCATCTGGCCAGTCAAGGTAAACGGGTGGCACTTGTTGACTACGACCCT\n+CAGAACGGGGCTACAATCTGTTCAGATATTGCCAAAGCAAATGGAAGCCCTTTACCGTTT\n+TTGGTGGTAAAAAGCCTGAGTGAAATAAACCGCGACGAGATAGATGCTGTTGTCTTCGAC\n+CATTCTCCAGGTGTCAACCCTGGCGGTCAGCTTGCCCCTATCGTTATCGTTCCTACTATC\n+CTCGATGCAGCAAGCCATTCCATCACTATCAAGAGTGTTCACGAACTGGACGGAACGGAC\n+AAGCTATGTATCGTCATTCCTAACCGCGTAGAACTGCAAAACAGGGAACATAAAGACCTC\n+CTGGAAATGCAGTTTGAAAATGTTCCTTATATGAAGAAGCGAGTCTCTTACTCTCGTGCA\n+TACGGCATGGG
TGTAACTATCTATTCAGAAGGTACGGGCTTACCGAACCTGGGGATCACT\n+CGCCAGGAGTTCGACGGGGTAGCTAAATACATTCATAAACGAATCGAAAAACACATTGCT\n+GATATGAAAGCAAAAGAGAAGGCATAA\n+>soj_22\n+ATGATTCCGTATTCAGTATGGAACTTCAAGGGAGGTGTTGGGAAAACTCACACTAGTTTA\n+ACTCTCTCCGGGCATCTGGCCAGTCAAGGTAAACGGGTGGCACTTGTTGACTACGACCCT\n+CAGAACGGGGCTACAATCTGTTCAGATATTGCCAAAGCAAATGGAAGCCCTTTACCGTTT\n+TTGGTGGTAAAAAGCCTGAGTGAAATAAACCGCGACGAGATAGATGCCGTTGTCTTTGAC\n+CATTCCCCAGGTGTTAACCCTGGTGGTCAGCTTGCCCCTATCGTTATTGTTCCTACCATC\n+CTCGATGCTGCAAGCCATTCTATCACTATCAAAAGTGTTCACGAACTGGACGGAACAGAT\n+AAGCTCTGTATCGTCATTCCAAACCGTGTCGAACTGCAAAACAGGGAACATAAAGACCTT\n+CTGGAAATGCAGTTTGAAAATGTTCCCTACATGAAGAAGCGAGTCTCTTACTCTCGTGCA\n+TACGGTATGGGTGTAACTATCTATTCAGAAGGAACAGGCTTACCGAACCTGGGGATCACT\n+CGCCAGGAGTTCGACGGGGTAGCTAAATACATTCATAAACGAATCGAAAAACACATTGCT\n+GATATGAAAGCAAAAGAGAAGGCATAA\n+>soj_23\n+ATGATTCCGTATTCTGTCTGGAATTTCAAGGGAGGTGTTGGGAAGAGTCACACGAGTTTA\n+ACTCTCTCTGGGTATCTGGCCAGTCAGGGTAAGCGGGTGGCACTGGTTGATTATGACCCA\n+CAGAACGGGGCTATGATCTGTTCTGATATCGCCAAAGCAAATGGAAGCCCTTTACCATTT\n+TTGGTTGTTAAAAGCCTGAGTGAAATAAACCGCGACGAGATAGATGCTGTTATCTTCGAC\n+CATTCCCCAGGTGTCAACCCTGGCGGTCAGCTTGCCCCTATCGTTATCATTCCGACTATC\n+CTCGATGCAGCAAGCCATTCAATCACTATCAAGAGTGTTCACGAACTGGACGGAACGGAC\n+AAGCTCTGTATCGTCATTCCTAACCGTGTAGAACTGCAAAACAGGGAACACAAAGACCTC\n+CTGGAAATGCAGTTTGAAAATGTTCCATACATGAAAAAGCGAGTCTCTTACTCTCGTGCA\n+TACGGTATGGGGGTAACTATCTATTCGGAAGGTACTGGCTTGCCGAACCTGGGGATCACT\n+CGCCAGGAGTTCGACGGTGTAGCTAAATATATTCATAAACGAATCGAAAAGCACATTGCT\n+GATATGAAAGCAAAAGAGAAGGCATAA\n+>soj_24\n+ATGATTCCGTATTCCGTCTGGAACTTTAAGGGTGGCGTTGGGAAGAGTCACACGAGTTTA\n+ACTCTCTCTGGGTATCTGGCCAGTCAGGGTAAGCGGGTGGCACTGGTTGATTATGACCCA\n+CAGAACGGGGCTATGATCTGTTCTGATATCGCCAAAGCAAATGGAAGCCCTTTACCATTT\n+TTGGTTGTTAAAAGCCTGAGTGAAATAAACCGCGACGAGATAGATGCTGTTATCTTCGAC\n+CATTCCCCAGGTGTCAACCCTGGCGGTCAGCTTGCCCCTATCGTTATCATTCCGACTATC\n+CTCGATGCTGCAAGCCATTCCATCACTATCAAGAGTGTTCACGAACTGGATGGAACAGAT\n+AAGCTCTGTATCGTCATTCCTAACCGTGTCGAACTGCAAAACAGGGAACACAAAGACCTC\n+CTGGAAATGCAGTTTGAAAATGTTCCCTACATGAAGAAGCGAGTCTCTTACTCTCGTGCA\n+T
ACGGTATGGGTGTAACTATCTATTCAGAAGGTACAGGCTTACCGAACCTGGGGATCACT\n+CGCCAGGAGTTCGACGGTGTAGCTAAATACATTCACAAACGAATCGAAAAACATATTGCT\n+GATATGAAAGCAAAAGAGAAGGCATAA\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/pmlst_db/pbssb1-family.txt.clean
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/pmlst_db/pbssb1-family.txt.clean Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,37 @@
+ST higB mqsA soj
+1 4 1 4
+2 5 2 3
+3 6 4 5
+4 11 4 9
+5 9 7 6
+6 12 9 10
+7 9 7 3
+8 13 10 11
+9 6 4 3
+10 3 3 3
+11 14 11 12
+12 2 2 13
+13 6 4 14
+14 6 4 6
+15 2 12 3
+16 1 1 1
+17 5 2 16
+18 2 2 3
+19 6 4 17
+20 6 14 5
+21 15 13 18
+22 16 15 3
+23 6 9 19
+24 2 16 20
+25 1 13 15
+26 17 7 21
+27 18 7 22
+28 19 4 3
+29 8 6 7
+30 10 8 8
+31 21 2 3
+32 22 17 24
+33 20 14 23
+34 7 5 3
+35 2 2 2
+
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/test.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/test.fsa Wed Aug 25 21:12:34 2021 +0000
b
b'@@ -0,0 +1,1734 @@\n+>pSFO157\r\n+ATGACAGAGCAGAAGCGACCGGTAGTGACACTGAAGCGGAAGACAGAGGGAACAGCGCCTGTCCGCAGCC\r\n+GGAAAACCATCATCAATGTCACCACGCCACCAAAATGGAAGGTGAAAAAGCAGAAGCTGGCGGAGAAAGC\r\n+GGCTCGGGAAGCAGAGCTGGTGGCAAAAAAAGCGCAGGCCAGACAGGCGCTGTCCATTTATCTGAACCTG\r\n+CCCACACTGGATGAAGCCGTGAACACCCTGAAGCCCTGGTGGCCGGGATTATTTGACGGTGACACCCCCC\r\n+GGCTTCTGGCCTGCGGTATCCGGGACGTGTTACTGGAAGACGTGGCGCAGCGGAATATCCCGCTTTCGCA\r\n+TAAAAAACTGCGCAGGGCGCTGAAGGCCATCACCCGTTCAGAAAGCTATCTTGGCGCCATGAAAGCCGGT\r\n+GCCTGCCGGTATGACACGGAAGGGTATGTGACGGAGCATATTTCTCAGGAGGAGGAAGCGTATGCGGCAG\r\n+CGCGTCTGGATAAAATCCGCCGCCAGAACCGGATAAAGGCAGAACTTCAGTCCGTGCTTGATGAGAAATA\r\n+AAAAAAAGCCTCCCTCAGAGAGAGGAGGCAGGGAAATAAGGCTTAAAAGGAAATAACTTCCGTACCCGAA\r\n+AGGAAGTCAGTCCAGTGGATTATTTAACACCATAATACAGACGGGTTAAAGGGGAGAGCTAAAGATGAAT\r\n+GGATTCAGAAACAGTTCACGGAACGGTCAGGTCTGGCGTTACCAGCGTGCCGGTGGGCGGGCTGTTATTC\r\n+TGGAGGTCAGTGGACGCTGGATGGAAGCGGCAGAAGCATGGCGACGGGCTGCCTGTATAGCTCCCCGGAC\r\n+AGACTGGCAGCAGTTTGCCCGAAAAAGGGCTGAACACTGCCACCGGCGCTGCCGGGGTAGGGTGTAACGA\r\n+AAAAAGCAGCCTTCATTACTCCAGGGATTCAGAAGGCTGAATAATGCATGAGATTTGCTTTTTTTAATAT\r\n+CCAGTCGGGCAGTATGGCCGGACAGTTCTGAGTAGCACGGAGTTTTTTCCGTATTAAGTCGTGATTTTAT\r\n+ATTTTGTGATTAATTTCACAAAATAAGGTGTTGTTCAGTGTGTGCTGCGATATTCAGGATTGCCTGAAAT\r\n+ACAGGTGCCATTTATCTGATATGGAGAATAACATGAGGAAATATATTCCACTGGTATTATTTATCTTTTC\r\n+ATGGCCGGTATTGTCTGCAGATATTCATGGACGGGTTGTTCGTGTACTGGACGGTGATACCATCGAGGTA\r\n+ATGGACTCACTGAAGGCAGTGCGGATCAGGCTGGTAAATATTGATGCACCGGAAAAGAAACAGGATTATG\r\n+GCCGGTGGTCGACAGATATGATGAAATCACTGGTTGCAGGAAAAACGATTACCGTCACATATTTTCAGCG\r\n+AGATCGTTATGGTCGTATTCTCGGGCAGGTTTATGCACCAGACGGAATGAATGTTAATCAGTTTATGGTC\r\n+CGGGCCGGGGGTGCCTGGGTATATGAGCAGTACAACACAGATCCCGTGCTGCCTGTCCTTCAGAACGAAG\r\n+CCAGACAACAAAAACGTGGGCTCTGGTCAGATGCTGATCCGGTTCCACCCTGGATATGGAGACACCGTAA\r\n+ATAAAGCGATGGCAATCTTATCATTCATCTGTTCTCAGCGGGAATAAAAATGGCAAAAACAAAACAGGAA\r\n+TGGCTGTATCCGTTACGGCGATGTTCATCAGTTAATTAATACGCTGGGAAAAATTATTCATAAAAACAGA\r\n+GACAGTTTATCGGCCAGTGAATGTGAATCATTTAATCCAGCAGCAGACCACCGGTTAGTGGAATTAATCA\r\n+CCGGAAAGTTATAT
GACAGGATACCAAAAGATATATGGAAATATGTAAGGTAATAATTCAGCACAATTAA\r\n+TTGCCAGGAGCAGATAAATAATGAAGTTGATTATTTTTATTCTCATTGTTCTGATTATTGCAGCGTTACT\r\n+TATCCGGATCATCCTCAGAAGTGTAAATCAGCATTCTCCTTTGCTGATGCAATTGCATGCGGCAGGTATA\r\n+CGGACCGGTGATGCAGAACGAATACTGTCCGGCGGTGAATACTGGCAACGTCAGAAGACGCTGCTGACAG\r\n+AAAGGGAAGTCAGTTTTATGAAAGGACTGTTCAGAATTGTGGATATGAAGCGGTGGTATCTGTGTCCGCA\r\n+GGTACGGGTCGCGGATATCGTCCAGCTGAACGGGAATATCCGGCCACGATCACGCCAGTGGTGGCAGTTA\r\n+TTCAGGATGGTGTCTCAGTGGCATGTTGATGTGGTTATCGTTGAGCGGTGTTCGTTCAGTATTGTTGCTG\r\n+CAGTAGAGCTGGATGATGCCAGCCATTTACGACCGGAACGCAGACGCCGGGATATTCTTCTGGAAGAGGT\r\n+TCTGAGGCAGGCTGGTATTCCGTTGCTCAGAAGCCACGATGCCAGAAAACTGCTGCAGATGACCGGAGAA\r\n+TGGCTGAATACAACAGGGGCTGCTCAGCAGTCACCGGAACATCGTAGCTGACGCCTTCGCGTTGCTCAGT\r\n+TGTCCAACCCCGGAAACGGGAAAAAGCAAGTTTTCCCCGCTCCCGGCGTTTCTGTAACAAAAACAGGTAA\r\n+ATGGGGGTAATAGTCAGCAACAGGGAATGTGGTATTATCGCGGCGGGTGTCTGAGCCTTTCTGGTTCAGG\r\n+CAAGACGCAGGTACCAGAAATGCGAAGACCCCACTCGTTAATCCATTAACTCGTGAGGTCTGCATGAAGT\r\n+TCCTAGCAGTTCTGATTGTAGCCTCTTCCTTGCAGAGAGGTCAAAGTTTATGACGAAATATGCCCTTATC\r\n+GGGTTGCTCGCCGTGTGTGCCACGGTGTTGTGTTTTTCACTGATATTCAGGGAACGGTTATGTGAACTGA\r\n+ATATTCACAGGGGAAATACAGTGGTGCAGGTTACTTTGGCCTACGAAGCACGGAAGTAAGCTGCCGGGCG\r\n+GGGACGGAAGTCCCCGCTTTCCGGAAGTGTGAGGTATTTCAGGGGCAGACACCCGACATGCCAGAAACAG\r\n+CCGGTCCCGCCCGGGGCCGGCACCCGGGTTAAGGCATTTCCTGTTTTTCAGCCATTTCGCAACAAAATTC\r\n+ACACTAAACGACGGTAATTCCTGTTGATTTGTGCGCCAACACAGATCTTCGTCACAATTCTCAAGTCGCT\r\n+GATTTCAAAAAACTGTAGTATCCTCTGCGAAACGATCCCTGTTTAAGTATTGAGGAGGCGAGATGTCACA\r\n+GACAGAAAATGCAGTGACTTCCTCATCTGGCGTAAAGCGAGCATACAGAAAAGGTAATCCTCTGACACTT\r\n+GCAGAACGTCAGCAGGCATCCCTGGCAAGAAAACGAGCTACACATAAAGAACTCAGAGTGTTCATTCCTG\r\n+CTGCGTTAAAAGCGCAATTGCAGGAGATGTGTGAAGCTGAGGGCGTTACCCAGGCTGAGATGATTGCCGA\r\n+ACTGATAAAGCAGAAGAGTGCTTTTAGCTAACTAGAATGTAGCGACATCACATTCTTGCCTGTTTCCGGT\r\n+GTGCGTGATAGATTGCTGATCGTTTAAGGAATTTTGTGGCTGGCCACGCCATAAGGTGGCAGGGAACTGG\r\n+TTCTGATGTGGATGTACAGGAGCCAGAAAAGCGAAAACCCCGATAATCTTTCTATGTTTGGCGACGAAGA\r\n+AGATTACCGGGGCCATCTAAAAACCGCATAGAAGCTGTTGCTCTATGCGGGGAGTATAGTTATA
TGCCCG\r\n+GAAAAGTTCAAGACTTCTTTCTGTGCTCACTCCTTCTGTGCAACATAAGTGCAGGATGGTGTGACTGATC\r\n+TTCAACAAACGTATTACCGCCAGGTA'..b'GAACCGGCCACCCTGCCTGTGGCTGATTCACCGTTCACGGCACTGAAGCTGGA\r\n+GAACGGCTGGGTGGAAACGCCCGGGCATTCCGTCAGCGACAGTGCGAAGGTTTTTGCCTCCGTCACACAG\r\n+ATGGCAATGGACAACGCCACCCTGAACGGTCTGGCCCGCAGCGGTCGTGATGTCCGGCTGTATTCCTCAC\r\n+TGGATGAAACCCGTACTGCGGAAAAACTTGCCCGCCATCCCTCCTTTACGGTGGTTTCTGAGCAGATAAA\r\n+GGCGCGTGCCGGTGAGACATTGCTGGAAACCGCTATCAGTCTGCAGAAAGCCGGGCTTCACACGCCGGCA\r\n+CAGCAGGCTATTCATCTGGCGCTTCCTGTGGTGGAAAGTAAAAACCTGGCCTTCAGCATGGTGGACCTGC\r\n+TGACAGAGGCGAAGTCGTTTGCTGCAGAAGGAACCAGTTTTACTGAACTGGGAGGGGAAATCAATGCGCA\r\n+GATAAAACGCGGTGATTTACTGTATGTGGATGTGGCAAAAGGCTATGGCACAGGCCTGCTGGTTTCCCGT\r\n+GCGTCGTATGAGGCAGAAAAGAGCATTCTTCGCCATATTCTCGAAGGTAAGGAGGCGGTCACGCCGCTGA\r\n+TGGAGAGAGTACCTGGCGAACTCATGGAGAAACTGACATCAGGACAGCGTGCCGCTACCCGCATGATACT\r\n+GGAAACGTCCGACCGTTTCACGGTGGTGCAGGGCTATGCCGGTGTGGGTAAGACCACACAGTTCCGGGCG\r\n+GTGATGTCAGCCGTGAACATGCTGCCGGAGAGTGAACGTCCCCGTGTCGTGGGGCTGGGGCCCACGCACC\r\n+GTGCGGTCGGTGAGATGCGCAGCGCCGGCGTGGATGCACAGACGCTGGCGTCCTTTCTGCATGACACGCA\r\n+GCTGCTGCAGCGCAGCGGAGAAACGCCGAATTTCAGCAACACGCTGTTCCTGCTCGATGAGAGCTCTATG\r\n+GTGGGCAATACCGACATGGCACGGGCATACGCCCTGATTGCGGCCGGTGGCGGTCGTGCCGTGGCCCGTG\r\n+GTGACACGGACCAGTTGCAGGCCATCGCGCCCGGTCAGCCTTTCCGTCTCCAGCAGACGCGCAGTGCTGC\r\n+CGATGTGGCCATCATGAAGGAGATTGTGCGCCAGACGCCGGAGCTGCGGGAGGCGGTATACAGCCTGATT\r\n+AACCGGGATGTGGAAAGGGCACTGTCCGGGCTTGAGAGTGTGAAACCGTCTCAGGTGCCACGTCAGGAGG\r\n+GCGCATGGGTACCGGAGCACTCCGTGACGGAGTTCAGTCACAGCCAGGAAGCGAAACTGGCAGAAGCGCA\r\n+GCAGAAGGCGATGCTGAAAGGTGAGGCTTTTCCGGATATTCCCATGACACTGTATGAAGCCATTGTCCGC\r\n+GATTATACCGGCAGGACGCCGGAAGCACGGGAGCAGACGCTGATTGTCACGCATCTGAATGAGGACCGGC\r\n+GCGTACTGAACAGCATGATTCATGATGCACGGGAAAAGGCCGGTGAGCTGGGAAAAGAGCAGGTCATGGT\r\n+GCCTGTCCTGAACACAGCGAATATACGCGACGGGGAGTTGCGTCGTCTCTCCACCTGGGAGACACATCGG\r\n+GACGCACTTGCCCTGGTGGATAATGTGTATCACCGGATTGCCGGTATCAGCAAGGATGACGGGCTGATAA\r\n+CCCTGCAGGATGCGGAAGGTAACACGCGGCTGATTTCGCCCCGGGAGGCGGTGGCTGAAGGCGTCACACT\r\n+GTACACTCCGGACACCATCCGGGTG
GGAACCGGTGACCGGATGCGCTTCACGAAGAGTGACCGGGAGCGC\r\n+GGTTATGTGGCCAACAGCGTCTGGACGGTGACAGCAGTTTCCGGTGACAGTGTCACGCTGTCGGACGGAC\r\n+AGCAGACCCGGGTGATTCGCTCCGGTCAGGAGCGGGCAGAGCAACATATTGACCTGGCCTATGCCATCAC\r\n+CGCCCACGGTGCGCAGGGGGCAAGTGAAACCTTTGCCATTGCGCTTGAAGGCACGGAAGGTAACCGGAAA\r\n+CAGATGGCCGGCTTTGAGTCAGCCTACGTGGCCCTGTCGCGTATGAAGCAGCATGTGCAGGTGTATACCG\r\n+ATAACCGTCAGGGCTGGACGGATGCCATTAACAATGCCGTACAGAAAGGAACTGCCCACGATGTGCTTGA\r\n+GCCGAAATCGGACCGGGAGGTCATGAATGCAGAGCGGCTGTTCAGTACGGCGCGGGAGTTGCGGGACGTG\r\n+GCGGCCGGCCGTGCCGTTCTCCGTCAGGCAGGGCTGGCCGGGGGAGACAGTCCTGCACGGTTTATTGCAC\r\n+CCGGGCGAAAATATCCGCAGCCGTATGTGGCACTGCCGGCTTTTGACCGTAACGGCAAGTCAGCTGGTAT\r\n+CTGGCTGAACCCGCTGACCACGGATGACGGAAACGGACTGCGGGGCTTCAGTGGTGAAGGCCGGGTGAAA\r\n+GGCAGCGGGGATGCGCAGTTCGTGGCCCTGCAGGGCAGCCGTAACGGAGAGAGCCTGCTGACTGATAATA\r\n+TGCAGGATGGTGTCCGGATTGCCCGTGATAATCCTGACAGCGGTGTGGTGGTAAGAATCGCCGGTGAAGG\r\n+TCGTCCGTGGAATCCCGGTACCATCACCGGTGGTCGCGTGTGGGGGGATATCCCGGACAACAGCGTTCAG\r\n+CCGGGAGCCGGAAATGGCGAACCGGTCACGGCAGAGGTACTGGCACAGCGGCAGGCTGAAGAGGCCATCC\r\n+GCCGTGAAACGGAACGCCGTGCGGATGAAATTGTCCGTAAAATGGCAGAGAACAAACCTGACCTGCCGGA\r\n+CGGCAAAACAGAGCAGGCTGTCAGGGATATTGCCGGGCTGGAGCGTGACCGGTCTGCCATATCTGAACGG\r\n+GAAGCCGCGCTGCCGGAGAGTGTACTGCGTGAACCACAGCGGGTGCGGGAAGCGGTCCGGGAGGTTGCCC\r\n+GGGAAAATCTGCTGCAGGAGCGACTGCAGCAGATGGAGCGGGATATGGTTCGTGACCTGCAGAAAGAGAA\r\n+AACCCTGGGCGGAGACTGATACAGGAAGATAAACGCTGATGACAACGGATAACACGAACACAACACGTAA\r\n+TGATTCACTGGCTGCCCGGACCGATACCTGGTTGCAGTCGTTGCTGGTCTGGTCACCCGGACAGCGGGAT\r\n+ATCATCAAAACGGTGGCACTGGTGCTGATGGTTCTGGACCATGCAAACCGCATTCTGCATCTGGACCAGT\r\n+CGTGGATGTTTCTGGTCGGGCGTGGGGCCTTTCCGCTGTTTGCGCTGGTCTGGGGACTGAATCTGTCCCG\r\n+TGATGCGCATATCCGGCAACCGGCCATTAACCGGCTGTGGGGATGGGCTGTGATTGCCCAGTTCGCGTAT\r\n+TACGTTGCCGGCTTTCCCTGGTATGAGGGGAATATCCTGTTTGCCTTTGCAGTGGCGGCACAGGTGCTGA\r\n+CGTGGTGTGAGACGCGCAGCCTGTGGCGTACTGCCGCGGCCATACTCCTGATGGCATTGTGGGGGCCTTT\r\n+GTCCGGCACCAGTTACGGCATTGCCGGGCTGCTGATGCTGGCGGTCAGTCACCGGCTGTACCGGGCGGAA\r\n+GACAGAACGGAACGTCTGGCGCTGGTTGCATGCCTGCTGGCCGTGATTCCGGCACTTAACCTTGCCTCCA\r\n+
GTGATGCGGCGGCTGTAGCTGGTCTTGTGATGACGGTGCTGACCGTTGGTCTGGTGTCGTGTGCAGGGAA\r\n+ATCATTACCCCGTTTCTGGCCCGGGGATTTTTTCCTGACGTTCTATGCCTGTCATCTGGCTGTGCTGGGC\r\n+GTTCTGGCGCTGTGACGGGTGTGGTATCTTTGGCCGCAAGAGGATGATTCGTCAGAGGCAGAACACAGC\r\n+\r\n'
b
diff -r 000000000000 -r 6dcb66689a7d pmlst/tmp_pMLST/tmp/out_inchi2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pmlst/tmp_pMLST/tmp/out_inchi2.xml Wed Aug 25 21:12:34 2021 +0000
b
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
+<BlastOutput>
+  <BlastOutput_program>blastn</BlastOutput_program>
+  <BlastOutput_version>BLASTN 2.11.0+</BlastOutput_version>
+  <BlastOutput_reference>Zheng Zhang, Scott Schwartz, Lukas Wagner, and Webb Miller (2000), &quot;A greedy algorithm for aligning DNA sequences&quot;, J Comput Biol 2000; 7(1-2):203-14.</BlastOutput_reference>
+  <BlastOutput_db></BlastOutput_db>
+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>
+  <BlastOutput_query-def>pSFO157</BlastOutput_query-def>
+  <BlastOutput_query-len>121239</BlastOutput_query-len>
+  <BlastOutput_param>
+    <Parameters>
+      <Parameters_expect>10</Parameters_expect>
+      <Parameters_sc-match>1</Parameters_sc-match>
+      <Parameters_sc-mismatch>-2</Parameters_sc-mismatch>
+      <Parameters_gap-open>0</Parameters_gap-open>
+      <Parameters_gap-extend>0</Parameters_gap-extend>
+      <Parameters_filter>m;</Parameters_filter>
+    </Parameters>
+  </BlastOutput_param>
+<BlastOutput_iterations>
+<Iteration>
+  <Iteration_iter-num>1</Iteration_iter-num>
+  <Iteration_query-ID>Query_1</Iteration_query-ID>
+  <Iteration_query-def>pSFO157</Iteration_query-def>
+  <Iteration_query-len>121239</Iteration_query-len>
+<Iteration_hits>
+</Iteration_hits>
+  <Iteration_stat>
+    <Statistics>
+      <Statistics_db-num>0</Statistics_db-num>
+      <Statistics_db-len>0</Statistics_db-len>
+      <Statistics_hsp-len>20</Statistics_hsp-len>
+      <Statistics_eff-space>701858010</Statistics_eff-space>
+      <Statistics_kappa>0.46</Statistics_kappa>
+      <Statistics_lambda>1.28</Statistics_lambda>
+      <Statistics_entropy>0.85</Statistics_entropy>
+    </Statistics>
+  </Iteration_stat>
+  <Iteration_message>No hits found</Iteration_message>
+</Iteration>
+</BlastOutput_iterations>
+</BlastOutput>
+