changeset 0:715bc9aeef69 draft

planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
author card
date Wed, 27 Feb 2019 09:08:21 -0500
parents
children f96cbb663aa9
files README.rst data_managers/data_manager_conf.xml data_managers/data_manager_rgi_build_db/data_manager/.gitignore data_managers/data_manager_rgi_build_db/data_manager/import_data.py data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.py data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.xml data_managers/data_manager_rgi_build_db/tool-data/rgi_databases.loc.sample data_managers/data_manager_rgi_build_db/tool_data_table_conf.xml.sample rgi.xml test-data/test1.fasta
diffstat 10 files changed, 573 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,6 @@
+RGI wrapper
+--------------------
+
+This wrapper is used to run the Resistance Gene Identifier (RGI) in a Galaxy environment.
+
+This tool is used together with the data manager for RGI: `rgi_database_builder <https://github.com/arpcard/rgi_database_builder>`_.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_conf.xml	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager_rgi_build_db/data_manager/rgi_database_builder.xml" id="rgi_database_builder" version="1.0.0">
+        <!-- Defines a Data Table to be modified -->
+        <data_table name="rgi_databases">
+            <!-- Handle the output of the Data Manager Tool -->
+            <output>
+                <!-- Columns that will be specified by the Data Manager Tool -->
+                <column name="value" /> 
+                <column name="name" />
+                <!-- The value of this column will be modified based upon data in "out_file" -->
+                <column name="path" output_ref="out_file" >
+                    <!-- Moving a file from the extra files path of "out_file" -->
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rgi_databases/${value}/${path}</target>
+                    </move>
+                    <!-- Store this value in the final Data Table -->
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rgi_databases/${value}/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/.gitignore	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,1 @@
+*.pyc
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/import_data.py	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,199 @@
+import argparse
+import datetime
+import json
+import os
+import shutil
+import sys
+import tarfile
+import urllib.request, urllib.error, urllib.parse
+import zipfile
+import logging
+
+# Directory (under the current working directory) that holds card.json and
+# the proteindb.fsa / protein.db files built from it.
+path = os.path.join(os.getcwd(), 'rgi-database') 
+# Alias of `path`; used when reading card.json.
+data_path = path
+
+# Module logger; messages below WARNING (e.g. logger.info) are not emitted.
+level = logging.WARNING
+logger = logging.getLogger(__name__)
+logger.setLevel(level)
+
+def url_download(url, workdir):
+    """Download the CARD archive at *url* and extract ``card.json`` into *workdir*.
+
+    The payload is first saved as ``download.dat`` inside *workdir* (created
+    if missing). Tar archives (any compression) and zip archives are
+    supported; only the regular file whose base name is ``card.json`` is
+    extracted, and it is placed directly in *workdir* regardless of its path
+    inside the archive. ``download.dat`` is always removed afterwards, also
+    when the payload is not a recognised archive.
+
+    Args:
+        url: Location of the CARD data archive.
+        workdir: Directory that receives ``card.json``.
+
+    Raises:
+        Exception: Any download error is printed to stderr and re-raised,
+            because nothing downstream can work without the data.
+    """
+    file_path = os.path.join(workdir, 'download.dat')
+    os.makedirs(workdir, exist_ok=True)
+    try:
+        try:
+            with urllib.request.urlopen(urllib.request.Request(url)) as src, \
+                    open(file_path, 'wb') as dst:
+                shutil.copyfileobj(src, dst)
+        except Exception as e:
+            print(str(e), file=sys.stderr)
+            raise
+
+        if tarfile.is_tarfile(file_path):
+            with tarfile.open(file_path, 'r:*') as archive:
+                for member in archive.getmembers():
+                    if not member.isreg():  # skip directories, links, ...
+                        continue
+                    # Drop the directory part so the file lands in workdir itself.
+                    member.name = os.path.basename(member.name)
+                    if member.name == 'card.json':
+                        print('[import_data] extracting file: {}'.format(str(member.name)))
+                        archive.extract(member, workdir)
+        elif zipfile.is_zipfile(file_path):
+            # ZipFile has no getmembers(); walk its ZipInfo entries instead.
+            with zipfile.ZipFile(file_path, 'r') as archive:
+                for info in archive.infolist():
+                    if info.is_dir() or os.path.basename(info.filename) != 'card.json':
+                        continue
+                    print('[import_data] extracting file: {}'.format('card.json'))
+                    with archive.open(info) as src, \
+                            open(os.path.join(workdir, 'card.json'), 'wb') as dst:
+                        shutil.copyfileobj(src, dst)
+    finally:
+        # Never leave the temporary download behind in the database directory.
+        if os.path.exists(file_path):
+            os.remove(file_path)
+
+def checkKeyExisted(key, my_dict):
+    """Return True when *key* is present in *my_dict* with a non-None value."""
+    try:
+        value = my_dict[key]
+    except KeyError:
+        return False
+    return value is not None
+
+def data_version():
+    """Return the ``_version`` entry of ``card.json``, or '' when it has none."""
+    card_json = os.path.join(data_path, 'card.json')
+    with open(card_json) as handle:
+        card = json.load(handle)
+    return card.get('_version', '')
+
+def makeBlastDB():
+    """Run ``makeblastdb`` on ``proteindb.fsa``; do nothing if that file is absent."""
+    fasta = os.path.join(path, 'proteindb.fsa')
+    if not os.path.isfile(fasta):
+        return
+    print('[import_data] create blast DB.')
+    command = 'makeblastdb -in {}/proteindb.fsa -dbtype prot -out {}/protein.db > /dev/null 2>&1'.format(path, path)
+    os.system(command)
+
+def makeDiamondDB():
+    """Run ``diamond makedb`` on ``proteindb.fsa``; do nothing if that file is absent."""
+    fasta = os.path.join(path, 'proteindb.fsa')
+    if not os.path.isfile(fasta):
+        return
+    print('[import_data] create diamond DB.')
+    command = 'diamond makedb --quiet --in {}/proteindb.fsa --db {}/protein.db'.format(path, path)
+    os.system(command)
+
+def write_fasta_from_json():
+    '''Create ``proteindb.fsa`` from the protein models in ``card.json``.
+
+    Does nothing when ``proteindb.fsa`` already exists. Only top-level keys
+    of the JSON document that are digit strings are treated as models, and
+    only three model types are exported:
+
+    * 40292 (protein homolog model): header carries the pass bitscore.
+    * 40293 (protein variant model) and 41091 (protein overexpression
+      model): header carries the pass bitscore and the SNP list.
+
+    A model that lacks its bitscore or, for the SNP model types, its SNP
+    list is skipped with a warning, so a SNP list from a previously
+    processed model can never leak into another model's header. When a
+    sequence entry is malformed, the warning is logged and the remaining
+    sequences of that model are skipped; a header line is never written
+    without its sequence line.
+
+    Raises:
+        SystemExit: with status 1 when ``card.json`` cannot be read or parsed.
+    '''
+    fasta_path = os.path.join(path, 'proteindb.fsa')
+    if os.path.isfile(fasta_path):
+        return
+
+    try:
+        with open(os.path.join(data_path, 'card.json'), 'r') as jfile:
+            j = json.load(jfile)
+    except Exception as e:
+        logger.error(e)
+        # Exit non-zero so the calling process can detect the failure.
+        sys.exit(1)
+
+    # model_type_id -> True when the FASTA header must include the SNP list.
+    exported_model_types = {
+        '40292': False,  # protein homolog model
+        '40293': True,   # protein variant model
+        '41091': True,   # protein overexpression model
+    }
+
+    def warn_omitted(missing, model):
+        # Shared warning for every "model is incomplete" case.
+        logger.warning('No %s for model (%s, %s). RGI will omit this model and keep running.',
+                       missing, model.get('model_id'), model.get('model_name'))
+        logger.info('Please let the CARD Admins know! Email: card@mcmaster.ca')
+
+    with open(fasta_path, 'w') as fout:
+        for i in j:
+            if not i.isdigit():
+                continue
+            model = j[i]
+            model_type = model['model_type_id']
+            if model_type not in exported_model_types:
+                continue
+
+            try:
+                pass_bit_score = model['model_param']['blastp_bit_score']['param_value']
+            except KeyError:
+                warn_omitted('bitscore', model)
+                continue
+
+            if exported_model_types[model_type]:
+                try:
+                    snps = model['model_param']['snp']['param_value']
+                    snp_list = ','.join(snps[k] for k in snps)
+                except (KeyError, TypeError):
+                    warn_omitted('snp', model)
+                    continue
+                score_fields = '| pass_bit_score: %s | SNP: %s' % (pass_bit_score, snp_list)
+            else:
+                # The homolog header historically spells the label "pass_bitscore".
+                score_fields = '| pass_bitscore: %s' % (pass_bit_score,)
+
+            try:
+                sequences = model['model_sequences']['sequence']
+                for seq in sequences:
+                    # Build both lines before writing so a lookup failure
+                    # cannot leave a header without its sequence.
+                    header = '>%s_%s | model_type_id: %s %s | %s\n' % (
+                        i, seq, model_type, score_fields, model['ARO_name'])
+                    residues = '%s\n' % (sequences[seq]['protein_sequence']['sequence'],)
+                    fout.write(header)
+                    fout.write(residues)
+            except (KeyError, TypeError):
+                warn_omitted('model sequences', model)
+
+def _main(args):
+    """Download CARD data, build the FASTA/BLAST/DIAMOND files and return the data version.
+
+    *args* must expose a ``url`` attribute; when it is None the default CARD
+    "latest" URL is used.
+    """
+    if not os.path.exists(path):
+        print('[import_data] mkdir: {}'.format(path))
+        os.makedirs(path)
+    print('[import_data] path: {}'.format(path))
+    print(args)
+
+    url = 'https://card.mcmaster.ca/latest/data' if args.url is None else args.url
+    print('[import_data] url: {}'.format(url))
+
+    workdir = os.path.join(os.getcwd(), 'rgi-database')
+    print('[import_data] working directory: {}'.format(workdir))
+
+    # Order matters: the FASTA file comes from card.json, the DBs from the FASTA.
+    url_download(url, workdir)
+    write_fasta_from_json()
+    makeBlastDB()
+    makeDiamondDB()
+
+    version = data_version()
+    print('[import_data] data version: {}'.format(version))
+    return version
+
+def run():
+    """Parse ``--url`` from the command line and run the import pipeline."""
+    cli = argparse.ArgumentParser(description='Create data manager json.')
+    cli.add_argument('--url', dest='url', action='store', help='Url for CARD data')
+    _main(cli.parse_args())
+
+if __name__ == '__main__':
+    run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.py	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,47 @@
+import argparse
+import datetime
+import json
+import os
+import shutil
+import sys
+import tarfile
+import urllib.request, urllib.error, urllib.parse
+import zipfile
+from import_data import run, _main
+
+# Command-line interface. NOTE: parsing happens at module level, so it runs
+# whenever this module is loaded, not only when it is executed as a script.
+parser = argparse.ArgumentParser(description='Create data manager json.')
+parser.add_argument('--url', dest='url', action='store', help='Url for CARD data')
+parser.add_argument('--out', dest='output', action='store', help='JSON filename')
+# The default name embeds the current date and time.
+parser.add_argument('--name', dest='name', action='store', default='CARD_data-' + str(datetime.datetime.now().strftime('%Y-%B-%d-%H:%M:%S')), help='Data table database name')
+args = parser.parse_args()
+
+print('[rgi_database_builder] Importing...')
+
+# import_data._main: runs the download/build step before main() is called.
+_main(args)
+
+def main(args):
+    """Move the built database into place and write the data manager JSON.
+
+    Reads the JSON parameter file at ``args.output`` to find the output's
+    ``extra_files_path``, moves every entry of ``./rgi-database`` into that
+    directory (creating it, including parents, if needed), then overwrites
+    ``args.output`` with the ``rgi_databases`` data table entry.
+
+    Args:
+        args: Namespace with ``name`` (database display name; its lower-cased
+            form becomes the entry's ``value``) and ``output`` (path of the
+            JSON file that is read and then rewritten).
+    """
+    print('[rgi_database_builder] Building......')
+
+    data_manager_entry = {
+        'value': args.name.lower(),
+        'name': args.name,
+        'path': '.',
+    }
+    data_manager_json = dict(data_tables=dict(rgi_databases=data_manager_entry))
+
+    with open(args.output, 'r') as handle:
+        params = json.load(handle)
+
+    target_directory = params['output_data'][0]['extra_files_path']
+    # Tolerate a pre-existing directory and missing parent directories.
+    os.makedirs(target_directory, exist_ok=True)
+    output_path = os.path.join(os.getcwd(), 'rgi-database')
+
+    for filename in os.listdir(output_path):
+        print('[rgi_database_builder] move file: {} from {} to {}'.format(filename, output_path, target_directory))
+        shutil.move(os.path.join(output_path, filename), target_directory)
+
+    print(args.output)
+    print('[rgi_database_builder] write file: {}'.format(args.output))
+    with open(args.output, 'w') as handle:
+        json.dump(data_manager_json, handle)
+
+if __name__ == '__main__':
+    main(args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.xml	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<tool id="rgi_database_builder" name="RGI Database Builder" tool_type="manage_data" version="1.0.0">
+    <description>Download and build the CARD database for RGI</description>
+    <requirements>
+	<requirement type="package" version="4.2.2">rgi</requirement>
+    </requirements>
+    <stdio>
+        <exit_code description="Error" level="fatal" range="1:" />
+    </stdio>
+    <command interpreter="python3">
+    <![CDATA[
+        rgi_database_builder.py --out "${out_file}"
+        #if $name:
+            --name "${name}"
+        #end if
+        #if $url:
+            --url "${url}"
+        #end if
+    ]]>
+    </command>
+    <inputs>
+        <param name="name" type="text" value="" label="Database name" help="Name for this database, or leave blank for today's date" optional="True"/>
+        <param name="url" type="text" value="https://card.mcmaster.ca/latest/data" label="Url" help="Url for the CARD data, e.g. https://card.mcmaster.ca/latest/data. Visit https://card.mcmaster.ca/download to see previous CARD releases." optional="True"/>
+    </inputs>
+    <outputs>
+        <data format="data_manager_json" name="out_file" label="${tool.name} (JSON)"/>
+    </outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/tool-data/rgi_databases.loc.sample	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,5 @@
+#This file lists the columns that will be specified by the RGI Data Manager tool.
+#
+#For example:
+#<value>    <name>    <path>
+#rgi_20181001    rgi_20181001    /galaxy-central/tool-data/rgi_databases/rgi_20181001
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/tool_data_table_conf.xml.sample	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of RGI database in the required format -->
+    <table name="rgi_databases" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/rgi_databases.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgi.xml	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,187 @@
+<tool id="rgi" name="Resistance Gene Identifier (RGI)" version="4.2.2">
+    <description>This tool predicts resistome(s) from protein or nucleotide data based on homology and SNP models.</description>
+    <requirements>
+        <requirement type="package" version="4.2.2">rgi</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command><![CDATA[
+#if $db_opts.db_opts_selector != "default":
+    rgi load
+        #if $db_opts.db_opts_selector == "local":
+            -i "${__tool_data_path__}/rgi_databases/$db_opts.rgi_db_local/card.json" 
+        #else:
+            -i "$db_opts.rgi_db_hist"
+        #end if
+        --local
+    &&
+#end if
+
+rgi main
+    -i '$input_sequence'
+    -o out
+    -t $input_type
+    -a $alignment_tool
+    -n "\${GALAXY_SLOTS:-1}"
+    $include_loose
+    $low_quality
+    -d $data
+
+#if $db_opts.db_opts_selector != "default":
+    --local
+#end if
+       ]]></command>
+    <inputs>
+        <param argument="--input_sequence" type="data" format="fastq,fasta" label="Input sequence" help="input file must be in either FASTA (contig and protein) or gzip format e.g myFile.fasta, myFasta.fasta.gz"/>
+        <param argument="--input_type" type="select" multiple="false" label="Input type" help="specify data input type (default = contig)">
+            <option value="contig" selected="true">CONTIG (Nucleotide Sequence)</option>
+            <option value="protein">PROTEIN (Protein Sequence)</option>
+        </param>
+        <param argument="--alignment_tool" type="select" label="Alignment tool" help="specify alignment tool (default = BLAST)">
+            <option value="blast" selected="true">BLAST</option>
+            <option value="diamond">DIAMOND</option>
+        </param>
+        <param argument="--include_loose" type="boolean" checked="false" truevalue="--include_loose" falsevalue="" label="Include loose hits" help="include loose hits in addition to strict and perfect hits"/>
+        <param argument="--low_quality" type="boolean" checked="false" truevalue="--low_quality" falsevalue="" label="Low quality" help="use for short contigs to predict partial genes"/>
+        <param argument="--data" type="select" multiple="false" label="Data type" help="specify a data-type (default = NA)">
+            <option value="wgs">WGS</option>
+            <option value="plasmid">PLASMID</option>
+            <option value="chromosome">CHROMOSOME</option>
+            <option value="NA" selected="true">NA</option>
+        </param>
+        <conditional name="db_opts">
+            <param name="db_opts_selector" type="select" label="Select an RGI database">
+                <option value="default" selected="true">Default RGI database</option>
+                <option value="local">Locally installed RGI database</option>
+                <option value="hist">RGI database from your history</option>
+            </param>
+            <when value="default">
+                <param name="rgi_db_local" type="hidden" value="" /> 
+                <param name="rgi_db_hist" type="hidden" value="" />
+            </when>
+            <when value="local">
+                <param name="rgi_db_local" type="select" multiple="false" label="Locally installed RGI database">
+                    <options from_data_table="rgi_databases">
+                        <validator type="no_options" message="No databases are available built-in"/>
+                    </options>
+                </param>
+                <param name="rgi_db_hist" type="hidden" value="" />
+            </when>
+            <when value="hist">
+                <param name="rgi_db_local" type="hidden" value="" />
+                <param name="rgi_db_hist" type="data" format="json" label="RGI database from your history" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="report" format="json" from_work_dir="out.json" label="${tool.name} on ${on_string}: report.json"/>
+        <data name="summary" format="tabular" from_work_dir="out.txt" label="${tool.name} on ${on_string}: summary.txt"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_sequence" value="test1.fasta" />
+            <param name="input_type" value="contig" />
+            <param name="alignment_tool" value="blast" />
+            <param name="include_loose" value="false" />
+            <param name="low_quality" value="false" />
+            <param name="data" value="NA" />
+            <conditional name="db_opts">
+                <param name="db_opts_selector" value="default" />
+            </conditional>
+            <output name="summary">
+                <assert_contents>
+                    <has_text text="AY123251" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+Resistance Gene Identifier (RGI)
+================================
+
+RGI is used to predict resistome(s) from protein or nucleotide data based on homology and SNP models. The tool uses data from the CARD database.
+
+Usage
+-----
+
+1. Select your input sequence (in FASTA format).
+2. Select your input type (CONTIG or PROTEIN).
+3. Select your alignment tool (DIAMOND or BLAST).  
+4. Specify if you want to include loose hits (YES or NO).
+5. Specify if you want to remove temporary files (YES or NO).
+6. Specify if you want to include low quality predictions (YES or NO).
+7. Select your data type (WGS, PLASMID, CHROMOSOME or NA). 
+8. Run the tool.
+
+Output
+------
+
+There are 2 different output files produced by `rgi`.
+
+summary.txt
+^^^^^^^^^^^
+
+A tabular file of all detected resistance genes, one gene per line.
+

+| ORF_ID                                                                                                                   | Contig        | Start | Stop | Orientation | Cut_Off  | Pass_Bitscore  | Best_Hit_Bitscore   | Best_Hit_ARO   | Best_Identities  | ARO      | Model_type            | SNPs_in_Best_Hit_ARO     | Other_SNPs  | Drug Class | Resistance Mechanism    | AMR Gene Family      | Predicted_DNA             | Predicted_Protein      | CARD_Protein_Sequence     | Percentage Length of Reference Sequence | ID                             | Model_ID  |
++==========================================================================================================================+===============+=======+======+=============+==========+================+=====================+================+==================+==========+=======================+==========================+=============+============+=========================+======================+===========================+========================+===========================+=========================================+================================+===========+
+| AY123251.1_6 # 3575 # 4489 # 1 # ID=1_6;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.399           | AY123251.1_6  | 3575  | 4489 | \+          | Strict   | 500            | 506.908             | CARB-7         | 83.68            | 3002246  | protein homolog model | n/a                      | n/a         | penam      | antibiotic inactivation | CARB beta-lactamase  | ATGC                      | MLLYKMCDNQNYGVTYMKFLLV | MKSLLVFALLMPSVVFASSSKFQSV | 105.56                                  | gnl|BL_ORD_ID|589|hsp_num:0    | 1443      |

+
+report.json
+^^^^^^^^^^^
+
+A json version of summary.txt.
+
+Help
+----
+
+**usage:**
+    ``rgi main [-h] -i INPUT_SEQUENCE -o OUTPUT_FILE [-t {read,contig,protein,wgs}] [-a {DIAMOND,BLAST}] [-n THREADS] [--include_loose] [--local] [--clean] [--debug] [--low_quality] [-d {wgs,plasmid,chromosome,NA}] [-v]``
+
+**optional arguments:**
+
+- ``-h, --help``
+    show this help message and exit
+- ``-i INPUT_SEQUENCE, --input_sequence INPUT_SEQUENCE``
+    input file must be in either FASTA (contig and protein) or gzip format e.g myFile.fasta, myFasta.fasta.gz
+- ``-o OUTPUT_FILE, --output_file OUTPUT_FILE``
+    output folder and base filename
+- ``-t {read,contig,protein,wgs}, --input_type {read,contig,protein,wgs}``
+    specify data input type (default = contig)
+- ``-a {DIAMOND,BLAST}, --alignment_tool {DIAMOND,BLAST}``
+    specify alignment tool (default = BLAST)
+- ``-n THREADS, --num_threads THREADS``
+    number of threads (CPUs) to use in the BLAST search (default=8)
+- ``--include_loose``
+    include loose hits in addition to strict and perfect hits
+- ``--local``
+    use local database (default: uses database in executable directory)
+- ``--clean``
+    removes temporary files
+- ``--debug``
+    debug mode
+- ``--low_quality``
+    use for short contigs to predict partial genes
+- ``-d {wgs,plasmid,chromosome,NA}, --data {wgs,plasmid,chromosome,NA}``
+    specify a data-type (default = NA)
+- ``-v, --version``
+    prints software version number
+
+Links
+-----
+
+RGI: https://card.mcmaster.ca/analyze/rgi
+
+Github: https://github.com/arpcard/rgi
+
+CARD database: https://card.mcmaster.ca
+     ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkw1004</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fasta	Wed Feb 27 09:08:21 2019 -0500
@@ -0,0 +1,68 @@
+>AY123251.1 Salmonella enterica subsp. enterica serovar Typhi R-plasmid pST2301 class I integron aminoglycoside 6'-N-acetyltransferase (aacA4), chloramphenicol acetyltransferase (catB8), aminoglycoside 3'-adenyltransferase (aadA1), dihydrofolate reductase type I (dhfr1), aminoglycoside 6'-n-acetyltransferase (aac6-II), and CARB-8 beta-lactamase (blaCARB-8) genes, complete cds
+TCATGGCTTGTTATGACTGTTTTTTTGTACAGTCTATGCCTCGGGCATCCAAGCAGCAAGCGCGTTACGC
+CGTGGGTCGATGTTTGATGTTATGGAGCAGCAACGATGTTACGCAGCAGGGCAGTCGCCCTAAAACAAAG
+TTAGGCATCACAAAGTACAGCATCGTGACCAACAGCAACGATTCCGTCACACTGCGCCTCATGACTGAGC
+ATGACCTTGCGATGCTCTATGAGTGGCTAAATCGATCTCATATCGTCGAGTGGTGGGGCGGAGAAGAAGC
+ACGCCCGACACTTGCTGACGTACAGGAACAGTACTTGCCAAGCGTTTTAGCGCAAGAGTCCGTCACTCCA
+TACATTGCAATGCTGAATGGAGAGCCGATTGGGTATGCCCAGTCGTACGTTGCTCTTGGAAGCGGGGACG
+GATGGTGGGAAGAAGAAACCGATCCAGGAGTACGCGGAATAGACCAGTCACTGGCGAATGCATCACAACT
+GGGCAAAGGCTTGGGAACCAAGCTGGTTCGAGCTCTGGTTGAGTTGCTGTTCAATGATCCCGAGGTCACC
+AAGATCCAAACGGACCCGTCGCCGAGCAACTTGCGAGCGATCCGATGCTACGAGAAAGCGGGGTTTGAGA
+GGCAAGGTACCGTAACCACCCCAGATGGTCCAGCCGTGTACATGGTTCAAACACGCCAGGCATTCGAGCG
+AACACGCAGTGTTGCCTAACCCTTCCATCGAGGGGGACGTCCAAGGGCTGGCGCCCTTGGCCGCCCCTCA
+TGTCAAACGTTAGACGGCAAGAAAAGGTTCCACGAACTCTGATGAAAAACTACTTTAACAGCCCTTTCAA
+AGGGGAACTTCTTTCTGAGCAAGTGAAAAATCCAAATATCAGAGTAGGCCGGTATAGCTATTACTCTGGC
+TACTATCACGGGCACTCATTTGATGAATGCGCGCGATACTTGCTTCCAGATCGTGATGACGTTGATAAAT
+TGATCATTGGCAGCTTTTGTTCTATAGGAAGCGGGGCTTCCTTCATCATGGCTGGCAATCAGGGGCATCG
+GCATGACTGGGCATCATCCTTCCCCTTCTTCTATATGCAAGAGGAGCCTGCTTTCTCAAGAGCACTCGAC
+GCCTTCCAAAGAGCAGGTGATACCGTCATTGGCAATGATGTCTGGATAGGCTCGGAGGCAATGATTATGC
+CTGGCATCAAAATTGGAGACGGTGCCGTGATAGGTAGTCGCTCGTTGGTGACAAAAGATGTAGAGCCTTA
+TGCCATCATCGGGGGAAATCCCGCAAAGCAAATTAAGAAGCGCTTCTCCGATGAGGAAATCTCATTGCTC
+ATGGAGATGGAGTGGTGGAACTGGCCACTAGATAAAATTAAGACAGCAATGCCTCTGCTGTGCTCGTCAA
+ATATTTTTGGTCTGCATAAGTATTGGCGCGAGTTTGCCGTCTAACAATTCATTCAAGCCGACGCCGCTTC
+GCGGCACGGCTTAATTCTGGCGTTAAACATCATGAGGGAAGCGGTGATCGCCGAAGTATCGACTCAACTA
+TCAGAGGTAGTTGGCGTCATCGAGCGCCATCTCGAACCGACGTTGCTGGCCGTACATTTGTACGGCTCCG
+CAGTGGATGGCGGCCTGAAGCCACACAGTGATATTGATTTGCTGGTTACGGTGACCGTAAGGCTTGATGA
+AACAACGCGGCGAGCTTTGATCAACGACCTTTTGGAAACTTCGGCTTCCCCTGGAGAGAGCGAGATTCTC
+CGCGCTGTAGAAGTCACCATTGTTGTGCACGACGACATCATTCCGTGGCGTTATCCAGCTAAGCGCGAAC
+TGCAATTTGGAGAATGGCAGCGCAATGACATTCTTGCAGGTATCTTCGAGCCAGCCACGATCGACATTGA
+TCTGGCTATCTTGCTGACAAAAGCAAGAGAACATAGCGTTGCCTTGGTAGGTCCAGCGGCGGAGGAACTC
+TTTGATCCGGTTCCTGAACAGGATCTATTTGAGGCGCTAAATGAAACCTTAACGCTATGGAACTCGCCGC
+CCGACTGGGCTGGCGATGAGCGAAATGTAGTGCTTACGTTGTCCCGCATTTGGTACAGCGCAGTAACCGG
+CAAAATCGCGCCGAAGGATGTCGCTGCCGACTGGGCAATGGAGCGCCTGCCGGCCCAGTATCAGCCCGTC
+ATACTTGAAGCTAGACAGGCTTATCTTGGACAAGAAGAAGATCGCTTGGCCTCCCGCGCAGATCAGTTGG
+AAGAATTTGTTCACTACGTGAAAGGCGAGATCACCAAGGTAGTCGGCAAATAATGTCTAACAATTCGTTC
+AAGCCGACGCCGCTTCGCGGCGCGGCTTAACTCAAGCGTTAACCTCTGAGGAAGAATTGTGAAACTATCA
+CTAATGGTAGCTATATCGAAGAATGGAGTTATCGGGAATGGCCCTGATATTCCATGGAGTGCCAAAGGTG
+AACAGCTCCTGTTTAAAGCTATTACCTATAACCAATGGCTGTTGGTTGGACGCAAGACTTTTGAGTCAAT
+GGGAGCATTACCCAACCGAAAGTATGCGGTCGTAACACGTTCAAGTTTTACATCTGACAATGAGAACGTA
+GTGATCTTTCCATCAATTAAAGATGCTTTAACCAACCTAAAGAAAATAACGGATCATGTCATTGTTTCAG
+GTGGTGGGGAGATATACAAAAGCCTGATCGATCAAGTAGATACACTACATATATCTACAATAGACATCGA
+GCCGGAAGGTGATGTTTACTTTCCTGAAATCCCCAGCAATTTTAGGCCAGTTTTTACCCAAGACTTCGCC
+TCTAACATAAATTATAGTTACCAAATCTGGCAAAAGGGTTAACAAGTGGCAGCAACGGATTCGCAAACCT
+GTCACGCCTTTTGTACCAAAACCCGCGCCAGGTTTGCGATCCGCTGTGCCAGGCGTTAGGCAGCACAGAG
+CGACCATTTCATGTCCGCGAGCACCCCCCCCATAACTCTTCGCCTCATGACCGAGCGCGACCTGCCGATG
+CTCCATGATTGGCTCAACCGGCCGCACATCGTTGAGTGGTGGGGTGGTGACGAAGAGCGACCGACTCTTG
+ATGAAGTGCTGGAACACTACCTGCCCAGAGCGATGGCGGAAGAGTCCGTAACACCGTACATCGCAATGCT
+GGGCGAGGAACCGATCGGCTATGCTCAGTCGTACGTCGCGCTCGGAAGCGGTGATGGCTGGTGGGAAGAT
+GAAACTGATCCAGGAGTGCGAGGAATAGACCAGTCTCTGGCTGACCCGACACAGTTGAACAAAGGCCTAG
+GAACAAGGCTTGTCCGCGCTCTCGTTGAACTACTGTTCTCGGACCCCACCGTGACGAAGATTCAGACCGA
+CCCGACTCCGAACAACCATCGAGCCATACGCTGCTATGAGAAGGCAGGATTCGTGCGGGAGAAGATCATC
+ACCACGCCTGACGGGCCGGCGGTTTACATGGTTCAAACACGACAAGCCTTCGAGAGAAAGCGCGGTGTTG
+CCTAACAACTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAGGTGTTAGCCATATTATGGAG
+CCTCATGCTTTTATATAAAATGTGTGACAATCAAAATTATGGGGTTACTTACATGAAGTTTTTATTGGTA
+TTTTCGCTTTTAATACCATCCGTGGTTTTTGCAAGTAGTTCAAAGTTTCGGCAAGTTGAACAAGACGTTA
+AGGCAATTGAAGTTTCTCTTTCTGCTCGTATAGGTGTTTCCGTTCTTGATACTCAAAATGGAGAATACTG
+GGATTACAATGGCAATCAGCGCTTCCCGTTGACAAGTACTTTTAAAACAATAGCTTGCGCTAAATTACTA
+TATGATGCTGAGCAAGGAAAAGTTAATCCCAATAGTACAATCGAGATTAAGAAAGCAGATCTTGTGACCT
+ATTCCCCTGTAATAGAAAAGCAAGTAGGGCAGGCAATCACACTCGATGATGCGTGCTTCGCAACTATGAC
+TACAAGTGATAATACTGCGGCAAATATCATCCTAAGTGCTGTAGGTGGCTCCAAAGGCGTTACTGATTTT
+TTAAGACAAATTGGGGACAAAGAGACTCGTCTAGACCGTATTGAGCCTGATTTAAATGAAGGTAAGCTCG
+GTGATTTGAGGGATACGACAACTCCTAAGGCAATAGCCAGTACTTTGAATAAATTTTTATTTGGTTCAGC
+GCTATCTGAAATGAACAAAAAAAAATTAGAGTCTTGGATGGTGAACAATCAAGTCACTGGTAATTTACTA
+CGTTCAGTATTGCCGGCGGGATGGAACATTGCGGATCGTTCAGGTGCTGGCGGATTTGGTGCTCGGAGTA
+TTACAGCAGTTGTGTGGAGTGAGCATCAAGCCCCAATTATTGTGAGCATCTATCTAGCTCAAACACAGGC
+TTCAATGGCAGAGCGAAATGATGCGATTGTTAAAATTGGTCGTTCAATTTTTGACGTTTATACATCACAG
+TCGCGCTGATAAGGCTAACAAGGCCATCAAGTTGACGGCTTTTCCGTCGCTTGTTTTGTGGCTTAACGCT
+ACGCTACCACAAAACAATCAACTACAAAGCCGCAACTTATGGCGGCGTTAGATACACTAAGCACATAATT
+GCTCACAGCCAAACTATCAGGTCAAGTCTGCTTTTATTATTTTTAAGCGTGCATAATAAGCCCTAC
\ No newline at end of file