Repository 'rgi'
hg clone https://toolshed.g2.bx.psu.edu/repos/card/rgi

Changeset 0:715bc9aeef69 (2019-02-27)
Next changeset 1:f96cbb663aa9 (2020-02-28)
Commit message:
planemo upload for repository https://github.com/arpcard/rgi commit 7a78289be23c5a14ae39f454610fa8eca3f05188
added:
README.rst
data_managers/data_manager_conf.xml
data_managers/data_manager_rgi_build_db/data_manager/.gitignore
data_managers/data_manager_rgi_build_db/data_manager/import_data.py
data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.py
data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.xml
data_managers/data_manager_rgi_build_db/tool-data/rgi_databases.loc.sample
data_managers/data_manager_rgi_build_db/tool_data_table_conf.xml.sample
rgi.xml
test-data/test1.fasta
b
diff -r 000000000000 -r 715bc9aeef69 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Wed Feb 27 09:08:21 2019 -0500
b
@@ -0,0 +1,6 @@
+RGI wrapper
+--------------------
+
+This wrapper is used to run the Resistance Gene Identifier (RGI) in a Galaxy environment.
+
+This tool is used together with the data manager for RGI: `rgi_database_builder <https://github.com/arpcard/rgi_database_builder>`_.
b
diff -r 000000000000 -r 715bc9aeef69 data_managers/data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_conf.xml Wed Feb 27 09:08:21 2019 -0500
b
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager_rgi_build_db/data_manager/rgi_database_builder.xml" id="rgi_database_builder" version="1.0.0">
+        <!-- Defines a Data Table to be modified -->
+        <data_table name="rgi_databases">
+            <!-- Handle the output of the Data Manager Tool -->
+            <output>
+                <!-- Columns that will be specified by the Data Manager Tool -->
+                <column name="value" /> 
+                <column name="name" />
+                <!-- The value of this column will be modified based upon data in "out_file" -->
+                <column name="path" output_ref="out_file" >
+                    <!-- Moving a file from the extra files path of "out_file" -->
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rgi_databases/${value}/${path}</target>
+                    </move>
+                    <!-- Store this value in the final Data Table -->
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rgi_databases/${value}/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
b
diff -r 000000000000 -r 715bc9aeef69 data_managers/data_manager_rgi_build_db/data_manager/.gitignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/.gitignore Wed Feb 27 09:08:21 2019 -0500
b
@@ -0,0 +1,1 @@
+*.pyc
\ No newline at end of file
b
diff -r 000000000000 -r 715bc9aeef69 data_managers/data_manager_rgi_build_db/data_manager/import_data.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/import_data.py Wed Feb 27 09:08:21 2019 -0500
[
b"@@ -0,0 +1,199 @@\n+import argparse\n+import datetime\n+import json\n+import os\n+import shutil\n+import sys\n+import tarfile\n+import urllib.request, urllib.error, urllib.parse\n+import zipfile\n+import logging\n+\n+path = os.path.join(os.getcwd(), 'rgi-database') \n+data_path = path\n+\n+level = logging.WARNING\n+logger = logging.getLogger(__name__)\n+logger.setLevel(level)\n+\n+def url_download(url, workdir):\n+    file_path = os.path.join(workdir, 'download.dat')\n+    if not os.path.exists(workdir):\n+        os.makedirs(workdir)\n+    src = None\n+    dst = None\n+    try:\n+        req = urllib.request.Request(url)\n+        src = urllib.request.urlopen(req)\n+        dst = open(file_path, 'wb')\n+        while True:\n+            chunk = src.read(2**10)\n+            if chunk:\n+                dst.write(chunk)\n+            else:\n+                break\n+    except Exception as e:\n+        print(str(e), file=sys.stderr)\n+    finally:\n+        if src:\n+            src.close()\n+        if dst:\n+            dst.close()\n+    if tarfile.is_tarfile(file_path):\n+        fh = tarfile.open(file_path, 'r:*')\n+    elif zipfile.is_zipfile(file_path):\n+        fh = zipfile.ZipFile(file_path, 'r')\n+    else:\n+        return\n+    # extract only one file : card.json\n+    for member in fh.getmembers():\n+        if member.isreg():  # skip if the TarInfo is not files\n+            member.name = os.path.basename(member.name) # remove the path by reset it\n+            if member.name == 'card.json':\n+                print('[import_data] extracting file: {}'.format(str(member.name)))\n+                fh.extract(member.name,workdir)\n+    os.remove(file_path)\n+\n+def checkKeyExisted(key, my_dict):\n+    try:\n+        nonNone = my_dict[key] is not None\n+    except KeyError:\n+        nonNone = False\n+    return nonNone\n+\n+def data_version():\n+    data_version = ''\n+    with open(os.path.join(data_path, 'card.json')) as json_file:\n+        json_data = 
json.load(json_file)\n+        for item in list(json_data.keys()):\n+            if item == '_version':\n+                data_version = json_data[item]\n+    json_file.close()\n+    return data_version\n+\n+def makeBlastDB():\n+    if os.path.isfile(os.path.join(path, 'proteindb.fsa')) == True:\n+        print('[import_data] create blast DB.')\n+        os.system('makeblastdb -in {}/proteindb.fsa -dbtype prot -out {}/protein.db > /dev/null 2>&1'.format(path, path))\n+\n+def makeDiamondDB():\n+    if os.path.isfile(os.path.join(path, 'proteindb.fsa')) == True:\n+        print('[import_data] create diamond DB.')\n+        os.system('diamond makedb --quiet --in {}/proteindb.fsa --db {}/protein.db'.format(path, path))\n+\n+def write_fasta_from_json():\n+\t\t'''Creates a fasta file from card.json file.'''\n+\t\tif os.path.isfile(os.path.join(path, 'proteindb.fsa')):\n+\t\t\treturn\n+\t\telse:\n+\t\t\ttry:\n+\t\t\t\twith open(os.path.join(data_path, 'card.json'), 'r') as jfile:\n+\t\t\t\t\tj = json.load(jfile)\n+\t\t\texcept Exception as e:\n+\t\t\t\tlogger.error(e)\n+\t\t\t\texit()\n+\n+\t\t\twith open(os.path.join(path, 'proteindb.fsa'), 'w') as fout:\n+\t\t\t\tfor i in j:\n+\t\t\t\t\tif i.isdigit():\n+\t\t            \t# model_type: protein homolog model\n+\t\t\t\t\t\tif j[i]['model_type_id'] == '40292':\n+\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\tpass_bit_score = j[i]['model_param']['blastp_bit_score']['param_value']\n+\t\t\t\t\t\t\texcept KeyError:\n+\t\t\t\t\t\t\t\tlogger.warning('No bitscore for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t% (j[i]['model_id'], j[i]['model_name']))\n+\t\t\t\t\t\t\t\tlogger.info('Please let the CARD Admins know! 
Email: card@mcmaster.ca')\n+\t\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\t\tfor seq in j[i]['model_sequences']['sequence']:\n+\t\t\t\t\t\t\t\t\t\tfout.write('>%s_%s | model_type_id: 40292 | pass_bitscore: %s | %s\\n' % (i, seq, pass_bit_score, j[i]['ARO_name']))\n+\t\t\t\t\t\t\t\t\t\tfout.write('%s\\n' %(j[i]['model_sequences']['sequence'][seq]['protein_sequence']['sequence']))\n+\t\t\t\t\t\t\t\texcept Exception as e:\n+\t\t\t\t\t\t\t\t\tlogger.warning('No model sequences for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t\t% ("..b"l: card@mcmaster.ca')\n+\n+\n+\t\t            \t# model_type: protein variant model\n+\t\t\t\t\t\telif j[i]['model_type_id'] == '40293':\n+\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\tpass_bit_score = j[i]['model_param']['blastp_bit_score']['param_value']\n+\t\t\t\t\t\t\texcept KeyError:\n+\t\t\t\t\t\t\t\tlogger.warning('No bitscore for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t% (j[i]['model_id'], j[i]['model_name']))\n+\t\t\t\t\t\t\t\tlogger.info('Please let the CARD Admins know! Email: card@mcmaster.ca')\n+\t\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\t\tsnpList = [j[i]['model_param']['snp']['param_value'][k] for k in j[i]['model_param']['snp']['param_value']]\n+\t\t\t\t\t\t\t\texcept Exception as e:\n+\t\t\t\t\t\t\t\t\tlogger.warning('No snp for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t\t% (j[i]['model_id'], j[i]['model_name']))\n+\t\t\t\t\t\t\t\t\tlogger.info('Please let the CARD Admins know! 
Email: card@mcmaster.ca')\n+\n+\t\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\t\tfor seq in j[i]['model_sequences']['sequence']:\n+\t\t\t\t\t\t\t\t\t\tfout.write('>%s_%s | model_type_id: 40293 | pass_bit_score: %s | SNP: %s | %s\\n' \\\n+\t\t\t\t\t\t\t\t\t\t\t% (i, seq, pass_bit_score, ','.join(snpList), j[i]['ARO_name']))\n+\t\t\t\t\t\t\t\t\t\tfout.write('%s\\n' % (j[i]['model_sequences']['sequence'][seq]['protein_sequence']['sequence']))\n+\t\t\t\t\t\t\t\texcept Exception as e:\n+\t\t\t\t\t\t\t\t\tlogger.warning('No model sequences for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t\t% (j[i]['model_id'], j[i]['model_name']))\n+\t\t\t\t\t\t\t\t\tlogger.info('Please let the CARD Admins know! Email: card@mcmaster.ca')\n+\n+\t\t            \t# model_type: protein overexpression model\n+\t\t\t\t\t\telif j[i]['model_type_id'] == '41091':\n+\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\tpass_bit_score = j[i]['model_param']['blastp_bit_score']['param_value']\n+\t\t\t\t\t\t\texcept KeyError:\n+\t\t\t\t\t\t\t\tlogger.warning('No bitscore for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t% (j[i]['model_id'], j[i]['model_name']))\n+\t\t\t\t\t\t\t\tlogger.info('Please let the CARD Admins know! Email: card@mcmaster.ca')\n+\t\t\t\t\t\t\telse:\n+\t\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\t\tsnpList = [j[i]['model_param']['snp']['param_value'][k] for k in j[i]['model_param']['snp']['param_value']]\n+\t\t\t\t\t\t\t\texcept Exception as e:\n+\t\t\t\t\t\t\t\t\tlogger.warning('No snp for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t\t% (j[i]['model_id'], j[i]['model_name']))\n+\t\t\t\t\t\t\t\t\tlogger.info('Please let the CARD Admins know! 
Email: card@mcmaster.ca')\n+\n+\t\t\t\t\t\t\t\ttry:\n+\t\t\t\t\t\t\t\t\tfor seq in j[i]['model_sequences']['sequence']:\n+\t\t\t\t\t\t\t\t\t\tfout.write('>%s_%s | model_type_id: 41091 | pass_bit_score: %s | SNP: %s | %s\\n' \\\n+\t\t\t\t\t\t\t\t\t\t\t% (i, seq, pass_bit_score, ','.join(snpList), j[i]['ARO_name']))\n+\t\t\t\t\t\t\t\t\t\tfout.write('%s\\n' % (j[i]['model_sequences']['sequence'][seq]['protein_sequence']['sequence']))\n+\t\t\t\t\t\t\t\texcept Exception as e:\n+\t\t\t\t\t\t\t\t\tlogger.warning('No model sequences for model (%s, %s). RGI will omit this model and keep running.' \\\n+\t\t\t\t\t\t\t\t\t\t% (j[i]['model_id'], j[i]['model_name']))\n+\t\t\t\t\t\t\t\t\tlogger.info('Please let the CARD Admins know! Email: card@mcmaster.ca')\n+\n+def _main(args):\n+    if not os.path.exists(path):\n+        print('[import_data] mkdir: {}'.format(path))\n+        os.makedirs(path)\n+    print('[import_data] path: {}'.format(path))\n+    print(args)\n+\n+    if args.url == None:\n+        url = 'https://card.mcmaster.ca/latest/data'\n+    else:\n+        url = args.url\n+    print('[import_data] url: {}'.format(url))\n+    workdir = os.path.join(os.getcwd(), 'rgi-database')\n+    print('[import_data] working directory: {}'.format(workdir))\n+    url_download(url, workdir)\n+    write_fasta_from_json()\n+    makeBlastDB()\n+    makeDiamondDB()\n+    version = data_version()\n+    print('[import_data] data version: {}'.format(version))\n+    return version\n+\n+def run():\n+    parser = argparse.ArgumentParser(description='Create data manager json.')\n+    parser.add_argument('--url', dest='url', action='store', help='Url for CARD data')\n+    args = parser.parse_args()\n+    _main(args)\n+\n+if __name__ == '__main__':\n+    run()\n"
b
diff -r 000000000000 -r 715bc9aeef69 data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.py Wed Feb 27 09:08:21 2019 -0500
[
@@ -0,0 +1,47 @@
+import argparse
+import datetime
+import json
+import os
+import shutil
+import sys
+import tarfile
+import urllib.request, urllib.error, urllib.parse
+import zipfile
+from import_data import run, _main
+
+parser = argparse.ArgumentParser(description='Create data manager json.')
+parser.add_argument('--url', dest='url', action='store', help='Url for CARD data')
+parser.add_argument('--out', dest='output', action='store', help='JSON filename')
+parser.add_argument('--name', dest='name', action='store', default='CARD_data-' + str(datetime.datetime.now().strftime('%Y-%B-%d-%H:%M:%S')), help='Data table database name')
+args = parser.parse_args()
+
+print('[rgi_database_builder] Importing...')
+
+_main(args)
+
+def main(args):
+    print('[rgi_database_builder] Building......')
+
+    data_manager_entry = {}
+    data_manager_entry['value'] = args.name.lower()
+    data_manager_entry['name'] = args.name
+    data_manager_entry['path'] = '.'
+
+    data_manager_json = dict(data_tables = dict(rgi_databases=data_manager_entry))
+
+    params = json.loads(open(args.output,'r').read())
+
+    target_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_directory)
+    output_path = os.path.join(os.getcwd(), 'rgi-database')
+
+    for filename in os.listdir(output_path):
+        print('[rgi_database_builder] move file: {} from {} to {}'.format(filename, output_path, target_directory))
+        shutil.move(os.path.join(output_path, filename), target_directory)
+
+    print(args.output)
+    print('[rgi_database_builder] write file: {}'.format(args.output))
+    open(args.output, 'w').write(json.dumps(data_manager_json))
+
+if __name__ == '__main__':
+    main(args)
b
diff -r 000000000000 -r 715bc9aeef69 data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/data_manager/rgi_database_builder.xml Wed Feb 27 09:08:21 2019 -0500
[
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<tool id="rgi_database_builder" name="RGI Database Builder" tool_type="manage_data" version="1.0.0">
+    <description>Download and build the CARD database for RGI</description>
+    <requirements>
+ <requirement type="package" version="4.2.2">rgi</requirement>
+    </requirements>
+    <stdio>
+        <exit_code description="Error" level="fatal" range="1:" />
+    </stdio>
+    <command interpreter="python3">
+    <![CDATA[
+        rgi_database_builder.py --out "${out_file}"
+        #if $name:
+            --name "${name}"
+        #end if
+        #if $url:
+            --url "${url}"
+        #end if
+    ]]>
+    </command>
+    <inputs>
+        <param name="name" type="text" value="" label="Database name" help="Name for this database, or leave blank for today's date" optional="True"/>
+        <param name="url" type="text" value="https://card.mcmaster.ca/latest/data" label="Url" help="URL for the CARD data, e.g. https://card.mcmaster.ca/latest/data. Visit https://card.mcmaster.ca/download to see previous CARD releases." optional="True"/>
+    </inputs>
+    <outputs>
+        <data format="data_manager_json" name="out_file" label="${tool.name} (JSON)"/>
+    </outputs>
+</tool>
b
diff -r 000000000000 -r 715bc9aeef69 data_managers/data_manager_rgi_build_db/tool-data/rgi_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/tool-data/rgi_databases.loc.sample Wed Feb 27 09:08:21 2019 -0500
b
@@ -0,0 +1,5 @@
+#This file lists the columns that will be specified by the RGI Data Manager tool.
+#
+#For example:
+#<value>    <name>    <path>
+#rgi_20181001    rgi_20181001    /galaxy-central/tool-data/rgi_databases/rgi_20181001
b
diff -r 000000000000 -r 715bc9aeef69 data_managers/data_manager_rgi_build_db/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_managers/data_manager_rgi_build_db/tool_data_table_conf.xml.sample Wed Feb 27 09:08:21 2019 -0500
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of RGI database in the required format -->
+    <table name="rgi_databases" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/rgi_databases.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 715bc9aeef69 rgi.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgi.xml Wed Feb 27 09:08:21 2019 -0500
[
b'@@ -0,0 +1,187 @@\n+<tool id="rgi" name="Resistance Gene Identifier (RGI)" version="4.2.2">\n+    <description>This tool predicts resistome(s) from protein or nucleotide data based on homology and SNP models.</description>\n+    <requirements>\n+        <requirement type="package" version="4.2.2">rgi</requirement>\n+    </requirements>\n+    <stdio>\n+        <exit_code range="1:" />\n+    </stdio>\n+    <command><![CDATA[\n+#if $db_opts.db_opts_selector != "default":\n+    rgi load\n+        #if $db_opts.db_opts_selector == "local":\n+            -i "${__tool_data_path__}/rgi_databases/$db_opts.rgi_db_local/card.json" \n+        #else:\n+            -i "$db_opts.rgi_db_hist"\n+        #end if\n+        --local\n+    &&\n+#end if\n+\n+rgi main\n+    -i \'$input_sequence\'\n+    -o out\n+    -t $input_type\n+    -a $alignment_tool\n+    -n "\\${GALAXY_SLOTS:-1}"\n+    $include_loose\n+    $low_quality\n+    -d $data\n+\n+#if $db_opts.db_opts_selector != "default":\n+    --local\n+#end if\n+       ]]></command>\n+    <inputs>\n+        <param argument="--input_sequence" type="data" format="fastq,fasta" label="Input sequence" help="input file must be in either FASTA (contig and protein) or gzip format e.g myFile.fasta, myFasta.fasta.gz"/>\n+        <param argument="--input_type" type="select" multiple="false" label="Input type" help="specify data input type (default = contig)">\n+            <option value="contig" selected="true">CONTIG (Nucleotide Sequence)</option>\n+            <option value="protein">PROTEIN (Protein Sequence)</option>\n+        </param>\n+        <param argument="--alignment_tool" type="select" label="Alignment tool" help="specify alignment tool (default = BLAST)">\n+            <option value="blast" selected="true">BLAST</option>\n+            <option value="diamond">DIAMOND</option>\n+        </param>\n+        <param argument="--include_loose" type="boolean" checked="false" truevalue="--include_loose" falsevalue="" label="Include loose hits" 
help="include loose hits in addition to strict and perfect hits"/>\n+        <param argument="--low_quality" type="boolean" checked="false" truevalue="--low_quality" falsevalue="" label="Low quality" help="use for short contigs to predict partial genes"/>\n+        <param argument="--data" type="select" multiple="false" label="Data type" help="specify a data-type (default = NA)">\n+            <option value="wgs">WGS</option>\n+            <option value="plasmid">PLASMID</option>\n+            <option value="chromosome">CHROMOSOME</option>\n+            <option value="NA" selected="true">NA</option>\n+        </param>\n+        <conditional name="db_opts">\n+            <param name="db_opts_selector" type="select" label="Select an RGI database">\n+                <option value="default" selected="true">Default RGI database</option>\n+                <option value="local">Locally installed RGI database</option>\n+                <option value="hist">RGI database from your history</option>\n+            </param>\n+            <when value="default">\n+                <param name="rgi_db_local" type="hidden" value="" /> \n+                <param name="rgi_db_hist" type="hidden" value="" />\n+            </when>\n+            <when value="local">\n+                <param name="rgi_db_local" type="select" multiple="false" label="Locally installed RGI database">\n+                    <options from_data_table="rgi_databases">\n+                        <validator type="no_options" message="No databases are available built-in"/>\n+                    </options>\n+                </param>\n+                <param name="rgi_db_hist" type="hidden" value="" />\n+            </when>\n+            <when value="hist">\n+                <param name="rgi_db_local" type="hidden" value="" />\n+                <param name="rgi_db_hist" type="data" format="json" label="RGI database from your history" />\n+            </when>\n+        </conditional>\n+    </inputs>\n+    <outputs>\n+     
   <data name="report" format="json" from_work_dir="out.json" label="${tool.name} '..b'                                                                                                                | Contig        | Start | Stop | Orientation | Cut_Off  | Pass_Bitscore  | Best_Hit_Bitscore   | Best_Hit_ARO   | Best_Identities  | ARO      | Model_type            | SNPs_in_Best_Hit_ARO     | Other_SNPs  | Drug Class | Resistance Mechanism    | AMR Gene Family      | Predicted_DNA             | Predicted_Protein      | CARD_Protein_Sequence     | Percentage Length of Reference Sequence | ID                             | Model_ID  |\n++==========================================================================================================================+===============+=======+======+=============+==========+================+=====================+================+==================+==========+=======================+==========================+=============+============+=========================+======================+===========================+========================+===========================+=========================================+================================+===========+\n+| AY123251.1_6 # 3575 # 4489 # 1 # ID=1_6;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.399           | AY123251.1_6  | 3575  | 4489 | \\+          | Strict   | 500            | 506.908             | CARB-7         | 83.68            | 3002246  | protein homolog model | n/a                      | n/a         | penam      | antibiotic inactivation | CARB beta-lactamase  | ATGC                      | MLLYKMCDNQNYGVTYMKFLLV | MKSLLVFALLMPSVVFASSSKFQSV | 105.56                                  | gnl|BL_ORD_ID|589|hsp_num:0    | 1443      
|\n++--------------------------------------------------------------------------------------------------------------------------+---------------+-------+------+-------------+----------+----------------+---------------------+----------------+------------------+----------+-----------------------+--------------------------+-------------+------------+-------------------------+----------------------+---------------------------+------------------------+---------------------------+-----------------------------------------+--------------------------------+-----------+\n+\n+report.json\n+^^^^^^^^^^^\n+\n+A json version of summary.txt.\n+\n+Help\n+----\n+\n+**usage:**\n+    ``rgi main [-h] -i INPUT_SEQUENCE -o OUTPUT_FILE [-t {read,contig,protein,wgs}] [-a {DIAMOND,BLAST}] [-n THREADS] [--include_loose] [--local] [--clean] [--debug] [--low_quality] [-d {wgs,plasmid,chromosome,NA}] [-v]``\n+\n+**optional arguments:**\n+\n+- ``-h, --help``\n+    show this help message and exit\n+- ``-i INPUT_SEQUENCE, --input_sequence INPUT_SEQUENCE``\n+    input file must be in either FASTA (contig and protein) or gzip format e.g myFile.fasta, myFasta.fasta.gz\n+- ``-o OUTPUT_FILE, --output_file OUTPUT_FILE``\n+    output folder and base filename\n+- ``-t {read,contig,protein,wgs}, --input_type {read,contig,protein,wgs}``\n+    specify data input type (default = contig)\n+- ``-a {DIAMOND,BLAST}, --alignment_tool {DIAMOND,BLAST}``\n+    specify alignment tool (default = BLAST)\n+- ``-n THREADS, --num_threads THREADS``\n+    number of threads (CPUs) to use in the BLAST search (default=8)\n+- ``--include_loose``\n+    include loose hits in addition to strict and perfect hits\n+- ``--local``\n+    use local database (default: uses database in executable directory)\n+- ``--clean``\n+    removes temporary files\n+- ``--debug``\n+    debug mode\n+- ``--low_quality``\n+    use for short contigs to predict partial genes\n+- ``-d {wgs,plasmid,chromosome,NA}, --data {wgs,plasmid,chromosome,NA}``\n+    
specify a data-type (default = NA)\n+- ``-v, --version``\n+    prints software version number\n+\n+Links\n+-----\n+\n+RGI: https://card.mcmaster.ca/analyze/rgi\n+\n+Github: https://github.com/arpcard/rgi\n+\n+CARD database: https://card.mcmaster.ca\n+     ]]>\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1093/nar/gkw1004</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 715bc9aeef69 test-data/test1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fasta Wed Feb 27 09:08:21 2019 -0500
b
@@ -0,0 +1,68 @@
+>AY123251.1 Salmonella enterica subsp. enterica serovar Typhi R-plasmid pST2301 class I integron aminoglycoside 6'-N-acetyltransferase (aacA4), chloramphenicol acetyltransferase (catB8), aminoglycoside 3'-adenyltransferase (aadA1), dihydrofolate reductase type I (dhfr1), aminoglycoside 6'-n-acetyltransferase (aac6-II), and CARB-8 beta-lactamase (blaCARB-8) genes, complete cds
+TCATGGCTTGTTATGACTGTTTTTTTGTACAGTCTATGCCTCGGGCATCCAAGCAGCAAGCGCGTTACGC
+CGTGGGTCGATGTTTGATGTTATGGAGCAGCAACGATGTTACGCAGCAGGGCAGTCGCCCTAAAACAAAG
+TTAGGCATCACAAAGTACAGCATCGTGACCAACAGCAACGATTCCGTCACACTGCGCCTCATGACTGAGC
+ATGACCTTGCGATGCTCTATGAGTGGCTAAATCGATCTCATATCGTCGAGTGGTGGGGCGGAGAAGAAGC
+ACGCCCGACACTTGCTGACGTACAGGAACAGTACTTGCCAAGCGTTTTAGCGCAAGAGTCCGTCACTCCA
+TACATTGCAATGCTGAATGGAGAGCCGATTGGGTATGCCCAGTCGTACGTTGCTCTTGGAAGCGGGGACG
+GATGGTGGGAAGAAGAAACCGATCCAGGAGTACGCGGAATAGACCAGTCACTGGCGAATGCATCACAACT
+GGGCAAAGGCTTGGGAACCAAGCTGGTTCGAGCTCTGGTTGAGTTGCTGTTCAATGATCCCGAGGTCACC
+AAGATCCAAACGGACCCGTCGCCGAGCAACTTGCGAGCGATCCGATGCTACGAGAAAGCGGGGTTTGAGA
+GGCAAGGTACCGTAACCACCCCAGATGGTCCAGCCGTGTACATGGTTCAAACACGCCAGGCATTCGAGCG
+AACACGCAGTGTTGCCTAACCCTTCCATCGAGGGGGACGTCCAAGGGCTGGCGCCCTTGGCCGCCCCTCA
+TGTCAAACGTTAGACGGCAAGAAAAGGTTCCACGAACTCTGATGAAAAACTACTTTAACAGCCCTTTCAA
+AGGGGAACTTCTTTCTGAGCAAGTGAAAAATCCAAATATCAGAGTAGGCCGGTATAGCTATTACTCTGGC
+TACTATCACGGGCACTCATTTGATGAATGCGCGCGATACTTGCTTCCAGATCGTGATGACGTTGATAAAT
+TGATCATTGGCAGCTTTTGTTCTATAGGAAGCGGGGCTTCCTTCATCATGGCTGGCAATCAGGGGCATCG
+GCATGACTGGGCATCATCCTTCCCCTTCTTCTATATGCAAGAGGAGCCTGCTTTCTCAAGAGCACTCGAC
+GCCTTCCAAAGAGCAGGTGATACCGTCATTGGCAATGATGTCTGGATAGGCTCGGAGGCAATGATTATGC
+CTGGCATCAAAATTGGAGACGGTGCCGTGATAGGTAGTCGCTCGTTGGTGACAAAAGATGTAGAGCCTTA
+TGCCATCATCGGGGGAAATCCCGCAAAGCAAATTAAGAAGCGCTTCTCCGATGAGGAAATCTCATTGCTC
+ATGGAGATGGAGTGGTGGAACTGGCCACTAGATAAAATTAAGACAGCAATGCCTCTGCTGTGCTCGTCAA
+ATATTTTTGGTCTGCATAAGTATTGGCGCGAGTTTGCCGTCTAACAATTCATTCAAGCCGACGCCGCTTC
+GCGGCACGGCTTAATTCTGGCGTTAAACATCATGAGGGAAGCGGTGATCGCCGAAGTATCGACTCAACTA
+TCAGAGGTAGTTGGCGTCATCGAGCGCCATCTCGAACCGACGTTGCTGGCCGTACATTTGTACGGCTCCG
+CAGTGGATGGCGGCCTGAAGCCACACAGTGATATTGATTTGCTGGTTACGGTGACCGTAAGGCTTGATGA
+AACAACGCGGCGAGCTTTGATCAACGACCTTTTGGAAACTTCGGCTTCCCCTGGAGAGAGCGAGATTCTC
+CGCGCTGTAGAAGTCACCATTGTTGTGCACGACGACATCATTCCGTGGCGTTATCCAGCTAAGCGCGAAC
+TGCAATTTGGAGAATGGCAGCGCAATGACATTCTTGCAGGTATCTTCGAGCCAGCCACGATCGACATTGA
+TCTGGCTATCTTGCTGACAAAAGCAAGAGAACATAGCGTTGCCTTGGTAGGTCCAGCGGCGGAGGAACTC
+TTTGATCCGGTTCCTGAACAGGATCTATTTGAGGCGCTAAATGAAACCTTAACGCTATGGAACTCGCCGC
+CCGACTGGGCTGGCGATGAGCGAAATGTAGTGCTTACGTTGTCCCGCATTTGGTACAGCGCAGTAACCGG
+CAAAATCGCGCCGAAGGATGTCGCTGCCGACTGGGCAATGGAGCGCCTGCCGGCCCAGTATCAGCCCGTC
+ATACTTGAAGCTAGACAGGCTTATCTTGGACAAGAAGAAGATCGCTTGGCCTCCCGCGCAGATCAGTTGG
+AAGAATTTGTTCACTACGTGAAAGGCGAGATCACCAAGGTAGTCGGCAAATAATGTCTAACAATTCGTTC
+AAGCCGACGCCGCTTCGCGGCGCGGCTTAACTCAAGCGTTAACCTCTGAGGAAGAATTGTGAAACTATCA
+CTAATGGTAGCTATATCGAAGAATGGAGTTATCGGGAATGGCCCTGATATTCCATGGAGTGCCAAAGGTG
+AACAGCTCCTGTTTAAAGCTATTACCTATAACCAATGGCTGTTGGTTGGACGCAAGACTTTTGAGTCAAT
+GGGAGCATTACCCAACCGAAAGTATGCGGTCGTAACACGTTCAAGTTTTACATCTGACAATGAGAACGTA
+GTGATCTTTCCATCAATTAAAGATGCTTTAACCAACCTAAAGAAAATAACGGATCATGTCATTGTTTCAG
+GTGGTGGGGAGATATACAAAAGCCTGATCGATCAAGTAGATACACTACATATATCTACAATAGACATCGA
+GCCGGAAGGTGATGTTTACTTTCCTGAAATCCCCAGCAATTTTAGGCCAGTTTTTACCCAAGACTTCGCC
+TCTAACATAAATTATAGTTACCAAATCTGGCAAAAGGGTTAACAAGTGGCAGCAACGGATTCGCAAACCT
+GTCACGCCTTTTGTACCAAAACCCGCGCCAGGTTTGCGATCCGCTGTGCCAGGCGTTAGGCAGCACAGAG
+CGACCATTTCATGTCCGCGAGCACCCCCCCCATAACTCTTCGCCTCATGACCGAGCGCGACCTGCCGATG
+CTCCATGATTGGCTCAACCGGCCGCACATCGTTGAGTGGTGGGGTGGTGACGAAGAGCGACCGACTCTTG
+ATGAAGTGCTGGAACACTACCTGCCCAGAGCGATGGCGGAAGAGTCCGTAACACCGTACATCGCAATGCT
+GGGCGAGGAACCGATCGGCTATGCTCAGTCGTACGTCGCGCTCGGAAGCGGTGATGGCTGGTGGGAAGAT
+GAAACTGATCCAGGAGTGCGAGGAATAGACCAGTCTCTGGCTGACCCGACACAGTTGAACAAAGGCCTAG
+GAACAAGGCTTGTCCGCGCTCTCGTTGAACTACTGTTCTCGGACCCCACCGTGACGAAGATTCAGACCGA
+CCCGACTCCGAACAACCATCGAGCCATACGCTGCTATGAGAAGGCAGGATTCGTGCGGGAGAAGATCATC
+ACCACGCCTGACGGGCCGGCGGTTTACATGGTTCAAACACGACAAGCCTTCGAGAGAAAGCGCGGTGTTG
+CCTAACAACTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAGGTGTTAGCCATATTATGGAG
+CCTCATGCTTTTATATAAAATGTGTGACAATCAAAATTATGGGGTTACTTACATGAAGTTTTTATTGGTA
+TTTTCGCTTTTAATACCATCCGTGGTTTTTGCAAGTAGTTCAAAGTTTCGGCAAGTTGAACAAGACGTTA
+AGGCAATTGAAGTTTCTCTTTCTGCTCGTATAGGTGTTTCCGTTCTTGATACTCAAAATGGAGAATACTG
+GGATTACAATGGCAATCAGCGCTTCCCGTTGACAAGTACTTTTAAAACAATAGCTTGCGCTAAATTACTA
+TATGATGCTGAGCAAGGAAAAGTTAATCCCAATAGTACAATCGAGATTAAGAAAGCAGATCTTGTGACCT
+ATTCCCCTGTAATAGAAAAGCAAGTAGGGCAGGCAATCACACTCGATGATGCGTGCTTCGCAACTATGAC
+TACAAGTGATAATACTGCGGCAAATATCATCCTAAGTGCTGTAGGTGGCTCCAAAGGCGTTACTGATTTT
+TTAAGACAAATTGGGGACAAAGAGACTCGTCTAGACCGTATTGAGCCTGATTTAAATGAAGGTAAGCTCG
+GTGATTTGAGGGATACGACAACTCCTAAGGCAATAGCCAGTACTTTGAATAAATTTTTATTTGGTTCAGC
+GCTATCTGAAATGAACAAAAAAAAATTAGAGTCTTGGATGGTGAACAATCAAGTCACTGGTAATTTACTA
+CGTTCAGTATTGCCGGCGGGATGGAACATTGCGGATCGTTCAGGTGCTGGCGGATTTGGTGCTCGGAGTA
+TTACAGCAGTTGTGTGGAGTGAGCATCAAGCCCCAATTATTGTGAGCATCTATCTAGCTCAAACACAGGC
+TTCAATGGCAGAGCGAAATGATGCGATTGTTAAAATTGGTCGTTCAATTTTTGACGTTTATACATCACAG
+TCGCGCTGATAAGGCTAACAAGGCCATCAAGTTGACGGCTTTTCCGTCGCTTGTTTTGTGGCTTAACGCT
+ACGCTACCACAAAACAATCAACTACAAAGCCGCAACTTATGGCGGCGTTAGATACACTAAGCACATAATT
+GCTCACAGCCAAACTATCAGGTCAAGTCTGCTTTTATTATTTTTAAGCGTGCATAATAAGCCCTAC
\ No newline at end of file