Repository 'screen_abricate_report'
hg clone https://toolshed.g2.bx.psu.edu/repos/public-health-bioinformatics/screen_abricate_report

Changeset 3:2262e531c50b (2020-01-02)
Previous changeset 2:912a3a3dc082 (2019-12-19) Next changeset 4:22247b1a59d5 (2020-01-03)
Commit message:
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 50a464c3e6f87ca8d2c874842cbcee370d8aa9c4"
modified:
screen_abricate_report.py
screen_abricate_report.xml
test-data/SAMN13042171_abricate_report_screened.tsv
test-data/SAMN13042171_gene_detection_status.tsv
test-data/screening_file.tsv
b
diff -r 912a3a3dc082 -r 2262e531c50b screen_abricate_report.py
--- a/screen_abricate_report.py Thu Dec 19 20:31:11 2019 -0500
+++ b/screen_abricate_report.py Thu Jan 02 17:14:34 2020 -0500
[
@@ -7,6 +7,27 @@
 import re
 
 
+class Range(object):
+    """
+    Used to limit the min_coverage and min_identity args to range 0.0 - 100.0
+    """
+    def __init__(self, start, end):
+        self.start = start
+        self.end = end
+
+    def __eq__(self, other):
+        return self.start <= other <= self.end
+
+    def __contains__(self, item):
+        return self.__eq__(item)
+
+    def __iter__(self):
+        yield self
+
+    def __repr__(self):
+        return str(self.start) + " - " + str(self.end)
+
+
 def parse_screen_file(screen_file):
     screen = []
     with open(screen_file) as f:
@@ -24,6 +45,16 @@
     return fieldnames
 
 
+def detect_gene(abricate_report_row, regex, min_coverage, min_identity):
+    gene_of_interest = bool(re.search(regex, abricate_report_row['GENE']))
+    sufficient_coverage = float(abricate_report_row['%COVERAGE']) >= min_coverage
+    sufficient_identity = float(abricate_report_row['%IDENTITY']) >= min_identity
+    if gene_of_interest and sufficient_coverage and sufficient_identity:
+        return True
+    else:
+        return False
+
+
 def main(args):
     screen = parse_screen_file(args.screening_file)
     gene_detection_status_fieldnames = ['gene_name', 'detected']
@@ -44,20 +75,25 @@
                 'detected': False
             }
             for abricate_report_row in abricate_report_reader:
-                if re.search(gene['regex'], abricate_report_row['GENE']):
+                if detect_gene(abricate_report_row, gene['regex'], args.min_coverage, args.min_identity):
                     gene_detection_status['detected'] = True
                     screened_report_writer.writerow(abricate_report_row)
             gene_detection_status_writer.writerow(gene_detection_status)
             f1.seek(0)  # return file pointer to start of abricate report
+            next(abricate_report_reader)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
     parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)")
-    parser.add_argument("--screened_report", help=("Output: Screened abricate report ",
-                                                   "including only genes of interest (tsv)"))
-    parser.add_argument("--gene_detection_status", help=("Output: detection status for all genes ",
-                                                         "listed in the screening file (tsv)"))
+    parser.add_argument("--screened_report",
+                        help=("Output: Screened abricate report, including only genes of interest (tsv)"))
+    parser.add_argument("--gene_detection_status",
+                        help=("Output: detection status for all genes listed in the screening file (tsv)"))
+    parser.add_argument("--min_coverage", type=float,  default=90.0,
+                        choices=Range(0.0, 100.0), help=("Minimum percent coverage"))
+    parser.add_argument("--min_identity", type=float, default=90.0,
+                        choices=Range(0.0, 100.0), help=("Minimum percent identity"))
     args = parser.parse_args()
     main(args)
b
diff -r 912a3a3dc082 -r 2262e531c50b screen_abricate_report.xml
--- a/screen_abricate_report.xml Thu Dec 19 20:31:11 2019 -0500
+++ b/screen_abricate_report.xml Thu Jan 02 17:14:34 2020 -0500
b
@@ -1,4 +1,4 @@
-<tool id="screen_abricate_report" name="Screen Abricate Report" version="0.2.0">
+<tool id="screen_abricate_report" name="Screen Abricate Report" version="0.3.0">
     <description>Screens an abricate report for genes of interest</description>
     <requirements>
     </requirements>
@@ -12,7 +12,9 @@
              #end if
              --screening_file '${input_screening_file}'
              --screened_report '${screened_report}'
-             --gene_detection_status '${gene_detection_status}' &&
+             --gene_detection_status '${gene_detection_status}'
+             --min_coverage '${min_coverage}'
+             --min_identity '${min_identity}' &&
              cp '${input_screening_file}' '${output_screening_file}'
     ]]></command>
     <inputs>
@@ -34,6 +36,8 @@
             </when>
         </conditional>
         <param name="abricate_report" type="data" format="tabular" />
+        <param name="min_coverage" type="float" min="0.0" value="90.0" max="100.0" />
+        <param name="min_identity" type="float" min="0.0" value="90.0" max="100.0" />
     </inputs>
     <outputs>
         <data name="screened_report" format="tabular" label="Screened Abricate Report" />
@@ -65,7 +69,7 @@
     gene of interest, and the second column is a regular expression that can be used 
     to identify examples of that gene in the 'GENE' column of the abricate report.
 
-    For example, one might use the regex 'KPC\-{d+}' to identify all alleles of the 
+    For example, one might use the regex '^KPC-\d+$' to identify all alleles of the
     KPC gene (KPC-2, KPC-3, KPC-4, ..., KPC-10, ...)
     ]]></help>
     <citations>
b
diff -r 912a3a3dc082 -r 2262e531c50b test-data/SAMN13042171_abricate_report_screened.tsv
--- a/test-data/SAMN13042171_abricate_report_screened.tsv Thu Dec 19 20:31:11 2019 -0500
+++ b/test-data/SAMN13042171_abricate_report_screened.tsv Thu Jan 02 17:14:34 2020 -0500
b
@@ -1,2 +1,3 @@
 #FILE SEQUENCE START END STRAND GENE COVERAGE COVERAGE_MAP GAPS %COVERAGE %IDENTITY DATABASE ACCESSION PRODUCT RESISTANCE
 SAMN13042171.fasta contig00063 2498 3310 - NDM-1 1-813/813 =============== 0/0 100.00 100.00 card FN396876:2407-3220 NDM-1 is a metallo-beta-lactamase isolated from Klebsiella pneumoniae with nearly complete resistance to all beta-lactam antibiotics. cephalosporin/penam/carbapenem/cephamycin
+SAMN13042171.fasta contig00035 9413 10237 - OXA-9 1-825/825 =============== 0/0 100.00 100.00 card M55547:1-826 OXA-9 is a beta-lactamase found in Klebsiella pneumoniae. cephalosporin/penam
b
diff -r 912a3a3dc082 -r 2262e531c50b test-data/SAMN13042171_gene_detection_status.tsv
--- a/test-data/SAMN13042171_gene_detection_status.tsv Thu Dec 19 20:31:11 2019 -0500
+++ b/test-data/SAMN13042171_gene_detection_status.tsv Thu Jan 02 17:14:34 2020 -0500
b
@@ -1,4 +1,5 @@
 gene_name detected
 KPC False
 NDM True
-OXA-48 False
+OXA True
+rpoB2 False
b
diff -r 912a3a3dc082 -r 2262e531c50b test-data/screening_file.tsv
--- a/test-data/screening_file.tsv Thu Dec 19 20:31:11 2019 -0500
+++ b/test-data/screening_file.tsv Thu Jan 02 17:14:34 2020 -0500
b
@@ -1,4 +1,5 @@
 gene_name regex
-KPC KPC
-NDM NDM
-OXA-48 OXA\-48
+KPC ^KPC-\d+$
+NDM ^NDM-\d+$
+OXA ^OXA-\d+$
+rpoB2 rpoB2