diff integron_finder.xml @ 0:3a24265075bd draft default tip

planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/blob/master/tools/integron_finder commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author thanhlv
date Mon, 13 Feb 2023 13:53:43 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/integron_finder.xml	Mon Feb 13 13:53:43 2023 +0000
@@ -0,0 +1,228 @@
+<tool id="integron_finder" name="Integron Finder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description> is a program that detects integrons in DNA sequences</description>
+    <macros>
+       <import>macro.xml</import>
+    </macros>
+    <expand macro="edam_info"/>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="aggressive"><![CDATA[
+        integron_finder
+        '$sequence'
+        --cpu @THREADS@
+        --keep-tmp
+        $local_max
+        #if $type_replicon
+          $type_replicon
+        #end if
+        #if $topology_file
+            --topology-file '$topology_file'
+        #end if
+        $promoter_attI
+        -dt $settings.attc_settings.dist_thresh
+        --calin-threshold $settings.attc_settings.calin_threshold
+        --max-attc-size $settings.attc_settings.max_attc_size
+        --min-attc-size $settings.attc_settings.min_attc_size
+        $settings.attc_settings.keep_palindromes
+        #if $settings.attc_settings.covar_matrix
+            --attc-model '$settings.attc_settings.covar_matrix'
+        #end if
+        $settings.protein_settings.no_proteins
+        $settings.protein_settings.union_integrases
+        $settings.protein_settings.func_annot
+        $gbk
+        $pdf
+        && mv Results_Integron_Finder_* Results_Integron_Finder
+    ]]></command>
+    <inputs>
+        <param type="data" name="sequence" format="fasta" label="Replicon file" help="Replicon can be entire chromosome, contif, PCR fragments..." />
+        <param name="local_max" argument="--local-max" type="boolean" checked="false" truevalue="--local-max" falsevalue="" label="Thorough local detection" help="This option allows a more sensitive search. I will be slower (dependant on the number of hits) if integrons are found, but will be as fast if nothing is detected and will not increase the false positive rate." />
+	      <param name="type_replicon" type="select" optional="true" label="Default replicons topology" help="Set the default topology for replicons, linear, circular (deault: no topology)">
+	          <option value="--linear">linear (--linear)</option>
+	          <option value="--circ">circular (--circ)</option>
+	      </param>
+        <param name="topology_file" argument="--topology-file" type="data" format="txt" optional="true" label="Select a topology file from your history"/>
+        <param name="promoter_attI" argument="--promoter-attI" type="boolean" checked="false" truevalue="--promoter-attI" falsevalue="" label="Search also for promoter and attI sites?" />
+        <param argument="--gbk" type="boolean" checked="false" truevalue="--gbk" falsevalue="" label="Genbank output?" help="Generate a GenBank file with the sequence annotated with the same annotations than .integrons file."/>
+        <param argument="--pdf" type="boolean" checked="false" truevalue="--pdf" falsevalue="" label="pdf output?" help="For each complete integron, a simple graphic of the region is depicted (in pdf format)"/>
+        <section name="settings" title="Advanced Parameters" expanded="False">
+            <section name="attc_settings" title="Attc options" expanded="False">
+               <param name="dist_thresh" argument="--distance-thresh" type="integer" value="4000" label="Threshold for clustering (in base)" min="0" help="By default, to cluster an array of attC sites and an integron integrase, they must be less than 4 kb apart. You can here change this value." />
+               <param name="calin_threshold" type="integer" value="2" label="Threshold to filter CALIN" min="0" help="Keep 'CALIN' only if attC sites number >= calin-threshold" />
+               <param name="max_attc_size" type="integer" value="200" label="Maximum value for attC size" min="0"/>
+               <param name="min_attc_size" type="integer" value="40" label="Minimum value for attC size" min="0" />
+               <param name="keep_palindromes" argument="--keep-palindromes" type="boolean" checked="false" truevalue="--keep-palindromes" falsevalue="" label="Keep palindromes with the highest evalue" help="For a given hit, if the palindromic version is found, don't remove the one with highest evalue"/>
+               <param name="covar_matrix" argument="--attc-model" type="data"  optional="true" format="txt" label="Covariance Matrix" />
+            </section>
+            <section name="protein_settings" title="Protein options" expanded="False">
+                <param name="no_proteins" argument="--no-proteins" type="boolean" checked="false" truevalue="--no-proteins" falsevalue="" label="Just look for attC sites" help="When enabled, it does not annotate CDS and does not find integrase."/>
+                <param name="union_integrases" argument="--union-integrases" type="boolean" checked="false" truevalue="--union-integrases" falsevalue="" label="Use the union of the hits" help="Instead of taking intersection of hits from Phage_int profile (Tyr recombinases) and integron_integrase profile, use the union of the hits" />
+                <param name="func_annot" argument="--func-annot" type="boolean" checked="false" truevalue="--func-annot" falsevalue="" label="Annotate cassettes given HMM profiles" />
+            </section>
+        </section>
+        <param name="no_logfile" type="boolean" truevalue="true" falsevalue="false" label="Remove log file"/>
+    </inputs>
+    <outputs>
+        <collection type="list" label="Genbank files from [$tool.name] on $[on_string]" name="genbank_out">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.gbk" format="gbk" visible="false" directory="Results_Integron_Finder/" />
+            <filter>gbk</filter>
+        </collection>
+	      <data format="txt" name="integron_log" from_work_dir="Results_Integron_Finder/integron_finder.out" label="Log from [$tool.name] on $[on_string]">
+            <filter> no_logfile == False</filter>
+        </data>
+        <data format="tsv" name="integrons_table" from_work_dir="Results_Integron_Finder/*.integrons" label="Integrons annotations from [$tool.name] on $[on_string]"/>
+        <data format="tsv" name="summary" from_work_dir="Results_Integron_Finder/*.summary" label="Summary from [$tool.name] on $[on_string]"/>
+        <collection type="list" label="Graphic from [$tool.name] on $[on_string]" name="pdf_out">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.pdf" format="pdf" visible="false" directory="Results_Integron_Finder/" />
+            <filter>pdf</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="3">
+            <param name="sequence" value="input.fasta"/>
+            <output name="integron_log" value="integron_log" lines_diff="3" />
+            <output name="integrons_table" value="test1_integrons_table.tsv" lines_diff="3"/>
+            <output name="summary" value="summary.tsv" lines_diff="3"/>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="local_max" value="true"/>
+            <param name="type_replicon" value="--linear"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test2_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="4" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="type_replicon" value="--circ"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test3_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="topology_file" value="topology.txt"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test4_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="5" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="promoter_attI" value="true"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test5_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="4">
+            <param name="sequence" value="input.fasta"/>
+            <param name="gbk" value="true"/>
+            <param name="pdf" value="true"/>
+            <param name="no_logfile" value="true"/>
+            <output_collection name="genbank_out" type="list">
+                <element name="ACBA.007.P01_13">
+                    <assert_contents>
+                        <has_text text="MKTATAPLPPLRSVKVLDQLRERIRYLHYSLRTEQAYVNWVRAFI"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="pdf_out" type="list">
+                <element name="ACBA.007.P01_13_1">
+                    <assert_contents>
+                        <has_text text=">"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="integrons_table" value="test6_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="attc_settings">
+                    <param name="dist_thresh" value="2000"/>
+                    <param name="calin_threshold" value="3"/>
+                    <param name="max_attc_size" value="188"/>
+                    <param name="min_attc_size" value="30"/>
+                    <param name="keep_palindromes" value=""/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test7_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="attc_settings">
+                    <param name="covar_matrix" value="covar.txt"/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test8_integrons_table.tsv" lines_diff="10" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="protein_settings">
+                    <param name="no_proteins" value="true"/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test9_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="test9_summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="protein_settings">
+                    <param name="union_integrases" value="true" />
+                    <param name="func_annot" value="true"/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test10_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+How does it work ?
+==================
+
+- First, IntegronFinder annotates the DNA sequence's CDS with Prodigal.
+
+- Second, IntegronFinder detects independently integron integrase and *attC*
+  recombination sites. The Integron integrase is detected by using the intersection
+  of two HMM profiles:
+
+  - one specific of tyrosine-recombinase (PF00589)
+  - one specific of the integron integrase, near the patch III domain of tyrosine recombinases.
+
+The *attC* recombination site is detected with a covariance model (CM), which
+models the secondary structure in addition to the few conserved sequence
+positions.
+
+
+- Third, the results are integrated, and IntegronFinder distinguishes 3 types of
+  elements:
+
+  - complete integron
+      Integron with integron integrase nearby *attC* site(s)
+  - In0 element
+      Integron integrase only, without any *attC* site nearby
+  - CALIN element
+      Cluster of *attC* sites Lacking INtegrase nearby.
+      A rule of thumb to avoid false positive is to filter out singleton of
+      *attC* site.
+
+IntegronFinder can also annotate gene cassettes (CDS nearby *attC* sites) using
+Resfams, a database of HMM profiles aiming at annotating antibiotic resistance
+genes. This database is provided but the user can add any other HMM profiles
+database of its own interest.
+
+When available, IntegronFinder annotates the promoters and attI sites by pattern
+matching.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>