Mercurial > repos > thanhlv > integron_finder
diff integron_finder.xml @ 0:3a24265075bd draft default tip
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/blob/master/tools/integron_finder commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author | thanhlv |
---|---|
date | Mon, 13 Feb 2023 13:53:43 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/integron_finder.xml	Mon Feb 13 13:53:43 2023 +0000
@@ -0,0 +1,234 @@
+<tool id="integron_finder" name="Integron Finder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description> is a program that detects integrons in DNA sequences</description>
+    <!-- The @TOOL_VERSION@, @VERSION_SUFFIX@ and @THREADS@ tokens and the macros expanded below are expected to be defined in macro.xml. -->
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+    <expand macro="edam_info"/>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <!-- Runs integron_finder, then renames its Results_Integron_Finder_* directory to the fixed name that the outputs section reads from. -->
+    <command detect_errors="aggressive"><![CDATA[
+        integron_finder
+            '$sequence'
+            --cpu @THREADS@
+            --keep-tmp
+            $local_max
+            #if $type_replicon
+                $type_replicon
+            #end if
+            #if $topology_file
+                --topology-file '$topology_file'
+            #end if
+            $promoter_attI
+            --distance-thresh $settings.attc_settings.dist_thresh
+            --calin-threshold $settings.attc_settings.calin_threshold
+            --max-attc-size $settings.attc_settings.max_attc_size
+            --min-attc-size $settings.attc_settings.min_attc_size
+            $settings.attc_settings.keep_palindromes
+            #if $settings.attc_settings.covar_matrix
+                --attc-model '$settings.attc_settings.covar_matrix'
+            #end if
+            $settings.protein_settings.no_proteins
+            $settings.protein_settings.union_integrases
+            $settings.protein_settings.func_annot
+            $gbk
+            $pdf
+        && mv Results_Integron_Finder_* Results_Integron_Finder
+    ]]></command>
+    <inputs>
+        <param type="data" name="sequence" format="fasta" label="Replicon file" help="Replicon can be entire chromosome, contig, PCR fragments..." />
+        <param name="local_max" argument="--local-max" type="boolean" checked="false" truevalue="--local-max" falsevalue="" label="Thorough local detection" help="This option allows a more sensitive search. It will be slower (dependent on the number of hits) if integrons are found, but will be as fast if nothing is detected and will not increase the false positive rate." />
+        <param name="type_replicon" type="select" optional="true" label="Default replicons topology" help="Set the default topology for replicons, linear, circular (default: no topology)">
+            <option value="--linear">linear (--linear)</option>
+            <option value="--circ">circular (--circ)</option>
+        </param>
+        <param name="topology_file" argument="--topology-file" type="data" format="txt" optional="true" label="Select a topology file from your history"/>
+        <param name="promoter_attI" argument="--promoter-attI" type="boolean" checked="false" truevalue="--promoter-attI" falsevalue="" label="Search also for promoter and attI sites?" />
+        <param argument="--gbk" type="boolean" checked="false" truevalue="--gbk" falsevalue="" label="Genbank output?" help="Generate a GenBank file with the sequence annotated with the same annotations as the .integrons file."/>
+        <param argument="--pdf" type="boolean" checked="false" truevalue="--pdf" falsevalue="" label="pdf output?" help="For each complete integron, a simple graphic of the region is depicted (in pdf format)"/>
+        <section name="settings" title="Advanced Parameters" expanded="False">
+            <section name="attc_settings" title="Attc options" expanded="False">
+                <param name="dist_thresh" argument="--distance-thresh" type="integer" value="4000" label="Threshold for clustering (in base)" min="0" help="By default, to cluster an array of attC sites and an integron integrase, they must be less than 4 kb apart. You can here change this value." />
+                <param name="calin_threshold" argument="--calin-threshold" type="integer" value="2" label="Threshold to filter CALIN" min="0" help="Keep 'CALIN' only if attC sites number &gt;= calin-threshold" />
+                <param name="max_attc_size" argument="--max-attc-size" type="integer" value="200" label="Maximum value for attC size" min="0"/>
+                <param name="min_attc_size" argument="--min-attc-size" type="integer" value="40" label="Minimum value for attC size" min="0" />
+                <param name="keep_palindromes" argument="--keep-palindromes" type="boolean" checked="false" truevalue="--keep-palindromes" falsevalue="" label="Keep palindromes with the highest evalue" help="For a given hit, if the palindromic version is found, don't remove the one with highest evalue"/>
+                <param name="covar_matrix" argument="--attc-model" type="data" optional="true" format="txt" label="Covariance Matrix" />
+            </section>
+            <section name="protein_settings" title="Protein options" expanded="False">
+                <param name="no_proteins" argument="--no-proteins" type="boolean" checked="false" truevalue="--no-proteins" falsevalue="" label="Just look for attC sites" help="When enabled, it does not annotate CDS and does not find integrase."/>
+                <param name="union_integrases" argument="--union-integrases" type="boolean" checked="false" truevalue="--union-integrases" falsevalue="" label="Use the union of the hits" help="Instead of taking intersection of hits from Phage_int profile (Tyr recombinases) and integron_integrase profile, use the union of the hits" />
+                <param name="func_annot" argument="--func-annot" type="boolean" checked="false" truevalue="--func-annot" falsevalue="" label="Annotate cassettes given HMM profiles" />
+            </section>
+        </section>
+        <!-- Wrapper-only switch: it is not passed to integron_finder and only drives the filter on the integron_log output. -->
+        <param name="no_logfile" type="boolean" truevalue="true" falsevalue="false" label="Remove log file"/>
+    </inputs>
+    <outputs>
+        <!-- The two collections are only created when the matching flag (gbk / pdf) is enabled; the log is dropped when no_logfile is set. -->
+        <collection type="list" label="Genbank files from [$tool.name] on $[on_string]" name="genbank_out">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.gbk" format="gbk" visible="false" directory="Results_Integron_Finder/" />
+            <filter>gbk</filter>
+        </collection>
+        <data format="txt" name="integron_log" from_work_dir="Results_Integron_Finder/integron_finder.out" label="Log from [$tool.name] on $[on_string]">
+            <filter>not no_logfile</filter>
+        </data>
+        <data format="tsv" name="integrons_table" from_work_dir="Results_Integron_Finder/*.integrons" label="Integrons annotations from [$tool.name] on $[on_string]"/>
+        <data format="tsv" name="summary" from_work_dir="Results_Integron_Finder/*.summary" label="Summary from [$tool.name] on $[on_string]"/>
+        <collection type="list" label="Graphic from [$tool.name] on $[on_string]" name="pdf_out">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.pdf" format="pdf" visible="false" directory="Results_Integron_Finder/" />
+            <filter>pdf</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Only the first test keeps the log; every later test sets no_logfile, so the log output is filtered out there. -->
+        <test expect_num_outputs="3">
+            <param name="sequence" value="input.fasta"/>
+            <output name="integron_log" value="integron_log" lines_diff="3" />
+            <output name="integrons_table" value="test1_integrons_table.tsv" lines_diff="3"/>
+            <output name="summary" value="summary.tsv" lines_diff="3"/>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="local_max" value="true"/>
+            <param name="type_replicon" value="--linear"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test2_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="4" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="type_replicon" value="--circ"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test3_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="topology_file" value="topology.txt"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test4_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="5" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="promoter_attI" value="true"/>
+            <param name="no_logfile" value="true"/>
+            <output name="integrons_table" value="test5_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="4">
+            <param name="sequence" value="input.fasta"/>
+            <param name="gbk" value="true"/>
+            <param name="pdf" value="true"/>
+            <param name="no_logfile" value="true"/>
+            <output_collection name="genbank_out" type="list">
+                <element name="ACBA.007.P01_13">
+                    <assert_contents>
+                        <has_text text="MKTATAPLPPLRSVKVLDQLRERIRYLHYSLRTEQAYVNWVRAFI"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="pdf_out" type="list">
+                <element name="ACBA.007.P01_13_1">
+                    <assert_contents>
+                        <has_text text="&gt;"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="integrons_table" value="test6_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="attc_settings">
+                    <param name="dist_thresh" value="2000"/>
+                    <param name="calin_threshold" value="3"/>
+                    <param name="max_attc_size" value="188"/>
+                    <param name="min_attc_size" value="30"/>
+                    <!-- NOTE(review): the empty value presumably leaves keep-palindromes switched off; confirm this is the intent. -->
+                    <param name="keep_palindromes" value=""/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test7_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="attc_settings">
+                    <param name="covar_matrix" value="covar.txt"/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test8_integrons_table.tsv" lines_diff="10" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="protein_settings">
+                    <param name="no_proteins" value="true"/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test9_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="test9_summary.tsv" lines_diff="3" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="sequence" value="input.fasta"/>
+            <param name="no_logfile" value="true"/>
+            <section name="settings">
+                <section name="protein_settings">
+                    <param name="union_integrases" value="true" />
+                    <param name="func_annot" value="true"/>
+                </section>
+            </section>
+            <output name="integrons_table" value="test10_integrons_table.tsv" lines_diff="3" />
+            <output name="summary" value="summary.tsv" lines_diff="3" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+How does it work ?
+==================
+
+- First, IntegronFinder annotates the DNA sequence's CDS with Prodigal.
+
+- Second, IntegronFinder detects independently integron integrase and *attC*
+  recombination sites. The Integron integrase is detected by using the intersection
+  of two HMM profiles:
+
+  - one specific to tyrosine-recombinase (PF00589)
+  - one specific to the integron integrase, near the patch III domain of tyrosine recombinases.
+
+The *attC* recombination site is detected with a covariance model (CM), which
+models the secondary structure in addition to the few conserved sequence
+positions.
+
+
+- Third, the results are integrated, and IntegronFinder distinguishes 3 types of
+  elements:
+
+  - complete integron
+      Integron with integron integrase nearby *attC* site(s)
+  - In0 element
+      Integron integrase only, without any *attC* site nearby
+  - CALIN element
+      Cluster of *attC* sites Lacking INtegrase nearby.
+      A rule of thumb to avoid false positives is to filter out singleton
+      *attC* sites.
+
+IntegronFinder can also annotate gene cassettes (CDS nearby *attC* sites) using
+Resfams, a database of HMM profiles aiming at annotating antibiotic resistance
+genes. This database is provided but the user can add any other HMM profiles
+database of their own interest.
+
+When available, IntegronFinder annotates the promoters and attI sites by pattern
+matching.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>